diff options
author | Julian Anastasov <ja@ssi.bg> | 2022-11-22 18:46:03 +0200 |
---|---|---|
committer | Pablo Neira Ayuso <pablo@netfilter.org> | 2022-12-10 22:44:43 +0100 |
commit | f0be83d5421718ead31707b6ece34cf77d411c00 (patch) | |
tree | 8365c1d0b9452b052eee47c4e71e985b9012cdb4 /net/netfilter/ipvs | |
parent | 705dd34440812735ece298eb5bc153fde9544d42 (diff) |
ipvs: add est_cpulist and est_nice sysctl vars
Allow the kthreads for stats to be configured for
specific cpulist (isolation) and niceness (scheduling
priority).
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Cc: yunhong-cgl jiang <xintian1976@gmail.com>
Cc: "dust.li" <dust.li@linux.alibaba.com>
Reviewed-by: Jiri Wiesner <jwiesner@suse.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'net/netfilter/ipvs')
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c | 143 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_est.c | 12 |
2 files changed, 151 insertions, 4 deletions
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c41a5392edc9..38df3ee655ed 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -263,7 +263,7 @@ static void est_reload_work_handler(struct work_struct *work) /* New config ? Stop kthread tasks */ if (genid != genid_done) ip_vs_est_kthread_stop(kd); - if (!kd->task) { + if (!kd->task && !ip_vs_est_stopped(ipvs)) { /* Do not start kthreads above 0 in calc phase */ if ((!id || !ipvs->est_calc_phase) && ip_vs_est_kthread_start(ipvs, kd) < 0) @@ -1940,6 +1940,122 @@ proc_do_sync_ports(struct ctl_table *table, int write, return rc; } +static int ipvs_proc_est_cpumask_set(struct ctl_table *table, void *buffer) +{ + struct netns_ipvs *ipvs = table->extra2; + cpumask_var_t *valp = table->data; + cpumask_var_t newmask; + int ret; + + if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) + return -ENOMEM; + + ret = cpulist_parse(buffer, newmask); + if (ret) + goto out; + + mutex_lock(&ipvs->est_mutex); + + if (!ipvs->est_cpulist_valid) { + if (!zalloc_cpumask_var(valp, GFP_KERNEL)) { + ret = -ENOMEM; + goto unlock; + } + ipvs->est_cpulist_valid = 1; + } + cpumask_and(newmask, newmask, ¤t->cpus_mask); + cpumask_copy(*valp, newmask); + /* est_max_threads may depend on cpulist size */ + ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); + ipvs->est_calc_phase = 1; + ip_vs_est_reload_start(ipvs); + +unlock: + mutex_unlock(&ipvs->est_mutex); + +out: + free_cpumask_var(newmask); + return ret; +} + +static int ipvs_proc_est_cpumask_get(struct ctl_table *table, void *buffer, + size_t size) +{ + struct netns_ipvs *ipvs = table->extra2; + cpumask_var_t *valp = table->data; + struct cpumask *mask; + int ret; + + mutex_lock(&ipvs->est_mutex); + + if (ipvs->est_cpulist_valid) + mask = *valp; + else + mask = (struct cpumask *)housekeeping_cpumask(HK_TYPE_KTHREAD); + ret = scnprintf(buffer, size, "%*pbl\n", cpumask_pr_args(mask)); + + mutex_unlock(&ipvs->est_mutex); + + return ret; +} + +static int ipvs_proc_est_cpulist(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + /* Ignore both read and write(append) if *ppos not 0 */ + if (*ppos || !*lenp) { + *lenp = 0; + return 0; + } + if (write) { + /* proc_sys_call_handler() appends terminator */ + ret = ipvs_proc_est_cpumask_set(table, buffer); + if (ret >= 0) + *ppos += *lenp; + } else { + /* proc_sys_call_handler() allocates 1 byte for terminator */ + ret = ipvs_proc_est_cpumask_get(table, buffer, *lenp + 1); + if (ret >= 0) { + *lenp = ret; + *ppos += *lenp; + ret = 0; + } + } + return ret; +} + +static int ipvs_proc_est_nice(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct netns_ipvs *ipvs = table->extra2; + int *valp = table->data; + int val = *valp; + int ret; + + struct ctl_table tmp_table = { + .data = &val, + .maxlen = sizeof(int), + .mode = table->mode, + }; + + ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos); + if (write && ret >= 0) { + if (val < MIN_NICE || val > MAX_NICE) { + ret = -EINVAL; + } else { + mutex_lock(&ipvs->est_mutex); + if (*valp != val) { + *valp = val; + ip_vs_est_reload_start(ipvs); + } + mutex_unlock(&ipvs->est_mutex); + } + } + return ret; +} + /* * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) * Do not change order or insert new entries without @@ -2116,6 +2232,18 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "est_cpulist", + .maxlen = NR_CPUS, /* unused */ + .mode = 0644, + .proc_handler = ipvs_proc_est_cpulist, + }, + { + .procname = "est_nice", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = ipvs_proc_est_nice, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -4134,6 +4262,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); INIT_DELAYED_WORK(&ipvs->expire_nodest_conn_work, expire_nodest_conn_handler); + ipvs->est_stopped = 0; if (!net_eq(net, &init_net)) { tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); @@ -4195,6 +4324,15 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; ipvs->sysctl_run_estimation = 1; tbl[idx++].data = &ipvs->sysctl_run_estimation; + + ipvs->est_cpulist_valid = 0; + tbl[idx].extra2 = ipvs; + tbl[idx++].data = &ipvs->sysctl_est_cpulist; + + ipvs->sysctl_est_nice = IPVS_EST_NICE; + tbl[idx].extra2 = ipvs; + tbl[idx++].data = &ipvs->sysctl_est_nice; + #ifdef CONFIG_IP_VS_DEBUG /* Global sysctls must be ro in non-init netns */ if (!net_eq(net, &init_net)) @@ -4234,6 +4372,9 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) unregister_net_sysctl_table(ipvs->sysctl_hdr); ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s); + if (ipvs->est_cpulist_valid) + free_cpumask_var(ipvs->sysctl_est_cpulist); + if (!net_eq(net, &init_net)) kfree(ipvs->sysctl_tbl); } diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 2fb6c097437c..e0f5f5da5b6d 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -57,6 +57,9 @@ - kthread contexts are created and attached to array - the kthread tasks are started when first service is added, before that the total stats are not estimated + - when configuration (cpulist/nice) is changed, the tasks are restarted + by work (est_reload_work) + - kthread tasks are stopped while the cpulist is empty - the kthread context holds lists with estimators (chains) which are processed every 2 seconds - as estimators can be added dynamically and in bursts, we try to spread @@ -229,6 +232,7 @@ void ip_vs_est_reload_start(struct netns_ipvs *ipvs) /* Ignore reloads before first service is added */ if (!ipvs->enable) return; + ip_vs_est_stopped_recalc(ipvs); /* Bump the kthread configuration genid */ atomic_inc(&ipvs->est_genid); queue_delayed_work(system_long_wq, &ipvs->est_reload_work, 0); @@ -259,6 +263,9 @@ int ip_vs_est_kthread_start(struct netns_ipvs *ipvs, goto out; } + set_user_nice(kd->task, sysctl_est_nice(ipvs)); + set_cpus_allowed_ptr(kd->task, sysctl_est_cpulist(ipvs)); + pr_info("starting estimator thread %d...\n", kd->id); wake_up_process(kd->task); @@ -334,7 +341,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs) } /* Start kthread tasks only when services are present */ - if (ipvs->enable) { + if (ipvs->enable && !ip_vs_est_stopped(ipvs)) { ret = ip_vs_est_kthread_start(ipvs, kd); if (ret < 0) goto out; @@ -478,8 +485,7 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) int ret; if (!ipvs->est_max_threads && ipvs->enable) - ipvs->est_max_threads = IPVS_EST_CPU_KTHREADS * - num_possible_cpus(); + ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); est->ktid = -1; est->ktrow = IPVS_EST_NTICKS - 1; /* Initial delay */ |