Diffstat (limited to 'pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.7.patch')
 pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.7.patch | 407 +++++++++++
 1 file changed, 407 insertions(+), 0 deletions(-)
diff --git a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.7.patch b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.7.patch
new file mode 100644
index 00000000000..74dae740e37
--- /dev/null
+++ b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.7.patch
@@ -0,0 +1,407 @@
+commit 0d966df508ef4d6c0b1baae9e369f4fb0d3e10af
+Author: Tejun Heo <tj@kernel.org>
+Date:   Fri Mar 11 07:31:23 2016 -0500
+
+    sched: Misc preps for cgroup unified hierarchy interface
+    
+    Make the following changes in preparation for the cpu controller
+    interface implementation for the unified hierarchy.  This patch
+    doesn't cause any functional differences.
+    
+    * s/cpu_stats_show()/cpu_cfs_stats_show()/
+    
+    * s/cpu_files/cpu_legacy_files/
+    
+    * Separate out cpuacct_stats_read() from cpuacct_stats_show().  While
+      at it, remove the pointless cpuacct_stat_desc[] array.
+    
+    Signed-off-by: Tejun Heo <tj@kernel.org>
+    Cc: Ingo Molnar <mingo@redhat.com>
+    Cc: Peter Zijlstra <peterz@infradead.org>
+    Cc: Li Zefan <lizefan@huawei.com>
+    Cc: Johannes Weiner <hannes@cmpxchg.org>
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 97ee9ac..c148dfe 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -8482,7 +8482,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
+ 	return ret;
+ }
+ 
+-static int cpu_stats_show(struct seq_file *sf, void *v)
++static int cpu_cfs_stats_show(struct seq_file *sf, void *v)
+ {
+ 	struct task_group *tg = css_tg(seq_css(sf));
+ 	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+@@ -8522,7 +8522,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
+ }
+ #endif /* CONFIG_RT_GROUP_SCHED */
+ 
+-static struct cftype cpu_files[] = {
++static struct cftype cpu_legacy_files[] = {
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ 	{
+ 		.name = "shares",
+@@ -8543,7 +8543,7 @@ static struct cftype cpu_files[] = {
+ 	},
+ 	{
+ 		.name = "stat",
+-		.seq_show = cpu_stats_show,
++		.seq_show = cpu_cfs_stats_show,
+ 	},
+ #endif
+ #ifdef CONFIG_RT_GROUP_SCHED
+@@ -8568,7 +8568,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
+ 	.fork		= cpu_cgroup_fork,
+ 	.can_attach	= cpu_cgroup_can_attach,
+ 	.attach		= cpu_cgroup_attach,
+-	.legacy_cftypes	= cpu_files,
++	.legacy_cftypes	= cpu_legacy_files,
+ 	.early_init	= true,
+ };
+ 
+diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
+index 41f85c4..3eb9eda 100644
+--- a/kernel/sched/cpuacct.c
++++ b/kernel/sched/cpuacct.c
+@@ -242,36 +242,33 @@ static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
+ 	return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE);
+ }
+ 
+-static const char * const cpuacct_stat_desc[] = {
+-	[CPUACCT_STAT_USER] = "user",
+-	[CPUACCT_STAT_SYSTEM] = "system",
+-};
+-
+-static int cpuacct_stats_show(struct seq_file *sf, void *v)
++static void cpuacct_stats_read(struct cpuacct *ca, u64 *userp, u64 *sysp)
+ {
+-	struct cpuacct *ca = css_ca(seq_css(sf));
+ 	int cpu;
+-	s64 val = 0;
+ 
++	*userp = 0;
+ 	for_each_possible_cpu(cpu) {
+ 		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
+-		val += kcpustat->cpustat[CPUTIME_USER];
+-		val += kcpustat->cpustat[CPUTIME_NICE];
++		*userp += kcpustat->cpustat[CPUTIME_USER];
++		*userp += kcpustat->cpustat[CPUTIME_NICE];
+ 	}
+-	val = cputime64_to_clock_t(val);
+-	seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);
+ 
+-	val = 0;
++	*sysp = 0;
+ 	for_each_possible_cpu(cpu) {
+ 		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
+-		val += kcpustat->cpustat[CPUTIME_SYSTEM];
+-		val += kcpustat->cpustat[CPUTIME_IRQ];
+-		val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
++		*sysp += kcpustat->cpustat[CPUTIME_SYSTEM];
++		*sysp += kcpustat->cpustat[CPUTIME_IRQ];
++		*sysp += kcpustat->cpustat[CPUTIME_SOFTIRQ];
+ 	}
++}
+ 
+-	val = cputime64_to_clock_t(val);
+-	seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
++static int cpuacct_stats_show(struct seq_file *sf, void *v)
++{
++	cputime64_t user, sys;
+ 
++	cpuacct_stats_read(css_ca(seq_css(sf)), &user, &sys);
++	seq_printf(sf, "user %lld\n", cputime64_to_clock_t(user));
++	seq_printf(sf, "system %lld\n", cputime64_to_clock_t(sys));
+ 	return 0;
+ }
+ 
+
+commit ed6d93036ec930cb774da10b7c87f67905ce71f1
+Author: Tejun Heo <tj@kernel.org>
+Date:   Fri Mar 11 07:31:23 2016 -0500
+
+    sched: Implement interface for cgroup unified hierarchy
+    
+    While the cpu controller doesn't have any functional problems, there
+    are a couple of interface issues which can be addressed in the v2
+    interface.
+    
+    * cpuacct being a separate controller.  This separation is artificial
+      and rather pointless as demonstrated by most use cases co-mounting
+      the two controllers.  It also forces certain information to be
+      accounted twice.
+    
+    * Use of different time units.  Writable control knobs use
+      microseconds, some stat fields use nanoseconds, while other cpuacct
+      stat fields use centiseconds.
+    
+    * Control knobs which can't be used in the root cgroup still show up
+      in the root.
+    
+    * Control knob names and semantics aren't consistent with other
+      controllers.
+    
+    This patchset implements the cpu controller's interface on the unified
+    hierarchy, adhering to the controller file conventions described in
+    Documentation/cgroups/unified-hierarchy.txt.  Overall, the following
+    changes are made.
+    
+    * cpuacct is implicitly enabled and disabled by cpu and its information
+      is reported through "cpu.stat", which now uses microseconds for all
+      time durations (see the example output after this list).  All time
+      duration fields now have "_usec" appended to them for clarity.  While
+      this doesn't solve the double accounting immediately, once the
+      majority of users switch to v2, cpu can directly account and report
+      the relevant stats and cpuacct can be disabled on the unified
+      hierarchy.
+    
+      Note that cpuacct.usage_percpu is currently not included in
+      "cpu.stat".  If this information is actually called for, it can be
+      added later.
+    
+    * "cpu.shares" is replaced with "cpu.weight" and operates on the
+      standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000).
+      The weight is scaled to scheduler weight so that 100 maps to 1024
+      and the ratio relationship is preserved - if weight is W and its
+      scaled value is S, W / 100 == S / 1024.  While the mapped range is a
+      bit smaller than the orignal scheduler weight range, the dead zones
+      on both sides are relatively small and covers wider range than the
+      nice value mappings.  This file doesn't make sense in the root
+      cgroup and isn't create on root.
+    
+    * "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max"
+      which contains both quota and period.
+    
+    * "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by
+      "cpu.rt.max" which contains both runtime and period.
+    
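+    For illustration only (the values below are made up; the field names
+    and their order follow the code added by this patch), a populated
+    cgroup's "cpu.stat" might then read:
+    
+        usage_usec 527189
+        user_usec 412031
+        system_usec 115158
+        nr_periods 4
+        nr_throttled 1
+        throttled_usec 18000
+    
+    The weight <-> shares round trip can be sketched in plain userspace C.
+    This is a minimal sketch, not part of the patch: div_closest() stands
+    in for the kernel's DIV_ROUND_CLOSEST_ULL(), 1024 for the default
+    scheduler shares, and scale_load()/scale_load_down() are omitted for
+    simplicity:
+    
+        #include <stdio.h>
+        #include <stdint.h>
+        #include <inttypes.h>
+        
+        /* round-to-nearest unsigned division, like DIV_ROUND_CLOSEST_ULL */
+        static uint64_t div_closest(uint64_t x, uint64_t d)
+        {
+        	return (x + d / 2) / d;
+        }
+        
+        int main(void)
+        {
+        	uint64_t weights[] = { 1, 100, 10000 };	/* MIN, DFL, MAX */
+        
+        	for (int i = 0; i < 3; i++) {
+        		uint64_t w = weights[i];
+        		/* cpu.weight write path: weight -> shares */
+        		uint64_t shares = div_closest(w * 1024, 100);
+        		/* cpu.weight read path: shares -> weight */
+        		uint64_t back = div_closest(shares * 100, 1024);
+        
+        		/* prints 1 -> 10 -> 1, 100 -> 1024 -> 100,
+        		 * 10000 -> 102400 -> 10000 */
+        		printf("weight %" PRIu64 " -> shares %" PRIu64
+        		       " -> weight %" PRIu64 "\n", w, shares, back);
+        	}
+        	return 0;
+        }
+    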
+    v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for
+        CFS bandwidth stats and also using raw division for u64.  Use
+        CONFIG_CFS_BANDWIDTH and do_div() instead.
+    
+        The semantics of "cpu.rt.max" are not fully decided yet.  Dropped
+        for now.
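+    
+    An illustrative "cpu.max" writer in userspace C follows.  This is a
+    hedged sketch, not part of the patch: set_cpu_max() is a hypothetical
+    helper, the cgroup directory path is whatever cgroupfs mount the
+    caller uses, and quota_us < 0 selects "max" (no limit), matching the
+    "$QUOTA $PERIOD" format parsed by cpu_period_quota_parse() introduced
+    below:
+    
+        #include <stdio.h>
+        
+        /* hypothetical helper, for illustration only */
+        static int set_cpu_max(const char *cgrp_dir, long quota_us,
+        		       long period_us)
+        {
+        	char path[4096];
+        	FILE *f;
+        	int ret;
+        
+        	snprintf(path, sizeof(path), "%s/cpu.max", cgrp_dir);
+        	f = fopen(path, "w");
+        	if (!f)
+        		return -1;
+        	if (quota_us < 0)
+        		ret = fprintf(f, "max %ld\n", period_us);	/* no limit */
+        	else
+        		ret = fprintf(f, "%ld %ld\n", quota_us, period_us);
+        	if (fclose(f) || ret < 0)
+        		return -1;
+        	return 0;
+        }
+        
+        int main(void)
+        {
+        	/* 50ms of quota per 100ms period, then remove the limit;
+        	 * the cgroup path is an assumption for the example */
+        	if (set_cpu_max("/sys/fs/cgroup/test", 50000, 100000))
+        		return 1;
+        	return set_cpu_max("/sys/fs/cgroup/test", -1, 100000) ? 1 : 0;
+        }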
+    
+    Signed-off-by: Tejun Heo <tj@kernel.org>
+    Cc: Ingo Molnar <mingo@redhat.com>
+    Cc: Peter Zijlstra <peterz@infradead.org>
+    Cc: Li Zefan <lizefan@huawei.com>
+    Cc: Johannes Weiner <hannes@cmpxchg.org>
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index c148dfe..7bba2c5 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -8561,6 +8561,139 @@ static struct cftype cpu_legacy_files[] = {
+ 	{ }	/* terminate */
+ };
+ 
++static int cpu_stats_show(struct seq_file *sf, void *v)
++{
++	cpuacct_cpu_stats_show(sf);
++
++#ifdef CONFIG_CFS_BANDWIDTH
++	{
++		struct task_group *tg = css_tg(seq_css(sf));
++		struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
++		u64 throttled_usec;
++
++		throttled_usec = cfs_b->throttled_time;
++		do_div(throttled_usec, NSEC_PER_USEC);
++
++		seq_printf(sf, "nr_periods %d\n"
++			   "nr_throttled %d\n"
++			   "throttled_usec %llu\n",
++			   cfs_b->nr_periods, cfs_b->nr_throttled,
++			   throttled_usec);
++	}
++#endif
++	return 0;
++}
++
++#ifdef CONFIG_FAIR_GROUP_SCHED
++static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
++			       struct cftype *cft)
++{
++	struct task_group *tg = css_tg(css);
++	u64 weight = scale_load_down(tg->shares);
++
++	return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
++}
++
++static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
++				struct cftype *cftype, u64 weight)
++{
++	/*
++	 * cgroup weight knobs should use the common MIN, DFL and MAX
++	 * values which are 1, 100 and 10000 respectively.  While it loses
++	 * a bit of range on both ends, it maps pretty well onto the shares
++	 * value used by scheduler and the round-trip conversions preserve
++	 * the original value over the entire range.
++	 */
++	if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
++		return -ERANGE;
++
++	weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL);
++
++	return sched_group_set_shares(css_tg(css), scale_load(weight));
++}
++#endif
++
++static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
++						  long period, long quota)
++{
++	if (quota < 0)
++		seq_puts(sf, "max");
++	else
++		seq_printf(sf, "%ld", quota);
++
++	seq_printf(sf, " %ld\n", period);
++}
++
++/* caller should put the current value in *@periodp before calling */
++static int __maybe_unused cpu_period_quota_parse(char *buf,
++						 u64 *periodp, u64 *quotap)
++{
++	char tok[21];	/* U64_MAX */
++
++	if (!sscanf(buf, "%s %llu", tok, periodp))
++		return -EINVAL;
++
++	*periodp *= NSEC_PER_USEC;
++
++	if (sscanf(tok, "%llu", quotap))
++		*quotap *= NSEC_PER_USEC;
++	else if (!strcmp(tok, "max"))
++		*quotap = RUNTIME_INF;
++	else
++		return -EINVAL;
++
++	return 0;
++}
++
++#ifdef CONFIG_CFS_BANDWIDTH
++static int cpu_max_show(struct seq_file *sf, void *v)
++{
++	struct task_group *tg = css_tg(seq_css(sf));
++
++	cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg));
++	return 0;
++}
++
++static ssize_t cpu_max_write(struct kernfs_open_file *of,
++			     char *buf, size_t nbytes, loff_t off)
++{
++	struct task_group *tg = css_tg(of_css(of));
++	u64 period = tg_get_cfs_period(tg);
++	u64 quota;
++	int ret;
++
++	ret = cpu_period_quota_parse(buf, &period, &quota);
++	if (!ret)
++		ret = tg_set_cfs_bandwidth(tg, period, quota);
++	return ret ?: nbytes;
++}
++#endif
++
++static struct cftype cpu_files[] = {
++	{
++		.name = "stat",
++		.flags = CFTYPE_NOT_ON_ROOT,
++		.seq_show = cpu_stats_show,
++	},
++#ifdef CONFIG_FAIR_GROUP_SCHED
++	{
++		.name = "weight",
++		.flags = CFTYPE_NOT_ON_ROOT,
++		.read_u64 = cpu_weight_read_u64,
++		.write_u64 = cpu_weight_write_u64,
++	},
++#endif
++#ifdef CONFIG_CFS_BANDWIDTH
++	{
++		.name = "max",
++		.flags = CFTYPE_NOT_ON_ROOT,
++		.seq_show = cpu_max_show,
++		.write = cpu_max_write,
++	},
++#endif
++	{ }	/* terminate */
++};
++
+ struct cgroup_subsys cpu_cgrp_subsys = {
+ 	.css_alloc	= cpu_cgroup_css_alloc,
+ 	.css_released	= cpu_cgroup_css_released,
+@@ -8569,7 +8702,15 @@ struct cgroup_subsys cpu_cgrp_subsys = {
+ 	.can_attach	= cpu_cgroup_can_attach,
+ 	.attach		= cpu_cgroup_attach,
+ 	.legacy_cftypes	= cpu_legacy_files,
++	.dfl_cftypes	= cpu_files,
+ 	.early_init	= true,
++#ifdef CONFIG_CGROUP_CPUACCT
++	/*
++	 * cpuacct is enabled together with cpu on the unified hierarchy
++	 * and its stats are reported through "cpu.stat".
++	 */
++	.depends_on	= 1 << cpuacct_cgrp_id,
++#endif
+ };
+ 
+ #endif	/* CONFIG_CGROUP_SCHED */
+diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
+index 3eb9eda..7a02d26 100644
+--- a/kernel/sched/cpuacct.c
++++ b/kernel/sched/cpuacct.c
+@@ -305,6 +305,30 @@ static struct cftype files[] = {
+ 	{ }	/* terminate */
+ };
+ 
++/* used to print cpuacct stats in cpu.stat on the unified hierarchy */
++void cpuacct_cpu_stats_show(struct seq_file *sf)
++{
++	struct cgroup_subsys_state *css;
++	u64 usage, user, sys;
++
++	css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys);
++
++	usage = cpuusage_read(css, seq_cft(sf));
++	cpuacct_stats_read(css_ca(css), &user, &sys);
++
++	user *= TICK_NSEC;
++	sys *= TICK_NSEC;
++	do_div(usage, NSEC_PER_USEC);
++	do_div(user, NSEC_PER_USEC);
++	do_div(sys, NSEC_PER_USEC);
++
++	seq_printf(sf, "usage_usec %llu\n"
++		   "user_usec %llu\n"
++		   "system_usec %llu\n", usage, user, sys);
++
++	css_put(css);
++}
++
+ /*
+  * charge this task's execution time to its accounting group.
+  *
+diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
+index ba72807..ddf7af4 100644
+--- a/kernel/sched/cpuacct.h
++++ b/kernel/sched/cpuacct.h
+@@ -2,6 +2,7 @@
+ 
+ extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+ extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
++extern void cpuacct_cpu_stats_show(struct seq_file *sf);
+ 
+ #else
+ 
+@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
+ {
+ }
+ 
++static inline void cpuacct_cpu_stats_show(struct seq_file *sf)
++{
++}
++
+ #endif