psi: Reduce calls to sched_clock() in psi

mainline inclusion
from mainline-v5.13-rc1
commit df774306
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I562O9
CVE: NA
backport: openEuler-22.03-LTS

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=df77430639c9cf73559bac0f25084518bf9a812d

--------------------------------

We noticed that the cost of psi increases with the increase in the levels of the cgroups. Particularly the cost of cpu_clock() sticks out as the kernel calls it multiple times as it traverses up the cgroup tree. This patch reduces the calls to cpu_clock().

Performed perf bench on Intel Broadwell with 3 levels of cgroup.

Before the patch:

$ perf bench sched all
 # Running sched/messaging benchmark...
 # 20 sender and receiver processes per group
 # 10 groups == 400 processes run

     Total time: 0.747 [sec]

 # Running sched/pipe benchmark...
 # Executed 1000000 pipe operations between two processes

     Total time: 3.516 [sec]

       3.516689 usecs/op
         284358 ops/sec

After the patch:

$ perf bench sched all
 # Running sched/messaging benchmark...
 # 20 sender and receiver processes per group
 # 10 groups == 400 processes run

     Total time: 0.640 [sec]

 # Running sched/pipe benchmark...
 # Executed 1000000 pipe operations between two processes

     Total time: 3.329 [sec]

       3.329820 usecs/op
         300316 ops/sec

Signed-off-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lkml.kernel.org/r/20210321205156.4186483-1-shakeelb@google.com
Signed-off-by: Chen Wandun <chenwandun@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>

psi: Reduce calls to sched_clock() in psi
mainline inclusion
from mainline-v5.13-rc1
commit df774306
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I562O9
CVE: NA
backport: openEuler-22.03-LTS

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=df77430639c9cf73559bac0f25084518bf9a812d

--------------------------------

We noticed that the cost of psi increases with the increase in the levels of the cgroups. Particularly the cost of cpu_clock() sticks out as the kernel calls it multiple times as it traverses up the cgroup tree. This patch reduces the calls to cpu_clock().

Performed perf bench on Intel Broadwell with 3 levels of cgroup.

Before the patch:

$ perf bench sched all
 # Running sched/messaging benchmark...
 # 20 sender and receiver processes per group
 # 10 groups == 400 processes run

     Total time: 0.747 [sec]

 # Running sched/pipe benchmark...
 # Executed 1000000 pipe operations between two processes

     Total time: 3.516 [sec]

       3.516689 usecs/op
         284358 ops/sec

After the patch:

$ perf bench sched all
 # Running sched/messaging benchmark...
 # 20 sender and receiver processes per group
 # 10 groups == 400 processes run

     Total time: 0.640 [sec]

 # Running sched/pipe benchmark...
 # Executed 1000000 pipe operations between two processes

     Total time: 3.329 [sec]

       3.329820 usecs/op
         300316 ops/sec

Signed-off-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lkml.kernel.org/r/20210321205156.4186483-1-shakeelb@google.com
Signed-off-by: Chen Wandun <chenwandun@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
5f48fa66 · Shakeel Butt · Zheng Zengkai · 67c22ceb · 5f48fa66
隐藏空白更改
内联并排

Showing 1 changed file with 10 additions and 9 deletions

kernel/sched/psi.c kernel/sched/psi.c +10 -9

未找到文件。
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -651,12 +651,10 @@ static void poll_timer_fn(struct timer_list *t)
 	wake_up_interruptible(&group->poll_wait);
 }
-static void record_times(struct psi_group_cpu *groupc, int cpu)
+static void record_times(struct psi_group_cpu *groupc, u64 now)
 {
 	u32 delta;
-	u64 now;
-	now = cpu_clock(cpu);
 	delta = now - groupc->state_start;
 	groupc->state_start = now;
@@ -683,7 +681,7 @@ static void record_times(struct psi_group_cpu *groupc, int cpu)
 }
 static void psi_group_change(struct psi_group *group, int cpu,
-			     unsigned int clear, unsigned int set,
+			     unsigned int clear, unsigned int set, u64 now,
 			     bool wake_clock)
 {
 	struct psi_group_cpu *groupc;
@@ -703,7 +701,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
 	 */
 	write_seqcount_begin(&groupc->seq);
-	record_times(groupc, cpu);
+	record_times(groupc, now);
 	for (t = 0, m = clear; m; m &= ~(1 << t), t++) {
 		if (!(m & (1 << t)))
@@ -810,12 +808,14 @@ void psi_task_change(struct task_struct *task, int clear, int set)
 	struct psi_group *group;
 	bool wake_clock = true;
 	void *iter = NULL;
+	u64 now;
 	if (!task->pid)
 		return;
 	psi_flags_change(task, clear, set);
+	now = cpu_clock(cpu);
 	/*
 	 * Periodic aggregation shuts off if there is a period of no
 	 * task changes, so we wake it back up if necessary. However,
@@ -828,7 +828,7 @@ void psi_task_change(struct task_struct *task, int clear, int set)
 		wake_clock = false;
 	while ((group = iterate_groups(task, &iter)))
-		psi_group_change(group, cpu, clear, set, wake_clock);
+		psi_group_change(group, cpu, clear, set, now, wake_clock);
 }
 void psi_task_switch(struct task_struct *prev, struct task_struct *next,
@@ -837,6 +837,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 	struct psi_group *group, *common = NULL;
 	int cpu = task_cpu(prev);
 	void *iter;
+	u64 now = cpu_clock(cpu);
 	if (next->pid) {
 		bool identical_state;
@@ -858,7 +859,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 				break;
 			}
-			psi_group_change(group, cpu, 0, TSK_ONCPU, true);
+			psi_group_change(group, cpu, 0, TSK_ONCPU, now, true);
 		}
 	}
@@ -880,7 +881,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 		iter = NULL;
 		while ((group = iterate_groups(prev, &iter)) && group != common)
-			psi_group_change(group, cpu, clear, set, true);
+			psi_group_change(group, cpu, clear, set, now, true);
 		/*
 		 * TSK_ONCPU is handled up to the common ancestor. If we're tasked
@@ -889,7 +890,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 		if (sleep) {
 			clear &= ~TSK_ONCPU;
 			for (; group; group = iterate_groups(prev, &iter))
-				psi_group_change(group, cpu, clear, set, true);
+				psi_group_change(group, cpu, clear, set, now, true);
 		}
 	}
 }