author     Mike Pagano <mpagano@gentoo.org>  2021-10-03 15:14:20 -0400
committer  Mike Pagano <mpagano@gentoo.org>  2021-10-03 15:14:20 -0400
commit     3840a675683c2df1aea2f9efed23617ce7eb9e01 (patch)
tree       0672bd20b29ae74c914f64c922f4fe5b977ded58
parent     Linux patch 5.14.9 (diff)
download   linux-patches-3840a675.tar.gz
           linux-patches-3840a675.tar.bz2
           linux-patches-3840a675.zip
Upgrade BMQ and PDS io scheduler to version v5.14-r3
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r--  0000_README  2
-rw-r--r--  5020_BMQ-and-PDS-io-scheduler-v5.14-r3.patch (renamed from 5020_BMQ-and-PDS-io-scheduler-v5.14-r1.patch)  284
2 files changed, 142 insertions, 144 deletions
diff --git a/0000_README b/0000_README
index 21444f8a..2d15afdc 100644
--- a/0000_README
+++ b/0000_README
@@ -115,7 +115,7 @@ Patch: 5010_enable-cpu-optimizations-universal.patch
From: https://github.com/graysky2/kernel_compiler_patch
Desc: Kernel >= 5.8 patch enables gcc = v9+ optimizations for additional CPUs.
-Patch: 5020_BMQ-and-PDS-io-scheduler-v5.14-r1.patch
+Patch: 5020_BMQ-and-PDS-io-scheduler-v5.14-r3.patch
From: https://gitlab.com/alfredchen/linux-prjc
Desc: BMQ(BitMap Queue) Scheduler. A new CPU scheduler developed from PDS(incld). Inspired by the scheduler in zircon.
diff --git a/5020_BMQ-and-PDS-io-scheduler-v5.14-r1.patch b/5020_BMQ-and-PDS-io-scheduler-v5.14-r3.patch
index 4c6f75c5..99adff7c 100644
--- a/5020_BMQ-and-PDS-io-scheduler-v5.14-r1.patch
+++ b/5020_BMQ-and-PDS-io-scheduler-v5.14-r3.patch
@@ -341,6 +341,20 @@ index e5af028c08b4..0a7565d0d3cf 100644
return false;
}
+diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
+index 8f0f778b7c91..991f2280475b 100644
+--- a/include/linux/sched/topology.h
++++ b/include/linux/sched/topology.h
+@@ -225,7 +225,8 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu)
+
+ #endif /* !CONFIG_SMP */
+
+-#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
++#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) && \
++ !defined(CONFIG_SCHED_ALT)
+ extern void rebuild_sched_domains_energy(void);
+ #else
+ static inline void rebuild_sched_domains_energy(void)
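This hunk gates the energy-model hook off when Project C is enabled: rebuild_sched_domains_energy() belongs to the stock EAS/topology code, which BMQ/PDS replaces, so under CONFIG_SCHED_ALT it must collapse to a no-op. A minimal standalone sketch of the guard's effect (the hard-coded CONFIG_* defines and main() are illustrative, not kernel code):

/* sketch.c -- not kernel code; demonstrates the preprocessor guard only */
#include <stdio.h>

#define CONFIG_ENERGY_MODEL 1
#define CONFIG_CPU_FREQ_GOV_SCHEDUTIL 1
#define CONFIG_SCHED_ALT 1			/* Project C (BMQ/PDS) enabled */

#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) && \
	!defined(CONFIG_SCHED_ALT)
extern void rebuild_sched_domains_energy(void);	/* real EAS rebuild */
#else
static inline void rebuild_sched_domains_energy(void) { }	/* no-op stub */
#endif

int main(void)
{
	rebuild_sched_domains_energy();	/* resolves to the stub above */
	puts("CONFIG_SCHED_ALT set: rebuild_sched_domains_energy() is a no-op");
	return 0;
}

The same pattern recurs throughout the patch: wherever the stock scheduler exposes a hook, CONFIG_SCHED_ALT either stubs it out or reroutes it to the alt_core implementation.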
diff --git a/init/Kconfig b/init/Kconfig
index 55f9f7738ebb..9a9b244d3ca3 100644
--- a/init/Kconfig
@@ -659,10 +673,10 @@ index 978fcfca5871..0425ee149b4d 100644
obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
new file mode 100644
-index 000000000000..900889c838ea
+index 000000000000..56aed2b1e42c
--- /dev/null
+++ b/kernel/sched/alt_core.c
-@@ -0,0 +1,7248 @@
+@@ -0,0 +1,7341 @@
+/*
+ * kernel/sched/alt_core.c
+ *
@@ -732,7 +746,7 @@ index 000000000000..900889c838ea
+#define sched_feat(x) (0)
+#endif /* CONFIG_SCHED_DEBUG */
+
-+#define ALT_SCHED_VERSION "v5.14-r1"
++#define ALT_SCHED_VERSION "v5.14-r3"
+
+/* rt_prio(prio) defined in include/linux/sched/rt.h */
+#define rt_task(p) rt_prio((p)->prio)
@@ -1249,6 +1263,101 @@ index 000000000000..900889c838ea
+ update_rq_clock_task(rq, delta);
+}
+
++/*
++ * RQ Load update routine
++ */
++#define RQ_LOAD_HISTORY_BITS (sizeof(s32) * 8ULL)
++#define RQ_UTIL_SHIFT (8)
++#define RQ_LOAD_HISTORY_TO_UTIL(l) (((l) >> (RQ_LOAD_HISTORY_BITS - 1 - RQ_UTIL_SHIFT)) & 0xff)
++
++#define LOAD_BLOCK(t) ((t) >> 17)
++#define LOAD_HALF_BLOCK(t) ((t) >> 16)
++#define BLOCK_MASK(t) ((t) & ((0x01 << 18) - 1))
++#define LOAD_BLOCK_BIT(b) (1UL << (RQ_LOAD_HISTORY_BITS - 1 - (b)))
++#define CURRENT_LOAD_BIT LOAD_BLOCK_BIT(0)
++
++static inline void rq_load_update(struct rq *rq)
++{
++ u64 time = rq->clock;
++ u64 delta = min(LOAD_BLOCK(time) - LOAD_BLOCK(rq->load_stamp),
++ RQ_LOAD_HISTORY_BITS - 1);
++ u64 prev = !!(rq->load_history & CURRENT_LOAD_BIT);
++ u64 curr = !!cpu_rq(rq->cpu)->nr_running;
++
++ if (delta) {
++ rq->load_history = rq->load_history >> delta;
++
++ if (delta < RQ_UTIL_SHIFT) {
++ rq->load_block += (~BLOCK_MASK(rq->load_stamp)) * prev;
++ if (!!LOAD_HALF_BLOCK(rq->load_block) ^ curr)
++ rq->load_history ^= LOAD_BLOCK_BIT(delta);
++ }
++
++ rq->load_block = BLOCK_MASK(time) * prev;
++ } else {
++ rq->load_block += (time - rq->load_stamp) * prev;
++ }
++ if (prev ^ curr)
++ rq->load_history ^= CURRENT_LOAD_BIT;
++ rq->load_stamp = time;
++}
++
++unsigned long rq_load_util(struct rq *rq, unsigned long max)
++{
++ return RQ_LOAD_HISTORY_TO_UTIL(rq->load_history) * (max >> RQ_UTIL_SHIFT);
++}
++
++#ifdef CONFIG_SMP
++unsigned long sched_cpu_util(int cpu, unsigned long max)
++{
++ return rq_load_util(cpu_rq(cpu), max);
++}
++#endif /* CONFIG_SMP */
++
++#ifdef CONFIG_CPU_FREQ
++/**
++ * cpufreq_update_util - Take a note about CPU utilization changes.
++ * @rq: Runqueue to carry out the update for.
++ * @flags: Update reason flags.
++ *
++ * This function is called by the scheduler on the CPU whose utilization is
++ * being updated.
++ *
++ * It can only be called from RCU-sched read-side critical sections.
++ *
++ * The way cpufreq is currently arranged requires it to evaluate the CPU
++ * performance state (frequency/voltage) on a regular basis to prevent it from
++ * being stuck in a completely inadequate performance level for too long.
++ * That is not guaranteed to happen if the updates are only triggered from CFS
++ * and DL, though, because they may not be coming in if only RT tasks are
++ * active all the time (or there are RT tasks only).
++ *
++ * As a workaround for that issue, this function is called periodically by the
++ * RT sched class to trigger extra cpufreq updates to prevent it from stalling,
++ * but that really is a band-aid. Going forward it should be replaced with
++ * solutions targeted more specifically at RT tasks.
++ */
++static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
++{
++ struct update_util_data *data;
++
++#ifdef CONFIG_SMP
++ rq_load_update(rq);
++#endif
++ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
++ cpu_of(rq)));
++ if (data)
++ data->func(data, rq_clock(rq), flags);
++}
++#else
++static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
++{
++#ifdef CONFIG_SMP
++ rq_load_update(rq);
++#endif
++}
++#endif /* CONFIG_CPU_FREQ */
++
+#ifdef CONFIG_NO_HZ_FULL
+/*
+ * Tick may be needed by tasks in the runqueue depending on their policy and
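The load tracking added above is the heart of this revision: the r1 patch kept this history inside schedutil's struct sugov_cpu, while r3 moves it onto the runqueue so sched_cpu_util() and the cpufreq hooks share one copy. The mechanism is a 32-bit busy/idle bitmap. Time is carved into 2^17 ns (~131 µs) blocks by LOAD_BLOCK(); rq_load_update() ages rq->load_history right by the number of elapsed blocks and keeps the current block's busy state in the top bit, with rq->load_block and the LOAD_HALF_BLOCK() test rounding a partially busy block to its majority state. rq_load_util() then reads the eight bits just below the top bit as a 0-255 value and scales it by max >> 8, so an all-busy history reports roughly max. A standalone userspace sketch of the idea (simplified as noted in the comments; kernel types and locking omitted):

/* load_history_sketch.c -- userspace illustration, not kernel code.
 * Simplified: each block is rounded to fully busy or fully idle; the
 * real rq_load_update() also accumulates rq->load_block so a partially
 * busy block is rounded to its majority state via LOAD_HALF_BLOCK(). */
#include <stdint.h>
#include <stdio.h>

#define HISTORY_BITS	32
#define UTIL_SHIFT	8
#define LOAD_BLOCK(t)	((t) >> 17)	/* ns -> ~131 us blocks */
#define CURRENT_BIT	(1U << (HISTORY_BITS - 1))

static uint32_t history;	/* stands in for rq->load_history */
static uint64_t stamp;		/* stands in for rq->load_stamp   */

static void load_update(uint64_t now_ns, int busy)
{
	uint64_t delta = LOAD_BLOCK(now_ns) - LOAD_BLOCK(stamp);

	if (delta > HISTORY_BITS - 1)
		delta = HISTORY_BITS - 1;
	history >>= delta;		/* age the bitmap */
	if (busy)
		history |= CURRENT_BIT;	/* current block was busy */
	else
		history &= ~CURRENT_BIT;
	stamp = now_ns;
}

/* Mirrors rq_load_util(): eight most recent completed blocks, scaled. */
static unsigned long load_util(unsigned long max)
{
	return ((history >> (HISTORY_BITS - 1 - UTIL_SHIFT)) & 0xff) *
	       (max >> UTIL_SHIFT);
}

int main(void)
{
	uint64_t t = 0;
	unsigned long max = 1024;	/* arch_scale_cpu_capacity() stand-in */
	int i;

	for (i = 0; i < 32; i++, t += 1 << 17)	/* 32 busy blocks */
		load_update(t, 1);
	printf("after sustained load: util = %lu/%lu\n", load_util(max), max);

	for (i = 0; i < 4; i++, t += 1 << 17)	/* 4 idle blocks */
		load_update(t, 0);
	printf("after 4 idle blocks:  util = %lu/%lu\n", load_util(max), max);
	return 0;
}

On this toy trace the estimate reads ~1020/1024 after sustained load and falls to 124/1024 after only four idle blocks, illustrating how strongly the most recent blocks weigh in the utilization the governor sees.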
@@ -4038,6 +4147,7 @@ index 000000000000..900889c838ea
+ s64 ns = rq->clock_task - p->last_ran;
+
+ p->sched_time += ns;
++ cgroup_account_cputime(p, ns);
+ account_group_exec_runtime(p, ns);
+
+ p->time_slice -= ns;
@@ -4600,6 +4710,7 @@ index 000000000000..900889c838ea
+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) {
+ __SCHED_DEQUEUE_TASK(p, rq, 0, );
+ set_task_cpu(p, dest_cpu);
++ sched_task_sanity_check(p, dest_rq);
+ __SCHED_ENQUEUE_TASK(p, dest_rq, 0);
+ nr_migrated++;
+ }
@@ -5753,11 +5864,7 @@ index 000000000000..900889c838ea
+ * the runqueue. This will be done when the task deboost
+ * itself.
+ */
-+ if (rt_effective_prio(p, newprio) == p->prio) {
-+ __setscheduler_params(p, attr);
-+ retval = 0;
-+ goto unlock;
-+ }
++ newprio = rt_effective_prio(p, newprio);
+ }
+
+ if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
@@ -6969,7 +7076,6 @@ index 000000000000..900889c838ea
+ struct task_struct *push_task = rq->curr;
+
+ lockdep_assert_held(&rq->lock);
-+ SCHED_WARN_ON(rq->cpu != smp_processor_id());
+
+ /*
+ * Ensure the thing is persistent until balance_push_set(.on = false);
@@ -6977,9 +7083,10 @@ index 000000000000..900889c838ea
+ rq->balance_callback = &balance_push_callback;
+
+ /*
-+ * Only active while going offline.
++ * Only active while going offline and when invoked on the outgoing
++ * CPU.
+ */
-+ if (!cpu_dying(rq->cpu))
++ if (!cpu_dying(rq->cpu) || rq != this_rq())
+ return;
+
+ /*
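The two balance_push() hunks above mirror the equivalent mainline fix for pushes on remote runqueues: the old code asserted it always ran on rq's own CPU, but a balance callback flushed on behalf of another CPU's runqueue can legitimately get here, so r3 drops the SCHED_WARN_ON() and instead bails out unless the runqueue belongs to the outgoing CPU itself. Read together, the patched function begins roughly like this (a consolidated sketch of the diff above, push logic elided):

static void balance_push(struct rq *rq)
{
	struct task_struct *push_task = rq->curr;

	lockdep_assert_held(&rq->lock);

	/*
	 * Ensure the thing is persistent until balance_push_set(.on = false);
	 */
	rq->balance_callback = &balance_push_callback;

	/*
	 * Only active while going offline and when invoked on the outgoing
	 * CPU: a callback flushed for a remote runqueue must not try to
	 * push that runqueue's tasks from here.
	 */
	if (!cpu_dying(rq->cpu) || rq != this_rq())
		return;

	/* ... migrate push_task off the dying CPU ... */
}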
@@ -7950,10 +8057,10 @@ index 000000000000..1212a031700e
+{}
diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h
new file mode 100644
-index 000000000000..f03af9ab9123
+index 000000000000..289058a09bd5
--- /dev/null
+++ b/kernel/sched/alt_sched.h
-@@ -0,0 +1,692 @@
+@@ -0,0 +1,666 @@
+#ifndef ALT_SCHED_H
+#define ALT_SCHED_H
+
@@ -8153,6 +8260,7 @@ index 000000000000..f03af9ab9123
+ struct rcuwait hotplug_wait;
+#endif
+ unsigned int nr_pinned;
++
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+ u64 prev_irq_time;
@@ -8164,6 +8272,11 @@ index 000000000000..f03af9ab9123
+ u64 prev_steal_time_rq;
+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */
+
++ /* For general cpu load util */
++ s32 load_history;
++ u64 load_block;
++ u64 load_stamp;
++
+ /* calc_load related fields */
+ unsigned long calc_load_update;
+ long calc_load_active;
@@ -8216,6 +8329,8 @@ index 000000000000..f03af9ab9123
+#endif /* CONFIG_NO_HZ_COMMON */
+};
+
++extern unsigned long rq_load_util(struct rq *rq, unsigned long max);
++
+extern unsigned long calc_load_update;
+extern atomic_long_t calc_load_tasks;
+
@@ -8528,40 +8643,6 @@ index 000000000000..f03af9ab9123
+
+#ifdef CONFIG_CPU_FREQ
+DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
-+
-+/**
-+ * cpufreq_update_util - Take a note about CPU utilization changes.
-+ * @rq: Runqueue to carry out the update for.
-+ * @flags: Update reason flags.
-+ *
-+ * This function is called by the scheduler on the CPU whose utilization is
-+ * being updated.
-+ *
-+ * It can only be called from RCU-sched read-side critical sections.
-+ *
-+ * The way cpufreq is currently arranged requires it to evaluate the CPU
-+ * performance state (frequency/voltage) on a regular basis to prevent it from
-+ * being stuck in a completely inadequate performance level for too long.
-+ * That is not guaranteed to happen if the updates are only triggered from CFS
-+ * and DL, though, because they may not be coming in if only RT tasks are
-+ * active all the time (or there are RT tasks only).
-+ *
-+ * As a workaround for that issue, this function is called periodically by the
-+ * RT sched class to trigger extra cpufreq updates to prevent it from stalling,
-+ * but that really is a band-aid. Going forward it should be replaced with
-+ * solutions targeted more specifically at RT tasks.
-+ */
-+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
-+{
-+ struct update_util_data *data;
-+
-+ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
-+ cpu_of(rq)));
-+ if (data)
-+ data->func(data, rq_clock(rq), flags);
-+}
-+#else
-+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
+#endif /* CONFIG_CPU_FREQ */
+
+#ifdef CONFIG_NO_HZ_FULL
@@ -8764,88 +8845,25 @@ index 000000000000..be3ee4a553ca
+
+static inline void update_rq_time_edge(struct rq *rq) {}
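The cpufreq_schedutil.c hunks that follow are a diff against the r1 patch, which makes them hard to read on their own. Their net effect: the r1 patch's private per-sugov_cpu load history (the SG_CPU_LOAD_HISTORY_* macros and sugov_cpu_load_update()) is deleted, and the governor instead consumes the rq-level utilization introduced in alt_core.c. Applied on top of 5.14, the patched sugov_get_util() comes out roughly as follows (a reconstruction from the hunks below, not verbatim kernel source):

static void sugov_get_util(struct sugov_cpu *sg_cpu)
{
	struct rq *rq = cpu_rq(sg_cpu->cpu);
	unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu);

	sg_cpu->max = max;
#ifndef CONFIG_SCHED_ALT
	sg_cpu->bw_dl = cpu_bw_dl(rq);
	sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(rq), max,
					  FREQUENCY_UTIL, NULL);
#else
	sg_cpu->bw_dl = 0;	/* BMQ/PDS has no deadline class to reserve for */
	sg_cpu->util = rq_load_util(rq, max);	/* 0..max from rq->load_history */
#endif /* CONFIG_SCHED_ALT */
}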
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
-index 57124614363d..4057e51cef45 100644
+index 57124614363d..f0e9c7543542 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
-@@ -57,6 +57,13 @@ struct sugov_cpu {
- unsigned long bw_dl;
- unsigned long max;
-
-+#ifdef CONFIG_SCHED_ALT
-+ /* For genenal cpu load util */
-+ s32 load_history;
-+ u64 load_block;
-+ u64 load_stamp;
-+#endif
-+
- /* The field below is for single-CPU policies only: */
- #ifdef CONFIG_NO_HZ_COMMON
- unsigned long saved_idle_calls;
-@@ -161,6 +168,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
- return cpufreq_driver_resolve_freq(policy, freq);
- }
+@@ -167,9 +167,14 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
+ unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu);
+ sg_cpu->max = max;
+#ifndef CONFIG_SCHED_ALT
- static void sugov_get_util(struct sugov_cpu *sg_cpu)
- {
- struct rq *rq = cpu_rq(sg_cpu->cpu);
-@@ -172,6 +180,55 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
+ sg_cpu->bw_dl = cpu_bw_dl(rq);
+ sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(rq), max,
FREQUENCY_UTIL, NULL);
- }
-
-+#else /* CONFIG_SCHED_ALT */
-+
-+#define SG_CPU_LOAD_HISTORY_BITS (sizeof(s32) * 8ULL)
-+#define SG_CPU_UTIL_SHIFT (8)
-+#define SG_CPU_LOAD_HISTORY_SHIFT (SG_CPU_LOAD_HISTORY_BITS - 1 - SG_CPU_UTIL_SHIFT)
-+#define SG_CPU_LOAD_HISTORY_TO_UTIL(l) (((l) >> SG_CPU_LOAD_HISTORY_SHIFT) & 0xff)
-+
-+#define LOAD_BLOCK(t) ((t) >> 17)
-+#define LOAD_HALF_BLOCK(t) ((t) >> 16)
-+#define BLOCK_MASK(t) ((t) & ((0x01 << 18) - 1))
-+#define LOAD_BLOCK_BIT(b) (1UL << (SG_CPU_LOAD_HISTORY_BITS - 1 - (b)))
-+#define CURRENT_LOAD_BIT LOAD_BLOCK_BIT(0)
-+
-+static void sugov_get_util(struct sugov_cpu *sg_cpu)
-+{
-+ unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu);
-+
-+ sg_cpu->max = max;
++#else
+ sg_cpu->bw_dl = 0;
-+ sg_cpu->util = SG_CPU_LOAD_HISTORY_TO_UTIL(sg_cpu->load_history) *
-+ (max >> SG_CPU_UTIL_SHIFT);
-+}
-+
-+static inline void sugov_cpu_load_update(struct sugov_cpu *sg_cpu, u64 time)
-+{
-+ u64 delta = min(LOAD_BLOCK(time) - LOAD_BLOCK(sg_cpu->load_stamp),
-+ SG_CPU_LOAD_HISTORY_BITS - 1);
-+ u64 prev = !!(sg_cpu->load_history & CURRENT_LOAD_BIT);
-+ u64 curr = !!cpu_rq(sg_cpu->cpu)->nr_running;
-+
-+ if (delta) {
-+ sg_cpu->load_history = sg_cpu->load_history >> delta;
-+
-+ if (delta <= SG_CPU_UTIL_SHIFT) {
-+ sg_cpu->load_block += (~BLOCK_MASK(sg_cpu->load_stamp)) * prev;
-+ if (!!LOAD_HALF_BLOCK(sg_cpu->load_block) ^ curr)
-+ sg_cpu->load_history ^= LOAD_BLOCK_BIT(delta);
-+ }
-+
-+ sg_cpu->load_block = BLOCK_MASK(time) * prev;
-+ } else {
-+ sg_cpu->load_block += (time - sg_cpu->load_stamp) * prev;
-+ }
-+ if (prev ^ curr)
-+ sg_cpu->load_history ^= CURRENT_LOAD_BIT;
-+ sg_cpu->load_stamp = time;
-+}
++ sg_cpu->util = rq_load_util(rq, max);
+#endif /* CONFIG_SCHED_ALT */
-+
+ }
+
/**
- * sugov_iowait_reset() - Reset the IO boost status of a CPU.
- * @sg_cpu: the sugov data for the CPU to boost
-@@ -312,13 +369,19 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
+@@ -312,8 +317,10 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
*/
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
{
@@ -8856,27 +8874,7 @@ index 57124614363d..4057e51cef45 100644
}
static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
- u64 time, unsigned int flags)
- {
-+#ifdef CONFIG_SCHED_ALT
-+ sugov_cpu_load_update(sg_cpu, time);
-+#endif /* CONFIG_SCHED_ALT */
-+
- sugov_iowait_boost(sg_cpu, time, flags);
- sg_cpu->last_update = time;
-
-@@ -439,6 +502,10 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
-
- raw_spin_lock(&sg_policy->update_lock);
-
-+#ifdef CONFIG_SCHED_ALT
-+ sugov_cpu_load_update(sg_cpu, time);
-+#endif /* CONFIG_SCHED_ALT */
-+
- sugov_iowait_boost(sg_cpu, time, flags);
- sg_cpu->last_update = time;
-
-@@ -599,6 +666,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy)
+@@ -599,6 +606,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy)
}
ret = sched_setattr_nocheck(thread, &attr);
@@ -8884,7 +8882,7 @@ index 57124614363d..4057e51cef45 100644
if (ret) {
kthread_stop(thread);
pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
-@@ -833,7 +901,9 @@ cpufreq_governor_init(schedutil_gov);
+@@ -833,7 +841,9 @@ cpufreq_governor_init(schedutil_gov);
#ifdef CONFIG_ENERGY_MODEL
static void rebuild_sd_workfn(struct work_struct *work)
{