author     Mike Pagano <mpagano@gentoo.org>  2023-02-09 07:39:52 -0500
committer  Mike Pagano <mpagano@gentoo.org>  2023-02-09 07:39:52 -0500
commit     aa99b5941813b8267a0dee6085aadda60385aede (patch)
tree       ef52ec86bc453dacd86e87cc60b576cd8b4a7e20
parent     Linux patch 6.1.11 (diff)
download   linux-patches-aa99b594.tar.gz
           linux-patches-aa99b594.tar.bz2
           linux-patches-aa99b594.zip
Bump BMQ Patch to 6.1-r4
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r--  0000_README                                                                                      |   2
-rw-r--r--  5020_BMQ-and-PDS-io-scheduler-v6.1-r4.patch (renamed from 5020_BMQ-and-PDS-io-scheduler-v6.1-r0.patch) | 216
2 files changed, 148 insertions(+), 70 deletions(-)
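
The headline change in the r4 bump below is that alt_core.c drops the eagerly maintained per-priority sched_rq_watermark[] cpumask array in favour of sched_preempt_mask[] plus sched_idle_mask, where only the priority level last recorded in sched_prio_record is kept current and preempt_mask_check() rebuilds any other level on demand. The following is a minimal userspace sketch of that lazy-refresh idea, not kernel code: NR_CPUS, IDLE_PRIO, rq_prio[] and the two helpers are simplified stand-ins for the patch's IDLE_TASK_SCHED_PRIO, cpu_rq(cpu)->prio, sched_preempt_mask_flush() and preempt_mask_check(), and the allowed-CPU mask handling is omitted.

    /*
     * Toy model of the lazy preempt-mask cache introduced in this bump.
     * Lower priority number means higher effective priority, matching the
     * bitmap-index semantics used by BMQ.
     */
    #include <stdio.h>

    #define NR_CPUS   8
    #define IDLE_PRIO 64               /* stand-in for IDLE_TASK_SCHED_PRIO */

    static int rq_prio[NR_CPUS];        /* stand-in for cpu_rq(cpu)->prio */
    static unsigned long idle_mask;     /* CPUs whose runqueue is idle */
    static unsigned long preempt_mask;  /* cached mask for one priority level */
    static int prio_record = -1;        /* stand-in for sched_prio_record */

    /* Rebuild the cache: idle CPUs plus CPUs whose rq prio value is above @prio. */
    static void preempt_mask_flush(int prio)
    {
        preempt_mask = idle_mask;
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            if (!(preempt_mask & (1UL << cpu)) && prio < rq_prio[cpu])
                preempt_mask |= 1UL << cpu;
    }

    /* CPUs a task of @task_prio may preempt, refreshing the cache only on a miss. */
    static unsigned long preempt_mask_check(int task_prio)
    {
        if (prio_record != task_prio) {
            preempt_mask_flush(task_prio);
            prio_record = task_prio;
        }
        return preempt_mask;
    }

    int main(void)
    {
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            rq_prio[cpu] = (cpu % 2) ? IDLE_PRIO : 10;   /* odd CPUs idle */
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            if (rq_prio[cpu] == IDLE_PRIO)
                idle_mask |= 1UL << cpu;

        /* A prio-20 task may only preempt the idle CPUs: prints 0xaa. */
        printf("preemptible by prio 20: 0x%lx\n", preempt_mask_check(20));
        /* A prio-5 task may preempt every CPU: prints 0xff. */
        printf("preemptible by prio 5:  0x%lx\n", preempt_mask_check(5));
        return 0;
    }

Compared with the old scheme, which touched up to SCHED_QUEUE_BITS cpumasks on every watermark change, only one cached level is maintained and any other level costs a single rebuild scan when a wakeup actually asks for it.
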
diff --git a/0000_README b/0000_README
index 1fb22543..62ade3e6 100644
--- a/0000_README
+++ b/0000_README
@@ -139,6 +139,6 @@ Patch: 5010_enable-cpu-optimizations-universal.patch
From: https://github.com/graysky2/kernel_compiler_patch
Desc: Kernel >= 5.15 patch enables gcc = v11.1+ optimizations for additional CPUs.
-Patch: 5020_BMQ-and-PDS-io-scheduler-v6.1-r0.patch
+Patch: 5020_BMQ-and-PDS-io-scheduler-v6.1-r4.patch
From: https://gitlab.com/alfredchen/projectc
Desc: BMQ(BitMap Queue) Scheduler. A new CPU scheduler developed from PDS(incld). Inspired by the scheduler in zircon.
diff --git a/5020_BMQ-and-PDS-io-scheduler-v6.1-r0.patch b/5020_BMQ-and-PDS-io-scheduler-v6.1-r4.patch
index 783f3bca..7c2a77d3 100644
--- a/5020_BMQ-and-PDS-io-scheduler-v6.1-r0.patch
+++ b/5020_BMQ-and-PDS-io-scheduler-v6.1-r4.patch
@@ -369,7 +369,7 @@ index 94125d3b6893..c87ba766d354 100644
+menuconfig SCHED_ALT
+ bool "Alternative CPU Schedulers"
-+ default n
++ default y
+ help
+ This feature enable alternative CPU scheduler"
+
@@ -632,10 +632,10 @@ index 976092b7bd45..31d587c16ec1 100644
obj-y += build_utility.o
diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
new file mode 100644
-index 000000000000..4bea0c025475
+index 000000000000..a9e906b229eb
--- /dev/null
+++ b/kernel/sched/alt_core.c
-@@ -0,0 +1,7912 @@
+@@ -0,0 +1,7982 @@
+/*
+ * kernel/sched/alt_core.c
+ *
@@ -665,7 +665,6 @@ index 000000000000..4bea0c025475
+#include <linux/init_task.h>
+#include <linux/kcov.h>
+#include <linux/kprobes.h>
-+#include <linux/profile.h>
+#include <linux/nmi.h>
+#include <linux/scs.h>
+
@@ -706,7 +705,7 @@ index 000000000000..4bea0c025475
+#define sched_feat(x) (0)
+#endif /* CONFIG_SCHED_DEBUG */
+
-+#define ALT_SCHED_VERSION "v6.1-r0"
++#define ALT_SCHED_VERSION "v6.1-r4"
+
+/* rt_prio(prio) defined in include/linux/sched/rt.h */
+#define rt_task(p) rt_prio((p)->prio)
@@ -786,7 +785,8 @@ index 000000000000..4bea0c025475
+#ifdef CONFIG_SCHED_SMT
+static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp;
+#endif
-+static cpumask_t sched_rq_watermark[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp;
++static cpumask_t sched_preempt_mask[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp;
++static cpumask_t *const sched_idle_mask = &sched_preempt_mask[0];
+
+/* sched_queue related functions */
+static inline void sched_queue_init(struct sched_queue *q)
@@ -810,44 +810,66 @@ index 000000000000..4bea0c025475
+ list_add(&idle->sq_node, &q->heads[idle->sq_idx]);
+}
+
++static inline void
++clear_recorded_preempt_mask(int pr, int low, int high, int cpu)
++{
++ if (low < pr && pr <= high)
++ cpumask_clear_cpu(cpu, sched_preempt_mask + SCHED_QUEUE_BITS - pr);
++}
++
++static inline void
++set_recorded_preempt_mask(int pr, int low, int high, int cpu)
++{
++ if (low < pr && pr <= high)
++ cpumask_set_cpu(cpu, sched_preempt_mask + SCHED_QUEUE_BITS - pr);
++}
++
++static atomic_t sched_prio_record = ATOMIC_INIT(0);
++
+/* water mark related functions */
-+static inline void update_sched_rq_watermark(struct rq *rq)
++static inline void update_sched_preempt_mask(struct rq *rq)
+{
-+ unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS);
-+ unsigned long last_wm = rq->watermark;
-+ unsigned long i;
-+ int cpu;
++ unsigned long prio = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS);
++ unsigned long last_prio = rq->prio;
++ int cpu, pr;
+
-+ if (watermark == last_wm)
++ if (prio == last_prio)
+ return;
+
-+ rq->watermark = watermark;
++ rq->prio = prio;
+ cpu = cpu_of(rq);
-+ if (watermark < last_wm) {
-+ for (i = last_wm; i > watermark; i--)
-+ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i);
++ pr = atomic_read(&sched_prio_record);
++
++ if (prio < last_prio) {
++ if (IDLE_TASK_SCHED_PRIO == last_prio) {
++ cpumask_clear_cpu(cpu, sched_idle_mask);
++ last_prio -= 2;
+#ifdef CONFIG_SCHED_SMT
-+ if (static_branch_likely(&sched_smt_present) &&
-+ IDLE_TASK_SCHED_PRIO == last_wm)
-+ cpumask_andnot(&sched_sg_idle_mask,
-+ &sched_sg_idle_mask, cpu_smt_mask(cpu));
++ if (static_branch_likely(&sched_smt_present))
++ cpumask_andnot(&sched_sg_idle_mask,
++ &sched_sg_idle_mask, cpu_smt_mask(cpu));
+#endif
++ }
++ clear_recorded_preempt_mask(pr, prio, last_prio, cpu);
++
+ return;
+ }
-+ /* last_wm < watermark */
-+ for (i = watermark; i > last_wm; i--)
-+ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i);
++ /* last_prio < prio */
++ if (IDLE_TASK_SCHED_PRIO == prio) {
++ cpumask_set_cpu(cpu, sched_idle_mask);
++ prio -= 2;
+#ifdef CONFIG_SCHED_SMT
-+ if (static_branch_likely(&sched_smt_present) &&
-+ IDLE_TASK_SCHED_PRIO == watermark) {
-+ cpumask_t tmp;
++ if (static_branch_likely(&sched_smt_present)) {
++ cpumask_t tmp;
+
-+ cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark);
-+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu)))
-+ cpumask_or(&sched_sg_idle_mask,
-+ &sched_sg_idle_mask, cpu_smt_mask(cpu));
-+ }
++ cpumask_and(&tmp, cpu_smt_mask(cpu), sched_idle_mask);
++ if (cpumask_equal(&tmp, cpu_smt_mask(cpu)))
++ cpumask_or(&sched_sg_idle_mask,
++ &sched_sg_idle_mask, cpu_smt_mask(cpu));
++ }
+#endif
++ }
++ set_recorded_preempt_mask(pr, last_prio, prio, cpu);
+}
+
+/*
@@ -1369,8 +1391,8 @@ index 000000000000..4bea0c025475
+ * Context: rq->lock
+ */
+#define __SCHED_DEQUEUE_TASK(p, rq, flags) \
-+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \
+ sched_info_dequeue(rq, p); \
++ psi_dequeue(p, flags & DEQUEUE_SLEEP); \
+ \
+ list_del(&p->sq_node); \
+ if (list_empty(&rq->queue.heads[p->sq_idx])) \
@@ -1378,7 +1400,7 @@ index 000000000000..4bea0c025475
+
+#define __SCHED_ENQUEUE_TASK(p, rq, flags) \
+ sched_info_enqueue(rq, p); \
-+ psi_enqueue(p, flags); \
++ psi_enqueue(p, flags & ENQUEUE_WAKEUP); \
+ \
+ p->sq_idx = task_sched_prio_idx(p, rq); \
+ list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \
@@ -1411,7 +1433,7 @@ index 000000000000..4bea0c025475
+ task_cpu(p), cpu_of(rq));
+
+ __SCHED_ENQUEUE_TASK(p, rq, flags);
-+ update_sched_rq_watermark(rq);
++ update_sched_preempt_mask(rq);
+ ++rq->nr_running;
+#ifdef CONFIG_SMP
+ if (2 == rq->nr_running)
@@ -1436,7 +1458,7 @@ index 000000000000..4bea0c025475
+ rq->queue.bitmap);
+ p->sq_idx = idx;
+ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap);
-+ update_sched_rq_watermark(rq);
++ update_sched_preempt_mask(rq);
+ }
+}
+
@@ -2007,11 +2029,13 @@ index 000000000000..4bea0c025475
+
+ WARN_ON_ONCE(is_migration_disabled(p));
+#endif
-+ if (task_cpu(p) == new_cpu)
-+ return;
+ trace_sched_migrate_task(p, new_cpu);
-+ rseq_migrate(p);
-+ perf_event_task_migrate(p);
++
++ if (task_cpu(p) != new_cpu)
++ {
++ rseq_migrate(p);
++ perf_event_task_migrate(p);
++ }
+
+ __set_task_cpu(p, new_cpu);
+}
@@ -2163,7 +2187,7 @@ index 000000000000..4bea0c025475
+
+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
+ dequeue_task(p, rq, 0);
-+ update_sched_rq_watermark(rq);
++ update_sched_preempt_mask(rq);
+ set_task_cpu(p, new_cpu);
+ raw_spin_unlock(&rq->lock);
+
@@ -2525,23 +2549,50 @@ index 000000000000..4bea0c025475
+ return dest_cpu;
+}
+
++static inline void
++sched_preempt_mask_flush(cpumask_t *mask, int prio)
++{
++ int cpu;
++
++ cpumask_copy(mask, sched_idle_mask);
++
++ for_each_cpu_not(cpu, mask) {
++ if (prio < cpu_rq(cpu)->prio)
++ cpumask_set_cpu(cpu, mask);
++ }
++}
++
++static inline int
++preempt_mask_check(struct task_struct *p, cpumask_t *allow_mask, cpumask_t *preempt_mask)
++{
++ int task_prio = task_sched_prio(p);
++ cpumask_t *mask = sched_preempt_mask + SCHED_QUEUE_BITS - 1 - task_prio;
++ int pr = atomic_read(&sched_prio_record);
++
++ if (pr != task_prio) {
++ sched_preempt_mask_flush(mask, task_prio);
++ atomic_set(&sched_prio_record, task_prio);
++ }
++
++ return cpumask_and(preempt_mask, allow_mask, mask);
++}
++
+static inline int select_task_rq(struct task_struct *p)
+{
-+ cpumask_t chk_mask, tmp;
++ cpumask_t allow_mask, mask;
+
-+ if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_active_mask)))
++ if (unlikely(!cpumask_and(&allow_mask, p->cpus_ptr, cpu_active_mask)))
+ return select_fallback_rq(task_cpu(p), p);
+
+ if (
+#ifdef CONFIG_SCHED_SMT
-+ cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) ||
++ cpumask_and(&mask, &allow_mask, &sched_sg_idle_mask) ||
+#endif
-+ cpumask_and(&tmp, &chk_mask, sched_rq_watermark) ||
-+ cpumask_and(&tmp, &chk_mask,
-+ sched_rq_watermark + SCHED_QUEUE_BITS - 1 - task_sched_prio(p)))
-+ return best_mask_cpu(task_cpu(p), &tmp);
++ cpumask_and(&mask, &allow_mask, sched_idle_mask) ||
++ preempt_mask_check(p, &allow_mask, &mask))
++ return best_mask_cpu(task_cpu(p), &mask);
+
-+ return best_mask_cpu(task_cpu(p), &chk_mask);
++ return best_mask_cpu(task_cpu(p), &allow_mask);
+}
+
+void sched_set_stop_task(int cpu, struct task_struct *stop)
@@ -4678,12 +4729,12 @@ index 000000000000..4bea0c025475
+ * find potential cpus which can migrate the current running task
+ */
+ if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) &&
-+ cpumask_andnot(&chk, cpu_online_mask, sched_rq_watermark) &&
++ cpumask_andnot(&chk, cpu_online_mask, sched_idle_mask) &&
+ cpumask_andnot(&chk, &chk, &sched_rq_pending_mask)) {
+ int i;
+
+ for_each_cpu_wrap(i, &chk, cpu) {
-+ if (cpumask_subset(cpu_smt_mask(i), &chk) &&
++ if (!cpumask_intersects(cpu_smt_mask(i), sched_idle_mask) &&\
+ sg_balance_trigger(i))
+ return;
+ }
@@ -4806,6 +4857,7 @@ index 000000000000..4bea0c025475
+static void sched_tick_stop(int cpu)
+{
+ struct tick_work *twork;
++ int os;
+
+ if (housekeeping_cpu(cpu, HK_TYPE_TICK))
+ return;
@@ -4813,7 +4865,10 @@ index 000000000000..4bea0c025475
+ WARN_ON_ONCE(!tick_work_cpu);
+
+ twork = per_cpu_ptr(tick_work_cpu, cpu);
-+ cancel_delayed_work_sync(&twork->work);
++ /* There cannot be competing actions, but don't rely on stop-machine. */
++ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_OFFLINING);
++ WARN_ON_ONCE(os != TICK_SCHED_REMOTE_RUNNING);
++ /* Don't cancel, as this would mess up the state machine. */
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
@@ -4988,7 +5043,7 @@ index 000000000000..4bea0c025475
+{
+ printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n",
+ sched_rq_pending_mask.bits[0],
-+ sched_rq_watermark[0].bits[0],
++ sched_idle_mask->bits[0],
+ sched_sg_idle_mask.bits[0]);
+}
+#else
@@ -5060,15 +5115,15 @@ index 000000000000..4bea0c025475
+ if (src_rq->nr_running < 2)
+ cpumask_clear_cpu(i, &sched_rq_pending_mask);
+
++ spin_release(&src_rq->lock.dep_map, _RET_IP_);
++ do_raw_spin_unlock(&src_rq->lock);
++
+ rq->nr_running += nr_migrated;
+ if (rq->nr_running > 1)
+ cpumask_set_cpu(cpu, &sched_rq_pending_mask);
+
+ cpufreq_update_util(rq, 0);
+
-+ spin_release(&src_rq->lock.dep_map, _RET_IP_);
-+ do_raw_spin_unlock(&src_rq->lock);
-+
+ return 1;
+ }
+
@@ -5097,7 +5152,7 @@ index 000000000000..4bea0c025475
+}
+
+static inline struct task_struct *
-+choose_next_task(struct rq *rq, int cpu, struct task_struct *prev)
++choose_next_task(struct rq *rq, int cpu)
+{
+ struct task_struct *next;
+
@@ -5254,7 +5309,7 @@ index 000000000000..4bea0c025475
+ prev->sched_contributes_to_load =
+ (prev_state & TASK_UNINTERRUPTIBLE) &&
+ !(prev_state & TASK_NOLOAD) &&
-+ !(prev->flags & TASK_FROZEN);
++ !(prev_state & TASK_FROZEN);
+
+ if (prev->sched_contributes_to_load)
+ rq->nr_uninterruptible++;
@@ -5284,7 +5339,7 @@ index 000000000000..4bea0c025475
+
+ check_curr(prev, rq);
+
-+ next = choose_next_task(rq, cpu, prev);
++ next = choose_next_task(rq, cpu);
+ clear_tsk_need_resched(prev);
+ clear_preempt_need_resched();
+#ifdef CONFIG_SCHED_DEBUG
@@ -5293,7 +5348,7 @@ index 000000000000..4bea0c025475
+
+ if (likely(prev != next)) {
+ if (deactivated)
-+ update_sched_rq_watermark(rq);
++ update_sched_preempt_mask(rq);
+ next->last_ran = rq->clock_task;
+ rq->last_ts_switch = rq->clock;
+
@@ -5714,6 +5769,7 @@ index 000000000000..4bea0c025475
+ return;
+
+ rq = __task_access_lock(p, &lock);
++ update_rq_clock(rq);
+ /*
+ * Set under pi_lock && rq->lock, such that the value can be used under
+ * either lock.
@@ -6593,6 +6649,13 @@ index 000000000000..4bea0c025475
+ return retval;
+}
+
++#ifdef CONFIG_SMP
++int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
++{
++ return 0;
++}
++#endif
++
+static int
+__sched_setaffinity(struct task_struct *p, const struct cpumask *mask)
+{
@@ -7431,7 +7494,6 @@ index 000000000000..4bea0c025475
+
+ raw_spin_lock_irqsave(&idle->pi_lock, flags);
+ raw_spin_lock(&rq->lock);
-+ update_rq_clock(rq);
+
+ idle->last_ran = rq->clock_task;
+ idle->__state = TASK_RUNNING;
@@ -7978,6 +8040,14 @@ index 000000000000..4bea0c025475
+
+ sched_smp_initialized = true;
+}
++
++static int __init migration_init(void)
++{
++ sched_cpu_starting(smp_processor_id());
++ return 0;
++}
++early_initcall(migration_init);
++
+#else
+void __init sched_init_smp(void)
+{
@@ -8030,7 +8100,7 @@ index 000000000000..4bea0c025475
+
+#ifdef CONFIG_SMP
+ for (i = 0; i < SCHED_QUEUE_BITS; i++)
-+ cpumask_copy(sched_rq_watermark + i, cpu_present_mask);
++ cpumask_copy(sched_preempt_mask + i, cpu_present_mask);
+#endif
+
+#ifdef CONFIG_CGROUP_SCHED
@@ -8044,7 +8114,7 @@ index 000000000000..4bea0c025475
+ rq = cpu_rq(i);
+
+ sched_queue_init(&rq->queue);
-+ rq->watermark = IDLE_TASK_SCHED_PRIO;
++ rq->prio = IDLE_TASK_SCHED_PRIO;
+ rq->skip = NULL;
+
+ raw_spin_lock_init(&rq->lock);
@@ -8587,14 +8657,15 @@ index 000000000000..1212a031700e
+{}
diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h
new file mode 100644
-index 000000000000..7c1cc0cbca0d
+index 000000000000..c32403ed82b6
--- /dev/null
+++ b/kernel/sched/alt_sched.h
-@@ -0,0 +1,660 @@
+@@ -0,0 +1,668 @@
+#ifndef ALT_SCHED_H
+#define ALT_SCHED_H
+
+#include <linux/context_tracking.h>
++#include <linux/profile.h>
+#include <linux/psi.h>
+#include <linux/stop_machine.h>
+#include <linux/syscalls.h>
@@ -8732,7 +8803,7 @@ index 000000000000..7c1cc0cbca0d
+#ifdef CONFIG_SCHED_PDS
+ u64 time_edge;
+#endif
-+ unsigned long watermark;
++ unsigned long prio;
+
+ /* switch count */
+ u64 nr_switches;
@@ -8997,17 +9068,24 @@ index 000000000000..7c1cc0cbca0d
+}
+
+static inline void
-+rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
++rq_unlock(struct rq *rq, struct rq_flags *rf)
+ __releases(rq->lock)
+{
-+ raw_spin_unlock_irq(&rq->lock);
++ raw_spin_unlock(&rq->lock);
+}
+
+static inline void
-+rq_unlock(struct rq *rq, struct rq_flags *rf)
++rq_lock_irq(struct rq *rq, struct rq_flags *rf)
++ __acquires(rq->lock)
++{
++ raw_spin_lock_irq(&rq->lock);
++}
++
++static inline void
++rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
+ __releases(rq->lock)
+{
-+ raw_spin_unlock(&rq->lock);
++ raw_spin_unlock_irq(&rq->lock);
+}
+
+static inline struct rq *