author     Mike Pagano <mpagano@gentoo.org>  2023-02-09 07:39:52 -0500
committer  Mike Pagano <mpagano@gentoo.org>  2023-02-09 07:39:52 -0500
commit     aa99b5941813b8267a0dee6085aadda60385aede (patch)
tree       ef52ec86bc453dacd86e87cc60b576cd8b4a7e20
parent     Linux patch 6.1.11 (diff)
download   linux-patches-aa99b594.tar.gz
           linux-patches-aa99b594.tar.bz2
           linux-patches-aa99b594.zip
Bump BMQ Patch to 6.1-r4
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r--  0000_README                                                                                      |   2
-rw-r--r--  5020_BMQ-and-PDS-io-scheduler-v6.1-r4.patch (renamed from 5020_BMQ-and-PDS-io-scheduler-v6.1-r0.patch) | 216
2 files changed, 148 insertions(+), 70 deletions(-)
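
The headline change in the r4 bump below is that alt_core.c drops the eagerly maintained per-priority sched_rq_watermark[] cpumask array in favour of sched_preempt_mask[] plus sched_idle_mask, where only the priority level last recorded in sched_prio_record is kept current and preempt_mask_check() rebuilds any other level on demand. The following is a minimal userspace sketch of that lazy-refresh idea, not kernel code: NR_CPUS, IDLE_PRIO, rq_prio[] and the two helpers are simplified stand-ins for the patch's IDLE_TASK_SCHED_PRIO, cpu_rq(cpu)->prio, sched_preempt_mask_flush() and preempt_mask_check(), and the allowed-CPU mask handling is omitted.

    /*
     * Toy model of the lazy preempt-mask cache introduced in this bump.
     * Lower priority number means higher effective priority, matching the
     * bitmap-index semantics used by BMQ.
     */
    #include <stdio.h>

    #define NR_CPUS   8
    #define IDLE_PRIO 64               /* stand-in for IDLE_TASK_SCHED_PRIO */

    static int rq_prio[NR_CPUS];        /* stand-in for cpu_rq(cpu)->prio */
    static unsigned long idle_mask;     /* CPUs whose runqueue is idle */
    static unsigned long preempt_mask;  /* cached mask for one priority level */
    static int prio_record = -1;        /* stand-in for sched_prio_record */

    /* Rebuild the cache: idle CPUs plus CPUs whose rq prio value is above @prio. */
    static void preempt_mask_flush(int prio)
    {
        preempt_mask = idle_mask;
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            if (!(preempt_mask & (1UL << cpu)) && prio < rq_prio[cpu])
                preempt_mask |= 1UL << cpu;
    }

    /* CPUs a task of @task_prio may preempt, refreshing the cache only on a miss. */
    static unsigned long preempt_mask_check(int task_prio)
    {
        if (prio_record != task_prio) {
            preempt_mask_flush(task_prio);
            prio_record = task_prio;
        }
        return preempt_mask;
    }

    int main(void)
    {
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            rq_prio[cpu] = (cpu % 2) ? IDLE_PRIO : 10;   /* odd CPUs idle */
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            if (rq_prio[cpu] == IDLE_PRIO)
                idle_mask |= 1UL << cpu;

        /* A prio-20 task may only preempt the idle CPUs: prints 0xaa. */
        printf("preemptible by prio 20: 0x%lx\n", preempt_mask_check(20));
        /* A prio-5 task may preempt every CPU: prints 0xff. */
        printf("preemptible by prio 5:  0x%lx\n", preempt_mask_check(5));
        return 0;
    }

Compared with the old scheme, which touched up to SCHED_QUEUE_BITS cpumasks on every watermark change, only one cached level is maintained and any other level costs a single rebuild scan when a wakeup actually asks for it.
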
diff --git a/0000_README b/0000_README
index 1fb22543..62ade3e6 100644
--- a/0000_README
+++ b/0000_README
@@ -139,6 +139,6 @@ Patch: 5010_enable-cpu-optimizations-universal.patch
From: https://github.com/graysky2/kernel_compiler_patch
Desc: Kernel >= 5.15 patch enables gcc = v11.1+ optimizations for additional CPUs.
-Patch: 5020_BMQ-and-PDS-io-scheduler-v6.1-r0.patch
+Patch: 5020_BMQ-and-PDS-io-scheduler-v6.1-r4.patch
From: https://gitlab.com/alfredchen/projectc
Desc: BMQ(BitMap Queue) Scheduler. A new CPU scheduler developed from PDS(incld). Inspired by the scheduler in zircon.
diff --git a/5020_BMQ-and-PDS-io-scheduler-v6.1-r0.patch b/5020_BMQ-and-PDS-io-scheduler-v6.1-r4.patch
index 783f3bca..7c2a77d3 100644
--- a/5020_BMQ-and-PDS-io-scheduler-v6.1-r0.patch
+++ b/5020_BMQ-and-PDS-io-scheduler-v6.1-r4.patch
@@ -369,7 +369,7 @@ index 94125d3b6893..c87ba766d354 100644
+menuconfig SCHED_ALT
+ bool "Alternative CPU Schedulers"
-+ default n
++ default y
+ help
+ This feature enable alternative CPU scheduler"
+
@@ -632,10 +632,10 @@ index 976092b7bd45..31d587c16ec1 100644
obj-y += build_utility.o
diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
new file mode 100644
-index 000000000000..4bea0c025475
+index 000000000000..a9e906b229eb
--- /dev/null
+++ b/kernel/sched/alt_core.c
-@@ -0,0 +1,7912 @@
+@@ -0,0 +1,7982 @@
+/*
+ * kernel/sched/alt_core.c
+ *
@@ -665,7 +665,6 @@ index 000000000000..4bea0c025475
+#include <linux/init_task.h>
+#include <linux/kcov.h>
+#include <linux/kprobes.h>
-+#include <linux/profile.h>
+#include <linux/nmi.h>
+#include <linux/scs.h>
+
@@ -706,7 +705,7 @@ index 000000000000..4bea0c025475
+#define sched_feat(x) (0)
+#endif /* CONFIG_SCHED_DEBUG */
+
-+#define ALT_SCHED_VERSION "v6.1-r0"
++#define ALT_SCHED_VERSION "v6.1-r4"
+
+/* rt_prio(prio) defined in include/linux/sched/rt.h */
+#define rt_task(p) rt_prio((p)->prio)
@@ -786,7 +785,8 @@ index 000000000000..4bea0c025475
+#ifdef CONFIG_SCHED_SMT
+static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp;
+#endif
-+static cpumask_t sched_rq_watermark[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp;
++static cpumask_t sched_preempt_mask[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp;
++static cpumask_t *const sched_idle_mask = &sched_preempt_mask[0];
+
+/* sched_queue related functions */
+static inline void sched_queue_init(struct sched_queue *q)
@@ -810,44 +810,66 @@ index 000000000000..4bea0c025475
+ list_add(&idle->sq_node, &q->heads[idle->sq_idx]);
+}
+
++static inline void
++clear_recorded_preempt_mask(int pr, int low, int high, int cpu)
++{
++ if (low < pr && pr <= high)
++ cpumask_clear_cpu(cpu, sched_preempt_mask + SCHED_QUEUE_BITS - pr);
++}
++
++static inline void
++set_recorded_preempt_mask(int pr, int low, int high, int cpu)
++{
++ if (low < pr && pr <= high)
++ cpumask_set_cpu(cpu, sched_preempt_mask + SCHED_QUEUE_BITS - pr);
++}
++
++static atomic_t sched_prio_record = ATOMIC_INIT(0);
++
+/* water mark related functions */
-+static inline void update_sched_rq_watermark(struct rq *rq)
++static inline void update_sched_preempt_mask(struct rq *rq)
+{
-+ unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS);
-+ unsigned long last_wm = rq->watermark;
-+ unsigned long i;
-+ int cpu;
++ unsigned long prio = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS);
++ unsigned long last_prio = rq->prio;
++ int cpu, pr;
+
-+ if (watermark == last_wm)
++ if (prio == last_prio)
+ return;
+
-+ rq->watermark = watermark;
++ rq->prio = prio;
+ cpu = cpu_of(rq);
-+ if (watermark < last_wm) {
-+ for (i = last_wm; i > watermark; i--)
-+ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i);
++ pr = atomic_read(&sched_prio_record);
++
++ if (prio < last_prio) {
++ if (IDLE_TASK_SCHED_PRIO == last_prio) {
++ cpumask_clear_cpu(cpu, sched_idle_mask);
++ last_prio -= 2;
+#ifdef CONFIG_SCHED_SMT
-+ if (static_branch_likely(&sched_smt_present) &&
-+ IDLE_TASK_SCHED_PRIO == last_wm)
-+ cpumask_andnot(&sched_sg_idle_mask,
-+ &sched_sg_idle_mask, cpu_smt_mask(cpu));
++ if (static_branch_likely(&sched_smt_present))
++ cpumask_andnot(&sched_sg_idle_mask,
++ &sched_sg_idle_mask, cpu_smt_mask(cpu));
+#endif
++ }
++ clear_recorded_preempt_mask(pr, prio, last_prio, cpu);
++
+ return;
+ }
-+ /* last_wm < watermark */
-+ for (i = watermark; i > last_wm; i--)
-+ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i);
++ /* last_prio < prio */
++ if (IDLE_TASK_SCHED_PRIO == prio) {
++ cpumask_set_cpu(cpu, sched_idle_mask);
++ prio -= 2;
+#ifdef CONFIG_SCHED_SMT
-+ if (static_branch_likely(&sched_smt_present) &&
-+ IDLE_TASK_SCHED_PRIO == watermark) {
-+ cpumask_t tmp;
++ if (static_branch_likely(&sched_smt_present)) {
++ cpumask_t tmp;
+
-+ cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark);
-+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu)))
-+ cpumask_or(&sched_sg_idle_mask,
-+ &sched_sg_idle_mask, cpu_smt_mask(cpu));
-+ }
++ cpumask_and(&tmp, cpu_smt_mask(cpu), sched_idle_mask);
++ if (cpumask_equal(&tmp, cpu_smt_mask(cpu)))
++ cpumask_or(&sched_sg_idle_mask,
++ &sched_sg_idle_mask, cpu_smt_mask(cpu));
++ }
+#endif
++ }
++ set_recorded_preempt_mask(pr, last_prio, prio, cpu);
+}
+
+/*
@@ -1369,8 +1391,8 @@ index 000000000000..4bea0c025475
+ * Context: rq->lock
+ */
+#define __SCHED_DEQUEUE_TASK(p, rq, flags) \
-+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \
+ sched_info_dequeue(rq, p); \
++ psi_dequeue(p, flags & DEQUEUE_SLEEP); \
+ \
+ list_del(&p->sq_node); \
+ if (list_empty(&rq->queue.heads[p->sq_idx])) \
@@ -1378,7 +1400,7 @@ index 000000000000..4bea0c025475
+
+#define __SCHED_ENQUEUE_TASK(p, rq, flags) \
+ sched_info_enqueue(rq, p); \
-+ psi_enqueue(p, flags); \
++ psi_enqueue(p, flags & ENQUEUE_WAKEUP); \
+ \
+ p->sq_idx = task_sched_prio_idx(p, rq); \
+ list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \
@@ -1411,7 +1433,7 @@ index 000000000000..4bea0c025475
+ task_cpu(p), cpu_of(rq));
+
+ __SCHED_ENQUEUE_TASK(p, rq, flags);
-+ update_sched_rq_watermark(rq);
++ update_sched_preempt_mask(rq);
+ ++rq->nr_running;
+#ifdef CONFIG_SMP
+ if (2 == rq->nr_running)
@@ -1436,7 +1458,7 @@ index 000000000000..4bea0c025475
+ rq->queue.bitmap);
+ p->sq_idx = idx;
+ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap);
-+ update_sched_rq_watermark(rq);
++ update_sched_preempt_mask(rq);
+ }
+}
+
@@ -2007,11 +2029,13 @@ index 000000000000..4bea0c025475
+
+ WARN_ON_ONCE(is_migration_disabled(p));
+#endif
-+ if (task_cpu(p) == new_cpu)
-+ return;
+ trace_sched_migrate_task(p, new_cpu);
-+ rseq_migrate(p);
-+ perf_event_task_migrate(p);
++
++ if (task_cpu(p) != new_cpu)
++ {
++ rseq_migrate(p);
++ perf_event_task_migrate(p);
++ }
+
+ __set_task_cpu(p, new_cpu);
+}
@@ -2163,7 +2187,7 @@ index 000000000000..4bea0c025475
+
+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
+ dequeue_task(p, rq, 0);
-+ update_sched_rq_watermark(rq);
++ update_sched_preempt_mask(rq);
+ set_task_cpu(p, new_cpu);
+ raw_spin_unlock(&rq->lock);
+
@@ -2525,23 +2549,50 @@ index 000000000000..4bea0c025475
+ return dest_cpu;
+}
+
++static inline void
++sched_preempt_mask_flush(cpumask_t *mask, int prio)
++{
++ int cpu;
++
++ cpumask_copy(mask, sched_idle_mask);
++
++ for_each_cpu_not(cpu, mask) {
++ if (prio < cpu_rq(cpu)->prio)
++ cpumask_set_cpu(cpu, mask);
++ }
++}
++
++static inline int
++preempt_mask_check(struct task_struct *p, cpumask_t *allow_mask, cpumask_t *preempt_mask)
++{
++ int task_prio = task_sched_prio(p);
++ cpumask_t *mask = sched_preempt_mask + SCHED_QUEUE_BITS - 1 - task_prio;
++ int pr = atomic_read(&sched_prio_record);
++
++ if (pr != task_prio) {
++ sched_preempt_mask_flush(mask, task_prio);
++ atomic_set(&sched_prio_record, task_prio);
++ }
++
++ return cpumask_and(preempt_mask, allow_mask, mask);
++}
++
+static inline int select_task_rq(struct task_struct *p)
+{
-+ cpumask_t chk_mask, tmp;
++ cpumask_t allow_mask, mask;
+
-+ if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_active_mask)))
++ if (unlikely(!cpumask_and(&allow_mask, p->cpus_ptr, cpu_active_mask)))
+ return select_fallback_rq(task_cpu(p), p);
+
+ if (
+#ifdef CONFIG_SCHED_SMT
-+ cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) ||
++ cpumask_and(&mask, &allow_mask, &sched_sg_idle_mask) ||
+#endif
-+ cpumask_and(&tmp, &chk_mask, sched_rq_watermark) ||
-+ cpumask_and(&tmp, &chk_mask,
-+ sched_rq_watermark + SCHED_QUEUE_BITS - 1 - task_sched_prio(p)))
-+ return best_mask_cpu(task_cpu(p), &tmp);
++ cpumask_and(&mask, &allow_mask, sched_idle_mask) ||
++ preempt_mask_check(p, &allow_mask, &mask))
++ return best_mask_cpu(task_cpu(p), &mask);
+
-+ return best_mask_cpu(task_cpu(p), &chk_mask);
++ return best_mask_cpu(task_cpu(p), &allow_mask);
+}
+
+void sched_set_stop_task(int cpu, struct task_struct *stop)
@@ -4678,12 +4729,12 @@ index 000000000000..4bea0c025475
+ * find potential cpus which can migrate the current running task
+ */
+ if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) &&
-+ cpumask_andnot(&chk, cpu_online_mask, sched_rq_watermark) &&
++ cpumask_andnot(&chk, cpu_online_mask, sched_idle_mask) &&
+ cpumask_andnot(&chk, &chk, &sched_rq_pending_mask)) {
+ int i;
+
+ for_each_cpu_wrap(i, &chk, cpu) {
-+ if (cpumask_subset(cpu_smt_mask(i), &chk) &&
++ if (!cpumask_intersects(cpu_smt_mask(i), sched_idle_mask) &&\
+ sg_balance_trigger(i))
+ return;
+ }
@@ -4806,6 +4857,7 @@ index 000000000000..4bea0c025475
+static void sched_tick_stop(int cpu)
+{
+ struct tick_work *twork;
++ int os;
+
+ if (housekeeping_cpu(cpu, HK_TYPE_TICK))
+ return;
@@ -4813,7 +4865,10 @@ index 000000000000..4bea0c025475
+ WARN_ON_ONCE(!tick_work_cpu);
+
+ twork = per_cpu_ptr(tick_work_cpu, cpu);
-+ cancel_delayed_work_sync(&twork->work);
++ /* There cannot be competing actions, but don't rely on stop-machine. */
++ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_OFFLINING);
++ WARN_ON_ONCE(os != TICK_SCHED_REMOTE_RUNNING);
++ /* Don't cancel, as this would mess up the state machine. */
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
@@ -4988,7 +5043,7 @@ index 000000000000..4bea0c025475
+{
+ printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n",
+ sched_rq_pending_mask.bits[0],
-+ sched_rq_watermark[0].bits[0],
++ sched_idle_mask->bits[0],
+ sched_sg_idle_mask.bits[0]);
+}
+#else
@@ -5060,15 +5115,15 @@ index 000000000000..4bea0c025475
+ if (src_rq->nr_running < 2)
+ cpumask_clear_cpu(i, &sched_rq_pending_mask);
+
++ spin_release(&src_rq->lock.dep_map, _RET_IP_);
++ do_raw_spin_unlock(&src_rq->lock);
++
+ rq->nr_running += nr_migrated;
+ if (rq->nr_running > 1)
+ cpumask_set_cpu(cpu, &sched_rq_pending_mask);
+
+ cpufreq_update_util(rq, 0);
+
-+ spin_release(&src_rq->lock.dep_map, _RET_IP_);
-+ do_raw_spin_unlock(&src_rq->lock);
-+
+ return 1;
+ }
+
@@ -5097,7 +5152,7 @@ index 000000000000..4bea0c025475
+}
+
+static inline struct task_struct *
-+choose_next_task(struct rq *rq, int cpu, struct task_struct *prev)
++choose_next_task(struct rq *rq, int cpu)
+{
+ struct task_struct *next;
+
@@ -5254,7 +5309,7 @@ index 000000000000..4bea0c025475
+ prev->sched_contributes_to_load =
+ (prev_state & TASK_UNINTERRUPTIBLE) &&
+ !(prev_state & TASK_NOLOAD) &&
-+ !(prev->flags & TASK_FROZEN);
++ !(prev_state & TASK_FROZEN);
+
+ if (prev->sched_contributes_to_load)
+ rq->nr_uninterruptible++;
@@ -5284,7 +5339,7 @@ index 000000000000..4bea0c025475
+
+ check_curr(prev, rq);
+
-+ next = choose_next_task(rq, cpu, prev);
++ next = choose_next_task(rq, cpu);
+ clear_tsk_need_resched(prev);
+ clear_preempt_need_resched();
+#ifdef CONFIG_SCHED_DEBUG
@@ -5293,7 +5348,7 @@ index 000000000000..4bea0c025475
+
+ if (likely(prev != next)) {
+ if (deactivated)
-+ update_sched_rq_watermark(rq);
++ update_sched_preempt_mask(rq);
+ next->last_ran = rq->clock_task;
+ rq->last_ts_switch = rq->clock;
+
@@ -5714,6 +5769,7 @@ index 000000000000..4bea0c025475
+ return;
+
+ rq = __task_access_lock(p, &lock);
++ update_rq_clock(rq);
+ /*
+ * Set under pi_lock && rq->lock, such that the value can be used under
+ * either lock.
@@ -6593,6 +6649,13 @@ index 000000000000..4bea0c025475
+ return retval;
+}
+
++#ifdef CONFIG_SMP
++int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
++{
++ return 0;
++}
++#endif
++
+static int
+__sched_setaffinity(struct task_struct *p, const struct cpumask *mask)
+{
@@ -7431,7 +7494,6 @@ index 000000000000..4bea0c025475
+
+ raw_spin_lock_irqsave(&idle->pi_lock, flags);
+ raw_spin_lock(&rq->lock);
-+ update_rq_clock(rq);
+
+ idle->last_ran = rq->clock_task;
+ idle->__state = TASK_RUNNING;
@@ -7978,6 +8040,14 @@ index 000000000000..4bea0c025475
+
+ sched_smp_initialized = true;
+}
++
++static int __init migration_init(void)
++{
++ sched_cpu_starting(smp_processor_id());
++ return 0;
++}
++early_initcall(migration_init);
++
+#else
+void __init sched_init_smp(void)
+{
@@ -8030,7 +8100,7 @@ index 000000000000..4bea0c025475
+
+#ifdef CONFIG_SMP
+ for (i = 0; i < SCHED_QUEUE_BITS; i++)
-+ cpumask_copy(sched_rq_watermark + i, cpu_present_mask);
++ cpumask_copy(sched_preempt_mask + i, cpu_present_mask);
+#endif
+
+#ifdef CONFIG_CGROUP_SCHED
@@ -8044,7 +8114,7 @@ index 000000000000..4bea0c025475
+ rq = cpu_rq(i);
+
+ sched_queue_init(&rq->queue);
-+ rq->watermark = IDLE_TASK_SCHED_PRIO;
++ rq->prio = IDLE_TASK_SCHED_PRIO;
+ rq->skip = NULL;
+
+ raw_spin_lock_init(&rq->lock);
@@ -8587,14 +8657,15 @@ index 000000000000..1212a031700e
+{}
diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h
new file mode 100644
-index 000000000000..7c1cc0cbca0d
+index 000000000000..c32403ed82b6
--- /dev/null
+++ b/kernel/sched/alt_sched.h
-@@ -0,0 +1,660 @@
+@@ -0,0 +1,668 @@
+#ifndef ALT_SCHED_H
+#define ALT_SCHED_H
+
+#include <linux/context_tracking.h>
++#include <linux/profile.h>
+#include <linux/psi.h>
+#include <linux/stop_machine.h>
+#include <linux/syscalls.h>
@@ -8732,7 +8803,7 @@ index 000000000000..7c1cc0cbca0d
+#ifdef CONFIG_SCHED_PDS
+ u64 time_edge;
+#endif
-+ unsigned long watermark;
++ unsigned long prio;
+
+ /* switch count */
+ u64 nr_switches;
@@ -8997,17 +9068,24 @@ index 000000000000..7c1cc0cbca0d
+}
+
+static inline void
-+rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
++rq_unlock(struct rq *rq, struct rq_flags *rf)
+ __releases(rq->lock)
+{
-+ raw_spin_unlock_irq(&rq->lock);
++ raw_spin_unlock(&rq->lock);
+}
+
+static inline void
-+rq_unlock(struct rq *rq, struct rq_flags *rf)
++rq_lock_irq(struct rq *rq, struct rq_flags *rf)
++ __acquires(rq->lock)
++{
++ raw_spin_lock_irq(&rq->lock);
++}
++
++static inline void
++rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
+ __releases(rq->lock)
+{
-+ raw_spin_unlock(&rq->lock);
++ raw_spin_unlock_irq(&rq->lock);
+}
+
+static inline struct rq *