[PATCH] sched: add new SCHED_BATCH policy

Add a new SCHED_BATCH (3) scheduling policy: such tasks are presumed CPU-intensive, and will acquire a constant +5 priority level penalty. Such policy is nice for workloads that are non-interactive, but which do not want to give up their nice levels. The policy is also useful for workloads that want a deterministic scheduling policy without interactivity causing extra preemptions (between that workload's tasks). Signed-off-by: Ingo Molnar <mingo@elte.hu> Cc: Michael Kerrisk <mtk-manpages@gmx.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
author: Ingo Molnar <mingo@elte.hu> 2006-01-14 13:20:41 -0800
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-01-14 18:25:20 -0800
commit: b0a9499c3dd50d333e2aedb7e894873c58da3785 (patch)
tree: 1b9610020884091984ce8290c70bebdc3e7bb09b
parent: 2d0cfb527944c2cfee2cffab14f52d483e329fcf (diff)
download: linux-3.10-b0a9499c3dd50d333e2aedb7e894873c58da3785.tar.gz
linux-3.10-b0a9499c3dd50d333e2aedb7e894873c58da3785.tar.bz2
linux-3.10-b0a9499c3dd50d333e2aedb7e894873c58da3785.zip
3 files changed, 40 insertions, 19 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a72e1713542..2df1a1a2fee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -160,6 +160,7 @@ extern unsigned long nr_iowait(void);
 #define SCHED_NORMAL		0
 #define SCHED_FIFO		1
 #define SCHED_RR		2
+#define SCHED_BATCH		3
 
 struct sched_param {
 	int sched_priority;
@@ -470,9 +471,9 @@ struct signal_struct {
 
 /*
  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
- * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL tasks are
- * in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values
- * are inverted: lower p->prio value means higher priority.
+ * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
+ * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
+ * values are inverted: lower p->prio value means higher priority.
  *
  * The MAX_USER_RT_PRIO value allows the actual maximum
  * RT priority to be separate from the value exported to
diff --git a/kernel/exit.c b/kernel/exit.c
index f8e609ff189..7fb541cb8d6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -244,7 +244,9 @@ static inline void reparent_to_init(void)
 	/* Set the exit signal to SIGCHLD so we signal init on exit */
 	current->exit_signal = SIGCHLD;
 
-	if ((current->policy == SCHED_NORMAL) && (task_nice(current) < 0))
+	if ((current->policy == SCHED_NORMAL ||
+			current->policy == SCHED_BATCH)
+				&& (task_nice(current) < 0))
 		set_user_nice(current, 0);
 	/* cpus_allowed? */
 	/* rt_priority? */
diff --git a/kernel/sched.c b/kernel/sched.c
index c9dec2aa197..e1dc903d5a7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -748,10 +748,14 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
 	unsigned long long __sleep_time = now - p->timestamp;
 	unsigned long sleep_time;
 
-	if (__sleep_time > NS_MAX_SLEEP_AVG)
-		sleep_time = NS_MAX_SLEEP_AVG;
-	else
-		sleep_time = (unsigned long)__sleep_time;
+	if (unlikely(p->policy == SCHED_BATCH))
+		sleep_time = 0;
+	else {
+		if (__sleep_time > NS_MAX_SLEEP_AVG)
+			sleep_time = NS_MAX_SLEEP_AVG;
+		else
+			sleep_time = (unsigned long)__sleep_time;
+	}
 
 	if (likely(sleep_time > 0)) {
 		/*
@@ -3560,7 +3564,7 @@ void set_user_nice(task_t *p, long nice)
 	 * The RT priorities are set via sched_setscheduler(), but we still
 	 * allow the 'normal' nice value to be set - but as expected
 	 * it wont have any effect on scheduling until the task is
-	 * not SCHED_NORMAL:
+	 * not SCHED_NORMAL/SCHED_BATCH:
 	 */
 	if (rt_task(p)) {
 		p->static_prio = NICE_TO_PRIO(nice);
@@ -3706,10 +3710,16 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
 	BUG_ON(p->array);
 	p->policy = policy;
 	p->rt_priority = prio;
-	if (policy != SCHED_NORMAL)
+	if (policy != SCHED_NORMAL && policy != SCHED_BATCH) {
 		p->prio = MAX_RT_PRIO-1 - p->rt_priority;
-	else
+	} else {
 		p->prio = p->static_prio;
+		/*
+		 * SCHED_BATCH tasks are treated as perpetual CPU hogs:
+		 */
+		if (policy == SCHED_BATCH)
+			p->sleep_avg = 0;
+	}
 }
 
 /**
@@ -3733,29 +3743,35 @@ recheck:
 	if (policy < 0)
 		policy = oldpolicy = p->policy;
 	else if (policy != SCHED_FIFO && policy != SCHED_RR &&
-				policy != SCHED_NORMAL)
-			return -EINVAL;
+			policy != SCHED_NORMAL && policy != SCHED_BATCH)
+		return -EINVAL;
 	/*
 	 * Valid priorities for SCHED_FIFO and SCHED_RR are
-	 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0.
+	 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and
+	 * SCHED_BATCH is 0.
 	 */
 	if (param->sched_priority < 0 ||
 	    (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
 	    (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
 		return -EINVAL;
-	if ((policy == SCHED_NORMAL) != (param->sched_priority == 0))
+	if ((policy == SCHED_NORMAL || policy == SCHED_BATCH)
+					!= (param->sched_priority == 0))
 		return -EINVAL;
 
 	/*
 	 * Allow unprivileged RT tasks to decrease priority:
 	 */
 	if (!capable(CAP_SYS_NICE)) {
-		/* can't change policy */
-		if (policy != p->policy &&
-			!p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
+		/*
+		 * can't change policy, except between SCHED_NORMAL
+		 * and SCHED_BATCH:
+		 */
+		if (((policy != SCHED_NORMAL && p->policy != SCHED_BATCH) &&
+			(policy != SCHED_BATCH && p->policy != SCHED_NORMAL)) &&
+				!p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
 			return -EPERM;
 		/* can't increase priority */
-		if (policy != SCHED_NORMAL &&
+		if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) &&
 		    param->sched_priority > p->rt_priority &&
 		    param->sched_priority >
 				p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
@@ -4233,6 +4249,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
 		ret = MAX_USER_RT_PRIO-1;
 		break;
 	case SCHED_NORMAL:
+	case SCHED_BATCH:
 		ret = 0;
 		break;
 	}
@@ -4256,6 +4273,7 @@ asmlinkage long sys_sched_get_priority_min(int policy)
 		ret = 1;
 		break;
 	case SCHED_NORMAL:
+	case SCHED_BATCH:
 		ret = 0;
 	}
 	return ret;
author	Ingo Molnar <mingo@elte.hu>	2006-01-14 13:20:41 -0800
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-01-14 18:25:20 -0800
commit	b0a9499c3dd50d333e2aedb7e894873c58da3785 (patch)
tree	1b9610020884091984ce8290c70bebdc3e7bb09b
parent	2d0cfb527944c2cfee2cffab14f52d483e329fcf (diff)
download	linux-3.10-b0a9499c3dd50d333e2aedb7e894873c58da3785.tar.gz linux-3.10-b0a9499c3dd50d333e2aedb7e894873c58da3785.tar.bz2 linux-3.10-b0a9499c3dd50d333e2aedb7e894873c58da3785.zip