diff -ruN linux-2.4.20-8/arch/i386/kernel/entry.S procstate-2.4.20-8/arch/i386/kernel/entry.S
--- linux-2.4.20-8/arch/i386/kernel/entry.S	2003-03-13 14:24:26.000000000 -0800
+++ procstate-2.4.20-8/arch/i386/kernel/entry.S	2004-05-21 16:17:20.000000000 -0700
@@ -673,6 +673,8 @@
 	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_epoll_wait */
 	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_remap_file_pages */
 	.long SYMBOL_NAME(sys_set_tid_address)
+	.long SYMBOL_NAME(sys_sched_setprocstate)
+	.long SYMBOL_NAME(sys_sched_getprocstate) /* 260 */
 
 
 	.rept NR_syscalls-(.-sys_call_table)/4
diff -ruN linux-2.4.20-8/arch/i386/kernel/setup.c procstate-2.4.20-8/arch/i386/kernel/setup.c
--- linux-2.4.20-8/arch/i386/kernel/setup.c	2003-03-13 14:24:26.000000000 -0800
+++ procstate-2.4.20-8/arch/i386/kernel/setup.c	2004-05-21 10:57:11.000000000 -0700
@@ -2964,6 +2964,9 @@
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 	};
+	static char *procstate[] = {
+		"enabled", "restricted",
+	};
 	struct cpuinfo_x86 *c = v;
 	int i, n = c - cpu_data;
 	int fpu_exception;
@@ -3027,9 +3030,13 @@
 		     x86_cap_flags[i] != NULL )
 			seq_printf(m, " %s", x86_cap_flags[i]);
 
-	seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n",
+	seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
 		     c->loops_per_jiffy/(500000/HZ),
 		     (c->loops_per_jiffy/(5000/HZ)) % 100);
+
+	seq_printf(m, "procstate\t: %s\n", procstate[get_procstate(n)]);
+	seq_printf(m, "\n");
+
 	return 0;
 }
 
diff -ruN linux-2.4.20-8/fs/proc/array.c procstate-2.4.20-8/fs/proc/array.c
--- linux-2.4.20-8/fs/proc/array.c	2003-03-13 14:24:29.000000000 -0800
+++ procstate-2.4.20-8/fs/proc/array.c	2004-10-12 17:32:36.000000000 -0700
@@ -273,6 +273,16 @@
 			    cap_t(p->cap_effective));
 }
 
+/* status-file helpers: append one line to buffer, return the new end of it */
+static inline char *task_curcpu(struct task_struct *p, char *buffer)
+{
+    return buffer + sprintf(buffer, "CPU:\t%d\n", task_cpu(p));
+}
+
+static inline char *task_affinity(struct task_struct *p, char *buffer)
+{
+    return buffer + sprintf(buffer, "Affin:\t%08lx\n", p->cpus_allowed);
+}
 
 int proc_pid_status(struct task_struct *task, char * buffer)
 {
@@ -292,6 +302,8 @@
 	}
 	buffer = task_sig(task, buffer);
 	buffer = task_cap(task, buffer);
+	buffer = task_curcpu(task, buffer);
+	buffer = task_affinity(task, buffer);
 #if defined(CONFIG_ARCH_S390)
 	buffer = task_show_regs(task, buffer);
 #endif
diff -ruN linux-2.4.20-8/include/asm-i386/unistd.h procstate-2.4.20-8/include/asm-i386/unistd.h
--- linux-2.4.20-8/include/asm-i386/unistd.h	2003-03-13 14:24:21.000000000 -0800
+++ procstate-2.4.20-8/include/asm-i386/unistd.h	2004-05-21 09:08:39.000000000 -0700
@@ -259,6 +259,8 @@
 #define __NR_exit_group		252
 #define __NR_lookup_dcookie	253
 #define __NR_set_tid_address	258
+#define __NR_sched_setprocstate	259
+#define __NR_sched_getprocstate 260
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 
diff -ruN linux-2.4.20-8/include/linux/sched.h procstate-2.4.20-8/include/linux/sched.h
--- linux-2.4.20-8/include/linux/sched.h	2003-03-13 14:32:17.000000000 -0800
+++ procstate-2.4.20-8/include/linux/sched.h	2004-10-24 10:45:00.000000000 -0700
@@ -143,6 +143,15 @@
 
 struct completion;
 
+/*
+ * states for sched_{get,set}procstate - these values index the table of
+ * state names in the /proc/cpuinfo code, so the two must stay in sync.
+ */
+#define PROC_ENABLED		0
+#define PROC_RESTRICTED		1
+#define PROCSTATE_MIN		PROC_ENABLED
+#define PROCSTATE_MAX		PROC_RESTRICTED
+
 #ifdef __KERNEL__
 
 #include <linux/spinlock.h>
@@ -568,8 +577,10 @@
 
 #if CONFIG_SMP
 extern void set_cpus_allowed(task_t *p, unsigned long new_mask);
+extern int get_procstate(int cpu);
 #else
 #define set_cpus_allowed(p, new_mask)	do { } while (0)
+#define get_procstate(cpu)		(PROC_ENABLED)
 #endif
 
 extern void set_user_nice(task_t *p, long nice);
diff -ruN linux-2.4.20-8/include/linux/sys.h procstate-2.4.20-8/include/linux/sys.h
--- linux-2.4.20-8/include/linux/sys.h	2003-03-13 14:24:21.000000000 -0800
+++ procstate-2.4.20-8/include/linux/sys.h	2004-05-21 16:23:18.000000000 -0700
@@ -4,7 +4,7 @@
 /*
  * system call entry points ... but not all are defined
  */
-#define NR_syscalls 260
+#define NR_syscalls 262
 
 /*
  * These are system calls that will be removed at some time
diff -ruN linux-2.4.20-8/kernel/sched.c procstate-2.4.20-8/kernel/sched.c
--- linux-2.4.20-8/kernel/sched.c	2003-03-13 14:24:20.000000000 -0800
+++ procstate-2.4.20-8/kernel/sched.c	2004-10-24 15:54:00.000000000 -0700
@@ -1,3 +1,10 @@
+/* debug-only printk wrapper: remove this block and all PRDBG() when done */
+#define PROCSTATE_DEBUG 1
+#define PRDBG(fmt, args...) do { \
+	if (PROCSTATE_DEBUG) \
+		printk(KERN_ERR "DBG: " fmt, ##args); \
+} while (0)
+
 /*
  *  kernel/sched.c
  *
@@ -179,6 +186,12 @@
 #endif
 
 /*
+ * Per-CPU processor state
+ */
+static spinlock_t procstate_lock = SPIN_LOCK_UNLOCKED;
+static int procstate[NR_CPUS];
+
+/*
  * task_rq_lock - lock the runqueue a given task resides on and disable
  * interrupts.  Note the ordering: we can safely lookup the task_rq without
  * explicitly disabling preemption.
@@ -222,6 +235,83 @@
 	spin_unlock_irq(&rq->lock);
 }
 
+/*
+ * May a task with affinity @cpus_allowed run on @cpu?  Its bit must be set,
+ * and a RESTRICTED cpu only accepts a mask naming that cpu alone.  Reads
+ * procstate[] without locking; cpu_allowed() is the locked variant.
+ */
+static inline int __cpu_allowed_in_mask(int cpu, unsigned long cpus_allowed)
+{
+	const unsigned long bit = 1UL << cpu;
+
+	if ((cpus_allowed & bit) == 0)
+		return 0;
+	if (likely(procstate[cpu] == PROC_ENABLED))
+		return 1;
+
+	return procstate[cpu] == PROC_RESTRICTED && cpus_allowed == bit;
+}
+
+/* lockless check: may @task, given its affinity mask, run on @cpu? */
+static inline int __cpu_allowed(task_t *task, int cpu)
+{
+	return __cpu_allowed_in_mask(cpu, task->cpus_allowed);
+}
+
+/* the same eligibility question, answered while holding procstate_lock */
+static inline int cpu_allowed(task_t *task, int cpu)
+{
+	unsigned long irqflags;
+	int allowed;
+
+	spin_lock_irqsave(&procstate_lock, irqflags);
+	allowed = __cpu_allowed_in_mask(cpu, task->cpus_allowed);
+	spin_unlock_irqrestore(&procstate_lock, irqflags);
+	return allowed;
+}
+
+/* lowest-numbered CPU that @task may run on, or -1 when there is none */
+static inline int __find_first_allowed(task_t *task)
+{
+	int cpu = 0;
+
+	while (cpu < smp_num_cpus && !__cpu_allowed(task, cpu))
+		cpu++;
+
+	return cpu < smp_num_cpus ? cpu : -1;
+}
+
+/*
+ * Pick a CPU for @task under procstate_lock: the first eligible one, else
+ * its current CPU if that is eligible, else the lowest bit of cpus_allowed.
+ */
+static inline int find_first_allowed(task_t *task)
+{
+	unsigned long irqflags;
+	int target;
+
+	spin_lock_irqsave(&procstate_lock, irqflags);
+
+	target = __find_first_allowed(task);
+	if (target < 0 && __cpu_allowed(task, task->cpu))
+		target = task->cpu;
+	else if (target < 0)
+		target = __ffs(task->cpus_allowed);
+	spin_unlock_irqrestore(&procstate_lock, irqflags);
+
+	return target;
+}
+
+/* returns 1 when some CPU would accept a task whose affinity is @allowed */
+static inline int __runnable_mask(unsigned long allowed)
+{
+	int cpu, found = 0;
+
+	for (cpu = 0; !found && cpu < smp_num_cpus; cpu++)
+		found = __cpu_allowed_in_mask(cpu, allowed) != 0;
+	return found;
+}
+
 /*
  * Adding/removing a task to/from a priority array:
  */
@@ -418,7 +508,7 @@
 			 */
 			if (unlikely(sync && !task_running(rq, p) &&
 				(task_cpu(p) != smp_processor_id()) &&
-				(p->cpus_allowed & (1UL << smp_processor_id())))) {
+				(cpu_allowed(p, smp_processor_id())))) {
 
 				set_task_cpu(p, smp_processor_id());
 				task_rq_unlock(rq, &flags);
@@ -836,8 +926,7 @@
 
 #define CAN_MIGRATE_TASK(p,rq,this_cpu)					\
 	((jiffies - (p)->last_run > cache_decay_ticks) &&	\
-		!task_running(rq, p) &&					\
-			((p)->cpus_allowed & (1UL << (this_cpu))))
+		!task_running(rq, p) && (cpu_allowed((p), (this_cpu))))
 
 	curr = curr->prev;
 
@@ -978,6 +1067,95 @@
 	spin_unlock(&rq->lock);
 }
 
+#if CONFIG_SMP
+/* After a procstate change, move tasks off CPUs they may no longer use. */
+static void procstate_changed(int cpu, int oldstate, int newstate)
+{
+	unsigned long flags;
+
+	if (oldstate != newstate)
+		PRDBG("changing CPU %d procstate from %d to %d\n",
+		    cpu, oldstate, newstate);
+
+	/* if it is not more restricted than before, we don't care */
+	if (newstate <= oldstate)
+		return;
+
+	/* if it's more restricted, some tasks may be disallowed */
+	PRDBG("scanning tasklist for invalids\n");
+
+	/*
+	 * We have to do current before anything else, so that the
+	 * migration thread has something to wake up.  Otherwise we
+	 * can get stuck.
+	 */
+	if (!cpu_allowed(current, current->cpu)) {
+		PRDBG("migrating current task first\n");
+		set_cpus_allowed(current, current->cpus_allowed);
+	}
+
+	while (1) {
+		task_t *g, *p;
+		task_t *task = NULL;
+
+		/*
+		 * Rescan from the top after every migration: tasklist_lock
+		 * must be dropped to call set_cpus_allowed().
+		 */
+		read_lock_irqsave(&tasklist_lock, flags);
+		do_each_thread(g, p) {
+			spin_lock(&procstate_lock);
+			if (!__cpu_allowed(p, p->cpu)) {
+				if (unlikely(__find_first_allowed(p) < 0)) {
+					/* unrunnable task */
+					PRDBG("sending SIGPWR to %d (%16s)\n",
+					    p->pid, p->comm);
+					p->cpus_allowed = -1;
+					send_sig(SIGPWR, p, 0);
+				} else {
+					/* migrate it */
+					PRDBG("need to migrate %d (%16s)\n",
+					    p->pid, p->comm);
+					task = p;
+				}
+			}
+			spin_unlock(&procstate_lock);
+
+			if (task) {
+				get_task_struct(task);
+				goto found_one;
+			}
+		} while_each_thread(g, p);
+
+found_one:
+		read_unlock_irqrestore(&tasklist_lock, flags);
+
+		if (!task)
+			break;
+
+		/*
+		 * Once we have a task that is not eligible on its current
+		 * cpu, we let set_cpus_allowed() do its thing.  Running
+		 * tasks will be migrated off to another CPU.
+		 *
+		 * set_cpus_allowed() can sleep - no locks allowed
+		 */
+		PRDBG("invalid %d (%16s) ->cpu was %d\n",
+		    task->pid, task->comm, task->cpu);
+		set_cpus_allowed(task, task->cpus_allowed);
+		PRDBG("invalid %d (%16s) ->cpu is  %d\n",
+		    task->pid, task->comm, task->cpu);
+
+		/* decrement use counter */
+		put_task_struct(task);
+	}
+	PRDBG("done scanning\n");
+
+}
+#else
+#define procstate_changed(c, o, n)
+#endif
+
 void scheduling_functions_start_here(void) { }
 
 /*
@@ -1552,7 +1730,7 @@
  * @len: length in bytes of the bitmask pointed to by user_mask_ptr
  * @user_mask_ptr: user-space pointer to the new cpu mask
  */
-asmlinkage int sys_sched_setaffinity(pid_t pid, unsigned int len,
+asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
 				      unsigned long *user_mask_ptr)
 {
 	unsigned long new_mask;
@@ -1565,8 +1743,7 @@
 	if (copy_from_user(&new_mask, user_mask_ptr, sizeof(new_mask)))
 		return -EFAULT;
 
-	new_mask &= cpu_online_map;
-	if (!new_mask)
+	if (!(new_mask & cpu_online_map))
 		return -EINVAL;
 
 	read_lock(&tasklist_lock);
@@ -1590,6 +1767,10 @@
 			!capable(CAP_SYS_NICE))
 		goto out_unlock;
 
+	retval = -EINVAL;
+	if (!__runnable_mask(new_mask))
+		goto out_unlock;
+
 	retval = 0;
 	set_cpus_allowed(p, new_mask);
 
@@ -1604,7 +1785,7 @@
  * @len: length in bytes of the bitmask pointed to by user_mask_ptr
  * @user_mask_ptr: user-space pointer to hold the current cpu mask
  */
-asmlinkage int sys_sched_getaffinity(pid_t pid, unsigned int len,
+asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
 				      unsigned long *user_mask_ptr)
 {
 	unsigned int real_len;
@@ -1624,7 +1805,7 @@
 		goto out_unlock;
 
 	retval = 0;
-	mask = p->cpus_allowed & cpu_online_map;
+	mask = p->cpus_allowed;
 
 out_unlock:
 	read_unlock(&tasklist_lock);
@@ -1635,6 +1816,65 @@
 	return real_len;
 }
 
+/*
+ * Set @cpu's procstate (CAP_SYS_ADMIN only).  Returns 0, -EPERM, -EINVAL
+ * for a bad cpu or state, or -EBUSY if no ENABLED cpu would remain.
+ */
+asmlinkage long sys_sched_setprocstate(int cpu, int state)
+{
+	int oldstate;
+	unsigned long flags;
+
+	PRDBG("sys_sched_setprocstate(%d, %d)\n", cpu, state);
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (cpu >= smp_num_cpus || cpu < 0 ||
+	    ((1UL<<cpu) & cpu_online_map) == 0)
+		return -EINVAL;
+	if (state < PROCSTATE_MIN || state > PROCSTATE_MAX)
+		return -EINVAL;
+
+	spin_lock_irqsave(&procstate_lock, flags);
+	oldstate = procstate[cpu];
+
+	/* never restrict the last ENABLED cpu; re-restricting others is fine */
+	if (state != PROC_ENABLED && oldstate == PROC_ENABLED) {
+		int i;
+		int count = 0;
+
+		for (i = 0; i < smp_num_cpus; i++)
+			count += (procstate[i] == PROC_ENABLED);
+
+		if (count == 1) {
+			spin_unlock_irqrestore(&procstate_lock, flags);
+			return -EBUSY;
+		}
+	}
+
+	procstate[cpu] = state;
+	spin_unlock_irqrestore(&procstate_lock, flags);
+
+	/* calls set_cpus_allowed(), which can sleep: no locks held here */
+	procstate_changed(cpu, oldstate, state);
+	return 0;
+}
+
+/* Copy @cpu's procstate to *@state_ptr: 0, -EINVAL (bad cpu) or -EFAULT. */
+asmlinkage long sys_sched_getprocstate(int cpu, int *state_ptr)
+{
+	int state;
+
+	if (cpu >= smp_num_cpus || cpu < 0 ||
+	    ((1UL<<cpu) & cpu_online_map) == 0)
+		return -EINVAL;
+
+	state = get_procstate(cpu);
+	PRDBG("sys_sched_getprocstate(%d) = %d\n", cpu, state);
+	/* copy_to_user() returns the number of bytes left uncopied */
+	return copy_to_user(state_ptr, &state, sizeof(state)) ? -EFAULT : 0;
+}
+
 /**
  * sys_sched_yield - yield the current processor to other threads.
  *
@@ -1963,14 +2203,13 @@
 	if (!new_mask)
 		BUG();
 #endif
-
 	rq = task_rq_lock(p, &flags);
 	p->cpus_allowed = new_mask;
 	/*
 	 * Can the task run on the task's current CPU? If not then
 	 * migrate the thread off to a proper CPU.
 	 */
-	if (new_mask & (1UL << task_cpu(p))) {
+	if (cpu_allowed(p, p->cpu)) {
 		task_rq_unlock(rq, &flags);
 		return;
 	}
@@ -1979,7 +2218,7 @@
 	 * it is sufficient to simply update the task's cpu field.
 	 */
 	if (!p->array && !task_running(rq, p)) {
-		set_task_cpu(p, __ffs(p->cpus_allowed));
+		set_task_cpu(p, find_first_allowed(p));
 		task_rq_unlock(rq, &flags);
 		return;
 	}
@@ -2042,7 +2281,7 @@
 		spin_unlock_irqrestore(&rq->lock, flags);
 
 		p = req->task;
-		cpu_dest = __ffs(p->cpus_allowed);
+		cpu_dest = find_first_allowed(p);
 		rq_dest = cpu_rq(cpu_dest);
 repeat:
 		cpu_src = task_cpu(p);
@@ -2097,6 +2336,18 @@
 	return 0;
 }
 
+/* locked read of @cpu's procstate; @cpu is not range-checked here */
+int get_procstate(int cpu)
+{
+	unsigned long irqflags;
+	int state;
+
+	spin_lock_irqsave(&procstate_lock, irqflags);
+	state = procstate[cpu];
+	spin_unlock_irqrestore(&procstate_lock, irqflags);
+	return state;
+}
+
 #endif
 
 
@@ -2119,6 +2370,7 @@
 		spin_lock_init(&rq->lock);
 		INIT_LIST_HEAD(&rq->migration_queue);
 		atomic_set(&rq->nr_iowait, 0);
+		procstate[i] = PROC_ENABLED;
 
 		for (j = 0; j < 2; j++) {
 			array = rq->arrays + j;

