author     Robert Love <rml@tech9.net>                    2002-02-08 19:11:35 -0800
committer  Linus Torvalds <torvalds@home.transmeta.com>   2002-02-08 19:11:35 -0800
commit     ec332cd30cf1ccde914a87330ff66744414c8d24 (patch)
tree       36d00dc307aa1e4eef2c5c91ec722dd2f1a45834
parent     d7b654751759e2a2e1d49aebf595c12e55ca7b69 (diff)
[PATCH] Re: [PATCH] Preemptible Kernel for 2.5
On Sat, 2002-02-09 at 01:43, Linus Torvalds wrote:

> That will clean up all your issues with header file ordering.

You are right, it did.  I removed all the sched.h dependencies and this
reduced the size of the patch greatly.  I now use current_thread_info()
and none of the header or include hackery from before.

I've tested this with and without preemption enabled with success.  I
appreciate your help with this.

Again, this is a minimal i386-only patch.  I have other arches,
documentation, etc.

Patch against 2.5.4-pre5.

Enjoy,

	Robert Love
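For orientation before the diff: the patch gives each task a preempt_count in
struct thread_info; spin_lock()/spin_unlock() and friends become wrappers that
bump and drop that count, and a reschedule is allowed only when the count
returns to zero with TIF_NEED_RESCHED set.  Below is a minimal stand-alone C
sketch of that counting scheme — not kernel code; current_ti, need_resched and
the printf body are simplified stand-ins for the real per-task state and
preempt_schedule() — just to make the mechanism in the diff easier to follow.

    #include <stdio.h>

    struct thread_info {
    	int preempt_count;	/* 0 => preemptable, >0 => in a critical section */
    	int need_resched;	/* stand-in for TIF_NEED_RESCHED */
    };

    static struct thread_info current_ti;	/* single task, for illustration only */

    /* entering a critical section: just bump the count */
    #define preempt_disable()	(++current_ti.preempt_count)

    /* stand-in for the kernel's preempt_schedule() entry point */
    static void preempt_schedule(void)
    {
    	printf("preemption point reached, count=%d\n",
    	       current_ti.preempt_count);
    }

    /* leaving a critical section: drop the count and, if we are now
     * preemptable and a reschedule is pending, take it */
    #define preempt_enable()					\
    	do {							\
    		if (--current_ti.preempt_count == 0 &&		\
    		    current_ti.need_resched)			\
    			preempt_schedule();			\
    	} while (0)

    int main(void)
    {
    	preempt_disable();		/* e.g. inside spin_lock()              */
    	current_ti.need_resched = 1;	/* an "interrupt" requests a reschedule */
    	preempt_enable();		/* count hits 0 -> preempt_schedule()   */
    	return 0;
    }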
-rw-r--r--  arch/i386/Config.help            10
-rw-r--r--  arch/i386/config.in               8
-rw-r--r--  arch/i386/kernel/entry.S         53
-rw-r--r--  arch/i386/kernel/i387.c           2
-rw-r--r--  arch/i386/kernel/smp.c            4
-rw-r--r--  arch/i386/kernel/traps.c          2
-rw-r--r--  fs/exec.c                         2
-rw-r--r--  include/asm-i386/hardirq.h        2
-rw-r--r--  include/asm-i386/highmem.h        7
-rw-r--r--  include/asm-i386/hw_irq.h        15
-rw-r--r--  include/asm-i386/i387.h           3
-rw-r--r--  include/asm-i386/pgalloc.h       12
-rw-r--r--  include/asm-i386/smplock.h       14
-rw-r--r--  include/asm-i386/softirq.h        8
-rw-r--r--  include/asm-i386/spinlock.h      18
-rw-r--r--  include/asm-i386/thread_info.h    4
-rw-r--r--  include/linux/brlock.h           10
-rw-r--r--  include/linux/sched.h             1
-rw-r--r--  include/linux/smp.h               2
-rw-r--r--  include/linux/smp_lock.h          2
-rw-r--r--  include/linux/spinlock.h         86
-rw-r--r--  kernel/exit.c                     2
-rw-r--r--  kernel/fork.c                     7
-rw-r--r--  kernel/ksyms.c                    3
-rw-r--r--  kernel/sched.c                   56
-rw-r--r--  net/socket.c                      2
26 files changed, 292 insertions(+), 43 deletions(-)
diff --git a/arch/i386/Config.help b/arch/i386/Config.help
index 98b4c7542cd1..abd88edb2efa 100644
--- a/arch/i386/Config.help
+++ b/arch/i386/Config.help
@@ -25,6 +25,16 @@ CONFIG_SMP
If you don't know what to do here, say N.
+CONFIG_PREEMPT
+ This option reduces the latency of the kernel when reacting to
+ real-time or interactive events by allowing a low priority process to
+ be preempted even if it is in kernel mode executing a system call.
+ This allows applications to run more reliably even when the system is
+ under load.
+
+ Say Y here if you are building a kernel for a desktop, embedded
+ or real-time system. Say N if you are unsure.
+
CONFIG_X86
This is Linux's home port. Linux was originally native to the Intel
386, and runs on all the later x86 processors including the Intel
diff --git a/arch/i386/config.in b/arch/i386/config.in
index ae7cd0b7d463..21c2586884c6 100644
--- a/arch/i386/config.in
+++ b/arch/i386/config.in
@@ -167,6 +167,7 @@ fi
bool 'Math emulation' CONFIG_MATH_EMULATION
bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
bool 'Symmetric multi-processing support' CONFIG_SMP
+bool 'Preemptible Kernel' CONFIG_PREEMPT
if [ "$CONFIG_SMP" != "y" ]; then
bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC
dep_bool 'IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC $CONFIG_X86_UP_APIC
@@ -180,9 +181,12 @@ else
bool 'Multiquad NUMA system' CONFIG_MULTIQUAD
fi
-if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
- define_bool CONFIG_HAVE_DEC_LOCK y
+if [ "$CONFIG_SMP" = "y" -o "$CONFIG_PREEMPT" = "y" ]; then
+ if [ "$CONFIG_X86_CMPXCHG" = "y" ]; then
+ define_bool CONFIG_HAVE_DEC_LOCK y
+ fi
fi
+
endmenu
mainmenu_option next_comment
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 65bfc86e6828..f006f47c4d18 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -69,6 +69,37 @@ IF_MASK = 0x00000200
NT_MASK = 0x00004000
VM_MASK = 0x00020000
+/* These are offsets into the irq_stat structure
+ * There is one per cpu and it is aligned to 32
+ * byte boundary (we put that here as a shift count)
+ */
+irq_array_shift = CONFIG_X86_L1_CACHE_SHIFT
+irq_stat_local_irq_count = 4
+irq_stat_local_bh_count = 8
+
+#ifdef CONFIG_SMP
+#define GET_CPU_INDX movl TI_CPU(%ebx),%eax; \
+ shll $irq_array_shift,%eax
+#define GET_CURRENT_CPU_INDX GET_THREAD_INFO(%ebx); \
+ GET_CPU_INDX
+#define CPU_INDX (,%eax)
+#else
+#define GET_CPU_INDX
+#define GET_CURRENT_CPU_INDX GET_THREAD_INFO(%ebx)
+#define CPU_INDX
+#endif
+
+#ifdef CONFIG_PREEMPT
+#define preempt_stop cli
+#define init_ret_intr \
+ cli; \
+ decl TI_PRE_COUNT(%ebx);
+#else
+#define preempt_stop
+#define init_ret_intr
+#define resume_kernel restore_all
+#endif
+
#define SAVE_ALL \
cld; \
pushl %es; \
@@ -176,11 +207,12 @@ ENTRY(ret_from_fork)
ALIGN
ENTRY(ret_from_intr)
GET_THREAD_INFO(%ebx)
+ init_ret_intr
ret_from_exception:
movl EFLAGS(%esp),%eax # mix EFLAGS and CS
movb CS(%esp),%al
testl $(VM_MASK | 3),%eax
- jz restore_all # returning to kernel-space or vm86-space
+ jz resume_kernel # returning to kernel or vm86-space
ENTRY(resume_userspace)
cli # make sure we don't miss an interrupt setting need_resched
# or sigpending between sampling and the iret
@@ -189,6 +221,22 @@ ENTRY(resume_userspace)
jne work_pending
jmp restore_all
+#ifdef CONFIG_PREEMPT
+ENTRY(resume_kernel)
+ cmpl $0,TI_PRE_COUNT(%ebx)
+ jnz restore_all
+ movl TI_FLAGS(%ebx),%ecx
+ testb $_TIF_NEED_RESCHED,%cl
+ jz restore_all
+ movl SYMBOL_NAME(irq_stat)+irq_stat_local_bh_count CPU_INDX,%ecx
+ addl SYMBOL_NAME(irq_stat)+irq_stat_local_irq_count CPU_INDX,%ecx
+ jnz restore_all
+ incl TI_PRE_COUNT(%ebx)
+ sti
+ call SYMBOL_NAME(preempt_schedule)
+ jmp ret_from_intr
+#endif
+
# system call handler stub
ALIGN
ENTRY(system_call)
@@ -302,6 +350,7 @@ error_code:
GET_THREAD_INFO(%ebx)
call *%edi
addl $8,%esp
+ preempt_stop
jmp ret_from_exception
ENTRY(coprocessor_error)
@@ -321,12 +370,14 @@ ENTRY(device_not_available)
movl %cr0,%eax
testl $0x4,%eax # EM (math emulation bit)
jne device_not_available_emulate
+ preempt_stop
call SYMBOL_NAME(math_state_restore)
jmp ret_from_exception
device_not_available_emulate:
pushl $0 # temporary storage for ORIG_EIP
call SYMBOL_NAME(math_emulate)
addl $4,%esp
+ preempt_stop
jmp ret_from_exception
ENTRY(debug)
diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c
index a87a362c4e6a..c237c22fef9b 100644
--- a/arch/i386/kernel/i387.c
+++ b/arch/i386/kernel/i387.c
@@ -10,6 +10,7 @@
#include <linux/config.h>
#include <linux/sched.h>
+#include <linux/spinlock.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/math_emu.h>
@@ -63,6 +64,7 @@ void save_init_fpu( struct task_struct *tsk )
void kernel_fpu_begin(void)
{
+ preempt_disable();
if (test_thread_flag(TIF_USEDFPU)) {
__save_init_fpu(current);
return;
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index af1dc7387206..7fdbdfdba7da 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -497,7 +497,7 @@ void smp_migrate_task(int cpu, task_t *p)
/*
* The target CPU will unlock the migration spinlock:
*/
- spin_lock(&migration_lock);
+ _raw_spin_lock(&migration_lock);
new_task = p;
send_IPI_mask(1 << cpu, TASK_MIGRATION_VECTOR);
}
@@ -511,7 +511,7 @@ asmlinkage void smp_task_migration_interrupt(void)
ack_APIC_irq();
p = new_task;
- spin_unlock(&migration_lock);
+ _raw_spin_unlock(&migration_lock);
sched_task_migrated(p);
}
/*
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index ad68256f83c5..7c2c2520d1c1 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -710,6 +710,8 @@ asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs,
*
* Careful.. There are problems with IBM-designed IRQ13 behaviour.
* Don't touch unless you *really* know how it works.
+ *
+ * Must be called with kernel preemption disabled.
*/
asmlinkage void math_state_restore(struct pt_regs regs)
{
diff --git a/fs/exec.c b/fs/exec.c
index 3e34704f507c..0167e7c5b891 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -420,8 +420,8 @@ static int exec_mmap(void)
active_mm = current->active_mm;
current->mm = mm;
current->active_mm = mm;
- task_unlock(current);
activate_mm(active_mm, mm);
+ task_unlock(current);
mm_release();
if (old_mm) {
if (active_mm != old_mm) BUG();
diff --git a/include/asm-i386/hardirq.h b/include/asm-i386/hardirq.h
index 4acb4b09ddc3..64ef2bcf2a89 100644
--- a/include/asm-i386/hardirq.h
+++ b/include/asm-i386/hardirq.h
@@ -36,6 +36,8 @@ typedef struct {
#define synchronize_irq() barrier()
+#define release_irqlock(cpu) do { } while (0)
+
#else
#include <asm/atomic.h>
diff --git a/include/asm-i386/highmem.h b/include/asm-i386/highmem.h
index 42f32426eac5..e8d4f37ae84a 100644
--- a/include/asm-i386/highmem.h
+++ b/include/asm-i386/highmem.h
@@ -88,6 +88,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type)
enum fixed_addresses idx;
unsigned long vaddr;
+ preempt_disable();
if (page < highmem_start_page)
return page_address(page);
@@ -109,8 +110,10 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type)
unsigned long vaddr = (unsigned long) kvaddr;
enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
- if (vaddr < FIXADDR_START) // FIXME
+ if (vaddr < FIXADDR_START) { // FIXME
+ preempt_enable();
return;
+ }
if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
BUG();
@@ -122,6 +125,8 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type)
pte_clear(kmap_pte-idx);
__flush_tlb_one(vaddr);
#endif
+
+ preempt_enable();
}
#endif /* __KERNEL__ */
diff --git a/include/asm-i386/hw_irq.h b/include/asm-i386/hw_irq.h
index 5b43b7c41d64..b572c28744cb 100644
--- a/include/asm-i386/hw_irq.h
+++ b/include/asm-i386/hw_irq.h
@@ -96,6 +96,18 @@ extern char _stext, _etext;
#define __STR(x) #x
#define STR(x) __STR(x)
+#define GET_THREAD_INFO \
+ "movl $-8192, %ebx\n\t" \
+ "andl %esp, %ebx\n\t"
+
+#ifdef CONFIG_PREEMPT
+#define BUMP_LOCK_COUNT \
+ GET_THREAD_INFO \
+ "incl 16(%ebx)\n\t"
+#else
+#define BUMP_LOCK_COUNT
+#endif
+
#define SAVE_ALL \
"cld\n\t" \
"pushl %es\n\t" \
@@ -109,7 +121,8 @@ extern char _stext, _etext;
"pushl %ebx\n\t" \
"movl $" STR(__KERNEL_DS) ",%edx\n\t" \
"movl %edx,%ds\n\t" \
- "movl %edx,%es\n\t"
+ "movl %edx,%es\n\t" \
+ BUMP_LOCK_COUNT
#define IRQ_NAME2(nr) nr##_interrupt(void)
#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
diff --git a/include/asm-i386/i387.h b/include/asm-i386/i387.h
index 462ec5a234c5..b8b60c2744e9 100644
--- a/include/asm-i386/i387.h
+++ b/include/asm-i386/i387.h
@@ -12,6 +12,7 @@
#define __ASM_I386_I387_H
#include <linux/sched.h>
+#include <linux/spinlock.h>
#include <asm/processor.h>
#include <asm/sigcontext.h>
#include <asm/user.h>
@@ -24,7 +25,7 @@ extern void save_init_fpu( struct task_struct *tsk );
extern void restore_fpu( struct task_struct *tsk );
extern void kernel_fpu_begin(void);
-#define kernel_fpu_end() stts()
+#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0)
#define unlazy_fpu( tsk ) do { \
diff --git a/include/asm-i386/pgalloc.h b/include/asm-i386/pgalloc.h
index 090573f59076..67773c0472ca 100644
--- a/include/asm-i386/pgalloc.h
+++ b/include/asm-i386/pgalloc.h
@@ -75,20 +75,26 @@ static inline pgd_t *get_pgd_fast(void)
{
unsigned long *ret;
+ preempt_disable();
if ((ret = pgd_quicklist) != NULL) {
pgd_quicklist = (unsigned long *)(*ret);
ret[0] = 0;
pgtable_cache_size--;
- } else
+ preempt_enable();
+ } else {
+ preempt_enable();
ret = (unsigned long *)get_pgd_slow();
+ }
return (pgd_t *)ret;
}
static inline void free_pgd_fast(pgd_t *pgd)
{
+ preempt_disable();
*(unsigned long *)pgd = (unsigned long) pgd_quicklist;
pgd_quicklist = (unsigned long *) pgd;
pgtable_cache_size++;
+ preempt_enable();
}
static inline void free_pgd_slow(pgd_t *pgd)
@@ -119,19 +125,23 @@ static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
{
unsigned long *ret;
+ preempt_disable();
if ((ret = (unsigned long *)pte_quicklist) != NULL) {
pte_quicklist = (unsigned long *)(*ret);
ret[0] = ret[1];
pgtable_cache_size--;
}
+ preempt_enable();
return (pte_t *)ret;
}
static inline void pte_free_fast(pte_t *pte)
{
+ preempt_disable();
*(unsigned long *)pte = (unsigned long) pte_quicklist;
pte_quicklist = (unsigned long *) pte;
pgtable_cache_size++;
+ preempt_enable();
}
static __inline__ void pte_free_slow(pte_t *pte)
diff --git a/include/asm-i386/smplock.h b/include/asm-i386/smplock.h
index c270defe9be4..199084cce08f 100644
--- a/include/asm-i386/smplock.h
+++ b/include/asm-i386/smplock.h
@@ -10,7 +10,15 @@
extern spinlock_t kernel_flag;
+#ifdef CONFIG_SMP
#define kernel_locked() spin_is_locked(&kernel_flag)
+#else
+#ifdef CONFIG_PREEMPT
+#define kernel_locked() preempt_get_count()
+#else
+#define kernel_locked() 1
+#endif
+#endif
/*
* Release global kernel lock and global interrupt lock
@@ -43,6 +51,11 @@ do { \
*/
static __inline__ void lock_kernel(void)
{
+#ifdef CONFIG_PREEMPT
+ if (current->lock_depth == -1)
+ spin_lock(&kernel_flag);
+ ++current->lock_depth;
+#else
#if 1
if (!++current->lock_depth)
spin_lock(&kernel_flag);
@@ -55,6 +68,7 @@ static __inline__ void lock_kernel(void)
:"=m" (__dummy_lock(&kernel_flag)),
"=m" (current->lock_depth));
#endif
+#endif
}
static __inline__ void unlock_kernel(void)
diff --git a/include/asm-i386/softirq.h b/include/asm-i386/softirq.h
index b9f7796b296d..c62cbece6ce7 100644
--- a/include/asm-i386/softirq.h
+++ b/include/asm-i386/softirq.h
@@ -5,9 +5,9 @@
#include <asm/hardirq.h>
#define __cpu_bh_enable(cpu) \
- do { barrier(); local_bh_count(cpu)--; } while (0)
+ do { barrier(); local_bh_count(cpu)--; preempt_enable(); } while (0)
#define cpu_bh_disable(cpu) \
- do { local_bh_count(cpu)++; barrier(); } while (0)
+ do { preempt_disable(); local_bh_count(cpu)++; barrier(); } while (0)
#define local_bh_disable() cpu_bh_disable(smp_processor_id())
#define __local_bh_enable() __cpu_bh_enable(smp_processor_id())
@@ -22,7 +22,7 @@
* If you change the offsets in irq_stat then you have to
* update this code as well.
*/
-#define local_bh_enable() \
+#define _local_bh_enable() \
do { \
unsigned int *ptr = &local_bh_count(smp_processor_id()); \
\
@@ -45,4 +45,6 @@ do { \
/* no registers clobbered */ ); \
} while (0)
+#define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while (0)
+
#endif /* __ASM_SOFTIRQ_H */
diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h
index 89118fced025..1a4b4879c358 100644
--- a/include/asm-i386/spinlock.h
+++ b/include/asm-i386/spinlock.h
@@ -77,7 +77,7 @@ typedef struct {
:"=m" (lock->lock) : : "memory"
-static inline void spin_unlock(spinlock_t *lock)
+static inline void _raw_spin_unlock(spinlock_t *lock)
{
#if SPINLOCK_DEBUG
if (lock->magic != SPINLOCK_MAGIC)
@@ -97,7 +97,7 @@ static inline void spin_unlock(spinlock_t *lock)
:"=q" (oldval), "=m" (lock->lock) \
:"0" (oldval) : "memory"
-static inline void spin_unlock(spinlock_t *lock)
+static inline void _raw_spin_unlock(spinlock_t *lock)
{
char oldval = 1;
#if SPINLOCK_DEBUG
@@ -113,7 +113,7 @@ static inline void spin_unlock(spinlock_t *lock)
#endif
-static inline int spin_trylock(spinlock_t *lock)
+static inline int _raw_spin_trylock(spinlock_t *lock)
{
char oldval;
__asm__ __volatile__(
@@ -123,7 +123,7 @@ static inline int spin_trylock(spinlock_t *lock)
return oldval > 0;
}
-static inline void spin_lock(spinlock_t *lock)
+static inline void _raw_spin_lock(spinlock_t *lock)
{
#if SPINLOCK_DEBUG
__label__ here;
@@ -179,7 +179,7 @@ typedef struct {
*/
/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
-static inline void read_lock(rwlock_t *rw)
+static inline void _raw_read_lock(rwlock_t *rw)
{
#if SPINLOCK_DEBUG
if (rw->magic != RWLOCK_MAGIC)
@@ -188,7 +188,7 @@ static inline void read_lock(rwlock_t *rw)
__build_read_lock(rw, "__read_lock_failed");
}
-static inline void write_lock(rwlock_t *rw)
+static inline void _raw_write_lock(rwlock_t *rw)
{
#if SPINLOCK_DEBUG
if (rw->magic != RWLOCK_MAGIC)
@@ -197,10 +197,10 @@ static inline void write_lock(rwlock_t *rw)
__build_write_lock(rw, "__write_lock_failed");
}
-#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
-#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+#define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
-static inline int write_trylock(rwlock_t *lock)
+static inline int _raw_write_trylock(rwlock_t *lock)
{
atomic_t *count = (atomic_t *)lock;
if (atomic_sub_and_test(RW_LOCK_BIAS, count))
diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h
index 0ce93f99f90c..0359b0948f97 100644
--- a/include/asm-i386/thread_info.h
+++ b/include/asm-i386/thread_info.h
@@ -25,6 +25,7 @@ struct thread_info {
struct exec_domain *exec_domain; /* execution domain */
__u32 flags; /* low level flags */
__u32 cpu; /* current CPU */
+ __s32 preempt_count; /* 0 => preemptable, <0 => BUG */
mm_segment_t addr_limit; /* thread address space:
0-0xBFFFFFFF for user-thead
@@ -41,7 +42,8 @@ struct thread_info {
#define TI_EXEC_DOMAIN 0x00000004
#define TI_FLAGS 0x00000008
#define TI_CPU 0x0000000C
-#define TI_ADDR_LIMIT 0x00000010
+#define TI_PRE_COUNT 0x00000010
+#define TI_ADDR_LIMIT 0x00000014
#endif
diff --git a/include/linux/brlock.h b/include/linux/brlock.h
index 208c4573381a..e36492e06f04 100644
--- a/include/linux/brlock.h
+++ b/include/linux/brlock.h
@@ -171,11 +171,11 @@ static inline void br_write_unlock (enum brlock_indices idx)
}
#else
-# define br_read_lock(idx) ((void)(idx))
-# define br_read_unlock(idx) ((void)(idx))
-# define br_write_lock(idx) ((void)(idx))
-# define br_write_unlock(idx) ((void)(idx))
-#endif
+# define br_read_lock(idx) ({ (void)(idx); preempt_disable(); })
+# define br_read_unlock(idx) ({ (void)(idx); preempt_enable(); })
+# define br_write_lock(idx) ({ (void)(idx); preempt_disable(); })
+# define br_write_unlock(idx) ({ (void)(idx); preempt_enable(); })
+#endif /* CONFIG_SMP */
/*
* Now enumerate all of the possible sw/hw IRQ protected
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78a5834f8a25..ad38cabb1619 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -91,6 +91,7 @@ extern unsigned long nr_running(void);
#define TASK_UNINTERRUPTIBLE 2
#define TASK_ZOMBIE 4
#define TASK_STOPPED 8
+#define PREEMPT_ACTIVE 0x4000000
#define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0)
diff --git a/include/linux/smp.h b/include/linux/smp.h
index bb1ff5c5ea1a..43bef9087932 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -81,7 +81,9 @@ extern volatile int smp_msg_id;
#define smp_processor_id() 0
#define hard_smp_processor_id() 0
#define smp_threads_ready 1
+#ifndef CONFIG_PREEMPT
#define kernel_lock()
+#endif
#define cpu_logical_map(cpu) 0
#define cpu_number_map(cpu) 0
#define smp_call_function(func,info,retry,wait) ({ 0; })
diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h
index d1bb03872447..13d8c7ace0bb 100644
--- a/include/linux/smp_lock.h
+++ b/include/linux/smp_lock.h
@@ -3,7 +3,7 @@
#include <linux/config.h>
-#ifndef CONFIG_SMP
+#if !defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT)
#define lock_kernel() do { } while(0)
#define unlock_kernel() do { } while(0)
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index dc27910a6ad5..6e3ef75fd885 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -2,6 +2,10 @@
#define __LINUX_SPINLOCK_H
#include <linux/config.h>
+#include <linux/linkage.h>
+#include <linux/compiler.h>
+#include <linux/thread_info.h>
+#include <linux/kernel.h>
/*
* These are the generic versions of the spinlocks and read-write
@@ -62,8 +66,10 @@
#if (DEBUG_SPINLOCKS < 1)
+#ifndef CONFIG_PREEMPT
#define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic)
#define ATOMIC_DEC_AND_LOCK
+#endif
/*
* Your basic spinlocks, allowing only a single CPU anywhere
@@ -79,11 +85,11 @@
#endif
#define spin_lock_init(lock) do { } while(0)
-#define spin_lock(lock) (void)(lock) /* Not "unused variable". */
+#define _raw_spin_lock(lock) (void)(lock) /* Not "unused variable". */
#define spin_is_locked(lock) (0)
-#define spin_trylock(lock) ({1; })
+#define _raw_spin_trylock(lock) ({1; })
#define spin_unlock_wait(lock) do { } while(0)
-#define spin_unlock(lock) do { } while(0)
+#define _raw_spin_unlock(lock) do { } while(0)
#elif (DEBUG_SPINLOCKS < 2)
@@ -142,13 +148,79 @@ typedef struct {
#endif
#define rwlock_init(lock) do { } while(0)
-#define read_lock(lock) (void)(lock) /* Not "unused variable". */
-#define read_unlock(lock) do { } while(0)
-#define write_lock(lock) (void)(lock) /* Not "unused variable". */
-#define write_unlock(lock) do { } while(0)
+#define _raw_read_lock(lock) (void)(lock) /* Not "unused variable". */
+#define _raw_read_unlock(lock) do { } while(0)
+#define _raw_write_lock(lock) (void)(lock) /* Not "unused variable". */
+#define _raw_write_unlock(lock) do { } while(0)
#endif /* !SMP */
+#ifdef CONFIG_PREEMPT
+
+asmlinkage void preempt_schedule(void);
+
+#define preempt_get_count() (current_thread_info()->preempt_count)
+
+#define preempt_disable() \
+do { \
+ ++current_thread_info()->preempt_count; \
+ barrier(); \
+} while (0)
+
+#define preempt_enable_no_resched() \
+do { \
+ --current_thread_info()->preempt_count; \
+ barrier(); \
+} while (0)
+
+#define preempt_enable() \
+do { \
+ --current_thread_info()->preempt_count; \
+ barrier(); \
+ if (unlikely(!(current_thread_info()->preempt_count) && \
+ test_thread_flag(TIF_NEED_RESCHED))) \
+ preempt_schedule(); \
+} while (0)
+
+#define spin_lock(lock) \
+do { \
+ preempt_disable(); \
+ _raw_spin_lock(lock); \
+} while(0)
+
+#define spin_trylock(lock) ({preempt_disable(); _raw_spin_trylock(lock) ? \
+ 1 : ({preempt_enable(); 0;});})
+#define spin_unlock(lock) \
+do { \
+ _raw_spin_unlock(lock); \
+ preempt_enable(); \
+} while (0)
+
+#define read_lock(lock) ({preempt_disable(); _raw_read_lock(lock);})
+#define read_unlock(lock) ({_raw_read_unlock(lock); preempt_enable();})
+#define write_lock(lock) ({preempt_disable(); _raw_write_lock(lock);})
+#define write_unlock(lock) ({_raw_write_unlock(lock); preempt_enable();})
+#define write_trylock(lock) ({preempt_disable();_raw_write_trylock(lock) ? \
+ 1 : ({preempt_enable(); 0;});})
+
+#else
+
+#define preempt_get_count() do { } while (0)
+#define preempt_disable() do { } while (0)
+#define preempt_enable_no_resched() do {} while(0)
+#define preempt_enable() do { } while (0)
+
+#define spin_lock(lock) _raw_spin_lock(lock)
+#define spin_trylock(lock) _raw_spin_trylock(lock)
+#define spin_unlock(lock) _raw_spin_unlock(lock)
+
+#define read_lock(lock) _raw_read_lock(lock)
+#define read_unlock(lock) _raw_read_unlock(lock)
+#define write_lock(lock) _raw_write_lock(lock)
+#define write_unlock(lock) _raw_write_unlock(lock)
+#define write_trylock(lock) _raw_write_trylock(lock)
+#endif
+
/* "lock on reference count zero" */
#ifndef ATOMIC_DEC_AND_LOCK
#include <asm/atomic.h>
diff --git a/kernel/exit.c b/kernel/exit.c
index 6b5a7cba048e..e5e631714afc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -390,8 +390,8 @@ static inline void __exit_mm(struct task_struct * tsk)
/* more a memory barrier than a real lock */
task_lock(tsk);
tsk->mm = NULL;
- task_unlock(tsk);
enter_lazy_tlb(mm, current, smp_processor_id());
+ task_unlock(tsk);
mmput(mm);
}
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 3e49ad5c1ebc..9ac534b70d1f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -650,6 +650,13 @@ int do_fork(unsigned long clone_flags, unsigned long stack_start,
if (p->binfmt && p->binfmt->module)
__MOD_INC_USE_COUNT(p->binfmt->module);
+#ifdef CONFIG_PREEMPT
+ /*
+ * schedule_tail drops this_rq()->lock so we compensate with a count
+ * of 1. Also, we want to start with kernel preemption disabled.
+ */
+ p->thread_info->preempt_count = 1;
+#endif
p->did_exec = 0;
p->swappable = 0;
p->state = TASK_UNINTERRUPTIBLE;
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index ae89152ce936..41635b99dafe 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -445,6 +445,9 @@ EXPORT_SYMBOL(sleep_on_timeout);
EXPORT_SYMBOL(interruptible_sleep_on);
EXPORT_SYMBOL(interruptible_sleep_on_timeout);
EXPORT_SYMBOL(schedule);
+#ifdef CONFIG_PREEMPT
+EXPORT_SYMBOL(preempt_schedule);
+#endif
EXPORT_SYMBOL(schedule_timeout);
EXPORT_SYMBOL(sys_sched_yield);
EXPORT_SYMBOL(set_user_nice);
diff --git a/kernel/sched.c b/kernel/sched.c
index 56fb9a54e7fe..a8cf63321a11 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -61,10 +61,12 @@ static inline runqueue_t *lock_task_rq(task_t *p, unsigned long *flags)
struct runqueue *__rq;
repeat_lock_task:
+ preempt_disable();
__rq = task_rq(p);
spin_lock_irqsave(&__rq->lock, *flags);
if (unlikely(__rq != task_rq(p))) {
spin_unlock_irqrestore(&__rq->lock, *flags);
+ preempt_enable();
goto repeat_lock_task;
}
return __rq;
@@ -73,6 +75,7 @@ repeat_lock_task:
static inline void unlock_task_rq(runqueue_t *rq, unsigned long *flags)
{
spin_unlock_irqrestore(&rq->lock, *flags);
+ preempt_enable();
}
/*
* Adding/removing a task to/from a priority array:
@@ -195,6 +198,7 @@ static inline void resched_task(task_t *p)
#ifdef CONFIG_SMP
int need_resched, nrpolling;
+ preempt_disable();
/* minimise the chance of sending an interrupt to poll_idle() */
nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED);
@@ -202,6 +206,7 @@ static inline void resched_task(task_t *p)
if (!need_resched && !nrpolling && (p->thread_info->cpu != smp_processor_id()))
smp_send_reschedule(p->thread_info->cpu);
+ preempt_enable();
#else
set_tsk_need_resched(p);
#endif
@@ -219,6 +224,7 @@ void wait_task_inactive(task_t * p)
runqueue_t *rq;
repeat:
+ preempt_disable();
rq = task_rq(p);
while (unlikely(rq->curr == p)) {
cpu_relax();
@@ -227,9 +233,11 @@ repeat:
rq = lock_task_rq(p, &flags);
if (unlikely(rq->curr == p)) {
unlock_task_rq(rq, &flags);
+ preempt_enable();
goto repeat;
}
unlock_task_rq(rq, &flags);
+ preempt_enable();
}
/*
@@ -295,7 +303,10 @@ int wake_up_process(task_t * p)
void wake_up_forked_process(task_t * p)
{
- runqueue_t *rq = this_rq();
+ runqueue_t *rq;
+
+ preempt_disable();
+ rq = this_rq();
p->state = TASK_RUNNING;
if (!rt_task(p)) {
@@ -308,6 +319,7 @@ void wake_up_forked_process(task_t * p)
p->thread_info->cpu = smp_processor_id();
activate_task(p, rq);
spin_unlock_irq(&rq->lock);
+ preempt_enable();
}
asmlinkage void schedule_tail(task_t *prev)
@@ -635,17 +647,31 @@ void scheduling_functions_start_here(void) { }
*/
asmlinkage void schedule(void)
{
- task_t *prev = current, *next;
- runqueue_t *rq = this_rq();
+ task_t *prev, *next;
+ runqueue_t *rq;
prio_array_t *array;
list_t *queue;
int idx;
if (unlikely(in_interrupt()))
BUG();
+
+ preempt_disable();
+ prev = current;
+ rq = this_rq();
+
release_kernel_lock(prev, smp_processor_id());
spin_lock_irq(&rq->lock);
+#ifdef CONFIG_PREEMPT
+ /*
+ * if entering from preempt_schedule, off a kernel preemption,
+ * go straight to picking the next task.
+ */
+ if (unlikely(preempt_get_count() & PREEMPT_ACTIVE))
+ goto pick_next_task;
+#endif
+
switch (prev->state) {
case TASK_RUNNING:
prev->sleep_timestamp = jiffies;
@@ -659,7 +685,7 @@ asmlinkage void schedule(void)
default:
deactivate_task(prev, rq);
}
-#if CONFIG_SMP
+#if CONFIG_SMP || CONFIG_PREEMPT
pick_next_task:
#endif
if (unlikely(!rq->nr_running)) {
@@ -707,9 +733,25 @@ switch_tasks:
spin_unlock_irq(&rq->lock);
reacquire_kernel_lock(current);
+ preempt_enable_no_resched();
return;
}
+#ifdef CONFIG_PREEMPT
+/*
+ * this is the entry point to schedule() from in-kernel preemption.
+ */
+asmlinkage void preempt_schedule(void)
+{
+ do {
+ current_thread_info()->preempt_count += PREEMPT_ACTIVE;
+ schedule();
+ current_thread_info()->preempt_count -= PREEMPT_ACTIVE;
+ barrier();
+ } while (test_thread_flag(TIF_NEED_RESCHED));
+}
+#endif /* CONFIG_PREEMPT */
+
/*
* The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
* wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
@@ -1105,9 +1147,12 @@ out_unlock:
asmlinkage long sys_sched_yield(void)
{
- runqueue_t *rq = this_rq();
+ runqueue_t *rq;
prio_array_t *array;
+ preempt_disable();
+ rq = this_rq();
+
/*
* Decrease the yielding task's priority by one, to avoid
* livelocks. This priority loss is temporary, it's recovered
@@ -1134,6 +1179,7 @@ asmlinkage long sys_sched_yield(void)
__set_bit(current->prio, array->bitmap);
}
spin_unlock(&rq->lock);
+ preempt_enable_no_resched();
schedule();
diff --git a/net/socket.c b/net/socket.c
index 2965aa8d0f49..588328c30ff3 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -132,7 +132,7 @@ static struct file_operations socket_file_ops = {
static struct net_proto_family *net_families[NPROTO];
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
static atomic_t net_family_lockct = ATOMIC_INIT(0);
static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;