author    Thomas Renninger <trenn@suse.de>  2011-06-17 13:23:54 +0200
committer Thomas Renninger <trenn@suse.de>  2011-06-17 13:23:54 +0200
commit    aa3d6e2b140aac24a432f830d30047b1842aed0b (patch)
tree      691f7d843907f13bd9ae053f3808a540aefebcf0
parent    bc3a79d83165d5cca2759b0e4d3ec1427d5058fa (diff)
- stop_machine: implement stop_machine_from_offline_cpu() (bnc#697859).
- stop_machine: kill __stop_machine() (bnc#697859).
- x86, mtrr: lock stop machine during MTRR rendezvous sequence (bnc#697859).
- stop_machine: reorganize stop_cpus() implementation (bnc#697859).
- x86, mtrr: use __stop_machine() for doing MTRR rendezvous (bnc#697859).
-rw-r--r--  patches.arch/implement_stop_machine_from_offline_cpu.patch  156
-rw-r--r--  patches.arch/kill-__stop_machine.patch                      184
-rw-r--r--  patches.arch/mtrr_stop_machine_quick_fix.patch              109
-rw-r--r--  patches.arch/reorganize_stop_cpus.patch                      68
-rw-r--r--  patches.arch/use_stop_machine_for_mtrr_rendezvous.patch     288
-rw-r--r--  series.conf                                                  7
6 files changed, 810 insertions, 2 deletions
diff --git a/patches.arch/implement_stop_machine_from_offline_cpu.patch b/patches.arch/implement_stop_machine_from_offline_cpu.patch
new file mode 100644
index 0000000000..acea4a6d6b
--- /dev/null
+++ b/patches.arch/implement_stop_machine_from_offline_cpu.patch
@@ -0,0 +1,156 @@
+From: Tejun Heo <tj@kernel.org>
+Subject: stop_machine: implement stop_machine_from_offline_cpu()
+References: bnc#697859
+Patch-Mainline: not yet
+
+Signed-off-by: Thomas Renninger <trenn@suse.de>
+
+Currently, mtrr wants stop_machine functionality while a CPU is being
+brought up. As stop_machine() requires the calling CPU to be online,
+mtrr implements its own stop_machine using stop_one_cpu() on each
+online CPU. This not only unnecessarily duplicates complex logic but
+also introduces the possibility of deadlock when it races against the
+generic stop_machine().
+
+This patch implements stop_machine_from_offline_cpu() to serve such
+use cases. Its functionality is basically the same as stop_machine();
+however, it is to be called from a CPU which isn't online, and it does
+not depend on working scheduling on the calling CPU.
+
+This is achieved by using busy loops for synchronization and
+open-coding stop_cpus queueing and waiting with direct invocation of
+fn() for the local CPU in between.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
+Cc: Ingo Molnar <mingo@elte.hu>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+---
+ include/linux/stop_machine.h | 14 ++++++++-
+ kernel/stop_machine.c | 62 ++++++++++++++++++++++++++++++++++++++++++-
+ 2 files changed, 73 insertions(+), 3 deletions(-)
+
+Index: linux-2.6-tip/include/linux/stop_machine.h
+===================================================================
+--- linux-2.6-tip.orig/include/linux/stop_machine.h
++++ linux-2.6-tip/include/linux/stop_machine.h
+@@ -101,18 +101,28 @@ static inline int try_stop_cpus(const st
+ #if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
+
+ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
++int stop_machine_from_offline_cpu(int (*fn)(void *), void *data,
++ const struct cpumask *cpus);
+
+ #else /* CONFIG_STOP_MACHINE && CONFIG_SMP */
+
+ static inline int stop_machine(int (*fn)(void *), void *data,
+ const struct cpumask *cpus)
+ {
++ unsigned long flags;
+ int ret;
+- local_irq_disable();
++
++ local_irq_save(flags);
+ ret = fn(data);
+- local_irq_enable();
++ local_irq_restore(flags);
+ return ret;
+ }
+
++static inline int stop_machine_from_offline_cpu(int (*fn)(void *), void *data,
++ const struct cpumask *cpus)
++{
++ return stop_machine(fn, data, cpus);
++}
++
+ #endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */
+ #endif /* _LINUX_STOP_MACHINE */
+Index: linux-2.6-tip/kernel/stop_machine.c
+===================================================================
+--- linux-2.6-tip.orig/kernel/stop_machine.c
++++ linux-2.6-tip/kernel/stop_machine.c
+@@ -439,8 +439,15 @@ static int stop_machine_cpu_stop(void *d
+ struct stop_machine_data *smdata = data;
+ enum stopmachine_state curstate = STOPMACHINE_NONE;
+ int cpu = smp_processor_id(), err = 0;
++ unsigned long flags;
+ bool is_active;
+
++ /*
++ * When called from stop_machine_from_offline_cpu(), irq might
++ * already be disabled. Save the state and restore it on exit.
++ */
++ local_save_flags(flags);
++
+ if (!smdata->active_cpus)
+ is_active = cpu == cpumask_first(cpu_online_mask);
+ else
+@@ -468,7 +475,7 @@ static int stop_machine_cpu_stop(void *d
+ }
+ } while (curstate != STOPMACHINE_EXIT);
+
+- local_irq_enable();
++ local_irq_restore(flags);
+ return err;
+ }
+
+@@ -511,4 +518,57 @@ int stop_machine(int (*fn)(void *), void
+ }
+ EXPORT_SYMBOL_GPL(stop_machine);
+
++/**
++ * stop_machine_from_offline_cpu - stop_machine() from offline CPU
++ * @fn: the function to run
++ * @data: the data ptr for the @fn()
++ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
++ *
++ * This is identical to stop_machine() but can be called from a CPU which
++ * isn't online. The local CPU is in the process of hotplug (so no other
++ * CPU hotplug can start) and not marked online and doesn't have enough
++ * context to sleep.
++ *
++ * This function provides stop_machine() functionality for such state by
++ * using busy-wait for synchronization and executing @fn directly for local
++ * CPU.
++ *
++ * CONTEXT:
++ * Local CPU is offline. Temporarily stops all online CPUs.
++ *
++ * RETURNS:
++ * 0 if all executions of @fn returned 0, any non zero return value if any
++ * returned non zero.
++ */
++int stop_machine_from_offline_cpu(int (*fn)(void *), void *data,
++ const struct cpumask *cpus)
++{
++ struct stop_machine_data smdata = { .fn = fn, .data = data,
++ .active_cpus = cpus };
++ struct cpu_stop_done done;
++ int ret;
++
++ /* Local CPU must be offline and CPU hotplug in progress. */
++ BUG_ON(cpu_online(raw_smp_processor_id()));
++ smdata.num_threads = num_online_cpus() + 1; /* +1 for local */
++
++ /* No proper task established and can't sleep - busy wait for lock. */
++ while (!mutex_trylock(&stop_cpus_mutex))
++ cpu_relax();
++
++ /* Schedule work on other CPUs and execute directly for local CPU */
++ set_state(&smdata, STOPMACHINE_PREPARE);
++ cpu_stop_init_done(&done, num_online_cpus());
++ queue_stop_cpus_work(cpu_online_mask, stop_machine_cpu_stop, &smdata,
++ &done);
++ ret = stop_machine_cpu_stop(&smdata);
++
++ /* Busy wait for completion. */
++ while (!completion_done(&done.completion))
++ cpu_relax();
++
++ mutex_unlock(&stop_cpus_mutex);
++ return ret ?: done.ret;
++}
++
+ #endif /* CONFIG_STOP_MACHINE */
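
For illustration, here is a minimal usage sketch of the new primitive,
modeled on the MTRR caller later in this commit. This is a hedged sketch
only: the callback body and the bring-up hook are hypothetical, while
stop_machine_from_offline_cpu() and cpu_callout_mask are the real pieces.

    #include <linux/kernel.h>
    #include <linux/smp.h>
    #include <linux/stop_machine.h>
    #include <asm/smp.h>            /* cpu_callout_mask (x86) */

    /* Runs on each targeted CPU with interrupts off while the machine is
     * stopped; must not sleep or take sleeping locks. */
    static int sync_percpu_state(void *data)
    {
            return 0;
    }

    /* Hypothetical bring-up hook: the calling CPU is not yet marked
     * online and cannot schedule, so plain stop_machine() is unusable. */
    static void hypothetical_cpu_starting_sync(void *data)
    {
            BUG_ON(cpu_online(raw_smp_processor_id()));
            /* cpu_callout_mask includes the not-yet-online caller, so
             * the callback also runs locally. */
            stop_machine_from_offline_cpu(sync_percpu_state, data,
                                          cpu_callout_mask);
    }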
diff --git a/patches.arch/kill-__stop_machine.patch b/patches.arch/kill-__stop_machine.patch
new file mode 100644
index 0000000000..032d0112ca
--- /dev/null
+++ b/patches.arch/kill-__stop_machine.patch
@@ -0,0 +1,184 @@
+From: Tejun Heo <tj@kernel.org>
+Subject: stop_machine: kill __stop_machine()
+References: bnc#697859
+Patch-Mainline: not yet
+
+Signed-off-by: Thomas Renninger <trenn@suse.de>
+
+stop_machine() is different from __stop_machine() in that it
+automatically calls get/put_online_cpus() to disable CPU hotplug. For
+__stop_machine(), the caller is responsible for achieving exclusion
+against CPU hotplug using either get/put_online_cpus() or
+cpu_hotplug_begin/done().
+
+However, get_online_cpus() can nest safely inside both another
+get_online_cpus() and cpu_hotplug_begin(); thus, it's safe to use
+stop_machine() instead of __stop_machine(), making the distinction
+pointless - the overhead of the extra get/put_online_cpus() is
+negligible compared to stop_machine itself, and they basically become
+a noop if hotplug is in progress.
+
+This patch converts all current __stop_machine() users to
+stop_machine() and kills __stop_machine(). While at it, move function
+comment for stop_machine() from function declaration to definition and
+update it slightly.
+
+Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Cc: x86@kernel.org
+Cc: Ingo Molnar <mingo@elte.hu>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+---
+ arch/x86/kernel/alternative.c | 5 ++---
+ include/linux/stop_machine.h | 34 ++--------------------------------
+ kernel/cpu.c | 2 +-
+ kernel/stop_machine.c | 38 +++++++++++++++++++++++++++-----------
+ 4 files changed, 32 insertions(+), 47 deletions(-)
+
+Index: linux-2.6-tip/arch/x86/kernel/alternative.c
+===================================================================
+--- linux-2.6-tip.orig/arch/x86/kernel/alternative.c
++++ linux-2.6-tip/arch/x86/kernel/alternative.c
+@@ -719,8 +719,7 @@ void *__kprobes text_poke_smp(void *addr
+ tpp.nparams = 1;
+ atomic_set(&stop_machine_first, 1);
+ wrote_text = 0;
+- /* Use __stop_machine() because the caller already got online_cpus. */
+- __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
++ stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
+ return addr;
+ }
+
+@@ -741,5 +740,5 @@ void __kprobes text_poke_smp_batch(struc
+
+ atomic_set(&stop_machine_first, 1);
+ wrote_text = 0;
+- __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
++ stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+ }
+Index: linux-2.6-tip/include/linux/stop_machine.h
+===================================================================
+--- linux-2.6-tip.orig/include/linux/stop_machine.h
++++ linux-2.6-tip/include/linux/stop_machine.h
+@@ -100,36 +100,12 @@ static inline int try_stop_cpus(const st
+ */
+ #if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
+
+-/**
+- * stop_machine: freeze the machine on all CPUs and run this function
+- * @fn: the function to run
+- * @data: the data ptr for the @fn()
+- * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
+- *
+- * Description: This causes a thread to be scheduled on every cpu,
+- * each of which disables interrupts. The result is that no one is
+- * holding a spinlock or inside any other preempt-disabled region when
+- * @fn() runs.
+- *
+- * This can be thought of as a very heavy write lock, equivalent to
+- * grabbing every spinlock in the kernel. */
+ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
+
+-/**
+- * __stop_machine: freeze the machine on all CPUs and run this function
+- * @fn: the function to run
+- * @data: the data ptr for the @fn
+- * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
+- *
+- * Description: This is a special version of the above, which assumes cpus
+- * won't come or go while it's being called. Used by hotplug cpu.
+- */
+-int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
+-
+ #else /* CONFIG_STOP_MACHINE && CONFIG_SMP */
+
+-static inline int __stop_machine(int (*fn)(void *), void *data,
+- const struct cpumask *cpus)
++static inline int stop_machine(int (*fn)(void *), void *data,
++ const struct cpumask *cpus)
+ {
+ int ret;
+ local_irq_disable();
+@@ -138,11 +114,5 @@ static inline int __stop_machine(int (*f
+ return ret;
+ }
+
+-static inline int stop_machine(int (*fn)(void *), void *data,
+- const struct cpumask *cpus)
+-{
+- return __stop_machine(fn, data, cpus);
+-}
+-
+ #endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */
+ #endif /* _LINUX_STOP_MACHINE */
+Index: linux-2.6-tip/kernel/cpu.c
+===================================================================
+--- linux-2.6-tip.orig/kernel/cpu.c
++++ linux-2.6-tip/kernel/cpu.c
+@@ -235,7 +235,7 @@ static int __ref _cpu_down(unsigned int
+ goto out_release;
+ }
+
+- err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
++ err = stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+ if (err) {
+ /* CPU didn't die: tell everyone. Can't complain. */
+ cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
+Index: linux-2.6-tip/kernel/stop_machine.c
+===================================================================
+--- linux-2.6-tip.orig/kernel/stop_machine.c
++++ linux-2.6-tip/kernel/stop_machine.c
+@@ -464,24 +464,40 @@ static int stop_machine_cpu_stop(void *d
+ return err;
+ }
+
+-int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
++/**
++ * stop_machine - freeze the machine on all online CPUs and run this function
++ * @fn: the function to run
++ * @data: the data ptr for the @fn()
++ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
++ *
++ * This causes a thread to be scheduled on every cpu, each of which
++ * disables interrupts. The result is that no one is holding a spinlock or
++ * inside any other preempt-disabled region when @fn() runs.
++ *
++ * This can be thought of as a very heavy write lock, equivalent to
++ * grabbing every spinlock in the kernel.
++ *
++ * CONTEXT:
++ * Might sleep. Temporarily stops all online CPUs.
++ *
++ * RETURNS:
++ * 0 if all executions of @fn returned 0, any non zero return value if any
++ * returned non zero.
++ */
++int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
+ {
+ struct stop_machine_data smdata = { .fn = fn, .data = data,
+- .num_threads = num_online_cpus(),
+ .active_cpus = cpus };
+-
+- /* Set the initial state and stop all online cpus. */
+- set_state(&smdata, STOPMACHINE_PREPARE);
+- return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
+-}
+-
+-int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
+-{
+ int ret;
+
+ /* No CPUs can come up or down during this. */
+ get_online_cpus();
+- ret = __stop_machine(fn, data, cpus);
++ smdata.num_threads = num_online_cpus();
++
++ /* Set the initial state and stop all online cpus. */
++ set_state(&smdata, STOPMACHINE_PREPARE);
++ ret = stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
++
+ put_online_cpus();
+ return ret;
+ }
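
The nesting property that makes this safe can be shown in a short
sketch. The caller and callback names are hypothetical; get/
put_online_cpus() and stop_machine() are the real interfaces.

    #include <linux/cpu.h>
    #include <linux/stop_machine.h>

    static int hypothetical_update(void *unused)
    {
            return 0;       /* runs on every online CPU, interrupts off */
    }

    static void hypothetical_caller(void)
    {
            get_online_cpus();      /* caller pins CPU hotplug itself */
            /*
             * stop_machine() calls get_online_cpus() again internally.
             * The read side nests safely inside another read section or
             * inside cpu_hotplug_begin(), and the recursive acquisition
             * is cheap - which is why a separate __stop_machine() is no
             * longer worth keeping.
             */
            stop_machine(hypothetical_update, NULL, NULL);
            put_online_cpus();
    }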
diff --git a/patches.arch/mtrr_stop_machine_quick_fix.patch b/patches.arch/mtrr_stop_machine_quick_fix.patch
new file mode 100644
index 0000000000..f761ac82e9
--- /dev/null
+++ b/patches.arch/mtrr_stop_machine_quick_fix.patch
@@ -0,0 +1,109 @@
+From: Suresh Siddha <suresh.b.siddha@intel.com>
+Subject: x86, mtrr: lock stop machine during MTRR rendezvous sequence
+References: bnc#697859
+Patch-Mainline: not yet
+
+Signed-off-by: Thomas Renninger <trenn@suse.de>
+
+The MTRR rendezvous sequence using stop_one_cpu_nowait() can
+potentially happen in parallel with another system-wide rendezvous
+using stop_machine(). This can lead to deadlock: the order in which
+the work items are queued can be different on different CPUs, so some
+CPUs will be running the first rendezvous handler while others run
+the second, with each set waiting for the other set to join the
+system-wide rendezvous.
+
+The MTRR rendezvous sequence is not implemented using stop_machine()
+because it gets called both from process context as well as from the
+CPU online paths (where the CPU has not yet come online and interrupts
+are disabled). stop_machine() works only with online CPUs.
+
+For now, take the stop_machine mutex in the MTRR rendezvous sequence
+that gets called from an online CPU (here we are in process context
+and can potentially sleep while taking the mutex). The MTRR rendezvous
+that gets triggered during CPU online does not need to take this
+stop_machine lock, as stop_machine() already ensures that no CPU
+hotplug is going on in parallel by doing get_online_cpus().
+
+ TBD: Pursue a cleaner solution of extending the stop_machine()
+ infrastructure to handle the case where the calling cpu is
+ still not online and use this for MTRR rendezvous sequence.
+
+fixes: https://bugzilla.novell.com/show_bug.cgi?id=672008
+
+(will be forwarded to stable series for inclusion in kernels v2.6.35-v2.6.39
+ after some testing in mainline).
+
+Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
+---
+ arch/x86/kernel/cpu/mtrr/main.c | 19 ++++++++++++++++++-
+ include/linux/stop_machine.h | 2 ++
+ kernel/stop_machine.c | 2 +-
+ 3 files changed, 21 insertions(+), 2 deletions(-)
+
+Index: linux-2.6-tip/arch/x86/kernel/cpu/mtrr/main.c
+===================================================================
+--- linux-2.6-tip.orig/arch/x86/kernel/cpu/mtrr/main.c
++++ linux-2.6-tip/arch/x86/kernel/cpu/mtrr/main.c
+@@ -244,9 +244,22 @@ static inline int types_compatible(mtrr_
+ static void
+ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
+ {
++ int cpu = raw_smp_processor_id();
++ int online = cpu_online(cpu);
+ struct set_mtrr_data data;
+ unsigned long flags;
+- int cpu;
++
++#ifdef CONFIG_SMP
++ /*
++ * If we are not yet online, then there can be no stop_machine() in
++ * parallel. Stop machine ensures this by using get_online_cpus().
++ *
++ * Otherwise, we need to prevent a stop_machine() happening in parallel
++ * by taking this lock.
++ */
++ if (online)
++ mutex_lock(&stop_cpus_mutex);
++#endif
+
+ preempt_disable();
+
+@@ -330,6 +343,10 @@ set_mtrr(unsigned int reg, unsigned long
+
+ local_irq_restore(flags);
+ preempt_enable();
++#ifdef CONFIG_SMP
++ if (online)
++ mutex_unlock(&stop_cpus_mutex);
++#endif
+ }
+
+ /**
+Index: linux-2.6-tip/include/linux/stop_machine.h
+===================================================================
+--- linux-2.6-tip.orig/include/linux/stop_machine.h
++++ linux-2.6-tip/include/linux/stop_machine.h
+@@ -27,6 +27,8 @@ struct cpu_stop_work {
+ struct cpu_stop_done *done;
+ };
+
++extern struct mutex stop_cpus_mutex;
++
+ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
+ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
+ struct cpu_stop_work *work_buf);
+Index: linux-2.6-tip/kernel/stop_machine.c
+===================================================================
+--- linux-2.6-tip.orig/kernel/stop_machine.c
++++ linux-2.6-tip/kernel/stop_machine.c
+@@ -132,8 +132,8 @@ void stop_one_cpu_nowait(unsigned int cp
+ cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
+ }
+
++DEFINE_MUTEX(stop_cpus_mutex);
+ /* static data for stop_cpus */
+-static DEFINE_MUTEX(stop_cpus_mutex);
+ static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
+
+ int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
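
The rule this quick fix imposes on any rendezvous built from
stop_one_cpu_nowait() can be condensed as follows; this is a
hypothetical paraphrase of what set_mtrr() now does, not code from the
patch.

    #include <linux/mutex.h>
    #include <linux/smp.h>
    #include <linux/stop_machine.h>

    static void rendezvous_excluding_stop_machine(cpu_stop_fn_t fn,
                                                  void *arg)
    {
            bool online = cpu_online(raw_smp_processor_id());

            /*
             * Online caller: process context, may sleep, and may race
             * with a concurrent stop_machine() - serialize on its mutex.
             * Offline caller: CPU hotplug is in progress, so any
             * stop_machine() is already held off by get_online_cpus(),
             * and sleeping on a mutex would be illegal here anyway.
             */
            if (online)
                    mutex_lock(&stop_cpus_mutex);
            /* ... queue fn on the other online CPUs with
             * stop_one_cpu_nowait() and rendezvous with them, as
             * set_mtrr() does ... */
            if (online)
                    mutex_unlock(&stop_cpus_mutex);
    }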
diff --git a/patches.arch/reorganize_stop_cpus.patch b/patches.arch/reorganize_stop_cpus.patch
new file mode 100644
index 0000000000..677b0abb96
--- /dev/null
+++ b/patches.arch/reorganize_stop_cpus.patch
@@ -0,0 +1,68 @@
+From: Tejun Heo <tj@kernel.org>
+Subject: stop_machine: reorganize stop_cpus() implementation
+References: bnc#697859
+Patch-Mainline: not yet
+
+Signed-off-by: Thomas Renninger <trenn@suse.de>
+
+Refactor the queueing part of the stop cpus work from __stop_cpus() into
+queue_stop_cpus_work().
+
+The reorganization is to help future improvements to stop_machine()
+and doesn't introduce any behavior difference.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
+Cc: Ingo Molnar <mingo@elte.hu>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+---
+ kernel/stop_machine.c | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+Index: linux-2.6-tip/kernel/stop_machine.c
+===================================================================
+--- linux-2.6-tip.orig/kernel/stop_machine.c
++++ linux-2.6-tip/kernel/stop_machine.c
+@@ -136,10 +136,11 @@ DEFINE_MUTEX(stop_cpus_mutex);
+ /* static data for stop_cpus */
+ static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
+
+-int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
++static void queue_stop_cpus_work(const struct cpumask *cpumask,
++ cpu_stop_fn_t fn, void *arg,
++ struct cpu_stop_done *done)
+ {
+ struct cpu_stop_work *work;
+- struct cpu_stop_done done;
+ unsigned int cpu;
+
+ /* initialize works and done */
+@@ -147,9 +148,8 @@ int __stop_cpus(const struct cpumask *cp
+ work = &per_cpu(stop_cpus_work, cpu);
+ work->fn = fn;
+ work->arg = arg;
+- work->done = &done;
++ work->done = done;
+ }
+- cpu_stop_init_done(&done, cpumask_weight(cpumask));
+
+ /*
+ * Disable preemption while queueing to avoid getting
+@@ -161,7 +161,15 @@ int __stop_cpus(const struct cpumask *cp
+ cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
+ &per_cpu(stop_cpus_work, cpu));
+ preempt_enable();
++}
+
++static int __stop_cpus(const struct cpumask *cpumask,
++ cpu_stop_fn_t fn, void *arg)
++{
++ struct cpu_stop_done done;
++
++ cpu_stop_init_done(&done, cpumask_weight(cpumask));
++ queue_stop_cpus_work(cpumask, fn, arg, &done);
+ wait_for_completion(&done.completion);
+ return done.executed ? done.ret : -ENOENT;
+ }
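
The point of the split is that the queueing step becomes reusable by a
caller that must busy-wait instead of sleep, as the
stop_machine_from_offline_cpu() patch earlier in this commit does. Below
is a paraphrased sketch of the two compositions, written as if inside
kernel/stop_machine.c; the helper names are hypothetical.

    /* Composition 1: the existing sleeping path (__stop_cpus()). */
    static int sleeping_stop_cpus(const struct cpumask *cpumask,
                                  cpu_stop_fn_t fn, void *arg)
    {
            struct cpu_stop_done done;

            cpu_stop_init_done(&done, cpumask_weight(cpumask));
            queue_stop_cpus_work(cpumask, fn, arg, &done);
            wait_for_completion(&done.completion);       /* may sleep */
            return done.executed ? done.ret : -ENOENT;
    }

    /* Composition 2: a non-sleeping caller reuses the same queueing. */
    static int busy_waiting_stop_cpus(cpu_stop_fn_t fn, void *arg)
    {
            struct cpu_stop_done done;

            cpu_stop_init_done(&done, num_online_cpus());
            queue_stop_cpus_work(cpu_online_mask, fn, arg, &done);
            while (!completion_done(&done.completion))   /* cannot sleep */
                    cpu_relax();
            return done.ret;
    }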
diff --git a/patches.arch/use_stop_machine_for_mtrr_rendezvous.patch b/patches.arch/use_stop_machine_for_mtrr_rendezvous.patch
new file mode 100644
index 0000000000..6c5cb87334
--- /dev/null
+++ b/patches.arch/use_stop_machine_for_mtrr_rendezvous.patch
@@ -0,0 +1,288 @@
+From: Suresh Siddha <suresh.b.siddha@intel.com>
+Subject: x86, mtrr: use __stop_machine() for doing MTRR rendezvous
+References: bnc#697859
+Patch-Mainline: not yet
+
+Signed-off-by: Thomas Renninger <trenn@suse.de>
+
+The MTRR rendezvous sequence was not implemented using stop_machine()
+before, as it gets called both from process context as well as from the
+CPU online paths (where the CPU has not yet come online and interrupts are disabled).
+
+Now that __stop_machine() works even when the calling CPU is not online,
+use __stop_machine() to implement the MTRR rendezvous sequence. This
+will consolidate and clean up the code.
+
+Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
+---
+ arch/x86/kernel/cpu/mtrr/main.c | 186 ++++++++--------------------------------
+ include/linux/stop_machine.h | 2
+ kernel/stop_machine.c | 2
+ 3 files changed, 42 insertions(+), 148 deletions(-)
+
+Index: linux-2.6-tip/arch/x86/kernel/cpu/mtrr/main.c
+===================================================================
+--- linux-2.6-tip.orig/arch/x86/kernel/cpu/mtrr/main.c
++++ linux-2.6-tip/arch/x86/kernel/cpu/mtrr/main.c
+@@ -137,18 +137,15 @@ static void __init init_table(void)
+ }
+
+ struct set_mtrr_data {
+- atomic_t count;
+- atomic_t gate;
+ unsigned long smp_base;
+ unsigned long smp_size;
+ unsigned int smp_reg;
+ mtrr_type smp_type;
+ };
+
+-static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work);
+-
+ /**
+- * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs.
++ * mtrr_work_handler - Work done in the synchronisation handler. Executed by
++ * all the CPUs.
+ * @info: pointer to mtrr configuration data
+ *
+ * Returns nothing.
+@@ -157,35 +154,26 @@ static int mtrr_work_handler(void *info)
+ {
+ #ifdef CONFIG_SMP
+ struct set_mtrr_data *data = info;
+- unsigned long flags;
+-
+- atomic_dec(&data->count);
+- while (!atomic_read(&data->gate))
+- cpu_relax();
+-
+- local_irq_save(flags);
+-
+- atomic_dec(&data->count);
+- while (atomic_read(&data->gate))
+- cpu_relax();
+
+- /* The master has cleared me to execute */
++ /*
++ * We use this same function to initialize the mtrrs during boot,
++ * resume, runtime cpu online and on an explicit request to set a
++ * specific MTRR.
++ *
++ * During boot or suspend, the state of the boot cpu's mtrrs has been
++ * saved, and we want to replicate that across all the cpus that come
++ * online (either at the end of boot or resume or during a runtime cpu
++ * online). If we're doing that, @reg is set to something special and on
++ * all the cpu's we do mtrr_if->set_all() (On the logical cpu that
++ * started the boot/resume sequence, this might be a duplicate
++ * set_all()).
++ */
+ if (data->smp_reg != ~0U) {
+ mtrr_if->set(data->smp_reg, data->smp_base,
+ data->smp_size, data->smp_type);
+- } else if (mtrr_aps_delayed_init) {
+- /*
+- * Initialize the MTRRs inaddition to the synchronisation.
+- */
++ } else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
+ mtrr_if->set_all();
+ }
+-
+- atomic_dec(&data->count);
+- while (!atomic_read(&data->gate))
+- cpu_relax();
+-
+- atomic_dec(&data->count);
+- local_irq_restore(flags);
+ #endif
+ return 0;
+ }
+@@ -223,20 +211,11 @@ static inline int types_compatible(mtrr_
+ * 14. Wait for buddies to catch up
+ * 15. Enable interrupts.
+ *
+- * What does that mean for us? Well, first we set data.count to the number
+- * of CPUs. As each CPU announces that it started the rendezvous handler by
+- * decrementing the count, We reset data.count and set the data.gate flag
+- * allowing all the cpu's to proceed with the work. As each cpu disables
+- * interrupts, it'll decrement data.count once. We wait until it hits 0 and
+- * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they
+- * are waiting for that flag to be cleared. Once it's cleared, each
+- * CPU goes through the transition of updating MTRRs.
+- * The CPU vendors may each do it differently,
+- * so we call mtrr_if->set() callback and let them take care of it.
+- * When they're done, they again decrement data->count and wait for data.gate
+- * to be set.
+- * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag
+- * Everyone then enables interrupts and we all continue on.
++ * What does that mean for us? Well, stop_machine() will ensure that
++ * the rendezvous handler is started on each CPU. And in lockstep they
++ * do the state transition of disabling interrupts, updating MTRR's
++ * (the CPU vendors may each do it differently, so we call mtrr_if->set()
++ * callback and let them take care of it.) and enabling interrupts.
+ *
+ * Note that the mechanism is the same for UP systems, too; all the SMP stuff
+ * becomes nops.
+@@ -244,109 +223,26 @@ static inline int types_compatible(mtrr_
+ static void
+ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
+ {
+- int cpu = raw_smp_processor_id();
+- int online = cpu_online(cpu);
+- struct set_mtrr_data data;
+- unsigned long flags;
+-
+-#ifdef CONFIG_SMP
+- /*
+- * If we are not yet online, then there can be no stop_machine() in
+- * parallel. Stop machine ensures this by using get_online_cpus().
+- *
+- * Otherwise, we need to prevent a stop_machine() happening in parallel
+- * by taking this lock.
+- */
+- if (online)
+- mutex_lock(&stop_cpus_mutex);
+-#endif
+-
+- preempt_disable();
+-
+- data.smp_reg = reg;
+- data.smp_base = base;
+- data.smp_size = size;
+- data.smp_type = type;
+- atomic_set(&data.count, num_booting_cpus() - 1);
+-
+- /* Make sure data.count is visible before unleashing other CPUs */
+- smp_wmb();
+- atomic_set(&data.gate, 0);
+-
+- /* Start the ball rolling on other CPUs */
+- for_each_online_cpu(cpu) {
+- struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu);
++ struct set_mtrr_data data = { .smp_reg = reg,
++ .smp_base = base,
++ .smp_size = size,
++ .smp_type = type
++ };
++
++ stop_machine(mtrr_work_handler, &data, cpu_online_mask);
++}
++
++static void set_mtrr_from_offline_cpu(unsigned int reg, unsigned long base,
++ unsigned long size, mtrr_type type)
++{
++ struct set_mtrr_data data = { .smp_reg = reg,
++ .smp_base = base,
++ .smp_size = size,
++ .smp_type = type
++ };
+
+- if (cpu == smp_processor_id())
+- continue;
+-
+- stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work);
+- }
+-
+-
+- while (atomic_read(&data.count))
+- cpu_relax();
+-
+- /* Ok, reset count and toggle gate */
+- atomic_set(&data.count, num_booting_cpus() - 1);
+- smp_wmb();
+- atomic_set(&data.gate, 1);
+-
+- local_irq_save(flags);
+-
+- while (atomic_read(&data.count))
+- cpu_relax();
+-
+- /* Ok, reset count and toggle gate */
+- atomic_set(&data.count, num_booting_cpus() - 1);
+- smp_wmb();
+- atomic_set(&data.gate, 0);
+-
+- /* Do our MTRR business */
+-
+- /*
+- * HACK!
+- *
+- * We use this same function to initialize the mtrrs during boot,
+- * resume, runtime cpu online and on an explicit request to set a
+- * specific MTRR.
+- *
+- * During boot or suspend, the state of the boot cpu's mtrrs has been
+- * saved, and we want to replicate that across all the cpus that come
+- * online (either at the end of boot or resume or during a runtime cpu
+- * online). If we're doing that, @reg is set to something special and on
+- * this cpu we still do mtrr_if->set_all(). During boot/resume, this
+- * is unnecessary if at this point we are still on the cpu that started
+- * the boot/resume sequence. But there is no guarantee that we are still
+- * on the same cpu. So we do mtrr_if->set_all() on this cpu aswell to be
+- * sure that we are in sync with everyone else.
+- */
+- if (reg != ~0U)
+- mtrr_if->set(reg, base, size, type);
+- else
+- mtrr_if->set_all();
+-
+- /* Wait for the others */
+- while (atomic_read(&data.count))
+- cpu_relax();
+-
+- atomic_set(&data.count, num_booting_cpus() - 1);
+- smp_wmb();
+- atomic_set(&data.gate, 1);
+-
+- /*
+- * Wait here for everyone to have seen the gate change
+- * So we're the last ones to touch 'data'
+- */
+- while (atomic_read(&data.count))
+- cpu_relax();
+-
+- local_irq_restore(flags);
+- preempt_enable();
+-#ifdef CONFIG_SMP
+- if (online)
+- mutex_unlock(&stop_cpus_mutex);
+-#endif
++ stop_machine_from_offline_cpu(mtrr_work_handler, &data,
++ cpu_callout_mask);
+ }
+
+ /**
+@@ -800,7 +696,7 @@ void mtrr_ap_init(void)
+ * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
+ * lock to prevent mtrr entry changes
+ */
+- set_mtrr(~0U, 0, 0, 0);
++ set_mtrr_from_offline_cpu(~0U, 0, 0, 0);
+ }
+
+ /**
+Index: linux-2.6-tip/include/linux/stop_machine.h
+===================================================================
+--- linux-2.6-tip.orig/include/linux/stop_machine.h
++++ linux-2.6-tip/include/linux/stop_machine.h
+@@ -27,8 +27,6 @@ struct cpu_stop_work {
+ struct cpu_stop_done *done;
+ };
+
+-extern struct mutex stop_cpus_mutex;
+-
+ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
+ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
+ struct cpu_stop_work *work_buf);
+Index: linux-2.6-tip/kernel/stop_machine.c
+===================================================================
+--- linux-2.6-tip.orig/kernel/stop_machine.c
++++ linux-2.6-tip/kernel/stop_machine.c
+@@ -132,8 +132,8 @@ void stop_one_cpu_nowait(unsigned int cp
+ cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
+ }
+
+-DEFINE_MUTEX(stop_cpus_mutex);
+ /* static data for stop_cpus */
++static DEFINE_MUTEX(stop_cpus_mutex);
+ static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
+
+ static void queue_stop_cpus_work(const struct cpumask *cpumask,
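
Taken together, the series leaves MTRR with two thin entry points over
one common handler. The dispatch helper below is a hypothetical
condensation for illustration, written as if inside
arch/x86/kernel/cpu/mtrr/main.c; the two stop-machine calls,
mtrr_work_handler() and struct set_mtrr_data are the real pieces from
this patch.

    static void run_mtrr_rendezvous(struct set_mtrr_data *data)
    {
            if (cpu_online(raw_smp_processor_id()))
                    /* Process context: the ordinary sleeping primitive,
                     * as in set_mtrr(). */
                    stop_machine(mtrr_work_handler, data,
                                 cpu_online_mask);
            else
                    /* CPU bring-up: the caller is not online and cannot
                     * sleep; cpu_callout_mask includes the caller, so it
                     * runs the handler too, as in
                     * set_mtrr_from_offline_cpu(). */
                    stop_machine_from_offline_cpu(mtrr_work_handler, data,
                                                  cpu_callout_mask);
    }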
diff --git a/series.conf b/series.conf
index 23461e72d1..9f0de59b44 100644
--- a/series.conf
+++ b/series.conf
@@ -110,8 +110,11 @@
+needs_update patches.arch/kvm-split-the-KVM-pv-ops-support-by-feature
+needs_update patches.arch/kvm-replace-kvm-io-delay-pv-ops-with-linux-magic
- patches.fixes/x86_mtrr_stop_machine_1_2.patch
- patches.fixes/x86_mtrr_use_stop_machine_2_2.patch
+ patches.arch/mtrr_stop_machine_quick_fix.patch
+ patches.arch/kill-__stop_machine.patch
+ patches.arch/reorganize_stop_cpus.patch
+ patches.arch/implement_stop_machine_from_offline_cpu.patch
+ patches.arch/use_stop_machine_for_mtrr_rendezvous.patch
########################################################
# x86 MCE/MCA (Machine Check Error/Architecture) extensions