author    Jan Beulich <jbeulich@suse.com>  2013-07-02 11:34:13 +0200
committer Jan Beulich <jbeulich@suse.com>  2013-07-02 11:34:13 +0200
commit    c5ad3f985e69b5db90c7518b175cb1523f1b7172 (patch)
tree      d1d1ad365fadc92fecc4724ed7243519b29c2a37
parent    f080c74e6a74c7b0875ab304d62d2a07af1557a5 (diff)

- Update Xen patches to 3.10-final. (rpm-3.10.0-2.3.gc5ad3f9)
-rw-r--r--  patches.xen/ipv6-no-autoconf                 |    8
-rw-r--r--  patches.xen/pci-guestdev                     |   22
-rw-r--r--  patches.xen/pci-reserve                      |    8
-rw-r--r--  patches.xen/xen-netback-kernel-threads       |   13
-rw-r--r--  patches.xen/xen-x86-EFI                      |    9
-rw-r--r--  patches.xen/xen3-auto-xen-arch.diff          | 1409
-rw-r--r--  patches.xen/xen3-auto-xen-kconfig.diff       |   28
-rw-r--r--  patches.xen/xen3-patch-2.6.19                |  197
-rw-r--r--  patches.xen/xen3-patch-2.6.20                |   67
-rw-r--r--  patches.xen/xen3-patch-2.6.21                |   97
-rw-r--r--  patches.xen/xen3-patch-2.6.22                |  452
-rw-r--r--  patches.xen/xen3-patch-2.6.23                |  121
-rw-r--r--  patches.xen/xen3-patch-2.6.24                |  244
-rw-r--r--  patches.xen/xen3-patch-2.6.25                | 2579
-rw-r--r--  patches.xen/xen3-patch-2.6.26                | 1462
-rw-r--r--  patches.xen/xen3-patch-2.6.32                |   54
-rw-r--r--  patches.xen/xen3-patch-3.10 (renamed from patches.xen/xen3-patch-3.10-rc6) |   82
-rw-r--r--  patches.xen/xen3-patch-3.2                   |    5
-rw-r--r--  patches.xen/xen3-patch-3.3                   |   17
-rw-r--r--  patches.xen/xen3-patch-3.7                   |   47
-rw-r--r--  patches.xen/xen3-patch-3.9                   |   82
-rw-r--r--  series.conf                                  |    2
22 files changed, 1165 insertions, 5840 deletions
diff --git a/patches.xen/ipv6-no-autoconf b/patches.xen/ipv6-no-autoconf
index b3095067df..1cd217177e 100644
--- a/patches.xen/ipv6-no-autoconf
+++ b/patches.xen/ipv6-no-autoconf
@@ -15,9 +15,9 @@ This patch makes autoconf (DAD and router discovery) depend on the
interface's ability to do multicast. Turning off multicast for an
interface before bringing it up will suppress autoconfiguration.
---- head.orig/net/ipv6/addrconf.c 2013-06-20 14:56:43.000000000 +0200
-+++ head/net/ipv6/addrconf.c 2013-06-20 14:58:40.000000000 +0200
-@@ -3186,6 +3186,7 @@ static void addrconf_dad_start(struct in
+--- head.orig/net/ipv6/addrconf.c 2013-07-02 09:20:54.000000000 +0200
++++ head/net/ipv6/addrconf.c 2013-07-02 09:21:28.000000000 +0200
+@@ -3189,6 +3189,7 @@ static void addrconf_dad_start(struct in
goto out;
if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
@@ -25,7 +25,7 @@ interface before bringing it up will suppress autoconfiguration.
idev->cnf.accept_dad < 1 ||
!(ifp->flags&IFA_F_TENTATIVE) ||
ifp->flags & IFA_F_NODAD) {
-@@ -3288,6 +3289,7 @@ static void addrconf_dad_completed(struc
+@@ -3291,6 +3292,7 @@ static void addrconf_dad_completed(struc
if (ipv6_accept_ra(ifp->idev) &&
ifp->idev->cnf.rtr_solicits > 0 &&
(dev->flags&IFF_LOOPBACK) == 0 &&
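
As the patch description quoted in the hunk above notes, autoconfiguration (DAD and router solicitation) can be suppressed by clearing the interface's multicast capability before bringing it up. A minimal userspace sketch of that sequence; the interface name "eth0" and the bare-bones error handling are illustrative, not part of the patch:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

int main(void)
{
    struct ifreq ifr;
    int fd = socket(AF_INET, SOCK_DGRAM, 0);

    if (fd < 0) {
        perror("socket");
        return 1;
    }

    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);

    /* While the link is still down, clear the multicast capability. */
    if (ioctl(fd, SIOCGIFFLAGS, &ifr) < 0) {
        perror("SIOCGIFFLAGS");
        return 1;
    }
    ifr.ifr_flags &= ~IFF_MULTICAST;
    if (ioctl(fd, SIOCSIFFLAGS, &ifr) < 0) {
        perror("SIOCSIFFLAGS (multicast off)");
        return 1;
    }

    /* Now bring the interface up: DAD and router discovery stay off. */
    ifr.ifr_flags |= IFF_UP;
    if (ioctl(fd, SIOCSIFFLAGS, &ifr) < 0) {
        perror("SIOCSIFFLAGS (up)");
        return 1;
    }

    close(fd);
    return 0;
}

Run as root; the same flag can be cleared from the shell with "ip link set dev eth0 multicast off" before the interface is brought up.
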
diff --git a/patches.xen/pci-guestdev b/patches.xen/pci-guestdev
index 0d7f189d26..d055caabde 100644
--- a/patches.xen/pci-guestdev
+++ b/patches.xen/pci-guestdev
@@ -28,7 +28,7 @@ Acked-by: jbeulich@novell.com
include/uapi/xen/public/iomulti.h | 50 ++
15 files changed, 2522 insertions(+), 1 deletion(-)
---- head.orig/Documentation/kernel-parameters.txt 2013-05-23 08:57:01.000000000 +0200
+--- head.orig/Documentation/kernel-parameters.txt 2013-07-02 09:20:54.000000000 +0200
+++ head/Documentation/kernel-parameters.txt 2013-05-23 15:36:49.000000000 +0200
@@ -978,6 +978,24 @@ bytes respectively. Such letter suffixes
Format: <unsigned int> such that (rxsize & ~0x1fffc0) == 0.
@@ -66,7 +66,7 @@ Acked-by: jbeulich@novell.com
reboot= [BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode
Format: <reboot_mode>[,<reboot_mode2>[,...]]
See arch/*/kernel/reboot.c or arch/*/kernel/process.c
---- head.orig/drivers/acpi/pci_root.c 2013-06-04 11:48:43.000000000 +0200
+--- head.orig/drivers/acpi/pci_root.c 2013-07-02 09:20:54.000000000 +0200
+++ head/drivers/acpi/pci_root.c 2013-06-04 13:47:47.000000000 +0200
@@ -374,6 +374,41 @@ out:
}
@@ -156,7 +156,7 @@ Acked-by: jbeulich@novell.com
+ return FALSE;
+}
+#endif
---- head.orig/drivers/pci/Kconfig 2012-12-11 04:30:57.000000000 +0100
+--- head.orig/drivers/pci/Kconfig 2013-07-02 09:20:54.000000000 +0200
+++ head/drivers/pci/Kconfig 2012-10-22 16:26:32.000000000 +0200
@@ -43,6 +43,20 @@ config PCI_REALLOC_ENABLE_AUTO
@@ -179,7 +179,7 @@ Acked-by: jbeulich@novell.com
config PCI_STUB
tristate "PCI Stub driver"
depends on PCI
---- head.orig/drivers/pci/Makefile 2013-02-19 00:58:34.000000000 +0100
+--- head.orig/drivers/pci/Makefile 2013-07-02 09:20:54.000000000 +0200
+++ head/drivers/pci/Makefile 2011-01-31 14:31:28.000000000 +0100
@@ -7,6 +7,10 @@ obj-y += access.o bus.o probe.o host-br
irq.o vpd.o setup-bus.o
@@ -2104,7 +2104,7 @@ Acked-by: jbeulich@novell.com
+int pci_iomul_switch_io_allocated(const struct pci_iomul_switch *);
+void pci_iomul_get_lock_switch(struct pci_dev *, struct pci_iomul_switch **,
+ struct pci_iomul_slot **);
---- head.orig/drivers/pci/pci.c 2013-05-23 08:56:16.000000000 +0200
+--- head.orig/drivers/pci/pci.c 2013-07-02 09:20:54.000000000 +0200
+++ head/drivers/pci/pci.c 2013-05-23 15:37:06.000000000 +0200
@@ -3760,7 +3760,7 @@ void pci_reassigndev_resource_alignment(
@@ -2115,9 +2115,9 @@ Acked-by: jbeulich@novell.com
return;
if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
---- head.orig/drivers/pci/pci.h 2013-05-23 08:56:16.000000000 +0200
-+++ head/drivers/pci/pci.h 2013-05-23 15:37:09.000000000 +0200
-@@ -312,4 +312,12 @@ static inline int pci_dev_specific_reset
+--- head.orig/drivers/pci/pci.h 2013-07-02 09:20:54.000000000 +0200
++++ head/drivers/pci/pci.h 2013-07-02 09:21:41.000000000 +0200
+@@ -317,4 +317,12 @@ static inline int pci_dev_specific_reset
}
#endif
@@ -2573,7 +2573,7 @@ Acked-by: jbeulich@novell.com
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Isaku Yamahata <yamahata@valinux.co.jp>");
+MODULE_DESCRIPTION("PCI IO space multiplexing driver");
---- head.orig/include/linux/acpi.h 2013-05-23 08:57:01.000000000 +0200
+--- head.orig/include/linux/acpi.h 2013-07-02 09:20:54.000000000 +0200
+++ head/include/linux/acpi.h 2013-05-23 15:37:27.000000000 +0200
@@ -275,6 +275,8 @@ int acpi_check_region(resource_size_t st
@@ -2584,7 +2584,7 @@ Acked-by: jbeulich@novell.com
#ifdef CONFIG_HIBERNATION
void __init acpi_no_s4_hw_signature(void);
#endif
---- head.orig/include/linux/pci.h 2013-05-23 08:56:44.000000000 +0200
+--- head.orig/include/linux/pci.h 2013-07-02 09:20:54.000000000 +0200
+++ head/include/linux/pci.h 2013-05-23 15:37:32.000000000 +0200
@@ -1884,4 +1884,10 @@ static inline struct eeh_dev *pci_dev_to
*/
@@ -2597,7 +2597,7 @@ Acked-by: jbeulich@novell.com
+#endif
+
#endif /* LINUX_PCI_H */
---- head.orig/include/uapi/xen/Kbuild 2012-12-11 04:30:57.000000000 +0100
+--- head.orig/include/uapi/xen/Kbuild 2013-07-02 09:20:54.000000000 +0200
+++ head/include/uapi/xen/Kbuild 2012-10-22 16:25:23.000000000 +0200
@@ -1,3 +1,4 @@
# UAPI Header export list
diff --git a/patches.xen/pci-reserve b/patches.xen/pci-reserve
index 61e68404cc..c1056489d0 100644
--- a/patches.xen/pci-reserve
+++ b/patches.xen/pci-reserve
@@ -50,9 +50,9 @@ Acked-by: jbeulich@novell.com
obj-$(CONFIG_PCI_QUIRKS) += quirks.o
---- head.orig/drivers/pci/pci.h 2013-05-23 15:37:09.000000000 +0200
-+++ head/drivers/pci/pci.h 2013-05-23 15:37:37.000000000 +0200
-@@ -320,4 +320,19 @@ extern int pci_is_iomuldev(struct pci_de
+--- head.orig/drivers/pci/pci.h 2013-07-02 09:21:41.000000000 +0200
++++ head/drivers/pci/pci.h 2013-07-02 09:21:45.000000000 +0200
+@@ -325,4 +325,19 @@ extern int pci_is_iomuldev(struct pci_de
#define pci_is_iomuldev(dev) 0
#endif
@@ -212,7 +212,7 @@ Acked-by: jbeulich@novell.com
+ return 1;
+}
+__setup("pci_reserve=", pci_reserve_setup);
---- head.orig/drivers/pci/setup-bus.c 2013-05-23 08:56:16.000000000 +0200
+--- head.orig/drivers/pci/setup-bus.c 2013-07-02 09:20:54.000000000 +0200
+++ head/drivers/pci/setup-bus.c 2012-10-22 16:33:41.000000000 +0200
@@ -747,7 +747,7 @@ static void pbus_size_io(struct pci_bus
{
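
The pci-reserve hunks above end by registering a "pci_reserve=" boot option via __setup(). For reference, a self-contained sketch of that early-parameter idiom; the option name "example_reserve=" and its handler are hypothetical, not taken from the patch:

#include <linux/init.h>
#include <linux/kernel.h>

static unsigned long long example_reserve_size __initdata;

/* Parses "example_reserve=<size>" from the kernel command line. */
static int __init example_reserve_setup(char *str)
{
    example_reserve_size = memparse(str, &str);
    pr_info("example_reserve: %llu bytes requested\n",
            example_reserve_size);
    return 1;   /* non-zero tells the caller the option was consumed */
}
__setup("example_reserve=", example_reserve_setup);
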
diff --git a/patches.xen/xen-netback-kernel-threads b/patches.xen/xen-netback-kernel-threads
index 84f8cb87ce..d71eca177b 100644
--- a/patches.xen/xen-netback-kernel-threads
+++ b/patches.xen/xen-netback-kernel-threads
@@ -67,7 +67,7 @@ Acked-by: jbeulich@novell.com
netif_tx_request_t slots[XEN_NETIF_NR_SLOTS_MIN];
} tx;
-+ wait_queue_head_t netbk_action_wq;
++ wait_queue_head_t action_wq;
+ struct task_struct *task;
+
struct xen_netbk_rx {
@@ -105,15 +105,14 @@ Acked-by: jbeulich@novell.com
int netbk_copy_skb_mode;
-@@ -223,6 +230,23 @@ static void flush_notify_list(netif_t *l
+@@ -223,6 +230,22 @@ static void flush_notify_list(netif_t *l
BUG();
}
+static void netbk_rx_schedule(struct xen_netbk_rx *netbk)
+{
+ if (use_kthreads)
-+ wake_up(&container_of(netbk, struct xen_netbk,
-+ rx)->netbk_action_wq);
++ wake_up(&container_of(netbk, struct xen_netbk, rx)->action_wq);
+ else
+ tasklet_schedule(&netbk->tasklet);
+}
@@ -121,7 +120,7 @@ Acked-by: jbeulich@novell.com
+static void netbk_tx_schedule(struct xen_netbk *netbk)
+{
+ if (use_kthreads)
-+ wake_up(&netbk->netbk_action_wq);
++ wake_up(&netbk->action_wq);
+ else
+ tasklet_schedule(&netbk->tx.tasklet);
+}
@@ -223,7 +222,7 @@ Acked-by: jbeulich@novell.com
+ struct xen_netbk *netbk = &xen_netbk[group];
+
+ while (!kthread_should_stop()) {
-+ wait_event_interruptible(netbk->netbk_action_wq,
++ wait_event_interruptible(netbk->action_wq,
+ rx_work_todo(netbk) ||
+ tx_work_todo(netbk) ||
+ kthread_should_stop());
@@ -259,7 +258,7 @@ Acked-by: jbeulich@novell.com
}
+
+ if (use_kthreads) {
-+ init_waitqueue_head(&netbk->netbk_action_wq);
++ init_waitqueue_head(&netbk->action_wq);
+ netbk->task = kthread_create(netbk_action_thread,
+ (void *)(long)group,
+ "netback/%u", group);
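
The conversion above follows the standard kthread-plus-waitqueue idiom: producers wake the thread through action_wq, and the thread sleeps in wait_event_interruptible() until there is work or kthread_should_stop() becomes true. A generic sketch of that idiom, with names that are illustrative rather than taken from netback:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/atomic.h>
#include <linux/err.h>

static DECLARE_WAIT_QUEUE_HEAD(action_wq);
static atomic_t pending = ATOMIC_INIT(0);
static struct task_struct *worker;

static int worker_thread(void *unused)
{
    while (!kthread_should_stop()) {
        /* Sleep until a producer queues work or we are told to stop. */
        wait_event_interruptible(action_wq,
                                 atomic_read(&pending) ||
                                 kthread_should_stop());
        while (atomic_add_unless(&pending, -1, 0)) {
            /* ... process one unit of work here ... */
        }
    }
    return 0;
}

/* Producer side: queue one unit of work and wake the thread. */
static void submit_work(void)
{
    atomic_inc(&pending);
    wake_up(&action_wq);
}

static int __init example_init(void)
{
    worker = kthread_run(worker_thread, NULL, "example-worker");
    if (IS_ERR(worker))
        return PTR_ERR(worker);
    submit_work();  /* demonstrate one wakeup */
    return 0;
}

static void __exit example_exit(void)
{
    kthread_stop(worker);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
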
diff --git a/patches.xen/xen-x86-EFI b/patches.xen/xen-x86-EFI
index b6709a7806..5eb54c0f72 100644
--- a/patches.xen/xen-x86-EFI
+++ b/patches.xen/xen-x86-EFI
@@ -77,8 +77,8 @@ References: fate#311376, fate#311529, bnc#578927, bnc#628554
+ccflags-$(CONFIG_XEN) += -fshort-wchar
+disabled-obj-$(CONFIG_XEN) := efi_%$(BITS).o
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
-+++ head/arch/x86/platform/efi/efi-xen.c 2013-06-20 15:58:25.000000000 +0200
-@@ -0,0 +1,661 @@
++++ head/arch/x86/platform/efi/efi-xen.c 2013-06-27 12:13:42.000000000 +0200
+@@ -0,0 +1,664 @@
+/*
+ * Common EFI (Extensible Firmware Interface) support functions
+ * Based on Extensible Firmware Interface Specification version 1.0
@@ -699,7 +699,10 @@ References: fate#311376, fate#311529, bnc#578927, bnc#628554
+ * that by attempting to use more space than is available.
+ */
+ unsigned long dummy_size = remaining_size + 1024;
-+ void *dummy = kmalloc(dummy_size, GFP_ATOMIC|__GFP_ZERO);
++ void *dummy = kzalloc(dummy_size, GFP_ATOMIC);
++
++ if (!dummy)
++ return EFI_OUT_OF_RESOURCES;
+
+ status = xen_efi_set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
+ EFI_VARIABLE_NON_VOLATILE |
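
The xen-x86-EFI change above replaces an unchecked kmalloc(dummy_size, GFP_ATOMIC|__GFP_ZERO) with the equivalent kzalloc(dummy_size, GFP_ATOMIC) and adds the previously missing allocation-failure check. In isolation the pattern looks like this (a sketch with a hypothetical function name; the surrounding EFI variable-space probing is omitted):

#include <linux/slab.h>
#include <linux/efi.h>

static efi_status_t probe_with_dummy_buffer(unsigned long remaining_size)
{
    /* kzalloc(n, f) is equivalent to kmalloc(n, f | __GFP_ZERO). */
    unsigned long dummy_size = remaining_size + 1024;
    void *dummy = kzalloc(dummy_size, GFP_ATOMIC);

    if (!dummy)     /* atomic allocations can fail at any time, */
        return EFI_OUT_OF_RESOURCES;    /* so report instead of oopsing */

    /* ... deliberately over-commit variable space using the buffer ... */

    kfree(dummy);
    return EFI_SUCCESS;
}
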
diff --git a/patches.xen/xen3-auto-xen-arch.diff b/patches.xen/xen3-auto-xen-arch.diff
index a64a544645..888ebf1f33 100644
--- a/patches.xen/xen3-auto-xen-arch.diff
+++ b/patches.xen/xen3-auto-xen-arch.diff
@@ -20,13 +20,11 @@ places):
+++ linux/arch/x86/kernel/pci-nommu-xen.c
+++ linux/arch/x86/kernel/process-xen.c
+++ linux/arch/x86/kernel/setup-xen.c
-+++ linux/arch/x86/kernel/smp-xen.c
+++ linux/arch/x86/kernel/syscall_32-xen.c
+++ linux/arch/x86/kernel/traps-xen.c
+++ linux/arch/x86/kernel/x86_init-xen.c
+++ linux/arch/x86/lib/cache-smp-xen.c
+++ linux/arch/x86/mm/dump_pagetables-xen.c
-+++ linux/arch/x86/mm/fault-xen.c
+++ linux/arch/x86/mm/init-xen.c
+++ linux/arch/x86/mm/iomap_32-xen.c
+++ linux/arch/x86/mm/pat-xen.c
@@ -7730,614 +7728,6 @@ pick them up (for reference, prefixed with the version the removal occured):
+ * End:
+ */
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
-+++ head/arch/x86/kernel/smp_32-xen.c 2007-12-10 08:47:31.000000000 +0100
-@@ -0,0 +1,605 @@
-+/*
-+ * Intel SMP support routines.
-+ *
-+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
-+ * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
-+ *
-+ * This code is released under the GNU General Public License version 2 or
-+ * later.
-+ */
-+
-+#include <linux/init.h>
-+
-+#include <linux/mm.h>
-+#include <linux/delay.h>
-+#include <linux/spinlock.h>
-+#include <linux/smp_lock.h>
-+#include <linux/kernel_stat.h>
-+#include <linux/mc146818rtc.h>
-+#include <linux/cache.h>
-+#include <linux/interrupt.h>
-+#include <linux/cpu.h>
-+#include <linux/module.h>
-+
-+#include <asm/mtrr.h>
-+#include <asm/tlbflush.h>
-+#if 0
-+#include <mach_apic.h>
-+#endif
-+#include <xen/evtchn.h>
-+
-+/*
-+ * Some notes on x86 processor bugs affecting SMP operation:
-+ *
-+ * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
-+ * The Linux implications for SMP are handled as follows:
-+ *
-+ * Pentium III / [Xeon]
-+ * None of the E1AP-E3AP errata are visible to the user.
-+ *
-+ * E1AP. see PII A1AP
-+ * E2AP. see PII A2AP
-+ * E3AP. see PII A3AP
-+ *
-+ * Pentium II / [Xeon]
-+ * None of the A1AP-A3AP errata are visible to the user.
-+ *
-+ * A1AP. see PPro 1AP
-+ * A2AP. see PPro 2AP
-+ * A3AP. see PPro 7AP
-+ *
-+ * Pentium Pro
-+ * None of 1AP-9AP errata are visible to the normal user,
-+ * except occasional delivery of 'spurious interrupt' as trap #15.
-+ * This is very rare and a non-problem.
-+ *
-+ * 1AP. Linux maps APIC as non-cacheable
-+ * 2AP. worked around in hardware
-+ * 3AP. fixed in C0 and above steppings microcode update.
-+ * Linux does not use excessive STARTUP_IPIs.
-+ * 4AP. worked around in hardware
-+ * 5AP. symmetric IO mode (normal Linux operation) not affected.
-+ * 'noapic' mode has vector 0xf filled out properly.
-+ * 6AP. 'noapic' mode might be affected - fixed in later steppings
-+ * 7AP. We do not assume writes to the LVT deassering IRQs
-+ * 8AP. We do not enable low power mode (deep sleep) during MP bootup
-+ * 9AP. We do not use mixed mode
-+ *
-+ * Pentium
-+ * There is a marginal case where REP MOVS on 100MHz SMP
-+ * machines with B stepping processors can fail. XXX should provide
-+ * an L1cache=Writethrough or L1cache=off option.
-+ *
-+ * B stepping CPUs may hang. There are hardware work arounds
-+ * for this. We warn about it in case your board doesn't have the work
-+ * arounds. Basically thats so I can tell anyone with a B stepping
-+ * CPU and SMP problems "tough".
-+ *
-+ * Specific items [From Pentium Processor Specification Update]
-+ *
-+ * 1AP. Linux doesn't use remote read
-+ * 2AP. Linux doesn't trust APIC errors
-+ * 3AP. We work around this
-+ * 4AP. Linux never generated 3 interrupts of the same priority
-+ * to cause a lost local interrupt.
-+ * 5AP. Remote read is never used
-+ * 6AP. not affected - worked around in hardware
-+ * 7AP. not affected - worked around in hardware
-+ * 8AP. worked around in hardware - we get explicit CS errors if not
-+ * 9AP. only 'noapic' mode affected. Might generate spurious
-+ * interrupts, we log only the first one and count the
-+ * rest silently.
-+ * 10AP. not affected - worked around in hardware
-+ * 11AP. Linux reads the APIC between writes to avoid this, as per
-+ * the documentation. Make sure you preserve this as it affects
-+ * the C stepping chips too.
-+ * 12AP. not affected - worked around in hardware
-+ * 13AP. not affected - worked around in hardware
-+ * 14AP. we always deassert INIT during bootup
-+ * 15AP. not affected - worked around in hardware
-+ * 16AP. not affected - worked around in hardware
-+ * 17AP. not affected - worked around in hardware
-+ * 18AP. not affected - worked around in hardware
-+ * 19AP. not affected - worked around in BIOS
-+ *
-+ * If this sounds worrying believe me these bugs are either ___RARE___,
-+ * or are signal timing bugs worked around in hardware and there's
-+ * about nothing of note with C stepping upwards.
-+ */
-+
-+DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
-+
-+/*
-+ * the following functions deal with sending IPIs between CPUs.
-+ *
-+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
-+ */
-+
-+static inline int __prepare_ICR (unsigned int shortcut, int vector)
-+{
-+ unsigned int icr = shortcut | APIC_DEST_LOGICAL;
-+
-+ switch (vector) {
-+ default:
-+ icr |= APIC_DM_FIXED | vector;
-+ break;
-+ case NMI_VECTOR:
-+ icr |= APIC_DM_NMI;
-+ break;
-+ }
-+ return icr;
-+}
-+
-+static inline int __prepare_ICR2 (unsigned int mask)
-+{
-+ return SET_APIC_DEST_FIELD(mask);
-+}
-+
-+DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
-+
-+static inline void __send_IPI_one(unsigned int cpu, int vector)
-+{
-+ int irq = per_cpu(ipi_to_irq, cpu)[vector];
-+ BUG_ON(irq < 0);
-+ notify_remote_via_irq(irq);
-+}
-+
-+void __send_IPI_shortcut(unsigned int shortcut, int vector)
-+{
-+ int cpu;
-+
-+ switch (shortcut) {
-+ case APIC_DEST_SELF:
-+ __send_IPI_one(smp_processor_id(), vector);
-+ break;
-+ case APIC_DEST_ALLBUT:
-+ for (cpu = 0; cpu < NR_CPUS; ++cpu) {
-+ if (cpu == smp_processor_id())
-+ continue;
-+ if (cpu_isset(cpu, cpu_online_map)) {
-+ __send_IPI_one(cpu, vector);
-+ }
-+ }
-+ break;
-+ default:
-+ printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
-+ vector);
-+ break;
-+ }
-+}
-+
-+void fastcall send_IPI_self(int vector)
-+{
-+ __send_IPI_shortcut(APIC_DEST_SELF, vector);
-+}
-+
-+/*
-+ * This is only used on smaller machines.
-+ */
-+void send_IPI_mask_bitmask(cpumask_t mask, int vector)
-+{
-+ unsigned long flags;
-+ unsigned int cpu;
-+
-+ local_irq_save(flags);
-+ WARN_ON(cpus_addr(mask)[0] & ~cpus_addr(cpu_online_map)[0]);
-+
-+ for (cpu = 0; cpu < NR_CPUS; ++cpu) {
-+ if (cpu_isset(cpu, mask)) {
-+ __send_IPI_one(cpu, vector);
-+ }
-+ }
-+
-+ local_irq_restore(flags);
-+}
-+
-+void send_IPI_mask_sequence(cpumask_t mask, int vector)
-+{
-+
-+ send_IPI_mask_bitmask(mask, vector);
-+}
-+
-+#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
-+
-+#if 0 /* XEN */
-+/*
-+ * Smarter SMP flushing macros.
-+ * c/o Linus Torvalds.
-+ *
-+ * These mean you can really definitely utterly forget about
-+ * writing to user space from interrupts. (Its not allowed anyway).
-+ *
-+ * Optimizations Manfred Spraul <manfred@colorfullife.com>
-+ */
-+
-+static cpumask_t flush_cpumask;
-+static struct mm_struct * flush_mm;
-+static unsigned long flush_va;
-+static DEFINE_SPINLOCK(tlbstate_lock);
-+#define FLUSH_ALL 0xffffffff
-+
-+/*
-+ * We cannot call mmdrop() because we are in interrupt context,
-+ * instead update mm->cpu_vm_mask.
-+ *
-+ * We need to reload %cr3 since the page tables may be going
-+ * away from under us..
-+ */
-+static inline void leave_mm (unsigned long cpu)
-+{
-+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
-+ BUG();
-+ cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
-+ load_cr3(swapper_pg_dir);
-+}
-+
-+/*
-+ *
-+ * The flush IPI assumes that a thread switch happens in this order:
-+ * [cpu0: the cpu that switches]
-+ * 1) switch_mm() either 1a) or 1b)
-+ * 1a) thread switch to a different mm
-+ * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
-+ * Stop ipi delivery for the old mm. This is not synchronized with
-+ * the other cpus, but smp_invalidate_interrupt ignore flush ipis
-+ * for the wrong mm, and in the worst case we perform a superflous
-+ * tlb flush.
-+ * 1a2) set cpu_tlbstate to TLBSTATE_OK
-+ * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
-+ * was in lazy tlb mode.
-+ * 1a3) update cpu_tlbstate[].active_mm
-+ * Now cpu0 accepts tlb flushes for the new mm.
-+ * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
-+ * Now the other cpus will send tlb flush ipis.
-+ * 1a4) change cr3.
-+ * 1b) thread switch without mm change
-+ * cpu_tlbstate[].active_mm is correct, cpu0 already handles
-+ * flush ipis.
-+ * 1b1) set cpu_tlbstate to TLBSTATE_OK
-+ * 1b2) test_and_set the cpu bit in cpu_vm_mask.
-+ * Atomically set the bit [other cpus will start sending flush ipis],
-+ * and test the bit.
-+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
-+ * 2) switch %%esp, ie current
-+ *
-+ * The interrupt must handle 2 special cases:
-+ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
-+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only
-+ * runs in kernel space, the cpu could load tlb entries for user space
-+ * pages.
-+ *
-+ * The good news is that cpu_tlbstate is local to each cpu, no
-+ * write/read ordering problems.
-+ */
-+
-+/*
-+ * TLB flush IPI:
-+ *
-+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
-+ * 2) Leave the mm if we are in the lazy tlb mode.
-+ */
-+
-+irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
-+ struct pt_regs *regs)
-+{
-+ unsigned long cpu;
-+
-+ cpu = get_cpu();
-+
-+ if (!cpu_isset(cpu, flush_cpumask))
-+ goto out;
-+ /*
-+ * This was a BUG() but until someone can quote me the
-+ * line from the intel manual that guarantees an IPI to
-+ * multiple CPUs is retried _only_ on the erroring CPUs
-+ * its staying as a return
-+ *
-+ * BUG();
-+ */
-+
-+ if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
-+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
-+ if (flush_va == FLUSH_ALL)
-+ local_flush_tlb();
-+ else
-+ __flush_tlb_one(flush_va);
-+ } else
-+ leave_mm(cpu);
-+ }
-+ smp_mb__before_clear_bit();
-+ cpu_clear(cpu, flush_cpumask);
-+ smp_mb__after_clear_bit();
-+out:
-+ put_cpu_no_resched();
-+
-+ return IRQ_HANDLED;
-+}
-+
-+static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
-+ unsigned long va)
-+{
-+ /*
-+ * A couple of (to be removed) sanity checks:
-+ *
-+ * - current CPU must not be in mask
-+ * - mask must exist :)
-+ */
-+ BUG_ON(cpus_empty(cpumask));
-+ BUG_ON(cpu_isset(smp_processor_id(), cpumask));
-+ BUG_ON(!mm);
-+
-+ /* If a CPU which we ran on has gone down, OK. */
-+ cpus_and(cpumask, cpumask, cpu_online_map);
-+ if (cpus_empty(cpumask))
-+ return;
-+
-+ /*
-+ * i'm not happy about this global shared spinlock in the
-+ * MM hot path, but we'll see how contended it is.
-+ * Temporarily this turns IRQs off, so that lockups are
-+ * detected by the NMI watchdog.
-+ */
-+ spin_lock(&tlbstate_lock);
-+
-+ flush_mm = mm;
-+ flush_va = va;
-+#if NR_CPUS <= BITS_PER_LONG
-+ atomic_set_mask(cpumask, &flush_cpumask);
-+#else
-+ {
-+ int k;
-+ unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
-+ unsigned long *cpu_mask = (unsigned long *)&cpumask;
-+ for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
-+ atomic_set_mask(cpu_mask[k], &flush_mask[k]);
-+ }
-+#endif
-+ /*
-+ * We have to send the IPI only to
-+ * CPUs affected.
-+ */
-+ send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
-+
-+ while (!cpus_empty(flush_cpumask))
-+ /* nothing. lockup detection does not belong here */
-+ mb();
-+
-+ flush_mm = NULL;
-+ flush_va = 0;
-+ spin_unlock(&tlbstate_lock);
-+}
-+
-+void flush_tlb_current_task(void)
-+{
-+ struct mm_struct *mm = current->mm;
-+ cpumask_t cpu_mask;
-+
-+ preempt_disable();
-+ cpu_mask = mm->cpu_vm_mask;
-+ cpu_clear(smp_processor_id(), cpu_mask);
-+
-+ local_flush_tlb();
-+ if (!cpus_empty(cpu_mask))
-+ flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-+ preempt_enable();
-+}
-+
-+void flush_tlb_mm (struct mm_struct * mm)
-+{
-+ cpumask_t cpu_mask;
-+
-+ preempt_disable();
-+ cpu_mask = mm->cpu_vm_mask;
-+ cpu_clear(smp_processor_id(), cpu_mask);
-+
-+ if (current->active_mm == mm) {
-+ if (current->mm)
-+ local_flush_tlb();
-+ else
-+ leave_mm(smp_processor_id());
-+ }
-+ if (!cpus_empty(cpu_mask))
-+ flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-+
-+ preempt_enable();
-+}
-+
-+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
-+{
-+ struct mm_struct *mm = vma->vm_mm;
-+ cpumask_t cpu_mask;
-+
-+ preempt_disable();
-+ cpu_mask = mm->cpu_vm_mask;
-+ cpu_clear(smp_processor_id(), cpu_mask);
-+
-+ if (current->active_mm == mm) {
-+ if(current->mm)
-+ __flush_tlb_one(va);
-+ else
-+ leave_mm(smp_processor_id());
-+ }
-+
-+ if (!cpus_empty(cpu_mask))
-+ flush_tlb_others(cpu_mask, mm, va);
-+
-+ preempt_enable();
-+}
-+EXPORT_SYMBOL(flush_tlb_page);
-+
-+static void do_flush_tlb_all(void* info)
-+{
-+ unsigned long cpu = smp_processor_id();
-+
-+ __flush_tlb_all();
-+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
-+ leave_mm(cpu);
-+}
-+
-+void flush_tlb_all(void)
-+{
-+ on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
-+}
-+
-+#endif /* XEN */
-+
-+/*
-+ * this function sends a 'reschedule' IPI to another CPU.
-+ * it goes straight through and wastes no time serializing
-+ * anything. Worst case is that we lose a reschedule ...
-+ */
-+void smp_send_reschedule(int cpu)
-+{
-+ WARN_ON(cpu_is_offline(cpu));
-+ send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
-+}
-+
-+/*
-+ * Structure and data for smp_call_function(). This is designed to minimise
-+ * static memory requirements. It also looks cleaner.
-+ */
-+static DEFINE_SPINLOCK(call_lock);
-+
-+struct call_data_struct {
-+ void (*func) (void *info);
-+ void *info;
-+ atomic_t started;
-+ atomic_t finished;
-+ int wait;
-+};
-+
-+void lock_ipi_call_lock(void)
-+{
-+ spin_lock_irq(&call_lock);
-+}
-+
-+void unlock_ipi_call_lock(void)
-+{
-+ spin_unlock_irq(&call_lock);
-+}
-+
-+static struct call_data_struct *call_data;
-+
-+/**
-+ * smp_call_function(): Run a function on all other CPUs.
-+ * @func: The function to run. This must be fast and non-blocking.
-+ * @info: An arbitrary pointer to pass to the function.
-+ * @nonatomic: currently unused.
-+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
-+ *
-+ * Returns 0 on success, else a negative status code. Does not return until
-+ * remote CPUs are nearly ready to execute <<func>> or are or have executed.
-+ *
-+ * You must not call this function with disabled interrupts or from a
-+ * hardware interrupt handler or from a bottom half handler.
-+ */
-+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
-+ int wait)
-+{
-+ struct call_data_struct data;
-+ int cpus;
-+
-+ /* Holding any lock stops cpus from going down. */
-+ spin_lock(&call_lock);
-+ cpus = num_online_cpus() - 1;
-+ if (!cpus) {
-+ spin_unlock(&call_lock);
-+ return 0;
-+ }
-+
-+ /* Can deadlock when called with interrupts disabled */
-+ WARN_ON(irqs_disabled());
-+
-+ data.func = func;
-+ data.info = info;
-+ atomic_set(&data.started, 0);
-+ data.wait = wait;
-+ if (wait)
-+ atomic_set(&data.finished, 0);
-+
-+ call_data = &data;
-+ mb();
-+
-+ /* Send a message to all other CPUs and wait for them to respond */
-+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-+
-+ /* Wait for response */
-+ while (atomic_read(&data.started) != cpus)
-+ cpu_relax();
-+
-+ if (wait)
-+ while (atomic_read(&data.finished) != cpus)
-+ cpu_relax();
-+ spin_unlock(&call_lock);
-+
-+ return 0;
-+}
-+EXPORT_SYMBOL(smp_call_function);
-+
-+static void stop_this_cpu (void * dummy)
-+{
-+ /*
-+ * Remove this CPU:
-+ */
-+ cpu_clear(smp_processor_id(), cpu_online_map);
-+ local_irq_disable();
-+ disable_all_local_evtchn();
-+ if (cpu_data[smp_processor_id()].hlt_works_ok)
-+ for(;;) halt();
-+ for (;;);
-+}
-+
-+/*
-+ * this function calls the 'stop' function on all other CPUs in the system.
-+ */
-+
-+void smp_send_stop(void)
-+{
-+ smp_call_function(stop_this_cpu, NULL, 1, 0);
-+
-+ local_irq_disable();
-+ disable_all_local_evtchn();
-+ local_irq_enable();
-+}
-+
-+/*
-+ * Reschedule call back. Nothing to do,
-+ * all the work is done automatically when
-+ * we return from the interrupt.
-+ */
-+irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
-+ struct pt_regs *regs)
-+{
-+
-+ return IRQ_HANDLED;
-+}
-+
-+#include <linux/kallsyms.h>
-+irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
-+ struct pt_regs *regs)
-+{
-+ void (*func) (void *info) = call_data->func;
-+ void *info = call_data->info;
-+ int wait = call_data->wait;
-+
-+ /*
-+ * Notify initiating CPU that I've grabbed the data and am
-+ * about to execute the function
-+ */
-+ mb();
-+ atomic_inc(&call_data->started);
-+ /*
-+ * At this point the info structure may be out of scope unless wait==1
-+ */
-+ irq_enter();
-+ (*func)(info);
-+ irq_exit();
-+
-+ if (wait) {
-+ mb();
-+ atomic_inc(&call_data->finished);
-+ }
-+
-+ return IRQ_HANDLED;
-+}
-+
---- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head/arch/x86/kernel/time-xen.c 2010-08-31 09:24:21.000000000 +0200
@@ -0,0 +1,1242 @@
+/*
@@ -10970,801 +10360,6 @@ pick them up (for reference, prefixed with the version the removal occured):
+ clear_page(v);
+}
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
-+++ head/arch/x86/mm/fault_32-xen.c 2010-09-23 15:39:04.000000000 +0200
-@@ -0,0 +1,792 @@
-+/*
-+ * linux/arch/i386/mm/fault.c
-+ *
-+ * Copyright (C) 1995 Linus Torvalds
-+ */
-+
-+#include <linux/signal.h>
-+#include <linux/sched.h>
-+#include <linux/kernel.h>
-+#include <linux/errno.h>
-+#include <linux/string.h>
-+#include <linux/types.h>
-+#include <linux/ptrace.h>
-+#include <linux/mman.h>
-+#include <linux/mm.h>
-+#include <linux/smp.h>
-+#include <linux/smp_lock.h>
-+#include <linux/interrupt.h>
-+#include <linux/init.h>
-+#include <linux/tty.h>
-+#include <linux/vt_kern.h> /* For unblank_screen() */
-+#include <linux/highmem.h>
-+#include <linux/module.h>
-+#include <linux/kprobes.h>
-+
-+#include <asm/system.h>
-+#include <asm/uaccess.h>
-+#include <asm/desc.h>
-+#include <asm/kdebug.h>
-+
-+extern void die(const char *,struct pt_regs *,long);
-+
-+#ifdef CONFIG_KPROBES
-+ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
-+int register_page_fault_notifier(struct notifier_block *nb)
-+{
-+ vmalloc_sync_all();
-+ return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
-+}
-+
-+int unregister_page_fault_notifier(struct notifier_block *nb)
-+{
-+ return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
-+}
-+
-+static inline int notify_page_fault(enum die_val val, const char *str,
-+ struct pt_regs *regs, long err, int trap, int sig)
-+{
-+ struct die_args args = {
-+ .regs = regs,
-+ .str = str,
-+ .err = err,
-+ .trapnr = trap,
-+ .signr = sig
-+ };
-+ return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
-+}
-+#else
-+static inline int notify_page_fault(enum die_val val, const char *str,
-+ struct pt_regs *regs, long err, int trap, int sig)
-+{
-+ return NOTIFY_DONE;
-+}
-+#endif
-+
-+
-+/*
-+ * Unlock any spinlocks which will prevent us from getting the
-+ * message out
-+ */
-+void bust_spinlocks(int yes)
-+{
-+ int loglevel_save = console_loglevel;
-+
-+ if (yes) {
-+ oops_in_progress = 1;
-+ return;
-+ }
-+#ifdef CONFIG_VT
-+ unblank_screen();
-+#endif
-+ oops_in_progress = 0;
-+ /*
-+ * OK, the message is on the console. Now we call printk()
-+ * without oops_in_progress set so that printk will give klogd
-+ * a poke. Hold onto your hats...
-+ */
-+ console_loglevel = 15; /* NMI oopser may have shut the console up */
-+ printk(" ");
-+ console_loglevel = loglevel_save;
-+}
-+
-+/*
-+ * Return EIP plus the CS segment base. The segment limit is also
-+ * adjusted, clamped to the kernel/user address space (whichever is
-+ * appropriate), and returned in *eip_limit.
-+ *
-+ * The segment is checked, because it might have been changed by another
-+ * task between the original faulting instruction and here.
-+ *
-+ * If CS is no longer a valid code segment, or if EIP is beyond the
-+ * limit, or if it is a kernel address when CS is not a kernel segment,
-+ * then the returned value will be greater than *eip_limit.
-+ *
-+ * This is slow, but is very rarely executed.
-+ */
-+static inline unsigned long get_segment_eip(struct pt_regs *regs,
-+ unsigned long *eip_limit)
-+{
-+ unsigned long eip = regs->eip;
-+ unsigned seg = regs->xcs & 0xffff;
-+ u32 seg_ar, seg_limit, base, *desc;
-+
-+ /* Unlikely, but must come before segment checks. */
-+ if (unlikely(regs->eflags & VM_MASK)) {
-+ base = seg << 4;
-+ *eip_limit = base + 0xffff;
-+ return base + (eip & 0xffff);
-+ }
-+
-+ /* The standard kernel/user address space limit. */
-+ *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg;
-+
-+ /* By far the most common cases. */
-+ if (likely(seg == __USER_CS || seg == GET_KERNEL_CS()))
-+ return eip;
-+
-+ /* Check the segment exists, is within the current LDT/GDT size,
-+ that kernel/user (ring 0..3) has the appropriate privilege,
-+ that it's a code segment, and get the limit. */
-+ __asm__ ("larl %3,%0; lsll %3,%1"
-+ : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
-+ if ((~seg_ar & 0x9800) || eip > seg_limit) {
-+ *eip_limit = 0;
-+ return 1; /* So that returned eip > *eip_limit. */
-+ }
-+
-+ /* Get the GDT/LDT descriptor base.
-+ When you look for races in this code remember that
-+ LDT and other horrors are only used in user space. */
-+ if (seg & (1<<2)) {
-+ /* Must lock the LDT while reading it. */
-+ down(&current->mm->context.sem);
-+ desc = current->mm->context.ldt;
-+ desc = (void *)desc + (seg & ~7);
-+ } else {
-+ /* Must disable preemption while reading the GDT. */
-+ desc = (u32 *)get_cpu_gdt_table(get_cpu());
-+ desc = (void *)desc + (seg & ~7);
-+ }
-+
-+ /* Decode the code segment base from the descriptor */
-+ base = get_desc_base((unsigned long *)desc);
-+
-+ if (seg & (1<<2)) {
-+ up(&current->mm->context.sem);
-+ } else
-+ put_cpu();
-+
-+ /* Adjust EIP and segment limit, and clamp at the kernel limit.
-+ It's legitimate for segments to wrap at 0xffffffff. */
-+ seg_limit += base;
-+ if (seg_limit < *eip_limit && seg_limit >= base)
-+ *eip_limit = seg_limit;
-+ return eip + base;
-+}
-+
-+/*
-+ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
-+ * Check that here and ignore it.
-+ */
-+static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
-+{
-+ unsigned long limit;
-+ unsigned long instr = get_segment_eip (regs, &limit);
-+ int scan_more = 1;
-+ int prefetch = 0;
-+ int i;
-+
-+ for (i = 0; scan_more && i < 15; i++) {
-+ unsigned char opcode;
-+ unsigned char instr_hi;
-+ unsigned char instr_lo;
-+
-+ if (instr > limit)
-+ break;
-+ if (__get_user(opcode, (unsigned char __user *) instr))
-+ break;
-+
-+ instr_hi = opcode & 0xf0;
-+ instr_lo = opcode & 0x0f;
-+ instr++;
-+
-+ switch (instr_hi) {
-+ case 0x20:
-+ case 0x30:
-+ /* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */
-+ scan_more = ((instr_lo & 7) == 0x6);
-+ break;
-+
-+ case 0x60:
-+ /* 0x64 thru 0x67 are valid prefixes in all modes. */
-+ scan_more = (instr_lo & 0xC) == 0x4;
-+ break;
-+ case 0xF0:
-+ /* 0xF0, 0xF2, and 0xF3 are valid prefixes */
-+ scan_more = !instr_lo || (instr_lo>>1) == 1;
-+ break;
-+ case 0x00:
-+ /* Prefetch instruction is 0x0F0D or 0x0F18 */
-+ scan_more = 0;
-+ if (instr > limit)
-+ break;
-+ if (__get_user(opcode, (unsigned char __user *) instr))
-+ break;
-+ prefetch = (instr_lo == 0xF) &&
-+ (opcode == 0x0D || opcode == 0x18);
-+ break;
-+ default:
-+ scan_more = 0;
-+ break;
-+ }
-+ }
-+ return prefetch;
-+}
-+
-+static inline int is_prefetch(struct pt_regs *regs, unsigned long addr,
-+ unsigned long error_code)
-+{
-+ if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-+ boot_cpu_data.x86 >= 6)) {
-+ /* Catch an obscure case of prefetch inside an NX page. */
-+ if (nx_enabled && (error_code & 16))
-+ return 0;
-+ return __is_prefetch(regs, addr);
-+ }
-+ return 0;
-+}
-+
-+static noinline void force_sig_info_fault(int si_signo, int si_code,
-+ unsigned long address, struct task_struct *tsk)
-+{
-+ siginfo_t info;
-+
-+ info.si_signo = si_signo;
-+ info.si_errno = 0;
-+ info.si_code = si_code;
-+ info.si_addr = (void __user *)address;
-+ force_sig_info(si_signo, &info, tsk);
-+}
-+
-+fastcall void do_invalid_op(struct pt_regs *, unsigned long);
-+
-+#ifdef CONFIG_X86_PAE
-+static void dump_fault_path(unsigned long address)
-+{
-+ unsigned long *p, page;
-+ unsigned long mfn;
-+
-+ page = read_cr3();
-+ p = (unsigned long *)__va(page);
-+ p += (address >> 30) * 2;
-+ printk(KERN_ALERT "%08lx -> *pde = %08lx:%08lx\n", page, p[1], p[0]);
-+ if (p[0] & _PAGE_PRESENT) {
-+ mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20);
-+ page = mfn_to_pfn(mfn) << PAGE_SHIFT;
-+ p = (unsigned long *)__va(page);
-+ address &= 0x3fffffff;
-+ p += (address >> 21) * 2;
-+ printk(KERN_ALERT "%08lx -> *pme = %08lx:%08lx\n",
-+ page, p[1], p[0]);
-+ mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20);
-+#ifdef CONFIG_HIGHPTE
-+ if (mfn_to_pfn(mfn) >= highstart_pfn)
-+ return;
-+#endif
-+ if (p[0] & _PAGE_PRESENT) {
-+ page = mfn_to_pfn(mfn) << PAGE_SHIFT;
-+ p = (unsigned long *) __va(page);
-+ address &= 0x001fffff;
-+ p += (address >> 12) * 2;
-+ printk(KERN_ALERT "%08lx -> *pte = %08lx:%08lx\n",
-+ page, p[1], p[0]);
-+ }
-+ }
-+}
-+#else
-+static void dump_fault_path(unsigned long address)
-+{
-+ unsigned long page;
-+
-+ page = read_cr3();
-+ page = ((unsigned long *) __va(page))[address >> 22];
-+ if (oops_may_print())
-+ printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
-+ machine_to_phys(page));
-+ /*
-+ * We must not directly access the pte in the highpte
-+ * case if the page table is located in highmem.
-+ * And lets rather not kmap-atomic the pte, just in case
-+ * it's allocated already.
-+ */
-+#ifdef CONFIG_HIGHPTE
-+ if ((page >> PAGE_SHIFT) >= highstart_pfn)
-+ return;
-+#endif
-+ if ((page & 1) && oops_may_print()) {
-+ page &= PAGE_MASK;
-+ address &= 0x003ff000;
-+ page = machine_to_phys(page);
-+ page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
-+ printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page,
-+ machine_to_phys(page));
-+ }
-+}
-+#endif
-+
-+static int spurious_fault(struct pt_regs *regs,
-+ unsigned long address,
-+ unsigned long error_code)
-+{
-+ pgd_t *pgd;
-+ pud_t *pud;
-+ pmd_t *pmd;
-+ pte_t *pte;
-+
-+ /* Reserved-bit violation or user access to kernel space? */
-+ if (error_code & 0x0c)
-+ return 0;
-+
-+ pgd = init_mm.pgd + pgd_index(address);
-+ if (!pgd_present(*pgd))
-+ return 0;
-+
-+ pud = pud_offset(pgd, address);
-+ if (!pud_present(*pud))
-+ return 0;
-+
-+ pmd = pmd_offset(pud, address);
-+ if (!pmd_present(*pmd))
-+ return 0;
-+
-+ pte = pte_offset_kernel(pmd, address);
-+ if (!pte_present(*pte))
-+ return 0;
-+ if ((error_code & 0x02) && !pte_write(*pte))
-+ return 0;
-+#ifdef CONFIG_X86_PAE
-+ if ((error_code & 0x10) && (__pte_val(*pte) & _PAGE_NX))
-+ return 0;
-+#endif
-+
-+ return 1;
-+}
-+
-+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
-+{
-+ unsigned index = pgd_index(address);
-+ pgd_t *pgd_k;
-+ pud_t *pud, *pud_k;
-+ pmd_t *pmd, *pmd_k;
-+
-+ pgd += index;
-+ pgd_k = init_mm.pgd + index;
-+
-+ if (!pgd_present(*pgd_k))
-+ return NULL;
-+
-+ /*
-+ * set_pgd(pgd, *pgd_k); here would be useless on PAE
-+ * and redundant with the set_pmd() on non-PAE. As would
-+ * set_pud.
-+ */
-+
-+ pud = pud_offset(pgd, address);
-+ pud_k = pud_offset(pgd_k, address);
-+ if (!pud_present(*pud_k))
-+ return NULL;
-+
-+ pmd = pmd_offset(pud, address);
-+ pmd_k = pmd_offset(pud_k, address);
-+ if (!pmd_present(*pmd_k))
-+ return NULL;
-+ if (!pmd_present(*pmd))
-+#if CONFIG_XEN_COMPAT > 0x030002
-+ set_pmd(pmd, *pmd_k);
-+#else
-+ /*
-+ * When running on older Xen we must launder *pmd_k through
-+ * pmd_val() to ensure that _PAGE_PRESENT is correctly set.
-+ */
-+ set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
-+#endif
-+ else
-+ BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
-+ return pmd_k;
-+}
-+
-+/*
-+ * Handle a fault on the vmalloc or module mapping area
-+ *
-+ * This assumes no large pages in there.
-+ */
-+static inline int vmalloc_fault(unsigned long address)
-+{
-+ unsigned long pgd_paddr;
-+ pmd_t *pmd_k;
-+ pte_t *pte_k;
-+ /*
-+ * Synchronize this task's top level page-table
-+ * with the 'reference' page table.
-+ *
-+ * Do _not_ use "current" here. We might be inside
-+ * an interrupt in the middle of a task switch..
-+ */
-+ pgd_paddr = read_cr3();
-+ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
-+ if (!pmd_k)
-+ return -1;
-+ pte_k = pte_offset_kernel(pmd_k, address);
-+ if (!pte_present(*pte_k))
-+ return -1;
-+ return 0;
-+}
-+
-+/*
-+ * This routine handles page faults. It determines the address,
-+ * and the problem, and then passes it off to one of the appropriate
-+ * routines.
-+ *
-+ * error_code:
-+ * bit 0 == 0 means no page found, 1 means protection fault
-+ * bit 1 == 0 means read, 1 means write
-+ * bit 2 == 0 means kernel, 1 means user-mode
-+ * bit 3 == 1 means use of reserved bit detected
-+ * bit 4 == 1 means fault was an instruction fetch
-+ */
-+fastcall void __kprobes do_page_fault(struct pt_regs *regs,
-+ unsigned long error_code)
-+{
-+ struct task_struct *tsk;
-+ struct mm_struct *mm;
-+ struct vm_area_struct * vma;
-+ unsigned long address;
-+ int write, si_code;
-+
-+ /* get the address */
-+ address = read_cr2();
-+
-+ /* Set the "privileged fault" bit to something sane. */
-+ error_code &= ~4;
-+ error_code |= (regs->xcs & 2) << 1;
-+ if (regs->eflags & X86_EFLAGS_VM)
-+ error_code |= 4;
-+
-+ tsk = current;
-+
-+ si_code = SEGV_MAPERR;
-+
-+ /*
-+ * We fault-in kernel-space virtual memory on-demand. The
-+ * 'reference' page table is init_mm.pgd.
-+ *
-+ * NOTE! We MUST NOT take any locks for this case. We may
-+ * be in an interrupt or a critical region, and should
-+ * only copy the information from the master page table,
-+ * nothing more.
-+ *
-+ * This verifies that the fault happens in kernel space
-+ * (error_code & 4) == 0, and that the fault was not a
-+ * protection error (error_code & 9) == 0.
-+ */
-+ if (unlikely(address >= TASK_SIZE)) {
-+#ifdef CONFIG_XEN
-+ /* Faults in hypervisor area can never be patched up. */
-+ if (address >= hypervisor_virt_start)
-+ goto bad_area_nosemaphore;
-+#endif
-+ if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
-+ return;
-+ /* Can take a spurious fault if mapping changes R/O -> R/W. */
-+ if (spurious_fault(regs, address, error_code))
-+ return;
-+ if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
-+ SIGSEGV) == NOTIFY_STOP)
-+ return;
-+ /*
-+ * Don't take the mm semaphore here. If we fixup a prefetch
-+ * fault we could otherwise deadlock.
-+ */
-+ goto bad_area_nosemaphore;
-+ }
-+
-+ if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
-+ SIGSEGV) == NOTIFY_STOP)
-+ return;
-+
-+ /* It's safe to allow irq's after cr2 has been saved and the vmalloc
-+ fault has been handled. */
-+ if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
-+ local_irq_enable();
-+
-+ mm = tsk->mm;
-+
-+ /*
-+ * If we're in an interrupt, have no user context or are running in an
-+ * atomic region then we must not take the fault..
-+ */
-+ if (in_atomic() || !mm)
-+ goto bad_area_nosemaphore;
-+
-+ /* When running in the kernel we expect faults to occur only to
-+ * addresses in user space. All other faults represent errors in the
-+ * kernel and should generate an OOPS. Unfortunatly, in the case of an
-+ * erroneous fault occurring in a code path which already holds mmap_sem
-+ * we will deadlock attempting to validate the fault against the
-+ * address space. Luckily the kernel only validly references user
-+ * space from well defined areas of code, which are listed in the
-+ * exceptions table.
-+ *
-+ * As the vast majority of faults will be valid we will only perform
-+ * the source reference check when there is a possibilty of a deadlock.
-+ * Attempt to lock the address space, if we cannot we then validate the
-+ * source. If this is invalid we can skip the address space check,
-+ * thus avoiding the deadlock.
-+ */
-+ if (!down_read_trylock(&mm->mmap_sem)) {
-+ if ((error_code & 4) == 0 &&
-+ !search_exception_tables(regs->eip))
-+ goto bad_area_nosemaphore;
-+ down_read(&mm->mmap_sem);
-+ }
-+
-+ vma = find_vma(mm, address);
-+ if (!vma)
-+ goto bad_area;
-+ if (vma->vm_start <= address)
-+ goto good_area;
-+ if (!(vma->vm_flags & VM_GROWSDOWN))
-+ goto bad_area;
-+ if (error_code & 4) {
-+ /*
-+ * Accessing the stack below %esp is always a bug.
-+ * The large cushion allows instructions like enter
-+ * and pusha to work. ("enter $65535,$31" pushes
-+ * 32 pointers and then decrements %esp by 65535.)
-+ */
-+ if (address + 65536 + 32 * sizeof(unsigned long) < regs->esp)
-+ goto bad_area;
-+ }
-+ if (expand_stack(vma, address))
-+ goto bad_area;
-+/*
-+ * Ok, we have a good vm_area for this memory access, so
-+ * we can handle it..
-+ */
-+good_area:
-+ si_code = SEGV_ACCERR;
-+ write = 0;
-+ switch (error_code & 3) {
-+ default: /* 3: write, present */
-+#ifdef TEST_VERIFY_AREA
-+ if (regs->cs == GET_KERNEL_CS())
-+ printk("WP fault at %08lx\n", regs->eip);
-+#endif
-+ /* fall through */
-+ case 2: /* write, not present */
-+ if (!(vma->vm_flags & VM_WRITE))
-+ goto bad_area;
-+ write++;
-+ break;
-+ case 1: /* read, present */
-+ goto bad_area;
-+ case 0: /* read, not present */
-+ if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
-+ goto bad_area;
-+ }
-+
-+ survive:
-+ /*
-+ * If for any reason at all we couldn't handle the fault,
-+ * make sure we exit gracefully rather than endlessly redo
-+ * the fault.
-+ */
-+ switch (handle_mm_fault(mm, vma, address, write)) {
-+ case VM_FAULT_MINOR:
-+ tsk->min_flt++;
-+ break;
-+ case VM_FAULT_MAJOR:
-+ tsk->maj_flt++;
-+ break;
-+ case VM_FAULT_SIGBUS:
-+ goto do_sigbus;
-+ case VM_FAULT_OOM:
-+ goto out_of_memory;
-+ default:
-+ BUG();
-+ }
-+
-+ /*
-+ * Did it hit the DOS screen memory VA from vm86 mode?
-+ */
-+ if (regs->eflags & VM_MASK) {
-+ unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
-+ if (bit < 32)
-+ tsk->thread.screen_bitmap |= 1 << bit;
-+ }
-+ up_read(&mm->mmap_sem);
-+ return;
-+
-+/*
-+ * Something tried to access memory that isn't in our memory map..
-+ * Fix it, but check if it's kernel or user first..
-+ */
-+bad_area:
-+ up_read(&mm->mmap_sem);
-+
-+bad_area_nosemaphore:
-+ /* User mode accesses just cause a SIGSEGV */
-+ if (error_code & 4) {
-+ /*
-+ * Valid to do another page fault here because this one came
-+ * from user space.
-+ */
-+ if (is_prefetch(regs, address, error_code))
-+ return;
-+
-+ tsk->thread.cr2 = address;
-+ /* Kernel addresses are always protection faults */
-+ tsk->thread.error_code = error_code | (address >= TASK_SIZE);
-+ tsk->thread.trap_no = 14;
-+ force_sig_info_fault(SIGSEGV, si_code, address, tsk);
-+ return;
-+ }
-+
-+#ifdef CONFIG_X86_F00F_BUG
-+ /*
-+ * Pentium F0 0F C7 C8 bug workaround.
-+ */
-+ if (boot_cpu_data.f00f_bug) {
-+ unsigned long nr;
-+
-+ nr = (address - idt_descr.address) >> 3;
-+
-+ if (nr == 6) {
-+ do_invalid_op(regs, 0);
-+ return;
-+ }
-+ }
-+#endif
-+
-+no_context:
-+ /* Are we prepared to handle this kernel fault? */
-+ if (fixup_exception(regs))
-+ return;
-+
-+ /*
-+ * Valid to do another page fault here, because if this fault
-+ * had been triggered by is_prefetch fixup_exception would have
-+ * handled it.
-+ */
-+ if (is_prefetch(regs, address, error_code))
-+ return;
-+
-+/*
-+ * Oops. The kernel tried to access some bad page. We'll have to
-+ * terminate things with extreme prejudice.
-+ */
-+
-+ bust_spinlocks(1);
-+
-+ if (oops_may_print()) {
-+ #ifdef CONFIG_X86_PAE
-+ if (error_code & 16) {
-+ pte_t *pte = lookup_address(address);
-+
-+ if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
-+ printk(KERN_CRIT "kernel tried to execute "
-+ "NX-protected page - exploit attempt? "
-+ "(uid: %d)\n", current->uid);
-+ }
-+ #endif
-+ if (address < PAGE_SIZE)
-+ printk(KERN_ALERT "BUG: unable to handle kernel NULL "
-+ "pointer dereference");
-+ else
-+ printk(KERN_ALERT "BUG: unable to handle kernel paging"
-+ " request");
-+ printk(" at virtual address %08lx\n",address);
-+ printk(KERN_ALERT " printing eip:\n");
-+ printk("%08lx\n", regs->eip);
-+ }
-+ dump_fault_path(address);
-+ tsk->thread.cr2 = address;
-+ tsk->thread.trap_no = 14;
-+ tsk->thread.error_code = error_code;
-+ die("Oops", regs, error_code);
-+ bust_spinlocks(0);
-+ do_exit(SIGKILL);
-+
-+/*
-+ * We ran out of memory, or some other thing happened to us that made
-+ * us unable to handle the page fault gracefully.
-+ */
-+out_of_memory:
-+ up_read(&mm->mmap_sem);
-+ if (tsk->pid == 1) {
-+ yield();
-+ down_read(&mm->mmap_sem);
-+ goto survive;
-+ }
-+ printk("VM: killing process %s\n", tsk->comm);
-+ if (error_code & 4)
-+ do_exit(SIGKILL);
-+ goto no_context;
-+
-+do_sigbus:
-+ up_read(&mm->mmap_sem);
-+
-+ /* Kernel mode? Handle exceptions or die */
-+ if (!(error_code & 4))
-+ goto no_context;
-+
-+ /* User space => ok to do another page fault */
-+ if (is_prefetch(regs, address, error_code))
-+ return;
-+
-+ tsk->thread.cr2 = address;
-+ tsk->thread.error_code = error_code;
-+ tsk->thread.trap_no = 14;
-+ force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
-+}
-+
-+#if !HAVE_SHARED_KERNEL_PMD
-+void vmalloc_sync_all(void)
-+{
-+ /*
-+ * Note that races in the updates of insync and start aren't
-+ * problematic: insync can only get set bits added, and updates to
-+ * start are only improving performance (without affecting correctness
-+ * if undone).
-+ * XEN: To work on PAE, we need to iterate over PMDs rather than PGDs.
-+ * This change works just fine with 2-level paging too.
-+ */
-+#define sync_index(a) ((a) >> PMD_SHIFT)
-+ static DECLARE_BITMAP(insync, PTRS_PER_PGD*PTRS_PER_PMD);
-+ static unsigned long start = TASK_SIZE;
-+ unsigned long address;
-+
-+ BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
-+ for (address = start;
-+ address >= TASK_SIZE && address < hypervisor_virt_start;
-+ address += 1UL << PMD_SHIFT) {
-+ if (!test_bit(sync_index(address), insync)) {
-+ unsigned long flags;
-+ struct page *page;
-+
-+ spin_lock_irqsave(&pgd_lock, flags);
-+ /* XEN: failure path assumes non-empty pgd_list. */
-+ if (unlikely(!pgd_list)) {
-+ spin_unlock_irqrestore(&pgd_lock, flags);
-+ return;
-+ }
-+ for (page = pgd_list; page; page =
-+ (struct page *)page->index) {
-+ spinlock_t *lock = page->mapping
-+ ? &((struct mm_struct *)page->mapping)
-+ ->page_table_lock
-+ : NULL;
-+ pmd_t *pmd;
-+
-+ if (lock)
-+ spin_lock(lock);
-+ pmd = vmalloc_sync_one(page_address(page),
-+ address);
-+ if (lock)
-+ spin_unlock(lock);
-+
-+ if (!pmd) {
-+ BUG_ON(page != pgd_list);
-+ break;
-+ }
-+ }
-+ spin_unlock_irqrestore(&pgd_lock, flags);
-+ if (!page)
-+ set_bit(sync_index(address), insync);
-+ }
-+ if (address == start && test_bit(sync_index(address), insync))
-+ start = address + (1UL << PMD_SHIFT);
-+ }
-+}
-+#endif
---- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head/arch/x86/mm/highmem_32-xen.c 2008-10-29 09:55:56.000000000 +0100
@@ -0,0 +1,183 @@
+#include <linux/highmem.h>
@@ -23549,7 +22144,7 @@ pick them up (for reference, prefixed with the version the removal occured):
+device_initcall(add_pcspkr);
+#endif
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
-+++ head/arch/x86/kernel/smp_64-xen.c 2008-04-02 12:34:02.000000000 +0200
++++ head/arch/x86/kernel/smp-xen.c 2008-04-02 12:34:02.000000000 +0200
@@ -0,0 +1,575 @@
+/*
+ * Intel SMP support routines.
@@ -25534,7 +24129,7 @@ pick them up (for reference, prefixed with the version the removal occured):
+
+__initcall(vsyscall_init);
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
-+++ head/arch/x86/mm/fault_64-xen.c 2011-10-07 10:44:29.000000000 +0200
++++ head/arch/x86/mm/fault-xen.c 2011-10-07 10:44:29.000000000 +0200
@@ -0,0 +1,731 @@
+/*
+ * linux/arch/x86-64/mm/fault.c
diff --git a/patches.xen/xen3-auto-xen-kconfig.diff b/patches.xen/xen3-auto-xen-kconfig.diff
index 5cced633e7..890caf9772 100644
--- a/patches.xen/xen3-auto-xen-kconfig.diff
+++ b/patches.xen/xen3-auto-xen-kconfig.diff
@@ -4,8 +4,8 @@ From: xen-devel@lists.xen.org
Patch-mainline: n/a
Acked-by: jbeulich@suse.com
---- head.orig/arch/x86/Kconfig 2013-05-23 08:57:02.000000000 +0200
-+++ head/arch/x86/Kconfig 2013-05-23 15:38:09.000000000 +0200
+--- head.orig/arch/x86/Kconfig 2013-07-02 09:20:54.000000000 +0200
++++ head/arch/x86/Kconfig 2013-07-02 09:34:53.000000000 +0200
@@ -235,7 +235,17 @@ config X86_64_SMP
config X86_HT
@@ -320,7 +320,7 @@ Acked-by: jbeulich@suse.com
---help---
Find out whether you have ISA slots on your motherboard. ISA is the
name of a bus system, i.e. the way the CPU talks to the other stuff
-@@ -2332,7 +2405,9 @@ source "net/Kconfig"
+@@ -2333,7 +2406,9 @@ source "net/Kconfig"
source "drivers/Kconfig"
@@ -330,14 +330,14 @@ Acked-by: jbeulich@suse.com
source "fs/Kconfig"
-@@ -2344,4 +2419,6 @@ source "crypto/Kconfig"
+@@ -2345,4 +2420,6 @@ source "crypto/Kconfig"
source "arch/x86/kvm/Kconfig"
+source "drivers/xen/Kconfig"
+
source "lib/Kconfig"
---- head.orig/arch/x86/Kconfig.cpu 2013-02-19 00:58:34.000000000 +0100
+--- head.orig/arch/x86/Kconfig.cpu 2013-07-02 09:20:54.000000000 +0200
+++ head/arch/x86/Kconfig.cpu 2013-01-08 09:10:04.000000000 +0100
@@ -319,7 +319,7 @@ config X86_PPRO_FENCE
@@ -356,7 +356,7 @@ Acked-by: jbeulich@suse.com
config X86_CMPXCHG64
def_bool y
---- head.orig/arch/x86/Kconfig.debug 2013-05-23 08:55:27.000000000 +0200
+--- head.orig/arch/x86/Kconfig.debug 2013-07-02 09:20:54.000000000 +0200
+++ head/arch/x86/Kconfig.debug 2012-02-08 10:28:21.000000000 +0100
@@ -122,7 +122,7 @@ config DEBUG_NX_TEST
config DOUBLEFAULT
@@ -367,7 +367,7 @@ Acked-by: jbeulich@suse.com
---help---
This option allows trapping of rare doublefault exceptions that
would otherwise cause a system to silently reboot. Disabling this
---- head.orig/drivers/acpi/Kconfig 2013-05-23 08:55:33.000000000 +0200
+--- head.orig/drivers/acpi/Kconfig 2013-07-02 09:20:54.000000000 +0200
+++ head/drivers/acpi/Kconfig 2013-05-23 15:38:21.000000000 +0200
@@ -311,6 +311,7 @@ config ACPI_PCI_SLOT
config X86_PM_TIMER
@@ -391,7 +391,7 @@ Acked-by: jbeulich@suse.com
+ depends on (X86 || IA64) && XEN
+ default y
endif # ACPI
---- head.orig/drivers/char/Kconfig 2013-05-23 08:57:02.000000000 +0200
+--- head.orig/drivers/char/Kconfig 2013-07-02 09:20:54.000000000 +0200
+++ head/drivers/char/Kconfig 2013-03-21 11:52:40.000000000 +0100
@@ -520,7 +520,7 @@ config MAX_RAW_DEVS
config HPET
@@ -402,7 +402,7 @@ Acked-by: jbeulich@suse.com
help
If you say Y here, you will have a miscdevice named "/dev/hpet/". Each
open selects one of the timers supported by the HPET. The timers are
---- head.orig/drivers/char/tpm/Kconfig 2013-04-29 02:36:01.000000000 +0200
+--- head.orig/drivers/char/tpm/Kconfig 2013-07-02 09:20:54.000000000 +0200
+++ head/drivers/char/tpm/Kconfig 2013-03-21 11:52:29.000000000 +0100
@@ -91,4 +91,13 @@ config TCG_ST33_I2C
To compile this driver as a module, choose M here; the module will be
@@ -418,7 +418,7 @@ Acked-by: jbeulich@suse.com
+ will be called tpm_xenu.
+
endif # TCG_TPM
---- head.orig/drivers/cpufreq/Kconfig 2013-05-23 08:55:37.000000000 +0200
+--- head.orig/drivers/cpufreq/Kconfig 2013-07-02 09:20:54.000000000 +0200
+++ head/drivers/cpufreq/Kconfig 2011-06-30 15:36:01.000000000 +0200
@@ -2,6 +2,7 @@ menu "CPU Frequency scaling"
@@ -428,7 +428,7 @@ Acked-by: jbeulich@suse.com
help
CPU Frequency scaling allows you to change the clock speed of
CPUs on the fly. This is a nice method to save power, because
---- head.orig/drivers/tty/serial/8250/Kconfig 2013-04-29 02:36:01.000000000 +0200
+--- head.orig/drivers/tty/serial/8250/Kconfig 2013-07-02 09:20:54.000000000 +0200
+++ head/drivers/tty/serial/8250/Kconfig 2012-02-08 10:28:04.000000000 +0100
@@ -5,6 +5,7 @@
@@ -438,7 +438,7 @@ Acked-by: jbeulich@suse.com
select SERIAL_CORE
---help---
This selects whether you want to include the driver for the standard
---- head.orig/drivers/xen/Kconfig 2013-05-23 08:56:33.000000000 +0200
+--- head.orig/drivers/xen/Kconfig 2013-07-02 09:20:54.000000000 +0200
+++ head/drivers/xen/Kconfig 2013-05-23 15:38:14.000000000 +0200
@@ -1,9 +1,381 @@
+#
@@ -848,7 +848,7 @@ Acked-by: jbeulich@suse.com
If in doubt, say yes.
config XEN_DEV_EVTCHN
---- head.orig/fs/Kconfig 2013-05-23 08:57:01.000000000 +0200
+--- head.orig/fs/Kconfig 2013-07-02 09:20:54.000000000 +0200
+++ head/fs/Kconfig 2013-01-30 10:32:38.000000000 +0100
@@ -160,6 +160,7 @@ config HUGETLBFS
bool "HugeTLB file system support"
@@ -858,7 +858,7 @@ Acked-by: jbeulich@suse.com
help
hugetlbfs is a filesystem backing for HugeTLB pages, based on
ramfs. For architectures that support it, say Y here and read
---- head.orig/kernel/Kconfig.preempt 2013-05-23 08:57:01.000000000 +0200
+--- head.orig/kernel/Kconfig.preempt 2013-07-02 09:20:54.000000000 +0200
+++ head/kernel/Kconfig.preempt 2012-04-10 15:16:13.000000000 +0200
@@ -36,6 +36,7 @@ config PREEMPT_VOLUNTARY
diff --git a/patches.xen/xen3-patch-2.6.19 b/patches.xen/xen3-patch-2.6.19
index 6f2aa53d4b..753551315f 100644
--- a/patches.xen/xen3-patch-2.6.19
+++ b/patches.xen/xen3-patch-2.6.19
@@ -1634,107 +1634,6 @@ Acked-by: jbeulich@novell.com
#ifdef CONFIG_ACPI
acpi_boot_init();
---- head.orig/arch/x86/kernel/smp_32-xen.c 2007-12-10 08:47:31.000000000 +0100
-+++ head/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:29:16.000000000 +0100
-@@ -279,8 +279,7 @@ static inline void leave_mm (unsigned lo
- * 2) Leave the mm if we are in the lazy tlb mode.
- */
-
--irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
-- struct pt_regs *regs)
-+irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
- {
- unsigned long cpu;
-
-@@ -567,16 +566,14 @@ void smp_send_stop(void)
- * all the work is done automatically when
- * we return from the interrupt.
- */
--irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
-- struct pt_regs *regs)
-+irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
- {
-
- return IRQ_HANDLED;
- }
-
- #include <linux/kallsyms.h>
--irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
-- struct pt_regs *regs)
-+irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
- {
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
-@@ -603,3 +600,69 @@ irqreturn_t smp_call_function_interrupt(
- return IRQ_HANDLED;
- }
-
-+/*
-+ * this function sends a 'generic call function' IPI to one other CPU
-+ * in the system.
-+ *
-+ * cpu is a standard Linux logical CPU number.
-+ */
-+static void
-+__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-+ int nonatomic, int wait)
-+{
-+ struct call_data_struct data;
-+ int cpus = 1;
-+
-+ data.func = func;
-+ data.info = info;
-+ atomic_set(&data.started, 0);
-+ data.wait = wait;
-+ if (wait)
-+ atomic_set(&data.finished, 0);
-+
-+ call_data = &data;
-+ wmb();
-+ /* Send a message to all other CPUs and wait for them to respond */
-+ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
-+
-+ /* Wait for response */
-+ while (atomic_read(&data.started) != cpus)
-+ cpu_relax();
-+
-+ if (!wait)
-+ return;
-+
-+ while (atomic_read(&data.finished) != cpus)
-+ cpu_relax();
-+}
-+
-+/*
-+ * smp_call_function_single - Run a function on another CPU
-+ * @func: The function to run. This must be fast and non-blocking.
-+ * @info: An arbitrary pointer to pass to the function.
-+ * @nonatomic: Currently unused.
-+ * @wait: If true, wait until function has completed on other CPUs.
-+ *
-+ * Retrurns 0 on success, else a negative status code.
-+ *
-+ * Does not return until the remote CPU is nearly ready to execute <func>
-+ * or is or has executed.
-+ */
-+
-+int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-+ int nonatomic, int wait)
-+{
-+ /* prevent preemption and reschedule on another processor */
-+ int me = get_cpu();
-+ if (cpu == me) {
-+ WARN_ON(1);
-+ put_cpu();
-+ return -EBUSY;
-+ }
-+ spin_lock_bh(&call_lock);
-+ __smp_call_function_single(cpu, func, info, nonatomic, wait);
-+ spin_unlock_bh(&call_lock);
-+ put_cpu();
-+ return 0;
-+}
-+EXPORT_SYMBOL(smp_call_function_single);
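
The block removed above is a textbook single-CPU cross-call rendezvous: publish a call_data record, issue a write barrier, send the IPI, then spin on the started/finished counters. A minimal userspace analogue of that rendezvous, offered only as an illustrative sketch, with a thread standing in for the IPI target and C11 atomics standing in for the kernel's atomic_t (all names below are hypothetical):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct call_data {			/* mirrors the kernel's call_data_struct */
	void (*func)(void *);
	void *info;
	atomic_int started;
	atomic_int finished;
	int wait;
};

static struct call_data *call_data;	/* published pointer, as in the patch */

static void *remote_cpu(void *unused)	/* stands in for the IPI handler */
{
	struct call_data *d = call_data;

	atomic_fetch_add(&d->started, 1);	/* "request seen" */
	d->func(d->info);
	if (d->wait)
		atomic_fetch_add(&d->finished, 1);	/* "func returned" */
	return NULL;
}

static void say_hello(void *info)
{
	printf("called on behalf of %s\n", (const char *)info);
}

int main(void)
{
	struct call_data d = { say_hello, "cpu0", 0, 0, 1 };
	pthread_t t;

	call_data = &d;			/* publish, then "send the IPI" */
	pthread_create(&t, NULL, remote_cpu, NULL);
	while (atomic_load(&d.started) != 1)
		;			/* the kernel spins here with cpu_relax() */
	while (d.wait && atomic_load(&d.finished) != 1)
		;
	pthread_join(t, NULL);
	return 0;
}
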
--- head.orig/arch/x86/kernel/time-xen.c 2011-07-12 11:09:48.000000000 +0200
+++ head/arch/x86/kernel/time-xen.c 2011-07-12 11:10:26.000000000 +0200
@@ -88,7 +88,6 @@ int pit_latch_buggy; /* ext
@@ -2383,94 +2282,6 @@ Acked-by: jbeulich@novell.com
if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
machine_to_phys_mapping = (unsigned long *)mapping.v_start;
---- head.orig/arch/x86/mm/fault_32-xen.c 2010-09-23 15:39:04.000000000 +0200
-+++ head/arch/x86/mm/fault_32-xen.c 2011-01-31 17:29:16.000000000 +0100
-@@ -27,21 +27,24 @@
- #include <asm/uaccess.h>
- #include <asm/desc.h>
- #include <asm/kdebug.h>
-+#include <asm/segment.h>
-
- extern void die(const char *,struct pt_regs *,long);
-
--#ifdef CONFIG_KPROBES
--ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
-+static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
-+
- int register_page_fault_notifier(struct notifier_block *nb)
- {
- vmalloc_sync_all();
- return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
- }
-+EXPORT_SYMBOL_GPL(register_page_fault_notifier);
-
- int unregister_page_fault_notifier(struct notifier_block *nb)
- {
- return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
- }
-+EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
-
- static inline int notify_page_fault(enum die_val val, const char *str,
- struct pt_regs *regs, long err, int trap, int sig)
-@@ -55,14 +58,6 @@ static inline int notify_page_fault(enum
- };
- return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
- }
--#else
--static inline int notify_page_fault(enum die_val val, const char *str,
-- struct pt_regs *regs, long err, int trap, int sig)
--{
-- return NOTIFY_DONE;
--}
--#endif
--
-
- /*
- * Unlock any spinlocks which will prevent us from getting the
-@@ -119,10 +114,10 @@ static inline unsigned long get_segment_
- }
-
- /* The standard kernel/user address space limit. */
-- *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg;
-+ *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
-
- /* By far the most common cases. */
-- if (likely(seg == __USER_CS || seg == GET_KERNEL_CS()))
-+ if (likely(SEGMENT_IS_FLAT_CODE(seg)))
- return eip;
-
- /* Check the segment exists, is within the current LDT/GDT size,
-@@ -559,11 +554,7 @@ good_area:
- write = 0;
- switch (error_code & 3) {
- default: /* 3: write, present */
--#ifdef TEST_VERIFY_AREA
-- if (regs->cs == GET_KERNEL_CS())
-- printk("WP fault at %08lx\n", regs->eip);
--#endif
-- /* fall through */
-+ /* fall through */
- case 2: /* write, not present */
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
-@@ -572,7 +563,7 @@ good_area:
- case 1: /* read, present */
- goto bad_area;
- case 0: /* read, not present */
-- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
-+ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
- goto bad_area;
- }
-
-@@ -704,7 +695,7 @@ no_context:
- */
- out_of_memory:
- up_read(&mm->mmap_sem);
-- if (tsk->pid == 1) {
-+ if (is_init(tsk)) {
- yield();
- down_read(&mm->mmap_sem);
- goto survive;
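
The register/unregister pair that gains EXPORT_SYMBOL_GPL in the hunk above follows the standard atomic notifier chain pattern. A sketch of that pattern in module form, assuming a hypothetical demo chain (the real page-fault chain was removed again in later kernels):

#include <linux/notifier.h>
#include <linux/module.h>

static ATOMIC_NOTIFIER_HEAD(demo_chain);	/* hypothetical chain */

static int demo_cb(struct notifier_block *nb, unsigned long val, void *data)
{
	/* Return NOTIFY_STOP to claim the event, NOTIFY_DONE to pass it on. */
	return NOTIFY_DONE;
}

static struct notifier_block demo_nb = { .notifier_call = demo_cb };

static int __init demo_init(void)
{
	int ret = atomic_notifier_chain_register(&demo_chain, &demo_nb);

	if (!ret)	/* fire the chain once to show the call side */
		atomic_notifier_call_chain(&demo_chain, 0, NULL);
	return ret;
}

static void __exit demo_exit(void)
{
	atomic_notifier_chain_unregister(&demo_chain, &demo_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
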
--- head.orig/arch/x86/mm/highmem_32-xen.c 2008-10-29 09:55:56.000000000 +0100
+++ head/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:29:16.000000000 +0100
@@ -38,11 +38,9 @@ static void *__kmap_atomic(struct page *
@@ -6484,8 +6295,8 @@ Acked-by: jbeulich@novell.com
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* VIA/Cyrix/Centaur-defined */
---- head.orig/arch/x86/kernel/smp_64-xen.c 2008-04-02 12:34:02.000000000 +0200
-+++ head/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:29:16.000000000 +0100
+--- head.orig/arch/x86/kernel/smp-xen.c 2008-04-02 12:34:02.000000000 +0200
++++ head/arch/x86/kernel/smp-xen.c 2011-01-31 17:29:16.000000000 +0100
@@ -381,9 +381,8 @@ int smp_call_function_single (int cpu, v
/* prevent preemption and reschedule on another processor */
int me = get_cpu();
@@ -7199,8 +7010,8 @@ Acked-by: jbeulich@novell.com
return 0;
}
---- head.orig/arch/x86/mm/fault_64-xen.c 2011-10-07 10:44:29.000000000 +0200
-+++ head/arch/x86/mm/fault_64-xen.c 2011-07-26 09:24:01.000000000 +0200
+--- head.orig/arch/x86/mm/fault-xen.c 2011-10-07 10:44:29.000000000 +0200
++++ head/arch/x86/mm/fault-xen.c 2011-07-26 09:24:01.000000000 +0200
@@ -40,8 +40,7 @@
#define PF_RSVD (1<<3)
#define PF_INSTR (1<<4)
diff --git a/patches.xen/xen3-patch-2.6.20 b/patches.xen/xen3-patch-2.6.20
index 5e0b514c4c..aeaa15a02f 100644
--- a/patches.xen/xen3-patch-2.6.20
+++ b/patches.xen/xen3-patch-2.6.20
@@ -2688,19 +2688,6 @@ Acked-by: jbeulich@novell.com
if (is_initial_xendomain()) {
#ifdef CONFIG_VT
---- head.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:29:16.000000000 +0100
-+++ head/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:32:16.000000000 +0100
-@@ -659,6 +659,10 @@ int smp_call_function_single(int cpu, vo
- put_cpu();
- return -EBUSY;
- }
-+
-+ /* Can deadlock when called with interrupts disabled */
-+ WARN_ON(irqs_disabled());
-+
- spin_lock_bh(&call_lock);
- __smp_call_function_single(cpu, func, info, nonatomic, wait);
- spin_unlock_bh(&call_lock);
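
The three added lines back-port a debugging aid: a CPU that spins waiting for an IPI acknowledgement while its own interrupts are off can never be rescued, so the call path asserts up front. Shape of the guard, sketched with a hypothetical caller and lock:

#include <linux/irqflags.h>
#include <linux/bug.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_call_lock);	/* hypothetical */

static void demo_cross_call(void)
{
	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	spin_lock_bh(&demo_call_lock);
	/* ... send the IPI and spin until the remote CPU acknowledges ... */
	spin_unlock_bh(&demo_call_lock);
}
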
--- head.orig/arch/x86/kernel/time-xen.c 2011-07-12 11:10:26.000000000 +0200
+++ head/arch/x86/kernel/time-xen.c 2011-07-12 11:10:37.000000000 +0200
@@ -60,6 +60,7 @@
@@ -3119,52 +3106,6 @@ Acked-by: jbeulich@novell.com
default y
---help---
Say Y here to get to see options for using your Linux host to run other
---- head.orig/arch/x86/mm/fault_32-xen.c 2011-01-31 17:29:16.000000000 +0100
-+++ head/arch/x86/mm/fault_32-xen.c 2011-01-31 17:32:16.000000000 +0100
-@@ -22,9 +22,9 @@
- #include <linux/highmem.h>
- #include <linux/module.h>
- #include <linux/kprobes.h>
-+#include <linux/uaccess.h>
-
- #include <asm/system.h>
--#include <asm/uaccess.h>
- #include <asm/desc.h>
- #include <asm/kdebug.h>
- #include <asm/segment.h>
-@@ -167,7 +167,7 @@ static inline unsigned long get_segment_
- static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
- {
- unsigned long limit;
-- unsigned long instr = get_segment_eip (regs, &limit);
-+ unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit);
- int scan_more = 1;
- int prefetch = 0;
- int i;
-@@ -177,9 +177,9 @@ static int __is_prefetch(struct pt_regs
- unsigned char instr_hi;
- unsigned char instr_lo;
-
-- if (instr > limit)
-+ if (instr > (unsigned char *)limit)
- break;
-- if (__get_user(opcode, (unsigned char __user *) instr))
-+ if (probe_kernel_address(instr, opcode))
- break;
-
- instr_hi = opcode & 0xf0;
-@@ -204,9 +204,9 @@ static int __is_prefetch(struct pt_regs
- case 0x00:
- /* Prefetch instruction is 0x0F0D or 0x0F18 */
- scan_more = 0;
-- if (instr > limit)
-+ if (instr > (unsigned char *)limit)
- break;
-- if (__get_user(opcode, (unsigned char __user *) instr))
-+ if (probe_kernel_address(instr, opcode))
- break;
- prefetch = (instr_lo == 0xF) &&
- (opcode == 0x0D || opcode == 0x18);
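
The hunk swaps __get_user() for probe_kernel_address() when decoding opcode bytes: the faulting instruction pointer may be a kernel address, and the accessor must fail gracefully rather than recurse into the fault handler. The idiom, as a sketch (read_opcode is hypothetical; newer kernels spell the accessor get_kernel_nofault()):

#include <linux/uaccess.h>
#include <linux/errno.h>

static int read_opcode(const unsigned char *instr, unsigned char *opcode)
{
	/*
	 * Reads one byte with the page-fault handler disarmed: a bad
	 * pointer yields -EFAULT instead of a nested page fault.
	 */
	if (probe_kernel_address(instr, *opcode))
		return -EFAULT;
	return 0;
}
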
--- head.orig/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:29:16.000000000 +0100
+++ head/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:32:16.000000000 +0100
@@ -32,7 +32,7 @@ static void *__kmap_atomic(struct page *
@@ -3769,8 +3710,8 @@ Acked-by: jbeulich@novell.com
c->x86_max_cores = intel_num_cpu_cores(c);
srat_detect_node();
---- head.orig/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:29:16.000000000 +0100
-+++ head/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:32:16.000000000 +0100
+--- head.orig/arch/x86/kernel/smp-xen.c 2011-01-31 17:29:16.000000000 +0100
++++ head/arch/x86/kernel/smp-xen.c 2011-01-31 17:32:16.000000000 +0100
@@ -384,12 +384,17 @@ int smp_call_function_single (int cpu, v
put_cpu();
return 0;
@@ -4059,8 +4000,8 @@ Acked-by: jbeulich@novell.com
__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
}
---- head.orig/arch/x86/mm/fault_64-xen.c 2011-07-26 09:24:01.000000000 +0200
-+++ head/arch/x86/mm/fault_64-xen.c 2011-07-26 09:27:24.000000000 +0200
+--- head.orig/arch/x86/mm/fault-xen.c 2011-07-26 09:24:01.000000000 +0200
++++ head/arch/x86/mm/fault-xen.c 2011-07-26 09:27:24.000000000 +0200
@@ -23,9 +23,9 @@
#include <linux/compiler.h>
#include <linux/module.h>
diff --git a/patches.xen/xen3-patch-2.6.21 b/patches.xen/xen3-patch-2.6.21
index ff4c326d97..80fd967588 100644
--- a/patches.xen/xen3-patch-2.6.21
+++ b/patches.xen/xen3-patch-2.6.21
@@ -847,27 +847,6 @@ Acked-by: jbeulich@novell.com
- * c-basic-offset:8
- * End:
- */
---- head.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:32:16.000000000 +0100
-+++ head/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:32:22.000000000 +0100
-@@ -335,8 +335,7 @@ static void flush_tlb_others(cpumask_t c
- /*
- * i'm not happy about this global shared spinlock in the
- * MM hot path, but we'll see how contended it is.
-- * Temporarily this turns IRQs off, so that lockups are
-- * detected by the NMI watchdog.
-+ * AK: x86-64 has a faster method that could be ported.
- */
- spin_lock(&tlbstate_lock);
-
-@@ -361,7 +360,7 @@ static void flush_tlb_others(cpumask_t c
-
- while (!cpus_empty(flush_cpumask))
- /* nothing. lockup detection does not belong here */
-- mb();
-+ cpu_relax();
-
- flush_mm = NULL;
- flush_va = 0;
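
Replacing mb() with cpu_relax() in the wait loop above is the canonical spin-wait idiom: on x86 it emits PAUSE, which de-pipelines the loop and yields decode bandwidth to an SMT sibling, without paying for a full memory barrier on every iteration. A sketch of the shape (the pending flag is hypothetical):

#include <asm/processor.h>

static void wait_until_flushed(const volatile unsigned long *pending)
{
	while (*pending)
		cpu_relax();	/* PAUSE on x86; plain spin elsewhere */
}
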
--- head.orig/arch/x86/kernel/time-xen.c 2011-07-12 11:10:37.000000000 +0200
+++ head/arch/x86/kernel/time-xen.c 2011-07-12 11:13:30.000000000 +0200
@@ -50,6 +50,7 @@
@@ -1480,78 +1459,6 @@ Acked-by: jbeulich@novell.com
+ return 1;
+}
+__setup("code_bytes=", code_bytes_setup);
---- head.orig/arch/x86/mm/fault_32-xen.c 2011-01-31 17:32:16.000000000 +0100
-+++ head/arch/x86/mm/fault_32-xen.c 2011-01-31 17:32:22.000000000 +0100
-@@ -46,43 +46,17 @@ int unregister_page_fault_notifier(struc
- }
- EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
-
--static inline int notify_page_fault(enum die_val val, const char *str,
-- struct pt_regs *regs, long err, int trap, int sig)
-+static inline int notify_page_fault(struct pt_regs *regs, long err)
- {
- struct die_args args = {
- .regs = regs,
-- .str = str,
-+ .str = "page fault",
- .err = err,
-- .trapnr = trap,
-- .signr = sig
-+ .trapnr = 14,
-+ .signr = SIGSEGV
- };
-- return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
--}
--
--/*
-- * Unlock any spinlocks which will prevent us from getting the
-- * message out
-- */
--void bust_spinlocks(int yes)
--{
-- int loglevel_save = console_loglevel;
--
-- if (yes) {
-- oops_in_progress = 1;
-- return;
-- }
--#ifdef CONFIG_VT
-- unblank_screen();
--#endif
-- oops_in_progress = 0;
-- /*
-- * OK, the message is on the console. Now we call printk()
-- * without oops_in_progress set so that printk will give klogd
-- * a poke. Hold onto your hats...
-- */
-- console_loglevel = 15; /* NMI oopser may have shut the console up */
-- printk(" ");
-- console_loglevel = loglevel_save;
-+ return atomic_notifier_call_chain(&notify_page_fault_chain,
-+ DIE_PAGE_FAULT, &args);
- }
-
- /*
-@@ -476,8 +450,7 @@ fastcall void __kprobes do_page_fault(st
- /* Can take a spurious fault if mapping changes R/O -> R/W. */
- if (spurious_fault(regs, address, error_code))
- return;
-- if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
-- SIGSEGV) == NOTIFY_STOP)
-+ if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
- return;
- /*
- * Don't take the mm semaphore here. If we fixup a prefetch
-@@ -486,8 +459,7 @@ fastcall void __kprobes do_page_fault(st
- goto bad_area_nosemaphore;
- }
-
-- if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
-- SIGSEGV) == NOTIFY_STOP)
-+ if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
- return;
-
- /* It's safe to allow irq's after cr2 has been saved and the vmalloc
--- head.orig/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:32:16.000000000 +0100
+++ head/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:32:22.000000000 +0100
@@ -33,14 +33,16 @@ static void *__kmap_atomic(struct page *
@@ -3146,8 +3053,8 @@ Acked-by: jbeulich@novell.com
#endif
on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
hotcpu_notifier(cpu_vsyscall_notifier, 0);
---- head.orig/arch/x86/mm/fault_64-xen.c 2011-07-26 09:27:24.000000000 +0200
-+++ head/arch/x86/mm/fault_64-xen.c 2011-07-26 09:27:34.000000000 +0200
+--- head.orig/arch/x86/mm/fault-xen.c 2011-07-26 09:27:24.000000000 +0200
++++ head/arch/x86/mm/fault-xen.c 2011-07-26 09:27:34.000000000 +0200
@@ -56,38 +56,17 @@ int unregister_page_fault_notifier(struc
}
EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
diff --git a/patches.xen/xen3-patch-2.6.22 b/patches.xen/xen3-patch-2.6.22
index 031b2a5b70..540716204d 100644
--- a/patches.xen/xen3-patch-2.6.22
+++ b/patches.xen/xen3-patch-2.6.22
@@ -796,328 +796,6 @@ Acked-by: jbeulich@novell.com
return prev_p;
}
---- head.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:32:22.000000000 +0100
-+++ head/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:32:29.000000000 +0100
-@@ -13,7 +13,6 @@
- #include <linux/mm.h>
- #include <linux/delay.h>
- #include <linux/spinlock.h>
--#include <linux/smp_lock.h>
- #include <linux/kernel_stat.h>
- #include <linux/mc146818rtc.h>
- #include <linux/cache.h>
-@@ -216,7 +215,6 @@ static cpumask_t flush_cpumask;
- static struct mm_struct * flush_mm;
- static unsigned long flush_va;
- static DEFINE_SPINLOCK(tlbstate_lock);
--#define FLUSH_ALL 0xffffffff
-
- /*
- * We cannot call mmdrop() because we are in interrupt context,
-@@ -298,7 +296,7 @@ irqreturn_t smp_invalidate_interrupt(int
-
- if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
-- if (flush_va == FLUSH_ALL)
-+ if (flush_va == TLB_FLUSH_ALL)
- local_flush_tlb();
- else
- __flush_tlb_one(flush_va);
-@@ -314,9 +312,11 @@ out:
- return IRQ_HANDLED;
- }
-
--static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
-- unsigned long va)
-+void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
-+ unsigned long va)
- {
-+ cpumask_t cpumask = *cpumaskp;
-+
- /*
- * A couple of (to be removed) sanity checks:
- *
-@@ -327,10 +327,12 @@ static void flush_tlb_others(cpumask_t c
- BUG_ON(cpu_isset(smp_processor_id(), cpumask));
- BUG_ON(!mm);
-
-+#ifdef CONFIG_HOTPLUG_CPU
- /* If a CPU which we ran on has gone down, OK. */
- cpus_and(cpumask, cpumask, cpu_online_map);
-- if (cpus_empty(cpumask))
-+ if (unlikely(cpus_empty(cpumask)))
- return;
-+#endif
-
- /*
- * i'm not happy about this global shared spinlock in the
-@@ -341,17 +343,7 @@ static void flush_tlb_others(cpumask_t c
-
- flush_mm = mm;
- flush_va = va;
--#if NR_CPUS <= BITS_PER_LONG
-- atomic_set_mask(cpumask, &flush_cpumask);
--#else
-- {
-- int k;
-- unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
-- unsigned long *cpu_mask = (unsigned long *)&cpumask;
-- for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
-- atomic_set_mask(cpu_mask[k], &flush_mask[k]);
-- }
--#endif
-+ cpus_or(flush_cpumask, cpumask, flush_cpumask);
- /*
- * We have to send the IPI only to
- * CPUs affected.
-@@ -378,7 +370,7 @@ void flush_tlb_current_task(void)
-
- local_flush_tlb();
- if (!cpus_empty(cpu_mask))
-- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-+ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
- preempt_enable();
- }
-
-@@ -397,7 +389,7 @@ void flush_tlb_mm (struct mm_struct * mm
- leave_mm(smp_processor_id());
- }
- if (!cpus_empty(cpu_mask))
-- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-+ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
-
- preempt_enable();
- }
-@@ -446,7 +438,7 @@ void flush_tlb_all(void)
- * it goes straight through and wastes no time serializing
- * anything. Worst case is that we lose a reschedule ...
- */
--void smp_send_reschedule(int cpu)
-+void xen_smp_send_reschedule(int cpu)
- {
- WARN_ON(cpu_is_offline(cpu));
- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
-@@ -478,36 +470,79 @@ void unlock_ipi_call_lock(void)
-
- static struct call_data_struct *call_data;
-
-+static void __smp_call_function(void (*func) (void *info), void *info,
-+ int nonatomic, int wait)
-+{
-+ struct call_data_struct data;
-+ int cpus = num_online_cpus() - 1;
-+
-+ if (!cpus)
-+ return;
-+
-+ data.func = func;
-+ data.info = info;
-+ atomic_set(&data.started, 0);
-+ data.wait = wait;
-+ if (wait)
-+ atomic_set(&data.finished, 0);
-+
-+ call_data = &data;
-+ mb();
-+
-+ /* Send a message to all other CPUs and wait for them to respond */
-+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-+
-+ /* Wait for response */
-+ while (atomic_read(&data.started) != cpus)
-+ cpu_relax();
-+
-+ if (wait)
-+ while (atomic_read(&data.finished) != cpus)
-+ cpu_relax();
-+}
-+
-+
- /**
-- * smp_call_function(): Run a function on all other CPUs.
-+ * smp_call_function_mask(): Run a function on a set of other CPUs.
-+ * @mask: The set of cpus to run on. Must not include the current cpu.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
-- * @nonatomic: currently unused.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
-- * Returns 0 on success, else a negative status code. Does not return until
-- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
-+ * Returns 0 on success, else a negative status code.
-+ *
-+ * If @wait is true, then returns once @func has returned; otherwise
-+ * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
--int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
-- int wait)
-+int
-+xen_smp_call_function_mask(cpumask_t mask,
-+ void (*func)(void *), void *info,
-+ int wait)
- {
- struct call_data_struct data;
-+ cpumask_t allbutself;
- int cpus;
-
-+ /* Can deadlock when called with interrupts disabled */
-+ WARN_ON(irqs_disabled());
-+
- /* Holding any lock stops cpus from going down. */
- spin_lock(&call_lock);
-- cpus = num_online_cpus() - 1;
-+
-+ allbutself = cpu_online_map;
-+ cpu_clear(smp_processor_id(), allbutself);
-+
-+ cpus_and(mask, mask, allbutself);
-+ cpus = cpus_weight(mask);
-+
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
-- /* Can deadlock when called with interrupts disabled */
-- WARN_ON(irqs_disabled());
--
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
-@@ -517,9 +552,12 @@ int smp_call_function (void (*func) (voi
-
- call_data = &data;
- mb();
--
-- /* Send a message to all other CPUs and wait for them to respond */
-- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-+
-+ /* Send a message to other CPUs */
-+ if (cpus_equal(mask, allbutself))
-+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-+ else
-+ send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
-@@ -532,15 +570,14 @@ int smp_call_function (void (*func) (voi
-
- return 0;
- }
--EXPORT_SYMBOL(smp_call_function);
-
- static void stop_this_cpu (void * dummy)
- {
-+ local_irq_disable();
- /*
- * Remove this CPU:
- */
- cpu_clear(smp_processor_id(), cpu_online_map);
-- local_irq_disable();
- disable_all_local_evtchn();
- if (cpu_data[smp_processor_id()].hlt_works_ok)
- for(;;) halt();
-@@ -551,13 +588,18 @@ static void stop_this_cpu (void * dummy)
- * this function calls the 'stop' function on all other CPUs in the system.
- */
-
--void smp_send_stop(void)
-+void xen_smp_send_stop(void)
- {
-- smp_call_function(stop_this_cpu, NULL, 1, 0);
-+ /* Don't deadlock on the call lock in panic */
-+ int nolock = !spin_trylock(&call_lock);
-+ unsigned long flags;
-
-- local_irq_disable();
-+ local_irq_save(flags);
-+ __smp_call_function(stop_this_cpu, NULL, 0, 0);
-+ if (!nolock)
-+ spin_unlock(&call_lock);
- disable_all_local_evtchn();
-- local_irq_enable();
-+ local_irq_restore(flags);
- }
-
- /*
-@@ -598,74 +640,3 @@ irqreturn_t smp_call_function_interrupt(
-
- return IRQ_HANDLED;
- }
--
--/*
-- * this function sends a 'generic call function' IPI to one other CPU
-- * in the system.
-- *
-- * cpu is a standard Linux logical CPU number.
-- */
--static void
--__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-- int nonatomic, int wait)
--{
-- struct call_data_struct data;
-- int cpus = 1;
--
-- data.func = func;
-- data.info = info;
-- atomic_set(&data.started, 0);
-- data.wait = wait;
-- if (wait)
-- atomic_set(&data.finished, 0);
--
-- call_data = &data;
-- wmb();
-- /* Send a message to all other CPUs and wait for them to respond */
-- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
--
-- /* Wait for response */
-- while (atomic_read(&data.started) != cpus)
-- cpu_relax();
--
-- if (!wait)
-- return;
--
-- while (atomic_read(&data.finished) != cpus)
-- cpu_relax();
--}
--
--/*
-- * smp_call_function_single - Run a function on another CPU
-- * @func: The function to run. This must be fast and non-blocking.
-- * @info: An arbitrary pointer to pass to the function.
-- * @nonatomic: Currently unused.
-- * @wait: If true, wait until function has completed on other CPUs.
-- *
-- * Retrurns 0 on success, else a negative status code.
-- *
-- * Does not return until the remote CPU is nearly ready to execute <func>
-- * or is or has executed.
-- */
--
--int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-- int nonatomic, int wait)
--{
-- /* prevent preemption and reschedule on another processor */
-- int me = get_cpu();
-- if (cpu == me) {
-- WARN_ON(1);
-- put_cpu();
-- return -EBUSY;
-- }
--
-- /* Can deadlock when called with interrupts disabled */
-- WARN_ON(irqs_disabled());
--
-- spin_lock_bh(&call_lock);
-- __smp_call_function_single(cpu, func, info, nonatomic, wait);
-- spin_unlock_bh(&call_lock);
-- put_cpu();
-- return 0;
--}
--EXPORT_SYMBOL(smp_call_function_single);
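
Among the changes above, the reworked xen_smp_send_stop() is worth pulling out: during a panic another CPU may have died while holding call_lock, so the stop path takes the lock opportunistically with spin_trylock() and carries on without it when that fails. A sketch of the pattern with hypothetical names:

#include <linux/spinlock.h>
#include <linux/irqflags.h>

static DEFINE_SPINLOCK(demo_call_lock);	/* hypothetical */

static void demo_send_stop(void)
{
	/* Don't deadlock on the call lock in panic */
	int nolock = !spin_trylock(&demo_call_lock);
	unsigned long flags;

	local_irq_save(flags);
	/* ... broadcast stop_this_cpu() to the other CPUs ... */
	if (!nolock)
		spin_unlock(&demo_call_lock);
	local_irq_restore(flags);
}
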
--- head.orig/arch/x86/kernel/time-xen.c 2011-07-12 11:13:30.000000000 +0200
+++ head/arch/x86/kernel/time-xen.c 2012-02-10 13:26:34.000000000 +0100
@@ -42,7 +42,6 @@
@@ -1529,128 +1207,6 @@ Acked-by: jbeulich@novell.com
unsigned long base = (kesp - uesp) & -THREAD_SIZE;
unsigned long new_kesp = kesp - base;
unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
---- head.orig/arch/x86/mm/fault_32-xen.c 2011-01-31 17:32:22.000000000 +0100
-+++ head/arch/x86/mm/fault_32-xen.c 2011-01-31 17:32:29.000000000 +0100
-@@ -14,19 +14,20 @@
- #include <linux/mman.h>
- #include <linux/mm.h>
- #include <linux/smp.h>
--#include <linux/smp_lock.h>
- #include <linux/interrupt.h>
- #include <linux/init.h>
- #include <linux/tty.h>
- #include <linux/vt_kern.h> /* For unblank_screen() */
- #include <linux/highmem.h>
-+#include <linux/bootmem.h> /* for max_low_pfn */
-+#include <linux/vmalloc.h>
- #include <linux/module.h>
- #include <linux/kprobes.h>
- #include <linux/uaccess.h>
-+#include <linux/kdebug.h>
-
- #include <asm/system.h>
- #include <asm/desc.h>
--#include <asm/kdebug.h>
- #include <asm/segment.h>
-
- extern void die(const char *,struct pt_regs *,long);
-@@ -259,25 +260,20 @@ static void dump_fault_path(unsigned lon
- unsigned long page;
-
- page = read_cr3();
-- page = ((unsigned long *) __va(page))[address >> 22];
-- if (oops_may_print())
-- printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
-- machine_to_phys(page));
-+ page = ((unsigned long *) __va(page))[address >> PGDIR_SHIFT];
-+ printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
-+ machine_to_phys(page));
- /*
- * We must not directly access the pte in the highpte
- * case if the page table is located in highmem.
- * And lets rather not kmap-atomic the pte, just in case
- * it's allocated already.
- */
--#ifdef CONFIG_HIGHPTE
-- if ((page >> PAGE_SHIFT) >= highstart_pfn)
-- return;
--#endif
-- if ((page & 1) && oops_may_print()) {
-- page &= PAGE_MASK;
-- address &= 0x003ff000;
-- page = machine_to_phys(page);
-- page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
-+ if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn
-+ && (page & _PAGE_PRESENT)) {
-+ page = machine_to_phys(page & PAGE_MASK);
-+ page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT)
-+ & (PTRS_PER_PTE - 1)];
- printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page,
- machine_to_phys(page));
- }
-@@ -581,6 +577,11 @@ bad_area:
- bad_area_nosemaphore:
- /* User mode accesses just cause a SIGSEGV */
- if (error_code & 4) {
-+ /*
-+ * It's possible to have interrupts off here.
-+ */
-+ local_irq_enable();
-+
- /*
- * Valid to do another page fault here because this one came
- * from user space.
-@@ -633,7 +634,7 @@ no_context:
- bust_spinlocks(1);
-
- if (oops_may_print()) {
-- #ifdef CONFIG_X86_PAE
-+#ifdef CONFIG_X86_PAE
- if (error_code & 16) {
- pte_t *pte = lookup_address(address);
-
-@@ -642,7 +643,7 @@ no_context:
- "NX-protected page - exploit attempt? "
- "(uid: %d)\n", current->uid);
- }
-- #endif
-+#endif
- if (address < PAGE_SIZE)
- printk(KERN_ALERT "BUG: unable to handle kernel NULL "
- "pointer dereference");
-@@ -652,8 +653,8 @@ no_context:
- printk(" at virtual address %08lx\n",address);
- printk(KERN_ALERT " printing eip:\n");
- printk("%08lx\n", regs->eip);
-+ dump_fault_path(address);
- }
-- dump_fault_path(address);
- tsk->thread.cr2 = address;
- tsk->thread.trap_no = 14;
- tsk->thread.error_code = error_code;
-@@ -694,7 +695,6 @@ do_sigbus:
- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
- }
-
--#if !HAVE_SHARED_KERNEL_PMD
- void vmalloc_sync_all(void)
- {
- /*
-@@ -710,6 +710,9 @@ void vmalloc_sync_all(void)
- static unsigned long start = TASK_SIZE;
- unsigned long address;
-
-+ if (SHARED_KERNEL_PMD)
-+ return;
-+
- BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
- for (address = start;
- address >= TASK_SIZE && address < hypervisor_virt_start;
-@@ -752,4 +755,3 @@ void vmalloc_sync_all(void)
- start = address + (1UL << PMD_SHIFT);
- }
- }
--#endif
--- head.orig/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:32:22.000000000 +0100
+++ head/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:32:29.000000000 +0100
@@ -26,7 +26,7 @@ void kunmap(struct page *page)
@@ -2999,8 +2555,8 @@ Acked-by: jbeulich@novell.com
};
---- head.orig/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:32:16.000000000 +0100
-+++ head/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:32:29.000000000 +0100
+--- head.orig/arch/x86/kernel/smp-xen.c 2011-01-31 17:32:16.000000000 +0100
++++ head/arch/x86/kernel/smp-xen.c 2011-01-31 17:32:29.000000000 +0100
@@ -14,7 +14,6 @@
#include <linux/mm.h>
#include <linux/delay.h>
@@ -3339,8 +2895,8 @@ Acked-by: jbeulich@novell.com
smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
return NOTIFY_DONE;
}
---- head.orig/arch/x86/mm/fault_64-xen.c 2011-07-26 09:27:34.000000000 +0200
-+++ head/arch/x86/mm/fault_64-xen.c 2011-07-26 09:27:41.000000000 +0200
+--- head.orig/arch/x86/mm/fault-xen.c 2011-07-26 09:27:34.000000000 +0200
++++ head/arch/x86/mm/fault-xen.c 2011-07-26 09:27:41.000000000 +0200
@@ -15,22 +15,22 @@
#include <linux/mman.h>
#include <linux/mm.h>
diff --git a/patches.xen/xen3-patch-2.6.23 b/patches.xen/xen3-patch-2.6.23
index ec01a8f1e1..bbb858011b 100644
--- a/patches.xen/xen3-patch-2.6.23
+++ b/patches.xen/xen3-patch-2.6.23
@@ -405,32 +405,6 @@ Acked-by: jbeulich@novell.com
if (is_initial_xendomain()) {
#ifdef CONFIG_VT
---- head.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:32:29.000000000 +0100
-+++ head/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:49:31.000000000 +0100
-@@ -22,6 +22,7 @@
-
- #include <asm/mtrr.h>
- #include <asm/tlbflush.h>
-+#include <asm/mmu_context.h>
- #if 0
- #include <mach_apic.h>
- #endif
-@@ -217,13 +218,13 @@ static unsigned long flush_va;
- static DEFINE_SPINLOCK(tlbstate_lock);
-
- /*
-- * We cannot call mmdrop() because we are in interrupt context,
-+ * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
- *
- * We need to reload %cr3 since the page tables may be going
- * away from under us..
- */
--static inline void leave_mm (unsigned long cpu)
-+void leave_mm(unsigned long cpu)
- {
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
- BUG();
--- head.orig/arch/x86/kernel/time-xen.c 2012-02-10 13:26:34.000000000 +0100
+++ head/arch/x86/kernel/time-xen.c 2012-02-10 13:26:46.000000000 +0100
@@ -74,11 +74,12 @@
@@ -745,93 +719,6 @@ Acked-by: jbeulich@novell.com
+ set_pmd(pmd, __pmd(__pa_symbol(swapper_pg_pmd) | _PAGE_TABLE));
+ }
}
---- head.orig/arch/x86/mm/fault_32-xen.c 2011-01-31 17:32:29.000000000 +0100
-+++ head/arch/x86/mm/fault_32-xen.c 2011-01-31 17:49:31.000000000 +0100
-@@ -346,7 +346,10 @@ static inline pmd_t *vmalloc_sync_one(pg
- pmd_k = pmd_offset(pud_k, address);
- if (!pmd_present(*pmd_k))
- return NULL;
-- if (!pmd_present(*pmd))
-+ if (!pmd_present(*pmd)) {
-+ bool lazy = x86_read_percpu(xen_lazy_mmu);
-+
-+ x86_write_percpu(xen_lazy_mmu, false);
- #if CONFIG_XEN_COMPAT > 0x030002
- set_pmd(pmd, *pmd_k);
- #else
-@@ -356,7 +359,8 @@ static inline pmd_t *vmalloc_sync_one(pg
- */
- set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
- #endif
-- else
-+ x86_write_percpu(xen_lazy_mmu, lazy);
-+ } else
- BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
- return pmd_k;
- }
-@@ -388,6 +392,8 @@ static inline int vmalloc_fault(unsigned
- return 0;
- }
-
-+int show_unhandled_signals = 1;
-+
- /*
- * This routine handles page faults. It determines the address,
- * and the problem, and then passes it off to one of the appropriate
-@@ -408,6 +414,7 @@ fastcall void __kprobes do_page_fault(st
- struct vm_area_struct * vma;
- unsigned long address;
- int write, si_code;
-+ int fault;
-
- /* get the address */
- address = read_cr2();
-@@ -541,20 +548,18 @@ good_area:
- * make sure we exit gracefully rather than endlessly redo
- * the fault.
- */
-- switch (handle_mm_fault(mm, vma, address, write)) {
-- case VM_FAULT_MINOR:
-- tsk->min_flt++;
-- break;
-- case VM_FAULT_MAJOR:
-- tsk->maj_flt++;
-- break;
-- case VM_FAULT_SIGBUS:
-- goto do_sigbus;
-- case VM_FAULT_OOM:
-+ fault = handle_mm_fault(mm, vma, address, write);
-+ if (unlikely(fault & VM_FAULT_ERROR)) {
-+ if (fault & VM_FAULT_OOM)
- goto out_of_memory;
-- default:
-- BUG();
-+ else if (fault & VM_FAULT_SIGBUS)
-+ goto do_sigbus;
-+ BUG();
- }
-+ if (fault & VM_FAULT_MAJOR)
-+ tsk->maj_flt++;
-+ else
-+ tsk->min_flt++;
-
- /*
- * Did it hit the DOS screen memory VA from vm86 mode?
-@@ -589,6 +594,14 @@ bad_area_nosemaphore:
- if (is_prefetch(regs, address, error_code))
- return;
-
-+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
-+ printk_ratelimit()) {
-+ printk("%s%s[%d]: segfault at %08lx eip %08lx "
-+ "esp %08lx error %lx\n",
-+ tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
-+ tsk->comm, tsk->pid, address, regs->eip,
-+ regs->esp, error_code);
-+ }
- tsk->thread.cr2 = address;
- /* Kernel addresses are always protection faults */
- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
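
The handle_mm_fault() conversion above tracks a 2.6.23 interface change: the return value became a bitmask, so callers test VM_FAULT_ERROR first and derive the major/minor accounting from VM_FAULT_MAJOR. The resulting caller shape, sketched with a hypothetical helper:

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/errno.h>

static int demo_account_fault(struct task_struct *tsk, unsigned int fault)
{
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			return -ENOMEM;	/* caller jumps to out_of_memory */
		if (fault & VM_FAULT_SIGBUS)
			return -EFAULT;	/* caller jumps to do_sigbus */
		BUG();			/* unknown error bit */
	}
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;
	return 0;
}
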
--- head.orig/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:32:29.000000000 +0100
+++ head/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:49:31.000000000 +0100
@@ -34,17 +34,16 @@ void *kmap_atomic_prot(struct page *page
@@ -1703,8 +1590,8 @@ Acked-by: jbeulich@novell.com
};
static char *x86_power_flags[] = {
"ts", /* temperature sensor */
---- head.orig/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:32:29.000000000 +0100
-+++ head/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:49:31.000000000 +0100
+--- head.orig/arch/x86/kernel/smp-xen.c 2011-01-31 17:32:29.000000000 +0100
++++ head/arch/x86/kernel/smp-xen.c 2011-01-31 17:49:31.000000000 +0100
@@ -362,7 +362,7 @@ __smp_call_function_single(int cpu, void
}
@@ -1860,8 +1747,8 @@ Acked-by: jbeulich@novell.com
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
---- head.orig/arch/x86/mm/fault_64-xen.c 2011-07-26 09:27:41.000000000 +0200
-+++ head/arch/x86/mm/fault_64-xen.c 2011-08-15 10:44:41.000000000 +0200
+--- head.orig/arch/x86/mm/fault-xen.c 2011-07-26 09:27:41.000000000 +0200
++++ head/arch/x86/mm/fault-xen.c 2011-08-15 10:44:41.000000000 +0200
@@ -160,7 +160,9 @@ void dump_pagetable(unsigned long addres
pmd_t *pmd;
pte_t *pte;
diff --git a/patches.xen/xen3-patch-2.6.24 b/patches.xen/xen3-patch-2.6.24
index 6f98841f08..1b3ddf306f 100644
--- a/patches.xen/xen3-patch-2.6.24
+++ b/patches.xen/xen3-patch-2.6.24
@@ -2071,61 +2071,8 @@ Acked-by: jbeulich@novell.com
return c_start(m, pos);
}
---- head.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:49:31.000000000 +0100
-+++ head/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:56:27.000000000 +0100
-@@ -72,7 +72,7 @@
- *
- * B stepping CPUs may hang. There are hardware work arounds
- * for this. We warn about it in case your board doesn't have the work
-- * arounds. Basically thats so I can tell anyone with a B stepping
-+ * arounds. Basically that's so I can tell anyone with a B stepping
- * CPU and SMP problems "tough".
- *
- * Specific items [From Pentium Processor Specification Update]
-@@ -241,7 +241,7 @@ void leave_mm(unsigned long cpu)
- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- * Stop ipi delivery for the old mm. This is not synchronized with
- * the other cpus, but smp_invalidate_interrupt ignore flush ipis
-- * for the wrong mm, and in the worst case we perform a superflous
-+ * for the wrong mm, and in the worst case we perform a superfluous
- * tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
-@@ -309,6 +309,7 @@ irqreturn_t smp_invalidate_interrupt(int
- smp_mb__after_clear_bit();
- out:
- put_cpu_no_resched();
-+ __get_cpu_var(irq_stat).irq_tlb_count++;
-
- return IRQ_HANDLED;
- }
-@@ -580,7 +581,7 @@ static void stop_this_cpu (void * dummy)
- */
- cpu_clear(smp_processor_id(), cpu_online_map);
- disable_all_local_evtchn();
-- if (cpu_data[smp_processor_id()].hlt_works_ok)
-+ if (cpu_data(smp_processor_id()).hlt_works_ok)
- for(;;) halt();
- for (;;);
- }
-@@ -610,6 +611,7 @@ void xen_smp_send_stop(void)
- */
- irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
- {
-+ __get_cpu_var(irq_stat).irq_resched_count++;
-
- return IRQ_HANDLED;
- }
-@@ -632,6 +634,7 @@ irqreturn_t smp_call_function_interrupt(
- */
- irq_enter();
- (*func)(info);
-+ __get_cpu_var(irq_stat).irq_call_count++;
- irq_exit();
-
- if (wait) {
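
The counter bumps added above feed /proc/stat: each interrupt class increments a field of this CPU's irq_stat, lock-free because a CPU only ever writes its own copy. A sketch of the idiom with a trimmed-down, hypothetical stat structure (__get_cpu_var() was the accessor of this era; current kernels would use this_cpu_inc()):

#include <linux/percpu.h>

struct demo_irq_stat {
	unsigned int irq_call_count;
};
static DEFINE_PER_CPU(struct demo_irq_stat, demo_irq_stat);

static void demo_call_function_interrupt(void)
{
	/* ... run the requested function ... */
	__get_cpu_var(demo_irq_stat).irq_call_count++;
}
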
---- head.orig/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:49:31.000000000 +0100
-+++ head/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:56:27.000000000 +0100
+--- head.orig/arch/x86/kernel/smp-xen.c 2011-01-31 17:49:31.000000000 +0100
++++ head/arch/x86/kernel/smp-xen.c 2011-01-31 17:56:27.000000000 +0100
@@ -167,6 +167,7 @@ asmlinkage void smp_invalidate_interrupt
out:
ack_APIC_irq();
@@ -2930,191 +2877,8 @@ Acked-by: jbeulich@novell.com
write_rdtscp_aux((node << 12) | cpu);
/* Store cpu number in limit so that it can be loaded quickly
---- head.orig/arch/x86/mm/fault_32-xen.c 2011-01-31 17:49:31.000000000 +0100
-+++ head/arch/x86/mm/fault_32-xen.c 2011-01-31 17:56:27.000000000 +0100
-@@ -25,6 +25,7 @@
- #include <linux/kprobes.h>
- #include <linux/uaccess.h>
- #include <linux/kdebug.h>
-+#include <linux/kprobes.h>
-
- #include <asm/system.h>
- #include <asm/desc.h>
-@@ -32,33 +33,27 @@
-
- extern void die(const char *,struct pt_regs *,long);
-
--static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
--
--int register_page_fault_notifier(struct notifier_block *nb)
-+#ifdef CONFIG_KPROBES
-+static inline int notify_page_fault(struct pt_regs *regs)
- {
-- vmalloc_sync_all();
-- return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
--}
--EXPORT_SYMBOL_GPL(register_page_fault_notifier);
-+ int ret = 0;
-
--int unregister_page_fault_notifier(struct notifier_block *nb)
--{
-- return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
--}
--EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
-+ /* kprobe_running() needs smp_processor_id() */
-+ if (!user_mode_vm(regs)) {
-+ preempt_disable();
-+ if (kprobe_running() && kprobe_fault_handler(regs, 14))
-+ ret = 1;
-+ preempt_enable();
-+ }
-
--static inline int notify_page_fault(struct pt_regs *regs, long err)
-+ return ret;
-+}
-+#else
-+static inline int notify_page_fault(struct pt_regs *regs)
- {
-- struct die_args args = {
-- .regs = regs,
-- .str = "page fault",
-- .err = err,
-- .trapnr = 14,
-- .signr = SIGSEGV
-- };
-- return atomic_notifier_call_chain(&notify_page_fault_chain,
-- DIE_PAGE_FAULT, &args);
-+ return 0;
- }
-+#endif
-
- /*
- * Return EIP plus the CS segment base. The segment limit is also
-@@ -110,7 +105,7 @@ static inline unsigned long get_segment_
- LDT and other horrors are only used in user space. */
- if (seg & (1<<2)) {
- /* Must lock the LDT while reading it. */
-- down(&current->mm->context.sem);
-+ mutex_lock(&current->mm->context.lock);
- desc = current->mm->context.ldt;
- desc = (void *)desc + (seg & ~7);
- } else {
-@@ -123,7 +118,7 @@ static inline unsigned long get_segment_
- base = get_desc_base((unsigned long *)desc);
-
- if (seg & (1<<2)) {
-- up(&current->mm->context.sem);
-+ mutex_unlock(&current->mm->context.lock);
- } else
- put_cpu();
-
-@@ -244,7 +239,7 @@ static void dump_fault_path(unsigned lon
- if (mfn_to_pfn(mfn) >= highstart_pfn)
- return;
- #endif
-- if (p[0] & _PAGE_PRESENT) {
-+ if ((p[0] & _PAGE_PRESENT) && !(p[0] & _PAGE_PSE)) {
- page = mfn_to_pfn(mfn) << PAGE_SHIFT;
- p = (unsigned long *) __va(page);
- address &= 0x001fffff;
-@@ -270,7 +265,8 @@ static void dump_fault_path(unsigned lon
- * it's allocated already.
- */
- if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn
-- && (page & _PAGE_PRESENT)) {
-+ && (page & _PAGE_PRESENT)
-+ && !(page & _PAGE_PSE)) {
- page = machine_to_phys(page & PAGE_MASK);
- page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT)
- & (PTRS_PER_PTE - 1)];
-@@ -416,6 +412,11 @@ fastcall void __kprobes do_page_fault(st
- int write, si_code;
- int fault;
-
-+ /*
-+ * We can fault from pretty much anywhere, with unknown IRQ state.
-+ */
-+ trace_hardirqs_fixup();
-+
- /* get the address */
- address = read_cr2();
-
-@@ -453,7 +454,7 @@ fastcall void __kprobes do_page_fault(st
- /* Can take a spurious fault if mapping changes R/O -> R/W. */
- if (spurious_fault(regs, address, error_code))
- return;
-- if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
-+ if (notify_page_fault(regs))
- return;
- /*
- * Don't take the mm semaphore here. If we fixup a prefetch
-@@ -462,7 +463,7 @@ fastcall void __kprobes do_page_fault(st
- goto bad_area_nosemaphore;
- }
-
-- if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
-+ if (notify_page_fault(regs))
- return;
-
- /* It's safe to allow irq's after cr2 has been saved and the vmalloc
-@@ -481,7 +482,7 @@ fastcall void __kprobes do_page_fault(st
-
- /* When running in the kernel we expect faults to occur only to
- * addresses in user space. All other faults represent errors in the
-- * kernel and should generate an OOPS. Unfortunatly, in the case of an
-+ * kernel and should generate an OOPS. Unfortunately, in the case of an
- * erroneous fault occurring in a code path which already holds mmap_sem
- * we will deadlock attempting to validate the fault against the
- * address space. Luckily the kernel only validly references user
-@@ -489,7 +490,7 @@ fastcall void __kprobes do_page_fault(st
- * exceptions table.
- *
- * As the vast majority of faults will be valid we will only perform
-- * the source reference check when there is a possibilty of a deadlock.
-+ * the source reference check when there is a possibility of a deadlock.
- * Attempt to lock the address space, if we cannot we then validate the
- * source. If this is invalid we can skip the address space check,
- * thus avoiding the deadlock.
-@@ -598,8 +599,8 @@ bad_area_nosemaphore:
- printk_ratelimit()) {
- printk("%s%s[%d]: segfault at %08lx eip %08lx "
- "esp %08lx error %lx\n",
-- tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
-- tsk->comm, tsk->pid, address, regs->eip,
-+ task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
-+ tsk->comm, task_pid_nr(tsk), address, regs->eip,
- regs->esp, error_code);
- }
- tsk->thread.cr2 = address;
-@@ -664,8 +665,7 @@ no_context:
- printk(KERN_ALERT "BUG: unable to handle kernel paging"
- " request");
- printk(" at virtual address %08lx\n",address);
-- printk(KERN_ALERT " printing eip:\n");
-- printk("%08lx\n", regs->eip);
-+ printk(KERN_ALERT "printing eip: %08lx\n", regs->eip);
- dump_fault_path(address);
- }
- tsk->thread.cr2 = address;
-@@ -681,14 +681,14 @@ no_context:
- */
- out_of_memory:
- up_read(&mm->mmap_sem);
-- if (is_init(tsk)) {
-+ if (is_global_init(tsk)) {
- yield();
- down_read(&mm->mmap_sem);
- goto survive;
- }
- printk("VM: killing process %s\n", tsk->comm);
- if (error_code & 4)
-- do_exit(SIGKILL);
-+ do_group_exit(SIGKILL);
- goto no_context;
-
- do_sigbus:
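
The largest change above drops the global page-fault notifier chain in favour of asking kprobes directly, matching what 2.6.24 did upstream. kprobe_running() reads a per-CPU variable, hence the preempt_disable() bracket around the check. A sketch of the resulting helper (32-bit flavour, where vm86 mode makes user_mode_vm() the right test; the name is hypothetical):

#include <linux/kprobes.h>
#include <linux/preempt.h>
#include <linux/ptrace.h>

static inline int demo_notify_page_fault(struct pt_regs *regs)
{
	int ret = 0;

	/* kprobe_running() needs smp_processor_id() */
	if (!user_mode_vm(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, 14))
			ret = 1;	/* a kprobe owned this fault */
		preempt_enable();
	}
	return ret;
}
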
---- head.orig/arch/x86/mm/fault_64-xen.c 2011-08-15 10:44:41.000000000 +0200
-+++ head/arch/x86/mm/fault_64-xen.c 2011-07-26 09:28:01.000000000 +0200
+--- head.orig/arch/x86/mm/fault-xen.c 2011-08-15 10:44:41.000000000 +0200
++++ head/arch/x86/mm/fault-xen.c 2011-07-26 09:28:01.000000000 +0200
@@ -25,6 +25,7 @@
#include <linux/kprobes.h>
#include <linux/uaccess.h>
diff --git a/patches.xen/xen3-patch-2.6.25 b/patches.xen/xen3-patch-2.6.25
index 2481e0bc11..20a712332b 100644
--- a/patches.xen/xen3-patch-2.6.25
+++ b/patches.xen/xen3-patch-2.6.25
@@ -4403,7 +4403,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ unsigned long range_end = mm->brk + 0x02000000;
+ return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
---- head.orig/arch/x86/kernel/rtc.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/arch/x86/kernel/rtc.c 2013-07-02 09:20:53.000000000 +0200
+++ head/arch/x86/kernel/rtc.c 2013-05-23 17:11:10.000000000 +0200
@@ -31,6 +31,7 @@ EXPORT_SYMBOL(cmos_lock);
DEFINE_SPINLOCK(rtc_lock);
@@ -6158,35 +6158,8 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
.next = c_next,
.stop = c_stop,
.show = show_cpuinfo,
---- head.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:56:27.000000000 +0100
-+++ head/arch/x86/kernel/smp_32-xen.c 2011-01-31 18:01:51.000000000 +0100
-@@ -168,7 +168,7 @@ void __send_IPI_shortcut(unsigned int sh
- }
- }
-
--void fastcall send_IPI_self(int vector)
-+void send_IPI_self(int vector)
- {
- __send_IPI_shortcut(APIC_DEST_SELF, vector);
- }
-@@ -224,13 +224,14 @@ static DEFINE_SPINLOCK(tlbstate_lock);
- * We need to reload %cr3 since the page tables may be going
- * away from under us..
- */
--void leave_mm(unsigned long cpu)
-+void leave_mm(int cpu)
- {
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
- BUG();
- cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
- load_cr3(swapper_pg_dir);
- }
-+EXPORT_SYMBOL_GPL(leave_mm);
-
- /*
- *
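
The send_IPI_self() hunk at the top of this section belongs to the tree-wide fastcall removal: once i386 builds defaulted to -mregparm=3, the explicit attribute added nothing and the 2.6.25 cycle deleted it. For reference only, the old annotation was approximately:

/* Historical definition from the pre-2.6.25 i386 tree (illustrative): */
#define fastcall __attribute__((regparm(3)))

void fastcall old_send_IPI_self(int vector);	/* before: explicit regparm */
void new_send_IPI_self(int vector);		/* after: same ABI by default */
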
---- head.orig/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:56:27.000000000 +0100
-+++ head/arch/x86/kernel/smp_64-xen.c 2011-01-31 18:01:51.000000000 +0100
+--- head.orig/arch/x86/kernel/smp-xen.c 2011-01-31 17:56:27.000000000 +0100
++++ head/arch/x86/kernel/smp-xen.c 2011-01-31 18:01:51.000000000 +0100
@@ -33,7 +33,7 @@
#ifndef CONFIG_XEN
@@ -8273,46 +8246,39 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ set_pmd(pmd, __pmd(__pa_symbol(swapper_pg_fixmap) | _PAGE_TABLE));
}
}
---- /dev/null 1970-01-01 00:00:00.000000000 +0000
+--- head.orig/arch/x86/mm/fault-xen.c 2011-07-26 09:28:01.000000000 +0200
+++ head/arch/x86/mm/fault-xen.c 2011-08-15 10:46:15.000000000 +0200
-@@ -0,0 +1,1037 @@
-+/*
-+ * Copyright (C) 1995 Linus Torvalds
-+ * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
-+ */
-+
-+#include <linux/signal.h>
-+#include <linux/sched.h>
-+#include <linux/kernel.h>
-+#include <linux/errno.h>
-+#include <linux/string.h>
-+#include <linux/types.h>
-+#include <linux/ptrace.h>
-+#include <linux/mman.h>
-+#include <linux/mm.h>
-+#include <linux/smp.h>
-+#include <linux/interrupt.h>
-+#include <linux/init.h>
-+#include <linux/tty.h>
-+#include <linux/vt_kern.h> /* For unblank_screen() */
-+#include <linux/compiler.h>
+@@ -1,6 +1,4 @@
+ /*
+- * linux/arch/x86-64/mm/fault.c
+- *
+ * Copyright (C) 1995 Linus Torvalds
+ * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
+ */
+@@ -20,34 +18,48 @@
+ #include <linux/tty.h>
+ #include <linux/vt_kern.h> /* For unblank_screen() */
+ #include <linux/compiler.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h> /* for max_low_pfn */
-+#include <linux/vmalloc.h>
-+#include <linux/module.h>
-+#include <linux/kprobes.h>
-+#include <linux/uaccess.h>
-+#include <linux/kdebug.h>
-+
-+#include <asm/system.h>
+ #include <linux/vmalloc.h>
+ #include <linux/module.h>
+ #include <linux/kprobes.h>
+ #include <linux/uaccess.h>
+ #include <linux/kdebug.h>
+-#include <linux/kprobes.h>
+
+ #include <asm/system.h>
+#include <asm/desc.h>
+#include <asm/segment.h>
-+#include <asm/pgalloc.h>
-+#include <asm/smp.h>
-+#include <asm/tlbflush.h>
-+#include <asm/proto.h>
-+#include <asm-generic/sections.h>
-+
+ #include <asm/pgalloc.h>
+ #include <asm/smp.h>
+ #include <asm/tlbflush.h>
+ #include <asm/proto.h>
+ #include <asm-generic/sections.h>
+
+-/* Page fault error code bits */
+-#define PF_PROT (1<<0) /* or no page found */
+/*
+ * Page fault error code bits
+ * bit 0 == 0 means no page found, 1 means protection fault
@@ -8322,34 +8288,47 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ * bit 4 == 1 means fault was an instruction fetch
+ */
+#define PF_PROT (1<<0)
-+#define PF_WRITE (1<<1)
+ #define PF_WRITE (1<<1)
+-#define PF_USER (1<<2)
+-#define PF_RSVD (1<<3)
+#define PF_USER (1<<2)
+#define PF_RSVD (1<<3)
-+#define PF_INSTR (1<<4)
-+
-+static inline int notify_page_fault(struct pt_regs *regs)
-+{
+ #define PF_INSTR (1<<4)
+
+-#ifdef CONFIG_KPROBES
+ static inline int notify_page_fault(struct pt_regs *regs)
+ {
+#ifdef CONFIG_KPROBES
-+ int ret = 0;
-+
-+ /* kprobe_running() needs smp_processor_id() */
+ int ret = 0;
+
+ /* kprobe_running() needs smp_processor_id() */
+#ifdef CONFIG_X86_32
+ if (!user_mode_vm(regs)) {
+#else
-+ if (!user_mode(regs)) {
-+#endif
-+ preempt_disable();
-+ if (kprobe_running() && kprobe_fault_handler(regs, 14))
-+ ret = 1;
-+ preempt_enable();
-+ }
-+
-+ return ret;
-+#else
-+ return 0;
+ if (!user_mode(regs)) {
+#endif
+ preempt_disable();
+ if (kprobe_running() && kprobe_fault_handler(regs, 14))
+ ret = 1;
+@@ -55,100 +67,164 @@ static inline int notify_page_fault(stru
+ }
+
+ return ret;
+-}
+ #else
+-static inline int notify_page_fault(struct pt_regs *regs)
+-{
+ return 0;
+-}
+ #endif
+}
-+
+
+-/* Sometimes the CPU reports invalid exceptions on prefetch.
+- Check that here and ignore.
+- Opcode checker based on code by Richard Brunner */
+-static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
+- unsigned long error_code)
+-{
+/*
+ * X86_32
+ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
@@ -8364,49 +8343,69 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+static int is_prefetch(struct pt_regs *regs, unsigned long addr,
+ unsigned long error_code)
+{
-+ unsigned char *instr;
-+ int scan_more = 1;
+ unsigned char *instr;
+ int scan_more = 1;
+- int prefetch = 0;
+ int prefetch = 0;
-+ unsigned char *max_instr;
-+
+ unsigned char *max_instr;
+
+- /* If it was a exec fault ignore */
+ /*
+ * If it was a exec (instruction fetch) fault on NX page, then
+ * do not ignore the fault:
+ */
-+ if (error_code & PF_INSTR)
-+ return 0;
+ if (error_code & PF_INSTR)
+ return 0;
+-
+- instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
+
+ instr = (unsigned char *)convert_ip_to_linear(current, regs);
-+ max_instr = instr + 15;
-+
-+ if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
-+ return 0;
-+
+ max_instr = instr + 15;
+
+ if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
+ return 0;
+
+- while (scan_more && instr < max_instr) {
+ while (scan_more && instr < max_instr) {
-+ unsigned char opcode;
-+ unsigned char instr_hi;
-+ unsigned char instr_lo;
-+
-+ if (probe_kernel_address(instr, opcode))
+ unsigned char opcode;
+ unsigned char instr_hi;
+ unsigned char instr_lo;
+
+ if (probe_kernel_address(instr, opcode))
+- break;
+ break;
-+
+
+- instr_hi = opcode & 0xf0;
+- instr_lo = opcode & 0x0f;
+ instr_hi = opcode & 0xf0;
+ instr_lo = opcode & 0x0f;
-+ instr++;
-+
+ instr++;
+
+- switch (instr_hi) {
+ switch (instr_hi) {
-+ case 0x20:
-+ case 0x30:
+ case 0x20:
+ case 0x30:
+- /* Values 0x26,0x2E,0x36,0x3E are valid x86
+- prefixes. In long mode, the CPU will signal
+- invalid opcode if some of these prefixes are
+- present so we will never get here anyway */
+ /*
+ * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
+ * In X86_64 long mode, the CPU will signal invalid
+ * opcode if some of these prefixes are present so
+ * X86_64 will never get here anyway
+ */
-+ scan_more = ((instr_lo & 7) == 0x6);
-+ break;
+ scan_more = ((instr_lo & 7) == 0x6);
+ break;
+-
+#ifdef CONFIG_X86_64
-+ case 0x40:
+ case 0x40:
+- /* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes
+- Need to figure out under what instruction mode the
+- instruction was issued ... */
+- /* Could check the LDT for lm, but for now it's good
+- enough to assume that long mode only uses well known
+- segments or kernel. */
+ /*
+ * In AMD64 long mode 0x40..0x4F are valid REX prefixes
+ * Need to figure out under what instruction mode the
@@ -8414,35 +8413,43 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ * but for now it's good enough to assume that long
+ * mode only uses well known segments or kernel.
+ */
-+ scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS)
-+ || (regs->cs == FLAT_USER_CS64);
-+ break;
+ scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS)
+ || (regs->cs == FLAT_USER_CS64);
+ break;
+-
+#endif
-+ case 0x60:
-+ /* 0x64 thru 0x67 are valid prefixes in all modes. */
-+ scan_more = (instr_lo & 0xC) == 0x4;
+ case 0x60:
+ /* 0x64 thru 0x67 are valid prefixes in all modes. */
+ scan_more = (instr_lo & 0xC) == 0x4;
+- break;
+ break;
-+ case 0xF0:
+ case 0xF0:
+- /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
+ /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
-+ scan_more = !instr_lo || (instr_lo>>1) == 1;
+ scan_more = !instr_lo || (instr_lo>>1) == 1;
+- break;
+ break;
-+ case 0x00:
-+ /* Prefetch instruction is 0x0F0D or 0x0F18 */
-+ scan_more = 0;
+ case 0x00:
+ /* Prefetch instruction is 0x0F0D or 0x0F18 */
+ scan_more = 0;
+
-+ if (probe_kernel_address(instr, opcode))
-+ break;
-+ prefetch = (instr_lo == 0xF) &&
-+ (opcode == 0x0D || opcode == 0x18);
-+ break;
-+ default:
-+ scan_more = 0;
+ if (probe_kernel_address(instr, opcode))
+ break;
+ prefetch = (instr_lo == 0xF) &&
+ (opcode == 0x0D || opcode == 0x18);
+- break;
+ break;
+ default:
+ scan_more = 0;
+ break;
+- }
+ }
-+ }
-+ return prefetch;
-+}
-+
+ }
+ return prefetch;
+ }
+
+-static int bad_address(void *p)
+-{
+static void force_sig_info_fault(int si_signo, int si_code,
+ unsigned long address, struct task_struct *tsk)
+{
@@ -8458,13 +8465,15 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+#ifdef CONFIG_X86_64
+static int bad_address(void *p)
+{
-+ unsigned long dummy;
-+ return probe_kernel_address((unsigned long *)p, dummy);
+ unsigned long dummy;
+ return probe_kernel_address((unsigned long *)p, dummy);
+-}
+}
+#endif
-+
+
+-void dump_pagetable(unsigned long address)
+static void dump_pagetable(unsigned long address)
-+{
+ {
+#ifdef CONFIG_X86_32
+ __typeof__(pte_val(__pte(0))) page;
+
@@ -8503,41 +8512,44 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+
+ printk(KERN_CONT "\n");
+#else /* CONFIG_X86_64 */
-+ pgd_t *pgd;
-+ pud_t *pud;
-+ pmd_t *pmd;
-+ pte_t *pte;
-+
-+ pgd = (pgd_t *)read_cr3();
-+
-+ pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
-+ pgd += pgd_index(address);
-+ if (bad_address(pgd)) goto bad;
-+ printk("PGD %lx ", pgd_val(*pgd));
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+@@ -160,62 +236,191 @@ void dump_pagetable(unsigned long addres
+ pgd += pgd_index(address);
+ if (bad_address(pgd)) goto bad;
+ printk("PGD %lx ", pgd_val(*pgd));
+- if (!pgd_present(*pgd)) goto ret;
+ if (!pgd_present(*pgd)) goto ret;
-+
-+ pud = pud_offset(pgd, address);
-+ if (bad_address(pud)) goto bad;
+
+ pud = pud_offset(pgd, address);
+ if (bad_address(pud)) goto bad;
+- printk("PUD %lx ", pud_val(*pud));
+- if (!pud_present(*pud)) goto ret;
+ printk(KERN_CONT "PUD %lx ", pud_val(*pud));
+ if (!pud_present(*pud) || pud_large(*pud))
+ goto ret;
-+
-+ pmd = pmd_offset(pud, address);
-+ if (bad_address(pmd)) goto bad;
+
+ pmd = pmd_offset(pud, address);
+ if (bad_address(pmd)) goto bad;
+- printk("PMD %lx ", pmd_val(*pmd));
+ printk(KERN_CONT "PMD %lx ", pmd_val(*pmd));
-+ if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;
-+
-+ pte = pte_offset_kernel(pmd, address);
-+ if (bad_address(pte)) goto bad;
+ if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;
+
+ pte = pte_offset_kernel(pmd, address);
+ if (bad_address(pte)) goto bad;
+- printk("PTE %lx", pte_val(*pte));
+ printk(KERN_CONT "PTE %lx", pte_val(*pte));
-+ret:
+ ret:
+- printk("\n");
+ printk(KERN_CONT "\n");
-+ return;
-+bad:
-+ printk("BAD\n");
+ return;
+ bad:
+ printk("BAD\n");
+#endif
-+}
-+
+ }
+
+-static const char errata93_warning[] =
+#ifdef CONFIG_X86_32
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
+{
@@ -8589,36 +8601,47 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+
+#ifdef CONFIG_X86_64
+static const char errata93_warning[] =
-+KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
-+KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
-+KERN_ERR "******* Please consider a BIOS update.\n"
-+KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+ KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
+ KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
+ KERN_ERR "******* Please consider a BIOS update.\n"
+ KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+#endif
-+
-+/* Workaround for K8 erratum #93 & buggy BIOS.
-+ BIOS SMM functions are required to use a specific workaround
+
+ /* Workaround for K8 erratum #93 & buggy BIOS.
+ BIOS SMM functions are required to use a specific workaround
+- to avoid corruption of the 64bit RIP register on C stepping K8.
+- A lot of BIOS that didn't get tested properly miss this.
+ to avoid corruption of the 64bit RIP register on C stepping K8.
+ A lot of BIOS that didn't get tested properly miss this.
-+ The OS sees this as a page fault with the upper 32bits of RIP cleared.
-+ Try to work around it here.
+ The OS sees this as a page fault with the upper 32bits of RIP cleared.
+ Try to work around it here.
+- Note we only handle faults in kernel here. */
+-
+-static int is_errata93(struct pt_regs *regs, unsigned long address)
+ Note we only handle faults in kernel here.
+ Does nothing for X86_32
+ */
+static int is_errata93(struct pt_regs *regs, unsigned long address)
-+{
+ {
+#ifdef CONFIG_X86_64
-+ static int warned;
+ static int warned;
+- if (address != regs->rip)
+ if (address != regs->ip)
-+ return 0;
+ return 0;
+- if ((address >> 32) != 0)
+ if ((address >> 32) != 0)
-+ return 0;
-+ address |= 0xffffffffUL << 32;
+ return 0;
+ address |= 0xffffffffUL << 32;
+- if ((address >= (u64)_stext && address <= (u64)_etext) ||
+- (address >= MODULES_VADDR && address <= MODULES_END)) {
+ if ((address >= (u64)_stext && address <= (u64)_etext) ||
+ (address >= MODULES_VADDR && address <= MODULES_END)) {
-+ if (!warned) {
+ if (!warned) {
+- printk(errata93_warning);
+ printk(errata93_warning);
-+ warned = 1;
-+ }
+ warned = 1;
+ }
+- regs->rip = address;
+ regs->ip = address;
+ return 1;
+ }
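
The erratum #93 recovery above is pure bit surgery: the fault address must equal the faulting RIP, its upper 32 bits must have been cleared, and OR-ing 0xffffffff back into the top half must yield an address inside kernel text or the module range. A stand-alone sketch of that check, with _stext/_etext stubbed as placeholder constants:

#include <stdio.h>
#include <stdint.h>

#define STEXT 0xffffffff81000000ULL  /* placeholder for _stext */
#define ETEXT 0xffffffff82000000ULL  /* placeholder for _etext */

static int errata93_candidate(uint64_t rip, uint64_t addr)
{
    if (addr != rip)                    /* must fault on the fetch at RIP */
        return 0;
    if (addr >> 32)                     /* upper half must have been lost */
        return 0;
    addr |= (uint64_t)0xffffffff << 32; /* re-canonicalize the top bits */
    return addr >= STEXT && addr <= ETEXT;
}

int main(void)
{
    uint64_t truncated = STEXT & 0xffffffff; /* RIP with top half cleared */
    printf("recoverable: %d\n", errata93_candidate(truncated, truncated));
    return 0;
}
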
@@ -8637,7 +8660,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+#ifdef CONFIG_X86_64
+ if ((regs->cs == __USER32_CS || regs->cs == FLAT_USER_CS32 ||
+ (regs->cs & (1<<2))) && (address >> 32))
-+ return 1;
+ return 1;
+#endif
+ return 0;
+}
@@ -8658,9 +8681,10 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ do_invalid_op(regs, 0);
+ return 1;
+ }
-+ }
+ }
+#endif
-+ return 0;
+ return 0;
+-}
+}
+
+static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
@@ -8670,7 +8694,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ if (!oops_may_print())
+ return;
+#endif
-+
+
+#ifdef CONFIG_X86_PAE
+ if (error_code & PF_INSTR) {
+ unsigned int level;
@@ -8699,19 +8723,16 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+}
+
+#ifdef CONFIG_X86_64
-+static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
-+ unsigned long error_code)
-+{
-+ unsigned long flags = oops_begin();
-+ struct task_struct *tsk;
-+
-+ printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
-+ current->comm, address);
-+ dump_pagetable(address);
-+ tsk = current;
-+ tsk->thread.cr2 = address;
-+ tsk->thread.trap_no = 14;
-+ tsk->thread.error_code = error_code;
+ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
+ unsigned long error_code)
+ {
+@@ -229,23 +434,108 @@ static noinline void pgtable_bad(unsigne
+ tsk->thread.cr2 = address;
+ tsk->thread.trap_no = 14;
+ tsk->thread.error_code = error_code;
+- __die("Bad pagetable", regs, error_code);
+- oops_end(flags);
+- do_exit(SIGKILL);
+ if (__die("Bad pagetable", regs, error_code))
+ regs = NULL;
+ oops_end(flags, regs, SIGKILL);
@@ -8726,9 +8747,9 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ return 0;
+
+ return 1;
-+}
-+
-+/*
+ }
+
+ /*
+ * Handle a spurious fault caused by a stale TLB entry. This allows
+ * us to lazily refresh the TLB when increasing the permissions of a
+ * kernel page (RO -> RW or NX -> X). Doing it eagerly is very
@@ -8779,12 +8800,12 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ * Handle a fault on the vmalloc or module mapping area
+ *
+ * X86_64
-+ * Handle a fault on the vmalloc area
-+ *
-+ * This assumes no large pages in there.
-+ */
-+static int vmalloc_fault(unsigned long address)
-+{
+ * Handle a fault on the vmalloc area
+ *
+ * This assumes no large pages in there.
+ */
+ static int vmalloc_fault(unsigned long address)
+ {
+#ifdef CONFIG_X86_32
+ unsigned long pgd_paddr;
+ pmd_t *pmd_k;
@@ -8805,121 +8826,149 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ return -1;
+ return 0;
+#else
-+ pgd_t *pgd, *pgd_ref;
-+ pud_t *pud, *pud_ref;
-+ pmd_t *pmd, *pmd_ref;
-+ pte_t *pte, *pte_ref;
-+
+ pgd_t *pgd, *pgd_ref;
+ pud_t *pud, *pud_ref;
+ pmd_t *pmd, *pmd_ref;
+ pte_t *pte, *pte_ref;
+
+ /* Make sure we are in vmalloc area */
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
+ return -1;
+
-+ /* Copy kernel mappings over when needed. This can also
-+ happen within a race in page table update. In the later
-+ case just flush. */
-+
-+ /* On Xen the line below does not always work. Needs investigating! */
-+ /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
-+ pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
-+ pgd += pgd_index(address);
-+ pgd_ref = pgd_offset_k(address);
-+ if (pgd_none(*pgd_ref))
-+ return -1;
-+ if (pgd_none(*pgd))
-+ set_pgd(pgd, *pgd_ref);
-+ else
-+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-+
-+ /* Below here mismatches are bugs because these lower tables
-+ are shared */
-+
-+ pud = pud_offset(pgd, address);
-+ pud_ref = pud_offset(pgd_ref, address);
-+ if (pud_none(*pud_ref))
-+ return -1;
-+ if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
-+ BUG();
-+ pmd = pmd_offset(pud, address);
-+ pmd_ref = pmd_offset(pud_ref, address);
-+ if (pmd_none(*pmd_ref))
-+ return -1;
-+ if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
-+ BUG();
-+ pte_ref = pte_offset_kernel(pmd_ref, address);
-+ if (!pte_present(*pte_ref))
-+ return -1;
-+ pte = pte_offset_kernel(pmd, address);
-+ /* Don't use pte_page here, because the mappings can point
-+ outside mem_map, and the NUMA hash lookup cannot handle
-+ that. */
-+ if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
-+ BUG();
-+ return 0;
+ /* Copy kernel mappings over when needed. This can also
+ happen within a race in page table update. In the latter
+ case just flush. */
+@@ -287,89 +577,42 @@ static int vmalloc_fault(unsigned long a
+ if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
+ BUG();
+ return 0;
+#endif
-+}
-+
-+int show_unhandled_signals = 1;
-+
-+/*
-+ * This routine handles page faults. It determines the address,
-+ * and the problem, and then passes it off to one of the appropriate
-+ * routines.
-+ */
+ }
+
+ int show_unhandled_signals = 1;
+
+-
+-#define MEM_VERBOSE 1
+-
+-#ifdef MEM_VERBOSE
+-#define MEM_LOG(_f, _a...) \
+- printk("fault.c:[%d]-> " _f "\n", \
+- __LINE__ , ## _a )
+-#else
+-#define MEM_LOG(_f, _a...) ((void)0)
+-#endif
+-
+-static int spurious_fault(struct pt_regs *regs,
+- unsigned long address,
+- unsigned long error_code)
+-{
+- pgd_t *pgd;
+- pud_t *pud;
+- pmd_t *pmd;
+- pte_t *pte;
+-
+-#ifdef CONFIG_XEN
+- /* Faults in hypervisor area are never spurious. */
+- if ((address >= HYPERVISOR_VIRT_START) &&
+- (address < HYPERVISOR_VIRT_END))
+- return 0;
+-#endif
+-
+- /* Reserved-bit violation or user access to kernel space? */
+- if (error_code & (PF_RSVD|PF_USER))
+- return 0;
+-
+- pgd = init_mm.pgd + pgd_index(address);
+- if (!pgd_present(*pgd))
+- return 0;
+-
+- pud = pud_offset(pgd, address);
+- if (!pud_present(*pud))
+- return 0;
+-
+- pmd = pmd_offset(pud, address);
+- if (!pmd_present(*pmd))
+- return 0;
+-
+- pte = pte_offset_kernel(pmd, address);
+- if (!pte_present(*pte))
+- return 0;
+- if ((error_code & PF_WRITE) && !pte_write(*pte))
+- return 0;
+- if ((error_code & PF_INSTR) && (__pte_val(*pte) & _PAGE_NX))
+- return 0;
+-
+- return 1;
+-}
+-
+ /*
+ * This routine handles page faults. It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ */
+-asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
+- unsigned long error_code)
+#ifdef CONFIG_X86_64
+asmlinkage
+#endif
+void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
-+{
-+ struct task_struct *tsk;
-+ struct mm_struct *mm;
+ {
+ struct task_struct *tsk;
+ struct mm_struct *mm;
+- struct vm_area_struct * vma;
+ struct vm_area_struct *vma;
-+ unsigned long address;
+ unsigned long address;
+- const struct exception_table_entry *fixup;
+- int write, fault;
+ int write, si_code;
+ int fault;
+#ifdef CONFIG_X86_64
-+ unsigned long flags;
+ unsigned long flags;
+- siginfo_t info;
+-
+- if (!user_mode(regs))
+- error_code &= ~PF_USER; /* means kernel */
+#endif
-+
-+ /*
-+ * We can fault from pretty much anywhere, with unknown IRQ state.
-+ */
-+ trace_hardirqs_fixup();
-+
+
+ /*
+ * We can fault from pretty much anywhere, with unknown IRQ state.
+ */
+ trace_hardirqs_fixup();
+
+ /* Set the "privileged fault" bit to something sane. */
+ if (user_mode_vm(regs))
+ error_code |= PF_USER;
+ else
+ error_code &= ~PF_USER;
+
-+ tsk = current;
-+ mm = tsk->mm;
-+ prefetchw(&mm->mmap_sem);
-+
-+ /* get the address */
-+ address = read_cr2();
-+
+ tsk = current;
+ mm = tsk->mm;
+ prefetchw(&mm->mmap_sem);
+@@ -377,8 +620,10 @@ asmlinkage void __kprobes do_page_fault(
+ /* get the address */
+ address = read_cr2();
+
+- info.si_code = SEGV_MAPERR;
+ si_code = SEGV_MAPERR;
-+
+
+ if (notify_page_fault(regs))
+ return;
-+
-+ /*
-+ * We fault-in kernel-space virtual memory on-demand. The
-+ * 'reference' page table is init_mm.pgd.
-+ *
-+ * NOTE! We MUST NOT take any locks for this case. We may
-+ * be in an interrupt or a critical region, and should
-+ * only copy the information from the master page table,
-+ * nothing more.
-+ *
-+ * This verifies that the fault happens in kernel space
-+ * (error_code & 4) == 0, and that the fault was not a
-+ * protection error (error_code & 9) == 0.
-+ */
+
+ /*
+ * We fault-in kernel-space virtual memory on-demand. The
+@@ -393,22 +638,28 @@ asmlinkage void __kprobes do_page_fault(
+ * (error_code & 4) == 0, and that the fault was not a
+ * protection error (error_code & 9) == 0.
+ */
+#ifdef CONFIG_X86_32
+ if (unlikely(address >= TASK_SIZE)) {
+#else
-+ if (unlikely(address >= TASK_SIZE64)) {
+ if (unlikely(address >= TASK_SIZE64)) {
+- /*
+- * Don't check for the module range here: its PML4
+- * is always initialized because it's shared with the main
+- * kernel text. Only vmalloc may need PML4 syncups.
+- */
+#endif
+ /* Faults in hypervisor area can never be patched up. */
+#if defined(CONFIG_X86_XEN)
@@ -8930,22 +8979,32 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ && address < HYPERVISOR_VIRT_END)
+ goto bad_area_nosemaphore;
+#endif
-+ if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+ if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+- ((address >= VMALLOC_START && address < VMALLOC_END))) {
+- if (vmalloc_fault(address) >= 0)
+- return;
+- }
+- /* Can take a spurious fault if mapping changes R/O -> R/W. */
+- if (spurious_fault(regs, address, error_code))
+ vmalloc_fault(address) >= 0)
-+ return;
+ return;
+- if (notify_page_fault(regs))
+
+ /* Can handle a stale RO->RW TLB */
+ if (spurious_fault(address, error_code))
-+ return;
-+
-+ /*
-+ * Don't take the mm semaphore here. If we fixup a prefetch
-+ * fault we could otherwise deadlock.
-+ */
-+ goto bad_area_nosemaphore;
-+ }
-+
+ return;
+
+ /*
+ * Don't take the mm semaphore here. If we fixup a prefetch
+ * fault we could otherwise deadlock.
+@@ -416,18 +667,29 @@ asmlinkage void __kprobes do_page_fault(
+ goto bad_area_nosemaphore;
+ }
+
+- if (notify_page_fault(regs))
+- return;
+
+- if (likely(regs->eflags & X86_EFLAGS_IF))
+#ifdef CONFIG_X86_32
+ /* It's safe to allow irq's after cr2 has been saved and the vmalloc
+ fault has been handled. */
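
The stale RO->RW TLB case handled a few lines up: a fault is spurious when the page tables, re-read now, already allow the faulting access, so nothing beyond the implicit TLB refill is needed. A user-space restatement of that decision, with the PTE reduced to a mock flags word and the PF_* masks mirroring those in the old fault_64-xen.c further down:

#include <stdio.h>

#define PF_PROT  (1 << 0)
#define PF_WRITE (1 << 1)
#define PF_USER  (1 << 2)
#define PF_RSVD  (1 << 3)
#define PF_INSTR (1 << 4)

#define PTE_PRESENT (1 << 0)
#define PTE_WRITE   (1 << 1)
#define PTE_NX      (1 << 2)   /* mock stand-in for _PAGE_NX */

static int fault_is_spurious(unsigned long error_code, unsigned long pte)
{
    if (error_code & (PF_RSVD | PF_USER)) /* never spurious */
        return 0;
    if (!(pte & PTE_PRESENT))
        return 0;
    if ((error_code & PF_WRITE) && !(pte & PTE_WRITE))
        return 0;
    if ((error_code & PF_INSTR) && (pte & PTE_NX))
        return 0;
    return 1;                  /* PTE already allows it: stale TLB */
}

int main(void)
{
    /* kernel write fault, PTE meanwhile upgraded RO -> RW: spurious */
    printf("%d\n", fault_is_spurious(PF_WRITE, PTE_PRESENT | PTE_WRITE));
    return 0;
}
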
@@ -8960,75 +9019,79 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ goto bad_area_nosemaphore;
+#else /* CONFIG_X86_64 */
+ if (likely(regs->flags & X86_EFLAGS_IF))
-+ local_irq_enable();
-+
-+ if (unlikely(error_code & PF_RSVD))
-+ pgtable_bad(address, regs, error_code);
-+
-+ /*
+ local_irq_enable();
+
+ if (unlikely(error_code & PF_RSVD))
+ pgtable_bad(address, regs, error_code);
+
+ /*
+- * If we're in an interrupt or have no user
+- * context, we must not take the fault..
+ * If we're in an interrupt, have no user context or are running in an
+ * atomic region then we must not take the fault.
-+ */
-+ if (unlikely(in_atomic() || !mm))
-+ goto bad_area_nosemaphore;
-+
-+ /*
-+ * User-mode registers count as a user access even for any
-+ * potential system fault or CPU buglet.
-+ */
-+ if (user_mode_vm(regs))
-+ error_code |= PF_USER;
+ */
+ if (unlikely(in_atomic() || !mm))
+ goto bad_area_nosemaphore;
+@@ -438,8 +700,8 @@ asmlinkage void __kprobes do_page_fault(
+ */
+ if (user_mode_vm(regs))
+ error_code |= PF_USER;
+-
+- again:
+again:
+#endif
-+ /* When running in the kernel we expect faults to occur only to
-+ * addresses in user space. All other faults represent errors in the
-+ * kernel and should generate an OOPS. Unfortunately, in the case of an
-+ * erroneous fault occurring in a code path which already holds mmap_sem
-+ * we will deadlock attempting to validate the fault against the
-+ * address space. Luckily the kernel only validly references user
-+ * space from well defined areas of code, which are listed in the
-+ * exceptions table.
-+ *
-+ * As the vast majority of faults will be valid we will only perform
-+ * the source reference check when there is a possibility of a deadlock.
-+ * Attempt to lock the address space, if we cannot we then validate the
-+ * source. If this is invalid we can skip the address space check,
-+ * thus avoiding the deadlock.
-+ */
-+ if (!down_read_trylock(&mm->mmap_sem)) {
-+ if ((error_code & PF_USER) == 0 &&
+ /* When running in the kernel we expect faults to occur only to
+ * addresses in user space. All other faults represent errors in the
+ * kernel and should generate an OOPS. Unfortunately, in the case of an
+@@ -457,7 +719,7 @@ asmlinkage void __kprobes do_page_fault(
+ */
+ if (!down_read_trylock(&mm->mmap_sem)) {
+ if ((error_code & PF_USER) == 0 &&
+- !search_exception_tables(regs->rip))
+ !search_exception_tables(regs->ip))
-+ goto bad_area_nosemaphore;
-+ down_read(&mm->mmap_sem);
-+ }
-+
-+ vma = find_vma(mm, address);
-+ if (!vma)
-+ goto bad_area;
+ goto bad_area_nosemaphore;
+ down_read(&mm->mmap_sem);
+ }
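
The trylock-then-validate dance above, restated with a POSIX rwlock: an unconditional blocking acquire could deadlock if the fault hit a path that already holds the lock, so blocking is only attempted after confirming the caller is somewhere a sleep is legal. Here in_fixup_region() is a mock stand-in for search_exception_tables(regs->ip):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;

static int in_fixup_region(void) { return 1; } /* mock validation */

static int lock_for_fault(int user_fault)
{
    if (pthread_rwlock_tryrdlock(&mmap_sem) == 0)
        return 0;                      /* fast path: lock acquired */
    if (!user_fault && !in_fixup_region())
        return -1;                     /* would deadlock: bail out */
    pthread_rwlock_rdlock(&mmap_sem);  /* safe to block now */
    return 0;
}

int main(void)
{
    if (lock_for_fault(1) == 0) {
        puts("mmap_sem taken");
        pthread_rwlock_unlock(&mmap_sem);
    }
    return 0;
}

(Built with -pthread; the kernel's down_read_trylock has richer semantics around writers, but the bail-out shape is the same.)
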
+@@ -465,15 +727,18 @@ asmlinkage void __kprobes do_page_fault(
+ vma = find_vma(mm, address);
+ if (!vma)
+ goto bad_area;
+- if (likely(vma->vm_start <= address))
+ if (vma->vm_start <= address)
-+ goto good_area;
-+ if (!(vma->vm_flags & VM_GROWSDOWN))
-+ goto bad_area;
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+- if (error_code & 4) {
+- /* Allow userspace just enough access below the stack pointer
+- * to let the 'enter' instruction work.
+ if (error_code & PF_USER) {
+ /*
+ * Accessing the stack below %sp is always a bug.
+ * The large cushion allows instructions like enter
+ * and pusha to work. ("enter $65535,$31" pushes
+ * 32 pointers and then decrements %sp by 65535.)
-+ */
+ */
+- if (address + 65536 + 32 * sizeof(unsigned long) < regs->rsp)
+ if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
-+ goto bad_area;
-+ }
-+ if (expand_stack(vma, address))
-+ goto bad_area;
-+/*
-+ * Ok, we have a good vm_area for this memory access, so
-+ * we can handle it..
-+ */
-+good_area:
+ goto bad_area;
+ }
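
The cushion in the check just above, in numbers: on 64-bit, 32 pushed pointers are 256 bytes, so the deepest access still tolerated sits 65536 + 256 bytes below %sp; anything deeper is rejected as a bug. A tiny check of that bound (the sample %sp value is arbitrary):

#include <stdio.h>

static int plausible_stack_access(unsigned long address, unsigned long sp)
{
    return address + 65536 + 32 * sizeof(unsigned long) >= sp;
}

int main(void)
{
    unsigned long sp = 0x7fffffffe000UL;                     /* example %sp */
    printf("%d\n", plausible_stack_access(sp - 65536, sp));  /* 1: allowed */
    printf("%d\n", plausible_stack_access(sp - 131072, sp)); /* 0: bug */
    return 0;
}
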
+ if (expand_stack(vma, address))
+@@ -483,23 +748,26 @@ asmlinkage void __kprobes do_page_fault(
+ * we can handle it..
+ */
+ good_area:
+- info.si_code = SEGV_ACCERR;
+ si_code = SEGV_ACCERR;
-+ write = 0;
-+ switch (error_code & (PF_PROT|PF_WRITE)) {
+ write = 0;
+ switch (error_code & (PF_PROT|PF_WRITE)) {
+- default: /* 3: write, present */
+- /* fall through */
+- case PF_WRITE: /* write, not present */
+- if (!(vma->vm_flags & VM_WRITE))
+- goto bad_area;
+- write++;
+- break;
+- case PF_PROT: /* read, present */
+ default: /* 3: write, present */
+ /* fall through */
+ case PF_WRITE: /* write, not present */
@@ -9040,29 +9103,22 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ goto bad_area;
+ case 0: /* read, not present */
+ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
-+ goto bad_area;
-+ }
-+
+ goto bad_area;
+- case 0: /* read, not present */
+- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
+- goto bad_area;
+ }
+
+#ifdef CONFIG_X86_32
+survive:
+#endif
-+ /*
-+ * If for any reason at all we couldn't handle the fault,
-+ * make sure we exit gracefully rather than endlessly redo
-+ * the fault.
-+ */
-+ fault = handle_mm_fault(mm, vma, address, write);
-+ if (unlikely(fault & VM_FAULT_ERROR)) {
-+ if (fault & VM_FAULT_OOM)
-+ goto out_of_memory;
-+ else if (fault & VM_FAULT_SIGBUS)
-+ goto do_sigbus;
-+ BUG();
-+ }
-+ if (fault & VM_FAULT_MAJOR)
-+ tsk->maj_flt++;
-+ else
-+ tsk->min_flt++;
+ /*
+ * If for any reason at all we couldn't handle the fault,
+ * make sure we exit gracefully rather than endlessly redo
+@@ -517,6 +785,17 @@ good_area:
+ tsk->maj_flt++;
+ else
+ tsk->min_flt++;
+
+#ifdef CONFIG_X86_32
+ /*
@@ -9074,37 +9130,44 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ tsk->thread.screen_bitmap |= 1 << bit;
+ }
+#endif
-+ up_read(&mm->mmap_sem);
-+ return;
-+
-+/*
-+ * Something tried to access memory that isn't in our memory map..
-+ * Fix it, but check if it's kernel or user first..
-+ */
-+bad_area:
-+ up_read(&mm->mmap_sem);
-+
-+bad_area_nosemaphore:
-+ /* User mode accesses just cause a SIGSEGV */
-+ if (error_code & PF_USER) {
-+ /*
-+ * It's possible to have interrupts off here.
-+ */
-+ local_irq_enable();
-+
+ up_read(&mm->mmap_sem);
+ return;
+
+@@ -530,87 +809,94 @@ bad_area:
+ bad_area_nosemaphore:
+ /* User mode accesses just cause a SIGSEGV */
+ if (error_code & PF_USER) {
+-
+ /*
+ * It's possible to have interrupts off here.
+ */
+ local_irq_enable();
+
+ /*
+ * Valid to do another page fault here because this one came
+ * from user space.
+ */
-+ if (is_prefetch(regs, address, error_code))
-+ return;
-+
+ if (is_prefetch(regs, address, error_code))
+ return;
+
+- /* Work around K8 erratum #100 K8 in compat mode
+- occasionally jumps to illegal addresses >4GB. We
+- catch this here in the page fault handler because
+- these addresses are not reachable. Just detect this
+- case and return. Any code segment in LDT is
+- compatibility mode. */
+- if ((regs->cs == __USER32_CS || regs->cs == FLAT_USER_CS32 ||
+- (regs->cs & (1<<2))) && (address >> 32))
+ if (is_errata100(regs, address))
-+ return;
-+
-+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
-+ printk_ratelimit()) {
-+ printk(
+ return;
+
+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+ printk_ratelimit()) {
+ printk(
+- "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n",
+- tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
+- tsk->comm, tsk->pid, address, regs->rip,
+- regs->rsp, error_code);
+#ifdef CONFIG_X86_32
+ "%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
+#else
@@ -9115,24 +9178,36 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ regs->sp, error_code);
+ print_vma_addr(" in ", regs->ip);
+ printk("\n");
-+ }
+ }
+-
+
-+ tsk->thread.cr2 = address;
-+ /* Kernel addresses are always protection faults */
-+ tsk->thread.error_code = error_code | (address >= TASK_SIZE);
-+ tsk->thread.trap_no = 14;
+ tsk->thread.cr2 = address;
+ /* Kernel addresses are always protection faults */
+ tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+ tsk->thread.trap_no = 14;
+- info.si_signo = SIGSEGV;
+- info.si_errno = 0;
+- /* info.si_code has been set above */
+- info.si_addr = (void __user *)address;
+- force_sig_info(SIGSEGV, &info, tsk);
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk);
-+ return;
-+ }
-+
+ return;
+ }
+
+ if (is_f00f_bug(regs, address))
+ return;
+
-+no_context:
-+ /* Are we prepared to handle this kernel fault? */
+ no_context:
+-
+ /* Are we prepared to handle this kernel fault? */
+- fixup = search_exception_tables(regs->rip);
+- if (fixup) {
+- regs->rip = fixup->fixup;
+ if (fixup_exception(regs))
-+ return;
-+
+ return;
+- }
+
+- /*
+ /*
+ * X86_32
+ * Valid to do another page fault here, because if this fault
@@ -9140,29 +9215,42 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ * handled it.
+ *
+ * X86_64
-+ * Hall of shame of CPU/BIOS bugs.
-+ */
+ * Hall of shame of CPU/BIOS bugs.
+ */
+-
+- if (is_prefetch(regs, address, error_code))
+- return;
+ if (is_prefetch(regs, address, error_code))
+ return;
-+
-+ if (is_errata93(regs, address))
+
+ if (is_errata93(regs, address))
+- return;
+ return;
-+
-+/*
-+ * Oops. The kernel tried to access some bad page. We'll have to
-+ * terminate things with extreme prejudice.
-+ */
+
+ /*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+-
+#ifdef CONFIG_X86_32
+ bust_spinlocks(1);
+#else
-+ flags = oops_begin();
+ flags = oops_begin();
+#endif
+
+ show_fault_oops(regs, error_code, address);
-+
-+ tsk->thread.cr2 = address;
-+ tsk->thread.trap_no = 14;
-+ tsk->thread.error_code = error_code;
+
+- if (address < PAGE_SIZE)
+- printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+- else
+- printk(KERN_ALERT "Unable to handle kernel paging request");
+- printk(" at %016lx RIP: \n" KERN_ALERT,address);
+- printk_address(regs->rip);
+- dump_pagetable(address);
+ tsk->thread.cr2 = address;
+ tsk->thread.trap_no = 14;
+ tsk->thread.error_code = error_code;
+- __die("Oops", regs, error_code);
+
+#ifdef CONFIG_X86_32
+ die("Oops", regs, error_code);
@@ -9171,57 +9259,67 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+#else
+ if (__die("Oops", regs, error_code))
+ regs = NULL;
-+ /* Executive summary in case the body of the oops scrolled away */
-+ printk(KERN_EMERG "CR2: %016lx\n", address);
+ /* Executive summary in case the body of the oops scrolled away */
+ printk(KERN_EMERG "CR2: %016lx\n", address);
+- oops_end(flags);
+- do_exit(SIGKILL);
+ oops_end(flags, regs, SIGKILL);
+#endif
-+
-+/*
-+ * We ran out of memory, or some other thing happened to us that made
-+ * us unable to handle the page fault gracefully.
-+ */
-+out_of_memory:
-+ up_read(&mm->mmap_sem);
+
+ /*
+ * We ran out of memory, or some other thing happened to us that made
+@@ -618,12 +904,18 @@ no_context:
+ */
+ out_of_memory:
+ up_read(&mm->mmap_sem);
+- if (is_global_init(current)) {
+ if (is_global_init(tsk)) {
-+ yield();
+ yield();
+#ifdef CONFIG_X86_32
+ down_read(&mm->mmap_sem);
+ goto survive;
+#else
-+ goto again;
+ goto again;
+#endif
-+ }
+ }
+
-+ printk("VM: killing process %s\n", tsk->comm);
+ printk("VM: killing process %s\n", tsk->comm);
+- if (error_code & 4)
+ if (error_code & PF_USER)
-+ do_group_exit(SIGKILL);
-+ goto no_context;
-+
-+do_sigbus:
-+ up_read(&mm->mmap_sem);
-+
-+ /* Kernel mode? Handle exceptions or die */
-+ if (!(error_code & PF_USER))
-+ goto no_context;
+ do_group_exit(SIGKILL);
+ goto no_context;
+
+@@ -633,16 +925,15 @@ do_sigbus:
+ /* Kernel mode? Handle exceptions or die */
+ if (!(error_code & PF_USER))
+ goto no_context;
+-
+#ifdef CONFIG_X86_32
+ /* User space => ok to do another page fault */
+ if (is_prefetch(regs, address, error_code))
+ return;
+#endif
-+ tsk->thread.cr2 = address;
-+ tsk->thread.error_code = error_code;
-+ tsk->thread.trap_no = 14;
+ tsk->thread.cr2 = address;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 14;
+- info.si_signo = SIGBUS;
+- info.si_errno = 0;
+- info.si_code = BUS_ADRERR;
+- info.si_addr = (void __user *)address;
+- force_sig_info(SIGBUS, &info, tsk);
+- return;
+ force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
-+}
-+
-+DEFINE_SPINLOCK(pgd_lock);
-+LIST_HEAD(pgd_list);
-+
-+#define pgd_page_table(what, pg) \
-+ spin_##what(&((struct mm_struct *)(pg)->private)->page_table_lock)
-+
-+void vmalloc_sync_all(void)
-+{
+ }
+
+ DEFINE_SPINLOCK(pgd_lock);
+@@ -653,10 +944,62 @@ LIST_HEAD(pgd_list);
+
+ void vmalloc_sync_all(void)
+ {
+- /* Note that races in the updates of insync and start aren't
+- problematic:
+- insync can only get set bits added, and updates to start are only
+- improving performance (without affecting correctness if undone). */
+#ifdef CONFIG_X86_32
+ /*
+ * Note that races in the updates of insync and start aren't
@@ -9278,1510 +9376,45 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ * start are only improving performance (without affecting correctness
+ * if undone).
+ */
-+ static DECLARE_BITMAP(insync, PTRS_PER_PGD);
-+ static unsigned long start = VMALLOC_START & PGDIR_MASK;
-+ unsigned long address;
-+
-+ for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
-+ if (!test_bit(pgd_index(address), insync)) {
-+ const pgd_t *pgd_ref = pgd_offset_k(address);
+ static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+ static unsigned long start = VMALLOC_START & PGDIR_MASK;
+ unsigned long address;
+@@ -664,15 +1007,15 @@ void vmalloc_sync_all(void)
+ for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
+ if (!test_bit(pgd_index(address), insync)) {
+ const pgd_t *pgd_ref = pgd_offset_k(address);
+ unsigned long flags;
-+ struct page *page;
-+
-+ if (pgd_none(*pgd_ref))
-+ continue;
+ struct page *page;
+
+ if (pgd_none(*pgd_ref))
+ continue;
+- spin_lock(&pgd_lock);
+ spin_lock_irqsave(&pgd_lock, flags);
-+ list_for_each_entry(page, &pgd_list, lru) {
-+ pgd_t *pgd;
-+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
-+ pgd_page_table(lock, page);
-+ if (pgd_none(*pgd))
-+ set_pgd(pgd, *pgd_ref);
-+ else
-+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-+ pgd_page_table(unlock, page);
-+ }
+ list_for_each_entry(page, &pgd_list, lru) {
+ pgd_t *pgd;
+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
+-
+ pgd_page_table(lock, page);
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+@@ -680,7 +1023,7 @@ void vmalloc_sync_all(void)
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ pgd_page_table(unlock, page);
+ }
+- spin_unlock(&pgd_lock);
+ spin_unlock_irqrestore(&pgd_lock, flags);
-+ set_bit(pgd_index(address), insync);
-+ }
-+ if (address == start)
-+ start = address + PGDIR_SIZE;
-+ }
-+ /* Check that there is no need to do the same for the modules area. */
-+ BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+ set_bit(pgd_index(address), insync);
+ }
+ if (address == start)
+@@ -688,6 +1031,7 @@ void vmalloc_sync_all(void)
+ }
+ /* Check that there is no need to do the same for the modules area. */
+ BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+- BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+ BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
-+ (__START_KERNEL & PGDIR_MASK)));
+ (__START_KERNEL & PGDIR_MASK)));
+#endif
-+}
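
The insync/start race note above is the heart of vmalloc_sync_all(): marking a top-level slot as synchronized is idempotent, so losing a race only repeats a harmless copy. A stand-alone sketch of that bookkeeping, with the per-slot sync reduced to a counter and the table size purely illustrative:

#include <stdio.h>

#define SLOTS 512                      /* stand-in for PTRS_PER_PGD */

static unsigned long insync[SLOTS / (8 * sizeof(unsigned long))];
static int sync_count;

static void sync_slot(int idx)
{
    unsigned long mask = 1UL << (idx % (8 * sizeof(unsigned long)));
    unsigned long *word = &insync[idx / (8 * sizeof(unsigned long))];

    if (!(*word & mask)) {
        sync_count++;                  /* the actual PGD copy would go here */
        *word |= mask;                 /* losing a race just repeats the copy */
    }
}

int main(void)
{
    sync_slot(7);
    sync_slot(7);                      /* second call is a no-op */
    printf("synced %d time(s)\n", sync_count);
    return 0;
}
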
---- head.orig/arch/x86/mm/fault_32-xen.c 2011-01-31 17:56:27.000000000 +0100
-+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
-@@ -1,770 +0,0 @@
--/*
-- * linux/arch/i386/mm/fault.c
-- *
-- * Copyright (C) 1995 Linus Torvalds
-- */
--
--#include <linux/signal.h>
--#include <linux/sched.h>
--#include <linux/kernel.h>
--#include <linux/errno.h>
--#include <linux/string.h>
--#include <linux/types.h>
--#include <linux/ptrace.h>
--#include <linux/mman.h>
--#include <linux/mm.h>
--#include <linux/smp.h>
--#include <linux/interrupt.h>
--#include <linux/init.h>
--#include <linux/tty.h>
--#include <linux/vt_kern.h> /* For unblank_screen() */
--#include <linux/highmem.h>
--#include <linux/bootmem.h> /* for max_low_pfn */
--#include <linux/vmalloc.h>
--#include <linux/module.h>
--#include <linux/kprobes.h>
--#include <linux/uaccess.h>
--#include <linux/kdebug.h>
--#include <linux/kprobes.h>
--
--#include <asm/system.h>
--#include <asm/desc.h>
--#include <asm/segment.h>
--
--extern void die(const char *,struct pt_regs *,long);
--
--#ifdef CONFIG_KPROBES
--static inline int notify_page_fault(struct pt_regs *regs)
--{
-- int ret = 0;
--
-- /* kprobe_running() needs smp_processor_id() */
-- if (!user_mode_vm(regs)) {
-- preempt_disable();
-- if (kprobe_running() && kprobe_fault_handler(regs, 14))
-- ret = 1;
-- preempt_enable();
-- }
--
-- return ret;
--}
--#else
--static inline int notify_page_fault(struct pt_regs *regs)
--{
-- return 0;
--}
--#endif
--
--/*
-- * Return EIP plus the CS segment base. The segment limit is also
-- * adjusted, clamped to the kernel/user address space (whichever is
-- * appropriate), and returned in *eip_limit.
-- *
-- * The segment is checked, because it might have been changed by another
-- * task between the original faulting instruction and here.
-- *
-- * If CS is no longer a valid code segment, or if EIP is beyond the
-- * limit, or if it is a kernel address when CS is not a kernel segment,
-- * then the returned value will be greater than *eip_limit.
-- *
-- * This is slow, but is very rarely executed.
-- */
--static inline unsigned long get_segment_eip(struct pt_regs *regs,
-- unsigned long *eip_limit)
--{
-- unsigned long eip = regs->eip;
-- unsigned seg = regs->xcs & 0xffff;
-- u32 seg_ar, seg_limit, base, *desc;
--
-- /* Unlikely, but must come before segment checks. */
-- if (unlikely(regs->eflags & VM_MASK)) {
-- base = seg << 4;
-- *eip_limit = base + 0xffff;
-- return base + (eip & 0xffff);
-- }
--
-- /* The standard kernel/user address space limit. */
-- *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
--
-- /* By far the most common cases. */
-- if (likely(SEGMENT_IS_FLAT_CODE(seg)))
-- return eip;
--
-- /* Check the segment exists, is within the current LDT/GDT size,
-- that kernel/user (ring 0..3) has the appropriate privilege,
-- that it's a code segment, and get the limit. */
-- __asm__ ("larl %3,%0; lsll %3,%1"
-- : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
-- if ((~seg_ar & 0x9800) || eip > seg_limit) {
-- *eip_limit = 0;
-- return 1; /* So that returned eip > *eip_limit. */
-- }
--
-- /* Get the GDT/LDT descriptor base.
-- When you look for races in this code remember that
-- LDT and other horrors are only used in user space. */
-- if (seg & (1<<2)) {
-- /* Must lock the LDT while reading it. */
-- mutex_lock(&current->mm->context.lock);
-- desc = current->mm->context.ldt;
-- desc = (void *)desc + (seg & ~7);
-- } else {
-- /* Must disable preemption while reading the GDT. */
-- desc = (u32 *)get_cpu_gdt_table(get_cpu());
-- desc = (void *)desc + (seg & ~7);
-- }
--
-- /* Decode the code segment base from the descriptor */
-- base = get_desc_base((unsigned long *)desc);
--
-- if (seg & (1<<2)) {
-- mutex_unlock(&current->mm->context.lock);
-- } else
-- put_cpu();
--
-- /* Adjust EIP and segment limit, and clamp at the kernel limit.
-- It's legitimate for segments to wrap at 0xffffffff. */
-- seg_limit += base;
-- if (seg_limit < *eip_limit && seg_limit >= base)
-- *eip_limit = seg_limit;
-- return eip + base;
--}
--
--/*
-- * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
-- * Check that here and ignore it.
-- */
--static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
--{
-- unsigned long limit;
-- unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit);
-- int scan_more = 1;
-- int prefetch = 0;
-- int i;
--
-- for (i = 0; scan_more && i < 15; i++) {
-- unsigned char opcode;
-- unsigned char instr_hi;
-- unsigned char instr_lo;
--
-- if (instr > (unsigned char *)limit)
-- break;
-- if (probe_kernel_address(instr, opcode))
-- break;
--
-- instr_hi = opcode & 0xf0;
-- instr_lo = opcode & 0x0f;
-- instr++;
--
-- switch (instr_hi) {
-- case 0x20:
-- case 0x30:
-- /* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */
-- scan_more = ((instr_lo & 7) == 0x6);
-- break;
--
-- case 0x60:
-- /* 0x64 thru 0x67 are valid prefixes in all modes. */
-- scan_more = (instr_lo & 0xC) == 0x4;
-- break;
-- case 0xF0:
-- /* 0xF0, 0xF2, and 0xF3 are valid prefixes */
-- scan_more = !instr_lo || (instr_lo>>1) == 1;
-- break;
-- case 0x00:
-- /* Prefetch instruction is 0x0F0D or 0x0F18 */
-- scan_more = 0;
-- if (instr > (unsigned char *)limit)
-- break;
-- if (probe_kernel_address(instr, opcode))
-- break;
-- prefetch = (instr_lo == 0xF) &&
-- (opcode == 0x0D || opcode == 0x18);
-- break;
-- default:
-- scan_more = 0;
-- break;
-- }
-- }
-- return prefetch;
--}
--
--static inline int is_prefetch(struct pt_regs *regs, unsigned long addr,
-- unsigned long error_code)
--{
-- if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-- boot_cpu_data.x86 >= 6)) {
-- /* Catch an obscure case of prefetch inside an NX page. */
-- if (nx_enabled && (error_code & 16))
-- return 0;
-- return __is_prefetch(regs, addr);
-- }
-- return 0;
--}
--
--static noinline void force_sig_info_fault(int si_signo, int si_code,
-- unsigned long address, struct task_struct *tsk)
--{
-- siginfo_t info;
--
-- info.si_signo = si_signo;
-- info.si_errno = 0;
-- info.si_code = si_code;
-- info.si_addr = (void __user *)address;
-- force_sig_info(si_signo, &info, tsk);
--}
--
--fastcall void do_invalid_op(struct pt_regs *, unsigned long);
--
--#ifdef CONFIG_X86_PAE
--static void dump_fault_path(unsigned long address)
--{
-- unsigned long *p, page;
-- unsigned long mfn;
--
-- page = read_cr3();
-- p = (unsigned long *)__va(page);
-- p += (address >> 30) * 2;
-- printk(KERN_ALERT "%08lx -> *pde = %08lx:%08lx\n", page, p[1], p[0]);
-- if (p[0] & _PAGE_PRESENT) {
-- mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20);
-- page = mfn_to_pfn(mfn) << PAGE_SHIFT;
-- p = (unsigned long *)__va(page);
-- address &= 0x3fffffff;
-- p += (address >> 21) * 2;
-- printk(KERN_ALERT "%08lx -> *pme = %08lx:%08lx\n",
-- page, p[1], p[0]);
-- mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20);
--#ifdef CONFIG_HIGHPTE
-- if (mfn_to_pfn(mfn) >= highstart_pfn)
-- return;
--#endif
-- if ((p[0] & _PAGE_PRESENT) && !(p[0] & _PAGE_PSE)) {
-- page = mfn_to_pfn(mfn) << PAGE_SHIFT;
-- p = (unsigned long *) __va(page);
-- address &= 0x001fffff;
-- p += (address >> 12) * 2;
-- printk(KERN_ALERT "%08lx -> *pte = %08lx:%08lx\n",
-- page, p[1], p[0]);
-- }
-- }
--}
--#else
--static void dump_fault_path(unsigned long address)
--{
-- unsigned long page;
--
-- page = read_cr3();
-- page = ((unsigned long *) __va(page))[address >> PGDIR_SHIFT];
-- printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
-- machine_to_phys(page));
-- /*
-- * We must not directly access the pte in the highpte
-- * case if the page table is located in highmem.
-- * And lets rather not kmap-atomic the pte, just in case
-- * it's allocated already.
-- */
-- if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn
-- && (page & _PAGE_PRESENT)
-- && !(page & _PAGE_PSE)) {
-- page = machine_to_phys(page & PAGE_MASK);
-- page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT)
-- & (PTRS_PER_PTE - 1)];
-- printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page,
-- machine_to_phys(page));
-- }
--}
--#endif
--
--static int spurious_fault(struct pt_regs *regs,
-- unsigned long address,
-- unsigned long error_code)
--{
-- pgd_t *pgd;
-- pud_t *pud;
-- pmd_t *pmd;
-- pte_t *pte;
--
-- /* Reserved-bit violation or user access to kernel space? */
-- if (error_code & 0x0c)
-- return 0;
--
-- pgd = init_mm.pgd + pgd_index(address);
-- if (!pgd_present(*pgd))
-- return 0;
--
-- pud = pud_offset(pgd, address);
-- if (!pud_present(*pud))
-- return 0;
--
-- pmd = pmd_offset(pud, address);
-- if (!pmd_present(*pmd))
-- return 0;
--
-- pte = pte_offset_kernel(pmd, address);
-- if (!pte_present(*pte))
-- return 0;
-- if ((error_code & 0x02) && !pte_write(*pte))
-- return 0;
--#ifdef CONFIG_X86_PAE
-- if ((error_code & 0x10) && (__pte_val(*pte) & _PAGE_NX))
-- return 0;
--#endif
--
-- return 1;
--}
--
--static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
--{
-- unsigned index = pgd_index(address);
-- pgd_t *pgd_k;
-- pud_t *pud, *pud_k;
-- pmd_t *pmd, *pmd_k;
--
-- pgd += index;
-- pgd_k = init_mm.pgd + index;
--
-- if (!pgd_present(*pgd_k))
-- return NULL;
--
-- /*
-- * set_pgd(pgd, *pgd_k); here would be useless on PAE
-- * and redundant with the set_pmd() on non-PAE. As would
-- * set_pud.
-- */
--
-- pud = pud_offset(pgd, address);
-- pud_k = pud_offset(pgd_k, address);
-- if (!pud_present(*pud_k))
-- return NULL;
--
-- pmd = pmd_offset(pud, address);
-- pmd_k = pmd_offset(pud_k, address);
-- if (!pmd_present(*pmd_k))
-- return NULL;
-- if (!pmd_present(*pmd)) {
-- bool lazy = x86_read_percpu(xen_lazy_mmu);
--
-- x86_write_percpu(xen_lazy_mmu, false);
--#if CONFIG_XEN_COMPAT > 0x030002
-- set_pmd(pmd, *pmd_k);
--#else
-- /*
-- * When running on older Xen we must launder *pmd_k through
-- * pmd_val() to ensure that _PAGE_PRESENT is correctly set.
-- */
-- set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
--#endif
-- x86_write_percpu(xen_lazy_mmu, lazy);
-- } else
-- BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
-- return pmd_k;
--}
--
--/*
-- * Handle a fault on the vmalloc or module mapping area
-- *
-- * This assumes no large pages in there.
-- */
--static inline int vmalloc_fault(unsigned long address)
--{
-- unsigned long pgd_paddr;
-- pmd_t *pmd_k;
-- pte_t *pte_k;
-- /*
-- * Synchronize this task's top level page-table
-- * with the 'reference' page table.
-- *
-- * Do _not_ use "current" here. We might be inside
-- * an interrupt in the middle of a task switch..
-- */
-- pgd_paddr = read_cr3();
-- pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
-- if (!pmd_k)
-- return -1;
-- pte_k = pte_offset_kernel(pmd_k, address);
-- if (!pte_present(*pte_k))
-- return -1;
-- return 0;
--}
--
--int show_unhandled_signals = 1;
--
--/*
-- * This routine handles page faults. It determines the address,
-- * and the problem, and then passes it off to one of the appropriate
-- * routines.
-- *
-- * error_code:
-- * bit 0 == 0 means no page found, 1 means protection fault
-- * bit 1 == 0 means read, 1 means write
-- * bit 2 == 0 means kernel, 1 means user-mode
-- * bit 3 == 1 means use of reserved bit detected
-- * bit 4 == 1 means fault was an instruction fetch
-- */
--fastcall void __kprobes do_page_fault(struct pt_regs *regs,
-- unsigned long error_code)
--{
-- struct task_struct *tsk;
-- struct mm_struct *mm;
-- struct vm_area_struct * vma;
-- unsigned long address;
-- int write, si_code;
-- int fault;
--
-- /*
-- * We can fault from pretty much anywhere, with unknown IRQ state.
-- */
-- trace_hardirqs_fixup();
--
-- /* get the address */
-- address = read_cr2();
--
-- /* Set the "privileged fault" bit to something sane. */
-- error_code &= ~4;
-- error_code |= (regs->xcs & 2) << 1;
-- if (regs->eflags & X86_EFLAGS_VM)
-- error_code |= 4;
--
-- tsk = current;
--
-- si_code = SEGV_MAPERR;
--
-- /*
-- * We fault-in kernel-space virtual memory on-demand. The
-- * 'reference' page table is init_mm.pgd.
-- *
-- * NOTE! We MUST NOT take any locks for this case. We may
-- * be in an interrupt or a critical region, and should
-- * only copy the information from the master page table,
-- * nothing more.
-- *
-- * This verifies that the fault happens in kernel space
-- * (error_code & 4) == 0, and that the fault was not a
-- * protection error (error_code & 9) == 0.
-- */
-- if (unlikely(address >= TASK_SIZE)) {
--#ifdef CONFIG_XEN
-- /* Faults in hypervisor area can never be patched up. */
-- if (address >= hypervisor_virt_start)
-- goto bad_area_nosemaphore;
--#endif
-- if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
-- return;
-- /* Can take a spurious fault if mapping changes R/O -> R/W. */
-- if (spurious_fault(regs, address, error_code))
-- return;
-- if (notify_page_fault(regs))
-- return;
-- /*
-- * Don't take the mm semaphore here. If we fixup a prefetch
-- * fault we could otherwise deadlock.
-- */
-- goto bad_area_nosemaphore;
-- }
--
-- if (notify_page_fault(regs))
-- return;
--
-- /* It's safe to allow irq's after cr2 has been saved and the vmalloc
-- fault has been handled. */
-- if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
-- local_irq_enable();
--
-- mm = tsk->mm;
--
-- /*
-- * If we're in an interrupt, have no user context or are running in an
-- * atomic region then we must not take the fault..
-- */
-- if (in_atomic() || !mm)
-- goto bad_area_nosemaphore;
--
-- /* When running in the kernel we expect faults to occur only to
-- * addresses in user space. All other faults represent errors in the
-- * kernel and should generate an OOPS. Unfortunately, in the case of an
-- * erroneous fault occurring in a code path which already holds mmap_sem
-- * we will deadlock attempting to validate the fault against the
-- * address space. Luckily the kernel only validly references user
-- * space from well defined areas of code, which are listed in the
-- * exceptions table.
-- *
-- * As the vast majority of faults will be valid we will only perform
-- * the source reference check when there is a possibility of a deadlock.
-- * Attempt to lock the address space, if we cannot we then validate the
-- * source. If this is invalid we can skip the address space check,
-- * thus avoiding the deadlock.
-- */
-- if (!down_read_trylock(&mm->mmap_sem)) {
-- if ((error_code & 4) == 0 &&
-- !search_exception_tables(regs->eip))
-- goto bad_area_nosemaphore;
-- down_read(&mm->mmap_sem);
-- }
--
-- vma = find_vma(mm, address);
-- if (!vma)
-- goto bad_area;
-- if (vma->vm_start <= address)
-- goto good_area;
-- if (!(vma->vm_flags & VM_GROWSDOWN))
-- goto bad_area;
-- if (error_code & 4) {
-- /*
-- * Accessing the stack below %esp is always a bug.
-- * The large cushion allows instructions like enter
-- * and pusha to work. ("enter $65535,$31" pushes
-- * 32 pointers and then decrements %esp by 65535.)
-- */
-- if (address + 65536 + 32 * sizeof(unsigned long) < regs->esp)
-- goto bad_area;
-- }
-- if (expand_stack(vma, address))
-- goto bad_area;
--/*
-- * Ok, we have a good vm_area for this memory access, so
-- * we can handle it..
-- */
--good_area:
-- si_code = SEGV_ACCERR;
-- write = 0;
-- switch (error_code & 3) {
-- default: /* 3: write, present */
-- /* fall through */
-- case 2: /* write, not present */
-- if (!(vma->vm_flags & VM_WRITE))
-- goto bad_area;
-- write++;
-- break;
-- case 1: /* read, present */
-- goto bad_area;
-- case 0: /* read, not present */
-- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
-- goto bad_area;
-- }
--
-- survive:
-- /*
-- * If for any reason at all we couldn't handle the fault,
-- * make sure we exit gracefully rather than endlessly redo
-- * the fault.
-- */
-- fault = handle_mm_fault(mm, vma, address, write);
-- if (unlikely(fault & VM_FAULT_ERROR)) {
-- if (fault & VM_FAULT_OOM)
-- goto out_of_memory;
-- else if (fault & VM_FAULT_SIGBUS)
-- goto do_sigbus;
-- BUG();
-- }
-- if (fault & VM_FAULT_MAJOR)
-- tsk->maj_flt++;
-- else
-- tsk->min_flt++;
--
-- /*
-- * Did it hit the DOS screen memory VA from vm86 mode?
-- */
-- if (regs->eflags & VM_MASK) {
-- unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
-- if (bit < 32)
-- tsk->thread.screen_bitmap |= 1 << bit;
-- }
-- up_read(&mm->mmap_sem);
-- return;
--
--/*
-- * Something tried to access memory that isn't in our memory map..
-- * Fix it, but check if it's kernel or user first..
-- */
--bad_area:
-- up_read(&mm->mmap_sem);
--
--bad_area_nosemaphore:
-- /* User mode accesses just cause a SIGSEGV */
-- if (error_code & 4) {
-- /*
-- * It's possible to have interrupts off here.
-- */
-- local_irq_enable();
--
-- /*
-- * Valid to do another page fault here because this one came
-- * from user space.
-- */
-- if (is_prefetch(regs, address, error_code))
-- return;
--
-- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
-- printk_ratelimit()) {
-- printk("%s%s[%d]: segfault at %08lx eip %08lx "
-- "esp %08lx error %lx\n",
-- task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
-- tsk->comm, task_pid_nr(tsk), address, regs->eip,
-- regs->esp, error_code);
-- }
-- tsk->thread.cr2 = address;
-- /* Kernel addresses are always protection faults */
-- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
-- tsk->thread.trap_no = 14;
-- force_sig_info_fault(SIGSEGV, si_code, address, tsk);
-- return;
-- }
--
--#ifdef CONFIG_X86_F00F_BUG
-- /*
-- * Pentium F0 0F C7 C8 bug workaround.
-- */
-- if (boot_cpu_data.f00f_bug) {
-- unsigned long nr;
--
-- nr = (address - idt_descr.address) >> 3;
--
-- if (nr == 6) {
-- do_invalid_op(regs, 0);
-- return;
-- }
-- }
--#endif
--
--no_context:
-- /* Are we prepared to handle this kernel fault? */
-- if (fixup_exception(regs))
-- return;
--
-- /*
-- * Valid to do another page fault here, because if this fault
-- * had been triggered by is_prefetch fixup_exception would have
-- * handled it.
-- */
-- if (is_prefetch(regs, address, error_code))
-- return;
--
--/*
-- * Oops. The kernel tried to access some bad page. We'll have to
-- * terminate things with extreme prejudice.
-- */
--
-- bust_spinlocks(1);
--
-- if (oops_may_print()) {
--#ifdef CONFIG_X86_PAE
-- if (error_code & 16) {
-- pte_t *pte = lookup_address(address);
--
-- if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
-- printk(KERN_CRIT "kernel tried to execute "
-- "NX-protected page - exploit attempt? "
-- "(uid: %d)\n", current->uid);
-- }
--#endif
-- if (address < PAGE_SIZE)
-- printk(KERN_ALERT "BUG: unable to handle kernel NULL "
-- "pointer dereference");
-- else
-- printk(KERN_ALERT "BUG: unable to handle kernel paging"
-- " request");
-- printk(" at virtual address %08lx\n",address);
-- printk(KERN_ALERT "printing eip: %08lx\n", regs->eip);
-- dump_fault_path(address);
-- }
-- tsk->thread.cr2 = address;
-- tsk->thread.trap_no = 14;
-- tsk->thread.error_code = error_code;
-- die("Oops", regs, error_code);
-- bust_spinlocks(0);
-- do_exit(SIGKILL);
--
--/*
-- * We ran out of memory, or some other thing happened to us that made
-- * us unable to handle the page fault gracefully.
-- */
--out_of_memory:
-- up_read(&mm->mmap_sem);
-- if (is_global_init(tsk)) {
-- yield();
-- down_read(&mm->mmap_sem);
-- goto survive;
-- }
-- printk("VM: killing process %s\n", tsk->comm);
-- if (error_code & 4)
-- do_group_exit(SIGKILL);
-- goto no_context;
--
--do_sigbus:
-- up_read(&mm->mmap_sem);
--
-- /* Kernel mode? Handle exceptions or die */
-- if (!(error_code & 4))
-- goto no_context;
--
-- /* User space => ok to do another page fault */
-- if (is_prefetch(regs, address, error_code))
-- return;
--
-- tsk->thread.cr2 = address;
-- tsk->thread.error_code = error_code;
-- tsk->thread.trap_no = 14;
-- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
--}
--
--void vmalloc_sync_all(void)
--{
-- /*
-- * Note that races in the updates of insync and start aren't
-- * problematic: insync can only get set bits added, and updates to
-- * start are only improving performance (without affecting correctness
-- * if undone).
-- * XEN: To work on PAE, we need to iterate over PMDs rather than PGDs.
-- * This change works just fine with 2-level paging too.
-- */
--#define sync_index(a) ((a) >> PMD_SHIFT)
-- static DECLARE_BITMAP(insync, PTRS_PER_PGD*PTRS_PER_PMD);
-- static unsigned long start = TASK_SIZE;
-- unsigned long address;
--
-- if (SHARED_KERNEL_PMD)
-- return;
--
-- BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
-- for (address = start;
-- address >= TASK_SIZE && address < hypervisor_virt_start;
-- address += 1UL << PMD_SHIFT) {
-- if (!test_bit(sync_index(address), insync)) {
-- unsigned long flags;
-- struct page *page;
--
-- spin_lock_irqsave(&pgd_lock, flags);
-- /* XEN: failure path assumes non-empty pgd_list. */
-- if (unlikely(!pgd_list)) {
-- spin_unlock_irqrestore(&pgd_lock, flags);
-- return;
-- }
-- for (page = pgd_list; page; page =
-- (struct page *)page->index) {
-- spinlock_t *lock = page->mapping
-- ? &((struct mm_struct *)page->mapping)
-- ->page_table_lock
-- : NULL;
-- pmd_t *pmd;
--
-- if (lock)
-- spin_lock(lock);
-- pmd = vmalloc_sync_one(page_address(page),
-- address);
-- if (lock)
-- spin_unlock(lock);
--
-- if (!pmd) {
-- BUG_ON(page != pgd_list);
-- break;
-- }
-- }
-- spin_unlock_irqrestore(&pgd_lock, flags);
-- if (!page)
-- set_bit(sync_index(address), insync);
-- }
-- if (address == start && test_bit(sync_index(address), insync))
-- start = address + (1UL << PMD_SHIFT);
-- }
--}
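
The do_page_fault() comment in the file removed above spells out the error_code layout: bit 0 protection fault (vs. no page found), bit 1 write, bit 2 user mode, bit 3 reserved-bit violation, bit 4 instruction fetch; the unified fault.c refers to the same bits through the PF_* names. A small decoder makes the encoding concrete:

#include <stdio.h>

#define PF_PROT  (1 << 0)  /* 0: no page found, 1: protection fault */
#define PF_WRITE (1 << 1)  /* 0: read, 1: write */
#define PF_USER  (1 << 2)  /* 0: kernel, 1: user mode */
#define PF_RSVD  (1 << 3)  /* reserved bit violation */
#define PF_INSTR (1 << 4)  /* instruction fetch */

static void decode(unsigned long ec)
{
    printf("%s %s in %s mode%s%s\n",
           ec & PF_PROT ? "protection fault" : "page not present",
           ec & PF_WRITE ? "on write" : "on read",
           ec & PF_USER ? "user" : "kernel",
           ec & PF_RSVD ? ", reserved bit set" : "",
           ec & PF_INSTR ? ", instruction fetch" : "");
}

int main(void)
{
    decode(PF_USER | PF_WRITE);        /* user write to a missing page */
    decode(PF_PROT | PF_INSTR);        /* kernel NX violation */
    return 0;
}
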
---- head.orig/arch/x86/mm/fault_64-xen.c 2011-07-26 09:28:01.000000000 +0200
-+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
-@@ -1,693 +0,0 @@
--/*
-- * linux/arch/x86-64/mm/fault.c
-- *
-- * Copyright (C) 1995 Linus Torvalds
-- * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
-- */
--
--#include <linux/signal.h>
--#include <linux/sched.h>
--#include <linux/kernel.h>
--#include <linux/errno.h>
--#include <linux/string.h>
--#include <linux/types.h>
--#include <linux/ptrace.h>
--#include <linux/mman.h>
--#include <linux/mm.h>
--#include <linux/smp.h>
--#include <linux/interrupt.h>
--#include <linux/init.h>
--#include <linux/tty.h>
--#include <linux/vt_kern.h> /* For unblank_screen() */
--#include <linux/compiler.h>
--#include <linux/vmalloc.h>
--#include <linux/module.h>
--#include <linux/kprobes.h>
--#include <linux/uaccess.h>
--#include <linux/kdebug.h>
--#include <linux/kprobes.h>
--
--#include <asm/system.h>
--#include <asm/pgalloc.h>
--#include <asm/smp.h>
--#include <asm/tlbflush.h>
--#include <asm/proto.h>
--#include <asm-generic/sections.h>
--
--/* Page fault error code bits */
--#define PF_PROT (1<<0) /* or no page found */
--#define PF_WRITE (1<<1)
--#define PF_USER (1<<2)
--#define PF_RSVD (1<<3)
--#define PF_INSTR (1<<4)
--
--#ifdef CONFIG_KPROBES
--static inline int notify_page_fault(struct pt_regs *regs)
--{
-- int ret = 0;
--
-- /* kprobe_running() needs smp_processor_id() */
-- if (!user_mode(regs)) {
-- preempt_disable();
-- if (kprobe_running() && kprobe_fault_handler(regs, 14))
-- ret = 1;
-- preempt_enable();
-- }
--
-- return ret;
--}
--#else
--static inline int notify_page_fault(struct pt_regs *regs)
--{
-- return 0;
--}
--#endif
--
--/* Sometimes the CPU reports invalid exceptions on prefetch.
-- Check that here and ignore.
-- Opcode checker based on code by Richard Brunner */
--static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
-- unsigned long error_code)
--{
-- unsigned char *instr;
-- int scan_more = 1;
-- int prefetch = 0;
-- unsigned char *max_instr;
--
-- /* If it was a exec fault ignore */
-- if (error_code & PF_INSTR)
-- return 0;
--
-- instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
-- max_instr = instr + 15;
--
-- if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
-- return 0;
--
-- while (scan_more && instr < max_instr) {
-- unsigned char opcode;
-- unsigned char instr_hi;
-- unsigned char instr_lo;
--
-- if (probe_kernel_address(instr, opcode))
-- break;
--
-- instr_hi = opcode & 0xf0;
-- instr_lo = opcode & 0x0f;
-- instr++;
--
-- switch (instr_hi) {
-- case 0x20:
-- case 0x30:
-- /* Values 0x26,0x2E,0x36,0x3E are valid x86
-- prefixes. In long mode, the CPU will signal
-- invalid opcode if some of these prefixes are
-- present so we will never get here anyway */
-- scan_more = ((instr_lo & 7) == 0x6);
-- break;
--
-- case 0x40:
-- /* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes
-- Need to figure out under what instruction mode the
-- instruction was issued ... */
-- /* Could check the LDT for lm, but for now it's good
-- enough to assume that long mode only uses well known
-- segments or kernel. */
-- scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS)
-- || (regs->cs == FLAT_USER_CS64);
-- break;
--
-- case 0x60:
-- /* 0x64 thru 0x67 are valid prefixes in all modes. */
-- scan_more = (instr_lo & 0xC) == 0x4;
-- break;
-- case 0xF0:
-- /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
-- scan_more = !instr_lo || (instr_lo>>1) == 1;
-- break;
-- case 0x00:
-- /* Prefetch instruction is 0x0F0D or 0x0F18 */
-- scan_more = 0;
-- if (probe_kernel_address(instr, opcode))
-- break;
-- prefetch = (instr_lo == 0xF) &&
-- (opcode == 0x0D || opcode == 0x18);
-- break;
-- default:
-- scan_more = 0;
-- break;
-- }
-- }
-- return prefetch;
--}
--
--static int bad_address(void *p)
--{
-- unsigned long dummy;
-- return probe_kernel_address((unsigned long *)p, dummy);
--}
--
--void dump_pagetable(unsigned long address)
--{
-- pgd_t *pgd;
-- pud_t *pud;
-- pmd_t *pmd;
-- pte_t *pte;
--
-- pgd = (pgd_t *)read_cr3();
--
-- pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
-- pgd += pgd_index(address);
-- if (bad_address(pgd)) goto bad;
-- printk("PGD %lx ", pgd_val(*pgd));
-- if (!pgd_present(*pgd)) goto ret;
--
-- pud = pud_offset(pgd, address);
-- if (bad_address(pud)) goto bad;
-- printk("PUD %lx ", pud_val(*pud));
-- if (!pud_present(*pud)) goto ret;
--
-- pmd = pmd_offset(pud, address);
-- if (bad_address(pmd)) goto bad;
-- printk("PMD %lx ", pmd_val(*pmd));
-- if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;
--
-- pte = pte_offset_kernel(pmd, address);
-- if (bad_address(pte)) goto bad;
-- printk("PTE %lx", pte_val(*pte));
--ret:
-- printk("\n");
-- return;
--bad:
-- printk("BAD\n");
--}
--
--static const char errata93_warning[] =
--KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
--KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
--KERN_ERR "******* Please consider a BIOS update.\n"
--KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
--
--/* Workaround for K8 erratum #93 & buggy BIOS.
-- BIOS SMM functions are required to use a specific workaround
-- to avoid corruption of the 64bit RIP register on C stepping K8.
-- A lot of BIOS that didn't get tested properly miss this.
-- The OS sees this as a page fault with the upper 32bits of RIP cleared.
-- Try to work around it here.
-- Note we only handle faults in kernel here. */
--
--static int is_errata93(struct pt_regs *regs, unsigned long address)
--{
-- static int warned;
-- if (address != regs->rip)
-- return 0;
-- if ((address >> 32) != 0)
-- return 0;
-- address |= 0xffffffffUL << 32;
-- if ((address >= (u64)_stext && address <= (u64)_etext) ||
-- (address >= MODULES_VADDR && address <= MODULES_END)) {
-- if (!warned) {
-- printk(errata93_warning);
-- warned = 1;
-- }
-- regs->rip = address;
-- return 1;
-- }
-- return 0;
--}
--
--static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
-- unsigned long error_code)
--{
-- unsigned long flags = oops_begin();
-- struct task_struct *tsk;
--
-- printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
-- current->comm, address);
-- dump_pagetable(address);
-- tsk = current;
-- tsk->thread.cr2 = address;
-- tsk->thread.trap_no = 14;
-- tsk->thread.error_code = error_code;
-- __die("Bad pagetable", regs, error_code);
-- oops_end(flags);
-- do_exit(SIGKILL);
--}
--
--/*
-- * Handle a fault on the vmalloc area
-- *
-- * This assumes no large pages in there.
-- */
--static int vmalloc_fault(unsigned long address)
--{
-- pgd_t *pgd, *pgd_ref;
-- pud_t *pud, *pud_ref;
-- pmd_t *pmd, *pmd_ref;
-- pte_t *pte, *pte_ref;
--
-- /* Copy kernel mappings over when needed. This can also
-- happen within a race in page table update. In the later
-- case just flush. */
--
-- /* On Xen the line below does not always work. Needs investigating! */
-- /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
-- pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
-- pgd += pgd_index(address);
-- pgd_ref = pgd_offset_k(address);
-- if (pgd_none(*pgd_ref))
-- return -1;
-- if (pgd_none(*pgd))
-- set_pgd(pgd, *pgd_ref);
-- else
-- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
--
-- /* Below here mismatches are bugs because these lower tables
-- are shared */
--
-- pud = pud_offset(pgd, address);
-- pud_ref = pud_offset(pgd_ref, address);
-- if (pud_none(*pud_ref))
-- return -1;
-- if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
-- BUG();
-- pmd = pmd_offset(pud, address);
-- pmd_ref = pmd_offset(pud_ref, address);
-- if (pmd_none(*pmd_ref))
-- return -1;
-- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
-- BUG();
-- pte_ref = pte_offset_kernel(pmd_ref, address);
-- if (!pte_present(*pte_ref))
-- return -1;
-- pte = pte_offset_kernel(pmd, address);
-- /* Don't use pte_page here, because the mappings can point
-- outside mem_map, and the NUMA hash lookup cannot handle
-- that. */
-- if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
-- BUG();
-- return 0;
--}
--
--int show_unhandled_signals = 1;
--
--
--#define MEM_VERBOSE 1
--
--#ifdef MEM_VERBOSE
--#define MEM_LOG(_f, _a...) \
-- printk("fault.c:[%d]-> " _f "\n", \
-- __LINE__ , ## _a )
--#else
--#define MEM_LOG(_f, _a...) ((void)0)
--#endif
--
--static int spurious_fault(struct pt_regs *regs,
-- unsigned long address,
-- unsigned long error_code)
--{
-- pgd_t *pgd;
-- pud_t *pud;
-- pmd_t *pmd;
-- pte_t *pte;
--
--#ifdef CONFIG_XEN
-- /* Faults in hypervisor area are never spurious. */
-- if ((address >= HYPERVISOR_VIRT_START) &&
-- (address < HYPERVISOR_VIRT_END))
-- return 0;
--#endif
--
-- /* Reserved-bit violation or user access to kernel space? */
-- if (error_code & (PF_RSVD|PF_USER))
-- return 0;
--
-- pgd = init_mm.pgd + pgd_index(address);
-- if (!pgd_present(*pgd))
-- return 0;
--
-- pud = pud_offset(pgd, address);
-- if (!pud_present(*pud))
-- return 0;
--
-- pmd = pmd_offset(pud, address);
-- if (!pmd_present(*pmd))
-- return 0;
--
-- pte = pte_offset_kernel(pmd, address);
-- if (!pte_present(*pte))
-- return 0;
-- if ((error_code & PF_WRITE) && !pte_write(*pte))
-- return 0;
-- if ((error_code & PF_INSTR) && (__pte_val(*pte) & _PAGE_NX))
-- return 0;
--
-- return 1;
--}
--
--/*
-- * This routine handles page faults. It determines the address,
-- * and the problem, and then passes it off to one of the appropriate
-- * routines.
-- */
--asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
-- unsigned long error_code)
--{
-- struct task_struct *tsk;
-- struct mm_struct *mm;
-- struct vm_area_struct * vma;
-- unsigned long address;
-- const struct exception_table_entry *fixup;
-- int write, fault;
-- unsigned long flags;
-- siginfo_t info;
--
-- if (!user_mode(regs))
-- error_code &= ~PF_USER; /* means kernel */
--
-- /*
-- * We can fault from pretty much anywhere, with unknown IRQ state.
-- */
-- trace_hardirqs_fixup();
--
-- tsk = current;
-- mm = tsk->mm;
-- prefetchw(&mm->mmap_sem);
--
-- /* get the address */
-- address = read_cr2();
--
-- info.si_code = SEGV_MAPERR;
--
--
-- /*
-- * We fault-in kernel-space virtual memory on-demand. The
-- * 'reference' page table is init_mm.pgd.
-- *
-- * NOTE! We MUST NOT take any locks for this case. We may
-- * be in an interrupt or a critical region, and should
-- * only copy the information from the master page table,
-- * nothing more.
-- *
-- * This verifies that the fault happens in kernel space
-- * (error_code & 4) == 0, and that the fault was not a
-- * protection error (error_code & 9) == 0.
-- */
-- if (unlikely(address >= TASK_SIZE64)) {
-- /*
-- * Don't check for the module range here: its PML4
-- * is always initialized because it's shared with the main
-- * kernel text. Only vmalloc may need PML4 syncups.
-- */
-- if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
-- ((address >= VMALLOC_START && address < VMALLOC_END))) {
-- if (vmalloc_fault(address) >= 0)
-- return;
-- }
-- /* Can take a spurious fault if mapping changes R/O -> R/W. */
-- if (spurious_fault(regs, address, error_code))
-- return;
-- if (notify_page_fault(regs))
-- return;
-- /*
-- * Don't take the mm semaphore here. If we fixup a prefetch
-- * fault we could otherwise deadlock.
-- */
-- goto bad_area_nosemaphore;
-- }
--
-- if (notify_page_fault(regs))
-- return;
--
-- if (likely(regs->eflags & X86_EFLAGS_IF))
-- local_irq_enable();
--
-- if (unlikely(error_code & PF_RSVD))
-- pgtable_bad(address, regs, error_code);
--
-- /*
-- * If we're in an interrupt or have no user
-- * context, we must not take the fault..
-- */
-- if (unlikely(in_atomic() || !mm))
-- goto bad_area_nosemaphore;
--
-- /*
-- * User-mode registers count as a user access even for any
-- * potential system fault or CPU buglet.
-- */
-- if (user_mode_vm(regs))
-- error_code |= PF_USER;
--
-- again:
-- /* When running in the kernel we expect faults to occur only to
-- * addresses in user space. All other faults represent errors in the
-- * kernel and should generate an OOPS. Unfortunately, in the case of an
-- * erroneous fault occurring in a code path which already holds mmap_sem
-- * we will deadlock attempting to validate the fault against the
-- * address space. Luckily the kernel only validly references user
-- * space from well defined areas of code, which are listed in the
-- * exceptions table.
-- *
-- * As the vast majority of faults will be valid we will only perform
-- * the source reference check when there is a possibility of a deadlock.
-- * Attempt to lock the address space, if we cannot we then validate the
-- * source. If this is invalid we can skip the address space check,
-- * thus avoiding the deadlock.
-- */
-- if (!down_read_trylock(&mm->mmap_sem)) {
-- if ((error_code & PF_USER) == 0 &&
-- !search_exception_tables(regs->rip))
-- goto bad_area_nosemaphore;
-- down_read(&mm->mmap_sem);
-- }
--
-- vma = find_vma(mm, address);
-- if (!vma)
-- goto bad_area;
-- if (likely(vma->vm_start <= address))
-- goto good_area;
-- if (!(vma->vm_flags & VM_GROWSDOWN))
-- goto bad_area;
-- if (error_code & 4) {
-- /* Allow userspace just enough access below the stack pointer
-- * to let the 'enter' instruction work.
-- */
-- if (address + 65536 + 32 * sizeof(unsigned long) < regs->rsp)
-- goto bad_area;
-- }
-- if (expand_stack(vma, address))
-- goto bad_area;
--/*
-- * Ok, we have a good vm_area for this memory access, so
-- * we can handle it..
-- */
--good_area:
-- info.si_code = SEGV_ACCERR;
-- write = 0;
-- switch (error_code & (PF_PROT|PF_WRITE)) {
-- default: /* 3: write, present */
-- /* fall through */
-- case PF_WRITE: /* write, not present */
-- if (!(vma->vm_flags & VM_WRITE))
-- goto bad_area;
-- write++;
-- break;
-- case PF_PROT: /* read, present */
-- goto bad_area;
-- case 0: /* read, not present */
-- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
-- goto bad_area;
-- }
--
-- /*
-- * If for any reason at all we couldn't handle the fault,
-- * make sure we exit gracefully rather than endlessly redo
-- * the fault.
-- */
-- fault = handle_mm_fault(mm, vma, address, write);
-- if (unlikely(fault & VM_FAULT_ERROR)) {
-- if (fault & VM_FAULT_OOM)
-- goto out_of_memory;
-- else if (fault & VM_FAULT_SIGBUS)
-- goto do_sigbus;
-- BUG();
-- }
-- if (fault & VM_FAULT_MAJOR)
-- tsk->maj_flt++;
-- else
-- tsk->min_flt++;
-- up_read(&mm->mmap_sem);
-- return;
--
--/*
-- * Something tried to access memory that isn't in our memory map..
-- * Fix it, but check if it's kernel or user first..
-- */
--bad_area:
-- up_read(&mm->mmap_sem);
--
--bad_area_nosemaphore:
-- /* User mode accesses just cause a SIGSEGV */
-- if (error_code & PF_USER) {
--
-- /*
-- * It's possible to have interrupts off here.
-- */
-- local_irq_enable();
--
-- if (is_prefetch(regs, address, error_code))
-- return;
--
-- /* Work around K8 erratum #100 K8 in compat mode
-- occasionally jumps to illegal addresses >4GB. We
-- catch this here in the page fault handler because
-- these addresses are not reachable. Just detect this
-- case and return. Any code segment in LDT is
-- compatibility mode. */
-- if ((regs->cs == __USER32_CS || regs->cs == FLAT_USER_CS32 ||
-- (regs->cs & (1<<2))) && (address >> 32))
-- return;
--
-- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
-- printk_ratelimit()) {
-- printk(
-- "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n",
-- tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
-- tsk->comm, tsk->pid, address, regs->rip,
-- regs->rsp, error_code);
-- }
--
-- tsk->thread.cr2 = address;
-- /* Kernel addresses are always protection faults */
-- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
-- tsk->thread.trap_no = 14;
-- info.si_signo = SIGSEGV;
-- info.si_errno = 0;
-- /* info.si_code has been set above */
-- info.si_addr = (void __user *)address;
-- force_sig_info(SIGSEGV, &info, tsk);
-- return;
-- }
--
--no_context:
--
-- /* Are we prepared to handle this kernel fault? */
-- fixup = search_exception_tables(regs->rip);
-- if (fixup) {
-- regs->rip = fixup->fixup;
-- return;
-- }
--
-- /*
-- * Hall of shame of CPU/BIOS bugs.
-- */
--
-- if (is_prefetch(regs, address, error_code))
-- return;
--
-- if (is_errata93(regs, address))
-- return;
--
--/*
-- * Oops. The kernel tried to access some bad page. We'll have to
-- * terminate things with extreme prejudice.
-- */
--
-- flags = oops_begin();
--
-- if (address < PAGE_SIZE)
-- printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
-- else
-- printk(KERN_ALERT "Unable to handle kernel paging request");
-- printk(" at %016lx RIP: \n" KERN_ALERT,address);
-- printk_address(regs->rip);
-- dump_pagetable(address);
-- tsk->thread.cr2 = address;
-- tsk->thread.trap_no = 14;
-- tsk->thread.error_code = error_code;
-- __die("Oops", regs, error_code);
-- /* Executive summary in case the body of the oops scrolled away */
-- printk(KERN_EMERG "CR2: %016lx\n", address);
-- oops_end(flags);
-- do_exit(SIGKILL);
--
--/*
-- * We ran out of memory, or some other thing happened to us that made
-- * us unable to handle the page fault gracefully.
-- */
--out_of_memory:
-- up_read(&mm->mmap_sem);
-- if (is_global_init(current)) {
-- yield();
-- goto again;
-- }
-- printk("VM: killing process %s\n", tsk->comm);
-- if (error_code & 4)
-- do_group_exit(SIGKILL);
-- goto no_context;
--
--do_sigbus:
-- up_read(&mm->mmap_sem);
--
-- /* Kernel mode? Handle exceptions or die */
-- if (!(error_code & PF_USER))
-- goto no_context;
--
-- tsk->thread.cr2 = address;
-- tsk->thread.error_code = error_code;
-- tsk->thread.trap_no = 14;
-- info.si_signo = SIGBUS;
-- info.si_errno = 0;
-- info.si_code = BUS_ADRERR;
-- info.si_addr = (void __user *)address;
-- force_sig_info(SIGBUS, &info, tsk);
-- return;
--}
--
--DEFINE_SPINLOCK(pgd_lock);
--LIST_HEAD(pgd_list);
--
--#define pgd_page_table(what, pg) \
-- spin_##what(&((struct mm_struct *)(pg)->private)->page_table_lock)
--
--void vmalloc_sync_all(void)
--{
-- /* Note that races in the updates of insync and start aren't
-- problematic:
-- insync can only get set bits added, and updates to start are only
-- improving performance (without affecting correctness if undone). */
-- static DECLARE_BITMAP(insync, PTRS_PER_PGD);
-- static unsigned long start = VMALLOC_START & PGDIR_MASK;
-- unsigned long address;
--
-- for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
-- if (!test_bit(pgd_index(address), insync)) {
-- const pgd_t *pgd_ref = pgd_offset_k(address);
-- struct page *page;
--
-- if (pgd_none(*pgd_ref))
-- continue;
-- spin_lock(&pgd_lock);
-- list_for_each_entry(page, &pgd_list, lru) {
-- pgd_t *pgd;
-- pgd = (pgd_t *)page_address(page) + pgd_index(address);
--
-- pgd_page_table(lock, page);
-- if (pgd_none(*pgd))
-- set_pgd(pgd, *pgd_ref);
-- else
-- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-- pgd_page_table(unlock, page);
-- }
-- spin_unlock(&pgd_lock);
-- set_bit(pgd_index(address), insync);
-- }
-- if (address == start)
-- start = address + PGDIR_SIZE;
-- }
-- /* Check that there is no need to do the same for the modules area. */
-- BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
-- BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
-- (__START_KERNEL & PGDIR_MASK)));
--}
+ }
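
The hunk above removes the old x86-64 Xen page-fault handler from this patch file. One detail worth keeping in mind is the AMD K8 erratum #93 workaround it carried: on affected CPUs a jump could lose the upper 32 bits of %rip, so the handler re-applied the sign extension and, if the corrected address fell inside kernel text, resumed execution there. A minimal user-space model of that check follows; the text-range constants are illustrative, not the kernel's:

/* errata93_demo.c - model of the removed is_errata93() fixup.
 * Build: cc -o errata93_demo errata93_demo.c */
#include <stdio.h>
#include <stdint.h>

/* stand-in kernel text range for the demo */
#define TEXT_START 0xffffffff80000000ULL
#define TEXT_END   0xffffffff81000000ULL

static int errata93_fixup(uint64_t *rip)
{
    uint64_t addr = *rip;   /* the kernel also requires addr == regs->rip */

    if (addr >> 32)         /* upper half intact: not this erratum */
        return 0;
    addr |= 0xffffffffULL << 32;    /* re-apply the lost sign extension */
    if (addr >= TEXT_START && addr < TEXT_END) {
        *rip = addr;        /* resume at the corrected address */
        return 1;
    }
    return 0;
}

int main(void)
{
    uint64_t rip = 0x80001234;      /* bits 63..32 dropped by the CPU */

    printf("fixed=%d rip=%#llx\n", errata93_fixup(&rip),
           (unsigned long long)rip);
    return 0;
}
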
--- head.orig/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:49:31.000000000 +0100
+++ head/arch/x86/mm/highmem_32-xen.c 2011-01-31 18:01:51.000000000 +0100
@@ -18,6 +18,49 @@ void kunmap(struct page *page)
@@ -15810,7 +14443,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
vdso32.so-$(CONFIG_XEN) += $(xen-vdso32-y)
vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
---- head.orig/arch/x86/vdso/vdso32/syscall.S 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/arch/x86/vdso/vdso32/syscall.S 2013-07-02 09:20:53.000000000 +0200
+++ head/arch/x86/vdso/vdso32/syscall.S 2011-01-31 18:01:51.000000000 +0100
@@ -19,8 +19,10 @@ __kernel_vsyscall:
.Lpush_ebp:
@@ -15823,7 +14456,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
movl %ebp, %ecx
popl %ebp
.Lpop_ebp:
---- head.orig/arch/x86/vdso/vdso32.S 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/arch/x86/vdso/vdso32.S 2013-07-02 09:20:53.000000000 +0200
+++ head/arch/x86/vdso/vdso32.S 2011-01-31 18:01:51.000000000 +0100
@@ -19,4 +19,16 @@ vdso32_sysenter_start:
.incbin "arch/x86/vdso/vdso32-sysenter.so"
@@ -17513,8 +16146,8 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
+ spinning->ticket == token) {
+#if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
+ token = spinning->irq_count
-+ < per_cpu(_irq_count, cpu)
-+ ? ticket_drop(spinning, token, cpu) : -2;
++ < per_cpu(_irq_count, cpu)
++ ? ticket_drop(spinning, token, cpu) : -2;
+#endif
+ break;
+ }
@@ -17714,7 +16347,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
.resume = xenoprof_resume,
.suspend = xenoprof_suspend
};
---- head.orig/arch/x86/include/uapi/asm/e820.h 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/arch/x86/include/uapi/asm/e820.h 2013-07-02 09:20:53.000000000 +0200
+++ head/arch/x86/include/uapi/asm/e820.h 2013-01-08 11:47:19.000000000 +0100
@@ -60,7 +60,11 @@ struct e820map {
struct e820entry map[E820_X_MAX];
@@ -17728,7 +16361,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
#define ISA_END_ADDRESS 0x100000
#define BIOS_BEGIN 0x000a0000
---- head.orig/arch/x86/include/asm/hardirq.h 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/arch/x86/include/asm/hardirq.h 2013-07-02 09:20:53.000000000 +0200
+++ head/arch/x86/include/asm/hardirq.h 2013-05-23 17:11:22.000000000 +0200
@@ -21,11 +21,15 @@ typedef struct {
#ifdef CONFIG_SMP
@@ -25184,7 +23817,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches
void leave_mm(int cpu);
#else
static inline void leave_mm(int cpu)
---- head.orig/arch/x86/include/asm/ptrace.h 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/arch/x86/include/asm/ptrace.h 2013-07-02 09:20:53.000000000 +0200
+++ head/arch/x86/include/asm/ptrace.h 2013-01-08 11:47:39.000000000 +0100
@@ -224,7 +224,9 @@ static inline unsigned long regs_get_ker
}
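
Also gone from this patch file is spurious_fault(), which decides whether a kernel fault is merely a stale TLB entry: if the live page-table entry already grants everything the faulting access needed (present, writable for writes, executable for fetches), the fault can be ignored, since the retried access re-walks the page tables and succeeds. The decision reduces to a few flag tests, sketched here with made-up flag encodings rather than the real x86 bits:

/* spurious_fault_demo.c - the permission logic behind the removed
 * spurious_fault(); flag values are stand-ins, not x86 encodings. */
#include <stdio.h>

#define PF_PROT   0x01  /* fault on a present page */
#define PF_WRITE  0x02
#define PF_USER   0x04
#define PF_RSVD   0x08
#define PF_INSTR  0x10

#define PTE_PRESENT 0x1
#define PTE_WRITE   0x2
#define PTE_NX      0x4

/* Returns 1 when the current PTE already permits the access, i.e. the
 * fault can only be a leftover from an R/O -> R/W style upgrade. */
static int spurious_fault(unsigned long error_code, unsigned long pte)
{
    if (error_code & (PF_RSVD | PF_USER))   /* never spurious */
        return 0;
    if (!(pte & PTE_PRESENT))
        return 0;
    if ((error_code & PF_WRITE) && !(pte & PTE_WRITE))
        return 0;
    if ((error_code & PF_INSTR) && (pte & PTE_NX))
        return 0;
    return 1;
}

int main(void)
{
    /* kernel write fault, but the PTE is now present and writable */
    printf("spurious=%d\n",
           spurious_fault(PF_PROT | PF_WRITE, PTE_PRESENT | PTE_WRITE));
    return 0;
}
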
diff --git a/patches.xen/xen3-patch-2.6.26 b/patches.xen/xen3-patch-2.6.26
index 1658d1844d..e0453e121a 100644
--- a/patches.xen/xen3-patch-2.6.26
+++ b/patches.xen/xen3-patch-2.6.26
@@ -9,7 +9,7 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
3.1/init/Kconfig (done differently)
--- head.orig/arch/x86/Kconfig 2013-05-23 17:11:13.000000000 +0200
-+++ head/arch/x86/Kconfig 2013-05-23 17:17:20.000000000 +0200
++++ head/arch/x86/Kconfig 2013-07-02 09:36:56.000000000 +0200
@@ -53,7 +53,7 @@ config X86
select HAVE_SYSCALL_TRACEPOINTS
select SYSCTL_EXCEPTION_TRACE
@@ -36,7 +36,7 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
---help---
Allow the kernel linear mapping to use 1GB pages on CPUs that
support it. This can improve the kernel's performance a tiny bit by
-@@ -2422,6 +2423,4 @@ source "crypto/Kconfig"
+@@ -2423,6 +2424,4 @@ source "crypto/Kconfig"
source "arch/x86/kvm/Kconfig"
@@ -689,7 +689,7 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
}
void mtrr_ap_init(void)
---- head.orig/arch/x86/kernel/cpu/bugs.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/arch/x86/kernel/cpu/bugs.c 2013-07-02 09:20:53.000000000 +0200
+++ head/arch/x86/kernel/cpu/bugs.c 2013-05-23 17:17:14.000000000 +0200
@@ -75,10 +75,12 @@ static void __init check_fpu(void)
@@ -704,7 +704,7 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
}
void __init check_bugs(void)
---- head.orig/arch/x86/kernel/cpu/proc.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/arch/x86/kernel/cpu/proc.c 2013-07-02 09:20:53.000000000 +0200
+++ head/arch/x86/kernel/cpu/proc.c 2013-05-23 17:16:30.000000000 +0200
@@ -10,7 +10,7 @@
static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
@@ -1797,7 +1797,7 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
return error;
}
---- head.orig/arch/x86/kernel/mmconf-fam10h_64.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/arch/x86/kernel/mmconf-fam10h_64.c 2013-07-02 09:20:53.000000000 +0200
+++ head/arch/x86/kernel/mmconf-fam10h_64.c 2011-01-31 18:07:35.000000000 +0100
@@ -205,12 +205,20 @@ void __cpuinit fam10h_check_enable_mmcfg
return;
@@ -5955,1020 +5955,39 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
- .stop = c_stop,
- .show = show_cpuinfo,
-};
---- /dev/null 1970-01-01 00:00:00.000000000 +0000
+--- head.orig/arch/x86/kernel/smp-xen.c 2011-01-31 18:01:51.000000000 +0100
+++ head/arch/x86/kernel/smp-xen.c 2011-01-31 18:07:35.000000000 +0100
-@@ -0,0 +1,327 @@
-+/*
-+ * Intel SMP support routines.
-+ *
-+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
-+ * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
-+ * (c) 2002,2003 Andi Kleen, SuSE Labs.
-+ *
+@@ -5,6 +5,8 @@
+ * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ * (c) 2002,2003 Andi Kleen, SuSE Labs.
+ *
+ * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
+ *
-+ * This code is released under the GNU General Public License version 2 or
-+ * later.
-+ */
-+
-+#include <linux/init.h>
-+
-+#include <linux/mm.h>
-+#include <linux/delay.h>
-+#include <linux/spinlock.h>
-+#include <linux/kernel_stat.h>
-+#include <linux/mc146818rtc.h>
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ */
+@@ -14,289 +16,108 @@
+ #include <linux/mm.h>
+ #include <linux/delay.h>
+ #include <linux/spinlock.h>
+-#include <linux/smp.h>
+ #include <linux/kernel_stat.h>
+ #include <linux/mc146818rtc.h>
+#include <linux/cache.h>
-+#include <linux/interrupt.h>
+ #include <linux/interrupt.h>
+#include <linux/cpu.h>
-+
-+#include <asm/mtrr.h>
-+#include <asm/tlbflush.h>
-+#include <asm/mmu_context.h>
-+#include <asm/proto.h>
-+#include <mach_ipi.h>
-+#include <xen/evtchn.h>
-+/*
-+ * Some notes on x86 processor bugs affecting SMP operation:
-+ *
-+ * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
-+ * The Linux implications for SMP are handled as follows:
-+ *
-+ * Pentium III / [Xeon]
-+ * None of the E1AP-E3AP errata are visible to the user.
-+ *
-+ * E1AP. see PII A1AP
-+ * E2AP. see PII A2AP
-+ * E3AP. see PII A3AP
-+ *
-+ * Pentium II / [Xeon]
-+ * None of the A1AP-A3AP errata are visible to the user.
-+ *
-+ * A1AP. see PPro 1AP
-+ * A2AP. see PPro 2AP
-+ * A3AP. see PPro 7AP
-+ *
-+ * Pentium Pro
-+ * None of 1AP-9AP errata are visible to the normal user,
-+ * except occasional delivery of 'spurious interrupt' as trap #15.
-+ * This is very rare and a non-problem.
-+ *
-+ * 1AP. Linux maps APIC as non-cacheable
-+ * 2AP. worked around in hardware
-+ * 3AP. fixed in C0 and above steppings microcode update.
-+ * Linux does not use excessive STARTUP_IPIs.
-+ * 4AP. worked around in hardware
-+ * 5AP. symmetric IO mode (normal Linux operation) not affected.
-+ * 'noapic' mode has vector 0xf filled out properly.
-+ * 6AP. 'noapic' mode might be affected - fixed in later steppings
-+ * 7AP. We do not assume writes to the LVT deassering IRQs
-+ * 8AP. We do not enable low power mode (deep sleep) during MP bootup
-+ * 9AP. We do not use mixed mode
-+ *
-+ * Pentium
-+ * There is a marginal case where REP MOVS on 100MHz SMP
-+ * machines with B stepping processors can fail. XXX should provide
-+ * an L1cache=Writethrough or L1cache=off option.
-+ *
-+ * B stepping CPUs may hang. There are hardware work arounds
-+ * for this. We warn about it in case your board doesn't have the work
-+ * arounds. Basically that's so I can tell anyone with a B stepping
-+ * CPU and SMP problems "tough".
-+ *
-+ * Specific items [From Pentium Processor Specification Update]
-+ *
-+ * 1AP. Linux doesn't use remote read
-+ * 2AP. Linux doesn't trust APIC errors
-+ * 3AP. We work around this
-+ * 4AP. Linux never generated 3 interrupts of the same priority
-+ * to cause a lost local interrupt.
-+ * 5AP. Remote read is never used
-+ * 6AP. not affected - worked around in hardware
-+ * 7AP. not affected - worked around in hardware
-+ * 8AP. worked around in hardware - we get explicit CS errors if not
-+ * 9AP. only 'noapic' mode affected. Might generate spurious
-+ * interrupts, we log only the first one and count the
-+ * rest silently.
-+ * 10AP. not affected - worked around in hardware
-+ * 11AP. Linux reads the APIC between writes to avoid this, as per
-+ * the documentation. Make sure you preserve this as it affects
-+ * the C stepping chips too.
-+ * 12AP. not affected - worked around in hardware
-+ * 13AP. not affected - worked around in hardware
-+ * 14AP. we always deassert INIT during bootup
-+ * 15AP. not affected - worked around in hardware
-+ * 16AP. not affected - worked around in hardware
-+ * 17AP. not affected - worked around in hardware
-+ * 18AP. not affected - worked around in hardware
-+ * 19AP. not affected - worked around in BIOS
-+ *
-+ * If this sounds worrying believe me these bugs are either ___RARE___,
-+ * or are signal timing bugs worked around in hardware and there's
-+ * about nothing of note with C stepping upwards.
-+ */
-+
-+/*
-+ * this function sends a 'reschedule' IPI to another CPU.
-+ * it goes straight through and wastes no time serializing
-+ * anything. Worst case is that we lose a reschedule ...
-+ */
-+void xen_smp_send_reschedule(int cpu)
-+{
-+ if (unlikely(cpu_is_offline(cpu))) {
-+ WARN_ON(1);
-+ return;
-+ }
-+ send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
-+}
-+
-+/*
-+ * Structure and data for smp_call_function(). This is designed to minimise
-+ * static memory requirements. It also looks cleaner.
-+ */
-+static DEFINE_SPINLOCK(call_lock);
-+
-+struct call_data_struct {
-+ void (*func) (void *info);
-+ void *info;
-+ atomic_t started;
-+ atomic_t finished;
-+ int wait;
-+};
-+
-+void lock_ipi_call_lock(void)
-+{
-+ spin_lock_irq(&call_lock);
-+}
-+
-+void unlock_ipi_call_lock(void)
-+{
-+ spin_unlock_irq(&call_lock);
-+}
-+
-+static struct call_data_struct *call_data;
-+
-+static void __smp_call_function(void (*func) (void *info), void *info,
-+ int nonatomic, int wait)
-+{
-+ struct call_data_struct data;
-+ int cpus = num_online_cpus() - 1;
-+
-+ if (!cpus)
-+ return;
-+
-+ data.func = func;
-+ data.info = info;
-+ atomic_set(&data.started, 0);
-+ data.wait = wait;
-+ if (wait)
-+ atomic_set(&data.finished, 0);
-+
-+ call_data = &data;
-+ mb();
-+
-+ /* Send a message to all other CPUs and wait for them to respond */
-+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-+
-+ /* Wait for response */
-+ while (atomic_read(&data.started) != cpus)
-+ cpu_relax();
-+
-+ if (wait)
-+ while (atomic_read(&data.finished) != cpus)
-+ cpu_relax();
-+}
-+
-+
-+/**
-+ * smp_call_function_mask(): Run a function on a set of other CPUs.
-+ * @mask: The set of cpus to run on. Must not include the current cpu.
-+ * @func: The function to run. This must be fast and non-blocking.
-+ * @info: An arbitrary pointer to pass to the function.
-+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
-+ *
-+ * Returns 0 on success, else a negative status code.
-+ *
-+ * If @wait is true, then returns once @func has returned; otherwise
-+ * it returns just before the target cpu calls @func.
-+ *
-+ * You must not call this function with disabled interrupts or from a
-+ * hardware interrupt handler or from a bottom half handler.
-+ */
-+int
-+xen_smp_call_function_mask(cpumask_t mask,
-+ void (*func)(void *), void *info,
-+ int wait)
-+{
-+ struct call_data_struct data;
-+ cpumask_t allbutself;
-+ int cpus;
-+
-+ /* Can deadlock when called with interrupts disabled */
-+ WARN_ON(irqs_disabled());
-+
-+ /* Holding any lock stops cpus from going down. */
-+ spin_lock(&call_lock);
-+
-+ allbutself = cpu_online_map;
-+ cpu_clear(smp_processor_id(), allbutself);
-+
-+ cpus_and(mask, mask, allbutself);
-+ cpus = cpus_weight(mask);
-+
-+ if (!cpus) {
-+ spin_unlock(&call_lock);
-+ return 0;
-+ }
-+
-+ data.func = func;
-+ data.info = info;
-+ atomic_set(&data.started, 0);
-+ data.wait = wait;
-+ if (wait)
-+ atomic_set(&data.finished, 0);
-+
-+ call_data = &data;
-+ wmb();
-+
-+ /* Send a message to other CPUs */
-+ if (cpus_equal(mask, allbutself) &&
-+ cpus_equal(cpu_online_map, cpu_callout_map))
-+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-+ else
-+ send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
-+
-+ /* Wait for response */
-+ while (atomic_read(&data.started) != cpus)
-+ cpu_relax();
-+
-+ if (wait)
-+ while (atomic_read(&data.finished) != cpus)
-+ cpu_relax();
-+ spin_unlock(&call_lock);
-+
-+ return 0;
-+}
-+
-+static void stop_this_cpu(void *dummy)
-+{
-+ local_irq_disable();
-+ /*
-+ * Remove this CPU:
-+ */
-+ cpu_clear(smp_processor_id(), cpu_online_map);
-+ disable_all_local_evtchn();
-+ if (hlt_works(smp_processor_id()))
-+ for (;;) halt();
-+ for (;;);
-+}
-+
-+/*
-+ * this function calls the 'stop' function on all other CPUs in the system.
-+ */
-+
-+void xen_smp_send_stop(void)
-+{
-+ int nolock;
-+ unsigned long flags;
-+
-+ /* Don't deadlock on the call lock in panic */
-+ nolock = !spin_trylock(&call_lock);
-+ local_irq_save(flags);
-+ __smp_call_function(stop_this_cpu, NULL, 0, 0);
-+ if (!nolock)
-+ spin_unlock(&call_lock);
-+ disable_all_local_evtchn();
-+ local_irq_restore(flags);
-+}
-+
-+/*
-+ * Reschedule call back. Nothing to do,
-+ * all the work is done automatically when
-+ * we return from the interrupt.
-+ */
-+irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
-+{
-+#ifdef CONFIG_X86_32
-+ __get_cpu_var(irq_stat).irq_resched_count++;
-+#else
-+ add_pda(irq_resched_count, 1);
-+#endif
-+ return IRQ_HANDLED;
-+}
-+
-+irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
-+{
-+ void (*func) (void *info) = call_data->func;
-+ void *info = call_data->info;
-+ int wait = call_data->wait;
-+
-+ /*
-+ * Notify initiating CPU that I've grabbed the data and am
-+ * about to execute the function
-+ */
-+ mb();
-+ atomic_inc(&call_data->started);
-+ /*
-+ * At this point the info structure may be out of scope unless wait==1
-+ */
-+ (*func)(info);
-+#ifdef CONFIG_X86_32
-+ __get_cpu_var(irq_stat).irq_call_count++;
-+#else
-+ add_pda(irq_call_count, 1);
-+#endif
-+
-+ if (wait) {
-+ mb();
-+ atomic_inc(&call_data->finished);
-+ }
-+
-+ return IRQ_HANDLED;
-+}
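
The smp-xen.c rewrite above preserves the classic smp_call_function() rendezvous: the caller publishes a call_data_struct, kicks the other CPUs with an IPI, then spins until every target increments 'started' (it has copied the data) and, for synchronous calls, 'finished' (the function has returned). The same protocol can be modeled in user space, with threads standing in for CPUs and thread creation for the IPI — a sketch under those assumptions, not kernel code:

/* callfunc_demo.c - user-space model of the __smp_call_function()
 * rendezvous. Build: cc -pthread -o callfunc_demo callfunc_demo.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

struct call_data {
    void (*func)(void *);
    void *info;
    atomic_int started;
    atomic_int finished;
    int wait;
};

static struct call_data *call_data;     /* published before the "IPI" */

static void say_hello(void *info)
{
    printf("target ran func, info=%s\n", (const char *)info);
}

/* models smp_call_function_interrupt() on a target CPU */
static void *ipi_handler(void *unused)
{
    struct call_data *d = call_data;
    void (*func)(void *) = d->func;
    void *info = d->info;
    int wait = d->wait;

    atomic_fetch_add(&d->started, 1);   /* "I've grabbed the data" */
    func(info);
    if (wait)
        atomic_fetch_add(&d->finished, 1);
    return NULL;
}

int main(void)
{
    struct call_data data = { say_hello, "ping", 0, 0, 1 };
    pthread_t cpu[NCPUS - 1];
    int i;

    call_data = &data;
    for (i = 0; i < NCPUS - 1; i++)     /* "send_IPI_allbutself()" */
        pthread_create(&cpu[i], NULL, ipi_handler, NULL);

    while (atomic_load(&data.started) != NCPUS - 1)
        ;   /* after this, data may go out of scope unless wait == 1 */
    while (data.wait && atomic_load(&data.finished) != NCPUS - 1)
        ;
    for (i = 0; i < NCPUS - 1; i++)
        pthread_join(cpu[i], NULL);
    return 0;
}
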
---- head.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 18:01:51.000000000 +0100
-+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
-@@ -1,647 +0,0 @@
--/*
-- * Intel SMP support routines.
-- *
-- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
-- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
-- *
-- * This code is released under the GNU General Public License version 2 or
-- * later.
-- */
--
--#include <linux/init.h>
--
--#include <linux/mm.h>
--#include <linux/delay.h>
--#include <linux/spinlock.h>
--#include <linux/kernel_stat.h>
--#include <linux/mc146818rtc.h>
--#include <linux/cache.h>
--#include <linux/interrupt.h>
--#include <linux/cpu.h>
--#include <linux/module.h>
--
--#include <asm/mtrr.h>
--#include <asm/tlbflush.h>
--#include <asm/mmu_context.h>
--#if 0
--#include <mach_apic.h>
--#endif
--#include <xen/evtchn.h>
--
--/*
-- * Some notes on x86 processor bugs affecting SMP operation:
-- *
-- * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
-- * The Linux implications for SMP are handled as follows:
-- *
-- * Pentium III / [Xeon]
-- * None of the E1AP-E3AP errata are visible to the user.
-- *
-- * E1AP. see PII A1AP
-- * E2AP. see PII A2AP
-- * E3AP. see PII A3AP
-- *
-- * Pentium II / [Xeon]
-- * None of the A1AP-A3AP errata are visible to the user.
-- *
-- * A1AP. see PPro 1AP
-- * A2AP. see PPro 2AP
-- * A3AP. see PPro 7AP
-- *
-- * Pentium Pro
-- * None of 1AP-9AP errata are visible to the normal user,
-- * except occasional delivery of 'spurious interrupt' as trap #15.
-- * This is very rare and a non-problem.
-- *
-- * 1AP. Linux maps APIC as non-cacheable
-- * 2AP. worked around in hardware
-- * 3AP. fixed in C0 and above steppings microcode update.
-- * Linux does not use excessive STARTUP_IPIs.
-- * 4AP. worked around in hardware
-- * 5AP. symmetric IO mode (normal Linux operation) not affected.
-- * 'noapic' mode has vector 0xf filled out properly.
-- * 6AP. 'noapic' mode might be affected - fixed in later steppings
-- * 7AP. We do not assume writes to the LVT deassering IRQs
-- * 8AP. We do not enable low power mode (deep sleep) during MP bootup
-- * 9AP. We do not use mixed mode
-- *
-- * Pentium
-- * There is a marginal case where REP MOVS on 100MHz SMP
-- * machines with B stepping processors can fail. XXX should provide
-- * an L1cache=Writethrough or L1cache=off option.
-- *
-- * B stepping CPUs may hang. There are hardware work arounds
-- * for this. We warn about it in case your board doesn't have the work
-- * arounds. Basically that's so I can tell anyone with a B stepping
-- * CPU and SMP problems "tough".
-- *
-- * Specific items [From Pentium Processor Specification Update]
-- *
-- * 1AP. Linux doesn't use remote read
-- * 2AP. Linux doesn't trust APIC errors
-- * 3AP. We work around this
-- * 4AP. Linux never generated 3 interrupts of the same priority
-- * to cause a lost local interrupt.
-- * 5AP. Remote read is never used
-- * 6AP. not affected - worked around in hardware
-- * 7AP. not affected - worked around in hardware
-- * 8AP. worked around in hardware - we get explicit CS errors if not
-- * 9AP. only 'noapic' mode affected. Might generate spurious
-- * interrupts, we log only the first one and count the
-- * rest silently.
-- * 10AP. not affected - worked around in hardware
-- * 11AP. Linux reads the APIC between writes to avoid this, as per
-- * the documentation. Make sure you preserve this as it affects
-- * the C stepping chips too.
-- * 12AP. not affected - worked around in hardware
-- * 13AP. not affected - worked around in hardware
-- * 14AP. we always deassert INIT during bootup
-- * 15AP. not affected - worked around in hardware
-- * 16AP. not affected - worked around in hardware
-- * 17AP. not affected - worked around in hardware
-- * 18AP. not affected - worked around in hardware
-- * 19AP. not affected - worked around in BIOS
-- *
-- * If this sounds worrying believe me these bugs are either ___RARE___,
-- * or are signal timing bugs worked around in hardware and there's
-- * about nothing of note with C stepping upwards.
-- */
--
--DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
--
--/*
-- * the following functions deal with sending IPIs between CPUs.
-- *
-- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
-- */
--
--static inline int __prepare_ICR (unsigned int shortcut, int vector)
--{
-- unsigned int icr = shortcut | APIC_DEST_LOGICAL;
--
-- switch (vector) {
-- default:
-- icr |= APIC_DM_FIXED | vector;
-- break;
-- case NMI_VECTOR:
-- icr |= APIC_DM_NMI;
-- break;
-- }
-- return icr;
--}
--
--static inline int __prepare_ICR2 (unsigned int mask)
--{
-- return SET_APIC_DEST_FIELD(mask);
--}
--
--DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
--
--static inline void __send_IPI_one(unsigned int cpu, int vector)
--{
-- int irq = per_cpu(ipi_to_irq, cpu)[vector];
-- BUG_ON(irq < 0);
-- notify_remote_via_irq(irq);
--}
--
--void __send_IPI_shortcut(unsigned int shortcut, int vector)
--{
-- int cpu;
--
-- switch (shortcut) {
-- case APIC_DEST_SELF:
-- __send_IPI_one(smp_processor_id(), vector);
-- break;
-- case APIC_DEST_ALLBUT:
-- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
-- if (cpu == smp_processor_id())
-- continue;
-- if (cpu_isset(cpu, cpu_online_map)) {
-- __send_IPI_one(cpu, vector);
-- }
-- }
-- break;
-- default:
-- printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
-- vector);
-- break;
-- }
--}
--
--void send_IPI_self(int vector)
--{
-- __send_IPI_shortcut(APIC_DEST_SELF, vector);
--}
--
--/*
-- * This is only used on smaller machines.
-- */
--void send_IPI_mask_bitmask(cpumask_t mask, int vector)
--{
-- unsigned long flags;
-- unsigned int cpu;
--
-- local_irq_save(flags);
-- WARN_ON(cpus_addr(mask)[0] & ~cpus_addr(cpu_online_map)[0]);
--
-- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
-- if (cpu_isset(cpu, mask)) {
-- __send_IPI_one(cpu, vector);
-- }
-- }
--
-- local_irq_restore(flags);
--}
--
--void send_IPI_mask_sequence(cpumask_t mask, int vector)
--{
--
-- send_IPI_mask_bitmask(mask, vector);
--}
--
--#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
--
--#if 0 /* XEN */
--/*
-- * Smarter SMP flushing macros.
-- * c/o Linus Torvalds.
-- *
-- * These mean you can really definitely utterly forget about
-- * writing to user space from interrupts. (Its not allowed anyway).
-- *
-- * Optimizations Manfred Spraul <manfred@colorfullife.com>
-- */
--
--static cpumask_t flush_cpumask;
--static struct mm_struct * flush_mm;
--static unsigned long flush_va;
--static DEFINE_SPINLOCK(tlbstate_lock);
--
--/*
-- * We cannot call mmdrop() because we are in interrupt context,
-- * instead update mm->cpu_vm_mask.
-- *
-- * We need to reload %cr3 since the page tables may be going
-- * away from under us..
-- */
--void leave_mm(int cpu)
--{
-- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
-- BUG();
-- cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
-- load_cr3(swapper_pg_dir);
--}
--EXPORT_SYMBOL_GPL(leave_mm);
--
--/*
-- *
-- * The flush IPI assumes that a thread switch happens in this order:
-- * [cpu0: the cpu that switches]
-- * 1) switch_mm() either 1a) or 1b)
-- * 1a) thread switch to a different mm
-- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
-- * Stop ipi delivery for the old mm. This is not synchronized with
-- * the other cpus, but smp_invalidate_interrupt ignore flush ipis
-- * for the wrong mm, and in the worst case we perform a superfluous
-- * tlb flush.
-- * 1a2) set cpu_tlbstate to TLBSTATE_OK
-- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
-- * was in lazy tlb mode.
-- * 1a3) update cpu_tlbstate[].active_mm
-- * Now cpu0 accepts tlb flushes for the new mm.
-- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
-- * Now the other cpus will send tlb flush ipis.
-- * 1a4) change cr3.
-- * 1b) thread switch without mm change
-- * cpu_tlbstate[].active_mm is correct, cpu0 already handles
-- * flush ipis.
-- * 1b1) set cpu_tlbstate to TLBSTATE_OK
-- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
-- * Atomically set the bit [other cpus will start sending flush ipis],
-- * and test the bit.
-- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
-- * 2) switch %%esp, ie current
-- *
-- * The interrupt must handle 2 special cases:
-- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
-- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
-- * runs in kernel space, the cpu could load tlb entries for user space
-- * pages.
-- *
-- * The good news is that cpu_tlbstate is local to each cpu, no
-- * write/read ordering problems.
-- */
--
--/*
-- * TLB flush IPI:
-- *
-- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
-- * 2) Leave the mm if we are in the lazy tlb mode.
-- */
--
--irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
--{
-- unsigned long cpu;
--
-- cpu = get_cpu();
--
-- if (!cpu_isset(cpu, flush_cpumask))
-- goto out;
-- /*
-- * This was a BUG() but until someone can quote me the
-- * line from the intel manual that guarantees an IPI to
-- * multiple CPUs is retried _only_ on the erroring CPUs
-- * its staying as a return
-- *
-- * BUG();
-- */
--
-- if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
-- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
-- if (flush_va == TLB_FLUSH_ALL)
-- local_flush_tlb();
-- else
-- __flush_tlb_one(flush_va);
-- } else
-- leave_mm(cpu);
-- }
-- smp_mb__before_clear_bit();
-- cpu_clear(cpu, flush_cpumask);
-- smp_mb__after_clear_bit();
--out:
-- put_cpu_no_resched();
-- __get_cpu_var(irq_stat).irq_tlb_count++;
--
-- return IRQ_HANDLED;
--}
--
--void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
-- unsigned long va)
--{
-- cpumask_t cpumask = *cpumaskp;
--
-- /*
-- * A couple of (to be removed) sanity checks:
-- *
-- * - current CPU must not be in mask
-- * - mask must exist :)
-- */
-- BUG_ON(cpus_empty(cpumask));
-- BUG_ON(cpu_isset(smp_processor_id(), cpumask));
-- BUG_ON(!mm);
--
--#ifdef CONFIG_HOTPLUG_CPU
-- /* If a CPU which we ran on has gone down, OK. */
-- cpus_and(cpumask, cpumask, cpu_online_map);
-- if (unlikely(cpus_empty(cpumask)))
-- return;
--#endif
--
-- /*
-- * i'm not happy about this global shared spinlock in the
-- * MM hot path, but we'll see how contended it is.
-- * AK: x86-64 has a faster method that could be ported.
-- */
-- spin_lock(&tlbstate_lock);
--
-- flush_mm = mm;
-- flush_va = va;
-- cpus_or(flush_cpumask, cpumask, flush_cpumask);
-- /*
-- * We have to send the IPI only to
-- * CPUs affected.
-- */
-- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
--
-- while (!cpus_empty(flush_cpumask))
-- /* nothing. lockup detection does not belong here */
-- cpu_relax();
--
-- flush_mm = NULL;
-- flush_va = 0;
-- spin_unlock(&tlbstate_lock);
--}
--
--void flush_tlb_current_task(void)
--{
-- struct mm_struct *mm = current->mm;
-- cpumask_t cpu_mask;
--
-- preempt_disable();
-- cpu_mask = mm->cpu_vm_mask;
-- cpu_clear(smp_processor_id(), cpu_mask);
--
-- local_flush_tlb();
-- if (!cpus_empty(cpu_mask))
-- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
-- preempt_enable();
--}
--
--void flush_tlb_mm (struct mm_struct * mm)
--{
-- cpumask_t cpu_mask;
--
-- preempt_disable();
-- cpu_mask = mm->cpu_vm_mask;
-- cpu_clear(smp_processor_id(), cpu_mask);
--
-- if (current->active_mm == mm) {
-- if (current->mm)
-- local_flush_tlb();
-- else
-- leave_mm(smp_processor_id());
-- }
-- if (!cpus_empty(cpu_mask))
-- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
--
-- preempt_enable();
--}
--
--void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
--{
-- struct mm_struct *mm = vma->vm_mm;
-- cpumask_t cpu_mask;
--
-- preempt_disable();
-- cpu_mask = mm->cpu_vm_mask;
-- cpu_clear(smp_processor_id(), cpu_mask);
--
-- if (current->active_mm == mm) {
-- if(current->mm)
-- __flush_tlb_one(va);
-- else
-- leave_mm(smp_processor_id());
-- }
--
-- if (!cpus_empty(cpu_mask))
-- flush_tlb_others(cpu_mask, mm, va);
--
-- preempt_enable();
--}
--EXPORT_SYMBOL(flush_tlb_page);
--
--static void do_flush_tlb_all(void* info)
--{
-- unsigned long cpu = smp_processor_id();
--
-- __flush_tlb_all();
-- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
-- leave_mm(cpu);
--}
--
--void flush_tlb_all(void)
--{
-- on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
--}
--
--#endif /* XEN */
--
--/*
-- * this function sends a 'reschedule' IPI to another CPU.
-- * it goes straight through and wastes no time serializing
-- * anything. Worst case is that we lose a reschedule ...
-- */
--void xen_smp_send_reschedule(int cpu)
--{
-- WARN_ON(cpu_is_offline(cpu));
-- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
--}
--
--/*
-- * Structure and data for smp_call_function(). This is designed to minimise
-- * static memory requirements. It also looks cleaner.
-- */
--static DEFINE_SPINLOCK(call_lock);
--
--struct call_data_struct {
-- void (*func) (void *info);
-- void *info;
-- atomic_t started;
-- atomic_t finished;
-- int wait;
--};
--
--void lock_ipi_call_lock(void)
--{
-- spin_lock_irq(&call_lock);
--}
--
--void unlock_ipi_call_lock(void)
--{
-- spin_unlock_irq(&call_lock);
--}
--
--static struct call_data_struct *call_data;
--
--static void __smp_call_function(void (*func) (void *info), void *info,
-- int nonatomic, int wait)
--{
-- struct call_data_struct data;
-- int cpus = num_online_cpus() - 1;
--
-- if (!cpus)
-- return;
--
-- data.func = func;
-- data.info = info;
-- atomic_set(&data.started, 0);
-- data.wait = wait;
-- if (wait)
-- atomic_set(&data.finished, 0);
--
-- call_data = &data;
-- mb();
--
-- /* Send a message to all other CPUs and wait for them to respond */
-- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
--
-- /* Wait for response */
-- while (atomic_read(&data.started) != cpus)
-- cpu_relax();
--
-- if (wait)
-- while (atomic_read(&data.finished) != cpus)
-- cpu_relax();
--}
--
--
--/**
-- * smp_call_function_mask(): Run a function on a set of other CPUs.
-- * @mask: The set of cpus to run on. Must not include the current cpu.
-- * @func: The function to run. This must be fast and non-blocking.
-- * @info: An arbitrary pointer to pass to the function.
-- * @wait: If true, wait (atomically) until function has completed on other CPUs.
-- *
-- * Returns 0 on success, else a negative status code.
-- *
-- * If @wait is true, then returns once @func has returned; otherwise
-- * it returns just before the target cpu calls @func.
-- *
-- * You must not call this function with disabled interrupts or from a
-- * hardware interrupt handler or from a bottom half handler.
-- */
--int
--xen_smp_call_function_mask(cpumask_t mask,
-- void (*func)(void *), void *info,
-- int wait)
--{
-- struct call_data_struct data;
-- cpumask_t allbutself;
-- int cpus;
--
-- /* Can deadlock when called with interrupts disabled */
-- WARN_ON(irqs_disabled());
--
-- /* Holding any lock stops cpus from going down. */
-- spin_lock(&call_lock);
--
-- allbutself = cpu_online_map;
-- cpu_clear(smp_processor_id(), allbutself);
--
-- cpus_and(mask, mask, allbutself);
-- cpus = cpus_weight(mask);
--
-- if (!cpus) {
-- spin_unlock(&call_lock);
-- return 0;
-- }
--
-- data.func = func;
-- data.info = info;
-- atomic_set(&data.started, 0);
-- data.wait = wait;
-- if (wait)
-- atomic_set(&data.finished, 0);
--
-- call_data = &data;
-- mb();
--
-- /* Send a message to other CPUs */
-- if (cpus_equal(mask, allbutself))
-- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-- else
-- send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
--
-- /* Wait for response */
-- while (atomic_read(&data.started) != cpus)
-- cpu_relax();
--
-- if (wait)
-- while (atomic_read(&data.finished) != cpus)
-- cpu_relax();
-- spin_unlock(&call_lock);
--
-- return 0;
--}
--
--static void stop_this_cpu (void * dummy)
--{
-- local_irq_disable();
-- /*
-- * Remove this CPU:
-- */
-- cpu_clear(smp_processor_id(), cpu_online_map);
-- disable_all_local_evtchn();
-- if (cpu_data(smp_processor_id()).hlt_works_ok)
-- for(;;) halt();
-- for (;;);
--}
--
--/*
-- * this function calls the 'stop' function on all other CPUs in the system.
-- */
--
--void xen_smp_send_stop(void)
--{
-- /* Don't deadlock on the call lock in panic */
-- int nolock = !spin_trylock(&call_lock);
-- unsigned long flags;
--
-- local_irq_save(flags);
-- __smp_call_function(stop_this_cpu, NULL, 0, 0);
-- if (!nolock)
-- spin_unlock(&call_lock);
-- disable_all_local_evtchn();
-- local_irq_restore(flags);
--}
--
--/*
-- * Reschedule call back. Nothing to do,
-- * all the work is done automatically when
-- * we return from the interrupt.
-- */
--irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
--{
-- __get_cpu_var(irq_stat).irq_resched_count++;
--
-- return IRQ_HANDLED;
--}
--
--#include <linux/kallsyms.h>
--irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
--{
-- void (*func) (void *info) = call_data->func;
-- void *info = call_data->info;
-- int wait = call_data->wait;
--
-- /*
-- * Notify initiating CPU that I've grabbed the data and am
-- * about to execute the function
-- */
-- mb();
-- atomic_inc(&call_data->started);
-- /*
-- * At this point the info structure may be out of scope unless wait==1
-- */
-- irq_enter();
-- (*func)(info);
-- __get_cpu_var(irq_stat).irq_call_count++;
-- irq_exit();
--
-- if (wait) {
-- mb();
-- atomic_inc(&call_data->finished);
-- }
--
-- return IRQ_HANDLED;
--}
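
The deleted smp_32-xen.c also held the lazy-TLB machinery documented at length above: a CPU receiving a flush IPI for the mm it last used either flushes (TLBSTATE_OK) or, if it sits idle in lazy mode, calls leave_mm() so it stops receiving flushes for that mm at all. The decision table, reduced to a toy model (plain ints stand in for mm pointers; none of this is the kernel's real layout):

/* lazy_tlb_demo.c - decision logic of the deleted flush-IPI handler. */
#include <stdio.h>

enum tlbstate { TLBSTATE_OK = 1, TLBSTATE_LAZY = 2 };

struct cpu {
    int id;
    enum tlbstate state;
    int active_mm;      /* stand-in for a struct mm_struct pointer */
};

/* models the core of the deleted smp_invalidate_interrupt() */
static void flush_ipi(struct cpu *c, int flush_mm)
{
    if (c->active_mm != flush_mm)
        return;         /* IPI for someone else's mm: ignore */
    if (c->state == TLBSTATE_OK)
        printf("cpu%d: flush TLB for mm %d\n", c->id, flush_mm);
    else
        printf("cpu%d: lazy, leave_mm(%d) instead of flushing\n",
               c->id, flush_mm);
}

int main(void)
{
    struct cpu active = { 0, TLBSTATE_OK,   42 };
    struct cpu lazy   = { 1, TLBSTATE_LAZY, 42 };

    flush_ipi(&active, 42);
    flush_ipi(&lazy, 42);
    return 0;
}
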
---- head.orig/arch/x86/kernel/smp_64-xen.c 2011-01-31 18:01:51.000000000 +0100
-+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
-@@ -1,554 +0,0 @@
--/*
-- * Intel SMP support routines.
-- *
-- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
-- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
-- * (c) 2002,2003 Andi Kleen, SuSE Labs.
-- *
-- * This code is released under the GNU General Public License version 2 or
-- * later.
-- */
--
--#include <linux/init.h>
--
--#include <linux/mm.h>
--#include <linux/delay.h>
--#include <linux/spinlock.h>
--#include <linux/smp.h>
--#include <linux/kernel_stat.h>
--#include <linux/mc146818rtc.h>
--#include <linux/interrupt.h>
--
--#include <asm/mtrr.h>
+
+ #include <asm/mtrr.h>
-#include <asm/pgalloc.h>
--#include <asm/tlbflush.h>
+ #include <asm/tlbflush.h>
-#include <asm/mach_apic.h>
--#include <asm/mmu_context.h>
--#include <asm/proto.h>
+ #include <asm/mmu_context.h>
+ #include <asm/proto.h>
-#include <asm/apicdef.h>
-#include <asm/idle.h>
-#ifdef CONFIG_XEN
--#include <xen/evtchn.h>
++#include <mach_ipi.h>
+ #include <xen/evtchn.h>
-#endif
-
-#ifndef CONFIG_XEN
@@ -7022,8 +6041,9 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
-}
-EXPORT_SYMBOL_GPL(leave_mm);
-
--/*
-- *
+ /*
++ * Some notes on x86 processor bugs affecting SMP operation:
+ *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
@@ -7056,20 +6076,92 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- * runs in kernel space, the cpu could load tlb entries for user space
- * pages.
-- *
++ * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
++ * The Linux implications for SMP are handled as follows:
+ *
- * The good news is that cpu mmu_state is local to each cpu, no
- * write/read ordering problems.
- */
-
-/*
- * TLB flush IPI:
-- *
++ * Pentium III / [Xeon]
++ * None of the E1AP-E3AP errata are visible to the user.
+ *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- *
- * Interrupts are disabled.
-- */
--
++ * E1AP. see PII A1AP
++ * E2AP. see PII A2AP
++ * E3AP. see PII A3AP
++ *
++ * Pentium II / [Xeon]
++ * None of the A1AP-A3AP errata are visible to the user.
++ *
++ * A1AP. see PPro 1AP
++ * A2AP. see PPro 2AP
++ * A3AP. see PPro 7AP
++ *
++ * Pentium Pro
++ * None of 1AP-9AP errata are visible to the normal user,
++ * except occasional delivery of 'spurious interrupt' as trap #15.
++ * This is very rare and a non-problem.
++ *
++ * 1AP. Linux maps APIC as non-cacheable
++ * 2AP. worked around in hardware
++ * 3AP. fixed in C0 and above steppings microcode update.
++ * Linux does not use excessive STARTUP_IPIs.
++ * 4AP. worked around in hardware
++ * 5AP. symmetric IO mode (normal Linux operation) not affected.
++ * 'noapic' mode has vector 0xf filled out properly.
++ * 6AP. 'noapic' mode might be affected - fixed in later steppings
++ * 7AP. We do not assume writes to the LVT deassering IRQs
++ * 8AP. We do not enable low power mode (deep sleep) during MP bootup
++ * 9AP. We do not use mixed mode
++ *
++ * Pentium
++ * There is a marginal case where REP MOVS on 100MHz SMP
++ * machines with B stepping processors can fail. XXX should provide
++ * an L1cache=Writethrough or L1cache=off option.
++ *
++ * B stepping CPUs may hang. There are hardware work arounds
++ * for this. We warn about it in case your board doesn't have the work
++ * arounds. Basically that's so I can tell anyone with a B stepping
++ * CPU and SMP problems "tough".
++ *
++ * Specific items [From Pentium Processor Specification Update]
++ *
++ * 1AP. Linux doesn't use remote read
++ * 2AP. Linux doesn't trust APIC errors
++ * 3AP. We work around this
++ * 4AP. Linux never generated 3 interrupts of the same priority
++ * to cause a lost local interrupt.
++ * 5AP. Remote read is never used
++ * 6AP. not affected - worked around in hardware
++ * 7AP. not affected - worked around in hardware
++ * 8AP. worked around in hardware - we get explicit CS errors if not
++ * 9AP. only 'noapic' mode affected. Might generate spurious
++ * interrupts, we log only the first one and count the
++ * rest silently.
++ * 10AP. not affected - worked around in hardware
++ * 11AP. Linux reads the APIC between writes to avoid this, as per
++ * the documentation. Make sure you preserve this as it affects
++ * the C stepping chips too.
++ * 12AP. not affected - worked around in hardware
++ * 13AP. not affected - worked around in hardware
++ * 14AP. we always deassert INIT during bootup
++ * 15AP. not affected - worked around in hardware
++ * 16AP. not affected - worked around in hardware
++ * 17AP. not affected - worked around in hardware
++ * 18AP. not affected - worked around in hardware
++ * 19AP. not affected - worked around in BIOS
++ *
++ * If this sounds worrying believe me these bugs are either ___RARE___,
++ * or are signal timing bugs worked around in hardware and there's
++ * about nothing of note with C stepping upwards.
+ */
+
-asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
-{
- int cpu;
@@ -7229,43 +6321,35 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
-}
-#endif /* Xen */
-
--/*
-- * this function sends a 'reschedule' IPI to another CPU.
-- * it goes straight through and wastes no time serializing
-- * anything. Worst case is that we lose a reschedule ...
-- */
+ /*
+ * this function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
-
-void smp_send_reschedule(int cpu)
--{
-- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
--}
--
--/*
-- * Structure and data for smp_call_function(). This is designed to minimise
-- * static memory requirements. It also looks cleaner.
-- */
--static DEFINE_SPINLOCK(call_lock);
--
--struct call_data_struct {
-- void (*func) (void *info);
-- void *info;
-- atomic_t started;
-- atomic_t finished;
-- int wait;
--};
--
++void xen_smp_send_reschedule(int cpu)
+ {
++ if (unlikely(cpu_is_offline(cpu))) {
++ WARN_ON(1);
++ return;
++ }
+ send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+ }
+
+@@ -314,8 +135,6 @@ struct call_data_struct {
+ int wait;
+ };
+
-static struct call_data_struct * call_data;
-
--void lock_ipi_call_lock(void)
--{
-- spin_lock_irq(&call_lock);
--}
--
--void unlock_ipi_call_lock(void)
--{
-- spin_unlock_irq(&call_lock);
--}
--
+ void lock_ipi_call_lock(void)
+ {
+ spin_lock_irq(&call_lock);
+@@ -326,26 +145,16 @@ void unlock_ipi_call_lock(void)
+ spin_unlock_irq(&call_lock);
+ }
+
-/*
- * this function sends a 'generic call function' IPI to all other CPU
- * of the system defined in the mask.
@@ -7273,8 +6357,12 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
-static int __smp_call_function_mask(cpumask_t mask,
- void (*func)(void *), void *info,
- int wait)
--{
-- struct call_data_struct data;
++static struct call_data_struct *call_data;
++
++static void __smp_call_function(void (*func) (void *info), void *info,
++ int nonatomic, int wait)
+ {
+ struct call_data_struct data;
- cpumask_t allbutself;
- int cpus;
-
@@ -7283,69 +6371,85 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
-
- cpus_and(mask, mask, allbutself);
- cpus = cpus_weight(mask);
--
-- if (!cpus)
++ int cpus = num_online_cpus() - 1;
+
+ if (!cpus)
- return 0;
--
-- data.func = func;
-- data.info = info;
-- atomic_set(&data.started, 0);
-- data.wait = wait;
-- if (wait)
-- atomic_set(&data.finished, 0);
--
-- call_data = &data;
++ return;
+
+ data.func = func;
+ data.info = info;
+@@ -355,26 +164,21 @@ static int __smp_call_function_mask(cpum
+ atomic_set(&data.finished, 0);
+
+ call_data = &data;
- wmb();
--
++ mb();
+
- /* Send a message to other CPUs */
- if (cpus_equal(mask, allbutself))
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
- else
- send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
--
-- /* Wait for response */
-- while (atomic_read(&data.started) != cpus)
-- cpu_relax();
--
++ /* Send a message to all other CPUs and wait for them to respond */
++ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ cpu_relax();
+
- if (!wait)
- return 0;
--
++ if (wait)
++ while (atomic_read(&data.finished) != cpus)
++ cpu_relax();
++}
+
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
--
+
- return 0;
-}
--/**
-- * smp_call_function_mask(): Run a function on a set of other CPUs.
-- * @mask: The set of cpus to run on. Must not include the current cpu.
-- * @func: The function to run. This must be fast and non-blocking.
-- * @info: An arbitrary pointer to pass to the function.
-- * @wait: If true, wait (atomically) until function has completed on other CPUs.
-- *
+ /**
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * @mask: The set of cpus to run on. Must not include the current cpu.
+@@ -382,7 +186,7 @@ static int __smp_call_function_mask(cpum
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
- * Returns 0 on success, else a negative status code.
-- *
-- * If @wait is true, then returns once @func has returned; otherwise
-- * it returns just before the target cpu calls @func.
-- *
-- * You must not call this function with disabled interrupts or from a
-- * hardware interrupt handler or from a bottom half handler.
-- */
++ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+@@ -390,80 +194,60 @@ static int __smp_call_function_mask(cpum
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
-int smp_call_function_mask(cpumask_t mask,
- void (*func)(void *), void *info,
- int wait)
--{
++int
++xen_smp_call_function_mask(cpumask_t mask,
++ void (*func)(void *), void *info,
++ int wait)
+ {
- int ret;
--
-- /* Can deadlock when called with interrupts disabled */
-- WARN_ON(irqs_disabled());
--
-- spin_lock(&call_lock);
++ struct call_data_struct data;
++ cpumask_t allbutself;
++ int cpus;
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
++ /* Holding any lock stops cpus from going down. */
+ spin_lock(&call_lock);
- ret = __smp_call_function_mask(mask, func, info, wait);
- spin_unlock(&call_lock);
- return ret;
-}
-EXPORT_SYMBOL(smp_call_function_mask);
--
+
-/*
- * smp_call_function_single - Run a function on a specific CPU
- * @func: The function to run. This must be fast and non-blocking.
@@ -7364,25 +6468,39 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
-{
- /* prevent preemption and reschedule on another processor */
- int ret, me = get_cpu();
--
++ allbutself = cpu_online_map;
++ cpu_clear(smp_processor_id(), allbutself);
+
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
--
++ cpus_and(mask, mask, allbutself);
++ cpus = cpus_weight(mask);
+
- if (cpu == me) {
- local_irq_disable();
- func(info);
- local_irq_enable();
- put_cpu();
-- return 0;
-- }
--
++ if (!cpus) {
++ spin_unlock(&call_lock);
+ return 0;
+ }
+
- ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
--
++ data.func = func;
++ data.info = info;
++ atomic_set(&data.started, 0);
++ data.wait = wait;
++ if (wait)
++ atomic_set(&data.finished, 0);
+
- put_cpu();
- return ret;
-}
-EXPORT_SYMBOL(smp_call_function_single);
--
++ call_data = &data;
++ wmb();
+
-/*
- * smp_call_function - run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
@@ -7402,96 +6520,126 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
- int wait)
-{
- return smp_call_function_mask(cpu_online_map, func, info, wait);
--}
++ /* Send a message to other CPUs */
++ if (cpus_equal(mask, allbutself) &&
++ cpus_equal(cpu_online_map, cpu_callout_map))
++ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
++ else
++ send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
++
++ /* Wait for response */
++ while (atomic_read(&data.started) != cpus)
++ cpu_relax();
++
++ if (wait)
++ while (atomic_read(&data.finished) != cpus)
++ cpu_relax();
++ spin_unlock(&call_lock);
++
++ return 0;
+ }
-EXPORT_SYMBOL(smp_call_function);
--
--static void stop_this_cpu(void *dummy)
--{
-- local_irq_disable();
-- /*
-- * Remove this CPU:
-- */
-- cpu_clear(smp_processor_id(), cpu_online_map);
-- disable_all_local_evtchn();
+
+ static void stop_this_cpu(void *dummy)
+ {
+@@ -473,24 +257,24 @@ static void stop_this_cpu(void *dummy)
+ */
+ cpu_clear(smp_processor_id(), cpu_online_map);
+ disable_all_local_evtchn();
- for (;;)
- halt();
--}
--
++ if (hlt_works(smp_processor_id()))
++ for (;;) halt();
++ for (;;);
+ }
+
-void smp_send_stop(void)
--{
-- int nolock;
-- unsigned long flags;
--
++/*
++ * this function calls the 'stop' function on all other CPUs in the system.
++ */
++
++void xen_smp_send_stop(void)
+ {
+ int nolock;
+ unsigned long flags;
+
-#ifndef CONFIG_XEN
- if (reboot_force)
- return;
-#endif
-
-- /* Don't deadlock on the call lock in panic */
-- nolock = !spin_trylock(&call_lock);
-- local_irq_save(flags);
+ /* Don't deadlock on the call lock in panic */
+ nolock = !spin_trylock(&call_lock);
+ local_irq_save(flags);
- __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0);
-- if (!nolock)
-- spin_unlock(&call_lock);
-- disable_all_local_evtchn();
-- local_irq_restore(flags);
--}
--
--/*
-- * Reschedule call back. Nothing to do,
-- * all the work is done automatically when
-- * we return from the interrupt.
-- */
++ __smp_call_function(stop_this_cpu, NULL, 0, 0);
+ if (!nolock)
+ spin_unlock(&call_lock);
+ disable_all_local_evtchn();
+@@ -502,34 +286,22 @@ void smp_send_stop(void)
+ * all the work is done automatically when
+ * we return from the interrupt.
+ */
-#ifndef CONFIG_XEN
-asmlinkage void smp_reschedule_interrupt(void)
-#else
-asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx)
-#endif
--{
++irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
+ {
-#ifndef CONFIG_XEN
- ack_APIC_irq();
-#endif
-- add_pda(irq_resched_count, 1);
++#ifdef CONFIG_X86_32
++ __get_cpu_var(irq_stat).irq_resched_count++;
++#else
+ add_pda(irq_resched_count, 1);
-#ifdef CONFIG_XEN
- return IRQ_HANDLED;
--#endif
--}
--
+ #endif
++ return IRQ_HANDLED;
+ }
+
-#ifndef CONFIG_XEN
-asmlinkage void smp_call_function_interrupt(void)
-#else
-asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx)
-#endif
--{
-- void (*func) (void *info) = call_data->func;
-- void *info = call_data->info;
-- int wait = call_data->wait;
--
++irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
+ {
+ void (*func) (void *info) = call_data->func;
+ void *info = call_data->info;
+ int wait = call_data->wait;
+
-#ifndef CONFIG_XEN
- ack_APIC_irq();
-#endif
-- /*
-- * Notify initiating CPU that I've grabbed the data and am
-- * about to execute the function
-- */
-- mb();
-- atomic_inc(&call_data->started);
-- /*
-- * At this point the info structure may be out of scope unless wait==1
-- */
+ /*
+ * Notify initiating CPU that I've grabbed the data and am
+ * about to execute the function
+@@ -539,16 +311,17 @@ asmlinkage irqreturn_t smp_call_function
+ /*
+ * At this point the info structure may be out of scope unless wait==1
+ */
- exit_idle();
- irq_enter();
-- (*func)(info);
-- add_pda(irq_call_count, 1);
+ (*func)(info);
++#ifdef CONFIG_X86_32
++ __get_cpu_var(irq_stat).irq_call_count++;
++#else
+ add_pda(irq_call_count, 1);
- irq_exit();
-- if (wait) {
-- mb();
-- atomic_inc(&call_data->finished);
-- }
++#endif
++
+ if (wait) {
+ mb();
+ atomic_inc(&call_data->finished);
+ }
-#ifdef CONFIG_XEN
-- return IRQ_HANDLED;
++
+ return IRQ_HANDLED;
-#endif
--}
+ }
--- head.orig/arch/x86/kernel/time-xen.c 2012-02-10 13:27:37.000000000 +0100
+++ head/arch/x86/kernel/time-xen.c 2012-02-10 13:27:59.000000000 +0100
@@ -709,8 +709,6 @@ static void init_missing_ticks_accountin
@@ -12556,7 +11704,7 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
arch_fix_phys_package_id(pr->id, object.integer.value);
return 0;
---- head.orig/drivers/firmware/Kconfig 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/drivers/firmware/Kconfig 2013-07-02 09:20:53.000000000 +0200
+++ head/drivers/firmware/Kconfig 2013-06-05 13:51:23.000000000 +0200
@@ -19,7 +19,7 @@ config EDD
@@ -12620,7 +11768,7 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
EXPORT_SYMBOL(pci_disable_msix);
/**
---- head.orig/drivers/video/Kconfig 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/drivers/video/Kconfig 2013-07-02 09:20:53.000000000 +0200
+++ head/drivers/video/Kconfig 2013-05-23 17:16:58.000000000 +0200
@@ -2262,7 +2262,7 @@ config FB_VIRTUAL
@@ -13668,7 +12816,7 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
};
static int xenfb_mmap(struct fb_info *fb_info, struct vm_area_struct *vma)
---- head.orig/drivers/xen/features.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/drivers/xen/features.c 2013-07-02 09:20:53.000000000 +0200
+++ head/drivers/xen/features.c 2011-01-31 18:07:35.000000000 +0100
@@ -9,14 +9,21 @@
#include <linux/cache.h>
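
Note: the xen3-patch-2.6.26 hunks above replace the exported smp_call_function_mask() wrapper with an open-coded rendezvous: the initiator fills call_data (func, info, started, finished, wait), publishes it with wmb(), fires CALL_FUNCTION_VECTOR at the target mask, then spins until data.started and, if waiting, data.finished reach the CPU count, while each target bumps those counters around the callback in smp_call_function_interrupt(). A minimal userspace model of that protocol, with C11 atomics and pthreads standing in for atomic_t and the IPI (names here are illustrative, not the kernel's):

    /* build: cc -pthread call_model.c */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct call_data {
            void (*func)(void *info);
            void *info;
            atomic_int started;
            atomic_int finished;
            int wait;
    } call_data;

    static void *cpu_thread(void *arg)
    {
            /* Mirrors smp_call_function_interrupt(): snapshot the data,
             * signal "started", run the callback, signal "finished". */
            void (*func)(void *) = call_data.func;
            void *info = call_data.info;
            int wait = call_data.wait;

            atomic_fetch_add(&call_data.started, 1);
            func(info);
            if (wait)
                    atomic_fetch_add(&call_data.finished, 1);
            return NULL;
    }

    static void hello(void *info)
    {
            printf("callback on remote cpu: %s\n", (const char *)info);
    }

    int main(void)
    {
            enum { CPUS = 3 };
            pthread_t t[CPUS];

            call_data.func = hello;
            call_data.info = "ping";
            call_data.wait = 1;

            /* Thread creation stands in for send_IPI_mask(). */
            for (int i = 0; i < CPUS; i++)
                    pthread_create(&t[i], NULL, cpu_thread, NULL);

            while (atomic_load(&call_data.started) != CPUS)
                    ;       /* cpu_relax() in the kernel */
            while (atomic_load(&call_data.finished) != CPUS)
                    ;

            for (int i = 0; i < CPUS; i++)
                    pthread_join(t[i], NULL);
            return 0;
    }

The wmb() in the hunk is what makes the func/info stores visible before the IPI lands; this model gets the equivalent ordering from the sequentially consistent C11 atomics.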
diff --git a/patches.xen/xen3-patch-2.6.32 b/patches.xen/xen3-patch-2.6.32
index 8dfc6e0046..561098abd1 100644
--- a/patches.xen/xen3-patch-2.6.32
+++ b/patches.xen/xen3-patch-2.6.32
@@ -211,9 +211,9 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
- .quad sys_perf_counter_open
+ .quad sys_perf_event_open
ia32_syscall_end:
---- head.orig/arch/x86/include/asm/irq.h 2013-06-20 14:56:42.000000000 +0200
-+++ head/arch/x86/include/asm/irq.h 2011-04-13 13:55:46.000000000 +0200
-@@ -17,7 +17,8 @@ struct ctl_table;
+--- head.orig/arch/x86/include/asm/irq.h 2013-07-02 09:20:53.000000000 +0200
++++ head/arch/x86/include/asm/irq.h 2013-07-02 09:40:47.000000000 +0200
+@@ -41,7 +41,8 @@ extern int vector_used_by_percpu_irq(uns
extern void init_ISA_irqs(void);
@@ -223,7 +223,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
void arch_trigger_all_cpu_backtrace(void);
#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
#endif
---- head.orig/arch/x86/include/asm/uv/uv_hub.h 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/arch/x86/include/asm/uv/uv_hub.h 2013-07-02 09:20:53.000000000 +0200
+++ head/arch/x86/include/asm/uv/uv_hub.h 2011-02-01 14:54:13.000000000 +0100
@@ -11,7 +11,7 @@
#ifndef _ASM_X86_UV_UV_HUB_H
@@ -725,9 +725,9 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
endif
disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o i8237.o i8253.o \
---- head.orig/arch/x86/kernel/apic/hw_nmi.c 2013-06-20 14:56:42.000000000 +0200
-+++ head/arch/x86/kernel/apic/hw_nmi.c 2011-04-13 13:55:59.000000000 +0200
-@@ -26,6 +26,10 @@ u64 hw_nmi_get_sample_period(int watchdo
+--- head.orig/arch/x86/kernel/apic/hw_nmi.c 2013-07-02 09:20:53.000000000 +0200
++++ head/arch/x86/kernel/apic/hw_nmi.c 2013-07-02 09:41:16.000000000 +0200
+@@ -27,6 +27,10 @@ u64 hw_nmi_get_sample_period(int watchdo
#endif
#ifdef arch_trigger_all_cpu_backtrace
@@ -738,7 +738,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
/* For reliability, we're prepared to waste bits here. */
static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-@@ -46,7 +50,11 @@ void arch_trigger_all_cpu_backtrace(void
+@@ -47,7 +51,11 @@ void arch_trigger_all_cpu_backtrace(void
cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
printk(KERN_INFO "sending NMI to all CPUs:\n");
@@ -1775,7 +1775,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
#endif
/* Make sure %fs and %gs are initialized properly in idle threads */
---- head.orig/arch/x86/kernel/cpu/mcheck/mce-inject.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/arch/x86/kernel/cpu/mcheck/mce-inject.c 2013-07-02 09:20:53.000000000 +0200
+++ head/arch/x86/kernel/cpu/mcheck/mce-inject.c 2012-10-23 15:27:54.000000000 +0200
@@ -152,7 +152,7 @@ static void raise_mce(struct mce *m)
if (context == MCJ_CTX_RANDOM)
@@ -4030,7 +4030,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
#endif /* CONFIG_X86_32 */
#ifdef CONFIG_XEN
---- head.orig/arch/x86/platform/sfi/sfi.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/arch/x86/platform/sfi/sfi.c 2013-07-02 09:20:53.000000000 +0200
+++ head/arch/x86/platform/sfi/sfi.c 2011-02-02 08:45:00.000000000 +0100
@@ -32,6 +32,7 @@
#include <asm/apic.h>
@@ -5768,7 +5768,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
BUG();
} else {
xen_l1_entry_update(ptep, entry);
---- head.orig/arch/x86/mm/physaddr.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/arch/x86/mm/physaddr.c 2013-07-02 09:20:53.000000000 +0200
+++ head/arch/x86/mm/physaddr.c 2013-03-21 15:12:19.000000000 +0100
@@ -9,6 +9,10 @@
@@ -6030,7 +6030,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
pr->cdev = thermal_cooling_device_register("Processor", device,
&processor_cooling_ops);
---- head.orig/drivers/char/agp/agp.h 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/drivers/char/agp/agp.h 2013-07-02 09:20:53.000000000 +0200
+++ head/drivers/char/agp/agp.h 2011-02-01 14:54:13.000000000 +0100
@@ -31,6 +31,10 @@
@@ -6043,7 +6043,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
#define PFX "agpgart: "
//#define AGP_DEBUG 1
---- head.orig/drivers/char/agp/amd-k7-agp.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/drivers/char/agp/amd-k7-agp.c 2013-07-02 09:20:53.000000000 +0200
+++ head/drivers/char/agp/amd-k7-agp.c 2011-02-17 10:18:42.000000000 +0100
@@ -142,7 +142,7 @@ static int amd_create_gatt_table(struct
@@ -6063,7 +6063,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
page_dir.remapped+GET_PAGE_DIR_OFF(addr));
readl(page_dir.remapped+GET_PAGE_DIR_OFF(addr)); /* PCI Posting. */
}
---- head.orig/drivers/char/agp/amd64-agp.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/drivers/char/agp/amd64-agp.c 2013-07-02 09:20:53.000000000 +0200
+++ head/drivers/char/agp/amd64-agp.c 2013-01-14 13:52:00.000000000 +0100
@@ -178,7 +178,7 @@ static const struct aper_size_info_32 am
@@ -6083,7 +6083,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
amd64_aperture_sizes[bridge->aperture_size_idx].size);
agp_remove_bridge(bridge);
agp_put_bridge(bridge);
---- head.orig/drivers/char/agp/ati-agp.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/drivers/char/agp/ati-agp.c 2013-07-02 09:20:53.000000000 +0200
+++ head/drivers/char/agp/ati-agp.c 2011-02-01 14:54:13.000000000 +0100
@@ -361,7 +361,7 @@ static int ati_create_gatt_table(struct
@@ -6103,7 +6103,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
page_dir.remapped+GET_PAGE_DIR_OFF(addr));
readl(page_dir.remapped+GET_PAGE_DIR_OFF(addr)); /* PCI Posting. */
}
---- head.orig/drivers/char/agp/efficeon-agp.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/drivers/char/agp/efficeon-agp.c 2013-07-02 09:20:53.000000000 +0200
+++ head/drivers/char/agp/efficeon-agp.c 2011-02-01 14:54:13.000000000 +0100
@@ -227,7 +227,7 @@ static int efficeon_create_gatt_table(st
@@ -6145,7 +6145,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
#define USE_PCI_DMA_API 1
#else
#define USE_PCI_DMA_API 0
---- head.orig/drivers/char/agp/sworks-agp.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/drivers/char/agp/sworks-agp.c 2013-07-02 09:20:53.000000000 +0200
+++ head/drivers/char/agp/sworks-agp.c 2011-02-01 14:54:13.000000000 +0100
@@ -155,7 +155,7 @@ static int serverworks_create_gatt_table
/* Create a fake scratch directory */
@@ -6185,7 +6185,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
#endif
#endif /* IOATDMA_H */
---- head.orig/drivers/dma/ioat/dma_v2.h 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/drivers/dma/ioat/dma_v2.h 2013-07-02 09:20:53.000000000 +0200
+++ head/drivers/dma/ioat/dma_v2.h 2013-05-23 17:33:47.000000000 +0200
@@ -178,4 +178,10 @@ int ioat2_quiesce(struct ioat_chan_commo
int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo);
@@ -6198,7 +6198,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
+#endif
+
#endif /* IOATDMA_V2_H */
---- head.orig/drivers/dma/ioat/hw.h 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/drivers/dma/ioat/hw.h 2013-07-02 09:20:53.000000000 +0200
+++ head/drivers/dma/ioat/hw.h 2013-05-23 17:33:52.000000000 +0200
@@ -64,7 +64,11 @@
#define IOAT_VER_3_3 0x33 /* Version 3.3 */
@@ -6212,9 +6212,9 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
struct ioat_dma_descriptor {
uint32_t size;
---- head.orig/drivers/gpu/drm/radeon/radeon_device.c 2013-06-20 14:56:42.000000000 +0200
-+++ head/drivers/gpu/drm/radeon/radeon_device.c 2013-06-04 13:57:29.000000000 +0200
-@@ -590,6 +590,18 @@ int radeon_dummy_page_init(struct radeon
+--- head.orig/drivers/gpu/drm/radeon/radeon_device.c 2013-07-02 09:20:53.000000000 +0200
++++ head/drivers/gpu/drm/radeon/radeon_device.c 2013-07-02 09:41:04.000000000 +0200
+@@ -585,6 +585,18 @@ int radeon_dummy_page_init(struct radeon
rdev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO);
if (rdev->dummy_page.page == NULL)
return -ENOMEM;
@@ -6484,7 +6484,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
nr++;
for (fn = next_fn(bus, dev, 0); fn > 0; fn = next_fn(bus, dev, fn)) {
---- head.orig/drivers/sfi/sfi_core.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/drivers/sfi/sfi_core.c 2013-07-02 09:20:53.000000000 +0200
+++ head/drivers/sfi/sfi_core.c 2011-02-01 14:54:13.000000000 +0100
@@ -486,6 +486,11 @@ void __init sfi_init(void)
if (!acpi_disabled)
@@ -6788,7 +6788,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
if (err) {
pr_err("Xen suspend can't run on CPU0 (%d)\n", err);
goto fail;
---- head.orig/drivers/xen/dbgp.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/drivers/xen/dbgp.c 2013-07-02 09:20:53.000000000 +0200
+++ head/drivers/xen/dbgp.c 2012-05-02 15:15:24.000000000 +0200
@@ -2,7 +2,11 @@
#include <linux/usb.h>
@@ -7048,7 +7048,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
#include <linux/proc_fs.h>
#include <linux/notifier.h>
#include <linux/mutex.h>
---- head.orig/fs/proc/kcore.c 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/fs/proc/kcore.c 2013-07-02 09:20:53.000000000 +0200
+++ head/fs/proc/kcore.c 2013-05-23 17:32:56.000000000 +0200
@@ -134,7 +134,7 @@ static void __kcore_update_ram(struct li
}
@@ -7071,7 +7071,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
ent->type = KCORE_RAM;
list_add(&ent->list, &head);
__kcore_update_ram(&head);
---- head.orig/include/linux/nmi.h 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/include/linux/nmi.h 2013-07-02 09:20:53.000000000 +0200
+++ head/include/linux/nmi.h 2011-02-16 09:06:03.000000000 +0100
@@ -18,6 +18,9 @@
#include <asm/nmi.h>
@@ -7083,7 +7083,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches
static inline void touch_nmi_watchdog(void)
{
touch_softlockup_watchdog();
---- head.orig/include/linux/usb/ehci_def.h 2013-06-20 14:56:42.000000000 +0200
+--- head.orig/include/linux/usb/ehci_def.h 2013-07-02 09:20:53.000000000 +0200
+++ head/include/linux/usb/ehci_def.h 2012-10-23 15:27:11.000000000 +0200
@@ -223,7 +223,7 @@ extern struct console early_dbgp_console
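
Note: among the mostly mechanical timestamp refreshes in the xen3-patch-2.6.32 section above, the hw_nmi.c hunk touches the arch_trigger_all_cpu_backtrace() path: copy the online mask into backtrace_mask, send an NMI to everyone, and let each CPU clear its bit after dumping state. A userspace model of that handshake (sketch only; the kernel uses DECLARE_BITMAP() and a real NMI, and bounds the wait instead of spinning forever):

    /* build: cc -pthread backtrace_model.c */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    #define NR_CPUS 4

    static atomic_uint backtrace_mask;

    static void *cpu(void *arg)
    {
            int me = (int)(long)arg;

            /* Stand-in for the NMI handler: dump, then clear our bit. */
            printf("cpu%d: backtrace\n", me);
            atomic_fetch_and(&backtrace_mask, ~(1u << me));
            return NULL;
    }

    int main(void)
    {
            pthread_t t[NR_CPUS];

            atomic_store(&backtrace_mask, (1u << NR_CPUS) - 1);
            printf("sending NMI to all CPUs:\n");
            for (long i = 0; i < NR_CPUS; i++)
                    pthread_create(&t[i], NULL, cpu, (void *)i);

            while (atomic_load(&backtrace_mask))
                    ;       /* the kernel times this wait out */
            for (int i = 0; i < NR_CPUS; i++)
                    pthread_join(t[i], NULL);
            return 0;
    }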
diff --git a/patches.xen/xen3-patch-3.10-rc6 b/patches.xen/xen3-patch-3.10
index 90b2ca8cf9..6987b887ee 100644
--- a/patches.xen/xen3-patch-3.10-rc6
+++ b/patches.xen/xen3-patch-3.10
@@ -1,10 +1,10 @@
From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
-Subject: Linux: 3.10-rc6
-Patch-mainline: 3.10-rc6
+Subject: Linux: 3.10
+Patch-mainline: 3.10
- This patch contains the differences between 3.9 and 3.10-rc6.
+ This patch contains the differences between 3.9 and 3.10.
-Automatically created from "patch-3.10-rc4" by xen-port-patches.py
+Automatically created from "patch-3.10" by xen-port-patches.py
Acked-by: jbeulich@suse.com
--- head.orig/arch/x86/include/asm/hypervisor.h 2013-05-23 17:21:57.000000000 +0200
@@ -258,7 +258,7 @@ Acked-by: jbeulich@suse.com
#endif
--- head.orig/arch/x86/kernel/process-xen.c 2013-03-25 09:13:57.000000000 +0100
-+++ head/arch/x86/kernel/process-xen.c 2013-05-27 17:24:56.000000000 +0200
++++ head/arch/x86/kernel/process-xen.c 2013-07-02 10:03:05.000000000 +0200
@@ -125,30 +125,6 @@ void exit_thread(void)
drop_fpu(me);
}
@@ -300,7 +300,7 @@ Acked-by: jbeulich@suse.com
#ifndef CONFIG_SMP
static inline void play_dead(void)
-@@ -290,13 +268,7 @@ void exit_idle(void)
+@@ -290,87 +268,40 @@ void exit_idle(void)
}
#endif
@@ -311,14 +311,16 @@ Acked-by: jbeulich@suse.com
- * somebody to say that they'd like to reschedule)
- */
-void cpu_idle(void)
-+void arch_cpu_idle_prepare(void)
++void arch_cpu_idle_enter(void)
{
- /*
- * If we're the non-boot CPU, nothing set the stack canary up
-@@ -306,71 +278,42 @@ void cpu_idle(void)
- * canaries already on the stack wont ever trigger).
- */
- boot_init_stack_canary();
+- /*
+- * If we're the non-boot CPU, nothing set the stack canary up
+- * for us. CPU0 already has it initialized but no harm in
+- * doing it again. This is a good place for updating it, as
+- * we wont ever return from this function (so the invalid
+- * canaries already on the stack wont ever trigger).
+- */
+- boot_init_stack_canary();
- current_thread_info()->status |= TS_POLLING;
-
- while (1) {
@@ -345,12 +347,9 @@ Acked-by: jbeulich@suse.com
-
- /* enter_idle() needs rcu for notifiers */
- rcu_idle_enter();
-+}
-
+-
- if (cpuidle_idle_call())
- xen_idle();
-+void arch_cpu_idle_enter(void)
-+{
+ local_touch_nmi();
+ enter_idle();
+}
@@ -412,7 +411,7 @@ Acked-by: jbeulich@suse.com
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#ifdef CONFIG_APM_MODULE
-@@ -400,20 +343,6 @@ void stop_this_cpu(void *dummy)
+@@ -400,20 +331,6 @@ void stop_this_cpu(void *dummy)
halt();
}
@@ -433,7 +432,7 @@ Acked-by: jbeulich@suse.com
#ifndef CONFIG_XEN
bool amd_e400_c1e_detected;
EXPORT_SYMBOL(amd_e400_c1e_detected);
-@@ -433,9 +362,6 @@ void amd_e400_remove_cpu(int cpu)
+@@ -433,9 +350,6 @@ void amd_e400_remove_cpu(int cpu)
*/
static void amd_e400_idle(void)
{
@@ -443,7 +442,7 @@ Acked-by: jbeulich@suse.com
if (!amd_e400_c1e_detected) {
u32 lo, hi;
-@@ -481,13 +407,13 @@ void __cpuinit select_idle_routine(const
+@@ -481,13 +395,13 @@ void __cpuinit select_idle_routine(const
{
#ifndef CONFIG_XEN
#ifdef CONFIG_SMP
@@ -460,7 +459,7 @@ Acked-by: jbeulich@suse.com
/* E400: APIC timer interrupt does not wake up CPU from C1e */
pr_info("using AMD E400 aware idle routine\n");
x86_idle = amd_e400_idle;
-@@ -512,8 +438,8 @@ static int __init idle_setup(char *str)
+@@ -512,8 +426,8 @@ static int __init idle_setup(char *str)
if (!strcmp(str, "poll")) {
pr_info("using polling idle threads\n");
@@ -1123,7 +1122,7 @@ Acked-by: jbeulich@suse.com
return -ENODEV;
}
return 0;
---- head.orig/drivers/net/caif/Kconfig 2013-06-20 14:56:41.000000000 +0200
+--- head.orig/drivers/net/caif/Kconfig 2013-07-02 09:20:52.000000000 +0200
+++ head/drivers/net/caif/Kconfig 2013-06-05 14:20:37.000000000 +0200
@@ -43,7 +43,7 @@ config CAIF_HSI
@@ -1414,7 +1413,7 @@ Acked-by: jbeulich@suse.com
+ if (dev->msix_cap)
+ msix_set_enable(dev, 0);
}
---- head.orig/drivers/platform/x86/Kconfig 2013-06-20 14:56:41.000000000 +0200
+--- head.orig/drivers/platform/x86/Kconfig 2013-07-02 09:20:52.000000000 +0200
+++ head/drivers/platform/x86/Kconfig 2013-05-27 16:24:27.000000000 +0200
@@ -783,7 +783,7 @@ config APPLE_GMUX
@@ -1425,7 +1424,7 @@ Acked-by: jbeulich@suse.com
---help---
This driver provides support for the pvpanic device. pvpanic is
a paravirtualized device provided by QEMU; it lets a virtual machine
---- head.orig/drivers/scsi/lpfc/lpfc_init.c 2013-06-20 14:56:41.000000000 +0200
+--- head.orig/drivers/scsi/lpfc/lpfc_init.c 2013-07-02 09:20:52.000000000 +0200
+++ head/drivers/scsi/lpfc/lpfc_init.c 2013-06-05 14:51:48.000000000 +0200
@@ -8414,9 +8414,6 @@ lpfc_sli4_set_affinity(struct lpfc_hba *
int i, idx, saved_chann, used_chann, cpu, phys_id;
@@ -1683,7 +1682,7 @@ Acked-by: jbeulich@suse.com
}
#ifdef MODULE
---- head.orig/drivers/xen/netback/netback.c 2013-06-13 12:14:42.000000000 +0200
+--- head.orig/drivers/xen/netback/netback.c 2013-07-02 09:43:02.000000000 +0200
+++ head/drivers/xen/netback/netback.c 2013-06-13 12:15:54.000000000 +0200
@@ -1714,6 +1714,8 @@ static void net_tx_action(unsigned long
continue;
@@ -2084,9 +2083,15 @@ Acked-by: jbeulich@suse.com
}
return pollflags;
---- head.orig/include/xen/net-util.h 2013-05-24 14:47:36.000000000 +0200
-+++ head/include/xen/net-util.h 2013-05-24 14:43:10.000000000 +0200
-@@ -11,7 +11,6 @@ static inline int skb_checksum_setup(str
+--- head.orig/include/xen/net-util.h 2013-07-02 09:55:33.000000000 +0200
++++ head/include/xen/net-util.h 2013-07-02 09:59:59.000000000 +0200
+@@ -6,13 +6,11 @@
+ #include <linux/tcp.h>
+ #include <linux/udp.h>
+ #include <net/ip.h>
+-#include <net/flow_keys.h>
+
+ static inline int skb_checksum_setup(struct sk_buff *skb,
unsigned long *fixup_counter)
{
struct iphdr *iph = (void *)skb->data;
@@ -2094,7 +2099,7 @@ Acked-by: jbeulich@suse.com
__be16 *csum = NULL;
int err = -EPROTO;
-@@ -33,21 +32,20 @@ static inline int skb_checksum_setup(str
+@@ -35,22 +33,20 @@ static inline int skb_checksum_setup(str
if (skb->protocol != htons(ETH_P_IP))
goto out;
@@ -2102,6 +2107,7 @@ Acked-by: jbeulich@suse.com
- if (th >= skb_tail_pointer(skb))
- goto out;
-
+- skb_set_transport_header(skb, 4 * iph->ihl);
- skb->csum_start = th - skb->head;
switch (iph->protocol) {
case IPPROTO_TCP:
@@ -2124,7 +2130,7 @@ Acked-by: jbeulich@suse.com
break;
default:
net_err_ratelimited("Attempting to checksum a non-TCP/UDP packet,"
-@@ -56,9 +54,6 @@ static inline int skb_checksum_setup(str
+@@ -59,9 +55,6 @@ static inline int skb_checksum_setup(str
goto out;
}
@@ -2134,6 +2140,22 @@ Acked-by: jbeulich@suse.com
if (csum) {
*csum = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
skb->len - iph->ihl*4,
+@@ -69,14 +62,7 @@ static inline int skb_checksum_setup(str
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ }
+
+- if (!skb_transport_header_was_set(skb)) {
+- struct flow_keys keys;
+-
+- if (skb_flow_dissect(skb, &keys))
+- skb_set_transport_header(skb, keys.thoff);
+- else
+- skb_reset_transport_header(skb);
+- }
++ skb_probe_transport_header(skb, 0);
+
+ err = 0;
+ out:
--- head.orig/include/xen/xen_proc.h 2007-06-12 13:14:19.000000000 +0200
+++ head/include/xen/xen_proc.h 2013-05-27 17:41:43.000000000 +0200
@@ -4,9 +4,9 @@
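
Note: the include/xen/net-util.h hunk above drops the flow_keys-based probing (added by the xen3-patch-3.9 refresh further down) in favour of the 3.10 helper skb_probe_transport_header(), while keeping the ~csum_tcpudp_magic() seeding of the TCP/UDP checksum field for CHECKSUM_PARTIAL. Since csum_fold() already complements, the stored seed is simply the folded one's-complement sum of the IPv4 pseudo-header; a standalone model of that arithmetic (illustrative only, not bit-for-bit the kernel helpers):

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint16_t fold16(uint32_t sum)
    {
            while (sum >> 16)
                    sum = (sum & 0xffff) + (sum >> 16);
            return (uint16_t)sum;
    }

    /* Pseudo-header seed for TCP/UDP offload (RFC 793/768); the double
     * complement in ~csum_tcpudp_magic(saddr, daddr, len, proto, 0)
     * cancels, so no final inversion here. */
    static uint16_t pseudo_hdr_csum(uint32_t saddr, uint32_t daddr,
                                    uint16_t len, uint8_t proto)
    {
            uint32_t sum = 0;

            sum += saddr >> 16;
            sum += saddr & 0xffff;
            sum += daddr >> 16;
            sum += daddr & 0xffff;
            sum += proto;
            sum += len;
            return fold16(sum);
    }

    int main(void)
    {
            uint32_t src = ntohl(inet_addr("192.0.2.1"));
            uint32_t dst = ntohl(inet_addr("198.51.100.2"));

            /* CHECKSUM_PARTIAL: store the seed, hardware finishes the
             * sum over the transport header and payload. */
            printf("seed = 0x%04x\n",
                   pseudo_hdr_csum(src, dst, 40, 6 /* IPPROTO_TCP */));
            return 0;
    }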
diff --git a/patches.xen/xen3-patch-3.2 b/patches.xen/xen3-patch-3.2
index 00a970ea15..20c35e9155 100644
--- a/patches.xen/xen3-patch-3.2
+++ b/patches.xen/xen3-patch-3.2
@@ -3141,9 +3141,10 @@ Acked-by: jbeulich@suse.com
+ spinning->ticket == ticket) {
#if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
- token = spinning->irq_count
+- < per_cpu(_irq_count, cpu)
+- ? ticket_drop(spinning, token, cpu) : -2;
+ ticket = spinning->irq_count
- < per_cpu(_irq_count, cpu)
-- ? ticket_drop(spinning, token, cpu) : -2;
++ < per_cpu(_irq_count, cpu)
+ ? ticket_drop(spinning, ticket, cpu) : -2;
#endif
break;
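
Note: the xen3-patch-3.2 tweak above is just the token -> ticket rename inside the pv spinlock's IRQ-nesting drop path, but the structure being adjusted is a ticket lock. For orientation, the core ticket discipline looks like this (userspace sketch with C11 atomics; the Xen code layers poll/kick hypercalls and the ticket_drop() handoff on top of it):

    /* build: cc -pthread ticket_model.c */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct ticket_lock {
            atomic_uint next;       /* next ticket to hand out */
            atomic_uint owner;      /* ticket currently served */
    };

    static void ticket_lock(struct ticket_lock *l)
    {
            unsigned int ticket = atomic_fetch_add(&l->next, 1);

            while (atomic_load(&l->owner) != ticket)
                    ;       /* the pv variant blocks here instead */
    }

    static void ticket_unlock(struct ticket_lock *l)
    {
            atomic_fetch_add(&l->owner, 1);
    }

    static struct ticket_lock lock;
    static int counter;

    static void *worker(void *arg)
    {
            for (int i = 0; i < 100000; i++) {
                    ticket_lock(&lock);
                    counter++;
                    ticket_unlock(&lock);
            }
            return NULL;
    }

    int main(void)
    {
            pthread_t t[4];

            for (int i = 0; i < 4; i++)
                    pthread_create(&t[i], NULL, worker, NULL);
            for (int i = 0; i < 4; i++)
                    pthread_join(t[i], NULL);
            printf("counter = %d (expect 400000)\n", counter);
            return 0;
    }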
diff --git a/patches.xen/xen3-patch-3.3 b/patches.xen/xen3-patch-3.3
index 7ce1dfb4c9..cacf2172a4 100644
--- a/patches.xen/xen3-patch-3.3
+++ b/patches.xen/xen3-patch-3.3
@@ -3966,7 +3966,7 @@ Acked-by: jbeulich@suse.com
netif_t *netif = netdev_priv(dev);
--- head.orig/drivers/xen/netback/netback.c 2013-06-13 08:56:09.000000000 +0200
-+++ head/drivers/xen/netback/netback.c 2013-06-13 14:28:46.000000000 +0200
++++ head/drivers/xen/netback/netback.c 2013-06-24 12:44:29.000000000 +0200
@@ -51,6 +51,12 @@ struct netbk_rx_meta {
u8 copy:1;
};
@@ -4162,15 +4162,14 @@ Acked-by: jbeulich@suse.com
GNTMAP_host_map | GNTMAP_readonly,
txp->gref, netif->domid);
-@@ -1131,14 +1216,18 @@ static gnttab_map_grant_ref_t *netbk_get
+@@ -1131,14 +1216,17 @@ static gnttab_map_grant_ref_t *netbk_get
frag_set_pending_idx(&frags[i], pending_idx);
}
- return mop;
-+ if ((void *)gop->map > (void *)gop->copy)
-+ net_warn_ratelimited("%s: Grant op overrun (%p > %p)\n",
-+ netdev_name(netif->dev),
-+ gop->map, gop->copy);
++ if ((void *)gop->map > (void *)gop->copy && net_ratelimit())
++ netdev_warn(netif->dev, "Grant op overrun (%p > %p)\n",
++ gop->map, gop->copy);
}
-static int netbk_tx_check_mop(struct sk_buff *skb,
@@ -4266,9 +4265,9 @@ Acked-by: jbeulich@suse.com
- *mopp = mop + 1;
+ gop->map = mop;
+ gop->copy = cop;
-+ if ((void *)mop > (void *)cop)
-+ net_warn_ratelimited("%s: Grant op check overrun (%p > %p)\n",
-+ netdev_name(netif->dev), mop, cop);
++ if ((void *)mop > (void *)cop && net_ratelimit())
++ netdev_warn(netif->dev, "Grant op check overrun (%p > %p)\n",
++ mop, cop);
return err;
}
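
Note: the netback hunks just above switch the grant-op overrun messages from net_warn_ratelimited() to netdev_warn() guarded by net_ratelimit(), presumably because the combined helper is not available at the 3.3 patch level; the xen3-patch-3.7 refresh below converts them back. Either way the gate is the same interval-plus-burst state machine. A userspace model (sketch; the kernel's ___ratelimit() adds a spinlock, jiffies, and a "callbacks suppressed" summary, with a default 5 s window and burst of 10):

    #include <stdio.h>
    #include <time.h>

    struct ratelimit_state {
            double interval;        /* window length, seconds */
            int burst;              /* messages allowed per window */
            int printed;
            double begin;
    };

    static double now(void)
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            return ts.tv_sec + ts.tv_nsec / 1e9;
    }

    static int ratelimit(struct ratelimit_state *rs)
    {
            double t = now();

            if (rs->begin == 0 || t - rs->begin > rs->interval) {
                    rs->begin = t;          /* new window, reset budget */
                    rs->printed = 0;
            }
            if (rs->printed < rs->burst) {
                    rs->printed++;
                    return 1;               /* caller may log */
            }
            return 0;                       /* suppressed */
    }

    int main(void)
    {
            struct ratelimit_state rs = { .interval = 5.0, .burst = 10 };

            for (int i = 0; i < 100; i++)
                    if (ratelimit(&rs))
                            printf("Grant op overrun (message %d)\n", i);
            return 0;
    }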
diff --git a/patches.xen/xen3-patch-3.7 b/patches.xen/xen3-patch-3.7
index b3578fd1f2..4c9c3c8dcb 100644
--- a/patches.xen/xen3-patch-3.7
+++ b/patches.xen/xen3-patch-3.7
@@ -7,9 +7,9 @@ Patch-mainline: 3.7
Automatically created from "patch-3.7" by xen-port-patches.py
Acked-by: jbeulich@suse.com
---- head.orig/arch/arm/Kconfig 2013-06-20 14:56:41.000000000 +0200
-+++ head/arch/arm/Kconfig 2013-05-23 17:55:07.000000000 +0200
-@@ -1783,9 +1783,9 @@ config CC_STACKPROTECTOR
+--- head.orig/arch/arm/Kconfig 2013-07-02 09:20:52.000000000 +0200
++++ head/arch/arm/Kconfig 2013-07-02 09:43:11.000000000 +0200
+@@ -1807,9 +1807,9 @@ config CC_STACKPROTECTOR
config XEN_DOM0
def_bool y
@@ -21,7 +21,7 @@ Acked-by: jbeulich@suse.com
bool "Xen guest support on ARM (EXPERIMENTAL)"
depends on ARM && AEABI && OF
depends on CPU_V7 && !CPU_V6
---- head.orig/arch/arm/Makefile 2013-06-20 14:56:41.000000000 +0200
+--- head.orig/arch/arm/Makefile 2013-07-02 09:20:52.000000000 +0200
+++ head/arch/arm/Makefile 2013-05-23 17:55:09.000000000 +0200
@@ -248,7 +248,7 @@ endif
core-$(CONFIG_FPE_NWFPE) += arch/arm/nwfpe/
@@ -32,7 +32,7 @@ Acked-by: jbeulich@suse.com
core-$(CONFIG_KVM_ARM_HOST) += arch/arm/kvm/
# If we have a machine-specific directory, then include it in the build.
---- head.orig/arch/arm/include/asm/xen/interface.h 2013-06-20 14:56:41.000000000 +0200
+--- head.orig/arch/arm/include/asm/xen/interface.h 2013-07-02 09:20:52.000000000 +0200
+++ head/arch/arm/include/asm/xen/interface.h 2012-10-31 11:29:25.000000000 +0100
@@ -11,14 +11,14 @@
@@ -2640,7 +2640,7 @@ Acked-by: jbeulich@suse.com
};
static int adjust_tjmax(struct platform_data *c, u32 id, struct device *dev)
---- head.orig/drivers/remoteproc/Kconfig 2013-06-20 14:56:41.000000000 +0200
+--- head.orig/drivers/remoteproc/Kconfig 2013-07-02 09:20:52.000000000 +0200
+++ head/drivers/remoteproc/Kconfig 2012-11-02 12:55:30.000000000 +0100
@@ -1,4 +1,5 @@
menu "Remoteproc drivers"
@@ -2923,7 +2923,7 @@ Acked-by: jbeulich@suse.com
return rc;
}
---- head.orig/drivers/xen/fallback.c 2013-06-20 14:56:41.000000000 +0200
+--- head.orig/drivers/xen/fallback.c 2013-07-02 09:20:52.000000000 +0200
+++ head/drivers/xen/fallback.c 2013-04-03 10:48:55.000000000 +0200
@@ -3,7 +3,16 @@
#include <linux/bug.h>
@@ -2970,8 +2970,8 @@ Acked-by: jbeulich@suse.com
/* This flag prevents this VM area being copied on a fork(). A better
* behaviour might be to explicitly carry out the appropriate mappings
---- head.orig/drivers/xen/netback/netback.c 2013-06-13 14:28:46.000000000 +0200
-+++ head/drivers/xen/netback/netback.c 2013-06-13 12:14:42.000000000 +0200
+--- head.orig/drivers/xen/netback/netback.c 2013-06-24 12:44:29.000000000 +0200
++++ head/drivers/xen/netback/netback.c 2013-07-02 09:43:02.000000000 +0200
@@ -36,6 +36,7 @@
#include "common.h"
@@ -3163,6 +3163,33 @@ Acked-by: jbeulich@suse.com
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
irq = netif->irq;
+@@ -1216,9 +1256,10 @@ void netbk_get_requests(netif_t *netif,
+ frag_set_pending_idx(&frags[i], pending_idx);
+ }
+
+- if ((void *)gop->map > (void *)gop->copy && net_ratelimit())
+- netdev_warn(netif->dev, "Grant op overrun (%p > %p)\n",
+- gop->map, gop->copy);
++ if ((void *)gop->map > (void *)gop->copy)
++ net_warn_ratelimited("%s: Grant op overrun (%p > %p)\n",
++ netdev_name(netif->dev),
++ gop->map, gop->copy);
+ }
+
+ static int netbk_tx_check_gop(struct sk_buff *skb,
+@@ -1317,9 +1358,9 @@ static int netbk_tx_check_gop(struct sk_
+
+ gop->map = mop;
+ gop->copy = cop;
+- if ((void *)mop > (void *)cop && net_ratelimit())
+- netdev_warn(netif->dev, "Grant op check overrun (%p > %p)\n",
+- mop, cop);
++ if ((void *)mop > (void *)cop)
++ net_warn_ratelimited("%s: Grant op check overrun (%p > %p)\n",
++ netdev_name(netif->dev), mop, cop);
+ return err;
+ }
+
--- head.orig/drivers/xen/netfront/netfront.c 2013-05-10 14:37:51.000000000 +0200
+++ head/drivers/xen/netfront/netfront.c 2013-05-10 14:38:37.000000000 +0200
@@ -572,7 +572,7 @@ static void backend_changed(struct xenbu
@@ -3581,7 +3608,7 @@ Acked-by: jbeulich@suse.com
err = xenbus_scanf(XBT_NIL, "control",
"platform-feature-xs_reset_watches", "%d",
&supported);
---- head.orig/include/uapi/xen/evtchn.h 2013-06-20 14:56:41.000000000 +0200
+--- head.orig/include/uapi/xen/evtchn.h 2013-07-02 09:20:52.000000000 +0200
+++ head/include/uapi/xen/evtchn.h 2012-10-31 16:44:01.000000000 +0100
@@ -1,88 +1 @@
-/******************************************************************************
diff --git a/patches.xen/xen3-patch-3.9 b/patches.xen/xen3-patch-3.9
index 7c5bafc313..faf34921a9 100644
--- a/patches.xen/xen3-patch-3.9
+++ b/patches.xen/xen3-patch-3.9
@@ -44,7 +44,7 @@ Acked-by: jbeulich@suse.com
ALIGN
GLOBAL(stub32_clone)
---- head.orig/arch/x86/include/asm/efi.h 2013-06-20 14:56:41.000000000 +0200
+--- head.orig/arch/x86/include/asm/efi.h 2013-07-02 09:20:52.000000000 +0200
+++ head/arch/x86/include/asm/efi.h 2013-06-20 15:32:48.000000000 +0200
@@ -106,7 +106,11 @@ extern void efi_memory_uc(u64 addr, unsi
@@ -75,7 +75,7 @@ Acked-by: jbeulich@suse.com
static inline unsigned long __phys_addr_nodebug(unsigned long x)
{
---- head.orig/arch/x86/include/asm/processor.h 2013-06-20 14:56:41.000000000 +0200
+--- head.orig/arch/x86/include/asm/processor.h 2013-07-02 09:20:52.000000000 +0200
+++ head/arch/x86/include/asm/processor.h 2013-05-23 17:57:56.000000000 +0200
@@ -974,7 +974,7 @@ extern unsigned long arch_align_stack(un
extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
@@ -1629,7 +1629,7 @@ Acked-by: jbeulich@suse.com
-
start_kernel();
}
---- head.orig/arch/x86/kernel/head_64.S 2013-06-20 14:56:41.000000000 +0200
+--- head.orig/arch/x86/kernel/head_64.S 2013-07-02 09:20:52.000000000 +0200
+++ head/arch/x86/kernel/head_64.S 2013-06-04 13:59:45.000000000 +0200
@@ -465,7 +465,7 @@ NEXT_PAGE(early_dynamic_pgts)
@@ -1679,9 +1679,9 @@ Acked-by: jbeulich@suse.com
{
struct thread_struct *t = &current->thread;
unsigned int old = t->iopl >> 12;
---- head.orig/arch/x86/kernel/process.c 2013-06-20 14:56:41.000000000 +0200
-+++ head/arch/x86/kernel/process.c 2013-05-23 17:57:32.000000000 +0200
-@@ -329,7 +329,7 @@ void default_idle(void)
+--- head.orig/arch/x86/kernel/process.c 2013-07-02 09:20:52.000000000 +0200
++++ head/arch/x86/kernel/process.c 2013-07-02 09:49:24.000000000 +0200
+@@ -317,7 +317,7 @@ void default_idle(void)
EXPORT_SYMBOL(default_idle);
#endif
@@ -4534,7 +4534,7 @@ Acked-by: jbeulich@suse.com
void __meminit vmemmap_populate_print_last(void)
{
if (p_start) {
---- head.orig/arch/x86/mm/mm_internal.h 2013-06-20 14:56:41.000000000 +0200
+--- head.orig/arch/x86/mm/mm_internal.h 2013-07-02 09:20:52.000000000 +0200
+++ head/arch/x86/mm/mm_internal.h 2013-04-03 17:43:19.000000000 +0200
@@ -14,6 +14,8 @@ unsigned long kernel_physical_mapping_in
unsigned long page_size_mask);
@@ -5010,7 +5010,7 @@ Acked-by: jbeulich@suse.com
{ 0x36, ANY, 100000 }, /* Atom Cedar Trail/Cedarview (N2xxx, D2xxx) */
};
---- head.orig/drivers/misc/vmw_vmci/Kconfig 2013-06-20 14:56:41.000000000 +0200
+--- head.orig/drivers/misc/vmw_vmci/Kconfig 2013-07-02 09:20:52.000000000 +0200
+++ head/drivers/misc/vmw_vmci/Kconfig 2013-06-04 13:59:32.000000000 +0200
@@ -4,7 +4,7 @@
@@ -5056,7 +5056,7 @@ Acked-by: jbeulich@suse.com
void pci_msi_shutdown(struct pci_dev *dev)
{
int pirq, pos;
---- head.orig/drivers/thermal/Kconfig 2013-06-20 14:56:41.000000000 +0200
+--- head.orig/drivers/thermal/Kconfig 2013-07-02 09:20:52.000000000 +0200
+++ head/drivers/thermal/Kconfig 2013-05-23 17:57:14.000000000 +0200
@@ -162,7 +162,7 @@ config DB8500_CPUFREQ_COOLING
config INTEL_POWERCLAMP
@@ -5298,32 +5298,22 @@ Acked-by: jbeulich@suse.com
} u;
} __attribute__((__packed__));
#endif
---- head.orig/kernel/context_tracking.c 2013-06-20 14:56:41.000000000 +0200
-+++ head/kernel/context_tracking.c 2013-06-05 15:05:02.000000000 +0200
-@@ -15,6 +15,7 @@
- */
-
- #include <linux/context_tracking.h>
-+#include <linux/kconfig.h>
- #include <linux/rcupdate.h>
- #include <linux/sched.h>
- #include <linux/hardirq.h>
-@@ -103,6 +103,10 @@ void user_exit(void)
+--- head.orig/kernel/context_tracking.c 2013-07-02 09:20:52.000000000 +0200
++++ head/kernel/context_tracking.c 2013-07-02 09:49:19.000000000 +0200
+@@ -142,6 +142,8 @@ void user_exit(void)
local_irq_restore(flags);
}
-+#if IS_ENABLED(CONFIG_KVM)
-+
-+#include <linux/kvm_host.h>
++#ifndef CONFIG_XEN
+
void guest_enter(void)
{
if (vtime_accounting_enabled())
-@@ -121,6 +125,7 @@ void guest_exit(void)
+@@ -160,6 +162,7 @@ void guest_exit(void)
}
EXPORT_SYMBOL_GPL(guest_exit);
-+#endif
++#endif /* CONFIG_XEN */
/**
* context_tracking_task_switch - context switch the syscall callbacks
@@ -5539,3 +5529,45 @@ Acked-by: jbeulich@suse.com
mask = dma_get_seg_boundary(hwdev);
offset_slots = -IO_TLB_SEGSIZE;
+--- head.orig/include/xen/net-util.h 2013-05-24 14:47:36.000000000 +0200
++++ head/include/xen/net-util.h 2013-07-02 09:55:33.000000000 +0200
+@@ -6,6 +6,7 @@
+ #include <linux/tcp.h>
+ #include <linux/udp.h>
+ #include <net/ip.h>
++#include <net/flow_keys.h>
+
+ static inline int skb_checksum_setup(struct sk_buff *skb,
+ unsigned long *fixup_counter)
+@@ -15,6 +16,7 @@ static inline int skb_checksum_setup(str
+ __be16 *csum = NULL;
+ int err = -EPROTO;
+
++ skb_reset_network_header(skb);
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
+ if (!skb_is_gso(skb))
+@@ -37,6 +39,7 @@ static inline int skb_checksum_setup(str
+ if (th >= skb_tail_pointer(skb))
+ goto out;
+
++ skb_set_transport_header(skb, 4 * iph->ihl);
+ skb->csum_start = th - skb->head;
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+@@ -66,6 +69,15 @@ static inline int skb_checksum_setup(str
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ }
+
++ if (!skb_transport_header_was_set(skb)) {
++ struct flow_keys keys;
++
++ if (skb_flow_dissect(skb, &keys))
++ skb_set_transport_header(skb, keys.thoff);
++ else
++ skb_reset_transport_header(skb);
++ }
++
+ err = 0;
+ out:
+ return err;
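
Note: the net-util.h hunk above (the 3.9-level version of the header) introduces the skb_flow_dissect() fallback: set the transport header from keys.thoff when dissection succeeds, otherwise reset it. For a plain non-encapsulated IPv4 packet the dissection amounts to reading the IHL field; a standalone model (hypothetical names, illustrative only):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct flow_keys_model {
            uint16_t thoff;         /* transport header offset */
    };

    /* Returns 1 and fills thoff on success, 0 on a malformed header --
     * a toy version of what skb_flow_dissect() does for bare IPv4. */
    static int flow_dissect(const uint8_t *pkt, size_t len,
                            struct flow_keys_model *keys)
    {
            if (len < 20 || (pkt[0] >> 4) != 4)
                    return 0;
            keys->thoff = (pkt[0] & 0x0f) * 4;      /* IHL, 32-bit words */
            return keys->thoff >= 20 && keys->thoff <= len;
    }

    int main(void)
    {
            /* Minimal IPv4 header: version 4, IHL 5, protocol TCP. */
            uint8_t pkt[20] = { 0x45, 0, 0, 20, 0, 0, 0, 0, 64, 6 };
            struct flow_keys_model keys;

            if (flow_dissect(pkt, sizeof(pkt), &keys))
                    printf("transport header at offset %u\n", keys.thoff);
            else
                    printf("reset transport header (dissect failed)\n");
            return 0;
    }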
diff --git a/series.conf b/series.conf
index e00f331756..ea032e4c57 100644
--- a/series.conf
+++ b/series.conf
@@ -602,7 +602,7 @@
patches.xen/xen3-patch-3.7
patches.xen/xen3-patch-3.8
patches.xen/xen3-patch-3.9
- patches.xen/xen3-patch-3.10-rc6
+ patches.xen/xen3-patch-3.10
# ports of other patches
patches.xen/xen3-010-acpi_initrd_override_tables.patch