Home Home > GIT Browse
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jiri Kosina <jkosina@suse.cz> 2015-05-28 10:23:48 +0200
committer Jiri Kosina <jkosina@suse.cz> 2015-05-28 10:23:48 +0200
commit b348f9bdc7a2a9a609b34424f398c9e3e3dbf281 (patch)
tree 3d83497fc08c341a2ff35a4fee195a2b92a644e2
parent 5e89917557d37574422c5d40752d57b61fb423e9 (diff)
parent 41e1045944873afa60129691aa710712db192d0f (diff)
Merge remote-tracking branch 'origin/users/bpetkov/SLE11-SP3/for-next' into SLE11-SP3
Pull fix for bsc#914987 from Borislav Petkov.
-rw-r--r-- patches.arch/01-x86-mce-update-mce-severity-condition-check.patch 51
-rw-r--r-- patches.arch/02-x86-mce-fix-mce-regression-from-recent-cleanup.patch 40
-rw-r--r-- patches.arch/03-x86-mce-introduce-mce_gather_info.patch 121
-rw-r--r-- series.conf 6
4 files changed, 217 insertions, 1 deletion
diff --git a/patches.arch/01-x86-mce-update-mce-severity-condition-check.patch b/patches.arch/01-x86-mce-update-mce-severity-condition-check.patch
new file mode 100644
index 0000000000..79ca91d220
--- /dev/null
+++ b/patches.arch/01-x86-mce-update-mce-severity-condition-check.patch
@@ -0,0 +1,51 @@
+From: Chen Gong <gong.chen@linux.intel.com>
+Date: Thu, 20 Jun 2013 05:16:12 -0400
+Subject: x86/mce: Update MCE severity condition check
+Git-commit: 33d7885b594e169256daef652e8d3527b2298e75
+Patch-mainline: v3.11-rc1
+References: bsc#914987
+
+Update some SRAR severity condition checks to make them clearer,
+according to the latest Intel SDM Vol 3 (June 2013), table 15-20.
+
+Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
+Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kernel/cpu/mcheck/mce-severity.c | 15 +++++----------
+ 1 file changed, 5 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
+index beb1f1689e52..e2703520d120 100644
+--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
++++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
+@@ -110,22 +110,17 @@ static struct severity {
+ /* known AR MCACODs: */
+ #ifdef CONFIG_MEMORY_FAILURE
+ MCESEV(
+- KEEP, "HT thread notices Action required: data load error",
+- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+- MCGMASK(MCG_STATUS_EIPV, 0)
++ KEEP, "Action required but unaffected thread is continuable",
++ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR),
++ MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV)
+ ),
+ MCESEV(
+- AR, "Action required: data load error",
++ AR, "Action required: data load error in a user process",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+ USER
+ ),
+ MCESEV(
+- KEEP, "HT thread notices Action required: instruction fetch error",
+- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
+- MCGMASK(MCG_STATUS_EIPV, 0)
+- ),
+- MCESEV(
+- AR, "Action required: instruction fetch error",
++ AR, "Action required: instruction fetch error in a user process",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
+ USER
+ ),
+
diff --git a/patches.arch/02-x86-mce-fix-mce-regression-from-recent-cleanup.patch b/patches.arch/02-x86-mce-fix-mce-regression-from-recent-cleanup.patch
new file mode 100644
index 0000000000..7761150093
--- /dev/null
+++ b/patches.arch/02-x86-mce-fix-mce-regression-from-recent-cleanup.patch
@@ -0,0 +1,40 @@
+From: Tony Luck <tony.luck@intel.com>
+Date: Wed, 24 Jul 2013 10:09:43 -0700
+Subject: x86/mce: Fix mce regression from recent cleanup
+Git-commit: 1a7f0e3c4fff449f6dd08787beb98a8e57d8cdc7
+Patch-mainline: v3.11-rc4
+References: bsc#914987
+
+In commit 33d7885b594e169256daef652e8d3527b2298e75
+ x86/mce: Update MCE severity condition check
+
+we simplified the rules to recognise each classification of recoverable
+machine check, combining the instruction and data fetch rules into a
+single entry, based on clarifications in the June 2013 SDM that all
+recoverable events would be reported on the unaffected processor with
+MCG_STATUS.EIPV=0 and MCG_STATUS.RIPV=1. Unfortunately the simplified
+rule has a couple of bugs. Fix them here.
+
+Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kernel/cpu/mcheck/mce-severity.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
+index e2703520d120..c370e1c4468b 100644
+--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
++++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
+@@ -111,8 +111,8 @@ static struct severity {
+ #ifdef CONFIG_MEMORY_FAILURE
+ MCESEV(
+ KEEP, "Action required but unaffected thread is continuable",
+- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR),
+- MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV)
++ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
++ MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
+ ),
+ MCESEV(
+ AR, "Action required: data load error in a user process",
+
diff --git a/patches.arch/03-x86-mce-introduce-mce_gather_info.patch b/patches.arch/03-x86-mce-introduce-mce_gather_info.patch
new file mode 100644
index 0000000000..0059240a06
--- /dev/null
+++ b/patches.arch/03-x86-mce-introduce-mce_gather_info.patch
@@ -0,0 +1,121 @@
+From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+Date: Wed, 8 Jun 2011 10:57:46 +0900
+Subject: x86, mce: Introduce mce_gather_info()
+Git-commit: b8325c5b110d7ff460b79588e7e9afdcc73d5c3c
+Patch-mainline: v3.1-rc1
+References: bsc#914987
+
+This patch introduces mce_gather_info() which is to be called at the
+beginning of error handling and gathers minimum error information from
+proper error registers (and saved registers).
+
+As a result, mce_get_rip() is integrated into it and the unnecessary
+zeroing is removed. This also takes care of saving RIP, which is required
+to make some decisions about error severity for SRAR errors, instead of
+retrieving it later in the handler.
+
+Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+Acked-by: Tony Luck <tony.luck@intel.com>
+Link: http://lkml.kernel.org/r/4DEED71A.1060906@jp.fujitsu.com
+Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kernel/cpu/mcheck/mce.c | 57 ++++++++++++++++++---------------------
+ 1 file changed, 27 insertions(+), 30 deletions(-)
+
+--- a/arch/x86/kernel/cpu/mcheck/mce.c
++++ b/arch/x86/kernel/cpu/mcheck/mce.c
+@@ -386,6 +386,31 @@ static int under_injection(void)
+ }
+
+ /*
++ * Collect all global (w.r.t. this processor) status about this machine
++ * check into our "mce" struct so that we can use it later to assess
++ * the severity of the problem as we read per-bank specific details.
++ */
++static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
++{
++ mce_setup(m);
++
++ m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
++ if (regs) {
++ /*
++ * Get the address of the instruction at the time of
++ * the machine check error.
++ */
++ if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
++ m->ip = regs->ip;
++ m->cs = regs->cs;
++ }
++ /* Use accurate RIP reporting if available. */
++ if (rip_msr)
++ m->ip = mce_rdmsrl(rip_msr);
++ }
++}
++
++/*
+ * Simple lockless ring to communicate PFNs from the exception handler with the
+ * process context work function. This is vastly simplified because there's
+ * only a single reader and a single writer.
+@@ -456,31 +481,6 @@ static void mce_schedule_work(void)
+ }
+ }
+
+-/*
+- * Get the address of the instruction at the time of the machine check
+- * error.
+- */
+-static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
+-{
+-
+- if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) {
+- m->ip = regs->ip;
+- m->cs = regs->cs;
+- /*
+- * When in VM86 mode make the cs look like ring 3
+- * always. This is a lie, but it's better than passing
+- * the additional vm86 bit around everywhere.
+- */
+- if (v8086_mode(regs))
+- m->cs |= 3;
+- } else {
+- m->ip = 0;
+- m->cs = 0;
+- }
+- if (rip_msr)
+- m->ip = mce_rdmsrl(rip_msr);
+-}
+-
+ #ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * Called after interrupts have been reenabled again
+@@ -582,9 +582,8 @@ void machine_check_poll(enum mcp_flags f
+
+ percpu_inc(mce_poll_count);
+
+- mce_setup(&m);
++ mce_gather_info(&m, NULL);
+
+- m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+ for (i = 0; i < banks; i++) {
+ if (!mce_banks[i].ctl || !test_bit(i, *b))
+ continue;
+@@ -1036,9 +1035,8 @@ void do_machine_check(struct pt_regs *re
+ if (!banks)
+ goto out;
+
+- mce_setup(&m);
++ mce_gather_info(&m, regs);
+
+- m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+ final = &__get_cpu_var(mces_seen);
+ *final = m;
+
+@@ -1089,7 +1087,6 @@ void do_machine_check(struct pt_regs *re
+ */
+ add_taint(TAINT_MACHINE_CHECK);
+
+- mce_get_rip(&m, regs);
+ severity = mce_severity(&m, tolerant, NULL);
+
+ /*
diff --git a/series.conf b/series.conf
index c402eb1e2d..27b7dde680 100644
--- a/series.conf
+++ b/series.conf
@@ -626,7 +626,6 @@
patches.arch/x86-mce-amd-Disable-error-thresholding-bank-4
patches.arch/x86-mce-amd-Hide-interrupt_enable-sysfs-node
-
########################################################
# x86_64/4096CPUS - from SGI
########################################################
@@ -16654,6 +16653,11 @@
patches.fixes/pci_aer_move_aer_severity_defines_to_aer.h.patch
patches.fixes/pci_aer_force_fatal_when_component_has_been_reset.patch
+ # bsc#914987 - SRAR error injection
+ patches.arch/01-x86-mce-update-mce-severity-condition-check.patch
+ patches.arch/02-x86-mce-fix-mce-regression-from-recent-cleanup.patch
+ patches.arch/03-x86-mce-introduce-mce_gather_info.patch
+
# Check the EFI variable and the space usage more stringent.
# bnc#806499 and fixing some Samsung machines
patches.drivers/0001-efi_pstore-Check-remaining-space-with-QueryVariableI.patch