Home Home > GIT Browse
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKernel Build Daemon <kbuild@suse.de>2018-05-22 07:17:22 +0200
committerKernel Build Daemon <kbuild@suse.de>2018-05-22 07:17:22 +0200
commit442a52ced261db7380bdb550b239d98565b76459 (patch)
tree8dc00d15aee1b04cdfb2ad25309601d80dd566b1
parent5edfd137d6bdbe7a15392da3da04bd0d5a3ca908 (diff)
parentf7f97b6371a44b21ecd0772c50fab38297e68dc8 (diff)
Merge branch 'SLE15' into openSUSE-15.0
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu1
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt45
-rw-r--r--Documentation/userspace-api/spec_ctrl.rst94
-rw-r--r--arch/powerpc/include/asm/exception-64s.h29
-rw-r--r--arch/powerpc/include/asm/feature-fixups.h19
-rw-r--r--arch/powerpc/include/asm/setup.h12
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S19
-rw-r--r--arch/powerpc/kernel/setup_64.c119
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S14
-rw-r--r--arch/powerpc/lib/feature-fixups.c114
-rw-r--r--arch/powerpc/platforms/powernv/setup.c1
-rw-r--r--arch/powerpc/platforms/pseries/setup.c1
-rw-r--r--arch/x86/include/asm/cpufeatures.h4
-rw-r--r--arch/x86/include/asm/msr-index.h7
-rw-r--r--arch/x86/include/asm/nospec-branch.h51
-rw-r--r--arch/x86/include/asm/spec-ctrl.h38
-rw-r--r--arch/x86/include/asm/thread_info.h4
-rw-r--r--arch/x86/kernel/cpu/amd.c26
-rw-r--r--arch/x86/kernel/cpu/bugs.c381
-rw-r--r--arch/x86/kernel/cpu/common.c54
-rw-r--r--arch/x86/kernel/cpu/cpu.h2
-rw-r--r--arch/x86/kernel/cpu/intel.c1
-rw-r--r--arch/x86/kernel/process.c22
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/svm.c32
-rw-r--r--arch/x86/kvm/vmx.c16
-rw-r--r--drivers/base/cpu.c8
-rw-r--r--fs/proc/array.c25
-rw-r--r--include/linux/bpf_verifier.h3
-rw-r--r--include/linux/cpu.h2
-rw-r--r--include/linux/nospec.h10
-rw-r--r--include/linux/sched.h10
-rw-r--r--include/linux/seccomp.h3
-rw-r--r--include/uapi/linux/prctl.h12
-rw-r--r--include/uapi/linux/seccomp.h3
-rw-r--r--kernel/bpf/verifier.c59
-rw-r--r--kernel/seccomp.c21
-rw-r--r--kernel/sys.c23
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c6
39 files changed, 1205 insertions, 88 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 258902db14bf..8355e79350b7 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -378,6 +378,7 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/meltdown
/sys/devices/system/cpu/vulnerabilities/spectre_v1
/sys/devices/system/cpu/vulnerabilities/spectre_v2
+ /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2fce2a81eadc..e28a6dbce1dc 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2591,6 +2591,9 @@
allow data leaks with this option, which is equivalent
to spectre_v2=off.
+ nospec_store_bypass_disable
+ [HW] Disable all mitigations for the Speculative Store Bypass vulnerability
+
noxsave [BUGS=X86] Disables x86 extended register state save
and restore using xsave. The kernel will fallback to
enabling legacy floating-point and sse state.
@@ -3904,6 +3907,48 @@
Not specifying this option is equivalent to
spectre_v2=auto.
+ spec_store_bypass_disable=
+ [HW] Control Speculative Store Bypass (SSB) Disable mitigation
+ (Speculative Store Bypass vulnerability)
+
+ Certain CPUs are vulnerable to an exploit against a
+ a common industry wide performance optimization known
+ as "Speculative Store Bypass" in which recent stores
+ to the same memory location may not be observed by
+ later loads during speculative execution. The idea
+ is that such stores are unlikely and that they can
+ be detected prior to instruction retirement at the
+ end of a particular speculation execution window.
+
+ In vulnerable processors, the speculatively forwarded
+ store can be used in a cache side channel attack, for
+ example to read memory to which the attacker does not
+ directly have access (e.g. inside sandboxed code).
+
+ This parameter controls whether the Speculative Store
+ Bypass optimization is used.
+
+ on - Unconditionally disable Speculative Store Bypass
+ off - Unconditionally enable Speculative Store Bypass
+ auto - Kernel detects whether the CPU model contains an
+ implementation of Speculative Store Bypass and
+ picks the most appropriate mitigation. If the
+ CPU is not vulnerable, "off" is selected. If the
+ CPU is vulnerable the default mitigation is
+ architecture and Kconfig dependent. See below.
+ prctl - Control Speculative Store Bypass per thread
+ via prctl. Speculative Store Bypass is enabled
+ for a process by default. The state of the control
+ is inherited on fork.
+ seccomp - Same as "prctl" above, but all seccomp threads
+ will disable SSB unless they explicitly opt out.
+
+ Not specifying this option is equivalent to
+ spec_store_bypass_disable=auto.
+
+ Default mitigations:
+ X86: If CONFIG_SECCOMP=y "seccomp", otherwise "prctl"
+
spia_io_base= [HW,MTD]
spia_fio_base=
spia_pedr=
diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
new file mode 100644
index 000000000000..1b3690d30943
--- /dev/null
+++ b/Documentation/userspace-api/spec_ctrl.rst
@@ -0,0 +1,94 @@
+===================
+Speculation Control
+===================
+
+Quite some CPUs have speculation related misfeatures which are in fact
+vulnerabilites causing data leaks in various forms even accross privilege
+domains.
+
+The kernel provides mitigation for such vulnerabilities in various
+forms. Some of these mitigations are compile time configurable and some on
+the kernel command line.
+
+There is also a class of mitigations which are very expensive, but they can
+be restricted to a certain set of processes or tasks in controlled
+environments. The mechanism to control these mitigations is via
+:manpage:`prctl(2)`.
+
+There are two prctl options which are related to this:
+
+ * PR_GET_SPECULATION_CTRL
+
+ * PR_SET_SPECULATION_CTRL
+
+PR_GET_SPECULATION_CTRL
+-----------------------
+
+PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature
+which is selected with arg2 of prctl(2). The return value uses bits 0-3 with
+the following meaning:
+
+==== ===================== ===================================================
+Bit Define Description
+==== ===================== ===================================================
+0 PR_SPEC_PRCTL Mitigation can be controlled per task by
+ PR_SET_SPECULATION_CTRL
+1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is
+ disabled
+2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is
+ enabled
+3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A
+ subsequent prctl(..., PR_SPEC_ENABLE) will fail.
+==== ===================== ===================================================
+
+If all bits are 0 the CPU is not affected by the speculation misfeature.
+
+If PR_SPEC_PRCTL is set, then the per task control of the mitigation is
+available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation
+misfeature will fail.
+
+PR_SET_SPECULATION_CTRL
+-----------------------
+
+PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which
+is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand
+in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE or
+PR_SPEC_FORCE_DISABLE.
+
+Common error codes
+------------------
+======= =================================================================
+Value Meaning
+======= =================================================================
+EINVAL The prctl is not implemented by the architecture or unused
+ prctl(2) arguments are not 0
+
+ENODEV arg2 is selecting a not supported speculation misfeature
+======= =================================================================
+
+PR_SET_SPECULATION_CTRL error codes
+-----------------------------------
+======= =================================================================
+Value Meaning
+======= =================================================================
+0 Success
+
+ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor
+ PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE
+
+ENXIO Control of the selected speculation misfeature is not possible.
+ See PR_GET_SPECULATION_CTRL.
+
+EPERM Speculation was disabled with PR_SPEC_FORCE_DISABLE and caller
+ tried to enable it again.
+======= =================================================================
+
+Speculation misfeature controls
+-------------------------------
+- PR_SPEC_STORE_BYPASS: Speculative Store Bypass
+
+ Invocations:
+ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 74419070d605..d838b2ceebe1 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -54,6 +54,27 @@
#define EX_SIZE 13 /* size in u64 units */
+#define STF_ENTRY_BARRIER_SLOT \
+ STF_ENTRY_BARRIER_FIXUP_SECTION; \
+ mflr r10; \
+ bl stf_barrier_fallback; \
+ mtlr r10
+
+#define STF_EXIT_BARRIER_SLOT \
+ STF_EXIT_BARRIER_FIXUP_SECTION; \
+ nop; \
+ nop; \
+ nop; \
+ nop; \
+ nop; \
+ nop
+
+/*
+ * r10 must be free to use, r13 must be paca
+ */
+#define INTERRUPT_TO_KERNEL \
+ STF_ENTRY_BARRIER_SLOT
+
/*
* Macros for annotating the expected destination of (h)rfid
*
@@ -70,16 +91,19 @@
rfid
#define RFI_TO_USER \
+ STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
rfid; \
b rfi_flush_fallback
#define RFI_TO_USER_OR_KERNEL \
+ STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
rfid; \
b rfi_flush_fallback
#define RFI_TO_GUEST \
+ STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
rfid; \
b rfi_flush_fallback
@@ -88,21 +112,25 @@
hrfid
#define HRFI_TO_USER \
+ STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
hrfid; \
b hrfi_flush_fallback
#define HRFI_TO_USER_OR_KERNEL \
+ STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
hrfid; \
b hrfi_flush_fallback
#define HRFI_TO_GUEST \
+ STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
hrfid; \
b hrfi_flush_fallback
#define HRFI_TO_UNKNOWN \
+ STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
hrfid; \
b hrfi_flush_fallback
@@ -234,6 +262,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define __EXCEPTION_PROLOG_1(area, extra, vec) \
OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \
OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \
+ INTERRUPT_TO_KERNEL; \
SAVE_CTR(r10, area); \
mfcr r9; \
extra(vec); \
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index dcac380656e9..861972a7031e 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -187,6 +187,22 @@ label##3: \
FTR_ENTRY_OFFSET label##1b-label##3b; \
.popsection;
+#define STF_ENTRY_BARRIER_FIXUP_SECTION \
+953: \
+ .pushsection __stf_entry_barrier_fixup,"a"; \
+ .align 2; \
+954: \
+ FTR_ENTRY_OFFSET 953b-954b; \
+ .popsection;
+
+#define STF_EXIT_BARRIER_FIXUP_SECTION \
+955: \
+ .pushsection __stf_exit_barrier_fixup,"a"; \
+ .align 2; \
+956: \
+ FTR_ENTRY_OFFSET 955b-956b; \
+ .popsection;
+
#define RFI_FLUSH_FIXUP_SECTION \
951: \
.pushsection __rfi_flush_fixup,"a"; \
@@ -207,6 +223,9 @@ label##3: \
#ifndef __ASSEMBLY__
#include <linux/types.h>
+extern long stf_barrier_fallback;
+extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;
+extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup;
extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index addb0672b1f3..26f0cbecf1a2 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -38,9 +38,17 @@ static inline void pseries_big_endian_exceptions(void) {}
static inline void pseries_little_endian_exceptions(void) {}
#endif /* CONFIG_PPC_PSERIES */
-void rfi_flush_enable(bool enable);
-
/* These are bit flags */
+enum stf_barrier_type {
+ STF_BARRIER_NONE = 0x1,
+ STF_BARRIER_FALLBACK = 0x2,
+ STF_BARRIER_EIEIO = 0x4,
+ STF_BARRIER_SYNC_ORI = 0x8,
+};
+
+void setup_stf_barrier(void);
+void do_stf_barrier_fixups(enum stf_barrier_type types);
+
enum l1d_flush_type {
L1D_FLUSH_NONE = 0x1,
L1D_FLUSH_FALLBACK = 0x2,
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 40007ebf0fb1..02c78571198c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -837,7 +837,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
-EXC_REAL_MASKABLE(decrementer, 0x900, 0x80)
+EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80)
EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900)
TRAMP_KVM(PACA_EXGEN, 0x900)
EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
@@ -913,6 +913,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
mtctr r13; \
GET_PACA(r13); \
std r10,PACA_EXGEN+EX_R10(r13); \
+ INTERRUPT_TO_KERNEL; \
KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \
HMT_MEDIUM; \
mfctr r9;
@@ -921,7 +922,8 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
#define SYSCALL_KVMTEST \
HMT_MEDIUM; \
mr r9,r13; \
- GET_PACA(r13);
+ GET_PACA(r13); \
+ INTERRUPT_TO_KERNEL;
#endif
#define LOAD_SYSCALL_HANDLER(reg) \
@@ -1423,6 +1425,19 @@ masked_##_H##interrupt: \
##_H##RFI_TO_KERNEL; \
b .
+TRAMP_REAL_BEGIN(stf_barrier_fallback)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ sync
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ori 31,31,0
+ .rept 14
+ b 1f
+1:
+ .endr
+ blr
+
TRAMP_REAL_BEGIN(rfi_flush_fallback)
SET_SCRATCH0(r13);
GET_PACA(r13);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6dfb5fc15513..307b347e8da0 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -69,6 +69,7 @@
#include <asm/livepatch.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
+#include <asm/security_features.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -784,11 +785,48 @@ early_initcall(disable_hardlockup_detector);
#endif
#ifdef CONFIG_PPC_BOOK3S_64
-static enum l1d_flush_type enabled_flush_types;
+static enum stf_barrier_type stf_enabled_flush_types;
+static bool no_stf_barrier;
+bool stf_barrier;
+
+static enum l1d_flush_type rfi_enabled_flush_types;
static void *l1d_flush_fallback_area;
static bool no_rfi_flush;
bool rfi_flush;
+static int __init handle_no_stf_barrier(char *p)
+{
+ pr_info("stf-barrier: disabled on command line.");
+ no_stf_barrier = true;
+ return 0;
+}
+
+early_param("no_stf_barrier", handle_no_stf_barrier);
+
+/* This is the generic flag used by other architectures */
+static int __init handle_ssbd(char *p)
+{
+ if (!p || strncmp(p, "off", 3) == 0) {
+ handle_no_stf_barrier(NULL);
+ return 0;
+ } else if (strncmp(p, "auto", 5) == 0 || strncmp(p, "on", 2) == 0 )
+ /* Until firmware tells us, we have the barrier with auto */
+ return 0;
+ else
+ return 1;
+
+ return 0;
+}
+early_param("spec_store_bypass_disable", handle_ssbd);
+
+/* This is the generic flag used by other architectures */
+static int __init handle_no_ssbd(char *p)
+{
+ handle_no_stf_barrier(NULL);
+ return 0;
+}
+early_param("nospec_store_bypass_disable", handle_no_ssbd);
+
static int __init handle_no_rfi_flush(char *p)
{
pr_info("rfi-flush: disabled on command line.");
@@ -817,10 +855,21 @@ static void do_nothing(void *unused)
*/
}
-void rfi_flush_enable(bool enable)
+static void stf_barrier_enable(bool enable)
{
if (enable) {
- do_rfi_flush_fixups(enabled_flush_types);
+ do_stf_barrier_fixups(stf_enabled_flush_types);
+ on_each_cpu(do_nothing, NULL, 1);
+ } else
+ do_stf_barrier_fixups(STF_BARRIER_NONE);
+
+ stf_barrier = enable;
+}
+
+static void rfi_flush_enable(bool enable)
+{
+ if (enable) {
+ do_rfi_flush_fixups(rfi_enabled_flush_types);
on_each_cpu(do_nothing, NULL, 1);
} else
do_rfi_flush_fixups(L1D_FLUSH_NONE);
@@ -828,6 +877,41 @@ void rfi_flush_enable(bool enable)
rfi_flush = enable;
}
+void setup_stf_barrier(void)
+{
+ enum stf_barrier_type type;
+ bool enable, hv;
+
+ hv = cpu_has_feature(CPU_FTR_HVMODE);
+
+ /* Default to fallback in case fw-features are not available */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ type = STF_BARRIER_EIEIO;
+ else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ type = STF_BARRIER_SYNC_ORI;
+ else if (cpu_has_feature(CPU_FTR_ARCH_206))
+ type = STF_BARRIER_FALLBACK;
+ else
+ type = STF_BARRIER_NONE;
+
+ enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) ||
+ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv));
+
+ if (type == STF_BARRIER_FALLBACK) {
+ pr_info("stf-barrier: fallback barrier available\n");
+ } else if (type == STF_BARRIER_SYNC_ORI) {
+ pr_info("stf-barrier: hwsync barrier available\n");
+ } else if (type == STF_BARRIER_EIEIO) {
+ pr_info("stf-barrier: eieio barrier available\n");
+ }
+
+ stf_enabled_flush_types = type;
+
+ if (!no_stf_barrier)
+ stf_barrier_enable(enable);
+}
+
static void init_fallback_flush(void)
{
u64 l1d_size, limit;
@@ -867,13 +951,39 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable)
if (types & L1D_FLUSH_MTTRIG)
pr_info("rfi-flush: mttrig type flush available\n");
- enabled_flush_types = types;
+ rfi_enabled_flush_types = types;
if (!no_rfi_flush)
rfi_flush_enable(enable);
}
#ifdef CONFIG_DEBUG_FS
+static int stf_barrier_set(void *data, u64 val)
+{
+ bool enable;
+
+ if (val == 1)
+ enable = true;
+ else if (val == 0)
+ enable = false;
+ else
+ return -EINVAL;
+
+ /* Only do anything if we're changing state */
+ if (enable != stf_barrier)
+ stf_barrier_enable(enable);
+
+ return 0;
+}
+
+static int stf_barrier_get(void *data, u64 *val)
+{
+ *val = stf_barrier ? 1 : 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n");
+
static int rfi_flush_set(void *data, u64 val)
{
bool enable;
@@ -903,6 +1013,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
static __init int rfi_flush_debugfs_init(void)
{
debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
+ debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier);
return 0;
}
device_initcall(rfi_flush_debugfs_init);
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index fdd7e1bc3811..7219d83917e0 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -136,6 +136,20 @@ SECTIONS
#ifdef CONFIG_PPC64
. = ALIGN(8);
+ __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) {
+ __start___stf_entry_barrier_fixup = .;
+ *(__stf_entry_barrier_fixup)
+ __stop___stf_entry_barrier_fixup = .;
+ }
+
+ . = ALIGN(8);
+ __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
+ __start___stf_exit_barrier_fixup = .;
+ *(__stf_exit_barrier_fixup)
+ __stop___stf_exit_barrier_fixup = .;
+ }
+
+ . = ALIGN(8);
__rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
__start___rfi_flush_fixup = .;
*(__rfi_flush_fixup)
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 465fb6cbd981..c0f53a599f36 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -117,6 +117,120 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
}
#ifdef CONFIG_PPC_BOOK3S_64
+void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
+{
+ unsigned int instrs[3], *dest;
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___stf_entry_barrier_fixup),
+ end = PTRRELOC(&__stop___stf_entry_barrier_fixup);
+
+ instrs[0] = 0x60000000; /* nop */
+ instrs[1] = 0x60000000; /* nop */
+ instrs[2] = 0x60000000; /* nop */
+
+ i = 0;
+ if (types & STF_BARRIER_FALLBACK) {
+ instrs[i++] = 0x7d4802a6; /* mflr r10 */
+ instrs[i++] = 0x60000000; /* branch patched below */
+ instrs[i++] = 0x7d4803a6; /* mtlr r10 */
+ } else if (types & STF_BARRIER_EIEIO) {
+ instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+ } else if (types & STF_BARRIER_SYNC_ORI) {
+ instrs[i++] = 0x7c0004ac; /* hwsync */
+ instrs[i++] = 0xe94d0000; /* ld r10,0(r13) */
+ instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+ }
+
+ for (i = 0; start < end; start++, i++) {
+ dest = (void *)start + *start;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ patch_instruction(dest, instrs[0]);
+
+ if (types & STF_BARRIER_FALLBACK)
+ patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback,
+ BRANCH_SET_LINK);
+ else
+ patch_instruction(dest + 1, instrs[1]);
+
+ patch_instruction(dest + 2, instrs[2]);
+ }
+
+ printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
+ (types == STF_BARRIER_NONE) ? "no" :
+ (types == STF_BARRIER_FALLBACK) ? "fallback" :
+ (types == STF_BARRIER_EIEIO) ? "eieio" :
+ (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync"
+ : "unknown");
+}
+
+void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
+{
+ unsigned int instrs[6], *dest;
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___stf_exit_barrier_fixup),
+ end = PTRRELOC(&__stop___stf_exit_barrier_fixup);
+
+ instrs[0] = 0x60000000; /* nop */
+ instrs[1] = 0x60000000; /* nop */
+ instrs[2] = 0x60000000; /* nop */
+ instrs[3] = 0x60000000; /* nop */
+ instrs[4] = 0x60000000; /* nop */
+ instrs[5] = 0x60000000; /* nop */
+
+ i = 0;
+ if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) {
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */
+ instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */
+ } else {
+ instrs[i++] = 0x7db243a6; /* mtsprg 2,r13 */
+ instrs[i++] = 0x7db142a6; /* mfsprg r13,1 */
+ }
+ instrs[i++] = 0x7c0004ac; /* hwsync */
+ instrs[i++] = 0xe9ad0000; /* ld r13,0(r13) */
+ instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */
+ } else {
+ instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */
+ }
+ } else if (types & STF_BARRIER_EIEIO) {
+ instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+ }
+
+ for (i = 0; start < end; start++, i++) {
+ dest = (void *)start + *start;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ patch_instruction(dest, instrs[0]);
+ patch_instruction(dest + 1, instrs[1]);
+ patch_instruction(dest + 2, instrs[2]);
+ patch_instruction(dest + 3, instrs[3]);
+ patch_instruction(dest + 4, instrs[4]);
+ patch_instruction(dest + 5, instrs[5]);
+ }
+ printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i,
+ (types == STF_BARRIER_NONE) ? "no" :
+ (types == STF_BARRIER_FALLBACK) ? "fallback" :
+ (types == STF_BARRIER_EIEIO) ? "eieio" :
+ (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync"
+ : "unknown");
+}
+
+
+void do_stf_barrier_fixups(enum stf_barrier_type types)
+{
+ do_stf_entry_barrier_fixups(types);
+ do_stf_exit_barrier_fixups(types);
+}
+
void do_rfi_flush_fixups(enum l1d_flush_type types)
{
unsigned int instrs[3], *dest;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index dbced412fd7f..0f4a487282d0 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -131,6 +131,7 @@ static void __init pnv_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
+ setup_stf_barrier();
pnv_setup_rfi_flush();
/* Initialize SMP */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 15b63bb5ec52..16d26cde2beb 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -711,6 +711,7 @@ static void __init pSeries_setup_arch(void)
fwnmi_init();
pseries_setup_rfi_flush();
+ setup_stf_barrier();
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index a67b11df51db..60e6c8effa4e 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -215,6 +215,8 @@
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
#define X86_FEATURE_USE_IBRS ( 7*32+23) /* "" Use IBRS for Spectre v2 safety */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+24) /* "" Disable Speculative Store Bypass. */
+#define X86_FEATURE_AMD_SSBD (7*32+25) /* "" AMD SSBD implementation */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -331,6 +333,7 @@
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_SSBD (18*32+31) /* Speculative Store Bypass Disable */
/*
* BUG word(s)
@@ -360,4 +363,5 @@
#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b2c60697f9cf..4a7932ed1ca2 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,6 +41,8 @@
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
+#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
+#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
@@ -67,6 +69,11 @@
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
+#define ARCH_CAP_SSBD_NO (1 << 4) /*
+ * Not susceptible to Speculative Store Bypass
+ * attack, so no Speculative Store Bypass
+ * control required.
+ */
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 55660055fce7..e1bbcb9e5c69 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -214,6 +214,25 @@ enum spectre_v2_mitigation {
SPECTRE_V2_IBRS,
};
+/*
+ * The Intel specification for the SPEC_CTRL MSR requires that we
+ * preserve any already set reserved bits at boot time (e.g. for
+ * future additions that this kernel is not currently aware of).
+ * We then set any additional mitigation bits that we want
+ * ourselves and always use this as the base for SPEC_CTRL.
+ * We also use this when handling guest entry/exit as below.
+ */
+extern void x86_spec_ctrl_set(u64);
+extern u64 x86_spec_ctrl_get_default(void);
+
+/* The Speculative Store Bypass disable variants */
+enum ssb_mitigation {
+ SPEC_STORE_BYPASS_NONE,
+ SPEC_STORE_BYPASS_DISABLE,
+ SPEC_STORE_BYPASS_PRCTL,
+ SPEC_STORE_BYPASS_SECCOMP,
+};
+
extern char __indirect_thunk_start[];
extern char __indirect_thunk_end[];
@@ -236,20 +255,22 @@ static inline void vmexit_fill_RSB(void)
: : "memory" );
}
-#define alternative_msr_write(_msr, _val, _feature) \
- asm volatile(ALTERNATIVE("", \
- "movl %[msr], %%ecx\n\t" \
- "movl %[val], %%eax\n\t" \
- "movl $0, %%edx\n\t" \
- "wrmsr", \
- _feature) \
- : : [msr] "i" (_msr), [val] "i" (_val) \
- : "eax", "ecx", "edx", "memory")
+static __always_inline
+void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
+{
+ asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
+ : : "c" (msr),
+ "a" (val),
+ "d" (val >> 32),
+ [feature] "i" (feature)
+ : "memory");
+}
static inline void indirect_branch_prediction_barrier(void)
{
- alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
- X86_FEATURE_USE_IBPB);
+ u64 val = PRED_CMD_IBPB;
+
+ alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
}
/*
@@ -296,14 +317,18 @@ static inline void unrestrict_branch_speculation(void)
*/
#define firmware_restrict_branch_speculation_start() \
do { \
+ u64 val = x86_spec_ctrl_get_default() | SPEC_CTRL_IBRS; \
+ \
preempt_disable(); \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
X86_FEATURE_USE_IBRS_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
do { \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \
+ u64 val = x86_spec_ctrl_get_default(); \
+ \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
X86_FEATURE_USE_IBRS_FW); \
preempt_enable(); \
} while (0)
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
new file mode 100644
index 000000000000..dc21209790bf
--- /dev/null
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SPECCTRL_H_
+#define _ASM_X86_SPECCTRL_H_
+
+#include <linux/thread_info.h>
+#include <asm/nospec-branch.h>
+
+/*
+ * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
+ * the guest has, while on VMEXIT we restore the host view. This
+ * would be easier if SPEC_CTRL were architecturally maskable or
+ * shadowable for guests but this is not (currently) the case.
+ * Takes the guest view of SPEC_CTRL MSR as a parameter.
+ */
+extern void x86_spec_ctrl_set_guest(u64);
+extern void x86_spec_ctrl_restore_host(u64);
+
+/* AMD specific Speculative Store Bypass MSR data */
+extern u64 x86_amd_ls_cfg_base;
+extern u64 x86_amd_ls_cfg_ssbd_mask;
+
+/* The Intel SPEC CTRL MSR base value cache */
+extern u64 x86_spec_ctrl_base;
+
+static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
+{
+ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+ return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+}
+
+static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
+{
+ return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
+}
+
+extern void speculative_store_bypass_update(void);
+
+#endif
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index cf95135981aa..722715d40a7e 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -80,6 +80,7 @@ struct thread_info {
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
+#define TIF_SSBD 5 /* Reduced data speculation */
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
@@ -105,6 +106,7 @@ struct thread_info {
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
+#define _TIF_SSBD (1 << TIF_SSBD)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
@@ -142,7 +144,7 @@ struct thread_info {
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
- (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
+ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 013b0e88878e..e1ba8011dcba 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -10,6 +10,7 @@
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
+#include <asm/spec-ctrl.h>
#include <asm/smp.h>
#include <asm/pci-direct.h>
#include <asm/delay.h>
@@ -553,6 +554,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
rdmsrl(MSR_FAM10H_NODE_ID, value);
nodes_per_socket = ((value >> 3) & 7) + 1;
}
+
+ if (c->x86 >= 0x15 && c->x86 <= 0x17) {
+ unsigned int bit;
+
+ switch (c->x86) {
+ case 0x15: bit = 54; break;
+ case 0x16: bit = 33; break;
+ case 0x17: bit = 10; break;
+ default: return;
+ }
+ /*
+ * Try to cache the base value so further operations can
+ * avoid RMW. If that faults, do not enable SSBD.
+ */
+ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
+ setup_force_cpu_cap(X86_FEATURE_SSBD);
+ setup_force_cpu_cap(X86_FEATURE_AMD_SSBD);
+ x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
+ }
+ }
}
static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
@@ -886,6 +907,11 @@ static void init_amd(struct cpuinfo_x86 *c)
/* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
if (!cpu_has(c, X86_FEATURE_XENPV))
set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+
+ if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) {
+ set_cpu_cap(c, X86_FEATURE_SSBD);
+ set_cpu_cap(c, X86_FEATURE_AMD_SSBD);
+ }
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 803a0ca79168..7271cc15fb05 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -11,8 +11,10 @@
#include <linux/utsname.h>
#include <linux/cpu.h>
#include <linux/module.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
#include <asm/bugs.h>
#include <asm/processor.h>
@@ -26,6 +28,26 @@
#include <asm/intel-family.h>
static void __init spectre_v2_select_mitigation(void);
+static void __init ssb_select_mitigation(void);
+
+/*
+ * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
+ * writes to SPEC_CTRL contain whatever reserved bits have been set.
+ */
+u64 __ro_after_init x86_spec_ctrl_base;
+
+/*
+ * The vendor and possibly platform specific bits which can be modified in
+ * x86_spec_ctrl_base.
+ */
+static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS;
+
+/*
+ * AMD specific MSR info for Speculative Store Bypass control.
+ * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
+ */
+u64 __ro_after_init x86_amd_ls_cfg_base;
+u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
void __init check_bugs(void)
{
@@ -36,9 +58,23 @@ void __init check_bugs(void)
print_cpu_info(&boot_cpu_data);
}
+ /*
+ * Read the SPEC_CTRL MSR to account for reserved bits which may
+ * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
+ * init code as it is not enumerated and depends on the family.
+ */
+ if (boot_cpu_has(X86_FEATURE_IBRS))
+ rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
/* Select the proper spectre mitigation before patching alternatives */
spectre_v2_select_mitigation();
+ /*
+ * Select proper mitigation for any exposure to the Speculative Store
+ * Bypass vulnerability.
+ */
+ ssb_select_mitigation();
+
#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
@@ -94,7 +130,65 @@ static const char *spectre_v2_strings[] = {
#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
-static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+ SPECTRE_V2_NONE;
+
+void x86_spec_ctrl_set(u64 val)
+{
+ if (val & x86_spec_ctrl_mask)
+ WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val);
+ else
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val);
+}
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_set);
+
+u64 x86_spec_ctrl_get_default(void)
+{
+ u64 msrval = x86_spec_ctrl_base;
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
+ return msrval;
+}
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default);
+
+void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl)
+{
+ u64 host = x86_spec_ctrl_base;
+
+ if (!boot_cpu_has(X86_FEATURE_IBRS))
+ return;
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
+
+ if (host != guest_spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl);
+}
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest);
+
+void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl)
+{
+ u64 host = x86_spec_ctrl_base;
+
+ if (!boot_cpu_has(X86_FEATURE_IBRS))
+ return;
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
+
+ if (host != guest_spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, host);
+}
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host);
+
+static void x86_amd_ssb_disable(void)
+{
+ u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;
+
+ if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
+ wrmsrl(MSR_AMD64_LS_CFG, msrval);
+}
#ifdef RETPOLINE
static bool spectre_v2_bad_module;
@@ -330,32 +424,289 @@ retpoline_auto:
}
#undef pr_fmt
+#define pr_fmt(fmt) "Speculative Store Bypass: " fmt
+
+static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE;
+
+/* The kernel command line selection */
+enum ssb_mitigation_cmd {
+ SPEC_STORE_BYPASS_CMD_NONE,
+ SPEC_STORE_BYPASS_CMD_AUTO,
+ SPEC_STORE_BYPASS_CMD_ON,
+ SPEC_STORE_BYPASS_CMD_PRCTL,
+ SPEC_STORE_BYPASS_CMD_SECCOMP,
+};
+
+static const char *ssb_strings[] = {
+ [SPEC_STORE_BYPASS_NONE] = "Vulnerable",
+ [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
+ [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl",
+ [SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp",
+};
+
+static const struct {
+ const char *option;
+ enum ssb_mitigation_cmd cmd;
+} ssb_mitigation_options[] = {
+ { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
+ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
+ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
+ { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */
+ { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */
+};
+
+static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
+{
+ enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO;
+ char arg[20];
+ int ret, i;
+
+ if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
+ return SPEC_STORE_BYPASS_CMD_NONE;
+ } else {
+ ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
+ arg, sizeof(arg));
+ if (ret < 0)
+ return SPEC_STORE_BYPASS_CMD_AUTO;
+
+ for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) {
+ if (!match_option(arg, ret, ssb_mitigation_options[i].option))
+ continue;
+
+ cmd = ssb_mitigation_options[i].cmd;
+ break;
+ }
+
+ if (i >= ARRAY_SIZE(ssb_mitigation_options)) {
+ pr_err("unknown option (%s). Switching to AUTO select\n", arg);
+ return SPEC_STORE_BYPASS_CMD_AUTO;
+ }
+ }
+
+ return cmd;
+}
+
+static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
+{
+ enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE;
+ enum ssb_mitigation_cmd cmd;
+
+ if (!boot_cpu_has(X86_FEATURE_SSBD))
+ return mode;
+
+ cmd = ssb_parse_cmdline();
+ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) &&
+ (cmd == SPEC_STORE_BYPASS_CMD_NONE ||
+ cmd == SPEC_STORE_BYPASS_CMD_AUTO))
+ return mode;
+
+ switch (cmd) {
+ case SPEC_STORE_BYPASS_CMD_AUTO:
+ case SPEC_STORE_BYPASS_CMD_SECCOMP:
+ /*
+ * Choose prctl+seccomp as the default mode if seccomp is
+ * enabled.
+ */
+ if (IS_ENABLED(CONFIG_SECCOMP))
+ mode = SPEC_STORE_BYPASS_SECCOMP;
+ else
+ mode = SPEC_STORE_BYPASS_PRCTL;
+ break;
+ case SPEC_STORE_BYPASS_CMD_ON:
+ mode = SPEC_STORE_BYPASS_DISABLE;
+ break;
+ case SPEC_STORE_BYPASS_CMD_PRCTL:
+ mode = SPEC_STORE_BYPASS_PRCTL;
+ break;
+ case SPEC_STORE_BYPASS_CMD_NONE:
+ break;
+ }
+
+ /*
+ * We have three CPU feature flags that are in play here:
+ * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
+ * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
+ * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
+ */
+ if (mode == SPEC_STORE_BYPASS_DISABLE) {
+ setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
+ /*
+ * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
+ * a completely different MSR and bit dependent on family.
+ */
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+ x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD;
+ x86_spec_ctrl_set(SPEC_CTRL_SSBD);
+ break;
+ case X86_VENDOR_AMD:
+ x86_amd_ssb_disable();
+ break;
+ }
+ }
+
+ return mode;
+}
+
+static void ssb_select_mitigation()
+{
+ ssb_mode = __ssb_select_mitigation();
+
+ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+ pr_info("%s\n", ssb_strings[ssb_mode]);
+}
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Speculation prctl: " fmt
+
+static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+ bool update;
+
+ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
+ ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
+ return -ENXIO;
+
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ /* If speculation is force disabled, enable is not allowed */
+ if (task_spec_ssb_force_disable(task))
+ return -EPERM;
+ task_clear_spec_ssb_disable(task);
+ update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ case PR_SPEC_DISABLE:
+ task_set_spec_ssb_disable(task);
+ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ case PR_SPEC_FORCE_DISABLE:
+ task_set_spec_ssb_disable(task);
+ task_set_spec_ssb_force_disable(task);
+ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ /*
+ * If being set on non-current task, delay setting the CPU
+ * mitigation until it is next scheduled.
+ */
+ if (task == current && update)
+ speculative_store_bypass_update();
+
+ return 0;
+}
+
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+ unsigned long ctrl)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssb_prctl_set(task, ctrl);
+ default:
+ return -ENODEV;
+ }
+}
+
+#ifdef CONFIG_SECCOMP
+void arch_seccomp_spec_mitigate(struct task_struct *task)
+{
+ if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
+ ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
+}
+#endif
+
+static int ssb_prctl_get(struct task_struct *task)
+{
+ switch (ssb_mode) {
+ case SPEC_STORE_BYPASS_DISABLE:
+ return PR_SPEC_DISABLE;
+ case SPEC_STORE_BYPASS_SECCOMP:
+ case SPEC_STORE_BYPASS_PRCTL:
+ if (task_spec_ssb_force_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+ if (task_spec_ssb_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+ default:
+ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+ return PR_SPEC_ENABLE;
+ return PR_SPEC_NOT_AFFECTED;
+ }
+}
+
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssb_prctl_get(task);
+ default:
+ return -ENODEV;
+ }
+}
+
+void x86_spec_ctrl_setup_ap(void)
+{
+ if (boot_cpu_has(X86_FEATURE_IBRS))
+ x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask);
+
+ if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
+ x86_amd_ssb_disable();
+}
#ifdef CONFIG_SYSFS
-ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+
+ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
+ char *buf, unsigned int bug)
{
- if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+ if (!boot_cpu_has_bug(bug))
return sprintf(buf, "Not affected\n");
- if (boot_cpu_has(X86_FEATURE_PTI))
- return sprintf(buf, "Mitigation: PTI\n");
+
+ switch (bug) {
+ case X86_BUG_CPU_MELTDOWN:
+ if (boot_cpu_has(X86_FEATURE_PTI))
+ return sprintf(buf, "Mitigation: PTI\n");
+
+ break;
+
+ case X86_BUG_SPECTRE_V1:
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+
+ case X86_BUG_SPECTRE_V2:
+ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ spectre_v2_module_string());
+
+ case X86_BUG_SPEC_STORE_BYPASS:
+ return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
+
+ default:
+ break;
+ }
+
return sprintf(buf, "Vulnerable\n");
}
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN);
+}
+
ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
{
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
- return sprintf(buf, "Not affected\n");
- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1);
}
ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
{
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
- return sprintf(buf, "Not affected\n");
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2);
+}
- return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
- boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
- spectre_v2_module_string());
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5b5fbdd691f0..1ff36cc97f58 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -894,21 +894,55 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
{}
};
-static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
+static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
+ { X86_VENDOR_CENTAUR, 5, },
+ { X86_VENDOR_INTEL, 5, },
+ { X86_VENDOR_NSC, 5, },
+ { X86_VENDOR_AMD, 0x12, },
+ { X86_VENDOR_AMD, 0x11, },
+ { X86_VENDOR_AMD, 0x10, },
+ { X86_VENDOR_AMD, 0xf, },
+ { X86_VENDOR_ANY, 4, },
+ {}
+};
+
+static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = 0;
- if (x86_match_cpu(cpu_no_meltdown))
- return false;
-
if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+ if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
+ !(ia32_cap & ARCH_CAP_SSBD_NO))
+ setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
+
+ if (x86_match_cpu(cpu_no_speculation))
+ return;
+
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
+ if (x86_match_cpu(cpu_no_meltdown))
+ return;
+
/* Rogue Data Cache Load? No! */
if (ia32_cap & ARCH_CAP_RDCL_NO)
- return false;
+ return;
- return true;
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
}
/*
@@ -958,12 +992,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
- if (!x86_match_cpu(cpu_no_speculation)) {
- if (cpu_vulnerable_to_meltdown(c))
- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
- setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
- setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
- }
+ cpu_set_bug_bits(c);
fpu__init_system(c);
}
@@ -1314,6 +1343,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
#endif
mtrr_ap_init();
validate_apic_and_package_id(c);
+ x86_spec_ctrl_setup_ap();
}
static __init int setup_noclflush(char *arg)
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 2584265d4745..ffe9faeace81 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -46,4 +46,6 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+extern void x86_spec_ctrl_setup_ap(void);
+
#endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index f515441c3336..c921b647438c 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -188,6 +188,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
setup_clear_cpu_cap(X86_FEATURE_STIBP);
setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
+ setup_clear_cpu_cap(X86_FEATURE_SSBD);
}
/*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c55cd4a30417..493f421c894e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -38,6 +38,7 @@
#include <asm/switch_to.h>
#include <asm/desc.h>
#include <asm/prctl.h>
+#include <asm/spec-ctrl.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -278,6 +279,24 @@ static inline void switch_to_bitmap(struct tss_struct *tss,
}
}
+static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
+{
+ u64 msr;
+
+ if (static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+ msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
+ wrmsrl(MSR_AMD64_LS_CFG, msr);
+ } else {
+ msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
+ wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+ }
+}
+
+void speculative_store_bypass_update(void)
+{
+ __speculative_store_bypass_update(current_thread_info()->flags);
+}
+
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
{
@@ -309,6 +328,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
if ((tifp ^ tifn) & _TIF_NOCPUID)
set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
+
+ if ((tifp ^ tifn) & _TIF_SSBD)
+ __speculative_store_bypass_update(tifn);
}
/*
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 21dc5af583b5..cb97f65550c6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -394,7 +394,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
- F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
+ F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(SSBD) |
F(ARCH_CAPABILITIES);
/* all calls to cpuid_count() should be made on the same cpu */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0f6cdc28d9ff..be99e5a6039f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -50,7 +50,7 @@
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
#include <asm/microcode.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -5355,8 +5355,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
* is no need to worry about the conditional branch over the wrmsr
* being speculatively taken.
*/
- if (svm->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+ x86_spec_ctrl_set_guest(svm->spec_ctrl);
asm volatile (
"push %%" _ASM_BP "; \n\t"
@@ -5450,6 +5449,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
+#ifdef CONFIG_X86_64
+ wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+#else
+ loadsegment(fs, svm->host.fs);
+#ifndef CONFIG_X86_32_LAZY_GS
+ loadsegment(gs, svm->host.gs);
+#endif
+#endif
+
/*
* We do not use IBRS in the kernel. If this vCPU has used the
* SPEC_CTRL MSR it may have left it on; save the value and
@@ -5468,20 +5479,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- if (svm->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
-
- /* Eliminate branch target predictions from guest mode */
- vmexit_fill_RSB();
-
-#ifdef CONFIG_X86_64
- wrmsrl(MSR_GS_BASE, svm->host.gs_base);
-#else
- loadsegment(fs, svm->host.fs);
-#ifndef CONFIG_X86_32_LAZY_GS
- loadsegment(gs, svm->host.gs);
-#endif
-#endif
+ x86_spec_ctrl_restore_host(svm->spec_ctrl);
reload_tss(vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 924064ba5690..936e8517d39a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -52,7 +52,7 @@
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
#include <asm/microcode.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include "trace.h"
#include "pmu.h"
@@ -3297,7 +3297,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SSBD))
return 1;
msr_info->data = to_vmx(vcpu)->spec_ctrl;
@@ -3418,11 +3419,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SSBD))
return 1;
/* The STIBP bit doesn't fault even if it's not advertised */
- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
return 1;
vmx->spec_ctrl = data;
@@ -9463,8 +9465,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* is no need to worry about the conditional branch over the wrmsr
* being speculatively taken.
*/
- if (vmx->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+ x86_spec_ctrl_set_guest(vmx->spec_ctrl);
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
@@ -9602,8 +9603,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- if (vmx->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ x86_spec_ctrl_restore_host(vmx->spec_ctrl);
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 825964efda1d..433f14bcab15 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -521,14 +521,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev,
return sprintf(buf, "Not affected\n");
}
+ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
&dev_attr_spectre_v1.attr,
&dev_attr_spectre_v2.attr,
+ &dev_attr_spec_store_bypass.attr,
NULL
};
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 9c09192c560d..7db34eb24816 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -84,6 +84,7 @@
#include <linux/delayacct.h>
#include <linux/seq_file.h>
#include <linux/pid_namespace.h>
+#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
#include <linux/string_helpers.h>
@@ -351,6 +352,30 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
#ifdef CONFIG_SECCOMP
seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
#endif
+ seq_printf(m, "\nSpeculation_Store_Bypass:\t");
+ switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
+ case -EINVAL:
+ seq_printf(m, "unknown");
+ break;
+ case PR_SPEC_NOT_AFFECTED:
+ seq_printf(m, "not vulnerable");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE:
+ seq_printf(m, "thread force mitigated");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_DISABLE:
+ seq_printf(m, "thread mitigated");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_ENABLE:
+ seq_printf(m, "thread vulnerable");
+ break;
+ case PR_SPEC_DISABLE:
+ seq_printf(m, "globally mitigated");
+ break;
+ default:
+ seq_printf(m, "vulnerable");
+ break;
+ }
seq_putc(m, '\n');
}
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 3175da585efc..b3c6275d46c3 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -116,6 +116,9 @@ struct bpf_insn_aux_data {
};
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
bool seen; /* this insn was processed by the verifier */
+#ifndef __GENKSYMS__
+ int sanitize_stack_off; /* stack slot to be cleared */
+#endif
};
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 3fe87e7e43a6..3423ace0c431 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -52,6 +52,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_spectre_v2(struct device *dev,
struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
+ struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/include/linux/nospec.h b/include/linux/nospec.h
index fbc98e2c8228..6efc581d6fc7 100644
--- a/include/linux/nospec.h
+++ b/include/linux/nospec.h
@@ -6,6 +6,8 @@
#ifndef _LINUX_NOSPEC_H
#define _LINUX_NOSPEC_H
+struct task_struct;
+
/**
* array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
* @index: array element index
@@ -75,4 +77,12 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
_i &= _mask; \
_i; \
})
+
+/* Speculation control prctl */
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which);
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+ unsigned long ctrl);
+/* Speculation control for seccomp enforced mitigation */
+void arch_seccomp_spec_mitigate(struct task_struct *task);
+
#endif /* _LINUX_NOSPEC_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f65dd77e44da..370f127ef39c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1294,7 +1294,8 @@ extern struct pid *cad_pid;
#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
-
+#define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */
+#define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/
#define TASK_PFA_TEST(name, func) \
static inline bool task_##func(struct task_struct *p) \
@@ -1319,6 +1320,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
TASK_PFA_SET(SPREAD_SLAB, spread_slab)
TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
+TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
+
+TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+
static inline void
current_restore_flags(unsigned long orig_flags, unsigned long flags)
{
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index ecc296c137cd..50c460a956f1 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -3,7 +3,8 @@
#include <uapi/linux/seccomp.h>
-#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
+#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \
+ SECCOMP_FILTER_FLAG_SPEC_ALLOW)
#ifdef CONFIG_SECCOMP
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index a8d0759a9e40..64776b72e1eb 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -197,4 +197,16 @@ struct prctl_mm_map {
# define PR_CAP_AMBIENT_LOWER 3
# define PR_CAP_AMBIENT_CLEAR_ALL 4
+/* Per task speculation control */
+#define PR_GET_SPECULATION_CTRL 52
+#define PR_SET_SPECULATION_CTRL 53
+/* Speculation control variants */
+# define PR_SPEC_STORE_BYPASS 0
+/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
+# define PR_SPEC_NOT_AFFECTED 0
+# define PR_SPEC_PRCTL (1UL << 0)
+# define PR_SPEC_ENABLE (1UL << 1)
+# define PR_SPEC_DISABLE (1UL << 2)
+# define PR_SPEC_FORCE_DISABLE (1UL << 3)
+
#endif /* _LINUX_PRCTL_H */
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 0f238a43ff1e..8bdf2882d9aa 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -15,7 +15,8 @@
#define SECCOMP_SET_MODE_FILTER 1
/* Valid flags for SECCOMP_SET_MODE_FILTER */
-#define SECCOMP_FILTER_FLAG_TSYNC 1
+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
/*
* All BPF programs must return a 32-bit value.
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index baa2855d78b0..244dc03f1582 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -658,7 +658,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
*/
static int check_stack_write(struct bpf_verifier_env *env,
struct bpf_verifier_state *state, int off,
- int size, int value_regno)
+ int size, int value_regno, int insn_idx)
{
int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
@@ -689,8 +689,33 @@ static int check_stack_write(struct bpf_verifier_env *env,
state->stack[spi].spilled_ptr = state->regs[value_regno];
state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
- for (i = 0; i < BPF_REG_SIZE; i++)
+ for (i = 0; i < BPF_REG_SIZE; i++) {
+ if (state->stack[spi].slot_type[i] == STACK_MISC &&
+ !env->allow_ptr_leaks) {
+ int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
+ int soff = (-spi - 1) * BPF_REG_SIZE;
+
+ /* detected reuse of integer stack slot with a pointer
+ * which means either llvm is reusing stack slot or
+ * an attacker is trying to exploit CVE-2018-3639
+ * (speculative store bypass)
+ * Have to sanitize that slot with preemptive
+ * store of zero.
+ */
+ if (*poff && *poff != soff) {
+ /* disallow programs where single insn stores
+ * into two different stack slots, since verifier
+ * cannot sanitize them
+ */
+ verbose(env,
+ "insn %d cannot access two stack slots fp%d and fp%d",
+ insn_idx, *poff, soff);
+ return -EINVAL;
+ }
+ *poff = soff;
+ }
state->stack[spi].slot_type[i] = STACK_SPILL;
+ }
} else {
/* regular write of data into stack */
state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
@@ -1181,7 +1206,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (t == BPF_WRITE)
err = check_stack_write(env, state, off, size,
- value_regno);
+ value_regno, insn_idx);
else
err = check_stack_read(env, state, off, size,
value_regno);
@@ -4232,6 +4257,34 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
else
continue;
+ if (type == BPF_WRITE &&
+ env->insn_aux_data[i + delta].sanitize_stack_off) {
+ struct bpf_insn patch[] = {
+ /* Sanitize suspicious stack slot with zero.
+ * There are no memory dependencies for this store,
+ * since it's only using frame pointer and immediate
+ * constant of zero
+ */
+ BPF_ST_MEM(BPF_DW, BPF_REG_FP,
+ env->insn_aux_data[i + delta].sanitize_stack_off,
+ 0),
+ /* the original STX instruction will immediately
+ * overwrite the same stack slot with appropriate value
+ */
+ *insn,
+ };
+
+ cnt = ARRAY_SIZE(patch);
+ new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ continue;
+ }
+
if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
continue;
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index bd3877f998c7..7210a1c5c443 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -17,6 +17,8 @@
#include <linux/audit.h>
#include <linux/compat.h>
#include <linux/coredump.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/seccomp.h>
@@ -215,8 +217,11 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
return true;
}
+void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }
+
static inline void seccomp_assign_mode(struct task_struct *task,
- unsigned long seccomp_mode)
+ unsigned long seccomp_mode,
+ unsigned long flags)
{
assert_spin_locked(&task->sighand->siglock);
@@ -226,6 +231,9 @@ static inline void seccomp_assign_mode(struct task_struct *task,
* filter) is set.
*/
smp_mb__before_atomic();
+ /* Assume default seccomp processes want spec flaw mitigation. */
+ if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
+ arch_seccomp_spec_mitigate(task);
set_tsk_thread_flag(task, TIF_SECCOMP);
}
@@ -293,7 +301,7 @@ static inline pid_t seccomp_can_sync_threads(void)
* without dropping the locks.
*
*/
-static inline void seccomp_sync_threads(void)
+static inline void seccomp_sync_threads(unsigned long flags)
{
struct task_struct *thread, *caller;
@@ -334,7 +342,8 @@ static inline void seccomp_sync_threads(void)
* allow one thread to transition the other.
*/
if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
- seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
+ seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
+ flags);
}
}
@@ -453,7 +462,7 @@ static long seccomp_attach_filter(unsigned int flags,
/* Now that the new filter is in place, synchronize to all threads. */
if (flags & SECCOMP_FILTER_FLAG_TSYNC)
- seccomp_sync_threads();
+ seccomp_sync_threads(flags);
return 0;
}
@@ -728,7 +737,7 @@ static long seccomp_set_mode_strict(void)
#ifdef TIF_NOTSC
disable_TSC();
#endif
- seccomp_assign_mode(current, seccomp_mode);
+ seccomp_assign_mode(current, seccomp_mode, 0);
ret = 0;
out:
@@ -786,7 +795,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
/* Do not free the successfully attached filter. */
prepared = NULL;
- seccomp_assign_mode(current, seccomp_mode);
+ seccomp_assign_mode(current, seccomp_mode, flags);
out:
spin_unlock_irq(&current->sighand->siglock);
if (flags & SECCOMP_FILTER_FLAG_TSYNC)
diff --git a/kernel/sys.c b/kernel/sys.c
index e48f0636c7fd..2e62b625d105 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -60,6 +60,8 @@
#include <linux/uidgid.h>
#include <linux/cred.h>
+#include <linux/nospec.h>
+
#include <linux/kmsg_dump.h>
/* Move somewhere else to avoid recompiling? */
#include <generated/utsrelease.h>
@@ -2093,6 +2095,17 @@ static int propagate_has_child_subreaper(struct task_struct *p, void *data)
return 1;
}
+int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
+{
+ return -EINVAL;
+}
+
+int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
+ unsigned long ctrl)
+{
+ return -EINVAL;
+}
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
@@ -2295,6 +2308,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_GET_FP_MODE:
error = GET_FP_MODE(me);
break;
+ case PR_GET_SPECULATION_CTRL:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ error = arch_prctl_spec_ctrl_get(me, arg2);
+ break;
+ case PR_SET_SPECULATION_CTRL:
+ if (arg4 || arg5)
+ return -EINVAL;
+ error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
+ break;
default:
error = -EINVAL;
break;
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 03f1fa495d74..cdc074b922e1 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1684,7 +1684,11 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
#endif
#ifndef SECCOMP_FILTER_FLAG_TSYNC
-#define SECCOMP_FILTER_FLAG_TSYNC 1
+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#endif
#ifndef seccomp