author     Takashi Iwai <tiwai@suse.de>   2018-05-23 08:57:07 +0200
committer  Takashi Iwai <tiwai@suse.de>   2018-05-23 08:57:07 +0200
commit     173b76dd4e4183ddb6241665d878858df571bec2
tree       5913d2794a65b2269672db3d0fc72093d4ea9deb
parent     080a444f1690db485aafd365c6e5dcd384c82541
parent     3e14e5c937b5cc0cff83e9f4dc1ac33c7ea8cbe3

Merge branch 'SLE12-SP3' into openSUSE-42.3  (rpm-4.4.132-53)
-rw-r--r--  arch/x86/Kconfig                  |  4
-rw-r--r--  arch/x86/include/asm/rmspec.h     | 24
-rw-r--r--  arch/x86/include/asm/spec_ctrl.h  |  3
-rw-r--r--  arch/x86/kernel/cpu/bugs.c        | 84
-rw-r--r--  include/linux/filter.h            | 34
-rw-r--r--  include/linux/nospec.h            | 11
-rw-r--r--  kernel/bpf/verifier.c             | 79
-rw-r--r--  net/core/filter.c                 | 65
8 files changed, 83 insertions(+), 221 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a781cdd2efc9..b0b43f119e1d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -28,7 +28,6 @@ config X86
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_PMEM_API if X86_64
- select ARCH_HAS_REDUCED_MEMORY_SPECULATION
select ARCH_HAS_MMIO_FLUSH
select ARCH_HAS_SG_CHAIN
select ARCH_HAVE_NMI_SAFE_CMPXCHG
@@ -225,9 +224,6 @@ config RWSEM_XCHGADD_ALGORITHM
config GENERIC_CALIBRATE_DELAY
def_bool y
-config ARCH_HAS_REDUCED_MEMORY_SPECULATION
- def_bool y
-
config ARCH_HAS_CPU_RELAX
def_bool y
diff --git a/arch/x86/include/asm/rmspec.h b/arch/x86/include/asm/rmspec.h
deleted file mode 100644
index 7032ece49618..000000000000
--- a/arch/x86/include/asm/rmspec.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef _LINUX_RMSPEC_H
-#define _LINUX_RMSPEC_H
-#include <asm/msr.h>
-#include <asm/spec_ctrl.h>
-
-/*
- * We call these when we *know* the CPU can go in/out of its
- * "safer" reduced memory speculation mode.
- *
- * For BPF, x86_sync_spec_ctrl() reads the per-cpu BPF state
- * variable and figures out the MSR value by itself. Thus,
- * we do not need to pass the "direction".
- */
-static inline void cpu_enter_reduced_memory_speculation(void)
-{
- x86_sync_spec_ctrl();
-}
-
-static inline void cpu_leave_reduced_memory_speculation(void)
-{
- x86_sync_spec_ctrl();
-}
-
-#endif /* _LINUX_RMSPEC_H */
diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
index 21114d5ba931..831f17be1b37 100644
--- a/arch/x86/include/asm/spec_ctrl.h
+++ b/arch/x86/include/asm/spec_ctrl.h
@@ -113,9 +113,6 @@ static inline void x86_ibp_barrier(void)
extern void x86_spec_ctrl_set_guest(u64);
extern void x86_spec_ctrl_restore_host(u64);
-/* Write a new SPEC_CTRL MSR based on current kernel state: */
-extern void x86_sync_spec_ctrl(void);
-
/* AMD specific Speculative Store Bypass MSR data */
extern u64 x86_amd_ls_cfg_base;
extern u64 x86_amd_ls_cfg_ssbd_mask;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index aa41ae42b70e..db904d27169d 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -10,7 +10,6 @@
#include <linux/init.h>
#include <linux/utsname.h>
#include <linux/cpu.h>
-#include <linux/filter.h>
#include <linux/module.h>
#include <linux/nospec.h>
#include <linux/prctl.h>
@@ -151,88 +150,35 @@ u64 x86_spec_ctrl_get_default(void)
}
EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default);
-static inline u64 intel_rds_mask(void)
+void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl)
{
- u64 mask;
-
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
- return 0;
-
- if (!boot_cpu_has(X86_FEATURE_SSBD))
- return 0;
-
- mask = ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
-
- /*
- * BPF programs can be exploited to attack the kernel.
- * Leave the RDS bit on when we recently ran one. This
- * bit gets cleared after a BPF program has not run on
- * the CPU for a while.
- */
- if (get_cpu_var(bpf_prog_ran))
- mask |= SPEC_CTRL_SSBD;
-
- return mask;
-}
-
-/*
- * Calculate the SPEC_CTRL MSR value that the kernel
- * should be using under normal operation.
- */
-static u64 x86_calculate_kernel_spec_ctrl(void)
-{
- u64 spec_ctrl;
+ u64 host = x86_spec_ctrl_base;
if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL))
- return 0;
-
- spec_ctrl = x86_spec_ctrl_base;
- spec_ctrl |= intel_rds_mask();
+ return;
- return spec_ctrl;
-}
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
-/* We are entering a guest and need to set its MSR value. */
-void x86_spec_ctrl_set_guest(u64 new_spec_ctrl)
-{
- if (x86_calculate_kernel_spec_ctrl() != new_spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, new_spec_ctrl);
+ if (host != guest_spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl);
}
EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest);
-/*
- * We are leaving a guest and need to restore the kernel's MSR
- * value that it uses for normal operation.
- */
-void x86_spec_ctrl_restore_host(u64 current_spec_ctrl)
-{
- u64 new_spec_ctrl = x86_calculate_kernel_spec_ctrl();
-
- if (new_spec_ctrl != current_spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, new_spec_ctrl);
-}
-EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host);
-
-/*
- * A condition that may affect the SPEC_CTRL MSR has changed.
- * Recalculate a new value for this CPU and set it.
- *
- * It is not easy to optimize the wrmsrl() away unless the
- * callers have a full understanding of all the conditions
- * that affect the output of x86_calculate_kernel_spec_ctrl().
- *
- * Try not to call this too often.
- */
-void x86_sync_spec_ctrl(void)
+void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl)
{
- u64 new_spec_ctrl = x86_calculate_kernel_spec_ctrl();
+ u64 host = x86_spec_ctrl_base;
if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL))
return;
- wrmsrl(MSR_IA32_SPEC_CTRL, new_spec_ctrl);
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
+
+ if (host != guest_spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, host);
}
-EXPORT_SYMBOL_GPL(x86_sync_spec_ctrl);
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host);
static void x86_amd_ssb_disable(void)
{
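With the BPF hook gone, the two helpers above compute the host SPEC_CTRL value from the boot-time base plus, on Intel, the current task's SSBD TIF bit, and only write the MSR when the host and guest values differ. Below is a condensed, standalone sketch of that logic; the SPEC_CTRL_SSBD bit value and the wrmsrl() stub are simplified stand-ins, not the kernel's definitions.

    #include <stdint.h>
    #include <stdbool.h>

    #define SPEC_CTRL_SSBD (1ULL << 2)             /* assumed bit, for illustration */

    static uint64_t x86_spec_ctrl_base;            /* boot-time baseline of the MSR */

    static void wrmsrl(uint64_t val) { (void)val; }  /* stand-in for the real MSR write */

    /* Host value: base plus, on Intel, the current task's SSBD request. */
    static uint64_t host_spec_ctrl(bool intel, bool task_wants_ssbd)
    {
        uint64_t host = x86_spec_ctrl_base;

        if (intel && task_wants_ssbd)
            host |= SPEC_CTRL_SSBD;
        return host;
    }

    /* Guest entry: write the guest value only when it differs from the host's. */
    static void set_guest(uint64_t guest, bool intel, bool task_wants_ssbd)
    {
        if (host_spec_ctrl(intel, task_wants_ssbd) != guest)
            wrmsrl(guest);
    }

    /* Guest exit: write the host value back only when it differs. */
    static void restore_host(uint64_t guest, bool intel, bool task_wants_ssbd)
    {
        uint64_t host = host_spec_ctrl(intel, task_wants_ssbd);

        if (host != guest)
            wrmsrl(host);
    }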
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5479e555afa7..677fa3b42194 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -13,7 +13,6 @@
#include <linux/printk.h>
#include <linux/workqueue.h>
#include <linux/sched.h>
-#include <linux/nospec.h>
#include <net/sch_generic.h>
#include <asm/cacheflush.h>
@@ -349,39 +348,8 @@ struct sk_filter {
struct bpf_prog *prog;
};
-DECLARE_PER_CPU(unsigned int, bpf_prog_ran);
+#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi)
-static inline void bpf_enter_prog(const struct bpf_prog *fp)
-{
- /*
- * Upon the first entry to BPF code, we need to reduce
- * memory speculation to mitigate attacks targeting it.
- */
- if (this_cpu_inc_return(bpf_prog_ran) == 1)
- cpu_enter_reduced_memory_speculation();
-}
-
-extern void bpf_leave_prog_deferred(const struct bpf_prog *fp);
-static inline void bpf_leave_prog(const struct bpf_prog *fp)
-{
- int *count = this_cpu_ptr(&bpf_prog_ran);
- if (*count == 1)
- bpf_leave_prog_deferred(fp);
- else
- (*count)--;
- put_cpu_var(bpf_prog_ran);
-}
-
-#define BPF_PROG_RUN(filter, ctx) ({ \
- int __ret; \
- \
- bpf_enter_prog(filter); \
- __ret = (*(filter)->bpf_func)(ctx, (filter)->insnsi); \
- bpf_leave_prog(filter); \
- \
- __ret; \
-})
-
static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
struct sk_buff *skb)
{
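This hunk restores BPF_PROG_RUN() to a plain indirect call and drops the per-CPU nesting counter that toggled the mitigation around BPF execution. For contrast, here is a simplified, single-threaded sketch of the pattern being removed; the mitigation hooks are empty stand-ins, and the real code used per-CPU variables and deferred the final "leave" through delayed work (see net/core/filter.c below).

    static unsigned int bpf_prog_nesting;              /* per-CPU in the real code */

    static void enter_reduced_speculation(void) { }    /* stand-in */
    static void leave_reduced_speculation(void) { }    /* stand-in */

    static void bpf_enter(void)
    {
        /* Only the outermost entry pays for switching the mitigation on. */
        if (++bpf_prog_nesting == 1)
            enter_reduced_speculation();
    }

    static void bpf_leave(void)
    {
        /* Only the outermost exit switches it back off; the real code
         * scheduled this via delayed work instead of doing it inline.
         */
        if (--bpf_prog_nesting == 0)
            leave_reduced_speculation();
    }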
diff --git a/include/linux/nospec.h b/include/linux/nospec.h
index c1b72f567e84..0c5ef54fd416 100644
--- a/include/linux/nospec.h
+++ b/include/linux/nospec.h
@@ -65,15 +65,4 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
/* Speculation control for seccomp enforced mitigation */
void arch_seccomp_spec_mitigate(struct task_struct *task);
-#ifndef CONFIG_ARCH_HAS_REDUCED_MEMORY_SPECULATION
-static inline void cpu_enter_reduced_memory_speculation(void)
-{
-}
-static inline void cpu_leave_reduced_memory_speculation(void)
-{
-}
-#else
-#include <asm/rmspec.h>
-#endif
-
#endif /* _LINUX_NOSPEC_H */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 79e3c21a35d0..26d3d87f2b39 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -192,6 +192,7 @@ struct bpf_insn_aux_data {
struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
};
bool seen; /* this insn was processed by the verifier */
+ int sanitize_stack_off; /* stack slot to be cleared */
};
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
@@ -569,10 +570,12 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
/* check_stack_read/write functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
-static int check_stack_write(struct verifier_state *state, int off, int size,
- int value_regno)
+static int check_stack_write(struct verifier_env *env,
+ struct verifier_state *state, int off, int size,
+ int value_regno, int insn_idx)
{
int i;
+ int slot = -off - 1, spi = slot / BPF_REG_SIZE;
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
* so it's aligned access and [off, off + size) are within stack limits
*/
@@ -590,8 +593,32 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
state->regs[value_regno];
- for (i = 0; i < BPF_REG_SIZE; i++)
+ for (i = 0; i < BPF_REG_SIZE; i++) {
+ if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC &&
+ !env->allow_ptr_leaks) {
+ int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
+ int soff = (-spi - 1) * BPF_REG_SIZE;
+
+ /* detected reuse of integer stack slot with a pointer
+ * which means either llvm is reusing stack slot or
+ * an attacker is trying to exploit CVE-2018-3639
+ * (speculative store bypass)
+ * Have to sanitize that slot with preemptive
+ * store of zero.
+ */
+ if (*poff && *poff != soff) {
+ /* disallow programs where single insn stores
+ * into two different stack slots, since verifier
+ * cannot sanitize them
+ */
+ verbose("insn %d cannot access two stack slots fp%d and fp%d",
+ insn_idx, *poff, soff);
+ return -EINVAL;
+ }
+ *poff = soff;
+ }
state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
+ }
} else {
/* regular write of data into stack */
state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
@@ -696,7 +723,7 @@ static bool is_ctx_reg(struct verifier_env *env, int regno)
* if t==write && value_regno==-1, some unknown value is stored into memory
* if t==read && value_regno==-1, don't care what we read from memory
*/
-static int check_mem_access(struct verifier_env *env, u32 regno, int off,
+static int check_mem_access(struct verifier_env *env, int insn_idx, u32 regno, int off,
int bpf_size, enum bpf_access_type t,
int value_regno)
{
@@ -748,7 +775,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
verbose("attempt to corrupt spilled pointer on stack\n");
return -EACCES;
}
- err = check_stack_write(state, off, size, value_regno);
+ err = check_stack_write(env, state, off, size, value_regno, insn_idx);
} else {
err = check_stack_read(state, off, size, value_regno);
}
@@ -760,7 +787,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
return err;
}
-static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
+static int check_xadd(struct verifier_env *env, int insn_idx, struct bpf_insn *insn)
{
struct reg_state *regs = env->cur_state.regs;
int err;
@@ -793,13 +820,13 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
}
/* check whether atomic_add can read the memory */
- err = check_mem_access(env, insn->dst_reg, insn->off,
+ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_READ, -1);
if (err)
return err;
/* check whether atomic_add can write into the same memory */
- return check_mem_access(env, insn->dst_reg, insn->off,
+ return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE, -1);
}
@@ -1838,7 +1865,7 @@ static int do_check(struct verifier_env *env)
/* check that memory (src_reg + off) is readable,
* the state of dst_reg will be updated by this func
*/
- err = check_mem_access(env, insn->src_reg, insn->off,
+ err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
BPF_SIZE(insn->code), BPF_READ,
insn->dst_reg);
if (err)
@@ -1876,7 +1903,7 @@ static int do_check(struct verifier_env *env)
enum bpf_reg_type *prev_dst_type, dst_reg_type;
if (BPF_MODE(insn->code) == BPF_XADD) {
- err = check_xadd(env, insn);
+ err = check_xadd(env, insn_idx, insn);
if (err)
return err;
insn_idx++;
@@ -1895,7 +1922,7 @@ static int do_check(struct verifier_env *env)
dst_reg_type = regs[insn->dst_reg].type;
/* check that memory (dst_reg + off) is writeable */
- err = check_mem_access(env, insn->dst_reg, insn->off,
+ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE,
insn->src_reg);
if (err)
@@ -1930,7 +1957,7 @@ static int do_check(struct verifier_env *env)
}
/* check that memory (dst_reg + off) is writeable */
- err = check_mem_access(env, insn->dst_reg, insn->off,
+ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE,
-1);
if (err)
@@ -2227,6 +2254,34 @@ static int convert_ctx_accesses(struct verifier_env *env)
else
continue;
+ if (type == BPF_WRITE &&
+ env->insn_aux_data[i + delta].sanitize_stack_off) {
+ struct bpf_insn patch[] = {
+ /* Sanitize suspicious stack slot with zero.
+ * There are no memory dependencies for this store,
+ * since it's only using frame pointer and immediate
+ * constant of zero
+ */
+ BPF_ST_MEM(BPF_DW, BPF_REG_FP,
+ env->insn_aux_data[i + delta].sanitize_stack_off,
+ 0),
+ /* the original STX instruction will immediately
+ * overwrite the same stack slot with appropriate value
+ */
+ *insn,
+ };
+
+ cnt = ARRAY_SIZE(patch);
+ new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ continue;
+ }
+
if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
continue;
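The verifier change records, per instruction, a stack offset that needs sanitizing whenever an unprivileged program spills a pointer over a slot that previously held scalar (STACK_MISC) data, and convert_ctx_accesses() then patches a constant-zero store in front of the original spill. A hypothetical example of that rewrite, using the instruction macros from include/linux/filter.h; the register and offset are chosen only for illustration.

    #include <linux/filter.h>

    /* Spill of R1 to fp-8, before the verifier's rewrite. */
    static const struct bpf_insn spill_before_patch[] = {
        BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1, -8),
    };

    /* The same spill after patching. */
    static const struct bpf_insn spill_after_patch[] = {
        /* zero the slot first: the store has no data dependency, so a
         * speculative load cannot forward a stale scalar as a pointer
         */
        BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0),
        /* the original spill immediately overwrites the same slot */
        BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1, -8),
    };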
diff --git a/net/core/filter.c b/net/core/filter.c
index 65670e8868ad..556e878b23b6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -31,7 +31,6 @@
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/gfp.h>
-#include <linux/nospec.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
@@ -2000,67 +1999,3 @@ out:
release_sock(sk);
return ret;
}
-
-/*
- * 0 when no BPF code has executed on the CPU.
- * Incremented when running BPF code.
- * When ==1, work will be scheduled.
- * When >1, work will not be scheduled because work is already
- * scheduled.
- * When work is performed, count will be decremented from 1->0.
- */
-DEFINE_PER_CPU(unsigned int, bpf_prog_ran);
-EXPORT_SYMBOL_GPL(bpf_prog_ran);
-static void bpf_done_on_this_cpu(struct work_struct *work)
-{
- if (this_cpu_dec_return(bpf_prog_ran)) {
- /*
- * This is unexpected. The elevated refcount indicates
- * being in the *middle* of a BPF program, which should
- * be impossible. They are executed inside
- * rcu_read_lock() where we can not sleep and where
- * preemption is disabled.
- */
- WARN_ON_ONCE(1);
- }
-
- /*
- * Unsafe BPF code is no longer running, disable mitigations.
- * This must be done after bpf_prog_ran because the mitigation
- * code looks at its state.
- */
- cpu_leave_reduced_memory_speculation();
-}
-
-DEFINE_PER_CPU(struct delayed_work, bpf_prog_delayed_work);
-static __init int bpf_init_delayed_work(void)
-{
- int i;
-
- for_each_possible_cpu(i) {
- struct delayed_work *w = &per_cpu(bpf_prog_delayed_work, i);
-
- INIT_DELAYED_WORK(w, bpf_done_on_this_cpu);
- }
- return 0;
-}
-subsys_initcall(bpf_init_delayed_work);
-
-/*
- * Must be called with preempt disabled
- *
- * The schedule_delayed_work_on() is relatively expensive. So,
- * this way, someone doing a bunch of repeated BPF calls will
- * only pay the cost of scheduling work on the *first* BPF call.
- * The subsequent calls only pay the cost of incrementing a
- * per-cpu variable, which is cheap.
- */
-void bpf_leave_prog_deferred(const struct bpf_prog *fp)
-{
- int cpu = smp_processor_id();
- struct delayed_work *w = &per_cpu(bpf_prog_delayed_work, cpu);
- unsigned long delay_jiffies = msecs_to_jiffies(10);
-
- schedule_delayed_work_on(cpu, w, delay_jiffies);
-}
-EXPORT_SYMBOL_GPL(bpf_leave_prog_deferred);
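The code removed above deferred the "leave" side through per-CPU delayed work, so a burst of BPF invocations paid the work-scheduling cost only once and full speculation was re-enabled roughly 10 ms after the last program finished. A standalone model of that lifecycle, with the kernel's per-CPU variables, workqueue, and MSR handling reduced to plain counters and comments:

    #include <stdbool.h>

    static unsigned int bpf_prog_ran;      /* per-CPU counter in the real code */
    static bool leave_work_queued;         /* models the queued delayed work */

    static void leave_prog(void)
    {
        if (bpf_prog_ran == 1)
            leave_work_queued = true;      /* schedule_delayed_work_on(), ~10 ms */
        else
            bpf_prog_ran--;                /* nested call: just drop the count */
    }

    /* Models bpf_done_on_this_cpu(): the count goes 1 -> 0 and the mitigation
     * is switched off.  BPF runs in the meantime keep the count at or above
     * one, so the mitigation stays on until the work finally fires.
     */
    static void run_pending_work(void)
    {
        leave_work_queued = false;
        bpf_prog_ran--;                    /* expected to reach exactly zero */
        /* cpu_leave_reduced_memory_speculation() in the real code */
    }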