author     Takashi Iwai <tiwai@suse.de>  2018-01-04 09:11:03 +0100
committer  Takashi Iwai <tiwai@suse.de>  2018-01-04 09:11:03 +0100
commit     7db19124eed3ca155d26fcbf3a2d3c0329e4ca60 (patch)
tree       649f2824cc8a0e4f3fb18a6c4f9caecdc243cc25
parent     853ac0ba9a23db170cd054d4ab9638ec63bcccc0 (diff)
parent     14b04164b2ff91035a9a8f4672f4d86b929697ce (diff)
Merge branch 'SLE12-SP3' into openSUSE-42.3 (tag: rpm-4.4.104-39)

Conflicts:
	series.conf
-rw-r--r--  blacklist.conf | 3
-rw-r--r--  config/arm64/default | 2
-rw-r--r--  config/ppc64le/debug | 2
-rw-r--r--  config/ppc64le/default | 2
-rw-r--r--  config/s390x/default | 2
-rw-r--r--  config/s390x/zfcpdump | 2
-rw-r--r--  config/x86_64/debug | 3
-rw-r--r--  config/x86_64/default | 3
-rw-r--r--  patches.arch/s390-sles12sp3-99-01-cpu-alternatives.patch | 395
-rw-r--r--  patches.arch/s390-sles12sp3-99-02-gmb.patch | 36
-rw-r--r--  patches.arch/s390-sles12sp3-99-03-nobp.patch | 220
-rw-r--r--  patches.drivers/ALSA-hda-fix-headset-mic-problem-for-Dell-machines-alc274 | 36
-rw-r--r--  patches.drivers/ALSA-hda-realtek-Fix-Dell-AIO-LineOut-issue | 17
-rw-r--r--  patches.drivers/ALSA-hda-realtek-Fix-pincfg-for-Dell-XPS-13-9370 | 2
-rw-r--r--  patches.drivers/ALSA-hda-realtek-Fix-typo-of-pincfg-for-Dell-quirk | 29
-rw-r--r--  patches.fixes/0001-sunrpc-add-hash_cred-function-to-rpc_authops-struct.patch | 30
-rw-r--r--  patches.fixes/0002-sunrpc-add-generic_auth-hash_cred-function.patch | 52
-rw-r--r--  patches.fixes/0003-sunrpc-add-auth_unix-hash_cred-function.patch | 52
-rw-r--r--  patches.fixes/0004-sunrpc-add-RPCSEC_GSS-hash_cred-function.patch | 42
-rw-r--r--  patches.fixes/0005-sunrpc-replace-generic-auth_cred-hash-with-auth-spec.patch | 30
-rw-r--r--  patches.fixes/0006-sunrpc-include-sup-groups-in-hash.patch | 73
-rw-r--r--  patches.fixes/0007-nfs-limit-access-cache-size.patch | 60
-rw-r--r--  patches.kabi/0001-sunrpc-add-hash_cred-function-to-rpc_authops-struct.kabi | 34
-rw-r--r--  patches.kabi/kaiser-preserve-kabi.patch | 107
-rw-r--r--  patches.suse/0001-locking-barriers-introduce-new-memory-barrier-gmb.patch | 48
-rw-r--r--  patches.suse/0002-bpf-prevent-speculative-execution-in-eBPF-interprete.patch | 48
-rw-r--r--  patches.suse/0004-uvcvideo-prevent-speculative-execution.patch | 26
-rw-r--r--  patches.suse/0005-carl9170-prevent-speculative-execution.patch | 26
-rw-r--r--  patches.suse/0006-p54-prevent-speculative-execution.patch | 26
-rw-r--r--  patches.suse/0007-qla2xxx-prevent-speculative-execution.patch | 48
-rw-r--r--  patches.suse/0008-cw1200-prevent-speculative-execution.patch | 26
-rw-r--r--  patches.suse/0009-Thermal-int340x-prevent-speculative-execution.patch | 40
-rw-r--r--  patches.suse/0010-userns-prevent-speculative-execution.patch | 30
-rw-r--r--  patches.suse/0011-ipv6-prevent-speculative-execution.patch | 26
-rw-r--r--  patches.suse/0012-fs-prevent-speculative-execution.patch | 30
-rw-r--r--  patches.suse/0013-net-mpls-prevent-speculative-execution.patch | 27
-rw-r--r--  patches.suse/0014-udf-prevent-speculative-execution.patch | 45
-rw-r--r--  patches.suse/01-x86-feature-enable-the-x86-feature-to-control-speculation.patch | 71
-rw-r--r--  patches.suse/02-x86-enter-add-macros-to-set-clear-ibrs-and-set-ibpb.patch | 87
-rw-r--r--  patches.suse/03-x86-entry-use-ibrs-on-entry-to-kernel-space.patch | 184
-rw-r--r--  patches.suse/04-x86-msr-move-native_-msr-u64-to-msr-h.patch | 69
-rw-r--r--  patches.suse/05-x86-spec-add-ibrs-control-functions.patch | 71
-rw-r--r--  patches.suse/06-x86-idle-toggle-ibrs-when-going-idle.patch | 98
-rw-r--r--  patches.suse/07-x86-idle-disable-ibrs-when-offlining-a-cpu-and-re-enable-on-wakeup.patch | 42
-rw-r--r--  patches.suse/08-x86-spec_ctrl-add-an-indirect-branch-predictor-barrier.patch | 33
-rw-r--r--  patches.suse/09-x86-mm-set-ibpb-upon-context-switch.patch | 37
-rw-r--r--  patches.suse/10-ptrace-add-a-new-thread-access-check.patch | 91
-rw-r--r--  patches.suse/11-x86-mm-only-set-ibpb-when-the-new-thread-cannot-ptrace-current-thread.patch | 41
-rw-r--r--  patches.suse/12-x86-entry-add-a-function-to-overwrite-the-rsb.patch | 110
-rw-r--r--  patches.suse/13-x86-entry-stuff-rsb-for-entry-to-kernel-for-non-smep-platform.patch | 86
-rw-r--r--  patches.suse/14-x86-kvm-add-msr_ia32_spec_ctrl-and-msr_ia32_pred_cmd-to-kvm.patch | 133
-rw-r--r--  patches.suse/15-x86-kvm-flush-ibp-when-switching-vms.patch | 28
-rw-r--r--  patches.suse/16-x86-kvm-toggle-ibrs-on-vm-entry-and-exit.patch | 32
-rw-r--r--  patches.suse/17-x86-kvm-pad-rsb-on-vm-transition.patch | 96
-rw-r--r--  patches.suse/18-x86-spec_ctrl-check-whether-ibrs-is-enabled-before-using-it.patch | 149
-rw-r--r--  patches.suse/19-x86-spec_ctrl-check-whether-ibpb-is-enabled-before-using-it.patch | 59
-rw-r--r--  patches.suse/20-x86-cpu-check-speculation-control-cpuid-bit.patch | 105
-rw-r--r--  patches.suse/21-x86-spec-add-nospec-chicken-bit.patch | 53
-rw-r--r--  patches.suse/22-x86-cpu-amd-add-speculative-control-support-for-amd.patch | 102
-rw-r--r--  patches.suse/23-x86-spec-check-cpuid-direclty-post-microcode-reload-to-support-ibpb-feature.patch | 56
-rw-r--r--  patches.suse/24-kvm-svm-do-not-intercept-new-speculative-control-msrs.patch | 30
-rw-r--r--  patches.suse/25-x86-svm-set-ibrs-value-on-vm-entry-and-exit.patch | 81
-rw-r--r--  patches.suse/26-x86-svm-set-ibpb-when-running-a-different-vcpu.patch | 61
-rw-r--r--  patches.suse/27-kvm-x86-add-speculative-control-cpuid-support-for-guests.patch | 53
-rw-r--r--  patches.suse/28-x86-svm-clobber-the-rsb-on-vm-exit.patch | 29
-rw-r--r--  patches.suse/29-x86-svm-add-code-to-clear-registers-on-vm-exit.patch | 42
-rw-r--r--  patches.suse/30-x86-cpu-amd-make-the-lfence-instruction-serialized.patch | 57
-rw-r--r--  patches.suse/31-x86-cpu-amd-remove-now-unused-definition-of-mfence_rdtsc-feature.patch | 59
-rw-r--r--  patches.suse/32-move-pti-feature-check-up.patch | 77
-rw-r--r--  patches.suse/4.4-01-x86-mm-add-invpcid-helpers.patch | 95
-rw-r--r--  patches.suse/4.4-02-x86-mm-fix-invpcid-asm-constraint.patch | 69
-rw-r--r--  patches.suse/4.4-03-x86-mm-add-a-noinvpcid-boot-option-to-turn-off-invpcid.patch | 73
-rw-r--r--  patches.suse/4.4-04-x86-mm-if-invpcid-is-available-use-it-to-flush-global-mappings.patch | 55
-rw-r--r--  patches.suse/4.4-06-mm-mmu_context-sched-core-fix-mmu_context-h-assumption.patch | 41
-rw-r--r--  patches.suse/4.4-07-sched-core-add-switch_mm_irqs_off-and-use-it-in-the-scheduler.patch | 74
-rw-r--r--  patches.suse/4.4-08-x86-mm-build-arch-x86-mm-tlb-c-even-on-smp.patch | 64
-rw-r--r--  patches.suse/4.4-09-x86-mm-sched-core-uninline-switch_mm.patch | 244
-rw-r--r--  patches.suse/4.4-10-x86-mm-sched-core-turn-off-irqs-in-switch_mm.patch | 64
-rw-r--r--  patches.suse/4.4-11-arm-hide-finish_arch_post_lock_switch-from-modules.patch | 93
-rw-r--r--  patches.suse/4.4-12-sched-core-idle_task_exit-shouldn-t-use-switch_mm_irqs_off.patch | 42
-rw-r--r--  patches.suse/4.4-15-x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch | 98
-rw-r--r--  patches.suse/4.4-16-x86-mm-make-flush_tlb_mm_range-more-predictable.patch | 78
-rw-r--r--  patches.suse/4.4-17-x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch | 101
-rw-r--r--  patches.suse/4.4-18-x86-mm-remove-the-up-asm-tlbflush-h-code-always-use-the-formerly-smp-code.patch | 297
-rw-r--r--  patches.suse/4.4-20-x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch | 70
-rw-r--r--  patches.suse/4.4-21-x86-mm-enable-cr4-pcide-on-supported-systems.patch | 105
-rw-r--r--  patches.suse/4.4-22-KAISER-Kernel-Address-Isolation.patch | 972
-rw-r--r--  patches.suse/4.4-23-kaiser-merged-update.patch | 1303
-rw-r--r--  patches.suse/4.4-24-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch | 188
-rw-r--r--  patches.suse/4.4-25-kaiser-stack-map-PAGE_SIZE-at-THREAD_SIZE-PAGE_SIZE.patch | 139
-rw-r--r--  patches.suse/4.4-26-kaiser-fix-build-and-FIXME-in-alloc_ldt_struct.patch | 58
-rw-r--r--  patches.suse/4.4-27-kaiser-KAISER-depends-on-SMP.patch | 59
-rw-r--r--  patches.suse/4.4-28-kaiser-fix-regs-to-do_nmi-ifndef-CONFIG_KAISER.patch | 72
-rw-r--r--  patches.suse/4.4-29-kaiser-fix-perf-crashes.patch | 158
-rw-r--r--  patches.suse/4.4-30-kaiser-ENOMEM-if-kaiser_pagetable_walk-NULL.patch | 52
-rw-r--r--  patches.suse/4.4-31-kaiser-tidied-up-asm-kaiser.h-somewhat.patch | 110
-rw-r--r--  patches.suse/4.4-32-kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch | 50
-rw-r--r--  patches.suse/4.4-33-kaiser-kaiser_remove_mapping-move-along-the-pgd.patch | 50
-rw-r--r--  patches.suse/4.4-34-kaiser-align-addition-to-x86-mm-Makefile.patch | 26
-rw-r--r--  patches.suse/4.4-35-kaiser-cleanups-while-trying-for-gold-link.patch | 134
-rw-r--r--  patches.suse/4.4-36-kaiser-name-that-0x1000-KAISER_SHADOW_PGD_OFFSET.patch | 66
-rw-r--r--  patches.suse/4.4-37-kaiser-delete-KAISER_REAL_SWITCH-option.patch | 79
-rw-r--r--  patches.suse/4.4-38-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch | 111
-rw-r--r--  patches.suse/4.4-39-kaiser-enhanced-by-kernel-and-user-PCIDs.patch | 399
-rw-r--r--  patches.suse/4.4-40-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch | 394
-rw-r--r--  patches.suse/4.4-41-kaiser-PCID-0-for-kernel-and-128-for-user.patch | 129
-rw-r--r--  patches.suse/4.4-42-kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch | 141
-rw-r--r--  patches.suse/4.4-43-kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch | 154
-rw-r--r--  patches.suse/4.4-44-kaiser-_pgd_alloc-without-__GFP_REPEAT-to-avoid-stal.patch | 68
-rw-r--r--  patches.suse/4.4-45-kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch | 38
-rw-r--r--  patches.suse/4.4-46-x86-mm-64-fix-reboot-interaction-with-cr4-pcide.patch | 41
-rw-r--r--  patches.suse/4.4-47-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch | 639
-rw-r--r--  patches.suse/4.4-48-rename-and-simplify-feature-setting.patch | 95
-rw-r--r--  patches.suse/4.4-49-x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch | 171
-rw-r--r--  patches.suse/4.4-50-kaiser-add_pti_cmdline_option_and_documentation.patch | 119
-rw-r--r--  patches.suse/4.4-51-kaiser-use-ALTERNATIVE-instead-of-x86_cr3_pcid_noflu.patch | 129
-rw-r--r--  patches.suse/4.4-52-kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch | 52
-rw-r--r--  patches.suse/4.4-53-kaiser-asm-tlbflush.h-handle-noPGE-at-lower-level.patch | 90
-rw-r--r--  patches.suse/4.4-54-kaiser-kaiser_flush_tlb_on_return_to_user-check-PCID.patch | 85
-rw-r--r--  patches.suse/4.4-55-x86-paravirt-dont-patch-flush_tlb_single.patch | 65
-rw-r--r--  patches.suse/4.4-57-Reenable_PARAVIRT.patch | 25
-rw-r--r--  patches.suse/4.4-58-kaiser-disable-on-xen.patch | 39
-rw-r--r--  patches.suse/kgr-0002-livepatch-add-infrastructure.patch | 32
-rw-r--r--  patches.suse/powerpc-Secure-memory-rfi-flush-SLE12SP3.patch | 558
-rw-r--r--  patches.suse/powerpc-add-gmb.patch | 17
-rw-r--r--  series.conf | 122
126 files changed, 12860 insertions, 34 deletions
diff --git a/blacklist.conf b/blacklist.conf
index 1d3204aca5..4bca5d3728 100644
--- a/blacklist.conf
+++ b/blacklist.conf
@@ -354,3 +354,6 @@ a04b5de5050ab8b891128eb2c47a0916fe8622e1 # uuid_t is not present
3f3295709edea6268ff1609855f498035286af73 # optimization only
5bd933fe4481688d595ae1dd0440006c8675a1a8 # breaks ABI
5a8e12678c767ccf8bb16d6237569e4a707d655b # not applicable
+5dd0b16cdaff9b94da06074d5888b03235c0bf17 # we don't support CONFIG_SMP=n
+c7ad5ad297e644601747d6dbee978bf85e14f7bc # cleanup not needed once CR4.PCIDE is setup correctly during boot
+f34902c5c6c08024371202a680ce69f2d488776d # not needed in trees where PCID is used only for KAISER, and therefore always 0 for kernel
diff --git a/config/arm64/default b/config/arm64/default
index 54a9fd2380..e9769d6b3c 100644
--- a/config/arm64/default
+++ b/config/arm64/default
@@ -1,6 +1,6 @@
#
# Automatically generated file; DO NOT EDIT.
-# Linux/arm64 4.4.99 Kernel Configuration
+# Linux/arm64 4.4.103 Kernel Configuration
#
CONFIG_ARM64=y
CONFIG_64BIT=y
diff --git a/config/ppc64le/debug b/config/ppc64le/debug
index 58b6d0649e..3577b578c9 100644
--- a/config/ppc64le/debug
+++ b/config/ppc64le/debug
@@ -1,6 +1,6 @@
#
# Automatically generated file; DO NOT EDIT.
-# Linux/powerpc 4.4.99 Kernel Configuration
+# Linux/powerpc 4.4.103 Kernel Configuration
#
CONFIG_PPC64=y
diff --git a/config/ppc64le/default b/config/ppc64le/default
index a0422547fb..62f7ad1a32 100644
--- a/config/ppc64le/default
+++ b/config/ppc64le/default
@@ -1,6 +1,6 @@
#
# Automatically generated file; DO NOT EDIT.
-# Linux/powerpc 4.4.99 Kernel Configuration
+# Linux/powerpc 4.4.103 Kernel Configuration
#
CONFIG_PPC64=y
diff --git a/config/s390x/default b/config/s390x/default
index 76177f900c..7a8921e4e4 100644
--- a/config/s390x/default
+++ b/config/s390x/default
@@ -1,6 +1,6 @@
#
# Automatically generated file; DO NOT EDIT.
-# Linux/s390 4.4.99 Kernel Configuration
+# Linux/s390 4.4.103 Kernel Configuration
#
CONFIG_MMU=y
CONFIG_ZONE_DMA=y
diff --git a/config/s390x/zfcpdump b/config/s390x/zfcpdump
index 11931d5ee7..3b8af15f09 100644
--- a/config/s390x/zfcpdump
+++ b/config/s390x/zfcpdump
@@ -1,6 +1,6 @@
#
# Automatically generated file; DO NOT EDIT.
-# Linux/s390 4.4.99 Kernel Configuration
+# Linux/s390 4.4.103 Kernel Configuration
#
CONFIG_MMU=y
CONFIG_ZONE_DMA=y
diff --git a/config/x86_64/debug b/config/x86_64/debug
index 9b40ea5041..6d8f239e14 100644
--- a/config/x86_64/debug
+++ b/config/x86_64/debug
@@ -1,6 +1,6 @@
#
# Automatically generated file; DO NOT EDIT.
-# Linux/x86_64 4.4.99 Kernel Configuration
+# Linux/x86_64 4.4.103 Kernel Configuration
#
CONFIG_64BIT=y
CONFIG_X86_64=y
@@ -7545,6 +7545,7 @@ CONFIG_TRUSTED_KEYS=m
CONFIG_ENCRYPTED_KEYS=y
# CONFIG_SECURITY_DMESG_RESTRICT is not set
CONFIG_SECURITY=y
+CONFIG_KAISER=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_NETWORK_XFRM=y
diff --git a/config/x86_64/default b/config/x86_64/default
index 2a30876b7b..808f8f98c8 100644
--- a/config/x86_64/default
+++ b/config/x86_64/default
@@ -1,6 +1,6 @@
#
# Automatically generated file; DO NOT EDIT.
-# Linux/x86_64 4.4.99 Kernel Configuration
+# Linux/x86_64 4.4.103 Kernel Configuration
#
CONFIG_64BIT=y
CONFIG_X86_64=y
@@ -7536,6 +7536,7 @@ CONFIG_TRUSTED_KEYS=m
CONFIG_ENCRYPTED_KEYS=y
# CONFIG_SECURITY_DMESG_RESTRICT is not set
CONFIG_SECURITY=y
+CONFIG_KAISER=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_NETWORK_XFRM=y
diff --git a/patches.arch/s390-sles12sp3-99-01-cpu-alternatives.patch b/patches.arch/s390-sles12sp3-99-01-cpu-alternatives.patch
new file mode 100644
index 0000000000..e131286bc8
--- /dev/null
+++ b/patches.arch/s390-sles12sp3-99-01-cpu-alternatives.patch
@@ -0,0 +1,395 @@
+From: Vasily Gorbik <gor@linux.vnet.ibm.com>
+Subject: s390: introduce CPU alternatives
+Git-commit: 686140a1a9c41d85a4212a1c26d671139b76404b
+Patch-mainline: v4.15-rc1
+References: bsc#1068032
+
+Implement CPU alternatives, which allows to optionally patch newer
+instructions at runtime, based on CPU facilities availability.
+
+A new kernel boot parameter "noaltinstr" disables patching.
+
+Current implementation is derived from x86 alternatives. Although
+ideal instructions padding (when altinstr is longer then oldinstr)
+is added at compile time, and no oldinstr nops optimization has to be
+done at runtime. Also couple of compile time sanity checks are done:
+1. oldinstr and altinstr must be <= 254 bytes long,
+2. oldinstr and altinstr must not have an odd length.
+
+alternative(oldinstr, altinstr, facility);
+alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2);
+
+Both compile time and runtime padding consists of either 6/4/2 bytes nop
+or a jump (brcl) + 2 bytes nop filler if padding is longer then 6 bytes.
+
+.altinstructions and .altinstr_replacement sections are part of
+__init_begin : __init_end region and are freed after initialization.
+
+Signed-off-by: Vasily Gorbik <gor@linux.vnet.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/s390/include/asm/alternative.h | 149 ++++++++++++++++++++++++++++++++++++
+ arch/s390/kernel/Makefile | 2
+ arch/s390/kernel/alternative.c | 110 ++++++++++++++++++++++++++
+ arch/s390/kernel/module.c | 13 +++
+ arch/s390/kernel/setup.c | 3
+ arch/s390/kernel/vmlinux.lds.S | 23 +++++
+ 6 files changed, 299 insertions(+), 1 deletion(-)
+
+--- /dev/null
++++ b/arch/s390/include/asm/alternative.h
+@@ -0,0 +1,149 @@
++#ifndef _ASM_S390_ALTERNATIVE_H
++#define _ASM_S390_ALTERNATIVE_H
++
++#ifndef __ASSEMBLY__
++
++#include <linux/types.h>
++#include <linux/stddef.h>
++#include <linux/stringify.h>
++
++struct alt_instr {
++ s32 instr_offset; /* original instruction */
++ s32 repl_offset; /* offset to replacement instruction */
++ u16 facility; /* facility bit set for replacement */
++ u8 instrlen; /* length of original instruction */
++ u8 replacementlen; /* length of new instruction */
++} __packed;
++
++extern void apply_alternative_instructions(void);
++extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
++
++/*
++ * |661: |662: |6620 |663:
++ * +-----------+---------------------+
++ * | oldinstr | oldinstr_padding |
++ * | +----------+----------+
++ * | | | |
++ * | | >6 bytes |6/4/2 nops|
++ * | |6 bytes jg----------->
++ * +-----------+---------------------+
++ * ^^ static padding ^^
++ *
++ * .altinstr_replacement section
++ * +---------------------+-----------+
++ * |6641: |6651:
++ * | alternative instr 1 |
++ * +-----------+---------+- - - - - -+
++ * |6642: |6652: |
++ * | alternative instr 2 | padding
++ * +---------------------+- - - - - -+
++ * ^ runtime ^
++ *
++ * .altinstructions section
++ * +---------------------------------+
++ * | alt_instr entries for each |
++ * | alternative instr |
++ * +---------------------------------+
++ */
++
++#define b_altinstr(num) "664"#num
++#define e_altinstr(num) "665"#num
++
++#define e_oldinstr_pad_end "663"
++#define oldinstr_len "662b-661b"
++#define oldinstr_total_len e_oldinstr_pad_end"b-661b"
++#define altinstr_len(num) e_altinstr(num)"b-"b_altinstr(num)"b"
++#define oldinstr_pad_len(num) \
++ "-(((" altinstr_len(num) ")-(" oldinstr_len ")) > 0) * " \
++ "((" altinstr_len(num) ")-(" oldinstr_len "))"
++
++#define INSTR_LEN_SANITY_CHECK(len) \
++ ".if " len " > 254\n" \
++ "\t.error \"cpu alternatives does not support instructions " \
++ "blocks > 254 bytes\"\n" \
++ ".endif\n" \
++ ".if (" len ") %% 2\n" \
++ "\t.error \"cpu alternatives instructions length is odd\"\n" \
++ ".endif\n"
++
++#define OLDINSTR_PADDING(oldinstr, num) \
++ ".if " oldinstr_pad_len(num) " > 6\n" \
++ "\tjg " e_oldinstr_pad_end "f\n" \
++ "6620:\n" \
++ "\t.fill (" oldinstr_pad_len(num) " - (6620b-662b)) / 2, 2, 0x0700\n" \
++ ".else\n" \
++ "\t.fill " oldinstr_pad_len(num) " / 6, 6, 0xc0040000\n" \
++ "\t.fill " oldinstr_pad_len(num) " %% 6 / 4, 4, 0x47000000\n" \
++ "\t.fill " oldinstr_pad_len(num) " %% 6 %% 4 / 2, 2, 0x0700\n" \
++ ".endif\n"
++
++#define OLDINSTR(oldinstr, num) \
++ "661:\n\t" oldinstr "\n662:\n" \
++ OLDINSTR_PADDING(oldinstr, num) \
++ e_oldinstr_pad_end ":\n" \
++ INSTR_LEN_SANITY_CHECK(oldinstr_len)
++
++#define OLDINSTR_2(oldinstr, num1, num2) \
++ "661:\n\t" oldinstr "\n662:\n" \
++ ".if " altinstr_len(num1) " < " altinstr_len(num2) "\n" \
++ OLDINSTR_PADDING(oldinstr, num2) \
++ ".else\n" \
++ OLDINSTR_PADDING(oldinstr, num1) \
++ ".endif\n" \
++ e_oldinstr_pad_end ":\n" \
++ INSTR_LEN_SANITY_CHECK(oldinstr_len)
++
++#define ALTINSTR_ENTRY(facility, num) \
++ "\t.long 661b - .\n" /* old instruction */ \
++ "\t.long " b_altinstr(num)"b - .\n" /* alt instruction */ \
++ "\t.word " __stringify(facility) "\n" /* facility bit */ \
++ "\t.byte " oldinstr_total_len "\n" /* source len */ \
++ "\t.byte " altinstr_len(num) "\n" /* alt instruction len */
++
++#define ALTINSTR_REPLACEMENT(altinstr, num) /* replacement */ \
++ b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n" \
++ INSTR_LEN_SANITY_CHECK(altinstr_len(num))
++
++/* alternative assembly primitive: */
++#define ALTERNATIVE(oldinstr, altinstr, facility) \
++ ".pushsection .altinstr_replacement, \"ax\"\n" \
++ ALTINSTR_REPLACEMENT(altinstr, 1) \
++ ".popsection\n" \
++ OLDINSTR(oldinstr, 1) \
++ ".pushsection .altinstructions,\"a\"\n" \
++ ALTINSTR_ENTRY(facility, 1) \
++ ".popsection\n"
++
++#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2)\
++ ".pushsection .altinstr_replacement, \"ax\"\n" \
++ ALTINSTR_REPLACEMENT(altinstr1, 1) \
++ ALTINSTR_REPLACEMENT(altinstr2, 2) \
++ ".popsection\n" \
++ OLDINSTR_2(oldinstr, 1, 2) \
++ ".pushsection .altinstructions,\"a\"\n" \
++ ALTINSTR_ENTRY(facility1, 1) \
++ ALTINSTR_ENTRY(facility2, 2) \
++ ".popsection\n"
++
++/*
++ * Alternative instructions for different CPU types or capabilities.
++ *
++ * This allows to use optimized instructions even on generic binary
++ * kernels.
++ *
++ * oldinstr is padded with jump and nops at compile time if altinstr is
++ * longer. altinstr is padded with jump and nops at run-time during patching.
++ *
++ * For non barrier like inlines please define new variants
++ * without volatile and memory clobber.
++ */
++#define alternative(oldinstr, altinstr, facility) \
++ asm volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
++
++#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \
++ asm volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \
++ altinstr2, facility2) ::: "memory")
++
++#endif /* __ASSEMBLY__ */
++
++#endif /* _ASM_S390_ALTERNATIVE_H */
+--- a/arch/s390/kernel/Makefile
++++ b/arch/s390/kernel/Makefile
+@@ -44,7 +44,7 @@ obj-y += processor.o sys_s390.o ptrace.o
+ obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
+ obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
+ obj-y += runtime_instr.o cache.o dumpstack.o
+-obj-y += entry.o reipl.o relocate_kernel.o
++obj-y += entry.o reipl.o relocate_kernel.o alternative.o
+
+ extra-y += head.o head64.o vmlinux.lds
+
+--- /dev/null
++++ b/arch/s390/kernel/alternative.c
+@@ -0,0 +1,110 @@
++#include <linux/module.h>
++#include <asm/alternative.h>
++#include <asm/facility.h>
++
++#define MAX_PATCH_LEN (255 - 1)
++
++static int __initdata_or_module alt_instr_disabled;
++
++static int __init disable_alternative_instructions(char *str)
++{
++ alt_instr_disabled = 1;
++ return 0;
++}
++
++early_param("noaltinstr", disable_alternative_instructions);
++
++struct brcl_insn {
++ u16 opc;
++ s32 disp;
++} __packed;
++
++static u16 __initdata_or_module nop16 = 0x0700;
++static u32 __initdata_or_module nop32 = 0x47000000;
++static struct brcl_insn __initdata_or_module nop48 = {
++ 0xc004, 0
++};
++
++static const void * __initdata_or_module nops[] = {
++ &nop16,
++ &nop32,
++ &nop48
++};
++
++static void __init_or_module add_jump_padding(void *insns, unsigned int len)
++{
++ struct brcl_insn brcl = {
++ 0xc0f4,
++ len / 2
++ };
++
++ memcpy(insns, &brcl, sizeof(brcl));
++ insns += sizeof(brcl);
++ len -= sizeof(brcl);
++
++ while (len > 0) {
++ memcpy(insns, &nop16, 2);
++ insns += 2;
++ len -= 2;
++ }
++}
++
++static void __init_or_module add_padding(void *insns, unsigned int len)
++{
++ if (len > 6)
++ add_jump_padding(insns, len);
++ else if (len >= 2)
++ memcpy(insns, nops[len / 2 - 1], len);
++}
++
++static void __init_or_module __apply_alternatives(struct alt_instr *start,
++ struct alt_instr *end)
++{
++ struct alt_instr *a;
++ u8 *instr, *replacement;
++ u8 insnbuf[MAX_PATCH_LEN];
++
++ /*
++ * The scan order should be from start to end. A later scanned
++ * alternative code can overwrite previously scanned alternative code.
++ */
++ for (a = start; a < end; a++) {
++ int insnbuf_sz = 0;
++
++ instr = (u8 *)&a->instr_offset + a->instr_offset;
++ replacement = (u8 *)&a->repl_offset + a->repl_offset;
++
++ if (!test_facility(a->facility))
++ continue;
++
++ if (unlikely(a->instrlen % 2 || a->replacementlen % 2)) {
++ WARN_ONCE(1, "cpu alternatives instructions length is "
++ "odd, skipping patching\n");
++ continue;
++ }
++
++ memcpy(insnbuf, replacement, a->replacementlen);
++ insnbuf_sz = a->replacementlen;
++
++ if (a->instrlen > a->replacementlen) {
++ add_padding(insnbuf + a->replacementlen,
++ a->instrlen - a->replacementlen);
++ insnbuf_sz += a->instrlen - a->replacementlen;
++ }
++
++ s390_kernel_write(instr, insnbuf, insnbuf_sz);
++ }
++}
++
++void __init_or_module apply_alternatives(struct alt_instr *start,
++ struct alt_instr *end)
++{
++ if (!alt_instr_disabled)
++ __apply_alternatives(start, end);
++}
++
++extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
++void __init apply_alternative_instructions(void)
++{
++ apply_alternatives(__alt_instructions, __alt_instructions_end);
++}
+--- a/arch/s390/kernel/module.c
++++ b/arch/s390/kernel/module.c
+@@ -31,6 +31,7 @@
+ #include <linux/kernel.h>
+ #include <linux/moduleloader.h>
+ #include <linux/bug.h>
++#include <asm/alternative.h>
+
+ #if 0
+ #define DEBUGP printk
+@@ -424,6 +425,18 @@ int module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ struct module *me)
+ {
++ const Elf_Shdr *s;
++ char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
++
++ for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
++ if (!strcmp(".altinstructions", secstrings + s->sh_name)) {
++ /* patch .altinstructions */
++ void *aseg = (void *)s->sh_addr;
++
++ apply_alternatives(aseg, aseg + s->sh_size);
++ }
++ }
++
+ jump_label_apply_nops(me);
+ vfree(me->arch.syminfo);
+ me->arch.syminfo = NULL;
+--- a/arch/s390/kernel/setup.c
++++ b/arch/s390/kernel/setup.c
+@@ -63,6 +63,7 @@
+ #include <asm/sclp.h>
+ #include <asm/sysinfo.h>
+ #include <asm/numa.h>
++#include <asm/alternative.h>
+ #include "entry.h"
+
+ /*
+@@ -897,6 +898,8 @@ void __init setup_arch(char **cmdline_p)
+ conmode_default();
+ set_preferred_console();
+
++ apply_alternative_instructions();
++
+ /* Setup zfcpdump support */
+ setup_zfcpdump();
+
+--- a/arch/s390/kernel/vmlinux.lds.S
++++ b/arch/s390/kernel/vmlinux.lds.S
+@@ -72,6 +72,29 @@ SECTIONS
+ EXIT_DATA
+ }
+
++ /*
++ * struct alt_inst entries. From the header (alternative.h):
++ * "Alternative instructions for different CPU types or capabilities"
++ * Think locking instructions on spinlocks.
++ * Note, that it is a part of __init region.
++ */
++ . = ALIGN(8);
++ .altinstructions : {
++ __alt_instructions = .;
++ *(.altinstructions)
++ __alt_instructions_end = .;
++ }
++
++ /*
++ * And here are the replacement instructions. The linker sticks
++ * them as binary blobs. The .altinstructions has enough data to
++ * get the address and the length of them to patch the kernel safely.
++ * Note, that it is a part of __init region.
++ */
++ .altinstr_replacement : {
++ *(.altinstr_replacement)
++ }
++
+ /* early.c uses stsi, which requires page aligned data. */
+ . = ALIGN(PAGE_SIZE);
+ INIT_DATA_SECTION(0x100)
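
The patch above only defines the s390 alternative()/ALTERNATIVE() macros and the boot-time patcher; it adds no caller. As a minimal, hedged usage sketch (not part of the commit; the facility bit and replacement opcode are borrowed from the gmb() patch that follows, and the function name is purely illustrative):

	#include <asm/alternative.h>

	/*
	 * Sketch only: execute a 4-byte nop by default; when facility bit 81
	 * is installed, apply_alternatives() patches in the 4-byte
	 * 0xb2e8f000 (PPA) instruction at boot.  Both variants are 4 bytes
	 * long, so the compile-time length sanity checks in the header hold.
	 */
	static inline void example_speculation_barrier(void)
	{
		alternative(".long 0x47000000",		/* nop */
			    ".long 0xb2e8f000",		/* ppa */
			    81);
	}
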
diff --git a/patches.arch/s390-sles12sp3-99-02-gmb.patch b/patches.arch/s390-sles12sp3-99-02-gmb.patch
new file mode 100644
index 0000000000..071c37c270
--- /dev/null
+++ b/patches.arch/s390-sles12sp3-99-02-gmb.patch
@@ -0,0 +1,36 @@
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Subject: s390/spinlock: add gmb memory barrier
+References: bsc#1068032
+Patch-mainline: Not yet, under development
+
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/s390/include/asm/barrier.h | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/arch/s390/include/asm/barrier.h
++++ b/arch/s390/include/asm/barrier.h
+@@ -7,6 +7,8 @@
+ #ifndef __ASM_BARRIER_H
+ #define __ASM_BARRIER_H
+
++#include <asm/alternative.h>
++
+ /*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+@@ -22,6 +24,13 @@
+
+ #define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0)
+
++static inline void gmb(void)
++{
++ asm volatile(
++ ALTERNATIVE("", ".long 0xb2e8f000", 81)
++ : : : "memory");
++}
++
+ #define rmb() barrier()
+ #define wmb() barrier()
+ #define dma_rmb() mb()
diff --git a/patches.arch/s390-sles12sp3-99-03-nobp.patch b/patches.arch/s390-sles12sp3-99-03-nobp.patch
new file mode 100644
index 0000000000..764a2ebf91
--- /dev/null
+++ b/patches.arch/s390-sles12sp3-99-03-nobp.patch
@@ -0,0 +1,220 @@
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Subject: s390: add ppa to system call and program check path
+References: bsc#1068032
+Patch-mainline: Not yet, under development
+
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/s390/include/asm/processor.h | 1
+ arch/s390/kernel/alternative.c | 13 ++++++++++
+ arch/s390/kernel/entry.S | 47 ++++++++++++++++++++++++++++++++++++++
+ arch/s390/kernel/ipl.c | 1
+ arch/s390/kernel/smp.c | 2 +
+ arch/s390/kernel/vmlinux.lds.S | 3 ++
+ 6 files changed, 67 insertions(+)
+
+--- a/arch/s390/include/asm/processor.h
++++ b/arch/s390/include/asm/processor.h
+@@ -69,6 +69,7 @@ extern void s390_adjust_jiffies(void);
+ extern const struct seq_operations cpuinfo_op;
+ extern int sysctl_ieee_emulation_warnings;
+ extern void execve_tail(void);
++extern void __bpon(void);
+
+ /*
+ * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
+--- a/arch/s390/kernel/alternative.c
++++ b/arch/s390/kernel/alternative.c
+@@ -14,6 +14,19 @@ static int __init disable_alternative_in
+
+ early_param("noaltinstr", disable_alternative_instructions);
+
++extern struct alt_instr __alt_nobp[], __alt_nobp_end[];
++static int __init nobp_setup(char *str)
++{
++ bool enabled;
++ int rc;
++
++ rc = kstrtobool(str, &enabled);
++ if (!rc && enabled)
++ apply_alternatives(__alt_nobp, __alt_nobp_end);
++ return rc;
++}
++__setup("nobp=", nobp_setup);
++
+ struct brcl_insn {
+ u16 opc;
+ s32 disp;
+--- a/arch/s390/kernel/entry.S
++++ b/arch/s390/kernel/entry.S
+@@ -171,8 +171,41 @@ _PIF_WORK = (_PIF_PER_TRAP)
+ tm off+\addr, \mask
+ .endm
+
++ .macro BPOFF
++ .pushsection .altinstr_replacement, "ax"
++660: .long 0xb2e8c000
++ .popsection
++661: .long 0x47000000
++ .pushsection .altnobp, "a"
++ .long 661b - .
++ .long 660b - .
++ .word 82
++ .byte 4
++ .byte 4
++ .popsection
++ .endm
++
++ .macro BPON
++ .pushsection .altinstr_replacement, "ax"
++662: .long 0xb2e8d000
++ .popsection
++663: .long 0x47000000
++ .pushsection .altnobp, "a"
++ .long 663b - .
++ .long 662b - .
++ .word 82
++ .byte 4
++ .byte 4
++ .popsection
++ .endm
++
+ .section .kprobes.text, "ax"
+
++ENTRY(__bpon)
++ .globl __bpon
++ BPON
++ br %r14
++
+ /*
+ * Scheduler resume function, called by switch_to
+ * gpr2 = (task_struct *) prev
+@@ -233,7 +266,9 @@ ENTRY(sie64a)
+ jnz .Lsie_skip
+ TSTMSK __LC_CPU_FLAGS,_CIF_FPU
+ jo .Lsie_skip # exit if fp/vx regs changed
++ BPON
+ sie 0(%r14)
++ BPOFF
+ .Lsie_skip:
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+@@ -277,6 +312,7 @@ ENTRY(system_call)
+ stpt __LC_SYNC_ENTER_TIMER
+ .Lsysc_stmg:
+ stmg %r8,%r15,__LC_SAVE_AREA_SYNC
++ BPOFF
+ lg %r10,__LC_LAST_BREAK
+ lg %r12,__LC_THREAD_INFO
+ lghi %r14,_PIF_SYSCALL
+@@ -326,6 +362,7 @@ ENTRY(system_call)
+ lg %r14,__LC_VDSO_PER_CPU
+ lmg %r0,%r10,__PT_R0(%r11)
+ mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
++ BPON
+ .Lsysc_exit_timer:
+ stpt __LC_EXIT_TIMER
+ mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+@@ -484,6 +521,7 @@ ENTRY(kernel_thread_starter)
+
+ ENTRY(pgm_check_handler)
+ stpt __LC_SYNC_ENTER_TIMER
++ BPOFF
+ stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ lg %r10,__LC_LAST_BREAK
+ lg %r12,__LC_THREAD_INFO
+@@ -573,6 +611,7 @@ ENTRY(pgm_check_handler)
+ ENTRY(io_int_handler)
+ STCK __LC_INT_CLOCK
+ stpt __LC_ASYNC_ENTER_TIMER
++ BPOFF
+ stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
+ lg %r10,__LC_LAST_BREAK
+ lg %r12,__LC_THREAD_INFO
+@@ -614,9 +653,13 @@ ENTRY(io_int_handler)
+ lg %r14,__LC_VDSO_PER_CPU
+ lmg %r0,%r10,__PT_R0(%r11)
+ mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
++ tm __PT_PSW+1(%r11),0x01 # returning to user ?
++ jno .Lio_exit_kernel
++ BPON
+ .Lio_exit_timer:
+ stpt __LC_EXIT_TIMER
+ mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
++.Lio_exit_kernel:
+ lmg %r11,%r15,__PT_R11(%r11)
+ lpswe __LC_RETURN_PSW
+ .Lio_done:
+@@ -749,6 +792,7 @@ ENTRY(io_int_handler)
+ ENTRY(ext_int_handler)
+ STCK __LC_INT_CLOCK
+ stpt __LC_ASYNC_ENTER_TIMER
++ BPOFF
+ stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
+ lg %r10,__LC_LAST_BREAK
+ lg %r12,__LC_THREAD_INFO
+@@ -787,6 +831,7 @@ ENTRY(psw_idle)
+ .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15)
+ .Lpsw_idle_stcctm:
+ #endif
++ BPON
+ STCK __CLOCK_IDLE_ENTER(%r2)
+ stpt __TIMER_IDLE_ENTER(%r2)
+ .Lpsw_idle_lpsw:
+@@ -891,6 +936,7 @@ load_fpu_regs:
+ */
+ ENTRY(mcck_int_handler)
+ STCK __LC_MCCK_CLOCK
++ BPOFF
+ la %r1,4095 # revalidate r1
+ spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer
+ lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
+@@ -947,6 +993,7 @@ ENTRY(mcck_int_handler)
+ mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
+ tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
+ jno 0f
++ BPON
+ stpt __LC_EXIT_TIMER
+ mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+ 0: lmg %r11,%r15,__PT_R11(%r11)
+--- a/arch/s390/kernel/ipl.c
++++ b/arch/s390/kernel/ipl.c
+@@ -563,6 +563,7 @@ static struct kset *ipl_kset;
+
+ static void __ipl_run(void *unused)
+ {
++ __bpon();
+ diag308(DIAG308_IPL, NULL);
+ if (MACHINE_IS_VM)
+ __cpcmd("IPL", NULL, 0, NULL);
+--- a/arch/s390/kernel/smp.c
++++ b/arch/s390/kernel/smp.c
+@@ -299,6 +299,7 @@ static void pcpu_delegate(struct pcpu *p
+ mem_assign_absolute(lc->restart_fn, (unsigned long) func);
+ mem_assign_absolute(lc->restart_data, (unsigned long) data);
+ mem_assign_absolute(lc->restart_source, source_cpu);
++ __bpon();
+ asm volatile(
+ "0: sigp 0,%0,%2 # sigp restart to target cpu\n"
+ " brc 2,0b # busy, try again\n"
+@@ -884,6 +885,7 @@ void __cpu_die(unsigned int cpu)
+ void __noreturn cpu_die(void)
+ {
+ idle_task_exit();
++ __bpon();
+ pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
+ for (;;) ;
+ }
+--- a/arch/s390/kernel/vmlinux.lds.S
++++ b/arch/s390/kernel/vmlinux.lds.S
+@@ -83,6 +83,9 @@ SECTIONS
+ __alt_instructions = .;
+ *(.altinstructions)
+ __alt_instructions_end = .;
++ __alt_nobp = .;
++ *(.altnobp)
++ __alt_nobp_end = .;
+ }
+
+ /*
diff --git a/patches.drivers/ALSA-hda-fix-headset-mic-problem-for-Dell-machines-alc274 b/patches.drivers/ALSA-hda-fix-headset-mic-problem-for-Dell-machines-alc274
new file mode 100644
index 0000000000..5dec5c98c2
--- /dev/null
+++ b/patches.drivers/ALSA-hda-fix-headset-mic-problem-for-Dell-machines-alc274
@@ -0,0 +1,36 @@
+From 75ee94b20b46459e3d29f5ac2c3af3cebdeef777 Mon Sep 17 00:00:00 2001
+From: Hui Wang <hui.wang@canonical.com>
+Date: Thu, 9 Nov 2017 08:48:08 +0800
+Subject: [PATCH] ALSA: hda - fix headset mic problem for Dell machines with alc274
+Git-commit: 75ee94b20b46459e3d29f5ac2c3af3cebdeef777
+Patch-mainline: 4.14
+References: bsc#1031717
+
+Confirmed with Kailang of Realtek, the pin 0x19 is for Headset Mic, and
+the pin 0x1a is for Headphone Mic, he suggested to apply
+ALC269_FIXUP_DELL1_MIC_NO_PRESENCE to fix this problem. And we
+verified applying this FIXUP can fix this problem.
+
+Cc: <stable@vger.kernel.org>
+Cc: Kailang Yang <kailang@realtek.com>
+Signed-off-by: Hui Wang <hui.wang@canonical.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+
+---
+ sound/pci/hda/patch_realtek.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -6476,6 +6476,11 @@ static const struct snd_hda_pin_quirk al
+ {0x14, 0x90170110},
+ {0x1b, 0x90a70130},
+ {0x21, 0x03211020}),
++ SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
++ {0x12, 0xb7a60130},
++ {0x13, 0xb8a61140},
++ {0x16, 0x90170110},
++ {0x21, 0x04211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4,
+ {0x12, 0x90a60130},
+ {0x14, 0x90170110},
diff --git a/patches.drivers/ALSA-hda-realtek-Fix-Dell-AIO-LineOut-issue b/patches.drivers/ALSA-hda-realtek-Fix-Dell-AIO-LineOut-issue
index b671b4d562..5d69b133eb 100644
--- a/patches.drivers/ALSA-hda-realtek-Fix-Dell-AIO-LineOut-issue
+++ b/patches.drivers/ALSA-hda-realtek-Fix-Dell-AIO-LineOut-issue
@@ -24,8 +24,8 @@ Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
- sound/pci/hda/patch_realtek.c | 38 ++++++++++++++++++++++++++++++++++++++
- 1 file changed, 38 insertions(+)
+ sound/pci/hda/patch_realtek.c | 35 ++++++++++++++++++++++++++++++++++-
+ 1 file changed, 34 insertions(+), 1 deletion(-)
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -83,15 +83,12 @@ Signed-off-by: Takashi Iwai <tiwai@suse.de>
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
-@@ -6476,6 +6509,11 @@ static const struct snd_hda_pin_quirk al
+@@ -6476,7 +6509,7 @@ static const struct snd_hda_pin_quirk al
{0x14, 0x90170110},
{0x1b, 0x90a70130},
{0x21, 0x03211020}),
+- SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+ SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
-+ {0x12, 0xb7a60130},
-+ {0x13, 0xb8a61140},
-+ {0x16, 0x90170110},
-+ {0x21, 0x04211020}),
- SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4,
- {0x12, 0x90a60130},
- {0x14, 0x90170110},
+ {0x12, 0xb7a60130},
+ {0x13, 0xb8a61140},
+ {0x16, 0x90170110},
diff --git a/patches.drivers/ALSA-hda-realtek-Fix-pincfg-for-Dell-XPS-13-9370 b/patches.drivers/ALSA-hda-realtek-Fix-pincfg-for-Dell-XPS-13-9370
index 1a5ae305d1..1aced005dd 100644
--- a/patches.drivers/ALSA-hda-realtek-Fix-pincfg-for-Dell-XPS-13-9370
+++ b/patches.drivers/ALSA-hda-realtek-Fix-pincfg-for-Dell-XPS-13-9370
@@ -53,7 +53,7 @@ Signed-off-by: Takashi Iwai <tiwai@suse.de>
SND_HDA_PIN_QUIRK(0x10ec0299, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
ALC225_STANDARD_PINS,
{0x12, 0xb7a60130},
-- {0x13, 0xb8a60140},
+- {0x13, 0xb8a61140},
{0x17, 0x90170110}),
{}
};
diff --git a/patches.drivers/ALSA-hda-realtek-Fix-typo-of-pincfg-for-Dell-quirk b/patches.drivers/ALSA-hda-realtek-Fix-typo-of-pincfg-for-Dell-quirk
new file mode 100644
index 0000000000..c1e746c7b2
--- /dev/null
+++ b/patches.drivers/ALSA-hda-realtek-Fix-typo-of-pincfg-for-Dell-quirk
@@ -0,0 +1,29 @@
+From b4576de87243c32fab50dda9f8eba1e3cf13a7e2 Mon Sep 17 00:00:00 2001
+From: "Shih-Yuan Lee (FourDollars)" <sylee@canonical.com>
+Date: Mon, 3 Jul 2017 14:13:29 +0800
+Subject: [PATCH] ALSA: hda/realtek - Fix typo of pincfg for Dell quirk
+Git-commit: b4576de87243c32fab50dda9f8eba1e3cf13a7e2
+Patch-mainline: 4.13-rc1
+References: bsc#1031717
+
+The PIN number for Dell headset mode of ALC3271 is wrong.
+
+Fixes: fcc6c877a01f ("ALSA: hda/realtek - Support Dell headset mode for ALC3271")
+Signed-off-by: Shih-Yuan Lee (FourDollars) <sylee@canonical.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+
+---
+ sound/pci/hda/patch_realtek.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -6453,7 +6453,7 @@ static const struct snd_hda_pin_quirk al
+ SND_HDA_PIN_QUIRK(0x10ec0299, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+ ALC225_STANDARD_PINS,
+ {0x12, 0xb7a60130},
+- {0x13, 0xb8a60140},
++ {0x13, 0xb8a61140},
+ {0x17, 0x90170110}),
+ {}
+ };
diff --git a/patches.fixes/0001-sunrpc-add-hash_cred-function-to-rpc_authops-struct.patch b/patches.fixes/0001-sunrpc-add-hash_cred-function-to-rpc_authops-struct.patch
new file mode 100644
index 0000000000..10005df847
--- /dev/null
+++ b/patches.fixes/0001-sunrpc-add-hash_cred-function-to-rpc_authops-struct.patch
@@ -0,0 +1,30 @@
+From: Frank Sorenson <sorenson@redhat.com>
+Date: Thu, 29 Sep 2016 10:44:37 -0500
+Subject: [PATCH] sunrpc: add hash_cred() function to rpc_authops struct
+Git-commit: e856a231d5d5742fe7c63e3a2b266bef668af5b4
+Patch-mainline: v4.9
+References: bsc#1012917
+
+Currently, a single hash algorithm is used to hash the auth_cred for
+the credcache for all rpc_auth types. Add a hash_cred() function to
+the rpc_authops struct to allow a hash function specific to each
+auth flavor.
+
+Signed-off-by: Frank Sorenson <sorenson@redhat.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ include/linux/sunrpc/auth.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/include/linux/sunrpc/auth.h
++++ b/include/linux/sunrpc/auth.h
+@@ -119,6 +119,7 @@ struct rpc_authops {
+ struct rpc_auth * (*create)(struct rpc_auth_create_args *, struct rpc_clnt *);
+ void (*destroy)(struct rpc_auth *);
+
++ int (*hash_cred)(struct auth_cred *, unsigned int);
+ struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int);
+ struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int);
+ int (*list_pseudoflavors)(rpc_authflavor_t *, int);
diff --git a/patches.fixes/0002-sunrpc-add-generic_auth-hash_cred-function.patch b/patches.fixes/0002-sunrpc-add-generic_auth-hash_cred-function.patch
new file mode 100644
index 0000000000..1c1f9ce575
--- /dev/null
+++ b/patches.fixes/0002-sunrpc-add-generic_auth-hash_cred-function.patch
@@ -0,0 +1,52 @@
+From: Frank Sorenson <sorenson@redhat.com>
+Date: Thu, 29 Sep 2016 10:44:38 -0500
+Subject: [PATCH] sunrpc: add generic_auth hash_cred() function
+Git-commit: 18028c967e423673a055a30b1bf4e603ec64026d
+Patch-mainline: v4.9
+References: bsc#1012917
+
+Add a hash_cred() function for generic_auth, using both the
+uid and gid from the auth_cred.
+
+Signed-off-by: Frank Sorenson <sorenson@redhat.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ net/sunrpc/auth_generic.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/net/sunrpc/auth_generic.c
++++ b/net/sunrpc/auth_generic.c
+@@ -13,6 +13,7 @@
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/sunrpc/debug.h>
+ #include <linux/sunrpc/sched.h>
++#include <linux/hash.h>
+
+ #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+ # define RPCDBG_FACILITY RPCDBG_AUTH
+@@ -71,6 +72,15 @@ static struct rpc_cred *generic_bind_cre
+ return auth->au_ops->lookup_cred(auth, acred, lookupflags);
+ }
+
++static int
++generic_hash_cred(struct auth_cred *acred, unsigned int hashbits)
++{
++ u64 uid = from_kuid(&init_user_ns, acred->uid);
++ u64 gid = from_kgid(&init_user_ns, acred->gid);
++ return hash_64(gid | (uid << (sizeof(gid_t) * 8)),
++ hashbits);
++}
++
+ /*
+ * Lookup generic creds for current process
+ */
+@@ -258,6 +268,7 @@ out_put:
+ static const struct rpc_authops generic_auth_ops = {
+ .owner = THIS_MODULE,
+ .au_name = "Generic",
++ .hash_cred = generic_hash_cred,
+ .lookup_cred = generic_lookup_cred,
+ .crcreate = generic_create_cred,
+ .key_timeout = generic_key_timeout,
diff --git a/patches.fixes/0003-sunrpc-add-auth_unix-hash_cred-function.patch b/patches.fixes/0003-sunrpc-add-auth_unix-hash_cred-function.patch
new file mode 100644
index 0000000000..4c173899e4
--- /dev/null
+++ b/patches.fixes/0003-sunrpc-add-auth_unix-hash_cred-function.patch
@@ -0,0 +1,52 @@
+From: Frank Sorenson <sorenson@redhat.com>
+Date: Thu, 29 Sep 2016 10:44:39 -0500
+Subject: [PATCH] sunrpc: add auth_unix hash_cred() function
+Git-commit: 1e035d065f3415809c056fb7537320a74c718537
+Patch-mainline: v4.9
+References: bsc#1012917
+
+Add a hash_cred() function for auth_unix, using both the
+uid and gid from the auth_cred.
+
+Signed-off-by: Frank Sorenson <sorenson@redhat.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ net/sunrpc/auth_unix.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/net/sunrpc/auth_unix.c
++++ b/net/sunrpc/auth_unix.c
+@@ -13,6 +13,7 @@
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/sunrpc/auth.h>
+ #include <linux/user_namespace.h>
++#include <linux/hash.h>
+
+ #define NFS_NGROUPS 16
+
+@@ -48,6 +49,15 @@ unx_destroy(struct rpc_auth *auth)
+ rpcauth_clear_credcache(auth->au_credcache);
+ }
+
++static int
++unx_hash_cred(struct auth_cred *acred, unsigned int hashbits)
++{
++ u64 uid = from_kuid(&init_user_ns, acred->uid);
++ u64 gid = from_kgid(&init_user_ns, acred->gid);
++ return hash_64(gid | (uid << (sizeof(gid_t) * 8)),
++ hashbits);
++}
++
+ /*
+ * Lookup AUTH_UNIX creds for current process
+ */
+@@ -222,6 +232,7 @@ const struct rpc_authops authunix_ops =
+ .au_name = "UNIX",
+ .create = unx_create,
+ .destroy = unx_destroy,
++ .hash_cred = unx_hash_cred,
+ .lookup_cred = unx_lookup_cred,
+ .crcreate = unx_create_cred,
+ };
diff --git a/patches.fixes/0004-sunrpc-add-RPCSEC_GSS-hash_cred-function.patch b/patches.fixes/0004-sunrpc-add-RPCSEC_GSS-hash_cred-function.patch
new file mode 100644
index 0000000000..a4bcdf030f
--- /dev/null
+++ b/patches.fixes/0004-sunrpc-add-RPCSEC_GSS-hash_cred-function.patch
@@ -0,0 +1,42 @@
+From: Frank Sorenson <sorenson@redhat.com>
+Date: Thu, 29 Sep 2016 10:44:40 -0500
+Subject: [PATCH] sunrpc: add RPCSEC_GSS hash_cred() function
+Git-commit: a960f8d6db431f5785ee28dbe903d61d821f368e
+Patch-mainline: v4.9
+References: bsc#1012917
+
+Add a hash_cred() function for RPCSEC_GSS, using only the
+uid from the auth_cred.
+
+Signed-off-by: Frank Sorenson <sorenson@redhat.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ net/sunrpc/auth_gss/auth_gss.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/net/sunrpc/auth_gss/auth_gss.c
++++ b/net/sunrpc/auth_gss/auth_gss.c
+@@ -1300,6 +1300,13 @@ gss_destroy_cred(struct rpc_cred *cred)
+ gss_destroy_nullcred(cred);
+ }
+
++static int
++gss_hash_cred(struct auth_cred *acred, unsigned int hashbits)
++{
++ u64 uid = from_kuid(&init_user_ns, acred->uid);
++ return hash_64(uid, hashbits);
++}
++
+ /*
+ * Lookup RPCSEC_GSS cred for the current process
+ */
+@@ -1984,6 +1991,7 @@ static const struct rpc_authops authgss_
+ .au_name = "RPCSEC_GSS",
+ .create = gss_create,
+ .destroy = gss_destroy,
++ .hash_cred = gss_hash_cred,
+ .lookup_cred = gss_lookup_cred,
+ .crcreate = gss_create_cred,
+ .list_pseudoflavors = gss_mech_list_pseudoflavors,
diff --git a/patches.fixes/0005-sunrpc-replace-generic-auth_cred-hash-with-auth-spec.patch b/patches.fixes/0005-sunrpc-replace-generic-auth_cred-hash-with-auth-spec.patch
new file mode 100644
index 0000000000..2d34df16f1
--- /dev/null
+++ b/patches.fixes/0005-sunrpc-replace-generic-auth_cred-hash-with-auth-spec.patch
@@ -0,0 +1,30 @@
+From: Frank Sorenson <sorenson@redhat.com>
+Date: Thu, 29 Sep 2016 10:44:41 -0500
+Subject: [PATCH] sunrpc: replace generic auth_cred hash with auth-specific
+ function
+Git-commit: 66cbd4ba8ac4711e8bad97b5bded31ec298c7433
+Patch-mainline: v4.9
+References: bsc#1012917
+
+Replace the generic code to hash the auth_cred with the call to
+the auth-specific hash function in the rpc_authops struct.
+
+Signed-off-by: Frank Sorenson <sorenson@redhat.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ net/sunrpc/auth.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sunrpc/auth.c
++++ b/net/sunrpc/auth.c
+@@ -551,7 +551,7 @@ rpcauth_lookup_credcache(struct rpc_auth
+ *entry, *new;
+ unsigned int nr;
+
+- nr = hash_long(from_kuid(&init_user_ns, acred->uid), cache->hashbits);
++ nr = auth->au_ops->hash_cred(acred, cache->hashbits);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) {
diff --git a/patches.fixes/0006-sunrpc-include-sup-groups-in-hash.patch b/patches.fixes/0006-sunrpc-include-sup-groups-in-hash.patch
new file mode 100644
index 0000000000..51628cdde7
--- /dev/null
+++ b/patches.fixes/0006-sunrpc-include-sup-groups-in-hash.patch
@@ -0,0 +1,73 @@
+From: NeilBrown <neilb@suse.com>
+Subject: [PATCH] sunrpc: use supplimental groups in auth hash.
+Patch-mainline: not yet, under development
+References: bsc#1012917
+
+Some sites vary some supplimental groups a lot.
+To avoid unduely long hash chains, include all of these
+in the hash calculcation.
+
+Also use hash_32 as it provides better results on 3.0 kernels.
+
+Signed-off-by: NeilBrown <neilb@suse.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ net/sunrpc/auth_generic.c | 18 ++++++++++++++----
+ net/sunrpc/auth_unix.c | 18 ++++++++++++++----
+ 2 files changed, 28 insertions(+), 8 deletions(-)
+
+--- a/net/sunrpc/auth_generic.c
++++ b/net/sunrpc/auth_generic.c
+@@ -75,10 +75,20 @@ static struct rpc_cred *generic_bind_cre
+ static int
+ generic_hash_cred(struct auth_cred *acred, unsigned int hashbits)
+ {
+- u64 uid = from_kuid(&init_user_ns, acred->uid);
+- u64 gid = from_kgid(&init_user_ns, acred->gid);
+- return hash_64(gid | (uid << (sizeof(gid_t) * 8)),
+- hashbits);
++ u32 uid = from_kuid(&init_user_ns, acred->uid);
++ u32 gid;
++ int ret = hash_32(uid, 32);
++
++ if (acred->group_info) {
++ int g;
++
++ for (g = 0; g < acred->group_info->ngroups; g++) {
++ gid = from_kgid(&init_user_ns, GROUP_AT(acred->group_info, g));
++ ret = hash_32(ret ^ gid, 32);
++ }
++ }
++ gid = from_kgid(&init_user_ns, acred->gid);
++ return hash_32(ret ^ gid, hashbits);
+ }
+
+ /*
+--- a/net/sunrpc/auth_unix.c
++++ b/net/sunrpc/auth_unix.c
+@@ -52,10 +52,20 @@ unx_destroy(struct rpc_auth *auth)
+ static int
+ unx_hash_cred(struct auth_cred *acred, unsigned int hashbits)
+ {
+- u64 uid = from_kuid(&init_user_ns, acred->uid);
+- u64 gid = from_kgid(&init_user_ns, acred->gid);
+- return hash_64(gid | (uid << (sizeof(gid_t) * 8)),
+- hashbits);
++ u32 uid = from_kuid(&init_user_ns, acred->uid);
++ u32 gid;
++ int ret = hash_32(uid, 32);
++
++ if (acred->group_info) {
++ int g;
++
++ for (g = 0; g < acred->group_info->ngroups && g < NFS_NGROUPS; g++) {
++ gid = from_kgid(&init_user_ns, GROUP_AT(acred->group_info, g));
++ ret = hash_32(ret ^ gid, 32);
++ }
++ }
++ gid = from_kgid(&init_user_ns, acred->gid);
++ return hash_32(ret ^ gid, hashbits);
+ }
+
+ /*
diff --git a/patches.fixes/0007-nfs-limit-access-cache-size.patch b/patches.fixes/0007-nfs-limit-access-cache-size.patch
new file mode 100644
index 0000000000..cd5d5504d9
--- /dev/null
+++ b/patches.fixes/0007-nfs-limit-access-cache-size.patch
@@ -0,0 +1,60 @@
+From: NeilBrown <neilb@suse.com>
+Subject: [PATCH] nfs: improve shinking of access cache.
+Patch-mainline: not yet, under development
+References: bsc#1012917
+
+This patch contains 3 changes to help keep the per-inode
+access cache at a reasonable size.
+
+1/ The shinker shouldn't round the current total size
+ down to a multiple of 100. If it then discards
+ fewer than 100 entries it could report no change
+ which is confusing.
+
+2/ The shrinker should keep shrinking until it
+ has achieved the goal, or cannot. Currently it
+ discards are most one entry per file. If there
+ are few files, each with many entries, this isn't
+ very effective.
+
+3/ When adding an entry, remove the oldest entry if
+ has already expired. This keeps the size smaller
+ even when no shrinking happens.
+
+Acked-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Neil Brown <neilb@suse.com>
+
+---
+ fs/nfs/dir.c | 21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -2328,6 +2328,27 @@ void nfs_access_add_cache(struct inode *
+ struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+ if (cache == NULL)
+ return;
++ /* If there is an old entry, remove it first to avoid cache getting
++ * too large
++ */
++ if (!list_empty(&NFS_I(inode)->access_cache_entry_lru)) {
++ struct nfs_access_entry *old;
++ spin_lock(&inode->i_lock);
++ old = list_first_entry_or_null(&NFS_I(inode)->access_cache_entry_lru,
++ struct nfs_access_entry, lru);
++ if (old &&
++ !nfs_have_delegated_attributes(inode) &&
++ !time_in_range_open(jiffies, old->jiffies,
++ old->jiffies + NFS_I(inode)->attrtimeo)) {
++ list_del_init(&old->lru);
++ rb_erase(&old->rb_node, &NFS_I(inode)->access_cache);
++ } else
++ old = NULL;
++ spin_unlock(&inode->i_lock);
++ if (old)
++ nfs_access_free_entry(old);
++ }
++
+ RB_CLEAR_NODE(&cache->rb_node);
+ cache->jiffies = set->jiffies;
+ cache->cred = get_rpccred(set->cred);
diff --git a/patches.kabi/0001-sunrpc-add-hash_cred-function-to-rpc_authops-struct.kabi b/patches.kabi/0001-sunrpc-add-hash_cred-function-to-rpc_authops-struct.kabi
new file mode 100644
index 0000000000..4d0529a103
--- /dev/null
+++ b/patches.kabi/0001-sunrpc-add-hash_cred-function-to-rpc_authops-struct.kabi
@@ -0,0 +1,34 @@
+From: NeilBrown <neilb@suse.com>
+Subject: kabi fix for new hash_cred function.
+References: bsc#1012917
+Patch-mainline: Never, kabi
+
+Hide this from kabi.
+
+Acked-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Neil Brown <neilb@suse.com>
+
+---
+ include/linux/sunrpc/auth.h | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/include/linux/sunrpc/auth.h
++++ b/include/linux/sunrpc/auth.h
+@@ -119,7 +119,6 @@ struct rpc_authops {
+ struct rpc_auth * (*create)(struct rpc_auth_create_args *, struct rpc_clnt *);
+ void (*destroy)(struct rpc_auth *);
+
+- int (*hash_cred)(struct auth_cred *, unsigned int);
+ struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int);
+ struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int);
+ int (*list_pseudoflavors)(rpc_authflavor_t *, int);
+@@ -128,6 +127,9 @@ struct rpc_authops {
+ struct rpcsec_gss_info *);
+ int (*key_timeout)(struct rpc_auth *,
+ struct rpc_cred *);
++#ifndef __GENKSYMS__
++ int (*hash_cred)(struct auth_cred *, unsigned int);
++#endif
+ };
+
+ struct rpc_credops {
diff --git a/patches.kabi/kaiser-preserve-kabi.patch b/patches.kabi/kaiser-preserve-kabi.patch
new file mode 100644
index 0000000000..2d80c6dbac
--- /dev/null
+++ b/patches.kabi/kaiser-preserve-kabi.patch
@@ -0,0 +1,107 @@
+From: Jiri Kosina <jkosina@suse.cz>
+Subject: [PATCH] kaiser: work around kABI
+Patch-mainline: Never, SUSE-specific
+References: bsc#1068032
+
+The most potentially dangerous one is the vmstats one. I can't imagine what
+3rd party module would realistically be directly allocating pglist_data,
+per_cpu_nodestat, memcg_stat_item, lruvec_stat, etc, but the potential
+non-zero risk is there.
+
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -319,7 +319,11 @@
+
+ } ____cacheline_aligned;
+
++#ifndef __GENKSYMS__
+ DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss);
++#else
++DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
++#endif
+
+ #ifdef CONFIG_X86_32
+ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -95,7 +95,11 @@
+
+ static const struct cpu_dev *this_cpu = &default_cpu;
+
++#ifndef __GENKSYMS__
+ DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = {
++#else
++DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
++#endif
+ #ifdef CONFIG_X86_64
+ /*
+ * We need valid kernel segments for data and code in long mode too
+--- a/include/linux/mmu_context.h
++++ b/include/linux/mmu_context.h
+@@ -1,7 +1,9 @@
+ #ifndef _LINUX_MMU_CONTEXT_H
+ #define _LINUX_MMU_CONTEXT_H
+
++#ifndef __GENKSYMS__
+ #include <asm/mmu_context.h>
++#endif
+
+ struct mm_struct;
+
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -39,7 +39,11 @@
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
++#if defined(CONFIG_GENKSYMS)
++__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
++#else
+ __visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = {
++#endif
+ .x86_tss = {
+ .sp0 = TOP_OF_INIT_STACK,
+ #ifdef CONFIG_X86_32
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -43,7 +43,11 @@
+ struct desc_struct gdt[GDT_ENTRIES];
+ } __attribute__((aligned(PAGE_SIZE)));
+
++#ifdef __GENKSYMS__
++DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
++#else
+ DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page);
++#endif
+
+ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+ {
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -6,7 +6,9 @@
+
+ #include <asm/processor.h>
+ #include <asm/special_insns.h>
++#ifndef __GENKSYMS__
+ #include <asm/smp.h>
++#endif
+
+ static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ unsigned long type)
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -32,7 +32,11 @@
+ #include <linux/init.h>
+ #include <linux/uaccess.h>
+ #include <linux/highmem.h>
++#ifndef __GENKSYMS__
+ #include <linux/mmu_context.h>
++#else
++#include <asm/mmu_context.h>
++#endif
+ #include <linux/interrupt.h>
+ #include <linux/capability.h>
+ #include <linux/completion.h>
+
diff --git a/patches.suse/0001-locking-barriers-introduce-new-memory-barrier-gmb.patch b/patches.suse/0001-locking-barriers-introduce-new-memory-barrier-gmb.patch
new file mode 100644
index 0000000000..441a6039de
--- /dev/null
+++ b/patches.suse/0001-locking-barriers-introduce-new-memory-barrier-gmb.patch
@@ -0,0 +1,48 @@
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:43 +0300
+Subject: locking/barriers: introduce new memory barrier gmb()
+References: bsc#1068032 CVE-2017-5753
+Patch-mainline: submitted on 2018/1/9
+References: bnc#1068032
+
+In contrast to the existing mb() and rmb() barriers, the
+gmb() barrier is arch-independent and can be used to
+implement any type of memory barrier.
+On x86 it expands to either lfence or mfence, depending
+on the processor type. ARM and other architectures can
+define it according to their needs.
+
+Suggested-by: Arjan van de Ven <arjan@linux.intel.com>
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ arch/x86/include/asm/barrier.h | 3 +++
+ include/asm-generic/barrier.h | 4 ++++
+ 2 files changed, 7 insertions(+)
+
+--- a/arch/x86/include/asm/barrier.h
++++ b/arch/x86/include/asm/barrier.h
+@@ -24,6 +24,9 @@
+ #define wmb() asm volatile("sfence" ::: "memory")
+ #endif
+
++#define gmb() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
++ "lfence", X86_FEATURE_LFENCE_RDTSC);
++
+ #ifdef CONFIG_X86_PPRO_FENCE
+ #define dma_rmb() rmb()
+ #else
+--- a/include/asm-generic/barrier.h
++++ b/include/asm-generic/barrier.h
+@@ -42,6 +42,10 @@
+ #define wmb() mb()
+ #endif
+
++#ifndef gmb
++#define gmb() do { } while (0)
++#endif
++
+ #ifndef dma_rmb
+ #define dma_rmb() rmb()
+ #endif
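
A minimal user-space sketch of the access pattern gmb() is meant to harden
(illustrative only: the gmb() stub below stands in for the alternatives-based
kernel definition above, and table[]/read_entry() are made-up names):

    /* Stand-in for the kernel's gmb(); the real one is patched via alternatives. */
    #define gmb() __asm__ __volatile__("lfence" ::: "memory")

    static int table[16];

    int read_entry(unsigned int idx)
    {
            if (idx >= 16)
                    return -1;
            /*
             * Without a barrier the CPU may speculatively load table[idx]
             * for an out-of-range idx before the branch resolves; gmb()
             * stops speculation at the bounds check.
             */
            gmb();
            return table[idx];
    }
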
diff --git a/patches.suse/0002-bpf-prevent-speculative-execution-in-eBPF-interprete.patch b/patches.suse/0002-bpf-prevent-speculative-execution-in-eBPF-interprete.patch
new file mode 100644
index 0000000000..0f4e8b7558
--- /dev/null
+++ b/patches.suse/0002-bpf-prevent-speculative-execution-in-eBPF-interprete.patch
@@ -0,0 +1,48 @@
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:44 +0300
+Subject: bpf: prevent speculative execution in eBPF interpreter
+References: bsc#1068032 CVE-2017-5753
+Patch-mainline: submitted on 2018/1/9
+References: bnc#1068032
+
+This adds a generic memory barrier before the LD_IMM_DW and
+LDX_MEM_B/H/W/DW eBPF instructions during eBPF program
+execution in order to prevent speculative execution on
+out-of-bounds BPF_MAP array indexes. This way arbitrary
+kernel memory is not exposed through side-channel attacks.
+
+For more details, please see this Google Project Zero report: tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ kernel/bpf/core.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -29,6 +29,7 @@
+ #include <linux/bpf.h>
+
+ #include <asm/unaligned.h>
++#include <asm/barrier.h>
+
+ /* Registers */
+ #define BPF_R0 regs[BPF_REG_0]
+@@ -356,6 +357,7 @@ select_insn:
+ DST = IMM;
+ CONT;
+ LD_IMM_DW:
++ gmb();
+ DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
+ insn++;
+ CONT;
+@@ -570,6 +572,7 @@ out:
+ *(SIZE *)(unsigned long) (DST + insn->off) = IMM; \
+ CONT; \
+ LDX_MEM_##SIZEOP: \
++ gmb(); \
+ DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
+ CONT;
+
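
As a rough, self-contained illustration (not the kernel's interpreter; gmb()
is again stubbed with lfence and bpf_insn_stub is a made-up type), the barrier
is placed immediately before the program-steered load rather than at any
earlier check, because it is the dereference itself that must not execute
speculatively:

    #define gmb() __asm__ __volatile__("lfence" ::: "memory")

    struct bpf_insn_stub { short off; };    /* hypothetical, mirrors insn->off use */

    static unsigned long ldx_mem_dw(unsigned long src,
                                    const struct bpf_insn_stub *insn)
    {
            gmb();  /* fence before the program-steered load, as in the hunk above */
            return *(unsigned long *)(src + insn->off);
    }
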
diff --git a/patches.suse/0004-uvcvideo-prevent-speculative-execution.patch b/patches.suse/0004-uvcvideo-prevent-speculative-execution.patch
new file mode 100644
index 0000000000..f88bb061bd
--- /dev/null
+++ b/patches.suse/0004-uvcvideo-prevent-speculative-execution.patch
@@ -0,0 +1,26 @@
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:46 +0300
+Subject: uvcvideo: prevent speculative execution
+References: bsc#1068032 CVE-2017-5753
+Patch-mainline: submitted on 2018/1/9
+References: bnc#1068032
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ drivers/media/usb/uvc/uvc_v4l2.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/media/usb/uvc/uvc_v4l2.c
++++ b/drivers/media/usb/uvc/uvc_v4l2.c
+@@ -811,6 +811,7 @@ static int uvc_ioctl_enum_input(struct f
+ }
+ pin = iterm->id;
+ } else if (index < selector->bNrInPins) {
++ gmb();
+ pin = selector->baSourceID[index];
+ list_for_each_entry(iterm, &chain->entities, chain) {
+ if (!UVC_ENTITY_IS_ITERM(iterm))
diff --git a/patches.suse/0005-carl9170-prevent-speculative-execution.patch b/patches.suse/0005-carl9170-prevent-speculative-execution.patch
new file mode 100644
index 0000000000..5504023733
--- /dev/null
+++ b/patches.suse/0005-carl9170-prevent-speculative-execution.patch
@@ -0,0 +1,26 @@
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:47 +0300
+Subject: carl9170: prevent speculative execution
+References: bsc#1068032 CVE-2017-5753
+Patch-mainline: submitted on 2018/1/9
+References: bnc#1068032
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ drivers/net/wireless/ath/carl9170/main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/wireless/ath/carl9170/main.c
++++ b/drivers/net/wireless/ath/carl9170/main.c
+@@ -1388,6 +1388,7 @@ static int carl9170_op_conf_tx(struct ie
+
+ mutex_lock(&ar->mutex);
+ if (queue < ar->hw->queues) {
++ gmb();
+ memcpy(&ar->edcf[ar9170_qmap[queue]], param, sizeof(*param));
+ ret = carl9170_set_qos(ar);
+ } else {
diff --git a/patches.suse/0006-p54-prevent-speculative-execution.patch b/patches.suse/0006-p54-prevent-speculative-execution.patch
new file mode 100644
index 0000000000..6b2246e7cd
--- /dev/null
+++ b/patches.suse/0006-p54-prevent-speculative-execution.patch
@@ -0,0 +1,26 @@
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:48 +0300
+Subject: p54: prevent speculative execution
+References: bsc#1068032 CVE-2017-5753
+Patch-mainline: submitted on 2018/1/9
+References: bnc#1068032
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ drivers/net/wireless/p54/main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/wireless/p54/main.c
++++ b/drivers/net/wireless/p54/main.c
+@@ -415,6 +415,7 @@ static int p54_conf_tx(struct ieee80211_
+
+ mutex_lock(&priv->conf_mutex);
+ if (queue < dev->queues) {
++ gmb();
+ P54_SET_QUEUE(priv->qos_params[queue], params->aifs,
+ params->cw_min, params->cw_max, params->txop);
+ ret = p54_set_edcf(priv);
diff --git a/patches.suse/0007-qla2xxx-prevent-speculative-execution.patch b/patches.suse/0007-qla2xxx-prevent-speculative-execution.patch
new file mode 100644
index 0000000000..13eec6d89b
--- /dev/null
+++ b/patches.suse/0007-qla2xxx-prevent-speculative-execution.patch
@@ -0,0 +1,48 @@
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:49 +0300
+Subject: qla2xxx: prevent speculative execution
+References: bsc#1068032 CVE-2017-5753
+Patch-mainline: submitted on 2018/1/9
+References: bnc#1068032
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ drivers/scsi/qla2xxx/qla_mr.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/scsi/qla2xxx/qla_mr.c
++++ b/drivers/scsi/qla2xxx/qla_mr.c
+@@ -2302,10 +2302,12 @@ qlafx00_status_entry(scsi_qla_host_t *vh
+ req = ha->req_q_map[que];
+
+ /* Validate handle. */
+- if (handle < req->num_outstanding_cmds)
++ if (handle < req->num_outstanding_cmds) {
++ gmb();
+ sp = req->outstanding_cmds[handle];
+- else
++ } else {
+ sp = NULL;
++ }
+
+ if (sp == NULL) {
+ ql_dbg(ql_dbg_io, vha, 0x3034,
+@@ -2653,10 +2655,12 @@ qlafx00_multistatus_entry(struct scsi_ql
+ req = ha->req_q_map[que];
+
+ /* Validate handle. */
+- if (handle < req->num_outstanding_cmds)
++ if (handle < req->num_outstanding_cmds) {
++ gmb();
+ sp = req->outstanding_cmds[handle];
+- else
++ } else {
+ sp = NULL;
++ }
+
+ if (sp == NULL) {
+ ql_dbg(ql_dbg_io, vha, 0x3044,
diff --git a/patches.suse/0008-cw1200-prevent-speculative-execution.patch b/patches.suse/0008-cw1200-prevent-speculative-execution.patch
new file mode 100644
index 0000000000..3ce0179fca
--- /dev/null
+++ b/patches.suse/0008-cw1200-prevent-speculative-execution.patch
@@ -0,0 +1,26 @@
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:50 +0300
+Subject: cw1200: prevent speculative execution
+References: bsc#1068032 CVE-2017-5753
+Patch-mainline: submitted on 2018/1/9
+References: bnc#1068032
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ drivers/net/wireless/cw1200/sta.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/wireless/cw1200/sta.c
++++ b/drivers/net/wireless/cw1200/sta.c
+@@ -619,6 +619,7 @@ int cw1200_conf_tx(struct ieee80211_hw *
+ mutex_lock(&priv->conf_mutex);
+
+ if (queue < dev->queues) {
++ gmb();
+ old_uapsd_flags = le16_to_cpu(priv->uapsd_info.uapsd_flags);
+
+ WSM_TX_QUEUE_SET(&priv->tx_queue_params, queue, 0, 0, 0);
diff --git a/patches.suse/0009-Thermal-int340x-prevent-speculative-execution.patch b/patches.suse/0009-Thermal-int340x-prevent-speculative-execution.patch
new file mode 100644
index 0000000000..3adaba2d8d
--- /dev/null
+++ b/patches.suse/0009-Thermal-int340x-prevent-speculative-execution.patch
@@ -0,0 +1,40 @@
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:51 +0300
+Subject: Thermal/int340x: prevent speculative execution
+References: bsc#1068032 CVE-2017-5753
+Patch-mainline: submitted on 2018/1/9
+References: bnc#1068032
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ drivers/thermal/int340x_thermal/int340x_thermal_zone.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/drivers/thermal/int340x_thermal/int340x_thermal_zone.c
++++ b/drivers/thermal/int340x_thermal/int340x_thermal_zone.c
+@@ -57,15 +57,16 @@ static int int340x_thermal_get_trip_temp
+ if (d->override_ops && d->override_ops->get_trip_temp)
+ return d->override_ops->get_trip_temp(zone, trip, temp);
+
+- if (trip < d->aux_trip_nr)
++ if (trip < d->aux_trip_nr) {
++ gmb();
+ *temp = d->aux_trips[trip];
+- else if (trip == d->crt_trip_id)
++ } else if (trip == d->crt_trip_id) {
+ *temp = d->crt_temp;
+- else if (trip == d->psv_trip_id)
++ } else if (trip == d->psv_trip_id) {
+ *temp = d->psv_temp;
+- else if (trip == d->hot_trip_id)
++ } else if (trip == d->hot_trip_id) {
+ *temp = d->hot_temp;
+- else {
++ } else {
+ for (i = 0; i < INT340X_THERMAL_MAX_ACT_TRIP_COUNT; i++) {
+ if (d->act_trips[i].valid &&
+ d->act_trips[i].id == trip) {
diff --git a/patches.suse/0010-userns-prevent-speculative-execution.patch b/patches.suse/0010-userns-prevent-speculative-execution.patch
new file mode 100644
index 0000000000..ea4477dcbb
--- /dev/null
+++ b/patches.suse/0010-userns-prevent-speculative-execution.patch
@@ -0,0 +1,30 @@
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:52 +0300
+Subject: userns: prevent speculative execution
+References: bsc#1068032 CVE-2017-5753
+Patch-mainline: submitted on 2018/1/9
+References: bnc#1068032
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ kernel/user_namespace.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -496,8 +496,10 @@ static void *m_start(struct seq_file *se
+ struct uid_gid_extent *extent = NULL;
+ loff_t pos = *ppos;
+
+- if (pos < map->nr_extents)
++ if (pos < map->nr_extents) {
++ gmb();
+ extent = &map->extent[pos];
++ }
+
+ return extent;
+ }
diff --git a/patches.suse/0011-ipv6-prevent-speculative-execution.patch b/patches.suse/0011-ipv6-prevent-speculative-execution.patch
new file mode 100644
index 0000000000..09c807fde2
--- /dev/null
+++ b/patches.suse/0011-ipv6-prevent-speculative-execution.patch
@@ -0,0 +1,26 @@
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:53 +0300
+Subject: ipv6: prevent speculative execution
+References: bsc#1068032 CVE-2017-5753
+Patch-mainline: submitted on 2018/1/9
+References: bnc#1068032
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/ipv6/raw.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/raw.c
++++ b/net/ipv6/raw.c
+@@ -714,6 +714,7 @@ static int raw6_getfrag(void *from, char
+ if (offset < rfv->hlen) {
+ int copy = min(rfv->hlen - offset, len);
+
++ gmb();
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ memcpy(to, rfv->c + offset, copy);
+ else
diff --git a/patches.suse/0012-fs-prevent-speculative-execution.patch b/patches.suse/0012-fs-prevent-speculative-execution.patch
new file mode 100644
index 0000000000..bb73918b3b
--- /dev/null
+++ b/patches.suse/0012-fs-prevent-speculative-execution.patch
@@ -0,0 +1,30 @@
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:54 +0300
+Subject: fs: prevent speculative execution
+References: bsc#1068032 CVE-2017-5753
+Patch-mainline: submitted on 2018/1/9
+References: bnc#1068032
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/linux/fdtable.h | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/include/linux/fdtable.h
++++ b/include/linux/fdtable.h
+@@ -81,8 +81,10 @@ static inline struct file *__fcheck_file
+ {
+ struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+
+- if (fd < fdt->max_fds)
++ if (fd < fdt->max_fds) {
++ gmb();
+ return rcu_dereference_raw(fdt->fd[fd]);
++ }
+ return NULL;
+ }
+
diff --git a/patches.suse/0013-net-mpls-prevent-speculative-execution.patch b/patches.suse/0013-net-mpls-prevent-speculative-execution.patch
new file mode 100644
index 0000000000..6428a4e8f6
--- /dev/null
+++ b/patches.suse/0013-net-mpls-prevent-speculative-execution.patch
@@ -0,0 +1,27 @@
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:55 +0300
+Subject: net: mpls: prevent speculative execution
+References: bsc#1068032 CVE-2017-5753
+Patch-mainline: submitted on 2018/1/9
+References: bnc#1068032
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/mpls/af_mpls.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/mpls/af_mpls.c
++++ b/net/mpls/af_mpls.c
+@@ -43,6 +43,8 @@ static struct mpls_route *mpls_route_inp
+ if (index < net->mpls.platform_labels) {
+ struct mpls_route __rcu **platform_label =
+ rcu_dereference(net->mpls.platform_label);
++
++ gmb();
+ rt = rcu_dereference(platform_label[index]);
+ }
+ return rt;
diff --git a/patches.suse/0014-udf-prevent-speculative-execution.patch b/patches.suse/0014-udf-prevent-speculative-execution.patch
new file mode 100644
index 0000000000..90c370bbf6
--- /dev/null
+++ b/patches.suse/0014-udf-prevent-speculative-execution.patch
@@ -0,0 +1,45 @@
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:56 +0300
+Subject: udf: prevent speculative execution
+References: bsc#1068032 CVE-2017-5753
+Patch-mainline: submitted on 2018/1/9
+References: bnc#1068032
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ fs/udf/misc.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/udf/misc.c
++++ b/fs/udf/misc.c
+@@ -104,6 +104,8 @@ struct genericFormat *udf_add_extendedat
+ iinfo->i_lenEAttr) {
+ uint32_t aal =
+ le32_to_cpu(eahd->appAttrLocation);
++
++ gmb();
+ memmove(&ea[offset - aal + size],
+ &ea[aal], offset - aal);
+ offset -= aal;
+@@ -114,6 +116,8 @@ struct genericFormat *udf_add_extendedat
+ iinfo->i_lenEAttr) {
+ uint32_t ial =
+ le32_to_cpu(eahd->impAttrLocation);
++
++ gmb();
+ memmove(&ea[offset - ial + size],
+ &ea[ial], offset - ial);
+ offset -= ial;
+@@ -125,6 +129,8 @@ struct genericFormat *udf_add_extendedat
+ iinfo->i_lenEAttr) {
+ uint32_t aal =
+ le32_to_cpu(eahd->appAttrLocation);
++
++ gmb();
+ memmove(&ea[offset - aal + size],
+ &ea[aal], offset - aal);
+ offset -= aal;
diff --git a/patches.suse/01-x86-feature-enable-the-x86-feature-to-control-speculation.patch b/patches.suse/01-x86-feature-enable-the-x86-feature-to-control-speculation.patch
new file mode 100644
index 0000000000..1b9eacffe3
--- /dev/null
+++ b/patches.suse/01-x86-feature-enable-the-x86-feature-to-control-speculation.patch
@@ -0,0 +1,71 @@
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Thu, 24 Aug 2017 09:34:41 -0700
+Subject: x86/feature: Enable the x86 feature to control Speculation
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+References: bsc#1068032
+Patch-mainline: submitted on 2018/1/9
+
+CPUID leaf 0x7 returns EDX bit 26 to indicate the presence of this feature,
+which enumerates the MSRs IA32_SPEC_CTRL (0x48) and IA32_PRED_CMD (0x49):
+IA32_SPEC_CTRL, bit 0 – Indirect Branch Restricted Speculation (IBRS)
+IA32_PRED_CMD, bit 0 – Indirect Branch Prediction Barrier (IBPB)
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/cpufeature.h | 1 +
+ arch/x86/include/asm/msr-index.h | 5 +++++
+ arch/x86/kernel/cpu/scattered.c | 1 +
+ 3 files changed, 7 insertions(+)
+
+diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
+index 41c4220d37f5..97701e2fd274 100644
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -203,6 +203,7 @@
+ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
++#define X86_FEATURE_SPEC_CTRL ( 7*32+19) /* Control Speculation Control */
+
+ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index c7258ca9d4f4..d0dd37c81da5 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -32,6 +32,9 @@
+ #define EFER_FFXSR (1<<_EFER_FFXSR)
+
+ /* Intel MSRs. Some also available on other CPUs */
++#define MSR_IA32_SPEC_CTRL 0x00000048
++#define MSR_IA32_PRED_CMD 0x00000049
++
+ #define MSR_IA32_PERFCTR0 0x000000c1
+ #define MSR_IA32_PERFCTR1 0x000000c2
+ #define MSR_FSB_FREQ 0x000000cd
+@@ -425,6 +428,8 @@
+ #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
+ #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
+ #define FEATURE_CONTROL_LMCE (1<<20)
++#define FEATURE_ENABLE_IBRS (1<<0)
++#define FEATURE_SET_IBPB (1<<0)
+
+ #define MSR_IA32_APICBASE 0x0000001b
+ #define MSR_IA32_APICBASE_BSP (1<<8)
+diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
+index dc847057c83c..ce84c60b227c 100644
+--- a/arch/x86/kernel/cpu/scattered.c
++++ b/arch/x86/kernel/cpu/scattered.c
+@@ -34,6 +34,7 @@ static const struct cpuid_bit cpuid_bits[] = {
+ { X86_FEATURE_INTEL_PT, CPUID_EBX,25, 0x00000007, 0 },
+ { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 },
+ { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 },
++ { X86_FEATURE_SPEC_CTRL, CPUID_EDX,26, 0x00000007, 0 },
+ { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
+ { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
+ { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX,11, 0x80000007, 0 },
+
+
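
A hedged user-space sketch of the enumeration described above, assuming a
toolchain recent enough to provide __get_cpuid_count() in <cpuid.h>; the
kernel itself picks the bit up through the scattered cpuid_bits[] table added
in this patch:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            /* CPUID.(EAX=7,ECX=0):EDX[26] enumerates IBRS/IBPB support. */
            if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
                    return 1;

            if (edx & (1u << 26))
                    printf("SPEC_CTRL: IA32_SPEC_CTRL (0x48) and IA32_PRED_CMD (0x49) available\n");
            else
                    printf("SPEC_CTRL not enumerated\n");

            return 0;
    }
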
diff --git a/patches.suse/02-x86-enter-add-macros-to-set-clear-ibrs-and-set-ibpb.patch b/patches.suse/02-x86-enter-add-macros-to-set-clear-ibrs-and-set-ibpb.patch
new file mode 100644
index 0000000000..c2569e1a12
--- /dev/null
+++ b/patches.suse/02-x86-enter-add-macros-to-set-clear-ibrs-and-set-ibpb.patch
@@ -0,0 +1,87 @@
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Fri, 15 Sep 2017 18:04:53 -0700
+Subject: x86/enter: Add macros to set/clear IBRS and set IBPB
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Add setup macros to control IBRS and IBPB.
+
+Boris:
+
+Change the alternatives to jump over the code so that backports
+to older versions are easier, since ALTERNATIVEs padding was only
+introduced in v4.1.
+
+Also, make them proper asm macros.
+
+Also, use XOR to zero out regs.
+
+Also, fold in __ENABLE_IBRS_CLOBBER into the other macros.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/spec_ctrl.h | 54 ++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 54 insertions(+)
+
+diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
+new file mode 100644
+index 000000000000..5e8c4124abed
+--- /dev/null
++++ b/arch/x86/include/asm/spec_ctrl.h
+@@ -0,0 +1,54 @@
++#ifndef _ASM_X86_SPEC_CTRL_H
++#define _ASM_X86_SPEC_CTRL_H
++
++#include <linux/stringify.h>
++#include <asm/msr-index.h>
++#include <asm/cpufeature.h>
++#include <asm/alternative-asm.h>
++
++#ifdef __ASSEMBLY__
++
++.macro __ENABLE_IBRS_CLOBBER
++ movl $MSR_IA32_SPEC_CTRL, %ecx
++ xorl %edx, %edx
++ movl $FEATURE_ENABLE_IBRS, %eax
++ wrmsr
++.endm
++
++.macro ENABLE_IBRS_CLOBBER
++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_SPEC_CTRL
++ __ENABLE_IBRS_CLOBBER
++.Lend_\@:
++.endm
++
++
++.macro ENABLE_IBRS
++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_SPEC_CTRL
++ pushq %rax
++ pushq %rcx
++ pushq %rdx
++ __ENABLE_IBRS_CLOBBER
++ popq %rdx
++ popq %rcx
++ popq %rax
++.Lend_\@:
++.endm
++
++
++.macro DISABLE_IBRS
++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_SPEC_CTRL
++ pushq %rax
++ pushq %rcx
++ pushq %rdx
++ movl $MSR_IA32_SPEC_CTRL, %ecx
++ xorl %edx, %edx
++ xorl %eax, %eax
++ wrmsr
++ popq %rdx
++ popq %rcx
++ popq %rax
++.Lend_\@:
++.endm
++
++#endif /* __ASSEMBLY__ */
++#endif /* _ASM_X86_SPEC_CTRL_H */
+
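
Once the alternative is patched in, ENABLE_IBRS/DISABLE_IBRS boil down to a
single WRMSR of MSR_IA32_SPEC_CTRL with bit 0 set or cleared; the push/pop in
the asm only preserves caller registers because WRMSR consumes EAX/ECX/EDX.
A C-level sketch of that logic (illustrative only; a later patch in this
series adds the kernel's real x86_enable_ibrs()/x86_disable_ibrs() accessors):

    #define MSR_IA32_SPEC_CTRL   0x00000048
    #define FEATURE_ENABLE_IBRS  (1 << 0)

    static inline void wrmsr64(unsigned int msr, unsigned long long val)
    {
            /* ECX selects the MSR, EDX:EAX carries the 64-bit value. */
            __asm__ __volatile__("wrmsr" : : "c"(msr),
                                 "a"((unsigned int)val),
                                 "d"((unsigned int)(val >> 32)) : "memory");
    }

    static inline void enable_ibrs(void)
    {
            wrmsr64(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
    }

    static inline void disable_ibrs(void)
    {
            wrmsr64(MSR_IA32_SPEC_CTRL, 0);
    }
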
diff --git a/patches.suse/03-x86-entry-use-ibrs-on-entry-to-kernel-space.patch b/patches.suse/03-x86-entry-use-ibrs-on-entry-to-kernel-space.patch
new file mode 100644
index 0000000000..0886eb3827
--- /dev/null
+++ b/patches.suse/03-x86-entry-use-ibrs-on-entry-to-kernel-space.patch
@@ -0,0 +1,184 @@
+From: Borislav Petkov <bp@suse.de>
+Date: Fri, 15 Dec 2017 19:56:13 +0100
+Subject: x86/entry: Use IBRS on entry to kernel space
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Toggle IBRS on entry to kernel space: enable *after* CR3 write and
+disable *before* CR3 write.
+
+Originally-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/entry/entry_64.S | 22 ++++++++++++++++++++++
+ arch/x86/entry/entry_64_compat.S | 9 +++++++++
+ 2 files changed, 31 insertions(+)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 038aa6a8b824..cc74fbce80f4 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -36,6 +36,7 @@
+ #include <asm/smap.h>
+ #include <asm/pgtable_types.h>
+ #include <asm/kaiser.h>
++#include <asm/spec_ctrl.h>
+ #include <linux/err.h>
+
+ /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
+@@ -173,6 +174,8 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
+ pushq %r11 /* pt_regs->r11 */
+ sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
+
++ ENABLE_IBRS
++
+ testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz tracesys
+ entry_SYSCALL_64_fastpath:
+@@ -211,6 +214,9 @@ entry_SYSCALL_64_fastpath:
+
+ movq RIP(%rsp), %rcx
+ movq EFLAGS(%rsp), %r11
++
++ DISABLE_IBRS
++
+ RESTORE_C_REGS_EXCEPT_RCX_R11
+ /*
+ * This opens a window where we have a user CR3, but are
+@@ -355,6 +361,8 @@ GLOBAL(int_ret_from_sys_call)
+ * perf profiles. Nothing jumps here.
+ */
+ syscall_return_via_sysret:
++ DISABLE_IBRS
++
+ /* rcx and r11 are already restored (see code above) */
+ RESTORE_C_REGS_EXCEPT_RCX_R11
+ /*
+@@ -369,6 +377,8 @@ syscall_return_via_sysret:
+ USERGS_SYSRET64
+
+ opportunistic_sysret_failed:
++ DISABLE_IBRS
++
+ /*
+ * This opens a window where we have a user CR3, but are
+ * running in the kernel. This makes using the CS
+@@ -536,6 +546,7 @@ END(irq_entries_start)
+ */
+ SWAPGS
+ SWITCH_KERNEL_CR3
++ ENABLE_IBRS
+
+ /*
+ * We need to tell lockdep that IRQs are off. We can't do this until
+@@ -593,6 +604,7 @@ GLOBAL(retint_user)
+ mov %rsp,%rdi
+ call prepare_exit_to_usermode
+ TRACE_IRQS_IRETQ
++ DISABLE_IBRS
+ SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_regs_and_iret
+@@ -1091,6 +1103,7 @@ ENTRY(paranoid_entry)
+ movq %rax, %cr3
+ 2:
+ #endif
++ ENABLE_IBRS
+ ret
+ END(paranoid_entry)
+
+@@ -1113,6 +1126,7 @@ ENTRY(paranoid_exit)
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF_DEBUG
+ TRACE_IRQS_IRETQ_DEBUG
++ DISABLE_IBRS
+ #ifdef CONFIG_KAISER
+ /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */
+ testl $2, %ebx /* SWITCH_USER_CR3 needed? */
+@@ -1156,6 +1170,8 @@ ENTRY(error_entry)
+ */
+ SWAPGS
+
++ ENABLE_IBRS
++
+ .Lerror_entry_from_usermode_after_swapgs:
+ /*
+ * We need to tell lockdep that IRQs are off. We can't do this until
+@@ -1200,6 +1216,7 @@ ENTRY(error_entry)
+ /* fall through */
+
+ .Lerror_bad_iret:
++ ENABLE_IBRS_CLOBBER
+ /*
+ * We came from an IRET to user mode, so we have user gsbase.
+ * Switch to kernel gsbase:
+@@ -1354,8 +1371,13 @@ ENTRY(nmi)
+ movq %rax, %cr3
+ 2:
+ #endif
++
++ ENABLE_IBRS
++
+ call do_nmi
+
++ DISABLE_IBRS
++
+ #ifdef CONFIG_KAISER
+ /*
+ * Unconditionally restore CR3. I know we return to
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index 7519c638b0f0..a907572d7591 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -15,6 +15,7 @@
+ #include <asm/smap.h>
+ #include <asm/pgtable_types.h>
+ #include <asm/kaiser.h>
++#include <asm/spec_ctrl.h>
+ #include <linux/linkage.h>
+ #include <linux/err.h>
+
+@@ -99,6 +100,8 @@ ENTRY(entry_SYSENTER_compat)
+ pushq %r8 /* pt_regs->r15 = 0 */
+ cld
+
++ ENABLE_IBRS
++
+ /*
+ * Sysenter doesn't filter flags, so we need to clear NT
+ * ourselves. To save a few cycles, we can check whether
+@@ -197,6 +200,8 @@ ENTRY(entry_SYSCALL_compat)
+ pushq %r8 /* pt_regs->r14 = 0 */
+ pushq %r8 /* pt_regs->r15 = 0 */
+
++ ENABLE_IBRS
++
+ /*
+ * User mode is traced as though IRQs are on, and SYSENTER
+ * turned them off.
+@@ -212,6 +217,7 @@ ENTRY(entry_SYSCALL_compat)
+ /* Opportunistic SYSRET */
+ sysret32_from_system_call:
+ TRACE_IRQS_ON /* User mode traces as IRQs on. */
++ DISABLE_IBRS
+ SWITCH_USER_CR3
+ movq RBX(%rsp), %rbx /* pt_regs->rbx */
+ movq RBP(%rsp), %rbp /* pt_regs->rbp */
+@@ -305,6 +311,8 @@ ENTRY(entry_INT80_compat)
+ pushq %r15 /* pt_regs->r15 */
+ cld
+
++ ENABLE_IBRS
++
+ /*
+ * User mode is traced as though IRQs are on, and the interrupt
+ * gate turned them off.
+@@ -317,6 +325,7 @@ ENTRY(entry_INT80_compat)
+
+ /* Go back to user mode. */
+ TRACE_IRQS_ON
++ DISABLE_IBRS
+ SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_regs_and_iret
+
diff --git a/patches.suse/04-x86-msr-move-native_-msr-u64-to-msr-h.patch b/patches.suse/04-x86-msr-move-native_-msr-u64-to-msr-h.patch
new file mode 100644
index 0000000000..ee8dc83bc4
--- /dev/null
+++ b/patches.suse/04-x86-msr-move-native_-msr-u64-to-msr-h.patch
@@ -0,0 +1,69 @@
+From: Borislav Petkov <bp@suse.de>
+Date: Sat, 16 Dec 2017 12:57:44 +0100
+Subject: x86/MSR: Move native_*msr(.. u64) to msr.h
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Move them to the msr.h header for wider use.
+
+No functionality change.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/microcode.h | 15 +--------------
+ arch/x86/include/asm/msr.h | 15 +++++++++++++++
+ 2 files changed, 16 insertions(+), 14 deletions(-)
+
+diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
+index 712b24ed3a64..b811c9efdc98 100644
+--- a/arch/x86/include/asm/microcode.h
++++ b/arch/x86/include/asm/microcode.h
+@@ -4,20 +4,7 @@
+ #include <linux/earlycpio.h>
+ #include <linux/initrd.h>
+
+-#define native_rdmsr(msr, val1, val2) \
+-do { \
+- u64 __val = native_read_msr((msr)); \
+- (void)((val1) = (u32)__val); \
+- (void)((val2) = (u32)(__val >> 32)); \
+-} while (0)
+-
+-#define native_wrmsr(msr, low, high) \
+- native_write_msr(msr, low, high)
+-
+-#define native_wrmsrl(msr, val) \
+- native_write_msr((msr), \
+- (u32)((u64)(val)), \
+- (u32)((u64)(val) >> 32))
++#include <asm/msr.h>
+
+ struct cpu_signature {
+ unsigned int sig;
+diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
+index 77d8b284e4a7..cc937a3f3577 100644
+--- a/arch/x86/include/asm/msr.h
++++ b/arch/x86/include/asm/msr.h
+@@ -163,6 +163,21 @@ static inline unsigned long long native_read_pmc(int counter)
+ return EAX_EDX_VAL(val, low, high);
+ }
+
++#define native_rdmsr(msr, val1, val2) \
++do { \
++ u64 __val = native_read_msr((msr)); \
++ (void)((val1) = (u32)__val); \
++ (void)((val2) = (u32)(__val >> 32)); \
++} while (0)
++
++#define native_wrmsr(msr, low, high) \
++ native_write_msr(msr, low, high)
++
++#define native_wrmsrl(msr, val) \
++ native_write_msr((msr), \
++ (u32)((u64)(val)), \
++ (u32)((u64)(val) >> 32))
++
+ #ifdef CONFIG_PARAVIRT
+ #include <asm/paravirt.h>
+ #else
+
diff --git a/patches.suse/05-x86-spec-add-ibrs-control-functions.patch b/patches.suse/05-x86-spec-add-ibrs-control-functions.patch
new file mode 100644
index 0000000000..a4edf54a4a
--- /dev/null
+++ b/patches.suse/05-x86-spec-add-ibrs-control-functions.patch
@@ -0,0 +1,71 @@
+From: Borislav Petkov <bp@suse.de>
+Date: Sat, 16 Dec 2017 17:50:52 +0100
+Subject: x86/spec: Add IBRS control functions
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+... into a separate compilation unit.
+
+Carved out from a patch by Tim Chen <tim.c.chen@linux.intel.com>.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/spec_ctrl.h | 3 +++
+ arch/x86/kernel/cpu/Makefile | 1 +
+ arch/x86/kernel/cpu/spec_ctrl.c | 22 ++++++++++++++++++++++
+ 3 files changed, 26 insertions(+)
+
+diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
+index 5e8c4124abed..cae607a2fb6c 100644
+--- a/arch/x86/include/asm/spec_ctrl.h
++++ b/arch/x86/include/asm/spec_ctrl.h
+@@ -50,5 +50,8 @@
+ .Lend_\@:
+ .endm
+
++#else /* __ASSEMBLY__ */
++void x86_enable_ibrs(void);
++void x86_disable_ibrs(void);
+ #endif /* __ASSEMBLY__ */
+ #endif /* _ASM_X86_SPEC_CTRL_H */
+diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
+index 58031303e304..c8205ff2a0f3 100644
+--- a/arch/x86/kernel/cpu/Makefile
++++ b/arch/x86/kernel/cpu/Makefile
+@@ -16,6 +16,7 @@ obj-y := intel_cacheinfo.o scattered.o topology.o
+ obj-y += common.o
+ obj-y += rdrand.o
+ obj-y += match.o
++obj-y += spec_ctrl.o
+
+ obj-$(CONFIG_PROC_FS) += proc.o
+ obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
+diff --git a/arch/x86/kernel/cpu/spec_ctrl.c b/arch/x86/kernel/cpu/spec_ctrl.c
+new file mode 100644
+index 000000000000..10c2c6fff3e5
+--- /dev/null
++++ b/arch/x86/kernel/cpu/spec_ctrl.c
+@@ -0,0 +1,22 @@
++/*
++ * Speculation control stuff
++ *
++ */
++
++#include <asm/msr.h>
++#include <asm/processor.h>
++#include <asm/spec_ctrl.h>
++
++void x86_disable_ibrs(void)
++{
++ if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++}
++EXPORT_SYMBOL_GPL(x86_disable_ibrs);
++
++void x86_enable_ibrs(void)
++{
++ if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++ native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
++}
++EXPORT_SYMBOL_GPL(x86_enable_ibrs);
+
diff --git a/patches.suse/06-x86-idle-toggle-ibrs-when-going-idle.patch b/patches.suse/06-x86-idle-toggle-ibrs-when-going-idle.patch
new file mode 100644
index 0000000000..cc2e4cee2e
--- /dev/null
+++ b/patches.suse/06-x86-idle-toggle-ibrs-when-going-idle.patch
@@ -0,0 +1,98 @@
+From: Borislav Petkov <bp@suse.de>
+Date: Sat, 16 Dec 2017 17:59:42 +0100
+Subject: x86/idle: Toggle IBRS when going idle
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Disable IBRS when entering idle and re-enable it on exit.
+
+Originally-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/mwait.h | 6 ++++++
+ arch/x86/kernel/process.c | 5 +++++
+ arch/x86/lib/delay.c | 8 ++++++++
+ 3 files changed, 19 insertions(+)
+
+diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
+index c70689b5e5aa..d1def1422510 100644
+--- a/arch/x86/include/asm/mwait.h
++++ b/arch/x86/include/asm/mwait.h
+@@ -3,6 +3,8 @@
+
+ #include <linux/sched.h>
+
++#include <asm/spec_ctrl.h>
++
+ #define MWAIT_SUBSTATE_MASK 0xf
+ #define MWAIT_CSTATE_MASK 0xf
+ #define MWAIT_SUBSTATE_SIZE 4
+@@ -102,9 +104,13 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+ mb();
+ }
+
++ x86_disable_ibrs();
++
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ if (!need_resched())
+ __mwait(eax, ecx);
++
++ x86_enable_ibrs();
+ }
+ current_clr_polling();
+ }
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
+index a662c21039d9..064a8d5a9bc8 100644
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -422,11 +422,16 @@ static void mwait_idle(void)
+ smp_mb(); /* quirk */
+ }
+
++ x86_disable_ibrs();
++
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ if (!need_resched())
+ __sti_mwait(0, 0);
+ else
+ local_irq_enable();
++
++ x86_enable_ibrs();
++
+ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
+ } else {
+ local_irq_enable();
+diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
+index 45772560aceb..ee269309efcc 100644
+--- a/arch/x86/lib/delay.c
++++ b/arch/x86/lib/delay.c
+@@ -26,6 +26,8 @@
+ # include <asm/smp.h>
+ #endif
+
++#define IBRS_DISABLE_THRESHOLD 1000
++
+ /* simple loop based delay: */
+ static void delay_loop(unsigned long loops)
+ {
+@@ -105,6 +107,9 @@ static void delay_mwaitx(unsigned long __loops)
+ for (;;) {
+ delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
+
++ if (delay > IBRS_DISABLE_THRESHOLD)
++ x86_disable_ibrs();
++
+ /*
+ * Use cpu_tss as a cacheline-aligned, seldomly
+ * accessed per-cpu variable as the monitor target.
+@@ -118,6 +123,9 @@ static void delay_mwaitx(unsigned long __loops)
+ */
+ __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
+
++ if (delay > IBRS_DISABLE_THRESHOLD)
++ x86_enable_ibrs();
++
+ end = rdtsc_ordered();
+
+ if (loops <= end - start)
+
diff --git a/patches.suse/07-x86-idle-disable-ibrs-when-offlining-a-cpu-and-re-enable-on-wakeup.patch b/patches.suse/07-x86-idle-disable-ibrs-when-offlining-a-cpu-and-re-enable-on-wakeup.patch
new file mode 100644
index 0000000000..f660899149
--- /dev/null
+++ b/patches.suse/07-x86-idle-disable-ibrs-when-offlining-a-cpu-and-re-enable-on-wakeup.patch
@@ -0,0 +1,42 @@
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Wed, 15 Nov 2017 12:24:19 -0800
+Subject: x86/idle: Disable IBRS when offlining a CPU and re-enable on wakeup
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Clear IBRS when a CPU is offlined and set it again when bringing it back online.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+[ Switch to accessors. ]
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kernel/smpboot.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index 0ff1d9aaa2ac..2100ee34888c 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -75,6 +75,7 @@
+ #include <asm/i8259.h>
+ #include <asm/realmode.h>
+ #include <asm/misc.h>
++#include <asm/spec_ctrl.h>
+
+ /* Number of siblings per CPU package */
+ int smp_num_siblings = 1;
+@@ -1544,9 +1545,13 @@ void native_play_dead(void)
+ play_dead_common();
+ tboot_shutdown(TB_SHUTDOWN_WFS);
+
++ x86_disable_ibrs();
++
+ mwait_play_dead(); /* Only returns on failure */
+ if (cpuidle_play_dead())
+ hlt_play_dead();
++
++ x86_enable_ibrs();
+ }
+
+ #else /* ... !CONFIG_HOTPLUG_CPU */
+
diff --git a/patches.suse/08-x86-spec_ctrl-add-an-indirect-branch-predictor-barrier.patch b/patches.suse/08-x86-spec_ctrl-add-an-indirect-branch-predictor-barrier.patch
new file mode 100644
index 0000000000..6d0c991157
--- /dev/null
+++ b/patches.suse/08-x86-spec_ctrl-add-an-indirect-branch-predictor-barrier.patch
@@ -0,0 +1,33 @@
+From: Borislav Petkov <bp@suse.de>
+Date: Sat, 16 Dec 2017 18:18:34 +0100
+Subject: x86/spec_ctrl: Add an Indirect Branch Predictor barrier
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+... to call when code is context-switching to a separate address space
+and needs to prevent earlier code from influencing later branch
+prediction.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/spec_ctrl.h | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
+index cae607a2fb6c..a7355c87d34b 100644
+--- a/arch/x86/include/asm/spec_ctrl.h
++++ b/arch/x86/include/asm/spec_ctrl.h
+@@ -53,5 +53,12 @@
+ #else /* __ASSEMBLY__ */
+ void x86_enable_ibrs(void);
+ void x86_disable_ibrs(void);
++
++static inline void x86_ibp_barrier(void)
++{
++ if (static_cpu_has(X86_FEATURE_SPEC_CTRL))
++ native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
++}
++
+ #endif /* __ASSEMBLY__ */
+ #endif /* _ASM_X86_SPEC_CTRL_H */
+
diff --git a/patches.suse/09-x86-mm-set-ibpb-upon-context-switch.patch b/patches.suse/09-x86-mm-set-ibpb-upon-context-switch.patch
new file mode 100644
index 0000000000..4e0c2a425d
--- /dev/null
+++ b/patches.suse/09-x86-mm-set-ibpb-upon-context-switch.patch
@@ -0,0 +1,37 @@
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Sat, 16 Dec 2017 18:25:12 +0100
+Subject: x86/mm: Set IBPB upon context switch
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Set IBPB on context switch when writing CR3.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+[ Convert to do x86_ibp_barrier(). ]
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/mm/tlb.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 0ebde77a29ab..b2dccc3b2cd7 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -14,6 +14,7 @@
+ #include <asm/apic.h>
+ #include <asm/uv/uv.h>
+ #include <asm/kaiser.h>
++#include <asm/spec_ctrl.h>
+
+ /*
+ * TLB flushing, formerly SMP-only
+@@ -104,6 +105,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ unsigned cpu = smp_processor_id();
+
+ if (likely(prev != next)) {
++ x86_ibp_barrier();
++
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ this_cpu_write(cpu_tlbstate.active_mm, next);
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
diff --git a/patches.suse/10-ptrace-add-a-new-thread-access-check.patch b/patches.suse/10-ptrace-add-a-new-thread-access-check.patch
new file mode 100644
index 0000000000..67f227b9d2
--- /dev/null
+++ b/patches.suse/10-ptrace-add-a-new-thread-access-check.patch
@@ -0,0 +1,91 @@
+From: Borislav Petkov <bp@suse.de>
+Date: Sat, 16 Dec 2017 18:32:52 +0100
+Subject: ptrace: Add a new thread access check
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+... which checks current and a target task. Add IBPB ptrace mode too.
+
+Carved out from a patch by Tim Chen <tim.c.chen@linux.intel.com>
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ include/linux/ptrace.h | 6 ++++++
+ kernel/ptrace.c | 18 ++++++++++++++----
+ 2 files changed, 20 insertions(+), 4 deletions(-)
+
+diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
+index 81fdf4b8aba4..c84b8817755b 100644
+--- a/include/linux/ptrace.h
++++ b/include/linux/ptrace.h
+@@ -59,12 +59,15 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
+ #define PTRACE_MODE_NOAUDIT 0x04
+ #define PTRACE_MODE_FSCREDS 0x08
+ #define PTRACE_MODE_REALCREDS 0x10
++#define PTRACE_MODE_NOACCESS_CHK 0x20
+
+ /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
+ #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
+ #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
+ #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
+ #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)
++#define PTRACE_MODE_IBPB (PTRACE_MODE_ATTACH | PTRACE_MODE_NOAUDIT \
++ | PTRACE_MODE_NOACCESS_CHK | PTRACE_MODE_REALCREDS)
+
+ /**
+ * ptrace_may_access - check whether the caller is permitted to access
+@@ -82,6 +85,9 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
+ */
+ extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
+
++extern int ___ptrace_may_access(struct task_struct *cur, struct task_struct *task,
++ unsigned int mode);
++
+ static inline int ptrace_reparented(struct task_struct *child)
+ {
+ return !same_thread_group(child->real_parent, child->parent);
+diff --git a/kernel/ptrace.c b/kernel/ptrace.c
+index 5e2cd1030702..14cc49a52881 100644
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -235,9 +235,10 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
+ }
+
+ /* Returns 0 on success, -errno on denial. */
+-static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
++int ___ptrace_may_access(struct task_struct *cur, struct task_struct *task,
++ unsigned int mode)
+ {
+- const struct cred *cred = current_cred(), *tcred;
++ const struct cred *cred = __task_cred(cur), *tcred;
+ struct mm_struct *mm;
+ kuid_t caller_uid;
+ kgid_t caller_gid;
+@@ -257,7 +258,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
+ */
+
+ /* Don't let security modules deny introspection */
+- if (same_thread_group(task, current))
++ if (same_thread_group(task, cur))
+ return 0;
+ rcu_read_lock();
+ if (mode & PTRACE_MODE_FSCREDS) {
+@@ -295,7 +296,16 @@ ok:
+ !ptrace_has_cap(mm->user_ns, mode)))
+ return -EPERM;
+
+- return security_ptrace_access_check(task, mode);
++ if (!(mode & PTRACE_MODE_NOACCESS_CHK))
++ return security_ptrace_access_check(task, mode);
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(___ptrace_may_access);
++
++static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
++{
++ return ___ptrace_may_access(current, task, mode);
+ }
+
+ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
+
diff --git a/patches.suse/11-x86-mm-only-set-ibpb-when-the-new-thread-cannot-ptrace-current-thread.patch b/patches.suse/11-x86-mm-only-set-ibpb-when-the-new-thread-cannot-ptrace-current-thread.patch
new file mode 100644
index 0000000000..10014f08c0
--- /dev/null
+++ b/patches.suse/11-x86-mm-only-set-ibpb-when-the-new-thread-cannot-ptrace-current-thread.patch
@@ -0,0 +1,41 @@
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Sat, 16 Dec 2017 18:37:19 +0100
+Subject: x86/mm: Only set IBPB when the new thread cannot ptrace current thread
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+To reduce the overhead of setting IBPB, we only do that when the new thread
+cannot ptrace the current one. If the new thread has ptrace capability
+on the current thread, it is safe.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/mm/tlb.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index b2dccc3b2cd7..a5858460a730 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -7,6 +7,7 @@
+ #include <linux/module.h>
+ #include <linux/cpu.h>
+ #include <linux/debugfs.h>
++#include <linux/ptrace.h>
+
+ #include <asm/tlbflush.h>
+ #include <asm/mmu_context.h>
+@@ -105,7 +106,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ unsigned cpu = smp_processor_id();
+
+ if (likely(prev != next)) {
+- x86_ibp_barrier();
++
++ /* Null tsk means switching to kernel, so that's safe */
++ if (tsk && ___ptrace_may_access(tsk, current, PTRACE_MODE_IBPB))
++ x86_ibp_barrier();
+
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ this_cpu_write(cpu_tlbstate.active_mm, next);
+
diff --git a/patches.suse/12-x86-entry-add-a-function-to-overwrite-the-rsb.patch b/patches.suse/12-x86-entry-add-a-function-to-overwrite-the-rsb.patch
new file mode 100644
index 0000000000..2ba29bc5ef
--- /dev/null
+++ b/patches.suse/12-x86-entry-add-a-function-to-overwrite-the-rsb.patch
@@ -0,0 +1,110 @@
+From: Borislav Petkov <bp@suse.de>
+Date: Sat, 16 Dec 2017 18:45:35 +0100
+Subject: x86/entry: Add a function to overwrite the RSB
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Fill up the 32-entry Return Stack Buffer.
+
+Carved out from a patch by Tim Chen <tim.c.chen@linux.intel.com>
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/entry/entry_64.S | 70 ++++++++++++++++++++++++++++++++++++++++
+ arch/x86/include/asm/spec_ctrl.h | 4 +++
+ 2 files changed, 74 insertions(+)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index cc74fbce80f4..bc27d350f9c3 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1668,3 +1668,73 @@ ENTRY(ignore_sysret)
+ mov $-ENOSYS, %eax
+ sysret
+ END(ignore_sysret)
++
++ENTRY(stuff_rsb)
++ call 1f
++ pause
++1: call 2f
++ pause
++2: call 3f;
++ pause
++3: call 4f
++ pause
++4: call 5f
++ pause
++5: call 6f
++ pause
++6: call 7f
++ pause
++7: call 8f
++ pause
++8: call 9f
++ pause
++9: call 10f
++ pause
++10: call 11f
++ pause
++11: call 12f
++ pause
++12: call 13f
++ pause
++13: call 14f
++ pause
++14: call 15f
++ pause
++15: call 16f
++ pause
++16: call 17f
++ pause
++17: call 18f
++ pause
++18: call 19f
++ pause
++19: call 20f
++ pause
++20: call 21f
++ pause
++21: call 22f
++ pause
++22: call 23f
++ pause
++23: call 24f
++ pause
++24: call 25f
++ pause
++25: call 26f
++ pause
++26: call 27f
++ pause
++27: call 28f
++ pause
++28: call 29f
++ pause
++29: call 30f
++ pause
++30: call 31f
++ pause
++31: call 32f
++ pause
++32:
++ add $(32*8), %rsp
++ ret
++END(stuff_rsb)
+diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
+index a7355c87d34b..e584b91ebdd2 100644
+--- a/arch/x86/include/asm/spec_ctrl.h
++++ b/arch/x86/include/asm/spec_ctrl.h
+@@ -50,6 +50,10 @@
+ .Lend_\@:
+ .endm
+
++.macro STUFF_RSB
++ ALTERNATIVE "call stuff_rsb", "", X86_FEATURE_SMEP
++.endm
++
+ #else /* __ASSEMBLY__ */
+ void x86_enable_ibrs(void);
+ void x86_disable_ibrs(void);
+
diff --git a/patches.suse/13-x86-entry-stuff-rsb-for-entry-to-kernel-for-non-smep-platform.patch b/patches.suse/13-x86-entry-stuff-rsb-for-entry-to-kernel-for-non-smep-platform.patch
new file mode 100644
index 0000000000..0fb29f6122
--- /dev/null
+++ b/patches.suse/13-x86-entry-stuff-rsb-for-entry-to-kernel-for-non-smep-platform.patch
@@ -0,0 +1,86 @@
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Sat, 16 Dec 2017 19:01:26 +0100
+Subject: x86/entry: Stuff RSB for entry to kernel for non-SMEP platform
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Stuff RSB to prevent RSB underflow on non-SMEP platforms.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/entry/entry_64.S | 10 ++++++++++
+ arch/x86/entry/entry_64_compat.S | 3 +++
+ 2 files changed, 13 insertions(+)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index bc27d350f9c3..ab2686eae08e 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -175,6 +175,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
+ sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
+
+ ENABLE_IBRS
++ STUFF_RSB
+
+ testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz tracesys
+@@ -537,6 +538,8 @@ END(irq_entries_start)
+ SAVE_C_REGS
+ SAVE_EXTRA_REGS
+
++ STUFF_RSB
++
+ testb $3, CS(%rsp)
+ jz 1f
+
+@@ -1077,6 +1080,10 @@ ENTRY(paranoid_entry)
+ cld
+ SAVE_C_REGS 8
+ SAVE_EXTRA_REGS 8
++
++ /* Do the stuffing unconditionally from user/kernel to be safe */
++ STUFF_RSB
++
+ movl $1, %ebx
+ movl $MSR_GS_BASE, %ecx
+ rdmsr
+@@ -1152,6 +1159,9 @@ ENTRY(error_entry)
+ cld
+ SAVE_C_REGS 8
+ SAVE_EXTRA_REGS 8
++
++ STUFF_RSB
++
+ /*
+ * error_entry() always returns with a kernel gsbase and
+ * CR3. We must also have a kernel CR3/gsbase before
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index a907572d7591..5c4270f7d5b2 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -101,6 +101,7 @@ ENTRY(entry_SYSENTER_compat)
+ cld
+
+ ENABLE_IBRS
++ STUFF_RSB
+
+ /*
+ * Sysenter doesn't filter flags, so we need to clear NT
+@@ -201,6 +202,7 @@ ENTRY(entry_SYSCALL_compat)
+ pushq %r8 /* pt_regs->r15 = 0 */
+
+ ENABLE_IBRS
++ STUFF_RSB
+
+ /*
+ * User mode is traced as though IRQs are on, and SYSENTER
+@@ -312,6 +314,7 @@ ENTRY(entry_INT80_compat)
+ cld
+
+ ENABLE_IBRS
++ STUFF_RSB
+
+ /*
+ * User mode is traced as though IRQs are on, and the interrupt
+
diff --git a/patches.suse/14-x86-kvm-add-msr_ia32_spec_ctrl-and-msr_ia32_pred_cmd-to-kvm.patch b/patches.suse/14-x86-kvm-add-msr_ia32_spec_ctrl-and-msr_ia32_pred_cmd-to-kvm.patch
new file mode 100644
index 0000000000..ebe671fb89
--- /dev/null
+++ b/patches.suse/14-x86-kvm-add-msr_ia32_spec_ctrl-and-msr_ia32_pred_cmd-to-kvm.patch
@@ -0,0 +1,133 @@
+From: Wei Wang <wei.w.wang@intel.com>
+Date: Sat, 16 Dec 2017 19:18:48 +0100
+Subject: x86/kvm: Add MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD to kvm
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Add a field to access guest MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD state.
+
+Signed-off-by: Wei Wang <wei.w.wang@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+[ Move MSR u64 to struct vcpu_vmx so as not to break kABI. ]
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kvm/vmx.c | 23 ++++++++++++++++++-----
+ arch/x86/kvm/x86.c | 2 +-
+ 2 files changed, 19 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 683e854a7fe7..50fb42244cdf 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -48,6 +48,7 @@
+ #include <asm/kexec.h>
+ #include <asm/apic.h>
+ #include <asm/irq_remapping.h>
++#include <asm/spec_ctrl.h>
+
+ #include "trace.h"
+ #include "pmu.h"
+@@ -611,6 +612,8 @@ struct vcpu_vmx {
+ */
+ u64 msr_ia32_feature_control;
+ u64 msr_ia32_feature_control_valid_bits;
++
++ u64 spec_ctrl;
+ };
+
+ enum segment_cache_field {
+@@ -2815,6 +2818,7 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
+ */
+ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ {
++ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct shared_msr_entry *msr;
+
+ switch (msr_info->index) {
+@@ -2826,8 +2830,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ msr_info->data = vmcs_readl(GUEST_GS_BASE);
+ break;
+ case MSR_KERNEL_GS_BASE:
+- vmx_load_host_state(to_vmx(vcpu));
+- msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
++ vmx_load_host_state(vmx);
++ msr_info->data = vmx->msr_guest_kernel_gs_base;
+ break;
+ #endif
+ case MSR_EFER:
+@@ -2835,6 +2839,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ case MSR_IA32_TSC:
+ msr_info->data = guest_read_tsc(vcpu);
+ break;
++ case MSR_IA32_SPEC_CTRL:
++ msr_info->data = vmx->spec_ctrl;
++ break;
+ case MSR_IA32_SYSENTER_CS:
+ msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
+ break;
+@@ -2852,13 +2859,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ break;
+ case MSR_IA32_MCG_EXT_CTL:
+ if (!msr_info->host_initiated &&
+- !(to_vmx(vcpu)->msr_ia32_feature_control &
++ !(vmx->msr_ia32_feature_control &
+ FEATURE_CONTROL_LMCE))
+ return 1;
+ msr_info->data = vcpu->arch.mcg_ext_ctl;
+ break;
+ case MSR_IA32_FEATURE_CONTROL:
+- msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control;
++ msr_info->data = vmx->msr_ia32_feature_control;
+ break;
+ case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ if (!nested_vmx_allowed(vcpu))
+@@ -2874,7 +2881,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ return 1;
+ /* Otherwise falls through */
+ default:
+- msr = find_msr_entry(to_vmx(vcpu), msr_info->index);
++ msr = find_msr_entry(vmx, msr_info->index);
+ if (msr) {
+ msr_info->data = msr->data;
+ break;
+@@ -2939,6 +2946,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ case MSR_IA32_TSC:
+ kvm_write_tsc(vcpu, msr_info);
+ break;
++ case MSR_IA32_SPEC_CTRL:
++ vmx->spec_ctrl = msr_info->data;
++ break;
+ case MSR_IA32_CR_PAT:
+ if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+@@ -5723,6 +5733,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
+
+ msr_info.index = ecx;
+ msr_info.host_initiated = false;
++
+ if (vmx_get_msr(vcpu, &msr_info)) {
+ trace_kvm_msr_read_ex(ecx);
+ kvm_inject_gp(vcpu, 0);
+@@ -6307,6 +6318,8 @@ static __init int hardware_setup(void)
+ vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+ vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+ vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
++ vmx_disable_intercept_for_msr(MSR_IA32_SPEC_CTRL, false);
++ vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
+
+ memcpy(vmx_msr_bitmap_legacy_x2apic,
+ vmx_msr_bitmap_legacy, PAGE_SIZE);
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 4a94cabe5491..679ff90b3834 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -971,7 +971,7 @@ static u32 msrs_to_save[] = {
+ MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
+ #endif
+ MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
+- MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
++ MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, MSR_IA32_SPEC_CTRL,
+ };
+
+ static unsigned num_msrs_to_save;
+
diff --git a/patches.suse/15-x86-kvm-flush-ibp-when-switching-vms.patch b/patches.suse/15-x86-kvm-flush-ibp-when-switching-vms.patch
new file mode 100644
index 0000000000..33c762f8e5
--- /dev/null
+++ b/patches.suse/15-x86-kvm-flush-ibp-when-switching-vms.patch
@@ -0,0 +1,28 @@
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Fri, 13 Oct 2017 14:31:46 -0700
+Subject: x86/kvm: Flush IBP when switching VMs
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Set IBPB (Indirect branch prediction barrier) when switching VMs.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kvm/vmx.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index e6f0f91543b0..2c30ca38bc2b 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2062,6 +2062,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
+ per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
+ vmcs_load(vmx->loaded_vmcs->vmcs);
++
++ x86_ibp_barrier();
+ }
+
+ if (vmx->loaded_vmcs->cpu != cpu) {
+
diff --git a/patches.suse/16-x86-kvm-toggle-ibrs-on-vm-entry-and-exit.patch b/patches.suse/16-x86-kvm-toggle-ibrs-on-vm-entry-and-exit.patch
new file mode 100644
index 0000000000..891816061d
--- /dev/null
+++ b/patches.suse/16-x86-kvm-toggle-ibrs-on-vm-entry-and-exit.patch
@@ -0,0 +1,32 @@
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Fri, 20 Oct 2017 17:04:35 -0700
+Subject: x86/kvm: Toggle IBRS on VM entry and exit
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Restore guest IBRS on VM entry and set it to 1 on VM exit
+back to the kernel.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kvm/vmx.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 015ff8072b97..d2b5b230ae82 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -8653,6 +8653,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ __write_pkru(vmx->guest_pkru);
+
+ atomic_switch_perf_msrs(vmx);
++
++ if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++ add_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL,
++ vmx->spec_ctrl, FEATURE_ENABLE_IBRS);
++
+ debugctlmsr = get_debugctlmsr();
+
+ vmx->__launched = vmx->loaded_vmcs->launched;
+
diff --git a/patches.suse/17-x86-kvm-pad-rsb-on-vm-transition.patch b/patches.suse/17-x86-kvm-pad-rsb-on-vm-transition.patch
new file mode 100644
index 0000000000..995d6357b5
--- /dev/null
+++ b/patches.suse/17-x86-kvm-pad-rsb-on-vm-transition.patch
@@ -0,0 +1,96 @@
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Sat, 16 Dec 2017 19:35:49 +0100
+Subject: x86/kvm: Pad RSB on VM transition
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Add code to pad the local CPU's RSB entries to protect against entries
+left behind by the previous, less privileged mode.
+
+Boris:
+
+ - Use asm function instead of duplicating a C function.
+ - Add indirection to stuff_rsb() so that EXPORT_SYMBOL_GPL works.
+ Otherwise we'd need to backport the asm versions of those from 4.9.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/proto.h | 1 +
+ arch/x86/include/asm/spec_ctrl.h | 1 +
+ arch/x86/kernel/cpu/spec_ctrl.c | 11 +++++++++++
+ arch/x86/kvm/vmx.c | 3 +++
+ 4 files changed, 16 insertions(+)
+
+diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
+index a4a77286cb1d..6cd9c49ba651 100644
+--- a/arch/x86/include/asm/proto.h
++++ b/arch/x86/include/asm/proto.h
+@@ -16,6 +16,7 @@ void entry_SYSENTER_compat(void);
+
+ void x86_configure_nx(void);
+ void x86_report_nx(void);
++void stuff_rsb(void);
+
+ extern int reboot_force;
+
+diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
+index e584b91ebdd2..fa904b1e6dba 100644
+--- a/arch/x86/include/asm/spec_ctrl.h
++++ b/arch/x86/include/asm/spec_ctrl.h
+@@ -57,6 +57,7 @@
+ #else /* __ASSEMBLY__ */
+ void x86_enable_ibrs(void);
+ void x86_disable_ibrs(void);
++void stuff_RSB(void);
+
+ static inline void x86_ibp_barrier(void)
+ {
+diff --git a/arch/x86/kernel/cpu/spec_ctrl.c b/arch/x86/kernel/cpu/spec_ctrl.c
+index 10c2c6fff3e5..3bb4dba93b7f 100644
+--- a/arch/x86/kernel/cpu/spec_ctrl.c
++++ b/arch/x86/kernel/cpu/spec_ctrl.c
+@@ -4,6 +4,7 @@
+ */
+
+ #include <asm/msr.h>
++#include <asm/proto.h>
+ #include <asm/processor.h>
+ #include <asm/spec_ctrl.h>
+
+@@ -20,3 +21,13 @@ void x86_enable_ibrs(void)
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
+ }
+ EXPORT_SYMBOL_GPL(x86_enable_ibrs);
++
++/*
++ * Do this indirection as otherwise we'd need to backport the
++ * EXPORT_SYMBOL_GPL() for asm stuff.
++ */
++void stuff_RSB(void)
++{
++ stuff_rsb();
++}
++EXPORT_SYMBOL_GPL(stuff_RSB);
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 7cbd8353ada1..94f07ed07bb0 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -49,6 +49,7 @@
+ #include <asm/apic.h>
+ #include <asm/irq_remapping.h>
+ #include <asm/spec_ctrl.h>
++#include <asm/proto.h>
+
+ #include "trace.h"
+ #include "pmu.h"
+@@ -8763,6 +8764,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ #endif
+ );
+
++ stuff_RSB();
++
+ /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
+ if (debugctlmsr)
+ update_debugctlmsr(debugctlmsr);
+
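The patch above only adds a C-callable indirection; the actual asm stuff_rsb() routine is not part of this hunk. As a purely illustrative, hedged sketch of the general RSB-stuffing idea -- issue a run of CALLs whose return addresses point at a harmless speculation trap, then discard the architectural return addresses -- here is a standalone user-space toy for x86-64 with GCC/Clang inline asm. It is not the kernel's implementation, and the count of 32 is only the commonly cited RSB depth:

  #include <stdio.h>

  /* Conceptual sketch: fill the Return Stack Buffer with 32 benign entries. */
  static void stuff_rsb_sketch(void)
  {
          asm volatile(
                  "       lea     -128(%%rsp), %%rsp      \n\t"  /* step over the red zone      */
                  "       mov     $32, %%ecx              \n\t"
                  "1:     call    2f                      \n\t"  /* each CALL adds an RSB entry */
                  "3:     pause                           \n\t"  /* speculation trap; never     */
                  "       jmp     3b                      \n\t"  /* reached architecturally     */
                  "2:     add     $8, %%rsp               \n\t"  /* drop the return address     */
                  "       dec     %%ecx                   \n\t"
                  "       jnz     1b                      \n\t"
                  "       lea     128(%%rsp), %%rsp       \n\t"  /* undo the red-zone skip      */
                  : : : "ecx", "cc", "memory");
  }

  int main(void)
  {
          stuff_rsb_sketch();
          puts("RSB padded (conceptually)");
          return 0;
  }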
diff --git a/patches.suse/18-x86-spec_ctrl-check-whether-ibrs-is-enabled-before-using-it.patch b/patches.suse/18-x86-spec_ctrl-check-whether-ibrs-is-enabled-before-using-it.patch
new file mode 100644
index 0000000000..555fb5a8cc
--- /dev/null
+++ b/patches.suse/18-x86-spec_ctrl-check-whether-ibrs-is-enabled-before-using-it.patch
@@ -0,0 +1,149 @@
+From: Borislav Petkov <bp@suse.de>
+Date: Sun, 17 Dec 2017 16:01:57 +0100
+Subject: x86/spec_ctrl: Check whether IBRS is enabled before using it
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Check whether IBRS is enabled before using it.
+
+Carved out from a patch by Tim Chen <tim.c.chen@linux.intel.com>
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/spec_ctrl.h | 37 +++++++++++++++++++++++++++++++++++++
+ arch/x86/kernel/cpu/spec_ctrl.c | 15 +++++++++++++--
+ arch/x86/kvm/vmx.c | 2 +-
+ 3 files changed, 51 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
+index fa904b1e6dba..372f68d6a6f7 100644
+--- a/arch/x86/include/asm/spec_ctrl.h
++++ b/arch/x86/include/asm/spec_ctrl.h
+@@ -17,27 +17,55 @@
+
+ .macro ENABLE_IBRS_CLOBBER
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_SPEC_CTRL
++ call x86_ibrs_enabled
++ test %eax, %eax
++ jz .Llfence_\@
++
+ __ENABLE_IBRS_CLOBBER
++ jmp .Lend_\@
++
++.Llfence_\@:
++ lfence
+ .Lend_\@:
+ .endm
+
+
+ .macro ENABLE_IBRS
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_SPEC_CTRL
++
+ pushq %rax
++
++ call x86_ibrs_enabled
++ test %eax, %eax
++ jz .Llfence_\@
++
+ pushq %rcx
+ pushq %rdx
+ __ENABLE_IBRS_CLOBBER
+ popq %rdx
+ popq %rcx
++
++ jmp .Lpop_\@
++
++.Llfence_\@:
++ lfence
++
++.Lpop_\@:
+ popq %rax
++
+ .Lend_\@:
+ .endm
+
+
+ .macro DISABLE_IBRS
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_SPEC_CTRL
++
+ pushq %rax
++
++ call x86_ibrs_enabled
++ test %eax, %eax
++ jz .Llfence_\@
++
+ pushq %rcx
+ pushq %rdx
+ movl $MSR_IA32_SPEC_CTRL, %ecx
+@@ -46,7 +74,15 @@
+ wrmsr
+ popq %rdx
+ popq %rcx
++
++ jmp .Lpop_\@
++
++.Llfence_\@:
++ lfence
++
++.Lpop_\@:
+ popq %rax
++
+ .Lend_\@:
+ .endm
+
+@@ -58,6 +94,7 @@
+ void x86_enable_ibrs(void);
+ void x86_disable_ibrs(void);
+ void stuff_RSB(void);
++unsigned int x86_ibrs_enabled(void);
+
+ static inline void x86_ibp_barrier(void)
+ {
+diff --git a/arch/x86/kernel/cpu/spec_ctrl.c b/arch/x86/kernel/cpu/spec_ctrl.c
+index 3bb4dba93b7f..1475d51f9c4f 100644
+--- a/arch/x86/kernel/cpu/spec_ctrl.c
++++ b/arch/x86/kernel/cpu/spec_ctrl.c
+@@ -8,16 +8,27 @@
+ #include <asm/processor.h>
+ #include <asm/spec_ctrl.h>
+
++/*
++ * Keep it open for more flags in case needed.
++ */
++static unsigned int ibrs_state = 0;
++
++unsigned int notrace x86_ibrs_enabled(void)
++{
++ return ibrs_state;
++}
++EXPORT_SYMBOL_GPL(x86_ibrs_enabled);
++
+ void x86_disable_ibrs(void)
+ {
+- if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++ if (x86_ibrs_enabled())
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ }
+ EXPORT_SYMBOL_GPL(x86_disable_ibrs);
+
+ void x86_enable_ibrs(void)
+ {
+- if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++ if (x86_ibrs_enabled())
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
+ }
+ EXPORT_SYMBOL_GPL(x86_enable_ibrs);
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 55c65cf29412..d0072861b385 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -8655,7 +8655,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+
+ atomic_switch_perf_msrs(vmx);
+
+- if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++ if (x86_ibrs_enabled())
+ add_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL,
+ vmx->spec_ctrl, FEATURE_ENABLE_IBRS);
+
+
diff --git a/patches.suse/19-x86-spec_ctrl-check-whether-ibpb-is-enabled-before-using-it.patch b/patches.suse/19-x86-spec_ctrl-check-whether-ibpb-is-enabled-before-using-it.patch
new file mode 100644
index 0000000000..f09637e26f
--- /dev/null
+++ b/patches.suse/19-x86-spec_ctrl-check-whether-ibpb-is-enabled-before-using-it.patch
@@ -0,0 +1,59 @@
+From: Borislav Petkov <bp@suse.de>
+Date: Sun, 17 Dec 2017 16:01:57 +0100
+Subject: x86/spec_ctrl: Check whether IBPB is enabled before using it
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Check whether IBPB is enabled before using it.
+
+Carved out from a patch by Tim Chen <tim.c.chen@linux.intel.com>
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/spec_ctrl.h | 3 ++-
+ arch/x86/kernel/cpu/spec_ctrl.c | 7 +++++++
+ 2 files changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
+index 372f68d6a6f7..384b3b8b79b3 100644
+--- a/arch/x86/include/asm/spec_ctrl.h
++++ b/arch/x86/include/asm/spec_ctrl.h
+@@ -95,10 +95,11 @@ void x86_enable_ibrs(void);
+ void x86_disable_ibrs(void);
+ void stuff_RSB(void);
+ unsigned int x86_ibrs_enabled(void);
++unsigned int x86_ibpb_enabled(void);
+
+ static inline void x86_ibp_barrier(void)
+ {
+- if (static_cpu_has(X86_FEATURE_SPEC_CTRL))
++ if (x86_ibpb_enabled())
+ native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+ }
+
+diff --git a/arch/x86/kernel/cpu/spec_ctrl.c b/arch/x86/kernel/cpu/spec_ctrl.c
+index 1475d51f9c4f..672c32da1b02 100644
+--- a/arch/x86/kernel/cpu/spec_ctrl.c
++++ b/arch/x86/kernel/cpu/spec_ctrl.c
+@@ -12,6 +12,7 @@
+ * Keep it open for more flags in case needed.
+ */
+ static unsigned int ibrs_state = 0;
++static unsigned int ibpb_state = 0;
+
+ unsigned int notrace x86_ibrs_enabled(void)
+ {
+@@ -19,6 +20,12 @@ unsigned int notrace x86_ibrs_enabled(void)
+ }
+ EXPORT_SYMBOL_GPL(x86_ibrs_enabled);
+
++unsigned int notrace x86_ibpb_enabled(void)
++{
++ return ibpb_state;
++}
++EXPORT_SYMBOL_GPL(x86_ibpb_enabled);
++
+ void x86_disable_ibrs(void)
+ {
+ if (x86_ibrs_enabled())
+
diff --git a/patches.suse/20-x86-cpu-check-speculation-control-cpuid-bit.patch b/patches.suse/20-x86-cpu-check-speculation-control-cpuid-bit.patch
new file mode 100644
index 0000000000..409c4b0810
--- /dev/null
+++ b/patches.suse/20-x86-cpu-check-speculation-control-cpuid-bit.patch
@@ -0,0 +1,105 @@
+From: Borislav Petkov <bp@suse.de>
+Date: Sun, 17 Dec 2017 16:37:58 +0100
+Subject: x86/CPU: Check speculation control CPUID bit
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+... and enable the corresponding flags.
+
+Carved out from a patch by Tim Chen <tim.c.chen@linux.intel.com> and
+improved.
+
+After microcode reload, we need to check CPUID directly as we don't
+update the X86_FEATURE flags after a reload.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/spec_ctrl.h | 1 +
+ arch/x86/kernel/cpu/intel.c | 3 +++
+ arch/x86/kernel/cpu/microcode/core.c | 6 +++++-
+ arch/x86/kernel/cpu/spec_ctrl.c | 16 ++++++++++++++++
+ 4 files changed, 25 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
+index 384b3b8b79b3..96f61a82396d 100644
+--- a/arch/x86/include/asm/spec_ctrl.h
++++ b/arch/x86/include/asm/spec_ctrl.h
+@@ -96,6 +96,7 @@ void x86_disable_ibrs(void);
+ void stuff_RSB(void);
+ unsigned int x86_ibrs_enabled(void);
+ unsigned int x86_ibpb_enabled(void);
++void x86_spec_check(void);
+
+ static inline void x86_ibp_barrier(void)
+ {
+diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
+index 176ccd2964ff..f68e0bc85f2f 100644
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -16,6 +16,7 @@
+ #include <asm/intel-family.h>
+ #include <asm/hwcap2.h>
+ #include <asm/elf.h>
++#include <asm/spec_ctrl.h>
+
+ #ifdef CONFIG_X86_64
+ #include <linux/topology.h>
+@@ -560,6 +561,8 @@ static void init_intel(struct cpuinfo_x86 *c)
+ detect_vmx_virtcap(c);
+
+ probe_xeon_phi_r3mwait(c);
++
++ x86_spec_check();
+ }
+
+ #ifdef CONFIG_X86_32
+diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
+index b3e94ef461fd..b7dc3ca4518a 100644
+--- a/arch/x86/kernel/cpu/microcode/core.c
++++ b/arch/x86/kernel/cpu/microcode/core.c
+@@ -39,6 +39,7 @@
+ #include <asm/microcode.h>
+ #include <asm/processor.h>
+ #include <asm/cmdline.h>
++#include <asm/spec_ctrl.h>
+
+ #define MICROCODE_VERSION "2.01"
+
+@@ -417,8 +418,11 @@ static ssize_t reload_store(struct device *dev,
+ if (!ret)
+ ret = tmp_ret;
+ }
+- if (!ret)
++ if (!ret) {
+ perf_check_microcode();
++ x86_spec_check();
++ }
++
+ mutex_unlock(&microcode_mutex);
+ put_online_cpus();
+
+diff --git a/arch/x86/kernel/cpu/spec_ctrl.c b/arch/x86/kernel/cpu/spec_ctrl.c
+index 672c32da1b02..dbf00ab91589 100644
+--- a/arch/x86/kernel/cpu/spec_ctrl.c
++++ b/arch/x86/kernel/cpu/spec_ctrl.c
+@@ -49,3 +49,19 @@ void stuff_RSB(void)
+ stuff_rsb();
+ }
+ EXPORT_SYMBOL_GPL(stuff_RSB);
++
++/*
++ * Called after upgrading microcode, check CPUID directly.
++ */
++void x86_spec_check(void)
++{
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
++ if (cpuid_edx(7) & BIT(26)) {
++ ibrs_state = 1;
++ ibpb_state = 1;
++
++ setup_force_cpu_cap(X86_FEATURE_SPEC_CTRL);
++ }
++ }
++}
++EXPORT_SYMBOL_GPL(x86_spec_check);
+
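As a quick sanity check outside the kernel, the same CPUID bit this patch tests (leaf 7, subleaf 0, EDX bit 26) can be read from user space. A minimal sketch using the GCC/Clang <cpuid.h> helpers, assuming a CPU recent enough to implement leaf 7:

  #include <stdio.h>
  #include <cpuid.h>

  int main(void)
  {
          unsigned int eax, ebx, ecx, edx;

          /* CPUID leaf 7, subleaf 0: EDX[26] advertises IBRS/IBPB (SPEC_CTRL). */
          __cpuid_count(7, 0, eax, ebx, ecx, edx);
          printf("SPEC_CTRL (IBRS/IBPB): %s\n", (edx & (1u << 26)) ? "yes" : "no");
          return 0;
  }

On affected CPUs the bit only appears after the corresponding microcode update, which is exactly why the series re-checks CPUID after a microcode reload instead of relying on the cached X86_FEATURE flags.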
diff --git a/patches.suse/21-x86-spec-add-nospec-chicken-bit.patch b/patches.suse/21-x86-spec-add-nospec-chicken-bit.patch
new file mode 100644
index 0000000000..856197f960
--- /dev/null
+++ b/patches.suse/21-x86-spec-add-nospec-chicken-bit.patch
@@ -0,0 +1,53 @@
+From: Borislav Petkov <bp@suse.de>
+Date: Sun, 17 Dec 2017 16:45:58 +0100
+Subject: x86/spec: Add "nospec" chicken bit
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+... which disables the speculation-control features (IBRS and IBPB) and
+avoids the performance overhead they incur.
+
+Carved out from a patch by Tim Chen <tim.c.chen@linux.intel.com>
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ Documentation/kernel-parameters.txt | 6 ++++++
+ arch/x86/kernel/cpu/spec_ctrl.c | 10 ++++++++++
+ 2 files changed, 16 insertions(+)
+
+diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
+index 8eb059fcf63f..47de3730509b 100644
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2521,6 +2521,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
+ nosmep [X86]
+ Disable SMEP (Supervisor Mode Execution Prevention)
+ even if it is supported by processor.
++
++ nospec [X86]
++ Disable indirect branch restricted speculation and
++ indirect branch prediction barrier to avoid performance
++ penalties in trusted environments.
++
+
+ noexec32 [X86-64]
+ This affects only 32-bit executables.
+diff --git a/arch/x86/kernel/cpu/spec_ctrl.c b/arch/x86/kernel/cpu/spec_ctrl.c
+index dbf00ab91589..2bd52f35ac9a 100644
+--- a/arch/x86/kernel/cpu/spec_ctrl.c
++++ b/arch/x86/kernel/cpu/spec_ctrl.c
+@@ -65,3 +65,13 @@ void x86_spec_check(void)
+ }
+ }
+ EXPORT_SYMBOL_GPL(x86_spec_check);
++
++static int __init nospec(char *str)
++{
++ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
++ ibrs_state = 0;
++ ibpb_state = 0;
++
++ return 0;
++}
++early_param("nospec", nospec);
+
diff --git a/patches.suse/22-x86-cpu-amd-add-speculative-control-support-for-amd.patch b/patches.suse/22-x86-cpu-amd-add-speculative-control-support-for-amd.patch
new file mode 100644
index 0000000000..43d6a27cc1
--- /dev/null
+++ b/patches.suse/22-x86-cpu-amd-add-speculative-control-support-for-amd.patch
@@ -0,0 +1,102 @@
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 18 Dec 2017 11:50:09 +0100
+Subject: x86/CPU/AMD: Add speculative control support for AMD
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Add speculative control support for AMD processors. For AMD, speculative
+control is indicated as follows:
+
+ CPUID EAX=0x00000007, ECX=0x00: EDX[26] indicates support for
+ both IBRS and IBPB.
+
+ CPUID EAX=0x80000008, ECX=0x00: EBX[12] indicates support for
+ IBPB alone.
+
+On AMD family 0x10, 0x12 and 0x16 processors where either of the above
+features are not supported, IBPB can be achieved by disabling
+indirect branch predictor support in MSR 0xc0011021[14] at boot.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+[ Move everything to spec_ctrl.c ]
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/cpufeature.h | 1 +
+ arch/x86/include/asm/msr-index.h | 1 +
+ arch/x86/kernel/cpu/amd.c | 3 +++
+ arch/x86/kernel/cpu/spec_ctrl.c | 16 ++++++++++++++++
+ 4 files changed, 21 insertions(+)
+
+diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
+index 97701e2fd274..89bfebf5453b 100644
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -272,6 +272,7 @@
+ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
+ #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */
++#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
+
+ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
+ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index d0dd37c81da5..ef487e22ebb3 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -336,6 +336,7 @@
+ #define MSR_F15H_NB_PERF_CTL 0xc0010240
+ #define MSR_F15H_NB_PERF_CTR 0xc0010241
+ #define MSR_F15H_PTSC 0xc0010280
++#define MSR_F15H_IC_CFG 0xc0011021
+
+ /* Fam 10h MSRs */
+ #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index c9cbdd48f094..bdc11eebd33f 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -12,6 +12,7 @@
+ #include <asm/smp.h>
+ #include <asm/pci-direct.h>
+ #include <asm/delay.h>
++#include <asm/spec_ctrl.h>
+
+ #ifdef CONFIG_X86_64
+ # include <asm/mmconfig.h>
+@@ -789,6 +790,8 @@ static void init_amd(struct cpuinfo_x86 *c)
+
+ /* AMD CPUs don't reset SS attributes on SYSRET */
+ set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
++
++ x86_spec_check();
+ }
+
+ #ifdef CONFIG_X86_32
+diff --git a/arch/x86/kernel/cpu/spec_ctrl.c b/arch/x86/kernel/cpu/spec_ctrl.c
+index 2bd52f35ac9a..9c1ef3795b5b 100644
+--- a/arch/x86/kernel/cpu/spec_ctrl.c
++++ b/arch/x86/kernel/cpu/spec_ctrl.c
+@@ -62,6 +62,22 @@ void x86_spec_check(void)
+
+ setup_force_cpu_cap(X86_FEATURE_SPEC_CTRL);
+ }
++ } else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
++ if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++ ibrs_state = 1;
++
++ if (boot_cpu_has(X86_FEATURE_IBPB)) {
++ ibpb_state = 1;
++ } else {
++ switch (boot_cpu_data.x86) {
++ case 0x10:
++ case 0x12:
++ case 0x16:
++ pr_info_once("Disabling indirect branch predictor support\n");
++ msr_set_bit(MSR_F15H_IC_CFG, 14);
++ break;
++ }
++ }
+ }
+ }
+ EXPORT_SYMBOL_GPL(x86_spec_check);
+
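The AMD-side CPUID bits described in the commit message can likewise be probed from user space. A hedged sketch (again using <cpuid.h>, and assuming the extended leaf 0x80000008 exists) that reports the standalone IBPB bit plus the CPU family, since families 0x10, 0x12 and 0x16 without the bit fall back to the MSR 0xc0011021[14] workaround:

  #include <stdio.h>
  #include <cpuid.h>

  int main(void)
  {
          unsigned int eax, ebx, ecx, edx, family;

          /* CPUID 0x80000008: EBX[12] advertises standalone IBPB support. */
          __cpuid(0x80000008, eax, ebx, ecx, edx);
          printf("IBPB:   %s\n", (ebx & (1u << 12)) ? "yes" : "no");

          /* CPUID leaf 1: base family in EAX[11:8], extended family in EAX[27:20]. */
          __cpuid(1, eax, ebx, ecx, edx);
          family = (eax >> 8) & 0xf;
          if (family == 0xf)
                  family += (eax >> 20) & 0xff;
          printf("family: 0x%x\n", family);
          return 0;
  }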
diff --git a/patches.suse/23-x86-spec-check-cpuid-direclty-post-microcode-reload-to-support-ibpb-feature.patch b/patches.suse/23-x86-spec-check-cpuid-direclty-post-microcode-reload-to-support-ibpb-feature.patch
new file mode 100644
index 0000000000..63e4c306f8
--- /dev/null
+++ b/patches.suse/23-x86-spec-check-cpuid-direclty-post-microcode-reload-to-support-ibpb-feature.patch
@@ -0,0 +1,56 @@
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 18 Dec 2017 11:55:18 +0100
+Subject: x86/spec: Check CPUID directly post microcode reload to support IBPB
+ feature
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Add an IBPB feature check to the speculation-control update performed
+after a microcode reload.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+[ Check CPUID directly. ]
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kernel/cpu/spec_ctrl.c | 18 ++++++++----------
+ 1 file changed, 8 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/spec_ctrl.c b/arch/x86/kernel/cpu/spec_ctrl.c
+index 9c1ef3795b5b..21dd82c74429 100644
+--- a/arch/x86/kernel/cpu/spec_ctrl.c
++++ b/arch/x86/kernel/cpu/spec_ctrl.c
+@@ -55,18 +55,15 @@ EXPORT_SYMBOL_GPL(stuff_RSB);
+ */
+ void x86_spec_check(void)
+ {
+- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+- if (cpuid_edx(7) & BIT(26)) {
+- ibrs_state = 1;
+- ibpb_state = 1;
++ if (cpuid_edx(7) & BIT(26)) {
++ ibrs_state = 1;
++ ibpb_state = 1;
+
+- setup_force_cpu_cap(X86_FEATURE_SPEC_CTRL);
+- }
+- } else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+- if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
+- ibrs_state = 1;
++ setup_force_cpu_cap(X86_FEATURE_SPEC_CTRL);
++ }
+
+- if (boot_cpu_has(X86_FEATURE_IBPB)) {
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
++ if (cpuid_ebx(0x80000008) & BIT(12)) {
+ ibpb_state = 1;
+ } else {
+ switch (boot_cpu_data.x86) {
+@@ -77,6 +74,7 @@ void x86_spec_check(void)
+ msr_set_bit(MSR_F15H_IC_CFG, 14);
+ break;
+ }
++ ibpb_state = 0;
+ }
+ }
+ }
+
diff --git a/patches.suse/24-kvm-svm-do-not-intercept-new-speculative-control-msrs.patch b/patches.suse/24-kvm-svm-do-not-intercept-new-speculative-control-msrs.patch
new file mode 100644
index 0000000000..caac9b691c
--- /dev/null
+++ b/patches.suse/24-kvm-svm-do-not-intercept-new-speculative-control-msrs.patch
@@ -0,0 +1,30 @@
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 18 Dec 2017 12:06:31 +0100
+Subject: KVM: SVM: Do not intercept new speculative control MSRs
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Allow guest access to the speculative control MSRs without being
+intercepted.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kvm/svm.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index bc86daf689b1..7773534f9f76 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -247,6 +247,8 @@ static const struct svm_direct_access_msrs {
+ { .index = MSR_CSTAR, .always = true },
+ { .index = MSR_SYSCALL_MASK, .always = true },
+ #endif
++ { .index = MSR_IA32_SPEC_CTRL, .always = true },
++ { .index = MSR_IA32_PRED_CMD, .always = true },
+ { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
+ { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
+ { .index = MSR_IA32_LASTINTFROMIP, .always = false },
+
diff --git a/patches.suse/25-x86-svm-set-ibrs-value-on-vm-entry-and-exit.patch b/patches.suse/25-x86-svm-set-ibrs-value-on-vm-entry-and-exit.patch
new file mode 100644
index 0000000000..e07bf8fe24
--- /dev/null
+++ b/patches.suse/25-x86-svm-set-ibrs-value-on-vm-entry-and-exit.patch
@@ -0,0 +1,81 @@
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 18 Dec 2017 12:23:33 +0100
+Subject: x86/svm: Set IBRS value on VM entry and exit
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Set/restore the guest's IBRS value on VM entry. On VM exit back to the
+kernel, save the guest's IBRS value and then set IBRS to 1.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kvm/svm.c | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index 7773534f9f76..b289e72cb6aa 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -44,6 +44,7 @@
+ #include <asm/debugreg.h>
+ #include <asm/kvm_para.h>
+ #include <asm/irq_remapping.h>
++#include <asm/spec_ctrl.h>
+
+ #include <asm/virtext.h>
+ #include "trace.h"
+@@ -174,6 +175,8 @@ struct vcpu_svm {
+
+ u64 next_rip;
+
++ u64 spec_ctrl;
++
+ u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
+ struct {
+ u16 fs;
+@@ -3533,6 +3536,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ case MSR_VM_CR:
+ msr_info->data = svm->nested.vm_cr_msr;
+ break;
++ case MSR_IA32_SPEC_CTRL:
++ msr_info->data = svm->spec_ctrl;
++ break;
+ case MSR_IA32_UCODE_REV:
+ msr_info->data = 0x01000065;
+ break;
+@@ -3671,6 +3677,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
+ case MSR_VM_IGNNE:
+ vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
+ break;
++ case MSR_IA32_SPEC_CTRL:
++ svm->spec_ctrl = data;
++ break;
+ case MSR_IA32_APICBASE:
+ if (kvm_vcpu_apicv_active(vcpu))
+ avic_update_vapic_bar(to_svm(vcpu), data);
+@@ -4834,6 +4843,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+
+ local_irq_enable();
+
++ if (x86_ibrs_enabled() && (svm->spec_ctrl != FEATURE_ENABLE_IBRS))
++ wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
++
+ asm volatile (
+ "push %%" _ASM_BP "; \n\t"
+ "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
+@@ -4907,6 +4919,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+ #endif
+ );
+
++ if (x86_ibrs_enabled()) {
++ rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
++ if (svm->spec_ctrl != FEATURE_ENABLE_IBRS)
++ wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
++ }
++
+ #ifdef CONFIG_X86_64
+ wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+ #else
+
diff --git a/patches.suse/26-x86-svm-set-ibpb-when-running-a-different-vcpu.patch b/patches.suse/26-x86-svm-set-ibpb-when-running-a-different-vcpu.patch
new file mode 100644
index 0000000000..abcf30e891
--- /dev/null
+++ b/patches.suse/26-x86-svm-set-ibpb-when-running-a-different-vcpu.patch
@@ -0,0 +1,61 @@
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 18 Dec 2017 12:47:01 +0100
+Subject: x86/svm: Set IBPB when running a different VCPU
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Set IBPB (Indirect Branch Prediction Barrier) when the current CPU is
+going to run a VCPU different from what was previously run.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kvm/svm.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index b289e72cb6aa..3e3f74e1fff6 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -510,6 +510,8 @@ struct svm_cpu_data {
+ struct kvm_ldttss_desc *tss_desc;
+
+ struct page *save_area;
++
++ struct vmcb *current_vmcb;
+ };
+
+ static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
+@@ -1662,11 +1664,18 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
+ __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, svm);
++
++ /*
++ * The VMCB could be recycled, causing a false negative in svm_vcpu_load;
++ * block speculative execution.
++ */
++ x86_ibp_barrier();
+ }
+
+ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
++ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ int i;
+
+ if (unlikely(cpu != vcpu->cpu)) {
+@@ -1695,6 +1704,11 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ if (static_cpu_has(X86_FEATURE_RDTSCP))
+ wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
+
++ if (sd->current_vmcb != svm->vmcb) {
++ sd->current_vmcb = svm->vmcb;
++ x86_ibp_barrier();
++ }
++
+ avic_vcpu_load(vcpu, cpu);
+ }
+
+
diff --git a/patches.suse/27-kvm-x86-add-speculative-control-cpuid-support-for-guests.patch b/patches.suse/27-kvm-x86-add-speculative-control-cpuid-support-for-guests.patch
new file mode 100644
index 0000000000..e67ff44ad5
--- /dev/null
+++ b/patches.suse/27-kvm-x86-add-speculative-control-cpuid-support-for-guests.patch
@@ -0,0 +1,53 @@
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 18 Dec 2017 12:53:04 +0100
+Subject: KVM: x86: Add speculative control CPUID support for guests
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Provide the guest with the speculative control CPUID related values.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kvm/cpuid.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
+index 22f0e88f8abd..9f550afedf66 100644
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -69,6 +69,7 @@ u64 kvm_supported_xcr0(void)
+ /* These are scattered features in cpufeatures.h. */
+ #define KVM_CPUID_BIT_AVX512_4VNNIW 2
+ #define KVM_CPUID_BIT_AVX512_4FMAPS 3
++#define KVM_CPUID_BIT_SPEC_CTRL 26
+ #define KF(x) bit(KVM_CPUID_BIT_##x)
+
+ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
+@@ -384,7 +385,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+
+ /* cpuid 7.0.edx*/
+ const u32 kvm_cpuid_7_0_edx_x86_features =
+- KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
++ KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) |
++ KF(SPEC_CTRL);
++
++ /* cpuid 0x80000008.0.ebx */
++ const u32 kvm_cpuid_80000008_0_ebx_x86_features =
++ F(IBPB);
+
+ /* all calls to cpuid_count() should be made on the same cpu */
+ get_cpu();
+@@ -619,7 +625,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ if (!g_phys_as)
+ g_phys_as = phys_as;
+ entry->eax = g_phys_as | (virt_as << 8);
+- entry->ebx = entry->edx = 0;
++ entry->ebx &= kvm_cpuid_80000008_0_ebx_x86_features;
++ cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
++ entry->edx = 0;
+ break;
+ }
+ case 0x80000019:
+
diff --git a/patches.suse/28-x86-svm-clobber-the-rsb-on-vm-exit.patch b/patches.suse/28-x86-svm-clobber-the-rsb-on-vm-exit.patch
new file mode 100644
index 0000000000..ad298d1da6
--- /dev/null
+++ b/patches.suse/28-x86-svm-clobber-the-rsb-on-vm-exit.patch
@@ -0,0 +1,29 @@
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 18 Dec 2017 13:06:12 +0100
+Subject: x86/svm: Clobber the RSB on VM exit
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Overwrite the local CPU's RSB entries left behind by the previous, less
+privileged mode.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kvm/svm.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index 3e3f74e1fff6..fbfcf32f4160 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -4939,6 +4939,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+ wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
+ }
+
++ stuff_RSB();
++
+ #ifdef CONFIG_X86_64
+ wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+ #else
+
diff --git a/patches.suse/29-x86-svm-add-code-to-clear-registers-on-vm-exit.patch b/patches.suse/29-x86-svm-add-code-to-clear-registers-on-vm-exit.patch
new file mode 100644
index 0000000000..50fb4273b2
--- /dev/null
+++ b/patches.suse/29-x86-svm-add-code-to-clear-registers-on-vm-exit.patch
@@ -0,0 +1,42 @@
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 18 Dec 2017 14:05:00 +0100
+Subject: x86/svm: Add code to clear registers on VM exit
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+Clear registers on VM exit to prevent speculative use of them.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kvm/svm.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index fbfcf32f4160..8b6f78faa6d1 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -4904,6 +4904,22 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+ "mov %%r14, %c[r14](%[svm]) \n\t"
+ "mov %%r15, %c[r15](%[svm]) \n\t"
+ #endif
++ /* Clear host registers (marked as clobbered so it's safe) */
++ "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
++ "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
++ "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
++ "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
++ "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
++#ifdef CONFIG_X86_64
++ "xor %%r8, %%r8 \n\t"
++ "xor %%r9, %%r9 \n\t"
++ "xor %%r10, %%r10 \n\t"
++ "xor %%r11, %%r11 \n\t"
++ "xor %%r12, %%r12 \n\t"
++ "xor %%r13, %%r13 \n\t"
++ "xor %%r14, %%r14 \n\t"
++ "xor %%r15, %%r15 \n\t"
++#endif
+ "pop %%" _ASM_BP
+ :
+ : [svm]"a"(svm),
+
diff --git a/patches.suse/30-x86-cpu-amd-make-the-lfence-instruction-serialized.patch b/patches.suse/30-x86-cpu-amd-make-the-lfence-instruction-serialized.patch
new file mode 100644
index 0000000000..8fb0b9e5af
--- /dev/null
+++ b/patches.suse/30-x86-cpu-amd-make-the-lfence-instruction-serialized.patch
@@ -0,0 +1,57 @@
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 18 Dec 2017 14:13:37 +0100
+Subject: x86/CPU/AMD: Make the LFENCE instruction serialized
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+To reduce the impact of using MFENCE, make execution of the LFENCE
+instruction serialized. This is done by setting bit 1 of MSR
+0xc0011029 (DE_CFG).
+
+Some families that support LFENCE do not have this MSR. For these
+families, the LFENCE instruction is already serialized.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/msr-index.h | 2 ++
+ arch/x86/kernel/cpu/amd.c | 12 ++++++++++--
+ 2 files changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index ef487e22ebb3..38fedaa5e131 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -346,6 +346,8 @@
+ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
+ #define FAM10H_MMIO_CONF_BASE_SHIFT 20
+ #define MSR_FAM10H_NODE_ID 0xc001100c
++#define MSR_F10H_DECFG 0xc0011029
++#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
+
+ /* K8 MSRs */
+ #define MSR_K8_TOP_MEM1 0xc001001a
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index bdc11eebd33f..5957b15cb54e 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -767,8 +767,16 @@ static void init_amd(struct cpuinfo_x86 *c)
+ set_cpu_cap(c, X86_FEATURE_K8);
+
+ if (cpu_has_xmm2) {
+- /* MFENCE stops RDTSC speculation */
+- set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
++ /*
++ * Use LFENCE for execution serialization. On families which
++ * don't have that MSR, LFENCE is already serialized.
++ */
++ if (c->x86 > 0xf)
++ msr_set_bit(MSR_F10H_DECFG,
++ MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
++
++ /* LFENCE with MSR_F10H_DECFG[1]=1 stops RDTSC speculation */
++ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+ }
+
+ /*
+
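Whether DE_CFG[1] actually ended up set can be verified from user space through the msr character device. A hedged sketch, assuming an AMD system with the msr driver available (CONFIG_X86_MSR, e.g. 'modprobe msr') and root privileges; on CPUs that do not implement MSR 0xc0011029 the read simply fails:

  #include <stdio.h>
  #include <stdint.h>
  #include <fcntl.h>
  #include <unistd.h>

  int main(void)
  {
          uint64_t val;
          int fd = open("/dev/cpu/0/msr", O_RDONLY);

          if (fd < 0) {
                  perror("open /dev/cpu/0/msr");
                  return 1;
          }
          /* The msr device returns 8 bytes of the MSR whose number is the file offset. */
          if (pread(fd, &val, sizeof(val), 0xc0011029) != sizeof(val)) {
                  perror("rdmsr 0xc0011029");
                  close(fd);
                  return 1;
          }
          printf("LFENCE serializing (DE_CFG[1]): %s\n", (val & 0x2) ? "yes" : "no");
          close(fd);
          return 0;
  }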
diff --git a/patches.suse/31-x86-cpu-amd-remove-now-unused-definition-of-mfence_rdtsc-feature.patch b/patches.suse/31-x86-cpu-amd-remove-now-unused-definition-of-mfence_rdtsc-feature.patch
new file mode 100644
index 0000000000..52fbc85103
--- /dev/null
+++ b/patches.suse/31-x86-cpu-amd-remove-now-unused-definition-of-mfence_rdtsc-feature.patch
@@ -0,0 +1,59 @@
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 18 Dec 2017 14:47:53 +0100
+Subject: x86/CPU/AMD: Remove now unused definition of MFENCE_RDTSC feature
+Patch-mainline: submitted on 2018/1/9
+References: bsc#1068032
+
+With the switch to using LFENCE_RDTSC on AMD platforms there is no longer
+a need for the MFENCE_RDTSC feature. Remove its usage and definition.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/barrier.h | 3 +--
+ arch/x86/include/asm/cpufeature.h | 2 +-
+ arch/x86/include/asm/msr.h | 3 +--
+ 3 files changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
+index aae78054cae2..d00432579444 100644
+--- a/arch/x86/include/asm/barrier.h
++++ b/arch/x86/include/asm/barrier.h
+@@ -23,8 +23,7 @@
+ #define wmb() asm volatile("sfence" ::: "memory")
+ #endif
+
+-#define gmb() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
+- "lfence", X86_FEATURE_LFENCE_RDTSC);
++#define gmb() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC);
+
+ #ifdef CONFIG_X86_PPRO_FENCE
+ #define dma_rmb() rmb()
+diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
+index 89bfebf5453b..2ddcaf350dc5 100644
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -92,7 +92,7 @@
+ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
+ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
+ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
+-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
++/* free, was #define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) * "" Mfence synchronizes RDTSC */
+ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
+ /* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
+ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
+diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
+index cc937a3f3577..9f900ef276cf 100644
+--- a/arch/x86/include/asm/msr.h
++++ b/arch/x86/include/asm/msr.h
+@@ -147,8 +147,7 @@ static __always_inline unsigned long long rdtsc_ordered(void)
+ * that some other imaginary CPU is updating continuously with a
+ * time stamp.
+ */
+- alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+- "lfence", X86_FEATURE_LFENCE_RDTSC);
++ alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC);
+ return rdtsc();
+ }
+
+
diff --git a/patches.suse/32-move-pti-feature-check-up.patch b/patches.suse/32-move-pti-feature-check-up.patch
new file mode 100644
index 0000000000..50fce58493
--- /dev/null
+++ b/patches.suse/32-move-pti-feature-check-up.patch
@@ -0,0 +1,77 @@
+From: Borislav Petkov <bp@suse.de>
+Date: Mon Dec 25 13:57:16 CET 2017
+Subject: x86/kaiser: Move feature detection up
+Patch-mainline: Not yet, under development
+References: bsc#1068032
+
+... before the first use of kaiser_enabled as otherwise funky
+things happen:
+
+ about to get started...
+ (XEN) d0v0 Unhandled page fault fault/trap [#14, ec=0000]
+ (XEN) Pagetable walk from ffff88022a449090:
+ (XEN) L4[0x110] = 0000000229e0e067 0000000000001e0e
+ (XEN) L3[0x008] = 0000000000000000 ffffffffffffffff
+ (XEN) domain_crash_sync called from entry.S: fault at ffff82d08033fd08
+ entry.o#create_bounce_frame+0x135/0x14d
+ (XEN) Domain 0 (vcpu#0) crashed on cpu#0:
+ (XEN) ----[ Xen-4.9.1_02-3.21 x86_64 debug=n Not tainted ]----
+ (XEN) CPU: 0
+ (XEN) RIP: e033:[<ffffffff81007460>]
+ (XEN) RFLAGS: 0000000000000286 EM: 1 CONTEXT: pv guest (d0v0)
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+
+---
+ arch/x86/include/asm/kaiser.h | 2 ++
+ arch/x86/kernel/setup.c | 7 +++++++
+ arch/x86/mm/kaiser.c | 2 --
+ 3 files changed, 9 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -96,8 +96,10 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_p
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+
+ extern int kaiser_enabled;
++extern void __init kaiser_check_boottime_disable(void);
+ #else
+ #define kaiser_enabled 0
++static inline void __init kaiser_check_boottime_disable(void) {}
+ #endif /* CONFIG_KAISER */
+
+ /*
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -116,6 +116,7 @@
+ #include <asm/mmu_context.h>
+
+ #include <asm/suspend.h>
++#include <asm/kaiser.h>
+
+ /*
+ * max_low_pfn_mapped: highest direct mapped pfn under 4GB
+@@ -1037,6 +1038,12 @@ void __init setup_arch(char **cmdline_p)
+ */
+ init_hypervisor_platform();
+
++ /*
++ * This needs to happen right after XENPV is set on xen and
++ * kaiser_enabled is checked below in cleanup_highmap().
++ */
++ kaiser_check_boottime_disable();
++
+ x86_init.resources.probe_roms();
+
+ /* after parse_early_param, so could debug it */
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -305,8 +305,6 @@ void __init kaiser_init(void)
+ {
+ int cpu;
+
+- kaiser_check_boottime_disable();
+-
+ if (!kaiser_enabled)
+ return;
+
diff --git a/patches.suse/4.4-01-x86-mm-add-invpcid-helpers.patch b/patches.suse/4.4-01-x86-mm-add-invpcid-helpers.patch
new file mode 100644
index 0000000000..ac993d68f9
--- /dev/null
+++ b/patches.suse/4.4-01-x86-mm-add-invpcid-helpers.patch
@@ -0,0 +1,95 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Fri, 29 Jan 2016 11:42:57 -0800
+Subject: x86/mm: Add INVPCID helpers
+References: bsc#1068032 CVE-2017-5754
+Git-commit: 060a402a1ddb551455ee410de2eadd3349f2801b
+Patch-mainline: v4.6-rc1
+References: bsc#1068032
+
+This adds helpers for each of the four currently-specified INVPCID
+modes.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Luis R. Rodriguez <mcgrof@suse.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Toshi Kani <toshi.kani@hp.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/8a62b23ad686888cee01da134c91409e22064db9.1454096309.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/tlbflush.h | 48 +++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 48 insertions(+)
+
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 6df2029405a3..8b576832777e 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -7,6 +7,54 @@
+ #include <asm/processor.h>
+ #include <asm/special_insns.h>
+
++static inline void __invpcid(unsigned long pcid, unsigned long addr,
++ unsigned long type)
++{
++ u64 desc[2] = { pcid, addr };
++
++ /*
++ * The memory clobber is because the whole point is to invalidate
++ * stale TLB entries and, especially if we're flushing global
++ * mappings, we don't want the compiler to reorder any subsequent
++ * memory accesses before the TLB flush.
++ *
++ * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
++ * invpcid (%rcx), %rax in long mode.
++ */
++ asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
++ : : "m" (desc), "a" (type), "c" (desc) : "memory");
++}
++
++#define INVPCID_TYPE_INDIV_ADDR 0
++#define INVPCID_TYPE_SINGLE_CTXT 1
++#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
++#define INVPCID_TYPE_ALL_NON_GLOBAL 3
++
++/* Flush all mappings for a given pcid and addr, not including globals. */
++static inline void invpcid_flush_one(unsigned long pcid,
++ unsigned long addr)
++{
++ __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
++}
++
++/* Flush all mappings for a given PCID, not including globals. */
++static inline void invpcid_flush_single_context(unsigned long pcid)
++{
++ __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
++}
++
++/* Flush all mappings, including globals, for all PCIDs. */
++static inline void invpcid_flush_all(void)
++{
++ __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
++}
++
++/* Flush all mappings for all PCIDs except globals. */
++static inline void invpcid_flush_all_nonglobals(void)
++{
++ __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
++}
++
+ #ifdef CONFIG_PARAVIRT
+ #include <asm/paravirt.h>
+ #else
+
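INVPCID itself is a privileged instruction, so from user space the most one can do is check whether the CPU advertises it at all (CPUID leaf 7, EBX bit 10); PCID support proper is advertised separately in leaf 1, ECX bit 17. A small hedged probe using the GCC/Clang <cpuid.h> helpers:

  #include <stdio.h>
  #include <cpuid.h>

  int main(void)
  {
          unsigned int eax, ebx, ecx, edx;

          __cpuid_count(7, 0, eax, ebx, ecx, edx);
          printf("INVPCID: %s\n", (ebx & (1u << 10)) ? "yes" : "no");

          __cpuid(1, eax, ebx, ecx, edx);
          printf("PCID:    %s\n", (ecx & (1u << 17)) ? "yes" : "no");
          return 0;
  }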
diff --git a/patches.suse/4.4-02-x86-mm-fix-invpcid-asm-constraint.patch b/patches.suse/4.4-02-x86-mm-fix-invpcid-asm-constraint.patch
new file mode 100644
index 0000000000..3fa24b07f6
--- /dev/null
+++ b/patches.suse/4.4-02-x86-mm-fix-invpcid-asm-constraint.patch
@@ -0,0 +1,69 @@
+From: Borislav Petkov <bp@suse.de>
+Date: Wed, 10 Feb 2016 15:51:16 +0100
+Subject: x86/mm: Fix INVPCID asm constraint
+References: bsc#1068032 CVE-2017-5754
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+Git-commit: e2c7698cd61f11d4077fdb28148b2d31b82ac848
+Patch-mainline: v4.6-rc1
+References: bsc#1068032
+
+So we want to specify the dependency on both @pcid and @addr so that the
+compiler doesn't reorder accesses to them *before* the TLB flush. But
+for that to work, we need to express this properly in the inline asm and
+deref the whole desc array, not the pointer to it. See clwb() for an
+example.
+
+This fixes the build error on 32-bit:
+
+ arch/x86/include/asm/tlbflush.h: In function ‘__invpcid’:
+ arch/x86/include/asm/tlbflush.h:26:18: error: memory input 0 is not directly addressable
+
+which gcc4.7 caught but 5.x didn't. Which is strange. :-\
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Luis R. Rodriguez <mcgrof@suse.com>
+Cc: Michael Matz <matz@suse.de>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Toshi Kani <toshi.kani@hp.com>
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+---
+ arch/x86/include/asm/tlbflush.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index fc9a2fda1404..d0cce90b0855 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -10,7 +10,7 @@
+ static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ unsigned long type)
+ {
+- u64 desc[2] = { pcid, addr };
++ struct { u64 d[2]; } desc = { { pcid, addr } };
+
+ /*
+ * The memory clobber is because the whole point is to invalidate
+@@ -22,7 +22,7 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ * invpcid (%rcx), %rax in long mode.
+ */
+ asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+- : : "m" (desc), "a" (type), "c" (desc) : "memory");
++ : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+ }
+
+ #define INVPCID_TYPE_INDIV_ADDR 0
+
diff --git a/patches.suse/4.4-03-x86-mm-add-a-noinvpcid-boot-option-to-turn-off-invpcid.patch b/patches.suse/4.4-03-x86-mm-add-a-noinvpcid-boot-option-to-turn-off-invpcid.patch
new file mode 100644
index 0000000000..f149683471
--- /dev/null
+++ b/patches.suse/4.4-03-x86-mm-add-a-noinvpcid-boot-option-to-turn-off-invpcid.patch
@@ -0,0 +1,73 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Fri, 29 Jan 2016 11:42:58 -0800
+Subject: x86/mm: Add a 'noinvpcid' boot option to turn off INVPCID
+References: bsc#1068032 CVE-2017-5754
+Git-commit: d12a72b844a49d4162f24cefdab30bed3f86730e
+Patch-mainline: v4.6-rc1
+References: bsc#1068032
+
+This adds a chicken bit to turn off INVPCID in case something goes
+wrong. It's an early_param() because we do TLB flushes before we
+parse __setup() parameters.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Luis R. Rodriguez <mcgrof@suse.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Toshi Kani <toshi.kani@hp.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/f586317ed1bc2b87aee652267e515b90051af385.1454096309.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ Documentation/kernel-parameters.txt | 2 ++
+ arch/x86/kernel/cpu/common.c | 16 ++++++++++++++++
+ 2 files changed, 18 insertions(+)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -165,6 +165,22 @@ static int __init x86_mpx_setup(char *s)
+ }
+ __setup("nompx", x86_mpx_setup);
+
++static int __init x86_noinvpcid_setup(char *s)
++{
++ /* noinvpcid doesn't accept parameters */
++ if (s)
++ return -EINVAL;
++
++ /* do not emit a message if the feature is not present */
++ if (!boot_cpu_has(X86_FEATURE_INVPCID))
++ return 0;
++
++ setup_clear_cpu_cap(X86_FEATURE_INVPCID);
++ pr_info("noinvpcid: INVPCID feature disabled\n");
++ return 0;
++}
++early_param("noinvpcid", x86_noinvpcid_setup);
++
+ #ifdef CONFIG_X86_32
+ static int cachesize_override = -1;
+ static int disable_x86_serial_nr = 1;
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2604,6 +2604,8 @@ bytes respectively. Such letter suffixes
+
+ nointroute [IA-64]
+
++ noinvpcid [X86] Disable the INVPCID cpu feature.
++
+ nojitter [IA-64] Disables jitter checking for ITC timers.
+
+ no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
diff --git a/patches.suse/4.4-04-x86-mm-if-invpcid-is-available-use-it-to-flush-global-mappings.patch b/patches.suse/4.4-04-x86-mm-if-invpcid-is-available-use-it-to-flush-global-mappings.patch
new file mode 100644
index 0000000000..8dae609c96
--- /dev/null
+++ b/patches.suse/4.4-04-x86-mm-if-invpcid-is-available-use-it-to-flush-global-mappings.patch
@@ -0,0 +1,55 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Fri, 29 Jan 2016 11:42:59 -0800
+Subject: x86/mm: If INVPCID is available, use it to flush global mappings
+References: bsc#1068032 CVE-2017-5754
+Git-commit: d8bced79af1db6734f66b42064cc773cada2ce99
+Patch-mainline: v4.6-rc1
+References: bsc#1068032
+
+On my Skylake laptop, INVPCID function 2 (flush absolutely
+everything) takes about 376ns, whereas saving flags, twiddling
+CR4.PGE to flush global mappings, and restoring flags takes about
+539ns.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Luis R. Rodriguez <mcgrof@suse.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Toshi Kani <toshi.kani@hp.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/ed0ef62581c0ea9c99b9bf6df726015e96d44743.1454096309.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/tlbflush.h | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -159,6 +159,15 @@ static inline void __native_flush_tlb_gl
+ {
+ unsigned long flags;
+
++ if (static_cpu_has(X86_FEATURE_INVPCID)) {
++ /*
++ * Using INVPCID is considerably faster than a pair of writes
++ * to CR4 sandwiched inside an IRQ flag save/restore.
++ */
++ invpcid_flush_all();
++ return;
++ }
++
+ /*
+ * Read-modify-write to CR4 - protect it from preemption and
+ * from interrupts. (Use the raw variant because this code can
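For contrast with the INVPCID fast path added above, here is a minimal sketch of the traditional fallback it short-circuits: toggling CR4.PGE with interrupts already disabled, which flushes every TLB entry including global mappings. The helpers and the X86_CR4_PGE constant are the kernel's own; the wrapper name is made up, and this is a kernel-context illustration, not a standalone program:

  /* Sketch of the non-INVPCID fallback (caller runs with interrupts disabled). */
  static inline void flush_tlb_global_cr4_sketch(void)
  {
          unsigned long cr4 = native_read_cr4();

          native_write_cr4(cr4 ^ X86_CR4_PGE);    /* toggling PGE flushes all entries, incl. globals */
          native_write_cr4(cr4);                  /* restore the original CR4 value                  */
  }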
diff --git a/patches.suse/4.4-06-mm-mmu_context-sched-core-fix-mmu_context-h-assumption.patch b/patches.suse/4.4-06-mm-mmu_context-sched-core-fix-mmu_context-h-assumption.patch
new file mode 100644
index 0000000000..d292bcf588
--- /dev/null
+++ b/patches.suse/4.4-06-mm-mmu_context-sched-core-fix-mmu_context-h-assumption.patch
@@ -0,0 +1,41 @@
+From: Ingo Molnar <mingo@kernel.org>
+Date: Thu, 28 Apr 2016 11:39:12 +0200
+Subject: mm/mmu_context, sched/core: Fix mmu_context.h assumption
+References: bsc#1068032 CVE-2017-5754
+Git-commit: 8efd755ac2fe262d4c8d5c9bbe054bb67dae93da
+Patch-mainline: v4.7-rc1
+References: bsc#1068032
+
+Some architectures (such as Alpha) rely on include/linux/sched.h definitions
+in their mmu_context.h files.
+
+So include sched.h before mmu_context.h.
+
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: linux-kernel@vger.kernel.org
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ mm/mmu_context.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/mm/mmu_context.c b/mm/mmu_context.c
+index f802c2d216a7..6f4d27c5bb32 100644
+--- a/mm/mmu_context.c
++++ b/mm/mmu_context.c
+@@ -4,9 +4,9 @@
+ */
+
+ #include <linux/mm.h>
++#include <linux/sched.h>
+ #include <linux/mmu_context.h>
+ #include <linux/export.h>
+-#include <linux/sched.h>
+
+ #include <asm/mmu_context.h>
+
+
diff --git a/patches.suse/4.4-07-sched-core-add-switch_mm_irqs_off-and-use-it-in-the-scheduler.patch b/patches.suse/4.4-07-sched-core-add-switch_mm_irqs_off-and-use-it-in-the-scheduler.patch
new file mode 100644
index 0000000000..80ab77e633
--- /dev/null
+++ b/patches.suse/4.4-07-sched-core-add-switch_mm_irqs_off-and-use-it-in-the-scheduler.patch
@@ -0,0 +1,74 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 26 Apr 2016 09:39:06 -0700
+Subject: sched/core: Add switch_mm_irqs_off() and use it in the scheduler
+References: bsc#1068032 CVE-2017-5754
+Git-commit: f98db6013c557c216da5038d9c52045be55cd039
+Patch-mainline: v4.7-rc1
+References: bsc#1068032
+
+By default, this is the same thing as switch_mm().
+
+x86 will override it as an optimization.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/df401df47bdd6be3e389c6f1e3f5310d70e81b2c.1461688545.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ include/linux/mmu_context.h | 7 +++++++
+ kernel/sched/core.c | 6 +++---
+ 2 files changed, 10 insertions(+), 3 deletions(-)
+
+--- a/include/linux/mmu_context.h
++++ b/include/linux/mmu_context.h
+@@ -1,9 +1,16 @@
+ #ifndef _LINUX_MMU_CONTEXT_H
+ #define _LINUX_MMU_CONTEXT_H
+
++#include <asm/mmu_context.h>
++
+ struct mm_struct;
+
+ void use_mm(struct mm_struct *mm);
+ void unuse_mm(struct mm_struct *mm);
+
++/* Architectures that care about IRQ state in switch_mm can override this. */
++#ifndef switch_mm_irqs_off
++# define switch_mm_irqs_off switch_mm
++#endif
++
+ #endif
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -32,7 +32,7 @@
+ #include <linux/init.h>
+ #include <linux/uaccess.h>
+ #include <linux/highmem.h>
+-#include <asm/mmu_context.h>
++#include <linux/mmu_context.h>
+ #include <linux/interrupt.h>
+ #include <linux/capability.h>
+ #include <linux/completion.h>
+@@ -2994,7 +2994,7 @@ context_switch(struct rq *rq, struct tas
+ atomic_inc(&oldmm->mm_count);
+ enter_lazy_tlb(oldmm, next);
+ } else
+- switch_mm(oldmm, mm, next);
++ switch_mm_irqs_off(oldmm, mm, next);
+
+ if (!prev->mm) {
+ prev->active_mm = NULL;
+@@ -5541,7 +5541,7 @@ void idle_task_exit(void)
+ BUG_ON(cpu_online(smp_processor_id()));
+
+ if (mm != &init_mm) {
+- switch_mm(mm, &init_mm, current);
++ switch_mm_irqs_off(mm, &init_mm, current);
+ finish_arch_post_lock_switch();
+ }
+ mmdrop(mm);
diff --git a/patches.suse/4.4-08-x86-mm-build-arch-x86-mm-tlb-c-even-on-smp.patch b/patches.suse/4.4-08-x86-mm-build-arch-x86-mm-tlb-c-even-on-smp.patch
new file mode 100644
index 0000000000..c4451f16a2
--- /dev/null
+++ b/patches.suse/4.4-08-x86-mm-build-arch-x86-mm-tlb-c-even-on-smp.patch
@@ -0,0 +1,64 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 26 Apr 2016 09:39:07 -0700
+Subject: x86/mm: Build arch/x86/mm/tlb.c even on !SMP
+References: bsc#1068032 CVE-2017-5754
+Git-commit: e1074888c326038340a1ada9129d679e661f2ea6
+Patch-mainline: v4.7-rc1
+References: bsc#1068032
+
+Currently all of the functions that live in tlb.c are inlined on
+!SMP builds. One can debate whether this is a good idea (in many
+respects the code in tlb.c is better than the inlined UP code).
+
+Regardless, I want to add code that needs to be built on UP and SMP
+kernels and relates to tlb flushing, so arrange for tlb.c to be
+compiled unconditionally.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/f0d778f0d828fc46e5d1946bca80f0aaf9abf032.1461688545.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/mm/Makefile | 3 +--
+ arch/x86/mm/tlb.c | 4 ++++
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -1,5 +1,5 @@
+ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
+- pat.o pgtable.o physaddr.o gup.o setup_nx.o
++ pat.o pgtable.o physaddr.o gup.o setup_nx.o tlb.o
+
+ # Make sure __phys_addr has no stackprotector
+ nostackp := $(call cc-option, -fno-stack-protector)
+@@ -9,7 +9,6 @@ CFLAGS_setup_nx.o := $(nostackp)
+ CFLAGS_fault.o := -I$(src)/../include/asm/trace
+
+ obj-$(CONFIG_X86_PAT) += pat_rbtree.o
+-obj-$(CONFIG_SMP) += tlb.o
+
+ obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -28,6 +28,8 @@
+ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
+ */
+
++#ifdef CONFIG_SMP
++
+ struct flush_tlb_info {
+ struct mm_struct *flush_mm;
+ unsigned long flush_start;
+@@ -351,3 +353,5 @@ static int __init create_tlb_single_page
+ return 0;
+ }
+ late_initcall(create_tlb_single_page_flush_ceiling);
++
++#endif /* CONFIG_SMP */
diff --git a/patches.suse/4.4-09-x86-mm-sched-core-uninline-switch_mm.patch b/patches.suse/4.4-09-x86-mm-sched-core-uninline-switch_mm.patch
new file mode 100644
index 0000000000..8806637ebc
--- /dev/null
+++ b/patches.suse/4.4-09-x86-mm-sched-core-uninline-switch_mm.patch
@@ -0,0 +1,244 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 26 Apr 2016 09:39:08 -0700
+Subject: x86/mm, sched/core: Uninline switch_mm()
+References: bsc#1068032 CVE-2017-5754
+Git-commit: 69c0319aabba45bcf33178916a2f06967b4adede
+Patch-mainline: v4.7-rc1
+
+It's fairly large and it has quite a few callers. This may also
+help untangle some headers down the road.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/54f3367803e7f80b2be62c8a21879aa74b1a5f57.1461688545.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/mmu_context.h | 98 -----------------------------------
+ arch/x86/mm/tlb.c | 102 +++++++++++++++++++++++++++++++++++++
+ 2 files changed, 104 insertions(+), 96 deletions(-)
+
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -123,103 +123,9 @@ static inline void destroy_context(struc
+ destroy_context_ldt(mm);
+ }
+
+-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+- struct task_struct *tsk)
+-{
+- unsigned cpu = smp_processor_id();
++extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
++ struct task_struct *tsk);
+
+- if (likely(prev != next)) {
+-#ifdef CONFIG_SMP
+- this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+- this_cpu_write(cpu_tlbstate.active_mm, next);
+-#endif
+- cpumask_set_cpu(cpu, mm_cpumask(next));
+-
+- /*
+- * Re-load page tables.
+- *
+- * This logic has an ordering constraint:
+- *
+- * CPU 0: Write to a PTE for 'next'
+- * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
+- * CPU 1: set bit 1 in next's mm_cpumask
+- * CPU 1: load from the PTE that CPU 0 writes (implicit)
+- *
+- * We need to prevent an outcome in which CPU 1 observes
+- * the new PTE value and CPU 0 observes bit 1 clear in
+- * mm_cpumask. (If that occurs, then the IPI will never
+- * be sent, and CPU 0's TLB will contain a stale entry.)
+- *
+- * The bad outcome can occur if either CPU's load is
+- * reordered before that CPU's store, so both CPUs must
+- * execute full barriers to prevent this from happening.
+- *
+- * Thus, switch_mm needs a full barrier between the
+- * store to mm_cpumask and any operation that could load
+- * from next->pgd. TLB fills are special and can happen
+- * due to instruction fetches or for no reason at all,
+- * and neither LOCK nor MFENCE orders them.
+- * Fortunately, load_cr3() is serializing and gives the
+- * ordering guarantee we need.
+- *
+- */
+- load_cr3(next->pgd);
+-
+- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+-
+- /* Stop flush ipis for the previous mm */
+- cpumask_clear_cpu(cpu, mm_cpumask(prev));
+-
+- /* Load per-mm CR4 state */
+- load_mm_cr4(next);
+-
+-#ifdef CONFIG_MODIFY_LDT_SYSCALL
+- /*
+- * Load the LDT, if the LDT is different.
+- *
+- * It's possible that prev->context.ldt doesn't match
+- * the LDT register. This can happen if leave_mm(prev)
+- * was called and then modify_ldt changed
+- * prev->context.ldt but suppressed an IPI to this CPU.
+- * In this case, prev->context.ldt != NULL, because we
+- * never set context.ldt to NULL while the mm still
+- * exists. That means that next->context.ldt !=
+- * prev->context.ldt, because mms never share an LDT.
+- */
+- if (unlikely(prev->context.ldt != next->context.ldt))
+- load_mm_ldt(next);
+-#endif
+- }
+-#ifdef CONFIG_SMP
+- else {
+- this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+- BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
+-
+- if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
+- /*
+- * On established mms, the mm_cpumask is only changed
+- * from irq context, from ptep_clear_flush() while in
+- * lazy tlb mode, and here. Irqs are blocked during
+- * schedule, protecting us from simultaneous changes.
+- */
+- cpumask_set_cpu(cpu, mm_cpumask(next));
+-
+- /*
+- * We were in lazy tlb mode and leave_mm disabled
+- * tlb flush IPI delivery. We must reload CR3
+- * to make sure to use no freed page tables.
+- *
+- * As above, load_cr3() is serializing and orders TLB
+- * fills with respect to the mm_cpumask write.
+- */
+- load_cr3(next->pgd);
+- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+- load_mm_cr4(next);
+- load_mm_ldt(next);
+- }
+- }
+-#endif
+-}
+
+ #define activate_mm(prev, next) \
+ do { \
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -59,6 +59,108 @@ void leave_mm(int cpu)
+ }
+ EXPORT_SYMBOL_GPL(leave_mm);
+
++#endif /* CONFIG_SMP */
++
++void switch_mm(struct mm_struct *prev, struct mm_struct *next,
++ struct task_struct *tsk)
++{
++ unsigned cpu = smp_processor_id();
++
++ if (likely(prev != next)) {
++#ifdef CONFIG_SMP
++ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
++ this_cpu_write(cpu_tlbstate.active_mm, next);
++#endif
++ cpumask_set_cpu(cpu, mm_cpumask(next));
++
++ /*
++ * Re-load page tables.
++ *
++ * This logic has an ordering constraint:
++ *
++ * CPU 0: Write to a PTE for 'next'
++ * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
++ * CPU 1: set bit 1 in next's mm_cpumask
++ * CPU 1: load from the PTE that CPU 0 writes (implicit)
++ *
++ * We need to prevent an outcome in which CPU 1 observes
++ * the new PTE value and CPU 0 observes bit 1 clear in
++ * mm_cpumask. (If that occurs, then the IPI will never
++ * be sent, and CPU 0's TLB will contain a stale entry.)
++ *
++ * The bad outcome can occur if either CPU's load is
++ * reordered before that CPU's store, so both CPUs must
++ * execute full barriers to prevent this from happening.
++ *
++ * Thus, switch_mm needs a full barrier between the
++ * store to mm_cpumask and any operation that could load
++ * from next->pgd. TLB fills are special and can happen
++ * due to instruction fetches or for no reason at all,
++ * and neither LOCK nor MFENCE orders them.
++ * Fortunately, load_cr3() is serializing and gives the
++ * ordering guarantee we need.
++ *
++ */
++ load_cr3(next->pgd);
++
++ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
++
++ /* Stop flush ipis for the previous mm */
++ cpumask_clear_cpu(cpu, mm_cpumask(prev));
++
++ /* Load per-mm CR4 state */
++ load_mm_cr4(next);
++
++#ifdef CONFIG_MODIFY_LDT_SYSCALL
++ /*
++ * Load the LDT, if the LDT is different.
++ *
++ * It's possible that prev->context.ldt doesn't match
++ * the LDT register. This can happen if leave_mm(prev)
++ * was called and then modify_ldt changed
++ * prev->context.ldt but suppressed an IPI to this CPU.
++ * In this case, prev->context.ldt != NULL, because we
++ * never set context.ldt to NULL while the mm still
++ * exists. That means that next->context.ldt !=
++ * prev->context.ldt, because mms never share an LDT.
++ */
++ if (unlikely(prev->context.ldt != next->context.ldt))
++ load_mm_ldt(next);
++#endif
++ }
++#ifdef CONFIG_SMP
++ else {
++ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
++ BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
++
++ if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
++ /*
++ * On established mms, the mm_cpumask is only changed
++ * from irq context, from ptep_clear_flush() while in
++ * lazy tlb mode, and here. Irqs are blocked during
++ * schedule, protecting us from simultaneous changes.
++ */
++ cpumask_set_cpu(cpu, mm_cpumask(next));
++
++ /*
++ * We were in lazy tlb mode and leave_mm disabled
++ * tlb flush IPI delivery. We must reload CR3
++ * to make sure to use no freed page tables.
++ *
++ * As above, load_cr3() is serializing and orders TLB
++ * fills with respect to the mm_cpumask write.
++ */
++ load_cr3(next->pgd);
++ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
++ load_mm_cr4(next);
++ load_mm_ldt(next);
++ }
++ }
++#endif
++}
++
++#ifdef CONFIG_SMP
++
+ /*
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
diff --git a/patches.suse/4.4-10-x86-mm-sched-core-turn-off-irqs-in-switch_mm.patch b/patches.suse/4.4-10-x86-mm-sched-core-turn-off-irqs-in-switch_mm.patch
new file mode 100644
index 0000000000..934c29bb00
--- /dev/null
+++ b/patches.suse/4.4-10-x86-mm-sched-core-turn-off-irqs-in-switch_mm.patch
@@ -0,0 +1,64 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 26 Apr 2016 09:39:09 -0700
+Subject: x86/mm, sched/core: Turn off IRQs in switch_mm()
+References: bsc#1068032 CVE-2017-5754
+Git-commit: 078194f8e9fe3cf54c8fd8bded48a1db5bd8eb8a
+Patch-mainline: v4.7-rc1
+
+Potential races between switch_mm() and TLB-flush or LDT-flush IPIs
+could be very messy. AFAICT the code is currently okay, whether by
+accident or by careful design, but enabling PCID will make it
+considerably more complicated and will no longer be obviously safe.
+
+Fix it with a big hammer: run switch_mm() with IRQs off.
+
+To avoid a performance hit in the scheduler, we take advantage of
+our knowledge that the scheduler already has IRQs disabled when it
+calls switch_mm().
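+
+Condensed into one place (an editor's sketch of the hunks below, not a
+literal copy of them), the calling convention this establishes is:
+
+  /* arch/x86/mm/tlb.c: the old body moves into a variant that requires
+   * IRQs to already be off; the old name becomes a safe wrapper. */
+  void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+                 struct task_struct *tsk)
+  {
+          unsigned long flags;
+
+          local_irq_save(flags);          /* arbitrary callers: mask IRQs */
+          switch_mm_irqs_off(prev, next, tsk);
+          local_irq_restore(flags);
+  }
+
+  /* kernel/sched/core.c: context_switch() already runs with IRQs
+   * disabled, so it calls switch_mm_irqs_off() directly and skips the
+   * save/restore above. */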
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/f19baf759693c9dcae64bbff76189db77cb13398.1461688545.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/mmu_context.h | 3 +++
+ arch/x86/mm/tlb.c | 10 ++++++++++
+ 2 files changed, 13 insertions(+)
+
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -126,6 +126,9 @@ static inline void destroy_context(struc
+ extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk);
+
++extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
++ struct task_struct *tsk);
++#define switch_mm_irqs_off switch_mm_irqs_off
+
+ #define activate_mm(prev, next) \
+ do { \
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -64,6 +64,16 @@ EXPORT_SYMBOL_GPL(leave_mm);
+ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+ {
++ unsigned long flags;
++
++ local_irq_save(flags);
++ switch_mm_irqs_off(prev, next, tsk);
++ local_irq_restore(flags);
++}
++
++void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
++ struct task_struct *tsk)
++{
+ unsigned cpu = smp_processor_id();
+
+ if (likely(prev != next)) {
diff --git a/patches.suse/4.4-11-arm-hide-finish_arch_post_lock_switch-from-modules.patch b/patches.suse/4.4-11-arm-hide-finish_arch_post_lock_switch-from-modules.patch
new file mode 100644
index 0000000000..e887606fc0
--- /dev/null
+++ b/patches.suse/4.4-11-arm-hide-finish_arch_post_lock_switch-from-modules.patch
@@ -0,0 +1,93 @@
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Fri, 13 May 2016 15:30:13 +0200
+Subject: ARM: Hide finish_arch_post_lock_switch() from modules
+References: bsc#1068032 CVE-2017-5754
+Git-commit: ef0491ea17f8019821c7e9c8e801184ecf17f85a
+Patch-mainline: v4.7-rc1
+
+The introduction of switch_mm_irqs_off() brought back an old bug
+regarding the use of preempt_enable_no_resched:
+
+As part of:
+
+ 62b94a08da1b ("sched/preempt: Take away preempt_enable_no_resched() from modules")
+
+the definition of preempt_enable_no_resched() is only available in
+built-in code, not in loadable modules, so we can't generally use
+it from header files.
+
+However, the ARM version of finish_arch_post_lock_switch()
+calls preempt_enable_no_resched() and is defined as a static
+inline function in asm/mmu_context.h. This in turn means we cannot
+include asm/mmu_context.h from modules.
+
+With today's tip tree, asm/mmu_context.h gets included from
+linux/mmu_context.h, which is normally the exact pattern one would
+expect, but unfortunately, linux/mmu_context.h can be included from
+the vhost driver that is a loadable module, now causing this compile
+time error with modular configs:
+
+ In file included from ../include/linux/mmu_context.h:4:0,
+ from ../drivers/vhost/vhost.c:18:
+ ../arch/arm/include/asm/mmu_context.h: In function 'finish_arch_post_lock_switch':
+ ../arch/arm/include/asm/mmu_context.h:88:3: error: implicit declaration of function 'preempt_enable_no_resched' [-Werror=implicit-function-declaration]
+ preempt_enable_no_resched();
+
+Andy already tried to fix the bug by including linux/preempt.h
+from asm/mmu_context.h, but that didn't help. Arnd suggested reordering
+the header files, which wasn't popular, so let's use this
+workaround instead:
+
+The finish_arch_post_lock_switch() definition is now also hidden
+inside of #ifndef MODULE, so we don't see anything referencing
+preempt_enable_no_resched() from a header file. I've built a
+few hundred randconfig kernels with this, and did not see any
+new problems.
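+
+The shape of the guard, reduced to its essentials (an illustrative sketch;
+builtin_only_helper() is a stand-in name, not a real kernel symbol):
+
+  #ifndef MODULE
+  #define finish_arch_post_lock_switch finish_arch_post_lock_switch
+  static inline void finish_arch_post_lock_switch(void)
+  {
+          builtin_only_helper();  /* e.g. preempt_enable_no_resched() */
+  }
+  #endif /* !MODULE */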
+
+Tested-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Frederic Weisbecker <fweisbec@gmail.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Russell King - ARM Linux <linux@armlinux.org.uk>
+Cc: Stephane Eranian <eranian@google.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vince Weaver <vincent.weaver@maine.edu>
+Cc: linux-arm-kernel@lists.infradead.org
+Fixes: f98db6013c55 ("sched/core: Add switch_mm_irqs_off() and use it in the scheduler")
+Link: http://lkml.kernel.org/r/1463146234-161304-1-git-send-email-arnd@arndb.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/arm/include/asm/mmu_context.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/arm/include/asm/mmu_context.h
++++ b/arch/arm/include/asm/mmu_context.h
+@@ -61,6 +61,7 @@ static inline void check_and_switch_cont
+ cpu_switch_mm(mm->pgd, mm);
+ }
+
++#ifndef MODULE
+ #define finish_arch_post_lock_switch \
+ finish_arch_post_lock_switch
+ static inline void finish_arch_post_lock_switch(void)
+@@ -82,6 +83,7 @@ static inline void finish_arch_post_lock
+ preempt_enable_no_resched();
+ }
+ }
++#endif /* !MODULE */
+
+ #endif /* CONFIG_MMU */
+
diff --git a/patches.suse/4.4-12-sched-core-idle_task_exit-shouldn-t-use-switch_mm_irqs_off.patch b/patches.suse/4.4-12-sched-core-idle_task_exit-shouldn-t-use-switch_mm_irqs_off.patch
new file mode 100644
index 0000000000..b1f88a2861
--- /dev/null
+++ b/patches.suse/4.4-12-sched-core-idle_task_exit-shouldn-t-use-switch_mm_irqs_off.patch
@@ -0,0 +1,42 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Fri, 9 Jun 2017 11:49:15 -0700
+Subject: sched/core: Idle_task_exit() shouldn't use switch_mm_irqs_off()
+References: bsc#1068032 CVE-2017-5754
+Git-commit: 252d2a4117bc181b287eeddf848863788da733ae
+Patch-mainline: v4.12-rc6
+
+idle_task_exit() can be called with IRQs on (on x86) and therefore
+should use switch_mm(), not switch_mm_irqs_off().
+
+This doesn't seem to cause any problems right now, but it will
+confuse my upcoming TLB flush changes. Nonetheless, I think it
+should be backported because it's trivial. There won't be any
+meaningful performance impact because idle_task_exit() is only
+used when offlining a CPU.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Fixes: f98db6013c55 ("sched/core: Add switch_mm_irqs_off() and use it in the scheduler")
+Link: http://lkml.kernel.org/r/ca3d1a9fa93a0b49f5a8ff729eda3640fb6abdf9.1497034141.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ kernel/sched/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -5541,7 +5541,7 @@ void idle_task_exit(void)
+ BUG_ON(cpu_online(smp_processor_id()));
+
+ if (mm != &init_mm) {
+- switch_mm_irqs_off(mm, &init_mm, current);
++ switch_mm(mm, &init_mm, current);
+ finish_arch_post_lock_switch();
+ }
+ mmdrop(mm);
diff --git a/patches.suse/4.4-15-x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch b/patches.suse/4.4-15-x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch
new file mode 100644
index 0000000000..9bb740715b
--- /dev/null
+++ b/patches.suse/4.4-15-x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch
@@ -0,0 +1,98 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sat, 22 Apr 2017 00:01:20 -0700
+Subject: x86/mm: Remove flush_tlb() and flush_tlb_current_task()
+References: bsc#1068032 CVE-2017-5754
+Git-commit: 29961b59a51f8c6838a26a45e871a7ed6771809b
+Patch-mainline: v4.12-rc1
+
+I was trying to figure out how flush_tlb_current_task() would
+possibly work correctly if current->mm != current->active_mm, but I
+realized I could spare myself the effort: it has no callers except
+the unused flush_tlb() macro.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/e52d64c11690f85e9f1d69d7b48cc2269cd2e94b.1492844372.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/tlbflush.h | 9 ---------
+ arch/x86/mm/tlb.c | 17 -----------------
+ 2 files changed, 26 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -204,7 +204,6 @@ static inline void __flush_tlb_one(unsig
+ /*
+ * TLB flushing:
+ *
+- * - flush_tlb() flushes the current mm struct TLBs
+ * - flush_tlb_all() flushes all processes TLBs
+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+@@ -236,11 +235,6 @@ static inline void flush_tlb_all(void)
+ __flush_tlb_all();
+ }
+
+-static inline void flush_tlb(void)
+-{
+- __flush_tlb_up();
+-}
+-
+ static inline void local_flush_tlb(void)
+ {
+ __flush_tlb_up();
+@@ -302,14 +296,11 @@ static inline void flush_tlb_kernel_rang
+ flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
+
+ extern void flush_tlb_all(void);
+-extern void flush_tlb_current_task(void);
+ extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+ extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long vmflag);
+ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
+-#define flush_tlb() flush_tlb_current_task()
+-
+ void native_flush_tlb_others(struct cpumask *cpumask,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end);
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -272,23 +272,6 @@ void native_flush_tlb_others(struct cpum
+ smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
+ }
+
+-void flush_tlb_current_task(void)
+-{
+- struct mm_struct *mm = current->mm;
+-
+- preempt_disable();
+-
+- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+-
+- /* This is an implicit full barrier that synchronizes with switch_mm. */
+- local_flush_tlb();
+-
+- trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
+- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+- flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
+- preempt_enable();
+-}
+-
+ /*
+ * See Documentation/x86/tlb.txt for details. We choose 33
+ * because it is large enough to cover the vast majority (at
diff --git a/patches.suse/4.4-16-x86-mm-make-flush_tlb_mm_range-more-predictable.patch b/patches.suse/4.4-16-x86-mm-make-flush_tlb_mm_range-more-predictable.patch
new file mode 100644
index 0000000000..1875693ca5
--- /dev/null
+++ b/patches.suse/4.4-16-x86-mm-make-flush_tlb_mm_range-more-predictable.patch
@@ -0,0 +1,78 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sat, 22 Apr 2017 00:01:21 -0700
+Subject: x86/mm: Make flush_tlb_mm_range() more predictable
+References: bsc#1068032 CVE-2017-5754
+Git-commit: ce27374fabf553153c3f53efcaa9bfab9216bd8c
+Patch-mainline: v4.12-rc1
+
+I'm about to rewrite the function almost completely, but first I
+want to get a functional change out of the way. Currently, if
+flush_tlb_mm_range() does not flush the local TLB at all, it will
+never do individual page flushes on remote CPUs. This seems to be
+an accident, and preserving it will be awkward. Let's change it
+first so that any regressions in the rewrite will be easier to
+bisect and so that the rewrite can attempt to change no visible
+behavior at all.
+
+The fix is simple: we can simply avoid short-circuiting the
+calculation of base_pages_to_flush.
+
+As a side effect, this also eliminates a potential corner case: if
+tlb_single_page_flush_ceiling == TLB_FLUSH_ALL, flush_tlb_mm_range()
+could have ended up flushing the entire address space one page at a
+time.
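+
+Sketched with concrete numbers (an editor's illustration of the reordering
+done below, assuming 4 KiB pages and the default ceiling of 33):
+
+  unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
+
+  if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
+          base_pages_to_flush = (end - start) >> PAGE_SHIFT;
+  if (base_pages_to_flush > tlb_single_page_flush_ceiling)
+          base_pages_to_flush = TLB_FLUSH_ALL;
+
+  /* A 64 KiB range -> 16 pages, below the ceiling: per-page INVLPGs.
+   * A 1 MiB range  -> 256 pages, above the ceiling: one full flush.
+   * The value is now computed before the active_mm/lazy early-outs, so
+   * the remote flush at "out:" can use the same ranged decision. */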
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/4b29b771d9975aad7154c314534fec235618175a.1492844372.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/mm/tlb.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -292,6 +292,12 @@ void flush_tlb_mm_range(struct mm_struct
+ unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
+
+ preempt_disable();
++
++ if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
++ base_pages_to_flush = (end - start) >> PAGE_SHIFT;
++ if (base_pages_to_flush > tlb_single_page_flush_ceiling)
++ base_pages_to_flush = TLB_FLUSH_ALL;
++
+ if (current->active_mm != mm) {
+ /* Synchronize with switch_mm. */
+ smp_mb();
+@@ -308,15 +314,11 @@ void flush_tlb_mm_range(struct mm_struct
+ goto out;
+ }
+
+- if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
+- base_pages_to_flush = (end - start) >> PAGE_SHIFT;
+-
+ /*
+ * Both branches below are implicit full barriers (MOV to CR or
+ * INVLPG) that synchronize with switch_mm.
+ */
+- if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
+- base_pages_to_flush = TLB_FLUSH_ALL;
++ if (base_pages_to_flush == TLB_FLUSH_ALL) {
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ local_flush_tlb();
+ } else {
diff --git a/patches.suse/4.4-17-x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch b/patches.suse/4.4-17-x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
new file mode 100644
index 0000000000..301689b286
--- /dev/null
+++ b/patches.suse/4.4-17-x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
@@ -0,0 +1,101 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 22 May 2017 15:30:01 -0700
+Subject: x86/mm: Reimplement flush_tlb_page() using flush_tlb_mm_range()
+References: bsc#1068032 CVE-2017-5754
+Git-commit: ca6c99c0794875c6d1db6e22f246699691ab7e6b
+Patch-mainline: v4.13-rc1
+
+flush_tlb_page() was very similar to flush_tlb_mm_range() except that
+it had a couple of issues:
+
+ - It was missing an smp_mb() in the case where
+ current->active_mm != mm. (This is a longstanding bug reported by Nadav Amit)
+
+ - It was missing tracepoints and vm counter updates.
+
+The only reason that I can see for keeping it as a separate
+function is that it could avoid a few branches that
+flush_tlb_mm_range() needs to decide to flush just one page. This
+hardly seems worthwhile. If we decide we want to get rid of those
+branches again, a better way would be to introduce an
+__flush_tlb_mm_range() helper and make both flush_tlb_page() and
+flush_tlb_mm_range() use it.
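+
+For illustration only, the alternative alluded to above might look roughly
+like this (a hypothetical sketch; no such helper is introduced here):
+
+  /* hypothetical common body, shared by both entry points */
+  static void __flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+                                   unsigned long end, unsigned long vmflag);
+
+  static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
+  {
+          /* the single-page caller could skip the range/ceiling branches */
+          __flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
+  }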
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Acked-by: Kees Cook <keescook@chromium.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/3cc3847cf888d8907577569b8bac3f01992ef8f9.1495492063.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/tlbflush.h | 6 +++++-
+ arch/x86/mm/tlb.c | 27 ---------------------------
+ 2 files changed, 5 insertions(+), 28 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -296,11 +296,15 @@ static inline void flush_tlb_kernel_rang
+ flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
+
+ extern void flush_tlb_all(void);
+-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+ extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long vmflag);
+ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
++static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
++{
++ flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
++}
++
+ void native_flush_tlb_others(struct cpumask *cpumask,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end);
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -339,33 +339,6 @@ out:
+ preempt_enable();
+ }
+
+-void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
+-{
+- struct mm_struct *mm = vma->vm_mm;
+-
+- preempt_disable();
+-
+- if (current->active_mm == mm) {
+- if (current->mm) {
+- /*
+- * Implicit full barrier (INVLPG) that synchronizes
+- * with switch_mm.
+- */
+- __flush_tlb_one(start);
+- } else {
+- leave_mm(smp_processor_id());
+-
+- /* Synchronize with switch_mm. */
+- smp_mb();
+- }
+- }
+-
+- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+- flush_tlb_others(mm_cpumask(mm), mm, start, start + PAGE_SIZE);
+-
+- preempt_enable();
+-}
+-
+ static void do_flush_tlb_all(void *info)
+ {
+ count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
diff --git a/patches.suse/4.4-18-x86-mm-remove-the-up-asm-tlbflush-h-code-always-use-the-formerly-smp-code.patch b/patches.suse/4.4-18-x86-mm-remove-the-up-asm-tlbflush-h-code-always-use-the-formerly-smp-code.patch
new file mode 100644
index 0000000000..53af3a1c98
--- /dev/null
+++ b/patches.suse/4.4-18-x86-mm-remove-the-up-asm-tlbflush-h-code-always-use-the-formerly-smp-code.patch
@@ -0,0 +1,297 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 May 2017 10:00:14 -0700
+Subject: x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly)
+ SMP code
+References: bsc#1068032 CVE-2017-5754
+Git-commit: ce4a4e565f5264909a18c733b864c3f74467f69e
+Patch-mainline: v4.13-rc1
+
+The UP asm/tlbflush.h generates somewhat nicer code than the SMP version.
+Aside from that, it's fallen quite a bit behind the SMP code:
+
+ - flush_tlb_mm_range() didn't flush individual pages if the range
+ was small.
+
+ - The lazy TLB code was much weaker. This usually wouldn't matter,
+ but, if a kernel thread flushed its lazy "active_mm" more than
+ once (due to reclaim or similar), it wouldn't be unlazied and
+ would instead pointlessly flush repeatedly.
+
+ - Tracepoints were missing.
+
+Aside from that, simply having the UP code around was a maintenance
+burden, since it means that any change to the TLB flush code had to
+make sure not to break it.
+
+Simplify everything by deleting the UP code.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/Kconfig | 2
+ arch/x86/include/asm/hardirq.h | 2
+ arch/x86/include/asm/mmu.h | 6 --
+ arch/x86/include/asm/mmu_context.h | 2
+ arch/x86/include/asm/tlbflush.h | 78 -------------------------------------
+ arch/x86/mm/init.c | 2
+ arch/x86/mm/tlb.c | 17 --------
+ 7 files changed, 5 insertions(+), 104 deletions(-)
+
+--- a/arch/x86/include/asm/hardirq.h
++++ b/arch/x86/include/asm/hardirq.h
+@@ -22,8 +22,8 @@ typedef struct {
+ #ifdef CONFIG_SMP
+ unsigned int irq_resched_count;
+ unsigned int irq_call_count;
+- unsigned int irq_tlb_count;
+ #endif
++ unsigned int irq_tlb_count;
+ #ifdef CONFIG_X86_THERMAL_VECTOR
+ unsigned int irq_thermal_count;
+ #endif
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -99,10 +99,8 @@ static inline void load_mm_ldt(struct mm
+
+ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+ {
+-#ifdef CONFIG_SMP
+ if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+-#endif
+ }
+
+ static inline int init_new_context(struct task_struct *tsk,
+--- a/arch/x86/include/asm/mmu.h
++++ b/arch/x86/include/asm/mmu.h
+@@ -36,12 +36,6 @@ typedef struct {
+ #endif
+ } mm_context_t;
+
+-#ifdef CONFIG_SMP
+ void leave_mm(int cpu);
+-#else
+-static inline void leave_mm(int cpu)
+-{
+-}
+-#endif
+
+ #endif /* _ASM_X86_MMU_H */
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -6,6 +6,7 @@
+
+ #include <asm/processor.h>
+ #include <asm/special_insns.h>
++#include <asm/smp.h>
+
+ static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ unsigned long type)
+@@ -64,10 +65,8 @@ static inline void invpcid_flush_all_non
+ #endif
+
+ struct tlb_state {
+-#ifdef CONFIG_SMP
+ struct mm_struct *active_mm;
+ int state;
+-#endif
+
+ /*
+ * Access to this CR4 shadow and to H/W CR4 is protected by
+@@ -215,79 +214,6 @@ static inline void __flush_tlb_one(unsig
+ * and page-granular flushes are available only on i486 and up.
+ */
+
+-#ifndef CONFIG_SMP
+-
+-/* "_up" is for UniProcessor.
+- *
+- * This is a helper for other header functions. *Not* intended to be called
+- * directly. All global TLB flushes need to either call this, or to bump the
+- * vm statistics themselves.
+- */
+-static inline void __flush_tlb_up(void)
+-{
+- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+- __flush_tlb();
+-}
+-
+-static inline void flush_tlb_all(void)
+-{
+- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+- __flush_tlb_all();
+-}
+-
+-static inline void local_flush_tlb(void)
+-{
+- __flush_tlb_up();
+-}
+-
+-static inline void flush_tlb_mm(struct mm_struct *mm)
+-{
+- if (mm == current->active_mm)
+- __flush_tlb_up();
+-}
+-
+-static inline void flush_tlb_page(struct vm_area_struct *vma,
+- unsigned long addr)
+-{
+- if (vma->vm_mm == current->active_mm)
+- __flush_tlb_one(addr);
+-}
+-
+-static inline void flush_tlb_range(struct vm_area_struct *vma,
+- unsigned long start, unsigned long end)
+-{
+- if (vma->vm_mm == current->active_mm)
+- __flush_tlb_up();
+-}
+-
+-static inline void flush_tlb_mm_range(struct mm_struct *mm,
+- unsigned long start, unsigned long end, unsigned long vmflag)
+-{
+- if (mm == current->active_mm)
+- __flush_tlb_up();
+-}
+-
+-static inline void native_flush_tlb_others(const struct cpumask *cpumask,
+- struct mm_struct *mm,
+- unsigned long start,
+- unsigned long end)
+-{
+-}
+-
+-static inline void reset_lazy_tlbstate(void)
+-{
+-}
+-
+-static inline void flush_tlb_kernel_range(unsigned long start,
+- unsigned long end)
+-{
+- flush_tlb_all();
+-}
+-
+-#else /* SMP */
+-
+-#include <asm/smp.h>
+-
+ #define local_flush_tlb() __flush_tlb()
+
+ #define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
+@@ -319,8 +245,6 @@ static inline void reset_lazy_tlbstate(v
+ this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
+ }
+
+-#endif /* SMP */
+-
+ #ifndef CONFIG_PARAVIRT
+ #define flush_tlb_others(mask, mm, start, end) \
+ native_flush_tlb_others(mask, mm, start, end)
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -42,7 +42,7 @@ config X86
+ select ARCH_USE_CMPXCHG_LOCKREF if X86_64
+ select ARCH_USE_QUEUED_RWLOCKS
+ select ARCH_USE_QUEUED_SPINLOCKS
+- select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP
++ select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+ select ARCH_WANT_FRAME_POINTERS
+ select ARCH_WANT_IPC_PARSE_VERSION if X86_32
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -753,10 +753,8 @@ void __init zone_sizes_init(void)
+ }
+
+ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+-#ifdef CONFIG_SMP
+ .active_mm = &init_mm,
+ .state = 0,
+-#endif
+ .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
+ };
+ EXPORT_SYMBOL_GPL(cpu_tlbstate);
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -15,7 +15,7 @@
+ #include <linux/debugfs.h>
+
+ /*
+- * Smarter SMP flushing macros.
++ * TLB flushing, formerly SMP-only
+ * c/o Linus Torvalds.
+ *
+ * These mean you can really definitely utterly forget about
+@@ -28,8 +28,6 @@
+ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
+ */
+
+-#ifdef CONFIG_SMP
+-
+ struct flush_tlb_info {
+ struct mm_struct *flush_mm;
+ unsigned long flush_start;
+@@ -59,8 +57,6 @@ void leave_mm(int cpu)
+ }
+ EXPORT_SYMBOL_GPL(leave_mm);
+
+-#endif /* CONFIG_SMP */
+-
+ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+ {
+@@ -77,10 +73,8 @@ void switch_mm_irqs_off(struct mm_struct
+ unsigned cpu = smp_processor_id();
+
+ if (likely(prev != next)) {
+-#ifdef CONFIG_SMP
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ this_cpu_write(cpu_tlbstate.active_mm, next);
+-#endif
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+ /*
+@@ -137,9 +131,7 @@ void switch_mm_irqs_off(struct mm_struct
+ if (unlikely(prev->context.ldt != next->context.ldt))
+ load_mm_ldt(next);
+ #endif
+- }
+-#ifdef CONFIG_SMP
+- else {
++ } else {
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
+
+@@ -166,11 +158,8 @@ void switch_mm_irqs_off(struct mm_struct
+ load_mm_ldt(next);
+ }
+ }
+-#endif
+ }
+
+-#ifdef CONFIG_SMP
+-
+ /*
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+@@ -423,5 +412,3 @@ static int __init create_tlb_single_page
+ return 0;
+ }
+ late_initcall(create_tlb_single_page_flush_ceiling);
+-
+-#endif /* CONFIG_SMP */
diff --git a/patches.suse/4.4-20-x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch b/patches.suse/4.4-20-x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
new file mode 100644
index 0000000000..9d7062817a
--- /dev/null
+++ b/patches.suse/4.4-20-x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
@@ -0,0 +1,70 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 29 Jun 2017 08:53:20 -0700
+Subject: x86/mm: Add the 'nopcid' boot option to turn off PCID
+References: bsc#1068032 CVE-2017-5754
+Git-commit: 0790c9aad84901ca1bdc14746175549c8b5da215
+Patch-mainline: v4.14-rc1
+
+The parameter is only present on x86_64 systems to save a few bytes,
+as PCID is always disabled on x86_32.
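+
+Usage note (an editor's illustration, not part of the patch): the option
+takes no value, so only the bare token passes the exact-match check below:
+
+  nopcid        X86_FEATURE_PCID is cleared (if the CPU advertises it) and
+                "nopcid: PCID feature disabled" is printed
+  nopcid=1      the handler returns 0 and the option is left unhandled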
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/8bbb2e65bcd249a5f18bfb8128b4689f08ac2b60.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ Documentation/kernel-parameters.txt | 2 ++
+ arch/x86/kernel/cpu/common.c | 18 ++++++++++++++++++
+ 2 files changed, 20 insertions(+)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -165,6 +165,24 @@ static int __init x86_mpx_setup(char *s)
+ }
+ __setup("nompx", x86_mpx_setup);
+
++#ifdef CONFIG_X86_64
++static int __init x86_pcid_setup(char *s)
++{
++ /* require an exact match without trailing characters */
++ if (strlen(s))
++ return 0;
++
++ /* do not emit a message if the feature is not present */
++ if (!boot_cpu_has(X86_FEATURE_PCID))
++ return 1;
++
++ setup_clear_cpu_cap(X86_FEATURE_PCID);
++ pr_info("nopcid: PCID feature disabled\n");
++ return 1;
++}
++__setup("nopcid", x86_pcid_setup);
++#endif
++
+ static int __init x86_noinvpcid_setup(char *s)
+ {
+ /* noinvpcid doesn't accept parameters */
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2640,6 +2640,8 @@ bytes respectively. Such letter suffixes
+ nopat [X86] Disable PAT (page attribute table extension of
+ pagetables) support.
+
++ nopcid [X86-64] Disable the PCID cpu feature.
++
+ norandmaps Don't use address space randomization. Equivalent to
+ echo 0 > /proc/sys/kernel/randomize_va_space
+
diff --git a/patches.suse/4.4-21-x86-mm-enable-cr4-pcide-on-supported-systems.patch b/patches.suse/4.4-21-x86-mm-enable-cr4-pcide-on-supported-systems.patch
new file mode 100644
index 0000000000..38e9e14f28
--- /dev/null
+++ b/patches.suse/4.4-21-x86-mm-enable-cr4-pcide-on-supported-systems.patch
@@ -0,0 +1,105 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 29 Jun 2017 08:53:21 -0700
+Subject: x86/mm: Enable CR4.PCIDE on supported systems
+References: bsc#1068032 CVE-2017-5754
+Git-commit: 660da7c9228f685b2ebe664f9fd69aaddcc420b5
+Patch-mainline: v4.14-rc1
+
+We can use PCID if the CPU has PCID and PGE and we're not on Xen.
+
+By itself, this has no effect. A followup patch will start using PCID.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/6327ecd907b32f79d5aa0d466f04503bbec5df88.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/tlbflush.h | 8 ++++++++
+ arch/x86/kernel/cpu/common.c | 22 ++++++++++++++++++++++
+ arch/x86/xen/enlighten.c | 6 ++++++
+ 3 files changed, 36 insertions(+)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -190,6 +190,14 @@ static inline void __flush_tlb_all(void)
+ __flush_tlb_global();
+ else
+ __flush_tlb();
++
++ /*
++ * Note: if we somehow had PCID but not PGE, then this wouldn't work --
++ * we'd end up flushing kernel translations for the current ASID but
++ * we might fail to flush kernel translations for other cached ASIDs.
++ *
++ * To avoid this issue, we force PCID off if PGE is off.
++ */
+ }
+
+ static inline void __flush_tlb_one(unsigned long addr)
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -326,6 +326,25 @@ static __always_inline void setup_smap(s
+ }
+ }
+
++static void setup_pcid(struct cpuinfo_x86 *c)
++{
++ if (cpu_has(c, X86_FEATURE_PCID)) {
++ if (cpu_has(c, X86_FEATURE_PGE)) {
++ cr4_set_bits(X86_CR4_PCIDE);
++ } else {
++ /*
++ * flush_tlb_all(), as currently implemented, won't
++ * work if PCID is on but PGE is not. Since that
++ * combination doesn't exist on real hardware, there's
++ * no reason to try to fully support it, but it's
++ * polite to avoid corrupting data if we're on
++ * an improperly configured VM.
++ */
++ clear_cpu_cap(c, X86_FEATURE_PCID);
++ }
++ }
++}
++
+ /*
+ * Protection Keys are not available in 32-bit mode.
+ */
+@@ -1005,6 +1024,9 @@ static void identify_cpu(struct cpuinfo_
+ setup_smep(c);
+ setup_smap(c);
+
++ /* Set up PCID */
++ setup_pcid(c);
++
+ /*
+ * The vendor-specific functions might have changed features.
+ * Now we do "generic changes."
+--- a/arch/x86/xen/enlighten.c
++++ b/arch/x86/xen/enlighten.c
+@@ -443,6 +443,12 @@ static void __init xen_init_cpuid_mask(v
+ ~((1 << X86_FEATURE_MTRR) | /* disable MTRR */
+ (1 << X86_FEATURE_ACC)); /* thermal monitoring */
+
++ /*
++ * Xen PV would need some work to support PCID: CR3 handling as well
++ * as xen_flush_tlb_others() would need updating.
++ */
++ cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_PCID % 32)); /* disable PCID */
++
+ if (!xen_initial_domain())
+ cpuid_leaf1_edx_mask &=
+ ~((1 << X86_FEATURE_ACPI)); /* disable ACPI */
diff --git a/patches.suse/4.4-22-KAISER-Kernel-Address-Isolation.patch b/patches.suse/4.4-22-KAISER-Kernel-Address-Isolation.patch
new file mode 100644
index 0000000000..202f526390
--- /dev/null
+++ b/patches.suse/4.4-22-KAISER-Kernel-Address-Isolation.patch
@@ -0,0 +1,972 @@
+From af622162cf90062d66278f3de8f487e97fc4c544 Mon Sep 17 00:00:00 2001
+From: Richard Fellner <richard.fellner@student.tugraz.at>
+Date: Thu, 4 May 2017 14:26:50 +0200
+Subject: [PATCH 22/43] KAISER: Kernel Address Isolation
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+This patch introduces our implementation of KAISER (Kernel Address Isolation to
+have Side-channels Efficiently Removed), a kernel isolation technique to close
+hardware side channels on kernel address information.
+
+More information about the patch can be found on:
+
+ https://github.com/IAIK/KAISER
+
+From: Richard Fellner <richard.fellner@student.tugraz.at>
+From: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
+X-Subject: [RFC, PATCH] x86_64: KAISER - do not map kernel in user mode
+Date: Thu, 4 May 2017 14:26:50 +0200
+Link: http://marc.info/?l=linux-kernel&m=149390087310405&w=2
+Kaiser-4.10-SHA1: c4b1831d44c6144d3762ccc72f0c4e71a0c713e5
+
+To: <linux-kernel@vger.kernel.org>
+To: <kernel-hardening@lists.openwall.com>
+Cc: <clementine.maurice@iaik.tugraz.at>
+Cc: <moritz.lipp@iaik.tugraz.at>
+Cc: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
+Cc: Richard Fellner <richard.fellner@student.tugraz.at>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: <kirill.shutemov@linux.intel.com>
+Cc: <anders.fogh@gdata-adan.de>
+
+After several recent works [1,2,3] KASLR on x86_64 was basically
+considered dead by many researchers. We have been working on an
+efficient but effective fix for this problem and found that not mapping
+the kernel space when running in user mode is the solution to this
+problem [4] (the corresponding paper [5] will be presented at ESSoS17).
+
+With this RFC patch we allow anybody to configure their kernel with the
+flag CONFIG_KAISER to add our defense mechanism.
+
+If there are any questions we would love to answer them.
+We also appreciate any comments!
+
+Cheers,
+Daniel (+ the KAISER team from Graz University of Technology)
+
+[1] http://www.ieee-security.org/TC/SP2013/papers/4977a191.pdf
+[2] https://www.blackhat.com/docs/us-16/materials/us-16-Fogh-Using-Undocumented-CPU-Behaviour-To-See-Into-Kernel-Mode-And-Break-KASLR-In-The-Process.pdf
+[3] https://www.blackhat.com/docs/us-16/materials/us-16-Jang-Breaking-Kernel-Address-Space-Layout-Randomization-KASLR-With-Intel-TSX.pdf
+[4] https://github.com/IAIK/KAISER
+[5] https://gruss.cc/files/kaiser.pdf
+
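+How the switching works, rendered in C for readability (an editor's sketch
+of the _SWITCH_TO_*_CR3 assembly macros added below; the real entry code
+does this in assembly with a scratch register): the kernel PGD and the
+shadow PGD are two consecutive 4 KiB pages, so the switch is a single flip
+of bit 12 (0x1000) in CR3.
+
+  static inline void sketch_switch_to_kernel_cr3(void)
+  {
+          /* low page of the pair: full kernel mapping */
+          native_write_cr3(native_read_cr3() & ~0x1000UL);
+  }
+
+  static inline void sketch_switch_to_user_cr3(void)
+  {
+          /* high page of the pair: shadow pgd, minimal kernel mapping */
+          native_write_cr3(native_read_cr3() | 0x1000UL);
+  }
+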
+(cherry picked from Change-Id: I0eb000c33290af01fc4454ca0c701d00f1d30b1d)
+Conflicts:
+arch/x86/entry/entry_64.S
+arch/x86/entry/entry_64_compat.S
+arch/x86/mm/Makefile
+kernel/fork.c
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/entry/entry_64.S | 19 +++-
+ arch/x86/entry/entry_64_compat.S | 6 +
+ arch/x86/include/asm/hw_irq.h | 2
+ arch/x86/include/asm/kaiser.h | 113 ++++++++++++++++++++++++
+ arch/x86/include/asm/pgtable.h | 4
+ arch/x86/include/asm/pgtable_64.h | 21 ++++
+ arch/x86/include/asm/pgtable_types.h | 12 ++
+ arch/x86/include/asm/processor.h | 7 +
+ arch/x86/kernel/cpu/common.c | 4
+ arch/x86/kernel/espfix_64.c | 6 +
+ arch/x86/kernel/head_64.S | 16 ++-
+ arch/x86/kernel/irqinit.c | 2
+ arch/x86/kernel/process.c | 2
+ arch/x86/mm/Makefile | 2
+ arch/x86/mm/kaiser.c | 160 +++++++++++++++++++++++++++++++++++
+ arch/x86/mm/pageattr.c | 2
+ arch/x86/mm/pgtable.c | 26 +++++
+ include/asm-generic/vmlinux.lds.h | 11 ++
+ include/linux/percpu-defs.h | 30 ++++++
+ init/main.c | 6 +
+ kernel/fork.c | 8 +
+ security/Kconfig | 7 +
+ 22 files changed, 449 insertions(+), 17 deletions(-)
+ create mode 100644 arch/x86/include/asm/kaiser.h
+ create mode 100644 arch/x86/mm/kaiser.c
+
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -13,6 +13,7 @@
+ #include <asm/irqflags.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/kaiser.h>
+ #include <linux/linkage.h>
+ #include <linux/err.h>
+
+@@ -50,6 +51,7 @@ ENDPROC(native_usergs_sysret32)
+ ENTRY(entry_SYSENTER_compat)
+ /* Interrupts are off on entry. */
+ SWAPGS_UNSAFE_STACK
++ SWITCH_KERNEL_CR3_NO_STACK
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+ /*
+@@ -161,6 +163,7 @@ ENDPROC(entry_SYSENTER_compat)
+ ENTRY(entry_SYSCALL_compat)
+ /* Interrupts are off on entry. */
+ SWAPGS_UNSAFE_STACK
++ SWITCH_KERNEL_CR3_NO_STACK
+
+ /* Stash user ESP and switch to the kernel stack. */
+ movl %esp, %r8d
+@@ -208,6 +211,7 @@ ENTRY(entry_SYSCALL_compat)
+ /* Opportunistic SYSRET */
+ sysret32_from_system_call:
+ TRACE_IRQS_ON /* User mode traces as IRQs on. */
++ SWITCH_USER_CR3
+ movq RBX(%rsp), %rbx /* pt_regs->rbx */
+ movq RBP(%rsp), %rbp /* pt_regs->rbp */
+ movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
+@@ -269,6 +273,7 @@ ENTRY(entry_INT80_compat)
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ ASM_CLAC /* Do this early to minimize exposure */
+ SWAPGS
++ SWITCH_KERNEL_CR3_NO_STACK
+
+ /*
+ * User tracing code (ptrace or signal handlers) might assume that
+@@ -311,6 +316,7 @@ ENTRY(entry_INT80_compat)
+
+ /* Go back to user mode. */
+ TRACE_IRQS_ON
++ SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_regs_and_iret
+ END(entry_INT80_compat)
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -35,6 +35,7 @@
+ #include <asm/asm.h>
+ #include <asm/smap.h>
+ #include <asm/pgtable_types.h>
++#include <asm/kaiser.h>
+ #include <linux/err.h>
+
+ /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
+@@ -135,6 +136,7 @@ ENTRY(entry_SYSCALL_64)
+ * it is too small to ever cause noticeable irq latency.
+ */
+ SWAPGS_UNSAFE_STACK
++ SWITCH_KERNEL_CR3_NO_STACK
+ /*
+ * A hypervisor implementation might want to use a label
+ * after the swapgs, so that it can do the swapgs
+@@ -207,9 +209,10 @@ entry_SYSCALL_64_fastpath:
+ testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */
+
+- RESTORE_C_REGS_EXCEPT_RCX_R11
+ movq RIP(%rsp), %rcx
+ movq EFLAGS(%rsp), %r11
++ RESTORE_C_REGS_EXCEPT_RCX_R11
++ SWITCH_USER_CR3
+ movq RSP(%rsp), %rsp
+ /*
+ * 64-bit SYSRET restores rip from rcx,
+@@ -347,10 +350,12 @@ GLOBAL(int_ret_from_sys_call)
+ syscall_return_via_sysret:
+ /* rcx and r11 are already restored (see code above) */
+ RESTORE_C_REGS_EXCEPT_RCX_R11
++ SWITCH_USER_CR3
+ movq RSP(%rsp), %rsp
+ USERGS_SYSRET64
+
+ opportunistic_sysret_failed:
++ SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_c_regs_and_iret
+ END(entry_SYSCALL_64)
+@@ -509,6 +514,7 @@ END(irq_entries_start)
+ * tracking that we're in kernel mode.
+ */
+ SWAPGS
++ SWITCH_KERNEL_CR3
+
+ /*
+ * We need to tell lockdep that IRQs are off. We can't do this until
+@@ -566,6 +572,7 @@ GLOBAL(retint_user)
+ mov %rsp,%rdi
+ call prepare_exit_to_usermode
+ TRACE_IRQS_IRETQ
++ SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_regs_and_iret
+
+@@ -623,6 +630,7 @@ native_irq_return_ldt:
+ pushq %rax
+ pushq %rdi
+ SWAPGS
++ SWITCH_KERNEL_CR3
+ movq PER_CPU_VAR(espfix_waddr), %rdi
+ movq %rax, (0*8)(%rdi) /* RAX */
+ movq (2*8)(%rsp), %rax /* RIP */
+@@ -638,6 +646,7 @@ native_irq_return_ldt:
+ andl $0xffff0000, %eax
+ popq %rdi
+ orq PER_CPU_VAR(espfix_stack), %rax
++ SWITCH_USER_CR3
+ SWAPGS
+ movq %rax, %rsp
+ popq %rax
+@@ -1037,6 +1046,7 @@ ENTRY(paranoid_entry)
+ testl %edx, %edx
+ js 1f /* negative -> in kernel */
+ SWAPGS
++ SWITCH_KERNEL_CR3
+ xorl %ebx, %ebx
+ 1: ret
+ END(paranoid_entry)
+@@ -1059,6 +1069,7 @@ ENTRY(paranoid_exit)
+ testl %ebx, %ebx /* swapgs needed? */
+ jnz paranoid_exit_no_swapgs
+ TRACE_IRQS_IRETQ
++ SWITCH_USER_CR3_NO_STACK
+ SWAPGS_UNSAFE_STACK
+ jmp paranoid_exit_restore
+ paranoid_exit_no_swapgs:
+@@ -1088,6 +1099,7 @@ ENTRY(error_entry)
+ * from user mode due to an IRET fault.
+ */
+ SWAPGS
++ SWITCH_KERNEL_CR3
+
+ .Lerror_entry_from_usermode_after_swapgs:
+ /*
+@@ -1138,7 +1150,7 @@ ENTRY(error_entry)
+ * Switch to kernel gsbase:
+ */
+ SWAPGS
+-
++ SWITCH_KERNEL_CR3
+ /*
+ * Pretend that the exception came from user mode: set up pt_regs
+ * as if we faulted immediately after IRET and clear EBX so that
+@@ -1238,6 +1250,7 @@ ENTRY(nmi)
+ */
+
+ SWAPGS_UNSAFE_STACK
++ SWITCH_KERNEL_CR3_NO_STACK
+ cld
+ movq %rsp, %rdx
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+@@ -1278,6 +1291,7 @@ ENTRY(nmi)
+ * work, because we don't want to enable interrupts. Fortunately,
+ * do_nmi doesn't modify pt_regs.
+ */
++ SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_c_regs_and_iret
+
+@@ -1489,6 +1503,7 @@ end_repeat_nmi:
+ testl %ebx, %ebx /* swapgs needed? */
+ jnz nmi_restore
+ nmi_swapgs:
++ SWITCH_USER_CR3_NO_STACK
+ SWAPGS_UNSAFE_STACK
+ nmi_restore:
+ RESTORE_EXTRA_REGS
+--- a/arch/x86/include/asm/hw_irq.h
++++ b/arch/x86/include/asm/hw_irq.h
+@@ -192,7 +192,7 @@ extern char irq_entries_start[];
+ #define VECTOR_RETRIGGERED ((void *)~0UL)
+
+ typedef struct irq_desc* vector_irq_t[NR_VECTORS];
+-DECLARE_PER_CPU(vector_irq_t, vector_irq);
++DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq);
+
+ #endif /* !ASSEMBLY_ */
+
+--- /dev/null
++++ b/arch/x86/include/asm/kaiser.h
+@@ -0,0 +1,113 @@
++#ifndef _ASM_X86_KAISER_H
++#define _ASM_X86_KAISER_H
++
++/* This file includes the definitions for the KAISER feature.
++ * KAISER is a countermeasure against x86_64 side-channel attacks on kernel virtual memory.
++ * It has a shadow pgd for every process. The shadow pgd has a minimalistic kernel set mapped,
++ * but includes the whole user memory. Within a kernel context switch, or when an interrupt is handled,
++ * the pgd is switched to the normal one. When the system switches to user mode, the shadow pgd is enabled.
++ * By this, the virtual memory caches are freed, and the user may not attack the whole kernel memory.
++ *
++ * A minimalistic kernel mapping holds the parts that need to be mapped in user mode, such as the
++ * entry/exit functions of the user space and the stacks.
++ */
++#ifdef __ASSEMBLY__
++#ifdef CONFIG_KAISER
++
++.macro _SWITCH_TO_KERNEL_CR3 reg
++movq %cr3, \reg
++andq $(~0x1000), \reg
++movq \reg, %cr3
++.endm
++
++.macro _SWITCH_TO_USER_CR3 reg
++movq %cr3, \reg
++orq $(0x1000), \reg
++movq \reg, %cr3
++.endm
++
++.macro SWITCH_KERNEL_CR3
++pushq %rax
++_SWITCH_TO_KERNEL_CR3 %rax
++popq %rax
++.endm
++
++.macro SWITCH_USER_CR3
++pushq %rax
++_SWITCH_TO_USER_CR3 %rax
++popq %rax
++.endm
++
++.macro SWITCH_KERNEL_CR3_NO_STACK
++movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
++_SWITCH_TO_KERNEL_CR3 %rax
++movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
++.endm
++
++
++.macro SWITCH_USER_CR3_NO_STACK
++
++movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
++_SWITCH_TO_USER_CR3 %rax
++movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
++
++.endm
++
++#else /* CONFIG_KAISER */
++
++.macro SWITCH_KERNEL_CR3 reg
++.endm
++.macro SWITCH_USER_CR3 reg
++.endm
++.macro SWITCH_USER_CR3_NO_STACK
++.endm
++.macro SWITCH_KERNEL_CR3_NO_STACK
++.endm
++
++#endif /* CONFIG_KAISER */
++#else /* __ASSEMBLY__ */
++
++
++#ifdef CONFIG_KAISER
++// Upon kernel/user mode switch, it may happen that
++// the address space has to be switched before the registers have been stored.
++// To change the address space, another register is needed.
++// A register therefore has to be stored/restored.
++//
++DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++
++#endif /* CONFIG_KAISER */
++
++/**
++ * kaiser_add_mapping - map a virtual memory range into the shadow mapping
++ * @addr: the start address of the range
++ * @size: the size of the range
++ * @flags: the mapping flags of the pages
++ *
++ * The mapping is done on a global scope, so no extra synchronization is needed.
++ * The pages have to be unmapped manually when they are no longer needed.
++ */
++extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
++
++
++/**
++ * kaiser_remove_mapping - unmap a virtual memory range from the shadow mapping
++ * @start: the start address of the range
++ * @size: the size of the range
++ */
++extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
++
++/**
++ * kaiser_init - initialize the shadow mapping
++ *
++ * Most parts of the shadow mapping can be mapped at boot time.
++ * Only the thread stacks have to be mapped at runtime.
++ * The mapped regions are never unmapped.
++ */
++extern void kaiser_init(void);
++
++#endif
++
++
++
++#endif /* _ASM_X86_KAISER_H */
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -106,9 +106,30 @@ static inline void native_pud_clear(pud_
+ native_set_pud(pud, native_make_pud(0));
+ }
+
++#ifdef CONFIG_KAISER
++static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) {
++ return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE);
++}
++
++static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) {
++ return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE);
++}
++#endif /* CONFIG_KAISER */
++
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
++#ifdef CONFIG_KAISER
++ // We know that a pgd is page aligned.  Entries in the lower half of
++ // the page (the user-space part of the address space) are therefore
++ // also written into the shadow mapping.
++ if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) {
++ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
++ }
++
++ pgdp->pgd = pgd.pgd & ~_PAGE_USER;
++#else /* CONFIG_KAISER */
+ *pgdp = pgd;
++#endif
+ }
+
+ static inline void native_pgd_clear(pgd_t *pgd)
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -901,6 +901,10 @@ static inline void pmdp_set_wrprotect(st
+ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+ {
+ memcpy(dst, src, count * sizeof(pgd_t));
++#ifdef CONFIG_KAISER
++ // clone the shadow pgd part as well
++ memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t));
++#endif
+ }
+
+ #define PTE_SHIFT ilog2(PTRS_PER_PTE)
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -45,7 +45,11 @@
+ #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
+ #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
+ #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
+-#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
++#ifdef CONFIG_KAISER
++#define _PAGE_GLOBAL (_AT(pteval_t, 0))
++#else
++#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
++#endif
+ #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
+ #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
+ #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
+@@ -120,7 +124,11 @@
+ #define _PAGE_DEVMAP (_AT(pteval_t, 0))
+ #endif
+
+-#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
++#ifdef CONFIG_KAISER
++#define _PAGE_PROTNONE (_AT(pteval_t, 0))
++#else
++#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
++#endif
+
+ #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
+ _PAGE_ACCESSED | _PAGE_DIRTY)
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -319,7 +319,7 @@ struct tss_struct {
+
+ } ____cacheline_aligned;
+
+-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
++DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss);
+
+ #ifdef CONFIG_X86_32
+ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+@@ -346,6 +346,11 @@ union irq_stack_union {
+ char gs_base[40];
+ unsigned long stack_canary;
+ };
++
++ struct {
++ char irq_stack_pointer[64];
++ char unused[IRQ_STACK_SIZE - 64];
++ };
+ };
+
+ DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -95,7 +95,7 @@ static const struct cpu_dev default_cpu
+
+ static const struct cpu_dev *this_cpu = &default_cpu;
+
+-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
++DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = {
+ #ifdef CONFIG_X86_64
+ /*
+ * We need valid kernel segments for data and code in long mode too
+@@ -1283,7 +1283,7 @@ static const unsigned int exception_stac
+ [DEBUG_STACK - 1] = DEBUG_STKSZ
+ };
+
+-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
++DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks
+ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+
+ /* May not be marked __init: used by software suspend */
+--- a/arch/x86/kernel/espfix_64.c
++++ b/arch/x86/kernel/espfix_64.c
+@@ -41,6 +41,7 @@
+ #include <asm/pgalloc.h>
+ #include <asm/setup.h>
+ #include <asm/espfix.h>
++#include <asm/kaiser.h>
+
+ /*
+ * Note: we only need 6*8 = 48 bytes for the espfix stack, but round
+@@ -126,6 +127,11 @@ void __init init_espfix_bsp(void)
+ /* Install the espfix pud into the kernel page directory */
+ pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
+ pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
++#ifdef CONFIG_KAISER
++ // add the esp stack pud to the shadow mapping here.
++ // This can be done directly, because the fixup stack has its own pud
++ set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page)));
++#endif
+
+ /* Randomize the locations */
+ init_espfix_random();
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -441,6 +441,14 @@ early_idt_ripmsg:
+ .balign PAGE_SIZE; \
+ GLOBAL(name)
+
++#ifdef CONFIG_KAISER
++#define NEXT_PGD_PAGE(name) \
++ .balign 2 * PAGE_SIZE; \
++GLOBAL(name)
++#else
++#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
++#endif
++
+ /* Automate the creation of 1 to 1 mapping pmd entries */
+ #define PMDS(START, PERM, COUNT) \
+ i = 0 ; \
+@@ -450,7 +458,7 @@ GLOBAL(name)
+ .endr
+
+ __INITDATA
+-NEXT_PAGE(early_level4_pgt)
++NEXT_PGD_PAGE(early_level4_pgt)
+ .fill 511,8,0
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+
+@@ -460,10 +468,10 @@ NEXT_PAGE(early_dynamic_pgts)
+ .data
+
+ #ifndef CONFIG_XEN
+-NEXT_PAGE(init_level4_pgt)
+- .fill 512,8,0
++NEXT_PGD_PAGE(init_level4_pgt)
++ .fill 2*512,8,0
+ #else
+-NEXT_PAGE(init_level4_pgt)
++NEXT_PGD_PAGE(init_level4_pgt)
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .org init_level4_pgt + L4_PAGE_OFFSET*8, 0
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+--- a/arch/x86/kernel/irqinit.c
++++ b/arch/x86/kernel/irqinit.c
+@@ -51,7 +51,7 @@ static struct irqaction irq2 = {
+ .flags = IRQF_NO_THREAD,
+ };
+
+-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
++DEFINE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq) = {
+ [0 ... NR_VECTORS - 1] = VECTOR_UNUSED,
+ };
+
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -39,7 +39,7 @@
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
+-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
++__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = {
+ .x86_tss = {
+ .sp0 = TOP_OF_INIT_STACK,
+ #ifdef CONFIG_X86_32
+--- /dev/null
++++ b/arch/x86/mm/kaiser.c
+@@ -0,0 +1,160 @@
++
++
++#include <linux/kernel.h>
++#include <linux/errno.h>
++#include <linux/string.h>
++#include <linux/types.h>
++#include <linux/bug.h>
++#include <linux/init.h>
++#include <linux/spinlock.h>
++#include <linux/mm.h>
++
++#include <linux/uaccess.h>
++#include <asm/pgtable.h>
++#include <asm/pgalloc.h>
++#include <asm/desc.h>
++#ifdef CONFIG_KAISER
++
++__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++
++/**
++ * Get the real ppn from a address in kernel mapping.
++ * @param address The virtual adrress
++ * @return the physical address
++ */
++static inline unsigned long get_pa_from_mapping (unsigned long address)
++{
++ pgd_t *pgd;
++ pud_t *pud;
++ pmd_t *pmd;
++ pte_t *pte;
++
++ pgd = pgd_offset_k(address);
++ BUG_ON(pgd_none(*pgd) || pgd_large(*pgd));
++
++ pud = pud_offset(pgd, address);
++ BUG_ON(pud_none(*pud));
++
++ if (pud_large(*pud)) {
++ return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK);
++ }
++
++ pmd = pmd_offset(pud, address);
++ BUG_ON(pmd_none(*pmd));
++
++ if (pmd_large(*pmd)) {
++ return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK);
++ }
++
++ pte = pte_offset_kernel(pmd, address);
++ BUG_ON(pte_none(*pte));
++
++ return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK);
++}
++
++void _kaiser_copy (unsigned long start_addr, unsigned long size,
++ unsigned long flags)
++{
++ pgd_t *pgd;
++ pud_t *pud;
++ pmd_t *pmd;
++ pte_t *pte;
++ unsigned long address;
++ unsigned long end_addr = start_addr + size;
++ unsigned long target_address;
++
++ for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1));
++ address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) {
++ target_address = get_pa_from_mapping(address);
++
++ pgd = native_get_shadow_pgd(pgd_offset_k(address));
++
++ BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n");
++ BUG_ON(pgd_large(*pgd));
++
++ pud = pud_offset(pgd, address);
++ if (pud_none(*pud)) {
++ set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address))));
++ }
++ BUG_ON(pud_large(*pud));
++
++ pmd = pmd_offset(pud, address);
++ if (pmd_none(*pmd)) {
++ set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address))));
++ }
++ BUG_ON(pmd_large(*pmd));
++
++ pte = pte_offset_kernel(pmd, address);
++ if (pte_none(*pte)) {
++ set_pte(pte, __pte(flags | target_address));
++ } else {
++ BUG_ON(__pa(pte_page(*pte)) != target_address);
++ }
++ }
++}
++
++// at first, add a pmd for every pgd entry in the shadowmem-kernel-part of the kernel mapping
++static inline void __init _kaiser_init(void)
++{
++ pgd_t *pgd;
++ int i = 0;
++
++ pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
++ for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
++ set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0))));
++ }
++}
++
++extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
++spinlock_t shadow_table_lock;
++void __init kaiser_init(void)
++{
++ int cpu;
++ spin_lock_init(&shadow_table_lock);
++
++ spin_lock(&shadow_table_lock);
++
++ _kaiser_init();
++
++ for_each_possible_cpu(cpu) {
++ // map the per cpu user variables
++ _kaiser_copy(
++ (unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)),
++ (unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start,
++ __PAGE_KERNEL);
++ }
++
++ // map the entry/exit text section, which is responsible to switch between user- and kernel mode
++ _kaiser_copy(
++ (unsigned long) __entry_text_start,
++ (unsigned long) __entry_text_end - (unsigned long) __entry_text_start,
++ __PAGE_KERNEL_RX);
++
++ // the fixed map address of the idt_table
++ _kaiser_copy(
++ (unsigned long) idt_descr.address,
++ sizeof(gate_desc) * NR_VECTORS,
++ __PAGE_KERNEL_RO);
++
++ spin_unlock(&shadow_table_lock);
++}
++
++// add a mapping to the shadow-mapping, and synchronize the mappings
++void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
++{
++ spin_lock(&shadow_table_lock);
++ _kaiser_copy(addr, size, flags);
++ spin_unlock(&shadow_table_lock);
++}
++
++extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end);
++void kaiser_remove_mapping(unsigned long start, unsigned long size)
++{
++ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start));
++ spin_lock(&shadow_table_lock);
++ do {
++ unmap_pud_range(pgd, start, start + size);
++ } while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size)));
++ spin_unlock(&shadow_table_lock);
++}
++#endif /* CONFIG_KAISER */
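_kaiser_copy() above starts its loop at PAGE_ALIGN(start_addr - (PAGE_SIZE - 1)), which is just "round start down to a page boundary" — the form the later merged-update patch spells as start_addr & PAGE_MASK. A small stand-alone check of that identity, assuming 4 KiB pages and addresses above PAGE_SIZE:

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
	unsigned long samples[] = { 0x5000, 0x5001, 0x5fff, 0x6000, 0x12345678 };

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		unsigned long s = samples[i];

		/* round-up of (s - (PAGE_SIZE - 1)) == round-down of s */
		assert(PAGE_ALIGN(s - (PAGE_SIZE - 1)) == (s & PAGE_MASK));
		printf("%#lx -> %#lx\n", s, s & PAGE_MASK);
	}
	return 0;
}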
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -33,4 +33,4 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulatio
+
+ obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
+ obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
+-
++obj-$(CONFIG_KAISER) += kaiser.o
+--- a/arch/x86/mm/pageattr.c
++++ b/arch/x86/mm/pageattr.c
+@@ -829,7 +829,7 @@ static void unmap_pmd_range(pud_t *pud,
+ pud_clear(pud);
+ }
+
+-static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
++void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+ {
+ pud_t *pud = pud_offset(pgd, start);
+
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -342,12 +342,38 @@ static inline void _pgd_free(pgd_t *pgd)
+ #else
+ static inline pgd_t *_pgd_alloc(void)
+ {
++#ifdef CONFIG_KAISER
++ // Instead of one PML4, we aquire two PML4s and, thus, an 8kb-aligned memory
++ // block. Therefore, we have to allocate at least 3 pages. However, the
++ // __get_free_pages returns us 4 pages. Hence, we store the base pointer at
++ // the beginning of the page of our 8kb-aligned memory block in order to
++ // correctly free it afterwars.
++
++ unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE));
++
++ if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages)
++ {
++ *((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages;
++ return (pgd_t *) pages;
++ }
++ else
++ {
++ *((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages;
++ return (pgd_t *) (pages + PAGE_SIZE);
++ }
++#else
+ return (pgd_t *)__get_free_page(PGALLOC_GFP);
++#endif
+ }
+
+ static inline void _pgd_free(pgd_t *pgd)
+ {
++#ifdef CONFIG_KAISER
++ unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE));
++ free_pages(pages, get_order(4*PAGE_SIZE));
++#else
+ free_page((unsigned long)pgd);
++#endif
+ }
+ #endif /* CONFIG_X86_PAE */
+
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -728,7 +728,16 @@
+ */
+ #define PERCPU_INPUT(cacheline) \
+ VMLINUX_SYMBOL(__per_cpu_start) = .; \
+- *(.data..percpu..first) \
++ \
++ VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \
++ *(.data..percpu..first) \
++ . = ALIGN(cacheline); \
++ *(.data..percpu..user_mapped) \
++ *(.data..percpu..user_mapped..shared_aligned) \
++ . = ALIGN(PAGE_SIZE); \
++ *(.data..percpu..user_mapped..page_aligned) \
++ VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \
++ \
+ . = ALIGN(PAGE_SIZE); \
+ *(.data..percpu..page_aligned) \
+ . = ALIGN(cacheline); \
+--- a/include/linux/percpu-defs.h
++++ b/include/linux/percpu-defs.h
+@@ -35,6 +35,12 @@
+
+ #endif
+
++#ifdef CONFIG_KAISER
++#define USER_MAPPED_SECTION "..user_mapped"
++#else
++#define USER_MAPPED_SECTION ""
++#endif
++
+ /*
+ * Base implementations of per-CPU variable declarations and definitions, where
+ * the section in which the variable is to be placed is provided by the
+@@ -115,6 +121,12 @@
+ #define DEFINE_PER_CPU(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, "")
+
++#define DECLARE_PER_CPU_USER_MAPPED(type, name) \
++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
++
++#define DEFINE_PER_CPU_USER_MAPPED(type, name) \
++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
++
+ /*
+ * Declaration/definition used for per-CPU variables that must come first in
+ * the set of variables.
+@@ -144,6 +156,14 @@
+ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
+
++#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
++ ____cacheline_aligned_in_smp
++
++#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
++ ____cacheline_aligned_in_smp
++
+ #define DECLARE_PER_CPU_ALIGNED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION) \
+ ____cacheline_aligned
+@@ -162,6 +182,16 @@
+ #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \
+ __aligned(PAGE_SIZE)
++/*
++ * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode.
++ */
++#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
++ __aligned(PAGE_SIZE)
++
++#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
++ __aligned(PAGE_SIZE)
+
+ /*
+ * Declaration/definition used for per-CPU variables that must be read mostly.
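The ..user_mapped percpu sections above work by bracketing the affected variables between __per_cpu_user_mapped_start and __per_cpu_user_mapped_end so kaiser_init() can map exactly that window for each CPU. A stand-alone sketch of the same section-bracketing idea using the GNU ld __start_/__stop_ symbol convention (the section name here is made up):

#include <stdio.h>

#define USER_MAPPED __attribute__((section("user_mapped_demo"), used))

USER_MAPPED static unsigned long demo_a = 1;	/* stand-in for a user-mapped percpu var */
USER_MAPPED static unsigned long demo_b = 2;

/* provided automatically by GNU ld for sections with C-identifier names */
extern unsigned long __start_user_mapped_demo[];
extern unsigned long __stop_user_mapped_demo[];

int main(void)
{
	size_t bytes = (char *)__stop_user_mapped_demo -
		       (char *)__start_user_mapped_demo;

	/* this span is what a kaiser_add_mapping()-style call would cover */
	printf("user-mapped demo section: %zu bytes at %p, first value %lu\n",
	       bytes, (void *)__start_user_mapped_demo, __start_user_mapped_demo[0]);
	return 0;
}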
+--- a/init/main.c
++++ b/init/main.c
+@@ -88,6 +88,9 @@
+ #include <asm/setup.h>
+ #include <asm/sections.h>
+ #include <asm/cacheflush.h>
++#ifdef CONFIG_KAISER
++#include <asm/kaiser.h>
++#endif
+
+ static int kernel_init(void *);
+
+@@ -493,6 +496,9 @@ static void __init mm_init(void)
+ pgtable_init();
+ vmalloc_init();
+ ioremap_huge_init();
++#ifdef CONFIG_KAISER
++ kaiser_init();
++#endif
+ }
+
+ asmlinkage __visible void __init start_kernel(void)
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -167,8 +167,12 @@ static struct thread_info *alloc_thread_
+ return page ? page_address(page) : NULL;
+ }
+
++extern void kaiser_remove_mapping(unsigned long start_addr, unsigned long size);
+ static inline void free_thread_info(struct thread_info *ti)
+ {
++#ifdef CONFIG_KAISER
++ kaiser_remove_mapping((unsigned long)ti, THREAD_SIZE);
++#endif
+ free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+ }
+ # else
+@@ -331,6 +335,7 @@ void set_task_stack_end_magic(struct tas
+ *stackend = STACK_END_MAGIC; /* for overflow detection */
+ }
+
++extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ {
+ struct task_struct *tsk;
+@@ -352,6 +357,9 @@ static struct task_struct *dup_task_stru
+ goto free_ti;
+
+ tsk->stack = ti;
++#ifdef CONFIG_KAISER
++ kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
++#endif
+ #ifdef CONFIG_SECCOMP
+ /*
+ * We must handle setting up seccomp filters once we're under
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -30,6 +30,13 @@ config SECURITY
+ model will be used.
+
+ If you are unsure how to answer this question, answer N.
++config KAISER
++ bool "Remove the kernel mapping in user mode"
++ depends on X86_64
++ depends on !PARAVIRT
++ help
++ This enforces a strict kernel and user space isolation in order to close
++ hardware side channels on kernel address information.
+
+ config SECURITYFS
+ bool "Enable the securityfs filesystem"
diff --git a/patches.suse/4.4-23-kaiser-merged-update.patch b/patches.suse/4.4-23-kaiser-merged-update.patch
new file mode 100644
index 0000000000..bff7abe32d
--- /dev/null
+++ b/patches.suse/4.4-23-kaiser-merged-update.patch
@@ -0,0 +1,1303 @@
+From e4fbe29cf88992297954c4a9c2ee46705759a4a7 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 30 Aug 2017 16:23:00 -0700
+Subject: [PATCH 23/43] kaiser: merged update
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+Merged fixes and cleanups, rebased to 4.4.89 tree (no 5-level paging).
+
+(cherry picked from Change-Id: I26c86040761cb8785431e03aa83ffcbb51fea7de)
+Conflicts:
+arch/x86/kernel/cpu/common.c
+arch/x86/mm/kaiser.c
+kernel/fork.c
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/entry/entry_64.S | 106 ++++++++++-
+ arch/x86/include/asm/kaiser.h | 43 ++--
+ arch/x86/include/asm/pgtable.h | 18 +
+ arch/x86/include/asm/pgtable_64.h | 48 ++++-
+ arch/x86/include/asm/pgtable_types.h | 6
+ arch/x86/kernel/espfix_64.c | 13 -
+ arch/x86/kernel/head_64.S | 19 +-
+ arch/x86/kernel/ldt.c | 27 ++
+ arch/x86/kernel/tracepoint.c | 2
+ arch/x86/mm/kaiser.c | 318 +++++++++++++++++++++++++----------
+ arch/x86/mm/pageattr.c | 63 +++++-
+ arch/x86/mm/pgtable.c | 40 +---
+ include/linux/kaiser.h | 26 ++
+ kernel/fork.c | 9
+ security/Kconfig | 5
+ 15 files changed, 553 insertions(+), 190 deletions(-)
+ create mode 100644 include/linux/kaiser.h
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -212,6 +212,13 @@ entry_SYSCALL_64_fastpath:
+ movq RIP(%rsp), %rcx
+ movq EFLAGS(%rsp), %r11
+ RESTORE_C_REGS_EXCEPT_RCX_R11
++ /*
++ * This opens a window where we have a user CR3, but are
++ * running in the kernel. This makes using the CS
++ * register useless for telling whether or not we need to
++ * switch CR3 in NMIs. Normal interrupts are OK because
++ * they are off here.
++ */
+ SWITCH_USER_CR3
+ movq RSP(%rsp), %rsp
+ /*
+@@ -350,11 +357,25 @@ GLOBAL(int_ret_from_sys_call)
+ syscall_return_via_sysret:
+ /* rcx and r11 are already restored (see code above) */
+ RESTORE_C_REGS_EXCEPT_RCX_R11
++ /*
++ * This opens a window where we have a user CR3, but are
++ * running in the kernel. This makes using the CS
++ * register useless for telling whether or not we need to
++ * switch CR3 in NMIs. Normal interrupts are OK because
++ * they are off here.
++ */
+ SWITCH_USER_CR3
+ movq RSP(%rsp), %rsp
+ USERGS_SYSRET64
+
+ opportunistic_sysret_failed:
++ /*
++ * This opens a window where we have a user CR3, but are
++ * running in the kernel. This makes using the CS
++ * register useless for telling whether or not we need to
++ * switch CR3 in NMIs. Normal interrupts are OK because
++ * they are off here.
++ */
+ SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_c_regs_and_iret
+@@ -1089,6 +1110,13 @@ ENTRY(error_entry)
+ cld
+ SAVE_C_REGS 8
+ SAVE_EXTRA_REGS 8
++ /*
++ * error_entry() always returns with a kernel gsbase and
++ * CR3. We must also have a kernel CR3/gsbase before
++ * calling TRACE_IRQS_*. Just unconditionally switch to
++ * the kernel CR3 here.
++ */
++ SWITCH_KERNEL_CR3
+ xorl %ebx, %ebx
+ testb $3, CS+8(%rsp)
+ jz .Lerror_kernelspace
+@@ -1099,7 +1127,6 @@ ENTRY(error_entry)
+ * from user mode due to an IRET fault.
+ */
+ SWAPGS
+- SWITCH_KERNEL_CR3
+
+ .Lerror_entry_from_usermode_after_swapgs:
+ /*
+@@ -1150,7 +1177,7 @@ ENTRY(error_entry)
+ * Switch to kernel gsbase:
+ */
+ SWAPGS
+- SWITCH_KERNEL_CR3
++
+ /*
+ * Pretend that the exception came from user mode: set up pt_regs
+ * as if we faulted immediately after IRET and clear EBX so that
+@@ -1250,7 +1277,10 @@ ENTRY(nmi)
+ */
+
+ SWAPGS_UNSAFE_STACK
+- SWITCH_KERNEL_CR3_NO_STACK
++ /*
++ * percpu variables are mapped with user CR3, so no need
++ * to switch CR3 here.
++ */
+ cld
+ movq %rsp, %rdx
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+@@ -1284,14 +1314,33 @@ ENTRY(nmi)
+
+ movq %rsp, %rdi
+ movq $-1, %rsi
++#ifdef CONFIG_KAISER
++ /* Unconditionally use kernel CR3 for do_nmi() */
++ /* %rax is saved above, so OK to clobber here */
++ movq %cr3, %rax
++ pushq %rax
++#ifdef CONFIG_KAISER_REAL_SWITCH
++ andq $(~0x1000), %rax
++#endif
++ movq %rax, %cr3
++#endif
+ call do_nmi
++ /*
++ * Unconditionally restore CR3. I know we return to
++ * kernel code that needs user CR3, but do we ever return
++ * to "user mode" where we need the kernel CR3?
++ */
++#ifdef CONFIG_KAISER
++ popq %rax
++ mov %rax, %cr3
++#endif
+
+ /*
+ * Return back to user mode. We must *not* do the normal exit
+- * work, because we don't want to enable interrupts. Fortunately,
+- * do_nmi doesn't modify pt_regs.
++ * work, because we don't want to enable interrupts. Do not
++ * switch to user CR3: we might be going back to kernel code
++ * that had a user CR3 set.
+ */
+- SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_c_regs_and_iret
+
+@@ -1487,23 +1536,54 @@ end_repeat_nmi:
+ ALLOC_PT_GPREGS_ON_STACK
+
+ /*
+- * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
+- * as we should not be calling schedule in NMI context.
+- * Even with normal interrupts enabled. An NMI should not be
+- * setting NEED_RESCHED or anything that normal interrupts and
+- * exceptions might do.
++ * Use the same approach as paranoid_entry to handle SWAPGS, but
++ * without CR3 handling since we do that differently in NMIs. No
++ * need to use paranoid_exit as we should not be calling schedule
++ * in NMI context. Even with normal interrupts enabled. An NMI
++ * should not be setting NEED_RESCHED or anything that normal
++ * interrupts and exceptions might do.
+ */
+- call paranoid_entry
++ cld
++ SAVE_C_REGS
++ SAVE_EXTRA_REGS
++ movl $1, %ebx
++ movl $MSR_GS_BASE, %ecx
++ rdmsr
++ testl %edx, %edx
++ js 1f /* negative -> in kernel */
++ SWAPGS
++ xorl %ebx, %ebx
++1:
++#ifdef CONFIG_KAISER
++ /* Unconditionally use kernel CR3 for do_nmi() */
++ /* %rax is saved above, so OK to clobber here */
++ movq %cr3, %rax
++ pushq %rax
++#ifdef CONFIG_KAISER_REAL_SWITCH
++ andq $(~0x1000), %rax
++#endif
++ movq %rax, %cr3
++#endif
+
+ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+ movq %rsp, %rdi
++ addq $8, %rdi /* point %rdi at ptregs, fixed up for CR3 */
+ movq $-1, %rsi
+ call do_nmi
++ /*
++ * Unconditionally restore CR3. We might be returning to
++	 * kernel code that needs user CR3, like just before
++ * a sysret.
++ */
++#ifdef CONFIG_KAISER
++ popq %rax
++ mov %rax, %cr3
++#endif
+
+ testl %ebx, %ebx /* swapgs needed? */
+ jnz nmi_restore
+ nmi_swapgs:
+- SWITCH_USER_CR3_NO_STACK
++ /* We fixed up CR3 above, so no need to switch it here */
+ SWAPGS_UNSAFE_STACK
+ nmi_restore:
+ RESTORE_EXTRA_REGS
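The comments in the NMI paths above boil down to: save whatever CR3 the NMI interrupted, force the kernel CR3 for do_nmi(), then restore the saved value verbatim instead of inferring the right one from CS. A stand-alone C model of that discipline (the 0x1000 bit mirrors the _SWITCH_TO_*_CR3 macros; the rest is made up for illustration):

#include <assert.h>
#include <stdio.h>

static unsigned long fake_cr3 = 0x100000UL | 0x1000UL;	/* start in "user" CR3 */

static void do_nmi_model(void)
{
	/* the handler must always observe a kernel CR3 (bit 12 clear) */
	assert((fake_cr3 & 0x1000UL) == 0);
}

static void nmi_entry_model(void)
{
	unsigned long saved = fake_cr3;	/* pushq %rax */

	fake_cr3 &= ~0x1000UL;		/* unconditionally use kernel CR3 */
	do_nmi_model();
	fake_cr3 = saved;		/* popq %rax; mov %rax, %cr3 */
}

int main(void)
{
	nmi_entry_model();
	assert(fake_cr3 & 0x1000UL);	/* interrupted user CR3 restored untouched */

	fake_cr3 &= ~0x1000UL;		/* now pretend we interrupted kernel code */
	nmi_entry_model();
	assert((fake_cr3 & 0x1000UL) == 0);
	printf("CR3 save/restore model OK\n");
	return 0;
}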
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -16,13 +16,17 @@
+
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
++#ifdef CONFIG_KAISER_REAL_SWITCH
+ andq $(~0x1000), \reg
++#endif
+ movq \reg, %cr3
+ .endm
+
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
++#ifdef CONFIG_KAISER_REAL_SWITCH
+ orq $(0x1000), \reg
++#endif
+ movq \reg, %cr3
+ .endm
+
+@@ -65,48 +69,53 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ .endm
+
+ #endif /* CONFIG_KAISER */
++
+ #else /* __ASSEMBLY__ */
+
+
+ #ifdef CONFIG_KAISER
+-// Upon kernel/user mode switch, it may happen that
+-// the address space has to be switched before the registers have been stored.
+-// To change the address space, another register is needed.
+-// A register therefore has to be stored/restored.
+-//
+-DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++/*
++ * Upon kernel/user mode switch, it may happen that the address
++ * space has to be switched before the registers have been
++ * stored. To change the address space, another register is
++ * needed. A register therefore has to be stored/restored.
++*/
+
+-#endif /* CONFIG_KAISER */
++DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+ /**
+- * shadowmem_add_mapping - map a virtual memory part to the shadow mapping
++ * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
+ * @addr: the start address of the range
+ * @size: the size of the range
+ * @flags: The mapping flags of the pages
+ *
+- * the mapping is done on a global scope, so no bigger synchronization has to be done.
+- * the pages have to be manually unmapped again when they are not needed any longer.
++ * The mapping is global in scope, so no extra
++ * synchronization has to be done. The pages have to be
++ * manually unmapped again when they are no longer needed.
+ */
+-extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
++extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+
+
+ /**
+- * shadowmem_remove_mapping - unmap a virtual memory part of the shadow mapping
++ * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping
+ * @addr: the start address of the range
+ * @size: the size of the range
+ */
+ extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
+
+ /**
+- * shadowmem_initialize_mapping - Initalize the shadow mapping
++ * kaiser_initialize_mapping - Initialize the shadow mapping
+ *
+- * most parts of the shadow mapping can be mapped upon boot time.
+- * only the thread stacks have to be mapped on runtime.
+- * the mapped regions are not unmapped at all.
++ * Most parts of the shadow mapping can be mapped upon boot
++ * time. Only per-process things like the thread stacks
++ * or a new LDT have to be mapped at runtime. These boot-
++ * time mappings are permanent and never unmapped.
+ */
+ extern void kaiser_init(void);
+
+-#endif
++#endif /* CONFIG_KAISER */
++
++#endif /* __ASSEMBLY */
+
+
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -107,26 +107,58 @@ static inline void native_pud_clear(pud_
+ }
+
+ #ifdef CONFIG_KAISER
+-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) {
++static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
++{
+ return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE);
+ }
+
+-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) {
++static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
++{
+ return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE);
+ }
++#else
++static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
++{
++ BUILD_BUG_ON(1);
++ return NULL;
++}
++static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
++{
++ return pgdp;
++}
+ #endif /* CONFIG_KAISER */
+
++/*
++ * Page table pages are page-aligned. The lower half of the top
++ * level is used for userspace and the top half for the kernel.
++ * This returns true for user pages that need to get copied into
++ * both the user and kernel copies of the page tables, and false
++ * for kernel pages that should only be in the kernel copy.
++ */
++static inline bool is_userspace_pgd(void *__ptr)
++{
++ unsigned long ptr = (unsigned long)__ptr;
++
++ return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2));
++}
++
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+ #ifdef CONFIG_KAISER
+- // We know that a pgd is page aligned.
+- // Therefore the lower indices have to be mapped to user space.
+- // These pages are mapped to the shadow mapping.
+- if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) {
++ pteval_t extra_kern_pgd_flags = 0;
++ /* Do we need to also populate the shadow pgd? */
++ if (is_userspace_pgd(pgdp)) {
+ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
++ /*
++ * Even if the entry is *mapping* userspace, ensure
++ * that userspace can not use it. This way, if we
++ * get out to userspace running on the kernel CR3,
++ * userspace will crash instead of running.
++ */
++ extra_kern_pgd_flags = _PAGE_NX;
+ }
+-
+- pgdp->pgd = pgd.pgd & ~_PAGE_USER;
++ pgdp->pgd = pgd.pgd;
++ pgdp->pgd |= extra_kern_pgd_flags;
+ #else /* CONFIG_KAISER */
+ *pgdp = pgd;
+ #endif
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -692,7 +692,17 @@ static inline pud_t *pud_offset(pgd_t *p
+
+ static inline int pgd_bad(pgd_t pgd)
+ {
+- return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
++ pgdval_t ignore_flags = _PAGE_USER;
++ /*
++ * We set NX on KAISER pgds that map userspace memory so
++ * that userspace can not meaningfully use the kernel
++ * page table by accident; it will fault on the first
++ * instruction it tries to run. See native_set_pgd().
++ */
++ if (IS_ENABLED(CONFIG_KAISER))
++ ignore_flags |= _PAGE_NX;
++
++ return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
+ }
+
+ static inline int pgd_none(pgd_t pgd)
+@@ -902,8 +912,10 @@ static inline void clone_pgd_range(pgd_t
+ {
+ memcpy(dst, src, count * sizeof(pgd_t));
+ #ifdef CONFIG_KAISER
+- // clone the shadow pgd part as well
+- memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t));
++ /* Clone the shadow pgd part as well */
++ memcpy(native_get_shadow_pgd(dst),
++ native_get_shadow_pgd(src),
++ count * sizeof(pgd_t));
+ #endif
+ }
+
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -48,7 +48,7 @@
+ #ifdef CONFIG_KAISER
+ #define _PAGE_GLOBAL (_AT(pteval_t, 0))
+ #else
+-#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
++#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
+ #endif
+ #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
+ #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
+@@ -124,11 +124,7 @@
+ #define _PAGE_DEVMAP (_AT(pteval_t, 0))
+ #endif
+
+-#ifdef CONFIG_KAISER
+-#define _PAGE_PROTNONE (_AT(pteval_t, 0))
+-#else
+ #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+-#endif
+
+ #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
+ _PAGE_ACCESSED | _PAGE_DIRTY)
+--- a/arch/x86/kernel/espfix_64.c
++++ b/arch/x86/kernel/espfix_64.c
+@@ -127,11 +127,14 @@ void __init init_espfix_bsp(void)
+ /* Install the espfix pud into the kernel page directory */
+ pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
+ pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
+-#ifdef CONFIG_KAISER
+- // add the esp stack pud to the shadow mapping here.
+- // This can be done directly, because the fixup stack has its own pud
+- set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page)));
+-#endif
++ /*
++ * Just copy the top-level PGD that is mapping the espfix
++ * area to ensure it is mapped into the shadow user page
++ * tables.
++ */
++ if (IS_ENABLED(CONFIG_KAISER))
++ set_pgd(native_get_shadow_pgd(pgd_p),
++ __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page)));
+
+ /* Randomize the locations */
+ init_espfix_random();
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -442,11 +442,24 @@ early_idt_ripmsg:
+ GLOBAL(name)
+
+ #ifdef CONFIG_KAISER
++/*
++ * Each PGD needs to be 8k long and 8k aligned. We do not
++ * ever go out to userspace with these, so we do not
++ * strictly *need* the second page, but this allows us to
++ * have a single set_pgd() implementation that does not
++ * need to worry about whether it has 4k or 8k to work
++ * with.
++ *
++ * This ensures PGDs are 8k long:
++ */
++#define KAISER_USER_PGD_FILL 512
++/* This ensures they are 8k-aligned: */
+ #define NEXT_PGD_PAGE(name) \
+ .balign 2 * PAGE_SIZE; \
+ GLOBAL(name)
+ #else
+ #define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
++#define KAISER_USER_PGD_FILL 0
+ #endif
+
+ /* Automate the creation of 1 to 1 mapping pmd entries */
+@@ -461,6 +474,7 @@ GLOBAL(name)
+ NEXT_PGD_PAGE(early_level4_pgt)
+ .fill 511,8,0
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
++ .fill KAISER_USER_PGD_FILL,8,0
+
+ NEXT_PAGE(early_dynamic_pgts)
+ .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
+@@ -469,7 +483,8 @@ NEXT_PAGE(early_dynamic_pgts)
+
+ #ifndef CONFIG_XEN
+ NEXT_PGD_PAGE(init_level4_pgt)
+- .fill 2*512,8,0
++ .fill 512,8,0
++ .fill KAISER_USER_PGD_FILL,8,0
+ #else
+ NEXT_PGD_PAGE(init_level4_pgt)
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+@@ -478,6 +493,7 @@ NEXT_PGD_PAGE(init_level4_pgt)
+ .org init_level4_pgt + L4_START_KERNEL*8, 0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
++ .fill KAISER_USER_PGD_FILL,8,0
+
+ NEXT_PAGE(level3_ident_pgt)
+ .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+@@ -488,6 +504,7 @@ NEXT_PAGE(level2_ident_pgt)
+ */
+ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+ #endif
++ .fill KAISER_USER_PGD_FILL,8,0
+
+ NEXT_PAGE(level3_kernel_pgt)
+ .fill L3_START_KERNEL,8,0
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -17,6 +17,7 @@
+ #include <linux/uaccess.h>
+
+ #include <asm/ldt.h>
++#include <asm/kaiser.h>
+ #include <asm/desc.h>
+ #include <asm/mmu_context.h>
+ #include <asm/syscalls.h>
+@@ -33,11 +34,21 @@ static void flush_ldt(void *current_mm)
+ set_ldt(pc->ldt->entries, pc->ldt->size);
+ }
+
++static void __free_ldt_struct(struct ldt_struct *ldt)
++{
++ if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
++ vfree(ldt->entries);
++ else
++ free_page((unsigned long)ldt->entries);
++ kfree(ldt);
++}
++
+ /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
+ static struct ldt_struct *alloc_ldt_struct(int size)
+ {
+ struct ldt_struct *new_ldt;
+ int alloc_size;
++ int ret = 0;
+
+ if (size > LDT_ENTRIES)
+ return NULL;
+@@ -65,6 +76,14 @@ static struct ldt_struct *alloc_ldt_stru
+ return NULL;
+ }
+
++ // FIXME: make kaiser_add_mapping() return an error code
++ // when it fails
++ kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
++ __PAGE_KERNEL);
++ if (ret) {
++ __free_ldt_struct(new_ldt);
++ return NULL;
++ }
+ new_ldt->size = size;
+ return new_ldt;
+ }
+@@ -91,12 +110,10 @@ static void free_ldt_struct(struct ldt_s
+ if (likely(!ldt))
+ return;
+
++ kaiser_remove_mapping((unsigned long)ldt->entries,
++ ldt->size * LDT_ENTRY_SIZE);
+ paravirt_free_ldt(ldt->entries, ldt->size);
+- if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
+- vfree(ldt->entries);
+- else
+- free_page((unsigned long)ldt->entries);
+- kfree(ldt);
++ __free_ldt_struct(ldt);
+ }
+
+ /*
+--- a/arch/x86/kernel/tracepoint.c
++++ b/arch/x86/kernel/tracepoint.c
+@@ -9,10 +9,12 @@
+ #include <linux/atomic.h>
+
+ atomic_t trace_idt_ctr = ATOMIC_INIT(0);
++__aligned(PAGE_SIZE)
+ struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
+ (unsigned long) trace_idt_table };
+
+ /* No need to be aligned, but done to keep all IDTs defined the same way. */
++__aligned(PAGE_SIZE)
+ gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;
+
+ static int trace_irq_vector_refcount;
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -1,160 +1,306 @@
+-
+-
++#include <linux/bug.h>
+ #include <linux/kernel.h>
+ #include <linux/errno.h>
+ #include <linux/string.h>
+ #include <linux/types.h>
+ #include <linux/bug.h>
+ #include <linux/init.h>
++#include <linux/interrupt.h>
+ #include <linux/spinlock.h>
+ #include <linux/mm.h>
+-
+ #include <linux/uaccess.h>
++#include <linux/ftrace.h>
++
++#include <asm/kaiser.h>
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
+ #ifdef CONFIG_KAISER
+
+ __visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++/*
++ * At runtime, the only things we map are some things for CPU
++ * hotplug, and stacks for new processes. No two CPUs will ever
++ * be populating the same addresses, so we only need to ensure
++ * that we protect between two CPUs trying to allocate and
++ * populate the same page table page.
++ *
++ * Only take this lock when doing a set_p[4um]d(), but it is not
++ * needed for doing a set_pte(). We assume that only the *owner*
++ * of a given allocation will be doing this for _their_
++ * allocation.
++ *
++ * This ensures that once a system has been running for a while
++ * and there have been stacks all over and these page tables
++ * are fully populated, there will be no further acquisitions of
++ * this lock.
++ */
++static DEFINE_SPINLOCK(shadow_table_allocation_lock);
+
+-/**
+- * Get the real ppn from a address in kernel mapping.
+- * @param address The virtual adrress
+- * @return the physical address
++/*
++ * Returns -1 on error.
+ */
+-static inline unsigned long get_pa_from_mapping (unsigned long address)
++static inline unsigned long get_pa_from_mapping(unsigned long vaddr)
+ {
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+- pgd = pgd_offset_k(address);
+- BUG_ON(pgd_none(*pgd) || pgd_large(*pgd));
++ pgd = pgd_offset_k(vaddr);
++ /*
++ * We made all the kernel PGDs present in kaiser_init().
++ * We expect them to stay that way.
++ */
++ BUG_ON(pgd_none(*pgd));
++ /*
++ * PGDs are either 512GB or 128TB on all x86_64
++ * configurations. We don't handle these.
++ */
++ BUG_ON(pgd_large(*pgd));
++
++ pud = pud_offset(pgd, vaddr);
++ if (pud_none(*pud)) {
++ WARN_ON_ONCE(1);
++ return -1;
++ }
+
+- pud = pud_offset(pgd, address);
+- BUG_ON(pud_none(*pud));
++ if (pud_large(*pud))
++ return (pud_pfn(*pud) << PAGE_SHIFT) | (vaddr & ~PUD_PAGE_MASK);
+
+- if (pud_large(*pud)) {
+- return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK);
++ pmd = pmd_offset(pud, vaddr);
++ if (pmd_none(*pmd)) {
++ WARN_ON_ONCE(1);
++ return -1;
+ }
+
+- pmd = pmd_offset(pud, address);
+- BUG_ON(pmd_none(*pmd));
++ if (pmd_large(*pmd))
++ return (pmd_pfn(*pmd) << PAGE_SHIFT) | (vaddr & ~PMD_PAGE_MASK);
+
+- if (pmd_large(*pmd)) {
+- return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK);
++ pte = pte_offset_kernel(pmd, vaddr);
++ if (pte_none(*pte)) {
++ WARN_ON_ONCE(1);
++ return -1;
+ }
+
+- pte = pte_offset_kernel(pmd, address);
+- BUG_ON(pte_none(*pte));
+-
+- return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK);
++ return (pte_pfn(*pte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
+ }
+
+-void _kaiser_copy (unsigned long start_addr, unsigned long size,
+- unsigned long flags)
++/*
++ * This is a relatively normal page table walk, except that it
++ * also tries to allocate page tables pages along the way.
++ *
++ * Returns a pointer to a PTE on success, or NULL on failure.
++ */
++static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic)
+ {
+- pgd_t *pgd;
+- pud_t *pud;
+ pmd_t *pmd;
+- pte_t *pte;
+- unsigned long address;
+- unsigned long end_addr = start_addr + size;
+- unsigned long target_address;
++ pud_t *pud;
++ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
++ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+
+- for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1));
+- address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) {
+- target_address = get_pa_from_mapping(address);
++ might_sleep();
++ if (is_atomic) {
++ gfp &= ~GFP_KERNEL;
++ gfp |= __GFP_HIGH | __GFP_ATOMIC;
++ }
+
+- pgd = native_get_shadow_pgd(pgd_offset_k(address));
++ if (pgd_none(*pgd)) {
++ WARN_ONCE(1, "All shadow pgds should have been populated");
++ return NULL;
++ }
++ BUILD_BUG_ON(pgd_large(*pgd) != 0);
+
+- BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n");
+- BUG_ON(pgd_large(*pgd));
++ pud = pud_offset(pgd, address);
++ /* The shadow page tables do not use large mappings: */
++ if (pud_large(*pud)) {
++ WARN_ON(1);
++ return NULL;
++ }
++ if (pud_none(*pud)) {
++ unsigned long new_pmd_page = __get_free_page(gfp);
++ if (!new_pmd_page)
++ return NULL;
++ spin_lock(&shadow_table_allocation_lock);
++ if (pud_none(*pud))
++ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
++ else
++ free_page(new_pmd_page);
++ spin_unlock(&shadow_table_allocation_lock);
++ }
+
+- pud = pud_offset(pgd, address);
+- if (pud_none(*pud)) {
+- set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address))));
+- }
+- BUG_ON(pud_large(*pud));
++ pmd = pmd_offset(pud, address);
++ /* The shadow page tables do not use large mappings: */
++ if (pmd_large(*pmd)) {
++ WARN_ON(1);
++ return NULL;
++ }
++ if (pmd_none(*pmd)) {
++ unsigned long new_pte_page = __get_free_page(gfp);
++ if (!new_pte_page)
++ return NULL;
++ spin_lock(&shadow_table_allocation_lock);
++ if (pmd_none(*pmd))
++ set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
++ else
++ free_page(new_pte_page);
++ spin_unlock(&shadow_table_allocation_lock);
++ }
+
+- pmd = pmd_offset(pud, address);
+- if (pmd_none(*pmd)) {
+- set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address))));
+- }
+- BUG_ON(pmd_large(*pmd));
++ return pte_offset_kernel(pmd, address);
++}
+
+- pte = pte_offset_kernel(pmd, address);
++int kaiser_add_user_map(const void *__start_addr, unsigned long size,
++ unsigned long flags)
++{
++ int ret = 0;
++ pte_t *pte;
++ unsigned long start_addr = (unsigned long )__start_addr;
++ unsigned long address = start_addr & PAGE_MASK;
++ unsigned long end_addr = PAGE_ALIGN(start_addr + size);
++ unsigned long target_address;
++
++ for (;address < end_addr; address += PAGE_SIZE) {
++ target_address = get_pa_from_mapping(address);
++ if (target_address == -1) {
++ ret = -EIO;
++ break;
++ }
++ pte = kaiser_pagetable_walk(address, false);
+ if (pte_none(*pte)) {
+ set_pte(pte, __pte(flags | target_address));
+ } else {
+- BUG_ON(__pa(pte_page(*pte)) != target_address);
++ pte_t tmp;
++ set_pte(&tmp, __pte(flags | target_address));
++ WARN_ON_ONCE(!pte_same(*pte, tmp));
+ }
+ }
++ return ret;
+ }
+
+-// at first, add a pmd for every pgd entry in the shadowmem-kernel-part of the kernel mapping
+-static inline void __init _kaiser_init(void)
++static int kaiser_add_user_map_ptrs(const void *start, const void *end, unsigned long flags)
++{
++ unsigned long size = end - start;
++
++ return kaiser_add_user_map(start, size, flags);
++}
++
++/*
++ * Ensure that the top level of the (shadow) page tables are
++ * entirely populated. This ensures that all processes that get
++ * forked have the same entries. This way, we do not have to
++ * ever go set up new entries in older processes.
++ *
++ * Note: we never free these, so there are no updates to them
++ * after this.
++ */
++static void __init kaiser_init_all_pgds(void)
+ {
+ pgd_t *pgd;
+ int i = 0;
+
+ pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
+ for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
+- set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0))));
++ pgd_t new_pgd;
++ pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE);
++ if (!pud) {
++ WARN_ON(1);
++ break;
++ }
++ new_pgd = __pgd(_KERNPG_TABLE |__pa(pud));
++ /*
++ * Make sure not to stomp on some other pgd entry.
++ */
++ if (!pgd_none(pgd[i])) {
++ WARN_ON(1);
++ continue;
++ }
++ set_pgd(pgd + i, new_pgd);
+ }
+ }
+
++#define kaiser_add_user_map_early(start, size, flags) do { \
++ int __ret = kaiser_add_user_map(start, size, flags); \
++ WARN_ON(__ret); \
++} while (0)
++
++#define kaiser_add_user_map_ptrs_early(start, end, flags) do { \
++ int __ret = kaiser_add_user_map_ptrs(start, end, flags); \
++ WARN_ON(__ret); \
++} while (0)
++
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+-spinlock_t shadow_table_lock;
++/*
++ * If anything in here fails, we will likely die on one of the
++ * first kernel->user transitions and init will die. But, we
++ * will have most of the kernel up by then and should be able to
++ * get a clean warning out of it. If we BUG_ON() here, we run
++ * the risk of being before we have good console output.
++ */
+ void __init kaiser_init(void)
+ {
+ int cpu;
+- spin_lock_init(&shadow_table_lock);
+-
+- spin_lock(&shadow_table_lock);
+
+- _kaiser_init();
++ kaiser_init_all_pgds();
+
+ for_each_possible_cpu(cpu) {
+- // map the per cpu user variables
+- _kaiser_copy(
+- (unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)),
+- (unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start,
+- __PAGE_KERNEL);
+- }
+-
+- // map the entry/exit text section, which is responsible to switch between user- and kernel mode
+- _kaiser_copy(
+- (unsigned long) __entry_text_start,
+- (unsigned long) __entry_text_end - (unsigned long) __entry_text_start,
+- __PAGE_KERNEL_RX);
+-
+- // the fixed map address of the idt_table
+- _kaiser_copy(
+- (unsigned long) idt_descr.address,
+- sizeof(gate_desc) * NR_VECTORS,
+- __PAGE_KERNEL_RO);
++ void *percpu_vaddr = __per_cpu_user_mapped_start +
++ per_cpu_offset(cpu);
++ unsigned long percpu_sz = __per_cpu_user_mapped_end -
++ __per_cpu_user_mapped_start;
++ kaiser_add_user_map_early(percpu_vaddr, percpu_sz,
++ __PAGE_KERNEL);
++ }
+
+- spin_unlock(&shadow_table_lock);
++ /*
++ * Map the entry/exit text section, which is needed at
++ * switches from user to and from kernel.
++ */
++ kaiser_add_user_map_ptrs_early(__entry_text_start, __entry_text_end,
++ __PAGE_KERNEL_RX);
++
++#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
++ kaiser_add_user_map_ptrs_early(__irqentry_text_start,
++ __irqentry_text_end,
++ __PAGE_KERNEL_RX);
++#endif
++ kaiser_add_user_map_early((void *)idt_descr.address,
++ sizeof(gate_desc) * NR_VECTORS,
++ __PAGE_KERNEL_RO);
++#ifdef CONFIG_TRACING
++ kaiser_add_user_map_early(&trace_idt_descr,
++ sizeof(trace_idt_descr),
++ __PAGE_KERNEL);
++ kaiser_add_user_map_early(&trace_idt_table,
++ sizeof(gate_desc) * NR_VECTORS,
++ __PAGE_KERNEL);
++#endif
++ kaiser_add_user_map_early(&debug_idt_descr, sizeof(debug_idt_descr),
++ __PAGE_KERNEL);
++ kaiser_add_user_map_early(&debug_idt_table,
++ sizeof(gate_desc) * NR_VECTORS,
++ __PAGE_KERNEL);
+ }
+
++extern void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end);
+ // add a mapping to the shadow-mapping, and synchronize the mappings
+-void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
++int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
+ {
+- spin_lock(&shadow_table_lock);
+- _kaiser_copy(addr, size, flags);
+- spin_unlock(&shadow_table_lock);
++ return kaiser_add_user_map((const void *)addr, size, flags);
+ }
+
+-extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end);
+ void kaiser_remove_mapping(unsigned long start, unsigned long size)
+ {
+- pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start));
+- spin_lock(&shadow_table_lock);
+- do {
+- unmap_pud_range(pgd, start, start + size);
+- } while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size)));
+- spin_unlock(&shadow_table_lock);
++ unsigned long end = start + size;
++ unsigned long addr;
++
++ for (addr = start; addr < end; addr += PGDIR_SIZE) {
++ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr));
++ /*
++		 * unmap_pud_range_nofree() handles > PGDIR_SIZE unmaps,
++ * so no need to trim 'end'.
++ */
++ unmap_pud_range_nofree(pgd, addr, end);
++ }
+ }
+ #endif /* CONFIG_KAISER */
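kaiser_pagetable_walk() above allocates the new page-table page before taking shadow_table_allocation_lock and re-tests the entry under the lock, freeing its own page if another CPU won the race. A stand-alone sketch of that allocate-outside, recheck-under-lock pattern, with a pthread mutex standing in for the spinlock and a single pointer standing in for the pud/pmd slot:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static void *slot;			/* NULL plays the role of pud_none()/pmd_none() */

static void *populate_slot(void)
{
	/* allocate before taking the lock; the kernel version may sleep here */
	void *new_page = calloc(1, 4096);
	void *ret;

	if (!new_page)
		return NULL;

	pthread_mutex_lock(&table_lock);
	if (!slot)			/* re-test under the lock */
		slot = new_page;
	else				/* another thread won: drop our page */
		free(new_page);
	ret = slot;
	pthread_mutex_unlock(&table_lock);
	return ret;
}

static void *worker(void *arg)
{
	(void)arg;
	return populate_slot();
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, worker, NULL);
	pthread_create(&b, NULL, worker, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("page-table slot populated once, at %p\n", slot);
	return 0;
}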
+--- a/arch/x86/mm/pageattr.c
++++ b/arch/x86/mm/pageattr.c
+@@ -52,6 +52,7 @@ static DEFINE_SPINLOCK(cpa_lock);
+ #define CPA_FLUSHTLB 1
+ #define CPA_ARRAY 2
+ #define CPA_PAGES_ARRAY 4
++#define CPA_FREE_PAGETABLES 8
+
+ #ifdef CONFIG_PROC_FS
+ static unsigned long direct_pages_count[PG_LEVEL_NUM];
+@@ -723,10 +724,13 @@ static int split_large_page(struct cpa_d
+ return 0;
+ }
+
+-static bool try_to_free_pte_page(pte_t *pte)
++static bool try_to_free_pte_page(struct cpa_data *cpa, pte_t *pte)
+ {
+ int i;
+
++ if (!(cpa->flags & CPA_FREE_PAGETABLES))
++ return false;
++
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ if (!pte_none(pte[i]))
+ return false;
+@@ -735,10 +739,13 @@ static bool try_to_free_pte_page(pte_t *
+ return true;
+ }
+
+-static bool try_to_free_pmd_page(pmd_t *pmd)
++static bool try_to_free_pmd_page(struct cpa_data *cpa, pmd_t *pmd)
+ {
+ int i;
+
++ if (!(cpa->flags & CPA_FREE_PAGETABLES))
++ return false;
++
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ if (!pmd_none(pmd[i]))
+ return false;
+@@ -759,7 +766,9 @@ static bool try_to_free_pud_page(pud_t *
+ return true;
+ }
+
+-static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
++static bool unmap_pte_range(struct cpa_data *cpa, pmd_t *pmd,
++ unsigned long start,
++ unsigned long end)
+ {
+ pte_t *pte = pte_offset_kernel(pmd, start);
+
+@@ -770,22 +779,23 @@ static bool unmap_pte_range(pmd_t *pmd,
+ pte++;
+ }
+
+- if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
++ if (try_to_free_pte_page(cpa, (pte_t *)pmd_page_vaddr(*pmd))) {
+ pmd_clear(pmd);
+ return true;
+ }
+ return false;
+ }
+
+-static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
++static void __unmap_pmd_range(struct cpa_data *cpa, pud_t *pud, pmd_t *pmd,
+ unsigned long start, unsigned long end)
+ {
+- if (unmap_pte_range(pmd, start, end))
+- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
++ if (unmap_pte_range(cpa, pmd, start, end))
++ if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud)))
+ pud_clear(pud);
+ }
+
+-static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
++static void unmap_pmd_range(struct cpa_data *cpa, pud_t *pud,
++ unsigned long start, unsigned long end)
+ {
+ pmd_t *pmd = pmd_offset(pud, start);
+
+@@ -796,7 +806,7 @@ static void unmap_pmd_range(pud_t *pud,
+ unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
+ unsigned long pre_end = min_t(unsigned long, end, next_page);
+
+- __unmap_pmd_range(pud, pmd, start, pre_end);
++ __unmap_pmd_range(cpa, pud, pmd, start, pre_end);
+
+ start = pre_end;
+ pmd++;
+@@ -809,7 +819,8 @@ static void unmap_pmd_range(pud_t *pud,
+ if (pmd_large(*pmd))
+ pmd_clear(pmd);
+ else
+- __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
++ __unmap_pmd_range(cpa, pud, pmd,
++ start, start + PMD_SIZE);
+
+ start += PMD_SIZE;
+ pmd++;
+@@ -819,17 +830,19 @@ static void unmap_pmd_range(pud_t *pud,
+ * 4K leftovers?
+ */
+ if (start < end)
+- return __unmap_pmd_range(pud, pmd, start, end);
++ return __unmap_pmd_range(cpa, pud, pmd, start, end);
+
+ /*
+ * Try again to free the PMD page if haven't succeeded above.
+ */
+ if (!pud_none(*pud))
+- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
++ if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud)))
+ pud_clear(pud);
+ }
+
+-void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
++static void __unmap_pud_range(struct cpa_data *cpa, pgd_t *pgd,
++ unsigned long start,
++ unsigned long end)
+ {
+ pud_t *pud = pud_offset(pgd, start);
+
+@@ -840,7 +853,7 @@ void unmap_pud_range(pgd_t *pgd, unsigne
+ unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
+ unsigned long pre_end = min_t(unsigned long, end, next_page);
+
+- unmap_pmd_range(pud, start, pre_end);
++ unmap_pmd_range(cpa, pud, start, pre_end);
+
+ start = pre_end;
+ pud++;
+@@ -854,7 +867,7 @@ void unmap_pud_range(pgd_t *pgd, unsigne
+ if (pud_large(*pud))
+ pud_clear(pud);
+ else
+- unmap_pmd_range(pud, start, start + PUD_SIZE);
++ unmap_pmd_range(cpa, pud, start, start + PUD_SIZE);
+
+ start += PUD_SIZE;
+ pud++;
+@@ -864,7 +877,7 @@ void unmap_pud_range(pgd_t *pgd, unsigne
+ * 2M leftovers?
+ */
+ if (start < end)
+- unmap_pmd_range(pud, start, end);
++ unmap_pmd_range(cpa, pud, start, end);
+
+ /*
+ * No need to try to free the PUD page because we'll free it in
+@@ -872,6 +885,24 @@ void unmap_pud_range(pgd_t *pgd, unsigne
+ */
+ }
+
++static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
++{
++ struct cpa_data cpa = {
++ .flags = CPA_FREE_PAGETABLES,
++ };
++
++ __unmap_pud_range(&cpa, pgd, start, end);
++}
++
++void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end)
++{
++ struct cpa_data cpa = {
++ .flags = 0,
++ };
++
++ __unmap_pud_range(&cpa, pgd, start, end);
++}
++
+ static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end)
+ {
+ pgd_t *pgd_entry = root + pgd_index(addr);
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -340,40 +340,26 @@ static inline void _pgd_free(pgd_t *pgd)
+ kmem_cache_free(pgd_cache, pgd);
+ }
+ #else
+-static inline pgd_t *_pgd_alloc(void)
+-{
+-#ifdef CONFIG_KAISER
+- // Instead of one PML4, we aquire two PML4s and, thus, an 8kb-aligned memory
+- // block. Therefore, we have to allocate at least 3 pages. However, the
+- // __get_free_pages returns us 4 pages. Hence, we store the base pointer at
+- // the beginning of the page of our 8kb-aligned memory block in order to
+- // correctly free it afterwars.
+
+- unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE));
+-
+- if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages)
+- {
+- *((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages;
+- return (pgd_t *) pages;
+- }
+- else
+- {
+- *((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages;
+- return (pgd_t *) (pages + PAGE_SIZE);
+- }
++#ifdef CONFIG_KAISER
++/*
++ * Instead of one PGD, we acquire two PGDs. Being order-1, it is
++ * both 8k in size and 8k-aligned. That lets us just flip bit 12
++ * in a pointer to swap between the two 4k halves.
++ */
++#define PGD_ALLOCATION_ORDER 1
+ #else
+- return (pgd_t *)__get_free_page(PGALLOC_GFP);
++#define PGD_ALLOCATION_ORDER 0
+ #endif
++
++static inline pgd_t *_pgd_alloc(void)
++{
++ return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
+ }
+
+ static inline void _pgd_free(pgd_t *pgd)
+ {
+-#ifdef CONFIG_KAISER
+- unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE));
+- free_pages(pages, get_order(4*PAGE_SIZE));
+-#else
+- free_page((unsigned long)pgd);
+-#endif
++ free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
+ }
+ #endif /* CONFIG_X86_PAE */
+
+--- /dev/null
++++ b/include/linux/kaiser.h
+@@ -0,0 +1,26 @@
++#ifndef _INCLUDE_KAISER_H
++#define _INCLUDE_KAISER_H
++
++#ifdef CONFIG_KAISER
++#include <asm/kaiser.h>
++#else
++
++/*
++ * These stubs are used whenever CONFIG_KAISER is off, which
++ * includes architectures that support KAISER, but have it
++ * disabled.
++ */
++
++static inline void kaiser_init(void)
++{
++}
++static inline void kaiser_remove_mapping(unsigned long start, unsigned long size)
++{
++}
++static inline int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
++{
++ return 0;
++}
++
++#endif /* !CONFIG_KAISER */
++#endif /* _INCLUDE_KAISER_H */
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -58,6 +58,7 @@
+ #include <linux/tsacct_kern.h>
+ #include <linux/cn_proc.h>
+ #include <linux/freezer.h>
++#include <linux/kaiser.h>
+ #include <linux/delayacct.h>
+ #include <linux/taskstats_kern.h>
+ #include <linux/random.h>
+@@ -335,7 +336,6 @@ void set_task_stack_end_magic(struct tas
+ *stackend = STACK_END_MAGIC; /* for overflow detection */
+ }
+
+-extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ {
+ struct task_struct *tsk;
+@@ -357,9 +357,10 @@ static struct task_struct *dup_task_stru
+ goto free_ti;
+
+ tsk->stack = ti;
+-#ifdef CONFIG_KAISER
+- kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
+-#endif
++
++	err = kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
++ if (err)
++ goto free_ti;
+ #ifdef CONFIG_SECCOMP
+ /*
+ * We must handle setting up seccomp filters once we're under
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -32,12 +32,17 @@ config SECURITY
+ If you are unsure how to answer this question, answer N.
+ config KAISER
+ bool "Remove the kernel mapping in user mode"
++ default y
+ depends on X86_64
+ depends on !PARAVIRT
+ help
+ This enforces a strict kernel and user space isolation in order to close
+ hardware side channels on kernel address information.
+
++config KAISER_REAL_SWITCH
++ bool "KAISER: actually switch page tables"
++ default y
++
+ config SECURITYFS
+ bool "Enable the securityfs filesystem"
+ help
diff --git a/patches.suse/4.4-24-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch b/patches.suse/4.4-24-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch
new file mode 100644
index 0000000000..2752b3d041
--- /dev/null
+++ b/patches.suse/4.4-24-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch
@@ -0,0 +1,188 @@
+From e865293e8ff0d0dd33b8772446792712dc52fa4e Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Tue, 5 Sep 2017 12:05:01 -0700
+Subject: [PATCH 24/43] kaiser: do not set _PAGE_NX on pgd_none
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+native_pgd_clear() uses native_set_pgd(), so native_set_pgd() must
+avoid setting the _PAGE_NX bit on an otherwise pgd_none() entry:
+usually that just generated a warning on exit, but sometimes
+more mysterious and damaging failures (our production machines
+could not complete booting).
+
+The original fix to this just avoided adding _PAGE_NX to
+an empty entry; but eventually more problems surfaced with kexec,
+and the EFI mapping was expected to be a problem too. So now instead
+change native_set_pgd() to update shadow only if _PAGE_USER:
+
+A few places (kernel/machine_kexec_64.c, platform/efi/efi_64.c for sure)
+use set_pgd() to set up a temporary internal virtual address space, with
+physical pages remapped at what Kaiser regards as userspace addresses:
+Kaiser then assumes a shadow pgd follows, which it will try to corrupt.
+
+This appears to be responsible for the recent kexec and kdump failures;
+though it's unclear how those did not manifest as a problem before.
+Ah, the shadow pgd will only be assumed to "follow" if the requested
+pgd is on an even-numbered page: so I suppose it was going wrong 50%
+of the time all along.
+
+What we need is a flag to set_pgd(), to tell it we're dealing with
+userspace. Er, isn't that what the pgd's _PAGE_USER bit is saying?
+Add a test for that. But we cannot do the same for pgd_clear()
+(which may be called to clear corrupted entries - set aside the
+question of "corrupt in which pgd?" until later), so there just
+rely on pgd_clear() not being called in the problematic cases -
+with a WARN_ON_ONCE() which should fire half the time if it is.
+
+But this is getting too big for an inline function: move it into
+arch/x86/mm/kaiser.c (which then demands a boot/compressed mod);
+and de-void and de-space native_get_shadow/normal_pgd() while here.
+
+(cherry picked from Change-Id: I17fc68a717ca372137d0225a257b61f8fcb38a87)
+Conflicts:
+arch/x86/mm/kaslr.c (not in this tree)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/boot/compressed/misc.h | 1
+ arch/x86/include/asm/pgtable_64.h | 51 +++++++++-----------------------------
+ arch/x86/mm/kaiser.c | 42 +++++++++++++++++++++++++++++++
+ 3 files changed, 56 insertions(+), 38 deletions(-)
+
+--- a/arch/x86/boot/compressed/misc.h
++++ b/arch/x86/boot/compressed/misc.h
+@@ -9,6 +9,7 @@
+ */
+ #undef CONFIG_PARAVIRT
+ #undef CONFIG_PARAVIRT_SPINLOCKS
++#undef CONFIG_KAISER
+ #undef CONFIG_KASAN
+
+ #include <linux/linkage.h>
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -107,61 +107,36 @@ static inline void native_pud_clear(pud_
+ }
+
+ #ifdef CONFIG_KAISER
+-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
++extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd);
++
++static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+ {
+- return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE);
++ return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE);
+ }
+
+-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
++static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
+ {
+- return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE);
++ return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE);
+ }
+ #else
+-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
++static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
++{
++ return pgd;
++}
++static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+ {
+ BUILD_BUG_ON(1);
+ return NULL;
+ }
+-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
++static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
+ {
+ return pgdp;
+ }
+ #endif /* CONFIG_KAISER */
+
+-/*
+- * Page table pages are page-aligned. The lower half of the top
+- * level is used for userspace and the top half for the kernel.
+- * This returns true for user pages that need to get copied into
+- * both the user and kernel copies of the page tables, and false
+- * for kernel pages that should only be in the kernel copy.
+- */
+-static inline bool is_userspace_pgd(void *__ptr)
+-{
+- unsigned long ptr = (unsigned long)__ptr;
+-
+- return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2));
+-}
+-
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+-#ifdef CONFIG_KAISER
+- pteval_t extra_kern_pgd_flags = 0;
+- /* Do we need to also populate the shadow pgd? */
+- if (is_userspace_pgd(pgdp)) {
+- native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
+- /*
+- * Even if the entry is *mapping* userspace, ensure
+- * that userspace can not use it. This way, if we
+- * get out to userspace running on the kernel CR3,
+- * userspace will crash instead of running.
+- */
+- extra_kern_pgd_flags = _PAGE_NX;
+- }
+- pgdp->pgd = pgd.pgd;
+- pgdp->pgd |= extra_kern_pgd_flags;
+-#else /* CONFIG_KAISER */
+- *pgdp = pgd;
+-#endif
++ *pgdp = kaiser_set_shadow_pgd(pgdp, pgd);
+ }
+
+ static inline void native_pgd_clear(pgd_t *pgd)
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -303,4 +303,46 @@ void kaiser_remove_mapping(unsigned long
+ unmap_pud_range_nofree(pgd, addr, end);
+ }
+ }
++
++/*
++ * Page table pages are page-aligned. The lower half of the top
++ * level is used for userspace and the top half for the kernel.
++ * This returns true for user pages that need to get copied into
++ * both the user and kernel copies of the page tables, and false
++ * for kernel pages that should only be in the kernel copy.
++ */
++static inline bool is_userspace_pgd(pgd_t *pgdp)
++{
++ return ((unsigned long)pgdp % PAGE_SIZE) < (PAGE_SIZE / 2);
++}
++
++pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
++{
++ /*
++ * Do we need to also populate the shadow pgd? Check _PAGE_USER to
++ * skip cases like kexec and EFI which make temporary low mappings.
++ */
++ if (pgd.pgd & _PAGE_USER) {
++ if (is_userspace_pgd(pgdp)) {
++ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
++ /*
++ * Even if the entry is *mapping* userspace, ensure
++ * that userspace can not use it. This way, if we
++ * get out to userspace running on the kernel CR3,
++ * userspace will crash instead of running.
++ */
++ pgd.pgd |= _PAGE_NX;
++ }
++ } else if (!pgd.pgd) {
++ /*
++ * pgd_clear() cannot check _PAGE_USER, and is even used to
++ * clear corrupted pgd entries: so just rely on cases like
++ * kexec and EFI never to be using pgd_clear().
++ */
++ if (!WARN_ON_ONCE((unsigned long)pgdp & PAGE_SIZE) &&
++ is_userspace_pgd(pgdp))
++ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
++ }
++ return pgd;
++}
+ #endif /* CONFIG_KAISER */
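
Editor's note: the branch structure kaiser_set_shadow_pgd() adds above amounts to a small decision table — copy _PAGE_USER entries into the shadow half and poison the kernel copy with NX, pass kernel-only (kexec/EFI-style) entries through untouched, and let clears propagate. A toy model with made-up flag values, not the real x86 page-table bits:

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative flag values only, not the real x86 definitions. */
#define F_USER 0x004UL
#define F_NX   0x800UL

/* Mimics the decision kaiser_set_shadow_pgd() makes: does the shadow half
 * get a copy, and is the kernel copy poisoned with NX? */
static unsigned long shadow_decision(unsigned long val, bool in_user_half,
				     bool *write_shadow)
{
	*write_shadow = false;

	if (val & F_USER) {
		/* Real userspace mapping: copy to shadow, NX in the kernel pgd. */
		if (in_user_half) {
			*write_shadow = true;
			val |= F_NX;
		}
	} else if (!val) {
		/* pgd_clear(): propagate the clear to the shadow half too. */
		*write_shadow = in_user_half;
	}
	/* Otherwise: kernel-only mapping (kexec/EFI temporary) - shadow untouched. */
	return val;
}

int main(void)
{
	bool shadow;
	unsigned long v;

	v = shadow_decision(0x1000 | F_USER, true, &shadow);
	printf("user mapping:   kernel copy 0x%lx, shadow %s\n", v,
	       shadow ? "written" : "skipped");
	v = shadow_decision(0x1000, true, &shadow);
	printf("kexec/EFI temp: kernel copy 0x%lx, shadow %s\n", v,
	       shadow ? "written" : "skipped");
	v = shadow_decision(0, true, &shadow);
	printf("pgd_clear():    kernel copy 0x%lx, shadow %s\n", v,
	       shadow ? "cleared" : "skipped");
	return 0;
}
```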
diff --git a/patches.suse/4.4-25-kaiser-stack-map-PAGE_SIZE-at-THREAD_SIZE-PAGE_SIZE.patch b/patches.suse/4.4-25-kaiser-stack-map-PAGE_SIZE-at-THREAD_SIZE-PAGE_SIZE.patch
new file mode 100644
index 0000000000..db309b36f8
--- /dev/null
+++ b/patches.suse/4.4-25-kaiser-stack-map-PAGE_SIZE-at-THREAD_SIZE-PAGE_SIZE.patch
@@ -0,0 +1,139 @@
+From c962a8354e6847474346d9a171a69d7e093dfb08 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 18:57:03 -0700
+Subject: [PATCH 25/43] kaiser: stack map PAGE_SIZE at THREAD_SIZE-PAGE_SIZE
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+Kaiser only needs to map one page of the stack; and
+kernel/fork.c did not build on powerpc (no __PAGE_KERNEL).
+It's all cleaner if linux/kaiser.h provides kaiser_map_thread_stack()
+and kaiser_unmap_thread_stack() wrappers around asm/kaiser.h's
+kaiser_add_mapping() and kaiser_remove_mapping(). And use
+linux/kaiser.h in init/main.c to avoid the #ifdefs there.
+
+(cherry picked from Change-Id: I700edd7d3b1c0b8c13367f61a483f92bc34e8654)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ include/linux/kaiser.h | 40 +++++++++++++++++++++++++++++++++-------
+ init/main.c | 6 +-----
+ kernel/fork.c | 7 ++-----
+ 3 files changed, 36 insertions(+), 17 deletions(-)
+
+--- a/include/linux/kaiser.h
++++ b/include/linux/kaiser.h
+@@ -1,26 +1,52 @@
+-#ifndef _INCLUDE_KAISER_H
+-#define _INCLUDE_KAISER_H
++#ifndef _LINUX_KAISER_H
++#define _LINUX_KAISER_H
+
+ #ifdef CONFIG_KAISER
+ #include <asm/kaiser.h>
++
++static inline int kaiser_map_thread_stack(void *stack)
++{
++ /*
++ * Map that page of kernel stack on which we enter from user context.
++ */
++ return kaiser_add_mapping((unsigned long)stack +
++ THREAD_SIZE - PAGE_SIZE, PAGE_SIZE, __PAGE_KERNEL);
++}
++
++static inline void kaiser_unmap_thread_stack(void *stack)
++{
++ /*
++ * Note: may be called even when kaiser_map_thread_stack() failed.
++ */
++ kaiser_remove_mapping((unsigned long)stack +
++ THREAD_SIZE - PAGE_SIZE, PAGE_SIZE);
++}
+ #else
+
+ /*
+ * These stubs are used whenever CONFIG_KAISER is off, which
+- * includes architectures that support KAISER, but have it
+- * disabled.
++ * includes architectures that support KAISER, but have it disabled.
+ */
+
+ static inline void kaiser_init(void)
+ {
+ }
+-static inline void kaiser_remove_mapping(unsigned long start, unsigned long size)
++static inline int kaiser_add_mapping(unsigned long addr,
++ unsigned long size, unsigned long flags)
++{
++ return 0;
++}
++static inline void kaiser_remove_mapping(unsigned long start,
++ unsigned long size)
+ {
+ }
+-static inline int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
++static inline int kaiser_map_thread_stack(void *stack)
+ {
+ return 0;
+ }
++static inline void kaiser_unmap_thread_stack(void *stack)
++{
++}
+
+ #endif /* !CONFIG_KAISER */
+-#endif /* _INCLUDE_KAISER_H */
++#endif /* _LINUX_KAISER_H */
+--- a/init/main.c
++++ b/init/main.c
+@@ -82,15 +82,13 @@
+ #include <linux/integrity.h>
+ #include <linux/proc_ns.h>
+ #include <linux/io.h>
++#include <linux/kaiser.h>
+
+ #include <asm/io.h>
+ #include <asm/bugs.h>
+ #include <asm/setup.h>
+ #include <asm/sections.h>
+ #include <asm/cacheflush.h>
+-#ifdef CONFIG_KAISER
+-#include <asm/kaiser.h>
+-#endif
+
+ static int kernel_init(void *);
+
+@@ -496,9 +494,7 @@ static void __init mm_init(void)
+ pgtable_init();
+ vmalloc_init();
+ ioremap_huge_init();
+-#ifdef CONFIG_KAISER
+ kaiser_init();
+-#endif
+ }
+
+ asmlinkage __visible void __init start_kernel(void)
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -168,12 +168,9 @@ static struct thread_info *alloc_thread_
+ return page ? page_address(page) : NULL;
+ }
+
+-extern void kaiser_remove_mapping(unsigned long start_addr, unsigned long size);
+ static inline void free_thread_info(struct thread_info *ti)
+ {
+-#ifdef CONFIG_KAISER
+- kaiser_remove_mapping((unsigned long)ti, THREAD_SIZE);
+-#endif
++ kaiser_unmap_thread_stack(ti);
+ free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+ }
+ # else
+@@ -358,7 +355,7 @@ static struct task_struct *dup_task_stru
+
+ tsk->stack = ti;
+
+- err= kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
++ err = kaiser_map_thread_stack(tsk->stack);
+ if (err)
+ goto free_ti;
+ #ifdef CONFIG_SECCOMP
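
Editor's note: kaiser_map_thread_stack() maps only the topmost page of the stack — the page entered from user context. A small sketch of the address arithmetic, assuming 16k stacks for THREAD_SIZE and a hypothetical stack base:

```c
#include <stdio.h>

#define PAGE_SIZE   4096UL
#define THREAD_SIZE (4 * PAGE_SIZE)	/* assumed: 16k stacks, as on x86_64 4.4 */

int main(void)
{
	unsigned long stack = 0xffff880012340000UL;	/* hypothetical stack base */

	/* Only the top PAGE_SIZE bytes of the THREAD_SIZE stack are added to
	 * the shadow mapping. */
	unsigned long map_start = stack + THREAD_SIZE - PAGE_SIZE;
	unsigned long map_size  = PAGE_SIZE;

	printf("stack:       0x%lx .. 0x%lx\n", stack, stack + THREAD_SIZE);
	printf("user-mapped: 0x%lx .. 0x%lx (%lu bytes of %lu)\n",
	       map_start, map_start + map_size, map_size, THREAD_SIZE);
	return 0;
}
```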
diff --git a/patches.suse/4.4-26-kaiser-fix-build-and-FIXME-in-alloc_ldt_struct.patch b/patches.suse/4.4-26-kaiser-fix-build-and-FIXME-in-alloc_ldt_struct.patch
new file mode 100644
index 0000000000..c9cd6f148a
--- /dev/null
+++ b/patches.suse/4.4-26-kaiser-fix-build-and-FIXME-in-alloc_ldt_struct.patch
@@ -0,0 +1,58 @@
+From 7e811c7363a755345b357b8b7ba31208b7eeebfd Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 17:09:44 -0700
+Subject: [PATCH 26/43] kaiser: fix build and FIXME in alloc_ldt_struct()
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+Include linux/kaiser.h instead of asm/kaiser.h to build ldt.c without
+CONFIG_KAISER. kaiser_add_mapping() does already return an error code,
+so fix the FIXME.
+
+(cherry picked from Change-Id: I954c53636d4a2fb5755eb8366f01b2f5f5e1e75d)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/kernel/ldt.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
+index 890730f317f1..b7e041dd46fa 100644
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -15,9 +15,9 @@
+ #include <linux/slab.h>
+ #include <linux/vmalloc.h>
+ #include <linux/uaccess.h>
++#include <linux/kaiser.h>
+
+ #include <asm/ldt.h>
+-#include <asm/kaiser.h>
+ #include <asm/desc.h>
+ #include <asm/mmu_context.h>
+ #include <asm/syscalls.h>
+@@ -48,7 +48,7 @@ static struct ldt_struct *alloc_ldt_struct(int size)
+ {
+ struct ldt_struct *new_ldt;
+ int alloc_size;
+- int ret = 0;
++ int ret;
+
+ if (size > LDT_ENTRIES)
+ return NULL;
+@@ -76,10 +76,8 @@ static struct ldt_struct *alloc_ldt_struct(int size)
+ return NULL;
+ }
+
+- // FIXME: make kaiser_add_mapping() return an error code
+- // when it fails
+- kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
+- __PAGE_KERNEL);
++ ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
++ __PAGE_KERNEL);
+ if (ret) {
+ __free_ldt_struct(new_ldt);
+ return NULL;
+--
+2.14.2.822.g60be5d43e6-goog
+
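
Editor's note: the ldt.c change above turns a fire-and-forget call into the usual register-or-roll-back pattern. A hedged userspace sketch, with a hypothetical register_mapping() standing in for kaiser_add_mapping():

```c
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for kaiser_add_mapping(): a registration that can fail. */
static int register_mapping(void *addr, size_t size)
{
	(void)addr;
	return size > (1 << 20) ? -12 /* -ENOMEM */ : 0;
}

/* Allocate a buffer and register it; on registration failure, undo the
 * allocation and report NULL, as alloc_ldt_struct() now does. */
static void *alloc_registered(size_t size)
{
	void *buf = malloc(size);

	if (!buf)
		return NULL;
	if (register_mapping(buf, size)) {
		free(buf);		/* roll back the partial setup */
		return NULL;
	}
	return buf;
}

int main(void)
{
	void *ok = alloc_registered(4096);
	void *bad = alloc_registered(4 << 20);

	printf("small: %s, huge: %s\n", ok ? "mapped" : "failed",
	       bad ? "mapped" : "failed");
	free(ok);
	return 0;
}
```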
diff --git a/patches.suse/4.4-27-kaiser-KAISER-depends-on-SMP.patch b/patches.suse/4.4-27-kaiser-KAISER-depends-on-SMP.patch
new file mode 100644
index 0000000000..28499394c8
--- /dev/null
+++ b/patches.suse/4.4-27-kaiser-KAISER-depends-on-SMP.patch
@@ -0,0 +1,59 @@
+From 79e324b143d6070072c9a9e68f240267924c7189 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 13 Sep 2017 14:03:10 -0700
+Subject: [PATCH 27/43] kaiser: KAISER depends on SMP
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+It is absurd that KAISER should depend on SMP, but apparently nobody
+has tried a UP build before: which breaks on implicit declaration of
+function 'per_cpu_offset' in arch/x86/mm/kaiser.c.
+
+Now, you would expect that to be trivially fixed up; but looking at
+the System.map when that block is #ifdef'ed out of kaiser_init(),
+I see that in a UP build __per_cpu_user_mapped_end is precisely at
+__per_cpu_user_mapped_start, and the items carefully gathered into
+that section for user-mapping on SMP, dispersed elsewhere on UP.
+
+So, some other kind of section assignment will be needed on UP,
+but implementing that is not a priority: just make KAISER depend
+on SMP for now.
+
+Also inserted a blank line before the option, tidied up the
+brief Kconfig help message, and added an "If unsure, Y".
+
+(cherry picked from Change-Id: I5e5a6744b3f8cdf90ada9d4a8ceb16038d279087)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ security/Kconfig | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/security/Kconfig b/security/Kconfig
+index 45cdb0098f38..0d54e3cc4586 100644
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -30,14 +30,16 @@ config SECURITY
+ model will be used.
+
+ If you are unsure how to answer this question, answer N.
++
+ config KAISER
+ bool "Remove the kernel mapping in user mode"
+ default y
+- depends on X86_64
+- depends on !PARAVIRT
++ depends on X86_64 && SMP && !PARAVIRT
+ help
+- This enforces a strict kernel and user space isolation in order to close
+- hardware side channels on kernel address information.
++ This enforces a strict kernel and user space isolation, in order
++ to close hardware side channels on kernel address information.
++
++ If you are unsure how to answer this question, answer Y.
+
+ config KAISER_REAL_SWITCH
+ bool "KAISER: actually switch page tables"
+--
+2.14.2.822.g60be5d43e6-goog
+
diff --git a/patches.suse/4.4-28-kaiser-fix-regs-to-do_nmi-ifndef-CONFIG_KAISER.patch b/patches.suse/4.4-28-kaiser-fix-regs-to-do_nmi-ifndef-CONFIG_KAISER.patch
new file mode 100644
index 0000000000..b3f487cdeb
--- /dev/null
+++ b/patches.suse/4.4-28-kaiser-fix-regs-to-do_nmi-ifndef-CONFIG_KAISER.patch
@@ -0,0 +1,72 @@
+From 97d669e3b3db7e3834b018bcd0a388e4c021fe54 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 21 Sep 2017 20:39:56 -0700
+Subject: [PATCH 28/43] kaiser: fix regs to do_nmi() ifndef CONFIG_KAISER
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+pjt has observed that nmi's second (nmi_from_kernel) call to do_nmi()
+adjusted the %rdi regs arg, rightly when CONFIG_KAISER, but wrongly
+when not CONFIG_KAISER.
+
+Although the minimal change is to add an #ifdef CONFIG_KAISER around
+the addq line, that looks cluttered, and I prefer how the first call
+to do_nmi() handled it: prepare args in %rdi and %rsi before getting
+into the CONFIG_KAISER block, since it does not touch them at all.
+
+And while we're here, place the "#ifdef CONFIG_KAISER" that follows
+each, to enclose the "Unconditionally restore CR3" comment: matching
+how the "Unconditionally use kernel CR3" comment above is enclosed.
+
+(cherry picked from Change-Id: Ia90f6e10d0ba7939cf6cce6fe62901765416e2f5)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/entry/entry_64.S | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1325,12 +1325,13 @@ ENTRY(nmi)
+ movq %rax, %cr3
+ #endif
+ call do_nmi
++
++#ifdef CONFIG_KAISER
+ /*
+ * Unconditionally restore CR3. I know we return to
+ * kernel code that needs user CR3, but do we ever return
+ * to "user mode" where we need the kernel CR3?
+ */
+-#ifdef CONFIG_KAISER
+ popq %rax
+ mov %rax, %cr3
+ #endif
+@@ -1554,6 +1555,8 @@ end_repeat_nmi:
+ SWAPGS
+ xorl %ebx, %ebx
+ 1:
++ movq %rsp, %rdi
++ movq $-1, %rsi
+ #ifdef CONFIG_KAISER
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+@@ -1566,16 +1569,14 @@ end_repeat_nmi:
+ #endif
+
+ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+- movq %rsp, %rdi
+- addq $8, %rdi /* point %rdi at ptregs, fixed up for CR3 */
+- movq $-1, %rsi
+ call do_nmi
++
++#ifdef CONFIG_KAISER
+ /*
+ * Unconditionally restore CR3. We might be returning to
+ * kernel code that needs user CR3, like just just before
+ * a sysret.
+ */
+-#ifdef CONFIG_KAISER
+ popq %rax
+ mov %rax, %cr3
+ #endif
diff --git a/patches.suse/4.4-29-kaiser-fix-perf-crashes.patch b/patches.suse/4.4-29-kaiser-fix-perf-crashes.patch
new file mode 100644
index 0000000000..0569c4ee0d
--- /dev/null
+++ b/patches.suse/4.4-29-kaiser-fix-perf-crashes.patch
@@ -0,0 +1,158 @@
+From 9a585582954f0fc71fa14f1cc5e916af997a4b1e Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 23 Aug 2017 14:21:14 -0700
+Subject: [PATCH 29/43] kaiser: fix perf crashes
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+Avoid perf crashes: place debug_store in the user-mapped per-cpu area
+instead of allocating, and use page allocator plus kaiser_add_mapping()
+to keep the BTS and PEBS buffers user-mapped (that is, present in the
+user mapping, though visible only to kernel and hardware). The PEBS
+fixup buffer does not need this treatment.
+
+The need for a user-mapped struct debug_store showed up before doing
+any conscious perf testing: in a couple of kernel paging oopses on
+Westmere, implicating the debug_store offset of the per-cpu area.
+
+(cherry picked from Change-Id: I347f6c3b84c2ff25f4cfc85317ff5bf8a851ec95)
+Conflicts:
+arch/x86/events/intel/ds.c (not in this tree)
+arch/x86/kernel/cpu/perf_event_intel_ds.c (patched instead of that)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/kernel/cpu/perf_event_intel_ds.c | 57 ++++++++++++++++++++++++-------
+ 1 file changed, 45 insertions(+), 12 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
+index 1e7de3cefc9c..4e334e3ac16f 100644
+--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
++++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
+@@ -2,11 +2,15 @@
+ #include <linux/types.h>
+ #include <linux/slab.h>
+
++#include <asm/kaiser.h>
+ #include <asm/perf_event.h>
+ #include <asm/insn.h>
+
+ #include "perf_event.h"
+
++static
++DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store);
++
+ /* The size of a BTS record in bytes: */
+ #define BTS_RECORD_SIZE 24
+
+@@ -268,6 +272,39 @@ void fini_debug_store_on_cpu(int cpu)
+
+ static DEFINE_PER_CPU(void *, insn_buffer);
+
++static void *dsalloc(size_t size, gfp_t flags, int node)
++{
++#ifdef CONFIG_KAISER
++ unsigned int order = get_order(size);
++ struct page *page;
++ unsigned long addr;
++
++ page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
++ if (!page)
++ return NULL;
++ addr = (unsigned long)page_address(page);
++ if (kaiser_add_mapping(addr, size, __PAGE_KERNEL) < 0) {
++ __free_pages(page, order);
++ addr = 0;
++ }
++ return (void *)addr;
++#else
++ return kmalloc_node(size, flags | __GFP_ZERO, node);
++#endif
++}
++
++static void dsfree(const void *buffer, size_t size)
++{
++#ifdef CONFIG_KAISER
++ if (!buffer)
++ return;
++ kaiser_remove_mapping((unsigned long)buffer, size);
++ free_pages((unsigned long)buffer, get_order(size));
++#else
++ kfree(buffer);
++#endif
++}
++
+ static int alloc_pebs_buffer(int cpu)
+ {
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+@@ -278,7 +315,7 @@ static int alloc_pebs_buffer(int cpu)
+ if (!x86_pmu.pebs)
+ return 0;
+
+- buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
++ buffer = dsalloc(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+ if (unlikely(!buffer))
+ return -ENOMEM;
+
+@@ -289,7 +326,7 @@ static int alloc_pebs_buffer(int cpu)
+ if (x86_pmu.intel_cap.pebs_format < 2) {
+ ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+ if (!ibuffer) {
+- kfree(buffer);
++ dsfree(buffer, x86_pmu.pebs_buffer_size);
+ return -ENOMEM;
+ }
+ per_cpu(insn_buffer, cpu) = ibuffer;
+@@ -315,7 +352,8 @@ static void release_pebs_buffer(int cpu)
+ kfree(per_cpu(insn_buffer, cpu));
+ per_cpu(insn_buffer, cpu) = NULL;
+
+- kfree((void *)(unsigned long)ds->pebs_buffer_base);
++ dsfree((void *)(unsigned long)ds->pebs_buffer_base,
++ x86_pmu.pebs_buffer_size);
+ ds->pebs_buffer_base = 0;
+ }
+
+@@ -329,7 +367,7 @@ static int alloc_bts_buffer(int cpu)
+ if (!x86_pmu.bts)
+ return 0;
+
+- buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
++ buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+ if (unlikely(!buffer)) {
+ WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
+ return -ENOMEM;
+@@ -355,19 +393,15 @@ static void release_bts_buffer(int cpu)
+ if (!ds || !x86_pmu.bts)
+ return;
+
+- kfree((void *)(unsigned long)ds->bts_buffer_base);
++ dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE);
+ ds->bts_buffer_base = 0;
+ }
+
+ static int alloc_ds_buffer(int cpu)
+ {
+- int node = cpu_to_node(cpu);
+- struct debug_store *ds;
+-
+- ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
+- if (unlikely(!ds))
+- return -ENOMEM;
++ struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu);
+
++ memset(ds, 0, sizeof(*ds));
+ per_cpu(cpu_hw_events, cpu).ds = ds;
+
+ return 0;
+@@ -381,7 +415,6 @@ static void release_ds_buffer(int cpu)
+ return;
+
+ per_cpu(cpu_hw_events, cpu).ds = NULL;
+- kfree(ds);
+ }
+
+ void release_ds_buffers(void)
+--
+2.14.2.822.g60be5d43e6-goog
+
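
Editor's note: dsalloc() sizes its page allocation with get_order(), so dsfree() must be handed the same byte size (pebs_buffer_size, BTS_BUFFER_SIZE) to free the same order. A rough userspace analogue of that order calculation — order_for() only approximates get_order(); the sizes are examples:

```c
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Rough analogue of the kernel's get_order(): smallest order such that
 * (PAGE_SIZE << order) >= size. */
static unsigned int order_for(unsigned long size)
{
	unsigned int order = 0;

	size = (size - 1) >> PAGE_SHIFT;
	while (size) {
		order++;
		size >>= 1;
	}
	return order;
}

int main(void)
{
	unsigned long sizes[] = { PAGE_SIZE, 24 * 1024, 64 * 1024 };
	unsigned int i;

	for (i = 0; i < 3; i++)
		printf("size %6lu -> order %u (%lu bytes allocated)\n",
		       sizes[i], order_for(sizes[i]),
		       PAGE_SIZE << order_for(sizes[i]));
	return 0;
}
```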
diff --git a/patches.suse/4.4-30-kaiser-ENOMEM-if-kaiser_pagetable_walk-NULL.patch b/patches.suse/4.4-30-kaiser-ENOMEM-if-kaiser_pagetable_walk-NULL.patch
new file mode 100644
index 0000000000..e2ab8d41a1
--- /dev/null
+++ b/patches.suse/4.4-30-kaiser-ENOMEM-if-kaiser_pagetable_walk-NULL.patch
@@ -0,0 +1,52 @@
+From 438107a9158c1d4c02df875396fb66ff2105d344 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 18:48:02 -0700
+Subject: [PATCH 30/43] kaiser: ENOMEM if kaiser_pagetable_walk() NULL
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+kaiser_add_user_map() took no notice when kaiser_pagetable_walk() failed.
+And avoid its might_sleep() when atomic (though atomic at present unused).
+
+(cherry picked from Change-Id: Id9d884e8f3e61cdad2f3e53e70c94ca57233829e)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/mm/kaiser.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -99,11 +99,11 @@ static pte_t *kaiser_pagetable_walk(unsi
+ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+
+- might_sleep();
+ if (is_atomic) {
+ gfp &= ~GFP_KERNEL;
+ gfp |= __GFP_HIGH | __GFP_ATOMIC;
+- }
++ } else
++ might_sleep();
+
+ if (pgd_none(*pgd)) {
+ WARN_ONCE(1, "All shadow pgds should have been populated");
+@@ -160,13 +160,17 @@ int kaiser_add_user_map(const void *__st
+ unsigned long end_addr = PAGE_ALIGN(start_addr + size);
+ unsigned long target_address;
+
+- for (;address < end_addr; address += PAGE_SIZE) {
++ for (; address < end_addr; address += PAGE_SIZE) {
+ target_address = get_pa_from_mapping(address);
+ if (target_address == -1) {
+ ret = -EIO;
+ break;
+ }
+ pte = kaiser_pagetable_walk(address, false);
++ if (!pte) {
++ ret = -ENOMEM;
++ break;
++ }
+ if (pte_none(*pte)) {
+ set_pte(pte, __pte(flags | target_address));
+ } else {
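
Editor's note: the fix above makes kaiser_add_user_map() bail out with -ENOMEM instead of dereferencing a NULL pte. A simplified sketch of that loop, with fake_walk() standing in for kaiser_pagetable_walk():

```c
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define ENOMEM 12

/* Hypothetical walk: returns NULL when an intermediate table cannot be
 * allocated, as kaiser_pagetable_walk() may. */
static int *fake_walk(unsigned long addr)
{
	static int pte;

	return (addr & 0x10000) ? NULL : &pte;	/* fail on some addresses */
}

static int add_user_map(unsigned long start, unsigned long size)
{
	unsigned long addr, end = start + size;
	int ret = 0;

	for (addr = start; addr < end; addr += PAGE_SIZE) {
		int *pte = fake_walk(addr);

		if (!pte) {		/* the case the patch adds */
			ret = -ENOMEM;
			break;
		}
		/* ... set_pte(pte, ...) in the real code ... */
	}
	return ret;
}

int main(void)
{
	printf("clean range:   %d\n", add_user_map(0x0000, 4 * PAGE_SIZE));
	printf("failing range: %d\n", add_user_map(0xf000, 4 * PAGE_SIZE));
	return 0;
}
```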
diff --git a/patches.suse/4.4-31-kaiser-tidied-up-asm-kaiser.h-somewhat.patch b/patches.suse/4.4-31-kaiser-tidied-up-asm-kaiser.h-somewhat.patch
new file mode 100644
index 0000000000..983363ebe3
--- /dev/null
+++ b/patches.suse/4.4-31-kaiser-tidied-up-asm-kaiser.h-somewhat.patch
@@ -0,0 +1,110 @@
+From e97198ecb5afd74e15d30a46b9a9865ea25344ad Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 19:18:07 -0700
+Subject: [PATCH 31/43] kaiser: tidied up asm/kaiser.h somewhat
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+Mainly deleting a surfeit of blank lines, and reflowing header comment.
+
+(cherry picked from Change-Id: I40515764842d56ac897d1e918e2b15aac1ea524e)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/include/asm/kaiser.h | 32 +++++++++++++-------------------
+ 1 file changed, 13 insertions(+), 19 deletions(-)
+
+diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
+index 0703f48777f3..7394ba9f9951 100644
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -1,15 +1,17 @@
+ #ifndef _ASM_X86_KAISER_H
+ #define _ASM_X86_KAISER_H
+-
+-/* This file includes the definitions for the KAISER feature.
+- * KAISER is a counter measure against x86_64 side channel attacks on the kernel virtual memory.
+- * It has a shodow-pgd for every process. the shadow-pgd has a minimalistic kernel-set mapped,
+- * but includes the whole user memory. Within a kernel context switch, or when an interrupt is handled,
+- * the pgd is switched to the normal one. When the system switches to user mode, the shadow pgd is enabled.
+- * By this, the virtual memory chaches are freed, and the user may not attack the whole kernel memory.
++/*
++ * This file includes the definitions for the KAISER feature.
++ * KAISER is a counter measure against x86_64 side channel attacks on
++ * the kernel virtual memory. It has a shadow pgd for every process: the
++ * shadow pgd has a minimalistic kernel-set mapped, but includes the whole
++ * user memory. Within a kernel context switch, or when an interrupt is handled,
++ * the pgd is switched to the normal one. When the system switches to user mode,
++ * the shadow pgd is enabled. By this, the virtual memory caches are freed,
++ * and the user may not attack the whole kernel memory.
+ *
+- * A minimalistic kernel mapping holds the parts needed to be mapped in user mode, as the entry/exit functions
+- * of the user space, or the stacks.
++ * A minimalistic kernel mapping holds the parts needed to be mapped in user
++ * mode, such as the entry/exit functions of the user space, or the stacks.
+ */
+ #ifdef __ASSEMBLY__
+ #ifdef CONFIG_KAISER
+@@ -48,13 +50,10 @@ _SWITCH_TO_KERNEL_CR3 %rax
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+ .endm
+
+-
+ .macro SWITCH_USER_CR3_NO_STACK
+-
+ movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
+ _SWITCH_TO_USER_CR3 %rax
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+-
+ .endm
+
+ #else /* CONFIG_KAISER */
+@@ -72,7 +71,6 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+
+ #else /* __ASSEMBLY__ */
+
+-
+ #ifdef CONFIG_KAISER
+ /*
+ * Upon kernel/user mode switch, it may happen that the address
+@@ -80,7 +78,6 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+ * stored. To change the address space, another register is
+ * needed. A register therefore has to be stored/restored.
+ */
+-
+ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+ /**
+@@ -95,7 +92,6 @@ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+ */
+ extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+
+-
+ /**
+ * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping
+ * @addr: the start address of the range
+@@ -104,12 +100,12 @@ extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned l
+ extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
+
+ /**
+- * kaiser_initialize_mapping - Initalize the shadow mapping
++ * kaiser_init - Initialize the shadow mapping
+ *
+ * Most parts of the shadow mapping can be mapped upon boot
+ * time. Only per-process things like the thread stacks
+ * or a new LDT have to be mapped at runtime. These boot-
+- * time mappings are permanent and nevertunmapped.
++ * time mappings are permanent and never unmapped.
+ */
+ extern void kaiser_init(void);
+
+@@ -117,6 +113,4 @@ extern void kaiser_init(void);
+
+ #endif /* __ASSEMBLY */
+
+-
+-
+ #endif /* _ASM_X86_KAISER_H */
+--
+2.14.2.822.g60be5d43e6-goog
+
diff --git a/patches.suse/4.4-32-kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch b/patches.suse/4.4-32-kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch
new file mode 100644
index 0000000000..0f9f9db26b
--- /dev/null
+++ b/patches.suse/4.4-32-kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch
@@ -0,0 +1,50 @@
+From 0020055cba3af80de6258168ad4b1826dd0fdc7e Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 19:23:08 -0700
+Subject: [PATCH 32/43] kaiser: tidied up kaiser_add/remove_mapping slightly
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+Yes, unmap_pud_range_nofree()'s declaration ought to be in a
+header file really, but I'm not sure we want to use it anyway:
+so for now just declare it inside kaiser_remove_mapping().
+And there doesn't seem to be such a thing as unmap_p4d_range(),
+even in a 5-level paging tree.
+
+(cherry picked from Change-Id: I70901c57bd4af838917f6b974bc71ed3cf765dfe)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/mm/kaiser.c | 9 +++------
+ 1 file changed, 3 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -286,8 +286,7 @@ void __init kaiser_init(void)
+ __PAGE_KERNEL);
+ }
+
+-extern void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end);
+-// add a mapping to the shadow-mapping, and synchronize the mappings
++/* Add a mapping to the shadow mapping, and synchronize the mappings */
+ int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
+ {
+ return kaiser_add_user_map((const void *)addr, size, flags);
+@@ -295,15 +294,13 @@ int kaiser_add_mapping(unsigned long add
+
+ void kaiser_remove_mapping(unsigned long start, unsigned long size)
+ {
++ extern void unmap_pud_range_nofree(pgd_t *pgd,
++ unsigned long start, unsigned long end);
+ unsigned long end = start + size;
+ unsigned long addr;
+
+ for (addr = start; addr < end; addr += PGDIR_SIZE) {
+ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr));
+- /*
+- * unmap_p4d_range() handles > P4D_SIZE unmaps,
+- * so no need to trim 'end'.
+- */
+ unmap_pud_range_nofree(pgd, addr, end);
+ }
+ }
diff --git a/patches.suse/4.4-33-kaiser-kaiser_remove_mapping-move-along-the-pgd.patch b/patches.suse/4.4-33-kaiser-kaiser_remove_mapping-move-along-the-pgd.patch
new file mode 100644
index 0000000000..7af8cd12f2
--- /dev/null
+++ b/patches.suse/4.4-33-kaiser-kaiser_remove_mapping-move-along-the-pgd.patch
@@ -0,0 +1,50 @@
+From f6cf50cae603d059903cade0000fef7dd1bae9ea Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 2 Oct 2017 10:57:24 -0700
+Subject: [PATCH 33/43] kaiser: kaiser_remove_mapping() move along the pgd
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+When removing the bogus comment from kaiser_remove_mapping(),
+I really ought to have checked the extent of its bogosity: as
+Neel points out, there is nothing to stop unmap_pud_range_nofree()
+from continuing beyond the end of a pud (and starting in the wrong
+position on the next).
+
+Fix kaiser_remove_mapping() to constrain the extent and advance pgd
+pointer correctly: use pgd_addr_end() macro as used throughout base
+mm (but don't assume page-rounded start and size in this case).
+
+But this bug was very unlikely to trigger in this backport: since
+any buddy allocation is contained within a single pud extent, and
+we are not using vmapped stacks (and are only mapping one page of
+stack anyway): the only way to hit this bug here would be when
+freeing a large modified ldt.
+
+(cherry picked from Change-Id: I216df7099de1d9e3b186e19586a7a5bc4784feed)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/mm/kaiser.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -297,11 +297,13 @@ void kaiser_remove_mapping(unsigned long
+ extern void unmap_pud_range_nofree(pgd_t *pgd,
+ unsigned long start, unsigned long end);
+ unsigned long end = start + size;
+- unsigned long addr;
++ unsigned long addr, next;
++ pgd_t *pgd;
+
+- for (addr = start; addr < end; addr += PGDIR_SIZE) {
+- pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr));
+- unmap_pud_range_nofree(pgd, addr, end);
++ pgd = native_get_shadow_pgd(pgd_offset_k(start));
++ for (addr = start; addr < end; pgd++, addr = next) {
++ next = pgd_addr_end(addr, end);
++ unmap_pud_range_nofree(pgd, addr, next);
+ }
+ }
+
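
Editor's note: pgd_addr_end() clamps each step to the next top-level boundary (or to end), so every unmap_pud_range_nofree() call stays within one pgd entry. A scaled-down demo with a pretend PGDIR_SIZE, using the same macro shape as the kernel's (GNU C statement expression, as in the kernel headers):

```c
#include <stdio.h>

/* Scaled-down illustration: pretend each top-level entry covers 0x1000
 * bytes instead of the real 512 GiB PGDIR_SIZE. */
#define PGDIR_SIZE 0x1000UL
#define PGDIR_MASK (~(PGDIR_SIZE - 1))

/* Same shape as the kernel's pgd_addr_end(): next boundary, clamped to end,
 * and careful about wrap-around. */
#define pgd_addr_end(addr, end)						\
	({ unsigned long __b = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
	   (__b - 1 < (end) - 1) ? __b : (end); })

int main(void)
{
	unsigned long start = 0x0e80, size = 0x2300;	/* unaligned on purpose */
	unsigned long end = start + size, addr, next;

	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);
		printf("unmap_pud_range_nofree(pgd, 0x%04lx, 0x%04lx)\n",
		       addr, next);
	}
	return 0;
}
```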
diff --git a/patches.suse/4.4-34-kaiser-align-addition-to-x86-mm-Makefile.patch b/patches.suse/4.4-34-kaiser-align-addition-to-x86-mm-Makefile.patch
new file mode 100644
index 0000000000..392c2e6f21
--- /dev/null
+++ b/patches.suse/4.4-34-kaiser-align-addition-to-x86-mm-Makefile.patch
@@ -0,0 +1,26 @@
+From e41f7b7b194dc960129e76f17717fd22c17df709 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 19:51:10 -0700
+Subject: [PATCH 34/43] kaiser: align addition to x86/mm/Makefile
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+Use tab not space so they line up properly, kaslr.o also.
+
+(cherry picked from Change-Id: I2f2b64c130b1bafe7eceb5b2a3d5be9a9014b02b)
+Conflicts:
+arch/x86/mm/Makefile
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/mm/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -33,4 +33,4 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulatio
+
+ obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
+ obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
+-obj-$(CONFIG_KAISER) += kaiser.o
++obj-$(CONFIG_KAISER) += kaiser.o
diff --git a/patches.suse/4.4-35-kaiser-cleanups-while-trying-for-gold-link.patch b/patches.suse/4.4-35-kaiser-cleanups-while-trying-for-gold-link.patch
new file mode 100644
index 0000000000..d70c7c18d7
--- /dev/null
+++ b/patches.suse/4.4-35-kaiser-cleanups-while-trying-for-gold-link.patch
@@ -0,0 +1,134 @@
+From c755b42b4ffc6b23cba9f780fa3ee3e73a290000 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 21 Aug 2017 20:11:43 -0700
+Subject: [PATCH 35/43] kaiser: cleanups while trying for gold link
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+While trying to get our gold link to work, four cleanups:
+matched the gdt_page declaration to its definition;
+in fiddling unsuccessfully with PERCPU_INPUT(), lined up backslashes;
+lined up the backslashes according to convention in percpu-defs.h;
+deleted the unused irq_stack_pointer addition to irq_stack_union.
+
+Sad to report that aligning backslashes does not appear to help gold
+align to 8192: but while these did not help, they are worth keeping.
+
+(cherry picked from Change-Id: I71adb56b95e7c30251f23b7de23ebe16881a99be)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/include/asm/desc.h | 2 +-
+ arch/x86/include/asm/processor.h | 5 -----
+ include/asm-generic/vmlinux.lds.h | 18 ++++++++----------
+ include/linux/percpu-defs.h | 24 ++++++++++++------------
+ 4 files changed, 21 insertions(+), 28 deletions(-)
+
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -43,7 +43,7 @@ struct gdt_page {
+ struct desc_struct gdt[GDT_ENTRIES];
+ } __attribute__((aligned(PAGE_SIZE)));
+
+-DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
++DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page);
+
+ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+ {
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -346,11 +346,6 @@ union irq_stack_union {
+ char gs_base[40];
+ unsigned long stack_canary;
+ };
+-
+- struct {
+- char irq_stack_pointer[64];
+- char unused[IRQ_STACK_SIZE - 64];
+- };
+ };
+
+ DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -728,16 +728,14 @@
+ */
+ #define PERCPU_INPUT(cacheline) \
+ VMLINUX_SYMBOL(__per_cpu_start) = .; \
+- \
+- VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \
+- *(.data..percpu..first) \
+- . = ALIGN(cacheline); \
+- *(.data..percpu..user_mapped) \
+- *(.data..percpu..user_mapped..shared_aligned) \
+- . = ALIGN(PAGE_SIZE); \
+- *(.data..percpu..user_mapped..page_aligned) \
+- VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \
+- \
++ VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \
++ *(.data..percpu..first) \
++ . = ALIGN(cacheline); \
++ *(.data..percpu..user_mapped) \
++ *(.data..percpu..user_mapped..shared_aligned) \
++ . = ALIGN(PAGE_SIZE); \
++ *(.data..percpu..user_mapped..page_aligned) \
++ VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \
+ . = ALIGN(PAGE_SIZE); \
+ *(.data..percpu..page_aligned) \
+ . = ALIGN(cacheline); \
+--- a/include/linux/percpu-defs.h
++++ b/include/linux/percpu-defs.h
+@@ -121,10 +121,10 @@
+ #define DEFINE_PER_CPU(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, "")
+
+-#define DECLARE_PER_CPU_USER_MAPPED(type, name) \
++#define DECLARE_PER_CPU_USER_MAPPED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
+
+-#define DEFINE_PER_CPU_USER_MAPPED(type, name) \
++#define DEFINE_PER_CPU_USER_MAPPED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
+
+ /*
+@@ -156,11 +156,11 @@
+ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
+
+-#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
++#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
+
+-#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
++#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
+
+@@ -185,18 +185,18 @@
+ /*
+ * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode.
+ */
+-#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
+- DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
+- __aligned(PAGE_SIZE)
+-
+-#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
+- DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
+- __aligned(PAGE_SIZE)
++#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
++ __aligned(PAGE_SIZE)
++
++#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
++ __aligned(PAGE_SIZE)
+
+ /*
+ * Declaration/definition used for per-CPU variables that must be read mostly.
+ */
+-#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \
++#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, "..read_mostly")
+
+ #define DEFINE_PER_CPU_READ_MOSTLY(type, name) \
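
Editor's note: the user-mapped per-cpu machinery relies on gathering selected variables into dedicated sections bracketed by __per_cpu_user_mapped_start/end. A userspace analogue of that gathering, assuming GCC named sections and the linker-provided __start_/__stop_ symbols on ELF; the names here are illustrative only:

```c
#include <stdio.h>

/* Userspace analogue of DEFINE_PER_CPU_USER_MAPPED(): gather selected
 * variables into one named section so a start/end pair brackets them.
 * GNU ld provides __start_<sec>/__stop_<sec> for such orphan sections. */
#define DEFINE_USER_MAPPED(type, name) \
	__attribute__((section("user_mapped"), used)) type name

DEFINE_USER_MAPPED(long, mapped_a) = 1;
DEFINE_USER_MAPPED(long, mapped_b) = 2;
long not_mapped = 3;	/* ordinary data, lands elsewhere */

extern char __start_user_mapped[], __stop_user_mapped[];

int main(void)
{
	printf("user_mapped section: %p .. %p (%zu bytes)\n",
	       (void *)__start_user_mapped, (void *)__stop_user_mapped,
	       (size_t)(__stop_user_mapped - __start_user_mapped));
	printf("mapped_a at %p, mapped_b at %p, not_mapped at %p\n",
	       (void *)&mapped_a, (void *)&mapped_b, (void *)&not_mapped);
	return 0;
}
```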
diff --git a/patches.suse/4.4-36-kaiser-name-that-0x1000-KAISER_SHADOW_PGD_OFFSET.patch b/patches.suse/4.4-36-kaiser-name-that-0x1000-KAISER_SHADOW_PGD_OFFSET.patch
new file mode 100644
index 0000000000..78ed7204ba
--- /dev/null
+++ b/patches.suse/4.4-36-kaiser-name-that-0x1000-KAISER_SHADOW_PGD_OFFSET.patch
@@ -0,0 +1,66 @@
+From 52f05a70895ed05590da7d78fc2389f5069c3162 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 9 Sep 2017 17:31:18 -0700
+Subject: [PATCH 36/43] kaiser: name that 0x1000 KAISER_SHADOW_PGD_OFFSET
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+There's a 0x1000 in various places, which looks better with a name.
+
+(cherry picked from Change-Id: Iba0990de14f283120234d7446e01e102afe7d4a3)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/entry/entry_64.S | 4 ++--
+ arch/x86/include/asm/kaiser.h | 7 +++++--
+ 2 files changed, 7 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1320,7 +1320,7 @@ ENTRY(nmi)
+ movq %cr3, %rax
+ pushq %rax
+ #ifdef CONFIG_KAISER_REAL_SWITCH
+- andq $(~0x1000), %rax
++ andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
+ #endif
+ movq %rax, %cr3
+ #endif
+@@ -1563,7 +1563,7 @@ end_repeat_nmi:
+ movq %cr3, %rax
+ pushq %rax
+ #ifdef CONFIG_KAISER_REAL_SWITCH
+- andq $(~0x1000), %rax
++ andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
+ #endif
+ movq %rax, %cr3
+ #endif
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -13,13 +13,16 @@
+ * A minimalistic kernel mapping holds the parts needed to be mapped in user
+ * mode, such as the entry/exit functions of the user space, or the stacks.
+ */
++
++#define KAISER_SHADOW_PGD_OFFSET 0x1000
++
+ #ifdef __ASSEMBLY__
+ #ifdef CONFIG_KAISER
+
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+ #ifdef CONFIG_KAISER_REAL_SWITCH
+-andq $(~0x1000), \reg
++andq $(~KAISER_SHADOW_PGD_OFFSET), \reg
+ #endif
+ movq \reg, %cr3
+ .endm
+@@ -27,7 +30,7 @@ movq \reg, %cr3
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
+ #ifdef CONFIG_KAISER_REAL_SWITCH
+-orq $(0x1000), \reg
++orq $(KAISER_SHADOW_PGD_OFFSET), \reg
+ #endif
+ movq \reg, %cr3
+ .endm
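
Editor's note: with the constant named, the kernel and user CR3 values differ only in that one bit of the pgd address, mirroring the bit-12 pointer flip used for the pgd pages themselves. A trivial sketch of the two switch directions on a plain integer (the CR3 value is hypothetical):

```c
#include <stdio.h>

#define KAISER_SHADOW_PGD_OFFSET 0x1000UL

/* _SWITCH_TO_KERNEL_CR3: clear the "user" bit in the pgd address. */
static unsigned long to_kernel_cr3(unsigned long cr3)
{
	return cr3 & ~KAISER_SHADOW_PGD_OFFSET;
}

/* _SWITCH_TO_USER_CR3: set it, pointing CR3 at the shadow pgd page. */
static unsigned long to_user_cr3(unsigned long cr3)
{
	return cr3 | KAISER_SHADOW_PGD_OFFSET;
}

int main(void)
{
	unsigned long cr3 = 0x12344000UL;	/* hypothetical pgd physical address */

	printf("kernel CR3: 0x%lx\n", to_kernel_cr3(cr3));
	printf("user   CR3: 0x%lx\n", to_user_cr3(cr3));
	printf("round trip: 0x%lx\n", to_kernel_cr3(to_user_cr3(cr3)));
	return 0;
}
```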
diff --git a/patches.suse/4.4-37-kaiser-delete-KAISER_REAL_SWITCH-option.patch b/patches.suse/4.4-37-kaiser-delete-KAISER_REAL_SWITCH-option.patch
new file mode 100644
index 0000000000..a1ba25d898
--- /dev/null
+++ b/patches.suse/4.4-37-kaiser-delete-KAISER_REAL_SWITCH-option.patch
@@ -0,0 +1,79 @@
+From bd2a3aaac4fc3ed0870d2d6b3d451283ebdc9aa6 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 18:30:43 -0700
+Subject: [PATCH 37/43] kaiser: delete KAISER_REAL_SWITCH option
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+We fail to see what CONFIG_KAISER_REAL_SWITCH is for: it seems to be
+left over from early development, and now just obscures tricky parts
+of the code. Delete it before adding PCIDs, or nokaiser boot option.
+
+(Or if there is some good reason to keep the option, then it needs
+a help text - and a "depends on KAISER", so that all those without
+KAISER are not asked the question.)
+
+(cherry picked from Change-Id: Ic1ba10f7933d52bf2bac706f708fe39f1aec60d2)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/entry/entry_64.S | 4 ----
+ arch/x86/include/asm/kaiser.h | 4 ----
+ security/Kconfig | 4 ----
+ 3 files changed, 12 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1319,9 +1319,7 @@ ENTRY(nmi)
+ /* %rax is saved above, so OK to clobber here */
+ movq %cr3, %rax
+ pushq %rax
+-#ifdef CONFIG_KAISER_REAL_SWITCH
+ andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
+-#endif
+ movq %rax, %cr3
+ #endif
+ call do_nmi
+@@ -1562,9 +1560,7 @@ end_repeat_nmi:
+ /* %rax is saved above, so OK to clobber here */
+ movq %cr3, %rax
+ pushq %rax
+-#ifdef CONFIG_KAISER_REAL_SWITCH
+ andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
+-#endif
+ movq %rax, %cr3
+ #endif
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -21,17 +21,13 @@
+
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+-#ifdef CONFIG_KAISER_REAL_SWITCH
+ andq $(~KAISER_SHADOW_PGD_OFFSET), \reg
+-#endif
+ movq \reg, %cr3
+ .endm
+
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
+-#ifdef CONFIG_KAISER_REAL_SWITCH
+ orq $(KAISER_SHADOW_PGD_OFFSET), \reg
+-#endif
+ movq \reg, %cr3
+ .endm
+
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -41,10 +41,6 @@ config KAISER
+
+ If you are unsure how to answer this question, answer Y.
+
+-config KAISER_REAL_SWITCH
+- bool "KAISER: actually switch page tables"
+- default y
+-
+ config SECURITYFS
+ bool "Enable the securityfs filesystem"
+ help
diff --git a/patches.suse/4.4-38-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch b/patches.suse/4.4-38-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch
new file mode 100644
index 0000000000..b3de015455
--- /dev/null
+++ b/patches.suse/4.4-38-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch
@@ -0,0 +1,111 @@
+From 99a20744dc997e1ab9224e9a072e2476be61c6b9 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 9 Sep 2017 21:27:32 -0700
+Subject: [PATCH 38/43] kaiser: vmstat show NR_KAISERTABLE as nr_overhead
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+The kaiser update made an interesting choice, never to free any shadow
+page tables. Contention on global spinlock was worrying, particularly
+with it held across page table scans when freeing. Something had to be
+done: I was going to add refcounting; but simply never to free them is
+an appealing choice, minimizing contention without complicating the code
+(the more a page table is found already, the less the spinlock is used).
+
+But leaking pages in this way is also a worry: can we get away with it?
+At the very least, we need a count to show how bad it actually gets:
+in principle, one might end up wasting about 1/256 of memory that way
+(1/512 for when direct-mapped pages have to be user-mapped, plus 1/512
+for when they are user-mapped from the vmalloc area on another occasion
+(but we don't have vmalloc'ed stacks, so only large ldts are vmalloc'ed)).
+
+Add per-cpu stat NR_KAISERTABLE: including 256 at startup for the
+shared pgd entries, and 1 for each intermediate page table added
+thereafter for user-mapping - but leave out the 1 per mm, for its
+shadow pgd, because that distracts from the monotonic increase.
+Shown in /proc/vmstat as nr_overhead (0 if kaiser not enabled).
+
+In practice, it doesn't look so bad so far: more like 1/12000 after
+nine hours of gtests below; and movable pageblock segregation should
+tend to cluster the kaiser tables into a subset of the address space
+(if not, they will be bad for compaction too). But production may
+tell a different story: keep an eye on this number, and bring back
+lighter freeing if it gets out of control (maybe a shrinker).
+
+(cherry picked from Change-Id: I254959b47c694f8604400ab411e02075a30f922a)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/mm/kaiser.c | 16 +++++++++++-----
+ include/linux/mmzone.h | 3 ++-
+ mm/vmstat.c | 1 +
+ 3 files changed, 14 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -122,9 +122,11 @@ static pte_t *kaiser_pagetable_walk(unsi
+ if (!new_pmd_page)
+ return NULL;
+ spin_lock(&shadow_table_allocation_lock);
+- if (pud_none(*pud))
++ if (pud_none(*pud)) {
+ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+- else
++ __inc_zone_page_state(virt_to_page((void *)
++ new_pmd_page), NR_KAISERTABLE);
++ } else
+ free_page(new_pmd_page);
+ spin_unlock(&shadow_table_allocation_lock);
+ }
+@@ -140,9 +142,11 @@ static pte_t *kaiser_pagetable_walk(unsi
+ if (!new_pte_page)
+ return NULL;
+ spin_lock(&shadow_table_allocation_lock);
+- if (pmd_none(*pmd))
++ if (pmd_none(*pmd)) {
+ set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+- else
++ __inc_zone_page_state(virt_to_page((void *)
++ new_pte_page), NR_KAISERTABLE);
++ } else
+ free_page(new_pte_page);
+ spin_unlock(&shadow_table_allocation_lock);
+ }
+@@ -206,11 +210,13 @@ static void __init kaiser_init_all_pgds(
+ pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
+ for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
+ pgd_t new_pgd;
+- pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE);
++ pud_t *pud = pud_alloc_one(&init_mm,
++ PAGE_OFFSET + i * PGDIR_SIZE);
+ if (!pud) {
+ WARN_ON(1);
+ break;
+ }
++ inc_zone_page_state(virt_to_page(pud), NR_KAISERTABLE);
+ new_pgd = __pgd(_KERNPG_TABLE |__pa(pud));
+ /*
+ * Make sure not to stomp on some other pgd entry.
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -127,8 +127,9 @@ enum zone_stat_item {
+ NR_SLAB_RECLAIMABLE,
+ NR_SLAB_UNRECLAIMABLE,
+ NR_PAGETABLE, /* used for pagetables */
+- NR_KERNEL_STACK,
+ /* Second 128 byte cacheline */
++ NR_KERNEL_STACK,
++ NR_KAISERTABLE,
+ NR_UNSTABLE_NFS, /* NFS unstable pages */
+ NR_BOUNCE,
+ NR_VMSCAN_WRITE,
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -713,6 +713,7 @@ const char * const vmstat_text[] = {
+ "nr_slab_unreclaimable",
+ "nr_page_table_pages",
+ "nr_kernel_stack",
++ "nr_overhead",
+ "nr_unstable",
+ "nr_bounce",
+ "nr_vmscan_write",
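
Editor's note: the 1/512 figure above follows from each leaked shadow PTE page (4k) covering 512 user-mapped pages, i.e. 2 MiB. A small worked calculation; the amount of user-mapped memory is chosen arbitrarily:

```c
#include <stdio.h>

#define PAGE_SIZE    4096UL
#define PTRS_PER_PTE 512UL

int main(void)
{
	/* Each leaked shadow PTE page (4 KiB) maps PTRS_PER_PTE * 4 KiB = 2 MiB,
	 * so table overhead is at most 1/512 of the memory ever user-mapped
	 * through it; roughly twice that (1/256) if the same pages are later
	 * user-mapped again via the vmalloc area. */
	unsigned long user_mapped = 64UL << 30;	/* hypothetical: 64 GiB ever mapped */
	unsigned long pte_pages   = user_mapped / (PTRS_PER_PTE * PAGE_SIZE);

	printf("worst-case shadow PTE pages: %lu (%lu MiB, 1/%lu of mapped memory)\n",
	       pte_pages, pte_pages * PAGE_SIZE >> 20, PTRS_PER_PTE);
	return 0;
}
```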
diff --git a/patches.suse/4.4-39-kaiser-enhanced-by-kernel-and-user-PCIDs.patch b/patches.suse/4.4-39-kaiser-enhanced-by-kernel-and-user-PCIDs.patch
new file mode 100644
index 0000000000..56a3e8bd0a
--- /dev/null
+++ b/patches.suse/4.4-39-kaiser-enhanced-by-kernel-and-user-PCIDs.patch
@@ -0,0 +1,399 @@
+From 78dd553cccc1aa0ce8fdd1f1e935b32683c7359e Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 30 Aug 2017 16:23:00 -0700
+Subject: [PATCH 39/43] kaiser: enhanced by kernel and user PCIDs
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+Merged performance improvements to Kaiser, using distinct kernel
+and user Process Context Identifiers to minimize the TLB flushing.
+
+(cherry picked from Change-Id: I6c7d791433950a3e5954228f7379fff4c364f43b)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/entry/entry_64.S | 10 ++++-
+ arch/x86/entry/entry_64_compat.S | 1
+ arch/x86/include/asm/cpufeature.h | 1
+ arch/x86/include/asm/kaiser.h | 15 ++++++-
+ arch/x86/include/asm/pgtable_types.h | 26 +++++++++++++
+ arch/x86/include/asm/tlbflush.h | 54 +++++++++++++++++++++++-----
+ arch/x86/include/uapi/asm/processor-flags.h | 3 +
+ arch/x86/kernel/cpu/common.c | 34 +++++++++++++++++
+ arch/x86/kvm/x86.c | 3 +
+ arch/x86/mm/kaiser.c | 7 +++
+ arch/x86/mm/tlb.c | 46 ++++++++++++++++++++++-
+ 11 files changed, 182 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -13,6 +13,7 @@
+ #include <asm/irqflags.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/pgtable_types.h>
+ #include <asm/kaiser.h>
+ #include <linux/linkage.h>
+ #include <linux/err.h>
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1319,7 +1319,10 @@ ENTRY(nmi)
+ /* %rax is saved above, so OK to clobber here */
+ movq %cr3, %rax
+ pushq %rax
+- andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
++ /* mask off "user" bit of pgd address and 12 PCID bits: */
++ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
++ /* Add back kernel PCID and "no flush" bit */
++ orq X86_CR3_PCID_KERN_VAR, %rax
+ movq %rax, %cr3
+ #endif
+ call do_nmi
+@@ -1560,7 +1563,10 @@ end_repeat_nmi:
+ /* %rax is saved above, so OK to clobber here */
+ movq %cr3, %rax
+ pushq %rax
+- andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
++ /* mask off "user" bit of pgd address and 12 PCID bits: */
++ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
++ /* Add back kernel PCID and "no flush" bit */
++ orq X86_CR3_PCID_KERN_VAR, %rax
+ movq %rax, %cr3
+ #endif
+
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -189,6 +189,7 @@
+ #define X86_FEATURE_ARAT ( 7*32+ 1) /* Always Running APIC Timer */
+ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
+ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
++#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */
+ #define X86_FEATURE_PLN ( 7*32+ 5) /* Intel Power Limit Notification */
+ #define X86_FEATURE_PTS ( 7*32+ 6) /* Intel Package Thermal Status */
+ #define X86_FEATURE_DTHERM ( 7*32+ 7) /* Digital Thermal Sensor */
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -1,5 +1,8 @@
+ #ifndef _ASM_X86_KAISER_H
+ #define _ASM_X86_KAISER_H
++
++#include <uapi/asm/processor-flags.h> /* For PCID constants */
++
+ /*
+ * This file includes the definitions for the KAISER feature.
+ * KAISER is a counter measure against x86_64 side channel attacks on
+@@ -21,13 +24,21 @@
+
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+-andq $(~KAISER_SHADOW_PGD_OFFSET), \reg
++andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
++orq X86_CR3_PCID_KERN_VAR, \reg
+ movq \reg, %cr3
+ .endm
+
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
+-orq $(KAISER_SHADOW_PGD_OFFSET), \reg
++andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
++/*
++ * This can obviously be one instruction by putting the
++ * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR.
++ * But, just leave it now for simplicity.
++ */
++orq X86_CR3_PCID_USER_VAR, \reg
++orq $(KAISER_SHADOW_PGD_OFFSET), \reg
+ movq \reg, %cr3
+ .endm
+
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -142,6 +142,32 @@
+ _PAGE_SOFT_DIRTY)
+ #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
+
++/* The ASID is the lower 12 bits of CR3 */
++#define X86_CR3_PCID_ASID_MASK (_AC((1<<12)-1,UL))
++
++/* Mask for all the PCID-related bits in CR3: */
++#define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
++#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64)
++#define X86_CR3_PCID_ASID_KERN (_AC(0x4,UL))
++#define X86_CR3_PCID_ASID_USER (_AC(0x6,UL))
++
++#define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN)
++#define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER)
++#define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN)
++#define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER)
++#else
++#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL))
++#define X86_CR3_PCID_ASID_USER (_AC(0x0,UL))
++/*
++ * PCIDs are unsupported on 32-bit and none of these bits can be
++ * set in CR3:
++ */
++#define X86_CR3_PCID_KERN_FLUSH (0)
++#define X86_CR3_PCID_USER_FLUSH (0)
++#define X86_CR3_PCID_KERN_NOFLUSH (0)
++#define X86_CR3_PCID_USER_NOFLUSH (0)
++#endif
++
+ /*
+ * The cache modes defined here are used to translate between pure SW usage
+ * and the HW defined cache mode bits and/or PAT entries.
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -12,7 +12,6 @@ static inline void __invpcid(unsigned lo
+ unsigned long type)
+ {
+ struct { u64 d[2]; } desc = { { pcid, addr } };
+-
+ /*
+ * The memory clobber is because the whole point is to invalidate
+ * stale TLB entries and, especially if we're flushing global
+@@ -133,14 +132,25 @@ static inline void cr4_set_bits_and_upda
+
+ static inline void __native_flush_tlb(void)
+ {
++ if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) {
++ /*
++ * If current->mm == NULL then we borrow a mm which may change during a
++ * task switch and therefore we must not be preempted while we write CR3
++ * back:
++ */
++ preempt_disable();
++ native_write_cr3(native_read_cr3());
++ preempt_enable();
++ return;
++ }
+ /*
+- * If current->mm == NULL then we borrow a mm which may change during a
+- * task switch and therefore we must not be preempted while we write CR3
+- * back:
+- */
+- preempt_disable();
+- native_write_cr3(native_read_cr3());
+- preempt_enable();
++ * We are no longer using globals with KAISER, so a
++ * "nonglobals" flush would work too. But, this is more
++ * conservative.
++ *
++ * Note, this works with CR4.PCIDE=0 or 1.
++ */
++ invpcid_flush_all();
+ }
+
+ static inline void __native_flush_tlb_global_irq_disabled(void)
+@@ -162,6 +172,8 @@ static inline void __native_flush_tlb_gl
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+ * to CR4 sandwiched inside an IRQ flag save/restore.
++ *
++ * Note, this works with CR4.PCIDE=0 or 1.
+ */
+ invpcid_flush_all();
+ return;
+@@ -181,7 +193,31 @@ static inline void __native_flush_tlb_gl
+
+ static inline void __native_flush_tlb_single(unsigned long addr)
+ {
+- asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
++ /*
++ * SIMICS #GP's if you run INVPCID with type 2/3
++ * and X86_CR4_PCIDE clear. Shame!
++ *
++ * The ASIDs used below are hard-coded. But, we must not
++ * call invpcid(type=1/2) before CR4.PCIDE=1. Just call
++ * invpcid in the case we are called early.
++ */
++ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
++ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
++ return;
++ }
++ /* Flush the address out of both PCIDs. */
++ /*
++ * An optimization here might be to determine addresses
++ * that are only kernel-mapped and only flush the kernel
++ * ASID. But, userspace flushes are probably much more
++ * important performance-wise.
++ *
++ * Make sure to do only a single invpcid when KAISER is
++ * disabled and we have only a single ASID.
++ */
++ if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER)
++ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
++ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
+ }
+
+ static inline void __flush_tlb_all(void)
+--- a/arch/x86/include/uapi/asm/processor-flags.h
++++ b/arch/x86/include/uapi/asm/processor-flags.h
+@@ -77,7 +77,8 @@
+ #define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT)
+ #define X86_CR3_PCD_BIT 4 /* Page Cache Disable */
+ #define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT)
+-#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */
++#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
++#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
+
+ /*
+ * Intel CPU features in CR4
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -326,11 +326,45 @@ static __always_inline void setup_smap(s
+ }
+ }
+
++/*
++ * These can have bit 63 set, so we can not just use a plain "or"
++ * instruction to get their value or'd into CR3. It would take
++ * another register. So, we use a memory reference to these
++ * instead.
++ *
++ * This is also handy because systems that do not support
++ * PCIDs just end up or'ing a 0 into their CR3, which does
++ * no harm.
++ */
++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0;
++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0;
++
+ static void setup_pcid(struct cpuinfo_x86 *c)
+ {
+ if (cpu_has(c, X86_FEATURE_PCID)) {
+ if (cpu_has(c, X86_FEATURE_PGE)) {
+ cr4_set_bits(X86_CR4_PCIDE);
++ /*
++ * These variables are used by the entry/exit
++ * code to change PCIDs.
++ */
++#ifdef CONFIG_KAISER
++ X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH;
++ X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH;
++#endif
++ /*
++ * INVPCID has two "groups" of types:
++ * 1/2: Invalidate an individual address
++ * 3/4: Invalidate all contexts
++ *
++ * 1/2 take a PCID, but 3/4 do not. So, 3/4
++ * ignore the PCID argument in the descriptor.
++ * But, we have to be careful not to call 1/2
++ * with an actual non-zero PCID in them before
++ * we do the above cr4_set_bits().
++ */
++ if (cpu_has(c, X86_FEATURE_INVPCID))
++ set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE);
+ } else {
+ /*
+ * flush_tlb_all(), as currently implemented, won't
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -770,7 +770,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, u
+ return 1;
+
+ /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
+- if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
++ if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_ASID_MASK) ||
++ !is_long_mode(vcpu))
+ return 1;
+ }
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -240,6 +240,8 @@ static void __init kaiser_init_all_pgds(
+ } while (0)
+
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
++extern unsigned long X86_CR3_PCID_KERN_VAR;
++extern unsigned long X86_CR3_PCID_USER_VAR;
+ /*
+ * If anything in here fails, we will likely die on one of the
+ * first kernel->user transitions and init will die. But, we
+@@ -290,6 +292,11 @@ void __init kaiser_init(void)
+ kaiser_add_user_map_early(&debug_idt_table,
+ sizeof(gate_desc) * NR_VECTORS,
+ __PAGE_KERNEL);
++
++ kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
++ __PAGE_KERNEL);
++ kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE,
++ __PAGE_KERNEL);
+ }
+
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -34,6 +34,46 @@ struct flush_tlb_info {
+ unsigned long flush_end;
+ };
+
++static void load_new_mm_cr3(pgd_t *pgdir)
++{
++ unsigned long new_mm_cr3 = __pa(pgdir);
++
++ /*
++ * KAISER, plus PCIDs needs some extra work here. But,
++ * if either of features is not present, we need no
++ * PCIDs here and just do a normal, full TLB flush with
++ * the write_cr3()
++ */
++ if (!IS_ENABLED(CONFIG_KAISER) ||
++ !cpu_feature_enabled(X86_FEATURE_PCID))
++ goto out_set_cr3;
++ /*
++ * We reuse the same PCID for different tasks, so we must
++ * flush all the entires for the PCID out when we change
++ * tasks.
++ */
++ new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir);
++
++ /*
++ * The flush from load_cr3() may leave old TLB entries
++ * for userspace in place. We must flush that context
++ * separately. We can theoretically delay doing this
++ * until we actually load up the userspace CR3, but
++ * that's a bit tricky. We have to have the "need to
++ * flush userspace PCID" bit per-cpu and check it in the
++ * exit-to-userspace paths.
++ */
++ invpcid_flush_single_context(X86_CR3_PCID_ASID_USER);
++
++out_set_cr3:
++ /*
++ * Caution: many callers of this function expect
++ * that load_cr3() is serializing and orders TLB
++ * fills with respect to the mm_cpumask writes.
++ */
++ write_cr3(new_mm_cr3);
++}
++
+ /*
+ * We cannot call mmdrop() because we are in interrupt context,
+ * instead update mm->cpu_vm_mask.
+@@ -45,7 +85,7 @@ void leave_mm(int cpu)
+ BUG();
+ if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
+ cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
+- load_cr3(swapper_pg_dir);
++ load_new_mm_cr3(swapper_pg_dir);
+ /*
+ * This gets called in the idle path where RCU
+ * functions differently. Tracing normally
+@@ -105,7 +145,7 @@ void switch_mm_irqs_off(struct mm_struct
+ * ordering guarantee we need.
+ *
+ */
+- load_cr3(next->pgd);
++ load_new_mm_cr3(next->pgd);
+
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+
+@@ -152,7 +192,7 @@ void switch_mm_irqs_off(struct mm_struct
+ * As above, load_cr3() is serializing and orders TLB
+ * fills with respect to the mm_cpumask write.
+ */
+- load_cr3(next->pgd);
++ load_new_mm_cr3(next->pgd);
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ load_mm_cr4(next);
+ load_mm_ldt(next);
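
The hunks above combine three things into a single CR3 value: the page-aligned pgd address, a PCID/ASID in the low 12 bits, and the NOFLUSH bit 63. As a quick illustration, here is a minimal userspace sketch (plain C, not kernel code) of how _SWITCH_TO_KERNEL_CR3 and _SWITCH_TO_USER_CR3 compose those values; KAISER_SHADOW_PGD_OFFSET is assumed to be 0x1000 (PAGE_SIZE), which is not shown in this excerpt, and the pgd address is made up.

#include <stdio.h>
#include <stdint.h>

#define X86_CR3_PCID_ASID_MASK   ((1ULL << 12) - 1)   /* low 12 bits of CR3 = PCID */
#define X86_CR3_PCID_NOFLUSH     (1ULL << 63)         /* keep the old PCID's TLB entries */
#define X86_CR3_PCID_ASID_KERN   0x4ULL               /* values used at this point in the series */
#define X86_CR3_PCID_ASID_USER   0x6ULL
#define KAISER_SHADOW_PGD_OFFSET 0x1000ULL            /* assumed: shadow pgd is the next page */

static uint64_t switch_to_kernel_cr3(uint64_t cr3)
{
	/* drop the PCID bits and the shadow-pgd bit, then or in kernel PCID + NOFLUSH */
	cr3 &= ~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET);
	return cr3 | X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN;
}

static uint64_t switch_to_user_cr3(uint64_t cr3)
{
	cr3 &= ~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET);
	/* point at the shadow (user) pgd and select the user PCID */
	return cr3 | X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER | KAISER_SHADOW_PGD_OFFSET;
}

int main(void)
{
	uint64_t pgd_pa = 0x1234000ULL;   /* hypothetical page-aligned pgd */
	uint64_t kern = switch_to_kernel_cr3(pgd_pa);
	uint64_t user = switch_to_user_cr3(kern);

	printf("kernel cr3: %#llx\n", (unsigned long long)kern);
	printf("user   cr3: %#llx\n", (unsigned long long)user);
	return 0;
}
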
diff --git a/patches.suse/4.4-40-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch b/patches.suse/4.4-40-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch
new file mode 100644
index 0000000000..1672bbd2d0
--- /dev/null
+++ b/patches.suse/4.4-40-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch
@@ -0,0 +1,394 @@
+From 8863a363f0094837cf053546c8e7f0ffff600472 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 17 Aug 2017 15:00:37 -0700
+Subject: [PATCH 40/43] kaiser: load_new_mm_cr3() let SWITCH_USER_CR3 flush user
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+We have many machines (Westmere, Sandybridge, Ivybridge) supporting
+PCID but not INVPCID: on these load_new_mm_cr3() simply crashed.
+
+Flushing user context inside load_new_mm_cr3() without the use of
+invpcid is difficult: momentarily switch from kernel to user context
+and back to do so? I'm not sure whether that can be safely done at
+all, and would risk polluting user context with kernel internals,
+and kernel context with stale user externals.
+
+Instead, follow the hint in the comment that was there: change
+X86_CR3_PCID_USER_VAR to be a per-cpu variable, then load_new_mm_cr3()
+can leave a note in it, for SWITCH_USER_CR3 on return to userspace to
+flush user context TLB, instead of default X86_CR3_PCID_USER_NOFLUSH.
+
+Which works well enough that there's no need to do it this way only
+when invpcid is unsupported: it's a good alternative to invpcid here.
+But there's a couple of inlines in asm/tlbflush.h that need to do the
+same trick, so it's best to localize all this per-cpu business in
+mm/kaiser.c: moving that part of the initialization from setup_pcid()
+to kaiser_setup_pcid(); with kaiser_flush_tlb_on_return_to_user() the
+function for noting an X86_CR3_PCID_USER_FLUSH. And let's keep a
+KAISER_SHADOW_PGD_OFFSET in there, to avoid the extra OR on exit.
+
+I did try to make the feature tests in asm/tlbflush.h more consistent
+with each other: there seem to be far too many ways of performing such
+tests, and I don't have a good grasp of their differences. At first
+I converted them all to be static_cpu_has(): but that proved to be a
+mistake, as the comment in __native_flush_tlb_single() hints; so then
+I reversed and made them all this_cpu_has(). Probably all gratuitous
+change, but that's the way it's working at present.
+
+I am slightly bothered by the way non-per-cpu X86_CR3_PCID_KERN_VAR
+gets re-initialized by each cpu (before and after these changes):
+no problem when (as usual) all cpus on a machine have the same
+features, but in principle incorrect. However, my experiment
+to per-cpu-ify that one did not end well...
+
+(cherry picked from Change-Id: I171de6d71072b3d16b8f6078d445395c902da043)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/include/asm/kaiser.h | 18 +++++++-----
+ arch/x86/include/asm/tlbflush.h | 58 +++++++++++++++++++++++++++-------------
+ arch/x86/kernel/cpu/common.c | 22 ---------------
+ arch/x86/mm/kaiser.c | 50 ++++++++++++++++++++++++++++++----
+ arch/x86/mm/tlb.c | 46 ++++++++++++-------------------
+ 5 files changed, 114 insertions(+), 80 deletions(-)
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -32,13 +32,12 @@ movq \reg, %cr3
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+-/*
+- * This can obviously be one instruction by putting the
+- * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR.
+- * But, just leave it now for simplicity.
+- */
+-orq X86_CR3_PCID_USER_VAR, \reg
+-orq $(KAISER_SHADOW_PGD_OFFSET), \reg
++orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg
++js 9f
++// FLUSH this time, reset to NOFLUSH for next time
++// But if nopcid? Consider using 0x80 for user pcid?
++movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
++9:
+ movq \reg, %cr3
+ .endm
+
+@@ -90,6 +89,11 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ */
+ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
++extern unsigned long X86_CR3_PCID_KERN_VAR;
++DECLARE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
++
++extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
++
+ /**
+ * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
+ * @addr: the start address of the range
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -12,6 +12,7 @@ static inline void __invpcid(unsigned lo
+ unsigned long type)
+ {
+ struct { u64 d[2]; } desc = { { pcid, addr } };
++
+ /*
+ * The memory clobber is because the whole point is to invalidate
+ * stale TLB entries and, especially if we're flushing global
+@@ -130,27 +131,42 @@ static inline void cr4_set_bits_and_upda
+ cr4_set_bits(mask);
+ }
+
++/*
++ * Declare a couple of kaiser interfaces here for convenience,
++ * to avoid the need for asm/kaiser.h in unexpected places.
++ */
++#ifdef CONFIG_KAISER
++extern void kaiser_setup_pcid(void);
++extern void kaiser_flush_tlb_on_return_to_user(void);
++#else
++static inline void kaiser_setup_pcid(void)
++{
++}
++static inline void kaiser_flush_tlb_on_return_to_user(void)
++{
++}
++#endif
++
+ static inline void __native_flush_tlb(void)
+ {
+- if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) {
+- /*
+- * If current->mm == NULL then we borrow a mm which may change during a
+- * task switch and therefore we must not be preempted while we write CR3
+- * back:
++ if (this_cpu_has(X86_FEATURE_INVPCID)) {
++ /*
++ * Note, this works with CR4.PCIDE=0 or 1.
+ */
+- preempt_disable();
+- native_write_cr3(native_read_cr3());
+- preempt_enable();
++ invpcid_flush_all_nonglobals();
+ return;
+ }
++
+ /*
+- * We are no longer using globals with KAISER, so a
+- * "nonglobals" flush would work too. But, this is more
+- * conservative.
+- *
+- * Note, this works with CR4.PCIDE=0 or 1.
++ * If current->mm == NULL then we borrow a mm which may change during a
++ * task switch and therefore we must not be preempted while we write CR3
++ * back:
+ */
+- invpcid_flush_all();
++ preempt_disable();
++ if (this_cpu_has(X86_FEATURE_PCID))
++ kaiser_flush_tlb_on_return_to_user();
++ native_write_cr3(native_read_cr3());
++ preempt_enable();
+ }
+
+ static inline void __native_flush_tlb_global_irq_disabled(void)
+@@ -166,9 +182,13 @@ static inline void __native_flush_tlb_gl
+
+ static inline void __native_flush_tlb_global(void)
+ {
++#ifdef CONFIG_KAISER
++ /* Globals are not used at all */
++ __native_flush_tlb();
++#else
+ unsigned long flags;
+
+- if (static_cpu_has(X86_FEATURE_INVPCID)) {
++ if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+ * to CR4 sandwiched inside an IRQ flag save/restore.
+@@ -185,10 +205,9 @@ static inline void __native_flush_tlb_gl
+ * be called from deep inside debugging code.)
+ */
+ raw_local_irq_save(flags);
+-
+ __native_flush_tlb_global_irq_disabled();
+-
+ raw_local_irq_restore(flags);
++#endif
+ }
+
+ static inline void __native_flush_tlb_single(unsigned long addr)
+@@ -199,9 +218,12 @@ static inline void __native_flush_tlb_si
+ *
+ * The ASIDs used below are hard-coded. But, we must not
+ * call invpcid(type=1/2) before CR4.PCIDE=1. Just call
+- * invpcid in the case we are called early.
++ * invlpg in the case we are called early.
+ */
++
+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
++ if (this_cpu_has(X86_FEATURE_PCID))
++ kaiser_flush_tlb_on_return_to_user();
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ return;
+ }
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -326,33 +326,12 @@ static __always_inline void setup_smap(s
+ }
+ }
+
+-/*
+- * These can have bit 63 set, so we can not just use a plain "or"
+- * instruction to get their value or'd into CR3. It would take
+- * another register. So, we use a memory reference to these
+- * instead.
+- *
+- * This is also handy because systems that do not support
+- * PCIDs just end up or'ing a 0 into their CR3, which does
+- * no harm.
+- */
+-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0;
+-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0;
+-
+ static void setup_pcid(struct cpuinfo_x86 *c)
+ {
+ if (cpu_has(c, X86_FEATURE_PCID)) {
+ if (cpu_has(c, X86_FEATURE_PGE)) {
+ cr4_set_bits(X86_CR4_PCIDE);
+ /*
+- * These variables are used by the entry/exit
+- * code to change PCIDs.
+- */
+-#ifdef CONFIG_KAISER
+- X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH;
+- X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH;
+-#endif
+- /*
+ * INVPCID has two "groups" of types:
+ * 1/2: Invalidate an individual address
+ * 3/4: Invalidate all contexts
+@@ -377,6 +356,7 @@ static void setup_pcid(struct cpuinfo_x8
+ clear_cpu_cap(c, X86_FEATURE_PCID);
+ }
+ }
++ kaiser_setup_pcid();
+ }
+
+ /*
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -12,12 +12,26 @@
+ #include <linux/ftrace.h>
+
+ #include <asm/kaiser.h>
++#include <asm/tlbflush.h> /* to verify its kaiser declarations */
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
++
+ #ifdef CONFIG_KAISER
++__visible
++DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++
++/*
++ * These can have bit 63 set, so we can not just use a plain "or"
++ * instruction to get their value or'd into CR3. It would take
++ * another register. So, we use a memory reference to these instead.
++ *
++ * This is also handy because systems that do not support PCIDs
++ * just end up or'ing a 0 into their CR3, which does no harm.
++ */
++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR;
++DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
+
+-__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+ /*
+ * At runtime, the only things we map are some things for CPU
+ * hotplug, and stacks for new processes. No two CPUs will ever
+@@ -239,9 +253,6 @@ static void __init kaiser_init_all_pgds(
+ WARN_ON(__ret); \
+ } while (0)
+
+-extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+-extern unsigned long X86_CR3_PCID_KERN_VAR;
+-extern unsigned long X86_CR3_PCID_USER_VAR;
+ /*
+ * If anything in here fails, we will likely die on one of the
+ * first kernel->user transitions and init will die. But, we
+@@ -295,8 +306,6 @@ void __init kaiser_init(void)
+
+ kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
+ __PAGE_KERNEL);
+- kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE,
+- __PAGE_KERNEL);
+ }
+
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
+@@ -361,4 +370,33 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp,
+ }
+ return pgd;
+ }
++
++void kaiser_setup_pcid(void)
++{
++ unsigned long kern_cr3 = 0;
++ unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET;
++
++ if (this_cpu_has(X86_FEATURE_PCID)) {
++ kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH;
++ user_cr3 |= X86_CR3_PCID_USER_NOFLUSH;
++ }
++ /*
++ * These variables are used by the entry/exit
++ * code to change PCID and pgd and TLB flushing.
++ */
++ X86_CR3_PCID_KERN_VAR = kern_cr3;
++ this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3);
++}
++
++/*
++ * Make a note that this cpu will need to flush USER tlb on return to user.
++ * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling:
++ * if cpu does not, then the NOFLUSH bit will never have been set.
++ */
++void kaiser_flush_tlb_on_return_to_user(void)
++{
++ this_cpu_write(X86_CR3_PCID_USER_VAR,
++ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
++}
++EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+ #endif /* CONFIG_KAISER */
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -6,13 +6,14 @@
+ #include <linux/interrupt.h>
+ #include <linux/module.h>
+ #include <linux/cpu.h>
++#include <linux/debugfs.h>
+
+ #include <asm/tlbflush.h>
+ #include <asm/mmu_context.h>
+ #include <asm/cache.h>
+ #include <asm/apic.h>
+ #include <asm/uv/uv.h>
+-#include <linux/debugfs.h>
++#include <asm/kaiser.h>
+
+ /*
+ * TLB flushing, formerly SMP-only
+@@ -38,34 +39,23 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ {
+ unsigned long new_mm_cr3 = __pa(pgdir);
+
+- /*
+- * KAISER, plus PCIDs needs some extra work here. But,
+- * if either of features is not present, we need no
+- * PCIDs here and just do a normal, full TLB flush with
+- * the write_cr3()
+- */
+- if (!IS_ENABLED(CONFIG_KAISER) ||
+- !cpu_feature_enabled(X86_FEATURE_PCID))
+- goto out_set_cr3;
+- /*
+- * We reuse the same PCID for different tasks, so we must
+- * flush all the entires for the PCID out when we change
+- * tasks.
+- */
+- new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir);
+-
+- /*
+- * The flush from load_cr3() may leave old TLB entries
+- * for userspace in place. We must flush that context
+- * separately. We can theoretically delay doing this
+- * until we actually load up the userspace CR3, but
+- * that's a bit tricky. We have to have the "need to
+- * flush userspace PCID" bit per-cpu and check it in the
+- * exit-to-userspace paths.
+- */
+- invpcid_flush_single_context(X86_CR3_PCID_ASID_USER);
++#ifdef CONFIG_KAISER
++ if (this_cpu_has(X86_FEATURE_PCID)) {
++ /*
++ * We reuse the same PCID for different tasks, so we must
++ * flush all the entries for the PCID out when we change tasks.
++ * Flush KERN below, flush USER when returning to userspace in
++ * kaiser's SWITCH_USER_CR3 (_SWITCH_TO_USER_CR3) macro.
++ *
++ * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could
++ * do it here, but can only be used if X86_FEATURE_INVPCID is
++ * available - and many machines support pcid without invpcid.
++ */
++ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
++ kaiser_flush_tlb_on_return_to_user();
++ }
++#endif /* CONFIG_KAISER */
+
+-out_set_cr3:
+ /*
+ * Caution: many callers of this function expect
+ * that load_cr3() is serializing and orders TLB
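
The core of the patch above: load_new_mm_cr3() leaves a per-cpu note saying "flush the user PCID on the next return to userspace", and _SWITCH_TO_USER_CR3 consumes that note, then re-arms the NOFLUSH default by storing 0x80 into byte 7 of the per-cpu quadword (on little-endian x86, byte 7 holds bits 56..63, i.e. the NOFLUSH bit). A compact userspace model of that handshake, again assuming KAISER_SHADOW_PGD_OFFSET is 0x1000:

#include <stdio.h>
#include <stdint.h>

#define X86_CR3_PCID_NOFLUSH     (1ULL << 63)
#define X86_CR3_PCID_ASID_USER   0x6ULL      /* still 6 at this point in the series */
#define KAISER_SHADOW_PGD_OFFSET 0x1000ULL   /* assumed */

/* stand-in for the per-cpu X86_CR3_PCID_USER_VAR as set up by kaiser_setup_pcid() */
static uint64_t x86_cr3_pcid_user_var =
	X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER | KAISER_SHADOW_PGD_OFFSET;

/* load_new_mm_cr3() calls this: flush the user PCID on the next return to user */
static void kaiser_flush_tlb_on_return_to_user(void)
{
	x86_cr3_pcid_user_var = X86_CR3_PCID_ASID_USER | KAISER_SHADOW_PGD_OFFSET;
}

/* models _SWITCH_TO_USER_CR3: or in the note; if it asked for a flush
 * (NOFLUSH clear), reset the note to NOFLUSH for next time */
static uint64_t switch_to_user_cr3(uint64_t cr3)
{
	cr3 |= x86_cr3_pcid_user_var;
	if (!(cr3 & X86_CR3_PCID_NOFLUSH))      /* the asm's "js 9f" sign test */
		((uint8_t *)&x86_cr3_pcid_user_var)[7] = 0x80;  /* movb $0x80, var+7 (little-endian host assumed) */
	return cr3;
}

int main(void)
{
	uint64_t pgd = 0x1234000ULL;            /* hypothetical page-aligned shadow pgd */

	printf("no flush pending: %#llx\n", (unsigned long long)switch_to_user_cr3(pgd));
	kaiser_flush_tlb_on_return_to_user();
	printf("flush this time : %#llx\n", (unsigned long long)switch_to_user_cr3(pgd));
	printf("re-armed NOFLUSH: %#llx\n", (unsigned long long)switch_to_user_cr3(pgd));
	return 0;
}
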
diff --git a/patches.suse/4.4-41-kaiser-PCID-0-for-kernel-and-128-for-user.patch b/patches.suse/4.4-41-kaiser-PCID-0-for-kernel-and-128-for-user.patch
new file mode 100644
index 0000000000..8f85977a6b
--- /dev/null
+++ b/patches.suse/4.4-41-kaiser-PCID-0-for-kernel-and-128-for-user.patch
@@ -0,0 +1,129 @@
+From 8ce4616f97b08e166f2028db38341ffde461e15b Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Fri, 8 Sep 2017 19:26:30 -0700
+Subject: [PATCH 41/43] kaiser: PCID 0 for kernel and 128 for user
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+Why was 4 chosen for kernel PCID and 6 for user PCID?
+No good reason in a backport where PCIDs are only used for Kaiser.
+
+If we continue with those, then we shall need to add Andy Lutomirski's
+4.13 commit 6c690ee1039b ("x86/mm: Split read_cr3() into read_cr3_pa()
+and __read_cr3()"), which deals with the problem of read_cr3() callers
+finding stray bits in the cr3 that they expected to be page-aligned;
+and for hibernation, his 4.14 commit f34902c5c6c0 ("x86/hibernate/64:
+Mask off CR3's PCID bits in the saved CR3").
+
+But if 0 is used for kernel PCID, then there's no need to add in those
+commits - whenever the kernel looks, it sees 0 in the lower bits; and
+0 for kernel seems an obvious choice.
+
+And I naughtily propose 128 for user PCID. Because there's a place
+in _SWITCH_TO_USER_CR3 where it takes note of the need for TLB FLUSH,
+but needs to reset that to NOFLUSH for the next occasion. Currently
+it does so with a "movb $(0x80)" into the high byte of the per-cpu
+quadword, but that will cause a machine without PCID support to crash.
+Now, if %al just happened to have 0x80 in it at that point, on a
+machine with PCID support, but 0 on a machine without PCID support...
+
+(That will go badly wrong once the pgd can be at a physical address
+above 2^56, but even with 5-level paging, physical goes up to 2^52.)
+
+(cherry picked from Change-Id: I17fccf9c0624d3db717cb5d1f949c1320ed1b09e)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/include/asm/kaiser.h | 19 ++++++++++++-------
+ arch/x86/include/asm/pgtable_types.h | 7 ++++---
+ arch/x86/mm/tlb.c | 3 +++
+ 3 files changed, 19 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -29,14 +29,19 @@ orq X86_CR3_PCID_KERN_VAR, \reg
+ movq \reg, %cr3
+ .endm
+
+-.macro _SWITCH_TO_USER_CR3 reg
++.macro _SWITCH_TO_USER_CR3 reg regb
++/*
++ * regb must be the low byte portion of reg: because we have arranged
++ * for the low byte of the user PCID to serve as the high byte of NOFLUSH
++ * (0x80 for each when PCID is enabled, or 0x00 when PCID and NOFLUSH are
++ * not enabled): so that the one register can update both memory and cr3.
++ */
+ movq %cr3, \reg
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+ orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg
+ js 9f
+-// FLUSH this time, reset to NOFLUSH for next time
+-// But if nopcid? Consider using 0x80 for user pcid?
+-movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
++/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */
++movb \regb, PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
+ 9:
+ movq \reg, %cr3
+ .endm
+@@ -49,7 +54,7 @@ popq %rax
+
+ .macro SWITCH_USER_CR3
+ pushq %rax
+-_SWITCH_TO_USER_CR3 %rax
++_SWITCH_TO_USER_CR3 %rax %al
+ popq %rax
+ .endm
+
+@@ -61,7 +66,7 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+
+ .macro SWITCH_USER_CR3_NO_STACK
+ movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
+-_SWITCH_TO_USER_CR3 %rax
++_SWITCH_TO_USER_CR3 %rax %al
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+ .endm
+
+@@ -69,7 +74,7 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+
+ .macro SWITCH_KERNEL_CR3 reg
+ .endm
+-.macro SWITCH_USER_CR3 reg
++.macro SWITCH_USER_CR3 reg regb
+ .endm
+ .macro SWITCH_USER_CR3_NO_STACK
+ .endm
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -147,16 +147,17 @@
+
+ /* Mask for all the PCID-related bits in CR3: */
+ #define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
++#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL))
++
+ #if defined(CONFIG_KAISER) && defined(CONFIG_X86_64)
+-#define X86_CR3_PCID_ASID_KERN (_AC(0x4,UL))
+-#define X86_CR3_PCID_ASID_USER (_AC(0x6,UL))
++/* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */
++#define X86_CR3_PCID_ASID_USER (_AC(0x80,UL))
+
+ #define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN)
+ #define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER)
+ #define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN)
+ #define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER)
+ #else
+-#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL))
+ #define X86_CR3_PCID_ASID_USER (_AC(0x0,UL))
+ /*
+ * PCIDs are unsupported on 32-bit and none of these bits can be
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -50,6 +50,9 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could
+ * do it here, but can only be used if X86_FEATURE_INVPCID is
+ * available - and many machines support pcid without invpcid.
++ *
++ * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0;
++ * but keep that line in there in case something changes.
+ */
+ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
+ kaiser_flush_tlb_on_return_to_user();
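
Why 0 and 128? With kernel PCID 0, anything that reads CR3 sees a clean page-aligned pgd, and with user PCID 0x80 the low byte of the per-cpu user-CR3 word is exactly the byte that the movb writes back into var+7: 0x80 (the high byte of NOFLUSH) on PCID machines, 0x00 (a harmless no-op) on machines without PCID, with no extra branch. A small check of that identity (KAISER_SHADOW_PGD_OFFSET again assumed to be 0x1000):

#include <assert.h>
#include <stdio.h>
#include <stdint.h>

#define KAISER_SHADOW_PGD_OFFSET 0x1000ULL   /* assumed */
#define X86_CR3_PCID_ASID_USER   0x80ULL     /* the new user PCID from this patch */

/* per-cpu x86_cr3_pcid_user in its "flush pending" state */
static uint64_t user_cr3_var(int pcid_supported)
{
	return pcid_supported ? (X86_CR3_PCID_ASID_USER | KAISER_SHADOW_PGD_OFFSET)
			      : KAISER_SHADOW_PGD_OFFSET;
}

int main(void)
{
	for (int pcid = 0; pcid <= 1; pcid++) {
		uint64_t cr3 = 0x1234000ULL | user_cr3_var(pcid);  /* after "orq ..., \reg" */
		uint8_t regb = (uint8_t)cr3;                       /* \regb, i.e. %al */

		/* the byte stored back into var+7 re-arms NOFLUSH (0x80) only
		 * when PCID is in use; without PCID it stores a harmless 0 */
		assert(regb == (pcid ? 0x80 : 0x00));
		printf("pcid=%d cr3=%#llx regb=%#x\n",
		       pcid, (unsigned long long)cr3, (unsigned int)regb);
	}
	return 0;
}
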
diff --git a/patches.suse/4.4-42-kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch b/patches.suse/4.4-42-kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch
new file mode 100644
index 0000000000..44cb360ca3
--- /dev/null
+++ b/patches.suse/4.4-42-kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch
@@ -0,0 +1,141 @@
+From 648e3526304072e09657cabaf9bb3c8a1130bbb1 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 27 Aug 2017 16:24:27 -0700
+Subject: [PATCH 42/43] kaiser: x86_cr3_pcid_noflush and x86_cr3_pcid_user
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+Mostly this commit is just unshouting X86_CR3_PCID_KERN_VAR and
+X86_CR3_PCID_USER_VAR: we usually name variables in lower-case.
+
+But why does x86_cr3_pcid_noflush need to be __aligned(PAGE_SIZE)?
+Ah, it's a leftover from when kaiser_add_user_map() once complained
+about mapping the same page twice. Make it __read_mostly instead.
+(I'm a little uneasy about all the unrelated data which shares its
+page getting user-mapped too, but that was so before, and not a big
+deal: though we call it user-mapped, it's not mapped with _PAGE_USER.)
+
+And there is a little change around the two calls to do_nmi().
+Previously they set the NOFLUSH bit (if PCID supported) when
+forcing to kernel context before do_nmi(); now they also have the
+NOFLUSH bit set (if PCID supported) when restoring context after:
+nothing done in do_nmi() should require a TLB to be flushed here.
+
+(cherry picked from Change-Id: If15c617e9ec86a7c06fc965e78911a2e10f53d16)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/entry/entry_64.S | 8 ++++----
+ arch/x86/include/asm/kaiser.h | 11 +++++------
+ arch/x86/mm/kaiser.c | 13 +++++++------
+ 3 files changed, 16 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1318,11 +1318,11 @@ ENTRY(nmi)
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+ movq %cr3, %rax
++ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
++ orq x86_cr3_pcid_noflush, %rax
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+- /* Add back kernel PCID and "no flush" bit */
+- orq X86_CR3_PCID_KERN_VAR, %rax
+ movq %rax, %cr3
+ #endif
+ call do_nmi
+@@ -1562,11 +1562,11 @@ end_repeat_nmi:
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+ movq %cr3, %rax
++ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
++ orq x86_cr3_pcid_noflush, %rax
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+- /* Add back kernel PCID and "no flush" bit */
+- orq X86_CR3_PCID_KERN_VAR, %rax
+ movq %rax, %cr3
+ #endif
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -25,7 +25,7 @@
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+-orq X86_CR3_PCID_KERN_VAR, \reg
++orq x86_cr3_pcid_noflush, \reg
+ movq \reg, %cr3
+ .endm
+
+@@ -37,11 +37,10 @@ movq \reg, %cr3
+ * not enabled): so that the one register can update both memory and cr3.
+ */
+ movq %cr3, \reg
+-andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+-orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg
++orq PER_CPU_VAR(x86_cr3_pcid_user), \reg
+ js 9f
+ /* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */
+-movb \regb, PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
++movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7)
+ 9:
+ movq \reg, %cr3
+ .endm
+@@ -94,8 +93,8 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ */
+ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+-extern unsigned long X86_CR3_PCID_KERN_VAR;
+-DECLARE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
++extern unsigned long x86_cr3_pcid_noflush;
++DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -29,8 +29,8 @@ DEFINE_PER_CPU_USER_MAPPED(unsigned long
+ * This is also handy because systems that do not support PCIDs
+ * just end up or'ing a 0 into their CR3, which does no harm.
+ */
+-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR;
+-DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
++unsigned long x86_cr3_pcid_noflush __read_mostly;
++DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+
+ /*
+ * At runtime, the only things we map are some things for CPU
+@@ -304,7 +304,8 @@ void __init kaiser_init(void)
+ sizeof(gate_desc) * NR_VECTORS,
+ __PAGE_KERNEL);
+
+- kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
++ kaiser_add_user_map_early(&x86_cr3_pcid_noflush,
++ sizeof(x86_cr3_pcid_noflush),
+ __PAGE_KERNEL);
+ }
+
+@@ -384,8 +385,8 @@ void kaiser_setup_pcid(void)
+ * These variables are used by the entry/exit
+ * code to change PCID and pgd and TLB flushing.
+ */
+- X86_CR3_PCID_KERN_VAR = kern_cr3;
+- this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3);
++ x86_cr3_pcid_noflush = kern_cr3;
++ this_cpu_write(x86_cr3_pcid_user, user_cr3);
+ }
+
+ /*
+@@ -395,7 +396,7 @@ void kaiser_setup_pcid(void)
+ */
+ void kaiser_flush_tlb_on_return_to_user(void)
+ {
+- this_cpu_write(X86_CR3_PCID_USER_VAR,
++ this_cpu_write(x86_cr3_pcid_user,
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
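
The NMI hunks above now set NOFLUSH on both sides of do_nmi(): the saved CR3 that is pushed already carries the NOFLUSH bit, so the popq/movq restore does not flush either, and the andq that builds the kernel CR3 clears only the low PCID bits and the shadow-pgd bit, leaving bit 63 intact. A userspace model of that save/mask/restore sequence (constants as in the earlier sketches, KAISER_SHADOW_PGD_OFFSET assumed to be 0x1000):

#include <stdio.h>
#include <stdint.h>

#define X86_CR3_PCID_NOFLUSH     (1ULL << 63)
#define X86_CR3_PCID_ASID_MASK   ((1ULL << 12) - 1)
#define KAISER_SHADOW_PGD_OFFSET 0x1000ULL   /* assumed */

/* kaiser_setup_pcid() leaves NOFLUSH here when PCID is supported, 0 otherwise */
static const uint64_t x86_cr3_pcid_noflush = X86_CR3_PCID_NOFLUSH;

int main(void)
{
	/* cr3 of an interrupted user context: pgd + shadow bit + user PCID 0x80 */
	uint64_t cr3 = 0x1234000ULL | KAISER_SHADOW_PGD_OFFSET | 0x80ULL;

	uint64_t rax = cr3 | x86_cr3_pcid_noflush;   /* NOFLUSH now and on return */
	uint64_t saved = rax;                        /* pushq %rax */

	/* mask off the shadow-pgd bit and the 12 PCID bits; bit 63 survives */
	rax &= ~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET);
	cr3 = rax;                                   /* kernel cr3 for do_nmi() */
	printf("do_nmi() runs with cr3 = %#llx\n", (unsigned long long)cr3);

	cr3 = saved;                                 /* popq %rax; movq %rax, %cr3 */
	printf("restored cr3          = %#llx\n", (unsigned long long)cr3);
	return 0;
}
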
diff --git a/patches.suse/4.4-43-kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch b/patches.suse/4.4-43-kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch
new file mode 100644
index 0000000000..e160776965
--- /dev/null
+++ b/patches.suse/4.4-43-kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch
@@ -0,0 +1,154 @@
+From 4b45a1e04f03e97a33b540974ed9e1a35f9728db Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Tue, 26 Sep 2017 18:43:07 -0700
+Subject: [PATCH 43/43] kaiser: paranoid_entry pass cr3 need to paranoid_exit
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+Neel Natu points out that paranoid_entry() was wrong to assume that
+an entry that did not need swapgs would not need SWITCH_KERNEL_CR3:
+paranoid_entry (used for debug breakpoint, int3, double fault or MCE;
+though I think it's only the MCE case that is cause for concern here)
+can break in at an awkward time, between cr3 switch and swapgs, but
+its handling always needs kernel gs and kernel cr3.
+
+Easy to fix in itself, but paranoid_entry() also needs to convey to
+paranoid_exit() (and my reading of macro idtentry says paranoid_entry
+and paranoid_exit are always paired) how to restore the prior state.
+The swapgs state is already conveyed by %ebx (0 or 1), so extend that
+also to convey when SWITCH_USER_CR3 will be needed (2 or 3).
+
+(Yes, I'd much prefer that 0 meant no swapgs, whereas it's the other
+way round: and a convention shared with error_entry() and error_exit(),
+which I don't want to touch. Perhaps I should have inverted the bit
+for switch cr3 too, but did not.)
+
+paranoid_exit() would be straightforward, except for TRACE_IRQS: it
+did TRACE_IRQS_IRETQ when doing swapgs, but TRACE_IRQS_IRETQ_DEBUG
+when not: which is it supposed to use when SWITCH_USER_CR3 is split
+apart from that? As best as I can determine, commit 5963e317b1e9
+("ftrace/x86: Do not change stacks in DEBUG when calling lockdep")
+missed the swapgs case, and should have used TRACE_IRQS_IRETQ_DEBUG
+there too (the discrepancy has nothing to do with the liberal use
+of _NO_STACK and _UNSAFE_STACK hereabouts: TRACE_IRQS_OFF_DEBUG has
+just been used in all cases); discrepancy lovingly preserved across
+several paranoid_exit() cleanups, but I'm now removing it.
+
+Neel further indicates that to use SWITCH_USER_CR3_NO_STACK there in
+paranoid_exit() is now not only unnecessary but unsafe: might corrupt
+syscall entry's unsafe_stack_register_backup of %rax. Just use
+SWITCH_USER_CR3: and delete SWITCH_USER_CR3_NO_STACK altogether,
+before we make the mistake of using it again.
+
+(cherry picked from Change-Id: I3ca8aa9c7bc1f8f72d49898b7b0ed43d5d46d506)
+
+hughd adds: this commit fixes an issue in the Kaiser-without-PCIDs
+part of the series, and ought to be moved earlier, if you decided
+to make a release of Kaiser-without-PCIDs.
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/entry/entry_64.S | 46 ++++++++++++++++++++++++++++++++----------
+ arch/x86/include/asm/kaiser.h | 8 -------
+ 2 files changed, 36 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1055,7 +1055,11 @@ idtentry machine_check has_error_cod
+ /*
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Use slow, but surefire "are we in kernel?" check.
+- * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
++ *
++ * Return: ebx=0: needs swapgs but not SWITCH_USER_CR3 in paranoid_exit
++ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3 in paranoid_exit
++ * ebx=2: needs both swapgs and SWITCH_USER_CR3 in paranoid_exit
++ * ebx=3: needs SWITCH_USER_CR3 but not swapgs in paranoid_exit
+ */
+ ENTRY(paranoid_entry)
+ cld
+@@ -1067,9 +1071,26 @@ ENTRY(paranoid_entry)
+ testl %edx, %edx
+ js 1f /* negative -> in kernel */
+ SWAPGS
+- SWITCH_KERNEL_CR3
+ xorl %ebx, %ebx
+-1: ret
++1:
++#ifdef CONFIG_KAISER
++ /*
++ * We might have come in between a swapgs and a SWITCH_KERNEL_CR3
++ * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit.
++ * Do a conditional SWITCH_KERNEL_CR3: this could safely be done
++ * unconditionally, but we need to find out whether the reverse
++ * should be done on return (conveyed to paranoid_exit in %ebx).
++ */
++ movq %cr3, %rax
++ testl $KAISER_SHADOW_PGD_OFFSET, %eax
++ jz 2f
++ orl $2, %ebx
++ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
++ orq x86_cr3_pcid_noflush, %rax
++ movq %rax, %cr3
++2:
++#endif
++ ret
+ END(paranoid_entry)
+
+ /*
+@@ -1082,20 +1103,25 @@ END(paranoid_entry)
+ * be complicated. Fortunately, we there's no good reason
+ * to try to handle preemption here.
+ *
+- * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
++ * On entry: ebx=0: needs swapgs but not SWITCH_USER_CR3
++ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3
++ * ebx=2: needs both swapgs and SWITCH_USER_CR3
++ * ebx=3: needs SWITCH_USER_CR3 but not swapgs
+ */
+ ENTRY(paranoid_exit)
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF_DEBUG
+- testl %ebx, %ebx /* swapgs needed? */
++ TRACE_IRQS_IRETQ_DEBUG
++#ifdef CONFIG_KAISER
++ testl $2, %ebx /* SWITCH_USER_CR3 needed? */
++ jz paranoid_exit_no_switch
++ SWITCH_USER_CR3
++paranoid_exit_no_switch:
++#endif
++ testl $1, %ebx /* swapgs needed? */
+ jnz paranoid_exit_no_swapgs
+- TRACE_IRQS_IRETQ
+- SWITCH_USER_CR3_NO_STACK
+ SWAPGS_UNSAFE_STACK
+- jmp paranoid_exit_restore
+ paranoid_exit_no_swapgs:
+- TRACE_IRQS_IRETQ_DEBUG
+-paranoid_exit_restore:
+ RESTORE_EXTRA_REGS
+ RESTORE_C_REGS
+ REMOVE_PT_GPREGS_FROM_STACK 8
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -63,20 +63,12 @@ _SWITCH_TO_KERNEL_CR3 %rax
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+ .endm
+
+-.macro SWITCH_USER_CR3_NO_STACK
+-movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
+-_SWITCH_TO_USER_CR3 %rax %al
+-movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+-.endm
+-
+ #else /* CONFIG_KAISER */
+
+ .macro SWITCH_KERNEL_CR3 reg
+ .endm
+ .macro SWITCH_USER_CR3 reg regb
+ .endm
+-.macro SWITCH_USER_CR3_NO_STACK
+-.endm
+ .macro SWITCH_KERNEL_CR3_NO_STACK
+ .endm
+
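
The %ebx handshake added above is easiest to see as two independent bits: bit 0 keeps its existing meaning (set when swapgs is not needed on exit), and bit 1 records that paranoid_entry() found the user CR3 and switched it, so paranoid_exit() must SWITCH_USER_CR3 on the way out. A plain C sketch of the four cases (not kernel code):

#include <stdio.h>
#include <stdbool.h>

/* Encoding from the patch above:
 *   ebx bit 0 set -> swapgs NOT needed in paranoid_exit
 *   ebx bit 1 set -> SWITCH_USER_CR3 needed in paranoid_exit
 */
static int paranoid_entry(bool kernel_gs, bool user_cr3)
{
	int ebx = kernel_gs ? 1 : 0;   /* pre-existing swapgs flag */
	if (user_cr3)
		ebx |= 2;              /* we did SWITCH_KERNEL_CR3, so undo it on exit */
	return ebx;
}

static void paranoid_exit(int ebx)
{
	if (ebx & 2)
		printf("  SWITCH_USER_CR3\n");
	if (!(ebx & 1))
		printf("  SWAPGS\n");
	printf("  restore regs, iretq\n");
}

int main(void)
{
	for (int gs = 0; gs <= 1; gs++)
		for (int cr3 = 0; cr3 <= 1; cr3++) {
			int ebx = paranoid_entry(gs, cr3);
			printf("ebx=%d (kernel gs=%d, user cr3 on entry=%d):\n", ebx, gs, cr3);
			paranoid_exit(ebx);
		}
	return 0;
}
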
diff --git a/patches.suse/4.4-44-kaiser-_pgd_alloc-without-__GFP_REPEAT-to-avoid-stal.patch b/patches.suse/4.4-44-kaiser-_pgd_alloc-without-__GFP_REPEAT-to-avoid-stal.patch
new file mode 100644
index 0000000000..6e4bb221d4
--- /dev/null
+++ b/patches.suse/4.4-44-kaiser-_pgd_alloc-without-__GFP_REPEAT-to-avoid-stal.patch
@@ -0,0 +1,68 @@
+From 8a44cdcb9c71998d7b74cc5d523810bdd503b161 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Fri, 13 Oct 2017 12:10:00 -0700
+Subject: [PATCH 44/43] kaiser: _pgd_alloc() without __GFP_REPEAT to avoid stalls
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+Synthetic filesystem mempressure testing has shown softlockups, with
+hour-long page allocation stalls, and pgd_alloc() trying for order:1
+with __GFP_REPEAT in one of the backtraces each time.
+
+That's _pgd_alloc() going for a Kaiser double-pgd, using the __GFP_REPEAT
+common to all page table allocations, but actually having no effect on
+order:0 (see should_alloc_oom() and should_continue_reclaim() in this
+tree, but beware that ports to another tree might behave differently).
+
+Order:1 stack allocation has been working satisfactorily without
+__GFP_REPEAT forever, and page table allocation only asks __GFP_REPEAT
+for awkward occasions in a long-running process: it's not appropriate
+at fork or exec time, and seems to be doing much more harm than good:
+getting those contiguous pages under very heavy mempressure can be
+hard (though even without it, Kaiser does generate more mempressure).
+
+Mask out that __GFP_REPEAT inside _pgd_alloc(). Why not take it out
+of the PGALLOC_GFP altogether, as v4.7 commit a3a9a59d2067 ("x86: get
+rid of superfluous __GFP_REPEAT") did? Because I think that might
+make a difference to our page table memcg charging, which I'd prefer
+not to interfere with at this time.
+
+(cherry-picked from Change-Id: If4b36ae5fc02bf4e0d0a5fde6a77de3cb8203769)
+
+hughd adds: __alloc_pages_slowpath() in the 4.4.89-stable tree handles
+__GFP_REPEAT a little differently than in prod kernel or 3.18.72-stable,
+so it may not always be exactly a no-op on order:0 pages, as said above;
+but I think still appropriate to omit it from Kaiser or non-Kaiser pgd.
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/mm/pgtable.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
+index e2bd5c81279e..d0a424988f82 100644
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -6,7 +6,7 @@
+ #include <asm/fixmap.h>
+ #include <asm/mtrr.h>
+
+-#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
++#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+
+ #ifdef CONFIG_HIGHPTE
+ #define PGALLOC_USER_GFP __GFP_HIGHMEM
+@@ -354,7 +354,9 @@ static inline void _pgd_free(pgd_t *pgd)
+
+ static inline pgd_t *_pgd_alloc(void)
+ {
+- return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
++ /* No __GFP_REPEAT: to avoid page allocation stalls in order-1 case */
++ return (pgd_t *)__get_free_pages(PGALLOC_GFP & ~__GFP_REPEAT,
++ PGD_ALLOCATION_ORDER);
+ }
+
+ static inline void _pgd_free(pgd_t *pgd)
+--
+2.15.0.531.g2ccb3012c9-goog
+
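
The fix itself is a one-line mask, but the pattern is worth spelling out: keep the shared PGALLOC_GFP definition intact and strip the unwanted bit at the one call site that should not use it. A userspace sketch with stand-in flag values (the real __GFP_* values differ):

#include <stdio.h>

/* stand-in gfp bits; the real kernel values differ */
#define GFP_KERNEL    0x01u
#define __GFP_NOTRACK 0x02u
#define __GFP_REPEAT  0x04u
#define __GFP_ZERO    0x08u

#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)

static unsigned int pgd_alloc_gfp(void)
{
	/* no __GFP_REPEAT: avoid page allocation stalls in the order-1 case */
	return PGALLOC_GFP & ~__GFP_REPEAT;
}

int main(void)
{
	printf("PGALLOC_GFP      = %#x\n", PGALLOC_GFP);
	printf("_pgd_alloc() gfp = %#x\n", pgd_alloc_gfp());
	return 0;
}
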
diff --git a/patches.suse/4.4-45-kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch b/patches.suse/4.4-45-kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch
new file mode 100644
index 0000000000..e43ba9ec5c
--- /dev/null
+++ b/patches.suse/4.4-45-kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch
@@ -0,0 +1,38 @@
+From de654f04423e9799b548b3f04e97fdc8ef5ce199 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 4 Dec 2017 20:13:35 -0800
+Subject: [PATCH 45/43] kaiser: fix unlikely error in alloc_ldt_struct()
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+An error from kaiser_add_mapping() here is not at all likely, but
+Eric Biggers rightly points out that __free_ldt_struct() relies on
+new_ldt->size being initialized: move that up.
+
+(cherry picked from Change-Id: I0a30c9c52e95115e6dced8982b0bb279ab820fec)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/kernel/ldt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
+index b7e041dd46fa..7bfd9ae0d228 100644
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -78,11 +78,11 @@ static struct ldt_struct *alloc_ldt_struct(int size)
+
+ ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
+ __PAGE_KERNEL);
++ new_ldt->size = size;
+ if (ret) {
+ __free_ldt_struct(new_ldt);
+ return NULL;
+ }
+- new_ldt->size = size;
+ return new_ldt;
+ }
+
+--
+2.15.0.531.g2ccb3012c9-goog
+
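
The ordering bug fixed above is a common one: the cleanup helper (__free_ldt_struct(), per the commit message) relies on new_ldt->size, so that field must be initialized before the first error path that can call the helper. A self-contained sketch of the corrected ordering, with userspace stand-ins for the kernel allocation and mapping calls:

#include <stdio.h>
#include <stdlib.h>

struct ldt_struct { void *entries; int size; };

/* models __free_ldt_struct(): it reads ldt->size, so size must be valid
 * before any error path reaches here */
static void free_ldt_struct(struct ldt_struct *ldt)
{
	printf("freeing %d entries\n", ldt->size);
	free(ldt->entries);
	free(ldt);
}

/* fail_map models kaiser_add_mapping() returning an error */
static struct ldt_struct *alloc_ldt_struct(int size, int fail_map)
{
	struct ldt_struct *new_ldt = malloc(sizeof(*new_ldt));
	if (!new_ldt)
		return NULL;
	new_ldt->entries = calloc(size, 8);
	if (!new_ldt->entries) {
		free(new_ldt);
		return NULL;
	}
	new_ldt->size = size;              /* moved up: set BEFORE the fallible step */
	if (fail_map) {
		free_ldt_struct(new_ldt);  /* safe now that size is initialized */
		return NULL;
	}
	return new_ldt;
}

int main(void)
{
	struct ldt_struct *ldt = alloc_ldt_struct(16, 0);
	if (ldt)
		free_ldt_struct(ldt);
	return alloc_ldt_struct(16, 1) ? 1 : 0;
}
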
diff --git a/patches.suse/4.4-46-x86-mm-64-fix-reboot-interaction-with-cr4-pcide.patch b/patches.suse/4.4-46-x86-mm-64-fix-reboot-interaction-with-cr4-pcide.patch
new file mode 100644
index 0000000000..04db44ee29
--- /dev/null
+++ b/patches.suse/4.4-46-x86-mm-64-fix-reboot-interaction-with-cr4-pcide.patch
@@ -0,0 +1,41 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 8 Oct 2017 21:53:05 -0700
+Subject: x86/mm/64: Fix reboot interaction with CR4.PCIDE
+References: bsc#1068032 CVE-2017-5754
+Git-commit: 924c6b900cfdf376b07bccfd80e62b21914f8a5a
+Patch-mainline: v4.14-rc5
+
+Trying to reboot via real mode fails with PCID on: long mode cannot
+be exited while CR4.PCIDE is set. (No, I have no idea why, but the
+SDM and actual CPUs are in agreement here.) The result is a GPF and
+a hang instead of a reboot.
+
+I didn't catch this in testing because neither my computer nor my VM
+reboots this way. I can trigger it with reboot=bios, though.
+
+Fixes: 660da7c9228f ("x86/mm: Enable CR4.PCIDE on supported systems")
+Reported-and-tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Link: https://lkml.kernel.org/r/f1e7d965998018450a7a70c2823873686a8b21c0.1507524746.git.luto@kernel.org
+
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kernel/reboot.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kernel/reboot.c
++++ b/arch/x86/kernel/reboot.c
+@@ -93,6 +93,10 @@ void __noreturn machine_real_restart(uns
+ load_cr3(initial_page_table);
+ #else
+ write_cr3(real_mode_header->trampoline_pgd);
++
++ /* Exiting long mode will fail if CR4.PCIDE is set. */
++ if (static_cpu_has(X86_FEATURE_PCID))
++ cr4_clear_bits(X86_CR4_PCIDE);
+ #endif
+
+ /* Jump to the identity-mapped low memory code */
diff --git a/patches.suse/4.4-47-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch b/patches.suse/4.4-47-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch
new file mode 100644
index 0000000000..40c183eb75
--- /dev/null
+++ b/patches.suse/4.4-47-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch
@@ -0,0 +1,639 @@
+From ef0f8509845be925b4293e5ec0ce4aade928ea00 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 24 Sep 2017 16:59:49 -0700
+Subject: [PATCH 02/14] kaiser: add "nokaiser" boot option, using ALTERNATIVE
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+Added "nokaiser" boot option: an early param like "noinvpcid".
+Most places now check int kaiser_enabled (#defined 0 when not
+CONFIG_KAISER) instead of #ifdef CONFIG_KAISER; but entry_64.S
+and entry_64_compat.S are using the ALTERNATIVE technique, which
+patches in the preferred instructions at runtime. That technique
+is tied to x86 cpu features, so X86_FEATURE_KAISER fabricated
+("" in its comment so "kaiser" not magicked into /proc/cpuinfo).
+
+Prior to "nokaiser", Kaiser #defined _PAGE_GLOBAL 0: revert that,
+but be careful with both _PAGE_GLOBAL and CR4.PGE: setting them when
+nokaiser like when !CONFIG_KAISER, but not setting either when kaiser -
+neither matters on its own, but it's hard to be sure that _PAGE_GLOBAL
+won't get set in some obscure corner, or something adds PGE into CR4.
+By omitting _PAGE_GLOBAL from __supported_pte_mask when kaiser_enabled,
+all page table setup which uses pte_pfn() masks it out of the ptes.
+
+It's slightly shameful that the same declaration versus definition of
+kaiser_enabled appears in not one, not two, but in three header files
+(asm/kaiser.h, asm/pgtable.h, asm/tlbflush.h). I felt safer that way,
+than with #including any of those in any of the others; and did not
+feel it worth an asm/kaiser_enabled.h - kernel/cpu/common.c includes
+them all, so we shall hear about it if they get out of synch.
+
+Cleanups while in the area: removed the silly #ifdef CONFIG_KAISER
+from kaiser.c; removed the unused native_get_normal_pgd(); removed
+the spurious reg clutter from SWITCH_*_CR3 macro stubs; corrected some
+comments. But more interestingly, set CR4.PSE in secondary_startup_64:
+the manual is clear that it does not matter whether it's 0 or 1 when
+4-level-pts are enabled, but I was distracted to find cr4 different on
+BSP and auxiliaries - BSP alone was adding PSE, in probe_page_size_mask().
+
+(cherry picked from Change-Id: I8e5bec716944444359cbd19f6729311eff943e9a)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ Documentation/kernel-parameters.txt | 2 +
+ arch/x86/entry/entry_64.S | 15 +++++++------
+ arch/x86/include/asm/cpufeature.h | 3 ++
+ arch/x86/include/asm/kaiser.h | 27 +++++++++++++++++-------
+ arch/x86/include/asm/pgtable.h | 20 ++++++++++++-----
+ arch/x86/include/asm/pgtable_64.h | 13 +++--------
+ arch/x86/include/asm/pgtable_types.h | 4 ---
+ arch/x86/include/asm/tlbflush.h | 39 ++++++++++++++++++++++-------------
+ arch/x86/kernel/cpu/common.c | 28 ++++++++++++++++++++++++-
+ arch/x86/kernel/espfix_64.c | 3 +-
+ arch/x86/kernel/head_64.S | 4 +--
+ arch/x86/mm/init.c | 2 -
+ arch/x86/mm/init_64.c | 10 ++++++++
+ arch/x86/mm/kaiser.c | 26 +++++++++++++++++++----
+ arch/x86/mm/pgtable.c | 8 +------
+ arch/x86/mm/tlb.c | 4 ---
+ 16 files changed, 143 insertions(+), 65 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1081,7 +1081,7 @@ ENTRY(paranoid_entry)
+ * unconditionally, but we need to find out whether the reverse
+ * should be done on return (conveyed to paranoid_exit in %ebx).
+ */
+- movq %cr3, %rax
++ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ testl $KAISER_SHADOW_PGD_OFFSET, %eax
+ jz 2f
+ orl $2, %ebx
+@@ -1113,6 +1113,7 @@ ENTRY(paranoid_exit)
+ TRACE_IRQS_OFF_DEBUG
+ TRACE_IRQS_IRETQ_DEBUG
+ #ifdef CONFIG_KAISER
++ /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */
+ testl $2, %ebx /* SWITCH_USER_CR3 needed? */
+ jz paranoid_exit_no_switch
+ SWITCH_USER_CR3
+@@ -1343,13 +1344,14 @@ ENTRY(nmi)
+ #ifdef CONFIG_KAISER
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+- movq %cr3, %rax
++ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+ orq x86_cr3_pcid_noflush, %rax
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ movq %rax, %cr3
++2:
+ #endif
+ call do_nmi
+
+@@ -1359,8 +1361,7 @@ ENTRY(nmi)
+ * kernel code that needs user CR3, but do we ever return
+ * to "user mode" where we need the kernel CR3?
+ */
+- popq %rax
+- mov %rax, %cr3
++ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
+ #endif
+
+ /*
+@@ -1587,13 +1588,14 @@ end_repeat_nmi:
+ #ifdef CONFIG_KAISER
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+- movq %cr3, %rax
++ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+ orq x86_cr3_pcid_noflush, %rax
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ movq %rax, %cr3
++2:
+ #endif
+
+ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+@@ -1605,8 +1607,7 @@ end_repeat_nmi:
+ * kernel code that needs user CR3, like just just before
+ * a sysret.
+ */
+- popq %rax
+- mov %rax, %cr3
++ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
+ #endif
+
+ testl %ebx, %ebx /* swapgs needed? */
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -204,6 +204,9 @@
+ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+
++/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
++#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */
++
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -46,28 +46,33 @@ movq \reg, %cr3
+ .endm
+
+ .macro SWITCH_KERNEL_CR3
+-pushq %rax
++ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
+ _SWITCH_TO_KERNEL_CR3 %rax
+ popq %rax
++8:
+ .endm
+
+ .macro SWITCH_USER_CR3
+-pushq %rax
++ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
+ _SWITCH_TO_USER_CR3 %rax %al
+ popq %rax
++8:
+ .endm
+
+ .macro SWITCH_KERNEL_CR3_NO_STACK
+-movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
++ALTERNATIVE "jmp 8f", \
++ __stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \
++ X86_FEATURE_KAISER
+ _SWITCH_TO_KERNEL_CR3 %rax
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
++8:
+ .endm
+
+ #else /* CONFIG_KAISER */
+
+-.macro SWITCH_KERNEL_CR3 reg
++.macro SWITCH_KERNEL_CR3
+ .endm
+-.macro SWITCH_USER_CR3 reg regb
++.macro SWITCH_USER_CR3
+ .endm
+ .macro SWITCH_KERNEL_CR3_NO_STACK
+ .endm
+@@ -90,6 +95,16 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_p
+
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+
++extern int kaiser_enabled;
++#else
++#define kaiser_enabled 0
++#endif /* CONFIG_KAISER */
++
++/*
++ * Kaiser function prototypes are needed even when CONFIG_KAISER is not set,
++ * so as to build with tests on kaiser_enabled instead of #ifdefs.
++ */
++
+ /**
+ * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
+ * @addr: the start address of the range
+@@ -119,8 +134,6 @@ extern void kaiser_remove_mapping(unsign
+ */
+ extern void kaiser_init(void);
+
+-#endif /* CONFIG_KAISER */
+-
+ #endif /* __ASSEMBLY */
+
+ #endif /* _ASM_X86_KAISER_H */
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -111,13 +111,12 @@ extern pgd_t kaiser_set_shadow_pgd(pgd_t
+
+ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+ {
++#ifdef CONFIG_DEBUG_VM
++ /* linux/mmdebug.h may not have been included at this point */
++ BUG_ON(!kaiser_enabled);
++#endif
+ return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE);
+ }
+-
+-static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
+-{
+- return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE);
+-}
+ #else
+ static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+@@ -128,10 +127,6 @@ static inline pgd_t *native_get_shadow_p
+ BUILD_BUG_ON(1);
+ return NULL;
+ }
+-static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
+-{
+- return pgdp;
+-}
+ #endif /* CONFIG_KAISER */
+
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -18,6 +18,12 @@
+ #ifndef __ASSEMBLY__
+ #include <asm/x86_init.h>
+
++#ifdef CONFIG_KAISER
++extern int kaiser_enabled;
++#else
++#define kaiser_enabled 0
++#endif
++
+ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+ void ptdump_walk_pgd_level_checkwx(void);
+
+@@ -699,7 +705,7 @@ static inline int pgd_bad(pgd_t pgd)
+ * page table by accident; it will fault on the first
+ * instruction it tries to run. See native_set_pgd().
+ */
+- if (IS_ENABLED(CONFIG_KAISER))
++ if (kaiser_enabled)
+ ignore_flags |= _PAGE_NX;
+
+ return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
+@@ -910,12 +916,14 @@ static inline void pmdp_set_wrprotect(st
+ */
+ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+ {
+- memcpy(dst, src, count * sizeof(pgd_t));
++ memcpy(dst, src, count * sizeof(pgd_t));
+ #ifdef CONFIG_KAISER
+- /* Clone the shadow pgd part as well */
+- memcpy(native_get_shadow_pgd(dst),
+- native_get_shadow_pgd(src),
+- count * sizeof(pgd_t));
++ if (kaiser_enabled) {
++ /* Clone the shadow pgd part as well */
++ memcpy(native_get_shadow_pgd(dst),
++ native_get_shadow_pgd(src),
++ count * sizeof(pgd_t));
++ }
+ #endif
+ }
+
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -45,11 +45,7 @@
+ #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
+ #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
+ #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
+-#ifdef CONFIG_KAISER
+-#define _PAGE_GLOBAL (_AT(pteval_t, 0))
+-#else
+ #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
+-#endif
+ #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
+ #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
+ #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -136,9 +136,11 @@ static inline void cr4_set_bits_and_upda
+ * to avoid the need for asm/kaiser.h in unexpected places.
+ */
+ #ifdef CONFIG_KAISER
++extern int kaiser_enabled;
+ extern void kaiser_setup_pcid(void);
+ extern void kaiser_flush_tlb_on_return_to_user(void);
+ #else
++#define kaiser_enabled 0
+ static inline void kaiser_setup_pcid(void)
+ {
+ }
+@@ -163,7 +165,7 @@ static inline void __native_flush_tlb(vo
+ * back:
+ */
+ preempt_disable();
+- if (this_cpu_has(X86_FEATURE_PCID))
++ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
+ kaiser_flush_tlb_on_return_to_user();
+ native_write_cr3(native_read_cr3());
+ preempt_enable();
+@@ -174,20 +176,30 @@ static inline void __native_flush_tlb_gl
+ unsigned long cr4;
+
+ cr4 = this_cpu_read(cpu_tlbstate.cr4);
+- /* clear PGE */
+- native_write_cr4(cr4 & ~X86_CR4_PGE);
+- /* write old PGE again and flush TLBs */
+- native_write_cr4(cr4);
++ if (cr4 & X86_CR4_PGE) {
++ /* clear PGE and flush TLB of all entries */
++ native_write_cr4(cr4 & ~X86_CR4_PGE);
++ /* restore PGE as it was before */
++ native_write_cr4(cr4);
++ } else {
++ /*
++ * x86_64 microcode update comes this way when CR4.PGE is not
++ * enabled, and it's safer for all callers to allow this case.
++ */
++ native_write_cr3(native_read_cr3());
++ }
+ }
+
+ static inline void __native_flush_tlb_global(void)
+ {
+-#ifdef CONFIG_KAISER
+- /* Globals are not used at all */
+- __native_flush_tlb();
+-#else
+ unsigned long flags;
+
++ if (kaiser_enabled) {
++ /* Globals are not used at all */
++ __native_flush_tlb();
++ return;
++ }
++
+ if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+@@ -207,7 +219,6 @@ static inline void __native_flush_tlb_gl
+ raw_local_irq_save(flags);
+ __native_flush_tlb_global_irq_disabled();
+ raw_local_irq_restore(flags);
+-#endif
+ }
+
+ static inline void __native_flush_tlb_single(unsigned long addr)
+@@ -222,7 +233,7 @@ static inline void __native_flush_tlb_si
+ */
+
+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
+- if (this_cpu_has(X86_FEATURE_PCID))
++ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
+ kaiser_flush_tlb_on_return_to_user();
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ return;
+@@ -237,9 +248,9 @@ static inline void __native_flush_tlb_si
+ * Make sure to do only a single invpcid when KAISER is
+ * disabled and we have only a single ASID.
+ */
+- if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER)
+- invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
+- invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
++ if (kaiser_enabled)
++ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
++ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
+ }
+
+ static inline void __flush_tlb_all(void)
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -181,6 +181,20 @@ static int __init x86_pcid_setup(char *s
+ return 1;
+ }
+ __setup("nopcid", x86_pcid_setup);
++
++static int __init x86_nokaiser_setup(char *s)
++{
++ /* nokaiser doesn't accept parameters */
++ if (s)
++ return -EINVAL;
++#ifdef CONFIG_KAISER
++ kaiser_enabled = 0;
++ setup_clear_cpu_cap(X86_FEATURE_KAISER);
++ pr_info("nokaiser: KAISER feature disabled\n");
++#endif
++ return 0;
++}
++early_param("nokaiser", x86_nokaiser_setup);
+ #endif
+
+ static int __init x86_noinvpcid_setup(char *s)
+@@ -329,7 +343,7 @@ static __always_inline void setup_smap(s
+ static void setup_pcid(struct cpuinfo_x86 *c)
+ {
+ if (cpu_has(c, X86_FEATURE_PCID)) {
+- if (cpu_has(c, X86_FEATURE_PGE)) {
++ if (cpu_has(c, X86_FEATURE_PGE) || kaiser_enabled) {
+ cr4_set_bits(X86_CR4_PCIDE);
+ /*
+ * INVPCID has two "groups" of types:
+@@ -800,6 +814,10 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ #endif
+
+ init_scattered_cpuid_features(c);
++#ifdef CONFIG_KAISER
++ if (kaiser_enabled)
++ set_cpu_cap(c, X86_FEATURE_KAISER);
++#endif
+ }
+
+ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+@@ -1460,6 +1478,14 @@ void cpu_init(void)
+ * try to read it.
+ */
+ cr4_init_shadow();
++ if (!kaiser_enabled) {
++ /*
++ * secondary_startup_64() deferred setting PGE in cr4:
++ * probe_page_size_mask() sets it on the boot cpu,
++ * but it needs to be set on each secondary cpu.
++ */
++ cr4_set_bits(X86_CR4_PGE);
++ }
+
+ /*
+ * Load microcode on this cpu if a valid microcode is available.
+--- a/arch/x86/kernel/espfix_64.c
++++ b/arch/x86/kernel/espfix_64.c
+@@ -132,9 +132,10 @@ void __init init_espfix_bsp(void)
+ * area to ensure it is mapped into the shadow user page
+ * tables.
+ */
+- if (IS_ENABLED(CONFIG_KAISER))
++ if (kaiser_enabled) {
+ set_pgd(native_get_shadow_pgd(pgd_p),
+ __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page)));
++ }
+
+ /* Randomize the locations */
+ init_espfix_random();
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -183,8 +183,8 @@ ENTRY(secondary_startup_64)
+ movq $(init_level4_pgt - __START_KERNEL_map), %rax
+ 1:
+
+- /* Enable PAE mode and PGE */
+- movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
++ /* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */
++ movl $(X86_CR4_PAE | X86_CR4_PSE), %ecx
+ movq %rcx, %cr4
+
+ /* Setup early boot stage 4 level pagetables. */
+--- a/arch/x86/mm/init_64.c
++++ b/arch/x86/mm/init_64.c
+@@ -397,6 +397,16 @@ void __init cleanup_highmap(void)
+ continue;
+ if (vaddr < (unsigned long) _text || vaddr > end)
+ set_pmd(pmd, __pmd(0));
++ else if (kaiser_enabled) {
++ /*
++ * level2_kernel_pgt is initialized with _PAGE_GLOBAL:
++ * clear that now. This is not important, so long as
++ * CR4.PGE remains clear, but it removes an anomaly.
++ * Physical mapping setup below avoids _PAGE_GLOBAL
++ * by use of massage_pgprot() inside pfn_pte() etc.
++ */
++ set_pmd(pmd, pmd_clear_flags(*pmd, _PAGE_GLOBAL));
++ }
+ }
+ }
+
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -165,7 +165,7 @@ static void __init probe_page_size_mask(
+ cr4_set_bits_and_update_boot(X86_CR4_PSE);
+
+ /* Enable PGE if available */
+- if (cpu_has_pge) {
++ if (cpu_has_pge && !kaiser_enabled) {
+ cr4_set_bits_and_update_boot(X86_CR4_PGE);
+ __supported_pte_mask |= _PAGE_GLOBAL;
+ } else
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -17,7 +17,9 @@
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
+
+-#ifdef CONFIG_KAISER
++int kaiser_enabled __read_mostly = 1;
++EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */
++
+ __visible
+ DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+@@ -168,8 +170,8 @@ static pte_t *kaiser_pagetable_walk(unsi
+ return pte_offset_kernel(pmd, address);
+ }
+
+-int kaiser_add_user_map(const void *__start_addr, unsigned long size,
+- unsigned long flags)
++static int kaiser_add_user_map(const void *__start_addr, unsigned long size,
++ unsigned long flags)
+ {
+ int ret = 0;
+ pte_t *pte;
+@@ -178,6 +180,15 @@ int kaiser_add_user_map(const void *__st
+ unsigned long end_addr = PAGE_ALIGN(start_addr + size);
+ unsigned long target_address;
+
++ /*
++ * It is convenient for callers to pass in __PAGE_KERNEL etc,
++ * and there is no actual harm from setting _PAGE_GLOBAL, so
++ * long as CR4.PGE is not set. But it is nonetheless troubling
++ * to see Kaiser itself setting _PAGE_GLOBAL (now that "nokaiser"
++ * requires that not to be #defined to 0): so mask it off here.
++ */
++ flags &= ~_PAGE_GLOBAL;
++
+ for (; address < end_addr; address += PAGE_SIZE) {
+ target_address = get_pa_from_mapping(address);
+ if (target_address == -1) {
+@@ -264,6 +275,8 @@ void __init kaiser_init(void)
+ {
+ int cpu;
+
++ if (!kaiser_enabled)
++ return;
+ kaiser_init_all_pgds();
+
+ for_each_possible_cpu(cpu) {
+@@ -312,6 +325,8 @@ void __init kaiser_init(void)
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
+ int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
+ {
++ if (!kaiser_enabled)
++ return 0;
+ return kaiser_add_user_map((const void *)addr, size, flags);
+ }
+
+@@ -323,6 +338,8 @@ void kaiser_remove_mapping(unsigned long
+ unsigned long addr, next;
+ pgd_t *pgd;
+
++ if (!kaiser_enabled)
++ return;
+ pgd = native_get_shadow_pgd(pgd_offset_k(start));
+ for (addr = start; addr < end; pgd++, addr = next) {
+ next = pgd_addr_end(addr, end);
+@@ -344,6 +361,8 @@ static inline bool is_userspace_pgd(pgd_
+
+ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
++ if (!kaiser_enabled)
++ return pgd;
+ /*
+ * Do we need to also populate the shadow pgd? Check _PAGE_USER to
+ * skip cases like kexec and EFI which make temporary low mappings.
+@@ -400,4 +419,3 @@ void kaiser_flush_tlb_on_return_to_user(
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+-#endif /* CONFIG_KAISER */
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -341,16 +341,12 @@ static inline void _pgd_free(pgd_t *pgd)
+ }
+ #else
+
+-#ifdef CONFIG_KAISER
+ /*
+- * Instead of one pmd, we aquire two pmds. Being order-1, it is
++ * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is
+ * both 8k in size and 8k-aligned. That lets us just flip bit 12
+ * in a pointer to swap between the two 4k halves.
+ */
+-#define PGD_ALLOCATION_ORDER 1
+-#else
+-#define PGD_ALLOCATION_ORDER 0
+-#endif
++#define PGD_ALLOCATION_ORDER kaiser_enabled
+
+ static inline pgd_t *_pgd_alloc(void)
+ {
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -39,8 +39,7 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ {
+ unsigned long new_mm_cr3 = __pa(pgdir);
+
+-#ifdef CONFIG_KAISER
+- if (this_cpu_has(X86_FEATURE_PCID)) {
++ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) {
+ /*
+ * We reuse the same PCID for different tasks, so we must
+ * flush all the entries for the PCID out when we change tasks.
+@@ -57,7 +56,6 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
+ kaiser_flush_tlb_on_return_to_user();
+ }
+-#endif /* CONFIG_KAISER */
+
+ /*
+ * Caution: many callers of this function expect
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2608,6 +2608,8 @@ bytes respectively. Such letter suffixes
+
+ nojitter [IA-64] Disables jitter checking for ITC timers.
+
++ nokaiser [X86-64] Disable KAISER isolation of kernel from user.
++
+ no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
+
+ no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
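
The hunks above hinge on one allocation trick: with kaiser_enabled, the pgd is allocated as an order-1, 8 KB-aligned block, so the shadow half always sits exactly one page above the kernel half and native_get_shadow_pgd() can derive it by OR-ing in PAGE_SIZE. A minimal userspace sketch of that arithmetic, assuming 4 KB pages and using posix_memalign() to stand in for the order-1 page allocation (get_shadow_half()/get_kernel_half() are illustrative names, not the kernel helpers):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL        /* assumption: 4 KB pages, as on x86-64 */

/* Stand-ins for the kernel helpers: flip bit 12 of an 8 KB-aligned block. */
static void *get_shadow_half(void *pgd) { return (void *)((uintptr_t)pgd |  PAGE_SIZE); }
static void *get_kernel_half(void *pgd) { return (void *)((uintptr_t)pgd & ~PAGE_SIZE); }

int main(void)
{
        void *pgd;

        /* Order-1 allocation: 8 KB, aligned to 8 KB, like _pgd_alloc() when
         * PGD_ALLOCATION_ORDER is 1. */
        if (posix_memalign(&pgd, 2 * PAGE_SIZE, 2 * PAGE_SIZE))
                return 1;

        printf("kernel half: %p\n", pgd);
        printf("shadow half: %p\n", get_shadow_half(pgd));
        printf("back again : %p\n", get_kernel_half(get_shadow_half(pgd)));

        free(pgd);
        return 0;
}

This is also why PGD_ALLOCATION_ORDER can simply become kaiser_enabled in the pgtable.c hunk above: order 1 (two pages) only when the shadow half is actually needed, order 0 otherwise.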
diff --git a/patches.suse/4.4-48-rename-and-simplify-feature-setting.patch b/patches.suse/4.4-48-rename-and-simplify-feature-setting.patch
new file mode 100644
index 0000000000..4f287c63df
--- /dev/null
+++ b/patches.suse/4.4-48-rename-and-simplify-feature-setting.patch
@@ -0,0 +1,95 @@
+From: Borislav Petkov <bp@suse.de>
+Subject: x86/kaiser: Rename and simplify X86_FEATURE_KAISER handling
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+Concentrate it in arch/x86/mm/kaiser.c and use the upstream string "nopti".
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+
+---
+ Documentation/kernel-parameters.txt | 2 +-
+ arch/x86/kernel/cpu/common.c | 18 ------------------
+ arch/x86/mm/kaiser.c | 20 +++++++++++++++++++-
+ 3 files changed, 20 insertions(+), 20 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -181,20 +181,6 @@ static int __init x86_pcid_setup(char *s
+ return 1;
+ }
+ __setup("nopcid", x86_pcid_setup);
+-
+-static int __init x86_nokaiser_setup(char *s)
+-{
+- /* nokaiser doesn't accept parameters */
+- if (s)
+- return -EINVAL;
+-#ifdef CONFIG_KAISER
+- kaiser_enabled = 0;
+- setup_clear_cpu_cap(X86_FEATURE_KAISER);
+- pr_info("nokaiser: KAISER feature disabled\n");
+-#endif
+- return 0;
+-}
+-early_param("nokaiser", x86_nokaiser_setup);
+ #endif
+
+ static int __init x86_noinvpcid_setup(char *s)
+@@ -814,10 +800,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ #endif
+
+ init_scattered_cpuid_features(c);
+-#ifdef CONFIG_KAISER
+- if (kaiser_enabled)
+- set_cpu_cap(c, X86_FEATURE_KAISER);
+-#endif
+ }
+
+ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -275,8 +275,13 @@ void __init kaiser_init(void)
+ {
+ int cpu;
+
+- if (!kaiser_enabled)
++ if (!kaiser_enabled) {
++ setup_clear_cpu_cap(X86_FEATURE_KAISER);
+ return;
++ }
++
++ setup_force_cpu_cap(X86_FEATURE_KAISER);
++
+ kaiser_init_all_pgds();
+
+ for_each_possible_cpu(cpu) {
+@@ -419,3 +424,16 @@ void kaiser_flush_tlb_on_return_to_user(
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
++
++static int __init x86_nokaiser_setup(char *s)
++{
++ /* nopti doesn't accept parameters */
++ if (s)
++ return -EINVAL;
++
++ kaiser_enabled = 0;
++ pr_info("Kernel/User page tables isolation: disabled\n");
++
++ return 0;
++}
++early_param("nopti", x86_nokaiser_setup);
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2608,7 +2608,7 @@ bytes respectively. Such letter suffixes
+
+ nojitter [IA-64] Disables jitter checking for ITC timers.
+
+- nokaiser [X86-64] Disable KAISER isolation of kernel from user.
++ nopti [X86-64] Disable KAISER isolation of kernel from user.
+
+ no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
+
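
With the knob now named after the upstream "nopti" string, a quick sanity check on a running system is to look for that token on /proc/cmdline. The checker below is only an illustration (a whole-word match on whitespace-separated tokens, no handling of quoted arguments) and is not part of the patch:

#include <stdio.h>
#include <string.h>

/* Look for a whole-word token on the kernel command line. */
static int cmdline_has(const char *token)
{
        char buf[4096], *word, *save;
        FILE *f = fopen("/proc/cmdline", "r");

        if (!f || !fgets(buf, sizeof(buf), f)) {
                if (f)
                        fclose(f);
                return 0;
        }
        fclose(f);

        for (word = strtok_r(buf, " \t\n", &save); word;
             word = strtok_r(NULL, " \t\n", &save))
                if (!strcmp(word, token))
                        return 1;
        return 0;
}

int main(void)
{
        printf("nopti present: %s\n", cmdline_has("nopti") ? "yes" : "no");
        return 0;
}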
diff --git a/patches.suse/4.4-49-x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch b/patches.suse/4.4-49-x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch
new file mode 100644
index 0000000000..e50ea84e19
--- /dev/null
+++ b/patches.suse/4.4-49-x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch
@@ -0,0 +1,171 @@
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 17 Jul 2017 16:10:33 -0500
+Subject: x86/boot: Add early cmdline parsing for options with arguments
+References: bsc#1068032 CVE-2017-5754
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+Git-commit: e505371dd83963caae1a37ead9524e8d997341be
+Patch-mainline: v4.14-rc1
+
+Add a cmdline_find_option() function to look for cmdline options that
+take arguments. The argument is returned in a supplied buffer and the
+argument length (regardless of whether it fits in the supplied buffer)
+is returned, with -1 indicating not found.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brijesh Singh <brijesh.singh@amd.com>
+Cc: Dave Young <dyoung@redhat.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Larry Woodman <lwoodman@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matt Fleming <matt@codeblueprint.co.uk>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Toshimitsu Kani <toshi.kani@hpe.com>
+Cc: kasan-dev@googlegroups.com
+Cc: kvm@vger.kernel.org
+Cc: linux-arch@vger.kernel.org
+Cc: linux-doc@vger.kernel.org
+Cc: linux-efi@vger.kernel.org
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/36b5f97492a9745dce27682305f990fc20e5cf8a.1500319216.git.thomas.lendacky@amd.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/include/asm/cmdline.h | 2
+ arch/x86/lib/cmdline.c | 105 +++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 107 insertions(+)
+
+--- a/arch/x86/include/asm/cmdline.h
++++ b/arch/x86/include/asm/cmdline.h
+@@ -2,5 +2,7 @@
+ #define _ASM_X86_CMDLINE_H
+
+ int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
++int cmdline_find_option(const char *cmdline_ptr, const char *option,
++ char *buffer, int bufsize);
+
+ #endif /* _ASM_X86_CMDLINE_H */
+--- a/arch/x86/lib/cmdline.c
++++ b/arch/x86/lib/cmdline.c
+@@ -82,3 +82,108 @@ int cmdline_find_option_bool(const char
+
+ return 0; /* Buffer overrun */
+ }
++
++/*
++ * Find a non-boolean option (i.e. option=argument). In accordance with
++ * standard Linux practice, if this option is repeated, this returns the
++ * last instance on the command line.
++ *
++ * @cmdline: the cmdline string
++ * @max_cmdline_size: the maximum size of cmdline
++ * @option: option string to look for
++ * @buffer: memory buffer to return the option argument
++ * @bufsize: size of the supplied memory buffer
++ *
++ * Returns the length of the argument (regardless of if it was
++ * truncated to fit in the buffer), or -1 on not found.
++ */
++static int
++__cmdline_find_option(const char *cmdline, int max_cmdline_size,
++ const char *option, char *buffer, int bufsize)
++{
++ char c;
++ int pos = 0, len = -1;
++ const char *opptr = NULL;
++ char *bufptr = buffer;
++ enum {
++ st_wordstart = 0, /* Start of word/after whitespace */
++ st_wordcmp, /* Comparing this word */
++ st_wordskip, /* Miscompare, skip */
++ st_bufcpy, /* Copying this to buffer */
++ } state = st_wordstart;
++
++ if (!cmdline)
++ return -1; /* No command line */
++
++ /*
++ * This 'pos' check ensures we do not overrun
++ * a non-NULL-terminated 'cmdline'
++ */
++ while (pos++ < max_cmdline_size) {
++ c = *(char *)cmdline++;
++ if (!c)
++ break;
++
++ switch (state) {
++ case st_wordstart:
++ if (myisspace(c))
++ break;
++
++ state = st_wordcmp;
++ opptr = option;
++ /* fall through */
++
++ case st_wordcmp:
++ if ((c == '=') && !*opptr) {
++ /*
++ * We matched all the way to the end of the
++ * option we were looking for, prepare to
++ * copy the argument.
++ */
++ len = 0;
++ bufptr = buffer;
++ state = st_bufcpy;
++ break;
++ } else if (c == *opptr++) {
++ /*
++ * We are currently matching, so continue
++ * to the next character on the cmdline.
++ */
++ break;
++ }
++ state = st_wordskip;
++ /* fall through */
++
++ case st_wordskip:
++ if (myisspace(c))
++ state = st_wordstart;
++ break;
++
++ case st_bufcpy:
++ if (myisspace(c)) {
++ state = st_wordstart;
++ } else {
++ /*
++ * Increment len, but don't overrun the
++ * supplied buffer and leave room for the
++ * NULL terminator.
++ */
++ if (++len < bufsize)
++ *bufptr++ = c;
++ }
++ break;
++ }
++ }
++
++ if (bufsize)
++ *bufptr = '\0';
++
++ return len;
++}
++
++int cmdline_find_option(const char *cmdline, const char *option, char *buffer,
++ int bufsize)
++{
++ return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option,
++ buffer, bufsize);
++}
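
The new cmdline_find_option() has three behaviours worth calling out: the last instance of a repeated option wins, the returned length is the full argument length even when the copy had to be truncated, and -1 means not found. The userspace model below re-implements those semantics for illustration only (it ignores COMMAND_LINE_SIZE and non-NUL-terminated command lines, and find_option() is a made-up name, not the kernel function):

#include <stdio.h>
#include <string.h>

/*
 * Userspace model of cmdline_find_option() semantics: whitespace-separated
 * words, "option=value" form, last instance wins, returns the full argument
 * length even when it had to be truncated to fit the buffer, -1 if absent.
 */
static int find_option(const char *cmdline, const char *option,
                       char *buffer, int bufsize)
{
        size_t optlen = strlen(option);
        int len = -1;
        char word[256];
        int n;

        while (sscanf(cmdline, "%255s%n", word, &n) == 1) {
                cmdline += n;
                if (!strncmp(word, option, optlen) && word[optlen] == '=') {
                        const char *arg = word + optlen + 1;

                        len = (int)strlen(arg);
                        if (bufsize > 0)
                                snprintf(buffer, bufsize, "%s", arg);
                }
        }
        return len;
}

int main(void)
{
        char buf[3];
        int len = find_option("ro quiet pti=off pti=auto", "pti", buf, sizeof(buf));

        /* Prints: len=4 arg="au" -- last instance wins, copy truncated. */
        printf("len=%d arg=\"%s\"\n", len, buf);
        return 0;
}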
diff --git a/patches.suse/4.4-50-kaiser-add_pti_cmdline_option_and_documentation.patch b/patches.suse/4.4-50-kaiser-add_pti_cmdline_option_and_documentation.patch
new file mode 100644
index 0000000000..dd2219bb84
--- /dev/null
+++ b/patches.suse/4.4-50-kaiser-add_pti_cmdline_option_and_documentation.patch
@@ -0,0 +1,119 @@
+From: Borislav Petkov <bp@suse.de>
+Subject: x86/kaiser: Check boottime cmdline params
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+AMD (and possibly other vendors) are not affected by the leak
+KAISER is protecting against.
+
+Keep the "nopti" for traditional reasons and add pti=<on|off|auto>
+like upstream.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+---
+ Documentation/kernel-parameters.txt | 6 +++
+ arch/x86/mm/kaiser.c | 59 +++++++++++++++++++++++++-----------
+ 2 files changed, 47 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -16,6 +16,7 @@
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
++#include <asm/cmdline.h>
+
+ int kaiser_enabled __read_mostly = 1;
+ EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */
+@@ -258,6 +259,43 @@ static void __init kaiser_init_all_pgds(
+ WARN_ON(__ret); \
+ } while (0)
+
++void __init kaiser_check_boottime_disable(void)
++{
++ bool enable = true;
++ char arg[5];
++ int ret;
++
++ ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
++ if (ret > 0) {
++ if (!strncmp(arg, "on", 2))
++ goto enable;
++
++ if (!strncmp(arg, "off", 3))
++ goto disable;
++
++ if (!strncmp(arg, "auto", 4))
++ goto skip;
++ }
++
++ if (cmdline_find_option_bool(boot_command_line, "nopti"))
++ goto disable;
++
++skip:
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
++ goto disable;
++
++enable:
++ if (enable)
++ setup_force_cpu_cap(X86_FEATURE_KAISER);
++
++ return;
++
++disable:
++ pr_info("Kernel/User page tables isolation: disabled\n");
++ kaiser_enabled = 0;
++ setup_clear_cpu_cap(X86_FEATURE_KAISER);
++}
++
+ /*
+ * If anything in here fails, we will likely die on one of the
+ * first kernel->user transitions and init will die. But, we
+@@ -269,12 +307,10 @@ void __init kaiser_init(void)
+ {
+ int cpu;
+
+- if (!kaiser_enabled) {
+- setup_clear_cpu_cap(X86_FEATURE_KAISER);
+- return;
+- }
++ kaiser_check_boottime_disable();
+
+- setup_force_cpu_cap(X86_FEATURE_KAISER);
++ if (!kaiser_enabled)
++ return;
+
+ kaiser_init_all_pgds();
+
+@@ -418,16 +454,3 @@ void kaiser_flush_tlb_on_return_to_user(
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+-
+-static int __init x86_nokaiser_setup(char *s)
+-{
+- /* nopti doesn't accept parameters */
+- if (s)
+- return -EINVAL;
+-
+- kaiser_enabled = 0;
+- pr_info("Kernel/User page tables isolation: disabled\n");
+-
+- return 0;
+-}
+-early_param("nopti", x86_nokaiser_setup);
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -3165,6 +3165,12 @@ bytes respectively. Such letter suffixes
+ pt. [PARIDE]
+ See Documentation/blockdev/paride.txt.
+
++ pti= [X86_64]
++ Control KAISER user/kernel address space isolation:
++ on - enable
++ off - disable
++ auto - default setting
++
+ pty.legacy_count=
+ [KNL] Number of legacy pty's. Overwrites compiled-in
+ default number.
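
The control flow in kaiser_check_boottime_disable() above boils down to: an explicit pti=on or pti=off always wins, pti=auto jumps straight to the vendor default and bypasses the nopti check, nopti disables, and the remaining default disables only on AMD (not affected by the leak KAISER protects against). A small standalone model of that decision order, for illustration only (it predates the Xen PV special case added later in this series, and kaiser_should_enable() is an invented name):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Model of the pti=/nopti decision order; not kernel code. */
static bool kaiser_should_enable(const char *pti_arg, bool nopti, bool is_amd)
{
        if (pti_arg && !strcmp(pti_arg, "on"))
                return true;                    /* forced on */
        if (pti_arg && !strcmp(pti_arg, "off"))
                return false;                   /* forced off */
        /* pti=auto skips the nopti check and goes to the vendor default */
        if (!(pti_arg && !strcmp(pti_arg, "auto")) && nopti)
                return false;
        return !is_amd;                         /* default: on unless not affected */
}

int main(void)
{
        printf("pti=off           -> %d\n", kaiser_should_enable("off",  false, false));
        printf("pti=on  on AMD    -> %d\n", kaiser_should_enable("on",   false, true));
        printf("pti=auto on Intel -> %d\n", kaiser_should_enable("auto", false, false));
        printf("pti=auto on AMD   -> %d\n", kaiser_should_enable("auto", false, true));
        printf("nopti on Intel    -> %d\n", kaiser_should_enable(NULL,   true,  false));
        return 0;
}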
diff --git a/patches.suse/4.4-51-kaiser-use-ALTERNATIVE-instead-of-x86_cr3_pcid_noflu.patch b/patches.suse/4.4-51-kaiser-use-ALTERNATIVE-instead-of-x86_cr3_pcid_noflu.patch
new file mode 100644
index 0000000000..2f26360c0e
--- /dev/null
+++ b/patches.suse/4.4-51-kaiser-use-ALTERNATIVE-instead-of-x86_cr3_pcid_noflu.patch
@@ -0,0 +1,129 @@
+From a4eab37144f611c1ccafa52971c2f051060bdc87 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Tue, 3 Oct 2017 20:49:04 -0700
+Subject: [PATCH 03/14] kaiser: use ALTERNATIVE instead of x86_cr3_pcid_noflush
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+Now that we're playing the ALTERNATIVE game, use that more efficient
+method: instead of user-mapping an extra page, and reading an extra
+cacheline each time for x86_cr3_pcid_noflush.
+
+Neel has found that __stringify(bts $X86_CR3_PCID_NOFLUSH_BIT, %rax)
+is a working substitute for the "bts $63, %rax" in these ALTERNATIVEs;
+but the one line with $63 in looks clearer, so let's stick with that.
+
+Worried about what happens with an ALTERNATIVE between the jump and
+jump label in another ALTERNATIVE? I was, but have checked the
+combinations in SWITCH_KERNEL_CR3_NO_STACK at entry_SYSCALL_64,
+and it does a good job.
+
+(cherry picked from Change-Id: I46d06167615aa8d628eed9972125ab2faca93f05)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/entry/entry_64.S | 7 ++++---
+ arch/x86/include/asm/kaiser.h | 6 +++---
+ arch/x86/mm/kaiser.c | 11 +----------
+ 3 files changed, 8 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1086,7 +1086,8 @@ ENTRY(paranoid_entry)
+ jz 2f
+ orl $2, %ebx
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+- orq x86_cr3_pcid_noflush, %rax
++ /* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
++ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ movq %rax, %cr3
+ 2:
+ #endif
+@@ -1346,7 +1347,7 @@ ENTRY(nmi)
+ /* %rax is saved above, so OK to clobber here */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+- orq x86_cr3_pcid_noflush, %rax
++ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+@@ -1590,7 +1591,7 @@ end_repeat_nmi:
+ /* %rax is saved above, so OK to clobber here */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+- orq x86_cr3_pcid_noflush, %rax
++ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -25,7 +25,8 @@
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+-orq x86_cr3_pcid_noflush, \reg
++/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
++ALTERNATIVE "", "bts $63, \reg", X86_FEATURE_PCID
+ movq \reg, %cr3
+ .endm
+
+@@ -39,7 +40,7 @@ movq \reg, %cr3
+ movq %cr3, \reg
+ orq PER_CPU_VAR(x86_cr3_pcid_user), \reg
+ js 9f
+-/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */
++/* If PCID enabled, FLUSH this time, reset to NOFLUSH for next time */
+ movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7)
+ 9:
+ movq \reg, %cr3
+@@ -90,7 +91,6 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ */
+ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+-extern unsigned long x86_cr3_pcid_noflush;
+ DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -32,7 +32,6 @@ DEFINE_PER_CPU_USER_MAPPED(unsigned long
+ * This is also handy because systems that do not support PCIDs
+ * just end up or'ing a 0 into their CR3, which does no harm.
+ */
+-unsigned long x86_cr3_pcid_noflush __read_mostly;
+ DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+
+ /*
+@@ -355,10 +354,6 @@ void __init kaiser_init(void)
+ kaiser_add_user_map_early(&debug_idt_table,
+ sizeof(gate_desc) * NR_VECTORS,
+ __PAGE_KERNEL);
+-
+- kaiser_add_user_map_early(&x86_cr3_pcid_noflush,
+- sizeof(x86_cr3_pcid_noflush),
+- __PAGE_KERNEL);
+ }
+
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
+@@ -432,18 +427,14 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp,
+
+ void kaiser_setup_pcid(void)
+ {
+- unsigned long kern_cr3 = 0;
+ unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET;
+
+- if (this_cpu_has(X86_FEATURE_PCID)) {
+- kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH;
++ if (this_cpu_has(X86_FEATURE_PCID))
+ user_cr3 |= X86_CR3_PCID_USER_NOFLUSH;
+- }
+ /*
+ * These variables are used by the entry/exit
+ * code to change PCID and pgd and TLB flushing.
+ */
+- x86_cr3_pcid_noflush = kern_cr3;
+ this_cpu_write(x86_cr3_pcid_user, user_cr3);
+ }
+
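
The "bts $63, %rax" that the ALTERNATIVEs patch in simply sets the architectural no-flush bit in CR3, and only when X86_FEATURE_PCID is present; the surrounding andq strips the user PCID and the shadow-pgd offset. A userspace model of that bit-twiddling, with illustrative constants (a 12-bit PCID field in CR3[11:0], the shadow pgd one page above the kernel pgd, bit 63 as no-flush; the example PCID value 0x80 is arbitrary and switch_to_kernel_cr3() is not kernel code):

#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>

#define CR3_PCID_MASK      0xfffULL             /* CR3[11:0] holds the PCID */
#define SHADOW_PGD_OFFSET  0x1000ULL            /* shadow pgd = kernel pgd + one page */
#define CR3_NOFLUSH        (1ULL << 63)         /* "do not flush this PCID" */

static uint64_t switch_to_kernel_cr3(uint64_t cr3, bool cpu_has_pcid)
{
        /* Drop the user PCID and the shadow-pgd offset ... */
        cr3 &= ~(CR3_PCID_MASK | SHADOW_PGD_OFFSET);
        /* ... and, only if PCIDs are in use, skip the TLB flush on the write.
         * This conditional set is what ALTERNATIVE "bts $63, %rax" provides
         * once the boot code has patched the entry paths. */
        if (cpu_has_pcid)
                cr3 |= CR3_NOFLUSH;
        return cr3;
}

int main(void)
{
        uint64_t user_cr3 = 0x12344000ULL | SHADOW_PGD_OFFSET | 0x80ULL;

        printf("user   cr3: %#018" PRIx64 "\n", user_cr3);
        printf("kernel cr3: %#018" PRIx64 " (PCID)\n", switch_to_kernel_cr3(user_cr3, true));
        printf("kernel cr3: %#018" PRIx64 " (no PCID)\n", switch_to_kernel_cr3(user_cr3, false));
        return 0;
}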
diff --git a/patches.suse/4.4-52-kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch b/patches.suse/4.4-52-kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch
new file mode 100644
index 0000000000..4d15f9bc9c
--- /dev/null
+++ b/patches.suse/4.4-52-kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch
@@ -0,0 +1,52 @@
+From 6c3d3a8785b870933477d8482f081f423d6cb783 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 29 Oct 2017 11:36:19 -0700
+Subject: [PATCH 04/14] kaiser: drop is_atomic arg to kaiser_pagetable_walk()
+References: bsc#1068032 CVE-2017-5754
+Patch-mainline: Not yet, under development
+
+I have not observed a might_sleep() warning from setup_fixmap_gdt()'s
+use of kaiser_add_mapping() in our tree (why not?), but like upstream
+we have not provided a way for that to pass is_atomic true down to
+kaiser_pagetable_walk(), and at startup it's far from a likely source
+of trouble: so just delete the walk's is_atomic arg and might_sleep().
+
+(cherry picked from Change-Id: I3d3bd33bed80260a74edf3e1e182156a3f4c82ab)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/mm/kaiser.c | 10 ++--------
+ 1 file changed, 2 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -108,19 +108,13 @@ static inline unsigned long get_pa_from_
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.
+ */
+-static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic)
++static pte_t *kaiser_pagetable_walk(unsigned long address)
+ {
+ pmd_t *pmd;
+ pud_t *pud;
+ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+
+- if (is_atomic) {
+- gfp &= ~GFP_KERNEL;
+- gfp |= __GFP_HIGH | __GFP_ATOMIC;
+- } else
+- might_sleep();
+-
+ if (pgd_none(*pgd)) {
+ WARN_ONCE(1, "All shadow pgds should have been populated");
+ return NULL;
+@@ -195,7 +189,7 @@ static int kaiser_add_user_map(const voi
+ ret = -EIO;
+ break;
+ }
+- pte = kaiser_pagetable_walk(address, false);
++ pte = kaiser_pagetable_walk(address);
+ if (!pte) {
+ ret = -ENOMEM;
+ break;
diff --git a/patches.suse/4.4-53-kaiser-asm-tlbflush.h-handle-noPGE-at-lower-level.patch b/patches.suse/4.4-53-kaiser-asm-tlbflush.h-handle-noPGE-at-lower-level.patch
new file mode 100644
index 0000000000..634cbec29b
--- /dev/null
+++ b/patches.suse/4.4-53-kaiser-asm-tlbflush.h-handle-noPGE-at-lower-level.patch
@@ -0,0 +1,90 @@
+From 52d24397f79ed5327ebff0921c73a3ceae6ec700 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 4 Nov 2017 18:23:24 -0700
+Subject: [PATCH 05/14] kaiser: asm/tlbflush.h handle noPGE at lower level
+Patch-mainline: Not yet, under development
+References: bsc#1068032
+
+I found asm/tlbflush.h too twisty, and think it safer not to avoid
+__native_flush_tlb_global_irq_disabled() in the kaiser_enabled case,
+but instead let it handle kaiser_enabled along with cr3: it can just
+use __native_flush_tlb() for that, no harm in re-disabling preemption.
+
+(This is not the same change as Kirill and Dave have suggested for
+upstream, flipping PGE in cr4: that's neat, but needs a cpu_has_pge
+check; cr3 is enough for kaiser, and thought to be cheaper than cr4.)
+
+Also delete the X86_FEATURE_INVPCID invpcid_flush_all_nonglobals()
+preference from __native_flush_tlb(): unlike the invpcid_flush_all()
+preference in __native_flush_tlb_global(), it's not seen in upstream
+4.14, and was recently reported to be surprisingly slow.
+
+(cherry picked from Change-Id: I0da819a797ff46bca6590040b6480178dff6ba1e)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/include/asm/tlbflush.h | 27 +++------------------------
+ 1 file changed, 3 insertions(+), 24 deletions(-)
+
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index b376095a1fd9..6fdc8c399601 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -151,14 +151,6 @@ static inline void kaiser_flush_tlb_on_return_to_user(void)
+
+ static inline void __native_flush_tlb(void)
+ {
+- if (this_cpu_has(X86_FEATURE_INVPCID)) {
+- /*
+- * Note, this works with CR4.PCIDE=0 or 1.
+- */
+- invpcid_flush_all_nonglobals();
+- return;
+- }
+-
+ /*
+ * If current->mm == NULL then we borrow a mm which may change during a
+ * task switch and therefore we must not be preempted while we write CR3
+@@ -182,11 +174,8 @@ static inline void __native_flush_tlb_global_irq_disabled(void)
+ /* restore PGE as it was before */
+ native_write_cr4(cr4);
+ } else {
+- /*
+- * x86_64 microcode update comes this way when CR4.PGE is not
+- * enabled, and it's safer for all callers to allow this case.
+- */
+- native_write_cr3(native_read_cr3());
++ /* do it with cr3, letting kaiser flush user PCID */
++ __native_flush_tlb();
+ }
+ }
+
+@@ -194,12 +183,6 @@ static inline void __native_flush_tlb_global(void)
+ {
+ unsigned long flags;
+
+- if (kaiser_enabled) {
+- /* Globals are not used at all */
+- __native_flush_tlb();
+- return;
+- }
+-
+ if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+@@ -255,11 +238,7 @@ static inline void __native_flush_tlb_single(unsigned long addr)
+
+ static inline void __flush_tlb_all(void)
+ {
+- if (cpu_has_pge)
+- __flush_tlb_global();
+- else
+- __flush_tlb();
+-
++ __flush_tlb_global();
+ /*
+ * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+ * we'd end up flushing kernel translations for the current ASID but
+--
+2.15.1.424.g9478a66081-goog
+
diff --git a/patches.suse/4.4-54-kaiser-kaiser_flush_tlb_on_return_to_user-check-PCID.patch b/patches.suse/4.4-54-kaiser-kaiser_flush_tlb_on_return_to_user-check-PCID.patch
new file mode 100644
index 0000000000..216c455d62
--- /dev/null
+++ b/patches.suse/4.4-54-kaiser-kaiser_flush_tlb_on_return_to_user-check-PCID.patch
@@ -0,0 +1,85 @@
+From e5edda043f30c948e4da9c963e06a633b821f31f Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 4 Nov 2017 18:43:06 -0700
+Subject: [PATCH 06/14] kaiser: kaiser_flush_tlb_on_return_to_user() check PCID
+Patch-mainline: Not yet, under development
+References: bsc#1068032
+
+Let kaiser_flush_tlb_on_return_to_user() do the X86_FEATURE_PCID
+check, instead of each caller doing it inline first: nobody needs
+to optimize for the noPCID case, it's clearer this way, and better
+suits later changes. Replace those no-op X86_CR3_PCID_KERN_FLUSH lines
+by a BUILD_BUG_ON() in load_new_mm_cr3(), in case something changes.
+
+(cherry picked from Change-Id: I9b528ed9d7c1ae4a3b4738c2894ee1740b6fb0b9)
+
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/include/asm/tlbflush.h | 4 ++--
+ arch/x86/mm/kaiser.c | 6 +++---
+ arch/x86/mm/tlb.c | 8 ++++----
+ 3 files changed, 9 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -157,7 +157,7 @@ static inline void __native_flush_tlb(vo
+ * back:
+ */
+ preempt_disable();
+- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
++ if (kaiser_enabled)
+ kaiser_flush_tlb_on_return_to_user();
+ native_write_cr3(native_read_cr3());
+ preempt_enable();
+@@ -216,7 +216,7 @@ static inline void __native_flush_tlb_si
+ */
+
+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
+- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
++ if (kaiser_enabled)
+ kaiser_flush_tlb_on_return_to_user();
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ return;
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -434,12 +434,12 @@ void kaiser_setup_pcid(void)
+
+ /*
+ * Make a note that this cpu will need to flush USER tlb on return to user.
+- * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling:
+- * if cpu does not, then the NOFLUSH bit will never have been set.
++ * If cpu does not have PCID, then the NOFLUSH bit will never have been set.
+ */
+ void kaiser_flush_tlb_on_return_to_user(void)
+ {
+- this_cpu_write(x86_cr3_pcid_user,
++ if (this_cpu_has(X86_FEATURE_PCID))
++ this_cpu_write(x86_cr3_pcid_user,
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -39,7 +39,7 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ {
+ unsigned long new_mm_cr3 = __pa(pgdir);
+
+- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) {
++ if (kaiser_enabled) {
+ /*
+ * We reuse the same PCID for different tasks, so we must
+ * flush all the entries for the PCID out when we change tasks.
+@@ -50,10 +50,10 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ * do it here, but can only be used if X86_FEATURE_INVPCID is
+ * available - and many machines support pcid without invpcid.
+ *
+- * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0;
+- * but keep that line in there in case something changes.
++ * If X86_CR3_PCID_KERN_FLUSH actually added something, then it
++ * would be needed in the write_cr3() below - if PCIDs enabled.
+ */
+- new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
++ BUILD_BUG_ON(X86_CR3_PCID_KERN_FLUSH);
+ kaiser_flush_tlb_on_return_to_user();
+ }
+
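
The point of the BUILD_BUG_ON() above is to turn "X86_CR3_PCID_KERN_FLUSH is zero, so OR-ing it in was a no-op" from a silent assumption into a build failure if that ever changes. The same pattern in plain C11, with a stand-in constant (not the kernel's definition):

#include <stdio.h>

/* Stand-in for X86_CR3_PCID_KERN_FLUSH: the code below assumes it is 0. */
#define CR3_PCID_KERN_FLUSH 0ULL

/* C11 equivalent of the kernel's BUILD_BUG_ON(): refuse to compile if the
 * kernel-flush bits ever become non-zero, because the CR3 write in
 * load_new_mm_cr3() would then need to OR them in explicitly. */
_Static_assert(CR3_PCID_KERN_FLUSH == 0, "CR3 kernel-flush bits must stay zero");

int main(void)
{
        puts("assumption holds at compile time");
        return 0;
}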
diff --git a/patches.suse/4.4-55-x86-paravirt-dont-patch-flush_tlb_single.patch b/patches.suse/4.4-55-x86-paravirt-dont-patch-flush_tlb_single.patch
new file mode 100644
index 0000000000..09f39d0b5a
--- /dev/null
+++ b/patches.suse/4.4-55-x86-paravirt-dont-patch-flush_tlb_single.patch
@@ -0,0 +1,65 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:30 +0100
+Subject: x86/paravirt: Dont patch flush_tlb_single
+Git-commit: 4a24c80e6f8a8468669cf46926a552c56fbb71c8
+Patch-mainline: v4.16 or v4.15-rc4 (next release)
+References: bsc#1068032
+
+native_flush_tlb_single() will be changed with the upcoming
+PAGE_TABLE_ISOLATION feature. This requires more code there than
+just INVLPG.
+
+Remove the paravirt patching for it.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Acked-by: Peter Zijlstra <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Cc: michael.schwarz@iaik.tugraz.at
+Cc: moritz.lipp@iaik.tugraz.at
+Cc: richard.fellner@student.tugraz.at
+Link: https://lkml.kernel.org/r/20171204150606.828111617@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kernel/paravirt_patch_64.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/arch/x86/kernel/paravirt_patch_64.c
++++ b/arch/x86/kernel/paravirt_patch_64.c
+@@ -9,7 +9,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq;
+ DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
+ DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
+ DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
+-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
+ DEF_NATIVE(pv_cpu_ops, clts, "clts");
+ DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
+
+@@ -62,7 +61,6 @@ unsigned native_patch(u8 type, u16 clobb
+ PATCH_SITE(pv_mmu_ops, read_cr3);
+ PATCH_SITE(pv_mmu_ops, write_cr3);
+ PATCH_SITE(pv_cpu_ops, clts);
+- PATCH_SITE(pv_mmu_ops, flush_tlb_single);
+ PATCH_SITE(pv_cpu_ops, wbinvd);
+ #if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+ case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
diff --git a/patches.suse/4.4-57-Reenable_PARAVIRT.patch b/patches.suse/4.4-57-Reenable_PARAVIRT.patch
new file mode 100644
index 0000000000..5835e8fd77
--- /dev/null
+++ b/patches.suse/4.4-57-Reenable_PARAVIRT.patch
@@ -0,0 +1,25 @@
+From: Borislav Petkov <bp@suse.de>
+Subject: x86/kaiser: Reenable PARAVIRT
+Patch-mainline: Not yet, under development
+References: bsc#1068032
+
+Now that the required bits have been addressed, reenable
+PARAVIRT.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+
+---
+ security/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -34,7 +34,7 @@ config SECURITY
+ config KAISER
+ bool "Remove the kernel mapping in user mode"
+ default y
+- depends on X86_64 && SMP && !PARAVIRT
++ depends on X86_64 && SMP
+ help
+ This enforces a strict kernel and user space isolation, in order
+ to close hardware side channels on kernel address information.
diff --git a/patches.suse/4.4-58-kaiser-disable-on-xen.patch b/patches.suse/4.4-58-kaiser-disable-on-xen.patch
new file mode 100644
index 0000000000..d038ac1fef
--- /dev/null
+++ b/patches.suse/4.4-58-kaiser-disable-on-xen.patch
@@ -0,0 +1,39 @@
+From: Jiri Kosina <jkosina@suse.cz>
+Subject: [PATCH] kaiser: disabled on Xen PV
+Patch-mainline: Not yet, under development
+References: bsc#1068032
+
+Kaiser cannot be used when the MMU is paravirtualized (i.e. CR3 reads and
+writes go through the hypervisor): the CR3 switch from and to the user space
+PGD would require mapping the whole XEN_PV machinery into both page tables.
+
+More importantly, enabling KAISER on Xen PV doesn't make much sense, as PV
+guests already use distinct %cr3 values for kernel and user.
+
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+
+---
+ arch/x86/mm/kaiser.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -258,6 +258,9 @@ void __init kaiser_check_boottime_disabl
+ char arg[5];
+ int ret;
+
++ if (boot_cpu_has(X86_FEATURE_XENPV))
++ goto silent_disable;
++
+ ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
+ if (ret > 0) {
+ if (!strncmp(arg, "on", 2))
+@@ -285,6 +288,8 @@ enable:
+
+ disable:
+ pr_info("Kernel/User page tables isolation: disabled\n");
++
++silent_disable:
+ kaiser_enabled = 0;
+ setup_clear_cpu_cap(X86_FEATURE_KAISER);
+ }
diff --git a/patches.suse/kgr-0002-livepatch-add-infrastructure.patch b/patches.suse/kgr-0002-livepatch-add-infrastructure.patch
index fa96f35d6e..5ddbc1da5b 100644
--- a/patches.suse/kgr-0002-livepatch-add-infrastructure.patch
+++ b/patches.suse/kgr-0002-livepatch-add-infrastructure.patch
@@ -55,16 +55,6 @@ Cc: x86@kernel.org
#define _TIF_31BIT _BITUL(TIF_31BIT)
#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP)
---- a/arch/s390/kernel/Makefile
-+++ b/arch/s390/kernel/Makefile
-@@ -56,6 +56,7 @@ obj-$(CONFIG_AUDIT) += audit.o
- compat-obj-$(CONFIG_AUDIT) += compat_audit.o
- obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o
- obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y)
-+obj-$(CONFIG_LIVEPATCH) += livepatch.o
-
- obj-$(CONFIG_STACKTRACE) += stacktrace.o
- obj-$(CONFIG_KPROBES) += kprobes.o
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -46,7 +46,7 @@ STACK_SIZE = 1 << STACK_SHIFT
@@ -92,7 +82,7 @@ Cc: x86@kernel.org
.macro CHECK_STACK stacksize,savearea
#ifdef CONFIG_CHECK_STACK
tml %r15,\stacksize - CONFIG_STACK_GUARD
-@@ -275,6 +284,7 @@ ENTRY(system_call)
+@@ -319,6 +328,7 @@ ENTRY(system_call)
mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
stg %r14,__PT_FLAGS(%r11)
.Lsysc_do_svc:
@@ -100,7 +90,7 @@ Cc: x86@kernel.org
lg %r10,__TI_sysc_table(%r12) # address of system call table
llgh %r8,__PT_INT_CODE+2(%r11)
slag %r8,%r8,2 # shift and test for svc 0
-@@ -328,6 +338,8 @@ ENTRY(system_call)
+@@ -374,6 +384,8 @@ ENTRY(system_call)
#endif
TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP
jo .Lsysc_singlestep
@@ -109,7 +99,7 @@ Cc: x86@kernel.org
TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
jo .Lsysc_sigpending
TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
-@@ -651,6 +663,7 @@ ENTRY(io_int_handler)
+@@ -704,6 +716,7 @@ ENTRY(io_int_handler)
jo .Lio_mcck_pending
TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
jo .Lio_reschedule
@@ -134,6 +124,16 @@ Cc: x86@kernel.org
+{
+ klp_kgraft_mark_task_safe(current);
+}
+--- a/arch/s390/kernel/Makefile
++++ b/arch/s390/kernel/Makefile
+@@ -56,6 +56,7 @@ obj-$(CONFIG_AUDIT) += audit.o
+ compat-obj-$(CONFIG_AUDIT) += compat_audit.o
+ obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o
+ obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y)
++obj-$(CONFIG_LIVEPATCH) += livepatch.o
+
+ obj-$(CONFIG_STACKTRACE) += stacktrace.o
+ obj-$(CONFIG_KPROBES) += kprobes.o
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -141,6 +141,13 @@ unsigned long syscall_trace_enter_phase1
@@ -207,7 +207,7 @@ Cc: x86@kernel.org
#define _TIF_WORK_CTXSW \
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
-@@ -2243,6 +2243,17 @@ static const struct file_operations proc
+@@ -2240,6 +2240,17 @@ static const struct file_operations proc
.release = seq_release_private,
};
@@ -225,7 +225,7 @@ Cc: x86@kernel.org
static int proc_pident_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
-@@ -2820,6 +2831,9 @@ static const struct pid_entry tgid_base_
+@@ -2817,6 +2828,9 @@ static const struct pid_entry tgid_base_
#ifdef CONFIG_CHECKPOINT_RESTORE
REG("timers", S_IRUGO, proc_timers_operations),
#endif
@@ -237,7 +237,7 @@ Cc: x86@kernel.org
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -3170,6 +3170,29 @@ static inline void mm_update_next_owner(
+@@ -3290,6 +3290,29 @@ static inline void mm_update_next_owner(
}
#endif /* CONFIG_MEMCG */
diff --git a/patches.suse/powerpc-Secure-memory-rfi-flush-SLE12SP3.patch b/patches.suse/powerpc-Secure-memory-rfi-flush-SLE12SP3.patch
new file mode 100644
index 0000000000..3a8e5d45a7
--- /dev/null
+++ b/patches.suse/powerpc-Secure-memory-rfi-flush-SLE12SP3.patch
@@ -0,0 +1,558 @@
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Fri, 15 Dec 2017 16:34:38 +1100
+Subject: [PATCH] powerpc: Secure memory rfi flush
+Patch-mainline: Not yet, under development
+References: bsc#1068032
+
+This puts a nop before each rfid/hrfid and patches in an L1-D
+cache flush instruction where possible.
+
+It provides /sys/devices/system/cpu/secure_memory_protection, which can
+report the current state and patch the rfi flushes at runtime.
+
+This has some debug checking in the rfi instructions to make sure
+we're returning to the context we think we are, so we can avoid
+some flushes.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+---
+--- a/arch/powerpc/include/asm/exception-64s.h
++++ b/arch/powerpc/include/asm/exception-64s.h
+@@ -34,6 +34,7 @@
+ * exception handlers (including pSeries LPAR) and iSeries LPAR
+ * implementations as possible.
+ */
++#include <asm/bug.h>
+
+ #define EX_R9 0
+ #define EX_R10 8
+@@ -50,6 +51,58 @@
+ #define EX_PPR 88 /* SMT thread status register (priority) */
+ #define EX_CTR 96
+
++/*
++ * The nop instruction allows a secure memory protection instruction to be
++ * inserted with the rfi flush fixup.
++ */
++#define PREPARE_RFI_TO_USER \
++ RFI_FLUSH_FIXUP_SECTION; \
++ nop
++
++#define PREPARE_RFI_TO_GUEST \
++ RFI_FLUSH_FIXUP_SECTION; \
++ nop
++
++#define DEBUG_RFI
++
++#ifdef DEBUG_RFI
++#define CHECK_TARGET_MSR_PR(srr_reg, expected_pr) \
++ SET_SCRATCH0(r3); \
++ mfspr r3,srr_reg; \
++ extrdi r3,r3,1,63-MSR_PR_LG; \
++666: tdnei r3,expected_pr; \
++ EMIT_BUG_ENTRY 666b,__FILE__,__LINE__,0; \
++ GET_SCRATCH0(r3);
++#else
++#define CHECK_TARGET_MSR_PR(expected)
++#endif
++
++#define RFI_TO_KERNEL \
++ CHECK_TARGET_MSR_PR(SPRN_SRR1, 0); \
++ rfid
++
++#define RFI_TO_USER \
++ CHECK_TARGET_MSR_PR(SPRN_SRR1, 1); \
++ PREPARE_RFI_TO_USER; \
++ rfid
++
++#define RFI_TO_GUEST \
++ PREPARE_RFI_TO_GUEST; \
++ rfid
++
++#define HRFI_TO_KERNEL \
++ CHECK_TARGET_MSR_PR(SPRN_HSRR1, 0); \
++ hrfid
++
++#define HRFI_TO_USER \
++ CHECK_TARGET_MSR_PR(SPRN_HSRR1, 1); \
++ PREPARE_RFI_TO_USER; \
++ hrfid
++
++#define HRFI_TO_GUEST \
++ PREPARE_RFI_TO_GUEST; \
++ hrfid
++
+ #ifdef CONFIG_RELOCATABLE
+ #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
+ ld r12,PACAKBASE(r13); /* get high part of &label */ \
+@@ -191,7 +244,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
+ mtspr SPRN_##h##SRR0,r12; \
+ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
+ mtspr SPRN_##h##SRR1,r10; \
+- h##rfid; \
++ h##rfid; /* h##RFI_TO_KERNEL runs out of space */ \
+ b . /* prevent speculative execution */
+ #define EXCEPTION_PROLOG_PSERIES_1(label, h) \
+ __EXCEPTION_PROLOG_PSERIES_1(label, h)
+--- a/arch/powerpc/include/asm/feature-fixups.h
++++ b/arch/powerpc/include/asm/feature-fixups.h
+@@ -184,4 +184,19 @@ label##3: \
+ FTR_ENTRY_OFFSET label##1b-label##3b; \
+ .popsection;
+
++#define RFI_FLUSH_FIXUP_SECTION \
++951: \
++ .pushsection __rfi_flush_fixup,"a"; \
++ .align 2; \
++952: \
++ FTR_ENTRY_OFFSET 951b-952b; \
++ .popsection;
++
++#ifndef __ASSEMBLY__
++#include <linux/types.h>
++
++extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
++extern void do_rfi_flush_fixups(bool enable, unsigned int insn);
++
++#endif
+ #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */
+--- a/arch/powerpc/include/asm/setup.h
++++ b/arch/powerpc/include/asm/setup.h
+@@ -26,6 +26,10 @@ void initmem_init(void);
+ void setup_panic(void);
+ #define ARCH_PANIC_TIMEOUT 180
+
++extern bool rfi_flush;
++void rfi_flush_enable(bool enable);
++void __init setup_rfi_flush(void);
++
+ #endif /* !__ASSEMBLY__ */
+
+ #endif /* _ASM_POWERPC_SETUP_H */
+--- a/arch/powerpc/kernel/entry_64.S
++++ b/arch/powerpc/kernel/entry_64.S
+@@ -37,6 +37,9 @@
+ #include <asm/hw_irq.h>
+ #include <asm/context_tracking.h>
+ #include <asm/tm.h>
++#ifdef CONFIG_PPC_BOOK3S
++#include <asm/exception-64s.h>
++#endif
+
+ /*
+ * System calls.
+@@ -226,13 +229,23 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECK
+ ACCOUNT_CPU_USER_EXIT(r11, r12)
+ HMT_MEDIUM_LOW_HAS_PPR
+ ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
++ ld r2,GPR2(r1)
++ ld r1,GPR1(r1)
++ mtlr r4
++ mtcr r5
++ mtspr SPRN_SRR0,r7
++ mtspr SPRN_SRR1,r8
++ RFI_TO_USER
++ b . /* prevent speculative execution */
++
++ /* exit to kernel */
+ 1: ld r2,GPR2(r1)
+ ld r1,GPR1(r1)
+ mtlr r4
+ mtcr r5
+ mtspr SPRN_SRR0,r7
+ mtspr SPRN_SRR1,r8
+- RFI
++ RFI_TO_KERNEL
+ b . /* prevent speculative execution */
+
+ syscall_error:
+@@ -888,7 +901,7 @@ BEGIN_FTR_SECTION
+ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ ACCOUNT_CPU_USER_EXIT(r2, r4)
+ REST_GPR(13, r1)
+-1:
++
+ mtspr SPRN_SRR1,r3
+
+ ld r2,_CCR(r1)
+@@ -901,8 +914,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ ld r3,GPR3(r1)
+ ld r4,GPR4(r1)
+ ld r1,GPR1(r1)
++ RFI_TO_USER
++ b . /* prevent speculative execution */
+
+- rfid
++1: mtspr SPRN_SRR1,r3
++
++ ld r2,_CCR(r1)
++ mtcrf 0xFF,r2
++ ld r2,_NIP(r1)
++ mtspr SPRN_SRR0,r2
++
++ ld r0,GPR0(r1)
++ ld r2,GPR2(r1)
++ ld r3,GPR3(r1)
++ ld r4,GPR4(r1)
++ ld r1,GPR1(r1)
++ RFI_TO_KERNEL
+ b . /* prevent speculative execution */
+
+ #endif /* CONFIG_PPC_BOOK3E */
+@@ -1078,7 +1105,7 @@ _GLOBAL(enter_rtas)
+
+ mtspr SPRN_SRR0,r5
+ mtspr SPRN_SRR1,r6
+- rfid
++ RFI_TO_KERNEL
+ b . /* prevent speculative execution */
+
+ rtas_return_loc:
+@@ -1103,7 +1130,7 @@ rtas_return_loc:
+
+ mtspr SPRN_SRR0,r3
+ mtspr SPRN_SRR1,r4
+- rfid
++ RFI_TO_KERNEL
+ b . /* prevent speculative execution */
+
+ .align 3
+--- a/arch/powerpc/kernel/exceptions-64s.S
++++ b/arch/powerpc/kernel/exceptions-64s.S
+@@ -54,7 +54,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
+ 1: mfspr r12,SPRN_SRR1 ; \
+ xori r12,r12,MSR_LE ; \
+ mtspr SPRN_SRR1,r12 ; \
+- rfid ; /* return to userspace */ \
++ RFI_TO_USER ; /* return to userspace */ \
+ b . ; /* prevent speculative execution */
+
+ #if defined(CONFIG_RELOCATABLE)
+@@ -671,7 +671,7 @@ masked_##_H##interrupt: \
+ ld r10,PACA_EXGEN+EX_R10(r13); \
+ ld r11,PACA_EXGEN+EX_R11(r13); \
+ GET_SCRATCH0(r13); \
+- ##_H##rfid; \
++ ##_H##RFI_TO_KERNEL; \
+ b .
+
+ MASKED_INTERRUPT()
+@@ -747,7 +747,7 @@ slb_miss_user_pseries:
+ mtspr SRR0,r12
+ mfspr r12,SRR1 /* and SRR1 */
+ mtspr SRR1,r10
+- rfid
++ RFI_TO_KERNEL
+ b . /* prevent spec. execution */
+ #endif /* __DISABLED__ */
+
+@@ -761,7 +761,7 @@ kvmppc_skip_interrupt:
+ addi r13, r13, 4
+ mtspr SPRN_SRR0, r13
+ GET_SCRATCH0(r13)
+- rfid
++ RFI_TO_GUEST
+ b .
+
+ kvmppc_skip_Hinterrupt:
+@@ -773,7 +773,7 @@ kvmppc_skip_Hinterrupt:
+ addi r13, r13, 4
+ mtspr SPRN_HSRR0, r13
+ GET_SCRATCH0(r13)
+- hrfid
++ HRFI_TO_GUEST
+ b .
+ #endif
+
+@@ -1525,7 +1525,7 @@ slb_miss_realmode:
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
+ mtspr SPRN_SRR1,r10
+- rfid
++ RFI_TO_KERNEL
+ b .
+
+ unrecov_slb:
+--- a/arch/powerpc/kernel/setup_64.c
++++ b/arch/powerpc/kernel/setup_64.c
+@@ -847,3 +847,80 @@ static int __init disable_hardlockup_det
+ }
+ early_initcall(disable_hardlockup_detector);
+ #endif
++
++#ifdef CONFIG_PPC_BOOK3S_64
++enum l1d_flush_type {
++ L1D_FLUSH_NONE,
++ L1D_FLUSH_ORI,
++ L1D_FLUSH_MTTRIG,
++};
++
++enum l1d_flush_type l1d_flush_type;
++
++bool rfi_flush;
++
++static void do_rfi_flush(void *val)
++{
++ switch (l1d_flush_type) {
++ case L1D_FLUSH_ORI:
++ asm volatile("ori 30,30,0" ::: "memory");
++ break;
++ case L1D_FLUSH_MTTRIG:
++ asm volatile("mtspr 882,0" ::: "memory");
++ break;
++ default:
++ break;
++ }
++}
++
++void rfi_flush_enable(bool enable)
++{
++ unsigned int insn;
++
++ if (rfi_flush == enable)
++ return;
++
++ switch (l1d_flush_type) {
++ case L1D_FLUSH_ORI:
++ insn = 0x63de0000;
++ break;
++ case L1D_FLUSH_MTTRIG:
++ insn = 0x7c12dba6;
++ break;
++ default:
++ printk("Secure memory protection not enabled! System is vulnerable to local exploit. Update firmware.\n");
++ return;
++ }
++
++ do_rfi_flush_fixups(enable, insn);
++
++ if (enable)
++ on_each_cpu(do_rfi_flush, NULL, 1);
++
++ rfi_flush = enable;
++}
++
++/* This tries to guess the cpu characteristics based on the PVR. */
++static bool get_cpu_characteristics(void)
++{
++ if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p))
++ l1d_flush_type = L1D_FLUSH_NONE;
++ else if (pvr_version_is(PVR_POWER8E) ||
++ pvr_version_is(PVR_POWER8NVL) ||
++ pvr_version_is(PVR_POWER8))
++ l1d_flush_type = L1D_FLUSH_ORI;
++ else {
++ /* unknown CPU */
++ l1d_flush_type = L1D_FLUSH_NONE;
++ return false;
++ }
++
++ return true;
++}
++
++void __init setup_rfi_flush(void)
++{
++ if (get_cpu_characteristics())
++ rfi_flush_enable(true);
++}
++#endif /* CONFIG_PPC_BOOK3S_64 */
+--- a/arch/powerpc/kernel/sysfs.c
++++ b/arch/powerpc/kernel/sysfs.c
+@@ -18,8 +18,10 @@
+ #include <asm/smp.h>
+ #include <asm/pmc.h>
+ #include <asm/firmware.h>
++// #include <asm/ppc_asm.h>
+
+ #include "cacheinfo.h"
++// #include "setup.h"
+
+ #ifdef CONFIG_PPC64
+ #include <asm/paca.h>
+@@ -496,6 +498,43 @@ static DEVICE_ATTR(spurr, 0400, show_spu
+ static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
+ static DEVICE_ATTR(pir, 0400, show_pir, NULL);
+
++#ifdef CONFIG_PPC_BOOK3S_64
++static ssize_t show_rfi_flush(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ return sprintf(buf, "%d\n", rfi_flush ? 1 : 0);
++}
++
++static ssize_t __used store_rfi_flush(struct device *dev,
++ struct device_attribute *attr, const char *buf,
++ size_t count)
++{
++ int val;
++ int ret = 0;
++
++ ret = sscanf(buf, "%d", &val);
++ if (ret != 1)
++ return -EINVAL;
++
++ if (val == 1)
++ rfi_flush_enable(true);
++ else if (val == 0)
++ rfi_flush_enable(false);
++ else
++ return -EINVAL;
++
++ return count;
++}
++
++static DEVICE_ATTR(rfi_flush, 0600,
++ show_rfi_flush, store_rfi_flush);
++
++static void sysfs_create_rfi_flush(void)
++{
++ device_create_file(cpu_subsys.dev_root, &dev_attr_rfi_flush);
++}
++#endif /* CONFIG_PPC_BOOK3S_64 */
++
+ /*
+ * This is the system wide DSCR register default value. Any
+ * change to this default value through the sysfs interface
+@@ -1058,6 +1097,7 @@ static int __init topology_init(void)
+
+ #ifdef CONFIG_PPC64
+ sysfs_create_dscr_default();
++ sysfs_create_rfi_flush();
+ #endif /* CONFIG_PPC64 */
+
+ return 0;
+--- a/arch/powerpc/kernel/vmlinux.lds.S
++++ b/arch/powerpc/kernel/vmlinux.lds.S
+@@ -72,6 +72,15 @@ SECTIONS
+ /* Read-only data */
+ RODATA
+
++#ifdef CONFIG_PPC64
++ . = ALIGN(8);
++ __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
++ __start___rfi_flush_fixup = .;
++ *(__rfi_flush_fixup)
++ __stop___rfi_flush_fixup = .;
++ }
++#endif
++
+ EXCEPTION_TABLE(0)
+
+ NOTES :kernel :notes
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -64,7 +64,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
+ mtmsrd r0,1 /* clear RI in MSR */
+ mtsrr0 r5
+ mtsrr1 r6
+- RFI
++ RFI_TO_KERNEL
+
+ kvmppc_call_hv_entry:
+ ld r4, HSTATE_KVM_VCPU(r13)
+@@ -170,7 +170,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ mtsrr0 r8
+ mtsrr1 r7
+ beq cr1, 13f /* machine check */
+- RFI
++ RFI_TO_KERNEL
+
+ /* On POWER7, we have external interrupts set to use HSRR0/1 */
+ 11: mtspr SPRN_HSRR0, r8
+@@ -965,8 +965,7 @@ BEGIN_FTR_SECTION
+ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ ld r0, VCPU_GPR(R0)(r4)
+ ld r4, VCPU_GPR(R4)(r4)
+-
+- hrfid
++ HRFI_TO_GUEST
+ b .
+
+ secondary_too_late:
+--- a/arch/powerpc/kvm/book3s_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_rmhandlers.S
+@@ -141,7 +141,7 @@ kvmppc_handler_skip_ins:
+ GET_SCRATCH0(r13)
+
+ /* And get back into the code */
+- RFI
++ RFI_TO_GUEST
+ #endif
+
+ /*
+@@ -164,6 +164,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline)
+ ori r5, r5, MSR_EE
+ mtsrr0 r7
+ mtsrr1 r6
+- RFI
++ RFI_TO_KERNEL
+
+ #include "book3s_segment.S"
+--- a/arch/powerpc/lib/feature-fixups.c
++++ b/arch/powerpc/lib/feature-fixups.c
+@@ -20,6 +20,7 @@
+ #include <asm/code-patching.h>
+ #include <asm/page.h>
+ #include <asm/sections.h>
++#include <asm/setup.h>
+
+
+ struct fixup_entry {
+@@ -113,6 +114,33 @@ void do_feature_fixups(unsigned long val
+ }
+ }
+
++#ifdef CONFIG_PPC_BOOK3S_64
++void do_rfi_flush_fixups(bool enable, unsigned int insn)
++{
++ long *start, *end;
++ unsigned int *dest;
++ int i;
++
++ start = PTRRELOC(&__start___rfi_flush_fixup),
++ end = PTRRELOC(&__stop___rfi_flush_fixup);
++
++ for (i = 0; start < end; start++, i++) {
++ dest = (void *)start + *start;
++
++ pr_devel("RFI FLUSH FIXUP %s %lx\n", enable ? "enable" : "disable", (unsigned long)start);
++ if (!enable) {
++ pr_devel("patching dest %lx\n", (unsigned long)dest);
++ patch_instruction(dest, PPC_INST_NOP);
++ } else {
++ pr_devel("patching dest %lx\n", (unsigned long)dest);
++ patch_instruction(dest, insn);
++ }
++ }
++
++ printk(KERN_DEBUG "rfi-fixups: patched %d locations\n", i);
++}
++#endif /* CONFIG_PPC_BOOK3S_64 */
++
+ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
+ {
+ long *start, *end;
+--- a/arch/powerpc/platforms/powernv/setup.c
++++ b/arch/powerpc/platforms/powernv/setup.c
+@@ -42,6 +42,8 @@ static void __init pnv_setup_arch(void)
+ {
+ set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
+
++ setup_rfi_flush();
++
+ /* Initialize SMP */
+ pnv_smp_init();
+
+--- a/arch/powerpc/platforms/pseries/setup.c
++++ b/arch/powerpc/platforms/pseries/setup.c
+@@ -561,6 +561,8 @@ static void __init pSeries_setup_arch(vo
+
+ fwnmi_init();
+
++ setup_rfi_flush();
++
+ /* By default, only probe PCI (can be overriden by rtas_pci) */
+ pci_add_flags(PCI_PROBE_ONLY);
+
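The do_rfi_flush_fixups() loop in the hunk above resolves each __rfi_flush_fixup entry as a self-relative offset: every entry stores the distance from itself to its patch site, and the site is recovered with dest = (void *)start + *start. The standalone sketch below only illustrates that addressing scheme; it is not part of the patch, and the names site_a, site_b and fixup are hypothetical stand-ins.

#include <stdio.h>

static unsigned int site_a, site_b;   /* stand-ins for two RFI patch sites        */
static long fixup[2];                 /* stand-in for the __rfi_flush_fixup table */

int main(void)
{
        /* Each table entry records "patch-site address minus entry address". */
        fixup[0] = (char *)&site_a - (char *)&fixup[0];
        fixup[1] = (char *)&site_b - (char *)&fixup[1];

        /* Recover each site the same way the loop in do_rfi_flush_fixups()
         * does: dest = (void *)start + *start. */
        for (long *entry = fixup; entry < fixup + 2; entry++) {
                unsigned int *dest = (unsigned int *)((char *)entry + *entry);
                printf("entry %p -> patch site %p\n", (void *)entry, (void *)dest);
        }
        return 0;
}

Because the offsets are self-relative, the table stays valid wherever the image is loaded, which is why resolving the start/stop symbols with PTRRELOC() is sufficient.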
diff --git a/patches.suse/powerpc-add-gmb.patch b/patches.suse/powerpc-add-gmb.patch
new file mode 100644
index 0000000000..8b06e854b2
--- /dev/null
+++ b/patches.suse/powerpc-add-gmb.patch
@@ -0,0 +1,17 @@
+From: Jiri Kosina <jkosina@suse.cz>
+Subject: powerpc/barrier: add gmb
+Patch-mainline: Not yet, under development
+References: bsc#1068032 CVE-2017-5753
+
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+
+--- a/arch/powerpc/include/asm/barrier.h
++++ b/arch/powerpc/include/asm/barrier.h
+@@ -75,6 +75,7 @@
+ })
+
+ #define smp_mb__before_spinlock() smp_mb()
++#define gmb() asm volatile("ori 31,31,0");
+
+ #include <asm-generic/barrier.h>
+
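For context: this hunk supplies the powerpc definition of the gmb() speculation barrier introduced by patches.suse/0001-locking-barriers-introduce-new-memory-barrier-gmb.patch, which the "prevent speculative execution" patches listed in series.conf below place between a bounds check and the dependent memory access. A minimal, hypothetical usage sketch follows; it is not taken from any of those patches, and the helper name and arguments are invented for illustration.

#include <asm/barrier.h>        /* gmb() as added by the hunk above */

/* Hypothetical helper: the barrier sits between the bounds check and the
 * dependent load, so the load is not executed speculatively past the check. */
static int table_lookup(const int *table, unsigned long size, unsigned long idx)
{
        if (idx >= size)
                return -1;
        gmb();
        return table[idx];
}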
diff --git a/series.conf b/series.conf
index 4f41ddc28e..bb273122d6 100644
--- a/series.conf
+++ b/series.conf
@@ -1283,6 +1283,20 @@
patches.fixes/kernel-watchdog-Prevent-false-positives-with-turbo-m.patch
+ patches.suse/0001-locking-barriers-introduce-new-memory-barrier-gmb.patch
+ patches.suse/0002-bpf-prevent-speculative-execution-in-eBPF-interprete.patch
+ patches.suse/0004-uvcvideo-prevent-speculative-execution.patch
+ patches.suse/0005-carl9170-prevent-speculative-execution.patch
+ patches.suse/0006-p54-prevent-speculative-execution.patch
+ patches.suse/0007-qla2xxx-prevent-speculative-execution.patch
+ patches.suse/0008-cw1200-prevent-speculative-execution.patch
+ patches.suse/0009-Thermal-int340x-prevent-speculative-execution.patch
+ patches.suse/0010-userns-prevent-speculative-execution.patch
+ patches.suse/0011-ipv6-prevent-speculative-execution.patch
+ patches.suse/0012-fs-prevent-speculative-execution.patch
+ patches.suse/0013-net-mpls-prevent-speculative-execution.patch
+ patches.suse/0014-udf-prevent-speculative-execution.patch
+
########################################################
# interrupts/core
########################################################
@@ -1920,6 +1934,7 @@
patches.arch/powerpc-numa-Fix-multiple-bugs-in-memory_hotplug_max.patch
patches.arch/powerpc-opal-Fix-EBUSY-bug-in-acquiring-tokens.patch
patches.arch/powerpc-powernv-Make-opal_event_shutdown-callable-fr.patch
+ patches.suse/powerpc-add-gmb.patch
########################################################
# powerpc/little endian
@@ -2085,6 +2100,8 @@
patches.fixes/powerpc-64-Fix-flush_-di-cache_range-called-from-modules
patches.arch/powerpc-powernv-ioda-Fix-endianness-when-reading-TCE.patch
+ patches.suse/powerpc-Secure-memory-rfi-flush-SLE12SP3.patch
+
########################################################
# ARM
########################################################
@@ -3432,6 +3449,10 @@
patches.suse/net-smc-dev_put-for-netdev-after-usage-of-ib_query_g.patch
patches.suse/s390-disassembler-correct-disassembly-lines-alignmen.patch
+ patches.arch/s390-sles12sp3-99-01-cpu-alternatives.patch
+ patches.arch/s390-sles12sp3-99-02-gmb.patch
+ patches.arch/s390-sles12sp3-99-03-nobp.patch
+
# bsc#1072216 - SLES 12 SP3 - IBM LTC System z maintenance kernel patches (#07)
patches.arch/s390-sles12sp3-07-03-01-qeth-add-missing-hash-table-initializations.patch
patches.arch/s390-sles12sp3-07-03-02-qeth-fix-early-exit-from-error-path.patch
@@ -4493,6 +4514,15 @@
patches.fixes/0001-NFS-Don-t-disconnect-open-owner-on-NFS4ERR_BAD_SEQID.patch
patches.fixes/0003-NFSv4-always-set-NFS_LOCK_LOST-when-a-lock-is-lost.patch
+ patches.fixes/0001-sunrpc-add-hash_cred-function-to-rpc_authops-struct.patch
+ patches.kabi/0001-sunrpc-add-hash_cred-function-to-rpc_authops-struct.kabi
+ patches.fixes/0002-sunrpc-add-generic_auth-hash_cred-function.patch
+ patches.fixes/0003-sunrpc-add-auth_unix-hash_cred-function.patch
+ patches.fixes/0004-sunrpc-add-RPCSEC_GSS-hash_cred-function.patch
+ patches.fixes/0005-sunrpc-replace-generic-auth_cred-hash-with-auth-spec.patch
+ patches.fixes/0006-sunrpc-include-sup-groups-in-hash.patch
+ patches.fixes/0007-nfs-limit-access-cache-size.patch
+
########################################################
# cifs patches
########################################################
@@ -13872,6 +13902,7 @@
patches.drivers/ALSA-hda-realtek-Update-headset-mode-for-ALC225
patches.drivers/ALSA-hda-realtek-ALC891-headset-mode-for-Dell
patches.drivers/ALSA-hda-realtek-Support-Dell-headset-mode-for-ALC32
+ patches.drivers/ALSA-hda-realtek-Fix-typo-of-pincfg-for-Dell-quirk
patches.drivers/ALSA-hda-realtek-Add-support-for-Acer-Aspire-E5-475-
patches.drivers/ALSA-hda-realtek-Fix-headset-mic-on-several-Asus-lap
patches.drivers/ALSA-hda-realtek-fix-headset-mic-detection-for-MSI-M
@@ -13884,6 +13915,7 @@
patches.drivers/ALSA-hda-realtek-Enable-jack-detection-function-for-
patches.drivers/ALSA-hda-Skip-Realtek-SKU-check-for-Lenovo-machines
patches.drivers/ALSA-hda-fix-headset-mic-detection-issue-on-a-Dell-m
+ patches.drivers/ALSA-hda-fix-headset-mic-problem-for-Dell-machines-alc274
patches.drivers/ALSA-hda-realtek-Fix-pincfg-for-Dell-XPS-13-9370
patches.drivers/ALSA-hda-realtek-Fix-Dell-AIO-LineOut-issue
patches.drivers/ALSA-hda-change-the-location-for-one-mic-on-a-Lenovo
@@ -19838,6 +19870,96 @@
# ugly workaround for turning off CONFIG_RT_GROUP_SCHED
patches.kabi/CONFIG_RT_GROUP_SCHED-disablement-workaround.patch
+ # PTI
+ patches.suse/4.4-01-x86-mm-add-invpcid-helpers.patch
+ patches.suse/4.4-02-x86-mm-fix-invpcid-asm-constraint.patch
+ patches.suse/4.4-03-x86-mm-add-a-noinvpcid-boot-option-to-turn-off-invpcid.patch
+ patches.suse/4.4-04-x86-mm-if-invpcid-is-available-use-it-to-flush-global-mappings.patch
+ patches.suse/4.4-06-mm-mmu_context-sched-core-fix-mmu_context-h-assumption.patch
+ patches.suse/4.4-07-sched-core-add-switch_mm_irqs_off-and-use-it-in-the-scheduler.patch
+ patches.suse/4.4-08-x86-mm-build-arch-x86-mm-tlb-c-even-on-smp.patch
+ patches.suse/4.4-09-x86-mm-sched-core-uninline-switch_mm.patch
+ patches.suse/4.4-10-x86-mm-sched-core-turn-off-irqs-in-switch_mm.patch
+ patches.suse/4.4-11-arm-hide-finish_arch_post_lock_switch-from-modules.patch
+ patches.suse/4.4-12-sched-core-idle_task_exit-shouldn-t-use-switch_mm_irqs_off.patch
+ patches.suse/4.4-15-x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch
+ patches.suse/4.4-16-x86-mm-make-flush_tlb_mm_range-more-predictable.patch
+ patches.suse/4.4-17-x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
+ patches.suse/4.4-18-x86-mm-remove-the-up-asm-tlbflush-h-code-always-use-the-formerly-smp-code.patch
+ patches.suse/4.4-20-x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
+ patches.suse/4.4-21-x86-mm-enable-cr4-pcide-on-supported-systems.patch
+ patches.suse/4.4-22-KAISER-Kernel-Address-Isolation.patch
+ patches.suse/4.4-23-kaiser-merged-update.patch
+ patches.suse/4.4-24-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch
+ patches.suse/4.4-25-kaiser-stack-map-PAGE_SIZE-at-THREAD_SIZE-PAGE_SIZE.patch
+ patches.suse/4.4-26-kaiser-fix-build-and-FIXME-in-alloc_ldt_struct.patch
+ patches.suse/4.4-27-kaiser-KAISER-depends-on-SMP.patch
+ patches.suse/4.4-28-kaiser-fix-regs-to-do_nmi-ifndef-CONFIG_KAISER.patch
+ patches.suse/4.4-29-kaiser-fix-perf-crashes.patch
+ patches.suse/4.4-30-kaiser-ENOMEM-if-kaiser_pagetable_walk-NULL.patch
+ patches.suse/4.4-31-kaiser-tidied-up-asm-kaiser.h-somewhat.patch
+ patches.suse/4.4-32-kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch
+ patches.suse/4.4-33-kaiser-kaiser_remove_mapping-move-along-the-pgd.patch
+ patches.suse/4.4-34-kaiser-align-addition-to-x86-mm-Makefile.patch
+ patches.suse/4.4-35-kaiser-cleanups-while-trying-for-gold-link.patch
+ patches.suse/4.4-36-kaiser-name-that-0x1000-KAISER_SHADOW_PGD_OFFSET.patch
+ patches.suse/4.4-37-kaiser-delete-KAISER_REAL_SWITCH-option.patch
++jikos patches.suse/4.4-38-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch
+ patches.suse/4.4-39-kaiser-enhanced-by-kernel-and-user-PCIDs.patch
+ patches.suse/4.4-40-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch
+ patches.suse/4.4-41-kaiser-PCID-0-for-kernel-and-128-for-user.patch
+ patches.suse/4.4-42-kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch
+ patches.suse/4.4-43-kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch
+ patches.suse/4.4-44-kaiser-_pgd_alloc-without-__GFP_REPEAT-to-avoid-stal.patch
+ patches.suse/4.4-45-kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch
+ patches.suse/4.4-46-x86-mm-64-fix-reboot-interaction-with-cr4-pcide.patch
+ patches.suse/4.4-47-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch
+ patches.suse/4.4-48-rename-and-simplify-feature-setting.patch
+ patches.suse/4.4-49-x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch
+ patches.suse/4.4-50-kaiser-add_pti_cmdline_option_and_documentation.patch
+ patches.suse/4.4-51-kaiser-use-ALTERNATIVE-instead-of-x86_cr3_pcid_noflu.patch
+ patches.suse/4.4-52-kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch
+ patches.suse/4.4-53-kaiser-asm-tlbflush.h-handle-noPGE-at-lower-level.patch
+ patches.suse/4.4-54-kaiser-kaiser_flush_tlb_on_return_to_user-check-PCID.patch
+ patches.suse/4.4-55-x86-paravirt-dont-patch-flush_tlb_single.patch
+ patches.suse/4.4-57-Reenable_PARAVIRT.patch
+ patches.suse/4.4-58-kaiser-disable-on-xen.patch
+ patches.kabi/kaiser-preserve-kabi.patch
+
+ # bsc#1068032, var2
+ patches.suse/01-x86-feature-enable-the-x86-feature-to-control-speculation.patch
+ patches.suse/02-x86-enter-add-macros-to-set-clear-ibrs-and-set-ibpb.patch
+ patches.suse/03-x86-entry-use-ibrs-on-entry-to-kernel-space.patch
+ patches.suse/04-x86-msr-move-native_-msr-u64-to-msr-h.patch
+ patches.suse/05-x86-spec-add-ibrs-control-functions.patch
+ patches.suse/06-x86-idle-toggle-ibrs-when-going-idle.patch
+ patches.suse/07-x86-idle-disable-ibrs-when-offlining-a-cpu-and-re-enable-on-wakeup.patch
+ patches.suse/08-x86-spec_ctrl-add-an-indirect-branch-predictor-barrier.patch
+ patches.suse/09-x86-mm-set-ibpb-upon-context-switch.patch
+ patches.suse/10-ptrace-add-a-new-thread-access-check.patch
+ patches.suse/11-x86-mm-only-set-ibpb-when-the-new-thread-cannot-ptrace-current-thread.patch
+ patches.suse/12-x86-entry-add-a-function-to-overwrite-the-rsb.patch
+ patches.suse/13-x86-entry-stuff-rsb-for-entry-to-kernel-for-non-smep-platform.patch
+ patches.suse/14-x86-kvm-add-msr_ia32_spec_ctrl-and-msr_ia32_pred_cmd-to-kvm.patch
+ patches.suse/15-x86-kvm-flush-ibp-when-switching-vms.patch
+ patches.suse/16-x86-kvm-toggle-ibrs-on-vm-entry-and-exit.patch
+ patches.suse/17-x86-kvm-pad-rsb-on-vm-transition.patch
+ patches.suse/18-x86-spec_ctrl-check-whether-ibrs-is-enabled-before-using-it.patch
+ patches.suse/19-x86-spec_ctrl-check-whether-ibpb-is-enabled-before-using-it.patch
+ patches.suse/20-x86-cpu-check-speculation-control-cpuid-bit.patch
+ patches.suse/21-x86-spec-add-nospec-chicken-bit.patch
+ patches.suse/22-x86-cpu-amd-add-speculative-control-support-for-amd.patch
+ patches.suse/23-x86-spec-check-cpuid-direclty-post-microcode-reload-to-support-ibpb-feature.patch
+ patches.suse/24-kvm-svm-do-not-intercept-new-speculative-control-msrs.patch
+ patches.suse/25-x86-svm-set-ibrs-value-on-vm-entry-and-exit.patch
+ patches.suse/26-x86-svm-set-ibpb-when-running-a-different-vcpu.patch
+ patches.suse/27-kvm-x86-add-speculative-control-cpuid-support-for-guests.patch
+ patches.suse/28-x86-svm-clobber-the-rsb-on-vm-exit.patch
+ patches.suse/29-x86-svm-add-code-to-clear-registers-on-vm-exit.patch
+ patches.suse/30-x86-cpu-amd-make-the-lfence-instruction-serialized.patch
+ patches.suse/31-x86-cpu-amd-remove-now-unused-definition-of-mfence_rdtsc-feature.patch
+ patches.suse/32-move-pti-feature-check-up.patch
+
########################################################
# You'd better have a good reason for adding a patch
# below here.