author     Olaf Hering <ohering@suse.de>    2018-11-05 07:54:07 +0100
committer  Olaf Hering <ohering@suse.de>    2018-11-05 07:54:07 +0100
commit     167213fb90a92a4752652bb15e24543d3196231a (patch)
tree       b777f5afd096a1083356e4d656f9dbb83817175b
parent     9188d06a552385cb05ba716eb2ecdab8ef23a666 (diff)
parent     a00eabe24f861f9ad2f6d7223dc1f3ef3f58ac58 (diff)
Merge remote-tracking branch 'kerncvs/SLE15' into SLE15-AZURE
-rw-r--r--  blacklist.conf | 2
-rw-r--r--  kabi/severities | 6
-rw-r--r--  patches.arch/KVM-PPC-Add-pt_regs-into-kvm_vcpu_arch-and-move-vcpu.patch | 355
-rw-r--r--  patches.arch/KVM-PPC-Avoid-marking-DMA-mapped-pages-dirty-in-real.patch | 367
-rw-r--r--  patches.arch/KVM-PPC-Book-3S-HV-Do-ptesync-in-radix-guest-exit-pa.patch | 55
-rw-r--r--  patches.arch/KVM-PPC-Book3S-Add-MMIO-emulation-for-VMX-instructio.patch | 330
-rw-r--r--  patches.arch/KVM-PPC-Book3S-Allow-backing-bigger-guest-IOMMU-page.patch | 248
-rw-r--r--  patches.arch/KVM-PPC-Book3S-Check-KVM_CREATE_SPAPR_TCE_64-paramet.patch | 43
-rw-r--r--  patches.arch/KVM-PPC-Book3S-Eliminate-some-unnecessary-checks.patch | 57
-rw-r--r--  patches.arch/KVM-PPC-Book3S-Fix-compile-error-that-occurs-with-so.patch | 44
-rw-r--r--  patches.arch/KVM-PPC-Book3S-Fix-guest-DMA-when-guest-partially-ba.patch | 22
-rw-r--r--  patches.arch/KVM-PPC-Book3S-Fix-matching-of-hardware-and-emulated.patch | 76
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Add-of_node_put-in-success-path.patch | 39
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Add-online-register-to-ONE_REG-int.patch | 76
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Allow-HPT-and-radix-on-the-same-co.patch | 105
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Allow-creating-max-number-of-VCPUs.patch | 53
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Avoid-crash-from-THP-collapse-duri.patch | 60
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Avoid-shifts-by-negative-amounts.patch | 93
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Check-DR-not-IR-to-chose-real-vs-v.patch | 33
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Disable-tb_offset.patch | 30
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Do-SLB-load-unload-with-guest-LPCR.patch | 212
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Don-t-truncate-HPTE-index-in-xlate.patch | 47
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Don-t-use-compound_order-to-determ.patch | 158
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Don-t-use-existing-prodded-flag-fo.patch | 104
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Enable-migration-of-decrementer-re.patch | 115
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Factor-fake-suspend-handling-out-o.patch | 339
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Fix-VRMA-initialization-with-2MB-o.patch | 59
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Fix-conditions-for-starting-vcpu.patch | 63
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Fix-constant-size-warning.patch | 63
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Fix-duplication-of-host-SLB-entrie.patch | 91
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Fix-guest-r11-corruption-with-POWE.patch | 60
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Fix-handling-of-large-pages-in-rad.patch | 172
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Fix-handling-of-secondary-HPTEG-in.patch | 54
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Fix-inaccurate-comment.patch | 35
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Fix-kvmppc_bad_host_intr-for-real-.patch | 45
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Fix-trap-number-return-from-__kvmp.patch | 115
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Fix-typo-in-kvmppc_hv_get_dirty_lo.patch | 38
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Handle-1GB-pages-in-radix-page-fau.patch | 205
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Improve-handling-of-debug-trigger-.patch | 302
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Keep-XIVE-escalation-interrupt-mas.patch | 186
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Lockless-tlbie-for-HPT-hcalls.patch | 123
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Make-HPT-resizing-work-on-POWER9.patch | 136
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Make-radix-clear-pte-when-unmappin.patch | 38
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Make-radix-use-correct-tlbie-seque.patch | 45
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Make-xive_pushed-a-byte-not-a-word.patch | 64
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Pack-VCORE-IDs-to-access-full-VCPU.patch | 256
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Radix-page-fault-handler-optimizat.patch | 114
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Read-kvm-arch.emul_smt_mode-under-.patch | 68
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Recursively-unmap-all-page-table-e.patch | 280
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Remove-useless-statement.patch | 37
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Remove-vcpu-arch.dec-usage.patch | 36
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Send-kvmppc_bad_interrupt-NMIs-to-.patch | 85
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Set-RWMR-on-POWER8-so-PURR-SPURR-c.patch | 158
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Snapshot-timebase-offset-on-guest-.patch | 256
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Streamline-setting-of-reference-an.patch | 110
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Use-__gfn_to_pfn_memslot-in-page-f.patch | 228
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Use-a-helper-to-unmap-ptes-in-the-.patch | 105
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-XIVE-Resend-re-routed-interrupts-o.patch | 181
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-radix-Do-not-clear-partition-PTE-w.patch | 151
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-radix-Refine-IO-region-partition-s.patch | 48
-rw-r--r--  patches.arch/KVM-PPC-Book3S-PR-Add-guest-MSR-parameter-for-kvmppc.patch | 321
-rw-r--r--  patches.arch/KVM-PPC-Book3S-PR-Move-kvmppc_save_tm-kvmppc_restore.patch | 592
-rw-r--r--  patches.arch/KVM-PPC-Book3S-Use-correct-page-shift-in-H_STUFF_TCE.patch | 56
-rw-r--r--  patches.arch/KVM-PPC-Check-if-IOMMU-page-is-contained-in-the-pinn.patch | 46
-rw-r--r--  patches.arch/KVM-PPC-Fix-a-mmio_host_swabbed-uninitialized-usage-.patch | 59
-rw-r--r--  patches.arch/KVM-PPC-Make-iommu_table-it_userspace-big-endian.patch | 193
-rw-r--r--  patches.arch/KVM-PPC-Move-nip-ctr-lr-xer-registers-to-pt_regs-in-.patch | 568
-rw-r--r--  patches.arch/KVM-PPC-Use-seq_puts-in-kvmppc_exit_timing_show.patch | 38
-rw-r--r--  patches.arch/powerpc-introduce-get_mm_addr_key-helper.patch | 16
-rw-r--r--  patches.arch/powerpc-kvm-Switch-kvm-pmd-allocator-to-custom-alloc.patch | 117
-rw-r--r--  patches.arch/powerpc-kvm-booke-Fix-altivec-related-build-break.patch | 44
-rw-r--r--  patches.arch/powerpc-mm-Rename-find_linux_pte_or_hugepte.patch | 518
-rw-r--r--  patches.arch/powerpc-powernv-Add-indirect-levels-to-it_userspace.patch | 415
-rw-r--r--  patches.arch/powerpc-powernv-Move-TCE-manupulation-code-to-its-ow.patch | 783
-rw-r--r--  patches.arch/powerpc-powernv-Rework-TCE-level-allocation.patch | 78
-rw-r--r--  patches.arch/powerpc-powernv-ioda-Allocate-indirect-TCE-levels-on.patch | 364
-rw-r--r--  patches.arch/powerpc-powernv-ioda-Finish-removing-explicit-max-wi.patch | 37
-rw-r--r--  patches.arch/powerpc-powernv-ioda-Remove-explicit-max-window-size.patch | 45
-rw-r--r--  patches.arch/powerpc-powernv-ioda2-Reduce-upper-limit-for-DMA-win.patch | 44
-rw-r--r--  patches.arch/powerpc-xive-Move-definition-of-ESB-bits.patch | 115
-rw-r--r--  patches.arch/powerpc-xmon-Add-ISA-v3.0-SPRs-to-SPR-dump.patch | 80
-rw-r--r--  patches.arch/x86-kexec-correct-kexec_backup_src_end-off-by-one-error.patch | 62
-rw-r--r--  patches.drivers/edac-thunderx-fix-memory-leak-in-thunderx_l2c_threaded_isr.patch | 47
-rw-r--r--  patches.fixes/resource-include-resource-end-in-walk_-interfaces.patch | 79
-rw-r--r--  patches.kabi/KABI-hide-new-member-in-struct-iommu_table-from-genk.patch | 123
-rw-r--r--  patches.kabi/KABI-powerpc-export-__find_linux_pte-as-__find_linux.patch | 28
-rw-r--r--  patches.kabi/KABI-powerpc-mmu_context-provide-old-version-of-mm_i.patch | 38
-rw-r--r--  patches.suse/livepatch-create-and-include-UAPI-headers.patch | 116
-rw-r--r--  patches.suse/livepatch-modpost-ignore-unresolved-symbols.patch | 50
-rw-r--r--  patches.suse/tty-Don-t-block-on-IO-when-ldisc-change-is-pending.patch | 145
-rw-r--r--  patches.suse/tty-Hold-tty_ldisc_lock-during-tty_reopen.patch | 9
-rw-r--r--  patches.suse/tty-Simplify-tty-count-math-in-tty_reopen.patch | 4
-rw-r--r--  patches.suse/tty-ldsem-Add-lockdep-asserts-for-ldisc_sem.patch | 16
-rw-r--r--  patches.suse/tty-ldsem-Convert-to-regular-lockdep-annotations.patch | 6
-rw-r--r--  patches.suse/tty-ldsem-Decrement-wait_readers-on-timeouted-down_r.patch | 4
-rw-r--r--  patches.suse/tty-ldsem-Wake-up-readers-after-timed-out-down_write.patch | 9
-rwxr-xr-x  rpm/klp-symbols | 4
-rw-r--r--  series.conf | 87
98 files changed, 12282 insertions, 150 deletions
diff --git a/blacklist.conf b/blacklist.conf
index 30bec138ed..8075996aa6 100644
--- a/blacklist.conf
+++ b/blacklist.conf
@@ -883,3 +883,5 @@ d60996ab430c8a6033a0944c068edc5ec5becb9b # Duplicate of 3af71f649d22f359790b4032
c894696188d5c2af1e636e458190e80c53fb893d # rtlwifi: the target function doesn't exist yet
16d571bb0fe6aa7fed82e19166ca1542026c9c06 # rt2x00: not cleanly applicable, merely optimization
d59d2f9995d28974877750f429e821324bd603c7 # r8822be: not present in SLE15
+d3d4ffaae439981e1e441ebb125aa3588627c5d8 # Duplicate of 7233b8cab39014620ac9534da11f0f3e506d8fd8
+711f76a328cbe5b49164bb14bcb593fa52102051 # trivial, no need
diff --git a/kabi/severities b/kabi/severities
index 79a120a6fd..c2e8b78a77 100644
--- a/kabi/severities
+++ b/kabi/severities
@@ -19,6 +19,12 @@ arch/x86/kvm/* PASS
arch/powerpc/kvm/* PASS
arch/x86/kvm/* PASS
kvmppc_* PASS
+__xive_vm_h_* PASS
+realmode_pfn_to_page PASS
+iommu_tce_xchg_rm PASS
+mm_iommu_lookup_rm PASS
+mm_iommu_ua_to_hpa_rm PASS
+mm_iommu_ua_to_hpa_shift_rm PASS
# removed upstream, not included in inlines/defines
x86_spec_ctrl_get_default PASS
diff --git a/patches.arch/KVM-PPC-Add-pt_regs-into-kvm_vcpu_arch-and-move-vcpu.patch b/patches.arch/KVM-PPC-Add-pt_regs-into-kvm_vcpu_arch-and-move-vcpu.patch
new file mode 100644
index 0000000000..d959f2923b
--- /dev/null
+++ b/patches.arch/KVM-PPC-Add-pt_regs-into-kvm_vcpu_arch-and-move-vcpu.patch
@@ -0,0 +1,355 @@
+From 1143a70665c2175a33a40d8f2dc277978fbf7640 Mon Sep 17 00:00:00 2001
+From: Simon Guo <wei.guo.simon@gmail.com>
+Date: Mon, 7 May 2018 14:20:07 +0800
+Subject: [PATCH] KVM: PPC: Add pt_regs into kvm_vcpu_arch and move
+ vcpu->arch.gpr[] into it
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: 1143a70665c2175a33a40d8f2dc277978fbf7640
+
+Current regs are scattered at kvm_vcpu_arch structure and it will
+be more neat to organize them into pt_regs structure.
+
+Also it will enable reimplementation of MMIO emulation code with
+analyse_instr() later.
+
+Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/kvm_book3s.h | 4 +--
+ arch/powerpc/include/asm/kvm_book3s_64.h | 8 ++---
+ arch/powerpc/include/asm/kvm_booke.h | 4 +--
+ arch/powerpc/include/asm/kvm_host.h | 2 +-
+ arch/powerpc/kernel/asm-offsets.c | 2 +-
+ arch/powerpc/kvm/book3s_64_vio_hv.c | 2 +-
+ arch/powerpc/kvm/book3s_hv_builtin.c | 6 ++--
+ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 15 +++++----
+ arch/powerpc/kvm/book3s_hv_rm_xics.c | 2 +-
+ arch/powerpc/kvm/book3s_pr.c | 56 ++++++++++++++++----------------
+ arch/powerpc/kvm/book3s_xive_template.c | 4 +--
+ arch/powerpc/kvm/e500_emulate.c | 4 +--
+ 12 files changed, 55 insertions(+), 54 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
+index c1f3a870c48a..e3182f7ae499 100644
+--- a/arch/powerpc/include/asm/kvm_book3s.h
++++ b/arch/powerpc/include/asm/kvm_book3s.h
+@@ -275,12 +275,12 @@ static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
+
+ static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
+ {
+- vcpu->arch.gpr[num] = val;
++ vcpu->arch.regs.gpr[num] = val;
+ }
+
+ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
+ {
+- return vcpu->arch.gpr[num];
++ return vcpu->arch.regs.gpr[num];
+ }
+
+ static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
+diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
+index c424e44f4c00..38dbcad086d6 100644
+--- a/arch/powerpc/include/asm/kvm_book3s_64.h
++++ b/arch/powerpc/include/asm/kvm_book3s_64.h
+@@ -490,8 +490,8 @@ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
+ vcpu->arch.ppr = vcpu->arch.ppr_tm;
+ vcpu->arch.dscr = vcpu->arch.dscr_tm;
+ vcpu->arch.tar = vcpu->arch.tar_tm;
+- memcpy(vcpu->arch.gpr, vcpu->arch.gpr_tm,
+- sizeof(vcpu->arch.gpr));
++ memcpy(vcpu->arch.regs.gpr, vcpu->arch.gpr_tm,
++ sizeof(vcpu->arch.regs.gpr));
+ vcpu->arch.fp = vcpu->arch.fp_tm;
+ vcpu->arch.vr = vcpu->arch.vr_tm;
+ vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
+@@ -507,8 +507,8 @@ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
+ vcpu->arch.ppr_tm = vcpu->arch.ppr;
+ vcpu->arch.dscr_tm = vcpu->arch.dscr;
+ vcpu->arch.tar_tm = vcpu->arch.tar;
+- memcpy(vcpu->arch.gpr_tm, vcpu->arch.gpr,
+- sizeof(vcpu->arch.gpr));
++ memcpy(vcpu->arch.gpr_tm, vcpu->arch.regs.gpr,
++ sizeof(vcpu->arch.regs.gpr));
+ vcpu->arch.fp_tm = vcpu->arch.fp;
+ vcpu->arch.vr_tm = vcpu->arch.vr;
+ vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
+diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
+index bc6e29e4dfd4..f5fc9569ef56 100644
+--- a/arch/powerpc/include/asm/kvm_booke.h
++++ b/arch/powerpc/include/asm/kvm_booke.h
+@@ -36,12 +36,12 @@
+
+ static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
+ {
+- vcpu->arch.gpr[num] = val;
++ vcpu->arch.regs.gpr[num] = val;
+ }
+
+ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
+ {
+- return vcpu->arch.gpr[num];
++ return vcpu->arch.regs.gpr[num];
+ }
+
+ static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
+diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
+index 9703f8f229c9..a75443a372bb 100644
+--- a/arch/powerpc/include/asm/kvm_host.h
++++ b/arch/powerpc/include/asm/kvm_host.h
+@@ -486,7 +486,7 @@ struct kvm_vcpu_arch {
+ struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
+ #endif
+
+- ulong gpr[32];
++ struct pt_regs regs;
+
+ struct thread_fp_state fp;
+
+diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
+index 373dc1d6ef44..774c6a8ebfb4 100644
+--- a/arch/powerpc/kernel/asm-offsets.c
++++ b/arch/powerpc/kernel/asm-offsets.c
+@@ -425,7 +425,7 @@ int main(void)
+ OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack);
+ OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid);
+ OFFSET(VCPU_GUEST_PID, kvm_vcpu, arch.pid);
+- OFFSET(VCPU_GPRS, kvm_vcpu, arch.gpr);
++ OFFSET(VCPU_GPRS, kvm_vcpu, arch.regs.gpr);
+ OFFSET(VCPU_VRSAVE, kvm_vcpu, arch.vrsave);
+ OFFSET(VCPU_FPRS, kvm_vcpu, arch.fp.fpr);
+ #ifdef CONFIG_ALTIVEC
+diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
+index 635f3ca8129a..925fc316a104 100644
+--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
++++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
+@@ -609,7 +609,7 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ page = stt->pages[idx / TCES_PER_PAGE];
+ tbl = (u64 *)page_address(page);
+
+- vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
++ vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE];
+
+ return H_SUCCESS;
+ }
+diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
+index de18299f92b7..2b127586be30 100644
+--- a/arch/powerpc/kvm/book3s_hv_builtin.c
++++ b/arch/powerpc/kvm/book3s_hv_builtin.c
+@@ -211,9 +211,9 @@ long kvmppc_h_random(struct kvm_vcpu *vcpu)
+
+ /* Only need to do the expensive mfmsr() on radix */
+ if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR))
+- r = powernv_get_random_long(&vcpu->arch.gpr[4]);
++ r = powernv_get_random_long(&vcpu->arch.regs.gpr[4]);
+ else
+- r = powernv_get_random_real_mode(&vcpu->arch.gpr[4]);
++ r = powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]);
+ if (r)
+ return H_SUCCESS;
+
+@@ -562,7 +562,7 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
+ {
+ if (!kvmppc_xics_enabled(vcpu))
+ return H_TOO_HARD;
+- vcpu->arch.gpr[5] = get_tb();
++ vcpu->arch.regs.gpr[5] = get_tb();
+ if (xive_enabled()) {
+ if (is_rm())
+ return xive_rm_h_xirr(vcpu);
+diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+index 78e6a392330f..8e12c5c3c4ee 100644
+--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
++++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+@@ -418,7 +418,8 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel)
+ {
+ return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
+- vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
++ vcpu->arch.pgdir, true,
++ &vcpu->arch.regs.gpr[4]);
+ }
+
+ #ifdef __BIG_ENDIAN__
+@@ -561,13 +562,13 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn)
+ {
+ return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
+- &vcpu->arch.gpr[4]);
++ &vcpu->arch.regs.gpr[4]);
+ }
+
+ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
+ {
+ struct kvm *kvm = vcpu->kvm;
+- unsigned long *args = &vcpu->arch.gpr[4];
++ unsigned long *args = &vcpu->arch.regs.gpr[4];
+ __be64 *hp, *hptes[4];
+ unsigned long tlbrb[4];
+ long int i, j, k, n, found, indexes[4];
+@@ -787,8 +788,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
+ r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
+ r &= ~HPTE_GR_RESERVED;
+ }
+- vcpu->arch.gpr[4 + i * 2] = v;
+- vcpu->arch.gpr[5 + i * 2] = r;
++ vcpu->arch.regs.gpr[4 + i * 2] = v;
++ vcpu->arch.regs.gpr[5 + i * 2] = r;
+ }
+ return H_SUCCESS;
+ }
+@@ -834,7 +835,7 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
+ }
+ }
+ }
+- vcpu->arch.gpr[4] = gr;
++ vcpu->arch.regs.gpr[4] = gr;
+ ret = H_SUCCESS;
+ out:
+ unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
+@@ -881,7 +882,7 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
+ kvmppc_set_dirty_from_hpte(kvm, v, gr);
+ }
+ }
+- vcpu->arch.gpr[4] = gr;
++ vcpu->arch.regs.gpr[4] = gr;
+ ret = H_SUCCESS;
+ out:
+ unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
+diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
+index 2a862618f072..758d1d23215e 100644
+--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
++++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
+@@ -517,7 +517,7 @@ unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
+ } while (!icp_rm_try_update(icp, old_state, new_state));
+
+ /* Return the result in GPR4 */
+- vcpu->arch.gpr[4] = xirr;
++ vcpu->arch.regs.gpr[4] = xirr;
+
+ return check_too_hard(xics, icp);
+ }
+diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
+index d3f304d06adf..899bc9a02ab5 100644
+--- a/arch/powerpc/kvm/book3s_pr.c
++++ b/arch/powerpc/kvm/book3s_pr.c
+@@ -147,20 +147,20 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
+ {
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+
+- svcpu->gpr[0] = vcpu->arch.gpr[0];
+- svcpu->gpr[1] = vcpu->arch.gpr[1];
+- svcpu->gpr[2] = vcpu->arch.gpr[2];
+- svcpu->gpr[3] = vcpu->arch.gpr[3];
+- svcpu->gpr[4] = vcpu->arch.gpr[4];
+- svcpu->gpr[5] = vcpu->arch.gpr[5];
+- svcpu->gpr[6] = vcpu->arch.gpr[6];
+- svcpu->gpr[7] = vcpu->arch.gpr[7];
+- svcpu->gpr[8] = vcpu->arch.gpr[8];
+- svcpu->gpr[9] = vcpu->arch.gpr[9];
+- svcpu->gpr[10] = vcpu->arch.gpr[10];
+- svcpu->gpr[11] = vcpu->arch.gpr[11];
+- svcpu->gpr[12] = vcpu->arch.gpr[12];
+- svcpu->gpr[13] = vcpu->arch.gpr[13];
++ svcpu->gpr[0] = vcpu->arch.regs.gpr[0];
++ svcpu->gpr[1] = vcpu->arch.regs.gpr[1];
++ svcpu->gpr[2] = vcpu->arch.regs.gpr[2];
++ svcpu->gpr[3] = vcpu->arch.regs.gpr[3];
++ svcpu->gpr[4] = vcpu->arch.regs.gpr[4];
++ svcpu->gpr[5] = vcpu->arch.regs.gpr[5];
++ svcpu->gpr[6] = vcpu->arch.regs.gpr[6];
++ svcpu->gpr[7] = vcpu->arch.regs.gpr[7];
++ svcpu->gpr[8] = vcpu->arch.regs.gpr[8];
++ svcpu->gpr[9] = vcpu->arch.regs.gpr[9];
++ svcpu->gpr[10] = vcpu->arch.regs.gpr[10];
++ svcpu->gpr[11] = vcpu->arch.regs.gpr[11];
++ svcpu->gpr[12] = vcpu->arch.regs.gpr[12];
++ svcpu->gpr[13] = vcpu->arch.regs.gpr[13];
+ svcpu->cr = vcpu->arch.cr;
+ svcpu->xer = vcpu->arch.xer;
+ svcpu->ctr = vcpu->arch.ctr;
+@@ -194,20 +194,20 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
+ if (!svcpu->in_use)
+ goto out;
+
+- vcpu->arch.gpr[0] = svcpu->gpr[0];
+- vcpu->arch.gpr[1] = svcpu->gpr[1];
+- vcpu->arch.gpr[2] = svcpu->gpr[2];
+- vcpu->arch.gpr[3] = svcpu->gpr[3];
+- vcpu->arch.gpr[4] = svcpu->gpr[4];
+- vcpu->arch.gpr[5] = svcpu->gpr[5];
+- vcpu->arch.gpr[6] = svcpu->gpr[6];
+- vcpu->arch.gpr[7] = svcpu->gpr[7];
+- vcpu->arch.gpr[8] = svcpu->gpr[8];
+- vcpu->arch.gpr[9] = svcpu->gpr[9];
+- vcpu->arch.gpr[10] = svcpu->gpr[10];
+- vcpu->arch.gpr[11] = svcpu->gpr[11];
+- vcpu->arch.gpr[12] = svcpu->gpr[12];
+- vcpu->arch.gpr[13] = svcpu->gpr[13];
++ vcpu->arch.regs.gpr[0] = svcpu->gpr[0];
++ vcpu->arch.regs.gpr[1] = svcpu->gpr[1];
++ vcpu->arch.regs.gpr[2] = svcpu->gpr[2];
++ vcpu->arch.regs.gpr[3] = svcpu->gpr[3];
++ vcpu->arch.regs.gpr[4] = svcpu->gpr[4];
++ vcpu->arch.regs.gpr[5] = svcpu->gpr[5];
++ vcpu->arch.regs.gpr[6] = svcpu->gpr[6];
++ vcpu->arch.regs.gpr[7] = svcpu->gpr[7];
++ vcpu->arch.regs.gpr[8] = svcpu->gpr[8];
++ vcpu->arch.regs.gpr[9] = svcpu->gpr[9];
++ vcpu->arch.regs.gpr[10] = svcpu->gpr[10];
++ vcpu->arch.regs.gpr[11] = svcpu->gpr[11];
++ vcpu->arch.regs.gpr[12] = svcpu->gpr[12];
++ vcpu->arch.regs.gpr[13] = svcpu->gpr[13];
+ vcpu->arch.cr = svcpu->cr;
+ vcpu->arch.xer = svcpu->xer;
+ vcpu->arch.ctr = svcpu->ctr;
+diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
+index 99c3620b40d9..6e41ba7ec8f4 100644
+--- a/arch/powerpc/kvm/book3s_xive_template.c
++++ b/arch/powerpc/kvm/book3s_xive_template.c
+@@ -334,7 +334,7 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
+ */
+
+ /* Return interrupt and old CPPR in GPR4 */
+- vcpu->arch.gpr[4] = hirq | (old_cppr << 24);
++ vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);
+
+ return H_SUCCESS;
+ }
+@@ -369,7 +369,7 @@ X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long
+ hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll);
+
+ /* Return interrupt and old CPPR in GPR4 */
+- vcpu->arch.gpr[4] = hirq | (xc->cppr << 24);
++ vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);
+
+ return H_SUCCESS;
+ }
+diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
+index 990db69a1d0b..8f871fb75228 100644
+--- a/arch/powerpc/kvm/e500_emulate.c
++++ b/arch/powerpc/kvm/e500_emulate.c
+@@ -53,7 +53,7 @@ static int dbell2prio(ulong param)
+
+ static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb)
+ {
+- ulong param = vcpu->arch.gpr[rb];
++ ulong param = vcpu->arch.regs.gpr[rb];
+ int prio = dbell2prio(param);
+
+ if (prio < 0)
+@@ -65,7 +65,7 @@ static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb)
+
+ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
+ {
+- ulong param = vcpu->arch.gpr[rb];
++ ulong param = vcpu->arch.regs.gpr[rb];
+ int prio = dbell2prio(rb);
+ int pir = param & PPC_DBELL_PIR_MASK;
+ int i;
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Avoid-marking-DMA-mapped-pages-dirty-in-real.patch b/patches.arch/KVM-PPC-Avoid-marking-DMA-mapped-pages-dirty-in-real.patch
new file mode 100644
index 0000000000..43829de79c
--- /dev/null
+++ b/patches.arch/KVM-PPC-Avoid-marking-DMA-mapped-pages-dirty-in-real.patch
@@ -0,0 +1,367 @@
+From bea02736a6ad85ae20edc853e3c5e0805afa7946 Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Mon, 10 Sep 2018 18:29:07 +1000
+Subject: [PATCH] KVM: PPC: Avoid marking DMA-mapped pages dirty in real mode
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc5
+Git-commit: 425333bf3a7743715c17e503049d0837d6c4a603
+
+At the moment the real mode handler of H_PUT_TCE calls iommu_tce_xchg_rm()
+which in turn reads the old TCE and if it was a valid entry, marks
+the physical page dirty if it was mapped for writing. Since it is in
+real mode, realmode_pfn_to_page() is used instead of pfn_to_page()
+to get the page struct. However SetPageDirty() itself reads the compound
+page head and returns a virtual address for the head page struct and
+setting dirty bit for that kills the system.
+
+This adds additional dirty bit tracking into the MM/IOMMU API for use
+in the real mode. Note that this does not change how VFIO and
+KVM (in virtual mode) set this bit. The KVM (real mode) changes include:
+- use the lowest bit of the cached host phys address to carry
+the dirty bit;
+- mark pages dirty when they are unpinned which happens when
+the preregistered memory is released which always happens in virtual
+mode;
+- add mm_iommu_ua_mark_dirty_rm() helper to set delayed dirty bit;
+- change iommu_tce_xchg_rm() to take the kvm struct for the mm to use
+in the new mm_iommu_ua_mark_dirty_rm() helper;
+- move iommu_tce_xchg_rm() to book3s_64_vio_hv.c (which is the only
+caller anyway) to reduce the real mode KVM and IOMMU knowledge
+across different subsystems.
+
+This removes realmode_pfn_to_page() as it is not used anymore.
+
+While we at it, remove some EXPORT_SYMBOL_GPL() as that code is for
+the real mode only and modules cannot call it anyway.
+
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/book3s/64/pgtable.h | 1 -
+ arch/powerpc/include/asm/iommu.h | 2 --
+ arch/powerpc/include/asm/mmu_context.h | 1 +
+ arch/powerpc/kernel/iommu.c | 25 -------------
+ arch/powerpc/kvm/book3s_64_vio_hv.c | 39 ++++++++++++++++-----
+ arch/powerpc/mm/init_64.c | 52 +---------------------------
+ arch/powerpc/mm/mmu_context_iommu.c | 34 +++++++++++++++---
+ 7 files changed, 63 insertions(+), 91 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
+index ae63ecd4000a..96c7c55fca28 100644
+--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
++++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
+@@ -1028,7 +1028,6 @@ static inline void vmemmap_remove_mapping(unsigned long start,
+ return hash__vmemmap_remove_mapping(start, page_size);
+ }
+ #endif
+-struct page *realmode_pfn_to_page(unsigned long pfn);
+
+ static inline pte_t pmd_pte(pmd_t pmd)
+ {
+diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
+index 470124740864..e734db857b5f 100644
+--- a/arch/powerpc/include/asm/iommu.h
++++ b/arch/powerpc/include/asm/iommu.h
+@@ -214,8 +214,6 @@ extern void iommu_del_device(struct device *dev);
+ extern int __init tce_iommu_bus_notifier_init(void);
+ extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+ unsigned long *hpa, enum dma_data_direction *direction);
+-extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
+- unsigned long *hpa, enum dma_data_direction *direction);
+ #else
+ static inline void iommu_register_group(struct iommu_table_group *table_group,
+ int pci_domain_number,
+diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
+index 04a557e2bb07..a87c5239f129 100644
+--- a/arch/powerpc/include/asm/mmu_context.h
++++ b/arch/powerpc/include/asm/mmu_context.h
+@@ -40,6 +40,7 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa);
+ extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa);
++extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua);
+ extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
+ extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
+ #endif
+diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
+index f2b724cd9e64..081ca2546afb 100644
+--- a/arch/powerpc/kernel/iommu.c
++++ b/arch/powerpc/kernel/iommu.c
+@@ -1014,31 +1014,6 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+ }
+ EXPORT_SYMBOL_GPL(iommu_tce_xchg);
+
+-#ifdef CONFIG_PPC_BOOK3S_64
+-long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
+- unsigned long *hpa, enum dma_data_direction *direction)
+-{
+- long ret;
+-
+- ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+-
+- if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+- (*direction == DMA_BIDIRECTIONAL))) {
+- struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
+-
+- if (likely(pg)) {
+- SetPageDirty(pg);
+- } else {
+- tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+- ret = -EFAULT;
+- }
+- }
+-
+- return ret;
+-}
+-EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
+-#endif
+-
+ int iommu_take_ownership(struct iommu_table *tbl)
+ {
+ unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
+diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
+index 841aef714929..6de0a7d57192 100644
+--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
++++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
+@@ -187,12 +187,35 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
+ EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
+
+ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+-static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry)
++static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
++ unsigned long entry, unsigned long *hpa,
++ enum dma_data_direction *direction)
++{
++ long ret;
++
++ ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
++
++ if (!ret && ((*direction == DMA_FROM_DEVICE) ||
++ (*direction == DMA_BIDIRECTIONAL))) {
++ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
++ /*
++ * kvmppc_rm_tce_iommu_do_map() updates the UA cache after
++ * calling this so we still get here a valid UA.
++ */
++ if (pua && *pua)
++ mm_iommu_ua_mark_dirty_rm(mm, be64_to_cpu(*pua));
++ }
++
++ return ret;
++}
++
++static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
++ unsigned long entry)
+ {
+ unsigned long hpa = 0;
+ enum dma_data_direction dir = DMA_NONE;
+
+- iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
++ iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
+ }
+
+ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
+@@ -228,7 +251,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
+ unsigned long hpa = 0;
+ long ret;
+
+- if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir))
++ if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir))
+ /*
+ * real mode xchg can fail if struct page crosses
+ * a page boundary
+@@ -240,7 +263,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
+
+ ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
+ if (ret)
+- iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
++ iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
+
+ return ret;
+ }
+@@ -290,7 +313,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
+ if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
+ return H_CLOSED;
+
+- ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
++ ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
+ if (ret) {
+ mm_iommu_mapped_dec(mem);
+ /*
+@@ -379,7 +402,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ return ret;
+
+ WARN_ON_ONCE_RM(1);
+- kvmppc_rm_clear_tce(stit->tbl, entry);
++ kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+ }
+
+ kvmppc_tce_put(stt, entry, tce);
+@@ -528,7 +551,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ goto unlock_exit;
+
+ WARN_ON_ONCE_RM(1);
+- kvmppc_rm_clear_tce(stit->tbl, entry);
++ kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+ }
+
+ kvmppc_tce_put(stt, entry + i, tce);
+@@ -579,7 +602,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
+ return ret;
+
+ WARN_ON_ONCE_RM(1);
+- kvmppc_rm_clear_tce(stit->tbl, entry);
++ kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+ }
+ }
+
+diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
+index ead129772012..e5d8d218f2c2 100644
+--- a/arch/powerpc/mm/init_64.c
++++ b/arch/powerpc/mm/init_64.c
+@@ -281,57 +281,7 @@ void register_page_bootmem_memmap(unsigned long section_nr,
+ struct page *start_page, unsigned long size)
+ {
+ }
+-
+-/*
+- * We do not have access to the sparsemem vmemmap, so we fallback to
+- * walking the list of sparsemem blocks which we already maintain for
+- * the sake of crashdump. In the long run, we might want to maintain
+- * a tree if performance of that linear walk becomes a problem.
+- *
+- * realmode_pfn_to_page functions can fail due to:
+- * 1) As real sparsemem blocks do not lay in RAM continously (they
+- * are in virtual address space which is not available in the real mode),
+- * the requested page struct can be split between blocks so get_page/put_page
+- * may fail.
+- * 2) When huge pages are used, the get_page/put_page API will fail
+- * in real mode as the linked addresses in the page struct are virtual
+- * too.
+- */
+-struct page *realmode_pfn_to_page(unsigned long pfn)
+-{
+- struct vmemmap_backing *vmem_back;
+- struct page *page;
+- unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
+- unsigned long pg_va = (unsigned long) pfn_to_page(pfn);
+-
+- for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) {
+- if (pg_va < vmem_back->virt_addr)
+- continue;
+-
+- /* After vmemmap_list entry free is possible, need check all */
+- if ((pg_va + sizeof(struct page)) <=
+- (vmem_back->virt_addr + page_size)) {
+- page = (struct page *) (vmem_back->phys + pg_va -
+- vmem_back->virt_addr);
+- return page;
+- }
+- }
+-
+- /* Probably that page struct is split between real pages */
+- return NULL;
+-}
+-EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
+-
+-#elif defined(CONFIG_FLATMEM)
+-
+-struct page *realmode_pfn_to_page(unsigned long pfn)
+-{
+- struct page *page = pfn_to_page(pfn);
+- return page;
+-}
+-EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
+-
+-#endif /* CONFIG_SPARSEMEM_VMEMMAP/CONFIG_FLATMEM */
++#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+ #ifdef CONFIG_PPC_BOOK3S_64
+ static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
+diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
+index 06156403481c..32dc0e4c425d 100644
+--- a/arch/powerpc/mm/mmu_context_iommu.c
++++ b/arch/powerpc/mm/mmu_context_iommu.c
+@@ -18,11 +18,15 @@
+ #include <linux/migrate.h>
+ #include <linux/hugetlb.h>
+ #include <linux/swap.h>
++#include <linux/sizes.h>
+ #include <asm/mmu_context.h>
+ #include <asm/pte-walk.h>
+
+ static DEFINE_MUTEX(mem_list_mutex);
+
++#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY 0x1
++#define MM_IOMMU_TABLE_GROUP_PAGE_MASK ~(SZ_4K - 1)
++
+ struct mm_iommu_table_group_mem_t {
+ struct list_head next;
+ struct rcu_head rcu;
+@@ -264,6 +268,9 @@ static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
+ if (!page)
+ continue;
+
++ if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
++ SetPageDirty(page);
++
+ put_page(page);
+ mem->hpas[i] = 0;
+ }
+@@ -361,7 +368,6 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
+
+ return ret;
+ }
+-EXPORT_SYMBOL_GPL(mm_iommu_lookup_rm);
+
+ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
+ unsigned long ua, unsigned long entries)
+@@ -391,7 +397,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+ if (pageshift > mem->pageshift)
+ return -EFAULT;
+
+- *hpa = *va | (ua & ~PAGE_MASK);
++ *hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
+
+ return 0;
+ }
+@@ -414,11 +420,31 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
+ if (!pa)
+ return -EFAULT;
+
+- *hpa = *pa | (ua & ~PAGE_MASK);
++ *hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
+
+ return 0;
+ }
+-EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm);
++
++extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
++{
++ struct mm_iommu_table_group_mem_t *mem;
++ long entry;
++ void *va;
++ unsigned long *pa;
++
++ mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
++ if (!mem)
++ return;
++
++ entry = (ua - mem->ua) >> PAGE_SHIFT;
++ va = &mem->hpas[entry];
++
++ pa = (void *) vmalloc_to_phys(va);
++ if (!pa)
++ return;
++
++ *pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
++}
+
+ long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
+ {
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book-3S-HV-Do-ptesync-in-radix-guest-exit-pa.patch b/patches.arch/KVM-PPC-Book-3S-HV-Do-ptesync-in-radix-guest-exit-pa.patch
new file mode 100644
index 0000000000..96bd8c0f47
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book-3S-HV-Do-ptesync-in-radix-guest-exit-pa.patch
@@ -0,0 +1,55 @@
+From df158189dbcc2e0ee29dc4b917d45ee5bf25a35e Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Thu, 17 May 2018 14:47:59 +1000
+Subject: [PATCH] KVM: PPC: Book 3S HV: Do ptesync in radix guest exit path
+
+References: bsc#1061840
+Patch-mainline: v4.17-rc7
+Git-commit: df158189dbcc2e0ee29dc4b917d45ee5bf25a35e
+
+A radix guest can execute tlbie instructions to invalidate TLB entries.
+After a tlbie or a group of tlbies, it must then do the architected
+sequence eieio; tlbsync; ptesync to ensure that the TLB invalidation
+has been processed by all CPUs in the system before it can rely on
+no CPU using any translation that it just invalidated.
+
+In fact it is the ptesync which does the actual synchronization in
+this sequence, and hardware has a requirement that the ptesync must
+be executed on the same CPU thread as the tlbies which it is expected
+to order. Thus, if a vCPU gets moved from one physical CPU to
+another after it has done some tlbies but before it can get to do the
+ptesync, the ptesync will not have the desired effect when it is
+executed on the second physical CPU.
+
+To fix this, we do a ptesync in the exit path for radix guests. If
+there are any pending tlbies, this will wait for them to complete.
+If there aren't, then ptesync will just do the same as sync.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index 25c32e421b57..07ca1b2a7966 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -1908,6 +1908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ cmpwi cr2, r0, 0
+ beq cr2, 4f
+
++ /*
++ * Radix: do eieio; tlbsync; ptesync sequence in case we
++ * interrupted the guest between a tlbie and a ptesync.
++ */
++ eieio
++ tlbsync
++ ptesync
++
+ /* Radix: Handle the case where the guest used an illegal PID */
+ LOAD_REG_ADDR(r4, mmu_base_pid)
+ lwz r3, VCPU_GUEST_PID(r9)
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-Add-MMIO-emulation-for-VMX-instructio.patch b/patches.arch/KVM-PPC-Book3S-Add-MMIO-emulation-for-VMX-instructio.patch
new file mode 100644
index 0000000000..4f83601af4
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-Add-MMIO-emulation-for-VMX-instructio.patch
@@ -0,0 +1,330 @@
+From 09f984961c137c4b252c368adab7e1c9f035fa59 Mon Sep 17 00:00:00 2001
+From: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
+Date: Sat, 3 Feb 2018 18:24:26 -0200
+Subject: [PATCH] KVM: PPC: Book3S: Add MMIO emulation for VMX instructions
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 09f984961c137c4b252c368adab7e1c9f035fa59
+
+This patch provides the MMIO load/store vector indexed
+X-Form emulation.
+
+Instructions implemented:
+lvx: the quadword in storage addressed by the result of EA &
+0xffff_ffff_ffff_fff0 is loaded into VRT.
+
+stvx: the contents of VRS are stored into the quadword in storage
+addressed by the result of EA & 0xffff_ffff_ffff_fff0.
+
+Reported-by: Gopesh Kumar Chaudhary <gopchaud@in.ibm.com>
+Reported-by: Balamuruhan S <bala24@linux.vnet.ibm.com>
+Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/kvm_host.h | 2 +
+ arch/powerpc/include/asm/kvm_ppc.h | 4 +
+ arch/powerpc/include/asm/ppc-opcode.h | 6 ++
+ arch/powerpc/kvm/emulate_loadstore.c | 36 ++++++++
+ arch/powerpc/kvm/powerpc.c | 150 ++++++++++++++++++++++++++++++++++
+ 5 files changed, 198 insertions(+)
+
+diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
+index fef8133becc8..1f53b562726f 100644
+--- a/arch/powerpc/include/asm/kvm_host.h
++++ b/arch/powerpc/include/asm/kvm_host.h
+@@ -690,6 +690,7 @@ struct kvm_vcpu_arch {
+ u8 mmio_vsx_offset;
+ u8 mmio_vsx_copy_type;
+ u8 mmio_vsx_tx_sx_enabled;
++ u8 mmio_vmx_copy_nums;
+ u8 osi_needed;
+ u8 osi_enabled;
+ u8 papr_enabled;
+@@ -804,6 +805,7 @@ struct kvm_vcpu_arch {
+ #define KVM_MMIO_REG_QPR 0x0040
+ #define KVM_MMIO_REG_FQPR 0x0060
+ #define KVM_MMIO_REG_VSX 0x0080
++#define KVM_MMIO_REG_VMX 0x00c0
+
+ #define __KVM_HAVE_ARCH_WQP
+ #define __KVM_HAVE_CREATE_DEVICE
+diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
+index 941c2a3f231b..28c203003519 100644
+--- a/arch/powerpc/include/asm/kvm_ppc.h
++++ b/arch/powerpc/include/asm/kvm_ppc.h
+@@ -81,6 +81,10 @@ extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int rt, unsigned int bytes,
+ int is_default_endian, int mmio_sign_extend);
++extern int kvmppc_handle_load128_by2x64(struct kvm_run *run,
++ struct kvm_vcpu *vcpu, unsigned int rt, int is_default_endian);
++extern int kvmppc_handle_store128_by2x64(struct kvm_run *run,
++ struct kvm_vcpu *vcpu, unsigned int rs, int is_default_endian);
+ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ u64 val, unsigned int bytes,
+ int is_default_endian);
+diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
+index ce0930d68857..a51febca08c5 100644
+--- a/arch/powerpc/include/asm/ppc-opcode.h
++++ b/arch/powerpc/include/asm/ppc-opcode.h
+@@ -156,6 +156,12 @@
+ #define OP_31_XOP_LFDX 599
+ #define OP_31_XOP_LFDUX 631
+
++/* VMX Vector Load Instructions */
++#define OP_31_XOP_LVX 103
++
++/* VMX Vector Store Instructions */
++#define OP_31_XOP_STVX 231
++
+ #define OP_LWZ 32
+ #define OP_STFS 52
+ #define OP_STFSU 53
+diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
+index af833531af31..a382e15135e6 100644
+--- a/arch/powerpc/kvm/emulate_loadstore.c
++++ b/arch/powerpc/kvm/emulate_loadstore.c
+@@ -58,6 +58,18 @@ static bool kvmppc_check_vsx_disabled(struct kvm_vcpu *vcpu)
+ }
+ #endif /* CONFIG_VSX */
+
++#ifdef CONFIG_ALTIVEC
++static bool kvmppc_check_altivec_disabled(struct kvm_vcpu *vcpu)
++{
++ if (!(kvmppc_get_msr(vcpu) & MSR_VEC)) {
++ kvmppc_core_queue_vec_unavail(vcpu);
++ return true;
++ }
++
++ return false;
++}
++#endif /* CONFIG_ALTIVEC */
++
+ /*
+ * XXX to do:
+ * lfiwax, lfiwzx
+@@ -98,6 +110,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
+ vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_NONE;
+ vcpu->arch.mmio_sp64_extend = 0;
+ vcpu->arch.mmio_sign_extend = 0;
++ vcpu->arch.mmio_vmx_copy_nums = 0;
+
+ switch (get_op(inst)) {
+ case 31:
+@@ -459,6 +472,29 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
+ rs, 4, 1);
+ break;
+ #endif /* CONFIG_VSX */
++
++#ifdef CONFIG_ALTIVEC
++ case OP_31_XOP_LVX:
++ if (kvmppc_check_altivec_disabled(vcpu))
++ return EMULATE_DONE;
++ vcpu->arch.vaddr_accessed &= ~0xFULL;
++ vcpu->arch.paddr_accessed &= ~0xFULL;
++ vcpu->arch.mmio_vmx_copy_nums = 2;
++ emulated = kvmppc_handle_load128_by2x64(run, vcpu,
++ KVM_MMIO_REG_VMX|rt, 1);
++ break;
++
++ case OP_31_XOP_STVX:
++ if (kvmppc_check_altivec_disabled(vcpu))
++ return EMULATE_DONE;
++ vcpu->arch.vaddr_accessed &= ~0xFULL;
++ vcpu->arch.paddr_accessed &= ~0xFULL;
++ vcpu->arch.mmio_vmx_copy_nums = 2;
++ emulated = kvmppc_handle_store128_by2x64(run, vcpu,
++ rs, 1);
++ break;
++#endif /* CONFIG_ALTIVEC */
++
+ default:
+ emulated = EMULATE_FAIL;
+ break;
+diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
+index cf86aeb43fcf..47c7a302fd03 100644
+--- a/arch/powerpc/kvm/powerpc.c
++++ b/arch/powerpc/kvm/powerpc.c
+@@ -924,6 +924,34 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
+ }
+ #endif /* CONFIG_VSX */
+
++#ifdef CONFIG_ALTIVEC
++static inline void kvmppc_set_vmx_dword(struct kvm_vcpu *vcpu,
++ u64 gpr)
++{
++ int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
++ u32 hi, lo;
++ u32 di;
++
++#ifdef __BIG_ENDIAN
++ hi = gpr >> 32;
++ lo = gpr & 0xffffffff;
++#else
++ lo = gpr >> 32;
++ hi = gpr & 0xffffffff;
++#endif
++
++ di = 2 - vcpu->arch.mmio_vmx_copy_nums; /* doubleword index */
++ if (di > 1)
++ return;
++
++ if (vcpu->arch.mmio_host_swabbed)
++ di = 1 - di;
++
++ VCPU_VSX_VR(vcpu, index).u[di * 2] = hi;
++ VCPU_VSX_VR(vcpu, index).u[di * 2 + 1] = lo;
++}
++#endif /* CONFIG_ALTIVEC */
++
+ #ifdef CONFIG_PPC_FPU
+ static inline u64 sp_to_dp(u32 fprs)
+ {
+@@ -1027,6 +1055,11 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
+ kvmppc_set_vsr_dword_dump(vcpu, gpr);
+ break;
+ #endif
++#ifdef CONFIG_ALTIVEC
++ case KVM_MMIO_REG_VMX:
++ kvmppc_set_vmx_dword(vcpu, gpr);
++ break;
++#endif
+ default:
+ BUG();
+ }
+@@ -1302,6 +1335,111 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
+ }
+ #endif /* CONFIG_VSX */
+
++#ifdef CONFIG_ALTIVEC
++/* handle quadword load access in two halves */
++int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
++ unsigned int rt, int is_default_endian)
++{
++ enum emulation_result emulated;
++
++ while (vcpu->arch.mmio_vmx_copy_nums) {
++ emulated = __kvmppc_handle_load(run, vcpu, rt, 8,
++ is_default_endian, 0);
++
++ if (emulated != EMULATE_DONE)
++ break;
++
++ vcpu->arch.paddr_accessed += run->mmio.len;
++ vcpu->arch.mmio_vmx_copy_nums--;
++ }
++
++ return emulated;
++}
++
++static inline int kvmppc_get_vmx_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
++{
++ vector128 vrs = VCPU_VSX_VR(vcpu, rs);
++ u32 di;
++ u64 w0, w1;
++
++ di = 2 - vcpu->arch.mmio_vmx_copy_nums; /* doubleword index */
++ if (di > 1)
++ return -1;
++
++ if (vcpu->arch.mmio_host_swabbed)
++ di = 1 - di;
++
++ w0 = vrs.u[di * 2];
++ w1 = vrs.u[di * 2 + 1];
++
++#ifdef __BIG_ENDIAN
++ *val = (w0 << 32) | w1;
++#else
++ *val = (w1 << 32) | w0;
++#endif
++ return 0;
++}
++
++/* handle quadword store in two halves */
++int kvmppc_handle_store128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
++ unsigned int rs, int is_default_endian)
++{
++ u64 val = 0;
++ enum emulation_result emulated = EMULATE_DONE;
++
++ vcpu->arch.io_gpr = rs;
++
++ while (vcpu->arch.mmio_vmx_copy_nums) {
++ if (kvmppc_get_vmx_data(vcpu, rs, &val) == -1)
++ return EMULATE_FAIL;
++
++ emulated = kvmppc_handle_store(run, vcpu, val, 8,
++ is_default_endian);
++ if (emulated != EMULATE_DONE)
++ break;
++
++ vcpu->arch.paddr_accessed += run->mmio.len;
++ vcpu->arch.mmio_vmx_copy_nums--;
++ }
++
++ return emulated;
++}
++
++static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu,
++ struct kvm_run *run)
++{
++ enum emulation_result emulated = EMULATE_FAIL;
++ int r;
++
++ vcpu->arch.paddr_accessed += run->mmio.len;
++
++ if (!vcpu->mmio_is_write) {
++ emulated = kvmppc_handle_load128_by2x64(run, vcpu,
++ vcpu->arch.io_gpr, 1);
++ } else {
++ emulated = kvmppc_handle_store128_by2x64(run, vcpu,
++ vcpu->arch.io_gpr, 1);
++ }
++
++ switch (emulated) {
++ case EMULATE_DO_MMIO:
++ run->exit_reason = KVM_EXIT_MMIO;
++ r = RESUME_HOST;
++ break;
++ case EMULATE_FAIL:
++ pr_info("KVM: MMIO emulation failed (VMX repeat)\n");
++ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
++ run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
++ r = RESUME_HOST;
++ break;
++ default:
++ r = RESUME_GUEST;
++ break;
++ }
++ return r;
++}
++#endif /* CONFIG_ALTIVEC */
++
+ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+ {
+ int r = 0;
+@@ -1421,6 +1559,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+ }
+ }
+ #endif
++#ifdef CONFIG_ALTIVEC
++ if (vcpu->arch.mmio_vmx_copy_nums > 0)
++ vcpu->arch.mmio_vmx_copy_nums--;
++
++ if (vcpu->arch.mmio_vmx_copy_nums > 0) {
++ r = kvmppc_emulate_mmio_vmx_loadstore(vcpu, run);
++ if (r == RESUME_HOST) {
++ vcpu->mmio_needed = 1;
++ return r;
++ }
++ }
++#endif
+ } else if (vcpu->arch.osi_needed) {
+ u64 *gprs = run->osi.gprs;
+ int i;
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-Allow-backing-bigger-guest-IOMMU-page.patch b/patches.arch/KVM-PPC-Book3S-Allow-backing-bigger-guest-IOMMU-page.patch
new file mode 100644
index 0000000000..30125ea287
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-Allow-backing-bigger-guest-IOMMU-page.patch
@@ -0,0 +1,248 @@
+From ca1fc489cfa06a554fd71eb46d8927614ec7e6f3 Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Mon, 14 May 2018 20:00:28 +1000
+Subject: [PATCH] KVM: PPC: Book3S: Allow backing bigger guest IOMMU pages with
+ smaller physical pages
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: ca1fc489cfa06a554fd71eb46d8927614ec7e6f3
+
+At the moment we only support in the host the IOMMU page sizes which
+the guest is aware of, which is 4KB/64KB/16MB. However P9 does not support
+16MB IOMMU pages, 2MB and 1GB pages are supported instead. We can still
+emulate bigger guest pages (for example 16MB) with smaller host pages
+(4KB/64KB/2MB).
+
+This allows the physical IOMMU pages to use a page size smaller or equal
+than the guest visible IOMMU page size.
+
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_vio.c | 64 +++++++++++++++++++++++++++++--------
+ arch/powerpc/kvm/book3s_64_vio_hv.c | 50 +++++++++++++++++++++++++----
+ 2 files changed, 94 insertions(+), 20 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
+index 041e54d26750..984f1978a19c 100644
+--- a/arch/powerpc/kvm/book3s_64_vio.c
++++ b/arch/powerpc/kvm/book3s_64_vio.c
+@@ -176,14 +176,12 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
+
+ if (!tbltmp)
+ continue;
+- /*
+- * Make sure hardware table parameters are exactly the same;
+- * this is used in the TCE handlers where boundary checks
+- * use only the first attached table.
+- */
+- if ((tbltmp->it_page_shift == stt->page_shift) &&
+- (tbltmp->it_offset == stt->offset) &&
+- (tbltmp->it_size == stt->size)) {
++ /* Make sure hardware table parameters are compatible */
++ if ((tbltmp->it_page_shift <= stt->page_shift) &&
++ (tbltmp->it_offset << tbltmp->it_page_shift ==
++ stt->offset << stt->page_shift) &&
++ (tbltmp->it_size << tbltmp->it_page_shift ==
++ stt->size << stt->page_shift)) {
+ /*
+ * Reference the table to avoid races with
+ * add/remove DMA windows.
+@@ -396,7 +394,7 @@ static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
+ return H_SUCCESS;
+ }
+
+-static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
++static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
+ struct iommu_table *tbl, unsigned long entry)
+ {
+ enum dma_data_direction dir = DMA_NONE;
+@@ -416,7 +414,24 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
+ return ret;
+ }
+
+-long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
++static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
++ struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
++ unsigned long entry)
++{
++ unsigned long i, ret = H_SUCCESS;
++ unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
++ unsigned long io_entry = entry * subpages;
++
++ for (i = 0; i < subpages; ++i) {
++ ret = kvmppc_tce_iommu_do_unmap(kvm, tbl, io_entry + i);
++ if (ret != H_SUCCESS)
++ break;
++ }
++
++ return ret;
++}
++
++long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
+ unsigned long entry, unsigned long ua,
+ enum dma_data_direction dir)
+ {
+@@ -453,6 +468,27 @@ long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
+ return 0;
+ }
+
++static long kvmppc_tce_iommu_map(struct kvm *kvm,
++ struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
++ unsigned long entry, unsigned long ua,
++ enum dma_data_direction dir)
++{
++ unsigned long i, pgoff, ret = H_SUCCESS;
++ unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
++ unsigned long io_entry = entry * subpages;
++
++ for (i = 0, pgoff = 0; i < subpages;
++ ++i, pgoff += IOMMU_PAGE_SIZE(tbl)) {
++
++ ret = kvmppc_tce_iommu_do_map(kvm, tbl,
++ io_entry + i, ua + pgoff, dir);
++ if (ret != H_SUCCESS)
++ break;
++ }
++
++ return ret;
++}
++
+ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba, unsigned long tce)
+ {
+@@ -491,10 +527,10 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ if (dir == DMA_NONE)
+- ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
++ ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
+ stit->tbl, entry);
+ else
+- ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl,
++ ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
+ entry, ua, dir);
+
+ if (ret == H_SUCCESS)
+@@ -570,7 +606,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ return H_PARAMETER;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+- ret = kvmppc_tce_iommu_map(vcpu->kvm,
++ ret = kvmppc_tce_iommu_map(vcpu->kvm, stt,
+ stit->tbl, entry + i, ua,
+ iommu_tce_direction(tce));
+
+@@ -618,7 +654,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
+ unsigned long entry = ioba >> stt->page_shift;
+
+ for (i = 0; i < npages; ++i) {
+- ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
++ ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
+ stit->tbl, entry + i);
+
+ if (ret == H_SUCCESS)
+diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
+index e220fabb2f5d..635f3ca8129a 100644
+--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
++++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
+@@ -221,7 +221,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
+ return H_SUCCESS;
+ }
+
+-static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
++static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
+ struct iommu_table *tbl, unsigned long entry)
+ {
+ enum dma_data_direction dir = DMA_NONE;
+@@ -245,7 +245,24 @@ static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
+ return ret;
+ }
+
+-static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
++static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
++ struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
++ unsigned long entry)
++{
++ unsigned long i, ret = H_SUCCESS;
++ unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
++ unsigned long io_entry = entry * subpages;
++
++ for (i = 0; i < subpages; ++i) {
++ ret = kvmppc_rm_tce_iommu_do_unmap(kvm, tbl, io_entry + i);
++ if (ret != H_SUCCESS)
++ break;
++ }
++
++ return ret;
++}
++
++static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
+ unsigned long entry, unsigned long ua,
+ enum dma_data_direction dir)
+ {
+@@ -290,6 +307,27 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
+ return 0;
+ }
+
++static long kvmppc_rm_tce_iommu_map(struct kvm *kvm,
++ struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
++ unsigned long entry, unsigned long ua,
++ enum dma_data_direction dir)
++{
++ unsigned long i, pgoff, ret = H_SUCCESS;
++ unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
++ unsigned long io_entry = entry * subpages;
++
++ for (i = 0, pgoff = 0; i < subpages;
++ ++i, pgoff += IOMMU_PAGE_SIZE(tbl)) {
++
++ ret = kvmppc_rm_tce_iommu_do_map(kvm, tbl,
++ io_entry + i, ua + pgoff, dir);
++ if (ret != H_SUCCESS)
++ break;
++ }
++
++ return ret;
++}
++
+ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba, unsigned long tce)
+ {
+@@ -327,10 +365,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ if (dir == DMA_NONE)
+- ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
++ ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, stt,
+ stit->tbl, entry);
+ else
+- ret = kvmppc_rm_tce_iommu_map(vcpu->kvm,
++ ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
+ stit->tbl, entry, ua, dir);
+
+ if (ret == H_SUCCESS)
+@@ -477,7 +515,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ return H_PARAMETER;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+- ret = kvmppc_rm_tce_iommu_map(vcpu->kvm,
++ ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
+ stit->tbl, entry + i, ua,
+ iommu_tce_direction(tce));
+
+@@ -529,7 +567,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
+ unsigned long entry = ioba >> stt->page_shift;
+
+ for (i = 0; i < npages; ++i) {
+- ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
++ ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, stt,
+ stit->tbl, entry + i);
+
+ if (ret == H_SUCCESS)
+--
+2.13.7
+
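For reference, the map/unmap wrappers added above simply split one guest-visible TCE entry into several host IOMMU entries whenever the guest page is larger than the hardware IOMMU page. A minimal stand-alone sketch of that arithmetic follows; the shift values (64K guest TCE pages over 4K IOMMU pages) and the entry/address values are illustrative assumptions, not taken from the patch.

#include <stdio.h>

int main(void)
{
	unsigned long stt_page_shift = 16;	/* assumed: 64K guest TCE pages */
	unsigned long it_page_shift = 12;	/* assumed: 4K host IOMMU pages */
	unsigned long entry = 5;		/* example guest TCE index */
	unsigned long ua = 0x100000;		/* example guest userspace address */

	unsigned long subpages = 1UL << (stt_page_shift - it_page_shift);
	unsigned long io_entry = entry * subpages;
	unsigned long i, pgoff;

	/* one guest entry becomes "subpages" consecutive host entries */
	for (i = 0, pgoff = 0; i < subpages;
	     ++i, pgoff += (1UL << it_page_shift))
		printf("host entry %lu <- ua 0x%lx\n", io_entry + i, ua + pgoff);

	return 0;
}

With these values subpages is 16, so guest entry 5 covers host entries 80..95, each offset by another 4K of the guest address.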
diff --git a/patches.arch/KVM-PPC-Book3S-Check-KVM_CREATE_SPAPR_TCE_64-paramet.patch b/patches.arch/KVM-PPC-Book3S-Check-KVM_CREATE_SPAPR_TCE_64-paramet.patch
new file mode 100644
index 0000000000..a4231d0fdb
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-Check-KVM_CREATE_SPAPR_TCE_64-paramet.patch
@@ -0,0 +1,43 @@
+From e45719af1caff16dbc0f6bf7bbfbc5e7a54738a5 Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Mon, 14 May 2018 20:00:29 +1000
+Subject: [PATCH] KVM: PPC: Book3S: Check KVM_CREATE_SPAPR_TCE_64 parameters
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: e45719af1caff16dbc0f6bf7bbfbc5e7a54738a5
+
+Although it does not seem possible to break the host by passing bad
+parameters when creating a TCE table in KVM, it is still better to get
+an early, clear indication of that than to debug the weird effects this
+might bring.
+
+This adds some sanity checks: the page size must be 4KB..16GB, as that is
+what LoPAPR actually supports, and the window must fit within the 64-bit
+space.
+
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Acked-by: Balbir Singh <bsingharora@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_vio.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
+index 984f1978a19c..80ead383d8ee 100644
+--- a/arch/powerpc/kvm/book3s_64_vio.c
++++ b/arch/powerpc/kvm/book3s_64_vio.c
+@@ -300,7 +300,8 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+ int ret = -ENOMEM;
+ int i;
+
+- if (!args->size)
++ if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
++ (args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
+ return -EINVAL;
+
+ size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
+--
+2.13.7
+
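The bounds in the hunk above amount to: TCE pages between 4KB (shift 12) and 16GB (shift 34), and a window whose end, measured in pages, still fits in a 64-bit I/O space once shifted. A hedged sketch of the same check outside the kernel, with a hypothetical argument struct standing in for the real uapi one:

#include <stdbool.h>
#include <stdint.h>

/* hypothetical mirror of the ioctl arguments, for illustration only */
struct tce_args {
	uint64_t offset;	/* window offset, in TCE pages */
	uint64_t size;		/* window size, in TCE pages */
	uint32_t page_shift;	/* log2 of the TCE page size */
};

static bool tce_args_valid(const struct tce_args *args)
{
	if (!args->size)
		return false;
	if (args->page_shift < 12 || args->page_shift > 34)	/* 4KB..16GB */
		return false;
	/* offset + size, in pages, must fit in 64 bits once shifted */
	if (args->offset + args->size > (UINT64_MAX >> args->page_shift))
		return false;
	return true;
}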
diff --git a/patches.arch/KVM-PPC-Book3S-Eliminate-some-unnecessary-checks.patch b/patches.arch/KVM-PPC-Book3S-Eliminate-some-unnecessary-checks.patch
new file mode 100644
index 0000000000..875ca1118c
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-Eliminate-some-unnecessary-checks.patch
@@ -0,0 +1,57 @@
+From 9aa6825bbb7526a7fdec137b7cc3b042581cd2fc Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Mon, 20 Nov 2017 19:56:27 +1100
+Subject: [PATCH] KVM: PPC: Book3S: Eliminate some unnecessary checks
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 9aa6825bbb7526a7fdec137b7cc3b042581cd2fc
+
+In an excess of caution, commit 6f63e81bda98 ("KVM: PPC: Book3S: Add
+MMIO emulation for FP and VSX instructions", 2017-02-21) included
+checks for the case that vcpu->arch.mmio_vsx_copy_nums is less than
+zero, even though its type is u8. This causes a Coverity warning,
+so we remove the check for < 0. We also adjust the associated
+comment to be more accurate ("4 or less" rather than "less than 4").
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/powerpc.c | 12 ++++--------
+ 1 file changed, 4 insertions(+), 8 deletions(-)
+
+diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
+index 6b6c53c42ac9..c2c7ef330553 100644
+--- a/arch/powerpc/kvm/powerpc.c
++++ b/arch/powerpc/kvm/powerpc.c
+@@ -1101,11 +1101,9 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ {
+ enum emulation_result emulated = EMULATE_DONE;
+
+- /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
+- if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
+- (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
++ /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
++ if (vcpu->arch.mmio_vsx_copy_nums > 4)
+ return EMULATE_FAIL;
+- }
+
+ while (vcpu->arch.mmio_vsx_copy_nums) {
+ emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
+@@ -1247,11 +1245,9 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+
+ vcpu->arch.io_gpr = rs;
+
+- /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
+- if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
+- (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
++ /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
++ if (vcpu->arch.mmio_vsx_copy_nums > 4)
+ return EMULATE_FAIL;
+- }
+
+ while (vcpu->arch.mmio_vsx_copy_nums) {
+ if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1)
+--
+2.13.7
+
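The dropped comparison was dead code because mmio_vsx_copy_nums is a u8, and an unsigned 8-bit value can never be negative. A tiny demonstration, not kernel code:

#include <stdio.h>

typedef unsigned char u8;

int main(void)
{
	u8 copy_nums = 0;

	copy_nums -= 1;		/* wraps to 255 rather than going below zero */
	/* the comparison is always false, which is what Coverity flagged */
	printf("copy_nums = %u, (copy_nums < 0) = %d\n",
	       copy_nums, copy_nums < 0);
	return 0;
}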
diff --git a/patches.arch/KVM-PPC-Book3S-Fix-compile-error-that-occurs-with-so.patch b/patches.arch/KVM-PPC-Book3S-Fix-compile-error-that-occurs-with-so.patch
new file mode 100644
index 0000000000..5b6d604ee4
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-Fix-compile-error-that-occurs-with-so.patch
@@ -0,0 +1,44 @@
+From 6df3877fc962c2bb3d0438633dfd24a185af6838 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Tue, 13 Feb 2018 15:45:21 +1100
+Subject: [PATCH] KVM: PPC: Book3S: Fix compile error that occurs with some gcc
+ versions
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc5
+Git-commit: 6df3877fc962c2bb3d0438633dfd24a185af6838
+
+Some versions of gcc generate a warning that the variable "emulated"
+may be used uninitialized in function kvmppc_handle_load128_by2x64().
+It would be used uninitialized if kvmppc_handle_load128_by2x64 was
+ever called with vcpu->arch.mmio_vmx_copy_nums == 0, but neither of
+the callers ever do that, so there is no actual bug. When gcc
+generates a warning, it causes the build to fail because arch/powerpc
+is compiled with -Werror.
+
+This silences the warning by initializing "emulated" to EMULATE_DONE.
+
+Fixes: 09f984961c13 ("KVM: PPC: Book3S: Add MMIO emulation for VMX instructions")
+Reported-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/powerpc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
+index 0083142c2f84..52c205373986 100644
+--- a/arch/powerpc/kvm/powerpc.c
++++ b/arch/powerpc/kvm/powerpc.c
+@@ -1345,7 +1345,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
+ int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int rt, int is_default_endian)
+ {
+- enum emulation_result emulated;
++ enum emulation_result emulated = EMULATE_DONE;
+
+ while (vcpu->arch.mmio_vmx_copy_nums) {
+ emulated = __kvmppc_handle_load(run, vcpu, rt, 8,
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-Fix-guest-DMA-when-guest-partially-ba.patch b/patches.arch/KVM-PPC-Book3S-Fix-guest-DMA-when-guest-partially-ba.patch
index e3c97f77d5..9d7caef1b9 100644
--- a/patches.arch/KVM-PPC-Book3S-Fix-guest-DMA-when-guest-partially-ba.patch
+++ b/patches.arch/KVM-PPC-Book3S-Fix-guest-DMA-when-guest-partially-ba.patch
@@ -1,4 +1,4 @@
-From 7f06bcc828113d5a92b27542ecd02131fa10cd1d Mon Sep 17 00:00:00 2001
+From 8cfbdbdc24815417a3ab35101ccf706b9a23ff17 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Thu, 23 Aug 2018 10:08:58 +1000
Subject: [PATCH] KVM: PPC: Book3S: Fix guest DMA when guest partially backed
@@ -57,14 +57,14 @@ Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Michal Suchanek <msuchanek@suse.de>
---
- arch/powerpc/mm/mmu_context_iommu.c | 19 +++++++++++--------
- 1 file changed, 11 insertions(+), 8 deletions(-)
+ arch/powerpc/mm/mmu_context_iommu.c | 17 ++++++++++-------
+ 1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
-index d0dbc56967d4..f633669cccab 100644
+index a4ca57612558..c9ee9e23845f 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
-@@ -130,6 +130,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+@@ -129,6 +129,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
long i, j, ret = 0, locked_entries = 0;
unsigned int pageshift;
unsigned long flags;
@@ -72,7 +72,7 @@ index d0dbc56967d4..f633669cccab 100644
struct page *page = NULL;
mutex_lock(&mem_list_mutex);
-@@ -178,7 +179,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+@@ -177,7 +178,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
}
for (i = 0; i < entries; ++i) {
@@ -82,7 +82,7 @@ index d0dbc56967d4..f633669cccab 100644
1/* pages */, 1/* iswrite */, &page)) {
ret = -EFAULT;
for (j = 0; j < i; ++j)
-@@ -197,7 +199,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+@@ -196,7 +198,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
if (is_migrate_cma_page(page)) {
if (mm_iommu_move_page_from_cma(page))
goto populate;
@@ -91,7 +91,7 @@ index d0dbc56967d4..f633669cccab 100644
1/* pages */, 1/* iswrite */,
&page)) {
ret = -EFAULT;
-@@ -211,21 +213,22 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+@@ -210,20 +212,21 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
}
populate:
pageshift = PAGE_SHIFT;
@@ -103,11 +103,9 @@ index d0dbc56967d4..f633669cccab 100644
+ unsigned int pteshift;
local_irq_save(flags); /* disables as well */
-- pte = find_linux_pte_or_hugepte(mm->pgd, ua, NULL,
-- &pageshift);
+- pte = find_linux_pte(mm->pgd, ua, NULL, &pageshift);
- local_irq_restore(flags);
-+ pte = find_linux_pte_or_hugepte(mm->pgd, cur_ua, NULL,
-+ &pteshift);
++ pte = find_linux_pte(mm->pgd, cur_ua, NULL, &pteshift);
/* Double check it is still the same pinned page */
if (pte && pte_page(*pte) == head &&
diff --git a/patches.arch/KVM-PPC-Book3S-Fix-matching-of-hardware-and-emulated.patch b/patches.arch/KVM-PPC-Book3S-Fix-matching-of-hardware-and-emulated.patch
new file mode 100644
index 0000000000..505f4047bc
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-Fix-matching-of-hardware-and-emulated.patch
@@ -0,0 +1,76 @@
+From 76346cd93a5eca33700f82685d56172dd65d4c0a Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Wed, 20 Jun 2018 18:42:58 +1000
+Subject: [PATCH] KVM: PPC: Book3S: Fix matching of hardware and emulated TCE
+ tables
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc1
+Git-commit: 76346cd93a5eca33700f82685d56172dd65d4c0a
+
+When attaching a hardware table to LIOBN in KVM, we match table parameters
+such as page size, table offset and table size. However the tables are
+created via very different paths - VFIO and KVM - and the VFIO path goes
+through the platform code, which has a minimum TCE page size requirement
+(which is 4K but since we allocate memory by pages and cannot avoid
+alignment anyway, we align to 64k pages for powernv_defconfig).
+
+So when we match the tables, one might be bigger than the other, which
+means the hardware table cannot get attached to LIOBN and DMA mapping
+fails.
+
+This removes the table size alignment from the guest visible table.
+This does not affect the memory allocation which is still aligned -
+kvmppc_tce_pages() takes care of this.
+
+This relaxes the check we do when attaching tables to allow the hardware
+table be bigger than the guest visible table.
+
+Ideally we want the KVM table to cover the same space as the hardware
+table does but since the hardware table may use multiple levels, and
+all levels must use the same table size (IODA2 design), the area it can
+actually cover might get very different from the window size which
+the guest requested, even though the guest won't map it all.
+
+Fixes: ca1fc489cf "KVM: PPC: Book3S: Allow backing bigger guest IOMMU pages with smaller physical pages"
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_vio.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
+index d066e37551ec..85396e93747f 100644
+--- a/arch/powerpc/kvm/book3s_64_vio.c
++++ b/arch/powerpc/kvm/book3s_64_vio.c
+@@ -180,7 +180,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
+ if ((tbltmp->it_page_shift <= stt->page_shift) &&
+ (tbltmp->it_offset << tbltmp->it_page_shift ==
+ stt->offset << stt->page_shift) &&
+- (tbltmp->it_size << tbltmp->it_page_shift ==
++ (tbltmp->it_size << tbltmp->it_page_shift >=
+ stt->size << stt->page_shift)) {
+ /*
+ * Reference the table to avoid races with
+@@ -296,7 +296,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+ {
+ struct kvmppc_spapr_tce_table *stt = NULL;
+ struct kvmppc_spapr_tce_table *siter;
+- unsigned long npages, size;
++ unsigned long npages, size = args->size;
+ int ret = -ENOMEM;
+ int i;
+
+@@ -304,7 +304,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+ (args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
+ return -EINVAL;
+
+- size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
+ npages = kvmppc_tce_pages(size);
+ ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
+ if (ret)
+--
+2.13.7
+
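A worked example of the relaxed comparison, with made-up numbers: a guest asks for a 16MB window of 64K TCE pages (256 entries) while the platform-allocated hardware table is rounded up to 512 entries of the same page size. The old equality test rejected the pair; the new check only requires the hardware table to cover at least the guest window. The offset and page-shift comparisons in the real code are left out of this sketch.

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	/* all values are illustrative assumptions */
	unsigned long stt_size = 256, stt_page_shift = 16;	/* guest: 16MB window */
	unsigned long tbl_size = 512, tbl_page_shift = 16;	/* host table, rounded up */

	bool old_match = (tbl_size << tbl_page_shift) ==
			 (stt_size << stt_page_shift);
	bool new_match = (tbl_size << tbl_page_shift) >=
			 (stt_size << stt_page_shift);

	printf("old (==) match: %d, new (>=) match: %d\n", old_match, new_match);
	return 0;
}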
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Add-of_node_put-in-success-path.patch b/patches.arch/KVM-PPC-Book3S-HV-Add-of_node_put-in-success-path.patch
new file mode 100644
index 0000000000..6d35e7ea18
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Add-of_node_put-in-success-path.patch
@@ -0,0 +1,39 @@
+From 51eaa08f029c7343df846325d7cf047be8b96e81 Mon Sep 17 00:00:00 2001
+From: Nicholas Mc Guire <hofrat@osadl.org>
+Date: Sat, 7 Jul 2018 08:53:07 +0200
+Subject: [PATCH] KVM: PPC: Book3S HV: Add of_node_put() in success path
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc1
+Git-commit: 51eaa08f029c7343df846325d7cf047be8b96e81
+
+The call to of_find_compatible_node() is returning a pointer with
+incremented refcount so it must be explicitly decremented after the
+last use. As it is only being used here to check for node presence,
+and the result is not actually used in the success path, the reference
+can be dropped immediately.
+
+Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
+Fixes: commit f725758b899f ("KVM: PPC: Book3S HV: Use OPAL XICS emulation on POWER9")
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index de686b340f4a..fba21c91b2ff 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -4561,6 +4561,8 @@ static int kvmppc_book3s_init_hv(void)
+ pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
+ return -ENODEV;
+ }
++ /* presence of intc confirmed - node can be dropped again */
++ of_node_put(np);
+ }
+ #endif
+
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Add-online-register-to-ONE_REG-int.patch b/patches.arch/KVM-PPC-Book3S-HV-Add-online-register-to-ONE_REG-int.patch
new file mode 100644
index 0000000000..b8cef6a08c
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Add-online-register-to-ONE_REG-int.patch
@@ -0,0 +1,76 @@
+From a1f158262a3e00fe396f2d21ef1cffdfc29226dc Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Fri, 20 Apr 2018 15:33:21 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Add 'online' register to ONE_REG
+ interface
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: a1f158262a3e00fe396f2d21ef1cffdfc29226dc
+
+This adds a new KVM_REG_PPC_ONLINE register which userspace can set
+to 0 or 1 via the GET/SET_ONE_REG interface to indicate whether it
+considers the VCPU to be offline (0), that is, not currently running,
+or online (1). This will be used in a later patch to configure the
+register which controls PURR and SPURR accumulation.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/kvm_host.h | 2 ++
+ arch/powerpc/include/uapi/asm/kvm.h | 1 +
+ arch/powerpc/kvm/book3s_hv.c | 6 ++++++
+ 3 files changed, 9 insertions(+)
+
+diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
+index 17498e9a26e4..9703f8f229c9 100644
+--- a/arch/powerpc/include/asm/kvm_host.h
++++ b/arch/powerpc/include/asm/kvm_host.h
+@@ -772,6 +772,8 @@ struct kvm_vcpu_arch {
+ u64 busy_preempt;
+
+ u32 emul_inst;
++
++ u32 online;
+ #endif
+
+ #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
+index 833ed9a16adf..1b32b56a03d3 100644
+--- a/arch/powerpc/include/uapi/asm/kvm.h
++++ b/arch/powerpc/include/uapi/asm/kvm.h
+@@ -633,6 +633,7 @@ struct kvm_ppc_cpu_char {
+ #define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
+
+ #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
++#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
+
+ /* Transactional Memory checkpointed state:
+ * This is all GPRs, all VSX regs and a subset of SPRs
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index 9963f65c212b..04bd71796098 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -1526,6 +1526,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
+ *val = get_reg_val(id, vcpu->arch.dec_expires +
+ vcpu->arch.vcore->tb_offset);
+ break;
++ case KVM_REG_PPC_ONLINE:
++ *val = get_reg_val(id, vcpu->arch.online);
++ break;
+ default:
+ r = -EINVAL;
+ break;
+@@ -1757,6 +1760,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
+ vcpu->arch.dec_expires = set_reg_val(id, *val) -
+ vcpu->arch.vcore->tb_offset;
+ break;
++ case KVM_REG_PPC_ONLINE:
++ vcpu->arch.online = set_reg_val(id, *val);
++ break;
+ default:
+ r = -EINVAL;
+ break;
+--
+2.13.7
+
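From the userspace side, the new register is driven through the ordinary GET/SET_ONE_REG ioctls. A hedged sketch of how a VMM might mark a vcpu online; it assumes a valid vcpu file descriptor and uapi headers that provide KVM_SET_ONE_REG and the powerpc-only KVM_REG_PPC_ONLINE definition added above:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_vcpu_online(int vcpu_fd, uint32_t online)
{
	struct kvm_one_reg reg = {
		.id = KVM_REG_PPC_ONLINE,	/* 32-bit PPC one_reg id */
		.addr = (uintptr_t)&online,	/* userspace buffer holding 0 or 1 */
	};

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}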
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Allow-HPT-and-radix-on-the-same-co.patch b/patches.arch/KVM-PPC-Book3S-HV-Allow-HPT-and-radix-on-the-same-co.patch
new file mode 100644
index 0000000000..e916acbf37
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Allow-HPT-and-radix-on-the-same-co.patch
@@ -0,0 +1,105 @@
+From 00608e1f007e4cf6031485c5630e0e504bceef9b Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Thu, 11 Jan 2018 16:54:26 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Allow HPT and radix on the same core for
+ POWER9 v2.2
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 00608e1f007e4cf6031485c5630e0e504bceef9b
+
+POWER9 chip versions starting with "Nimbus" v2.2 can support running
+with some threads of a core in HPT mode and others in radix mode.
+This means that we don't have to prohibit independent-threads mode
+when running a HPT guest on a radix host, and we don't have to do any
+of the synchronization between threads that was introduced in commit
+c01015091a77 ("KVM: PPC: Book3S HV: Run HPT guests on POWER9 radix
+hosts", 2017-10-19).
+
+Rather than using up another CPU feature bit, we just do an
+explicit test on the PVR (processor version register) at module
+startup time to determine whether we have to take steps to avoid
+having some threads in HPT mode and some in radix mode (so-called
+"mixed mode"). We test for "Nimbus" (indicated by 0 or 1 in the top
+nibble of the lower 16 bits) v2.2 or later, or "Cumulus" (indicated by
+2 or 3 in that nibble) v1.1 or later.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv.c | 30 ++++++++++++++++++++++++------
+ 1 file changed, 24 insertions(+), 6 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index b2d448c75008..76cf48051eb3 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -118,6 +118,9 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
+ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
+ #endif
+
++/* If set, the threads on each CPU core have to be in the same MMU mode */
++static bool no_mixing_hpt_and_radix;
++
+ static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
+ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
+
+@@ -2386,8 +2389,8 @@ static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
+ static bool subcore_config_ok(int n_subcores, int n_threads)
+ {
+ /*
+- * POWER9 "SMT4" cores are permanently in what is effectively a 4-way split-core
+- * mode, with one thread per subcore.
++ * POWER9 "SMT4" cores are permanently in what is effectively a 4-way
++ * split-core mode, with one thread per subcore.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return n_subcores <= 4 && n_threads == 1;
+@@ -2423,8 +2426,8 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return false;
+
+- /* POWER9 currently requires all threads to be in the same MMU mode */
+- if (cpu_has_feature(CPU_FTR_ARCH_300) &&
++ /* Some POWER9 chips require all threads to be in the same MMU mode */
++ if (no_mixing_hpt_and_radix &&
+ kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
+ return false;
+
+@@ -2687,9 +2690,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
+ * threads are offline. Also check if the number of threads in this
+ * guest are greater than the current system threads per guest.
+ * On POWER9, we need to be not in independent-threads mode if
+- * this is a HPT guest on a radix host.
++ * this is a HPT guest on a radix host machine where the
++ * CPU threads may not be in different MMU modes.
+ */
+- hpt_on_radix = radix_enabled() && !kvm_is_radix(vc->kvm);
++ hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() &&
++ !kvm_is_radix(vc->kvm);
+ if (((controlled_threads > 1) &&
+ ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
+ (hpt_on_radix && vc->kvm->arch.threads_indep)) {
+@@ -4446,6 +4451,19 @@ static int kvmppc_book3s_init_hv(void)
+
+ if (kvmppc_radix_possible())
+ r = kvmppc_radix_init();
++
++ /*
++ * POWER9 chips before version 2.02 can't have some threads in
++ * HPT mode and some in radix mode on the same core.
++ */
++ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
++ unsigned int pvr = mfspr(SPRN_PVR);
++ if ((pvr >> 16) == PVR_POWER9 &&
++ (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
++ ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
++ no_mixing_hpt_and_radix = true;
++ }
++
+ return r;
+ }
+
+--
+2.13.7
+
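The module-init test above keys off the PVR layout: the upper 16 bits name the chip (PVR_POWER9 is assumed here to be 0x004e, matching the kernel definition), the top nibble of the lower 16 bits distinguishes "Nimbus" (0 or 1) from "Cumulus" (2 or 3), and the low 12 bits carry the chip version. A stand-alone sketch of the same decode, with example PVR values that are assumptions:

#include <stdbool.h>
#include <stdio.h>

#define PVR_POWER9 0x004e	/* assumed to match the kernel definition */

static bool no_mixing_hpt_and_radix(unsigned int pvr)
{
	if ((pvr >> 16) != PVR_POWER9)
		return false;
	/* Nimbus (top nibble 0/1) below 0x202, Cumulus (2/3) below 0x101 */
	return ((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
	       ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101);
}

int main(void)
{
	printf("Nimbus v2.1: %d\n",
	       no_mixing_hpt_and_radix((PVR_POWER9 << 16) | 0x0201));	/* 1 */
	printf("Nimbus v2.2: %d\n",
	       no_mixing_hpt_and_radix((PVR_POWER9 << 16) | 0x0202));	/* 0 */
	return 0;
}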
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Allow-creating-max-number-of-VCPUs.patch b/patches.arch/KVM-PPC-Book3S-HV-Allow-creating-max-number-of-VCPUs.patch
new file mode 100644
index 0000000000..cae8d16646
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Allow-creating-max-number-of-VCPUs.patch
@@ -0,0 +1,53 @@
+From 1ebe6b81ebdba8faf377d1d7d84ad9368e7a0bae Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Thu, 26 Jul 2018 14:53:54 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Allow creating max number of VCPUs on
+ POWER9
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc1
+Git-commit: 1ebe6b81ebdba8faf377d1d7d84ad9368e7a0bae
+
+Commit 1e175d2 ("KVM: PPC: Book3S HV: Pack VCORE IDs to access full
+VCPU ID space", 2018-07-25) allowed use of VCPU IDs up to
+KVM_MAX_VCPU_ID on POWER9 in all guest SMT modes and guest emulated
+hardware SMT modes. However, with the current definition of
+KVM_MAX_VCPU_ID, a guest SMT mode of 1 and an emulated SMT mode of 8,
+it is only possible to create KVM_MAX_VCPUS / 2 VCPUs, because
+threads_per_subcore is 4 on POWER9 CPUs. (Using an emulated SMT mode
+of 8 is useful when migrating VMs to or from POWER8 hosts.)
+
+This increases KVM_MAX_VCPU_ID to 8 * KVM_MAX_VCPUS when HV KVM is
+configured in, so that a full complement of KVM_MAX_VCPUS VCPUs can
+be created on POWER9 in all guest SMT modes and emulated hardware
+SMT modes.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/kvm_host.h | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
+index 5b9e6608c5bf..906bcbdfd2a1 100644
+--- a/arch/powerpc/include/asm/kvm_host.h
++++ b/arch/powerpc/include/asm/kvm_host.h
+@@ -42,7 +42,14 @@
+ #define KVM_USER_MEM_SLOTS 512
+
+ #include <asm/cputhreads.h>
+-#define KVM_MAX_VCPU_ID (threads_per_subcore * KVM_MAX_VCORES)
++
++#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
++#include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */
++#define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES)
++
++#else
++#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
++#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+ #define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Avoid-crash-from-THP-collapse-duri.patch b/patches.arch/KVM-PPC-Book3S-HV-Avoid-crash-from-THP-collapse-duri.patch
new file mode 100644
index 0000000000..778c03cd2e
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Avoid-crash-from-THP-collapse-duri.patch
@@ -0,0 +1,60 @@
+From 6579804c431712d56956a63b1a01509441cc6800 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Thu, 4 Oct 2018 14:51:11 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Avoid crash from THP collapse during
+ radix page fault
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc7
+Git-commit: 6579804c431712d56956a63b1a01509441cc6800
+
+Commit 71d29f43b633 ("KVM: PPC: Book3S HV: Don't use compound_order to
+determine host mapping size", 2018-09-11) added a call to
+__find_linux_pte() and a dereference of the returned PTE pointer to the
+radix page fault path in the common case where the page is normal
+system memory. Previously, __find_linux_pte() was only called for
+mappings to physical addresses which don't have a page struct (e.g.
+memory-mapped I/O) or where the page struct is marked as reserved
+memory.
+
+This exposes us to the possibility that the returned PTE pointer
+could be NULL, for example in the case of a concurrent THP collapse
+operation. Dereferencing the returned NULL pointer causes a host
+crash.
+
+To fix this, we check for NULL, and if it is NULL, we retry the
+operation by returning to the guest, with the expectation that it
+will generate the same page fault again (unless of course it has
+been fixed up by another CPU in the meantime).
+
+Fixes: 71d29f43b633 ("KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size")
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index 933c574e1cf7..998f8d089ac7 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -646,6 +646,16 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ */
+ local_irq_disable();
+ ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
++ /*
++ * If the PTE disappeared temporarily due to a THP
++ * collapse, just return and let the guest try again.
++ */
++ if (!ptep) {
++ local_irq_enable();
++ if (page)
++ put_page(page);
++ return RESUME_GUEST;
++ }
+ pte = *ptep;
+ local_irq_enable();
+
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Avoid-shifts-by-negative-amounts.patch b/patches.arch/KVM-PPC-Book3S-HV-Avoid-shifts-by-negative-amounts.patch
new file mode 100644
index 0000000000..f9a3992d34
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Avoid-shifts-by-negative-amounts.patch
@@ -0,0 +1,93 @@
+From cda2eaa35948893d70145490d5d6ded546fc3bc6 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Fri, 10 Nov 2017 16:40:24 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Avoid shifts by negative amounts
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: cda2eaa35948893d70145490d5d6ded546fc3bc6
+
+The kvmppc_hpte_page_shifts function decodes the actual and base page
+sizes for a HPTE, returning -1 if it doesn't recognize the page size
+encoding. This then gets used as a shift amount in various places,
+which is undefined behaviour. This was reported by Coverity.
+
+In fact this should never occur, since we should only get HPTEs in the
+HPT which have a recognized page size encoding. The only place where
+this might not be true is in the call to kvmppc_actual_pgsz() near the
+beginning of kvmppc_do_h_enter(), where we are validating the HPTE
+value passed in from the guest.
+
+So to fix this and eliminate the undefined behaviour, we make
+kvmppc_hpte_page_shifts return 0 for unrecognized page size encodings,
+and make kvmppc_actual_pgsz() detect that case and return 0 for the
+page size, which will then cause kvmppc_do_h_enter() to return an
+error and refuse to insert any HPTE with an unrecognized page size
+encoding.
+
+To ensure that we don't get undefined behaviour in compute_tlbie_rb(),
+we take the 4k page size path for any unrecognized page size encoding.
+This should never be hit in practice because it is only used on HPTE
+values which have previously been checked for having a recognized
+page size encoding.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/kvm_book3s_64.h | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
+index 735cfa35298a..998f7b7aaa9e 100644
+--- a/arch/powerpc/include/asm/kvm_book3s_64.h
++++ b/arch/powerpc/include/asm/kvm_book3s_64.h
+@@ -122,13 +122,13 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
+ lphi = (l >> 16) & 0xf;
+ switch ((l >> 12) & 0xf) {
+ case 0:
+- return !lphi ? 24 : -1; /* 16MB */
++ return !lphi ? 24 : 0; /* 16MB */
+ break;
+ case 1:
+ return 16; /* 64kB */
+ break;
+ case 3:
+- return !lphi ? 34 : -1; /* 16GB */
++ return !lphi ? 34 : 0; /* 16GB */
+ break;
+ case 7:
+ return (16 << 8) + 12; /* 64kB in 4kB */
+@@ -140,7 +140,7 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
+ return (24 << 8) + 12; /* 16MB in 4kB */
+ break;
+ }
+- return -1;
++ return 0;
+ }
+
+ static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l)
+@@ -159,7 +159,11 @@ static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l
+
+ static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r)
+ {
+- return 1ul << kvmppc_hpte_actual_page_shift(v, r);
++ int shift = kvmppc_hpte_actual_page_shift(v, r);
++
++ if (shift)
++ return 1ul << shift;
++ return 0;
+ }
+
+ static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift)
+@@ -232,7 +236,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
+ va_low ^= v >> (SID_SHIFT_1T - 16);
+ va_low &= 0x7ff;
+
+- if (b_pgshift == 12) {
++ if (b_pgshift <= 12) {
+ if (a_pgshift > 12) {
+ sllp = (a_pgshift == 16) ? 5 : 4;
+ rb |= sllp << 5; /* AP field */
+--
+2.13.7
+
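The reason a 0 return is preferable to -1 is that the callers feed the result straight into a shift, and shifting by a negative amount is undefined behaviour in C, whereas a shift count of 0 yields a harmless value the caller can reject. A small illustration, simplified and not the kernel code:

#include <stdio.h>

static unsigned long pgsz_from_shift(int shift)
{
	if (shift <= 0)		/* unrecognized encoding: report "no size" */
		return 0;
	return 1UL << shift;	/* well defined for 1..63 on 64-bit longs */
}

int main(void)
{
	printf("shift 24 -> %lu bytes (16MB)\n", pgsz_from_shift(24));
	printf("shift 0  -> %lu (caller rejects the HPTE)\n", pgsz_from_shift(0));
	return 0;
}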
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Check-DR-not-IR-to-chose-real-vs-v.patch b/patches.arch/KVM-PPC-Book3S-HV-Check-DR-not-IR-to-chose-real-vs-v.patch
new file mode 100644
index 0000000000..1aa3fcd8c8
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Check-DR-not-IR-to-chose-real-vs-v.patch
@@ -0,0 +1,33 @@
+From 2662efd050953824de5c9b24449d6b5b342db10b Mon Sep 17 00:00:00 2001
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Date: Fri, 12 Jan 2018 13:37:14 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Check DR not IR to chose real vs virt
+ mode MMIOs
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 2662efd050953824de5c9b24449d6b5b342db10b
+
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index 34dbab7deb39..948f21cf84d5 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -1464,7 +1464,7 @@ mc_cont:
+ li r7, TM_SPC_PULL_OS_CTX
+ li r6, TM_QW1_OS
+ mfmsr r0
+- andi. r0, r0, MSR_IR /* in real mode? */
++ andi. r0, r0, MSR_DR /* in real mode? */
+ beq 2f
+ ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
+ cmpldi cr0, r10, 0
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Disable-tb_offset.patch b/patches.arch/KVM-PPC-Book3S-HV-Disable-tb_offset.patch
deleted file mode 100644
index 21d50fabf9..0000000000
--- a/patches.arch/KVM-PPC-Book3S-HV-Disable-tb_offset.patch
+++ /dev/null
@@ -1,30 +0,0 @@
-From 2f6de67a1831308fbb4fc883e5546fb712dde6f7 Mon Sep 17 00:00:00 2001
-From: Michal Suchanek <msuchanek@suse.de>
-Date: Thu, 22 Mar 2018 17:41:33 +0100
-Subject: [PATCH] KVM: PPC: Book3S HV: Disable tb_offset
-
-References: bsc#1086196
-Patch-mainline: no, testing
-
-This seems to improve timing sensitive workloads running in KVM guests.
-
-Signed-off-by: Michal Suchanek <msuchanek@suse.de>
----
- arch/powerpc/kvm/book3s_hv.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
-index 899b3a769082..aaa2045b6385 100644
---- a/arch/powerpc/kvm/book3s_hv.c
-+++ b/arch/powerpc/kvm/book3s_hv.c
-@@ -1642,6 +1642,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
- r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
- break;
- case KVM_REG_PPC_TB_OFFSET:
-+ break;
- /*
- * POWER9 DD1 has an erratum where writing TBU40 causes
- * the timebase to lose ticks. So we don't let the
---
-2.13.6
-
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Do-SLB-load-unload-with-guest-LPCR.patch b/patches.arch/KVM-PPC-Book3S-HV-Do-SLB-load-unload-with-guest-LPCR.patch
new file mode 100644
index 0000000000..6063b21342
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Do-SLB-load-unload-with-guest-LPCR.patch
@@ -0,0 +1,212 @@
+From 6964e6a4e4894c707e42d51d9d30683c57f43201 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Thu, 11 Jan 2018 14:51:02 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Do SLB load/unload with guest LPCR value
+ loaded
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 6964e6a4e4894c707e42d51d9d30683c57f43201
+
+This moves the code that loads and unloads the guest SLB values so that
+it is done while the guest LPCR value is loaded in the LPCR register.
+The reason for doing this is that on POWER9, the behaviour of the
+slbmte instruction depends on the LPCR[UPRT] bit. If UPRT is 1, as
+it is for a radix host (or guest), the SLB index is truncated to
+2 bits. This means that for a HPT guest on a radix host, the SLB
+was not being loaded correctly, causing the guest to crash.
+
+The SLB is now loaded much later in the guest entry path, after the
+LPCR is loaded, which for a secondary thread is after it sees that
+the primary thread has switched the MMU to the guest. The loop that
+waits for the primary thread has a branch out to the exit code that
+is taken if it sees that other threads have commenced exiting the
+guest. Since we have now not loaded the SLB at this point, we make
+this path branch to a new label 'guest_bypass' and we move the SLB
+unload code to before this label.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 109 ++++++++++++++++----------------
+ 1 file changed, 55 insertions(+), 54 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index 76332a3f6c0d..30ece4cebaf5 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -617,13 +617,6 @@ kvmppc_hv_entry:
+ lbz r0, KVM_RADIX(r9)
+ cmpwi cr7, r0, 0
+
+- /* Clear out SLB if hash */
+- bne cr7, 2f
+- li r6,0
+- slbmte r6,r6
+- slbia
+- ptesync
+-2:
+ /*
+ * POWER7/POWER8 host -> guest partition switch code.
+ * We don't have to lock against concurrent tlbies,
+@@ -738,19 +731,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ 10: cmpdi r4, 0
+ beq kvmppc_primary_no_guest
+ kvmppc_got_guest:
+-
+- /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
+- lwz r5,VCPU_SLB_MAX(r4)
+- cmpwi r5,0
+- beq 9f
+- mtctr r5
+- addi r6,r4,VCPU_SLB
+-1: ld r8,VCPU_SLB_E(r6)
+- ld r9,VCPU_SLB_V(r6)
+- slbmte r9,r8
+- addi r6,r6,VCPU_SLB_SIZE
+- bdnz 1b
+-9:
+ /* Increment yield count if they have a VPA */
+ ld r3, VCPU_VPA(r4)
+ cmpdi r3, 0
+@@ -1017,6 +997,29 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
+ cmpdi r3, 512 /* 1 microsecond */
+ blt hdec_soon
+
++ /* For hash guest, clear out and reload the SLB */
++ ld r6, VCPU_KVM(r4)
++ lbz r0, KVM_RADIX(r6)
++ cmpwi r0, 0
++ bne 9f
++ li r6, 0
++ slbmte r6, r6
++ slbia
++ ptesync
++
++ /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
++ lwz r5,VCPU_SLB_MAX(r4)
++ cmpwi r5,0
++ beq 9f
++ mtctr r5
++ addi r6,r4,VCPU_SLB
++1: ld r8,VCPU_SLB_E(r6)
++ ld r9,VCPU_SLB_V(r6)
++ slbmte r9,r8
++ addi r6,r6,VCPU_SLB_SIZE
++ bdnz 1b
++9:
++
+ #ifdef CONFIG_KVM_XICS
+ /* We are entering the guest on that thread, push VCPU to XIVE */
+ ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
+@@ -1193,7 +1196,7 @@ hdec_soon:
+ addi r3, r4, VCPU_TB_RMEXIT
+ bl kvmhv_accumulate_time
+ #endif
+- b guest_exit_cont
++ b guest_bypass
+
+ /******************************************************************************
+ * *
+@@ -1481,34 +1484,12 @@ mc_cont:
+ 1:
+ #endif /* CONFIG_KVM_XICS */
+
+- mr r3, r12
+- /* Increment exit count, poke other threads to exit */
+- bl kvmhv_commence_exit
+- nop
+- ld r9, HSTATE_KVM_VCPU(r13)
+- lwz r12, VCPU_TRAP(r9)
+-
+- /* Stop others sending VCPU interrupts to this physical CPU */
+- li r0, -1
+- stw r0, VCPU_CPU(r9)
+- stw r0, VCPU_THREAD_CPU(r9)
+-
+- /* Save guest CTRL register, set runlatch to 1 */
+- mfspr r6,SPRN_CTRLF
+- stw r6,VCPU_CTRL(r9)
+- andi. r0,r6,1
+- bne 4f
+- ori r6,r6,1
+- mtspr SPRN_CTRLT,r6
+-4:
+- /* Check if we are running hash or radix and store it in cr2 */
++ /* For hash guest, read the guest SLB and save it away */
+ ld r5, VCPU_KVM(r9)
+ lbz r0, KVM_RADIX(r5)
+- cmpwi cr2,r0,0
+-
+- /* Read the guest SLB and save it away */
+ li r5, 0
+- bne cr2, 3f /* for radix, save 0 entries */
++ cmpwi r0, 0
++ bne 3f /* for radix, save 0 entries */
+ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
+ mtctr r0
+ li r6,0
+@@ -1524,8 +1505,34 @@ mc_cont:
+ addi r5,r5,1
+ 2: addi r6,r6,1
+ bdnz 1b
++ /* Finally clear out the SLB */
++ li r0,0
++ slbmte r0,r0
++ slbia
++ ptesync
+ 3: stw r5,VCPU_SLB_MAX(r9)
+
++guest_bypass:
++ mr r3, r12
++ /* Increment exit count, poke other threads to exit */
++ bl kvmhv_commence_exit
++ nop
++ ld r9, HSTATE_KVM_VCPU(r13)
++ lwz r12, VCPU_TRAP(r9)
++
++ /* Stop others sending VCPU interrupts to this physical CPU */
++ li r0, -1
++ stw r0, VCPU_CPU(r9)
++ stw r0, VCPU_THREAD_CPU(r9)
++
++ /* Save guest CTRL register, set runlatch to 1 */
++ mfspr r6,SPRN_CTRLF
++ stw r6,VCPU_CTRL(r9)
++ andi. r0,r6,1
++ bne 4f
++ ori r6,r6,1
++ mtspr SPRN_CTRLT,r6
++4:
+ /*
+ * Save the guest PURR/SPURR
+ */
+@@ -1803,7 +1810,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ ld r5, VCPU_KVM(r9)
+ lbz r0, KVM_RADIX(r5)
+ cmpwi cr2, r0, 0
+- beq cr2, 3f
++ beq cr2, 4f
+
+ /* Radix: Handle the case where the guest used an illegal PID */
+ LOAD_REG_ADDR(r4, mmu_base_pid)
+@@ -1839,15 +1846,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ BEGIN_FTR_SECTION
+ PPC_INVALIDATE_ERAT
+ END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
+- b 4f
++4:
+ #endif /* CONFIG_PPC_RADIX_MMU */
+
+- /* Hash: clear out SLB */
+-3: li r5,0
+- slbmte r5,r5
+- slbia
+- ptesync
+-4:
+ /*
+ * POWER7/POWER8 guest -> host partition switch code.
+ * We don't have to lock against tlbies but we do
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Don-t-truncate-HPTE-index-in-xlate.patch b/patches.arch/KVM-PPC-Book3S-HV-Don-t-truncate-HPTE-index-in-xlate.patch
new file mode 100644
index 0000000000..0c6bbd2433
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Don-t-truncate-HPTE-index-in-xlate.patch
@@ -0,0 +1,47 @@
+From 46dec40fb741f00f1864580130779aeeaf24fb3d Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Mon, 20 Aug 2018 16:05:45 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Don't truncate HPTE index in xlate
+ function
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc3
+Git-commit: 46dec40fb741f00f1864580130779aeeaf24fb3d
+
+This fixes a bug which causes guest virtual addresses to get translated
+to guest real addresses incorrectly when the guest is using the HPT MMU
+and has more than 256GB of RAM, or more specifically has a HPT larger
+than 2GB. This has shown up in testing as a failure of the host to
+emulate doorbell instructions correctly on POWER9 for HPT guests with
+more than 256GB of RAM.
+
+The bug is that the HPTE index in kvmppc_mmu_book3s_64_hv_xlate()
+is stored as an int, and in forming the HPTE address, the index gets
+shifted left 4 bits as an int before being signed-extended to 64 bits.
+The simple fix is to make the variable a long int, matching the
+return type of kvmppc_hv_find_lock_hpte(), which is what calculates
+the index.
+
+Fixes: 697d3899dcb4 ("KVM: PPC: Implement MMIO emulation support for Book3S HV guests")
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
+index 7f3a8cf5d66f..4c08f42f6406 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
+@@ -359,7 +359,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+ unsigned long pp, key;
+ unsigned long v, orig_v, gr;
+ __be64 *hptep;
+- int index;
++ long int index;
+ int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
+
+ if (kvm_is_radix(vcpu->kvm))
+--
+2.13.7
+
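A quick numeric check of the truncation described above: at 16 bytes per HPTE, an HPT larger than 2GB needs indexes of 2^27 and up, and such an index no longer fits in 31 bits after the << 4. Doing the shift as a 32-bit int and then widening therefore yields a sign-extended, wildly wrong offset, while a long index keeps the shift in 64 bits. The value below is illustrative, and the comments assume a typical LP64 toolchain:

#include <stdio.h>

int main(void)
{
	unsigned long index = 0x08000000UL;	/* 2^27: first index needing 32 bits after << 4 */

	/* result narrowed to a 32-bit int and widened back, modelling the old bug */
	long as_int = (int)(index << 4);
	/* shift carried out in 64 bits, as with a long index (the fix) */
	long as_long = (long)(index << 4);

	printf("int  path: 0x%lx\n", (unsigned long)as_int);	/* 0xffffffff80000000 */
	printf("long path: 0x%lx\n", (unsigned long)as_long);	/* 0x0000000080000000 */
	return 0;
}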
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Don-t-use-compound_order-to-determ.patch b/patches.arch/KVM-PPC-Book3S-HV-Don-t-use-compound_order-to-determ.patch
new file mode 100644
index 0000000000..d5c7ee3d81
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Don-t-use-compound_order-to-determ.patch
@@ -0,0 +1,158 @@
+From 71d29f43b6332badc5598c656616a62575e83342 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Tue, 11 Sep 2018 20:48:34 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Don't use compound_order to determine
+ host mapping size
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc5
+Git-commit: 71d29f43b6332badc5598c656616a62575e83342
+
+THP paths can defer splitting compound pages until after the actual
+remap and TLB flushes to split a huge PMD/PUD. This causes radix
+partition scope page table mappings to get out of synch with the host
+qemu page table mappings.
+
+This results in random memory corruption in the guest when running
+with THP. The easiest way to reproduce is to use the KVM balloon to free up
+a lot of memory in the guest and then shrink the balloon to give the
+memory back, while some work is being done in the guest.
+
+Cc: David Gibson <david@gibson.dropbear.id.au>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Cc: kvm-ppc@vger.kernel.org
+Cc: linuxppc-dev@lists.ozlabs.org
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 91 ++++++++++++++--------------------
+ 1 file changed, 37 insertions(+), 54 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index fd6e8c13685f..933c574e1cf7 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -525,8 +525,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned long ea, unsigned long dsisr)
+ {
+ struct kvm *kvm = vcpu->kvm;
+- unsigned long mmu_seq, pte_size;
+- unsigned long gpa, gfn, hva, pfn;
++ unsigned long mmu_seq;
++ unsigned long gpa, gfn, hva;
+ struct kvm_memory_slot *memslot;
+ struct page *page = NULL;
+ long ret;
+@@ -623,9 +623,10 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ */
+ hva = gfn_to_hva_memslot(memslot, gfn);
+ if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
+- pfn = page_to_pfn(page);
+ upgrade_write = true;
+ } else {
++ unsigned long pfn;
++
+ /* Call KVM generic code to do the slow-path check */
+ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
+ writing, upgrade_p);
+@@ -639,63 +640,45 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ }
+ }
+
+- /* See if we can insert a 1GB or 2MB large PTE here */
+- level = 0;
+- if (page && PageCompound(page)) {
+- pte_size = PAGE_SIZE << compound_order(compound_head(page));
+- if (pte_size >= PUD_SIZE &&
+- (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+- (hva & (PUD_SIZE - PAGE_SIZE))) {
+- level = 2;
+- pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
+- } else if (pte_size >= PMD_SIZE &&
+- (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+- (hva & (PMD_SIZE - PAGE_SIZE))) {
+- level = 1;
+- pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
+- }
+- }
+-
+ /*
+- * Compute the PTE value that we need to insert.
++ * Read the PTE from the process' radix tree and use that
++ * so we get the shift and attribute bits.
+ */
+- if (page) {
+- pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
+- _PAGE_ACCESSED;
+- if (writing || upgrade_write)
+- pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
+- pte = pfn_pte(pfn, __pgprot(pgflags));
++ local_irq_disable();
++ ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
++ pte = *ptep;
++ local_irq_enable();
++
++ /* Get pte level from shift/size */
++ if (shift == PUD_SHIFT &&
++ (gpa & (PUD_SIZE - PAGE_SIZE)) ==
++ (hva & (PUD_SIZE - PAGE_SIZE))) {
++ level = 2;
++ } else if (shift == PMD_SHIFT &&
++ (gpa & (PMD_SIZE - PAGE_SIZE)) ==
++ (hva & (PMD_SIZE - PAGE_SIZE))) {
++ level = 1;
+ } else {
+- /*
+- * Read the PTE from the process' radix tree and use that
+- * so we get the attribute bits.
+- */
+- local_irq_disable();
+- ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+- pte = *ptep;
+- local_irq_enable();
+- if (shift == PUD_SHIFT &&
+- (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+- (hva & (PUD_SIZE - PAGE_SIZE))) {
+- level = 2;
+- } else if (shift == PMD_SHIFT &&
+- (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+- (hva & (PMD_SIZE - PAGE_SIZE))) {
+- level = 1;
+- } else if (shift && shift != PAGE_SHIFT) {
+- /* Adjust PFN */
+- unsigned long mask = (1ul << shift) - PAGE_SIZE;
+- pte = __pte(pte_val(pte) | (hva & mask));
+- }
+- pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
+- if (writing || upgrade_write) {
+- if (pte_val(pte) & _PAGE_WRITE)
+- pte = __pte(pte_val(pte) | _PAGE_DIRTY);
+- } else {
+- pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
++ level = 0;
++ if (shift > PAGE_SHIFT) {
++ /*
++ * If the pte maps more than one page, bring over
++ * bits from the virtual address to get the real
++ * address of the specific single page we want.
++ */
++ unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
++ pte = __pte(pte_val(pte) | (hva & rpnmask));
+ }
+ }
+
++ pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
++ if (writing || upgrade_write) {
++ if (pte_val(pte) & _PAGE_WRITE)
++ pte = __pte(pte_val(pte) | _PAGE_DIRTY);
++ } else {
++ pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
++ }
++
+ /* Allocate space in the tree and write the PTE */
+ ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Don-t-use-existing-prodded-flag-fo.patch b/patches.arch/KVM-PPC-Book3S-HV-Don-t-use-existing-prodded-flag-fo.patch
new file mode 100644
index 0000000000..f339b8f35c
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Don-t-use-existing-prodded-flag-fo.patch
@@ -0,0 +1,104 @@
+From 2267ea7661798a42f0da648a2970e2a03f4bc370 Mon Sep 17 00:00:00 2001
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Date: Fri, 12 Jan 2018 13:37:13 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Don't use existing "prodded" flag for
+ XIVE escalations
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 2267ea7661798a42f0da648a2970e2a03f4bc370
+
+The prodded flag is only cleared at the beginning of H_CEDE,
+so every time we have an escalation, we will cause the *next*
+H_CEDE to return immediately.
+
+Instead use a dedicated "irq_pending" flag to indicate that
+a guest interrupt is pending for the VCPU. We don't reuse the
+existing exception bitmap so as to avoid expensive atomic ops.
+
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/kvm_host.h | 1 +
+ arch/powerpc/kernel/asm-offsets.c | 1 +
+ arch/powerpc/kvm/book3s_hv.c | 2 +-
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 10 ++++++++++
+ arch/powerpc/kvm/book3s_xive.c | 3 +--
+ 5 files changed, 14 insertions(+), 3 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
+index 3aa5b577cd60..bfe51356af5e 100644
+--- a/arch/powerpc/include/asm/kvm_host.h
++++ b/arch/powerpc/include/asm/kvm_host.h
+@@ -709,6 +709,7 @@ struct kvm_vcpu_arch {
+ u8 ceded;
+ u8 prodded;
+ u8 doorbell_request;
++ u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
+ u32 last_inst;
+
+ struct swait_queue_head *wqp;
+diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
+index 6b958414b4e0..825089cf3e23 100644
+--- a/arch/powerpc/kernel/asm-offsets.c
++++ b/arch/powerpc/kernel/asm-offsets.c
+@@ -514,6 +514,7 @@ int main(void)
+ OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
+ OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
+ OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
++ OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
+ OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
+ OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
+ OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index 76cf48051eb3..e5f81fc108e0 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -2999,7 +2999,7 @@ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+ {
+ if (!xive_enabled())
+ return false;
+- return vcpu->arch.xive_saved_state.pipr <
++ return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
+ vcpu->arch.xive_saved_state.cppr;
+ }
+ #else
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index 7daf21be33d0..34dbab7deb39 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -1035,6 +1035,16 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
+ li r9, 1
+ stw r9, VCPU_XIVE_PUSHED(r4)
+ eieio
++
++ /*
++ * We clear the irq_pending flag. There is a small chance of a
++ * race vs. the escalation interrupt happening on another
++ * processor setting it again, but the only consequence is to
++ * cause a spurrious wakeup on the next H_CEDE which is not an
++ * issue.
++ */
++ li r0,0
++ stb r0, VCPU_IRQ_PENDING(r4)
+ no_xive:
+ #endif /* CONFIG_KVM_XICS */
+
+diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
+index a102efeabf05..eef9ccafdc09 100644
+--- a/arch/powerpc/kvm/book3s_xive.c
++++ b/arch/powerpc/kvm/book3s_xive.c
+@@ -84,8 +84,7 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
+ {
+ struct kvm_vcpu *vcpu = data;
+
+- /* We use the existing H_PROD mechanism to wake up the target */
+- vcpu->arch.prodded = 1;
++ vcpu->arch.irq_pending = 1;
+ smp_mb();
+ if (vcpu->arch.ceded)
+ kvmppc_fast_vcpu_kick(vcpu);
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Enable-migration-of-decrementer-re.patch b/patches.arch/KVM-PPC-Book3S-HV-Enable-migration-of-decrementer-re.patch
new file mode 100644
index 0000000000..e6ca2753eb
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Enable-migration-of-decrementer-re.patch
@@ -0,0 +1,115 @@
+From 5855564c8ab2d9cefca7b2933bd19818eb795e40 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Fri, 12 Jan 2018 20:55:20 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Enable migration of decrementer register
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 5855564c8ab2d9cefca7b2933bd19818eb795e40
+
+This adds a register identifier for use with the one_reg interface
+to allow the decrementer expiry time to be read and written by
+userspace. The decrementer expiry time is in guest timebase units
+and is equal to the sum of the decrementer and the guest timebase.
+(The expiry time is used rather than the decrementer value itself
+because, while the guest vcpu is not running, the expiry time stays
+constant whereas the decrementer value is constantly changing.)
+
+Without this, a guest vcpu migrated to a new host will see its
+decrementer set to some random value. On POWER8 and earlier, the
+decrementer is 32 bits wide and counts down at 512MHz, so the
+guest vcpu will potentially see no decrementer interrupts for up
+to about 4 seconds, which will lead to a stall. With POWER9, the
+decrementer is now 56 bits wide, so the stall can be much longer
+(up to 2.23 years) and more noticeable.
+
+To help work around the problem in cases where userspace has not been
+updated to migrate the decrementer expiry time, we now set the
+default decrementer expiry at vcpu creation time to the current time
+rather than the maximum possible value. This should mean an
+immediate decrementer interrupt when a migrated vcpu starts
+running. In cases where the decrementer is 32 bits wide and more
+than 4 seconds elapse between the creation of the vcpu and when it
+first runs, the decrementer would have wrapped around to positive
+values and there may still be a stall - but this is no worse than
+the current situation. In the large-decrementer case, we are sure
+to get an immediate decrementer interrupt (assuming the time from
+vcpu creation to first run is less than 2.23 years) and we thus
+avoid a very long stall.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ Documentation/virtual/kvm/api.txt | 1 +
+ arch/powerpc/include/uapi/asm/kvm.h | 2 ++
+ arch/powerpc/kvm/book3s_hv.c | 8 ++++++++
+ arch/powerpc/kvm/powerpc.c | 2 +-
+ 4 files changed, 12 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
+index f670e4b9e7f3..c6f9eebb79f2 100644
+--- a/Documentation/virtual/kvm/api.txt
++++ b/Documentation/virtual/kvm/api.txt
+@@ -1841,6 +1841,7 @@ registers, find a list below:
+ PPC | KVM_REG_PPC_DBSR | 32
+ PPC | KVM_REG_PPC_TIDR | 64
+ PPC | KVM_REG_PPC_PSSCR | 64
++ PPC | KVM_REG_PPC_DEC_EXPIRY | 64
+ PPC | KVM_REG_PPC_TM_GPR0 | 64
+ ...
+ PPC | KVM_REG_PPC_TM_GPR31 | 64
+diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
+index 61d6049f4c1e..8aaec831053a 100644
+--- a/arch/powerpc/include/uapi/asm/kvm.h
++++ b/arch/powerpc/include/uapi/asm/kvm.h
+@@ -607,6 +607,8 @@ struct kvm_ppc_rmmu_info {
+ #define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
+ #define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
+
++#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
++
+ /* Transactional Memory checkpointed state:
+ * This is all GPRs, all VSX regs and a subset of SPRs
+ */
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index c4f0bebfc5ba..b2d448c75008 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -1497,6 +1497,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
+ case KVM_REG_PPC_ARCH_COMPAT:
+ *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
+ break;
++ case KVM_REG_PPC_DEC_EXPIRY:
++ *val = get_reg_val(id, vcpu->arch.dec_expires +
++ vcpu->arch.vcore->tb_offset);
++ break;
+ default:
+ r = -EINVAL;
+ break;
+@@ -1724,6 +1728,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
+ case KVM_REG_PPC_ARCH_COMPAT:
+ r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
+ break;
++ case KVM_REG_PPC_DEC_EXPIRY:
++ vcpu->arch.dec_expires = set_reg_val(id, *val) -
++ vcpu->arch.vcore->tb_offset;
++ break;
+ default:
+ r = -EINVAL;
+ break;
+diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
+index c2c7ef330553..7c9e45f54186 100644
+--- a/arch/powerpc/kvm/powerpc.c
++++ b/arch/powerpc/kvm/powerpc.c
+@@ -758,7 +758,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+
+ hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+ vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
+- vcpu->arch.dec_expires = ~(u64)0;
++ vcpu->arch.dec_expires = get_tb();
+
+ #ifdef CONFIG_KVM_EXIT_TIMING
+ mutex_init(&vcpu->arch.exit_timing_lock);
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Factor-fake-suspend-handling-out-o.patch b/patches.arch/KVM-PPC-Book3S-HV-Factor-fake-suspend-handling-out-o.patch
new file mode 100644
index 0000000000..3784ba70ce
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Factor-fake-suspend-handling-out-o.patch
@@ -0,0 +1,339 @@
+From 7b0e827c6970e8ca77c60ae87592204c39e41245 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Wed, 30 May 2018 20:07:52 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Factor fake-suspend handling out of
+ kvmppc_save/restore_tm
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: 7b0e827c6970e8ca77c60ae87592204c39e41245
+
+This splits out the handling of "fake suspend" mode, part of the
+hypervisor TM assist code for POWER9, and puts almost all of it in
+new kvmppc_save_tm_hv and kvmppc_restore_tm_hv functions. The new
+functions branch to kvmppc_save/restore_tm if the CPU does not
+require hypervisor TM assistance.
+
+With this, it will be more straightforward to move kvmppc_save_tm and
+kvmppc_restore_tm to another file and use them for transactional
+memory support in PR KVM. Additionally, it also makes the code a
+bit clearer and reduces the number of feature sections.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 195 +++++++++++++++++++++-----------
+ 1 file changed, 126 insertions(+), 69 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index 5e6e493e065e..bfca999695f1 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -795,7 +795,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+ /*
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+ */
+- bl kvmppc_restore_tm
++ bl kvmppc_restore_tm_hv
+ 91:
+ #endif
+
+@@ -1779,7 +1779,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+ /*
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+ */
+- bl kvmppc_save_tm
++ bl kvmppc_save_tm_hv
+ 91:
+ #endif
+
+@@ -2683,7 +2683,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+ */
+ ld r9, HSTATE_KVM_VCPU(r13)
+- bl kvmppc_save_tm
++ bl kvmppc_save_tm_hv
+ 91:
+ #endif
+
+@@ -2801,7 +2801,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+ /*
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+ */
+- bl kvmppc_restore_tm
++ bl kvmppc_restore_tm_hv
+ 91:
+ #endif
+
+@@ -3126,7 +3126,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ * This can modify all checkpointed registers, but
+ * restores r1, r2 and r9 (vcpu pointer) before exit.
+ */
+-kvmppc_save_tm:
++kvmppc_save_tm_hv:
++ /* See if we need to handle fake suspend mode */
++BEGIN_FTR_SECTION
++ b kvmppc_save_tm
++END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
++
++ lbz r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */
++ cmpwi r0, 0
++ beq kvmppc_save_tm
++
++ /* The following code handles the fake_suspend = 1 case */
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -PPC_MIN_STKFRM(r1)
+@@ -3137,59 +3147,37 @@ kvmppc_save_tm:
+ rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+ mtmsrd r8
+
+- ld r5, VCPU_MSR(r9)
+- rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+- beq 1f /* TM not active in guest. */
+-
+- std r1, HSTATE_HOST_R1(r13)
+- li r3, TM_CAUSE_KVM_RESCHED
+-
+-BEGIN_FTR_SECTION
+- lbz r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */
+- cmpwi r0, 0
+- beq 3f
+ rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */
+ beq 4f
+-BEGIN_FTR_SECTION_NESTED(96)
++BEGIN_FTR_SECTION
+ bl pnv_power9_force_smt4_catch
+-END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
++END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
+ nop
+- b 6f
+-3:
+- /* Emulation of the treclaim instruction needs TEXASR before treclaim */
+- mfspr r6, SPRN_TEXASR
+- std r6, VCPU_ORIG_TEXASR(r9)
+-6:
+-END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+
+- /* Clear the MSR RI since r1, r13 are all going to be foobar. */
++ std r1, HSTATE_HOST_R1(r13)
++
++ /* Clear the MSR RI since r1, r13 may be foobar. */
+ li r5, 0
+ mtmsrd r5, 1
+
+- /* All GPRs are volatile at this point. */
++ /* We have to treclaim here because that's the only way to do S->N */
++ li r3, TM_CAUSE_KVM_RESCHED
+ TRECLAIM(R3)
+
+- /* Temporarily store r13 and r9 so we have some regs to play with */
+- SET_SCRATCH0(r13)
+- GET_PACA(r13)
+- std r9, PACATMSCRATCH(r13)
+-
+- /* If doing TM emulation on POWER9 DD2.2, check for fake suspend mode */
+-BEGIN_FTR_SECTION
+- lbz r9, HSTATE_FAKE_SUSPEND(r13)
+- cmpwi r9, 0
+- beq 2f
+ /*
+ * We were in fake suspend, so we are not going to save the
+ * register state as the guest checkpointed state (since
+ * we already have it), therefore we can now use any volatile GPR.
+ */
+- /* Reload stack pointer and TOC. */
++ /* Reload PACA pointer, stack pointer and TOC. */
++ GET_PACA(r13)
+ ld r1, HSTATE_HOST_R1(r13)
+ ld r2, PACATOC(r13)
++
+ /* Set MSR RI now we have r1 and r13 back. */
+ li r5, MSR_RI
+ mtmsrd r5, 1
++
+ HMT_MEDIUM
+ ld r6, HSTATE_DSCR(r13)
+ mtspr SPRN_DSCR, r6
+@@ -3204,12 +3192,53 @@ END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
+ li r0, PSSCR_FAKE_SUSPEND
+ andc r3, r3, r0
+ mtspr SPRN_PSSCR, r3
+- ld r9, HSTATE_KVM_VCPU(r13)
++
+ /* Don't save TEXASR, use value from last exit in real suspend state */
+- b 11f
+-2:
++ ld r9, HSTATE_KVM_VCPU(r13)
++ mfspr r5, SPRN_TFHAR
++ mfspr r6, SPRN_TFIAR
++ std r5, VCPU_TFHAR(r9)
++ std r6, VCPU_TFIAR(r9)
++
++ addi r1, r1, PPC_MIN_STKFRM
++ ld r0, PPC_LR_STKOFF(r1)
++ mtlr r0
++ blr
++
++kvmppc_save_tm:
++ mflr r0
++ std r0, PPC_LR_STKOFF(r1)
++
++ /* Turn on TM. */
++ mfmsr r8
++ li r0, 1
++ rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
++ mtmsrd r8
++
++ ld r5, VCPU_MSR(r9)
++ rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
++ beq 1f /* TM not active in guest. */
++
++ std r1, HSTATE_HOST_R1(r13)
++ li r3, TM_CAUSE_KVM_RESCHED
++
++BEGIN_FTR_SECTION
++ /* Emulation of the treclaim instruction needs TEXASR before treclaim */
++ mfspr r6, SPRN_TEXASR
++ std r6, VCPU_ORIG_TEXASR(r9)
+ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+
++ /* Clear the MSR RI since r1, r13 are all going to be foobar. */
++ li r5, 0
++ mtmsrd r5, 1
++
++ /* All GPRs are volatile at this point. */
++ TRECLAIM(R3)
++
++ /* Temporarily store r13 and r9 so we have some regs to play with */
++ SET_SCRATCH0(r13)
++ GET_PACA(r13)
++ std r9, PACATMSCRATCH(r13)
+ ld r9, HSTATE_KVM_VCPU(r13)
+
+ /* Get a few more GPRs free. */
+@@ -3288,7 +3317,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+ std r5, VCPU_TFHAR(r9)
+ std r6, VCPU_TFIAR(r9)
+
+- addi r1, r1, PPC_MIN_STKFRM
+ ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
+@@ -3299,6 +3327,61 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+ * This potentially modifies all checkpointed registers.
+ * It restores r1, r2, r4 from the PACA.
+ */
++kvmppc_restore_tm_hv:
++ /*
++ * If we are doing TM emulation for the guest on a POWER9 DD2,
++ * then we don't actually do a trechkpt -- we either set up
++ * fake-suspend mode, or emulate a TM rollback.
++ */
++BEGIN_FTR_SECTION
++ b kvmppc_restore_tm
++END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
++ mflr r0
++ std r0, PPC_LR_STKOFF(r1)
++
++ li r0, 0
++ stb r0, HSTATE_FAKE_SUSPEND(r13)
++
++ /* Turn on TM so we can restore TM SPRs */
++ mfmsr r5
++ li r0, 1
++ rldimi r5, r0, MSR_TM_LG, 63-MSR_TM_LG
++ mtmsrd r5
++
++ /*
++ * The user may change these outside of a transaction, so they must
++ * always be context switched.
++ */
++ ld r5, VCPU_TFHAR(r4)
++ ld r6, VCPU_TFIAR(r4)
++ ld r7, VCPU_TEXASR(r4)
++ mtspr SPRN_TFHAR, r5
++ mtspr SPRN_TFIAR, r6
++ mtspr SPRN_TEXASR, r7
++
++ ld r5, VCPU_MSR(r4)
++ rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
++ beqlr /* TM not active in guest */
++
++ /* Make sure the failure summary is set */
++ oris r7, r7, (TEXASR_FS)@h
++ mtspr SPRN_TEXASR, r7
++
++ cmpwi r5, 1 /* check for suspended state */
++ bgt 10f
++ stb r5, HSTATE_FAKE_SUSPEND(r13)
++ b 9f /* and return */
++10: stdu r1, -PPC_MIN_STKFRM(r1)
++ /* guest is in transactional state, so simulate rollback */
++ mr r3, r4
++ bl kvmhv_emulate_tm_rollback
++ nop
++ ld r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */
++ addi r1, r1, PPC_MIN_STKFRM
++9: ld r0, PPC_LR_STKOFF(r1)
++ mtlr r0
++ blr
++
+ kvmppc_restore_tm:
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+@@ -3323,8 +3406,6 @@ kvmppc_restore_tm:
+ mtspr SPRN_TFIAR, r6
+ mtspr SPRN_TEXASR, r7
+
+- li r0, 0
+- stb r0, HSTATE_FAKE_SUSPEND(r13)
+ ld r5, VCPU_MSR(r4)
+ rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+ beqlr /* TM not active in guest */
+@@ -3339,15 +3420,6 @@ kvmppc_restore_tm:
+ mtspr SPRN_TEXASR, r7
+
+ /*
+- * If we are doing TM emulation for the guest on a POWER9 DD2,
+- * then we don't actually do a trechkpt -- we either set up
+- * fake-suspend mode, or emulate a TM rollback.
+- */
+-BEGIN_FTR_SECTION
+- b .Ldo_tm_fake_load
+-END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+-
+- /*
+ * We need to load up the checkpointed state for the guest.
+ * We need to do this early as it will blow away any GPRs, VSRs and
+ * some SPRs.
+@@ -3419,25 +3491,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+ /* Set the MSR RI since we have our registers back. */
+ li r5, MSR_RI
+ mtmsrd r5, 1
+-9:
+ ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
+-
+-.Ldo_tm_fake_load:
+- cmpwi r5, 1 /* check for suspended state */
+- bgt 10f
+- stb r5, HSTATE_FAKE_SUSPEND(r13)
+- b 9b /* and return */
+-10: stdu r1, -PPC_MIN_STKFRM(r1)
+- /* guest is in transactional state, so simulate rollback */
+- mr r3, r4
+- bl kvmhv_emulate_tm_rollback
+- nop
+- ld r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */
+- addi r1, r1, PPC_MIN_STKFRM
+- b 9b
+-#endif
++#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+ /*
+ * We come here if we get any exception or interrupt while we are
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Fix-VRMA-initialization-with-2MB-o.patch b/patches.arch/KVM-PPC-Book3S-HV-Fix-VRMA-initialization-with-2MB-o.patch
new file mode 100644
index 0000000000..aae2fd6997
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Fix-VRMA-initialization-with-2MB-o.patch
@@ -0,0 +1,59 @@
+From debd574f4195e205ba505b25e19b2b797f4bcd94 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Fri, 2 Mar 2018 15:38:04 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Fix VRMA initialization with 2MB or 1GB
+ memory backing
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc5
+Git-commit: debd574f4195e205ba505b25e19b2b797f4bcd94
+
+The current code for initializing the VRMA (virtual real memory area)
+for HPT guests requires the page size of the backing memory to be one
+of 4kB, 64kB or 16MB. With a radix host we have the possibility that
+the backing memory page size can be 2MB or 1GB. In these cases, if the
+guest switches to HPT mode, KVM will not initialize the VRMA and the
+guest will fail to run.
+
+In fact it is not necessary that the VRMA page size is the same as the
+backing memory page size; any VRMA page size less than or equal to the
+backing memory page size is acceptable. Therefore we now choose the
+largest page size out of the set {4k, 64k, 16M} which is not larger
+than the backing memory page size.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index 89707354c2ef..b4a538b29da5 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -3656,15 +3656,17 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
+ goto up_out;
+
+ psize = vma_kernel_pagesize(vma);
+- porder = __ilog2(psize);
+
+ up_read(&current->mm->mmap_sem);
+
+ /* We can handle 4k, 64k or 16M pages in the VRMA */
+- err = -EINVAL;
+- if (!(psize == 0x1000 || psize == 0x10000 ||
+- psize == 0x1000000))
+- goto out_srcu;
++ if (psize >= 0x1000000)
++ psize = 0x1000000;
++ else if (psize >= 0x10000)
++ psize = 0x10000;
++ else
++ psize = 0x1000;
++ porder = __ilog2(psize);
+
+ senc = slb_pgsize_encoding(psize);
+ kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Fix-conditions-for-starting-vcpu.patch b/patches.arch/KVM-PPC-Book3S-HV-Fix-conditions-for-starting-vcpu.patch
new file mode 100644
index 0000000000..85d67a02c0
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Fix-conditions-for-starting-vcpu.patch
@@ -0,0 +1,63 @@
+From c0093f1a38a0fd6c32a2269f0533bb13fb95143d Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Mon, 20 Nov 2017 16:12:25 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Fix conditions for starting vcpu
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: c0093f1a38a0fd6c32a2269f0533bb13fb95143d
+
+This corrects the test that determines whether a vcpu that has just
+become able to run in the guest (e.g. it has just finished handling
+a hypercall or hypervisor page fault) and whose virtual core is
+already running somewhere as a "piggybacked" vcore can start
+immediately or not. (A piggybacked vcore is one which is executing
+along with another vcore as a result of dynamic micro-threading.)
+
+Previously the test tried to lock the piggybacked vcore using
+spin_trylock, which would always fail because the vcore was already
+locked, and so the vcpu would have to wait until its vcore exited
+the guest before it could enter.
+
+In fact the vcpu can enter if its vcore is in VCORE_PIGGYBACK state
+and not already exiting (or exited) the guest, so the test in
+VCORE_PIGGYBACK state is basically the same as for VCORE_RUNNING
+state.
+
+Coverity detected this as a double unlock issue, which it isn't
+because the spin_trylock would always fail. This will fix the
+apparent double unlock as well.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv.c | 13 ++-----------
+ 1 file changed, 2 insertions(+), 11 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index 597498d6db2e..c4f0bebfc5ba 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -3175,17 +3175,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+ * this thread straight away and have it join in.
+ */
+ if (!signal_pending(current)) {
+- if (vc->vcore_state == VCORE_PIGGYBACK) {
+- if (spin_trylock(&vc->lock)) {
+- if (vc->vcore_state == VCORE_RUNNING &&
+- !VCORE_IS_EXITING(vc)) {
+- kvmppc_create_dtl_entry(vcpu, vc);
+- kvmppc_start_thread(vcpu, vc);
+- trace_kvm_guest_enter(vcpu);
+- }
+- spin_unlock(&vc->lock);
+- }
+- } else if (vc->vcore_state == VCORE_RUNNING &&
++ if ((vc->vcore_state == VCORE_PIGGYBACK ||
++ vc->vcore_state == VCORE_RUNNING) &&
+ !VCORE_IS_EXITING(vc)) {
+ kvmppc_create_dtl_entry(vcpu, vc);
+ kvmppc_start_thread(vcpu, vc);
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Fix-constant-size-warning.patch b/patches.arch/KVM-PPC-Book3S-HV-Fix-constant-size-warning.patch
new file mode 100644
index 0000000000..a73a1218e5
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Fix-constant-size-warning.patch
@@ -0,0 +1,63 @@
+From 0abb75b7a16d21e2fb0d98634df44c37c184f186 Mon Sep 17 00:00:00 2001
+From: Nicholas Mc Guire <hofrat@osadl.org>
+Date: Sat, 7 Jul 2018 11:07:25 +0200
+Subject: [PATCH] KVM: PPC: Book3S HV: Fix constant size warning
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc1
+Git-commit: 0abb75b7a16d21e2fb0d98634df44c37c184f186
+
+The constants are 64bit but not explicitly declared UL resulting
+in sparse warnings. Fix this by declaring the constants UL.
+
+Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/reg.h | 2 +-
+ arch/powerpc/kvm/book3s_hv.c | 16 ++++++++--------
+ 2 files changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
+index 562568414cf4..858aa7984ab0 100644
+--- a/arch/powerpc/include/asm/reg.h
++++ b/arch/powerpc/include/asm/reg.h
+@@ -161,7 +161,7 @@
+ #define PSSCR_ESL 0x00200000 /* Enable State Loss */
+ #define PSSCR_SD 0x00400000 /* Status Disable */
+ #define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
+-#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */
++#define PSSCR_GUEST_VIS 0xf0000000000003ffUL /* Guest-visible PSSCR fields */
+ #define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */
+ #define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */
+
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index fba21c91b2ff..d73b29b6aaa1 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -128,14 +128,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
+ * and SPURR count and should be set according to the number of
+ * online threads in the vcore being run.
+ */
+-#define RWMR_RPA_P8_1THREAD 0x164520C62609AECA
+-#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9
+-#define RWMR_RPA_P8_3THREAD 0x164520C62609AECA
+-#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9
+-#define RWMR_RPA_P8_5THREAD 0x164520C62609AECA
+-#define RWMR_RPA_P8_6THREAD 0x164520C62609AECA
+-#define RWMR_RPA_P8_7THREAD 0x164520C62609AECA
+-#define RWMR_RPA_P8_8THREAD 0x164520C62609AECA
++#define RWMR_RPA_P8_1THREAD 0x164520C62609AECAUL
++#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9UL
++#define RWMR_RPA_P8_3THREAD 0x164520C62609AECAUL
++#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9UL
++#define RWMR_RPA_P8_5THREAD 0x164520C62609AECAUL
++#define RWMR_RPA_P8_6THREAD 0x164520C62609AECAUL
++#define RWMR_RPA_P8_7THREAD 0x164520C62609AECAUL
++#define RWMR_RPA_P8_8THREAD 0x164520C62609AECAUL
+
+ static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = {
+ RWMR_RPA_P8_1THREAD,
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Fix-duplication-of-host-SLB-entrie.patch b/patches.arch/KVM-PPC-Book3S-HV-Fix-duplication-of-host-SLB-entrie.patch
new file mode 100644
index 0000000000..1c8049ae82
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Fix-duplication-of-host-SLB-entrie.patch
@@ -0,0 +1,91 @@
+From cda4a14733138b8f15b0b69d97ec198dc41eccae Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Thu, 22 Mar 2018 09:48:54 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Fix duplication of host SLB entries
+
+References: bsc#1061840
+Patch-mainline: v4.16
+Git-commit: cda4a14733138b8f15b0b69d97ec198dc41eccae
+
+Since commit 6964e6a4e489 ("KVM: PPC: Book3S HV: Do SLB load/unload
+with guest LPCR value loaded", 2018-01-11), we have been seeing
+occasional machine check interrupts on POWER8 systems when running
+KVM guests, due to SLB multihit errors.
+
+This turns out to be due to the guest exit code reloading the host
+SLB entries from the SLB shadow buffer when the SLB was not previously
+cleared in the guest entry path. This can happen because the path
+which skips from the guest entry code to the guest exit code without
+entering the guest now does the skip before the SLB is cleared and
+loaded with guest values, but the host values are loaded after the
+point in the guest exit path that we skip to.
+
+To fix this, we move the code that reloads the host SLB values up
+so that it occurs just before the point in the guest exit code (the
+label guest_bypass:) where we skip to from the guest entry path.
+
+Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Fixes: 6964e6a4e489 ("KVM: PPC: Book3S HV: Do SLB load/unload with guest LPCR value loaded")
+Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 35 +++++++++++++++++----------------
+ 1 file changed, 18 insertions(+), 17 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index d33264697a31..f86a20270e50 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -1557,6 +1557,24 @@ mc_cont:
+ ptesync
+ 3: stw r5,VCPU_SLB_MAX(r9)
+
++ /* load host SLB entries */
++BEGIN_MMU_FTR_SECTION
++ b 0f
++END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
++ ld r8,PACA_SLBSHADOWPTR(r13)
++
++ .rept SLB_NUM_BOLTED
++ li r3, SLBSHADOW_SAVEAREA
++ LDX_BE r5, r8, r3
++ addi r3, r3, 8
++ LDX_BE r6, r8, r3
++ andis. r7,r5,SLB_ESID_V@h
++ beq 1f
++ slbmte r6,r5
++1: addi r8,r8,16
++ .endr
++0:
++
+ guest_bypass:
+ stw r12, STACK_SLOT_TRAP(r1)
+ mr r3, r12
+@@ -2018,23 +2036,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ mtspr SPRN_LPCR,r8
+ isync
+ 48:
+- /* load host SLB entries */
+-BEGIN_MMU_FTR_SECTION
+- b 0f
+-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
+- ld r8,PACA_SLBSHADOWPTR(r13)
+-
+- .rept SLB_NUM_BOLTED
+- li r3, SLBSHADOW_SAVEAREA
+- LDX_BE r5, r8, r3
+- addi r3, r3, 8
+- LDX_BE r6, r8, r3
+- andis. r7,r5,SLB_ESID_V@h
+- beq 1f
+- slbmte r6,r5
+-1: addi r8,r8,16
+- .endr
+-0:
+ #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+ /* Finish timing, if we have a vcpu */
+ ld r4, HSTATE_KVM_VCPU(r13)
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Fix-guest-r11-corruption-with-POWE.patch b/patches.arch/KVM-PPC-Book3S-HV-Fix-guest-r11-corruption-with-POWE.patch
new file mode 100644
index 0000000000..2ccd047a5e
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Fix-guest-r11-corruption-with-POWE.patch
@@ -0,0 +1,60 @@
+From f14040bca89258b8a1c71e2112e430462172ce93 Mon Sep 17 00:00:00 2001
+From: Michael Neuling <mikey@neuling.org>
+Date: Thu, 13 Sep 2018 15:33:47 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Fix guest r11 corruption with POWER9 TM
+ workarounds
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc6
+Git-commit: f14040bca89258b8a1c71e2112e430462172ce93
+
+When we come into the softpatch handler (0x1500), we use r11 to store
+the HSRR0 for later use by the denorm handler.
+
+We also use the softpatch handler for the TM workarounds for
+POWER9. Unfortunately, in kvmppc_interrupt_hv we later store r11 out
+to the vcpu assuming it's still what we got from userspace.
+
+This causes r11 to be corrupted in the VCPU and hence when we restore
+the guest, we get a corrupted r11. We've seen this when running TM
+tests inside guests on P9.
+
+This fixes the problem by only touching r11 in the denorm case.
+
+Fixes: 4bb3c7a020 ("KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9")
+Cc: <stable@vger.kernel.org> # 4.17+
+Tested-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
+Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
+Signed-off-by: Michael Neuling <mikey@neuling.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kernel/exceptions-64s.S | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
+index ea04dfb8c092..2d8fc8c9da7a 100644
+--- a/arch/powerpc/kernel/exceptions-64s.S
++++ b/arch/powerpc/kernel/exceptions-64s.S
+@@ -1314,9 +1314,7 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
+
+ #ifdef CONFIG_PPC_DENORMALISATION
+ mfspr r10,SPRN_HSRR1
+- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+- addi r11,r11,-4 /* HSRR0 is next instruction */
+ bne+ denorm_assist
+ #endif
+
+@@ -1382,6 +1380,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+ */
+ XVCPSGNDP32(32)
+ denorm_done:
++ mfspr r11,SPRN_HSRR0
++ subi r11,r11,4
+ mtspr SPRN_HSRR0,r11
+ mtcrf 0x80,r9
+ ld r9,PACA_EXGEN+EX_R9(r13)
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Fix-handling-of-large-pages-in-rad.patch b/patches.arch/KVM-PPC-Book3S-HV-Fix-handling-of-large-pages-in-rad.patch
new file mode 100644
index 0000000000..b59be4dec2
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Fix-handling-of-large-pages-in-rad.patch
@@ -0,0 +1,172 @@
+From c3856aeb29402e94ad9b3879030165cc6a4fdc56 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Fri, 23 Feb 2018 21:21:12 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Fix handling of large pages in radix
+ page fault handler
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc5
+Git-commit: c3856aeb29402e94ad9b3879030165cc6a4fdc56
+
+This fixes several bugs in the radix page fault handler relating to
+the way large pages in the memory backing the guest were handled.
+First, the check for large pages only checked for explicit huge pages
+and missed transparent huge pages. Then the check that the addresses
+(host virtual vs. guest physical) had appropriate alignment was
+wrong, meaning that the code never put a large page in the partition
+scoped radix tree; it was always demoted to a small page.
+
+Fixing this exposed bugs in kvmppc_create_pte(). We were never
+invalidating a 2MB PTE, which meant that if a page was initially
+faulted in without write permission and the guest then attempted
+to store to it, we would never update the PTE to have write permission.
+If we find a valid 2MB PTE in the PMD, we need to clear it and
+do a TLB invalidation before installing either the new 2MB PTE or
+a pointer to a page table page.
+
+This also corrects an assumption that get_user_pages_fast would set
+the _PAGE_DIRTY bit if we are writing, which is not true. Instead we
+mark the page dirty explicitly with set_page_dirty_lock(). This
+also means we don't need the dirty bit set on the host PTE when
+providing write access on a read fault.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 69 +++++++++++++++++++++-------------
+ 1 file changed, 43 insertions(+), 26 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index 0c854816e653..5cb4e4687107 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -195,6 +195,12 @@ static void kvmppc_pte_free(pte_t *ptep)
+ kmem_cache_free(kvm_pte_cache, ptep);
+ }
+
++/* Like pmd_huge() and pmd_large(), but works regardless of config options */
++static inline int pmd_is_leaf(pmd_t pmd)
++{
++ return !!(pmd_val(pmd) & _PAGE_PTE);
++}
++
+ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ unsigned int level, unsigned long mmu_seq)
+ {
+@@ -219,7 +225,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ else
+ new_pmd = pmd_alloc_one(kvm->mm, gpa);
+
+- if (level == 0 && !(pmd && pmd_present(*pmd)))
++ if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
+ new_ptep = kvmppc_pte_alloc();
+
+ /* Check if we might have been invalidated; let the guest retry if so */
+@@ -244,12 +250,30 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ new_pmd = NULL;
+ }
+ pmd = pmd_offset(pud, gpa);
+- if (pmd_large(*pmd)) {
+- /* Someone else has instantiated a large page here; retry */
+- ret = -EAGAIN;
+- goto out_unlock;
+- }
+- if (level == 1 && !pmd_none(*pmd)) {
++ if (pmd_is_leaf(*pmd)) {
++ unsigned long lgpa = gpa & PMD_MASK;
++
++ /*
++ * If we raced with another CPU which has just put
++ * a 2MB pte in after we saw a pte page, try again.
++ */
++ if (level == 0 && !new_ptep) {
++ ret = -EAGAIN;
++ goto out_unlock;
++ }
++ /* Valid 2MB page here already, remove it */
++ old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
++ ~0UL, 0, lgpa, PMD_SHIFT);
++ kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
++ if (old & _PAGE_DIRTY) {
++ unsigned long gfn = lgpa >> PAGE_SHIFT;
++ struct kvm_memory_slot *memslot;
++ memslot = gfn_to_memslot(kvm, gfn);
++ if (memslot && memslot->dirty_bitmap)
++ kvmppc_update_dirty_map(memslot,
++ gfn, PMD_SIZE);
++ }
++ } else if (level == 1 && !pmd_none(*pmd)) {
+ /*
+ * There's a page table page here, but we wanted
+ * to install a large page. Tell the caller and let
+@@ -412,28 +436,24 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ } else {
+ page = pages[0];
+ pfn = page_to_pfn(page);
+- if (PageHuge(page)) {
+- page = compound_head(page);
+- pte_size <<= compound_order(page);
++ if (PageCompound(page)) {
++ pte_size <<= compound_order(compound_head(page));
+ /* See if we can insert a 2MB large-page PTE here */
+ if (pte_size >= PMD_SIZE &&
+- (gpa & PMD_MASK & PAGE_MASK) ==
+- (hva & PMD_MASK & PAGE_MASK)) {
++ (gpa & (PMD_SIZE - PAGE_SIZE)) ==
++ (hva & (PMD_SIZE - PAGE_SIZE))) {
+ level = 1;
+ pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
+ }
+ }
+ /* See if we can provide write access */
+ if (writing) {
+- /*
+- * We assume gup_fast has set dirty on the host PTE.
+- */
+ pgflags |= _PAGE_WRITE;
+ } else {
+ local_irq_save(flags);
+ ptep = find_current_mm_pte(current->mm->pgd,
+ hva, NULL, NULL);
+- if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
++ if (ptep && pte_write(*ptep))
+ pgflags |= _PAGE_WRITE;
+ local_irq_restore(flags);
+ }
+@@ -459,18 +479,15 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ pte = pfn_pte(pfn, __pgprot(pgflags));
+ ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+ }
+- if (ret == 0 || ret == -EAGAIN)
+- ret = RESUME_GUEST;
+
+ if (page) {
+- /*
+- * We drop pages[0] here, not page because page might
+- * have been set to the head page of a compound, but
+- * we have to drop the reference on the correct tail
+- * page to match the get inside gup()
+- */
+- put_page(pages[0]);
++ if (!ret && (pgflags & _PAGE_WRITE))
++ set_page_dirty_lock(page);
++ put_page(page);
+ }
++
++ if (ret == 0 || ret == -EAGAIN)
++ ret = RESUME_GUEST;
+ return ret;
+ }
+
+@@ -644,7 +661,7 @@ void kvmppc_free_radix(struct kvm *kvm)
+ continue;
+ pmd = pmd_offset(pud, 0);
+ for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
+- if (pmd_huge(*pmd)) {
++ if (pmd_is_leaf(*pmd)) {
+ pmd_clear(pmd);
+ continue;
+ }
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Fix-handling-of-secondary-HPTEG-in.patch b/patches.arch/KVM-PPC-Book3S-HV-Fix-handling-of-secondary-HPTEG-in.patch
new file mode 100644
index 0000000000..8eaa2665e6
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Fix-handling-of-secondary-HPTEG-in.patch
@@ -0,0 +1,54 @@
+From 05f2bb0313a2855e491dadfc8319b7da261d7074 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Wed, 7 Feb 2018 19:49:54 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Fix handling of secondary HPTEG in HPT
+ resizing code
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 05f2bb0313a2855e491dadfc8319b7da261d7074
+
+This fixes the computation of the HPTE index to use when the HPT
+resizing code encounters a bolted HPTE which is stored in its
+secondary HPTE group. The code inverts the HPTE group number, which
+is correct, but doesn't then mask it with new_hash_mask. As a result,
+new_pteg will be effectively negative, resulting in new_hptep
+pointing before the new HPT, which will corrupt memory.
+
+In addition, this removes two BUG_ON statements. The condition that
+the BUG_ONs were testing -- that we have computed the hash value
+incorrectly -- has never been observed in testing, and if it did
+occur, would only affect the guest, not the host. Given that
+BUG_ON should only be used in conditions where the kernel (i.e.
+the host kernel, in this case) can't possibly continue execution,
+it is not appropriate here.
+
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_hv.c | 8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
+index 966097232d21..d19649960bbf 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
+@@ -1329,12 +1329,8 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
+ }
+
+ new_pteg = hash & new_hash_mask;
+- if (vpte & HPTE_V_SECONDARY) {
+- BUG_ON(~pteg != (hash & old_hash_mask));
+- new_pteg = ~new_pteg;
+- } else {
+- BUG_ON(pteg != (hash & old_hash_mask));
+- }
++ if (vpte & HPTE_V_SECONDARY)
++ new_pteg = ~hash & new_hash_mask;
+
+ new_idx = new_pteg * HPTES_PER_GROUP + (idx % HPTES_PER_GROUP);
+ new_hptep = (__be64 *)(new->virt + (new_idx << 4));
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Fix-inaccurate-comment.patch b/patches.arch/KVM-PPC-Book3S-HV-Fix-inaccurate-comment.patch
new file mode 100644
index 0000000000..9265d9a8c0
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Fix-inaccurate-comment.patch
@@ -0,0 +1,35 @@
+From 48e70b1ce667dc032f9166cc00ddb594ecc0065e Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Thu, 19 Apr 2018 11:49:51 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Fix inaccurate comment
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: 48e70b1ce667dc032f9166cc00ddb594ecc0065e
+
+We now have interrupts hard-disabled when coming back from
+kvmppc_hv_entry_trampoline, so this changes the comment to reflect
+that.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv_interrupts.S | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
+index 0e8493033288..82f2ff9410b6 100644
+--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
++++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
+@@ -137,7 +137,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ /*
+ * We return here in virtual mode after the guest exits
+ * with something that we can't handle in real mode.
+- * Interrupts are enabled again at this point.
++ * Interrupts are still hard-disabled.
+ */
+
+ /*
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Fix-kvmppc_bad_host_intr-for-real-.patch b/patches.arch/KVM-PPC-Book3S-HV-Fix-kvmppc_bad_host_intr-for-real-.patch
new file mode 100644
index 0000000000..c877eabbcc
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Fix-kvmppc_bad_host_intr-for-real-.patch
@@ -0,0 +1,45 @@
+From eadce3b48b5a8ffec7c8abbd4950a501c91d2515 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Fri, 18 May 2018 03:49:43 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Fix kvmppc_bad_host_intr for real mode
+ interrupts
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: eadce3b48b5a8ffec7c8abbd4950a501c91d2515
+
+When CONFIG_RELOCATABLE=n, the Linux real mode interrupt handlers call
+into KVM using real address. This needs to be translated to the kernel
+linear effective address before the MMU is switched on.
+
+kvmppc_bad_host_intr misses adding these bits, so when it is used to
+handle a system reset interrupt (that always gets delivered in real
+mode), it results in an instruction access fault immediately after
+the MMU is turned on.
+
+Fix this by ensuring the top 2 address bits are set when the MMU is
+turned on.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index ef9e665fc8e2..5e6e493e065e 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -3568,6 +3568,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
+ bcl 20, 31, .+4
+ 5: mflr r3
+ addi r3, r3, 9f - 5b
++ li r4, -1
++ rldimi r3, r4, 62, 0 /* ensure 0xc000000000000000 bits are set */
+ ld r4, PACAKMSR(r13)
+ mtspr SPRN_SRR0, r3
+ mtspr SPRN_SRR1, r4
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Fix-trap-number-return-from-__kvmp.patch b/patches.arch/KVM-PPC-Book3S-HV-Fix-trap-number-return-from-__kvmp.patch
new file mode 100644
index 0000000000..99ab0806c1
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Fix-trap-number-return-from-__kvmp.patch
@@ -0,0 +1,115 @@
+From a8b48a4dccea77e29462e59f1dbf0d5aa1ff167c Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Wed, 7 Mar 2018 22:17:20 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Fix trap number return from
+ __kvmppc_vcore_entry
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc6
+Git-commit: a8b48a4dccea77e29462e59f1dbf0d5aa1ff167c
+
+This fixes a bug where the trap number that is returned by
+__kvmppc_vcore_entry gets corrupted. The effect of the corruption
+is that IPIs get ignored on POWER9 systems when the IPI is sent via
+a doorbell interrupt to a CPU which is executing in a KVM guest.
+The effect of the IPI being ignored is often that another CPU locks
+up inside smp_call_function_many() (and if that CPU is holding a
+spinlock, other CPUs then lock up inside raw_spin_lock()).
+
+The trap number is currently held in register r12 for most of the
+assembly-language part of the guest exit path. In that path, we
+call kvmppc_subcore_exit_guest(), which is a C function, without
+restoring r12 afterwards. Depending on the kernel config and the
+compiler, it may modify r12 or it may not, so some config/compiler
+combinations see the bug and others don't.
+
+To fix this, we arrange for the trap number to be stored on the
+stack from the 'guest_bypass:' label until the end of the function,
+then the trap number is loaded and returned in r12 as before.
+
+Cc: stable@vger.kernel.org # v4.8+
+Fixes: fd7bacbca47a ("KVM: PPC: Book3S HV: Fix TB corruption in guest exit path on HMI interrupt")
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index f31f357b8c5a..d33264697a31 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -320,7 +320,6 @@ kvm_novcpu_exit:
+ stw r12, STACK_SLOT_TRAP(r1)
+ bl kvmhv_commence_exit
+ nop
+- lwz r12, STACK_SLOT_TRAP(r1)
+ b kvmhv_switch_to_host
+
+ /*
+@@ -1220,6 +1219,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
+ secondary_too_late:
+ li r12, 0
++ stw r12, STACK_SLOT_TRAP(r1)
+ cmpdi r4, 0
+ beq 11f
+ stw r12, VCPU_TRAP(r4)
+@@ -1558,12 +1558,12 @@ mc_cont:
+ 3: stw r5,VCPU_SLB_MAX(r9)
+
+ guest_bypass:
++ stw r12, STACK_SLOT_TRAP(r1)
+ mr r3, r12
+ /* Increment exit count, poke other threads to exit */
+ bl kvmhv_commence_exit
+ nop
+ ld r9, HSTATE_KVM_VCPU(r13)
+- lwz r12, VCPU_TRAP(r9)
+
+ /* Stop others sending VCPU interrupts to this physical CPU */
+ li r0, -1
+@@ -1898,6 +1898,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
+ * POWER7/POWER8 guest -> host partition switch code.
+ * We don't have to lock against tlbies but we do
+ * have to coordinate the hardware threads.
++ * Here STACK_SLOT_TRAP(r1) contains the trap number.
+ */
+ kvmhv_switch_to_host:
+ /* Secondary threads wait for primary to do partition switch */
+@@ -1950,12 +1951,12 @@ BEGIN_FTR_SECTION
+ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+ /* If HMI, call kvmppc_realmode_hmi_handler() */
++ lwz r12, STACK_SLOT_TRAP(r1)
+ cmpwi r12, BOOK3S_INTERRUPT_HMI
+ bne 27f
+ bl kvmppc_realmode_hmi_handler
+ nop
+ cmpdi r3, 0
+- li r12, BOOK3S_INTERRUPT_HMI
+ /*
+ * At this point kvmppc_realmode_hmi_handler may have resync-ed
+ * the TB, and if it has, we must not subtract the guest timebase
+@@ -2008,10 +2009,8 @@ BEGIN_FTR_SECTION
+ lwz r8, KVM_SPLIT_DO_RESTORE(r3)
+ cmpwi r8, 0
+ beq 47f
+- stw r12, STACK_SLOT_TRAP(r1)
+ bl kvmhv_p9_restore_lpcr
+ nop
+- lwz r12, STACK_SLOT_TRAP(r1)
+ b 48f
+ 47:
+ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+@@ -2049,6 +2048,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
+ li r0, KVM_GUEST_MODE_NONE
+ stb r0, HSTATE_IN_GUEST(r13)
+
++ lwz r12, STACK_SLOT_TRAP(r1) /* return trap # in r12 */
+ ld r0, SFS+PPC_LR_STKOFF(r1)
+ addi r1, r1, SFS
+ mtlr r0
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Fix-typo-in-kvmppc_hv_get_dirty_lo.patch b/patches.arch/KVM-PPC-Book3S-HV-Fix-typo-in-kvmppc_hv_get_dirty_lo.patch
new file mode 100644
index 0000000000..2c31f4cb5d
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Fix-typo-in-kvmppc_hv_get_dirty_lo.patch
@@ -0,0 +1,38 @@
+From 117647ff936e2d9684cc881d87c0291f46669c20 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Fri, 10 Nov 2017 16:43:35 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Fix typo in
+ kvmppc_hv_get_dirty_log_radix()
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 117647ff936e2d9684cc881d87c0291f46669c20
+
+This fixes a typo where the intent was to assign to 'j' in order to
+skip some number of bits in the dirty bitmap for a guest. The effect
+of the typo is benign since it means we just iterate through all the
+bits rather than skipping bits which we know will be zero. This issue
+was found by Coverity.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index 58618f644c56..0c854816e653 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -573,7 +573,7 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
+ j = i + 1;
+ if (npages) {
+ set_dirty_bits(map, i, npages);
+- i = j + npages;
++ j = i + npages;
+ }
+ }
+ return 0;
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Handle-1GB-pages-in-radix-page-fau.patch b/patches.arch/KVM-PPC-Book3S-HV-Handle-1GB-pages-in-radix-page-fau.patch
new file mode 100644
index 0000000000..e2f99b7ad5
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Handle-1GB-pages-in-radix-page-fau.patch
@@ -0,0 +1,205 @@
+From 58c5c276b4c2ceb2b02ecd959ad9784b997d4332 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Sat, 24 Feb 2018 20:14:37 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Handle 1GB pages in radix page fault
+ handler
+
+References: bsc#1061840
+Patch-mainline: v4.17-rc1
+Git-commit: 58c5c276b4c2ceb2b02ecd959ad9784b997d4332
+
+This adds code to the radix hypervisor page fault handler to handle the
+case where the guest memory is backed by 1GB hugepages, and put them
+into the partition-scoped radix tree at the PUD level. The code is
+essentially analogous to the code for 2MB pages. This also rearranges
+kvmppc_create_pte() to make it easier to follow.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 129 ++++++++++++++++++++++++---------
+ 1 file changed, 93 insertions(+), 36 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index f783b067e5ac..05acc67e0eb2 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -150,7 +150,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
+ {
+ int psize = MMU_BASE_PSIZE;
+
+- if (pshift >= PMD_SHIFT)
++ if (pshift >= PUD_SHIFT)
++ psize = MMU_PAGE_1G;
++ else if (pshift >= PMD_SHIFT)
+ psize = MMU_PAGE_2M;
+ addr &= ~0xfffUL;
+ addr |= mmu_psize_defs[psize].ap << 5;
+@@ -231,9 +233,9 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ new_pud = pud_alloc_one(kvm->mm, gpa);
+
+ pmd = NULL;
+- if (pud && pud_present(*pud))
++ if (pud && pud_present(*pud) && !pud_huge(*pud))
+ pmd = pmd_offset(pud, gpa);
+- else
++ else if (level <= 1)
+ new_pmd = pmd_alloc_one(kvm->mm, gpa);
+
+ if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
+@@ -254,6 +256,50 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ new_pud = NULL;
+ }
+ pud = pud_offset(pgd, gpa);
++ if (pud_huge(*pud)) {
++ unsigned long hgpa = gpa & PUD_MASK;
++
++ /*
++ * If we raced with another CPU which has just put
++ * a 1GB pte in after we saw a pmd page, try again.
++ */
++ if (level <= 1 && !new_pmd) {
++ ret = -EAGAIN;
++ goto out_unlock;
++ }
++ /* Check if we raced and someone else has set the same thing */
++ if (level == 2 && pud_raw(*pud) == pte_raw(pte)) {
++ ret = 0;
++ goto out_unlock;
++ }
++ /* Valid 1GB page here already, remove it */
++ old = kvmppc_radix_update_pte(kvm, (pte_t *)pud,
++ ~0UL, 0, hgpa, PUD_SHIFT);
++ kvmppc_radix_tlbie_page(kvm, hgpa, PUD_SHIFT);
++ if (old & _PAGE_DIRTY) {
++ unsigned long gfn = hgpa >> PAGE_SHIFT;
++ struct kvm_memory_slot *memslot;
++ memslot = gfn_to_memslot(kvm, gfn);
++ if (memslot && memslot->dirty_bitmap)
++ kvmppc_update_dirty_map(memslot,
++ gfn, PUD_SIZE);
++ }
++ }
++ if (level == 2) {
++ if (!pud_none(*pud)) {
++ /*
++ * There's a page table page here, but we wanted to
++ * install a large page, so remove and free the page
++ * table page. new_pmd will be NULL since level == 2.
++ */
++ new_pmd = pmd_offset(pud, 0);
++ pud_clear(pud);
++ kvmppc_radix_flush_pwc(kvm, gpa);
++ }
++ kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
++ ret = 0;
++ goto out_unlock;
++ }
+ if (pud_none(*pud)) {
+ if (!new_pmd)
+ goto out_unlock;
+@@ -289,41 +335,43 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ kvmppc_update_dirty_map(memslot,
+ gfn, PMD_SIZE);
+ }
+- } else if (level == 1 && !pmd_none(*pmd)) {
+- /*
+- * There's a page table page here, but we wanted to
+- * install a large page, so remove and free the page
+- * table page. new_ptep will be NULL since level == 1.
+- */
+- new_ptep = pte_offset_kernel(pmd, 0);
+- pmd_clear(pmd);
+- kvmppc_radix_flush_pwc(kvm, gpa);
+ }
+- if (level == 0) {
+- if (pmd_none(*pmd)) {
+- if (!new_ptep)
+- goto out_unlock;
+- pmd_populate(kvm->mm, pmd, new_ptep);
+- new_ptep = NULL;
+- }
+- ptep = pte_offset_kernel(pmd, gpa);
+- if (pte_present(*ptep)) {
+- /* Check if someone else set the same thing */
+- if (pte_raw(*ptep) == pte_raw(pte)) {
+- ret = 0;
+- goto out_unlock;
+- }
+- /* PTE was previously valid, so invalidate it */
+- old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
+- 0, gpa, 0);
+- kvmppc_radix_tlbie_page(kvm, gpa, 0);
+- if (old & _PAGE_DIRTY)
+- mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
++ if (level == 1) {
++ if (!pmd_none(*pmd)) {
++ /*
++ * There's a page table page here, but we wanted to
++ * install a large page, so remove and free the page
++ * table page. new_ptep will be NULL since level == 1.
++ */
++ new_ptep = pte_offset_kernel(pmd, 0);
++ pmd_clear(pmd);
++ kvmppc_radix_flush_pwc(kvm, gpa);
+ }
+- kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
+- } else {
+ kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
++ ret = 0;
++ goto out_unlock;
++ }
++ if (pmd_none(*pmd)) {
++ if (!new_ptep)
++ goto out_unlock;
++ pmd_populate(kvm->mm, pmd, new_ptep);
++ new_ptep = NULL;
++ }
++ ptep = pte_offset_kernel(pmd, gpa);
++ if (pte_present(*ptep)) {
++ /* Check if someone else set the same thing */
++ if (pte_raw(*ptep) == pte_raw(pte)) {
++ ret = 0;
++ goto out_unlock;
++ }
++ /* PTE was previously valid, so invalidate it */
++ old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
++ 0, gpa, 0);
++ kvmppc_radix_tlbie_page(kvm, gpa, 0);
++ if (old & _PAGE_DIRTY)
++ mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
+ }
++ kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
+ ret = 0;
+
+ out_unlock:
+@@ -446,8 +494,13 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ pfn = page_to_pfn(page);
+ if (PageCompound(page)) {
+ pte_size <<= compound_order(compound_head(page));
+- /* See if we can insert a 2MB large-page PTE here */
+- if (pte_size >= PMD_SIZE &&
++ /* See if we can insert a 1GB or 2MB large PTE here */
++ if (pte_size >= PUD_SIZE &&
++ (gpa & (PUD_SIZE - PAGE_SIZE)) ==
++ (hva & (PUD_SIZE - PAGE_SIZE))) {
++ level = 2;
++ pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
++ } else if (pte_size >= PMD_SIZE &&
+ (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+ (hva & (PMD_SIZE - PAGE_SIZE))) {
+ level = 1;
+@@ -657,6 +710,10 @@ void kvmppc_free_radix(struct kvm *kvm)
+ for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) {
+ if (!pud_present(*pud))
+ continue;
++ if (pud_huge(*pud)) {
++ pud_clear(pud);
++ continue;
++ }
+ pmd = pmd_offset(pud, 0);
+ for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
+ if (pmd_is_leaf(*pmd)) {
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Improve-handling-of-debug-trigger-.patch b/patches.arch/KVM-PPC-Book3S-HV-Improve-handling-of-debug-trigger-.patch
new file mode 100644
index 0000000000..b462efcfb4
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Improve-handling-of-debug-trigger-.patch
@@ -0,0 +1,302 @@
+From d075745d893c78730e4a3b7a60fca23c2f764081 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Wed, 17 Jan 2018 20:51:13 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Improve handling of debug-trigger HMIs
+ on POWER9
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: d075745d893c78730e4a3b7a60fca23c2f764081
+
+Hypervisor maintenance interrupts (HMIs) are generated by various
+causes, signalled by bits in the hypervisor maintenance exception
+register (HMER). In most cases calling OPAL to handle the interrupt
+is the correct thing to do, but the "debug trigger" HMIs signalled by
+PPC bit 17 (bit 46) of HMER are used to invoke software workarounds
+for hardware bugs, and OPAL does not have any code to handle this
+cause. The debug trigger HMI is used in POWER9 DD2.0 and DD2.1 chips
+to work around a hardware bug in executing vector load instructions to
+cache inhibited memory. In POWER9 DD2.2 chips, it is generated when
+conditions are detected relating to threads being in TM (transactional
+memory) suspended mode when the core SMT configuration needs to be
+reconfigured.
+
+The kernel currently has code to detect the vector CI load condition,
+but only when the HMI occurs in the host, not when it occurs in a
+guest. If a HMI occurs in the guest, it is always passed to OPAL, and
+then we always re-sync the timebase, because the HMI cause might have
+been a timebase error, for which OPAL would re-sync the timebase, thus
+removing the timebase offset which KVM applied for the guest. Since
+we don't know what OPAL did, we don't know whether to subtract the
+timebase offset from the timebase, so instead we re-sync the timebase.
+
+This adds code to determine explicitly what the cause of a debug
+trigger HMI will be. This is based on a new device-tree property
+under the CPU nodes called ibm,hmi-special-triggers, if it is
+present, or otherwise based on the PVR (processor version register).
+The handling of debug trigger HMIs is pulled out into a separate
+function which can be called from the KVM guest exit code. If this
+function handles and clears the HMI, and no other HMI causes remain,
+then we skip calling OPAL and we proceed to subtract the guest
+timebase offset from the timebase.
+
+The overall handling for HMIs that occur in the host (i.e. not in a
+KVM guest) is largely unchanged, except that we now don't set the flag
+for the vector CI load workaround on DD2.2 processors.
+
+This also removes a BUG_ON in the KVM code. BUG_ON is generally not
+useful in KVM guest entry/exit code since it is difficult to handle
+the resulting trap gracefully.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/hmi.h | 4 +
+ arch/powerpc/include/asm/reg.h | 5 +-
+ arch/powerpc/kernel/mce.c | 142 +++++++++++++++++++++++++-------
+ arch/powerpc/kvm/book3s_hv_ras.c | 8 +-
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 9 +-
+ 5 files changed, 131 insertions(+), 37 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
+index 85b7a1a21e22..9c14f7b5c46c 100644
+--- a/arch/powerpc/include/asm/hmi.h
++++ b/arch/powerpc/include/asm/hmi.h
+@@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void);
+ static inline void wait_for_subcore_guest_exit(void) { }
+ static inline void wait_for_tb_resync(void) { }
+ #endif
++
++struct pt_regs;
++extern long hmi_handle_debugtrig(struct pt_regs *regs);
++
+ #endif /* __ASM_PPC64_HMI_H__ */
+diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
+index b779f3ccd412..14e41b843952 100644
+--- a/arch/powerpc/include/asm/reg.h
++++ b/arch/powerpc/include/asm/reg.h
+@@ -432,8 +432,9 @@
+ #define SPRN_LPID 0x13F /* Logical Partition Identifier */
+ #endif
+ #define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */
+-#define SPRN_HMER 0x150 /* Hardware m? error recovery */
+-#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */
++#define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */
++#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */
++#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */
+ #define SPRN_PCR 0x152 /* Processor compatibility register */
+ #define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
+ #define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
+diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
+index 742e4658c5dc..d2fecaec4fec 100644
+--- a/arch/powerpc/kernel/mce.c
++++ b/arch/powerpc/kernel/mce.c
+@@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs)
+ return handled;
+ }
+
+-long hmi_exception_realmode(struct pt_regs *regs)
++/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
++static enum {
++ DTRIG_UNKNOWN,
++ DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */
++ DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */
++} hmer_debug_trig_function;
++
++static int init_debug_trig_function(void)
+ {
+- __this_cpu_inc(irq_stat.hmi_exceptions);
+-
+-#ifdef CONFIG_PPC_BOOK3S_64
+- /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
+- if (pvr_version_is(PVR_POWER9)) {
+- unsigned long hmer = mfspr(SPRN_HMER);
+-
+- /* Do we have the debug bit set */
+- if (hmer & PPC_BIT(17)) {
+- hmer &= ~PPC_BIT(17);
+- mtspr(SPRN_HMER, hmer);
+-
+- /*
+- * Now to avoid problems with soft-disable we
+- * only do the emulation if we are coming from
+- * user space
+- */
+- if (user_mode(regs))
+- local_paca->hmi_p9_special_emu = 1;
+-
+- /*
+- * Don't bother going to OPAL if that's the
+- * only relevant bit.
+- */
+- if (!(hmer & mfspr(SPRN_HMEER)))
+- return local_paca->hmi_p9_special_emu;
++ int pvr;
++ struct device_node *cpun;
++ struct property *prop = NULL;
++ const char *str;
++
++ /* First look in the device tree */
++ preempt_disable();
++ cpun = of_get_cpu_node(smp_processor_id(), NULL);
++ if (cpun) {
++ of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
++ prop, str) {
++ if (strcmp(str, "bit17-vector-ci-load") == 0)
++ hmer_debug_trig_function = DTRIG_VECTOR_CI;
++ else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
++ hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
+ }
++ of_node_put(cpun);
++ }
++ preempt_enable();
++
++ /* If we found the property, don't look at PVR */
++ if (prop)
++ goto out;
++
++ pvr = mfspr(SPRN_PVR);
++ /* Check for POWER9 Nimbus (scale-out) */
++ if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
++ /* DD2.2 and later */
++ if ((pvr & 0xfff) >= 0x202)
++ hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
++ /* DD2.0 and DD2.1 - used for vector CI load emulation */
++ else if ((pvr & 0xfff) >= 0x200)
++ hmer_debug_trig_function = DTRIG_VECTOR_CI;
++ }
++
++ out:
++ switch (hmer_debug_trig_function) {
++ case DTRIG_VECTOR_CI:
++ pr_debug("HMI debug trigger used for vector CI load\n");
++ break;
++ case DTRIG_SUSPEND_ESCAPE:
++ pr_debug("HMI debug trigger used for TM suspend escape\n");
++ break;
++ default:
++ break;
+ }
+-#endif /* CONFIG_PPC_BOOK3S_64 */
++ return 0;
++}
++__initcall(init_debug_trig_function);
++
++/*
++ * Handle HMIs that occur as a result of a debug trigger.
++ * Return values:
++ * -1 means this is not a HMI cause that we know about
++ * 0 means no further handling is required
++ * 1 means further handling is required
++ */
++long hmi_handle_debugtrig(struct pt_regs *regs)
++{
++ unsigned long hmer = mfspr(SPRN_HMER);
++ long ret = 0;
++
++ /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
++ if (!((hmer & HMER_DEBUG_TRIG)
++ && hmer_debug_trig_function != DTRIG_UNKNOWN))
++ return -1;
++
++ hmer &= ~HMER_DEBUG_TRIG;
++ /* HMER is a write-AND register */
++ mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
++
++ switch (hmer_debug_trig_function) {
++ case DTRIG_VECTOR_CI:
++ /*
++ * Now to avoid problems with soft-disable we
++ * only do the emulation if we are coming from
++ * host user space
++ */
++ if (regs && user_mode(regs))
++ ret = local_paca->hmi_p9_special_emu = 1;
++
++ break;
++
++ default:
++ break;
++ }
++
++ /*
++ * See if any other HMI causes remain to be handled
++ */
++ if (hmer & mfspr(SPRN_HMEER))
++ return -1;
++
++ return ret;
++}
++
++/*
++ * Return values:
++ */
++long hmi_exception_realmode(struct pt_regs *regs)
++{
++ int ret;
++
++ __this_cpu_inc(irq_stat.hmi_exceptions);
++
++ ret = hmi_handle_debugtrig(regs);
++ if (ret >= 0)
++ return ret;
+
+ wait_for_subcore_guest_exit();
+
+diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
+index c356f9a40b24..c296343d0dcc 100644
+--- a/arch/powerpc/kvm/book3s_hv_ras.c
++++ b/arch/powerpc/kvm/book3s_hv_ras.c
+@@ -268,17 +268,19 @@ static void kvmppc_tb_resync_done(void)
+ * secondary threads to proceed.
+ * - All secondary threads will eventually call opal hmi handler on
+ * their exit path.
++ *
++ * Returns 1 if the timebase offset should be applied, 0 if not.
+ */
+
+ long kvmppc_realmode_hmi_handler(void)
+ {
+- int ptid = local_paca->kvm_hstate.ptid;
+ bool resync_req;
+
+- /* This is only called on primary thread. */
+- BUG_ON(ptid != 0);
+ __this_cpu_inc(irq_stat.hmi_exceptions);
+
++ if (hmi_handle_debugtrig(NULL) >= 0)
++ return 1;
++
+ /*
+ * By now primary thread has already completed guest->host
+ * partition switch but haven't signaled secondaries yet.
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index 2659844784b8..bd0b623335af 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -1909,16 +1909,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ bne 27f
+ bl kvmppc_realmode_hmi_handler
+ nop
++ cmpdi r3, 0
+ li r12, BOOK3S_INTERRUPT_HMI
+ /*
+- * At this point kvmppc_realmode_hmi_handler would have resync-ed
+- * the TB. Hence it is not required to subtract guest timebase
+- * offset from timebase. So, skip it.
++ * At this point kvmppc_realmode_hmi_handler may have resync-ed
++ * the TB, and if it has, we must not subtract the guest timebase
++ * offset from the timebase. So, skip it.
+ *
+ * Also, do not call kvmppc_subcore_exit_guest() because it has
+ * been invoked as part of kvmppc_realmode_hmi_handler().
+ */
+- b 30f
++ beq 30f
+
+ 27:
+ /* Subtract timebase offset from timebase */
+--
+2.13.7
+
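As a side note on the PVR checks in init_debug_trig_function() above: the sketch below is a plain userspace illustration, not kernel code. It reuses only what the hunk shows (the Nimbus test (pvr & 0xe000) == 0 and the DD2.0/DD2.2 thresholds); the sample PVR values and the helper name are assumptions made up for the example.

    #include <stdio.h>

    #define PVR_VER(pvr)    (((pvr) >> 16) & 0xFFFF)
    #define PVR_POWER9      0x004e

    /* Mirrors the POWER9 Nimbus checks in init_debug_trig_function() */
    static const char *p9_debug_trig_use(unsigned int pvr)
    {
            if (PVR_VER(pvr) != PVR_POWER9 || (pvr & 0xe000) != 0)
                    return "unknown";
            if ((pvr & 0xfff) >= 0x202)
                    return "TM suspend escape";     /* DD2.2 and later */
            if ((pvr & 0xfff) >= 0x200)
                    return "vector CI load";        /* DD2.0 and DD2.1 */
            return "unknown";
    }

    int main(void)
    {
            /* Sample PVR values, chosen only to exercise each branch */
            unsigned int samples[] = { 0x004e0100, 0x004e0200, 0x004e0202 };
            int i;

            for (i = 0; i < 3; i++)
                    printf("PVR 0x%08x -> %s\n", samples[i],
                           p9_debug_trig_use(samples[i]));
            return 0;
    }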
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Keep-XIVE-escalation-interrupt-mas.patch b/patches.arch/KVM-PPC-Book3S-HV-Keep-XIVE-escalation-interrupt-mas.patch
new file mode 100644
index 0000000000..2add81e4fe
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Keep-XIVE-escalation-interrupt-mas.patch
@@ -0,0 +1,186 @@
+From 9b9b13a6d1537ddc4caccd6f1c41b78edbc08437 Mon Sep 17 00:00:00 2001
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Date: Fri, 12 Jan 2018 13:37:16 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Keep XIVE escalation interrupt masked
+ unless ceded
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 9b9b13a6d1537ddc4caccd6f1c41b78edbc08437
+
+This works on top of the single escalation support. When in single
+escalation, with this change, we will keep the escalation interrupt
+disabled unless the VCPU is in H_CEDE (idle). In any other case, we
+know the VCPU will be rescheduled and thus there is no need to take
+escalation interrupts in the host whenever a guest interrupt fires.
+
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/kvm_host.h | 3 ++
+ arch/powerpc/kernel/asm-offsets.c | 3 ++
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 62 ++++++++++++++++++++++++++++++++-
+ arch/powerpc/kvm/book3s_xive.c | 30 ++++++++++++++++
+ 4 files changed, 97 insertions(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
+index 0c44fa67608d..fef8133becc8 100644
+--- a/arch/powerpc/include/asm/kvm_host.h
++++ b/arch/powerpc/include/asm/kvm_host.h
+@@ -740,7 +740,10 @@ struct kvm_vcpu_arch {
+ struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
+ __be32 xive_cam_word; /* Cooked W2 in proper endian with valid bit */
+ u8 xive_pushed; /* Is the VP pushed on the physical CPU ? */
++ u8 xive_esc_on; /* Is the escalation irq enabled ? */
+ union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
++ u64 xive_esc_raddr; /* Escalation interrupt ESB real addr */
++ u64 xive_esc_vaddr; /* Escalation interrupt ESB virt addr */
+ #endif
+
+ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
+index 825089cf3e23..1672dffd94e2 100644
+--- a/arch/powerpc/kernel/asm-offsets.c
++++ b/arch/powerpc/kernel/asm-offsets.c
+@@ -734,6 +734,9 @@ int main(void)
+ DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
+ arch.xive_cam_word));
+ DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
++ DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on));
++ DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr));
++ DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr));
+ #endif
+
+ #ifdef CONFIG_KVM_EXIT_TIMING
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index a7f429bc6de0..a7a20b85d8eb 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -1045,6 +1045,41 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
+ */
+ li r0,0
+ stb r0, VCPU_IRQ_PENDING(r4)
++
++ /*
++ * In single escalation mode, if the escalation interrupt is
++ * on, we mask it.
++ */
++ lbz r0, VCPU_XIVE_ESC_ON(r4)
++ cmpwi r0,0
++ beq 1f
++ ld r10, VCPU_XIVE_ESC_RADDR(r4)
++ li r9, XIVE_ESB_SET_PQ_01
++ ldcix r0, r10, r9
++ sync
++
++ /* We have a possible subtle race here: The escalation interrupt might
++ * have fired and be on its way to the host queue while we mask it,
++ * and if we unmask it early enough (re-cede right away), there is
++ * a theoretical possibility that it fires again, thus landing in the
++ * target queue more than once which is a big no-no.
++ *
++ * Fortunately, solving this is rather easy. If the above load setting
++ * PQ to 01 returns a previous value where P is set, then we know the
++ * escalation interrupt is somewhere on its way to the host. In that
++ * case we simply don't clear the xive_esc_on flag below. It will be
++ * eventually cleared by the handler for the escalation interrupt.
++ *
++ * Then, when doing a cede, we check that flag again before re-enabling
++ * the escalation interrupt, and if set, we abort the cede.
++ */
++ andi. r0, r0, XIVE_ESB_VAL_P
++ bne- 1f
++
++ /* Now P is 0, we can clear the flag */
++ li r0, 0
++ stb r0, VCPU_XIVE_ESC_ON(r4)
++1:
+ no_xive:
+ #endif /* CONFIG_KVM_XICS */
+
+@@ -2756,7 +2791,32 @@ kvm_cede_prodded:
+ /* we've ceded but we want to give control to the host */
+ kvm_cede_exit:
+ ld r9, HSTATE_KVM_VCPU(r13)
+- b guest_exit_cont
++#ifdef CONFIG_KVM_XICS
++ /* Abort if we still have a pending escalation */
++ lbz r5, VCPU_XIVE_ESC_ON(r9)
++ cmpwi r5, 0
++ beq 1f
++ li r0, 0
++ stb r0, VCPU_CEDED(r9)
++1: /* Enable XIVE escalation */
++ li r5, XIVE_ESB_SET_PQ_00
++ mfmsr r0
++ andi. r0, r0, MSR_DR /* in real mode? */
++ beq 1f
++ ld r10, VCPU_XIVE_ESC_VADDR(r9)
++ cmpdi r10, 0
++ beq 3f
++ ldx r0, r10, r5
++ b 2f
++1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
++ cmpdi r10, 0
++ beq 3f
++ ldcix r0, r10, r5
++2: sync
++ li r0, 1
++ stb r0, VCPU_XIVE_ESC_ON(r9)
++#endif /* CONFIG_KVM_XICS */
++3: b guest_exit_cont
+
+ /* Try to handle a machine check in real mode */
+ machine_check_realmode:
+diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
+index eef9ccafdc09..7a047bc88f11 100644
+--- a/arch/powerpc/kvm/book3s_xive.c
++++ b/arch/powerpc/kvm/book3s_xive.c
+@@ -89,6 +89,17 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
+ if (vcpu->arch.ceded)
+ kvmppc_fast_vcpu_kick(vcpu);
+
++ /* Since we have the no-EOI flag, the interrupt is effectively
++ * disabled now. Clearing xive_esc_on means we won't bother
++ * doing so on the next entry.
++ *
++ * This also allows the entry code to know that if a PQ combination
++ * of 10 is observed while xive_esc_on is true, it means the queue
++ * contains an unprocessed escalation interrupt. We don't make use of
++ * that knowledge today but might (see comment in book3s_hv_rmhandlers.S)
++ */
++ vcpu->arch.xive_esc_on = false;
++
+ return IRQ_HANDLED;
+ }
+
+@@ -134,6 +145,25 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
+ goto error;
+ }
+ xc->esc_virq_names[prio] = name;
++
++ /* In single escalation mode, we grab the ESB MMIO of the
++ * interrupt and mask it. Also populate the VCPU v/raddr
++ * of the ESB page for use by asm entry/exit code. Finally
++ * set the XIVE_IRQ_NO_EOI flag which will prevent the
++ * core code from performing an EOI on the escalation
++ * interrupt, thus leaving it effectively masked after
++ * it fires once.
++ */
++ if (xc->xive->single_escalation) {
++ struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
++ struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
++
++ xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
++ vcpu->arch.xive_esc_raddr = xd->eoi_page;
++ vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
++ xd->flags |= XIVE_IRQ_NO_EOI;
++ }
++
+ return 0;
+ error:
+ irq_dispose_mapping(xc->esc_virq[prio]);
+--
+2.13.7
+
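The subtle race documented in the rmhandlers comment above lends itself to a small model. The following is a userspace sketch, not KVM code: the ESB and its PQ bits are simulated, XIVE_ESB_VAL_P/Q match the values implied by the patch, and every other name is invented. It only demonstrates the handshake: mask by loading at the SET_PQ_01 offset, then clear xive_esc_on only when the returned P bit was clear.

    #include <stdbool.h>
    #include <stdio.h>

    #define XIVE_ESB_VAL_P  0x2     /* interrupt pending / in flight */
    #define XIVE_ESB_VAL_Q  0x1     /* queued (masked) */

    struct sim_esb { unsigned int pq; };    /* simulated ESB PQ bits */
    static bool xive_esc_on;                /* stands in for vcpu->arch.xive_esc_on */

    /* Simulates a load at the XIVE_ESB_SET_PQ_01 offset: returns the old
     * PQ value and leaves the source masked (PQ = 01). */
    static unsigned int esb_load_set_pq_01(struct sim_esb *esb)
    {
            unsigned int old = esb->pq;

            esb->pq = XIVE_ESB_VAL_Q;
            return old;
    }

    /* Entry-path logic from the patch: mask the escalation, but keep
     * xive_esc_on set if the interrupt was already in flight (P set),
     * so a later cede will not re-enable it too early. */
    static void entry_mask_escalation(struct sim_esb *esb)
    {
            unsigned int old;

            if (!xive_esc_on)
                    return;
            old = esb_load_set_pq_01(esb);
            if (old & XIVE_ESB_VAL_P)
                    return;                 /* escalation racing to the host queue */
            xive_esc_on = false;            /* safe: nothing in flight */
    }

    int main(void)
    {
            struct sim_esb esb = { .pq = 0 };       /* enabled, nothing pending */

            xive_esc_on = true;
            entry_mask_escalation(&esb);
            printf("clean case: xive_esc_on=%d pq=%u\n", xive_esc_on, esb.pq);

            /* Racy case: P already set when we mask */
            esb.pq = XIVE_ESB_VAL_P;
            xive_esc_on = true;
            entry_mask_escalation(&esb);
            printf("racy case:  xive_esc_on=%d pq=%u\n", xive_esc_on, esb.pq);
            return 0;
    }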
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Lockless-tlbie-for-HPT-hcalls.patch b/patches.arch/KVM-PPC-Book3S-HV-Lockless-tlbie-for-HPT-hcalls.patch
new file mode 100644
index 0000000000..456e01e8d6
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Lockless-tlbie-for-HPT-hcalls.patch
@@ -0,0 +1,123 @@
+From b7557451475d747740bc1598045bd273ece80ab0 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Thu, 17 May 2018 16:59:10 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Lockless tlbie for HPT hcalls
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: b7557451475d747740bc1598045bd273ece80ab0
+
+tlbies to an LPAR do not have to be serialised since POWER4/PPC970,
+after which the MMU_FTR_LOCKLESS_TLBIE feature was introduced to
+avoid tlbie locking.
+
+Since commit c17b98cf6028 ("KVM: PPC: Book3S HV: Remove code for
+PPC970 processors"), KVM no longer supports processors that do not
+have this feature, so the tlbie locking can be removed completely.
+A sanity check for the feature is put in kvmppc_mmu_hv_init.
+
+Testing was done on a POWER9 system in HPT mode, with a -smp 32 guest
+in HPT mode. 32 instances of the powerpc fork benchmark from selftests
+were run with --fork, and the results measured.
+
+Without this patch, total throughput was about 13.5K/sec, and this is
+the top of the host profile:
+
+ 74.52% [k] do_tlbies
+ 2.95% [k] kvmppc_book3s_hv_page_fault
+ 1.80% [k] calc_checksum
+ 1.80% [k] kvmppc_vcpu_run_hv
+ 1.49% [k] kvmppc_run_core
+
+After this patch, throughput was about 51K/sec, with this profile:
+
+ 21.28% [k] do_tlbies
+ 5.26% [k] kvmppc_run_core
+ 4.88% [k] kvmppc_book3s_hv_page_fault
+ 3.30% [k] _raw_spin_lock_irqsave
+ 3.25% [k] gup_pgd_range
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/kvm_host.h | 1 -
+ arch/powerpc/kvm/book3s_64_mmu_hv.c | 3 +++
+ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 21 ---------------------
+ 3 files changed, 3 insertions(+), 22 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
+index 8b0ee5e09ea3..89f44ecc4dbd 100644
+--- a/arch/powerpc/include/asm/kvm_host.h
++++ b/arch/powerpc/include/asm/kvm_host.h
+@@ -269,7 +269,6 @@ struct kvm_arch {
+ unsigned long host_lpcr;
+ unsigned long sdr1;
+ unsigned long host_sdr1;
+- int tlbie_lock;
+ unsigned long lpcr;
+ unsigned long vrma_slb_v;
+ int mmu_ready;
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
+index a670fa5fbe50..37cd6434d1c8 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
+@@ -272,6 +272,9 @@ int kvmppc_mmu_hv_init(void)
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return -EINVAL;
+
++ if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
++ return -EINVAL;
++
+ /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
+ host_lpid = mfspr(SPRN_LPID);
+ rsvd_lpid = LPID_RSVD;
+diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+index 8e12c5c3c4ee..1f22d9e977d4 100644
+--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
++++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+@@ -435,24 +435,6 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r)
+ (HPTE_R_KEY_HI | HPTE_R_KEY_LO));
+ }
+
+-static inline int try_lock_tlbie(unsigned int *lock)
+-{
+- unsigned int tmp, old;
+- unsigned int token = LOCK_TOKEN;
+-
+- asm volatile("1:lwarx %1,0,%2\n"
+- " cmpwi cr0,%1,0\n"
+- " bne 2f\n"
+- " stwcx. %3,0,%2\n"
+- " bne- 1b\n"
+- " isync\n"
+- "2:"
+- : "=&r" (tmp), "=&r" (old)
+- : "r" (lock), "r" (token)
+- : "cc", "memory");
+- return old == 0;
+-}
+-
+ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
+ long npages, int global, bool need_sync)
+ {
+@@ -464,8 +446,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
+ * the RS field, this is backwards-compatible with P7 and P8.
+ */
+ if (global) {
+- while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+- cpu_relax();
+ if (need_sync)
+ asm volatile("ptesync" : : : "memory");
+ for (i = 0; i < npages; ++i) {
+@@ -484,7 +464,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
+ }
+
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+- kvm->arch.tlbie_lock = 0;
+ } else {
+ if (need_sync)
+ asm volatile("ptesync" : : : "memory");
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Make-HPT-resizing-work-on-POWER9.patch b/patches.arch/KVM-PPC-Book3S-HV-Make-HPT-resizing-work-on-POWER9.patch
new file mode 100644
index 0000000000..c1015f2ee1
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Make-HPT-resizing-work-on-POWER9.patch
@@ -0,0 +1,136 @@
+From 790a9df5fbef982f2a6992194fe497dd2b794a3d Mon Sep 17 00:00:00 2001
+From: David Gibson <david@gibson.dropbear.id.au>
+Date: Fri, 2 Feb 2018 14:29:08 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Make HPT resizing work on POWER9
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 790a9df5fbef982f2a6992194fe497dd2b794a3d
+
+This adds code to enable the HPT resizing code to work on POWER9,
+which uses a slightly modified HPT entry format compared to POWER8.
+On POWER9, we convert HPTEs read from the HPT from the new format to
+the old format so that the rest of the HPT resizing code can work as
+before. HPTEs written to the new HPT are converted to the new format
+as the last step before writing them into the new HPT.
+
+This takes out the checks added by commit bcd3bb63dbc8 ("KVM: PPC:
+Book3S HV: Disable HPT resizing on POWER9 for now", 2017-02-18),
+now that HPT resizing works on POWER9.
+
+On POWER9, when we pivot to the new HPT, we now call
+kvmppc_setup_partition_table() to update the partition table in order
+to make the hardware use the new HPT.
+
+[paulus@ozlabs.org - added kvmppc_setup_partition_table() call,
+ wrote commit message.]
+
+Tested-by: Laurent Vivier <lvivier@redhat.com>
+Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_hv.c | 30 +++++++++++++++++++++++-------
+ arch/powerpc/kvm/powerpc.c | 3 +--
+ 2 files changed, 24 insertions(+), 9 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
+index d19649960bbf..cb34be7d1a49 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
+@@ -1261,6 +1261,11 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
+ /* Nothing to do */
+ goto out;
+
++ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
++ rpte = be64_to_cpu(hptep[1]);
++ vpte = hpte_new_to_old_v(vpte, rpte);
++ }
++
+ /* Unmap */
+ rev = &old->rev[idx];
+ guest_rpte = rev->guest_rpte;
+@@ -1290,7 +1295,6 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
+
+ /* Reload PTE after unmap */
+ vpte = be64_to_cpu(hptep[0]);
+-
+ BUG_ON(vpte & HPTE_V_VALID);
+ BUG_ON(!(vpte & HPTE_V_ABSENT));
+
+@@ -1299,6 +1303,12 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
+ goto out;
+
+ rpte = be64_to_cpu(hptep[1]);
++
++ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
++ vpte = hpte_new_to_old_v(vpte, rpte);
++ rpte = hpte_new_to_old_r(rpte);
++ }
++
+ pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
+ avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);
+ pteg = idx / HPTES_PER_GROUP;
+@@ -1336,6 +1346,10 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
+ new_hptep = (__be64 *)(new->virt + (new_idx << 4));
+
+ replace_vpte = be64_to_cpu(new_hptep[0]);
++ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
++ unsigned long replace_rpte = be64_to_cpu(new_hptep[1]);
++ replace_vpte = hpte_new_to_old_v(replace_vpte, replace_rpte);
++ }
+
+ if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+ BUG_ON(new->order >= old->order);
+@@ -1351,6 +1365,11 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
+ /* Discard the previous HPTE */
+ }
+
++ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
++ rpte = hpte_old_to_new_r(vpte, rpte);
++ vpte = hpte_old_to_new_v(vpte);
++ }
++
+ new_hptep[1] = cpu_to_be64(rpte);
+ new->rev[new_idx].guest_rpte = guest_rpte;
+ /* No need for a barrier, since new HPT isn't active */
+@@ -1368,12 +1387,6 @@ static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
+ unsigned long i;
+ int rc;
+
+- /*
+- * resize_hpt_rehash_hpte() doesn't handle the new-format HPTEs
+- * that POWER9 uses, and could well hit a BUG_ON on POWER9.
+- */
+- if (cpu_has_feature(CPU_FTR_ARCH_300))
+- return -EIO;
+ for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) {
+ rc = resize_hpt_rehash_hpte(resize, i);
+ if (rc != 0)
+@@ -1404,6 +1417,9 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
+
+ synchronize_srcu_expedited(&kvm->srcu);
+
++ if (cpu_has_feature(CPU_FTR_ARCH_300))
++ kvmppc_setup_partition_table(kvm);
++
+ resize_hpt_debug(resize, "resize_hpt_pivot() done\n");
+ }
+
+diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
+index 77eb25abc601..cf86aeb43fcf 100644
+--- a/arch/powerpc/kvm/powerpc.c
++++ b/arch/powerpc/kvm/powerpc.c
+@@ -633,8 +633,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+ r = 1;
+ break;
+ case KVM_CAP_SPAPR_RESIZE_HPT:
+- /* Disable this on POWER9 until code handles new HPTE format */
+- r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
++ r = !!hv_enabled;
+ break;
+ #endif
+ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Make-radix-clear-pte-when-unmappin.patch b/patches.arch/KVM-PPC-Book3S-HV-Make-radix-clear-pte-when-unmappin.patch
new file mode 100644
index 0000000000..10567e13f1
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Make-radix-clear-pte-when-unmappin.patch
@@ -0,0 +1,38 @@
+From 7e3d9a1d0f2c681456a2e04b8ba9a2fb448fe515 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Wed, 9 May 2018 12:20:15 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Make radix clear pte when unmapping
+
+References: bsc#1061840
+Patch-mainline: v4.17-rc7
+Git-commit: 7e3d9a1d0f2c681456a2e04b8ba9a2fb448fe515
+
+The current partition table unmap code clears the _PAGE_PRESENT bit
+out of the pte, which leaves pud_huge/pmd_huge true and does not
+clear pud_present/pmd_present. This can confuse subsequent page
+faults and possibly lead to the guest looping doing continual
+hypervisor page faults.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index a6870288c0e0..361f42c8c73e 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -584,7 +584,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+
+ ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ if (ptep && pte_present(*ptep)) {
+- old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
++ old = kvmppc_radix_update_pte(kvm, ptep, ~0UL, 0,
+ gpa, shift);
+ kvmppc_radix_tlbie_page(kvm, gpa, shift);
+ if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
+--
+2.13.7
+
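To see why clearing only _PAGE_PRESENT was not enough, here is a toy model in plain C. The bit values are invented (only the distinction between the present bit and a separate leaf-marker bit matters for the point); update_pte() has the same clear/set shape as kvmppc_radix_update_pte() but is otherwise unrelated to the kernel implementation.

    #include <stdio.h>

    #define _PAGE_PRESENT   0x1UL
    #define LEAF_BIT        0x4000000000000000UL    /* illustrative stand-in for _PAGE_PTE */

    /* Same shape as kvmppc_radix_update_pte(): clear 'clr' bits, set 'set' bits */
    static unsigned long update_pte(unsigned long pte, unsigned long clr,
                                    unsigned long set)
    {
            return (pte & ~clr) | set;
    }

    int main(void)
    {
            unsigned long leaf_pte = _PAGE_PRESENT | LEAF_BIT | 0x1000;
            unsigned long old_way = update_pte(leaf_pte, _PAGE_PRESENT, 0); /* before the fix */
            unsigned long new_way = update_pte(leaf_pte, ~0UL, 0);          /* after the fix */

            printf("clear _PAGE_PRESENT: present=%d looks-huge=%d\n",
                   !!(old_way & _PAGE_PRESENT), !!(old_way & LEAF_BIT));
            printf("clear ~0UL:          present=%d looks-huge=%d\n",
                   !!(new_way & _PAGE_PRESENT), !!(new_way & LEAF_BIT));
            return 0;
    }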
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Make-radix-use-correct-tlbie-seque.patch b/patches.arch/KVM-PPC-Book3S-HV-Make-radix-use-correct-tlbie-seque.patch
new file mode 100644
index 0000000000..43b3edae27
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Make-radix-use-correct-tlbie-seque.patch
@@ -0,0 +1,45 @@
+From e2560b108fb1375b5fab196c1ec0d910bbe8a38b Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Wed, 9 May 2018 12:20:14 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Make radix use correct tlbie sequence in
+ kvmppc_radix_tlbie_page
+
+References: bsc#1061840
+Patch-mainline: v4.17-rc7
+Git-commit: e2560b108fb1375b5fab196c1ec0d910bbe8a38b
+
+The standard eieio ; tlbsync ; ptesync must follow tlbie to ensure it
+is ordered with respect to subsequent operations.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index a57eafec4dc2..a6870288c0e0 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -162,7 +162,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG))
+ asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
+ : : "r" (addr), "r" (kvm->arch.lpid) : "memory");
+- asm volatile("ptesync": : :"memory");
++ asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
+ }
+
+ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
+@@ -173,7 +173,7 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
+ /* RIC=1 PRS=0 R=1 IS=2 */
+ asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
+ : : "r" (rb), "r" (kvm->arch.lpid) : "memory");
+- asm volatile("ptesync": : :"memory");
++ asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
+ }
+
+ unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Make-xive_pushed-a-byte-not-a-word.patch b/patches.arch/KVM-PPC-Book3S-HV-Make-xive_pushed-a-byte-not-a-word.patch
new file mode 100644
index 0000000000..07adae9513
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Make-xive_pushed-a-byte-not-a-word.patch
@@ -0,0 +1,64 @@
+From 35c2405efc0142860c4b698f4c6331567c4ca1ef Mon Sep 17 00:00:00 2001
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Date: Fri, 12 Jan 2018 13:37:15 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Make xive_pushed a byte, not a word
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 35c2405efc0142860c4b698f4c6331567c4ca1ef
+
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/kvm_host.h | 2 +-
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 6 +++---
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
+index bfe51356af5e..0c44fa67608d 100644
+--- a/arch/powerpc/include/asm/kvm_host.h
++++ b/arch/powerpc/include/asm/kvm_host.h
+@@ -739,7 +739,7 @@ struct kvm_vcpu_arch {
+ struct kvmppc_icp *icp; /* XICS presentation controller */
+ struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
+ __be32 xive_cam_word; /* Cooked W2 in proper endian with valid bit */
+- u32 xive_pushed; /* Is the VP pushed on the physical CPU ? */
++ u8 xive_pushed; /* Is the VP pushed on the physical CPU ? */
+ union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
+ #endif
+
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index 948f21cf84d5..a7f429bc6de0 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -1033,7 +1033,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
+ li r9, TM_QW1_OS + TM_WORD2
+ stwcix r11,r9,r10
+ li r9, 1
+- stw r9, VCPU_XIVE_PUSHED(r4)
++ stb r9, VCPU_XIVE_PUSHED(r4)
+ eieio
+
+ /*
+@@ -1458,7 +1458,7 @@ mc_cont:
+ #endif
+ #ifdef CONFIG_KVM_XICS
+ /* We are exiting, pull the VP from the XIVE */
+- lwz r0, VCPU_XIVE_PUSHED(r9)
++ lbz r0, VCPU_XIVE_PUSHED(r9)
+ cmpwi cr0, r0, 0
+ beq 1f
+ li r7, TM_SPC_PULL_OS_CTX
+@@ -1487,7 +1487,7 @@ mc_cont:
+ /* Fixup some of the state for the next load */
+ li r10, 0
+ li r0, 0xff
+- stw r10, VCPU_XIVE_PUSHED(r9)
++ stb r10, VCPU_XIVE_PUSHED(r9)
+ stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
+ stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
+ eieio
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Pack-VCORE-IDs-to-access-full-VCPU.patch b/patches.arch/KVM-PPC-Book3S-HV-Pack-VCORE-IDs-to-access-full-VCPU.patch
new file mode 100644
index 0000000000..01eda55f06
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Pack-VCORE-IDs-to-access-full-VCPU.patch
@@ -0,0 +1,256 @@
+From 1e175d2e07c71d9574f5b1c74523abca54e2654f Mon Sep 17 00:00:00 2001
+From: Sam Bobroff <sam.bobroff@au1.ibm.com>
+Date: Wed, 25 Jul 2018 16:12:02 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Pack VCORE IDs to access full VCPU ID
+ space
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc1
+Git-commit: 1e175d2e07c71d9574f5b1c74523abca54e2654f
+
+It is not currently possible to create the full number of possible
+VCPUs (KVM_MAX_VCPUS) on Power9 with KVM-HV when the guest uses fewer
+threads per core than its core stride (or "VSMT mode"). This is
+because the VCORE ID and XIVE offsets grow beyond KVM_MAX_VCPUS
+even though the VCPU ID is less than KVM_MAX_VCPU_ID.
+
+To address this, "pack" the VCORE ID and XIVE offsets by using
+knowledge of the way the VCPU IDs will be used when there are fewer
+guest threads per core than the core stride. The primary thread of
+each core will always be used first. Then, if the guest uses more than
+one thread per core, these secondary threads will sequentially follow
+the primary in each core.
+
+So, the only way an ID above KVM_MAX_VCPUS can be seen, is if the
+VCPUs are being spaced apart, so at least half of each core is empty,
+and IDs between KVM_MAX_VCPUS and (KVM_MAX_VCPUS * 2) can be mapped
+into the second half of each core (4..7, in an 8-thread core).
+
+Similarly, if IDs above KVM_MAX_VCPUS * 2 are seen, at least 3/4 of
+each core is being left empty, and we can map down into the second and
+third quarters of each core (2, 3 and 5, 6 in an 8-thread core).
+
+Lastly, if IDs above KVM_MAX_VCPUS * 4 are seen, only the primary
+threads are being used and 7/8 of the core is empty, allowing use of
+the 1, 5, 3 and 7 thread slots.
+
+(Strides less than 8 are handled similarly.)
+
+This allows the VCORE ID or offset to be calculated quickly from the
+VCPU ID or XIVE server numbers, without access to the VCPU structure.
+
+[paulus@ozlabs.org - tidied up comment a little, changed some WARN_ONCE
+ to pr_devel, wrapped line, fixed id check.]
+
+Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/kvm_book3s.h | 47 +++++++++++++++++++++++++++++++++++
+ arch/powerpc/kvm/book3s_hv.c | 27 +++++++++++++++-----
+ arch/powerpc/kvm/book3s_xive.c | 19 ++++++++------
+ 3 files changed, 80 insertions(+), 13 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
+index 1f345a0b6ba2..83a9aa3cf689 100644
+--- a/arch/powerpc/include/asm/kvm_book3s.h
++++ b/arch/powerpc/include/asm/kvm_book3s.h
+@@ -390,4 +390,51 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
+ #define SPLIT_HACK_MASK 0xff000000
+ #define SPLIT_HACK_OFFS 0xfb000000
+
++/*
++ * This packs a VCPU ID from the [0..KVM_MAX_VCPU_ID) space down to the
++ * [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride
++ * (but not its actual threading mode, which is not available) to avoid
++ * collisions.
++ *
++ * The implementation leaves VCPU IDs from the range [0..KVM_MAX_VCPUS) (block
++ * 0) unchanged: if the guest is filling each VCORE completely then it will be
++ * using consecutive IDs and it will fill the space without any packing.
++ *
++ * For higher VCPU IDs, the packed ID is based on the VCPU ID modulo
++ * KVM_MAX_VCPUS (effectively masking off the top bits) and then an offset is
++ * added to avoid collisions.
++ *
++ * VCPU IDs in the range [KVM_MAX_VCPUS..(KVM_MAX_VCPUS*2)) (block 1) are only
++ * possible if the guest is leaving at least 1/2 of each VCORE empty, so IDs
++ * can be safely packed into the second half of each VCORE by adding an offset
++ * of (stride / 2).
++ *
++ * Similarly, if VCPU IDs in the range [(KVM_MAX_VCPUS*2)..(KVM_MAX_VCPUS*4))
++ * (blocks 2 and 3) are seen, the guest must be leaving at least 3/4 of each
++ * VCORE empty so packed IDs can be offset by (stride / 4) and (stride * 3 / 4).
++ *
++ * Finally, VCPU IDs from blocks 5..7 will only be seen if the guest is using a
++ * stride of 8 and 1 thread per core so the remaining offsets of 1, 5, 3 and 7
++ * must be free to use.
++ *
++ * (The offsets for each block are stored in block_offsets[], indexed by the
++ * block number if the stride is 8. For cases where the guest's stride is less
++ * than 8, we can re-use the block_offsets array by multiplying the block
++ * number by (MAX_SMT_THREADS / stride) to reach the correct entry.)
++ */
++static inline u32 kvmppc_pack_vcpu_id(struct kvm *kvm, u32 id)
++{
++ const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
++ int stride = kvm->arch.emul_smt_mode;
++ int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride);
++ u32 packed_id;
++
++ if (WARN_ONCE(block >= MAX_SMT_THREADS, "VCPU ID too large to pack"))
++ return 0;
++ packed_id = (id % KVM_MAX_VCPUS) + block_offsets[block];
++ if (WARN_ONCE(packed_id >= KVM_MAX_VCPUS, "VCPU ID packing failed"))
++ return 0;
++ return packed_id;
++}
++
+ #endif /* __ASM_KVM_BOOK3S_H__ */
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index d73b29b6aaa1..785245e09f32 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -1816,7 +1816,7 @@ static int threads_per_vcore(struct kvm *kvm)
+ return threads_per_subcore;
+ }
+
+-static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
++static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
+ {
+ struct kvmppc_vcore *vcore;
+
+@@ -1830,7 +1830,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
+ init_swait_queue_head(&vcore->wq);
+ vcore->preempt_tb = TB_NIL;
+ vcore->lpcr = kvm->arch.lpcr;
+- vcore->first_vcpuid = core * kvm->arch.smt_mode;
++ vcore->first_vcpuid = id;
+ vcore->kvm = kvm;
+ INIT_LIST_HEAD(&vcore->preempt_list);
+
+@@ -1989,10 +1989,16 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
+ unsigned int id)
+ {
+ struct kvm_vcpu *vcpu;
+- int err;
++ int err = -EINVAL;
+ int core;
+ struct kvmppc_vcore *vcore;
+
++ if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode) &&
++ cpu_has_feature(CPU_FTR_ARCH_300)) {
++ pr_devel("DNCI: VCPU ID too high\n");
++ goto out;
++ }
++
+ err = -ENOMEM;
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu)
+@@ -2048,12 +2054,21 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
+ mutex_lock(&kvm->lock);
+ vcore = NULL;
+ err = -EINVAL;
+- core = id / kvm->arch.smt_mode;
++ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
++ BUG_ON(kvm->arch.smt_mode != 1);
++ core = kvmppc_pack_vcpu_id(kvm, id);
++ } else {
++ core = id / kvm->arch.smt_mode;
++ }
+ if (core < KVM_MAX_VCORES) {
+ vcore = kvm->arch.vcores[core];
+- if (!vcore) {
++ if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) {
++ pr_devel("KVM: collision on id %u", id);
++ vcore = NULL;
++ } else if (!vcore) {
+ err = -ENOMEM;
+- vcore = kvmppc_vcore_create(kvm, core);
++ vcore = kvmppc_vcore_create(kvm,
++ id & ~(kvm->arch.smt_mode - 1));
+ kvm->arch.vcores[core] = vcore;
+ kvm->arch.online_vcores++;
+ }
+diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
+index f9818d7d3381..126f02b3ffb8 100644
+--- a/arch/powerpc/kvm/book3s_xive.c
++++ b/arch/powerpc/kvm/book3s_xive.c
+@@ -317,6 +317,11 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
+ return -EBUSY;
+ }
+
++static u32 xive_vp(struct kvmppc_xive *xive, u32 server)
++{
++ return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
++}
++
+ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
+ struct kvmppc_xive_src_block *sb,
+ struct kvmppc_xive_irq_state *state)
+@@ -362,7 +367,7 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
+ */
+ if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
+ xive_native_configure_irq(hw_num,
+- xive->vp_base + state->act_server,
++ xive_vp(xive, state->act_server),
+ MASKED, state->number);
+ /* set old_p so we can track if an H_EOI was done */
+ state->old_p = true;
+@@ -418,7 +423,7 @@ static void xive_finish_unmask(struct kvmppc_xive *xive,
+ */
+ if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
+ xive_native_configure_irq(hw_num,
+- xive->vp_base + state->act_server,
++ xive_vp(xive, state->act_server),
+ state->act_priority, state->number);
+ /* If an EOI is needed, do it here */
+ if (!state->old_p)
+@@ -495,7 +500,7 @@ static int xive_target_interrupt(struct kvm *kvm,
+ kvmppc_xive_select_irq(state, &hw_num, NULL);
+
+ return xive_native_configure_irq(hw_num,
+- xive->vp_base + server,
++ xive_vp(xive, server),
+ prio, state->number);
+ }
+
+@@ -883,7 +888,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+ * which is fine for a never started interrupt.
+ */
+ xive_native_configure_irq(hw_irq,
+- xive->vp_base + state->act_server,
++ xive_vp(xive, state->act_server),
+ state->act_priority, state->number);
+
+ /*
+@@ -959,7 +964,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+
+ /* Reconfigure the IPI */
+ xive_native_configure_irq(state->ipi_number,
+- xive->vp_base + state->act_server,
++ xive_vp(xive, state->act_server),
+ state->act_priority, state->number);
+
+ /*
+@@ -1084,7 +1089,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+ pr_devel("Duplicate !\n");
+ return -EEXIST;
+ }
+- if (cpu >= KVM_MAX_VCPUS) {
++ if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
+ pr_devel("Out of bounds !\n");
+ return -EINVAL;
+ }
+@@ -1098,7 +1103,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+ xc->xive = xive;
+ xc->vcpu = vcpu;
+ xc->server_num = cpu;
+- xc->vp_id = xive->vp_base + cpu;
++ xc->vp_id = xive_vp(xive, cpu);
+ xc->mfrr = 0xff;
+ xc->valid = true;
+
+--
+2.13.7
+
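The packing rules spelled out in the kvmppc_pack_vcpu_id() comment can be checked with a few lines of arithmetic. The standalone sketch below assumes KVM_MAX_VCPUS = 2048 and MAX_SMT_THREADS = 8 purely for illustration (the real values come from the kernel configuration) and shows where a stride-8, one-thread-per-core guest's IDs land.

    #include <stdio.h>

    #define MAX_SMT_THREADS 8
    #define KVM_MAX_VCPUS   2048    /* assumed value, for illustration only */

    /* Same arithmetic as kvmppc_pack_vcpu_id() in the patch above */
    static unsigned int pack_vcpu_id(unsigned int id, int stride)
    {
            static const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
            int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride);
            unsigned int packed;

            if (block >= MAX_SMT_THREADS)
                    return 0;       /* ID too large to pack */
            packed = (id % KVM_MAX_VCPUS) + block_offsets[block];
            return packed < KVM_MAX_VCPUS ? packed : 0;
    }

    int main(void)
    {
            /* Guest with emul_smt_mode (stride) 8; higher blocks fold into
             * the otherwise-unused secondary thread slots of each core. */
            unsigned int ids[] = { 0, 8, KVM_MAX_VCPUS, KVM_MAX_VCPUS + 8,
                                   2 * KVM_MAX_VCPUS, 4 * KVM_MAX_VCPUS };
            unsigned int i;

            for (i = 0; i < sizeof(ids) / sizeof(ids[0]); i++)
                    printf("vcpu id %4u -> vcore id %4u\n", ids[i],
                           pack_vcpu_id(ids[i], 8));
            return 0;
    }

Running it shows block 1, 2 and 4 IDs folding onto offsets 4, 2 and 1 within block 0, matching the block_offsets table in the patch.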
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Radix-page-fault-handler-optimizat.patch b/patches.arch/KVM-PPC-Book3S-HV-Radix-page-fault-handler-optimizat.patch
new file mode 100644
index 0000000000..b379a1fca9
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Radix-page-fault-handler-optimizat.patch
@@ -0,0 +1,114 @@
+From c4c8a7643e74ebd7f2cfa80807562f16bb58c1d9 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Fri, 23 Feb 2018 21:40:49 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Radix page fault handler optimizations
+
+References: bsc#1061840
+Patch-mainline: v4.17-rc1
+Git-commit: c4c8a7643e74ebd7f2cfa80807562f16bb58c1d9
+
+This improves the handling of transparent huge pages in the radix
+hypervisor page fault handler. Previously, if a small page is faulted
+in to a 2MB region of guest physical space, that means that there is
+a page table pointer at the PMD level, which could never be replaced
+by a leaf (2MB) PMD entry. This adds the code to clear the PMD,
+invalidate the page walk cache and free the page table page in this
+situation, so that the leaf PMD entry can be created.
+
+This also adds code to check whether a PMD or PTE being inserted is
+the same as is already there (because of a race with another CPU that
+faulted on the same page) and if so, we don't replace the existing
+entry, meaning that we don't invalidate the PTE or PMD and do a TLB
+invalidation.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 42 ++++++++++++++++++++++------------
+ 1 file changed, 27 insertions(+), 15 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index 5cb4e4687107..ed62164f8474 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -160,6 +160,17 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
+ asm volatile("ptesync": : :"memory");
+ }
+
++static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
++{
++ unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
++
++ asm volatile("ptesync": : :"memory");
++ /* RIC=1 PRS=0 R=1 IS=2 */
++ asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
++ : : "r" (rb), "r" (kvm->arch.lpid) : "memory");
++ asm volatile("ptesync": : :"memory");
++}
++
+ unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
+ unsigned long clr, unsigned long set,
+ unsigned long addr, unsigned int shift)
+@@ -261,6 +272,11 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
++ /* Check if we raced and someone else has set the same thing */
++ if (level == 1 && pmd_raw(*pmd) == pte_raw(pte)) {
++ ret = 0;
++ goto out_unlock;
++ }
+ /* Valid 2MB page here already, remove it */
+ old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
+ ~0UL, 0, lgpa, PMD_SHIFT);
+@@ -275,12 +291,13 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ }
+ } else if (level == 1 && !pmd_none(*pmd)) {
+ /*
+- * There's a page table page here, but we wanted
+- * to install a large page. Tell the caller and let
+- * it try installing a normal page if it wants.
++ * There's a page table page here, but we wanted to
++ * install a large page, so remove and free the page
++ * table page. new_ptep will be NULL since level == 1.
+ */
+- ret = -EBUSY;
+- goto out_unlock;
++ new_ptep = pte_offset_kernel(pmd, 0);
++ pmd_clear(pmd);
++ kvmppc_radix_flush_pwc(kvm, gpa);
+ }
+ if (level == 0) {
+ if (pmd_none(*pmd)) {
+@@ -291,6 +308,11 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ }
+ ptep = pte_offset_kernel(pmd, gpa);
+ if (pte_present(*ptep)) {
++ /* Check if someone else set the same thing */
++ if (pte_raw(*ptep) == pte_raw(pte)) {
++ ret = 0;
++ goto out_unlock;
++ }
+ /* PTE was previously valid, so invalidate it */
+ old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
+ 0, gpa, 0);
+@@ -469,16 +491,6 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+
+ /* Allocate space in the tree and write the PTE */
+ ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+- if (ret == -EBUSY) {
+- /*
+- * There's already a PMD where wanted to install a large page;
+- * for now, fall back to installing a small page.
+- */
+- level = 0;
+- pfn |= gfn & ((PMD_SIZE >> PAGE_SHIFT) - 1);
+- pte = pfn_pte(pfn, __pgprot(pgflags));
+- ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+- }
+
+ if (page) {
+ if (!ret && (pgflags & _PAGE_WRITE))
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Read-kvm-arch.emul_smt_mode-under-.patch b/patches.arch/KVM-PPC-Book3S-HV-Read-kvm-arch.emul_smt_mode-under-.patch
new file mode 100644
index 0000000000..7296641f7d
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Read-kvm-arch.emul_smt_mode-under-.patch
@@ -0,0 +1,68 @@
+From b5c6f7607b908b1445f2556c8d2f3b1ec5fc5aa8 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Thu, 26 Jul 2018 15:38:41 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Read kvm->arch.emul_smt_mode under
+ kvm->lock
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc1
+Git-commit: b5c6f7607b908b1445f2556c8d2f3b1ec5fc5aa8
+
+Commit 1e175d2 ("KVM: PPC: Book3S HV: Pack VCORE IDs to access full
+VCPU ID space", 2018-07-25) added code that uses kvm->arch.emul_smt_mode
+before any VCPUs are created. However, userspace can change
+kvm->arch.emul_smt_mode at any time up until the first VCPU is created.
+Hence it is (theoretically) possible for the check in
+kvmppc_core_vcpu_create_hv() to race with another userspace thread
+changing kvm->arch.emul_smt_mode.
+
+This fixes it by moving the test that uses kvm->arch.emul_smt_mode into
+the block where kvm->lock is held.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv.c | 17 ++++++++---------
+ 1 file changed, 8 insertions(+), 9 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index 785245e09f32..113f81577668 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -1989,16 +1989,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
+ unsigned int id)
+ {
+ struct kvm_vcpu *vcpu;
+- int err = -EINVAL;
++ int err;
+ int core;
+ struct kvmppc_vcore *vcore;
+
+- if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode) &&
+- cpu_has_feature(CPU_FTR_ARCH_300)) {
+- pr_devel("DNCI: VCPU ID too high\n");
+- goto out;
+- }
+-
+ err = -ENOMEM;
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu)
+@@ -2055,8 +2049,13 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
+ vcore = NULL;
+ err = -EINVAL;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+- BUG_ON(kvm->arch.smt_mode != 1);
+- core = kvmppc_pack_vcpu_id(kvm, id);
++ if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode)) {
++ pr_devel("KVM: VCPU ID too high\n");
++ core = KVM_MAX_VCORES;
++ } else {
++ BUG_ON(kvm->arch.smt_mode != 1);
++ core = kvmppc_pack_vcpu_id(kvm, id);
++ }
+ } else {
+ core = id / kvm->arch.smt_mode;
+ }
+--
+2.13.7
+
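The fix is the usual take-the-lock-before-you-test pattern. The pthread sketch below is userspace-only and mirrors the patch in name only; KVM_MAX_VCPUS is an assumed value, and the real code performs this check inside kvmppc_core_vcpu_create_hv() while holding kvm->lock.

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define KVM_MAX_VCPUS   2048    /* assumed value, for illustration only */

    static pthread_mutex_t kvm_lock = PTHREAD_MUTEX_INITIALIZER;
    static int emul_smt_mode = 1;   /* userspace may change this until the
                                     * first vCPU is created */

    /* After the fix: take the lock first, then evaluate the ID bound, so a
     * concurrent update of emul_smt_mode cannot invalidate the check. */
    static bool vcpu_id_in_range(unsigned int id)
    {
            bool ok;

            pthread_mutex_lock(&kvm_lock);
            ok = id < (unsigned int)(KVM_MAX_VCPUS * emul_smt_mode);
            pthread_mutex_unlock(&kvm_lock);
            return ok;
    }

    int main(void)
    {
            printf("id 4095 allowed: %d\n", vcpu_id_in_range(4095));

            pthread_mutex_lock(&kvm_lock);
            emul_smt_mode = 8;      /* e.g. userspace raised the emulated SMT mode */
            pthread_mutex_unlock(&kvm_lock);

            printf("id 4095 allowed: %d\n", vcpu_id_in_range(4095));
            return 0;
    }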
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Recursively-unmap-all-page-table-e.patch b/patches.arch/KVM-PPC-Book3S-HV-Recursively-unmap-all-page-table-e.patch
new file mode 100644
index 0000000000..eeb7aaa9d1
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Recursively-unmap-all-page-table-e.patch
@@ -0,0 +1,280 @@
+From a5704e83aa3d672327409509b2d1bff2def72966 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Thu, 17 May 2018 17:06:27 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Recursively unmap all page table entries
+ when unmapping
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: a5704e83aa3d672327409509b2d1bff2def72966
+
+When partition scope mappings are unmapped with kvm_unmap_radix, the
+pte is cleared, but the page table structure is left in place. If the
+next page fault requests a different page table geometry (e.g., due to
+THP promotion or split), kvmppc_create_pte is responsible for changing
+the page tables.
+
+When a page table entry is to be converted to a large pte, the page
+table entry is cleared, the PWC flushed, then the page table it points
+to freed. This will cause pte page tables to leak when a 1GB page is
+to replace a pud entry that points to a pmd table with pte tables under it:
+The pmd table will be freed, but its pte tables will be missed.
+
+Fix this by replacing the simple clear and free code with one that
+walks down the page tables and frees children. Care must be taken to
+clear the root entry being unmapped then flushing the PWC before
+freeing any page tables, as explained in comments.
+
+This requires PWC flush to logically become a flush-all-PWC (which it
+already is in hardware, but the KVM API needs to be changed to avoid
+confusion).
+
+This code also checks that no unexpected pte entries exist in any page
+table being freed, and unmaps those and emits a WARN. This is an
+expensive operation for the pte page level, but partition scope
+changes are rare, so it's unconditional for now to iron out bugs. It
+can be put under a CONFIG option or removed after some time.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 192 +++++++++++++++++++++++----------
+ 1 file changed, 138 insertions(+), 54 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index 2c49b31ec7fb..e514370ab5ae 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -165,7 +165,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
+ asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
+ }
+
+-static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
++static void kvmppc_radix_flush_pwc(struct kvm *kvm)
+ {
+ unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
+
+@@ -247,6 +247,139 @@ static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
+ }
+ }
+
++/*
++ * kvmppc_free_p?d are used to free existing page tables, and recursively
++ * descend and clear and free children.
++ * Callers are responsible for flushing the PWC.
++ *
++ * When page tables are being unmapped/freed as part of page fault path
++ * (full == false), ptes are not expected. There is code to unmap them
++ * and emit a warning if encountered, but there may already be data
++ * corruption due to the unexpected mappings.
++ */
++static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full)
++{
++ if (full) {
++ memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
++ } else {
++ pte_t *p = pte;
++ unsigned long it;
++
++ for (it = 0; it < PTRS_PER_PTE; ++it, ++p) {
++ if (pte_val(*p) == 0)
++ continue;
++ WARN_ON_ONCE(1);
++ kvmppc_unmap_pte(kvm, p,
++ pte_pfn(*p) << PAGE_SHIFT,
++ PAGE_SHIFT);
++ }
++ }
++
++ kvmppc_pte_free(pte);
++}
++
++static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full)
++{
++ unsigned long im;
++ pmd_t *p = pmd;
++
++ for (im = 0; im < PTRS_PER_PMD; ++im, ++p) {
++ if (!pmd_present(*p))
++ continue;
++ if (pmd_is_leaf(*p)) {
++ if (full) {
++ pmd_clear(p);
++ } else {
++ WARN_ON_ONCE(1);
++ kvmppc_unmap_pte(kvm, (pte_t *)p,
++ pte_pfn(*(pte_t *)p) << PAGE_SHIFT,
++ PMD_SHIFT);
++ }
++ } else {
++ pte_t *pte;
++
++ pte = pte_offset_map(p, 0);
++ kvmppc_unmap_free_pte(kvm, pte, full);
++ pmd_clear(p);
++ }
++ }
++ kvmppc_pmd_free(pmd);
++}
++
++static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud)
++{
++ unsigned long iu;
++ pud_t *p = pud;
++
++ for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) {
++ if (!pud_present(*p))
++ continue;
++ if (pud_huge(*p)) {
++ pud_clear(p);
++ } else {
++ pmd_t *pmd;
++
++ pmd = pmd_offset(p, 0);
++ kvmppc_unmap_free_pmd(kvm, pmd, true);
++ pud_clear(p);
++ }
++ }
++ pud_free(kvm->mm, pud);
++}
++
++void kvmppc_free_radix(struct kvm *kvm)
++{
++ unsigned long ig;
++ pgd_t *pgd;
++
++ if (!kvm->arch.pgtable)
++ return;
++ pgd = kvm->arch.pgtable;
++ for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
++ pud_t *pud;
++
++ if (!pgd_present(*pgd))
++ continue;
++ pud = pud_offset(pgd, 0);
++ kvmppc_unmap_free_pud(kvm, pud);
++ pgd_clear(pgd);
++ }
++ pgd_free(kvm->mm, kvm->arch.pgtable);
++ kvm->arch.pgtable = NULL;
++}
++
++static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
++ unsigned long gpa)
++{
++ pte_t *pte = pte_offset_kernel(pmd, 0);
++
++ /*
++ * Clearing the pmd entry then flushing the PWC ensures that the pte
++ * page can no longer be cached by the MMU, so it can be freed without
++ * flushing the PWC again.
++ */
++ pmd_clear(pmd);
++ kvmppc_radix_flush_pwc(kvm);
++
++ kvmppc_unmap_free_pte(kvm, pte, false);
++}
++
++static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
++ unsigned long gpa)
++{
++ pmd_t *pmd = pmd_offset(pud, 0);
++
++ /*
++ * Clearing the pud entry then flushing the PWC ensures that the pmd
++ * page and any children pte pages will no longer be cached by the MMU,
++ * so can be freed without flushing the PWC again.
++ */
++ pud_clear(pud);
++ kvmppc_radix_flush_pwc(kvm);
++
++ kvmppc_unmap_free_pmd(kvm, pmd, false);
++}
++
+ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ unsigned int level, unsigned long mmu_seq)
+ {
+@@ -312,11 +445,9 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ /*
+ * There's a page table page here, but we wanted to
+ * install a large page, so remove and free the page
+- * table page. new_pmd will be NULL since level == 2.
++ * table page.
+ */
+- new_pmd = pmd_offset(pud, 0);
+- pud_clear(pud);
+- kvmppc_radix_flush_pwc(kvm, gpa);
++ kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa);
+ }
+ kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
+ ret = 0;
+@@ -353,11 +484,9 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ /*
+ * There's a page table page here, but we wanted to
+ * install a large page, so remove and free the page
+- * table page. new_ptep will be NULL since level == 1.
++ * table page.
+ */
+- new_ptep = pte_offset_kernel(pmd, 0);
+- pmd_clear(pmd);
+- kvmppc_radix_flush_pwc(kvm, gpa);
++ kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa);
+ }
+ kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
+ ret = 0;
+@@ -734,51 +863,6 @@ int kvmppc_init_vm_radix(struct kvm *kvm)
+ return 0;
+ }
+
+-void kvmppc_free_radix(struct kvm *kvm)
+-{
+- unsigned long ig, iu, im;
+- pte_t *pte;
+- pmd_t *pmd;
+- pud_t *pud;
+- pgd_t *pgd;
+-
+- if (!kvm->arch.pgtable)
+- return;
+- pgd = kvm->arch.pgtable;
+- for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
+- if (!pgd_present(*pgd))
+- continue;
+- pud = pud_offset(pgd, 0);
+- for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) {
+- if (!pud_present(*pud))
+- continue;
+- if (pud_huge(*pud)) {
+- pud_clear(pud);
+- continue;
+- }
+- pmd = pmd_offset(pud, 0);
+- for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
+- if (pmd_is_leaf(*pmd)) {
+- pmd_clear(pmd);
+- continue;
+- }
+- if (!pmd_present(*pmd))
+- continue;
+- pte = pte_offset_map(pmd, 0);
+- memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
+- kvmppc_pte_free(pte);
+- pmd_clear(pmd);
+- }
+- kvmppc_pmd_free(pmd_offset(pud, 0));
+- pud_clear(pud);
+- }
+- pud_free(kvm->mm, pud_offset(pgd, 0));
+- pgd_clear(pgd);
+- }
+- pgd_free(kvm->mm, kvm->arch.pgtable);
+- kvm->arch.pgtable = NULL;
+-}
+-
+ static void pte_ctor(void *addr)
+ {
+ memset(addr, 0, RADIX_PTE_TABLE_SIZE);
+--
+2.13.7
+
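The ordering the patch relies on (clear the parent entry, flush the page walk cache once, then free the detached child tables) is easier to see on a toy two-level table. The sketch below is plain userspace C; flush_pwc() is an empty stand-in for the real flush and the structure names are invented.

    #include <stdlib.h>

    #define ENTRIES 512

    struct pte_table { unsigned long pte[ENTRIES]; };
    struct pmd_table { struct pte_table *child[ENTRIES]; };

    static void flush_pwc(void)
    {
            /* stand-in for kvmppc_radix_flush_pwc(): after this, no cached
             * page-walk entry can still point into a detached table */
    }

    /* Mirrors the order used by kvmppc_unmap_free_pmd_entry_table():
     * detach, flush once, then it is safe to free everything below the
     * detached entry. */
    static void unmap_free_pmd_entry(struct pmd_table *pmd, int i)
    {
            struct pte_table *pte = pmd->child[i];

            if (!pte)
                    return;
            pmd->child[i] = NULL;   /* 1. clear the parent entry */
            flush_pwc();            /* 2. single PWC flush */
            free(pte);              /* 3. free the now-unreachable child */
    }

    int main(void)
    {
            struct pmd_table *pmd = calloc(1, sizeof(*pmd));

            pmd->child[0] = calloc(1, sizeof(struct pte_table));
            unmap_free_pmd_entry(pmd, 0);
            free(pmd);
            return 0;
    }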
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Remove-useless-statement.patch b/patches.arch/KVM-PPC-Book3S-HV-Remove-useless-statement.patch
new file mode 100644
index 0000000000..a6e1fc4aed
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Remove-useless-statement.patch
@@ -0,0 +1,37 @@
+From 4fcf361dbdbdb43038bb173e2391c4073e713745 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Mon, 20 Nov 2017 14:17:53 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Remove useless statement
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 4fcf361dbdbdb43038bb173e2391c4073e713745
+
+This removes a statement that has no effect. It should have been
+removed in commit 898b25b202f3 ("KVM: PPC: Book3S HV: Simplify dynamic
+micro-threading code", 2017-06-22) along with the loop over the
+piggy-backed virtual cores.
+
+This issue was reported by Coverity.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index 2d46037ce936..597498d6db2e 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -2831,7 +2831,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
+ */
+ if (!thr0_done)
+ kvmppc_start_thread(NULL, pvc);
+- thr += pvc->num_threads;
+ }
+
+ /*
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Remove-vcpu-arch.dec-usage.patch b/patches.arch/KVM-PPC-Book3S-HV-Remove-vcpu-arch.dec-usage.patch
new file mode 100644
index 0000000000..3015bcfd96
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Remove-vcpu-arch.dec-usage.patch
@@ -0,0 +1,36 @@
+From 81ceca05a42e24768c73d4d6dbd3701a60f1ed85 Mon Sep 17 00:00:00 2001
+From: Alexander Graf <agraf@suse.de>
+Date: Tue, 19 Dec 2017 15:56:24 +0100
+Subject: [PATCH] KVM: PPC: Book3S HV: Remove vcpu->arch.dec usage
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 81ceca05a42e24768c73d4d6dbd3701a60f1ed85
+
+On Book3S in HV mode, we don't use the vcpu->arch.dec field at all.
+Instead, all logic is built around vcpu->arch.dec_expires.
+
+So let's remove the one remaining piece of code that was setting it.
+
+Signed-off-by: Alexander Graf <agraf@suse.de>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index 2659844784b8..c8ffd69adfec 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -957,7 +957,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
+ mftb r7
+ subf r3,r7,r8
+ mtspr SPRN_DEC,r3
+- std r3,VCPU_DEC(r4)
+
+ ld r5, VCPU_SPRG0(r4)
+ ld r6, VCPU_SPRG1(r4)
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Send-kvmppc_bad_interrupt-NMIs-to-.patch b/patches.arch/KVM-PPC-Book3S-HV-Send-kvmppc_bad_interrupt-NMIs-to-.patch
new file mode 100644
index 0000000000..aa9fe4f242
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Send-kvmppc_bad_interrupt-NMIs-to-.patch
@@ -0,0 +1,85 @@
+From 7c1bd80cc216e7255bfabb94222676b51ab6868e Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Fri, 18 May 2018 03:49:44 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Send kvmppc_bad_interrupt NMIs to Linux
+ handlers
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: 7c1bd80cc216e7255bfabb94222676b51ab6868e
+
+It's possible to take a SRESET or MCE in these paths due to a bug
+in the host code or an NMI IPI, etc. A recent bug attempting to load
+a virtual address from real mode gave the complete but cryptic error,
+abridged:
+
+ Oops: Bad interrupt in KVM entry/exit code, sig: 6 [#1]
+ LE SMP NR_CPUS=2048 NUMA PowerNV
+ CPU: 53 PID: 6582 Comm: qemu-system-ppc Not tainted
+ NIP: c0000000000155ac LR: c0000000000c2430 CTR: c000000000015580
+ REGS: c000000fff76dd80 TRAP: 0200 Not tainted
+ MSR: 9000000000201003 <SF,HV,ME,RI,LE> CR: 48082222 XER: 00000000
+ CFAR: 0000000102900ef0 DAR: d00017fffd941a28 DSISR: 00000040 SOFTE: 3
+ NIP [c0000000000155ac] perf_trace_tlbie+0x2c/0x1a0
+ LR [c0000000000c2430] do_tlbies+0x230/0x2f0
+
+Sending the NMIs through the Linux handlers gives a nicer output:
+
+ Severe Machine check interrupt [Not recovered]
+ NIP [c0000000000155ac]: perf_trace_tlbie+0x2c/0x1a0
+ Initiator: CPU
+ Error type: Real address [Load (bad)]
+ Effective address: d00017fffcc01a28
+ opal: Machine check interrupt unrecoverable: MSR(RI=0)
+ opal: Hardware platform error: Unrecoverable Machine Check exception
+ CPU: 0 PID: 6700 Comm: qemu-system-ppc Tainted: G M
+ NIP: c0000000000155ac LR: c0000000000c23c0 CTR: c000000000015580
+ REGS: c000000fff9e9d80 TRAP: 0200 Tainted: G M
+ MSR: 9000000000201001 <SF,HV,ME,LE> CR: 48082222 XER: 00000000
+ CFAR: 000000010cbc1a30 DAR: d00017fffcc01a28 DSISR: 00000040 SOFTE: 3
+ NIP [c0000000000155ac] perf_trace_tlbie+0x2c/0x1a0
+ LR [c0000000000c23c0] do_tlbies+0x1c0/0x280
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv_builtin.c | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
+index 2b127586be30..d4a3f4da409b 100644
+--- a/arch/powerpc/kvm/book3s_hv_builtin.c
++++ b/arch/powerpc/kvm/book3s_hv_builtin.c
+@@ -18,6 +18,7 @@
+ #include <linux/cma.h>
+ #include <linux/bitops.h>
+
++#include <asm/asm-prototypes.h>
+ #include <asm/cputable.h>
+ #include <asm/kvm_ppc.h>
+ #include <asm/kvm_book3s.h>
+@@ -633,7 +634,19 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+
+ void kvmppc_bad_interrupt(struct pt_regs *regs)
+ {
+- die("Bad interrupt in KVM entry/exit code", regs, SIGABRT);
++ /*
++ * 100 could happen at any time, 200 can happen due to invalid real
++ * address access for example (or any time due to a hardware problem).
++ */
++ if (TRAP(regs) == 0x100) {
++ get_paca()->in_nmi++;
++ system_reset_exception(regs);
++ get_paca()->in_nmi--;
++ } else if (TRAP(regs) == 0x200) {
++ machine_check_exception(regs);
++ } else {
++ die("Bad interrupt in KVM entry/exit code", regs, SIGABRT);
++ }
+ panic("Bad KVM trap");
+ }
+
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Set-RWMR-on-POWER8-so-PURR-SPURR-c.patch b/patches.arch/KVM-PPC-Book3S-HV-Set-RWMR-on-POWER8-so-PURR-SPURR-c.patch
new file mode 100644
index 0000000000..e4e05d77b6
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Set-RWMR-on-POWER8-so-PURR-SPURR-c.patch
@@ -0,0 +1,158 @@
+From 7aa15842c15f8a32000372ad2b3195029fde6fd4 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Fri, 20 Apr 2018 19:53:22 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Set RWMR on POWER8 so PURR/SPURR count
+ correctly
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: 7aa15842c15f8a32000372ad2b3195029fde6fd4
+
+Although Linux doesn't use PURR and SPURR ((Scaled) Processor
+Utilization of Resources Registers), other OSes depend on them.
+On POWER8 they count at a rate depending on whether the VCPU is
+idle or running, the activity of the VCPU, and the value in the
+RWMR (Region-Weighting Mode Register). Hardware expects the
+hypervisor to update the RWMR when a core is dispatched to reflect
+the number of online VCPUs in the vcore.
+
+This adds code to maintain a count in the vcore struct indicating
+how many VCPUs are online. In kvmppc_run_core we use that count
+to set the RWMR register on POWER8. If the core is split because
+of a static or dynamic micro-threading mode, we use the value for
+8 threads. The RWMR value is not relevant when the host is
+executing because Linux does not use the PURR or SPURR register,
+so we don't bother saving and restoring the host value.
+
+For the sake of old userspace which does not set the KVM_REG_PPC_ONLINE
+register, we set online to 1 if it was 0 at the time of a KVM_RUN
+ioctl.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/kvm_book3s.h | 1 +
+ arch/powerpc/include/asm/reg.h | 1 +
+ arch/powerpc/kvm/book3s_hv.c | 61 ++++++++++++++++++++++++++++++++++-
+ 3 files changed, 62 insertions(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
+index e7377b73cfec..c1f3a870c48a 100644
+--- a/arch/powerpc/include/asm/kvm_book3s.h
++++ b/arch/powerpc/include/asm/kvm_book3s.h
+@@ -104,6 +104,7 @@ struct kvmppc_vcore {
+ ulong vtb; /* virtual timebase */
+ ulong conferring_threads;
+ unsigned int halt_poll_ns;
++ atomic_t online_count;
+ };
+
+ struct kvmppc_vcpu_book3s {
+diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
+index cb0f272ce123..44b2be4a65d1 100644
+--- a/arch/powerpc/include/asm/reg.h
++++ b/arch/powerpc/include/asm/reg.h
+@@ -365,6 +365,7 @@
+ #define SPRN_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */
+ #define SPRN_PSSCR_PR 0x337 /* PSSCR ISA 3.0, privileged mode access */
+ #define SPRN_PMCR 0x374 /* Power Management Control Register */
++#define SPRN_RWMR 0x375 /* Region-Weighting Mode Register */
+
+ /* HFSCR and FSCR bit numbers are the same */
+ #define FSCR_SCV_LG 12 /* Enable System Call Vectored */
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index 04bd71796098..f61dd9efa6fb 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -123,6 +123,32 @@ static bool no_mixing_hpt_and_radix;
+ static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
+ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
+
++/*
++ * RWMR values for POWER8. These control the rate at which PURR
++ * and SPURR count and should be set according to the number of
++ * online threads in the vcore being run.
++ */
++#define RWMR_RPA_P8_1THREAD 0x164520C62609AECA
++#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9
++#define RWMR_RPA_P8_3THREAD 0x164520C62609AECA
++#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9
++#define RWMR_RPA_P8_5THREAD 0x164520C62609AECA
++#define RWMR_RPA_P8_6THREAD 0x164520C62609AECA
++#define RWMR_RPA_P8_7THREAD 0x164520C62609AECA
++#define RWMR_RPA_P8_8THREAD 0x164520C62609AECA
++
++static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = {
++ RWMR_RPA_P8_1THREAD,
++ RWMR_RPA_P8_1THREAD,
++ RWMR_RPA_P8_2THREAD,
++ RWMR_RPA_P8_3THREAD,
++ RWMR_RPA_P8_4THREAD,
++ RWMR_RPA_P8_5THREAD,
++ RWMR_RPA_P8_6THREAD,
++ RWMR_RPA_P8_7THREAD,
++ RWMR_RPA_P8_8THREAD,
++};
++
+ static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
+ int *ip)
+ {
+@@ -1761,7 +1787,12 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
+ vcpu->arch.vcore->tb_offset;
+ break;
+ case KVM_REG_PPC_ONLINE:
+- vcpu->arch.online = set_reg_val(id, *val);
++ i = set_reg_val(id, *val);
++ if (i && !vcpu->arch.online)
++ atomic_inc(&vcpu->arch.vcore->online_count);
++ else if (!i && vcpu->arch.online)
++ atomic_dec(&vcpu->arch.vcore->online_count);
++ vcpu->arch.online = i;
+ break;
+ default:
+ r = -EINVAL;
+@@ -2856,6 +2887,25 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
+ }
+ }
+
++ /*
++ * On POWER8, set RWMR register.
++ * Since it only affects PURR and SPURR, it doesn't affect
++ * the host, so we don't save/restore the host value.
++ */
++ if (is_power8) {
++ unsigned long rwmr_val = RWMR_RPA_P8_8THREAD;
++ int n_online = atomic_read(&vc->online_count);
++
++ /*
++ * Use the 8-thread value if we're doing split-core
++ * or if the vcore's online count looks bogus.
++ */
++ if (split == 1 && threads_per_subcore == MAX_SMT_THREADS &&
++ n_online >= 1 && n_online <= MAX_SMT_THREADS)
++ rwmr_val = p8_rwmr_values[n_online];
++ mtspr(SPRN_RWMR, rwmr_val);
++ }
++
+ /* Start all the threads */
+ active = 0;
+ for (sub = 0; sub < core_info.n_subcores; ++sub) {
+@@ -3358,6 +3408,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
+ }
+ #endif
+
++ /*
++ * Force online to 1 for the sake of old userspace which doesn't
++ * set it.
++ */
++ if (!vcpu->arch.online) {
++ atomic_inc(&vcpu->arch.vcore->online_count);
++ vcpu->arch.online = 1;
++ }
++
+ kvmppc_core_prepare_to_enter(vcpu);
+
+ /* No need to go into the guest when all we'll do is come back out */
+--
+2.13.7
+
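
[Illustrative aside, not part of the merged patches: a minimal userspace sketch of the RWMR table lookup the hunk above adds to kvmppc_run_core(). The table values are the POWER8 ones quoted in the patch; the pick_rwmr() helper and the surrounding program are invented for the example.]

/* Sketch: map an online-thread count to a region-weighting value. */
#include <stdio.h>

#define MAX_SMT_THREADS 8

static const unsigned long long p8_rwmr_values[MAX_SMT_THREADS + 1] = {
        0x164520C62609AECAULL,  /* index 0: treated as one thread */
        0x164520C62609AECAULL,  /* 1 online thread */
        0x7FFF2908450D8DA9ULL,  /* 2 */
        0x164520C62609AECAULL,  /* 3 */
        0x199A421245058DA9ULL,  /* 4 */
        0x164520C62609AECAULL,  /* 5 */
        0x164520C62609AECAULL,  /* 6 */
        0x164520C62609AECAULL,  /* 7 */
        0x164520C62609AECAULL,  /* 8 */
};

static unsigned long long pick_rwmr(int n_online, int split_core)
{
        /* Fall back to the 8-thread value when split or out of range. */
        if (split_core || n_online < 1 || n_online > MAX_SMT_THREADS)
                return p8_rwmr_values[MAX_SMT_THREADS];
        return p8_rwmr_values[n_online];
}

int main(void)
{
        printf("%#llx\n", pick_rwmr(4, 0));
        return 0;
}
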
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Snapshot-timebase-offset-on-guest-.patch b/patches.arch/KVM-PPC-Book3S-HV-Snapshot-timebase-offset-on-guest-.patch
new file mode 100644
index 0000000000..da1a16c2a8
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Snapshot-timebase-offset-on-guest-.patch
@@ -0,0 +1,256 @@
+From 57b8daa70a179bc23cc4240420ab6fbcdd7faf77 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Fri, 20 Apr 2018 22:51:11 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Snapshot timebase offset on guest entry
+
+References: bsc#1061840, bsc#1086196
+Patch-mainline: v4.17-rc7
+Git-commit: 57b8daa70a179bc23cc4240420ab6fbcdd7faf77
+
+Currently, the HV KVM guest entry/exit code adds the timebase offset
+from the vcore struct to the timebase on guest entry, and subtracts
+it on guest exit. This is fine, except that it is possible for
+userspace to change the offset using the SET_ONE_REG interface while
+the vcore is running, as there is only one timebase offset per vcore
+but potentially multiple VCPUs in the vcore. If that were to happen,
+KVM would subtract a different offset on guest exit from that which
+it had added on guest entry, leading to the timebase being out of sync
+between cores in the host, which then leads to bad things happening
+such as hangs and spurious watchdog timeouts.
+
+To fix this, we add a new field 'tb_offset_applied' to the vcore struct
+which stores the offset that is currently applied to the timebase.
+This value is set from the vcore tb_offset field on guest entry, and
+is what is subtracted from the timebase on guest exit. Since it is
+zero when the timebase offset is not applied, we can simplify the
+logic in kvmhv_start_timing and kvmhv_accumulate_time.
+
+In addition, we had secondary threads reading the timebase while
+running concurrently with code on the primary thread which would
+eventually add or subtract the timebase offset from the timebase.
+This occurred while saving or restoring the DEC register value on
+the secondary threads. Although no specific incorrect behaviour has
+been observed, this is a race which should be fixed. To fix it, we
+move the DEC saving code to just before we call kvmhv_commence_exit,
+and the DEC restoring code to after the point where we have waited
+for the primary thread to switch the MMU context and add the timebase
+offset. That way we are sure that the timebase contains the guest
+timebase value in both cases.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/kvm_book3s.h | 1 +
+ arch/powerpc/kernel/asm-offsets.c | 1 +
+ arch/powerpc/kvm/book3s_hv.c | 1 +
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 89 ++++++++++++++++-----------------
+ 4 files changed, 47 insertions(+), 45 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
+index 4c02a7378d06..e7377b73cfec 100644
+--- a/arch/powerpc/include/asm/kvm_book3s.h
++++ b/arch/powerpc/include/asm/kvm_book3s.h
+@@ -96,6 +96,7 @@ struct kvmppc_vcore {
+ struct kvm_vcpu *runner;
+ struct kvm *kvm;
+ u64 tb_offset; /* guest timebase - host timebase */
++ u64 tb_offset_applied; /* timebase offset currently in force */
+ ulong lpcr;
+ u32 arch_compat;
+ ulong pcr;
+diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
+index 6bee65f3cfd3..373dc1d6ef44 100644
+--- a/arch/powerpc/kernel/asm-offsets.c
++++ b/arch/powerpc/kernel/asm-offsets.c
+@@ -562,6 +562,7 @@ int main(void)
+ OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
+ OFFSET(VCORE_KVM, kvmppc_vcore, kvm);
+ OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset);
++ OFFSET(VCORE_TB_OFFSET_APPL, kvmppc_vcore, tb_offset_applied);
+ OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr);
+ OFFSET(VCORE_PCR, kvmppc_vcore, pcr);
+ OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes);
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index 4d07fca5121c..9963f65c212b 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -2441,6 +2441,7 @@ static void init_vcore_to_run(struct kvmppc_vcore *vc)
+ vc->in_guest = 0;
+ vc->napping_threads = 0;
+ vc->conferring_threads = 0;
++ vc->tb_offset_applied = 0;
+ }
+
+ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index bd63fa8a08b5..25c32e421b57 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -692,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ 22: ld r8,VCORE_TB_OFFSET(r5)
+ cmpdi r8,0
+ beq 37f
++ std r8, VCORE_TB_OFFSET_APPL(r5)
+ mftb r6 /* current host timebase */
+ add r8,r8,r6
+ mtspr SPRN_TBU40,r8 /* update upper 40 bits */
+@@ -940,18 +941,6 @@ FTR_SECTION_ELSE
+ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
+ 8:
+
+- /*
+- * Set the decrementer to the guest decrementer.
+- */
+- ld r8,VCPU_DEC_EXPIRES(r4)
+- /* r8 is a host timebase value here, convert to guest TB */
+- ld r5,HSTATE_KVM_VCORE(r13)
+- ld r6,VCORE_TB_OFFSET(r5)
+- add r8,r8,r6
+- mftb r7
+- subf r3,r7,r8
+- mtspr SPRN_DEC,r3
+-
+ ld r5, VCPU_SPRG0(r4)
+ ld r6, VCPU_SPRG1(r4)
+ ld r7, VCPU_SPRG2(r4)
+@@ -1005,6 +994,18 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
+ mtspr SPRN_LPCR,r8
+ isync
+
++ /*
++ * Set the decrementer to the guest decrementer.
++ */
++ ld r8,VCPU_DEC_EXPIRES(r4)
++ /* r8 is a host timebase value here, convert to guest TB */
++ ld r5,HSTATE_KVM_VCORE(r13)
++ ld r6,VCORE_TB_OFFSET_APPL(r5)
++ add r8,r8,r6
++ mftb r7
++ subf r3,r7,r8
++ mtspr SPRN_DEC,r3
++
+ /* Check if HDEC expires soon */
+ mfspr r3, SPRN_HDEC
+ EXTEND_HDEC(r3)
+@@ -1597,8 +1598,27 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
+
+ guest_bypass:
+ stw r12, STACK_SLOT_TRAP(r1)
+- mr r3, r12
++
++ /* Save DEC */
++ /* Do this before kvmhv_commence_exit so we know TB is guest TB */
++ ld r3, HSTATE_KVM_VCORE(r13)
++ mfspr r5,SPRN_DEC
++ mftb r6
++ /* On P9, if the guest has large decr enabled, don't sign extend */
++BEGIN_FTR_SECTION
++ ld r4, VCORE_LPCR(r3)
++ andis. r4, r4, LPCR_LD@h
++ bne 16f
++END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
++ extsw r5,r5
++16: add r5,r5,r6
++ /* r5 is a guest timebase value here, convert to host TB */
++ ld r4,VCORE_TB_OFFSET_APPL(r3)
++ subf r5,r4,r5
++ std r5,VCPU_DEC_EXPIRES(r9)
++
+ /* Increment exit count, poke other threads to exit */
++ mr r3, r12
+ bl kvmhv_commence_exit
+ nop
+ ld r9, HSTATE_KVM_VCPU(r13)
+@@ -1639,23 +1659,6 @@ guest_bypass:
+ mtspr SPRN_PURR,r3
+ mtspr SPRN_SPURR,r4
+
+- /* Save DEC */
+- ld r3, HSTATE_KVM_VCORE(r13)
+- mfspr r5,SPRN_DEC
+- mftb r6
+- /* On P9, if the guest has large decr enabled, don't sign extend */
+-BEGIN_FTR_SECTION
+- ld r4, VCORE_LPCR(r3)
+- andis. r4, r4, LPCR_LD@h
+- bne 16f
+-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+- extsw r5,r5
+-16: add r5,r5,r6
+- /* r5 is a guest timebase value here, convert to host TB */
+- ld r4,VCORE_TB_OFFSET(r3)
+- subf r5,r4,r5
+- std r5,VCPU_DEC_EXPIRES(r9)
+-
+ BEGIN_FTR_SECTION
+ b 8f
+ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+@@ -2017,9 +2020,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+ 27:
+ /* Subtract timebase offset from timebase */
+- ld r8,VCORE_TB_OFFSET(r5)
++ ld r8, VCORE_TB_OFFSET_APPL(r5)
+ cmpdi r8,0
+ beq 17f
++ li r0, 0
++ std r0, VCORE_TB_OFFSET_APPL(r5)
+ mftb r6 /* current guest timebase */
+ subf r8,r8,r6
+ mtspr SPRN_TBU40,r8 /* update upper 40 bits */
+@@ -2700,7 +2705,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ add r3, r3, r5
+ ld r4, HSTATE_KVM_VCPU(r13)
+ ld r5, HSTATE_KVM_VCORE(r13)
+- ld r6, VCORE_TB_OFFSET(r5)
++ ld r6, VCORE_TB_OFFSET_APPL(r5)
+ subf r3, r6, r3 /* convert to host TB value */
+ std r3, VCPU_DEC_EXPIRES(r4)
+
+@@ -2799,7 +2804,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+ /* Restore guest decrementer */
+ ld r3, VCPU_DEC_EXPIRES(r4)
+ ld r5, HSTATE_KVM_VCORE(r13)
+- ld r6, VCORE_TB_OFFSET(r5)
++ ld r6, VCORE_TB_OFFSET_APPL(r5)
+ add r3, r3, r6 /* convert host TB to guest TB value */
+ mftb r7
+ subf r3, r7, r3
+@@ -3606,12 +3611,9 @@ kvmppc_fix_pmao:
+ */
+ kvmhv_start_timing:
+ ld r5, HSTATE_KVM_VCORE(r13)
+- lbz r6, VCORE_IN_GUEST(r5)
+- cmpwi r6, 0
+- beq 5f /* if in guest, need to */
+- ld r6, VCORE_TB_OFFSET(r5) /* subtract timebase offset */
+-5: mftb r5
+- subf r5, r6, r5
++ ld r6, VCORE_TB_OFFSET_APPL(r5)
++ mftb r5
++ subf r5, r6, r5 /* subtract current timebase offset */
+ std r3, VCPU_CUR_ACTIVITY(r4)
+ std r5, VCPU_ACTIVITY_START(r4)
+ blr
+@@ -3622,15 +3624,12 @@ kvmhv_start_timing:
+ */
+ kvmhv_accumulate_time:
+ ld r5, HSTATE_KVM_VCORE(r13)
+- lbz r8, VCORE_IN_GUEST(r5)
+- cmpwi r8, 0
+- beq 4f /* if in guest, need to */
+- ld r8, VCORE_TB_OFFSET(r5) /* subtract timebase offset */
+-4: ld r5, VCPU_CUR_ACTIVITY(r4)
++ ld r8, VCORE_TB_OFFSET_APPL(r5)
++ ld r5, VCPU_CUR_ACTIVITY(r4)
+ ld r6, VCPU_ACTIVITY_START(r4)
+ std r3, VCPU_CUR_ACTIVITY(r4)
+ mftb r7
+- subf r7, r8, r7
++ subf r7, r8, r7 /* subtract current timebase offset */
+ std r7, VCPU_ACTIVITY_START(r4)
+ cmpdi r5, 0
+ beqlr
+--
+2.13.7
+
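
[Illustrative aside, not part of the merged patches: a self-contained sketch of why the exit path subtracts the snapshot stored in tb_offset_applied rather than re-reading tb_offset, which userspace may change through SET_ONE_REG while the vcore runs. All names and the simulated timebase are invented for the example.]

#include <assert.h>

struct vcore {
        unsigned long long tb_offset;           /* value requested by userspace */
        unsigned long long tb_offset_applied;   /* offset currently in force */
};

static unsigned long long timebase;             /* stand-in for the TB register */

static void guest_entry(struct vcore *vc)
{
        vc->tb_offset_applied = vc->tb_offset;  /* snapshot on entry */
        timebase += vc->tb_offset_applied;
}

static void guest_exit(struct vcore *vc)
{
        /* Subtract the snapshot, not vc->tb_offset. */
        timebase -= vc->tb_offset_applied;
        vc->tb_offset_applied = 0;
}

int main(void)
{
        struct vcore vc = { .tb_offset = 100 };
        unsigned long long before = timebase;

        guest_entry(&vc);
        vc.tb_offset = 250;     /* userspace changes the offset mid-run */
        guest_exit(&vc);

        assert(timebase == before);     /* host timebase stays consistent */
        return 0;
}
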
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Streamline-setting-of-reference-an.patch b/patches.arch/KVM-PPC-Book3S-HV-Streamline-setting-of-reference-an.patch
new file mode 100644
index 0000000000..3702a79b66
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Streamline-setting-of-reference-an.patch
@@ -0,0 +1,110 @@
+From f7caf712d885713986baeac86b1b64bcbd9dcd91 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Sat, 24 Feb 2018 20:08:51 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Streamline setting of reference and
+ change bits
+
+References: bsc#1061840
+Patch-mainline: v4.17-rc1
+Git-commit: f7caf712d885713986baeac86b1b64bcbd9dcd91
+
+When using the radix MMU, we can get hypervisor page fault interrupts
+with the DSISR_SET_RC bit set in DSISR/HSRR1, indicating that an
+attempt to set the R (reference) or C (change) bit in a PTE atomically
+failed. Previously we would find the corresponding Linux PTE and
+check the permission and dirty bits there, but this is not really
+necessary since we only need to do what the hardware was trying to
+do, namely set R or C atomically. This removes the code that reads
+the Linux PTE and just updates the partition-scoped PTE, having first
+checked that it is still present, and if the access is a write, that
+the PTE still has write permission.
+
+Furthermore, we now check whether any other relevant bits are set
+in DSISR, and if there are, then we proceed with the rest of the
+function in order to handle whatever condition they represent,
+instead of returning to the guest as we did previously.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 52 +++++++++++++---------------------
+ 1 file changed, 19 insertions(+), 33 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index ed62164f8474..f783b067e5ac 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -345,7 +345,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned long gpa, gfn, hva, pfn;
+ struct kvm_memory_slot *memslot;
+ struct page *page = NULL, *pages[1];
+- long ret, npages, ok;
++ long ret, npages;
+ unsigned int writing;
+ struct vm_area_struct *vma;
+ unsigned long flags;
+@@ -397,43 +397,29 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ if (dsisr & DSISR_SET_RC) {
+ /*
+ * Need to set an R or C bit in the 2nd-level tables;
+- * if the relevant bits aren't already set in the linux
+- * page tables, fall through to do the gup_fast to
+- * set them in the linux page tables too.
++ * since we are just helping out the hardware here,
++ * it is sufficient to do what the hardware does.
+ */
+- ok = 0;
+ pgflags = _PAGE_ACCESSED;
+ if (writing)
+ pgflags |= _PAGE_DIRTY;
+- local_irq_save(flags);
+- ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL);
+- if (ptep) {
+- pte = READ_ONCE(*ptep);
+- if (pte_present(pte) &&
+- (pte_val(pte) & pgflags) == pgflags)
+- ok = 1;
+- }
+- local_irq_restore(flags);
+- if (ok) {
+- spin_lock(&kvm->mmu_lock);
+- if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
+- spin_unlock(&kvm->mmu_lock);
+- return RESUME_GUEST;
+- }
+- /*
+- * We are walking the secondary page table here. We can do this
+- * without disabling irq.
+- */
+- ptep = __find_linux_pte(kvm->arch.pgtable,
+- gpa, NULL, &shift);
+- if (ptep && pte_present(*ptep)) {
+- kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
+- gpa, shift);
+- spin_unlock(&kvm->mmu_lock);
+- return RESUME_GUEST;
+- }
+- spin_unlock(&kvm->mmu_lock);
++ /*
++ * We are walking the secondary page table here. We can do this
++ * without disabling irq.
++ */
++ spin_lock(&kvm->mmu_lock);
++ ptep = __find_linux_pte(kvm->arch.pgtable,
++ gpa, NULL, &shift);
++ if (ptep && pte_present(*ptep) &&
++ (!writing || pte_write(*ptep))) {
++ kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
++ gpa, shift);
++ dsisr &= ~DSISR_SET_RC;
+ }
++ spin_unlock(&kvm->mmu_lock);
++ if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
++ DSISR_PROTFAULT | DSISR_SET_RC)))
++ return RESUME_GUEST;
+ }
+
+ ret = -EFAULT;
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Use-__gfn_to_pfn_memslot-in-page-f.patch b/patches.arch/KVM-PPC-Book3S-HV-Use-__gfn_to_pfn_memslot-in-page-f.patch
new file mode 100644
index 0000000000..bdd21d9127
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Use-__gfn_to_pfn_memslot-in-page-f.patch
@@ -0,0 +1,228 @@
+From 31c8b0d0694a1f7e3b46df0d1341a874ecb5e0de Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Thu, 1 Mar 2018 15:14:02 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Use __gfn_to_pfn_memslot() in page fault
+ handler
+
+References: bsc#1061840
+Patch-mainline: v4.17-rc1
+Git-commit: 31c8b0d0694a1f7e3b46df0d1341a874ecb5e0de
+
+This changes the hypervisor page fault handler for radix guests to use
+the generic KVM __gfn_to_pfn_memslot() function instead of using
+get_user_pages_fast() and then handling the case of VM_PFNMAP vmas
+specially. The old code missed the case of VM_IO vmas; with this
+change, VM_IO vmas will now be handled correctly by code within
+__gfn_to_pfn_memslot.
+
+Currently, __gfn_to_pfn_memslot calls hva_to_pfn, which only uses
+__get_user_pages_fast for the initial lookup in the cases where
+either atomic or async is set. Since we are not setting either
+atomic or async, we do our own __get_user_pages_fast first, for now.
+
+This also adds code to check for the KVM_MEM_READONLY flag on the
+memslot. If it is set and this is a write access, we synthesize a
+data storage interrupt for the guest.
+
+In the case where the page is not normal RAM (i.e. page == NULL in
+kvmppc_book3s_radix_page_fault()), we read the PTE from the Linux page
+tables because we need the mapping attribute bits as well as the PFN.
+(The mapping attribute bits indicate whether accesses have to be
+non-cacheable and/or guarded.)
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 148 ++++++++++++++++++++-------------
+ 1 file changed, 88 insertions(+), 60 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index 05acc67e0eb2..0590f1667607 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -392,11 +392,11 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned long mmu_seq, pte_size;
+ unsigned long gpa, gfn, hva, pfn;
+ struct kvm_memory_slot *memslot;
+- struct page *page = NULL, *pages[1];
+- long ret, npages;
+- unsigned int writing;
+- struct vm_area_struct *vma;
+- unsigned long flags;
++ struct page *page = NULL;
++ long ret;
++ bool writing;
++ bool upgrade_write = false;
++ bool *upgrade_p = &upgrade_write;
+ pte_t pte, *ptep;
+ unsigned long pgflags;
+ unsigned int shift, level;
+@@ -436,12 +436,17 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ dsisr & DSISR_ISSTORE);
+ }
+
+- /* used to check for invalidations in progress */
+- mmu_seq = kvm->mmu_notifier_seq;
+- smp_rmb();
+-
+ writing = (dsisr & DSISR_ISSTORE) != 0;
+- hva = gfn_to_hva_memslot(memslot, gfn);
++ if (memslot->flags & KVM_MEM_READONLY) {
++ if (writing) {
++ /* give the guest a DSI */
++ dsisr = DSISR_ISSTORE | DSISR_PROTFAULT;
++ kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
++ return RESUME_GUEST;
++ }
++ upgrade_p = NULL;
++ }
++
+ if (dsisr & DSISR_SET_RC) {
+ /*
+ * Need to set an R or C bit in the 2nd-level tables;
+@@ -470,69 +475,92 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ return RESUME_GUEST;
+ }
+
+- ret = -EFAULT;
+- pfn = 0;
+- pte_size = PAGE_SIZE;
+- pgflags = _PAGE_READ | _PAGE_EXEC;
+- level = 0;
+- npages = get_user_pages_fast(hva, 1, writing, pages);
+- if (npages < 1) {
+- /* Check if it's an I/O mapping */
+- down_read(&current->mm->mmap_sem);
+- vma = find_vma(current->mm, hva);
+- if (vma && vma->vm_start <= hva && hva < vma->vm_end &&
+- (vma->vm_flags & VM_PFNMAP)) {
+- pfn = vma->vm_pgoff +
+- ((hva - vma->vm_start) >> PAGE_SHIFT);
+- pgflags = pgprot_val(vma->vm_page_prot);
+- }
+- up_read(&current->mm->mmap_sem);
+- if (!pfn)
+- return -EFAULT;
+- } else {
+- page = pages[0];
++ /* used to check for invalidations in progress */
++ mmu_seq = kvm->mmu_notifier_seq;
++ smp_rmb();
++
++ /*
++ * Do a fast check first, since __gfn_to_pfn_memslot doesn't
++ * do it with !atomic && !async, which is how we call it.
++ * We always ask for write permission since the common case
++ * is that the page is writable.
++ */
++ hva = gfn_to_hva_memslot(memslot, gfn);
++ if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
+ pfn = page_to_pfn(page);
+- if (PageCompound(page)) {
+- pte_size <<= compound_order(compound_head(page));
+- /* See if we can insert a 1GB or 2MB large PTE here */
+- if (pte_size >= PUD_SIZE &&
+- (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+- (hva & (PUD_SIZE - PAGE_SIZE))) {
+- level = 2;
+- pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
+- } else if (pte_size >= PMD_SIZE &&
+- (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+- (hva & (PMD_SIZE - PAGE_SIZE))) {
+- level = 1;
+- pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
+- }
++ upgrade_write = true;
++ } else {
++ /* Call KVM generic code to do the slow-path check */
++ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
++ writing, upgrade_p);
++ if (is_error_noslot_pfn(pfn))
++ return -EFAULT;
++ page = NULL;
++ if (pfn_valid(pfn)) {
++ page = pfn_to_page(pfn);
++ if (PageReserved(page))
++ page = NULL;
+ }
+- /* See if we can provide write access */
+- if (writing) {
+- pgflags |= _PAGE_WRITE;
+- } else {
+- local_irq_save(flags);
+- ptep = find_current_mm_pte(current->mm->pgd,
+- hva, NULL, NULL);
+- if (ptep && pte_write(*ptep))
+- pgflags |= _PAGE_WRITE;
+- local_irq_restore(flags);
++ }
++
++ /* See if we can insert a 1GB or 2MB large PTE here */
++ level = 0;
++ if (page && PageCompound(page)) {
++ pte_size = PAGE_SIZE << compound_order(compound_head(page));
++ if (pte_size >= PUD_SIZE &&
++ (gpa & (PUD_SIZE - PAGE_SIZE)) ==
++ (hva & (PUD_SIZE - PAGE_SIZE))) {
++ level = 2;
++ pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
++ } else if (pte_size >= PMD_SIZE &&
++ (gpa & (PMD_SIZE - PAGE_SIZE)) ==
++ (hva & (PMD_SIZE - PAGE_SIZE))) {
++ level = 1;
++ pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
+ }
+ }
+
+ /*
+ * Compute the PTE value that we need to insert.
+ */
+- pgflags |= _PAGE_PRESENT | _PAGE_PTE | _PAGE_ACCESSED;
+- if (pgflags & _PAGE_WRITE)
+- pgflags |= _PAGE_DIRTY;
+- pte = pfn_pte(pfn, __pgprot(pgflags));
++ if (page) {
++ pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
++ _PAGE_ACCESSED;
++ if (writing || upgrade_write)
++ pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
++ pte = pfn_pte(pfn, __pgprot(pgflags));
++ } else {
++ /*
++ * Read the PTE from the process' radix tree and use that
++ * so we get the attribute bits.
++ */
++ local_irq_disable();
++ ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
++ pte = *ptep;
++ local_irq_enable();
++ if (shift == PUD_SHIFT &&
++ (gpa & (PUD_SIZE - PAGE_SIZE)) ==
++ (hva & (PUD_SIZE - PAGE_SIZE))) {
++ level = 2;
++ } else if (shift == PMD_SHIFT &&
++ (gpa & (PMD_SIZE - PAGE_SIZE)) ==
++ (hva & (PMD_SIZE - PAGE_SIZE))) {
++ level = 1;
++ } else if (shift && shift != PAGE_SHIFT) {
++ /* Adjust PFN */
++ unsigned long mask = (1ul << shift) - PAGE_SIZE;
++ pte = __pte(pte_val(pte) | (hva & mask));
++ }
++ if (!(writing || upgrade_write))
++ pte = __pte(pte_val(pte) & ~ _PAGE_WRITE);
++ pte = __pte(pte_val(pte) | _PAGE_EXEC);
++ }
+
+ /* Allocate space in the tree and write the PTE */
+ ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+
+ if (page) {
+- if (!ret && (pgflags & _PAGE_WRITE))
++ if (!ret && (pte_val(pte) & _PAGE_WRITE))
+ set_page_dirty_lock(page);
+ put_page(page);
+ }
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Use-a-helper-to-unmap-ptes-in-the-.patch b/patches.arch/KVM-PPC-Book3S-HV-Use-a-helper-to-unmap-ptes-in-the-.patch
new file mode 100644
index 0000000000..4aee673067
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Use-a-helper-to-unmap-ptes-in-the-.patch
@@ -0,0 +1,105 @@
+From a5fad1e959529eda20f38d1e02be65ab629de899 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Thu, 17 May 2018 17:06:26 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Use a helper to unmap ptes in the radix
+ fault path
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: a5fad1e959529eda20f38d1e02be65ab629de899
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 46 +++++++++++++++++-----------------
+ 1 file changed, 23 insertions(+), 23 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index 481da8f93fa4..2c49b31ec7fb 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -228,6 +228,25 @@ static void kvmppc_pmd_free(pmd_t *pmdp)
+ kmem_cache_free(kvm_pmd_cache, pmdp);
+ }
+
++static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
++ unsigned long gpa, unsigned int shift)
++
++{
++ unsigned long page_size = 1ul << shift;
++ unsigned long old;
++
++ old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift);
++ kvmppc_radix_tlbie_page(kvm, gpa, shift);
++ if (old & _PAGE_DIRTY) {
++ unsigned long gfn = gpa >> PAGE_SHIFT;
++ struct kvm_memory_slot *memslot;
++
++ memslot = gfn_to_memslot(kvm, gfn);
++ if (memslot && memslot->dirty_bitmap)
++ kvmppc_update_dirty_map(memslot, gfn, page_size);
++ }
++}
++
+ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ unsigned int level, unsigned long mmu_seq)
+ {
+@@ -235,7 +254,6 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ pud_t *pud, *new_pud = NULL;
+ pmd_t *pmd, *new_pmd = NULL;
+ pte_t *ptep, *new_ptep = NULL;
+- unsigned long old;
+ int ret;
+
+ /* Traverse the guest's 2nd-level tree, allocate new levels needed */
+@@ -287,17 +305,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ goto out_unlock;
+ }
+ /* Valid 1GB page here already, remove it */
+- old = kvmppc_radix_update_pte(kvm, (pte_t *)pud,
+- ~0UL, 0, hgpa, PUD_SHIFT);
+- kvmppc_radix_tlbie_page(kvm, hgpa, PUD_SHIFT);
+- if (old & _PAGE_DIRTY) {
+- unsigned long gfn = hgpa >> PAGE_SHIFT;
+- struct kvm_memory_slot *memslot;
+- memslot = gfn_to_memslot(kvm, gfn);
+- if (memslot && memslot->dirty_bitmap)
+- kvmppc_update_dirty_map(memslot,
+- gfn, PUD_SIZE);
+- }
++ kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT);
+ }
+ if (level == 2) {
+ if (!pud_none(*pud)) {
+@@ -338,17 +346,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ goto out_unlock;
+ }
+ /* Valid 2MB page here already, remove it */
+- old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
+- ~0UL, 0, lgpa, PMD_SHIFT);
+- kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
+- if (old & _PAGE_DIRTY) {
+- unsigned long gfn = lgpa >> PAGE_SHIFT;
+- struct kvm_memory_slot *memslot;
+- memslot = gfn_to_memslot(kvm, gfn);
+- if (memslot && memslot->dirty_bitmap)
+- kvmppc_update_dirty_map(memslot,
+- gfn, PMD_SIZE);
+- }
++ kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT);
+ }
+ if (level == 1) {
+ if (!pmd_none(*pmd)) {
+@@ -373,6 +371,8 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ }
+ ptep = pte_offset_kernel(pmd, gpa);
+ if (pte_present(*ptep)) {
++ unsigned long old;
++
+ /* Check if someone else set the same thing */
+ if (pte_raw(*ptep) == pte_raw(pte)) {
+ ret = 0;
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-HV-XIVE-Resend-re-routed-interrupts-o.patch b/patches.arch/KVM-PPC-Book3S-HV-XIVE-Resend-re-routed-interrupts-o.patch
new file mode 100644
index 0000000000..cefdc88b69
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-XIVE-Resend-re-routed-interrupts-o.patch
@@ -0,0 +1,181 @@
+From 9dc81d6b0f1e3c40bdf97671dd26a24f128e1182 Mon Sep 17 00:00:00 2001
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Date: Thu, 10 May 2018 13:06:42 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: XIVE: Resend re-routed interrupts on CPU
+ priority change
+
+References: bsc#1061840
+Patch-mainline: v4.17-rc7
+Git-commit: 9dc81d6b0f1e3c40bdf97671dd26a24f128e1182
+
+When a vcpu priority (CPPR) is set to a lower value (masking more
+interrupts), we stop processing interrupts already in the queue
+for the priorities that have now been masked.
+
+If those interrupts were previously re-routed to a different
+CPU, they might still be stuck until the older one that has
+them in its queue processes them. In the case of guest CPU
+unplug, that may never happen.
+
+To address that without creating additional overhead for
+the normal interrupt processing path, this changes H_CPPR
+handling so that when such a priority change occurs, we
+scan the interrupt queue for that vCPU, and for any
+interrupt in there that has been re-routed, we replace it
+with a dummy and force a re-trigger.
+
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_xive_template.c | 108 +++++++++++++++++++++++++++++---
+ 1 file changed, 101 insertions(+), 7 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
+index c7a5deadd1cc..99c3620b40d9 100644
+--- a/arch/powerpc/kvm/book3s_xive_template.c
++++ b/arch/powerpc/kvm/book3s_xive_template.c
+@@ -11,6 +11,9 @@
+ #define XGLUE(a,b) a##b
+ #define GLUE(a,b) XGLUE(a,b)
+
++/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
++#define XICS_DUMMY 1
++
+ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
+ {
+ u8 cppr;
+@@ -205,6 +208,10 @@ static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
+ goto skip_ipi;
+ }
+
++ /* If it's the dummy interrupt, continue searching */
++ if (hirq == XICS_DUMMY)
++ goto skip_ipi;
++
+ /* If fetching, update queue pointers */
+ if (scan_type == scan_fetch) {
+ q->idx = idx;
+@@ -385,9 +392,76 @@ static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
+ __x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
+ }
+
++static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
++ struct kvmppc_xive_vcpu *xc)
++{
++ unsigned int prio;
++
++ /* For each priority that is now masked */
++ for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
++ struct xive_q *q = &xc->queues[prio];
++ struct kvmppc_xive_irq_state *state;
++ struct kvmppc_xive_src_block *sb;
++ u32 idx, toggle, entry, irq, hw_num;
++ struct xive_irq_data *xd;
++ __be32 *qpage;
++ u16 src;
++
++ idx = q->idx;
++ toggle = q->toggle;
++ qpage = READ_ONCE(q->qpage);
++ if (!qpage)
++ continue;
++
++ /* For each interrupt in the queue */
++ for (;;) {
++ entry = be32_to_cpup(qpage + idx);
++
++ /* No more ? */
++ if ((entry >> 31) == toggle)
++ break;
++ irq = entry & 0x7fffffff;
++
++ /* Skip dummies and IPIs */
++ if (irq == XICS_DUMMY || irq == XICS_IPI)
++ goto next;
++ sb = kvmppc_xive_find_source(xive, irq, &src);
++ if (!sb)
++ goto next;
++ state = &sb->irq_state[src];
++
++ /* Has it been rerouted ? */
++ if (xc->server_num == state->act_server)
++ goto next;
++
++ /*
++ * Allright, it *has* been re-routed, kill it from
++ * the queue.
++ */
++ qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
++
++ /* Find the HW interrupt */
++ kvmppc_xive_select_irq(state, &hw_num, &xd);
++
++ /* If it's not an LSI, set PQ to 11 the EOI will force a resend */
++ if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
++ GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
++
++ /* EOI the source */
++ GLUE(X_PFX,source_eoi)(hw_num, xd);
++
++ next:
++ idx = (idx + 1) & q->msk;
++ if (idx == 0)
++ toggle ^= 1;
++ }
++ }
++}
++
+ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
+ {
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
++ struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+ u8 old_cppr;
+
+ pr_devel("H_CPPR(cppr=%ld)\n", cppr);
+@@ -407,14 +481,34 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
+ */
+ smp_mb();
+
+- /*
+- * We are masking less, we need to look for pending things
+- * to deliver and set VP pending bits accordingly to trigger
+- * a new interrupt otherwise we might miss MFRR changes for
+- * which we have optimized out sending an IPI signal.
+- */
+- if (cppr > old_cppr)
++ if (cppr > old_cppr) {
++ /*
++ * We are masking less, we need to look for pending things
++ * to deliver and set VP pending bits accordingly to trigger
++ * a new interrupt otherwise we might miss MFRR changes for
++ * which we have optimized out sending an IPI signal.
++ */
+ GLUE(X_PFX,push_pending_to_hw)(xc);
++ } else {
++ /*
++ * We are masking more, we need to check the queue for any
++ * interrupt that has been routed to another CPU, take
++ * it out (replace it with the dummy) and retrigger it.
++ *
++ * This is necessary since those interrupts may otherwise
++ * never be processed, at least not until this CPU restores
++ * its CPPR.
++ *
++ * This is in theory racy vs. HW adding new interrupts to
++ * the queue. In practice this works because the interesting
++ * cases are when the guest has done a set_xive() to move the
++ * interrupt away, which flushes the xive, followed by the
++ * target CPU doing a H_CPPR. So any new interrupt coming into
++ * the queue must still be routed to us and isn't a source
++ * of concern.
++ */
++ GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
++ }
+
+ /* Apply new CPPR */
+ xc->hw_cppr = cppr;
+--
+2.13.7
+
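
[Illustrative aside, not part of the merged patches: a compact userspace sketch of the toggle-bit queue walk that scan_for_rerouted_irqs() performs above, replacing re-routed entries with a dummy. The queue size, its contents and the rerouted() predicate are made up for the example.]

#include <stdint.h>
#include <stdio.h>

#define QLEN            8       /* queue length, a power of two */
#define XICS_DUMMY      1

static int rerouted(uint32_t irq)
{
        return irq == 0x42;     /* pretend this source now targets another CPU */
}

static void scan_queue(uint32_t *qpage, uint32_t idx, uint32_t toggle)
{
        for (;;) {
                uint32_t entry = qpage[idx];
                uint32_t irq;

                /* Generation bit matches the toggle: no more valid entries. */
                if ((entry >> 31) == toggle)
                        break;

                irq = entry & 0x7fffffff;
                if (irq != XICS_DUMMY && rerouted(irq)) {
                        /* Keep the generation bit, replace the payload. */
                        qpage[idx] = (entry & 0x80000000u) | XICS_DUMMY;
                        printf("irq %#x replaced with dummy\n", irq);
                }

                idx = (idx + 1) & (QLEN - 1);
                if (idx == 0)
                        toggle ^= 1;
        }
}

int main(void)
{
        /* Two entries carry the current generation bit; the rest are old. */
        uint32_t q[QLEN] = { 0x80000010, 0x80000042, 0, 0, 0, 0, 0, 0 };

        scan_queue(q, 0, 0);
        return 0;
}
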
diff --git a/patches.arch/KVM-PPC-Book3S-HV-radix-Do-not-clear-partition-PTE-w.patch b/patches.arch/KVM-PPC-Book3S-HV-radix-Do-not-clear-partition-PTE-w.patch
new file mode 100644
index 0000000000..ced5bf0cb0
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-radix-Do-not-clear-partition-PTE-w.patch
@@ -0,0 +1,151 @@
+From b9719579658ac239b99ae2113e110945d02cfe02 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Thu, 17 May 2018 17:06:31 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: radix: Do not clear partition PTE when
+ RC or write bits do not match
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: 878cf2bb2d8d6164df7b63b2239859f99fea212a
+
+Adding the write bit and RC bits to pte permissions does not require a
+pte clear and flush. There should not be other bits changed here,
+because restricting access or changing the PFN must have already
+invalidated any existing ptes (otherwise the race is already lost).
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 68 +++++++++++++++++++++++-----------
+ 1 file changed, 47 insertions(+), 21 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index 730127704cdb..8ed0bace7f27 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -176,7 +176,7 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm)
+ asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
+ }
+
+-unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
++static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
+ unsigned long clr, unsigned long set,
+ unsigned long addr, unsigned int shift)
+ {
+@@ -380,6 +380,15 @@ static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
+ kvmppc_unmap_free_pmd(kvm, pmd, false);
+ }
+
++/*
++ * There are a number of bits which may differ between different faults to
++ * the same partition scope entry. RC bits, in the course of cleaning and
++ * aging. And the write bit can change, either the access could have been
++ * upgraded, or a read fault could happen concurrently with a write fault
++ * that sets those bits first.
++ */
++#define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED))
++
+ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ unsigned int level, unsigned long mmu_seq)
+ {
+@@ -424,19 +433,28 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ if (pud_huge(*pud)) {
+ unsigned long hgpa = gpa & PUD_MASK;
+
++ /* Check if we raced and someone else has set the same thing */
++ if (level == 2) {
++ if (pud_raw(*pud) == pte_raw(pte)) {
++ ret = 0;
++ goto out_unlock;
++ }
++ /* Valid 1GB page here already, add our extra bits */
++ WARN_ON_ONCE((pud_val(*pud) ^ pte_val(pte)) &
++ PTE_BITS_MUST_MATCH);
++ kvmppc_radix_update_pte(kvm, (pte_t *)pud,
++ 0, pte_val(pte), hgpa, PUD_SHIFT);
++ ret = 0;
++ goto out_unlock;
++ }
+ /*
+ * If we raced with another CPU which has just put
+ * a 1GB pte in after we saw a pmd page, try again.
+ */
+- if (level <= 1 && !new_pmd) {
++ if (!new_pmd) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+- /* Check if we raced and someone else has set the same thing */
+- if (level == 2 && pud_raw(*pud) == pte_raw(pte)) {
+- ret = 0;
+- goto out_unlock;
+- }
+ /* Valid 1GB page here already, remove it */
+ kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT);
+ }
+@@ -463,19 +481,29 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ if (pmd_is_leaf(*pmd)) {
+ unsigned long lgpa = gpa & PMD_MASK;
+
++ /* Check if we raced and someone else has set the same thing */
++ if (level == 1) {
++ if (pmd_raw(*pmd) == pte_raw(pte)) {
++ ret = 0;
++ goto out_unlock;
++ }
++ /* Valid 2MB page here already, add our extra bits */
++ WARN_ON_ONCE((pmd_val(*pmd) ^ pte_val(pte)) &
++ PTE_BITS_MUST_MATCH);
++ kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
++ 0, pte_val(pte), lgpa, PMD_SHIFT);
++ ret = 0;
++ goto out_unlock;
++ }
++
+ /*
+ * If we raced with another CPU which has just put
+ * a 2MB pte in after we saw a pte page, try again.
+ */
+- if (level == 0 && !new_ptep) {
++ if (!new_ptep) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+- /* Check if we raced and someone else has set the same thing */
+- if (level == 1 && pmd_raw(*pmd) == pte_raw(pte)) {
+- ret = 0;
+- goto out_unlock;
+- }
+ /* Valid 2MB page here already, remove it */
+ kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT);
+ }
+@@ -500,19 +528,17 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ }
+ ptep = pte_offset_kernel(pmd, gpa);
+ if (pte_present(*ptep)) {
+- unsigned long old;
+-
+ /* Check if someone else set the same thing */
+ if (pte_raw(*ptep) == pte_raw(pte)) {
+ ret = 0;
+ goto out_unlock;
+ }
+- /* PTE was previously valid, so invalidate it */
+- old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
+- 0, gpa, 0);
+- kvmppc_radix_tlbie_page(kvm, gpa, 0);
+- if (old & _PAGE_DIRTY)
+- mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
++ /* Valid page here already, add our extra bits */
++ WARN_ON_ONCE((pte_val(*ptep) ^ pte_val(pte)) &
++ PTE_BITS_MUST_MATCH);
++ kvmppc_radix_update_pte(kvm, ptep, 0, pte_val(pte), gpa, 0);
++ ret = 0;
++ goto out_unlock;
+ }
+ kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
+ ret = 0;
+--
+2.13.7
+
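
[Illustrative aside, not part of the merged patches: a tiny sketch of the PTE_BITS_MUST_MATCH idea introduced above. Between two faults racing on the same partition-scope entry, only the R/C bits and the write bit may legitimately differ; any other difference means a required invalidation was missed. The bit values here are simplified stand-ins, not the real PowerPC definitions.]

#include <assert.h>

#define _PAGE_ACCESSED  0x1UL
#define _PAGE_DIRTY     0x2UL
#define _PAGE_WRITE     0x4UL
#define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED))

/* True if two PTE values differ only in bits that are allowed to differ. */
static int same_mapping(unsigned long existing, unsigned long wanted)
{
        return ((existing ^ wanted) & PTE_BITS_MUST_MATCH) == 0;
}

int main(void)
{
        unsigned long base = 0x1000;    /* stand-in for PFN/permission bits */

        /* Only RC/write differ: safe to just OR in the extra bits. */
        assert(same_mapping(base | _PAGE_ACCESSED,
                            base | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_WRITE));

        /* Different frame: must not happen without an invalidation first. */
        assert(!same_mapping(base, 0x2000));
        return 0;
}
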
diff --git a/patches.arch/KVM-PPC-Book3S-HV-radix-Refine-IO-region-partition-s.patch b/patches.arch/KVM-PPC-Book3S-HV-radix-Refine-IO-region-partition-s.patch
new file mode 100644
index 0000000000..326eeb6499
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-radix-Refine-IO-region-partition-s.patch
@@ -0,0 +1,48 @@
+From bc64dd0e1c4eddbec75dd5aa86b60c2a834aaef3 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Thu, 17 May 2018 17:06:30 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: radix: Refine IO region partition scope
+ attributes
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: bc64dd0e1c4eddbec75dd5aa86b60c2a834aaef3
+
+When the radix fault handler has no page from the process address
+space (e.g., for IO memory), it looks up the process pte and sets
+partition table pte using that to get attributes like CI and guarded.
+If the process table entry is to be writable, set _PAGE_DIRTY as well
+to avoid an RC update. If not, then ensure _PAGE_DIRTY does not come
+across. Set _PAGE_ACCESSED as well to avoid RC update.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index e55db915af49..b0ba3628adc2 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -674,9 +674,13 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned long mask = (1ul << shift) - PAGE_SIZE;
+ pte = __pte(pte_val(pte) | (hva & mask));
+ }
+- if (!(writing || upgrade_write))
+- pte = __pte(pte_val(pte) & ~ _PAGE_WRITE);
+- pte = __pte(pte_val(pte) | _PAGE_EXEC);
++ pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
++ if (writing || upgrade_write) {
++ if (pte_val(pte) & _PAGE_WRITE)
++ pte = __pte(pte_val(pte) | _PAGE_DIRTY);
++ } else {
++ pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
++ }
+ }
+
+ /* Allocate space in the tree and write the PTE */
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-PR-Add-guest-MSR-parameter-for-kvmppc.patch b/patches.arch/KVM-PPC-Book3S-PR-Add-guest-MSR-parameter-for-kvmppc.patch
new file mode 100644
index 0000000000..96b86534b2
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-PR-Add-guest-MSR-parameter-for-kvmppc.patch
@@ -0,0 +1,321 @@
+From 6f597c6b63b6f3675914b5ec8fcd008a58678650 Mon Sep 17 00:00:00 2001
+From: Simon Guo <wei.guo.simon@gmail.com>
+Date: Wed, 23 May 2018 15:01:48 +0800
+Subject: [PATCH] KVM: PPC: Book3S PR: Add guest MSR parameter for
+ kvmppc_save_tm()/kvmppc_restore_tm()
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: 6f597c6b63b6f3675914b5ec8fcd008a58678650
+
+HV KVM and PR KVM need different MSR sources to indicate whether
+treclaim. or trecheckpoint. is necessary.
+
+This patch adds a new parameter (guest MSR) for these kvmppc_save_tm/
+kvmppc_restore_tm() APIs:
+- For HV KVM, it is VCPU_MSR
+- For PR KVM, it is current host MSR or VCPU_SHADOW_SRR1
+
+This enhancement enables these 2 APIs to be reused by PR KVM later.
+The patch keeps the HV KVM logic unchanged.
+
+This patch also reworks kvmppc_save_tm()/kvmppc_restore_tm() to
+have a clean ABI: r3 for vcpu and r4 for guest_msr.
+
+During kvmppc_save_tm()/kvmppc_restore_tm(), R1 needs to be saved
+or restored. Currently R1 is saved into HSTATE_HOST_R1. In PR
+KVM, we are going to add a C function wrapper for
+kvmppc_save_tm()/kvmppc_restore_tm() in which R1 will be incremented
+by the added stack frame and saved into HSTATE_HOST_R1. There are
+several places in HV KVM that load HSTATE_HOST_R1 as R1, and we don't
+want the TM code to introduce risk or confusion there.
+
+This patch will use HSTATE_SCRATCH2 to save/restore R1 in
+kvmppc_save_tm/kvmppc_restore_tm() to avoid future confusion, since
+r1 is just a temporary/scratch value being saved and restored there.
+
+[paulus@ozlabs.org - rebased on top of 7b0e827c6970 ("KVM: PPC: Book3S HV:
+ Factor fake-suspend handling out of kvmppc_save/restore_tm", 2018-05-30)]
+
+Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 33 +++++++++------
+ arch/powerpc/kvm/tm.S | 71 +++++++++++++++++----------------
+ 2 files changed, 57 insertions(+), 47 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index 8e016598692e..75e3bbf8c957 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -793,7 +793,10 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+ /*
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+ */
++ mr r3, r4
++ ld r4, VCPU_MSR(r3)
+ bl kvmppc_restore_tm_hv
++ ld r4, HSTATE_KVM_VCPU(r13)
+ 91:
+ #endif
+
+@@ -1777,7 +1780,10 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+ /*
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+ */
++ mr r3, r9
++ ld r4, VCPU_MSR(r3)
+ bl kvmppc_save_tm_hv
++ ld r9, HSTATE_KVM_VCPU(r13)
+ 91:
+ #endif
+
+@@ -2680,7 +2686,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+ /*
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+ */
+- ld r9, HSTATE_KVM_VCPU(r13)
++ ld r3, HSTATE_KVM_VCPU(r13)
++ ld r4, VCPU_MSR(r3)
+ bl kvmppc_save_tm_hv
+ 91:
+ #endif
+@@ -2799,7 +2806,10 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+ /*
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+ */
++ mr r3, r4
++ ld r4, VCPU_MSR(r3)
+ bl kvmppc_restore_tm_hv
++ ld r4, HSTATE_KVM_VCPU(r13)
+ 91:
+ #endif
+
+@@ -3120,9 +3130,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /*
+ * Save transactional state and TM-related registers.
+- * Called with r9 pointing to the vcpu struct.
++ * Called with r3 pointing to the vcpu struct and r4 containing
++ * the guest MSR value.
+ * This can modify all checkpointed registers, but
+- * restores r1, r2 and r9 (vcpu pointer) before exit.
++ * restores r1 and r2 before exit.
+ */
+ kvmppc_save_tm_hv:
+ /* See if we need to handle fake suspend mode */
+@@ -3205,9 +3216,10 @@ END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
+
+ /*
+ * Restore transactional state and TM-related registers.
+- * Called with r4 pointing to the vcpu struct.
++ * Called with r3 pointing to the vcpu struct
++ * and r4 containing the guest MSR value.
+ * This potentially modifies all checkpointed registers.
+- * It restores r1, r2, r4 from the PACA.
++ * It restores r1 and r2 from the PACA.
+ */
+ kvmppc_restore_tm_hv:
+ /*
+@@ -3234,15 +3246,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
+ * The user may change these outside of a transaction, so they must
+ * always be context switched.
+ */
+- ld r5, VCPU_TFHAR(r4)
+- ld r6, VCPU_TFIAR(r4)
+- ld r7, VCPU_TEXASR(r4)
++ ld r5, VCPU_TFHAR(r3)
++ ld r6, VCPU_TFIAR(r3)
++ ld r7, VCPU_TEXASR(r3)
+ mtspr SPRN_TFHAR, r5
+ mtspr SPRN_TFIAR, r6
+ mtspr SPRN_TEXASR, r7
+
+- ld r5, VCPU_MSR(r4)
+- rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
++ rldicl. r5, r4, 64 - MSR_TS_S_LG, 62
+ beqlr /* TM not active in guest */
+
+ /* Make sure the failure summary is set */
+@@ -3255,10 +3266,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
+ b 9f /* and return */
+ 10: stdu r1, -PPC_MIN_STKFRM(r1)
+ /* guest is in transactional state, so simulate rollback */
+- mr r3, r4
+ bl kvmhv_emulate_tm_rollback
+ nop
+- ld r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */
+ addi r1, r1, PPC_MIN_STKFRM
+ 9: ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
+diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
+index ba97789c41ca..f027b5a0c0f0 100644
+--- a/arch/powerpc/kvm/tm.S
++++ b/arch/powerpc/kvm/tm.S
+@@ -26,9 +26,12 @@
+
+ /*
+ * Save transactional state and TM-related registers.
+- * Called with r9 pointing to the vcpu struct.
++ * Called with:
++ * - r3 pointing to the vcpu struct
++ * - r4 points to the MSR with current TS bits:
++ * (For HV KVM, it is VCPU_MSR ; For PR KVM, it is host MSR).
+ * This can modify all checkpointed registers, but
+- * restores r1, r2 and r9 (vcpu pointer) before exit.
++ * restores r1, r2 before exit.
+ */
+ _GLOBAL(kvmppc_save_tm)
+ mflr r0
+@@ -40,20 +43,17 @@ _GLOBAL(kvmppc_save_tm)
+ rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+ mtmsrd r8
+
+-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+- ld r5, VCPU_MSR(r9)
+- rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
++ rldicl. r4, r4, 64 - MSR_TS_S_LG, 62
+ beq 1f /* TM not active in guest. */
+-#endif
+
+- std r1, HSTATE_HOST_R1(r13)
+- li r3, TM_CAUSE_KVM_RESCHED
++ std r1, HSTATE_SCRATCH2(r13)
++ std r3, HSTATE_SCRATCH1(r13)
+
+ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ BEGIN_FTR_SECTION
+ /* Emulation of the treclaim instruction needs TEXASR before treclaim */
+ mfspr r6, SPRN_TEXASR
+- std r6, VCPU_ORIG_TEXASR(r9)
++ std r6, VCPU_ORIG_TEXASR(r3)
+ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+ #endif
+
+@@ -61,6 +61,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+ li r5, 0
+ mtmsrd r5, 1
+
++ li r3, TM_CAUSE_KVM_RESCHED
++
+ /* All GPRs are volatile at this point. */
+ TRECLAIM(R3)
+
+@@ -68,9 +70,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+ SET_SCRATCH0(r13)
+ GET_PACA(r13)
+ std r9, PACATMSCRATCH(r13)
+-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+- ld r9, HSTATE_KVM_VCPU(r13)
+-#endif
++ ld r9, HSTATE_SCRATCH1(r13)
+
+ /* Get a few more GPRs free. */
+ std r29, VCPU_GPRS_TM(29)(r9)
+@@ -102,7 +102,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+ std r4, VCPU_GPRS_TM(9)(r9)
+
+ /* Reload stack pointer and TOC. */
+- ld r1, HSTATE_HOST_R1(r13)
++ ld r1, HSTATE_SCRATCH2(r13)
+ ld r2, PACATOC(r13)
+
+ /* Set MSR RI now we have r1 and r13 back. */
+@@ -156,9 +156,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+
+ /*
+ * Restore transactional state and TM-related registers.
+- * Called with r4 pointing to the vcpu struct.
++ * Called with:
++ * - r3 pointing to the vcpu struct.
++ * - r4 is the guest MSR with desired TS bits:
++ * For HV KVM, it is VCPU_MSR
++ * For PR KVM, it is provided by caller
+ * This potentially modifies all checkpointed registers.
+- * It restores r1, r2, r4 from the PACA.
++ * It restores r1, r2 from the PACA.
+ */
+ _GLOBAL(kvmppc_restore_tm)
+ mflr r0
+@@ -177,19 +181,17 @@ _GLOBAL(kvmppc_restore_tm)
+ * The user may change these outside of a transaction, so they must
+ * always be context switched.
+ */
+- ld r5, VCPU_TFHAR(r4)
+- ld r6, VCPU_TFIAR(r4)
+- ld r7, VCPU_TEXASR(r4)
++ ld r5, VCPU_TFHAR(r3)
++ ld r6, VCPU_TFIAR(r3)
++ ld r7, VCPU_TEXASR(r3)
+ mtspr SPRN_TFHAR, r5
+ mtspr SPRN_TFIAR, r6
+ mtspr SPRN_TEXASR, r7
+
+-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+- ld r5, VCPU_MSR(r4)
++ mr r5, r4
+ rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+ beqlr /* TM not active in guest */
+-#endif
+- std r1, HSTATE_HOST_R1(r13)
++ std r1, HSTATE_SCRATCH2(r13)
+
+ /* Make sure the failure summary is set, otherwise we'll program check
+ * when we trechkpt. It's possible that this might have been not set
+@@ -205,21 +207,21 @@ _GLOBAL(kvmppc_restore_tm)
+ * some SPRs.
+ */
+
+- mr r31, r4
++ mr r31, r3
+ addi r3, r31, VCPU_FPRS_TM
+ bl load_fp_state
+ addi r3, r31, VCPU_VRS_TM
+ bl load_vr_state
+- mr r4, r31
+- lwz r7, VCPU_VRSAVE_TM(r4)
++ mr r3, r31
++ lwz r7, VCPU_VRSAVE_TM(r3)
+ mtspr SPRN_VRSAVE, r7
+
+- ld r5, VCPU_LR_TM(r4)
+- lwz r6, VCPU_CR_TM(r4)
+- ld r7, VCPU_CTR_TM(r4)
+- ld r8, VCPU_AMR_TM(r4)
+- ld r9, VCPU_TAR_TM(r4)
+- ld r10, VCPU_XER_TM(r4)
++ ld r5, VCPU_LR_TM(r3)
++ lwz r6, VCPU_CR_TM(r3)
++ ld r7, VCPU_CTR_TM(r3)
++ ld r8, VCPU_AMR_TM(r3)
++ ld r9, VCPU_TAR_TM(r3)
++ ld r10, VCPU_XER_TM(r3)
+ mtlr r5
+ mtcr r6
+ mtctr r7
+@@ -232,8 +234,8 @@ _GLOBAL(kvmppc_restore_tm)
+ * till the last moment to avoid running with userspace PPR and DSCR for
+ * too long.
+ */
+- ld r29, VCPU_DSCR_TM(r4)
+- ld r30, VCPU_PPR_TM(r4)
++ ld r29, VCPU_DSCR_TM(r3)
++ ld r30, VCPU_PPR_TM(r3)
+
+ std r2, PACATMSCRATCH(r13) /* Save TOC */
+
+@@ -265,9 +267,8 @@ _GLOBAL(kvmppc_restore_tm)
+ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ld r29, HSTATE_DSCR(r13)
+ mtspr SPRN_DSCR, r29
+- ld r4, HSTATE_KVM_VCPU(r13)
+ #endif
+- ld r1, HSTATE_HOST_R1(r13)
++ ld r1, HSTATE_SCRATCH2(r13)
+ ld r2, PACATMSCRATCH(r13)
+
+ /* Set the MSR RI since we have our registers back. */
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-PR-Move-kvmppc_save_tm-kvmppc_restore.patch b/patches.arch/KVM-PPC-Book3S-PR-Move-kvmppc_save_tm-kvmppc_restore.patch
new file mode 100644
index 0000000000..6775d9924c
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-PR-Move-kvmppc_save_tm-kvmppc_restore.patch
@@ -0,0 +1,592 @@
+From 009c872a8bc4d38f487a9bd62423d019e4322517 Mon Sep 17 00:00:00 2001
+From: Simon Guo <wei.guo.simon@gmail.com>
+Date: Wed, 23 May 2018 15:01:47 +0800
+Subject: [PATCH] KVM: PPC: Book3S PR: Move kvmppc_save_tm/kvmppc_restore_tm to
+ separate file
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: 009c872a8bc4d38f487a9bd62423d019e4322517
+
+This is a simple patch that just moves the kvmppc_save_tm()/kvmppc_restore_tm()
+functionality to tm.S. There is no logic change. The reconstruction of
+those APIs will be done in later patches to improve readability.
+
+This is in preparation for reusing those APIs in both HV and PR PPC KVM.
+
+Slight changes made while moving the functions include:
+- surrounding some HV KVM specific code with CONFIG_KVM_BOOK3S_HV_POSSIBLE
+for compilation.
+- using _GLOBAL() to define kvmppc_save_tm()/kvmppc_restore_tm()
+
+[paulus@ozlabs.org - rebased on top of 7b0e827c6970 ("KVM: PPC: Book3S HV:
+ Factor fake-suspend handling out of kvmppc_save/restore_tm", 2018-05-30)]
+
+Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/Makefile | 3 +
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 231 --------------------------
+ arch/powerpc/kvm/tm.S | 279 ++++++++++++++++++++++++++++++++
+ 3 files changed, 282 insertions(+), 231 deletions(-)
+ create mode 100644 arch/powerpc/kvm/tm.S
+
+diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
+index 4b19da8c87ae..f872c04bb5b1 100644
+--- a/arch/powerpc/kvm/Makefile
++++ b/arch/powerpc/kvm/Makefile
+@@ -63,6 +63,9 @@ kvm-pr-y := \
+ book3s_64_mmu.o \
+ book3s_32_mmu.o
+
++kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
++ tm.o
++
+ ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+ book3s_rmhandlers.o
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index bfca999695f1..8e016598692e 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -39,8 +39,6 @@ BEGIN_FTR_SECTION; \
+ extsw reg, reg; \
+ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
+-#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
+-
+ /* Values in HSTATE_NAPPING(r13) */
+ #define NAPPING_CEDE 1
+ #define NAPPING_NOVCPU 2
+@@ -3205,122 +3203,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
+ mtlr r0
+ blr
+
+-kvmppc_save_tm:
+- mflr r0
+- std r0, PPC_LR_STKOFF(r1)
+-
+- /* Turn on TM. */
+- mfmsr r8
+- li r0, 1
+- rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+- mtmsrd r8
+-
+- ld r5, VCPU_MSR(r9)
+- rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+- beq 1f /* TM not active in guest. */
+-
+- std r1, HSTATE_HOST_R1(r13)
+- li r3, TM_CAUSE_KVM_RESCHED
+-
+-BEGIN_FTR_SECTION
+- /* Emulation of the treclaim instruction needs TEXASR before treclaim */
+- mfspr r6, SPRN_TEXASR
+- std r6, VCPU_ORIG_TEXASR(r9)
+-END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+-
+- /* Clear the MSR RI since r1, r13 are all going to be foobar. */
+- li r5, 0
+- mtmsrd r5, 1
+-
+- /* All GPRs are volatile at this point. */
+- TRECLAIM(R3)
+-
+- /* Temporarily store r13 and r9 so we have some regs to play with */
+- SET_SCRATCH0(r13)
+- GET_PACA(r13)
+- std r9, PACATMSCRATCH(r13)
+- ld r9, HSTATE_KVM_VCPU(r13)
+-
+- /* Get a few more GPRs free. */
+- std r29, VCPU_GPRS_TM(29)(r9)
+- std r30, VCPU_GPRS_TM(30)(r9)
+- std r31, VCPU_GPRS_TM(31)(r9)
+-
+- /* Save away PPR and DSCR soon so don't run with user values. */
+- mfspr r31, SPRN_PPR
+- HMT_MEDIUM
+- mfspr r30, SPRN_DSCR
+- ld r29, HSTATE_DSCR(r13)
+- mtspr SPRN_DSCR, r29
+-
+- /* Save all but r9, r13 & r29-r31 */
+- reg = 0
+- .rept 29
+- .if (reg != 9) && (reg != 13)
+- std reg, VCPU_GPRS_TM(reg)(r9)
+- .endif
+- reg = reg + 1
+- .endr
+- /* ... now save r13 */
+- GET_SCRATCH0(r4)
+- std r4, VCPU_GPRS_TM(13)(r9)
+- /* ... and save r9 */
+- ld r4, PACATMSCRATCH(r13)
+- std r4, VCPU_GPRS_TM(9)(r9)
+-
+- /* Reload stack pointer and TOC. */
+- ld r1, HSTATE_HOST_R1(r13)
+- ld r2, PACATOC(r13)
+-
+- /* Set MSR RI now we have r1 and r13 back. */
+- li r5, MSR_RI
+- mtmsrd r5, 1
+-
+- /* Save away checkpinted SPRs. */
+- std r31, VCPU_PPR_TM(r9)
+- std r30, VCPU_DSCR_TM(r9)
+- mflr r5
+- mfcr r6
+- mfctr r7
+- mfspr r8, SPRN_AMR
+- mfspr r10, SPRN_TAR
+- mfxer r11
+- std r5, VCPU_LR_TM(r9)
+- stw r6, VCPU_CR_TM(r9)
+- std r7, VCPU_CTR_TM(r9)
+- std r8, VCPU_AMR_TM(r9)
+- std r10, VCPU_TAR_TM(r9)
+- std r11, VCPU_XER_TM(r9)
+-
+- /* Restore r12 as trap number. */
+- lwz r12, VCPU_TRAP(r9)
+-
+- /* Save FP/VSX. */
+- addi r3, r9, VCPU_FPRS_TM
+- bl store_fp_state
+- addi r3, r9, VCPU_VRS_TM
+- bl store_vr_state
+- mfspr r6, SPRN_VRSAVE
+- stw r6, VCPU_VRSAVE_TM(r9)
+-1:
+- /*
+- * We need to save these SPRs after the treclaim so that the software
+- * error code is recorded correctly in the TEXASR. Also the user may
+- * change these outside of a transaction, so they must always be
+- * context switched.
+- */
+- mfspr r7, SPRN_TEXASR
+- std r7, VCPU_TEXASR(r9)
+-11:
+- mfspr r5, SPRN_TFHAR
+- mfspr r6, SPRN_TFIAR
+- std r5, VCPU_TFHAR(r9)
+- std r6, VCPU_TFIAR(r9)
+-
+- ld r0, PPC_LR_STKOFF(r1)
+- mtlr r0
+- blr
+-
+ /*
+ * Restore transactional state and TM-related registers.
+ * Called with r4 pointing to the vcpu struct.
+@@ -3381,119 +3263,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
+ 9: ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
+-
+-kvmppc_restore_tm:
+- mflr r0
+- std r0, PPC_LR_STKOFF(r1)
+-
+- /* Turn on TM/FP/VSX/VMX so we can restore them. */
+- mfmsr r5
+- li r6, MSR_TM >> 32
+- sldi r6, r6, 32
+- or r5, r5, r6
+- ori r5, r5, MSR_FP
+- oris r5, r5, (MSR_VEC | MSR_VSX)@h
+- mtmsrd r5
+-
+- /*
+- * The user may change these outside of a transaction, so they must
+- * always be context switched.
+- */
+- ld r5, VCPU_TFHAR(r4)
+- ld r6, VCPU_TFIAR(r4)
+- ld r7, VCPU_TEXASR(r4)
+- mtspr SPRN_TFHAR, r5
+- mtspr SPRN_TFIAR, r6
+- mtspr SPRN_TEXASR, r7
+-
+- ld r5, VCPU_MSR(r4)
+- rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+- beqlr /* TM not active in guest */
+- std r1, HSTATE_HOST_R1(r13)
+-
+- /* Make sure the failure summary is set, otherwise we'll program check
+- * when we trechkpt. It's possible that this might have been not set
+- * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
+- * host.
+- */
+- oris r7, r7, (TEXASR_FS)@h
+- mtspr SPRN_TEXASR, r7
+-
+- /*
+- * We need to load up the checkpointed state for the guest.
+- * We need to do this early as it will blow away any GPRs, VSRs and
+- * some SPRs.
+- */
+-
+- mr r31, r4
+- addi r3, r31, VCPU_FPRS_TM
+- bl load_fp_state
+- addi r3, r31, VCPU_VRS_TM
+- bl load_vr_state
+- mr r4, r31
+- lwz r7, VCPU_VRSAVE_TM(r4)
+- mtspr SPRN_VRSAVE, r7
+-
+- ld r5, VCPU_LR_TM(r4)
+- lwz r6, VCPU_CR_TM(r4)
+- ld r7, VCPU_CTR_TM(r4)
+- ld r8, VCPU_AMR_TM(r4)
+- ld r9, VCPU_TAR_TM(r4)
+- ld r10, VCPU_XER_TM(r4)
+- mtlr r5
+- mtcr r6
+- mtctr r7
+- mtspr SPRN_AMR, r8
+- mtspr SPRN_TAR, r9
+- mtxer r10
+-
+- /*
+- * Load up PPR and DSCR values but don't put them in the actual SPRs
+- * till the last moment to avoid running with userspace PPR and DSCR for
+- * too long.
+- */
+- ld r29, VCPU_DSCR_TM(r4)
+- ld r30, VCPU_PPR_TM(r4)
+-
+- std r2, PACATMSCRATCH(r13) /* Save TOC */
+-
+- /* Clear the MSR RI since r1, r13 are all going to be foobar. */
+- li r5, 0
+- mtmsrd r5, 1
+-
+- /* Load GPRs r0-r28 */
+- reg = 0
+- .rept 29
+- ld reg, VCPU_GPRS_TM(reg)(r31)
+- reg = reg + 1
+- .endr
+-
+- mtspr SPRN_DSCR, r29
+- mtspr SPRN_PPR, r30
+-
+- /* Load final GPRs */
+- ld 29, VCPU_GPRS_TM(29)(r31)
+- ld 30, VCPU_GPRS_TM(30)(r31)
+- ld 31, VCPU_GPRS_TM(31)(r31)
+-
+- /* TM checkpointed state is now setup. All GPRs are now volatile. */
+- TRECHKPT
+-
+- /* Now let's get back the state we need. */
+- HMT_MEDIUM
+- GET_PACA(r13)
+- ld r29, HSTATE_DSCR(r13)
+- mtspr SPRN_DSCR, r29
+- ld r4, HSTATE_KVM_VCPU(r13)
+- ld r1, HSTATE_HOST_R1(r13)
+- ld r2, PACATMSCRATCH(r13)
+-
+- /* Set the MSR RI since we have our registers back. */
+- li r5, MSR_RI
+- mtmsrd r5, 1
+- ld r0, PPC_LR_STKOFF(r1)
+- mtlr r0
+- blr
+ #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+ /*
+diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
+new file mode 100644
+index 000000000000..ba97789c41ca
+--- /dev/null
++++ b/arch/powerpc/kvm/tm.S
+@@ -0,0 +1,279 @@
++/*
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License, version 2, as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * Derived from book3s_hv_rmhandlers.S, which is:
++ *
++ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
++ *
++ */
++
++#include <asm/reg.h>
++#include <asm/ppc_asm.h>
++#include <asm/asm-offsets.h>
++#include <asm/export.h>
++#include <asm/tm.h>
++#include <asm/cputable.h>
++
++#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
++#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
++
++/*
++ * Save transactional state and TM-related registers.
++ * Called with r9 pointing to the vcpu struct.
++ * This can modify all checkpointed registers, but
++ * restores r1, r2 and r9 (vcpu pointer) before exit.
++ */
++_GLOBAL(kvmppc_save_tm)
++ mflr r0
++ std r0, PPC_LR_STKOFF(r1)
++
++ /* Turn on TM. */
++ mfmsr r8
++ li r0, 1
++ rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
++ mtmsrd r8
++
++#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
++ ld r5, VCPU_MSR(r9)
++ rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
++ beq 1f /* TM not active in guest. */
++#endif
++
++ std r1, HSTATE_HOST_R1(r13)
++ li r3, TM_CAUSE_KVM_RESCHED
++
++#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
++BEGIN_FTR_SECTION
++ /* Emulation of the treclaim instruction needs TEXASR before treclaim */
++ mfspr r6, SPRN_TEXASR
++ std r6, VCPU_ORIG_TEXASR(r9)
++END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
++#endif
++
++ /* Clear the MSR RI since r1, r13 are all going to be foobar. */
++ li r5, 0
++ mtmsrd r5, 1
++
++ /* All GPRs are volatile at this point. */
++ TRECLAIM(R3)
++
++ /* Temporarily store r13 and r9 so we have some regs to play with */
++ SET_SCRATCH0(r13)
++ GET_PACA(r13)
++ std r9, PACATMSCRATCH(r13)
++#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
++ ld r9, HSTATE_KVM_VCPU(r13)
++#endif
++
++ /* Get a few more GPRs free. */
++ std r29, VCPU_GPRS_TM(29)(r9)
++ std r30, VCPU_GPRS_TM(30)(r9)
++ std r31, VCPU_GPRS_TM(31)(r9)
++
++ /* Save away PPR and DSCR soon so don't run with user values. */
++ mfspr r31, SPRN_PPR
++ HMT_MEDIUM
++ mfspr r30, SPRN_DSCR
++#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
++ ld r29, HSTATE_DSCR(r13)
++ mtspr SPRN_DSCR, r29
++#endif
++
++ /* Save all but r9, r13 & r29-r31 */
++ reg = 0
++ .rept 29
++ .if (reg != 9) && (reg != 13)
++ std reg, VCPU_GPRS_TM(reg)(r9)
++ .endif
++ reg = reg + 1
++ .endr
++ /* ... now save r13 */
++ GET_SCRATCH0(r4)
++ std r4, VCPU_GPRS_TM(13)(r9)
++ /* ... and save r9 */
++ ld r4, PACATMSCRATCH(r13)
++ std r4, VCPU_GPRS_TM(9)(r9)
++
++ /* Reload stack pointer and TOC. */
++ ld r1, HSTATE_HOST_R1(r13)
++ ld r2, PACATOC(r13)
++
++ /* Set MSR RI now we have r1 and r13 back. */
++ li r5, MSR_RI
++ mtmsrd r5, 1
++
++ /* Save away checkpinted SPRs. */
++ std r31, VCPU_PPR_TM(r9)
++ std r30, VCPU_DSCR_TM(r9)
++ mflr r5
++ mfcr r6
++ mfctr r7
++ mfspr r8, SPRN_AMR
++ mfspr r10, SPRN_TAR
++ mfxer r11
++ std r5, VCPU_LR_TM(r9)
++ stw r6, VCPU_CR_TM(r9)
++ std r7, VCPU_CTR_TM(r9)
++ std r8, VCPU_AMR_TM(r9)
++ std r10, VCPU_TAR_TM(r9)
++ std r11, VCPU_XER_TM(r9)
++
++ /* Restore r12 as trap number. */
++ lwz r12, VCPU_TRAP(r9)
++
++ /* Save FP/VSX. */
++ addi r3, r9, VCPU_FPRS_TM
++ bl store_fp_state
++ addi r3, r9, VCPU_VRS_TM
++ bl store_vr_state
++ mfspr r6, SPRN_VRSAVE
++ stw r6, VCPU_VRSAVE_TM(r9)
++1:
++ /*
++ * We need to save these SPRs after the treclaim so that the software
++ * error code is recorded correctly in the TEXASR. Also the user may
++ * change these outside of a transaction, so they must always be
++ * context switched.
++ */
++ mfspr r7, SPRN_TEXASR
++ std r7, VCPU_TEXASR(r9)
++11:
++ mfspr r5, SPRN_TFHAR
++ mfspr r6, SPRN_TFIAR
++ std r5, VCPU_TFHAR(r9)
++ std r6, VCPU_TFIAR(r9)
++
++ ld r0, PPC_LR_STKOFF(r1)
++ mtlr r0
++ blr
++
++/*
++ * Restore transactional state and TM-related registers.
++ * Called with r4 pointing to the vcpu struct.
++ * This potentially modifies all checkpointed registers.
++ * It restores r1, r2, r4 from the PACA.
++ */
++_GLOBAL(kvmppc_restore_tm)
++ mflr r0
++ std r0, PPC_LR_STKOFF(r1)
++
++ /* Turn on TM/FP/VSX/VMX so we can restore them. */
++ mfmsr r5
++ li r6, MSR_TM >> 32
++ sldi r6, r6, 32
++ or r5, r5, r6
++ ori r5, r5, MSR_FP
++ oris r5, r5, (MSR_VEC | MSR_VSX)@h
++ mtmsrd r5
++
++ /*
++ * The user may change these outside of a transaction, so they must
++ * always be context switched.
++ */
++ ld r5, VCPU_TFHAR(r4)
++ ld r6, VCPU_TFIAR(r4)
++ ld r7, VCPU_TEXASR(r4)
++ mtspr SPRN_TFHAR, r5
++ mtspr SPRN_TFIAR, r6
++ mtspr SPRN_TEXASR, r7
++
++#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
++ ld r5, VCPU_MSR(r4)
++ rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
++ beqlr /* TM not active in guest */
++#endif
++ std r1, HSTATE_HOST_R1(r13)
++
++ /* Make sure the failure summary is set, otherwise we'll program check
++ * when we trechkpt. It's possible that this might have been not set
++ * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
++ * host.
++ */
++ oris r7, r7, (TEXASR_FS)@h
++ mtspr SPRN_TEXASR, r7
++
++ /*
++ * We need to load up the checkpointed state for the guest.
++ * We need to do this early as it will blow away any GPRs, VSRs and
++ * some SPRs.
++ */
++
++ mr r31, r4
++ addi r3, r31, VCPU_FPRS_TM
++ bl load_fp_state
++ addi r3, r31, VCPU_VRS_TM
++ bl load_vr_state
++ mr r4, r31
++ lwz r7, VCPU_VRSAVE_TM(r4)
++ mtspr SPRN_VRSAVE, r7
++
++ ld r5, VCPU_LR_TM(r4)
++ lwz r6, VCPU_CR_TM(r4)
++ ld r7, VCPU_CTR_TM(r4)
++ ld r8, VCPU_AMR_TM(r4)
++ ld r9, VCPU_TAR_TM(r4)
++ ld r10, VCPU_XER_TM(r4)
++ mtlr r5
++ mtcr r6
++ mtctr r7
++ mtspr SPRN_AMR, r8
++ mtspr SPRN_TAR, r9
++ mtxer r10
++
++ /*
++ * Load up PPR and DSCR values but don't put them in the actual SPRs
++ * till the last moment to avoid running with userspace PPR and DSCR for
++ * too long.
++ */
++ ld r29, VCPU_DSCR_TM(r4)
++ ld r30, VCPU_PPR_TM(r4)
++
++ std r2, PACATMSCRATCH(r13) /* Save TOC */
++
++ /* Clear the MSR RI since r1, r13 are all going to be foobar. */
++ li r5, 0
++ mtmsrd r5, 1
++
++ /* Load GPRs r0-r28 */
++ reg = 0
++ .rept 29
++ ld reg, VCPU_GPRS_TM(reg)(r31)
++ reg = reg + 1
++ .endr
++
++ mtspr SPRN_DSCR, r29
++ mtspr SPRN_PPR, r30
++
++ /* Load final GPRs */
++ ld 29, VCPU_GPRS_TM(29)(r31)
++ ld 30, VCPU_GPRS_TM(30)(r31)
++ ld 31, VCPU_GPRS_TM(31)(r31)
++
++ /* TM checkpointed state is now setup. All GPRs are now volatile. */
++ TRECHKPT
++
++ /* Now let's get back the state we need. */
++ HMT_MEDIUM
++ GET_PACA(r13)
++#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
++ ld r29, HSTATE_DSCR(r13)
++ mtspr SPRN_DSCR, r29
++ ld r4, HSTATE_KVM_VCPU(r13)
++#endif
++ ld r1, HSTATE_HOST_R1(r13)
++ ld r2, PACATMSCRATCH(r13)
++
++ /* Set the MSR RI since we have our registers back. */
++ li r5, MSR_RI
++ mtmsrd r5, 1
++ ld r0, PPC_LR_STKOFF(r1)
++ mtlr r0
++ blr
++#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+--
+2.13.7
+
diff --git a/patches.arch/KVM-PPC-Book3S-Use-correct-page-shift-in-H_STUFF_TCE.patch b/patches.arch/KVM-PPC-Book3S-Use-correct-page-shift-in-H_STUFF_TCE.patch
new file mode 100644
index 0000000000..ec745f7299
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-Use-correct-page-shift-in-H_STUFF_TCE.patch
@@ -0,0 +1,56 @@
+From c6b61661d229e42b58d5e511191e925d105a5cce Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Mon, 14 May 2018 20:00:27 +1000
+Subject: [PATCH] KVM: PPC: Book3S: Use correct page shift in H_STUFF_TCE
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: c6b61661d229e42b58d5e511191e925d105a5cce
+
+The other TCE handlers use the page shift from the guest-visible TCE table
+(described by kvmppc_spapr_tce_iommu_table), so let's make the H_STUFF_TCE
+handlers do the same thing.
+
+This should cause no behavioral change now, but soon we will allow
+iommu_table::it_page_shift to differ from the emulated table page size,
+so this will play a role.
+
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Acked-by: Balbir Singh <bsingharora@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_vio.c | 2 +-
+ arch/powerpc/kvm/book3s_64_vio_hv.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
+index 4dffa611376d..041e54d26750 100644
+--- a/arch/powerpc/kvm/book3s_64_vio.c
++++ b/arch/powerpc/kvm/book3s_64_vio.c
+@@ -615,7 +615,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
+ return H_PARAMETER;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+- unsigned long entry = ioba >> stit->tbl->it_page_shift;
++ unsigned long entry = ioba >> stt->page_shift;
+
+ for (i = 0; i < npages; ++i) {
+ ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
+diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
+index 6651f736a0b1..e220fabb2f5d 100644
+--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
++++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
+@@ -526,7 +526,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
+ return H_PARAMETER;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+- unsigned long entry = ioba >> stit->tbl->it_page_shift;
++ unsigned long entry = ioba >> stt->page_shift;
+
+ for (i = 0; i < npages; ++i) {
+ ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
+--
+2.13.7
+
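The fix above is a one-line indexing change; the sketch below restates it in
isolation, with stand-in structures (the field names other than page_shift and
it_page_shift are assumptions for illustration, not kernel definitions):

struct guest_tce_table { unsigned int page_shift; };    /* stands in for stt */
struct hw_iommu_table  { unsigned int it_page_shift; }; /* stands in for stit->tbl */

/* Minimal sketch, not kernel code: the guest-visible table and the hardware
 * IOMMU table may soon use different page sizes, so the TCE entry index for
 * H_STUFF_TCE must be derived from the guest-visible one. */
static unsigned long stuff_tce_entry(unsigned long ioba,
				     const struct guest_tce_table *stt)
{
	return ioba >> stt->page_shift;	/* was: ioba >> tbl->it_page_shift */
}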
diff --git a/patches.arch/KVM-PPC-Check-if-IOMMU-page-is-contained-in-the-pinn.patch b/patches.arch/KVM-PPC-Check-if-IOMMU-page-is-contained-in-the-pinn.patch
index 3e5422d07b..b94d557b5a 100644
--- a/patches.arch/KVM-PPC-Check-if-IOMMU-page-is-contained-in-the-pinn.patch
+++ b/patches.arch/KVM-PPC-Check-if-IOMMU-page-is-contained-in-the-pinn.patch
@@ -1,6 +1,6 @@
-From 026e8f4017f934f99c32a23db381b872a7a8202b Mon Sep 17 00:00:00 2001
+From 76fa4975f3ed12d15762bc979ca44078598ed8ee Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
-Date: Tue, 24 Jul 2018 15:32:47 +1000
+Date: Tue, 17 Jul 2018 17:19:13 +1000
Subject: [PATCH] KVM: PPC: Check if IOMMU page is contained in the pinned
physical page
@@ -46,22 +46,20 @@ Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-(cherry picked from commit 76fa4975f3ed12d15762bc979ca44078598ed8ee)
-Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Acked-by: Michal Suchanek <msuchanek@suse.de>
---
arch/powerpc/include/asm/mmu_context.h | 4 ++--
arch/powerpc/kvm/book3s_64_vio.c | 2 +-
arch/powerpc/kvm/book3s_64_vio_hv.c | 6 ++++--
- arch/powerpc/mm/mmu_context_iommu.c | 38 ++++++++++++++++++++++++++++++++--
+ arch/powerpc/mm/mmu_context_iommu.c | 37 ++++++++++++++++++++++++++++++++--
drivers/vfio/vfio_iommu_spapr_tce.c | 2 +-
- 5 files changed, 44 insertions(+), 8 deletions(-)
+ 5 files changed, 43 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
-index 6390e1aee7b7..2bfb46efb4e4 100644
+index 896efa559996..79d570cbf332 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
-@@ -34,9 +34,9 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
+@@ -35,9 +35,9 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
unsigned long ua, unsigned long entries);
extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
@@ -74,10 +72,10 @@ index 6390e1aee7b7..2bfb46efb4e4 100644
extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
#endif
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
-index 4dffa611376d..e14cec6bc339 100644
+index d066e37551ec..8c456fa691a5 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
-@@ -433,7 +433,7 @@ long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
+@@ -449,7 +449,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
/* This only handles v2 IOMMU type, v1 is handled via ioctl() */
return H_TOO_HARD;
@@ -87,10 +85,10 @@ index 4dffa611376d..e14cec6bc339 100644
if (mm_iommu_mapped_inc(mem))
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
-index 3adfd2f5301c..b333624c9924 100644
+index 925fc316a104..5b298f5a1a14 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
-@@ -261,7 +261,8 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
+@@ -279,7 +279,8 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (!mem)
return H_TOO_HARD;
@@ -100,7 +98,7 @@ index 3adfd2f5301c..b333624c9924 100644
return H_HARDWARE;
pua = (void *) vmalloc_to_phys(pua);
-@@ -421,7 +422,8 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+@@ -469,7 +470,8 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
if (mem)
@@ -111,14 +109,13 @@ index 3adfd2f5301c..b333624c9924 100644
if (!prereg) {
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
-index e0a2d8e806ed..78ff91f13e45 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -19,6 +19,7 @@
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <asm/mmu_context.h>
-+#include <asm/pgtable.h>
++#include <asm/pte-walk.h>
static DEFINE_MUTEX(mem_list_mutex);
@@ -130,7 +127,7 @@ index e0a2d8e806ed..78ff91f13e45 100644
u64 ua; /* userspace address */
u64 entries; /* number of entries in hpas[] */
u64 *hpas; /* vmalloc'ed */
-@@ -126,6 +128,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+@@ -125,6 +127,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
{
struct mm_iommu_table_group_mem_t *mem;
long i, j, ret = 0, locked_entries = 0;
@@ -139,7 +136,7 @@ index e0a2d8e806ed..78ff91f13e45 100644
struct page *page = NULL;
mutex_lock(&mem_list_mutex);
-@@ -160,6 +164,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+@@ -159,6 +163,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
goto unlock_exit;
}
@@ -152,7 +149,7 @@ index e0a2d8e806ed..78ff91f13e45 100644
mem->hpas = vzalloc(entries * sizeof(mem->hpas[0]));
if (!mem->hpas) {
kfree(mem);
-@@ -200,6 +210,24 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+@@ -199,6 +209,23 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
}
}
populate:
@@ -163,8 +160,7 @@ index e0a2d8e806ed..78ff91f13e45 100644
+ unsigned int compshift = compound_order(head);
+
+ local_irq_save(flags); /* disables as well */
-+ pte = find_linux_pte_or_hugepte(mm->pgd, ua, NULL,
-+ &pageshift);
++ pte = find_linux_pte(mm->pgd, ua, NULL, &pageshift);
+ local_irq_restore(flags);
+
+ /* Double check it is still the same pinned page */
@@ -177,7 +173,7 @@ index e0a2d8e806ed..78ff91f13e45 100644
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
}
-@@ -350,7 +378,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
+@@ -349,7 +376,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
EXPORT_SYMBOL_GPL(mm_iommu_find);
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
@@ -186,7 +182,7 @@ index e0a2d8e806ed..78ff91f13e45 100644
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
u64 *va = &mem->hpas[entry];
-@@ -358,6 +386,9 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+@@ -357,6 +384,9 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
if (entry >= mem->entries)
return -EFAULT;
@@ -196,7 +192,7 @@ index e0a2d8e806ed..78ff91f13e45 100644
*hpa = *va | (ua & ~PAGE_MASK);
return 0;
-@@ -365,7 +396,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+@@ -364,7 +394,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
@@ -205,7 +201,7 @@ index e0a2d8e806ed..78ff91f13e45 100644
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
void *va = &mem->hpas[entry];
-@@ -374,6 +405,9 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
+@@ -373,6 +403,9 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
if (entry >= mem->entries)
return -EFAULT;
@@ -216,7 +212,7 @@ index e0a2d8e806ed..78ff91f13e45 100644
if (!pa)
return -EFAULT;
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
-index 63112c36ab2d..f1593ebd60fc 100644
+index 2da5f054257a..7cd63b0c1a46 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -467,7 +467,7 @@ static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
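The core of the fix is the extra page-shift check in mm_iommu_ua_to_hpa(); a
hedged, self-contained sketch of that logic follows, with simplified stand-in
types (the struct, macro, and function names below are illustrative, not the
kernel's):

#define SKETCH_PAGE_SHIFT 12	/* assume 4K base pages for this sketch */

struct pinned_region {
	unsigned long ua;	/* userspace base address */
	unsigned long entries;	/* number of base-page entries */
	unsigned int  pageshift;	/* smallest page size backing the region */
	unsigned long *hpas;	/* host physical addresses, one per entry */
};

/* Returns 0 on success; fails if the requested IOMMU page is bigger than
 * the page that was actually pinned, which is the bug being fixed above. */
static long sketch_ua_to_hpa(struct pinned_region *mem, unsigned long ua,
			     unsigned int pageshift, unsigned long *hpa)
{
	unsigned long entry = (ua - mem->ua) >> SKETCH_PAGE_SHIFT;

	if (entry >= mem->entries)
		return -1;
	if (pageshift > mem->pageshift)
		return -1;

	*hpa = mem->hpas[entry] | (ua & ((1UL << SKETCH_PAGE_SHIFT) - 1));
	return 0;
}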
diff --git a/patches.arch/KVM-PPC-Fix-a-mmio_host_swabbed-uninitialized-usage-.patch b/patches.arch/KVM-PPC-Fix-a-mmio_host_swabbed-uninitialized-usage-.patch
new file mode 100644
index 0000000000..a694851a4f
--- /dev/null
+++ b/patches.arch/KVM-PPC-Fix-a-mmio_host_swabbed-uninitialized-usage-.patch
@@ -0,0 +1,59 @@
+From f19d1f367a506bc645f8d6695942b8873fc82c84 Mon Sep 17 00:00:00 2001
+From: Simon Guo <wei.guo.simon@gmail.com>
+Date: Mon, 7 May 2018 14:20:09 +0800
+Subject: [PATCH] KVM: PPC: Fix a mmio_host_swabbed uninitialized usage issue
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: f19d1f367a506bc645f8d6695942b8873fc82c84
+
+When KVM emulates a VMX store, it invokes kvmppc_get_vmx_data() to
+retrieve the VMX register value. kvmppc_get_vmx_data() checks
+mmio_host_swabbed to decide which doubleword of vr[] to use. But
+mmio_host_swabbed can be uninitialized during the VMX store procedure:
+
+kvmppc_emulate_loadstore
+ \- kvmppc_handle_store128_by2x64
+ \- kvmppc_get_vmx_data
+
+So vcpu->arch.mmio_host_swabbed is not meant to be used at all for
+emulation of store instructions, and this patch makes that true for
+VMX stores. This patch also initializes mmio_host_swabbed to avoid
+possible future problems.
+
+Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/emulate_loadstore.c | 1 +
+ arch/powerpc/kvm/powerpc.c | 2 +-
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
+index a382e15135e6..b8a3aefc3033 100644
+--- a/arch/powerpc/kvm/emulate_loadstore.c
++++ b/arch/powerpc/kvm/emulate_loadstore.c
+@@ -111,6 +111,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
+ vcpu->arch.mmio_sp64_extend = 0;
+ vcpu->arch.mmio_sign_extend = 0;
+ vcpu->arch.mmio_vmx_copy_nums = 0;
++ vcpu->arch.mmio_host_swabbed = 0;
+
+ switch (get_op(inst)) {
+ case 31:
+diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
+index 4e387647b5af..bef27b16d233 100644
+--- a/arch/powerpc/kvm/powerpc.c
++++ b/arch/powerpc/kvm/powerpc.c
+@@ -1374,7 +1374,7 @@ static inline int kvmppc_get_vmx_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
+ if (di > 1)
+ return -1;
+
+- if (vcpu->arch.mmio_host_swabbed)
++ if (kvmppc_need_byteswap(vcpu))
+ di = 1 - di;
+
+ w0 = vrs.u[di * 2];
+--
+2.13.7
+
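For clarity, a stand-alone C sketch of the doubleword selection that
kvmppc_get_vmx_data() performs after the fix (names and types are simplified;
the real decision uses kvmppc_need_byteswap(vcpu) on the 128-bit VR):

#include <stdbool.h>

/* A 128-bit VMX register viewed as two 64-bit halves. */
struct vmx_reg { unsigned long long u[2]; };

/* Select which half backs the MMIO access: the index depends on whether the
 * guest endianness differs from the host, not on mmio_host_swabbed, which is
 * only meaningful for loads and may be uninitialized on stores. */
static int get_vmx_dword(const struct vmx_reg *vr, int di,
			 bool guest_needs_byteswap, unsigned long long *val)
{
	if (di > 1)
		return -1;
	if (guest_needs_byteswap)
		di = 1 - di;
	*val = vr->u[di];
	return 0;
}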
diff --git a/patches.arch/KVM-PPC-Make-iommu_table-it_userspace-big-endian.patch b/patches.arch/KVM-PPC-Make-iommu_table-it_userspace-big-endian.patch
new file mode 100644
index 0000000000..8b03359da0
--- /dev/null
+++ b/patches.arch/KVM-PPC-Make-iommu_table-it_userspace-big-endian.patch
@@ -0,0 +1,193 @@
+From 3474389de987c8fcf938cc3a507d5fe9cc0eb142 Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Wed, 4 Jul 2018 16:13:46 +1000
+Subject: [PATCH] KVM: PPC: Make iommu_table::it_userspace big endian
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc1
+Git-commit: 00a5c58d9499bd0c290b57205f43a70f2e69d3f6
+
+We are going to reuse the multilevel TCE code for the userspace copy of
+the TCE table, and since the TCE table is big endian, let's make the copy
+big endian too.
+
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Acked-by: Paul Mackerras <paulus@ozlabs.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/iommu.h | 2 +-
+ arch/powerpc/kvm/book3s_64_vio.c | 11 ++++++-----
+ arch/powerpc/kvm/book3s_64_vio_hv.c | 10 +++++-----
+ drivers/vfio/vfio_iommu_spapr_tce.c | 19 +++++++++----------
+ 4 files changed, 21 insertions(+), 21 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
+index 8a8ce220d7d0..470124740864 100644
+--- a/arch/powerpc/include/asm/iommu.h
++++ b/arch/powerpc/include/asm/iommu.h
+@@ -117,7 +117,7 @@ struct iommu_table {
+ unsigned long *it_map; /* A simple allocation bitmap for now */
+ unsigned long it_page_shift;/* table iommu page size */
+ struct list_head it_group_list;/* List of iommu_table_group_link */
+- unsigned long *it_userspace; /* userspace view of the table */
++ __be64 *it_userspace; /* userspace view of the table */
+ struct iommu_table_ops *it_ops;
+ struct kref it_kref;
+ };
+diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
+index 8c456fa691a5..963059af8f3d 100644
+--- a/arch/powerpc/kvm/book3s_64_vio.c
++++ b/arch/powerpc/kvm/book3s_64_vio.c
+@@ -378,19 +378,19 @@ static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
+ {
+ struct mm_iommu_table_group_mem_t *mem = NULL;
+ const unsigned long pgsize = 1ULL << tbl->it_page_shift;
+- unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
++ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+
+ if (!pua)
+ /* it_userspace allocation might be delayed */
+ return H_TOO_HARD;
+
+- mem = mm_iommu_lookup(kvm->mm, *pua, pgsize);
++ mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua), pgsize);
+ if (!mem)
+ return H_TOO_HARD;
+
+ mm_iommu_mapped_dec(mem);
+
+- *pua = 0;
++ *pua = cpu_to_be64(0);
+
+ return H_SUCCESS;
+ }
+@@ -437,7 +437,8 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
+ enum dma_data_direction dir)
+ {
+ long ret;
+- unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
++ unsigned long hpa;
++ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+ struct mm_iommu_table_group_mem_t *mem;
+
+ if (!pua)
+@@ -464,7 +465,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
+ if (dir != DMA_NONE)
+ kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
+
+- *pua = ua;
++ *pua = cpu_to_be64(ua);
+
+ return 0;
+ }
+diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
+index 5b298f5a1a14..841aef714929 100644
+--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
++++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
+@@ -200,7 +200,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
+ {
+ struct mm_iommu_table_group_mem_t *mem = NULL;
+ const unsigned long pgsize = 1ULL << tbl->it_page_shift;
+- unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
++ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+
+ if (!pua)
+ /* it_userspace allocation might be delayed */
+@@ -210,13 +210,13 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
+ if (WARN_ON_ONCE_RM(!pua))
+ return H_HARDWARE;
+
+- mem = mm_iommu_lookup_rm(kvm->mm, *pua, pgsize);
++ mem = mm_iommu_lookup_rm(kvm->mm, be64_to_cpu(*pua), pgsize);
+ if (!mem)
+ return H_TOO_HARD;
+
+ mm_iommu_mapped_dec(mem);
+
+- *pua = 0;
++ *pua = cpu_to_be64(0);
+
+ return H_SUCCESS;
+ }
+@@ -268,7 +268,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
+ {
+ long ret;
+ unsigned long hpa = 0;
+- unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
++ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+ struct mm_iommu_table_group_mem_t *mem;
+
+ if (!pua)
+@@ -303,7 +303,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
+ if (dir != DMA_NONE)
+ kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
+
+- *pua = ua;
++ *pua = cpu_to_be64(ua);
+
+ return 0;
+ }
+diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
+index b4c68f3b82be..a78974e1fee6 100644
+--- a/drivers/vfio/vfio_iommu_spapr_tce.c
++++ b/drivers/vfio/vfio_iommu_spapr_tce.c
+@@ -230,7 +230,7 @@ static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl,
+ decrement_locked_vm(mm, cb >> PAGE_SHIFT);
+ return -ENOMEM;
+ }
+- tbl->it_userspace = uas;
++ tbl->it_userspace = (__be64 *) uas;
+
+ return 0;
+ }
+@@ -482,20 +482,20 @@ static void tce_iommu_unuse_page_v2(struct tce_container *container,
+ struct mm_iommu_table_group_mem_t *mem = NULL;
+ int ret;
+ unsigned long hpa = 0;
+- unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
++ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+
+ if (!pua)
+ return;
+
+- ret = tce_iommu_prereg_ua_to_hpa(container, *pua, tbl->it_page_shift,
+- &hpa, &mem);
++ ret = tce_iommu_prereg_ua_to_hpa(container, be64_to_cpu(*pua),
++ tbl->it_page_shift, &hpa, &mem);
+ if (ret)
+- pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n",
+- __func__, *pua, entry, ret);
++ pr_debug("%s: tce %llx at #%lx was not cached, ret=%d\n",
++ __func__, be64_to_cpu(*pua), entry, ret);
+ if (mem)
+ mm_iommu_mapped_dec(mem);
+
+- *pua = 0;
++ *pua = cpu_to_be64(0);
+ }
+
+ static int tce_iommu_clear(struct tce_container *container,
+@@ -605,8 +605,7 @@ static long tce_iommu_build_v2(struct tce_container *container,
+
+ for (i = 0; i < pages; ++i) {
+ struct mm_iommu_table_group_mem_t *mem = NULL;
+- unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl,
+- entry + i);
++ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i);
+
+ ret = tce_iommu_prereg_ua_to_hpa(container,
+ tce, tbl->it_page_shift, &hpa, &mem);
+@@ -640,7 +639,7 @@ static long tce_iommu_build_v2(struct tce_container *container,
+ if (dirtmp != DMA_NONE)
+ tce_iommu_unuse_page_v2(container, tbl, entry + i);
+
+- *pua = tce;
++ *pua = cpu_to_be64(tce);
+
+ tce += IOMMU_PAGE_SIZE(tbl);
+ }
+--
+2.13.7
+
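After this change every access to it_userspace entries goes through an
endianness conversion; a minimal sketch of the resulting pattern (assuming the
kernel's __be64, cpu_to_be64() and be64_to_cpu() helpers are in scope via the
usual byteorder headers; the helper names below are illustrative):

/* it_userspace entries are stored big-endian regardless of host endianness. */
static inline unsigned long read_user_tce(__be64 *pua)
{
	return be64_to_cpu(*pua);
}

static inline void write_user_tce(__be64 *pua, unsigned long ua)
{
	*pua = cpu_to_be64(ua);	/* clearing an entry: write_user_tce(pua, 0) */
}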
diff --git a/patches.arch/KVM-PPC-Move-nip-ctr-lr-xer-registers-to-pt_regs-in-.patch b/patches.arch/KVM-PPC-Move-nip-ctr-lr-xer-registers-to-pt_regs-in-.patch
new file mode 100644
index 0000000000..d15a675d20
--- /dev/null
+++ b/patches.arch/KVM-PPC-Move-nip-ctr-lr-xer-registers-to-pt_regs-in-.patch
@@ -0,0 +1,568 @@
+From 8ed09b010711dfaab8ef261f23e968e002ec04f8 Mon Sep 17 00:00:00 2001
+From: Simon Guo <wei.guo.simon@gmail.com>
+Date: Mon, 7 May 2018 14:20:08 +0800
+Subject: [PATCH] KVM: PPC: Move nip/ctr/lr/xer registers to pt_regs in
+ kvm_vcpu_arch
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: 173c520a049f57e2af498a3f0557d07797ce1c1b
+
+This patch moves the nip/ctr/lr/xer registers from scattered places in
+kvm_vcpu_arch to the pt_regs structure.
+
+The cr register is "unsigned long" in pt_regs and u32 in vcpu->arch.
+It will need more consideration and may be moved in later patches.
+
+Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/kvm_book3s.h | 16 ++++++-------
+ arch/powerpc/include/asm/kvm_book3s_64.h | 12 +++++-----
+ arch/powerpc/include/asm/kvm_booke.h | 16 ++++++-------
+ arch/powerpc/include/asm/kvm_host.h | 4 ----
+ arch/powerpc/kernel/asm-offsets.c | 16 ++++++-------
+ arch/powerpc/kvm/book3s_32_mmu.c | 2 +-
+ arch/powerpc/kvm/book3s_hv.c | 6 ++---
+ arch/powerpc/kvm/book3s_hv_tm.c | 10 ++++----
+ arch/powerpc/kvm/book3s_hv_tm_builtin.c | 10 ++++----
+ arch/powerpc/kvm/book3s_pr.c | 16 ++++++-------
+ arch/powerpc/kvm/booke.c | 41 +++++++++++++++++---------------
+ arch/powerpc/kvm/booke_emulate.c | 6 ++---
+ arch/powerpc/kvm/e500_emulate.c | 2 +-
+ arch/powerpc/kvm/e500_mmu.c | 2 +-
+ 14 files changed, 79 insertions(+), 80 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
+index e3182f7ae499..20d3d5a87296 100644
+--- a/arch/powerpc/include/asm/kvm_book3s.h
++++ b/arch/powerpc/include/asm/kvm_book3s.h
+@@ -295,42 +295,42 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
+
+ static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
+ {
+- vcpu->arch.xer = val;
++ vcpu->arch.regs.xer = val;
+ }
+
+ static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
+ {
+- return vcpu->arch.xer;
++ return vcpu->arch.regs.xer;
+ }
+
+ static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
+ {
+- vcpu->arch.ctr = val;
++ vcpu->arch.regs.ctr = val;
+ }
+
+ static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
+ {
+- return vcpu->arch.ctr;
++ return vcpu->arch.regs.ctr;
+ }
+
+ static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
+ {
+- vcpu->arch.lr = val;
++ vcpu->arch.regs.link = val;
+ }
+
+ static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
+ {
+- return vcpu->arch.lr;
++ return vcpu->arch.regs.link;
+ }
+
+ static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
+ {
+- vcpu->arch.pc = val;
++ vcpu->arch.regs.nip = val;
+ }
+
+ static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
+ {
+- return vcpu->arch.pc;
++ return vcpu->arch.regs.nip;
+ }
+
+ static inline u64 kvmppc_get_msr(struct kvm_vcpu *vcpu);
+diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
+index 38dbcad086d6..dc435a5af7d6 100644
+--- a/arch/powerpc/include/asm/kvm_book3s_64.h
++++ b/arch/powerpc/include/asm/kvm_book3s_64.h
+@@ -483,9 +483,9 @@ static inline u64 sanitize_msr(u64 msr)
+ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
+ {
+ vcpu->arch.cr = vcpu->arch.cr_tm;
+- vcpu->arch.xer = vcpu->arch.xer_tm;
+- vcpu->arch.lr = vcpu->arch.lr_tm;
+- vcpu->arch.ctr = vcpu->arch.ctr_tm;
++ vcpu->arch.regs.xer = vcpu->arch.xer_tm;
++ vcpu->arch.regs.link = vcpu->arch.lr_tm;
++ vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
+ vcpu->arch.amr = vcpu->arch.amr_tm;
+ vcpu->arch.ppr = vcpu->arch.ppr_tm;
+ vcpu->arch.dscr = vcpu->arch.dscr_tm;
+@@ -500,9 +500,9 @@ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
+ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
+ {
+ vcpu->arch.cr_tm = vcpu->arch.cr;
+- vcpu->arch.xer_tm = vcpu->arch.xer;
+- vcpu->arch.lr_tm = vcpu->arch.lr;
+- vcpu->arch.ctr_tm = vcpu->arch.ctr;
++ vcpu->arch.xer_tm = vcpu->arch.regs.xer;
++ vcpu->arch.lr_tm = vcpu->arch.regs.link;
++ vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
+ vcpu->arch.amr_tm = vcpu->arch.amr;
+ vcpu->arch.ppr_tm = vcpu->arch.ppr;
+ vcpu->arch.dscr_tm = vcpu->arch.dscr;
+diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
+index f5fc9569ef56..d513e3ed1c65 100644
+--- a/arch/powerpc/include/asm/kvm_booke.h
++++ b/arch/powerpc/include/asm/kvm_booke.h
+@@ -56,12 +56,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
+
+ static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
+ {
+- vcpu->arch.xer = val;
++ vcpu->arch.regs.xer = val;
+ }
+
+ static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
+ {
+- return vcpu->arch.xer;
++ return vcpu->arch.regs.xer;
+ }
+
+ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
+@@ -72,32 +72,32 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
+
+ static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
+ {
+- vcpu->arch.ctr = val;
++ vcpu->arch.regs.ctr = val;
+ }
+
+ static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
+ {
+- return vcpu->arch.ctr;
++ return vcpu->arch.regs.ctr;
+ }
+
+ static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
+ {
+- vcpu->arch.lr = val;
++ vcpu->arch.regs.link = val;
+ }
+
+ static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
+ {
+- return vcpu->arch.lr;
++ return vcpu->arch.regs.link;
+ }
+
+ static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
+ {
+- vcpu->arch.pc = val;
++ vcpu->arch.regs.nip = val;
+ }
+
+ static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
+ {
+- return vcpu->arch.pc;
++ return vcpu->arch.regs.nip;
+ }
+
+ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
+diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
+index 19184c742c16..1dc540b38eba 100644
+--- a/arch/powerpc/include/asm/kvm_host.h
++++ b/arch/powerpc/include/asm/kvm_host.h
+@@ -526,14 +526,10 @@ struct kvm_vcpu_arch {
+ u32 qpr[32];
+ #endif
+
+- ulong pc;
+- ulong ctr;
+- ulong lr;
+ #ifdef CONFIG_PPC_BOOK3S
+ ulong tar;
+ #endif
+
+- ulong xer;
+ u32 cr;
+
+ #ifdef CONFIG_PPC_BOOK3S
+diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
+index 4c374dec043c..aae1592d7c8a 100644
+--- a/arch/powerpc/kernel/asm-offsets.c
++++ b/arch/powerpc/kernel/asm-offsets.c
+@@ -425,14 +425,14 @@ int main(void)
+ #ifdef CONFIG_ALTIVEC
+ OFFSET(VCPU_VRS, kvm_vcpu, arch.vr.vr);
+ #endif
+- OFFSET(VCPU_XER, kvm_vcpu, arch.xer);
+- OFFSET(VCPU_CTR, kvm_vcpu, arch.ctr);
+- OFFSET(VCPU_LR, kvm_vcpu, arch.lr);
++ OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
++ OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);
++ OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
+ #ifdef CONFIG_PPC_BOOK3S
+ OFFSET(VCPU_TAR, kvm_vcpu, arch.tar);
+ #endif
+ OFFSET(VCPU_CR, kvm_vcpu, arch.cr);
+- OFFSET(VCPU_PC, kvm_vcpu, arch.pc);
++ OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
+ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr);
+ OFFSET(VCPU_SRR0, kvm_vcpu, arch.shregs.srr0);
+@@ -689,10 +689,10 @@ int main(void)
+
+ #else /* CONFIG_PPC_BOOK3S */
+ OFFSET(VCPU_CR, kvm_vcpu, arch.cr);
+- OFFSET(VCPU_XER, kvm_vcpu, arch.xer);
+- OFFSET(VCPU_LR, kvm_vcpu, arch.lr);
+- OFFSET(VCPU_CTR, kvm_vcpu, arch.ctr);
+- OFFSET(VCPU_PC, kvm_vcpu, arch.pc);
++ OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
++ OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
++ OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);
++ OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
+ OFFSET(VCPU_SPRG9, kvm_vcpu, arch.sprg9);
+ OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst);
+ OFFSET(VCPU_FAULT_DEAR, kvm_vcpu, arch.fault_dear);
+diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
+index 1992676c7a94..45c8ea4a0487 100644
+--- a/arch/powerpc/kvm/book3s_32_mmu.c
++++ b/arch/powerpc/kvm/book3s_32_mmu.c
+@@ -52,7 +52,7 @@
+ static inline bool check_debug_ip(struct kvm_vcpu *vcpu)
+ {
+ #ifdef DEBUG_MMU_PTE_IP
+- return vcpu->arch.pc == DEBUG_MMU_PTE_IP;
++ return vcpu->arch.regs.nip == DEBUG_MMU_PTE_IP;
+ #else
+ return true;
+ #endif
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index 73dd78918e9b..68e8ed80f57c 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -399,13 +399,13 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
+
+ pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
+ pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
+- vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
++ vcpu->arch.regs.nip, vcpu->arch.shregs.msr, vcpu->arch.trap);
+ for (r = 0; r < 16; ++r)
+ pr_err("r%2d = %.16lx r%d = %.16lx\n",
+ r, kvmppc_get_gpr(vcpu, r),
+ r+16, kvmppc_get_gpr(vcpu, r+16));
+ pr_err("ctr = %.16lx lr = %.16lx\n",
+- vcpu->arch.ctr, vcpu->arch.lr);
++ vcpu->arch.regs.ctr, vcpu->arch.regs.link);
+ pr_err("srr0 = %.16llx srr1 = %.16llx\n",
+ vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
+ pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
+@@ -413,7 +413,7 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
+ pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
+ vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
+ pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
+- vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
++ vcpu->arch.cr, vcpu->arch.regs.xer, vcpu->arch.shregs.dsisr);
+ pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
+ pr_err("fault dar = %.16lx dsisr = %.8x\n",
+ vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
+index bf710ad3a6d7..008285058f9b 100644
+--- a/arch/powerpc/kvm/book3s_hv_tm.c
++++ b/arch/powerpc/kvm/book3s_hv_tm.c
+@@ -19,7 +19,7 @@ static void emulate_tx_failure(struct kvm_vcpu *vcpu, u64 failure_cause)
+ u64 texasr, tfiar;
+ u64 msr = vcpu->arch.shregs.msr;
+
+- tfiar = vcpu->arch.pc & ~0x3ull;
++ tfiar = vcpu->arch.regs.nip & ~0x3ull;
+ texasr = (failure_cause << 56) | TEXASR_ABORT | TEXASR_FS | TEXASR_EXACT;
+ if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr))
+ texasr |= TEXASR_SUSP;
+@@ -57,8 +57,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
+ (newmsr & MSR_TM)));
+ newmsr = sanitize_msr(newmsr);
+ vcpu->arch.shregs.msr = newmsr;
+- vcpu->arch.cfar = vcpu->arch.pc - 4;
+- vcpu->arch.pc = vcpu->arch.shregs.srr0;
++ vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
++ vcpu->arch.regs.nip = vcpu->arch.shregs.srr0;
+ return RESUME_GUEST;
+
+ case PPC_INST_RFEBB:
+@@ -90,8 +90,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
+ vcpu->arch.bescr = bescr;
+ msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+ vcpu->arch.shregs.msr = msr;
+- vcpu->arch.cfar = vcpu->arch.pc - 4;
+- vcpu->arch.pc = vcpu->arch.ebbrr;
++ vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
++ vcpu->arch.regs.nip = vcpu->arch.ebbrr;
+ return RESUME_GUEST;
+
+ case PPC_INST_MTMSRD:
+diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
+index d98ccfd2b88c..b2c7c6fca4f9 100644
+--- a/arch/powerpc/kvm/book3s_hv_tm_builtin.c
++++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
+@@ -35,8 +35,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
+ return 0;
+ newmsr = sanitize_msr(newmsr);
+ vcpu->arch.shregs.msr = newmsr;
+- vcpu->arch.cfar = vcpu->arch.pc - 4;
+- vcpu->arch.pc = vcpu->arch.shregs.srr0;
++ vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
++ vcpu->arch.regs.nip = vcpu->arch.shregs.srr0;
+ return 1;
+
+ case PPC_INST_RFEBB:
+@@ -58,8 +58,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
+ mtspr(SPRN_BESCR, bescr);
+ msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+ vcpu->arch.shregs.msr = msr;
+- vcpu->arch.cfar = vcpu->arch.pc - 4;
+- vcpu->arch.pc = mfspr(SPRN_EBBRR);
++ vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
++ vcpu->arch.regs.nip = mfspr(SPRN_EBBRR);
+ return 1;
+
+ case PPC_INST_MTMSRD:
+@@ -103,7 +103,7 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
+ void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu)
+ {
+ vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */
+- vcpu->arch.pc = vcpu->arch.tfhar;
++ vcpu->arch.regs.nip = vcpu->arch.tfhar;
+ copy_from_checkpoint(vcpu);
+ vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000;
+ }
+diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
+index 899bc9a02ab5..67061d399cd9 100644
+--- a/arch/powerpc/kvm/book3s_pr.c
++++ b/arch/powerpc/kvm/book3s_pr.c
+@@ -162,10 +162,10 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
+ svcpu->gpr[12] = vcpu->arch.regs.gpr[12];
+ svcpu->gpr[13] = vcpu->arch.regs.gpr[13];
+ svcpu->cr = vcpu->arch.cr;
+- svcpu->xer = vcpu->arch.xer;
+- svcpu->ctr = vcpu->arch.ctr;
+- svcpu->lr = vcpu->arch.lr;
+- svcpu->pc = vcpu->arch.pc;
++ svcpu->xer = vcpu->arch.regs.xer;
++ svcpu->ctr = vcpu->arch.regs.ctr;
++ svcpu->lr = vcpu->arch.regs.link;
++ svcpu->pc = vcpu->arch.regs.nip;
+ #ifdef CONFIG_PPC_BOOK3S_64
+ svcpu->shadow_fscr = vcpu->arch.shadow_fscr;
+ #endif
+@@ -209,10 +209,10 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
+ vcpu->arch.regs.gpr[12] = svcpu->gpr[12];
+ vcpu->arch.regs.gpr[13] = svcpu->gpr[13];
+ vcpu->arch.cr = svcpu->cr;
+- vcpu->arch.xer = svcpu->xer;
+- vcpu->arch.ctr = svcpu->ctr;
+- vcpu->arch.lr = svcpu->lr;
+- vcpu->arch.pc = svcpu->pc;
++ vcpu->arch.regs.xer = svcpu->xer;
++ vcpu->arch.regs.ctr = svcpu->ctr;
++ vcpu->arch.regs.link = svcpu->lr;
++ vcpu->arch.regs.nip = svcpu->pc;
+ vcpu->arch.shadow_srr1 = svcpu->shadow_srr1;
+ vcpu->arch.fault_dar = svcpu->fault_dar;
+ vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
+diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
+index 071b87ee682f..0521910be3e2 100644
+--- a/arch/powerpc/kvm/booke.c
++++ b/arch/powerpc/kvm/booke.c
+@@ -77,8 +77,10 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
+ {
+ int i;
+
+- printk("pc: %08lx msr: %08llx\n", vcpu->arch.pc, vcpu->arch.shared->msr);
+- printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
++ printk("pc: %08lx msr: %08llx\n", vcpu->arch.regs.nip,
++ vcpu->arch.shared->msr);
++ printk("lr: %08lx ctr: %08lx\n", vcpu->arch.regs.link,
++ vcpu->arch.regs.ctr);
+ printk("srr0: %08llx srr1: %08llx\n", vcpu->arch.shared->srr0,
+ vcpu->arch.shared->srr1);
+
+@@ -484,24 +486,25 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
+ if (allowed) {
+ switch (int_class) {
+ case INT_CLASS_NONCRIT:
+- set_guest_srr(vcpu, vcpu->arch.pc,
++ set_guest_srr(vcpu, vcpu->arch.regs.nip,
+ vcpu->arch.shared->msr);
+ break;
+ case INT_CLASS_CRIT:
+- set_guest_csrr(vcpu, vcpu->arch.pc,
++ set_guest_csrr(vcpu, vcpu->arch.regs.nip,
+ vcpu->arch.shared->msr);
+ break;
+ case INT_CLASS_DBG:
+- set_guest_dsrr(vcpu, vcpu->arch.pc,
++ set_guest_dsrr(vcpu, vcpu->arch.regs.nip,
+ vcpu->arch.shared->msr);
+ break;
+ case INT_CLASS_MC:
+- set_guest_mcsrr(vcpu, vcpu->arch.pc,
++ set_guest_mcsrr(vcpu, vcpu->arch.regs.nip,
+ vcpu->arch.shared->msr);
+ break;
+ }
+
+- vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
++ vcpu->arch.regs.nip = vcpu->arch.ivpr |
++ vcpu->arch.ivor[priority];
+ if (update_esr == true)
+ kvmppc_set_esr(vcpu, vcpu->arch.queued_esr);
+ if (update_dear == true)
+@@ -819,7 +822,7 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
+
+ case EMULATE_FAIL:
+ printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
+- __func__, vcpu->arch.pc, vcpu->arch.last_inst);
++ __func__, vcpu->arch.regs.nip, vcpu->arch.last_inst);
+ /* For debugging, encode the failing instruction and
+ * report it to userspace. */
+ run->hw.hardware_exit_reason = ~0ULL << 32;
+@@ -868,7 +871,7 @@ static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
+ */
+ vcpu->arch.dbsr = 0;
+ run->debug.arch.status = 0;
+- run->debug.arch.address = vcpu->arch.pc;
++ run->debug.arch.address = vcpu->arch.regs.nip;
+
+ if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
+ run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
+@@ -964,7 +967,7 @@ static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+
+ case EMULATE_FAIL:
+ pr_debug("%s: load instruction from guest address %lx failed\n",
+- __func__, vcpu->arch.pc);
++ __func__, vcpu->arch.regs.nip);
+ /* For debugging, encode the failing instruction and
+ * report it to userspace. */
+ run->hw.hardware_exit_reason = ~0ULL << 32;
+@@ -1162,7 +1165,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ case BOOKE_INTERRUPT_SPE_FP_DATA:
+ case BOOKE_INTERRUPT_SPE_FP_ROUND:
+ printk(KERN_CRIT "%s: unexpected SPE interrupt %u at %08lx\n",
+- __func__, exit_nr, vcpu->arch.pc);
++ __func__, exit_nr, vcpu->arch.regs.nip);
+ run->hw.hardware_exit_reason = exit_nr;
+ r = RESUME_HOST;
+ break;
+@@ -1292,7 +1295,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ }
+
+ case BOOKE_INTERRUPT_ITLB_MISS: {
+- unsigned long eaddr = vcpu->arch.pc;
++ unsigned long eaddr = vcpu->arch.regs.nip;
+ gpa_t gpaddr;
+ gfn_t gfn;
+ int gtlb_index;
+@@ -1384,7 +1387,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+ int i;
+ int r;
+
+- vcpu->arch.pc = 0;
++ vcpu->arch.regs.nip = 0;
+ vcpu->arch.shared->pir = vcpu->vcpu_id;
+ kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
+ kvmppc_set_msr(vcpu, 0);
+@@ -1432,10 +1435,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+ {
+ int i;
+
+- regs->pc = vcpu->arch.pc;
++ regs->pc = vcpu->arch.regs.nip;
+ regs->cr = kvmppc_get_cr(vcpu);
+- regs->ctr = vcpu->arch.ctr;
+- regs->lr = vcpu->arch.lr;
++ regs->ctr = vcpu->arch.regs.ctr;
++ regs->lr = vcpu->arch.regs.link;
+ regs->xer = kvmppc_get_xer(vcpu);
+ regs->msr = vcpu->arch.shared->msr;
+ regs->srr0 = kvmppc_get_srr0(vcpu);
+@@ -1460,10 +1463,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+ {
+ int i;
+
+- vcpu->arch.pc = regs->pc;
++ vcpu->arch.regs.nip = regs->pc;
+ kvmppc_set_cr(vcpu, regs->cr);
+- vcpu->arch.ctr = regs->ctr;
+- vcpu->arch.lr = regs->lr;
++ vcpu->arch.regs.ctr = regs->ctr;
++ vcpu->arch.regs.link = regs->lr;
+ kvmppc_set_xer(vcpu, regs->xer);
+ kvmppc_set_msr(vcpu, regs->msr);
+ kvmppc_set_srr0(vcpu, regs->srr0);
+diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
+index a82f64502de1..d23e582f0fee 100644
+--- a/arch/powerpc/kvm/booke_emulate.c
++++ b/arch/powerpc/kvm/booke_emulate.c
+@@ -34,19 +34,19 @@
+
+ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
+ {
+- vcpu->arch.pc = vcpu->arch.shared->srr0;
++ vcpu->arch.regs.nip = vcpu->arch.shared->srr0;
+ kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
+ }
+
+ static void kvmppc_emul_rfdi(struct kvm_vcpu *vcpu)
+ {
+- vcpu->arch.pc = vcpu->arch.dsrr0;
++ vcpu->arch.regs.nip = vcpu->arch.dsrr0;
+ kvmppc_set_msr(vcpu, vcpu->arch.dsrr1);
+ }
+
+ static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu)
+ {
+- vcpu->arch.pc = vcpu->arch.csrr0;
++ vcpu->arch.regs.nip = vcpu->arch.csrr0;
+ kvmppc_set_msr(vcpu, vcpu->arch.csrr1);
+ }
+
+diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
+index 8f871fb75228..3f8189eb56ed 100644
+--- a/arch/powerpc/kvm/e500_emulate.c
++++ b/arch/powerpc/kvm/e500_emulate.c
+@@ -94,7 +94,7 @@ static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ switch (get_oc(inst)) {
+ case EHPRIV_OC_DEBUG:
+ run->exit_reason = KVM_EXIT_DEBUG;
+- run->debug.arch.address = vcpu->arch.pc;
++ run->debug.arch.address = vcpu->arch.regs.nip;
+ run->debug.arch.status = 0;
+ kvmppc_account_exit(vcpu, DEBUG_EXITS);
+ emulated = EMULATE_EXIT_USER;
+diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
+index ddbf8f0284c0..24296f4cadc6 100644
+--- a/arch/powerpc/kvm/e500_mmu.c
++++ b/arch/powerpc/kvm/e500_mmu.c
+@@ -513,7 +513,7 @@ void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
+ {
+ unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
+
+- kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as);
++ kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.regs.nip, as);
+ }
+
+ void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
+--
+2.13.7
+
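The mechanical part of the change is easiest to see as the accessor pattern it
leaves behind; a reduced sketch with abbreviated stand-in structures (not the
real kvm_vcpu layout):

struct sketch_pt_regs   { unsigned long nip, link, ctr, xer; };
struct sketch_vcpu_arch { struct sketch_pt_regs regs; };
struct sketch_vcpu      { struct sketch_vcpu_arch arch; };

/* was: return vcpu->arch.pc; the field now lives in the embedded pt_regs */
static inline unsigned long sketch_get_pc(struct sketch_vcpu *vcpu)
{
	return vcpu->arch.regs.nip;
}

static inline void sketch_set_pc(struct sketch_vcpu *vcpu, unsigned long val)
{
	vcpu->arch.regs.nip = val;
}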
diff --git a/patches.arch/KVM-PPC-Use-seq_puts-in-kvmppc_exit_timing_show.patch b/patches.arch/KVM-PPC-Use-seq_puts-in-kvmppc_exit_timing_show.patch
new file mode 100644
index 0000000000..bf7b35604e
--- /dev/null
+++ b/patches.arch/KVM-PPC-Use-seq_puts-in-kvmppc_exit_timing_show.patch
@@ -0,0 +1,38 @@
+From 1627301020cb460f5a74e13c291f8db3b2a8062e Mon Sep 17 00:00:00 2001
+From: Markus Elfring <elfring@users.sourceforge.net>
+Date: Sun, 7 Jan 2018 10:07:36 +0100
+Subject: [PATCH] KVM: PPC: Use seq_puts() in kvmppc_exit_timing_show()
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 1627301020cb460f5a74e13c291f8db3b2a8062e
+
+The headline is a constant string with no format specifiers, so emit it
+with seq_puts() rather than seq_printf().
+
+This issue was detected by using the Coccinelle software.
+
+Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/timing.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
+index e44d2b2ea97e..1c03c978eb18 100644
+--- a/arch/powerpc/kvm/timing.c
++++ b/arch/powerpc/kvm/timing.c
+@@ -143,8 +143,7 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
+ int i;
+ u64 min, max, sum, sum_quad;
+
+- seq_printf(m, "%s", "type count min max sum sum_squared\n");
+-
++ seq_puts(m, "type count min max sum sum_squared\n");
+
+ for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+
+--
+2.13.7
+
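The patch above applies the usual seq_file rule of thumb: seq_puts() for constant strings, seq_printf() only where formatting is actually needed. A minimal sketch of that idiom follows; only seq_puts()/seq_printf() are the real API, while the stats structure and the demo_* names are illustrative assumptions, not code from the patch.

#include <linux/seq_file.h>

/* Hypothetical per-exit-type statistics record, for illustration only. */
struct demo_exit_stat {
	const char *name;
	u64 count, min, max, sum;
};

static int demo_timing_show(struct seq_file *m, void *private)
{
	struct demo_exit_stat *st = m->private;
	int i;

	/* Constant header, no format specifiers: seq_puts() is enough. */
	seq_puts(m, "type count min max sum\n");

	for (i = 0; i < 4; i++)
		/* Formatted rows still need seq_printf(). */
		seq_printf(m, "%-12s %10llu %10llu %10llu %10llu\n",
			   st[i].name, st[i].count, st[i].min,
			   st[i].max, st[i].sum);
	return 0;
}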
diff --git a/patches.arch/powerpc-introduce-get_mm_addr_key-helper.patch b/patches.arch/powerpc-introduce-get_mm_addr_key-helper.patch
index fc37d5653d..09a6d835b4 100644
--- a/patches.arch/powerpc-introduce-get_mm_addr_key-helper.patch
+++ b/patches.arch/powerpc-introduce-get_mm_addr_key-helper.patch
@@ -1,7 +1,7 @@
-From cba5b07de8cfcd2a78b81acc6f13f426554aa4a8 Mon Sep 17 00:00:00 2001
+From 087003e9ef7c1c5bec932387e47511429eff2a54 Mon Sep 17 00:00:00 2001
From: Ram Pai <linuxram@us.ibm.com>
Date: Thu, 18 Jan 2018 17:50:41 -0800
-Subject: [PATCH 39/45] powerpc: introduce get_mm_addr_key() helper
+Subject: [PATCH] powerpc: introduce get_mm_addr_key() helper
References: FATE#322447, bsc#1078248
Patch-mainline: v4.16-rc1
@@ -19,10 +19,10 @@ Acked-by: Michal Suchanek <msuchanek@suse.de>
2 files changed, 33 insertions(+)
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
-index 78260409dc9c..f437799feb5e 100644
+index 6364f5c2cc3e..bb38312cff28 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
-@@ -259,6 +259,15 @@ static inline bool early_radix_enabled(void)
+@@ -260,6 +260,15 @@ static inline bool early_radix_enabled(void)
}
#endif
@@ -39,10 +39,10 @@ index 78260409dc9c..f437799feb5e 100644
/* The kernel use the constants below to index in the page sizes array.
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
-index bd2a8a5907e4..08b5201e6846 100644
+index a78e24cf93ff..462c34e7b01d 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
-@@ -1573,6 +1573,30 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
+@@ -1581,6 +1581,30 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
local_irq_restore(flags);
}
@@ -61,7 +61,7 @@ index bd2a8a5907e4..08b5201e6846 100644
+ return 0;
+
+ local_irq_save(flags);
-+ ptep = find_linux_pte_or_hugepte(mm->pgd, address, NULL, NULL);
++ ptep = find_linux_pte(mm->pgd, address, NULL, NULL);
+ if (ptep)
+ pkey = pte_to_pkey_bits(pte_val(READ_ONCE(*ptep)));
+ local_irq_restore(flags);
@@ -74,5 +74,5 @@ index bd2a8a5907e4..08b5201e6846 100644
static inline void tm_flush_hash_page(int local)
{
--
-2.13.6
+2.13.7
diff --git a/patches.arch/powerpc-kvm-Switch-kvm-pmd-allocator-to-custom-alloc.patch b/patches.arch/powerpc-kvm-Switch-kvm-pmd-allocator-to-custom-alloc.patch
new file mode 100644
index 0000000000..33f8ca9b3f
--- /dev/null
+++ b/patches.arch/powerpc-kvm-Switch-kvm-pmd-allocator-to-custom-alloc.patch
@@ -0,0 +1,117 @@
+From 21828c99ee91bec94c3d2c32b3d5562ffdea980a Mon Sep 17 00:00:00 2001
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Date: Mon, 16 Apr 2018 16:57:15 +0530
+Subject: [PATCH] powerpc/kvm: Switch kvm pmd allocator to custom allocator
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: 21828c99ee91bec94c3d2c32b3d5562ffdea980a
+
+In the next set of patches, we will switch the pmd allocator to use page fragments
+and the locking will be updated to split pmd ptlock. We want to avoid using
+fragments for the partition-scoped table. Use a slab cache similar to the level 4 table.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 36 +++++++++++++++++++++++++++++-----
+ 1 file changed, 31 insertions(+), 5 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index a57eafec4dc2..ccdf3761eec0 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -200,6 +200,7 @@ void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr,
+ }
+
+ static struct kmem_cache *kvm_pte_cache;
++static struct kmem_cache *kvm_pmd_cache;
+
+ static pte_t *kvmppc_pte_alloc(void)
+ {
+@@ -217,6 +218,16 @@ static inline int pmd_is_leaf(pmd_t pmd)
+ return !!(pmd_val(pmd) & _PAGE_PTE);
+ }
+
++static pmd_t *kvmppc_pmd_alloc(void)
++{
++ return kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL);
++}
++
++static void kvmppc_pmd_free(pmd_t *pmdp)
++{
++ kmem_cache_free(kvm_pmd_cache, pmdp);
++}
++
+ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ unsigned int level, unsigned long mmu_seq)
+ {
+@@ -239,7 +250,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ if (pud && pud_present(*pud) && !pud_huge(*pud))
+ pmd = pmd_offset(pud, gpa);
+ else if (level <= 1)
+- new_pmd = pmd_alloc_one(kvm->mm, gpa);
++ new_pmd = kvmppc_pmd_alloc();
+
+ if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
+ new_ptep = kvmppc_pte_alloc();
+@@ -382,7 +393,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ if (new_pud)
+ pud_free(kvm->mm, new_pud);
+ if (new_pmd)
+- pmd_free(kvm->mm, new_pmd);
++ kvmppc_pmd_free(new_pmd);
+ if (new_ptep)
+ kvmppc_pte_free(new_ptep);
+ return ret;
+@@ -758,7 +769,7 @@ void kvmppc_free_radix(struct kvm *kvm)
+ kvmppc_pte_free(pte);
+ pmd_clear(pmd);
+ }
+- pmd_free(kvm->mm, pmd_offset(pud, 0));
++ kvmppc_pmd_free(pmd_offset(pud, 0));
+ pud_clear(pud);
+ }
+ pud_free(kvm->mm, pud_offset(pgd, 0));
+@@ -770,20 +781,35 @@ void kvmppc_free_radix(struct kvm *kvm)
+
+ static void pte_ctor(void *addr)
+ {
+- memset(addr, 0, PTE_TABLE_SIZE);
++ memset(addr, 0, RADIX_PTE_TABLE_SIZE);
++}
++
++static void pmd_ctor(void *addr)
++{
++ memset(addr, 0, RADIX_PMD_TABLE_SIZE);
+ }
+
+ int kvmppc_radix_init(void)
+ {
+- unsigned long size = sizeof(void *) << PTE_INDEX_SIZE;
++ unsigned long size = sizeof(void *) << RADIX_PTE_INDEX_SIZE;
+
+ kvm_pte_cache = kmem_cache_create("kvm-pte", size, size, 0, pte_ctor);
+ if (!kvm_pte_cache)
+ return -ENOMEM;
++
++ size = sizeof(void *) << RADIX_PMD_INDEX_SIZE;
++
++ kvm_pmd_cache = kmem_cache_create("kvm-pmd", size, size, 0, pmd_ctor);
++ if (!kvm_pmd_cache) {
++ kmem_cache_destroy(kvm_pte_cache);
++ return -ENOMEM;
++ }
++
+ return 0;
+ }
+
+ void kvmppc_radix_exit(void)
+ {
+ kmem_cache_destroy(kvm_pte_cache);
++ kmem_cache_destroy(kvm_pmd_cache);
+ }
+--
+2.13.7
+
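The slab-cache pattern used above for kvm_pmd_cache can be sketched in isolation as follows. The kmem_cache_* calls are the real slab API the patch relies on; the cache name, the size constant and the demo_* wrappers are illustrative assumptions (the patch itself uses RADIX_PMD_TABLE_SIZE and kvmppc_pmd_alloc()/kvmppc_pmd_free()).

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/string.h>

#define DEMO_TABLE_SIZE 4096	/* illustrative; stands in for RADIX_PMD_TABLE_SIZE */

static struct kmem_cache *demo_table_cache;

/* The constructor runs once per object when its slab page is set up. */
static void demo_table_ctor(void *addr)
{
	memset(addr, 0, DEMO_TABLE_SIZE);
}

static int demo_table_cache_init(void)
{
	demo_table_cache = kmem_cache_create("demo-table", DEMO_TABLE_SIZE,
					     DEMO_TABLE_SIZE, 0, demo_table_ctor);
	return demo_table_cache ? 0 : -ENOMEM;
}

static void *demo_table_alloc(void)
{
	return kmem_cache_alloc(demo_table_cache, GFP_KERNEL);
}

static void demo_table_free(void *table)
{
	kmem_cache_free(demo_table_cache, table);
}

static void demo_table_cache_exit(void)
{
	kmem_cache_destroy(demo_table_cache);
}

Compared with pmd_alloc_one(), a dedicated cache keeps the partition-scoped tables out of the page-fragment allocator introduced for regular pmds by the follow-up patches, which is the point of the change.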
diff --git a/patches.arch/powerpc-kvm-booke-Fix-altivec-related-build-break.patch b/patches.arch/powerpc-kvm-booke-Fix-altivec-related-build-break.patch
new file mode 100644
index 0000000000..c9d818debf
--- /dev/null
+++ b/patches.arch/powerpc-kvm-booke-Fix-altivec-related-build-break.patch
@@ -0,0 +1,44 @@
+From b2d7ecbe355698010a6b7a15eb179e09eb3d6a34 Mon Sep 17 00:00:00 2001
+From: Laurentiu Tudor <laurentiu.tudor@nxp.com>
+Date: Thu, 26 Apr 2018 15:33:19 +0300
+Subject: [PATCH] powerpc/kvm/booke: Fix altivec related build break
+
+References: bsc#1061840
+Patch-mainline: v4.17-rc3
+Git-commit: b2d7ecbe355698010a6b7a15eb179e09eb3d6a34
+
+Add the missing "altivec unavailable" interrupt injection helper,
+thus fixing the linker error below:
+
+ arch/powerpc/kvm/emulate_loadstore.o: In function `kvmppc_check_altivec_disabled':
+ arch/powerpc/kvm/emulate_loadstore.c: undefined reference to `.kvmppc_core_queue_vec_unavail'
+
+Fixes: 09f984961c137c4b ("KVM: PPC: Book3S: Add MMIO emulation for VMX instructions")
+Signed-off-by: Laurentiu Tudor <laurentiu.tudor@nxp.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/booke.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
+index 6038e2e7aee0..876d4f294fdd 100644
+--- a/arch/powerpc/kvm/booke.c
++++ b/arch/powerpc/kvm/booke.c
+@@ -305,6 +305,13 @@ void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu)
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
+ }
+
++#ifdef CONFIG_ALTIVEC
++void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu)
++{
++ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL);
++}
++#endif
++
+ void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
+ {
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
+--
+2.13.7
+
diff --git a/patches.arch/powerpc-mm-Rename-find_linux_pte_or_hugepte.patch b/patches.arch/powerpc-mm-Rename-find_linux_pte_or_hugepte.patch
new file mode 100644
index 0000000000..74276d34df
--- /dev/null
+++ b/patches.arch/powerpc-mm-Rename-find_linux_pte_or_hugepte.patch
@@ -0,0 +1,518 @@
+From 2916d472c2b7cff1bfc770906432417771b9f1ea Mon Sep 17 00:00:00 2001
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Date: Thu, 27 Jul 2017 11:54:53 +0530
+Subject: [PATCH] powerpc/mm: Rename find_linux_pte_or_hugepte()
+
+References: bsc#1061840
+Patch-mainline: v4.14-rc1
+Git-commit: 94171b19c3f1f4d9d4c0e3aaa1aa161def1ec7ea
+
+Add newer helpers to make the function usage simpler. It is always
+recommended to use find_current_mm_pte() for walking the page table.
+If we cannot use find_current_mm_pte(), it should be documented why
+the said usage of __find_linux_pte() is safe against a parallel THP
+split.
+
+For now we have KVM code using __find_linux_pte(). This is because kvm
+code ends up calling __find_linux_pte() in real mode with MSR_EE=0 but
+with PACA soft_enabled = 1. We may want to fix that later and make
+sure we keep the MSR_EE and PACA soft_enabled in sync. When we do that
+we can switch kvm to use find_linux_pte().
+
+[note: the exact equivalent of find_linux_pte_or_hugepte() is find_linux_pte().
+Although find_current_mm_pte() is the recommended interface, it may not work in
+all cases as it adds more checks. We have an additional occurrence in
+follow_huge_addr(), which is removed upstream; we are missing the core
+infrastructure that replaces it.]
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/pgtable.h | 10 +---------
+ arch/powerpc/include/asm/pte-walk.h | 35 ++++++++++++++++++++++++++++++++++
+ arch/powerpc/kernel/eeh.c | 4 ++--
+ arch/powerpc/kernel/io-workarounds.c | 5 +++--
+ arch/powerpc/kvm/book3s_64_mmu_hv.c | 5 +++--
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 28 +++++++++++++--------------
+ arch/powerpc/kvm/book3s_64_vio_hv.c | 12 +++++++++++-
+ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 18 ++++++++---------
+ arch/powerpc/kvm/e500_mmu_host.c | 3 ++-
+ arch/powerpc/mm/hash_utils_64.c | 5 +++--
+ arch/powerpc/mm/hugetlbpage.c | 24 +++++++++++++----------
+ arch/powerpc/mm/tlb_hash64.c | 6 ++++--
+ arch/powerpc/perf/callchain.c | 3 ++-
+ 13 files changed, 103 insertions(+), 55 deletions(-)
+ create mode 100644 arch/powerpc/include/asm/pte-walk.h
+
+diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
+index dd01212935ac..9fa263ad7cb3 100644
+--- a/arch/powerpc/include/asm/pgtable.h
++++ b/arch/powerpc/include/asm/pgtable.h
+@@ -66,16 +66,8 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
+ #ifndef CONFIG_TRANSPARENT_HUGEPAGE
+ #define pmd_large(pmd) 0
+ #endif
+-pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+- bool *is_thp, unsigned *shift);
+-static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+- bool *is_thp, unsigned *shift)
+-{
+- VM_WARN(!arch_irqs_disabled(),
+- "%s called with irq enabled\n", __func__);
+- return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift);
+-}
+
++/* can we use this in kvm */
+ unsigned long vmalloc_to_phys(void *vmalloc_addr);
+
+ void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
+diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h
+new file mode 100644
+index 000000000000..2d633e9d686c
+--- /dev/null
++++ b/arch/powerpc/include/asm/pte-walk.h
+@@ -0,0 +1,35 @@
++#ifndef _ASM_POWERPC_PTE_WALK_H
++#define _ASM_POWERPC_PTE_WALK_H
++
++#include <linux/sched.h>
++
++/* Don't use this directly */
++extern pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
++ bool *is_thp, unsigned *hshift);
++
++static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea,
++ bool *is_thp, unsigned *hshift)
++{
++ VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
++ return __find_linux_pte(pgdir, ea, is_thp, hshift);
++}
++
++static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift)
++{
++ pgd_t *pgdir = init_mm.pgd;
++ return __find_linux_pte(pgdir, ea, NULL, hshift);
++}
++/*
++ * This is what we should always use. Any other lockless page table lookup needs
++ * careful audit against THP split.
++ */
++static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea,
++ bool *is_thp, unsigned *hshift)
++{
++ VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
++ VM_WARN(pgdir != current->mm->pgd,
++ "%s lock less page table lookup called on wrong mm\n", __func__);
++ return __find_linux_pte(pgdir, ea, is_thp, hshift);
++}
++
++#endif /* _ASM_POWERPC_PTE_WALK_H */
+diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
+index 63992b2d8e15..5e6887c40528 100644
+--- a/arch/powerpc/kernel/eeh.c
++++ b/arch/powerpc/kernel/eeh.c
+@@ -44,6 +44,7 @@
+ #include <asm/machdep.h>
+ #include <asm/ppc-pci.h>
+ #include <asm/rtas.h>
++#include <asm/pte-walk.h>
+
+
+ /** Overview:
+@@ -352,8 +353,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
+ * worried about _PAGE_SPLITTING/collapse. Also we will not hit
+ * page table free, because of init_mm.
+ */
+- ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token,
+- NULL, &hugepage_shift);
++ ptep = find_init_mm_pte(token, &hugepage_shift);
+ if (!ptep)
+ return token;
+ WARN_ON(hugepage_shift);
+diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
+index a582e0d42525..bbe85f5aea71 100644
+--- a/arch/powerpc/kernel/io-workarounds.c
++++ b/arch/powerpc/kernel/io-workarounds.c
+@@ -19,6 +19,8 @@
+ #include <asm/pgtable.h>
+ #include <asm/ppc-pci.h>
+ #include <asm/io-workarounds.h>
++#include <asm/pte-walk.h>
++
+
+ #define IOWA_MAX_BUS 8
+
+@@ -75,8 +77,7 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
+ * We won't find huge pages here (iomem). Also can't hit
+ * a page table free due to init_mm
+ */
+- ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
+- NULL, &hugepage_shift);
++ ptep = find_init_mm_pte(vaddr, &hugepage_shift);
+ if (ptep == NULL)
+ paddr = 0;
+ else {
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
+index 1c10e26cebbb..33de69da4464 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
+@@ -37,6 +37,7 @@
+ #include <asm/synch.h>
+ #include <asm/ppc-opcode.h>
+ #include <asm/cputable.h>
++#include <asm/pte-walk.h>
+
+ #include "trace_hv.h"
+
+@@ -599,8 +600,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ * hugepage split and collapse.
+ */
+ local_irq_save(flags);
+- ptep = find_linux_pte_or_hugepte(current->mm->pgd,
+- hva, NULL, NULL);
++ ptep = find_current_mm_pte(current->mm->pgd,
++ hva, NULL, NULL);
+ if (ptep) {
+ pte = kvmppc_read_update_linux_pte(ptep, 1);
+ if (__pte_write(pte))
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index 6d677c79eeb1..c5d7435455f1 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -17,6 +17,7 @@
+ #include <asm/mmu.h>
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
++#include <asm/pte-walk.h>
+
+ /*
+ * Supported radix tree geometry.
+@@ -359,8 +360,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ if (writing)
+ pgflags |= _PAGE_DIRTY;
+ local_irq_save(flags);
+- ptep = __find_linux_pte_or_hugepte(current->mm->pgd, hva,
+- NULL, NULL);
++ ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL);
+ if (ptep) {
+ pte = READ_ONCE(*ptep);
+ if (pte_present(pte) &&
+@@ -374,8 +374,12 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ spin_unlock(&kvm->mmu_lock);
+ return RESUME_GUEST;
+ }
+- ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable,
+- gpa, NULL, &shift);
++ /*
++ * We are walking the secondary page table here. We can do this
++ * without disabling irq.
++ */
++ ptep = __find_linux_pte(kvm->arch.pgtable,
++ gpa, NULL, &shift);
+ if (ptep && pte_present(*ptep)) {
+ kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
+ gpa, shift);
+@@ -427,8 +431,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ pgflags |= _PAGE_WRITE;
+ } else {
+ local_irq_save(flags);
+- ptep = __find_linux_pte_or_hugepte(current->mm->pgd,
+- hva, NULL, NULL);
++ ptep = find_current_mm_pte(current->mm->pgd,
++ hva, NULL, NULL);
+ if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
+ pgflags |= _PAGE_WRITE;
+ local_irq_restore(flags);
+@@ -499,8 +503,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned int shift;
+ unsigned long old;
+
+- ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
+- NULL, &shift);
++ ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ if (ptep && pte_present(*ptep)) {
+ old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
+ gpa, shift);
+@@ -525,8 +528,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned int shift;
+ int ref = 0;
+
+- ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
+- NULL, &shift);
++ ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
+ kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
+ gpa, shift);
+@@ -545,8 +547,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned int shift;
+ int ref = 0;
+
+- ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
+- NULL, &shift);
++ ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ if (ptep && pte_present(*ptep) && pte_young(*ptep))
+ ref = 1;
+ return ref;
+@@ -562,8 +563,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
+ unsigned int shift;
+ int ret = 0;
+
+- ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
+- NULL, &shift);
++ ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
+ ret = 1;
+ if (shift)
+diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
+index 3adfd2f5301c..c32e9bfe75b1 100644
+--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
++++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
+@@ -39,6 +39,7 @@
+ #include <asm/udbg.h>
+ #include <asm/iommu.h>
+ #include <asm/tce.h>
++#include <asm/pte-walk.h>
+
+ #ifdef CONFIG_BUG
+
+@@ -353,7 +354,16 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
+ pte_t *ptep, pte;
+ unsigned shift = 0;
+
+- ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift);
++ /*
++ * Called in real mode with MSR_EE = 0. We are safe here.
++ * It is ok to do the lookup with arch.pgdir here, because
++ * we are doing this on secondary cpus and current task there
++ * is not the hypervisor. Also this is safe against THP in the
++ * host, because an IPI to primary thread will wait for the secondary
++ * to exit which will agains result in the below page table walk
++ * to finish.
++ */
++ ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift);
+ if (!ptep || !pte_present(*ptep))
+ return -ENXIO;
+ pte = *ptep;
+diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+index 584c74c8119f..fedb0139524c 100644
+--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
++++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+@@ -22,6 +22,7 @@
+ #include <asm/hvcall.h>
+ #include <asm/synch.h>
+ #include <asm/ppc-opcode.h>
++#include <asm/pte-walk.h>
+
+ /* Translate address of a vmalloc'd thing to a linear map address */
+ static void *real_vmalloc_addr(void *x)
+@@ -31,9 +32,9 @@ static void *real_vmalloc_addr(void *x)
+ /*
+ * assume we don't have huge pages in vmalloc space...
+ * So don't worry about THP collapse/split. Called
+- * Only in realmode, hence won't need irq_save/restore.
++ * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore.
+ */
+- p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL, NULL);
++ p = find_init_mm_pte(addr, NULL);
+ if (!p || !pte_present(*p))
+ return NULL;
+ addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
+@@ -230,14 +231,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
+ * If we had a page table table change after lookup, we would
+ * retry via mmu_notifier_retry.
+ */
+- if (realmode)
+- ptep = __find_linux_pte_or_hugepte(pgdir, hva, NULL,
+- &hpage_shift);
+- else {
++ if (!realmode)
+ local_irq_save(irq_flags);
+- ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL,
+- &hpage_shift);
+- }
++ /*
++ * If called in real mode we have MSR_EE = 0. Otherwise
++ * we disable irq above.
++ */
++ ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift);
+ if (ptep) {
+ pte_t pte;
+ unsigned int host_pte_size;
+diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
+index 77fd043b3ecc..c6c734424c70 100644
+--- a/arch/powerpc/kvm/e500_mmu_host.c
++++ b/arch/powerpc/kvm/e500_mmu_host.c
+@@ -30,6 +30,7 @@
+ #include <linux/vmalloc.h>
+ #include <linux/hugetlb.h>
+ #include <asm/kvm_ppc.h>
++#include <asm/pte-walk.h>
+
+ #include "e500.h"
+ #include "timing.h"
+@@ -476,7 +477,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+ * can't run hence pfn won't change.
+ */
+ local_irq_save(flags);
+- ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL, NULL);
++ ptep = find_linux_pte(pgdir, hva, NULL, NULL);
+ if (ptep) {
+ pte_t pte = READ_ONCE(*ptep);
+
+diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
+index a93137c358ea..bdc178915bfc 100644
+--- a/arch/powerpc/mm/hash_utils_64.c
++++ b/arch/powerpc/mm/hash_utils_64.c
+@@ -61,6 +61,7 @@
+ #include <asm/tm.h>
+ #include <asm/trace.h>
+ #include <asm/ps3.h>
++#include <asm/pte-walk.h>
+
+ #ifdef DEBUG
+ #define DBG(fmt...) udbg_printf(fmt)
+@@ -1298,7 +1299,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
+ #endif /* CONFIG_PPC_64K_PAGES */
+
+ /* Get PTE and page size from page tables */
+- ptep = __find_linux_pte_or_hugepte(pgdir, ea, &is_thp, &hugeshift);
++ ptep = find_linux_pte(pgdir, ea, &is_thp, &hugeshift);
+ if (ptep == NULL || !pte_present(*ptep)) {
+ DBG_LOW(" no PTE !\n");
+ rc = 1;
+@@ -1527,7 +1528,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
+ * THP pages use update_mmu_cache_pmd. We don't do
+ * hash preload there. Hence can ignore THP here
+ */
+- ptep = find_linux_pte_or_hugepte(pgdir, ea, NULL, &hugepage_shift);
++ ptep = find_current_mm_pte(pgdir, ea, NULL, &hugepage_shift);
+ if (!ptep)
+ goto out_exit;
+
+diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
+index 58ca476f2314..656b84ec29da 100644
+--- a/arch/powerpc/mm/hugetlbpage.c
++++ b/arch/powerpc/mm/hugetlbpage.c
+@@ -22,6 +22,8 @@
+ #include <asm/tlb.h>
+ #include <asm/setup.h>
+ #include <asm/hugetlb.h>
++#include <asm/pte-walk.h>
++
+
+ #ifdef CONFIG_HUGETLB_PAGE
+
+@@ -57,8 +59,11 @@ static unsigned nr_gpages;
+
+ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+ {
+- /* Only called for hugetlbfs pages, hence can ignore THP */
+- return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
++ /*
++ * Only called for hugetlbfs pages, hence can ignore THP and the
++ * irq disabled walk.
++ */
++ return __find_linux_pte(mm->pgd, addr, NULL, NULL);
+ }
+
+ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
+@@ -906,9 +911,8 @@ void flush_dcache_icache_hugepage(struct page *page)
+ * This function need to be called with interrupts disabled. We use this variant
+ * when we have MSR[EE] = 0 but the paca->soft_enabled = 1
+ */
+-
+-pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+- bool *is_thp, unsigned *shift)
++pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
++ bool *is_thp, unsigned *hpage_shift)
+ {
+ pgd_t pgd, *pgdp;
+ pud_t pud, *pudp;
+@@ -917,8 +921,8 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+ hugepd_t *hpdp = NULL;
+ unsigned pdshift = PGDIR_SHIFT;
+
+- if (shift)
+- *shift = 0;
++ if (hpage_shift)
++ *hpage_shift = 0;
+
+ if (is_thp)
+ *is_thp = false;
+@@ -988,11 +992,11 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+ ret_pte = hugepte_offset(*hpdp, ea, pdshift);
+ pdshift = hugepd_shift(*hpdp);
+ out:
+- if (shift)
+- *shift = pdshift;
++ if (hpage_shift)
++ *hpage_shift = pdshift;
+ return ret_pte;
+ }
+-EXPORT_SYMBOL_GPL(__find_linux_pte_or_hugepte);
++EXPORT_SYMBOL_GPL(__find_linux_pte);
+
+ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
+index 4517aa43a8b1..b3e6116b4317 100644
+--- a/arch/powerpc/mm/tlb_hash64.c
++++ b/arch/powerpc/mm/tlb_hash64.c
+@@ -29,6 +29,8 @@
+ #include <asm/tlbflush.h>
+ #include <asm/tlb.h>
+ #include <asm/bug.h>
++#include <asm/pte-walk.h>
++
+
+ #include <trace/events/thp.h>
+
+@@ -209,8 +211,8 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
+ local_irq_save(flags);
+ arch_enter_lazy_mmu_mode();
+ for (; start < end; start += PAGE_SIZE) {
+- pte_t *ptep = find_linux_pte_or_hugepte(mm->pgd, start, &is_thp,
+- &hugepage_shift);
++ pte_t *ptep = find_current_mm_pte(mm->pgd, start, &is_thp,
++ &hugepage_shift);
+ unsigned long pte;
+
+ if (ptep == NULL)
+diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
+index 0fc26714780a..0af051a1974e 100644
+--- a/arch/powerpc/perf/callchain.c
++++ b/arch/powerpc/perf/callchain.c
+@@ -22,6 +22,7 @@
+ #ifdef CONFIG_PPC64
+ #include "../kernel/ppc32.h"
+ #endif
++#include <asm/pte-walk.h>
+
+
+ /*
+@@ -127,7 +128,7 @@ static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
+ return -EFAULT;
+
+ local_irq_save(flags);
+- ptep = find_linux_pte_or_hugepte(pgdir, addr, NULL, &shift);
++ ptep = find_current_mm_pte(pgdir, addr, NULL, &shift);
+ if (!ptep)
+ goto err_out;
+ if (!shift)
+--
+2.13.7
+
+diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
+index c0546d629918..9a242724b318 100644
+--- a/arch/powerpc/mm/hugetlbpage.c
++++ b/arch/powerpc/mm/hugetlbpage.c
+@@ -635,7 +635,7 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
+ struct page *page = ERR_PTR(-EINVAL);
+
+ local_irq_save(flags);
+- ptep = find_linux_pte_or_hugepte(mm->pgd, address, &is_thp, &shift);
++ ptep = find_linux_pte(mm->pgd, address, &is_thp, &shift);
+ if (!ptep)
+ goto no_page;
+ pte = READ_ONCE(*ptep);
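The calling convention the patch introduces can be condensed into a short sketch. The find_current_mm_pte() call, the irq-disabled window and the READ_ONCE() on the PTE mirror the hunks above; the surrounding demo_lookup_pfn() helper is an illustrative assumption, not code from the patch.

#include <linux/mm.h>
#include <linux/sched.h>
#include <asm/pte-walk.h>

/*
 * Look up the pfn backing 'ea' in the current mm. Interrupts stay disabled
 * across the walk so a parallel THP split or page-table free cannot race
 * with us; find_current_mm_pte() warns if that rule is violated.
 */
static unsigned long demo_lookup_pfn(unsigned long ea)
{
	unsigned long flags, pfn = 0;
	pte_t *ptep, pte;

	local_irq_save(flags);
	ptep = find_current_mm_pte(current->mm->pgd, ea, NULL, NULL);
	if (ptep) {
		pte = READ_ONCE(*ptep);
		if (pte_present(pte))
			pfn = pte_pfn(pte);
	}
	local_irq_restore(flags);

	return pfn;
}

__find_linux_pte() remains available for the real-mode KVM paths where MSR_EE is already 0, as the comment added to kvmppc_rm_ua_to_hpa() in this patch explains.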
diff --git a/patches.arch/powerpc-powernv-Add-indirect-levels-to-it_userspace.patch b/patches.arch/powerpc-powernv-Add-indirect-levels-to-it_userspace.patch
new file mode 100644
index 0000000000..dba0ab3f0c
--- /dev/null
+++ b/patches.arch/powerpc-powernv-Add-indirect-levels-to-it_userspace.patch
@@ -0,0 +1,415 @@
+From 2551de6d5b70c46cdc650f4b73add0acccd66a7a Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Wed, 4 Jul 2018 16:13:47 +1000
+Subject: [PATCH] powerpc/powernv: Add indirect levels to it_userspace
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc1
+Git-commit: 090bad39b237aad92d8e01baa033699cf0c81cbe
+
+We want to support sparse memory and therefore huge chunks of DMA windows
+do not need to be mapped. If a DMA window is big enough to require 2 or more
+indirect levels, and a DMA window is used to map all RAM (which is
+the default case for a 64bit window), we can actually save some memory by
+not allocating TCEs for regions which we are not going to map anyway.
+
+The hardware tables already support indirect levels but we also keep
+a host-physical-to-userspace translation array which is allocated by
+vmalloc() and is a flat array which might use quite some memory.
+
+This converts it_userspace from a vmalloc'ed array to a multi-level table.
+
+As the format becomes platform dependent, this replaces the direct access
+to it_userspace with an iommu_table_ops::useraddrptr hook which returns
+a pointer to the userspace copy of a TCE; a future extension will return
+NULL if the level was not allocated.
+
+This should not change non-KVM handling of TCE tables and it_userspace
+will not be allocated for non-KVM tables.
+
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/iommu.h | 6 +--
+ arch/powerpc/kvm/book3s_64_vio_hv.c | 8 ----
+ arch/powerpc/platforms/powernv/pci-ioda-tce.c | 65 +++++++++++++++++++++------
+ arch/powerpc/platforms/powernv/pci-ioda.c | 23 +++++++---
+ arch/powerpc/platforms/powernv/pci.h | 3 +-
+ drivers/vfio/vfio_iommu_spapr_tce.c | 46 -------------------
+ 6 files changed, 73 insertions(+), 78 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
+index 470124740864..f9751159297e 100644
+--- a/arch/powerpc/include/asm/iommu.h
++++ b/arch/powerpc/include/asm/iommu.h
+@@ -69,6 +69,8 @@ struct iommu_table_ops {
+ long index,
+ unsigned long *hpa,
+ enum dma_data_direction *direction);
++
++ __be64 *(*useraddrptr)(struct iommu_table *tbl, long index);
+ #endif
+ void (*clear)(struct iommu_table *tbl,
+ long index, long npages);
+@@ -123,9 +125,7 @@ struct iommu_table {
+ };
+
+ #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
+- ((tbl)->it_userspace ? \
+- &((tbl)->it_userspace[(entry) - (tbl)->it_offset]) : \
+- NULL)
++ ((tbl)->it_ops->useraddrptr((tbl), (entry)))
+
+ /* Pure 2^n version of get_order */
+ static inline __attribute_const__
+diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
+index d5f269aedb7e..3df526dabdab 100644
+--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
++++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
+@@ -206,10 +206,6 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
+ /* it_userspace allocation might be delayed */
+ return H_TOO_HARD;
+
+- pua = (void *) vmalloc_to_phys(pua);
+- if (WARN_ON_ONCE_RM(!pua))
+- return H_HARDWARE;
+-
+ mem = mm_iommu_lookup_rm(kvm->mm, be64_to_cpu(*pua), pgsize);
+ if (!mem)
+ return H_TOO_HARD;
+@@ -283,10 +279,6 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
+ &hpa)))
+ return H_HARDWARE;
+
+- pua = (void *) vmalloc_to_phys(pua);
+- if (WARN_ON_ONCE_RM(!pua))
+- return H_HARDWARE;
+-
+ if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
+ return H_CLOSED;
+
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+index 726b8693f5ae..88cecc1815d9 100644
+--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+@@ -31,9 +31,9 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
+ tbl->it_type = TCE_PCI;
+ }
+
+-static __be64 *pnv_tce(struct iommu_table *tbl, long idx)
++static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx)
+ {
+- __be64 *tmp = ((__be64 *)tbl->it_base);
++ __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
+ int level = tbl->it_indirect_levels;
+ const long shift = ilog2(tbl->it_level_size);
+ unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
+@@ -67,7 +67,7 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+ ((rpn + i) << tbl->it_page_shift);
+ unsigned long idx = index - tbl->it_offset + i;
+
+- *(pnv_tce(tbl, idx)) = cpu_to_be64(newtce);
++ *(pnv_tce(tbl, false, idx)) = cpu_to_be64(newtce);
+ }
+
+ return 0;
+@@ -86,12 +86,21 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index,
+ if (newtce & TCE_PCI_WRITE)
+ newtce |= TCE_PCI_READ;
+
+- oldtce = be64_to_cpu(xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)));
++ oldtce = be64_to_cpu(xchg(pnv_tce(tbl, false, idx),
++ cpu_to_be64(newtce)));
+ *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+ *direction = iommu_tce_direction(oldtce);
+
+ return 0;
+ }
++
++__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index)
++{
++ if (WARN_ON_ONCE(!tbl->it_userspace))
++ return NULL;
++
++ return pnv_tce(tbl, true, index - tbl->it_offset);
++}
+ #endif
+
+ void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
+@@ -101,13 +110,15 @@ void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
+ for (i = 0; i < npages; i++) {
+ unsigned long idx = index - tbl->it_offset + i;
+
+- *(pnv_tce(tbl, idx)) = cpu_to_be64(0);
++ *(pnv_tce(tbl, false, idx)) = cpu_to_be64(0);
+ }
+ }
+
+ unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+ {
+- return be64_to_cpu(*(pnv_tce(tbl, index - tbl->it_offset)));
++ __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset);
++
++ return be64_to_cpu(*ptce);
+ }
+
+ static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+@@ -144,6 +155,10 @@ void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
+
+ pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
+ tbl->it_indirect_levels);
++ if (tbl->it_userspace) {
++ pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
++ tbl->it_indirect_levels);
++ }
+ }
+
+ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
+@@ -191,10 +206,11 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
+
+ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ __u32 page_shift, __u64 window_size, __u32 levels,
+- struct iommu_table *tbl)
++ bool alloc_userspace_copy, struct iommu_table *tbl)
+ {
+- void *addr;
++ void *addr, *uas = NULL;
+ unsigned long offset = 0, level_shift, total_allocated = 0;
++ unsigned long total_allocated_uas = 0;
+ const unsigned int window_shift = ilog2(window_size);
+ unsigned int entries_shift = window_shift - page_shift;
+ unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
+@@ -228,10 +244,20 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ * we did not allocate as much as we wanted,
+ * release partially allocated table.
+ */
+- if (offset < tce_table_size) {
+- pnv_pci_ioda2_table_do_free_pages(addr,
+- 1ULL << (level_shift - 3), levels - 1);
+- return -ENOMEM;
++ if (offset < tce_table_size)
++ goto free_tces_exit;
++
++ /* Allocate userspace view of the TCE table */
++ if (alloc_userspace_copy) {
++ offset = 0;
++ uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
++ levels, tce_table_size, &offset,
++ &total_allocated_uas);
++ if (!uas)
++ goto free_tces_exit;
++ if (offset < tce_table_size ||
++ total_allocated_uas != total_allocated)
++ goto free_uas_exit;
+ }
+
+ /* Setup linux iommu table */
+@@ -240,11 +266,22 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ tbl->it_level_size = 1ULL << (level_shift - 3);
+ tbl->it_indirect_levels = levels - 1;
+ tbl->it_allocated_size = total_allocated;
++ tbl->it_userspace = uas;
+
+- pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n",
+- window_size, tce_table_size, bus_offset);
++ pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d\n",
++ window_size, tce_table_size, bus_offset, tbl->it_base,
++ tbl->it_userspace, levels);
+
+ return 0;
++
++free_uas_exit:
++ pnv_pci_ioda2_table_do_free_pages(uas,
++ 1ULL << (level_shift - 3), levels - 1);
++free_tces_exit:
++ pnv_pci_ioda2_table_do_free_pages(addr,
++ 1ULL << (level_shift - 3), levels - 1);
++
++ return -ENOMEM;
+ }
+
+ static void pnv_iommu_table_group_link_free(struct rcu_head *head)
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
+index 732250618a50..375cd823145b 100644
+--- a/arch/powerpc/platforms/powernv/pci-ioda.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda.c
+@@ -1881,6 +1881,7 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
+ #ifdef CONFIG_IOMMU_API
+ .exchange = pnv_ioda1_tce_xchg,
+ .exchange_rm = pnv_ioda1_tce_xchg_rm,
++ .useraddrptr = pnv_tce_useraddrptr,
+ #endif
+ .clear = pnv_ioda1_tce_free,
+ .get = pnv_tce_get,
+@@ -2050,6 +2051,7 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = {
+ #ifdef CONFIG_IOMMU_API
+ .exchange = pnv_ioda2_tce_xchg,
+ .exchange_rm = pnv_ioda2_tce_xchg_rm,
++ .useraddrptr = pnv_tce_useraddrptr,
+ #endif
+ .clear = pnv_ioda2_tce_free,
+ .get = pnv_tce_get,
+@@ -2305,7 +2307,7 @@ void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
+
+ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
+ int num, __u32 page_shift, __u64 window_size, __u32 levels,
+- struct iommu_table **ptbl)
++ bool alloc_userspace_copy, struct iommu_table **ptbl)
+ {
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
+@@ -2322,7 +2324,7 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
+
+ ret = pnv_pci_ioda2_table_alloc_pages(nid,
+ bus_offset, page_shift, window_size,
+- levels, tbl);
++ levels, alloc_userspace_copy, tbl);
+ if (ret) {
+ iommu_tce_table_put(tbl);
+ return ret;
+@@ -2355,7 +2357,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
+ rc = pnv_pci_ioda2_create_table(&pe->table_group, 0,
+ IOMMU_PAGE_SHIFT_4K,
+ window_size,
+- POWERNV_IOMMU_DEFAULT_LEVELS, &tbl);
++ POWERNV_IOMMU_DEFAULT_LEVELS, false, &tbl);
+ if (rc) {
+ pe_err(pe, "Failed to create 32-bit TCE table, err %ld",
+ rc);
+@@ -2443,7 +2445,16 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+ tce_table_size, direct_table_size);
+ }
+
+- return bytes;
++ return bytes + bytes; /* one for HW table, one for userspace copy */
++}
++
++static long pnv_pci_ioda2_create_table_userspace(
++ struct iommu_table_group *table_group,
++ int num, __u32 page_shift, __u64 window_size, __u32 levels,
++ struct iommu_table **ptbl)
++{
++ return pnv_pci_ioda2_create_table(table_group,
++ num, page_shift, window_size, levels, true, ptbl);
+ }
+
+ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
+@@ -2472,7 +2483,7 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
+
+ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
+ .get_table_size = pnv_pci_ioda2_get_table_size,
+- .create_table = pnv_pci_ioda2_create_table,
++ .create_table = pnv_pci_ioda2_create_table_userspace,
+ .set_window = pnv_pci_ioda2_set_window,
+ .unset_window = pnv_pci_ioda2_unset_window,
+ .take_ownership = pnv_ioda2_take_ownership,
+@@ -2556,7 +2567,7 @@ static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)
+
+ static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = {
+ .get_table_size = pnv_pci_ioda2_get_table_size,
+- .create_table = pnv_pci_ioda2_create_table,
++ .create_table = pnv_pci_ioda2_create_table_userspace,
+ .set_window = pnv_pci_ioda2_npu_set_window,
+ .unset_window = pnv_pci_ioda2_npu_unset_window,
+ .take_ownership = pnv_ioda2_npu_take_ownership,
+diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
+index e1ab2a433d21..7a0f417af47d 100644
+--- a/arch/powerpc/platforms/powernv/pci.h
++++ b/arch/powerpc/platforms/powernv/pci.h
+@@ -260,11 +260,12 @@ extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+ extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
+ extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction);
++extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index);
+ extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
+
+ extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ __u32 page_shift, __u64 window_size, __u32 levels,
+- struct iommu_table *tbl);
++ bool alloc_userspace_copy, struct iommu_table *tbl);
+ extern void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
+
+ extern long pnv_pci_link_table_and_group(int node, int num,
+diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
+index a78974e1fee6..b61ada8f5a33 100644
+--- a/drivers/vfio/vfio_iommu_spapr_tce.c
++++ b/drivers/vfio/vfio_iommu_spapr_tce.c
+@@ -211,44 +211,6 @@ static long tce_iommu_register_pages(struct tce_container *container,
+ return 0;
+ }
+
+-static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl,
+- struct mm_struct *mm)
+-{
+- unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
+- tbl->it_size, PAGE_SIZE);
+- unsigned long *uas;
+- long ret;
+-
+- BUG_ON(tbl->it_userspace);
+-
+- ret = try_increment_locked_vm(mm, cb >> PAGE_SHIFT);
+- if (ret)
+- return ret;
+-
+- uas = vzalloc(cb);
+- if (!uas) {
+- decrement_locked_vm(mm, cb >> PAGE_SHIFT);
+- return -ENOMEM;
+- }
+- tbl->it_userspace = (__be64 *) uas;
+-
+- return 0;
+-}
+-
+-static void tce_iommu_userspace_view_free(struct iommu_table *tbl,
+- struct mm_struct *mm)
+-{
+- unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
+- tbl->it_size, PAGE_SIZE);
+-
+- if (!tbl->it_userspace)
+- return;
+-
+- vfree(tbl->it_userspace);
+- tbl->it_userspace = NULL;
+- decrement_locked_vm(mm, cb >> PAGE_SHIFT);
+-}
+-
+ static bool tce_page_is_contained(struct page *page, unsigned page_shift)
+ {
+ /*
+@@ -597,12 +559,6 @@ static long tce_iommu_build_v2(struct tce_container *container,
+ unsigned long hpa;
+ enum dma_data_direction dirtmp;
+
+- if (!tbl->it_userspace) {
+- ret = tce_iommu_userspace_view_alloc(tbl, container->mm);
+- if (ret)
+- return ret;
+- }
+-
+ for (i = 0; i < pages; ++i) {
+ struct mm_iommu_table_group_mem_t *mem = NULL;
+ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i);
+@@ -683,7 +639,6 @@ static void tce_iommu_free_table(struct tce_container *container,
+ {
+ unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
+
+- tce_iommu_userspace_view_free(tbl, container->mm);
+ iommu_tce_table_put(tbl);
+ decrement_locked_vm(container->mm, pages);
+ }
+@@ -1198,7 +1153,6 @@ static void tce_iommu_release_ownership(struct tce_container *container,
+ continue;
+
+ tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
+- tce_iommu_userspace_view_free(tbl, container->mm);
+ if (tbl->it_map)
+ iommu_release_ownership(tbl);
+
+--
+2.13.7
+
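The indirect walk that pnv_tce() now performs for both the hardware table and its userspace copy can be modelled with a simplified sketch. The field names below stand in for iommu_table's it_base/it_userspace, it_indirect_levels and it_level_size; the real code additionally tags intermediate entries with TCE_PCI_READ/TCE_PCI_WRITE and translates them through __va(), which is omitted here.

/* Simplified model of the multi-level walk in pnv_tce(); illustration only. */
struct demo_table {
	unsigned long *base;		/* stands in for it_base / it_userspace */
	int indirect_levels;		/* it_indirect_levels */
	unsigned long level_size;	/* entries per level, a power of two */
};

static unsigned long *demo_entry(struct demo_table *tbl, unsigned long idx)
{
	unsigned long *tmp = tbl->base;
	int level = tbl->indirect_levels;
	const int shift = __builtin_ctzl(tbl->level_size);	/* equivalent of ilog2() */
	unsigned long mask = (tbl->level_size - 1) << (level * shift);

	while (level) {
		int n = (idx & mask) >> (level * shift);

		/* An intermediate entry holds the address of the next level. */
		tmp = (unsigned long *)tmp[n];
		idx &= ~mask;
		mask >>= shift;
		--level;
	}

	return tmp + idx;
}

Allocating a second tree of the same shape for it_userspace, rather than one flat vmalloc'ed array, is what allows later patches to leave levels unallocated for regions that are never mapped.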
diff --git a/patches.arch/powerpc-powernv-Move-TCE-manupulation-code-to-its-ow.patch b/patches.arch/powerpc-powernv-Move-TCE-manupulation-code-to-its-ow.patch
new file mode 100644
index 0000000000..7cd037ac68
--- /dev/null
+++ b/patches.arch/powerpc-powernv-Move-TCE-manupulation-code-to-its-ow.patch
@@ -0,0 +1,783 @@
+From e1c4cc85fde0b60c09390dae2777e17c48eb25c2 Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Wed, 4 Jul 2018 16:13:45 +1000
+Subject: [PATCH] powerpc/powernv: Move TCE manupulation code to its own file
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc1
+Git-commit: 191c22879fbcfd98a7fe9a51786ef41253b1549b
+
+Right now we have allocation code in pci-ioda.c and traversing code in
+pci.c; let's keep them together. However, both files are big enough
+already, so let's move this business to a new file.
+
+While we're at it, move the code which links IOMMU table groups to
+IOMMU tables, as it is not specific to any PNV PHB model.
+
+This puts the symbols exported from the new file together.
+
+This fixes several warnings from checkpatch.pl like this:
+"WARNING: Prefer 'unsigned int' to bare use of 'unsigned'".
+
+As this is almost cut-n-paste, there should be no behavioral change.
+
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/platforms/powernv/Makefile | 2 +-
+ arch/powerpc/platforms/powernv/pci-ioda-tce.c | 313 ++++++++++++++++++++++++++
+ arch/powerpc/platforms/powernv/pci-ioda.c | 146 ------------
+ arch/powerpc/platforms/powernv/pci.c | 158 -------------
+ arch/powerpc/platforms/powernv/pci.h | 41 ++--
+ 5 files changed, 340 insertions(+), 320 deletions(-)
+ create mode 100644 arch/powerpc/platforms/powernv/pci-ioda-tce.c
+
+diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
+index 177b3d4542b5..be975390c4a6 100644
+--- a/arch/powerpc/platforms/powernv/Makefile
++++ b/arch/powerpc/platforms/powernv/Makefile
+@@ -5,7 +5,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
+ obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
+
+ obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
+-obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o
++obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
+ obj-$(CONFIG_CXL_BASE) += pci-cxl.o
+ obj-$(CONFIG_EEH) += eeh-powernv.o
+ obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+new file mode 100644
+index 000000000000..726b8693f5ae
+--- /dev/null
++++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+@@ -0,0 +1,313 @@
++// SPDX-License-Identifier: GPL-2.0+
++/*
++ * TCE helpers for IODA PCI/PCIe on PowerNV platforms
++ *
++ * Copyright 2018 IBM Corp.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ */
++
++#include <linux/kernel.h>
++#include <linux/iommu.h>
++
++#include <asm/iommu.h>
++#include <asm/tce.h>
++#include "pci.h"
++
++void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
++ void *tce_mem, u64 tce_size,
++ u64 dma_offset, unsigned int page_shift)
++{
++ tbl->it_blocksize = 16;
++ tbl->it_base = (unsigned long)tce_mem;
++ tbl->it_page_shift = page_shift;
++ tbl->it_offset = dma_offset >> tbl->it_page_shift;
++ tbl->it_index = 0;
++ tbl->it_size = tce_size >> 3;
++ tbl->it_busno = 0;
++ tbl->it_type = TCE_PCI;
++}
++
++static __be64 *pnv_tce(struct iommu_table *tbl, long idx)
++{
++ __be64 *tmp = ((__be64 *)tbl->it_base);
++ int level = tbl->it_indirect_levels;
++ const long shift = ilog2(tbl->it_level_size);
++ unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
++
++ while (level) {
++ int n = (idx & mask) >> (level * shift);
++ unsigned long tce = be64_to_cpu(tmp[n]);
++
++ tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
++ idx &= ~mask;
++ mask >>= shift;
++ --level;
++ }
++
++ return tmp + idx;
++}
++
++int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
++ unsigned long uaddr, enum dma_data_direction direction,
++ unsigned long attrs)
++{
++ u64 proto_tce = iommu_direction_to_tce_perm(direction);
++ u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
++ long i;
++
++ if (proto_tce & TCE_PCI_WRITE)
++ proto_tce |= TCE_PCI_READ;
++
++ for (i = 0; i < npages; i++) {
++ unsigned long newtce = proto_tce |
++ ((rpn + i) << tbl->it_page_shift);
++ unsigned long idx = index - tbl->it_offset + i;
++
++ *(pnv_tce(tbl, idx)) = cpu_to_be64(newtce);
++ }
++
++ return 0;
++}
++
++#ifdef CONFIG_IOMMU_API
++int pnv_tce_xchg(struct iommu_table *tbl, long index,
++ unsigned long *hpa, enum dma_data_direction *direction)
++{
++ u64 proto_tce = iommu_direction_to_tce_perm(*direction);
++ unsigned long newtce = *hpa | proto_tce, oldtce;
++ unsigned long idx = index - tbl->it_offset;
++
++ BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
++
++ if (newtce & TCE_PCI_WRITE)
++ newtce |= TCE_PCI_READ;
++
++ oldtce = be64_to_cpu(xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)));
++ *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
++ *direction = iommu_tce_direction(oldtce);
++
++ return 0;
++}
++#endif
++
++void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
++{
++ long i;
++
++ for (i = 0; i < npages; i++) {
++ unsigned long idx = index - tbl->it_offset + i;
++
++ *(pnv_tce(tbl, idx)) = cpu_to_be64(0);
++ }
++}
++
++unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
++{
++ return be64_to_cpu(*(pnv_tce(tbl, index - tbl->it_offset)));
++}
++
++static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
++ unsigned long size, unsigned int levels)
++{
++ const unsigned long addr_ul = (unsigned long) addr &
++ ~(TCE_PCI_READ | TCE_PCI_WRITE);
++
++ if (levels) {
++ long i;
++ u64 *tmp = (u64 *) addr_ul;
++
++ for (i = 0; i < size; ++i) {
++ unsigned long hpa = be64_to_cpu(tmp[i]);
++
++ if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
++ continue;
++
++ pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
++ levels - 1);
++ }
++ }
++
++ free_pages(addr_ul, get_order(size << 3));
++}
++
++void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
++{
++ const unsigned long size = tbl->it_indirect_levels ?
++ tbl->it_level_size : tbl->it_size;
++
++ if (!tbl->it_size)
++ return;
++
++ pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
++ tbl->it_indirect_levels);
++}
++
++static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
++ unsigned int levels, unsigned long limit,
++ unsigned long *current_offset, unsigned long *total_allocated)
++{
++ struct page *tce_mem = NULL;
++ __be64 *addr, *tmp;
++ unsigned int order = max_t(unsigned int, shift, PAGE_SHIFT) -
++ PAGE_SHIFT;
++ unsigned long allocated = 1UL << (order + PAGE_SHIFT);
++ unsigned int entries = 1UL << (shift - 3);
++ long i;
++
++ tce_mem = alloc_pages_node(nid, GFP_KERNEL, order);
++ if (!tce_mem) {
++ pr_err("Failed to allocate a TCE memory, order=%d\n", order);
++ return NULL;
++ }
++ addr = page_address(tce_mem);
++ memset(addr, 0, allocated);
++ *total_allocated += allocated;
++
++ --levels;
++ if (!levels) {
++ *current_offset += allocated;
++ return addr;
++ }
++
++ for (i = 0; i < entries; ++i) {
++ tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
++ levels, limit, current_offset, total_allocated);
++ if (!tmp)
++ break;
++
++ addr[i] = cpu_to_be64(__pa(tmp) |
++ TCE_PCI_READ | TCE_PCI_WRITE);
++
++ if (*current_offset >= limit)
++ break;
++ }
++
++ return addr;
++}
++
++long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
++ __u32 page_shift, __u64 window_size, __u32 levels,
++ struct iommu_table *tbl)
++{
++ void *addr;
++ unsigned long offset = 0, level_shift, total_allocated = 0;
++ const unsigned int window_shift = ilog2(window_size);
++ unsigned int entries_shift = window_shift - page_shift;
++ unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
++ PAGE_SHIFT);
++ const unsigned long tce_table_size = 1UL << table_shift;
++
++ if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
++ return -EINVAL;
++
++ if (!is_power_of_2(window_size))
++ return -EINVAL;
++
++ /* Adjust direct table size from window_size and levels */
++ entries_shift = (entries_shift + levels - 1) / levels;
++ level_shift = entries_shift + 3;
++ level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);
++
++ if ((level_shift - 3) * levels + page_shift >= 60)
++ return -EINVAL;
++
++ /* Allocate TCE table */
++ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
++ levels, tce_table_size, &offset, &total_allocated);
++
++ /* addr==NULL means that the first level allocation failed */
++ if (!addr)
++ return -ENOMEM;
++
++ /*
++ * First level was allocated but some lower level failed as
++ * we did not allocate as much as we wanted,
++ * release partially allocated table.
++ */
++ if (offset < tce_table_size) {
++ pnv_pci_ioda2_table_do_free_pages(addr,
++ 1ULL << (level_shift - 3), levels - 1);
++ return -ENOMEM;
++ }
++
++ /* Setup linux iommu table */
++ pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
++ page_shift);
++ tbl->it_level_size = 1ULL << (level_shift - 3);
++ tbl->it_indirect_levels = levels - 1;
++ tbl->it_allocated_size = total_allocated;
++
++ pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n",
++ window_size, tce_table_size, bus_offset);
++
++ return 0;
++}
++
++static void pnv_iommu_table_group_link_free(struct rcu_head *head)
++{
++ struct iommu_table_group_link *tgl = container_of(head,
++ struct iommu_table_group_link, rcu);
++
++ kfree(tgl);
++}
++
++void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
++ struct iommu_table_group *table_group)
++{
++ long i;
++ bool found;
++ struct iommu_table_group_link *tgl;
++
++ if (!tbl || !table_group)
++ return;
++
++ /* Remove link to a group from table's list of attached groups */
++ found = false;
++ list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
++ if (tgl->table_group == table_group) {
++ list_del_rcu(&tgl->next);
++ call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
++ found = true;
++ break;
++ }
++ }
++ if (WARN_ON(!found))
++ return;
++
++ /* Clean a pointer to iommu_table in iommu_table_group::tables[] */
++ found = false;
++ for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
++ if (table_group->tables[i] == tbl) {
++ table_group->tables[i] = NULL;
++ found = true;
++ break;
++ }
++ }
++ WARN_ON(!found);
++}
++
++long pnv_pci_link_table_and_group(int node, int num,
++ struct iommu_table *tbl,
++ struct iommu_table_group *table_group)
++{
++ struct iommu_table_group_link *tgl = NULL;
++
++ if (WARN_ON(!tbl || !table_group))
++ return -EINVAL;
++
++ tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
++ node);
++ if (!tgl)
++ return -ENOMEM;
++
++ tgl->table_group = table_group;
++ list_add_rcu(&tgl->next, &tbl->it_group_list);
++
++ table_group->tables[num] = tbl;
++
++ return 0;
++}
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
+index bf0f7fda0876..732250618a50 100644
+--- a/arch/powerpc/platforms/powernv/pci-ioda.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda.c
+@@ -51,11 +51,7 @@
+ #define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */
+ #define PNV_IODA1_DMA32_SEGSIZE 0x10000000
+
+-#define POWERNV_IOMMU_DEFAULT_LEVELS 1
+-#define POWERNV_IOMMU_MAX_LEVELS 5
+-
+ static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU" };
+-static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
+
+ void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
+ const char *fmt, ...)
+@@ -2307,10 +2303,6 @@ void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
+ pe->tce_bypass_enabled = enable;
+ }
+
+-static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+- __u32 page_shift, __u64 window_size, __u32 levels,
+- struct iommu_table *tbl);
+-
+ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
+ int num, __u32 page_shift, __u64 window_size, __u32 levels,
+ struct iommu_table **ptbl)
+@@ -2598,144 +2590,6 @@ static void pnv_pci_ioda_setup_iommu_api(void)
+ static void pnv_pci_ioda_setup_iommu_api(void) { };
+ #endif
+
+-static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift,
+- unsigned levels, unsigned long limit,
+- unsigned long *current_offset, unsigned long *total_allocated)
+-{
+- struct page *tce_mem = NULL;
+- __be64 *addr, *tmp;
+- unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT;
+- unsigned long allocated = 1UL << (order + PAGE_SHIFT);
+- unsigned entries = 1UL << (shift - 3);
+- long i;
+-
+- tce_mem = alloc_pages_node(nid, GFP_KERNEL, order);
+- if (!tce_mem) {
+- pr_err("Failed to allocate a TCE memory, order=%d\n", order);
+- return NULL;
+- }
+- addr = page_address(tce_mem);
+- memset(addr, 0, allocated);
+- *total_allocated += allocated;
+-
+- --levels;
+- if (!levels) {
+- *current_offset += allocated;
+- return addr;
+- }
+-
+- for (i = 0; i < entries; ++i) {
+- tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
+- levels, limit, current_offset, total_allocated);
+- if (!tmp)
+- break;
+-
+- addr[i] = cpu_to_be64(__pa(tmp) |
+- TCE_PCI_READ | TCE_PCI_WRITE);
+-
+- if (*current_offset >= limit)
+- break;
+- }
+-
+- return addr;
+-}
+-
+-static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+- unsigned long size, unsigned level);
+-
+-static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+- __u32 page_shift, __u64 window_size, __u32 levels,
+- struct iommu_table *tbl)
+-{
+- void *addr;
+- unsigned long offset = 0, level_shift, total_allocated = 0;
+- const unsigned window_shift = ilog2(window_size);
+- unsigned entries_shift = window_shift - page_shift;
+- unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT);
+- const unsigned long tce_table_size = 1UL << table_shift;
+-
+- if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
+- return -EINVAL;
+-
+- if (!is_power_of_2(window_size))
+- return -EINVAL;
+-
+- /* Adjust direct table size from window_size and levels */
+- entries_shift = (entries_shift + levels - 1) / levels;
+- level_shift = entries_shift + 3;
+- level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
+-
+- if ((level_shift - 3) * levels + page_shift >= 60)
+- return -EINVAL;
+-
+- /* Allocate TCE table */
+- addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
+- levels, tce_table_size, &offset, &total_allocated);
+-
+- /* addr==NULL means that the first level allocation failed */
+- if (!addr)
+- return -ENOMEM;
+-
+- /*
+- * First level was allocated but some lower level failed as
+- * we did not allocate as much as we wanted,
+- * release partially allocated table.
+- */
+- if (offset < tce_table_size) {
+- pnv_pci_ioda2_table_do_free_pages(addr,
+- 1ULL << (level_shift - 3), levels - 1);
+- return -ENOMEM;
+- }
+-
+- /* Setup linux iommu table */
+- pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
+- page_shift);
+- tbl->it_level_size = 1ULL << (level_shift - 3);
+- tbl->it_indirect_levels = levels - 1;
+- tbl->it_allocated_size = total_allocated;
+-
+- pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n",
+- window_size, tce_table_size, bus_offset);
+-
+- return 0;
+-}
+-
+-static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+- unsigned long size, unsigned level)
+-{
+- const unsigned long addr_ul = (unsigned long) addr &
+- ~(TCE_PCI_READ | TCE_PCI_WRITE);
+-
+- if (level) {
+- long i;
+- u64 *tmp = (u64 *) addr_ul;
+-
+- for (i = 0; i < size; ++i) {
+- unsigned long hpa = be64_to_cpu(tmp[i]);
+-
+- if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
+- continue;
+-
+- pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
+- level - 1);
+- }
+- }
+-
+- free_pages(addr_ul, get_order(size << 3));
+-}
+-
+-static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
+-{
+- const unsigned long size = tbl->it_indirect_levels ?
+- tbl->it_level_size : tbl->it_size;
+-
+- if (!tbl->it_size)
+- return;
+-
+- pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
+- tbl->it_indirect_levels);
+-}
+-
+ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+ struct pnv_ioda_pe *pe)
+ {
+diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
+index 5422f4a6317c..7799b19bb5dd 100644
+--- a/arch/powerpc/platforms/powernv/pci.c
++++ b/arch/powerpc/platforms/powernv/pci.c
+@@ -800,85 +800,6 @@ struct pci_ops pnv_pci_ops = {
+ .write = pnv_pci_write_config,
+ };
+
+-static __be64 *pnv_tce(struct iommu_table *tbl, long idx)
+-{
+- __be64 *tmp = ((__be64 *)tbl->it_base);
+- int level = tbl->it_indirect_levels;
+- const long shift = ilog2(tbl->it_level_size);
+- unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
+-
+- while (level) {
+- int n = (idx & mask) >> (level * shift);
+- unsigned long tce = be64_to_cpu(tmp[n]);
+-
+- tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
+- idx &= ~mask;
+- mask >>= shift;
+- --level;
+- }
+-
+- return tmp + idx;
+-}
+-
+-int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+- unsigned long uaddr, enum dma_data_direction direction,
+- unsigned long attrs)
+-{
+- u64 proto_tce = iommu_direction_to_tce_perm(direction);
+- u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
+- long i;
+-
+- if (proto_tce & TCE_PCI_WRITE)
+- proto_tce |= TCE_PCI_READ;
+-
+- for (i = 0; i < npages; i++) {
+- unsigned long newtce = proto_tce |
+- ((rpn + i) << tbl->it_page_shift);
+- unsigned long idx = index - tbl->it_offset + i;
+-
+- *(pnv_tce(tbl, idx)) = cpu_to_be64(newtce);
+- }
+-
+- return 0;
+-}
+-
+-#ifdef CONFIG_IOMMU_API
+-int pnv_tce_xchg(struct iommu_table *tbl, long index,
+- unsigned long *hpa, enum dma_data_direction *direction)
+-{
+- u64 proto_tce = iommu_direction_to_tce_perm(*direction);
+- unsigned long newtce = *hpa | proto_tce, oldtce;
+- unsigned long idx = index - tbl->it_offset;
+-
+- BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
+-
+- if (newtce & TCE_PCI_WRITE)
+- newtce |= TCE_PCI_READ;
+-
+- oldtce = be64_to_cpu(xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)));
+- *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+- *direction = iommu_tce_direction(oldtce);
+-
+- return 0;
+-}
+-#endif
+-
+-void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
+-{
+- long i;
+-
+- for (i = 0; i < npages; i++) {
+- unsigned long idx = index - tbl->it_offset + i;
+-
+- *(pnv_tce(tbl, idx)) = cpu_to_be64(0);
+- }
+-}
+-
+-unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+-{
+- return be64_to_cpu(*(pnv_tce(tbl, index - tbl->it_offset)));
+-}
+-
+ struct iommu_table *pnv_pci_table_alloc(int nid)
+ {
+ struct iommu_table *tbl;
+@@ -893,85 +814,6 @@ struct iommu_table *pnv_pci_table_alloc(int nid)
+ return tbl;
+ }
+
+-long pnv_pci_link_table_and_group(int node, int num,
+- struct iommu_table *tbl,
+- struct iommu_table_group *table_group)
+-{
+- struct iommu_table_group_link *tgl = NULL;
+-
+- if (WARN_ON(!tbl || !table_group))
+- return -EINVAL;
+-
+- tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
+- node);
+- if (!tgl)
+- return -ENOMEM;
+-
+- tgl->table_group = table_group;
+- list_add_rcu(&tgl->next, &tbl->it_group_list);
+-
+- table_group->tables[num] = tbl;
+-
+- return 0;
+-}
+-
+-static void pnv_iommu_table_group_link_free(struct rcu_head *head)
+-{
+- struct iommu_table_group_link *tgl = container_of(head,
+- struct iommu_table_group_link, rcu);
+-
+- kfree(tgl);
+-}
+-
+-void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+- struct iommu_table_group *table_group)
+-{
+- long i;
+- bool found;
+- struct iommu_table_group_link *tgl;
+-
+- if (!tbl || !table_group)
+- return;
+-
+- /* Remove link to a group from table's list of attached groups */
+- found = false;
+- list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+- if (tgl->table_group == table_group) {
+- list_del_rcu(&tgl->next);
+- call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
+- found = true;
+- break;
+- }
+- }
+- if (WARN_ON(!found))
+- return;
+-
+- /* Clean a pointer to iommu_table in iommu_table_group::tables[] */
+- found = false;
+- for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+- if (table_group->tables[i] == tbl) {
+- table_group->tables[i] = NULL;
+- found = true;
+- break;
+- }
+- }
+- WARN_ON(!found);
+-}
+-
+-void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
+- void *tce_mem, u64 tce_size,
+- u64 dma_offset, unsigned page_shift)
+-{
+- tbl->it_blocksize = 16;
+- tbl->it_base = (unsigned long)tce_mem;
+- tbl->it_page_shift = page_shift;
+- tbl->it_offset = dma_offset >> tbl->it_page_shift;
+- tbl->it_index = 0;
+- tbl->it_size = tce_size >> 3;
+- tbl->it_busno = 0;
+- tbl->it_type = TCE_PCI;
+-}
+-
+ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
+ {
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
+index a95273c524f6..e1ab2a433d21 100644
+--- a/arch/powerpc/platforms/powernv/pci.h
++++ b/arch/powerpc/platforms/powernv/pci.h
+@@ -196,13 +196,6 @@ struct pnv_phb {
+ };
+
+ extern struct pci_ops pnv_pci_ops;
+-extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+- unsigned long uaddr, enum dma_data_direction direction,
+- unsigned long attrs);
+-extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
+-extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
+- unsigned long *hpa, enum dma_data_direction *direction);
+-extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
+
+ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
+ unsigned char *log_buff);
+@@ -212,14 +205,6 @@ int pnv_pci_cfg_write(struct pci_dn *pdn,
+ int where, int size, u32 val);
+ extern struct iommu_table *pnv_pci_table_alloc(int nid);
+
+-extern long pnv_pci_link_table_and_group(int node, int num,
+- struct iommu_table *tbl,
+- struct iommu_table_group *table_group);
+-extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+- struct iommu_table_group *table_group);
+-extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
+- void *tce_mem, u64 tce_size,
+- u64 dma_offset, unsigned page_shift);
+ extern void pnv_pci_init_ioda_hub(struct device_node *np);
+ extern void pnv_pci_init_ioda2_phb(struct device_node *np);
+ extern void pnv_pci_init_npu_phb(struct device_node *np);
+@@ -265,4 +250,30 @@ extern void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev);
+ /* phb ops (cxl switches these when enabling the kernel api on the phb) */
+ extern const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops;
+
++/* pci-ioda-tce.c */
++#define POWERNV_IOMMU_DEFAULT_LEVELS 1
++#define POWERNV_IOMMU_MAX_LEVELS 5
++
++extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
++ unsigned long uaddr, enum dma_data_direction direction,
++ unsigned long attrs);
++extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
++extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
++ unsigned long *hpa, enum dma_data_direction *direction);
++extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
++
++extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
++ __u32 page_shift, __u64 window_size, __u32 levels,
++ struct iommu_table *tbl);
++extern void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
++
++extern long pnv_pci_link_table_and_group(int node, int num,
++ struct iommu_table *tbl,
++ struct iommu_table_group *table_group);
++extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
++ struct iommu_table_group *table_group);
++extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
++ void *tce_mem, u64 tce_size,
++ u64 dma_offset, unsigned int page_shift);
++
+ #endif /* __POWERNV_PCI_H */
+--
+2.13.7
+
diff --git a/patches.arch/powerpc-powernv-Rework-TCE-level-allocation.patch b/patches.arch/powerpc-powernv-Rework-TCE-level-allocation.patch
new file mode 100644
index 0000000000..c8496a9efa
--- /dev/null
+++ b/patches.arch/powerpc-powernv-Rework-TCE-level-allocation.patch
@@ -0,0 +1,78 @@
+From 9bc98c8a43c4900ee63b160f805c65051e35d917 Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Wed, 4 Jul 2018 16:13:48 +1000
+Subject: [PATCH] powerpc/powernv: Rework TCE level allocation
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc1
+Git-commit: 9bc98c8a43c4900ee63b160f805c65051e35d917
+
+This moves the actual page allocation to a separate function which is going
+to be reused later for on-demand TCE allocation.
+
+While we are at it, remove the unnecessary level-size round-up, as the caller
+does this already.
+
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/platforms/powernv/pci-ioda-tce.c | 30 +++++++++++++++++----------
+ 1 file changed, 19 insertions(+), 11 deletions(-)
+
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+index 88cecc1815d9..123c49925b46 100644
+--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+@@ -31,6 +31,23 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
+ tbl->it_type = TCE_PCI;
+ }
+
++static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
++{
++ struct page *tce_mem = NULL;
++ __be64 *addr;
++
++ tce_mem = alloc_pages_node(nid, GFP_KERNEL, shift - PAGE_SHIFT);
++ if (!tce_mem) {
++ pr_err("Failed to allocate a TCE memory, level shift=%d\n",
++ shift);
++ return NULL;
++ }
++ addr = page_address(tce_mem);
++ memset(addr, 0, 1UL << shift);
++
++ return addr;
++}
++
+ static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx)
+ {
+ __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
+@@ -165,21 +182,12 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
+ unsigned int levels, unsigned long limit,
+ unsigned long *current_offset, unsigned long *total_allocated)
+ {
+- struct page *tce_mem = NULL;
+ __be64 *addr, *tmp;
+- unsigned int order = max_t(unsigned int, shift, PAGE_SHIFT) -
+- PAGE_SHIFT;
+- unsigned long allocated = 1UL << (order + PAGE_SHIFT);
++ unsigned long allocated = 1UL << shift;
+ unsigned int entries = 1UL << (shift - 3);
+ long i;
+
+- tce_mem = alloc_pages_node(nid, GFP_KERNEL, order);
+- if (!tce_mem) {
+- pr_err("Failed to allocate a TCE memory, order=%d\n", order);
+- return NULL;
+- }
+- addr = page_address(tce_mem);
+- memset(addr, 0, allocated);
++ addr = pnv_alloc_tce_level(nid, shift);
+ *total_allocated += allocated;
+
+ --levels;
+--
+2.13.7
+
diff --git a/patches.arch/powerpc-powernv-ioda-Allocate-indirect-TCE-levels-on.patch b/patches.arch/powerpc-powernv-ioda-Allocate-indirect-TCE-levels-on.patch
new file mode 100644
index 0000000000..7c8596cdd0
--- /dev/null
+++ b/patches.arch/powerpc-powernv-ioda-Allocate-indirect-TCE-levels-on.patch
@@ -0,0 +1,364 @@
+From a68bd1267b7286b1687905651b404e765046de25 Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Wed, 4 Jul 2018 16:13:49 +1000
+Subject: [PATCH] powerpc/powernv/ioda: Allocate indirect TCE levels on demand
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc1
+Git-commit: a68bd1267b7286b1687905651b404e765046de25
+
+At the moment we allocate the entire TCE table, twice (hardware part and
+userspace translation cache). This normally works as we usually have
+contiguous memory and the guest will map its entire RAM for 64-bit DMA.
+
+However if we have sparse RAM (one example is a memory device), then
+we will allocate TCEs which will never be used as the guest only maps
+actual memory for DMA. If it is a single-level TCE table, there is nothing
+we can really do, but if it is a multilevel table, we can skip allocating
+TCEs we know we won't need.
+
+This adds the ability to allocate only the first level, saving memory.
+
+This changes iommu_table::free() to avoid allocating an extra level;
+iommu_table::set() will do this when needed.
+
+This adds an @alloc parameter to iommu_table::exchange() to tell the callback
+whether it can allocate an extra level; the flag is set to "false" for
+the realmode KVM handlers of H_PUT_TCE hcalls and the callback returns
+H_TOO_HARD.
+
+This still requires the entire table to be counted in mm::locked_vm.
+
+To be conservative, this only does on-demand allocation when
+the userspace cache table is requested, which is the case for VFIO.
+
+The example math for a system replicating a powernv setup with NVLink2
+in a guest:
+16GB RAM mapped at 0x0
+128GB GPU RAM window (16GB of actual RAM) mapped at 0x244000000000
+
+the table to cover that all with 64K pages takes:
+(((0x244000000000 + 0x2000000000) >> 16)*8)>>20 = 4556MB
+
+If we allocate only the necessary TCE levels, we will only need:
+(((0x400000000 + 0x400000000) >> 16)*8)>>20 = 4MB (plus some for indirect
+levels).
+
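+For illustration only (not part of this patch), the arithmetic above boils
+down to one 8-byte TCE per IOMMU page covered by the window; the helper
+tce_table_bytes() below is hypothetical:
+
+  /* one 8-byte TCE entry per IOMMU page in the DMA window */
+  static unsigned long tce_table_bytes(unsigned long window_size,
+                                       unsigned int page_shift)
+  {
+          return (window_size >> page_shift) * 8;
+  }
+
+  /* two 16GB regions with 64K pages:
+   * tce_table_bytes(0x800000000ULL, 16) == 4MB, as in the second example */
+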
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/iommu.h | 7 ++-
+ arch/powerpc/kvm/book3s_64_vio_hv.c | 4 +-
+ arch/powerpc/platforms/powernv/pci-ioda-tce.c | 73 +++++++++++++++++++++------
+ arch/powerpc/platforms/powernv/pci-ioda.c | 8 +--
+ arch/powerpc/platforms/powernv/pci.h | 6 ++-
+ drivers/vfio/vfio_iommu_spapr_tce.c | 2 +-
+ 6 files changed, 73 insertions(+), 27 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
+index 4bdcf22509e6..daa3ee5d7ad2 100644
+--- a/arch/powerpc/include/asm/iommu.h
++++ b/arch/powerpc/include/asm/iommu.h
+@@ -70,7 +70,7 @@ struct iommu_table_ops {
+ unsigned long *hpa,
+ enum dma_data_direction *direction);
+
+- __be64 *(*useraddrptr)(struct iommu_table *tbl, long index);
++ __be64 *(*useraddrptr)(struct iommu_table *tbl, long index, bool alloc);
+ #endif
+ void (*clear)(struct iommu_table *tbl,
+ long index, long npages);
+@@ -122,10 +122,13 @@ struct iommu_table {
+ __be64 *it_userspace; /* userspace view of the table */
+ struct iommu_table_ops *it_ops;
+ struct kref it_kref;
++ int it_nid;
+ };
+
++#define IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry) \
++ ((tbl)->it_ops->useraddrptr((tbl), (entry), false))
+ #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
+- ((tbl)->it_ops->useraddrptr((tbl), (entry)))
++ ((tbl)->it_ops->useraddrptr((tbl), (entry), true))
+
+ /* Pure 2^n version of get_order */
+ static inline __attribute_const__
+diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
+index ee98cf6180d7..d4bcd1b17b09 100644
+--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
++++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
+@@ -200,7 +200,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
+ {
+ struct mm_iommu_table_group_mem_t *mem = NULL;
+ const unsigned long pgsize = 1ULL << tbl->it_page_shift;
+- __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
++ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
+
+ if (!pua)
+ /* it_userspace allocation might be delayed */
+@@ -264,7 +264,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
+ {
+ long ret;
+ unsigned long hpa = 0;
+- __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
++ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
+ struct mm_iommu_table_group_mem_t *mem;
+
+ if (!pua)
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+index 123c49925b46..6c5db1acbe8d 100644
+--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+@@ -48,7 +48,7 @@ static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
+ return addr;
+ }
+
+-static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx)
++static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
+ {
+ __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
+ int level = tbl->it_indirect_levels;
+@@ -57,7 +57,23 @@ static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx)
+
+ while (level) {
+ int n = (idx & mask) >> (level * shift);
+- unsigned long tce = be64_to_cpu(tmp[n]);
++ unsigned long tce;
++
++ if (tmp[n] == 0) {
++ __be64 *tmp2;
++
++ if (!alloc)
++ return NULL;
++
++ tmp2 = pnv_alloc_tce_level(tbl->it_nid,
++ ilog2(tbl->it_level_size) + 3);
++ if (!tmp2)
++ return NULL;
++
++ tmp[n] = cpu_to_be64(__pa(tmp2) |
++ TCE_PCI_READ | TCE_PCI_WRITE);
++ }
++ tce = be64_to_cpu(tmp[n]);
+
+ tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
+ idx &= ~mask;
+@@ -84,7 +100,7 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+ ((rpn + i) << tbl->it_page_shift);
+ unsigned long idx = index - tbl->it_offset + i;
+
+- *(pnv_tce(tbl, false, idx)) = cpu_to_be64(newtce);
++ *(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce);
+ }
+
+ return 0;
+@@ -92,31 +108,46 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+
+ #ifdef CONFIG_IOMMU_API
+ int pnv_tce_xchg(struct iommu_table *tbl, long index,
+- unsigned long *hpa, enum dma_data_direction *direction)
++ unsigned long *hpa, enum dma_data_direction *direction,
++ bool alloc)
+ {
+ u64 proto_tce = iommu_direction_to_tce_perm(*direction);
+ unsigned long newtce = *hpa | proto_tce, oldtce;
+ unsigned long idx = index - tbl->it_offset;
++ __be64 *ptce = NULL;
+
+ BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
+
++ if (*direction == DMA_NONE) {
++ ptce = pnv_tce(tbl, false, idx, false);
++ if (!ptce) {
++ *hpa = 0;
++ return 0;
++ }
++ }
++
++ if (!ptce) {
++ ptce = pnv_tce(tbl, false, idx, alloc);
++ if (!ptce)
++ return alloc ? H_HARDWARE : H_TOO_HARD;
++ }
++
+ if (newtce & TCE_PCI_WRITE)
+ newtce |= TCE_PCI_READ;
+
+- oldtce = be64_to_cpu(xchg(pnv_tce(tbl, false, idx),
+- cpu_to_be64(newtce)));
++ oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
+ *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+ *direction = iommu_tce_direction(oldtce);
+
+ return 0;
+ }
+
+-__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index)
++__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
+ {
+ if (WARN_ON_ONCE(!tbl->it_userspace))
+ return NULL;
+
+- return pnv_tce(tbl, true, index - tbl->it_offset);
++ return pnv_tce(tbl, true, index - tbl->it_offset, alloc);
+ }
+ #endif
+
+@@ -126,14 +157,19 @@ void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
+
+ for (i = 0; i < npages; i++) {
+ unsigned long idx = index - tbl->it_offset + i;
++ __be64 *ptce = pnv_tce(tbl, false, idx, false);
+
+- *(pnv_tce(tbl, false, idx)) = cpu_to_be64(0);
++ if (ptce)
++ *ptce = cpu_to_be64(0);
+ }
+ }
+
+ unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+ {
+- __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset);
++ __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false);
++
++ if (!ptce)
++ return 0;
+
+ return be64_to_cpu(*ptce);
+ }
+@@ -224,6 +260,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
+ PAGE_SHIFT);
+ const unsigned long tce_table_size = 1UL << table_shift;
++ unsigned int tmplevels = levels;
+
+ if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
+ return -EINVAL;
+@@ -231,6 +268,9 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ if (!is_power_of_2(window_size))
+ return -EINVAL;
+
++ if (alloc_userspace_copy && (window_size > (1ULL << 32)))
++ tmplevels = 1;
++
+ /* Adjust direct table size from window_size and levels */
+ entries_shift = (entries_shift + levels - 1) / levels;
+ level_shift = entries_shift + 3;
+@@ -241,7 +281,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+
+ /* Allocate TCE table */
+ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
+- levels, tce_table_size, &offset, &total_allocated);
++ tmplevels, tce_table_size, &offset, &total_allocated);
+
+ /* addr==NULL means that the first level allocation failed */
+ if (!addr)
+@@ -252,7 +292,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ * we did not allocate as much as we wanted,
+ * release partially allocated table.
+ */
+- if (offset < tce_table_size)
++ if (tmplevels == levels && offset < tce_table_size)
+ goto free_tces_exit;
+
+ /* Allocate userspace view of the TCE table */
+@@ -263,8 +303,8 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ &total_allocated_uas);
+ if (!uas)
+ goto free_tces_exit;
+- if (offset < tce_table_size ||
+- total_allocated_uas != total_allocated)
++ if (tmplevels == levels && (offset < tce_table_size ||
++ total_allocated_uas != total_allocated))
+ goto free_uas_exit;
+ }
+
+@@ -275,10 +315,11 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ tbl->it_indirect_levels = levels - 1;
+ tbl->it_allocated_size = total_allocated;
+ tbl->it_userspace = uas;
++ tbl->it_nid = nid;
+
+- pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d\n",
++ pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
+ window_size, tce_table_size, bus_offset, tbl->it_base,
+- tbl->it_userspace, levels);
++ tbl->it_userspace, tmplevels, levels);
+
+ return 0;
+
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
+index fc38f06ee41d..b4475f71a0b4 100644
+--- a/arch/powerpc/platforms/powernv/pci-ioda.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda.c
+@@ -2003,7 +2003,7 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
+ static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction)
+ {
+- long ret = pnv_tce_xchg(tbl, index, hpa, direction);
++ long ret = pnv_tce_xchg(tbl, index, hpa, direction, true);
+
+ if (!ret)
+ pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, false);
+@@ -2014,7 +2014,7 @@ static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
+ static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction)
+ {
+- long ret = pnv_tce_xchg(tbl, index, hpa, direction);
++ long ret = pnv_tce_xchg(tbl, index, hpa, direction, false);
+
+ if (!ret)
+ pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true);
+@@ -2168,7 +2168,7 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
+ static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction)
+ {
+- long ret = pnv_tce_xchg(tbl, index, hpa, direction);
++ long ret = pnv_tce_xchg(tbl, index, hpa, direction, true);
+
+ if (!ret)
+ pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false);
+@@ -2179,7 +2179,7 @@ static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
+ static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction)
+ {
+- long ret = pnv_tce_xchg(tbl, index, hpa, direction);
++ long ret = pnv_tce_xchg(tbl, index, hpa, direction, false);
+
+ if (!ret)
+ pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true);
+diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
+index 2962f6ddb2a8..0020937fc694 100644
+--- a/arch/powerpc/platforms/powernv/pci.h
++++ b/arch/powerpc/platforms/powernv/pci.h
+@@ -266,8 +266,10 @@ extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+ unsigned long attrs);
+ extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
+ extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
+- unsigned long *hpa, enum dma_data_direction *direction);
+-extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index);
++ unsigned long *hpa, enum dma_data_direction *direction,
++ bool alloc);
++extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index,
++ bool alloc);
+ extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
+
+ extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
+index 54ae6c2be1b7..11a4c194d6e3 100644
+--- a/drivers/vfio/vfio_iommu_spapr_tce.c
++++ b/drivers/vfio/vfio_iommu_spapr_tce.c
+@@ -631,7 +631,7 @@ static long tce_iommu_create_table(struct tce_container *container,
+ page_shift, window_size, levels, ptbl);
+
+ WARN_ON(!ret && !(*ptbl)->it_ops->free);
+- WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));
++ WARN_ON(!ret && ((*ptbl)->it_allocated_size > table_size));
+
+ return ret;
+ }
+--
+2.13.7
+
diff --git a/patches.arch/powerpc-powernv-ioda-Finish-removing-explicit-max-wi.patch b/patches.arch/powerpc-powernv-ioda-Finish-removing-explicit-max-wi.patch
new file mode 100644
index 0000000000..39b9c74ea4
--- /dev/null
+++ b/patches.arch/powerpc-powernv-ioda-Finish-removing-explicit-max-wi.patch
@@ -0,0 +1,37 @@
+From ae677ff02f2ddb0980953efd4afed1c90a56c88f Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Thu, 18 Jan 2018 13:51:03 +1100
+Subject: [PATCH] powerpc/powernv/ioda: Finish removing explicit max window
+ size check
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: ae677ff02f2ddb0980953efd4afed1c90a56c88f
+
+Commit 9003a2498 removed the check from the DMA window pages allocator;
+however, the VFIO driver tests the limits before doing so by calling
+the get_table_size hook, which was left behind. This fixes it.
+
+Fixes: 9003a2498 "powerpc/powernv/ioda: Remove explicit max window size check"
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/platforms/powernv/pci-ioda.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
+index b9146d5aef81..1069f9cb273a 100644
+--- a/arch/powerpc/platforms/powernv/pci-ioda.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda.c
+@@ -2572,7 +2572,6 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+ unsigned long direct_table_size;
+
+ if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) ||
+- (window_size > memory_hotplug_max()) ||
+ !is_power_of_2(window_size))
+ return 0;
+
+--
+2.13.7
+
diff --git a/patches.arch/powerpc-powernv-ioda-Remove-explicit-max-window-size.patch b/patches.arch/powerpc-powernv-ioda-Remove-explicit-max-window-size.patch
new file mode 100644
index 0000000000..0b7b438cea
--- /dev/null
+++ b/patches.arch/powerpc-powernv-ioda-Remove-explicit-max-window-size.patch
@@ -0,0 +1,45 @@
+From 9003a249815a15704f415954039d1c7ea27da9ad Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Tue, 7 Nov 2017 14:43:01 +1100
+Subject: [PATCH] powerpc/powernv/ioda: Remove explicit max window size check
+
+References: bsc#1061840
+Patch-mainline: v4.15-rc1
+Git-commit: 9003a249815a15704f415954039d1c7ea27da9ad
+
+DMA windows can only have a power-of-two size on IODA2 hardware, and
+using memory_hotplug_max() to determine the upper limit won't work
+correctly if it returns a value that is not a power of two.
+
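+As a sketch (illustration only, not the platform code itself), a caller can
+derive a valid window size from an arbitrary RAM size given the power-of-two
+constraint; pick_window_size() is a hypothetical helper:
+
+  #include <linux/log2.h>
+
+  /* largest power-of-two window that does not exceed the given RAM size */
+  static u64 pick_window_size(u64 ram_size)
+  {
+          return ram_size ? rounddown_pow_of_two(ram_size) : 0;
+  }
+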
+This removes the check as the platform code does this check in
+pnv_pci_ioda2_setup_default_config() anyway; the other client is VFIO,
+which checks against the locked_vm limit and thus prevents userspace
+from locking too much memory.
+
+It is expected to impact DPDK on machines with non-power-of-two RAM size,
+mostly. KVM guests are less likely to be affected, as guests usually get
+less than half of the host's RAM.
+
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/platforms/powernv/pci-ioda.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
+index 7e87867984e7..749055553064 100644
+--- a/arch/powerpc/platforms/powernv/pci-ioda.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda.c
+@@ -2797,7 +2797,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
+ return -EINVAL;
+
+- if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size))
++ if (!is_power_of_2(window_size))
+ return -EINVAL;
+
+ /* Adjust direct table size from window_size and levels */
+--
+2.13.7
+
diff --git a/patches.arch/powerpc-powernv-ioda2-Reduce-upper-limit-for-DMA-win.patch b/patches.arch/powerpc-powernv-ioda2-Reduce-upper-limit-for-DMA-win.patch
index 94d2f3d057..4b20fb8998 100644
--- a/patches.arch/powerpc-powernv-ioda2-Reduce-upper-limit-for-DMA-win.patch
+++ b/patches.arch/powerpc-powernv-ioda2-Reduce-upper-limit-for-DMA-win.patch
@@ -1,38 +1,48 @@
-From d3d4ffaae439981e1e441ebb125aa3588627c5d8 Mon Sep 17 00:00:00 2001
+From 7233b8cab39014620ac9534da11f0f3e506d8fd8 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
-Date: Fri, 1 Jun 2018 18:06:16 +1000
+Date: Tue, 11 Sep 2018 15:38:05 +1000
Subject: [PATCH] powerpc/powernv/ioda2: Reduce upper limit for DMA window size
-References: bsc#1055120
-Patch-mainline: v4.19-rc1
-Git-commit: d3d4ffaae439981e1e441ebb125aa3588627c5d8
+References: bsc#1061840, bsc#1055120
+Patch-mainline: v4.19-rc6
+Git-commit: 7233b8cab39014620ac9534da11f0f3e506d8fd8
+
+mpe: This was fixed originally in commit d3d4ffaae439
+("powerpc/powernv/ioda2: Reduce upper limit for DMA window size"), but
+contrary to what the merge commit says was inadvertently lost by me in
+commit ce57c6610cc2 ("Merge branch 'topic/ppc-kvm' into next") which
+brought in changes that moved the code to a new file. So reapply it to
+the new file.
+
+Original commit message follows:
We use PHB in mode1 which uses bit 59 to select a correct DMA window.
However there is mode2 which uses bits 59:55 and allows up to 32 DMA
windows per a PE.
Even though documentation does not clearly specify that, it seems that
-the actual hardware does not support bits 59:55 even in mode1, in other
-words we can create a window as big as 1<<58 but DMA simply won't work.
+the actual hardware does not support bits 59:55 even in mode1, in
+other words we can create a window as big as 1<<58 but DMA simply
+won't work.
-This reduces the upper limit from 59 to 55 bits to let the userspace know
-about the hardware limits.
+This reduces the upper limit from 59 to 55 bits to let the userspace
+know about the hardware limits.
-Fixes: 7aafac11e3 "powerpc/powernv/ioda2: Gracefully fail if too many TCE levels requested"
+Fixes: ce57c6610cc2 ("Merge branch 'topic/ppc-kvm' into next")
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Michal Suchanek <msuchanek@suse.de>
---
- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +-
+ arch/powerpc/platforms/powernv/pci-ioda-tce.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
-diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
-index 5bd0eb6681bc..ab678177d36e 100644
---- a/arch/powerpc/platforms/powernv/pci-ioda.c
-+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
-@@ -2840,7 +2840,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+index 6c5db1acbe8d..fe9691040f54 100644
+--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+@@ -276,7 +276,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
level_shift = entries_shift + 3;
- level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
+ level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);
- if ((level_shift - 3) * levels + page_shift >= 60)
+ if ((level_shift - 3) * levels + page_shift >= 55)
diff --git a/patches.arch/powerpc-xive-Move-definition-of-ESB-bits.patch b/patches.arch/powerpc-xive-Move-definition-of-ESB-bits.patch
new file mode 100644
index 0000000000..0937acd51c
--- /dev/null
+++ b/patches.arch/powerpc-xive-Move-definition-of-ESB-bits.patch
@@ -0,0 +1,115 @@
+From 12c1f339cd49119e39063ae67f02d936f988c079 Mon Sep 17 00:00:00 2001
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Date: Fri, 12 Jan 2018 13:39:27 +1100
+Subject: [PATCH] powerpc/xive: Move definition of ESB bits
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc1
+Git-commit: 12c1f339cd49119e39063ae67f02d936f988c079
+
+From xive.h to xive-regs.h, since these are HW register definitions
+and can be used from assembly.
+
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/xive-regs.h | 35 +++++++++++++++++++++++++++++++++++
+ arch/powerpc/include/asm/xive.h | 35 -----------------------------------
+ 2 files changed, 35 insertions(+), 35 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
+index 1d3f2be5ae39..fa4288822b68 100644
+--- a/arch/powerpc/include/asm/xive-regs.h
++++ b/arch/powerpc/include/asm/xive-regs.h
+@@ -10,6 +10,41 @@
+ #define _ASM_POWERPC_XIVE_REGS_H
+
+ /*
++ * "magic" Event State Buffer (ESB) MMIO offsets.
++ *
++ * Each interrupt source has a 2-bit state machine called ESB
++ * which can be controlled by MMIO. It's made of 2 bits, P and
++ * Q. P indicates that an interrupt is pending (has been sent
++ * to a queue and is waiting for an EOI). Q indicates that the
++ * interrupt has been triggered while pending.
++ *
++ * This acts as a coalescing mechanism in order to guarantee
++ * that a given interrupt only occurs at most once in a queue.
++ *
++ * When doing an EOI, the Q bit will indicate if the interrupt
++ * needs to be re-triggered.
++ *
++ * The following offsets into the ESB MMIO allow to read or
++ * manipulate the PQ bits. They must be used with an 8-bytes
++ * load instruction. They all return the previous state of the
++ * interrupt (atomically).
++ *
++ * Additionally, some ESB pages support doing an EOI via a
++ * store at 0 and some ESBs support doing a trigger via a
++ * separate trigger page.
++ */
++#define XIVE_ESB_STORE_EOI 0x400 /* Store */
++#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
++#define XIVE_ESB_GET 0x800 /* Load */
++#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
++#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
++#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
++#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
++
++#define XIVE_ESB_VAL_P 0x2
++#define XIVE_ESB_VAL_Q 0x1
++
++/*
+ * Thread Management (aka "TM") registers
+ */
+
+diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
+index 371fbebf1ec9..0e77005cf021 100644
+--- a/arch/powerpc/include/asm/xive.h
++++ b/arch/powerpc/include/asm/xive.h
+@@ -72,41 +72,6 @@ struct xive_q {
+ atomic_t pending_count;
+ };
+
+-/*
+- * "magic" Event State Buffer (ESB) MMIO offsets.
+- *
+- * Each interrupt source has a 2-bit state machine called ESB
+- * which can be controlled by MMIO. It's made of 2 bits, P and
+- * Q. P indicates that an interrupt is pending (has been sent
+- * to a queue and is waiting for an EOI). Q indicates that the
+- * interrupt has been triggered while pending.
+- *
+- * This acts as a coalescing mechanism in order to guarantee
+- * that a given interrupt only occurs at most once in a queue.
+- *
+- * When doing an EOI, the Q bit will indicate if the interrupt
+- * needs to be re-triggered.
+- *
+- * The following offsets into the ESB MMIO allow to read or
+- * manipulate the PQ bits. They must be used with an 8-bytes
+- * load instruction. They all return the previous state of the
+- * interrupt (atomically).
+- *
+- * Additionally, some ESB pages support doing an EOI via a
+- * store at 0 and some ESBs support doing a trigger via a
+- * separate trigger page.
+- */
+-#define XIVE_ESB_STORE_EOI 0x400 /* Store */
+-#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
+-#define XIVE_ESB_GET 0x800 /* Load */
+-#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
+-#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
+-#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
+-#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
+-
+-#define XIVE_ESB_VAL_P 0x2
+-#define XIVE_ESB_VAL_Q 0x1
+-
+ /* Global enable flags for the XIVE support */
+ extern bool __xive_enabled;
+
+--
+2.13.7
+
diff --git a/patches.arch/powerpc-xmon-Add-ISA-v3.0-SPRs-to-SPR-dump.patch b/patches.arch/powerpc-xmon-Add-ISA-v3.0-SPRs-to-SPR-dump.patch
new file mode 100644
index 0000000000..ff983405d8
--- /dev/null
+++ b/patches.arch/powerpc-xmon-Add-ISA-v3.0-SPRs-to-SPR-dump.patch
@@ -0,0 +1,80 @@
+From d1e1b351f50f9e5941f436f6c63949731979e00c Mon Sep 17 00:00:00 2001
+From: Balbir Singh <bsingharora@gmail.com>
+Date: Wed, 30 Aug 2017 21:45:09 +1000
+Subject: [PATCH] powerpc/xmon: Add ISA v3.0 SPRs to SPR dump
+
+References: bsc#1061840
+Patch-mainline: v4.14-rc1
+Git-commit: d1e1b351f50f9e5941f436f6c63949731979e00c
+
+Add support for printing the PIDR/TIDR for ISA v3.0, and PSSCR and PTCR
+in ISA v3.0 hypervisor mode.
+
+SPRN_PSSCR_PR is the privileged mode access and is used when we are
+not in hypervisor mode.
+
+Signed-off-by: Balbir Singh <bsingharora@gmail.com>
+[mpe: Split out of larger patch]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/reg.h | 1 +
+ arch/powerpc/xmon/xmon.c | 23 +++++++++++++++++++++++
+ 2 files changed, 24 insertions(+)
+
+diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
+index c36823d64ec9..2c4366ada976 100644
+--- a/arch/powerpc/include/asm/reg.h
++++ b/arch/powerpc/include/asm/reg.h
+@@ -356,6 +356,7 @@
+ #define SPRN_PMSR 0x355 /* Power Management Status Reg */
+ #define SPRN_PMMAR 0x356 /* Power Management Memory Activity Register */
+ #define SPRN_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */
++#define SPRN_PSSCR_PR 0x337 /* PSSCR ISA 3.0, privileged mode access */
+ #define SPRN_PMCR 0x374 /* Power Management Control Register */
+
+ /* HFSCR and FSCR bit numbers are the same */
+diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
+index 9d2c5e0ef305..33351c6704b1 100644
+--- a/arch/powerpc/xmon/xmon.c
++++ b/arch/powerpc/xmon/xmon.c
+@@ -1807,6 +1807,28 @@ static void dump_207_sprs(void)
+ #endif
+ }
+
++static void dump_300_sprs(void)
++{
++#ifdef CONFIG_PPC64
++ bool hv = mfmsr() & MSR_HV;
++
++ if (!cpu_has_feature(CPU_FTR_ARCH_300))
++ return;
++
++ printf("pidr = %.16lx tidr = %.16lx\n",
++ mfspr(SPRN_PID), mfspr(SPRN_TIDR));
++ printf("asdr = %.16lx psscr = %.16lx\n",
++ mfspr(SPRN_ASDR), hv ? mfspr(SPRN_PSSCR)
++ : mfspr(SPRN_PSSCR_PR));
++
++ if (!hv)
++ return;
++
++ printf("ptcr = %.16lx\n",
++ mfspr(SPRN_PTCR));
++#endif
++}
++
+ static void dump_one_spr(int spr, bool show_unimplemented)
+ {
+ unsigned long val;
+@@ -1860,6 +1882,7 @@ static void super_regs(void)
+
+ dump_206_sprs();
+ dump_207_sprs();
++ dump_300_sprs();
+
+ return;
+ }
+--
+2.13.7
+
diff --git a/patches.arch/x86-kexec-correct-kexec_backup_src_end-off-by-one-error.patch b/patches.arch/x86-kexec-correct-kexec_backup_src_end-off-by-one-error.patch
new file mode 100644
index 0000000000..52d767bdff
--- /dev/null
+++ b/patches.arch/x86-kexec-correct-kexec_backup_src_end-off-by-one-error.patch
@@ -0,0 +1,62 @@
+From: Bjorn Helgaas <bhelgaas@google.com>
+Date: Thu, 27 Sep 2018 09:21:55 -0500
+Subject: x86/kexec: Correct KEXEC_BACKUP_SRC_END off-by-one error
+Git-commit: 51fbf14f2528a8c6401290e37f1c893a2412f1d3
+Patch-mainline: v4.20-rc1
+References: bsc#1114279
+
+The only use of KEXEC_BACKUP_SRC_END is as an argument to
+walk_system_ram_res():
+
+ int crash_load_segments(struct kimage *image)
+ {
+ ...
+ walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END,
+ image, determine_backup_region);
+
+walk_system_ram_res() expects "start, end" arguments that are inclusive,
+i.e., the range to be walked includes both the start and end addresses.
+
+KEXEC_BACKUP_SRC_END was previously defined as (640 * 1024UL), which is the
+first address *past* the desired 0-640KB range.
+
+Define KEXEC_BACKUP_SRC_END as (640 * 1024UL - 1) so the KEXEC_BACKUP_SRC
+region is [0-0x9ffff], not [0-0xa0000].
+
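+A minimal sketch (illustration only, not part of the patch) of the
+inclusive-end convention this relies on; in_backup_src() is a hypothetical
+helper:
+
+  #define BACKUP_SRC_START (0UL)
+  #define BACKUP_SRC_END   (640 * 1024UL - 1)   /* last byte: 0x9ffff */
+
+  /* both bounds are part of the region */
+  static bool in_backup_src(unsigned long addr)
+  {
+          return addr >= BACKUP_SRC_START && addr <= BACKUP_SRC_END;
+  }
+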
+Fixes: dd5f726076cc ("kexec: support for kexec on panic using new system call")
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+CC: "H. Peter Anvin" <hpa@zytor.com>
+CC: Andrew Morton <akpm@linux-foundation.org>
+CC: Brijesh Singh <brijesh.singh@amd.com>
+CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+CC: Ingo Molnar <mingo@redhat.com>
+CC: Lianbo Jiang <lijiang@redhat.com>
+CC: Takashi Iwai <tiwai@suse.de>
+CC: Thomas Gleixner <tglx@linutronix.de>
+CC: Tom Lendacky <thomas.lendacky@amd.com>
+CC: Vivek Goyal <vgoyal@redhat.com>
+CC: baiyaowei@cmss.chinamobile.com
+CC: bhe@redhat.com
+CC: dan.j.williams@intel.com
+CC: dyoung@redhat.com
+CC: kexec@lists.infradead.org
+Link: http://lkml.kernel.org/r/153805811578.1157.6948388946904655969.stgit@bhelgaas-glaptop.roam.corp.google.com
+---
+ arch/x86/include/asm/kexec.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
+index f327236f0fa7..5125fca472bb 100644
+--- a/arch/x86/include/asm/kexec.h
++++ b/arch/x86/include/asm/kexec.h
+@@ -67,7 +67,7 @@ struct kimage;
+
+ /* Memory to backup during crash kdump */
+ #define KEXEC_BACKUP_SRC_START (0UL)
+-#define KEXEC_BACKUP_SRC_END (640 * 1024UL) /* 640K */
++#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */
+
+ /*
+ * CPU does not save ss and sp on stack if execution is already
+
diff --git a/patches.drivers/edac-thunderx-fix-memory-leak-in-thunderx_l2c_threaded_isr.patch b/patches.drivers/edac-thunderx-fix-memory-leak-in-thunderx_l2c_threaded_isr.patch
new file mode 100644
index 0000000000..8d07e3208f
--- /dev/null
+++ b/patches.drivers/edac-thunderx-fix-memory-leak-in-thunderx_l2c_threaded_isr.patch
@@ -0,0 +1,47 @@
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Sat, 13 Oct 2018 13:28:43 +0300
+Subject: EDAC, thunderx: Fix memory leak in thunderx_l2c_threaded_isr()
+Git-commit: d8c27ba86a2fd806d3957e5a9b30e66dfca2a61d
+Patch-mainline: v4.20-rc1
+References: bsc#1114279
+
+Fix memory leak in L2c threaded interrupt handler.
+
+ [ bp: Rewrite commit message. ]
+
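+The shape of the fix, as a hedged sketch (device_supported() and the buffer
+handling are illustrative, not the driver's actual code): route every exit
+through the cleanup label so the buffer allocated at entry is always freed.
+
+  static irqreturn_t example_threaded_isr(int irq, void *irq_id)
+  {
+          irqreturn_t ret = IRQ_NONE;
+          void *other = kmalloc(PAGE_SIZE, GFP_KERNEL);
+
+          if (!other)
+                  return IRQ_NONE;
+
+          if (!device_supported(irq_id))
+                  goto err_free;  /* was "return IRQ_NONE", leaking 'other' */
+
+          /* ... drain the error ring ... */
+          ret = IRQ_HANDLED;
+
+  err_free:
+          kfree(other);
+          return ret;
+  }
+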
+Fixes: 41003396f932 ("EDAC, thunderx: Add Cavium ThunderX EDAC driver")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+CC: David Daney <david.daney@cavium.com>
+CC: Jan Glauber <jglauber@cavium.com>
+CC: Mauro Carvalho Chehab <mchehab@kernel.org>
+CC: Sergey Temerkhanov <s.temerkhanov@gmail.com>
+CC: linux-edac <linux-edac@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20181013102843.GG16086@mwanda
+---
+ drivers/edac/thunderx_edac.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
+index c009d94f40c5..34be60fe6892 100644
+--- a/drivers/edac/thunderx_edac.c
++++ b/drivers/edac/thunderx_edac.c
+@@ -1884,7 +1884,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
+ default:
+ dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n",
+ l2c->pdev->device);
+- return IRQ_NONE;
++ goto err_free;
+ }
+
+ while (CIRC_CNT(l2c->ring_head, l2c->ring_tail,
+@@ -1906,7 +1906,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
+ l2c->ring_tail++;
+ }
+
+- return IRQ_HANDLED;
++ ret = IRQ_HANDLED;
+
+ err_free:
+ kfree(other);
+
diff --git a/patches.fixes/resource-include-resource-end-in-walk_-interfaces.patch b/patches.fixes/resource-include-resource-end-in-walk_-interfaces.patch
new file mode 100644
index 0000000000..540e6de955
--- /dev/null
+++ b/patches.fixes/resource-include-resource-end-in-walk_-interfaces.patch
@@ -0,0 +1,79 @@
+From: Bjorn Helgaas <bhelgaas@google.com>
+Date: Thu, 27 Sep 2018 09:22:02 -0500
+Subject: resource: Include resource end in walk_*() interfaces
+Git-commit: a98959fdbda1849a01b2150bb635ed559ec06700
+Patch-mainline: v4.20-rc1
+References: bsc#1114279
+
+find_next_iomem_res() finds an iomem resource that covers part of a range
+described by "start, end". All callers expect that range to be inclusive,
+i.e., both start and end are included, but find_next_iomem_res() doesn't
+handle the end address correctly.
+
+If it finds an iomem resource that contains exactly the end address, it
+skips it, e.g., if "start, end" is [0x0-0x10000] and there happens to be an
+iomem resource [mem 0x10000-0x10000] (the single byte at 0x10000), we skip
+it:
+
+ find_next_iomem_res(...)
+ {
+ start = 0x0;
+ end = 0x10000;
+ for (p = next_resource(...)) {
+ # p->start = 0x10000;
+ # p->end = 0x10000;
+ # we *should* return this resource, but this condition is false:
+ if ((p->end >= start) && (p->start < end))
+ break;
+
+Adjust find_next_iomem_res() so it allows a resource that includes the
+single byte at the end of the range. This is a corner case that we
+probably don't see in practice.
+
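+The corrected condition amounts to the standard overlap test for two
+inclusive ranges; a minimal sketch (not from the patch):
+
+  /* [start1, end1] and [start2, end2] overlap iff neither lies wholly
+   * before the other; both bounds are inclusive */
+  static bool ranges_overlap(u64 start1, u64 end1, u64 start2, u64 end2)
+  {
+          return end1 >= start2 && start1 <= end2;
+  }
+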
+Fixes: 58c1b5b07907 ("[PATCH] memory hotadd fixes: find_next_system_ram catch range fix")
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+CC: Andrew Morton <akpm@linux-foundation.org>
+CC: Brijesh Singh <brijesh.singh@amd.com>
+CC: Dan Williams <dan.j.williams@intel.com>
+CC: H. Peter Anvin <hpa@zytor.com>
+CC: Lianbo Jiang <lijiang@redhat.com>
+CC: Takashi Iwai <tiwai@suse.de>
+CC: Thomas Gleixner <tglx@linutronix.de>
+CC: Tom Lendacky <thomas.lendacky@amd.com>
+CC: Vivek Goyal <vgoyal@redhat.com>
+CC: Yaowei Bai <baiyaowei@cmss.chinamobile.com>
+CC: bhe@redhat.com
+CC: dan.j.williams@intel.com
+CC: dyoung@redhat.com
+CC: kexec@lists.infradead.org
+CC: mingo@redhat.com
+CC: x86-ml <x86@kernel.org>
+Link: http://lkml.kernel.org/r/153805812254.1157.16736368485811773752.stgit@bhelgaas-glaptop.roam.corp.google.com
+---
+ kernel/resource.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/resource.c b/kernel/resource.c
+index 30e1bc68503b..155ec873ea4d 100644
+--- a/kernel/resource.c
++++ b/kernel/resource.c
+@@ -319,7 +319,7 @@ int release_resource(struct resource *old)
+ EXPORT_SYMBOL(release_resource);
+
+ /*
+- * Finds the lowest iomem resource existing within [res->start.res->end).
++ * Finds the lowest iomem resource existing within [res->start..res->end].
+ * The caller must specify res->start, res->end, res->flags, and optionally
+ * desc. If found, returns 0, res is overwritten, if not found, returns -1.
+ * This function walks the whole tree and not just first level children until
+@@ -352,7 +352,7 @@ static int find_next_iomem_res(struct resource *res, unsigned long desc,
+ p = NULL;
+ break;
+ }
+- if ((p->end >= start) && (p->start < end))
++ if ((p->end >= start) && (p->start <= end))
+ break;
+ }
+
+
diff --git a/patches.kabi/KABI-hide-new-member-in-struct-iommu_table-from-genk.patch b/patches.kabi/KABI-hide-new-member-in-struct-iommu_table-from-genk.patch
new file mode 100644
index 0000000000..f9be4a9554
--- /dev/null
+++ b/patches.kabi/KABI-hide-new-member-in-struct-iommu_table-from-genk.patch
@@ -0,0 +1,123 @@
+From 11e13750b26517cee98c734463af5fc1dbcca9fa Mon Sep 17 00:00:00 2001
+From: Michal Suchanek <msuchanek@suse.de>
+Date: Tue, 30 Oct 2018 23:24:32 +0100
+Subject: [PATCH] KABI: hide new member in struct iommu_table from genksyms.
+
+References: bsc#1061840
+Patch-mainline: no, kabi
+
+The it_userspace table should not be used outside KVM. If anyone abuses
+the pointer for anything else, they can continue to do so, but they will
+not get access to the KVM-internal data there. A separate pointer is
+added for the new sparse table format.
+
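+The general shape of the trick, as a sketch (the struct and member names
+here are made up; the real change is to struct iommu_table below): new
+members are appended behind #ifndef __GENKSYMS__ so the symbol checksum,
+and therefore the kABI, stays unchanged for a structure that is only ever
+allocated by the kernel itself.
+
+  struct example {
+          unsigned long old_member;       /* part of the checksummed layout */
+  #ifndef __GENKSYMS__
+          int new_member;                 /* invisible to genksyms */
+  #endif
+  };
+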
+Signed-off-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/iommu.h | 16 +++++++++++-----
+ arch/powerpc/platforms/powernv/pci-ioda.c | 11 +++++++++--
+ 2 files changed, 20 insertions(+), 7 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
+index 1317e3bcabe8..e1609a12f148 100644
+--- a/arch/powerpc/include/asm/iommu.h
++++ b/arch/powerpc/include/asm/iommu.h
+@@ -69,8 +69,6 @@ struct iommu_table_ops {
+ long index,
+ unsigned long *hpa,
+ enum dma_data_direction *direction);
+-
+- __be64 *(*useraddrptr)(struct iommu_table *tbl, long index, bool alloc);
+ #endif
+ void (*clear)(struct iommu_table *tbl,
+ long index, long npages);
+@@ -79,6 +77,9 @@ struct iommu_table_ops {
+ void (*flush)(struct iommu_table *tbl);
+ void (*free)(struct iommu_table *tbl);
+ };
++struct iommu_table_ops_2 {
++ __be64 *(*useraddrptr)(struct iommu_table *tbl, long index, bool alloc);
++};
+
+ /* These are used by VIO */
+ extern struct iommu_table_ops iommu_table_lpar_multi_ops;
+@@ -119,16 +120,21 @@ struct iommu_table {
+ unsigned long *it_map; /* A simple allocation bitmap for now */
+ unsigned long it_page_shift;/* table iommu page size */
+ struct list_head it_group_list;/* List of iommu_table_group_link */
+- __be64 *it_userspace; /* userspace view of the table */
++ unsigned long *it_userspace; /* userspace view of the table */
+ struct iommu_table_ops *it_ops;
+ struct kref it_kref;
++#ifndef __GENKSYMS__
++#define it_userspace it_userspace_sparse
++ __be64 *it_userspace; /* userspace view of the table */
+ int it_nid;
++ struct iommu_table_ops_2 *it_ops2;
++#endif
+ };
+
+ #define IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry) \
+- ((tbl)->it_ops->useraddrptr((tbl), (entry), false))
++ ((tbl)->it_ops2->useraddrptr((tbl), (entry), false))
+ #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
+- ((tbl)->it_ops->useraddrptr((tbl), (entry), true))
++ ((tbl)->it_ops2->useraddrptr((tbl), (entry), true))
+
+ /* Pure 2^n version of get_order */
+ static inline __attribute_const__
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
+index 1ade757b7fc6..a904e98d74c4 100644
+--- a/arch/powerpc/platforms/powernv/pci-ioda.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda.c
+@@ -1881,11 +1881,13 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
+ #ifdef CONFIG_IOMMU_API
+ .exchange = pnv_ioda1_tce_xchg,
+ .exchange_rm = pnv_ioda1_tce_xchg_rm,
+- .useraddrptr = pnv_tce_useraddrptr,
+ #endif
+ .clear = pnv_ioda1_tce_free,
+ .get = pnv_tce_get,
+ };
++static struct iommu_table_ops_2 pnv_ioda1_iommu_ops_2 = {
++ .useraddrptr = pnv_tce_useraddrptr,
++};
+
+ #define PHB3_TCE_KILL_INVAL_ALL PPC_BIT(0)
+ #define PHB3_TCE_KILL_INVAL_PE PPC_BIT(1)
+@@ -2051,13 +2053,16 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = {
+ #ifdef CONFIG_IOMMU_API
+ .exchange = pnv_ioda2_tce_xchg,
+ .exchange_rm = pnv_ioda2_tce_xchg_rm,
+- .useraddrptr = pnv_tce_useraddrptr,
+ #endif
+ .clear = pnv_ioda2_tce_free,
+ .get = pnv_tce_get,
+ .free = pnv_ioda2_table_free,
+ };
+
++static struct iommu_table_ops_2 pnv_ioda2_iommu_ops_2 = {
++ .useraddrptr = pnv_tce_useraddrptr,
++};
++
+ static int pnv_pci_ioda_dev_dma_weight(struct pci_dev *dev, void *data)
+ {
+ unsigned int *weight = (unsigned int *)data;
+@@ -2212,6 +2217,7 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
+ IOMMU_PAGE_SHIFT_4K);
+
+ tbl->it_ops = &pnv_ioda1_iommu_ops;
++ tbl->it_ops2 = &pnv_ioda1_iommu_ops_2;
+ pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift;
+ pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
+ iommu_init_table(tbl, phb->hose->node);
+@@ -2321,6 +2327,7 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
+ return -ENOMEM;
+
+ tbl->it_ops = &pnv_ioda2_iommu_ops;
++ tbl->it_ops2 = &pnv_ioda2_iommu_ops_2;
+
+ ret = pnv_pci_ioda2_table_alloc_pages(nid,
+ bus_offset, page_shift, window_size,
+--
+2.13.7
+
diff --git a/patches.kabi/KABI-powerpc-export-__find_linux_pte-as-__find_linux.patch b/patches.kabi/KABI-powerpc-export-__find_linux_pte-as-__find_linux.patch
new file mode 100644
index 0000000000..de3b0fe867
--- /dev/null
+++ b/patches.kabi/KABI-powerpc-export-__find_linux_pte-as-__find_linux.patch
@@ -0,0 +1,28 @@
+From a30085bec032a742c3f57acbba989b6ad00ba97f Mon Sep 17 00:00:00 2001
+From: Michal Suchanek <msuchanek@suse.de>
+Date: Thu, 25 Oct 2018 22:51:47 +0200
+Subject: [PATCH] KABI: powerpc: export __find_linux_pte as
+ __find_linux_pte_or_hugepte
+
+References: bsc#1061840
+Patch-mainline: no, kabi
+Signed-off-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/pte-walk.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h
+index 2d633e9d686c..20bf907cbbea 100644
+--- a/arch/powerpc/include/asm/pte-walk.h
++++ b/arch/powerpc/include/asm/pte-walk.h
+@@ -1,5 +1,7 @@
+ #ifndef _ASM_POWERPC_PTE_WALK_H
+ #define _ASM_POWERPC_PTE_WALK_H
++/* KABI: export the old name */
++#define __find_linux_pte __find_linux_pte_or_hugepte
+
+ #include <linux/sched.h>
+
+--
+2.13.7
+
diff --git a/patches.kabi/KABI-powerpc-mmu_context-provide-old-version-of-mm_i.patch b/patches.kabi/KABI-powerpc-mmu_context-provide-old-version-of-mm_i.patch
index 1db23125d0..c0661e6c47 100644
--- a/patches.kabi/KABI-powerpc-mmu_context-provide-old-version-of-mm_i.patch
+++ b/patches.kabi/KABI-powerpc-mmu_context-provide-old-version-of-mm_i.patch
@@ -9,36 +9,34 @@ Patch-mainline: Never, kabi
Signed-off-by: Michal Suchanek <msuchanek@suse.de>
---
- arch/powerpc/include/asm/mmu_context.h | 3 +++
- arch/powerpc/mm/mmu_context_iommu.c | 43 ++++++++++++++++++++++++++++++++++
- 2 files changed, 46 insertions(+)
+ arch/powerpc/include/asm/mmu_context.h | 2 ++
+ arch/powerpc/mm/mmu_context_iommu.c | 20 ++++++++++++++++++++
+ 2 files changed, 22 insertions(+)
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
-index 2bfb46efb4e4..04a557e2bb07 100644
+index 6caaecd7e911..27937b3cf817 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
-@@ -33,6 +33,9 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
+@@ -33,6 +33,8 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
struct mm_struct *mm, unsigned long ua, unsigned long size);
extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
unsigned long ua, unsigned long entries);
+/* rename so old compat version can be provided */
+#define mm_iommu_ua_to_hpa mm_iommu_ua_to_hpa_shift
-+#define mm_iommu_ua_to_hpa_rm mm_iommu_ua_to_hpa_shift_rm
extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa);
extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
-index 78ff91f13e45..d0dbc56967d4 100644
+index 3d0d8650ecc2..a71135c6260e 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
-@@ -438,3 +438,46 @@ void mm_iommu_init(struct mm_struct *mm)
+@@ -466,3 +466,23 @@ void mm_iommu_init(struct mm_struct *mm)
{
INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
}
+
+/* old version for KABI compatibility */
+#undef mm_iommu_ua_to_hpa
-+#undef mm_iommu_ua_to_hpa_rm
+
+long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+ unsigned long ua, unsigned long *hpa)
@@ -56,28 +54,6 @@ index 78ff91f13e45..d0dbc56967d4 100644
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
-+
-+long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
-+ unsigned long ua, unsigned long *hpa)
-+{
-+ const long entry = (ua - mem->ua) >> PAGE_SHIFT;
-+ void *va = &mem->hpas[entry];
-+ unsigned long *pa;
-+
-+ WARN("Use of old and insecure %s API\n", __func__);
-+
-+ if (entry >= mem->entries)
-+ return -EFAULT;
-+
-+ pa = (void *) vmalloc_to_phys(va);
-+ if (!pa)
-+ return -EFAULT;
-+
-+ *hpa = *pa | (ua & ~PAGE_MASK);
-+
-+ return 0;
-+}
-+EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm);
--
2.13.7
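The refreshed patch now wraps only mm_iommu_ua_to_hpa(); the real-mode twin is no longer provided, as the removed _rm hunks show. The remaining compat body is largely elided above, so as a hedged sketch of the general pattern only (not the actual elided code): the old two-argument prototype stays exported while internally relying on the renamed, pageshift-aware implementation, e.g. by assuming the base page size:

/* Hedged illustration of the compat-export pattern; not the elided body. */
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
			unsigned long ua, unsigned long *hpa)
{
	/* Old callers cannot pass a page shift, so assume PAGE_SHIFT. */
	return mm_iommu_ua_to_hpa_shift(mem, ua, PAGE_SHIFT, hpa);
}
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);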
diff --git a/patches.suse/livepatch-create-and-include-UAPI-headers.patch b/patches.suse/livepatch-create-and-include-UAPI-headers.patch
new file mode 100644
index 0000000000..d3ca1a7203
--- /dev/null
+++ b/patches.suse/livepatch-create-and-include-UAPI-headers.patch
@@ -0,0 +1,116 @@
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Subject: livepatch: create and include UAPI headers
+Patch-mainline: Submitted, https://lore.kernel.org/lkml/20170829190140.401-1-jmoreira@suse.de/
+References: fate#326849
+
+Replace hard-coded values in kernel/livepatch/core.c with defines and put
+them into a new UAPI file so they can also be used by klp-convert, an ELF
+processing tool for formatting live patching relocations properly.
+
+Define macros KLP_MODULE_RELOC and KLP_SYMPOS in
+include/linux/livepatch.h to improve user-friendliness of the
+livepatch annotation process.
+
+Update MAINTAINERS.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Reviewed-by: Petr Mladek <pmladek@suse.com>
+Signed-off-by: Joao Moreira <jmoreira@suse.de>
+Signed-off-by: Miroslav Benes <mbenes@suse.cz>
+
+---
+ MAINTAINERS | 1 +
+ include/linux/livepatch.h | 12 ++++++++++++
+ include/uapi/linux/livepatch.h | 28 ++++++++++++++++++++++++++++
+ kernel/livepatch/core.c | 4 ++--
+ 4 files changed, 43 insertions(+), 2 deletions(-)
+
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -7728,6 +7728,7 @@ R: Petr Mladek <pmladek@suse.com>
+ S: Maintained
+ F: kernel/livepatch/
+ F: include/linux/livepatch.h
++F: include/uapi/linux/livepatch.h
+ F: arch/x86/include/asm/livepatch.h
+ F: arch/x86/kernel/livepatch.c
+ F: Documentation/livepatch/
+--- a/include/linux/livepatch.h
++++ b/include/linux/livepatch.h
+@@ -25,6 +25,7 @@
+ #include <linux/ftrace.h>
+ #include <linux/completion.h>
+ #include <linux/list.h>
++#include <uapi/linux/livepatch.h>
+
+ #if IS_ENABLED(CONFIG_LIVEPATCH)
+
+@@ -230,6 +231,17 @@ void *klp_shadow_get_or_alloc(void *obj,
+ void klp_shadow_free(void *obj, unsigned long id, klp_shadow_dtor_t dtor);
+ void klp_shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor);
+
++/* Used to annotate symbol relocations in live patches */
++#define KLP_MODULE_RELOC(obj) \
++ struct klp_module_reloc \
++ __attribute__((__section__(".klp.module_relocs." #obj)))
++
++#define KLP_SYMPOS(symbol, pos) \
++ { \
++ .sym = &symbol, \
++ .sympos = pos, \
++ },
++
+ #else /* !CONFIG_LIVEPATCH */
+
+ static inline int klp_module_coming(struct module *mod) { return 0; }
+--- /dev/null
++++ b/include/uapi/linux/livepatch.h
+@@ -0,0 +1,28 @@
++/*
++ * livepatch.h - Kernel Live Patching Core
++ *
++ * Copyright (C) 2016 Josh Poimboeuf <jpoimboe@redhat.com>
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version 2
++ * of the License, or (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, see <http://www.gnu.org/licenses/>.
++ */
++
++#ifndef _UAPI_LIVEPATCH_H
++#define _UAPI_LIVEPATCH_H
++
++#include <linux/types.h>
++
++#define KLP_RELA_PREFIX ".klp.rela."
++#define KLP_SYM_PREFIX ".klp.sym."
++
++#endif /* _UAPI_LIVEPATCH_H */
+--- a/kernel/livepatch/core.c
++++ b/kernel/livepatch/core.c
+@@ -261,7 +261,7 @@ static int klp_resolve_symbols(Elf_Shdr
+
+ /* Format: .klp.sym.objname.symname,sympos */
+ cnt = sscanf(strtab + sym->st_name,
+- ".klp.sym.%55[^.].%127[^,],%lu",
++ KLP_SYM_PREFIX "%55[^.].%127[^,],%lu",
+ objname, symname, &sympos);
+ if (cnt != 3) {
+ pr_err("symbol %s has an incorrectly formatted name\n",
+@@ -307,7 +307,7 @@ static int klp_write_object_relocations(
+ * See comment in klp_resolve_symbols() for an explanation
+ * of the selected field width value.
+ */
+- cnt = sscanf(secname, ".klp.rela.%55[^.]", sec_objname);
++ cnt = sscanf(secname, KLP_RELA_PREFIX "%55[^.]", sec_objname);
+ if (cnt != 1) {
+ pr_err("section %s has an incorrectly formatted name\n",
+ secname);
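The KLP_MODULE_RELOC()/KLP_SYMPOS() helpers added to include/linux/livepatch.h are what live-patch sources use to annotate symbol references that klp-convert later rewrites into the .klp.rela.<objname> / .klp.sym.<objname>.<symname>,<sympos> form parsed above via KLP_RELA_PREFIX and KLP_SYM_PREFIX. A hedged usage sketch, assuming struct klp_module_reloc (with .sym and .sympos members, as the KLP_SYMPOS initializer implies) is provided elsewhere in the klp-convert series, and using an arbitrary example symbol:

#include <linux/livepatch.h>

/* Example target symbol the live patch needs to reference. */
extern char *saved_command_line;

/* Place the annotation in .klp.module_relocs.vmlinux; sympos 0 = unique symbol. */
KLP_MODULE_RELOC(vmlinux) vmlinux_relocs[] = {
	KLP_SYMPOS(saved_command_line, 0)
};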
diff --git a/patches.suse/livepatch-modpost-ignore-unresolved-symbols.patch b/patches.suse/livepatch-modpost-ignore-unresolved-symbols.patch
new file mode 100644
index 0000000000..bfc991ab24
--- /dev/null
+++ b/patches.suse/livepatch-modpost-ignore-unresolved-symbols.patch
@@ -0,0 +1,50 @@
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Subject: modpost: ignore livepatch unresolved relocations
+Patch-mainline: Submitted, https://lore.kernel.org/lkml/20170829190140.401-1-jmoreira@suse.de/
+References: fate#326849
+
+Make modpost ignore unresolved symbols when handling livepatches. These symbols
+are to be taken care of by klp-convert.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Reviewed-by: Petr Mladek <pmladek@suse.com>
+Signed-off-by: Joao Moreira <jmoreira@suse.de>
+Signed-off-by: Miroslav Benes <mbenes@suse.cz>
+
+---
+ scripts/mod/modpost.c | 6 +++++-
+ scripts/mod/modpost.h | 1 +
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -2072,6 +2072,10 @@ static void read_symbols(char *modname)
+ "license", license);
+ }
+
++ /* Livepatch modules have unresolved symbols resolved by klp-convert */
++ if (get_modinfo(info.modinfo, info.modinfo_len, "livepatch"))
++ mod->livepatch = 1;
++
+ for (sym = info.symtab_start; sym < info.symtab_stop; sym++) {
+ symname = remove_dot(info.strtab + sym->st_name);
+
+@@ -2282,7 +2286,7 @@ static int add_versions(struct buffer *b
+ for (s = mod->unres; s; s = s->next) {
+ exp = find_symbol(s->name);
+ if (!exp || exp->module == mod) {
+- if (have_vmlinux && !s->weak) {
++ if (have_vmlinux && !s->weak && !mod->livepatch) {
+ if (warn_unresolved) {
+ warn("\"%s\" [%s.ko] undefined!\n",
+ s->name, mod->name);
+--- a/scripts/mod/modpost.h
++++ b/scripts/mod/modpost.h
+@@ -117,6 +117,7 @@ struct module {
+ int skip;
+ int has_init;
+ int has_cleanup;
++ int livepatch;
+ struct buffer dev_table_buf;
+ char srcversion[25];
+ int is_dot_o;
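modpost only relaxes the "undefined!" warning for modules whose modinfo carries the livepatch tag that get_modinfo() looks up above. Live-patch modules get that tag from a MODULE_INFO() declaration in their source; a minimal sketch of the relevant part of such a module (the klp_func/klp_object/klp_patch definitions are omitted):

#include <linux/module.h>
#include <linux/livepatch.h>

/* ... struct klp_func / klp_object / klp_patch and module init/exit ... */

MODULE_LICENSE("GPL");
/* The modinfo tag that makes modpost set mod->livepatch above. */
MODULE_INFO(livepatch, "Y");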
diff --git a/patches.suse/tty-Don-t-block-on-IO-when-ldisc-change-is-pending.patch b/patches.suse/tty-Don-t-block-on-IO-when-ldisc-change-is-pending.patch
new file mode 100644
index 0000000000..b1f79dcb3e
--- /dev/null
+++ b/patches.suse/tty-Don-t-block-on-IO-when-ldisc-change-is-pending.patch
@@ -0,0 +1,145 @@
+From: Dmitry Safonov <dima@arista.com>
+Date: Thu, 1 Nov 2018 00:24:48 +0000
+Subject: tty: Don't block on IO when ldisc change is pending
+Patch-mainline: Submitted on 2018/11/1
+References: bnc#1105428
+
+There might be situations where tty_ldisc_lock() has blocked, but there
+is already IO on the tty that prevents the line discipline change.
+Theoretically, this might turn into a deadlock.
+
+Basically, give a pending tty_ldisc_lock() priority over the servicing
+of reads and writes on the tty.
+
+A user-visible issue was reported by Mikulas: on pa-risc with Debian 5,
+reboot took either 80 seconds, 3 minutes, or 3:25 after proper locking
+was added in tty_reopen().
+
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Jiri Slaby <jslaby@suse.com>
+Reported-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Dmitry Safonov <dima@arista.com>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ drivers/tty/n_hdlc.c | 4 ++--
+ drivers/tty/n_r3964.c | 2 +-
+ drivers/tty/n_tty.c | 8 ++++----
+ drivers/tty/tty_ldisc.c | 7 +++++++
+ include/linux/tty.h | 7 +++++++
+ 5 files changed, 21 insertions(+), 7 deletions(-)
+
+--- a/drivers/tty/n_hdlc.c
++++ b/drivers/tty/n_hdlc.c
+@@ -613,7 +613,7 @@ static ssize_t n_hdlc_tty_read(struct tt
+ }
+
+ /* no data */
+- if (file->f_flags & O_NONBLOCK) {
++ if (tty_io_nonblock(tty, file)) {
+ ret = -EAGAIN;
+ break;
+ }
+@@ -680,7 +680,7 @@ static ssize_t n_hdlc_tty_write(struct t
+ if (tbuf)
+ break;
+
+- if (file->f_flags & O_NONBLOCK) {
++ if (tty_io_nonblock(tty, file)) {
+ error = -EAGAIN;
+ break;
+ }
+--- a/drivers/tty/n_r3964.c
++++ b/drivers/tty/n_r3964.c
+@@ -1080,7 +1080,7 @@ static ssize_t r3964_read(struct tty_str
+ pMsg = remove_msg(pInfo, pClient);
+ if (pMsg == NULL) {
+ /* no messages available. */
+- if (file->f_flags & O_NONBLOCK) {
++ if (tty_io_nonblock(tty, file)) {
+ ret = -EAGAIN;
+ goto unlock;
+ }
+--- a/drivers/tty/n_tty.c
++++ b/drivers/tty/n_tty.c
+@@ -1691,7 +1691,7 @@ n_tty_receive_buf_common(struct tty_stru
+
+ down_read(&tty->termios_rwsem);
+
+- while (1) {
++ do {
+ /*
+ * When PARMRK is set, each input char may take up to 3 chars
+ * in the read buf; reduce the buffer space avail by 3x
+@@ -1733,7 +1733,7 @@ n_tty_receive_buf_common(struct tty_stru
+ fp += n;
+ count -= n;
+ rcvd += n;
+- }
++ } while (!test_bit(TTY_LDISC_CHANGING, &tty->flags));
+
+ tty->receive_room = room;
+
+@@ -2199,7 +2199,7 @@ static ssize_t n_tty_read(struct tty_str
+ break;
+ if (!timeout)
+ break;
+- if (file->f_flags & O_NONBLOCK) {
++ if (tty_io_nonblock(tty, file)) {
+ retval = -EAGAIN;
+ break;
+ }
+@@ -2353,7 +2353,7 @@ static ssize_t n_tty_write(struct tty_st
+ }
+ if (!nr)
+ break;
+- if (file->f_flags & O_NONBLOCK) {
++ if (tty_io_nonblock(tty, file)) {
+ retval = -EAGAIN;
+ break;
+ }
+--- a/drivers/tty/tty_ldisc.c
++++ b/drivers/tty/tty_ldisc.c
+@@ -339,6 +339,11 @@ int tty_ldisc_lock(struct tty_struct *tt
+ {
+ int ret;
+
++ /* Kindly asking blocked readers to release the read side */
++ set_bit(TTY_LDISC_CHANGING, &tty->flags);
++ wake_up_interruptible_all(&tty->read_wait);
++ wake_up_interruptible_all(&tty->write_wait);
++
+ ret = __tty_ldisc_lock(tty, timeout);
+ if (!ret)
+ return -EBUSY;
+@@ -349,6 +354,8 @@ int tty_ldisc_lock(struct tty_struct *tt
+ void tty_ldisc_unlock(struct tty_struct *tty)
+ {
+ clear_bit(TTY_LDISC_HALTED, &tty->flags);
++ /* Can be cleared here - ldisc_unlock will wake up writers firstly */
++ clear_bit(TTY_LDISC_CHANGING, &tty->flags);
+ __tty_ldisc_unlock(tty);
+ }
+
+--- a/include/linux/tty.h
++++ b/include/linux/tty.h
+@@ -363,6 +363,7 @@ struct tty_file_private {
+ #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */
+ #define TTY_HUPPED 18 /* Post driver->hangup() */
+ #define TTY_HUPPING 19 /* Hangup in progress */
++#define TTY_LDISC_CHANGING 20 /* Change pending - non-block IO */
+ #define TTY_LDISC_HALTED 22 /* Line discipline is halted */
+
+ /* Values for tty->flow_change */
+@@ -380,6 +381,12 @@ static inline void tty_set_flow_change(s
+ smp_mb();
+ }
+
++static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file)
++{
++ return file->f_flags & O_NONBLOCK ||
++ test_bit(TTY_LDISC_CHANGING, &tty->flags);
++}
++
+ static inline bool tty_io_error(struct tty_struct *tty)
+ {
+ return test_bit(TTY_IO_ERROR, &tty->flags);
diff --git a/patches.suse/tty-Hold-tty_ldisc_lock-during-tty_reopen.patch b/patches.suse/tty-Hold-tty_ldisc_lock-during-tty_reopen.patch
index 56a77e0543..ac0f33d9da 100644
--- a/patches.suse/tty-Hold-tty_ldisc_lock-during-tty_reopen.patch
+++ b/patches.suse/tty-Hold-tty_ldisc_lock-during-tty_reopen.patch
@@ -1,7 +1,7 @@
From: Dmitry Safonov <dima@arista.com>
-Date: Tue, 18 Sep 2018 00:52:54 +0100
+Date: Thu, 1 Nov 2018 00:24:47 +0000
Subject: tty: Hold tty_ldisc_lock() during tty_reopen()
-Patch-mainline: Submitted on 2018/09/18
+Patch-mainline: Submitted on 2018/11/1
References: bnc#1105428
tty_ldisc_reinit() races with neither tty_ldisc_hangup()
@@ -26,12 +26,9 @@ Call Trace:
tty_ldisc_reinit() should be called with ldisc_sem hold for writing,
which will protect any reader against line discipline changes.
-Backport-first: b027e2298bd5 ("tty: fix data race between tty_init_dev
-and flush of buf")
-Cc: stable@vger.kernel.org
-
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jslaby@suse.com>
+Cc: stable@vger.kernel.org # b027e2298bd5 ("tty: fix data race between tty_init_dev and flush of buf")
Reviewed-by: Jiri Slaby <jslaby@suse.cz>
Reported-by: syzbot+3aa9784721dfb90e984d@syzkaller.appspotmail.com
Tested-by: Mark Rutland <mark.rutland@arm.com>
diff --git a/patches.suse/tty-Simplify-tty-count-math-in-tty_reopen.patch b/patches.suse/tty-Simplify-tty-count-math-in-tty_reopen.patch
index 10455330f1..b7f96cf5f2 100644
--- a/patches.suse/tty-Simplify-tty-count-math-in-tty_reopen.patch
+++ b/patches.suse/tty-Simplify-tty-count-math-in-tty_reopen.patch
@@ -1,7 +1,7 @@
From: Dmitry Safonov <dima@arista.com>
-Date: Tue, 18 Sep 2018 00:52:55 +0100
+Date: Thu, 1 Nov 2018 00:24:49 +0000
Subject: tty: Simplify tty->count math in tty_reopen()
-Patch-mainline: Submitted on 2018/09/18
+Patch-mainline: Submitted on 2018/11/1
References: bnc#1105428
As noted by Jiri, tty_ldisc_reinit() shouldn't rely on the tty counter.
diff --git a/patches.suse/tty-ldsem-Add-lockdep-asserts-for-ldisc_sem.patch b/patches.suse/tty-ldsem-Add-lockdep-asserts-for-ldisc_sem.patch
index 535995a6d9..fd8e3ba344 100644
--- a/patches.suse/tty-ldsem-Add-lockdep-asserts-for-ldisc_sem.patch
+++ b/patches.suse/tty-ldsem-Add-lockdep-asserts-for-ldisc_sem.patch
@@ -1,7 +1,7 @@
From: Dmitry Safonov <dima@arista.com>
-Date: Tue, 18 Sep 2018 00:52:57 +0100
+Date: Thu, 1 Nov 2018 00:24:51 +0000
Subject: tty/ldsem: Add lockdep asserts for ldisc_sem
-Patch-mainline: Submitted on 2018/09/18
+Patch-mainline: Submitted on 2018/11/1
References: bnc#1105428
Make sure under CONFIG_LOCKDEP that each change to line discipline
@@ -28,7 +28,7 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
-@@ -483,6 +483,7 @@ static int tty_ldisc_open(struct tty_str
+@@ -490,6 +490,7 @@ static int tty_ldisc_open(struct tty_str
static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld)
{
@@ -36,7 +36,7 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags));
clear_bit(TTY_LDISC_OPEN, &tty->flags);
if (ld->ops->close)
-@@ -504,6 +505,7 @@ static int tty_ldisc_failto(struct tty_s
+@@ -511,6 +512,7 @@ static int tty_ldisc_failto(struct tty_s
struct tty_ldisc *disc = tty_ldisc_get(tty, ld);
int r;
@@ -44,7 +44,7 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
if (IS_ERR(disc))
return PTR_ERR(disc);
tty->ldisc = disc;
-@@ -626,6 +628,7 @@ err:
+@@ -633,6 +635,7 @@ err:
*/
static void tty_ldisc_kill(struct tty_struct *tty)
{
@@ -52,7 +52,7 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
if (!tty->ldisc)
return;
/*
-@@ -673,6 +676,7 @@ int tty_ldisc_reinit(struct tty_struct *
+@@ -680,6 +683,7 @@ int tty_ldisc_reinit(struct tty_struct *
struct tty_ldisc *ld;
int retval;
@@ -60,7 +60,7 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
ld = tty_ldisc_get(tty, disc);
if (IS_ERR(ld)) {
BUG_ON(disc == N_TTY);
-@@ -772,6 +776,10 @@ int tty_ldisc_setup(struct tty_struct *t
+@@ -779,6 +783,10 @@ int tty_ldisc_setup(struct tty_struct *t
return retval;
if (o_tty) {
@@ -71,7 +71,7 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
retval = tty_ldisc_open(o_tty, o_tty->ldisc);
if (retval) {
tty_ldisc_close(tty, tty->ldisc);
-@@ -836,6 +844,7 @@ int tty_ldisc_init(struct tty_struct *tt
+@@ -843,6 +851,7 @@ int tty_ldisc_init(struct tty_struct *tt
*/
void tty_ldisc_deinit(struct tty_struct *tty)
{
diff --git a/patches.suse/tty-ldsem-Convert-to-regular-lockdep-annotations.patch b/patches.suse/tty-ldsem-Convert-to-regular-lockdep-annotations.patch
index ed5f75c2bf..6221c2c46f 100644
--- a/patches.suse/tty-ldsem-Convert-to-regular-lockdep-annotations.patch
+++ b/patches.suse/tty-ldsem-Convert-to-regular-lockdep-annotations.patch
@@ -1,11 +1,13 @@
From: Peter Zijlstra <peterz@infradead.org>
-Date: Tue, 18 Sep 2018 00:52:56 +0100
+Date: Thu, 1 Nov 2018 00:24:50 +0000
Subject: tty/ldsem: Convert to regular lockdep annotations
-Patch-mainline: Submitted on 2018/09/18
+Patch-mainline: Submitted on 2018/11/1
References: bnc#1105428
For some reason ldsem has its own lockdep wrappers, make them go away.
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Jiri Slaby <jslaby@suse.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
diff --git a/patches.suse/tty-ldsem-Decrement-wait_readers-on-timeouted-down_r.patch b/patches.suse/tty-ldsem-Decrement-wait_readers-on-timeouted-down_r.patch
index 1e3fa456c8..886d0a9fa8 100644
--- a/patches.suse/tty-ldsem-Decrement-wait_readers-on-timeouted-down_r.patch
+++ b/patches.suse/tty-ldsem-Decrement-wait_readers-on-timeouted-down_r.patch
@@ -1,7 +1,7 @@
From: Dmitry Safonov <dima@arista.com>
-Date: Tue, 18 Sep 2018 00:52:58 +0100
+Date: Thu, 1 Nov 2018 00:24:52 +0000
Subject: tty/ldsem: Decrement wait_readers on timeouted down_read()
-Patch-mainline: Submitted on 2018/09/18
+Patch-mainline: Submitted on 2018/11/1
References: bnc#1105428
It seems like when ldsem_down_read() fails with timeout, it misses
diff --git a/patches.suse/tty-ldsem-Wake-up-readers-after-timed-out-down_write.patch b/patches.suse/tty-ldsem-Wake-up-readers-after-timed-out-down_write.patch
index 50ea5f03b2..6d272541a5 100644
--- a/patches.suse/tty-ldsem-Wake-up-readers-after-timed-out-down_write.patch
+++ b/patches.suse/tty-ldsem-Wake-up-readers-after-timed-out-down_write.patch
@@ -1,7 +1,7 @@
From: Dmitry Safonov <dima@arista.com>
-Date: Tue, 18 Sep 2018 00:52:53 +0100
+Date: Thu, 1 Nov 2018 00:24:46 +0000
Subject: tty/ldsem: Wake up readers after timed out down_write()
-Patch-mainline: Submitted on 2018/09/18
+Patch-mainline: Submitted on 2018/11/1
References: bnc#1105428
ldsem_down_read() will sleep if there is a pending writer in the queue.
@@ -21,11 +21,12 @@ other readers soft locked up:
Prevent readers from waiting for active readers to release the ldisc semaphore.
-Link: lkml.kernel.org/r/<20171121132855.ajdv4k6swzhvktl6@wfg-t540p.sh.intel.com>
-Link: lkml.kernel.org/r/<20180907045041.GF1110@shao2-debian>
+Link: lkml.kernel.org/r/20171121132855.ajdv4k6swzhvktl6@wfg-t540p.sh.intel.com
+Link: lkml.kernel.org/r/20180907045041.GF1110@shao2-debian
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: stable@vger.kernel.org
Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
diff --git a/rpm/klp-symbols b/rpm/klp-symbols
index c2abfe2335..012fca43c3 100755
--- a/rpm/klp-symbols
+++ b/rpm/klp-symbols
@@ -17,6 +17,7 @@ if test "$1" == "-h" -o "$1" == "--help" -o $# -ne 2 ; then
exit 1
fi
+KLP_DATA_VERSION="0.1"
KERNEL_BUILD_DIR="$1"
KLP_SYMBOLS="$2"
@@ -38,7 +39,8 @@ get_symbols()
nm -f posix "$1" | grep -v '\( [UN] \)\|\(\.L\|__crc_\)' | cut -d\ -f1,2
}
-echo "*vmlinux" > "$KLP_SYMBOLS"
+echo "klp-convert-symbol-data.$KLP_DATA_VERSION" > "$KLP_SYMBOLS"
+echo "*vmlinux" >> "$KLP_SYMBOLS"
get_symbols "$VMLINUX" >> "$KLP_SYMBOLS"
find "$MODVER_DIR" -iname '*.mod' | while read KMOD ; do
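With the version header in place, the generated "$KLP_SYMBOLS" file starts with a format identifier before the per-object sections, so a consumer such as klp-convert can presumably verify it is reading the layout it expects. Given the get_symbols() helper above (posix nm output cut down to name and type), the file begins roughly like:

    klp-convert-symbol-data.0.1
    *vmlinux
    <symbol name> <nm type letter>
    ...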
diff --git a/series.conf b/series.conf
index 592cbcb60e..1f96d92da5 100644
--- a/series.conf
+++ b/series.conf
@@ -6128,6 +6128,7 @@
patches.arch/powerpc-perf-imc-fix-nest-events-on-muti-socket-system.patch
patches.arch/powerpc-Fix-missing-newline-before.patch
patches.arch/powerpc-perf-fix-usage-of-nest_imc_refc.patch
+ patches.arch/powerpc-mm-Rename-find_linux_pte_or_hugepte.patch
patches.arch/powerpc-pseries-remove-dlpar_attach_node-dependency-.patch
patches.arch/powerpc-mm-Move-pgdir-setting-into-a-helper.patch
patches.arch/powerpc-mm-Optimize-detection-of-thread-local-mm-s.patch
@@ -6144,6 +6145,7 @@
patches.arch/powerpc-Machine-check-interrupt-is-a-non-maskable-in.patch
patches.arch/powerpc-xmon-Fix-display-of-SPRs.patch
patches.arch/powerpc-kernel-Change-retrieval-of-pci_dn.patch
+ patches.arch/powerpc-xmon-Add-ISA-v3.0-SPRs-to-SPR-dump.patch
patches.arch/powerpc-conditionally-compile-platform-specific-serial-drivers.patch
patches.arch/cxl-Fix-driver-use-count.patch
patches.arch/powerpc-dlpar-018-pseries-Don-t-attempt-to-acquire-drc-during-.patch
@@ -9863,6 +9865,7 @@
patches.arch/powerpc-64s-idle-avoid-POWER9-DD1-and-DD2.0-ERAT-wor.patch
patches.arch/powerpc-64s-idle-avoid-POWER9-DD1-and-DD2.0-PMU-work.patch
patches.arch/powerpc-powernv-cpufreq-Fix-the-frequency-read-by-pr.patch
+ patches.arch/powerpc-powernv-ioda-Remove-explicit-max-window-size.patch
patches.arch/powerpc-64s-radix-Optimize-TLB-range-flush-barriers.patch
patches.arch/powerpc-mm-radix-Fix-crashes-on-Power9-DD1-with-radix.patch
patches.arch/powerpc-kprobes-Disable-preemption-before-invoking-p.patch
@@ -12831,8 +12834,11 @@
patches.arch/powerpc-Enable-pkey-subsystem.patch
patches.arch/powerpc-sys_pkey_alloc-and-sys_pkey_free-system-call.patch
patches.arch/powerpc-sys_pkey_mprotect-system-call.patch
+ patches.arch/powerpc-powernv-ioda-Finish-removing-explicit-max-wi.patch
patches.arch/powerpc-firmware-Add-definitions-for-new-drc-info-fi.patch
+ patches.arch/powerpc-xive-Move-definition-of-ESB-bits.patch
patches.arch/powerpc-xive-Add-interrupt-flag-to-disable-automatic.patch
+ patches.arch/KVM-PPC-Book3S-HV-Improve-handling-of-debug-trigger-.patch
patches.arch/powerpc-radix-Remove-trace_tlbie-call-from-radix__fl.patch
patches.suse/powerpc-64s-Improve-RFI-L1-D-cache-flush-fallback.patch
patches.arch/powerpc-eeh-Update-VF-config-space-after-EEH.patch
@@ -13228,12 +13234,29 @@
patches.suse/msft-hv-1590-x86-hyperv-Reenlightenment-notifications-support.patch
patches.suse/msft-hv-1591-x86-hyperv-Redirect-reenlightment-notifications-on-C.patch
patches.suse/msft-hv-1592-x86-irq-Count-Hyper-V-reenlightenment-interrupts.patch
+ patches.arch/KVM-PPC-Book3S-HV-Avoid-shifts-by-negative-amounts.patch
+ patches.arch/KVM-PPC-Book3S-HV-Fix-typo-in-kvmppc_hv_get_dirty_lo.patch
+ patches.arch/KVM-PPC-Book3S-HV-Remove-useless-statement.patch
+ patches.arch/KVM-PPC-Book3S-HV-Fix-conditions-for-starting-vcpu.patch
+ patches.arch/KVM-PPC-Book3S-Eliminate-some-unnecessary-checks.patch
+ patches.arch/KVM-PPC-Book3S-HV-Remove-vcpu-arch.dec-usage.patch
+ patches.arch/KVM-PPC-Use-seq_puts-in-kvmppc_exit_timing_show.patch
+ patches.arch/KVM-PPC-Book3S-HV-Enable-migration-of-decrementer-re.patch
patches.arch/KVM-PPC-Book3S-HV-Make-sure-we-don-t-re-enter-guest-.patch
+ patches.arch/KVM-PPC-Book3S-HV-Do-SLB-load-unload-with-guest-LPCR.patch
+ patches.arch/KVM-PPC-Book3S-HV-Allow-HPT-and-radix-on-the-same-co.patch
patches.arch/KVM-PPC-Book3S-HV-Enable-use-of-the-new-XIVE-single-.patch
+ patches.arch/KVM-PPC-Book3S-HV-Don-t-use-existing-prodded-flag-fo.patch
+ patches.arch/KVM-PPC-Book3S-HV-Check-DR-not-IR-to-chose-real-vs-v.patch
+ patches.arch/KVM-PPC-Book3S-HV-Make-xive_pushed-a-byte-not-a-word.patch
+ patches.arch/KVM-PPC-Book3S-HV-Keep-XIVE-escalation-interrupt-mas.patch
patches.arch/KVM-PPC-Book3S-HV-Drop-locks-before-reading-guest-me.patch
patches.fixes/KVM-PPC-Book3S-PR-Fix-svcpu-copying-with-preemption-.patch
patches.arch/KVM-PPC-Book3S-PR-Fix-broken-select-due-to-misspelli.patch
+ patches.arch/KVM-PPC-Book3S-HV-Fix-handling-of-secondary-HPTEG-in.patch
+ patches.arch/KVM-PPC-Book3S-HV-Make-HPT-resizing-work-on-POWER9.patch
patches.suse/KVM-PPC-Book3S-HV-Branch-inside-feature-section.patch
+ patches.arch/KVM-PPC-Book3S-Add-MMIO-emulation-for-VMX-instructio.patch
patches.drivers/cros_ec-fix-nul-termination-for-firmware-build-info
patches.drivers/platform-chrome-cros_ec_lpc-remove-redundant-pointer
patches.drivers/platform-chrome-Use-proper-protocol-transfer-functio
@@ -13720,6 +13743,9 @@
patches.suse/Documentation-sphinx-Fix-Directive-import-error.patch
patches.arch/0001-arm64-mm-fix-thinko-in-non-global-page-table-attribu.patch
patches.arch/0001-arm64-Relax-ARM_SMCCC_ARCH_WORKAROUND_1-discovery.patch
+ patches.arch/KVM-PPC-Book3S-Fix-compile-error-that-occurs-with-so.patch
+ patches.arch/KVM-PPC-Book3S-HV-Fix-handling-of-large-pages-in-rad.patch
+ patches.arch/KVM-PPC-Book3S-HV-Fix-VRMA-initialization-with-2MB-o.patch
patches.arch/KVM-PPC-Book3S-HV-Fix-guest-time-accounting-with-VIR.patch
patches.drivers/IB-mlx5-Fix-incorrect-size-of-klms-in-the-memory-reg.patch
patches.drivers/IB-mlx5-Avoid-passing-an-invalid-QP-type-to-firmware.patch
@@ -13818,6 +13844,7 @@
patches.drivers/drm-i915-gvt-keep-oa-config-in-shadow-ctx
patches.drivers/drm-i915-gvt-Correct-the-privilege-shadow-batch-buff
patches.suse/btrfs-add-missing-initialization-in-btrfs_check_shared.patch
+ patches.arch/KVM-PPC-Book3S-HV-Fix-trap-number-return-from-__kvmp.patch
patches.fixes/kvm-x86-fix-device-passthrough-when-sme-is-active.patch
patches.drivers/irqchip-gic-v3-its-Ensure-nr_ites-nr_lpis.patch
patches.arch/x86-cpufeatures-add-intel-total-memory-encryption-cpufeature
@@ -14047,6 +14074,7 @@
patches.drivers/ALSA-usb-audio-Add-native-DSD-support-for-TEAC-UD-30
patches.drivers/ALSA-pcm-Use-dma_bytes-as-size-parameter-in-dma_mmap
patches.drivers/ALSA-pcm-potential-uninitialized-return-values
+ patches.arch/KVM-PPC-Book3S-HV-Fix-duplication-of-host-SLB-entrie.patch
patches.fixes/ceph-only-dirty-iter_iovec-pages-for-direct-read.patch
patches.suse/netfilter-nf_tables-add-missing-netlink-attrs-to-pol.patch
patches.drivers/lan78xx-Set-ASD-in-MAC_CR-when-EEE-is-enabled.patch
@@ -14972,6 +15000,10 @@
patches.suse/msft-hv-1651-x86-hyper-v-allocate-and-use-Virtual-Processor-Assis.patch
patches.suse/msft-hv-1652-x86-hyper-v-define-struct-hv_enlightened_vmcs-and-cl.patch
patches.suse/msft-hv-1653-x86-hyper-v-detect-nested-features.patch
+ patches.arch/KVM-PPC-Book3S-HV-Radix-page-fault-handler-optimizat.patch
+ patches.arch/KVM-PPC-Book3S-HV-Streamline-setting-of-reference-an.patch
+ patches.arch/KVM-PPC-Book3S-HV-Handle-1GB-pages-in-radix-page-fau.patch
+ patches.arch/KVM-PPC-Book3S-HV-Use-__gfn_to_pfn_memslot-in-page-f.patch
patches.arch/kvm-vmx-raise-internal-error-for-exception-during-invalid-protected-mode-state
patches.fixes/getname_kernel-needs-to-make-sure-that-name-iname-in.patch
patches.suse/net-fool-proof-dev_valid_name.patch
@@ -15371,6 +15403,7 @@
patches.fixes/libceph-validate-con-state-at-the-top-of-try_write.patch
patches.arch/powerpc-powernv-npu-Do-a-PID-GPU-TLB-flush-when-inva.patch
patches.fixes/cpufreq-powernv-Fix-hardlockup-due-to-synchronous-sm.patch
+ patches.arch/powerpc-kvm-booke-Fix-altivec-related-build-break.patch
patches.drivers/crypto-drbg-set-freed-buffers-to-NULL
patches.fixes/ext4-prevent-right-shifting-extents-beyond-EXT_MAX_B.patch
patches.fixes/ext4-set-h_journal-if-there-is-a-failure-starting-a-.patch
@@ -15787,6 +15820,11 @@
patches.fixes/0001-Revert-ipc-shm-Fix-shmat-mmap-nil-page-protection.patch
patches.fixes/0001-ipc-shm-fix-shmat-nil-address-after-round-down-when-.patch
patches.suse/kernel-sys.c-fix-potential-Spectre-v1-issue.patch
+ patches.arch/KVM-PPC-Book3S-HV-Snapshot-timebase-offset-on-guest-.patch
+ patches.arch/KVM-PPC-Book3S-HV-Make-radix-use-correct-tlbie-seque.patch
+ patches.arch/KVM-PPC-Book3S-HV-Make-radix-clear-pte-when-unmappin.patch
+ patches.arch/KVM-PPC-Book3S-HV-XIVE-Resend-re-routed-interrupts-o.patch
+ patches.arch/KVM-PPC-Book-3S-HV-Do-ptesync-in-radix-guest-exit-pa.patch
patches.arch/x86-kvm-fix-lapic-timer-drift-when-guest-uses-periodic-mode
patches.arch/kvm-x86-update-cpuid-properly-when-cr4-osxave-or-cr4-pke-is-changed
patches.arch/46-kvm-x86-ia32_arch_capabilities-is-always-supported.patch
@@ -16322,6 +16360,7 @@
patches.arch/powerpc-xive-prepare-all-hcalls-to-support-long-busy.patch
patches.suse/powerpc-livepatch-implement-reliable-stack-tracing-for-the-consistency-model.patch
patches.arch/powerpc-lib-Fix-the-feature-fixup-tests-to-actually-.patch
+ patches.arch/powerpc-kvm-Switch-kvm-pmd-allocator-to-custom-alloc.patch
patches.arch/powerpc-perf-Fix-memory-allocation-for-core-imc-base.patch
patches.suse/0001-powerpc-ptrace-Fix-enforcement-of-DAWR-constraints.patch
patches.fixes/4.4.139-044-powerpc-ptrace-Fix-setting-512B-aligned-break.patch
@@ -16768,6 +16807,25 @@
patches.drivers/ALSA-usb-audio-Generic-DSD-detection-for-XMOS-based-
patches.drivers/ALSA-usb-audio-Remove-explicitly-listed-Mytek-device
patches.fixes/vhost-fix-info-leak-due-to-uninitialized-memory.patch
+ patches.arch/KVM-PPC-Book3S-HV-Add-online-register-to-ONE_REG-int.patch
+ patches.arch/KVM-PPC-Book3S-HV-Set-RWMR-on-POWER8-so-PURR-SPURR-c.patch
+ patches.arch/KVM-PPC-Book3S-HV-Fix-inaccurate-comment.patch
+ patches.arch/KVM-PPC-Book3S-Use-correct-page-shift-in-H_STUFF_TCE.patch
+ patches.arch/KVM-PPC-Book3S-Allow-backing-bigger-guest-IOMMU-page.patch
+ patches.arch/KVM-PPC-Book3S-Check-KVM_CREATE_SPAPR_TCE_64-paramet.patch
+ patches.arch/KVM-PPC-Add-pt_regs-into-kvm_vcpu_arch-and-move-vcpu.patch
+ patches.arch/KVM-PPC-Move-nip-ctr-lr-xer-registers-to-pt_regs-in-.patch
+ patches.arch/KVM-PPC-Fix-a-mmio_host_swabbed-uninitialized-usage-.patch
+ patches.arch/KVM-PPC-Book3S-HV-Lockless-tlbie-for-HPT-hcalls.patch
+ patches.arch/KVM-PPC-Book3S-HV-Use-a-helper-to-unmap-ptes-in-the-.patch
+ patches.arch/KVM-PPC-Book3S-HV-Recursively-unmap-all-page-table-e.patch
+ patches.arch/KVM-PPC-Book3S-HV-radix-Refine-IO-region-partition-s.patch
+ patches.arch/KVM-PPC-Book3S-HV-radix-Do-not-clear-partition-PTE-w.patch
+ patches.arch/KVM-PPC-Book3S-HV-Fix-kvmppc_bad_host_intr-for-real-.patch
+ patches.arch/KVM-PPC-Book3S-HV-Send-kvmppc_bad_interrupt-NMIs-to-.patch
+ patches.arch/KVM-PPC-Book3S-HV-Factor-fake-suspend-handling-out-o.patch
+ patches.arch/KVM-PPC-Book3S-PR-Move-kvmppc_save_tm-kvmppc_restore.patch
+ patches.arch/KVM-PPC-Book3S-PR-Add-guest-MSR-parameter-for-kvmppc.patch
patches.suse/ipv6-allow-PMTU-exceptions-to-local-routes.patch
patches.suse/net-dsa-add-error-handling-for-pskb_trim_rcsum.patch
patches.drivers/ixgbe-Fix-setting-of-TC-configuration-for-macvlan-ca.patch
@@ -17709,7 +17767,11 @@
patches.suse/0002-module-setup-load-info-before-module_sig_check.patch
patches.suse/0003-modsign-log-module-name-in-the-event-of-an-error.patch
patches.suse/0004-ARM-module-fix-modsign-build-error.patch
- patches.arch/powerpc-powernv-ioda2-Reduce-upper-limit-for-DMA-win.patch
+ patches.arch/powerpc-powernv-Move-TCE-manupulation-code-to-its-ow.patch
+ patches.arch/KVM-PPC-Make-iommu_table-it_userspace-big-endian.patch
+ patches.arch/powerpc-powernv-Add-indirect-levels-to-it_userspace.patch
+ patches.arch/powerpc-powernv-Rework-TCE-level-allocation.patch
+ patches.arch/powerpc-powernv-ioda-Allocate-indirect-TCE-levels-on.patch
patches.arch/cxl-Fix-wrong-comparison-in-cxl_adapter_context_get.patch
patches.arch/powerpc-pkeys-Give-all-threads-control-of-their-key-.patch
patches.arch/powerpc-pkeys-Deny-read-write-execute-by-default.patch
@@ -17837,6 +17899,12 @@
patches.drivers/dmaengine-mv_xor_v2-kill-the-tasklets-upon-exit
patches.drivers/dmaengine-pl330-fix-irq-race-with-terminate_all.patch
patches.drivers/Input-atmel_mxt_ts-only-use-first-T9-instance.patch
+ patches.arch/KVM-PPC-Book3S-Fix-matching-of-hardware-and-emulated.patch
+ patches.arch/KVM-PPC-Book3S-HV-Add-of_node_put-in-success-path.patch
+ patches.arch/KVM-PPC-Book3S-HV-Fix-constant-size-warning.patch
+ patches.arch/KVM-PPC-Book3S-HV-Pack-VCORE-IDs-to-access-full-VCPU.patch
+ patches.arch/KVM-PPC-Book3S-HV-Allow-creating-max-number-of-VCPUs.patch
+ patches.arch/KVM-PPC-Book3S-HV-Read-kvm-arch.emul_smt_mode-under-.patch
patches.fixes/kvm-s390-add-etoken-support-for-guests.patch
patches.suse/msft-hv-1746-KVM-x86-ensure-all-MSRs-can-always-be-KVM_GET-SET_MS.patch
patches.suse/msft-hv-1747-X86-Hyper-V-Add-flush-HvFlushGuestPhysicalAddressSpa.patch
@@ -18078,6 +18146,7 @@
patches.arch/kvm-x86-default-to-not-allowing-emulation-retry-in-kvm_mmu_page_fault
patches.arch/kvm-x86-do-not-re-try-execute-after-failed-emulation-in-l2
patches.arch/KVM-PPC-Book3S-HV-Use-correct-pagesize-in-kvm_unmap_.patch
+ patches.arch/KVM-PPC-Book3S-HV-Don-t-truncate-HPTE-index-in-xlate.patch
patches.arch/x86-microcode-make-sure-boot_cpu_data-microcode-is-up-to-date
patches.arch/x86-microcode-update-the-new-microcode-revision-unconditionally
patches.arch/x86-process-don-t-mix-user-kernel-regs-in-64bit-_show_regs
@@ -18186,6 +18255,8 @@
patches.drivers/floppy-Do-not-copy-a-kernel-pointer-to-user-memory-i.patch
patches.fixes/ubifs-Check-for-name-being-NULL-while-mounting.patch
patches.fixes/Revert-ubifs-xattr-Don-t-operate-on-deleted-inodes.patch
+ patches.arch/KVM-PPC-Avoid-marking-DMA-mapped-pages-dirty-in-real.patch
+ patches.arch/KVM-PPC-Book3S-HV-Don-t-use-compound_order-to-determ.patch
patches.suse/msft-hv-1758-x86-hyper-v-rename-ipi_arg_-ex-non_ex-structures.patch
patches.fixes/0001-x86-paravirt-Fix-some-warning-messages.patch
patches.arch/x86-mm-expand-static-page-table-for-fixmap-space
@@ -18230,7 +18301,9 @@
patches.drivers/drm-mali-dp-Call-drm_crtc_vblank_reset-on-device-ini.patch
patches.fixes/0001-drm-amdgpu-Fix-vce-work-queue-was-not-cancelled-when.patch
patches.drivers/pinctrl-cannonlake-Fix-HOSTSW_OWN-register-offset-of.patch
+ patches.arch/KVM-PPC-Book3S-HV-Fix-guest-r11-corruption-with-POWE.patch
patches.arch/powerpc-Avoid-code-patching-freed-init-sections.patch
+ patches.arch/powerpc-powernv-ioda2-Reduce-upper-limit-for-DMA-win.patch
patches.arch/powerpc-pkeys-Fix-reading-of-ibm-processor-storage-k.patch
patches.arch/powerpc-tm-Fix-userspace-r13-corruption.patch
patches.arch/powerpc-tm-Avoid-possible-userspace-r1-corruption-on.patch
@@ -18277,6 +18350,7 @@
patches.drivers/crypto-qat-Fix-KASAN-stack-out-of-bounds-bug-in-adf_.patch
patches.arch/kvm-nvmx-do-not-expose-mpx-vmx-controls-when-guest-mpx-disabled
patches.arch/kvm-x86-do-not-use-kvm_x86_ops-mpx_supported-directly
+ patches.arch/KVM-PPC-Book3S-HV-Avoid-crash-from-THP-collapse-duri.patch
patches.drivers/iommu-amd-clear-memory-encryption-mask-from-physical-address
patches.drivers/ALSA-hda-realtek-Cannot-adjust-speaker-s-volume-on-D.patch
patches.suse/sched-numa-Stop-multiple-tasks-from-moving-to-the-CPU-at-the-same-time.patch
@@ -18326,6 +18400,8 @@
patches.drivers/spi-sh-msiof-fix-deferred-probing.patch
patches.drivers/spi-bcm63xx-hsspi-keep-pll-clk-enabled.patch
patches.drivers/ACPI-processor-Fix-the-return-value-of-acpi_processo.patch
+ patches.arch/x86-kexec-correct-kexec_backup_src_end-off-by-one-error.patch
+ patches.fixes/resource-include-resource-end-in-walk_-interfaces.patch
patches.drivers/iwlwifi-mvm-send-BCAST-management-frames-to-the-righ.patch
patches.drivers/iwlwifi-dbg-don-t-crash-if-the-firmware-crashes-in-t.patch
patches.drivers/iwlwifi-pcie-gen2-build-A-MSDU-only-for-GSO.patch
@@ -18348,6 +18424,7 @@
patches.fixes/nvdimm-use-namespace-index-data-to-reduce-number-of-label-reads-needed.patch
patches.fixes/libnvdimm-label-fix-sparse-warning.patch
patches.drivers/edac-raise-the-maximum-number-of-memory-controllers.patch
+ patches.drivers/edac-thunderx-fix-memory-leak-in-thunderx_l2c_threaded_isr.patch
patches.drivers/PCI-ASPM-Fix-link_state-teardown-on-device-removal.patch
patches.drivers/scsi-qla2xxx-Fix-process-response-queue-for-ISP26XX-.patch
patches.drivers/scsi-qla2xxx-Fix-incorrect-port-speed-being-set-for-.patch
@@ -18485,6 +18562,7 @@
########################################################
patches.suse/tty-ldsem-Wake-up-readers-after-timed-out-down_write.patch
patches.suse/tty-Hold-tty_ldisc_lock-during-tty_reopen.patch
+ patches.suse/tty-Don-t-block-on-IO-when-ldisc-change-is-pending.patch
patches.suse/tty-Simplify-tty-count-math-in-tty_reopen.patch
patches.suse/tty-ldsem-Convert-to-regular-lockdep-annotations.patch
patches.suse/tty-ldsem-Add-lockdep-asserts-for-ldisc_sem.patch
@@ -18692,7 +18770,6 @@
patches.arch/selftests-powerpc-Add-core-file-test-for-Protection-.patch
patches.arch/selftests-powerpc-Fix-core-pkey-for-default-execute-.patch
patches.arch/selftests-powerpc-Fix-ptrace-pkey-for-default-execut.patch
- patches.arch/KVM-PPC-Book3S-HV-Disable-tb_offset.patch
# FATE#325308
patches.arch/hotplug-cpu-Conditionally-acquire-release-DRC-index.patch
@@ -19161,6 +19238,10 @@
# SUSE-specific
patches.suse/livepatch-send-a-fake-signal-periodically.patch
+ # klp-convert
+ patches.suse/livepatch-modpost-ignore-unresolved-symbols.patch
+ patches.suse/livepatch-create-and-include-UAPI-headers.patch
+
########################################################
# XEN patches
########################################################
@@ -19220,6 +19301,8 @@
patches.kabi/KABI-tpm-do-keep-the-cmd_ready-and-go_idle-as-pm-ops.patch
patches.kabi/KABI-move-mce_data_buf-into-paca_aux.patch
patches.kabi/KABI-move-the-new-handler-to-end-of-machdep_calls-an.patch
+ patches.kabi/KABI-powerpc-export-__find_linux_pte-as-__find_linux.patch
+ patches.kabi/KABI-hide-new-member-in-struct-iommu_table-from-genk.patch
patches.kabi/perf-x86-fix-data-source-decoding-for-skylake-kabi.patch
patches.kabi/kabi-protect-struct-nf_conn.patch