author	Michal Suchanek <msuchanek@suse.de>	2018-10-31 12:36:54 +0100
committer	Michal Suchanek <msuchanek@suse.de>	2018-10-31 23:28:37 +0100
commit	6a01346d5a8d42d1289794033b3bbd7aa258410d (patch)
tree	143e93727b08bb9cb56f55aa0a973d13b119182f
parent	d59fb4c51dca9d03a2f0c3e4d1b9a6896fa07b1c (diff)
KVM: PPC: Book3S HV: Use __gfn_to_pfn_memslot() in page fault handler (bsc#1061840).
-rw-r--r--	patches.arch/KVM-PPC-Book3S-HV-Use-__gfn_to_pfn_memslot-in-page-f.patch	228
-rw-r--r--	series.conf	1
2 files changed, 229 insertions(+), 0 deletions(-)
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Use-__gfn_to_pfn_memslot-in-page-f.patch b/patches.arch/KVM-PPC-Book3S-HV-Use-__gfn_to_pfn_memslot-in-page-f.patch
new file mode 100644
index 0000000000..bdd21d9127
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Use-__gfn_to_pfn_memslot-in-page-f.patch
@@ -0,0 +1,228 @@
+From 31c8b0d0694a1f7e3b46df0d1341a874ecb5e0de Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Thu, 1 Mar 2018 15:14:02 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Use __gfn_to_pfn_memslot() in page fault
+ handler
+
+References: bsc#1061840
+Patch-mainline: v4.17-rc1
+Git-commit: 31c8b0d0694a1f7e3b46df0d1341a874ecb5e0de
+
+This changes the hypervisor page fault handler for radix guests to use
+the generic KVM __gfn_to_pfn_memslot() function instead of using
+get_user_pages_fast() and then handling the case of VM_PFNMAP vmas
+specially. The old code missed the case of VM_IO vmas; with this
+change, VM_IO vmas will now be handled correctly by code within
+__gfn_to_pfn_memslot.
+
+Currently, __gfn_to_pfn_memslot calls hva_to_pfn, which only uses
+__get_user_pages_fast for the initial lookup in the cases where
+either atomic or async is set. Since we are not setting either
+atomic or async, we do our own __get_user_pages_fast first, for now.
+
+This also adds code to check for the KVM_MEM_READONLY flag on the
+memslot. If it is set and this is a write access, we synthesize a
+data storage interrupt for the guest.
+
+In the case where the page is not normal RAM (i.e. page == NULL in
+kvmppc_book3s_radix_page_fault()), we read the PTE from the Linux page
+tables because we need the mapping attribute bits as well as the PFN.
+(The mapping attribute bits indicate whether accesses have to be
+non-cacheable and/or guarded.)
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 148 ++++++++++++++++++++-------------
+ 1 file changed, 88 insertions(+), 60 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index 05acc67e0eb2..0590f1667607 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -392,11 +392,11 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned long mmu_seq, pte_size;
+ unsigned long gpa, gfn, hva, pfn;
+ struct kvm_memory_slot *memslot;
+- struct page *page = NULL, *pages[1];
+- long ret, npages;
+- unsigned int writing;
+- struct vm_area_struct *vma;
+- unsigned long flags;
++ struct page *page = NULL;
++ long ret;
++ bool writing;
++ bool upgrade_write = false;
++ bool *upgrade_p = &upgrade_write;
+ pte_t pte, *ptep;
+ unsigned long pgflags;
+ unsigned int shift, level;
+@@ -436,12 +436,17 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ dsisr & DSISR_ISSTORE);
+ }
+
+- /* used to check for invalidations in progress */
+- mmu_seq = kvm->mmu_notifier_seq;
+- smp_rmb();
+-
+ writing = (dsisr & DSISR_ISSTORE) != 0;
+- hva = gfn_to_hva_memslot(memslot, gfn);
++ if (memslot->flags & KVM_MEM_READONLY) {
++ if (writing) {
++ /* give the guest a DSI */
++ dsisr = DSISR_ISSTORE | DSISR_PROTFAULT;
++ kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
++ return RESUME_GUEST;
++ }
++ upgrade_p = NULL;
++ }
++
+ if (dsisr & DSISR_SET_RC) {
+ /*
+ * Need to set an R or C bit in the 2nd-level tables;
+@@ -470,69 +475,92 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ return RESUME_GUEST;
+ }
+
+- ret = -EFAULT;
+- pfn = 0;
+- pte_size = PAGE_SIZE;
+- pgflags = _PAGE_READ | _PAGE_EXEC;
+- level = 0;
+- npages = get_user_pages_fast(hva, 1, writing, pages);
+- if (npages < 1) {
+- /* Check if it's an I/O mapping */
+- down_read(&current->mm->mmap_sem);
+- vma = find_vma(current->mm, hva);
+- if (vma && vma->vm_start <= hva && hva < vma->vm_end &&
+- (vma->vm_flags & VM_PFNMAP)) {
+- pfn = vma->vm_pgoff +
+- ((hva - vma->vm_start) >> PAGE_SHIFT);
+- pgflags = pgprot_val(vma->vm_page_prot);
+- }
+- up_read(&current->mm->mmap_sem);
+- if (!pfn)
+- return -EFAULT;
+- } else {
+- page = pages[0];
++ /* used to check for invalidations in progress */
++ mmu_seq = kvm->mmu_notifier_seq;
++ smp_rmb();
++
++ /*
++ * Do a fast check first, since __gfn_to_pfn_memslot doesn't
++ * do it with !atomic && !async, which is how we call it.
++ * We always ask for write permission since the common case
++ * is that the page is writable.
++ */
++ hva = gfn_to_hva_memslot(memslot, gfn);
++ if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
+ pfn = page_to_pfn(page);
+- if (PageCompound(page)) {
+- pte_size <<= compound_order(compound_head(page));
+- /* See if we can insert a 1GB or 2MB large PTE here */
+- if (pte_size >= PUD_SIZE &&
+- (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+- (hva & (PUD_SIZE - PAGE_SIZE))) {
+- level = 2;
+- pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
+- } else if (pte_size >= PMD_SIZE &&
+- (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+- (hva & (PMD_SIZE - PAGE_SIZE))) {
+- level = 1;
+- pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
+- }
++ upgrade_write = true;
++ } else {
++ /* Call KVM generic code to do the slow-path check */
++ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
++ writing, upgrade_p);
++ if (is_error_noslot_pfn(pfn))
++ return -EFAULT;
++ page = NULL;
++ if (pfn_valid(pfn)) {
++ page = pfn_to_page(pfn);
++ if (PageReserved(page))
++ page = NULL;
+ }
+- /* See if we can provide write access */
+- if (writing) {
+- pgflags |= _PAGE_WRITE;
+- } else {
+- local_irq_save(flags);
+- ptep = find_current_mm_pte(current->mm->pgd,
+- hva, NULL, NULL);
+- if (ptep && pte_write(*ptep))
+- pgflags |= _PAGE_WRITE;
+- local_irq_restore(flags);
++ }
++
++ /* See if we can insert a 1GB or 2MB large PTE here */
++ level = 0;
++ if (page && PageCompound(page)) {
++ pte_size = PAGE_SIZE << compound_order(compound_head(page));
++ if (pte_size >= PUD_SIZE &&
++ (gpa & (PUD_SIZE - PAGE_SIZE)) ==
++ (hva & (PUD_SIZE - PAGE_SIZE))) {
++ level = 2;
++ pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
++ } else if (pte_size >= PMD_SIZE &&
++ (gpa & (PMD_SIZE - PAGE_SIZE)) ==
++ (hva & (PMD_SIZE - PAGE_SIZE))) {
++ level = 1;
++ pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
+ }
+ }
+
+ /*
+ * Compute the PTE value that we need to insert.
+ */
+- pgflags |= _PAGE_PRESENT | _PAGE_PTE | _PAGE_ACCESSED;
+- if (pgflags & _PAGE_WRITE)
+- pgflags |= _PAGE_DIRTY;
+- pte = pfn_pte(pfn, __pgprot(pgflags));
++ if (page) {
++ pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
++ _PAGE_ACCESSED;
++ if (writing || upgrade_write)
++ pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
++ pte = pfn_pte(pfn, __pgprot(pgflags));
++ } else {
++ /*
++ * Read the PTE from the process' radix tree and use that
++ * so we get the attribute bits.
++ */
++ local_irq_disable();
++ ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
++ pte = *ptep;
++ local_irq_enable();
++ if (shift == PUD_SHIFT &&
++ (gpa & (PUD_SIZE - PAGE_SIZE)) ==
++ (hva & (PUD_SIZE - PAGE_SIZE))) {
++ level = 2;
++ } else if (shift == PMD_SHIFT &&
++ (gpa & (PMD_SIZE - PAGE_SIZE)) ==
++ (hva & (PMD_SIZE - PAGE_SIZE))) {
++ level = 1;
++ } else if (shift && shift != PAGE_SHIFT) {
++ /* Adjust PFN */
++ unsigned long mask = (1ul << shift) - PAGE_SIZE;
++ pte = __pte(pte_val(pte) | (hva & mask));
++ }
++ if (!(writing || upgrade_write))
++ pte = __pte(pte_val(pte) & ~ _PAGE_WRITE);
++ pte = __pte(pte_val(pte) | _PAGE_EXEC);
++ }
+
+ /* Allocate space in the tree and write the PTE */
+ ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+
+ if (page) {
+- if (!ret && (pgflags & _PAGE_WRITE))
++ if (!ret && (pte_val(pte) & _PAGE_WRITE))
+ set_page_dirty_lock(page);
+ put_page(page);
+ }
+--
+2.13.7
+
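Editor's note: for review purposes, the two-stage lookup the patch introduces can be summarized outside the fault handler as follows. This is a condensed sketch, not code from the tree: the helper name resolve_guest_pfn is invented for illustration, the logic is actually inlined in kvmppc_book3s_radix_page_fault() above, and the error paths and large-page handling of the real hunks are omitted.

/*
 * Condensed sketch of the lookup sequence introduced by the patch
 * above. Illustrative only; not a drop-in replacement.
 */
static long resolve_guest_pfn(struct kvm_memory_slot *memslot,
			      unsigned long gfn, bool writing,
			      bool *upgrade_p, struct page **pagep,
			      unsigned long *pfnp)
{
	unsigned long hva = gfn_to_hva_memslot(memslot, gfn);
	struct page *page = NULL;
	unsigned long pfn;

	/* Fast path: try to grab the page with write permission. */
	if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
		pfn = page_to_pfn(page);
		*upgrade_p = true;	/* mapping proved writable */
	} else {
		/*
		 * Slow path: generic KVM lookup, which also handles
		 * VM_IO and VM_PFNMAP vmas (atomic=false, async=NULL).
		 */
		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
					   writing, upgrade_p);
		if (is_error_noslot_pfn(pfn))
			return -EFAULT;
		/* Only valid, non-reserved pfns count as struct-page RAM. */
		if (pfn_valid(pfn)) {
			page = pfn_to_page(pfn);
			if (PageReserved(page))
				page = NULL;
		}
	}
	*pagep = page;
	*pfnp = pfn;
	return 0;
}

Trying __get_user_pages_fast() first matters because, as the commit message notes, hva_to_pfn() only takes that fast path itself when atomic or async is set, and this caller sets neither.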
diff --git a/series.conf b/series.conf
index d6ccd21170..339c43155d 100644
--- a/series.conf
+++ b/series.conf
@@ -14957,6 +14957,7 @@
patches.arch/KVM-PPC-Book3S-HV-Radix-page-fault-handler-optimizat.patch
patches.arch/KVM-PPC-Book3S-HV-Streamline-setting-of-reference-an.patch
patches.arch/KVM-PPC-Book3S-HV-Handle-1GB-pages-in-radix-page-fau.patch
+ patches.arch/KVM-PPC-Book3S-HV-Use-__gfn_to_pfn_memslot-in-page-f.patch
patches.arch/kvm-vmx-raise-internal-error-for-exception-during-invalid-protected-mode-state
patches.suse/net-fool-proof-dev_valid_name.patch
patches.suse/ip_tunnel-better-validate-user-provided-tunnel-names.patch
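Editor's note: the other behavioural detail worth calling out is the non-RAM fallback described in the commit message. When page == NULL the pfn is not backed by normal RAM, so the PTE is read from the process' radix tree to pick up the mapping attribute bits (non-cacheable, guarded) along with the PFN. A minimal sketch of that step, lifted from the hunk above with the level/shift bookkeeping stripped; the NULL check on ptep is an addition for this standalone sketch (the real hunk dereferences directly):

/*
 * Read the PTE from the process' radix tree so the attribute
 * bits are preserved; dsisr handling and level selection from
 * the real hunk are omitted here.
 */
local_irq_disable();
ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
if (!ptep) {
	local_irq_enable();
	return RESUME_GUEST;	/* let the guest retry the access */
}
pte = *ptep;
local_irq_enable();
if (!(writing || upgrade_write))
	pte = __pte(pte_val(pte) & ~_PAGE_WRITE);	/* drop write */
pte = __pte(pte_val(pte) | _PAGE_EXEC);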