author     Michal Suchanek <msuchanek@suse.de>    2018-10-31 13:27:38 +0100
committer  Michal Suchanek <msuchanek@suse.de>    2018-10-31 23:28:44 +0100
commit     a02fa4e3650b80126fe23c1e24bf047c85f03e97 (patch)
tree       6fec110feba3866d8ea0d0da38db892d85f88225
parent     d9a3e02ba9ad9101b9c21f14c92eeb48d39014ed (diff)
KVM: PPC: Avoid marking DMA-mapped pages dirty in real mode (bsc#1061840).
-rw-r--r--   patches.arch/KVM-PPC-Avoid-marking-DMA-mapped-pages-dirty-in-real.patch   367
-rw-r--r--   series.conf                                                                 1
2 files changed, 368 insertions, 0 deletions
diff --git a/patches.arch/KVM-PPC-Avoid-marking-DMA-mapped-pages-dirty-in-real.patch b/patches.arch/KVM-PPC-Avoid-marking-DMA-mapped-pages-dirty-in-real.patch
new file mode 100644
index 0000000000..43829de79c
--- /dev/null
+++ b/patches.arch/KVM-PPC-Avoid-marking-DMA-mapped-pages-dirty-in-real.patch
@@ -0,0 +1,367 @@
+From bea02736a6ad85ae20edc853e3c5e0805afa7946 Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Mon, 10 Sep 2018 18:29:07 +1000
+Subject: [PATCH] KVM: PPC: Avoid marking DMA-mapped pages dirty in real mode
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc5
+Git-commit: 425333bf3a7743715c17e503049d0837d6c4a603
+
+At the moment the real mode handler of H_PUT_TCE calls iommu_tce_xchg_rm()
+which in turn reads the old TCE and if it was a valid entry, marks
+the physical page dirty if it was mapped for writing. Since it is in
+real mode, realmode_pfn_to_page() is used instead of pfn_to_page()
+to get the page struct. However SetPageDirty() itself reads the compound
+page head and returns a virtual address for the head page struct and
+setting dirty bit for that kills the system.
+
+This adds additional dirty bit tracking into the MM/IOMMU API for use
+in the real mode. Note that this does not change how VFIO and
+KVM (in virtual mode) set this bit. The KVM (real mode) changes include:
+- use the lowest bit of the cached host phys address to carry
+the dirty bit;
+- mark pages dirty when they are unpinned which happens when
+the preregistered memory is released which always happens in virtual
+mode;
+- add mm_iommu_ua_mark_dirty_rm() helper to set delayed dirty bit;
+- change iommu_tce_xchg_rm() to take the kvm struct for the mm to use
+in the new mm_iommu_ua_mark_dirty_rm() helper;
+- move iommu_tce_xchg_rm() to book3s_64_vio_hv.c (which is the only
+caller anyway) to reduce the real mode KVM and IOMMU knowledge
+across different subsystems.
+
+This removes realmode_pfn_to_page() as it is not used anymore.
+
+While we are at it, remove some EXPORT_SYMBOL_GPL() as that code is for
+the real mode only and modules cannot call it anyway.
+
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/book3s/64/pgtable.h | 1 -
+ arch/powerpc/include/asm/iommu.h | 2 --
+ arch/powerpc/include/asm/mmu_context.h | 1 +
+ arch/powerpc/kernel/iommu.c | 25 -------------
+ arch/powerpc/kvm/book3s_64_vio_hv.c | 39 ++++++++++++++++-----
+ arch/powerpc/mm/init_64.c | 52 +---------------------------
+ arch/powerpc/mm/mmu_context_iommu.c | 34 +++++++++++++++---
+ 7 files changed, 63 insertions(+), 91 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
+index ae63ecd4000a..96c7c55fca28 100644
+--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
++++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
+@@ -1028,7 +1028,6 @@ static inline void vmemmap_remove_mapping(unsigned long start,
+ return hash__vmemmap_remove_mapping(start, page_size);
+ }
+ #endif
+-struct page *realmode_pfn_to_page(unsigned long pfn);
+
+ static inline pte_t pmd_pte(pmd_t pmd)
+ {
+diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
+index 470124740864..e734db857b5f 100644
+--- a/arch/powerpc/include/asm/iommu.h
++++ b/arch/powerpc/include/asm/iommu.h
+@@ -214,8 +214,6 @@ extern void iommu_del_device(struct device *dev);
+ extern int __init tce_iommu_bus_notifier_init(void);
+ extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+ unsigned long *hpa, enum dma_data_direction *direction);
+-extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
+- unsigned long *hpa, enum dma_data_direction *direction);
+ #else
+ static inline void iommu_register_group(struct iommu_table_group *table_group,
+ int pci_domain_number,
+diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
+index 04a557e2bb07..a87c5239f129 100644
+--- a/arch/powerpc/include/asm/mmu_context.h
++++ b/arch/powerpc/include/asm/mmu_context.h
+@@ -40,6 +40,7 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa);
+ extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa);
++extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua);
+ extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
+ extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
+ #endif
+diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
+index f2b724cd9e64..081ca2546afb 100644
+--- a/arch/powerpc/kernel/iommu.c
++++ b/arch/powerpc/kernel/iommu.c
+@@ -1014,31 +1014,6 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+ }
+ EXPORT_SYMBOL_GPL(iommu_tce_xchg);
+
+-#ifdef CONFIG_PPC_BOOK3S_64
+-long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
+- unsigned long *hpa, enum dma_data_direction *direction)
+-{
+- long ret;
+-
+- ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+-
+- if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+- (*direction == DMA_BIDIRECTIONAL))) {
+- struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
+-
+- if (likely(pg)) {
+- SetPageDirty(pg);
+- } else {
+- tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+- ret = -EFAULT;
+- }
+- }
+-
+- return ret;
+-}
+-EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
+-#endif
+-
+ int iommu_take_ownership(struct iommu_table *tbl)
+ {
+ unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
+diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
+index 841aef714929..6de0a7d57192 100644
+--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
++++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
+@@ -187,12 +187,35 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
+ EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
+
+ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+-static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry)
++static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
++ unsigned long entry, unsigned long *hpa,
++ enum dma_data_direction *direction)
++{
++ long ret;
++
++ ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
++
++ if (!ret && ((*direction == DMA_FROM_DEVICE) ||
++ (*direction == DMA_BIDIRECTIONAL))) {
++ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
++ /*
++ * kvmppc_rm_tce_iommu_do_map() updates the UA cache after
++ * calling this so we still get here a valid UA.
++ */
++ if (pua && *pua)
++ mm_iommu_ua_mark_dirty_rm(mm, be64_to_cpu(*pua));
++ }
++
++ return ret;
++}
++
++static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
++ unsigned long entry)
+ {
+ unsigned long hpa = 0;
+ enum dma_data_direction dir = DMA_NONE;
+
+- iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
++ iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
+ }
+
+ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
+@@ -228,7 +251,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
+ unsigned long hpa = 0;
+ long ret;
+
+- if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir))
++ if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir))
+ /*
+ * real mode xchg can fail if struct page crosses
+ * a page boundary
+@@ -240,7 +263,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
+
+ ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
+ if (ret)
+- iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
++ iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
+
+ return ret;
+ }
+@@ -290,7 +313,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
+ if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
+ return H_CLOSED;
+
+- ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
++ ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
+ if (ret) {
+ mm_iommu_mapped_dec(mem);
+ /*
+@@ -379,7 +402,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ return ret;
+
+ WARN_ON_ONCE_RM(1);
+- kvmppc_rm_clear_tce(stit->tbl, entry);
++ kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+ }
+
+ kvmppc_tce_put(stt, entry, tce);
+@@ -528,7 +551,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ goto unlock_exit;
+
+ WARN_ON_ONCE_RM(1);
+- kvmppc_rm_clear_tce(stit->tbl, entry);
++ kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+ }
+
+ kvmppc_tce_put(stt, entry + i, tce);
+@@ -579,7 +602,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
+ return ret;
+
+ WARN_ON_ONCE_RM(1);
+- kvmppc_rm_clear_tce(stit->tbl, entry);
++ kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+ }
+ }
+
+diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
+index ead129772012..e5d8d218f2c2 100644
+--- a/arch/powerpc/mm/init_64.c
++++ b/arch/powerpc/mm/init_64.c
+@@ -281,57 +281,7 @@ void register_page_bootmem_memmap(unsigned long section_nr,
+ struct page *start_page, unsigned long size)
+ {
+ }
+-
+-/*
+- * We do not have access to the sparsemem vmemmap, so we fallback to
+- * walking the list of sparsemem blocks which we already maintain for
+- * the sake of crashdump. In the long run, we might want to maintain
+- * a tree if performance of that linear walk becomes a problem.
+- *
+- * realmode_pfn_to_page functions can fail due to:
+- * 1) As real sparsemem blocks do not lay in RAM continously (they
+- * are in virtual address space which is not available in the real mode),
+- * the requested page struct can be split between blocks so get_page/put_page
+- * may fail.
+- * 2) When huge pages are used, the get_page/put_page API will fail
+- * in real mode as the linked addresses in the page struct are virtual
+- * too.
+- */
+-struct page *realmode_pfn_to_page(unsigned long pfn)
+-{
+- struct vmemmap_backing *vmem_back;
+- struct page *page;
+- unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
+- unsigned long pg_va = (unsigned long) pfn_to_page(pfn);
+-
+- for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) {
+- if (pg_va < vmem_back->virt_addr)
+- continue;
+-
+- /* After vmemmap_list entry free is possible, need check all */
+- if ((pg_va + sizeof(struct page)) <=
+- (vmem_back->virt_addr + page_size)) {
+- page = (struct page *) (vmem_back->phys + pg_va -
+- vmem_back->virt_addr);
+- return page;
+- }
+- }
+-
+- /* Probably that page struct is split between real pages */
+- return NULL;
+-}
+-EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
+-
+-#elif defined(CONFIG_FLATMEM)
+-
+-struct page *realmode_pfn_to_page(unsigned long pfn)
+-{
+- struct page *page = pfn_to_page(pfn);
+- return page;
+-}
+-EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
+-
+-#endif /* CONFIG_SPARSEMEM_VMEMMAP/CONFIG_FLATMEM */
++#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+ #ifdef CONFIG_PPC_BOOK3S_64
+ static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
+diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
+index 06156403481c..32dc0e4c425d 100644
+--- a/arch/powerpc/mm/mmu_context_iommu.c
++++ b/arch/powerpc/mm/mmu_context_iommu.c
+@@ -18,11 +18,15 @@
+ #include <linux/migrate.h>
+ #include <linux/hugetlb.h>
+ #include <linux/swap.h>
++#include <linux/sizes.h>
+ #include <asm/mmu_context.h>
+ #include <asm/pte-walk.h>
+
+ static DEFINE_MUTEX(mem_list_mutex);
+
++#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY 0x1
++#define MM_IOMMU_TABLE_GROUP_PAGE_MASK ~(SZ_4K - 1)
++
+ struct mm_iommu_table_group_mem_t {
+ struct list_head next;
+ struct rcu_head rcu;
+@@ -264,6 +268,9 @@ static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
+ if (!page)
+ continue;
+
++ if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
++ SetPageDirty(page);
++
+ put_page(page);
+ mem->hpas[i] = 0;
+ }
+@@ -361,7 +368,6 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
+
+ return ret;
+ }
+-EXPORT_SYMBOL_GPL(mm_iommu_lookup_rm);
+
+ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
+ unsigned long ua, unsigned long entries)
+@@ -391,7 +397,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+ if (pageshift > mem->pageshift)
+ return -EFAULT;
+
+- *hpa = *va | (ua & ~PAGE_MASK);
++ *hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
+
+ return 0;
+ }
+@@ -414,11 +420,31 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
+ if (!pa)
+ return -EFAULT;
+
+- *hpa = *pa | (ua & ~PAGE_MASK);
++ *hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
+
+ return 0;
+ }
+-EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm);
++
++extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
++{
++ struct mm_iommu_table_group_mem_t *mem;
++ long entry;
++ void *va;
++ unsigned long *pa;
++
++ mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
++ if (!mem)
++ return;
++
++ entry = (ua - mem->ua) >> PAGE_SHIFT;
++ va = &mem->hpas[entry];
++
++ pa = (void *) vmalloc_to_phys(va);
++ if (!pa)
++ return;
++
++ *pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
++}
+
+ long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
+ {
+--
+2.13.7
+
diff --git a/series.conf b/series.conf
index 0252ed0464..6ad01abf75 100644
--- a/series.conf
+++ b/series.conf
@@ -18122,6 +18122,7 @@
patches.drivers/floppy-Do-not-copy-a-kernel-pointer-to-user-memory-i.patch
patches.fixes/ubifs-Check-for-name-being-NULL-while-mounting.patch
patches.fixes/Revert-ubifs-xattr-Don-t-operate-on-deleted-inodes.patch
+ patches.arch/KVM-PPC-Avoid-marking-DMA-mapped-pages-dirty-in-real.patch
patches.fixes/0001-x86-paravirt-Fix-some-warning-messages.patch
patches.arch/x86-mm-expand-static-page-table-for-fixmap-space
patches.arch/xen-x86-vpmu-zero-struct-pt_regs-before-calling-into-sample-handling-code
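
As context for the approach described in the patch's commit message — carrying the dirty flag in the lowest bit of the cached host physical address, masking it off when the address is read back, and applying SetPageDirty() only later at unpin time in virtual mode — here is a minimal standalone sketch. It is illustrative only, not kernel code: the MM_IOMMU_TABLE_GROUP_PAGE_DIRTY and MM_IOMMU_TABLE_GROUP_PAGE_MASK values mirror the definitions the patch adds in mmu_context_iommu.c, while the hpas[] array and the helper names are hypothetical stand-ins for mem->hpas[] and the real helpers.

/*
 * Illustrative sketch only (plain C, not kernel code): a dirty flag riding
 * in the low bit of a cached, 4K-aligned host physical address.
 */
#include <stdio.h>

#define SZ_4K                            0x1000UL
#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY  0x1UL
#define MM_IOMMU_TABLE_GROUP_PAGE_MASK   (~(SZ_4K - 1))

static unsigned long hpas[4];  /* hypothetical stand-in for mem->hpas[] */

/* Real-mode side: only flip a bit in the cache, never touch struct page. */
static void mark_dirty_rm(unsigned long entry)
{
	hpas[entry] |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
}

/* Readers mask the flag off so callers see a clean, page-aligned address. */
static unsigned long cached_hpa(unsigned long entry, unsigned long ua_offset)
{
	return (hpas[entry] & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | ua_offset;
}

/* Virtual-mode unpin: the deferred flag is finally applied to the page. */
static void unpin(unsigned long entry)
{
	if (hpas[entry] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
		printf("entry %lu: SetPageDirty() would run here\n", entry);
	hpas[entry] = 0;
}

int main(void)
{
	hpas[0] = 0x200000UL;           /* cached, 4K-aligned host phys addr */
	mark_dirty_rm(0);               /* e.g. from the real-mode H_PUT_TCE path */
	printf("hpa = 0x%lx\n", cached_hpa(0, 0x42));  /* flag masked: 0x200042 */
	unpin(0);                       /* e.g. from mm_iommu_unpin() */
	return 0;
}

Because the cached addresses are always at least 4K-aligned, bit 0 is guaranteed free, so the flag costs no extra storage and can be set safely from real mode without ever dereferencing a struct page.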