Home Home > GIT Browse
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kernel Build Daemon <kbuild@suse.de> 2018-04-06 07:23:02 +0200
committer: Kernel Build Daemon <kbuild@suse.de> 2018-04-06 07:23:02 +0200
commit: 8c6ffec2d0eec5ae79840c2b55ac5209b59ef34b (patch)
tree: 682facba5bfd101e64fd90e6f886bf8f284c2930
parent: 5c0d1dd2bc9815008668f716f56c9ad9f1606189 (diff)
parent: b6b6748032fe5aa0fdb0175b3b988abc43fd56d2 (diff)
Merge branch 'SLE12-SP3' into openSUSE-42.3
-rw-r--r-- patches.fixes/ceph-only-dirty-iter_iovec-pages-for-direct-read.patch 71
-rw-r--r-- patches.kabi/x86-enforce-entry-stack-alignment.patch 25
-rw-r--r-- patches.suse/suse-hv-PCI-hv-Fix-2-hang-issues-in-hv_compose_msi_msg.patch 140
-rw-r--r-- patches.suse/suse-hv-PCI-hv-Fix-a-comment-typo-in-_hv_pcifront_read_confi.patch 35
-rw-r--r-- patches.suse/suse-hv-PCI-hv-Only-queue-new-work-items-in-hv_pci_devices_p.patch 61
-rw-r--r-- patches.suse/suse-hv-PCI-hv-Remove-the-bogus-test-in-hv_eject_device_work.patch 41
-rw-r--r-- patches.suse/suse-hv-PCI-hv-Serialize-the-present-and-eject-work-items.patch 158
-rw-r--r-- patches.suse/x86-enforce-entry-stack-alignment.patch 105
-rw-r--r-- patches.suse/x86-kaiser-remove-user-mapping-tss.patch 30
-rw-r--r-- series.conf 9
10 files changed, 660 insertions, 15 deletions
diff --git a/patches.fixes/ceph-only-dirty-iter_iovec-pages-for-direct-read.patch b/patches.fixes/ceph-only-dirty-iter_iovec-pages-for-direct-read.patch
new file mode 100644
index 0000000000..d75d1178c3
--- /dev/null
+++ b/patches.fixes/ceph-only-dirty-iter_iovec-pages-for-direct-read.patch
@@ -0,0 +1,71 @@
+From: "Yan, Zheng" <zyan@redhat.com>
+Date: Fri, 16 Mar 2018 11:22:29 +0800
+Subject: ceph: only dirty ITER_IOVEC pages for direct read
+Git-commit: 85784f9395987a422fa04263e7c0fb13da11eb5c
+Patch-mainline: v4.16
+References: bsc#1084898
+
+If a page is already locked, attempting to dirty it leads to a deadlock
+in lock_page(). This is what currently happens to ITER_BVEC pages when
+a dio-enabled loop device is backed by ceph:
+
+ $ losetup --direct-io /dev/loop0 /mnt/cephfs/img
+ $ xfs_io -c 'pread 0 4k' /dev/loop0
+
+Follow other file systems and only dirty ITER_IOVEC pages.
+
+Cc: stable@kernel.org
+Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
+Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Acked-by: Luis Henriques <lhenriques@suse.com>
+---
+ fs/ceph/file.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -631,7 +631,8 @@ static ssize_t ceph_sync_read(struct kio
+ struct ceph_aio_request {
+ struct kiocb *iocb;
+ size_t total_len;
+- int write;
++ bool write;
++ bool should_dirty;
+ int error;
+ struct list_head osd_reqs;
+ unsigned num_reqs;
+@@ -746,7 +747,7 @@ static void ceph_aio_complete_req(struct
+ }
+ }
+
+- ceph_put_page_vector(osd_data->pages, num_pages, !aio_req->write);
++ ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty);
+ ceph_osdc_put_request(req);
+
+ if (rc < 0)
+@@ -846,6 +847,7 @@ ceph_direct_read_write(struct kiocb *ioc
+ size_t count = iov_iter_count(iter);
+ loff_t pos = iocb->ki_pos;
+ bool write = iov_iter_rw(iter) == WRITE;
++ bool should_dirty = !write && iter_is_iovec(iter);
+
+ if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
+ return -EROFS;
+@@ -915,6 +917,7 @@ ceph_direct_read_write(struct kiocb *ioc
+ if (aio_req) {
+ aio_req->iocb = iocb;
+ aio_req->write = write;
++ aio_req->should_dirty = should_dirty;
+ INIT_LIST_HEAD(&aio_req->osd_reqs);
+ if (write) {
+ aio_req->mtime = mtime;
+@@ -973,7 +976,7 @@ ceph_direct_read_write(struct kiocb *ioc
+ len = ret;
+ }
+
+- ceph_put_page_vector(pages, num_pages, !write);
++ ceph_put_page_vector(pages, num_pages, should_dirty);
+
+ ceph_osdc_put_request(req);
+ if (ret < 0)
diff --git a/patches.kabi/x86-enforce-entry-stack-alignment.patch b/patches.kabi/x86-enforce-entry-stack-alignment.patch
new file mode 100644
index 0000000000..1ff9dbbe83
--- /dev/null
+++ b/patches.kabi/x86-enforce-entry-stack-alignment.patch
@@ -0,0 +1,25 @@
+From: Jiri Kosina <jkosina@suse.cz>
+Subject: KABI: x86/kaiser: properly align trampoline stack
+Patch-mainline: Never, SUSE specific
+References: bsc#1087260
+
+Hide the change in tss_struct from kABI checker; we've changed the semantics
+of this structure with trampoline entry anyway.
+
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/include/asm/processor.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -306,7 +306,9 @@ struct tss_struct {
+ * Space for the temporary SYSENTER stack:
+ */
+ /* IRQ stacks have to maintain 16-bytes alignment! */
++#ifndef __GENKSYMS__
+ u8 pad;
++#endif
+ unsigned long SYSENTER_stack[64];
+
+ } __attribute__((__aligned__(PAGE_SIZE)));
diff --git a/patches.suse/suse-hv-PCI-hv-Fix-2-hang-issues-in-hv_compose_msi_msg.patch b/patches.suse/suse-hv-PCI-hv-Fix-2-hang-issues-in-hv_compose_msi_msg.patch
new file mode 100644
index 0000000000..7df359aa0d
--- /dev/null
+++ b/patches.suse/suse-hv-PCI-hv-Fix-2-hang-issues-in-hv_compose_msi_msg.patch
@@ -0,0 +1,140 @@
+From: Dexuan Cui <decui@microsoft.com>
+Date: Thu, 15 Mar 2018 14:21:08 +0000
+Patch-mainline: queued in a subsystem tree
+Subject: PCI: hv: Fix 2 hang issues in hv_compose_msi_msg()
+Git-commit: de0aa7b2f97d348ba7d1e17a00744c989baa0cb6
+Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git
+References: fate#323887, bsc#1087659, bsc#1087906
+
+1. With the patch "x86/vector/msi: Switch to global reservation mode",
+the recent v4.15 and newer kernels always hang for 1-vCPU Hyper-V VM
+with SR-IOV. This is because when we reach hv_compose_msi_msg() by
+request_irq() -> request_threaded_irq() ->__setup_irq()->irq_startup()
+-> __irq_startup() -> irq_domain_activate_irq() -> ... ->
+msi_domain_activate() -> ... -> hv_compose_msi_msg(), local irq is
+disabled in __setup_irq().
+
+Note: when we reach hv_compose_msi_msg() by another code path:
+pci_enable_msix_range() -> ... -> irq_domain_activate_irq() -> ... ->
+hv_compose_msi_msg(), local irq is not disabled.
+
+hv_compose_msi_msg() depends on an interrupt from the host.
+With interrupts disabled, a UP VM always hangs in the busy loop in
+the function, because the interrupt callback hv_pci_onchannelcallback()
+can not be called.
+
+We can do nothing but work it around by polling the channel. This
+is ugly, but we don't have any other choice.
+
+2. If the host is ejecting the VF device before we reach
+hv_compose_msi_msg(), in a UP VM, we can hang in hv_compose_msi_msg()
+forever, because at this time the host doesn't respond to the
+CREATE_INTERRUPT request. This issue has existed since the first day the
+pci-hyperv driver appeared in the kernel.
+
+Luckily, this can also be worked around by polling the channel
+for the PCI_EJECT message and hpdev->state, and by checking the
+PCI vendor ID.
+
+Note: actually the above 2 issues also happen to a SMP VM, if
+"hbus->hdev->channel->target_cpu == smp_processor_id()" is true.
+
+Fixes: 4900be83602b ("x86/vector/msi: Switch to global reservation mode")
+Tested-by: Adrian Suhov <v-adsuho@microsoft.com>
+Tested-by: Chris Valean <v-chvale@microsoft.com>
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+Reviewed-by: Michael Kelley <mikelley@microsoft.com>
+Acked-by: Haiyang Zhang <haiyangz@microsoft.com>
+Cc: <stable@vger.kernel.org>
+Cc: Stephen Hemminger <sthemmin@microsoft.com>
+Cc: K. Y. Srinivasan <kys@microsoft.com>
+Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
+Cc: Jack Morgenstein <jackm@mellanox.com>
+Acked-by: Olaf Hering <ohering@suse.de>
+---
+ drivers/pci/host/pci-hyperv.c | 58 ++++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 57 insertions(+), 1 deletion(-)
+
+--- a/drivers/pci/host/pci-hyperv.c
++++ b/drivers/pci/host/pci-hyperv.c
+@@ -521,6 +521,8 @@ struct hv_pci_compl {
+ s32 completion_status;
+ };
+
++static void hv_pci_onchannelcallback(void *context);
++
+ /**
+ * hv_pci_generic_compl() - Invoked for a completion packet
+ * @context: Set up by the sender of the packet.
+@@ -665,6 +667,31 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
+ }
+ }
+
++static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev)
++{
++ u16 ret;
++ unsigned long flags;
++ void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET +
++ PCI_VENDOR_ID;
++
++ spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
++
++ /* Choose the function to be read. (See comment above) */
++ writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
++ /* Make sure the function was chosen before we start reading. */
++ mb();
++ /* Read from that function's config space. */
++ ret = readw(addr);
++ /*
++ * mb() is not required here, because the spin_unlock_irqrestore()
++ * is a barrier.
++ */
++
++ spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
++
++ return ret;
++}
++
+ /**
+ * _hv_pcifront_write_config() - Internal PCI config write
+ * @hpdev: The PCI driver's representation of the device
+@@ -1107,8 +1134,37 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+ * Since this function is called with IRQ locks held, can't
+ * do normal wait for completion; instead poll.
+ */
+- while (!try_wait_for_completion(&comp.comp_pkt.host_event))
++ while (!try_wait_for_completion(&comp.comp_pkt.host_event)) {
++ /* 0xFFFF means an invalid PCI VENDOR ID. */
++ if (hv_pcifront_get_vendor_id(hpdev) == 0xFFFF) {
++ dev_err_once(&hbus->hdev->device,
++ "the device has gone\n");
++ goto free_int_desc;
++ }
++
++ /*
++ * When the higher level interrupt code calls us with
++ * interrupt disabled, we must poll the channel by calling
++ * the channel callback directly when channel->target_cpu is
++ * the current CPU. When the higher level interrupt code
++ * calls us with interrupt enabled, let's add the
++ * local_bh_disable()/enable() to avoid race.
++ */
++ local_bh_disable();
++
++ if (hbus->hdev->channel->target_cpu == smp_processor_id())
++ hv_pci_onchannelcallback(hbus);
++
++ local_bh_enable();
++
++ if (hpdev->state == hv_pcichild_ejecting) {
++ dev_err_once(&hbus->hdev->device,
++ "the device is being ejected\n");
++ goto free_int_desc;
++ }
++
+ udelay(100);
++ }
+
+ if (comp.comp_pkt.completion_status < 0) {
+ dev_err(&hbus->hdev->device,
diff --git a/patches.suse/suse-hv-PCI-hv-Fix-a-comment-typo-in-_hv_pcifront_read_confi.patch b/patches.suse/suse-hv-PCI-hv-Fix-a-comment-typo-in-_hv_pcifront_read_confi.patch
new file mode 100644
index 0000000000..4b804148fe
--- /dev/null
+++ b/patches.suse/suse-hv-PCI-hv-Fix-a-comment-typo-in-_hv_pcifront_read_confi.patch
@@ -0,0 +1,35 @@
+From: Dexuan Cui <decui@microsoft.com>
+Date: Thu, 15 Mar 2018 14:21:35 +0000
+Subject: PCI: hv: Fix a comment typo in _hv_pcifront_read_config()
+Patch-mainline: queued in a subsystem tree
+Git-commit: df3f2159f4e4146d40b244725ce79ed921530b99
+Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git
+References: fate#323887, bsc#1087659
+
+Comment in _hv_pcifront_read_config() contains a typo, fix it.
+
+No functional change.
+
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+[lorenzo.pieralisi@arm.com: changed commit log]
+Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+Acked-by: Haiyang Zhang <haiyangz@microsoft.com>
+Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
+Cc: Stephen Hemminger <sthemmin@microsoft.com>
+Cc: K. Y. Srinivasan <kys@microsoft.com>
+Acked-by: Olaf Hering <ohering@suse.de>
+---
+ drivers/pci/host/pci-hyperv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/pci/host/pci-hyperv.c
++++ b/drivers/pci/host/pci-hyperv.c
+@@ -656,7 +656,7 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
+ break;
+ }
+ /*
+- * Make sure the write was done before we release the spinlock
++ * Make sure the read was done before we release the spinlock
+ * allowing consecutive reads/writes.
+ */
+ mb();
diff --git a/patches.suse/suse-hv-PCI-hv-Only-queue-new-work-items-in-hv_pci_devices_p.patch b/patches.suse/suse-hv-PCI-hv-Only-queue-new-work-items-in-hv_pci_devices_p.patch
new file mode 100644
index 0000000000..451171c3c4
--- /dev/null
+++ b/patches.suse/suse-hv-PCI-hv-Only-queue-new-work-items-in-hv_pci_devices_p.patch
@@ -0,0 +1,61 @@
+From: Dexuan Cui <decui@microsoft.com>
+Date: Thu, 15 Mar 2018 14:22:00 +0000
+Subject: PCI: hv: Only queue new work items in hv_pci_devices_present() if necessary
+Patch-mainline: queued in a subsystem tree
+Git-commit: 948373b3ed1bcf05a237c24675b84804315aff14
+Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git
+References: fate#323887, bsc#1087659
+
+If there is pending work in hv_pci_devices_present() we just need to add
+the new dr entry into the dr_list. Add a check to detect pending work
+items and update the code to skip queuing work if pending work items
+are detected.
+
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+[lorenzo.pieralisi@arm.com: updated commit log]
+Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+Reviewed-by: Michael Kelley <mikelley@microsoft.com>
+Acked-by: Haiyang Zhang <haiyangz@microsoft.com>
+Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
+Cc: Jack Morgenstein <jackm@mellanox.com>
+Cc: Stephen Hemminger <sthemmin@microsoft.com>
+Cc: K. Y. Srinivasan <kys@microsoft.com>
+Acked-by: Olaf Hering <ohering@suse.de>
+---
+ drivers/pci/host/pci-hyperv.c | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+--- a/drivers/pci/host/pci-hyperv.c
++++ b/drivers/pci/host/pci-hyperv.c
+@@ -1789,6 +1789,7 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
+ struct hv_dr_state *dr;
+ struct hv_dr_work *dr_wrk;
+ unsigned long flags;
++ bool pending_dr;
+
+ dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT);
+ if (!dr_wrk)
+@@ -1812,11 +1813,21 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
+ }
+
+ spin_lock_irqsave(&hbus->device_list_lock, flags);
++ /*
++ * If pending_dr is true, we have already queued a work,
++ * which will see the new dr. Otherwise, we need to
++ * queue a new work.
++ */
++ pending_dr = !list_empty(&hbus->dr_list);
+ list_add_tail(&dr->list_entry, &hbus->dr_list);
+ spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+
+- get_hvpcibus(hbus);
+- queue_work(hbus->wq, &dr_wrk->wrk);
++ if (pending_dr) {
++ kfree(dr_wrk);
++ } else {
++ get_hvpcibus(hbus);
++ queue_work(hbus->wq, &dr_wrk->wrk);
++ }
+ }
+
+ /**
diff --git a/patches.suse/suse-hv-PCI-hv-Remove-the-bogus-test-in-hv_eject_device_work.patch b/patches.suse/suse-hv-PCI-hv-Remove-the-bogus-test-in-hv_eject_device_work.patch
new file mode 100644
index 0000000000..a8e3db7df5
--- /dev/null
+++ b/patches.suse/suse-hv-PCI-hv-Remove-the-bogus-test-in-hv_eject_device_work.patch
@@ -0,0 +1,41 @@
+From: Dexuan Cui <decui@microsoft.com>
+Date: Thu, 15 Mar 2018 14:21:43 +0000
+Subject: PCI: hv: Remove the bogus test in hv_eject_device_work()
+Patch-mainline: queued in a subsystem tree
+Git-commit: fca288c0153b2b97114b9081bc3c33c3735145b6
+Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git
+References: fate#323887, bsc#1087659
+
+When the kernel is executing hv_eject_device_work(), the hpdev->state value
+must be hv_pcichild_ejecting; any other value would constitute a bug,
+therefore replace the bogus check with an explicit WARN_ON() that fires
+when this condition does not hold.
+
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+[lorenzo.pieralisi@arm.com: updated commit log]
+Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+Reviewed-by: Michael Kelley <mikelley@microsoft.com>
+Acked-by: Haiyang Zhang <haiyangz@microsoft.com>
+Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
+Cc: Jack Morgenstein <jackm@mellanox.com>
+Cc: Stephen Hemminger <sthemmin@microsoft.com>
+Cc: K. Y. Srinivasan <kys@microsoft.com>
+Acked-by: Olaf Hering <ohering@suse.de>
+---
+ drivers/pci/host/pci-hyperv.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/drivers/pci/host/pci-hyperv.c
++++ b/drivers/pci/host/pci-hyperv.c
+@@ -1842,10 +1842,7 @@ static void hv_eject_device_work(struct work_struct *work)
+
+ hpdev = container_of(work, struct hv_pci_dev, wrk);
+
+- if (hpdev->state != hv_pcichild_ejecting) {
+- put_pcichild(hpdev, hv_pcidev_ref_pnp);
+- return;
+- }
++ WARN_ON(hpdev->state != hv_pcichild_ejecting);
+
+ /*
+ * Ejection can come before or after the PCI bus has been set up, so
diff --git a/patches.suse/suse-hv-PCI-hv-Serialize-the-present-and-eject-work-items.patch b/patches.suse/suse-hv-PCI-hv-Serialize-the-present-and-eject-work-items.patch
new file mode 100644
index 0000000000..7cea795423
--- /dev/null
+++ b/patches.suse/suse-hv-PCI-hv-Serialize-the-present-and-eject-work-items.patch
@@ -0,0 +1,158 @@
+From: Dexuan Cui <decui@microsoft.com>
+Date: Thu, 15 Mar 2018 14:20:53 +0000
+Patch-mainline: queued in a subsystem tree
+Subject: PCI: hv: Serialize the present and eject work items
+Git-commit: 021ad274d7dc31611d4f47f7dd4ac7a224526f30
+Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git
+References: fate#323887, bsc#1087659
+
+When we hot-remove the device, we first receive a PCI_EJECT message and
+then receive a PCI_BUS_RELATIONS message with bus_rel->device_count == 0.
+
+The first message is offloaded to hv_eject_device_work(), and the second
+is offloaded to pci_devices_present_work(). Both the paths can be running
+list_del(&hpdev->list_entry), causing general protection fault, because
+system_wq can run them concurrently.
+
+The patch eliminates the race condition.
+
+Since access to present/eject work items is serialized, we do not need the
+hbus->enum_sem anymore, so remove it.
+
+Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs")
+Link: https://lkml.kernel.org/r/KL1P15301MB00064DA6B4D221123B5241CFBFD70@KL1P15301MB0006.APCP153.PROD.OUTLOOK.COM
+Tested-by: Adrian Suhov <v-adsuho@microsoft.com>
+Tested-by: Chris Valean <v-chvale@microsoft.com>
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+[lorenzo.pieralisi@arm.com: squashed semaphore removal patch]
+Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+Reviewed-by: Michael Kelley <mikelley@microsoft.com>
+Acked-by: Haiyang Zhang <haiyangz@microsoft.com>
+Cc: <stable@vger.kernel.org> # v4.6+
+Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
+Cc: Jack Morgenstein <jackm@mellanox.com>
+Cc: Stephen Hemminger <sthemmin@microsoft.com>
+Cc: K. Y. Srinivasan <kys@microsoft.com>
+Acked-by: Olaf Hering <ohering@suse.de>
+---
+ drivers/pci/host/pci-hyperv.c | 34 ++++++++++++++++------------------
+ 1 file changed, 16 insertions(+), 18 deletions(-)
+
+--- a/drivers/pci/host/pci-hyperv.c
++++ b/drivers/pci/host/pci-hyperv.c
+@@ -447,7 +447,6 @@ struct hv_pcibus_device {
+ spinlock_t device_list_lock; /* Protect lists below */
+ void __iomem *cfg_addr;
+
+- struct semaphore enum_sem;
+ struct list_head resources_for_children;
+
+ struct list_head children;
+@@ -461,6 +460,8 @@ struct hv_pcibus_device {
+ struct retarget_msi_interrupt retarget_msi_interrupt_params;
+
+ spinlock_t retarget_msi_interrupt_lock;
++
++ struct workqueue_struct *wq;
+ };
+
+ /*
+@@ -1590,12 +1591,8 @@ static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
+ * It must also treat the omission of a previously observed device as
+ * notification that the device no longer exists.
+ *
+- * Note that this function is a work item, and it may not be
+- * invoked in the order that it was queued. Back to back
+- * updates of the list of present devices may involve queuing
+- * multiple work items, and this one may run before ones that
+- * were sent later. As such, this function only does something
+- * if is the last one in the queue.
++ * Note that this function is serialized with hv_eject_device_work(),
++ * because both are pushed to the ordered workqueue hbus->wq.
+ */
+ static void pci_devices_present_work(struct work_struct *work)
+ {
+@@ -1616,11 +1613,6 @@ static void pci_devices_present_work(struct work_struct *work)
+
+ INIT_LIST_HEAD(&removed);
+
+- if (down_interruptible(&hbus->enum_sem)) {
+- put_hvpcibus(hbus);
+- return;
+- }
+-
+ /* Pull this off the queue and process it if it was the last one. */
+ spin_lock_irqsave(&hbus->device_list_lock, flags);
+ while (!list_empty(&hbus->dr_list)) {
+@@ -1637,7 +1629,6 @@ static void pci_devices_present_work(struct work_struct *work)
+ spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+
+ if (!dr) {
+- up(&hbus->enum_sem);
+ put_hvpcibus(hbus);
+ return;
+ }
+@@ -1724,7 +1715,6 @@ static void pci_devices_present_work(struct work_struct *work)
+ break;
+ }
+
+- up(&hbus->enum_sem);
+ put_hvpcibus(hbus);
+ kfree(dr);
+ }
+@@ -1770,7 +1760,7 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
+ spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+
+ get_hvpcibus(hbus);
+- schedule_work(&dr_wrk->wrk);
++ queue_work(hbus->wq, &dr_wrk->wrk);
+ }
+
+ /**
+@@ -1848,7 +1838,7 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
+ get_pcichild(hpdev, hv_pcidev_ref_pnp);
+ INIT_WORK(&hpdev->wrk, hv_eject_device_work);
+ get_hvpcibus(hpdev->hbus);
+- schedule_work(&hpdev->wrk);
++ queue_work(hpdev->hbus->wq, &hpdev->wrk);
+ }
+
+ /**
+@@ -2461,13 +2451,18 @@ static int hv_pci_probe(struct hv_device *hdev,
+ spin_lock_init(&hbus->config_lock);
+ spin_lock_init(&hbus->device_list_lock);
+ spin_lock_init(&hbus->retarget_msi_interrupt_lock);
+- sema_init(&hbus->enum_sem, 1);
+ init_completion(&hbus->remove_event);
++ hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0,
++ hbus->sysdata.domain);
++ if (!hbus->wq) {
++ ret = -ENOMEM;
++ goto free_bus;
++ }
+
+ ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
+ hv_pci_onchannelcallback, hbus);
+ if (ret)
+- goto free_bus;
++ goto destroy_wq;
+
+ hv_set_drvdata(hdev, hbus);
+
+@@ -2536,6 +2531,8 @@ free_config:
+ hv_free_config_window(hbus);
+ close:
+ vmbus_close(hdev->channel);
++destroy_wq:
++ destroy_workqueue(hbus->wq);
+ free_bus:
+ free_page((unsigned long)hbus);
+ return ret;
+@@ -2615,6 +2612,7 @@ static int hv_pci_remove(struct hv_device *hdev)
+ irq_domain_free_fwnode(hbus->sysdata.fwnode);
+ put_hvpcibus(hbus);
+ wait_for_completion(&hbus->remove_event);
++ destroy_workqueue(hbus->wq);
+ free_page((unsigned long)hbus);
+ return 0;
+ }
diff --git a/patches.suse/x86-enforce-entry-stack-alignment.patch b/patches.suse/x86-enforce-entry-stack-alignment.patch
new file mode 100644
index 0000000000..1dce20f3fc
--- /dev/null
+++ b/patches.suse/x86-enforce-entry-stack-alignment.patch
@@ -0,0 +1,105 @@
+From: Jiri Kosina <jkosina@suse.cz>
+Subject: x86/kaiser: enforce trampoline stack alignment
+References: bsc#1087260
+Patch-mainline: Never, SUSE specific
+
+If 16-byte alignment of the trampoline stack top is not maintained, int 0x80
+(or generally anything going through GATE_INTERRUPT) issued by a compat 32bit
+binary observes this exception stackframe on a trampoline stack:
+
+ 00000000f7ff382b 0000000000000023 0000000000000296 00000000ffffd554
+ 000000000000002b 000000000000002b 0000000000000000 0000000000000000
+ 0000000000000000 0000000000000000 0000000000000000 0000000000000000
+
+This is wrong, as there is duplicated 0x2b (user stack segment) pushed at
+tss->sp0, which confuses anything that assumes that pt_regs + iret frame
+ends exactly at the address tss->sp0 points to (such as, but not strictly
+limited to, task_pt_regs()).
+
+This happens because
+
+- on every INT instruction, it's guaranteed that the CPU aligns the stack
+ to 16-bytes
+- the places in the kernel that construct the iret frame manually (such
+ as x86_64 SYSENTER) don't perform this re-alignment
+
+Therefore the spurious 0x2b observed on the compat int 0x80 trampoline
+stack is a leftover from e.g. previous 64bit SYSENTER, and anything that
+starts processing pt_regs + iret frame from tss->sp0 gets immediately
+8 bytes offset.
+
+Once proper 16-byte alignment is established, the discrepancy is gone
+and ia32_syscall observes correct entry stack layout
+
+ 00000000f7ff382b 0000000000000023 0000000000000296 00000000ffffd554
+ 000000000000002b 0000000000000000 0000000000000000 0000000000000000
+ 0000000000000000 0000000000000000 0000000000000000 0000000000000000
+
+While inserting the padding, let's also install a build-time check that
+the alignment does not get broken by mistake.
+
+In addition to that, let's make sure that TSS is page-aligned, as that's
+what Intel SDM requires (specifically, it requires first 104 bytes to be
+in the same PFN, otherwise behavior is undefined).
+
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+---
+ arch/x86/include/asm/processor.h | 6 ++++--
+ arch/x86/kernel/cpu/common.c | 6 ++++++
+ arch/x86/kernel/process.c | 4 ++--
+ 3 files changed, 12 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -305,12 +305,14 @@ struct tss_struct {
+ /*
+ * Space for the temporary SYSENTER stack:
+ */
++ /* IRQ stacks have to maintain 16-bytes alignment! */
++ u8 pad;
+ unsigned long SYSENTER_stack[64];
+
+-} ____cacheline_aligned;
++} __attribute__((__aligned__(PAGE_SIZE)));
+
+ #ifndef __GENKSYMS__
+-DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss);
++DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss);
+ #else
+ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+ #endif
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1571,6 +1571,12 @@ void cpu_init(void)
+ load_TR_desc();
+ load_mm_ldt(&init_mm);
+
++ /*
++ * Entry stack has to be 16-bytes aligned so that it can serve as stack
++ * for IRQ handlers (INT instruction implicitly aligns %rsp to 16-bytes)
++ */
++ BUILD_BUG_ON(offsetofend(struct tss_struct, SYSENTER_stack) % 16 != 0);
++
+ clear_all_debug_regs();
+ dbg_restore_debug_regs();
+
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -42,7 +42,7 @@
+ #if defined(CONFIG_GENKSYMS)
+ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+ #else
+-__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = {
++__visible DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = {
+ #endif
+ .x86_tss = {
+ .sp0 = TOP_OF_INIT_STACK,
+@@ -69,7 +69,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_tss);
+ * original cpu_tss and its .x86_tss.sp0 pointing to a thread stack due to kABI.
+ */
+ #ifdef CONFIG_X86_64
+-__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss_tramp) = {
++__visible DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss_tramp) = {
+ .x86_tss = {
+ .sp0 = TOP_OF_INIT_STACK,
+ },
diff --git a/patches.suse/x86-kaiser-remove-user-mapping-tss.patch b/patches.suse/x86-kaiser-remove-user-mapping-tss.patch
index 106491d432..409f435fe8 100644
--- a/patches.suse/x86-kaiser-remove-user-mapping-tss.patch
+++ b/patches.suse/x86-kaiser-remove-user-mapping-tss.patch
@@ -8,24 +8,23 @@ its user mapping.
Signed-off-by: Miroslav Benes <mbenes@suse.cz>
---
- arch/x86/include/asm/processor.h | 4 ----
- arch/x86/kernel/process.c | 4 ----
- 2 files changed, 8 deletions(-)
+ arch/x86/include/asm/processor.h | 3 +--
+ arch/x86/kernel/process.c | 6 +-----
+ 2 files changed, 2 insertions(+), 7 deletions(-)
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
-@@ -309,11 +309,7 @@ struct tss_struct {
+@@ -312,9 +312,8 @@ struct tss_struct {
+ unsigned long SYSENTER_stack[64];
- } ____cacheline_aligned;
-
--#ifndef __GENKSYMS__
--DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss);
--#else
+ } __attribute__((__aligned__(PAGE_SIZE)));
+-
+ #ifndef __GENKSYMS__
+-DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss);
++DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss);
+ #else
DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
--#endif
-
- #ifdef CONFIG_X86_32
- DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+ #endif
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -39,11 +39,7 @@
@@ -33,10 +32,11 @@ Signed-off-by: Miroslav Benes <mbenes@suse.cz>
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-#if defined(CONFIG_GENKSYMS)
- __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
-#else
--__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = {
+-__visible DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = {
-#endif
++__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss) = {
.x86_tss = {
.sp0 = TOP_OF_INIT_STACK,
#ifdef CONFIG_X86_32
diff --git a/series.conf b/series.conf
index 001cbd3f1f..4610bb1a05 100644
--- a/series.conf
+++ b/series.conf
@@ -6106,6 +6106,12 @@
patches.suse/msft-hv-1536-vmbus-unregister-device_obj-channels_kset.patch
patches.suse/msft-hv-1549-x86-retpoline-hyperv-Convert-assembler-indirect-jump.patch
+ patches.suse/suse-hv-PCI-hv-Serialize-the-present-and-eject-work-items.patch
+ patches.suse/suse-hv-PCI-hv-Fix-2-hang-issues-in-hv_compose_msi_msg.patch
+ patches.suse/suse-hv-PCI-hv-Fix-a-comment-typo-in-_hv_pcifront_read_confi.patch
+ patches.suse/suse-hv-PCI-hv-Remove-the-bogus-test-in-hv_eject_device_work.patch
+ patches.suse/suse-hv-PCI-hv-Only-queue-new-work-items-in-hv_pci_devices_p.patch
+
patches.suse/suse-hv-hyperv_fb-Add-screen-refresh-after-pause-resume-oper.patch
patches.suse/suse-hv-kabi-hv_do_hypercall.patch
@@ -10157,6 +10163,7 @@
patches.fixes/ceph-fix-un-balanced-fsc-writeback_count-update.patch
patches.fixes/ceph-fix-incorrect-snaprealm-when-adding-caps.patch
patches.fixes/libceph-check-kstrndup-return-value.patch
+ patches.fixes/ceph-only-dirty-iter_iovec-pages-for-direct-read.patch
# target driver
patches.drivers/target-make-target-db-location-configurable
@@ -22293,6 +22300,8 @@
# kaiser trampoline
patches.suse/x86-kaiser-duplicate-cpu-tss.patch
patches.suse/x86-entry-64-use-a-per-cpu-trampoline-stack.patch
+ patches.suse/x86-enforce-entry-stack-alignment.patch
+ patches.kabi/x86-enforce-entry-stack-alignment.patch
patches.suse/x86-kaiser-remove-user-mapping-tss.patch
# bsc#1076805