author    Petr Tesarik <ptesarik@suse.cz>  2019-08-16 00:26:16 +0200
committer Petr Tesarik <ptesarik@suse.cz>  2019-08-16 00:26:16 +0200
commit    9adee65d0f649be382e4006f97384af647ebcb77 (patch)
tree      455c9d96c6dc317a6574071d60ddce8bc0537297
parent    7d122eb0b769a1ed4069e14aad19ce8c032418d5 (diff)
parent    10fa205d451a5f6c55bbc6fa26555445a442ceec (diff)
Merge branch 'SLE15' into SLE15-SP1
- Refresh patches.arch/powerpc-book3s-64-check-for-NULL-pointer-in-pgd_allo.patch
- Refresh patches.drivers/IB-mlx5-Fix-MR-registration-flow-to-use-UMR-properly.patch
- Refresh patches.fixes/0001-s390-qeth-be-drop-monitor-friendly.patch
- Refresh patches.kabi/KVM-Fix-kABI.patch
- Refresh patches.suse/0002-btrfs-Remove-fsid-metadata_fsid-fields-from-btrfs_in.patch
- Refresh patches.suse/0031-bcache-add-return-value-check-to-bch_cached_dev_run.patch
- Refresh patches.suse/btrfs-prevent-ioctls-from-interfering-with-a-swap-file.patch
Conflicts:
	blacklist.conf
	kabi/arm64/symtypes-default
	kabi/arm64/symvers-default
	kabi/ppc64le/symtypes-default
	kabi/ppc64le/symvers-default
	kabi/s390x/symtypes-default
	kabi/s390x/symvers-default
	kabi/x86_64/symtypes-default
	kabi/x86_64/symvers-default
	patches.arch/s390-sles15sp1-00-08-46-s390-cio-fix-ccw_device_start_timeout-API.patch
	patches.drivers/Revert-net-ena-ethtool-add-extra-properties-retrieva.patch
	patches.drivers/mmc-sdhci-pci-add-support-for-intel-icp.patch
	patches.drivers/net-ena-Fix-bug-where-ring-allocation-backoff-stoppe.patch
	patches.drivers/net-ena-add-MAX_QUEUES_EXT-get-feature-admin-command.patch
	patches.drivers/net-ena-add-ethtool-function-for-changing-io-queue-s.patch
	patches.drivers/net-ena-add-good-checksum-counter.patch
	patches.drivers/net-ena-add-handling-of-llq-max-tx-burst-size.patch
	patches.drivers/net-ena-add-newline-at-the-end-of-pr_err-prints.patch
	patches.drivers/net-ena-add-support-for-changing-max_header_size-in-.patch
	patches.drivers/net-ena-allow-automatic-fallback-to-polling-mode.patch
	patches.drivers/net-ena-allow-queue-allocation-backoff-when-low-on-m.patch
	patches.drivers/net-ena-arrange-ena_probe-function-variables-in-reve.patch
	patches.drivers/net-ena-enable-negotiating-larger-Rx-ring-size.patch
	patches.drivers/net-ena-ethtool-add-extra-properties-retrieval-via-g.patch
	patches.drivers/net-ena-fix-Free-napi-resources-when-ena_up-fails.patch
	patches.drivers/net-ena-fix-ena_com_fill_hash_function-implementatio.patch
	patches.drivers/net-ena-fix-incorrect-test-of-supported-hash-functio.patch
	patches.drivers/net-ena-fix-set-freed-objects-to-NULL-to-avoid-faili.patch
	patches.drivers/net-ena-fix-swapped-parameters-when-calling-ena_com_.patch
	patches.drivers/net-ena-gcc-8-fix-compilation-warning.patch
	patches.drivers/net-ena-improve-latency-by-disabling-adaptive-interr.patch
	patches.drivers/net-ena-make-ethtool-show-correct-current-and-max-qu.patch
	patches.drivers/net-ena-optimise-calculations-for-CQ-doorbell.patch
	patches.drivers/net-ena-remove-inline-keyword-from-functions-in-.c.patch
	patches.drivers/net-ena-replace-free_tx-rx_ids-union-with-single-fre.patch
	patches.drivers/net-ena-update-driver-version-from-2.0.3-to-2.1.0.patch
	patches.drivers/net-ena-use-dev_info_once-instead-of-static-variable.patch
	patches.drivers/net-mlx4_core-Zero-out-lkey-field-in-SW2HW_MPT-fw-co.patch
	patches.drivers/qede-fix-write-to-free-d-pointer-error-and-double-fr.patch
	patches.fixes/0001-s390-qeth-be-drop-monitor-friendly.patch
	patches.kabi/KVM-Fix-kABI.patch
	patches.kabi/sched-numa-Remove-redundant-field-kabi.patch
	patches.suse/0030-bcache-use-sysfs_match_string-instead-of-__sysfs_mat.patch
	series.conf
	supported.conf
suse-commit: c1822f240a06b1a952dd59d59c622feedcece6c5
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgalloc.h | 3
-rw-r--r--  arch/powerpc/include/asm/perf_event.h | 3
-rw-r--r--  arch/powerpc/include/asm/perf_event_server.h | 2
-rw-r--r--  arch/powerpc/include/asm/rtas.h | 1
-rw-r--r--  arch/powerpc/include/uapi/asm/perf_regs.h | 1
-rw-r--r--  arch/powerpc/kernel/security.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_hv_tm.c | 6
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 5
-rw-r--r--  arch/powerpc/lib/feature-fixups-test.S | 4
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 9
-rw-r--r--  arch/powerpc/perf/core-book3s.c | 28
-rw-r--r--  arch/powerpc/perf/isa207-common.c | 35
-rw-r--r--  arch/powerpc/perf/isa207-common.h | 5
-rw-r--r--  arch/powerpc/perf/perf_regs.c | 7
-rw-r--r--  arch/powerpc/perf/power9-events-list.h | 2
-rw-r--r--  arch/powerpc/perf/power9-pmu.c | 14
-rw-r--r--  arch/powerpc/platforms/powernv/opal-dump.c | 9
-rw-r--r--  arch/powerpc/platforms/powernv/opal-imc.c | 4
-rw-r--r--  arch/powerpc/sysdev/xive/common.c | 7
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 1
-rw-r--r--  arch/x86/kernel/cpu/microcode/core.c | 15
-rw-r--r--  arch/x86/kvm/lapic.c | 2
-rw-r--r--  arch/x86/kvm/pmu.c | 4
-rw-r--r--  arch/x86/kvm/svm.c | 14
-rw-r--r--  arch/x86/kvm/vmx.c | 68
-rw-r--r--  arch/x86/kvm/x86.c | 31
-rw-r--r--  arch/x86/mm/fault.c | 15
-rw-r--r--  drivers/char/tpm/st33zp24/st33zp24.c | 2
-rw-r--r--  drivers/char/tpm/tpm-chip.c | 6
-rw-r--r--  drivers/char/tpm/tpm-interface.c | 14
-rw-r--r--  drivers/char/tpm/tpm1_eventlog.c | 8
-rw-r--r--  drivers/char/tpm/tpm2-cmd.c | 2
-rw-r--r--  drivers/char/tpm/tpm_atmel.c | 2
-rw-r--r--  drivers/char/tpm/tpm_i2c_atmel.c | 10
-rw-r--r--  drivers/char/tpm/tpm_i2c_infineon.c | 2
-rw-r--r--  drivers/char/tpm/tpm_i2c_nuvoton.c | 2
-rw-r--r--  drivers/char/tpm/tpm_ibmvtpm.c | 8
-rw-r--r--  drivers/char/tpm/tpm_infineon.c | 2
-rw-r--r--  drivers/char/tpm/tpm_nsc.c | 2
-rw-r--r--  drivers/char/tpm/tpm_tis_core.c | 2
-rw-r--r--  drivers/char/tpm/tpm_vtpm_proxy.c | 3
-rw-r--r--  drivers/char/tpm/xen-tpmfront.c | 2
-rw-r--r--  drivers/crypto/ccp/ccp-ops.c | 7
-rw-r--r--  drivers/gpio/gpio-omap.c | 25
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_userptr.c | 10
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 4
-rw-r--r--  drivers/hid/hid-ids.h | 1
-rw-r--r--  drivers/hid/usbhid/hid-quirks.c | 1
-rw-r--r--  drivers/hwmon/nct7802.c | 6
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 27
-rw-r--r--  drivers/iommu/amd_iommu_init.c | 90
-rw-r--r--  drivers/iommu/amd_iommu_types.h | 12
-rw-r--r--  drivers/irqchip/irq-gic-v3-its.c | 8
-rw-r--r--  drivers/mfd/intel-lpss-pci.c | 13
-rw-r--r--  drivers/mmc/host/cavium.c | 4
-rw-r--r--  drivers/mmc/host/sdhci-pci-core.c | 2
-rw-r--r--  drivers/mmc/host/sdhci-pci.h | 2
-rw-r--r--  drivers/net/ethernet/freescale/fman/fman_memac.c | 2
-rw-r--r--  drivers/net/ethernet/freescale/fman/fman_tgec.c | 2
-rw-r--r--  drivers/net/ethernet/ibm/ehea/ehea_main.c | 2
-rw-r--r--  drivers/net/ethernet/sis/sis900.c | 16
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 2
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 2
-rw-r--r--  drivers/pci/pci.c | 2
-rw-r--r--  drivers/regulator/qcom_spmi-regulator.c | 11
-rw-r--r--  drivers/s390/block/dasd_alias.c | 22
-rw-r--r--  drivers/usb/host/xhci-rcar.c | 9
-rw-r--r--  drivers/usb/misc/iowarrior.c | 7
-rw-r--r--  drivers/usb/misc/yurex.c | 2
-rw-r--r--  drivers/watchdog/f71808e_wdt.c | 15
-rw-r--r--  drivers/watchdog/watchdog_dev.c | 10
-rw-r--r--  drivers/xen/swiotlb-xen.c | 4
-rw-r--r--  fs/btrfs/ctree.h | 26
-rw-r--r--  fs/btrfs/disk-io.c | 42
-rw-r--r--  fs/btrfs/extent-tree.c | 54
-rw-r--r--  fs/btrfs/inode.c | 13
-rw-r--r--  fs/btrfs/ioctl.c | 23
-rw-r--r--  fs/btrfs/reada.c | 5
-rw-r--r--  fs/btrfs/send.c | 40
-rw-r--r--  fs/btrfs/transaction.c | 15
-rw-r--r--  fs/btrfs/volumes.c | 26
-rw-r--r--  fs/exec.c | 2
-rw-r--r--  fs/jbd2/revoke.c | 4
-rw-r--r--  fs/nfs/write.c | 27
-rw-r--r--  fs/xfs/libxfs/xfs_trans_resv.c | 199
-rw-r--r--  fs/xfs/xfs_log.c | 61
-rw-r--r--  fs/xfs/xfs_log_cil.c | 84
-rw-r--r--  fs/xfs/xfs_log_priv.h | 1
-rw-r--r--  include/linux/kvm_host.h | 1
-rw-r--r--  include/linux/sched.h | 10
-rw-r--r--  include/linux/sched/numa_balancing.h | 4
-rw-r--r--  kernel/fork.c | 2
-rw-r--r--  kernel/sched/fair.c | 144
-rw-r--r--  mm/vmalloc.c | 9
-rw-r--r--  sound/firewire/packets-buffer.c | 2
-rw-r--r--  sound/pci/hda/hda_controller.c | 13
-rw-r--r--  sound/pci/hda/hda_controller.h | 2
-rw-r--r--  sound/pci/hda/hda_intel.c | 63
-rw-r--r--  sound/soc/soc-dapm.c | 8
-rw-r--r--  sound/sound_core.c | 3
-rw-r--r--  sound/usb/hiface/pcm.c | 11
-rw-r--r--  tools/arch/powerpc/include/uapi/asm/perf_regs.h | 1
-rw-r--r--  tools/objtool/check.c | 1
-rw-r--r--  tools/objtool/elf.c | 2
-rw-r--r--  tools/perf/arch/powerpc/include/perf_regs.h | 3
-rw-r--r--  tools/perf/arch/powerpc/util/perf_regs.c | 1
-rw-r--r--  virt/kvm/kvm_main.c | 25
107 files changed, 1120 insertions(+), 490 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index d0e369c4c115..9311d72f92d2 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -80,6 +80,9 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
return radix__pgd_alloc(mm);
pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+ if (unlikely(!pgd))
+ return pgd;
+
/*
* With hugetlb, we don't clear the second half of the page table.
* If we share the same slab cache with the pmd or pud level table,
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index 8bf1b6351716..660f65b98bfb 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -37,4 +37,7 @@
(regs)->gpr[1] = current_stack_pointer(); \
asm volatile("mfmsr %0" : "=r" ((regs)->msr)); \
} while (0)
+
+/* To support perf_regs sier update */
+extern bool is_sier_available(void);
#endif
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 67a8a9585d50..e60aeb46d6a0 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -41,6 +41,8 @@ struct power_pmu {
void (*get_mem_data_src)(union perf_mem_data_src *dsrc,
u32 flags, struct pt_regs *regs);
void (*get_mem_weight)(u64 *weight);
+ unsigned long group_constraint_mask;
+ unsigned long group_constraint_val;
u64 (*bhrb_filter_map)(u64 branch_sample_type);
void (*config_bhrb)(u64 pmu_bhrb_filter);
void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 715e0059d1c4..e8215fd3c51d 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -5,6 +5,7 @@
#include <linux/spinlock.h>
#include <asm/page.h>
#include <linux/time.h>
+#include <linux/cpumask.h>
/*
* Definitions for talking to the RTAS on CHRP machines.
diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h
index 6a93209748a1..1519be469477 100644
--- a/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -45,6 +45,7 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_TRAP,
PERF_REG_POWERPC_DAR,
PERF_REG_POWERPC_DSISR,
+ PERF_REG_POWERPC_SIER,
PERF_REG_POWERPC_MAX,
};
#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 0833d5192004..bcb3a4971f90 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -4,6 +4,7 @@
//
// Copyright 2018, Michael Ellerman, IBM Corporation.
+#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/seq_buf.h>
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
index 008285058f9b..d23a0131b016 100644
--- a/arch/powerpc/kvm/book3s_hv_tm.c
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -131,7 +131,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
}
/* Set CR0 to indicate previous transactional state */
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
- (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
+ (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
/* L=1 => tresume, L=0 => tsuspend */
if (instr & (1 << 21)) {
if (MSR_TM_SUSPENDED(msr))
@@ -175,7 +175,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
/* Set CR0 to indicate previous transactional state */
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
- (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
+ (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
return RESUME_GUEST;
@@ -205,7 +205,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
/* Set CR0 to indicate previous transactional state */
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
- (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
+ (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
vcpu->arch.shregs.msr = msr | MSR_TS_S;
return RESUME_GUEST;
}
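
All three hunks above make the same one-bit correction. CR0 occupies bits 31:28 of the 32-bit CR image, and the ISA specifies CR0 <- 0 || MSR[TS] || 0 for these TM instructions, so the two-bit TS value belongs at bits 30:29, not 29:28. A rough sketch of the bit placement (illustrative only, reusing the masks from the hunks):

    unsigned long ts = (msr & MSR_TS_MASK) >> MSR_TS_S_LG;  /* 2-bit value */
    unsigned long cr0_old = ts << 28;   /* lands in bits 29:28 -- wrong slot  */
    unsigned long cr0_new = ts << 29;   /* lands in bits 30:29 -- matches ISA */
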
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 96f389f50a3f..90031bc12aa2 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -62,6 +62,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
return !!(v->arch.pending_exceptions) || kvm_request_pending(v);
}
+bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
+{
+ return kvm_arch_vcpu_runnable(vcpu);
+}
+
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
return false;
diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S
index f4613118132e..12ff0f673956 100644
--- a/arch/powerpc/lib/feature-fixups-test.S
+++ b/arch/powerpc/lib/feature-fixups-test.S
@@ -167,9 +167,9 @@ globl(ftr_fixup_test6_expected)
blt 2b
b 3f
b 1b
-2: or 1,1,1
+3: or 1,1,1
or 2,2,2
-3: or 3,3,3
+ or 3,3,3
#if 0
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 822edbab026e..d8d9faff4262 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -171,6 +171,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
else {
pdshift = PUD_SHIFT;
pu = pud_alloc(mm, pg, addr);
+ if (!pu)
+ return NULL;
if (pshift == PUD_SHIFT)
return (pte_t *)pu;
else if (pshift > PMD_SHIFT)
@@ -178,6 +180,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
else {
pdshift = PMD_SHIFT;
pm = pmd_alloc(mm, pu, addr);
+ if (!pm)
+ return NULL;
if (pshift == PMD_SHIFT)
/* 16MB hugepage */
return (pte_t *)pm;
@@ -191,11 +195,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
} else {
pdshift = PUD_SHIFT;
pu = pud_alloc(mm, pg, addr);
+ if (!pu)
+ return NULL;
if (pshift >= HUGEPD_PUD_SHIFT) {
hpdp = (hugepd_t *)pu;
} else {
pdshift = PMD_SHIFT;
pm = pmd_alloc(mm, pu, addr);
+ if (!pm)
+ return NULL;
+ ptl = pmd_lockptr(mm, pm);
hpdp = (hugepd_t *)pm;
}
}
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index f61343d6652b..6599a4005564 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -130,6 +130,14 @@ static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
+bool is_sier_available(void)
+{
+ if (ppmu->flags & PPMU_HAS_SIER)
+ return true;
+
+ return false;
+}
+
static bool regs_use_siar(struct pt_regs *regs)
{
/*
@@ -864,6 +872,8 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
int i, j;
unsigned long addf = ppmu->add_fields;
unsigned long tadd = ppmu->test_adder;
+ unsigned long grp_mask = ppmu->group_constraint_mask;
+ unsigned long grp_val = ppmu->group_constraint_val;
if (n_ev > ppmu->n_counter)
return -1;
@@ -884,15 +894,23 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
for (i = 0; i < n_ev; ++i) {
nv = (value | cpuhw->avalues[i][0]) +
(value & cpuhw->avalues[i][0] & addf);
- if ((((nv + tadd) ^ value) & mask) != 0 ||
- (((nv + tadd) ^ cpuhw->avalues[i][0]) &
- cpuhw->amasks[i][0]) != 0)
+
+ if (((((nv + tadd) ^ value) & mask) & (~grp_mask)) != 0)
+ break;
+
+ if (((((nv + tadd) ^ cpuhw->avalues[i][0]) & cpuhw->amasks[i][0])
+ & (~grp_mask)) != 0)
break;
+
value = nv;
mask |= cpuhw->amasks[i][0];
}
- if (i == n_ev)
- return 0; /* all OK */
+ if (i == n_ev) {
+ if ((value & mask & grp_mask) != (mask & grp_val))
+ return -1;
+ else
+ return 0; /* all OK */
+ }
/* doesn't work, gather alternatives... */
if (!ppmu->get_alternatives)
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 11cf9915738f..a6c24d866b2f 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -234,8 +234,13 @@ void isa207_get_mem_weight(u64 *weight)
u64 mmcra = mfspr(SPRN_MMCRA);
u64 exp = MMCRA_THR_CTR_EXP(mmcra);
u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
+ u64 sier = mfspr(SPRN_SIER);
+ u64 val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
- *weight = mantissa << (2 * exp);
+ if (val == 0 || val == 7)
+ *weight = 0;
+ else
+ *weight = mantissa << (2 * exp);
}
int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
@@ -282,17 +287,25 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
}
if (unit >= 6 && unit <= 9) {
- /*
- * L2/L3 events contain a cache selector field, which is
- * supposed to be programmed into MMCRC. However MMCRC is only
- * HV writable, and there is no API for guest kernels to modify
- * it. The solution is for the hypervisor to initialise the
- * field to zeroes, and for us to only ever allow events that
- * have a cache selector of zero. The bank selector (bit 3) is
- * irrelevant, as long as the rest of the value is 0.
- */
- if (!cpu_has_feature(CPU_FTR_ARCH_300) && (cache & 0x7))
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ mask |= CNST_CACHE_GROUP_MASK;
+ value |= CNST_CACHE_GROUP_VAL(event & 0xff);
+
+ mask |= CNST_CACHE_PMC4_MASK;
+ if (pmc == 4)
+ value |= CNST_CACHE_PMC4_VAL;
+ } else if (cache & 0x7) {
+ /*
+ * L2/L3 events contain a cache selector field, which is
+ * supposed to be programmed into MMCRC. However MMCRC is only
+ * HV writable, and there is no API for guest kernels to modify
+ * it. The solution is for the hypervisor to initialise the
+ * field to zeroes, and for us to only ever allow events that
+ * have a cache selector of zero. The bank selector (bit 3) is
+ * irrelevant, as long as the rest of the value is 0.
+ */
return -1;
+ }
} else if (cpu_has_feature(CPU_FTR_ARCH_300) || (event & EVENT_IS_L1)) {
mask |= CNST_L1_QUAL_MASK;
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index e5a621699a6d..91350f42a662 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -134,6 +134,11 @@
#define CNST_SAMPLE_VAL(v) (((v) & EVENT_SAMPLE_MASK) << 16)
#define CNST_SAMPLE_MASK CNST_SAMPLE_VAL(EVENT_SAMPLE_MASK)
+#define CNST_CACHE_GROUP_VAL(v) (((v) & 0xffull) << 55)
+#define CNST_CACHE_GROUP_MASK CNST_CACHE_GROUP_VAL(0xff)
+#define CNST_CACHE_PMC4_VAL (1ull << 54)
+#define CNST_CACHE_PMC4_MASK CNST_CACHE_PMC4_VAL
+
/*
* For NC we are counting up to 4 events. This requires three bits, and we need
* the fifth event to overflow and set the 4th bit. To achieve that we bias the
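
The new group_constraint_mask/group_constraint_val pair (wired up for POWER9 further below) turns the final check added to power_check_constraints() above into an equality test over the masked bits: every L2/L3 event contributes the PMC4 mask bit, but only an event scheduled on PMC4 contributes the value bit, so a group of such events is rejected unless one of them runs on PMC4. A standalone sketch of that check (user-space C, not kernel code; the constants mirror the definitions above):

    #include <stdio.h>

    #define CNST_CACHE_PMC4_VAL  (1ull << 54)
    #define CNST_CACHE_PMC4_MASK CNST_CACHE_PMC4_VAL

    int main(void)
    {
            unsigned long long mask = CNST_CACHE_PMC4_MASK; /* accumulated over group */
            unsigned long long with_pmc4 = CNST_CACHE_PMC4_VAL;
            unsigned long long without   = 0;

            /* mirrors: (value & mask & grp_mask) == (mask & grp_val) */
            printf("PMC4 in group: %s\n",
                   (with_pmc4 & mask & CNST_CACHE_PMC4_MASK) ==
                   (mask & CNST_CACHE_PMC4_VAL) ? "accepted" : "rejected");
            printf("no PMC4 event: %s\n",
                   (without & mask & CNST_CACHE_PMC4_MASK) ==
                   (mask & CNST_CACHE_PMC4_VAL) ? "accepted" : "rejected");
            return 0;
    }
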
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index 09ceea6175ba..5c36b3a8d47a 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -69,6 +69,7 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_SIER, dar),
};
u64 perf_reg_value(struct pt_regs *regs, int idx)
@@ -76,6 +77,12 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
return 0;
+ if (idx == PERF_REG_POWERPC_SIER &&
+ (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
+ IS_ENABLED(CONFIG_PPC32) ||
+ !is_sier_available()))
+ return 0;
+
return regs_get_register(regs, pt_regs_offset[idx]);
}
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index 063c9d9f2516..6b1dc9a83ede 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -63,8 +63,6 @@ EVENT(PM_RUN_CYC_ALT, 0x200f4)
/* Instruction Dispatched */
EVENT(PM_INST_DISP, 0x200f2)
EVENT(PM_INST_DISP_ALT, 0x300f2)
-/* Alternate Branch event code */
-EVENT(PM_BR_CMPL_ALT, 0x10012)
/* Branch event that are not strongly biased */
EVENT(PM_BR_2PATH, 0x20036)
/* ALternate branch event that are not strongly biased */
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 2af1f7706261..f3987915cadc 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -63,16 +63,8 @@
* MMCRA[9:11] = thresh_cmp[0:2]
* MMCRA[12:18] = thresh_cmp[3:9]
*
- * if unit == 6 or unit == 7
- * MMCRC[53:55] = cache_sel[1:3] (L2EVENT_SEL)
- * else if unit == 8 or unit == 9:
- * if cache_sel[0] == 0: # L3 bank
- * MMCRC[47:49] = cache_sel[1:3] (L3EVENT_SEL0)
- * else if cache_sel[0] == 1:
- * MMCRC[50:51] = cache_sel[2:3] (L3EVENT_SEL1)
- * else if cache_sel[1]: # L1 event
- * MMCR1[16] = cache_sel[2]
- * MMCR1[17] = cache_sel[3]
+ * MMCR1[16] = cache_sel[2]
+ * MMCR1[17] = cache_sel[3]
*
* if mark:
* MMCRA[63] = 1 (SAMPLE_ENABLE)
@@ -430,6 +422,8 @@ static struct power_pmu power9_pmu = {
.n_counter = MAX_PMU_COUNTERS,
.add_fields = ISA207_ADD_FIELDS,
.test_adder = ISA207_TEST_ADDER,
+ .group_constraint_mask = CNST_CACHE_PMC4_MASK,
+ .group_constraint_val = CNST_CACHE_PMC4_VAL,
.compute_mmcr = isa207_compute_mmcr,
.config_bhrb = power9_config_bhrb,
.bhrb_filter_map = power9_bhrb_filter_map,
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
index 4c827826c05e..7374b9f433cb 100644
--- a/arch/powerpc/platforms/powernv/opal-dump.c
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -368,13 +368,12 @@ static irqreturn_t process_dump(int irq, void *data)
{
int rc;
uint32_t dump_id, dump_size, dump_type;
- struct dump_obj *dump;
char name[22];
struct kobject *kobj;
rc = dump_read_info(&dump_id, &dump_size, &dump_type);
if (rc != OPAL_SUCCESS)
- return rc;
+ return IRQ_HANDLED;
sprintf(name, "0x%x-0x%x", dump_type, dump_id);
@@ -386,12 +385,10 @@ static irqreturn_t process_dump(int irq, void *data)
if (kobj) {
/* Drop reference added by kset_find_obj() */
kobject_put(kobj);
- return 0;
+ return IRQ_HANDLED;
}
- dump = create_dump_obj(dump_id, dump_size, dump_type);
- if (!dump)
- return -1;
+ create_dump_obj(dump_id, dump_size, dump_type);
return IRQ_HANDLED;
}
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 38eccc66c7f4..600d863f7dd2 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -87,6 +87,10 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
struct imc_pmu *pmu_ptr;
u32 offset;
+ /* Return for unknown domain */
+ if (domain < 0)
+ return -EINVAL;
+
/* memory for pmu */
pmu_ptr = kzalloc(sizeof(struct imc_pmu), GFP_KERNEL);
if (!pmu_ptr)
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 7369f16625e9..eb547aeb99fd 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -483,7 +483,7 @@ static int xive_find_target_in_mask(const struct cpumask *mask,
* Now go through the entire mask until we find a valid
* target.
*/
- for (;;) {
+ do {
/*
* We re-check online as the fallback case passes us
* an untested affinity mask
@@ -491,12 +491,11 @@ static int xive_find_target_in_mask(const struct cpumask *mask,
if (cpu_online(cpu) && xive_try_pick_target(cpu))
return cpu;
cpu = cpumask_next(cpu, mask);
- if (cpu == first)
- break;
/* Wrap around */
if (cpu >= nr_cpu_ids)
cpu = cpumask_first(mask);
- }
+ } while (cpu != first);
+
return -1;
}
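
The rewrite above fixes a potential infinite loop: in the old for (;;), the cpu == first test ran before the wrap-around assignment, so once cpumask_next() ran off the end of the mask the loop could keep re-testing the same CPU without ever matching first. A minimal model of the failure mode (illustrative pseudocode; qualifies(), next_in_mask() and first_in_mask() are hypothetical stand-ins):

    /* mask = {5}, first = 5, CPU 5 keeps failing xive_try_pick_target() */
    for (;;) {
            if (qualifies(cpu))             /* never true in this scenario */
                    return cpu;
            cpu = next_in_mask(cpu);        /* no CPU after 5: returns NR_CPUS */
            if (cpu == first)               /* NR_CPUS != 5, so never breaks */
                    break;
            if (cpu >= NR_CPUS)
                    cpu = first_in_mask();  /* back to 5; loops forever */
    }

The do/while form moves the cpu != first test after the wrap-around, so the walk terminates after one full pass over the mask.
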
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 16a07b6b32ee..7b7a99583adf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1129,6 +1129,7 @@ struct kvm_x86_ops {
#ifndef __GENKSYMS__
bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
+ bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
#endif
};
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 017a40881d16..59045129748f 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -758,13 +758,16 @@ static struct syscore_ops mc_syscore_ops = {
.resume = mc_bp_resume,
};
-static int mc_cpu_online(unsigned int cpu)
+static int mc_cpu_starting(unsigned int cpu)
{
- struct device *dev;
-
- dev = get_cpu_device(cpu);
microcode_update_cpu(cpu);
pr_debug("CPU%d added\n", cpu);
+ return 0;
+}
+
+static int mc_cpu_online(unsigned int cpu)
+{
+ struct device *dev = get_cpu_device(cpu);
if (sysfs_create_group(&dev->kobj, &mc_attr_group))
pr_err("Failed to create group for CPU%d\n", cpu);
@@ -844,7 +847,9 @@ int __init microcode_init(void)
#define CPUHP_AP_MICROCODE_LOADER CPUHP_AP_ARM_MVEBU_COHERENCY
- cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online",
+ cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting",
+ mc_cpu_starting, NULL);
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
mc_cpu_online, mc_cpu_down_prep);
pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4bd880bee283..163c765e034b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2201,7 +2201,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
u32 ppr;
- if (!apic_enabled(apic))
+ if (!kvm_apic_hw_enabled(apic))
return -1;
__apic_update_ppr(apic, &ppr);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 026db42a86c3..1bca8016ee8a 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -131,8 +131,8 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
intr ? kvm_perf_overflow_intr :
kvm_perf_overflow, pmc);
if (IS_ERR(event)) {
- printk_once("kvm_pmu: event creation failed %ld\n",
- PTR_ERR(event));
+ pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
+ PTR_ERR(event), pmc->idx);
return;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 55f93bbf2b44..e0ea43713678 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5086,6 +5086,11 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
kvm_vcpu_wake_up(vcpu);
}
+static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+
static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
unsigned long flags;
@@ -6065,13 +6070,7 @@ out:
static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
{
- local_irq_enable();
- /*
- * We must have an instruction with interrupts enabled, so
- * the timer interrupt isn't delayed by the interrupt shadow.
- */
- asm("nop");
- local_irq_disable();
+
}
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
@@ -7173,6 +7172,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.pmu_ops = &amd_pmu_ops,
.deliver_posted_interrupt = svm_deliver_avic_intr,
+ .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
.update_pi_irte = svm_update_pi_irte,
.setup_mce = svm_setup_mce,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 31927b6a3c30..5c4fef9af0e9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3347,6 +3347,11 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
case MSR_IA32_VMX_VMCS_ENUM:
vmx->nested.nested_vmx_vmcs_enum = data;
return 0;
+ case MSR_IA32_VMX_VMFUNC:
+ if (data & ~vmx->nested.nested_vmx_vmfunc_controls)
+ return -EINVAL;
+ vmx->nested.nested_vmx_vmfunc_controls = data;
+ return 0;
default:
/*
* The rest of the VMX capability MSRs do not support restore.
@@ -3513,7 +3518,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data);
case MSR_IA32_XSS:
- if (!vmx_xsaves_supported())
+ if (!vmx_xsaves_supported() ||
+ (!msr_info->host_initiated &&
+ !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
return 1;
msr_info->data = vcpu->arch.ia32_xss;
break;
@@ -3647,9 +3655,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
MSR_TYPE_W);
break;
case MSR_IA32_CR_PAT:
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+ return 1;
+
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
- if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
- return 1;
vmcs_write64(GUEST_IA32_PAT, data);
vcpu->arch.pat = data;
break;
@@ -3683,7 +3692,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
return vmx_set_vmx_msr(vcpu, msr_index, data);
case MSR_IA32_XSS:
- if (!vmx_xsaves_supported())
+ if (!vmx_xsaves_supported() ||
+ (!msr_info->host_initiated &&
+ !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
return 1;
/*
* The only supported bit as of Skylake is bit 8, but
@@ -5625,7 +5637,7 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
}
-static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
+u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
{
u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
@@ -7679,6 +7691,7 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
{
vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
vmcs_write64(VMCS_LINK_POINTER, -1ull);
+ vmx->nested.sync_shadow_vmcs = false;
}
static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
@@ -7690,7 +7703,6 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
/* copy to memory all shadowed fields in case
they were modified */
copy_shadow_to_vmcs12(vmx);
- vmx->nested.sync_shadow_vmcs = false;
vmx_disable_shadow_vmcs(vmx);
}
vmx->nested.posted_intr_nv = -1;
@@ -7877,6 +7889,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
const unsigned long *fields = shadow_read_write_fields;
const int num_fields = max_shadow_read_write_fields;
+ if (WARN_ON(!shadow_vmcs))
+ return;
+
preempt_disable();
vmcs_load(shadow_vmcs);
@@ -7924,6 +7939,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
u64 field_value = 0;
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
+ if (WARN_ON(!shadow_vmcs))
+ return;
+
vmcs_load(shadow_vmcs);
for (q = 0; q < ARRAY_SIZE(fields); q++) {
@@ -8208,13 +8226,11 @@ static int handle_invept(struct kvm_vcpu *vcpu)
switch (type) {
case VMX_EPT_EXTENT_GLOBAL:
+ case VMX_EPT_EXTENT_CONTEXT:
/*
- * TODO: track mappings and invalidate
- * single context requests appropriately
+ * TODO: Sync the necessary shadow EPT roots here, rather than
+ * at the next emulated VM-entry.
*/
- case VMX_EPT_EXTENT_CONTEXT:
- kvm_mmu_sync_roots(vcpu);
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
nested_vmx_succeed(vcpu);
break;
default:
@@ -9411,6 +9427,11 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
return max_irr;
}
+static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
+{
+ return pi_test_on(vcpu_to_pi_desc(vcpu));
+}
+
static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
if (!kvm_vcpu_apicv_active(vcpu))
@@ -9432,28 +9453,21 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
{
- u32 exit_intr_info = 0;
- u16 basic_exit_reason = (u16)vmx->exit_reason;
-
- if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
- || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
+ if (vmx->exit_reason != EXIT_REASON_EXCEPTION_NMI)
return;
- if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- vmx->exit_intr_info = exit_intr_info;
+ vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
/* if exit due to PF check for async PF */
- if (is_page_fault(exit_intr_info))
+ if (is_page_fault(vmx->exit_intr_info))
vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
/* Handle machine checks before interrupts are enabled */
- if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
- is_machine_check(exit_intr_info))
+ if (is_machine_check(vmx->exit_intr_info))
kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */
- if (is_nmi(exit_intr_info)) {
+ if (is_nmi(vmx->exit_intr_info)) {
kvm_before_handle_nmi(&vmx->vcpu);
asm("int $2");
kvm_after_handle_nmi(&vmx->vcpu);
@@ -9958,6 +9972,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->idt_vectoring_info = 0;
vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
+ if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
+ kvm_machine_check();
+
if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
return;
@@ -11017,11 +11034,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
vmcs_write64(VMCS_LINK_POINTER, -1ull);
- exec_control = vmcs12->pin_based_vm_exec_control;
+ exec_control = vmx_pin_based_exec_ctrl(vmx);
+ exec_control |= vmcs12->pin_based_vm_exec_control;
/* Preemption timer setting is only taken from vmcs01. */
exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
- exec_control |= vmcs_config.pin_based_exec_ctrl;
if (vmx->hv_deadline_tsc == -1)
exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
@@ -12840,6 +12857,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.hwapic_isr_update = vmx_hwapic_isr_update,
.sync_pir_to_irr = vmx_sync_pir_to_irr,
.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
+ .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt,
.set_tss_addr = vmx_set_tss_addr,
.get_tdp_level = get_ept_level,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 574c3d3bd441..16f0431683b0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1398,7 +1398,7 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
vcpu->arch.tsc_always_catchup = 1;
return 0;
} else {
- WARN(1, "user requested TSC rate below hardware speed\n");
+ pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
return -1;
}
}
@@ -1408,8 +1408,8 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
user_tsc_khz, tsc_khz);
if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
- WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
- user_tsc_khz);
+ pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
+ user_tsc_khz);
return -1;
}
@@ -7300,7 +7300,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_ops->handle_external_intr(vcpu);
+ /*
+ * Consume any pending interrupts, including the possible source of
+ * VM-Exit on SVM and any ticks that occur between VM-Exit and now.
+ * An instruction is required after local_irq_enable() to fully unblock
+ * interrupts on processors that implement an interrupt shadow, the
+ * stat.exits increment will do nicely.
+ */
+ local_irq_enable();
++vcpu->stat.exits;
+ local_irq_disable();
guest_exit_irqoff();
@@ -8816,6 +8825,22 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
}
+bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
+{
+ if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
+ return true;
+
+ if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
+ kvm_test_request(KVM_REQ_SMI, vcpu) ||
+ kvm_test_request(KVM_REQ_EVENT, vcpu))
+ return true;
+
+ if (vcpu->arch.apicv_active && kvm_x86_ops->dy_apicv_has_pending_interrupt(vcpu))
+ return true;
+
+ return false;
+}
+
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
return vcpu->arch.preempted_in_kernel;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 71173f7bae8b..0399b0a02bbf 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -259,13 +259,14 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
pmd = pmd_offset(pud, address);
pmd_k = pmd_offset(pud_k, address);
- if (!pmd_present(*pmd_k))
- return NULL;
- if (!pmd_present(*pmd))
+ if (pmd_present(*pmd) != pmd_present(*pmd_k))
set_pmd(pmd, *pmd_k);
+
+ if (!pmd_present(*pmd_k))
+ return NULL;
else
- BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+ BUG_ON(pmd_pfn(*pmd) != pmd_pfn(*pmd_k));
return pmd_k;
}
@@ -285,17 +286,13 @@ void vmalloc_sync_all(void)
spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
spinlock_t *pgt_lock;
- pmd_t *ret;
/* the pgt_lock only for Xen */
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
- ret = vmalloc_sync_one(page_address(page), address);
+ vmalloc_sync_one(page_address(page), address);
spin_unlock(pgt_lock);
-
- if (!ret)
- break;
}
spin_unlock(&pgd_lock);
}
diff --git a/drivers/char/tpm/st33zp24/st33zp24.c b/drivers/char/tpm/st33zp24/st33zp24.c
index f95b9c75175b..77f3fa10db12 100644
--- a/drivers/char/tpm/st33zp24/st33zp24.c
+++ b/drivers/char/tpm/st33zp24/st33zp24.c
@@ -438,7 +438,7 @@ static int st33zp24_send(struct tpm_chip *chip, unsigned char *buf,
goto out_err;
}
- return len;
+ return 0;
out_err:
st33zp24_cancel(chip);
release_locality(chip);
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index bf385e3ee644..2d09c60028f9 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -163,12 +163,12 @@ static int tpm_class_shutdown(struct device *dev)
{
struct tpm_chip *chip = container_of(dev, struct tpm_chip, dev);
+ down_write(&chip->ops_sem);
if (chip->flags & TPM_CHIP_FLAG_TPM2) {
- down_write(&chip->ops_sem);
tpm2_shutdown(chip, TPM2_SU_CLEAR);
- chip->ops = NULL;
- up_write(&chip->ops_sem);
}
+ chip->ops = NULL;
+ up_write(&chip->ops_sem);
/* Allow bus- and device-specific code to run. Note: since chip->ops
* is NULL, more-specific shutdown code will not be able to issue TPM
* commands.
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 7b4c7fbf4114..bb5735c56b9a 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -493,11 +493,21 @@ static ssize_t tpm_try_transmit(struct tpm_chip *chip,
rc = chip->ops->send(chip, (u8 *) buf, count);
if (rc < 0) {
- dev_err(&chip->dev,
- "tpm_transmit: tpm_send: error %d\n", rc);
+ if (rc != -EPIPE)
+ dev_err(&chip->dev,
+ "%s: send(): error %d\n", __func__, rc);
goto out;
}
+ /* A sanity check. send() should just return zero on success, i.e.
+ * not the command length.
+ */
+ if (rc > 0) {
+ dev_warn(&chip->dev,
+ "%s: send(): invalid value %d\n", __func__, rc);
+ rc = 0;
+ }
+
if (chip->flags & TPM_CHIP_FLAG_IRQ)
goto out_recv;
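
This series flips the ->send() convention for TPM drivers: success is now reported as 0 rather than the number of bytes written, which is why the driver hunks in this series replace return len/count with return 0, and why tpm_try_transmit() gains the warning above for drivers that still return a positive count. A conforming driver send() now has this shape (sketch only; my_write_fifo() is a hypothetical helper):

    static int example_tpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
    {
            int rc = my_write_fifo(chip, buf, count);  /* hypothetical I/O */

            if (rc < 0)
                    return rc;      /* -errno is passed through */
            return 0;               /* success: 0, never 'count' */
    }
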
diff --git a/drivers/char/tpm/tpm1_eventlog.c b/drivers/char/tpm/tpm1_eventlog.c
index 9a8605e500b5..d168307e4f56 100644
--- a/drivers/char/tpm/tpm1_eventlog.c
+++ b/drivers/char/tpm/tpm1_eventlog.c
@@ -101,7 +101,7 @@ static void *tpm_bios_measurements_start(struct seq_file *m, loff_t *pos)
}
/* now check if current entry is valid */
- if ((addr + sizeof(struct tcpa_event)) >= limit)
+ if ((addr + sizeof(struct tcpa_event)) > limit)
return NULL;
event = addr;
@@ -111,7 +111,7 @@ static void *tpm_bios_measurements_start(struct seq_file *m, loff_t *pos)
if (((converted_event_type == 0) && (converted_event_size == 0))
|| ((addr + sizeof(struct tcpa_event) + converted_event_size)
- >= limit))
+ > limit))
return NULL;
return addr;
@@ -132,7 +132,7 @@ static void *tpm_bios_measurements_next(struct seq_file *m, void *v,
v += sizeof(struct tcpa_event) + converted_event_size;
/* now check if current entry is valid */
- if ((v + sizeof(struct tcpa_event)) >= limit)
+ if ((v + sizeof(struct tcpa_event)) > limit)
return NULL;
event = v;
@@ -141,7 +141,7 @@ static void *tpm_bios_measurements_next(struct seq_file *m, void *v,
converted_event_type = do_endian_conversion(event->event_type);
if (((converted_event_type == 0) && (converted_event_size == 0)) ||
- ((v + sizeof(struct tcpa_event) + converted_event_size) >= limit))
+ ((v + sizeof(struct tcpa_event) + converted_event_size) > limit))
return NULL;
(*pos)++;
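
All four hunks relax >= to > on the limit checks: an event that ends exactly at the limit occupies the last valid bytes of the log and must not be discarded. The boundary arithmetic, as a standalone illustration:

    /* limit points one past the end of the event log buffer */
    addr = limit - sizeof(struct tcpa_event);          /* last possible entry */
    /* old: addr + sizeof(struct tcpa_event) >= limit  -> true,  entry dropped */
    /* new: addr + sizeof(struct tcpa_event) >  limit  -> false, entry kept    */
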
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index 4b0541ef1869..b5ae7626f654 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -791,7 +791,7 @@ void tpm2_shutdown(struct tpm_chip *chip, u16 shutdown_type)
/* In places where shutdown command is sent there's no much we can do
* except print the error code on a system failure.
*/
- if (rc < 0)
+ if (rc < 0 && rc != -EPIPE)
dev_warn(&chip->dev, "transmit returned %d while stopping the TPM",
rc);
}
diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c
index 0d322ab11faa..3301e802b541 100644
--- a/drivers/char/tpm/tpm_atmel.c
+++ b/drivers/char/tpm/tpm_atmel.c
@@ -105,7 +105,7 @@ static int tpm_atml_send(struct tpm_chip *chip, u8 *buf, size_t count)
iowrite8(buf[i], priv->iobase);
}
- return count;
+ return 0;
}
static void tpm_atml_cancel(struct tpm_chip *chip)
diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c
index 95ce2e9ccdc6..cc4e642d3180 100644
--- a/drivers/char/tpm/tpm_i2c_atmel.c
+++ b/drivers/char/tpm/tpm_i2c_atmel.c
@@ -65,7 +65,15 @@ static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len)
dev_dbg(&chip->dev,
"%s(buf=%*ph len=%0zx) -> sts=%d\n", __func__,
(int)min_t(size_t, 64, len), buf, len, status);
- return status;
+
+ if (status < 0)
+ return status;
+
+ /* The upper layer does not support incomplete sends. */
+ if (status != len)
+ return -E2BIG;
+
+ return 0;
}
static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count)
diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c
index 24356e8387c8..d58a6b0e0b6d 100644
--- a/drivers/char/tpm/tpm_i2c_infineon.c
+++ b/drivers/char/tpm/tpm_i2c_infineon.c
@@ -552,7 +552,7 @@ static int tpm_tis_i2c_send(struct tpm_chip *chip, u8 *buf, size_t len)
/* go and do it */
iic_tpm_write(TPM_STS(tpm_dev.locality), &sts, 1);
- return len;
+ return 0;
out_err:
tpm_tis_i2c_ready(chip);
/* The TPM needs some time to clean up here,
diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c
index a9681853b1a8..280308009784 100644
--- a/drivers/char/tpm/tpm_i2c_nuvoton.c
+++ b/drivers/char/tpm/tpm_i2c_nuvoton.c
@@ -467,7 +467,7 @@ static int i2c_nuvoton_send(struct tpm_chip *chip, u8 *buf, size_t len)
}
dev_dbg(dev, "%s() -> %zd\n", __func__, len);
- return len;
+ return 0;
}
static bool i2c_nuvoton_req_canceled(struct tpm_chip *chip, u8 status)
diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index 44a971f81f5c..7f327aabc92d 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -141,14 +141,14 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
}
/**
- * tpm_ibmvtpm_send - Send tpm request
- *
+ * tpm_ibmvtpm_send() - Send a TPM command
* @chip: tpm chip struct
* @buf: buffer contains data to send
* @count: size of buffer
*
* Return:
- * Number of bytes sent or < 0 on error.
+ * 0 on success,
+ * -errno on error
*/
static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
{
@@ -194,7 +194,7 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
rc = 0;
ibmvtpm->tpm_processing_cmd = false;
} else
- rc = count;
+ rc = 0;
spin_unlock(&ibmvtpm->rtce_lock);
return rc;
diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c
index 690d948cec46..d26db2afcdfe 100644
--- a/drivers/char/tpm/tpm_infineon.c
+++ b/drivers/char/tpm/tpm_infineon.c
@@ -354,7 +354,7 @@ static int tpm_inf_send(struct tpm_chip *chip, u8 * buf, size_t count)
for (i = 0; i < count; i++) {
wait_and_send(chip, buf[i]);
}
- return count;
+ return 0;
}
static void tpm_inf_cancel(struct tpm_chip *chip)
diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c
index 5d6cce74cd3f..9bee3c5eb4bf 100644
--- a/drivers/char/tpm/tpm_nsc.c
+++ b/drivers/char/tpm/tpm_nsc.c
@@ -226,7 +226,7 @@ static int tpm_nsc_send(struct tpm_chip *chip, u8 * buf, size_t count)
}
outb(NSC_COMMAND_EOC, priv->base + NSC_COMMAND);
- return count;
+ return 0;
}
static void tpm_nsc_cancel(struct tpm_chip *chip)
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index d2345d9fd7b5..0eaea3a7b8f4 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -485,7 +485,7 @@ static int tpm_tis_send_main(struct tpm_chip *chip, const u8 *buf, size_t len)
goto out_err;
}
}
- return len;
+ return 0;
out_err:
tpm_tis_ready(chip);
return rc;
diff --git a/drivers/char/tpm/tpm_vtpm_proxy.c b/drivers/char/tpm/tpm_vtpm_proxy.c
index 66024bf92097..8be0434490a6 100644
--- a/drivers/char/tpm/tpm_vtpm_proxy.c
+++ b/drivers/char/tpm/tpm_vtpm_proxy.c
@@ -312,7 +312,6 @@ out:
static int vtpm_proxy_tpm_op_send(struct tpm_chip *chip, u8 *buf, size_t count)
{
struct proxy_dev *proxy_dev = dev_get_drvdata(&chip->dev);
- int rc = 0;
if (count > sizeof(proxy_dev->buffer)) {
dev_err(&chip->dev,
@@ -339,7 +338,7 @@ static int vtpm_proxy_tpm_op_send(struct tpm_chip *chip, u8 *buf, size_t count)
wake_up_interruptible(&proxy_dev->wq);
- return rc;
+ return 0;
}
static void vtpm_proxy_tpm_op_cancel(struct tpm_chip *chip)
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index b150f87f38f5..5a327eb7f63a 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -173,7 +173,7 @@ static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
return -ETIME;
}
- return count;
+ return 0;
}
static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 1b39104de8fd..79473086684d 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -752,8 +752,7 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q,
while (src.sg_wa.bytes_left) {
ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
if (!src.sg_wa.bytes_left) {
- unsigned int nbytes = aes->src_len
- % AES_BLOCK_SIZE;
+ unsigned int nbytes = ilen % AES_BLOCK_SIZE;
if (nbytes) {
op.eom = 1;
@@ -841,11 +840,11 @@ e_tag:
ccp_dm_free(&final_wa);
e_dst:
- if (aes->src_len && !in_place)
+ if (ilen > 0 && !in_place)
ccp_free_data(&dst, cmd_q);
e_src:
- if (aes->src_len)
+ if (ilen > 0)
ccp_free_data(&src, cmd_q);
e_aad:
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index e80bb3c0a203..668e85c5f7bd 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -296,6 +296,22 @@ static void omap_clear_gpio_debounce(struct gpio_bank *bank, unsigned offset)
}
}
+/*
+ * Off mode wake-up capable GPIOs in bank(s) that are in the wakeup domain.
+ * See TRM section for GPIO for "Wake-Up Generation" for the list of GPIOs
+ * in wakeup domain. If bank->non_wakeup_gpios is not configured, assume none
+ * are capable of waking up the system from off mode.
+ */
+static bool omap_gpio_is_off_wakeup_capable(struct gpio_bank *bank, u32 gpio_mask)
+{
+ u32 no_wake = bank->non_wakeup_gpios;
+
+ if (no_wake)
+ return !!(~no_wake & gpio_mask);
+
+ return false;
+}
+
static inline void omap_set_gpio_trigger(struct gpio_bank *bank, int gpio,
unsigned trigger)
{
@@ -327,13 +343,7 @@ static inline void omap_set_gpio_trigger(struct gpio_bank *bank, int gpio,
}
/* This part needs to be executed always for OMAP{34xx, 44xx} */
- if (!bank->regs->irqctrl) {
- /* On omap24xx proceed only when valid GPIO bit is set */
- if (bank->non_wakeup_gpios) {
- if (!(bank->non_wakeup_gpios & gpio_bit))
- goto exit;
- }
-
+ if (!bank->regs->irqctrl && !omap_gpio_is_off_wakeup_capable(bank, gpio)) {
/*
* Log the edge gpio and manually trigger the IRQ
* after resume if the input level changes
@@ -346,7 +356,6 @@ static inline void omap_set_gpio_trigger(struct gpio_bank *bank, int gpio,
bank->enabled_non_wakeup_gpios &= ~gpio_bit;
}
-exit:
bank->level_mask =
readl_relaxed(bank->base + bank->regs->leveldetect0) |
readl_relaxed(bank->base + bank->regs->leveldetect1);
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 9fd86a47fde5..7d1975f07bbd 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -685,15 +685,7 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
for_each_sgt_page(page, sgt_iter, pages) {
if (obj->mm.dirty)
- /*
- * As this may not be anonymous memory (e.g. shmem)
- * but exist on a real mapping, we have to lock
- * the page in order to dirty it -- holding
- * the page reference is not sufficient to
- * prevent the inode from being truncated.
- * Play safe and take the lock.
- */
- set_page_dirty_lock(page);
+ set_page_dirty(page);
mark_page_accessed(page);
put_page(page);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
index e4e09d47c5c0..59e9d05ab928 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -389,8 +389,10 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg,
break;
}
- if (retries == RETRIES)
+ if (retries == RETRIES) {
+ kfree(reply);
return -EINVAL;
+ }
*msg_len = reply_len;
*msg = reply;
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index edb775e253d7..194b8ab3cdb7 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -529,6 +529,7 @@
#define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A 0x0b4a
#define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE 0x134a
#define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_094A 0x094a
+#define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_0641 0x0641
#define USB_VENDOR_ID_HUION 0x256c
#define USB_DEVICE_ID_HUION_TABLET 0x006e
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 0bbf21e70f7d..022b5aee602e 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -100,6 +100,7 @@ static const struct hid_blacklist {
{ USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A, HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_094A, HID_QUIRK_ALWAYS_POLL },
+ { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_0641, HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C077, HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_KEYBOARD_G710_PLUS, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C01A, HID_QUIRK_ALWAYS_POLL },
diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c
index 12b94b094c0d..7f8738a83cb9 100644
--- a/drivers/hwmon/nct7802.c
+++ b/drivers/hwmon/nct7802.c
@@ -768,7 +768,7 @@ static struct attribute *nct7802_in_attrs[] = {
&sensor_dev_attr_in3_alarm.dev_attr.attr,
&sensor_dev_attr_in3_beep.dev_attr.attr,
- &sensor_dev_attr_in4_input.dev_attr.attr, /* 17 */
+ &sensor_dev_attr_in4_input.dev_attr.attr, /* 16 */
&sensor_dev_attr_in4_min.dev_attr.attr,
&sensor_dev_attr_in4_max.dev_attr.attr,
&sensor_dev_attr_in4_alarm.dev_attr.attr,
@@ -794,9 +794,9 @@ static umode_t nct7802_in_is_visible(struct kobject *kobj,
if (index >= 6 && index < 11 && (reg & 0x03) != 0x03) /* VSEN1 */
return 0;
- if (index >= 11 && index < 17 && (reg & 0x0c) != 0x0c) /* VSEN2 */
+ if (index >= 11 && index < 16 && (reg & 0x0c) != 0x0c) /* VSEN2 */
return 0;
- if (index >= 17 && (reg & 0x30) != 0x30) /* VSEN3 */
+ if (index >= 16 && (reg & 0x30) != 0x30) /* VSEN3 */
return 0;
return attr->mode;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 71e35f89a0ec..96592b706e39 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -51,22 +51,12 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
-static bool umr_can_modify_entity_size(struct mlx5_ib_dev *dev)
-{
- return !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled);
-}
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}
-static bool use_umr(struct mlx5_ib_dev *dev, int order)
-{
- return order <= mr_cache_max_order(dev) &&
- umr_can_modify_entity_size(dev);
-}
-
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
@@ -1302,7 +1292,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
- bool populate_mtts = false;
+ bool use_umr;
struct ib_umem *umem;
int page_shift;
int npages;
@@ -1335,29 +1325,30 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (err < 0)
return ERR_PTR(err);
- if (use_umr(dev, order)) {
+ use_umr = !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled) &&
+ (!MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled) ||
+ !MLX5_CAP_GEN(dev->mdev, atomic));
+
+ if (order <= mr_cache_max_order(dev) && use_umr) {
mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
page_shift, order, access_flags);
if (PTR_ERR(mr) == -EAGAIN) {
mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
mr = NULL;
}
- populate_mtts = false;
} else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
if (access_flags & IB_ACCESS_ON_DEMAND) {
err = -EINVAL;
pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
goto error;
}
- populate_mtts = true;
+ use_umr = false;
}
if (!mr) {
- if (!umr_can_modify_entity_size(dev))
- populate_mtts = true;
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
- page_shift, access_flags, populate_mtts);
+ page_shift, access_flags, !use_umr);
mutex_unlock(&dev->slow_path_mutex);
}
@@ -1375,7 +1366,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
update_odp_mr(mr);
#endif
- if (!populate_mtts) {
+ if (use_umr) {
int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
if (access_flags & IB_ACCESS_ON_DEMAND)
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 4a1ed237c6d4..a91a8acdc533 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -32,6 +32,8 @@
#include <linux/mem_encrypt.h>
#include <asm/pci-direct.h>
#include <asm/iommu.h>
+#include <asm/apic.h>
+#include <asm/msidef.h>
#include <asm/gart.h>
#include <asm/x86_init.h>
#include <asm/iommu_table.h>
@@ -1925,6 +1927,90 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
return 0;
}
+#define XT_INT_DEST_MODE(x) (((x) & 0x1ULL) << 2)
+#define XT_INT_DEST_LO(x) (((x) & 0xFFFFFFULL) << 8)
+#define XT_INT_VEC(x) (((x) & 0xFFULL) << 32)
+#define XT_INT_DEST_HI(x) ((((x) >> 24) & 0xFFULL) << 56)
+
+/**
+ * Setup the IntCapXT registers with interrupt routing information
+ * based on the PCI MSI capability block registers, accessed via
+ * MMIO MSI address low/hi and MSI data registers.
+ */
+static void iommu_update_intcapxt(struct amd_iommu *iommu)
+{
+ u64 val;
+ u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET);
+ u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET);
+ u32 data = readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET);
+ bool dm = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+ u32 dest = ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF);
+
+ if (x2apic_enabled())
+ dest |= MSI_ADDR_EXT_DEST_ID(addr_hi);
+
+ val = XT_INT_VEC(data & 0xFF) |
+ XT_INT_DEST_MODE(dm) |
+ XT_INT_DEST_LO(dest) |
+ XT_INT_DEST_HI(dest);
+
+ /**
+ * Current IOMMU implementation uses the same IRQ for all
+ * 3 IOMMU interrupts.
+ */
+ writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
+ writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
+ writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
+}
+
+static void _irq_notifier_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+{
+ struct amd_iommu *iommu;
+
+ for_each_iommu(iommu) {
+ if (iommu->dev->irq == notify->irq) {
+ iommu_update_intcapxt(iommu);
+ break;
+ }
+ }
+}
+
+static void _irq_notifier_release(struct kref *ref)
+{
+}
+
+static int iommu_init_intcapxt(struct amd_iommu *iommu)
+{
+ int ret;
+ struct irq_affinity_notify *notify = &iommu->intcapxt_notify;
+
+ /**
+ * IntCapXT requires XTSup=1, which can be inferred
+ * from amd_iommu_xt_mode.
+ */
+ if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE)
+ return 0;
+
+ /**
+ * Also, we need to setup notifier to update the IntCapXT registers
+ * whenever the irq affinity is changed from user-space.
+ */
+ notify->irq = iommu->dev->irq;
+ notify->notify = _irq_notifier_notify,
+ notify->release = _irq_notifier_release,
+ ret = irq_set_affinity_notifier(iommu->dev->irq, notify);
+ if (ret) {
+ pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n",
+ iommu->devid, iommu->dev->irq);
+ return ret;
+ }
+
+ iommu_update_intcapxt(iommu);
+ iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
+ return ret;
+}
+
static int iommu_init_msi(struct amd_iommu *iommu)
{
int ret;
@@ -1941,6 +2027,10 @@ static int iommu_init_msi(struct amd_iommu *iommu)
return ret;
enable_faults:
+ ret = iommu_init_intcapxt(iommu);
+ if (ret)
+ return ret;
+
iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
if (iommu->ppr_log != NULL)
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 0b73d0508a3d..9a505104b1cf 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -72,6 +72,12 @@
#define MMIO_PPR_LOG_OFFSET 0x0038
#define MMIO_GA_LOG_BASE_OFFSET 0x00e0
#define MMIO_GA_LOG_TAIL_OFFSET 0x00e8
+#define MMIO_MSI_ADDR_LO_OFFSET 0x015C
+#define MMIO_MSI_ADDR_HI_OFFSET 0x0160
+#define MMIO_MSI_DATA_OFFSET 0x0164
+#define MMIO_INTCAPXT_EVT_OFFSET 0x0170
+#define MMIO_INTCAPXT_PPR_OFFSET 0x0178
+#define MMIO_INTCAPXT_GALOG_OFFSET 0x0180
#define MMIO_CMD_HEAD_OFFSET 0x2000
#define MMIO_CMD_TAIL_OFFSET 0x2008
#define MMIO_EVT_HEAD_OFFSET 0x2010
@@ -160,6 +166,7 @@
#define CONTROL_GALOG_EN 0x1CULL
#define CONTROL_GAINT_EN 0x1DULL
#define CONTROL_XT_EN 0x32ULL
+#define CONTROL_INTCAPXT_EN 0x33ULL
#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT)
#define CTRL_INV_TO_NONE 0
@@ -595,6 +602,11 @@ struct amd_iommu {
u32 flags;
volatile u64 __aligned(8) cmd_sem;
+
+#ifndef __GENKSYMS__
+ /* IRQ notifier for IntCapXT interrupt */
+ struct irq_affinity_notify intcapxt_notify;
+#endif
};
static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 8459d04ee1c4..7a5ffff2bed3 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2548,8 +2548,10 @@ static void its_irq_domain_activate(struct irq_domain *domain,
/* Bind the LPI to the first possible CPU */
cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
if (cpu >= nr_cpu_ids) {
- if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144)
- return -EINVAL;
+ if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) {
+ pr_err("ITS: Can't bind LPI to non-local node CPU due to Cavium erratum 23144\n");
+ return;
+ }
cpu = cpumask_first(cpu_online_mask);
}
@@ -3073,7 +3075,7 @@ static void its_vpe_irq_domain_activate(struct irq_domain *domain,
/* If we use the list map, we issue VMAPP on demand... */
if (its_list_map)
- return 0;
+ return;
/* Map the VPE to the first possible CPU */
vpe->col_idx = cpumask_first(cpu_online_mask);
diff --git a/drivers/mfd/intel-lpss-pci.c b/drivers/mfd/intel-lpss-pci.c
index 0b9db3ec7254..bf6cec0b0836 100644
--- a/drivers/mfd/intel-lpss-pci.c
+++ b/drivers/mfd/intel-lpss-pci.c
@@ -130,6 +130,19 @@ static const struct intel_lpss_platform_info cnl_i2c_info = {
};
static const struct pci_device_id intel_lpss_pci_ids[] = {
+ /* CML */
+ { PCI_VDEVICE(INTEL, 0x02a8), (kernel_ulong_t)&spt_uart_info },
+ { PCI_VDEVICE(INTEL, 0x02a9), (kernel_ulong_t)&spt_uart_info },
+ { PCI_VDEVICE(INTEL, 0x02aa), (kernel_ulong_t)&spt_info },
+ { PCI_VDEVICE(INTEL, 0x02ab), (kernel_ulong_t)&spt_info },
+ { PCI_VDEVICE(INTEL, 0x02c5), (kernel_ulong_t)&cnl_i2c_info },
+ { PCI_VDEVICE(INTEL, 0x02c6), (kernel_ulong_t)&cnl_i2c_info },
+ { PCI_VDEVICE(INTEL, 0x02c7), (kernel_ulong_t)&spt_uart_info },
+ { PCI_VDEVICE(INTEL, 0x02e8), (kernel_ulong_t)&cnl_i2c_info },
+ { PCI_VDEVICE(INTEL, 0x02e9), (kernel_ulong_t)&cnl_i2c_info },
+ { PCI_VDEVICE(INTEL, 0x02ea), (kernel_ulong_t)&cnl_i2c_info },
+ { PCI_VDEVICE(INTEL, 0x02eb), (kernel_ulong_t)&cnl_i2c_info },
+ { PCI_VDEVICE(INTEL, 0x02fb), (kernel_ulong_t)&spt_info },
/* BXT A-Step */
{ PCI_VDEVICE(INTEL, 0x0aac), (kernel_ulong_t)&bxt_i2c_info },
{ PCI_VDEVICE(INTEL, 0x0aae), (kernel_ulong_t)&bxt_i2c_info },
diff --git a/drivers/mmc/host/cavium.c b/drivers/mmc/host/cavium.c
index 5fef2f81e827..8a97cb47ccc4 100644
--- a/drivers/mmc/host/cavium.c
+++ b/drivers/mmc/host/cavium.c
@@ -374,6 +374,7 @@ static int finish_dma_single(struct cvm_mmc_host *host, struct mmc_data *data)
{
data->bytes_xfered = data->blocks * data->blksz;
data->error = 0;
+ dma_unmap_sg(host->dev, data->sg, data->sg_len, get_dma_dir(data));
return 1;
}
@@ -1048,7 +1049,8 @@ int cvm_mmc_of_slot_probe(struct device *dev, struct cvm_mmc_host *host)
mmc->max_segs = 1;
/* DMA size field can address up to 8 MB */
- mmc->max_seg_size = 8 * 1024 * 1024;
+ mmc->max_seg_size = min_t(unsigned int, 8 * 1024 * 1024,
+ dma_get_max_seg_size(host->dev));
mmc->max_req_size = mmc->max_seg_size;
/* External DMA is in 512 byte blocks */
mmc->max_blk_size = 512;
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 651b2a6166a6..70ca58eb2fe1 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -1590,6 +1590,8 @@ static const struct pci_device_id pci_ids[] = {
SDHCI_PCI_DEVICE(INTEL, CNPH_SD, intel_byt_sd),
SDHCI_PCI_DEVICE(INTEL, ICP_EMMC, intel_glk_emmc),
SDHCI_PCI_DEVICE(INTEL, ICP_SD, intel_byt_sd),
+ SDHCI_PCI_DEVICE(INTEL, CML_EMMC, intel_glk_emmc),
+ SDHCI_PCI_DEVICE(INTEL, CML_SD, intel_byt_sd),
SDHCI_PCI_DEVICE(O2, 8120, o2),
SDHCI_PCI_DEVICE(O2, 8220, o2),
SDHCI_PCI_DEVICE(O2, 8221, o2),
diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h
index b2676ec2684e..955b859acbb6 100644
--- a/drivers/mmc/host/sdhci-pci.h
+++ b/drivers/mmc/host/sdhci-pci.h
@@ -43,6 +43,8 @@
#define PCI_DEVICE_ID_INTEL_CNPH_SD 0xa375
#define PCI_DEVICE_ID_INTEL_ICP_EMMC 0x34c4
#define PCI_DEVICE_ID_INTEL_ICP_SD 0x34f8
+#define PCI_DEVICE_ID_INTEL_CML_EMMC 0x02c4
+#define PCI_DEVICE_ID_INTEL_CML_SD 0x02f5
#define PCI_DEVICE_ID_SYSKONNECT_8000 0x8000
#define PCI_DEVICE_ID_VIA_95D0 0x95d0
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c
index bc6eb30aa20f..41c6fa200e74 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.c
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
@@ -928,7 +928,7 @@ int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
hash = get_mac_addr_hash_code(addr) & HASH_CTRL_ADDR_MASK;
/* Create element to be added to the driver hash table */
- hash_entry = kmalloc(sizeof(*hash_entry), GFP_KERNEL);
+ hash_entry = kmalloc(sizeof(*hash_entry), GFP_ATOMIC);
if (!hash_entry)
return -ENOMEM;
hash_entry->addr = addr;
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.c b/drivers/net/ethernet/freescale/fman/fman_tgec.c
index 40705938eecc..f75b9c11b2d2 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.c
@@ -553,7 +553,7 @@ int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
hash = (crc >> TGEC_HASH_MCAST_SHIFT) & TGEC_HASH_ADR_MSK;
/* Create element to be added to the driver hash table */
- hash_entry = kmalloc(sizeof(*hash_entry), GFP_KERNEL);
+ hash_entry = kmalloc(sizeof(*hash_entry), GFP_ATOMIC);
if (!hash_entry)
return -ENOMEM;
hash_entry->addr = addr;
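Both fman hunks above make the same fix: these add-hash-address paths run in atomic context, where sleeping is not allowed, so the allocation must use GFP_ATOMIC instead of GFP_KERNEL. A minimal sketch of the general rule; the demo_priv/demo_entry types and the function name are invented for illustration, not taken from this driver:

/* Sketch only: an allocation made under a spinlock must not sleep. */
struct demo_entry {
	struct list_head node;
	u64 addr;
};

static int add_entry(struct demo_priv *priv, u64 addr)
{
	struct demo_entry *entry;

	spin_lock(&priv->lock);		/* atomic context from here on */
	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);	/* GFP_KERNEL may sleep */
	if (!entry) {
		spin_unlock(&priv->lock);
		return -ENOMEM;
	}
	entry->addr = addr;
	list_add_tail(&entry->node, &priv->list);
	spin_unlock(&priv->lock);
	return 0;
}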
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index b4a0ceda1293..975a7b9867a0 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -1475,7 +1475,7 @@ static int ehea_init_port_res(struct ehea_port *port, struct ehea_port_res *pr,
memset(pr, 0, sizeof(struct ehea_port_res));
- pr->tx_bytes = rx_bytes;
+ pr->tx_bytes = tx_bytes;
pr->tx_packets = tx_packets;
pr->rx_bytes = rx_bytes;
pr->rx_packets = rx_packets;
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index 40bd88362e3d..693f9582173b 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -1057,7 +1057,7 @@ sis900_open(struct net_device *net_dev)
sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED);
/* Enable all known interrupts by setting the interrupt mask. */
- sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE);
+ sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC);
sw32(cr, RxENA | sr32(cr));
sw32(ier, IE);
@@ -1580,7 +1580,7 @@ static void sis900_tx_timeout(struct net_device *net_dev)
sw32(txdp, sis_priv->tx_ring_dma);
/* Enable all known interrupts by setting the interrupt mask. */
- sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE);
+ sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC);
}
/**
@@ -1620,7 +1620,7 @@ sis900_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
spin_unlock_irqrestore(&sis_priv->lock, flags);
return NETDEV_TX_OK;
}
- sis_priv->tx_ring[entry].cmdsts = (OWN | skb->len);
+ sis_priv->tx_ring[entry].cmdsts = (OWN | INTR | skb->len);
sw32(cr, TxENA | sr32(cr));
sis_priv->cur_tx ++;
@@ -1676,7 +1676,7 @@ static irqreturn_t sis900_interrupt(int irq, void *dev_instance)
do {
status = sr32(isr);
- if ((status & (HIBERR|TxURN|TxERR|TxIDLE|RxORN|RxERR|RxOK)) == 0)
+ if ((status & (HIBERR|TxURN|TxERR|TxIDLE|TxDESC|RxORN|RxERR|RxOK)) == 0)
/* nothing interesting happened */
break;
handled = 1;
@@ -1686,7 +1686,7 @@ static irqreturn_t sis900_interrupt(int irq, void *dev_instance)
/* Rx interrupt */
sis900_rx(net_dev);
- if (status & (TxURN | TxERR | TxIDLE))
+ if (status & (TxURN | TxERR | TxIDLE | TxDESC))
/* Tx interrupt */
sis900_finish_xmit(net_dev);
@@ -1898,8 +1898,8 @@ static void sis900_finish_xmit (struct net_device *net_dev)
if (tx_status & OWN) {
/* The packet is not transmitted yet (owned by hardware) !
- * Note: the interrupt is generated only when Tx Machine
- * is idle, so this is an almost impossible case */
+ * Note: this is an almost impossible condition
+ * in the TxDESC ('descriptor interrupt') case */
break;
}
@@ -2475,7 +2475,7 @@ static int sis900_resume(struct pci_dev *pci_dev)
sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED);
/* Enable all known interrupts by setting the interrupt mask. */
- sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE);
+ sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC);
sw32(cr, RxENA | sr32(cr));
sw32(ier, IE);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 6fd8e3a01b0a..04ce8803f92d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -724,7 +724,7 @@ static int iwl_mvm_sar_get_ewrd_table(struct iwl_mvm *mvm)
for (i = 0; i < n_profiles; i++) {
/* the tables start at element 3 */
- static int pos = 3;
+ int pos = 3;
/* The EWRD profiles officially go from 2 to 4, but we
* save them in sar_profiles[1-3] (because we don't
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index f7496a33fd01..e34d4acc519a 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -403,6 +403,8 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
DMA_TO_DEVICE);
}
+ meta->tbs = 0;
+
if (trans->cfg->use_tfh) {
struct iwl_tfh_tfd *tfd_fh = (void *)tfd;
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index f8a93d7ad8ff..2af3d13b4f62 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1196,7 +1196,7 @@ static void pci_restore_rebar_state(struct pci_dev *pdev)
pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
bar_idx = ctrl & PCI_REBAR_CTRL_BAR_IDX;
res = pdev->resource + bar_idx;
- size = order_base_2((resource_size(res) >> 20) | 1) - 1;
+ size = ilog2(resource_size(res)) - 20;
ctrl &= ~PCI_REBAR_CTRL_BAR_SIZE;
ctrl |= size << PCI_REBAR_CTRL_BAR_SHIFT;
pci_write_config_dword(pdev, pos + PCI_REBAR_CTRL, ctrl);
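The resizable-BAR control field encodes the BAR size as log2 of the size in megabytes (0 = 1 MB, 8 = 256 MB, and so on). The two expressions disagree for every valid size: for a 1 MB BAR the removed one yields -1 and for 256 MB it yields 7, while the replacement yields 0 and 8 as the encoding requires. A stand-alone check; the helpers below are rough user-space stand-ins for the kernel's ilog2() and order_base_2():

#include <stdio.h>
#include <stdint.h>

static int ilog2_u64(uint64_t x)
{
	int n = -1;

	while (x) {
		x >>= 1;
		n++;
	}
	return n;
}

static int order_base_2_u64(uint64_t x)
{
	return x <= 1 ? 0 : ilog2_u64(x - 1) + 1;
}

int main(void)
{
	uint64_t sizes[] = { 1ULL << 20, 256ULL << 20 };	/* 1 MB, 256 MB */
	int i;

	for (i = 0; i < 2; i++) {
		int old = order_base_2_u64((sizes[i] >> 20) | 1) - 1;
		int new = ilog2_u64(sizes[i]) - 20;

		printf("%llu MB: old=%d new=%d\n",
		       (unsigned long long)(sizes[i] >> 20), old, new);
	}
	return 0;
}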
diff --git a/drivers/regulator/qcom_spmi-regulator.c b/drivers/regulator/qcom_spmi-regulator.c
index c372b244f3da..9896c1328d03 100644
--- a/drivers/regulator/qcom_spmi-regulator.c
+++ b/drivers/regulator/qcom_spmi-regulator.c
@@ -754,18 +754,17 @@ spmi_regulator_common_set_voltage(struct regulator_dev *rdev, unsigned selector)
return spmi_vreg_write(vreg, SPMI_COMMON_REG_VOLTAGE_RANGE, buf, 2);
}
+static int spmi_regulator_common_list_voltage(struct regulator_dev *rdev,
+ unsigned selector);
+
static int spmi_regulator_set_voltage_time_sel(struct regulator_dev *rdev,
unsigned int old_selector, unsigned int new_selector)
{
struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
- const struct spmi_voltage_range *range;
int diff_uV;
- range = spmi_regulator_find_range(vreg);
- if (!range)
- return -EINVAL;
-
- diff_uV = abs(new_selector - old_selector) * range->step_uV;
+ diff_uV = abs(spmi_regulator_common_list_voltage(rdev, new_selector) -
+ spmi_regulator_common_list_voltage(rdev, old_selector));
return DIV_ROUND_UP(diff_uV, vreg->slew_rate);
}
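The rework above derives the ramp delay from the actual voltages that the two selectors map to, which stays correct when the old and new selectors fall into different ranges with different step sizes. The resulting arithmetic, worked through with invented numbers:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	/* Invented voltages and slew rate, for illustration only. */
	int old_uV = 1200000;	/* list_voltage(old_selector) */
	int new_uV = 1350000;	/* list_voltage(new_selector) */
	int slew_rate = 10000;	/* uV per us */
	int diff_uV = new_uV > old_uV ? new_uV - old_uV : old_uV - new_uV;

	printf("ramp delay: %d us\n", DIV_ROUND_UP(diff_uV, slew_rate));
	return 0;
}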
diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
index cc8fa7f6c7fc..e7fff4cfe9a6 100644
--- a/drivers/s390/block/dasd_alias.c
+++ b/drivers/s390/block/dasd_alias.c
@@ -382,6 +382,20 @@ suborder_not_supported(struct dasd_ccw_req *cqr)
char msg_format;
char msg_no;
+ /*
+ * intrc values ENODEV, ENOLINK and EPERM
+ * will be obtained from sleep_on to indicate that no
+ * IO operation can be started
+ */
+ if (cqr->intrc == -ENODEV)
+ return 1;
+
+ if (cqr->intrc == -ENOLINK)
+ return 1;
+
+ if (cqr->intrc == -EPERM)
+ return 1;
+
sense = dasd_get_sense(&cqr->irb);
if (!sense)
return 0;
@@ -446,12 +460,8 @@ static int read_unit_address_configuration(struct dasd_device *device,
lcu->flags &= ~NEED_UAC_UPDATE;
spin_unlock_irqrestore(&lcu->lock, flags);
- do {
- rc = dasd_sleep_on(cqr);
- if (rc && suborder_not_supported(cqr))
- return -EOPNOTSUPP;
- } while (rc && (cqr->retries > 0));
- if (rc) {
+ rc = dasd_sleep_on(cqr);
+ if (rc && !suborder_not_supported(cqr)) {
spin_lock_irqsave(&lcu->lock, flags);
lcu->flags |= NEED_UAC_UPDATE;
spin_unlock_irqrestore(&lcu->lock, flags);
diff --git a/drivers/usb/host/xhci-rcar.c b/drivers/usb/host/xhci-rcar.c
index 226379873ab5..f19a5cb927d6 100644
--- a/drivers/usb/host/xhci-rcar.c
+++ b/drivers/usb/host/xhci-rcar.c
@@ -191,10 +191,15 @@ int xhci_rcar_init_quirk(struct usb_hcd *hcd)
* pointers. So, this driver clears the AC64 bit of xhci->hcc_params
* to call dma_set_coherent_mask(dev, DMA_BIT_MASK(32)) in
* xhci_gen_setup().
+ *
+ * Also, since the firmware/internal CPU controls USBSTS.STS_HALT
+ * and processing slows down when the roothub port enters U3, a long
+ * delay for the STS_HALT handshake is needed in xhci_suspend().
*/
if (xhci_rcar_is_gen2(hcd->self.controller) ||
- xhci_rcar_is_gen3(hcd->self.controller))
- xhci->quirks |= XHCI_NO_64BIT_SUPPORT;
+ xhci_rcar_is_gen3(hcd->self.controller)) {
+ xhci->quirks |= XHCI_NO_64BIT_SUPPORT | XHCI_SLOW_SUSPEND;
+ }
xhci->quirks |= XHCI_TRUST_TX_LENGTH;
return xhci_rcar_download_firmware(hcd);
diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index 83b05a287b0c..ad33adc87597 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -877,19 +877,20 @@ static void iowarrior_disconnect(struct usb_interface *interface)
dev = usb_get_intfdata(interface);
mutex_lock(&iowarrior_open_disc_lock);
usb_set_intfdata(interface, NULL);
+ /* prevent device read, write and ioctl */
+ dev->present = 0;
minor = dev->minor;
+ mutex_unlock(&iowarrior_open_disc_lock);
+ /* give back our minor - this will call close(); locks must be dropped at this point */
- /* give back our minor */
usb_deregister_dev(interface, &iowarrior_class);
mutex_lock(&dev->mutex);
/* prevent device read, write and ioctl */
- dev->present = 0;
mutex_unlock(&dev->mutex);
- mutex_unlock(&iowarrior_open_disc_lock);
if (dev->opened) {
/* There is a process that holds a file descriptor to the device,
diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c
index 8ee98bc6c468..081570677f24 100644
--- a/drivers/usb/misc/yurex.c
+++ b/drivers/usb/misc/yurex.c
@@ -96,7 +96,6 @@ static void yurex_delete(struct kref *kref)
dev_dbg(&dev->interface->dev, "%s\n", __func__);
- usb_put_dev(dev->udev);
if (dev->cntl_urb) {
usb_kill_urb(dev->cntl_urb);
kfree(dev->cntl_req);
@@ -112,6 +111,7 @@ static void yurex_delete(struct kref *kref)
dev->int_buffer, dev->urb->transfer_dma);
usb_free_urb(dev->urb);
}
+ usb_put_dev(dev->udev);
kfree(dev);
}
diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c
index 9ef5d80840a4..fb100da253ce 100644
--- a/drivers/watchdog/f71808e_wdt.c
+++ b/drivers/watchdog/f71808e_wdt.c
@@ -329,6 +329,8 @@ static int f71862fg_pin_configure(unsigned short ioaddr)
static int watchdog_start(void)
{
+ u8 tmp;
+
/* Make sure we don't die as soon as the watchdog is enabled below */
int err = watchdog_keepalive();
if (err)
@@ -376,19 +378,18 @@ static int watchdog_start(void)
break;
case f81866:
- /* Set pin 70 to WDTRST# */
- superio_clear_bit(watchdog.sioaddr, SIO_F81866_REG_PORT_SEL,
- BIT(3) | BIT(0));
- superio_set_bit(watchdog.sioaddr, SIO_F81866_REG_PORT_SEL,
- BIT(2));
/*
* GPIO1 Control Register when 27h BIT3:2 = 01 & BIT0 = 0.
* The PIN 70(GPIO15/WDTRST) is controlled by 2Ch:
* BIT5: 0 -> WDTRST#
* 1 -> GPIO15
*/
- superio_clear_bit(watchdog.sioaddr, SIO_F81866_REG_GPIO1,
- BIT(5));
+ tmp = superio_inb(watchdog.sioaddr, SIO_F81866_REG_PORT_SEL);
+ tmp &= ~(BIT(3) | BIT(0));
+ tmp |= BIT(2);
+ superio_outb(watchdog.sioaddr, SIO_F81866_REG_PORT_SEL, tmp);
+
+ superio_clear_bit(watchdog.sioaddr, SIO_F81866_REG_GPIO1, 5);
break;
default:
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index 2f67194ec47a..84b19863d384 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -984,16 +984,16 @@ static void watchdog_cdev_unregister(struct watchdog_device *wdd)
old_wd_data = NULL;
}
- mutex_lock(&wd_data->lock);
- wd_data->wdd = NULL;
- wdd->wd_data = NULL;
- mutex_unlock(&wd_data->lock);
-
if (watchdog_active(wdd) &&
test_bit(WDOG_STOP_ON_UNREGISTER, &wdd->status)) {
watchdog_stop(wdd);
}
+ mutex_lock(&wd_data->lock);
+ wd_data->wdd = NULL;
+ wdd->wd_data = NULL;
+ mutex_unlock(&wd_data->lock);
+
cancel_delayed_work_sync(&wd_data->work);
kref_put(&wd_data->kref, watchdog_core_data_release);
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 68a5b4d1eca8..e36c83d41336 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -369,8 +369,8 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
/* Convert the size to actually allocated. */
size = 1UL << (order + XEN_PAGE_SHIFT);
- if (((dev_addr + size - 1 <= dma_mask)) ||
- range_straddles_page_boundary(phys, size))
+ if (!WARN_ON((dev_addr + size - 1 > dma_mask) ||
+ range_straddles_page_boundary(phys, size)))
xen_destroy_contiguous_region(phys, order);
xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2be20d32cc3b..d0451b769262 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -48,6 +48,7 @@
struct btrfs_trans_handle;
struct btrfs_transaction;
struct btrfs_pending_snapshot;
+struct btrfs_delayed_ref_root;
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
@@ -777,6 +778,13 @@ bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
*/
#define BTRFS_FS_NEED_ASYNC_COMMIT 17
+/*
+ * Indicate that balance has been set up from the ioctl and is in the main
+ * phase. The fs_info::balance_ctl is initialized.
+ * Set and cleared while holding fs_info::balance_mutex.
+ */
+#define BTRFS_FS_BALANCE_RUNNING 18
+
struct btrfs_fs_info {
u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
unsigned long flags;
@@ -1056,7 +1064,6 @@ struct btrfs_fs_info {
/* restriper state */
spinlock_t balance_lock;
struct mutex balance_mutex;
- atomic_t balance_running;
atomic_t balance_pause_req;
atomic_t balance_cancel_req;
struct btrfs_balance_control *balance_ctl;
@@ -1172,6 +1179,12 @@ struct btrfs_fs_info {
u32 sectorsize;
u32 stripesize;
+ /*
+ * Number of send operations in progress.
+ * Updated while holding fs_info::balance_mutex.
+ */
+ int send_in_progress;
+
/* Block groups and devices containing active swapfiles. */
spinlock_t swapfile_pins_lock;
struct rb_root swapfile_pins;
@@ -1354,6 +1367,12 @@ struct btrfs_root {
* manipulation with the read-only status via SUBVOL_SETFLAGS
*/
int send_in_progress;
+ /*
+ * Number of currently running deduplication operations that have a
+ * destination inode belonging to this root. Protected by the lock
+ * root_item_lock.
+ */
+ int dedupe_in_progress;
struct btrfs_subvolume_writers *subv_writers;
atomic_t will_be_snapshoted;
@@ -2705,6 +2724,9 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, unsigned long count);
int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
unsigned long count, u64 transid, int wait);
+void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head);
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
@@ -3262,6 +3284,8 @@ static inline void btrfs_force_ra(struct address_space *mapping,
page_cache_sync_readahead(mapping, ra, file, offset, req_size);
}
+void __btrfs_del_delalloc_inode(struct btrfs_root *root,
+ struct btrfs_inode *inode);
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 282a9d725448..94086b6358c2 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2221,7 +2221,6 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
{
spin_lock_init(&fs_info->balance_lock);
mutex_init(&fs_info->balance_mutex);
- atomic_set(&fs_info->balance_running, 0);
atomic_set(&fs_info->balance_pause_req, 0);
atomic_set(&fs_info->balance_cancel_req, 0);
fs_info->balance_ctl = NULL;
@@ -2679,6 +2678,8 @@ int open_ctree(struct super_block *sb,
fs_info->sectorsize = 4096;
fs_info->stripesize = 4096;
+ fs_info->send_in_progress = 0;
+
spin_lock_init(&fs_info->swapfile_pins_lock);
fs_info->swapfile_pins = RB_ROOT;
@@ -3931,6 +3932,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
btrfs_free_qgroup_config(fs_info);
+ ASSERT(list_empty(&fs_info->delalloc_roots));
if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
btrfs_info(fs_info, "at unmount delalloc count %lld",
@@ -4235,15 +4237,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
{
+ /* cleanup FS via transaction */
+ btrfs_cleanup_transaction(fs_info);
+
mutex_lock(&fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(fs_info);
mutex_unlock(&fs_info->cleaner_mutex);
down_write(&fs_info->cleanup_work_sem);
up_write(&fs_info->cleanup_work_sem);
-
- /* cleanup FS via transaction */
- btrfs_cleanup_transaction(fs_info);
}
static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
@@ -4347,6 +4349,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
if (pin_bytes)
btrfs_pin_extent(fs_info, head->bytenr,
head->num_bytes, 1);
+ btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
btrfs_put_delayed_ref_head(head);
cond_resched();
spin_lock(&delayed_refs->lock);
@@ -4368,19 +4371,23 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
list_splice_init(&root->delalloc_inodes, &splice);
while (!list_empty(&splice)) {
+ struct inode *inode = NULL;
btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
delalloc_inodes);
-
- list_del_init(&btrfs_inode->delalloc_inodes);
- clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
- &btrfs_inode->runtime_flags);
+ __btrfs_del_delalloc_inode(root, btrfs_inode);
spin_unlock(&root->delalloc_lock);
- btrfs_invalidate_inodes(btrfs_inode->root);
-
+ /*
+ * Make sure we get a live inode and that it won't disappear in the
+ * meantime.
+ */
+ inode = igrab(&btrfs_inode->vfs_inode);
+ if (inode) {
+ invalidate_inode_pages2(inode->i_mapping);
+ iput(inode);
+ }
spin_lock(&root->delalloc_lock);
}
-
spin_unlock(&root->delalloc_lock);
}
@@ -4396,7 +4403,6 @@ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
while (!list_empty(&splice)) {
root = list_first_entry(&splice, struct btrfs_root,
delalloc_root);
- list_del_init(&root->delalloc_root);
root = btrfs_grab_fs_root(root);
BUG_ON(!root);
spin_unlock(&fs_info->delalloc_root_lock);
@@ -4454,13 +4460,23 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
unpin = pinned_extents;
again:
while (1) {
+ /*
+ * btrfs_finish_extent_commit() may pick up the same range as ours
+ * between find_first_extent_bit and clear_extent_dirty. Hence, hold
+ * the unused_bg_unpin_mutex to avoid double-unpinning the same
+ * extent range.
+ */
+ mutex_lock(&fs_info->unused_bg_unpin_mutex);
ret = find_first_extent_bit(unpin, 0, &start, &end,
EXTENT_DIRTY, NULL);
- if (ret)
+ if (ret) {
+ mutex_unlock(&fs_info->unused_bg_unpin_mutex);
break;
+ }
clear_extent_dirty(unpin, start, end);
btrfs_error_unpin_extent_range(fs_info, start, end);
+ mutex_unlock(&fs_info->unused_bg_unpin_mutex);
cond_resched();
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 56fdcfb01406..4fc0b1b60833 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2493,6 +2493,33 @@ static int cleanup_extent_op(struct btrfs_trans_handle *trans,
return ret ? ret : 1;
}
+void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head)
+{
+ if (head->total_ref_mod < 0) {
+ struct btrfs_space_info *space_info;
+ u64 flags;
+
+ if (head->is_data)
+ flags = BTRFS_BLOCK_GROUP_DATA;
+ else if (head->is_system)
+ flags = BTRFS_BLOCK_GROUP_SYSTEM;
+ else
+ flags = BTRFS_BLOCK_GROUP_METADATA;
+ space_info = __find_space_info(fs_info, flags);
+ ASSERT(space_info);
+ percpu_counter_add(&space_info->total_bytes_pinned,
+ -head->num_bytes);
+
+ if (head->is_data) {
+ spin_lock(&delayed_refs->lock);
+ delayed_refs->pending_csums -= head->num_bytes;
+ spin_unlock(&delayed_refs->lock);
+ }
+ }
+}
+
static int cleanup_ref_head(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head)
@@ -2530,29 +2557,6 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
spin_unlock(&head->lock);
atomic_dec(&delayed_refs->num_entries);
- trace_run_delayed_ref_head(fs_info, head, 0);
-
- if (head->total_ref_mod < 0) {
- struct btrfs_space_info *space_info;
- u64 flags;
-
- if (head->is_data)
- flags = BTRFS_BLOCK_GROUP_DATA;
- else if (head->is_system)
- flags = BTRFS_BLOCK_GROUP_SYSTEM;
- else
- flags = BTRFS_BLOCK_GROUP_METADATA;
- space_info = __find_space_info(fs_info, flags);
- ASSERT(space_info);
- percpu_counter_add(&space_info->total_bytes_pinned,
- -head->num_bytes);
- if (head->is_data) {
- spin_lock(&delayed_refs->lock);
- delayed_refs->pending_csums -= head->num_bytes;
- spin_unlock(&delayed_refs->lock);
- }
- }
-
if (head->must_insert_reserved) {
btrfs_pin_extent(fs_info, head->bytenr,
head->num_bytes, 1);
@@ -2562,6 +2566,9 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
}
}
+ btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
+
+ trace_run_delayed_ref_head(fs_info, head, 0);
btrfs_delayed_ref_unlock(head);
btrfs_put_delayed_ref_head(head);
return 0;
@@ -7107,6 +7114,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
if (head->must_insert_reserved)
ret = 1;
+ btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref_head(head);
return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5d2c016de5bd..73dae3757a61 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1742,12 +1742,12 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
spin_unlock(&root->delalloc_lock);
}
-static void btrfs_del_delalloc_inode(struct btrfs_root *root,
- struct btrfs_inode *inode)
+
+void __btrfs_del_delalloc_inode(struct btrfs_root *root,
+ struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
- spin_lock(&root->delalloc_lock);
if (!list_empty(&inode->delalloc_inodes)) {
list_del_init(&inode->delalloc_inodes);
clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
@@ -1760,6 +1760,13 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root,
spin_unlock(&fs_info->delalloc_root_lock);
}
}
+}
+
+static void btrfs_del_delalloc_inode(struct btrfs_root *root,
+ struct btrfs_inode *inode)
+{
+ spin_lock(&root->delalloc_lock);
+ __btrfs_del_delalloc_inode(root, inode);
spin_unlock(&root->delalloc_lock);
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cc5530da41ec..3962d4356b85 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3257,10 +3257,23 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
int num_pages = PAGE_ALIGN(BTRFS_MAX_DEDUPE_LEN) >> PAGE_SHIFT;
bool same_inode = (src == dst);
u64 i, tail_len, chunk_count;
+ struct btrfs_root *root_dst = BTRFS_I(dst)->root;
if (olen == 0)
return 0;
+ spin_lock(&root_dst->root_item_lock);
+ if (root_dst->send_in_progress) {
+ btrfs_warn_rl(root_dst->fs_info,
+"cannot deduplicate to root %llu while send operations are using it (%d in progress)",
+ root_dst->root_key.objectid,
+ root_dst->send_in_progress);
+ spin_unlock(&root_dst->root_item_lock);
+ return -EAGAIN;
+ }
+ root_dst->dedupe_in_progress++;
+ spin_unlock(&root_dst->root_item_lock);
+
if (same_inode)
inode_lock(src);
else
@@ -3329,6 +3342,10 @@ out_unlock:
else
unlock_two_nondirectories(src, dst);
+ spin_lock(&root_dst->root_item_lock);
+ root_dst->dedupe_in_progress--;
+ spin_unlock(&root_dst->root_item_lock);
+
return ret;
}
@@ -4706,7 +4723,7 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
bargs->flags = bctl->flags;
- if (atomic_read(&fs_info->balance_running))
+ if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags))
bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
if (atomic_read(&fs_info->balance_pause_req))
bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
@@ -4759,14 +4776,14 @@ again:
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl) {
/* this is either (2) or (3) */
- if (!atomic_read(&fs_info->balance_running)) {
+ if (!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
mutex_unlock(&fs_info->balance_mutex);
if (!mutex_trylock(&fs_info->volume_mutex))
goto again;
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl &&
- !atomic_read(&fs_info->balance_running)) {
+ !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
/* this is (3) */
need_unlock = false;
goto locked;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index a17e775a4a89..ea4d24381c70 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -760,6 +760,7 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
u64 total = 0;
int i;
+again:
do {
enqueued = 0;
mutex_lock(&fs_devices->device_list_mutex);
@@ -771,6 +772,10 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
mutex_unlock(&fs_devices->device_list_mutex);
total += enqueued;
} while (enqueued && total < 10000);
+ if (fs_devices->seed) {
+ fs_devices = fs_devices->seed;
+ goto again;
+ }
if (enqueued == 0)
return;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 0cfad0b15c4e..bb1861ca7ddf 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6621,6 +6621,13 @@ static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
spin_unlock(&root->root_item_lock);
}
+static void dedupe_in_progress_warn(const struct btrfs_root *root)
+{
+ btrfs_warn_rl(root->fs_info,
+"cannot use root %llu for send while deduplications on it are in progress (%d in progress)",
+ root->root_key.objectid, root->dedupe_in_progress);
+}
+
long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
{
int ret = 0;
@@ -6645,6 +6652,11 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
* making it RW. This also protects against deletion.
*/
spin_lock(&send_root->root_item_lock);
+ if (btrfs_root_readonly(send_root) && send_root->dedupe_in_progress) {
+ dedupe_in_progress_warn(send_root);
+ spin_unlock(&send_root->root_item_lock);
+ return -EAGAIN;
+ }
send_root->send_in_progress++;
spin_unlock(&send_root->root_item_lock);
@@ -6789,6 +6801,13 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
ret = -EPERM;
goto out;
}
+ if (clone_root->dedupe_in_progress) {
+ dedupe_in_progress_warn(clone_root);
+ spin_unlock(&clone_root->root_item_lock);
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ ret = -EAGAIN;
+ goto out;
+ }
clone_root->send_in_progress++;
spin_unlock(&clone_root->root_item_lock);
srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -6823,6 +6842,13 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
ret = -EPERM;
goto out;
}
+ if (sctx->parent_root->dedupe_in_progress) {
+ dedupe_in_progress_warn(sctx->parent_root);
+ spin_unlock(&sctx->parent_root->root_item_lock);
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ ret = -EAGAIN;
+ goto out;
+ }
spin_unlock(&sctx->parent_root->root_item_lock);
srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -6849,9 +6875,23 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
if (ret)
goto out;
+ mutex_lock(&fs_info->balance_mutex);
+ if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
+ mutex_unlock(&fs_info->balance_mutex);
+ btrfs_warn_rl(fs_info,
+ "cannot run send because a balance operation is in progress");
+ ret = -EAGAIN;
+ goto out;
+ }
+ fs_info->send_in_progress++;
+ mutex_unlock(&fs_info->balance_mutex);
+
current->journal_info = BTRFS_SEND_TRANS_STUB;
ret = send_subvol(sctx);
current->journal_info = NULL;
+ mutex_lock(&fs_info->balance_mutex);
+ fs_info->send_in_progress--;
+ mutex_unlock(&fs_info->balance_mutex);
if (ret < 0)
goto out;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 50e19d33e73e..5ce9180030e6 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -543,7 +543,7 @@ again:
* and then we deadlock with somebody doing a freeze.
*
* If we are ATTACH, it means we just want to catch the current
- * transaction and commit it, so we needn't do sb_start_intwrite().
+ * transaction and commit it, so we needn't do sb_start_intwrite().
*/
if (type & __TRANS_FREEZABLE)
sb_start_intwrite(fs_info->sb);
@@ -1921,6 +1921,18 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
kmem_cache_free(btrfs_trans_handle_cachep, trans);
}
+/*
+ * Release reserved delayed ref space of all pending block groups of the
+ * transaction and remove them from the list
+ */
+static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
+{
+ struct btrfs_block_group_cache *block_group, *tmp;
+
+ list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list)
+ list_del_init(&block_group->bg_list);
+}
+
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
{
if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
@@ -2322,6 +2334,7 @@ scrub_continue:
btrfs_scrub_continue(fs_info);
cleanup_transaction:
btrfs_trans_release_metadata(trans, fs_info);
+ btrfs_cleanup_pending_block_groups(trans);
btrfs_trans_release_chunk_metadata(trans);
trans->block_rsv = NULL;
btrfs_warn(fs_info, "Skipping commit of aborted transaction.");
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 97105e38938c..c43c0869fedb 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4125,6 +4125,14 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
bctl->sys.target));
}
+ if (fs_info->send_in_progress) {
+ btrfs_warn_rl(fs_info,
+"cannot run balance while send operations are in progress (%d in progress)",
+ fs_info->send_in_progress);
+ ret = -EAGAIN;
+ goto out;
+ }
+
ret = insert_balance_item(fs_info, bctl);
if (ret && ret != -EEXIST)
goto out;
@@ -4139,13 +4147,14 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
spin_unlock(&fs_info->balance_lock);
}
- atomic_inc(&fs_info->balance_running);
+ ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+ set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
mutex_unlock(&fs_info->balance_mutex);
ret = __btrfs_balance(fs_info);
mutex_lock(&fs_info->balance_mutex);
- atomic_dec(&fs_info->balance_running);
+ clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
fs_info->num_tolerated_disk_barrier_failures =
@@ -4284,16 +4293,16 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
return -ENOTCONN;
}
- if (atomic_read(&fs_info->balance_running)) {
+ if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
atomic_inc(&fs_info->balance_pause_req);
mutex_unlock(&fs_info->balance_mutex);
wait_event(fs_info->balance_wait_q,
- atomic_read(&fs_info->balance_running) == 0);
+ !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
mutex_lock(&fs_info->balance_mutex);
/* we are good with balance_ctl ripped off from under us */
- BUG_ON(atomic_read(&fs_info->balance_running));
+ BUG_ON(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
atomic_dec(&fs_info->balance_pause_req);
} else {
ret = -ENOTCONN;
@@ -4319,10 +4328,10 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
* if we are running just wait and return, balance item is
* deleted in btrfs_balance in this case
*/
- if (atomic_read(&fs_info->balance_running)) {
+ if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
mutex_unlock(&fs_info->balance_mutex);
wait_event(fs_info->balance_wait_q,
- atomic_read(&fs_info->balance_running) == 0);
+ !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
mutex_lock(&fs_info->balance_mutex);
} else {
/* __cancel_balance needs volume_mutex */
@@ -4336,7 +4345,8 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
mutex_unlock(&fs_info->volume_mutex);
}
- BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
+ BUG_ON(fs_info->balance_ctl ||
+ test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
atomic_dec(&fs_info->balance_cancel_req);
mutex_unlock(&fs_info->balance_mutex);
return 0;
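Taken together, the send.c and volumes.c hunks implement symmetric exclusion under fs_info->balance_mutex: balance bails out with -EAGAIN while fs_info->send_in_progress is non-zero, and send bails out while the BTRFS_FS_BALANCE_RUNNING bit is set, bumping the counter only once that bit is clear. A condensed stand-alone model of the protocol; the pthread harness and names are illustrative, not from the patch:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t balance_mutex = PTHREAD_MUTEX_INITIALIZER;
static int send_in_progress;
static bool balance_running;

static int start_send(void)
{
	pthread_mutex_lock(&balance_mutex);
	if (balance_running) {
		pthread_mutex_unlock(&balance_mutex);
		return -1;	/* -EAGAIN in the kernel code */
	}
	send_in_progress++;	/* seen by start_balance() under the lock */
	pthread_mutex_unlock(&balance_mutex);
	return 0;
}

static int start_balance(void)
{
	pthread_mutex_lock(&balance_mutex);
	if (send_in_progress) {
		pthread_mutex_unlock(&balance_mutex);
		return -1;
	}
	balance_running = true;
	pthread_mutex_unlock(&balance_mutex);
	return 0;
}

int main(void)
{
	if (start_send() == 0)
		printf("send running; balance returns %d\n", start_balance());
	return 0;
}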
diff --git a/fs/exec.c b/fs/exec.c
index 04d473edbd1a..1eb10ef0792b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1795,7 +1795,7 @@ static int do_execveat_common(int fd, struct filename *filename,
current->fs->in_exec = 0;
current->in_execve = 0;
acct_update_integrals(current);
- task_numa_free(current);
+ task_numa_free(current, false);
free_bprm(bprm);
kfree(pathbuf);
putname(filename);
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index f9aefcda5854..cc7d1f094393 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -637,10 +637,8 @@ static void flush_descriptor(journal_t *journal,
{
jbd2_journal_revoke_header_t *header;
- if (is_journal_aborted(journal)) {
- put_bh(descriptor);
+ if (is_journal_aborted(journal))
return;
- }
header = (jbd2_journal_revoke_header_t *)descriptor->b_data;
header->r_count = cpu_to_be32(offset);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d87e7bdd10cd..67bac64d1c1e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -418,7 +418,8 @@ nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
*/
static void
nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
- struct nfs_page *old_head)
+ struct nfs_page *old_head,
+ struct inode *inode)
{
while (destroy_list) {
struct nfs_page *subreq = destroy_list;
@@ -443,9 +444,12 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
nfs_page_group_clear_bits(subreq);
/* release the PG_INODE_REF reference */
- if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
+ if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) {
nfs_release_request(subreq);
- else
+ spin_lock(&inode->i_lock);
+ NFS_I(inode)->nrequests--;
+ spin_unlock(&inode->i_lock);
+ } else
WARN_ON_ONCE(1);
} else {
WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
@@ -574,25 +578,24 @@ try_again:
head->wb_bytes = total_bytes;
}
+ /* Postpone destruction of this request */
+ if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) {
+ set_bit(PG_INODE_REF, &head->wb_flags);
+ kref_get(&head->wb_kref);
+ NFS_I(inode)->nrequests++;
+ }
+
/*
* prepare head request to be added to new pgio descriptor
*/
nfs_page_group_clear_bits(head);
- /*
- * some part of the group was still on the inode list - otherwise
- * the group wouldn't be involved in async write.
- * grab a reference for the head request, iff it needs one.
- */
- if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
- kref_get(&head->wb_kref);
-
nfs_page_group_unlock(head);
/* drop lock to clean up requests on destroy list */
spin_unlock(&inode->i_lock);
- nfs_destroy_unlinked_subrequests(destroy_list, head);
+ nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
/* still holds ref on head from nfs_page_find_head_request_locked
* and still has lock on head from lock loop */
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 6bd916bd35e2..5f17641f040f 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -34,6 +34,9 @@
#include "xfs_trans_space.h"
#include "xfs_trace.h"
+#define _ALLOC true
+#define _FREE false
+
/*
* A buffer has a format structure overhead in the log in addition
* to the data, so we need to take this into account when reserving
@@ -132,43 +135,77 @@ xfs_calc_inode_res(
}
/*
- * The free inode btree is a conditional feature and the log reservation
- * requirements differ slightly from that of the traditional inode allocation
- * btree. The finobt tracks records for inode chunks with at least one free
- * inode. A record can be removed from the tree for an inode allocation
- * or free and thus the finobt reservation is unconditional across:
+ * Inode btree record insertion/removal modifies the inode btree and free space
+ * btrees (since the inobt does not use the agfl). This requires the following
+ * reservation:
*
- * - inode allocation
- * - inode free
- * - inode chunk allocation
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
*
- * The 'modify' param indicates to include the record modification scenario. The
- * 'alloc' param indicates to include the reservation for free space btree
- * modifications on behalf of finobt modifications. This is required only for
- * transactions that do not already account for free space btree modifications.
+ * The caller must account for SB and AG header modifications, etc.
+ */
+STATIC uint
+xfs_calc_inobt_res(
+ struct xfs_mount *mp)
+{
+ return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
+ XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * The free inode btree is a conditional feature. The behavior differs slightly
+ * from that of the traditional inode btree in that the finobt tracks records
+ * for inode chunks with at least one free inode. A record can be removed from
+ * the tree during individual inode allocation. Therefore the finobt
+ * reservation is unconditional for both the inode chunk allocation and
+ * individual inode allocation (modify) cases.
*
- * the free inode btree: max depth * block size
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- * the free inode btree entry: block size
+ * Behavior aside, the reservation for finobt modification is equivalent to the
+ * traditional inobt: cover a full finobt shape change plus block allocation.
*/
STATIC uint
xfs_calc_finobt_res(
- struct xfs_mount *mp,
- int alloc,
- int modify)
+ struct xfs_mount *mp)
{
- uint res;
-
if (!xfs_sb_version_hasfinobt(&mp->m_sb))
return 0;
- res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
- if (alloc)
- res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1));
- if (modify)
- res += (uint)XFS_FSB_TO_B(mp, 1);
+ return xfs_calc_inobt_res(mp);
+}
+/*
+ * Calculate the reservation required to allocate or free an inode chunk. This
+ * includes:
+ *
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ * the inode chunk: m_ialloc_blks * N
+ *
+ * The size N of the inode chunk reservation depends on whether it is for
+ * allocation or free and which type of create transaction is in use. An inode
+ * chunk free always invalidates the buffers and only requires reservation for
+ * headers (N == 0). An inode chunk allocation requires a chunk sized
+ * reservation on v4 and older superblocks to initialize the chunk. No chunk
+ * reservation is required for allocation on v5 supers, which use ordered
+ * buffers to initialize.
+ */
+STATIC uint
+xfs_calc_inode_chunk_res(
+ struct xfs_mount *mp,
+ bool alloc)
+{
+ uint res, size = 0;
+
+ res = xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
+ XFS_FSB_TO_B(mp, 1));
+ if (alloc) {
+ /* icreate tx uses ordered buffers */
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ return res;
+ size = XFS_FSB_TO_B(mp, 1);
+ }
+
+ res += xfs_calc_buf_res(mp->m_ialloc_blks, size);
return res;
}
@@ -232,8 +269,6 @@ xfs_calc_write_reservation(
* the super block to reflect the freed blocks: sector size
* worst case split in allocation btrees per extent assuming 4 extents:
* 4 exts * 2 trees * (2 * max depth - 1) * block size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
*/
STATIC uint
xfs_calc_itruncate_reservation(
@@ -245,12 +280,7 @@ xfs_calc_itruncate_reservation(
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
- XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(5, 0) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(2 + mp->m_ialloc_blks +
- mp->m_in_maxlevels, 0)));
+ XFS_FSB_TO_B(mp, 1))));
}
/*
@@ -282,13 +312,14 @@ xfs_calc_rename_reservation(
* For removing an inode from unlinked list at first, we can modify:
* the agi hash list and counters: sector size
* the on disk inode before ours in the agi hash list: inode cluster size
+ * the on disk inode in the agi hash list: inode cluster size
*/
STATIC uint
xfs_calc_iunlink_remove_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
- max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+ 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
}
/*
@@ -320,13 +351,13 @@ xfs_calc_link_reservation(
/*
* For adding an inode to unlinked list we can modify:
* the agi hash list: sector size
- * the unlinked inode: inode size
+ * the on disk inode: inode cluster size
*/
STATIC uint
xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
- xfs_calc_inode_res(mp, 1);
+ max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
}
/*
@@ -379,45 +410,16 @@ xfs_calc_create_resv_modify(
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
(uint)XFS_FSB_TO_B(mp, 1) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_finobt_res(mp, 1, 1);
-}
-
-/*
- * For create we can allocate some inodes giving:
- * the agi and agf of the ag getting the new inodes: 2 * sectorsize
- * the superblock for the nlink flag: sector size
- * the inode blocks allocated: mp->m_ialloc_blks * blocksize
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_create_resv_alloc(
- struct xfs_mount *mp)
-{
- return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
- mp->m_sb.sb_sectsize +
- xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1));
-}
-
-STATIC uint
-__xfs_calc_create_reservation(
- struct xfs_mount *mp)
-{
- return XFS_DQUOT_LOGRES(mp) +
- MAX(xfs_calc_create_resv_alloc(mp),
- xfs_calc_create_resv_modify(mp));
+ xfs_calc_finobt_res(mp);
}
/*
* For icreate we can allocate some inodes giving:
* the agi and agf of the ag getting the new inodes: 2 * sectorsize
* the superblock for the nlink flag: sector size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- * the finobt (record insertion)
+ * the inode chunk (allocation, optional init)
+ * the inobt (record insertion)
+ * the finobt (optional, record insertion)
*/
STATIC uint
xfs_calc_icreate_resv_alloc(
@@ -425,10 +427,9 @@ xfs_calc_icreate_resv_alloc(
{
return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
mp->m_sb.sb_sectsize +
- xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_finobt_res(mp, 0, 0);
+ xfs_calc_inode_chunk_res(mp, _ALLOC) +
+ xfs_calc_inobt_res(mp) +
+ xfs_calc_finobt_res(mp);
}
STATIC uint
@@ -440,26 +441,12 @@ xfs_calc_icreate_reservation(xfs_mount_t *mp)
}
STATIC uint
-xfs_calc_create_reservation(
- struct xfs_mount *mp)
-{
- if (xfs_sb_version_hascrc(&mp->m_sb))
- return xfs_calc_icreate_reservation(mp);
- return __xfs_calc_create_reservation(mp);
-
-}
-
-STATIC uint
xfs_calc_create_tmpfile_reservation(
struct xfs_mount *mp)
{
uint res = XFS_DQUOT_LOGRES(mp);
- if (xfs_sb_version_hascrc(&mp->m_sb))
- res += xfs_calc_icreate_resv_alloc(mp);
- else
- res += xfs_calc_create_resv_alloc(mp);
-
+ res += xfs_calc_icreate_resv_alloc(mp);
return res + xfs_calc_iunlink_add_reservation(mp);
}
@@ -470,7 +457,7 @@ STATIC uint
xfs_calc_mkdir_reservation(
struct xfs_mount *mp)
{
- return xfs_calc_create_reservation(mp);
+ return xfs_calc_icreate_reservation(mp);
}
@@ -483,20 +470,24 @@ STATIC uint
xfs_calc_symlink_reservation(
struct xfs_mount *mp)
{
- return xfs_calc_create_reservation(mp) +
+ return xfs_calc_icreate_reservation(mp) +
xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
}
/*
* In freeing an inode we can modify:
* the inode being freed: inode size
- * the super block free inode counter: sector size
- * the agi hash list and counters: sector size
- * the inode btree entry: block size
- * the on disk inode before ours in the agi hash list: inode cluster size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
+ * the super block free inode counter, AGF and AGFL: sector size
+ * the on disk inode (agi unlinked list removal)
+ * the inode chunk (invalidated, headers only)
+ * the inode btree
* the finobt (record insertion, removal or modification)
+ *
+ * Note that the inode chunk res. includes an allocfree res. for freeing of the
+ * inode chunk. This is technically extraneous because the inode chunk free is
+ * deferred (it occurs after a transaction roll). Include the extra reservation
+ * anyway since we've had reports of ifree transaction overruns due to too many
+ * agfl fixups during inode chunk frees.
*/
STATIC uint
xfs_calc_ifree_reservation(
@@ -504,15 +495,11 @@ xfs_calc_ifree_reservation(
{
return XFS_DQUOT_LOGRES(mp) +
xfs_calc_inode_res(mp, 1) +
- xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_iunlink_remove_reservation(mp) +
- xfs_calc_buf_res(1, 0) +
- xfs_calc_buf_res(2 + mp->m_ialloc_blks +
- mp->m_in_maxlevels, 0) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_finobt_res(mp, 0, 1);
+ xfs_calc_inode_chunk_res(mp, _FREE) +
+ xfs_calc_inobt_res(mp) +
+ xfs_calc_finobt_res(mp);
}
/*
@@ -842,7 +829,7 @@ xfs_trans_resv_calc(
resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT;
resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
- resp->tr_create.tr_logres = xfs_calc_create_reservation(mp);
+ resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
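The reworked helpers reduce to the formulas spelled out in their comments: xfs_calc_inobt_res() covers max-depth inobt blocks plus 2 trees * (max depth - 1) allocation btree blocks, and the _FREE variant of xfs_calc_inode_chunk_res() logs only buffer headers for the invalidated chunk (N == 0). A back-of-the-envelope model with invented geometry; BUF_LOG_HDR stands in for the real per-buffer log overhead:

#include <stdio.h>

/* Invented geometry; the formulas follow the comments in the hunks above. */
#define BLOCKSIZE	4096u	/* XFS_FSB_TO_B(mp, 1) */
#define IN_MAXLEVELS	3u	/* mp->m_in_maxlevels */
#define ALLOC_MAXLEVELS	5u	/* allocation btree max depth */
#define IALLOC_BLKS	4u	/* mp->m_ialloc_blks */
#define BUF_LOG_HDR	128u	/* stand-in for the per-buffer log overhead */

/* Rough model of xfs_calc_buf_res(): nbufs buffers of size bytes each. */
static unsigned int buf_res(unsigned int nbufs, unsigned int size)
{
	return nbufs * (size + BUF_LOG_HDR);
}

int main(void)
{
	unsigned int allocfree = 2u * (ALLOC_MAXLEVELS - 1);	/* 2 trees * (depth - 1) */
	unsigned int inobt_res = buf_res(IN_MAXLEVELS, BLOCKSIZE) +
				 buf_res(allocfree, BLOCKSIZE);
	unsigned int chunk_free = buf_res(allocfree, BLOCKSIZE) +
				  buf_res(IALLOC_BLKS, 0);	/* headers only */

	printf("inobt res: %u bytes, chunk-free res: %u bytes\n",
	       inobt_res, chunk_free);
	return 0;
}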
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2841926bdfa9..2847347d7f63 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2087,7 +2087,7 @@ xlog_print_tic_res(
};
#undef REG_TYPE_STR
- xfs_warn(mp, "xlog_write: reservation summary:");
+ xfs_warn(mp, "ticket reservation summary:");
xfs_warn(mp, " unit res = %d bytes",
ticket->t_unit_res);
xfs_warn(mp, " current res = %d bytes",
@@ -2108,10 +2108,57 @@ xlog_print_tic_res(
"bad-rtype" : res_type_str[r_type]),
ticket->t_res_arr[i].r_len);
}
+}
+
+/*
+ * Print a summary of the transaction.
+ */
+void
+xlog_print_trans(
+ struct xfs_trans *tp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_log_item_desc *lidp;
+
+ /* dump core transaction and ticket info */
+ xfs_warn(mp, "transaction summary:");
+ xfs_warn(mp, " log res = %d", tp->t_log_res);
+ xfs_warn(mp, " log count = %d", tp->t_log_count);
+ xfs_warn(mp, " flags = 0x%x", tp->t_flags);
+
+ xlog_print_tic_res(mp, tp->t_ticket);
+
+ /* dump each log item */
+ list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+ struct xfs_log_item *lip = lidp->lid_item;
+ struct xfs_log_vec *lv = lip->li_lv;
+ struct xfs_log_iovec *vec;
+ int i;
+
+ xfs_warn(mp, "log item: ");
+ xfs_warn(mp, " type = 0x%x", lip->li_type);
+ xfs_warn(mp, " flags = 0x%x", lip->li_flags);
+ if (!lv)
+ continue;
+ xfs_warn(mp, " niovecs = %d", lv->lv_niovecs);
+ xfs_warn(mp, " size = %d", lv->lv_size);
+ xfs_warn(mp, " bytes = %d", lv->lv_bytes);
+ xfs_warn(mp, " buf len = %d", lv->lv_buf_len);
- xfs_alert_tag(mp, XFS_PTAG_LOGRES,
- "xlog_write: reservation ran out. Need to up reservation");
- xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
+ /* dump each iovec for the log item */
+ vec = lv->lv_iovecp;
+ for (i = 0; i < lv->lv_niovecs; i++) {
+ int dumplen = min(vec->i_len, 32);
+
+ xfs_warn(mp, " iovec[%d]", i);
+ xfs_warn(mp, " type = 0x%x", vec->i_type);
+ xfs_warn(mp, " len = %d", vec->i_len);
+ xfs_warn(mp, " first %d bytes of iovec[%d]:", dumplen, i);
+ xfs_hex_dump(vec->i_addr, dumplen);
+
+ vec++;
+ }
+ }
}
/*
@@ -2384,8 +2431,12 @@ xlog_write(
if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
ticket->t_curr_res -= sizeof(xlog_op_header_t);
- if (ticket->t_curr_res < 0)
+ if (ticket->t_curr_res < 0) {
+ xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
+ "ctx ticket reservation ran out. Need to up reservation");
xlog_print_tic_res(log->l_mp, ticket);
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
+ }
index = 0;
lv = log_vector;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 1f53dc23aebe..950717856ced 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -410,6 +410,7 @@ xlog_cil_insert_items(
int len = 0;
int diff_iovecs = 0;
int iclog_space;
+ int iovhdr_res = 0, split_res = 0, ctx_res = 0;
ASSERT(tp);
@@ -419,30 +420,11 @@ xlog_cil_insert_items(
*/
xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
- /*
- * Now (re-)position everything modified at the tail of the CIL.
- * We do this here so we only need to take the CIL lock once during
- * the transaction commit.
- */
spin_lock(&cil->xc_cil_lock);
- list_for_each_entry(lidp, &tp->t_items, lid_trans) {
- struct xfs_log_item *lip = lidp->lid_item;
-
- /* Skip items which aren't dirty in this transaction. */
- if (!(lidp->lid_flags & XFS_LID_DIRTY))
- continue;
-
- /*
- * Only move the item if it isn't already at the tail. This is
- * to prevent a transient list_empty() state when reinserting
- * an item that is already the only item in the CIL.
- */
- if (!list_is_last(&lip->li_cil, &cil->xc_cil))
- list_move_tail(&lip->li_cil, &cil->xc_cil);
- }
/* account for space used by new iovec headers */
- len += diff_iovecs * sizeof(xlog_op_header_t);
+ iovhdr_res = diff_iovecs * sizeof(xlog_op_header_t);
+ len += iovhdr_res;
ctx->nvecs += diff_iovecs;
/* attach the transaction to the CIL if it has any busy extents */
@@ -457,28 +439,66 @@ xlog_cil_insert_items(
* during the transaction commit.
*/
if (ctx->ticket->t_curr_res == 0) {
- ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
- tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res;
+ ctx_res = ctx->ticket->t_unit_res;
+ ctx->ticket->t_curr_res = ctx_res;
+ tp->t_ticket->t_curr_res -= ctx_res;
}
/* do we need space for more log record headers? */
iclog_space = log->l_iclog_size - log->l_iclog_hsize;
if (len > 0 && (ctx->space_used / iclog_space !=
(ctx->space_used + len) / iclog_space)) {
- int hdrs;
-
- hdrs = (len + iclog_space - 1) / iclog_space;
+ split_res = (len + iclog_space - 1) / iclog_space;
/* need to take into account split region headers, too */
- hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
- ctx->ticket->t_unit_res += hdrs;
- ctx->ticket->t_curr_res += hdrs;
- tp->t_ticket->t_curr_res -= hdrs;
+ split_res *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
+ ctx->ticket->t_unit_res += split_res;
+ ctx->ticket->t_curr_res += split_res;
+ tp->t_ticket->t_curr_res -= split_res;
ASSERT(tp->t_ticket->t_curr_res >= len);
}
tp->t_ticket->t_curr_res -= len;
ctx->space_used += len;
+ /*
+ * If we've overrun the reservation, dump the tx details before we move
+ * the log items. Shutdown is imminent...
+ */
+ if (WARN_ON(tp->t_ticket->t_curr_res < 0)) {
+ xfs_warn(log->l_mp, "Transaction log reservation overrun:");
+ xfs_warn(log->l_mp,
+ " log items: %d bytes (iov hdrs: %d bytes)",
+ len, iovhdr_res);
+ xfs_warn(log->l_mp, " split region headers: %d bytes",
+ split_res);
+ xfs_warn(log->l_mp, " ctx ticket: %d bytes", ctx_res);
+ xlog_print_trans(tp);
+ }
+
+ /*
+ * Now (re-)position everything modified at the tail of the CIL.
+ * We do this here so we only need to take the CIL lock once during
+ * the transaction commit.
+ */
+ list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+ struct xfs_log_item *lip = lidp->lid_item;
+
+ /* Skip items which aren't dirty in this transaction. */
+ if (!(lidp->lid_flags & XFS_LID_DIRTY))
+ continue;
+
+ /*
+ * Only move the item if it isn't already at the tail. This is
+ * to prevent a transient list_empty() state when reinserting
+ * an item that is already the only item in the CIL.
+ */
+ if (!list_is_last(&lip->li_cil, &cil->xc_cil))
+ list_move_tail(&lip->li_cil, &cil->xc_cil);
+ }
+
spin_unlock(&cil->xc_cil_lock);
+
+ if (tp->t_ticket->t_curr_res < 0)
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
}
static void
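
In the hunk above, split_res is the worst-case number of extra record headers needed if the checkpoint's len bytes straddle iclog boundaries; each potential split costs one iclog header plus one op header. A worked example with assumed sizes (illustrative only):

	/*
	 * Assumed: 32 KiB iclogs with a 512-byte header, so
	 *   iclog_space = 32768 - 512 = 32256 bytes, and len = 70000 bytes.
	 *
	 *   split_res  = (70000 + 32256 - 1) / 32256 = 3 headers
	 *   split_res *= 512 + 12   (l_iclog_hsize + sizeof(xlog_op_header))
	 *              = 1572 bytes
	 *
	 * i.e. 1572 bytes migrate from the transaction ticket to the CIL
	 * context ticket to cover the possible region splits.
	 */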
@@ -987,10 +1007,6 @@ xfs_log_commit_cil(
xlog_cil_insert_items(log, tp);
- /* check we didn't blow the reservation */
- if (tp->t_ticket->t_curr_res < 0)
- xlog_print_tic_res(mp, tp->t_ticket);
-
tp->t_commit_lsn = cil->xc_ctx->sequence;
if (commit_lsn)
*commit_lsn = tp->t_commit_lsn;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 1f112815aa0b..ecdc5e446202 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -459,6 +459,7 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
}
void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
+void xlog_print_trans(struct xfs_trans *);
int
xlog_write(
struct xlog *log,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5ba3fa2fa50e..e8dbeb9e6192 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -810,6 +810,7 @@ void kvm_arch_check_processor_compat(void *rtn);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
+bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
#ifndef __KVM_HAVE_ARCH_VM_ALLOC
/*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4b16d3986b35..3f24404d74f7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -922,7 +922,15 @@ struct task_struct {
u64 last_sum_exec_runtime;
struct callback_head numa_work;
- struct numa_group *numa_group;
+ /*
+ * This pointer is only modified for current in syscall and
+ * pagefault context (and for tasks being destroyed), so it can be read
+ * from any of the following contexts:
+ * - RCU read-side critical section
+ * - current->numa_group from everywhere
+ * - task's runqueue locked, task not running
+ */
+ struct numa_group __rcu *numa_group;
/*
* numa_faults is an array split into four regions:
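
Per the locking rules documented above, a reader that is neither current nor holding the task's runqueue lock must bracket the access in an RCU read-side critical section. A minimal sketch of that reader pattern (function name hypothetical; the scheduler hunks below follow the same shape):

	/* Sketch: safely sample another task's NUMA group id under RCU. */
	static pid_t sample_numa_gid(struct task_struct *p)
	{
		struct numa_group *ng;
		pid_t gid = 0;

		rcu_read_lock();
		ng = rcu_dereference(p->numa_group);
		if (ng)
			gid = ng->gid;
		rcu_read_unlock();

		return gid;
	}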
diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
index 35d5fc77b4be..fbb107f3afcd 100644
--- a/include/linux/sched/numa_balancing.h
+++ b/include/linux/sched/numa_balancing.h
@@ -18,7 +18,7 @@
extern void task_numa_fault(int last_node, int node, int pages, int flags);
extern pid_t task_numa_group_id(struct task_struct *p);
extern void set_numabalancing_state(bool enabled);
-extern void task_numa_free(struct task_struct *p);
+extern void task_numa_free(struct task_struct *p, bool final);
extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
int src_nid, int dst_cpu);
#else
@@ -33,7 +33,7 @@ static inline pid_t task_numa_group_id(struct task_struct *p)
static inline void set_numabalancing_state(bool enabled)
{
}
-static inline void task_numa_free(struct task_struct *p)
+static inline void task_numa_free(struct task_struct *p, bool final)
{
}
static inline bool should_numa_migrate_memory(struct task_struct *p,
diff --git a/kernel/fork.c b/kernel/fork.c
index 75ee3bcd22ff..11a880b869a9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -461,7 +461,7 @@ void __put_task_struct(struct task_struct *tsk)
WARN_ON(tsk == current);
cgroup_free(tsk);
- task_numa_free(tsk);
+ task_numa_free(tsk, true);
security_task_free(tsk);
exit_creds(tsk);
delayacct_tsk_free(tsk);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f0c001ca0df4..29b7ac651692 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1068,6 +1068,21 @@ struct numa_group {
unsigned long faults[0];
};
+/*
+ * For functions that can be called in multiple contexts that permit reading
+ * ->numa_group (see struct task_struct for locking rules).
+ */
+static struct numa_group *deref_task_numa_group(struct task_struct *p)
+{
+ return rcu_dereference_check(p->numa_group, p == current ||
+ (lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu)));
+}
+
+static struct numa_group *deref_curr_numa_group(struct task_struct *p)
+{
+ return rcu_dereference_protected(p->numa_group, p == current);
+}
+
static inline unsigned long group_faults_priv(struct numa_group *ng);
static inline unsigned long group_faults_shared(struct numa_group *ng);
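
Of the two accessors above, deref_task_numa_group() is the general one: rcu_dereference_check() accepts an RCU read-side section or the documented rq-lock condition. deref_curr_numa_group() skips those checks but is only valid for current. A hedged sketch of choosing between them:

	/* Illustrative only: pick the accessor that matches the context. */
	static void numa_group_access_example(struct task_struct *p)
	{
		struct numa_group *ng;

		if (p == current) {
			ng = deref_curr_numa_group(p);	/* own pointer, no RCU needed */
		} else {
			rcu_read_lock();		/* satisfies rcu_dereference_check() */
			ng = deref_task_numa_group(p);
			/* ... use ng only inside the critical section ... */
			rcu_read_unlock();
		}
		(void)ng;
	}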
@@ -1111,10 +1126,12 @@ static unsigned int task_scan_start(struct task_struct *p)
{
unsigned long smin = task_scan_min(p);
unsigned long period = smin;
+ struct numa_group *ng;
/* Scale the maximum scan period with the amount of shared memory. */
- if (p->numa_group) {
- struct numa_group *ng = p->numa_group;
+ rcu_read_lock();
+ ng = rcu_dereference(p->numa_group);
+ if (ng) {
unsigned long shared = group_faults_shared(ng);
unsigned long private = group_faults_priv(ng);
@@ -1122,6 +1139,7 @@ static unsigned int task_scan_start(struct task_struct *p)
period *= shared + 1;
period /= private + shared + 1;
}
+ rcu_read_unlock();
return max(smin, period);
}
@@ -1130,13 +1148,14 @@ static unsigned int task_scan_max(struct task_struct *p)
{
unsigned long smin = task_scan_min(p);
unsigned long smax;
+ struct numa_group *ng;
/* Watch for min being lower than max due to floor calculations */
smax = sysctl_numa_balancing_scan_period_max / task_nr_scan_windows(p);
/* Scale the maximum scan period with the amount of shared memory. */
- if (p->numa_group) {
- struct numa_group *ng = p->numa_group;
+ ng = deref_curr_numa_group(p);
+ if (ng) {
unsigned long shared = group_faults_shared(ng);
unsigned long private = group_faults_priv(ng);
unsigned long period = smax;
@@ -1168,7 +1187,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
p->numa_scan_period = sysctl_numa_balancing_scan_delay;
p->numa_work.next = &p->numa_work;
p->numa_faults = NULL;
- p->numa_group = NULL;
+ RCU_INIT_POINTER(p->numa_group, NULL);
p->last_task_numa_placement = 0;
p->last_sum_exec_runtime = 0;
@@ -1215,7 +1234,16 @@ static void account_numa_dequeue(struct rq *rq, struct task_struct *p)
pid_t task_numa_group_id(struct task_struct *p)
{
- return p->numa_group ? p->numa_group->gid : 0;
+ struct numa_group *ng;
+ pid_t gid = 0;
+
+ rcu_read_lock();
+ ng = rcu_dereference(p->numa_group);
+ if (ng)
+ gid = ng->gid;
+ rcu_read_unlock();
+
+ return gid;
}
/*
@@ -1240,11 +1268,13 @@ static inline unsigned long task_faults(struct task_struct *p, int nid)
static inline unsigned long group_faults(struct task_struct *p, int nid)
{
- if (!p->numa_group)
+ struct numa_group *ng = deref_task_numa_group(p);
+
+ if (!ng)
return 0;
- return p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
- p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 1)];
+ return ng->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
+ ng->faults[task_faults_idx(NUMA_MEM, nid, 1)];
}
static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
@@ -1382,12 +1412,13 @@ static inline unsigned long task_weight(struct task_struct *p, int nid,
static inline unsigned long group_weight(struct task_struct *p, int nid,
int dist)
{
+ struct numa_group *ng = deref_task_numa_group(p);
unsigned long faults, total_faults;
- if (!p->numa_group)
+ if (!ng)
return 0;
- total_faults = p->numa_group->total_faults;
+ total_faults = ng->total_faults;
if (!total_faults)
return 0;
@@ -1401,7 +1432,7 @@ static inline unsigned long group_weight(struct task_struct *p, int nid,
bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
int src_nid, int dst_cpu)
{
- struct numa_group *ng = p->numa_group;
+ struct numa_group *ng = deref_curr_numa_group(p);
int dst_nid = cpu_to_node(dst_cpu);
int last_cpupid, this_cpupid;
@@ -1585,13 +1616,14 @@ static bool load_too_imbalanced(long src_load, long dst_load,
static void task_numa_compare(struct task_numa_env *env,
long taskimp, long groupimp, bool maymove)
{
+ struct numa_group *cur_ng, *p_ng = deref_curr_numa_group(env->p);
struct rq *dst_rq = cpu_rq(env->dst_cpu);
+ long imp = p_ng ? groupimp : taskimp;
struct task_struct *cur;
long src_load, dst_load;
- long load;
- long imp = env->p->numa_group ? groupimp : taskimp;
- long moveimp = imp;
int dist = env->dist;
+ long moveimp = imp;
+ long load;
if (READ_ONCE(dst_rq->numa_migrate_on))
return;
@@ -1630,21 +1662,22 @@ static void task_numa_compare(struct task_numa_env *env,
* If dst and source tasks are in the same NUMA group, or not
* in any group then look only at task weights.
*/
- if (cur->numa_group == env->p->numa_group) {
+ cur_ng = rcu_dereference(cur->numa_group);
+ if (cur_ng == p_ng) {
imp = taskimp + task_weight(cur, env->src_nid, dist) -
task_weight(cur, env->dst_nid, dist);
/*
* Add some hysteresis to prevent swapping the
* tasks within a group over tiny differences.
*/
- if (cur->numa_group)
+ if (cur_ng)
imp -= imp / 16;
} else {
/*
* Compare the group weights. If a task is all by itself
* (not part of a group), use the task weight instead.
*/
- if (cur->numa_group && env->p->numa_group)
+ if (cur_ng && p_ng)
imp += group_weight(cur, env->src_nid, dist) -
group_weight(cur, env->dst_nid, dist);
else
@@ -1742,11 +1775,12 @@ static int task_numa_migrate(struct task_struct *p)
.best_imp = 0,
.best_cpu = -1,
};
+ unsigned long taskweight, groupweight;
struct sched_domain *sd;
+ long taskimp, groupimp;
+ struct numa_group *ng;
struct rq *best_rq;
- unsigned long taskweight, groupweight;
int nid, ret, dist;
- long taskimp, groupimp;
/*
* Pick the lowest SD_NUMA domain, as that would have the smallest
@@ -1792,7 +1826,8 @@ static int task_numa_migrate(struct task_struct *p)
* multiple NUMA nodes; in order to better consolidate the group,
* we need to check other locations.
*/
- if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) {
+ ng = deref_curr_numa_group(p);
+ if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) {
for_each_online_node(nid) {
if (nid == env.src_nid || nid == p->numa_preferred_nid)
continue;
@@ -1825,7 +1860,7 @@ static int task_numa_migrate(struct task_struct *p)
* A task that migrated to a second choice node will be better off
* trying for a better one later. Do not set the preferred node here.
*/
- if (p->numa_group) {
+ if (ng) {
if (env.best_cpu == -1)
nid = env.src_nid;
else
@@ -2116,6 +2151,7 @@ static void task_numa_placement(struct task_struct *p)
unsigned long total_faults;
u64 runtime, period;
spinlock_t *group_lock = NULL;
+ struct numa_group *ng;
/*
* The p->mm->numa_scan_seq field gets updated without
@@ -2133,8 +2169,9 @@ static void task_numa_placement(struct task_struct *p)
runtime = numa_get_avg_runtime(p, &period);
/* If the task is part of a group prevent parallel updates to group stats */
- if (p->numa_group) {
- group_lock = &p->numa_group->lock;
+ ng = deref_curr_numa_group(p);
+ if (ng) {
+ group_lock = &ng->lock;
spin_lock_irq(group_lock);
}
@@ -2175,7 +2212,7 @@ static void task_numa_placement(struct task_struct *p)
p->numa_faults[cpu_idx] += f_diff;
faults += p->numa_faults[mem_idx];
p->total_numa_faults += diff;
- if (p->numa_group) {
+ if (ng) {
/*
* safe because we can only change our own group
*
@@ -2183,14 +2220,14 @@ static void task_numa_placement(struct task_struct *p)
* nid and priv in a specific region because it
* is at the beginning of the numa_faults array.
*/
- p->numa_group->faults[mem_idx] += diff;
- p->numa_group->faults_cpu[mem_idx] += f_diff;
- p->numa_group->total_faults += diff;
- group_faults += p->numa_group->faults[mem_idx];
+ ng->faults[mem_idx] += diff;
+ ng->faults_cpu[mem_idx] += f_diff;
+ ng->total_faults += diff;
+ group_faults += ng->faults[mem_idx];
}
}
- if (!p->numa_group) {
+ if (!ng) {
if (faults > max_faults) {
max_faults = faults;
max_nid = nid;
@@ -2201,8 +2238,8 @@ static void task_numa_placement(struct task_struct *p)
}
}
- if (p->numa_group) {
- numa_group_count_active_nodes(p->numa_group);
+ if (ng) {
+ numa_group_count_active_nodes(ng);
spin_unlock_irq(group_lock);
max_nid = preferred_group_nid(p, max_nid);
}
@@ -2236,7 +2273,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
int cpu = cpupid_to_cpu(cpupid);
int i;
- if (unlikely(!p->numa_group)) {
+ if (unlikely(!deref_curr_numa_group(p))) {
unsigned int size = sizeof(struct numa_group) +
4*nr_node_ids*sizeof(unsigned long);
@@ -2272,7 +2309,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
if (!grp)
goto no_join;
- my_grp = p->numa_group;
+ my_grp = deref_curr_numa_group(p);
if (grp == my_grp)
goto no_join;
@@ -2334,13 +2371,24 @@ no_join:
return;
}
-void task_numa_free(struct task_struct *p)
+/*
+ * Get rid of NUMA statistics associated with a task (either current or dead).
+ * If @final is set, the task is dead and has reached refcount zero, so we can
+ * safely free all relevant data structures. Otherwise, there might be
+ * concurrent reads from places like load balancing and procfs, and we should
+ * reset the data back to default state without freeing ->numa_faults.
+ */
+void task_numa_free(struct task_struct *p, bool final)
{
- struct numa_group *grp = p->numa_group;
- void *numa_faults = p->numa_faults;
+ /* safe: p either is current or is being freed by current */
+ struct numa_group *grp = rcu_dereference_raw(p->numa_group);
+ unsigned long *numa_faults = p->numa_faults;
unsigned long flags;
int i;
+ if (!numa_faults)
+ return;
+
if (grp) {
spin_lock_irqsave(&grp->lock, flags);
for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
@@ -2353,8 +2401,14 @@ void task_numa_free(struct task_struct *p)
put_numa_group(grp);
}
- p->numa_faults = NULL;
- kfree(numa_faults);
+ if (final) {
+ p->numa_faults = NULL;
+ kfree(numa_faults);
+ } else {
+ p->total_numa_faults = 0;
+ for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
+ numa_faults[i] = 0;
+ }
}
/*
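
The @final split above exists because procfs and load-balancing paths may still read ->numa_faults while a task exits; only the last reference holder may actually free it. The two caller shapes, for illustration (the non-final context is assumed, not shown in this diff):

	/*
	 *   task_numa_free(tsk, true);   from __put_task_struct(): refcount
	 *                                reached zero, so kfree() of
	 *                                p->numa_faults is safe.
	 *
	 *   task_numa_free(p, false);    task still observable: zero the
	 *                                stats in place and keep the
	 *                                numa_faults array allocated.
	 */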
@@ -2407,7 +2461,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
* actively using should be counted as local. This allows the
* scan rate to slow down when a workload has settled down.
*/
- ng = p->numa_group;
+ ng = deref_curr_numa_group(p);
if (!priv && !local && ng && ng->active_nodes > 1 &&
numa_is_active_node(cpu_node, ng) &&
numa_is_active_node(mem_node, ng))
@@ -9972,18 +10026,22 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m)
{
int node;
unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0;
+ struct numa_group *ng;
+ rcu_read_lock();
+ ng = rcu_dereference(p->numa_group);
for_each_online_node(node) {
if (p->numa_faults) {
tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)];
tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)];
}
- if (p->numa_group) {
- gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)],
- gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)];
+ if (ng) {
+ gsf = ng->faults[task_faults_idx(NUMA_MEM, node, 0)],
+ gpf = ng->faults[task_faults_idx(NUMA_MEM, node, 1)];
}
print_numa_stats(m, node, tsf, tpf, gsf, gpf);
}
+ rcu_read_unlock();
}
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_SCHED_DEBUG */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d2112a75fb6d..e77fb8cdd6a6 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1761,6 +1761,12 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
return NULL;
/*
+ * First make sure the mappings are removed from all page-tables
+ * before they are freed.
+ */
+ vmalloc_sync_all();
+
+ /*
* In this function, newly allocated vm_struct has VM_UNINITIALIZED
* flag. It means that vm_struct is not fully initialized.
* Now, it is fully initialized, so remove this flag here.
@@ -2305,6 +2311,9 @@ EXPORT_SYMBOL(remap_vmalloc_range);
/*
* Implement a stub for vmalloc_sync_all() if the architecture chose not to
* have one.
+ *
+ * The purpose of this function is to make sure the vmalloc area
+ * mappings are identical in all page-tables in the system.
*/
void __weak vmalloc_sync_all(void)
{
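
The stub above relies on the linker's weak-symbol rules: generic code provides a no-op __weak default, and an architecture that needs vmalloc page-table synchronization links in a strong definition that silently replaces it. A generic sketch of the pattern (symbol name hypothetical, two separate translation units):

	/* common code: weak no-op default */
	void __weak sync_all_pagetables(void)
	{
	}

	/* arch code: the strong definition wins at link time */
	void sync_all_pagetables(void)
	{
		/* e.g. propagate kernel PGD entries to every page-table root */
	}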
diff --git a/sound/firewire/packets-buffer.c b/sound/firewire/packets-buffer.c
index ea1506679c66..3b09b8ef3a09 100644
--- a/sound/firewire/packets-buffer.c
+++ b/sound/firewire/packets-buffer.c
@@ -37,7 +37,7 @@ int iso_packets_buffer_init(struct iso_packets_buffer *b, struct fw_unit *unit,
packets_per_page = PAGE_SIZE / packet_size;
if (WARN_ON(!packets_per_page)) {
err = -EINVAL;
- goto error;
+ goto err_packets;
}
pages = DIV_ROUND_UP(count, packets_per_page);
diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c
index 532e081f8b8a..a361b0187f3d 100644
--- a/sound/pci/hda/hda_controller.c
+++ b/sound/pci/hda/hda_controller.c
@@ -609,11 +609,9 @@ static int azx_pcm_open(struct snd_pcm_substream *substream)
}
runtime->private_data = azx_dev;
- if (chip->gts_present)
- azx_pcm_hw.info = azx_pcm_hw.info |
- SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME;
-
runtime->hw = azx_pcm_hw;
+ if (chip->gts_present)
+ runtime->hw.info |= SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME;
runtime->hw.channels_min = hinfo->channels_min;
runtime->hw.channels_max = hinfo->channels_max;
runtime->hw.formats = hinfo->formats;
@@ -626,6 +624,13 @@ static int azx_pcm_open(struct snd_pcm_substream *substream)
20,
178000000);
+ /* for some reason, the playback stream stalls on PulseAudio with
+ * tsched=1 when a capture stream triggers. Until we figure out the
+ * real cause, disable tsched mode by setting the SNDRV_PCM_INFO_BATCH flag.
+ */
+ if (chip->driver_caps & AZX_DCAPS_AMD_WORKAROUND)
+ runtime->hw.info |= SNDRV_PCM_INFO_BATCH;
+
if (chip->align_buffer_size)
/* constrain buffer sizes to be multiple of 128
bytes. This is more efficient in terms of memory
diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h
index 7185ed574b41..b9248bc8c5b2 100644
--- a/sound/pci/hda/hda_controller.h
+++ b/sound/pci/hda/hda_controller.h
@@ -40,7 +40,7 @@
/* 14 unused */
#define AZX_DCAPS_CTX_WORKAROUND (1 << 15) /* X-Fi workaround */
#define AZX_DCAPS_POSFIX_LPIB (1 << 16) /* Use LPIB as default */
-/* 17 unused */
+#define AZX_DCAPS_AMD_WORKAROUND (1 << 17) /* AMD-specific workaround */
#define AZX_DCAPS_NO_64BIT (1 << 18) /* No 64bit address */
#define AZX_DCAPS_SYNC_WRITE (1 << 19) /* sync each cmd write */
#define AZX_DCAPS_OLD_SSYNC (1 << 20) /* Old SSYNC reg for ICH */
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index a8e8219b9757..9b626e83eae6 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -78,6 +78,7 @@ enum {
POS_FIX_VIACOMBO,
POS_FIX_COMBO,
POS_FIX_SKL,
+ POS_FIX_FIFO,
};
/* Defines for ATI HD Audio support in SB450 south bridge */
@@ -149,7 +150,7 @@ module_param_array(model, charp, NULL, 0444);
MODULE_PARM_DESC(model, "Use the given board model.");
module_param_array(position_fix, int, NULL, 0444);
MODULE_PARM_DESC(position_fix, "DMA pointer read method."
- "(-1 = system default, 0 = auto, 1 = LPIB, 2 = POSBUF, 3 = VIACOMBO, 4 = COMBO, 5 = SKL+).");
+ "(-1 = system default, 0 = auto, 1 = LPIB, 2 = POSBUF, 3 = VIACOMBO, 4 = COMBO, 5 = SKL+, 6 = FIFO).");
module_param_array(bdl_pos_adj, int, NULL, 0644);
MODULE_PARM_DESC(bdl_pos_adj, "BDL position adjustment offset.");
module_param_array(probe_mask, int, NULL, 0444);
@@ -346,6 +347,11 @@ enum {
#define AZX_DCAPS_PRESET_ATI_HDMI_NS \
(AZX_DCAPS_PRESET_ATI_HDMI | AZX_DCAPS_SNOOP_OFF)
+/* quirks for AMD SB */
+#define AZX_DCAPS_PRESET_AMD_SB \
+ (AZX_DCAPS_NO_TCSEL | AZX_DCAPS_SYNC_WRITE | AZX_DCAPS_AMD_WORKAROUND |\
+ AZX_DCAPS_SNOOP_TYPE(ATI) | AZX_DCAPS_PM_RUNTIME)
+
/* quirks for Nvidia */
#define AZX_DCAPS_PRESET_NVIDIA \
(AZX_DCAPS_NO_MSI | AZX_DCAPS_CORBRP_SELF_CLEAR |\
@@ -855,6 +861,49 @@ static unsigned int azx_via_get_position(struct azx *chip,
return bound_pos + mod_dma_pos;
}
+#define AMD_FIFO_SIZE 32
+
+/* get the current DMA position with FIFO size correction */
+static unsigned int azx_get_pos_fifo(struct azx *chip, struct azx_dev *azx_dev)
+{
+ struct snd_pcm_substream *substream = azx_dev->core.substream;
+ struct snd_pcm_runtime *runtime = substream->runtime;
+ unsigned int pos, delay;
+
+ pos = snd_hdac_stream_get_pos_lpib(azx_stream(azx_dev));
+ if (!runtime)
+ return pos;
+
+ runtime->delay = AMD_FIFO_SIZE;
+ delay = frames_to_bytes(runtime, AMD_FIFO_SIZE);
+ if (azx_dev->insufficient) {
+ if (pos < delay) {
+ delay = pos;
+ runtime->delay = bytes_to_frames(runtime, pos);
+ } else {
+ azx_dev->insufficient = 0;
+ }
+ }
+
+ /* correct the DMA position for capture stream */
+ if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) {
+ if (pos < delay)
+ pos += azx_dev->core.bufsize;
+ pos -= delay;
+ }
+
+ return pos;
+}
+
+static int azx_get_delay_from_fifo(struct azx *chip, struct azx_dev *azx_dev,
+ unsigned int pos)
+{
+ struct snd_pcm_substream *substream = azx_dev->core.substream;
+
+ /* just read back the value calculated in azx_get_pos_fifo() above */
+ return substream->runtime->delay;
+}
+
static unsigned int azx_skl_get_dpib_pos(struct azx *chip,
struct azx_dev *azx_dev)
{
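
To make the FIFO correction in azx_get_pos_fifo() above concrete, a worked example under assumed stream parameters (illustrative only):

	/*
	 * Assumed format: 48 kHz, 2ch, S16_LE => 4 bytes per frame, so
	 *   delay = frames_to_bytes(runtime, AMD_FIFO_SIZE) = 32 * 4 = 128.
	 *
	 * Capture stream, raw LPIB pos = 96, bufsize = 65536:
	 *   pos < delay  =>  pos += bufsize  ->  65632
	 *                    pos -= delay    ->  65504
	 *
	 * The reported pointer is pulled back behind the 32 frames still
	 * sitting in the FIFO, wrapping around the ring buffer correctly.
	 */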
@@ -1431,6 +1480,7 @@ static int check_position_fix(struct azx *chip, int fix)
case POS_FIX_VIACOMBO:
case POS_FIX_COMBO:
case POS_FIX_SKL:
+ case POS_FIX_FIFO:
return fix;
}
@@ -1447,6 +1497,10 @@ static int check_position_fix(struct azx *chip, int fix)
dev_dbg(chip->card->dev, "Using VIACOMBO position fix\n");
return POS_FIX_VIACOMBO;
}
+ if (chip->driver_caps & AZX_DCAPS_AMD_WORKAROUND) {
+ dev_dbg(chip->card->dev, "Using FIFO position fix\n");
+ return POS_FIX_FIFO;
+ }
if (chip->driver_caps & AZX_DCAPS_POSFIX_LPIB) {
dev_dbg(chip->card->dev, "Using LPIB position fix\n");
return POS_FIX_LPIB;
@@ -1467,6 +1521,7 @@ static void assign_position_fix(struct azx *chip, int fix)
[POS_FIX_VIACOMBO] = azx_via_get_position,
[POS_FIX_COMBO] = azx_get_pos_lpib,
[POS_FIX_SKL] = azx_get_pos_skl,
+ [POS_FIX_FIFO] = azx_get_pos_fifo,
};
chip->get_position[0] = chip->get_position[1] = callbacks[fix];
@@ -1481,6 +1536,9 @@ static void assign_position_fix(struct azx *chip, int fix)
azx_get_delay_from_lpib;
}
+ if (fix == POS_FIX_FIFO)
+ chip->get_delay[0] = chip->get_delay[1] =
+ azx_get_delay_from_fifo;
}
/*
@@ -2459,6 +2517,9 @@ static const struct pci_device_id azx_ids[] = {
/* AMD Hudson */
{ PCI_DEVICE(0x1022, 0x780d),
.driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB },
+ /* AMD, X370 & co */
+ { PCI_DEVICE(0x1022, 0x1457),
+ .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_AMD_SB },
/* AMD Stoney */
{ PCI_DEVICE(0x1022, 0x157a),
.driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB |
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index bba6a917cd02..97951f8cc4b2 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -1128,8 +1128,8 @@ static __always_inline int is_connected_ep(struct snd_soc_dapm_widget *widget,
list_add_tail(&widget->work_list, list);
if (custom_stop_condition && custom_stop_condition(widget, dir)) {
- widget->endpoints[dir] = 1;
- return widget->endpoints[dir];
+ list = NULL;
+ custom_stop_condition = NULL;
}
if ((widget->is_ep & SND_SOC_DAPM_DIR_TO_EP(dir)) && widget->connected) {
@@ -1166,8 +1166,8 @@ static __always_inline int is_connected_ep(struct snd_soc_dapm_widget *widget,
*
* Optionally, can be supplied with a function acting as a stopping condition.
* This function takes the dapm widget currently being examined and the walk
- * direction as an arguments, it should return true if the walk should be
- * stopped and false otherwise.
+ * direction as arguments; it should return true if widgets from that point
+ * in the graph onwards should not be added to the widget list.
*/
static int is_connected_output_ep(struct snd_soc_dapm_widget *widget,
struct list_head *list,
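
With the fix above, a custom stop condition no longer fakes an endpoint count; it only prunes list collection from that widget onward while endpoint accounting continues through the subtree. A hypothetical callback with the expected signature:

	/* Hypothetical stop condition: stop collecting below supply widgets. */
	static bool stop_below_supply(struct snd_soc_dapm_widget *w,
				      enum snd_soc_dapm_direction dir)
	{
		return w->id == snd_soc_dapm_supply;
	}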
diff --git a/sound/sound_core.c b/sound/sound_core.c
index 99b73c675743..20d4e2e1bacf 100644
--- a/sound/sound_core.c
+++ b/sound/sound_core.c
@@ -287,7 +287,8 @@ retry:
goto retry;
}
spin_unlock(&sound_loader_lock);
- return -EBUSY;
+ r = -EBUSY;
+ goto fail;
}
}
diff --git a/sound/usb/hiface/pcm.c b/sound/usb/hiface/pcm.c
index 24595fdc6159..b2074bf444da 100644
--- a/sound/usb/hiface/pcm.c
+++ b/sound/usb/hiface/pcm.c
@@ -605,14 +605,13 @@ int hiface_pcm_init(struct hiface_chip *chip, u8 extra_freq)
ret = hiface_pcm_init_urb(&rt->out_urbs[i], chip, OUT_EP,
hiface_pcm_out_urb_handler);
if (ret < 0)
- return ret;
+ goto error;
}
ret = snd_pcm_new(chip->card, "USB-SPDIF Audio", 0, 1, 0, &pcm);
if (ret < 0) {
- kfree(rt);
dev_err(&chip->dev->dev, "Cannot create pcm instance\n");
- return ret;
+ goto error;
}
pcm->private_data = rt;
@@ -625,4 +624,10 @@ int hiface_pcm_init(struct hiface_chip *chip, u8 extra_freq)
chip->pcm = rt;
return 0;
+
+error:
+ for (i = 0; i < PCM_N_URBS; i++)
+ kfree(rt->out_urbs[i].buffer);
+ kfree(rt);
+ return ret;
}
diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
index 6a93209748a1..1519be469477 100644
--- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -45,6 +45,7 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_TRAP,
PERF_REG_POWERPC_DAR,
PERF_REG_POWERPC_DSISR,
+ PERF_REG_POWERPC_SIER,
PERF_REG_POWERPC_MAX,
};
#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 45d5087a752b..65a72604278b 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -163,6 +163,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
"__reiserfs_panic",
"lbug_with_loc",
"fortify_panic",
+ "rewind_stack_do_exit",
};
if (func->bind == STB_WEAK)
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 59ab3d55a858..59dfca239161 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -305,7 +305,7 @@ static int read_symbols(struct elf *elf)
if (sym->type != STT_FUNC)
continue;
sym->pfunc = sym->cfunc = sym;
- coldstr = strstr(sym->name, ".cold.");
+ coldstr = strstr(sym->name, ".cold");
if (coldstr) {
pnamelen = coldstr - sym->name;
if (pnamelen > MAX_NAME_LEN) {
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h
index c12f4e804f66..845d92254579 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -61,7 +61,8 @@ static const char *reg_names[] = {
[PERF_REG_POWERPC_SOFTE] = "softe",
[PERF_REG_POWERPC_TRAP] = "trap",
[PERF_REG_POWERPC_DAR] = "dar",
- [PERF_REG_POWERPC_DSISR] = "dsisr"
+ [PERF_REG_POWERPC_DSISR] = "dsisr",
+ [PERF_REG_POWERPC_SIER] = "sier"
};
static inline const char *perf_reg_name(int id)
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
index f860dc411f69..bf8ba00eb19c 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -51,6 +51,7 @@ const struct sample_reg sample_reg_masks[] = {
SMPL_REG(trap, PERF_REG_POWERPC_TRAP),
SMPL_REG(dar, PERF_REG_POWERPC_DAR),
SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
+ SMPL_REG(sier, PERF_REG_POWERPC_SIER),
SMPL_REG_END
};
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4f0f88c536ba..2417978cd268 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2374,6 +2374,29 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
#endif
}
+/*
+ * Unlike kvm_arch_vcpu_runnable, this function is called outside
+ * a vcpu_load/vcpu_put pair. However, for most architectures
+ * kvm_arch_vcpu_runnable does not require vcpu_load.
+ */
+bool __weak kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
+{
+ return kvm_arch_vcpu_runnable(vcpu);
+}
+
+static bool vcpu_dy_runnable(struct kvm_vcpu *vcpu)
+{
+ if (kvm_arch_dy_runnable(vcpu))
+ return true;
+
+#ifdef CONFIG_KVM_ASYNC_PF
+ if (!list_empty_careful(&vcpu->async_pf.done))
+ return true;
+#endif
+
+ return false;
+}
+
void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
{
struct kvm *kvm = me->kvm;
@@ -2403,7 +2426,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
continue;
if (vcpu == me)
continue;
- if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
+ if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
continue;
if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu))
continue;
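
Since kvm_arch_dy_runnable() above is __weak, an architecture can override it with a probe that is safe to run without vcpu_load, typically adding cheap checks for pending work before falling back to the generic test. A hedged sketch (the flag checked is hypothetical, not the actual x86 implementation):

	/* Sketch: arch override treating pending async work as runnable. */
	bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
	{
		if (READ_ONCE(vcpu->arch.pending_event))	/* hypothetical field */
			return true;

		return kvm_arch_vcpu_runnable(vcpu);
	}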