Home Home > GIT Browse > SLE15-SP2-AZURE
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2017-10-20 13:22:03 +0200
committerJiri Kosina <jkosina@suse.cz>2017-10-20 13:22:03 +0200
commitf4c3286dae100b21daa3e02573894d68c43a72f5 (patch)
treea4206e50b5f90b8c0c999ac08f914dd045517231
parentb0c30e4605e34d6450a73048bdd6aef734dec675 (diff)
parent11cb57a26ad71f55aa601896848d66ac91282463 (diff)
Merge remote-tracking branch 'origin/users/ohering/SLE15/for-next' into SLE15rpm-4.12.14-2--SLE-15-Packages-Beta1rpm-4.12.14-2
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/x86/Kbuild2
-rw-r--r--arch/x86/hyperv/Makefile2
-rw-r--r--arch/x86/hyperv/hv_init.c95
-rw-r--r--arch/x86/hyperv/mmu.c301
-rw-r--r--arch/x86/include/asm/mshyperv.h150
-rw-r--r--arch/x86/include/asm/trace/hyperv.h40
-rw-r--r--arch/x86/include/uapi/asm/hyperv.h19
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c13
-rw-r--r--drivers/hv/Kconfig1
-rw-r--r--drivers/hv/channel.c149
-rw-r--r--drivers/hv/channel_mgmt.c78
-rw-r--r--drivers/hv/connection.c7
-rw-r--r--drivers/hv/hv.c9
-rw-r--r--drivers/hv/hv_balloon.c12
-rw-r--r--drivers/hv/hv_fcopy.c4
-rw-r--r--drivers/hv/hv_kvp.c2
-rw-r--r--drivers/hv/hyperv_vmbus.h11
-rw-r--r--drivers/hv/ring_buffer.c169
-rw-r--r--drivers/hv/vmbus_drv.c23
-rw-r--r--drivers/net/hyperv/hyperv_net.h25
-rw-r--r--drivers/net/hyperv/netvsc.c128
-rw-r--r--drivers/net/hyperv/netvsc_drv.c366
-rw-r--r--drivers/net/hyperv/rndis_filter.c131
-rw-r--r--drivers/pci/host/pci-hyperv.c62
-rw-r--r--drivers/scsi/storvsc_drv.c2
-rw-r--r--include/linux/hyperv.h107
-rwxr-xr-xtools/hv/bondvf.sh232
-rw-r--r--tools/hv/hv_fcopy_daemon.c32
-rw-r--r--tools/hv/hv_kvp_daemon.c2
-rw-r--r--tools/hv/hv_vss_daemon.c7
31 files changed, 1209 insertions, 973 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index fc6b40328f45..336eec0f140e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6157,6 +6157,7 @@ M: Stephen Hemminger <sthemmin@microsoft.com>
L: devel@linuxdriverproject.org
S: Maintained
F: arch/x86/include/asm/mshyperv.h
+F: arch/x86/include/asm/trace/hyperv.h
F: arch/x86/include/uapi/asm/hyperv.h
F: arch/x86/kernel/cpu/mshyperv.c
F: arch/x86/hyperv
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 586b786b3edf..3e6f64073005 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -8,7 +8,7 @@ obj-$(CONFIG_KVM) += kvm/
obj-$(CONFIG_XEN) += xen/
# Hyper-V paravirtualization support
-obj-$(CONFIG_HYPERVISOR_GUEST) += hyperv/
+obj-$(subst m,y,$(CONFIG_HYPERV)) += hyperv/
# lguest paravirtualization support
obj-$(CONFIG_LGUEST_GUEST) += lguest/
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index 171ae09864d7..367a8203cfcf 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1 +1 @@
-obj-y := hv_init.o
+obj-y := hv_init.o mmu.o
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 21f16a9f7004..211a43334219 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -26,6 +26,8 @@
#include <linux/mm.h>
#include <linux/clockchips.h>
#include <linux/hyperv.h>
+#include <linux/slab.h>
+#include <linux/cpuhotplug.h>
#ifdef CONFIG_HYPERV_TSCPAGE
@@ -75,10 +77,30 @@ static struct clocksource hyperv_cs_msr = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
-static void *hypercall_pg;
+void *hv_hypercall_pg;
+EXPORT_SYMBOL_GPL(hv_hypercall_pg);
struct clocksource *hyperv_cs;
EXPORT_SYMBOL_GPL(hyperv_cs);
+u32 *hv_vp_index;
+EXPORT_SYMBOL_GPL(hv_vp_index);
+
+u32 hv_max_vp_index;
+
+static int hv_cpu_init(unsigned int cpu)
+{
+ u64 msr_vp_index;
+
+ hv_get_vp_index(msr_vp_index);
+
+ hv_vp_index[smp_processor_id()] = msr_vp_index;
+
+ if (msr_vp_index > hv_max_vp_index)
+ hv_max_vp_index = msr_vp_index;
+
+ return 0;
+}
+
/*
* This function is to be invoked early in the boot sequence after the
* hypervisor has been detected.
@@ -96,6 +118,16 @@ void hyperv_init(void)
if (x86_hyper != &x86_hyper_ms_hyperv)
return;
+ /* Allocate percpu VP index */
+ hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
+ GFP_KERNEL);
+ if (!hv_vp_index)
+ return;
+
+ if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
+ hv_cpu_init, NULL) < 0)
+ goto free_vp_index;
+
/*
* Setup the hypercall page and enable hypercalls.
* 1. Register the guest ID
@@ -104,17 +136,19 @@ void hyperv_init(void)
guest_id = generate_guest_id(d1, LINUX_VERSION_CODE, d2);
wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
- hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
- if (hypercall_pg == NULL) {
+ hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
+ if (hv_hypercall_pg == NULL) {
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
- return;
+ goto free_vp_index;
}
rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
hypercall_msr.enable = 1;
- hypercall_msr.guest_physical_address = vmalloc_to_pfn(hypercall_pg);
+ hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ hyper_alloc_mmu();
+
/*
* Register Hyper-V specific clocksource.
*/
@@ -150,6 +184,12 @@ register_msr_cs:
hyperv_cs = &hyperv_cs_msr;
if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
+
+ return;
+
+free_vp_index:
+ kfree(hv_vp_index);
+ hv_vp_index = NULL;
}
/*
@@ -172,51 +212,6 @@ void hyperv_cleanup(void)
}
EXPORT_SYMBOL_GPL(hyperv_cleanup);
-/*
- * hv_do_hypercall- Invoke the specified hypercall
- */
-u64 hv_do_hypercall(u64 control, void *input, void *output)
-{
- u64 input_address = (input) ? virt_to_phys(input) : 0;
- u64 output_address = (output) ? virt_to_phys(output) : 0;
-#ifdef CONFIG_X86_64
- u64 hv_status = 0;
-
- if (!hypercall_pg)
- return (u64)ULLONG_MAX;
-
- __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8");
- __asm__ __volatile__("call *%3" : "=a" (hv_status) :
- "c" (control), "d" (input_address),
- "m" (hypercall_pg));
-
- return hv_status;
-
-#else
-
- u32 control_hi = control >> 32;
- u32 control_lo = control & 0xFFFFFFFF;
- u32 hv_status_hi = 1;
- u32 hv_status_lo = 1;
- u32 input_address_hi = input_address >> 32;
- u32 input_address_lo = input_address & 0xFFFFFFFF;
- u32 output_address_hi = output_address >> 32;
- u32 output_address_lo = output_address & 0xFFFFFFFF;
-
- if (!hypercall_pg)
- return (u64)ULLONG_MAX;
-
- __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi),
- "=a"(hv_status_lo) : "d" (control_hi),
- "a" (control_lo), "b" (input_address_hi),
- "c" (input_address_lo), "D"(output_address_hi),
- "S"(output_address_lo), "m" (hypercall_pg));
-
- return hv_status_lo | ((u64)hv_status_hi << 32);
-#endif /* !x86_64 */
-}
-EXPORT_SYMBOL_GPL(hv_do_hypercall);
-
void hyperv_report_panic(struct pt_regs *regs)
{
static bool panic_reported;
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
new file mode 100644
index 000000000000..1f87627d9909
--- /dev/null
+++ b/arch/x86/hyperv/mmu.c
@@ -0,0 +1,301 @@
+#define pr_fmt(fmt) "Hyper-V: " fmt
+
+#include <linux/hyperv.h>
+#include <linux/log2.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <asm/fpu/api.h>
+#include <asm/mshyperv.h>
+#include <asm/msr.h>
+#include <asm/tlbflush.h>
+
+#define CREATE_TRACE_POINTS
+#include <asm/trace/hyperv.h>
+
+/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
+struct hv_flush_pcpu {
+ u64 address_space;
+ u64 flags;
+ u64 processor_mask;
+ u64 gva_list[];
+};
+
+/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
+struct hv_flush_pcpu_ex {
+ u64 address_space;
+ u64 flags;
+ struct {
+ u64 format;
+ u64 valid_bank_mask;
+ u64 bank_contents[];
+ } hv_vp_set;
+ u64 gva_list[];
+};
+
+/* Each gva in gva_list encodes up to 4096 pages to flush */
+#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
+
+static struct hv_flush_pcpu __percpu **pcpu_flush;
+
+static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;
+
+/*
+ * Fills in gva_list starting from offset. Returns the number of items added.
+ */
+static inline int fill_gva_list(u64 gva_list[], int offset,
+ unsigned long start, unsigned long end)
+{
+ int gva_n = offset;
+ unsigned long cur = start, diff;
+
+ do {
+ diff = end > cur ? end - cur : 0;
+
+ gva_list[gva_n] = cur & PAGE_MASK;
+ /*
+ * Lower 12 bits encode the number of additional
+ * pages to flush (in addition to the 'cur' page).
+ */
+ if (diff >= HV_TLB_FLUSH_UNIT)
+ gva_list[gva_n] |= ~PAGE_MASK;
+ else if (diff)
+ gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
+
+ cur += HV_TLB_FLUSH_UNIT;
+ gva_n++;
+
+ } while (cur < end);
+
+ return gva_n - offset;
+}
+
+/* Return the number of banks in the resulting vp_set */
+static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
+ const struct cpumask *cpus)
+{
+ int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
+
+ /* valid_bank_mask can represent up to 64 banks */
+ if (hv_max_vp_index / 64 >= 64)
+ return 0;
+
+ /*
+ * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex
+ * structs are not cleared between calls, we risk flushing unneeded
+ * vCPUs otherwise.
+ */
+ for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
+ flush->hv_vp_set.bank_contents[vcpu_bank] = 0;
+
+ /*
+ * Some banks may end up being empty but this is acceptable.
+ */
+ for_each_cpu(cpu, cpus) {
+ vcpu = hv_cpu_number_to_vp_number(cpu);
+ vcpu_bank = vcpu / 64;
+ vcpu_offset = vcpu % 64;
+ __set_bit(vcpu_offset, (unsigned long *)
+ &flush->hv_vp_set.bank_contents[vcpu_bank]);
+ if (vcpu_bank >= nr_bank)
+ nr_bank = vcpu_bank + 1;
+ }
+ flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
+
+ return nr_bank;
+}
+
+static void hyperv_flush_tlb_others(const struct cpumask *cpus,
+struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ int cpu, vcpu, gva_n, max_gvas;
+ struct hv_flush_pcpu **flush_pcpu;
+ struct hv_flush_pcpu *flush;
+ u64 status = U64_MAX;
+ unsigned long flags;
+
+ trace_hyperv_mmu_flush_tlb_others(cpus, mm, start, end);
+
+ if (!pcpu_flush || !hv_hypercall_pg)
+ goto do_native;
+
+ if (cpumask_empty(cpus))
+ return;
+
+ local_irq_save(flags);
+
+ flush_pcpu = this_cpu_ptr(pcpu_flush);
+
+ if (unlikely(!*flush_pcpu))
+ *flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+
+ flush = *flush_pcpu;
+
+ if (unlikely(!flush)) {
+ local_irq_restore(flags);
+ goto do_native;
+ }
+
+ if (mm) {
+ flush->address_space = virt_to_phys(mm->pgd);
+ flush->flags = 0;
+ } else {
+ flush->address_space = 0;
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ }
+
+ flush->processor_mask = 0;
+ if (cpumask_equal(cpus, cpu_present_mask)) {
+ flush->flags |= HV_FLUSH_ALL_PROCESSORS;
+ } else {
+ for_each_cpu(cpu, cpus) {
+ vcpu = hv_cpu_number_to_vp_number(cpu);
+ if (vcpu >= 64)
+ goto do_native;
+
+ __set_bit(vcpu, (unsigned long *)
+ &flush->processor_mask);
+ }
+ }
+
+ /*
+ * We can flush not more than max_gvas with one hypercall. Flush the
+ * whole address space if we were asked to do more.
+ */
+ max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
+
+ if (end == TLB_FLUSH_ALL) {
+ flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+ status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+ flush, NULL);
+ } else if (end &&
+ ((end - start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
+ status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+ flush, NULL);
+ } else {
+ gva_n = fill_gva_list(flush->gva_list, 0,
+ start, end);
+ status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
+ gva_n, 0, flush, NULL);
+ }
+
+ local_irq_restore(flags);
+
+ if (!(status & HV_HYPERCALL_RESULT_MASK))
+ return;
+do_native:
+ native_flush_tlb_others(cpus, mm, start, end);
+}
+
+static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
+struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ int nr_bank = 0, max_gvas, gva_n;
+ struct hv_flush_pcpu_ex **flush_pcpu;
+ struct hv_flush_pcpu_ex *flush;
+ u64 status = U64_MAX;
+ unsigned long flags;
+
+ trace_hyperv_mmu_flush_tlb_others(cpus, mm, start, end);
+
+ if (!pcpu_flush_ex || !hv_hypercall_pg)
+ goto do_native;
+
+ if (cpumask_empty(cpus))
+ return;
+
+ local_irq_save(flags);
+
+ flush_pcpu = this_cpu_ptr(pcpu_flush_ex);
+
+ if (unlikely(!*flush_pcpu))
+ *flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+
+ flush = *flush_pcpu;
+
+ if (unlikely(!flush)) {
+ local_irq_restore(flags);
+ goto do_native;
+ }
+
+ if (mm) {
+ flush->address_space = virt_to_phys(mm->pgd);
+ flush->flags = 0;
+ } else {
+ flush->address_space = 0;
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ }
+
+ flush->hv_vp_set.valid_bank_mask = 0;
+
+ if (!cpumask_equal(cpus, cpu_present_mask)) {
+ flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
+ nr_bank = cpumask_to_vp_set(flush, cpus);
+ }
+
+ if (!nr_bank) {
+ flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ flush->flags |= HV_FLUSH_ALL_PROCESSORS;
+ }
+
+ /*
+ * We can flush not more than max_gvas with one hypercall. Flush the
+ * whole address space if we were asked to do more.
+ */
+ max_gvas =
+ (PAGE_SIZE - sizeof(*flush) - nr_bank *
+ sizeof(flush->hv_vp_set.bank_contents[0])) /
+ sizeof(flush->gva_list[0]);
+
+ if (end == TLB_FLUSH_ALL) {
+ flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+ status = hv_do_rep_hypercall(
+ HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+ 0, nr_bank, flush, NULL);
+ } else if (end &&
+ ((end - start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
+ status = hv_do_rep_hypercall(
+ HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+ 0, nr_bank, flush, NULL);
+ } else {
+ gva_n = fill_gva_list(flush->gva_list, nr_bank,
+ start, end);
+ status = hv_do_rep_hypercall(
+ HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
+ gva_n, nr_bank, flush, NULL);
+ }
+
+ local_irq_restore(flags);
+
+ if (!(status & HV_HYPERCALL_RESULT_MASK))
+ return;
+do_native:
+ native_flush_tlb_others(cpus, mm, start, end);
+}
+
+void hyperv_setup_mmu_ops(void)
+{
+ if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
+ return;
+
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+
+ if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
+ pr_info("Using hypercall for remote TLB flush\n");
+ pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
+ } else {
+ pr_info("Using ext hypercall for remote TLB flush\n");
+ pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
+ }
+}
+
+void hyper_alloc_mmu(void)
+{
+ if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
+ return;
+
+ if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
+ pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
+ else
+ pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
+}
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 2b58c8c1eeaa..91cf8d419388 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -3,6 +3,8 @@
#include <linux/types.h>
#include <linux/atomic.h>
+#include <linux/nmi.h>
+#include <asm/io.h>
#include <asm/hyperv.h>
/*
@@ -28,6 +30,8 @@ struct ms_hyperv_info {
u32 features;
u32 misc_features;
u32 hints;
+ u32 max_vp_index;
+ u32 max_lp_index;
};
extern struct ms_hyperv_info ms_hyperv;
@@ -168,12 +172,156 @@ void hv_remove_crash_handler(void);
#if IS_ENABLED(CONFIG_HYPERV)
extern struct clocksource *hyperv_cs;
+extern void *hv_hypercall_pg;
+
+static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
+{
+ u64 input_address = input ? virt_to_phys(input) : 0;
+ u64 output_address = output ? virt_to_phys(output) : 0;
+ u64 hv_status;
+ register void *__sp asm(_ASM_SP);
+
+#ifdef CONFIG_X86_64
+ if (!hv_hypercall_pg)
+ return U64_MAX;
+
+ __asm__ __volatile__("mov %4, %%r8\n"
+ "call *%5"
+ : "=a" (hv_status), "+r" (__sp),
+ "+c" (control), "+d" (input_address)
+ : "r" (output_address), "m" (hv_hypercall_pg)
+ : "cc", "memory", "r8", "r9", "r10", "r11");
+#else
+ u32 input_address_hi = upper_32_bits(input_address);
+ u32 input_address_lo = lower_32_bits(input_address);
+ u32 output_address_hi = upper_32_bits(output_address);
+ u32 output_address_lo = lower_32_bits(output_address);
+
+ if (!hv_hypercall_pg)
+ return U64_MAX;
+
+ __asm__ __volatile__("call *%7"
+ : "=A" (hv_status),
+ "+c" (input_address_lo), "+r" (__sp)
+ : "A" (control),
+ "b" (input_address_hi),
+ "D"(output_address_hi), "S"(output_address_lo),
+ "m" (hv_hypercall_pg)
+ : "cc", "memory");
+#endif /* !x86_64 */
+ return hv_status;
+}
+
+#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
+#define HV_HYPERCALL_FAST_BIT BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET 17
+#define HV_HYPERCALL_REP_COMP_OFFSET 32
+#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
+#define HV_HYPERCALL_REP_START_OFFSET 48
+#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
+
+/* Fast hypercall with 8 bytes of input and no output */
+static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+{
+ u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ register void *__sp asm(_ASM_SP);
+
+#ifdef CONFIG_X86_64
+ {
+ __asm__ __volatile__("call *%4"
+ : "=a" (hv_status), "+r" (__sp),
+ "+c" (control), "+d" (input1)
+ : "m" (hv_hypercall_pg)
+ : "cc", "r8", "r9", "r10", "r11");
+ }
+#else
+ {
+ u32 input1_hi = upper_32_bits(input1);
+ u32 input1_lo = lower_32_bits(input1);
+
+ __asm__ __volatile__ ("call *%5"
+ : "=A"(hv_status),
+ "+c"(input1_lo),
+ "+r"(__sp)
+ : "A" (control),
+ "b" (input1_hi),
+ "m" (hv_hypercall_pg)
+ : "cc", "edi", "esi");
+ }
+#endif
+ return hv_status;
+}
+
+/*
+ * Rep hypercalls. Callers of this functions are supposed to ensure that
+ * rep_count and varhead_size comply with Hyper-V hypercall definition.
+ */
+static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
+ void *input, void *output)
+{
+ u64 control = code;
+ u64 status;
+ u16 rep_comp;
+
+ control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET;
+ control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET;
+
+ do {
+ status = hv_do_hypercall(control, input, output);
+ if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS)
+ return status;
+
+ /* Bits 32-43 of status have 'Reps completed' data. */
+ rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >>
+ HV_HYPERCALL_REP_COMP_OFFSET;
+
+ control &= ~HV_HYPERCALL_REP_START_MASK;
+ control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET;
+
+ touch_nmi_watchdog();
+ } while (rep_comp < rep_count);
+
+ return status;
+}
+
+/*
+ * Hypervisor's notion of virtual processor ID is different from
+ * Linux' notion of CPU ID. This information can only be retrieved
+ * in the context of the calling CPU. Setup a map for easy access
+ * to this information.
+ */
+extern u32 *hv_vp_index;
+extern u32 hv_max_vp_index;
+
+/**
+ * hv_cpu_number_to_vp_number() - Map CPU to VP.
+ * @cpu_number: CPU number in Linux terms
+ *
+ * This function returns the mapping between the Linux processor
+ * number and the hypervisor's virtual processor number, useful
+ * in making hypercalls and such that talk about specific
+ * processors.
+ *
+ * Return: Virtual processor number in Hyper-V terms
+ */
+static inline int hv_cpu_number_to_vp_number(int cpu_number)
+{
+ return hv_vp_index[cpu_number];
+}
void hyperv_init(void);
+void hyperv_setup_mmu_ops(void);
+void hyper_alloc_mmu(void);
void hyperv_report_panic(struct pt_regs *regs);
bool hv_is_hypercall_page_setup(void);
void hyperv_cleanup(void);
-#endif
+#else /* CONFIG_HYPERV */
+static inline void hyperv_init(void) {}
+static inline bool hv_is_hypercall_page_setup(void) { return false; }
+static inline void hyperv_cleanup(void) {}
+static inline void hyperv_setup_mmu_ops(void) {}
+#endif /* CONFIG_HYPERV */
+
#ifdef CONFIG_HYPERV_TSCPAGE
struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
new file mode 100644
index 000000000000..098ab4394cbd
--- /dev/null
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -0,0 +1,40 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hyperv
+
+#if !defined(_TRACE_HYPERV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HYPERV_H
+
+#include <linux/tracepoint.h>
+
+#if IS_ENABLED(CONFIG_HYPERV)
+
+TRACE_EVENT(hyperv_mmu_flush_tlb_others,
+ TP_PROTO(const struct cpumask *cpus,
+ struct mm_struct *mm, unsigned long start, unsigned long end),
+ TP_ARGS(cpus, mm, start, end),
+ TP_STRUCT__entry(
+ __field(unsigned int, ncpus)
+ __field(struct mm_struct *, mm)
+ __field(unsigned long, addr)
+ __field(unsigned long, end)
+ ),
+ TP_fast_assign(__entry->ncpus = cpumask_weight(cpus);
+ __entry->mm = mm;
+ __entry->addr = start;
+ __entry->end = end;
+ ),
+ TP_printk("ncpus %d mm %p addr %lx, end %lx",
+ __entry->ncpus, __entry->mm,
+ __entry->addr, __entry->end)
+ );
+
+#endif /* CONFIG_HYPERV */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH asm/trace/
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE hyperv
+#endif /* _TRACE_HYPERV_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 127ddadee1a5..f65d12504e80 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -149,12 +149,9 @@
*/
#define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9)
-/*
- * HV_VP_SET available
- */
+/* Recommend using the newer ExProcessorMasks interface */
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11)
-
/*
* Crash notification flag.
*/
@@ -242,7 +239,11 @@
(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
/* Declare the various hypercall operations. */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
#define HVCALL_POST_MESSAGE 0x005c
#define HVCALL_SIGNAL_EVENT 0x005d
@@ -259,6 +260,16 @@
#define HV_PROCESSOR_POWER_STATE_C2 2
#define HV_PROCESSOR_POWER_STATE_C3 3
+#define HV_FLUSH_ALL_PROCESSORS BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
+enum HV_GENERIC_SET_FORMAT {
+ HV_GENERIC_SET_SPARCE_4K,
+ HV_GENERIC_SET_ALL,
+};
+
/* hypercall status code */
#define HV_STATUS_SUCCESS 0
#define HV_STATUS_INVALID_HYPERCALL_CODE 2
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 70e717fccdd6..42664f944cbc 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -184,9 +184,15 @@ static void __init ms_hyperv_init_platform(void)
ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
- pr_info("HyperV: features 0x%x, hints 0x%x\n",
+ pr_info("Hyper-V: features 0x%x, hints 0x%x\n",
ms_hyperv.features, ms_hyperv.hints);
+ ms_hyperv.max_vp_index = cpuid_eax(HVCPUID_IMPLEMENTATION_LIMITS);
+ ms_hyperv.max_lp_index = cpuid_ebx(HVCPUID_IMPLEMENTATION_LIMITS);
+
+ pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n",
+ ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);
+
/*
* Extract host information.
*/
@@ -219,7 +225,7 @@ static void __init ms_hyperv_init_platform(void)
rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
lapic_timer_frequency = hv_lapic_frequency;
- pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
+ pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
lapic_timer_frequency);
}
@@ -249,11 +255,12 @@ static void __init ms_hyperv_init_platform(void)
* Setup the hook to get control post apic initialization.
*/
x86_platform.apic_post_init = hyperv_init;
+ hyperv_setup_mmu_ops();
#endif
}
const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
- .name = "Microsoft HyperV",
+ .name = "Microsoft Hyper-V",
.detect = ms_hyperv_platform,
.init_platform = ms_hyperv_init_platform,
};
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index c29cd5387a35..50b89ea0e60f 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -3,6 +3,7 @@ menu "Microsoft Hyper-V guest support"
config HYPERV
tristate "Microsoft Hyper-V client drivers"
depends on X86 && ACPI && PCI && X86_LOCAL_APIC && HYPERVISOR_GUEST
+ select PARAVIRT
help
Select this option to run Linux as a Hyper-V client operating
system.
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index e57cc40cb768..894b67ac2cae 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -177,6 +177,11 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
&vmbus_connection.chn_msg_list);
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+ if (newchannel->rescind) {
+ err = -ENODEV;
+ goto error_free_gpadl;
+ }
+
ret = vmbus_post_msg(open_msg,
sizeof(struct vmbus_channel_open_channel), true);
@@ -421,6 +426,11 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+ if (channel->rescind) {
+ ret = -ENODEV;
+ goto cleanup;
+ }
+
ret = vmbus_post_msg(gpadlmsg, msginfo->msgsize -
sizeof(*msginfo), true);
if (ret != 0)
@@ -494,6 +504,10 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
list_add_tail(&info->msglistentry,
&vmbus_connection.chn_msg_list);
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+
+ if (channel->rescind)
+ goto post_msg_err;
+
ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_gpadl_teardown),
true);
@@ -626,6 +640,7 @@ void vmbus_close(struct vmbus_channel *channel)
*/
return;
}
+ mutex_lock(&vmbus_connection.channel_mutex);
/*
* Close all the sub-channels first and then close the
* primary channel.
@@ -634,22 +649,35 @@ void vmbus_close(struct vmbus_channel *channel)
cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
vmbus_close_internal(cur_channel);
if (cur_channel->rescind) {
- mutex_lock(&vmbus_connection.channel_mutex);
- hv_process_channel_removal(cur_channel,
+ hv_process_channel_removal(
cur_channel->offermsg.child_relid);
- mutex_unlock(&vmbus_connection.channel_mutex);
}
}
/*
* Now close the primary.
*/
vmbus_close_internal(channel);
+ mutex_unlock(&vmbus_connection.channel_mutex);
}
EXPORT_SYMBOL_GPL(vmbus_close);
-int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
- u32 bufferlen, u64 requestid,
- enum vmbus_packet_type type, u32 flags)
+/**
+ * vmbus_sendpacket() - Send the specified buffer on the given channel
+ * @channel: Pointer to vmbus_channel structure.
+ * @buffer: Pointer to the buffer you want to receive the data into.
+ * @bufferlen: Maximum size of what the the buffer will hold
+ * @requestid: Identifier of the request
+ * @type: Type of packet that is being send e.g. negotiate, time
+ * packet etc.
+ *
+ * Sends data in @buffer directly to hyper-v via the vmbus
+ * This will send the data unparsed to hyper-v.
+ *
+ * Mainly used by Hyper-V drivers.
+ */
+int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer,
+ u32 bufferlen, u64 requestid,
+ enum vmbus_packet_type type, u32 flags)
{
struct vmpacket_descriptor desc;
u32 packetlen = sizeof(struct vmpacket_descriptor) + bufferlen;
@@ -676,42 +704,19 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
return hv_ringbuffer_write(channel, bufferlist, num_vecs);
}
-EXPORT_SYMBOL(vmbus_sendpacket_ctl);
-
-/**
- * vmbus_sendpacket() - Send the specified buffer on the given channel
- * @channel: Pointer to vmbus_channel structure.
- * @buffer: Pointer to the buffer you want to receive the data into.
- * @bufferlen: Maximum size of what the the buffer will hold
- * @requestid: Identifier of the request
- * @type: Type of packet that is being send e.g. negotiate, time
- * packet etc.
- *
- * Sends data in @buffer directly to hyper-v via the vmbus
- * This will send the data unparsed to hyper-v.
- *
- * Mainly used by Hyper-V drivers.
- */
-int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer,
- u32 bufferlen, u64 requestid,
- enum vmbus_packet_type type, u32 flags)
-{
- return vmbus_sendpacket_ctl(channel, buffer, bufferlen, requestid,
- type, flags);
-}
EXPORT_SYMBOL(vmbus_sendpacket);
/*
- * vmbus_sendpacket_pagebuffer_ctl - Send a range of single-page buffer
+ * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer
* packets using a GPADL Direct packet type. This interface allows you
* to control notifying the host. This will be useful for sending
* batched data. Also the sender can control the send flags
* explicitly.
*/
-int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
- struct hv_page_buffer pagebuffers[],
- u32 pagecount, void *buffer, u32 bufferlen,
- u64 requestid, u32 flags)
+int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
+ struct hv_page_buffer pagebuffers[],
+ u32 pagecount, void *buffer, u32 bufferlen,
+ u64 requestid)
{
int i;
struct vmbus_channel_packet_page_buffer desc;
@@ -736,7 +741,7 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
/* Setup the descriptor */
desc.type = VM_PKT_DATA_USING_GPA_DIRECT;
- desc.flags = flags;
+ desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
desc.length8 = (u16)(packetlen_aligned >> 3);
desc.transactionid = requestid;
@@ -757,24 +762,6 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
return hv_ringbuffer_write(channel, bufferlist, 3);
}
-EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer_ctl);
-
-/*
- * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer
- * packets using a GPADL Direct packet type.
- */
-int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
- struct hv_page_buffer pagebuffers[],
- u32 pagecount, void *buffer, u32 bufferlen,
- u64 requestid)
-{
- u32 flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
-
- return vmbus_sendpacket_pagebuffer_ctl(channel, pagebuffers, pagecount,
- buffer, bufferlen,
- requestid, flags);
-
-}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer);
/*
@@ -814,62 +801,6 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);
-/*
- * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet
- * using a GPADL Direct packet type.
- */
-int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
- struct hv_multipage_buffer *multi_pagebuffer,
- void *buffer, u32 bufferlen, u64 requestid)
-{
- struct vmbus_channel_packet_multipage_buffer desc;
- u32 descsize;
- u32 packetlen;
- u32 packetlen_aligned;
- struct kvec bufferlist[3];
- u64 aligned_data = 0;
- u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset,
- multi_pagebuffer->len);
-
- if (pfncount > MAX_MULTIPAGE_BUFFER_COUNT)
- return -EINVAL;
-
- /*
- * Adjust the size down since vmbus_channel_packet_multipage_buffer is
- * the largest size we support
- */
- descsize = sizeof(struct vmbus_channel_packet_multipage_buffer) -
- ((MAX_MULTIPAGE_BUFFER_COUNT - pfncount) *
- sizeof(u64));
- packetlen = descsize + bufferlen;
- packetlen_aligned = ALIGN(packetlen, sizeof(u64));
-
-
- /* Setup the descriptor */
- desc.type = VM_PKT_DATA_USING_GPA_DIRECT;
- desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
- desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
- desc.length8 = (u16)(packetlen_aligned >> 3);
- desc.transactionid = requestid;
- desc.rangecount = 1;
-
- desc.range.len = multi_pagebuffer->len;
- desc.range.offset = multi_pagebuffer->offset;
-
- memcpy(desc.range.pfn_array, multi_pagebuffer->pfn_array,
- pfncount * sizeof(u64));
-
- bufferlist[0].iov_base = &desc;
- bufferlist[0].iov_len = descsize;
- bufferlist[1].iov_base = buffer;
- bufferlist[1].iov_len = bufferlen;
- bufferlist[2].iov_base = &aligned_data;
- bufferlist[2].iov_len = (packetlen_aligned - packetlen);
-
- return hv_ringbuffer_write(channel, bufferlist, 3);
-}
-EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer);
-
/**
* vmbus_recvpacket() - Retrieve the user packet on the specified channel
* @channel: Pointer to vmbus_channel structure.
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 4bbb8dea4727..018d2e0f8ec5 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -159,7 +159,7 @@ static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
-
+ channel->rescind = true;
list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
msglistentry) {
@@ -381,14 +381,21 @@ static void vmbus_release_relid(u32 relid)
true);
}
-void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
+void hv_process_channel_removal(u32 relid)
{
unsigned long flags;
- struct vmbus_channel *primary_channel;
+ struct vmbus_channel *primary_channel, *channel;
- BUG_ON(!channel->rescind);
BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
+ /*
+ * Make sure channel is valid as we may have raced.
+ */
+ channel = relid2channel(relid);
+ if (!channel)
+ return;
+
+ BUG_ON(!channel->rescind);
if (channel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(channel->target_cpu,
@@ -451,6 +458,12 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
/* Make sure this is a new offer */
mutex_lock(&vmbus_connection.channel_mutex);
+ /*
+ * Now that we have acquired the channel_mutex,
+ * we can release the potentially racing rescind thread.
+ */
+ atomic_dec(&vmbus_connection.offer_in_progress);
+
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
if (!uuid_le_cmp(channel->offermsg.offer.if_type,
newchannel->offermsg.offer.if_type) &&
@@ -481,7 +494,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
channel->num_sc++;
spin_unlock_irqrestore(&channel->lock, flags);
} else {
- atomic_dec(&vmbus_connection.offer_in_progress);
goto err_free_chan;
}
}
@@ -510,7 +522,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
if (!fnew) {
if (channel->sc_creation_callback != NULL)
channel->sc_creation_callback(newchannel);
- atomic_dec(&vmbus_connection.offer_in_progress);
+ newchannel->probe_done = true;
return;
}
@@ -541,7 +553,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
goto err_deq_chan;
}
- atomic_dec(&vmbus_connection.offer_in_progress);
+ newchannel->probe_done = true;
return;
err_deq_chan:
@@ -599,7 +611,7 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
*/
channel->numa_node = 0;
channel->target_cpu = 0;
- channel->target_vp = hv_context.vp_index[0];
+ channel->target_vp = hv_cpu_number_to_vp_number(0);
return;
}
@@ -683,7 +695,7 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
}
channel->target_cpu = cur_cpu;
- channel->target_vp = hv_context.vp_index[cur_cpu];
+ channel->target_vp = hv_cpu_number_to_vp_number(cur_cpu);
}
static void vmbus_wait_for_unload(void)
@@ -805,21 +817,12 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
/*
* Setup state for signalling the host.
*/
- newchannel->sig_event = (struct hv_input_signal_event *)
- (ALIGN((unsigned long)
- &newchannel->sig_buf,
- HV_HYPERCALL_PARAM_ALIGN));
-
- newchannel->sig_event->connectionid.asu32 = 0;
- newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
- newchannel->sig_event->flag_number = 0;
- newchannel->sig_event->rsvdz = 0;
+ newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
if (vmbus_proto_version != VERSION_WS2008) {
newchannel->is_dedicated_interrupt =
(offer->is_dedicated_interrupt != 0);
- newchannel->sig_event->connectionid.u.id =
- offer->connection_id;
+ newchannel->sig_event = offer->connection_id;
}
memcpy(&newchannel->offermsg, offer,
@@ -839,7 +842,6 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
struct vmbus_channel_rescind_offer *rescind;
struct vmbus_channel *channel;
- unsigned long flags;
struct device *dev;
rescind = (struct vmbus_channel_rescind_offer *)hdr;
@@ -878,15 +880,25 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
return;
}
- spin_lock_irqsave(&channel->lock, flags);
- channel->rescind = true;
- spin_unlock_irqrestore(&channel->lock, flags);
+ /*
+ * Now wait for offer handling to complete.
+ */
+ while (READ_ONCE(channel->probe_done) == false) {
+ /*
+ * We wait here until any channel offer is currently
+ * being processed.
+ */
+ msleep(1);
+ }
- vmbus_rescind_cleanup(channel);
+ /*
+ * At this point, the rescind handling can proceed safely.
+ */
if (channel->device_obj) {
if (channel->chn_rescind_callback) {
channel->chn_rescind_callback(channel);
+ vmbus_rescind_cleanup(channel);
return;
}
/*
@@ -895,6 +907,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
*/
dev = get_device(&channel->device_obj->device);
if (dev) {
+ vmbus_rescind_cleanup(channel);
vmbus_device_unregister(channel->device_obj);
put_device(dev);
}
@@ -907,29 +920,25 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
* 1. Close all sub-channels first
* 2. Then close the primary channel.
*/
+ mutex_lock(&vmbus_connection.channel_mutex);
+ vmbus_rescind_cleanup(channel);
if (channel->state == CHANNEL_OPEN_STATE) {
/*
* The channel is currently not open;
* it is safe for us to cleanup the channel.
*/
- mutex_lock(&vmbus_connection.channel_mutex);
- hv_process_channel_removal(channel,
- channel->offermsg.child_relid);
- mutex_unlock(&vmbus_connection.channel_mutex);
+ hv_process_channel_removal(rescind->child_relid);
}
+ mutex_unlock(&vmbus_connection.channel_mutex);
}
}
void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
{
- mutex_lock(&vmbus_connection.channel_mutex);
-
BUG_ON(!is_hvsock_channel(channel));
channel->rescind = true;
vmbus_device_unregister(channel->device_obj);
-
- mutex_unlock(&vmbus_connection.channel_mutex);
}
EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
@@ -1228,8 +1237,7 @@ struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
return outgoing_channel;
}
- cur_cpu = hv_context.vp_index[get_cpu()];
- put_cpu();
+ cur_cpu = hv_cpu_number_to_vp_number(smp_processor_id());
list_for_each_safe(cur, tmp, &primary->sc_list) {
cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
if (cur_channel->state != CHANNEL_OPENED_STATE)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 59c11ff90d12..f41901f80b64 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -32,6 +32,8 @@
#include <linux/hyperv.h>
#include <linux/export.h>
#include <asm/hyperv.h>
+#include <asm/mshyperv.h>
+
#include "hyperv_vmbus.h"
@@ -94,7 +96,8 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
* the CPU attempting to connect may not be CPU 0.
*/
if (version >= VERSION_WIN8_1) {
- msg->target_vcpu = hv_context.vp_index[smp_processor_id()];
+ msg->target_vcpu =
+ hv_cpu_number_to_vp_number(smp_processor_id());
vmbus_connection.connect_cpu = smp_processor_id();
} else {
msg->target_vcpu = 0;
@@ -406,6 +409,6 @@ void vmbus_set_event(struct vmbus_channel *channel)
if (!channel->is_dedicated_interrupt)
vmbus_send_interrupt(child_relid);
- hv_do_hypercall(HVCALL_SIGNAL_EVENT, channel->sig_event, NULL);
+ hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
}
EXPORT_SYMBOL_GPL(vmbus_set_event);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 2ea12207caa0..8267439dd1ee 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -234,7 +234,6 @@ int hv_synic_init(unsigned int cpu)
union hv_synic_siefp siefp;
union hv_synic_sint shared_sint;
union hv_synic_scontrol sctrl;
- u64 vp_index;
/* Setup the Synic's message page */
hv_get_simp(simp.as_uint64);
@@ -276,14 +275,6 @@ int hv_synic_init(unsigned int cpu)
hv_context.synic_initialized = true;
/*
- * Setup the mapping between Hyper-V's notion
- * of cpuid and Linux' notion of cpuid.
- * This array will be indexed using Linux cpuid.
- */
- hv_get_vp_index(vp_index);
- hv_context.vp_index[cpu] = (u32)vp_index;
-
- /*
* Register the per-cpu clockevent source.
*/
if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index f5728deff893..db0e6652d7ef 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -584,10 +584,6 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
switch (val) {
case MEM_ONLINE:
- spin_lock_irqsave(&dm_device.ha_lock, flags);
- dm_device.num_pages_onlined += mem->nr_pages;
- spin_unlock_irqrestore(&dm_device.ha_lock, flags);
- /* Fall through */
case MEM_CANCEL_ONLINE:
if (dm_device.ha_waiting) {
dm_device.ha_waiting = false;
@@ -644,6 +640,9 @@ static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
__online_page_set_limits(pg);
__online_page_increment_counters(pg);
__online_page_free(pg);
+
+ WARN_ON_ONCE(!spin_is_locked(&dm_device.ha_lock));
+ dm_device.num_pages_onlined++;
}
static void hv_bring_pgs_online(struct hv_hotadd_state *has,
@@ -1036,8 +1035,8 @@ static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
if (info_hdr->data_size == sizeof(__u64)) {
__u64 *max_page_count = (__u64 *)&info_hdr[1];
- pr_info("INFO_TYPE_MAX_PAGE_CNT = %llu\n",
- *max_page_count);
+ pr_info("Max. dynamic memory size: %llu MB\n",
+ (*max_page_count) >> (20 - PAGE_SHIFT));
}
break;
@@ -1656,6 +1655,7 @@ static int balloon_probe(struct hv_device *dev,
}
dm_device.state = DM_INITIALIZED;
+ last_post_time = jiffies;
return 0;
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index d4e4e56c8fb1..002227d1ba9c 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -171,6 +171,10 @@ static void fcopy_send_data(struct work_struct *dummy)
out_src = smsg_out;
break;
+ case WRITE_TO_FILE:
+ out_src = fcopy_transaction.fcopy_msg;
+ out_len = sizeof(struct hv_do_fcopy);
+ break;
default:
out_src = fcopy_transaction.fcopy_msg;
out_len = fcopy_transaction.recv_len;
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index eefc72a49a5f..e1d5debcb0ed 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -319,7 +319,7 @@ static int process_ob_ipinfo(void *in_msg, void *out_msg, int op)
strlen((char *)in->body.kvp_ip_val.adapter_id),
UTF16_HOST_ENDIAN,
(wchar_t *)out->kvp_ip_val.adapter_id,
- MAX_IP_ADDR_SIZE);
+ MAX_ADAPTER_ID_SIZE);
if (len < 0)
return len;
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 252191b1fa4d..85db40f036dc 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -229,17 +229,6 @@ struct hv_context {
struct hv_per_cpu_context __percpu *cpu_context;
/*
- * Hypervisor's notion of virtual processor ID is different from
- * Linux' notion of CPU ID. This information can only be retrieved
- * in the context of the calling CPU. Setup a map for easy access
- * to this information:
- *
- * vp_index[a] is the Hyper-V's processor ID corresponding to
- * Linux cpuid 'a'.
- */
- u32 vp_index[NR_CPUS];
-
- /*
* To manage allocations in a NUMA node.
* Array indexed by numa node ID.
*/
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 1f450c39a9b0..12eb8caa4263 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -29,6 +29,7 @@
#include <linux/uio.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
+#include <linux/prefetch.h>
#include "hyperv_vmbus.h"
@@ -94,30 +95,6 @@ hv_set_next_write_location(struct hv_ring_buffer_info *ring_info,
ring_info->ring_buffer->write_index = next_write_location;
}
-/* Get the next read location for the specified ring buffer. */
-static inline u32
-hv_get_next_read_location(const struct hv_ring_buffer_info *ring_info)
-{
- return ring_info->ring_buffer->read_index;
-}
-
-/*
- * Get the next read location + offset for the specified ring buffer.
- * This allows the caller to skip.
- */
-static inline u32
-hv_get_next_readlocation_withoffset(const struct hv_ring_buffer_info *ring_info,
- u32 offset)
-{
- u32 next = ring_info->ring_buffer->read_index;
-
- next += offset;
- if (next >= ring_info->ring_datasize)
- next -= ring_info->ring_datasize;
-
- return next;
-}
-
/* Set the next read location for the specified ring buffer. */
static inline void
hv_set_next_read_location(struct hv_ring_buffer_info *ring_info,
@@ -142,29 +119,6 @@ hv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info)
}
/*
- * Helper routine to copy to source from ring buffer.
- * Assume there is enough room. Handles wrap-around in src case only!!
- */
-static u32 hv_copyfrom_ringbuffer(
- const struct hv_ring_buffer_info *ring_info,
- void *dest,
- u32 destlen,
- u32 start_read_offset)
-{
- void *ring_buffer = hv_get_ring_buffer(ring_info);
- u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
-
- memcpy(dest, ring_buffer + start_read_offset, destlen);
-
- start_read_offset += destlen;
- if (start_read_offset >= ring_buffer_size)
- start_read_offset -= ring_buffer_size;
-
- return start_read_offset;
-}
-
-
-/*
* Helper routine to copy from source to ring buffer.
* Assume there is enough room. Handles wrap-around in dest case only!!
*/
@@ -334,33 +288,22 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
return 0;
}
-static inline void
-init_cached_read_index(struct hv_ring_buffer_info *rbi)
-{
- rbi->cached_read_index = rbi->ring_buffer->read_index;
-}
-
int hv_ringbuffer_read(struct vmbus_channel *channel,
void *buffer, u32 buflen, u32 *buffer_actual_len,
u64 *requestid, bool raw)
{
- u32 bytes_avail_toread;
- u32 next_read_location;
- u64 prev_indices = 0;
- struct vmpacket_descriptor desc;
- u32 offset;
- u32 packetlen;
- struct hv_ring_buffer_info *inring_info = &channel->inbound;
-
- if (buflen <= 0)
+ struct vmpacket_descriptor *desc;
+ u32 packetlen, offset;
+
+ if (unlikely(buflen == 0))
return -EINVAL;
*buffer_actual_len = 0;
*requestid = 0;
- bytes_avail_toread = hv_get_bytes_to_read(inring_info);
/* Make sure there is something to read */
- if (bytes_avail_toread < sizeof(desc)) {
+ desc = hv_pkt_iter_first(channel);
+ if (desc == NULL) {
/*
* No error is set when there is even no header, drivers are
* supposed to analyze buffer_actual_len.
@@ -368,48 +311,22 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
return 0;
}
- init_cached_read_index(inring_info);
-
- next_read_location = hv_get_next_read_location(inring_info);
- next_read_location = hv_copyfrom_ringbuffer(inring_info, &desc,
- sizeof(desc),
- next_read_location);
-
- offset = raw ? 0 : (desc.offset8 << 3);
- packetlen = (desc.len8 << 3) - offset;
+ offset = raw ? 0 : (desc->offset8 << 3);
+ packetlen = (desc->len8 << 3) - offset;
*buffer_actual_len = packetlen;
- *requestid = desc.trans_id;
-
- if (bytes_avail_toread < packetlen + offset)
- return -EAGAIN;
+ *requestid = desc->trans_id;
- if (packetlen > buflen)
+ if (unlikely(packetlen > buflen))
return -ENOBUFS;
- next_read_location =
- hv_get_next_readlocation_withoffset(inring_info, offset);
+ /* since ring is double mapped, only one copy is necessary */
+ memcpy(buffer, (const char *)desc + offset, packetlen);
- next_read_location = hv_copyfrom_ringbuffer(inring_info,
- buffer,
- packetlen,
- next_read_location);
+ /* Advance ring index to next packet descriptor */
+ __hv_pkt_iter_next(channel, desc);
- next_read_location = hv_copyfrom_ringbuffer(inring_info,
- &prev_indices,
- sizeof(u64),
- next_read_location);
-
- /*
- * Make sure all reads are done before we update the read index since
- * the writer may start writing to the read area once the read index
- * is updated.
- */
- virt_mb();
-
- /* Update the read index */
- hv_set_next_read_location(inring_info, next_read_location);
-
- hv_signal_on_read(channel);
+ /* Notify host of update */
+ hv_pkt_iter_close(channel);
return 0;
}
@@ -440,14 +357,16 @@ static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
{
struct hv_ring_buffer_info *rbi = &channel->inbound;
-
- /* set state for later hv_signal_on_read() */
- init_cached_read_index(rbi);
+ struct vmpacket_descriptor *desc;
if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
return NULL;
- return hv_get_ring_buffer(rbi) + rbi->priv_read_index;
+ desc = hv_get_ring_buffer(rbi) + rbi->priv_read_index;
+ if (desc)
+ prefetch((char *)desc + (desc->len8 << 3));
+
+ return desc;
}
EXPORT_SYMBOL_GPL(hv_pkt_iter_first);
@@ -471,10 +390,7 @@ __hv_pkt_iter_next(struct vmbus_channel *channel,
rbi->priv_read_index -= dsize;
/* more data? */
- if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
- return NULL;
- else
- return hv_get_ring_buffer(rbi) + rbi->priv_read_index;
+ return hv_pkt_iter_first(channel);
}
EXPORT_SYMBOL_GPL(__hv_pkt_iter_next);
@@ -484,6 +400,7 @@ EXPORT_SYMBOL_GPL(__hv_pkt_iter_next);
void hv_pkt_iter_close(struct vmbus_channel *channel)
{
struct hv_ring_buffer_info *rbi = &channel->inbound;
+ u32 orig_write_sz = hv_get_bytes_to_write(rbi);
/*
* Make sure all reads are done before we update the read index since
@@ -493,6 +410,40 @@ void hv_pkt_iter_close(struct vmbus_channel *channel)
virt_rmb();
rbi->ring_buffer->read_index = rbi->priv_read_index;
- hv_signal_on_read(channel);
+ /*
+ * Issue a full memory barrier before making the signaling decision.
+ * Here is the reason for having this barrier:
+ * If the reading of the pend_sz (in this function)
+ * were to be reordered and read before we commit the new read
+ * index (in the calling function) we could
+ * have a problem. If the host were to set the pending_sz after we
+ * have sampled pending_sz and go to sleep before we commit the
+ * read index, we could miss sending the interrupt. Issue a full
+ * memory barrier to address this.
+ */
+ virt_mb();
+
+ /* If host has disabled notifications then skip */
+ if (rbi->ring_buffer->interrupt_mask)
+ return;
+
+ if (rbi->ring_buffer->feature_bits.feat_pending_send_sz) {
+ u32 pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
+
+ /*
+ * If there was space before we began iteration,
+ * then host was not blocked. Also handles case where
+ * pending_sz is zero then host has nothing pending
+ * and does not need to be signaled.
+ */
+ if (orig_write_sz > pending_sz)
+ return;
+
+ /* If pending write will not fit, don't give false hope. */
+ if (hv_get_bytes_to_write(rbi) < pending_sz)
+ return;
+ }
+
+ vmbus_setevent(channel);
}
EXPORT_SYMBOL_GPL(hv_pkt_iter_close);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index ed84e96715a0..937801ac2fe0 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -768,8 +768,7 @@ static void vmbus_device_release(struct device *device)
struct vmbus_channel *channel = hv_dev->channel;
mutex_lock(&vmbus_connection.channel_mutex);
- hv_process_channel_removal(channel,
- channel->offermsg.child_relid);
+ hv_process_channel_removal(channel->offermsg.child_relid);
mutex_unlock(&vmbus_connection.channel_mutex);
kfree(hv_dev);
@@ -940,6 +939,9 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
if (channel->offermsg.child_relid != relid)
continue;
+ if (channel->rescind)
+ continue;
+
switch (channel->callback_mode) {
case HV_CALL_ISR:
vmbus_channel_isr(channel);
@@ -1451,23 +1453,6 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size)
}
EXPORT_SYMBOL_GPL(vmbus_free_mmio);
-/**
- * vmbus_cpu_number_to_vp_number() - Map CPU to VP.
- * @cpu_number: CPU number in Linux terms
- *
- * This function returns the mapping between the Linux processor
- * number and the hypervisor's virtual processor number, useful
- * in making hypercalls and such that talk about specific
- * processors.
- *
- * Return: Virtual processor number in Hyper-V terms
- */
-int vmbus_cpu_number_to_vp_number(int cpu_number)
-{
- return hv_context.vp_index[cpu_number];
-}
-EXPORT_SYMBOL_GPL(vmbus_cpu_number_to_vp_number);
-
static int vmbus_acpi_add(struct acpi_device *device)
{
acpi_status result;
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index d1ea99a12cf2..5176be76ca7d 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -148,6 +148,10 @@ struct netvsc_device_info {
unsigned char mac_adr[ETH_ALEN];
int ring_size;
u32 num_chn;
+ u32 send_sections;
+ u32 recv_sections;
+ u32 send_section_size;
+ u32 recv_section_size;
};
enum rndis_device_state {
@@ -202,6 +206,8 @@ int netvsc_recv_callback(struct net_device *net,
const struct ndis_pkt_8021q_info *vlan);
void netvsc_channel_cb(void *context);
int netvsc_poll(struct napi_struct *napi, int budget);
+
+void rndis_set_subchannel(struct work_struct *w);
bool rndis_filter_opened(const struct netvsc_device *nvdev);
int rndis_filter_open(struct netvsc_device *nvdev);
int rndis_filter_close(struct netvsc_device *nvdev);
@@ -211,7 +217,7 @@ void rndis_filter_update(struct netvsc_device *nvdev);
void rndis_filter_device_remove(struct hv_device *dev,
struct netvsc_device *nvdev);
int rndis_filter_set_rss_param(struct rndis_device *rdev,
- const u8 *key, int num_queue);
+ const u8 *key);
int rndis_filter_receive(struct net_device *ndev,
struct netvsc_device *net_dev,
struct hv_device *dev,
@@ -634,12 +640,12 @@ struct nvsp_message {
#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024 * 15) /* 15MB */
#define NETVSC_INVALID_INDEX -1
+#define NETVSC_SEND_SECTION_SIZE 6144
+#define NETVSC_RECV_SECTION_SIZE 1728
#define NETVSC_RECEIVE_BUFFER_ID 0xcafe
#define NETVSC_SEND_BUFFER_ID 0
-#define NETVSC_PACKET_SIZE 4096
-
#define VRSS_SEND_TAB_SIZE 16 /* must be power of 2 */
#define VRSS_CHANNEL_MAX 64
#define VRSS_CHANNEL_DEFAULT 8
@@ -678,6 +684,8 @@ struct netvsc_ethtool_stats {
unsigned long tx_no_space;
unsigned long tx_too_big;
unsigned long tx_busy;
+ unsigned long tx_send_full;
+ unsigned long rx_comp_busy;
};
struct netvsc_vf_pcpu_stats {
@@ -716,6 +724,8 @@ struct net_device_context {
u32 tx_send_table[VRSS_SEND_TAB_SIZE];
/* Ethtool settings */
+ bool udp4_l4_hash;
+ bool udp6_l4_hash;
u8 duplex;
u32 speed;
struct netvsc_ethtool_stats eth_stats;
@@ -723,7 +733,7 @@ struct net_device_context {
/* State to manage the associated VF interface. */
struct net_device __rcu *vf_netdev;
struct netvsc_vf_pcpu_stats __percpu *vf_stats;
- struct work_struct vf_takeover;
+ struct delayed_work vf_takeover;
/* 1: allocated, serial number is valid. 0: not allocated */
u32 vf_alloc;
@@ -754,14 +764,13 @@ struct netvsc_device {
/* Receive buffer allocated by us but manages by NetVSP */
void *recv_buf;
- u32 recv_buf_size;
u32 recv_buf_gpadl_handle;
u32 recv_section_cnt;
+ u32 recv_section_size;
u32 recv_completion_cnt;
/* Send buffer allocated by us */
void *send_buf;
- u32 send_buf_size;
u32 send_buf_gpadl_handle;
u32 send_section_cnt;
u32 send_section_size;
@@ -776,7 +785,9 @@ struct netvsc_device {
u32 max_chn;
u32 num_chn;
- refcount_t sc_offered;
+ atomic_t open_chn;
+ struct work_struct subchan_work;
+ wait_queue_head_t subchan_open;
struct rndis_device *extension;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 208f03aa83de..8d5077fb0492 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -75,7 +75,10 @@ static struct netvsc_device *alloc_net_device(void)
atomic_set(&net_device->open_cnt, 0);
net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
+
init_completion(&net_device->channel_init_wait);
+ init_waitqueue_head(&net_device->subchan_open);
+ INIT_WORK(&net_device->subchan_work, rndis_set_subchannel);
return net_device;
}
@@ -142,6 +145,7 @@ static void netvsc_destroy_buf(struct hv_device *device)
"revoke receive buffer to netvsp\n");
return;
}
+ net_device->recv_section_cnt = 0;
}
/* Teardown the gpadl on the vsp end */
@@ -172,7 +176,7 @@ static void netvsc_destroy_buf(struct hv_device *device)
* NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need
* to send a revoke msg here
*/
- if (net_device->send_section_size) {
+ if (net_device->send_section_cnt) {
/* Send the revoke receive buffer */
revoke_packet = &net_device->revoke_packet;
memset(revoke_packet, 0, sizeof(struct nvsp_message));
@@ -204,6 +208,7 @@ static void netvsc_destroy_buf(struct hv_device *device)
"revoke send buffer to netvsp\n");
return;
}
+ net_device->send_section_cnt = 0;
}
/* Teardown the gpadl on the vsp end */
if (net_device->send_buf_gpadl_handle) {
@@ -243,25 +248,25 @@ int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
}
static int netvsc_init_buf(struct hv_device *device,
- struct netvsc_device *net_device)
+ struct netvsc_device *net_device,
+ const struct netvsc_device_info *device_info)
{
- int ret = 0;
- struct nvsp_message *init_packet;
struct nvsp_1_message_send_receive_buffer_complete *resp;
- struct net_device *ndev;
+ struct net_device *ndev = hv_get_drvdata(device);
+ struct nvsp_message *init_packet;
+ unsigned int buf_size;
size_t map_words;
- int node;
-
- ndev = hv_get_drvdata(device);
+ int ret = 0;
- node = cpu_to_node(device->channel->target_cpu);
- net_device->recv_buf = vzalloc_node(net_device->recv_buf_size, node);
- if (!net_device->recv_buf)
- net_device->recv_buf = vzalloc(net_device->recv_buf_size);
+ /* Get receive buffer area. */
+ buf_size = device_info->recv_sections * device_info->recv_section_size;
+ buf_size = roundup(buf_size, PAGE_SIZE);
+ net_device->recv_buf = vzalloc(buf_size);
if (!net_device->recv_buf) {
- netdev_err(ndev, "unable to allocate receive "
- "buffer of size %d\n", net_device->recv_buf_size);
+ netdev_err(ndev,
+ "unable to allocate receive buffer of size %u\n",
+ buf_size);
ret = -ENOMEM;
goto cleanup;
}
@@ -272,7 +277,7 @@ static int netvsc_init_buf(struct hv_device *device,
* than the channel to establish the gpadl handle.
*/
ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
- net_device->recv_buf_size,
+ buf_size,
&net_device->recv_buf_gpadl_handle);
if (ret != 0) {
netdev_err(ndev,
@@ -318,33 +323,31 @@ static int netvsc_init_buf(struct hv_device *device,
resp->num_sections, resp->sections[0].sub_alloc_size,
resp->sections[0].num_sub_allocs);
- net_device->recv_section_cnt = resp->num_sections;
-
- /*
- * For 1st release, there should only be 1 section that represents the
- * entire receive buffer
- */
- if (net_device->recv_section_cnt != 1 ||
- resp->sections[0].offset != 0) {
+ /* There should only be one section for the entire receive buffer */
+ if (resp->num_sections != 1 || resp->sections[0].offset != 0) {
ret = -EINVAL;
goto cleanup;
}
+ net_device->recv_section_size = resp->sections[0].sub_alloc_size;
+ net_device->recv_section_cnt = resp->sections[0].num_sub_allocs;
+
/* Setup receive completion ring */
net_device->recv_completion_cnt
- = round_up(resp->sections[0].num_sub_allocs + 1,
+ = round_up(net_device->recv_section_cnt + 1,
PAGE_SIZE / sizeof(u64));
ret = netvsc_alloc_recv_comp_ring(net_device, 0);
if (ret)
goto cleanup;
/* Now setup the send buffer. */
- net_device->send_buf = vzalloc_node(net_device->send_buf_size, node);
- if (!net_device->send_buf)
- net_device->send_buf = vzalloc(net_device->send_buf_size);
+ buf_size = device_info->send_sections * device_info->send_section_size;
+ buf_size = round_up(buf_size, PAGE_SIZE);
+
+ net_device->send_buf = vzalloc(buf_size);
if (!net_device->send_buf) {
- netdev_err(ndev, "unable to allocate send "
- "buffer of size %d\n", net_device->send_buf_size);
+ netdev_err(ndev, "unable to allocate send buffer of size %u\n",
+ buf_size);
ret = -ENOMEM;
goto cleanup;
}
@@ -354,7 +357,7 @@ static int netvsc_init_buf(struct hv_device *device,
* than the channel to establish the gpadl handle.
*/
ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
- net_device->send_buf_size,
+ buf_size,
&net_device->send_buf_gpadl_handle);
if (ret != 0) {
netdev_err(ndev,
@@ -399,10 +402,8 @@ static int netvsc_init_buf(struct hv_device *device,
net_device->send_section_size = init_packet->msg.
v1_msg.send_send_buf_complete.section_size;
- /* Section count is simply the size divided by the section size.
- */
- net_device->send_section_cnt =
- net_device->send_buf_size / net_device->send_section_size;
+ /* Section count is simply the size divided by the section size. */
+ net_device->send_section_cnt = buf_size / net_device->send_section_size;
netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
net_device->send_section_size, net_device->send_section_cnt);
@@ -480,7 +481,8 @@ static int negotiate_nvsp_ver(struct hv_device *device,
}
static int netvsc_connect_vsp(struct hv_device *device,
- struct netvsc_device *net_device)
+ struct netvsc_device *net_device,
+ const struct netvsc_device_info *device_info)
{
const u32 ver_list[] = {
NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
@@ -530,14 +532,8 @@ static int netvsc_connect_vsp(struct hv_device *device,
if (ret != 0)
goto cleanup;
- /* Post the big receive buffer to NetVSP */
- if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
- net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
- else
- net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
- net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
- ret = netvsc_init_buf(device, net_device);
+ ret = netvsc_init_buf(device, net_device, device_info);
cleanup:
return ret;
@@ -559,6 +555,8 @@ void netvsc_device_remove(struct hv_device *device)
= rtnl_dereference(net_device_ctx->nvdev);
int i;
+ cancel_work_sync(&net_device->subchan_work);
+
netvsc_disconnect_vsp(device);
RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
@@ -777,18 +775,15 @@ static inline int netvsc_send_pkt(
if (packet->cp_partial)
pb += packet->rmsg_pgcnt;
- ret = vmbus_sendpacket_pagebuffer_ctl(out_channel,
- pb, packet->page_buf_cnt,
- &nvmsg,
- sizeof(struct nvsp_message),
- req_id,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ ret = vmbus_sendpacket_pagebuffer(out_channel,
+ pb, packet->page_buf_cnt,
+ &nvmsg, sizeof(nvmsg),
+ req_id);
} else {
- ret = vmbus_sendpacket_ctl(out_channel, &nvmsg,
- sizeof(struct nvsp_message),
- req_id,
- VM_PKT_DATA_INBAND,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ ret = vmbus_sendpacket(out_channel,
+ &nvmsg, sizeof(nvmsg),
+ req_id, VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
}
if (ret == 0) {
@@ -885,7 +880,9 @@ int netvsc_send(struct net_device_context *ndev_ctx,
} else if (pktlen + net_device->pkt_align <
net_device->send_section_size) {
section_index = netvsc_get_next_send_section(net_device);
- if (section_index != NETVSC_INVALID_INDEX) {
+ if (unlikely(section_index == NETVSC_INVALID_INDEX)) {
+ ++ndev_ctx->eth_stats.tx_send_full;
+ } else {
move_pkt_msd(&msd_send, &msd_skb, msdp);
msd_len = 0;
}
@@ -951,9 +948,10 @@ send_now:
}
/* Send pending recv completions */
-static int send_recv_completions(struct netvsc_channel *nvchan)
+static int send_recv_completions(struct net_device *ndev,
+ struct netvsc_device *nvdev,
+ struct netvsc_channel *nvchan)
{
- struct netvsc_device *nvdev = nvchan->net_device;
struct multi_recv_comp *mrc = &nvchan->mrc;
struct recv_comp_msg {
struct nvsp_message_header hdr;
@@ -971,8 +969,12 @@ static int send_recv_completions(struct netvsc_channel *nvchan)
msg.status = rcd->status;
ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
rcd->tid, VM_PKT_COMP, 0);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+
+ ++ndev_ctx->eth_stats.rx_comp_busy;
return ret;
+ }
if (++mrc->first == nvdev->recv_completion_cnt)
mrc->first = 0;
@@ -1013,7 +1015,7 @@ static void enq_receive_complete(struct net_device *ndev,
recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
if (unlikely(filled > NAPI_POLL_WEIGHT)) {
- send_recv_completions(nvchan);
+ send_recv_completions(ndev, nvdev, nvchan);
recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
}
@@ -1190,17 +1192,13 @@ int netvsc_poll(struct napi_struct *napi, int budget)
nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
}
- /* if ring is empty, signal host */
- if (!nvchan->desc)
- hv_pkt_iter_close(channel);
-
/* If send of pending receive completions suceeded
* and did not exhaust NAPI budget this time
* and not doing busy poll
* then re-enable host interrupts
* and reschedule if ring is not empty.
*/
- if (send_recv_completions(nvchan) == 0 &&
+ if (send_recv_completions(ndev, net_device, nvchan) == 0 &&
work_done < budget &&
napi_complete_done(napi, work_done) &&
hv_end_read(&channel->inbound)) {
@@ -1268,6 +1266,8 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
nvchan->channel = device->channel;
nvchan->net_device = net_device;
+ u64_stats_init(&nvchan->tx_stats.syncp);
+ u64_stats_init(&nvchan->rx_stats.syncp);
}
/* Enable NAPI handler before init callbacks */
@@ -1297,7 +1297,7 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
rcu_assign_pointer(net_device_ctx->nvdev, net_device);
/* Connect with the NetVsp */
- ret = netvsc_connect_vsp(device, net_device);
+ ret = netvsc_connect_vsp(device, net_device, device_info);
if (ret != 0) {
netdev_err(ndev,
"unable to connect to NetVSP - %d\n", ret);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index d5d63184f65b..a32ae02e1b6c 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -45,8 +45,14 @@
#include "hyperv_net.h"
-#define RING_SIZE_MIN 64
+#define RING_SIZE_MIN 64
+#define NETVSC_MIN_TX_SECTIONS 10
+#define NETVSC_DEFAULT_TX 192 /* ~1M */
+#define NETVSC_MIN_RX_SECTIONS 10 /* ~64K */
+#define NETVSC_DEFAULT_RX 10485 /* Max ~16M */
+
#define LINKCHANGE_INT (2 * HZ)
+#define VF_TAKEOVER_INT (HZ / 10)
static int ring_size = 128;
module_param(ring_size, int, S_IRUGO);
@@ -113,12 +119,16 @@ static int netvsc_close(struct net_device *net)
struct net_device *vf_netdev
= rtnl_dereference(net_device_ctx->vf_netdev);
struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
- int ret;
+ int ret = 0;
u32 aread, i, msec = 10, retry = 0, retry_max = 20;
struct vmbus_channel *chn;
netif_tx_disable(net);
+ /* No need to close rndis filter if it is removed already */
+ if (!nvdev)
+ goto out;
+
ret = rndis_filter_close(nvdev);
if (ret != 0) {
netdev_err(net, "unable to close device (ret %d).\n", ret);
@@ -157,6 +167,7 @@ static int netvsc_close(struct net_device *net)
ret = -ETIMEDOUT;
}
+out:
if (vf_netdev)
dev_close(vf_netdev);
@@ -164,7 +175,7 @@ static int netvsc_close(struct net_device *net)
}
static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
- int pkt_type)
+ int pkt_type)
{
struct rndis_packet *rndis_pkt;
struct rndis_per_packet_info *ppi;
@@ -184,10 +195,12 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
return ppi;
}
-/* Azure hosts don't support non-TCP port numbers in hashing yet. We compute
- * hash for non-TCP traffic with only IP numbers.
+/* Azure hosts don't support non-TCP port numbers in hashing for fragmented
+ * packets. We can use ethtool to change UDP hash level when necessary.
*/
-static inline u32 netvsc_get_hash(struct sk_buff *skb, struct sock *sk)
+static inline u32 netvsc_get_hash(
+ struct sk_buff *skb,
+ const struct net_device_context *ndc)
{
struct flow_keys flow;
u32 hash;
@@ -198,7 +211,11 @@ static inline u32 netvsc_get_hash(struct sk_buff *skb, struct sock *sk)
if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
return 0;
- if (flow.basic.ip_proto == IPPROTO_TCP) {
+ if (flow.basic.ip_proto == IPPROTO_TCP ||
+ (flow.basic.ip_proto == IPPROTO_UDP &&
+ ((flow.basic.n_proto == htons(ETH_P_IP) && ndc->udp4_l4_hash) ||
+ (flow.basic.n_proto == htons(ETH_P_IPV6) &&
+ ndc->udp6_l4_hash)))) {
return skb_get_hash(skb);
} else {
if (flow.basic.n_proto == htons(ETH_P_IP))
@@ -221,7 +238,7 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev,
struct sock *sk = skb->sk;
int q_idx;
- q_idx = ndc->tx_send_table[netvsc_get_hash(skb, sk) &
+ q_idx = ndc->tx_send_table[netvsc_get_hash(skb, ndc) &
(VRSS_SEND_TAB_SIZE - 1)];
/* If queue index changed record the new value */
@@ -285,7 +302,7 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
}
static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
- struct hv_page_buffer *pb)
+ struct hv_page_buffer *pb)
{
int j = 0;
@@ -332,10 +349,9 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
* 2. skb linear data
* 3. skb fragment data
*/
- if (hdr != NULL)
- slots_used += fill_pg_buf(virt_to_page(hdr),
- offset_in_page(hdr),
- len, &pb[slots_used]);
+ slots_used += fill_pg_buf(virt_to_page(hdr),
+ offset_in_page(hdr),
+ len, &pb[slots_used]);
packet->rmsg_size = len;
packet->rmsg_pgcnt = slots_used;
@@ -522,9 +538,9 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
rndis_msg_size += NDIS_VLAN_PPI_SIZE;
ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
- IEEE_8021Q_INFO);
- vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
- ppi->ppi_offset);
+ IEEE_8021Q_INFO);
+
+ vlan = (void *)ppi + ppi->ppi_offset;
vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
VLAN_PRIO_SHIFT;
@@ -537,8 +553,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
TCP_LARGESEND_PKTINFO);
- lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
- ppi->ppi_offset);
+ lso_info = (void *)ppi + ppi->ppi_offset;
lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
if (skb->protocol == htons(ETH_P_IP)) {
@@ -625,6 +640,7 @@ no_memory:
++net_device_ctx->eth_stats.tx_no_memory;
goto drop;
}
+
/*
* netvsc_linkstatus_callback - Link up/down notification
*/
@@ -648,8 +664,8 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
u32 speed;
- speed = *(u32 *)((void *)indicate + indicate->
- status_buf_offset) / 10000;
+ speed = *(u32 *)((void *)indicate
+ + indicate->status_buf_offset) / 10000;
ndev_ctx->speed = speed;
return;
}
@@ -814,9 +830,6 @@ static int netvsc_set_channels(struct net_device *net,
channels->rx_count || channels->tx_count || channels->other_count)
return -EINVAL;
- if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX)
- return -EINVAL;
-
if (!nvdev || nvdev->destroy)
return -ENODEV;
@@ -831,20 +844,27 @@ static int netvsc_set_channels(struct net_device *net,
if (was_opened)
rndis_filter_close(nvdev);
- rndis_filter_device_remove(dev, nvdev);
-
memset(&device_info, 0, sizeof(device_info));
device_info.num_chn = count;
device_info.ring_size = ring_size;
+ device_info.send_sections = nvdev->send_section_cnt;
+ device_info.send_section_size = nvdev->send_section_size;
+ device_info.recv_sections = nvdev->recv_section_cnt;
+ device_info.recv_section_size = nvdev->recv_section_size;
+
+ rndis_filter_device_remove(dev, nvdev);
nvdev = rndis_filter_device_add(dev, &device_info);
- if (!IS_ERR(nvdev)) {
- netif_set_real_num_tx_queues(net, nvdev->num_chn);
- netif_set_real_num_rx_queues(net, nvdev->num_chn);
- } else {
+ if (IS_ERR(nvdev)) {
ret = PTR_ERR(nvdev);
device_info.num_chn = orig;
- rndis_filter_device_add(dev, &device_info);
+ nvdev = rndis_filter_device_add(dev, &device_info);
+
+ if (IS_ERR(nvdev)) {
+ netdev_err(net, "restoring channel setting failed: %ld\n",
+ PTR_ERR(nvdev));
+ return ret;
+ }
}
if (was_opened)
@@ -878,6 +898,9 @@ static void netvsc_init_settings(struct net_device *dev)
{
struct net_device_context *ndc = netdev_priv(dev);
+ ndc->udp4_l4_hash = true;
+ ndc->udp6_l4_hash = true;
+
ndc->speed = SPEED_UNKNOWN;
ndc->duplex = DUPLEX_FULL;
}
@@ -941,6 +964,10 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
device_info.num_chn = nvdev->num_chn;
+ device_info.send_sections = nvdev->send_section_cnt;
+ device_info.send_section_size = nvdev->send_section_size;
+ device_info.recv_sections = nvdev->recv_section_cnt;
+ device_info.recv_section_size = nvdev->recv_section_size;
rndis_filter_device_remove(hdev, nvdev);
@@ -952,10 +979,16 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
/* Attempt rollback to original MTU */
ndev->mtu = orig_mtu;
- rndis_filter_device_add(hdev, &device_info);
+ nvdev = rndis_filter_device_add(hdev, &device_info);
if (vf_netdev)
dev_set_mtu(vf_netdev, orig_mtu);
+
+ if (IS_ERR(nvdev)) {
+ netdev_err(ndev, "restoring mtu failed: %ld\n",
+ PTR_ERR(nvdev));
+ return ret;
+ }
}
if (was_opened)
@@ -1005,7 +1038,7 @@ static void netvsc_get_stats64(struct net_device *net,
struct net_device_context *ndev_ctx = netdev_priv(net);
struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
struct netvsc_vf_pcpu_stats vf_tot;
- int i;
+ int i;
if (!nvdev)
return;
@@ -1052,27 +1085,31 @@ static void netvsc_get_stats64(struct net_device *net,
static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
{
struct net_device_context *ndc = netdev_priv(ndev);
+ struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev);
struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
struct sockaddr *addr = p;
- char save_adr[ETH_ALEN];
- unsigned char save_aatype;
int err;
- memcpy(save_adr, ndev->dev_addr, ETH_ALEN);
- save_aatype = ndev->addr_assign_type;
-
- err = eth_mac_addr(ndev, p);
- if (err != 0)
+ err = eth_prepare_mac_addr_change(ndev, p);
+ if (err)
return err;
if (!nvdev)
return -ENODEV;
+ if (vf_netdev) {
+ err = dev_set_mac_address(vf_netdev, addr);
+ if (err)
+ return err;
+ }
+
err = rndis_filter_set_device_mac(nvdev, addr->sa_data);
- if (err != 0) {
- /* roll back to saved MAC */
- memcpy(ndev->dev_addr, save_adr, ETH_ALEN);
- ndev->addr_assign_type = save_aatype;
+ if (!err) {
+ eth_commit_mac_addr_change(ndev, p);
+ } else if (vf_netdev) {
+ /* rollback change on VF */
+ memcpy(addr->sa_data, ndev->dev_addr, ETH_ALEN);
+ dev_set_mac_address(vf_netdev, addr);
}
return err;
@@ -1087,6 +1124,8 @@ static const struct {
{ "tx_no_space", offsetof(struct netvsc_ethtool_stats, tx_no_space) },
{ "tx_too_big", offsetof(struct netvsc_ethtool_stats, tx_too_big) },
{ "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) },
+ { "tx_send_full", offsetof(struct netvsc_ethtool_stats, tx_send_full) },
+ { "rx_comp_busy", offsetof(struct netvsc_ethtool_stats, rx_comp_busy) },
}, vf_stats[] = {
{ "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
{ "vf_rx_bytes", offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
@@ -1201,7 +1240,7 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
}
static int
-netvsc_get_rss_hash_opts(struct netvsc_device *nvdev,
+netvsc_get_rss_hash_opts(struct net_device_context *ndc,
struct ethtool_rxnfc *info)
{
info->data = RXH_IP_SRC | RXH_IP_DST;
@@ -1210,9 +1249,20 @@ netvsc_get_rss_hash_opts(struct netvsc_device *nvdev,
case TCP_V4_FLOW:
case TCP_V6_FLOW:
info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
- /* fallthrough */
+ break;
+
case UDP_V4_FLOW:
+ if (ndc->udp4_l4_hash)
+ info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+
+ break;
+
case UDP_V6_FLOW:
+ if (ndc->udp6_l4_hash)
+ info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+
+ break;
+
case IPV4_FLOW:
case IPV6_FLOW:
break;
@@ -1240,11 +1290,51 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
return 0;
case ETHTOOL_GRXFH:
- return netvsc_get_rss_hash_opts(nvdev, info);
+ return netvsc_get_rss_hash_opts(ndc, info);
}
return -EOPNOTSUPP;
}
+static int netvsc_set_rss_hash_opts(struct net_device_context *ndc,
+ struct ethtool_rxnfc *info)
+{
+ if (info->data == (RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+ if (info->flow_type == UDP_V4_FLOW)
+ ndc->udp4_l4_hash = true;
+ else if (info->flow_type == UDP_V6_FLOW)
+ ndc->udp6_l4_hash = true;
+ else
+ return -EOPNOTSUPP;
+
+ return 0;
+ }
+
+ if (info->data == (RXH_IP_SRC | RXH_IP_DST)) {
+ if (info->flow_type == UDP_V4_FLOW)
+ ndc->udp4_l4_hash = false;
+ else if (info->flow_type == UDP_V6_FLOW)
+ ndc->udp6_l4_hash = false;
+ else
+ return -EOPNOTSUPP;
+
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int
+netvsc_set_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *info)
+{
+ struct net_device_context *ndc = netdev_priv(ndev);
+
+ if (info->cmd == ETHTOOL_SRXFH)
+ return netvsc_set_rss_hash_opts(ndc, info);
+
+ return -EOPNOTSUPP;
+}
+
#ifdef CONFIG_NET_POLL_CONTROLLER
static void netvsc_poll_controller(struct net_device *dev)
{
@@ -1318,7 +1408,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
rndis_dev = ndev->extension;
if (indir) {
for (i = 0; i < ITAB_NUM; i++)
- if (indir[i] >= VRSS_CHANNEL_MAX)
+ if (indir[i] >= ndev->num_chn)
return -EINVAL;
for (i = 0; i < ITAB_NUM; i++)
@@ -1332,7 +1422,107 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
key = rndis_dev->rss_key;
}
- return rndis_filter_set_rss_param(rndis_dev, key, ndev->num_chn);
+ return rndis_filter_set_rss_param(rndis_dev, key);
+}
+
+/* Hyper-V RNDIS protocol does not have ring in the HW sense.
+ * It does have pre-allocated receive area which is divided into sections.
+ */
+static void __netvsc_get_ringparam(struct netvsc_device *nvdev,
+ struct ethtool_ringparam *ring)
+{
+ u32 max_buf_size;
+
+ ring->rx_pending = nvdev->recv_section_cnt;
+ ring->tx_pending = nvdev->send_section_cnt;
+
+ if (nvdev->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
+ max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
+ else
+ max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
+
+ ring->rx_max_pending = max_buf_size / nvdev->recv_section_size;
+ ring->tx_max_pending = NETVSC_SEND_BUFFER_SIZE
+ / nvdev->send_section_size;
+}
+
+static void netvsc_get_ringparam(struct net_device *ndev,
+ struct ethtool_ringparam *ring)
+{
+ struct net_device_context *ndevctx = netdev_priv(ndev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+
+ if (!nvdev)
+ return;
+
+ __netvsc_get_ringparam(nvdev, ring);
+}
+
+static int netvsc_set_ringparam(struct net_device *ndev,
+ struct ethtool_ringparam *ring)
+{
+ struct net_device_context *ndevctx = netdev_priv(ndev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+ struct hv_device *hdev = ndevctx->device_ctx;
+ struct netvsc_device_info device_info;
+ struct ethtool_ringparam orig;
+ u32 new_tx, new_rx;
+ bool was_opened;
+ int ret = 0;
+
+ if (!nvdev || nvdev->destroy)
+ return -ENODEV;
+
+ memset(&orig, 0, sizeof(orig));
+ __netvsc_get_ringparam(nvdev, &orig);
+
+ new_tx = clamp_t(u32, ring->tx_pending,
+ NETVSC_MIN_TX_SECTIONS, orig.tx_max_pending);
+ new_rx = clamp_t(u32, ring->rx_pending,
+ NETVSC_MIN_RX_SECTIONS, orig.rx_max_pending);
+
+ if (new_tx == orig.tx_pending &&
+ new_rx == orig.rx_pending)
+ return 0; /* no change */
+
+ memset(&device_info, 0, sizeof(device_info));
+ device_info.num_chn = nvdev->num_chn;
+ device_info.ring_size = ring_size;
+ device_info.send_sections = new_tx;
+ device_info.send_section_size = nvdev->send_section_size;
+ device_info.recv_sections = new_rx;
+ device_info.recv_section_size = nvdev->recv_section_size;
+
+ netif_device_detach(ndev);
+ was_opened = rndis_filter_opened(nvdev);
+ if (was_opened)
+ rndis_filter_close(nvdev);
+
+ rndis_filter_device_remove(hdev, nvdev);
+
+ nvdev = rndis_filter_device_add(hdev, &device_info);
+ if (IS_ERR(nvdev)) {
+ ret = PTR_ERR(nvdev);
+
+ device_info.send_sections = orig.tx_pending;
+ device_info.recv_sections = orig.rx_pending;
+ nvdev = rndis_filter_device_add(hdev, &device_info);
+ if (IS_ERR(nvdev)) {
+ netdev_err(ndev, "restoring ringparam failed: %ld\n",
+ PTR_ERR(nvdev));
+ return ret;
+ }
+ }
+
+ if (was_opened)
+ rndis_filter_open(nvdev);
+ netif_device_attach(ndev);
+
+ /* We may have missed link change notifications */
+ ndevctx->last_reconfig = 0;
+ schedule_delayed_work(&ndevctx->dwork, 0);
+
+ return ret;
}
static const struct ethtool_ops ethtool_ops = {
@@ -1345,12 +1535,15 @@ static const struct ethtool_ops ethtool_ops = {
.set_channels = netvsc_set_channels,
.get_ts_info = ethtool_op_get_ts_info,
.get_rxnfc = netvsc_get_rxnfc,
+ .set_rxnfc = netvsc_set_rxnfc,
.get_rxfh_key_size = netvsc_get_rxfh_key_size,
.get_rxfh_indir_size = netvsc_rss_indir_size,
.get_rxfh = netvsc_get_rxfh,
.set_rxfh = netvsc_set_rxfh,
.get_link_ksettings = netvsc_get_link_ksettings,
.set_link_ksettings = netvsc_set_link_ksettings,
+ .get_ringparam = netvsc_get_ringparam,
+ .set_ringparam = netvsc_set_ringparam,
};
static const struct net_device_ops device_ops = {
@@ -1564,7 +1757,9 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
/* set slave flag before open to prevent IPv6 addrconf */
vf_netdev->flags |= IFF_SLAVE;
- schedule_work(&ndev_ctx->vf_takeover);
+ schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
+
+ call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
netdev_info(vf_netdev, "joined to %s\n", ndev->name);
return 0;
@@ -1580,8 +1775,6 @@ static void __netvsc_vf_setup(struct net_device *ndev,
{
int ret;
- call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
-
/* Align MTU of VF with master */
ret = dev_set_mtu(vf_netdev, ndev->mtu);
if (ret)
@@ -1602,12 +1795,12 @@ static void __netvsc_vf_setup(struct net_device *ndev,
static void netvsc_vf_setup(struct work_struct *w)
{
struct net_device_context *ndev_ctx
- = container_of(w, struct net_device_context, vf_takeover);
+ = container_of(w, struct net_device_context, vf_takeover.work);
struct net_device *ndev = hv_get_drvdata(ndev_ctx->device_ctx);
struct net_device *vf_netdev;
if (!rtnl_trylock()) {
- schedule_work(w);
+ schedule_delayed_work(&ndev_ctx->vf_takeover, 0);
return;
}
@@ -1646,44 +1839,18 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
- /* Prevent this module from being unloaded while VF is registered */
- try_module_get(THIS_MODULE);
-
dev_hold(vf_netdev);
rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev);
return NOTIFY_OK;
}
-static int netvsc_vf_up(struct net_device *vf_netdev)
-{
- struct net_device_context *net_device_ctx;
- struct netvsc_device *netvsc_dev;
- struct net_device *ndev;
-
- ndev = get_netvsc_byref(vf_netdev);
- if (!ndev)
- return NOTIFY_DONE;
-
- net_device_ctx = netdev_priv(ndev);
- netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
- if (!netvsc_dev)
- return NOTIFY_DONE;
-
- /* Bump refcount when datapath is acvive - Why? */
- rndis_filter_open(netvsc_dev);
-
- /* notify the host to switch the data path. */
- netvsc_switch_datapath(ndev, true);
- netdev_info(ndev, "Data path switched to VF: %s\n", vf_netdev->name);
-
- return NOTIFY_OK;
-}
-
-static int netvsc_vf_down(struct net_device *vf_netdev)
+/* VF up/down change detected, schedule to change data path */
+static int netvsc_vf_changed(struct net_device *vf_netdev)
{
struct net_device_context *net_device_ctx;
struct netvsc_device *netvsc_dev;
struct net_device *ndev;
+ bool vf_is_up = netif_running(vf_netdev);
ndev = get_netvsc_byref(vf_netdev);
if (!ndev)
@@ -1694,9 +1861,9 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
if (!netvsc_dev)
return NOTIFY_DONE;
- netvsc_switch_datapath(ndev, false);
- netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
- rndis_filter_close(netvsc_dev);
+ netvsc_switch_datapath(ndev, vf_is_up);
+ netdev_info(ndev, "Data path switched %s VF: %s\n",
+ vf_is_up ? "to" : "from", vf_netdev->name);
return NOTIFY_OK;
}
@@ -1711,14 +1878,15 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
return NOTIFY_DONE;
net_device_ctx = netdev_priv(ndev);
- cancel_work_sync(&net_device_ctx->vf_takeover);
+ cancel_delayed_work_sync(&net_device_ctx->vf_takeover);
netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
+ netdev_rx_handler_unregister(vf_netdev);
netdev_upper_dev_unlink(vf_netdev, ndev);
RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
dev_put(vf_netdev);
- module_put(THIS_MODULE);
+
return NOTIFY_OK;
}
@@ -1753,7 +1921,7 @@ static int netvsc_probe(struct hv_device *dev,
spin_lock_init(&net_device_ctx->lock);
INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
- INIT_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
+ INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
net_device_ctx->vf_stats
= netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats);
@@ -1771,6 +1939,10 @@ static int netvsc_probe(struct hv_device *dev,
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
device_info.num_chn = VRSS_CHANNEL_DEFAULT;
+ device_info.send_sections = NETVSC_DEFAULT_TX;
+ device_info.send_section_size = NETVSC_SEND_SECTION_SIZE;
+ device_info.recv_sections = NETVSC_DEFAULT_RX;
+ device_info.recv_section_size = NETVSC_RECV_SECTION_SIZE;
nvdev = rndis_filter_device_add(dev, &device_info);
if (IS_ERR(nvdev)) {
@@ -1787,9 +1959,6 @@ static int netvsc_probe(struct hv_device *dev,
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
net->vlan_features = net->features;
- netif_set_real_num_tx_queues(net, nvdev->num_chn);
- netif_set_real_num_rx_queues(net, nvdev->num_chn);
-
netdev_lockdep_set_classes(net);
/* MTU range: 68 - 1500 or 65521 */
@@ -1820,11 +1989,11 @@ no_net:
static int netvsc_remove(struct hv_device *dev)
{
- struct net_device *net;
struct net_device_context *ndev_ctx;
+ struct net_device *vf_netdev;
+ struct net_device *net;
net = hv_get_drvdata(dev);
-
if (net == NULL) {
dev_err(&dev->device, "No net device to remove\n");
return 0;
@@ -1841,12 +2010,16 @@ static int netvsc_remove(struct hv_device *dev)
* removed. Also blocks mtu and channel changes.
*/
rtnl_lock();
+ vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+ if (vf_netdev)
+ netvsc_unregister_vf(vf_netdev);
+
+ unregister_netdevice(net);
+
rndis_filter_device_remove(dev,
rtnl_dereference(ndev_ctx->nvdev));
rtnl_unlock();
- unregister_netdev(net);
-
hv_set_drvdata(dev, NULL);
free_percpu(ndev_ctx->vf_stats);
@@ -1904,9 +2077,8 @@ static int netvsc_netdev_event(struct notifier_block *this,
case NETDEV_UNREGISTER:
return netvsc_unregister_vf(event_dev);
case NETDEV_UP:
- return netvsc_vf_up(event_dev);
case NETDEV_DOWN:
- return netvsc_vf_down(event_dev);
+ return netvsc_vf_changed(event_dev);
default:
return NOTIFY_DONE;
}
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 44165fe328a4..065b204d8e17 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -717,7 +717,7 @@ cleanup:
}
int rndis_filter_set_rss_param(struct rndis_device *rdev,
- const u8 *rss_key, int num_queue)
+ const u8 *rss_key)
{
struct net_device *ndev = rdev->ndev;
struct rndis_request *request;
@@ -1039,8 +1039,6 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
/* Set the channel before opening.*/
nvchan->channel = new_sc;
- netif_napi_add(ndev, &nvchan->napi,
- netvsc_poll, NAPI_POLL_WEIGHT);
ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
nvscdev->ring_size * PAGE_SIZE, NULL, 0,
@@ -1048,10 +1046,86 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
if (ret == 0)
napi_enable(&nvchan->napi);
else
- netif_napi_del(&nvchan->napi);
+ netdev_notice(ndev, "sub channel open failed: %d\n", ret);
- if (refcount_dec_and_test(&nvscdev->sc_offered))
- complete(&nvscdev->channel_init_wait);
+ if (atomic_inc_return(&nvscdev->open_chn) == nvscdev->num_chn)
+ wake_up(&nvscdev->subchan_open);
+}
+
+/* Open sub-channels after completing the handling of the device probe.
+ * This breaks overlap of processing the host message for the
+ * new primary channel with the initialization of sub-channels.
+ */
+void rndis_set_subchannel(struct work_struct *w)
+{
+ struct netvsc_device *nvdev
+ = container_of(w, struct netvsc_device, subchan_work);
+ struct nvsp_message *init_packet = &nvdev->channel_init_pkt;
+ struct net_device_context *ndev_ctx;
+ struct rndis_device *rdev;
+ struct net_device *ndev;
+ struct hv_device *hv_dev;
+ int i, ret;
+
+ if (!rtnl_trylock()) {
+ schedule_work(w);
+ return;
+ }
+
+ rdev = nvdev->extension;
+ if (!rdev)
+ goto unlock; /* device was removed */
+
+ ndev = rdev->ndev;
+ ndev_ctx = netdev_priv(ndev);
+ hv_dev = ndev_ctx->device_ctx;
+
+ memset(init_packet, 0, sizeof(struct nvsp_message));
+ init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL;
+ init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
+ init_packet->msg.v5_msg.subchn_req.num_subchannels =
+ nvdev->num_chn - 1;
+ ret = vmbus_sendpacket(hv_dev->channel, init_packet,
+ sizeof(struct nvsp_message),
+ (unsigned long)init_packet,
+ VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret) {
+ netdev_err(ndev, "sub channel allocate send failed: %d\n", ret);
+ goto failed;
+ }
+
+ wait_for_completion(&nvdev->channel_init_wait);
+ if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) {
+ netdev_err(ndev, "sub channel request failed\n");
+ goto failed;
+ }
+
+ nvdev->num_chn = 1 +
+ init_packet->msg.v5_msg.subchn_comp.num_subchannels;
+
+ /* wait for all sub channels to open */
+ wait_event(nvdev->subchan_open,
+ atomic_read(&nvdev->open_chn) == nvdev->num_chn);
+
+ /* ignore failues from setting rss parameters, still have channels */
+ rndis_filter_set_rss_param(rdev, netvsc_hash_key);
+
+ netif_set_real_num_tx_queues(ndev, nvdev->num_chn);
+ netif_set_real_num_rx_queues(ndev, nvdev->num_chn);
+
+ rtnl_unlock();
+ return;
+
+failed:
+ /* fallback to only primary channel */
+ for (i = 1; i < nvdev->num_chn; i++)
+ netif_napi_del(&nvdev->chan_table[i].napi);
+
+ nvdev->max_chn = 1;
+ nvdev->num_chn = 1;
+unlock:
+ rtnl_unlock();
}
struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
@@ -1063,11 +1137,10 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
struct rndis_device *rndis_device;
struct ndis_offload hwcaps;
struct ndis_offload_params offloads;
- struct nvsp_message *init_packet;
struct ndis_recv_scale_cap rsscap;
u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
unsigned int gso_max_size = GSO_MAX_SIZE;
- u32 mtu, size, num_rss_qs;
+ u32 mtu, size;
const struct cpumask *node_cpu_mask;
u32 num_possible_rss_qs;
int i, ret;
@@ -1091,8 +1164,6 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
net_device->max_chn = 1;
net_device->num_chn = 1;
- refcount_set(&net_device->sc_offered, 0);
-
net_device->extension = rndis_device;
rndis_device->ndev = net;
@@ -1216,9 +1287,8 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i,
net_device->num_chn);
- num_rss_qs = net_device->num_chn - 1;
- if (num_rss_qs == 0)
- return net_device;
+ atomic_set(&net_device->open_chn, 1);
+ vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
for (i = 1; i < net_device->num_chn; i++) {
ret = netvsc_alloc_recv_comp_ring(net_device, i);
@@ -1229,36 +1299,15 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
}
}
- refcount_set(&net_device->sc_offered, num_rss_qs);
- vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
-
- init_packet = &net_device->channel_init_pkt;
- memset(init_packet, 0, sizeof(struct nvsp_message));
- init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL;
- init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
- init_packet->msg.v5_msg.subchn_req.num_subchannels =
- net_device->num_chn - 1;
- ret = vmbus_sendpacket(dev->channel, init_packet,
- sizeof(struct nvsp_message),
- (unsigned long)init_packet,
- VM_PKT_DATA_INBAND,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
- if (ret)
- goto out;
-
- if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) {
- ret = -ENODEV;
- goto out;
- }
- wait_for_completion(&net_device->channel_init_wait);
+ for (i = 1; i < net_device->num_chn; i++)
+ netif_napi_add(net, &net_device->chan_table[i].napi,
+ netvsc_poll, NAPI_POLL_WEIGHT);
- net_device->num_chn = 1 +
- init_packet->msg.v5_msg.subchn_comp.num_subchannels;
+ if (net_device->num_chn > 1)
+ schedule_work(&net_device->subchan_work);
- /* ignore failues from setting rss parameters, still have channels */
- rndis_filter_set_rss_param(rndis_device, netvsc_hash_key,
- net_device->num_chn);
out:
+ /* if unavailable, just proceed with one queue */
if (ret) {
net_device->max_chn = 1;
net_device->num_chn = 1;
@@ -1279,10 +1328,10 @@ void rndis_filter_device_remove(struct hv_device *dev,
/* Halt and release the rndis device */
rndis_filter_halt_device(rndis_dev);
- kfree(rndis_dev);
net_dev->extension = NULL;
netvsc_device_remove(dev);
+ kfree(rndis_dev);
}
int rndis_filter_open(struct netvsc_device *nvdev)
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 415dcc69a502..0fe3ea164ee5 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -50,6 +50,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/delay.h>
#include <linux/semaphore.h>
#include <linux/irqdomain.h>
#include <asm/irqdomain.h>
@@ -562,52 +563,6 @@ static void put_pcichild(struct hv_pci_dev *hv_pcidev,
static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
-
-/*
- * Temporary CPU to vCPU mapping to address transitioning
- * vmbus_cpu_number_to_vp_number() being migrated to
- * hv_cpu_number_to_vp_number() in a separate patch. Once that patch
- * has been picked up in the main line, remove this code here and use
- * the official code.
- */
-static struct hv_tmpcpumap
-{
- bool initialized;
- u32 vp_index[NR_CPUS];
-} hv_tmpcpumap;
-
-static void hv_tmpcpumap_init_cpu(void *_unused)
-{
- int cpu = smp_processor_id();
- u64 vp_index;
-
- hv_get_vp_index(vp_index);
-
- hv_tmpcpumap.vp_index[cpu] = vp_index;
-}
-
-static void hv_tmpcpumap_init(void)
-{
- if (hv_tmpcpumap.initialized)
- return;
-
- memset(hv_tmpcpumap.vp_index, -1, sizeof(hv_tmpcpumap.vp_index));
- on_each_cpu(hv_tmpcpumap_init_cpu, NULL, true);
- hv_tmpcpumap.initialized = true;
-}
-
-/**
- * hv_tmp_cpu_nr_to_vp_nr() - Convert Linux CPU nr to Hyper-V vCPU nr
- *
- * Remove once vmbus_cpu_number_to_vp_number() has been converted to
- * hv_cpu_number_to_vp_number() and replace callers appropriately.
- */
-static u32 hv_tmp_cpu_nr_to_vp_nr(int cpu)
-{
- return hv_tmpcpumap.vp_index[cpu];
-}
-
-
/**
* devfn_to_wslot() - Convert from Linux PCI slot to Windows
* @devfn: The Linux representation of PCI slot
@@ -971,7 +926,7 @@ static void hv_irq_unmask(struct irq_data *data)
var_size = 1 + HV_VP_SET_BANK_COUNT_MAX;
for_each_cpu_and(cpu, dest, cpu_online_mask) {
- cpu_vmbus = hv_tmp_cpu_nr_to_vp_nr(cpu);
+ cpu_vmbus = hv_cpu_number_to_vp_number(cpu);
if (cpu_vmbus >= HV_VP_SET_BANK_COUNT_MAX * 64) {
dev_err(&hbus->hdev->device,
@@ -986,7 +941,7 @@ static void hv_irq_unmask(struct irq_data *data)
} else {
for_each_cpu_and(cpu, dest, cpu_online_mask) {
params->int_target.vp_mask |=
- (1ULL << hv_tmp_cpu_nr_to_vp_nr(cpu));
+ (1ULL << hv_cpu_number_to_vp_number(cpu));
}
}
@@ -1063,7 +1018,7 @@ static u32 hv_compose_msi_req_v2(
*/
cpu = cpumask_first_and(affinity, cpu_online_mask);
int_pkt->int_desc.processor_array[0] =
- hv_tmp_cpu_nr_to_vp_nr(cpu);
+ hv_cpu_number_to_vp_number(cpu);
int_pkt->int_desc.processor_count = 1;
return sizeof(*int_pkt);
@@ -1159,7 +1114,12 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
goto free_int_desc;
}
- wait_for_completion(&comp.comp_pkt.host_event);
+ /*
+ * Since this function is called with IRQ locks held, can't
+ * do normal wait for completion; instead poll.
+ */
+ while (!try_wait_for_completion(&comp.comp_pkt.host_event))
+ udelay(100);
if (comp.comp_pkt.completion_status < 0) {
dev_err(&hbus->hdev->device,
@@ -2490,8 +2450,6 @@ static int hv_pci_probe(struct hv_device *hdev,
return -ENOMEM;
hbus->state = hv_pcibus_init;
- hv_tmpcpumap_init();
-
/*
* The PCI bus "domain" is what is called "segment" in ACPI and
* other specs. Pull it from the instance ID, to get something
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 3cc8d67783a1..5e7200f05873 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1640,6 +1640,8 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
put_cpu();
if (ret == -EAGAIN) {
+ if (payload_sz > sizeof(cmd_request->mpb))
+ kfree(payload);
/* no more space */
return SCSI_MLQUEUE_DEVICE_BUSY;
}
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index b7d7bbec74e0..6431087816ba 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -124,10 +124,7 @@ struct hv_ring_buffer_info {
spinlock_t ring_lock;
u32 ring_datasize; /* < ring_size */
- u32 ring_data_startoffset;
- u32 priv_write_index;
u32 priv_read_index;
- u32 cached_read_index;
};
/*
@@ -180,19 +177,6 @@ static inline u32 hv_get_bytes_to_write(const struct hv_ring_buffer_info *rbi)
return write;
}
-static inline u32 hv_get_cached_bytes_to_write(
- const struct hv_ring_buffer_info *rbi)
-{
- u32 read_loc, write_loc, dsize, write;
-
- dsize = rbi->ring_datasize;
- read_loc = rbi->cached_read_index;
- write_loc = rbi->ring_buffer->write_index;
-
- write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
- read_loc - write_loc;
- return write;
-}
/*
* VMBUS version is 32 bit entity broken up into
* two 16 bit quantities: major_number. minor_number.
@@ -677,18 +661,6 @@ union hv_connection_id {
} u;
};
-/* Definition of the hv_signal_event hypercall input structure. */
-struct hv_input_signal_event {
- union hv_connection_id connectionid;
- u16 flag_number;
- u16 rsvdz;
-};
-
-struct hv_input_signal_event_buffer {
- u64 align8;
- struct hv_input_signal_event event;
-};
-
enum hv_numa_policy {
HV_BALANCED = 0,
HV_LOCALIZED,
@@ -770,8 +742,7 @@ struct vmbus_channel {
} callback_mode;
bool is_dedicated_interrupt;
- struct hv_input_signal_event_buffer sig_buf;
- struct hv_input_signal_event *sig_event;
+ u64 sig_event;
/*
* Starting with win8, this field will be used to specify
@@ -895,6 +866,8 @@ struct vmbus_channel {
*/
enum hv_numa_policy affinity_policy;
+ bool probe_done;
+
};
static inline bool is_hvsock_channel(const struct vmbus_channel *c)
@@ -1030,13 +1003,6 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel,
enum vmbus_packet_type type,
u32 flags);
-extern int vmbus_sendpacket_ctl(struct vmbus_channel *channel,
- void *buffer,
- u32 bufferLen,
- u64 requestid,
- enum vmbus_packet_type type,
- u32 flags);
-
extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
struct hv_page_buffer pagebuffers[],
u32 pagecount,
@@ -1044,20 +1010,6 @@ extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
u32 bufferlen,
u64 requestid);
-extern int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
- struct hv_page_buffer pagebuffers[],
- u32 pagecount,
- void *buffer,
- u32 bufferlen,
- u64 requestid,
- u32 flags);
-
-extern int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
- struct hv_multipage_buffer *mpb,
- void *buffer,
- u32 bufferlen,
- u64 requestid);
-
extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
struct vmbus_packet_mpb_array *mpb,
u32 desc_size,
@@ -1186,8 +1138,6 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
resource_size_t size, resource_size_t align,
bool fb_overlap_ok);
void vmbus_free_mmio(resource_size_t start, resource_size_t size);
-int vmbus_cpu_number_to_vp_number(int cpu_number);
-u64 hv_do_hypercall(u64 control, void *input, void *output);
/*
* GUID definitions of various offer types - services offered to the guest.
@@ -1453,7 +1403,7 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
const int *srv_version, int srv_vercnt,
int *nego_fw_version, int *nego_srv_version);
-void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
+void hv_process_channel_removal(u32 relid);
void vmbus_setevent(struct vmbus_channel *channel);
/*
@@ -1474,55 +1424,6 @@ hv_get_ring_buffer(const struct hv_ring_buffer_info *ring_info)
}
/*
- * To optimize the flow management on the send-side,
- * when the sender is blocked because of lack of
- * sufficient space in the ring buffer, potential the
- * consumer of the ring buffer can signal the producer.
- * This is controlled by the following parameters:
- *
- * 1. pending_send_sz: This is the size in bytes that the
- * producer is trying to send.
- * 2. The feature bit feat_pending_send_sz set to indicate if
- * the consumer of the ring will signal when the ring
- * state transitions from being full to a state where
- * there is room for the producer to send the pending packet.
- */
-
-static inline void hv_signal_on_read(struct vmbus_channel *channel)
-{
- u32 cur_write_sz, cached_write_sz;
- u32 pending_sz;
- struct hv_ring_buffer_info *rbi = &channel->inbound;
-
- /*
- * Issue a full memory barrier before making the signaling decision.
- * Here is the reason for having this barrier:
- * If the reading of the pend_sz (in this function)
- * were to be reordered and read before we commit the new read
- * index (in the calling function) we could
- * have a problem. If the host were to set the pending_sz after we
- * have sampled pending_sz and go to sleep before we commit the
- * read index, we could miss sending the interrupt. Issue a full
- * memory barrier to address this.
- */
- virt_mb();
-
- pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
- /* If the other end is not blocked on write don't bother. */
- if (pending_sz == 0)
- return;
-
- cur_write_sz = hv_get_bytes_to_write(rbi);
-
- if (cur_write_sz < pending_sz)
- return;
-
- cached_write_sz = hv_get_cached_bytes_to_write(rbi);
- if (cached_write_sz < pending_sz)
- vmbus_setevent(channel);
-}
-
-/*
* Mask off host interrupt callback notifications
*/
static inline void hv_begin_read(struct hv_ring_buffer_info *rbi)
diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh
deleted file mode 100755
index 89b25068cd98..000000000000
--- a/tools/hv/bondvf.sh
+++ /dev/null
@@ -1,232 +0,0 @@
-#!/bin/bash
-
-# This example script creates bonding network devices based on synthetic NIC
-# (the virtual network adapter usually provided by Hyper-V) and the matching
-# VF NIC (SRIOV virtual function). So the synthetic NIC and VF NIC can
-# function as one network device, and fail over to the synthetic NIC if VF is
-# down.
-#
-# Usage:
-# - After configured vSwitch and vNIC with SRIOV, start Linux virtual
-# machine (VM)
-# - Run this scripts on the VM. It will create configuration files in
-# distro specific directory.
-# - Reboot the VM, so that the bonding config are enabled.
-#
-# The config files are DHCP by default. You may edit them if you need to change
-# to Static IP or change other settings.
-#
-
-sysdir=/sys/class/net
-netvsc_cls={f8615163-df3e-46c5-913f-f2d2f965ed0e}
-bondcnt=0
-
-# Detect Distro
-if [ -f /etc/redhat-release ];
-then
- cfgdir=/etc/sysconfig/network-scripts
- distro=redhat
-elif grep -q 'Ubuntu' /etc/issue
-then
- cfgdir=/etc/network
- distro=ubuntu
-elif grep -q 'SUSE' /etc/issue
-then
- cfgdir=/etc/sysconfig/network
- distro=suse
-else
- echo "Unsupported Distro"
- exit 1
-fi
-
-echo Detected Distro: $distro, or compatible
-
-# Get a list of ethernet names
-list_eth=(`cd $sysdir && ls -d */ | cut -d/ -f1 | grep -v bond`)
-eth_cnt=${#list_eth[@]}
-
-echo List of net devices:
-
-# Get the MAC addresses
-for (( i=0; i < $eth_cnt; i++ ))
-do
- list_mac[$i]=`cat $sysdir/${list_eth[$i]}/address`
- echo ${list_eth[$i]}, ${list_mac[$i]}
-done
-
-# Find NIC with matching MAC
-for (( i=0; i < $eth_cnt-1; i++ ))
-do
- for (( j=i+1; j < $eth_cnt; j++ ))
- do
- if [ "${list_mac[$i]}" = "${list_mac[$j]}" ]
- then
- list_match[$i]=${list_eth[$j]}
- break
- fi
- done
-done
-
-function create_eth_cfg_redhat {
- local fn=$cfgdir/ifcfg-$1
-
- rm -f $fn
- echo DEVICE=$1 >>$fn
- echo TYPE=Ethernet >>$fn
- echo BOOTPROTO=none >>$fn
- echo UUID=`uuidgen` >>$fn
- echo ONBOOT=yes >>$fn
- echo PEERDNS=yes >>$fn
- echo IPV6INIT=yes >>$fn
- echo MASTER=$2 >>$fn
- echo SLAVE=yes >>$fn
-}
-
-function create_eth_cfg_pri_redhat {
- create_eth_cfg_redhat $1 $2
-}
-
-function create_bond_cfg_redhat {
- local fn=$cfgdir/ifcfg-$1
-
- rm -f $fn
- echo DEVICE=$1 >>$fn
- echo TYPE=Bond >>$fn
- echo BOOTPROTO=dhcp >>$fn
- echo UUID=`uuidgen` >>$fn
- echo ONBOOT=yes >>$fn
- echo PEERDNS=yes >>$fn
- echo IPV6INIT=yes >>$fn
- echo BONDING_MASTER=yes >>$fn
- echo BONDING_OPTS=\"mode=active-backup miimon=100 primary=$2\" >>$fn
-}
-
-function del_eth_cfg_ubuntu {
- local mainfn=$cfgdir/interfaces
- local fnlist=( $mainfn )
-
- local dirlist=(`awk '/^[ \t]*source/{print $2}' $mainfn`)
-
- local i
- for i in "${dirlist[@]}"
- do
- fnlist+=(`ls $i 2>/dev/null`)
- done
-
- local tmpfl=$(mktemp)
-
- local nic_start='^[ \t]*(auto|iface|mapping|allow-.*)[ \t]+'$1
- local nic_end='^[ \t]*(auto|iface|mapping|allow-.*|source)'
-
- local fn
- for fn in "${fnlist[@]}"
- do
- awk "/$nic_end/{x=0} x{next} /$nic_start/{x=1;next} 1" \
- $fn >$tmpfl
-
- cp $tmpfl $fn
- done
-
- rm $tmpfl
-}
-
-function create_eth_cfg_ubuntu {
- local fn=$cfgdir/interfaces
-
- del_eth_cfg_ubuntu $1
- echo $'\n'auto $1 >>$fn
- echo iface $1 inet manual >>$fn
- echo bond-master $2 >>$fn
-}
-
-function create_eth_cfg_pri_ubuntu {
- local fn=$cfgdir/interfaces
-
- del_eth_cfg_ubuntu $1
- echo $'\n'allow-hotplug $1 >>$fn
- echo iface $1 inet manual >>$fn
- echo bond-master $2 >>$fn
- echo bond-primary $1 >>$fn
-}
-
-function create_bond_cfg_ubuntu {
- local fn=$cfgdir/interfaces
-
- del_eth_cfg_ubuntu $1
-
- echo $'\n'auto $1 >>$fn
- echo iface $1 inet dhcp >>$fn
- echo bond-mode active-backup >>$fn
- echo bond-miimon 100 >>$fn
- echo bond-slaves none >>$fn
-}
-
-function create_eth_cfg_suse {
- local fn=$cfgdir/ifcfg-$1
-
- rm -f $fn
- echo BOOTPROTO=none >>$fn
- echo STARTMODE=auto >>$fn
-}
-
-function create_eth_cfg_pri_suse {
- local fn=$cfgdir/ifcfg-$1
-
- rm -f $fn
- echo BOOTPROTO=none >>$fn
- echo STARTMODE=hotplug >>$fn
-}
-
-function create_bond_cfg_suse {
- local fn=$cfgdir/ifcfg-$1
-
- rm -f $fn
- echo BOOTPROTO=dhcp >>$fn
- echo STARTMODE=auto >>$fn
- echo BONDING_MASTER=yes >>$fn
- echo BONDING_SLAVE_0=$2 >>$fn
- echo BONDING_SLAVE_1=$3 >>$fn
- echo BONDING_MODULE_OPTS=\'mode=active-backup miimon=100 primary=$2\' >>$fn
-}
-
-function create_bond {
- local bondname=bond$bondcnt
- local primary
- local secondary
-
- local class_id1=`cat $sysdir/$1/device/class_id 2>/dev/null`
- local class_id2=`cat $sysdir/$2/device/class_id 2>/dev/null`
-
- if [ "$class_id1" = "$netvsc_cls" ]
- then
- primary=$2
- secondary=$1
- elif [ "$class_id2" = "$netvsc_cls" ]
- then
- primary=$1
- secondary=$2
- else
- return 0
- fi
-
- echo $'\nBond name:' $bondname
-
- echo configuring $primary
- create_eth_cfg_pri_$distro $primary $bondname
-
- echo configuring $secondary
- create_eth_cfg_$distro $secondary $bondname
-
- echo creating: $bondname with primary slave: $primary
- create_bond_cfg_$distro $bondname $primary $secondary
-
- let bondcnt=bondcnt+1
-}
-
-for (( i=0; i < $eth_cnt-1; i++ ))
-do
- if [ -n "${list_match[$i]}" ]
- then
- create_bond ${list_eth[$i]} ${list_match[$i]}
- fi
-done
diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c
index 26ae609a9448..457a1521f32f 100644
--- a/tools/hv/hv_fcopy_daemon.c
+++ b/tools/hv/hv_fcopy_daemon.c
@@ -138,14 +138,17 @@ void print_usage(char *argv[])
int main(int argc, char *argv[])
{
- int fcopy_fd, len;
+ int fcopy_fd;
int error;
int daemonize = 1, long_index = 0, opt;
int version = FCOPY_CURRENT_VERSION;
- char *buffer[4096 * 2];
- struct hv_fcopy_hdr *in_msg;
+ union {
+ struct hv_fcopy_hdr hdr;
+ struct hv_start_fcopy start;
+ struct hv_do_fcopy copy;
+ __u32 kernel_modver;
+ } buffer = { };
int in_handshake = 1;
- __u32 kernel_modver;
static struct option long_options[] = {
{"help", no_argument, 0, 'h' },
@@ -195,32 +198,31 @@ int main(int argc, char *argv[])
* In this loop we process fcopy messages after the
* handshake is complete.
*/
- len = pread(fcopy_fd, buffer, (4096 * 2), 0);
+ ssize_t len;
+
+ len = pread(fcopy_fd, &buffer, sizeof(buffer), 0);
if (len < 0) {
syslog(LOG_ERR, "pread failed: %s", strerror(errno));
exit(EXIT_FAILURE);
}
if (in_handshake) {
- if (len != sizeof(kernel_modver)) {
+ if (len != sizeof(buffer.kernel_modver)) {
syslog(LOG_ERR, "invalid version negotiation");
exit(EXIT_FAILURE);
}
- kernel_modver = *(__u32 *)buffer;
in_handshake = 0;
- syslog(LOG_INFO, "kernel module version: %d",
- kernel_modver);
+ syslog(LOG_INFO, "kernel module version: %u",
+ buffer.kernel_modver);
continue;
}
- in_msg = (struct hv_fcopy_hdr *)buffer;
-
- switch (in_msg->operation) {
+ switch (buffer.hdr.operation) {
case START_FILE_COPY:
- error = hv_start_fcopy((struct hv_start_fcopy *)in_msg);
+ error = hv_start_fcopy(&buffer.start);
break;
case WRITE_TO_FILE:
- error = hv_copy_data((struct hv_do_fcopy *)in_msg);
+ error = hv_copy_data(&buffer.copy);
break;
case COMPLETE_FCOPY:
error = hv_copy_finished();
@@ -231,7 +233,7 @@ int main(int argc, char *argv[])
default:
syslog(LOG_ERR, "Unknown operation: %d",
- in_msg->operation);
+ buffer.hdr.operation);
}
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 88b20e007c05..eaa3bec273c8 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -1136,7 +1136,7 @@ static int process_ip_string(FILE *f, char *ip_string, int type)
int i = 0;
int j = 0;
char str[256];
- char sub_str[10];
+ char sub_str[13];
int offset = 0;
memset(addr, 0, sizeof(addr));
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index 7ba54195934c..b2b4ebffab8c 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -21,6 +21,7 @@
#include <sys/types.h>
#include <sys/poll.h>
#include <sys/ioctl.h>
+#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <mntent.h>
@@ -30,6 +31,7 @@
#include <ctype.h>
#include <errno.h>
#include <linux/fs.h>
+#include <linux/major.h>
#include <linux/hyperv.h>
#include <syslog.h>
#include <getopt.h>
@@ -70,6 +72,7 @@ static int vss_operate(int operation)
char match[] = "/dev/";
FILE *mounts;
struct mntent *ent;
+ struct stat sb;
char errdir[1024] = {0};
unsigned int cmd;
int error = 0, root_seen = 0, save_errno = 0;
@@ -92,6 +95,10 @@ static int vss_operate(int operation)
while ((ent = getmntent(mounts))) {
if (strncmp(ent->mnt_fsname, match, strlen(match)))
continue;
+ if (stat(ent->mnt_fsname, &sb) == -1)
+ continue;
+ if (S_ISBLK(sb.st_mode) && major(sb.st_rdev) == LOOP_MAJOR)
+ continue;
if (hasmntopt(ent, MNTOPT_RO) != NULL)
continue;
if (strcmp(ent->mnt_type, "vfat") == 0)