Home Home > GIT Browse > openSUSE-15.1
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOliver Neukum <oneukum@suse.com>2018-02-06 11:58:10 +0100
committerOliver Neukum <oneukum@suse.com>2018-02-06 11:58:10 +0100
commit40443518cb77e80037f97d1e43f04e3d13452c4e (patch)
treed585a86e45fad1d45873a91b6cb0889d99398238
parentcefe506d9d1812dd95b50e4535278a81933ab991 (diff)
parent6bdc2077d69d17080f8844c6111f4786fdeca5ba (diff)
Merge remote-tracking branch 'origin/SLE12-SP3' into openSUSE-42.3rpm-4.4.114-42
-rw-r--r--Documentation/bcache.txt281
-rw-r--r--Documentation/speculation.txt143
-rw-r--r--Documentation/x86/pti.txt2
-rw-r--r--Makefile2
-rw-r--r--arch/arm/boot/dts/kirkwood-openblocks_a7.dts10
-rw-r--r--arch/arm64/include/asm/memory.h1
-rw-r--r--arch/arm64/include/asm/page-def.h34
-rw-r--r--arch/arm64/include/asm/page.h12
-rw-r--r--arch/arm64/include/asm/sysreg.h2
-rw-r--r--arch/arm64/kernel/cpu_errata.c10
-rw-r--r--arch/arm64/kernel/cpufeature.c6
-rw-r--r--arch/arm64/mm/context.c5
-rw-r--r--arch/mips/ar7/platform.c2
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/kernel/setup_64.c39
-rw-r--r--arch/s390/include/asm/kvm_host.h7
-rw-r--r--arch/s390/include/uapi/asm/kvm.h4
-rw-r--r--arch/s390/kernel/processor.c17
-rw-r--r--arch/s390/kvm/kvm-s390.c17
-rw-r--r--arch/um/Makefile2
-rw-r--r--arch/x86/entry/entry_32.S3
-rw-r--r--arch/x86/entry/entry_64.S3
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c7
-rw-r--r--arch/x86/include/asm/barrier.h29
-rw-r--r--arch/x86/include/asm/cpufeature.h3
-rw-r--r--arch/x86/include/asm/msr.h3
-rw-r--r--arch/x86/include/asm/nospec-branch.h16
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/smap.h24
-rw-r--r--arch/x86/include/asm/spec_ctrl.h1
-rw-r--r--arch/x86/include/asm/switch_to.h38
-rw-r--r--arch/x86/include/asm/traps.h1
-rw-r--r--arch/x86/include/asm/uaccess.h15
-rw-r--r--arch/x86/include/asm/uaccess_32.h6
-rw-r--r--arch/x86/include/asm/uaccess_64.h12
-rw-r--r--arch/x86/include/asm/vsyscall.h1
-rw-r--r--arch/x86/kernel/apic/vector.c7
-rw-r--r--arch/x86/kernel/cpu/bugs.c51
-rw-r--r--arch/x86/kernel/cpu/common.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c5
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c21
-rw-r--r--arch/x86/kernel/cpu/spec_ctrl.c16
-rw-r--r--arch/x86/kernel/kprobes/opt.c23
-rw-r--r--arch/x86/kernel/vmlinux.lds.S7
-rw-r--r--arch/x86/kvm/vmx.c19
-rw-r--r--arch/x86/lib/cmdline.c34
-rw-r--r--arch/x86/lib/getuser.S5
-rw-r--r--arch/x86/lib/retpoline.S5
-rw-r--r--arch/x86/lib/usercopy_32.c8
-rw-r--r--arch/x86/mm/kaiser.c2
-rw-r--r--drivers/ata/libata-core.c1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c57
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.h4
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_ctrl.c8
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_d.h1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_main.c2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.c2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_user.h1
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c6
-rw-r--r--drivers/input/misc/twl4030-vibra.c6
-rw-r--r--drivers/input/misc/twl6040-vibra.c2
-rw-r--r--drivers/input/mouse/trackpoint.c3
-rw-r--r--drivers/input/touchscreen/88pm860x-ts.c16
-rw-r--r--drivers/md/bcache/alloc.c16
-rw-r--r--drivers/md/bcache/bcache.h12
-rw-r--r--drivers/md/bcache/btree.c22
-rw-r--r--drivers/md/bcache/btree.h2
-rw-r--r--drivers/md/bcache/closure.c17
-rw-r--r--drivers/md/bcache/closure.h10
-rw-r--r--drivers/md/bcache/debug.c2
-rw-r--r--drivers/md/bcache/journal.c5
-rw-r--r--drivers/md/bcache/request.c26
-rw-r--r--drivers/md/bcache/super.c47
-rw-r--r--drivers/md/bcache/sysfs.c39
-rw-r--r--drivers/md/bcache/util.c10
-rw-r--r--drivers/md/bcache/util.h4
-rw-r--r--drivers/md/bcache/writeback.c116
-rw-r--r--drivers/md/bcache/writeback.h4
-rw-r--r--drivers/md/dm-thin-metadata.c6
-rw-r--r--drivers/md/persistent-data/dm-btree.c19
-rw-r--r--drivers/net/can/usb/peak_usb/pcan_usb_fd.c21
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c9
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c124
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h2
-rw-r--r--drivers/net/ethernet/intel/e1000e/defines.h1
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c6
-rw-r--r--drivers/net/ethernet/realtek/r8169.c9
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c4
-rw-r--r--drivers/net/ppp/pppoe.c11
-rw-r--r--drivers/net/usb/lan78xx.c1
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c2
-rw-r--r--drivers/nvme/host/fc.c3
-rw-r--r--drivers/phy/phy-core.c4
-rw-r--r--drivers/rtc/rtc-cmos.c4
-rw-r--r--drivers/scsi/lpfc/lpfc_mem.c2
-rw-r--r--drivers/scsi/sg.c30
-rw-r--r--drivers/usb/class/cdc-acm.c3
-rw-r--r--drivers/usb/usbip/stub_dev.c3
-rw-r--r--drivers/usb/usbip/stub_rx.c46
-rw-r--r--drivers/usb/usbip/usbip_common.c15
-rw-r--r--drivers/usb/usbip/usbip_common.h3
-rw-r--r--drivers/usb/usbip/usbip_event.c5
-rw-r--r--drivers/usb/usbip/vhci_hcd.c90
-rw-r--r--drivers/usb/usbip/vhci_rx.c30
-rw-r--r--drivers/usb/usbip/vhci_sysfs.c44
-rw-r--r--drivers/usb/usbip/vhci_tx.c14
-rw-r--r--fs/ceph/super.c9
-rw-r--r--fs/dlm/debug_fs.c62
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfsd/auth.c5
-rw-r--r--fs/pipe.c18
-rw-r--r--fs/xfs/xfs_attr_inactive.c2
-rw-r--r--fs/xfs/xfs_log.c14
-rw-r--r--include/linux/ceph/ceph_fs.h2
-rw-r--r--include/linux/ceph/mon_client.h4
-rw-r--r--include/linux/fdtable.h7
-rw-r--r--include/linux/nospec.h65
-rw-r--r--include/linux/tcp.h7
-rw-r--r--include/linux/thread_info.h2
-rw-r--r--include/net/arp.h3
-rw-r--r--include/net/flow_dissector.h2
-rw-r--r--include/net/ipv6.h1
-rw-r--r--include/net/net_namespace.h10
-rw-r--r--include/net/tipc.h62
-rw-r--r--include/scsi/sg.h1
-rw-r--r--include/uapi/linux/eventpoll.h13
-rw-r--r--include/uapi/linux/kvm.h1
-rw-r--r--kernel/futex.c3
-rw-r--r--kernel/gcov/Kconfig1
-rw-r--r--kernel/time/hrtimer.c3
-rw-r--r--kernel/trace/trace_events.c16
-rw-r--r--mm/mprotect.c6
-rw-r--r--net/can/af_can.c22
-rw-r--r--net/ceph/mon_client.c6
-rw-r--r--net/core/dev.c19
-rw-r--r--net/core/flow_dissector.c15
-rw-r--r--net/core/neighbour.c4
-rw-r--r--net/dccp/ccids/ccid2.c3
-rw-r--r--net/ipv4/arp.c7
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_timer.c15
-rw-r--r--net/ipv6/ip6_output.c8
-rw-r--r--net/ipv6/ipv6_sockglue.c2
-rw-r--r--net/key/af_key.c8
-rw-r--r--net/sctp/socket.c30
-rw-r--r--net/tipc/link.c26
-rw-r--r--net/tipc/msg.h10
-rw-r--r--net/wireless/nl80211.c10
-rw-r--r--scripts/Makefile.build87
-rw-r--r--tools/usb/usbip/libsrc/usbip_common.c9
-rw-r--r--tools/usb/usbip/libsrc/usbip_host_driver.c27
-rw-r--r--tools/usb/usbip/libsrc/vhci_driver.c8
-rw-r--r--tools/usb/usbip/src/usbip.c2
155 files changed, 1952 insertions, 645 deletions
diff --git a/Documentation/bcache.txt b/Documentation/bcache.txt
index 32b6c3189d98..c0ce64d75bbf 100644
--- a/Documentation/bcache.txt
+++ b/Documentation/bcache.txt
@@ -1,14 +1,19 @@
-Say you've got a big slow raid 6, and an X-25E or three. Wouldn't it be
+============================
+A block layer cache (bcache)
+============================
+
+Say you've got a big slow raid 6, and an ssd or three. Wouldn't it be
nice if you could use them as cache... Hence bcache.
Wiki and git repositories are at:
- http://bcache.evilpiepirate.org
- http://evilpiepirate.org/git/linux-bcache.git
- http://evilpiepirate.org/git/bcache-tools.git
+
+ - http://bcache.evilpiepirate.org
+ - http://evilpiepirate.org/git/linux-bcache.git
+ - http://evilpiepirate.org/git/bcache-tools.git
It's designed around the performance characteristics of SSDs - it only allocates
in erase block sized buckets, and it uses a hybrid btree/log to track cached
-extants (which can be anywhere from a single sector to the bucket size). It's
+extents (which can be anywhere from a single sector to the bucket size). It's
designed to avoid random writes at all costs; it fills up an erase block
sequentially, then issues a discard before reusing it.
@@ -37,17 +42,19 @@ to be flushed.
Getting started:
You'll need make-bcache from the bcache-tools repository. Both the cache device
-and backing device must be formatted before use.
+and backing device must be formatted before use::
+
make-bcache -B /dev/sdb
make-bcache -C /dev/sdc
make-bcache has the ability to format multiple devices at the same time - if
you format your backing devices and cache device at the same time, you won't
-have to manually attach:
+have to manually attach::
+
make-bcache -B /dev/sda /dev/sdb -C /dev/sdc
bcache-tools now ships udev rules, and bcache devices are known to the kernel
-immediately. Without udev, you can manually register devices like this:
+immediately. Without udev, you can manually register devices like this::
echo /dev/sdb > /sys/fs/bcache/register
echo /dev/sdc > /sys/fs/bcache/register
@@ -55,34 +62,39 @@ immediately. Without udev, you can manually register devices like this:
Registering the backing device makes the bcache device show up in /dev; you can
now format it and use it as normal. But the first time using a new bcache
device, it'll be running in passthrough mode until you attach it to a cache.
-See the section on attaching.
+If you are thinking about using bcache later, it is recommended to setup all your
+slow devices as bcache backing devices without a cache, and you can choose to add
+a caching device later.
+See 'ATTACHING' section below.
-The devices show up as:
+The devices show up as::
/dev/bcache<N>
-As well as (with udev):
+As well as (with udev)::
/dev/bcache/by-uuid/<uuid>
/dev/bcache/by-label/<label>
-To get started:
+To get started::
mkfs.ext4 /dev/bcache0
mount /dev/bcache0 /mnt
You can control bcache devices through sysfs at /sys/block/bcache<N>/bcache .
+You can also control them through /sys/fs//bcache/<cset-uuid>/ .
Cache devices are managed as sets; multiple caches per set isn't supported yet
but will allow for mirroring of metadata and dirty data in the future. Your new
cache set shows up as /sys/fs/bcache/<UUID>
-ATTACHING:
+Attaching
+---------
After your cache device and backing device are registered, the backing device
must be attached to your cache set to enable caching. Attaching a backing
device to a cache set is done thusly, with the UUID of the cache set in
-/sys/fs/bcache:
+/sys/fs/bcache::
echo <CSET-UUID> > /sys/block/bcache0/bcache/attach
@@ -92,7 +104,7 @@ your bcache devices. If a backing device has data in a cache somewhere, the
important if you have writeback caching turned on.
If you're booting up and your cache device is gone and never coming back, you
-can force run the backing device:
+can force run the backing device::
echo 1 > /sys/block/sdb/bcache/running
@@ -105,7 +117,8 @@ but all the cached data will be invalidated. If there was dirty data in the
cache, don't expect the filesystem to be recoverable - you will have massive
filesystem corruption, though ext4's fsck does work miracles.
-ERROR HANDLING:
+Error Handling
+--------------
Bcache tries to transparently handle IO errors to/from the cache device without
affecting normal operation; if it sees too many errors (the threshold is
@@ -127,20 +140,206 @@ the backing devices to passthrough mode.
writeback mode). It currently doesn't do anything intelligent if it fails to
read some of the dirty data, though.
-TROUBLESHOOTING PERFORMANCE:
+
+Howto/cookbook
+--------------
+
+A) Starting a bcache with a missing caching device
+
+If registering the backing device doesn't help, it's already there, you just need
+to force it to run without the cache::
+
+ host:~# echo /dev/sdb1 > /sys/fs/bcache/register
+ [ 119.844831] bcache: register_bcache() error opening /dev/sdb1: device already registered
+
+Next, you try to register your caching device if it's present. However
+if it's absent, or registration fails for some reason, you can still
+start your bcache without its cache, like so::
+
+ host:/sys/block/sdb/sdb1/bcache# echo 1 > running
+
+Note that this may cause data loss if you were running in writeback mode.
+
+
+B) Bcache does not find its cache::
+
+ host:/sys/block/md5/bcache# echo 0226553a-37cf-41d5-b3ce-8b1e944543a8 > attach
+ [ 1933.455082] bcache: bch_cached_dev_attach() Couldn't find uuid for md5 in set
+ [ 1933.478179] bcache: __cached_dev_store() Can't attach 0226553a-37cf-41d5-b3ce-8b1e944543a8
+ [ 1933.478179] : cache set not found
+
+In this case, the caching device was simply not registered at boot
+or disappeared and came back, and needs to be (re-)registered::
+
+ host:/sys/block/md5/bcache# echo /dev/sdh2 > /sys/fs/bcache/register
+
+
+C) Corrupt bcache crashes the kernel at device registration time:
+
+This should never happen. If it does happen, then you have found a bug!
+Please report it to the bcache development list: linux-bcache@vger.kernel.org
+
+Be sure to provide as much information that you can including kernel dmesg
+output if available so that we may assist.
+
+
+D) Recovering data without bcache:
+
+If bcache is not available in the kernel, a filesystem on the backing
+device is still available at an 8KiB offset. So either via a loopdev
+of the backing device created with --offset 8K, or any value defined by
+--data-offset when you originally formatted bcache with `make-bcache`.
+
+For example::
+
+ losetup -o 8192 /dev/loop0 /dev/your_bcache_backing_dev
+
+This should present your unmodified backing device data in /dev/loop0
+
+If your cache is in writethrough mode, then you can safely discard the
+cache device without loosing data.
+
+
+E) Wiping a cache device
+
+::
+
+ host:~# wipefs -a /dev/sdh2
+ 16 bytes were erased at offset 0x1018 (bcache)
+ they were: c6 85 73 f6 4e 1a 45 ca 82 65 f5 7f 48 ba 6d 81
+
+After you boot back with bcache enabled, you recreate the cache and attach it::
+
+ host:~# make-bcache -C /dev/sdh2
+ UUID: 7be7e175-8f4c-4f99-94b2-9c904d227045
+ Set UUID: 5bc072a8-ab17-446d-9744-e247949913c1
+ version: 0
+ nbuckets: 106874
+ block_size: 1
+ bucket_size: 1024
+ nr_in_set: 1
+ nr_this_dev: 0
+ first_bucket: 1
+ [ 650.511912] bcache: run_cache_set() invalidating existing data
+ [ 650.549228] bcache: register_cache() registered cache device sdh2
+
+start backing device with missing cache::
+
+ host:/sys/block/md5/bcache# echo 1 > running
+
+attach new cache::
+
+ host:/sys/block/md5/bcache# echo 5bc072a8-ab17-446d-9744-e247949913c1 > attach
+ [ 865.276616] bcache: bch_cached_dev_attach() Caching md5 as bcache0 on set 5bc072a8-ab17-446d-9744-e247949913c1
+
+
+F) Remove or replace a caching device::
+
+ host:/sys/block/sda/sda7/bcache# echo 1 > detach
+ [ 695.872542] bcache: cached_dev_detach_finish() Caching disabled for sda7
+
+ host:~# wipefs -a /dev/nvme0n1p4
+ wipefs: error: /dev/nvme0n1p4: probing initialization failed: Device or resource busy
+ Ooops, it's disabled, but not unregistered, so it's still protected
+
+We need to go and unregister it::
+
+ host:/sys/fs/bcache/b7ba27a1-2398-4649-8ae3-0959f57ba128# ls -l cache0
+ lrwxrwxrwx 1 root root 0 Feb 25 18:33 cache0 -> ../../../devices/pci0000:00/0000:00:1d.0/0000:70:00.0/nvme/nvme0/nvme0n1/nvme0n1p4/bcache/
+ host:/sys/fs/bcache/b7ba27a1-2398-4649-8ae3-0959f57ba128# echo 1 > stop
+ kernel: [ 917.041908] bcache: cache_set_free() Cache set b7ba27a1-2398-4649-8ae3-0959f57ba128 unregistered
+
+Now we can wipe it::
+
+ host:~# wipefs -a /dev/nvme0n1p4
+ /dev/nvme0n1p4: 16 bytes were erased at offset 0x00001018 (bcache): c6 85 73 f6 4e 1a 45 ca 82 65 f5 7f 48 ba 6d 81
+
+
+G) dm-crypt and bcache
+
+First setup bcache unencrypted and then install dmcrypt on top of
+/dev/bcache<N> This will work faster than if you dmcrypt both the backing
+and caching devices and then install bcache on top. [benchmarks?]
+
+
+H) Stop/free a registered bcache to wipe and/or recreate it
+
+Suppose that you need to free up all bcache references so that you can
+fdisk run and re-register a changed partition table, which won't work
+if there are any active backing or caching devices left on it:
+
+1) Is it present in /dev/bcache* ? (there are times where it won't be)
+
+ If so, it's easy::
+
+ host:/sys/block/bcache0/bcache# echo 1 > stop
+
+2) But if your backing device is gone, this won't work::
+
+ host:/sys/block/bcache0# cd bcache
+ bash: cd: bcache: No such file or directory
+
+ In this case, you may have to unregister the dmcrypt block device that
+ references this bcache to free it up::
+
+ host:~# dmsetup remove oldds1
+ bcache: bcache_device_free() bcache0 stopped
+ bcache: cache_set_free() Cache set 5bc072a8-ab17-446d-9744-e247949913c1 unregistered
+
+ This causes the backing bcache to be removed from /sys/fs/bcache and
+ then it can be reused. This would be true of any block device stacking
+ where bcache is a lower device.
+
+3) In other cases, you can also look in /sys/fs/bcache/::
+
+ host:/sys/fs/bcache# ls -l */{cache?,bdev?}
+ lrwxrwxrwx 1 root root 0 Mar 5 09:39 0226553a-37cf-41d5-b3ce-8b1e944543a8/bdev1 -> ../../../devices/virtual/block/dm-1/bcache/
+ lrwxrwxrwx 1 root root 0 Mar 5 09:39 0226553a-37cf-41d5-b3ce-8b1e944543a8/cache0 -> ../../../devices/virtual/block/dm-4/bcache/
+ lrwxrwxrwx 1 root root 0 Mar 5 09:39 5bc072a8-ab17-446d-9744-e247949913c1/cache0 -> ../../../devices/pci0000:00/0000:00:01.0/0000:01:00.0/ata10/host9/target9:0:0/9:0:0:0/block/sdl/sdl2/bcache/
+
+ The device names will show which UUID is relevant, cd in that directory
+ and stop the cache::
+
+ host:/sys/fs/bcache/5bc072a8-ab17-446d-9744-e247949913c1# echo 1 > stop
+
+ This will free up bcache references and let you reuse the partition for
+ other purposes.
+
+
+
+Troubleshooting performance
+---------------------------
Bcache has a bunch of config options and tunables. The defaults are intended to
be reasonable for typical desktop and server workloads, but they're not what you
want for getting the best possible numbers when benchmarking.
+ - Backing device alignment
+
+ The default metadata size in bcache is 8k. If your backing device is
+ RAID based, then be sure to align this by a multiple of your stride
+ width using `make-bcache --data-offset`. If you intend to expand your
+ disk array in the future, then multiply a series of primes by your
+ raid stripe size to get the disk multiples that you would like.
+
+ For example: If you have a 64k stripe size, then the following offset
+ would provide alignment for many common RAID5 data spindle counts::
+
+ 64k * 2*2*2*3*3*5*7 bytes = 161280k
+
+ That space is wasted, but for only 157.5MB you can grow your RAID 5
+ volume to the following data-spindle counts without re-aligning::
+
+ 3,4,5,6,7,8,9,10,12,14,15,18,20,21 ...
+
- Bad write performance
If write performance is not what you expected, you probably wanted to be
running in writeback mode, which isn't the default (not due to a lack of
maturity, but simply because in writeback mode you'll lose data if something
- happens to your SSD)
+ happens to your SSD)::
- # echo writeback > /sys/block/bcache0/cache_mode
+ # echo writeback > /sys/block/bcache0/bcache/cache_mode
- Bad performance, or traffic not going to the SSD that you'd expect
@@ -150,13 +349,13 @@ want for getting the best possible numbers when benchmarking.
accessed data out of your cache.
But if you want to benchmark reads from cache, and you start out with fio
- writing an 8 gigabyte test file - so you want to disable that.
+ writing an 8 gigabyte test file - so you want to disable that::
- # echo 0 > /sys/block/bcache0/bcache/sequential_cutoff
+ # echo 0 > /sys/block/bcache0/bcache/sequential_cutoff
- To set it back to the default (4 mb), do
+ To set it back to the default (4 mb), do::
- # echo 4M > /sys/block/bcache0/bcache/sequential_cutoff
+ # echo 4M > /sys/block/bcache0/bcache/sequential_cutoff
- Traffic's still going to the spindle/still getting cache misses
@@ -169,10 +368,10 @@ want for getting the best possible numbers when benchmarking.
throttles traffic if the latency exceeds a threshold (it does this by
cranking down the sequential bypass).
- You can disable this if you need to by setting the thresholds to 0:
+ You can disable this if you need to by setting the thresholds to 0::
- # echo 0 > /sys/fs/bcache/<cache set>/congested_read_threshold_us
- # echo 0 > /sys/fs/bcache/<cache set>/congested_write_threshold_us
+ # echo 0 > /sys/fs/bcache/<cache set>/congested_read_threshold_us
+ # echo 0 > /sys/fs/bcache/<cache set>/congested_write_threshold_us
The default is 2000 us (2 milliseconds) for reads, and 20000 for writes.
@@ -193,7 +392,9 @@ want for getting the best possible numbers when benchmarking.
Solution: warm the cache by doing writes, or use the testing branch (there's
a fix for the issue there).
-SYSFS - BACKING DEVICE:
+
+Sysfs - backing device
+----------------------
Available at /sys/block/<bdev>/bcache, /sys/block/bcache*/bcache and
(if attached) /sys/fs/bcache/<cset-uuid>/bdev*
@@ -238,7 +439,7 @@ sequential_merge
against all new requests to determine which new requests are sequential
continuations of previous requests for the purpose of determining sequential
cutoff. This is necessary if the sequential cutoff value is greater than the
- maximum acceptable sequential size for any single request.
+ maximum acceptable sequential size for any single request.
state
The backing device can be in one of four different states:
@@ -277,7 +478,8 @@ writeback_running
still be added to the cache until it is mostly full; only meant for
benchmarking. Defaults to on.
-SYSFS - BACKING DEVICE STATS:
+Sysfs - backing device stats
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There are directories with these numbers for a running total, as well as
versions that decay over the past day, hour and 5 minutes; they're also
@@ -286,14 +488,11 @@ aggregated in the cache set directory as well.
bypassed
Amount of IO (both reads and writes) that has bypassed the cache
-cache_hits
-cache_misses
-cache_hit_ratio
+cache_hits, cache_misses, cache_hit_ratio
Hits and misses are counted per individual IO as bcache sees them; a
partial hit is counted as a miss.
-cache_bypass_hits
-cache_bypass_misses
+cache_bypass_hits, cache_bypass_misses
Hits and misses for IO that is intended to skip the cache are still counted,
but broken out here.
@@ -305,7 +504,8 @@ cache_miss_collisions
cache_readaheads
Count of times readahead occurred.
-SYSFS - CACHE SET:
+Sysfs - cache set
+~~~~~~~~~~~~~~~~~
Available at /sys/fs/bcache/<cset-uuid>
@@ -325,7 +525,7 @@ bucket_size
Size of buckets
cache<0..n>
- Symlink to each of the cache devices comprising this cache set.
+ Symlink to each of the cache devices comprising this cache set.
cache_available_percent
Percentage of cache device which doesn't contain dirty data, and could
@@ -343,8 +543,7 @@ flash_vol_create
Echoing a size to this file (in human readable units, k/M/G) creates a thinly
provisioned volume backed by the cache set.
-io_error_halflife
-io_error_limit
+io_error_halflife, io_error_limit
These determines how many errors we accept before disabling the cache.
Each error is decayed by the half life (in # ios). If the decaying count
reaches io_error_limit dirty data is written out and the cache is disabled.
@@ -368,7 +567,8 @@ unregister
Detaches all backing devices and closes the cache devices; if dirty data is
present it will disable writeback caching and wait for it to be flushed.
-SYSFS - CACHE SET INTERNAL:
+Sysfs - cache set internal
+~~~~~~~~~~~~~~~~~~~~~~~~~~
This directory also exposes timings for a number of internal operations, with
separate files for average duration, average frequency, last occurrence and max
@@ -397,7 +597,8 @@ cache_read_races
trigger_gc
Writing to this file forces garbage collection to run.
-SYSFS - CACHE DEVICE:
+Sysfs - Cache device
+~~~~~~~~~~~~~~~~~~~~
Available at /sys/block/<cdev>/bcache
diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt
new file mode 100644
index 000000000000..a47fbffe0dab
--- /dev/null
+++ b/Documentation/speculation.txt
@@ -0,0 +1,143 @@
+This document explains potential effects of speculation, and how undesirable
+effects can be mitigated portably using common APIs.
+
+===========
+Speculation
+===========
+
+To improve performance and minimize average latencies, many contemporary CPUs
+employ speculative execution techniques such as branch prediction, performing
+work which may be discarded at a later stage.
+
+Typically speculative execution cannot be observed from architectural state,
+such as the contents of registers. However, in some cases it is possible to
+observe its impact on microarchitectural state, such as the presence or
+absence of data in caches. Such state may form side-channels which can be
+observed to extract secret information.
+
+For example, in the presence of branch prediction, it is possible for bounds
+checks to be ignored by code which is speculatively executed. Consider the
+following code:
+
+ int load_array(int *array, unsigned int idx)
+ {
+ if (idx >= MAX_ARRAY_ELEMS)
+ return 0;
+ else
+ return array[idx];
+ }
+
+Which, on arm64, may be compiled to an assembly sequence such as:
+
+ CMP <idx>, #MAX_ARRAY_ELEMS
+ B.LT less
+ MOV <returnval>, #0
+ RET
+ less:
+ LDR <returnval>, [<array>, <idx>]
+ RET
+
+It is possible that a CPU mis-predicts the conditional branch, and
+speculatively loads array[idx], even if idx >= MAX_ARRAY_ELEMS. This value
+will subsequently be discarded, but the speculated load may affect
+microarchitectural state which can be subsequently measured.
+
+More complex sequences involving multiple dependent memory accesses may result
+in sensitive information being leaked. Consider the following code, building
+on the prior example:
+
+ int load_dependent_arrays(int *arr1, int *arr2, int idx)
+ {
+ int val1, val2,
+
+ val1 = load_array(arr1, idx);
+ val2 = load_array(arr2, val1);
+
+ return val2;
+ }
+
+Under speculation, the first call to load_array() may return the value of an
+out-of-bounds address, while the second call will influence microarchitectural
+state dependent on this value. This may provide an arbitrary read primitive.
+
+====================================
+Mitigating speculation side-channels
+====================================
+
+The kernel provides a generic API to ensure that bounds checks are respected
+even under speculation. Architectures which are affected by speculation-based
+side-channels are expected to implement these primitives.
+
+The array_ptr() helper in <asm/barrier.h> can be used to prevent
+information from being leaked via side-channels.
+
+A call to array_ptr(arr, idx, sz) returns a sanitized pointer to
+arr[idx] only if idx falls in the [0, sz) interval. When idx < 0 or idx > sz,
+NULL is returned. Additionally, array_ptr() of an out-of-bounds pointer is
+not propagated to code which is speculatively executed.
+
+This can be used to protect the earlier load_array() example:
+
+ int load_array(int *array, unsigned int idx)
+ {
+ int *elem;
+
+ elem = array_ptr(array, idx, MAX_ARRAY_ELEMS);
+ if (elem)
+ return *elem;
+ else
+ return 0;
+ }
+
+This can also be used in situations where multiple fields on a structure are
+accessed:
+
+ struct foo array[SIZE];
+ int a, b;
+
+ void do_thing(int idx)
+ {
+ struct foo *elem;
+
+ elem = array_ptr(array, idx, SIZE);
+ if (elem) {
+ a = elem->field_a;
+ b = elem->field_b;
+ }
+ }
+
+It is imperative that the returned pointer is used. Pointers which are
+generated separately are subject to a number of potential CPU and compiler
+optimizations, and may still be used speculatively. For example, this means
+that the following sequence is unsafe:
+
+ struct foo array[SIZE];
+ int a, b;
+
+ void do_thing(int idx)
+ {
+ if (array_ptr(array, idx, SIZE) != NULL) {
+ // unsafe as wrong pointer is used
+ a = array[idx].field_a;
+ b = array[idx].field_b;
+ }
+ }
+
+Similarly, it is unsafe to compare the returned pointer with other pointers,
+as this may permit the compiler to substitute one pointer with another,
+permitting speculation. For example, the following sequence is unsafe:
+
+ struct foo array[SIZE];
+ int a, b;
+
+ void do_thing(int idx)
+ {
+ struct foo *elem = array_ptr(array, idx, size);
+
+ // unsafe due to pointer substitution
+ if (elem == &array[idx]) {
+ a = elem->field_a;
+ b = elem->field_b;
+ }
+ }
+
diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
index d11eff61fc9a..5cd58439ad2d 100644
--- a/Documentation/x86/pti.txt
+++ b/Documentation/x86/pti.txt
@@ -78,7 +78,7 @@ this protection comes at a cost:
non-PTI SYSCALL entry code, so requires mapping fewer
things into the userspace page tables. The downside is
that stacks must be switched at entry time.
- d. Global pages are disabled for all kernel structures not
+ c. Global pages are disabled for all kernel structures not
mapped into both kernel and userspace page tables. This
feature of the MMU allows different processes to share TLB
entries mapping the kernel. Losing the feature means more
diff --git a/Makefile b/Makefile
index a2c0b32d35f3..5b01200c951e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 4
-SUBLEVEL = 112
+SUBLEVEL = 114
EXTRAVERSION =
NAME = Blurry Fish Butt
diff --git a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts
index d5e3bc518968..d57f48543f76 100644
--- a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts
+++ b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts
@@ -53,7 +53,8 @@
};
pinctrl: pin-controller@10000 {
- pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header>;
+ pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header
+ &pmx_gpio_header_gpo>;
pinctrl-names = "default";
pmx_uart0: pmx-uart0 {
@@ -85,11 +86,16 @@
* ground.
*/
pmx_gpio_header: pmx-gpio-header {
- marvell,pins = "mpp17", "mpp7", "mpp29", "mpp28",
+ marvell,pins = "mpp17", "mpp29", "mpp28",
"mpp35", "mpp34", "mpp40";
marvell,function = "gpio";
};
+ pmx_gpio_header_gpo: pxm-gpio-header-gpo {
+ marvell,pins = "mpp7";
+ marvell,function = "gpo";
+ };
+
pmx_gpio_init: pmx-init {
marvell,pins = "mpp38";
marvell,function = "gpio";
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 0d1c67f51d01..9b2126bd45d9 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -24,6 +24,7 @@
#include <linux/compiler.h>
#include <linux/const.h>
#include <linux/types.h>
+#include <asm/page-def.h>
#include <asm/bug.h>
#include <asm/sizes.h>
diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h
new file mode 100644
index 000000000000..01591a29dc2e
--- /dev/null
+++ b/arch/arm64/include/asm/page-def.h
@@ -0,0 +1,34 @@
+/*
+ * Based on arch/arm/include/asm/page.h
+ *
+ * Copyright (C) 1995-2003 Russell King
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PAGE_DEF_H
+#define __ASM_PAGE_DEF_H
+
+#include <linux/const.h>
+
+/* PAGE_SHIFT determines the page size */
+/* CONT_SHIFT determines the number of pages which can be tracked together */
+#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT
+#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT
+#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
+#define CONT_MASK (~(CONT_SIZE-1))
+
+#endif /* __ASM_PAGE_DEF_H */
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 8472c6def5ef..60d02c81a3a2 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -19,17 +19,7 @@
#ifndef __ASM_PAGE_H
#define __ASM_PAGE_H
-#include <linux/const.h>
-
-/* PAGE_SHIFT determines the page size */
-/* CONT_SHIFT determines the number of pages which can be tracked together */
-#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT
-#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT
-#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
-
-#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
-#define CONT_MASK (~(CONT_SIZE-1))
+#include <asm/page-def.h>
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 2669072456c9..fd4ffa9ea628 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -476,7 +476,7 @@ asm(
* the "%x0" template means XZR.
*/
#define write_sysreg(v, r) do { \
- u64 __val = (u64)v; \
+ u64 __val = (u64)(v); \
asm volatile("msr " __stringify(r) ", %x0" \
: : "rZ" (__val)); \
} while (0)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 8dcc88376728..57e1a1e6a2e6 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -323,6 +323,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
},
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
+ .enable = enable_psci_bp_hardening,
+ },
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
+ .enable = enable_psci_bp_hardening,
+ },
#endif
{
}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 05c16896bda5..cdf7e848f0a0 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -755,9 +755,9 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
/* Don't force KPTI for CPUs that are not vulnerable */
switch (read_cpuid_id() & MIDR_CPU_MODEL_MASK) {
- case MIDR_CAVIUM_THUNDERX2:
- case MIDR_BRCM_VULCAN:
- return false;
+ case MIDR_CAVIUM_THUNDERX2:
+ case MIDR_BRCM_VULCAN:
+ return false;
}
/* Defer to CPU feature registers */
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 60124792bc2e..b0a211dea54d 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -231,6 +231,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
switch_mm_fastpath:
+
+ arm64_apply_bp_hardening();
+
/*
* Defer TTBR0_EL1 setting for user threads to uaccess_enable() when
* emulating PAN.
@@ -246,8 +249,6 @@ asmlinkage void post_ttbr_update_workaround(void)
"ic iallu; dsb nsh; isb",
ARM64_WORKAROUND_CAVIUM_27456,
CONFIG_CAVIUM_ERRATUM_27456));
-
- arm64_apply_bp_hardening();
}
static int asids_init(void)
diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c
index 3446b6fb3acb..9da4e2292fc7 100644
--- a/arch/mips/ar7/platform.c
+++ b/arch/mips/ar7/platform.c
@@ -576,7 +576,7 @@ static int __init ar7_register_uarts(void)
uart_port.type = PORT_AR7;
uart_port.uartclk = clk_get_rate(bus_clk) / 2;
uart_port.iotype = UPIO_MEM32;
- uart_port.flags = UPF_FIXED_TYPE;
+ uart_port.flags = UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF;
uart_port.regshift = 2;
uart_port.line = 0;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index eee2a940df6e..2503688b9038 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -140,6 +140,7 @@ config PPC
select GENERIC_TIME_VSYSCALL_OLD
select GENERIC_CLOCKEVENTS
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+ select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 8ba8cb2449a9..c338c462c0eb 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -894,11 +894,24 @@ void rfi_flush_enable(bool enable)
rfi_flush = enable;
}
-static void init_fallback_flush(void)
+static bool init_fallback_flush(void)
{
u64 l1d_size, limit;
int cpu;
+ if (l1d_flush_fallback_area)
+ return true;
+
+ /*
+ * Once the slab allocator is up it's too late to allocate the fallback
+ * flush area, so return an error. This should not really happen
+ * because we call the init_fallback_flush() early. Doing the
+ * allocation later might fail due to memory fragmentation so we want
+ * to avoid that.
+ */
+ if (slab_is_available())
+ return false;
+
l1d_size = ppc64_caches.dsize;
limit = min(safe_stack_limit(), ppc64_rma_size);
@@ -926,14 +939,26 @@ static void init_fallback_flush(void)
paca_aux->l1d_flush_congruence = c;
paca_aux->l1d_flush_sets = c / 128;
}
+
+ return true;
}
void setup_rfi_flush(enum l1d_flush_type types, bool enable)
{
- init_fallback_flush();
+ /*
+ * Allocate the fallback area early during boot even if the detected
+ * flush type doe not need it - it might change due to migration.
+ */
+ if (!no_rfi_flush)
+ init_fallback_flush();
if (types & L1D_FLUSH_FALLBACK) {
- pr_info("rfi-flush: Using fallback displacement flush\n");
+ if (init_fallback_flush())
+ pr_info("rfi-flush: Using fallback displacement flush\n");
+ else {
+ pr_crit("rfi-flush: Error unable to use fallback displacement flush!\n");
+ types &= ~L1D_FLUSH_FALLBACK;
+ }
}
if (types & L1D_FLUSH_ORI)
@@ -976,4 +1001,12 @@ static __init int rfi_flush_debugfs_init(void)
}
device_initcall(rfi_flush_debugfs_init);
#endif
+
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ if (rfi_flush)
+ return sprintf(buf, "Mitigation: RFI Flush\n");
+
+ return sprintf(buf, "Vulnerable\n");
+}
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index bbdc539fb3c6..64a7c12e16a1 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -136,7 +136,12 @@ struct kvm_s390_sie_block {
__u16 ipa; /* 0x0056 */
__u32 ipb; /* 0x0058 */
__u32 scaoh; /* 0x005c */
- __u8 reserved60; /* 0x0060 */
+#ifdef __GENKSYMS__
+ __u8 reserved60; /* 0x0060 */
+#else
+#define FPF_BPBC 0x20
+ __u8 fpf; /* 0x0060 */
+#endif
__u8 ecb; /* 0x0061 */
__u8 ecb2; /* 0x0062 */
#define ECB3_AES 0x04
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index ef1a5fcc6c66..cff397553996 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -151,6 +151,7 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_ARCH0 (1UL << 4)
#define KVM_SYNC_PFAULT (1UL << 5)
#define KVM_SYNC_VRS (1UL << 6)
+#define KVM_SYNC_BPBC (1UL << 10)
/* definition of registers in kvm_run */
struct kvm_sync_regs {
__u64 prefix; /* prefix register */
@@ -168,6 +169,9 @@ struct kvm_sync_regs {
__u64 vrs[32][2]; /* vector registers */
__u8 reserved[512]; /* for future vector expansion */
__u32 fpc; /* only valid with vector registers */
+#ifndef __GENKSYMS__
+ __u8 bpbc : 1; /* bp mode */
+#endif
};
#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 32ce3ac205e1..6610816449f7 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -11,8 +11,10 @@
#include <linux/seq_file.h>
#include <linux/delay.h>
#include <linux/cpu.h>
+#include <linux/bitops.h>
#include <asm/diag.h>
#include <asm/elf.h>
+#include <asm/facility.h>
#include <asm/lowcore.h>
#include <asm/param.h>
#include <asm/smp.h>
@@ -52,6 +54,20 @@ int cpu_have_feature(unsigned int num)
}
EXPORT_SYMBOL(cpu_have_feature);
+static void show_facilities(struct seq_file *m)
+{
+ unsigned int bit;
+ long *facilities;
+
+ facilities = (long *)&S390_lowcore.stfle_fac_list;
+ seq_puts(m, "facilities :");
+ for (bit = find_first_bit_inv(facilities, MAX_FACILITY_BIT);
+ bit < MAX_FACILITY_BIT;
+ bit = find_next_bit_inv(facilities, MAX_FACILITY_BIT, bit + 1))
+ seq_printf(m, " %d", bit);
+ seq_putc(m, '\n');
+}
+
/*
* show_cpuinfo - Get information on one CPU for use by procfs.
*/
@@ -83,6 +99,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
if (int_hwcap_str[i] && (int_hwcap & (1UL << i)))
seq_printf(m, "%s ", int_hwcap_str[i]);
seq_puts(m, "\n");
+ show_facilities(m);
show_cacheinfo(m);
}
get_online_cpus();
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d94310ad5bbf..e363677b52fc 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -118,8 +118,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
- 0xffe6fffbfcfdfc40UL,
- 0x005e800000000000UL,
+ 0xffe6ffffffffffffUL,
+ 0x005effffffffffffUL,
};
unsigned long kvm_s390_fac_list_mask_size(void)
@@ -257,6 +257,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_VECTOR_REGISTERS:
r = MACHINE_HAS_VX;
break;
+ case KVM_CAP_S390_BPB:
+ r = test_facility(82);
+ break;
default:
r = 0;
}
@@ -1273,7 +1276,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
KVM_SYNC_ARCH0 |
KVM_SYNC_PFAULT;
if (test_kvm_facility(vcpu->kvm, 129))
+
vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
+ if (test_kvm_facility(vcpu->kvm, 82))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
if (kvm_is_ucontrol(vcpu->kvm))
return __kvm_ucontrol_vcpu_init(vcpu);
@@ -1337,6 +1343,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
current->thread.fpu.fpc = 0;
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.sie_block->pp = 0;
+ vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
@@ -2155,6 +2162,11 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
kvm_clear_async_pf_completion_queue(vcpu);
}
+ if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
+ test_kvm_facility(vcpu->kvm, 82)) {
+ vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+ vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
+ }
kvm_run->kvm_dirty_regs = 0;
}
@@ -2172,6 +2184,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_run->s.regs.pft = vcpu->arch.pfault_token;
kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
+ kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
diff --git a/arch/um/Makefile b/arch/um/Makefile
index e3abe6f3156d..9ccf462131c4 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -117,7 +117,7 @@ archheaders:
archprepare: include/generated/user_constants.h
LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib
+LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie)
CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
$(call cc-option, -fno-stack-protector,) \
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 7d1585642660..da022cda09ba 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -260,7 +260,8 @@ ENTRY(ret_from_kernel_thread)
pushl $0x0202 # Reset kernel eflags
popfl
movl PT_EBP(%esp), %eax
- CALL_NOSPEC PT_EBX(%esp)
+ movl PT_EBX(%esp), %edx
+ CALL_NOSPEC %edx
movl $0, PT_EAX(%esp)
/*
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d5fbef331fd4..51dea0f27728 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -188,6 +188,7 @@ entry_SYSCALL_64_fastpath:
cmpl $__NR_syscall_max, %eax
#endif
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
+ MASK_NOSPEC %r11 %rax /* sanitize syscall_nr wrt speculation */
movq %r10, %rcx
#ifdef CONFIG_RETPOLINE
movq sys_call_table(, %rax, 8), %rax
@@ -1076,7 +1077,7 @@ idtentry async_page_fault do_async_page_fault has_error_code=1
#endif
#ifdef CONFIG_X86_MCE
-idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
+idtentry machine_check do_mce has_error_code=0 paranoid=1
#endif
/*
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 112178b401a1..2d359991a273 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -46,6 +46,7 @@ static enum { EMULATE, NATIVE, NONE } vsyscall_mode =
#else
EMULATE;
#endif
+unsigned long vsyscall_pgprot = __PAGE_KERNEL_VSYSCALL;
static int __init vsyscall_setup(char *str)
{
@@ -336,11 +337,11 @@ void __init map_vsyscall(void)
extern char __vsyscall_page;
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
+ if (vsyscall_mode != NATIVE)
+ vsyscall_pgprot = __PAGE_KERNEL_VVAR;
if (vsyscall_mode != NONE)
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
- vsyscall_mode == NATIVE
- ? PAGE_KERNEL_VSYSCALL
- : PAGE_KERNEL_VVAR);
+ __pgprot(vsyscall_pgprot));
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
(unsigned long)VSYSCALL_ADDR);
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index aae78054cae2..7d165556cd35 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -23,8 +23,33 @@
#define wmb() asm volatile("sfence" ::: "memory")
#endif
-#define gmb() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
- "lfence", X86_FEATURE_LFENCE_RDTSC);
+/**
+ * array_ptr_mask - generate a mask for array_ptr() that is ~0UL when
+ * the bounds check succeeds and 0 otherwise
+ */
+#define array_ptr_mask array_ptr_mask
+static inline unsigned long array_ptr_mask(unsigned long idx, unsigned long sz)
+{
+ unsigned long mask;
+
+ /*
+ * mask = index - size, if that result is >= 0 then the index is
+ * invalid and the mask is 0 else ~0
+ */
+#ifdef CONFIG_X86_32
+ asm ("cmpl %1,%2; sbbl %0,%0;"
+#else
+ asm ("cmpq %1,%2; sbbq %0,%0;"
+#endif
+ :"=r" (mask)
+ :"r"(sz),"r" (idx)
+ :"cc");
+ return mask;
+}
+
+/* prevent speculative execution past this barrier */
+#define ifence() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
+ "lfence", X86_FEATURE_LFENCE_RDTSC)
#ifdef CONFIG_X86_PPRO_FENCE
#define dma_rmb() rmb()
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 9c7a212b84db..2d9d31949ada 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -203,7 +203,8 @@
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
-#define X86_FEATURE_SPEC_CTRL ( 7*32+19) /* Control Speculation Control */
+#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
+#define X86_FEATURE_SPEC_CTRL ( 7*32+20) /* Control Speculation Control */
#define X86_FEATURE_RETPOLINE ( 7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index cc937a3f3577..6ed1046de1ea 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -147,8 +147,7 @@ static __always_inline unsigned long long rdtsc_ordered(void)
* that some other imaginary CPU is updating continuously with a
* time stamp.
*/
- alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
- "lfence", X86_FEATURE_LFENCE_RDTSC);
+ ifence();
return rdtsc();
}
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 1afd04eb5fb7..492370b9b35b 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -11,7 +11,7 @@
* Fill the CPU return stack buffer.
*
* Each entry in the RSB, if used for a speculative 'ret', contains an
- * infinite 'pause; jmp' loop to capture speculative execution.
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
*
* This is required in various cases for retpoline and IBRS-based
* mitigations for the Spectre variant 2 vulnerability. Sometimes to
@@ -38,11 +38,13 @@
call 772f; \
773: /* speculation trap */ \
pause; \
+ lfence; \
jmp 773b; \
772: \
call 774f; \
775: /* speculation trap */ \
pause; \
+ lfence; \
jmp 775b; \
774: \
dec reg; \
@@ -60,6 +62,7 @@
call .Ldo_rop_\@
.Lspec_trap_\@:
pause
+ lfence
jmp .Lspec_trap_\@
.Ldo_rop_\@:
mov \reg, (%_ASM_SP)
@@ -142,6 +145,7 @@
" .align 16\n" \
"901: call 903f;\n" \
"902: pause;\n" \
+ " lfence;\n" \
" jmp 902b;\n" \
" .align 16\n" \
"903: addl $4, %%esp;\n" \
@@ -167,6 +171,9 @@ enum spectre_v2_mitigation {
SPECTRE_V2_IBRS,
};
+extern char __indirect_thunk_start[];
+extern char __indirect_thunk_end[];
+
/*
* On VMEXIT we must ensure that no RSB predictions learned in the guest
* can be followed in the host, by overwriting the RSB completely. Both
@@ -176,15 +183,16 @@ enum spectre_v2_mitigation {
static inline void vmexit_fill_RSB(void)
{
#ifdef CONFIG_RETPOLINE
- unsigned long loops = RSB_CLEAR_LOOPS / 2;
+ unsigned long loops;
asm volatile (ALTERNATIVE("jmp 910f",
__stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
X86_FEATURE_RETPOLINE)
"910:"
- : "=&r" (loops), ASM_CALL_CONSTRAINT
- : "r" (loops) : "memory" );
+ : "=r" (loops), ASM_CALL_CONSTRAINT
+ : : "memory" );
#endif
}
+
#endif /* __ASSEMBLY__ */
#endif /* __NOSPEC_BRANCH_H__ */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index bdecaacd6c6d..d36cab488c67 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -592,7 +592,7 @@ static inline void sync_core(void)
{
int tmp;
-#ifdef CONFIG_M486
+#ifdef CONFIG_X86_32
/*
* Do a CPUID if available, otherwise do a jump. The jump
* can conveniently enough be the jump around CPUID.
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index ba665ebd17bb..d51ecf087c37 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -25,6 +25,30 @@
#include <asm/alternative-asm.h>
+/*
+ * MASK_NOSPEC - sanitize the value of a user controlled value with
+ * respect to speculation
+ *
+ * In the get_user path once we have determined that the pointer is
+ * below the current address limit sanitize its value with respect to
+ * speculation. In the case when the pointer is above the address limit
+ * this directs the cpu to speculate with a NULL ptr rather than
+ * something targeting kernel memory.
+ *
+ * In the syscall entry path it is possible to speculate past the
+ * validation of the system call number. Use MASK_NOSPEC to sanitize the
+ * syscall array index to zero (sys_read) rather than an arbitrary
+ * target.
+ *
+ * assumes CF is set from a previous 'cmp' i.e.:
+ * cmp TASK_addr_limit, %ptr
+ * cmp __NR_syscall_max, %idx
+ */
+.macro MASK_NOSPEC mask val
+ sbb \mask, \mask
+ and \mask, \val
+.endm
+
#ifdef CONFIG_X86_SMAP
#define ASM_CLAC \
diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
index 2a754955cc79..0c21941523b9 100644
--- a/arch/x86/include/asm/spec_ctrl.h
+++ b/arch/x86/include/asm/spec_ctrl.h
@@ -91,6 +91,7 @@
.endm
#else /* __ASSEMBLY__ */
+extern int ibrs_state;
void x86_enable_ibrs(void);
void x86_disable_ibrs(void);
unsigned int x86_ibrs_enabled(void);
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 2dd10ca04ea0..a9e2ee46e94b 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_SWITCH_TO_H
#define _ASM_X86_SWITCH_TO_H
+#include <asm/nospec-branch.h>
+
struct task_struct; /* one of the stranger aspects of C forward declarations */
__visible struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *next);
@@ -24,6 +26,23 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
#define __switch_canary_iparam
#endif /* CC_STACKPROTECTOR */
+#ifdef CONFIG_RETPOLINE
+ /*
+ * When switching from a shallower to a deeper call stack
+ * the RSB may either underflow or use entries populated
+ * with userspace addresses. On CPUs where those concerns
+ * exist, overwrite the RSB with entries which capture
+ * speculative execution to prevent attack.
+ */
+#define __retpoline_fill_return_buffer \
+ ALTERNATIVE("jmp 910f", \
+ __stringify(__FILL_RETURN_BUFFER(%%ebx, RSB_CLEAR_LOOPS, %%esp)),\
+ X86_FEATURE_RSB_CTXSW) \
+ "910:\n\t"
+#else
+#define __retpoline_fill_return_buffer
+#endif
+
/*
* Saving eflags is important. It switches not only IOPL between tasks,
* it also protects other tasks from NT leaking through sysenter etc.
@@ -46,6 +65,7 @@ do { \
"movl $1f,%[prev_ip]\n\t" /* save EIP */ \
"pushl %[next_ip]\n\t" /* restore EIP */ \
__switch_canary \
+ __retpoline_fill_return_buffer \
"jmp __switch_to\n" /* regparm call */ \
"1:\t" \
"popl %%ebp\n\t" /* restore EBP */ \
@@ -100,6 +120,23 @@ do { \
#define __switch_canary_iparam
#endif /* CC_STACKPROTECTOR */
+#ifdef CONFIG_RETPOLINE
+ /*
+ * When switching from a shallower to a deeper call stack
+ * the RSB may either underflow or use entries populated
+ * with userspace addresses. On CPUs where those concerns
+ * exist, overwrite the RSB with entries which capture
+ * speculative execution to prevent attack.
+ */
+#define __retpoline_fill_return_buffer \
+ ALTERNATIVE("jmp 910f", \
+ __stringify(__FILL_RETURN_BUFFER(%%r12, RSB_CLEAR_LOOPS, %%rsp)),\
+ X86_FEATURE_RSB_CTXSW) \
+ "910:\n\t"
+#else
+#define __retpoline_fill_return_buffer
+#endif
+
/* The stack unwind code needs this but it pollutes traces otherwise */
#ifdef CONFIG_UNWIND_INFO
#define THREAD_RETURN_SYM \
@@ -122,6 +159,7 @@ do { \
THREAD_RETURN_SYM \
"movq "__percpu_arg([current_task])",%%rsi\n\t" \
__switch_canary \
+ __retpoline_fill_return_buffer \
"movq %P[thread_info](%%rsi),%%r8\n\t" \
"movq %%rax,%%rdi\n\t" \
"testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index c3496619740a..156959ca49ce 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -92,6 +92,7 @@ dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *, long);
#endif
+dotraplinkage void do_mce(struct pt_regs *, long);
static inline int get_si_code(unsigned long condition)
{
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index c3046c8abb4f..27925c8507c7 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -146,6 +146,11 @@ extern int __get_user_bad(void);
#define __uaccess_begin() stac()
#define __uaccess_end() clac()
+#define __uaccess_begin_nospec() \
+({ \
+ stac(); \
+ ifence(); \
+})
/*
* This is a type: either unsigned long, if the argument fits into
@@ -431,7 +436,7 @@ do { \
({ \
int __gu_err; \
unsigned long __gu_val; \
- __uaccess_begin(); \
+ __uaccess_begin_nospec(); \
__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
__uaccess_end(); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
@@ -473,6 +478,10 @@ struct __large_struct { unsigned long buf[100]; };
__uaccess_begin(); \
barrier();
+#define uaccess_try_nospec do { \
+ current_thread_info()->uaccess_err = 0; \
+ __uaccess_begin_nospec(); \
+
#define uaccess_catch(err) \
__uaccess_end(); \
(err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \
@@ -537,7 +546,7 @@ struct __large_struct { unsigned long buf[100]; };
* get_user_ex(...);
* } get_user_catch(err)
*/
-#define get_user_try uaccess_try
+#define get_user_try uaccess_try_nospec
#define get_user_catch(err) uaccess_catch(err)
#define get_user_ex(x, ptr) do { \
@@ -572,7 +581,7 @@ extern void __cmpxchg_wrong_size(void)
__typeof__(ptr) __uval = (uval); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
- __uaccess_begin(); \
+ __uaccess_begin_nospec(); \
switch (size) { \
case 1: \
{ \
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 3fe0eac59462..8f15004d1f7b 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -111,17 +111,17 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
switch (n) {
case 1:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_size(*(u8 *)to, from, 1, ret, 1);
__uaccess_end();
return ret;
case 2:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_size(*(u16 *)to, from, 2, ret, 2);
__uaccess_end();
return ret;
case 4:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_size(*(u32 *)to, from, 4, ret, 4);
__uaccess_end();
return ret;
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index b89c34c4019b..b0307c59e4ec 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -57,31 +57,31 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
return copy_user_generic(dst, (__force void *)src, size);
switch (size) {
case 1:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u8 *)dst, (u8 __user *)src,
ret, "b", "b", "=q", 1);
__uaccess_end();
return ret;
case 2:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u16 *)dst, (u16 __user *)src,
ret, "w", "w", "=r", 2);
__uaccess_end();
return ret;
case 4:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u32 *)dst, (u32 __user *)src,
ret, "l", "k", "=r", 4);
__uaccess_end();
return ret;
case 8:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 8);
__uaccess_end();
return ret;
case 10:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 10);
if (likely(!ret))
@@ -91,7 +91,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
__uaccess_end();
return ret;
case 16:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 16);
if (likely(!ret))
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 4865e10dbb55..9ee85066f407 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -13,6 +13,7 @@ extern void map_vsyscall(void);
*/
extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
extern bool vsyscall_enabled(void);
+extern unsigned long vsyscall_pgprot;
#else
static inline void map_vsyscall(void) {}
static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 7a0e947c7f95..8cfaef7ef297 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -361,14 +361,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
irq_data->chip_data = data;
irq_data->hwirq = virq + i;
err = assign_irq_vector_policy(virq + i, node, data, info);
- if (err)
+ if (err) {
+ irq_data->chip_data = NULL;
+ free_apic_chip_data(data);
goto error;
+ }
}
return 0;
error:
- x86_vector_free_irqs(domain, virq, i + 1);
+ x86_vector_free_irqs(domain, virq, i);
return err;
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 7e64fafb6787..167fcdb7cee1 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -23,6 +23,7 @@
#include <asm/alternative.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
+#include <asm/intel-family.h>
static void __init spectre_v2_select_mitigation(void);
@@ -156,6 +157,23 @@ disable:
return SPECTRE_V2_CMD_NONE;
}
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ boot_cpu_data.x86 == 6) {
+ switch (boot_cpu_data.x86_model) {
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
+ return true;
+ }
+ }
+ return false;
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -214,6 +232,29 @@ retpoline_auto:
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
+
+ /*
+ * If neither SMEP or KPTI are available, there is a risk of
+ * hitting userspace addresses in the RSB after a context switch
+ * from a shallow call stack to a deeper one. To prevent this fill
+ * the entire RSB, even when using IBRS.
+ *
+ * Skylake era CPUs have a separate issue with *underflow* of the
+ * RSB, when they will predict 'ret' targets from the generic BTB.
+ * The proper mitigation for this is IBRS. If IBRS is not supported
+ * or deactivated in favour of retpolines the RSB fill on context
+ * switch is required.
+ */
+ if ((!boot_cpu_has(X86_FEATURE_KAISER) &&
+ !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+ pr_info("Filling RSB on context switch\n");
+ }
+
+ if (!is_skylake_era() && x86_ibrs_enabled()) {
+ pr_info("Retpolines enabled, force-disabling IBRS due to !SKL-era core\n");
+ ibrs_state = 0;
+ }
}
#undef pr_fmt
@@ -234,7 +275,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev,
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
return sprintf(buf, "Not affected\n");
- return sprintf(buf, "Vulnerable\n");
+ return sprintf(buf, "Mitigation: Barriers\n");
}
ssize_t cpu_show_spectre_v2(struct device *dev,
@@ -242,7 +283,13 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return sprintf(buf, "Not affected\n");
+ if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) && x86_ibrs_enabled()) {
+ return sprintf(buf, "Mitigation: IBRS+IBPB\n");
+ }
- return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
+ if (x86_ibpb_enabled())
+ return sprintf(buf, "%s + IBPB\n", spectre_v2_strings[spectre_v2_enabled]);
+ else
+ return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 727e1becb013..19bf80515845 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -888,8 +888,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
- /* Assume for now that ALL x86 CPUs are insecure */
- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ if (c->x86_vendor != X86_VENDOR_AMD)
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 60bbd789a460..9e336df32b3c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1780,6 +1780,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
void (*machine_check_vector)(struct pt_regs *, long error_code) =
unexpected_machine_check;
+dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
+{
+ machine_check_vector(regs, error_code);
+}
+
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 884b114590e9..6f0486d6689d 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -39,6 +39,9 @@
#include <asm/setup.h>
#include <asm/msr.h>
+/* last level cache size per core */
+static int llc_size_per_core;
+
static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
static struct mc_saved_data {
unsigned int mc_saved_count;
@@ -996,15 +999,18 @@ static bool is_blacklisted(unsigned int cpu)
/*
* Late loading on model 79 with microcode revision less than 0x0b000021
- * may result in a system hang. This behavior is documented in item
- * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+ * and LLC size per core bigger than 2.5MB may result in a system hang.
+ * This behavior is documented in item BDF90, #334165 (Intel Xeon
+ * Processor E7-8800/4800 v4 Product Family).
*/
if (c->x86 == 6 &&
c->x86_model == 79 &&
c->x86_mask == 0x01 &&
+ llc_size_per_core > 2621440 &&
c->microcode < 0x0b000021) {
pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
+ return true;
}
return false;
@@ -1067,6 +1073,15 @@ static struct microcode_ops microcode_intel_ops = {
.microcode_fini_cpu = microcode_fini_cpu,
};
+static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
+{
+ u64 llc_size = c->x86_cache_size * 1024;
+
+ do_div(llc_size, c->x86_max_cores);
+
+ return (int)llc_size;
+}
+
struct microcode_ops * __init init_intel_microcode(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -1077,6 +1092,8 @@ struct microcode_ops * __init init_intel_microcode(void)
return NULL;
}
+ llc_size_per_core = calc_llc_size_per_core(c);
+
return &microcode_intel_ops;
}
diff --git a/arch/x86/kernel/cpu/spec_ctrl.c b/arch/x86/kernel/cpu/spec_ctrl.c
index 321d6b204e6c..b2a07ee1b318 100644
--- a/arch/x86/kernel/cpu/spec_ctrl.c
+++ b/arch/x86/kernel/cpu/spec_ctrl.c
@@ -11,8 +11,11 @@
* Keep it open for more flags in case needed.
*
* -1 means "not touched by nospec() earlyparam"
+ *
+ * If IBRS is set, IBPB is always set. IBPB can be set independently
+ * on IBRS state (SKL).
*/
-static int ibrs_state = -1;
+int ibrs_state = -1;
static int ibpb_state = -1;
unsigned int notrace x86_ibrs_enabled(void)
@@ -51,18 +54,21 @@ EXPORT_SYMBOL_GPL(x86_enable_ibrs);
void x86_spec_check(void)
{
- if (ibrs_state == 0 || ibpb_state == 0) {
+ if (ibpb_state == 0) {
printk_once(KERN_INFO "IBRS/IBPB: disabled\n");
return;
}
if (cpuid_edx(7) & BIT(26)) {
- ibrs_state = 1;
+ if (ibrs_state == -1) {
+ /* noone force-disabled IBRS */
+ ibrs_state = 1;
+ printk_once(KERN_INFO "IBRS: initialized\n");
+ }
+ printk_once(KERN_INFO "IBPB: initialized\n");
ibpb_state = 1;
setup_force_cpu_cap(X86_FEATURE_SPEC_CTRL);
-
- printk_once(KERN_INFO "IBRS/IBPB: Initialized\n");
}
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index f1b427e0d3ba..3517f6860a58 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -36,6 +36,7 @@
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
+#include <asm/nospec-branch.h>
#include "common.h"
@@ -191,7 +192,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src)
}
/* Check whether insn is indirect jump */
-static int insn_is_indirect_jump(struct insn *insn)
+static int __insn_is_indirect_jump(struct insn *insn)
{
return ((insn->opcode.bytes[0] == 0xff &&
(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
@@ -225,6 +226,26 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
return (start <= target && target <= start + len);
}
+static int insn_is_indirect_jump(struct insn *insn)
+{
+ int ret = __insn_is_indirect_jump(insn);
+
+#ifdef CONFIG_RETPOLINE
+ /*
+ * Jump to x86_indirect_thunk_* is treated as an indirect jump.
+ * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
+ * older gcc may use indirect jump. So we add this check instead of
+ * replace indirect-jump check.
+ */
+ if (!ret)
+ ret = insn_jump_into_range(insn,
+ (unsigned long)__indirect_thunk_start,
+ (unsigned long)__indirect_thunk_end -
+ (unsigned long)__indirect_thunk_start);
+#endif
+ return ret;
+}
+
/* Decode whole function to ensure any instructions don't jump into target */
static int can_optimize(unsigned long paddr)
{
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 6feaa767513e..9000898c5463 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -104,6 +104,13 @@ SECTIONS
IRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
+
+#ifdef CONFIG_RETPOLINE
+ __indirect_thunk_start = .;
+ *(.text.__x86.indirect_thunk)
+ __indirect_thunk_end = .;
+#endif
+
/* End of text section */
_etext = .;
} :text = 0x9090
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 79e0a33a36df..8c687ee66bbc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -33,6 +33,7 @@
#include <linux/slab.h>
#include <linux/tboot.h>
#include <linux/hrtimer.h>
+#include <linux/nospec.h>
#include "kvm_cache_regs.h"
#include "x86.h"
@@ -843,21 +844,15 @@ static const unsigned short vmcs_field_to_offset_table[] = {
static inline short vmcs_field_to_offset(unsigned long field)
{
- BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
-
- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
- return -ENOENT;
+ const unsigned short *offset;
- /*
- * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
- * generic mechanism.
- */
- asm("lfence");
+ BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
- if (vmcs_field_to_offset_table[field] == 0)
+ offset = array_ptr(vmcs_field_to_offset_table, field,
+ ARRAY_SIZE(vmcs_field_to_offset_table));
+ if (!offset || *offset == 0)
return -ENOENT;
-
- return vmcs_field_to_offset_table[field];
+ return *offset;
}
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index a744506856b1..88ce150186c6 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c
@@ -21,12 +21,14 @@ static inline int myisspace(u8 c)
* @option: option string to look for
*
* Returns the position of that @option (starts counting with 1)
- * or 0 on not found.
+ * or 0 on not found. @option will only be found if it is found
+ * as an entire word in @cmdline. For instance, if @option="car"
+ * then a cmdline which contains "cart" will not match.
*/
int cmdline_find_option_bool(const char *cmdline, const char *option)
{
char c;
- int len, pos = 0, wstart = 0;
+ int pos = 0, wstart = 0;
const char *opptr = NULL;
enum {
st_wordstart = 0, /* Start of word/after whitespace */
@@ -37,11 +39,14 @@ int cmdline_find_option_bool(const char *cmdline, const char *option)
if (!cmdline)
return -1; /* No command line */
- len = min_t(int, strlen(cmdline), COMMAND_LINE_SIZE);
- if (!len)
+ if (!strlen(cmdline))
return 0;
- while (len--) {
+ /*
+ * This 'pos' check ensures we do not overrun
+ * a non-NULL-terminated 'cmdline'
+ */
+ while (pos < COMMAND_LINE_SIZE) {
c = *(char *)cmdline++;
pos++;
@@ -58,17 +63,26 @@ int cmdline_find_option_bool(const char *cmdline, const char *option)
/* fall through */
case st_wordcmp:
- if (!*opptr)
+ if (!*opptr) {
+ /*
+ * We matched all the way to the end of the
+ * option we were looking for. If the
+ * command-line has a space _or_ ends, then
+ * we matched!
+ */
if (!c || myisspace(c))
return wstart;
else
state = st_wordskip;
- else if (!c)
+ } else if (!c) {
+ /*
+ * Hit the NULL terminator on the end of
+ * cmdline.
+ */
return 0;
- else if (c != *opptr++)
+ } else if (c != *opptr++) {
state = st_wordskip;
- else if (!len) /* last word and is matching */
- return wstart;
+ }
break;
case st_wordskip:
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 46668cda4ffd..c74b52d4ff65 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -38,6 +38,7 @@ ENTRY(__get_user_1)
GET_THREAD_INFO(%_ASM_DX)
cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ MASK_NOSPEC %_ASM_DX, %_ASM_AX
ASM_STAC
1: movzbl (%_ASM_AX),%edx
xor %eax,%eax
@@ -51,6 +52,7 @@ ENTRY(__get_user_2)
GET_THREAD_INFO(%_ASM_DX)
cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ MASK_NOSPEC %_ASM_DX, %_ASM_AX
ASM_STAC
2: movzwl -1(%_ASM_AX),%edx
xor %eax,%eax
@@ -64,6 +66,7 @@ ENTRY(__get_user_4)
GET_THREAD_INFO(%_ASM_DX)
cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ MASK_NOSPEC %_ASM_DX, %_ASM_AX
ASM_STAC
3: movl -3(%_ASM_AX),%edx
xor %eax,%eax
@@ -78,6 +81,7 @@ ENTRY(__get_user_8)
GET_THREAD_INFO(%_ASM_DX)
cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ MASK_NOSPEC %_ASM_DX, %_ASM_AX
ASM_STAC
4: movq -7(%_ASM_AX),%rdx
xor %eax,%eax
@@ -89,6 +93,7 @@ ENTRY(__get_user_8)
GET_THREAD_INFO(%_ASM_DX)
cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user_8
+ MASK_NOSPEC %_ASM_DX, %_ASM_AX
ASM_STAC
4: movl -7(%_ASM_AX),%edx
5: movl -3(%_ASM_AX),%ecx
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 019a03599bb0..e611a124c442 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -9,7 +9,7 @@
#include <asm/nospec-branch.h>
.macro THUNK reg
- .section .text.__x86.indirect_thunk.\reg
+ .section .text.__x86.indirect_thunk
ENTRY(__x86_indirect_thunk_\reg)
CFI_STARTPROC
@@ -25,7 +25,8 @@ ENDPROC(__x86_indirect_thunk_\reg)
* than one per register with the correct names. So we do it
* the simple and nasty way...
*/
-#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
+#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
GENERATE_THUNK(_ASM_AX)
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 8264f6b0c19d..d964272fdf35 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -570,12 +570,12 @@ do { \
unsigned long __copy_to_user_ll(void __user *to, const void *from,
unsigned long n)
{
- stac();
+ __uaccess_begin_nospec();
if (movsl_is_ok(to, from, n))
__copy_user(to, from, n);
else
n = __copy_user_intel(to, from, n);
- clac();
+ __uaccess_end();
return n;
}
EXPORT_SYMBOL(__copy_to_user_ll);
@@ -610,7 +610,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nozero);
unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
unsigned long n)
{
- stac();
+ __uaccess_begin_nospec();
#ifdef CONFIG_X86_INTEL_USERCOPY
if (n > 64 && cpu_has_xmm2)
n = __copy_user_zeroing_intel_nocache(to, from, n);
@@ -636,7 +636,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
#else
__copy_user(to, from, n);
#endif
- clac();
+ __uaccess_end();
return n;
}
EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
index b15c90b60ec7..052dc3f73f95 100644
--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -339,7 +339,7 @@ void __init kaiser_init(void)
if (vsyscall_enabled())
kaiser_add_user_map_early((void *)VSYSCALL_ADDR,
PAGE_SIZE,
- __PAGE_KERNEL_VSYSCALL);
+ vsyscall_pgprot);
for_each_possible_cpu(cpu) {
void *percpu_vaddr = __per_cpu_user_mapped_start +
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 42b275b55305..6a69c0798d45 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4359,6 +4359,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
* https://bugzilla.kernel.org/show_bug.cgi?id=121671
*/
{ "LITEON CX1-JB*-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 },
+ { "LITEON EP1-*", NULL, ATA_HORKAGE_MAX_SEC_1024 },
/* Devices we expect to fail diagnostics */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 55b3bbe09409..79ae0dbc1e81 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -125,13 +125,16 @@ static u8 i40iw_derive_hw_ird_setting(u16 cm_ird)
* @conn_ird: connection IRD
* @conn_ord: connection ORD
*/
-static void i40iw_record_ird_ord(struct i40iw_cm_node *cm_node, u16 conn_ird, u16 conn_ord)
+static void i40iw_record_ird_ord(struct i40iw_cm_node *cm_node, u32 conn_ird,
+ u32 conn_ord)
{
if (conn_ird > I40IW_MAX_IRD_SIZE)
conn_ird = I40IW_MAX_IRD_SIZE;
if (conn_ord > I40IW_MAX_ORD_SIZE)
conn_ord = I40IW_MAX_ORD_SIZE;
+ else if (!conn_ord && cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO)
+ conn_ord = 1;
cm_node->ird_size = conn_ird;
cm_node->ord_size = conn_ord;
@@ -2868,15 +2871,13 @@ static struct i40iw_cm_listener *i40iw_make_listen_node(
* i40iw_create_cm_node - make a connection node with params
* @cm_core: cm's core
* @iwdev: iwarp device structure
- * @private_data_len: len to provate data for mpa request
- * @private_data: pointer to private data for connection
+ * @conn_param: upper layer connection parameters
* @cm_info: quad info for connection
*/
static struct i40iw_cm_node *i40iw_create_cm_node(
struct i40iw_cm_core *cm_core,
struct i40iw_device *iwdev,
- u16 private_data_len,
- void *private_data,
+ struct iw_cm_conn_param *conn_param,
struct i40iw_cm_info *cm_info)
{
struct i40iw_cm_node *cm_node;
@@ -2884,6 +2885,9 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
struct i40iw_cm_node *loopback_remotenode;
struct i40iw_cm_info loopback_cm_info;
+ u16 private_data_len = conn_param->private_data_len;
+ const void *private_data = conn_param->private_data;
+
/* create a CM connection node */
cm_node = i40iw_make_cm_node(cm_core, iwdev, cm_info, NULL);
if (!cm_node)
@@ -2892,6 +2896,8 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
cm_node->tcp_cntxt.client = 1;
cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
+ i40iw_record_ird_ord(cm_node, conn_param->ird, conn_param->ord);
+
if (!memcmp(cm_info->loc_addr, cm_info->rem_addr, sizeof(cm_info->loc_addr))) {
loopback_remotelistener = i40iw_find_listener(
cm_core,
@@ -2925,6 +2931,10 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
private_data_len);
loopback_remotenode->pdata.size = private_data_len;
+ if (loopback_remotenode->ord_size > cm_node->ird_size)
+ loopback_remotenode->ord_size =
+ cm_node->ird_size;
+
cm_node->state = I40IW_CM_STATE_OFFLOADED;
cm_node->tcp_cntxt.rcv_nxt =
loopback_remotenode->tcp_cntxt.loc_seq_num;
@@ -3833,20 +3843,13 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
apbvt_set = 1;
cm_id->add_ref(cm_id);
cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev,
- conn_param->private_data_len,
- (void *)conn_param->private_data,
- &cm_info);
+ conn_param, &cm_info);
if (IS_ERR(cm_node)) {
err = PTR_ERR(cm_node);
goto err_out;
}
- i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
- if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
- !cm_node->ord_size)
- cm_node->ord_size = 1;
-
cm_node->apbvt_set = apbvt_set;
cm_node->qhash_set = qhash_set;
iwqp->cm_node = cm_node;
@@ -4251,10 +4254,16 @@ set_qhash:
}
/**
- * i40iw_cm_disconnect_all - disconnect all connected qp's
+ * i40iw_cm_teardown_connections - teardown QPs
* @iwdev: device pointer
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @ipv4: flag indicating IPv4 when true
+ * @disconnect_all: flag indicating disconnect all QPs
+ * teardown QPs where source or destination addr matches ip addr
*/
-void i40iw_cm_disconnect_all(struct i40iw_device *iwdev)
+void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr,
+ struct i40iw_cm_info *nfo,
+ bool disconnect_all)
{
struct i40iw_cm_core *cm_core = &iwdev->cm_core;
struct list_head *list_core_temp;
@@ -4268,8 +4277,13 @@ void i40iw_cm_disconnect_all(struct i40iw_device *iwdev)
spin_lock_irqsave(&cm_core->ht_lock, flags);
list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
cm_node = container_of(list_node, struct i40iw_cm_node, list);
- atomic_inc(&cm_node->ref_count);
- list_add(&cm_node->connected_entry, &connected_list);
+ if (disconnect_all ||
+ (nfo->vlan_id == cm_node->vlan_id &&
+ (!memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16) ||
+ !memcmp(cm_node->rem_addr, ipaddr, nfo->ipv4 ? 4 : 16)))) {
+ atomic_inc(&cm_node->ref_count);
+ list_add(&cm_node->connected_entry, &connected_list);
+ }
}
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
@@ -4301,6 +4315,9 @@ void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
enum i40iw_quad_hash_manage_type op =
ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE;
+ nfo.vlan_id = vlan_id;
+ nfo.ipv4 = ipv4;
+
/* Disable or enable qhash for listeners */
spin_lock_irqsave(&cm_core->listen_list_lock, flags);
list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
@@ -4310,8 +4327,6 @@ void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
memcpy(nfo.loc_addr, listen_node->loc_addr,
sizeof(nfo.loc_addr));
nfo.loc_port = listen_node->loc_port;
- nfo.ipv4 = listen_node->ipv4;
- nfo.vlan_id = listen_node->vlan_id;
nfo.user_pri = listen_node->user_pri;
if (!list_empty(&listen_node->child_listen_list)) {
i40iw_qhash_ctrl(iwdev,
@@ -4333,7 +4348,7 @@ void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
}
spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- /* disconnect any connected qp's on ifdown */
+ /* teardown connected qp's on ifdown */
if (!ifup)
- i40iw_cm_disconnect_all(iwdev);
+ i40iw_cm_teardown_connections(iwdev, ipaddr, &nfo, false);
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index b72813ddd9fa..875c3089168f 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -450,5 +450,7 @@ int i40iw_arp_table(struct i40iw_device *iwdev,
void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
u32 *ipaddr, bool ipv4, bool ifup);
-void i40iw_cm_disconnect_all(struct i40iw_device *iwdev);
+void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr,
+ struct i40iw_cm_info *nfo,
+ bool disconnect_all);
#endif /* I40IW_CM_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index dfd2fb4db97e..18727fdab41b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -1834,8 +1834,6 @@ static enum i40iw_status_code i40iw_sc_get_next_aeqe(struct i40iw_sc_aeq *aeq,
static enum i40iw_status_code i40iw_sc_repost_aeq_entries(struct i40iw_sc_dev *dev,
u32 count)
{
- if (count > I40IW_MAX_AEQ_ALLOCATE_COUNT)
- return I40IW_ERR_INVALID_SIZE;
if (dev->is_pf)
i40iw_wr32(dev->hw, I40E_PFPE_AEQALLOC, count);
@@ -3876,8 +3874,10 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].cnt = 1;
hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt = mrwanted;
- hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt = I40IW_MAX_WQ_ENTRIES * qpwanted;
- hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt = 4 * I40IW_MAX_IRD_SIZE * qpwanted;
+ hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt =
+ roundup_pow_of_two(I40IW_MAX_WQ_ENTRIES * qpwanted);
+ hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt =
+ roundup_pow_of_two(2 * I40IW_MAX_IRD_SIZE * qpwanted);
hmc_info->hmc_obj[I40IW_HMC_IW_XFFL].cnt =
hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt / hmc_fpm_misc->xf_block_size;
hmc_info->hmc_obj[I40IW_HMC_IW_Q1FL].cnt =
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
index 219c848c6eb2..01dcce2bc439 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -97,6 +97,7 @@
#define RDMA_OPCODE_MASK 0x0f
#define RDMA_READ_REQ_OPCODE 1
#define Q2_BAD_FRAME_OFFSET 72
+#define Q2_FPSN_OFFSET 64
#define CQE_MAJOR_DRV 0x8000
#define I40IW_TERM_SENT 0x01
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 0937c49de070..0b2e93d11b54 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -1783,7 +1783,7 @@ static void i40iw_close(struct i40e_info *ldev, struct i40e_client *client, bool
iwdev = &hdl->device;
iwdev->closing = true;
- i40iw_cm_disconnect_all(iwdev);
+ i40iw_cm_teardown_connections(iwdev, NULL, NULL, true);
destroy_workqueue(iwdev->virtchnl_wq);
i40iw_deinit_device(iwdev, reset);
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index 9db317a7cc91..d5842abb2b5a 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -1376,7 +1376,7 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq,
u32 *hw_host_ctx = (u32 *)qp->hw_host_ctx;
u32 rcv_wnd = hw_host_ctx[23];
/* first partial seq # in q2 */
- u32 fps = qp->q2_buf[16];
+ u32 fps = *(u32 *)(qp->q2_buf + Q2_FPSN_OFFSET);
struct list_head *rxlist = &pfpdu->rxlist;
struct list_head *plist;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
index 15e8f55983fd..531dc49a0eba 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_user.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h
@@ -59,7 +59,6 @@ enum i40iw_device_capabilities_const {
I40IW_MAX_CEQ_ENTRIES = 131071,
I40IW_MIN_CQ_SIZE = 1,
I40IW_MAX_CQ_SIZE = 1048575,
- I40IW_MAX_AEQ_ALLOCATE_COUNT = 255,
I40IW_DB_ID_ZERO = 0,
I40IW_MAX_WQ_FRAGMENT_COUNT = 3,
I40IW_MAX_SGE_RD = 1,
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 2ed915981311..78d2e594cc0c 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -434,13 +434,13 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
qp->s_state = OP(COMPARE_SWAP);
put_ib_ateth_swap(wqe->atomic_wr.swap,
&ohdr->u.atomic_eth);
- put_ib_ateth_swap(wqe->atomic_wr.compare_add,
- &ohdr->u.atomic_eth);
+ put_ib_ateth_compare(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
} else {
qp->s_state = OP(FETCH_ADD);
put_ib_ateth_swap(wqe->atomic_wr.compare_add,
&ohdr->u.atomic_eth);
- put_ib_ateth_swap(0, &ohdr->u.atomic_eth);
+ put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
}
put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
&ohdr->u.atomic_eth);
diff --git a/drivers/input/misc/twl4030-vibra.c b/drivers/input/misc/twl4030-vibra.c
index 10c4e3d462f1..7233db002588 100644
--- a/drivers/input/misc/twl4030-vibra.c
+++ b/drivers/input/misc/twl4030-vibra.c
@@ -178,12 +178,14 @@ static SIMPLE_DEV_PM_OPS(twl4030_vibra_pm_ops,
twl4030_vibra_suspend, twl4030_vibra_resume);
static bool twl4030_vibra_check_coexist(struct twl4030_vibra_data *pdata,
- struct device_node *node)
+ struct device_node *parent)
{
+ struct device_node *node;
+
if (pdata && pdata->coexist)
return true;
- node = of_find_node_by_name(node, "codec");
+ node = of_get_child_by_name(parent, "codec");
if (node) {
of_node_put(node);
return true;
diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c
index ea63fad48de6..1e968ae37f60 100644
--- a/drivers/input/misc/twl6040-vibra.c
+++ b/drivers/input/misc/twl6040-vibra.c
@@ -262,7 +262,7 @@ static int twl6040_vibra_probe(struct platform_device *pdev)
int vddvibr_uV = 0;
int error;
- twl6040_core_node = of_find_node_by_name(twl6040_core_dev->of_node,
+ twl6040_core_node = of_get_child_by_name(twl6040_core_dev->of_node,
"vibra");
if (!twl6040_core_node) {
dev_err(&pdev->dev, "parent of node is missing?\n");
diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c
index 4074fd5e3781..475703328703 100644
--- a/drivers/input/mouse/trackpoint.c
+++ b/drivers/input/mouse/trackpoint.c
@@ -383,6 +383,9 @@ int trackpoint_detect(struct psmouse *psmouse, bool set_properties)
if (trackpoint_read(&psmouse->ps2dev, TP_EXT_BTN, &button_info)) {
psmouse_warn(psmouse, "failed to get extended button data, assuming 3 buttons\n");
button_info = 0x33;
+ } else if (!button_info) {
+ psmouse_warn(psmouse, "got 0 in extended button data, assuming 3 buttons\n");
+ button_info = 0x33;
}
psmouse->private = kzalloc(sizeof(struct trackpoint_data), GFP_KERNEL);
diff --git a/drivers/input/touchscreen/88pm860x-ts.c b/drivers/input/touchscreen/88pm860x-ts.c
index 251ff2aa0633..7a0dbce4dae9 100644
--- a/drivers/input/touchscreen/88pm860x-ts.c
+++ b/drivers/input/touchscreen/88pm860x-ts.c
@@ -126,7 +126,7 @@ static int pm860x_touch_dt_init(struct platform_device *pdev,
int data, n, ret;
if (!np)
return -ENODEV;
- np = of_find_node_by_name(np, "touch");
+ np = of_get_child_by_name(np, "touch");
if (!np) {
dev_err(&pdev->dev, "Can't find touch node\n");
return -EINVAL;
@@ -144,13 +144,13 @@ static int pm860x_touch_dt_init(struct platform_device *pdev,
if (data) {
ret = pm860x_reg_write(i2c, PM8607_GPADC_MISC1, data);
if (ret < 0)
- return -EINVAL;
+ goto err_put_node;
}
/* set tsi prebias time */
if (!of_property_read_u32(np, "marvell,88pm860x-tsi-prebias", &data)) {
ret = pm860x_reg_write(i2c, PM8607_TSI_PREBIAS, data);
if (ret < 0)
- return -EINVAL;
+ goto err_put_node;
}
/* set prebias & prechg time of pen detect */
data = 0;
@@ -161,10 +161,18 @@ static int pm860x_touch_dt_init(struct platform_device *pdev,
if (data) {
ret = pm860x_reg_write(i2c, PM8607_PD_PREBIAS, data);
if (ret < 0)
- return -EINVAL;
+ goto err_put_node;
}
of_property_read_u32(np, "marvell,88pm860x-resistor-X", res_x);
+
+ of_node_put(np);
+
return 0;
+
+err_put_node:
+ of_node_put(np);
+
+ return -EINVAL;
}
#else
#define pm860x_touch_dt_init(x, y, z) (-1)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 537903bf9add..ac3a331a2c66 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -68,6 +68,8 @@
#include <linux/random.h>
#include <trace/events/bcache.h>
+#define MAX_OPEN_BUCKETS 128
+
/* Bucket heap / gen */
uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
@@ -440,6 +442,11 @@ out:
b->prio = INITIAL_PRIO;
}
+ if (ca->set->avail_nbuckets > 0) {
+ ca->set->avail_nbuckets--;
+ bch_update_bucket_in_use(ca->set, &ca->set->gc_stats);
+ }
+
return r;
}
@@ -447,6 +454,11 @@ void __bch_bucket_free(struct cache *ca, struct bucket *b)
{
SET_GC_MARK(b, 0);
SET_GC_SECTORS_USED(b, 0);
+
+ if (ca->set->avail_nbuckets < ca->set->nbuckets) {
+ ca->set->avail_nbuckets++;
+ bch_update_bucket_in_use(ca->set, &ca->set->gc_stats);
+ }
}
void bch_bucket_free(struct cache_set *c, struct bkey *k)
@@ -599,7 +611,7 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
/*
* If we had to allocate, we might race and not need to allocate the
- * second time we call find_data_bucket(). If we allocated a bucket but
+ * second time we call pick_data_bucket(). If we allocated a bucket but
* didn't use it, drop the refcount bch_bucket_alloc_set() took:
*/
if (KEY_PTRS(&alloc.key))
@@ -672,7 +684,7 @@ int bch_open_buckets_alloc(struct cache_set *c)
spin_lock_init(&c->data_bucket_lock);
- for (i = 0; i < 6; i++) {
+ for (i = 0; i < MAX_OPEN_BUCKETS; i++) {
struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);
if (!b)
return -ENOMEM;
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 02619cabda8b..2eab4e7afca9 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -265,9 +265,6 @@ struct bcache_device {
atomic_t *stripe_sectors_dirty;
unsigned long *full_dirty_stripes;
- unsigned long sectors_dirty_last;
- long sectors_dirty_derivative;
-
struct bio_set *bio_split;
unsigned data_csum:1;
@@ -362,12 +359,14 @@ struct cached_dev {
uint64_t writeback_rate_target;
int64_t writeback_rate_proportional;
- int64_t writeback_rate_derivative;
- int64_t writeback_rate_change;
+ int64_t writeback_rate_integral;
+ int64_t writeback_rate_integral_scaled;
+ int32_t writeback_rate_change;
unsigned writeback_rate_update_seconds;
- unsigned writeback_rate_d_term;
+ unsigned writeback_rate_i_term_inverse;
unsigned writeback_rate_p_term_inverse;
+ unsigned writeback_rate_minimum;
};
enum alloc_reserve {
@@ -581,6 +580,7 @@ struct cache_set {
uint8_t need_gc;
struct gc_stat gc_stats;
size_t nbuckets;
+ size_t avail_nbuckets;
struct task_struct *gc_thread;
/* Where in the btree gc currently is */
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 6718649519c9..4492fb7d67d2 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -804,7 +804,10 @@ int bch_btree_cache_alloc(struct cache_set *c)
c->shrink.scan_objects = bch_mca_scan;
c->shrink.seeks = 4;
c->shrink.batch = c->btree_pages * 2;
- register_shrinker(&c->shrink);
+
+ if (register_shrinker(&c->shrink))
+ pr_warn("bcache: %s: could not register shrinker",
+ __func__);
return 0;
}
@@ -1238,6 +1241,11 @@ void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k)
__bch_btree_mark_key(c, level, k);
}
+void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats)
+{
+ stats->in_use = (c->nbuckets - c->avail_nbuckets) * 100 / c->nbuckets;
+}
+
static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
{
uint8_t stale = 0;
@@ -1649,9 +1657,8 @@ static void btree_gc_start(struct cache_set *c)
mutex_unlock(&c->bucket_lock);
}
-static size_t bch_btree_gc_finish(struct cache_set *c)
+static void bch_btree_gc_finish(struct cache_set *c)
{
- size_t available = 0;
struct bucket *b;
struct cache *ca;
unsigned i;
@@ -1688,6 +1695,7 @@ static size_t bch_btree_gc_finish(struct cache_set *c)
}
rcu_read_unlock();
+ c->avail_nbuckets = 0;
for_each_cache(ca, c, i) {
uint64_t *i;
@@ -1709,18 +1717,16 @@ static size_t bch_btree_gc_finish(struct cache_set *c)
BUG_ON(!GC_MARK(b) && GC_SECTORS_USED(b));
if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE)
- available++;
+ c->avail_nbuckets++;
}
}
mutex_unlock(&c->bucket_lock);
- return available;
}
static void bch_btree_gc(struct cache_set *c)
{
int ret;
- unsigned long available;
struct gc_stat stats;
struct closure writes;
struct btree_op op;
@@ -1743,14 +1749,14 @@ static void bch_btree_gc(struct cache_set *c)
pr_warn("gc failed!");
} while (ret);
- available = bch_btree_gc_finish(c);
+ bch_btree_gc_finish(c);
wake_up_allocators(c);
bch_time_stats_update(&c->btree_gc_time, start_time);
stats.key_bytes *= sizeof(uint64_t);
stats.data <<= 9;
- stats.in_use = (c->nbuckets - available) * 100 / c->nbuckets;
+ bch_update_bucket_in_use(c, &stats);
memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
trace_bcache_gc_end(c);
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 9b80417cd547..68067d7c7d9b 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -305,5 +305,5 @@ void bch_keybuf_del(struct keybuf *, struct keybuf_key *);
struct keybuf_key *bch_keybuf_next(struct keybuf *);
struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *,
struct bkey *, keybuf_pred_fn *);
-
+void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats);
#endif
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index 864e673aec39..1841d0359bac 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -64,27 +64,16 @@ EXPORT_SYMBOL(closure_put);
void __closure_wake_up(struct closure_waitlist *wait_list)
{
struct llist_node *list;
- struct closure *cl;
+ struct closure *cl, *t;
struct llist_node *reverse = NULL;
list = llist_del_all(&wait_list->list);
/* We first reverse the list to preserve FIFO ordering and fairness */
-
- while (list) {
- struct llist_node *t = list;
- list = llist_next(list);
-
- t->next = reverse;
- reverse = t;
- }
+ reverse = llist_reverse_order(list);
/* Then do the wakeups */
-
- while (reverse) {
- cl = container_of(reverse, struct closure, list);
- reverse = llist_next(reverse);
-
+ llist_for_each_entry_safe(cl, t, reverse, list) {
closure_set_waiting(cl, 0);
closure_sub(cl, CLOSURE_WAITING + 1);
}
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 9b2fe2d3e3a9..3f6c45d25dc5 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -250,6 +250,12 @@ static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
static inline void closure_queue(struct closure *cl)
{
struct workqueue_struct *wq = cl->wq;
+ /**
+ * Changes made to closure, work_struct, or a couple of other structs
+ * may cause work.func not pointing to the right location.
+ */
+ BUILD_BUG_ON(offsetof(struct closure, fn)
+ != offsetof(struct work_struct, func));
if (wq) {
INIT_WORK(&cl->work, cl->work.func);
BUG_ON(!queue_work(wq, &cl->work));
@@ -311,8 +317,6 @@ static inline void closure_wake_up(struct closure_waitlist *list)
* been dropped with closure_put()), it will resume execution at @fn running out
* of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly).
*
- * NOTE: This macro expands to a return in the calling function!
- *
* This is because after calling continue_at() you no longer have a ref on @cl,
* and whatever @cl owns may be freed out from under you - a running closure fn
* has a ref on its own closure which continue_at() drops.
@@ -339,8 +343,6 @@ do { \
* Causes @fn to be executed out of @cl, in @wq context (or called directly if
* @wq is NULL).
*
- * NOTE: like continue_at(), this macro expands to a return in the caller!
- *
* The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn,
* thus it's not safe to touch anything protected by @cl after a
* continue_at_nobarrier().
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 06f55056aaae..35a5a7210e51 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -110,7 +110,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
struct bio_vec bv, cbv;
struct bvec_iter iter, citer = { 0 };
- check = bio_clone(bio, GFP_NOIO);
+ check = bio_clone_kmalloc(bio, GFP_NOIO);
if (!check)
return;
check->bi_opf = REQ_OP_READ;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 7bf65f8089ac..630ac55188a4 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -169,6 +169,11 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
* find a sequence of buckets with valid journal entries
*/
for (i = 0; i < ca->sb.njournal_buckets; i++) {
+ /*
+ * We must try the index l with ZERO first for
+ * correctness due to the scenario that the journal
+ * bucket is circular buffer which might have wrapped
+ */
l = (i * 2654435769U) % ca->sb.njournal_buckets;
if (test_bit(l, bitmap))
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index cf9f83f0d655..4f8824758095 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -26,12 +26,12 @@ struct kmem_cache *bch_search_cache;
static void bch_data_insert_start(struct closure *);
-static unsigned cache_mode(struct cached_dev *dc, struct bio *bio)
+static unsigned cache_mode(struct cached_dev *dc)
{
return BDEV_CACHE_MODE(&dc->sb);
}
-static bool verify(struct cached_dev *dc, struct bio *bio)
+static bool verify(struct cached_dev *dc)
{
return dc->verify;
}
@@ -369,7 +369,7 @@ static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k)
static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
{
struct cache_set *c = dc->disk.c;
- unsigned mode = cache_mode(dc, bio);
+ unsigned mode = cache_mode(dc);
unsigned sectors, congested = bch_get_congested(c);
struct task_struct *task = current;
struct io *i;
@@ -384,6 +384,14 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
op_is_write(bio_op(bio))))
goto skip;
+ /*
+ * Flag for bypass if the IO is for read-ahead or background,
+ * unless the read-ahead request is for metadata (eg, for gfs2).
+ */
+ if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) &&
+ !(bio->bi_opf & REQ_META))
+ goto skip;
+
if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
bio_sectors(bio) & (c->sb.block_size - 1)) {
pr_debug("skipping unaligned io");
@@ -400,12 +408,6 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
if (!congested && !dc->sequential_cutoff)
goto rescale;
- if (!congested &&
- mode == CACHE_MODE_WRITEBACK &&
- op_is_write(bio->bi_opf) &&
- op_is_sync(bio->bi_opf))
- goto rescale;
-
spin_lock(&dc->io_lock);
hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash)
@@ -753,7 +755,7 @@ static void cached_dev_read_done(struct closure *cl)
s->cache_miss = NULL;
}
- if (verify(dc, &s->bio.bio) && s->recoverable && !s->read_dirty_data)
+ if (verify(dc) && s->recoverable && !s->read_dirty_data)
bch_data_verify(dc, s->orig_bio);
bio_complete(s);
@@ -778,7 +780,7 @@ static void cached_dev_read_done_bh(struct closure *cl)
if (s->iop.error)
continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
- else if (s->iop.bio || verify(dc, &s->bio.bio))
+ else if (s->iop.bio || verify(dc))
continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
else
continue_at_nobarrier(cl, cached_dev_bio_complete, NULL);
@@ -907,7 +909,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
s->iop.bypass = true;
if (should_writeback(dc, s->orig_bio,
- cache_mode(dc, bio),
+ cache_mode(dc),
s->iop.bypass)) {
s->iop.bypass = false;
s->iop.writeback = true;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index d626dbd63b1c..ca89998d7b4d 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -53,15 +53,15 @@ LIST_HEAD(bch_cache_sets);
static LIST_HEAD(uncached_devices);
static int bcache_major;
-static DEFINE_IDA(bcache_minor);
+static DEFINE_IDA(bcache_device_idx);
static wait_queue_head_t unregister_wait;
struct workqueue_struct *bcache_wq;
-#define BTREE_MAX_PAGES (256 * 1024 / PAGE_SIZE)
-#define BCACHE_MINORS_BITS 4 /* bcache partition support */
-#define BCACHE_MINORS (1 << BCACHE_MINORS_BITS)
-#define BCACHE_TO_IDA_MINORS(first_minor) ((first_minor) >> BCACHE_MINORS_BITS)
-#define IDA_TO_BCACHE_MINORS(minor) ((minor) << BCACHE_MINORS_BITS)
+#define BTREE_MAX_PAGES (256 * 1024 / PAGE_SIZE)
+/* limitation of partitions number on single bcache device */
+#define BCACHE_MINORS 128
+/* limitation of bcache devices number on single system */
+#define BCACHE_DEVICE_IDX_MAX ((1U << MINORBITS)/BCACHE_MINORS)
/* Superblock */
@@ -724,6 +724,16 @@ static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
closure_get(&c->caching);
}
+static inline int first_minor_to_idx(int first_minor)
+{
+ return (first_minor/BCACHE_MINORS);
+}
+
+static inline int idx_to_first_minor(int idx)
+{
+ return (idx * BCACHE_MINORS);
+}
+
static void bcache_device_free(struct bcache_device *d)
{
lockdep_assert_held(&bch_register_lock);
@@ -737,7 +747,8 @@ static void bcache_device_free(struct bcache_device *d)
if (d->disk && d->disk->queue)
blk_cleanup_queue(d->disk->queue);
if (d->disk) {
- ida_simple_remove(&bcache_minor, BCACHE_TO_IDA_MINORS(d->disk->first_minor));
+ ida_simple_remove(&bcache_device_idx,
+ first_minor_to_idx(d->disk->first_minor));
put_disk(d->disk);
}
@@ -754,7 +765,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
{
struct request_queue *q;
size_t n;
- int minor;
+ int idx;
if (!d->stripe_size)
d->stripe_size = 1 << 31;
@@ -783,22 +794,22 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
if (!d->full_dirty_stripes)
return -ENOMEM;
- minor = ida_simple_get(&bcache_minor, 0, BCACHE_TO_IDA_MINORS(MINORMASK) + 1, GFP_KERNEL);
- if (minor < 0)
- return minor;
-
+ idx = ida_simple_get(&bcache_device_idx, 0,
+ BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
+ if (idx < 0)
+ return idx;
if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
!(d->disk = alloc_disk(BCACHE_MINORS))) {
- ida_simple_remove(&bcache_minor, minor);
+ ida_simple_remove(&bcache_device_idx, idx);
return -ENOMEM;
}
set_capacity(d->disk, sectors);
- snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", minor);
+ snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
d->disk->major = bcache_major;
- d->disk->first_minor = IDA_TO_BCACHE_MINORS(minor);
+ d->disk->first_minor = idx_to_first_minor(idx);
d->disk->fops = &bcache_ops;
d->disk->private_data = d;
@@ -1133,9 +1144,6 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
if (ret)
return ret;
- set_capacity(dc->disk.disk,
- dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
-
dc->disk.disk->queue->backing_dev_info->ra_pages =
max(dc->disk.disk->queue->backing_dev_info->ra_pages,
q->backing_dev_info->ra_pages);
@@ -1381,9 +1389,6 @@ static void cache_set_flush(struct closure *cl)
struct btree *b;
unsigned i;
- if (!c)
- closure_return(cl);
-
bch_cache_accounting_destroy(&c->accounting);
kobject_put(&c->internal);
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 2aebfce7710b..275dbee5290f 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -80,8 +80,9 @@ rw_attribute(writeback_delay);
rw_attribute(writeback_rate);
rw_attribute(writeback_rate_update_seconds);
-rw_attribute(writeback_rate_d_term);
+rw_attribute(writeback_rate_i_term_inverse);
rw_attribute(writeback_rate_p_term_inverse);
+rw_attribute(writeback_rate_minimum);
read_attribute(writeback_rate_debug);
read_attribute(stripe_size);
@@ -129,15 +130,16 @@ SHOW(__bch_cached_dev)
sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
var_print(writeback_rate_update_seconds);
- var_print(writeback_rate_d_term);
+ var_print(writeback_rate_i_term_inverse);
var_print(writeback_rate_p_term_inverse);
+ var_print(writeback_rate_minimum);
if (attr == &sysfs_writeback_rate_debug) {
char rate[20];
char dirty[20];
char target[20];
char proportional[20];
- char derivative[20];
+ char integral[20];
char change[20];
s64 next_io;
@@ -145,7 +147,7 @@ SHOW(__bch_cached_dev)
bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9);
bch_hprint(target, dc->writeback_rate_target << 9);
bch_hprint(proportional,dc->writeback_rate_proportional << 9);
- bch_hprint(derivative, dc->writeback_rate_derivative << 9);
+ bch_hprint(integral, dc->writeback_rate_integral_scaled << 9);
bch_hprint(change, dc->writeback_rate_change << 9);
next_io = div64_s64(dc->writeback_rate.next - local_clock(),
@@ -156,11 +158,11 @@ SHOW(__bch_cached_dev)
"dirty:\t\t%s\n"
"target:\t\t%s\n"
"proportional:\t%s\n"
- "derivative:\t%s\n"
+ "integral:\t%s\n"
"change:\t\t%s/sec\n"
"next io:\t%llims\n",
rate, dirty, target, proportional,
- derivative, change, next_io);
+ integral, change, next_io);
}
sysfs_hprint(dirty_data,
@@ -212,7 +214,7 @@ STORE(__cached_dev)
dc->writeback_rate.rate, 1, INT_MAX);
d_strtoul_nonzero(writeback_rate_update_seconds);
- d_strtoul(writeback_rate_d_term);
+ d_strtoul(writeback_rate_i_term_inverse);
d_strtoul_nonzero(writeback_rate_p_term_inverse);
d_strtoi_h(sequential_cutoff);
@@ -318,7 +320,7 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_writeback_percent,
&sysfs_writeback_rate,
&sysfs_writeback_rate_update_seconds,
- &sysfs_writeback_rate_d_term,
+ &sysfs_writeback_rate_i_term_inverse,
&sysfs_writeback_rate_p_term_inverse,
&sysfs_writeback_rate_debug,
&sysfs_dirty_data,
@@ -615,6 +617,17 @@ STORE(__bch_cache_set)
}
if (attr == &sysfs_trigger_gc) {
+ /*
+ * Garbage collection thread only works when sectors_to_gc < 0,
+ * when users write to sysfs entry trigger_gc, most of time
+ * they want to forcibly triger gargage collection. Here -1 is
+ * set to c->sectors_to_gc, to make gc_should_run() give a
+ * chance to permit gc thread to run. "give a chance" means
+ * before going into gc_should_run(), there is still chance
+ * that c->sectors_to_gc being set to other positive value. So
+ * writing sysfs entry trigger_gc won't always make sure gc
+ * thread takes effect.
+ */
atomic_set(&c->sectors_to_gc, -1);
wake_up_gc(c);
}
@@ -733,6 +746,11 @@ static struct attribute *bch_cache_set_internal_files[] = {
};
KTYPE(bch_cache_set_internal);
+static int __bch_cache_cmp(const void *l, const void *r)
+{
+ return *((uint16_t *)r) - *((uint16_t *)l);
+}
+
SHOW(__bch_cache)
{
struct cache *ca = container_of(kobj, struct cache, kobj);
@@ -757,9 +775,6 @@ SHOW(__bch_cache)
CACHE_REPLACEMENT(&ca->sb));
if (attr == &sysfs_priority_stats) {
- int cmp(const void *l, const void *r)
- { return *((uint16_t *) r) - *((uint16_t *) l); }
-
struct bucket *b;
size_t n = ca->sb.nbuckets, i;
size_t unused = 0, available = 0, dirty = 0, meta = 0;
@@ -788,7 +803,7 @@ SHOW(__bch_cache)
p[i] = ca->buckets[i].prio;
mutex_unlock(&ca->set->bucket_lock);
- sort(p, n, sizeof(uint16_t), cmp, NULL);
+ sort(p, n, sizeof(uint16_t), __bch_cache_cmp, NULL);
while (n &&
!cached[n - 1])
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index eb70f6894f05..95ad6ccf9873 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -231,8 +231,14 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
d->next += div_u64(done * NSEC_PER_SEC, d->rate);
- if (time_before64(now + NSEC_PER_SEC, d->next))
- d->next = now + NSEC_PER_SEC;
+ /* Bound the time. Don't let us fall further than 2 seconds behind
+ * (this prevents unnecessary backlog that would make it impossible
+ * to catch up). If we're ahead of the desired writeback rate,
+ * don't let us sleep more than 2.5 seconds (so we can notice/respond
+ * if the control system tells us to speed up!).
+ */
+ if (time_before64(now + NSEC_PER_SEC * 5LLU / 2LLU, d->next))
+ d->next = now + NSEC_PER_SEC * 5LLU / 2LLU;
if (time_after64(now - NSEC_PER_SEC * 2, d->next))
d->next = now - NSEC_PER_SEC * 2;
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index cf2cbc211d83..10de1d43c257 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -449,10 +449,10 @@ struct bch_ratelimit {
uint64_t next;
/*
- * Rate at which we want to do work, in units per nanosecond
+ * Rate at which we want to do work, in units per second
* The units here correspond to the units passed to bch_next_delay()
*/
- unsigned rate;
+ uint32_t rate;
};
static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index d45425a7202f..ae554ce06cd8 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -24,48 +24,63 @@ static void __update_writeback_rate(struct cached_dev *dc)
bcache_flash_devs_sectors_dirty(c);
uint64_t cache_dirty_target =
div_u64(cache_sectors * dc->writeback_percent, 100);
-
int64_t target = div64_u64(cache_dirty_target * bdev_sectors(dc->bdev),
c->cached_dev_sectors);
- /* PD controller */
-
+ /*
+ * PI controller:
+ * Figures out the amount that should be written per second.
+ *
+ * First, the error (number of sectors that are dirty beyond our
+ * target) is calculated. The error is accumulated (numerically
+ * integrated).
+ *
+ * Then, the proportional value and integral value are scaled
+ * based on configured values. These are stored as inverses to
+ * avoid fixed point math and to make configuration easy-- e.g.
+ * the default value of 40 for writeback_rate_p_term_inverse
+ * attempts to write at a rate that would retire all the dirty
+ * blocks in 40 seconds.
+ *
+ * The writeback_rate_i_inverse value of 10000 means that 1/10000th
+ * of the error is accumulated in the integral term per second.
+ * This acts as a slow, long-term average that is not subject to
+ * variations in usage like the p term.
+ */
int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
- int64_t derivative = dirty - dc->disk.sectors_dirty_last;
- int64_t proportional = dirty - target;
- int64_t change;
-
- dc->disk.sectors_dirty_last = dirty;
-
- /* Scale to sectors per second */
-
- proportional *= dc->writeback_rate_update_seconds;
- proportional = div_s64(proportional, dc->writeback_rate_p_term_inverse);
-
- derivative = div_s64(derivative, dc->writeback_rate_update_seconds);
-
- derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative,
- (dc->writeback_rate_d_term /
- dc->writeback_rate_update_seconds) ?: 1, 0);
-
- derivative *= dc->writeback_rate_d_term;
- derivative = div_s64(derivative, dc->writeback_rate_p_term_inverse);
-
- change = proportional + derivative;
+ int64_t error = dirty - target;
+ int64_t proportional_scaled =
+ div_s64(error, dc->writeback_rate_p_term_inverse);
+ int64_t integral_scaled;
+ uint32_t new_rate;
+
+ if ((error < 0 && dc->writeback_rate_integral > 0) ||
+ (error > 0 && time_before64(local_clock(),
+ dc->writeback_rate.next + NSEC_PER_MSEC))) {
+ /*
+ * Only decrease the integral term if it's more than
+ * zero. Only increase the integral term if the device
+ * is keeping up. (Don't wind up the integral
+ * ineffectively in either case).
+ *
+ * It's necessary to scale this by
+ * writeback_rate_update_seconds to keep the integral
+ * term dimensioned properly.
+ */
+ dc->writeback_rate_integral += error *
+ dc->writeback_rate_update_seconds;
+ }
- /* Don't increase writeback rate if the device isn't keeping up */
- if (change > 0 &&
- time_after64(local_clock(),
- dc->writeback_rate.next + NSEC_PER_MSEC))
- change = 0;
+ integral_scaled = div_s64(dc->writeback_rate_integral,
+ dc->writeback_rate_i_term_inverse);
- dc->writeback_rate.rate =
- clamp_t(int64_t, (int64_t) dc->writeback_rate.rate + change,
- 1, NSEC_PER_MSEC);
+ new_rate = clamp_t(int32_t, (proportional_scaled + integral_scaled),
+ dc->writeback_rate_minimum, NSEC_PER_SEC);
- dc->writeback_rate_proportional = proportional;
- dc->writeback_rate_derivative = derivative;
- dc->writeback_rate_change = change;
+ dc->writeback_rate_proportional = proportional_scaled;
+ dc->writeback_rate_integral_scaled = integral_scaled;
+ dc->writeback_rate_change = new_rate - dc->writeback_rate.rate;
+ dc->writeback_rate.rate = new_rate;
dc->writeback_rate_target = target;
}
@@ -178,13 +193,21 @@ static void write_dirty(struct closure *cl)
struct dirty_io *io = container_of(cl, struct dirty_io, cl);
struct keybuf_key *w = io->bio.bi_private;
- dirty_init(w);
- bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0);
- io->bio.bi_iter.bi_sector = KEY_START(&w->key);
- io->bio.bi_bdev = io->dc->bdev;
- io->bio.bi_end_io = dirty_endio;
+ /*
+ * IO errors are signalled using the dirty bit on the key.
+ * If we failed to read, we should not attempt to write to the
+ * backing device. Instead, immediately go to write_dirty_finish
+ * to clean up.
+ */
+ if (KEY_DIRTY(&w->key)) {
+ dirty_init(w);
+ bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0);
+ io->bio.bi_iter.bi_sector = KEY_START(&w->key);
+ io->bio.bi_bdev = io->dc->bdev;
+ io->bio.bi_end_io = dirty_endio;
- closure_bio_submit(&io->bio, cl);
+ closure_bio_submit(&io->bio, cl);
+ }
continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq);
}
@@ -417,6 +440,8 @@ static int bch_writeback_thread(void *arg)
struct cached_dev *dc = arg;
bool searched_full_index;
+ bch_ratelimit_reset(&dc->writeback_rate);
+
while (!kthread_should_stop()) {
klp_kgraft_mark_task_safe(current);
down_write(&dc->writeback_lock);
@@ -445,7 +470,6 @@ static int bch_writeback_thread(void *arg)
up_write(&dc->writeback_lock);
- bch_ratelimit_reset(&dc->writeback_rate);
read_dirty(dc);
if (searched_full_index) {
@@ -457,6 +481,7 @@ static int bch_writeback_thread(void *arg)
klp_kgraft_mark_task_safe(current);
delay = schedule_timeout_interruptible(delay);
}
+ bch_ratelimit_reset(&dc->writeback_rate);
}
}
@@ -494,8 +519,6 @@ void bch_sectors_dirty_init(struct bcache_device *d)
bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0),
sectors_dirty_init_fn, 0);
-
- d->sectors_dirty_last = bcache_dev_sectors_dirty(d);
}
void bch_cached_dev_writeback_init(struct cached_dev *dc)
@@ -509,10 +532,11 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_percent = 10;
dc->writeback_delay = 30;
dc->writeback_rate.rate = 1024;
+ dc->writeback_rate_minimum = 8;
dc->writeback_rate_update_seconds = 5;
- dc->writeback_rate_d_term = 30;
- dc->writeback_rate_p_term_inverse = 6000;
+ dc->writeback_rate_p_term_inverse = 40;
+ dc->writeback_rate_i_term_inverse = 10000;
INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
}
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index e35421d20d2e..34bcf49d737b 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -76,7 +76,9 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
if (would_skip)
return false;
- return op_is_sync(bio->bi_opf) || in_use <= CUTOFF_WRITEBACK;
+ return (op_is_sync(bio->bi_opf) ||
+ bio->bi_opf & (REQ_META|REQ_PRIO) ||
+ in_use <= CUTOFF_WRITEBACK);
}
static inline void bch_writeback_queue(struct cached_dev *dc)
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 4477bf930cf4..e976f4f39334 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -81,10 +81,14 @@
#define SECTOR_TO_BLOCK_SHIFT 3
/*
+ * For btree insert:
* 3 for btree insert +
* 2 for btree lookup used within space map
+ * For btree remove:
+ * 2 for shadow spine +
+ * 4 for rebalance 3 child node
*/
-#define THIN_MAX_CONCURRENT_LOCKS 5
+#define THIN_MAX_CONCURRENT_LOCKS 6
/* This should be plenty */
#define SPACE_MAP_ROOT_SIZE 128
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 7a75b5010f73..e4ececd3df00 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -678,23 +678,8 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
pn->keys[1] = rn->keys[0];
memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64));
- /*
- * rejig the spine. This is ugly, since it knows too
- * much about the spine
- */
- if (s->nodes[0] != new_parent) {
- unlock_block(s->info, s->nodes[0]);
- s->nodes[0] = new_parent;
- }
- if (key < le64_to_cpu(rn->keys[0])) {
- unlock_block(s->info, right);
- s->nodes[1] = left;
- } else {
- unlock_block(s->info, left);
- s->nodes[1] = right;
- }
- s->count = 2;
-
+ unlock_block(s->info, left);
+ unlock_block(s->info, right);
return 0;
}
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index ce44a033f63b..64cc86a82b2d 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -184,7 +184,7 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail)
void *cmd_head = pcan_usb_fd_cmd_buffer(dev);
int err = 0;
u8 *packet_ptr;
- int i, n = 1, packet_len;
+ int packet_len;
ptrdiff_t cmd_len;
/* usb device unregistered? */
@@ -201,17 +201,13 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail)
}
packet_ptr = cmd_head;
+ packet_len = cmd_len;
/* firmware is not able to re-assemble 512 bytes buffer in full-speed */
- if ((dev->udev->speed != USB_SPEED_HIGH) &&
- (cmd_len > PCAN_UFD_LOSPD_PKT_SIZE)) {
- packet_len = PCAN_UFD_LOSPD_PKT_SIZE;
- n += cmd_len / packet_len;
- } else {
- packet_len = cmd_len;
- }
+ if (unlikely(dev->udev->speed != USB_SPEED_HIGH))
+ packet_len = min(packet_len, PCAN_UFD_LOSPD_PKT_SIZE);
- for (i = 0; i < n; i++) {
+ do {
err = usb_bulk_msg(dev->udev,
usb_sndbulkpipe(dev->udev,
PCAN_USBPRO_EP_CMDOUT),
@@ -224,7 +220,12 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail)
}
packet_ptr += packet_len;
- }
+ cmd_len -= packet_len;
+
+ if (cmd_len < PCAN_UFD_LOSPD_PKT_SIZE)
+ packet_len = cmd_len;
+
+ } while (packet_len > 0);
return err;
}
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 1e8f7599bbfc..136a1584b026 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4591,6 +4591,15 @@ int be_update_queues(struct be_adapter *adapter)
be_schedule_worker(adapter);
+ /*
+ * The IF was destroyed and re-created. We need to clear
+ * all promiscuous flags valid for the destroyed IF.
+ * Without this promisc mode is not restored during
+ * be_open() because the driver thinks that it is
+ * already enabled in HW.
+ */
+ adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
+
if (netif_running(netdev))
status = be_open(netdev);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 43493312323f..7a5800ad3a7f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -411,6 +411,10 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
struct ibmvnic_rx_pool *rx_pool;
int rx_scrqs;
int i, j, rc;
+ u64 *size_array;
+
+ size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
+ be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
for (i = 0; i < rx_scrqs; i++) {
@@ -418,7 +422,17 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
netdev_dbg(adapter->netdev, "Re-setting rx_pool[%d]\n", i);
- rc = reset_long_term_buff(adapter, &rx_pool->long_term_buff);
+ if (rx_pool->buff_size != be64_to_cpu(size_array[i])) {
+ free_long_term_buff(adapter, &rx_pool->long_term_buff);
+ rx_pool->buff_size = be64_to_cpu(size_array[i]);
+ alloc_long_term_buff(adapter, &rx_pool->long_term_buff,
+ rx_pool->size *
+ rx_pool->buff_size);
+ } else {
+ rc = reset_long_term_buff(adapter,
+ &rx_pool->long_term_buff);
+ }
+
if (rc)
return rc;
@@ -440,14 +454,12 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
static void release_rx_pools(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_rx_pool *rx_pool;
- int rx_scrqs;
int i, j;
if (!adapter->rx_pool)
return;
- rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
- for (i = 0; i < rx_scrqs; i++) {
+ for (i = 0; i < adapter->num_active_rx_pools; i++) {
rx_pool = &adapter->rx_pool[i];
netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i);
@@ -470,6 +482,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
kfree(adapter->rx_pool);
adapter->rx_pool = NULL;
+ adapter->num_active_rx_pools = 0;
}
static int init_rx_pools(struct net_device *netdev)
@@ -494,6 +507,8 @@ static int init_rx_pools(struct net_device *netdev)
return -1;
}
+ adapter->num_active_rx_pools = 0;
+
for (i = 0; i < rxadd_subcrqs; i++) {
rx_pool = &adapter->rx_pool[i];
@@ -537,6 +552,8 @@ static int init_rx_pools(struct net_device *netdev)
rx_pool->next_free = 0;
}
+ adapter->num_active_rx_pools = rxadd_subcrqs;
+
return 0;
}
@@ -587,13 +604,12 @@ static void release_vpd_data(struct ibmvnic_adapter *adapter)
static void release_tx_pools(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_tx_pool *tx_pool;
- int i, tx_scrqs;
+ int i;
if (!adapter->tx_pool)
return;
- tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
- for (i = 0; i < tx_scrqs; i++) {
+ for (i = 0; i < adapter->num_active_tx_pools; i++) {
netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i);
tx_pool = &adapter->tx_pool[i];
kfree(tx_pool->tx_buff);
@@ -604,6 +620,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter)
kfree(adapter->tx_pool);
adapter->tx_pool = NULL;
+ adapter->num_active_tx_pools = 0;
}
static int init_tx_pools(struct net_device *netdev)
@@ -620,6 +637,8 @@ static int init_tx_pools(struct net_device *netdev)
if (!adapter->tx_pool)
return -1;
+ adapter->num_active_tx_pools = 0;
+
for (i = 0; i < tx_subcrqs; i++) {
tx_pool = &adapter->tx_pool[i];
@@ -667,6 +686,8 @@ static int init_tx_pools(struct net_device *netdev)
tx_pool->producer_index = 0;
}
+ adapter->num_active_tx_pools = tx_subcrqs;
+
return 0;
}
@@ -861,7 +882,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
if (adapter->vpd->buff)
len = adapter->vpd->len;
- reinit_completion(&adapter->fw_done);
+ init_completion(&adapter->fw_done);
crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
crq.get_vpd_size.cmd = GET_VPD_SIZE;
ibmvnic_send_crq(adapter, &crq);
@@ -923,6 +944,13 @@ static int init_resources(struct ibmvnic_adapter *adapter)
if (!adapter->vpd)
return -ENOMEM;
+ /* Vital Product Data (VPD) */
+ rc = ibmvnic_get_vpd(adapter);
+ if (rc) {
+ netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n");
+ return rc;
+ }
+
adapter->map_id = 1;
adapter->napi = kcalloc(adapter->req_rx_queues,
sizeof(struct napi_struct), GFP_KERNEL);
@@ -996,7 +1024,7 @@ static int __ibmvnic_open(struct net_device *netdev)
static int ibmvnic_open(struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- int rc, vpd;
+ int rc;
mutex_lock(&adapter->reset_lock);
@@ -1019,11 +1047,6 @@ static int ibmvnic_open(struct net_device *netdev)
rc = __ibmvnic_open(netdev);
netif_carrier_on(netdev);
- /* Vital Product Data (VPD) */
- vpd = ibmvnic_get_vpd(adapter);
- if (vpd)
- netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n");
-
mutex_unlock(&adapter->reset_lock);
return rc;
@@ -1280,6 +1303,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
unsigned char *dst;
u64 *handle_array;
int index = 0;
+ u8 proto = 0;
int ret = 0;
if (adapter->resetting) {
@@ -1368,17 +1392,18 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
}
if (skb->protocol == htons(ETH_P_IP)) {
- if (ip_hdr(skb)->version == 4)
- tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV4;
- else if (ip_hdr(skb)->version == 6)
- tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV6;
-
- if (ip_hdr(skb)->protocol == IPPROTO_TCP)
- tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_TCP;
- else if (ip_hdr(skb)->protocol != IPPROTO_TCP)
- tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_UDP;
+ tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV4;
+ proto = ip_hdr(skb)->protocol;
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV6;
+ proto = ipv6_hdr(skb)->nexthdr;
}
+ if (proto == IPPROTO_TCP)
+ tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_TCP;
+ else if (proto == IPPROTO_UDP)
+ tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_UDP;
+
if (skb->ip_summed == CHECKSUM_PARTIAL) {
tx_crq.v1.flags1 |= IBMVNIC_TX_CHKSUM_OFFLOAD;
hdrs += 2;
@@ -1523,15 +1548,19 @@ static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p)
crq.change_mac_addr.first = IBMVNIC_CRQ_CMD;
crq.change_mac_addr.cmd = CHANGE_MAC_ADDR;
ether_addr_copy(&crq.change_mac_addr.mac_addr[0], addr->sa_data);
+
+ init_completion(&adapter->fw_done);
ibmvnic_send_crq(adapter, &crq);
+ wait_for_completion(&adapter->fw_done);
/* netdev->dev_addr is changed in handle_change_mac_rsp function */
- return 0;
+ return adapter->fw_done_rc ? -EIO : 0;
}
static int ibmvnic_set_mac(struct net_device *netdev, void *p)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
struct sockaddr *addr = p;
+ int rc;
if (adapter->state == VNIC_PROBED) {
memcpy(&adapter->desired.mac, addr, sizeof(struct sockaddr));
@@ -1539,9 +1568,9 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
return 0;
}
- __ibmvnic_set_mac(netdev, addr);
+ rc = __ibmvnic_set_mac(netdev, addr);
- return 0;
+ return rc;
}
/**
@@ -1551,6 +1580,7 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
static int do_reset(struct ibmvnic_adapter *adapter,
struct ibmvnic_rwi *rwi, u32 reset_state)
{
+ u64 old_num_rx_queues, old_num_tx_queues;
struct net_device *netdev = adapter->netdev;
int i, rc;
@@ -1560,6 +1590,9 @@ static int do_reset(struct ibmvnic_adapter *adapter,
netif_carrier_off(netdev);
adapter->reset_reason = rwi->reset_reason;
+ old_num_rx_queues = adapter->req_rx_queues;
+ old_num_tx_queues = adapter->req_tx_queues;
+
if (rwi->reset_reason == VNIC_RESET_MOBILITY) {
rc = ibmvnic_reenable_crq_queue(adapter);
if (rc)
@@ -1604,6 +1637,12 @@ static int do_reset(struct ibmvnic_adapter *adapter,
rc = init_resources(adapter);
if (rc)
return rc;
+ } else if (adapter->req_rx_queues != old_num_rx_queues ||
+ adapter->req_tx_queues != old_num_tx_queues) {
+ release_rx_pools(adapter);
+ release_tx_pools(adapter);
+ init_rx_pools(netdev);
+ init_tx_pools(netdev);
} else {
rc = reset_tx_pools(adapter);
if (rc)
@@ -3266,7 +3305,11 @@ static void handle_vpd_rsp(union ibmvnic_crq *crq,
*/
substr = strnstr(adapter->vpd->buff, "RM", adapter->vpd->len);
if (!substr) {
- dev_info(dev, "No FW level provided by VPD\n");
+ dev_info(dev, "Warning - No FW level has been provided in the VPD buffer by the VIOS Server\n");
+ ptr = strncpy((char *)adapter->fw_version, "N/A",
+ 3 * sizeof(char));
+ if (!ptr)
+ dev_err(dev, "Failed to inform that firmware version is unavailable to the adapter\n");
goto complete;
}
@@ -3357,7 +3400,11 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter)
return;
}
+ adapter->ip_offload_ctrl.len =
+ cpu_to_be32(sizeof(adapter->ip_offload_ctrl));
adapter->ip_offload_ctrl.version = cpu_to_be32(INITIAL_VERSION_IOB);
+ adapter->ip_offload_ctrl.ipv4_chksum = buf->ipv4_chksum;
+ adapter->ip_offload_ctrl.ipv6_chksum = buf->ipv6_chksum;
adapter->ip_offload_ctrl.tcp_ipv4_chksum = buf->tcp_ipv4_chksum;
adapter->ip_offload_ctrl.udp_ipv4_chksum = buf->udp_ipv4_chksum;
adapter->ip_offload_ctrl.tcp_ipv6_chksum = buf->tcp_ipv6_chksum;
@@ -3530,8 +3577,8 @@ static void handle_error_indication(union ibmvnic_crq *crq,
ibmvnic_reset(adapter, VNIC_RESET_NON_FATAL);
}
-static void handle_change_mac_rsp(union ibmvnic_crq *crq,
- struct ibmvnic_adapter *adapter)
+static int handle_change_mac_rsp(union ibmvnic_crq *crq,
+ struct ibmvnic_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
struct device *dev = &adapter->vdev->dev;
@@ -3540,10 +3587,13 @@ static void handle_change_mac_rsp(union ibmvnic_crq *crq,
rc = crq->change_mac_addr_rsp.rc.code;
if (rc) {
dev_err(dev, "Error %ld in CHANGE_MAC_ADDR_RSP\n", rc);
- return;
+ goto out;
}
memcpy(netdev->dev_addr, &crq->change_mac_addr_rsp.mac_addr[0],
ETH_ALEN);
+out:
+ complete(&adapter->fw_done);
+ return rc;
}
static void handle_request_cap_rsp(union ibmvnic_crq *crq,
@@ -3597,7 +3647,17 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq,
*req_value,
(long int)be64_to_cpu(crq->request_capability_rsp.
number), name);
- *req_value = be64_to_cpu(crq->request_capability_rsp.number);
+
+ if (be16_to_cpu(crq->request_capability_rsp.capability) ==
+ REQ_MTU) {
+ pr_err("mtu of %llu is not supported. Reverting.\n",
+ *req_value);
+ *req_value = adapter->fallback.mtu;
+ } else {
+ *req_value =
+ be64_to_cpu(crq->request_capability_rsp.number);
+ }
+
ibmvnic_send_req_caps(adapter, 1);
return;
default:
@@ -3993,7 +4053,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
break;
case CHANGE_MAC_ADDR_RSP:
netdev_dbg(netdev, "Got MAC address change Response\n");
- handle_change_mac_rsp(crq, adapter);
+ adapter->fw_done_rc = handle_change_mac_rsp(crq, adapter);
break;
case ERROR_INDICATION:
netdev_dbg(netdev, "Got Error Indication\n");
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 2df79fdd800b..fe21a6e2ddae 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -1091,6 +1091,8 @@ struct ibmvnic_adapter {
u64 opt_rxba_entries_per_subcrq;
__be64 tx_rx_desc_req;
u8 map_id;
+ u64 num_active_rx_pools;
+ u64 num_active_tx_pools;
struct tasklet_struct tasklet;
enum vnic_state state;
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index afb7ebe20b24..0641c0098738 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -398,7 +398,6 @@
#define E1000_ICR_LSC 0x00000004 /* Link Status Change */
#define E1000_ICR_RXSEQ 0x00000008 /* Rx sequence error */
#define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */
-#define E1000_ICR_RXO 0x00000040 /* Receiver Overrun */
#define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */
#define E1000_ICR_ECCER 0x00400000 /* Uncorrectable ECC Error */
/* If this bit asserted, the driver should claim the interrupt */
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index c697910e65d5..5852f4accc55 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1905,30 +1905,14 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
- u32 icr;
- bool enable = true;
-
- icr = er32(ICR);
- if (icr & E1000_ICR_RXO) {
- ew32(ICR, E1000_ICR_RXO);
- enable = false;
- /* napi poll will re-enable Other, make sure it runs */
- if (napi_schedule_prep(&adapter->napi)) {
- adapter->total_rx_bytes = 0;
- adapter->total_rx_packets = 0;
- __napi_schedule(&adapter->napi);
- }
- }
- if (icr & E1000_ICR_LSC) {
- ew32(ICR, E1000_ICR_LSC);
- hw->mac.get_link_status = true;
- /* guard against interrupt when we're going down */
- if (!test_bit(__E1000_DOWN, &adapter->state))
- mod_timer(&adapter->watchdog_timer, jiffies + 1);
- }
- if (enable && !test_bit(__E1000_DOWN, &adapter->state))
+ hw->mac.get_link_status = true;
+
+ /* guard against interrupt when we're going down */
+ if (!test_bit(__E1000_DOWN, &adapter->state)) {
+ mod_timer(&adapter->watchdog_timer, jiffies + 1);
ew32(IMS, E1000_IMS_OTHER);
+ }
return IRQ_HANDLED;
}
@@ -2699,8 +2683,7 @@ static int e1000e_poll(struct napi_struct *napi, int weight)
napi_complete_done(napi, work_done);
if (!test_bit(__E1000_DOWN, &adapter->state)) {
if (adapter->msix_entries)
- ew32(IMS, adapter->rx_ring->ims_val |
- E1000_IMS_OTHER);
+ ew32(IMS, adapter->rx_ring->ims_val);
else
e1000_irq_enable(adapter);
}
@@ -4195,7 +4178,7 @@ static void e1000e_trigger_lsc(struct e1000_adapter *adapter)
struct e1000_hw *hw = &adapter->hw;
if (adapter->msix_entries)
- ew32(ICS, E1000_ICS_LSC | E1000_ICS_OTHER);
+ ew32(ICS, E1000_ICS_OTHER);
else
ew32(ICS, E1000_ICS_LSC);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
index 23ccec4cb7f5..a1f3556307c7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
@@ -197,9 +197,15 @@ static int mlx5e_am_stats_compare(struct mlx5e_rx_am_stats *curr,
return (curr->bpms > prev->bpms) ? MLX5E_AM_STATS_BETTER :
MLX5E_AM_STATS_WORSE;
+ if (!prev->ppms)
+ return curr->ppms ? MLX5E_AM_STATS_BETTER :
+ MLX5E_AM_STATS_SAME;
+
if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
return (curr->ppms > prev->ppms) ? MLX5E_AM_STATS_BETTER :
MLX5E_AM_STATS_WORSE;
+ if (!prev->epms)
+ return MLX5E_AM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
return (curr->epms < prev->epms) ? MLX5E_AM_STATS_BETTER :
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 2ad4b78c963d..9959bbabb341 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -2206,19 +2206,14 @@ static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd)
void __iomem *ioaddr = tp->mmio_addr;
dma_addr_t paddr = tp->counters_phys_addr;
u32 cmd;
- bool ret;
RTL_W32(CounterAddrHigh, (u64)paddr >> 32);
+ RTL_R32(CounterAddrHigh);
cmd = (u64)paddr & DMA_BIT_MASK(32);
RTL_W32(CounterAddrLow, cmd);
RTL_W32(CounterAddrLow, cmd | counter_cmd);
- ret = rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
-
- RTL_W32(CounterAddrLow, 0);
- RTL_W32(CounterAddrHigh, 0);
-
- return ret;
+ return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
}
static bool rtl8169_reset_counters(struct net_device *dev)
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index af827faec7fe..f5ba8a75f882 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -275,13 +275,13 @@ static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
goto out;
*pskb = skb;
- skb->dev = dev;
- skb->pkt_type = PACKET_HOST;
if (local) {
+ skb->pkt_type = PACKET_HOST;
if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS)
success = true;
} else {
+ skb->dev = dev;
ret = RX_HANDLER_ANOTHER;
success = true;
}
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index e05a4e437e69..e00237b8f355 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -860,6 +860,7 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m,
struct pppoe_hdr *ph;
struct net_device *dev;
char *start;
+ int hlen;
lock_sock(sk);
if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) {
@@ -878,16 +879,16 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m,
if (total_len > (dev->mtu + dev->hard_header_len))
goto end;
-
- skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32,
- 0, GFP_KERNEL);
+ hlen = LL_RESERVED_SPACE(dev);
+ skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len +
+ dev->needed_tailroom, 0, GFP_KERNEL);
if (!skb) {
error = -ENOMEM;
goto end;
}
/* Reserve space for headers. */
- skb_reserve(skb, dev->hard_header_len);
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
skb->dev = dev;
@@ -948,7 +949,7 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
/* Copy the data if there is no space for the header or if it's
* read-only.
*/
- if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len))
+ if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph)))
goto abort;
__skb_push(skb, sizeof(*ph));
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 198aa0a72b0c..e928f1b621bf 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1845,6 +1845,7 @@ static int lan78xx_reset(struct lan78xx_net *dev)
buf = DEFAULT_BURST_CAP_SIZE / FS_USB_PKT_SIZE;
dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE;
dev->rx_qlen = 4;
+ dev->tx_qlen = 4;
}
ret = lan78xx_write_reg(dev, BURST_CAP, buf);
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index edecb676b7f4..cac328c56dbe 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1586,7 +1586,6 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
rq->rx_ring[i].basePA);
rq->rx_ring[i].base = NULL;
}
- rq->buf_info[i] = NULL;
}
if (rq->comp_ring.base) {
@@ -1601,6 +1600,7 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
(rq->rx_ring[0].size + rq->rx_ring[1].size);
dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0],
rq->buf_info_pa);
+ rq->buf_info[0] = rq->buf_info[1] = NULL;
}
}
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index ccb688d8eb84..dd10ad5b073c 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2124,6 +2124,9 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
if (ctrl->ctrl.state != NVME_CTRL_LIVE)
return;
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
+ return;
+
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: transport association error detected: %s\n",
ctrl->cnum, errmsg);
diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c
index aebb5c0d6ca3..f341a42f91c5 100644
--- a/drivers/phy/phy-core.c
+++ b/drivers/phy/phy-core.c
@@ -380,6 +380,10 @@ static struct phy *_of_phy_get(struct device_node *np, int index)
if (ret)
return ERR_PTR(-ENODEV);
+ /* This phy type handled by the usb-phy subsystem for now */
+ if (of_device_is_compatible(args.np, "usb-nop-xceiv"))
+ return ERR_PTR(-ENODEV);
+
mutex_lock(&phy_provider_mutex);
phy_provider = of_phy_provider_lookup(args.np);
if (IS_ERR(phy_provider) || !try_module_get(phy_provider->owner)) {
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 4c8561bfd462..73e414127f90 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -712,6 +712,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
goto cleanup1;
}
+ hpet_rtc_timer_init();
+
if (is_valid_irq(rtc_irq)) {
irq_handler_t rtc_cmos_int_handler;
@@ -719,6 +721,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
rtc_cmos_int_handler = hpet_rtc_interrupt;
retval = hpet_register_irq_handler(cmos_interrupt);
if (retval) {
+ hpet_mask_rtc_irq_bit(RTC_IRQMASK);
dev_warn(dev, "hpet_register_irq_handler "
" failed in rtc_init().");
goto cleanup1;
@@ -734,7 +737,6 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
goto cleanup1;
}
}
- hpet_rtc_timer_init();
/* export at least the first block of NVRAM */
nvram.size = address_space - NVRAM_OFFSET;
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index fcc05a1517c2..668ae95e2f99 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -753,12 +753,12 @@ lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp)
drqe.address_hi = putPaddrHigh(rqb_entry->dbuf.phys);
rc = lpfc_sli4_rq_put(rqb_entry->hrq, rqb_entry->drq, &hrqe, &drqe);
if (rc < 0) {
- (rqbp->rqb_free_buffer)(phba, rqb_entry);
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"6409 Cannot post to RQ %d: %x %x\n",
rqb_entry->hrq->queue_id,
rqb_entry->hrq->host_index,
rqb_entry->hrq->hba_index);
+ (rqbp->rqb_free_buffer)(phba, rqb_entry);
} else {
list_add_tail(&rqb_entry->hbuf.list, &rqbp->rqb_buffer_list);
rqbp->buffer_count++;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 3841e0c759d0..5b8c4b383a7d 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -149,7 +149,6 @@ typedef struct sg_fd { /* holds the state of a file descriptor */
struct list_head rq_list; /* head of request list */
struct fasync_struct *async_qp; /* used by asynchronous notification */
Sg_request req_arr[SG_MAX_QUEUE]; /* used as singly-linked list */
- char low_dma; /* as in parent but possibly overridden to 1 */
char force_packid; /* 1 -> pack_id input to read(), 0 -> ignored */
char cmd_q; /* 1 -> allow command queuing, 0 -> don't */
unsigned char next_cmd_len; /* 0: automatic, >0: use on next write() */
@@ -929,24 +928,14 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
/* strange ..., for backward compatibility */
return sfp->timeout_user;
case SG_SET_FORCE_LOW_DMA:
- result = get_user(val, ip);
- if (result)
- return result;
- if (val) {
- sfp->low_dma = 1;
- if ((0 == sfp->low_dma) && !sfp->res_in_use) {
- val = (int) sfp->reserve.bufflen;
- sg_remove_scat(sfp, &sfp->reserve);
- sg_build_reserve(sfp, val);
- }
- } else {
- if (atomic_read(&sdp->detaching))
- return -ENODEV;
- sfp->low_dma = sdp->device->host->unchecked_isa_dma;
- }
+ /*
+ * N.B. This ioctl never worked properly, but failed to
+ * return an error value. So returning '0' to keep compability
+ * with legacy applications.
+ */
return 0;
case SG_GET_LOW_DMA:
- return put_user((int) sfp->low_dma, ip);
+ return put_user((int) sdp->device->host->unchecked_isa_dma, ip);
case SG_GET_SCSI_ID:
if (!access_ok(VERIFY_WRITE, p, sizeof (sg_scsi_id_t)))
return -EFAULT;
@@ -1866,6 +1855,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
int sg_tablesize = sfp->parentdp->sg_tablesize;
int blk_size = buff_size, order;
gfp_t gfp_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;
+ struct sg_device *sdp = sfp->parentdp;
if (blk_size < 0)
return -EFAULT;
@@ -1891,7 +1881,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
scatter_elem_sz_prev = num;
}
- if (sfp->low_dma)
+ if (sdp->device->host->unchecked_isa_dma)
gfp_mask |= GFP_DMA;
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
@@ -2155,8 +2145,6 @@ sg_add_sfp(Sg_device * sdp)
sfp->timeout = SG_DEFAULT_TIMEOUT;
sfp->timeout_user = SG_DEFAULT_TIMEOUT_USER;
sfp->force_packid = SG_DEF_FORCE_PACK_ID;
- sfp->low_dma = (SG_DEF_FORCE_LOW_DMA == 0) ?
- sdp->device->host->unchecked_isa_dma : 1;
sfp->cmd_q = SG_DEF_COMMAND_Q;
sfp->keep_orphan = SG_DEF_KEEP_ORPHAN;
sfp->parentdp = sdp;
@@ -2620,7 +2608,7 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp)
jiffies_to_msecs(fp->timeout),
fp->reserve.bufflen,
(int) fp->reserve.k_use_sg,
- (int) fp->low_dma);
+ (int) sdp->device->host->unchecked_isa_dma);
seq_printf(s, " cmd_q=%d f_packid=%d k_orphan=%d closed=0\n",
(int) fp->cmd_q, (int) fp->force_packid,
(int) fp->keep_orphan);
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index caee50b64701..09814266c140 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1713,6 +1713,9 @@ static const struct usb_device_id acm_ids[] = {
{ USB_DEVICE(0x0ace, 0x1611), /* ZyDAS 56K USB MODEM - new version */
.driver_info = SINGLE_RX_URB, /* firmware bug */
},
+ { USB_DEVICE(0x11ca, 0x0201), /* VeriFone Mx870 Gadget Serial */
+ .driver_info = SINGLE_RX_URB,
+ },
{ USB_DEVICE(0x22b8, 0x7000), /* Motorola Q Phone */
.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
},
diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c
index a3ec49bdc1e6..ec38370ffcab 100644
--- a/drivers/usb/usbip/stub_dev.c
+++ b/drivers/usb/usbip/stub_dev.c
@@ -163,8 +163,7 @@ static void stub_shutdown_connection(struct usbip_device *ud)
* step 1?
*/
if (ud->tcp_socket) {
- dev_dbg(&sdev->udev->dev, "shutdown tcp_socket %p\n",
- ud->tcp_socket);
+ dev_dbg(&sdev->udev->dev, "shutdown sockfd %d\n", ud->sockfd);
kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
}
diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c
index 80fe806e6174..52c53899ca1c 100644
--- a/drivers/usb/usbip/stub_rx.c
+++ b/drivers/usb/usbip/stub_rx.c
@@ -338,23 +338,26 @@ static struct stub_priv *stub_priv_alloc(struct stub_device *sdev,
return priv;
}
-static int get_pipe(struct stub_device *sdev, int epnum, int dir)
+static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu)
{
struct usb_device *udev = sdev->udev;
struct usb_host_endpoint *ep;
struct usb_endpoint_descriptor *epd = NULL;
+ int epnum = pdu->base.ep;
+ int dir = pdu->base.direction;
+
+ if (epnum < 0 || epnum > 15)
+ goto err_ret;
if (dir == USBIP_DIR_IN)
ep = udev->ep_in[epnum & 0x7f];
else
ep = udev->ep_out[epnum & 0x7f];
- if (!ep) {
- dev_err(&sdev->interface->dev, "no such endpoint?, %d\n",
- epnum);
- BUG();
- }
+ if (!ep)
+ goto err_ret;
epd = &ep->desc;
+
if (usb_endpoint_xfer_control(epd)) {
if (dir == USBIP_DIR_OUT)
return usb_sndctrlpipe(udev, epnum);
@@ -377,15 +380,37 @@ static int get_pipe(struct stub_device *sdev, int epnum, int dir)
}
if (usb_endpoint_xfer_isoc(epd)) {
+ /* validate packet size and number of packets */
+ unsigned int maxp, packets, bytes;
+
+#define USB_EP_MAXP_MULT_SHIFT 11
+#define USB_EP_MAXP_MULT_MASK (3 << USB_EP_MAXP_MULT_SHIFT)
+#define USB_EP_MAXP_MULT(m) \
+ (((m) & USB_EP_MAXP_MULT_MASK) >> USB_EP_MAXP_MULT_SHIFT)
+
+ maxp = usb_endpoint_maxp(epd);
+ maxp *= (USB_EP_MAXP_MULT(
+ __le16_to_cpu(epd->wMaxPacketSize)) + 1);
+ bytes = pdu->u.cmd_submit.transfer_buffer_length;
+ packets = DIV_ROUND_UP(bytes, maxp);
+
+ if (pdu->u.cmd_submit.number_of_packets < 0 ||
+ pdu->u.cmd_submit.number_of_packets > packets) {
+ dev_err(&sdev->udev->dev,
+ "CMD_SUBMIT: isoc invalid num packets %d\n",
+ pdu->u.cmd_submit.number_of_packets);
+ return -1;
+ }
if (dir == USBIP_DIR_OUT)
return usb_sndisocpipe(udev, epnum);
else
return usb_rcvisocpipe(udev, epnum);
}
+err_ret:
/* NOT REACHED */
- dev_err(&sdev->interface->dev, "get pipe, epnum %d\n", epnum);
- return 0;
+ dev_err(&sdev->udev->dev, "CMD_SUBMIT: invalid epnum %d\n", epnum);
+ return -1;
}
static void masking_bogus_flags(struct urb *urb)
@@ -449,7 +474,10 @@ static void stub_recv_cmd_submit(struct stub_device *sdev,
struct stub_priv *priv;
struct usbip_device *ud = &sdev->ud;
struct usb_device *udev = sdev->udev;
- int pipe = get_pipe(sdev, pdu->base.ep, pdu->base.direction);
+ int pipe = get_pipe(sdev, pdu);
+
+ if (pipe == -1)
+ return;
priv = stub_priv_alloc(sdev, pdu);
if (!priv)
diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
index 9752b93f754e..1838f1b2c2fa 100644
--- a/drivers/usb/usbip/usbip_common.c
+++ b/drivers/usb/usbip/usbip_common.c
@@ -317,18 +317,14 @@ int usbip_recv(struct socket *sock, void *buf, int size)
struct msghdr msg;
struct kvec iov;
int total = 0;
-
/* for blocks of if (usbip_dbg_flag_xmit) */
char *bp = buf;
int osize = size;
- usbip_dbg_xmit("enter\n");
-
- if (!sock || !buf || !size) {
- pr_err("invalid arg, sock %p buff %p size %d\n", sock, buf,
- size);
+ if (!sock || !buf || !size)
return -EINVAL;
- }
+
+ usbip_dbg_xmit("enter\n");
do {
sock->sk->sk_allocation = GFP_NOIO;
@@ -341,11 +337,8 @@ int usbip_recv(struct socket *sock, void *buf, int size)
msg.msg_flags = MSG_NOSIGNAL;
result = kernel_recvmsg(sock, &msg, &iov, 1, size, MSG_WAITALL);
- if (result <= 0) {
- pr_debug("receive sock %p buf %p size %u ret %d total %d\n",
- sock, buf, size, result, total);
+ if (result <= 0)
goto err;
- }
size -= result;
buf += result;
diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h
index 86b08475c254..201f6fed63d7 100644
--- a/drivers/usb/usbip/usbip_common.h
+++ b/drivers/usb/usbip/usbip_common.h
@@ -261,6 +261,9 @@ struct usbip_device {
/* lock for status */
spinlock_t lock;
+#ifndef __GENKSYMS__
+ int sockfd;
+#endif
struct socket *tcp_socket;
struct task_struct *tcp_rx;
diff --git a/drivers/usb/usbip/usbip_event.c b/drivers/usb/usbip/usbip_event.c
index a7d4780142c0..25fb763d9daf 100644
--- a/drivers/usb/usbip/usbip_event.c
+++ b/drivers/usb/usbip/usbip_event.c
@@ -118,11 +118,12 @@ EXPORT_SYMBOL_GPL(usbip_event_add);
int usbip_event_happened(struct usbip_device *ud)
{
int happened = 0;
+ unsigned long flags;
- spin_lock(&ud->lock);
+ spin_lock_irqsave(&ud->lock, flags);
if (ud->event != 0)
happened = 1;
- spin_unlock(&ud->lock);
+ spin_unlock_irqrestore(&ud->lock, flags);
return happened;
}
diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index f9af04d7f02f..00d68945548e 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -121,9 +121,11 @@ static void dump_port_status_diff(u32 prev_status, u32 new_status)
void rh_port_connect(int rhport, enum usb_device_speed speed)
{
+ unsigned long flags;
+
usbip_dbg_vhci_rh("rh_port_connect %d\n", rhport);
- spin_lock(&the_controller->lock);
+ spin_lock_irqsave(&the_controller->lock, flags);
the_controller->port_status[rhport] |= USB_PORT_STAT_CONNECTION
| (1 << USB_PORT_FEAT_C_CONNECTION);
@@ -139,22 +141,24 @@ void rh_port_connect(int rhport, enum usb_device_speed speed)
break;
}
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
usb_hcd_poll_rh_status(vhci_to_hcd(the_controller));
}
static void rh_port_disconnect(int rhport)
{
+ unsigned long flags;
+
usbip_dbg_vhci_rh("rh_port_disconnect %d\n", rhport);
- spin_lock(&the_controller->lock);
+ spin_lock_irqsave(&the_controller->lock, flags);
the_controller->port_status[rhport] &= ~USB_PORT_STAT_CONNECTION;
the_controller->port_status[rhport] |=
(1 << USB_PORT_FEAT_C_CONNECTION);
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
usb_hcd_poll_rh_status(vhci_to_hcd(the_controller));
}
@@ -182,13 +186,14 @@ static int vhci_hub_status(struct usb_hcd *hcd, char *buf)
int retval;
int rhport;
int changed = 0;
+ unsigned long flags;
retval = DIV_ROUND_UP(VHCI_NPORTS + 1, 8);
memset(buf, 0, retval);
vhci = hcd_to_vhci(hcd);
- spin_lock(&vhci->lock);
+ spin_lock_irqsave(&vhci->lock, flags);
if (!HCD_HW_ACCESSIBLE(hcd)) {
usbip_dbg_vhci_rh("hw accessible flag not on?\n");
goto done;
@@ -209,7 +214,7 @@ static int vhci_hub_status(struct usb_hcd *hcd, char *buf)
usb_hcd_resume_root_hub(hcd);
done:
- spin_unlock(&vhci->lock);
+ spin_unlock_irqrestore(&vhci->lock, flags);
return changed ? retval : 0;
}
@@ -236,6 +241,7 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
struct vhci_hcd *dum;
int retval = 0;
int rhport;
+ unsigned long flags;
u32 prev_port_status[VHCI_NPORTS];
@@ -254,7 +260,7 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
dum = hcd_to_vhci(hcd);
- spin_lock(&dum->lock);
+ spin_lock_irqsave(&dum->lock, flags);
/* store old status and compare now and old later */
if (usbip_dbg_flag_vhci_rh) {
@@ -408,7 +414,7 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
}
usbip_dbg_vhci_rh(" bye\n");
- spin_unlock(&dum->lock);
+ spin_unlock_irqrestore(&dum->lock, flags);
return retval;
}
@@ -431,6 +437,7 @@ static void vhci_tx_urb(struct urb *urb)
{
struct vhci_device *vdev = get_vdev(urb->dev);
struct vhci_priv *priv;
+ unsigned long flags;
if (!vdev) {
pr_err("could not get virtual device");
@@ -443,7 +450,7 @@ static void vhci_tx_urb(struct urb *urb)
return;
}
- spin_lock(&vdev->priv_lock);
+ spin_lock_irqsave(&vdev->priv_lock, flags);
priv->seqnum = atomic_inc_return(&the_controller->seqnum);
if (priv->seqnum == 0xffff)
@@ -457,7 +464,7 @@ static void vhci_tx_urb(struct urb *urb)
list_add_tail(&priv->list, &vdev->priv_tx);
wake_up(&vdev->waitq_tx);
- spin_unlock(&vdev->priv_lock);
+ spin_unlock_irqrestore(&vdev->priv_lock, flags);
}
static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
@@ -466,15 +473,16 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
struct device *dev = &urb->dev->dev;
int ret = 0;
struct vhci_device *vdev;
+ unsigned long flags;
/* patch to usb_sg_init() is in 2.5.60 */
BUG_ON(!urb->transfer_buffer && urb->transfer_buffer_length);
- spin_lock(&the_controller->lock);
+ spin_lock_irqsave(&the_controller->lock, flags);
if (urb->status != -EINPROGRESS) {
dev_err(dev, "URB already unlinked!, status %d\n", urb->status);
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
return urb->status;
}
@@ -486,7 +494,7 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
vdev->ud.status == VDEV_ST_ERROR) {
dev_err(dev, "enqueue for inactive port %d\n", vdev->rhport);
spin_unlock(&vdev->ud.lock);
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
return -ENODEV;
}
spin_unlock(&vdev->ud.lock);
@@ -559,14 +567,14 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
out:
vhci_tx_urb(urb);
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
return 0;
no_need_xmit:
usb_hcd_unlink_urb_from_ep(hcd, urb);
no_need_unlink:
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
if (!ret)
usb_hcd_giveback_urb(vhci_to_hcd(the_controller),
urb, urb->status);
@@ -623,14 +631,15 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
{
struct vhci_priv *priv;
struct vhci_device *vdev;
+ unsigned long flags;
- spin_lock(&the_controller->lock);
+ spin_lock_irqsave(&the_controller->lock, flags);
priv = urb->hcpriv;
if (!priv) {
/* URB was never linked! or will be soon given back by
* vhci_rx. */
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
return -EIDRM;
}
@@ -639,7 +648,7 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
ret = usb_hcd_check_unlink_urb(hcd, urb, status);
if (ret) {
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
return ret;
}
}
@@ -664,10 +673,10 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
*/
usb_hcd_unlink_urb_from_ep(hcd, urb);
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb,
urb->status);
- spin_lock(&the_controller->lock);
+ spin_lock_irqsave(&the_controller->lock, flags);
} else {
/* tcp connection is alive */
@@ -679,7 +688,7 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
unlink = kzalloc(sizeof(struct vhci_unlink), GFP_ATOMIC);
if (!unlink) {
spin_unlock(&vdev->priv_lock);
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
usbip_event_add(&vdev->ud, VDEV_EVENT_ERROR_MALLOC);
return -ENOMEM;
}
@@ -698,7 +707,7 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
spin_unlock(&vdev->priv_lock);
}
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
usbip_dbg_vhci_hc("leave\n");
return 0;
@@ -707,8 +716,9 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
static void vhci_device_unlink_cleanup(struct vhci_device *vdev)
{
struct vhci_unlink *unlink, *tmp;
+ unsigned long flags;
- spin_lock(&the_controller->lock);
+ spin_lock_irqsave(&the_controller->lock, flags);
spin_lock(&vdev->priv_lock);
list_for_each_entry_safe(unlink, tmp, &vdev->unlink_tx, list) {
@@ -742,19 +752,19 @@ static void vhci_device_unlink_cleanup(struct vhci_device *vdev)
list_del(&unlink->list);
spin_unlock(&vdev->priv_lock);
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb,
urb->status);
- spin_lock(&the_controller->lock);
+ spin_lock_irqsave(&the_controller->lock, flags);
spin_lock(&vdev->priv_lock);
kfree(unlink);
}
spin_unlock(&vdev->priv_lock);
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
}
/*
@@ -768,7 +778,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud)
/* need this? see stub_dev.c */
if (ud->tcp_socket) {
- pr_debug("shutdown tcp_socket %p\n", ud->tcp_socket);
+ pr_debug("shutdown sockfd %d\n", ud->sockfd);
kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
}
@@ -821,8 +831,9 @@ static void vhci_shutdown_connection(struct usbip_device *ud)
static void vhci_device_reset(struct usbip_device *ud)
{
struct vhci_device *vdev = container_of(ud, struct vhci_device, ud);
+ unsigned long flags;
- spin_lock(&ud->lock);
+ spin_lock_irqsave(&ud->lock, flags);
vdev->speed = 0;
vdev->devid = 0;
@@ -836,14 +847,16 @@ static void vhci_device_reset(struct usbip_device *ud)
}
ud->status = VDEV_ST_NULL;
- spin_unlock(&ud->lock);
+ spin_unlock_irqrestore(&ud->lock, flags);
}
static void vhci_device_unusable(struct usbip_device *ud)
{
- spin_lock(&ud->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ud->lock, flags);
ud->status = VDEV_ST_ERROR;
- spin_unlock(&ud->lock);
+ spin_unlock_irqrestore(&ud->lock, flags);
}
static void vhci_device_init(struct vhci_device *vdev)
@@ -933,12 +946,13 @@ static int vhci_get_frame_number(struct usb_hcd *hcd)
static int vhci_bus_suspend(struct usb_hcd *hcd)
{
struct vhci_hcd *vhci = hcd_to_vhci(hcd);
+ unsigned long flags;
dev_dbg(&hcd->self.root_hub->dev, "%s\n", __func__);
- spin_lock(&vhci->lock);
+ spin_lock_irqsave(&vhci->lock, flags);
hcd->state = HC_STATE_SUSPENDED;
- spin_unlock(&vhci->lock);
+ spin_unlock_irqrestore(&vhci->lock, flags);
return 0;
}
@@ -947,15 +961,16 @@ static int vhci_bus_resume(struct usb_hcd *hcd)
{
struct vhci_hcd *vhci = hcd_to_vhci(hcd);
int rc = 0;
+ unsigned long flags;
dev_dbg(&hcd->self.root_hub->dev, "%s\n", __func__);
- spin_lock(&vhci->lock);
+ spin_lock_irqsave(&vhci->lock, flags);
if (!HCD_HW_ACCESSIBLE(hcd))
rc = -ESHUTDOWN;
else
hcd->state = HC_STATE_RUNNING;
- spin_unlock(&vhci->lock);
+ spin_unlock_irqrestore(&vhci->lock, flags);
return rc;
}
@@ -1053,17 +1068,18 @@ static int vhci_hcd_suspend(struct platform_device *pdev, pm_message_t state)
int rhport = 0;
int connected = 0;
int ret = 0;
+ unsigned long flags;
hcd = platform_get_drvdata(pdev);
- spin_lock(&the_controller->lock);
+ spin_lock_irqsave(&the_controller->lock, flags);
for (rhport = 0; rhport < VHCI_NPORTS; rhport++)
if (the_controller->port_status[rhport] &
USB_PORT_STAT_CONNECTION)
connected += 1;
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
if (connected > 0) {
dev_info(&pdev->dev,
diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c
index cf331a5fce96..959702bc0254 100644
--- a/drivers/usb/usbip/vhci_rx.c
+++ b/drivers/usb/usbip/vhci_rx.c
@@ -71,10 +71,11 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev,
{
struct usbip_device *ud = &vdev->ud;
struct urb *urb;
+ unsigned long flags;
- spin_lock(&vdev->priv_lock);
+ spin_lock_irqsave(&vdev->priv_lock, flags);
urb = pickup_urb_and_free_priv(vdev, pdu->base.seqnum);
- spin_unlock(&vdev->priv_lock);
+ spin_unlock_irqrestore(&vdev->priv_lock, flags);
if (!urb) {
pr_err("cannot find a urb of seqnum %u max seqnum %d\n",
@@ -103,9 +104,9 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev,
usbip_dbg_vhci_rx("now giveback urb %u\n", pdu->base.seqnum);
- spin_lock(&the_controller->lock);
+ spin_lock_irqsave(&the_controller->lock, flags);
usb_hcd_unlink_urb_from_ep(vhci_to_hcd(the_controller), urb);
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb, urb->status);
@@ -116,8 +117,9 @@ static struct vhci_unlink *dequeue_pending_unlink(struct vhci_device *vdev,
struct usbip_header *pdu)
{
struct vhci_unlink *unlink, *tmp;
+ unsigned long flags;
- spin_lock(&vdev->priv_lock);
+ spin_lock_irqsave(&vdev->priv_lock, flags);
list_for_each_entry_safe(unlink, tmp, &vdev->unlink_rx, list) {
pr_info("unlink->seqnum %lu\n", unlink->seqnum);
@@ -126,12 +128,12 @@ static struct vhci_unlink *dequeue_pending_unlink(struct vhci_device *vdev,
unlink->seqnum);
list_del(&unlink->list);
- spin_unlock(&vdev->priv_lock);
+ spin_unlock_irqrestore(&vdev->priv_lock, flags);
return unlink;
}
}
- spin_unlock(&vdev->priv_lock);
+ spin_unlock_irqrestore(&vdev->priv_lock, flags);
return NULL;
}
@@ -141,6 +143,7 @@ static void vhci_recv_ret_unlink(struct vhci_device *vdev,
{
struct vhci_unlink *unlink;
struct urb *urb;
+ unsigned long flags;
usbip_dump_header(pdu);
@@ -151,9 +154,9 @@ static void vhci_recv_ret_unlink(struct vhci_device *vdev,
return;
}
- spin_lock(&vdev->priv_lock);
+ spin_lock_irqsave(&vdev->priv_lock, flags);
urb = pickup_urb_and_free_priv(vdev, unlink->unlink_seqnum);
- spin_unlock(&vdev->priv_lock);
+ spin_unlock_irqrestore(&vdev->priv_lock, flags);
if (!urb) {
/*
@@ -170,9 +173,9 @@ static void vhci_recv_ret_unlink(struct vhci_device *vdev,
urb->status = pdu->u.ret_unlink.status;
pr_info("urb->status %d\n", urb->status);
- spin_lock(&the_controller->lock);
+ spin_lock_irqsave(&the_controller->lock, flags);
usb_hcd_unlink_urb_from_ep(vhci_to_hcd(the_controller), urb);
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb,
urb->status);
@@ -184,10 +187,11 @@ static void vhci_recv_ret_unlink(struct vhci_device *vdev,
static int vhci_priv_tx_empty(struct vhci_device *vdev)
{
int empty = 0;
+ unsigned long flags;
- spin_lock(&vdev->priv_lock);
+ spin_lock_irqsave(&vdev->priv_lock, flags);
empty = list_empty(&vdev->priv_rx);
- spin_unlock(&vdev->priv_lock);
+ spin_unlock_irqrestore(&vdev->priv_lock, flags);
return empty;
}
diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c
index 211f43f67ea2..1c7f41a65565 100644
--- a/drivers/usb/usbip/vhci_sysfs.c
+++ b/drivers/usb/usbip/vhci_sysfs.c
@@ -32,23 +32,28 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr,
{
char *s = out;
int i = 0;
+ unsigned long flags;
BUG_ON(!the_controller || !out);
- spin_lock(&the_controller->lock);
+ spin_lock_irqsave(&the_controller->lock, flags);
/*
* output example:
- * prt sta spd dev socket local_busid
- * 000 004 000 000 c5a7bb80 1-2.3
- * 001 004 000 000 d8cee980 2-3.4
+ * port sta spd dev sockfd local_busid
+ * 0000 004 000 00000000 000003 1-2.3
+ * 0001 004 000 00000000 000004 2-3.4
*
- * IP address can be retrieved from a socket pointer address by looking
- * up /proc/net/{tcp,tcp6}. Also, a userland program may remember a
- * port number and its peer IP address.
+ * Output includes socket fd instead of socket pointer address to
+ * avoid leaking kernel memory address in:
+ * /sys/devices/platform/vhci_hcd.0/status and in debug output.
+ * The socket pointer address is not used at the moment and it was
+ * made visible as a convenient way to find IP address from socket
+ * pointer address by looking up /proc/net/{tcp,tcp6}. As this opens
+ * a security hole, the change is made to use sockfd instead.
*/
out += sprintf(out,
- "prt sta spd bus dev socket local_busid\n");
+ "prt sta spd bus dev sockfd local_busid\n");
for (i = 0; i < VHCI_NPORTS; i++) {
struct vhci_device *vdev = port_to_vdev(i);
@@ -60,17 +65,17 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr,
out += sprintf(out, "%03u %08x ",
vdev->speed, vdev->devid);
out += sprintf(out, "%16p ", vdev->ud.tcp_socket);
+ out += sprintf(out, "%06u", vdev->ud.sockfd);
out += sprintf(out, "%s", dev_name(&vdev->udev->dev));
- } else {
- out += sprintf(out, "000 000 000 0000000000000000 0-0");
- }
+ } else
+ out += sprintf(out, "000 000 000 000000 0-0");
out += sprintf(out, "\n");
spin_unlock(&vdev->ud.lock);
}
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
return out - s;
}
@@ -80,11 +85,12 @@ static DEVICE_ATTR_RO(status);
static int vhci_port_disconnect(__u32 rhport)
{
struct vhci_device *vdev;
+ unsigned long flags;
usbip_dbg_vhci_sysfs("enter\n");
/* lock */
- spin_lock(&the_controller->lock);
+ spin_lock_irqsave(&the_controller->lock, flags);
vdev = port_to_vdev(rhport);
@@ -94,14 +100,14 @@ static int vhci_port_disconnect(__u32 rhport)
/* unlock */
spin_unlock(&vdev->ud.lock);
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
return -EINVAL;
}
/* unlock */
spin_unlock(&vdev->ud.lock);
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
usbip_event_add(&vdev->ud, VDEV_EVENT_DOWN);
@@ -177,6 +183,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
int sockfd = 0;
__u32 rhport = 0, devid = 0, speed = 0;
int err;
+ unsigned long flags;
/*
* @rhport: port number of vhci_hcd
@@ -202,14 +209,14 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
/* now need lock until setting vdev status as used */
/* begin a lock */
- spin_lock(&the_controller->lock);
+ spin_lock_irqsave(&the_controller->lock, flags);
vdev = port_to_vdev(rhport);
spin_lock(&vdev->ud.lock);
if (vdev->ud.status != VDEV_ST_NULL) {
/* end of the lock */
spin_unlock(&vdev->ud.lock);
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
sockfd_put(socket);
@@ -223,11 +230,12 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
vdev->devid = devid;
vdev->speed = speed;
+ vdev->ud.sockfd = sockfd;
vdev->ud.tcp_socket = socket;
vdev->ud.status = VDEV_ST_NOTASSIGNED;
spin_unlock(&vdev->ud.lock);
- spin_unlock(&the_controller->lock);
+ spin_unlock_irqrestore(&the_controller->lock, flags);
/* end the lock */
vdev->ud.tcp_rx = kthread_get_run(vhci_rx_loop, &vdev->ud, "vhci_rx");
diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c
index 99c714c18cbc..e5c0502d2ea7 100644
--- a/drivers/usb/usbip/vhci_tx.c
+++ b/drivers/usb/usbip/vhci_tx.c
@@ -47,16 +47,17 @@ static void setup_cmd_submit_pdu(struct usbip_header *pdup, struct urb *urb)
static struct vhci_priv *dequeue_from_priv_tx(struct vhci_device *vdev)
{
struct vhci_priv *priv, *tmp;
+ unsigned long flags;
- spin_lock(&vdev->priv_lock);
+ spin_lock_irqsave(&vdev->priv_lock, flags);
list_for_each_entry_safe(priv, tmp, &vdev->priv_tx, list) {
list_move_tail(&priv->list, &vdev->priv_rx);
- spin_unlock(&vdev->priv_lock);
+ spin_unlock_irqrestore(&vdev->priv_lock, flags);
return priv;
}
- spin_unlock(&vdev->priv_lock);
+ spin_unlock_irqrestore(&vdev->priv_lock, flags);
return NULL;
}
@@ -137,16 +138,17 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev)
static struct vhci_unlink *dequeue_from_unlink_tx(struct vhci_device *vdev)
{
struct vhci_unlink *unlink, *tmp;
+ unsigned long flags;
- spin_lock(&vdev->priv_lock);
+ spin_lock_irqsave(&vdev->priv_lock, flags);
list_for_each_entry_safe(unlink, tmp, &vdev->unlink_tx, list) {
list_move_tail(&unlink->list, &vdev->unlink_rx);
- spin_unlock(&vdev->priv_lock);
+ spin_unlock_irqrestore(&vdev->priv_lock, flags);
return unlink;
}
- spin_unlock(&vdev->priv_lock);
+ spin_unlock_irqrestore(&vdev->priv_lock, flags);
return NULL;
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 53f13f4187f7..cf99e0a1cbf3 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -49,9 +49,16 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
struct ceph_statfs st;
u64 fsid;
int err;
+ u64 data_pool;
+
+ if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) {
+ data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0];
+ } else {
+ data_pool = CEPH_NOPOOL;
+ }
dout("statfs\n");
- err = ceph_monc_do_statfs(&fsc->client->monc, &st);
+ err = ceph_monc_do_statfs(&fsc->client->monc, data_pool, &st);
if (err < 0)
return err;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index eea64912c9c0..466f7d60edc2 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -607,20 +607,54 @@ static const struct file_operations format2_fops;
static const struct file_operations format3_fops;
static const struct file_operations format4_fops;
-static int table_open(struct inode *inode, struct file *file)
+static int table_open1(struct inode *inode, struct file *file)
{
struct seq_file *seq;
- int ret = -1;
+ int ret;
- if (file->f_op == &format1_fops)
- ret = seq_open(file, &format1_seq_ops);
- else if (file->f_op == &format2_fops)
- ret = seq_open(file, &format2_seq_ops);
- else if (file->f_op == &format3_fops)
- ret = seq_open(file, &format3_seq_ops);
- else if (file->f_op == &format4_fops)
- ret = seq_open(file, &format4_seq_ops);
+ ret = seq_open(file, &format1_seq_ops);
+ if (ret)
+ return ret;
+
+ seq = file->private_data;
+ seq->private = inode->i_private; /* the dlm_ls */
+ return 0;
+}
+
+static int table_open2(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int ret;
+
+ ret = seq_open(file, &format2_seq_ops);
+ if (ret)
+ return ret;
+
+ seq = file->private_data;
+ seq->private = inode->i_private; /* the dlm_ls */
+ return 0;
+}
+
+static int table_open3(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int ret;
+
+ ret = seq_open(file, &format3_seq_ops);
+ if (ret)
+ return ret;
+
+ seq = file->private_data;
+ seq->private = inode->i_private; /* the dlm_ls */
+ return 0;
+}
+
+static int table_open4(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int ret;
+ ret = seq_open(file, &format4_seq_ops);
if (ret)
return ret;
@@ -631,7 +665,7 @@ static int table_open(struct inode *inode, struct file *file)
static const struct file_operations format1_fops = {
.owner = THIS_MODULE,
- .open = table_open,
+ .open = table_open1,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
@@ -639,7 +673,7 @@ static const struct file_operations format1_fops = {
static const struct file_operations format2_fops = {
.owner = THIS_MODULE,
- .open = table_open,
+ .open = table_open2,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
@@ -647,7 +681,7 @@ static const struct file_operations format2_fops = {
static const struct file_operations format3_fops = {
.owner = THIS_MODULE,
- .open = table_open,
+ .open = table_open3,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
@@ -655,7 +689,7 @@ static const struct file_operations format3_fops = {
static const struct file_operations format4_fops = {
.owner = THIS_MODULE,
- .open = table_open,
+ .open = table_open4,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3bc105ac73ee..43ff80e2556f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1754,6 +1754,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
next:
nfs_unlock_and_release_request(req);
+ /* Latency breaker */
+ cond_resched();
}
nfss = NFS_SERVER(data->inode);
if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index a260060042ad..67eb154af881 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -60,9 +60,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
else
GROUP_AT(gi, i) = GROUP_AT(rqgi, i);
- /* Each thread allocates its own gi, no race */
- groups_sort(gi);
}
+
+ /* Each thread allocates its own gi, no race */
+ groups_sort(gi);
} else {
gi = get_group_info(rqgi);
}
diff --git a/fs/pipe.c b/fs/pipe.c
index ad090a1bfcb0..36d5ce46d435 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1005,6 +1005,9 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
{
struct pipe_buffer *bufs;
+ if (!nr_pages)
+ return -EINVAL;
+
/*
* We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
* expect a lot of shrink+grow operations, just free and allocate
@@ -1049,13 +1052,19 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
/*
* Currently we rely on the pipe array holding a power-of-2 number
- * of pages.
+ * of pages. Returns 0 on error.
*/
static inline unsigned int round_pipe_size(unsigned int size)
{
unsigned long nr_pages;
+ if (size < pipe_min_size)
+ size = pipe_min_size;
+
nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (nr_pages == 0)
+ return 0;
+
return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
}
@@ -1066,13 +1075,18 @@ static inline unsigned int round_pipe_size(unsigned int size)
int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
size_t *lenp, loff_t *ppos)
{
+ unsigned int rounded_pipe_max_size;
int ret;
ret = proc_dointvec_minmax(table, write, buf, lenp, ppos);
if (ret < 0 || !write)
return ret;
- pipe_max_size = round_pipe_size(pipe_max_size);
+ rounded_pipe_max_size = round_pipe_size(pipe_max_size);
+ if (rounded_pipe_max_size == 0)
+ return -EINVAL;
+
+ pipe_max_size = rounded_pipe_max_size;
return ret;
}
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 2bb959ada45b..d8359f41f4a7 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -302,6 +302,8 @@ xfs_attr3_node_inactive(
&bp, XFS_ATTR_FORK);
if (error)
return error;
+ node = bp->b_addr;
+ btree = dp->d_ops->node_tree_p(node);
child_fsb = be32_to_cpu(btree[i + 1].before);
xfs_trans_brelse(*trans, bp);
}
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 6370215347f3..fdcede56dda3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -610,6 +610,7 @@ xfs_log_mount(
xfs_daddr_t blk_offset,
int num_bblks)
{
+ bool fatal = xfs_sb_version_hascrc(&mp->m_sb);
int error = 0;
int min_logfsbs;
@@ -661,9 +662,20 @@ xfs_log_mount(
XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
XFS_MAX_LOG_BYTES);
error = -EINVAL;
+ } else if (mp->m_sb.sb_logsunit > 1 &&
+ mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) {
+ xfs_warn(mp,
+ "log stripe unit %u bytes must be a multiple of block size",
+ mp->m_sb.sb_logsunit);
+ error = -EINVAL;
+ fatal = true;
}
if (error) {
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ /*
+ * Log check errors are always fatal on v5; or whenever bad
+ * metadata leads to a crash.
+ */
+ if (fatal) {
xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!");
ASSERT(0);
goto out_free_log;
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index edf5b04b918a..3e942d35d624 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -167,6 +167,8 @@ struct ceph_mon_request_header {
struct ceph_mon_statfs {
struct ceph_mon_request_header monhdr;
struct ceph_fsid fsid;
+ __u8 contains_data_pool;
+ __le64 data_pool;
} __attribute__ ((packed));
struct ceph_statfs {
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index caf22e082355..433d7851a9c1 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -134,8 +134,8 @@ extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
unsigned long timeout);
-extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
- struct ceph_statfs *buf);
+int ceph_monc_do_statfs(struct ceph_mon_client *monc, u64 data_pool,
+ struct ceph_statfs *buf);
int ceph_monc_get_version(struct ceph_mon_client *monc, const char *what,
u64 *newest);
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 4f2c526d5683..a8026c962e0f 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -9,6 +9,7 @@
#include <linux/compiler.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
+#include <linux/nospec.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/fs.h>
@@ -80,10 +81,12 @@ struct dentry;
static inline struct file *__fcheck_files(struct files_struct *files, unsigned int fd)
{
struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+ struct file __rcu **fdp;
- if (fd < fdt->max_fds) {
+ fdp = array_ptr(fdt->fd, fd, fdt->max_fds);
+ if (fdp) {
gmb();
- return rcu_dereference_raw(fdt->fd[fd]);
+ return rcu_dereference_raw(*fdp);
}
return NULL;
}
diff --git a/include/linux/nospec.h b/include/linux/nospec.h
new file mode 100644
index 000000000000..b8a9222e34d1
--- /dev/null
+++ b/include/linux/nospec.h
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#ifndef __NOSPEC_H__
+#define __NOSPEC_H__
+
+#include <linux/jump_label.h>
+#include <asm/barrier.h>
+
+/*
+ * If idx is negative or if idx > size then bit 63 is set in the mask,
+ * and the value of ~(-1L) is zero. When the mask is zero, bounds check
+ * failed, array_ptr will return NULL.
+ */
+#ifndef array_ptr_mask
+static inline unsigned long array_ptr_mask(unsigned long idx, unsigned long sz)
+{
+ return ~(long)(idx | (sz - 1 - idx)) >> (BITS_PER_LONG - 1);
+}
+#endif
+
+/**
+ * array_ptr - Generate a pointer to an array element, ensuring
+ * the pointer is bounded under speculation to NULL.
+ *
+ * @base: the base of the array
+ * @idx: the index of the element, must be less than LONG_MAX
+ * @sz: the number of elements in the array, must be less than LONG_MAX
+ *
+ * If @idx falls in the interval [0, @sz), returns the pointer to
+ * @arr[@idx], otherwise returns NULL.
+ */
+#define array_ptr(base, idx, sz) \
+({ \
+ union { typeof(*(base)) *_ptr; unsigned long _bit; } __u; \
+ typeof(*(base)) *_arr = (base); \
+ unsigned long _i = (idx); \
+ unsigned long _mask = array_ptr_mask(_i, (sz)); \
+ \
+ __u._ptr = _arr + _i; \
+ __u._bit &= _mask; \
+ __u._ptr; \
+})
+
+/**
+ * array_idx - Generate a pointer to an array index, ensuring the
+ * pointer is bounded under speculation to NULL.
+ *
+ * @idx: the index of the element, must be less than LONG_MAX
+ * @sz: the number of elements in the array, must be less than LONG_MAX
+ *
+ * If @idx falls in the interval [0, @sz), returns &@idx otherwise
+ * returns NULL.
+ */
+#define array_idx(idx, sz) \
+({ \
+ union { typeof((idx)) *_ptr; unsigned long _bit; } __u; \
+ typeof(idx) *_i = &(idx); \
+ unsigned long _mask = array_ptr_mask(*_i, (sz)); \
+ \
+ __u._ptr = _i; \
+ __u._bit &= _mask; \
+ __u._ptr; \
+})
+#endif /* __NOSPEC_H__ */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 318c24612458..2260f92f1492 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -29,9 +29,14 @@ static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
return (struct tcphdr *)skb_transport_header(skb);
}
+static inline unsigned int __tcp_hdrlen(const struct tcphdr *th)
+{
+ return th->doff * 4;
+}
+
static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
{
- return tcp_hdr(skb)->doff * 4;
+ return __tcp_hdrlen(tcp_hdr(skb));
}
static inline struct tcphdr *inner_tcp_hdr(const struct sk_buff *skb)
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ff307b548ed3..234f1e9dc08f 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -55,7 +55,7 @@ extern long do_no_restart_syscall(struct restart_block *parm);
#ifdef __KERNEL__
-#ifdef CONFIG_DEBUG_STACK_USAGE
+#if defined(CONFIG_DEBUG_STACK_USAGE) || defined(CONFIG_PAGE_TABLE_ISOLATION)
# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
#else
# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK)
diff --git a/include/net/arp.h b/include/net/arp.h
index 5e0f891d476c..1b3f86981757 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -19,6 +19,9 @@ static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32
static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key)
{
+ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+ key = INADDR_ANY;
+
return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev);
}
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index f266b512c3bd..2b50f19fe9f0 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -69,7 +69,7 @@ struct flow_dissector_key_ipv6_addrs {
/**
* struct flow_dissector_key_tipc_addrs:
- * @srcnode: source node address
+ * @srcnode: source node address combined with selector
*/
struct flow_dissector_key_tipc_addrs {
__be32 srcnode;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index ca92302f63d8..794de0805674 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -281,6 +281,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
int flags);
int ip6_flowlabel_init(void);
void ip6_flowlabel_cleanup(void);
+bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np);
static inline void fl6_sock_release(struct ip6_flowlabel *fl)
{
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index b5d26b74fd31..f712a44316c4 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -209,6 +209,11 @@ int net_eq(const struct net *net1, const struct net *net2)
return net1 == net2;
}
+static inline int check_net(const struct net *net)
+{
+ return atomic_read(&net->count) != 0;
+}
+
void net_drop_ns(void *);
#else
@@ -233,6 +238,11 @@ int net_eq(const struct net *net1, const struct net *net2)
return 1;
}
+static inline int check_net(const struct net *net)
+{
+ return 1;
+}
+
#define net_drop_ns NULL
#endif
diff --git a/include/net/tipc.h b/include/net/tipc.h
new file mode 100644
index 000000000000..07670ec022a7
--- /dev/null
+++ b/include/net/tipc.h
@@ -0,0 +1,62 @@
+/*
+ * include/net/tipc.h: Include file for TIPC message header routines
+ *
+ * Copyright (c) 2017 Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_HDR_H
+#define _TIPC_HDR_H
+
+#include <linux/random.h>
+
+#define KEEPALIVE_MSG_MASK 0x0e080000 /* LINK_PROTOCOL + MSG_IS_KEEPALIVE */
+
+struct tipc_basic_hdr {
+ __be32 w[4];
+};
+
+static inline u32 tipc_hdr_rps_key(struct tipc_basic_hdr *hdr)
+{
+ u32 w0 = ntohl(hdr->w[0]);
+ bool keepalive_msg = (w0 & KEEPALIVE_MSG_MASK) == KEEPALIVE_MSG_MASK;
+ int key;
+
+ /* Return source node identity as key */
+ if (likely(!keepalive_msg))
+ return hdr->w[3];
+
+ /* Spread PROBE/PROBE_REPLY messages across the cores */
+ get_random_bytes(&key, sizeof(key));
+ return key;
+}
+
+#endif
diff --git a/include/scsi/sg.h b/include/scsi/sg.h
index 3afec7032448..20bc71c3e0b8 100644
--- a/include/scsi/sg.h
+++ b/include/scsi/sg.h
@@ -197,7 +197,6 @@ typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
#define SG_DEFAULT_RETRIES 0
/* Defaults, commented if they differ from original sg driver */
-#define SG_DEF_FORCE_LOW_DMA 0 /* was 1 -> memory below 16MB on i386 */
#define SG_DEF_FORCE_PACK_ID 0
#define SG_DEF_KEEP_ORPHAN 0
#define SG_DEF_RESERVED_SIZE SG_SCATTER_SZ /* load time option */
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index bc81fb2e1f0e..6f04cb419115 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -26,6 +26,19 @@
#define EPOLL_CTL_DEL 2
#define EPOLL_CTL_MOD 3
+/* Epoll event masks */
+#define EPOLLIN 0x00000001
+#define EPOLLPRI 0x00000002
+#define EPOLLOUT 0x00000004
+#define EPOLLERR 0x00000008
+#define EPOLLHUP 0x00000010
+#define EPOLLRDNORM 0x00000040
+#define EPOLLRDBAND 0x00000080
+#define EPOLLWRNORM 0x00000100
+#define EPOLLWRBAND 0x00000200
+#define EPOLLMSG 0x00000400
+#define EPOLLRDHUP 0x00002000
+
/*
* Request the handling of system wakeup events so as to prevent system suspends
* from happening while those events are being processed.
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 5ecce347fbf6..3477e3fd918e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -839,6 +839,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_MSI_DEVID 131
#define KVM_CAP_X86_GUEST_MWAIT 143
#define KVM_CAP_ARM_USER_IRQ (0x1000 | 137) /* Will only be fixed in 4.12 */
+#define KVM_CAP_S390_BPB 152
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/kernel/futex.c b/kernel/futex.c
index e9202c4f7952..3936bf06c25b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1699,6 +1699,9 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
struct futex_q *this, *next;
WAKE_Q(wake_q);
+ if (nr_wake < 0 || nr_requeue < 0)
+ return -EINVAL;
+
if (requeue_pi) {
/*
* Requeue PI only works on two distinct uaddrs. This
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index c92e44855ddd..1276aabaab55 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -37,6 +37,7 @@ config ARCH_HAS_GCOV_PROFILE_ALL
config GCOV_PROFILE_ALL
bool "Profile entire Kernel"
+ depends on !COMPILE_TEST
depends on GCOV_KERNEL
depends on ARCH_HAS_GCOV_PROFILE_ALL
default n
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 91c6783ca8de..2befcf8128f3 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -662,7 +662,9 @@ static void hrtimer_reprogram(struct hrtimer *timer,
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
{
base->expires_next.tv64 = KTIME_MAX;
+ base->hang_detected = 0;
base->hres_active = 0;
+ base->next_timer = NULL;
}
/*
@@ -1620,6 +1622,7 @@ static void init_hrtimers_cpu(int cpu)
timerqueue_init_head(&cpu_base->clock_base[i].active);
}
+ cpu_base->active_bases = 0;
cpu_base->cpu = cpu;
hrtimer_init_hres(cpu_base);
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 6f9cac642adf..c7d0d0c8d0dc 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2288,6 +2288,7 @@ void trace_event_enum_update(struct trace_enum_map **map, int len)
{
struct trace_event_call *call, *p;
const char *last_system = NULL;
+ bool first = false;
int last_i;
int i;
@@ -2295,15 +2296,28 @@ void trace_event_enum_update(struct trace_enum_map **map, int len)
list_for_each_entry_safe(call, p, &ftrace_events, list) {
/* events are usually grouped together with systems */
if (!last_system || call->class->system != last_system) {
+ first = true;
last_i = 0;
last_system = call->class->system;
}
+ /*
+ * Since calls are grouped by systems, the likelyhood that the
+ * next call in the iteration belongs to the same system as the
+ * previous call is high. As an optimization, we skip seaching
+ * for a map[] that matches the call's system if the last call
+ * was from the same system. That's what last_i is for. If the
+ * call has the same system as the previous call, then last_i
+ * will be the index of the first map[] that has a matching
+ * system.
+ */
for (i = last_i; i < len; i++) {
if (call->class->system == map[i]->system) {
/* Save the first system if need be */
- if (!last_i)
+ if (first) {
last_i = i;
+ first = false;
+ }
update_event_printk(call, map[i]);
}
}
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 2aa38e5eb2a7..4c017b00d357 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -168,7 +168,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
next = pmd_addr_end(addr, end);
if (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)
&& pmd_none_or_clear_bad(pmd))
- continue;
+ goto next;
/* invoke the mmu notifier if the pmd is populated */
if (!mni_start) {
@@ -190,7 +190,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
}
/* huge pmd was handled */
- continue;
+ goto next;
}
}
/* fall through, the trans huge pmd just split */
@@ -198,6 +198,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
this_pages = change_pte_range(vma, pmd, addr, next, newprot,
dirty_accountable, prot_numa);
pages += this_pages;
+next:
+ cond_resched();
} while (pmd++, addr = next, addr != end);
if (mni_start)
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 928f58064098..c866e761651a 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -722,13 +722,12 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
if (unlikely(!net_eq(dev_net(dev), &init_net)))
goto drop;
- if (WARN_ONCE(dev->type != ARPHRD_CAN ||
- skb->len != CAN_MTU ||
- cfd->len > CAN_MAX_DLEN,
- "PF_CAN: dropped non conform CAN skbuf: "
- "dev type %d, len %d, datalen %d\n",
- dev->type, skb->len, cfd->len))
+ if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU ||
+ cfd->len > CAN_MAX_DLEN)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n",
+ dev->type, skb->len, cfd->len);
goto drop;
+ }
can_receive(skb, dev);
return NET_RX_SUCCESS;
@@ -746,13 +745,12 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
if (unlikely(!net_eq(dev_net(dev), &init_net)))
goto drop;
- if (WARN_ONCE(dev->type != ARPHRD_CAN ||
- skb->len != CANFD_MTU ||
- cfd->len > CANFD_MAX_DLEN,
- "PF_CAN: dropped non conform CAN FD skbuf: "
- "dev type %d, len %d, datalen %d\n",
- dev->type, skb->len, cfd->len))
+ if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU ||
+ cfd->len > CANFD_MAX_DLEN)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n",
+ dev->type, skb->len, cfd->len);
goto drop;
+ }
can_receive(skb, dev);
return NET_RX_SUCCESS;
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index cb6dd3f294ff..71b8e4e45057 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -690,7 +690,8 @@ bad:
/*
* Do a synchronous statfs().
*/
-int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
+int ceph_monc_do_statfs(struct ceph_mon_client *monc, u64 data_pool,
+ struct ceph_statfs *buf)
{
struct ceph_mon_generic_request *req;
struct ceph_mon_statfs *h;
@@ -710,6 +711,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
goto out;
req->u.st = buf;
+ req->request->hdr.version = cpu_to_le16(2);
mutex_lock(&monc->mutex);
register_generic_request(req);
@@ -719,6 +721,8 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
h->monhdr.session_mon = cpu_to_le16(-1);
h->monhdr.session_mon_tid = 0;
h->fsid = monc->monmap->fsid;
+ h->contains_data_pool = (data_pool != CEPH_NOPOOL);
+ h->data_pool = cpu_to_le64(data_pool);
send_generic_request(monc, req);
mutex_unlock(&monc->mutex);
diff --git a/net/core/dev.c b/net/core/dev.c
index b6cb1654d4d4..c162a416632e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2890,10 +2890,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
/* + transport layer */
- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
- hdr_len += tcp_hdrlen(skb);
- else
- hdr_len += sizeof(struct udphdr);
+ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
+ const struct tcphdr *th;
+ struct tcphdr _tcphdr;
+
+ th = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_tcphdr), &_tcphdr);
+ if (likely(th))
+ hdr_len += __tcp_hdrlen(th);
+ } else {
+ struct udphdr _udphdr;
+
+ if (skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_udphdr), &_udphdr))
+ hdr_len += sizeof(struct udphdr);
+ }
if (shinfo->gso_type & SKB_GSO_DODGY)
gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index daf8c40de9d4..91c85422429c 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -6,6 +6,7 @@
#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/tipc.h>
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
@@ -303,11 +304,10 @@ ipv6:
}
}
case htons(ETH_P_TIPC): {
- struct {
- __be32 pre[3];
- __be32 srcnode;
- } *hdr, _hdr;
- hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
+ struct tipc_basic_hdr *hdr, _hdr;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr),
+ data, hlen, &_hdr);
if (!hdr)
goto out_bad;
@@ -316,7 +316,7 @@ ipv6:
key_addrs = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_TIPC_ADDRS,
target_container);
- key_addrs->tipcaddrs.srcnode = hdr->srcnode;
+ key_addrs->tipcaddrs.srcnode = tipc_hdr_rps_key(hdr);
key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
}
goto out_good;
@@ -502,8 +502,8 @@ ip_proto_again:
out_good:
ret = true;
- key_control->thoff = (u16)nhoff;
out:
+ key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
key_basic->n_proto = proto;
key_basic->ip_proto = ip_proto;
@@ -511,7 +511,6 @@ out:
out_bad:
ret = false;
- key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
goto out;
}
EXPORT_SYMBOL(__skb_flow_dissect);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ae92131c4f89..253c86b78ff0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -496,7 +496,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
- hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
+ hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
if (n->parms->dead) {
rc = ERR_PTR(-EINVAL);
@@ -508,7 +508,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
n1 != NULL;
n1 = rcu_dereference_protected(n1->next,
lockdep_is_held(&tbl->lock))) {
- if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
+ if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
if (want_ref)
neigh_hold(n1);
rc = n1;
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 5e3a7302f774..7753681195c1 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
ccid2_pr_debug("RTO_EXPIRE\n");
+ if (sk->sk_state == DCCP_CLOSED)
+ goto out;
+
/* back-off timer */
hc->tx_rto <<= 1;
if (hc->tx_rto > DCCP_RTO_MAX)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 711b4dfa17c3..cb5eb649ad5f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -223,11 +223,16 @@ static bool arp_key_eq(const struct neighbour *neigh, const void *pkey)
static int arp_constructor(struct neighbour *neigh)
{
- __be32 addr = *(__be32 *)neigh->primary_key;
+ __be32 addr;
struct net_device *dev = neigh->dev;
struct in_device *in_dev;
struct neigh_parms *parms;
+ u32 inaddr_any = INADDR_ANY;
+ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+ memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len);
+
+ addr = *(__be32 *)neigh->primary_key;
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (!in_dev) {
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index b736b4128613..03d199b9a3ff 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -338,7 +338,7 @@ static __be32 igmpv3_get_srcaddr(struct net_device *dev,
return htonl(INADDR_ANY);
for_ifa(in_dev) {
- if (inet_ifa_match(fl4->saddr, ifa))
+ if (fl4->saddr == ifa->ifa_local)
return fl4->saddr;
} endfor_ifa(in_dev);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4bb8973883f9..2ebacc23b145 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2176,6 +2176,9 @@ adjudge_to_death:
tcp_send_active_reset(sk, GFP_ATOMIC);
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY);
+ } else if (!check_net(sock_net(sk))) {
+ /* Not possible to send reset; just close */
+ tcp_set_state(sk, TCP_CLOSE);
}
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 9280e1f8b493..7c7775367773 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -46,11 +46,19 @@ static void tcp_write_err(struct sock *sk)
* to prevent DoS attacks. It is called when a retransmission timeout
* or zero probe timeout occurs on orphaned socket.
*
+ * Also close if our net namespace is exiting; in that case there is no
+ * hope of ever communicating again since all netns interfaces are already
+ * down (or about to be down), and we need to release our dst references,
+ * which have been moved to the netns loopback interface, so the namespace
+ * can finish exiting. This condition is only possible if we are a kernel
+ * socket, as those do not hold references to the namespace.
+ *
* Criteria is still not confirmed experimentally and may change.
* We kill the socket, if:
* 1. If number of orphaned sockets exceeds an administratively configured
* limit.
* 2. If we have strong memory pressure.
+ * 3. If our net namespace is exiting.
*/
static int tcp_out_of_resources(struct sock *sk, bool do_reset)
{
@@ -79,6 +87,13 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
}
+
+ if (!check_net(sock_net(sk))) {
+ /* Not possible to send reset; just close */
+ tcp_done(sk);
+ return 1;
+ }
+
return 0;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index c08b960c7f5c..ea9aa8cc90b5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -148,7 +148,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
-static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
if (!np->autoflowlabel_set)
return ip6_default_np_autolabel(net);
@@ -1246,14 +1246,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
v6_cork->tclass = tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
- rt->dst.dev->mtu : dst_mtu(&rt->dst);
+ READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
else
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
- rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+ READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
if (np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
}
+ if (mtu < IPV6_MIN_MTU)
+ return -EINVAL;
cork->base.fragsize = mtu;
if (dst_allfrag(rt->dst.path))
cork->base.flags |= IPCORK_ALLFRAG;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index abe07c32cdbe..b4b88e391767 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1313,7 +1313,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_AUTOFLOWLABEL:
- val = np->autoflowlabel;
+ val = ip6_autoflowlabel(sock_net(sk), np);
break;
default:
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 0e1124c29a26..15150b412930 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -401,6 +401,11 @@ static int verify_address_len(const void *p)
#endif
int len;
+ if (sp->sadb_address_len <
+ DIV_ROUND_UP(sizeof(*sp) + offsetofend(typeof(*addr), sa_family),
+ sizeof(uint64_t)))
+ return -EINVAL;
+
switch (addr->sa_family) {
case AF_INET:
len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t));
@@ -511,6 +516,9 @@ static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void *
uint16_t ext_type;
int ext_len;
+ if (len < sizeof(*ehdr))
+ return -EINVAL;
+
ext_len = ehdr->sadb_ext_len;
ext_len *= sizeof(uint64_t);
ext_type = ehdr->sadb_ext_type;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 43af63b3cf88..b3e1b28deef2 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -83,7 +83,7 @@
static int sctp_writeable(struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
- size_t msg_len, struct sock **orig_sk);
+ size_t msg_len);
static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -332,16 +332,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
if (len < sizeof (struct sockaddr))
return NULL;
+ if (!opt->pf->af_supported(addr->sa.sa_family, opt))
+ return NULL;
+
/* V4 mapped address are really of AF_INET family */
if (addr->sa.sa_family == AF_INET6 &&
- ipv6_addr_v4mapped(&addr->v6.sin6_addr)) {
- if (!opt->pf->af_supported(AF_INET, opt))
- return NULL;
- } else {
- /* Does this PF support this AF? */
- if (!opt->pf->af_supported(addr->sa.sa_family, opt))
- return NULL;
- }
+ ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
+ !opt->pf->af_supported(AF_INET, opt))
+ return NULL;
/* If we get this far, af is valid. */
af = sctp_get_af_specific(addr->sa.sa_family);
@@ -1956,7 +1954,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
/* sk can be changed by peel off when waiting for buf. */
- err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
+ err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
if (err) {
if (err == -ESRCH) {
/* asoc is already dead. */
@@ -6978,12 +6976,12 @@ void sctp_sock_rfree(struct sk_buff *skb)
/* Helper function to wait for space in the sndbuf. */
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
- size_t msg_len, struct sock **orig_sk)
+ size_t msg_len)
{
struct sock *sk = asoc->base.sk;
- int err = 0;
long current_timeo = *timeo_p;
DEFINE_WAIT(wait);
+ int err = 0;
pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc,
*timeo_p, msg_len);
@@ -7012,17 +7010,13 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
release_sock(sk);
current_timeo = schedule_timeout(current_timeo);
lock_sock(sk);
- if (sk != asoc->base.sk) {
- release_sock(sk);
- sk = asoc->base.sk;
- lock_sock(sk);
- }
+ if (sk != asoc->base.sk)
+ goto do_error;
*timeo_p = current_timeo;
}
out:
- *orig_sk = sk;
finish_wait(&asoc->wait, &wait);
/* Release the association's refcnt. */
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 736fffb28ab6..d7c5d6028eb9 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -115,7 +115,8 @@ static int link_is_up(struct tipc_link *l)
static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *xmitq);
static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
- u16 rcvgap, int tolerance, int priority,
+ bool probe_reply, u16 rcvgap,
+ int tolerance, int priority,
struct sk_buff_head *xmitq);
static void link_reset_statistics(struct tipc_link *l_ptr);
static void link_print(struct tipc_link *l_ptr, const char *str);
@@ -597,7 +598,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
}
if (xmit)
- tipc_link_build_proto_msg(l, mtyp, prb, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, mtyp, prb, 0, 0, 0, 0, xmitq);
return rc;
}
@@ -985,7 +986,7 @@ int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
/* Unicast ACK */
l->rcv_unacked = 0;
l->stats.sent_acks++;
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
return 0;
}
@@ -998,7 +999,7 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
if (l->state == LINK_ESTABLISHING)
mtyp = ACTIVATE_MSG;
- tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq);
}
/* tipc_link_build_nack_msg: prepare link nack message for transmission
@@ -1012,7 +1013,7 @@ static void tipc_link_build_nack_msg(struct tipc_link *l,
return;
if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
}
/* tipc_link_rcv - process TIPC packets/messages arriving from off-node
@@ -1085,7 +1086,8 @@ drop:
}
static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
- u16 rcvgap, int tolerance, int priority,
+ bool probe_reply, u16 rcvgap,
+ int tolerance, int priority,
struct sk_buff_head *xmitq)
{
struct sk_buff *skb = NULL;
@@ -1124,6 +1126,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
msg_set_probe(hdr, probe);
if (probe)
l->stats.sent_probes++;
+ msg_set_is_keepalive(hdr, probe || probe_reply);
l->stats.sent_states++;
l->rcv_unacked = 0;
} else {
@@ -1224,6 +1227,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
u16 peers_prio = msg_linkprio(hdr);
u16 rcv_nxt = l->rcv_nxt;
int mtyp = msg_type(hdr);
+ bool reply = msg_probe(hdr);
char *if_name;
int rc = 0;
@@ -1296,9 +1300,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
/* Send NACK if peer has sent pkts we haven't received yet */
if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
rcvgap = peers_snd_nxt - l->rcv_nxt;
- if (rcvgap || (msg_probe(hdr)))
- tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap,
- 0, 0, xmitq);
+ if (rcvgap || reply)
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
+ rcvgap, 0, 0, xmitq);
tipc_link_release_pkts(l, ack);
/* If NACK, retransmit will now start at right position */
@@ -1667,14 +1671,14 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info)
tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
link->tolerance = tol;
- tipc_link_build_proto_msg(link, STATE_MSG, 0, 0, tol, 0, &xmitq);
+ tipc_link_build_proto_msg(link, STATE_MSG, 0, 0, tol, 0, 0, &xmitq);
}
if (props[TIPC_NLA_PROP_PRIO]) {
u32 prio;
prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
link->priority = prio;
- tipc_link_build_proto_msg(link, STATE_MSG, 0, 0, 0, prio, &xmitq);
+ tipc_link_build_proto_msg(link, STATE_MSG, 0, 0, 0, 0, prio, &xmitq);
}
if (props[TIPC_NLA_PROP_WIN]) {
u32 win;
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 55778a0aebf3..b22cf8628eb4 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -226,6 +226,16 @@ static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d)
msg_set_bits(m, 0, 19, 1, d);
}
+static inline int msg_is_keepalive(struct tipc_msg *m)
+{
+ return msg_bits(m, 0, 19, 1);
+}
+
+static inline void msg_set_is_keepalive(struct tipc_msg *m, u32 d)
+{
+ msg_set_bits(m, 0, 19, 1, d);
+}
+
static inline int msg_src_droppable(struct tipc_msg *m)
{
return msg_bits(m, 0, 18, 1);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7950506395a8..ca384e699277 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -16,6 +16,7 @@
#include <linux/nl80211.h>
#include <linux/rtnetlink.h>
#include <linux/netlink.h>
+#include <linux/nospec.h>
#include <linux/etherdevice.h>
#include <net/net_namespace.h>
#include <net/genetlink.h>
@@ -1879,20 +1880,23 @@ static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = {
static int parse_txq_params(struct nlattr *tb[],
struct ieee80211_txq_params *txq_params)
{
+ u8 ac, *idx;
+
if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] ||
!tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] ||
!tb[NL80211_TXQ_ATTR_AIFS])
return -EINVAL;
- txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
+ ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]);
txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]);
txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]);
txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]);
- if (txq_params->ac >= NL80211_NUM_ACS)
+ idx = array_idx(ac, NL80211_NUM_ACS);
+ if (!idx)
return -EINVAL;
-
+ txq_params->ac = *idx;
return 0;
}
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index d128d7f2d219..2fb32fa2d83e 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -158,7 +158,8 @@ cmd_cc_i_c = $(CPP) $(c_flags) -o $@ $<
$(obj)/%.i: $(src)/%.c FORCE
$(call if_changed_dep,cc_i_c)
-cmd_gensymtypes = \
+# These mirror gensymtypes_S and co below, keep them in synch.
+cmd_gensymtypes_c = \
$(CPP) -D__GENKSYMS__ $(c_flags) $< | \
$(GENKSYMS) $(if $(1), -T $(2)) \
$(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX)) \
@@ -169,7 +170,7 @@ cmd_gensymtypes = \
quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@
cmd_cc_symtypes_c = \
set -e; \
- $(call cmd_gensymtypes,true,$@) >/dev/null; \
+ $(call cmd_gensymtypes_c,true,$@) >/dev/null; \
test -s $@ || rm -f $@
$(obj)/%.symtypes : $(src)/%.c FORCE
@@ -198,9 +199,10 @@ else
# the actual value of the checksum generated by genksyms
cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $<
-cmd_modversions = \
+
+cmd_modversions_c = \
if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
- $(call cmd_gensymtypes,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
+ $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
> $(@D)/.tmp_$(@F:.o=.ver); \
\
$(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F) \
@@ -245,7 +247,7 @@ endif
define rule_cc_o_c
$(call echo-cmd,checksrc) $(cmd_checksrc) \
$(call echo-cmd,cc_o_c) $(cmd_cc_o_c); \
- $(cmd_modversions) \
+ $(cmd_modversions_c) \
$(call echo-cmd,record_mcount) \
$(cmd_record_mcount) \
scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' > \
@@ -254,6 +256,15 @@ define rule_cc_o_c
mv -f $(dot-target).tmp $(dot-target).cmd
endef
+define rule_as_o_S
+ $(call echo-cmd,as_o_S) $(cmd_as_o_S); \
+ scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,as_o_S)' > \
+ $(dot-target).tmp; \
+ $(cmd_modversions_S) \
+ rm -f $(depfile); \
+ mv -f $(dot-target).tmp $(dot-target).cmd
+endef
+
# Built-in and composite module parts
$(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE
$(call cmd,force_checksrc)
@@ -284,6 +295,38 @@ modkern_aflags := $(KBUILD_AFLAGS_KERNEL) $(AFLAGS_KERNEL)
$(real-objs-m) : modkern_aflags := $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE)
$(real-objs-m:.o=.s): modkern_aflags := $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE)
+# .S file exports must have their C prototypes defined in asm/asm-prototypes.h
+# or a file that it includes, in order to get versioned symbols. We build a
+# dummy C file that includes asm-prototypes and the EXPORT_SYMBOL lines from
+# the .S file (with trailing ';'), and run genksyms on that, to extract vers.
+#
+# This is convoluted. The .S file must first be preprocessed to run guards and
+# expand names, then the resulting exports must be constructed into plain
+# EXPORT_SYMBOL(symbol); to build our dummy C file, and that gets preprocessed
+# to make the genksyms input.
+#
+# These mirror gensymtypes_c and co above, keep them in synch.
+cmd_gensymtypes_S = \
+ (echo "\#include <linux/kernel.h>" ; \
+ echo "\#include <asm/asm-prototypes.h>" ; \
+ $(CPP) $(a_flags) $< | \
+ grep "\<___EXPORT_SYMBOL\>" | \
+ sed 's/.*___EXPORT_SYMBOL[[:space:]]*\([a-zA-Z0-9_]*\)[[:space:]]*,.*/EXPORT_SYMBOL(\1);/' ) | \
+ $(CPP) -D__GENKSYMS__ $(c_flags) -xc - | \
+ $(GENKSYMS) $(if $(1), -T $(2)) \
+ $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX)) \
+ $(if $(KBUILD_PRESERVE),-p) \
+ -r $(firstword $(wildcard $(2:.symtypes=.symref) /dev/null))
+
+quiet_cmd_cc_symtypes_S = SYM $(quiet_modtag) $@
+cmd_cc_symtypes_S = \
+ set -e; \
+ $(call cmd_gensymtypes_S,true,$@) >/dev/null; \
+ test -s $@ || rm -f $@
+
+$(obj)/%.symtypes : $(src)/%.S FORCE
+ $(call cmd,cc_symtypes_S)
+
quiet_cmd_as_s_S = CPP $(quiet_modtag) $@
cmd_as_s_S = $(CPP) $(a_flags) -o $@ $<
@@ -291,10 +334,40 @@ $(obj)/%.s: $(src)/%.S FORCE
$(call if_changed_dep,as_s_S)
quiet_cmd_as_o_S = AS $(quiet_modtag) $@
-cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $<
+
+ifndef CONFIG_MODVERSIONS
+cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $<
+
+else
+
+ASM_PROTOTYPES := $(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/asm-prototypes.h)
+
+ifeq ($(ASM_PROTOTYPES),)
+cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $<
+
+else
+
+# versioning matches the C process described above, with difference that
+# we parse asm-prototypes.h C header to get function definitions.
+
+cmd_as_o_S = $(CC) $(a_flags) -c -o $(@D)/.tmp_$(@F) $<
+
+cmd_modversions_S = \
+ if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
+ $(call cmd_gensymtypes_S,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
+ > $(@D)/.tmp_$(@F:.o=.ver); \
+ \
+ $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F) \
+ -T $(@D)/.tmp_$(@F:.o=.ver); \
+ rm -f $(@D)/.tmp_$(@F) $(@D)/.tmp_$(@F:.o=.ver); \
+ else \
+ mv -f $(@D)/.tmp_$(@F) $@; \
+ fi;
+endif
+endif
$(obj)/%.o: $(src)/%.S FORCE
- $(call if_changed_dep,as_o_S)
+ $(call if_changed_rule,as_o_S)
targets += $(real-objs-y) $(real-objs-m) $(lib-y)
targets += $(extra-y) $(MAKECMDGOALS) $(always)
diff --git a/tools/usb/usbip/libsrc/usbip_common.c b/tools/usb/usbip/libsrc/usbip_common.c
index ac73710473de..8000445ff884 100644
--- a/tools/usb/usbip/libsrc/usbip_common.c
+++ b/tools/usb/usbip/libsrc/usbip_common.c
@@ -215,9 +215,16 @@ int read_usb_interface(struct usbip_usb_device *udev, int i,
struct usbip_usb_interface *uinf)
{
char busid[SYSFS_BUS_ID_SIZE];
+ int size;
struct udev_device *sif;
- sprintf(busid, "%s:%d.%d", udev->busid, udev->bConfigurationValue, i);
+ size = snprintf(busid, sizeof(busid), "%s:%d.%d",
+ udev->busid, udev->bConfigurationValue, i);
+ if (size < 0 || (unsigned int)size >= sizeof(busid)) {
+ err("busid length %i >= %lu or < 0", size,
+ (unsigned long)sizeof(busid));
+ return -1;
+ }
sif = udev_device_new_from_subsystem_sysname(udev_context, "usb", busid);
if (!sif) {
diff --git a/tools/usb/usbip/libsrc/usbip_host_driver.c b/tools/usb/usbip/libsrc/usbip_host_driver.c
index bef08d5c44e8..071b9ce99420 100644
--- a/tools/usb/usbip/libsrc/usbip_host_driver.c
+++ b/tools/usb/usbip/libsrc/usbip_host_driver.c
@@ -39,13 +39,19 @@ struct udev *udev_context;
static int32_t read_attr_usbip_status(struct usbip_usb_device *udev)
{
char status_attr_path[SYSFS_PATH_MAX];
+ int size;
int fd;
int length;
char status;
int value = 0;
- snprintf(status_attr_path, SYSFS_PATH_MAX, "%s/usbip_status",
- udev->path);
+ size = snprintf(status_attr_path, SYSFS_PATH_MAX, "%s/usbip_status",
+ udev->path);
+ if (size < 0 || (unsigned int)size >= sizeof(status_attr_path)) {
+ err("usbip_status path length %i >= %lu or < 0", size,
+ (unsigned long)sizeof(status_attr_path));
+ return -1;
+ }
fd = open(status_attr_path, O_RDONLY);
if (fd < 0) {
@@ -225,6 +231,7 @@ int usbip_host_export_device(struct usbip_exported_device *edev, int sockfd)
{
char attr_name[] = "usbip_sockfd";
char sockfd_attr_path[SYSFS_PATH_MAX];
+ int size;
char sockfd_buff[30];
int ret;
@@ -244,10 +251,20 @@ int usbip_host_export_device(struct usbip_exported_device *edev, int sockfd)
}
/* only the first interface is true */
- snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s",
- edev->udev.path, attr_name);
+ size = snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s",
+ edev->udev.path, attr_name);
+ if (size < 0 || (unsigned int)size >= sizeof(sockfd_attr_path)) {
+ err("exported device path length %i >= %lu or < 0", size,
+ (unsigned long)sizeof(sockfd_attr_path));
+ return -1;
+ }
- snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd);
+ size = snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd);
+ if (size < 0 || (unsigned int)size >= sizeof(sockfd_buff)) {
+ err("socket length %i >= %lu or < 0", size,
+ (unsigned long)sizeof(sockfd_buff));
+ return -1;
+ }
ret = write_sysfs_attribute(sockfd_attr_path, sockfd_buff,
strlen(sockfd_buff));
diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c
index ad9204773533..1274f326242c 100644
--- a/tools/usb/usbip/libsrc/vhci_driver.c
+++ b/tools/usb/usbip/libsrc/vhci_driver.c
@@ -55,12 +55,12 @@ static int parse_status(const char *value)
while (*c != '\0') {
int port, status, speed, devid;
- unsigned long socket;
+ int sockfd;
char lbusid[SYSFS_BUS_ID_SIZE];
- ret = sscanf(c, "%d %d %d %x %lx %31s\n",
+ ret = sscanf(c, "%d %d %d %x %u %31s\n",
&port, &status, &speed,
- &devid, &socket, lbusid);
+ &devid, &sockfd, lbusid);
if (ret < 5) {
dbg("sscanf failed: %d", ret);
@@ -69,7 +69,7 @@ static int parse_status(const char *value)
dbg("port %d status %d speed %d devid %x",
port, status, speed, devid);
- dbg("socket %lx lbusid %s", socket, lbusid);
+ dbg("sockfd %u lbusid %s", sockfd, lbusid);
/* if a device is connected, look at it */
diff --git a/tools/usb/usbip/src/usbip.c b/tools/usb/usbip/src/usbip.c
index d7599d943529..73d8eee8130b 100644
--- a/tools/usb/usbip/src/usbip.c
+++ b/tools/usb/usbip/src/usbip.c
@@ -176,6 +176,8 @@ int main(int argc, char *argv[])
break;
case '?':
printf("usbip: invalid option\n");
+ /* Terminate after printing error */
+ /* FALLTHRU */
default:
usbip_usage();
goto out;