Home Home > GIT Browse > SLE12-SP3-AZURE
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kernel Build Daemon <kbuild@suse.de> 2019-02-12 07:00:30 +0100
committer Kernel Build Daemon <kbuild@suse.de> 2019-02-12 07:00:30 +0100
commit d21c6f20057812ea77b30c331127024a76d37edf (patch)
tree 381af1de7f2649b1987909d5ee7f209b8352a542
parent d27c5c184d1594f413935ce20bf509f5751133bc (diff)
parent 56b47a0c581719d3fd8f1cf14621957ac2ede52e (diff)
Merge branch 'SLE12-SP3' into SLE12-SP3-AZURE
-rw-r--r-- blacklist.conf 3
-rw-r--r-- patches.fixes/0001-drm-vmwgfx-Fix-setting-of-dma-masks.patch 42
-rw-r--r-- patches.fixes/0001-drm-vmwgfx-Return-error-code-from-vmw_execbuf_copy_f.patch 41
-rw-r--r-- patches.fixes/Bluetooth-Verify-that-l2cap_get_conf_opt-provides-la.patch 2
-rw-r--r-- patches.fixes/acpi-nfit-fix-ars-overflow-continuation.patch 38
-rw-r--r-- patches.fixes/fs-fix-lost-error-code-in-dio_complete.patch 60
-rw-r--r-- patches.kabi/revert-most-of-4.4.174.patch 3118
-rw-r--r-- patches.kernel.org/4.4.174-001-inet-frags-change-inet_frags_init_net-return-.patch 157
-rw-r--r-- patches.kernel.org/4.4.174-002-inet-frags-add-a-pointer-to-struct-netns_frag.patch 438
-rw-r--r-- patches.kernel.org/4.4.174-003-inet-frags-refactor-ipfrag_init.patch 44
-rw-r--r-- patches.kernel.org/4.4.174-004-inet-frags-refactor-ipv6_frag_init.patch 81
-rw-r--r-- patches.kernel.org/4.4.174-005-inet-frags-refactor-lowpan_net_frag_init.patch 68
-rw-r--r-- patches.kernel.org/4.4.174-006-rhashtable-add-rhashtable_lookup_get_insert_k.patch 224
-rw-r--r-- patches.kernel.org/4.4.174-007-rhashtable-Add-rhashtable_lookup.patch 125
-rw-r--r-- patches.kernel.org/4.4.174-008-rhashtable-add-schedule-points.patch 50
-rw-r--r-- patches.kernel.org/4.4.174-009-inet-frags-use-rhashtables-for-reassembly-uni.patch 1354
-rw-r--r-- patches.kernel.org/4.4.174-010-net-ieee802154-6lowpan-fix-frag-reassembly.patch 79
-rw-r--r-- patches.kernel.org/4.4.174-011-ipfrag-really-prevent-allocation-on-netns-exi.patch 43
-rw-r--r-- patches.kernel.org/4.4.174-012-inet-frags-remove-some-helpers.patch 142
-rw-r--r-- patches.kernel.org/4.4.174-013-inet-frags-get-rif-of-inet_frag_evicting.patch 158
-rw-r--r-- patches.kernel.org/4.4.174-014-inet-frags-remove-inet_frag_maybe_warn_overfl.patch 128
-rw-r--r-- patches.kernel.org/4.4.174-015-inet-frags-break-the-2GB-limit-for-frags-stor.patch 276
-rw-r--r-- patches.kernel.org/4.4.174-016-inet-frags-do-not-clone-skb-in-ip_expire.patch 68
-rw-r--r-- patches.kernel.org/4.4.174-017-ipv6-frags-rewrite-ip6_expire_frag_queue.patch 82
-rw-r--r-- patches.kernel.org/4.4.174-018-rhashtable-reorganize-struct-rhashtable-layou.patch 57
-rw-r--r-- patches.kernel.org/4.4.174-019-inet-frags-reorganize-struct-netns_frags.patch 51
-rw-r--r-- patches.kernel.org/4.4.174-020-inet-frags-get-rid-of-ipfrag_skb_cb-FRAG_CB.patch 54
-rw-r--r-- patches.kernel.org/4.4.174-021-inet-frags-fix-ip6frag_low_thresh-boundary.patch 227
-rw-r--r-- patches.kernel.org/4.4.174-022-ip-discard-IPv4-datagrams-with-overlapping-se.patch 159
-rw-r--r-- patches.kernel.org/4.4.174-023-net-modify-skb_rbtree_purge-to-return-the-tru.patch 75
-rw-r--r-- patches.kernel.org/4.4.174-024-ipv6-defrag-drop-non-last-frags-smaller-than-.patch 66
-rw-r--r-- patches.kernel.org/4.4.174-025-net-pskb_trim_rcsum-and-CHECKSUM_COMPLETE-are.patch 81
-rw-r--r-- patches.kernel.org/4.4.174-026-ip-use-rb-trees-for-IP-frag-queue.patch 483
-rw-r--r-- patches.kernel.org/4.4.174-027-ip-add-helpers-to-process-in-order-fragments-.patch 172
-rw-r--r-- patches.kernel.org/4.4.174-028-ip-process-in-order-fragments-efficiently.patch 277
-rw-r--r-- patches.kernel.org/4.4.174-029-ip-frags-fix-crash-in-ip_do_fragment.patch 118
-rw-r--r-- patches.kernel.org/4.4.174-030-ipv4-frags-precedence-bug-in-ip_expire.patch 39
-rw-r--r-- patches.kernel.org/4.4.174-031-inet-frags-better-deal-with-smp-races.patch 94
-rw-r--r-- patches.kernel.org/4.4.174-032-net-fix-pskb_trim_rcsum_slow-with-odd-trim-of.patch 54
-rw-r--r-- patches.kernel.org/4.4.174-033-net-ipv4-do-not-handle-duplicate-fragments-as.patch 104
-rw-r--r-- patches.kernel.org/4.4.174-034-rcu-Force-boolean-subscript-for-expedited-sta.patch 38
-rw-r--r-- patches.kernel.org/4.4.174-035-Linux-4.4.174.patch 27
-rw-r--r-- patches.suse/0002-rcu-Add-more-diagnostics-to-expedited-stall-warning-.patch 15
-rw-r--r-- series.conf 40
44 files changed, 9041 insertions, 11 deletions
diff --git a/blacklist.conf b/blacklist.conf
index 1e49e08522..f60e08b4b1 100644
--- a/blacklist.conf
+++ b/blacklist.conf
@@ -584,3 +584,6 @@ dd6251e463d3d8ea55ac2c5944e24bd6ed8f423b # Since gic implementation_rev missing
0207df4fa1a869281ddbf72db6203dbf036b3e1a # KASAN-specific and not enabled on SLE
8a68d3da50b952232bbb39f7582a9050c40a0d78 # Not a bug
b31a8cc1a53dda3a33b6c9c62779869d4d5fc142 # the test is not in 4.4 yet
+6e00f7dd5e4edc2443f030b226f66fe4f1267667 # 3e67f106f619 was reverted
+e8c8b53ccaff568fef4c13a6ccaf08bf241aa01a # 88078d98d1bb was reverted
+acc93d30d7d43f428272c20a047389c4cbca82ba # Possibly breaking existing setups
diff --git a/patches.fixes/0001-drm-vmwgfx-Fix-setting-of-dma-masks.patch b/patches.fixes/0001-drm-vmwgfx-Fix-setting-of-dma-masks.patch
new file mode 100644
index 0000000000..f096e24b5b
--- /dev/null
+++ b/patches.fixes/0001-drm-vmwgfx-Fix-setting-of-dma-masks.patch
@@ -0,0 +1,42 @@
+From 4cbfa1e6c09e98450aab3240e5119b0ab2c9795b Mon Sep 17 00:00:00 2001
+From: Thomas Hellstrom <thellstrom@vmware.com>
+Date: Mon, 28 Jan 2019 10:31:33 +0100
+Subject: drm/vmwgfx: Fix setting of dma masks
+Git-commit: 4cbfa1e6c09e98450aab3240e5119b0ab2c9795b
+Patch-mainline: v5.0-rc6
+References: bsc#1106929
+
+Previously we set only the dma mask and not the coherent mask. Fix that.
+Also, for clarity, make sure both are initially set to 64 bits.
+
+Cc: <stable@vger.kernel.org>
+Fixes: 0d00c488f3de: ("drm/vmwgfx: Fix the driver for large dma addresses")
+Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
+Reviewed-by: Deepak Rawat <drawat@vmware.com>
+Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
+---
+ drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+@@ -594,13 +594,16 @@ out_fixup:
+ static int vmw_dma_masks(struct vmw_private *dev_priv)
+ {
+ struct drm_device *dev = dev_priv->dev;
++ int ret = 0;
+
+- if (intel_iommu_enabled &&
++ ret = dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64));
++ if (dev_priv->map_mode != vmw_dma_phys &&
+ (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask)) {
+ DRM_INFO("Restricting DMA addresses to 44 bits.\n");
+- return dma_set_mask(dev->dev, DMA_BIT_MASK(44));
++ return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44));
+ }
+- return 0;
++
++ return ret;
+ }
+ #else
+ static int vmw_dma_masks(struct vmw_private *dev_priv)
diff --git a/patches.fixes/0001-drm-vmwgfx-Return-error-code-from-vmw_execbuf_copy_f.patch b/patches.fixes/0001-drm-vmwgfx-Return-error-code-from-vmw_execbuf_copy_f.patch
new file mode 100644
index 0000000000..e7bbade1f0
--- /dev/null
+++ b/patches.fixes/0001-drm-vmwgfx-Return-error-code-from-vmw_execbuf_copy_f.patch
@@ -0,0 +1,41 @@
+From 728354c005c36eaf44b6e5552372b67e60d17f56 Mon Sep 17 00:00:00 2001
+From: Thomas Hellstrom <thellstrom@vmware.com>
+Date: Thu, 31 Jan 2019 10:55:37 +0100
+Subject: drm/vmwgfx: Return error code from vmw_execbuf_copy_fence_user
+Git-commit: 728354c005c36eaf44b6e5552372b67e60d17f56
+Patch-mainline: v5.0-rc6
+References: bsc#1106929
+
+The function was unconditionally returning 0, and a caller would have to
+rely on the returned fence pointer being NULL to detect errors. However,
+the function vmw_execbuf_copy_fence_user() would expect a non-zero error
+code in that case and would BUG otherwise.
+
+So make sure we return a proper non-zero error code if the fence pointer
+returned is NULL.
+
+Cc: <stable@vger.kernel.org>
+Fixes: ae2a104058e2: ("vmwgfx: Implement fence objects")
+Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
+Reviewed-by: Deepak Rawat <drawat@vmware.com>
+Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
+---
+ drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+index f2d13a72c05d..88b8178d4687 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+@@ -3570,7 +3570,7 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv,
+ *p_fence = NULL;
+ }
+
+- return 0;
++ return ret;
+ }
+
+ /**
+--
+2.20.1
+
diff --git a/patches.fixes/Bluetooth-Verify-that-l2cap_get_conf_opt-provides-la.patch b/patches.fixes/Bluetooth-Verify-that-l2cap_get_conf_opt-provides-la.patch
index a10d72b350..2e34b93543 100644
--- a/patches.fixes/Bluetooth-Verify-that-l2cap_get_conf_opt-provides-la.patch
+++ b/patches.fixes/Bluetooth-Verify-that-l2cap_get_conf_opt-provides-la.patch
@@ -5,7 +5,7 @@ Subject: [PATCH] Bluetooth: Verify that l2cap_get_conf_opt provides large
Patch-mainline: Queued in subsystem maintainer repository
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git
Git-commit: 7c9cbd0b5e38a1672fcd137894ace3b042dfbf69
-References: bsc#1120758 CVE-2019-3459 CVE-2019-3560
+References: bsc#1120758 CVE-2019-3459 CVE-2019-3460
The function l2cap_get_conf_opt will return L2CAP_CONF_OPT_SIZE + opt->len
as length value. The opt->len however is in control over the remote user
diff --git a/patches.fixes/acpi-nfit-fix-ars-overflow-continuation.patch b/patches.fixes/acpi-nfit-fix-ars-overflow-continuation.patch
new file mode 100644
index 0000000000..31addf1320
--- /dev/null
+++ b/patches.fixes/acpi-nfit-fix-ars-overflow-continuation.patch
@@ -0,0 +1,38 @@
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Thu, 1 Nov 2018 00:30:22 -0700
+Subject: acpi, nfit: Fix ARS overflow continuation
+Git-commit: 3fa58dcab50a0aa16817f16a8d38aee869eb3fb9
+Patch-mainline: v4.20-rc3
+References: bsc#1125000
+
+When the platform BIOS is unable to report all the media error records
+it requires the OS to restart the scrub at a prescribed location. The
+driver detects the overflow condition, but then fails to report it to
+the ARS state machine after reaping the records. Propagate -ENOSPC
+correctly to continue the ARS operation.
+
+Cc: <stable@vger.kernel.org>
+Fixes: 1cf03c00e7c1 ("nfit: scrub and register regions in a workqueue")
+Reported-by: Jacek Zloch <jacek.zloch@intel.com>
+Reviewed-by: Dave Jiang <dave.jiang@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Acked-by: Johannes Thumshirn <jthumshirn@suse.de>
+---
+ drivers/acpi/nfit/core.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/acpi/nfit/core.c
++++ b/drivers/acpi/nfit/core.c
+@@ -2327,9 +2327,9 @@ static int acpi_nfit_query_poison(struct
+ return rc;
+
+ if (ars_status_process_records(acpi_desc, acpi_desc->ars_status))
+- return -ENOMEM;
++ dev_err(acpi_desc->dev, "Failed to process ARS records\n");
+
+- return 0;
++ return rc;
+ }
+
+ static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
+
diff --git a/patches.fixes/fs-fix-lost-error-code-in-dio_complete.patch b/patches.fixes/fs-fix-lost-error-code-in-dio_complete.patch
new file mode 100644
index 0000000000..b5ee999ca7
--- /dev/null
+++ b/patches.fixes/fs-fix-lost-error-code-in-dio_complete.patch
@@ -0,0 +1,60 @@
+From 41e817bca3acd3980efe5dd7d28af0e6f4ab9247 Mon Sep 17 00:00:00 2001
+From: Maximilian Heyne <mheyne@amazon.de>
+Date: Fri, 30 Nov 2018 08:35:14 -0700
+Subject: [PATCH] fs: fix lost error code in dio_complete
+Git-commit: 41e817bca3acd3980efe5dd7d28af0e6f4ab9247
+Patch-mainline: v4.20-rc5
+References: bsc#1117744
+
+commit e259221763a40403d5bb232209998e8c45804ab8 ("fs: simplify the
+generic_write_sync prototype") reworked callers of generic_write_sync(),
+and ended up dropping the error return for the directio path. Prior to
+that commit, in dio_complete(), an error would be bubbled up the stack,
+but after that commit, errors passed on to dio_complete were eaten up.
+
+This was reported on the list earlier, and a fix was proposed in
+https://lore.kernel.org/lkml/20160921141539.GA17898@infradead.org/, but
+never followed up with. We recently hit this bug in our testing where
+fencing io errors, which were previously erroring out with EIO, were
+being returned as success operations after this commit.
+
+The fix proposed on the list earlier was a little short -- it would have
+still called generic_write_sync() in case `ret` already contained an
+error. This fix ensures generic_write_sync() is only called when there's
+no pending error in the write. Additionally, transferred is replaced
+with ret to bring this code in line with other callers.
+
+Fixes: e259221763a4 ("fs: simplify the generic_write_sync prototype")
+Reported-by: Ravi Nankani <rnankani@amazon.com>
+Signed-off-by: Maximilian Heyne <mheyne@amazon.de>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Cc: Torsten Mehlan <tomeh@amazon.de>
+Cc: Uwe Dannowski <uwed@amazon.de>
+Cc: Amit Shah <aams@amazon.de>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: stable@vger.kernel.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Acked-by: Jan Kara <jack@suse.cz>
+
+---
+ fs/direct-io.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/direct-io.c b/fs/direct-io.c
+index 722d17c88edb..41a0e97252ae 100644
+--- a/fs/direct-io.c
++++ b/fs/direct-io.c
+@@ -325,8 +325,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
+ */
+ dio->iocb->ki_pos += transferred;
+
+- if (dio->op == REQ_OP_WRITE)
+- ret = generic_write_sync(dio->iocb, transferred);
++ if (ret > 0 && dio->op == REQ_OP_WRITE)
++ ret = generic_write_sync(dio->iocb, ret);
+ dio->iocb->ki_complete(dio->iocb, ret, 0);
+ }
+
+--
+2.16.4
+
diff --git a/patches.kabi/revert-most-of-4.4.174.patch b/patches.kabi/revert-most-of-4.4.174.patch
new file mode 100644
index 0000000000..4e28ab44df
--- /dev/null
+++ b/patches.kabi/revert-most-of-4.4.174.patch
@@ -0,0 +1,3118 @@
+From: Jiri Slaby <jslaby@suse.cz>
+Date: Fri, 8 Feb 2019 16:44:49 +0100
+Subject: Revert most of 4.4.174.
+Patch-mainline: never, kabi
+References: kabi
+
+4.4.174 brought FragmentSmack (CVE-2018-5391) upstream fixes. They
+completely broke kABI. We have a different (and decent) fix in our tree,
+so revert this huge net stack rework.
+
+In particular, we revert these upstream commits:
+ade446403bfb (net: ipv4: do not handle duplicate fragments as overlapping)
+d55bef5059dd (net: fix pskb_trim_rcsum_slow() with odd trim offset)
+0d5b9311baf2 (inet: frags: better deal with smp races)
+70837ffe3085 (ipv4: frags: precedence bug in ip_expire())
+5d407b071dc3 (ip: frags: fix crash in ip_do_fragment())
+a4fd284a1f8f (ip: process in-order fragments efficiently)
+353c9cb36087 (ip: add helpers to process in-order fragments faster.)
+fa0f527358bd (ip: use rb trees for IP frag queue.)
+88078d98d1bb (net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends)
+0ed4229b08c1 (ipv6: defrag: drop non-last frags smaller than min mtu)
+385114dec8a4 (net: modify skb_rbtree_purge to return the truesize of all purged skbs.)
+7969e5c40dfd (ip: discard IPv4 datagrams with overlapping segments.)
+3d23401283e8 (inet: frags: fix ip6frag_low_thresh boundary)
+bf66337140c6 (inet: frags: get rid of ipfrag_skb_cb/FRAG_CB)
+c2615cf5a761 (inet: frags: reorganize struct netns_frags)
+e5d672a0780d (rhashtable: reorganize struct rhashtable layout)
+05c0b86b9696 (ipv6: frags: rewrite ip6_expire_frag_queue())
+1eec5d567008 (inet: frags: do not clone skb in ip_expire())
+3e67f106f619 (inet: frags: break the 2GB limit for frags storage)
+2d44ed22e607 (inet: frags: remove inet_frag_maybe_warn_overflow())
+399d1404be66 (inet: frags: get rif of inet_frag_evicting())
+6befe4a78b15 (inet: frags: remove some helpers)
+f6f2a4a2eb92 (ipfrag: really prevent allocation on netns exit)
+f18fa5de5ba7 (net: ieee802154: 6lowpan: fix frag reassembly)
+648700f76b03 (inet: frags: use rhashtables for reassembly units)
+ae6da1f503ab (rhashtable: add schedule points)
+ca26893f05e8 (rhashtable: Add rhashtable_lookup())
+5ca8cc5bf11f (rhashtable: add rhashtable_lookup_get_insert_key())
+807f1844df4a (inet: frags: refactor lowpan_net_frag_init())
+5b975bab2361 (inet: frags: refactor ipv6_frag_init())
+483a6e4fa055 (inet: frags: refactor ipfrag_init())
+093ba72914b6 (inet: frags: add a pointer to struct netns_frags)
+5eb2471ef43e (inet: frags: change inet_frags_init_net() return value)
+
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ Documentation/networking/ip-sysctl.txt | 13 +-
+ include/linux/rhashtable.h | 143 ++----
+ include/linux/skbuff.h | 16 +-
+ include/net/inet_frag.h | 133 +++---
+ include/net/ip.h | 1 +
+ include/net/ipv6.h | 26 +-
+ include/uapi/linux/snmp.h | 1 -
+ lib/rhashtable.c | 15 +-
+ net/core/skbuff.c | 21 +-
+ net/ieee802154/6lowpan/6lowpan_i.h | 26 +-
+ net/ieee802154/6lowpan/reassembly.c | 148 +++---
+ net/ipv4/inet_fragment.c | 389 ++++++++++++----
+ net/ipv4/ip_fragment.c | 571 +++++++++++-------------
+ net/ipv4/proc.c | 7 +-
+ net/ipv6/netfilter/nf_conntrack_reasm.c | 100 +++--
+ net/ipv6/proc.c | 5 +-
+ net/ipv6/reassembly.c | 209 +++++----
+ 17 files changed, 959 insertions(+), 865 deletions(-)
+
+diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
+index 7c229f59016f..2ea4c45cf1c8 100644
+--- a/Documentation/networking/ip-sysctl.txt
++++ b/Documentation/networking/ip-sysctl.txt
+@@ -112,11 +112,14 @@ min_adv_mss - INTEGER
+
+ IP Fragmentation:
+
+-ipfrag_high_thresh - LONG INTEGER
+- Maximum memory used to reassemble IP fragments.
+-
+-ipfrag_low_thresh - LONG INTEGER
+- (Obsolete since linux-4.17)
++ipfrag_high_thresh - INTEGER
++ Maximum memory used to reassemble IP fragments. When
++ ipfrag_high_thresh bytes of memory is allocated for this purpose,
++ the fragment handler will toss packets until ipfrag_low_thresh
++ is reached. This also serves as a maximum limit to namespaces
++ different from the initial one.
++
++ipfrag_low_thresh - INTEGER
+ Maximum memory used to reassemble IP fragments before the kernel
+ begins to remove incomplete fragment queues to free up resources.
+ The kernel still accepts new fragments for defragmentation.
+diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
+index e97cdfd6cba9..e50b31d18462 100644
+--- a/include/linux/rhashtable.h
++++ b/include/linux/rhashtable.h
+@@ -133,23 +133,23 @@ struct rhashtable_params {
+ /**
+ * struct rhashtable - Hash table handle
+ * @tbl: Bucket table
++ * @nelems: Number of elements in table
+ * @key_len: Key length for hashfn
+ * @elasticity: Maximum chain length before rehash
+ * @p: Configuration parameters
+ * @run_work: Deferred worker to expand/shrink asynchronously
+ * @mutex: Mutex to protect current/future table swapping
+ * @lock: Spin lock to protect walker list
+- * @nelems: Number of elements in table
+ */
+ struct rhashtable {
+ struct bucket_table __rcu *tbl;
++ atomic_t nelems;
+ unsigned int key_len;
+ unsigned int elasticity;
+ struct rhashtable_params p;
+ struct work_struct run_work;
+ struct mutex mutex;
+ spinlock_t lock;
+- atomic_t nelems;
+ };
+
+ /**
+@@ -343,8 +343,7 @@ int rhashtable_init(struct rhashtable *ht,
+ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
+ const void *key,
+ struct rhash_head *obj,
+- struct bucket_table *old_tbl,
+- void **data);
++ struct bucket_table *old_tbl);
+ int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl);
+
+ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter);
+@@ -515,8 +514,18 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
+ return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
+ }
+
+-/* Internal function, do not use. */
+-static inline struct rhash_head *__rhashtable_lookup(
++/**
++ * rhashtable_lookup_fast - search hash table, inlined version
++ * @ht: hash table
++ * @key: the pointer to the key
++ * @params: hash table parameters
++ *
++ * Computes the hash value for the key and traverses the bucket chain looking
++ * for a entry with an identical key. The first matching entry is returned.
++ *
++ * Returns the first entry on which the compare function returned true.
++ */
++static inline void *rhashtable_lookup_fast(
+ struct rhashtable *ht, const void *key,
+ const struct rhashtable_params params)
+ {
+@@ -528,6 +537,8 @@ static inline struct rhash_head *__rhashtable_lookup(
+ struct rhash_head *he;
+ unsigned int hash;
+
++ rcu_read_lock();
++
+ tbl = rht_dereference_rcu(ht->tbl, ht);
+ restart:
+ hash = rht_key_hashfn(ht, tbl, key, params);
+@@ -536,7 +547,8 @@ restart:
+ params.obj_cmpfn(&arg, rht_obj(ht, he)) :
+ rhashtable_compare(&arg, rht_obj(ht, he)))
+ continue;
+- return he;
++ rcu_read_unlock();
++ return rht_obj(ht, he);
+ }
+
+ /* Ensure we see any new tables. */
+@@ -545,64 +557,13 @@ restart:
+ tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ if (unlikely(tbl))
+ goto restart;
+-
+- return NULL;
+-}
+-
+-/**
+- * rhashtable_lookup - search hash table
+- * @ht: hash table
+- * @key: the pointer to the key
+- * @params: hash table parameters
+- *
+- * Computes the hash value for the key and traverses the bucket chain looking
+- * for a entry with an identical key. The first matching entry is returned.
+- *
+- * This must only be called under the RCU read lock.
+- *
+- * Returns the first entry on which the compare function returned true.
+- */
+-static inline void *rhashtable_lookup(
+- struct rhashtable *ht, const void *key,
+- const struct rhashtable_params params)
+-{
+- struct rhash_head *he = __rhashtable_lookup(ht, key, params);
+-
+- return he ? rht_obj(ht, he) : NULL;
+-}
+-
+-/**
+- * rhashtable_lookup_fast - search hash table, without RCU read lock
+- * @ht: hash table
+- * @key: the pointer to the key
+- * @params: hash table parameters
+- *
+- * Computes the hash value for the key and traverses the bucket chain looking
+- * for a entry with an identical key. The first matching entry is returned.
+- *
+- * Only use this function when you have other mechanisms guaranteeing
+- * that the object won't go away after the RCU read lock is released.
+- *
+- * Returns the first entry on which the compare function returned true.
+- */
+-static inline void *rhashtable_lookup_fast(
+- struct rhashtable *ht, const void *key,
+- const struct rhashtable_params params)
+-{
+- void *obj;
+-
+- rcu_read_lock();
+- obj = rhashtable_lookup(ht, key, params);
+ rcu_read_unlock();
+
+- return obj;
++ return NULL;
+ }
+
+-/* Internal function, please use rhashtable_insert_fast() instead. This
+- * function returns the existing element already in hashes in there is a clash,
+- * otherwise it returns an error via ERR_PTR().
+- */
+-static inline void *__rhashtable_insert_fast(
++/* Internal function, please use rhashtable_insert_fast() instead */
++static inline int __rhashtable_insert_fast(
+ struct rhashtable *ht, const void *key, struct rhash_head *obj,
+ const struct rhashtable_params params)
+ {
+@@ -615,7 +576,6 @@ static inline void *__rhashtable_insert_fast(
+ spinlock_t *lock;
+ unsigned int elasticity;
+ unsigned int hash;
+- void *data = NULL;
+ int err;
+
+ restart:
+@@ -640,14 +600,11 @@ restart:
+
+ new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ if (unlikely(new_tbl)) {
+- tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data);
++ tbl = rhashtable_insert_slow(ht, key, obj, new_tbl);
+ if (!IS_ERR_OR_NULL(tbl))
+ goto slow_path;
+
+ err = PTR_ERR(tbl);
+- if (err == -EEXIST)
+- err = 0;
+-
+ goto out;
+ }
+
+@@ -661,25 +618,25 @@ slow_path:
+ err = rhashtable_insert_rehash(ht, tbl);
+ rcu_read_unlock();
+ if (err)
+- return ERR_PTR(err);
++ return err;
+
+ goto restart;
+ }
+
+- err = 0;
++ err = -EEXIST;
+ elasticity = ht->elasticity;
+ rht_for_each(head, tbl, hash) {
+ if (key &&
+ unlikely(!(params.obj_cmpfn ?
+ params.obj_cmpfn(&arg, rht_obj(ht, head)) :
+- rhashtable_compare(&arg, rht_obj(ht, head))))) {
+- data = rht_obj(ht, head);
++ rhashtable_compare(&arg, rht_obj(ht, head)))))
+ goto out;
+- }
+ if (!--elasticity)
+ goto slow_path;
+ }
+
++ err = 0;
++
+ head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+
+ RCU_INIT_POINTER(obj->next, head);
+@@ -694,7 +651,7 @@ out:
+ spin_unlock_bh(lock);
+ rcu_read_unlock();
+
+- return err ? ERR_PTR(err) : data;
++ return err;
+ }
+
+ /**
+@@ -717,13 +674,7 @@ static inline int rhashtable_insert_fast(
+ struct rhashtable *ht, struct rhash_head *obj,
+ const struct rhashtable_params params)
+ {
+- void *ret;
+-
+- ret = __rhashtable_insert_fast(ht, NULL, obj, params);
+- if (IS_ERR(ret))
+- return PTR_ERR(ret);
+-
+- return ret == NULL ? 0 : -EEXIST;
++ return __rhashtable_insert_fast(ht, NULL, obj, params);
+ }
+
+ /**
+@@ -752,15 +703,11 @@ static inline int rhashtable_lookup_insert_fast(
+ const struct rhashtable_params params)
+ {
+ const char *key = rht_obj(ht, obj);
+- void *ret;
+
+ BUG_ON(ht->p.obj_hashfn);
+
+- ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params);
+- if (IS_ERR(ret))
+- return PTR_ERR(ret);
+-
+- return ret == NULL ? 0 : -EEXIST;
++ return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj,
++ params);
+ }
+
+ /**
+@@ -788,32 +735,6 @@ static inline int rhashtable_lookup_insert_fast(
+ static inline int rhashtable_lookup_insert_key(
+ struct rhashtable *ht, const void *key, struct rhash_head *obj,
+ const struct rhashtable_params params)
+-{
+- void *ret;
+-
+- BUG_ON(!ht->p.obj_hashfn || !key);
+-
+- ret = __rhashtable_insert_fast(ht, key, obj, params);
+- if (IS_ERR(ret))
+- return PTR_ERR(ret);
+-
+- return ret == NULL ? 0 : -EEXIST;
+-}
+-
+-/**
+- * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
+- * @ht: hash table
+- * @obj: pointer to hash head inside object
+- * @params: hash table parameters
+- * @data: pointer to element data already in hashes
+- *
+- * Just like rhashtable_lookup_insert_key(), but this function returns the
+- * object if it exists, NULL if it does not and the insertion was successful,
+- * and an ERR_PTR otherwise.
+- */
+-static inline void *rhashtable_lookup_get_insert_key(
+- struct rhashtable *ht, const void *key, struct rhash_head *obj,
+- const struct rhashtable_params params)
+ {
+ BUG_ON(!ht->p.obj_hashfn || !key);
+
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index 502787c29ce9..6d39d81d3c38 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -556,14 +556,9 @@ struct sk_buff {
+ struct skb_mstamp skb_mstamp;
+ };
+ };
+- struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */
++ struct rb_node rbnode; /* used in netem & tcp stack */
+ };
+-
+- union {
+- struct sock *sk;
+- int ip_defrag_offset;
+- };
+-
++ struct sock *sk;
+ struct net_device *dev;
+
+ /*
+@@ -2278,7 +2273,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
+ kfree_skb(skb);
+ }
+
+-unsigned int skb_rbtree_purge(struct rb_root *root);
++void skb_rbtree_purge(struct rb_root *root);
+
+ void *netdev_alloc_frag(unsigned int fragsz);
+
+@@ -2796,7 +2791,6 @@ static inline unsigned char *skb_push_rcsum(struct sk_buff *skb,
+ return skb->data;
+ }
+
+-int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len);
+ /**
+ * pskb_trim_rcsum - trim received skb and update checksum
+ * @skb: buffer to trim
+@@ -2811,7 +2805,9 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
+ {
+ if (likely(len >= skb->len))
+ return 0;
+- return pskb_trim_rcsum_slow(skb, len);
++ if (skb->ip_summed == CHECKSUM_COMPLETE)
++ skb->ip_summed = CHECKSUM_NONE;
++ return __pskb_trim(skb, len);
+ }
+
+ #define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode)
+diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
+index 6260ec146142..c26a6e4dc306 100644
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -1,19 +1,13 @@
+ #ifndef __NET_FRAG_H__
+ #define __NET_FRAG_H__
+
+-#include <linux/rhashtable.h>
+-
+ struct netns_frags {
++ /* Keep atomic mem on separate cachelines in structs that include it */
++ atomic_t mem ____cacheline_aligned_in_smp;
+ /* sysctls */
+- long high_thresh;
+- long low_thresh;
+ int timeout;
+- struct inet_frags *f;
+-
+- struct rhashtable rhashtable ____cacheline_aligned_in_smp;
+-
+- /* Keep atomic mem on separate cachelines in structs that include it */
+- atomic_long_t mem ____cacheline_aligned_in_smp;
++ int high_thresh;
++ int low_thresh;
+ };
+
+ /**
+@@ -29,68 +23,74 @@ enum {
+ INET_FRAG_COMPLETE = BIT(2),
+ };
+
+-struct frag_v4_compare_key {
+- __be32 saddr;
+- __be32 daddr;
+- u32 user;
+- u32 vif;
+- __be16 id;
+- u16 protocol;
+-};
+-
+-struct frag_v6_compare_key {
+- struct in6_addr saddr;
+- struct in6_addr daddr;
+- u32 user;
+- __be32 id;
+- u32 iif;
+-};
+-
+ /**
+ * struct inet_frag_queue - fragment queue
+ *
+- * @node: rhash node
+- * @key: keys identifying this frag.
++ * @lock: spinlock protecting the queue
+ * @timer: queue expiration timer
+- * @lock: spinlock protecting this frag
++ * @list: hash bucket list
+ * @refcnt: reference count of the queue
+ * @fragments: received fragments head
+- * @rb_fragments: received fragments rb-tree root
+ * @fragments_tail: received fragments tail
+- * @last_run_head: the head of the last "run". see ip_fragment.c
+ * @stamp: timestamp of the last received fragment
+ * @len: total length of the original datagram
+ * @meat: length of received fragments so far
+ * @flags: fragment queue flags
+ * @max_size: maximum received fragment size
+ * @net: namespace that this frag belongs to
+- * @rcu: rcu head for freeing deferall
++ * @list_evictor: list of queues to forcefully evict (e.g. due to low memory)
+ */
+ struct inet_frag_queue {
+- struct rhash_head node;
+- union {
+- struct frag_v4_compare_key v4;
+- struct frag_v6_compare_key v6;
+- } key;
+- struct timer_list timer;
+ spinlock_t lock;
++ struct timer_list timer;
++ struct hlist_node list;
+ atomic_t refcnt;
+- struct sk_buff *fragments; /* Used in IPv6. */
+- struct rb_root rb_fragments; /* Used in IPv4. */
++ struct sk_buff *fragments;
+ struct sk_buff *fragments_tail;
+- struct sk_buff *last_run_head;
+ ktime_t stamp;
+ int len;
+ int meat;
+ __u8 flags;
+ u16 max_size;
+- struct netns_frags *net;
+- struct rcu_head rcu;
++ struct netns_frags *net;
++ struct hlist_node list_evictor;
++};
++
++#define INETFRAGS_HASHSZ 1024
++
++/* averaged:
++ * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ /
++ * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or
++ * struct frag_queue))
++ */
++#define INETFRAGS_MAXDEPTH 128
++
++struct inet_frag_bucket {
++ struct hlist_head chain;
++ spinlock_t chain_lock;
+ };
+
+ struct inet_frags {
++ struct inet_frag_bucket hash[INETFRAGS_HASHSZ];
++
++ struct work_struct frags_work;
++ unsigned int next_bucket;
++ unsigned long last_rebuild_jiffies;
++ bool rebuild;
++
++ /* The first call to hashfn is responsible to initialize
++ * rnd. This is best done with net_get_random_once.
++ *
++ * rnd_seqlock is used to let hash insertion detect
++ * when it needs to re-lookup the hash chain to use.
++ */
++ u32 rnd;
++ seqlock_t rnd_seqlock;
+ int qsize;
+
++ unsigned int (*hashfn)(const struct inet_frag_queue *);
++ bool (*match)(const struct inet_frag_queue *q,
++ const void *arg);
+ void (*constructor)(struct inet_frag_queue *q,
+ const void *arg);
+ void (*destructor)(struct inet_frag_queue *);
+@@ -98,47 +98,56 @@ struct inet_frags {
+ void (*frag_expire)(unsigned long data);
+ struct kmem_cache *frags_cachep;
+ const char *frags_cache_name;
+- struct rhashtable_params rhash_params;
+ };
+
+ int inet_frags_init(struct inet_frags *);
+ void inet_frags_fini(struct inet_frags *);
+
+-static inline int inet_frags_init_net(struct netns_frags *nf)
++static inline void inet_frags_init_net(struct netns_frags *nf)
+ {
+- atomic_long_set(&nf->mem, 0);
+- return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
++ atomic_set(&nf->mem, 0);
+ }
+-void inet_frags_exit_net(struct netns_frags *nf);
++void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
+
+-void inet_frag_kill(struct inet_frag_queue *q);
+-void inet_frag_destroy(struct inet_frag_queue *q);
+-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
++void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
++void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f);
++struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
++ struct inet_frags *f, void *key, unsigned int hash);
+
+-/* Free all skbs in the queue; return the sum of their truesizes. */
+-unsigned int inet_frag_rbtree_purge(struct rb_root *root);
++void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
++ const char *prefix);
+
+-static inline void inet_frag_put(struct inet_frag_queue *q)
++static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
+ {
+ if (atomic_dec_and_test(&q->refcnt))
+- inet_frag_destroy(q);
++ inet_frag_destroy(q, f);
++}
++
++static inline bool inet_frag_evicting(struct inet_frag_queue *q)
++{
++ return !hlist_unhashed(&q->list_evictor);
+ }
+
+ /* Memory Tracking Functions. */
+
+-static inline long frag_mem_limit(const struct netns_frags *nf)
++static inline int frag_mem_limit(struct netns_frags *nf)
++{
++ return atomic_read(&nf->mem);
++}
++
++static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
+ {
+- return atomic_long_read(&nf->mem);
++ atomic_sub(i, &nf->mem);
+ }
+
+-static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
++static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
+ {
+- atomic_long_sub(val, &nf->mem);
++ atomic_add(i, &nf->mem);
+ }
+
+-static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
++static inline int sum_frag_mem_limit(struct netns_frags *nf)
+ {
+- atomic_long_add(val, &nf->mem);
++ return atomic_read(&nf->mem);
+ }
+
+ /* RFC 3168 support :
+diff --git a/include/net/ip.h b/include/net/ip.h
+index 7b968927477d..0530bcdbc212 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -524,6 +524,7 @@ static inline struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *s
+ return skb;
+ }
+ #endif
++int ip_frag_mem(struct net *net);
+
+ /*
+ * Functions provided by ip_forward.c
+diff --git a/include/net/ipv6.h b/include/net/ipv6.h
+index c07cf9596b6f..0e01d570fa22 100644
+--- a/include/net/ipv6.h
++++ b/include/net/ipv6.h
+@@ -320,6 +320,13 @@ static inline bool ipv6_accept_ra(struct inet6_dev *idev)
+ idev->cnf.accept_ra;
+ }
+
++#if IS_ENABLED(CONFIG_IPV6)
++static inline int ip6_frag_mem(struct net *net)
++{
++ return sum_frag_mem_limit(&net->ipv6.frags);
++}
++#endif
++
+ #define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */
+ #define IPV6_FRAG_LOW_THRESH (3 * 1024*1024) /* 3145728 */
+ #define IPV6_FRAG_TIMEOUT (60 * HZ) /* 60 seconds */
+@@ -498,8 +505,17 @@ enum ip6_defrag_users {
+ __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
+ };
+
++struct ip6_create_arg {
++ __be32 id;
++ u32 user;
++ const struct in6_addr *src;
++ const struct in6_addr *dst;
++ int iif;
++ u8 ecn;
++};
++
+ void ip6_frag_init(struct inet_frag_queue *q, const void *a);
+-extern const struct rhashtable_params ip6_rhash_params;
++bool ip6_frag_match(const struct inet_frag_queue *q, const void *a);
+
+ /*
+ * Equivalent of ipv4 struct ip
+@@ -507,13 +523,19 @@ extern const struct rhashtable_params ip6_rhash_params;
+ struct frag_queue {
+ struct inet_frag_queue q;
+
++ __be32 id; /* fragment id */
++ u32 user;
++ struct in6_addr saddr;
++ struct in6_addr daddr;
++
+ int iif;
+ unsigned int csum;
+ __u16 nhoffset;
+ u8 ecn;
+ };
+
+-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq);
++void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
++ struct inet_frags *frags);
+
+ static inline bool ipv6_addr_any(const struct in6_addr *a)
+ {
+diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
+index 9de808ebce05..25a9ad8bcef1 100644
+--- a/include/uapi/linux/snmp.h
++++ b/include/uapi/linux/snmp.h
+@@ -55,7 +55,6 @@ enum
+ IPSTATS_MIB_ECT1PKTS, /* InECT1Pkts */
+ IPSTATS_MIB_ECT0PKTS, /* InECT0Pkts */
+ IPSTATS_MIB_CEPKTS, /* InCEPkts */
+- IPSTATS_MIB_REASM_OVERLAPS, /* ReasmOverlaps */
+ __IPSTATS_MIB_MAX
+ };
+
+diff --git a/lib/rhashtable.c b/lib/rhashtable.c
+index 7bb8649429bf..37ea94b636a3 100644
+--- a/lib/rhashtable.c
++++ b/lib/rhashtable.c
+@@ -250,10 +250,8 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
+ if (!new_tbl)
+ return 0;
+
+- for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
++ for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
+ rhashtable_rehash_chain(ht, old_hash);
+- cond_resched();
+- }
+
+ /* Publish the new table pointer. */
+ rcu_assign_pointer(ht->tbl, new_tbl);
+@@ -443,8 +441,7 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
+ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
+ const void *key,
+ struct rhash_head *obj,
+- struct bucket_table *tbl,
+- void **data)
++ struct bucket_table *tbl)
+ {
+ struct rhash_head *head;
+ unsigned int hash;
+@@ -455,11 +452,8 @@ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
+ spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
+
+ err = -EEXIST;
+- if (key) {
+- *data = rhashtable_lookup_fast(ht, key, ht->p);
+- if (*data)
+- goto exit;
+- }
++ if (key && rhashtable_lookup_fast(ht, key, ht->p))
++ goto exit;
+
+ err = -E2BIG;
+ if (unlikely(rht_grow_above_max(ht, tbl)))
+@@ -844,7 +838,6 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
+ for (i = 0; i < tbl->size; i++) {
+ struct rhash_head *pos, *next;
+
+- cond_resched();
+ for (pos = rht_dereference(tbl->buckets[i], ht),
+ next = !rht_is_a_nulls(pos) ?
+ rht_dereference(pos->next, ht) : NULL;
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index fea7c24e99d0..8a57bbaf7452 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -1502,21 +1502,6 @@ done:
+ }
+ EXPORT_SYMBOL(___pskb_trim);
+
+-/* Note : use pskb_trim_rcsum() instead of calling this directly
+- */
+-int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
+-{
+- if (skb->ip_summed == CHECKSUM_COMPLETE) {
+- int delta = skb->len - len;
+-
+- skb->csum = csum_block_sub(skb->csum,
+- skb_checksum(skb, len, delta, 0),
+- len);
+- }
+- return __pskb_trim(skb, len);
+-}
+-EXPORT_SYMBOL(pskb_trim_rcsum_slow);
+-
+ /**
+ * __pskb_pull_tail - advance tail of skb header
+ * @skb: buffer to reallocate
+@@ -2395,27 +2380,23 @@ EXPORT_SYMBOL(skb_queue_purge);
+ /**
+ * skb_rbtree_purge - empty a skb rbtree
+ * @root: root of the rbtree to empty
+- * Return value: the sum of truesizes of all purged skbs.
+ *
+ * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
+ * the list and one reference dropped. This function does not take
+ * any lock. Synchronization should be handled by the caller (e.g., TCP
+ * out-of-order queue is protected by the socket lock).
+ */
+-unsigned int skb_rbtree_purge(struct rb_root *root)
++void skb_rbtree_purge(struct rb_root *root)
+ {
+ struct rb_node *p = rb_first(root);
+- unsigned int sum = 0;
+
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
+- sum += skb->truesize;
+ kfree_skb(skb);
+ }
+- return sum;
+ }
+
+ /**
+diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
+index fdbebe51446f..b4e17a7c0df0 100644
+--- a/net/ieee802154/6lowpan/6lowpan_i.h
++++ b/net/ieee802154/6lowpan/6lowpan_i.h
+@@ -16,19 +16,37 @@ typedef unsigned __bitwise__ lowpan_rx_result;
+ #define LOWPAN_DISPATCH_FRAG1 0xc0
+ #define LOWPAN_DISPATCH_FRAGN 0xe0
+
+-struct frag_lowpan_compare_key {
++struct lowpan_create_arg {
+ u16 tag;
+ u16 d_size;
+- struct ieee802154_addr src;
+- struct ieee802154_addr dst;
++ const struct ieee802154_addr *src;
++ const struct ieee802154_addr *dst;
+ };
+
+-/* Equivalent of ipv4 struct ipq
++/* Equivalent of ipv4 struct ipq
+ */
+ struct lowpan_frag_queue {
+ struct inet_frag_queue q;
++
++ u16 tag;
++ u16 d_size;
++ struct ieee802154_addr saddr;
++ struct ieee802154_addr daddr;
+ };
+
++static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
++{
++ switch (a->mode) {
++ case IEEE802154_ADDR_LONG:
++ return (((__force u64)a->extended_addr) >> 32) ^
++ (((__force u64)a->extended_addr) & 0xffffffff);
++ case IEEE802154_ADDR_SHORT:
++ return (__force u32)(a->short_addr);
++ default:
++ return 0;
++ }
++}
++
+ /* private device info */
+ struct lowpan_dev_info {
+ struct net_device *wdev; /* wpan device ptr */
+diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
+index 6183730d38db..12e8cf4bda9f 100644
+--- a/net/ieee802154/6lowpan/reassembly.c
++++ b/net/ieee802154/6lowpan/reassembly.c
+@@ -37,15 +37,47 @@ static struct inet_frags lowpan_frags;
+ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
+ struct sk_buff *prev, struct net_device *ldev);
+
++static unsigned int lowpan_hash_frag(u16 tag, u16 d_size,
++ const struct ieee802154_addr *saddr,
++ const struct ieee802154_addr *daddr)
++{
++ net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd));
++ return jhash_3words(ieee802154_addr_hash(saddr),
++ ieee802154_addr_hash(daddr),
++ (__force u32)(tag + (d_size << 16)),
++ lowpan_frags.rnd);
++}
++
++static unsigned int lowpan_hashfn(const struct inet_frag_queue *q)
++{
++ const struct lowpan_frag_queue *fq;
++
++ fq = container_of(q, struct lowpan_frag_queue, q);
++ return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr);
++}
++
++static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a)
++{
++ const struct lowpan_frag_queue *fq;
++ const struct lowpan_create_arg *arg = a;
++
++ fq = container_of(q, struct lowpan_frag_queue, q);
++ return fq->tag == arg->tag && fq->d_size == arg->d_size &&
++ ieee802154_addr_equal(&fq->saddr, arg->src) &&
++ ieee802154_addr_equal(&fq->daddr, arg->dst);
++}
++
+ static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
+ {
+- const struct frag_lowpan_compare_key *key = a;
++ const struct lowpan_create_arg *arg = a;
+ struct lowpan_frag_queue *fq;
+
+ fq = container_of(q, struct lowpan_frag_queue, q);
+
+- BUILD_BUG_ON(sizeof(*key) > sizeof(q->key));
+- memcpy(&q->key, key, sizeof(*key));
++ fq->tag = arg->tag;
++ fq->d_size = arg->d_size;
++ fq->saddr = *arg->src;
++ fq->daddr = *arg->dst;
+ }
+
+ static void lowpan_frag_expire(unsigned long data)
+@@ -61,10 +93,10 @@ static void lowpan_frag_expire(unsigned long data)
+ if (fq->q.flags & INET_FRAG_COMPLETE)
+ goto out;
+
+- inet_frag_kill(&fq->q);
++ inet_frag_kill(&fq->q, &lowpan_frags);
+ out:
+ spin_unlock(&fq->q.lock);
+- inet_frag_put(&fq->q);
++ inet_frag_put(&fq->q, &lowpan_frags);
+ }
+
+ static inline struct lowpan_frag_queue *
+@@ -72,20 +104,25 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
+ const struct ieee802154_addr *src,
+ const struct ieee802154_addr *dst)
+ {
++ struct inet_frag_queue *q;
++ struct lowpan_create_arg arg;
++ unsigned int hash;
+ struct netns_ieee802154_lowpan *ieee802154_lowpan =
+ net_ieee802154_lowpan(net);
+- struct frag_lowpan_compare_key key = {};
+- struct inet_frag_queue *q;
+
+- key.tag = cb->d_tag;
+- key.d_size = cb->d_size;
+- key.src = *src;
+- key.dst = *dst;
++ arg.tag = cb->d_tag;
++ arg.d_size = cb->d_size;
++ arg.src = src;
++ arg.dst = dst;
+
+- q = inet_frag_find(&ieee802154_lowpan->frags, &key);
+- if (!q)
+- return NULL;
++ hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst);
+
++ q = inet_frag_find(&ieee802154_lowpan->frags,
++ &lowpan_frags, &arg, hash);
++ if (IS_ERR_OR_NULL(q)) {
++ inet_frag_maybe_warn_overflow(q, pr_fmt());
++ return NULL;
++ }
+ return container_of(q, struct lowpan_frag_queue, q);
+ }
+
+@@ -192,7 +229,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
+ struct sk_buff *fp, *head = fq->q.fragments;
+ int sum_truesize;
+
+- inet_frag_kill(&fq->q);
++ inet_frag_kill(&fq->q, &lowpan_frags);
+
+ /* Make the one we just received the head. */
+ if (prev) {
+@@ -371,7 +408,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
+ struct lowpan_frag_queue *fq;
+ struct net *net = dev_net(skb->dev);
+ struct lowpan_802154_cb *cb = lowpan_802154_cb(skb);
+- struct ieee802154_hdr hdr = {};
++ struct ieee802154_hdr hdr;
+ int err;
+
+ if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
+@@ -400,7 +437,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
+ ret = lowpan_frag_queue(fq, skb, frag_type);
+ spin_unlock(&fq->q.lock);
+
+- inet_frag_put(&fq->q);
++ inet_frag_put(&fq->q, &lowpan_frags);
+ return ret;
+ }
+
+@@ -410,22 +447,24 @@ err:
+ }
+
+ #ifdef CONFIG_SYSCTL
++static int zero;
+
+ static struct ctl_table lowpan_frags_ns_ctl_table[] = {
+ {
+ .procname = "6lowpanfrag_high_thresh",
+ .data = &init_net.ieee802154_lowpan.frags.high_thresh,
+- .maxlen = sizeof(unsigned long),
++ .maxlen = sizeof(int),
+ .mode = 0644,
+- .proc_handler = proc_doulongvec_minmax,
++ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &init_net.ieee802154_lowpan.frags.low_thresh
+ },
+ {
+ .procname = "6lowpanfrag_low_thresh",
+ .data = &init_net.ieee802154_lowpan.frags.low_thresh,
+- .maxlen = sizeof(unsigned long),
++ .maxlen = sizeof(int),
+ .mode = 0644,
+- .proc_handler = proc_doulongvec_minmax,
++ .proc_handler = proc_dointvec_minmax,
++ .extra1 = &zero,
+ .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
+ },
+ {
+@@ -541,20 +580,14 @@ static int __net_init lowpan_frags_init_net(struct net *net)
+ {
+ struct netns_ieee802154_lowpan *ieee802154_lowpan =
+ net_ieee802154_lowpan(net);
+- int res;
+
+ ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
+- ieee802154_lowpan->frags.f = &lowpan_frags;
+
+- res = inet_frags_init_net(&ieee802154_lowpan->frags);
+- if (res < 0)
+- return res;
+- res = lowpan_frags_ns_sysctl_register(net);
+- if (res < 0)
+- inet_frags_exit_net(&ieee802154_lowpan->frags);
+- return res;
++ inet_frags_init_net(&ieee802154_lowpan->frags);
++
++ return lowpan_frags_ns_sysctl_register(net);
+ }
+
+ static void __net_exit lowpan_frags_exit_net(struct net *net)
+@@ -563,7 +596,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
+ net_ieee802154_lowpan(net);
+
+ lowpan_frags_ns_sysctl_unregister(net);
+- inet_frags_exit_net(&ieee802154_lowpan->frags);
++ inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
+ }
+
+ static struct pernet_operations lowpan_frags_ops = {
+@@ -571,64 +604,33 @@ static struct pernet_operations lowpan_frags_ops = {
+ .exit = lowpan_frags_exit_net,
+ };
+
+-static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed)
+-{
+- return jhash2(data,
+- sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+-}
+-
+-static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed)
+-{
+- const struct inet_frag_queue *fq = data;
+-
+- return jhash2((const u32 *)&fq->key,
+- sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+-}
+-
+-static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+-{
+- const struct frag_lowpan_compare_key *key = arg->key;
+- const struct inet_frag_queue *fq = ptr;
+-
+- return !!memcmp(&fq->key, key, sizeof(*key));
+-}
+-
+-static const struct rhashtable_params lowpan_rhash_params = {
+- .head_offset = offsetof(struct inet_frag_queue, node),
+- .hashfn = lowpan_key_hashfn,
+- .obj_hashfn = lowpan_obj_hashfn,
+- .obj_cmpfn = lowpan_obj_cmpfn,
+- .automatic_shrinking = true,
+-};
+-
+ int __init lowpan_net_frag_init(void)
+ {
+ int ret;
+
++ ret = lowpan_frags_sysctl_register();
++ if (ret)
++ return ret;
++
++ ret = register_pernet_subsys(&lowpan_frags_ops);
++ if (ret)
++ goto err_pernet;
++
++ lowpan_frags.hashfn = lowpan_hashfn;
+ lowpan_frags.constructor = lowpan_frag_init;
+ lowpan_frags.destructor = NULL;
+ lowpan_frags.skb_free = NULL;
+ lowpan_frags.qsize = sizeof(struct frag_queue);
++ lowpan_frags.match = lowpan_frag_match;
+ lowpan_frags.frag_expire = lowpan_frag_expire;
+ lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
+- lowpan_frags.rhash_params = lowpan_rhash_params;
+ ret = inet_frags_init(&lowpan_frags);
+- if (ret)
+- goto out;
+-
+- ret = lowpan_frags_sysctl_register();
+- if (ret)
+- goto err_sysctl;
+-
+- ret = register_pernet_subsys(&lowpan_frags_ops);
+ if (ret)
+ goto err_pernet;
+-out:
++
+ return ret;
+ err_pernet:
+ lowpan_frags_sysctl_unregister();
+-err_sysctl:
+- inet_frags_fini(&lowpan_frags);
+ return ret;
+ }
+
+diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
+index c03e5f5859e1..b2001b20e029 100644
+--- a/net/ipv4/inet_fragment.c
++++ b/net/ipv4/inet_fragment.c
+@@ -25,6 +25,12 @@
+ #include <net/inet_frag.h>
+ #include <net/inet_ecn.h>
+
++#define INETFRAGS_EVICT_BUCKETS 128
++#define INETFRAGS_EVICT_MAX 512
++
++/* don't rebuild inetfrag table with new secret more often than this */
++#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)
++
+ /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
+ * Value : 0xff if frame should be dropped.
+ * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
+@@ -46,8 +52,157 @@ const u8 ip_frag_ecn_table[16] = {
+ };
+ EXPORT_SYMBOL(ip_frag_ecn_table);
+
++static unsigned int
++inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
++{
++ return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
++}
++
++static bool inet_frag_may_rebuild(struct inet_frags *f)
++{
++ return time_after(jiffies,
++ f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
++}
++
++static void inet_frag_secret_rebuild(struct inet_frags *f)
++{
++ int i;
++
++ write_seqlock_bh(&f->rnd_seqlock);
++
++ if (!inet_frag_may_rebuild(f))
++ goto out;
++
++ get_random_bytes(&f->rnd, sizeof(u32));
++
++ for (i = 0; i < INETFRAGS_HASHSZ; i++) {
++ struct inet_frag_bucket *hb;
++ struct inet_frag_queue *q;
++ struct hlist_node *n;
++
++ hb = &f->hash[i];
++ spin_lock(&hb->chain_lock);
++
++ hlist_for_each_entry_safe(q, n, &hb->chain, list) {
++ unsigned int hval = inet_frag_hashfn(f, q);
++
++ if (hval != i) {
++ struct inet_frag_bucket *hb_dest;
++
++ hlist_del(&q->list);
++
++ /* Relink to new hash chain. */
++ hb_dest = &f->hash[hval];
++
++ /* This is the only place where we take
++ * another chain_lock while already holding
++ * one. As this will not run concurrently,
++ * we cannot deadlock on hb_dest lock below, if it's
++ * already locked it will be released soon since
++ * other caller cannot be waiting for hb lock
++ * that we've taken above.
++ */
++ spin_lock_nested(&hb_dest->chain_lock,
++ SINGLE_DEPTH_NESTING);
++ hlist_add_head(&q->list, &hb_dest->chain);
++ spin_unlock(&hb_dest->chain_lock);
++ }
++ }
++ spin_unlock(&hb->chain_lock);
++ }
++
++ f->rebuild = false;
++ f->last_rebuild_jiffies = jiffies;
++out:
++ write_sequnlock_bh(&f->rnd_seqlock);
++}
++
++static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
++{
++ if (!hlist_unhashed(&q->list_evictor))
++ return false;
++
++ return q->net->low_thresh == 0 ||
++ frag_mem_limit(q->net) >= q->net->low_thresh;
++}
++
++static unsigned int
++inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
++{
++ struct inet_frag_queue *fq;
++ struct hlist_node *n;
++ unsigned int evicted = 0;
++ HLIST_HEAD(expired);
++
++ spin_lock(&hb->chain_lock);
++
++ hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
++ if (!inet_fragq_should_evict(fq))
++ continue;
++
++ if (!del_timer(&fq->timer))
++ continue;
++
++ hlist_add_head(&fq->list_evictor, &expired);
++ ++evicted;
++ }
++
++ spin_unlock(&hb->chain_lock);
++
++ hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
++ f->frag_expire((unsigned long) fq);
++
++ return evicted;
++}
++
++static void inet_frag_worker(struct work_struct *work)
++{
++ unsigned int budget = INETFRAGS_EVICT_BUCKETS;
++ unsigned int i, evicted = 0;
++ struct inet_frags *f;
++
++ f = container_of(work, struct inet_frags, frags_work);
++
++ BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
++
++ local_bh_disable();
++
++ for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
++ evicted += inet_evict_bucket(f, &f->hash[i]);
++ i = (i + 1) & (INETFRAGS_HASHSZ - 1);
++ if (evicted > INETFRAGS_EVICT_MAX)
++ break;
++ }
++
++ f->next_bucket = i;
++
++ local_bh_enable();
++
++ if (f->rebuild && inet_frag_may_rebuild(f))
++ inet_frag_secret_rebuild(f);
++}
++
++static void inet_frag_schedule_worker(struct inet_frags *f)
++{
++ if (unlikely(!work_pending(&f->frags_work)))
++ schedule_work(&f->frags_work);
++}
++
+ int inet_frags_init(struct inet_frags *f)
+ {
++ int i;
++
++ INIT_WORK(&f->frags_work, inet_frag_worker);
++
++ for (i = 0; i < INETFRAGS_HASHSZ; i++) {
++ struct inet_frag_bucket *hb = &f->hash[i];
++
++ spin_lock_init(&hb->chain_lock);
++ INIT_HLIST_HEAD(&hb->chain);
++ }
++
++ seqlock_init(&f->rnd_seqlock);
++ f->last_rebuild_jiffies = 0;
+ f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
+ NULL);
+ if (!f->frags_cachep)
+@@ -59,53 +214,73 @@ EXPORT_SYMBOL(inet_frags_init);
+
+ void inet_frags_fini(struct inet_frags *f)
+ {
+- /* We must wait that all inet_frag_destroy_rcu() have completed. */
+- rcu_barrier();
+-
++ cancel_work_sync(&f->frags_work);
+ kmem_cache_destroy(f->frags_cachep);
+- f->frags_cachep = NULL;
+ }
+ EXPORT_SYMBOL(inet_frags_fini);
+
+-static void inet_frags_free_cb(void *ptr, void *arg)
++void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
+ {
+- struct inet_frag_queue *fq = ptr;
++ unsigned int seq;
++ int i;
+
+- /* If we can not cancel the timer, it means this frag_queue
+- * is already disappearing, we have nothing to do.
+- * Otherwise, we own a refcount until the end of this function.
+- */
+- if (!del_timer(&fq->timer))
+- return;
++ nf->low_thresh = 0;
+
+- spin_lock_bh(&fq->lock);
+- if (!(fq->flags & INET_FRAG_COMPLETE)) {
+- fq->flags |= INET_FRAG_COMPLETE;
+- atomic_dec(&fq->refcnt);
++evict_again:
++ local_bh_disable();
++ seq = read_seqbegin(&f->rnd_seqlock);
++
++ for (i = 0; i < INETFRAGS_HASHSZ ; i++)
++ inet_evict_bucket(f, &f->hash[i]);
++
++ local_bh_enable();
++ cond_resched();
++
++ if (read_seqretry(&f->rnd_seqlock, seq) ||
++ sum_frag_mem_limit(nf))
++ goto evict_again;
++}
++EXPORT_SYMBOL(inet_frags_exit_net);
++
++static struct inet_frag_bucket *
++get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
++__acquires(hb->chain_lock)
++{
++ struct inet_frag_bucket *hb;
++ unsigned int seq, hash;
++
++ restart:
++ seq = read_seqbegin(&f->rnd_seqlock);
++
++ hash = inet_frag_hashfn(f, fq);
++ hb = &f->hash[hash];
++
++ spin_lock(&hb->chain_lock);
++ if (read_seqretry(&f->rnd_seqlock, seq)) {
++ spin_unlock(&hb->chain_lock);
++ goto restart;
+ }
+- spin_unlock_bh(&fq->lock);
+
+- inet_frag_put(fq);
++ return hb;
+ }
+
+-void inet_frags_exit_net(struct netns_frags *nf)
++static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+ {
+- nf->high_thresh = 0; /* prevent creation of new frags */
++ struct inet_frag_bucket *hb;
+
+- rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
++ hb = get_frag_bucket_locked(fq, f);
++ hlist_del(&fq->list);
++ fq->flags |= INET_FRAG_COMPLETE;
++ spin_unlock(&hb->chain_lock);
+ }
+-EXPORT_SYMBOL(inet_frags_exit_net);
+
+-void inet_frag_kill(struct inet_frag_queue *fq)
++void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
+ {
+ if (del_timer(&fq->timer))
+ atomic_dec(&fq->refcnt);
+
+ if (!(fq->flags & INET_FRAG_COMPLETE)) {
+- struct netns_frags *nf = fq->net;
+-
+- fq->flags |= INET_FRAG_COMPLETE;
+- rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
++ fq_unlink(fq, f);
+ atomic_dec(&fq->refcnt);
+ }
+ }
+@@ -119,23 +294,11 @@ static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
+ kfree_skb(skb);
+ }
+
+-static void inet_frag_destroy_rcu(struct rcu_head *head)
+-{
+- struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
+- rcu);
+- struct inet_frags *f = q->net->f;
+-
+- if (f->destructor)
+- f->destructor(q);
+- kmem_cache_free(f->frags_cachep, q);
+-}
+-
+-void inet_frag_destroy(struct inet_frag_queue *q)
++void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
+ {
+ struct sk_buff *fp;
+ struct netns_frags *nf;
+ unsigned int sum, sum_truesize = 0;
+- struct inet_frags *f;
+
+ WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
+ WARN_ON(del_timer(&q->timer) != 0);
+@@ -143,35 +306,64 @@ void inet_frag_destroy(struct inet_frag_queue *q)
+ /* Release all fragment data. */
+ fp = q->fragments;
+ nf = q->net;
+- f = nf->f;
+- if (fp) {
+- do {
+- struct sk_buff *xp = fp->next;
+-
+- sum_truesize += fp->truesize;
+- frag_kfree_skb(nf, f, fp);
+- fp = xp;
+- } while (fp);
+- } else {
+- sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
++ while (fp) {
++ struct sk_buff *xp = fp->next;
++
++ sum_truesize += fp->truesize;
++ frag_kfree_skb(nf, f, fp);
++ fp = xp;
+ }
+ sum = sum_truesize + f->qsize;
+
+- call_rcu(&q->rcu, inet_frag_destroy_rcu);
++ if (f->destructor)
++ f->destructor(q);
++ kmem_cache_free(f->frags_cachep, q);
+
+ sub_frag_mem_limit(nf, sum);
+ }
+ EXPORT_SYMBOL(inet_frag_destroy);
+
++static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
++ struct inet_frag_queue *qp_in,
++ struct inet_frags *f,
++ void *arg)
++{
++ struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
++ struct inet_frag_queue *qp;
++
++#ifdef CONFIG_SMP
++ /* With SMP race we have to recheck hash table, because
++ * such entry could have been created on other cpu before
++ * we acquired hash bucket lock.
++ */
++ hlist_for_each_entry(qp, &hb->chain, list) {
++ if (qp->net == nf && f->match(qp, arg)) {
++ atomic_inc(&qp->refcnt);
++ spin_unlock(&hb->chain_lock);
++ qp_in->flags |= INET_FRAG_COMPLETE;
++ inet_frag_put(qp_in, f);
++ return qp;
++ }
++ }
++#endif
++ qp = qp_in;
++ if (!mod_timer(&qp->timer, jiffies + nf->timeout))
++ atomic_inc(&qp->refcnt);
++
++ atomic_inc(&qp->refcnt);
++ hlist_add_head(&qp->list, &hb->chain);
++
++ spin_unlock(&hb->chain_lock);
++
++ return qp;
++}
++
+ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
+ struct inet_frags *f,
+ void *arg)
+ {
+ struct inet_frag_queue *q;
+
+- if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
+- return NULL;
+-
+ q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
+ if (!q)
+ return NULL;
+@@ -182,52 +374,75 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
+
+ setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
+ spin_lock_init(&q->lock);
+- atomic_set(&q->refcnt, 3);
++ atomic_set(&q->refcnt, 1);
+
+ return q;
+ }
+
+ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
+- void *arg,
+- struct inet_frag_queue **prev)
++ struct inet_frags *f,
++ void *arg)
+ {
+- struct inet_frags *f = nf->f;
+ struct inet_frag_queue *q;
+
+ q = inet_frag_alloc(nf, f, arg);
+- if (!q) {
+- *prev = ERR_PTR(-ENOMEM);
+- return NULL;
+- }
+- mod_timer(&q->timer, jiffies + nf->timeout);
+-
+- *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
+- &q->node, f->rhash_params);
+- if (*prev) {
+- q->flags |= INET_FRAG_COMPLETE;
+- inet_frag_kill(q);
+- inet_frag_destroy(q);
++ if (!q)
+ return NULL;
+- }
+- return q;
++
++ return inet_frag_intern(nf, q, f, arg);
+ }
+-EXPORT_SYMBOL(inet_frag_create);
+
+-/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
+-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
++struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
++ struct inet_frags *f, void *key,
++ unsigned int hash)
+ {
+- struct inet_frag_queue *fq = NULL, *prev;
++ struct inet_frag_bucket *hb;
++ struct inet_frag_queue *q;
++ int depth = 0;
++
++ if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) {
++ inet_frag_schedule_worker(f);
++ return NULL;
++ }
++
++ if (frag_mem_limit(nf) > nf->low_thresh)
++ inet_frag_schedule_worker(f);
++
++ hash &= (INETFRAGS_HASHSZ - 1);
++ hb = &f->hash[hash];
++
++ spin_lock(&hb->chain_lock);
++ hlist_for_each_entry(q, &hb->chain, list) {
++ if (q->net == nf && f->match(q, key)) {
++ atomic_inc(&q->refcnt);
++ spin_unlock(&hb->chain_lock);
++ return q;
++ }
++ depth++;
++ }
++ spin_unlock(&hb->chain_lock);
++
++ if (depth <= INETFRAGS_MAXDEPTH)
++ return inet_frag_create(nf, f, key);
+
+- rcu_read_lock();
+- prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+- if (!prev)
+- fq = inet_frag_create(nf, key, &prev);
+- if (prev && !IS_ERR(prev)) {
+- fq = prev;
+- if (!atomic_inc_not_zero(&fq->refcnt))
+- fq = NULL;
++ if (inet_frag_may_rebuild(f)) {
++ if (!f->rebuild)
++ f->rebuild = true;
++ inet_frag_schedule_worker(f);
+ }
+- rcu_read_unlock();
+- return fq;
++
++ return ERR_PTR(-ENOBUFS);
+ }
+ EXPORT_SYMBOL(inet_frag_find);
++
++void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
++ const char *prefix)
++{
++ static const char msg[] = "inet_frag_find: Fragment hash bucket"
++ " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH)
++ ". Dropping fragment.\n";
++
++ if (PTR_ERR(q) == -ENOBUFS)
++ net_dbg_ratelimited("%s%s", prefix, msg);
++}
++EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index 9b09a9b5a4fe..72915658a6b1 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -58,64 +58,27 @@
+ static int sysctl_ipfrag_max_dist __read_mostly = 64;
+ static const char ip_frag_cache_name[] = "ip4-frags";
+
+-/* Use skb->cb to track consecutive/adjacent fragments coming at
+- * the end of the queue. Nodes in the rb-tree queue will
+- * contain "runs" of one or more adjacent fragments.
+- *
+- * Invariants:
+- * - next_frag is NULL at the tail of a "run";
+- * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
+- */
+-struct ipfrag_skb_cb {
++struct ipfrag_skb_cb
++{
+ struct inet_skb_parm h;
+- struct sk_buff *next_frag;
+- int frag_run_len;
++ int offset;
+ };
+
+-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
+-
+-static void ip4_frag_init_run(struct sk_buff *skb)
+-{
+- BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
+-
+- FRAG_CB(skb)->next_frag = NULL;
+- FRAG_CB(skb)->frag_run_len = skb->len;
+-}
+-
+-/* Append skb to the last "run". */
+-static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
+- struct sk_buff *skb)
+-{
+- RB_CLEAR_NODE(&skb->rbnode);
+- FRAG_CB(skb)->next_frag = NULL;
+-
+- FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
+- FRAG_CB(q->fragments_tail)->next_frag = skb;
+- q->fragments_tail = skb;
+-}
+-
+-/* Create a new "run" with the skb. */
+-static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
+-{
+- if (q->last_run_head)
+- rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
+- &q->last_run_head->rbnode.rb_right);
+- else
+- rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
+- rb_insert_color(&skb->rbnode, &q->rb_fragments);
+-
+- ip4_frag_init_run(skb);
+- q->fragments_tail = skb;
+- q->last_run_head = skb;
+-}
++#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
+
+ /* Describe an entry in the "incomplete datagrams" queue. */
+ struct ipq {
+ struct inet_frag_queue q;
+
++ u32 user;
++ __be32 saddr;
++ __be32 daddr;
++ __be16 id;
++ u8 protocol;
+ u8 ecn; /* RFC3168 support */
+ u16 max_df_size; /* largest frag with DF set seen */
+ int iif;
++ int vif; /* L3 master device index */
+ unsigned int rid;
+ struct inet_peer *peer;
+ };
+@@ -127,9 +90,49 @@ static u8 ip4_frag_ecn(u8 tos)
+
+ static struct inet_frags ip4_frags;
+
+-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+- struct sk_buff *prev_tail, struct net_device *dev);
++int ip_frag_mem(struct net *net)
++{
++ return sum_frag_mem_limit(&net->ipv4.frags);
++}
++
++static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
++ struct net_device *dev);
++
++struct ip4_create_arg {
++ struct iphdr *iph;
++ u32 user;
++ int vif;
++};
+
++static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
++{
++ net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
++ return jhash_3words((__force u32)id << 16 | prot,
++ (__force u32)saddr, (__force u32)daddr,
++ ip4_frags.rnd);
++}
++
++static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
++{
++ const struct ipq *ipq;
++
++ ipq = container_of(q, struct ipq, q);
++ return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
++}
++
++static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
++{
++ const struct ipq *qp;
++ const struct ip4_create_arg *arg = a;
++
++ qp = container_of(q, struct ipq, q);
++ return qp->id == arg->iph->id &&
++ qp->saddr == arg->iph->saddr &&
++ qp->daddr == arg->iph->daddr &&
++ qp->protocol == arg->iph->protocol &&
++ qp->user == arg->user &&
++ qp->vif == arg->vif;
++}
+
+ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
+ {
+@@ -138,12 +141,17 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
+ frags);
+ struct net *net = container_of(ipv4, struct net, ipv4);
+
+- const struct frag_v4_compare_key *key = a;
++ const struct ip4_create_arg *arg = a;
+
+- q->key.v4 = *key;
+- qp->ecn = 0;
++ qp->protocol = arg->iph->protocol;
++ qp->id = arg->iph->id;
++ qp->ecn = ip4_frag_ecn(arg->iph->tos);
++ qp->saddr = arg->iph->saddr;
++ qp->daddr = arg->iph->daddr;
++ qp->vif = arg->vif;
++ qp->user = arg->user;
+ qp->peer = sysctl_ipfrag_max_dist ?
+- inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
++ inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) :
+ NULL;
+ }
+
+@@ -161,7 +169,7 @@ static void ip4_frag_free(struct inet_frag_queue *q)
+
+ static void ipq_put(struct ipq *ipq)
+ {
+- inet_frag_put(&ipq->q);
++ inet_frag_put(&ipq->q, &ip4_frags);
+ }
+
+ /* Kill ipq entry. It is not destroyed immediately,
+@@ -169,7 +177,7 @@ static void ipq_put(struct ipq *ipq)
+ */
+ static void ipq_kill(struct ipq *ipq)
+ {
+- inet_frag_kill(&ipq->q);
++ inet_frag_kill(&ipq->q, &ip4_frags);
+ }
+
+ static bool frag_expire_skip_icmp(u32 user)
+@@ -186,11 +194,8 @@ static bool frag_expire_skip_icmp(u32 user)
+ */
+ static void ip_expire(unsigned long arg)
+ {
+- const struct iphdr *iph;
+- struct sk_buff *head = NULL;
+- struct net *net;
+ struct ipq *qp;
+- int err;
++ struct net *net;
+
+ qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
+ net = container_of(qp->q.net, struct net, ipv4.frags);
+@@ -203,65 +208,51 @@ static void ip_expire(unsigned long arg)
+
+ ipq_kill(qp);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
+- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+
+- if (!(qp->q.flags & INET_FRAG_FIRST_IN))
+- goto out;
++ if (!inet_frag_evicting(&qp->q)) {
++ struct sk_buff *clone, *head = qp->q.fragments;
++ const struct iphdr *iph;
++ int err;
+
+- /* sk_buff::dev and sk_buff::rbnode are unionized. So we
+- * pull the head out of the tree in order to be able to
+- * deal with head->dev.
+- */
+- if (qp->q.fragments) {
+- head = qp->q.fragments;
+- qp->q.fragments = head->next;
+- } else {
+- head = skb_rb_first(&qp->q.rb_fragments);
+- if (!head)
+- goto out;
+- if (FRAG_CB(head)->next_frag)
+- rb_replace_node(&head->rbnode,
+- &FRAG_CB(head)->next_frag->rbnode,
+- &qp->q.rb_fragments);
+- else
+- rb_erase(&head->rbnode, &qp->q.rb_fragments);
+- memset(&head->rbnode, 0, sizeof(head->rbnode));
+- barrier();
+- }
+- if (head == qp->q.fragments_tail)
+- qp->q.fragments_tail = NULL;
++ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+
+- sub_frag_mem_limit(qp->q.net, head->truesize);
++ if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
++ goto out;
+
+- head->dev = dev_get_by_index_rcu(net, qp->iif);
+- if (!head->dev)
+- goto out;
++ head->dev = dev_get_by_index_rcu(net, qp->iif);
++ if (!head->dev)
++ goto out;
+
+
+- /* skb has no dst, perform route lookup again */
+- iph = ip_hdr(head);
+- err = ip_route_input_noref(head, iph->daddr, iph->saddr,
++ /* skb has no dst, perform route lookup again */
++ iph = ip_hdr(head);
++ err = ip_route_input_noref(head, iph->daddr, iph->saddr,
+ iph->tos, head->dev);
+- if (err)
+- goto out;
++ if (err)
++ goto out;
+
+- /* Only an end host needs to send an ICMP
+- * "Fragment Reassembly Timeout" message, per RFC792.
+- */
+- if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
+- (skb_rtable(head)->rt_type != RTN_LOCAL))
+- goto out;
++ /* Only an end host needs to send an ICMP
++ * "Fragment Reassembly Timeout" message, per RFC792.
++ */
++ if (frag_expire_skip_icmp(qp->user) &&
++ (skb_rtable(head)->rt_type != RTN_LOCAL))
++ goto out;
+
+- spin_unlock(&qp->q.lock);
+- icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
+- goto out_rcu_unlock;
++ clone = skb_clone(head, GFP_ATOMIC);
+
++ /* Send an ICMP "Fragment Reassembly Timeout" message. */
++ if (clone) {
++ spin_unlock(&qp->q.lock);
++ icmp_send(clone, ICMP_TIME_EXCEEDED,
++ ICMP_EXC_FRAGTIME, 0);
++ consume_skb(clone);
++ goto out_rcu_unlock;
++ }
++ }
+ out:
+ spin_unlock(&qp->q.lock);
+ out_rcu_unlock:
+ rcu_read_unlock();
+- if (head)
+- kfree_skb(head);
+ ipq_put(qp);
+ }
+
+@@ -271,20 +262,21 @@ out_rcu_unlock:
+ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
+ u32 user, int vif)
+ {
+- struct frag_v4_compare_key key = {
+- .saddr = iph->saddr,
+- .daddr = iph->daddr,
+- .user = user,
+- .vif = vif,
+- .id = iph->id,
+- .protocol = iph->protocol,
+- };
+ struct inet_frag_queue *q;
++ struct ip4_create_arg arg;
++ unsigned int hash;
+
+- q = inet_frag_find(&net->ipv4.frags, &key);
+- if (!q)
+- return NULL;
++ arg.iph = iph;
++ arg.user = user;
++ arg.vif = vif;
++
++ hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
+
++ q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
++ if (IS_ERR_OR_NULL(q)) {
++ inet_frag_maybe_warn_overflow(q, pr_fmt());
++ return NULL;
++ }
+ return container_of(q, struct ipq, q);
+ }
+
+@@ -304,7 +296,7 @@ static int ip_frag_too_far(struct ipq *qp)
+ end = atomic_inc_return(&peer->rid);
+ qp->rid = end;
+
+- rc = qp->q.fragments_tail && (end - start) > max;
++ rc = qp->q.fragments && (end - start) > max;
+
+ if (rc) {
+ struct net *net;
+@@ -318,6 +310,7 @@ static int ip_frag_too_far(struct ipq *qp)
+
+ static int ip_frag_reinit(struct ipq *qp)
+ {
++ struct sk_buff *fp;
+ unsigned int sum_truesize = 0;
+
+ if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
+@@ -325,16 +318,21 @@ static int ip_frag_reinit(struct ipq *qp)
+ return -ETIMEDOUT;
+ }
+
+- sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
++ fp = qp->q.fragments;
++ do {
++ struct sk_buff *xp = fp->next;
++
++ sum_truesize += fp->truesize;
++ kfree_skb(fp);
++ fp = xp;
++ } while (fp);
+ sub_frag_mem_limit(qp->q.net, sum_truesize);
+
+ qp->q.flags = 0;
+ qp->q.len = 0;
+ qp->q.meat = 0;
+ qp->q.fragments = NULL;
+- qp->q.rb_fragments = RB_ROOT;
+ qp->q.fragments_tail = NULL;
+- qp->q.last_run_head = NULL;
+ qp->iif = 0;
+ qp->ecn = 0;
+
+@@ -344,13 +342,11 @@ static int ip_frag_reinit(struct ipq *qp)
+ /* Add new segment to existing queue. */
+ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ {
+- struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+- struct rb_node **rbn, *parent;
+- struct sk_buff *skb1, *prev_tail;
+- int ihl, end, skb1_run_end;
++ struct sk_buff *prev, *next;
+ struct net_device *dev;
+ unsigned int fragsize;
+ int flags, offset;
++ int ihl, end;
+ int err = -ENOENT;
+ u8 ecn;
+
+@@ -409,68 +405,94 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ if (err)
+ goto err;
+
+- /* Note : skb->rbnode and skb->dev share the same location. */
+- dev = skb->dev;
+- /* Makes sure compiler wont do silly aliasing games */
+- barrier();
+-
+- /* RFC5722, Section 4, amended by Errata ID : 3089
+- * When reassembling an IPv6 datagram, if
+- * one or more its constituent fragments is determined to be an
+- * overlapping fragment, the entire datagram (and any constituent
+- * fragments) MUST be silently discarded.
+- *
+- * We do the same here for IPv4 (and increment an snmp counter) but
+- * we do not want to drop the whole queue in response to a duplicate
+- * fragment.
++ /* Find out which fragments are in front and at the back of us
++ * in the chain of fragments so far. We must know where to put
++ * this fragment, right?
+ */
++ prev = qp->q.fragments_tail;
++ if (!prev || FRAG_CB(prev)->offset < offset) {
++ next = NULL;
++ goto found;
++ }
++ prev = NULL;
++ for (next = qp->q.fragments; next != NULL; next = next->next) {
++ if (FRAG_CB(next)->offset >= offset)
++ break; /* bingo! */
++ prev = next;
++ }
+
+- err = -EINVAL;
+- /* Find out where to put this fragment. */
+- prev_tail = qp->q.fragments_tail;
+- if (!prev_tail)
+- ip4_frag_create_run(&qp->q, skb); /* First fragment. */
+- else if (prev_tail->ip_defrag_offset + prev_tail->len < end) {
+- /* This is the common case: skb goes to the end. */
+- /* Detect and discard overlaps. */
+- if (offset < prev_tail->ip_defrag_offset + prev_tail->len)
+- goto discard_qp;
+- if (offset == prev_tail->ip_defrag_offset + prev_tail->len)
+- ip4_frag_append_to_last_run(&qp->q, skb);
+- else
+- ip4_frag_create_run(&qp->q, skb);
+- } else {
+- /* Binary search. Note that skb can become the first fragment,
+- * but not the last (covered above).
+- */
+- rbn = &qp->q.rb_fragments.rb_node;
+- do {
+- parent = *rbn;
+- skb1 = rb_to_skb(parent);
+- skb1_run_end = skb1->ip_defrag_offset +
+- FRAG_CB(skb1)->frag_run_len;
+- if (end <= skb1->ip_defrag_offset)
+- rbn = &parent->rb_left;
+- else if (offset >= skb1_run_end)
+- rbn = &parent->rb_right;
+- else if (offset >= skb1->ip_defrag_offset &&
+- end <= skb1_run_end)
+- goto err; /* No new data, potential duplicate */
++found:
++ /* We found where to put this one. Check for overlap with
++ * preceding fragment, and, if needed, align things so that
++ * any overlaps are eliminated.
++ */
++ if (prev) {
++ int i = (FRAG_CB(prev)->offset + prev->len) - offset;
++
++ if (i > 0) {
++ offset += i;
++ err = -EINVAL;
++ if (end <= offset)
++ goto err;
++ err = -ENOMEM;
++ if (!pskb_pull(skb, i))
++ goto err;
++ if (skb->ip_summed != CHECKSUM_UNNECESSARY)
++ skb->ip_summed = CHECKSUM_NONE;
++ }
++ }
++
++ err = -ENOMEM;
++
++ while (next && FRAG_CB(next)->offset < end) {
++ int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
++
++ if (i < next->len) {
++ /* Eat head of the next overlapped fragment
++ * and leave the loop. The next ones cannot overlap.
++ */
++ if (!pskb_pull(next, i))
++ goto err;
++ FRAG_CB(next)->offset += i;
++ qp->q.meat -= i;
++ if (next->ip_summed != CHECKSUM_UNNECESSARY)
++ next->ip_summed = CHECKSUM_NONE;
++ break;
++ } else {
++ struct sk_buff *free_it = next;
++
++ /* Old fragment is completely overridden with
++ * new one drop it.
++ */
++ next = next->next;
++
++ if (prev)
++ prev->next = next;
+ else
+- goto discard_qp; /* Found an overlap */
+- } while (*rbn);
+- /* Here we have parent properly set, and rbn pointing to
+- * one of its NULL left/right children. Insert skb.
+- */
+- ip4_frag_init_run(skb);
+- rb_link_node(&skb->rbnode, parent, rbn);
+- rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
++ qp->q.fragments = next;
++
++ qp->q.meat -= free_it->len;
++ sub_frag_mem_limit(qp->q.net, free_it->truesize);
++ kfree_skb(free_it);
++ }
+ }
+
+- if (dev)
+- qp->iif = dev->ifindex;
+- skb->ip_defrag_offset = offset;
++ FRAG_CB(skb)->offset = offset;
++
++ /* Insert this fragment in the chain of fragments. */
++ skb->next = next;
++ if (!next)
++ qp->q.fragments_tail = skb;
++ if (prev)
++ prev->next = skb;
++ else
++ qp->q.fragments = skb;
+
++ dev = skb->dev;
++ if (dev) {
++ qp->iif = dev->ifindex;
++ skb->dev = NULL;
++ }
+ qp->q.stamp = skb->tstamp;
+ qp->q.meat += skb->len;
+ qp->ecn |= ecn;
+@@ -492,7 +514,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ unsigned long orefdst = skb->_skb_refdst;
+
+ skb->_skb_refdst = 0UL;
+- err = ip_frag_reasm(qp, skb, prev_tail, dev);
++ err = ip_frag_reasm(qp, prev, dev);
+ skb->_skb_refdst = orefdst;
+ return err;
+ }
+@@ -500,23 +522,20 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ skb_dst_drop(skb);
+ return -EINPROGRESS;
+
+-discard_qp:
+- inet_frag_kill(&qp->q);
+- IP_INC_STATS_BH(net, IPSTATS_MIB_REASM_OVERLAPS);
+ err:
+ kfree_skb(skb);
+ return err;
+ }
+
++
+ /* Build a new IP datagram from all its fragments. */
+-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+- struct sk_buff *prev_tail, struct net_device *dev)
++
++static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
++ struct net_device *dev)
+ {
+ struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+ struct iphdr *iph;
+- struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments);
+- struct sk_buff **nextp; /* To build frag_list. */
+- struct rb_node *rbn;
++ struct sk_buff *fp, *head = qp->q.fragments;
+ int len;
+ int ihlen;
+ int err;
+@@ -530,27 +549,26 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+ goto out_fail;
+ }
+ /* Make the one we just received the head. */
+- if (head != skb) {
+- fp = skb_clone(skb, GFP_ATOMIC);
++ if (prev) {
++ head = prev->next;
++ fp = skb_clone(head, GFP_ATOMIC);
+ if (!fp)
+ goto out_nomem;
+- FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
+- if (RB_EMPTY_NODE(&skb->rbnode))
+- FRAG_CB(prev_tail)->next_frag = fp;
+- else
+- rb_replace_node(&skb->rbnode, &fp->rbnode,
+- &qp->q.rb_fragments);
+- if (qp->q.fragments_tail == skb)
++
++ fp->next = head->next;
++ if (!fp->next)
+ qp->q.fragments_tail = fp;
+- skb_morph(skb, head);
+- FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
+- rb_replace_node(&head->rbnode, &skb->rbnode,
+- &qp->q.rb_fragments);
+- consume_skb(head);
+- head = skb;
++ prev->next = fp;
++
++ skb_morph(head, qp->q.fragments);
++ head->next = qp->q.fragments->next;
++
++ consume_skb(qp->q.fragments);
++ qp->q.fragments = head;
+ }
+
+- WARN_ON(head->ip_defrag_offset != 0);
++ WARN_ON(!head);
++ WARN_ON(FRAG_CB(head)->offset != 0);
+
+ /* Allocate a new buffer for the datagram. */
+ ihlen = ip_hdrlen(head);
+@@ -574,61 +592,35 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+ clone = alloc_skb(0, GFP_ATOMIC);
+ if (!clone)
+ goto out_nomem;
++ clone->next = head->next;
++ head->next = clone;
+ skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+ skb_frag_list_init(head);
+ for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+ plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+ clone->len = clone->data_len = head->data_len - plen;
+- head->truesize += clone->truesize;
++ head->data_len -= clone->len;
++ head->len -= clone->len;
+ clone->csum = 0;
+ clone->ip_summed = head->ip_summed;
+ add_frag_mem_limit(qp->q.net, clone->truesize);
+- skb_shinfo(head)->frag_list = clone;
+- nextp = &clone->next;
+- } else {
+- nextp = &skb_shinfo(head)->frag_list;
+ }
+
++ skb_shinfo(head)->frag_list = head->next;
+ skb_push(head, head->data - skb_network_header(head));
+
+- /* Traverse the tree in order, to build frag_list. */
+- fp = FRAG_CB(head)->next_frag;
+- rbn = rb_next(&head->rbnode);
+- rb_erase(&head->rbnode, &qp->q.rb_fragments);
+- while (rbn || fp) {
+- /* fp points to the next sk_buff in the current run;
+- * rbn points to the next run.
+- */
+- /* Go through the current run. */
+- while (fp) {
+- *nextp = fp;
+- nextp = &fp->next;
+- fp->prev = NULL;
+- memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+- fp->sk = NULL;
+- head->data_len += fp->len;
+- head->len += fp->len;
+- if (head->ip_summed != fp->ip_summed)
+- head->ip_summed = CHECKSUM_NONE;
+- else if (head->ip_summed == CHECKSUM_COMPLETE)
+- head->csum = csum_add(head->csum, fp->csum);
+- head->truesize += fp->truesize;
+- fp = FRAG_CB(fp)->next_frag;
+- }
+- /* Move to the next run. */
+- if (rbn) {
+- struct rb_node *rbnext = rb_next(rbn);
+-
+- fp = rb_to_skb(rbn);
+- rb_erase(rbn, &qp->q.rb_fragments);
+- rbn = rbnext;
+- }
++ for (fp=head->next; fp; fp = fp->next) {
++ head->data_len += fp->len;
++ head->len += fp->len;
++ if (head->ip_summed != fp->ip_summed)
++ head->ip_summed = CHECKSUM_NONE;
++ else if (head->ip_summed == CHECKSUM_COMPLETE)
++ head->csum = csum_add(head->csum, fp->csum);
++ head->truesize += fp->truesize;
+ }
+ sub_frag_mem_limit(qp->q.net, head->truesize);
+
+- *nextp = NULL;
+ head->next = NULL;
+- head->prev = NULL;
+ head->dev = dev;
+ head->tstamp = qp->q.stamp;
+ IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
+@@ -656,9 +648,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
+ qp->q.fragments = NULL;
+- qp->q.rb_fragments = RB_ROOT;
+ qp->q.fragments_tail = NULL;
+- qp->q.last_run_head = NULL;
+ return 0;
+
+ out_nomem:
+@@ -666,7 +656,7 @@ out_nomem:
+ err = -ENOMEM;
+ goto out_fail;
+ out_oversize:
+- net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr);
++ net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
+ out_fail:
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
+ return err;
+@@ -744,46 +734,25 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
+ }
+ EXPORT_SYMBOL(ip_check_defrag);
+
+-unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+-{
+- struct rb_node *p = rb_first(root);
+- unsigned int sum = 0;
+-
+- while (p) {
+- struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+-
+- p = rb_next(p);
+- rb_erase(&skb->rbnode, root);
+- while (skb) {
+- struct sk_buff *next = FRAG_CB(skb)->next_frag;
+-
+- sum += skb->truesize;
+- kfree_skb(skb);
+- skb = next;
+- }
+- }
+- return sum;
+-}
+-EXPORT_SYMBOL(inet_frag_rbtree_purge);
+-
+ #ifdef CONFIG_SYSCTL
+-static int dist_min;
++static int zero;
+
+ static struct ctl_table ip4_frags_ns_ctl_table[] = {
+ {
+ .procname = "ipfrag_high_thresh",
+ .data = &init_net.ipv4.frags.high_thresh,
+- .maxlen = sizeof(unsigned long),
++ .maxlen = sizeof(int),
+ .mode = 0644,
+- .proc_handler = proc_doulongvec_minmax,
++ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &init_net.ipv4.frags.low_thresh
+ },
+ {
+ .procname = "ipfrag_low_thresh",
+ .data = &init_net.ipv4.frags.low_thresh,
+- .maxlen = sizeof(unsigned long),
++ .maxlen = sizeof(int),
+ .mode = 0644,
+- .proc_handler = proc_doulongvec_minmax,
++ .proc_handler = proc_dointvec_minmax,
++ .extra1 = &zero,
+ .extra2 = &init_net.ipv4.frags.high_thresh
+ },
+ {
+@@ -812,7 +781,7 @@ static struct ctl_table ip4_frags_ctl_table[] = {
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+- .extra1 = &dist_min,
++ .extra1 = &zero
+ },
+ { }
+ };
+@@ -884,8 +853,6 @@ static void __init ip4_frags_ctl_register(void)
+
+ static int __net_init ipv4_frags_init_net(struct net *net)
+ {
+- int res;
+-
+ /* Fragment cache limits.
+ *
+ * The fragment memory accounting code, (tries to) account for
+@@ -909,21 +876,15 @@ static int __net_init ipv4_frags_init_net(struct net *net)
+ */
+ net->ipv4.frags.timeout = IP_FRAG_TIME;
+
+- net->ipv4.frags.f = &ip4_frags;
++ inet_frags_init_net(&net->ipv4.frags);
+
+- res = inet_frags_init_net(&net->ipv4.frags);
+- if (res < 0)
+- return res;
+- res = ip4_frags_ns_ctl_register(net);
+- if (res < 0)
+- inet_frags_exit_net(&net->ipv4.frags);
+- return res;
++ return ip4_frags_ns_ctl_register(net);
+ }
+
+ static void __net_exit ipv4_frags_exit_net(struct net *net)
+ {
+ ip4_frags_ns_ctl_unregister(net);
+- inet_frags_exit_net(&net->ipv4.frags);
++ inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
+ }
+
+ static struct pernet_operations ip4_frags_ops = {
+@@ -931,50 +892,18 @@ static struct pernet_operations ip4_frags_ops = {
+ .exit = ipv4_frags_exit_net,
+ };
+
+-
+-static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed)
+-{
+- return jhash2(data,
+- sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+-}
+-
+-static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed)
+-{
+- const struct inet_frag_queue *fq = data;
+-
+- return jhash2((const u32 *)&fq->key.v4,
+- sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+-}
+-
+-static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+-{
+- const struct frag_v4_compare_key *key = arg->key;
+- const struct inet_frag_queue *fq = ptr;
+-
+- return !!memcmp(&fq->key, key, sizeof(*key));
+-}
+-
+-static const struct rhashtable_params ip4_rhash_params = {
+- .head_offset = offsetof(struct inet_frag_queue, node),
+- .key_offset = offsetof(struct inet_frag_queue, key),
+- .key_len = sizeof(struct frag_v4_compare_key),
+- .hashfn = ip4_key_hashfn,
+- .obj_hashfn = ip4_obj_hashfn,
+- .obj_cmpfn = ip4_obj_cmpfn,
+- .automatic_shrinking = true,
+-};
+-
+ void __init ipfrag_init(void)
+ {
++ ip4_frags_ctl_register();
++ register_pernet_subsys(&ip4_frags_ops);
++ ip4_frags.hashfn = ip4_hashfn;
+ ip4_frags.constructor = ip4_frag_init;
+ ip4_frags.destructor = ip4_frag_free;
+ ip4_frags.skb_free = NULL;
+ ip4_frags.qsize = sizeof(struct ipq);
++ ip4_frags.match = ip4_frag_match;
+ ip4_frags.frag_expire = ip_expire;
+ ip4_frags.frags_cache_name = ip_frag_cache_name;
+- ip4_frags.rhash_params = ip4_rhash_params;
+ if (inet_frags_init(&ip4_frags))
+ panic("IP: failed to allocate ip4_frags cache\n");
+- ip4_frags_ctl_register();
+- register_pernet_subsys(&ip4_frags_ops);
+ }
+diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
+index b001ad668108..3abd9d7a3adf 100644
+--- a/net/ipv4/proc.c
++++ b/net/ipv4/proc.c
+@@ -52,6 +52,7 @@
+ static int sockstat_seq_show(struct seq_file *seq, void *v)
+ {
+ struct net *net = seq->private;
++ unsigned int frag_mem;
+ int orphans, sockets;
+
+ local_bh_disable();
+@@ -71,9 +72,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
+ sock_prot_inuse_get(net, &udplite_prot));
+ seq_printf(seq, "RAW: inuse %d\n",
+ sock_prot_inuse_get(net, &raw_prot));
+- seq_printf(seq, "FRAG: inuse %u memory %lu\n",
+- atomic_read(&net->ipv4.frags.rhashtable.nelems),
+- frag_mem_limit(&net->ipv4.frags));
++ frag_mem = ip_frag_mem(net);
++ seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem);
+ return 0;
+ }
+
+@@ -132,7 +132,6 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
+ SNMP_MIB_ITEM("InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
+ SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
+ SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS),
+- SNMP_MIB_ITEM("ReasmOverlaps", IPSTATS_MIB_REASM_OVERLAPS),
+ SNMP_MIB_SENTINEL
+ };
+
+diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
+index 664c84e47bab..5a9ae56e7868 100644
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -64,6 +64,7 @@ struct nf_ct_frag6_skb_cb
+ static struct inet_frags nf_frags;
+
+ #ifdef CONFIG_SYSCTL
++static int zero;
+
+ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
+ {
+@@ -76,17 +77,18 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
+ {
+ .procname = "nf_conntrack_frag6_low_thresh",
+ .data = &init_net.nf_frag.frags.low_thresh,
+- .maxlen = sizeof(unsigned long),
++ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+- .proc_handler = proc_doulongvec_minmax,
++ .proc_handler = proc_dointvec_minmax,
++ .extra1 = &zero,
+ .extra2 = &init_net.nf_frag.frags.high_thresh
+ },
+ {
+ .procname = "nf_conntrack_frag6_high_thresh",
+ .data = &init_net.nf_frag.frags.high_thresh,
+- .maxlen = sizeof(unsigned long),
++ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+- .proc_handler = proc_doulongvec_minmax,
++ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &init_net.nf_frag.frags.low_thresh
+ },
+ { }
+@@ -151,6 +153,23 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
+ return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
+ }
+
++static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
++ const struct in6_addr *daddr)
++{
++ net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
++ return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
++ (__force u32)id, nf_frags.rnd);
++}
++
++
++static unsigned int nf_hashfn(const struct inet_frag_queue *q)
++{
++ const struct frag_queue *nq;
++
++ nq = container_of(q, struct frag_queue, q);
++ return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
++}
++
+ static void nf_skb_free(struct sk_buff *skb)
+ {
+ if (NFCT_FRAG6_CB(skb)->orig)
+@@ -165,26 +184,34 @@ static void nf_ct_frag6_expire(unsigned long data)
+ fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ net = container_of(fq->q.net, struct net, nf_frag.frags);
+
+- ip6_expire_frag_queue(net, fq);
++ ip6_expire_frag_queue(net, fq, &nf_frags);
+ }
+
+ /* Creation primitives. */
+-static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
+- const struct ipv6hdr *hdr, int iif)
++static inline struct frag_queue *fq_find(struct net *net, __be32 id,
++ u32 user, struct in6_addr *src,
++ struct in6_addr *dst, int iif, u8 ecn)
+ {
+- struct frag_v6_compare_key key = {
+- .id = id,
+- .saddr = hdr->saddr,
+- .daddr = hdr->daddr,
+- .user = user,
+- .iif = iif,
+- };
+ struct inet_frag_queue *q;
+-
+- q = inet_frag_find(&net->nf_frag.frags, &key);
+- if (!q)
++ struct ip6_create_arg arg;
++ unsigned int hash;
++
++ arg.id = id;
++ arg.user = user;
++ arg.src = src;
++ arg.dst = dst;
++ arg.iif = iif;
++ arg.ecn = ecn;
++
++ local_bh_disable();
++ hash = nf_hash_frag(id, src, dst);
++
++ q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
++ local_bh_enable();
++ if (IS_ERR_OR_NULL(q)) {
++ inet_frag_maybe_warn_overflow(q, pr_fmt());
+ return NULL;
+-
++ }
+ return container_of(q, struct frag_queue, q);
+ }
+
+@@ -335,7 +362,7 @@ found:
+ return 0;
+
+ discard_fq:
+- inet_frag_kill(&fq->q);
++ inet_frag_kill(&fq->q, &nf_frags);
+ err:
+ return -1;
+ }
+@@ -356,7 +383,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
+ int payload_len;
+ u8 ecn;
+
+- inet_frag_kill(&fq->q);
++ inet_frag_kill(&fq->q, &nf_frags);
+
+ WARN_ON(head == NULL);
+ WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
+@@ -427,7 +454,6 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
+ else if (head->ip_summed == CHECKSUM_COMPLETE)
+ head->csum = csum_add(head->csum, fp->csum);
+ head->truesize += fp->truesize;
+- fp->sk = NULL;
+ }
+ sub_frag_mem_limit(fq->q.net, head->truesize);
+
+@@ -446,7 +472,6 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
+ head->csum);
+
+ fq->q.fragments = NULL;
+- fq->q.rb_fragments = RB_ROOT;
+ fq->q.fragments_tail = NULL;
+
+ /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
+@@ -576,13 +601,9 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use
+ hdr = ipv6_hdr(clone);
+ fhdr = (struct frag_hdr *)skb_transport_header(clone);
+
+- if (clone->len - skb_network_offset(clone) < IPV6_MIN_MTU &&
+- fhdr->frag_off & htons(IP6_MF))
+- goto ret_orig;
+-
+ skb_orphan(skb);
+- fq = fq_find(net, fhdr->identification, user, hdr,
+- skb->dev ? skb->dev->ifindex : 0);
++ fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
++ skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+ if (fq == NULL) {
+ pr_debug("Can't find and can't create new queue\n");
+ goto ret_orig;
+@@ -593,7 +614,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use
+ if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
+ spin_unlock_bh(&fq->q.lock);
+ pr_debug("Can't insert skb to queue\n");
+- inet_frag_put(&fq->q);
++ inet_frag_put(&fq->q, &nf_frags);
+ goto ret_orig;
+ }
+
+@@ -605,7 +626,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use
+ }
+ spin_unlock_bh(&fq->q.lock);
+
+- inet_frag_put(&fq->q);
++ inet_frag_put(&fq->q, &nf_frags);
+ return ret_skb;
+
+ ret_orig:
+@@ -629,26 +650,18 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig);
+
+ static int nf_ct_net_init(struct net *net)
+ {
+- int res;
+-
+ net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
+- net->nf_frag.frags.f = &nf_frags;
+-
+- res = inet_frags_init_net(&net->nf_frag.frags);
+- if (res < 0)
+- return res;
+- res = nf_ct_frag6_sysctl_register(net);
+- if (res < 0)
+- inet_frags_exit_net(&net->nf_frag.frags);
+- return res;
++ inet_frags_init_net(&net->nf_frag.frags);
++
++ return nf_ct_frag6_sysctl_register(net);
+ }
+
+ static void nf_ct_net_exit(struct net *net)
+ {
+ nf_ct_frags6_sysctl_unregister(net);
+- inet_frags_exit_net(&net->nf_frag.frags);
++ inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+ }
+
+ static struct pernet_operations nf_ct_net_ops = {
+@@ -660,13 +673,14 @@ int nf_ct_frag6_init(void)
+ {
+ int ret = 0;
+
++ nf_frags.hashfn = nf_hashfn;
+ nf_frags.constructor = ip6_frag_init;
+ nf_frags.destructor = NULL;
+ nf_frags.skb_free = nf_skb_free;
+ nf_frags.qsize = sizeof(struct frag_queue);
++ nf_frags.match = ip6_frag_match;
+ nf_frags.frag_expire = nf_ct_frag6_expire;
+ nf_frags.frags_cache_name = nf_frags_cache_name;
+- nf_frags.rhash_params = ip6_rhash_params;
+ ret = inet_frags_init(&nf_frags);
+ if (ret)
+ goto out;
+diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
+index 73e766e7bc37..679253d0af84 100644
+--- a/net/ipv6/proc.c
++++ b/net/ipv6/proc.c
+@@ -33,6 +33,7 @@
+ static int sockstat6_seq_show(struct seq_file *seq, void *v)
+ {
+ struct net *net = seq->private;
++ unsigned int frag_mem = ip6_frag_mem(net);
+
+ seq_printf(seq, "TCP6: inuse %d\n",
+ sock_prot_inuse_get(net, &tcpv6_prot));
+@@ -42,9 +43,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
+ sock_prot_inuse_get(net, &udplitev6_prot));
+ seq_printf(seq, "RAW6: inuse %d\n",
+ sock_prot_inuse_get(net, &rawv6_prot));
+- seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
+- atomic_read(&net->ipv6.frags.rhashtable.nelems),
+- frag_mem_limit(&net->ipv6.frags));
++ seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem);
+ return 0;
+ }
+
+diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
+index ec917f58d105..58f2139ebb5e 100644
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -79,58 +79,94 @@ static struct inet_frags ip6_frags;
+ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+ struct net_device *dev);
+
++/*
++ * callers should be careful not to use the hash value outside the ipfrag_lock
++ * as doing so could race with ipfrag_hash_rnd being recalculated.
++ */
++static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
++ const struct in6_addr *daddr)
++{
++ net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
++ return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
++ (__force u32)id, ip6_frags.rnd);
++}
++
++static unsigned int ip6_hashfn(const struct inet_frag_queue *q)
++{
++ const struct frag_queue *fq;
++
++ fq = container_of(q, struct frag_queue, q);
++ return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
++}
++
++bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
++{
++ const struct frag_queue *fq;
++ const struct ip6_create_arg *arg = a;
++
++ fq = container_of(q, struct frag_queue, q);
++ return fq->id == arg->id &&
++ fq->user == arg->user &&
++ ipv6_addr_equal(&fq->saddr, arg->src) &&
++ ipv6_addr_equal(&fq->daddr, arg->dst) &&
++ (arg->iif == fq->iif ||
++ !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
++ IPV6_ADDR_LINKLOCAL)));
++}
++EXPORT_SYMBOL(ip6_frag_match);
++
+ void ip6_frag_init(struct inet_frag_queue *q, const void *a)
+ {
+ struct frag_queue *fq = container_of(q, struct frag_queue, q);
+- const struct frag_v6_compare_key *key = a;
++ const struct ip6_create_arg *arg = a;
+
+- q->key.v6 = *key;
+- fq->ecn = 0;
++ fq->id = arg->id;
++ fq->user = arg->user;
++ fq->saddr = *arg->src;
++ fq->daddr = *arg->dst;
++ fq->ecn = arg->ecn;
+ }
+ EXPORT_SYMBOL(ip6_frag_init);
+
+-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
++void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
++ struct inet_frags *frags)
+ {
+ struct net_device *dev = NULL;
+- struct sk_buff *head;
+
+- rcu_read_lock();
+ spin_lock(&fq->q.lock);
+
+ if (fq->q.flags & INET_FRAG_COMPLETE)
+ goto out;
+
+- inet_frag_kill(&fq->q);
++ inet_frag_kill(&fq->q, frags);
+
++ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, fq->iif);
+ if (!dev)
+- goto out;
++ goto out_rcu_unlock;
+
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
++
++ if (inet_frag_evicting(&fq->q))
++ goto out_rcu_unlock;
++
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+
+ /* Don't send error if the first segment did not arrive. */
+- head = fq->q.fragments;
+- if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
+- goto out;
++ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
++ goto out_rcu_unlock;
+
+ /* But use as source device on which LAST ARRIVED
+ * segment was received. And do not use fq->dev
+ * pointer directly, device might already disappeared.
+ */
+- head->dev = dev;
+- skb_get(head);
+- spin_unlock(&fq->q.lock);
+-
+- icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+- kfree_skb(head);
+- goto out_rcu_unlock;
+-
+-out:
+- spin_unlock(&fq->q.lock);
++ fq->q.fragments->dev = dev;
++ icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+ out_rcu_unlock:
+ rcu_read_unlock();
+- inet_frag_put(&fq->q);
++out:
++ spin_unlock(&fq->q.lock);
++ inet_frag_put(&fq->q, frags);
+ }
+ EXPORT_SYMBOL(ip6_expire_frag_queue);
+
+@@ -142,29 +178,31 @@ static void ip6_frag_expire(unsigned long data)
+ fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ net = container_of(fq->q.net, struct net, ipv6.frags);
+
+- ip6_expire_frag_queue(net, fq);
++ ip6_expire_frag_queue(net, fq, &ip6_frags);
+ }
+
+ static struct frag_queue *
+-fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
++fq_find(struct net *net, __be32 id, const struct in6_addr *src,
++ const struct in6_addr *dst, int iif, u8 ecn)
+ {
+- struct frag_v6_compare_key key = {
+- .id = id,
+- .saddr = hdr->saddr,
+- .daddr = hdr->daddr,
+- .user = IP6_DEFRAG_LOCAL_DELIVER,
+- .iif = iif,
+- };
+ struct inet_frag_queue *q;
++ struct ip6_create_arg arg;
++ unsigned int hash;
+
+- if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
+- IPV6_ADDR_LINKLOCAL)))
+- key.iif = 0;
++ arg.id = id;
++ arg.user = IP6_DEFRAG_LOCAL_DELIVER;
++ arg.src = src;
++ arg.dst = dst;
++ arg.iif = iif;
++ arg.ecn = ecn;
+
+- q = inet_frag_find(&net->ipv6.frags, &key);
+- if (!q)
+- return NULL;
++ hash = inet6_hash_frag(id, src, dst);
+
++ q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
++ if (IS_ERR_OR_NULL(q)) {
++ inet_frag_maybe_warn_overflow(q, pr_fmt());
++ return NULL;
++ }
+ return container_of(q, struct frag_queue, q);
+ }
+
+@@ -321,7 +359,7 @@ found:
+ return -1;
+
+ discard_fq:
+- inet_frag_kill(&fq->q);
++ inet_frag_kill(&fq->q, &ip6_frags);
+ err:
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_REASMFAILS);
+@@ -348,7 +386,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+ int sum_truesize;
+ u8 ecn;
+
+- inet_frag_kill(&fq->q);
++ inet_frag_kill(&fq->q, &ip6_frags);
+
+ ecn = ip_frag_ecn_table[fq->ecn];
+ if (unlikely(ecn == 0xff))
+@@ -465,7 +503,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
+ rcu_read_unlock();
+ fq->q.fragments = NULL;
+- fq->q.rb_fragments = RB_ROOT;
+ fq->q.fragments_tail = NULL;
+ return 1;
+
+@@ -487,7 +524,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
+ struct frag_queue *fq;
+ const struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct net *net = dev_net(skb_dst(skb)->dev);
+- int iif;
+
+ if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
+ goto fail_hdr;
+@@ -516,22 +552,17 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
+ return 1;
+ }
+
+- if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
+- fhdr->frag_off & htons(IP6_MF))
+- goto fail_hdr;
+-
+- iif = skb->dev ? skb->dev->ifindex : 0;
+- fq = fq_find(net, fhdr->identification, hdr, iif);
++ fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
++ skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+ if (fq) {
+ int ret;
+
+ spin_lock(&fq->q.lock);
+
+- fq->iif = iif;
+ ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
+
+ spin_unlock(&fq->q.lock);
+- inet_frag_put(&fq->q);
++ inet_frag_put(&fq->q, &ip6_frags);
+ return ret;
+ }
+
+@@ -552,22 +583,24 @@ static const struct inet6_protocol frag_protocol = {
+ };
+
+ #ifdef CONFIG_SYSCTL
++static int zero;
+
+ static struct ctl_table ip6_frags_ns_ctl_table[] = {
+ {
+ .procname = "ip6frag_high_thresh",
+ .data = &init_net.ipv6.frags.high_thresh,
+- .maxlen = sizeof(unsigned long),
++ .maxlen = sizeof(int),
+ .mode = 0644,
+- .proc_handler = proc_doulongvec_minmax,
++ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &init_net.ipv6.frags.low_thresh
+ },
+ {
+ .procname = "ip6frag_low_thresh",
+ .data = &init_net.ipv6.frags.low_thresh,
+- .maxlen = sizeof(unsigned long),
++ .maxlen = sizeof(int),
+ .mode = 0644,
+- .proc_handler = proc_doulongvec_minmax,
++ .proc_handler = proc_dointvec_minmax,
++ .extra1 = &zero,
+ .extra2 = &init_net.ipv6.frags.high_thresh
+ },
+ {
+@@ -675,27 +708,19 @@ static void ip6_frags_sysctl_unregister(void)
+
+ static int __net_init ipv6_frags_init_net(struct net *net)
+ {
+- int res;
+-
+ net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
+- net->ipv6.frags.f = &ip6_frags;
+
+- res = inet_frags_init_net(&net->ipv6.frags);
+- if (res < 0)
+- return res;
++ inet_frags_init_net(&net->ipv6.frags);
+
+- res = ip6_frags_ns_sysctl_register(net);
+- if (res < 0)
+- inet_frags_exit_net(&net->ipv6.frags);
+- return res;
++ return ip6_frags_ns_sysctl_register(net);
+ }
+
+ static void __net_exit ipv6_frags_exit_net(struct net *net)
+ {
+ ip6_frags_ns_sysctl_unregister(net);
+- inet_frags_exit_net(&net->ipv6.frags);
++ inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
+ }
+
+ static struct pernet_operations ip6_frags_ops = {
+@@ -703,54 +728,13 @@ static struct pernet_operations ip6_frags_ops = {
+ .exit = ipv6_frags_exit_net,
+ };
+
+-static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed)
+-{
+- return jhash2(data,
+- sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+-}
+-
+-static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed)
+-{
+- const struct inet_frag_queue *fq = data;
+-
+- return jhash2((const u32 *)&fq->key.v6,
+- sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+-}
+-
+-static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+-{
+- const struct frag_v6_compare_key *key = arg->key;
+- const struct inet_frag_queue *fq = ptr;
+-
+- return !!memcmp(&fq->key, key, sizeof(*key));
+-}
+-
+-const struct rhashtable_params ip6_rhash_params = {
+- .head_offset = offsetof(struct inet_frag_queue, node),
+- .hashfn = ip6_key_hashfn,
+- .obj_hashfn = ip6_obj_hashfn,
+- .obj_cmpfn = ip6_obj_cmpfn,
+- .automatic_shrinking = true,
+-};
+-EXPORT_SYMBOL(ip6_rhash_params);
+-
+ int __init ipv6_frag_init(void)
+ {
+ int ret;
+
+- ip6_frags.constructor = ip6_frag_init;
+- ip6_frags.destructor = NULL;
+- ip6_frags.qsize = sizeof(struct frag_queue);
+- ip6_frags.frag_expire = ip6_frag_expire;
+- ip6_frags.frags_cache_name = ip6_frag_cache_name;
+- ip6_frags.rhash_params = ip6_rhash_params;
+- ret = inet_frags_init(&ip6_frags);
+- if (ret)
+- goto out;
+-
+ ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ if (ret)
+- goto err_protocol;
++ goto out;
+
+ ret = ip6_frags_sysctl_register();
+ if (ret)
+@@ -760,6 +744,17 @@ int __init ipv6_frag_init(void)
+ if (ret)
+ goto err_pernet;
+
++ ip6_frags.hashfn = ip6_hashfn;
++ ip6_frags.constructor = ip6_frag_init;
++ ip6_frags.destructor = NULL;
++ ip6_frags.skb_free = NULL;
++ ip6_frags.qsize = sizeof(struct frag_queue);
++ ip6_frags.match = ip6_frag_match;
++ ip6_frags.frag_expire = ip6_frag_expire;
++ ip6_frags.frags_cache_name = ip6_frag_cache_name;
++ ret = inet_frags_init(&ip6_frags);
++ if (ret)
++ goto err_pernet;
+ out:
+ return ret;
+
+@@ -767,8 +762,6 @@ err_pernet:
+ ip6_frags_sysctl_unregister();
+ err_sysctl:
+ inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+-err_protocol:
+- inet_frags_fini(&ip6_frags);
+ goto out;
+ }
+
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-001-inet-frags-change-inet_frags_init_net-return-.patch b/patches.kernel.org/4.4.174-001-inet-frags-change-inet_frags_init_net-return-.patch
new file mode 100644
index 0000000000..a2efe9c0dc
--- /dev/null
+++ b/patches.kernel.org/4.4.174-001-inet-frags-change-inet_frags_init_net-return-.patch
@@ -0,0 +1,157 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:29:49 -0700
+Subject: [PATCH] inet: frags: change inet_frags_init_net() return value
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 787bea7748a76130566f881c2342a0be4127d182
+
+commit 787bea7748a76130566f881c2342a0be4127d182 upstream.
+
+We will soon initialize one rhashtable per struct netns_frags
+in inet_frags_init_net().
+
+This patch changes the return value from void to int so that an
+error can eventually be propagated.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/net/inet_frag.h | 3 ++-
+ net/ieee802154/6lowpan/reassembly.c | 11 ++++++++---
+ net/ipv4/ip_fragment.c | 12 +++++++++---
+ net/ipv6/netfilter/nf_conntrack_reasm.c | 12 +++++++++---
+ net/ipv6/reassembly.c | 11 +++++++++--
+ 5 files changed, 37 insertions(+), 12 deletions(-)
+
+diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
+index c26a6e4dc306..7881c80feefd 100644
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -103,9 +103,10 @@ struct inet_frags {
+ int inet_frags_init(struct inet_frags *);
+ void inet_frags_fini(struct inet_frags *);
+
+-static inline void inet_frags_init_net(struct netns_frags *nf)
++static inline int inet_frags_init_net(struct netns_frags *nf)
+ {
+ atomic_set(&nf->mem, 0);
++ return 0;
+ }
+ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
+
+diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
+index 12e8cf4bda9f..e14962c1fca2 100644
+--- a/net/ieee802154/6lowpan/reassembly.c
++++ b/net/ieee802154/6lowpan/reassembly.c
+@@ -580,14 +580,19 @@ static int __net_init lowpan_frags_init_net(struct net *net)
+ {
+ struct netns_ieee802154_lowpan *ieee802154_lowpan =
+ net_ieee802154_lowpan(net);
++ int res;
+
+ ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
+
+- inet_frags_init_net(&ieee802154_lowpan->frags);
+-
+- return lowpan_frags_ns_sysctl_register(net);
++ res = inet_frags_init_net(&ieee802154_lowpan->frags);
++ if (res < 0)
++ return res;
++ res = lowpan_frags_ns_sysctl_register(net);
++ if (res < 0)
++ inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
++ return res;
+ }
+
+ static void __net_exit lowpan_frags_exit_net(struct net *net)
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index 72915658a6b1..14c40f799bd4 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -853,6 +853,8 @@ static void __init ip4_frags_ctl_register(void)
+
+ static int __net_init ipv4_frags_init_net(struct net *net)
+ {
++ int res;
++
+ /* Fragment cache limits.
+ *
+ * The fragment memory accounting code, (tries to) account for
+@@ -876,9 +878,13 @@ static int __net_init ipv4_frags_init_net(struct net *net)
+ */
+ net->ipv4.frags.timeout = IP_FRAG_TIME;
+
+- inet_frags_init_net(&net->ipv4.frags);
+-
+- return ip4_frags_ns_ctl_register(net);
++ res = inet_frags_init_net(&net->ipv4.frags);
++ if (res < 0)
++ return res;
++ res = ip4_frags_ns_ctl_register(net);
++ if (res < 0)
++ inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
++ return res;
+ }
+
+ static void __net_exit ipv4_frags_exit_net(struct net *net)
+diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
+index 5a9ae56e7868..6859d1e084fe 100644
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -650,12 +650,18 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig);
+
+ static int nf_ct_net_init(struct net *net)
+ {
++ int res;
++
+ net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
+- inet_frags_init_net(&net->nf_frag.frags);
+-
+- return nf_ct_frag6_sysctl_register(net);
++ res = inet_frags_init_net(&net->nf_frag.frags);
++ if (res < 0)
++ return res;
++ res = nf_ct_frag6_sysctl_register(net);
++ if (res < 0)
++ inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
++ return res;
+ }
+
+ static void nf_ct_net_exit(struct net *net)
+diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
+index 58f2139ebb5e..c38a1abb5c62 100644
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -708,13 +708,20 @@ static void ip6_frags_sysctl_unregister(void)
+
+ static int __net_init ipv6_frags_init_net(struct net *net)
+ {
++ int res;
++
+ net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
+
+- inet_frags_init_net(&net->ipv6.frags);
++ res = inet_frags_init_net(&net->ipv6.frags);
++ if (res < 0)
++ return res;
+
+- return ip6_frags_ns_sysctl_register(net);
++ res = ip6_frags_ns_sysctl_register(net);
++ if (res < 0)
++ inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
++ return res;
+ }
+
+ static void __net_exit ipv6_frags_exit_net(struct net *net)
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-002-inet-frags-add-a-pointer-to-struct-netns_frag.patch b/patches.kernel.org/4.4.174-002-inet-frags-add-a-pointer-to-struct-netns_frag.patch
new file mode 100644
index 0000000000..b12f11f045
--- /dev/null
+++ b/patches.kernel.org/4.4.174-002-inet-frags-add-a-pointer-to-struct-netns_frag.patch
@@ -0,0 +1,438 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:29:50 -0700
+Subject: [PATCH] inet: frags: add a pointer to struct netns_frags
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 093ba72914b696521e4885756a68a3332782c8de
+
+commit 093ba72914b696521e4885756a68a3332782c8de upstream.
+
+In order to simplify the API, add a pointer to struct inet_frags
+in struct netns_frags. This will allow us to make things less complex.
+
+These functions no longer take a struct inet_frags parameter:
+
+inet_frag_destroy(struct inet_frag_queue *q /*, struct inet_frags *f */)
+inet_frag_put(struct inet_frag_queue *q /*, struct inet_frags *f */)
+inet_frag_kill(struct inet_frag_queue *q /*, struct inet_frags *f */)
+inet_frags_exit_net(struct netns_frags *nf /*, struct inet_frags *f */)
+ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[bwh: Backported to 4.4: inet_frag_{kill,put}() are called in some
+ different places; update all calls]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/net/inet_frag.h | 11 ++++++-----
+ include/net/ipv6.h | 3 +--
+ net/ieee802154/6lowpan/reassembly.c | 13 +++++++------
+ net/ipv4/inet_fragment.c | 17 ++++++++++-------
+ net/ipv4/ip_fragment.c | 10 ++++++----
+ net/ipv6/netfilter/nf_conntrack_reasm.c | 16 +++++++++-------
+ net/ipv6/reassembly.c | 20 ++++++++++----------
+ 7 files changed, 49 insertions(+), 41 deletions(-)
+
+diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
+index 7881c80feefd..12589f08e064 100644
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -8,6 +8,7 @@ struct netns_frags {
+ int timeout;
+ int high_thresh;
+ int low_thresh;
++ struct inet_frags *f;
+ };
+
+ /**
+@@ -108,20 +109,20 @@ static inline int inet_frags_init_net(struct netns_frags *nf)
+ atomic_set(&nf->mem, 0);
+ return 0;
+ }
+-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
++void inet_frags_exit_net(struct netns_frags *nf);
+
+-void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
+-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f);
++void inet_frag_kill(struct inet_frag_queue *q);
++void inet_frag_destroy(struct inet_frag_queue *q);
+ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
+ struct inet_frags *f, void *key, unsigned int hash);
+
+ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
+ const char *prefix);
+
+-static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
++static inline void inet_frag_put(struct inet_frag_queue *q)
+ {
+ if (atomic_dec_and_test(&q->refcnt))
+- inet_frag_destroy(q, f);
++ inet_frag_destroy(q);
+ }
+
+ static inline bool inet_frag_evicting(struct inet_frag_queue *q)
+diff --git a/include/net/ipv6.h b/include/net/ipv6.h
+index 0e01d570fa22..9d8eace6d455 100644
+--- a/include/net/ipv6.h
++++ b/include/net/ipv6.h
+@@ -534,8 +534,7 @@ struct frag_queue {
+ u8 ecn;
+ };
+
+-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
+- struct inet_frags *frags);
++void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq);
+
+ static inline bool ipv6_addr_any(const struct in6_addr *a)
+ {
+diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
+index e14962c1fca2..8c532743e917 100644
+--- a/net/ieee802154/6lowpan/reassembly.c
++++ b/net/ieee802154/6lowpan/reassembly.c
+@@ -93,10 +93,10 @@ static void lowpan_frag_expire(unsigned long data)
+ if (fq->q.flags & INET_FRAG_COMPLETE)
+ goto out;
+
+- inet_frag_kill(&fq->q, &lowpan_frags);
++ inet_frag_kill(&fq->q);
+ out:
+ spin_unlock(&fq->q.lock);
+- inet_frag_put(&fq->q, &lowpan_frags);
++ inet_frag_put(&fq->q);
+ }
+
+ static inline struct lowpan_frag_queue *
+@@ -229,7 +229,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
+ struct sk_buff *fp, *head = fq->q.fragments;
+ int sum_truesize;
+
+- inet_frag_kill(&fq->q, &lowpan_frags);
++ inet_frag_kill(&fq->q);
+
+ /* Make the one we just received the head. */
+ if (prev) {
+@@ -437,7 +437,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
+ ret = lowpan_frag_queue(fq, skb, frag_type);
+ spin_unlock(&fq->q.lock);
+
+- inet_frag_put(&fq->q, &lowpan_frags);
++ inet_frag_put(&fq->q);
+ return ret;
+ }
+
+@@ -585,13 +585,14 @@ static int __net_init lowpan_frags_init_net(struct net *net)
+ ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
++ ieee802154_lowpan->frags.f = &lowpan_frags;
+
+ res = inet_frags_init_net(&ieee802154_lowpan->frags);
+ if (res < 0)
+ return res;
+ res = lowpan_frags_ns_sysctl_register(net);
+ if (res < 0)
+- inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
++ inet_frags_exit_net(&ieee802154_lowpan->frags);
+ return res;
+ }
+
+@@ -601,7 +602,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
+ net_ieee802154_lowpan(net);
+
+ lowpan_frags_ns_sysctl_unregister(net);
+- inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
++ inet_frags_exit_net(&ieee802154_lowpan->frags);
+ }
+
+ static struct pernet_operations lowpan_frags_ops = {
+diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
+index b2001b20e029..8c2b869d035f 100644
+--- a/net/ipv4/inet_fragment.c
++++ b/net/ipv4/inet_fragment.c
+@@ -219,8 +219,9 @@ void inet_frags_fini(struct inet_frags *f)
+ }
+ EXPORT_SYMBOL(inet_frags_fini);
+
+-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
++void inet_frags_exit_net(struct netns_frags *nf)
+ {
++ struct inet_frags *f =nf->f;
+ unsigned int seq;
+ int i;
+
+@@ -264,23 +265,23 @@ __acquires(hb->chain_lock)
+ return hb;
+ }
+
+-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
++static inline void fq_unlink(struct inet_frag_queue *fq)
+ {
+ struct inet_frag_bucket *hb;
+
+- hb = get_frag_bucket_locked(fq, f);
++ hb = get_frag_bucket_locked(fq, fq->net->f);
+ hlist_del(&fq->list);
+ fq->flags |= INET_FRAG_COMPLETE;
+ spin_unlock(&hb->chain_lock);
+ }
+
+-void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
++void inet_frag_kill(struct inet_frag_queue *fq)
+ {
+ if (del_timer(&fq->timer))
+ atomic_dec(&fq->refcnt);
+
+ if (!(fq->flags & INET_FRAG_COMPLETE)) {
+- fq_unlink(fq, f);
++ fq_unlink(fq);
+ atomic_dec(&fq->refcnt);
+ }
+ }
+@@ -294,11 +295,12 @@ static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
+ kfree_skb(skb);
+ }
+
+-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
++void inet_frag_destroy(struct inet_frag_queue *q)
+ {
+ struct sk_buff *fp;
+ struct netns_frags *nf;
+ unsigned int sum, sum_truesize = 0;
++ struct inet_frags *f;
+
+ WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
+ WARN_ON(del_timer(&q->timer) != 0);
+@@ -306,6 +308,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
+ /* Release all fragment data. */
+ fp = q->fragments;
+ nf = q->net;
++ f = nf->f;
+ while (fp) {
+ struct sk_buff *xp = fp->next;
+
+@@ -341,7 +344,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
+ atomic_inc(&qp->refcnt);
+ spin_unlock(&hb->chain_lock);
+ qp_in->flags |= INET_FRAG_COMPLETE;
+- inet_frag_put(qp_in, f);
++ inet_frag_put(qp_in);
+ return qp;
+ }
+ }
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index 14c40f799bd4..b87f4185bf63 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -169,7 +169,7 @@ static void ip4_frag_free(struct inet_frag_queue *q)
+
+ static void ipq_put(struct ipq *ipq)
+ {
+- inet_frag_put(&ipq->q, &ip4_frags);
++ inet_frag_put(&ipq->q);
+ }
+
+ /* Kill ipq entry. It is not destroyed immediately,
+@@ -177,7 +177,7 @@ static void ipq_put(struct ipq *ipq)
+ */
+ static void ipq_kill(struct ipq *ipq)
+ {
+- inet_frag_kill(&ipq->q, &ip4_frags);
++ inet_frag_kill(&ipq->q);
+ }
+
+ static bool frag_expire_skip_icmp(u32 user)
+@@ -878,19 +878,21 @@ static int __net_init ipv4_frags_init_net(struct net *net)
+ */
+ net->ipv4.frags.timeout = IP_FRAG_TIME;
+
++ net->ipv4.frags.f = &ip4_frags;
++
+ res = inet_frags_init_net(&net->ipv4.frags);
+ if (res < 0)
+ return res;
+ res = ip4_frags_ns_ctl_register(net);
+ if (res < 0)
+- inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
++ inet_frags_exit_net(&net->ipv4.frags);
+ return res;
+ }
+
+ static void __net_exit ipv4_frags_exit_net(struct net *net)
+ {
+ ip4_frags_ns_ctl_unregister(net);
+- inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
++ inet_frags_exit_net(&net->ipv4.frags);
+ }
+
+ static struct pernet_operations ip4_frags_ops = {
+diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
+index 6859d1e084fe..15a9da5cf296 100644
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -184,7 +184,7 @@ static void nf_ct_frag6_expire(unsigned long data)
+ fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ net = container_of(fq->q.net, struct net, nf_frag.frags);
+
+- ip6_expire_frag_queue(net, fq, &nf_frags);
++ ip6_expire_frag_queue(net, fq);
+ }
+
+ /* Creation primitives. */
+@@ -362,7 +362,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
+ return 0;
+
+ discard_fq:
+- inet_frag_kill(&fq->q, &nf_frags);
++ inet_frag_kill(&fq->q);
+ err:
+ return -1;
+ }
+@@ -383,7 +383,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
+ int payload_len;
+ u8 ecn;
+
+- inet_frag_kill(&fq->q, &nf_frags);
++ inet_frag_kill(&fq->q);
+
+ WARN_ON(head == NULL);
+ WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
+@@ -614,7 +614,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use
+ if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
+ spin_unlock_bh(&fq->q.lock);
+ pr_debug("Can't insert skb to queue\n");
+- inet_frag_put(&fq->q, &nf_frags);
++ inet_frag_put(&fq->q);
+ goto ret_orig;
+ }
+
+@@ -626,7 +626,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use
+ }
+ spin_unlock_bh(&fq->q.lock);
+
+- inet_frag_put(&fq->q, &nf_frags);
++ inet_frag_put(&fq->q);
+ return ret_skb;
+
+ ret_orig:
+@@ -655,19 +655,21 @@ static int nf_ct_net_init(struct net *net)
+ net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
++ net->nf_frag.frags.f = &nf_frags;
++
+ res = inet_frags_init_net(&net->nf_frag.frags);
+ if (res < 0)
+ return res;
+ res = nf_ct_frag6_sysctl_register(net);
+ if (res < 0)
+- inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
++ inet_frags_exit_net(&net->nf_frag.frags);
+ return res;
+ }
+
+ static void nf_ct_net_exit(struct net *net)
+ {
+ nf_ct_frags6_sysctl_unregister(net);
+- inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
++ inet_frags_exit_net(&net->nf_frag.frags);
+ }
+
+ static struct pernet_operations nf_ct_net_ops = {
+diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
+index c38a1abb5c62..bcefda85d27b 100644
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -128,8 +128,7 @@ void ip6_frag_init(struct inet_frag_queue *q, const void *a)
+ }
+ EXPORT_SYMBOL(ip6_frag_init);
+
+-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
+- struct inet_frags *frags)
++void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
+ {
+ struct net_device *dev = NULL;
+
+@@ -138,7 +137,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
+ if (fq->q.flags & INET_FRAG_COMPLETE)
+ goto out;
+
+- inet_frag_kill(&fq->q, frags);
++ inet_frag_kill(&fq->q);
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, fq->iif);
+@@ -166,7 +165,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
+ rcu_read_unlock();
+ out:
+ spin_unlock(&fq->q.lock);
+- inet_frag_put(&fq->q, frags);
++ inet_frag_put(&fq->q);
+ }
+ EXPORT_SYMBOL(ip6_expire_frag_queue);
+
+@@ -178,7 +177,7 @@ static void ip6_frag_expire(unsigned long data)
+ fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ net = container_of(fq->q.net, struct net, ipv6.frags);
+
+- ip6_expire_frag_queue(net, fq, &ip6_frags);
++ ip6_expire_frag_queue(net, fq);
+ }
+
+ static struct frag_queue *
+@@ -359,7 +358,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
+ return -1;
+
+ discard_fq:
+- inet_frag_kill(&fq->q, &ip6_frags);
++ inet_frag_kill(&fq->q);
+ err:
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_REASMFAILS);
+@@ -386,7 +385,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+ int sum_truesize;
+ u8 ecn;
+
+- inet_frag_kill(&fq->q, &ip6_frags);
++ inet_frag_kill(&fq->q);
+
+ ecn = ip_frag_ecn_table[fq->ecn];
+ if (unlikely(ecn == 0xff))
+@@ -562,7 +561,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
+ ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
+
+ spin_unlock(&fq->q.lock);
+- inet_frag_put(&fq->q, &ip6_frags);
++ inet_frag_put(&fq->q);
+ return ret;
+ }
+
+@@ -713,6 +712,7 @@ static int __net_init ipv6_frags_init_net(struct net *net)
+ net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
++ net->ipv6.frags.f = &ip6_frags;
+
+ res = inet_frags_init_net(&net->ipv6.frags);
+ if (res < 0)
+@@ -720,14 +720,14 @@ static int __net_init ipv6_frags_init_net(struct net *net)
+
+ res = ip6_frags_ns_sysctl_register(net);
+ if (res < 0)
+- inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
++ inet_frags_exit_net(&net->ipv6.frags);
+ return res;
+ }
+
+ static void __net_exit ipv6_frags_exit_net(struct net *net)
+ {
+ ip6_frags_ns_sysctl_unregister(net);
+- inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
++ inet_frags_exit_net(&net->ipv6.frags);
+ }
+
+ static struct pernet_operations ip6_frags_ops = {
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-003-inet-frags-refactor-ipfrag_init.patch b/patches.kernel.org/4.4.174-003-inet-frags-refactor-ipfrag_init.patch
new file mode 100644
index 0000000000..abc6410ef2
--- /dev/null
+++ b/patches.kernel.org/4.4.174-003-inet-frags-refactor-ipfrag_init.patch
@@ -0,0 +1,44 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:29:51 -0700
+Subject: [PATCH] inet: frags: refactor ipfrag_init()
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 483a6e4fa055123142d8956866fe2aa9c98d546d
+
+commit 483a6e4fa055123142d8956866fe2aa9c98d546d upstream.
+
+We need to call inet_frags_init() before register_pernet_subsys(),
+as a prerequisite for the following patch ("inet: frags: use rhashtables for reassembly units").
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/ipv4/ip_fragment.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index b87f4185bf63..f4978a5aa55e 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -902,8 +902,6 @@ static struct pernet_operations ip4_frags_ops = {
+
+ void __init ipfrag_init(void)
+ {
+- ip4_frags_ctl_register();
+- register_pernet_subsys(&ip4_frags_ops);
+ ip4_frags.hashfn = ip4_hashfn;
+ ip4_frags.constructor = ip4_frag_init;
+ ip4_frags.destructor = ip4_frag_free;
+@@ -914,4 +912,6 @@ void __init ipfrag_init(void)
+ ip4_frags.frags_cache_name = ip_frag_cache_name;
+ if (inet_frags_init(&ip4_frags))
+ panic("IP: failed to allocate ip4_frags cache\n");
++ ip4_frags_ctl_register();
++ register_pernet_subsys(&ip4_frags_ops);
+ }
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-004-inet-frags-refactor-ipv6_frag_init.patch b/patches.kernel.org/4.4.174-004-inet-frags-refactor-ipv6_frag_init.patch
new file mode 100644
index 0000000000..751cdb3895
--- /dev/null
+++ b/patches.kernel.org/4.4.174-004-inet-frags-refactor-ipv6_frag_init.patch
@@ -0,0 +1,81 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:29:52 -0700
+Subject: [PATCH] inet: frags: refactor ipv6_frag_init()
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 5b975bab23615cd0fdf67af6c9298eb01c4b9f61
+
+commit 5b975bab23615cd0fdf67af6c9298eb01c4b9f61 upstream.
+
+We want to call inet_frags_init() earlier.
+
+This is a prereq to "inet: frags: use rhashtables for reassembly units"
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[bwh: Backported to 4.4: Also delete a redundant assignment to
+ ip6_frags.skb_free]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/ipv6/reassembly.c | 26 ++++++++++++++------------
+ 1 file changed, 14 insertions(+), 12 deletions(-)
+
+diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
+index bcefda85d27b..0b6f4e1f43db 100644
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -739,10 +739,21 @@ int __init ipv6_frag_init(void)
+ {
+ int ret;
+
+- ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
++ ip6_frags.hashfn = ip6_hashfn;
++ ip6_frags.constructor = ip6_frag_init;
++ ip6_frags.destructor = NULL;
++ ip6_frags.qsize = sizeof(struct frag_queue);
++ ip6_frags.match = ip6_frag_match;
++ ip6_frags.frag_expire = ip6_frag_expire;
++ ip6_frags.frags_cache_name = ip6_frag_cache_name;
++ ret = inet_frags_init(&ip6_frags);
+ if (ret)
+ goto out;
+
++ ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
++ if (ret)
++ goto err_protocol;
++
+ ret = ip6_frags_sysctl_register();
+ if (ret)
+ goto err_sysctl;
+@@ -751,17 +762,6 @@ int __init ipv6_frag_init(void)
+ if (ret)
+ goto err_pernet;
+
+- ip6_frags.hashfn = ip6_hashfn;
+- ip6_frags.constructor = ip6_frag_init;
+- ip6_frags.destructor = NULL;
+- ip6_frags.skb_free = NULL;
+- ip6_frags.qsize = sizeof(struct frag_queue);
+- ip6_frags.match = ip6_frag_match;
+- ip6_frags.frag_expire = ip6_frag_expire;
+- ip6_frags.frags_cache_name = ip6_frag_cache_name;
+- ret = inet_frags_init(&ip6_frags);
+- if (ret)
+- goto err_pernet;
+ out:
+ return ret;
+
+@@ -769,6 +769,8 @@ int __init ipv6_frag_init(void)
+ ip6_frags_sysctl_unregister();
+ err_sysctl:
+ inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
++err_protocol:
++ inet_frags_fini(&ip6_frags);
+ goto out;
+ }
+
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-005-inet-frags-refactor-lowpan_net_frag_init.patch b/patches.kernel.org/4.4.174-005-inet-frags-refactor-lowpan_net_frag_init.patch
new file mode 100644
index 0000000000..50a40a72fa
--- /dev/null
+++ b/patches.kernel.org/4.4.174-005-inet-frags-refactor-lowpan_net_frag_init.patch
@@ -0,0 +1,68 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:29:53 -0700
+Subject: [PATCH] inet: frags: refactor lowpan_net_frag_init()
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 807f1844df4ac23594268fa9f41902d0549e92aa
+
+commit 807f1844df4ac23594268fa9f41902d0549e92aa upstream.
+
+We want to call inet_frags_init() earlier in lowpan_net_frag_init().
+Similar to commit "inet: frags: refactor ipv6_frag_init()"
+
+This is a prereq to "inet: frags: use rhashtables for reassembly units"
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/ieee802154/6lowpan/reassembly.c | 20 +++++++++++---------
+ 1 file changed, 11 insertions(+), 9 deletions(-)
+
+diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
+index 8c532743e917..dba0a34cf92a 100644
+--- a/net/ieee802154/6lowpan/reassembly.c
++++ b/net/ieee802154/6lowpan/reassembly.c
+@@ -614,14 +614,6 @@ int __init lowpan_net_frag_init(void)
+ {
+ int ret;
+
+- ret = lowpan_frags_sysctl_register();
+- if (ret)
+- return ret;
+-
+- ret = register_pernet_subsys(&lowpan_frags_ops);
+- if (ret)
+- goto err_pernet;
+-
+ lowpan_frags.hashfn = lowpan_hashfn;
+ lowpan_frags.constructor = lowpan_frag_init;
+ lowpan_frags.destructor = NULL;
+@@ -632,11 +624,21 @@ int __init lowpan_net_frag_init(void)
+ lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
+ ret = inet_frags_init(&lowpan_frags);
+ if (ret)
+- goto err_pernet;
++ goto out;
+
++ ret = lowpan_frags_sysctl_register();
++ if (ret)
++ goto err_sysctl;
++
++ ret = register_pernet_subsys(&lowpan_frags_ops);
++ if (ret)
++ goto err_pernet;
++out:
+ return ret;
+ err_pernet:
+ lowpan_frags_sysctl_unregister();
++err_sysctl:
++ inet_frags_fini(&lowpan_frags);
+ return ret;
+ }
+
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-006-rhashtable-add-rhashtable_lookup_get_insert_k.patch b/patches.kernel.org/4.4.174-006-rhashtable-add-rhashtable_lookup_get_insert_k.patch
new file mode 100644
index 0000000000..a1fe53543e
--- /dev/null
+++ b/patches.kernel.org/4.4.174-006-rhashtable-add-rhashtable_lookup_get_insert_k.patch
@@ -0,0 +1,224 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Wed, 24 Aug 2016 12:31:31 +0200
+Subject: [PATCH] rhashtable: add rhashtable_lookup_get_insert_key()
+Patch-mainline: 4.4.174
+References: bnc#1012382 bsc#1042286
+Git-commit: 5ca8cc5bf11faed257c762018aea9106d529232f
+
+commit 5ca8cc5bf11faed257c762018aea9106d529232f upstream.
+
+This patch modifies __rhashtable_insert_fast() so it returns the
+existing object that clashes with the one that you want to insert.
+In case the object is successfully inserted, NULL is returned.
+Otherwise, you get an error via ERR_PTR().
+
+This patch adapts the existing callers of __rhashtable_insert_fast()
+so they handle this new logic, and it adds a new
+rhashtable_lookup_get_insert_key() interface to fetch this existing
+object.
+
+nf_tables needs this change to improve handling of EEXIST cases via
+honoring the NLM_F_EXCL flag and by checking if the data part of the
+mapping matches what we have.
+
+Cc: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/linux/rhashtable.h | 70 +++++++++++++++++++++++++++++++-------
+ lib/rhashtable.c | 10 ++++--
+ 2 files changed, 64 insertions(+), 16 deletions(-)
+
+diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
+index e50b31d18462..e07eca01316c 100644
+--- a/include/linux/rhashtable.h
++++ b/include/linux/rhashtable.h
+@@ -343,7 +343,8 @@ int rhashtable_init(struct rhashtable *ht,
+ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
+ const void *key,
+ struct rhash_head *obj,
+- struct bucket_table *old_tbl);
++ struct bucket_table *old_tbl,
++ void **data);
+ int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl);
+
+ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter);
+@@ -562,8 +563,11 @@ static inline void *rhashtable_lookup_fast(
+ return NULL;
+ }
+
+-/* Internal function, please use rhashtable_insert_fast() instead */
+-static inline int __rhashtable_insert_fast(
++/* Internal function, please use rhashtable_insert_fast() instead. This
++ * function returns the existing element already in hashes in there is a clash,
++ * otherwise it returns an error via ERR_PTR().
++ */
++static inline void *__rhashtable_insert_fast(
+ struct rhashtable *ht, const void *key, struct rhash_head *obj,
+ const struct rhashtable_params params)
+ {
+@@ -576,6 +580,7 @@ static inline int __rhashtable_insert_fast(
+ spinlock_t *lock;
+ unsigned int elasticity;
+ unsigned int hash;
++ void *data = NULL;
+ int err;
+
+ restart:
+@@ -600,11 +605,14 @@ static inline int __rhashtable_insert_fast(
+
+ new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ if (unlikely(new_tbl)) {
+- tbl = rhashtable_insert_slow(ht, key, obj, new_tbl);
++ tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data);
+ if (!IS_ERR_OR_NULL(tbl))
+ goto slow_path;
+
+ err = PTR_ERR(tbl);
++ if (err == -EEXIST)
++ err = 0;
++
+ goto out;
+ }
+
+@@ -618,25 +626,25 @@ static inline int __rhashtable_insert_fast(
+ err = rhashtable_insert_rehash(ht, tbl);
+ rcu_read_unlock();
+ if (err)
+- return err;
++ return ERR_PTR(err);
+
+ goto restart;
+ }
+
+- err = -EEXIST;
++ err = 0;
+ elasticity = ht->elasticity;
+ rht_for_each(head, tbl, hash) {
+ if (key &&
+ unlikely(!(params.obj_cmpfn ?
+ params.obj_cmpfn(&arg, rht_obj(ht, head)) :
+- rhashtable_compare(&arg, rht_obj(ht, head)))))
++ rhashtable_compare(&arg, rht_obj(ht, head))))) {
++ data = rht_obj(ht, head);
+ goto out;
++ }
+ if (!--elasticity)
+ goto slow_path;
+ }
+
+- err = 0;
+-
+ head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+
+ RCU_INIT_POINTER(obj->next, head);
+@@ -651,7 +659,7 @@ static inline int __rhashtable_insert_fast(
+ spin_unlock_bh(lock);
+ rcu_read_unlock();
+
+- return err;
++ return err ? ERR_PTR(err) : data;
+ }
+
+ /**
+@@ -674,7 +682,13 @@ static inline int rhashtable_insert_fast(
+ struct rhashtable *ht, struct rhash_head *obj,
+ const struct rhashtable_params params)
+ {
+- return __rhashtable_insert_fast(ht, NULL, obj, params);
++ void *ret;
++
++ ret = __rhashtable_insert_fast(ht, NULL, obj, params);
++ if (IS_ERR(ret))
++ return PTR_ERR(ret);
++
++ return ret == NULL ? 0 : -EEXIST;
+ }
+
+ /**
+@@ -703,11 +717,15 @@ static inline int rhashtable_lookup_insert_fast(
+ const struct rhashtable_params params)
+ {
+ const char *key = rht_obj(ht, obj);
++ void *ret;
+
+ BUG_ON(ht->p.obj_hashfn);
+
+- return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj,
+- params);
++ ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params);
++ if (IS_ERR(ret))
++ return PTR_ERR(ret);
++
++ return ret == NULL ? 0 : -EEXIST;
+ }
+
+ /**
+@@ -735,6 +753,32 @@ static inline int rhashtable_lookup_insert_fast(
+ static inline int rhashtable_lookup_insert_key(
+ struct rhashtable *ht, const void *key, struct rhash_head *obj,
+ const struct rhashtable_params params)
++{
++ void *ret;
++
++ BUG_ON(!ht->p.obj_hashfn || !key);
++
++ ret = __rhashtable_insert_fast(ht, key, obj, params);
++ if (IS_ERR(ret))
++ return PTR_ERR(ret);
++
++ return ret == NULL ? 0 : -EEXIST;
++}
++
++/**
++ * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
++ * @ht: hash table
++ * @obj: pointer to hash head inside object
++ * @params: hash table parameters
++ * @data: pointer to element data already in hashes
++ *
++ * Just like rhashtable_lookup_insert_key(), but this function returns the
++ * object if it exists, NULL if it does not and the insertion was successful,
++ * and an ERR_PTR otherwise.
++ */
++static inline void *rhashtable_lookup_get_insert_key(
++ struct rhashtable *ht, const void *key, struct rhash_head *obj,
++ const struct rhashtable_params params)
+ {
+ BUG_ON(!ht->p.obj_hashfn || !key);
+
+diff --git a/lib/rhashtable.c b/lib/rhashtable.c
+index 37ea94b636a3..991bee32e52e 100644
+--- a/lib/rhashtable.c
++++ b/lib/rhashtable.c
+@@ -441,7 +441,8 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
+ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
+ const void *key,
+ struct rhash_head *obj,
+- struct bucket_table *tbl)
++ struct bucket_table *tbl,
++ void **data)
+ {
+ struct rhash_head *head;
+ unsigned int hash;
+@@ -452,8 +453,11 @@ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
+ spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
+
+ err = -EEXIST;
+- if (key && rhashtable_lookup_fast(ht, key, ht->p))
+- goto exit;
++ if (key) {
++ *data = rhashtable_lookup_fast(ht, key, ht->p);
++ if (*data)
++ goto exit;
++ }
+
+ err = -E2BIG;
+ if (unlikely(rht_grow_above_max(ht, tbl)))
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-007-rhashtable-Add-rhashtable_lookup.patch b/patches.kernel.org/4.4.174-007-rhashtable-Add-rhashtable_lookup.patch
new file mode 100644
index 0000000000..c9bf763655
--- /dev/null
+++ b/patches.kernel.org/4.4.174-007-rhashtable-Add-rhashtable_lookup.patch
@@ -0,0 +1,125 @@
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Date: Fri, 7 Dec 2018 17:16:46 +0000
+Subject: [PATCH] rhashtable: Add rhashtable_lookup()
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 9e5f4d0b79f8708db79c912404e68c915eb54f4d
+
+Extracted from commit ca26893f05e8 "rhashtable: Add rhlist interface".
+
+Cc: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/linux/rhashtable.h | 69 ++++++++++++++++++++++++++++----------
+ 1 file changed, 52 insertions(+), 17 deletions(-)
+
+diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
+index e07eca01316c..753835d05be8 100644
+--- a/include/linux/rhashtable.h
++++ b/include/linux/rhashtable.h
+@@ -515,18 +515,8 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
+ return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
+ }
+
+-/**
+- * rhashtable_lookup_fast - search hash table, inlined version
+- * @ht: hash table
+- * @key: the pointer to the key
+- * @params: hash table parameters
+- *
+- * Computes the hash value for the key and traverses the bucket chain looking
+- * for a entry with an identical key. The first matching entry is returned.
+- *
+- * Returns the first entry on which the compare function returned true.
+- */
+-static inline void *rhashtable_lookup_fast(
++/* Internal function, do not use. */
++static inline struct rhash_head *__rhashtable_lookup(
+ struct rhashtable *ht, const void *key,
+ const struct rhashtable_params params)
+ {
+@@ -538,8 +528,6 @@ static inline void *rhashtable_lookup_fast(
+ struct rhash_head *he;
+ unsigned int hash;
+
+- rcu_read_lock();
+-
+ tbl = rht_dereference_rcu(ht->tbl, ht);
+ restart:
+ hash = rht_key_hashfn(ht, tbl, key, params);
+@@ -548,8 +536,7 @@ static inline void *rhashtable_lookup_fast(
+ params.obj_cmpfn(&arg, rht_obj(ht, he)) :
+ rhashtable_compare(&arg, rht_obj(ht, he)))
+ continue;
+- rcu_read_unlock();
+- return rht_obj(ht, he);
++ return he;
+ }
+
+ /* Ensure we see any new tables. */
+@@ -558,11 +545,59 @@ static inline void *rhashtable_lookup_fast(
+ tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ if (unlikely(tbl))
+ goto restart;
+- rcu_read_unlock();
+
+ return NULL;
+ }
+
++/**
++ * rhashtable_lookup - search hash table
++ * @ht: hash table
++ * @key: the pointer to the key
++ * @params: hash table parameters
++ *
++ * Computes the hash value for the key and traverses the bucket chain looking
++ * for a entry with an identical key. The first matching entry is returned.
++ *
++ * This must only be called under the RCU read lock.
++ *
++ * Returns the first entry on which the compare function returned true.
++ */
++static inline void *rhashtable_lookup(
++ struct rhashtable *ht, const void *key,
++ const struct rhashtable_params params)
++{
++ struct rhash_head *he = __rhashtable_lookup(ht, key, params);
++
++ return he ? rht_obj(ht, he) : NULL;
++}
++
++/**
++ * rhashtable_lookup_fast - search hash table, without RCU read lock
++ * @ht: hash table
++ * @key: the pointer to the key
++ * @params: hash table parameters
++ *
++ * Computes the hash value for the key and traverses the bucket chain looking
++ * for a entry with an identical key. The first matching entry is returned.
++ *
++ * Only use this function when you have other mechanisms guaranteeing
++ * that the object won't go away after the RCU read lock is released.
++ *
++ * Returns the first entry on which the compare function returned true.
++ */
++static inline void *rhashtable_lookup_fast(
++ struct rhashtable *ht, const void *key,
++ const struct rhashtable_params params)
++{
++ void *obj;
++
++ rcu_read_lock();
++ obj = rhashtable_lookup(ht, key, params);
++ rcu_read_unlock();
++
++ return obj;
++}
++
+ /* Internal function, please use rhashtable_insert_fast() instead. This
+ * function returns the existing element already in hashes in there is a clash,
+ * otherwise it returns an error via ERR_PTR().
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-008-rhashtable-add-schedule-points.patch b/patches.kernel.org/4.4.174-008-rhashtable-add-schedule-points.patch
new file mode 100644
index 0000000000..3cad80fdb4
--- /dev/null
+++ b/patches.kernel.org/4.4.174-008-rhashtable-add-schedule-points.patch
@@ -0,0 +1,50 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:29:55 -0700
+Subject: [PATCH] rhashtable: add schedule points
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: ae6da1f503abb5a5081f9f6c4a6881de97830f3e
+
+commit ae6da1f503abb5a5081f9f6c4a6881de97830f3e upstream.
+
+Rehashing and destroying large hash table takes a lot of time,
+and happens in process context. It is safe to add cond_resched()
+in rhashtable_rehash_table() and rhashtable_free_and_destroy()
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ lib/rhashtable.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/lib/rhashtable.c b/lib/rhashtable.c
+index 991bee32e52e..7bb8649429bf 100644
+--- a/lib/rhashtable.c
++++ b/lib/rhashtable.c
+@@ -250,8 +250,10 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
+ if (!new_tbl)
+ return 0;
+
+- for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
++ for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
+ rhashtable_rehash_chain(ht, old_hash);
++ cond_resched();
++ }
+
+ /* Publish the new table pointer. */
+ rcu_assign_pointer(ht->tbl, new_tbl);
+@@ -842,6 +844,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
+ for (i = 0; i < tbl->size; i++) {
+ struct rhash_head *pos, *next;
+
++ cond_resched();
+ for (pos = rht_dereference(tbl->buckets[i], ht),
+ next = !rht_is_a_nulls(pos) ?
+ rht_dereference(pos->next, ht) : NULL;
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-009-inet-frags-use-rhashtables-for-reassembly-uni.patch b/patches.kernel.org/4.4.174-009-inet-frags-use-rhashtables-for-reassembly-uni.patch
new file mode 100644
index 0000000000..6e98bc0135
--- /dev/null
+++ b/patches.kernel.org/4.4.174-009-inet-frags-use-rhashtables-for-reassembly-uni.patch
@@ -0,0 +1,1354 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:29:56 -0700
+Subject: [PATCH] inet: frags: use rhashtables for reassembly units
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 648700f76b03b7e8149d13cc2bdb3355035258a9
+
+commit 648700f76b03b7e8149d13cc2bdb3355035258a9 upstream.
+
+Some applications still rely on IP fragmentation, and to be fair linux
+reassembly unit is not working under any serious load.
+
+It uses static hash tables of 1024 buckets, and up to 128 items per bucket (!!!)
+
+A work queue is supposed to garbage collect items when host is under memory
+pressure, and doing a hash rebuild, changing seed used in hash computations.
+
+This work queue blocks softirqs for up to 25 ms when doing a hash rebuild,
+occurring every 5 seconds if host is under fire.
+
+Then there is the problem of sharing this hash table for all netns.
+
+It is time to switch to rhashtables, and allocate one of them per netns
+to speedup netns dismantle, since this is a critical metric these days.
+
+Lookup is now using RCU. A followup patch will even remove
+the refcount hold/release left from prior implementation and save
+a couple of atomic operations.
+
+Before this patch, 16 cpus (16 RX queue NIC) could not handle more
+than 1 Mpps frags DDOS.
+
+After the patch, I reach 9 Mpps without any tuning, and can use up to 2GB
+of storage for the fragments (exact number depends on frags being evicted
+after timeout)
+
+$ grep FRAG /proc/net/sockstat
+FRAG: inuse 1966916 memory 2140004608
+
+A followup patch will change the limits for 64bit arches.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
+Cc: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Florian Westphal <fw@strlen.de>
+Cc: Jesper Dangaard Brouer <brouer@redhat.com>
+Cc: Alexander Aring <alex.aring@gmail.com>
+Cc: Stefan Schmidt <stefan@osg.samsung.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[bwh: Backported to 4.4: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ Documentation/networking/ip-sysctl.txt | 7 +-
+ include/net/inet_frag.h | 81 +++---
+ include/net/ipv6.h | 16 +-
+ net/ieee802154/6lowpan/6lowpan_i.h | 26 +-
+ net/ieee802154/6lowpan/reassembly.c | 91 +++---
+ net/ipv4/inet_fragment.c | 349 +++++-------------------
+ net/ipv4/ip_fragment.c | 112 ++++----
+ net/ipv6/netfilter/nf_conntrack_reasm.c | 51 +---
+ net/ipv6/reassembly.c | 110 ++++----
+ 9 files changed, 267 insertions(+), 576 deletions(-)
+
+diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
+index 2ea4c45cf1c8..2aa56ccaa996 100644
+--- a/Documentation/networking/ip-sysctl.txt
++++ b/Documentation/networking/ip-sysctl.txt
+@@ -113,13 +113,10 @@ min_adv_mss - INTEGER
+ IP Fragmentation:
+
+ ipfrag_high_thresh - INTEGER
+- Maximum memory used to reassemble IP fragments. When
+- ipfrag_high_thresh bytes of memory is allocated for this purpose,
+- the fragment handler will toss packets until ipfrag_low_thresh
+- is reached. This also serves as a maximum limit to namespaces
+- different from the initial one.
++ Maximum memory used to reassemble IP fragments.
+
+ ipfrag_low_thresh - INTEGER
++ (Obsolete since linux-4.17)
+ Maximum memory used to reassemble IP fragments before the kernel
+ begins to remove incomplete fragment queues to free up resources.
+ The kernel still accepts new fragments for defragmentation.
+diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
+index 12589f08e064..623eb8222a75 100644
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -1,7 +1,11 @@
+ #ifndef __NET_FRAG_H__
+ #define __NET_FRAG_H__
+
++#include <linux/rhashtable.h>
++
+ struct netns_frags {
++ struct rhashtable rhashtable ____cacheline_aligned_in_smp;
++
+ /* Keep atomic mem on separate cachelines in structs that include it */
+ atomic_t mem ____cacheline_aligned_in_smp;
+ /* sysctls */
+@@ -24,12 +28,30 @@ enum {
+ INET_FRAG_COMPLETE = BIT(2),
+ };
+
++struct frag_v4_compare_key {
++ __be32 saddr;
++ __be32 daddr;
++ u32 user;
++ u32 vif;
++ __be16 id;
++ u16 protocol;
++};
++
++struct frag_v6_compare_key {
++ struct in6_addr saddr;
++ struct in6_addr daddr;
++ u32 user;
++ __be32 id;
++ u32 iif;
++};
++
+ /**
+ * struct inet_frag_queue - fragment queue
+ *
+- * @lock: spinlock protecting the queue
++ * @node: rhash node
++ * @key: keys identifying this frag.
+ * @timer: queue expiration timer
+- * @list: hash bucket list
++ * @lock: spinlock protecting this frag
+ * @refcnt: reference count of the queue
+ * @fragments: received fragments head
+ * @fragments_tail: received fragments tail
+@@ -39,12 +61,16 @@ enum {
+ * @flags: fragment queue flags
+ * @max_size: maximum received fragment size
+ * @net: namespace that this frag belongs to
+- * @list_evictor: list of queues to forcefully evict (e.g. due to low memory)
++ * @rcu: rcu head for freeing deferall
+ */
+ struct inet_frag_queue {
+- spinlock_t lock;
++ struct rhash_head node;
++ union {
++ struct frag_v4_compare_key v4;
++ struct frag_v6_compare_key v6;
++ } key;
+ struct timer_list timer;
+- struct hlist_node list;
++ spinlock_t lock;
+ atomic_t refcnt;
+ struct sk_buff *fragments;
+ struct sk_buff *fragments_tail;
+@@ -53,45 +79,13 @@ struct inet_frag_queue {
+ int meat;
+ __u8 flags;
+ u16 max_size;
+- struct netns_frags *net;
+- struct hlist_node list_evictor;
+-};
+-
+-#define INETFRAGS_HASHSZ 1024
+-
+-/* averaged:
+- * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ /
+- * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or
+- * struct frag_queue))
+- */
+-#define INETFRAGS_MAXDEPTH 128
+-
+-struct inet_frag_bucket {
+- struct hlist_head chain;
+- spinlock_t chain_lock;
++ struct netns_frags *net;
++ struct rcu_head rcu;
+ };
+
+ struct inet_frags {
+- struct inet_frag_bucket hash[INETFRAGS_HASHSZ];
+-
+- struct work_struct frags_work;
+- unsigned int next_bucket;
+- unsigned long last_rebuild_jiffies;
+- bool rebuild;
+-
+- /* The first call to hashfn is responsible to initialize
+- * rnd. This is best done with net_get_random_once.
+- *
+- * rnd_seqlock is used to let hash insertion detect
+- * when it needs to re-lookup the hash chain to use.
+- */
+- u32 rnd;
+- seqlock_t rnd_seqlock;
+ int qsize;
+
+- unsigned int (*hashfn)(const struct inet_frag_queue *);
+- bool (*match)(const struct inet_frag_queue *q,
+- const void *arg);
+ void (*constructor)(struct inet_frag_queue *q,
+ const void *arg);
+ void (*destructor)(struct inet_frag_queue *);
+@@ -99,6 +93,7 @@ struct inet_frags {
+ void (*frag_expire)(unsigned long data);
+ struct kmem_cache *frags_cachep;
+ const char *frags_cache_name;
++ struct rhashtable_params rhash_params;
+ };
+
+ int inet_frags_init(struct inet_frags *);
+@@ -107,15 +102,13 @@ void inet_frags_fini(struct inet_frags *);
+ static inline int inet_frags_init_net(struct netns_frags *nf)
+ {
+ atomic_set(&nf->mem, 0);
+- return 0;
++ return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
+ }
+ void inet_frags_exit_net(struct netns_frags *nf);
+
+ void inet_frag_kill(struct inet_frag_queue *q);
+ void inet_frag_destroy(struct inet_frag_queue *q);
+-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
+- struct inet_frags *f, void *key, unsigned int hash);
+-
++struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
+ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
+ const char *prefix);
+
+@@ -127,7 +120,7 @@ static inline void inet_frag_put(struct inet_frag_queue *q)
+
+ static inline bool inet_frag_evicting(struct inet_frag_queue *q)
+ {
+- return !hlist_unhashed(&q->list_evictor);
++ return false;
+ }
+
+ /* Memory Tracking Functions. */
+diff --git a/include/net/ipv6.h b/include/net/ipv6.h
+index 9d8eace6d455..2067bbec021c 100644
+--- a/include/net/ipv6.h
++++ b/include/net/ipv6.h
+@@ -505,17 +505,8 @@ enum ip6_defrag_users {
+ __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
+ };
+
+-struct ip6_create_arg {
+- __be32 id;
+- u32 user;
+- const struct in6_addr *src;
+- const struct in6_addr *dst;
+- int iif;
+- u8 ecn;
+-};
+-
+ void ip6_frag_init(struct inet_frag_queue *q, const void *a);
+-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a);
++extern const struct rhashtable_params ip6_rhash_params;
+
+ /*
+ * Equivalent of ipv4 struct ip
+@@ -523,11 +514,6 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a);
+ struct frag_queue {
+ struct inet_frag_queue q;
+
+- __be32 id; /* fragment id */
+- u32 user;
+- struct in6_addr saddr;
+- struct in6_addr daddr;
+-
+ int iif;
+ unsigned int csum;
+ __u16 nhoffset;
+diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
+index b4e17a7c0df0..3c5fc06883ba 100644
+--- a/net/ieee802154/6lowpan/6lowpan_i.h
++++ b/net/ieee802154/6lowpan/6lowpan_i.h
+@@ -16,37 +16,19 @@ typedef unsigned __bitwise__ lowpan_rx_result;
+ #define LOWPAN_DISPATCH_FRAG1 0xc0
+ #define LOWPAN_DISPATCH_FRAGN 0xe0
+
+-struct lowpan_create_arg {
++struct frag_lowpan_compare_key {
+ u16 tag;
+ u16 d_size;
+- const struct ieee802154_addr *src;
+- const struct ieee802154_addr *dst;
++ const struct ieee802154_addr src;
++ const struct ieee802154_addr dst;
+ };
+
+-/* Equivalent of ipv4 struct ip
++/* Equivalent of ipv4 struct ipq
+ */
+ struct lowpan_frag_queue {
+ struct inet_frag_queue q;
+-
+- u16 tag;
+- u16 d_size;
+- struct ieee802154_addr saddr;
+- struct ieee802154_addr daddr;
+ };
+
+-static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
+-{
+- switch (a->mode) {
+- case IEEE802154_ADDR_LONG:
+- return (((__force u64)a->extended_addr) >> 32) ^
+- (((__force u64)a->extended_addr) & 0xffffffff);
+- case IEEE802154_ADDR_SHORT:
+- return (__force u32)(a->short_addr);
+- default:
+- return 0;
+- }
+-}
+-
+ /* private device info */
+ struct lowpan_dev_info {
+ struct net_device *wdev; /* wpan device ptr */
+diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
+index dba0a34cf92a..65c0b7349f9d 100644
+--- a/net/ieee802154/6lowpan/reassembly.c
++++ b/net/ieee802154/6lowpan/reassembly.c
+@@ -37,47 +37,15 @@ static struct inet_frags lowpan_frags;
+ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
+ struct sk_buff *prev, struct net_device *ldev);
+
+-static unsigned int lowpan_hash_frag(u16 tag, u16 d_size,
+- const struct ieee802154_addr *saddr,
+- const struct ieee802154_addr *daddr)
+-{
+- net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd));
+- return jhash_3words(ieee802154_addr_hash(saddr),
+- ieee802154_addr_hash(daddr),
+- (__force u32)(tag + (d_size << 16)),
+- lowpan_frags.rnd);
+-}
+-
+-static unsigned int lowpan_hashfn(const struct inet_frag_queue *q)
+-{
+- const struct lowpan_frag_queue *fq;
+-
+- fq = container_of(q, struct lowpan_frag_queue, q);
+- return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr);
+-}
+-
+-static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a)
+-{
+- const struct lowpan_frag_queue *fq;
+- const struct lowpan_create_arg *arg = a;
+-
+- fq = container_of(q, struct lowpan_frag_queue, q);
+- return fq->tag == arg->tag && fq->d_size == arg->d_size &&
+- ieee802154_addr_equal(&fq->saddr, arg->src) &&
+- ieee802154_addr_equal(&fq->daddr, arg->dst);
+-}
+-
+ static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
+ {
+- const struct lowpan_create_arg *arg = a;
++ const struct frag_lowpan_compare_key *key = a;
+ struct lowpan_frag_queue *fq;
+
+ fq = container_of(q, struct lowpan_frag_queue, q);
+
+- fq->tag = arg->tag;
+- fq->d_size = arg->d_size;
+- fq->saddr = *arg->src;
+- fq->daddr = *arg->dst;
++ BUILD_BUG_ON(sizeof(*key) > sizeof(q->key));
++ memcpy(&q->key, key, sizeof(*key));
+ }
+
+ static void lowpan_frag_expire(unsigned long data)
+@@ -104,21 +72,17 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
+ const struct ieee802154_addr *src,
+ const struct ieee802154_addr *dst)
+ {
+- struct inet_frag_queue *q;
+- struct lowpan_create_arg arg;
+- unsigned int hash;
+ struct netns_ieee802154_lowpan *ieee802154_lowpan =
+ net_ieee802154_lowpan(net);
++ struct frag_lowpan_compare_key key = {
++ .tag = cb->d_tag,
++ .d_size = cb->d_size,
++ .src = *src,
++ .dst = *dst,
++ };
++ struct inet_frag_queue *q;
+
+- arg.tag = cb->d_tag;
+- arg.d_size = cb->d_size;
+- arg.src = src;
+- arg.dst = dst;
+-
+- hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst);
+-
+- q = inet_frag_find(&ieee802154_lowpan->frags,
+- &lowpan_frags, &arg, hash);
++ q = inet_frag_find(&ieee802154_lowpan->frags, &key);
+ if (IS_ERR_OR_NULL(q)) {
+ inet_frag_maybe_warn_overflow(q, pr_fmt());
+ return NULL;
+@@ -610,18 +574,47 @@ static struct pernet_operations lowpan_frags_ops = {
+ .exit = lowpan_frags_exit_net,
+ };
+
++static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed)
++{
++ return jhash2(data,
++ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
++}
++
++static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed)
++{
++ const struct inet_frag_queue *fq = data;
++
++ return jhash2((const u32 *)&fq->key,
++ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
++}
++
++static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
++{
++ const struct frag_lowpan_compare_key *key = arg->key;
++ const struct inet_frag_queue *fq = ptr;
++
++ return !!memcmp(&fq->key, key, sizeof(*key));
++}
++
++static const struct rhashtable_params lowpan_rhash_params = {
++ .head_offset = offsetof(struct inet_frag_queue, node),
++ .hashfn = lowpan_key_hashfn,
++ .obj_hashfn = lowpan_obj_hashfn,
++ .obj_cmpfn = lowpan_obj_cmpfn,
++ .automatic_shrinking = true,
++};
++
+ int __init lowpan_net_frag_init(void)
+ {
+ int ret;
+
+- lowpan_frags.hashfn = lowpan_hashfn;
+ lowpan_frags.constructor = lowpan_frag_init;
+ lowpan_frags.destructor = NULL;
+ lowpan_frags.skb_free = NULL;
+ lowpan_frags.qsize = sizeof(struct frag_queue);
+- lowpan_frags.match = lowpan_frag_match;
+ lowpan_frags.frag_expire = lowpan_frag_expire;
+ lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
++ lowpan_frags.rhash_params = lowpan_rhash_params;
+ ret = inet_frags_init(&lowpan_frags);
+ if (ret)
+ goto out;
+diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
+index 8c2b869d035f..10cd7c182136 100644
+--- a/net/ipv4/inet_fragment.c
++++ b/net/ipv4/inet_fragment.c
+@@ -25,12 +25,6 @@
+ #include <net/inet_frag.h>
+ #include <net/inet_ecn.h>
+
+-#define INETFRAGS_EVICT_BUCKETS 128
+-#define INETFRAGS_EVICT_MAX 512
+-
+-/* don't rebuild inetfrag table with new secret more often than this */
+-#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)
+-
+ /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
+ * Value : 0xff if frame should be dropped.
+ * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
+@@ -52,157 +46,8 @@ const u8 ip_frag_ecn_table[16] = {
+ };
+ EXPORT_SYMBOL(ip_frag_ecn_table);
+
+-static unsigned int
+-inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
+-{
+- return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
+-}
+-
+-static bool inet_frag_may_rebuild(struct inet_frags *f)
+-{
+- return time_after(jiffies,
+- f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
+-}
+-
+-static void inet_frag_secret_rebuild(struct inet_frags *f)
+-{
+- int i;
+-
+- write_seqlock_bh(&f->rnd_seqlock);
+-
+- if (!inet_frag_may_rebuild(f))
+- goto out;
+-
+- get_random_bytes(&f->rnd, sizeof(u32));
+-
+- for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+- struct inet_frag_bucket *hb;
+- struct inet_frag_queue *q;
+- struct hlist_node *n;
+-
+- hb = &f->hash[i];
+- spin_lock(&hb->chain_lock);
+-
+- hlist_for_each_entry_safe(q, n, &hb->chain, list) {
+- unsigned int hval = inet_frag_hashfn(f, q);
+-
+- if (hval != i) {
+- struct inet_frag_bucket *hb_dest;
+-
+- hlist_del(&q->list);
+-
+- /* Relink to new hash chain. */
+- hb_dest = &f->hash[hval];
+-
+- /* This is the only place where we take
+- * another chain_lock while already holding
+- * one. As this will not run concurrently,
+- * we cannot deadlock on hb_dest lock below, if its
+- * already locked it will be released soon since
+- * other caller cannot be waiting for hb lock
+- * that we've taken above.
+- */
+- spin_lock_nested(&hb_dest->chain_lock,
+- SINGLE_DEPTH_NESTING);
+- hlist_add_head(&q->list, &hb_dest->chain);
+- spin_unlock(&hb_dest->chain_lock);
+- }
+- }
+- spin_unlock(&hb->chain_lock);
+- }
+-
+- f->rebuild = false;
+- f->last_rebuild_jiffies = jiffies;
+-out:
+- write_sequnlock_bh(&f->rnd_seqlock);
+-}
+-
+-static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
+-{
+- if (!hlist_unhashed(&q->list_evictor))
+- return false;
+-
+- return q->net->low_thresh == 0 ||
+- frag_mem_limit(q->net) >= q->net->low_thresh;
+-}
+-
+-static unsigned int
+-inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
+-{
+- struct inet_frag_queue *fq;
+- struct hlist_node *n;
+- unsigned int evicted = 0;
+- HLIST_HEAD(expired);
+-
+- spin_lock(&hb->chain_lock);
+-
+- hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
+- if (!inet_fragq_should_evict(fq))
+- continue;
+-
+- if (!del_timer(&fq->timer))
+- continue;
+-
+- hlist_add_head(&fq->list_evictor, &expired);
+- ++evicted;
+- }
+-
+- spin_unlock(&hb->chain_lock);
+-
+- hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
+- f->frag_expire((unsigned long) fq);
+-
+- return evicted;
+-}
+-
+-static void inet_frag_worker(struct work_struct *work)
+-{
+- unsigned int budget = INETFRAGS_EVICT_BUCKETS;
+- unsigned int i, evicted = 0;
+- struct inet_frags *f;
+-
+- f = container_of(work, struct inet_frags, frags_work);
+-
+- BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
+-
+- local_bh_disable();
+-
+- for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
+- evicted += inet_evict_bucket(f, &f->hash[i]);
+- i = (i + 1) & (INETFRAGS_HASHSZ - 1);
+- if (evicted > INETFRAGS_EVICT_MAX)
+- break;
+- }
+-
+- f->next_bucket = i;
+-
+- local_bh_enable();
+-
+- if (f->rebuild && inet_frag_may_rebuild(f))
+- inet_frag_secret_rebuild(f);
+-}
+-
+-static void inet_frag_schedule_worker(struct inet_frags *f)
+-{
+- if (unlikely(!work_pending(&f->frags_work)))
+- schedule_work(&f->frags_work);
+-}
+-
+ int inet_frags_init(struct inet_frags *f)
+ {
+- int i;
+-
+- INIT_WORK(&f->frags_work, inet_frag_worker);
+-
+- for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+- struct inet_frag_bucket *hb = &f->hash[i];
+-
+- spin_lock_init(&hb->chain_lock);
+- INIT_HLIST_HEAD(&hb->chain);
+- }
+-
+- seqlock_init(&f->rnd_seqlock);
+- f->last_rebuild_jiffies = 0;
+ f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
+ NULL);
+ if (!f->frags_cachep)
+@@ -214,66 +59,42 @@ EXPORT_SYMBOL(inet_frags_init);
+
+ void inet_frags_fini(struct inet_frags *f)
+ {
+- cancel_work_sync(&f->frags_work);
++ /* We must wait until all inet_frag_destroy_rcu() callbacks have completed. */
++ rcu_barrier();
++
+ kmem_cache_destroy(f->frags_cachep);
++ f->frags_cachep = NULL;
+ }
+ EXPORT_SYMBOL(inet_frags_fini);
+
+-void inet_frags_exit_net(struct netns_frags *nf)
+-{
+- struct inet_frags *f =nf->f;
+- unsigned int seq;
+- int i;
+-
+- nf->low_thresh = 0;
+-
+-evict_again:
+- local_bh_disable();
+- seq = read_seqbegin(&f->rnd_seqlock);
+-
+- for (i = 0; i < INETFRAGS_HASHSZ ; i++)
+- inet_evict_bucket(f, &f->hash[i]);
+-
+- local_bh_enable();
+- cond_resched();
+-
+- if (read_seqretry(&f->rnd_seqlock, seq) ||
+- sum_frag_mem_limit(nf))
+- goto evict_again;
+-}
+-EXPORT_SYMBOL(inet_frags_exit_net);
+-
+-static struct inet_frag_bucket *
+-get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
+-__acquires(hb->chain_lock)
++static void inet_frags_free_cb(void *ptr, void *arg)
+ {
+- struct inet_frag_bucket *hb;
+- unsigned int seq, hash;
++ struct inet_frag_queue *fq = ptr;
+
+- restart:
+- seq = read_seqbegin(&f->rnd_seqlock);
+-
+- hash = inet_frag_hashfn(f, fq);
+- hb = &f->hash[hash];
++ /* If we can not cancel the timer, it means this frag_queue
++ * is already disappearing, we have nothing to do.
++ * Otherwise, we own a refcount until the end of this function.
++ */
++ if (!del_timer(&fq->timer))
++ return;
+
+- spin_lock(&hb->chain_lock);
+- if (read_seqretry(&f->rnd_seqlock, seq)) {
+- spin_unlock(&hb->chain_lock);
+- goto restart;
++ spin_lock_bh(&fq->lock);
++ if (!(fq->flags & INET_FRAG_COMPLETE)) {
++ fq->flags |= INET_FRAG_COMPLETE;
++ atomic_dec(&fq->refcnt);
+ }
++ spin_unlock_bh(&fq->lock);
+
+- return hb;
++ inet_frag_put(fq);
+ }
+
+-static inline void fq_unlink(struct inet_frag_queue *fq)
++void inet_frags_exit_net(struct netns_frags *nf)
+ {
+- struct inet_frag_bucket *hb;
++ nf->low_thresh = 0; /* prevent creation of new frags */
+
+- hb = get_frag_bucket_locked(fq, fq->net->f);
+- hlist_del(&fq->list);
+- fq->flags |= INET_FRAG_COMPLETE;
+- spin_unlock(&hb->chain_lock);
++ rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
+ }
++EXPORT_SYMBOL(inet_frags_exit_net);
+
+ void inet_frag_kill(struct inet_frag_queue *fq)
+ {
+@@ -281,7 +102,10 @@ void inet_frag_kill(struct inet_frag_queue *fq)
+ atomic_dec(&fq->refcnt);
+
+ if (!(fq->flags & INET_FRAG_COMPLETE)) {
+- fq_unlink(fq);
++ struct netns_frags *nf = fq->net;
++
++ fq->flags |= INET_FRAG_COMPLETE;
++ rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
+ atomic_dec(&fq->refcnt);
+ }
+ }
+@@ -295,6 +119,17 @@ static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
+ kfree_skb(skb);
+ }
+
++static void inet_frag_destroy_rcu(struct rcu_head *head)
++{
++ struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
++ rcu);
++ struct inet_frags *f = q->net->f;
++
++ if (f->destructor)
++ f->destructor(q);
++ kmem_cache_free(f->frags_cachep, q);
++}
++
+ void inet_frag_destroy(struct inet_frag_queue *q)
+ {
+ struct sk_buff *fp;
+@@ -318,55 +153,21 @@ void inet_frag_destroy(struct inet_frag_queue *q)
+ }
+ sum = sum_truesize + f->qsize;
+
+- if (f->destructor)
+- f->destructor(q);
+- kmem_cache_free(f->frags_cachep, q);
++ call_rcu(&q->rcu, inet_frag_destroy_rcu);
+
+ sub_frag_mem_limit(nf, sum);
+ }
+ EXPORT_SYMBOL(inet_frag_destroy);
+
+-static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
+- struct inet_frag_queue *qp_in,
+- struct inet_frags *f,
+- void *arg)
+-{
+- struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
+- struct inet_frag_queue *qp;
+-
+-#ifdef CONFIG_SMP
+- /* With SMP race we have to recheck hash table, because
+- * such entry could have been created on other cpu before
+- * we acquired hash bucket lock.
+- */
+- hlist_for_each_entry(qp, &hb->chain, list) {
+- if (qp->net == nf && f->match(qp, arg)) {
+- atomic_inc(&qp->refcnt);
+- spin_unlock(&hb->chain_lock);
+- qp_in->flags |= INET_FRAG_COMPLETE;
+- inet_frag_put(qp_in);
+- return qp;
+- }
+- }
+-#endif
+- qp = qp_in;
+- if (!mod_timer(&qp->timer, jiffies + nf->timeout))
+- atomic_inc(&qp->refcnt);
+-
+- atomic_inc(&qp->refcnt);
+- hlist_add_head(&qp->list, &hb->chain);
+-
+- spin_unlock(&hb->chain_lock);
+-
+- return qp;
+-}
+-
+ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
+ struct inet_frags *f,
+ void *arg)
+ {
+ struct inet_frag_queue *q;
+
++ if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
++ return NULL;
++
+ q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
+ if (!q)
+ return NULL;
+@@ -377,64 +178,51 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
+
+ setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
+ spin_lock_init(&q->lock);
+- atomic_set(&q->refcnt, 1);
++ atomic_set(&q->refcnt, 3);
+
+ return q;
+ }
+
+ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
+- struct inet_frags *f,
+ void *arg)
+ {
++ struct inet_frags *f = nf->f;
+ struct inet_frag_queue *q;
++ int err;
+
+ q = inet_frag_alloc(nf, f, arg);
+ if (!q)
+ return NULL;
+
+- return inet_frag_intern(nf, q, f, arg);
+-}
++ mod_timer(&q->timer, jiffies + nf->timeout);
+
+-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
+- struct inet_frags *f, void *key,
+- unsigned int hash)
+-{
+- struct inet_frag_bucket *hb;
+- struct inet_frag_queue *q;
+- int depth = 0;
+-
+- if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) {
+- inet_frag_schedule_worker(f);
++ err = rhashtable_insert_fast(&nf->rhashtable, &q->node,
++ f->rhash_params);
++ if (err < 0) {
++ q->flags |= INET_FRAG_COMPLETE;
++ inet_frag_kill(q);
++ inet_frag_destroy(q);
+ return NULL;
+ }
++ return q;
++}
++EXPORT_SYMBOL(inet_frag_create);
+
+- if (frag_mem_limit(nf) > nf->low_thresh)
+- inet_frag_schedule_worker(f);
+-
+- hash &= (INETFRAGS_HASHSZ - 1);
+- hb = &f->hash[hash];
+-
+- spin_lock(&hb->chain_lock);
+- hlist_for_each_entry(q, &hb->chain, list) {
+- if (q->net == nf && f->match(q, key)) {
+- atomic_inc(&q->refcnt);
+- spin_unlock(&hb->chain_lock);
+- return q;
+- }
+- depth++;
+- }
+- spin_unlock(&hb->chain_lock);
+-
+- if (depth <= INETFRAGS_MAXDEPTH)
+- return inet_frag_create(nf, f, key);
++/* TODO : call from rcu_read_lock() and no longer use atomic_inc_not_zero() */
++struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
++{
++ struct inet_frag_queue *fq;
+
+- if (inet_frag_may_rebuild(f)) {
+- if (!f->rebuild)
+- f->rebuild = true;
+- inet_frag_schedule_worker(f);
++ rcu_read_lock();
++ fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
++ if (fq) {
++ if (!atomic_inc_not_zero(&fq->refcnt))
++ fq = NULL;
++ rcu_read_unlock();
++ return fq;
+ }
+-
+- return ERR_PTR(-ENOBUFS);
++ rcu_read_unlock();
++ return inet_frag_create(nf, key);
+ }
+ EXPORT_SYMBOL(inet_frag_find);
+
+@@ -442,8 +230,7 @@ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
+ const char *prefix)
+ {
+ static const char msg[] = "inet_frag_find: Fragment hash bucket"
+- " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH)
+- ". Dropping fragment.\n";
++ " list length grew over limit. Dropping fragment.\n";
+
+ if (PTR_ERR(q) == -ENOBUFS)
+ net_dbg_ratelimited("%s%s", prefix, msg);
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index f4978a5aa55e..b383cbc86b13 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -70,15 +70,9 @@ struct ipfrag_skb_cb
+ struct ipq {
+ struct inet_frag_queue q;
+
+- u32 user;
+- __be32 saddr;
+- __be32 daddr;
+- __be16 id;
+- u8 protocol;
+ u8 ecn; /* RFC3168 support */
+ u16 max_df_size; /* largest frag with DF set seen */
+ int iif;
+- int vif; /* L3 master device index */
+ unsigned int rid;
+ struct inet_peer *peer;
+ };
+@@ -98,41 +92,6 @@ int ip_frag_mem(struct net *net)
+ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+ struct net_device *dev);
+
+-struct ip4_create_arg {
+- struct iphdr *iph;
+- u32 user;
+- int vif;
+-};
+-
+-static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
+-{
+- net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
+- return jhash_3words((__force u32)id << 16 | prot,
+- (__force u32)saddr, (__force u32)daddr,
+- ip4_frags.rnd);
+-}
+-
+-static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
+-{
+- const struct ipq *ipq;
+-
+- ipq = container_of(q, struct ipq, q);
+- return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
+-}
+-
+-static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
+-{
+- const struct ipq *qp;
+- const struct ip4_create_arg *arg = a;
+-
+- qp = container_of(q, struct ipq, q);
+- return qp->id == arg->iph->id &&
+- qp->saddr == arg->iph->saddr &&
+- qp->daddr == arg->iph->daddr &&
+- qp->protocol == arg->iph->protocol &&
+- qp->user == arg->user &&
+- qp->vif == arg->vif;
+-}
+
+ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
+ {
+@@ -141,17 +100,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
+ frags);
+ struct net *net = container_of(ipv4, struct net, ipv4);
+
+- const struct ip4_create_arg *arg = a;
++ const struct frag_v4_compare_key *key = a;
+
+- qp->protocol = arg->iph->protocol;
+- qp->id = arg->iph->id;
+- qp->ecn = ip4_frag_ecn(arg->iph->tos);
+- qp->saddr = arg->iph->saddr;
+- qp->daddr = arg->iph->daddr;
+- qp->vif = arg->vif;
+- qp->user = arg->user;
++ q->key.v4 = *key;
++ qp->ecn = 0;
+ qp->peer = sysctl_ipfrag_max_dist ?
+- inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) :
++ inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
+ NULL;
+ }
+
+@@ -234,7 +188,7 @@ static void ip_expire(unsigned long arg)
+ /* Only an end host needs to send an ICMP
+ * "Fragment Reassembly Timeout" message, per RFC792.
+ */
+- if (frag_expire_skip_icmp(qp->user) &&
++ if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
+ (skb_rtable(head)->rt_type != RTN_LOCAL))
+ goto out;
+
+@@ -262,17 +216,17 @@ static void ip_expire(unsigned long arg)
+ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
+ u32 user, int vif)
+ {
++ struct frag_v4_compare_key key = {
++ .saddr = iph->saddr,
++ .daddr = iph->daddr,
++ .user = user,
++ .vif = vif,
++ .id = iph->id,
++ .protocol = iph->protocol,
++ };
+ struct inet_frag_queue *q;
+- struct ip4_create_arg arg;
+- unsigned int hash;
+-
+- arg.iph = iph;
+- arg.user = user;
+- arg.vif = vif;
+
+- hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
+-
+- q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
++ q = inet_frag_find(&net->ipv4.frags, &key);
+ if (IS_ERR_OR_NULL(q)) {
+ inet_frag_maybe_warn_overflow(q, pr_fmt());
+ return NULL;
+@@ -656,7 +610,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+ err = -ENOMEM;
+ goto out_fail;
+ out_oversize:
+- net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
++ net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr);
+ out_fail:
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
+ return err;
+@@ -900,16 +854,48 @@ static struct pernet_operations ip4_frags_ops = {
+ .exit = ipv4_frags_exit_net,
+ };
+
++
++static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed)
++{
++ return jhash2(data,
++ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
++}
++
++static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed)
++{
++ const struct inet_frag_queue *fq = data;
++
++ return jhash2((const u32 *)&fq->key.v4,
++ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
++}
++
++static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
++{
++ const struct frag_v4_compare_key *key = arg->key;
++ const struct inet_frag_queue *fq = ptr;
++
++ return !!memcmp(&fq->key, key, sizeof(*key));
++}
++
++static const struct rhashtable_params ip4_rhash_params = {
++ .head_offset = offsetof(struct inet_frag_queue, node),
++ .key_offset = offsetof(struct inet_frag_queue, key),
++ .key_len = sizeof(struct frag_v4_compare_key),
++ .hashfn = ip4_key_hashfn,
++ .obj_hashfn = ip4_obj_hashfn,
++ .obj_cmpfn = ip4_obj_cmpfn,
++ .automatic_shrinking = true,
++};
++
+ void __init ipfrag_init(void)
+ {
+- ip4_frags.hashfn = ip4_hashfn;
+ ip4_frags.constructor = ip4_frag_init;
+ ip4_frags.destructor = ip4_frag_free;
+ ip4_frags.skb_free = NULL;
+ ip4_frags.qsize = sizeof(struct ipq);
+- ip4_frags.match = ip4_frag_match;
+ ip4_frags.frag_expire = ip_expire;
+ ip4_frags.frags_cache_name = ip_frag_cache_name;
++ ip4_frags.rhash_params = ip4_rhash_params;
+ if (inet_frags_init(&ip4_frags))
+ panic("IP: failed to allocate ip4_frags cache\n");
+ ip4_frags_ctl_register();
+diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
+index 15a9da5cf296..985a9b6411af 100644
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -153,23 +153,6 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
+ return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
+ }
+
+-static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
+- const struct in6_addr *daddr)
+-{
+- net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
+- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+- (__force u32)id, nf_frags.rnd);
+-}
+-
+-
+-static unsigned int nf_hashfn(const struct inet_frag_queue *q)
+-{
+- const struct frag_queue *nq;
+-
+- nq = container_of(q, struct frag_queue, q);
+- return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
+-}
+-
+ static void nf_skb_free(struct sk_buff *skb)
+ {
+ if (NFCT_FRAG6_CB(skb)->orig)
+@@ -188,26 +171,19 @@ static void nf_ct_frag6_expire(unsigned long data)
+ }
+
+ /* Creation primitives. */
+-static inline struct frag_queue *fq_find(struct net *net, __be32 id,
+- u32 user, struct in6_addr *src,
+- struct in6_addr *dst, int iif, u8 ecn)
++static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
++ const struct ipv6hdr *hdr, int iif)
+ {
++ struct frag_v6_compare_key key = {
++ .id = id,
++ .saddr = hdr->saddr,
++ .daddr = hdr->daddr,
++ .user = user,
++ .iif = iif,
++ };
+ struct inet_frag_queue *q;
+- struct ip6_create_arg arg;
+- unsigned int hash;
+-
+- arg.id = id;
+- arg.user = user;
+- arg.src = src;
+- arg.dst = dst;
+- arg.iif = iif;
+- arg.ecn = ecn;
+-
+- local_bh_disable();
+- hash = nf_hash_frag(id, src, dst);
+
+- q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
+- local_bh_enable();
++ q = inet_frag_find(&net->nf_frag.frags, &key);
+ if (IS_ERR_OR_NULL(q)) {
+ inet_frag_maybe_warn_overflow(q, pr_fmt());
+ return NULL;
+@@ -602,8 +578,8 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use
+ fhdr = (struct frag_hdr *)skb_transport_header(clone);
+
+ skb_orphan(skb);
+- fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
+- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
++ fq = fq_find(net, fhdr->identification, user, hdr,
++ skb->dev ? skb->dev->ifindex : 0);
+ if (fq == NULL) {
+ pr_debug("Can't find and can't create new queue\n");
+ goto ret_orig;
+@@ -681,14 +657,13 @@ int nf_ct_frag6_init(void)
+ {
+ int ret = 0;
+
+- nf_frags.hashfn = nf_hashfn;
+ nf_frags.constructor = ip6_frag_init;
+ nf_frags.destructor = NULL;
+ nf_frags.skb_free = nf_skb_free;
+ nf_frags.qsize = sizeof(struct frag_queue);
+- nf_frags.match = ip6_frag_match;
+ nf_frags.frag_expire = nf_ct_frag6_expire;
+ nf_frags.frags_cache_name = nf_frags_cache_name;
++ nf_frags.rhash_params = ip6_rhash_params;
+ ret = inet_frags_init(&nf_frags);
+ if (ret)
+ goto out;
+diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
+index 0b6f4e1f43db..0626c1b894e4 100644
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -79,52 +79,13 @@ static struct inet_frags ip6_frags;
+ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+ struct net_device *dev);
+
+-/*
+- * callers should be careful not to use the hash value outside the ipfrag_lock
+- * as doing so could race with ipfrag_hash_rnd being recalculated.
+- */
+-static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
+- const struct in6_addr *daddr)
+-{
+- net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
+- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+- (__force u32)id, ip6_frags.rnd);
+-}
+-
+-static unsigned int ip6_hashfn(const struct inet_frag_queue *q)
+-{
+- const struct frag_queue *fq;
+-
+- fq = container_of(q, struct frag_queue, q);
+- return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
+-}
+-
+-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
+-{
+- const struct frag_queue *fq;
+- const struct ip6_create_arg *arg = a;
+-
+- fq = container_of(q, struct frag_queue, q);
+- return fq->id == arg->id &&
+- fq->user == arg->user &&
+- ipv6_addr_equal(&fq->saddr, arg->src) &&
+- ipv6_addr_equal(&fq->daddr, arg->dst) &&
+- (arg->iif == fq->iif ||
+- !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
+- IPV6_ADDR_LINKLOCAL)));
+-}
+-EXPORT_SYMBOL(ip6_frag_match);
+-
+ void ip6_frag_init(struct inet_frag_queue *q, const void *a)
+ {
+ struct frag_queue *fq = container_of(q, struct frag_queue, q);
+- const struct ip6_create_arg *arg = a;
++ const struct frag_v6_compare_key *key = a;
+
+- fq->id = arg->id;
+- fq->user = arg->user;
+- fq->saddr = *arg->src;
+- fq->daddr = *arg->dst;
+- fq->ecn = arg->ecn;
++ q->key.v6 = *key;
++ fq->ecn = 0;
+ }
+ EXPORT_SYMBOL(ip6_frag_init);
+
+@@ -181,23 +142,22 @@ static void ip6_frag_expire(unsigned long data)
+ }
+
+ static struct frag_queue *
+-fq_find(struct net *net, __be32 id, const struct in6_addr *src,
+- const struct in6_addr *dst, int iif, u8 ecn)
++fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
+ {
++ struct frag_v6_compare_key key = {
++ .id = id,
++ .saddr = hdr->saddr,
++ .daddr = hdr->daddr,
++ .user = IP6_DEFRAG_LOCAL_DELIVER,
++ .iif = iif,
++ };
+ struct inet_frag_queue *q;
+- struct ip6_create_arg arg;
+- unsigned int hash;
+-
+- arg.id = id;
+- arg.user = IP6_DEFRAG_LOCAL_DELIVER;
+- arg.src = src;
+- arg.dst = dst;
+- arg.iif = iif;
+- arg.ecn = ecn;
+
+- hash = inet6_hash_frag(id, src, dst);
++ if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
++ IPV6_ADDR_LINKLOCAL)))
++ key.iif = 0;
+
+- q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
++ q = inet_frag_find(&net->ipv6.frags, &key);
+ if (IS_ERR_OR_NULL(q)) {
+ inet_frag_maybe_warn_overflow(q, pr_fmt());
+ return NULL;
+@@ -523,6 +483,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
+ struct frag_queue *fq;
+ const struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct net *net = dev_net(skb_dst(skb)->dev);
++ int iif;
+
+ if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
+ goto fail_hdr;
+@@ -551,13 +512,14 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
+ return 1;
+ }
+
+- fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
+- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
++ iif = skb->dev ? skb->dev->ifindex : 0;
++ fq = fq_find(net, fhdr->identification, hdr, iif);
+ if (fq) {
+ int ret;
+
+ spin_lock(&fq->q.lock);
+
++ fq->iif = iif;
+ ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
+
+ spin_unlock(&fq->q.lock);
+@@ -735,17 +697,47 @@ static struct pernet_operations ip6_frags_ops = {
+ .exit = ipv6_frags_exit_net,
+ };
+
++static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed)
++{
++ return jhash2(data,
++ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
++}
++
++static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed)
++{
++ const struct inet_frag_queue *fq = data;
++
++ return jhash2((const u32 *)&fq->key.v6,
++ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
++}
++
++static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
++{
++ const struct frag_v6_compare_key *key = arg->key;
++ const struct inet_frag_queue *fq = ptr;
++
++ return !!memcmp(&fq->key, key, sizeof(*key));
++}
++
++const struct rhashtable_params ip6_rhash_params = {
++ .head_offset = offsetof(struct inet_frag_queue, node),
++ .hashfn = ip6_key_hashfn,
++ .obj_hashfn = ip6_obj_hashfn,
++ .obj_cmpfn = ip6_obj_cmpfn,
++ .automatic_shrinking = true,
++};
++EXPORT_SYMBOL(ip6_rhash_params);
++
+ int __init ipv6_frag_init(void)
+ {
+ int ret;
+
+- ip6_frags.hashfn = ip6_hashfn;
+ ip6_frags.constructor = ip6_frag_init;
+ ip6_frags.destructor = NULL;
+ ip6_frags.qsize = sizeof(struct frag_queue);
+- ip6_frags.match = ip6_frag_match;
+ ip6_frags.frag_expire = ip6_frag_expire;
+ ip6_frags.frags_cache_name = ip6_frag_cache_name;
++ ip6_frags.rhash_params = ip6_rhash_params;
+ ret = inet_frags_init(&ip6_frags);
+ if (ret)
+ goto out;
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-010-net-ieee802154-6lowpan-fix-frag-reassembly.patch b/patches.kernel.org/4.4.174-010-net-ieee802154-6lowpan-fix-frag-reassembly.patch
new file mode 100644
index 0000000000..6b143464bd
--- /dev/null
+++ b/patches.kernel.org/4.4.174-010-net-ieee802154-6lowpan-fix-frag-reassembly.patch
@@ -0,0 +1,79 @@
+From: Alexander Aring <aring@mojatatu.com>
+Date: Fri, 20 Apr 2018 14:54:13 -0400
+Subject: [PATCH] net: ieee802154: 6lowpan: fix frag reassembly
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: f18fa5de5ba7f1d6650951502bb96a6e4715a948
+
+commit f18fa5de5ba7f1d6650951502bb96a6e4715a948 upstream.
+
+This patch initializes to zero the stack variables which are used in
+frag_lowpan_compare_key. In my case there are padding bytes in the
+structure ieee802154_addr as well as in frag_lowpan_compare_key. Without
+this, the key variable contains random bytes. The result is that comparing
+two keys with memcmp works incorrectly.
+
+Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units")
+Signed-off-by: Alexander Aring <aring@mojatatu.com>
+Reported-by: Stefan Schmidt <stefan@osg.samsung.com>
+Signed-off-by: Stefan Schmidt <stefan@osg.samsung.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/ieee802154/6lowpan/6lowpan_i.h | 4 ++--
+ net/ieee802154/6lowpan/reassembly.c | 14 +++++++-------
+ 2 files changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
+index 3c5fc06883ba..fdbebe51446f 100644
+--- a/net/ieee802154/6lowpan/6lowpan_i.h
++++ b/net/ieee802154/6lowpan/6lowpan_i.h
+@@ -19,8 +19,8 @@ typedef unsigned __bitwise__ lowpan_rx_result;
+ struct frag_lowpan_compare_key {
+ u16 tag;
+ u16 d_size;
+- const struct ieee802154_addr src;
+- const struct ieee802154_addr dst;
++ struct ieee802154_addr src;
++ struct ieee802154_addr dst;
+ };
+
+ /* Equivalent of ipv4 struct ipq
+diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
+index 65c0b7349f9d..510568c37476 100644
+--- a/net/ieee802154/6lowpan/reassembly.c
++++ b/net/ieee802154/6lowpan/reassembly.c
+@@ -74,14 +74,14 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
+ {
+ struct netns_ieee802154_lowpan *ieee802154_lowpan =
+ net_ieee802154_lowpan(net);
+- struct frag_lowpan_compare_key key = {
+- .tag = cb->d_tag,
+- .d_size = cb->d_size,
+- .src = *src,
+- .dst = *dst,
+- };
++ struct frag_lowpan_compare_key key = {};
+ struct inet_frag_queue *q;
+
++ key.tag = cb->d_tag;
++ key.d_size = cb->d_size;
++ key.src = *src;
++ key.dst = *dst;
++
+ q = inet_frag_find(&ieee802154_lowpan->frags, &key);
+ if (IS_ERR_OR_NULL(q)) {
+ inet_frag_maybe_warn_overflow(q, pr_fmt());
+@@ -372,7 +372,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
+ struct lowpan_frag_queue *fq;
+ struct net *net = dev_net(skb->dev);
+ struct lowpan_802154_cb *cb = lowpan_802154_cb(skb);
+- struct ieee802154_hdr hdr;
++ struct ieee802154_hdr hdr = {};
+ int err;
+
+ if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-011-ipfrag-really-prevent-allocation-on-netns-exi.patch b/patches.kernel.org/4.4.174-011-ipfrag-really-prevent-allocation-on-netns-exi.patch
new file mode 100644
index 0000000000..35987701bd
--- /dev/null
+++ b/patches.kernel.org/4.4.174-011-ipfrag-really-prevent-allocation-on-netns-exi.patch
@@ -0,0 +1,43 @@
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 6 Jul 2018 12:30:20 +0200
+Subject: [PATCH] ipfrag: really prevent allocation on netns exit
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: f6f2a4a2eb92bc73671204198bb2f8ab53ff59fb
+
+commit f6f2a4a2eb92bc73671204198bb2f8ab53ff59fb upstream.
+
+Setting the low threshold to 0 has no effect on frags allocation;
+we need to clear high_thresh instead.
+
+The assignment predates commit 648700f76b03 ("inet: frags:
+use rhashtables for reassembly units"), but before that commit
+it had a different role: preventing concurrent eviction
+from the worker and the netns cleanup helper.
+
+Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/ipv4/inet_fragment.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
+index 10cd7c182136..cca7362c8834 100644
+--- a/net/ipv4/inet_fragment.c
++++ b/net/ipv4/inet_fragment.c
+@@ -90,7 +90,7 @@ static void inet_frags_free_cb(void *ptr, void *arg)
+
+ void inet_frags_exit_net(struct netns_frags *nf)
+ {
+- nf->low_thresh = 0; /* prevent creation of new frags */
++ nf->high_thresh = 0; /* prevent creation of new frags */
+
+ rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
+ }
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-012-inet-frags-remove-some-helpers.patch b/patches.kernel.org/4.4.174-012-inet-frags-remove-some-helpers.patch
new file mode 100644
index 0000000000..4afeb3beb4
--- /dev/null
+++ b/patches.kernel.org/4.4.174-012-inet-frags-remove-some-helpers.patch
@@ -0,0 +1,142 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:29:57 -0700
+Subject: [PATCH] inet: frags: remove some helpers
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 6befe4a78b1553edb6eed3a78b4bcd9748526672
+
+commit 6befe4a78b1553edb6eed3a78b4bcd9748526672 upstream.
+
+Remove sum_frag_mem_limit(), ip_frag_mem() & ip6_frag_mem()
+
+Also since we use rhashtable we can bring back the number of fragments
+in "grep FRAG /proc/net/sockstat /proc/net/sockstat6" that was
+removed in commit 434d305405ab ("inet: frag: don't account number
+of fragment queues")
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/net/inet_frag.h | 5 -----
+ include/net/ip.h | 1 -
+ include/net/ipv6.h | 7 -------
+ net/ipv4/ip_fragment.c | 5 -----
+ net/ipv4/proc.c | 6 +++---
+ net/ipv6/proc.c | 5 +++--
+ 6 files changed, 6 insertions(+), 23 deletions(-)
+
+diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
+index 623eb8222a75..319a435cd723 100644
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -140,11 +140,6 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
+ atomic_add(i, &nf->mem);
+ }
+
+-static inline int sum_frag_mem_limit(struct netns_frags *nf)
+-{
+- return atomic_read(&nf->mem);
+-}
+-
+ /* RFC 3168 support :
+ * We want to check ECN values of all fragments, do detect invalid combinations.
+ * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value.
+diff --git a/include/net/ip.h b/include/net/ip.h
+index 0530bcdbc212..7b968927477d 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -524,7 +524,6 @@ static inline struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *s
+ return skb;
+ }
+ #endif
+-int ip_frag_mem(struct net *net);
+
+ /*
+ * Functions provided by ip_forward.c
+diff --git a/include/net/ipv6.h b/include/net/ipv6.h
+index 2067bbec021c..c07cf9596b6f 100644
+--- a/include/net/ipv6.h
++++ b/include/net/ipv6.h
+@@ -320,13 +320,6 @@ static inline bool ipv6_accept_ra(struct inet6_dev *idev)
+ idev->cnf.accept_ra;
+ }
+
+-#if IS_ENABLED(CONFIG_IPV6)
+-static inline int ip6_frag_mem(struct net *net)
+-{
+- return sum_frag_mem_limit(&net->ipv6.frags);
+-}
+-#endif
+-
+ #define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */
+ #define IPV6_FRAG_LOW_THRESH (3 * 1024*1024) /* 3145728 */
+ #define IPV6_FRAG_TIMEOUT (60 * HZ) /* 60 seconds */
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index b383cbc86b13..44a3982563cc 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -84,11 +84,6 @@ static u8 ip4_frag_ecn(u8 tos)
+
+ static struct inet_frags ip4_frags;
+
+-int ip_frag_mem(struct net *net)
+-{
+- return sum_frag_mem_limit(&net->ipv4.frags);
+-}
+-
+ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+ struct net_device *dev);
+
+diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
+index 3abd9d7a3adf..d729ad2e4ad1 100644
+--- a/net/ipv4/proc.c
++++ b/net/ipv4/proc.c
+@@ -52,7 +52,6 @@
+ static int sockstat_seq_show(struct seq_file *seq, void *v)
+ {
+ struct net *net = seq->private;
+- unsigned int frag_mem;
+ int orphans, sockets;
+
+ local_bh_disable();
+@@ -72,8 +71,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
+ sock_prot_inuse_get(net, &udplite_prot));
+ seq_printf(seq, "RAW: inuse %d\n",
+ sock_prot_inuse_get(net, &raw_prot));
+- frag_mem = ip_frag_mem(net);
+- seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem);
++ seq_printf(seq, "FRAG: inuse %u memory %u\n",
++ atomic_read(&net->ipv4.frags.rhashtable.nelems),
++ frag_mem_limit(&net->ipv4.frags));
+ return 0;
+ }
+
+diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
+index 679253d0af84..177e2fe20915 100644
+--- a/net/ipv6/proc.c
++++ b/net/ipv6/proc.c
+@@ -33,7 +33,6 @@
+ static int sockstat6_seq_show(struct seq_file *seq, void *v)
+ {
+ struct net *net = seq->private;
+- unsigned int frag_mem = ip6_frag_mem(net);
+
+ seq_printf(seq, "TCP6: inuse %d\n",
+ sock_prot_inuse_get(net, &tcpv6_prot));
+@@ -43,7 +42,9 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
+ sock_prot_inuse_get(net, &udplitev6_prot));
+ seq_printf(seq, "RAW6: inuse %d\n",
+ sock_prot_inuse_get(net, &rawv6_prot));
+- seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem);
++ seq_printf(seq, "FRAG6: inuse %u memory %u\n",
++ atomic_read(&net->ipv6.frags.rhashtable.nelems),
++ frag_mem_limit(&net->ipv6.frags));
+ return 0;
+ }
+
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-013-inet-frags-get-rif-of-inet_frag_evicting.patch b/patches.kernel.org/4.4.174-013-inet-frags-get-rif-of-inet_frag_evicting.patch
new file mode 100644
index 0000000000..a6f3575b12
--- /dev/null
+++ b/patches.kernel.org/4.4.174-013-inet-frags-get-rif-of-inet_frag_evicting.patch
@@ -0,0 +1,158 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:29:58 -0700
+Subject: [PATCH] inet: frags: get rif of inet_frag_evicting()
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 399d1404be660d355192ff4df5ccc3f4159ec1e4
+
+commit 399d1404be660d355192ff4df5ccc3f4159ec1e4 upstream.
+
+This refactors ip_expire() since one indentation level is removed.
+
+Note: in the future, we should try hard to avoid the skb_clone()
+since this is a serious performance cost.
+Under DDOS, the ICMP message won't be sent because of rate limits.
+
+The fact that ip6_expire_frag_queue() does not use skb_clone() is
+disturbing too. Presumably IPv6 should have the same
+issue as the one we fixed in commit ec4fbd64751d
+("inet: frag: release spinlock before calling icmp_send()")
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Mao Wenan <maowenan@huawei.com>
+[bwh: Backported to 4.4: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/net/inet_frag.h | 5 ----
+ net/ipv4/ip_fragment.c | 65 ++++++++++++++++++++---------------------
+ net/ipv6/reassembly.c | 4 ---
+ 3 files changed, 32 insertions(+), 42 deletions(-)
+
+diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
+index 319a435cd723..6eb9f7cf1f12 100644
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -118,11 +118,6 @@ static inline void inet_frag_put(struct inet_frag_queue *q)
+ inet_frag_destroy(q);
+ }
+
+-static inline bool inet_frag_evicting(struct inet_frag_queue *q)
+-{
+- return false;
+-}
+-
+ /* Memory Tracking Functions. */
+
+ static inline int frag_mem_limit(struct netns_frags *nf)
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index 44a3982563cc..e0465ad21868 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -143,8 +143,11 @@ static bool frag_expire_skip_icmp(u32 user)
+ */
+ static void ip_expire(unsigned long arg)
+ {
+- struct ipq *qp;
++ struct sk_buff *clone, *head;
++ const struct iphdr *iph;
+ struct net *net;
++ struct ipq *qp;
++ int err;
+
+ qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
+ net = container_of(qp->q.net, struct net, ipv4.frags);
+@@ -158,45 +161,41 @@ static void ip_expire(unsigned long arg)
+ ipq_kill(qp);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
+
+- if (!inet_frag_evicting(&qp->q)) {
+- struct sk_buff *clone, *head = qp->q.fragments;
+- const struct iphdr *iph;
+- int err;
++ head = qp->q.fragments;
+
+- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
++ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+
+- if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
+- goto out;
++ if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !head)
++ goto out;
+
+- head->dev = dev_get_by_index_rcu(net, qp->iif);
+- if (!head->dev)
+- goto out;
++ head->dev = dev_get_by_index_rcu(net, qp->iif);
++ if (!head->dev)
++ goto out;
+
+
+- /* skb has no dst, perform route lookup again */
+- iph = ip_hdr(head);
+- err = ip_route_input_noref(head, iph->daddr, iph->saddr,
++ /* skb has no dst, perform route lookup again */
++ iph = ip_hdr(head);
++ err = ip_route_input_noref(head, iph->daddr, iph->saddr,
+ iph->tos, head->dev);
+- if (err)
+- goto out;
++ if (err)
++ goto out;
+
+- /* Only an end host needs to send an ICMP
+- * "Fragment Reassembly Timeout" message, per RFC792.
+- */
+- if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
+- (skb_rtable(head)->rt_type != RTN_LOCAL))
+- goto out;
+-
+- clone = skb_clone(head, GFP_ATOMIC);
+-
+- /* Send an ICMP "Fragment Reassembly Timeout" message. */
+- if (clone) {
+- spin_unlock(&qp->q.lock);
+- icmp_send(clone, ICMP_TIME_EXCEEDED,
+- ICMP_EXC_FRAGTIME, 0);
+- consume_skb(clone);
+- goto out_rcu_unlock;
+- }
++ /* Only an end host needs to send an ICMP
++ * "Fragment Reassembly Timeout" message, per RFC792.
++ */
++ if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
++ (skb_rtable(head)->rt_type != RTN_LOCAL))
++ goto out;
++
++ clone = skb_clone(head, GFP_ATOMIC);
++
++ /* Send an ICMP "Fragment Reassembly Timeout" message. */
++ if (clone) {
++ spin_unlock(&qp->q.lock);
++ icmp_send(clone, ICMP_TIME_EXCEEDED,
++ ICMP_EXC_FRAGTIME, 0);
++ consume_skb(clone);
++ goto out_rcu_unlock;
+ }
+ out:
+ spin_unlock(&qp->q.lock);
+diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
+index 0626c1b894e4..0076d7a03323 100644
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -106,10 +106,6 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
+ goto out_rcu_unlock;
+
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+-
+- if (inet_frag_evicting(&fq->q))
+- goto out_rcu_unlock;
+-
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+
+ /* Don't send error if the first segment did not arrive. */
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-014-inet-frags-remove-inet_frag_maybe_warn_overfl.patch b/patches.kernel.org/4.4.174-014-inet-frags-remove-inet_frag_maybe_warn_overfl.patch
new file mode 100644
index 0000000000..843cc9c80e
--- /dev/null
+++ b/patches.kernel.org/4.4.174-014-inet-frags-remove-inet_frag_maybe_warn_overfl.patch
@@ -0,0 +1,128 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:29:59 -0700
+Subject: [PATCH] inet: frags: remove inet_frag_maybe_warn_overflow()
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 2d44ed22e607f9a285b049de2263e3840673a260
+
+commit 2d44ed22e607f9a285b049de2263e3840673a260 upstream.
+
+This function is obsolete, after rhashtable addition to inet defrag.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/net/inet_frag.h | 2 --
+ net/ieee802154/6lowpan/reassembly.c | 5 ++---
+ net/ipv4/inet_fragment.c | 11 -----------
+ net/ipv4/ip_fragment.c | 5 ++---
+ net/ipv6/netfilter/nf_conntrack_reasm.c | 5 ++---
+ net/ipv6/reassembly.c | 5 ++---
+ 6 files changed, 8 insertions(+), 25 deletions(-)
+
+diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
+index 6eb9f7cf1f12..a9453d9e562b 100644
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -109,8 +109,6 @@ void inet_frags_exit_net(struct netns_frags *nf);
+ void inet_frag_kill(struct inet_frag_queue *q);
+ void inet_frag_destroy(struct inet_frag_queue *q);
+ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
+-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
+- const char *prefix);
+
+ static inline void inet_frag_put(struct inet_frag_queue *q)
+ {
+diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
+index 510568c37476..c5ad89f5f028 100644
+--- a/net/ieee802154/6lowpan/reassembly.c
++++ b/net/ieee802154/6lowpan/reassembly.c
+@@ -83,10 +83,9 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
+ key.dst = *dst;
+
+ q = inet_frag_find(&ieee802154_lowpan->frags, &key);
+- if (IS_ERR_OR_NULL(q)) {
+- inet_frag_maybe_warn_overflow(q, pr_fmt());
++ if (!q)
+ return NULL;
+- }
++
+ return container_of(q, struct lowpan_frag_queue, q);
+ }
+
+diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
+index cca7362c8834..bb784c3a40fb 100644
+--- a/net/ipv4/inet_fragment.c
++++ b/net/ipv4/inet_fragment.c
+@@ -225,14 +225,3 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
+ return inet_frag_create(nf, key);
+ }
+ EXPORT_SYMBOL(inet_frag_find);
+-
+-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
+- const char *prefix)
+-{
+- static const char msg[] = "inet_frag_find: Fragment hash bucket"
+- " list length grew over limit. Dropping fragment.\n";
+-
+- if (PTR_ERR(q) == -ENOBUFS)
+- net_dbg_ratelimited("%s%s", prefix, msg);
+-}
+-EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index e0465ad21868..c6711b04ad79 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -221,10 +221,9 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
+ struct inet_frag_queue *q;
+
+ q = inet_frag_find(&net->ipv4.frags, &key);
+- if (IS_ERR_OR_NULL(q)) {
+- inet_frag_maybe_warn_overflow(q, pr_fmt());
++ if (!q)
+ return NULL;
+- }
++
+ return container_of(q, struct ipq, q);
+ }
+
+diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
+index 985a9b6411af..41c66395df82 100644
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -184,10 +184,9 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
+ struct inet_frag_queue *q;
+
+ q = inet_frag_find(&net->nf_frag.frags, &key);
+- if (IS_ERR_OR_NULL(q)) {
+- inet_frag_maybe_warn_overflow(q, pr_fmt());
++ if (!q)
+ return NULL;
+- }
++
+ return container_of(q, struct frag_queue, q);
+ }
+
+diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
+index 0076d7a03323..1ab32ff0823e 100644
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -154,10 +154,9 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
+ key.iif = 0;
+
+ q = inet_frag_find(&net->ipv6.frags, &key);
+- if (IS_ERR_OR_NULL(q)) {
+- inet_frag_maybe_warn_overflow(q, pr_fmt());
++ if (!q)
+ return NULL;
+- }
++
+ return container_of(q, struct frag_queue, q);
+ }
+
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-015-inet-frags-break-the-2GB-limit-for-frags-stor.patch b/patches.kernel.org/4.4.174-015-inet-frags-break-the-2GB-limit-for-frags-stor.patch
new file mode 100644
index 0000000000..c14d070822
--- /dev/null
+++ b/patches.kernel.org/4.4.174-015-inet-frags-break-the-2GB-limit-for-frags-stor.patch
@@ -0,0 +1,276 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:30:00 -0700
+Subject: [PATCH] inet: frags: break the 2GB limit for frags storage
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 3e67f106f619dcfaf6f4e2039599bdb69848c714
+
+commit 3e67f106f619dcfaf6f4e2039599bdb69848c714 upstream.
+
+Some users are willing to provision huge amounts of memory to be able
+to perform reassembly reasonably well under pressure.
+
+Current memory tracking is using one atomic_t and integers.
+
+Switch to atomic_long_t so that 64bit arches can use more than 2GB,
+without any cost for 32bit arches.
+
+Note that this patch avoids an overflow error, if high_thresh was set
+to ~2GB, since this test in inet_frag_alloc() was never true :
+
+if (... || frag_mem_limit(nf) > nf->high_thresh)
+
+Tested:
+
+$ echo 16000000000 >/proc/sys/net/ipv4/ipfrag_high_thresh
+
+<frag DDOS>
+
+$ grep FRAG /proc/net/sockstat
+FRAG: inuse 14705885 memory 16000002880
+
+$ nstat -n ; sleep 1 ; nstat | grep Reas
+IpReasmReqds 3317150 0.0
+IpReasmFails 3317112 0.0
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ Documentation/networking/ip-sysctl.txt | 4 ++--
+ include/net/inet_frag.h | 20 ++++++++++----------
+ net/ieee802154/6lowpan/reassembly.c | 10 +++++-----
+ net/ipv4/ip_fragment.c | 10 +++++-----
+ net/ipv4/proc.c | 2 +-
+ net/ipv6/netfilter/nf_conntrack_reasm.c | 10 +++++-----
+ net/ipv6/proc.c | 2 +-
+ net/ipv6/reassembly.c | 6 +++---
+ 8 files changed, 32 insertions(+), 32 deletions(-)
+
+diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
+index 2aa56ccaa996..7c229f59016f 100644
+--- a/Documentation/networking/ip-sysctl.txt
++++ b/Documentation/networking/ip-sysctl.txt
+@@ -112,10 +112,10 @@ min_adv_mss - INTEGER
+
+ IP Fragmentation:
+
+-ipfrag_high_thresh - INTEGER
++ipfrag_high_thresh - LONG INTEGER
+ Maximum memory used to reassemble IP fragments.
+
+-ipfrag_low_thresh - INTEGER
++ipfrag_low_thresh - LONG INTEGER
+ (Obsolete since linux-4.17)
+ Maximum memory used to reassemble IP fragments before the kernel
+ begins to remove incomplete fragment queues to free up resources.
+diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
+index a9453d9e562b..197c172cc811 100644
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -7,11 +7,11 @@ struct netns_frags {
+ struct rhashtable rhashtable ____cacheline_aligned_in_smp;
+
+ /* Keep atomic mem on separate cachelines in structs that include it */
+- atomic_t mem ____cacheline_aligned_in_smp;
++ atomic_long_t mem ____cacheline_aligned_in_smp;
+ /* sysctls */
++ long high_thresh;
++ long low_thresh;
+ int timeout;
+- int high_thresh;
+- int low_thresh;
+ struct inet_frags *f;
+ };
+
+@@ -101,7 +101,7 @@ void inet_frags_fini(struct inet_frags *);
+
+ static inline int inet_frags_init_net(struct netns_frags *nf)
+ {
+- atomic_set(&nf->mem, 0);
++ atomic_long_set(&nf->mem, 0);
+ return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
+ }
+ void inet_frags_exit_net(struct netns_frags *nf);
+@@ -118,19 +118,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q)
+
+ /* Memory Tracking Functions. */
+
+-static inline int frag_mem_limit(struct netns_frags *nf)
++static inline long frag_mem_limit(const struct netns_frags *nf)
+ {
+- return atomic_read(&nf->mem);
++ return atomic_long_read(&nf->mem);
+ }
+
+-static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
++static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
+ {
+- atomic_sub(i, &nf->mem);
++ atomic_long_sub(val, &nf->mem);
+ }
+
+-static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
++static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
+ {
+- atomic_add(i, &nf->mem);
++ atomic_long_add(val, &nf->mem);
+ }
+
+ /* RFC 3168 support :
+diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
+index c5ad89f5f028..94ef920530a5 100644
+--- a/net/ieee802154/6lowpan/reassembly.c
++++ b/net/ieee802154/6lowpan/reassembly.c
+@@ -410,23 +410,23 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
+ }
+
+ #ifdef CONFIG_SYSCTL
+-static int zero;
++static long zero;
+
+ static struct ctl_table lowpan_frags_ns_ctl_table[] = {
+ {
+ .procname = "6lowpanfrag_high_thresh",
+ .data = &init_net.ieee802154_lowpan.frags.high_thresh,
+- .maxlen = sizeof(int),
++ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
++ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &init_net.ieee802154_lowpan.frags.low_thresh
+ },
+ {
+ .procname = "6lowpanfrag_low_thresh",
+ .data = &init_net.ieee802154_lowpan.frags.low_thresh,
+- .maxlen = sizeof(int),
++ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
++ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
+ },
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index c6711b04ad79..a068a3bcd2d4 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -682,23 +682,23 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
+ EXPORT_SYMBOL(ip_check_defrag);
+
+ #ifdef CONFIG_SYSCTL
+-static int zero;
++static long zero;
+
+ static struct ctl_table ip4_frags_ns_ctl_table[] = {
+ {
+ .procname = "ipfrag_high_thresh",
+ .data = &init_net.ipv4.frags.high_thresh,
+- .maxlen = sizeof(int),
++ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
++ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &init_net.ipv4.frags.low_thresh
+ },
+ {
+ .procname = "ipfrag_low_thresh",
+ .data = &init_net.ipv4.frags.low_thresh,
+- .maxlen = sizeof(int),
++ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
++ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &init_net.ipv4.frags.high_thresh
+ },
+diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
+index d729ad2e4ad1..d9415fae9467 100644
+--- a/net/ipv4/proc.c
++++ b/net/ipv4/proc.c
+@@ -71,7 +71,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
+ sock_prot_inuse_get(net, &udplite_prot));
+ seq_printf(seq, "RAW: inuse %d\n",
+ sock_prot_inuse_get(net, &raw_prot));
+- seq_printf(seq, "FRAG: inuse %u memory %u\n",
++ seq_printf(seq, "FRAG: inuse %u memory %lu\n",
+ atomic_read(&net->ipv4.frags.rhashtable.nelems),
+ frag_mem_limit(&net->ipv4.frags));
+ return 0;
+diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
+index 41c66395df82..618dc76003a2 100644
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -64,7 +64,7 @@ struct nf_ct_frag6_skb_cb
+ static struct inet_frags nf_frags;
+
+ #ifdef CONFIG_SYSCTL
+-static int zero;
++static long zero;
+
+ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
+ {
+@@ -77,18 +77,18 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
+ {
+ .procname = "nf_conntrack_frag6_low_thresh",
+ .data = &init_net.nf_frag.frags.low_thresh,
+- .maxlen = sizeof(unsigned int),
++ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
++ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &init_net.nf_frag.frags.high_thresh
+ },
+ {
+ .procname = "nf_conntrack_frag6_high_thresh",
+ .data = &init_net.nf_frag.frags.high_thresh,
+- .maxlen = sizeof(unsigned int),
++ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
++ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &init_net.nf_frag.frags.low_thresh
+ },
+ { }
+diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
+index 177e2fe20915..73e766e7bc37 100644
+--- a/net/ipv6/proc.c
++++ b/net/ipv6/proc.c
+@@ -42,7 +42,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
+ sock_prot_inuse_get(net, &udplitev6_prot));
+ seq_printf(seq, "RAW6: inuse %d\n",
+ sock_prot_inuse_get(net, &rawv6_prot));
+- seq_printf(seq, "FRAG6: inuse %u memory %u\n",
++ seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
+ atomic_read(&net->ipv6.frags.rhashtable.nelems),
+ frag_mem_limit(&net->ipv6.frags));
+ return 0;
+diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
+index 1ab32ff0823e..39e05821b956 100644
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -545,15 +545,15 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
+ {
+ .procname = "ip6frag_high_thresh",
+ .data = &init_net.ipv6.frags.high_thresh,
+- .maxlen = sizeof(int),
++ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
++ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &init_net.ipv6.frags.low_thresh
+ },
+ {
+ .procname = "ip6frag_low_thresh",
+ .data = &init_net.ipv6.frags.low_thresh,
+- .maxlen = sizeof(int),
++ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-016-inet-frags-do-not-clone-skb-in-ip_expire.patch b/patches.kernel.org/4.4.174-016-inet-frags-do-not-clone-skb-in-ip_expire.patch
new file mode 100644
index 0000000000..97d2c52c51
--- /dev/null
+++ b/patches.kernel.org/4.4.174-016-inet-frags-do-not-clone-skb-in-ip_expire.patch
@@ -0,0 +1,68 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:30:01 -0700
+Subject: [PATCH] inet: frags: do not clone skb in ip_expire()
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 1eec5d5670084ee644597bd26c25e22c69b9f748
+
+commit 1eec5d5670084ee644597bd26c25e22c69b9f748 upstream.
+
+An skb_clone() was added in commit ec4fbd64751d ("inet: frag: release
+spinlock before calling icmp_send()")
+
+While fixing the bug at that time, it also added a very high cost
+for DDOS frags, as the ICMP rate limit is applied after this
+expensive operation (skb_clone() + consume_skb(), implying memory
+allocations, copy, and freeing)
+
+We can use skb_get(head) here, all we want is to make sure skb won't
+be freed by another cpu.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/ipv4/ip_fragment.c | 16 ++++++----------
+ 1 file changed, 6 insertions(+), 10 deletions(-)
+
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index a068a3bcd2d4..24665f004556 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -143,8 +143,8 @@ static bool frag_expire_skip_icmp(u32 user)
+ */
+ static void ip_expire(unsigned long arg)
+ {
+- struct sk_buff *clone, *head;
+ const struct iphdr *iph;
++ struct sk_buff *head;
+ struct net *net;
+ struct ipq *qp;
+ int err;
+@@ -187,16 +187,12 @@ static void ip_expire(unsigned long arg)
+ (skb_rtable(head)->rt_type != RTN_LOCAL))
+ goto out;
+
+- clone = skb_clone(head, GFP_ATOMIC);
++ skb_get(head);
++ spin_unlock(&qp->q.lock);
++ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
++ kfree_skb(head);
++ goto out_rcu_unlock;
+
+- /* Send an ICMP "Fragment Reassembly Timeout" message. */
+- if (clone) {
+- spin_unlock(&qp->q.lock);
+- icmp_send(clone, ICMP_TIME_EXCEEDED,
+- ICMP_EXC_FRAGTIME, 0);
+- consume_skb(clone);
+- goto out_rcu_unlock;
+- }
+ out:
+ spin_unlock(&qp->q.lock);
+ out_rcu_unlock:
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-017-ipv6-frags-rewrite-ip6_expire_frag_queue.patch b/patches.kernel.org/4.4.174-017-ipv6-frags-rewrite-ip6_expire_frag_queue.patch
new file mode 100644
index 0000000000..ccf3ca5a79
--- /dev/null
+++ b/patches.kernel.org/4.4.174-017-ipv6-frags-rewrite-ip6_expire_frag_queue.patch
@@ -0,0 +1,82 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:30:02 -0700
+Subject: [PATCH] ipv6: frags: rewrite ip6_expire_frag_queue()
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 05c0b86b9696802fd0ce5676a92a63f1b455bdf3
+
+commit 05c0b86b9696802fd0ce5676a92a63f1b455bdf3 upstream.
+
+Make it similar to IPv4 ip_expire(), and release the lock
+before calling icmp functions.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[bwh: Backported to 4.4: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/ipv6/reassembly.c | 24 ++++++++++++++++--------
+ 1 file changed, 16 insertions(+), 8 deletions(-)
+
+diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
+index 39e05821b956..83aa027d24b6 100644
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -92,7 +92,9 @@ EXPORT_SYMBOL(ip6_frag_init);
+ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
+ {
+ struct net_device *dev = NULL;
++ struct sk_buff *head;
+
++ rcu_read_lock();
+ spin_lock(&fq->q.lock);
+
+ if (fq->q.flags & INET_FRAG_COMPLETE)
+@@ -100,28 +102,34 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
+
+ inet_frag_kill(&fq->q);
+
+- rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, fq->iif);
+ if (!dev)
+- goto out_rcu_unlock;
++ goto out;
+
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+
+ /* Don't send error if the first segment did not arrive. */
+- if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
+- goto out_rcu_unlock;
++ head = fq->q.fragments;
++ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
++ goto out;
+
+ /* But use as source device on which LAST ARRIVED
+ * segment was received. And do not use fq->dev
+ * pointer directly, device might already disappeared.
+ */
+- fq->q.fragments->dev = dev;
+- icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+-out_rcu_unlock:
+- rcu_read_unlock();
++ head->dev = dev;
++ skb_get(head);
++ spin_unlock(&fq->q.lock);
++
++ icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
++ kfree_skb(head);
++ goto out_rcu_unlock;
++
+ out:
+ spin_unlock(&fq->q.lock);
++out_rcu_unlock:
++ rcu_read_unlock();
+ inet_frag_put(&fq->q);
+ }
+ EXPORT_SYMBOL(ip6_expire_frag_queue);
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-018-rhashtable-reorganize-struct-rhashtable-layou.patch b/patches.kernel.org/4.4.174-018-rhashtable-reorganize-struct-rhashtable-layou.patch
new file mode 100644
index 0000000000..50ad87aa15
--- /dev/null
+++ b/patches.kernel.org/4.4.174-018-rhashtable-reorganize-struct-rhashtable-layou.patch
@@ -0,0 +1,57 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:30:03 -0700
+Subject: [PATCH] rhashtable: reorganize struct rhashtable layout
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: e5d672a0780d9e7118caad4c171ec88b8299398d
+
+commit e5d672a0780d9e7118caad4c171ec88b8299398d upstream.
+
+While under frags DDOS I noticed unfortunate false sharing between
+@nelems and @params.automatic_shrinking
+
+Move @nelems to the end of struct rhashtable so that the first cache line
+is shared between all cpus, because it is almost never dirtied.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/linux/rhashtable.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
+index 753835d05be8..e97cdfd6cba9 100644
+--- a/include/linux/rhashtable.h
++++ b/include/linux/rhashtable.h
+@@ -133,23 +133,23 @@ struct rhashtable_params {
+ /**
+ * struct rhashtable - Hash table handle
+ * @tbl: Bucket table
+- * @nelems: Number of elements in table
+ * @key_len: Key length for hashfn
+ * @elasticity: Maximum chain length before rehash
+ * @p: Configuration parameters
+ * @run_work: Deferred worker to expand/shrink asynchronously
+ * @mutex: Mutex to protect current/future table swapping
+ * @lock: Spin lock to protect walker list
++ * @nelems: Number of elements in table
+ */
+ struct rhashtable {
+ struct bucket_table __rcu *tbl;
+- atomic_t nelems;
+ unsigned int key_len;
+ unsigned int elasticity;
+ struct rhashtable_params p;
+ struct work_struct run_work;
+ struct mutex mutex;
+ spinlock_t lock;
++ atomic_t nelems;
+ };
+
+ /**
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-019-inet-frags-reorganize-struct-netns_frags.patch b/patches.kernel.org/4.4.174-019-inet-frags-reorganize-struct-netns_frags.patch
new file mode 100644
index 0000000000..5b36f6da38
--- /dev/null
+++ b/patches.kernel.org/4.4.174-019-inet-frags-reorganize-struct-netns_frags.patch
@@ -0,0 +1,51 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:30:04 -0700
+Subject: [PATCH] inet: frags: reorganize struct netns_frags
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: c2615cf5a761b32bf74e85bddc223dfff3d9b9f0
+
+commit c2615cf5a761b32bf74e85bddc223dfff3d9b9f0 upstream.
+
+Put the read-mostly fields in a separate cache line
+at the beginning of struct netns_frags, to reduce
+false sharing noticed in inet_frag_kill()
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[bwh: Backported to 4.4: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/net/inet_frag.h | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
+index 197c172cc811..41a830ba11fc 100644
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -4,15 +4,16 @@
+ #include <linux/rhashtable.h>
+
+ struct netns_frags {
+- struct rhashtable rhashtable ____cacheline_aligned_in_smp;
+-
+- /* Keep atomic mem on separate cachelines in structs that include it */
+- atomic_long_t mem ____cacheline_aligned_in_smp;
+ /* sysctls */
+ long high_thresh;
+ long low_thresh;
+ int timeout;
+ struct inet_frags *f;
++
++ struct rhashtable rhashtable ____cacheline_aligned_in_smp;
++
++ /* Keep atomic mem on separate cachelines in structs that include it */
++ atomic_long_t mem ____cacheline_aligned_in_smp;
+ };
+
+ /**
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-020-inet-frags-get-rid-of-ipfrag_skb_cb-FRAG_CB.patch b/patches.kernel.org/4.4.174-020-inet-frags-get-rid-of-ipfrag_skb_cb-FRAG_CB.patch
new file mode 100644
index 0000000000..4e9000c5f6
--- /dev/null
+++ b/patches.kernel.org/4.4.174-020-inet-frags-get-rid-of-ipfrag_skb_cb-FRAG_CB.patch
@@ -0,0 +1,54 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:30:05 -0700
+Subject: [PATCH] inet: frags: get rid of ipfrag_skb_cb/FRAG_CB
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: bf66337140c64c27fa37222b7abca7e49d63fb57
+
+commit bf66337140c64c27fa37222b7abca7e49d63fb57 upstream.
+
+ip_defrag uses skb->cb[] to store the fragment offset, and unfortunately
+this integer is currently in a different cache line than skb->next,
+meaning that we use two cache lines per skb when finding the insertion point.
+
+By aliasing skb->ip_defrag_offset and skb->dev, we pack all the fields
+in a single cache line and save precious memory bandwidth.
+
+Note that after the fast path added by Changli Gao in commit
+d6bebca92c66 ("fragment: add fast path for in-order fragments")
+this change won't help the fast path, since we still need
+to access prev->len (2nd cache line), but will show great
+benefits when slow path is entered, since we perform
+a linear scan of a potentially long list.
+
+Also, note that this potential long list is an attack vector,
+we might consider also using an rb-tree there eventually.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/linux/skbuff.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index 6d39d81d3c38..053bdfb526f7 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -558,6 +558,11 @@ struct sk_buff {
+ };
+ struct rb_node rbnode; /* used in netem & tcp stack */
+ };
++
++ union {
++ int ip_defrag_offset;
++ };
++
+ struct sock *sk;
+ struct net_device *dev;
+
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-021-inet-frags-fix-ip6frag_low_thresh-boundary.patch b/patches.kernel.org/4.4.174-021-inet-frags-fix-ip6frag_low_thresh-boundary.patch
new file mode 100644
index 0000000000..0a93840cb1
--- /dev/null
+++ b/patches.kernel.org/4.4.174-021-inet-frags-fix-ip6frag_low_thresh-boundary.patch
@@ -0,0 +1,227 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:30:06 -0700
+Subject: [PATCH] inet: frags: fix ip6frag_low_thresh boundary
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 3d23401283e80ceb03f765842787e0e79ff598b7
+
+commit 3d23401283e80ceb03f765842787e0e79ff598b7 upstream.
+
+Giving an integer to proc_doulongvec_minmax() is dangerous on 64bit arches,
+since linker might place next to it a non zero value preventing a change
+to ip6frag_low_thresh.
+
+ip6frag_low_thresh is not used anymore in the kernel, but we do not
+want to prematurely break user scripts wanting to change it.
+
+Since specifying a minimal value of 0 for proc_doulongvec_minmax()
+is moot, let's remove these zero values in all defrag units.
+
+Fixes: 6e00f7dd5e4e ("ipv6: frags: fix /proc/sys/net/ipv6/ip6frag_low_thresh")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/ieee802154/6lowpan/reassembly.c | 2 --
+ net/ipv4/ip_fragment.c | 40 ++++++++++---------------
+ net/ipv6/netfilter/nf_conntrack_reasm.c | 2 --
+ net/ipv6/reassembly.c | 4 +--
+ 4 files changed, 17 insertions(+), 31 deletions(-)
+
+diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
+index 94ef920530a5..6183730d38db 100644
+--- a/net/ieee802154/6lowpan/reassembly.c
++++ b/net/ieee802154/6lowpan/reassembly.c
+@@ -410,7 +410,6 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
+ }
+
+ #ifdef CONFIG_SYSCTL
+-static long zero;
+
+ static struct ctl_table lowpan_frags_ns_ctl_table[] = {
+ {
+@@ -427,7 +426,6 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = {
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+- .extra1 = &zero,
+ .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
+ },
+ {
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index 24665f004556..15d5349180cc 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -58,14 +58,6 @@
+ static int sysctl_ipfrag_max_dist __read_mostly = 64;
+ static const char ip_frag_cache_name[] = "ip4-frags";
+
+-struct ipfrag_skb_cb
+-{
+- struct inet_skb_parm h;
+- int offset;
+-};
+-
+-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
+-
+ /* Describe an entry in the "incomplete datagrams" queue. */
+ struct ipq {
+ struct inet_frag_queue q;
+@@ -353,13 +345,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ * this fragment, right?
+ */
+ prev = qp->q.fragments_tail;
+- if (!prev || FRAG_CB(prev)->offset < offset) {
++ if (!prev || prev->ip_defrag_offset < offset) {
+ next = NULL;
+ goto found;
+ }
+ prev = NULL;
+ for (next = qp->q.fragments; next != NULL; next = next->next) {
+- if (FRAG_CB(next)->offset >= offset)
++ if (next->ip_defrag_offset >= offset)
+ break; /* bingo! */
+ prev = next;
+ }
+@@ -370,7 +362,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ * any overlaps are eliminated.
+ */
+ if (prev) {
+- int i = (FRAG_CB(prev)->offset + prev->len) - offset;
++ int i = (prev->ip_defrag_offset + prev->len) - offset;
+
+ if (i > 0) {
+ offset += i;
+@@ -387,8 +379,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+
+ err = -ENOMEM;
+
+- while (next && FRAG_CB(next)->offset < end) {
+- int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
++ while (next && next->ip_defrag_offset < end) {
++ int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */
+
+ if (i < next->len) {
+ /* Eat head of the next overlapped fragment
+@@ -396,7 +388,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ */
+ if (!pskb_pull(next, i))
+ goto err;
+- FRAG_CB(next)->offset += i;
++ next->ip_defrag_offset += i;
+ qp->q.meat -= i;
+ if (next->ip_summed != CHECKSUM_UNNECESSARY)
+ next->ip_summed = CHECKSUM_NONE;
+@@ -420,7 +412,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ }
+ }
+
+- FRAG_CB(skb)->offset = offset;
++ /* Note : skb->ip_defrag_offset and skb->dev share the same location */
++ dev = skb->dev;
++ if (dev)
++ qp->iif = dev->ifindex;
++ /* Makes sure compiler wont do silly aliasing games */
++ barrier();
++ skb->ip_defrag_offset = offset;
+
+ /* Insert this fragment in the chain of fragments. */
+ skb->next = next;
+@@ -431,11 +429,6 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ else
+ qp->q.fragments = skb;
+
+- dev = skb->dev;
+- if (dev) {
+- qp->iif = dev->ifindex;
+- skb->dev = NULL;
+- }
+ qp->q.stamp = skb->tstamp;
+ qp->q.meat += skb->len;
+ qp->ecn |= ecn;
+@@ -511,7 +504,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+ }
+
+ WARN_ON(!head);
+- WARN_ON(FRAG_CB(head)->offset != 0);
++ WARN_ON(head->ip_defrag_offset != 0);
+
+ /* Allocate a new buffer for the datagram. */
+ ihlen = ip_hdrlen(head);
+@@ -678,7 +671,7 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
+ EXPORT_SYMBOL(ip_check_defrag);
+
+ #ifdef CONFIG_SYSCTL
+-static long zero;
++static int dist_min;
+
+ static struct ctl_table ip4_frags_ns_ctl_table[] = {
+ {
+@@ -695,7 +688,6 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+- .extra1 = &zero,
+ .extra2 = &init_net.ipv4.frags.high_thresh
+ },
+ {
+@@ -724,7 +716,7 @@ static struct ctl_table ip4_frags_ctl_table[] = {
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+- .extra1 = &zero
++ .extra1 = &dist_min,
+ },
+ { }
+ };
+diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
+index 618dc76003a2..a39b5af43efe 100644
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -64,7 +64,6 @@ struct nf_ct_frag6_skb_cb
+ static struct inet_frags nf_frags;
+
+ #ifdef CONFIG_SYSCTL
+-static long zero;
+
+ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
+ {
+@@ -80,7 +79,6 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+- .extra1 = &zero,
+ .extra2 = &init_net.nf_frag.frags.high_thresh
+ },
+ {
+diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
+index 83aa027d24b6..32d4659d2d83 100644
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -547,7 +547,6 @@ static const struct inet6_protocol frag_protocol = {
+ };
+
+ #ifdef CONFIG_SYSCTL
+-static int zero;
+
+ static struct ctl_table ip6_frags_ns_ctl_table[] = {
+ {
+@@ -563,8 +562,7 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
+ .data = &init_net.ipv6.frags.low_thresh,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = &zero,
++ .proc_handler = proc_doulongvec_minmax,
+ .extra2 = &init_net.ipv6.frags.high_thresh
+ },
+ {
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-022-ip-discard-IPv4-datagrams-with-overlapping-se.patch b/patches.kernel.org/4.4.174-022-ip-discard-IPv4-datagrams-with-overlapping-se.patch
new file mode 100644
index 0000000000..6205d69e6b
--- /dev/null
+++ b/patches.kernel.org/4.4.174-022-ip-discard-IPv4-datagrams-with-overlapping-se.patch
@@ -0,0 +1,159 @@
+From: Peter Oskolkov <posk@google.com>
+Date: Wed, 10 Oct 2018 12:30:07 -0700
+Subject: [PATCH] ip: discard IPv4 datagrams with overlapping segments.
+Patch-mainline: 4.4.174
+References: CVE-2018-5391 bnc#1012382 bsc#1103097
+Git-commit: 7969e5c40dfd04799d4341f1b7cd266b6e47f227
+
+commit 7969e5c40dfd04799d4341f1b7cd266b6e47f227 upstream.
+
+This behavior is required in IPv6, and there is little need
+to tolerate overlapping fragments in IPv4. This change
+simplifies the code and eliminates potential DDoS attack vectors.
+
+Tested: ran ip_defrag selftest (not yet available upstream).
+
+Suggested-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Peter Oskolkov <posk@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Florian Westphal <fw@strlen.de>
+Acked-by: Stephen Hemminger <stephen@networkplumber.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Mao Wenan <maowenan@huawei.com>
+[bwh: Backported to 4.4:
+ - s/__IP_INC_STATS/IP_INC_STATS_BH/
+ - Deleted code is slightly different]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/uapi/linux/snmp.h | 1 +
+ net/ipv4/ip_fragment.c | 70 +++++++++++----------------------------
+ net/ipv4/proc.c | 1 +
+ 3 files changed, 21 insertions(+), 51 deletions(-)
+
+diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
+index 25a9ad8bcef1..9de808ebce05 100644
+--- a/include/uapi/linux/snmp.h
++++ b/include/uapi/linux/snmp.h
+@@ -55,6 +55,7 @@ enum
+ IPSTATS_MIB_ECT1PKTS, /* InECT1Pkts */
+ IPSTATS_MIB_ECT0PKTS, /* InECT0Pkts */
+ IPSTATS_MIB_CEPKTS, /* InCEPkts */
++ IPSTATS_MIB_REASM_OVERLAPS, /* ReasmOverlaps */
+ __IPSTATS_MIB_MAX
+ };
+
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index 15d5349180cc..6e9d0c7c5159 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -277,6 +277,7 @@ static int ip_frag_reinit(struct ipq *qp)
+ /* Add new segment to existing queue. */
+ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ {
++ struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+ struct sk_buff *prev, *next;
+ struct net_device *dev;
+ unsigned int fragsize;
+@@ -357,60 +358,23 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ }
+
+ found:
+- /* We found where to put this one. Check for overlap with
+- * preceding fragment, and, if needed, align things so that
+- * any overlaps are eliminated.
++ /* RFC5722, Section 4, amended by Errata ID : 3089
++ * When reassembling an IPv6 datagram, if
++ * one or more its constituent fragments is determined to be an
++ * overlapping fragment, the entire datagram (and any constituent
++ * fragments) MUST be silently discarded.
++ *
++ * We do the same here for IPv4.
+ */
+- if (prev) {
+- int i = (prev->ip_defrag_offset + prev->len) - offset;
+-
+- if (i > 0) {
+- offset += i;
+- err = -EINVAL;
+- if (end <= offset)
+- goto err;
+- err = -ENOMEM;
+- if (!pskb_pull(skb, i))
+- goto err;
+- if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+- skb->ip_summed = CHECKSUM_NONE;
+- }
+- }
+-
+- err = -ENOMEM;
+
+- while (next && next->ip_defrag_offset < end) {
+- int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */
++ /* Is there an overlap with the previous fragment? */
++ if (prev &&
++ (prev->ip_defrag_offset + prev->len) > offset)
++ goto discard_qp;
+
+- if (i < next->len) {
+- /* Eat head of the next overlapped fragment
+- * and leave the loop. The next ones cannot overlap.
+- */
+- if (!pskb_pull(next, i))
+- goto err;
+- next->ip_defrag_offset += i;
+- qp->q.meat -= i;
+- if (next->ip_summed != CHECKSUM_UNNECESSARY)
+- next->ip_summed = CHECKSUM_NONE;
+- break;
+- } else {
+- struct sk_buff *free_it = next;
+-
+- /* Old fragment is completely overridden with
+- * new one drop it.
+- */
+- next = next->next;
+-
+- if (prev)
+- prev->next = next;
+- else
+- qp->q.fragments = next;
+-
+- qp->q.meat -= free_it->len;
+- sub_frag_mem_limit(qp->q.net, free_it->truesize);
+- kfree_skb(free_it);
+- }
+- }
++ /* Is there an overlap with the next fragment? */
++ if (next && next->ip_defrag_offset < end)
++ goto discard_qp;
+
+ /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+ dev = skb->dev;
+@@ -458,6 +422,10 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ skb_dst_drop(skb);
+ return -EINPROGRESS;
+
++discard_qp:
++ inet_frag_kill(&qp->q);
++ err = -EINVAL;
++ IP_INC_STATS_BH(net, IPSTATS_MIB_REASM_OVERLAPS);
+ err:
+ kfree_skb(skb);
+ return err;
+diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
+index d9415fae9467..b001ad668108 100644
+--- a/net/ipv4/proc.c
++++ b/net/ipv4/proc.c
+@@ -132,6 +132,7 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
+ SNMP_MIB_ITEM("InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
+ SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
+ SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS),
++ SNMP_MIB_ITEM("ReasmOverlaps", IPSTATS_MIB_REASM_OVERLAPS),
+ SNMP_MIB_SENTINEL
+ };
+
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-023-net-modify-skb_rbtree_purge-to-return-the-tru.patch b/patches.kernel.org/4.4.174-023-net-modify-skb_rbtree_purge-to-return-the-tru.patch
new file mode 100644
index 0000000000..e7a828faa6
--- /dev/null
+++ b/patches.kernel.org/4.4.174-023-net-modify-skb_rbtree_purge-to-return-the-tru.patch
@@ -0,0 +1,75 @@
+From: Peter Oskolkov <posk@google.com>
+Date: Wed, 10 Oct 2018 12:30:09 -0700
+Subject: [PATCH] net: modify skb_rbtree_purge to return the truesize of all
+ purged skbs.
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 385114dec8a49b5e5945e77ba7de6356106713f4
+
+commit 385114dec8a49b5e5945e77ba7de6356106713f4 upstream.
+
+Tested: see the next patch in the series.
+
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Peter Oskolkov <posk@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Mao Wenan <maowenan@huawei.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/linux/skbuff.h | 2 +-
+ net/core/skbuff.c | 6 +++++-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index 053bdfb526f7..aa4753ae1ff2 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -2278,7 +2278,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
+ kfree_skb(skb);
+ }
+
+-void skb_rbtree_purge(struct rb_root *root);
++unsigned int skb_rbtree_purge(struct rb_root *root);
+
+ void *netdev_alloc_frag(unsigned int fragsz);
+
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index 8a57bbaf7452..49f73fb0840b 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -2380,23 +2380,27 @@ EXPORT_SYMBOL(skb_queue_purge);
+ /**
+ * skb_rbtree_purge - empty a skb rbtree
+ * @root: root of the rbtree to empty
++ * Return value: the sum of truesizes of all purged skbs.
+ *
+ * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
+ * the list and one reference dropped. This function does not take
+ * any lock. Synchronization should be handled by the caller (e.g., TCP
+ * out-of-order queue is protected by the socket lock).
+ */
+-void skb_rbtree_purge(struct rb_root *root)
++unsigned int skb_rbtree_purge(struct rb_root *root)
+ {
+ struct rb_node *p = rb_first(root);
++ unsigned int sum = 0;
+
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
++ sum += skb->truesize;
+ kfree_skb(skb);
+ }
++ return sum;
+ }
+
+ /**
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-024-ipv6-defrag-drop-non-last-frags-smaller-than-.patch b/patches.kernel.org/4.4.174-024-ipv6-defrag-drop-non-last-frags-smaller-than-.patch
new file mode 100644
index 0000000000..cf139cd4ea
--- /dev/null
+++ b/patches.kernel.org/4.4.174-024-ipv6-defrag-drop-non-last-frags-smaller-than-.patch
@@ -0,0 +1,66 @@
+From: Florian Westphal <fw@strlen.de>
+Date: Wed, 10 Oct 2018 12:30:10 -0700
+Subject: [PATCH] ipv6: defrag: drop non-last frags smaller than min mtu
+Patch-mainline: 4.4.174
+References: CVE-2018-5391 bnc#1012382 bsc#1103097
+Git-commit: 0ed4229b08c13c84a3c301a08defdc9e7f4467e6
+
+commit 0ed4229b08c13c84a3c301a08defdc9e7f4467e6 upstream.
+
+don't bother with pathological cases, they only waste cycles.
+IPv6 requires a minimum MTU of 1280 so we should never see fragments
+smaller than this (except last frag).
+
+v3: don't use awkward "-offset + len"
+v2: drop IPv4 part, which added same check w. IPV4_MIN_MTU (68).
+ There were concerns that there could be even smaller frags
+ generated by intermediate nodes, e.g. on radio networks.
+
+Cc: Peter Oskolkov <posk@google.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Mao Wenan <maowenan@huawei.com>
+[bwh: Backported to 4.4: In nf_ct_frag6_gather() use clone instead of skb,
+ and goto ret_orig in case of error]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ++++
+ net/ipv6/reassembly.c | 4 ++++
+ 2 files changed, 8 insertions(+)
+
+diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
+index a39b5af43efe..cb72239dcff7 100644
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -574,6 +574,10 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use
+ hdr = ipv6_hdr(clone);
+ fhdr = (struct frag_hdr *)skb_transport_header(clone);
+
++ if (clone->len - skb_network_offset(clone) < IPV6_MIN_MTU &&
++ fhdr->frag_off & htons(IP6_MF))
++ goto ret_orig;
++
+ skb_orphan(skb);
+ fq = fq_find(net, fhdr->identification, user, hdr,
+ skb->dev ? skb->dev->ifindex : 0);
+diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
+index 32d4659d2d83..3cbcf099f6b2 100644
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -515,6 +515,10 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
+ return 1;
+ }
+
++ if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
++ fhdr->frag_off & htons(IP6_MF))
++ goto fail_hdr;
++
+ iif = skb->dev ? skb->dev->ifindex : 0;
+ fq = fq_find(net, fhdr->identification, hdr, iif);
+ if (fq) {
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-025-net-pskb_trim_rcsum-and-CHECKSUM_COMPLETE-are.patch b/patches.kernel.org/4.4.174-025-net-pskb_trim_rcsum-and-CHECKSUM_COMPLETE-are.patch
new file mode 100644
index 0000000000..e020be5541
--- /dev/null
+++ b/patches.kernel.org/4.4.174-025-net-pskb_trim_rcsum-and-CHECKSUM_COMPLETE-are.patch
@@ -0,0 +1,81 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 10 Oct 2018 12:30:11 -0700
+Subject: [PATCH] net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 88078d98d1bb085d72af8437707279e203524fa5
+
+commit 88078d98d1bb085d72af8437707279e203524fa5 upstream.
+
+After working on IP defragmentation lately, I found that some large
+packets defeat CHECKSUM_COMPLETE optimization because of NIC adding
+zero paddings on the last (small) fragment.
+
+While removing the padding with pskb_trim_rcsum(), we set skb->ip_summed
+to CHECKSUM_NONE, forcing a full csum validation, even if all prior
+fragments had CHECKSUM_COMPLETE set.
+
+We can instead compute the checksum of the part we are trimming,
+usually smaller than the part we keep.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/linux/skbuff.h | 5 ++---
+ net/core/skbuff.c | 14 ++++++++++++++
+ 2 files changed, 16 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index aa4753ae1ff2..1e734e221ea3 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -2796,6 +2796,7 @@ static inline unsigned char *skb_push_rcsum(struct sk_buff *skb,
+ return skb->data;
+ }
+
++int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len);
+ /**
+ * pskb_trim_rcsum - trim received skb and update checksum
+ * @skb: buffer to trim
+@@ -2810,9 +2811,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
+ {
+ if (likely(len >= skb->len))
+ return 0;
+- if (skb->ip_summed == CHECKSUM_COMPLETE)
+- skb->ip_summed = CHECKSUM_NONE;
+- return __pskb_trim(skb, len);
++ return pskb_trim_rcsum_slow(skb, len);
+ }
+
+ #define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode)
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index 49f73fb0840b..3d7c92d38a78 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -1502,6 +1502,20 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len)
+ }
+ EXPORT_SYMBOL(___pskb_trim);
+
++/* Note : use pskb_trim_rcsum() instead of calling this directly
++ */
++int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
++{
++ if (skb->ip_summed == CHECKSUM_COMPLETE) {
++ int delta = skb->len - len;
++
++ skb->csum = csum_sub(skb->csum,
++ skb_checksum(skb, len, delta, 0));
++ }
++ return __pskb_trim(skb, len);
++}
++EXPORT_SYMBOL(pskb_trim_rcsum_slow);
++
+ /**
+ * __pskb_pull_tail - advance tail of skb header
+ * @skb: buffer to reallocate
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-026-ip-use-rb-trees-for-IP-frag-queue.patch b/patches.kernel.org/4.4.174-026-ip-use-rb-trees-for-IP-frag-queue.patch
new file mode 100644
index 0000000000..46b897309a
--- /dev/null
+++ b/patches.kernel.org/4.4.174-026-ip-use-rb-trees-for-IP-frag-queue.patch
@@ -0,0 +1,483 @@
+From: Peter Oskolkov <posk@google.com>
+Date: Wed, 10 Oct 2018 12:30:13 -0700
+Subject: [PATCH] ip: use rb trees for IP frag queue.
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: fa0f527358bd900ef92f925878ed6bfbd51305cc
+
+commit fa0f527358bd900ef92f925878ed6bfbd51305cc upstream.
+
+Similar to TCP OOO RX queue, it makes sense to use rb trees to store
+IP fragments, so that OOO fragments are inserted faster.
+
+Tested:
+
+- a follow-up patch contains a rather comprehensive ip defrag
+ self-test (functional)
+- ran neper `udp_stream -c -H <host> -F 100 -l 300 -T 20`:
+ netstat --statistics
+ Ip:
+ 282078937 total packets received
+ 0 forwarded
+ 0 incoming packets discarded
+ 946760 incoming packets delivered
+ 18743456 requests sent out
+ 101 fragments dropped after timeout
+ 282077129 reassemblies required
+ 944952 packets reassembled ok
+ 262734239 packet reassembles failed
+ (The numbers/stats above are somewhat better re:
+ reassemblies vs a kernel without this patchset. More
+ comprehensive performance testing TBD).
+
+Reported-by: Jann Horn <jannh@google.com>
+Reported-by: Juha-Matti Tilli <juha-matti.tilli@iki.fi>
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Peter Oskolkov <posk@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Mao Wenan <maowenan@huawei.com>
+[bwh: Backported to 4.4:
+ - Keep using frag_kfree_skb() in inet_frag_destroy()
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/linux/skbuff.h | 4 +-
+ include/net/inet_frag.h | 3 +-
+ net/ipv4/inet_fragment.c | 16 ++-
+ net/ipv4/ip_fragment.c | 182 +++++++++++++-----------
+ net/ipv6/netfilter/nf_conntrack_reasm.c | 1 +
+ net/ipv6/reassembly.c | 1 +
+ 6 files changed, 117 insertions(+), 90 deletions(-)
+
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index 1e734e221ea3..502787c29ce9 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -556,14 +556,14 @@ struct sk_buff {
+ struct skb_mstamp skb_mstamp;
+ };
+ };
+- struct rb_node rbnode; /* used in netem & tcp stack */
++ struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */
+ };
+
+ union {
++ struct sock *sk;
+ int ip_defrag_offset;
+ };
+
+- struct sock *sk;
+ struct net_device *dev;
+
+ /*
+diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
+index 41a830ba11fc..5fd8e76dcd26 100644
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -73,7 +73,8 @@ struct inet_frag_queue {
+ struct timer_list timer;
+ spinlock_t lock;
+ atomic_t refcnt;
+- struct sk_buff *fragments;
++ struct sk_buff *fragments; /* Used in IPv6. */
++ struct rb_root rb_fragments; /* Used in IPv4. */
+ struct sk_buff *fragments_tail;
+ ktime_t stamp;
+ int len;
+diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
+index bb784c3a40fb..02d399618b8f 100644
+--- a/net/ipv4/inet_fragment.c
++++ b/net/ipv4/inet_fragment.c
+@@ -144,12 +144,16 @@ void inet_frag_destroy(struct inet_frag_queue *q)
+ fp = q->fragments;
+ nf = q->net;
+ f = nf->f;
+- while (fp) {
+- struct sk_buff *xp = fp->next;
+-
+- sum_truesize += fp->truesize;
+- frag_kfree_skb(nf, f, fp);
+- fp = xp;
++ if (fp) {
++ do {
++ struct sk_buff *xp = fp->next;
++
++ sum_truesize += fp->truesize;
++ frag_kfree_skb(nf, f, fp);
++ fp = xp;
++ } while (fp);
++ } else {
++ sum_truesize = skb_rbtree_purge(&q->rb_fragments);
+ }
+ sum = sum_truesize + f->qsize;
+
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index 6e9d0c7c5159..270792776403 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -136,7 +136,7 @@ static bool frag_expire_skip_icmp(u32 user)
+ static void ip_expire(unsigned long arg)
+ {
+ const struct iphdr *iph;
+- struct sk_buff *head;
++ struct sk_buff *head = NULL;
+ struct net *net;
+ struct ipq *qp;
+ int err;
+@@ -152,14 +152,31 @@ static void ip_expire(unsigned long arg)
+
+ ipq_kill(qp);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
+-
+- head = qp->q.fragments;
+-
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+
+- if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !head)
++ if (!qp->q.flags & INET_FRAG_FIRST_IN)
+ goto out;
+
++ /* sk_buff::dev and sk_buff::rbnode are unionized. So we
++ * pull the head out of the tree in order to be able to
++ * deal with head->dev.
++ */
++ if (qp->q.fragments) {
++ head = qp->q.fragments;
++ qp->q.fragments = head->next;
++ } else {
++ head = skb_rb_first(&qp->q.rb_fragments);
++ if (!head)
++ goto out;
++ rb_erase(&head->rbnode, &qp->q.rb_fragments);
++ memset(&head->rbnode, 0, sizeof(head->rbnode));
++ barrier();
++ }
++ if (head == qp->q.fragments_tail)
++ qp->q.fragments_tail = NULL;
++
++ sub_frag_mem_limit(qp->q.net, head->truesize);
++
+ head->dev = dev_get_by_index_rcu(net, qp->iif);
+ if (!head->dev)
+ goto out;
+@@ -179,16 +196,16 @@ static void ip_expire(unsigned long arg)
+ (skb_rtable(head)->rt_type != RTN_LOCAL))
+ goto out;
+
+- skb_get(head);
+ spin_unlock(&qp->q.lock);
+ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
+- kfree_skb(head);
+ goto out_rcu_unlock;
+
+ out:
+ spin_unlock(&qp->q.lock);
+ out_rcu_unlock:
+ rcu_read_unlock();
++ if (head)
++ kfree_skb(head);
+ ipq_put(qp);
+ }
+
+@@ -231,7 +248,7 @@ static int ip_frag_too_far(struct ipq *qp)
+ end = atomic_inc_return(&peer->rid);
+ qp->rid = end;
+
+- rc = qp->q.fragments && (end - start) > max;
++ rc = qp->q.fragments_tail && (end - start) > max;
+
+ if (rc) {
+ struct net *net;
+@@ -245,7 +262,6 @@ static int ip_frag_too_far(struct ipq *qp)
+
+ static int ip_frag_reinit(struct ipq *qp)
+ {
+- struct sk_buff *fp;
+ unsigned int sum_truesize = 0;
+
+ if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
+@@ -253,20 +269,14 @@ static int ip_frag_reinit(struct ipq *qp)
+ return -ETIMEDOUT;
+ }
+
+- fp = qp->q.fragments;
+- do {
+- struct sk_buff *xp = fp->next;
+-
+- sum_truesize += fp->truesize;
+- kfree_skb(fp);
+- fp = xp;
+- } while (fp);
++ sum_truesize = skb_rbtree_purge(&qp->q.rb_fragments);
+ sub_frag_mem_limit(qp->q.net, sum_truesize);
+
+ qp->q.flags = 0;
+ qp->q.len = 0;
+ qp->q.meat = 0;
+ qp->q.fragments = NULL;
++ qp->q.rb_fragments = RB_ROOT;
+ qp->q.fragments_tail = NULL;
+ qp->iif = 0;
+ qp->ecn = 0;
+@@ -278,7 +288,8 @@ static int ip_frag_reinit(struct ipq *qp)
+ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ {
+ struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+- struct sk_buff *prev, *next;
++ struct rb_node **rbn, *parent;
++ struct sk_buff *skb1;
+ struct net_device *dev;
+ unsigned int fragsize;
+ int flags, offset;
+@@ -341,58 +352,58 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ if (err)
+ goto err;
+
+- /* Find out which fragments are in front and at the back of us
+- * in the chain of fragments so far. We must know where to put
+- * this fragment, right?
+- */
+- prev = qp->q.fragments_tail;
+- if (!prev || prev->ip_defrag_offset < offset) {
+- next = NULL;
+- goto found;
+- }
+- prev = NULL;
+- for (next = qp->q.fragments; next != NULL; next = next->next) {
+- if (next->ip_defrag_offset >= offset)
+- break; /* bingo! */
+- prev = next;
+- }
++ /* Note : skb->rbnode and skb->dev share the same location. */
++ dev = skb->dev;
++ /* Makes sure compiler wont do silly aliasing games */
++ barrier();
+
+-found:
+ /* RFC5722, Section 4, amended by Errata ID : 3089
+ * When reassembling an IPv6 datagram, if
+ * one or more its constituent fragments is determined to be an
+ * overlapping fragment, the entire datagram (and any constituent
+ * fragments) MUST be silently discarded.
+ *
+- * We do the same here for IPv4.
++ * We do the same here for IPv4 (and increment an snmp counter).
+ */
+
+- /* Is there an overlap with the previous fragment? */
+- if (prev &&
+- (prev->ip_defrag_offset + prev->len) > offset)
+- goto discard_qp;
+-
+- /* Is there an overlap with the next fragment? */
+- if (next && next->ip_defrag_offset < end)
+- goto discard_qp;
++ /* Find out where to put this fragment. */
++ skb1 = qp->q.fragments_tail;
++ if (!skb1) {
++ /* This is the first fragment we've received. */
++ rb_link_node(&skb->rbnode, NULL, &qp->q.rb_fragments.rb_node);
++ qp->q.fragments_tail = skb;
++ } else if ((skb1->ip_defrag_offset + skb1->len) < end) {
++ /* This is the common/special case: skb goes to the end. */
++ /* Detect and discard overlaps. */
++ if (offset < (skb1->ip_defrag_offset + skb1->len))
++ goto discard_qp;
++ /* Insert after skb1. */
++ rb_link_node(&skb->rbnode, &skb1->rbnode, &skb1->rbnode.rb_right);
++ qp->q.fragments_tail = skb;
++ } else {
++ /* Binary search. Note that skb can become the first fragment, but
++ * not the last (covered above). */
++ rbn = &qp->q.rb_fragments.rb_node;
++ do {
++ parent = *rbn;
++ skb1 = rb_to_skb(parent);
++ if (end <= skb1->ip_defrag_offset)
++ rbn = &parent->rb_left;
++ else if (offset >= skb1->ip_defrag_offset + skb1->len)
++ rbn = &parent->rb_right;
++ else /* Found an overlap with skb1. */
++ goto discard_qp;
++ } while (*rbn);
++ /* Here we have parent properly set, and rbn pointing to
++ * one of its NULL left/right children. Insert skb. */
++ rb_link_node(&skb->rbnode, parent, rbn);
++ }
++ rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
+
+- /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+- dev = skb->dev;
+ if (dev)
+ qp->iif = dev->ifindex;
+- /* Makes sure compiler wont do silly aliasing games */
+- barrier();
+ skb->ip_defrag_offset = offset;
+
+- /* Insert this fragment in the chain of fragments. */
+- skb->next = next;
+- if (!next)
+- qp->q.fragments_tail = skb;
+- if (prev)
+- prev->next = skb;
+- else
+- qp->q.fragments = skb;
+-
+ qp->q.stamp = skb->tstamp;
+ qp->q.meat += skb->len;
+ qp->ecn |= ecn;
+@@ -414,7 +425,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ unsigned long orefdst = skb->_skb_refdst;
+
+ skb->_skb_refdst = 0UL;
+- err = ip_frag_reasm(qp, prev, dev);
++ err = ip_frag_reasm(qp, skb, dev);
+ skb->_skb_refdst = orefdst;
+ return err;
+ }
+@@ -431,15 +442,15 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ return err;
+ }
+
+-
+ /* Build a new IP datagram from all its fragments. */
+-
+-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
++static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+ struct net_device *dev)
+ {
+ struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+ struct iphdr *iph;
+- struct sk_buff *fp, *head = qp->q.fragments;
++ struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments);
++ struct sk_buff **nextp; /* To build frag_list. */
++ struct rb_node *rbn;
+ int len;
+ int ihlen;
+ int err;
+@@ -453,25 +464,20 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+ goto out_fail;
+ }
+ /* Make the one we just received the head. */
+- if (prev) {
+- head = prev->next;
+- fp = skb_clone(head, GFP_ATOMIC);
++ if (head != skb) {
++ fp = skb_clone(skb, GFP_ATOMIC);
+ if (!fp)
+ goto out_nomem;
+-
+- fp->next = head->next;
+- if (!fp->next)
++ rb_replace_node(&skb->rbnode, &fp->rbnode, &qp->q.rb_fragments);
++ if (qp->q.fragments_tail == skb)
+ qp->q.fragments_tail = fp;
+- prev->next = fp;
+-
+- skb_morph(head, qp->q.fragments);
+- head->next = qp->q.fragments->next;
+-
+- consume_skb(qp->q.fragments);
+- qp->q.fragments = head;
++ skb_morph(skb, head);
++ rb_replace_node(&head->rbnode, &skb->rbnode,
++ &qp->q.rb_fragments);
++ consume_skb(head);
++ head = skb;
+ }
+
+- WARN_ON(!head);
+ WARN_ON(head->ip_defrag_offset != 0);
+
+ /* Allocate a new buffer for the datagram. */
+@@ -496,24 +502,35 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+ clone = alloc_skb(0, GFP_ATOMIC);
+ if (!clone)
+ goto out_nomem;
+- clone->next = head->next;
+- head->next = clone;
+ skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+ skb_frag_list_init(head);
+ for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+ plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+ clone->len = clone->data_len = head->data_len - plen;
+- head->data_len -= clone->len;
+- head->len -= clone->len;
++ skb->truesize += clone->truesize;
+ clone->csum = 0;
+ clone->ip_summed = head->ip_summed;
+ add_frag_mem_limit(qp->q.net, clone->truesize);
++ skb_shinfo(head)->frag_list = clone;
++ nextp = &clone->next;
++ } else {
++ nextp = &skb_shinfo(head)->frag_list;
+ }
+
+- skb_shinfo(head)->frag_list = head->next;
+ skb_push(head, head->data - skb_network_header(head));
+
+- for (fp=head->next; fp; fp = fp->next) {
++ /* Traverse the tree in order, to build frag_list. */
++ rbn = rb_next(&head->rbnode);
++ rb_erase(&head->rbnode, &qp->q.rb_fragments);
++ while (rbn) {
++ struct rb_node *rbnext = rb_next(rbn);
++ fp = rb_to_skb(rbn);
++ rb_erase(rbn, &qp->q.rb_fragments);
++ rbn = rbnext;
++ *nextp = fp;
++ nextp = &fp->next;
++ fp->prev = NULL;
++ memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+ head->data_len += fp->len;
+ head->len += fp->len;
+ if (head->ip_summed != fp->ip_summed)
+@@ -524,7 +541,9 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+ }
+ sub_frag_mem_limit(qp->q.net, head->truesize);
+
++ *nextp = NULL;
+ head->next = NULL;
++ head->prev = NULL;
+ head->dev = dev;
+ head->tstamp = qp->q.stamp;
+ IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
+@@ -552,6 +571,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
+ qp->q.fragments = NULL;
++ qp->q.rb_fragments = RB_ROOT;
+ qp->q.fragments_tail = NULL;
+ return 0;
+
+diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
+index cb72239dcff7..5ce591aef46e 100644
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -445,6 +445,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
+ head->csum);
+
+ fq->q.fragments = NULL;
++ fq->q.rb_fragments = RB_ROOT;
+ fq->q.fragments_tail = NULL;
+
+ /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
+diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
+index 3cbcf099f6b2..ec917f58d105 100644
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -465,6 +465,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
+ rcu_read_unlock();
+ fq->q.fragments = NULL;
++ fq->q.rb_fragments = RB_ROOT;
+ fq->q.fragments_tail = NULL;
+ return 1;
+
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-027-ip-add-helpers-to-process-in-order-fragments-.patch b/patches.kernel.org/4.4.174-027-ip-add-helpers-to-process-in-order-fragments-.patch
new file mode 100644
index 0000000000..4e1b6e9c23
--- /dev/null
+++ b/patches.kernel.org/4.4.174-027-ip-add-helpers-to-process-in-order-fragments-.patch
@@ -0,0 +1,172 @@
+From: Peter Oskolkov <posk@google.com>
+Date: Wed, 10 Oct 2018 12:30:14 -0700
+Subject: [PATCH] ip: add helpers to process in-order fragments faster.
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 353c9cb360874e737fb000545f783df756c06f9a
+
+commit 353c9cb360874e737fb000545f783df756c06f9a upstream.
+
+This patch introduces several helper functions/macros that will be
+used in the follow-up patch. No runtime changes yet.
+
+The new logic (fully implemented in the second patch) is as follows:
+
+* Nodes in the rb-tree will now contain not single fragments, but lists
+ of consecutive fragments ("runs").
+
+* At each point in time, the current "active" run at the tail is
+ maintained/tracked. Fragments that arrive in-order, adjacent
+ to the previous tail fragment, are added to this tail run without
+ triggering the re-balancing of the rb-tree.
+
+* If a fragment arrives out of order with the offset _before_ the tail run,
+ it is inserted into the rb-tree as a single fragment.
+
+* If a fragment arrives after the current tail fragment (with a gap),
+ it starts a new "tail" run, as is inserted into the rb-tree
+ at the end as the head of the new run.
+
+skb->cb is used to store additional information
+needed here (suggested by Eric Dumazet).
+
+Reported-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: Peter Oskolkov <posk@google.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Mao Wenan <maowenan@huawei.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ include/net/inet_frag.h | 6 ++++
+ net/ipv4/ip_fragment.c | 73 +++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 79 insertions(+)
+
+diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
+index 5fd8e76dcd26..6260ec146142 100644
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -55,7 +55,9 @@ struct frag_v6_compare_key {
+ * @lock: spinlock protecting this frag
+ * @refcnt: reference count of the queue
+ * @fragments: received fragments head
++ * @rb_fragments: received fragments rb-tree root
+ * @fragments_tail: received fragments tail
++ * @last_run_head: the head of the last "run". see ip_fragment.c
+ * @stamp: timestamp of the last received fragment
+ * @len: total length of the original datagram
+ * @meat: length of received fragments so far
+@@ -76,6 +78,7 @@ struct inet_frag_queue {
+ struct sk_buff *fragments; /* Used in IPv6. */
+ struct rb_root rb_fragments; /* Used in IPv4. */
+ struct sk_buff *fragments_tail;
++ struct sk_buff *last_run_head;
+ ktime_t stamp;
+ int len;
+ int meat;
+@@ -112,6 +115,9 @@ void inet_frag_kill(struct inet_frag_queue *q);
+ void inet_frag_destroy(struct inet_frag_queue *q);
+ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
+
++/* Free all skbs in the queue; return the sum of their truesizes. */
++unsigned int inet_frag_rbtree_purge(struct rb_root *root);
++
+ static inline void inet_frag_put(struct inet_frag_queue *q)
+ {
+ if (atomic_dec_and_test(&q->refcnt))
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index 270792776403..a32d4e3c99f2 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -58,6 +58,57 @@
+ static int sysctl_ipfrag_max_dist __read_mostly = 64;
+ static const char ip_frag_cache_name[] = "ip4-frags";
+
++/* Use skb->cb to track consecutive/adjacent fragments coming at
++ * the end of the queue. Nodes in the rb-tree queue will
++ * contain "runs" of one or more adjacent fragments.
++ *
++ * Invariants:
++ * - next_frag is NULL at the tail of a "run";
++ * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
++ */
++struct ipfrag_skb_cb {
++ struct inet_skb_parm h;
++ struct sk_buff *next_frag;
++ int frag_run_len;
++};
++
++#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
++
++static void ip4_frag_init_run(struct sk_buff *skb)
++{
++ BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
++
++ FRAG_CB(skb)->next_frag = NULL;
++ FRAG_CB(skb)->frag_run_len = skb->len;
++}
++
++/* Append skb to the last "run". */
++static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
++ struct sk_buff *skb)
++{
++ RB_CLEAR_NODE(&skb->rbnode);
++ FRAG_CB(skb)->next_frag = NULL;
++
++ FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
++ FRAG_CB(q->fragments_tail)->next_frag = skb;
++ q->fragments_tail = skb;
++}
++
++/* Create a new "run" with the skb. */
++static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
++{
++ if (q->last_run_head)
++ rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
++ &q->last_run_head->rbnode.rb_right);
++ else
++ rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
++ rb_insert_color(&skb->rbnode, &q->rb_fragments);
++
++ ip4_frag_init_run(skb);
++ q->fragments_tail = skb;
++ q->last_run_head = skb;
++}
++
+ /* Describe an entry in the "incomplete datagrams" queue. */
+ struct ipq {
+ struct inet_frag_queue q;
+@@ -658,6 +709,28 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
+ }
+ EXPORT_SYMBOL(ip_check_defrag);
+
++unsigned int inet_frag_rbtree_purge(struct rb_root *root)
++{
++ struct rb_node *p = rb_first(root);
++ unsigned int sum = 0;
++
++ while (p) {
++ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
++
++ p = rb_next(p);
++ rb_erase(&skb->rbnode, root);
++ while (skb) {
++ struct sk_buff *next = FRAG_CB(skb)->next_frag;
++
++ sum += skb->truesize;
++ kfree_skb(skb);
++ skb = next;
++ }
++ }
++ return sum;
++}
++EXPORT_SYMBOL(inet_frag_rbtree_purge);
++
+ #ifdef CONFIG_SYSCTL
+ static int dist_min;
+
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-028-ip-process-in-order-fragments-efficiently.patch b/patches.kernel.org/4.4.174-028-ip-process-in-order-fragments-efficiently.patch
new file mode 100644
index 0000000000..57d82a79ba
--- /dev/null
+++ b/patches.kernel.org/4.4.174-028-ip-process-in-order-fragments-efficiently.patch
@@ -0,0 +1,277 @@
+From: Peter Oskolkov <posk@google.com>
+Date: Wed, 10 Oct 2018 12:30:15 -0700
+Subject: [PATCH] ip: process in-order fragments efficiently
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: a4fd284a1f8fd4b6c59aa59db2185b1e17c5c11c
+
+commit a4fd284a1f8fd4b6c59aa59db2185b1e17c5c11c upstream.
+
+This patch changes the runtime behavior of IP defrag queue:
+incoming in-order fragments are added to the end of the current
+list/"run" of in-order fragments at the tail.
+
+On some workloads, UDP stream performance is substantially improved:
+
+RX: ./udp_stream -F 10 -T 2 -l 60
+TX: ./udp_stream -c -H <host> -F 10 -T 5 -l 60
+
+with this patchset applied on a 10Gbps receiver:
+
+ throughput=9524.18
+ throughput_units=Mbit/s
+
+upstream (net-next):
+
+ throughput=4608.93
+ throughput_units=Mbit/s
+
+Reported-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: Peter Oskolkov <posk@google.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Mao Wenan <maowenan@huawei.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/ipv4/inet_fragment.c | 2 +-
+ net/ipv4/ip_fragment.c | 110 ++++++++++++++++++++++++---------------
+ 2 files changed, 70 insertions(+), 42 deletions(-)
+
+diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
+index 02d399618b8f..b6cb9958b34d 100644
+--- a/net/ipv4/inet_fragment.c
++++ b/net/ipv4/inet_fragment.c
+@@ -153,7 +153,7 @@ void inet_frag_destroy(struct inet_frag_queue *q)
+ fp = xp;
+ } while (fp);
+ } else {
+- sum_truesize = skb_rbtree_purge(&q->rb_fragments);
++ sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
+ }
+ sum = sum_truesize + f->qsize;
+
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index a32d4e3c99f2..3f6c170b40e1 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -127,8 +127,8 @@ static u8 ip4_frag_ecn(u8 tos)
+
+ static struct inet_frags ip4_frags;
+
+-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+- struct net_device *dev);
++static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
++ struct sk_buff *prev_tail, struct net_device *dev);
+
+
+ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
+@@ -219,7 +219,12 @@ static void ip_expire(unsigned long arg)
+ head = skb_rb_first(&qp->q.rb_fragments);
+ if (!head)
+ goto out;
+- rb_erase(&head->rbnode, &qp->q.rb_fragments);
++ if (FRAG_CB(head)->next_frag)
++ rb_replace_node(&head->rbnode,
++ &FRAG_CB(head)->next_frag->rbnode,
++ &qp->q.rb_fragments);
++ else
++ rb_erase(&head->rbnode, &qp->q.rb_fragments);
+ memset(&head->rbnode, 0, sizeof(head->rbnode));
+ barrier();
+ }
+@@ -320,7 +325,7 @@ static int ip_frag_reinit(struct ipq *qp)
+ return -ETIMEDOUT;
+ }
+
+- sum_truesize = skb_rbtree_purge(&qp->q.rb_fragments);
++ sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
+ sub_frag_mem_limit(qp->q.net, sum_truesize);
+
+ qp->q.flags = 0;
+@@ -329,6 +334,7 @@ static int ip_frag_reinit(struct ipq *qp)
+ qp->q.fragments = NULL;
+ qp->q.rb_fragments = RB_ROOT;
+ qp->q.fragments_tail = NULL;
++ qp->q.last_run_head = NULL;
+ qp->iif = 0;
+ qp->ecn = 0;
+
+@@ -340,7 +346,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ {
+ struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+ struct rb_node **rbn, *parent;
+- struct sk_buff *skb1;
++ struct sk_buff *skb1, *prev_tail;
+ struct net_device *dev;
+ unsigned int fragsize;
+ int flags, offset;
+@@ -418,38 +424,41 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ */
+
+ /* Find out where to put this fragment. */
+- skb1 = qp->q.fragments_tail;
+- if (!skb1) {
+- /* This is the first fragment we've received. */
+- rb_link_node(&skb->rbnode, NULL, &qp->q.rb_fragments.rb_node);
+- qp->q.fragments_tail = skb;
+- } else if ((skb1->ip_defrag_offset + skb1->len) < end) {
+- /* This is the common/special case: skb goes to the end. */
++ prev_tail = qp->q.fragments_tail;
++ if (!prev_tail)
++ ip4_frag_create_run(&qp->q, skb); /* First fragment. */
++ else if (prev_tail->ip_defrag_offset + prev_tail->len < end) {
++ /* This is the common case: skb goes to the end. */
+ /* Detect and discard overlaps. */
+- if (offset < (skb1->ip_defrag_offset + skb1->len))
++ if (offset < prev_tail->ip_defrag_offset + prev_tail->len)
+ goto discard_qp;
+- /* Insert after skb1. */
+- rb_link_node(&skb->rbnode, &skb1->rbnode, &skb1->rbnode.rb_right);
+- qp->q.fragments_tail = skb;
++ if (offset == prev_tail->ip_defrag_offset + prev_tail->len)
++ ip4_frag_append_to_last_run(&qp->q, skb);
++ else
++ ip4_frag_create_run(&qp->q, skb);
+ } else {
+- /* Binary search. Note that skb can become the first fragment, but
+- * not the last (covered above). */
++ /* Binary search. Note that skb can become the first fragment,
++ * but not the last (covered above).
++ */
+ rbn = &qp->q.rb_fragments.rb_node;
+ do {
+ parent = *rbn;
+ skb1 = rb_to_skb(parent);
+ if (end <= skb1->ip_defrag_offset)
+ rbn = &parent->rb_left;
+- else if (offset >= skb1->ip_defrag_offset + skb1->len)
++ else if (offset >= skb1->ip_defrag_offset +
++ FRAG_CB(skb1)->frag_run_len)
+ rbn = &parent->rb_right;
+ else /* Found an overlap with skb1. */
+ goto discard_qp;
+ } while (*rbn);
+ /* Here we have parent properly set, and rbn pointing to
+- * one of its NULL left/right children. Insert skb. */
++ * one of its NULL left/right children. Insert skb.
++ */
++ ip4_frag_init_run(skb);
+ rb_link_node(&skb->rbnode, parent, rbn);
++ rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
+ }
+- rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
+
+ if (dev)
+ qp->iif = dev->ifindex;
+@@ -476,7 +485,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ unsigned long orefdst = skb->_skb_refdst;
+
+ skb->_skb_refdst = 0UL;
+- err = ip_frag_reasm(qp, skb, dev);
++ err = ip_frag_reasm(qp, skb, prev_tail, dev);
+ skb->_skb_refdst = orefdst;
+ return err;
+ }
+@@ -495,7 +504,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+
+ /* Build a new IP datagram from all its fragments. */
+ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+- struct net_device *dev)
++ struct sk_buff *prev_tail, struct net_device *dev)
+ {
+ struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+ struct iphdr *iph;
+@@ -519,10 +528,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+ fp = skb_clone(skb, GFP_ATOMIC);
+ if (!fp)
+ goto out_nomem;
+- rb_replace_node(&skb->rbnode, &fp->rbnode, &qp->q.rb_fragments);
++ FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
++ if (RB_EMPTY_NODE(&skb->rbnode))
++ FRAG_CB(prev_tail)->next_frag = fp;
++ else
++ rb_replace_node(&skb->rbnode, &fp->rbnode,
++ &qp->q.rb_fragments);
+ if (qp->q.fragments_tail == skb)
+ qp->q.fragments_tail = fp;
+ skb_morph(skb, head);
++ FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
+ rb_replace_node(&head->rbnode, &skb->rbnode,
+ &qp->q.rb_fragments);
+ consume_skb(head);
+@@ -558,7 +573,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+ for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+ plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+ clone->len = clone->data_len = head->data_len - plen;
+- skb->truesize += clone->truesize;
++ head->truesize += clone->truesize;
+ clone->csum = 0;
+ clone->ip_summed = head->ip_summed;
+ add_frag_mem_limit(qp->q.net, clone->truesize);
+@@ -571,24 +586,36 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+ skb_push(head, head->data - skb_network_header(head));
+
+ /* Traverse the tree in order, to build frag_list. */
++ fp = FRAG_CB(head)->next_frag;
+ rbn = rb_next(&head->rbnode);
+ rb_erase(&head->rbnode, &qp->q.rb_fragments);
+- while (rbn) {
+- struct rb_node *rbnext = rb_next(rbn);
+- fp = rb_to_skb(rbn);
+- rb_erase(rbn, &qp->q.rb_fragments);
+- rbn = rbnext;
+- *nextp = fp;
+- nextp = &fp->next;
+- fp->prev = NULL;
+- memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+- head->data_len += fp->len;
+- head->len += fp->len;
+- if (head->ip_summed != fp->ip_summed)
+- head->ip_summed = CHECKSUM_NONE;
+- else if (head->ip_summed == CHECKSUM_COMPLETE)
+- head->csum = csum_add(head->csum, fp->csum);
+- head->truesize += fp->truesize;
++ while (rbn || fp) {
++ /* fp points to the next sk_buff in the current run;
++ * rbn points to the next run.
++ */
++ /* Go through the current run. */
++ while (fp) {
++ *nextp = fp;
++ nextp = &fp->next;
++ fp->prev = NULL;
++ memset(&fp->rbnode, 0, sizeof(fp->rbnode));
++ head->data_len += fp->len;
++ head->len += fp->len;
++ if (head->ip_summed != fp->ip_summed)
++ head->ip_summed = CHECKSUM_NONE;
++ else if (head->ip_summed == CHECKSUM_COMPLETE)
++ head->csum = csum_add(head->csum, fp->csum);
++ head->truesize += fp->truesize;
++ fp = FRAG_CB(fp)->next_frag;
++ }
++ /* Move to the next run. */
++ if (rbn) {
++ struct rb_node *rbnext = rb_next(rbn);
++
++ fp = rb_to_skb(rbn);
++ rb_erase(rbn, &qp->q.rb_fragments);
++ rbn = rbnext;
++ }
+ }
+ sub_frag_mem_limit(qp->q.net, head->truesize);
+
+@@ -624,6 +651,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+ qp->q.fragments = NULL;
+ qp->q.rb_fragments = RB_ROOT;
+ qp->q.fragments_tail = NULL;
++ qp->q.last_run_head = NULL;
+ return 0;
+
+ out_nomem:
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-029-ip-frags-fix-crash-in-ip_do_fragment.patch b/patches.kernel.org/4.4.174-029-ip-frags-fix-crash-in-ip_do_fragment.patch
new file mode 100644
index 0000000000..b2bacc6db9
--- /dev/null
+++ b/patches.kernel.org/4.4.174-029-ip-frags-fix-crash-in-ip_do_fragment.patch
@@ -0,0 +1,118 @@
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 10 Oct 2018 12:30:16 -0700
+Subject: [PATCH] ip: frags: fix crash in ip_do_fragment()
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 5d407b071dc369c26a38398326ee2be53651cfe4
+
+commit 5d407b071dc369c26a38398326ee2be53651cfe4 upstream.
+
+A kernel crash occurs when a defragmented packet is fragmented
+in ip_do_fragment().
+In the defragment routine, skb_orphan() is called and
+skb->ip_defrag_offset is set, but skb->sk and
+skb->ip_defrag_offset are members of the same union, so
+frag->sk is not NULL.
+Hence the crash occurs in the skb->sk check in ip_do_fragment() when
+a defragmented packet is fragmented.
+
+test commands:
+ %iptables -t nat -I POSTROUTING -j MASQUERADE
+ %hping3 192.168.4.2 -s 1000 -p 2000 -d 60000
+
+splat looks like:
+[ 261.069429] kernel BUG at net/ipv4/ip_output.c:636!
+[ 261.075753] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
+[ 261.083854] CPU: 1 PID: 1349 Comm: hping3 Not tainted 4.19.0-rc2+ #3
+[ 261.100977] RIP: 0010:ip_do_fragment+0x1613/0x2600
+[ 261.106945] Code: e8 e2 38 e3 fe 4c 8b 44 24 18 48 8b 74 24 08 e9 92 f6 ff ff 80 3c 02 00 0f 85 da 07 00 00 48 8b b5 d0 00 00 00 e9 25 f6 ff ff <0f> 0b 0f 0b 44 8b 54 24 58 4c 8b 4c 24 18 4c 8b 5c 24 60 4c 8b 6c
+[ 261.127015] RSP: 0018:ffff8801031cf2c0 EFLAGS: 00010202
+[ 261.134156] RAX: 1ffff1002297537b RBX: ffffed0020639e6e RCX: 0000000000000004
+[ 261.142156] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff880114ba9bd8
+[ 261.150157] RBP: ffff880114ba8a40 R08: ffffed0022975395 R09: ffffed0022975395
+[ 261.158157] R10: 0000000000000001 R11: ffffed0022975394 R12: ffff880114ba9ca4
+[ 261.166159] R13: 0000000000000010 R14: ffff880114ba9bc0 R15: dffffc0000000000
+[ 261.174169] FS: 00007fbae2199700(0000) GS:ffff88011b400000(0000) knlGS:0000000000000000
+[ 261.183012] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 261.189013] CR2: 00005579244fe000 CR3: 0000000119bf4000 CR4: 00000000001006e0
+[ 261.198158] Call Trace:
+[ 261.199018] ? dst_output+0x180/0x180
+[ 261.205011] ? save_trace+0x300/0x300
+[ 261.209018] ? ip_copy_metadata+0xb00/0xb00
+[ 261.213034] ? sched_clock_local+0xd4/0x140
+[ 261.218158] ? kill_l4proto+0x120/0x120 [nf_conntrack]
+[ 261.223014] ? rt_cpu_seq_stop+0x10/0x10
+[ 261.227014] ? find_held_lock+0x39/0x1c0
+[ 261.233008] ip_finish_output+0x51d/0xb50
+[ 261.237006] ? ip_fragment.constprop.56+0x220/0x220
+[ 261.243011] ? nf_ct_l4proto_register_one+0x5b0/0x5b0 [nf_conntrack]
+[ 261.250152] ? rcu_is_watching+0x77/0x120
+[ 261.255010] ? nf_nat_ipv4_out+0x1e/0x2b0 [nf_nat_ipv4]
+[ 261.261033] ? nf_hook_slow+0xb1/0x160
+[ 261.265007] ip_output+0x1c7/0x710
+[ 261.269005] ? ip_mc_output+0x13f0/0x13f0
+[ 261.273002] ? __local_bh_enable_ip+0xe9/0x1b0
+[ 261.278152] ? ip_fragment.constprop.56+0x220/0x220
+[ 261.282996] ? nf_hook_slow+0xb1/0x160
+[ 261.287007] raw_sendmsg+0x21f9/0x4420
+[ 261.291008] ? dst_output+0x180/0x180
+[ 261.297003] ? sched_clock_cpu+0x126/0x170
+[ 261.301003] ? find_held_lock+0x39/0x1c0
+[ 261.306155] ? stop_critical_timings+0x420/0x420
+[ 261.311004] ? check_flags.part.36+0x450/0x450
+[ 261.315005] ? _raw_spin_unlock_irq+0x29/0x40
+[ 261.320995] ? _raw_spin_unlock_irq+0x29/0x40
+[ 261.326142] ? cyc2ns_read_end+0x10/0x10
+[ 261.330139] ? raw_bind+0x280/0x280
+[ 261.334138] ? sched_clock_cpu+0x126/0x170
+[ 261.338995] ? check_flags.part.36+0x450/0x450
+[ 261.342991] ? __lock_acquire+0x4500/0x4500
+[ 261.348994] ? inet_sendmsg+0x11c/0x500
+[ 261.352989] ? dst_output+0x180/0x180
+[ 261.357012] inet_sendmsg+0x11c/0x500
+[ ... ]
+
+v2:
+ - clear skb->sk at reassembly routine. (Eric Dumazet)
+
+Fixes: fa0f527358bd ("ip: use rb trees for IP frag queue.")
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Mao Wenan <maowenan@huawei.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/ipv4/ip_fragment.c | 1 +
+ net/ipv6/netfilter/nf_conntrack_reasm.c | 1 +
+ 2 files changed, 2 insertions(+)
+
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index 3f6c170b40e1..cfa523302890 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -599,6 +599,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+ nextp = &fp->next;
+ fp->prev = NULL;
+ memset(&fp->rbnode, 0, sizeof(fp->rbnode));
++ fp->sk = NULL;
+ head->data_len += fp->len;
+ head->len += fp->len;
+ if (head->ip_summed != fp->ip_summed)
+diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
+index 5ce591aef46e..664c84e47bab 100644
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -427,6 +427,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
+ else if (head->ip_summed == CHECKSUM_COMPLETE)
+ head->csum = csum_add(head->csum, fp->csum);
+ head->truesize += fp->truesize;
++ fp->sk = NULL;
+ }
+ sub_frag_mem_limit(fq->q.net, head->truesize);
+
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-030-ipv4-frags-precedence-bug-in-ip_expire.patch b/patches.kernel.org/4.4.174-030-ipv4-frags-precedence-bug-in-ip_expire.patch
new file mode 100644
index 0000000000..575a63a84c
--- /dev/null
+++ b/patches.kernel.org/4.4.174-030-ipv4-frags-precedence-bug-in-ip_expire.patch
@@ -0,0 +1,39 @@
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Wed, 10 Oct 2018 12:30:17 -0700
+Subject: [PATCH] ipv4: frags: precedence bug in ip_expire()
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 70837ffe3085c9a91488b52ca13ac84424da1042
+
+commit 70837ffe3085c9a91488b52ca13ac84424da1042 upstream.
+
+We accidentally removed the parentheses here, but they are required
+because '!' has higher precedence than '&'.
+
+Fixes: fa0f527358bd ("ip: use rb trees for IP frag queue.")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Mao Wenan <maowenan@huawei.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/ipv4/ip_fragment.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index cfa523302890..dbf8045b917f 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -205,7 +205,7 @@ static void ip_expire(unsigned long arg)
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+
+- if (!qp->q.flags & INET_FRAG_FIRST_IN)
++ if (!(qp->q.flags & INET_FRAG_FIRST_IN))
+ goto out;
+
+ /* sk_buff::dev and sk_buff::rbnode are unionized. So we
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-031-inet-frags-better-deal-with-smp-races.patch b/patches.kernel.org/4.4.174-031-inet-frags-better-deal-with-smp-races.patch
new file mode 100644
index 0000000000..48a4377c11
--- /dev/null
+++ b/patches.kernel.org/4.4.174-031-inet-frags-better-deal-with-smp-races.patch
@@ -0,0 +1,94 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 8 Nov 2018 17:34:27 -0800
+Subject: [PATCH] inet: frags: better deal with smp races
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: 0d5b9311baf27bb545f187f12ecfd558220c607d
+
+commit 0d5b9311baf27bb545f187f12ecfd558220c607d upstream.
+
+Multiple cpus might attempt to insert a new fragment in rhashtable,
+if for example RPS is buggy, as reported by 배석진 in
+https://patchwork.ozlabs.org/patch/994601/
+
+We use rhashtable_lookup_get_insert_key() instead of
+rhashtable_insert_fast() to let cpus losing the race
+free their own inet_frag_queue and use the one that
+was inserted by another cpu.
+
+Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: 배석진 <soukjin.bae@samsung.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/ipv4/inet_fragment.c | 28 +++++++++++++++-------------
+ 1 file changed, 15 insertions(+), 13 deletions(-)
+
+diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
+index b6cb9958b34d..c03e5f5859e1 100644
+--- a/net/ipv4/inet_fragment.c
++++ b/net/ipv4/inet_fragment.c
+@@ -188,21 +188,22 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
+ }
+
+ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
+- void *arg)
++ void *arg,
++ struct inet_frag_queue **prev)
+ {
+ struct inet_frags *f = nf->f;
+ struct inet_frag_queue *q;
+- int err;
+
+ q = inet_frag_alloc(nf, f, arg);
+- if (!q)
++ if (!q) {
++ *prev = ERR_PTR(-ENOMEM);
+ return NULL;
+-
++ }
+ mod_timer(&q->timer, jiffies + nf->timeout);
+
+- err = rhashtable_insert_fast(&nf->rhashtable, &q->node,
+- f->rhash_params);
+- if (err < 0) {
++ *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
++ &q->node, f->rhash_params);
++ if (*prev) {
+ q->flags |= INET_FRAG_COMPLETE;
+ inet_frag_kill(q);
+ inet_frag_destroy(q);
+@@ -215,17 +216,18 @@ EXPORT_SYMBOL(inet_frag_create);
+ /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
+ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
+ {
+- struct inet_frag_queue *fq;
++ struct inet_frag_queue *fq = NULL, *prev;
+
+ rcu_read_lock();
+- fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+- if (fq) {
++ prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
++ if (!prev)
++ fq = inet_frag_create(nf, key, &prev);
++ if (prev && !IS_ERR(prev)) {
++ fq = prev;
+ if (!atomic_inc_not_zero(&fq->refcnt))
+ fq = NULL;
+- rcu_read_unlock();
+- return fq;
+ }
+ rcu_read_unlock();
+- return inet_frag_create(nf, key);
++ return fq;
+ }
+ EXPORT_SYMBOL(inet_frag_find);
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-032-net-fix-pskb_trim_rcsum_slow-with-odd-trim-of.patch b/patches.kernel.org/4.4.174-032-net-fix-pskb_trim_rcsum_slow-with-odd-trim-of.patch
new file mode 100644
index 0000000000..794b421a94
--- /dev/null
+++ b/patches.kernel.org/4.4.174-032-net-fix-pskb_trim_rcsum_slow-with-odd-trim-of.patch
@@ -0,0 +1,54 @@
+From: Dimitris Michailidis <dmichail@google.com>
+Date: Fri, 19 Oct 2018 17:07:13 -0700
+Subject: [PATCH] net: fix pskb_trim_rcsum_slow() with odd trim offset
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: d55bef5059dd057bd077155375c581b49d25be7e
+
+commit d55bef5059dd057bd077155375c581b49d25be7e upstream.
+
+We've been getting checksum errors involving small UDP packets, usually
+59B packets with 1 extra non-zero padding byte. netdev_rx_csum_fault()
+has been complaining that HW is providing bad checksums. Turns out the
+problem is in pskb_trim_rcsum_slow(), introduced in commit 88078d98d1bb
+("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends").
+
+The source of the problem is that when the bytes we are trimming start
+at an odd address, as in the case of the 1 padding byte above,
+skb_checksum() returns a byte-swapped value. We cannot just combine this
+with skb->csum using csum_sub(). We need to use csum_block_sub() here
+that takes into account the parity of the start address and handles the
+swapping.
+
+Matches existing code in __skb_postpull_rcsum() and esp_remove_trailer().
+
+Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends")
+Signed-off-by: Dimitris Michailidis <dmichail@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/core/skbuff.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index 3d7c92d38a78..fea7c24e99d0 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -1509,8 +1509,9 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ int delta = skb->len - len;
+
+- skb->csum = csum_sub(skb->csum,
+- skb_checksum(skb, len, delta, 0));
++ skb->csum = csum_block_sub(skb->csum,
++ skb_checksum(skb, len, delta, 0),
++ len);
+ }
+ return __pskb_trim(skb, len);
+ }
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-033-net-ipv4-do-not-handle-duplicate-fragments-as.patch b/patches.kernel.org/4.4.174-033-net-ipv4-do-not-handle-duplicate-fragments-as.patch
new file mode 100644
index 0000000000..a4e84a10f8
--- /dev/null
+++ b/patches.kernel.org/4.4.174-033-net-ipv4-do-not-handle-duplicate-fragments-as.patch
@@ -0,0 +1,104 @@
+From: Michal Kubecek <mkubecek@suse.cz>
+Date: Thu, 13 Dec 2018 17:23:32 +0100
+Subject: [PATCH] net: ipv4: do not handle duplicate fragments as overlapping
+Patch-mainline: 4.4.174
+References: bnc#1012382 bsc#1116345
+Git-commit: ade446403bfb79d3528d56071a84b15351a139ad
+
+commit ade446403bfb79d3528d56071a84b15351a139ad upstream.
+
+Since commit 7969e5c40dfd ("ip: discard IPv4 datagrams with overlapping
+segments.") IPv4 reassembly code drops the whole queue whenever an
+overlapping fragment is received. However, the test is written in a way
+which detects duplicate fragments as overlapping so that in environments
+with many duplicate packets, fragmented packets may be undeliverable.
+
+Add an extra test and, for a (potentially) duplicate fragment, only drop the
+new fragment rather than the whole queue. Only starting offset and length
+are checked, not the contents of the fragments as that would be too
+expensive. For a similar reason, the linear list ("run") of an rbtree node is not
+iterated, we only check if the new fragment is a subset of the interval
+covered by existing consecutive fragments.
+
+v2: instead of an exact check iterating through the linear list of an rbtree
+node, only check if the new fragment is a subset of the "run" (suggested
+by Eric Dumazet)
+
+Fixes: 7969e5c40dfd ("ip: discard IPv4 datagrams with overlapping segments.")
+Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Mao Wenan <maowenan@huawei.com>
+[bwh: Backported to 4.4:
+ - goto discard_qp, not err, in case of overlap
+ - Set err variable earlier, as done upstream in commit 0ff89efb5246
+ "ip: fail fast on IP defrag errors"]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/ipv4/ip_fragment.c | 20 +++++++++++++-------
+ 1 file changed, 13 insertions(+), 7 deletions(-)
+
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index dbf8045b917f..9b09a9b5a4fe 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -347,10 +347,10 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+ struct rb_node **rbn, *parent;
+ struct sk_buff *skb1, *prev_tail;
++ int ihl, end, skb1_run_end;
+ struct net_device *dev;
+ unsigned int fragsize;
+ int flags, offset;
+- int ihl, end;
+ int err = -ENOENT;
+ u8 ecn;
+
+@@ -420,9 +420,12 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ * overlapping fragment, the entire datagram (and any constituent
+ * fragments) MUST be silently discarded.
+ *
+- * We do the same here for IPv4 (and increment an snmp counter).
++ * We do the same here for IPv4 (and increment an snmp counter) but
++ * we do not want to drop the whole queue in response to a duplicate
++ * fragment.
+ */
+
++ err = -EINVAL;
+ /* Find out where to put this fragment. */
+ prev_tail = qp->q.fragments_tail;
+ if (!prev_tail)
+@@ -444,13 +447,17 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ do {
+ parent = *rbn;
+ skb1 = rb_to_skb(parent);
++ skb1_run_end = skb1->ip_defrag_offset +
++ FRAG_CB(skb1)->frag_run_len;
+ if (end <= skb1->ip_defrag_offset)
+ rbn = &parent->rb_left;
+- else if (offset >= skb1->ip_defrag_offset +
+- FRAG_CB(skb1)->frag_run_len)
++ else if (offset >= skb1_run_end)
+ rbn = &parent->rb_right;
+- else /* Found an overlap with skb1. */
+- goto discard_qp;
++ else if (offset >= skb1->ip_defrag_offset &&
++ end <= skb1_run_end)
++ goto err; /* No new data, potential duplicate */
++ else
++ goto discard_qp; /* Found an overlap */
+ } while (*rbn);
+ /* Here we have parent properly set, and rbn pointing to
+ * one of its NULL left/right children. Insert skb.
+@@ -495,7 +502,6 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+
+ discard_qp:
+ inet_frag_kill(&qp->q);
+- err = -EINVAL;
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASM_OVERLAPS);
+ err:
+ kfree_skb(skb);
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-034-rcu-Force-boolean-subscript-for-expedited-sta.patch b/patches.kernel.org/4.4.174-034-rcu-Force-boolean-subscript-for-expedited-sta.patch
new file mode 100644
index 0000000000..b485afe688
--- /dev/null
+++ b/patches.kernel.org/4.4.174-034-rcu-Force-boolean-subscript-for-expedited-sta.patch
@@ -0,0 +1,38 @@
+From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
+Date: Mon, 11 Jan 2016 16:29:29 -0800
+Subject: [PATCH] rcu: Force boolean subscript for expedited stall warnings
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: ec3833ed02ae6ef2a933ece9de7cbab0c64c699e
+
+commit ec3833ed02ae6ef2a933ece9de7cbab0c64c699e upstream.
+
+The cpu_online() function can return values other than 0 and 1, which
+can result in subscript overflow when applied to a two-element array.
+This commit allows for this behavior by using "!!" on the return value
+from cpu_online() when used as a subscript.
+
+Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Cc: "Rantala, Tommi" <tommi.t.rantala@nokia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ kernel/rcu/tree.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
+index 8a62cbfe1f2f..4e886ccd40db 100644
+--- a/kernel/rcu/tree.c
++++ b/kernel/rcu/tree.c
+@@ -3817,7 +3817,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
+ continue;
+ rdp = per_cpu_ptr(rsp->rda, cpu);
+ pr_cont(" %d-%c%c%c", cpu,
+- "O."[cpu_online(cpu)],
++ "O."[!!cpu_online(cpu)],
+ "o."[!!(rdp->grpmask & rnp->expmaskinit)],
+ "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
+ }
+--
+2.20.1
+
diff --git a/patches.kernel.org/4.4.174-035-Linux-4.4.174.patch b/patches.kernel.org/4.4.174-035-Linux-4.4.174.patch
new file mode 100644
index 0000000000..fd678aec16
--- /dev/null
+++ b/patches.kernel.org/4.4.174-035-Linux-4.4.174.patch
@@ -0,0 +1,27 @@
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Fri, 8 Feb 2019 11:25:33 +0100
+Subject: [PATCH] Linux 4.4.174
+References: bnc#1012382
+Patch-mainline: 4.4.174
+Git-commit: dc5e8c99975bb1a1561de884a83b3c19e4ac7ada
+
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/Makefile b/Makefile
+index db7665e32da8..1fa281069379 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 4
+-SUBLEVEL = 173
++SUBLEVEL = 174
+ EXTRAVERSION =
+ NAME = Blurry Fish Butt
+
+--
+2.20.1
+
diff --git a/patches.suse/0002-rcu-Add-more-diagnostics-to-expedited-stall-warning-.patch b/patches.suse/0002-rcu-Add-more-diagnostics-to-expedited-stall-warning-.patch
index de4b9fed50..144988c2dc 100644
--- a/patches.suse/0002-rcu-Add-more-diagnostics-to-expedited-stall-warning-.patch
+++ b/patches.suse/0002-rcu-Add-more-diagnostics-to-expedited-stall-warning-.patch
@@ -14,14 +14,12 @@ Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
- kernel/rcu/tree.c | 24 +++++++++++++++++++++---
+ kernel/rcu/tree.c | 24 +++++++++++++++++++++---
1 file changed, 21 insertions(+), 3 deletions(-)
-diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
-index bc6b79716a86..6a652d1f3d7f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
-@@ -3745,6 +3745,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
+@@ -3757,6 +3757,7 @@ static void synchronize_sched_expedited_
unsigned long jiffies_stall;
unsigned long jiffies_start;
unsigned long mask;
@@ -29,7 +27,7 @@ index bc6b79716a86..6a652d1f3d7f 100644
struct rcu_node *rnp;
struct rcu_node *rnp_root = rcu_get_root(rsp);
int ret;
-@@ -3767,14 +3768,16 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
+@@ -3779,14 +3780,16 @@ static void synchronize_sched_expedited_
}
pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
rsp->name);
@@ -46,8 +44,8 @@ index bc6b79716a86..6a652d1f3d7f 100644
+ ndetected++;
rdp = per_cpu_ptr(rsp->rda, cpu);
pr_cont(" %d-%c%c%c", cpu,
- "O."[cpu_online(cpu)],
-@@ -3783,8 +3786,23 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
+ "O."[!!cpu_online(cpu)],
+@@ -3795,8 +3798,23 @@ static void synchronize_sched_expedited_
}
mask <<= 1;
}
@@ -73,6 +71,3 @@ index bc6b79716a86..6a652d1f3d7f 100644
rcu_for_each_leaf_node(rsp, rnp) {
mask = 1;
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
---
-2.1.4
-
diff --git a/series.conf b/series.conf
index df22f88608..6664374938 100644
--- a/series.conf
+++ b/series.conf
@@ -5463,6 +5463,41 @@
patches.kernel.org/4.4.173-053-mm-migrate-don-t-rely-on-__PageMovable-of-new.patch
patches.kernel.org/4.4.173-054-fs-don-t-scan-the-inode-cache-before-SB_BORN-.patch
patches.kernel.org/4.4.173-055-Linux-4.4.173.patch
+ patches.kernel.org/4.4.174-001-inet-frags-change-inet_frags_init_net-return-.patch
+ patches.kernel.org/4.4.174-002-inet-frags-add-a-pointer-to-struct-netns_frag.patch
+ patches.kernel.org/4.4.174-003-inet-frags-refactor-ipfrag_init.patch
+ patches.kernel.org/4.4.174-004-inet-frags-refactor-ipv6_frag_init.patch
+ patches.kernel.org/4.4.174-005-inet-frags-refactor-lowpan_net_frag_init.patch
+ patches.kernel.org/4.4.174-006-rhashtable-add-rhashtable_lookup_get_insert_k.patch
+ patches.kernel.org/4.4.174-007-rhashtable-Add-rhashtable_lookup.patch
+ patches.kernel.org/4.4.174-008-rhashtable-add-schedule-points.patch
+ patches.kernel.org/4.4.174-009-inet-frags-use-rhashtables-for-reassembly-uni.patch
+ patches.kernel.org/4.4.174-010-net-ieee802154-6lowpan-fix-frag-reassembly.patch
+ patches.kernel.org/4.4.174-011-ipfrag-really-prevent-allocation-on-netns-exi.patch
+ patches.kernel.org/4.4.174-012-inet-frags-remove-some-helpers.patch
+ patches.kernel.org/4.4.174-013-inet-frags-get-rif-of-inet_frag_evicting.patch
+ patches.kernel.org/4.4.174-014-inet-frags-remove-inet_frag_maybe_warn_overfl.patch
+ patches.kernel.org/4.4.174-015-inet-frags-break-the-2GB-limit-for-frags-stor.patch
+ patches.kernel.org/4.4.174-016-inet-frags-do-not-clone-skb-in-ip_expire.patch
+ patches.kernel.org/4.4.174-017-ipv6-frags-rewrite-ip6_expire_frag_queue.patch
+ patches.kernel.org/4.4.174-018-rhashtable-reorganize-struct-rhashtable-layou.patch
+ patches.kernel.org/4.4.174-019-inet-frags-reorganize-struct-netns_frags.patch
+ patches.kernel.org/4.4.174-020-inet-frags-get-rid-of-ipfrag_skb_cb-FRAG_CB.patch
+ patches.kernel.org/4.4.174-021-inet-frags-fix-ip6frag_low_thresh-boundary.patch
+ patches.kernel.org/4.4.174-022-ip-discard-IPv4-datagrams-with-overlapping-se.patch
+ patches.kernel.org/4.4.174-023-net-modify-skb_rbtree_purge-to-return-the-tru.patch
+ patches.kernel.org/4.4.174-024-ipv6-defrag-drop-non-last-frags-smaller-than-.patch
+ patches.kernel.org/4.4.174-025-net-pskb_trim_rcsum-and-CHECKSUM_COMPLETE-are.patch
+ patches.kernel.org/4.4.174-026-ip-use-rb-trees-for-IP-frag-queue.patch
+ patches.kernel.org/4.4.174-027-ip-add-helpers-to-process-in-order-fragments-.patch
+ patches.kernel.org/4.4.174-028-ip-process-in-order-fragments-efficiently.patch
+ patches.kernel.org/4.4.174-029-ip-frags-fix-crash-in-ip_do_fragment.patch
+ patches.kernel.org/4.4.174-030-ipv4-frags-precedence-bug-in-ip_expire.patch
+ patches.kernel.org/4.4.174-031-inet-frags-better-deal-with-smp-races.patch
+ patches.kernel.org/4.4.174-032-net-fix-pskb_trim_rcsum_slow-with-odd-trim-of.patch
+ patches.kernel.org/4.4.174-033-net-ipv4-do-not-handle-duplicate-fragments-as.patch
+ patches.kernel.org/4.4.174-034-rcu-Force-boolean-subscript-for-expedited-sta.patch
+ patches.kernel.org/4.4.174-035-Linux-4.4.174.patch
########################################################
# Build fixes that apply to the vanilla kernel too.
@@ -5639,6 +5674,7 @@
# reverted stable patches for kABI purposes or to prepare for
# sorted series
########################################################
+ patches.kabi/revert-most-of-4.4.174.patch
patches.kabi/revert-KVM-x86-fix-emulation-of-RSM-and-IRET-instruc.patch
patches.kabi/0001-Revert-tpm-Issue-a-TPM2_Shutdown-for-TPM2-devices.patch
patches.kabi/0002-Revert-Add-shutdown-to-struct-class.patch
@@ -24317,6 +24353,7 @@
patches.drivers/ibmvnic-fix-accelerated-VLAN-handling.patch
patches.fixes/NFSv4-Don-t-exit-the-state-manager-without-clearing-.patch
patches.fixes/0001-floppy-fix-race-condition-in-__floppy_read_block_0.patch
+ patches.fixes/acpi-nfit-fix-ars-overflow-continuation.patch
patches.drivers/xhci-add-quirk-to-workaround-the-errata-seen-on-cavium-thunder-x2-soc.patch
patches.drivers/0003-amd-iommu-fix-guest-virtual-apic-log-tail-address-register
patches.fixes/libceph-fall-back-to-sendmsg-for-slab-pages.patch
@@ -24332,6 +24369,7 @@
patches.arch/kvm-svm-ensure-an-ibpb-on-all-affected-cpus-when-freeing-a-vmcb.patch
patches.suse/msft-hv-1800-Drivers-hv-vmbus-check-the-creation_status-in-vmbus_.patch
patches.fixes/0001-fscache-Fix-race-in-fscache_op_complete-due-to-split.patch
+ patches.fixes/fs-fix-lost-error-code-in-dio_complete.patch
patches.fixes/0001-xen-x86-add-diagnostic-printout-to-xen_mc_flush-in-c.patch
patches.fixes/0001-btrfs-tree-checker-Don-t-check-max-block-group-size-.patch
patches.fixes/scsi-lpfc-fix-block-guard-enablement-on-sli3-adapters.patch
@@ -24384,6 +24422,8 @@
patches.arch/kvm-fix-kvm_ioctl_create_device-reference-counting-cve-2019-6974
patches.arch/kvm-x86-work-around-leak-of-uninitialized-stack-contents-cve-2019-7222
patches.arch/kvm-nvmx-unconditionally-cancel-preemption-timer-in-free_nested-cve-2019-7221
+ patches.fixes/0001-drm-vmwgfx-Return-error-code-from-vmw_execbuf_copy_f.patch
+ patches.fixes/0001-drm-vmwgfx-Fix-setting-of-dma-masks.patch
# mkp/scsi 5.0/scsi-fixes
patches.fixes/scsi-target-make-the-pi_prot_format-ConfigFS-path-re.patch