Home Home > GIT Browse > openSUSE-12.1
summaryrefslogtreecommitdiff
authorBrandon Philips <bphilips@suse.de>2009-09-03 07:48:37 (GMT)
committer Brandon Philips <bphilips@suse.de>2009-09-03 07:48:37 (GMT)
commitf57ebf8e407512d2960fb11423d0426041205f56 (patch) (side-by-side diff)
treed2c39cff261529711688e505fbdc3fb4072144bf
parent2c6393247a03ef5da3438103d91d0bee2ff86ebd (diff)
- patches.drivers/qlge-change-large-rx-buffer-logic-rss-hash-and-qlge_sb_pad-for-performance.patch:
qlge: Change large rx buffer logic, RSS hash and QLGE_SB_PAD for performance (bnc#536130).
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r--kernel-source.changes7
-rw-r--r--patches.drivers/qlge-change-large-rx-buffer-logic-rss-hash-and-qlge_sb_pad-for-performance.patch1356
-rw-r--r--series.conf1
3 files changed, 1364 insertions, 0 deletions
diff --git a/kernel-source.changes b/kernel-source.changes
index 94469ec..ce9fcce 100644
--- a/kernel-source.changes
+++ b/kernel-source.changes
@@ -1,4 +1,11 @@
-------------------------------------------------------------------
+Thu Sep 3 09:48:30 CEST 2009 - bphilips@suse.de
+
+- patches.drivers/qlge-change-large-rx-buffer-logic-rss-hash-and-qlge_sb_pad-for-performance.patch:
+ qlge: Change large rx buffer logic, RSS hash and QLGE_SB_PAD
+ for performance (bnc#536130).
+
+-------------------------------------------------------------------
Thu Sep 3 09:45:33 CEST 2009 - bphilips@suse.de
- patches.fixes/net-pkt_sched-fix-return-value-corruption-in-htb-and-tbf.patch:
diff --git a/patches.drivers/qlge-change-large-rx-buffer-logic-rss-hash-and-qlge_sb_pad-for-performance.patch b/patches.drivers/qlge-change-large-rx-buffer-logic-rss-hash-and-qlge_sb_pad-for-performance.patch
new file mode 100644
index 0000000..65ce6fd
--- a/dev/null
+++ b/patches.drivers/qlge-change-large-rx-buffer-logic-rss-hash-and-qlge_sb_pad-for-performance.patch
@@ -0,0 +1,1356 @@
+From: Ron Mercer <ron.mercer@qlogic.com>
+Subject: qlge: Change large rx buffer logic, RSS hash and QLGE_SB_PAD for performance
+Patch-mainline: netdev as "[RFC net-next PATCH 0/4] qlge: Performance changes for qlge."
+References: bnc#536130
+
+Currently we use a single page per large rx buffer. This patch changes
+this to use either chunks of pages for small MTU or multiple pages for
+jumbo MTU depending on the page size.
+Examples:
+64k pages at 1500 MTU:
+ large buffers will be 2048 bytes in length and there will be 32
+ per page.
+4k pages at 9000 MTU:
+ large buffer will use 4 pages as one large buffer.
+
+Also adjust the rss seed and QLGE_SB_PAD for performance.
+
+Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
+Signed-off-by: Brandon Philips <bphilips@suse.de>
+---
+ drivers/net/qlge/kcompat.h | 5
+ drivers/net/qlge/qlge.h | 55 +-
+ drivers/net/qlge/qlge_main.c | 857 ++++++++++++++++++++++++++-----------------
+ 3 files changed, 574 insertions(+), 343 deletions(-)
+
+Index: linux-2.6.16-SLES10_SP3_BRANCH/drivers/net/qlge/kcompat.h
+===================================================================
+--- linux-2.6.16-SLES10_SP3_BRANCH.orig/drivers/net/qlge/kcompat.h
++++ linux-2.6.16-SLES10_SP3_BRANCH/drivers/net/qlge/kcompat.h
+@@ -31,6 +31,11 @@
+
+ /*****************************************************************************/
+ #if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18))
++#ifndef NETIF_F_GRO
++#define vlan_gro_receive(_napi, _vlgrp, _vlan, _skb) \
++ vlan_hwaccel_receive_skb(_skb, _vlgrp, _vlan)
++#define napi_gro_receive(_napi, _skb) netif_receive_skb(_skb)
++#endif
+
+ #ifndef CHECKSUM_PARTIAL
+ #define CHECKSUM_PARTIAL CHECKSUM_HW
+Index: linux-2.6.16-SLES10_SP3_BRANCH/drivers/net/qlge/qlge.h
+===================================================================
+--- linux-2.6.16-SLES10_SP3_BRANCH.orig/drivers/net/qlge/qlge.h
++++ linux-2.6.16-SLES10_SP3_BRANCH/drivers/net/qlge/qlge.h
+@@ -17,27 +17,27 @@
+ */
+ #define DRV_NAME "qlge"
+ #define DRV_STRING "QLogic 10 Gigabit PCI-E Ethernet Driver "
+-#define DRV_VERSION "1.00.00.20"
+-#define DIS_VERSION "2.6.16-2.6.18-p20"
+-#define REL_DATE "090721"
++#define DRV_VERSION "1.00.00.21"
++#define DIS_VERSION "2.6.16-2.6.18-p21"
++#define REL_DATE "090803"
+
+ #define PFX "qlge: "
+ #define QPRINTK(qdev, nlevel, klevel, fmt, args...) \
+ do { \
+- if (!((qdev)->msg_enable & NETIF_MSG_##nlevel)) \
+- ; \
+- else \
+- dev_printk(KERN_##klevel, &((qdev)->pdev->dev), \
+- "%s: " fmt, __func__, ##args); \
++ if (!((qdev)->msg_enable & NETIF_MSG_##nlevel)) \
++ ; \
++ else \
++ dev_printk(KERN_##klevel, &((qdev)->pdev->dev), \
++ "%s: " fmt, __func__, ##args); \
+ } while (0)
+ #if 0
+ #define QPRINTK_DBG(qdev, nlevel, klevel, fmt, args...) \
+ do { \
+- if (!((qdev)->msg_enable & NETIF_MSG_##nlevel)) \
+- ; \
+- else \
+- dev_printk(KERN_##klevel, &((qdev)->pdev->dev), \
+- "%s: " fmt, __func__, ##args); \
++ if (!((qdev)->msg_enable & NETIF_MSG_##nlevel)) \
++ ; \
++ else \
++ dev_printk(KERN_##klevel, &((qdev)->pdev->dev), \
++ "%s: " fmt, __func__, ##args); \
+ } while (0)
+ #else
+ #define QPRINTK_DBG(qdev, nlevel, klevel, fmt, args...)
+@@ -71,11 +71,11 @@
+ MAX_DB_PAGES_PER_BQ(NUM_SMALL_BUFFERS) * sizeof(u64) + \
+ MAX_DB_PAGES_PER_BQ(NUM_LARGE_BUFFERS) * sizeof(u64))
+
+-#define SMALL_BUFFER_SIZE 2504 /* Per FCoE largest frame for normal MTU */
++#define SMALL_BUFFER_SIZE 256 /* Per FCoE largest frame for normal MTU */
+ #define LARGE_BUFFER_SIZE 9600 /* Per FCoE largest frame for jumbo MTU */
+
+ #define MAX_SPLIT_SIZE 1023
+-#define QLGE_SB_PAD 32
++#define QLGE_SB_PAD 0
+
+ #define MAX_CQ 128
+ #define DFLT_COALESCE_WAIT 100 /* 100 usec wait for coalescing */
+@@ -1218,7 +1218,7 @@ struct ricb {
+ #define RSS_RI6 0x40
+ #define RSS_RT6 0x80
+ __le16 mask;
+- __le32 hash_cq_id[256];
++ u8 hash_cq_id[1024];
+ __le32 ipv6_hash_key[10];
+ __le32 ipv4_hash_key[4];
+ } __attribute((packed));
+@@ -1244,16 +1244,23 @@ struct tx_ring_desc {
+ struct tx_ring_desc *next;
+ };
+
++struct page_chunk {
++ struct page *page; /* master page */
++ char *va; /* virt addr for this chunk */
++ u64 map; /* mapping for master */
++ unsigned int offset; /* offset for this chunk */
++ unsigned int last_flag; /* flag set for last chunk in page */
++};
++
+ struct bq_desc {
+ union {
+- struct page *lbq_page;
++ struct page_chunk pg_chunk;
+ struct sk_buff *skb;
+ } p;
+- char *pg_addr;
+ __le64 *addr;
+ u32 index;
+- DECLARE_PCI_UNMAP_ADDR(mapaddr);
+- DECLARE_PCI_UNMAP_LEN(maplen);
++ DECLARE_PCI_UNMAP_ADDR(mapaddr);
++ DECLARE_PCI_UNMAP_LEN(maplen);
+ };
+
+ #define QL_TXQ_IDX(qdev, skb) (smp_processor_id()%(qdev->tx_ring_count))
+@@ -1317,6 +1324,7 @@ struct rx_ring {
+ dma_addr_t lbq_base_dma;
+ void *lbq_base_indirect;
+ dma_addr_t lbq_base_indirect_dma;
++ struct page_chunk pg_chunk; /* current page for chunks */
+ struct bq_desc *lbq; /* array of control blocks */
+ void __iomem *lbq_prod_idx_db_reg; /* PCI doorbell mem area + 0x18 */
+ u32 lbq_prod_idx; /* current sw prod idx */
+@@ -1348,6 +1356,12 @@ struct rx_ring {
+ u8 reserved;
+ struct ql_adapter *qdev;
+ struct net_device *dummy_netdev;
++
++#ifdef NETIF_F_GRO
++ struct napi_struct napi;
++#endif
++ unsigned long packets; /* total packets received */
++ unsigned long bytes; /* total bytes received */
+ };
+
+ /*
+@@ -1833,6 +1847,7 @@ struct ql_adapter {
+
+ struct rx_ring rx_ring[MAX_RX_RINGS];
+ struct tx_ring tx_ring[MAX_TX_RINGS];
++ unsigned int lbq_buf_order;
+
+ int rx_csum;
+
+Index: linux-2.6.16-SLES10_SP3_BRANCH/drivers/net/qlge/qlge_main.c
+===================================================================
+--- linux-2.6.16-SLES10_SP3_BRANCH.orig/drivers/net/qlge/qlge_main.c
++++ linux-2.6.16-SLES10_SP3_BRANCH/drivers/net/qlge/qlge_main.c
+@@ -75,8 +75,8 @@ module_param(qlge_mpi_coredump, int, S_I
+ MODULE_PARM_DESC(qlge_mpi_coredump,
+ "Option to enable allocation of memory for an MPI "
+ "firmware dump. Default is 1 - allocate memory.");
+-
+-int qlge_spool_coredump = 0;
++#define SPOOL 0
++static int qlge_spool_coredump = SPOOL;
+ module_param(qlge_spool_coredump, int, S_IRUGO|S_IRUSR);
+ MODULE_PARM_DESC(qlge_spool_coredump,
+ "Option to enable spooling of firmware dump. "
+@@ -501,8 +501,8 @@ exit:
+ return status;
+ }
+
+-/* The NIC function for this chip has 16 routing indexes. Each one can be used
+- * to route different frame types to various inbound queues. We send broadcast/
++/* The NIC function for this chip has 16 routing indexes. Each one can be used
++ * to route different frame types to various inbound queues. We send broadcast
+ * multicast/error frames to the default queue for slow handling,
+ * and CAM hit/RSS frames to the fast handling queues.
+ */
+@@ -694,7 +694,6 @@ static void ql_enable_all_completion_int
+ atomic_set(&qdev->intr_context[i].irq_cnt, 1);
+ ql_enable_completion_interrupt(qdev, i);
+ }
+-
+ }
+
+ static int ql_validate_flash(struct ql_adapter *qdev, u32 size, const char *str)
+@@ -1020,6 +1019,11 @@ end:
+ return status;
+ }
+
++static inline unsigned int ql_lbq_block_size(struct ql_adapter *qdev)
++{
++ return PAGE_SIZE << qdev->lbq_buf_order;
++}
++
+ /* Get the next large buffer. */
+ static struct bq_desc *ql_get_curr_lbuf(struct rx_ring *rx_ring)
+ {
+@@ -1031,6 +1035,28 @@ static struct bq_desc *ql_get_curr_lbuf(
+ return lbq_desc;
+ }
+
++static struct bq_desc *ql_get_curr_lchunk(struct ql_adapter *qdev,
++ struct rx_ring *rx_ring)
++{
++ struct bq_desc *lbq_desc = ql_get_curr_lbuf(rx_ring);
++
++ pci_dma_sync_single_for_cpu(qdev->pdev,
++ pci_unmap_addr(lbq_desc, mapaddr),
++ rx_ring->lbq_buf_map_size,
++ PCI_DMA_FROMDEVICE);
++
++ /* If it's the last chunk of our master page then
++ * we unmap it.
++ */
++ if ((lbq_desc->p.pg_chunk.offset + rx_ring->lbq_buf_map_size)
++ == ql_lbq_block_size(qdev))
++ pci_unmap_page(qdev->pdev,
++ lbq_desc->p.pg_chunk.map,
++ ql_lbq_block_size(qdev),
++ PCI_DMA_FROMDEVICE);
++ return lbq_desc;
++}
++
+ /* Get the next small buffer. */
+ static struct bq_desc *ql_get_curr_sbuf(struct rx_ring *rx_ring)
+ {
+@@ -1059,6 +1085,53 @@ static void ql_write_cq_idx(struct rx_ri
+ ql_write_db_reg(rx_ring->cnsmr_idx, rx_ring->cnsmr_idx_db_reg);
+ }
+
++static int ql_get_next_chunk(struct ql_adapter *qdev, struct rx_ring *rx_ring,
++ struct bq_desc *lbq_desc)
++{
++ if (!rx_ring->pg_chunk.page) {
++ u64 map;
++ rx_ring->pg_chunk.page = alloc_pages(__GFP_COLD | __GFP_COMP |
++ GFP_ATOMIC,
++ qdev->lbq_buf_order);
++ if (unlikely(!rx_ring->pg_chunk.page)) {
++ QPRINTK(qdev, DRV, ERR,
++ "page allocation failed.\n");
++ return -ENOMEM;
++ }
++ rx_ring->pg_chunk.offset = 0;
++ map = pci_map_page(qdev->pdev, rx_ring->pg_chunk.page,
++ 0, ql_lbq_block_size(qdev),
++ PCI_DMA_FROMDEVICE);
++ if (pci_dma_mapping_error(map)) {
++ __free_pages(rx_ring->pg_chunk.page,
++ qdev->lbq_buf_order);
++ QPRINTK(qdev, DRV, ERR,
++ "PCI mapping failed.\n");
++ return -ENOMEM;
++ }
++ rx_ring->pg_chunk.map = map;
++ rx_ring->pg_chunk.va = page_address(rx_ring->pg_chunk.page);
++ }
++
++ /* Copy the current master pg_chunk info
++ * to the current descriptor.
++ */
++ lbq_desc->p.pg_chunk = rx_ring->pg_chunk;
++
++ /* Adjust the master page chunk for next
++ * buffer get.
++ */
++ rx_ring->pg_chunk.offset += rx_ring->lbq_buf_map_size;
++ if (rx_ring->pg_chunk.offset == ql_lbq_block_size(qdev)) {
++ rx_ring->pg_chunk.page = NULL;
++ lbq_desc->p.pg_chunk.last_flag = 1;
++ } else {
++ rx_ring->pg_chunk.va += rx_ring->lbq_buf_map_size;
++ get_page(rx_ring->pg_chunk.page);
++ lbq_desc->p.pg_chunk.last_flag = 0;
++ }
++ return 0;
++}
+ /* Process (refill) a large buffer queue. */
+ static void ql_update_lbq(struct ql_adapter *qdev, struct rx_ring *rx_ring)
+ {
+@@ -1068,47 +1141,30 @@ static void ql_update_lbq(struct ql_adap
+ u64 map;
+ int i;
+
+- while (rx_ring->lbq_free_cnt > 16) {
++ while (rx_ring->lbq_free_cnt > 32) {
+ for (i = 0; i < 16; i++) {
+ QPRINTK_DBG(qdev, RX_STATUS, DEBUG,
+ "lbq: try cleaning clean_idx = %d.\n",
+ clean_idx);
+ lbq_desc = &rx_ring->lbq[clean_idx];
+- if (lbq_desc->p.lbq_page == NULL) {
+- QPRINTK_DBG(qdev, RX_STATUS, DEBUG,
+- "lbq: getting new page for index %d.\n",
+- lbq_desc->index);
+- lbq_desc->p.lbq_page = alloc_page(GFP_ATOMIC);
+- if (lbq_desc->p.lbq_page == NULL) {
+- rx_ring->lbq_clean_idx = clean_idx;
+- QPRINTK(qdev, DRV, ERR,
+- "Couldn't get a page.\n");
+- return;
+- }
+- map = pci_map_page(qdev->pdev,
+- lbq_desc->p.lbq_page, 0,
++ if (ql_get_next_chunk(qdev, rx_ring, lbq_desc)) {
++ QPRINTK(qdev, IFUP, ERR,
++ "Could not get a page chunk.\n");
++ return;
++ }
++ map = lbq_desc->p.pg_chunk.map +
++ lbq_desc->p.pg_chunk.offset;
++ pci_unmap_addr_set(lbq_desc, mapaddr, map);
++ pci_unmap_len_set(lbq_desc, maplen,
++ rx_ring->lbq_buf_map_size);
++ *lbq_desc->addr = cpu_to_le64(map);
++ pci_dma_sync_single_for_device(qdev->pdev, map,
+ rx_ring->lbq_buf_map_size,
+ PCI_DMA_FROMDEVICE);
+- if (pci_dma_mapping_error(map)) {
+- rx_ring->lbq_clean_idx = clean_idx;
+- put_page(lbq_desc->p.lbq_page);
+- lbq_desc->p.lbq_page = NULL;
+- QPRINTK(qdev, RX_STATUS, ERR,
+- "PCI mapping failed.\n");
+- return;
+- }
+- pci_unmap_addr_set(lbq_desc, mapaddr, map);
+- pci_unmap_len_set(lbq_desc, maplen,
+- rx_ring->lbq_buf_map_size);
+- *lbq_desc->addr = cpu_to_le64(map);
+- lbq_desc->pg_addr =
+- page_address(lbq_desc->p.lbq_page);
+- }
+ clean_idx++;
+ if (clean_idx == rx_ring->lbq_len)
+ clean_idx = 0;
+ }
+-
+ rx_ring->lbq_clean_idx = clean_idx;
+ rx_ring->lbq_prod_idx += 16;
+ if (rx_ring->lbq_prod_idx == rx_ring->lbq_len)
+@@ -1146,11 +1202,11 @@ static void ql_update_sbq(struct ql_adap
+ sbq_desc->index);
+ sbq_desc->p.skb =
+ netdev_alloc_skb(qdev->ndev,
+- rx_ring->sbq_buf_size);
++ rx_ring->sbq_buf_size +
++ QLGE_SB_PAD);
+ if (sbq_desc->p.skb == NULL) {
+ QPRINTK(qdev, PROBE, ERR,
+ "Couldn't get an skb.\n");
+- rx_ring->sbq_clean_idx = clean_idx;
+ return;
+ }
+ skb_reserve(sbq_desc->p.skb, QLGE_SB_PAD);
+@@ -1161,7 +1217,6 @@ static void ql_update_sbq(struct ql_adap
+ if (pci_dma_mapping_error(map)) {
+ QPRINTK(qdev, IFUP, ERR,
+ "PCI mapping failed.\n");
+- rx_ring->sbq_clean_idx = clean_idx;
+ dev_kfree_skb_any(sbq_desc->p.skb);
+ sbq_desc->p.skb = NULL;
+ return;
+@@ -1370,275 +1425,171 @@ map_error:
+ return NETDEV_TX_BUSY;
+ }
+
+-static void ql_realign_skb(struct sk_buff *skb, int len)
+-{
+-#if 0
+- void *temp_addr = skb->data;
+-
+- /* Undo the skb_reserve(skb,32) we did before
+- * giving to hardware, and realign data on
+- * a 2-byte boundary.
+- */
+- skb->data -= QLGE_SB_PAD - NET_IP_ALIGN;
+- skb->tail -= QLGE_SB_PAD - NET_IP_ALIGN;
+- memcpy(skb->data, temp_addr, len);
+-#endif
++#ifdef NETIF_F_GRO
++/* Process an inbound completion from an rx ring. */
++static void ql_process_mac_rx_gro_page(struct ql_adapter *qdev,
++ struct rx_ring *rx_ring,
++ struct ib_mac_iocb_rsp *ib_mac_rsp,
++ u32 length,
++ u16 vlan_id)
++{
++ struct sk_buff *skb;
++ struct bq_desc *lbq_desc = ql_get_curr_lchunk(qdev, rx_ring);
++ struct skb_frag_struct *rx_frag;
++ int nr_frags;
++ struct napi_struct *napi = &rx_ring->napi;
++
++ napi->dev = qdev->ndev;
++
++ skb = napi_get_frags(napi);
++ if (!skb) {
++ QPRINTK(qdev, DRV, ERR, "Couldn't get an skb, exiting.\n");
++ put_page(lbq_desc->p.pg_chunk.page);
++ return;
++ }
++ prefetch(lbq_desc->p.pg_chunk.va);
++ rx_frag = skb_shinfo(skb)->frags;
++ nr_frags = skb_shinfo(skb)->nr_frags;
++ rx_frag += nr_frags;
++ rx_frag->page = lbq_desc->p.pg_chunk.page;
++ rx_frag->page_offset = lbq_desc->p.pg_chunk.offset;
++ rx_frag->size = length;
++
++ skb->len += length;
++ skb->data_len += length;
++ skb->truesize += length;
++ skb_shinfo(skb)->nr_frags++;
++
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
++ if (qdev->vlgrp && (vlan_id != 0xffff))
++ vlan_gro_frags(&rx_ring->napi, qdev->vlgrp, vlan_id);
++ else
++ napi_gro_frags(napi);
+ }
++#endif
+
+-/*
+- * This function builds an skb for the given inbound
+- * completion. It will be rewritten for readability in the near
+- * future, but for not it works well.
+- */
+-static struct sk_buff *ql_build_rx_skb(struct ql_adapter *qdev,
++/* Process an inbound completion from an rx ring. */
++static void ql_process_mac_rx_page(struct ql_adapter *qdev,
+ struct rx_ring *rx_ring,
+- struct ib_mac_iocb_rsp *ib_mac_rsp)
++ struct ib_mac_iocb_rsp *ib_mac_rsp,
++ u32 length,
++ u16 vlan_id)
+ {
+- struct bq_desc *lbq_desc;
+- struct bq_desc *sbq_desc;
++ struct net_device *ndev = qdev->ndev;
+ struct sk_buff *skb = NULL;
+- u32 length = le32_to_cpu(ib_mac_rsp->data_len);
+- u32 hdr_len = le32_to_cpu(ib_mac_rsp->hdr_len);
++ void *addr;
++ struct bq_desc *lbq_desc = ql_get_curr_lchunk(qdev, rx_ring);
+
+- /*
+- * Handle the header buffer if present.
+- */
+- if (ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HV &&
+- ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HS) {
+- QPRINTK_DBG(qdev, RX_STATUS, DEBUG, "Header of %d bytes in "
+- "small buffer.\n", hdr_len);
+- /*
+- * Headers fit nicely into a small buffer.
+- */
+- sbq_desc = ql_get_curr_sbuf(rx_ring);
+- pci_unmap_single(qdev->pdev,
+- pci_unmap_addr(sbq_desc, mapaddr),
+- pci_unmap_len(sbq_desc, maplen),
+- PCI_DMA_FROMDEVICE);
+- skb = sbq_desc->p.skb;
+- ql_realign_skb(skb, hdr_len);
+- skb_put(skb, hdr_len);
+- sbq_desc->p.skb = NULL;
++#ifdef NETIF_F_GRO
++ struct napi_struct *napi = &rx_ring->napi;
++ napi->dev = qdev->ndev;
++#endif
++ skb = netdev_alloc_skb(ndev, length);
++ if (!skb) {
++ QPRINTK(qdev, DRV, ERR, "Couldn't get an skb, "
++ "need to unwind!.\n");
++ put_page(lbq_desc->p.pg_chunk.page);
++ return;
+ }
+
+- /*
+- * Handle the data buffer(s).
++ addr = lbq_desc->p.pg_chunk.va;
++ prefetch(addr);
++
++
++ /* Frame error, so drop the packet. */
++ if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) {
++ QPRINTK(qdev, DRV, ERR, "Receive error, flags2 = 0x%x\n",
++ ib_mac_rsp->flags2);
++ goto err_out;
++ }
++
++ /* The max framesize filter on this chip is set higher than
++ * MTU since FCoE uses 2k frames.
+ */
+- if (unlikely(!length)) { /* Is there data too? */
+- QPRINTK(qdev, RX_STATUS, DEBUG,
+- "No Data buffer in this packet.\n");
+- return skb;
++ if (skb->len > ndev->mtu + ETH_HLEN) {
++ QPRINTK(qdev, DRV, ERR, "Segment too small, dropping.\n");
++ goto err_out;
+ }
++ memcpy(skb_put(skb, ETH_HLEN), addr, ETH_HLEN);
++ QPRINTK(qdev, RX_STATUS, DEBUG,
++ "%d bytes of headers and data in large. Chain "
++ "page to new skb and pull tail.\n", length);
++ skb_fill_page_desc(skb, 0, lbq_desc->p.pg_chunk.page,
++ lbq_desc->p.pg_chunk.offset+ETH_HLEN,
++ length-ETH_HLEN);
++ skb->len += length-ETH_HLEN;
++ skb->data_len += length-ETH_HLEN;
++ skb->truesize += length-ETH_HLEN;
+
+- if (ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_DS) {
+- if (ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HS) {
+- QPRINTK_DBG(qdev, RX_STATUS, DEBUG,
+- "Headers in small, data of %d bytes in small, "
+- "combine them.\n", length);
+- /*
+- * Data is less than small buffer size so it's
+- * stuffed in a small buffer.
+- * For this case we append the data
+- * from the "data" small buffer to the "header" small
+- * buffer.
+- */
+- sbq_desc = ql_get_curr_sbuf(rx_ring);
+- pci_dma_sync_single_for_cpu(qdev->pdev,
+- pci_unmap_addr
+- (sbq_desc, mapaddr),
+- pci_unmap_len
+- (sbq_desc, maplen),
+- PCI_DMA_FROMDEVICE);
+- memcpy(skb_put(skb, length),
+- sbq_desc->p.skb->data, length);
+- pci_dma_sync_single_for_device(qdev->pdev,
+- pci_unmap_addr
+- (sbq_desc,
+- mapaddr),
+- pci_unmap_len
+- (sbq_desc,
+- maplen),
+- PCI_DMA_FROMDEVICE);
+- } else {
+- QPRINTK_DBG(qdev, RX_STATUS, DEBUG,
+- "%d bytes in a single small buffer.\n", length);
+- sbq_desc = ql_get_curr_sbuf(rx_ring);
+- skb = sbq_desc->p.skb;
+- ql_realign_skb(skb, length);
+- skb_put(skb, length);
+- pci_unmap_single(qdev->pdev,
+- pci_unmap_addr(sbq_desc,
+- mapaddr),
+- pci_unmap_len(sbq_desc,
+- maplen),
+- PCI_DMA_FROMDEVICE);
+- sbq_desc->p.skb = NULL;
+- }
+- } else if (ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_DL) {
+- if (ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HS) {
+- QPRINTK_DBG(qdev, RX_STATUS, DEBUG,
+- "Header in small, %d bytes in large. Chain "
+- "large to small!\n", length);
+- /*
+- * The data is in a single large buffer. We
+- * chain it to the header buffer's skb and let
+- * it rip.
+- */
+- lbq_desc = ql_get_curr_lbuf(rx_ring);
+- pci_unmap_page(qdev->pdev,
+- pci_unmap_addr(lbq_desc,
+- mapaddr),
+- pci_unmap_len(lbq_desc, maplen),
+- PCI_DMA_FROMDEVICE);
+- QPRINTK_DBG(qdev, RX_STATUS, DEBUG,
+- "Chaining page to skb.\n");
+- skb_fill_page_desc(skb, 0, lbq_desc->p.lbq_page,
+- 0, length);
+- skb->len += length;
+- skb->data_len += length;
+- skb->truesize += length;
+- lbq_desc->p.lbq_page = NULL;
+- } else {
+- /*
+- * The headers and data are in a single large buffer.
+- * We copy it to a new skb and let it go. This can
+- * happen with jumbo mtu on a non-TCP/UDP frame.
+- */
+- void *vaddr;
+- lbq_desc = ql_get_curr_lbuf(rx_ring);
+- skb = netdev_alloc_skb(qdev->ndev, length);
+- if (skb == NULL) {
+- QPRINTK(qdev, PROBE, ERR,
+- "No skb available, drop the packet.\n");
+- return NULL;
+- }
+- pci_unmap_page(qdev->pdev,
+- pci_unmap_addr(lbq_desc,
+- mapaddr),
+- pci_unmap_len(lbq_desc, maplen),
+- PCI_DMA_FROMDEVICE);
+- prefetch(lbq_desc->pg_addr);
+- skb_reserve(skb, NET_IP_ALIGN);
+- vaddr = kmap_atomic(lbq_desc->p.lbq_page,
+- KM_SKB_DATA_SOFTIRQ);
+- memcpy(skb_put(skb, ETH_HLEN), vaddr, ETH_HLEN);
+- kunmap_atomic(vaddr,
+- KM_SKB_DATA_SOFTIRQ);
+- QPRINTK_DBG(qdev, RX_STATUS, DEBUG,
+- "%d bytes of headers and data in large. Chain "
+- "page to new skb and pull tail.\n", length);
+- skb_fill_page_desc(skb, 0, lbq_desc->p.lbq_page,
+- ETH_HLEN, length-ETH_HLEN);
+- skb->len += length-ETH_HLEN;
+- skb->data_len += length-ETH_HLEN;
+- skb->truesize += length-ETH_HLEN;
+- lbq_desc->p.lbq_page = NULL;
+- }
+- } else {
+- /*
+- * The data is in a chain of large buffers
+- * pointed to by a small buffer. We loop
+- * thru and chain them to the our small header
+- * buffer's skb.
+- * frags: There are 18 max frags and our small
+- * buffer will hold 32 of them. The thing is,
+- * we'll use 3 max for our 9000 byte jumbo
+- * frames. If the MTU goes up we could
+- * eventually be in trouble.
+- */
+- int size, offset, i = 0;
+- __le64 *bq, bq_array[8];
+- sbq_desc = ql_get_curr_sbuf(rx_ring);
+- pci_unmap_single(qdev->pdev,
+- pci_unmap_addr(sbq_desc, mapaddr),
+- pci_unmap_len(sbq_desc, maplen),
+- PCI_DMA_FROMDEVICE);
+- if (!(ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HS)) {
+- /*
+- * This is an non TCP/UDP IP frame, so
+- * the headers aren't split into a small
+- * buffer. We have to use the small buffer
+- * that contains our sg list as our skb to
+- * send upstairs. Copy the sg list here to
+- * a local buffer and use it to find the
+- * pages to chain.
+- */
+- QPRINTK_DBG(qdev, RX_STATUS, DEBUG,
+- "%d bytes of headers & data in chain of "
+- "large.\n", length);
+- skb = sbq_desc->p.skb;
+- bq = &bq_array[0];
+- memcpy(bq, skb->data, sizeof(bq_array));
+- sbq_desc->p.skb = NULL;
+- skb_reserve(skb, NET_IP_ALIGN);
+- } else {
+- QPRINTK_DBG(qdev, RX_STATUS, DEBUG,
+- "Headers in small, %d bytes of data in chain "
+- "of large.\n", length);
+- bq = (__le64 *)sbq_desc->p.skb->data;
+- }
+- while (length > 0) {
+- lbq_desc = ql_get_curr_lbuf(rx_ring);
+- pci_unmap_page(qdev->pdev,
+- pci_unmap_addr(lbq_desc,
+- mapaddr),
+- pci_unmap_len(lbq_desc,
+- maplen),
+- PCI_DMA_FROMDEVICE);
+- if (i == 0) {
+- /* If this is the first large buffer then
+- * we copy a piece to the skb-data section
+- * so eth_type_trans() will have something
+- * to read.
+- */
+- void *vaddr;
+- vaddr = kmap_atomic(lbq_desc->p.lbq_page,
+- KM_SKB_DATA_SOFTIRQ);
+- memcpy(skb_put(skb, ETH_HLEN), vaddr, ETH_HLEN);
+- kunmap_atomic(vaddr,
+- KM_SKB_DATA_SOFTIRQ);
+- size = rx_ring->lbq_buf_map_size - ETH_HLEN;
+- offset = ETH_HLEN;
+- length -= ETH_HLEN;
+- } else {
+- size = (length < rx_ring->lbq_buf_map_size) ?
+- length : rx_ring->lbq_buf_map_size;
+- offset = 0;
+- }
++ skb->dev = ndev;
++ skb->protocol = eth_type_trans(skb, ndev);
++ skb->ip_summed = CHECKSUM_NONE;
+
++ if (qdev->rx_csum &&
++ !(ib_mac_rsp->flags1 & IB_MAC_CSUM_ERR_MASK)) {
++ /* TCP frame. */
++ if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_T) {
+ QPRINTK_DBG(qdev, RX_STATUS, DEBUG,
+- "Adding page %d to skb for %d bytes.\n",
+- i, size);
+- skb_fill_page_desc(skb, i, lbq_desc->p.lbq_page,
+- offset, size);
+- skb->len += size;
+- skb->data_len += size;
+- skb->truesize += size;
+- length -= size;
+- lbq_desc->p.lbq_page = NULL;
+- i++;
++ "TCP checksum done!\n");
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
++ } else if ((ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_U) &&
++ (ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_V4)) {
++ /* Unfragmented ipv4 UDP frame. */
++ struct iphdr *iph = (struct iphdr *) skb->data;
++ if (!(iph->frag_off &
++ cpu_to_be16(IP_MF|IP_OFFSET))) {
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
++ QPRINTK_DBG(qdev, RX_STATUS, DEBUG,
++ "TCP checksum done!\n");
++ }
+ }
++ }
+
++ if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
++ if (qdev->vlgrp && (vlan_id != 0xffff))
++ vlan_gro_receive(napi, qdev->vlgrp, vlan_id, skb);
++ else
++ napi_gro_receive(napi, skb);
++ } else {
++ if (qdev->vlgrp && (vlan_id != 0xffff))
++ vlan_hwaccel_receive_skb(skb, qdev->vlgrp, vlan_id);
++ else
++ netif_receive_skb(skb);
+ }
+- return skb;
++ return;
++err_out:
++ dev_kfree_skb_any(skb);
++ put_page(lbq_desc->p.pg_chunk.page);
+ }
+
+ /* Process an inbound completion from an rx ring. */
+-static void ql_process_mac_rx_intr(struct ql_adapter *qdev,
++static void ql_process_mac_rx_skb(struct ql_adapter *qdev,
+ struct rx_ring *rx_ring,
+- struct ib_mac_iocb_rsp *ib_mac_rsp)
++ struct ib_mac_iocb_rsp *ib_mac_rsp,
++ u32 length,
++ u16 vlan_id)
+ {
+ struct net_device *ndev = qdev->ndev;
+ struct sk_buff *skb = NULL;
++ struct sk_buff *new_skb = NULL;
++ struct bq_desc *sbq_desc = ql_get_curr_sbuf(rx_ring);
+
+- QL_DUMP_IB_MAC_RSP(ib_mac_rsp);
++#ifdef NETIF_F_GRO
++ struct napi_struct *napi = &rx_ring->napi;
++ napi->dev = qdev->ndev;
++#endif
++ skb = sbq_desc->p.skb;
+
+- skb = ql_build_rx_skb(qdev, rx_ring, ib_mac_rsp);
+- if (unlikely(!skb)) {
+- QPRINTK(qdev, RX_STATUS, ERR,
+- "No skb available, drop packet.\n");
++ /* Allocate new_skb and copy */
++ new_skb = netdev_alloc_skb(qdev->ndev, length + NET_IP_ALIGN);
++ if (new_skb == NULL) {
++ QPRINTK(qdev, PROBE, ERR,
++ "No skb available, drop the packet.\n");
+ return;
+ }
++ skb_reserve(new_skb, NET_IP_ALIGN);
++ memcpy(skb_put(new_skb, length), skb->data, length);
++ skb = new_skb;
+
+ /* Frame error, so drop the packet. */
+ if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) {
+@@ -1690,7 +1641,7 @@ static void ql_process_mac_rx_intr(struc
+ QPRINTK_DBG(qdev, RX_STATUS, DEBUG,
+ "TCP checksum done!\n");
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+- } else if ((ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_U) && \
++ } else if ((ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_U) &&
+ (ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_V4)) {
+ /* Unfragmented ipv4 UDP frame. */
+ struct iphdr *iph = (struct iphdr *) skb->data;
+@@ -1702,34 +1653,88 @@ static void ql_process_mac_rx_intr(struc
+ }
+ }
+ }
+- qdev->stats.rx_packets++;
+- qdev->stats.rx_bytes += skb->len;
+
+- if (qdev->vlgrp && (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_V)) {
+- QPRINTK_DBG(qdev, RX_STATUS, DEBUG,
+- "Passing a VLAN packet upstream.\n");
+- vlan_hwaccel_receive_skb(skb, qdev->vlgrp,
+- le16_to_cpu(ib_mac_rsp->vlan_id));
++ if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
++ if (qdev->vlgrp && (vlan_id != 0xffff))
++ vlan_gro_receive(&rx_ring->napi, qdev->vlgrp,
++ vlan_id, skb);
++ else
++ napi_gro_receive(&rx_ring->napi, skb);
+ } else {
+- QPRINTK_DBG(qdev, RX_STATUS, DEBUG,
+- "Passing a normal packet upstream.\n");
+- netif_receive_skb(skb);
++ if (qdev->vlgrp && (vlan_id != 0xffff))
++ vlan_hwaccel_receive_skb(skb, qdev->vlgrp, vlan_id);
++ else
++ netif_receive_skb(skb);
+ }
+ }
+
++/* Process an inbound completion from an rx ring. */
++static unsigned long ql_process_mac_rx_intr(struct ql_adapter *qdev,
++ struct rx_ring *rx_ring,
++ struct ib_mac_iocb_rsp *ib_mac_rsp)
++{
++ u32 length = le32_to_cpu(ib_mac_rsp->data_len);
++ u16 vlan_id = (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_V) ?
++ ((le16_to_cpu(ib_mac_rsp->vlan_id) &
++ IB_MAC_IOCB_RSP_VLAN_MASK)) : 0xffff;
++
++ QL_DUMP_IB_MAC_RSP(ib_mac_rsp);
++ if (ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_DS) {
++ /* The data fit in a single small buffer.
++ * Allocate a new skb, copy the data and
++ * return the buffer to the free pool.
++ */
++ ql_process_mac_rx_skb(qdev, rx_ring, ib_mac_rsp,
++ length, vlan_id);
++#ifdef NETIF_F_GRO
++ } else if ((ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_DL) &&
++ !(ib_mac_rsp->flags1 & IB_MAC_CSUM_ERR_MASK) &&
++ (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_T)) {
++ /* TCP packet in a page chunk that's been checksummed.
++ * Tack it on to our GRO skb and let it go.
++ */
++ ql_process_mac_rx_gro_page(qdev, rx_ring, ib_mac_rsp,
++ length, vlan_id);
++#endif
++ } else if (ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_DL) {
++ /* Non-TCP packet in a page chunk. Allocate an
++ * skb, tack it on frags, and send it up.
++ */
++ ql_process_mac_rx_page(qdev, rx_ring, ib_mac_rsp,
++ length, vlan_id);
++ } else {
++ struct bq_desc *lbq_desc;
++
++ /* Free small buffer that holds the IAL */
++ lbq_desc = ql_get_curr_sbuf(rx_ring);
++ QPRINTK(qdev, RX_ERR, ERR, "Dropping frame, len %d > mtu %d\n",
++ length, qdev->ndev->mtu);
++
++ /* Unwind the large buffers for this frame. */
++ while (length > 0) {
++ lbq_desc = ql_get_curr_lchunk(qdev, rx_ring);
++ length -= (length < rx_ring->lbq_buf_map_size) ?
++ length : rx_ring->lbq_buf_map_size;
++ put_page(lbq_desc->p.pg_chunk.page);
++ }
++ }
++
++ return (unsigned long)length;
++}
++
+ /* Process an outbound completion from an rx ring. */
+-static void ql_process_mac_tx_intr(struct ql_adapter *qdev,
++static unsigned long ql_process_mac_tx_intr(struct ql_adapter *qdev,
+ struct ob_mac_iocb_rsp *mac_rsp)
+ {
+ struct tx_ring *tx_ring;
+ struct tx_ring_desc *tx_ring_desc;
++ unsigned long len;
+
+ QL_DUMP_OB_MAC_RSP(mac_rsp);
+ tx_ring = &qdev->tx_ring[mac_rsp->txq_idx];
+ tx_ring_desc = &tx_ring->q[mac_rsp->tid];
+ ql_unmap_send(qdev, tx_ring_desc, tx_ring_desc->map_cnt);
+- qdev->stats.tx_bytes += (tx_ring_desc->skb)->len;
+- qdev->stats.tx_packets++;
++ len = (unsigned long)(tx_ring_desc->skb)->len;
+ dev_kfree_skb_any(tx_ring_desc->skb);
+ tx_ring_desc->skb = NULL;
+
+@@ -1755,6 +1760,7 @@ static void ql_process_mac_tx_intr(struc
+ }
+ }
+ atomic_inc(&tx_ring->tx_count);
++ return len;
+ }
+
+ /* Fire up a handler to reset the MPI processor. */
+@@ -1823,6 +1829,7 @@ static int ql_clean_outbound_rx_ring(str
+ u32 prod = ql_read_sh_reg(rx_ring->prod_idx_sh_reg);
+ struct ob_mac_iocb_rsp *net_rsp = NULL;
+ int count = 0;
++ unsigned long bytes = 0;
+
+ /* While there are entries in the completion queue. */
+ while (prod != rx_ring->cnsmr_idx) {
+@@ -1837,6 +1844,7 @@ static int ql_clean_outbound_rx_ring(str
+
+ case OPCODE_OB_MAC_TSO_IOCB:
+ case OPCODE_OB_MAC_IOCB:
++ bytes +=
+ ql_process_mac_tx_intr(qdev, net_rsp);
+ break;
+ default:
+@@ -1864,7 +1872,8 @@ static int ql_clean_outbound_rx_ring(str
+ */
+ netif_wake_queue(qdev->ndev);
+ }
+-
++ rx_ring->bytes += bytes;
++ rx_ring->packets += (unsigned long) count;
+ return count;
+ }
+
+@@ -1890,6 +1899,7 @@ static int ql_clean_inbound_rx_ring(stru
+ u32 prod = ql_read_sh_reg(rx_ring->prod_idx_sh_reg);
+ struct ql_net_rsp_iocb *net_rsp;
+ int count = 0;
++ unsigned long bytes = 0;
+
+ /* While there are entries in the completion queue. */
+ while (prod != rx_ring->cnsmr_idx) {
+@@ -1902,6 +1912,7 @@ static int ql_clean_inbound_rx_ring(stru
+ rmb();
+ switch (net_rsp->opcode) {
+ case OPCODE_IB_MAC_IOCB:
++ bytes +=
+ ql_process_mac_rx_intr(qdev, rx_ring,
+ (struct ib_mac_iocb_rsp *)
+ net_rsp);
+@@ -1925,6 +1936,8 @@ static int ql_clean_inbound_rx_ring(stru
+ }
+ ql_update_buffer_queues(qdev, rx_ring);
+ ql_write_cq_idx(rx_ring);
++ rx_ring->bytes += bytes;
++ rx_ring->packets += (unsigned long) count;
+ return count;
+ }
+
+@@ -1986,7 +1999,8 @@ static void ql_vlan_rx_kill_vid(struct n
+ }
+
+ /* MSI-X Multiple Vector Interrupt Handler for inbound completions. */
+-static irqreturn_t qlge_msix_rx_isr(int irq, void *dev_id, struct pt_regs *ptregs)
++static irqreturn_t qlge_msix_rx_isr(int irq, void *dev_id,
++ struct pt_regs *ptregs)
+ {
+ struct rx_ring *rx_ring = dev_id;
+
+@@ -1995,7 +2009,8 @@ static irqreturn_t qlge_msix_rx_isr(int
+ }
+
+ /* MSI-X Multiple Vector Interrupt Handler for inbound completions. */
+-static irqreturn_t qlge_msix_dflt_rx_isr(int irq, void *dev_id, struct pt_regs *ptregs)
++static irqreturn_t qlge_msix_dflt_rx_isr(int irq, void *dev_id,
++ struct pt_regs *ptregs)
+ {
+ struct rx_ring *rx_ring = dev_id;
+ struct ql_adapter *qdev = rx_ring->qdev;
+@@ -2429,22 +2444,30 @@ err:
+ static void ql_free_lbq_buffers(struct ql_adapter *qdev,
+ struct rx_ring *rx_ring)
+ {
+- int i;
+ struct bq_desc *lbq_desc;
++ uint32_t curr_idx, clean_idx;
+
+- for (i = 0; i < rx_ring->lbq_len; i++) {
+- lbq_desc = &rx_ring->lbq[i];
+- if (lbq_desc && lbq_desc->p.lbq_page) {
++ curr_idx = rx_ring->lbq_curr_idx;
++ clean_idx = rx_ring->lbq_clean_idx;
++ while (curr_idx != clean_idx) {
++ lbq_desc = &rx_ring->lbq[curr_idx];
++
++ if (lbq_desc->p.pg_chunk.last_flag) {
+ pci_unmap_page(qdev->pdev,
+- pci_unmap_addr(lbq_desc, mapaddr),
+- pci_unmap_len(lbq_desc, maplen),
++ lbq_desc->p.pg_chunk.map,
++ ql_lbq_block_size(qdev),
+ PCI_DMA_FROMDEVICE);
+- put_page(lbq_desc->p.lbq_page);
+- lbq_desc->p.lbq_page = NULL;
++ lbq_desc->p.pg_chunk.last_flag = 0;
+ }
++
++ put_page(lbq_desc->p.pg_chunk.page);
++ lbq_desc->p.pg_chunk.page = NULL;
++
++ if (++curr_idx == rx_ring->lbq_len)
++ curr_idx = 0;
++
+ }
+ }
+-
+ static void ql_free_sbq_buffers(struct ql_adapter *qdev,
+ struct rx_ring *rx_ring)
+ {
+@@ -2478,6 +2501,8 @@ static void ql_free_rx_buffers(struct ql
+ ql_free_lbq_buffers(qdev, rx_ring);
+ if (rx_ring->sbq)
+ ql_free_sbq_buffers(qdev, rx_ring);
++ rx_ring->sbq_clean_idx = 0;
++ rx_ring->lbq_clean_idx = 0;
+ }
+ }
+
+@@ -2841,6 +2866,10 @@ static int ql_start_rx_ring(struct ql_ad
+ cqicb->flags |= FLAGS_LI; /* Load irq delay values */
+ cqicb->irq_delay = cpu_to_le16(qdev->rx_coalesce_usecs);
+ cqicb->pkt_delay = cpu_to_le16(qdev->rx_max_coalesced_frames);
++
++#ifdef NETIF_F_GRO
++ rx_ring->napi.dev = qdev->ndev;
++#endif
+ break;
+ default:
+ QPRINTK_DBG(qdev, IFUP, DEBUG, "Invalid rx_ring->type = %d.\n",
+@@ -3151,6 +3180,12 @@ static int ql_request_irq(struct ql_adap
+
+ static int ql_start_rss(struct ql_adapter *qdev)
+ {
++ u8 init_hash_seed[] = {0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
++ 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f,
++ 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b,
++ 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80,
++ 0x30, 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b,
++ 0xbe, 0xac, 0x01, 0xfa};
+ struct ricb *ricb = &qdev->ricb;
+ int status = 0;
+ int i;
+@@ -3159,21 +3194,19 @@ static int ql_start_rss(struct ql_adapte
+ memset((void *)ricb, 0, sizeof(*ricb));
+
+ ricb->base_cq = RSS_L4K;
++
+ ricb->flags =
+- (RSS_L6K | RSS_LI | RSS_LB | RSS_LM | RSS_RI4 | RSS_RI6 |
+- RSS_RT4 | RSS_RT6);
+- ricb->mask = cpu_to_le16(qdev->rss_ring_count - 1);
++ (RSS_L6K | RSS_LI | RSS_LB | RSS_LM | RSS_RT4 | RSS_RT6);
++ ricb->mask = cpu_to_le16((u16)(0x3ff));
+
+ /*
+ * Fill out the Indirection Table.
+ */
+- for (i = 0; i < 256; i++)
++ for (i = 0; i < 1024; i++)
+ hash_id[i] = (i & (qdev->rss_ring_count - 1));
+- /*
+- * Random values for the IPv6 and IPv4 Hash Keys.
+- */
+- get_random_bytes((void *)&ricb->ipv6_hash_key[0], 40);
+- get_random_bytes((void *)&ricb->ipv4_hash_key[0], 16);
++
++ memcpy((void *)&ricb->ipv6_hash_key[0], init_hash_seed, 40);
++ memcpy((void *)&ricb->ipv4_hash_key[0], init_hash_seed, 16);
+
+ QPRINTK_DBG(qdev, IFUP, DEBUG, "Initializing RSS.\n");
+ status = ql_write_cfg(qdev, ricb, sizeof(*ricb), CFG_LR, 0);
+@@ -3509,9 +3542,15 @@ int ql_wol(struct ql_adapter *qdev)
+ static void ql_disable_napi(struct ql_adapter *qdev)
+ {
+ int i;
++ struct rx_ring *rx_ring;
+
+- for (i = 0; i < qdev->intr_count; i++)
+- netif_poll_disable(qdev->rx_ring[i].dummy_netdev);
++ for (i = 0; i < qdev->intr_count; i++) {
++ rx_ring = &qdev->rx_ring[i];
++ netif_poll_disable(rx_ring->dummy_netdev);
++#ifdef NETIF_F_GRO
++ napi_free_frags(&rx_ring->napi);
++#endif
++ }
+ }
+ static void ql_enable_napi(struct ql_adapter *qdev)
+ {
+@@ -3629,6 +3668,9 @@ static int ql_configure_rings(struct ql_
+ struct tx_ring *tx_ring;
+ struct net_device *temp_netdev;
+ int cpu_cnt = num_online_cpus();
++ unsigned int lbq_buf_len = (qdev->ndev->mtu > 1500) ? 16384 : 2048;
++
++ qdev->lbq_buf_order = get_order(lbq_buf_len);
+
+ if (cpu_cnt > MAX_CPUS)
+ cpu_cnt = MAX_CPUS;
+@@ -3687,9 +3729,11 @@ static int ql_configure_rings(struct ql_
+ rx_ring->lbq_len = NUM_LARGE_BUFFERS;
+ rx_ring->lbq_size =
+ rx_ring->lbq_len * sizeof(__le64);
+- rx_ring->lbq_buf_map_size =
+- (PAGE_SIZE > LARGE_BUFFER_SIZE) ?
+- LARGE_BUFFER_SIZE : PAGE_SIZE;
++ /* Don't use small buffers for page chunks. */
++ rx_ring->lbq_buf_map_size = (u16)lbq_buf_len;
++ QPRINTK_DBG(qdev, IFUP, DEBUG,
++ "lbq_buf_map_size %d, order = %d\n",
++ rx_ring->lbq_buf_map_size, qdev->lbq_buf_order);
+ rx_ring->sbq_len = NUM_SMALL_BUFFERS;
+ rx_ring->sbq_size =
+ rx_ring->sbq_len * sizeof(__le64);
+@@ -3742,22 +3786,166 @@ error_up:
+ return err;
+ }
+
++static void ql_startstop_all_rx_cq(struct ql_adapter *qdev, int start_cq)
++{
++ struct rx_ring *rx_ring;
++ int i;
++ u32 value, mask;
++
++ for (i = 0; i < qdev->rss_ring_count; i++) {
++ rx_ring = &qdev->rx_ring[i];
++ if (start_cq)
++ value = CQ_STOP_TYPE_START;
++ else
++ value = CQ_STOP_TYPE_STOP;
++
++ value |= rx_ring->cq_id;
++ mask = CQ_STOP_QUEUE_MASK | CQ_STOP_TYPE_MASK;
++
++ ql_write32(qdev, CQ_STOP, value | mask);
++
++ udelay(1);
++
++ value = CQ_STOP_TYPE_READ;
++ value |= rx_ring->cq_id;
++ mask = CQ_STOP_QUEUE_MASK | CQ_STOP_TYPE_MASK;
++
++ ql_write32(qdev, CQ_STOP, value | mask);
++ udelay(1);
++ /* Read the current setting */
++ value = ql_read32(qdev, CQ_STOP);
++ if (start_cq && !(value & CQ_STOP_EN))
++ QPRINTK(qdev, IFUP, ERR,
++ "%s: Unable to enable completion queue %d\n",
++ qdev->ndev->name, i);
++ else if (!start_cq && (value & CQ_STOP_EN))
++ QPRINTK(qdev, IFUP, ERR,
++ "%s: Unable to disable completion queue %d\n",
++ qdev->ndev->name, i);
++ }
++}
++
++static void ql_quiesce_rx_cq(struct ql_adapter *qdev)
++{
++ volatile struct rx_ring *rx_ring;
++ int i, rings_done, count = 30;
++ u32 prod;
++
++ msleep(1);
++ while (count) {
++ rings_done = 0;
++ for (i = 0; i < qdev->rss_ring_count; i++) {
++ rx_ring = &qdev->rx_ring[i];
++ prod = ql_read_sh_reg(rx_ring->prod_idx_sh_reg);
++ if (prod == rx_ring->cnsmr_idx)
++ rings_done++;
++ }
++ if (rings_done >= qdev->rss_ring_count) {
++ QPRINTK_DBG(qdev, IFUP, ERR, "Rx CQ's quiesced !\n");
++ break;
++ }
++ count--;
++ msleep(1);
++ }
++
++ if (!count) {
++ QPRINTK_DBG(qdev, IFUP, ERR, "Flushing %d queues\n",
++ (qdev->rss_ring_count - rings_done));
++ for (i = 0; i < qdev->rss_ring_count; i++) {
++ rx_ring = &qdev->rx_ring[i];
++ prod = ql_read_sh_reg(rx_ring->prod_idx_sh_reg);
++ if (prod != rx_ring->cnsmr_idx) {
++ count = 0;
++ while (prod != rx_ring->cnsmr_idx) {
++ ql_clean_inbound_rx_ring((struct rx_ring *)
++ rx_ring, 1);
++ count++;
++ if (count > 32) {
++ QPRINTK_DBG(qdev, IFUP, ERR,
++ "Rx CQ %d, prod = %d, cnsmr_idx = %d\n",
++ i, prod, rx_ring->cnsmr_idx);
++ break;
++ }
++ prod = ql_read_sh_reg(rx_ring->prod_idx_sh_reg);
++ }
++ }
++ }
++ }
++}
++
+ static int qlge_change_mtu(struct net_device *ndev, int new_mtu)
+ {
+ struct ql_adapter *qdev = netdev_priv(ndev);
++ int count = 1000;
++ u32 lbq_buf_len;
++ struct rx_ring *rx_ring;
++ int old_mtu;
+
+ if (ndev->mtu == 1500 && new_mtu == 9000) {
+ QPRINTK_DBG(qdev, IFUP, ERR, "Changing to jumbo MTU.\n");
+- queue_delayed_work(qdev->workqueue,
+- &qdev->mpi_port_cfg_work, 0);
+ } else if (ndev->mtu == 9000 && new_mtu == 1500) {
+ QPRINTK_DBG(qdev, IFUP, ERR,
+ "Changing to normal MTU.\n");
+ } else if ((ndev->mtu == 1500 && new_mtu == 1500) ||
+ (ndev->mtu == 9000 && new_mtu == 9000)) {
++ QPRINTK_DBG(qdev, IFUP, ERR,
++ "MTU unchanged.\n");
++ return 0;
+ } else
+ return -EINVAL;
++ old_mtu = ndev->mtu;
+ ndev->mtu = new_mtu;
++
++ if (netif_running(qdev->ndev)) {
++ while (count && !test_bit(QL_ADAPTER_UP, &qdev->flags)) {
++ if (!(count % 100)) {
++ QPRINTK_DBG(qdev, IFUP, ERR,
++ "Waiting %d msec for adapter UP\n",
++ (1000 - count));
++ }
++ count--;
++ }
++ if (!count) {
++ QPRINTK(qdev, IFUP, ERR,
++ "Timed out waiting for adapter UP\n");
++ ndev->mtu = old_mtu;
++ return -ETIMEDOUT;
++ }
++
++ /* Stop Rx completion queues */
++ ql_startstop_all_rx_cq(qdev, 0);
++
++ /* Quiesce receives */
++ ql_quiesce_rx_cq(qdev);
++
++ /* Free current buffers */
++ ql_free_rx_buffers(qdev);
++
++ /* Allocate large buffers to match the MTU size */
++ lbq_buf_len = (ndev->mtu > 1500) ? 16384 : 2048;
++ qdev->lbq_buf_order = get_order(lbq_buf_len);
++
++ /* Reload Rx completino queues */
++ for (count = 0; count < qdev->rx_ring_count; count++) {
++ rx_ring = &qdev->rx_ring[count];
++ if (TX_Q != rx_ring->type)
++ rx_ring->lbq_buf_map_size = (u16)lbq_buf_len;
++ }
++
++ /* Reload Rx completion queues */
++ for (count = 0; count < qdev->rx_ring_count; count++) {
++ rx_ring = &qdev->rx_ring[count];
++ if (TX_Q != rx_ring->type)
++ ql_start_rx_ring(qdev, rx_ring);
++ }
++
++ /* Redo receive buffers */
++ ql_alloc_rx_buffers(qdev);
++
++ /* Restart all completion queues */
++ ql_startstop_all_rx_cq(qdev, 1);
++ }
++
+ return 0;
+ }
+
+@@ -3765,6 +3953,22 @@ static struct net_device_stats *qlge_get
+ *ndev)
+ {
+ struct ql_adapter *qdev = netdev_priv(ndev);
++ struct rx_ring *rx_ring = &qdev->rx_ring[0];
++ int i;
++
++ for (i = 0; i < qdev->rx_ring_count; i++, rx_ring++) {
++ if (rx_ring->type == TX_Q) {
++ qdev->stats.tx_bytes += rx_ring->bytes;
++ rx_ring->bytes = 0;
++ qdev->stats.tx_packets += rx_ring->packets;
++ rx_ring->packets = 0;
++ } else {
++ qdev->stats.rx_bytes += rx_ring->bytes;
++ rx_ring->bytes = 0;
++ qdev->stats.rx_packets += rx_ring->packets;
++ rx_ring->packets = 0;
++ }
++ }
+ return &qdev->stats;
+ }
+
+@@ -4196,7 +4400,9 @@ static int ql_poll(struct net_device *nd
+ work_done = ql_clean_inbound_rx_ring(rx_ring, work_to_do);
+ *budget -= work_done;
+ ndev->quota -= work_done;
+-
++#ifdef NETIF_F_GRO
++ napi_gro_flush(&rx_ring->napi);
++#endif
+ if (work_done >= work_to_do)
+ return 1;
+
+@@ -4297,6 +4503,9 @@ static int __devinit qlge_probe(struct p
+ | NETIF_F_SG
+ | NETIF_F_TSO
+ | NETIF_F_LLTX
++#ifdef NETIF_F_GRO
++ | NETIF_F_GRO
++#endif
+ #ifdef NETIF_F_TSO6
+ | NETIF_F_TSO6
+ #endif
+@@ -4506,6 +4715,7 @@ static int qlge_suspend(struct pci_dev *
+ int err;
+
+ netif_device_detach(ndev);
++ del_timer_sync(&qdev->eeh_timer);
+
+ if (netif_running(ndev)) {
+ err = ql_adapter_down(qdev);
+@@ -4551,6 +4761,7 @@ static int qlge_resume(struct pci_dev *p
+ return err;
+ }
+
++ mod_timer(&qdev->eeh_timer, jiffies + HZ);
+ netif_device_attach(ndev);
+
+ return 0;
diff --git a/series.conf b/series.conf
index 5812d83..89c3bab 100644
--- a/series.conf
+++ b/series.conf
@@ -2106,6 +2106,7 @@
patches.drivers/qlge-add-v1.00.00.19.patch
patches.drivers/qlge-base-eeh-support
+ patches.drivers/qlge-change-large-rx-buffer-logic-rss-hash-and-qlge_sb_pad-for-performance.patch
patches.drivers/mlx4-add-v1.4.1-driver.patch
patches.drivers/mlx4-add-v1.4.1-driver-compat.patch