author    Petr Tesarik <ptesarik@suse.cz>  2018-11-09 14:09:07 +0100
committer Petr Tesarik <ptesarik@suse.cz>  2018-11-09 14:09:07 +0100
commit    37d6fef5c5db9eade22df4d87ddef01d323ca271 (patch)
tree      119cdc3b7d419c0a3d0f4fdf1cefc8f8218e57ea
parent    a2ebfaf431fff34660bdd1c7287564110441ebbe (diff)
parent    e264e3b43252fb36ca0274ee077845f9de209581 (diff)

Merge branch 'users/tbogendoerfer/SLE15-SP1/for-next' into SLE15-SP1

Pull Infiniband patches from Thomas Bogendoerfer

suse-commit: 067873e0095576382d9af2820e466fce78b61a91
-rw-r--r--Documentation/infiniband/user_verbs.txt2
-rw-r--r--Documentation/networking/00-INDEX2
-rw-r--r--Documentation/networking/xfrm_device.txt135
-rw-r--r--MAINTAINERS3
-rw-r--r--arch/mn10300/mm/fault.c2
-rw-r--r--drivers/infiniband/Kconfig22
-rw-r--r--drivers/infiniband/core/Makefile6
-rw-r--r--drivers/infiniband/core/addr.c105
-rw-r--r--drivers/infiniband/core/cache.c573
-rw-r--r--drivers/infiniband/core/cm.c289
-rw-r--r--drivers/infiniband/core/cma.c204
-rw-r--r--drivers/infiniband/core/cma_configfs.c2
-rw-r--r--drivers/infiniband/core/cma_priv.h97
-rw-r--r--drivers/infiniband/core/core_priv.h61
-rw-r--r--drivers/infiniband/core/cq.c16
-rw-r--r--drivers/infiniband/core/device.c33
-rw-r--r--drivers/infiniband/core/fmr_pool.c12
-rw-r--r--drivers/infiniband/core/iwcm.c3
-rw-r--r--drivers/infiniband/core/iwpm_util.c5
-rw-r--r--drivers/infiniband/core/mad.c1
-rw-r--r--drivers/infiniband/core/netlink.c10
-rw-r--r--drivers/infiniband/core/nldev.c704
-rw-r--r--drivers/infiniband/core/rdma_core.c13
-rw-r--r--drivers/infiniband/core/restrack.c222
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c39
-rw-r--r--drivers/infiniband/core/rw.c24
-rw-r--r--drivers/infiniband/core/sa_query.c204
-rw-r--r--drivers/infiniband/core/security.c10
-rw-r--r--drivers/infiniband/core/sysfs.c53
-rw-r--r--drivers/infiniband/core/ucm.c91
-rw-r--r--drivers/infiniband/core/ucma.c81
-rw-r--r--drivers/infiniband/core/user_mad.c123
-rw-r--r--drivers/infiniband/core/uverbs.h64
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c514
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c73
-rw-r--r--drivers/infiniband/core/uverbs_ioctl_merge.c4
-rw-r--r--drivers/infiniband/core/uverbs_main.c316
-rw-r--r--drivers/infiniband/core/uverbs_marshall.c10
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c331
-rw-r--r--drivers/infiniband/core/uverbs_std_types_cq.c210
-rw-r--r--drivers/infiniband/core/uverbs_std_types_dm.c108
-rw-r--r--drivers/infiniband/core/uverbs_std_types_flow_action.c435
-rw-r--r--drivers/infiniband/core/uverbs_std_types_mr.c147
-rw-r--r--drivers/infiniband/core/verbs.c434
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c86
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h6
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c4
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c8
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c4
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.c27
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c7
-rw-r--r--drivers/infiniband/hw/cxgb3/Kconfig9
-rw-r--r--drivers/infiniband/hw/cxgb3/Makefile2
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_dbg.c206
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.h9
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c6
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cq.c6
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c5
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c10
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c34
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h4
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c1
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c6
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h4
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c15
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h2
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c50
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c150
-rw-r--r--drivers/infiniband/hw/hfi1/firmware.c19
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h53
-rw-r--r--drivers/infiniband/hw/hfi1/init.c25
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c9
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c15
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c1
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c8
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c12
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c32
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c8
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ibhdrs.h16
-rw-r--r--drivers/infiniband/hw/hfi1/trace_rx.h30
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c2
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c4
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c4
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c92
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.h29
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c16
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c22
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c28
-rw-r--r--drivers/infiniband/hw/i40iw/Kconfig1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw.h8
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c145
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.h9
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_ctrl.c19
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_d.h12
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hw.c4
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_main.c32
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.c18
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.h2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_type.h3
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_uk.c49
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_user.h24
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_utils.c65
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c59
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c18
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c6
-rw-r--r--drivers/infiniband/hw/mlx4/main.c114
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c1
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h6
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c3
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c50
-rw-r--r--drivers/infiniband/hw/mlx5/Kconfig1
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile1
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c4
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c104
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h4
-rw-r--r--drivers/infiniband/hw/mlx5/cong.c83
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c112
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c192
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.h72
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c23
-rw-r--r--drivers/infiniband/hw/mlx5/main.c2280
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h237
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c228
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c15
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c726
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c10
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c7
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_user.h112
-rw-r--r--drivers/infiniband/hw/nes/nes.c33
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c11
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.h3
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c7
-rw-r--r--drivers/infiniband/hw/nes/nes_nic.c11
-rw-r--r--drivers/infiniband/hw/nes/nes_utils.c13
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c13
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c23
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c55
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.c8
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c48
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h12
-rw-r--r--drivers/infiniband/hw/qedr/main.c5
-rw-r--r--drivers/infiniband/hw/qedr/qedr_roce_cm.c16
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c127
-rw-r--r--drivers/infiniband/hw/qedr/verbs.h6
-rw-r--r--drivers/infiniband/hw/qib/qib.h37
-rw-r--r--drivers/infiniband/hw/qib/qib_7220.h1
-rw-r--r--drivers/infiniband/hw/qib/qib_diag.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c24
-rw-r--r--drivers/infiniband/hw/qib/qib_eeprom.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c85
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c62
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c75
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c189
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_keys.c235
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c14
-rw-r--r--drivers/infiniband/hw/qib/qib_pcie.c128
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c5
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_sd7220.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_sdma.c26
-rw-r--r--drivers/infiniband/hw/qib/qib_tx.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c7
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c1
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h25
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c26
-rw-r--r--drivers/infiniband/hw/usnic/usnic_transport.c4
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma.h4
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c13
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c53
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c15
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c13
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c5
-rw-r--r--drivers/infiniband/sw/rdmavt/mcast.c4
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c2
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c20
-rw-r--r--drivers/infiniband/sw/rdmavt/trace.h4
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c15
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.h7
-rw-r--r--drivers/infiniband/sw/rxe/Kconfig4
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c10
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h12
-rw-r--r--drivers/infiniband/sw/rxe/rxe_av.c19
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c15
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h30
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c74
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.h1
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c50
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.c24
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.h5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c17
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c18
-rw-r--r--drivers/infiniband/sw/rxe/rxe_srq.c44
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c115
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h5
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c18
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c5
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c11
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c81
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c6
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c16
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c2
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c14
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c41
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h22
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h7
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c44
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c3
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c14
-rw-r--r--drivers/infiniband/ulp/srp/Kconfig2
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c909
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h46
-rw-r--r--drivers/infiniband/ulp/srpt/Kconfig2
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c1535
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h114
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/catas.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cq.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_ethtool.c42
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_main.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_port.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c95
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c46
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/icm.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/intf.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c84
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/qp.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/srq.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c59
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h98
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/alloc.c37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c115
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h252
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c310
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c41
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_common.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c267
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dim.c48
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c545
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs.c274
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c1046
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c180
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c337
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c343
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c1277
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h303
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c574
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c55
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c121
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c157
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h71
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c354
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c1290
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h76
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c217
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h72
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c1021
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c65
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c56
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c68
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c175
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c125
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/rl.c63
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/transobj.c239
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c84
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.h22
-rw-r--r--drivers/pci/pci-sysfs.c175
-rw-r--r--drivers/pci/pci.c174
-rw-r--r--drivers/pci/pci.h21
-rw-r--r--drivers/pci/probe.c2
-rw-r--r--drivers/pci/slot.c1
-rw-r--r--include/asm-generic/bug.h2
-rw-r--r--include/linux/mlx4/cq.h3
-rw-r--r--include/linux/mlx4/device.h12
-rw-r--r--include/linux/mlx5/accel.h144
-rw-r--r--include/linux/mlx5/cq.h33
-rw-r--r--include/linux/mlx5/device.h66
-rw-r--r--include/linux/mlx5/driver.h159
-rw-r--r--include/linux/mlx5/eswitch.h58
-rw-r--r--include/linux/mlx5/fs.h12
-rw-r--r--include/linux/mlx5/fs_helpers.h142
-rw-r--r--include/linux/mlx5/mlx5_ifc.h368
-rw-r--r--include/linux/mlx5/mlx5_ifc_fpga.h92
-rw-r--r--include/linux/mlx5/port.h11
-rw-r--r--include/linux/mlx5/qp.h13
-rw-r--r--include/linux/mlx5/transobj.h24
-rw-r--r--include/linux/mlx5/vport.h7
-rw-r--r--include/linux/net_dim.h380
-rw-r--r--include/linux/netdevice.h36
-rw-r--r--include/linux/pci.h5
-rw-r--r--include/net/xfrm.h60
-rw-r--r--include/rdma/ib_addr.h50
-rw-r--r--include/rdma/ib_cache.h29
-rw-r--r--include/rdma/ib_hdrs.h10
-rw-r--r--include/rdma/ib_sa.h35
-rw-r--r--include/rdma/ib_umem.h1
-rw-r--r--include/rdma/ib_verbs.h298
-rw-r--r--include/rdma/rdma_cm.h44
-rw-r--r--include/rdma/rdma_vt.h33
-rw-r--r--include/rdma/rdmavt_qp.h6
-rw-r--r--include/rdma/restrack.h177
-rw-r--r--include/rdma/uverbs_ioctl.h192
-rw-r--r--include/rdma/uverbs_named_ioctl.h90
-rw-r--r--include/rdma/uverbs_std_types.h34
-rw-r--r--include/scsi/srp.h17
-rw-r--r--include/uapi/linux/dcbnl.h1
-rw-r--r--include/uapi/linux/ethtool.h4
-rw-r--r--include/uapi/linux/pci_regs.h8
-rw-r--r--include/uapi/rdma/bnxt_re-abi.h21
-rw-r--r--include/uapi/rdma/cxgb3-abi.h17
-rw-r--r--include/uapi/rdma/cxgb4-abi.h24
-rw-r--r--include/uapi/rdma/hfi/hfi1_ioctl.h32
-rw-r--r--include/uapi/rdma/hfi/hfi1_user.h10
-rw-r--r--include/uapi/rdma/hns-abi.h14
-rw-r--r--include/uapi/rdma/i40iw-abi.h (renamed from drivers/infiniband/hw/i40iw/i40iw_ucontext.h)16
-rw-r--r--include/uapi/rdma/ib_user_cm.h48
-rw-r--r--include/uapi/rdma/ib_user_ioctl_cmds.h134
-rw-r--r--include/uapi/rdma/ib_user_ioctl_verbs.h96
-rw-r--r--include/uapi/rdma/ib_user_mad.h4
-rw-r--r--include/uapi/rdma/ib_user_verbs.h228
-rw-r--r--include/uapi/rdma/mlx4-abi.h46
-rw-r--r--include/uapi/rdma/mlx5-abi.h161
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_cmds.h48
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_verbs.h43
-rw-r--r--include/uapi/rdma/mthca-abi.h10
-rw-r--r--include/uapi/rdma/nes-abi.h6
-rw-r--r--include/uapi/rdma/ocrdma-abi.h36
-rw-r--r--include/uapi/rdma/qedr-abi.h20
-rw-r--r--include/uapi/rdma/rdma_netlink.h112
-rw-r--r--include/uapi/rdma/rdma_user_cm.h49
-rw-r--r--include/uapi/rdma/rdma_user_ioctl.h38
-rw-r--r--include/uapi/rdma/rdma_user_ioctl_cmds.h99
-rw-r--r--include/uapi/rdma/rdma_user_rxe.h58
-rw-r--r--include/uapi/rdma/vmw_pvrdma-abi.h61
-rw-r--r--kernel/panic.c2
-rw-r--r--lib/bug.c2
-rw-r--r--lib/kobject.c2
-rw-r--r--net/core/dev.c19
-rw-r--r--net/core/ethtool.c6
-rw-r--r--net/core/sock.c2
-rw-r--r--net/ipv4/esp4.c88
-rw-r--r--net/ipv4/esp4_offload.c80
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c5
-rw-r--r--net/ipv6/esp6.c85
-rw-r--r--net/ipv6/esp6_offload.c87
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c5
-rw-r--r--net/packet/af_packet.c3
-rw-r--r--net/sched/sch_generic.c16
-rw-r--r--net/xfrm/xfrm_device.c201
-rw-r--r--net/xfrm/xfrm_input.c5
-rw-r--r--net/xfrm/xfrm_replay.c2
-rw-r--r--net/xfrm/xfrm_state.c17
-rw-r--r--net/xfrm/xfrm_user.c2
388 files changed, 22936 insertions, 9856 deletions
diff --git a/Documentation/infiniband/user_verbs.txt b/Documentation/infiniband/user_verbs.txt
index e5092d696da2..df049b9f5b6e 100644
--- a/Documentation/infiniband/user_verbs.txt
+++ b/Documentation/infiniband/user_verbs.txt
@@ -5,7 +5,7 @@ USERSPACE VERBS ACCESS
described in chapter 11 of the InfiniBand Architecture Specification.
To use the verbs, the libibverbs library, available from
- http://www.openfabrics.org/, is required. libibverbs contains a
+ https://github.com/linux-rdma/rdma-core, is required. libibverbs contains a
device-independent API for using the ib_uverbs interface.
libibverbs also requires appropriate device-dependent kernel and
userspace driver for your InfiniBand hardware. For example, to use
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index c6beb5f1637f..c8003bd3228f 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -230,6 +230,8 @@ x25.txt
- general info on X.25 development.
x25-iface.txt
- description of the X.25 Packet Layer to LAPB device interface.
+xfrm_device.txt
+ - description of XFRM offload API
xfrm_proc.txt
- description of the statistics package for XFRM.
xfrm_sync.txt
diff --git a/Documentation/networking/xfrm_device.txt b/Documentation/networking/xfrm_device.txt
new file mode 100644
index 000000000000..50c34ca65efe
--- /dev/null
+++ b/Documentation/networking/xfrm_device.txt
@@ -0,0 +1,135 @@
+
+===============================================
+XFRM device - offloading the IPsec computations
+===============================================
+Shannon Nelson <shannon.nelson@oracle.com>
+
+
+Overview
+========
+
+IPsec is a useful feature for securing network traffic, but the
+computational cost is high: a 10Gbps link can easily be brought down
+to under 1Gbps, depending on the traffic and link configuration.
+Luckily, there are NICs that offer a hardware-based IPsec offload which
+can radically increase throughput and decrease CPU utilization. The XFRM
+Device interface allows NIC drivers to offer the stack access to the
+hardware offload.
+
+Userland access to the offload is typically through a system such as
+libreswan or KAME/raccoon, but the iproute2 'ip xfrm' command set can
+be handy when experimenting. An example command might look something
+like this:
+
+ ip x s add proto esp dst 14.0.0.70 src 14.0.0.52 spi 0x07 mode transport \
+ reqid 0x07 replay-window 32 \
+ aead 'rfc4106(gcm(aes))' 0x44434241343332312423222114131211f4f3f2f1 128 \
+ sel src 14.0.0.52/24 dst 14.0.0.70/24 proto tcp \
+ offload dev eth4 dir in
+
+Yes, that's ugly, but that's what shell scripts and/or libreswan are for.
+
+
+
+Callbacks to implement
+======================
+
+/* from include/linux/netdevice.h */
+struct xfrmdev_ops {
+ int (*xdo_dev_state_add) (struct xfrm_state *x);
+ void (*xdo_dev_state_delete) (struct xfrm_state *x);
+ void (*xdo_dev_state_free) (struct xfrm_state *x);
+ bool (*xdo_dev_offload_ok) (struct sk_buff *skb,
+ struct xfrm_state *x);
+ void (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
+};
+
+The NIC driver offering IPsec offload will need to implement these
+callbacks to make the offload available to the network stack's
+XFRM subsystem. Additionally, the feature bits NETIF_F_HW_ESP and
+NETIF_F_HW_ESP_TX_CSUM will signal the availability of the offload.
+
+
+
+Flow
+====
+
+At probe time and before the call to register_netdev(), the driver should
+set up local data structures and XFRM callbacks, and set the feature bits.
+The XFRM code's listener will finish the setup on NETDEV_REGISTER.
+
+ adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops;
+ adapter->netdev->features |= NETIF_F_HW_ESP;
+ adapter->netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
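(For illustration only, not part of this patch: the ops table is just a struct
wiring up the callbacks described above; the mydrv_* names are hypothetical.)

  static const struct xfrmdev_ops mydrv_xfrmdev_ops = {
  	.xdo_dev_state_add         = mydrv_xdo_dev_state_add,
  	.xdo_dev_state_delete      = mydrv_xdo_dev_state_delete,
  	.xdo_dev_state_free        = mydrv_xdo_dev_state_free,
  	.xdo_dev_offload_ok        = mydrv_xdo_dev_offload_ok,
  	.xdo_dev_state_advance_esn = mydrv_xdo_dev_state_advance_esn,
  };
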
+When new SAs are set up with a request for the "offload" feature, the
+driver's xdo_dev_state_add() will be given the new SA to be offloaded
+and an indication of whether it is for Rx or Tx. The driver should
+ - verify the algorithm is supported for offloads
+ - store the SA information (key, salt, target-ip, protocol, etc)
+ - enable the HW offload of the SA
+
+The driver can also set an offload_handle in the SA, an opaque void pointer
+that can be used to convey context into the fast-path offload requests.
+
+ xs->xso.offload_handle = context;
+
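A minimal sketch of how a driver might wire these steps together (the mydrv_*
structures and helpers are hypothetical; only the xfrm_state / xso fields come
from the stack):

  static int mydrv_xdo_dev_state_add(struct xfrm_state *xs)
  {
  	struct mydrv_priv *priv = netdev_priv(xs->xso.dev);
  	struct mydrv_sa *sa;

  	/* verify the algorithm is supported for offloads */
  	if (!xs->aead || strcmp(xs->aead->alg_name, "rfc4106(gcm(aes))"))
  		return -EINVAL;

  	/* store the SA information (key, salt, target-ip, protocol, ...) */
  	sa = mydrv_sa_alloc(priv, xs);
  	if (!sa)
  		return -ENOMEM;

  	/* enable the HW offload of the SA, Rx or Tx as requested */
  	if (xs->xso.flags & XFRM_OFFLOAD_INBOUND)
  		mydrv_hw_enable_rx_sa(priv, sa);
  	else
  		mydrv_hw_enable_tx_sa(priv, sa);

  	/* opaque context for the fast-path hooks */
  	xs->xso.offload_handle = (unsigned long)sa;
  	return 0;
  }
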
+
+When the network stack is preparing an IPsec packet for an SA that has
+been set up for offload, it first calls into xdo_dev_offload_ok() with
+the skb and the intended offload state to ask the driver if the offload
+will be serviceable. This can check the packet information to be sure the
+offload can be supported (e.g. IPv4 or IPv6, no IPv4 options, etc) and
+return true or false to signify its support.
+
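A rough example of such a check, in the spirit of an IPv4/IPv6-only device
(the function name is hypothetical):

  static bool mydrv_xdo_dev_offload_ok(struct sk_buff *skb,
  				       struct xfrm_state *xs)
  {
  	if (xs->props.family == AF_INET) {
  		/* offload with IPv4 options is not supported */
  		if (ip_hdr(skb)->ihl != 5)
  			return false;
  	} else {
  		/* offload with IPv6 extension headers is not supported */
  		if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
  			return false;
  	}
  	return true;
  }
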
+When ready to send, the driver needs to inspect the Tx packet for the
+offload information, including the opaque context, and set up the packet
+send accordingly.
+
+ xs = xfrm_input_state(skb);
+ context = xs->xso.offload_handle;
+ set up HW for send
+
+The stack has already inserted the appropriate IPsec headers in the
+packet data; the offload just needs to do the encryption and fix up the
+header values.
+
+
+When a packet is received and the HW has indicated that it offloaded a
+decryption, the driver needs to add a reference to the decoded SA into
+the packet's skb. At this point the data should be decrypted but the
+IPsec headers are still in the packet data; they are removed later up
+the stack in xfrm_input().
+
+ find and hold the SA that was used for the Rx skb
+ get spi, protocol, and destination IP from packet headers
+ xs = find xs from (spi, protocol, dest_IP)
+ xfrm_state_hold(xs);
+
+ store the state information into the skb
+ skb->sp = secpath_dup(skb->sp);
+ skb->sp->xvec[skb->sp->len++] = xs;
+ skb->sp->olen++;
+
+ indicate the success and/or error status of the offload
+ xo = xfrm_offload(skb);
+ xo->flags = CRYPTO_DONE;
+ xo->status = crypto_status;
+
+ hand the packet to napi_gro_receive() as usual
+
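The same sequence expressed as C might look roughly like this (a sketch only;
the mydrv_* names are hypothetical, and xfrm_state_lookup() already returns
the state with a reference held):

  static void mydrv_rx_ipsec_done(struct mydrv_priv *priv, struct sk_buff *skb,
  				  __be32 spi, u8 proto, xfrm_address_t *daddr)
  {
  	struct xfrm_offload *xo;
  	struct xfrm_state *xs;

  	/* find and hold the SA that was used for this packet */
  	xs = xfrm_state_lookup(dev_net(priv->netdev), skb->mark,
  			       daddr, spi, proto, AF_INET);
  	if (!xs)
  		return;		/* not ours; let the stack handle it in software */

  	/* store the state information into the skb */
  	skb->sp = secpath_dup(skb->sp);
  	if (!skb->sp) {
  		xfrm_state_put(xs);
  		return;
  	}
  	skb->sp->xvec[skb->sp->len++] = xs;
  	skb->sp->olen++;

  	/* indicate the success of the offload */
  	xo = xfrm_offload(skb);
  	xo->flags = CRYPTO_DONE;
  	xo->status = CRYPTO_SUCCESS;

  	/* hand the packet to napi_gro_receive() as usual */
  }
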
+In ESN mode, xdo_dev_state_advance_esn() is called from xfrm_replay_advance_esn().
+The driver will check the packet sequence number and update the HW ESN state
+machine if needed.
+
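That callback can be as small as pushing the new high-order sequence bits to
the hardware (again a sketch, with a hypothetical mydrv_sa context kept in
offload_handle):

  static void mydrv_xdo_dev_state_advance_esn(struct xfrm_state *xs)
  {
  	struct mydrv_sa *sa = (struct mydrv_sa *)xs->xso.offload_handle;

  	/* xs->replay_esn holds the ESN state the stack just advanced */
  	mydrv_hw_update_esn(sa, xs->replay_esn->seq_hi, xs->replay_esn->oseq_hi);
  }
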
+When the SA is removed by the user, the driver's xdo_dev_state_delete()
+is asked to disable the offload. Later, xdo_dev_state_free() is called
+from a garbage collection routine after all reference counts to the state
+have been removed and any remaining resources can be cleared for the
+offload state. How these are used by the driver will depend on specific
+hardware needs.
+
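A sketch of that split (hypothetical mydrv_* helpers): xdo_dev_state_delete()
only quiesces the hardware, while xdo_dev_state_free() releases the resources.

  static void mydrv_xdo_dev_state_delete(struct xfrm_state *xs)
  {
  	struct mydrv_sa *sa = (struct mydrv_sa *)xs->xso.offload_handle;

  	mydrv_hw_disable_sa(sa);	/* stop offloading this SA */
  }

  static void mydrv_xdo_dev_state_free(struct xfrm_state *xs)
  {
  	struct mydrv_sa *sa = (struct mydrv_sa *)xs->xso.offload_handle;

  	mydrv_sa_free(sa);		/* all references gone; release it */
  }
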
+When a netdev is set to DOWN, the XFRM stack's netdev listener will call
+xdo_dev_state_delete() and xdo_dev_state_free() on any remaining offloaded
+states.
+
+
diff --git a/MAINTAINERS b/MAINTAINERS
index 327f2807e091..290a3f8d900c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6582,7 +6582,7 @@ M: Doug Ledford <dledford@redhat.com>
M: Sean Hefty <sean.hefty@intel.com>
M: Hal Rosenstock <hal.rosenstock@gmail.com>
L: linux-rdma@vger.kernel.org
-W: http://www.openfabrics.org/
+W: https://github.com/linux-rdma/rdma-core
Q: http://patchwork.kernel.org/project/linux-rdma/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma.git
S: Supported
@@ -6760,6 +6760,7 @@ M: Shiraz Saleem <shiraz.saleem@intel.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/i40iw/
+F: include/uapi/rdma/i40iw-abi.h
INTEL MERRIFIELD GPIO DRIVER
M: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c
index f23781d6bbb3..f0bfa1448744 100644
--- a/arch/mn10300/mm/fault.c
+++ b/arch/mn10300/mm/fault.c
@@ -60,7 +60,7 @@ void bust_spinlocks(int yes)
void do_BUG(const char *file, int line)
{
bust_spinlocks(1);
- printk(KERN_EMERG "------------[ cut here ]------------\n");
+ printk(KERN_EMERG CUT_HERE);
printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line);
}
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index cbf186522016..2a972ed6851b 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -4,6 +4,7 @@ menuconfig INFINIBAND
depends on NET
depends on INET
depends on m || IPV6 != m
+ depends on !ALPHA
select IRQ_POLL
---help---
Core support for InfiniBand (IB). Make sure to also select
@@ -19,7 +20,8 @@ config INFINIBAND_USER_MAD
Userspace InfiniBand Management Datagram (MAD) support. This
is the kernel side of the userspace MAD support, which allows
userspace processes to send and receive MADs. You will also
- need libibumad from <http://www.openfabrics.org/downloads/management/>.
+ need libibumad from rdma-core
+ <https://github.com/linux-rdma/rdma-core>.
config INFINIBAND_USER_ACCESS
tristate "InfiniBand userspace access (verbs and CM)"
@@ -31,16 +33,15 @@ config INFINIBAND_USER_ACCESS
to set up connections and directly access InfiniBand
hardware for fast-path operations. You will also need
libibverbs, libibcm and a hardware driver library from
- <http://www.openfabrics.org/git/>.
+ rdma-core <https://github.com/linux-rdma/rdma-core>.
-config INFINIBAND_EXP_USER_ACCESS
- bool "Allow experimental support for Infiniband ABI"
+config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI
+ bool "Allow experimental legacy verbs in new ioctl uAPI (EXPERIMENTAL)"
depends on INFINIBAND_USER_ACCESS
---help---
- IOCTL based ABI support for Infiniband. This allows userspace
- to invoke the experimental IOCTL based ABI.
- These commands are parsed via per-device parsing tree and
- enables per-device features.
+ IOCTL based uAPI support for Infiniband is enabled by default for
+ new verbs only. This allows userspace to invoke the IOCTL based uAPI
+ for current legacy verbs too.
config INFINIBAND_USER_MEM
bool
@@ -60,9 +61,12 @@ config INFINIBAND_ON_DEMAND_PAGING
pages on demand instead.
config INFINIBAND_ADDR_TRANS
- bool
+ bool "RDMA/CM"
depends on INFINIBAND
default y
+ ---help---
+ Support for RDMA communication manager (CM).
+ This allows for a generic connection abstraction over RDMA.
config INFINIBAND_ADDR_TRANS_CONFIGFS
bool
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index b4df164f71a6..728bbed28122 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -11,7 +11,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
- security.o nldev.o
+ security.o nldev.o restrack.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
@@ -33,4 +33,6 @@ ib_ucm-y := ucm.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
- uverbs_ioctl_merge.o
+ uverbs_ioctl_merge.o uverbs_std_types_cq.o \
+ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
+ uverbs_std_types_mr.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 0547b66718b7..88a7542d8c7b 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -245,8 +245,9 @@ void rdma_addr_unregister_client(struct rdma_addr_client *client)
}
EXPORT_SYMBOL(rdma_addr_unregister_client);
-int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
- const unsigned char *dst_dev_addr)
+void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
+ const struct net_device *dev,
+ const unsigned char *dst_dev_addr)
{
dev_addr->dev_type = dev->type;
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
@@ -254,24 +255,21 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
if (dst_dev_addr)
memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
dev_addr->bound_dev_if = dev->ifindex;
- return 0;
}
EXPORT_SYMBOL(rdma_copy_addr);
int rdma_translate_ip(const struct sockaddr *addr,
- struct rdma_dev_addr *dev_addr,
- u16 *vlan_id)
+ struct rdma_dev_addr *dev_addr)
{
struct net_device *dev;
- int ret = -EADDRNOTAVAIL;
if (dev_addr->bound_dev_if) {
dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
if (!dev)
return -ENODEV;
- ret = rdma_copy_addr(dev_addr, dev, NULL);
+ rdma_copy_addr(dev_addr, dev, NULL);
dev_put(dev);
- return ret;
+ return 0;
}
switch (addr->sa_family) {
@@ -280,12 +278,9 @@ int rdma_translate_ip(const struct sockaddr *addr,
((const struct sockaddr_in *)addr)->sin_addr.s_addr);
if (!dev)
- return ret;
+ return -EADDRNOTAVAIL;
- ret = rdma_copy_addr(dev_addr, dev, NULL);
- dev_addr->bound_dev_if = dev->ifindex;
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(dev);
+ rdma_copy_addr(dev_addr, dev, NULL);
dev_put(dev);
break;
#if IS_ENABLED(CONFIG_IPV6)
@@ -295,10 +290,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
if (ipv6_chk_addr(dev_addr->net,
&((const struct sockaddr_in6 *)addr)->sin6_addr,
dev, 1)) {
- ret = rdma_copy_addr(dev_addr, dev, NULL);
- dev_addr->bound_dev_if = dev->ifindex;
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(dev);
+ rdma_copy_addr(dev_addr, dev, NULL);
break;
}
}
@@ -306,7 +298,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
break;
#endif
}
- return ret;
+ return 0;
}
EXPORT_SYMBOL(rdma_translate_ip);
@@ -337,7 +329,8 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
-static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+static int ib_nl_fetch_ha(const struct dst_entry *dst,
+ struct rdma_dev_addr *dev_addr,
const void *daddr, u32 seq, u16 family)
{
if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
@@ -348,11 +341,12 @@ static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
}
-static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+static int dst_fetch_ha(const struct dst_entry *dst,
+ struct rdma_dev_addr *dev_addr,
const void *daddr)
{
struct neighbour *n;
- int ret;
+ int ret = 0;
n = dst_neigh_lookup(dst, daddr);
@@ -362,7 +356,7 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
neigh_event_send(n, NULL);
ret = -ENODATA;
} else {
- ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
+ rdma_copy_addr(dev_addr, dst->dev, n->ha);
}
rcu_read_unlock();
@@ -372,7 +366,7 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
return ret;
}
-static bool has_gateway(struct dst_entry *dst, sa_family_t family)
+static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
{
struct rtable *rt;
struct rt6_info *rt6;
@@ -386,7 +380,7 @@ static bool has_gateway(struct dst_entry *dst, sa_family_t family)
return rt6->rt6i_flags & RTF_GATEWAY;
}
-static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
const struct sockaddr *dst_in, u32 seq)
{
const struct sockaddr_in *dst_in4 =
@@ -490,7 +484,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
}
#endif
-static int addr_resolve_neigh(struct dst_entry *dst,
+static int addr_resolve_neigh(const struct dst_entry *dst,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
u32 seq)
@@ -498,7 +492,7 @@ static int addr_resolve_neigh(struct dst_entry *dst,
if (dst->dev->flags & IFF_LOOPBACK) {
int ret;
- ret = rdma_translate_ip(dst_in, addr, NULL);
+ ret = rdma_translate_ip(dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr,
MAX_ADDR_LEN);
@@ -510,7 +504,9 @@ static int addr_resolve_neigh(struct dst_entry *dst,
if (!(dst->dev->flags & IFF_NOARP))
return fetch_ha(dst, addr, dst_in, seq);
- return rdma_copy_addr(addr, dst->dev, NULL);
+ rdma_copy_addr(addr, dst->dev, NULL);
+
+ return 0;
}
static int addr_resolve(struct sockaddr *src_in,
@@ -574,7 +570,7 @@ static int addr_resolve(struct sockaddr *src_in,
if (ndev) {
if (ndev->flags & IFF_LOOPBACK)
- ret = rdma_translate_ip(dst_in, addr, NULL);
+ ret = rdma_translate_ip(dst_in, addr);
else
addr->bound_dev_if = ndev->ifindex;
dev_put(ndev);
@@ -742,7 +738,6 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
return addr_resolve(src_in, dst_addr, addr, false, 0);
}
-EXPORT_SYMBOL(rdma_resolve_ip_route);
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
@@ -763,7 +758,6 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
EXPORT_SYMBOL(rdma_addr_cancel);
struct resolve_cb_context {
- struct rdma_dev_addr *addr;
struct completion comp;
int status;
};
@@ -771,39 +765,31 @@ struct resolve_cb_context {
static void resolve_cb(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context)
{
- if (!status)
- memcpy(((struct resolve_cb_context *)context)->addr,
- addr, sizeof(struct rdma_dev_addr));
((struct resolve_cb_context *)context)->status = status;
complete(&((struct resolve_cb_context *)context)->comp);
}
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
const union ib_gid *dgid,
- u8 *dmac, u16 *vlan_id, int *if_index,
+ u8 *dmac, const struct net_device *ndev,
int *hoplimit)
{
- int ret = 0;
struct rdma_dev_addr dev_addr;
struct resolve_cb_context ctx;
- struct net_device *dev;
-
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
-
+ int ret;
rdma_gid2ip(&sgid_addr._sockaddr, sgid);
rdma_gid2ip(&dgid_addr._sockaddr, dgid);
memset(&dev_addr, 0, sizeof(dev_addr));
- if (if_index)
- dev_addr.bound_dev_if = *if_index;
+ dev_addr.bound_dev_if = ndev->ifindex;
dev_addr.net = &init_net;
- ctx.addr = &dev_addr;
init_completion(&ctx.comp);
ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
&dev_addr, 1000, resolve_cb, &ctx);
@@ -817,42 +803,9 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
return ret;
memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
- dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
- if (!dev)
- return -ENODEV;
- if (if_index)
- *if_index = dev_addr.bound_dev_if;
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(dev);
- if (hoplimit)
- *hoplimit = dev_addr.hoplimit;
- dev_put(dev);
- return ret;
-}
-EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
-
-int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
-{
- int ret = 0;
- struct rdma_dev_addr dev_addr;
- union {
- struct sockaddr _sockaddr;
- struct sockaddr_in _sockaddr_in;
- struct sockaddr_in6 _sockaddr_in6;
- } gid_addr;
-
- rdma_gid2ip(&gid_addr._sockaddr, sgid);
-
- memset(&dev_addr, 0, sizeof(dev_addr));
- dev_addr.net = &init_net;
- ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
- if (ret)
- return ret;
-
- memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
- return ret;
+ *hoplimit = dev_addr.hoplimit;
+ return 0;
}
-EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 7ebb4924fa85..ecc55e98ddd3 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -59,8 +59,6 @@ struct ib_update_work {
union ib_gid zgid;
EXPORT_SYMBOL(zgid);
-static const struct ib_gid_attr zattr;
-
enum gid_attr_find_mask {
GID_ATTR_FIND_MASK_GID = 1UL << 0,
GID_ATTR_FIND_MASK_NETDEV = 1UL << 1,
@@ -73,15 +71,6 @@ enum gid_table_entry_props {
GID_TABLE_ENTRY_DEFAULT = 1UL << 1,
};
-enum gid_table_write_action {
- GID_TABLE_WRITE_ACTION_ADD,
- GID_TABLE_WRITE_ACTION_DEL,
- /* MODIFY only updates the GID table. Currently only used by
- * ib_cache_update.
- */
- GID_TABLE_WRITE_ACTION_MODIFY
-};
-
struct ib_gid_table_entry {
unsigned long props;
union ib_gid gid;
@@ -100,16 +89,13 @@ struct ib_gid_table {
* (a) Find the GID
* (b) Delete it.
*
- * Add/delete should be carried out atomically.
- * This is done by locking this mutex from multiple
- * writers. We don't need this lock for IB, as the MAD
- * layer replaces all entries. All data_vec entries
- * are locked by this lock.
**/
- struct mutex lock;
- /* This lock protects the table entries from being
- * read and written simultaneously.
+ /* Any writer to data_vec must hold this lock and the write side of
+ * rwlock. readers must hold only rwlock. All writers must be in a
+ * sleepable context.
*/
+ struct mutex lock;
+ /* rwlock protects data_vec[ix]->props. */
rwlock_t rwlock;
struct ib_gid_table_entry *data_vec;
};
@@ -163,94 +149,127 @@ int ib_cache_gid_parse_type_str(const char *buf)
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
-/* This function expects that rwlock will be write locked in all
- * scenarios and that lock will be locked in sleep-able (RoCE)
- * scenarios.
- */
-static int write_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- enum gid_table_write_action action,
- bool default_gid)
- __releases(&table->rwlock) __acquires(&table->rwlock)
+static void del_roce_gid(struct ib_device *device, u8 port_num,
+ struct ib_gid_table *table, int ix)
{
- int ret = 0;
- struct net_device *old_net_dev;
- enum ib_gid_type old_gid_type;
+ pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+ device->name, port_num, ix,
+ table->data_vec[ix].gid.raw);
+
+ if (rdma_cap_roce_gid_table(device, port_num))
+ device->del_gid(&table->data_vec[ix].attr,
+ &table->data_vec[ix].context);
+ dev_put(table->data_vec[ix].attr.ndev);
+}
- /* in rdma_cap_roce_gid_table, this funciton should be protected by a
- * sleep-able lock.
- */
+static int add_roce_gid(struct ib_gid_table *table,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ struct ib_gid_table_entry *entry;
+ int ix = attr->index;
+ int ret = 0;
- if (rdma_cap_roce_gid_table(ib_dev, port)) {
- table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
- write_unlock_irq(&table->rwlock);
- /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
- * RoCE providers and thus only updates the cache.
- */
- if (action == GID_TABLE_WRITE_ACTION_ADD)
- ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr,
- &table->data_vec[ix].context);
- else if (action == GID_TABLE_WRITE_ACTION_DEL)
- ret = ib_dev->del_gid(ib_dev, port, ix,
- &table->data_vec[ix].context);
- write_lock_irq(&table->rwlock);
+ if (!attr->ndev) {
+ pr_err("%s NULL netdev device=%s port=%d index=%d\n",
+ __func__, attr->device->name, attr->port_num,
+ attr->index);
+ return -EINVAL;
}
- old_net_dev = table->data_vec[ix].attr.ndev;
- old_gid_type = table->data_vec[ix].attr.gid_type;
- if (old_net_dev && old_net_dev != attr->ndev)
- dev_put(old_net_dev);
- /* if modify_gid failed, just delete the old gid */
- if (ret || action == GID_TABLE_WRITE_ACTION_DEL) {
- gid = &zgid;
- attr = &zattr;
- table->data_vec[ix].context = NULL;
+ entry = &table->data_vec[ix];
+ if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) {
+ WARN(1, "GID table corruption device=%s port=%d index=%d\n",
+ attr->device->name, attr->port_num,
+ attr->index);
+ return -EINVAL;
}
- memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
- memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
- if (default_gid) {
- table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
- if (action == GID_TABLE_WRITE_ACTION_DEL)
- table->data_vec[ix].attr.gid_type = old_gid_type;
+ if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
+ ret = attr->device->add_gid(gid, attr, &entry->context);
+ if (ret) {
+ pr_err("%s GID add failed device=%s port=%d index=%d\n",
+ __func__, attr->device->name, attr->port_num,
+ attr->index);
+ goto add_err;
+ }
}
- if (table->data_vec[ix].attr.ndev &&
- table->data_vec[ix].attr.ndev != old_net_dev)
- dev_hold(table->data_vec[ix].attr.ndev);
-
- table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
+ dev_hold(attr->ndev);
+add_err:
+ if (!ret)
+ pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+ attr->device->name, attr->port_num, ix, gid->raw);
return ret;
}
-static int add_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- bool default_gid) {
- return write_gid(ib_dev, port, table, ix, gid, attr,
- GID_TABLE_WRITE_ACTION_ADD, default_gid);
-}
+/**
+ * add_modify_gid - Add or modify GID table entry
+ *
+ * @table: GID table in which GID to be added or modified
+ * @gid: GID content
+ * @attr: Attributes of the GID
+ *
+ * Returns 0 on success or appropriate error code. It accepts zero
+ * GID addition for non RoCE ports for HCA's who report them as valid
+ * GID. However such zero GIDs are not added to the cache.
+ */
+static int add_modify_gid(struct ib_gid_table *table,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ int ret;
+
+ if (rdma_protocol_roce(attr->device, attr->port_num)) {
+ ret = add_roce_gid(table, gid, attr);
+ if (ret)
+ return ret;
+ } else {
+ /*
+ * Some HCA's report multiple GID entries with only one
+ * valid GID, but remaining as zero GID.
+ * So ignore such behavior for IB link layer and don't
+ * fail the call, but don't add such entry to GID cache.
+ */
+ if (!memcmp(gid, &zgid, sizeof(*gid)))
+ return 0;
+ }
+
+ lockdep_assert_held(&table->lock);
+ memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
+ memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
-static int modify_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- bool default_gid) {
- return write_gid(ib_dev, port, table, ix, gid, attr,
- GID_TABLE_WRITE_ACTION_MODIFY, default_gid);
+ write_lock_irq(&table->rwlock);
+ table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
+ write_unlock_irq(&table->rwlock);
+ return 0;
}
-static int del_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- bool default_gid) {
- return write_gid(ib_dev, port, table, ix, &zgid, &zattr,
- GID_TABLE_WRITE_ACTION_DEL, default_gid);
+/**
+ * del_gid - Delete GID table entry
+ *
+ * @ib_dev: IB device whose GID entry to be deleted
+ * @port: Port number of the IB device
+ * @table: GID table of the IB device for a port
+ * @ix: GID entry index to delete
+ *
+ */
+static void del_gid(struct ib_device *ib_dev, u8 port,
+ struct ib_gid_table *table, int ix)
+{
+ lockdep_assert_held(&table->lock);
+ write_lock_irq(&table->rwlock);
+ table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
+ write_unlock_irq(&table->rwlock);
+
+ if (rdma_protocol_roce(ib_dev, port))
+ del_roce_gid(ib_dev, port, table, ix);
+ memcpy(&table->data_vec[ix].gid, &zgid, sizeof(zgid));
+ memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
+ table->data_vec[ix].context = NULL;
}
-/* rwlock should be read locked */
+/* rwlock should be read locked, or lock should be held */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
const struct ib_gid_attr *val, bool default_gid,
unsigned long mask, int *pempty)
@@ -266,15 +285,36 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
i++;
+ /* find_gid() is used during GID addition where it is expected
+ * to return a free entry slot which is not duplicate.
+ * Free entry slot is requested and returned if pempty is set,
+ * so lookup free slot only if requested.
+ */
+ if (pempty && empty < 0) {
+ if (data->props & GID_TABLE_ENTRY_INVALID &&
+ (default_gid ==
+ !!(data->props & GID_TABLE_ENTRY_DEFAULT))) {
+ /*
+ * Found an invalid (free) entry; allocate it.
+ * If default GID is requested, then our
+ * found slot must be one of the DEFAULT
+ * reserved slots or we fail.
+ * This ensures that only DEFAULT reserved
+ * slots are used for default property GIDs.
+ */
+ empty = curr_index;
+ }
+ }
+
+ /*
+ * Additionally find_gid() is used to find valid entry during
+ * lookup operation, where validity needs to be checked. So
+ * find the empty entry first to continue to search for a free
+ * slot and ignore its INVALID flag.
+ */
if (data->props & GID_TABLE_ENTRY_INVALID)
continue;
- if (empty < 0)
- if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
- !memcmp(attr, &zattr, sizeof(*attr)) &&
- !data->props)
- empty = curr_index;
-
if (found >= 0)
continue;
@@ -310,20 +350,56 @@ static void make_default_gid(struct net_device *dev, union ib_gid *gid)
addrconf_ifid_eui48(&gid->raw[8], dev);
}
-int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
- union ib_gid *gid, struct ib_gid_attr *attr)
+static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr,
+ unsigned long mask, bool default_gid)
{
struct ib_gid_table *table;
- int ix;
int ret = 0;
- struct net_device *idev;
int empty;
+ int ix;
- table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
-
+ /* Do not allow adding zero GID in support of
+ * IB spec version 1.3 section 4.1.1 point (6) and
+ * section 12.7.10 and section 12.7.20
+ */
if (!memcmp(gid, &zgid, sizeof(*gid)))
return -EINVAL;
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+
+ mutex_lock(&table->lock);
+
+ ix = find_gid(table, gid, attr, default_gid, mask, &empty);
+ if (ix >= 0)
+ goto out_unlock;
+
+ if (empty < 0) {
+ ret = -ENOSPC;
+ goto out_unlock;
+ }
+ attr->device = ib_dev;
+ attr->index = empty;
+ attr->port_num = port;
+ ret = add_modify_gid(table, gid, attr);
+ if (!ret)
+ dispatch_gid_change_event(ib_dev, port);
+
+out_unlock:
+ mutex_unlock(&table->lock);
+ if (ret)
+ pr_warn("%s: unable to add gid %pI6 error=%d\n",
+ __func__, gid->raw, ret);
+ return ret;
+}
+
+int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr)
+{
+ struct net_device *idev;
+ unsigned long mask;
+ int ret;
+
if (ib_dev->get_netdev) {
idev = ib_dev->get_netdev(ib_dev, port);
if (idev && attr->ndev != idev) {
@@ -340,57 +416,53 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
dev_put(idev);
}
- mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
-
- ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_NETDEV, &empty);
- if (ix >= 0)
- goto out_unlock;
-
- if (empty < 0) {
- ret = -ENOSPC;
- goto out_unlock;
- }
-
- ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
- if (!ret)
- dispatch_gid_change_event(ib_dev, port);
+ mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_NETDEV;
-out_unlock:
- write_unlock_irq(&table->rwlock);
- mutex_unlock(&table->lock);
+ ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
return ret;
}
-int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
- union ib_gid *gid, struct ib_gid_attr *attr)
+static int
+_ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr,
+ unsigned long mask, bool default_gid)
{
struct ib_gid_table *table;
+ int ret = 0;
int ix;
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
- ix = find_gid(table, gid, attr, false,
- GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_NETDEV |
- GID_ATTR_FIND_MASK_DEFAULT,
- NULL);
- if (ix < 0)
+ ix = find_gid(table, gid, attr, default_gid, mask, NULL);
+ if (ix < 0) {
+ ret = -EINVAL;
goto out_unlock;
+ }
- if (!del_gid(ib_dev, port, table, ix, false))
- dispatch_gid_change_event(ib_dev, port);
+ del_gid(ib_dev, port, table, ix);
+ dispatch_gid_change_event(ib_dev, port);
out_unlock:
- write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
- return 0;
+ if (ret)
+ pr_debug("%s: can't delete gid %pI6 error=%d\n",
+ __func__, gid->raw, ret);
+ return ret;
+}
+
+int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr)
+{
+ unsigned long mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_DEFAULT |
+ GID_ATTR_FIND_MASK_NETDEV;
+
+ return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
}
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
@@ -403,16 +475,14 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
- for (ix = 0; ix < table->sz; ix++)
- if (table->data_vec[ix].attr.ndev == ndev)
- if (!del_gid(ib_dev, port, table, ix,
- !!(table->data_vec[ix].props &
- GID_TABLE_ENTRY_DEFAULT)))
- deleted = true;
+ for (ix = 0; ix < table->sz; ix++) {
+ if (table->data_vec[ix].attr.ndev == ndev) {
+ del_gid(ib_dev, port, table, ix);
+ deleted = true;
+ }
+ }
- write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
if (deleted)
@@ -490,6 +560,19 @@ static int ib_cache_gid_find(struct ib_device *ib_dev,
mask, port, index);
}
+/**
+ * ib_find_cached_gid_by_port - Returns the GID table index where a specified
+ * GID value occurs. It searches for the specified GID value in the local
+ * software cache.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
+ * @port_num: The port number of the device where the GID value should be
+ * searched.
+ * @ndev: In RoCE, the net device of the device. Null means ignore.
+ * @index: The index into the cached GID table where the GID was found. This
+ * parameter may be NULL.
+ */
int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
const union ib_gid *gid,
enum ib_gid_type gid_type,
@@ -526,7 +609,7 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
EXPORT_SYMBOL(ib_find_cached_gid_by_port);
/**
- * ib_find_gid_by_filter - Returns the GID table index where a specified
+ * ib_cache_gid_find_by_filter - Returns the GID table index where a specified
* GID value occurs
* @device: The device to query.
* @gid: The GID value to search for.
@@ -537,7 +620,7 @@ EXPORT_SYMBOL(ib_find_cached_gid_by_port);
* otherwise, we continue searching the GID table. It's guaranteed that
* while filter is executed, ndev field is valid and the structure won't
* change. filter is executed in an atomic context. filter must not be NULL.
- * @index: The index into the cached GID table where the GID was found. This
+ * @index: The index into the cached GID table where the GID was found. This
* parameter may be NULL.
*
* ib_cache_gid_find_by_filter() searches for the specified GID value
@@ -571,27 +654,24 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
struct ib_gid_attr attr;
if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
- goto next;
+ continue;
if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
- goto next;
+ continue;
memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
- if (filter(gid, &attr, context))
+ if (filter(gid, &attr, context)) {
found = true;
-
-next:
- if (found)
+ if (index)
+ *index = i;
break;
+ }
}
read_unlock_irqrestore(&table->rwlock, flags);
if (!found)
return -ENOENT;
-
- if (index)
- *index = i;
return 0;
}
@@ -599,6 +679,7 @@ static struct ib_gid_table *alloc_gid_table(int sz)
{
struct ib_gid_table *table =
kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
+ int i;
if (!table)
return NULL;
@@ -612,6 +693,11 @@ static struct ib_gid_table *alloc_gid_table(int sz)
table->sz = sz;
rwlock_init(&table->rwlock);
+ /* Mark all entries as invalid so that allocator can allocate
+ * one of the invalid (free) entry.
+ */
+ for (i = 0; i < sz; i++)
+ table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID;
return table;
err_free_table:
@@ -636,16 +722,15 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
if (!table)
return;
- write_lock_irq(&table->rwlock);
+ mutex_lock(&table->lock);
for (i = 0; i < table->sz; ++i) {
if (memcmp(&table->data_vec[i].gid, &zgid,
- sizeof(table->data_vec[i].gid)))
- if (!del_gid(ib_dev, port, table, i,
- table->data_vec[i].props &
- GID_ATTR_FIND_MASK_DEFAULT))
- deleted = true;
+ sizeof(table->data_vec[i].gid))) {
+ del_gid(ib_dev, port, table, i);
+ deleted = true;
+ }
}
- write_unlock_irq(&table->rwlock);
+ mutex_unlock(&table->lock);
if (deleted)
dispatch_gid_change_event(ib_dev, port);
@@ -656,73 +741,34 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode)
{
- union ib_gid gid;
+ union ib_gid gid = { };
struct ib_gid_attr gid_attr;
- struct ib_gid_attr zattr_type = zattr;
struct ib_gid_table *table;
unsigned int gid_type;
+ unsigned long mask;
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
- make_default_gid(ndev, &gid);
+ mask = GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_DEFAULT |
+ GID_ATTR_FIND_MASK_NETDEV;
memset(&gid_attr, 0, sizeof(gid_attr));
gid_attr.ndev = ndev;
for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
- int ix;
- union ib_gid current_gid;
- struct ib_gid_attr current_gid_attr = {};
-
if (1UL << gid_type & ~gid_type_mask)
continue;
gid_attr.gid_type = gid_type;
- mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
- ix = find_gid(table, NULL, &gid_attr, true,
- GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_DEFAULT,
- NULL);
-
- /* Coudn't find default GID location */
- if (WARN_ON(ix < 0))
- goto release;
-
- zattr_type.gid_type = gid_type;
-
- if (!__ib_cache_gid_get(ib_dev, port, ix,
- &current_gid, &current_gid_attr) &&
- mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
- !memcmp(&gid, &current_gid, sizeof(gid)) &&
- !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
- goto release;
-
- if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
- memcmp(&current_gid_attr, &zattr_type,
- sizeof(current_gid_attr))) {
- if (del_gid(ib_dev, port, table, ix, true)) {
- pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
- ix, gid.raw);
- goto release;
- } else {
- dispatch_gid_change_event(ib_dev, port);
- }
- }
-
if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
- if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
- pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
- gid.raw);
- else
- dispatch_gid_change_event(ib_dev, port);
+ make_default_gid(ndev, &gid);
+ __ib_cache_gid_add(ib_dev, port, &gid,
+ &gid_attr, mask, true);
+ } else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
+ _ib_cache_gid_del(ib_dev, port, &gid,
+ &gid_attr, mask, true);
}
-
-release:
- if (current_gid_attr.ndev)
- dev_put(current_gid_attr.ndev);
- write_unlock_irq(&table->rwlock);
- mutex_unlock(&table->lock);
}
}
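With the rework above, the default-GID update is a thin wrapper around the generic add/del paths keyed by the GID_ATTR_FIND_MASK_* bits. A hedged sketch of how a RoCE netdev event handler might drive it; the wrapper name and the link_up flag are illustrative:

static void refresh_default_gids(struct ib_device *ib_dev, u8 port,
				 struct net_device *ndev, bool link_up)
{
	unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	/* SET installs the default GID(s) for the port, DELETE removes them. */
	ib_cache_gid_set_default_gid(ib_dev, port, ndev, gid_type_mask,
				     link_up ? IB_CACHE_GID_DEFAULT_MODE_SET :
					       IB_CACHE_GID_DEFAULT_MODE_DELETE);
}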
@@ -822,12 +868,7 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
if (err)
return err;
- err = roce_rescan_device(ib_dev);
-
- if (err) {
- gid_table_cleanup_one(ib_dev);
- gid_table_release_one(ib_dev);
- }
+ rdma_roce_rescan_device(ib_dev);
return err;
}
@@ -854,6 +895,20 @@ int ib_get_cached_gid(struct ib_device *device,
}
EXPORT_SYMBOL(ib_get_cached_gid);
+/**
+ * ib_find_cached_gid - Returns the port number and GID table index where
+ * a specified GID value occurs.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
+ * @ndev: In RoCE, the net device of the device. NULL means ignore.
+ * @port_num: The port number of the device where the GID value was found.
+ * @index: The index into the cached GID table where the GID was found. This
+ * parameter may be NULL.
+ *
+ * ib_find_cached_gid() searches for the specified GID value in
+ * the local software cache.
+ */
int ib_find_cached_gid(struct ib_device *device,
const union ib_gid *gid,
enum ib_gid_type gid_type,
@@ -881,7 +936,6 @@ int ib_find_gid_by_filter(struct ib_device *device,
port_num, filter,
context, index);
}
-EXPORT_SYMBOL(ib_find_gid_by_filter);
int ib_get_cached_pkey(struct ib_device *device,
u8 port_num,
@@ -917,8 +971,7 @@ int ib_get_cached_subnet_prefix(struct ib_device *device,
unsigned long flags;
int p;
- if (port_num < rdma_start_port(device) ||
- port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
p = port_num - rdma_start_port(device);
@@ -1028,7 +1081,7 @@ int ib_get_cached_port_state(struct ib_device *device,
unsigned long flags;
int ret = 0;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
@@ -1040,21 +1093,46 @@ int ib_get_cached_port_state(struct ib_device *device,
}
EXPORT_SYMBOL(ib_get_cached_port_state);
+static int config_non_roce_gid_cache(struct ib_device *device,
+ u8 port, int gid_tbl_len)
+{
+ struct ib_gid_attr gid_attr = {};
+ struct ib_gid_table *table;
+ union ib_gid gid;
+ int ret = 0;
+ int i;
+
+ gid_attr.device = device;
+ gid_attr.port_num = port;
+ table = device->cache.ports[port - rdma_start_port(device)].gid;
+
+ mutex_lock(&table->lock);
+ for (i = 0; i < gid_tbl_len; ++i) {
+ if (!device->query_gid)
+ continue;
+ ret = device->query_gid(device, port, i, &gid);
+ if (ret) {
+ pr_warn("query_gid failed (%d) for %s (index %d)\n",
+ ret, device->name, i);
+ goto err;
+ }
+ gid_attr.index = i;
+ add_modify_gid(table, &gid, &gid_attr);
+ }
+err:
+ mutex_unlock(&table->lock);
+ return ret;
+}
+
static void ib_cache_update(struct ib_device *device,
u8 port,
bool enforce_security)
{
struct ib_port_attr *tprops = NULL;
struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
- struct ib_gid_cache {
- int table_len;
- union ib_gid table[0];
- } *gid_cache = NULL;
int i;
int ret;
struct ib_gid_table *table;
- bool use_roce_gid_table =
- rdma_cap_roce_gid_table(device, port);
if (!rdma_is_port_valid(device, port))
return;
@@ -1072,6 +1150,13 @@ static void ib_cache_update(struct ib_device *device,
goto err;
}
+ if (!rdma_protocol_roce(device, port)) {
+ ret = config_non_roce_gid_cache(device, port,
+ tprops->gid_tbl_len);
+ if (ret)
+ goto err;
+ }
+
pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
sizeof *pkey_cache->table, GFP_KERNEL);
if (!pkey_cache)
@@ -1079,15 +1164,6 @@ static void ib_cache_update(struct ib_device *device,
pkey_cache->table_len = tprops->pkey_tbl_len;
- if (!use_roce_gid_table) {
- gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
- sizeof(*gid_cache->table), GFP_KERNEL);
- if (!gid_cache)
- goto err;
-
- gid_cache->table_len = tprops->gid_tbl_len;
- }
-
for (i = 0; i < pkey_cache->table_len; ++i) {
ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
if (ret) {
@@ -1097,33 +1173,12 @@ static void ib_cache_update(struct ib_device *device,
}
}
- if (!use_roce_gid_table) {
- for (i = 0; i < gid_cache->table_len; ++i) {
- ret = ib_query_gid(device, port, i,
- gid_cache->table + i, NULL);
- if (ret) {
- pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
- ret, device->name, i);
- goto err;
- }
- }
- }
-
write_lock_irq(&device->cache.lock);
old_pkey_cache = device->cache.ports[port -
rdma_start_port(device)].pkey;
device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
- if (!use_roce_gid_table) {
- write_lock(&table->rwlock);
- for (i = 0; i < gid_cache->table_len; i++) {
- modify_gid(device, port, table, i, gid_cache->table + i,
- &zattr, false);
- }
- write_unlock(&table->rwlock);
- }
-
device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
device->cache.ports[port - rdma_start_port(device)].port_state =
tprops->state;
@@ -1137,14 +1192,12 @@ static void ib_cache_update(struct ib_device *device,
port,
tprops->subnet_prefix);
- kfree(gid_cache);
kfree(old_pkey_cache);
kfree(tprops);
return;
err:
kfree(pkey_cache);
- kfree(gid_cache);
kfree(tprops);
}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 4300755b4f4c..a92e1a5c202b 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -452,22 +452,41 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
cm_id_priv->private_data_len = private_data_len;
}
-static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
- struct ib_grh *grh, struct cm_av *av)
+static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
+ struct ib_grh *grh, struct cm_av *av)
{
av->port = port;
av->pkey_index = wc->pkey_index;
- ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
- grh, &av->ah_attr);
+ return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
+ port->port_num, wc,
+ grh, &av->ah_attr);
}
-static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
- struct cm_id_private *cm_id_priv)
+static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
+ struct cm_av *av,
+ struct cm_port *port)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&cm.lock, flags);
+
+ if (&cm_id_priv->av == av)
+ list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
+ else if (&cm_id_priv->alt_av == av)
+ list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
+ else
+ ret = -EINVAL;
+
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return ret;
+}
+
+static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path)
{
struct cm_device *cm_dev;
struct cm_port *port = NULL;
unsigned long flags;
- int ret;
u8 p;
struct net_device *ndev = ib_get_ndev_from_path(path);
@@ -476,7 +495,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
sa_conv_pathrec_to_gid_type(path),
ndev, &p, NULL)) {
- port = cm_dev->port[p-1];
+ port = cm_dev->port[p - 1];
break;
}
}
@@ -484,9 +503,20 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
if (ndev)
dev_put(ndev);
+ return port;
+}
+
+static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
+ struct cm_id_private *cm_id_priv)
+{
+ struct cm_device *cm_dev;
+ struct cm_port *port;
+ int ret;
+ port = get_cm_port_from_path(path);
if (!port)
return -EINVAL;
+ cm_dev = port->cm_dev;
ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
be16_to_cpu(path->pkey), &av->pkey_index);
@@ -494,20 +524,14 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
return ret;
av->port = port;
- ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
- &av->ah_attr);
- av->timeout = path->packet_life_time + 1;
-
- spin_lock_irqsave(&cm.lock, flags);
- if (&cm_id_priv->av == av)
- list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
- else if (&cm_id_priv->alt_av == av)
- list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
- else
- ret = -EINVAL;
+ ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
+ &av->ah_attr);
+ if (ret)
+ return ret;
- spin_unlock_irqrestore(&cm.lock, flags);
+ av->timeout = path->packet_life_time + 1;
+ ret = add_cm_id_to_port_list(cm_id_priv, av, port);
return ret;
}
@@ -1472,31 +1496,29 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
sa_path_set_dlid(primary_path,
- htonl(ntohs(req_msg->primary_local_lid)));
+ ntohs(req_msg->primary_local_lid));
sa_path_set_slid(primary_path,
- htonl(ntohs(req_msg->primary_remote_lid)));
+ ntohs(req_msg->primary_remote_lid));
} else {
lid = opa_get_lid_from_gid(&req_msg->primary_local_gid);
- sa_path_set_dlid(primary_path, cpu_to_be32(lid));
+ sa_path_set_dlid(primary_path, lid);
lid = opa_get_lid_from_gid(&req_msg->primary_remote_gid);
- sa_path_set_slid(primary_path, cpu_to_be32(lid));
+ sa_path_set_slid(primary_path, lid);
}
if (!cm_req_has_alt_path(req_msg))
return;
if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) {
- sa_path_set_dlid(alt_path,
- htonl(ntohs(req_msg->alt_local_lid)));
- sa_path_set_slid(alt_path,
- htonl(ntohs(req_msg->alt_remote_lid)));
+ sa_path_set_dlid(alt_path, ntohs(req_msg->alt_local_lid));
+ sa_path_set_slid(alt_path, ntohs(req_msg->alt_remote_lid));
} else {
lid = opa_get_lid_from_gid(&req_msg->alt_local_gid);
- sa_path_set_dlid(alt_path, cpu_to_be32(lid));
+ sa_path_set_dlid(alt_path, lid);
lid = opa_get_lid_from_gid(&req_msg->alt_remote_gid);
- sa_path_set_slid(alt_path, cpu_to_be32(lid));
+ sa_path_set_slid(alt_path, lid);
}
}
@@ -1521,6 +1543,8 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
cm_req_get_primary_local_ack_timeout(req_msg);
primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
primary_path->service_id = req_msg->service_id;
+ if (sa_path_is_roce(primary_path))
+ primary_path->roce.route_resolved = false;
if (cm_req_has_alt_path(req_msg)) {
alt_path->dgid = req_msg->alt_local_gid;
@@ -1540,6 +1564,9 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
cm_req_get_alt_local_ack_timeout(req_msg);
alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
alt_path->service_id = req_msg->service_id;
+
+ if (sa_path_is_roce(alt_path))
+ alt_path->roce.route_resolved = false;
}
cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
}
@@ -1575,15 +1602,13 @@ static void cm_opa_to_ib_sgid(struct cm_work *work,
struct sa_path_rec *path)
{
struct ib_device *dev = work->port->cm_dev->ib_device;
- struct ib_gid_attr gid_attr;
u8 port_num = work->port->port_num;
if (rdma_cap_opa_ah(dev, port_num) &&
(ib_is_opa_gid(&path->sgid))) {
union ib_gid sgid;
- if (ib_get_cached_gid(dev, port_num, 0,
- &sgid, &gid_attr)) {
+ if (ib_get_cached_gid(dev, port_num, 0, &sgid, NULL)) {
dev_warn(&dev->dev,
"Error updating sgid in CM request\n");
return;
@@ -1862,9 +1887,11 @@ static int cm_req_handler(struct cm_work *work)
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
cm_id_priv->id.remote_id = req_msg->local_comm_id;
- cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
+ ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &cm_id_priv->av);
+ if (ret)
+ goto destroy;
cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
id.local_id);
if (IS_ERR(cm_id_priv->timewait_info)) {
@@ -1877,9 +1904,10 @@ static int cm_req_handler(struct cm_work *work)
listen_cm_id_priv = cm_match_req(work, cm_id_priv);
if (!listen_cm_id_priv) {
+ pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
+ be32_to_cpu(cm_id->local_id));
ret = -EINVAL;
- kfree(cm_id_priv->timewait_info);
- goto destroy;
+ goto free_timeinfo;
}
cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
@@ -1897,56 +1925,50 @@ static int cm_req_handler(struct cm_work *work)
work->port->port_num,
grh->sgid_index,
&gid, &gid_attr);
- if (!ret) {
- if (gid_attr.ndev) {
- work->path[0].rec_type =
- sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
- sa_path_set_ifindex(&work->path[0],
- gid_attr.ndev->ifindex);
- sa_path_set_ndev(&work->path[0],
- dev_net(gid_attr.ndev));
- dev_put(gid_attr.ndev);
- } else {
- cm_path_set_rec_type(work->port->cm_dev->ib_device,
- work->port->port_num,
- &work->path[0],
- &req_msg->primary_local_gid);
- }
- if (cm_req_has_alt_path(req_msg))
- work->path[1].rec_type = work->path[0].rec_type;
- cm_format_paths_from_req(req_msg, &work->path[0],
- &work->path[1]);
- if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
- sa_path_set_dmac(&work->path[0],
- cm_id_priv->av.ah_attr.roce.dmac);
- work->path[0].hop_limit = grh->hop_limit;
- ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
- cm_id_priv);
+ if (ret) {
+ ib_send_cm_rej(cm_id, IB_CM_REJ_UNSUPPORTED, NULL, 0, NULL, 0);
+ goto rejected;
+ }
+
+ if (gid_attr.ndev) {
+ work->path[0].rec_type =
+ sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
+ sa_path_set_ifindex(&work->path[0],
+ gid_attr.ndev->ifindex);
+ sa_path_set_ndev(&work->path[0],
+ dev_net(gid_attr.ndev));
+ dev_put(gid_attr.ndev);
+ } else {
+ cm_path_set_rec_type(work->port->cm_dev->ib_device,
+ work->port->port_num,
+ &work->path[0],
+ &req_msg->primary_local_gid);
}
+ if (cm_req_has_alt_path(req_msg))
+ work->path[1].rec_type = work->path[0].rec_type;
+ cm_format_paths_from_req(req_msg, &work->path[0],
+ &work->path[1]);
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
+ sa_path_set_dmac(&work->path[0],
+ cm_id_priv->av.ah_attr.roce.dmac);
+ work->path[0].hop_limit = grh->hop_limit;
+ ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
+ cm_id_priv);
if (ret) {
- int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
- work->port->port_num, 0,
- &work->path[0].sgid,
- &gid_attr);
- if (!err && gid_attr.ndev) {
- work->path[0].rec_type =
- sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
- sa_path_set_ifindex(&work->path[0],
- gid_attr.ndev->ifindex);
- sa_path_set_ndev(&work->path[0],
- dev_net(gid_attr.ndev));
- dev_put(gid_attr.ndev);
- } else {
- cm_path_set_rec_type(work->port->cm_dev->ib_device,
- work->port->port_num,
- &work->path[0],
- &req_msg->primary_local_gid);
- }
- if (cm_req_has_alt_path(req_msg))
- work->path[1].rec_type = work->path[0].rec_type;
- ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
- &work->path[0].sgid, sizeof work->path[0].sgid,
- NULL, 0);
+ int err;
+
+ err = ib_get_cached_gid(work->port->cm_dev->ib_device,
+ work->port->port_num, 0,
+ &work->path[0].sgid,
+ NULL);
+ if (err)
+ ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+ NULL, 0, NULL, 0);
+ else
+ ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+ &work->path[0].sgid,
+ sizeof(work->path[0].sgid),
+ NULL, 0);
goto rejected;
}
if (cm_req_has_alt_path(req_msg)) {
@@ -1955,7 +1977,7 @@ static int cm_req_handler(struct cm_work *work)
if (ret) {
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
&work->path[0].sgid,
- sizeof work->path[0].sgid, NULL, 0);
+ sizeof(work->path[0].sgid), NULL, 0);
goto rejected;
}
}
@@ -1981,6 +2003,8 @@ static int cm_req_handler(struct cm_work *work)
rejected:
atomic_dec(&cm_id_priv->refcount);
cm_deref_id(listen_cm_id_priv);
+free_timeinfo:
+ kfree(cm_id_priv->timewait_info);
destroy:
ib_destroy_cm_id(cm_id);
return ret;
@@ -2033,6 +2057,8 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REQ_RCVD &&
cm_id->state != IB_CM_MRA_REQ_SENT) {
+ pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
+ be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -2099,6 +2125,8 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REP_RCVD &&
cm_id->state != IB_CM_MRA_REP_SENT) {
+ pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
+ be32_to_cpu(cm_id->local_id), cm_id->state);
ret = -EINVAL;
goto error;
}
@@ -2206,6 +2234,8 @@ static int cm_rep_handler(struct cm_work *work)
cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
if (!cm_id_priv) {
cm_dup_rep_handler(work);
+ pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
+ be32_to_cpu(rep_msg->remote_comm_id));
return -EINVAL;
}
@@ -2219,6 +2249,10 @@ static int cm_rep_handler(struct cm_work *work)
default:
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
+ pr_debug("%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
+ __func__, cm_id_priv->id.state,
+ be32_to_cpu(rep_msg->local_comm_id),
+ be32_to_cpu(rep_msg->remote_comm_id));
goto error;
}
@@ -2232,6 +2266,8 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
+ pr_debug("%s: Failed to insert remote id %d\n", __func__,
+ be32_to_cpu(rep_msg->remote_comm_id));
goto error;
}
/* Check for a stale connection. */
@@ -2249,6 +2285,10 @@ static int cm_rep_handler(struct cm_work *work)
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
+ pr_debug("%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
+ __func__, be32_to_cpu(rep_msg->local_comm_id),
+ be32_to_cpu(rep_msg->remote_comm_id));
+
if (cur_cm_id_priv) {
cm_id = &cur_cm_id_priv->id;
ib_send_cm_dreq(cm_id, NULL, 0);
@@ -2395,6 +2435,8 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_ESTABLISHED) {
+ pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
+ be32_to_cpu(cm_id->local_id), cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -2464,6 +2506,8 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id,
if (cm_id->state != IB_CM_DREQ_RCVD) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
kfree(data);
+ pr_debug("%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
+ __func__, be32_to_cpu(cm_id->local_id), cm_id->state);
return -EINVAL;
}
@@ -2529,6 +2573,9 @@ static int cm_dreq_handler(struct cm_work *work)
atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
counter[CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
+ pr_debug("%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
+ __func__, be32_to_cpu(dreq_msg->local_comm_id),
+ be32_to_cpu(dreq_msg->remote_comm_id));
return -EINVAL;
}
@@ -2571,6 +2618,9 @@ static int cm_dreq_handler(struct cm_work *work)
counter[CM_DREQ_COUNTER]);
goto unlock;
default:
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
goto unlock;
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
@@ -2674,6 +2724,8 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
cm_enter_timewait(cm_id_priv);
break;
default:
+ pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
+ be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -2784,6 +2836,9 @@ static int cm_rej_handler(struct cm_work *work)
/* fall through */
default:
spin_unlock_irq(&cm_id_priv->lock);
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
ret = -EINVAL;
goto out;
}
@@ -2845,7 +2900,11 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
msg_response = CM_MSG_RESPONSE_OTHER;
break;
}
+ /* fall through */
default:
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
ret = -EINVAL;
goto error1;
}
@@ -2947,6 +3006,9 @@ static int cm_mra_handler(struct cm_work *work)
counter[CM_MRA_COUNTER]);
/* fall through */
default:
+ pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
goto out;
}
@@ -3072,14 +3134,14 @@ static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg,
u32 lid;
if (path->rec_type != SA_PATH_REC_TYPE_OPA) {
- sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid)));
- sa_path_set_slid(path, htonl(ntohs(lap_msg->alt_remote_lid)));
+ sa_path_set_dlid(path, ntohs(lap_msg->alt_local_lid));
+ sa_path_set_slid(path, ntohs(lap_msg->alt_remote_lid));
} else {
lid = opa_get_lid_from_gid(&lap_msg->alt_local_gid);
- sa_path_set_dlid(path, cpu_to_be32(lid));
+ sa_path_set_dlid(path, lid);
lid = opa_get_lid_from_gid(&lap_msg->alt_remote_gid);
- sa_path_set_slid(path, cpu_to_be32(lid));
+ sa_path_set_slid(path, lid);
}
}
@@ -3113,6 +3175,13 @@ static int cm_lap_handler(struct cm_work *work)
struct ib_mad_send_buf *msg = NULL;
int ret;
+ /* Currently, alternate path messages are not supported for the
+ * RoCE link layer.
+ */
+ if (rdma_protocol_roce(work->port->cm_dev->ib_device,
+ work->port->port_num))
+ return -EINVAL;
+
/* todo: verify LAP request and send reject APR if invalid. */
lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
@@ -3120,6 +3189,12 @@ static int cm_lap_handler(struct cm_work *work)
if (!cm_id_priv)
return -EINVAL;
+ ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &cm_id_priv->av);
+ if (ret)
+ goto deref;
+
param = &work->cm_event.param.lap_rcvd;
memset(&work->path[0], 0, sizeof(work->path[1]));
cm_path_set_rec_type(work->port->cm_dev->ib_device,
@@ -3166,9 +3241,6 @@ static int cm_lap_handler(struct cm_work *work)
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
- cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
cm_id_priv);
ret = atomic_inc_and_test(&cm_id_priv->work_count);
@@ -3259,6 +3331,13 @@ static int cm_apr_handler(struct cm_work *work)
struct cm_apr_msg *apr_msg;
int ret;
+ /* Currently, alternate path messages are not supported for the
+ * RoCE link layer.
+ */
+ if (rdma_protocol_roce(work->port->cm_dev->ib_device,
+ work->port->port_num))
+ return -EINVAL;
+
apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
apr_msg->local_comm_id);
@@ -3421,6 +3500,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
struct cm_sidr_req_msg *sidr_req_msg;
struct ib_wc *wc;
+ int ret;
cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
if (IS_ERR(cm_id))
@@ -3433,9 +3513,12 @@ static int cm_sidr_req_handler(struct cm_work *work)
wc = work->mad_recv_wc->wc;
cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
cm_id_priv->av.dgid.global.interface_id = 0;
- cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
+ ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &cm_id_priv->av);
+ if (ret)
+ goto out;
+
cm_id_priv->id.remote_id = sidr_req_msg->request_id;
cm_id_priv->tid = sidr_req_msg->hdr.tid;
atomic_inc(&cm_id_priv->work_count);
@@ -3727,6 +3810,7 @@ static void cm_work_handler(struct work_struct *_work)
ret = cm_timewait_handler(work);
break;
default:
+ pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
ret = -EINVAL;
break;
}
@@ -3762,6 +3846,8 @@ static int cm_establish(struct ib_cm_id *cm_id)
ret = -EISCONN;
break;
default:
+ pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
+ be32_to_cpu(cm_id->local_id), cm_id->state);
ret = -EINVAL;
break;
}
@@ -3959,6 +4045,9 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
ret = -EINVAL;
break;
}
@@ -4006,6 +4095,9 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
ret = -EINVAL;
break;
}
@@ -4065,6 +4157,9 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
ret = -EINVAL;
break;
}
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 369812a8b030..791f349bbcc9 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -62,6 +62,7 @@
#include <rdma/iw_cm.h>
#include "core_priv.h"
+#include "cma_priv.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -174,7 +175,7 @@ static struct cma_pernet *cma_pernet(struct net *net)
return net_generic(net, cma_pernet_id);
}
-static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps)
+static struct idr *cma_pernet_idr(struct net *net, enum rdma_ucm_port_space ps)
{
struct cma_pernet *pernet = cma_pernet(net);
@@ -203,7 +204,7 @@ struct cma_device {
};
struct rdma_bind_list {
- enum rdma_port_space ps;
+ enum rdma_ucm_port_space ps;
struct hlist_head owners;
unsigned short port;
};
@@ -216,7 +217,7 @@ struct class_port_info_context {
u8 port_num;
};
-static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
+static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
struct rdma_bind_list *bind_list, int snum)
{
struct idr *idr = cma_pernet_idr(net, ps);
@@ -225,14 +226,15 @@ static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
}
static struct rdma_bind_list *cma_ps_find(struct net *net,
- enum rdma_port_space ps, int snum)
+ enum rdma_ucm_port_space ps, int snum)
{
struct idr *idr = cma_pernet_idr(net, ps);
return idr_find(idr, snum);
}
-static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum)
+static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps,
+ int snum)
{
struct idr *idr = cma_pernet_idr(net, ps);
@@ -327,46 +329,6 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
* We do this by disabling removal notification while a callback is in process,
* and reporting it after the callback completes.
*/
-struct rdma_id_private {
- struct rdma_cm_id id;
-
- struct rdma_bind_list *bind_list;
- struct hlist_node node;
- struct list_head list; /* listen_any_list or cma_device.list */
- struct list_head listen_list; /* per device listens */
- struct cma_device *cma_dev;
- struct list_head mc_list;
-
- int internal_id;
- enum rdma_cm_state state;
- spinlock_t lock;
- struct mutex qp_mutex;
-
- struct completion comp;
- atomic_t refcount;
- struct mutex handler_mutex;
-
- int backlog;
- int timeout_ms;
- struct ib_sa_query *query;
- int query_id;
- union {
- struct ib_cm_id *ib;
- struct iw_cm_id *iw;
- } cm_id;
-
- u32 seq_num;
- u32 qkey;
- u32 qp_num;
- pid_t owner;
- u32 options;
- u8 srq;
- u8 tos;
- bool tos_set;
- u8 reuseaddr;
- u8 afonly;
- enum ib_gid_type gid_type;
-};
struct cma_multicast {
struct rdma_id_private *id_priv;
@@ -507,6 +469,8 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
+ id_priv->res.type = RDMA_RESTRACK_CM_ID;
+ rdma_restrack_add(&id_priv->res);
}
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
@@ -603,7 +567,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
int ret;
if (addr->sa_family != AF_IB) {
- ret = rdma_translate_ip(addr, dev_addr, NULL);
+ ret = rdma_translate_ip(addr, dev_addr);
} else {
cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
ret = 0;
@@ -614,11 +578,14 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
static inline int cma_validate_port(struct ib_device *device, u8 port,
enum ib_gid_type gid_type,
- union ib_gid *gid, int dev_type,
- int bound_if_index)
+ union ib_gid *gid,
+ struct rdma_id_private *id_priv)
{
- int ret = -ENODEV;
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ int bound_if_index = dev_addr->bound_dev_if;
+ int dev_type = dev_addr->dev_type;
struct net_device *ndev = NULL;
+ int ret = -ENODEV;
if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
return ret;
@@ -627,7 +594,7 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
return ret;
if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
- ndev = dev_get_by_index(&init_net, bound_if_index);
+ ndev = dev_get_by_index(dev_addr->net, bound_if_index);
if (!ndev)
return ret;
} else {
@@ -673,8 +640,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
rdma_protocol_ib(cma_dev->device, port) ?
IB_GID_TYPE_IB :
listen_id_priv->gid_type, gidp,
- dev_addr->dev_type,
- dev_addr->bound_dev_if);
+ id_priv);
if (!ret) {
id_priv->id.port_num = port;
goto out;
@@ -695,8 +661,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
rdma_protocol_ib(cma_dev->device, port) ?
IB_GID_TYPE_IB :
cma_dev->default_gid_type[port - 1],
- gidp, dev_addr->dev_type,
- dev_addr->bound_dev_if);
+ gidp, id_priv);
if (!ret) {
id_priv->id.port_num = port;
goto out;
@@ -778,10 +743,10 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
complete(&id_priv->comp);
}
-struct rdma_cm_id *rdma_create_id(struct net *net,
- rdma_cm_event_handler event_handler,
- void *context, enum rdma_port_space ps,
- enum ib_qp_type qp_type)
+struct rdma_cm_id *__rdma_create_id(struct net *net,
+ rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const char *caller)
{
struct rdma_id_private *id_priv;
@@ -789,7 +754,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
if (!id_priv)
return ERR_PTR(-ENOMEM);
- id_priv->owner = task_pid_nr(current);
+ if (caller)
+ id_priv->res.kern_name = caller;
+ else
+ rdma_restrack_set_task(&id_priv->res, current);
id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
id_priv->id.event_handler = event_handler;
@@ -809,7 +777,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
return &id_priv->id;
}
-EXPORT_SYMBOL(rdma_create_id);
+EXPORT_SYMBOL(__rdma_create_id);
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
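The rename to __rdma_create_id() threads a caller string into restrack. Presumably the public rdma_create_id() becomes a wrapper that supplies the calling module's name; that header hunk is not shown here, so the definition below is an assumption for context only.

/* Assumed companion change in <rdma/rdma_cm.h>. */
#define rdma_create_id(net, event_handler, context, ps, qp_type) \
	__rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
			 KBUILD_MODNAME)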
@@ -1389,7 +1357,7 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
return net_dev;
}
-static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id)
+static enum rdma_ucm_port_space rdma_ps_from_service_id(__be64 service_id)
{
return (be64_to_cpu(service_id) >> 16) & 0xffff;
}
@@ -1430,21 +1398,12 @@ static bool cma_match_private_data(struct rdma_id_private *id_priv,
return true;
}
-static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num)
-{
- enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num);
- enum rdma_transport_type transport =
- rdma_node_get_transport(device->node_type);
-
- return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB;
-}
-
static bool cma_protocol_roce(const struct rdma_cm_id *id)
{
struct ib_device *device = id->device;
const int port_num = id->port_num ?: rdma_start_port(device);
- return cma_protocol_roce_dev_port(device, port_num);
+ return rdma_protocol_roce(device, port_num);
}
static bool cma_match_net_dev(const struct rdma_cm_id *id,
@@ -1457,7 +1416,7 @@ static bool cma_match_net_dev(const struct rdma_cm_id *id,
/* This request is an AF_IB request or a RoCE request */
return (!id->port_num || id->port_num == port_num) &&
(addr->src_addr.ss_family == AF_IB ||
- cma_protocol_roce_dev_port(id->device, port_num));
+ rdma_protocol_roce(id->device, port_num));
return !addr->dev_addr.bound_dev_if ||
(net_eq(dev_net(net_dev), addr->dev_addr.net) &&
@@ -1512,7 +1471,7 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
/* Assuming the protocol is AF_IB */
*net_dev = NULL;
- } else if (cma_protocol_roce_dev_port(req.device, req.port)) {
+ } else if (rdma_protocol_roce(req.device, req.port)) {
/* TODO find the net dev matching the request parameters
* through the RoCE GID table */
*net_dev = NULL;
@@ -1693,6 +1652,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
mutex_unlock(&id_priv->handler_mutex);
if (id_priv->cma_dev) {
+ rdma_restrack_del(&id_priv->res);
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -1842,6 +1802,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
struct ib_cm_event *ib_event,
struct net_device *net_dev)
{
+ struct rdma_id_private *listen_id_priv;
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
struct rdma_route *rt;
@@ -1851,9 +1812,11 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
ib_event->param.req_rcvd.primary_path->service_id;
int ret;
- id = rdma_create_id(listen_id->route.addr.dev_addr.net,
+ listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
+ id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
listen_id->event_handler, listen_id->context,
- listen_id->ps, ib_event->param.req_rcvd.qp_type);
+ listen_id->ps, ib_event->param.req_rcvd.qp_type,
+ listen_id_priv->res.kern_name);
if (IS_ERR(id))
return NULL;
@@ -1875,9 +1838,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
if (net_dev) {
- ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
- if (ret)
- goto err;
+ rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
} else {
if (!cma_protocol_roce(listen_id) &&
cma_any_addr(cma_src_addr(id_priv))) {
@@ -1904,14 +1865,17 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
struct ib_cm_event *ib_event,
struct net_device *net_dev)
{
+ struct rdma_id_private *listen_id_priv;
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
struct net *net = listen_id->route.addr.dev_addr.net;
int ret;
- id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
- listen_id->ps, IB_QPT_UD);
+ listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
+ id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
+ listen_id->ps, IB_QPT_UD,
+ listen_id_priv->res.kern_name);
if (IS_ERR(id))
return NULL;
@@ -1923,9 +1887,7 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
goto err;
if (net_dev) {
- ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
- if (ret)
- goto err;
+ rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
} else {
if (!cma_any_addr(cma_src_addr(id_priv))) {
ret = cma_translate_addr(cma_src_addr(id_priv),
@@ -2179,10 +2141,11 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
goto out;
/* Create a new RDMA id for the new IW CM ID */
- new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
- listen_id->id.event_handler,
- listen_id->id.context,
- RDMA_PS_TCP, IB_QPT_RC);
+ new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+ listen_id->id.event_handler,
+ listen_id->id.context,
+ RDMA_PS_TCP, IB_QPT_RC,
+ listen_id->res.kern_name);
if (IS_ERR(new_cm_id)) {
ret = -ENOMEM;
goto out;
@@ -2191,7 +2154,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;
- ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
+ ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
@@ -2307,8 +2270,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
return;
- id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
- id_priv->id.qp_type);
+ id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
+ id_priv->id.qp_type, id_priv->res.kern_name);
if (IS_ERR(id))
return;
@@ -2570,6 +2533,7 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
gid_type = ib_network_to_gid_type(addr->dev_addr.network);
route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
+ route->path_rec->roce.route_resolved = true;
sa_path_set_ndev(route->path_rec, addr->dev_addr.net);
sa_path_set_ifindex(route->path_rec, ndev->ifindex);
sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
@@ -3057,7 +3021,7 @@ static void cma_bind_port(struct rdma_bind_list *bind_list,
hlist_add_head(&id_priv->node, &bind_list->owners);
}
-static int cma_alloc_port(enum rdma_port_space ps,
+static int cma_alloc_port(enum rdma_ucm_port_space ps,
struct rdma_id_private *id_priv, unsigned short snum)
{
struct rdma_bind_list *bind_list;
@@ -3120,7 +3084,7 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list,
return 0;
}
-static int cma_alloc_any_port(enum rdma_port_space ps,
+static int cma_alloc_any_port(enum rdma_ucm_port_space ps,
struct rdma_id_private *id_priv)
{
static unsigned int last_used_port;
@@ -3198,7 +3162,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
return 0;
}
-static int cma_use_port(enum rdma_port_space ps,
+static int cma_use_port(enum rdma_ucm_port_space ps,
struct rdma_id_private *id_priv)
{
struct rdma_bind_list *bind_list;
@@ -3232,8 +3196,8 @@ static int cma_bind_listen(struct rdma_id_private *id_priv)
return ret;
}
-static enum rdma_port_space cma_select_inet_ps(
- struct rdma_id_private *id_priv)
+static enum rdma_ucm_port_space
+cma_select_inet_ps(struct rdma_id_private *id_priv)
{
switch (id_priv->id.ps) {
case RDMA_PS_TCP:
@@ -3247,9 +3211,10 @@ static enum rdma_port_space cma_select_inet_ps(
}
}
-static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
+static enum rdma_ucm_port_space
+cma_select_ib_ps(struct rdma_id_private *id_priv)
{
- enum rdma_port_space ps = 0;
+ enum rdma_ucm_port_space ps = 0;
struct sockaddr_ib *sib;
u64 sid_ps, mask, sid;
@@ -3280,7 +3245,7 @@ static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
static int cma_get_port(struct rdma_id_private *id_priv)
{
- enum rdma_port_space ps;
+ enum rdma_ucm_port_space ps;
int ret;
if (cma_family(id_priv) != AF_IB)
@@ -3418,8 +3383,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0;
err2:
- if (id_priv->cma_dev)
+ if (id_priv->cma_dev) {
+ rdma_restrack_del(&id_priv->res);
cma_release_dev(id_priv);
+ }
err1:
cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
return ret;
@@ -3491,9 +3458,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
event.status = ret;
break;
}
- ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
- id_priv->id.route.path_rec,
- &event.param.ud.ah_attr);
+ ib_init_ah_attr_from_path(id_priv->id.device,
+ id_priv->id.port_num,
+ id_priv->id.route.path_rec,
+ &event.param.ud.ah_attr);
event.param.ud.qp_num = rep->qpn;
event.param.ud.qkey = rep->qkey;
event.event = RDMA_CM_EVENT_ESTABLISHED;
@@ -3801,14 +3769,18 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
-int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ const char *caller)
{
struct rdma_id_private *id_priv;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- id_priv->owner = task_pid_nr(current);
+ if (caller)
+ id_priv->res.kern_name = caller;
+ else
+ rdma_restrack_set_task(&id_priv->res, current);
if (!cma_comp(id_priv, RDMA_CM_CONNECT))
return -EINVAL;
@@ -3848,7 +3820,7 @@ reject:
rdma_reject(id, NULL, 0);
return ret;
}
-EXPORT_SYMBOL(rdma_accept);
+EXPORT_SYMBOL(__rdma_accept);
int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
{
@@ -3960,16 +3932,20 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
struct rdma_dev_addr *dev_addr =
&id_priv->id.route.addr.dev_addr;
struct net_device *ndev =
- dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
enum ib_gid_type gid_type =
id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
- ib_init_ah_from_mcmember(id_priv->id.device,
- id_priv->id.port_num, &multicast->rec,
- ndev, gid_type,
- &event.param.ud.ah_attr);
+ ret = ib_init_ah_from_mcmember(id_priv->id.device,
+ id_priv->id.port_num,
+ &multicast->rec,
+ ndev, gid_type,
+ &event.param.ud.ah_attr);
+ if (ret)
+ event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+
event.param.ud.qp_num = 0xFFFFFF;
event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
if (ndev)
@@ -4150,7 +4126,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
if (!ndev) {
err = -ENODEV;
goto out2;
@@ -4271,7 +4247,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
struct net_device *ndev = NULL;
if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(&init_net,
+ ndev = dev_get_by_index(dev_addr->net,
dev_addr->bound_dev_if);
if (ndev) {
cma_igmp_send(ndev,
@@ -4327,7 +4303,7 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
if (event != NETDEV_BONDING_FAILOVER)
return NOTIFY_DONE;
- if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
+ if (!netif_is_bond_master(ndev))
return NOTIFY_DONE;
mutex_lock(&lock);
@@ -4529,7 +4505,7 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
goto out;
- id_stats->pid = id_priv->owner;
+ id_stats->pid = task_pid_vnr(id_priv->res.task);
id_stats->port_space = id->ps;
id_stats->cm_state = id_priv->state;
id_stats->qp_num = id_priv->qp_num;
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 54076a3e8007..b3d7f863c0fe 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -295,7 +295,7 @@ static struct config_group *make_cma_dev(struct config_group *group,
goto fail;
}
- strncpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
+ strlcpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
config_group_init_type_name(&cma_dev_group->ports_group, "ports",
&cma_ports_group_type);
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
new file mode 100644
index 000000000000..194cfe78c447
--- /dev/null
+++ b/drivers/infiniband/core/cma_priv.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc. All rights reserved.
+ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _CMA_PRIV_H
+#define _CMA_PRIV_H
+
+enum rdma_cm_state {
+ RDMA_CM_IDLE,
+ RDMA_CM_ADDR_QUERY,
+ RDMA_CM_ADDR_RESOLVED,
+ RDMA_CM_ROUTE_QUERY,
+ RDMA_CM_ROUTE_RESOLVED,
+ RDMA_CM_CONNECT,
+ RDMA_CM_DISCONNECT,
+ RDMA_CM_ADDR_BOUND,
+ RDMA_CM_LISTEN,
+ RDMA_CM_DEVICE_REMOVAL,
+ RDMA_CM_DESTROYING
+};
+
+struct rdma_id_private {
+ struct rdma_cm_id id;
+
+ struct rdma_bind_list *bind_list;
+ struct hlist_node node;
+ struct list_head list; /* listen_any_list or cma_device.list */
+ struct list_head listen_list; /* per device listens */
+ struct cma_device *cma_dev;
+ struct list_head mc_list;
+
+ int internal_id;
+ enum rdma_cm_state state;
+ spinlock_t lock;
+ struct mutex qp_mutex;
+
+ struct completion comp;
+ atomic_t refcount;
+ struct mutex handler_mutex;
+
+ int backlog;
+ int timeout_ms;
+ struct ib_sa_query *query;
+ int query_id;
+ union {
+ struct ib_cm_id *ib;
+ struct iw_cm_id *iw;
+ } cm_id;
+
+ u32 seq_num;
+ u32 qkey;
+ u32 qp_num;
+ u32 options;
+ u8 srq;
+ u8 tos;
+ bool tos_set;
+ u8 reuseaddr;
+ u8 afonly;
+ enum ib_gid_type gid_type;
+
+ /*
+ * Internal to RDMA/core, don't use in the drivers
+ */
+ struct rdma_restrack_entry res;
+};
+#endif /* _CMA_PRIV_H */
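Moving rdma_id_private into cma_priv.h lets other ib_core files (nldev.c below includes it) walk a resource-tracking entry back to the owning cm_id. A minimal sketch with an illustrative helper name:

static struct rdma_id_private *
res_to_id_priv(struct rdma_restrack_entry *res)
{
	/* res is embedded in rdma_id_private, so container_of() recovers it. */
	return container_of(res, struct rdma_id_private, res);
}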
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 144871d83aed..54163a6e4067 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -40,8 +40,12 @@
#include <rdma/ib_verbs.h>
#include <rdma/opa_addr.h>
#include <rdma/ib_mad.h>
+#include <rdma/restrack.h>
#include "mad_priv.h"
+/* Total number of ports combined across all struct ib_device instances */
+#define RDMA_MAX_PORTS 1024
+
struct pkey_index_qp_list {
struct list_head pkey_index_list;
u16 pkey_index;
@@ -137,7 +141,6 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
int roce_gid_mgmt_init(void);
void roce_gid_mgmt_cleanup(void);
-int roce_rescan_device(struct ib_device *ib_dev);
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
int ib_cache_setup_one(struct ib_device *device);
@@ -206,11 +209,6 @@ int ib_get_cached_subnet_prefix(struct ib_device *device,
u64 *sn_pfx);
#ifdef CONFIG_SECURITY_INFINIBAND
-int ib_security_pkey_access(struct ib_device *dev,
- u8 port_num,
- u16 pkey_index,
- void *sec);
-
void ib_security_destroy_port_pkey_list(struct ib_device *device);
void ib_security_cache_change(struct ib_device *device,
@@ -233,14 +231,6 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent);
int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index);
#else
-static inline int ib_security_pkey_access(struct ib_device *dev,
- u8 port_num,
- u16 pkey_index,
- void *sec)
-{
- return 0;
-}
-
static inline void ib_security_destroy_port_pkey_list(struct ib_device *device)
{
}
@@ -311,4 +301,47 @@ struct ib_device *ib_device_get_by_index(u32 ifindex);
/* RDMA device netlink */
void nldev_init(void);
void nldev_exit(void);
+
+static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
+ struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata,
+ struct ib_uobject *uobj)
+{
+ struct ib_qp *qp;
+
+ if (!dev->create_qp)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ qp = dev->create_qp(pd, attr, udata);
+ if (IS_ERR(qp))
+ return qp;
+
+ qp->device = dev;
+ qp->pd = pd;
+ qp->uobject = uobj;
+ /*
+ * We don't track XRC QPs for now, because they don't have PD
+ * and, more importantly, they are created internally by the driver,
+ * see mlx5 create_dev_resources() as an example.
+ */
+ if (attr->qp_type < IB_QPT_XRC_INI) {
+ qp->res.type = RDMA_RESTRACK_QP;
+ rdma_restrack_add(&qp->res);
+ } else
+ qp->res.valid = false;
+
+ return qp;
+}
+
+struct rdma_dev_addr;
+int rdma_resolve_ip_route(struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr,
+ struct rdma_dev_addr *addr);
+
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+ const union ib_gid *dgid,
+ u8 *dmac, const struct net_device *ndev,
+ int *hoplimit);
+
#endif /* _CORE_PRIV_H */
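A hedged sketch of how a caller is expected to use the new _ib_create_qp() helper so QP resource tracking happens in one place; the wrapper below is illustrative, not the actual verbs.c or uverbs hunk.

static struct ib_qp *example_create_kernel_qp(struct ib_pd *pd,
					      struct ib_qp_init_attr *attr)
{
	/* Kernel consumers pass no udata and no uverbs object. */
	return _ib_create_qp(pd->device, pd, attr, NULL, NULL);
}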
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index 757d308bebe8..af5ad6a56ae4 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -121,20 +121,22 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
}
/**
- * ib_alloc_cq - allocate a completion queue
+ * __ib_alloc_cq - allocate a completion queue
* @dev: device to allocate the CQ for
* @private: driver private data, accessible from cq->cq_context
* @nr_cqe: number of CQEs to allocate
* @comp_vector: HCA completion vectors for this CQ
* @poll_ctx: context to poll the CQ from.
+ * @caller: module owner name.
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
* specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
-struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx)
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector,
+ enum ib_poll_context poll_ctx, const char *caller)
{
struct ib_cq_init_attr cq_attr = {
.cqe = nr_cqe,
@@ -158,6 +160,10 @@ struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
if (!cq->wc)
goto out_destroy_cq;
+ cq->res.type = RDMA_RESTRACK_CQ;
+ cq->res.kern_name = caller;
+ rdma_restrack_add(&cq->res);
+
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
cq->comp_handler = ib_cq_completion_direct;
@@ -182,11 +188,12 @@ struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
out_free_wc:
kfree(cq->wc);
+ rdma_restrack_del(&cq->res);
out_destroy_cq:
cq->device->destroy_cq(cq);
return ERR_PTR(ret);
}
-EXPORT_SYMBOL(ib_alloc_cq);
+EXPORT_SYMBOL(__ib_alloc_cq);
/**
* ib_free_cq - free a completion queue
@@ -213,6 +220,7 @@ void ib_free_cq(struct ib_cq *cq)
}
kfree(cq->wc);
+ rdma_restrack_del(&cq->res);
ret = cq->device->destroy_cq(cq);
WARN_ON_ONCE(ret);
}
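As with __rdma_create_id(), the public ib_alloc_cq() presumably turns into a wrapper that supplies the module name for restrack (that header hunk is not shown here), so existing ULP call sites stay unchanged. A sketch under that assumption:

/* Assumed wrapper in <rdma/ib_verbs.h>. */
#define ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx) \
	__ib_alloc_cq((dev), (private), (nr_cqe), (comp_vector), (poll_ctx), \
		      KBUILD_MODNAME)

/* Illustrative ULP call site, unchanged by this patch. */
static struct ib_cq *example_alloc_cq(struct ib_device *dev, void *ulp_ctx)
{
	/* 128 CQEs, completion vector 0, polled from softirq context. */
	return ib_alloc_cq(dev, ulp_ctx, 128, 0, IB_POLL_SOFTIRQ);
}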
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 4dff06ab771e..ea9fbcfb21bd 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -103,7 +103,6 @@ static int ib_device_check_mandatory(struct ib_device *device)
IB_MANDATORY_FUNC(query_device),
IB_MANDATORY_FUNC(query_port),
IB_MANDATORY_FUNC(query_pkey),
- IB_MANDATORY_FUNC(query_gid),
IB_MANDATORY_FUNC(alloc_pd),
IB_MANDATORY_FUNC(dealloc_pd),
IB_MANDATORY_FUNC(create_ah),
@@ -263,6 +262,8 @@ struct ib_device *ib_alloc_device(size_t size)
if (!device)
return NULL;
+ rdma_restrack_init(&device->res);
+
device->dev.class = &ib_class;
device_initialize(&device->dev);
@@ -288,7 +289,8 @@ void ib_dealloc_device(struct ib_device *device)
{
WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
device->reg_state != IB_DEV_UNINITIALIZED);
- kobject_put(&device->dev.kobj);
+ rdma_restrack_clean(&device->res);
+ put_device(&device->dev);
}
EXPORT_SYMBOL(ib_dealloc_device);
@@ -850,7 +852,7 @@ int ib_query_port(struct ib_device *device,
if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
return 0;
- err = ib_query_gid(device, port_num, 0, &gid, NULL);
+ err = device->query_gid(device, port_num, 0, &gid);
if (err)
return err;
@@ -868,19 +870,13 @@ EXPORT_SYMBOL(ib_query_port);
* @attr: Returned GID attributes related to this GID index (only in RoCE).
* NULL means ignore.
*
- * ib_query_gid() fetches the specified GID table entry.
+ * ib_query_gid() fetches the specified GID table entry from the cache.
*/
int ib_query_gid(struct ib_device *device,
u8 port_num, int index, union ib_gid *gid,
struct ib_gid_attr *attr)
{
- if (rdma_cap_roce_gid_table(device, port_num))
- return ib_get_cached_gid(device, port_num, index, gid, attr);
-
- if (attr)
- return -EINVAL;
-
- return device->query_gid(device, port_num, index, gid);
+ return ib_get_cached_gid(device, port_num, index, gid, attr);
}
EXPORT_SYMBOL(ib_query_gid);
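After this change every ib_query_gid() call is served from the per-port cache regardless of link layer; the driver's query_gid hook is only used to populate that cache (see config_non_roce_gid_cache() above). A small illustrative read:

static int example_read_sgid(struct ib_device *dev, u8 port, int index,
			     union ib_gid *sgid)
{
	/* attr may be NULL when ndev/gid_type details are not needed. */
	return ib_query_gid(dev, port, index, sgid, NULL);
}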
@@ -1043,32 +1039,21 @@ EXPORT_SYMBOL(ib_modify_port);
/**
* ib_find_gid - Returns the port number and GID table index where
- * a specified GID value occurs.
+ * a specified GID value occurs. It searches only IB link-layer ports.
* @device: The device to query.
* @gid: The GID value to search for.
- * @gid_type: Type of GID.
- * @ndev: The ndev related to the GID to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- enum ib_gid_type gid_type, struct net_device *ndev,
u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
- if (rdma_cap_roce_gid_table(device, port)) {
- if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
- ndev, index)) {
- *port_num = port;
- return 0;
- }
- }
-
- if (gid_type != IB_GID_TYPE_IB)
+ if (!rdma_protocol_ib(device, port))
continue;
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
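Callers of ib_find_gid() lose the gid_type and ndev arguments with this change, since only IB link-layer ports are scanned. A hedged sketch of an updated call site; the names are illustrative:

static int example_find_ib_gid(struct ib_device *device, union ib_gid *gid)
{
	u8 port_num;
	u16 index;

	return ib_find_gid(device, gid, &port_num, &index);
}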
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 84d2615b5d4b..a0a9ed719031 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -388,13 +388,11 @@ int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
EXPORT_SYMBOL(ib_flush_fmr_pool);
/**
- * ib_fmr_pool_map_phys -
- * @pool:FMR pool to allocate FMR from
- * @page_list:List of pages to map
- * @list_len:Number of pages in @page_list
- * @io_virtual_address:I/O virtual address for new FMR
- *
- * Map an FMR from an FMR pool.
+ * ib_fmr_pool_map_phys - Map an FMR from an FMR pool.
+ * @pool_handle: FMR pool to allocate FMR from
+ * @page_list: List of pages to map
+ * @list_len: Number of pages in @page_list
+ * @io_virtual_address: I/O virtual address for new FMR
*/
struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
u64 *page_list,
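For context, a hedged sketch of the call the reworked kernel-doc describes; the pool is assumed to have been created elsewhere with ib_create_fmr_pool(), and the names below are illustrative:

static u32 example_map_fmr(struct ib_fmr_pool *pool, u64 *pages, int npages,
			   u64 iova)
{
	struct ib_pool_fmr *fmr;

	fmr = ib_fmr_pool_map_phys(pool, pages, npages, iova);
	if (IS_ERR(fmr))
		return 0;		/* no FMR currently available */

	return fmr->fmr->rkey;		/* rkey to advertise to the peer */
}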
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 30d7277249b8..5d676cff41f4 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -447,9 +447,6 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
*/
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
{
- struct iwcm_id_private *cm_id_priv;
-
- cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
destroy_cm_id(cm_id);
}
EXPORT_SYMBOL(iw_destroy_cm_id);
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index cb0fecc958b5..da12da1c36f6 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -442,10 +442,9 @@ struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
struct sk_buff *skb = NULL;
skb = dev_alloc_skb(IWPM_MSG_SIZE);
- if (!skb) {
- pr_err("%s Unable to allocate skb\n", __func__);
+ if (!skb)
goto create_nlmsg_exit;
- }
+
if (!(ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op,
NLM_F_REQUEST))) {
pr_warn("%s: Unable to put the nlmsg header\n", __func__);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index d8efdc191c27..b28452a55a08 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -49,7 +49,6 @@
#include "smi.h"
#include "opa_smi.h"
#include "agent.h"
-#include "core_priv.h"
static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 1fb72c356e36..3ccaae18ad75 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -41,8 +41,6 @@
#include <linux/module.h>
#include "core_priv.h"
-#include "core_priv.h"
-
static DEFINE_MUTEX(rdma_nl_mutex);
static struct sock *nls;
static struct {
@@ -83,15 +81,13 @@ static bool is_nl_valid(unsigned int type, unsigned int op)
if (!is_nl_msg_valid(type, op))
return false;
- cb_table = rdma_nl_types[type].cb_table;
-#ifdef CONFIG_MODULES
- if (!cb_table) {
+ if (!rdma_nl_types[type].cb_table) {
mutex_unlock(&rdma_nl_mutex);
request_module("rdma-netlink-subsys-%d", type);
mutex_lock(&rdma_nl_mutex);
- cb_table = rdma_nl_types[type].cb_table;
}
-#endif
+
+ cb_table = rdma_nl_types[type].cb_table;
if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
return false;
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 0dcd1aa6f683..eb567765f45c 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -31,10 +31,14 @@
*/
#include <linux/module.h>
+#include <linux/pid.h>
+#include <linux/pid_namespace.h>
#include <net/netlink.h>
+#include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"
+#include "cma_priv.h"
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
@@ -52,16 +56,67 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
+ .len = 16 },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
+ .len = TASK_COMM_LEN },
+ [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = {
+ .len = sizeof(struct __kernel_sockaddr_storage) },
+ [RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
+ .len = sizeof(struct __kernel_sockaddr_storage) },
+ [RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = IFNAMSIZ },
};
-static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
+static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
- char fw[IB_FW_VERSION_NAME_MAX];
-
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
return -EMSGSIZE;
if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
return -EMSGSIZE;
+
+ return 0;
+}
+
+static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
+{
+ char fw[IB_FW_VERSION_NAME_MAX];
+
+ if (fill_nldev_handle(msg, device))
+ return -EMSGSIZE;
+
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
return -EMSGSIZE;
@@ -71,7 +126,7 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
return -EMSGSIZE;
ib_get_device_fw_str(device, fw);
- /* Device without FW has strlen(fw) */
+ /* Device without FW has strlen(fw) = 0 */
if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
return -EMSGSIZE;
@@ -87,15 +142,16 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
}
static int fill_port_info(struct sk_buff *msg,
- struct ib_device *device, u32 port)
+ struct ib_device *device, u32 port,
+ const struct net *net)
{
+ struct net_device *netdev = NULL;
struct ib_port_attr attr;
int ret;
- if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
- return -EMSGSIZE;
- if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
+ if (fill_nldev_handle(msg, device))
return -EMSGSIZE;
+
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
return -EMSGSIZE;
@@ -123,7 +179,328 @@ static int fill_port_info(struct sk_buff *msg,
return -EMSGSIZE;
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
return -EMSGSIZE;
+
+ if (device->get_netdev)
+ netdev = device->get_netdev(device, port);
+
+ if (netdev && net_eq(dev_net(netdev), net)) {
+ ret = nla_put_u32(msg,
+ RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
+ if (ret)
+ goto out;
+ ret = nla_put_string(msg,
+ RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
+ }
+
+out:
+ if (netdev)
+ dev_put(netdev);
+ return ret;
+}
+
+static int fill_res_info_entry(struct sk_buff *msg,
+ const char *name, u64 curr)
+{
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
+ goto err;
+ if (nla_put_u64_64bit(msg,
+ RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 0))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
+{
+ static const char * const names[RDMA_RESTRACK_MAX] = {
+ [RDMA_RESTRACK_PD] = "pd",
+ [RDMA_RESTRACK_CQ] = "cq",
+ [RDMA_RESTRACK_QP] = "qp",
+ [RDMA_RESTRACK_CM_ID] = "cm_id",
+ [RDMA_RESTRACK_MR] = "mr",
+ };
+
+ struct rdma_restrack_root *res = &device->res;
+ struct nlattr *table_attr;
+ int ret, i, curr;
+
+ if (fill_nldev_handle(msg, device))
+ return -EMSGSIZE;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
+ if (!names[i])
+ continue;
+ curr = rdma_restrack_count(res, i, task_active_pid_ns(current));
+ ret = fill_res_info_entry(msg, names[i], curr);
+ if (ret)
+ goto err;
+ }
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return ret;
+}
+
+static int fill_res_name_pid(struct sk_buff *msg,
+ struct rdma_restrack_entry *res)
+{
+ /*
+ * For user resources, user space should read /proc/PID/comm to get
+ * the task name.
+ */
+ if (rdma_is_kernel_res(res)) {
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
+ res->kern_name))
+ return -EMSGSIZE;
+ } else {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
+ task_pid_vnr(res->task)))
+ return -EMSGSIZE;
+ }
+ return 0;
+}
+
+static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_qp *qp = container_of(res, struct ib_qp, res);
+ struct ib_qp_init_attr qp_init_attr;
+ struct nlattr *entry_attr;
+ struct ib_qp_attr qp_attr;
+ int ret;
+
+ ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
+ if (ret)
+ return ret;
+
+ if (port && port != qp_attr.port_num)
+ return 0;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
+ if (!entry_attr)
+ goto out;
+
+ /* In create_qp() port is not set yet */
+ if (qp_attr.port_num &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
+ goto err;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
+ goto err;
+ if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
+ qp_attr.dest_qp_num))
+ goto err;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
+ qp_attr.rq_psn))
+ goto err;
+ }
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
+ goto err;
+
+ if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
+ qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
+ qp_attr.path_mig_state))
+ goto err;
+ }
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
+ goto err;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
+ goto err;
+
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
+static int fill_res_cm_id_entry(struct sk_buff *msg,
+ struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct rdma_id_private *id_priv =
+ container_of(res, struct rdma_id_private, res);
+ struct rdma_cm_id *cm_id = &id_priv->id;
+ struct nlattr *entry_attr;
+
+ if (port && port != cm_id->port_num)
+ return 0;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
+ if (!entry_attr)
+ goto out;
+
+ if (cm_id->port_num &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
+ goto err;
+
+ if (id_priv->qp_num) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
+ goto err;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
+ goto err;
+ }
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
+ goto err;
+
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
+ goto err;
+
+ if (cm_id->route.addr.src_addr.ss_family &&
+ nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
+ sizeof(cm_id->route.addr.src_addr),
+ &cm_id->route.addr.src_addr))
+ goto err;
+ if (cm_id->route.addr.dst_addr.ss_family &&
+ nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
+ sizeof(cm_id->route.addr.dst_addr),
+ &cm_id->route.addr.dst_addr))
+ goto err;
+
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
+static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_cq *cq = container_of(res, struct ib_cq, res);
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY);
+ if (!entry_attr)
+ goto out;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
+ goto err;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
+ atomic_read(&cq->usecnt), 0))
+ goto err;
+
+ /* Poll context is only valid for kernel CQs */
+ if (rdma_is_kernel_res(res) &&
+ nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
+ goto err;
+
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
+static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_mr *mr = container_of(res, struct ib_mr, res);
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
+ if (!entry_attr)
+ goto out;
+
+ if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
+ goto err;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
+ goto err;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
+ mr->iova, 0))
+ goto err;
+ }
+
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0))
+ goto err;
+
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
+static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_pd *pd = container_of(res, struct ib_pd, res);
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY);
+ if (!entry_attr)
+ goto out;
+
+ if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
+ pd->local_dma_lkey))
+ goto err;
+ if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
+ pd->unsafe_global_rkey))
+ goto err;
+ }
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
+ atomic_read(&pd->usecnt), 0))
+ goto err;
+ if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
+ pd->unsafe_global_rkey))
+ goto err;
+
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
}
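All of the fill_res_*_entry() helpers above follow the same nla_nest_start()/nla_nest_cancel() pattern; a skeleton for a hypothetical new resource type (RDMA_NLDEV_ATTR_RES_FOO_ENTRY is invented here for illustration, not part of this patch) would look like:

static int fill_res_foo_entry(struct sk_buff *msg, struct netlink_callback *cb,
			      struct rdma_restrack_entry *res, uint32_t port)
{
	struct nlattr *entry_attr;

	/* Hypothetical nested attribute for the new resource type. */
	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_FOO_ENTRY);
	if (!entry_attr)
		return -EMSGSIZE;

	/* nla_put_*() the per-object attributes here ... */

	if (fill_res_name_pid(msg, res))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
	return -EMSGSIZE;
}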
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -247,7 +624,7 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, 0);
- err = fill_port_info(msg, device, port);
+ err = fill_port_info(msg, device, port, sock_net(skb->sk));
if (err)
goto err_free;
@@ -307,7 +684,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
RDMA_NLDEV_CMD_PORT_GET),
0, NLM_F_MULTI);
- if (fill_port_info(skb, device, p)) {
+ if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
nlmsg_cancel(skb, nlh);
goto out;
}
@@ -321,6 +698,284 @@ out:
return skb->len;
}
+static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 index;
+ int ret;
+
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(index);
+ if (!device)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
+ 0, 0);
+
+ ret = fill_res_info(msg, device);
+ if (ret)
+ goto err_free;
+
+ nlmsg_end(msg, nlh);
+ put_device(&device->dev);
+ return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+err_free:
+ nlmsg_free(msg);
+err:
+ put_device(&device->dev);
+ return ret;
+}
+
+static int _nldev_res_get_dumpit(struct ib_device *device,
+ struct sk_buff *skb,
+ struct netlink_callback *cb,
+ unsigned int idx)
+{
+ int start = cb->args[0];
+ struct nlmsghdr *nlh;
+
+ if (idx < start)
+ return 0;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
+ 0, NLM_F_MULTI);
+
+ if (fill_res_info(skb, device)) {
+ nlmsg_cancel(skb, nlh);
+ goto out;
+ }
+
+ nlmsg_end(skb, nlh);
+
+ idx++;
+
+out:
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+static int nldev_res_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
+}
+
+struct nldev_fill_res_entry {
+ int (*fill_res_func)(struct sk_buff *msg, struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, u32 port);
+ enum rdma_nldev_attr nldev_attr;
+ enum rdma_nldev_command nldev_cmd;
+};
+
+static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
+ [RDMA_RESTRACK_QP] = {
+ .fill_res_func = fill_res_qp_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
+ },
+ [RDMA_RESTRACK_CM_ID] = {
+ .fill_res_func = fill_res_cm_id_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
+ },
+ [RDMA_RESTRACK_CQ] = {
+ .fill_res_func = fill_res_cq_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
+ },
+ [RDMA_RESTRACK_MR] = {
+ .fill_res_func = fill_res_mr_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
+ },
+ [RDMA_RESTRACK_PD] = {
+ .fill_res_func = fill_res_pd_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
+ },
+};
+
+static int res_get_common_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ enum rdma_restrack_type res_type)
+{
+ const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct rdma_restrack_entry *res;
+ int err, ret = 0, idx = 0;
+ struct nlattr *table_attr;
+ struct ib_device *device;
+ int start = cb->args[0];
+ struct nlmsghdr *nlh;
+ u32 index, port = 0;
+ bool filled = false;
+
+ err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NULL);
+ /*
+ * Right now we expect the device index in order to get res information,
+ * but it is possible to extend this code to return all devices in
+ * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
+ * If it doesn't exist, we will iterate over all devices.
+ *
+ * But this is not needed for now.
+ */
+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(index);
+ if (!device)
+ return -EINVAL;
+
+ /*
+ * If no PORT_INDEX is supplied, we will return all resources of this type from that device
+ */
+ if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err_index;
+ }
+ }
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
+ 0, NLM_F_MULTI);
+
+ if (fill_nldev_handle(skb, device)) {
+ ret = -EMSGSIZE;
+ goto err;
+ }
+
+ table_attr = nla_nest_start(skb, fe->nldev_attr);
+ if (!table_attr) {
+ ret = -EMSGSIZE;
+ goto err;
+ }
+
+ down_read(&device->res.rwsem);
+ hash_for_each_possible(device->res.hash, res, node, res_type) {
+ if (idx < start)
+ goto next;
+
+ if ((rdma_is_kernel_res(res) &&
+ task_active_pid_ns(current) != &init_pid_ns) ||
+ (!rdma_is_kernel_res(res) && task_active_pid_ns(current) !=
+ task_active_pid_ns(res->task)))
+ /*
+ * 1. Kernel resources should be visible in the init
+ * namespace only
+ * 2. Present only resources visible in the current
+ * namespace
+ */
+ goto next;
+
+ if (!rdma_restrack_get(res))
+ /*
+ * Resource is under release now, but we are not
+ * releasing the lock now, so it will be released in
+ * our next pass, once we get the ->next pointer.
+ */
+ goto next;
+
+ filled = true;
+
+ up_read(&device->res.rwsem);
+ ret = fe->fill_res_func(skb, cb, res, port);
+ down_read(&device->res.rwsem);
+ /*
+ * Return the resource back, but it won't be released
+ * until &device->res.rwsem is released for write.
+ */
+ rdma_restrack_put(res);
+
+ if (ret == -EMSGSIZE)
+ /*
+ * There is a chance to optimize here.
+ * It can be done by using list_prepare_entry
+ * and list_for_each_entry_continue afterwards.
+ */
+ break;
+ if (ret)
+ goto res_err;
+next: idx++;
+ }
+ up_read(&device->res.rwsem);
+
+ nla_nest_end(skb, table_attr);
+ nlmsg_end(skb, nlh);
+ cb->args[0] = idx;
+
+ /*
+ * No more entries to fill, cancel the message and
+ * return 0 to mark end of dumpit.
+ */
+ if (!filled)
+ goto err;
+
+ put_device(&device->dev);
+ return skb->len;
+
+res_err:
+ nla_nest_cancel(skb, table_attr);
+ up_read(&device->res.rwsem);
+
+err:
+ nlmsg_cancel(skb, nlh);
+
+err_index:
+ put_device(&device->dev);
+ return ret;
+}
+
+static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP);
+}
+
+static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CM_ID);
+}
+
+static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CQ);
+}
+
+static int nldev_res_get_mr_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR);
+}
+
+static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
+}
+
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
[RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit,
@@ -330,6 +985,35 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_port_get_doit,
.dump = nldev_port_get_dumpit,
},
+ [RDMA_NLDEV_CMD_RES_GET] = {
+ .doit = nldev_res_get_doit,
+ .dump = nldev_res_get_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_QP_GET] = {
+ .dump = nldev_res_get_qp_dumpit,
+ /*
+ * .doit is not implemented yet for two reasons:
+ * 1. It is not needed yet.
+ * 2. There is a need to provide an identifier; while this is easy
+ * for QPs (device index + port index + LQPN), it is not
+ * the case for the rest of the resources (PD and CQ). Because it
+ * is better to provide a similar interface for all resources,
+ * let's wait until the other resources are implemented
+ * too.
+ */
+ },
+ [RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
+ .dump = nldev_res_get_cm_id_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_CQ_GET] = {
+ .dump = nldev_res_get_cq_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_MR_GET] = {
+ .dump = nldev_res_get_mr_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_PD_GET] = {
+ .dump = nldev_res_get_pd_dumpit,
+ },
};
void __init nldev_init(void)
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index d8eead5d106d..a6e904973ba8 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -350,13 +350,6 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
return type->type_class->alloc_begin(type, ucontext);
}
-static void uverbs_uobject_add(struct ib_uobject *uobject)
-{
- mutex_lock(&uobject->context->uobjects_lock);
- list_add(&uobject->list, &uobject->context->uobjects);
- mutex_unlock(&uobject->context->uobjects_lock);
-}
-
static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
@@ -502,7 +495,6 @@ out:
static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
- uverbs_uobject_add(uobj);
spin_lock(&uobj->context->ufile->idr_lock);
/*
* We already allocated this IDR with a NULL object, so
@@ -518,7 +510,6 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
struct ib_uobject_file *uobj_file =
container_of(uobj, struct ib_uobject_file, uobj);
- uverbs_uobject_add(&uobj_file->uobj);
fd_install(uobj_file->uobj.id, uobj->object);
/* This shouldn't be used anymore. Use the file object instead */
uobj_file->uobj.id = 0;
@@ -545,6 +536,10 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
assert_uverbs_usecnt(uobj, true);
atomic_set(&uobj->usecnt, 0);
+ mutex_lock(&uobj->context->uobjects_lock);
+ list_add(&uobj->list, &uobj->context->uobjects);
+ mutex_unlock(&uobj->context->uobjects_lock);
+
uobj->type->type_class->alloc_commit(uobj);
up_read(&uobj->context->cleanup_rwsem);
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
new file mode 100644
index 000000000000..efddd13e3edb
--- /dev/null
+++ b/drivers/infiniband/core/restrack.c
@@ -0,0 +1,222 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include <rdma/rdma_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/restrack.h>
+#include <linux/mutex.h>
+#include <linux/sched/task.h>
+#include <linux/pid_namespace.h>
+
+#include "cma_priv.h"
+
+void rdma_restrack_init(struct rdma_restrack_root *res)
+{
+ init_rwsem(&res->rwsem);
+}
+
+static const char *type2str(enum rdma_restrack_type type)
+{
+ static const char * const names[RDMA_RESTRACK_MAX] = {
+ [RDMA_RESTRACK_PD] = "PD",
+ [RDMA_RESTRACK_CQ] = "CQ",
+ [RDMA_RESTRACK_QP] = "QP",
+ [RDMA_RESTRACK_CM_ID] = "CM_ID",
+ [RDMA_RESTRACK_MR] = "MR",
+ };
+
+ return names[type];
+};
+
+void rdma_restrack_clean(struct rdma_restrack_root *res)
+{
+ struct rdma_restrack_entry *e;
+ char buf[TASK_COMM_LEN];
+ struct ib_device *dev;
+ const char *owner;
+ int bkt;
+
+ if (hash_empty(res->hash))
+ return;
+
+ dev = container_of(res, struct ib_device, res);
+ pr_err("restrack: %s", CUT_HERE);
+ pr_err("restrack: BUG: RESTRACK detected leak of resources on %s\n",
+ dev->name);
+ hash_for_each(res->hash, bkt, e, node) {
+ if (rdma_is_kernel_res(e)) {
+ owner = e->kern_name;
+ } else {
+ /*
+ * There is no need to call get_task_struct here,
+ * because we can be here only if there are more
+ * get_task_struct() calls than put_task_struct() calls.
+ */
+ get_task_comm(buf, e->task);
+ owner = buf;
+ }
+
+ pr_err("restrack: %s %s object allocated by %s is not freed\n",
+ rdma_is_kernel_res(e) ? "Kernel" : "User",
+ type2str(e->type), owner);
+ }
+ pr_err("restrack: %s", CUT_HERE);
+}
+
+int rdma_restrack_count(struct rdma_restrack_root *res,
+ enum rdma_restrack_type type,
+ struct pid_namespace *ns)
+{
+ struct rdma_restrack_entry *e;
+ u32 cnt = 0;
+
+ down_read(&res->rwsem);
+ hash_for_each_possible(res->hash, e, node, type) {
+ if (ns == &init_pid_ns ||
+ (!rdma_is_kernel_res(e) &&
+ ns == task_active_pid_ns(e->task)))
+ cnt++;
+ }
+ up_read(&res->rwsem);
+ return cnt;
+}
+EXPORT_SYMBOL(rdma_restrack_count);
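nldev.c uses this helper to build the per-device resource summary; a caller sketch (hypothetical wrapper, counting QPs visible from the current PID namespace):

/* Sketch: count QP objects of 'device' visible from this PID namespace. */
static int example_count_qps(struct ib_device *device)
{
	return rdma_restrack_count(&device->res, RDMA_RESTRACK_QP,
				   task_active_pid_ns(current));
}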
+
+static void set_kern_name(struct rdma_restrack_entry *res)
+{
+ struct ib_pd *pd;
+
+ switch (res->type) {
+ case RDMA_RESTRACK_QP:
+ pd = container_of(res, struct ib_qp, res)->pd;
+ if (!pd) {
+ WARN_ONCE(true, "XRC QPs are not supported\n");
+ /* Survive, despite the programmer's error */
+ res->kern_name = " ";
+ }
+ break;
+ case RDMA_RESTRACK_MR:
+ pd = container_of(res, struct ib_mr, res)->pd;
+ break;
+ default:
+ /* Other types set kern_name directly */
+ pd = NULL;
+ break;
+ }
+
+ if (pd)
+ res->kern_name = pd->res.kern_name;
+}
+
+static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
+{
+ switch (res->type) {
+ case RDMA_RESTRACK_PD:
+ return container_of(res, struct ib_pd, res)->device;
+ case RDMA_RESTRACK_CQ:
+ return container_of(res, struct ib_cq, res)->device;
+ case RDMA_RESTRACK_QP:
+ return container_of(res, struct ib_qp, res)->device;
+ case RDMA_RESTRACK_CM_ID:
+ return container_of(res, struct rdma_id_private,
+ res)->id.device;
+ case RDMA_RESTRACK_MR:
+ return container_of(res, struct ib_mr, res)->device;
+ default:
+ WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
+ return NULL;
+ }
+}
+
+static bool res_is_user(struct rdma_restrack_entry *res)
+{
+ switch (res->type) {
+ case RDMA_RESTRACK_PD:
+ return container_of(res, struct ib_pd, res)->uobject;
+ case RDMA_RESTRACK_CQ:
+ return container_of(res, struct ib_cq, res)->uobject;
+ case RDMA_RESTRACK_QP:
+ return container_of(res, struct ib_qp, res)->uobject;
+ case RDMA_RESTRACK_CM_ID:
+ return !res->kern_name;
+ case RDMA_RESTRACK_MR:
+ return container_of(res, struct ib_mr, res)->pd->uobject;
+ default:
+ WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
+ return false;
+ }
+}
+
+void rdma_restrack_add(struct rdma_restrack_entry *res)
+{
+ struct ib_device *dev = res_to_dev(res);
+
+ if (!dev)
+ return;
+
+ if (res->type != RDMA_RESTRACK_CM_ID || !res_is_user(res))
+ res->task = NULL;
+
+ if (res_is_user(res)) {
+ if (!res->task)
+ rdma_restrack_set_task(res, current);
+ res->kern_name = NULL;
+ } else {
+ set_kern_name(res);
+ }
+
+ kref_init(&res->kref);
+ init_completion(&res->comp);
+ res->valid = true;
+
+ down_write(&dev->res.rwsem);
+ hash_add(dev->res.hash, &res->node, res->type);
+ up_write(&dev->res.rwsem);
+}
+EXPORT_SYMBOL(rdma_restrack_add);
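A kernel consumer would typically tag its object and register it right after creation, then unregister it before freeing the object. A sketch for a kernel-owned PD (the field usage follows the entry handling above; the actual call sites in verbs.c are not shown in this hunk):

/* Sketch: tag and register a kernel-owned PD, then unregister on teardown. */
static void example_track_pd(struct ib_pd *pd)
{
	pd->res.type = RDMA_RESTRACK_PD;
	pd->res.kern_name = KBUILD_MODNAME;	/* identifies the kernel owner */
	rdma_restrack_add(&pd->res);
}

static void example_untrack_pd(struct ib_pd *pd)
{
	rdma_restrack_del(&pd->res);	/* waits until all getters have put */
}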
+
+int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
+{
+ return kref_get_unless_zero(&res->kref);
+}
+EXPORT_SYMBOL(rdma_restrack_get);
+
+static void restrack_release(struct kref *kref)
+{
+ struct rdma_restrack_entry *res;
+
+ res = container_of(kref, struct rdma_restrack_entry, kref);
+ complete(&res->comp);
+}
+
+int rdma_restrack_put(struct rdma_restrack_entry *res)
+{
+ return kref_put(&res->kref, restrack_release);
+}
+EXPORT_SYMBOL(rdma_restrack_put);
+
+void rdma_restrack_del(struct rdma_restrack_entry *res)
+{
+ struct ib_device *dev;
+
+ if (!res->valid)
+ return;
+
+ dev = res_to_dev(res);
+ if (!dev)
+ return;
+
+ rdma_restrack_put(res);
+
+ wait_for_completion(&res->comp);
+
+ down_write(&dev->res.rwsem);
+ hash_del(&res->node);
+ res->valid = false;
+ if (res->task)
+ put_task_struct(res->task);
+ up_write(&dev->res.rwsem);
+}
+EXPORT_SYMBOL(rdma_restrack_del);
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 94a9eefb3cfc..ccaa3cc17fb5 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -253,6 +253,7 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
struct net_device *rdma_ndev)
{
struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev);
+ unsigned long gid_type_mask;
if (!rdma_ndev)
return;
@@ -262,21 +263,22 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
rcu_read_lock();
- if (rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
- is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
- BONDING_SLAVE_STATE_INACTIVE) {
- unsigned long gid_type_mask;
-
+ if ((rdma_ndev != event_ndev &&
+ !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
+ is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
+ BONDING_SLAVE_STATE_INACTIVE) {
rcu_read_unlock();
+ return;
+ }
- gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+ rcu_read_unlock();
- ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
- gid_type_mask,
- IB_CACHE_GID_DEFAULT_MODE_DELETE);
- } else {
- rcu_read_unlock();
- }
+ gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+ ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+ gid_type_mask,
+ IB_CACHE_GID_DEFAULT_MODE_DELETE);
}
static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
@@ -408,15 +410,18 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
rtnl_unlock();
}
-/* This function will rescan all of the network devices in the system
- * and add their gids, as needed, to the relevant RoCE devices. */
-int roce_rescan_device(struct ib_device *ib_dev)
+/**
+ * rdma_roce_rescan_device - Rescan all of the network devices in the system
+ * and add their gids, as needed, to the relevant RoCE devices.
+ *
+ * @ib_dev: the rdma device
+ */
+void rdma_roce_rescan_device(struct ib_device *ib_dev)
{
ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL,
enum_all_gids_of_dev_cb, NULL);
-
- return 0;
}
+EXPORT_SYMBOL(rdma_roce_rescan_device);
static void callback_for_addr_gid_device_scan(struct ib_device *device,
u8 port,
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 9939dcfb1b6a..3ee0adfb45e9 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -384,21 +384,17 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
count += ret;
prev_wr = &ctx->sig->data.reg_wr.wr;
- if (prot_sg_cnt) {
- ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot,
- prot_sg, prot_sg_cnt, 0);
- if (ret < 0)
- goto out_destroy_data_mr;
- count += ret;
+ ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot,
+ prot_sg, prot_sg_cnt, 0);
+ if (ret < 0)
+ goto out_destroy_data_mr;
+ count += ret;
- if (ctx->sig->prot.inv_wr.next)
- prev_wr->next = &ctx->sig->prot.inv_wr;
- else
- prev_wr->next = &ctx->sig->prot.reg_wr.wr;
- prev_wr = &ctx->sig->prot.reg_wr.wr;
- } else {
- ctx->sig->prot.mr = NULL;
- }
+ if (ctx->sig->prot.inv_wr.next)
+ prev_wr->next = &ctx->sig->prot.inv_wr;
+ else
+ prev_wr->next = &ctx->sig->prot.reg_wr.wr;
+ prev_wr = &ctx->sig->prot.reg_wr.wr;
ctx->sig->sig_mr = ib_mr_pool_get(qp, &qp->sig_mrs);
if (!ctx->sig->sig_mr) {
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index d2d57dc6eb2c..a61ec7e33613 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1227,120 +1227,132 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
return src_path_mask;
}
-int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
- struct sa_path_rec *rec,
- struct rdma_ah_attr *ah_attr)
+static int
+roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
+ struct sa_path_rec *rec)
{
+ struct net_device *resolved_dev;
+ struct net_device *ndev;
+ struct net_device *idev;
+ struct rdma_dev_addr dev_addr = {
+ .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
+ sa_path_get_ifindex(rec) : 0),
+ .net = sa_path_get_ndev(rec) ?
+ sa_path_get_ndev(rec) :
+ &init_net
+ };
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } sgid_addr, dgid_addr;
int ret;
- u16 gid_index;
- int use_roce;
- struct net_device *ndev = NULL;
- memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->type = rdma_ah_find_type(device, port_num);
+ if (rec->roce.route_resolved)
+ return 0;
- rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
+ if (!device->get_netdev)
+ return -EOPNOTSUPP;
- if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) &&
- (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)))
- rdma_ah_set_make_grd(ah_attr, true);
+ rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
+ rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
- rdma_ah_set_sl(ah_attr, rec->sl);
- rdma_ah_set_path_bits(ah_attr, be32_to_cpu(sa_path_get_slid(rec)) &
- get_src_path_mask(device, port_num));
- rdma_ah_set_port_num(ah_attr, port_num);
- rdma_ah_set_static_rate(ah_attr, rec->rate);
- use_roce = rdma_cap_eth_ah(device, port_num);
-
- if (use_roce) {
- struct net_device *idev;
- struct net_device *resolved_dev;
- struct rdma_dev_addr dev_addr = {
- .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
- sa_path_get_ifindex(rec) : 0),
- .net = sa_path_get_ndev(rec) ?
- sa_path_get_ndev(rec) :
- &init_net
- };
- union {
- struct sockaddr _sockaddr;
- struct sockaddr_in _sockaddr_in;
- struct sockaddr_in6 _sockaddr_in6;
- } sgid_addr, dgid_addr;
-
- if (!device->get_netdev)
- return -EOPNOTSUPP;
-
- rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
- rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
-
- /* validate the route */
- ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
- &dgid_addr._sockaddr, &dev_addr);
- if (ret)
- return ret;
+ /* validate the route */
+ ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
+ &dgid_addr._sockaddr, &dev_addr);
+ if (ret)
+ return ret;
- if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
- dev_addr.network == RDMA_NETWORK_IPV6) &&
- rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
- return -EINVAL;
+ if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
+ dev_addr.network == RDMA_NETWORK_IPV6) &&
+ rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
+ return -EINVAL;
- idev = device->get_netdev(device, port_num);
- if (!idev)
- return -ENODEV;
+ idev = device->get_netdev(device, port_num);
+ if (!idev)
+ return -ENODEV;
- resolved_dev = dev_get_by_index(dev_addr.net,
- dev_addr.bound_dev_if);
- if (!resolved_dev) {
- dev_put(idev);
- return -ENODEV;
- }
- ndev = ib_get_ndev_from_path(rec);
- rcu_read_lock();
- if ((ndev && ndev != resolved_dev) ||
- (resolved_dev != idev &&
- !rdma_is_upper_dev_rcu(idev, resolved_dev)))
- ret = -EHOSTUNREACH;
- rcu_read_unlock();
- dev_put(idev);
- dev_put(resolved_dev);
- if (ret) {
- if (ndev)
- dev_put(ndev);
- return ret;
- }
+ resolved_dev = dev_get_by_index(dev_addr.net,
+ dev_addr.bound_dev_if);
+ if (!resolved_dev) {
+ ret = -ENODEV;
+ goto done;
}
+ ndev = ib_get_ndev_from_path(rec);
+ rcu_read_lock();
+ if ((ndev && ndev != resolved_dev) ||
+ (resolved_dev != idev &&
+ !rdma_is_upper_dev_rcu(idev, resolved_dev)))
+ ret = -EHOSTUNREACH;
+ rcu_read_unlock();
+ dev_put(resolved_dev);
+ if (ndev)
+ dev_put(ndev);
+done:
+ dev_put(idev);
+ if (!ret)
+ rec->roce.route_resolved = true;
+ return ret;
+}
- if (rec->hop_limit > 0 || use_roce) {
- enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
+static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr)
+{
+ enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
+ struct net_device *ndev;
+ u16 gid_index;
+ int ret;
- ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
- port_num, ndev, &gid_index);
- if (ret) {
- if (ndev)
- dev_put(ndev);
- return ret;
- }
+ ndev = ib_get_ndev_from_path(rec);
+ ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
+ port_num, ndev, &gid_index);
+ if (ndev)
+ dev_put(ndev);
+ if (ret)
+ return ret;
- rdma_ah_set_grh(ah_attr, &rec->dgid,
- be32_to_cpu(rec->flow_label),
- gid_index, rec->hop_limit,
- rec->traffic_class);
- if (ndev)
- dev_put(ndev);
- }
+ rdma_ah_set_grh(ah_attr, &rec->dgid,
+ be32_to_cpu(rec->flow_label),
+ gid_index, rec->hop_limit,
+ rec->traffic_class);
+ return 0;
+}
+
+int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr)
+{
+ int ret = 0;
- if (use_roce) {
- u8 *dmac = sa_path_get_dmac(rec);
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ ah_attr->type = rdma_ah_find_type(device, port_num);
+ rdma_ah_set_sl(ah_attr, rec->sl);
+ rdma_ah_set_port_num(ah_attr, port_num);
+ rdma_ah_set_static_rate(ah_attr, rec->rate);
- if (!dmac)
- return -EINVAL;
- memcpy(ah_attr->roce.dmac, dmac, ETH_ALEN);
+ if (sa_path_is_roce(rec)) {
+ ret = roce_resolve_route_from_path(device, port_num, rec);
+ if (ret)
+ return ret;
+
+ memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN);
+ } else {
+ rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
+ if (sa_path_is_opa(rec) &&
+ rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))
+ rdma_ah_set_make_grd(ah_attr, true);
+
+ rdma_ah_set_path_bits(ah_attr,
+ be32_to_cpu(sa_path_get_slid(rec)) &
+ get_src_path_mask(device, port_num));
}
- return 0;
+ if (rec->hop_limit > 0 || sa_path_is_roce(rec))
+ ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr);
+ return ret;
}
-EXPORT_SYMBOL(ib_init_ah_from_path);
+EXPORT_SYMBOL(ib_init_ah_attr_from_path);
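A caller sketch for the renamed helper (assumes a resolved sa_path_rec and the two-argument rdma_create_ah() signature of this era; names other than ib_init_ah_attr_from_path are illustrative):

/* Sketch: build AH attributes from a path record and create the AH. */
static struct ib_ah *example_create_ah(struct ib_pd *pd, struct ib_device *device,
				       u8 port_num, struct sa_path_rec *rec)
{
	struct rdma_ah_attr ah_attr;
	int ret;

	ret = ib_init_ah_attr_from_path(device, port_num, rec, &ah_attr);
	if (ret)
		return ERR_PTR(ret);

	return rdma_create_ah(pd, &ah_attr);
}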
static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
{
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 59b2f96d986a..b61dda6b04fc 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -653,12 +653,11 @@ int ib_security_modify_qp(struct ib_qp *qp,
}
return ret;
}
-EXPORT_SYMBOL(ib_security_modify_qp);
-int ib_security_pkey_access(struct ib_device *dev,
- u8 port_num,
- u16 pkey_index,
- void *sec)
+static int ib_security_pkey_access(struct ib_device *dev,
+ u8 port_num,
+ u16 pkey_index,
+ void *sec)
{
u64 subnet_prefix;
u16 pkey;
@@ -678,7 +677,6 @@ int ib_security_pkey_access(struct ib_device *dev,
return security_ib_pkey_access(sec, subnet_prefix, pkey);
}
-EXPORT_SYMBOL(ib_security_pkey_access);
static int ib_mad_agent_security_change(struct notifier_block *nb,
unsigned long event,
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 8ae1308eecc7..31c7efaf8e7a 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -273,6 +273,7 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
break;
case IB_SPEED_SDR:
default: /* default to SDR for invalid rates */
+ speed = " SDR";
rate = 25;
break;
}
@@ -388,14 +389,26 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
+ union ib_gid *pgid;
union ib_gid gid;
ssize_t ret;
ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
- if (ret)
- return ret;
- return sprintf(buf, "%pI6\n", gid.raw);
+ /* If reading the GID fails, it is likely because the GID entry is
+ * empty (invalid) or a reserved GID in the table.
+ * User space expects to be able to read GID table entries as long as
+ * the given index is within the GID table size.
+ * Administrative/debugging tools fail to query the rest of the GID
+ * entries if they hit an error while querying a GID at the given index.
+ * To avoid user space throwing such an error on a failure to read a
+ * GID, return the zero GID as before. This maintains backward
+ * compatibility.
+ */
+ if (ret)
+ pgid = &zgid;
+ else
+ pgid = &gid;
+ return sprintf(buf, "%pI6\n", pgid->raw);
}
static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
@@ -810,10 +823,15 @@ static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr,
dev = port->ibdev;
stats = port->hw_stats;
}
+ mutex_lock(&stats->lock);
ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index);
if (ret)
- return ret;
- return print_hw_stat(stats, hsa->index, buf);
+ goto unlock;
+ ret = print_hw_stat(stats, hsa->index, buf);
+unlock:
+ mutex_unlock(&stats->lock);
+
+ return ret;
}
static ssize_t show_stats_lifespan(struct kobject *kobj,
@@ -821,17 +839,25 @@ static ssize_t show_stats_lifespan(struct kobject *kobj,
char *buf)
{
struct hw_stats_attribute *hsa;
+ struct rdma_hw_stats *stats;
int msecs;
hsa = container_of(attr, struct hw_stats_attribute, attr);
if (!hsa->port_num) {
struct ib_device *dev = container_of((struct device *)kobj,
struct ib_device, dev);
- msecs = jiffies_to_msecs(dev->hw_stats->lifespan);
+
+ stats = dev->hw_stats;
} else {
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
- msecs = jiffies_to_msecs(p->hw_stats->lifespan);
+
+ stats = p->hw_stats;
}
+
+ mutex_lock(&stats->lock);
+ msecs = jiffies_to_msecs(stats->lifespan);
+ mutex_unlock(&stats->lock);
+
return sprintf(buf, "%d\n", msecs);
}
@@ -840,6 +866,7 @@ static ssize_t set_stats_lifespan(struct kobject *kobj,
const char *buf, size_t count)
{
struct hw_stats_attribute *hsa;
+ struct rdma_hw_stats *stats;
int msecs;
int jiffies;
int ret;
@@ -854,11 +881,18 @@ static ssize_t set_stats_lifespan(struct kobject *kobj,
if (!hsa->port_num) {
struct ib_device *dev = container_of((struct device *)kobj,
struct ib_device, dev);
- dev->hw_stats->lifespan = jiffies;
+
+ stats = dev->hw_stats;
} else {
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
- p->hw_stats->lifespan = jiffies;
+
+ stats = p->hw_stats;
}
+
+ mutex_lock(&stats->lock);
+ stats->lifespan = jiffies;
+ mutex_unlock(&stats->lock);
+
return count;
}
@@ -951,6 +985,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
sysfs_attr_init(hsag->attrs[i]);
}
+ mutex_init(&stats->lock);
/* treat an error here as non-fatal */
hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num);
if (hsag->attrs[i])
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index f2a7f62c2834..685b4c95aa58 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -53,6 +53,8 @@
#include <rdma/ib_user_cm.h>
#include <rdma/ib_marshall.h>
+#include "core_priv.h"
+
MODULE_AUTHOR("Libor Michalek");
MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
MODULE_LICENSE("Dual BSD/GPL");
@@ -104,10 +106,13 @@ struct ib_ucm_event {
enum {
IB_UCM_MAJOR = 231,
IB_UCM_BASE_MINOR = 224,
- IB_UCM_MAX_DEVICES = 32
+ IB_UCM_MAX_DEVICES = RDMA_MAX_PORTS,
+ IB_UCM_NUM_FIXED_MINOR = 32,
+ IB_UCM_NUM_DYNAMIC_MINOR = IB_UCM_MAX_DEVICES - IB_UCM_NUM_FIXED_MINOR,
};
#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
+static dev_t dynamic_ucm_dev;
static void ib_ucm_add_one(struct ib_device *device);
static void ib_ucm_remove_one(struct ib_device *device, void *client_data);
@@ -425,7 +430,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
uevent->resp.id = ctx->id;
}
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&uevent->resp, sizeof(uevent->resp))) {
result = -EFAULT;
goto done;
@@ -436,7 +441,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
result = -ENOMEM;
goto done;
}
- if (copy_to_user((void __user *)(unsigned long)cmd.data,
+ if (copy_to_user(u64_to_user_ptr(cmd.data),
uevent->data, uevent->data_len)) {
result = -EFAULT;
goto done;
@@ -448,7 +453,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
result = -ENOMEM;
goto done;
}
- if (copy_to_user((void __user *)(unsigned long)cmd.info,
+ if (copy_to_user(u64_to_user_ptr(cmd.info),
uevent->info, uevent->info_len)) {
result = -EFAULT;
goto done;
@@ -497,7 +502,7 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
}
resp.id = ctx->id;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp))) {
result = -EFAULT;
goto err2;
@@ -551,7 +556,7 @@ static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file,
ib_ucm_cleanup_events(ctx);
resp.events_reported = ctx->events_reported;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
result = -EFAULT;
@@ -583,7 +588,7 @@ static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file,
resp.local_id = ctx->cm_id->local_id;
resp.remote_id = ctx->cm_id->remote_id;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
result = -EFAULT;
@@ -620,7 +625,7 @@ static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file,
ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
result = -EFAULT;
@@ -694,7 +699,7 @@ static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
if (!len)
return 0;
- data = memdup_user((void __user *)(unsigned long)src, len);
+ data = memdup_user(u64_to_user_ptr(src), len);
if (IS_ERR(data))
return PTR_ERR(data);
@@ -716,7 +721,7 @@ static int ib_ucm_path_get(struct sa_path_rec **path, u64 src)
if (!sa_path)
return -ENOMEM;
- if (copy_from_user(&upath, (void __user *)(unsigned long)src,
+ if (copy_from_user(&upath, u64_to_user_ptr(src),
sizeof(upath))) {
kfree(sa_path);
@@ -1199,7 +1204,6 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
return 0;
}
-static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES);
static void ib_ucm_release_dev(struct device *dev)
{
struct ib_ucm_device *ucm_dev;
@@ -1210,10 +1214,7 @@ static void ib_ucm_release_dev(struct device *dev)
static void ib_ucm_free_dev(struct ib_ucm_device *ucm_dev)
{
- if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
- clear_bit(ucm_dev->devnum, dev_map);
- else
- clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map);
+ clear_bit(ucm_dev->devnum, dev_map);
}
static const struct file_operations ucm_fops = {
@@ -1235,27 +1236,6 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
-static dev_t overflow_maj;
-static int find_overflow_devnum(void)
-{
- int ret;
-
- if (!overflow_maj) {
- ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES,
- "infiniband_cm");
- if (ret) {
- pr_err("ucm: couldn't register dynamic device number\n");
- return ret;
- }
- }
-
- ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES);
- if (ret >= IB_UCM_MAX_DEVICES)
- return -1;
-
- return ret;
-}
-
static void ib_ucm_add_one(struct ib_device *device)
{
int devnum;
@@ -1274,19 +1254,14 @@ static void ib_ucm_add_one(struct ib_device *device)
ucm_dev->dev.release = ib_ucm_release_dev;
devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
- if (devnum >= IB_UCM_MAX_DEVICES) {
- devnum = find_overflow_devnum();
- if (devnum < 0)
- goto err;
-
- ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES;
- base = devnum + overflow_maj;
- set_bit(devnum, overflow_map);
- } else {
- ucm_dev->devnum = devnum;
- base = devnum + IB_UCM_BASE_DEV;
- set_bit(devnum, dev_map);
- }
+ if (devnum >= IB_UCM_MAX_DEVICES)
+ goto err;
+ ucm_dev->devnum = devnum;
+ set_bit(devnum, dev_map);
+ if (devnum >= IB_UCM_NUM_FIXED_MINOR)
+ base = dynamic_ucm_dev + devnum - IB_UCM_NUM_FIXED_MINOR;
+ else
+ base = IB_UCM_BASE_DEV + devnum;
cdev_init(&ucm_dev->cdev, &ucm_fops);
ucm_dev->cdev.owner = THIS_MODULE;
@@ -1334,13 +1309,20 @@ static int __init ib_ucm_init(void)
{
int ret;
- ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES,
+ ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR,
"infiniband_cm");
if (ret) {
pr_err("ucm: couldn't register device number\n");
goto error1;
}
+ ret = alloc_chrdev_region(&dynamic_ucm_dev, 0, IB_UCM_NUM_DYNAMIC_MINOR,
+ "infiniband_cm");
+ if (ret) {
+ pr_err("ucm: couldn't register dynamic device number\n");
+ goto err_alloc;
+ }
+
ret = class_create_file(&cm_class, &class_attr_abi_version.attr);
if (ret) {
pr_err("ucm: couldn't create abi_version attribute\n");
@@ -1357,7 +1339,9 @@ static int __init ib_ucm_init(void)
error3:
class_remove_file(&cm_class, &class_attr_abi_version.attr);
error2:
- unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+ unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR);
+err_alloc:
+ unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR);
error1:
return ret;
}
@@ -1366,9 +1350,8 @@ static void __exit ib_ucm_cleanup(void)
{
ib_unregister_client(&ucm_client);
class_remove_file(&cm_class, &class_attr_abi_version.attr);
- unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
- if (overflow_maj)
- unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES);
+ unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR);
+ unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR);
idr_destroy(&ctx_id_table);
}
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index f33d2717fac5..453b8c67794e 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -159,6 +159,23 @@ static void ucma_put_ctx(struct ucma_context *ctx)
complete(&ctx->comp);
}
+/*
+ * Same as ucma_get_ctx() but requires that ->cm_id->device is valid, e.g. that the
+ * CM_ID is bound.
+ */
+static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
+{
+ struct ucma_context *ctx = ucma_get_ctx(file, id);
+
+ if (IS_ERR(ctx))
+ return ctx;
+ if (!ctx->cm_id->device) {
+ ucma_put_ctx(ctx);
+ return ERR_PTR(-EINVAL);
+ }
+ return ctx;
+}
+
static void ucma_close_event_id(struct work_struct *work)
{
struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);
@@ -382,7 +399,11 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
struct ucma_event *uevent;
int ret = 0;
- if (out_len < sizeof uevent->resp)
+ /*
+ * Old 32-bit user space does not send the 4-byte padding in the
+ * reserved field. We don't care; allow it to keep working.
+ */
+ if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved))
return -ENOSPC;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
@@ -416,8 +437,9 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
uevent->resp.id = ctx->id;
}
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &uevent->resp, sizeof uevent->resp)) {
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
+ &uevent->resp,
+ min_t(size_t, out_len, sizeof(uevent->resp)))) {
ret = -EFAULT;
goto done;
}
@@ -477,15 +499,15 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
return -ENOMEM;
ctx->uid = cmd.uid;
- cm_id = rdma_create_id(current->nsproxy->net_ns,
- ucma_event_handler, ctx, cmd.ps, qp_type);
+ cm_id = __rdma_create_id(current->nsproxy->net_ns,
+ ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
if (IS_ERR(cm_id)) {
ret = PTR_ERR(cm_id);
goto err1;
}
resp.id = ctx->id;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp))) {
ret = -EFAULT;
goto err2;
@@ -615,7 +637,7 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
}
resp.events_reported = ucma_free_ctx(ctx);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -729,7 +751,7 @@ static ssize_t ucma_resolve_route(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -845,7 +867,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
ucma_copy_iw_route(&resp, &ctx->cm_id->route);
out:
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -954,8 +976,8 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
} else {
addr->sib_family = AF_IB;
addr->sib_pkey = (__force __be16) resp.pkey;
- rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr,
- (union ib_gid *) &addr->sib_addr);
+ rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
+ NULL);
addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
&ctx->cm_id->route.addr.src_addr);
}
@@ -967,8 +989,8 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
} else {
addr->sib_family = AF_IB;
addr->sib_pkey = (__force __be16) resp.pkey;
- rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr,
- (union ib_gid *) &addr->sib_addr);
+ rdma_read_gids(ctx->cm_id, NULL,
+ (union ib_gid *)&addr->sib_addr);
addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
&ctx->cm_id->route.addr.dst_addr);
}
@@ -991,7 +1013,7 @@ static ssize_t ucma_query(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- response = (void __user *)(unsigned long) cmd.response;
+ response = u64_to_user_ptr(cmd.response);
ctx = ucma_get_ctx(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1045,7 +1067,7 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
if (!cmd.conn_param.valid)
return -EINVAL;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1087,19 +1109,19 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
if (cmd.conn_param.valid) {
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
mutex_lock(&file->mut);
- ret = rdma_accept(ctx->cm_id, &conn_param);
+ ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
if (!ret)
ctx->uid = cmd.uid;
mutex_unlock(&file->mut);
} else
- ret = rdma_accept(ctx->cm_id, NULL);
+ ret = __rdma_accept(ctx->cm_id, NULL, NULL);
ucma_put_ctx(ctx);
return ret;
@@ -1115,7 +1137,7 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1134,7 +1156,7 @@ static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1162,15 +1184,10 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
if (cmd.qp_state > IB_QPS_ERR)
return -EINVAL;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- if (!ctx->cm_id->device) {
- ret = -EINVAL;
- goto out;
- }
-
resp.qp_attr_mask = 0;
memset(&qp_attr, 0, sizeof qp_attr);
qp_attr.qp_state = cmd.qp_state;
@@ -1179,7 +1196,7 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
goto out;
ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -1318,7 +1335,7 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- optval = memdup_user((void __user *) (unsigned long) cmd.optval,
+ optval = memdup_user(u64_to_user_ptr(cmd.optval),
cmd.optlen);
if (IS_ERR(optval)) {
ret = PTR_ERR(optval);
@@ -1379,7 +1396,7 @@ static ssize_t ucma_process_join(struct ucma_file *file,
else
return -EINVAL;
- ctx = ucma_get_ctx(file, cmd->id);
+ ctx = ucma_get_ctx_dev(file, cmd->id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1398,7 +1415,7 @@ static ssize_t ucma_process_join(struct ucma_file *file,
goto err2;
resp.id = mc->id;
- if (copy_to_user((void __user *)(unsigned long) cmd->response,
+ if (copy_to_user(u64_to_user_ptr(cmd->response),
&resp, sizeof(resp))) {
ret = -EFAULT;
goto err3;
@@ -1507,7 +1524,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
resp.events_reported = mc->events_reported;
kfree(mc);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
out:
@@ -1594,7 +1611,7 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
ucma_unlock_files(cur_file, new_file);
response:
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
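The ucma.c hunks above make two recurring changes: every open-coded "(void __user *)(unsigned long)" cast of a response pointer becomes u64_to_user_ptr(), and commands that end up dereferencing ctx->cm_id->device switch from ucma_get_ctx() to ucma_get_ctx_dev(), so a context with no RDMA device bound yet is rejected up front (which is why the per-call device check inside ucma_init_qp_attr is dropped). A rough sketch of what the two helpers boil down to — the names with a _sketch suffix are illustrative, the in-tree definitions may differ in detail:

/* Sketches only; not the literal kernel definitions. */
static inline void __user *u64_to_user_ptr_sketch(u64 address)
{
	/* collapse the old two-step cast into one type-checked helper */
	return (void __user *)(uintptr_t)address;
}

static struct ucma_context *ucma_get_ctx_dev_sketch(struct ucma_file *file,
						    int id)
{
	struct ucma_context *ctx = ucma_get_ctx(file, id);

	if (IS_ERR(ctx))
		return ctx;
	if (!ctx->cm_id->device) {	/* no RDMA device bound yet */
		ucma_put_ctx(ctx);
		return ERR_PTR(-EINVAL);
	}
	return ctx;
}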
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 6511cb21f6e2..5e8ffd12c04e 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -55,16 +55,21 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_user_mad.h>
+#include "core_priv.h"
+
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace MAD packet access");
MODULE_LICENSE("Dual BSD/GPL");
enum {
- IB_UMAD_MAX_PORTS = 64,
+ IB_UMAD_MAX_PORTS = RDMA_MAX_PORTS,
IB_UMAD_MAX_AGENTS = 32,
IB_UMAD_MAJOR = 231,
- IB_UMAD_MINOR_BASE = 0
+ IB_UMAD_MINOR_BASE = 0,
+ IB_UMAD_NUM_FIXED_MINOR = 64,
+ IB_UMAD_NUM_DYNAMIC_MINOR = IB_UMAD_MAX_PORTS - IB_UMAD_NUM_FIXED_MINOR,
+ IB_ISSM_MINOR_BASE = IB_UMAD_NUM_FIXED_MINOR,
};
/*
@@ -127,9 +132,12 @@ struct ib_umad_packet {
static struct class *umad_class;
-static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
+static const dev_t base_umad_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
+static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) +
+ IB_UMAD_NUM_FIXED_MINOR;
+static dev_t dynamic_umad_dev;
+static dev_t dynamic_issm_dev;
-static DEFINE_SPINLOCK(port_lock);
static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
static void ib_umad_add_one(struct ib_device *device);
@@ -233,8 +241,7 @@ static void recv_handler(struct ib_mad_agent *agent,
* On OPA devices it is okay to lose the upper 16 bits of LID as this
* information is obtained elsewhere. Mask off the upper 16 bits.
*/
- if (agent->device->port_immutable[agent->port_num].core_cap_flags &
- RDMA_CORE_PORT_INTEL_OPA)
+ if (rdma_cap_opa_mad(agent->device, agent->port_num))
packet->mad.hdr.lid = ib_lid_be16(0xFFFF &
mad_recv_wc->wc->slid);
else
@@ -246,10 +253,14 @@ static void recv_handler(struct ib_mad_agent *agent,
if (packet->mad.hdr.grh_present) {
struct rdma_ah_attr ah_attr;
const struct ib_global_route *grh;
+ int ret;
- ib_init_ah_from_wc(agent->device, agent->port_num,
- mad_recv_wc->wc, mad_recv_wc->recv_buf.grh,
- &ah_attr);
+ ret = ib_init_ah_attr_from_wc(agent->device, agent->port_num,
+ mad_recv_wc->wc,
+ mad_recv_wc->recv_buf.grh,
+ &ah_attr);
+ if (ret)
+ goto err2;
grh = rdma_ah_read_grh(&ah_attr);
packet->mad.hdr.gid_index = grh->sgid_index;
@@ -515,7 +526,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
rdma_ah_set_dgid_raw(&ah_attr, packet->mad.hdr.gid);
}
- ah = rdma_create_ah(agent->qp->pd, &ah_attr);
+ ah = rdma_create_user_ah(agent->qp->pd, &ah_attr, NULL);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_up;
@@ -1139,54 +1150,26 @@ static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
__stringify(IB_USER_MAD_ABI_VERSION));
-static dev_t overflow_maj;
-static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
-static int find_overflow_devnum(struct ib_device *device)
-{
- int ret;
-
- if (!overflow_maj) {
- ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
- "infiniband_mad");
- if (ret) {
- dev_err(&device->dev,
- "couldn't register dynamic device number\n");
- return ret;
- }
- }
-
- ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS);
- if (ret >= IB_UMAD_MAX_PORTS)
- return -1;
-
- return ret;
-}
-
static int ib_umad_init_port(struct ib_device *device, int port_num,
struct ib_umad_device *umad_dev,
struct ib_umad_port *port)
{
int devnum;
- dev_t base;
+ dev_t base_umad;
+ dev_t base_issm;
- spin_lock(&port_lock);
devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
- if (devnum >= IB_UMAD_MAX_PORTS) {
- spin_unlock(&port_lock);
- devnum = find_overflow_devnum(device);
- if (devnum < 0)
- return -1;
-
- spin_lock(&port_lock);
- port->dev_num = devnum + IB_UMAD_MAX_PORTS;
- base = devnum + overflow_maj;
- set_bit(devnum, overflow_map);
+ if (devnum >= IB_UMAD_MAX_PORTS)
+ return -1;
+ port->dev_num = devnum;
+ set_bit(devnum, dev_map);
+ if (devnum >= IB_UMAD_NUM_FIXED_MINOR) {
+ base_umad = dynamic_umad_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
+ base_issm = dynamic_issm_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
} else {
- port->dev_num = devnum;
- base = devnum + base_dev;
- set_bit(devnum, dev_map);
+ base_umad = devnum + base_umad_dev;
+ base_issm = devnum + base_issm_dev;
}
- spin_unlock(&port_lock);
port->ib_dev = device;
port->port_num = port_num;
@@ -1198,7 +1181,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
port->cdev.owner = THIS_MODULE;
cdev_set_parent(&port->cdev, &umad_dev->kobj);
kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
- if (cdev_add(&port->cdev, base, 1))
+ if (cdev_add(&port->cdev, base_umad, 1))
goto err_cdev;
port->dev = device_create(umad_class, device->dev.parent,
@@ -1212,12 +1195,11 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (device_create_file(port->dev, &dev_attr_port))
goto err_dev;
- base += IB_UMAD_MAX_PORTS;
cdev_init(&port->sm_cdev, &umad_sm_fops);
port->sm_cdev.owner = THIS_MODULE;
cdev_set_parent(&port->sm_cdev, &umad_dev->kobj);
kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
- if (cdev_add(&port->sm_cdev, base, 1))
+ if (cdev_add(&port->sm_cdev, base_issm, 1))
goto err_sm_cdev;
port->sm_dev = device_create(umad_class, device->dev.parent,
@@ -1244,10 +1226,7 @@ err_dev:
err_cdev:
cdev_del(&port->cdev);
- if (port->dev_num < IB_UMAD_MAX_PORTS)
- clear_bit(devnum, dev_map);
- else
- clear_bit(devnum, overflow_map);
+ clear_bit(devnum, dev_map);
return -1;
}
@@ -1281,11 +1260,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
}
mutex_unlock(&port->file_mutex);
-
- if (port->dev_num < IB_UMAD_MAX_PORTS)
- clear_bit(port->dev_num, dev_map);
- else
- clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map);
+ clear_bit(port->dev_num, dev_map);
}
static void ib_umad_add_one(struct ib_device *device)
@@ -1361,13 +1336,23 @@ static int __init ib_umad_init(void)
{
int ret;
- ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
+ ret = register_chrdev_region(base_umad_dev,
+ IB_UMAD_NUM_FIXED_MINOR * 2,
"infiniband_mad");
if (ret) {
pr_err("couldn't register device number\n");
goto out;
}
+ ret = alloc_chrdev_region(&dynamic_umad_dev, 0,
+ IB_UMAD_NUM_DYNAMIC_MINOR * 2,
+ "infiniband_mad");
+ if (ret) {
+ pr_err("couldn't register dynamic device number\n");
+ goto out_alloc;
+ }
+ dynamic_issm_dev = dynamic_umad_dev + IB_UMAD_NUM_DYNAMIC_MINOR;
+
umad_class = class_create(THIS_MODULE, "infiniband_mad");
if (IS_ERR(umad_class)) {
ret = PTR_ERR(umad_class);
@@ -1395,7 +1380,12 @@ out_class:
class_destroy(umad_class);
out_chrdev:
- unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
+ unregister_chrdev_region(dynamic_umad_dev,
+ IB_UMAD_NUM_DYNAMIC_MINOR * 2);
+
+out_alloc:
+ unregister_chrdev_region(base_umad_dev,
+ IB_UMAD_NUM_FIXED_MINOR * 2);
out:
return ret;
@@ -1405,9 +1395,10 @@ static void __exit ib_umad_cleanup(void)
{
ib_unregister_client(&umad_client);
class_destroy(umad_class);
- unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
- if (overflow_maj)
- unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2);
+ unregister_chrdev_region(base_umad_dev,
+ IB_UMAD_NUM_FIXED_MINOR * 2);
+ unregister_chrdev_region(dynamic_umad_dev,
+ IB_UMAD_NUM_DYNAMIC_MINOR * 2);
}
module_init(ib_umad_init);
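The user_mad.c changes replace the old "overflow" char-dev scheme with a single bitmap over RDMA_MAX_PORTS minors: the first IB_UMAD_NUM_FIXED_MINOR (64) umad/issm pairs stay in the statically registered region at IB_UMAD_MAJOR, and any port beyond that draws from a dynamic region reserved once in ib_umad_init() via alloc_chrdev_region(). A small sketch of how a port's device numbers fall out of its devnum, assuming the constants and globals introduced above (the helper name is made up):

static void umad_devt_for(int devnum, dev_t *umad, dev_t *issm)
{
	if (devnum < IB_UMAD_NUM_FIXED_MINOR) {
		/* fixed range: minors 0..63 under IB_UMAD_MAJOR */
		*umad = base_umad_dev + devnum;
		*issm = base_issm_dev + devnum;
	} else {
		/* dynamic range handed out by alloc_chrdev_region() */
		*umad = dynamic_umad_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
		*issm = dynamic_issm_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
	}
}

The same file also switches the OPA capability test to rdma_cap_opa_mad() and starts checking the return value of ib_init_ah_attr_from_wc() instead of ignoring AH-attribute init failures.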
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index a8f104a4a91b..cfb51618ab7a 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -46,14 +46,22 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_std_types.h>
-#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
- do { \
- (udata)->inbuf = (const void __user *) (ibuf); \
- (udata)->outbuf = (void __user *) (obuf); \
- (udata)->inlen = (ilen); \
- (udata)->outlen = (olen); \
- } while (0)
+#define UVERBS_MODULE_NAME ib_uverbs
+#include <rdma/uverbs_named_ioctl.h>
+
+static inline void
+ib_uverbs_init_udata(struct ib_udata *udata,
+ const void __user *ibuf,
+ void __user *obuf,
+ size_t ilen, size_t olen)
+{
+ udata->inbuf = ibuf;
+ udata->outbuf = obuf;
+ udata->inlen = ilen;
+ udata->outlen = olen;
+}
static inline void
ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata,
@@ -61,7 +69,9 @@ ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata,
void __user *obuf,
size_t ilen, size_t olen)
{
- INIT_UDATA(udata, ilen ? ibuf : NULL, olen ? obuf : NULL, ilen, olen);
+ ib_uverbs_init_udata(udata,
+ ilen ? ibuf : NULL, olen ? obuf : NULL,
+ ilen, olen);
}
/*
@@ -193,11 +203,18 @@ struct ib_ucq_object {
u32 async_events_reported;
};
+struct ib_uflow_resources;
+struct ib_uflow_object {
+ struct ib_uobject uobject;
+ struct ib_uflow_resources *resources;
+};
+
extern const struct file_operations uverbs_event_fops;
void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
struct ib_device *ib_dev);
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
struct ib_uverbs_completion_event_file *ev_file,
@@ -220,7 +237,13 @@ int uverbs_dealloc_mw(struct ib_mw *mw);
void ib_uverbs_detach_umcast(struct ib_qp *qp,
struct ib_uqp_object *uobj);
+void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata);
+extern const struct uverbs_attr_def uverbs_uhw_compat_in;
+extern const struct uverbs_attr_def uverbs_uhw_compat_out;
long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+int uverbs_destroy_def_handler(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs);
struct ib_uverbs_flow_spec {
union {
@@ -234,13 +257,37 @@ struct ib_uverbs_flow_spec {
};
struct ib_uverbs_flow_spec_eth eth;
struct ib_uverbs_flow_spec_ipv4 ipv4;
+ struct ib_uverbs_flow_spec_esp esp;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
struct ib_uverbs_flow_spec_action_tag flow_tag;
struct ib_uverbs_flow_spec_action_drop drop;
+ struct ib_uverbs_flow_spec_action_handle action;
};
};
+int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
+ const void *kern_spec_mask,
+ const void *kern_spec_val,
+ size_t kern_filter_sz,
+ union ib_flow_spec *ib_spec);
+
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DEVICE);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_PD);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MR);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_CQ);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_QP);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_AH);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MW);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_SRQ);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_WQ);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM);
+
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
struct ib_device *ib_dev, \
@@ -300,5 +347,6 @@ IB_UVERBS_DECLARE_EX_CMD(destroy_wq);
IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table);
IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table);
IB_UVERBS_DECLARE_EX_CMD(modify_qp);
+IB_UVERBS_DECLARE_EX_CMD(modify_cq);
#endif /* UVERBS_H */
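Replacing the INIT_UDATA() macro with the ib_uverbs_init_udata() inline function gives the initializer real parameter types instead of casts hidden inside a macro; the uverbs_cmd.c hunks below convert every caller mechanically and pair it with u64_to_user_ptr() for the response buffer. A typical converted call, taken from the alloc_pd path shown further down:

	ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
			     u64_to_user_ptr(cmd.response) + sizeof(resp),
			     in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
			     out_len - sizeof(resp));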
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 732ca60fc47f..9be1f28443f1 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -50,7 +50,7 @@
static struct ib_uverbs_completion_event_file *
ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
{
- struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel),
+ struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL,
fd, context);
struct ib_uobject_file *uobj_file;
@@ -91,8 +91,8 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err;
}
- INIT_UDATA(&udata, buf + sizeof(cmd),
- (unsigned long) cmd.response + sizeof(resp),
+ ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
+ u64_to_user_ptr(cmd.response) + sizeof(resp),
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
@@ -141,8 +141,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err_fd;
}
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
ret = -EFAULT;
goto err_file;
}
@@ -238,8 +237,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof resp);
copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs);
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
return -EFAULT;
return in_len;
@@ -295,8 +293,7 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
resp.link_layer = rdma_port_get_link_layer(ib_dev,
cmd.port_num);
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
return -EFAULT;
return in_len;
@@ -320,12 +317,12 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- INIT_UDATA(&udata, buf + sizeof(cmd),
- (unsigned long) cmd.response + sizeof(resp),
+ ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
+ u64_to_user_ptr(cmd.response) + sizeof(resp),
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- uobj = uobj_alloc(uobj_get_type(pd), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_PD, file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -343,9 +340,10 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
uobj->object = pd;
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
+ pd->res.type = RDMA_RESTRACK_PD;
+ rdma_restrack_add(&pd->res);
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
ret = -EFAULT;
goto err_copy;
}
@@ -374,7 +372,7 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(pd), cmd.pd_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_PD, cmd.pd_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -490,8 +488,8 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- INIT_UDATA(&udata, buf + sizeof(cmd),
- (unsigned long) cmd.response + sizeof(resp),
+ ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
+ u64_to_user_ptr(cmd.response) + sizeof(resp),
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
@@ -519,7 +517,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
}
- obj = (struct ib_uxrcd_object *)uobj_alloc(uobj_get_type(xrcd),
+ obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD,
file->ucontext);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
@@ -556,8 +554,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
atomic_inc(&xrcd->usecnt);
}
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
ret = -EFAULT;
goto err_copy;
}
@@ -605,7 +602,7 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_XRCD, cmd.xrcd_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -654,8 +651,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- INIT_UDATA(&udata, buf + sizeof(cmd),
- (unsigned long) cmd.response + sizeof(resp),
+ ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
+ u64_to_user_ptr(cmd.response) + sizeof(resp),
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
@@ -666,11 +663,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
- uobj = uobj_alloc(uobj_get_type(mr), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_MR, file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -694,8 +691,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
mr->device = pd->device;
mr->pd = pd;
+ mr->dm = NULL;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
+ mr->res.type = RDMA_RESTRACK_MR;
+ rdma_restrack_add(&mr->res);
uobj->object = mr;
@@ -704,8 +704,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
resp.rkey = mr->rkey;
resp.mr_handle = uobj->id;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
ret = -EFAULT;
goto err_copy;
}
@@ -747,8 +746,8 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- INIT_UDATA(&udata, buf + sizeof(cmd),
- (unsigned long) cmd.response + sizeof(resp),
+ ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
+ u64_to_user_ptr(cmd.response) + sizeof(resp),
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
@@ -760,13 +759,18 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
(cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
return -EINVAL;
- uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
mr = uobj->object;
+ if (mr->dm) {
+ ret = -EINVAL;
+ goto put_uobjs;
+ }
+
if (cmd.flags & IB_MR_REREG_ACCESS) {
ret = ib_check_mr_access(cmd.access_flags);
if (ret)
@@ -774,7 +778,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
}
if (cmd.flags & IB_MR_REREG_PD) {
- pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto put_uobjs;
@@ -799,8 +803,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
resp.lkey = mr->lkey;
resp.rkey = mr->rkey;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &resp, sizeof(resp)))
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
ret = -EFAULT;
else
ret = in_len;
@@ -827,7 +830,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -856,18 +859,18 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = uobj_alloc(uobj_get_type(mw), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_MW, file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
}
- INIT_UDATA(&udata, buf + sizeof(cmd),
- (unsigned long)cmd.response + sizeof(resp),
+ ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
+ u64_to_user_ptr(cmd.response) + sizeof(resp),
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
@@ -888,8 +891,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
resp.rkey = mw->rkey;
resp.mw_handle = uobj->id;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &resp, sizeof(resp))) {
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) {
ret = -EFAULT;
goto err_copy;
}
@@ -920,7 +922,7 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(mw), cmd.mw_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_MW, cmd.mw_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -945,7 +947,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_alloc(uobj_get_type(comp_channel), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -955,8 +957,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
uobj_file.uobj);
ib_uverbs_init_event_queue(&ev_file->ev_queue);
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
uobj_alloc_abort(uobj);
return -EFAULT;
}
@@ -985,10 +986,13 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
struct ib_uverbs_ex_create_cq_resp resp;
struct ib_cq_init_attr attr = {};
+ if (!ib_dev->create_cq)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (cmd->comp_vector >= file->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
- obj = (struct ib_ucq_object *)uobj_alloc(uobj_get_type(cq),
+ obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ,
file->ucontext);
if (IS_ERR(obj))
return obj;
@@ -1036,12 +1040,14 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
resp.response_length = offsetof(typeof(resp), response_length) +
sizeof(resp.response_length);
+ cq->res.type = RDMA_RESTRACK_CQ;
+ rdma_restrack_add(&cq->res);
+
ret = cb(file, obj, &resp, ucore, context);
if (ret)
goto err_cb;
uobj_alloc_commit(&obj->uobject);
-
return obj;
err_cb:
@@ -1086,10 +1092,11 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp));
+ ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response),
+ sizeof(cmd), sizeof(resp));
- INIT_UDATA(&uhw, buf + sizeof(cmd),
- (unsigned long)cmd.response + sizeof(resp),
+ ib_uverbs_init_udata(&uhw, buf + sizeof(cmd),
+ u64_to_user_ptr(cmd.response) + sizeof(resp),
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
@@ -1152,10 +1159,7 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
min(ucore->inlen, sizeof(cmd)),
ib_uverbs_ex_create_cq_cb, NULL);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- return 0;
+ return PTR_ERR_OR_ZERO(obj);
}
ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
@@ -1172,12 +1176,12 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- INIT_UDATA(&udata, buf + sizeof(cmd),
- (unsigned long) cmd.response + sizeof(resp),
+ ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
+ u64_to_user_ptr(cmd.response) + sizeof(resp),
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1187,8 +1191,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
resp.cqe = cq->cqe;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp.cqe))
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp.cqe))
ret = -EFAULT;
out:
@@ -1207,7 +1210,7 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
tmp.opcode = wc->opcode;
tmp.vendor_err = wc->vendor_err;
tmp.byte_len = wc->byte_len;
- tmp.ex.imm_data = (__u32 __force) wc->ex.imm_data;
+ tmp.ex.imm_data = wc->ex.imm_data;
tmp.qp_num = wc->qp->qp_num;
tmp.src_qp = wc->src_qp;
tmp.wc_flags = wc->wc_flags;
@@ -1243,12 +1246,12 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
/* we copy a struct ib_uverbs_poll_cq_resp to user space */
- header_ptr = (void __user *)(unsigned long) cmd.response;
+ header_ptr = u64_to_user_ptr(cmd.response);
data_ptr = header_ptr + sizeof resp;
memset(&resp, 0, sizeof resp);
@@ -1290,7 +1293,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1317,7 +1320,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(cq), cmd.cq_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -1342,8 +1345,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
resp.async_events_reported = obj->async_events_reported;
uverbs_uobject_put(uobj);
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
return -EFAULT;
return in_len;
@@ -1377,7 +1379,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
- obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
file->ucontext);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -1388,7 +1390,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
sizeof(cmd->rwq_ind_tbl_handle) &&
(cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
- ind_tbl = uobj_get_obj_read(rwq_ind_table,
+ ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL,
cmd->rwq_ind_tbl_handle,
file->ucontext);
if (!ind_tbl) {
@@ -1415,7 +1417,7 @@ static int create_qp(struct ib_uverbs_file *file,
has_sq = false;
if (cmd->qp_type == IB_QPT_XRC_TGT) {
- xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->pd_handle,
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
file->ucontext);
if (IS_ERR(xrcd_uobj)) {
@@ -1435,7 +1437,7 @@ static int create_qp(struct ib_uverbs_file *file,
cmd->max_recv_sge = 0;
} else {
if (cmd->is_srq) {
- srq = uobj_get_obj_read(srq, cmd->srq_handle,
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle,
file->ucontext);
if (!srq || srq->srq_type == IB_SRQT_XRC) {
ret = -EINVAL;
@@ -1445,7 +1447,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (!ind_tbl) {
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
- rcq = uobj_get_obj_read(cq, cmd->recv_cq_handle,
+ rcq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle,
file->ucontext);
if (!rcq) {
ret = -EINVAL;
@@ -1456,11 +1458,11 @@ static int create_qp(struct ib_uverbs_file *file,
}
if (has_sq)
- scq = uobj_get_obj_read(cq, cmd->send_cq_handle,
+ scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle,
file->ucontext);
if (!ind_tbl)
rcq = rcq ?: scq;
- pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
@@ -1500,7 +1502,8 @@ static int create_qp(struct ib_uverbs_file *file,
IB_QP_CREATE_MANAGED_RECV |
IB_QP_CREATE_SCATTER_FCS |
IB_QP_CREATE_CVLAN_STRIPPING |
- IB_QP_CREATE_SOURCE_QPN)) {
+ IB_QP_CREATE_SOURCE_QPN |
+ IB_QP_CREATE_PCI_WRITE_END_PADDING)) {
ret = -EINVAL;
goto err_put;
}
@@ -1525,7 +1528,8 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
- qp = device->create_qp(pd, &attr, uhw);
+ qp = _ib_create_qp(device, pd, &attr, uhw,
+ &obj->uevent.uobject);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
@@ -1538,7 +1542,6 @@ static int create_qp(struct ib_uverbs_file *file,
goto err_cb;
qp->real_qp = qp;
- qp->device = device;
qp->pd = pd;
qp->send_cq = attr.send_cq;
qp->recv_cq = attr.recv_cq;
@@ -1558,8 +1561,10 @@ static int create_qp(struct ib_uverbs_file *file,
atomic_inc(&attr.srq->usecnt);
if (ind_tbl)
atomic_inc(&ind_tbl->usecnt);
+ } else {
+ /* It is done in _ib_create_qp for other QP types */
+ qp->uobject = &obj->uevent.uobject;
}
- qp->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = qp;
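create_qp() now goes through the _ib_create_qp() core helper rather than calling the driver's ->create_qp hook directly, so qp->device and qp->uobject get filled in one place; the IB_QPT_XRC_TGT path still uses ib_create_qp() and keeps its explicit qp->uobject assignment, which is why that line moves into the else branch above. A rough sketch of what the helper presumably does for the non-XRC_TGT path — hypothetical name and body, the real declaration lives in core_priv.h and may do more:

static inline struct ib_qp *_ib_create_qp_sketch(struct ib_device *dev,
						 struct ib_pd *pd,
						 struct ib_qp_init_attr *attr,
						 struct ib_udata *udata,
						 struct ib_uobject *uobj)
{
	struct ib_qp *qp = dev->create_qp(pd, attr, udata);

	if (!IS_ERR(qp)) {
		qp->device  = dev;	/* previously set open-coded in create_qp() */
		qp->uobject = uobj;	/* previously set after the type switch     */
	}
	return qp;
}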
@@ -1649,10 +1654,10 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd),
- resp_size);
- INIT_UDATA(&uhw, buf + sizeof(cmd),
- (unsigned long)cmd.response + resp_size,
+ ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response),
+ sizeof(cmd), resp_size);
+ ib_uverbs_init_udata(&uhw, buf + sizeof(cmd),
+ u64_to_user_ptr(cmd.response) + resp_size,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - resp_size);
@@ -1749,17 +1754,17 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- INIT_UDATA(&udata, buf + sizeof(cmd),
- (unsigned long) cmd.response + sizeof(resp),
+ ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
+ u64_to_user_ptr(cmd.response) + sizeof(resp),
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
file->ucontext);
if (IS_ERR(obj))
return PTR_ERR(obj);
- xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd.pd_handle,
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle,
file->ucontext);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
@@ -1794,8 +1799,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
resp.qpn = qp->qp_num;
resp.qp_handle = obj->uevent.uobject.id;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
ret = -EFAULT;
goto err_destroy;
}
@@ -1863,7 +1867,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
goto out;
}
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -1910,8 +1914,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
resp.max_inline_data = init_attr->cap.max_inline_data;
resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
ret = -EFAULT;
out:
@@ -1969,7 +1972,7 @@ static int modify_qp(struct ib_uverbs_file *file,
if (!attr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, cmd->base.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -2043,6 +2046,13 @@ static int modify_qp(struct ib_uverbs_file *file,
goto release_qp;
}
+ if ((cmd->base.attr_mask & IB_QP_CUR_STATE &&
+ cmd->base.cur_qp_state > IB_QPS_ERR) ||
+ cmd->base.qp_state > IB_QPS_ERR) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+
attr->qp_state = cmd->base.qp_state;
attr->cur_qp_state = cmd->base.cur_qp_state;
attr->path_mtu = cmd->base.path_mtu;
@@ -2103,7 +2113,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
return -EOPNOTSUPP;
- INIT_UDATA(&udata, buf + sizeof(cmd.base), NULL,
+ ib_uverbs_init_udata(&udata, buf + sizeof(cmd.base), NULL,
in_len - sizeof(cmd.base) - sizeof(struct ib_uverbs_cmd_hdr),
out_len);
@@ -2166,7 +2176,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof resp);
- uobj = uobj_get_write(uobj_get_type(qp), cmd.qp_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -2187,8 +2197,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
resp.events_reported = obj->uevent.events_reported;
uverbs_uobject_put(uobj);
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
return -EFAULT;
return in_len;
@@ -2233,7 +2242,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (!user_wr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
@@ -2269,7 +2278,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
goto out_put;
}
- ud->ah = uobj_get_obj_read(ah, user_wr->wr.ud.ah,
+ ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah,
file->ucontext);
if (!ud->ah) {
kfree(ud);
@@ -2372,8 +2381,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
break;
}
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
ret = -EFAULT;
out_put:
@@ -2505,7 +2513,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
@@ -2521,8 +2529,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
}
}
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
ret = -EFAULT;
out:
@@ -2555,7 +2562,7 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
if (!srq)
goto out;
@@ -2571,8 +2578,7 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
break;
}
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
ret = -EFAULT;
out:
@@ -2598,7 +2604,6 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
struct rdma_ah_attr attr;
int ret;
struct ib_udata udata;
- u8 *dmac;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -2609,16 +2614,16 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num))
return -EINVAL;
- INIT_UDATA(&udata, buf + sizeof(cmd),
- (unsigned long)cmd.response + sizeof(resp),
+ ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
+ u64_to_user_ptr(cmd.response) + sizeof(resp),
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- uobj = uobj_alloc(uobj_get_type(ah), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_AH, file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err;
@@ -2641,28 +2646,20 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
} else {
rdma_ah_set_ah_flags(&attr, 0);
}
- dmac = rdma_ah_retrieve_dmac(&attr);
- if (dmac)
- memset(dmac, 0, ETH_ALEN);
-
- ah = pd->device->create_ah(pd, &attr, &udata);
+ ah = rdma_create_user_ah(pd, &attr, &udata);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_put;
}
- ah->device = pd->device;
- ah->pd = pd;
- atomic_inc(&pd->usecnt);
ah->uobject = uobj;
uobj->user_handle = cmd.user_handle;
uobj->object = ah;
resp.ah_handle = uobj->id;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
ret = -EFAULT;
goto err_copy;
}
@@ -2694,7 +2691,7 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(ah), cmd.ah_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_AH, cmd.ah_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -2717,7 +2714,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
@@ -2768,7 +2765,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
@@ -2797,8 +2794,52 @@ out_put:
return ret ? ret : in_len;
}
-static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+struct ib_uflow_resources {
+ size_t max;
+ size_t num;
+ struct ib_flow_action *collection[0];
+};
+
+static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
+{
+ struct ib_uflow_resources *resources;
+
+ resources =
+ kmalloc(sizeof(*resources) +
+ num_specs * sizeof(*resources->collection), GFP_KERNEL);
+
+ if (!resources)
+ return NULL;
+
+ resources->num = 0;
+ resources->max = num_specs;
+
+ return resources;
+}
+
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
+{
+ unsigned int i;
+
+ for (i = 0; i < uflow_res->num; i++)
+ atomic_dec(&uflow_res->collection[i]->usecnt);
+
+ kfree(uflow_res);
+}
+
+static void flow_resources_add(struct ib_uflow_resources *uflow_res,
+ struct ib_flow_action *action)
+{
+ WARN_ON(uflow_res->num >= uflow_res->max);
+
+ atomic_inc(&action->usecnt);
+ uflow_res->collection[uflow_res->num++] = action;
+}
+
+static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
+ struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec,
+ struct ib_uflow_resources *uflow_res)
{
ib_spec->type = kern_spec->type;
switch (ib_spec->type) {
@@ -2817,19 +2858,34 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
ib_spec->drop.size = sizeof(struct ib_flow_spec_action_drop);
break;
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ if (kern_spec->action.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_handle))
+ return -EOPNOTSUPP;
+ ib_spec->action.act = uobj_get_obj_read(flow_action,
+ UVERBS_OBJECT_FLOW_ACTION,
+ kern_spec->action.handle,
+ ucontext);
+ if (!ib_spec->action.act)
+ return -EINVAL;
+ ib_spec->action.size =
+ sizeof(struct ib_flow_spec_action_handle);
+ flow_resources_add(uflow_res, ib_spec->action.act);
+ uobj_put_obj_read(ib_spec->action.act);
+ break;
default:
return -EINVAL;
}
return 0;
}
-static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
+static size_t kern_spec_filter_sz(const struct ib_uverbs_flow_spec_hdr *spec)
{
/* Returns user space filter size, includes padding */
return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2;
}
-static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
+static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size,
u16 ib_real_filter_sz)
{
/*
@@ -2847,28 +2903,21 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
return kern_filter_size;
}
-static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
+ const void *kern_spec_mask,
+ const void *kern_spec_val,
+ size_t kern_filter_sz,
+ union ib_flow_spec *ib_spec)
{
ssize_t actual_filter_sz;
- ssize_t kern_filter_sz;
ssize_t ib_filter_sz;
- void *kern_spec_mask;
- void *kern_spec_val;
- if (kern_spec->reserved)
- return -EINVAL;
-
- ib_spec->type = kern_spec->type;
-
- kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
/* User flow spec size must be aligned to 4 bytes */
if (kern_filter_sz != ALIGN(kern_filter_sz, 4))
return -EINVAL;
- kern_spec_val = (void *)kern_spec +
- sizeof(struct ib_uverbs_flow_spec_hdr);
- kern_spec_mask = kern_spec_val + kern_filter_sz;
+ ib_spec->type = type;
+
if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL))
return -EINVAL;
@@ -2937,20 +2986,56 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
(ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24))
return -EINVAL;
break;
+ case IB_FLOW_SPEC_ESP:
+ ib_filter_sz = offsetof(struct ib_flow_esp_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->esp.size = sizeof(struct ib_flow_spec_esp);
+ memcpy(&ib_spec->esp.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->esp.mask, kern_spec_mask, actual_filter_sz);
+ break;
default:
return -EINVAL;
}
return 0;
}
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ ssize_t kern_filter_sz;
+ void *kern_spec_mask;
+ void *kern_spec_val;
+
+ if (kern_spec->reserved)
+ return -EINVAL;
+
+ kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
+
+ kern_spec_val = (void *)kern_spec +
+ sizeof(struct ib_uverbs_flow_spec_hdr);
+ kern_spec_mask = kern_spec_val + kern_filter_sz;
+
+ return ib_uverbs_kern_spec_to_ib_spec_filter(kern_spec->type,
+ kern_spec_mask,
+ kern_spec_val,
+ kern_filter_sz, ib_spec);
+}
+
+static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext,
+ struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec,
+ struct ib_uflow_resources *uflow_res)
{
if (kern_spec->reserved)
return -EINVAL;
if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
- return kern_spec_to_ib_spec_action(kern_spec, ib_spec);
+ return kern_spec_to_ib_spec_action(ucontext, kern_spec, ib_spec,
+ uflow_res);
else
return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
}
@@ -2992,18 +3077,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- obj = (struct ib_uwq_object *)uobj_alloc(uobj_get_type(wq),
+ obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ,
file->ucontext);
if (IS_ERR(obj))
return PTR_ERR(obj);
- pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
if (!pd) {
err = -EINVAL;
goto err_uobj;
}
- cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
if (!cq) {
err = -EINVAL;
goto err_put_pd;
@@ -3020,6 +3105,11 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
wq_init_attr.create_flags = cmd.create_flags;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
+
+ if (!pd->device->create_wq) {
+ err = -EOPNOTSUPP;
+ goto err_put_cq;
+ }
wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
if (IS_ERR(wq)) {
err = PTR_ERR(wq);
@@ -3102,7 +3192,7 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
resp.response_length = required_resp_len;
- uobj = uobj_get_write(uobj_get_type(wq), cmd.wq_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3153,7 +3243,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
- wq = uobj_get_obj_read(wq, cmd.wq_handle, file->ucontext);
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext);
if (!wq)
return -EINVAL;
@@ -3163,7 +3253,12 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
wq_attr.flags = cmd.flags;
wq_attr.flags_mask = cmd.flags_mask;
}
+ if (!wq->device->modify_wq) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
+out:
uobj_put_obj_read(wq);
return ret;
}
@@ -3242,7 +3337,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
- wq = uobj_get_obj_read(wq, wqs_handles[num_read_wqs],
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs],
file->ucontext);
if (!wq) {
err = -EINVAL;
@@ -3252,7 +3347,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
wqs[num_read_wqs] = wq;
}
- uobj = uobj_alloc(uobj_get_type(rwq_ind_table), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file->ucontext);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto put_wqs;
@@ -3260,6 +3355,11 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
init_attr.ind_tbl = wqs;
+
+ if (!ib_dev->create_rwq_ind_table) {
+ err = -EOPNOTSUPP;
+ goto err_uobj;
+ }
rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
if (IS_ERR(rwq_ind_tbl)) {
@@ -3334,7 +3434,7 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- uobj = uobj_get_write(uobj_get_type(rwq_ind_table), cmd.ind_tbl_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_RWQ_IND_TBL, cmd.ind_tbl_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3350,10 +3450,12 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
struct ib_uverbs_create_flow cmd;
struct ib_uverbs_create_flow_resp resp;
struct ib_uobject *uobj;
+ struct ib_uflow_object *uflow;
struct ib_flow *flow_id;
struct ib_uverbs_flow_attr *kern_flow_attr;
struct ib_flow_attr *flow_attr;
struct ib_qp *qp;
+ struct ib_uflow_resources *uflow_res;
int err = 0;
void *kern_spec;
void *ib_spec;
@@ -3413,13 +3515,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
kern_flow_attr = &cmd.flow_attr;
}
- uobj = uobj_alloc(uobj_get_type(flow), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file->ucontext);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto err_free_attr;
}
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp) {
err = -EINVAL;
goto err_uobj;
@@ -3436,6 +3538,11 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = -ENOMEM;
goto err_put;
}
+ uflow_res = flow_resources_alloc(cmd.flow_attr.num_of_specs);
+ if (!uflow_res) {
+ err = -ENOMEM;
+ goto err_free_flow_attr;
+ }
flow_attr->type = kern_flow_attr->type;
flow_attr->priority = kern_flow_attr->priority;
@@ -3450,7 +3557,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
cmd.flow_attr.size >=
((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
- err = kern_spec_to_ib_spec(kern_spec, ib_spec);
+ err = kern_spec_to_ib_spec(file->ucontext, kern_spec, ib_spec,
+ uflow_res);
if (err)
goto err_free;
flow_attr->size +=
@@ -3472,6 +3580,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
}
flow_id->uobject = uobj;
uobj->object = flow_id;
+ uflow = container_of(uobj, typeof(*uflow), uobject);
+ uflow->resources = uflow_res;
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;
@@ -3490,6 +3600,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err_copy:
ib_destroy_flow(flow_id);
err_free:
+ ib_uverbs_flow_resources_free(uflow_res);
+err_free_flow_attr:
kfree(flow_attr);
err_put:
uobj_put_obj_read(qp);
@@ -3520,7 +3632,7 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EINVAL;
- uobj = uobj_get_write(uobj_get_type(flow), cmd.flow_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_FLOW, cmd.flow_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3542,7 +3654,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
struct ib_srq_init_attr attr;
int ret;
- obj = (struct ib_usrq_object *)uobj_alloc(uobj_get_type(srq),
+ obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ,
file->ucontext);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -3551,7 +3663,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
attr.ext.tag_matching.max_num_tags = cmd->max_num_tags;
if (cmd->srq_type == IB_SRQT_XRC) {
- xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->xrcd_handle,
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
file->ucontext);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
@@ -3569,7 +3681,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
}
if (ib_srq_has_cq(cmd->srq_type)) {
- attr.ext.cq = uobj_get_obj_read(cq, cmd->cq_handle,
+ attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle,
file->ucontext);
if (!attr.ext.cq) {
ret = -EINVAL;
@@ -3577,7 +3689,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
}
}
- pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_put_cq;
@@ -3629,7 +3741,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
if (cmd->srq_type == IB_SRQT_XRC)
resp.srqn = srq->ext.xrc.srq_num;
- if (copy_to_user((void __user *) (unsigned long) cmd->response,
+ if (copy_to_user(u64_to_user_ptr(cmd->response),
&resp, sizeof resp)) {
ret = -EFAULT;
goto err_copy;
@@ -3693,8 +3805,8 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
xcmd.max_sge = cmd.max_sge;
xcmd.srq_limit = cmd.srq_limit;
- INIT_UDATA(&udata, buf + sizeof(cmd),
- (unsigned long) cmd.response + sizeof(resp),
+ ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
+ u64_to_user_ptr(cmd.response) + sizeof(resp),
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
@@ -3720,8 +3832,8 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- INIT_UDATA(&udata, buf + sizeof(cmd),
- (unsigned long) cmd.response + sizeof(resp),
+ ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
+ u64_to_user_ptr(cmd.response) + sizeof(resp),
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
@@ -3746,10 +3858,10 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+ ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
out_len);
- srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
@@ -3780,7 +3892,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
@@ -3797,8 +3909,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
resp.max_sge = attr.max_sge;
resp.srq_limit = attr.srq_limit;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
return -EFAULT;
return in_len;
@@ -3818,7 +3929,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(srq), cmd.srq_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3839,8 +3950,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
}
resp.events_reported = obj->events_reported;
uverbs_uobject_put(uobj);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &resp, sizeof(resp)))
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
return -EFAULT;
return in_len;
@@ -3856,6 +3966,9 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
struct ib_device_attr attr = {0};
int err;
+ if (!ib_dev->query_device)
+ return -EOPNOTSUPP;
+
if (ucore->inlen < sizeof(cmd))
return -EINVAL;
@@ -3944,7 +4057,64 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.tm_caps.max_sge = attr.tm_caps.max_sge;
resp.tm_caps.flags = attr.tm_caps.flags;
resp.response_length += sizeof(resp.tm_caps);
+
+ if (ucore->outlen < resp.response_length + sizeof(resp.cq_moderation_caps))
+ goto end;
+
+ resp.cq_moderation_caps.max_cq_moderation_count =
+ attr.cq_caps.max_cq_moderation_count;
+ resp.cq_moderation_caps.max_cq_moderation_period =
+ attr.cq_caps.max_cq_moderation_period;
+ resp.response_length += sizeof(resp.cq_moderation_caps);
+
+ if (ucore->outlen < resp.response_length + sizeof(resp.max_dm_size))
+ goto end;
+
+ resp.max_dm_size = attr.max_dm_size;
+ resp.response_length += sizeof(resp.max_dm_size);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
}
+
+int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_ex_modify_cq cmd = {};
+ struct ib_cq *cq;
+ size_t required_cmd_sz;
+ int ret;
+
+ required_cmd_sz = offsetof(typeof(cmd), reserved) +
+ sizeof(cmd.reserved);
+ if (ucore->inlen < required_cmd_sz)
+ return -EINVAL;
+
+ /* sanity checks */
+ if (ucore->inlen > sizeof(cmd) &&
+ !ib_is_udata_cleared(ucore, sizeof(cmd),
+ ucore->inlen - sizeof(cmd)))
+ return -EOPNOTSUPP;
+
+ ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ if (ret)
+ return ret;
+
+ if (!cmd.attr_mask || cmd.reserved)
+ return -EINVAL;
+
+ if (cmd.attr_mask > IB_CQ_MODERATE)
+ return -EOPNOTSUPP;
+
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+ if (!cq)
+ return -EINVAL;
+
+ ret = rdma_set_cq_moderation(cq, cmd.attr.cq_count, cmd.attr.cq_period);
+
+ uobj_put_obj_read(cq);
+
+ return ret;
+}
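Closing out uverbs_cmd.c, ib_uverbs_ex_query_device() gains two more optional response blocks (CQ moderation caps and max_dm_size) and the new ib_uverbs_ex_modify_cq() handler plumbs IB_CQ_MODERATE through to rdma_set_cq_moderation(). Both follow the usual extensible-ABI rules for "ex" verbs: a response field is emitted only if the caller's output buffer is big enough for everything written so far plus that field, and an input longer than the known struct is accepted only if its tail is zeroed. Restated schematically from the hunks above (the goto-based flow is rewritten as plain ifs here):

	/* response growth, as in ex_query_device */
	if (ucore->outlen >= resp.response_length + sizeof(resp.max_dm_size)) {
		resp.max_dm_size = attr.max_dm_size;
		resp.response_length += sizeof(resp.max_dm_size);
	}

	/* input acceptance, as in ex_modify_cq: a longer request from a newer
	 * ABI is only tolerated if the part we do not understand is all zero */
	if (ucore->inlen > sizeof(cmd) &&
	    !ib_is_udata_cleared(ucore, sizeof(cmd), ucore->inlen - sizeof(cmd)))
		return -EOPNOTSUPP;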
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 5feb8bbeff18..8d32c4ae368c 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -35,6 +35,17 @@
#include "rdma_core.h"
#include "uverbs.h"
+static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
+ u16 len)
+{
+ if (uattr->len > sizeof(((struct ib_uverbs_attr *)0)->data))
+ return ib_is_buffer_cleared(u64_to_user_ptr(uattr->data) + len,
+ uattr->len - len);
+
+ return !memchr_inv((const void *)&uattr->data + len,
+ 0, uattr->len - len);
+}
+
static int uverbs_process_attr(struct ib_device *ibdev,
struct ib_ucontext *ucontext,
const struct ib_uverbs_attr *uattr,
@@ -44,14 +55,12 @@ static int uverbs_process_attr(struct ib_device *ibdev,
struct ib_uverbs_attr __user *uattr_ptr)
{
const struct uverbs_attr_spec *spec;
+ const struct uverbs_attr_spec *val_spec;
struct uverbs_attr *e;
const struct uverbs_object_spec *object;
struct uverbs_obj_attr *o_attr;
struct uverbs_attr *elements = attr_bundle_h->attrs;
- if (uattr->reserved)
- return -EINVAL;
-
if (attr_id >= attr_spec_bucket->num_attrs) {
if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
return -EINVAL;
@@ -63,15 +72,46 @@ static int uverbs_process_attr(struct ib_device *ibdev,
return -EINVAL;
spec = &attr_spec_bucket->attrs[attr_id];
+ val_spec = spec;
e = &elements[attr_id];
e->uattr = uattr_ptr;
switch (spec->type) {
+ case UVERBS_ATTR_TYPE_ENUM_IN:
+ if (uattr->attr_data.enum_data.elem_id >= spec->enum_def.num_elems)
+ return -EOPNOTSUPP;
+
+ if (uattr->attr_data.enum_data.reserved)
+ return -EINVAL;
+
+ val_spec = &spec->enum_def.ids[uattr->attr_data.enum_data.elem_id];
+
+ /* Currently we only support PTR_IN based enums */
+ if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN)
+ return -EOPNOTSUPP;
+
+ e->ptr_attr.enum_id = uattr->attr_data.enum_data.elem_id;
+ /* fall through */
case UVERBS_ATTR_TYPE_PTR_IN:
+ /* Ensure that any data provided by userspace beyond the known
+ * struct is zero. Userspace that knows how to use some future
+ * longer struct will fail here if used with an old kernel and
+ * non-zero content, making ABI compat/discovery simpler.
+ */
+ if (uattr->len > val_spec->ptr.len &&
+ val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO &&
+ !uverbs_is_attr_cleared(uattr, val_spec->ptr.len))
+ return -EOPNOTSUPP;
+
+ /* fall through */
case UVERBS_ATTR_TYPE_PTR_OUT:
- if (uattr->len < spec->len ||
- (!(spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ) &&
- uattr->len > spec->len))
+ if (uattr->len < val_spec->ptr.min_len ||
+ (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) &&
+ uattr->len > val_spec->ptr.len))
+ return -EINVAL;
+
+ if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN &&
+ uattr->attr_data.reserved)
return -EINVAL;
e->ptr_attr.data = uattr->data;
@@ -84,6 +124,9 @@ static int uverbs_process_attr(struct ib_device *ibdev,
return -EINVAL;
/* fall through */
case UVERBS_ATTR_TYPE_FD:
+ if (uattr->attr_data.reserved)
+ return -EINVAL;
+
if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX)
return -EINVAL;
@@ -253,9 +296,10 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
struct uverbs_attr *curr_attr;
unsigned long *curr_bitmap;
size_t ctx_size;
-#ifdef UVERBS_OPTIMIZE_USING_STACK_SZ
uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)];
-#endif
+
+ if (hdr->driver_id != ib_dev->driver_id)
+ return -EINVAL;
object_spec = uverbs_get_object(ib_dev, hdr->object_id);
if (!object_spec)
@@ -278,13 +322,10 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
(method_spec->num_child_attrs / BITS_PER_LONG +
method_spec->num_buckets);
-#ifdef UVERBS_OPTIMIZE_USING_STACK_SZ
if (ctx_size <= UVERBS_OPTIMIZE_USING_STACK_SZ)
ctx = (void *)data;
-
if (!ctx)
-#endif
- ctx = kmalloc(ctx_size, GFP_KERNEL);
+ ctx = kmalloc(ctx_size, GFP_KERNEL);
if (!ctx)
return -ENOMEM;
@@ -330,10 +371,8 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
err = -EINVAL;
}
out:
-#ifdef UVERBS_OPTIMIZE_USING_STACK_SZ
- if (ctx_size > UVERBS_OPTIMIZE_USING_STACK_SZ)
-#endif
- kfree(ctx);
+ if (ctx != (void *)data)
+ kfree(ctx);
return err;
}
@@ -366,7 +405,7 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
goto out;
}
- if (hdr.reserved) {
+ if (hdr.reserved1 || hdr.reserved2) {
err = -EPROTONOSUPPORT;
goto out;
}
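
The uverbs_is_attr_cleared() helper added above relies on the convention that a pointer attribute whose payload fits into the 8-byte data field is carried inline rather than behind a user pointer. A simplified user-space sketch of that decision, with the struct layout reduced for the example and all names invented:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct demo_attr {
	uint16_t len;
	uint64_t data;	/* inline payload when len <= 8, else a user pointer */
};

static bool demo_attr_is_inline(const struct demo_attr *attr)
{
	return attr->len <= sizeof(attr->data);
}

static size_t demo_attr_copy_inline(const struct demo_attr *attr,
				    void *dst, size_t dst_len)
{
	size_t n = attr->len < dst_len ? attr->len : dst_len;

	/* only meaningful when demo_attr_is_inline() returned true */
	memcpy(dst, &attr->data, n);
	return n;
}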
diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c
index 48a99dce976c..0f88a1919d51 100644
--- a/drivers/infiniband/core/uverbs_ioctl_merge.c
+++ b/drivers/infiniband/core/uverbs_ioctl_merge.c
@@ -376,10 +376,10 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me
min_id) ||
WARN(attr_obj_with_special_access &&
!(attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY),
- "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy aceess but isn't mandatory\n",
+ "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n",
min_id) ||
WARN(IS_ATTR_OBJECT(attr) &&
- attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ,
+ attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
"ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n",
min_id)) {
res = -EINVAL;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 863816d1e884..de6f7e0078dc 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -62,14 +62,16 @@ MODULE_LICENSE("Dual BSD/GPL");
enum {
IB_UVERBS_MAJOR = 231,
IB_UVERBS_BASE_MINOR = 192,
- IB_UVERBS_MAX_DEVICES = 32
+ IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS,
+ IB_UVERBS_NUM_FIXED_MINOR = 32,
+ IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR,
};
#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
+static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;
-static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
@@ -128,6 +130,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
[IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
[IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp,
+ [IB_USER_VERBS_EX_CMD_MODIFY_CQ] = ib_uverbs_ex_modify_cq,
};
static void ib_uverbs_add_one(struct ib_device *device);
@@ -465,7 +468,7 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
return;
}
- entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry) {
spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
@@ -498,7 +501,7 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
return;
}
- entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry) {
spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
return;
@@ -632,39 +635,87 @@ err_put_refs:
return filp;
}
-static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
+static bool verify_command_mask(struct ib_device *ib_dev,
+ u32 command, bool extended)
{
- u64 mask;
+ if (!extended)
+ return ib_dev->uverbs_cmd_mask & BIT_ULL(command);
- if (command <= IB_USER_VERBS_CMD_OPEN_QP)
- mask = ib_dev->uverbs_cmd_mask;
- else
- mask = ib_dev->uverbs_ex_cmd_mask;
-
- if (mask & ((u64)1 << command))
- return 0;
-
- return -1;
+ return ib_dev->uverbs_ex_cmd_mask & BIT_ULL(command);
}
static bool verify_command_idx(u32 command, bool extended)
{
if (extended)
- return command < ARRAY_SIZE(uverbs_ex_cmd_table);
+ return command < ARRAY_SIZE(uverbs_ex_cmd_table) &&
+ uverbs_ex_cmd_table[command];
- return command < ARRAY_SIZE(uverbs_cmd_table);
+ return command < ARRAY_SIZE(uverbs_cmd_table) &&
+ uverbs_cmd_table[command];
+}
+
+static ssize_t process_hdr(struct ib_uverbs_cmd_hdr *hdr,
+ u32 *command, bool *extended)
+{
+ if (hdr->command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
+ return -EINVAL;
+
+ *command = hdr->command & IB_USER_VERBS_CMD_COMMAND_MASK;
+ *extended = hdr->command & IB_USER_VERBS_CMD_FLAG_EXTENDED;
+
+ if (!verify_command_idx(*command, *extended))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
+ struct ib_uverbs_ex_cmd_hdr *ex_hdr,
+ size_t count, bool extended)
+{
+ if (extended) {
+ count -= sizeof(*hdr) + sizeof(*ex_hdr);
+
+ if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
+ return -EINVAL;
+
+ if (ex_hdr->cmd_hdr_reserved)
+ return -EINVAL;
+
+ if (ex_hdr->response) {
+ if (!hdr->out_words && !ex_hdr->provider_out_words)
+ return -EINVAL;
+
+ if (!access_ok(VERIFY_WRITE,
+ u64_to_user_ptr(ex_hdr->response),
+ (hdr->out_words + ex_hdr->provider_out_words) * 8))
+ return -EFAULT;
+ } else {
+ if (hdr->out_words || ex_hdr->provider_out_words)
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+
+ /* not extended command */
+ if (hdr->in_words * 4 != count)
+ return -EINVAL;
+
+ return 0;
}
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
+ struct ib_uverbs_ex_cmd_hdr ex_hdr;
struct ib_device *ib_dev;
struct ib_uverbs_cmd_hdr hdr;
- bool extended_command;
- __u32 command;
- __u32 flags;
+ bool extended;
int srcu_key;
+ u32 command;
ssize_t ret;
if (!ib_safe_file_access(filp)) {
@@ -673,12 +724,31 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
return -EACCES;
}
- if (count < sizeof hdr)
+ if (count < sizeof(hdr))
return -EINVAL;
- if (copy_from_user(&hdr, buf, sizeof hdr))
+ if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
+ ret = process_hdr(&hdr, &command, &extended);
+ if (ret)
+ return ret;
+
+ if (!file->ucontext &&
+ (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended))
+ return -EINVAL;
+
+ if (extended) {
+ if (count < (sizeof(hdr) + sizeof(ex_hdr)))
+ return -EINVAL;
+ if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
+ return -EFAULT;
+ }
+
+ ret = verify_hdr(&hdr, &ex_hdr, count, extended);
+ if (ret)
+ return ret;
+
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
ib_dev = srcu_dereference(file->device->ib_dev,
&file->device->disassociate_srcu);
@@ -687,106 +757,22 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
goto out;
}
- if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
- IB_USER_VERBS_CMD_COMMAND_MASK)) {
- ret = -EINVAL;
- goto out;
- }
-
- command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
- flags = (hdr.command &
- IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
-
- extended_command = flags & IB_USER_VERBS_CMD_FLAG_EXTENDED;
- if (!verify_command_idx(command, extended_command)) {
- ret = -EINVAL;
- goto out;
- }
-
- if (verify_command_mask(ib_dev, command)) {
+ if (!verify_command_mask(ib_dev, command, extended)) {
ret = -EOPNOTSUPP;
goto out;
}
- if (!file->ucontext &&
- command != IB_USER_VERBS_CMD_GET_CONTEXT) {
- ret = -EINVAL;
- goto out;
- }
+ buf += sizeof(hdr);
- if (!flags) {
- if (!uverbs_cmd_table[command]) {
- ret = -EINVAL;
- goto out;
- }
-
- if (hdr.in_words * 4 != count) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = uverbs_cmd_table[command](file, ib_dev,
- buf + sizeof(hdr),
- hdr.in_words * 4,
- hdr.out_words * 4);
-
- } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
- struct ib_uverbs_ex_cmd_hdr ex_hdr;
+ if (!extended) {
+ ret = uverbs_cmd_table[command](file, ib_dev, buf,
+ hdr.in_words * 4,
+ hdr.out_words * 4);
+ } else {
struct ib_udata ucore;
struct ib_udata uhw;
- size_t written_count = count;
-
- if (!uverbs_ex_cmd_table[command]) {
- ret = -ENOSYS;
- goto out;
- }
-
- if (!file->ucontext) {
- ret = -EINVAL;
- goto out;
- }
-
- if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
- ret = -EINVAL;
- goto out;
- }
-
- if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) {
- ret = -EFAULT;
- goto out;
- }
- count -= sizeof(hdr) + sizeof(ex_hdr);
- buf += sizeof(hdr) + sizeof(ex_hdr);
-
- if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) {
- ret = -EINVAL;
- goto out;
- }
-
- if (ex_hdr.cmd_hdr_reserved) {
- ret = -EINVAL;
- goto out;
- }
-
- if (ex_hdr.response) {
- if (!hdr.out_words && !ex_hdr.provider_out_words) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!access_ok(VERIFY_WRITE,
- u64_to_user_ptr(ex_hdr.response),
- (hdr.out_words + ex_hdr.provider_out_words) * 8)) {
- ret = -EFAULT;
- goto out;
- }
- } else {
- if (hdr.out_words || ex_hdr.provider_out_words) {
- ret = -EINVAL;
- goto out;
- }
- }
+ buf += sizeof(ex_hdr);
ib_uverbs_init_udata_buf_or_null(&ucore, buf,
u64_to_user_ptr(ex_hdr.response),
@@ -799,10 +785,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
ex_hdr.provider_out_words * 8);
ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw);
- if (!ret)
- ret = written_count;
- } else {
- ret = -ENOSYS;
+ ret = (ret) ? : count;
}
out:
@@ -950,9 +933,8 @@ static const struct file_operations uverbs_fops = {
.open = ib_uverbs_open,
.release = ib_uverbs_close,
.llseek = no_llseek,
-#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
.unlocked_ioctl = ib_uverbs_ioctl,
-#endif
+ .compat_ioctl = ib_uverbs_ioctl,
};
static const struct file_operations uverbs_mmap_fops = {
@@ -962,9 +944,8 @@ static const struct file_operations uverbs_mmap_fops = {
.open = ib_uverbs_open,
.release = ib_uverbs_close,
.llseek = no_llseek,
-#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
.unlocked_ioctl = ib_uverbs_ioctl,
-#endif
+ .compat_ioctl = ib_uverbs_ioctl,
};
static struct ib_client uverbs_client = {
@@ -1017,34 +998,6 @@ static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
__stringify(IB_USER_VERBS_ABI_VERSION));
-static dev_t overflow_maj;
-static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
-
-/*
- * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
- * requesting a new major number and doubling the number of max devices we
- * support. It's stupid, but simple.
- */
-static int find_overflow_devnum(void)
-{
- int ret;
-
- if (!overflow_maj) {
- ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
- "infiniband_verbs");
- if (ret) {
- pr_err("user_verbs: couldn't register dynamic device number\n");
- return ret;
- }
- }
-
- ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
- if (ret >= IB_UVERBS_MAX_DEVICES)
- return -1;
-
- return ret;
-}
-
static void ib_uverbs_add_one(struct ib_device *device)
{
int devnum;
@@ -1055,7 +1008,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
if (!device->alloc_ucontext)
return;
- uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
+ uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
if (!uverbs_dev)
return;
@@ -1074,24 +1027,15 @@ static void ib_uverbs_add_one(struct ib_device *device)
INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
- spin_lock(&map_lock);
devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
- if (devnum >= IB_UVERBS_MAX_DEVICES) {
- spin_unlock(&map_lock);
- devnum = find_overflow_devnum();
- if (devnum < 0)
- goto err;
-
- spin_lock(&map_lock);
- uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
- base = devnum + overflow_maj;
- set_bit(devnum, overflow_map);
- } else {
- uverbs_dev->devnum = devnum;
- base = devnum + IB_UVERBS_BASE_DEV;
- set_bit(devnum, dev_map);
- }
- spin_unlock(&map_lock);
+ if (devnum >= IB_UVERBS_MAX_DEVICES)
+ goto err;
+ uverbs_dev->devnum = devnum;
+ set_bit(devnum, dev_map);
+ if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
+ base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
+ else
+ base = IB_UVERBS_BASE_DEV + devnum;
rcu_assign_pointer(uverbs_dev->ib_dev, device);
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
@@ -1136,10 +1080,7 @@ err_class:
err_cdev:
cdev_del(&uverbs_dev->cdev);
- if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
- clear_bit(devnum, dev_map);
- else
- clear_bit(devnum, overflow_map);
+ clear_bit(devnum, dev_map);
err:
if (atomic_dec_and_test(&uverbs_dev->refcount))
@@ -1231,11 +1172,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
dev_set_drvdata(uverbs_dev->dev, NULL);
device_destroy(uverbs_class, uverbs_dev->cdev.dev);
cdev_del(&uverbs_dev->cdev);
-
- if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
- clear_bit(uverbs_dev->devnum, dev_map);
- else
- clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
+ clear_bit(uverbs_dev->devnum, dev_map);
if (device->disassociate_ucontext) {
/* We disassociate HW resources and immediately return.
@@ -1277,13 +1214,22 @@ static int __init ib_uverbs_init(void)
{
int ret;
- ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
+ ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
+ IB_UVERBS_NUM_FIXED_MINOR,
"infiniband_verbs");
if (ret) {
pr_err("user_verbs: couldn't register device number\n");
goto out;
}
+ ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
+ IB_UVERBS_NUM_DYNAMIC_MINOR,
+ "infiniband_verbs");
+ if (ret) {
+ pr_err("couldn't register dynamic device number\n");
+ goto out_alloc;
+ }
+
uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
if (IS_ERR(uverbs_class)) {
ret = PTR_ERR(uverbs_class);
@@ -1311,7 +1257,12 @@ out_class:
class_destroy(uverbs_class);
out_chrdev:
- unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+ unregister_chrdev_region(dynamic_uverbs_dev,
+ IB_UVERBS_NUM_DYNAMIC_MINOR);
+
+out_alloc:
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV,
+ IB_UVERBS_NUM_FIXED_MINOR);
out:
return ret;
@@ -1321,9 +1272,10 @@ static void __exit ib_uverbs_cleanup(void)
{
ib_unregister_client(&uverbs_client);
class_destroy(uverbs_class);
- unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
- if (overflow_maj)
- unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV,
+ IB_UVERBS_NUM_FIXED_MINOR);
+ unregister_chrdev_region(dynamic_uverbs_dev,
+ IB_UVERBS_NUM_DYNAMIC_MINOR);
}
module_init(ib_uverbs_init);
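
The char-device rework above splits the minor space into 32 statically registered minors starting at IB_UVERBS_BASE_DEV plus a dynamically allocated region for the remainder, replacing the old overflow_maj scheme. A toy model of the index-to-device-number mapping, using the constant from the patch; the helper and its parameters are illustrative only:

#include <stdint.h>

#define DEMO_NUM_FIXED_MINOR	32	/* IB_UVERBS_NUM_FIXED_MINOR */

/*
 * fixed_base stands in for IB_UVERBS_BASE_DEV, dynamic_base for the region
 * handed back by alloc_chrdev_region(); both are opaque numbers here.
 */
static uint32_t demo_devnum_to_dev(uint32_t devnum,
				   uint32_t fixed_base, uint32_t dynamic_base)
{
	if (devnum < DEMO_NUM_FIXED_MINOR)
		return fixed_base + devnum;
	return dynamic_base + (devnum - DEMO_NUM_FIXED_MINOR);
}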
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index 3e17d0654ea2..bb372b4713a4 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -175,18 +175,18 @@ EXPORT_SYMBOL(ib_copy_path_rec_to_user);
void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
struct ib_user_path_rec *src)
{
- __be32 slid, dlid;
+ u32 slid, dlid;
memset(dst, 0, sizeof(*dst));
if ((ib_is_opa_gid((union ib_gid *)src->sgid)) ||
(ib_is_opa_gid((union ib_gid *)src->dgid))) {
dst->rec_type = SA_PATH_REC_TYPE_OPA;
- slid = htonl(opa_get_lid_from_gid((union ib_gid *)src->sgid));
- dlid = htonl(opa_get_lid_from_gid((union ib_gid *)src->dgid));
+ slid = opa_get_lid_from_gid((union ib_gid *)src->sgid);
+ dlid = opa_get_lid_from_gid((union ib_gid *)src->dgid);
} else {
dst->rec_type = SA_PATH_REC_TYPE_IB;
- slid = htonl(ntohs(src->slid));
- dlid = htonl(ntohs(src->dlid));
+ slid = ntohs(src->slid);
+ dlid = ntohs(src->dlid);
}
memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid);
memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index cca70d36ee15..569f48bd821e 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -35,6 +35,7 @@
#include <rdma/ib_verbs.h>
#include <linux/bug.h>
#include <linux/file.h>
+#include <rdma/restrack.h>
#include "rdma_core.h"
#include "uverbs.h"
@@ -47,7 +48,16 @@ static int uverbs_free_ah(struct ib_uobject *uobject,
static int uverbs_free_flow(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
- return ib_destroy_flow((struct ib_flow *)uobject->object);
+ int ret;
+ struct ib_flow *flow = (struct ib_flow *)uobject->object;
+ struct ib_uflow_object *uflow =
+ container_of(uobject, struct ib_uflow_object, uobject);
+
+ ret = ib_destroy_flow(flow);
+ if (!ret)
+ ib_uverbs_flow_resources_free(uflow->resources);
+
+ return ret;
}
static int uverbs_free_mw(struct ib_uobject *uobject,
@@ -134,31 +144,6 @@ static int uverbs_free_srq(struct ib_uobject *uobject,
return ret;
}
-static int uverbs_free_cq(struct ib_uobject *uobject,
- enum rdma_remove_reason why)
-{
- struct ib_cq *cq = uobject->object;
- struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
- struct ib_ucq_object *ucq =
- container_of(uobject, struct ib_ucq_object, uobject);
- int ret;
-
- ret = ib_destroy_cq(cq);
- if (!ret || why != RDMA_REMOVE_DESTROY)
- ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
- container_of(ev_queue,
- struct ib_uverbs_completion_event_file,
- ev_queue) : NULL,
- ucq);
- return ret;
-}
-
-static int uverbs_free_mr(struct ib_uobject *uobject,
- enum rdma_remove_reason why)
-{
- return ib_dereg_mr((struct ib_mr *)uobject->object);
-}
-
static int uverbs_free_xrcd(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
@@ -209,18 +194,26 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_
return 0;
};
+int uverbs_destroy_def_handler(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ return 0;
+}
+
/*
* This spec is used in order to pass information to the hardware driver in a
* legacy way. Every verb that could get driver specific data should get this
* spec.
*/
-static const struct uverbs_attr_def uverbs_uhw_compat_in =
- UVERBS_ATTR_PTR_IN_SZ(UVERBS_UHW_IN, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ));
-static const struct uverbs_attr_def uverbs_uhw_compat_out =
- UVERBS_ATTR_PTR_OUT_SZ(UVERBS_UHW_OUT, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ));
-
-static void create_udata(struct uverbs_attr_bundle *ctx,
- struct ib_udata *udata)
+const struct uverbs_attr_def uverbs_uhw_compat_in =
+ UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_SIZE(0, USHRT_MAX),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
+const struct uverbs_attr_def uverbs_uhw_compat_out =
+ UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_SIZE(0, USHRT_MAX),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
+
+void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
{
/*
* This is for ease of conversion. The purpose is to convert all drivers
@@ -228,20 +221,23 @@ static void create_udata(struct uverbs_attr_bundle *ctx,
* Assume attr == 0 is input and attr == 1 is output.
*/
const struct uverbs_attr *uhw_in =
- uverbs_attr_get(ctx, UVERBS_UHW_IN);
+ uverbs_attr_get(ctx, UVERBS_ATTR_UHW_IN);
const struct uverbs_attr *uhw_out =
- uverbs_attr_get(ctx, UVERBS_UHW_OUT);
+ uverbs_attr_get(ctx, UVERBS_ATTR_UHW_OUT);
if (!IS_ERR(uhw_in)) {
- udata->inbuf = uhw_in->ptr_attr.ptr;
udata->inlen = uhw_in->ptr_attr.len;
+ if (uverbs_attr_ptr_is_inline(uhw_in))
+ udata->inbuf = &uhw_in->uattr->data;
+ else
+ udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data);
} else {
udata->inbuf = NULL;
udata->inlen = 0;
}
if (!IS_ERR(uhw_out)) {
- udata->outbuf = uhw_out->ptr_attr.ptr;
+ udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data);
udata->outlen = uhw_out->ptr_attr.len;
} else {
udata->outbuf = NULL;
@@ -249,204 +245,67 @@ static void create_udata(struct uverbs_attr_bundle *ctx,
}
}
-static int uverbs_create_cq_handler(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_ucontext *ucontext = file->ucontext;
- struct ib_ucq_object *obj;
- struct ib_udata uhw;
- int ret;
- u64 user_handle;
- struct ib_cq_init_attr attr = {};
- struct ib_cq *cq;
- struct ib_uverbs_completion_event_file *ev_file = NULL;
- const struct uverbs_attr *ev_file_attr;
- struct ib_uobject *ev_file_uobj;
-
- if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
- return -EOPNOTSUPP;
-
- ret = uverbs_copy_from(&attr.comp_vector, attrs, CREATE_CQ_COMP_VECTOR);
- if (!ret)
- ret = uverbs_copy_from(&attr.cqe, attrs, CREATE_CQ_CQE);
- if (!ret)
- ret = uverbs_copy_from(&user_handle, attrs, CREATE_CQ_USER_HANDLE);
- if (ret)
- return ret;
-
- /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
- if (uverbs_copy_from(&attr.flags, attrs, CREATE_CQ_FLAGS) == -EFAULT)
- return -EFAULT;
-
- ev_file_attr = uverbs_attr_get(attrs, CREATE_CQ_COMP_CHANNEL);
- if (!IS_ERR(ev_file_attr)) {
- ev_file_uobj = ev_file_attr->obj_attr.uobject;
-
- ev_file = container_of(ev_file_uobj,
- struct ib_uverbs_completion_event_file,
- uobj_file.uobj);
- uverbs_uobject_get(ev_file_uobj);
- }
-
- if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) {
- ret = -EINVAL;
- goto err_event_file;
- }
-
- obj = container_of(uverbs_attr_get(attrs, CREATE_CQ_HANDLE)->obj_attr.uobject,
- typeof(*obj), uobject);
- obj->uverbs_file = ucontext->ufile;
- obj->comp_events_reported = 0;
- obj->async_events_reported = 0;
- INIT_LIST_HEAD(&obj->comp_list);
- INIT_LIST_HEAD(&obj->async_list);
-
- /* Temporary, only until drivers get the new uverbs_attr_bundle */
- create_udata(attrs, &uhw);
-
- cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw);
- if (IS_ERR(cq)) {
- ret = PTR_ERR(cq);
- goto err_event_file;
- }
-
- cq->device = ib_dev;
- cq->uobject = &obj->uobject;
- cq->comp_handler = ib_uverbs_comp_handler;
- cq->event_handler = ib_uverbs_cq_event_handler;
- cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
- obj->uobject.object = cq;
- obj->uobject.user_handle = user_handle;
- atomic_set(&cq->usecnt, 0);
-
- ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe);
- if (ret)
- goto err_cq;
-
- return 0;
-err_cq:
- ib_destroy_cq(cq);
-
-err_event_file:
- if (ev_file)
- uverbs_uobject_put(ev_file_uobj);
- return ret;
-};
-
-static DECLARE_UVERBS_METHOD(
- uverbs_method_cq_create, UVERBS_CQ_CREATE, uverbs_create_cq_handler,
- &UVERBS_ATTR_IDR(CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, UVERBS_ACCESS_NEW,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(CREATE_CQ_CQE, u32,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(CREATE_CQ_USER_HANDLE, u64,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_FD(CREATE_CQ_COMP_CHANNEL, UVERBS_OBJECT_COMP_CHANNEL,
- UVERBS_ACCESS_READ),
- &UVERBS_ATTR_PTR_IN(CREATE_CQ_COMP_VECTOR, u32,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(CREATE_CQ_FLAGS, u32),
- &UVERBS_ATTR_PTR_OUT(CREATE_CQ_RESP_CQE, u32,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &uverbs_uhw_compat_in, &uverbs_uhw_compat_out);
-
-static int uverbs_destroy_cq_handler(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL,
+ &UVERBS_TYPE_ALLOC_FD(0,
+ sizeof(struct ib_uverbs_completion_event_file),
+ uverbs_hot_unplug_completion_event_file,
+ &uverbs_event_fops,
+ "[infinibandevent]", O_RDONLY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0,
+ uverbs_free_qp));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0,
+ uverbs_free_srq));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
+ 0, uverbs_free_flow));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0,
+ uverbs_free_wq));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0,
+ uverbs_free_xrcd));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
+ /* 2 is used in order to free the PD after MRs */
+ &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL);
+
+static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
+ &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
+ &UVERBS_OBJECT(UVERBS_OBJECT_PD),
+ &UVERBS_OBJECT(UVERBS_OBJECT_MR),
+ &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
+ &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
+ &UVERBS_OBJECT(UVERBS_OBJECT_QP),
+ &UVERBS_OBJECT(UVERBS_OBJECT_AH),
+ &UVERBS_OBJECT(UVERBS_OBJECT_MW),
+ &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
+ &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
+ &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
+ &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
+ &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
+ &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
+ &UVERBS_OBJECT(UVERBS_OBJECT_DM));
+
+const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
{
- struct ib_uverbs_destroy_cq_resp resp;
- struct ib_uobject *uobj =
- uverbs_attr_get(attrs, DESTROY_CQ_HANDLE)->obj_attr.uobject;
- struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object,
- uobject);
- int ret;
-
- if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
- return -EOPNOTSUPP;
-
- ret = rdma_explicit_destroy(uobj);
- if (ret)
- return ret;
-
- resp.comp_events_reported = obj->comp_events_reported;
- resp.async_events_reported = obj->async_events_reported;
-
- return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp);
+ return &uverbs_default_objects;
}
-
-static DECLARE_UVERBS_METHOD(
- uverbs_method_cq_destroy, UVERBS_CQ_DESTROY, uverbs_destroy_cq_handler,
- &UVERBS_ATTR_IDR(DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ,
- UVERBS_ACCESS_DESTROY,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_OUT(DESTROY_CQ_RESP, struct ib_uverbs_destroy_cq_resp,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_comp_channel,
- UVERBS_OBJECT_COMP_CHANNEL,
- &UVERBS_TYPE_ALLOC_FD(0,
- sizeof(struct ib_uverbs_completion_event_file),
- uverbs_hot_unplug_completion_event_file,
- &uverbs_event_fops,
- "[infinibandevent]", O_RDONLY));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_cq, UVERBS_OBJECT_CQ,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0,
- uverbs_free_cq),
- &uverbs_method_cq_create,
- &uverbs_method_cq_destroy);
-
-DECLARE_UVERBS_OBJECT(uverbs_object_qp, UVERBS_OBJECT_QP,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0,
- uverbs_free_qp));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_mw, UVERBS_OBJECT_MW,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_mr, UVERBS_OBJECT_MR,
- /* 1 is used in order to free the MR after all the MWs */
- &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_srq, UVERBS_OBJECT_SRQ,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0,
- uverbs_free_srq));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_ah, UVERBS_OBJECT_AH,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_flow, UVERBS_OBJECT_FLOW,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_wq, UVERBS_OBJECT_WQ,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0,
- uverbs_free_wq));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_rwq_ind_table,
- UVERBS_OBJECT_RWQ_IND_TBL,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_xrcd, UVERBS_OBJECT_XRCD,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0,
- uverbs_free_xrcd));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_pd, UVERBS_OBJECT_PD,
- /* 2 is used in order to free the PD after MRs */
- &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_device, UVERBS_OBJECT_DEVICE, NULL);
-
-DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
- &uverbs_object_device,
- &uverbs_object_pd,
- &uverbs_object_mr,
- &uverbs_object_comp_channel,
- &uverbs_object_cq,
- &uverbs_object_qp,
- &uverbs_object_ah,
- &uverbs_object_mw,
- &uverbs_object_srq,
- &uverbs_object_flow,
- &uverbs_object_wq,
- &uverbs_object_rwq_ind_table,
- &uverbs_object_xrcd);
+EXPORT_SYMBOL_GPL(uverbs_default_get_objects);
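
Several of the object declarations above carry a small integer ordering key (MW 0, MR 1, PD 2, per the "free the PD after MRs" comments in the patch), so that context teardown releases dependent objects before the objects they depend on. A compact, purely illustrative sketch of cleanup by ascending order key, assuming lower keys are torn down first:

#include <stdio.h>
#include <stdlib.h>

struct demo_uobj {
	const char *name;
	int destroy_order;	/* lower keys are torn down first */
};

static int by_order(const void *a, const void *b)
{
	const struct demo_uobj *x = a, *y = b;

	return x->destroy_order - y->destroy_order;
}

int main(void)
{
	struct demo_uobj objs[] = {
		{ "PD", 2 }, { "MR", 1 }, { "MW", 0 },
	};

	qsort(objs, 3, sizeof(objs[0]), by_order);
	for (int i = 0; i < 3; i++)
		printf("destroy %s\n", objs[i].name);	/* MW, MR, PD */
	return 0;
}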
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
new file mode 100644
index 000000000000..b0dbae9dd0d7
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_cq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_cq *cq = uobject->object;
+ struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
+ struct ib_ucq_object *ucq =
+ container_of(uobject, struct ib_ucq_object, uobject);
+ int ret;
+
+ ret = ib_destroy_cq(cq);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
+ container_of(ev_queue,
+ struct ib_uverbs_completion_event_file,
+ ev_queue) : NULL,
+ ucq);
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_ucontext *ucontext = file->ucontext;
+ struct ib_ucq_object *obj;
+ struct ib_udata uhw;
+ int ret;
+ u64 user_handle;
+ struct ib_cq_init_attr attr = {};
+ struct ib_cq *cq;
+ struct ib_uverbs_completion_event_file *ev_file = NULL;
+ const struct uverbs_attr *ev_file_attr;
+ struct ib_uobject *ev_file_uobj;
+
+ if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.comp_vector, attrs,
+ UVERBS_ATTR_CREATE_CQ_COMP_VECTOR);
+ if (!ret)
+ ret = uverbs_copy_from(&attr.cqe, attrs,
+ UVERBS_ATTR_CREATE_CQ_CQE);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_CQ_USER_HANDLE);
+ if (ret)
+ return ret;
+
+ /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
+ if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&attr.flags, attrs,
+ UVERBS_ATTR_CREATE_CQ_FLAGS)))
+ return -EFAULT;
+
+ ev_file_attr = uverbs_attr_get(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
+ if (!IS_ERR(ev_file_attr)) {
+ ev_file_uobj = ev_file_attr->obj_attr.uobject;
+
+ ev_file = container_of(ev_file_uobj,
+ struct ib_uverbs_completion_event_file,
+ uobj_file.uobj);
+ uverbs_uobject_get(ev_file_uobj);
+ }
+
+ if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) {
+ ret = -EINVAL;
+ goto err_event_file;
+ }
+
+ obj = container_of(uverbs_attr_get(attrs,
+ UVERBS_ATTR_CREATE_CQ_HANDLE)->obj_attr.uobject,
+ typeof(*obj), uobject);
+ obj->uverbs_file = ucontext->ufile;
+ obj->comp_events_reported = 0;
+ obj->async_events_reported = 0;
+ INIT_LIST_HEAD(&obj->comp_list);
+ INIT_LIST_HEAD(&obj->async_list);
+
+ /* Temporary, only until drivers get the new uverbs_attr_bundle */
+ create_udata(attrs, &uhw);
+
+ cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ goto err_event_file;
+ }
+
+ cq->device = ib_dev;
+ cq->uobject = &obj->uobject;
+ cq->comp_handler = ib_uverbs_comp_handler;
+ cq->event_handler = ib_uverbs_cq_event_handler;
+ cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
+ obj->uobject.object = cq;
+ obj->uobject.user_handle = user_handle;
+ atomic_set(&cq->usecnt, 0);
+ cq->res.type = RDMA_RESTRACK_CQ;
+ rdma_restrack_add(&cq->res);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
+ sizeof(cq->cqe));
+ if (ret)
+ goto err_cq;
+
+ return 0;
+err_cq:
+ ib_destroy_cq(cq);
+
+err_event_file:
+ if (ev_file)
+ uverbs_uobject_put(ev_file_uobj);
+ return ret;
+};
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_NEW,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
+ UVERBS_OBJECT_COMP_CHANNEL,
+ UVERBS_ACCESS_READ),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)),
+ &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &uverbs_uhw_compat_in, &uverbs_uhw_compat_out);
+
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_destroy_cq_resp resp;
+ struct ib_uobject *uobj =
+ uverbs_attr_get(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE)->obj_attr.uobject;
+ struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object,
+ uobject);
+ int ret;
+
+ if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
+ return -EOPNOTSUPP;
+
+ ret = rdma_explicit_destroy(uobj);
+ if (ret)
+ return ret;
+
+ resp.comp_events_reported = obj->comp_events_reported;
+ resp.async_events_reported = obj->async_events_reported;
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp,
+ sizeof(resp));
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0,
+ uverbs_free_cq),
+#if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI)
+ &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
+#endif
+ );
+
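
In the CQ-create handler above, UVERBS_ATTR_CREATE_CQ_FLAGS is optional: a missing attribute (-ENOENT) is skipped, while a real copy failure still aborts (the IS_UVERBS_COPY_ERR() wrapper). The same pattern in stand-alone form, with all names invented for the example:

#include <errno.h>
#include <stdint.h>

/* -ENOENT means "not supplied", which is fine; other errors are fatal. */
static int demo_copy_optional(uint32_t *dst, int present, uint32_t value)
{
	if (!present)
		return -ENOENT;
	*dst = value;
	return 0;
}

static int demo_get_flags(uint32_t *flags, int present, uint32_t value)
{
	int ret = demo_copy_optional(flags, present, value);

	if (ret == -ENOENT)
		return 0;	/* optional: keep the zero-initialised default */
	return ret;		/* 0 on success, real errors propagate */
}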
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
new file mode 100644
index 000000000000..8b681575b615
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_dm(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_dm *dm = uobject->object;
+
+ if (why == RDMA_REMOVE_DESTROY && atomic_read(&dm->usecnt))
+ return -EBUSY;
+
+ return dm->device->dealloc_dm(dm);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_ucontext *ucontext = file->ucontext;
+ struct ib_dm_alloc_attr attr = {};
+ struct ib_uobject *uobj;
+ struct ib_dm *dm;
+ int ret;
+
+ if (!ib_dev->alloc_dm)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.length, attrs,
+ UVERBS_ATTR_ALLOC_DM_LENGTH);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&attr.alignment, attrs,
+ UVERBS_ATTR_ALLOC_DM_ALIGNMENT);
+ if (ret)
+ return ret;
+
+ uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject;
+
+ dm = ib_dev->alloc_dm(ib_dev, ucontext, &attr, attrs);
+ if (IS_ERR(dm))
+ return PTR_ERR(dm);
+
+ dm->device = ib_dev;
+ dm->length = attr.length;
+ dm->uobject = uobj;
+ atomic_set(&dm->usecnt, 0);
+
+ uobj->object = dm;
+
+ return 0;
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_NEW,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE,
+ uverbs_destroy_def_handler,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_DESTROY,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
+ /* 1 is used in order to free the DM after MRs */
+ &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_dm),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
new file mode 100644
index 000000000000..b4f016dfa23d
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -0,0 +1,435 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_flow_action(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_flow_action *action = uobject->object;
+
+ if (why == RDMA_REMOVE_DESTROY &&
+ atomic_read(&action->usecnt))
+ return -EBUSY;
+
+ return action->device->destroy_flow_action(action);
+}
+
+static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs,
+ u32 flags, bool is_modify)
+{
+ u64 verbs_flags = flags;
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ESN))
+ verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED;
+
+ if (is_modify && uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS))
+ verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS;
+
+ return verbs_flags;
+};
+
+static int validate_flow_action_esp_keymat_aes_gcm(struct ib_flow_action_attrs_esp_keymats *keymat)
+{
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm =
+ &keymat->keymat.aes_gcm;
+
+ if (aes_gcm->iv_algo > IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
+ return -EOPNOTSUPP;
+
+ if (aes_gcm->key_len != 32 &&
+ aes_gcm->key_len != 24 &&
+ aes_gcm->key_len != 16)
+ return -EINVAL;
+
+ if (aes_gcm->icv_len != 16 &&
+ aes_gcm->icv_len != 8 &&
+ aes_gcm->icv_len != 12)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int (* const flow_action_esp_keymat_validate[])(struct ib_flow_action_attrs_esp_keymats *keymat) = {
+ [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = validate_flow_action_esp_keymat_aes_gcm,
+};
+
+static int flow_action_esp_replay_none(struct ib_flow_action_attrs_esp_replays *replay,
+ bool is_modify)
+{
+ /* This is used in order to modify an esp flow action with an enabled
+ * replay protection to a disabled one. This is only supported via
+ * modify, as in create verb we can simply drop the REPLAY attribute and
+ * achieve the same thing.
+ */
+ return is_modify ? 0 : -EINVAL;
+}
+
+static int flow_action_esp_replay_def_ok(struct ib_flow_action_attrs_esp_replays *replay,
+ bool is_modify)
+{
+ /* Some replay protections could always be enabled without validating
+ * anything.
+ */
+ return 0;
+}
+
+static int (* const flow_action_esp_replay_validate[])(struct ib_flow_action_attrs_esp_replays *replay,
+ bool is_modify) = {
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = flow_action_esp_replay_none,
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = flow_action_esp_replay_def_ok,
+};
+
+static int parse_esp_ip(enum ib_flow_spec_type proto,
+ const void __user *val_ptr,
+ size_t len, union ib_flow_spec *out)
+{
+ int ret;
+ const struct ib_uverbs_flow_ipv4_filter ipv4 = {
+ .src_ip = cpu_to_be32(0xffffffffUL),
+ .dst_ip = cpu_to_be32(0xffffffffUL),
+ .proto = 0xff,
+ .tos = 0xff,
+ .ttl = 0xff,
+ .flags = 0xff,
+ };
+ const struct ib_uverbs_flow_ipv6_filter ipv6 = {
+ .src_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+ .dst_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+ .flow_label = cpu_to_be32(0xffffffffUL),
+ .next_hdr = 0xff,
+ .traffic_class = 0xff,
+ .hop_limit = 0xff,
+ };
+ union {
+ struct ib_uverbs_flow_ipv4_filter ipv4;
+ struct ib_uverbs_flow_ipv6_filter ipv6;
+ } user_val = {};
+ const void *user_pmask;
+ size_t val_len;
+
+ /* If the flow IPv4/IPv6 flow specifications are extended, the mask
+ * should be changed as well.
+ */
+ BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv4_filter, flags) +
+ sizeof(ipv4.flags) != sizeof(ipv4));
+ BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv6_filter, reserved) +
+ sizeof(ipv6.reserved) != sizeof(ipv6));
+
+ switch (proto) {
+ case IB_FLOW_SPEC_IPV4:
+ if (len > sizeof(user_val.ipv4) &&
+ !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv4),
+ len - sizeof(user_val.ipv4)))
+ return -EOPNOTSUPP;
+
+ val_len = min_t(size_t, len, sizeof(user_val.ipv4));
+ ret = copy_from_user(&user_val.ipv4, val_ptr,
+ val_len);
+ if (ret)
+ return -EFAULT;
+
+ user_pmask = &ipv4;
+ break;
+ case IB_FLOW_SPEC_IPV6:
+ if (len > sizeof(user_val.ipv6) &&
+ !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv6),
+ len - sizeof(user_val.ipv6)))
+ return -EOPNOTSUPP;
+
+ val_len = min_t(size_t, len, sizeof(user_val.ipv6));
+ ret = copy_from_user(&user_val.ipv6, val_ptr,
+ val_len);
+ if (ret)
+ return -EFAULT;
+
+ user_pmask = &ipv6;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return ib_uverbs_kern_spec_to_ib_spec_filter(proto, user_pmask,
+ &user_val,
+ val_len, out);
+}
+
+static int flow_action_esp_get_encap(struct ib_flow_spec_list *out,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_flow_action_esp_encap uverbs_encap;
+ int ret;
+
+ ret = uverbs_copy_from(&uverbs_encap, attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP);
+ if (ret)
+ return ret;
+
+ /* We currently support only one encap */
+ if (uverbs_encap.next_ptr)
+ return -EOPNOTSUPP;
+
+ if (uverbs_encap.type != IB_FLOW_SPEC_IPV4 &&
+ uverbs_encap.type != IB_FLOW_SPEC_IPV6)
+ return -EOPNOTSUPP;
+
+ return parse_esp_ip(uverbs_encap.type,
+ u64_to_user_ptr(uverbs_encap.val_ptr),
+ uverbs_encap.len,
+ &out->spec);
+}
+
+struct ib_flow_action_esp_attr {
+ struct ib_flow_action_attrs_esp hdr;
+ struct ib_flow_action_attrs_esp_keymats keymat;
+ struct ib_flow_action_attrs_esp_replays replay;
+ /* We currently support only one spec */
+ struct ib_flow_spec_list encap;
+};
+
+#define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW
+static int parse_flow_action_esp(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs,
+ struct ib_flow_action_esp_attr *esp_attr,
+ bool is_modify)
+{
+ struct ib_uverbs_flow_action_esp uverbs_esp = {};
+ int ret;
+
+ /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
+ ret = uverbs_copy_from(&esp_attr->hdr.esn, attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ESN);
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ /* This can be called from FLOW_ACTION_ESP_MODIFY where
+ * UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS is optional
+ */
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) {
+ ret = uverbs_copy_from_or_zero(&uverbs_esp, attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS);
+ if (ret)
+ return ret;
+
+ if (uverbs_esp.flags & ~((ESP_LAST_SUPPORTED_FLAG << 1) - 1))
+ return -EOPNOTSUPP;
+
+ esp_attr->hdr.spi = uverbs_esp.spi;
+ esp_attr->hdr.seq = uverbs_esp.seq;
+ esp_attr->hdr.tfc_pad = uverbs_esp.tfc_pad;
+ esp_attr->hdr.hard_limit_pkts = uverbs_esp.hard_limit_pkts;
+ }
+ esp_attr->hdr.flags = esp_flags_uverbs_to_verbs(attrs, uverbs_esp.flags,
+ is_modify);
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT)) {
+ esp_attr->keymat.protocol =
+ uverbs_attr_get_enum_id(attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
+ ret = uverbs_copy_from_or_zero(&esp_attr->keymat.keymat,
+ attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
+ if (ret)
+ return ret;
+
+ ret = flow_action_esp_keymat_validate[esp_attr->keymat.protocol](&esp_attr->keymat);
+ if (ret)
+ return ret;
+
+ esp_attr->hdr.keymat = &esp_attr->keymat;
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY)) {
+ esp_attr->replay.protocol =
+ uverbs_attr_get_enum_id(attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
+
+ ret = uverbs_copy_from_or_zero(&esp_attr->replay.replay,
+ attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
+ if (ret)
+ return ret;
+
+ ret = flow_action_esp_replay_validate[esp_attr->replay.protocol](&esp_attr->replay,
+ is_modify);
+ if (ret)
+ return ret;
+
+ esp_attr->hdr.replay = &esp_attr->replay;
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP)) {
+ ret = flow_action_esp_get_encap(&esp_attr->encap, attrs);
+ if (ret)
+ return ret;
+
+ esp_attr->hdr.encap = &esp_attr->encap;
+ }
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ int ret;
+ struct ib_uobject *uobj;
+ struct ib_flow_action *action;
+ struct ib_flow_action_esp_attr esp_attr = {};
+
+ if (!ib_dev->create_flow_action_esp)
+ return -EOPNOTSUPP;
+
+ ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, false);
+ if (ret)
+ return ret;
+
+ /* No need to check as this attribute is marked as MANDATORY */
+ uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject;
+ action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs);
+ if (IS_ERR(action))
+ return PTR_ERR(action);
+
+ atomic_set(&action->usecnt, 0);
+ action->device = ib_dev;
+ action->type = IB_FLOW_ACTION_ESP;
+ action->uobject = uobj;
+ uobj->object = action;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ int ret;
+ struct ib_uobject *uobj;
+ struct ib_flow_action *action;
+ struct ib_flow_action_esp_attr esp_attr = {};
+
+ if (!ib_dev->modify_flow_action_esp)
+ return -EOPNOTSUPP;
+
+ ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, true);
+ if (ret)
+ return ret;
+
+ uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject;
+ action = uobj->object;
+
+ if (action->type != IB_FLOW_ACTION_ESP)
+ return -EINVAL;
+
+ return ib_dev->modify_flow_action_esp(action,
+ &esp_attr.hdr,
+ attrs);
+}
+
+static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
+ [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
+ { .ptr = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm),
+ .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
+ } },
+ },
+};
+
+static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
+ { .ptr = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ /* No need to specify any data */
+ .len = 0,
+ } }
+ },
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
+ { .ptr = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size),
+ .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
+ } }
+ },
+};
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+ UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
+ &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+ uverbs_flow_action_esp_keymat,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+ uverbs_flow_action_esp_replay),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_WRITE,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
+ &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+ uverbs_flow_action_esp_keymat),
+ &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+ uverbs_flow_action_esp_replay),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
+
+static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTROY,
+ uverbs_destroy_def_handler,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_DESTROY,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow_action),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
+
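
parse_flow_action_esp() above dispatches keymat and replay validation through small arrays of function pointers indexed by the enum value supplied by userspace, after uverbs_process_attr() has already range-checked the enum id. A reduced stand-alone version of that dispatch, illustrative only and with the key-length values taken from the patch:

#include <errno.h>
#include <stddef.h>

enum demo_keymat { DEMO_KEYMAT_AES_GCM, DEMO_KEYMAT_MAX };

struct demo_keymat_attr { int key_len; };

static int demo_validate_aes_gcm(const struct demo_keymat_attr *k)
{
	return (k->key_len == 16 || k->key_len == 24 || k->key_len == 32) ?
		0 : -EINVAL;
}

static int (* const demo_validate[DEMO_KEYMAT_MAX])(const struct demo_keymat_attr *) = {
	[DEMO_KEYMAT_AES_GCM] = demo_validate_aes_gcm,
};

static int demo_check(enum demo_keymat proto, const struct demo_keymat_attr *k)
{
	if (proto >= DEMO_KEYMAT_MAX || !demo_validate[proto])
		return -EOPNOTSUPP;	/* unknown enum id */
	return demo_validate[proto](k);
}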
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
new file mode 100644
index 000000000000..68f7cadf088f
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_mr(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return ib_dereg_mr((struct ib_mr *)uobject->object);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_dm_mr_attr attr = {};
+ struct ib_uobject *uobj;
+ struct ib_dm *dm;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ int ret;
+
+ if (!ib_dev->reg_dm_mr)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.offset, attrs, UVERBS_ATTR_REG_DM_MR_OFFSET);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&attr.length, attrs,
+ UVERBS_ATTR_REG_DM_MR_LENGTH);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&attr.access_flags, attrs,
+ UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS);
+ if (ret)
+ return ret;
+
+ if (!(attr.access_flags & IB_ZERO_BASED))
+ return -EINVAL;
+
+ ret = ib_check_mr_access(attr.access_flags);
+ if (ret)
+ return ret;
+
+ pd = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE);
+
+ dm = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE);
+
+ uobj = uverbs_attr_get(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE)->obj_attr.uobject;
+
+ if (attr.offset > dm->length || attr.length > dm->length ||
+ attr.length > dm->length - attr.offset)
+ return -EINVAL;
+
+ mr = pd->device->reg_dm_mr(pd, dm, &attr, attrs);
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->dm = dm;
+ mr->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&dm->usecnt);
+
+ uobj->object = mr;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_LKEY, &mr->lkey,
+ sizeof(mr->lkey));
+ if (ret)
+ goto err_dereg;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+ &mr->rkey, sizeof(mr->rkey));
+ if (ret)
+ goto err_dereg;
+
+ return 0;
+
+err_dereg:
+ ib_dereg_mr(mr);
+
+ return ret;
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_NEW,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+ UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_READ,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR,
+			    /* Destruction order 1 ensures the MR is freed only after all MWs */
+ &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
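The handler above validates the DM range with three comparisons (offset > dm->length || length > dm->length || length > dm->length - offset) instead of testing offset + length, so a crafted request cannot overflow the 64-bit sum. The same pattern as a stand-alone helper, purely for illustration:

#include <stdbool.h>
#include <stdint.h>

/* Accept only if [offset, offset + length) fits inside dm_len bytes,
 * without ever computing offset + length (which could wrap around).
 */
static bool range_fits(uint64_t offset, uint64_t length, uint64_t dm_len)
{
	return offset <= dm_len &&
	       length <= dm_len &&
	       length <= dm_len - offset;
}

/* e.g. range_fits(0x1000, 0x2000, 0x3000) is true, while
 * range_fits(UINT64_MAX, 1, 0x3000) is false even though the naive
 * sum offset + length wraps to 0.
 */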
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 954a5cc90bb4..0501659bb186 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -53,6 +53,9 @@
#include "core_priv.h"
+static int ib_resolve_eth_dmac(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr);
+
static const char * const ib_events[] = {
[IB_EVENT_CQ_ERR] = "CQ error",
[IB_EVENT_QP_FATAL] = "QP fatal error",
@@ -121,16 +124,24 @@ EXPORT_SYMBOL(ib_wc_status_msg);
__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
{
switch (rate) {
- case IB_RATE_2_5_GBPS: return 1;
- case IB_RATE_5_GBPS: return 2;
- case IB_RATE_10_GBPS: return 4;
- case IB_RATE_20_GBPS: return 8;
- case IB_RATE_30_GBPS: return 12;
- case IB_RATE_40_GBPS: return 16;
- case IB_RATE_60_GBPS: return 24;
- case IB_RATE_80_GBPS: return 32;
- case IB_RATE_120_GBPS: return 48;
- default: return -1;
+ case IB_RATE_2_5_GBPS: return 1;
+ case IB_RATE_5_GBPS: return 2;
+ case IB_RATE_10_GBPS: return 4;
+ case IB_RATE_20_GBPS: return 8;
+ case IB_RATE_30_GBPS: return 12;
+ case IB_RATE_40_GBPS: return 16;
+ case IB_RATE_60_GBPS: return 24;
+ case IB_RATE_80_GBPS: return 32;
+ case IB_RATE_120_GBPS: return 48;
+ case IB_RATE_14_GBPS: return 6;
+ case IB_RATE_56_GBPS: return 22;
+ case IB_RATE_112_GBPS: return 45;
+ case IB_RATE_168_GBPS: return 67;
+ case IB_RATE_25_GBPS: return 10;
+ case IB_RATE_100_GBPS: return 40;
+ case IB_RATE_200_GBPS: return 80;
+ case IB_RATE_300_GBPS: return 120;
+ default: return -1;
}
}
EXPORT_SYMBOL(ib_rate_to_mult);
@@ -138,16 +149,24 @@ EXPORT_SYMBOL(ib_rate_to_mult);
__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
{
switch (mult) {
- case 1: return IB_RATE_2_5_GBPS;
- case 2: return IB_RATE_5_GBPS;
- case 4: return IB_RATE_10_GBPS;
- case 8: return IB_RATE_20_GBPS;
- case 12: return IB_RATE_30_GBPS;
- case 16: return IB_RATE_40_GBPS;
- case 24: return IB_RATE_60_GBPS;
- case 32: return IB_RATE_80_GBPS;
- case 48: return IB_RATE_120_GBPS;
- default: return IB_RATE_PORT_CURRENT;
+ case 1: return IB_RATE_2_5_GBPS;
+ case 2: return IB_RATE_5_GBPS;
+ case 4: return IB_RATE_10_GBPS;
+ case 8: return IB_RATE_20_GBPS;
+ case 12: return IB_RATE_30_GBPS;
+ case 16: return IB_RATE_40_GBPS;
+ case 24: return IB_RATE_60_GBPS;
+ case 32: return IB_RATE_80_GBPS;
+ case 48: return IB_RATE_120_GBPS;
+ case 6: return IB_RATE_14_GBPS;
+ case 22: return IB_RATE_56_GBPS;
+ case 45: return IB_RATE_112_GBPS;
+ case 67: return IB_RATE_168_GBPS;
+ case 10: return IB_RATE_25_GBPS;
+ case 40: return IB_RATE_100_GBPS;
+ case 80: return IB_RATE_200_GBPS;
+ case 120: return IB_RATE_300_GBPS;
+ default: return IB_RATE_PORT_CURRENT;
}
}
EXPORT_SYMBOL(mult_to_ib_rate);
@@ -244,6 +263,10 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE;
}
+ pd->res.type = RDMA_RESTRACK_PD;
+ pd->res.kern_name = caller;
+ rdma_restrack_add(&pd->res);
+
if (mr_access_flags) {
struct ib_mr *mr;
@@ -293,6 +316,7 @@ void ib_dealloc_pd(struct ib_pd *pd)
requires the caller to guarantee we can't race here. */
WARN_ON(atomic_read(&pd->usecnt));
+ rdma_restrack_del(&pd->res);
/* Making delalloc_pd a void return is a WIP, no driver should return
an error here. */
ret = pd->device->dealloc_pd(pd);
@@ -302,11 +326,13 @@ EXPORT_SYMBOL(ib_dealloc_pd);
/* Address handles */
-struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
+static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
+ struct ib_udata *udata)
{
struct ib_ah *ah;
- ah = pd->device->create_ah(pd, ah_attr, NULL);
+ ah = pd->device->create_ah(pd, ah_attr, udata);
if (!IS_ERR(ah)) {
ah->device = pd->device;
@@ -318,8 +344,42 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
return ah;
}
+
+struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
+{
+ return _rdma_create_ah(pd, ah_attr, NULL);
+}
EXPORT_SYMBOL(rdma_create_ah);
+/**
+ * rdma_create_user_ah - Creates an address handle for the given address
+ * vector. For a RoCE address handle it also resolves the destination MAC
+ * address.
+ * @pd: The protection domain associated with the address handle.
+ * @ah_attr: The attributes of the address vector.
+ * @udata: pointer to the user's input/output buffer information needed by
+ * the provider driver.
+ *
+ * Returns the new address handle on success, or an ERR_PTR-encoded error
+ * code on failure. The address handle is used to reference a local or
+ * global destination in all UD QP post sends.
+ */
+struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
+ struct ib_udata *udata)
+{
+ int err;
+
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ err = ib_resolve_eth_dmac(pd->device, ah_attr);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ return _rdma_create_ah(pd, ah_attr, udata);
+}
+EXPORT_SYMBOL(rdma_create_user_ah);
+
int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
{
const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
@@ -382,8 +442,7 @@ static bool find_gid_index(const union ib_gid *gid,
const struct ib_gid_attr *gid_attr,
void *context)
{
- struct find_gid_index_context *ctx =
- (struct find_gid_index_context *)context;
+ struct find_gid_index_context *ctx = context;
if (ctx->gid_type != gid_attr->gid_type)
return false;
@@ -442,8 +501,53 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
}
EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
+/* Resolve the destination MAC address and hop limit for a unicast
+ * destination GID entry, taking the source GID entry into account.
+ * ah_attr must have a valid port_num and sgid_index.
+ */
+static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr)
+{
+ struct ib_gid_attr sgid_attr;
+ struct ib_global_route *grh;
+ int hop_limit = 0xff;
+ union ib_gid sgid;
+ int ret;
+
+ grh = rdma_ah_retrieve_grh(ah_attr);
+
+ ret = ib_query_gid(device,
+ rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index,
+ &sgid, &sgid_attr);
+ if (ret || !sgid_attr.ndev) {
+ if (!ret)
+ ret = -ENXIO;
+ return ret;
+ }
+
+ /* If destination is link local and source GID is RoCEv1,
+ * IP stack is not used.
+ */
+ if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) &&
+ sgid_attr.gid_type == IB_GID_TYPE_ROCE) {
+ rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
+ ah_attr->roce.dmac);
+ goto done;
+ }
+
+ ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+ ah_attr->roce.dmac,
+ sgid_attr.ndev, &hop_limit);
+done:
+ dev_put(sgid_attr.ndev);
+
+ grh->hop_limit = hop_limit;
+ return ret;
+}
+
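When the destination is link-local and the source GID is RoCEv1, the helper above takes the MAC straight from the GID via rdma_get_ll_mac() rather than resolving it through the IP stack. A rough user-space sketch of that conversion, assuming the address embeds a modified EUI-64 as link-local GIDs normally do:

#include <stdint.h>

/* Undo the modified EUI-64 mapping fe80::/64 | mac[0..2] ff fe mac[3..5],
 * flipping the universal/local bit back. Names are local to the sketch.
 */
static void ll_addr_to_mac(const uint8_t in6[16], uint8_t mac[6])
{
	mac[0] = in6[8] ^ 0x02;		/* restore the U/L bit */
	mac[1] = in6[9];
	mac[2] = in6[10];
	/* in6[11] == 0xff and in6[12] == 0xfe are the EUI-64 filler bytes */
	mac[3] = in6[13];
	mac[4] = in6[14];
	mac[5] = in6[15];
}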
/*
- * This function creates ah from the incoming packet.
+ * This function initializes address handle attributes from the incoming packet.
* Incoming packet has dgid of the receiver node on which this code is
* getting executed and, sgid contains the GID of the sender.
*
@@ -451,13 +555,10 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
* as sgid and, sgid is used as dgid because sgid contains destinations
* GID whom to respond to.
*
- * This is why when calling rdma_addr_find_l2_eth_by_grh() function, the
- * position of arguments dgid and sgid do not match the order of the
- * parameters.
*/
-int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
- const struct ib_wc *wc, const struct ib_grh *grh,
- struct rdma_ah_attr *ah_attr)
+int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
+ const struct ib_wc *wc, const struct ib_grh *grh,
+ struct rdma_ah_attr *ah_attr)
{
u32 flow_class;
u16 gid_index;
@@ -484,57 +585,33 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
if (ret)
return ret;
+ rdma_ah_set_sl(ah_attr, wc->sl);
+ rdma_ah_set_port_num(ah_attr, port_num);
+
if (rdma_protocol_roce(device, port_num)) {
- int if_index = 0;
u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
wc->vlan_id : 0xffff;
- struct net_device *idev;
- struct net_device *resolved_dev;
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
- if (!device->get_netdev)
- return -EOPNOTSUPP;
-
- idev = device->get_netdev(device, port_num);
- if (!idev)
- return -ENODEV;
-
- ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
- ah_attr->roce.dmac,
- wc->wc_flags & IB_WC_WITH_VLAN ?
- NULL : &vlan_id,
- &if_index, &hoplimit);
- if (ret) {
- dev_put(idev);
- return ret;
- }
-
- resolved_dev = dev_get_by_index(&init_net, if_index);
- rcu_read_lock();
- if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
- resolved_dev))
- ret = -EHOSTUNREACH;
- rcu_read_unlock();
- dev_put(idev);
- dev_put(resolved_dev);
- if (ret)
- return ret;
-
- ret = get_sgid_index_from_eth(device, port_num, vlan_id,
- &dgid, gid_type, &gid_index);
+ ret = get_sgid_index_from_eth(device, port_num,
+ vlan_id, &dgid,
+ gid_type, &gid_index);
if (ret)
return ret;
- }
- rdma_ah_set_dlid(ah_attr, wc->slid);
- rdma_ah_set_sl(ah_attr, wc->sl);
- rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
- rdma_ah_set_port_num(ah_attr, port_num);
+ flow_class = be32_to_cpu(grh->version_tclass_flow);
+ rdma_ah_set_grh(ah_attr, &sgid,
+ flow_class & 0xFFFFF,
+ (u8)gid_index, hoplimit,
+ (flow_class >> 20) & 0xFF);
+ return ib_resolve_unicast_gid_dmac(device, ah_attr);
+ } else {
+ rdma_ah_set_dlid(ah_attr, wc->slid);
+ rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
- if (wc->wc_flags & IB_WC_GRH) {
- if (!rdma_cap_eth_ah(device, port_num)) {
+ if (wc->wc_flags & IB_WC_GRH) {
if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
ret = ib_find_cached_gid_by_port(device, &dgid,
IB_GID_TYPE_IB,
@@ -545,18 +622,17 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
} else {
gid_index = 0;
}
- }
-
- flow_class = be32_to_cpu(grh->version_tclass_flow);
- rdma_ah_set_grh(ah_attr, &sgid,
- flow_class & 0xFFFFF,
- (u8)gid_index, hoplimit,
- (flow_class >> 20) & 0xFF);
+ flow_class = be32_to_cpu(grh->version_tclass_flow);
+ rdma_ah_set_grh(ah_attr, &sgid,
+ flow_class & 0xFFFFF,
+ (u8)gid_index, hoplimit,
+ (flow_class >> 20) & 0xFF);
+ }
+ return 0;
}
- return 0;
}
-EXPORT_SYMBOL(ib_init_ah_from_wc);
+EXPORT_SYMBOL(ib_init_ah_attr_from_wc);
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
const struct ib_grh *grh, u8 port_num)
@@ -564,7 +640,7 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
struct rdma_ah_attr ah_attr;
int ret;
- ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
+ ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
if (ret)
return ERR_PTR(ret);
@@ -579,7 +655,7 @@ int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
return ah->device->modify_ah ?
ah->device->modify_ah(ah, ah_attr) :
- -ENOSYS;
+ -EOPNOTSUPP;
}
EXPORT_SYMBOL(rdma_modify_ah);
@@ -587,7 +663,7 @@ int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
return ah->device->query_ah ?
ah->device->query_ah(ah, ah_attr) :
- -ENOSYS;
+ -EOPNOTSUPP;
}
EXPORT_SYMBOL(rdma_query_ah);
@@ -613,7 +689,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
struct ib_srq *srq;
if (!pd->device->create_srq)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
srq = pd->device->create_srq(pd, srq_init_attr, NULL);
@@ -646,7 +722,7 @@ int ib_modify_srq(struct ib_srq *srq,
{
return srq->device->modify_srq ?
srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) :
- -ENOSYS;
+ -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_modify_srq);
@@ -654,7 +730,7 @@ int ib_query_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr)
{
return srq->device->query_srq ?
- srq->device->query_srq(srq, srq_attr) : -ENOSYS;
+ srq->device->query_srq(srq, srq_attr) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_query_srq);
@@ -811,7 +887,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
if (qp_init_attr->cap.max_rdma_ctxs)
rdma_rw_init_qp(device, qp_init_attr);
- qp = device->create_qp(pd, qp_init_attr, NULL);
+ qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL);
if (IS_ERR(qp))
return qp;
@@ -821,9 +897,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
return ERR_PTR(ret);
}
- qp->device = device;
qp->real_qp = qp;
- qp->uobject = NULL;
qp->qp_type = qp_init_attr->qp_type;
qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl;
@@ -851,7 +925,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
atomic_inc(&qp_init_attr->srq->usecnt);
}
- qp->pd = pd;
qp->send_cq = qp_init_attr->send_cq;
qp->xrcd = NULL;
@@ -1191,39 +1264,35 @@ static const struct {
}
};
-int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
- enum ib_qp_type type, enum ib_qp_attr_mask mask,
- enum rdma_link_layer ll)
+bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
+ enum ib_qp_type type, enum ib_qp_attr_mask mask,
+ enum rdma_link_layer ll)
{
enum ib_qp_attr_mask req_param, opt_param;
- if (cur_state < 0 || cur_state > IB_QPS_ERR ||
- next_state < 0 || next_state > IB_QPS_ERR)
- return 0;
-
if (mask & IB_QP_CUR_STATE &&
cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
- return 0;
+ return false;
if (!qp_state_table[cur_state][next_state].valid)
- return 0;
+ return false;
req_param = qp_state_table[cur_state][next_state].req_param[type];
opt_param = qp_state_table[cur_state][next_state].opt_param[type];
if ((mask & req_param) != req_param)
- return 0;
+ return false;
if (mask & ~(req_param | opt_param | IB_QP_STATE))
- return 0;
+ return false;
- return 1;
+ return true;
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
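After this change the transition check is a plain predicate: every required attribute bit must be present, and no bit outside required | optional | IB_QP_STATE may be set. A minimal stand-alone version of those two mask tests, using generic uint32_t masks instead of enum ib_qp_attr_mask:

#include <stdbool.h>
#include <stdint.h>

static bool mask_is_ok(uint32_t mask, uint32_t required, uint32_t optional,
		       uint32_t always_allowed)
{
	if ((mask & required) != required)	/* a required attribute is missing */
		return false;
	if (mask & ~(required | optional | always_allowed))
		return false;			/* an attribute not valid for this transition */
	return true;
}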
-int ib_resolve_eth_dmac(struct ib_device *device,
- struct rdma_ah_attr *ah_attr)
+static int ib_resolve_eth_dmac(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr)
{
int ret = 0;
struct ib_global_route *grh;
@@ -1231,16 +1300,8 @@ int ib_resolve_eth_dmac(struct ib_device *device,
if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr)))
return -EINVAL;
- if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE)
- return 0;
-
grh = rdma_ah_retrieve_grh(ah_attr);
- if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw)) {
- rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
- ah_attr->roce.dmac);
- return 0;
- }
if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
__be32 addr = 0;
@@ -1252,37 +1313,48 @@ int ib_resolve_eth_dmac(struct ib_device *device,
(char *)ah_attr->roce.dmac);
}
} else {
- union ib_gid sgid;
- struct ib_gid_attr sgid_attr;
- int ifindex;
- int hop_limit;
-
- ret = ib_query_gid(device,
- rdma_ah_get_port_num(ah_attr),
- grh->sgid_index,
- &sgid, &sgid_attr);
-
- if (ret || !sgid_attr.ndev) {
- if (!ret)
- ret = -ENXIO;
- goto out;
- }
-
- ifindex = sgid_attr.ndev->ifindex;
+ ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
+ }
+ return ret;
+}
- ret =
- rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
- ah_attr->roce.dmac,
- NULL, &ifindex, &hop_limit);
+/**
+ * IB core internal function that performs QP attribute modification.
+ */
+static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ int ret;
- dev_put(sgid_attr.ndev);
+ if (rdma_ib_or_roce(qp->device, port)) {
+ if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
+ pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n",
+ __func__, qp->device->name);
+ attr->rq_psn &= 0xffffff;
+ }
- grh->hop_limit = hop_limit;
+ if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
+ pr_warn("%s: %s sq_psn overflow, masking to 24 bits\n",
+ __func__, qp->device->name);
+ attr->sq_psn &= 0xffffff;
+ }
}
-out:
+
+ ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
+ if (!ret && (attr_mask & IB_QP_PORT))
+ qp->port = attr->port_num;
+
return ret;
}
-EXPORT_SYMBOL(ib_resolve_eth_dmac);
+
+static bool is_qp_type_connected(const struct ib_qp *qp)
+{
+ return (qp->qp_type == IB_QPT_UC ||
+ qp->qp_type == IB_QPT_RC ||
+ qp->qp_type == IB_QPT_XRC_INI ||
+ qp->qp_type == IB_QPT_XRC_TGT);
+}
/**
* ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
@@ -1301,16 +1373,14 @@ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
struct ib_qp *qp = ib_qp->real_qp;
int ret;
- if (attr_mask & IB_QP_AV) {
+ if (attr_mask & IB_QP_AV &&
+ attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
+ is_qp_type_connected(qp)) {
ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
if (ret)
return ret;
}
- ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
- if (!ret && (attr_mask & IB_QP_PORT))
- qp->port = attr->port_num;
-
- return ret;
+ return _ib_modify_qp(qp, attr, attr_mask, udata);
}
EXPORT_SYMBOL(ib_modify_qp_with_udata);
@@ -1373,7 +1443,7 @@ int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
{
- return ib_modify_qp_with_udata(qp, qp_attr, qp_attr_mask, NULL);
+ return _ib_modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
}
EXPORT_SYMBOL(ib_modify_qp);
@@ -1384,7 +1454,7 @@ int ib_query_qp(struct ib_qp *qp,
{
return qp->device->query_qp ?
qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
- -ENOSYS;
+ -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_query_qp);
@@ -1467,6 +1537,7 @@ int ib_destroy_qp(struct ib_qp *qp)
if (!qp->uobject)
rdma_rw_cleanup_mrs(qp);
+ rdma_restrack_del(&qp->res);
ret = qp->device->destroy_qp(qp);
if (!ret) {
if (pd)
@@ -1509,24 +1580,27 @@ struct ib_cq *ib_create_cq(struct ib_device *device,
cq->event_handler = event_handler;
cq->cq_context = cq_context;
atomic_set(&cq->usecnt, 0);
+ cq->res.type = RDMA_RESTRACK_CQ;
+ rdma_restrack_add(&cq->res);
}
return cq;
}
EXPORT_SYMBOL(ib_create_cq);
-int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
return cq->device->modify_cq ?
- cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS;
+ cq->device->modify_cq(cq, cq_count, cq_period) : -EOPNOTSUPP;
}
-EXPORT_SYMBOL(ib_modify_cq);
+EXPORT_SYMBOL(rdma_set_cq_moderation);
int ib_destroy_cq(struct ib_cq *cq)
{
if (atomic_read(&cq->usecnt))
return -EBUSY;
+ rdma_restrack_del(&cq->res);
return cq->device->destroy_cq(cq);
}
EXPORT_SYMBOL(ib_destroy_cq);
@@ -1534,7 +1608,7 @@ EXPORT_SYMBOL(ib_destroy_cq);
int ib_resize_cq(struct ib_cq *cq, int cqe)
{
return cq->device->resize_cq ?
- cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS;
+ cq->device->resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_resize_cq);
@@ -1543,11 +1617,16 @@ EXPORT_SYMBOL(ib_resize_cq);
int ib_dereg_mr(struct ib_mr *mr)
{
struct ib_pd *pd = mr->pd;
+ struct ib_dm *dm = mr->dm;
int ret;
+ rdma_restrack_del(&mr->res);
ret = mr->device->dereg_mr(mr);
- if (!ret)
+ if (!ret) {
atomic_dec(&pd->usecnt);
+ if (dm)
+ atomic_dec(&dm->usecnt);
+ }
return ret;
}
@@ -1572,15 +1651,18 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
struct ib_mr *mr;
if (!pd->device->alloc_mr)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
mr = pd->device->alloc_mr(pd, mr_type, max_num_sg);
if (!IS_ERR(mr)) {
mr->device = pd->device;
mr->pd = pd;
+ mr->dm = NULL;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
mr->need_inval = false;
+ mr->res.type = RDMA_RESTRACK_MR;
+ rdma_restrack_add(&mr->res);
}
return mr;
@@ -1596,7 +1678,7 @@ struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
struct ib_fmr *fmr;
if (!pd->device->alloc_fmr)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr);
if (!IS_ERR(fmr)) {
@@ -1680,7 +1762,7 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
int ret;
if (!qp->device->attach_mcast)
- return -ENOSYS;
+ return -EOPNOTSUPP;
if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
@@ -1698,7 +1780,7 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
int ret;
if (!qp->device->detach_mcast)
- return -ENOSYS;
+ return -EOPNOTSUPP;
if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
@@ -1711,12 +1793,12 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
}
EXPORT_SYMBOL(ib_detach_mcast);
-struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
+struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller)
{
struct ib_xrcd *xrcd;
if (!device->alloc_xrcd)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
xrcd = device->alloc_xrcd(device, NULL, NULL);
if (!IS_ERR(xrcd)) {
@@ -1729,7 +1811,7 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
return xrcd;
}
-EXPORT_SYMBOL(ib_alloc_xrcd);
+EXPORT_SYMBOL(__ib_alloc_xrcd);
int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
@@ -1754,11 +1836,11 @@ EXPORT_SYMBOL(ib_dealloc_xrcd);
* ib_create_wq - Creates a WQ associated with the specified protection
* domain.
* @pd: The protection domain associated with the WQ.
- * @wq_init_attr: A list of initial attributes required to create the
+ * @wq_attr: A list of initial attributes required to create the
* WQ. If WQ creation succeeds, then the attributes are updated to
* the actual capabilities of the created WQ.
*
- * wq_init_attr->max_wr and wq_init_attr->max_sge determine
+ * wq_attr->max_wr and wq_attr->max_sge determine
* the requested size of the WQ, and set to the actual values allocated
* on return.
* If ib_create_wq() succeeds, then max_wr and max_sge will always be
@@ -1770,7 +1852,7 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd,
struct ib_wq *wq;
if (!pd->device->create_wq)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
wq = pd->device->create_wq(pd, wq_attr, NULL);
if (!IS_ERR(wq)) {
@@ -1825,7 +1907,7 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
int err;
if (!wq->device->modify_wq)
- return -ENOSYS;
+ return -EOPNOTSUPP;
err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL);
return err;
@@ -1850,7 +1932,7 @@ struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
u32 table_size;
if (!device->create_rwq_ind_table)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
table_size = (1 << init_attr->log_ind_tbl_size);
rwq_ind_table = device->create_rwq_ind_table(device,
@@ -1900,7 +1982,7 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp,
{
struct ib_flow *flow_id;
if (!qp->device->create_flow)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
flow_id = qp->device->create_flow(qp, flow_attr, domain);
if (!IS_ERR(flow_id)) {
@@ -1927,7 +2009,7 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status)
{
return mr->device->check_mr_status ?
- mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
+ mr->device->check_mr_status(mr, check_mask, mr_status) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_check_mr_status);
@@ -1935,7 +2017,7 @@ int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
int state)
{
if (!device->set_vf_link_state)
- return -ENOSYS;
+ return -EOPNOTSUPP;
return device->set_vf_link_state(device, vf, port, state);
}
@@ -1945,7 +2027,7 @@ int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
struct ifla_vf_info *info)
{
if (!device->get_vf_config)
- return -ENOSYS;
+ return -EOPNOTSUPP;
return device->get_vf_config(device, vf, port, info);
}
@@ -1955,7 +2037,7 @@ int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
struct ifla_vf_stats *stats)
{
if (!device->get_vf_stats)
- return -ENOSYS;
+ return -EOPNOTSUPP;
return device->get_vf_stats(device, vf, port, stats);
}
@@ -1965,7 +2047,7 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
int type)
{
if (!device->set_vf_guid)
- return -ENOSYS;
+ return -EOPNOTSUPP;
return device->set_vf_guid(device, vf, port, guid, type);
}
@@ -2000,7 +2082,7 @@ int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset, unsigned int page_size)
{
if (unlikely(!mr->device->map_mr_sg))
- return -ENOSYS;
+ return -EOPNOTSUPP;
mr->page_size = page_size;
@@ -2127,15 +2209,15 @@ static void __ib_drain_sq(struct ib_qp *qp)
};
int ret;
- sdrain.cqe.done = ib_drain_qp_done;
- init_completion(&sdrain.done);
-
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
return;
}
+ sdrain.cqe.done = ib_drain_qp_done;
+ init_completion(&sdrain.done);
+
ret = ib_post_send(qp, &swr.wr, &bad_swr);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
@@ -2160,16 +2242,16 @@ static void __ib_drain_rq(struct ib_qp *qp)
struct ib_recv_wr rwr = {}, *bad_rwr;
int ret;
- rwr.wr_cqe = &rdrain.cqe;
- rdrain.cqe.done = ib_drain_qp_done;
- init_completion(&rdrain.done);
-
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
return;
}
+ rwr.wr_cqe = &rdrain.cqe;
+ rdrain.cqe.done = ib_drain_qp_done;
+ init_completion(&rdrain.done);
+
ret = ib_post_recv(qp, &rwr, &bad_rwr);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 287c4423fa31..cb1e69bdad0b 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -314,12 +314,11 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
return rc;
}
-int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
- unsigned int index, void **context)
+int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
{
int rc = 0;
struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
- struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(attr->device, ibdev);
struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
struct bnxt_qplib_gid *gid_to_del;
@@ -365,15 +364,14 @@ int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
return rc;
}
-int bnxt_re_add_gid(struct ib_device *ibdev, u8 port_num,
- unsigned int index, const union ib_gid *gid,
+int bnxt_re_add_gid(const union ib_gid *gid,
const struct ib_gid_attr *attr, void **context)
{
int rc;
u32 tbl_idx = 0;
u16 vlan_id = 0xFFFF;
struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
- struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(attr->device, ibdev);
struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
if ((attr->ndev) && is_vlan_dev(attr->ndev))
@@ -673,7 +671,6 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
struct bnxt_re_ah *ah;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
int rc;
- u16 vlan_tag;
u8 nw_type;
struct ib_gid_attr sgid_attr;
@@ -719,11 +716,7 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
grh->sgid_index);
goto fail;
}
- if (sgid_attr.ndev) {
- if (is_vlan_dev(sgid_attr.ndev))
- vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
- dev_put(sgid_attr.ndev);
- }
+ dev_put(sgid_attr.ndev);
/* Get network header type for this GID */
nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
switch (nw_type) {
@@ -737,14 +730,6 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V1;
break;
}
- rc = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
- ah_attr->roce.dmac, &vlan_tag,
- &sgid_attr.ndev->ifindex,
- NULL);
- if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to get dmac\n");
- goto fail;
- }
}
memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN);
@@ -1366,7 +1351,7 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq)
return rc;
}
- if (srq->umem && !IS_ERR(srq->umem))
+ if (srq->umem)
ib_umem_release(srq->umem);
kfree(srq);
atomic_dec(&rdev->srq_count);
@@ -1482,11 +1467,8 @@ struct ib_srq *bnxt_re_create_srq(struct ib_pd *ib_pd,
return &srq->ib_srq;
fail:
- if (udata && srq->umem && !IS_ERR(srq->umem)) {
+ if (srq->umem)
ib_umem_release(srq->umem);
- srq->umem = NULL;
- }
-
kfree(srq);
exit:
return ERR_PTR(rc);
@@ -1557,14 +1539,13 @@ int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, struct ib_recv_wr *wr,
ib_srq);
struct bnxt_qplib_swqe wqe;
unsigned long flags;
- int rc = 0, payload_sz = 0;
+ int rc = 0;
spin_lock_irqsave(&srq->lock, flags);
while (wr) {
/* Transcribe each ib_recv_wr to qplib_swqe */
wqe.num_sge = wr->num_sge;
- payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe.sg_list,
- wr->num_sge);
+ bnxt_re_build_sgl(wr->sg_list, wqe.sg_list, wr->num_sge);
wqe.wr_id = wr->wr_id;
wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
@@ -1715,7 +1696,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
status = ib_get_cached_gid(&rdev->ibdev, 1,
grh->sgid_index,
&sgid, &sgid_attr);
- if (!status && sgid_attr.ndev) {
+ if (!status) {
memcpy(qp->qplib_qp.smac, sgid_attr.ndev->dev_addr,
ETH_ALEN);
dev_put(sgid_attr.ndev);
@@ -1937,7 +1918,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
u8 ip_version = 0;
u16 vlan_id = 0xFFFF;
void *buf;
- int i, rc = 0, size;
+ int i, rc = 0;
memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr));
@@ -2054,7 +2035,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
/* Pack the QP1 to the transmit buffer */
buf = bnxt_qplib_get_qp1_sq_buf(&qp->qplib_qp, &sge);
if (buf) {
- size = ib_ud_header_pack(&qp->qp1_hdr, buf);
+ ib_ud_header_pack(&qp->qp1_hdr, buf);
for (i = wqe->num_sge; i; i--) {
wqe->sg_list[i].addr = wqe->sg_list[i - 1].addr;
wqe->sg_list[i].lkey = wqe->sg_list[i - 1].lkey;
@@ -2517,7 +2498,7 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
struct ib_recv_wr *wr)
{
struct bnxt_qplib_swqe wqe;
- int rc = 0, payload_sz = 0;
+ int rc = 0;
memset(&wqe, 0, sizeof(wqe));
while (wr) {
@@ -2532,8 +2513,7 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
rc = -EINVAL;
break;
}
- payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe.sg_list,
- wr->num_sge);
+ bnxt_re_build_sgl(wr->sg_list, wqe.sg_list, wr->num_sge);
wqe.wr_id = wr->wr_id;
wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
@@ -2873,7 +2853,7 @@ static void bnxt_re_process_req_wc(struct ib_wc *wc, struct bnxt_qplib_cqe *cqe)
static int bnxt_re_check_packet_type(u16 raweth_qp1_flags,
u16 raweth_qp1_flags2)
{
- bool is_udp = false, is_ipv6 = false, is_ipv4 = false;
+ bool is_ipv6 = false, is_ipv4 = false;
/* raweth_qp1_flags Bit 9-6 indicates itype */
if ((raweth_qp1_flags & CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ROCE)
@@ -2884,7 +2864,6 @@ static int bnxt_re_check_packet_type(u16 raweth_qp1_flags,
CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_CS_CALC &&
raweth_qp1_flags2 &
CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_L4_CS_CALC) {
- is_udp = true;
/* raweth_qp1_flags2 Bit 8 indicates ip_type. 0-v4 1 - v6 */
(raweth_qp1_flags2 &
CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_TYPE) ?
@@ -3085,6 +3064,32 @@ static void bnxt_re_process_res_rawqp1_wc(struct ib_wc *wc,
wc->wc_flags |= IB_WC_GRH;
}
+static bool bnxt_re_is_vlan_pkt(struct bnxt_qplib_cqe *orig_cqe,
+ u16 *vid, u8 *sl)
+{
+ bool ret = false;
+ u32 metadata;
+ u16 tpid;
+
+ metadata = orig_cqe->raweth_qp1_metadata;
+ if (orig_cqe->raweth_qp1_flags2 &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_VLAN) {
+ tpid = ((metadata &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_MASK) >>
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_SFT);
+ if (tpid == ETH_P_8021Q) {
+ *vid = metadata &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_VID_MASK;
+ *sl = (metadata &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_MASK) >>
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_SFT;
+ ret = true;
+ }
+ }
+
+ return ret;
+}
+
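bnxt_re_is_vlan_pkt() above unpacks the TPID, VLAN ID and priority from the raw completion metadata. The equivalent unpacking of a plain 802.1Q TCI, shown here only as an illustration with locally defined names:

#include <stdbool.h>
#include <stdint.h>

#define VLAN_TPID_8021Q	0x8100

/* A 16-bit 802.1Q TCI carries PCP (3 bits), DEI (1 bit) and VID (12 bits). */
static bool parse_vlan_tci(uint16_t tpid, uint16_t tci,
			   uint16_t *vid, uint8_t *pcp)
{
	if (tpid != VLAN_TPID_8021Q)
		return false;
	*vid = tci & 0x0fff;
	*pcp = (tci >> 13) & 0x7;
	return true;
}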
static void bnxt_re_process_res_rc_wc(struct ib_wc *wc,
struct bnxt_qplib_cqe *cqe)
{
@@ -3104,12 +3109,14 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp,
struct ib_wc *wc,
struct bnxt_qplib_cqe *cqe)
{
- u32 tbl_idx;
struct bnxt_re_dev *rdev = qp->rdev;
struct bnxt_re_qp *qp1_qp = NULL;
struct bnxt_qplib_cqe *orig_cqe = NULL;
struct bnxt_re_sqp_entries *sqp_entry = NULL;
int nw_type;
+ u32 tbl_idx;
+ u16 vlan_id;
+ u8 sl;
tbl_idx = cqe->wr_id;
@@ -3124,6 +3131,11 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp,
wc->ex.imm_data = orig_cqe->immdata;
wc->src_qp = orig_cqe->src_qp;
memcpy(wc->smac, orig_cqe->smac, ETH_ALEN);
+ if (bnxt_re_is_vlan_pkt(orig_cqe, &vlan_id, &sl)) {
+ wc->vlan_id = vlan_id;
+ wc->sl = sl;
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+ }
wc->port_num = 1;
wc->vendor_err = orig_cqe->status;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index e62b7c2c7da6..5c6414cad4af 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -157,10 +157,8 @@ int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str);
int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
u16 index, u16 *pkey);
-int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
- unsigned int index, void **context);
-int bnxt_re_add_gid(struct ib_device *ibdev, u8 port_num,
- unsigned int index, const union ib_gid *gid,
+int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context);
+int bnxt_re_add_gid(const union ib_gid *gid,
const struct ib_gid_attr *attr, void **context);
int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
int index, union ib_gid *gid);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 4fb80aadbbf6..85cd1a3593d6 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -520,7 +520,7 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
return ERR_PTR(-EINVAL);
if (!(en_dev->flags & BNXT_EN_FLAG_ROCE_CAP)) {
- dev_dbg(&pdev->dev,
+ dev_info(&pdev->dev,
"%s: probe error: RoCE is not supported on this device",
ROCE_DRV_MODULE_NAME);
return ERR_PTR(-ENODEV);
@@ -595,7 +595,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->get_port_immutable = bnxt_re_get_port_immutable;
ibdev->get_dev_fw_str = bnxt_re_query_fw_str;
ibdev->query_pkey = bnxt_re_query_pkey;
- ibdev->query_gid = bnxt_re_query_gid;
ibdev->get_netdev = bnxt_re_get_netdev;
ibdev->add_gid = bnxt_re_add_gid;
ibdev->del_gid = bnxt_re_del_gid;
@@ -640,6 +639,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats;
ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats;
+ ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
return ib_register_device(ibdev, NULL);
}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 82c5bc919e3c..6ad0d46ab879 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -121,11 +121,6 @@ void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
{
- struct bnxt_qplib_cq *scq, *rcq;
-
- scq = qp->scq;
- rcq = qp->rcq;
-
if (qp->sq.flushed) {
qp->sq.flushed = false;
list_del(&qp->sq_flush);
@@ -1611,7 +1606,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
break;
}
- /* else, just fall thru */
+ /* fall thru */
case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM:
case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV:
{
@@ -2542,6 +2537,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
cqe->raweth_qp1_flags = le16_to_cpu(hwcqe->raweth_qp1_flags);
cqe->raweth_qp1_flags2 = le32_to_cpu(hwcqe->raweth_qp1_flags2);
+ cqe->raweth_qp1_metadata = le32_to_cpu(hwcqe->raweth_qp1_metadata);
if (cqe->flags & CQ_RES_RAWETH_QP1_FLAGS_SRQ_SRQ) {
srq = qp->srq;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 3e76e6777441..2852d350ada1 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -88,7 +88,6 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
unsigned long flags;
u32 size, opcode;
u16 cookie, cbit;
- int pg, idx;
u8 *preq;
opcode = req->opcode;
@@ -150,9 +149,6 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
preq = (u8 *)req;
size = req->cmd_size * BNXT_QPLIB_CMDQE_UNITS;
do {
- pg = 0;
- idx = 0;
-
/* Locate the next cmdq slot */
sw_prod = HWQ_CMP(cmdq->prod, cmdq);
cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)];
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 4e101704e801..539a5d44e6db 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -104,13 +104,12 @@ static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
if (!sghead) {
for (i = 0; i < pages; i++) {
- pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev,
- pbl->pg_size,
- &pbl->pg_map_arr[i],
- GFP_KERNEL);
+ pbl->pg_arr[i] = dma_zalloc_coherent(&pdev->dev,
+ pbl->pg_size,
+ &pbl->pg_map_arr[i],
+ GFP_KERNEL);
if (!pbl->pg_arr[i])
goto fail;
- memset(pbl->pg_arr[i], 0, pbl->pg_size);
pbl->pg_count++;
}
} else {
@@ -706,12 +705,8 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
dpit->max = dbr_len / PAGE_SIZE;
dpit->app_tbl = kcalloc(dpit->max, sizeof(void *), GFP_KERNEL);
- if (!dpit->app_tbl) {
- pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
- dev_err(&res->pdev->dev,
- "QPLIB: DPI app tbl allocation failed");
- return -ENOMEM;
- }
+ if (!dpit->app_tbl)
+ goto unmap_io;
bytes = dpit->max >> 3;
if (!bytes)
@@ -719,18 +714,18 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
dpit->tbl = kmalloc(bytes, GFP_KERNEL);
if (!dpit->tbl) {
- pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
kfree(dpit->app_tbl);
dpit->app_tbl = NULL;
- dev_err(&res->pdev->dev,
- "QPLIB: DPI tbl allocation failed for size = %d",
- bytes);
- return -ENOMEM;
+ goto unmap_io;
}
memset((u8 *)dpit->tbl, 0xFF, bytes);
return 0;
+
+unmap_io:
+ pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
+ return -ENOMEM;
}
/* PKEYs */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index ca9ecfd6b649..4097f3fa25c5 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -154,7 +154,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
}
- attr->is_atomic = 0;
+ attr->is_atomic = false;
bail:
bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
return rc;
@@ -770,14 +770,13 @@ int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids)
struct cmdq_map_tc_to_cos req;
struct creq_map_tc_to_cos_resp resp;
u16 cmd_flags = 0;
- int rc = 0;
RCFW_CMD_PREP(req, MAP_TC_TO_COS, cmd_flags);
req.cos0 = cpu_to_le16(cids[0]);
req.cos1 = cpu_to_le16(cids[1]);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, 0);
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, NULL,
+ 0);
return 0;
}
diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig
index 431be733fbbe..a7b77cb3d5d5 100644
--- a/drivers/infiniband/hw/cxgb3/Kconfig
+++ b/drivers/infiniband/hw/cxgb3/Kconfig
@@ -16,12 +16,3 @@ config INFINIBAND_CXGB3
To compile this driver as a module, choose M here: the module
will be called iw_cxgb3.
-
-config INFINIBAND_CXGB3_DEBUG
- bool "Verbose debugging output"
- depends on INFINIBAND_CXGB3
- default n
- ---help---
- This option causes the Chelsio RDMA driver to produce copious
- amounts of debug messages. Select this if you are developing
- the driver or trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile
index 2761364185af..5abf39c4023f 100644
--- a/drivers/infiniband/hw/cxgb3/Makefile
+++ b/drivers/infiniband/hw/cxgb3/Makefile
@@ -4,5 +4,3 @@ obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o
iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \
iwch_provider.o iwch.o cxio_hal.o cxio_resource.o
-
-ccflags-$(CONFIG_INFINIBAND_CXGB3_DEBUG) += -DDEBUG
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
deleted file mode 100644
index 97dbe728520a..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef DEBUG
-#include <linux/types.h>
-#include <linux/slab.h>
-#include "common.h"
-#include "cxgb3_ioctl.h"
-#include "cxio_hal.h"
-#include "cxio_wr.h"
-
-void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag)
-{
- struct ch_mem_range *m;
- u64 *data;
- int rc;
- int size = 32;
-
- m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m)
- return;
-
- m->mem_id = MEM_PMRX;
- m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
- m->len = size;
- pr_debug("%s TPT addr 0x%x len %d\n", __func__, m->addr, m->len);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
- if (rc) {
- pr_debug("%s toectl returned error %d\n", __func__, rc);
- kfree(m);
- return;
- }
-
- data = (u64 *)m->buf;
- while (size > 0) {
- pr_debug("TPT %08x: %016llx\n",
- m->addr, (unsigned long long)*data);
- size -= 8;
- data++;
- m->addr += 8;
- }
- kfree(m);
-}
-
-void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift)
-{
- struct ch_mem_range *m;
- u64 *data;
- int rc;
- int size, npages;
-
- shift += 12;
- npages = (len + (1ULL << shift) - 1) >> shift;
- size = npages * sizeof(u64);
-
- m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m)
- return;
-
- m->mem_id = MEM_PMRX;
- m->addr = pbl_addr;
- m->len = size;
- pr_debug("%s PBL addr 0x%x len %d depth %d\n",
- __func__, m->addr, m->len, npages);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
- if (rc) {
- pr_debug("%s toectl returned error %d\n", __func__, rc);
- kfree(m);
- return;
- }
-
- data = (u64 *)m->buf;
- while (size > 0) {
- pr_debug("PBL %08x: %016llx\n",
- m->addr, (unsigned long long)*data);
- size -= 8;
- data++;
- m->addr += 8;
- }
- kfree(m);
-}
-
-void cxio_dump_wqe(union t3_wr *wqe)
-{
- __be64 *data = (__be64 *)wqe;
- uint size = (uint)(be64_to_cpu(*data) & 0xff);
-
- if (size == 0)
- size = 8;
- while (size > 0) {
- pr_debug("WQE %p: %016llx\n",
- data, (unsigned long long)be64_to_cpu(*data));
- size--;
- data++;
- }
-}
-
-void cxio_dump_wce(struct t3_cqe *wce)
-{
- __be64 *data = (__be64 *)wce;
- int size = sizeof(*wce);
-
- while (size > 0) {
- pr_debug("WCE %p: %016llx\n",
- data, (unsigned long long)be64_to_cpu(*data));
- size -= 8;
- data++;
- }
-}
-
-void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents)
-{
- struct ch_mem_range *m;
- int size = nents * 64;
- u64 *data;
- int rc;
-
- m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m)
- return;
-
- m->mem_id = MEM_PMRX;
- m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
- m->len = size;
- pr_debug("%s RQT addr 0x%x len %d\n", __func__, m->addr, m->len);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
- if (rc) {
- pr_debug("%s toectl returned error %d\n", __func__, rc);
- kfree(m);
- return;
- }
-
- data = (u64 *)m->buf;
- while (size > 0) {
- pr_debug("RQT %08x: %016llx\n",
- m->addr, (unsigned long long)*data);
- size -= 8;
- data++;
- m->addr += 8;
- }
- kfree(m);
-}
-
-void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid)
-{
- struct ch_mem_range *m;
- int size = TCB_SIZE;
- u32 *data;
- int rc;
-
- m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m)
- return;
-
- m->mem_id = MEM_CM;
- m->addr = hwtid * size;
- m->len = size;
- pr_debug("%s TCB %d len %d\n", __func__, m->addr, m->len);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
- if (rc) {
- pr_debug("%s toectl returned error %d\n", __func__, rc);
- kfree(m);
- return;
- }
-
- data = (u32 *)m->buf;
- while (size > 0) {
- printk("%2u: %08x %08x %08x %08x %08x %08x %08x %08x\n",
- m->addr,
- *(data+2), *(data+3), *(data),*(data+1),
- *(data+6), *(data+7), *(data+4), *(data+5));
- size -= 32;
- data += 8;
- m->addr += 32;
- }
- kfree(m);
-}
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 7e70c5492262..c64e50b5a548 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -202,13 +202,4 @@ int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb);
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#ifdef DEBUG
-void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag);
-void cxio_dump_pbl(struct cxio_rdev *rev, u32 pbl_addr, uint len, u8 shift);
-void cxio_dump_wqe(union t3_wr *wqe);
-void cxio_dump_wce(struct t3_cqe *wce);
-void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents);
-void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid);
-#endif
-
#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 86975370a4c0..edc1c549289e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1760,8 +1760,8 @@ static void ep_timeout(unsigned long arg)
int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
- int err;
struct iwch_ep *ep = to_ep(cm_id);
+
pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (state_read(&ep->com) == DEAD) {
@@ -1772,8 +1772,8 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
if (mpa_rev == 0)
abort_connection(ep, NULL, GFP_KERNEL);
else {
- err = send_mpa_reject(ep, pdata, pdata_len);
- err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
+ send_mpa_reject(ep, pdata, pdata_len);
+ iwch_ep_disconnect(ep, 0, GFP_KERNEL);
}
put_ep(&ep->com);
return 0;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index dd5348e48806..0a8542c20804 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -200,9 +200,6 @@ int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
spin_lock_irqsave(&chp->lock, flags);
for (npolled = 0; npolled < num_entries; ++npolled) {
-#ifdef DEBUG
- int i=0;
-#endif
/*
* Because T3 can post CQEs that are _not_ associated
@@ -211,9 +208,6 @@ int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
*/
do {
err = iwch_poll_cq_one(rhp, chp, wc + npolled);
-#ifdef DEBUG
- BUG_ON(++i > 1000);
-#endif
} while (err == -EAGAIN);
if (err <= 0)
break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 099e76f3758a..8e63ab5a2f3c 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -440,7 +440,9 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
php->pdid = pdid;
php->rhp = rhp;
if (context) {
- if (ib_copy_to_udata(udata, &php->pdid, sizeof (__u32))) {
+ struct iwch_alloc_pd_resp resp = {.pdid = php->pdid};
+
+ if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
iwch_deallocate_pd(&php->ibpd);
return ERR_PTR(-EFAULT);
}
@@ -1440,6 +1442,7 @@ int iwch_register_device(struct iwch_dev *dev)
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.t3cdev_p->lldev->name,
sizeof(dev->ibdev.iwcm->ifname));
+ dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
ret = ib_register_device(&dev->ibdev, NULL);
if (ret)
goto bail1;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 7f633da0185d..3871e1fd8395 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -722,10 +722,13 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
*/
static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
struct iwch_cq *schp)
+ __releases(&qhp->lock)
+ __acquires(&qhp->lock)
{
int count;
int flushed;
+ lockdep_assert_held(&qhp->lock);
pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
/* take a ref on the qhp since we must release the lock */
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 099019ca0c99..7d49e03f9616 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -4096,9 +4096,15 @@ static void process_work(struct work_struct *work)
dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
opcode = rpl->ot.opcode;
- ret = work_handlers[opcode](dev, skb);
- if (!ret)
+ if (opcode >= ARRAY_SIZE(work_handlers) ||
+ !work_handlers[opcode]) {
+ pr_err("No handler for opcode 0x%x.\n", opcode);
kfree_skb(skb);
+ } else {
+ ret = work_handlers[opcode](dev, skb);
+ if (!ret)
+ kfree_skb(skb);
+ }
process_timedout_eps();
}
}
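The process_work() fix bounds-checks the opcode and the handler pointer before dispatching, so an unexpected message is dropped instead of indexing past work_handlers[] or calling through a NULL entry. A generic sketch of that guard pattern (names and table contents are made up for the example):

#include <stdio.h>

typedef int (*handler_fn)(void *ctx);

static int handle_a(void *ctx) { (void)ctx; return 0; }

static handler_fn handlers[] = {
	[0] = handle_a,
	/* gaps stay NULL */
};

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static int dispatch(unsigned int opcode, void *ctx)
{
	if (opcode >= ARRAY_SIZE(handlers) || !handlers[opcode]) {
		fprintf(stderr, "No handler for opcode 0x%x\n", opcode);
		return -1;
	}
	return handlers[opcode](ctx);
}

int main(void)
{
	dispatch(0, NULL);	/* handled */
	dispatch(5, NULL);	/* rejected: out of range */
	return 0;
}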
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 30e7ca578946..44161ca4d2a8 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -108,19 +108,19 @@ void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe)
idx = (atomic_inc_return(&wq->rdev->wr_log_idx) - 1) &
(wq->rdev->wr_log_size - 1);
le.poll_sge_ts = cxgb4_read_sge_timestamp(wq->rdev->lldi.ports[0]);
- getnstimeofday(&le.poll_host_ts);
+ le.poll_host_time = ktime_get();
le.valid = 1;
le.cqe_sge_ts = CQE_TS(cqe);
if (SQ_TYPE(cqe)) {
le.qid = wq->sq.qid;
le.opcode = CQE_OPCODE(cqe);
- le.post_host_ts = wq->sq.sw_sq[wq->sq.cidx].host_ts;
+ le.post_host_time = wq->sq.sw_sq[wq->sq.cidx].host_time;
le.post_sge_ts = wq->sq.sw_sq[wq->sq.cidx].sge_ts;
le.wr_id = CQE_WRID_SQ_IDX(cqe);
} else {
le.qid = wq->rq.qid;
le.opcode = FW_RI_RECEIVE;
- le.post_host_ts = wq->rq.sw_rq[wq->rq.cidx].host_ts;
+ le.post_host_time = wq->rq.sw_rq[wq->rq.cidx].host_time;
le.post_sge_ts = wq->rq.sw_rq[wq->rq.cidx].sge_ts;
le.wr_id = CQE_WRID_MSN(cqe);
}
@@ -130,9 +130,9 @@ void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe)
static int wr_log_show(struct seq_file *seq, void *v)
{
struct c4iw_dev *dev = seq->private;
- struct timespec prev_ts = {0, 0};
+ ktime_t prev_time;
struct wr_log_entry *lep;
- int prev_ts_set = 0;
+ int prev_time_set = 0;
int idx, end;
#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000)
@@ -145,33 +145,29 @@ static int wr_log_show(struct seq_file *seq, void *v)
lep = &dev->rdev.wr_log[idx];
while (idx != end) {
if (lep->valid) {
- if (!prev_ts_set) {
- prev_ts_set = 1;
- prev_ts = lep->poll_host_ts;
+ if (!prev_time_set) {
+ prev_time_set = 1;
+ prev_time = lep->poll_host_time;
}
- seq_printf(seq, "%04u: sec %lu nsec %lu qid %u opcode "
- "%u %s 0x%x host_wr_delta sec %lu nsec %lu "
+ seq_printf(seq, "%04u: nsec %llu qid %u opcode "
+ "%u %s 0x%x host_wr_delta nsec %llu "
"post_sge_ts 0x%llx cqe_sge_ts 0x%llx "
"poll_sge_ts 0x%llx post_poll_delta_ns %llu "
"cqe_poll_delta_ns %llu\n",
idx,
- timespec_sub(lep->poll_host_ts,
- prev_ts).tv_sec,
- timespec_sub(lep->poll_host_ts,
- prev_ts).tv_nsec,
+ ktime_to_ns(ktime_sub(lep->poll_host_time,
+ prev_time)),
lep->qid, lep->opcode,
lep->opcode == FW_RI_RECEIVE ?
"msn" : "wrid",
lep->wr_id,
- timespec_sub(lep->poll_host_ts,
- lep->post_host_ts).tv_sec,
- timespec_sub(lep->poll_host_ts,
- lep->post_host_ts).tv_nsec,
+ ktime_to_ns(ktime_sub(lep->poll_host_time,
+ lep->post_host_time)),
lep->post_sge_ts, lep->cqe_sge_ts,
lep->poll_sge_ts,
ts2ns(lep->poll_sge_ts - lep->post_sge_ts),
ts2ns(lep->poll_sge_ts - lep->cqe_sge_ts));
- prev_ts = lep->poll_host_ts;
+ prev_time = lep->poll_host_time;
}
idx++;
if (idx > (dev->rdev.wr_log_size - 1))
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 5d9bea174a86..c620ba7d6bfc 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -153,8 +153,8 @@ struct c4iw_hw_queue {
};
struct wr_log_entry {
- struct timespec post_host_ts;
- struct timespec poll_host_ts;
+ ktime_t post_host_time;
+ ktime_t poll_host_time;
u64 post_sge_ts;
u64 cqe_sge_ts;
u64 poll_sge_ts;
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 596ceaefacf4..0b9cc73c3ded 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -648,6 +648,7 @@ void c4iw_register_device(struct work_struct *work)
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
sizeof(dev->ibdev.iwcm->ifname));
+ dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
ret = ib_register_device(&dev->ibdev, NULL);
if (ret)
goto err_kfree_iwcm;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 5026be5be973..57337861bd26 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1042,7 +1042,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
if (c4iw_wr_log) {
swsqe->sge_ts = cxgb4_read_sge_timestamp(
qhp->rhp->rdev.lldi.ports[0]);
- getnstimeofday(&swsqe->host_ts);
+ swsqe->host_time = ktime_get();
}
init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
@@ -1117,8 +1117,8 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].sge_ts =
cxgb4_read_sge_timestamp(
qhp->rhp->rdev.lldi.ports[0]);
- getnstimeofday(
- &qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].host_ts);
+ qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].host_time =
+ ktime_get();
}
wqe->recv.opcode = FW_RI_RECV_WR;
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 79e8ee12c391..8369c7c8de83 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -277,7 +277,7 @@ struct t4_swsqe {
int signaled;
u16 idx;
int flushed;
- struct timespec host_ts;
+ ktime_t host_time;
u64 sge_ts;
};
@@ -318,7 +318,7 @@ struct t4_sq {
struct t4_swrqe {
u64 wr_id;
- struct timespec host_ts;
+ ktime_t host_time;
u64 sge_ts;
};
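
The cxgb4 hunks above (device.c, qp.c, t4.h, iw_cxgb4.h) migrate the write-request log from struct timespec plus getnstimeofday() to ktime_t plus ktime_get(), so an interval becomes a single 64-bit nanosecond subtraction instead of a sec/nsec pair from timespec_sub(). A minimal sketch of the resulting pattern, assuming it sits in kernel code with <linux/ktime.h> available; the entry layout and function names are illustrative only.

#include <linux/ktime.h>
#include <linux/printk.h>

struct sample_log_entry {
    ktime_t post_host_time;   /* taken when the work request is posted */
    ktime_t poll_host_time;   /* taken when the completion is polled   */
};

static void sample_log_post(struct sample_log_entry *le)
{
    le->post_host_time = ktime_get();   /* monotonic clock */
}

static void sample_log_poll(struct sample_log_entry *le)
{
    s64 delta_ns;

    le->poll_host_time = ktime_get();
    delta_ns = ktime_to_ns(ktime_sub(le->poll_host_time,
                                     le->post_host_time));
    pr_info("host_wr_delta nsec %lld\n", delta_ns);
}
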
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 3301fde44d3a..e37819fe735b 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -9992,7 +9992,7 @@ int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which)
val = ppd->phy_error_threshold;
break;
case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
- val = dd->link_default;
+ val = HLS_DEFAULT;
break;
case HFI1_IB_CFG_HRTBT: /* Heartbeat off/enable/auto */
@@ -10195,6 +10195,10 @@ static const char * const state_complete_reasons[] = {
[0x33] =
"Link partner completed the VerifyCap state, but the passing lanes do not meet the local link width policy",
[0x34] = tx_out_of_policy,
+ [0x35] = "Negotiated link width is mutually exclusive",
+ [0x36] =
+ "Timed out before receiving verifycap frames in VerifyCap.Exchange",
+ [0x37] = "Unable to resolve secure data exchange",
};
static const char *state_complete_reason_code_string(struct hfi1_pportdata *ppd,
@@ -10592,7 +10596,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
orig_new_state = state;
if (state == HLS_DN_DOWNDEF)
- state = dd->link_default;
+ state = HLS_DEFAULT;
/* interpret poll -> poll as a link bounce */
poll_bounce = ppd->host_link_state == HLS_DN_POLL &&
@@ -15006,9 +15010,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (num_vls < HFI1_MIN_VLS_SUPPORTED ||
num_vls > HFI1_MAX_VLS_SUPPORTED) {
- hfi1_early_err(&pdev->dev,
- "Invalid num_vls %u, using %u VLs\n",
- num_vls, HFI1_MAX_VLS_SUPPORTED);
+ dd_dev_err(dd, "Invalid num_vls %u, using %u VLs\n",
+ num_vls, HFI1_MAX_VLS_SUPPORTED);
num_vls = HFI1_MAX_VLS_SUPPORTED;
}
ppd->vls_supported = num_vls;
@@ -15033,8 +15036,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
init_vl_arb_caches(ppd);
}
- dd->link_default = HLS_DN_POLL;
-
/*
* Do remaining PCIe setup and save PCIe values in dd.
* Any error printing is already done by the init code.
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 271f6fd79e2c..fdf389e46e19 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -571,7 +571,7 @@ enum {
/* timeouts */
#define LINK_RESTART_DELAY 1000 /* link restart delay, in ms */
#define TIMEOUT_8051_START 5000 /* 8051 start timeout, in ms */
-#define DC8051_COMMAND_TIMEOUT 20000 /* DC8051 command timeout, in ms */
+#define DC8051_COMMAND_TIMEOUT 1000 /* DC8051 command timeout, in ms */
#define FREEZE_STATUS_TIMEOUT 20 /* wait for freeze indicators, in ms */
#define VL_STATUS_CLEAR_TIMEOUT 5000 /* per-VL status clear, in ms */
#define CCE_STATUS_TIMEOUT 10 /* time to clear CCE Status, in ms */
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 18d10406839d..2d23033d1495 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -159,22 +159,6 @@ static int hfi1_caps_get(char *buffer, const struct kernel_param *kp)
return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask);
}
-const char *get_unit_name(int unit)
-{
- static char iname[16];
-
- snprintf(iname, sizeof(iname), DRIVER_NAME "_%u", unit);
- return iname;
-}
-
-const char *get_card_name(struct rvt_dev_info *rdi)
-{
- struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
- struct hfi1_devdata *dd = container_of(ibdev,
- struct hfi1_devdata, verbs_dev);
- return get_unit_name(dd->unit);
-}
-
struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
{
struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
@@ -272,7 +256,12 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
u32 mlid_base;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct hfi1_devdata *dd = ppd->dd;
- struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+ struct hfi1_ibdev *verbs_dev = &dd->verbs_dev;
+ struct rvt_dev_info *rdi = &verbs_dev->rdi;
+
+ if ((packet->rhf & RHF_DC_ERR) &&
+ hfi1_dbg_fault_suppress_err(verbs_dev))
+ return;
if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
return;
@@ -401,6 +390,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
svc_type = IB_CC_SVCTYPE_UC;
break;
default:
+ rcu_read_unlock();
goto drop;
}
@@ -433,6 +423,12 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd,
packet->numpkt = 0;
}
+/* We support only two types - 9B and 16B for now */
+static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = {
+ [HFI1_PKT_TYPE_9B] = &return_cnp,
+ [HFI1_PKT_TYPE_16B] = &return_cnp_16B
+};
+
void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
bool do_cnp)
{
@@ -1462,8 +1458,6 @@ static int hfi1_setup_9B_packet(struct hfi1_packet *packet)
packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf);
packet->pad = ib_bth_get_pad(packet->ohdr);
packet->extra_byte = 0;
- packet->fecn = ib_bth_get_fecn(packet->ohdr);
- packet->becn = ib_bth_get_becn(packet->ohdr);
packet->pkey = ib_bth_get_pkey(packet->ohdr);
packet->migrated = ib_bth_is_migration(packet->ohdr);
@@ -1532,8 +1526,6 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
packet->sl = ibp->sc_to_sl[packet->sc];
packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
packet->extra_byte = SIZE_OF_LT;
- packet->fecn = hfi1_16B_get_fecn(packet->hdr);
- packet->becn = hfi1_16B_get_becn(packet->hdr);
packet->pkey = hfi1_16B_get_pkey(packet->hdr);
packet->migrated = opa_bth_is_migration(packet->ohdr);
@@ -1580,19 +1572,7 @@ int process_receive_ib(struct hfi1_packet *packet)
if (hfi1_setup_9B_packet(packet))
return RHF_RCV_CONTINUE;
- trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
- packet->rcd->ctxt,
- rhf_err_flags(packet->rhf),
- RHF_RCV_TYPE_IB,
- packet->hlen,
- packet->tlen,
- packet->updegr,
- rhf_egr_index(packet->rhf));
-
- if (unlikely(
- (hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
- (packet->rhf & RHF_DC_ERR))))
- return RHF_RCV_CONTINUE;
+ trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
@@ -1628,6 +1608,8 @@ int process_receive_bypass(struct hfi1_packet *packet)
if (hfi1_setup_bypass_packet(packet))
return RHF_RCV_CONTINUE;
+ trace_hfi1_rcvhdr(packet);
+
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
return RHF_RCV_CONTINUE;
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 05e13868d4cb..727124d8f179 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -107,6 +107,7 @@ static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt);
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
unsigned long arg);
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg);
+static int ctxt_reset(struct hfi1_ctxtdata *uctxt);
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
unsigned long arg);
static int vma_fault(struct vm_fault *vmf);
@@ -280,63 +281,9 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
ret = set_ctxt_pkey(uctxt, arg);
break;
- case HFI1_IOCTL_CTXT_RESET: {
- struct send_context *sc;
- struct hfi1_devdata *dd;
-
- if (!uctxt || !uctxt->dd || !uctxt->sc)
- return -EINVAL;
-
- /*
- * There is no protection here. User level has to
- * guarantee that no one will be writing to the send
- * context while it is being re-initialized.
- * If user level breaks that guarantee, it will break
- * it's own context and no one else's.
- */
- dd = uctxt->dd;
- sc = uctxt->sc;
- /*
- * Wait until the interrupt handler has marked the
- * context as halted or frozen. Report error if we time
- * out.
- */
- wait_event_interruptible_timeout(
- sc->halt_wait, (sc->flags & SCF_HALTED),
- msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
- if (!(sc->flags & SCF_HALTED))
- return -ENOLCK;
-
- /*
- * If the send context was halted due to a Freeze,
- * wait until the device has been "unfrozen" before
- * resetting the context.
- */
- if (sc->flags & SCF_FROZEN) {
- wait_event_interruptible_timeout(
- dd->event_queue,
- !(READ_ONCE(dd->flags) & HFI1_FROZEN),
- msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
- if (dd->flags & HFI1_FROZEN)
- return -ENOLCK;
-
- if (dd->flags & HFI1_FORCED_FREEZE)
- /*
- * Don't allow context reset if we are into
- * forced freeze
- */
- return -ENODEV;
-
- sc_disable(sc);
- ret = sc_enable(sc);
- hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt);
- } else {
- ret = sc_restart(sc);
- }
- if (!ret)
- sc_return_credits(sc);
+ case HFI1_IOCTL_CTXT_RESET:
+ ret = ctxt_reset(uctxt);
break;
- }
case HFI1_IOCTL_GET_VERS:
uval = HFI1_USER_SWVERSION;
@@ -827,7 +774,7 @@ static int complete_subctxt(struct hfi1_filedata *fd)
static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len)
{
int ret;
- unsigned int swmajor, swminor;
+ unsigned int swmajor;
struct hfi1_ctxtdata *uctxt = NULL;
struct hfi1_user_info uinfo;
@@ -847,8 +794,6 @@ static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len)
if (uinfo.subctxt_cnt > HFI1_MAX_SHARED_CTXTS)
return -EINVAL;
- swminor = uinfo.userversion & 0xffff;
-
/*
* Acquire the mutex to protect against multiple creations of what
* could be a shared base context.
@@ -1333,34 +1278,34 @@ static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
fd->subctxt,
uctxt->egrbufs.rcvtids[0].dma);
binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
- fd->subctxt, 0);
+ fd->subctxt, 0);
/*
* user regs are at
* (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE))
*/
binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
- fd->subctxt, 0);
+ fd->subctxt, 0);
offset = offset_in_page((uctxt_offset(uctxt) + fd->subctxt) *
sizeof(*dd->events));
binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
- fd->subctxt,
- offset);
+ fd->subctxt,
+ offset);
binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt,
- fd->subctxt,
- dd->status);
+ fd->subctxt,
+ dd->status);
if (HFI1_CAP_IS_USET(DMA_RTAIL))
binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt,
- fd->subctxt, 0);
+ fd->subctxt, 0);
if (uctxt->subctxt_cnt) {
binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS,
- uctxt->ctxt,
- fd->subctxt, 0);
- binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ,
uctxt->ctxt,
fd->subctxt, 0);
+ binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ,
+ uctxt->ctxt,
+ fd->subctxt, 0);
binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF,
- uctxt->ctxt,
- fd->subctxt, 0);
+ uctxt->ctxt,
+ fd->subctxt, 0);
}
if (copy_to_user((void __user *)arg, &binfo, len))
@@ -1660,6 +1605,69 @@ static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg)
return -ENOENT;
}
+/**
+ * ctxt_reset - Reset the user context
+ * @uctxt: valid user context
+ */
+static int ctxt_reset(struct hfi1_ctxtdata *uctxt)
+{
+ struct send_context *sc;
+ struct hfi1_devdata *dd;
+ int ret = 0;
+
+ if (!uctxt || !uctxt->dd || !uctxt->sc)
+ return -EINVAL;
+
+ /*
+ * There is no protection here. User level has to guarantee that
+ * no one will be writing to the send context while it is being
+ * re-initialized. If user level breaks that guarantee, it will
+ * break its own context and no one else's.
+ */
+ dd = uctxt->dd;
+ sc = uctxt->sc;
+
+ /*
+ * Wait until the interrupt handler has marked the context as
+ * halted or frozen. Report error if we time out.
+ */
+ wait_event_interruptible_timeout(
+ sc->halt_wait, (sc->flags & SCF_HALTED),
+ msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
+ if (!(sc->flags & SCF_HALTED))
+ return -ENOLCK;
+
+ /*
+ * If the send context was halted due to a Freeze, wait until the
+ * device has been "unfrozen" before resetting the context.
+ */
+ if (sc->flags & SCF_FROZEN) {
+ wait_event_interruptible_timeout(
+ dd->event_queue,
+ !(READ_ONCE(dd->flags) & HFI1_FROZEN),
+ msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
+ if (dd->flags & HFI1_FROZEN)
+ return -ENOLCK;
+
+ if (dd->flags & HFI1_FORCED_FREEZE)
+ /*
+ * Don't allow context reset if we are into
+ * forced freeze
+ */
+ return -ENODEV;
+
+ sc_disable(sc);
+ ret = sc_enable(sc);
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt);
+ } else {
+ ret = sc_restart(sc);
+ }
+ if (!ret)
+ sc_return_credits(sc);
+
+ return ret;
+}
+
static void user_remove(struct hfi1_devdata *dd)
{
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index ebd244412845..2b57ba70ddd6 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1397,7 +1397,14 @@ int acquire_hw_mutex(struct hfi1_devdata *dd)
unsigned long timeout;
int try = 0;
u8 mask = 1 << dd->hfi1_id;
- u8 user;
+ u8 user = (u8)read_csr(dd, ASIC_CFG_MUTEX);
+
+ if (user == mask) {
+ dd_dev_info(dd,
+ "Hardware mutex already acquired, mutex mask %u\n",
+ (u32)mask);
+ return 0;
+ }
retry:
timeout = msecs_to_jiffies(HM_TIMEOUT) + jiffies;
@@ -1428,7 +1435,15 @@ retry:
void release_hw_mutex(struct hfi1_devdata *dd)
{
- write_csr(dd, ASIC_CFG_MUTEX, 0);
+ u8 mask = 1 << dd->hfi1_id;
+ u8 user = (u8)read_csr(dd, ASIC_CFG_MUTEX);
+
+ if (user != mask)
+ dd_dev_warn(dd,
+ "Unable to release hardware mutex, mutex mask %u, my mask %u\n",
+ (u32)user, (u32)mask);
+ else
+ write_csr(dd, ASIC_CFG_MUTEX, 0);
}
/* return the given resource bit(s) as a mask for the given HFI */
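
The firmware.c hunk above makes acquire_hw_mutex() a no-op when this HFI already owns the hardware mutex and makes release_hw_mutex() refuse to clear a mutex it does not own, by comparing the mutex CSR against the per-unit bit mask. The userspace sketch below shows the same idempotent-acquire / guarded-release idea with a fake register; it deliberately omits the retry-with-timeout loop the real driver keeps.

#include <stdio.h>
#include <stdint.h>

static uint8_t mutex_csr;                 /* stand-in for ASIC_CFG_MUTEX */

static uint8_t read_csr(void)        { return mutex_csr; }
static void    write_csr(uint8_t v)  { mutex_csr = v; }

/* Returns 0 on success; each unit owns one bit of the mutex register. */
static int acquire_hw_mutex(uint8_t unit)
{
    uint8_t mask = 1u << unit;

    if (read_csr() == mask) {
        printf("mutex already held by unit %u\n", (unsigned)unit);
        return 0;                 /* idempotent: nothing to do */
    }
    if (read_csr() != 0)
        return -1;                /* held by another unit */
    write_csr(mask);
    return 0;
}

static void release_hw_mutex(uint8_t unit)
{
    uint8_t mask = 1u << unit;

    if (read_csr() != mask) {
        printf("unit %u does not own the mutex, not releasing\n",
               (unsigned)unit);
        return;
    }
    write_csr(0);
}

int main(void)
{
    acquire_hw_mutex(0);
    acquire_hw_mutex(0);              /* second call is a no-op */
    release_hw_mutex(1);              /* refused: wrong owner */
    release_hw_mutex(0);
    return 0;
}
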
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index c6ac93c4482f..9c97c180c35e 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -70,7 +70,6 @@
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <rdma/rdma_vt.h>
-#include <rdma/opa_addr.h>
#include "chip_registers.h"
#include "common.h"
@@ -352,8 +351,6 @@ struct hfi1_packet {
u8 sc;
u8 sl;
u8 opcode;
- bool becn;
- bool fecn;
bool migrated;
};
@@ -539,6 +536,8 @@ struct rvt_sge_state;
#define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE)
#define HLS_DOWN ~(HLS_UP)
+#define HLS_DEFAULT HLS_DN_POLL
+
/* use this MTU size if none other is given */
#define HFI1_DEFAULT_ACTIVE_MTU 10240
/* use this MTU size as the default maximum */
@@ -1121,8 +1120,7 @@ struct hfi1_devdata {
u16 rcvegrbufsize_shift;
/* both sides of the PCIe link are gen3 capable */
u8 link_gen3_capable;
- /* default link down value (poll/sleep) */
- u8 link_default;
+ u8 dc_shutdown;
/* localbus width (1, 2,4,8,16,32) from config space */
u32 lbus_width;
/* localbus speed in MHz */
@@ -1307,7 +1305,6 @@ struct hfi1_devdata {
u8 oui1;
u8 oui2;
u8 oui3;
- u8 dc_shutdown;
/* Timer and counter used to detect RcvBufOvflCnt changes */
struct timer_list rcverr_timer;
@@ -1549,11 +1546,6 @@ typedef void (*hfi1_handle_cnp)(struct hfi1_ibport *ibp, struct rvt_qp *qp,
u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
u8 sc5, const struct ib_grh *old_grh);
-/* We support only two types - 9B and 16B for now */
-static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = {
- [HFI1_PKT_TYPE_9B] = &return_cnp,
- [HFI1_PKT_TYPE_16B] = &return_cnp_16B
-};
#define PKEY_CHECK_INVALID -1
int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey,
u8 sc5, int8_t s_pkey_index);
@@ -1637,7 +1629,7 @@ static int ingress_pkey_table_search(struct hfi1_pportdata *ppd, u16 pkey)
* the 'error info' for this failure.
*/
static void ingress_pkey_table_fail(struct hfi1_pportdata *ppd, u16 pkey,
- u16 slid)
+ u32 slid)
{
struct hfi1_devdata *dd = ppd->dd;
@@ -1793,19 +1785,15 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
bool do_cnp)
{
- struct ib_other_headers *ohdr = pkt->ohdr;
-
- u32 bth1;
- bool becn = false;
- bool fecn = false;
+ bool becn;
+ bool fecn;
if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
fecn = hfi1_16B_get_fecn(pkt->hdr);
becn = hfi1_16B_get_becn(pkt->hdr);
} else {
- bth1 = be32_to_cpu(ohdr->bth[1]);
- fecn = bth1 & IB_FECN_SMASK;
- becn = bth1 & IB_BECN_SMASK;
+ fecn = ib_bth_get_fecn(pkt->ohdr);
+ becn = ib_bth_get_becn(pkt->ohdr);
}
if (unlikely(fecn || becn)) {
hfi1_process_ecn_slowpath(qp, pkt, do_cnp);
@@ -1987,8 +1975,6 @@ int get_platform_config_field(struct hfi1_devdata *dd,
table_type, int table_index, int field_index,
u32 *data, u32 len);
-const char *get_unit_name(int unit);
-const char *get_card_name(struct rvt_dev_info *rdi);
struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi);
/*
@@ -2138,39 +2124,42 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
#define dd_dev_emerg(dd, fmt, ...) \
dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
#define dd_dev_err(dd, fmt, ...) \
dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
#define dd_dev_err_ratelimited(dd, fmt, ...) \
dev_err_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
+ ##__VA_ARGS__)
#define dd_dev_warn(dd, fmt, ...) \
dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
#define dd_dev_warn_ratelimited(dd, fmt, ...) \
dev_warn_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
+ ##__VA_ARGS__)
#define dd_dev_info(dd, fmt, ...) \
dev_info(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
#define dd_dev_info_ratelimited(dd, fmt, ...) \
dev_info_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
+ ##__VA_ARGS__)
#define dd_dev_dbg(dd, fmt, ...) \
dev_dbg(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
#define hfi1_dev_porterr(dd, port, fmt, ...) \
dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \
- get_unit_name((dd)->unit), (port), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), (port), ##__VA_ARGS__)
/*
* this is used for formatting hw error messages...
@@ -2431,7 +2420,7 @@ static inline void hfi1_make_ib_hdr(struct ib_header *hdr,
static inline void hfi1_make_16b_hdr(struct hfi1_16b_header *hdr,
u32 slid, u32 dlid,
u16 len, u16 pkey,
- u8 becn, u8 fecn, u8 l4,
+ bool becn, bool fecn, u8 l4,
u8 sc)
{
u32 lrh0 = 0;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index edc629ef7cc9..4e5bb6f599ca 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -172,7 +172,7 @@ int hfi1_create_kctxts(struct hfi1_devdata *dd)
u16 i;
int ret;
- dd->rcd = kzalloc_node(dd->num_rcv_contexts * sizeof(*dd->rcd),
+ dd->rcd = kcalloc_node(dd->num_rcv_contexts, sizeof(*dd->rcd),
GFP_KERNEL, dd->node);
if (!dd->rcd)
return -ENOMEM;
@@ -438,15 +438,16 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
* The resulting value will be rounded down to the closest
* multiple of dd->rcv_entries.group_size.
*/
- rcd->egrbufs.buffers = kzalloc_node(
- rcd->egrbufs.count * sizeof(*rcd->egrbufs.buffers),
- GFP_KERNEL, numa);
+ rcd->egrbufs.buffers =
+ kcalloc_node(rcd->egrbufs.count,
+ sizeof(*rcd->egrbufs.buffers),
+ GFP_KERNEL, numa);
if (!rcd->egrbufs.buffers)
goto bail;
- rcd->egrbufs.rcvtids = kzalloc_node(
- rcd->egrbufs.count *
- sizeof(*rcd->egrbufs.rcvtids),
- GFP_KERNEL, numa);
+ rcd->egrbufs.rcvtids =
+ kcalloc_node(rcd->egrbufs.count,
+ sizeof(*rcd->egrbufs.rcvtids),
+ GFP_KERNEL, numa);
if (!rcd->egrbufs.rcvtids)
goto bail;
rcd->egrbufs.size = eager_buffer_size;
@@ -1307,6 +1308,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
"Could not allocate unit ID: error %d\n", -ret);
goto bail;
}
+ rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
+
/*
* Initialize all locks for the device. This needs to be as early as
* possible so locks are usable.
@@ -1327,24 +1330,18 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
dd->int_counter = alloc_percpu(u64);
if (!dd->int_counter) {
ret = -ENOMEM;
- hfi1_early_err(&pdev->dev,
- "Could not allocate per-cpu int_counter\n");
goto bail;
}
dd->rcv_limit = alloc_percpu(u64);
if (!dd->rcv_limit) {
ret = -ENOMEM;
- hfi1_early_err(&pdev->dev,
- "Could not allocate per-cpu rcv_limit\n");
goto bail;
}
dd->send_schedule = alloc_percpu(u64);
if (!dd->send_schedule) {
ret = -ENOMEM;
- hfi1_early_err(&pdev->dev,
- "Could not allocate per-cpu int_counter\n");
goto bail;
}
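
The init.c hunk above (like the pio.c one further down) converts kzalloc_node(n * size, ...) into kcalloc_node(n, size, ...), which still returns zeroed NUMA-local memory but fails cleanly if n * size would overflow rather than allocating a short buffer. A one-function sketch of the pattern; struct item and the parameter names are placeholders.

#include <linux/slab.h>
#include <linux/types.h>

struct item {
    u64 payload;
};

/* Allocate a zeroed, NUMA-local array; returns NULL on overflow or OOM. */
static struct item *alloc_items(unsigned int nr_items, int node)
{
    return kcalloc_node(nr_items, sizeof(struct item), GFP_KERNEL, node);
}
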
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 1c2062882c5b..da3dd31c4358 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -721,6 +721,7 @@ static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
/* Bad mkey not a violation below level 2 */
if (ibp->rvp.mkeyprot < 2)
break;
+ /* fall through */
case IB_MGMT_METHOD_SET:
case IB_MGMT_METHOD_TRAP_REPRESS:
if (ibp->rvp.mkey_violations != 0xFFFF)
@@ -2989,7 +2990,6 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
struct _vls_dctrs *vlinfo;
size_t response_data_size;
u32 num_ports;
- u8 num_pslm;
u8 lq, num_vls;
u8 res_lli, res_ler;
u64 port_mask;
@@ -3001,7 +3001,6 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
u16 link_speed;
num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
- num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
vl_select_mask = be32_to_cpu(req->vl_select_mask);
res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT;
@@ -4459,11 +4458,7 @@ static int opa_local_smp_check(struct hfi1_ibport *ibp,
*/
if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
return 0;
- /*
- * On OPA devices it is okay to lose the upper 16 bits of LID as this
- * information is obtained elsewhere. Mask off the upper 16 bits.
- */
- ingress_pkey_table_fail(ppd, pkey, ib_lid_cpu16(0xFFFF & in_wc->slid));
+ ingress_pkey_table_fail(ppd, pkey, in_wc->slid);
return 1;
}
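
The mad.c hunk above adds an explicit /* fall through */ annotation where the GET-method case is intended to continue into the SET and TRAP_REPRESS cases, documenting the intent and keeping compilers that warn on implicit fallthrough quiet. A trivial standalone illustration of the annotation:

#include <stdio.h>

static void classify(int method)
{
    switch (method) {
    case 1:
        printf("get: ");
        /* fall through */
    case 2:
        printf("set handling\n");
        break;
    default:
        printf("other\n");
        break;
    }
}

int main(void)
{
    classify(1);   /* prints "get: set handling" */
    classify(2);   /* prints "set handling" */
    return 0;
}
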
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 32055df024d3..9cac15d10c4f 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -453,8 +453,8 @@ int init_send_contexts(struct hfi1_devdata *dd)
dd->hw_to_sw = kmalloc_array(TXE_NUM_CONTEXTS, sizeof(u8),
GFP_KERNEL);
dd->send_contexts = kcalloc(dd->num_send_contexts,
- sizeof(struct send_context_info),
- GFP_KERNEL);
+ sizeof(struct send_context_info),
+ GFP_KERNEL);
if (!dd->send_contexts || !dd->hw_to_sw) {
kfree(dd->hw_to_sw);
kfree(dd->send_contexts);
@@ -854,8 +854,9 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
* so head == tail can mean empty.
*/
sc->sr_size = sci->credits + 1;
- sc->sr = kzalloc_node(sizeof(union pio_shadow_ring) *
- sc->sr_size, GFP_KERNEL, numa);
+ sc->sr = kcalloc_node(sc->sr_size,
+ sizeof(union pio_shadow_ring),
+ GFP_KERNEL, numa);
if (!sc->sr) {
sc_free(sc);
return NULL;
@@ -1982,9 +1983,9 @@ int init_pervl_scs(struct hfi1_devdata *dd)
hfi1_init_ctxt(dd->vld[15].sc);
dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048);
- dd->kernel_send_context = kzalloc_node(dd->num_send_contexts *
- sizeof(struct send_context *),
- GFP_KERNEL, dd->node);
+ dd->kernel_send_context = kcalloc_node(dd->num_send_contexts,
+ sizeof(struct send_context *),
+ GFP_KERNEL, dd->node);
if (!dd->kernel_send_context)
goto freesc15;
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index d30dd1a5b0a6..1697d96151bd 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -481,7 +481,6 @@ static void iowait_sdma_drained(struct iowait *wait)
}
/**
- *
* qp_to_sdma_engine - map a qp to a send engine
* @qp: the QP
* @sc5: the 5 bit sc
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 9306ccb275eb..08c370a80025 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -300,7 +300,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- smp_read_barrier_depends(); /* see post_one_send() */
if (qp->s_last == READ_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
@@ -344,7 +343,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
newreq = 0;
if (qp->s_cur == qp->s_tail) {
/* Check if send work queue is empty. */
- smp_read_barrier_depends(); /* see post_one_send() */
if (qp->s_tail == READ_ONCE(qp->s_head)) {
clear_ahg(qp);
goto bail;
@@ -814,7 +812,7 @@ static inline void hfi1_make_rc_ack_16B(struct hfi1_packet *packet,
struct ib_other_headers *ohdr;
u32 bth0, bth1 = 0;
u16 len, pkey;
- u8 becn = !!is_fecn;
+ bool becn = is_fecn;
u8 l4 = OPA_16B_L4_IB_LOCAL;
u8 extra_bytes;
@@ -899,7 +897,6 @@ void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn)
}
/* Ensure s_rdma_ack_cnt changes are committed */
- smp_read_barrier_depends();
if (qp->s_rdma_ack_cnt) {
hfi1_queue_rc_ack(packet, is_fecn);
return;
@@ -1561,7 +1558,6 @@ static void rc_rcv_resp(struct hfi1_packet *packet)
trace_hfi1_ack(qp, psn);
/* Ignore invalid responses. */
- smp_read_barrier_depends(); /* see post_one_send */
if (cmp_psn(psn, READ_ONCE(qp->s_next_psn)) >= 0)
goto ack_done;
@@ -2173,7 +2169,7 @@ send_middle:
goto no_immediate_data;
if (opcode == OP(SEND_ONLY_WITH_INVALIDATE))
goto send_last_inv;
- /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */
+ /* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
wc.ex.imm_data = ohdr->u.imm_data;
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index a7eb62ac2823..c0071ca4147a 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -351,7 +351,6 @@ static void ruc_loopback(struct rvt_qp *sqp)
sqp->s_flags |= RVT_S_BUSY;
again:
- smp_read_barrier_depends(); /* see post_one_send() */
if (sqp->s_last == READ_ONCE(sqp->s_head))
goto clr_busy;
wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
@@ -765,7 +764,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
ps->s_txreq->s_cur_size);
u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size +
extra_bytes + SIZE_OF_LT) >> 2);
- u8 becn = 0;
+ bool becn = false;
if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) {
@@ -843,11 +842,9 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibport *ibp = ps->ibp;
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 bth1 = 0;
u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
u16 lrh0 = HFI1_LRH_BTH;
- u16 slid;
u8 extra_bytes = -ps->s_txreq->s_cur_size & 3;
u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size +
extra_bytes) >> 2);
@@ -885,13 +882,6 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
bth0 |= pkey;
bth0 |= extra_bytes << 20;
hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2);
-
- if (!ppd->lid)
- slid = be16_to_cpu(IB_LID_PERMISSIVE);
- else
- slid = ppd->lid |
- (rdma_ah_get_path_bits(&qp->remote_ah_attr) &
- ((1 << ppd->lmc) - 1));
hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh,
lrh0,
ps->s_txreq->hdr_dwords + nwords,
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index a0371847dfd3..22327caf521a 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -553,7 +553,6 @@ static void sdma_hw_clean_up_task(unsigned long opaque)
static inline struct sdma_txreq *get_txhead(struct sdma_engine *sde)
{
- smp_read_barrier_depends(); /* see sdma_update_tail() */
return sde->tx_ring[sde->tx_head & sde->sdma_mask];
}
@@ -1383,11 +1382,19 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
num_engines, descq_cnt);
/* alloc memory for array of send engines */
- dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL);
+ dd->per_sdma = kcalloc_node(num_engines, sizeof(*dd->per_sdma),
+ GFP_KERNEL, dd->node);
if (!dd->per_sdma)
return ret;
idle_cnt = ns_to_cclock(dd, idle_cnt);
+ if (idle_cnt)
+ dd->default_desc1 =
+ SDMA_DESC1_HEAD_TO_HOST_FLAG;
+ else
+ dd->default_desc1 =
+ SDMA_DESC1_INT_REQ_FLAG;
+
if (!sdma_desct_intr)
sdma_desct_intr = SDMA_DESC_INTR;
@@ -1432,13 +1439,6 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
sde->tail_csr =
get_kctxt_csr_addr(dd, this_idx, SD(TAIL));
- if (idle_cnt)
- dd->default_desc1 =
- SDMA_DESC1_HEAD_TO_HOST_FLAG;
- else
- dd->default_desc1 =
- SDMA_DESC1_INT_REQ_FLAG;
-
tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
(unsigned long)sde);
@@ -1461,13 +1461,8 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
if (!sde->descq)
goto bail;
sde->tx_ring =
- kcalloc(descq_cnt, sizeof(struct sdma_txreq *),
- GFP_KERNEL);
- if (!sde->tx_ring)
- sde->tx_ring =
- vzalloc(
- sizeof(struct sdma_txreq *) *
- descq_cnt);
+ kvzalloc_node(sizeof(struct sdma_txreq *) * descq_cnt,
+ GFP_KERNEL, dd->node);
if (!sde->tx_ring)
goto bail;
}
@@ -2139,7 +2134,6 @@ void sdma_dumpstate(struct sdma_engine *sde)
static void dump_sdma_state(struct sdma_engine *sde)
{
- struct hw_sdma_desc *descq;
struct hw_sdma_desc *descqp;
u64 desc[2];
u64 addr;
@@ -2150,7 +2144,6 @@ static void dump_sdma_state(struct sdma_engine *sde)
head = sde->descq_head & sde->sdma_mask;
tail = sde->descq_tail & sde->sdma_mask;
cnt = sdma_descq_freecnt(sde);
- descq = sde->descq;
dd_dev_err(sde->dd,
"SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
@@ -2588,7 +2581,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
* 7220, e.g.
*/
ss->go_s99_running = 1;
- /* fall through and start dma engine */
+ /* fall through -- and start dma engine */
case sdma_event_e10_go_hw_start:
/* This reference means the state machine is started */
sdma_get(&sde->state);
@@ -3011,6 +3004,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
case sdma_event_e60_hw_halted:
need_progress = 1;
sdma_err_progress_check_schedule(sde);
+ /* fall through */
case sdma_event_e90_sw_halted:
/*
* SW initiated halt does not perform engines
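
The sdma.c hunk above replaces the two-step allocation of the tx ring (kcalloc, then a vzalloc fallback on failure) with a single kvzalloc_node() call, which handles the kmalloc-to-vmalloc fallback internally and keeps the memory on the engine's NUMA node. A minimal sketch of the pattern, assuming kernel context; the txreq type and count are placeholders, and kvfree() is the matching release for either backing allocation.

#include <linux/mm.h>
#include <linux/slab.h>

struct txreq;

/* One call covers both the kmalloc fast path and the vmalloc fallback. */
static struct txreq **alloc_tx_ring(unsigned int descq_cnt, int node)
{
    return kvzalloc_node(sizeof(struct txreq *) * descq_cnt,
                         GFP_KERNEL, node);
}

static void free_tx_ring(struct txreq **ring)
{
    kvfree(ring);   /* correct for both kmalloc'd and vmalloc'd memory */
}
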
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 959a80429ee9..89bd9851065b 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -138,7 +138,7 @@ static const char *parse_syndrome(u8 syndrome)
}
void hfi1_trace_parse_9b_bth(struct ib_other_headers *ohdr,
- u8 *ack, u8 *becn, u8 *fecn, u8 *mig,
+ u8 *ack, bool *becn, bool *fecn, u8 *mig,
u8 *se, u8 *pad, u8 *opcode, u8 *tver,
u16 *pkey, u32 *psn, u32 *qpn)
{
@@ -184,7 +184,7 @@ void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5,
}
void hfi1_trace_parse_16b_hdr(struct hfi1_16b_header *hdr,
- u8 *age, u8 *becn, u8 *fecn,
+ u8 *age, bool *becn, bool *fecn,
u8 *l4, u8 *rc, u8 *sc,
u16 *entropy, u16 *len, u16 *pkey,
u32 *dlid, u32 *slid)
@@ -207,7 +207,7 @@ void hfi1_trace_parse_16b_hdr(struct hfi1_16b_header *hdr,
#define LRH_16B_PRN "age:%d becn:%d fecn:%d l4:%d " \
"rc:%d sc:%d pkey:0x%.4x entropy:0x%.4x"
const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass,
- u8 age, u8 becn, u8 fecn, u8 l4,
+ u8 age, bool becn, bool fecn, u8 l4,
u8 lnh, const char *lnh_name, u8 lver,
u8 rc, u8 sc, u8 sl, u16 entropy,
u16 len, u16 pkey, u32 dlid, u32 slid)
@@ -235,7 +235,7 @@ const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass,
"op:0x%.2x,%s se:%d m:%d pad:%d tver:%d " \
"qpn:0x%.6x a:%d psn:0x%.8x"
const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass,
- u8 ack, u8 becn, u8 fecn, u8 mig,
+ u8 ack, bool becn, bool fecn, u8 mig,
u8 se, u8 pad, u8 opcode, const char *opname,
u8 tver, u16 pkey, u32 psn, u32 qpn)
{
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index fb631278eccd..2847626d3819 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -101,7 +101,7 @@ u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opah);
u8 hfi1_trace_packet_hdr_len(struct hfi1_packet *packet);
const char *hfi1_trace_get_packet_l4_str(u8 l4);
void hfi1_trace_parse_9b_bth(struct ib_other_headers *ohdr,
- u8 *ack, u8 *becn, u8 *fecn, u8 *mig,
+ u8 *ack, bool *becn, bool *fecn, u8 *mig,
u8 *se, u8 *pad, u8 *opcode, u8 *tver,
u16 *pkey, u32 *psn, u32 *qpn);
void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5,
@@ -112,19 +112,19 @@ void hfi1_trace_parse_16b_bth(struct ib_other_headers *ohdr,
u8 *pad, u8 *se, u8 *tver,
u32 *psn, u32 *qpn);
void hfi1_trace_parse_16b_hdr(struct hfi1_16b_header *hdr,
- u8 *age, u8 *becn, u8 *fecn,
+ u8 *age, bool *becn, bool *fecn,
u8 *l4, u8 *rc, u8 *sc,
u16 *entropy, u16 *len, u16 *pkey,
u32 *dlid, u32 *slid);
const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass,
- u8 age, u8 becn, u8 fecn, u8 l4,
+ u8 age, bool becn, bool fecn, u8 l4,
u8 lnh, const char *lnh_name, u8 lver,
u8 rc, u8 sc, u8 sl, u16 entropy,
u16 len, u16 pkey, u32 dlid, u32 slid);
const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass,
- u8 ack, u8 becn, u8 fecn, u8 mig,
+ u8 ack, bool becn, bool fecn, u8 mig,
u8 se, u8 pad, u8 opcode, const char *opname,
u8 tver, u16 pkey, u32 psn, u32 qpn);
@@ -148,8 +148,8 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
__field(u8, etype)
__field(u8, ack)
__field(u8, age)
- __field(u8, becn)
- __field(u8, fecn)
+ __field(bool, becn)
+ __field(bool, fecn)
__field(u8, l2)
__field(u8, l4)
__field(u8, lnh)
@@ -290,8 +290,8 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
__field(u8, hdr_type)
__field(u8, ack)
__field(u8, age)
- __field(u8, becn)
- __field(u8, fecn)
+ __field(bool, becn)
+ __field(bool, fecn)
__field(u8, l4)
__field(u8, lnh)
__field(u8, lver)
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 4d487fee105d..7eceb57e0415 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -63,17 +63,9 @@ __print_symbolic(type, \
#define TRACE_SYSTEM hfi1_rx
TRACE_EVENT(hfi1_rcvhdr,
- TP_PROTO(struct hfi1_devdata *dd,
- u32 ctxt,
- u64 eflags,
- u32 etype,
- u32 hlen,
- u32 tlen,
- u32 updegr,
- u32 etail
- ),
- TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ TP_PROTO(struct hfi1_packet *packet),
+ TP_ARGS(packet),
+ TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->dd)
__field(u64, eflags)
__field(u32, ctxt)
__field(u32, etype)
@@ -82,14 +74,14 @@ TRACE_EVENT(hfi1_rcvhdr,
__field(u32, updegr)
__field(u32, etail)
),
- TP_fast_assign(DD_DEV_ASSIGN(dd);
- __entry->eflags = eflags;
- __entry->ctxt = ctxt;
- __entry->etype = etype;
- __entry->hlen = hlen;
- __entry->tlen = tlen;
- __entry->updegr = updegr;
- __entry->etail = etail;
+ TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->dd);
+ __entry->eflags = rhf_err_flags(packet->rhf);
+ __entry->ctxt = packet->rcd->ctxt;
+ __entry->etype = packet->etype;
+ __entry->hlen = packet->hlen;
+ __entry->tlen = packet->tlen;
+ __entry->updegr = packet->updegr;
+ __entry->etail = rhf_egr_index(packet->rhf);
),
TP_printk(
"[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d",
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index efaceadb6fa2..d140aaccdf14 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -79,7 +79,6 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- smp_read_barrier_depends(); /* see post_one_send() */
if (qp->s_last == READ_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
@@ -119,7 +118,6 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
RVT_PROCESS_NEXT_SEND_OK))
goto bail;
/* Check if send work queue is empty. */
- smp_read_barrier_depends(); /* see post_one_send() */
if (qp->s_cur == READ_ONCE(qp->s_head)) {
clear_ahg(qp);
goto bail;
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 049a4306b1ad..3ae10530f754 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -489,7 +489,6 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- smp_read_barrier_depends(); /* see post_one_send */
if (qp->s_last == READ_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
@@ -503,7 +502,6 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
/* see post_one_send() */
- smp_read_barrier_depends();
if (qp->s_cur == READ_ONCE(qp->s_head))
goto bail;
@@ -856,7 +854,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
int mgmt_pkey_idx = -1;
struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct ib_header *hdr = packet->hdr;
void *data = packet->payload;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
@@ -882,7 +879,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
dlid_is_permissive = (dlid == permissive_lid);
slid_is_permissive = (slid == permissive_lid);
} else {
- hdr = packet->hdr;
pkey = ib_bth_get_pkey(ohdr);
dlid_is_permissive = (dlid == be16_to_cpu(IB_LID_PERMISSIVE));
slid_is_permissive = (slid == be16_to_cpu(IB_LID_PERMISSIVE));
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index aa7391497ee5..6a4c5142515a 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -472,7 +472,7 @@ nomem:
tinfo->tidcnt = tididx;
tinfo->length = mapped_pages * PAGE_SIZE;
- if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist,
+ if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
tidlist, sizeof(tidlist[0]) * tididx)) {
/*
* On failure to copy to the user level, we need to undo
@@ -512,7 +512,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
if (unlikely(tinfo->tidcnt > fd->tid_used))
return -EINVAL;
- tidinfo = memdup_user((void __user *)(unsigned long)tinfo->tidlist,
+ tidinfo = memdup_user(u64_to_user_ptr(tinfo->tidlist),
sizeof(tidinfo[0]) * tinfo->tidcnt);
if (IS_ERR(tidinfo))
return PTR_ERR(tidinfo);
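
The user_exp_rcv.c hunk above replaces the open-coded (void __user *)(unsigned long) cast with u64_to_user_ptr(), the helper for turning a u64 ABI field that carries a user address back into a __user pointer. A short sketch of the call pattern, assuming tidlist is a u64 member of an ioctl argument struct; the struct and function names here are invented for illustration.

#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/errno.h>
#include <linux/types.h>

struct tid_info_example {
    u64 tidlist;      /* user address passed in through the ioctl ABI */
    u32 tidcnt;
};

static int copy_tids_to_user(const struct tid_info_example *tinfo,
                             const u32 *tidlist, u32 cnt)
{
    if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
                     tidlist, sizeof(tidlist[0]) * cnt))
        return -EFAULT;
    return 0;
}
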
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 8ec6e8a8d6f7..a3a7b33196d6 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -956,10 +956,8 @@ static int pin_sdma_pages(struct user_sdma_request *req,
struct hfi1_user_sdma_pkt_q *pq = req->pq;
pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
- if (!pages) {
- SDMA_DBG(req, "Failed page array alloc");
+ if (!pages)
return -ENOMEM;
- }
memcpy(pages, node->pages, node->npages * sizeof(*pages));
npages -= node->npages;
@@ -1254,20 +1252,25 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
struct user_sdma_txreq *tx, u32 datalen)
{
u32 ahg[AHG_KDETH_ARRAY_SIZE];
- int diff = 0;
+ int idx = 0;
u8 omfactor; /* KDETH.OM */
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct hfi1_pkt_header *hdr = &req->hdr;
u16 pbclen = le16_to_cpu(hdr->pbc[0]);
u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
+ size_t array_size = ARRAY_SIZE(ahg);
if (PBC2LRH(pbclen) != lrhlen) {
/* PBC.PbcLengthDWs */
- AHG_HEADER_SET(ahg, diff, 0, 0, 12,
- cpu_to_le16(LRH2PBC(lrhlen)));
+ idx = ahg_header_set(ahg, idx, array_size, 0, 0, 12,
+ (__force u16)cpu_to_le16(LRH2PBC(lrhlen)));
+ if (idx < 0)
+ return idx;
/* LRH.PktLen (we need the full 16 bits due to byte swap) */
- AHG_HEADER_SET(ahg, diff, 3, 0, 16,
- cpu_to_be16(lrhlen >> 2));
+ idx = ahg_header_set(ahg, idx, array_size, 3, 0, 16,
+ (__force u16)cpu_to_be16(lrhlen >> 2));
+ if (idx < 0)
+ return idx;
}
/*
@@ -1278,12 +1281,23 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
val32 |= 1UL << 31;
- AHG_HEADER_SET(ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
- AHG_HEADER_SET(ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
+ idx = ahg_header_set(ahg, idx, array_size, 6, 0, 16,
+ (__force u16)cpu_to_be16(val32 >> 16));
+ if (idx < 0)
+ return idx;
+ idx = ahg_header_set(ahg, idx, array_size, 6, 16, 16,
+ (__force u16)cpu_to_be16(val32 & 0xffff));
+ if (idx < 0)
+ return idx;
/* KDETH.Offset */
- AHG_HEADER_SET(ahg, diff, 15, 0, 16,
- cpu_to_le16(req->koffset & 0xffff));
- AHG_HEADER_SET(ahg, diff, 15, 16, 16, cpu_to_le16(req->koffset >> 16));
+ idx = ahg_header_set(ahg, idx, array_size, 15, 0, 16,
+ (__force u16)cpu_to_le16(req->koffset & 0xffff));
+ if (idx < 0)
+ return idx;
+ idx = ahg_header_set(ahg, idx, array_size, 15, 16, 16,
+ (__force u16)cpu_to_le16(req->koffset >> 16));
+ if (idx < 0)
+ return idx;
if (req_opcode(req->info.ctrl) == EXPECTED) {
__le16 val;
@@ -1310,10 +1324,13 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
KDETH_OM_SMALL_SHIFT;
/* KDETH.OM and KDETH.OFFSET (TID) */
- AHG_HEADER_SET(ahg, diff, 7, 0, 16,
- ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
+ idx = ahg_header_set(
+ ahg, idx, array_size, 7, 0, 16,
+ ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
((req->tidoffset >> omfactor)
- & 0x7fff)));
+ & 0x7fff)));
+ if (idx < 0)
+ return idx;
/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
(EXP_TID_GET(tidval, IDX) & 0x3ff));
@@ -1330,21 +1347,22 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
AHG_KDETH_INTR_SHIFT));
}
- AHG_HEADER_SET(ahg, diff, 7, 16, 14, val);
+ idx = ahg_header_set(ahg, idx, array_size,
+ 7, 16, 14, (__force u16)val);
+ if (idx < 0)
+ return idx;
}
- if (diff < 0)
- return diff;
trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
req->info.comp_idx, req->sde->this_idx,
- req->ahg_idx, ahg, diff, tidval);
+ req->ahg_idx, ahg, idx, tidval);
sdma_txinit_ahg(&tx->txreq,
SDMA_TXREQ_F_USE_AHG,
- datalen, req->ahg_idx, diff,
+ datalen, req->ahg_idx, idx,
ahg, sizeof(req->hdr),
user_sdma_txreq_cb);
- return diff;
+ return idx;
}
/*
@@ -1410,6 +1428,8 @@ static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
{
+ int i;
+
if (!list_empty(&req->txps)) {
struct sdma_txreq *t, *p;
@@ -1421,22 +1441,20 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
kmem_cache_free(req->pq->txreq_cache, tx);
}
}
- if (req->data_iovs) {
- struct sdma_mmu_node *node;
- int i;
-
- for (i = 0; i < req->data_iovs; i++) {
- node = req->iovs[i].node;
- if (!node)
- continue;
-
- if (unpin)
- hfi1_mmu_rb_remove(req->pq->handler,
- &node->rb);
- else
- atomic_dec(&node->refcount);
- }
+
+ for (i = 0; i < req->data_iovs; i++) {
+ struct sdma_mmu_node *node = req->iovs[i].node;
+
+ if (!node)
+ continue;
+
+ if (unpin)
+ hfi1_mmu_rb_remove(req->pq->handler,
+ &node->rb);
+ else
+ atomic_dec(&node->refcount);
}
+
kfree(req->tids);
clear_bit(req->info.comp_idx, req->pq->req_in_use);
}
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 9b8bb5634c0d..a3d192424344 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -80,15 +80,26 @@
#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
-#define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \
- do { \
- if ((idx) < ARRAY_SIZE((arr))) \
- (arr)[(idx++)] = sdma_build_ahg_descriptor( \
- (__force u16)(value), (dw), (bit), \
- (width)); \
- else \
- return -ERANGE; \
- } while (0)
+/**
+ * Build an SDMA AHG header update descriptor and save it to an array.
+ * @arr - Array to save the descriptor to.
+ * @idx - Index of the array at which the descriptor will be saved.
+ * @array_size - Size of the array arr.
+ * @dw - Update index into the header in DWs.
+ * @bit - Start bit.
+ * @width - Field width.
+ * @value - 16 bits of immediate data to write into the field.
+ * Returns -ERANGE if idx is invalid. If successful, returns the next index
+ * (idx + 1) of the array to be used for the next descriptor.
+ */
+static inline int ahg_header_set(u32 *arr, int idx, size_t array_size,
+ u8 dw, u8 bit, u8 width, u16 value)
+{
+ if ((size_t)idx >= array_size)
+ return -ERANGE;
+ arr[idx++] = sdma_build_ahg_descriptor(value, dw, bit, width);
+ return idx;
+}
/* Tx request flag bits */
#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
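
The user_sdma.h hunk above turns the AHG_HEADER_SET macro, which hid a return -ERANGE inside the caller, into the ahg_header_set() inline that returns either the next free index or a negative error. The sketch below shows how a caller is expected to thread the returned index through successive descriptors, as in set_txreq_header_ahg(); the dword/bit/width values and the wrapper name are made up for illustration, and the helper itself is the one defined in the hunk above.

/* Illustrative caller only; assumes user_sdma.h (and ahg_header_set) is in scope. */
static int build_example_ahg(u32 *ahg, size_t array_size, u16 pbc_len, u16 pkt_len)
{
    int idx = 0;

    idx = ahg_header_set(ahg, idx, array_size, 0, 0, 12, pbc_len);
    if (idx < 0)
        return idx;               /* -ERANGE: descriptor array full */

    idx = ahg_header_set(ahg, idx, array_size, 3, 0, 16, pkt_len);
    if (idx < 0)
        return idx;

    return idx;                   /* number of descriptors built so far */
}
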
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 89c5be8dfefd..e6af84a1263e 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -836,7 +836,6 @@ static int build_verbs_tx_desc(
int ret = 0;
struct hfi1_sdma_header *phdr = &tx->phdr;
u16 hdrbytes = (tx->hdr_dwords + sizeof(pbc) / 4) << 2;
- u32 *hdr;
u8 extra_bytes = 0;
if (tx->phdr.hdr.hdr_type) {
@@ -846,9 +845,6 @@ static int build_verbs_tx_desc(
*/
extra_bytes = hfi1_get_16b_padding(hdrbytes - 8, length) +
(SIZE_OF_CRC << 2) + SIZE_OF_LT;
- hdr = (u32 *)&phdr->hdr.opah;
- } else {
- hdr = (u32 *)&phdr->hdr.ibh;
}
if (!ahg_info->ahgcount) {
ret = sdma_txinit_ahg(
@@ -914,14 +910,12 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u8 sc5 = priv->s_sc;
int ret;
u32 dwords;
- bool bypass = false;
if (ps->s_txreq->phdr.hdr.hdr_type) {
u8 extra_bytes = hfi1_get_16b_padding((hdrwords << 2), len);
dwords = (len + extra_bytes + (SIZE_OF_CRC << 2) +
SIZE_OF_LT) >> 2;
- bypass = true;
} else {
dwords = (len + 3) >> 2;
}
@@ -1058,8 +1052,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
int wc_status = IB_WC_SUCCESS;
int ret = 0;
pio_release_cb cb = NULL;
- u32 lrh0_16b;
- bool bypass = false;
u8 extra_bytes = 0;
if (ps->s_txreq->phdr.hdr.hdr_type) {
@@ -1068,8 +1060,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
extra_bytes = pad_size + (SIZE_OF_CRC << 2) + SIZE_OF_LT;
dwords = (len + extra_bytes) >> 2;
hdr = (u32 *)&ps->s_txreq->phdr.hdr.opah;
- lrh0_16b = ps->s_txreq->phdr.hdr.opah.lrh[0];
- bypass = true;
} else {
dwords = (len + 3) >> 2;
hdr = (u32 *)&ps->s_txreq->phdr.hdr.ibh;
@@ -1855,7 +1845,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
struct hfi1_ibport *ibp = &ppd->ibport_data;
unsigned i;
int ret;
- size_t lcpysz = IB_DEVICE_NAME_MAX;
for (i = 0; i < dd->num_pports; i++)
init_ibport(ppd + i);
@@ -1883,8 +1872,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
*/
if (!ib_hfi1_sys_image_guid)
ib_hfi1_sys_image_guid = ibdev->node_guid;
- lcpysz = strlcpy(ibdev->name, class_name(), lcpysz);
- strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
ibdev->owner = THIS_MODULE;
ibdev->phys_port_cnt = dd->num_pports;
ibdev->dev.parent = &dd->pcidev->dev;
@@ -1904,7 +1891,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
* Fill in rvt info object.
*/
dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files;
- dd->verbs_dev.rdi.driver_f.get_card_name = get_card_name;
dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev;
dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah;
dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah;
@@ -1975,7 +1961,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
i,
ppd->pkeys);
- ret = rvt_register_device(&dd->verbs_dev.rdi);
+ ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1);
if (ret)
goto err_verbs_txreq;
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index b30bc8830643..d74928621559 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -48,7 +48,6 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
struct ib_gid_attr gid_attr;
struct hns_roce_ah *ah;
u16 vlan_tag = 0xffff;
- struct in6_addr in6;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
union ib_gid sgid;
int ret;
@@ -58,18 +57,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
return ERR_PTR(-ENOMEM);
/* Get mac address */
- memcpy(&in6, grh->dgid.raw, sizeof(grh->dgid.raw));
- if (rdma_is_multicast_addr(&in6)) {
- rdma_get_mcast_mac(&in6, ah->av.mac);
- } else {
- u8 *dmac = rdma_ah_retrieve_dmac(ah_attr);
-
- if (!dmac) {
- kfree(ah);
- return ERR_PTR(-EINVAL);
- }
- memcpy(ah->av.mac, dmac, ETH_ALEN);
- }
+ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
/* Get source gid */
ret = ib_get_cached_gid(ibpd->device, rdma_ah_get_port_num(ah_attr),
@@ -80,11 +68,9 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
return ERR_PTR(ret);
}
- if (gid_attr.ndev) {
- if (is_vlan_dev(gid_attr.ndev))
- vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
- dev_put(gid_attr.ndev);
- }
+ if (is_vlan_dev(gid_attr.ndev))
+ vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
+ dev_put(gid_attr.ndev);
if (vlan_tag < 0x1000)
vlan_tag |= (rdma_ah_get_sl(ah_attr) &
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 1dbb38c79a9a..21b901cfa2d6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -74,12 +74,11 @@ static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
return hr_dev->hw->set_mac(hr_dev, phy_port, addr);
}
-static int hns_roce_add_gid(struct ib_device *device, u8 port_num,
- unsigned int index, const union ib_gid *gid,
+static int hns_roce_add_gid(const union ib_gid *gid,
const struct ib_gid_attr *attr, void **context)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(device);
- u8 port = port_num - 1;
+ struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
+ u8 port = attr->port_num - 1;
unsigned long flags;
int ret;
@@ -88,20 +87,19 @@ static int hns_roce_add_gid(struct ib_device *device, u8 port_num,
spin_lock_irqsave(&hr_dev->iboe.lock, flags);
- ret = hr_dev->hw->set_gid(hr_dev, port, index, (union ib_gid *)gid,
- attr);
+ ret = hr_dev->hw->set_gid(hr_dev, port, attr->index,
+ (union ib_gid *)gid, attr);
spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
return ret;
}
-static int hns_roce_del_gid(struct ib_device *device, u8 port_num,
- unsigned int index, void **context)
+static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(device);
+ struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
struct ib_gid_attr zattr = { };
- u8 port = port_num - 1;
+ u8 port = attr->port_num - 1;
unsigned long flags;
int ret;
@@ -110,7 +108,7 @@ static int hns_roce_del_gid(struct ib_device *device, u8 port_num,
spin_lock_irqsave(&hr_dev->iboe.lock, flags);
- ret = hr_dev->hw->set_gid(hr_dev, port, index, &zgid, &zattr);
+ ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &zgid, &zattr);
spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
@@ -295,12 +293,6 @@ static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device,
return IB_LINK_LAYER_ETHERNET;
}
-static int hns_roce_query_gid(struct ib_device *ib_dev, u8 port_num, int index,
- union ib_gid *gid)
-{
- return 0;
-}
-
static int hns_roce_query_pkey(struct ib_device *ib_dev, u8 port, u16 index,
u16 *pkey)
{
@@ -550,7 +542,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->modify_port = hns_roce_modify_port;
ib_dev->get_link_layer = hns_roce_get_link_layer;
ib_dev->get_netdev = hns_roce_get_netdev;
- ib_dev->query_gid = hns_roce_query_gid;
ib_dev->add_gid = hns_roce_add_gid;
ib_dev->del_gid = hns_roce_del_gid;
ib_dev->query_pkey = hns_roce_query_pkey;
@@ -595,6 +586,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->get_port_immutable = hns_roce_port_immutable;
ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext;
+ ib_dev->driver_id = RDMA_DRIVER_HNS;
ret = ib_register_device(ib_dev, NULL);
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig
index f6d20ba88c03..2962979c06e9 100644
--- a/drivers/infiniband/hw/i40iw/Kconfig
+++ b/drivers/infiniband/hw/i40iw/Kconfig
@@ -5,4 +5,3 @@ config INFINIBAND_I40IW
select GENERIC_ALLOCATOR
---help---
Intel(R) Ethernet X722 iWARP Driver
- INET && I40IW && INFINIBAND && I40E
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index b81154d030de..2f2b4426ded7 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -60,7 +60,7 @@
#include <i40e_client.h>
#include "i40iw_type.h"
#include "i40iw_p.h"
-#include "i40iw_ucontext.h"
+#include <rdma/i40iw-abi.h>
#include "i40iw_pble.h"
#include "i40iw_verbs.h"
#include "i40iw_cm.h"
@@ -566,7 +566,8 @@ struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core,
u32 *rem_addr,
u16 loc_port,
u32 *loc_addr,
- bool add_refcnt);
+ bool add_refcnt,
+ bool accelerated_list);
enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
struct i40iw_sc_qp *qp,
@@ -594,5 +595,8 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
int i40iw_net_event(struct notifier_block *notifier,
unsigned long event,
void *ptr);
+int i40iw_netdevice_event(struct notifier_block *notifier,
+ unsigned long event,
+ void *ptr);
#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 63e3dc6e3016..7d588c1d6403 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -92,14 +92,9 @@ void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp)
static u8 i40iw_derive_hw_ird_setting(u16 cm_ird)
{
u8 encoded_ird_size;
- u8 pof2_cm_ird = 1;
-
- /* round-off to next powerof2 */
- while (pof2_cm_ird < cm_ird)
- pof2_cm_ird *= 2;
/* ird_size field is encoded in qp_ctx */
- switch (pof2_cm_ird) {
+ switch (cm_ird ? roundup_pow_of_two(cm_ird) : 0) {
case I40IW_HW_IRD_SETTING_64:
encoded_ird_size = 3;
break;
@@ -1188,6 +1183,26 @@ static void i40iw_handle_close_entry(struct i40iw_cm_node *cm_node, u32 rem_node
}
/**
+ * i40iw_build_timer_list - Add cm_nodes to timer list
+ * @timer_list: ptr to timer list
+ * @hte: ptr to accelerated or non-accelerated list
+ */
+static void i40iw_build_timer_list(struct list_head *timer_list,
+ struct list_head *hte)
+{
+ struct i40iw_cm_node *cm_node;
+ struct list_head *list_core_temp, *list_node;
+
+ list_for_each_safe(list_node, list_core_temp, hte) {
+ cm_node = container_of(list_node, struct i40iw_cm_node, list);
+ if (cm_node->close_entry || cm_node->send_entry) {
+ atomic_inc(&cm_node->ref_count);
+ list_add(&cm_node->timer_entry, timer_list);
+ }
+ }
+}
+
+/**
* i40iw_cm_timer_tick - system's timer expired callback
* @pass: Pointing to cm_core
*/
@@ -1202,21 +1217,15 @@ static void i40iw_cm_timer_tick(unsigned long pass)
struct i40iw_cm_core *cm_core = (struct i40iw_cm_core *)pass;
u32 settimer = 0;
unsigned long timetosend;
- struct i40iw_sc_dev *dev;
unsigned long flags;
struct list_head timer_list;
INIT_LIST_HEAD(&timer_list);
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
- cm_node = container_of(list_node, struct i40iw_cm_node, list);
- if (cm_node->close_entry || cm_node->send_entry) {
- atomic_inc(&cm_node->ref_count);
- list_add(&cm_node->timer_entry, &timer_list);
- }
- }
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ i40iw_build_timer_list(&timer_list, &cm_core->non_accelerated_list);
+ i40iw_build_timer_list(&timer_list, &cm_core->accelerated_list);
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
list_for_each_safe(list_node, list_core_temp, &timer_list) {
@@ -1273,7 +1282,6 @@ static void i40iw_cm_timer_tick(unsigned long pass)
spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
vsi = &cm_node->iwdev->vsi;
- dev = cm_node->dev;
if (!cm_node->ack_rcvd) {
atomic_inc(&send_entry->sqbuf->refcount);
@@ -1413,19 +1421,22 @@ static int i40iw_send_fin(struct i40iw_cm_node *cm_node)
* @loc_port: local tcp port num
* @loc_addr: loc ip addr
* @add_refcnt: flag to increment refcount of cm_node
+ * @accelerated_list: flag for accelerated vs non-accelerated list to search
*/
struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core,
u16 rem_port,
u32 *rem_addr,
u16 loc_port,
u32 *loc_addr,
- bool add_refcnt)
+ bool add_refcnt,
+ bool accelerated_list)
{
struct list_head *hte;
struct i40iw_cm_node *cm_node;
unsigned long flags;
- hte = &cm_core->connected_nodes;
+ hte = accelerated_list ?
+ &cm_core->accelerated_list : &cm_core->non_accelerated_list;
/* walk list and find cm_node associated with this session ID */
spin_lock_irqsave(&cm_core->ht_lock, flags);
@@ -1494,22 +1505,40 @@ static struct i40iw_cm_listener *i40iw_find_listener(
static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core,
struct i40iw_cm_node *cm_node)
{
- struct list_head *hte;
unsigned long flags;
if (!cm_node || !cm_core) {
i40iw_pr_err("cm_node or cm_core == NULL\n");
return;
}
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- /* get a handle on the hash table element (list head for this slot) */
- hte = &cm_core->connected_nodes;
- list_add_tail(&cm_node->list, hte);
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_add_tail(&cm_node->list, &cm_core->non_accelerated_list);
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
}
/**
+ * i40iw_find_port - find port that matches reference port
+ * @port: port number
+ * @accelerated_list: flag for accelerated vs non-accelerated list
+ */
+static bool i40iw_find_port(struct i40iw_cm_core *cm_core, u16 port,
+ bool accelerated_list)
+{
+ struct list_head *hte;
+ struct i40iw_cm_node *cm_node;
+
+ hte = accelerated_list ?
+ &cm_core->accelerated_list : &cm_core->non_accelerated_list;
+
+ list_for_each_entry(cm_node, hte, list) {
+ if (cm_node->loc_port == port)
+ return true;
+ }
+ return false;
+}
+
+/**
* i40iw_port_in_use - determine if port is in use
* @port: port number
* @active_side: flag for listener side vs active side
@@ -1517,21 +1546,16 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core,
static bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port, bool active_side)
{
struct i40iw_cm_listener *listen_node;
- struct i40iw_cm_node *cm_node;
unsigned long flags;
bool ret = false;
if (active_side) {
- /* search connected node list */
spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_entry(cm_node, &cm_core->connected_nodes, list) {
- if (cm_node->loc_port == port) {
- ret = true;
- break;
- }
- }
- if (!ret)
- clear_bit(port, cm_core->active_side_ports);
+ ret = i40iw_find_port(cm_core, port, true);
+ if (!ret)
+ ret = i40iw_find_port(cm_core, port, false);
+ if (!ret)
+ clear_bit(port, cm_core->active_side_ports);
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
} else {
spin_lock_irqsave(&cm_core->listen_list_lock, flags);
@@ -1836,9 +1860,11 @@ static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core,
INIT_LIST_HEAD(&reset_list);
if (free_hanging_nodes) {
spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_safe(list_pos, list_temp, &cm_core->connected_nodes) {
+ list_for_each_safe(list_pos,
+ list_temp, &cm_core->non_accelerated_list) {
cm_node = container_of(list_pos, struct i40iw_cm_node, list);
- if ((cm_node->listener == listener) && !cm_node->accelerated) {
+ if ((cm_node->listener == listener) &&
+ !cm_node->accelerated) {
atomic_inc(&cm_node->ref_count);
list_add(&cm_node->reset_entry, &reset_list);
}
@@ -2414,6 +2440,7 @@ static void i40iw_handle_rst_pkt(struct i40iw_cm_node *cm_node,
case I40IW_CM_STATE_FIN_WAIT1:
case I40IW_CM_STATE_LAST_ACK:
cm_node->cm_id->rem_ref(cm_node->cm_id);
+ /* fall through */
case I40IW_CM_STATE_TIME_WAIT:
cm_node->state = I40IW_CM_STATE_CLOSED;
i40iw_rem_ref_cm_node(cm_node);
@@ -3150,7 +3177,8 @@ void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf)
cm_info.rem_addr,
cm_info.loc_port,
cm_info.loc_addr,
- true);
+ true,
+ false);
if (!cm_node) {
/* Only type of packet accepted are for */
@@ -3208,7 +3236,8 @@ void i40iw_setup_cm_core(struct i40iw_device *iwdev)
cm_core->iwdev = iwdev;
cm_core->dev = &iwdev->sc_dev;
- INIT_LIST_HEAD(&cm_core->connected_nodes);
+ INIT_LIST_HEAD(&cm_core->accelerated_list);
+ INIT_LIST_HEAD(&cm_core->non_accelerated_list);
INIT_LIST_HEAD(&cm_core->listen_nodes);
setup_timer(&cm_core->tcp_timer, i40iw_cm_timer_tick,
@@ -3592,6 +3621,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct i40iw_qp *iwqp;
struct i40iw_device *iwdev;
struct i40iw_sc_dev *dev;
+ struct i40iw_cm_core *cm_core;
struct i40iw_cm_node *cm_node;
struct ib_qp_attr attr;
int passive_state;
@@ -3601,6 +3631,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct i40iw_kmem_info accept;
enum i40iw_status_code status;
u64 tagged_offset;
+ unsigned long flags;
memset(&attr, 0, sizeof(attr));
ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
@@ -3610,6 +3641,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
iwqp = to_iwqp(ibqp);
iwdev = iwqp->iwdev;
dev = &iwdev->sc_dev;
+ cm_core = &iwdev->cm_core;
cm_node = (struct i40iw_cm_node *)cm_id->provider_data;
if (((struct sockaddr_in *)&cm_id->local_addr)->sin_family == AF_INET) {
@@ -3703,7 +3735,11 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
cm_node->qhash_set = false;
i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
- cm_node->accelerated = 1;
+ cm_node->accelerated = true;
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_move_tail(&cm_node->list, &cm_core->accelerated_list);
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
status =
i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0);
if (status)
@@ -4033,10 +4069,12 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event)
{
struct i40iw_qp *iwqp;
struct i40iw_device *iwdev;
+ struct i40iw_cm_core *cm_core;
struct i40iw_cm_node *cm_node;
struct i40iw_sc_dev *dev;
struct ib_qp_attr attr;
struct iw_cm_id *cm_id;
+ unsigned long flags;
int status;
bool read0;
@@ -4045,6 +4083,7 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event)
iwqp = (struct i40iw_qp *)cm_id->provider_data;
iwdev = to_iwdev(iwqp->ibqp.device);
dev = &iwdev->sc_dev;
+ cm_core = &iwdev->cm_core;
if (iwqp->destroyed) {
status = -ETIMEDOUT;
@@ -4063,7 +4102,10 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event)
cm_node->qhash_set = false;
i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
- cm_node->accelerated = 1;
+ cm_node->accelerated = true;
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_move_tail(&cm_node->list, &cm_core->accelerated_list);
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
status = i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY,
0);
if (status)
@@ -4263,25 +4305,38 @@ void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr,
struct list_head *list_node;
struct i40iw_cm_node *cm_node;
unsigned long flags;
- struct list_head connected_list;
+ struct list_head teardown_list;
struct ib_qp_attr attr;
- INIT_LIST_HEAD(&connected_list);
+ INIT_LIST_HEAD(&teardown_list);
spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
+ list_for_each_safe(list_node, list_core_temp,
+ &cm_core->accelerated_list) {
+ cm_node = container_of(list_node, struct i40iw_cm_node, list);
+ if (disconnect_all ||
+ (nfo->vlan_id == cm_node->vlan_id &&
+ (!memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16) ||
+ !memcmp(cm_node->rem_addr, ipaddr, nfo->ipv4 ? 4 : 16)))) {
+ atomic_inc(&cm_node->ref_count);
+ list_add(&cm_node->teardown_entry, &teardown_list);
+ }
+ }
+ list_for_each_safe(list_node, list_core_temp,
+ &cm_core->non_accelerated_list) {
cm_node = container_of(list_node, struct i40iw_cm_node, list);
if (disconnect_all ||
(nfo->vlan_id == cm_node->vlan_id &&
(!memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16) ||
!memcmp(cm_node->rem_addr, ipaddr, nfo->ipv4 ? 4 : 16)))) {
atomic_inc(&cm_node->ref_count);
- list_add(&cm_node->connected_entry, &connected_list);
+ list_add(&cm_node->teardown_entry, &teardown_list);
}
}
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- list_for_each_safe(list_node, list_core_temp, &connected_list) {
- cm_node = container_of(list_node, struct i40iw_cm_node, connected_entry);
+ list_for_each_safe(list_node, list_core_temp, &teardown_list) {
+ cm_node = container_of(list_node, struct i40iw_cm_node,
+ teardown_entry);
attr.qp_state = IB_QPS_ERR;
i40iw_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL);
if (iwdev->reset)
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index 38e6837dedeb..78ba36ae2bbe 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -276,8 +276,6 @@ struct i40iw_cm_tcp_context {
u32 mss;
u8 snd_wscale;
u8 rcv_wscale;
-
- struct timeval sent_ts;
};
enum i40iw_cm_listener_state {
@@ -337,13 +335,13 @@ struct i40iw_cm_node {
u16 mpav2_ird_ord;
struct iw_cm_id *cm_id;
struct list_head list;
- int accelerated;
+ bool accelerated;
struct i40iw_cm_listener *listener;
int apbvt_set;
int accept_pend;
struct list_head timer_entry;
struct list_head reset_entry;
- struct list_head connected_entry;
+ struct list_head teardown_entry;
atomic_t passive_state;
bool qhash_set;
u8 user_pri;
@@ -405,7 +403,8 @@ struct i40iw_cm_core {
struct i40iw_sc_dev *dev;
struct list_head listen_nodes;
- struct list_head connected_nodes;
+ struct list_head accelerated_list;
+ struct list_head non_accelerated_list;
struct timer_list tcp_timer;
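
Editor's note: the i40iw_cm.c/i40iw_cm.h changes above split the single connected_nodes list into accelerated_list and non_accelerated_list; a node starts on the non-accelerated list (i40iw_add_hte_node) and is moved to the accelerated list under ht_lock once the connection is established, so searches such as i40iw_find_node() can target one list. A minimal user-space sketch of that pattern is below, assuming nothing from the driver; the list type is hand-rolled here rather than the kernel's <linux/list.h>.

#include <stdbool.h>
#include <stdio.h>

struct node {
	int loc_port;
	bool accelerated;
	struct node *prev, *next;
};

struct list {
	struct node head;	/* sentinel */
};

static void list_init(struct list *l)
{
	l->head.prev = l->head.next = &l->head;
}

static void list_add_tail(struct list *l, struct node *n)
{
	n->prev = l->head.prev;
	n->next = &l->head;
	l->head.prev->next = n;
	l->head.prev = n;
}

static void list_del(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

/* analogue of the list_move_tail() calls added in i40iw_accept() */
static void list_move_tail(struct list *to, struct node *n)
{
	list_del(n);
	list_add_tail(to, n);
}

static bool find_port(struct list *l, int port)
{
	struct node *n;

	for (n = l->head.next; n != &l->head; n = n->next)
		if (n->loc_port == port)
			return true;
	return false;
}

int main(void)
{
	struct list non_accel, accel;
	struct node c = { .loc_port = 4000 };

	list_init(&non_accel);
	list_init(&accel);

	list_add_tail(&non_accel, &c);	/* like i40iw_add_hte_node() */
	c.accelerated = true;
	list_move_tail(&accel, &c);	/* connection established */

	/* like i40iw_port_in_use(), which now checks both lists */
	printf("port in use: %d\n",
	       find_port(&accel, 4000) || find_port(&non_accel, 4000));
	return 0;
}
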
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index f997c2598720..a4231623e830 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -2465,7 +2465,6 @@ static enum i40iw_status_code i40iw_sc_qp_create(
LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
LS_64(qp->virtual_map, I40IW_CQPSQ_QP_VQ) |
LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) |
- LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) |
LS_64(info->arp_cache_idx_valid, I40IW_CQPSQ_QP_ARPTABIDXVALID) |
LS_64(info->next_iwarp_state, I40IW_CQPSQ_QP_NEXTIWSTATE) |
LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
@@ -2528,7 +2527,6 @@ static enum i40iw_status_code i40iw_sc_qp_modify(
LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) |
LS_64(info->force_loopback, I40IW_CQPSQ_QP_FORCELOOPBACK) |
LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
- LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) |
LS_64(info->remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) |
LS_64(term_actions, I40IW_CQPSQ_QP_TERMACT) |
LS_64(info->reset_tcp_conn, I40IW_CQPSQ_QP_RESETCON) |
@@ -3913,7 +3911,6 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
struct i40iw_virt_mem virt_mem;
u32 i, mem_size;
u32 qpwantedoriginal, qpwanted, mrwanted, pblewanted;
- u32 powerof2;
u64 sd_needed;
u32 loop_count = 0;
@@ -3988,16 +3985,10 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
if ((loop_count > 1000) ||
((!(loop_count % 10)) &&
(qpwanted > qpwantedoriginal * 2 / 3))) {
- if (qpwanted > FPM_MULTIPLIER) {
- qpwanted -= FPM_MULTIPLIER;
- powerof2 = 1;
- while (powerof2 < qpwanted)
- powerof2 *= 2;
- powerof2 /= 2;
- qpwanted = powerof2;
- } else {
- qpwanted /= 2;
- }
+ if (qpwanted > FPM_MULTIPLIER)
+ qpwanted = roundup_pow_of_two(qpwanted -
+ FPM_MULTIPLIER);
+ qpwanted >>= 1;
}
if (mrwanted > FPM_MULTIPLIER * 10)
mrwanted -= FPM_MULTIPLIER * 10;
@@ -4005,8 +3996,6 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
pblewanted -= FPM_MULTIPLIER * 1000;
} while (sd_needed > hmc_fpm_misc->max_sds && loop_count < 2000);
- sd_needed = i40iw_est_sd(dev, hmc_info);
-
i40iw_debug(dev, I40IW_DEBUG_HMC,
"loop_cnt=%d, sd_needed=%lld, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d\n",
loop_count, sd_needed,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
index 75e0a5b90dda..6ddaeec87d2f 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -547,9 +547,6 @@
#define I40IW_CQPSQ_QP_MSSCHANGE_SHIFT 52
#define I40IW_CQPSQ_QP_MSSCHANGE_MASK (1ULL << I40IW_CQPSQ_QP_MSSCHANGE_SHIFT)
-#define I40IW_CQPSQ_QP_STATRSRC_SHIFT 53
-#define I40IW_CQPSQ_QP_STATRSRC_MASK (1ULL << I40IW_CQPSQ_QP_STATRSRC_SHIFT)
-
#define I40IW_CQPSQ_QP_IGNOREMWBOUND_SHIFT 54
#define I40IW_CQPSQ_QP_IGNOREMWBOUND_MASK \
(1ULL << I40IW_CQPSQ_QP_IGNOREMWBOUND_SHIFT)
@@ -1307,8 +1304,13 @@
(0xffffffffULL << I40IWQPC_LOCAL_IPADDR0_SHIFT)
/* wqe size considering 32 bytes per wqe*/
-#define I40IWQP_SW_MIN_WQSIZE 4 /* 128 bytes */
-#define I40IWQP_SW_MAX_WQSIZE 2048 /* 2048 bytes */
+#define I40IW_QP_SW_MIN_WQSIZE 4 /*in WRs*/
+#define I40IW_SQ_RSVD 2
+#define I40IW_RQ_RSVD 1
+#define I40IW_MAX_QUANTAS_PER_WR 2
+#define I40IW_QP_SW_MAX_SQ_QUANTAS 2048
+#define I40IW_QP_SW_MAX_RQ_QUANTAS 16384
+#define I40IW_MAX_QP_WRS ((I40IW_QP_SW_MAX_SQ_QUANTAS / I40IW_MAX_QUANTAS_PER_WR) - 1)
#define I40IWQP_OP_RDMA_WRITE 0
#define I40IWQP_OP_RDMA_READ 1
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 9e4e508adf61..c9f62ca7643c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -387,6 +387,8 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
iwcq->ibcq.event_handler(&ibevent, iwcq->ibcq.cq_context);
}
break;
+ case I40IW_AE_LLP_DOUBT_REACHABILITY:
+ break;
case I40IW_AE_PRIV_OPERATION_DENIED:
case I40IW_AE_STAG_ZERO_INVALID:
case I40IW_AE_IB_RREQ_AND_Q1_FULL:
@@ -405,13 +407,13 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
case I40IW_AE_LLP_SEGMENT_TOO_SMALL:
case I40IW_AE_LLP_SYN_RECEIVED:
case I40IW_AE_LLP_TOO_MANY_RETRIES:
- case I40IW_AE_LLP_DOUBT_REACHABILITY:
case I40IW_AE_LCE_QP_CATASTROPHIC:
case I40IW_AE_LCE_FUNCTION_CATASTROPHIC:
case I40IW_AE_LCE_CQ_CATASTROPHIC:
case I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG:
case I40IW_AE_UDA_XMIT_DGRAM_TOO_SHORT:
ctx_info->err_rq_idx_valid = false;
+ /* fall through */
default:
if (!info->sq && ctx_info->err_rq_idx_valid) {
ctx_info->err_rq_idx = info->wqe_idx;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index a222ac219974..05001e6da1f8 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -99,6 +99,10 @@ static struct notifier_block i40iw_net_notifier = {
.notifier_call = i40iw_net_event
};
+static struct notifier_block i40iw_netdevice_notifier = {
+ .notifier_call = i40iw_netdevice_event
+};
+
/**
* i40iw_find_i40e_handler - find a handler given a client info
* @ldev: pointer to a client info
@@ -353,6 +357,8 @@ static void i40iw_dele_ceqs(struct i40iw_device *iwdev)
i40iw_disable_irq(dev, msix_vec, (void *)iwceq);
i40iw_destroy_ceq(iwdev, iwceq);
}
+
+ iwdev->sc_dev.ceq_valid = false;
}
/**
@@ -481,6 +487,7 @@ static enum i40iw_status_code i40iw_create_hmc_objs(struct i40iw_device *iwdev,
for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) {
info.rsrc_type = iw_hmc_obj_types[i];
info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt;
+ info.add_sd_cnt = 0;
status = i40iw_create_hmc_obj_type(dev, &info);
if (status) {
i40iw_pr_err("create obj type %d status = %d\n",
@@ -605,7 +612,7 @@ static enum i40iw_status_code i40iw_create_cqp(struct i40iw_device *iwdev)
INIT_LIST_HEAD(&cqp->cqp_avail_reqs);
INIT_LIST_HEAD(&cqp->cqp_pending_reqs);
/* init the waitq of the cqp_requests and add them to the list */
- for (i = 0; i < I40IW_CQP_SW_SQSIZE_2048; i++) {
+ for (i = 0; i < sqsize; i++) {
init_waitqueue_head(&cqp->cqp_requests[i].waitq);
list_add_tail(&cqp->cqp_requests[i].list, &cqp->cqp_avail_reqs);
}
@@ -809,17 +816,16 @@ static enum i40iw_status_code i40iw_setup_ceqs(struct i40iw_device *iwdev,
i40iw_enable_intr(&iwdev->sc_dev, msix_vec->idx);
iwdev->ceqs_count++;
}
-
exit:
- if (status) {
- if (!iwdev->ceqs_count) {
- kfree(iwdev->ceqlist);
- iwdev->ceqlist = NULL;
- } else {
- status = 0;
- }
+ if (status && !iwdev->ceqs_count) {
+ kfree(iwdev->ceqlist);
+ iwdev->ceqlist = NULL;
+ return status;
+ } else {
+ iwdev->sc_dev.ceq_valid = true;
+ return 0;
}
- return status;
+
}
/**
@@ -1283,7 +1289,7 @@ static void i40iw_wait_pe_ready(struct i40iw_hw *hw)
__LINE__, statuscpu2);
if ((statuscpu0 == 0x80) && (statuscpu1 == 0x80) && (statuscpu2 == 0x80))
break; /* SUCCESS */
- mdelay(1000);
+ msleep(1000);
retrycount++;
} while (retrycount < 14);
i40iw_wr32(hw, 0xb4040, 0x4C104C5);
@@ -1391,6 +1397,7 @@ static void i40iw_register_notifiers(void)
register_inetaddr_notifier(&i40iw_inetaddr_notifier);
register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
register_netevent_notifier(&i40iw_net_notifier);
+ register_netdevice_notifier(&i40iw_netdevice_notifier);
}
/**
@@ -1402,6 +1409,7 @@ static void i40iw_unregister_notifiers(void)
unregister_netevent_notifier(&i40iw_net_notifier);
unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
+ unregister_netdevice_notifier(&i40iw_netdevice_notifier);
}
/**
@@ -1551,8 +1559,6 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
enum i40iw_status_code status;
memcpy(&hdl->ldev, ldev, sizeof(*ldev));
- if (resource_profile == 1)
- resource_profile = 2;
iwdev->mpa_version = mpa_version;
iwdev->resource_profile = (resource_profile < I40IW_HMC_PROFILE_EQUAL) ?
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index 2070f5901423..d9c7ae6a7030 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -348,8 +348,8 @@ enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
spin_lock_irqsave(&rsrc->bufpool_lock, flags);
rsrc->tx_wqe_avail_cnt++;
spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
- if (!list_empty(&rsrc->vsi->ilq->txpend))
- i40iw_puda_send_buf(rsrc->vsi->ilq, NULL);
+ if (!list_empty(&rsrc->txpend))
+ i40iw_puda_send_buf(rsrc, NULL);
}
done:
@@ -610,7 +610,7 @@ static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc)
qp->user_pri = 0;
i40iw_qp_add_qos(qp);
i40iw_puda_qp_setctx(rsrc);
- if (rsrc->ceq_valid)
+ if (rsrc->dev->ceq_valid)
ret = i40iw_cqp_qp_create_cmd(rsrc->dev, qp);
else
ret = i40iw_puda_qp_wqe(rsrc->dev, qp);
@@ -705,7 +705,7 @@ static enum i40iw_status_code i40iw_puda_cq_create(struct i40iw_puda_rsrc *rsrc)
ret = dev->iw_priv_cq_ops->cq_init(cq, &info);
if (ret)
goto error;
- if (rsrc->ceq_valid)
+ if (rsrc->dev->ceq_valid)
ret = i40iw_cqp_cq_create_cmd(dev, cq);
else
ret = i40iw_puda_cq_wqe(dev, cq);
@@ -725,7 +725,7 @@ static void i40iw_puda_free_qp(struct i40iw_puda_rsrc *rsrc)
struct i40iw_ccq_cqe_info compl_info;
struct i40iw_sc_dev *dev = rsrc->dev;
- if (rsrc->ceq_valid) {
+ if (rsrc->dev->ceq_valid) {
i40iw_cqp_qp_destroy_cmd(dev, &rsrc->qp);
return;
}
@@ -758,7 +758,7 @@ static void i40iw_puda_free_cq(struct i40iw_puda_rsrc *rsrc)
struct i40iw_ccq_cqe_info compl_info;
struct i40iw_sc_dev *dev = rsrc->dev;
- if (rsrc->ceq_valid) {
+ if (rsrc->dev->ceq_valid) {
i40iw_cqp_cq_destroy_cmd(dev, &rsrc->cq);
return;
}
@@ -814,6 +814,7 @@ void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi,
switch (rsrc->completion) {
case PUDA_HASH_CRC_COMPLETE:
i40iw_free_hash_desc(rsrc->hash_desc);
+ /* fall through */
case PUDA_QP_CREATED:
if (!reset)
i40iw_puda_free_qp(rsrc);
@@ -922,7 +923,6 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi,
rsrc->xmit_complete = i40iw_ieq_tx_compl;
}
- rsrc->ceq_valid = info->ceq_valid;
rsrc->type = info->type;
rsrc->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)((u8 *)vmem->va + pudasize);
rsrc->rq_wrid_array = (u64 *)((u8 *)vmem->va + pudasize + sqwridsize);
@@ -1471,10 +1471,6 @@ static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid)
struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)sqwrid;
i40iw_puda_ret_bufpool(ieq, buf);
- if (!list_empty(&ieq->txpend)) {
- buf = i40iw_puda_get_listbuf(&ieq->txpend);
- i40iw_puda_send_buf(ieq, buf);
- }
}
/**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.h b/drivers/infiniband/hw/i40iw/i40iw_puda.h
index ebe37f157d90..53a7d58c84b5 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.h
@@ -100,7 +100,6 @@ struct i40iw_puda_rsrc_info {
enum puda_resource_type type; /* ILQ or IEQ */
u32 count;
u16 pd_id;
- bool ceq_valid;
u32 cq_id;
u32 qp_id;
u32 sq_size;
@@ -125,7 +124,6 @@ struct i40iw_puda_rsrc {
enum puda_resource_type type;
u16 buf_size; /*buffer must be max datalen + tcpip hdr + mac */
u16 mss;
- bool ceq_valid;
u32 cq_id;
u32 qp_id;
u32 sq_size;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
index 3f76e1069da3..d390d55a1197 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -504,6 +504,7 @@ struct i40iw_sc_dev {
u8 hmc_fn_id;
bool is_pf;
bool vchnl_up;
+ bool ceq_valid;
u8 vf_id;
wait_queue_head_t vf_reqs;
u64 cqp_cmd_stats[OP_SIZE_CQP_STAT_ARRAY];
@@ -533,7 +534,6 @@ struct i40iw_create_qp_info {
bool ord_valid;
bool tcp_ctx_valid;
bool cq_num_valid;
- bool static_rsrc;
bool arp_cache_idx_valid;
};
@@ -545,7 +545,6 @@ struct i40iw_modify_qp_info {
bool ord_valid;
bool tcp_ctx_valid;
bool cq_num_valid;
- bool static_rsrc;
bool arp_cache_idx_valid;
bool reset_tcp_conn;
bool remove_hash_idx;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
index 42dde5924bb5..8afa5a67a86b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -895,7 +895,6 @@ exit:
/**
* i40iw_get_wqe_shift - get shift count for maximum wqe size
- * @wqdepth: depth of wq required.
* @sge: Maximum Scatter Gather Elements wqe
* @inline_data: Maximum inline data size
* @shift: Returns the shift needed based on sge
@@ -905,22 +904,48 @@ exit:
* For 2 or 3 SGEs or inline data <= 48, shift = 1 (wqe size of 64 bytes).
* Shift of 2 otherwise (wqe size of 128 bytes).
*/
-enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift)
+void i40iw_get_wqe_shift(u32 sge, u32 inline_data, u8 *shift)
{
- u32 size;
-
*shift = 0;
if (sge > 1 || inline_data > 16)
*shift = (sge < 4 && inline_data <= 48) ? 1 : 2;
+}
- /* check if wqdepth is multiple of 2 or not */
+/*
+ * i40iw_get_sqdepth - get SQ depth (quantas)
+ * @sq_size: SQ size
+ * @shift: shift which determines size of WQE
+ * @sqdepth: depth of SQ
+ *
+ */
+enum i40iw_status_code i40iw_get_sqdepth(u32 sq_size, u8 shift, u32 *sqdepth)
+{
+ *sqdepth = roundup_pow_of_two((sq_size << shift) + I40IW_SQ_RSVD);
- if ((wqdepth < I40IWQP_SW_MIN_WQSIZE) || (wqdepth & (wqdepth - 1)))
+ if (*sqdepth < (I40IW_QP_SW_MIN_WQSIZE << shift))
+ *sqdepth = I40IW_QP_SW_MIN_WQSIZE << shift;
+ else if (*sqdepth > I40IW_QP_SW_MAX_SQ_QUANTAS)
return I40IW_ERR_INVALID_SIZE;
- size = wqdepth << *shift; /* multiple of 32 bytes count */
- if (size > I40IWQP_SW_MAX_WQSIZE)
+ return 0;
+}
+
+/*
+ * i40iw_get_rqdepth - get RQ depth (quantas)
+ * @rq_size: RQ size
+ * @shift: shift which determines size of WQE
+ * @rqdepth: depth of RQ
+ *
+ */
+enum i40iw_status_code i40iw_get_rqdepth(u32 rq_size, u8 shift, u32 *rqdepth)
+{
+ *rqdepth = roundup_pow_of_two((rq_size << shift) + I40IW_RQ_RSVD);
+
+ if (*rqdepth < (I40IW_QP_SW_MIN_WQSIZE << shift))
+ *rqdepth = I40IW_QP_SW_MIN_WQSIZE << shift;
+ else if (*rqdepth > I40IW_QP_SW_MAX_RQ_QUANTAS)
return I40IW_ERR_INVALID_SIZE;
+
return 0;
}
@@ -974,9 +999,7 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
if (info->max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
return I40IW_ERR_INVALID_FRAG_COUNT;
- ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, info->max_inline_data, &sqshift);
- if (ret_code)
- return ret_code;
+ i40iw_get_wqe_shift(info->max_sq_frag_cnt, info->max_inline_data, &sqshift);
qp->sq_base = info->sq;
qp->rq_base = info->rq;
@@ -1010,9 +1033,7 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
I40IW_RING_INIT(qp->rq_ring, qp->rq_size);
switch (info->abi_ver) {
case 4:
- ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, 0, &rqshift);
- if (ret_code)
- return ret_code;
+ i40iw_get_wqe_shift(info->max_rq_frag_cnt, 0, &rqshift);
break;
case 5: /* fallthrough until next ABI version */
default:
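
Editor's note: the i40iw_uk.c changes above replace the old depth validation with i40iw_get_sqdepth()/i40iw_get_rqdepth(), which round the requested size up to a power of two after adding the reserved quanta and clamp against the new limits from i40iw_d.h. A small user-space sketch of that arithmetic, with roundup_p2() standing in for the kernel's roundup_pow_of_two():

#include <stdio.h>

#define I40IW_QP_SW_MIN_WQSIZE		4
#define I40IW_SQ_RSVD			2
#define I40IW_QP_SW_MAX_SQ_QUANTAS	2048

static unsigned int roundup_p2(unsigned int n)
{
	unsigned int p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

static int get_sqdepth(unsigned int sq_size, unsigned int shift,
		       unsigned int *sqdepth)
{
	*sqdepth = roundup_p2((sq_size << shift) + I40IW_SQ_RSVD);

	if (*sqdepth < (I40IW_QP_SW_MIN_WQSIZE << shift))
		*sqdepth = I40IW_QP_SW_MIN_WQSIZE << shift;
	else if (*sqdepth > I40IW_QP_SW_MAX_SQ_QUANTAS)
		return -1;	/* I40IW_ERR_INVALID_SIZE in the driver */
	return 0;
}

int main(void)
{
	unsigned int depth;

	/* e.g. 100 WRs at shift 1 (64-byte WQEs): (100 << 1) + 2 rounds to 256 */
	if (!get_sqdepth(100, 1, &depth))
		printf("sqdepth = %u\n", depth);
	return 0;
}
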
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
index 531dc49a0eba..b125925641e0 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_user.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h
@@ -71,7 +71,7 @@ enum i40iw_device_capabilities_const {
I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496,
I40IW_MAX_INLINE_DATA_SIZE = 48,
I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48,
- I40IW_MAX_IRD_SIZE = 63,
+ I40IW_MAX_IRD_SIZE = 64,
I40IW_MAX_ORD_SIZE = 127,
I40IW_MAX_WQ_ENTRIES = 2048,
I40IW_Q2_BUFFER_SIZE = (248 + 100),
@@ -203,18 +203,6 @@ struct i40iw_post_inline_send {
u32 len;
};
-struct i40iw_post_send_w_inv {
- i40iw_sgl sg_list;
- u32 num_sges;
- i40iw_stag remote_stag_to_inv;
-};
-
-struct i40iw_post_inline_send_w_inv {
- void *data;
- u32 len;
- i40iw_stag remote_stag_to_inv;
-};
-
struct i40iw_rdma_write {
i40iw_sgl lo_sg_list;
u32 num_lo_sges;
@@ -256,9 +244,6 @@ struct i40iw_post_sq_info {
bool defer_flag;
union {
struct i40iw_post_send send;
- struct i40iw_post_send send_w_sol;
- struct i40iw_post_send_w_inv send_w_inv;
- struct i40iw_post_send_w_inv send_w_sol_inv;
struct i40iw_rdma_write rdma_write;
struct i40iw_rdma_read rdma_read;
struct i40iw_rdma_read rdma_read_inv;
@@ -266,9 +251,6 @@ struct i40iw_post_sq_info {
struct i40iw_inv_local_stag inv_local_stag;
struct i40iw_inline_rdma_write inline_rdma_write;
struct i40iw_post_inline_send inline_send;
- struct i40iw_post_inline_send inline_send_w_sol;
- struct i40iw_post_inline_send_w_inv inline_send_w_inv;
- struct i40iw_post_inline_send_w_inv inline_send_w_sol_inv;
} op;
};
@@ -442,5 +424,7 @@ enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size);
enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size);
enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size,
u8 *wqe_size);
-enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift);
+void i40iw_get_wqe_shift(u32 sge, u32 inline_data, u8 *shift);
+enum i40iw_status_code i40iw_get_sqdepth(u32 sq_size, u8 shift, u32 *sqdepth);
+enum i40iw_status_code i40iw_get_rqdepth(u32 rq_size, u8 shift, u32 *rqdepth);
#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index d3d2d2a2535b..d7846038caf0 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -137,7 +137,7 @@ inline u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg)
}
/**
- * i40iw_inetaddr_event - system notifier for netdev events
+ * i40iw_inetaddr_event - system notifier for ipv4 addr events
* @notfier: not used
* @event: event for notifier
* @ptr: if address
@@ -168,11 +168,16 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
if (netdev != event_netdev)
return NOTIFY_DONE;
- if (upper_dev)
- local_ipaddr = ntohl(
- ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
- else
+ if (upper_dev) {
+ struct in_device *in;
+
+ rcu_read_lock();
+ in = __in_dev_get_rcu(upper_dev);
+ local_ipaddr = ntohl(in->ifa_list->ifa_address);
+ rcu_read_unlock();
+ } else {
local_ipaddr = ntohl(ifa->ifa_address);
+ }
switch (event) {
case NETDEV_DOWN:
action = I40IW_ARP_DELETE;
@@ -195,7 +200,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
}
/**
- * i40iw_inet6addr_event - system notifier for ipv6 netdev events
+ * i40iw_inet6addr_event - system notifier for ipv6 addr events
* @notfier: not used
* @event: event for notifier
* @ptr: if address
@@ -247,7 +252,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
}
/**
- * i40iw_net_event - system notifier for net events
+ * i40iw_net_event - system notifier for netevents
* @notfier: not used
* @event: event for notifier
* @ptr: neighbor
@@ -292,6 +297,50 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void *
}
/**
+ * i40iw_netdevice_event - system notifier for netdev events
+ * @notfier: not used
+ * @event: event for notifier
+ * @ptr: netdev
+ */
+int i40iw_netdevice_event(struct notifier_block *notifier,
+ unsigned long event,
+ void *ptr)
+{
+ struct net_device *event_netdev;
+ struct net_device *netdev;
+ struct i40iw_device *iwdev;
+ struct i40iw_handler *hdl;
+
+ event_netdev = netdev_notifier_info_to_dev(ptr);
+
+ hdl = i40iw_find_netdev(event_netdev);
+ if (!hdl)
+ return NOTIFY_DONE;
+
+ iwdev = &hdl->device;
+ if (iwdev->init_state < RDMA_DEV_REGISTERED || iwdev->closing)
+ return NOTIFY_DONE;
+
+ netdev = iwdev->ldev->netdev;
+ if (netdev != event_netdev)
+ return NOTIFY_DONE;
+
+ iwdev->iw_status = 1;
+
+ switch (event) {
+ case NETDEV_DOWN:
+ iwdev->iw_status = 0;
+ /* Fall through */
+ case NETDEV_UP:
+ i40iw_port_ibevent(iwdev);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+/**
* i40iw_get_cqp_request - get cqp struct
* @cqp: device cqp ptr
* @wait: cqp to be used in wait mode
@@ -1357,7 +1406,7 @@ struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev,
rem_port = ntohs(tcph->source);
cm_node = i40iw_find_node(&iwdev->cm_core, rem_port, rem_addr, loc_port,
- loc_addr, false);
+ loc_addr, false, true);
if (!cm_node)
return NULL;
iwqp = cm_node->iwqp;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index bfba83977fe0..937899fea01d 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -38,6 +38,7 @@
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/hugetlb.h>
+#include <linux/irq.h>
#include <asm/byteorder.h>
#include <net/ip.h>
#include <rdma/ib_verbs.h>
@@ -69,7 +70,7 @@ static int i40iw_query_device(struct ib_device *ibdev,
props->hw_ver = (u32)iwdev->sc_dev.hw_rev;
props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
props->max_qp = iwdev->max_qp - iwdev->used_qps;
- props->max_qp_wr = (I40IW_MAX_WQ_ENTRIES >> 2) - 1;
+ props->max_qp_wr = I40IW_MAX_QP_WRS;
props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
props->max_cq = iwdev->max_cq - iwdev->used_cqs;
props->max_cqe = iwdev->max_cqe;
@@ -381,22 +382,6 @@ static int i40iw_dealloc_pd(struct ib_pd *ibpd)
}
/**
- * i40iw_qp_roundup - return round up qp ring size
- * @wr_ring_size: ring size to round up
- */
-static int i40iw_qp_roundup(u32 wr_ring_size)
-{
- int scount = 1;
-
- if (wr_ring_size < I40IWQP_SW_MIN_WQSIZE)
- wr_ring_size = I40IWQP_SW_MIN_WQSIZE;
-
- for (wr_ring_size--; scount <= 16; scount *= 2)
- wr_ring_size |= wr_ring_size >> scount;
- return ++wr_ring_size;
-}
-
-/**
* i40iw_get_pbl - Retrieve pbl from a list given a virtual
* address
* @va: user virtual address
@@ -517,21 +502,19 @@ static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev,
{
struct i40iw_dma_mem *mem = &iwqp->kqp.dma_mem;
u32 sqdepth, rqdepth;
- u32 sq_size, rq_size;
u8 sqshift;
u32 size;
enum i40iw_status_code status;
struct i40iw_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
- sq_size = i40iw_qp_roundup(ukinfo->sq_size + 1);
- rq_size = i40iw_qp_roundup(ukinfo->rq_size + 1);
-
- status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, &sqshift);
+ i40iw_get_wqe_shift(ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, &sqshift);
+ status = i40iw_get_sqdepth(ukinfo->sq_size, sqshift, &sqdepth);
if (status)
return -ENOMEM;
- sqdepth = sq_size << sqshift;
- rqdepth = rq_size << I40IW_MAX_RQ_WQE_SHIFT;
+ status = i40iw_get_rqdepth(ukinfo->rq_size, I40IW_MAX_RQ_WQE_SHIFT, &rqdepth);
+ if (status)
+ return -ENOMEM;
size = sqdepth * sizeof(struct i40iw_sq_uk_wr_trk_info) + (rqdepth << 3);
iwqp->kqp.wrid_mem = kzalloc(size, GFP_KERNEL);
@@ -561,8 +544,8 @@ static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev,
ukinfo->shadow_area = ukinfo->rq[rqdepth].elem;
info->shadow_area_pa = info->rq_pa + (rqdepth * I40IW_QP_WQE_MIN_SIZE);
- ukinfo->sq_size = sq_size;
- ukinfo->rq_size = rq_size;
+ ukinfo->sq_size = sqdepth >> sqshift;
+ ukinfo->rq_size = rqdepth >> I40IW_MAX_RQ_WQE_SHIFT;
ukinfo->qp_id = iwqp->ibqp.qp_num;
return 0;
}
@@ -2288,14 +2271,12 @@ static int i40iw_post_send(struct ib_qp *ibqp,
info.op.inline_rdma_write.len = ib_wr->sg_list[0].length;
info.op.inline_rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
info.op.inline_rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
- info.op.inline_rdma_write.rem_addr.len = ib_wr->sg_list->length;
ret = ukqp->ops.iw_inline_rdma_write(ukqp, &info, false);
} else {
info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
- info.op.rdma_write.rem_addr.len = ib_wr->sg_list->length;
ret = ukqp->ops.iw_rdma_write(ukqp, &info, false);
}
@@ -2317,7 +2298,6 @@ static int i40iw_post_send(struct ib_qp *ibqp,
info.op_type = I40IW_OP_TYPE_RDMA_READ;
info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey;
- info.op.rdma_read.rem_addr.len = ib_wr->sg_list->length;
info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr;
info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
@@ -2776,6 +2756,25 @@ static int i40iw_destroy_ah(struct ib_ah *ah)
}
/**
+ * i40iw_get_vector_affinity - report IRQ affinity mask
+ * @ibdev: IB device
+ * @comp_vector: completion vector index
+ */
+static const struct cpumask *i40iw_get_vector_affinity(struct ib_device *ibdev,
+ int comp_vector)
+{
+ struct i40iw_device *iwdev = to_iwdev(ibdev);
+ struct i40iw_msix_vector *msix_vec;
+
+ if (iwdev->msix_shared)
+ msix_vec = &iwdev->iw_msixtbl[comp_vector];
+ else
+ msix_vec = &iwdev->iw_msixtbl[comp_vector + 1];
+
+ return irq_get_affinity_mask(msix_vec->irq);
+}
+
+/**
* i40iw_init_rdma_device - initialization of iwarp device
* @iwdev: iwarp device
*/
@@ -2871,6 +2870,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq;
iwibdev->ibdev.post_send = i40iw_post_send;
iwibdev->ibdev.post_recv = i40iw_post_recv;
+ iwibdev->ibdev.get_vector_affinity = i40iw_get_vector_affinity;
return iwibdev;
}
@@ -2936,6 +2936,7 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
return -ENOMEM;
iwibdev = iwdev->iwibdev;
+ iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW;
ret = ib_register_device(&iwibdev->ibdev, NULL);
if (ret)
goto error;
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 538c46a73248..9345d5b546d1 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -92,23 +92,19 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd,
int ret;
memcpy(&in6, grh->dgid.raw, sizeof(in6));
- if (rdma_is_multicast_addr(&in6)) {
+ if (rdma_is_multicast_addr(&in6))
is_mcast = 1;
- rdma_get_mcast_mac(&in6, ah->av.eth.mac);
- } else {
- memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN);
- }
+
+ memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN);
ret = ib_get_cached_gid(pd->device, rdma_ah_get_port_num(ah_attr),
grh->sgid_index, &sgid, &gid_attr);
if (ret)
return ERR_PTR(ret);
eth_zero_addr(ah->av.eth.s_mac);
- if (gid_attr.ndev) {
- if (is_vlan_dev(gid_attr.ndev))
- vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
- memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN);
- dev_put(gid_attr.ndev);
- }
+ if (is_vlan_dev(gid_attr.ndev))
+ vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
+ memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN);
+ dev_put(gid_attr.ndev);
if (vlan_tag < 0x1000)
vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13;
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn |
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 3a6242f8f5c0..82adc0d1d30e 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -170,7 +170,7 @@ err_buf:
return err;
}
-#define CQ_CREATE_FLAGS_SUPPORTED IB_CQ_FLAGS_TIMESTAMP_COMPLETION
+#define CQ_CREATE_FLAGS_SUPPORTED IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
@@ -246,7 +246,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
cq->db.dma, &cq->mcq, vector, 0,
- !!(cq->create_flags & IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
+ !!(cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION));
if (err)
goto err_dbmap;
@@ -773,11 +773,13 @@ repoll:
switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
case MLX4_OPCODE_RDMA_WRITE_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
+ /* fall through */
case MLX4_OPCODE_RDMA_WRITE:
wc->opcode = IB_WC_RDMA_WRITE;
break;
case MLX4_OPCODE_SEND_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
+ /* fall through */
case MLX4_OPCODE_SEND:
case MLX4_OPCODE_SEND_INVAL:
wc->opcode = IB_WC_SEND;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 2b74a232c40d..5b70744f414a 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -246,14 +246,11 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
}
-static int mlx4_ib_add_gid(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- const union ib_gid *gid,
+static int mlx4_ib_add_gid(const union ib_gid *gid,
const struct ib_gid_attr *attr,
void **context)
{
- struct mlx4_ib_dev *ibdev = to_mdev(device);
+ struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
struct mlx4_ib_iboe *iboe = &ibdev->iboe;
struct mlx4_port_gid_table *port_gid_table;
int free = -1, found = -1;
@@ -262,16 +259,16 @@ static int mlx4_ib_add_gid(struct ib_device *device,
int i;
struct gid_entry *gids = NULL;
- if (!rdma_cap_roce_gid_table(device, port_num))
+ if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
return -EINVAL;
- if (port_num > MLX4_MAX_PORTS)
+ if (attr->port_num > MLX4_MAX_PORTS)
return -EINVAL;
if (!context)
return -EINVAL;
- port_gid_table = &iboe->gids[port_num - 1];
+ port_gid_table = &iboe->gids[attr->port_num - 1];
spin_lock_bh(&iboe->lock);
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
@@ -318,33 +315,30 @@ static int mlx4_ib_add_gid(struct ib_device *device,
spin_unlock_bh(&iboe->lock);
if (!ret && hw_update) {
- ret = mlx4_ib_update_gids(gids, ibdev, port_num);
+ ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
kfree(gids);
}
return ret;
}
-static int mlx4_ib_del_gid(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- void **context)
+static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
{
struct gid_cache_context *ctx = *context;
- struct mlx4_ib_dev *ibdev = to_mdev(device);
+ struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
struct mlx4_ib_iboe *iboe = &ibdev->iboe;
struct mlx4_port_gid_table *port_gid_table;
int ret = 0;
int hw_update = 0;
struct gid_entry *gids = NULL;
- if (!rdma_cap_roce_gid_table(device, port_num))
+ if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
return -EINVAL;
- if (port_num > MLX4_MAX_PORTS)
+ if (attr->port_num > MLX4_MAX_PORTS)
return -EINVAL;
- port_gid_table = &iboe->gids[port_num - 1];
+ port_gid_table = &iboe->gids[attr->port_num - 1];
spin_lock_bh(&iboe->lock);
if (ctx) {
ctx->refcount--;
@@ -376,7 +370,7 @@ static int mlx4_ib_del_gid(struct ib_device *device,
spin_unlock_bh(&iboe->lock);
if (!ret && hw_update) {
- ret = mlx4_ib_update_gids(gids, ibdev, port_num);
+ ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
kfree(gids);
}
return ret;
@@ -411,9 +405,6 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
if (attr.ndev)
dev_put(attr.ndev);
- if (!memcmp(&gid, &zgid, sizeof(gid)))
- return -EINVAL;
-
spin_lock_irqsave(&iboe->lock, flags);
port_gid_table = &iboe->gids[port_num - 1];
@@ -429,6 +420,9 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
return real_index;
}
+#define field_avail(type, fld, sz) (offsetof(type, fld) + \
+ sizeof(((type *)0)->fld) <= (sz))
+
static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
@@ -556,23 +550,31 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->timestamp_mask = 0xFFFFFFFFFFFFULL;
props->max_ah = INT_MAX;
- if ((dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
- (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET ||
- mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET)) {
- props->rss_caps.max_rwq_indirection_tables = props->max_qp;
- props->rss_caps.max_rwq_indirection_table_size =
- dev->dev->caps.max_rss_tbl_sz;
- props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
- props->max_wq_type_rq = props->max_qp;
+ if (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET ||
+ mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET) {
+ if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) {
+ props->rss_caps.max_rwq_indirection_tables =
+ props->max_qp;
+ props->rss_caps.max_rwq_indirection_table_size =
+ dev->dev->caps.max_rss_tbl_sz;
+ props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
+ props->max_wq_type_rq = props->max_qp;
+ }
+
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
}
+ props->cq_caps.max_cq_moderation_count = MLX4_MAX_CQ_COUNT;
+ props->cq_caps.max_cq_moderation_period = MLX4_MAX_CQ_PERIOD;
+
if (!mlx4_is_slave(dev->dev))
err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
resp.response_length += sizeof(resp.hca_core_clock_offset);
if (!err && !mlx4_is_slave(dev->dev)) {
- resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP;
+ resp.comp_mask |= MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET;
resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
}
}
@@ -584,11 +586,11 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
sizeof(struct mlx4_wqe_data_seg);
}
- if (uhw->outlen >= resp.response_length + sizeof(resp.rss_caps)) {
- resp.response_length += sizeof(resp.rss_caps);
+ if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
if (props->rss_caps.supported_qpts) {
resp.rss_caps.rx_hash_function =
MLX4_IB_RX_HASH_FUNC_TOEPLITZ;
+
resp.rss_caps.rx_hash_fields_mask =
MLX4_IB_RX_HASH_SRC_IPV4 |
MLX4_IB_RX_HASH_DST_IPV4 |
@@ -598,7 +600,28 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
MLX4_IB_RX_HASH_DST_PORT_TCP |
MLX4_IB_RX_HASH_SRC_PORT_UDP |
MLX4_IB_RX_HASH_DST_PORT_UDP;
+
+ if (dev->dev->caps.tunnel_offload_mode ==
+ MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
+ resp.rss_caps.rx_hash_fields_mask |=
+ MLX4_IB_RX_HASH_INNER;
+ }
+ resp.response_length = offsetof(typeof(resp), rss_caps) +
+ sizeof(resp.rss_caps);
+ }
+
+ if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
+ if (dev->dev->caps.max_gso_sz &&
+ ((mlx4_ib_port_link_layer(ibdev, 1) ==
+ IB_LINK_LAYER_ETHERNET) ||
+ (mlx4_ib_port_link_layer(ibdev, 2) ==
+ IB_LINK_LAYER_ETHERNET))) {
+ resp.tso_caps.max_tso = dev->dev->caps.max_gso_sz;
+ resp.tso_caps.supported_qpts |=
+ 1 << IB_QPT_RAW_PACKET;
}
+ resp.response_length = offsetof(typeof(resp), tso_caps) +
+ sizeof(resp.tso_caps);
}
if (uhw->outlen) {
@@ -856,24 +879,9 @@ out:
static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
- int ret;
-
if (rdma_protocol_ib(ibdev, port))
return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
-
- if (!rdma_protocol_roce(ibdev, port))
- return -ENODEV;
-
- if (!rdma_cap_roce_gid_table(ibdev, port))
- return -ENODEV;
-
- ret = ib_get_cached_gid(ibdev, port, index, gid, NULL);
- if (ret == -EAGAIN) {
- memcpy(gid, &zgid, sizeof(*gid));
- return 0;
- }
-
- return ret;
+ return 0;
}
static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl)
@@ -1321,7 +1329,7 @@ static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
struct mlx4_ib_pd *pd;
int err;
- pd = kmalloc(sizeof *pd, GFP_KERNEL);
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
@@ -1337,7 +1345,6 @@ static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
kfree(pd);
return ERR_PTR(-EFAULT);
}
-
return &pd->ibpd;
}
@@ -1851,6 +1858,9 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt)
return ERR_PTR(-EINVAL);
+ if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)
+ return ERR_PTR(-EOPNOTSUPP);
+
if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
(flow_attr->type != IB_FLOW_ATTR_NORMAL))
return ERR_PTR(-EOPNOTSUPP);
@@ -2753,6 +2763,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str;
ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
+ ibdev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
+
if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) ||
@@ -2921,6 +2934,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_alloc_diag_counters(ibdev))
goto err_steer_free_bitmap;
+ ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4;
if (ib_register_device(&ibdev->ib_dev, NULL))
goto err_diag_counters;
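
Editor's note: the mlx4_ib_query_device() hunks above switch the extended response handling to the field_avail() idiom, so an optional trailing field is only filled in (and counted in response_length) when the caller's output buffer is large enough to hold it. A self-contained user-space illustration follows; the response struct and its fields are made up for the example, only the macro matches the patch.

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define field_avail(type, fld, sz) (offsetof(type, fld) + \
				    sizeof(((type *)0)->fld) <= (sz))

struct query_resp {
	uint32_t response_length;
	uint32_t comp_mask;
	uint64_t rss_caps;	/* optional, newer ABI */
	uint64_t tso_caps;	/* optional, newest ABI */
};

int main(void)
{
	struct query_resp resp = { 0 };
	/* pretend old user space that predates tso_caps */
	size_t outlen = offsetof(struct query_resp, tso_caps);

	resp.response_length = offsetof(struct query_resp, rss_caps);

	if (field_avail(struct query_resp, rss_caps, outlen)) {
		resp.rss_caps = 0x3f;
		resp.response_length = offsetof(struct query_resp, rss_caps) +
				       sizeof(resp.rss_caps);
	}
	if (field_avail(struct query_resp, tso_caps, outlen)) {
		resp.tso_caps = 0x1;
		resp.response_length = offsetof(struct query_resp, tso_caps) +
				       sizeof(resp.tso_caps);
	}

	printf("response_length = %u\n", resp.response_length);
	return 0;
}
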
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 70eb9f917303..81ffc007e0a1 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -944,6 +944,7 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
switch (sa_mad->mad_hdr.method) {
case IB_MGMT_METHOD_SET:
may_create = 1;
+ /* fall through */
case IB_SA_METHOD_DELETE:
req = kzalloc(sizeof *req, GFP_KERNEL);
if (!req)
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 97185a888296..974787a9d6ca 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -47,6 +47,7 @@
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
#include <linux/mlx4/qp.h>
+#include <linux/mlx4/cq.h>
#define MLX4_IB_DRV_NAME "mlx4_ib"
@@ -188,6 +189,7 @@ enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
+ MLX4_IB_QP_SCATTER_FCS = IB_QP_CREATE_SCATTER_FCS,
/* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
@@ -639,10 +641,6 @@ struct mlx4_uverbs_ex_query_device {
__u32 reserved;
};
-enum query_device_resp_mask {
- QUERY_DEVICE_RESP_MASK_TIMESTAMP = 1UL << 0,
-};
-
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
{
return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 98e921ea0386..c7c85c22e4e3 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -439,6 +439,9 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
+ mr->ibmr.length = length;
+ mr->ibmr.iova = virt_addr;
+ mr->ibmr.page_size = 1U << shift;
return &mr->ibmr;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 6c24c15be61a..a2ab87989f88 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -557,7 +557,8 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
MLX4_IB_RX_HASH_SRC_PORT_TCP |
MLX4_IB_RX_HASH_DST_PORT_TCP |
MLX4_IB_RX_HASH_SRC_PORT_UDP |
- MLX4_IB_RX_HASH_DST_PORT_UDP)) {
+ MLX4_IB_RX_HASH_DST_PORT_UDP |
+ MLX4_IB_RX_HASH_INNER)) {
pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n",
ucmd->rx_hash_fields_mask);
return (-EOPNOTSUPP);
@@ -618,10 +619,24 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
return (-EOPNOTSUPP);
}
+ if (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_INNER) {
+ if (dev->dev->caps.tunnel_offload_mode ==
+ MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+ /*
+ * Hash according to inner headers if exist, otherwise
+ * according to outer headers.
+ */
+ rss_ctx->flags |= MLX4_RSS_BY_INNER_HEADERS_IPONLY;
+ } else {
+ pr_debug("RSS Hash for inner headers isn't supported\n");
+ return (-EOPNOTSUPP);
+ }
+ }
+
return 0;
}
-static int create_qp_rss(struct mlx4_ib_dev *dev, struct ib_pd *ibpd,
+static int create_qp_rss(struct mlx4_ib_dev *dev,
struct ib_qp_init_attr *init_attr,
struct mlx4_ib_create_qp_rss *ucmd,
struct mlx4_ib_qp *qp)
@@ -744,7 +759,7 @@ static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd,
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
- err = create_qp_rss(to_mdev(pd->device), pd, init_attr, &ucmd, qp);
+ err = create_qp_rss(to_mdev(pd->device), init_attr, &ucmd, qp);
if (err) {
kfree(qp);
return ERR_PTR(err);
@@ -965,6 +980,17 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->inl_recv_sz = ucmd.qp.inl_recv_sz;
}
+ if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) {
+ if (!(dev->dev->caps.flags &
+ MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
+ pr_debug("scatter FCS is unsupported\n");
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ qp->flags |= MLX4_IB_QP_SCATTER_FCS;
+ }
+
err = set_rq_size(dev, &init_attr->cap, !!pd->uobject,
qp_has_rq(init_attr), qp, qp->inl_recv_sz);
if (err)
@@ -1707,6 +1733,8 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev,
mlx4_ib_gid_index_to_real_index(dev, port,
grh->sgid_index);
+ if (real_sgid_index < 0)
+ return real_sgid_index;
if (real_sgid_index >= dev->dev->caps.gid_table_len[port]) {
pr_err("sgid_index (%u) too large. max is %d\n",
real_sgid_index, dev->dev->caps.gid_table_len[port] - 1);
@@ -2089,6 +2117,9 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
if (qp->inl_recv_sz)
context->param3 |= cpu_to_be32(1 << 25);
+ if (qp->flags & MLX4_IB_QP_SCATTER_FCS)
+ context->param3 |= cpu_to_be32(1 << 29);
+
if (qp_type == IB_QPT_GSI || qp_type == IB_QPT_SMI)
context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
else if (qp_type == IB_QPT_RAW_PACKET)
@@ -2211,9 +2242,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
status = ib_get_cached_gid(&dev->ib_dev, port_num,
index, &gid, &gid_attr);
- if (!status && !memcmp(&gid, &zgid, sizeof(gid)))
- status = -ENOENT;
- if (!status && gid_attr.ndev) {
+ if (!status) {
vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev);
memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN);
dev_put(gid_attr.ndev);
@@ -3724,8 +3753,8 @@ out:
*/
wmb();
- writel(qp->doorbell_qpn,
- to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
+ writel_relaxed(qp->doorbell_qpn,
+ to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
/*
* Make sure doorbells don't leak out of SQ spinlock
@@ -4047,7 +4076,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
return ERR_PTR(-EOPNOTSUPP);
}
- if (init_attr->create_flags) {
+ if (init_attr->create_flags & ~IB_WQ_FLAGS_SCATTER_FCS) {
pr_debug("unsupported create_flags %u\n",
init_attr->create_flags);
return ERR_PTR(-EOPNOTSUPP);
@@ -4068,6 +4097,9 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
ib_qp_init_attr.recv_cq = init_attr->cq;
ib_qp_init_attr.send_cq = ib_qp_init_attr.recv_cq; /* Dummy CQ */
+ if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS)
+ ib_qp_init_attr.create_flags |= IB_QP_CREATE_SCATTER_FCS;
+
err = create_qp_common(dev, pd, MLX4_IB_RWQ_SRC, &ib_qp_init_attr,
udata, 0, &qp);
if (err) {
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
index bce263b92821..fb4d77be019b 100644
--- a/drivers/infiniband/hw/mlx5/Kconfig
+++ b/drivers/infiniband/hw/mlx5/Kconfig
@@ -1,6 +1,7 @@
config MLX5_INFINIBAND
tristate "Mellanox Connect-IB HCA support"
depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
+ depends on INFINIBAND_USER_ACCESS || INFINIBAND_USER_ACCESS=n
---help---
This driver provides low-level InfiniBand support for
Mellanox Connect-IB PCI Express host channel adapters (HCAs).
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index bc6299697dda..d42b922bede8 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
+mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index b3a9f645c7c6..e6bde32a83f3 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -101,10 +101,6 @@ struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
resp.response_length = min_resp_len;
- err = ib_resolve_eth_dmac(pd->device, ah_attr);
- if (err)
- return ERR_PTR(err);
-
memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 6f6712f87a73..188512bf46e6 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -66,3 +66,107 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out));
}
+
+int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
+ u64 length, u32 alignment)
+{
+ struct mlx5_core_dev *dev = memic->dev;
+ u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size)
+ >> PAGE_SHIFT;
+ u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
+ u32 max_alignment = MLX5_CAP_DEV_MEM(dev, log_max_memic_addr_alignment);
+ u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
+ u32 out[MLX5_ST_SZ_DW(alloc_memic_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_memic_in)] = {};
+ u32 mlx5_alignment;
+ u64 page_idx = 0;
+ int ret = 0;
+
+ if (!length || (length & MLX5_MEMIC_ALLOC_SIZE_MASK))
+ return -EINVAL;
+
+ /* mlx5 device sets alignment as 64*2^driver_value
+ * so normalizing is needed.
+ */
+ mlx5_alignment = (alignment < MLX5_MEMIC_BASE_ALIGN) ? 0 :
+ alignment - MLX5_MEMIC_BASE_ALIGN;
+ if (mlx5_alignment > max_alignment)
+ return -EINVAL;
+
+ MLX5_SET(alloc_memic_in, in, opcode, MLX5_CMD_OP_ALLOC_MEMIC);
+ MLX5_SET(alloc_memic_in, in, range_size, num_pages * PAGE_SIZE);
+ MLX5_SET(alloc_memic_in, in, memic_size, length);
+ MLX5_SET(alloc_memic_in, in, log_memic_addr_alignment,
+ mlx5_alignment);
+
+ while (page_idx < num_memic_hw_pages) {
+ spin_lock(&memic->memic_lock);
+ page_idx = bitmap_find_next_zero_area(memic->memic_alloc_pages,
+ num_memic_hw_pages,
+ page_idx,
+ num_pages, 0);
+
+ if (page_idx < num_memic_hw_pages)
+ bitmap_set(memic->memic_alloc_pages,
+ page_idx, num_pages);
+
+ spin_unlock(&memic->memic_lock);
+
+ if (page_idx >= num_memic_hw_pages)
+ break;
+
+ MLX5_SET64(alloc_memic_in, in, range_start_addr,
+ hw_start_addr + (page_idx * PAGE_SIZE));
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret) {
+ spin_lock(&memic->memic_lock);
+ bitmap_clear(memic->memic_alloc_pages,
+ page_idx, num_pages);
+ spin_unlock(&memic->memic_lock);
+
+ if (ret == -EAGAIN) {
+ page_idx++;
+ continue;
+ }
+
+ return ret;
+ }
+
+ *addr = pci_resource_start(dev->pdev, 0) +
+ MLX5_GET64(alloc_memic_out, out, memic_start_addr);
+
+ return 0;
+ }
+
+ return -ENOMEM;
+}
+
+int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length)
+{
+ struct mlx5_core_dev *dev = memic->dev;
+ u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
+ u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
+ u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {0};
+ u64 start_page_idx;
+ int err;
+
+ addr -= pci_resource_start(dev->pdev, 0);
+ start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT;
+
+ MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC);
+ MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr);
+ MLX5_SET(dealloc_memic_in, in, memic_size, length);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+
+ if (!err) {
+ spin_lock(&memic->memic_lock);
+ bitmap_clear(memic->memic_alloc_pages,
+ start_page_idx, num_pages);
+ spin_unlock(&memic->memic_lock);
+ }
+
+ return err;
+}
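Note: mlx5_cmd_alloc_memic() above manages the device-memory (MEMIC) window with a page-granular bitmap protected by memic_lock; a candidate page range is reserved before the ALLOC_MEMIC firmware command is issued and rolled back if the command fails, and a transient -EAGAIN failure makes the search resume one page further on. The requested alignment is normalized against the device's 64-byte base (MLX5_MEMIC_BASE_ALIGN) before being written into the command. The standalone sketch below illustrates only the reserve-then-roll-back bitmap pattern; the helper, the sizes and the names are placeholders for the example, not the kernel API.

/*
 * Standalone sketch (not part of the patch) of the allocation pattern used by
 * mlx5_cmd_alloc_memic(): a page bitmap guards the device-memory window, a
 * candidate range is reserved up front and would be released again if the
 * firmware command failed.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define NUM_PAGES 64

static bool page_used[NUM_PAGES];	/* true = page reserved */

/* First run of 'count' free pages at or after 'start'; NUM_PAGES if none. */
static size_t find_zero_area(size_t start, size_t count)
{
	for (size_t i = start; i + count <= NUM_PAGES; i++) {
		size_t j;

		for (j = 0; j < count && !page_used[i + j]; j++)
			;
		if (j == count)
			return i;
	}
	return NUM_PAGES;
}

static int alloc_page_range(size_t count, size_t *out_idx)
{
	size_t idx = 0;

	while ((idx = find_zero_area(idx, count)) < NUM_PAGES) {
		for (size_t j = 0; j < count; j++)
			page_used[idx + j] = true;	/* reserve */

		/* A real caller would issue the firmware command here, clear
		 * the bits again on failure, and retry from idx + 1 when the
		 * failure is transient (-EAGAIN in the patch). */
		*out_idx = idx;
		return 0;
	}
	return -1;	/* no room left: -ENOMEM in the patch */
}

int main(void)
{
	size_t idx;

	if (!alloc_page_range(4, &idx))
		printf("reserved pages %zu..%zu\n", idx, idx + 3);
	return 0;
}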
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 78ffded7cc2c..e7206c8a8011 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -33,6 +33,7 @@
#ifndef MLX5_IB_CMD_H
#define MLX5_IB_CMD_H
+#include "mlx5_ib.h"
#include <linux/kernel.h>
#include <linux/mlx5/driver.h>
@@ -41,4 +42,7 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
void *out, int out_size);
int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
void *in, int in_size);
+int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
+ u64 length, u32 alignment);
+int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
index ba4f96c85071..7e4e358a4fd8 100644
--- a/drivers/infiniband/hw/mlx5/cong.c
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -247,21 +247,30 @@ static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
}
}
-static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, int offset, u32 *var)
+static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
+ int offset, u32 *var)
{
int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
void *out;
void *field;
int err;
enum mlx5_ib_cong_node_type node;
+ struct mlx5_core_dev *mdev;
+
+ /* Takes a 1-based port number */
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
+ if (!mdev)
+ return -ENODEV;
out = kvzalloc(outlen, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
+ if (!out) {
+ err = -ENOMEM;
+ goto alloc_err;
+ }
node = mlx5_ib_param_to_node(offset);
- err = mlx5_cmd_query_cong_params(dev->mdev, node, out, outlen);
+ err = mlx5_cmd_query_cong_params(mdev, node, out, outlen);
if (err)
goto free;
@@ -270,21 +279,32 @@ static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, int offset, u32 *var)
free:
kvfree(out);
+alloc_err:
+ mlx5_ib_put_native_port_mdev(dev, port_num + 1);
return err;
}
-static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, int offset, u32 var)
+static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
+ int offset, u32 var)
{
int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
void *in;
void *field;
enum mlx5_ib_cong_node_type node;
+ struct mlx5_core_dev *mdev;
u32 attr_mask = 0;
int err;
+ /* Takes a 1-based port number */
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
+ if (!mdev)
+ return -ENODEV;
+
in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
+ if (!in) {
+ err = -ENOMEM;
+ goto alloc_err;
+ }
MLX5_SET(modify_cong_params_in, in, opcode,
MLX5_CMD_OP_MODIFY_CONG_PARAMS);
@@ -299,8 +319,10 @@ static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, int offset, u32 var)
MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
attr_mask);
- err = mlx5_cmd_modify_cong_params(dev->mdev, in, inlen);
+ err = mlx5_cmd_modify_cong_params(mdev, in, inlen);
kvfree(in);
+alloc_err:
+ mlx5_ib_put_native_port_mdev(dev, port_num + 1);
return err;
}
@@ -324,7 +346,7 @@ static ssize_t set_param(struct file *filp, const char __user *buf,
if (kstrtou32(lbuf, 0, &var))
return -EINVAL;
- ret = mlx5_ib_set_cc_params(param->dev, offset, var);
+ ret = mlx5_ib_set_cc_params(param->dev, param->port_num, offset, var);
return ret ? ret : count;
}
@@ -337,7 +359,7 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
int ret;
char lbuf[11];
- ret = mlx5_ib_get_cc_params(param->dev, offset, &var);
+ ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var);
if (ret)
return ret;
@@ -355,44 +377,51 @@ static const struct file_operations dbg_cc_fops = {
.read = get_param,
};
-void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev)
+void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
{
if (!mlx5_debugfs_root ||
- !dev->dbg_cc_params ||
- !dev->dbg_cc_params->root)
+ !dev->port[port_num].dbg_cc_params ||
+ !dev->port[port_num].dbg_cc_params->root)
return;
- debugfs_remove_recursive(dev->dbg_cc_params->root);
- kfree(dev->dbg_cc_params);
- dev->dbg_cc_params = NULL;
+ debugfs_remove_recursive(dev->port[port_num].dbg_cc_params->root);
+ kfree(dev->port[port_num].dbg_cc_params);
+ dev->port[port_num].dbg_cc_params = NULL;
}
-int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev)
+int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
{
struct mlx5_ib_dbg_cc_params *dbg_cc_params;
+ struct mlx5_core_dev *mdev;
int i;
if (!mlx5_debugfs_root)
goto out;
- if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed) ||
- !MLX5_CAP_GEN(dev->mdev, cc_modify_allowed))
+ /* Takes a 1-based port number */
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
+ if (!mdev)
goto out;
+ if (!MLX5_CAP_GEN(mdev, cc_query_allowed) ||
+ !MLX5_CAP_GEN(mdev, cc_modify_allowed))
+ goto put_mdev;
+
dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL);
if (!dbg_cc_params)
- goto out;
+ goto err;
- dev->dbg_cc_params = dbg_cc_params;
+ dev->port[port_num].dbg_cc_params = dbg_cc_params;
dbg_cc_params->root = debugfs_create_dir("cc_params",
- dev->mdev->priv.dbg_root);
+ mdev->priv.dbg_root);
if (!dbg_cc_params->root)
goto err;
for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
dbg_cc_params->params[i].offset = i;
dbg_cc_params->params[i].dev = dev;
+ dbg_cc_params->params[i].port_num = port_num;
dbg_cc_params->params[i].dentry =
debugfs_create_file(mlx5_ib_dbg_cc_name[i],
0600, dbg_cc_params->root,
@@ -401,11 +430,17 @@ int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev)
if (!dbg_cc_params->params[i].dentry)
goto err;
}
-out: return 0;
+
+put_mdev:
+ mlx5_ib_put_native_port_mdev(dev, port_num + 1);
+out:
+ return 0;
err:
mlx5_ib_warn(dev, "cong debugfs failure\n");
- mlx5_ib_cleanup_cong_debugfs(dev);
+ mlx5_ib_cleanup_cong_debugfs(dev, port_num);
+ mlx5_ib_put_native_port_mdev(dev, port_num + 1);
+
/*
* We don't want to fail driver if debugfs failed to initialize,
* so we are not forwarding error to the user.
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 0124237063e3..7b4ce1a19de0 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -64,14 +64,9 @@ static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
}
}
-static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
-{
- return mlx5_buf_offset(&buf->buf, n * size);
-}
-
static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
- return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
+ return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
}
static u8 sw_ownership_bit(int n, int nent)
@@ -124,11 +119,13 @@ static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
case MLX5_OPCODE_RDMA_WRITE_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
+ /* fall through */
case MLX5_OPCODE_RDMA_WRITE:
wc->opcode = IB_WC_RDMA_WRITE;
break;
case MLX5_OPCODE_SEND_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
+ /* fall through */
case MLX5_OPCODE_SEND:
case MLX5_OPCODE_SEND_INVAL:
wc->opcode = IB_WC_SEND;
@@ -270,14 +267,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
{
- __be32 *p = (__be32 *)cqe;
- int i;
-
mlx5_ib_warn(dev, "dump error cqe\n");
- for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
- pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
- be32_to_cpu(p[1]), be32_to_cpu(p[2]),
- be32_to_cpu(p[3]));
+ mlx5_dump_err_cqe(dev->mdev, cqe);
}
static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
@@ -402,7 +393,7 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
- mlx5_buf_free(dev->mdev, &buf->buf);
+ mlx5_frag_buf_free(dev->mdev, &buf->fbc.frag_buf);
}
static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
@@ -732,12 +723,25 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
return ret;
}
-static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
- int nent, int cqe_size)
+static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_cq_buf *buf,
+ int nent,
+ int cqe_size)
{
+ struct mlx5_frag_buf_ctrl *c = &buf->fbc;
+ struct mlx5_frag_buf *frag_buf = &c->frag_buf;
+ u32 cqc_buff[MLX5_ST_SZ_DW(cqc)] = {0};
int err;
- err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf);
+ MLX5_SET(cqc, cqc_buff, log_cq_size, ilog2(cqe_size));
+ MLX5_SET(cqc, cqc_buff, cqe_sz, (cqe_size == 128) ? 1 : 0);
+
+ mlx5_core_init_cq_frag_buf(&buf->fbc, cqc_buff);
+
+ err = mlx5_frag_buf_alloc_node(dev->mdev,
+ nent * cqe_size,
+ frag_buf,
+ dev->mdev->priv.numa_node);
if (err)
return err;
@@ -762,13 +766,13 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
int err;
ucmdlen = udata->inlen < sizeof(ucmd) ?
- (sizeof(ucmd) - sizeof(ucmd.reserved)) : sizeof(ucmd);
+ (sizeof(ucmd) - sizeof(ucmd.flags)) : sizeof(ucmd);
if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
return -EFAULT;
if (ucmdlen == sizeof(ucmd) &&
- ucmd.reserved != 0)
+ (ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD)))
return -EINVAL;
if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
@@ -812,8 +816,10 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
*index = to_mucontext(context)->bfregi.sys_pages[0];
if (ucmd.cqe_comp_en == 1) {
- if (unlikely((*cqe_size != 64) ||
- !MLX5_CAP_GEN(dev->mdev, cqe_compression))) {
+ if (!((*cqe_size == 128 &&
+ MLX5_CAP_GEN(dev->mdev, cqe_compression_128)) ||
+ (*cqe_size == 64 &&
+ MLX5_CAP_GEN(dev->mdev, cqe_compression)))) {
err = -EOPNOTSUPP;
mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n",
*cqe_size);
@@ -836,6 +842,19 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
ilog2(ucmd.cqe_comp_res_format));
}
+ if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD) {
+ if (*cqe_size != 128 ||
+ !MLX5_CAP_GEN(dev->mdev, cqe_128_always)) {
+ err = -EOPNOTSUPP;
+ mlx5_ib_warn(dev,
+ "CQE padding is not supported for CQE size of %dB!\n",
+ *cqe_size);
+ goto err_cqb;
+ }
+
+ cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD;
+ }
+
return 0;
err_cqb:
@@ -855,14 +874,15 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
ib_umem_release(cq->buf.umem);
}
-static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
+static void init_cq_frag_buf(struct mlx5_ib_cq *cq,
+ struct mlx5_ib_cq_buf *buf)
{
int i;
void *cqe;
struct mlx5_cqe64 *cqe64;
for (i = 0; i < buf->nent; i++) {
- cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
+ cqe = get_cqe(cq, i);
cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
cqe64->op_own = MLX5_CQE_INVALID << 4;
}
@@ -884,14 +904,15 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
cq->mcq.arm_db = cq->db.db + 1;
cq->mcq.cqe_sz = cqe_size;
- err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
+ err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size);
if (err)
goto err_db;
- init_cq_buf(cq, &cq->buf);
+ init_cq_frag_buf(cq, &cq->buf);
*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
- MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
+ MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
+ cq->buf.fbc.frag_buf.npages;
*cqb = kvzalloc(*inlen, GFP_KERNEL);
if (!*cqb) {
err = -ENOMEM;
@@ -899,11 +920,12 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
}
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
- mlx5_fill_page_array(&cq->buf.buf, pas);
+ mlx5_fill_page_frag_array(&cq->buf.fbc.frag_buf, pas);
cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
MLX5_SET(cqc, cqc, log_page_size,
- cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ cq->buf.fbc.frag_buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
*index = dev->mdev->priv.uar->index;
@@ -995,12 +1017,15 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
cq->cqe_size = cqe_size;
cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context);
- MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
+ MLX5_SET(cqc, cqc, cqe_sz,
+ cqe_sz_to_mlx_sz(cqe_size,
+ cq->private_flags &
+ MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
MLX5_SET(cqc, cqc, uar_page, index);
MLX5_SET(cqc, cqc, c_eqn, eqn);
MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
- if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
+ if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
MLX5_SET(cqc, cqc, oi, 1);
err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen);
@@ -1139,6 +1164,9 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
return -ENOSYS;
+ if (cq_period > MLX5_MAX_CQ_PERIOD)
+ return -EINVAL;
+
err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq,
cq_period, cq_count);
if (err)
@@ -1199,11 +1227,11 @@ static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (!cq->resize_buf)
return -ENOMEM;
- err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
+ err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size);
if (err)
goto ex;
- init_cq_buf(cq, cq->resize_buf);
+ init_cq_frag_buf(cq, cq->resize_buf);
return 0;
@@ -1248,9 +1276,8 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq)
}
while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
- dcqe = get_cqe_from_buf(cq->resize_buf,
- (i + 1) & (cq->resize_buf->nent),
- dsize);
+ dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
+ (i + 1) & cq->resize_buf->nent);
dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
memcpy(dcqe, scqe, dsize);
@@ -1316,8 +1343,11 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
cqe_size = 64;
err = resize_kernel(dev, cq, entries, cqe_size);
if (!err) {
- npas = cq->resize_buf->buf.npages;
- page_shift = cq->resize_buf->buf.page_shift;
+ struct mlx5_frag_buf_ctrl *c;
+
+ c = &cq->resize_buf->fbc;
+ npas = c->frag_buf.npages;
+ page_shift = c->frag_buf.page_shift;
}
}
@@ -1338,7 +1368,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
pas, 0);
else
- mlx5_fill_page_array(&cq->resize_buf->buf, pas);
+ mlx5_fill_page_frag_array(&cq->resize_buf->fbc.frag_buf,
+ pas);
MLX5_SET(modify_cq_in, in,
modify_field_select_resize_field_select.resize_field_select.resize_field_select,
@@ -1350,7 +1381,10 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
MLX5_SET(cqc, cqc, log_page_size,
page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
+ MLX5_SET(cqc, cqc, cqe_sz,
+ cqe_sz_to_mlx_sz(cqe_size,
+ cq->private_flags &
+ MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE);
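Note: the cq.c hunks above replace the single contiguous CQE buffer (mlx5_buf) with a fragmented buffer (mlx5_frag_buf), so get_cqe(), init_cq_frag_buf() and the resize path now locate a CQE through mlx5_frag_buf_get_wqe() rather than a flat byte offset. The sketch below shows only the indexing idea, splitting an element index into a fragment number and an offset within that fragment; the structure layout and the constants are assumptions for the example, not the mlx5_core definitions.

/*
 * Illustrative sketch (not part of the patch) of fragmented-buffer indexing:
 * the buffer is a set of page-sized fragments and an element index maps to a
 * fragment plus an offset inside it.
 */
#include <stdio.h>
#include <stdlib.h>

#define FRAG_SIZE	4096u	/* one fragment per page (assumed) */
#define CQE_SIZE	64u	/* element stride (assumed) */

struct frag_buf {
	unsigned int nfrags;
	unsigned char **frags;	/* one allocation per fragment */
};

static void *frag_buf_get(const struct frag_buf *b, unsigned int idx)
{
	unsigned int per_frag = FRAG_SIZE / CQE_SIZE;

	return b->frags[idx / per_frag] + (idx % per_frag) * CQE_SIZE;
}

int main(void)
{
	unsigned char *storage[2] = { calloc(1, FRAG_SIZE), calloc(1, FRAG_SIZE) };
	struct frag_buf b = { .nfrags = 2, .frags = storage };

	/* Element 70 lands in fragment 1 at offset 6 * 64 bytes. */
	printf("cqe 70 sits %td bytes into fragment 1\n",
	       (unsigned char *)frag_buf_get(&b, 70) - storage[1]);
	free(storage[0]);
	free(storage[1]);
	return 0;
}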
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
new file mode 100644
index 000000000000..0e04fdddf670
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include "ib_rep.h"
+
+static const struct mlx5_ib_profile rep_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_rep_flow_db_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_rep_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_stage_rep_roce_init,
+ mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_stage_dev_res_init,
+ mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_stage_counters_init,
+ mlx5_ib_stage_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
+ NULL,
+ mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
+ mlx5_ib_stage_post_ib_reg_umr_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+ mlx5_ib_stage_class_attr_init,
+ NULL),
+};
+
+static int
+mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ return 0;
+}
+
+static void
+mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ rep->rep_if[REP_IB].priv = NULL;
+}
+
+static int
+mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_ib_dev *ibdev;
+
+ ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
+ if (!ibdev)
+ return -ENOMEM;
+
+ ibdev->rep = rep;
+ ibdev->mdev = dev;
+ ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports),
+ MLX5_CAP_GEN(dev, num_vhca_ports));
+ if (!__mlx5_ib_add(ibdev, &rep_profile))
+ return -EINVAL;
+
+ rep->rep_if[REP_IB].priv = ibdev;
+
+ return 0;
+}
+
+static void
+mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_ib_dev *dev;
+
+ if (!rep->rep_if[REP_IB].priv)
+ return;
+
+ dev = mlx5_ib_rep_to_dev(rep);
+ __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+ rep->rep_if[REP_IB].priv = NULL;
+}
+
+static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+ return mlx5_ib_rep_to_dev(rep);
+}
+
+static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++) {
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5_ib_vport_rep_load;
+ rep_if.unload = mlx5_ib_vport_rep_unload;
+ rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+ mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB);
+ }
+}
+
+static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++)
+ mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB);
+}
+
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5_ib_nic_rep_load;
+ rep_if.unload = mlx5_ib_nic_rep_unload;
+ rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+ rep_if.priv = dev;
+
+ mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB);
+
+ mlx5_ib_rep_register_vf_vports(dev);
+}
+
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+ mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */
+ mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF*/
+}
+
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_mode(esw);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB);
+}
+
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
+}
+
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
+{
+ return mlx5_eswitch_vport_rep(esw, vport);
+}
+
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+ if (!dev->rep)
+ return 0;
+
+ flow_rule =
+ mlx5_eswitch_add_send_to_vport_rule(esw,
+ dev->rep->vport,
+ sq->base.mqp.qpn);
+ if (IS_ERR(flow_rule))
+ return PTR_ERR(flow_rule);
+ sq->flow_rule = flow_rule;
+
+ return 0;
+}
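Note: rep_profile above plugs into the mlx5_ib staged-profile mechanism, a table of init/cleanup callback pairs that __mlx5_ib_add() runs in order and that is unwound in reverse on removal or when a stage fails. A minimal standalone sketch of that pattern follows; the stage functions and names are made up for illustration.

/*
 * Minimal sketch (not from the patch) of the staged init/cleanup pattern:
 * run stages forward, unwind completed stages in reverse on failure.
 */
#include <stdio.h>

struct stage {
	int (*init)(void);
	void (*cleanup)(void);
};

static int ok_init(void) { puts("init"); return 0; }
static void ok_cleanup(void) { puts("cleanup"); }
static int failing_init(void) { puts("init fails"); return -1; }

static int run_profile(const struct stage *stages, int nstages)
{
	int i, err = 0;

	for (i = 0; i < nstages; i++)
		if (stages[i].init && (err = stages[i].init()))
			break;

	if (err)
		while (--i >= 0)	/* undo only what completed, newest first */
			if (stages[i].cleanup)
				stages[i].cleanup();
	return err;
}

int main(void)
{
	const struct stage profile[] = {
		{ ok_init, ok_cleanup },
		{ ok_init, ok_cleanup },
		{ failing_init, NULL },
	};

	return run_profile(profile, 3) ? 1 : 0;
}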
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
new file mode 100644
index 000000000000..046fd942fd46
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLX5_IB_REP_H__
+#define __MLX5_IB_REP_H__
+
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_ib.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index);
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index);
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev);
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev);
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq);
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return SRIOV_NONE;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+ return NULL;
+}
+
+static inline
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+
+static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ return 0;
+}
+
+static inline
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+#endif
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
+{
+ return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv;
+}
+#endif /* __MLX5_IB_REP_H__ */
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 1003b0133a49..32a9e9228b13 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -197,10 +197,9 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
vl_15_dropped);
}
-static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
+static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num,
const struct ib_mad *in_mad, struct ib_mad *out_mad)
{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
int err;
void *out_cnt;
@@ -222,7 +221,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
if (!out_cnt)
return IB_MAD_RESULT_FAILURE;
- err = mlx5_core_query_vport_counter(dev->mdev, 0, 0,
+ err = mlx5_core_query_vport_counter(mdev, 0, 0,
port_num, out_cnt, sz);
if (!err)
pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
@@ -235,7 +234,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
if (!out_cnt)
return IB_MAD_RESULT_FAILURE;
- err = mlx5_core_query_ib_ppcnt(dev->mdev, port_num,
+ err = mlx5_core_query_ib_ppcnt(mdev, port_num,
out_cnt, sz);
if (!err)
pma_cnt_assign(pma_cnt, out_cnt);
@@ -255,9 +254,11 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
u16 *out_mad_pkey_index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_core_dev *mdev = dev->mdev;
const struct ib_mad *in_mad = (const struct ib_mad *)in;
struct ib_mad *out_mad = (struct ib_mad *)out;
+ struct mlx5_core_dev *mdev;
+ u8 mdev_port_num;
+ int ret;
if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
*out_mad_size != sizeof(*out_mad)))
@@ -265,14 +266,20 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
memset(out_mad->data, 0, sizeof(out_mad->data));
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
+ if (!mdev)
+ return IB_MAD_RESULT_FAILURE;
+
if (MLX5_CAP_GEN(mdev, vport_counters) &&
in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) {
- return process_pma_cmd(ibdev, port_num, in_mad, out_mad);
+ ret = process_pma_cmd(mdev, mdev_port_num, in_mad, out_mad);
} else {
- return process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
+ ret = process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
in_mad, out_mad);
}
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+ return ret;
}
int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
@@ -519,7 +526,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
int ext_active_speed;
int err = -ENOMEM;
- if (port < 1 || port > MLX5_CAP_GEN(mdev, num_ports)) {
+ if (port < 1 || port > dev->num_ports) {
mlx5_ib_warn(dev, "invalid port number %d\n", port);
return -EINVAL;
}
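Note: the mad.c change, like the cong.c change earlier in this series, switches from dev->mdev to the per-port native core device obtained with mlx5_ib_get_native_port_mdev(), and pairs every successful lookup with mlx5_ib_put_native_port_mdev() on all exit paths. The sketch below shows only that get/put discipline with placeholder helpers; it is not the mlx5_ib API.

/*
 * Sketch (not from the patch) of the get/put discipline: look the resource up
 * before doing the work and release it on every exit path, including errors.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct port_dev {
	int refcount;
};

static struct port_dev *port_get(struct port_dev *p)
{
	if (!p)
		return NULL;
	p->refcount++;
	return p;
}

static void port_put(struct port_dev *p)
{
	p->refcount--;
}

static int query_port(struct port_dev *port)
{
	struct port_dev *p = port_get(port);
	void *buf;
	int err = 0;

	if (!p)
		return -ENODEV;

	buf = malloc(256);
	if (!buf) {
		err = -ENOMEM;
		goto out_put;		/* release the reference even on failure */
	}

	/* ... issue the query using 'buf' here ... */
	free(buf);
out_put:
	port_put(p);
	return err;
}

int main(void)
{
	struct port_dev port = { .refcount = 0 };

	printf("query_port: %d, refcount back to %d\n",
	       query_port(&port), port.refcount);
	return 0;
}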
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 303961ef7d91..69716a7ea993 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -38,6 +38,7 @@
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
+#include <linux/bitmap.h>
#if defined(CONFIG_X86)
#include <asm/pat.h>
#endif
@@ -50,16 +51,23 @@
#include <rdma/ib_cache.h>
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <linux/in.h>
#include <linux/etherdevice.h>
-#include <linux/mlx5/fs.h>
-#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
+#include "ib_rep.h"
#include "cmd.h"
-#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/accel.h>
+#include <rdma/uverbs_std_types.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "5.0-0"
@@ -72,10 +80,42 @@ static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
DRIVER_VERSION "\n";
+struct mlx5_ib_event_work {
+ struct work_struct work;
+ struct mlx5_core_dev *dev;
+ void *context;
+ enum mlx5_dev_event event;
+ unsigned long param;
+};
+
enum {
MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
};