summaryrefslogtreecommitdiffstats
path: root/kernel/include/rdma/ib_verbs.h
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/include/rdma/ib_verbs.h')
-rw-r--r--kernel/include/rdma/ib_verbs.h820
1 files changed, 601 insertions, 219 deletions
diff --git a/kernel/include/rdma/ib_verbs.h b/kernel/include/rdma/ib_verbs.h
index 65994a19e..120da1d7f 100644
--- a/kernel/include/rdma/ib_verbs.h
+++ b/kernel/include/rdma/ib_verbs.h
@@ -48,6 +48,7 @@
#include <linux/rwsem.h>
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
+#include <linux/socket.h>
#include <uapi/linux/if_ether.h>
#include <linux/atomic.h>
@@ -64,6 +65,12 @@ union ib_gid {
} global;
};
+extern union ib_gid zgid;
+
+struct ib_gid_attr {
+ struct net_device *ndev;
+};
+
enum rdma_node_type {
/* IB values map to NodeInfo:NodeType. */
RDMA_NODE_IB_CA = 1,
@@ -81,6 +88,13 @@ enum rdma_transport_type {
RDMA_TRANSPORT_USNIC_UDP
};
+enum rdma_protocol_type {
+ RDMA_PROTOCOL_IB,
+ RDMA_PROTOCOL_IBOE,
+ RDMA_PROTOCOL_IWARP,
+ RDMA_PROTOCOL_USNIC_UDP
+};
+
__attribute_const__ enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type);
@@ -123,6 +137,8 @@ enum ib_device_cap_flags {
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24),
+ IB_DEVICE_RC_IP_CSUM = (1<<25),
+ IB_DEVICE_RAW_IP_CSUM = (1<<26),
IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
IB_DEVICE_SIGNATURE_HANDOVER = (1<<30),
IB_DEVICE_ON_DEMAND_PAGING = (1<<31),
@@ -166,6 +182,16 @@ struct ib_odp_caps {
} per_transport_caps;
};
+enum ib_cq_creation_flags {
+ IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0,
+};
+
+struct ib_cq_init_attr {
+ unsigned int cqe;
+ int comp_vector;
+ u32 flags;
+};
+
struct ib_device_attr {
u64 fw_ver;
__be64 sys_image_guid;
@@ -210,6 +236,8 @@ struct ib_device_attr {
int sig_prot_cap;
int sig_guard_cap;
struct ib_odp_caps odp_caps;
+ uint64_t timestamp_mask;
+ uint64_t hca_core_clock; /* in KHZ */
};
enum ib_mtu {
@@ -265,7 +293,7 @@ enum ib_port_cap_flags {
IB_PORT_BOOT_MGMT_SUP = 1 << 23,
IB_PORT_LINK_LATENCY_SUP = 1 << 24,
IB_PORT_CLIENT_REG_SUP = 1 << 25,
- IB_PORT_IP_BASED_GIDS = 1 << 26
+ IB_PORT_IP_BASED_GIDS = 1 << 26,
};
enum ib_port_width {
@@ -346,6 +374,42 @@ union rdma_protocol_stats {
struct iw_protocol_stats iw;
};
+/* Define bits for the various functionality this port needs to be supported by
+ * the core.
+ */
+/* Management 0x00000FFF */
+#define RDMA_CORE_CAP_IB_MAD 0x00000001
+#define RDMA_CORE_CAP_IB_SMI 0x00000002
+#define RDMA_CORE_CAP_IB_CM 0x00000004
+#define RDMA_CORE_CAP_IW_CM 0x00000008
+#define RDMA_CORE_CAP_IB_SA 0x00000010
+#define RDMA_CORE_CAP_OPA_MAD 0x00000020
+
+/* Address format 0x000FF000 */
+#define RDMA_CORE_CAP_AF_IB 0x00001000
+#define RDMA_CORE_CAP_ETH_AH 0x00002000
+
+/* Protocol 0xFFF00000 */
+#define RDMA_CORE_CAP_PROT_IB 0x00100000
+#define RDMA_CORE_CAP_PROT_ROCE 0x00200000
+#define RDMA_CORE_CAP_PROT_IWARP 0x00400000
+
+#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \
+ | RDMA_CORE_CAP_IB_MAD \
+ | RDMA_CORE_CAP_IB_SMI \
+ | RDMA_CORE_CAP_IB_CM \
+ | RDMA_CORE_CAP_IB_SA \
+ | RDMA_CORE_CAP_AF_IB)
+#define RDMA_CORE_PORT_IBA_ROCE (RDMA_CORE_CAP_PROT_ROCE \
+ | RDMA_CORE_CAP_IB_MAD \
+ | RDMA_CORE_CAP_IB_CM \
+ | RDMA_CORE_CAP_AF_IB \
+ | RDMA_CORE_CAP_ETH_AH)
+#define RDMA_CORE_PORT_IWARP (RDMA_CORE_CAP_PROT_IWARP \
+ | RDMA_CORE_CAP_IW_CM)
+#define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \
+ | RDMA_CORE_CAP_OPA_MAD)
+
struct ib_port_attr {
enum ib_port_state state;
enum ib_mtu max_mtu;
@@ -412,6 +476,8 @@ enum ib_event_type {
IB_EVENT_GID_CHANGE,
};
+const char *__attribute_const__ ib_event_msg(enum ib_event_type event);
+
struct ib_event {
struct ib_device *device;
union {
@@ -499,20 +565,18 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
*/
__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
-enum ib_mr_create_flags {
- IB_MR_SIGNATURE_EN = 1,
-};
/**
- * ib_mr_init_attr - Memory region init attributes passed to routine
- * ib_create_mr.
- * @max_reg_descriptors: max number of registration descriptors that
- * may be used with registration work requests.
- * @flags: MR creation flags bit mask.
+ * enum ib_mr_type - memory region type
+ * @IB_MR_TYPE_MEM_REG: memory region that is used for
+ * normal registration
+ * @IB_MR_TYPE_SIGNATURE: memory region that is used for
+ * signature operations (data-integrity
+ * capable regions)
*/
-struct ib_mr_init_attr {
- int max_reg_descriptors;
- u32 flags;
+enum ib_mr_type {
+ IB_MR_TYPE_MEM_REG,
+ IB_MR_TYPE_SIGNATURE,
};
/**
@@ -635,7 +699,6 @@ struct ib_ah_attr {
u8 ah_flags;
u8 port_num;
u8 dmac[ETH_ALEN];
- u16 vlan_id;
};
enum ib_wc_status {
@@ -663,6 +726,8 @@ enum ib_wc_status {
IB_WC_GENERAL_ERR
};
+const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);
+
enum ib_wc_opcode {
IB_WC_SEND,
IB_WC_RDMA_WRITE,
@@ -672,7 +737,7 @@ enum ib_wc_opcode {
IB_WC_BIND_MW,
IB_WC_LSO,
IB_WC_LOCAL_INV,
- IB_WC_FAST_REG_MR,
+ IB_WC_REG_MR,
IB_WC_MASKED_COMP_SWAP,
IB_WC_MASKED_FETCH_ADD,
/*
@@ -809,7 +874,6 @@ enum ib_qp_create_flags {
IB_QP_CREATE_RESERVED_END = 1 << 31,
};
-
/*
* Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler
* callback to destroy the passed in QP.
@@ -893,10 +957,10 @@ enum ib_qp_attr_mask {
IB_QP_PATH_MIG_STATE = (1<<18),
IB_QP_CAP = (1<<19),
IB_QP_DEST_QPN = (1<<20),
- IB_QP_SMAC = (1<<21),
- IB_QP_ALT_SMAC = (1<<22),
- IB_QP_VID = (1<<23),
- IB_QP_ALT_VID = (1<<24),
+ IB_QP_RESERVED1 = (1<<21),
+ IB_QP_RESERVED2 = (1<<22),
+ IB_QP_RESERVED3 = (1<<23),
+ IB_QP_RESERVED4 = (1<<24),
};
enum ib_qp_state {
@@ -946,10 +1010,6 @@ struct ib_qp_attr {
u8 rnr_retry;
u8 alt_port_num;
u8 alt_timeout;
- u8 smac[ETH_ALEN];
- u8 alt_smac[ETH_ALEN];
- u16 vlan_id;
- u16 alt_vlan_id;
};
enum ib_wr_opcode {
@@ -964,7 +1024,7 @@ enum ib_wr_opcode {
IB_WR_SEND_WITH_INV,
IB_WR_RDMA_READ_WITH_INV,
IB_WR_LOCAL_INV,
- IB_WR_FAST_REG_MR,
+ IB_WR_REG_MR,
IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
IB_WR_BIND_MW,
@@ -1002,12 +1062,6 @@ struct ib_sge {
u32 lkey;
};
-struct ib_fast_reg_page_list {
- struct ib_device *device;
- u64 *page_list;
- unsigned int max_page_list_len;
-};
-
/**
* struct ib_mw_bind_info - Parameters for a memory window bind operation.
* @mr: A memory region to bind the memory window to.
@@ -1036,54 +1090,89 @@ struct ib_send_wr {
__be32 imm_data;
u32 invalidate_rkey;
} ex;
- union {
- struct {
- u64 remote_addr;
- u32 rkey;
- } rdma;
- struct {
- u64 remote_addr;
- u64 compare_add;
- u64 swap;
- u64 compare_add_mask;
- u64 swap_mask;
- u32 rkey;
- } atomic;
- struct {
- struct ib_ah *ah;
- void *header;
- int hlen;
- int mss;
- u32 remote_qpn;
- u32 remote_qkey;
- u16 pkey_index; /* valid for GSI only */
- u8 port_num; /* valid for DR SMPs on switch only */
- } ud;
- struct {
- u64 iova_start;
- struct ib_fast_reg_page_list *page_list;
- unsigned int page_shift;
- unsigned int page_list_len;
- u32 length;
- int access_flags;
- u32 rkey;
- } fast_reg;
- struct {
- struct ib_mw *mw;
- /* The new rkey for the memory window. */
- u32 rkey;
- struct ib_mw_bind_info bind_info;
- } bind_mw;
- struct {
- struct ib_sig_attrs *sig_attrs;
- struct ib_mr *sig_mr;
- int access_flags;
- struct ib_sge *prot;
- } sig_handover;
- } wr;
- u32 xrc_remote_srq_num; /* XRC TGT QPs only */
};
+struct ib_rdma_wr {
+ struct ib_send_wr wr;
+ u64 remote_addr;
+ u32 rkey;
+};
+
+static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr)
+{
+ return container_of(wr, struct ib_rdma_wr, wr);
+}
+
+struct ib_atomic_wr {
+ struct ib_send_wr wr;
+ u64 remote_addr;
+ u64 compare_add;
+ u64 swap;
+ u64 compare_add_mask;
+ u64 swap_mask;
+ u32 rkey;
+};
+
+static inline struct ib_atomic_wr *atomic_wr(struct ib_send_wr *wr)
+{
+ return container_of(wr, struct ib_atomic_wr, wr);
+}
+
+struct ib_ud_wr {
+ struct ib_send_wr wr;
+ struct ib_ah *ah;
+ void *header;
+ int hlen;
+ int mss;
+ u32 remote_qpn;
+ u32 remote_qkey;
+ u16 pkey_index; /* valid for GSI only */
+ u8 port_num; /* valid for DR SMPs on switch only */
+};
+
+static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr)
+{
+ return container_of(wr, struct ib_ud_wr, wr);
+}
+
+struct ib_reg_wr {
+ struct ib_send_wr wr;
+ struct ib_mr *mr;
+ u32 key;
+ int access;
+};
+
+static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr)
+{
+ return container_of(wr, struct ib_reg_wr, wr);
+}
+
+struct ib_bind_mw_wr {
+ struct ib_send_wr wr;
+ struct ib_mw *mw;
+ /* The new rkey for the memory window. */
+ u32 rkey;
+ struct ib_mw_bind_info bind_info;
+};
+
+static inline struct ib_bind_mw_wr *bind_mw_wr(struct ib_send_wr *wr)
+{
+ return container_of(wr, struct ib_bind_mw_wr, wr);
+}
+
+struct ib_sig_handover_wr {
+ struct ib_send_wr wr;
+ struct ib_sig_attrs *sig_attrs;
+ struct ib_mr *sig_mr;
+ int access_flags;
+ struct ib_sge *prot;
+};
+
+static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr)
+{
+ return container_of(wr, struct ib_sig_handover_wr, wr);
+}
+
struct ib_recv_wr {
struct ib_recv_wr *next;
u64 wr_id;
@@ -1182,6 +1271,7 @@ struct ib_uobject {
int id; /* index into kernel idr */
struct kref ref;
struct rw_semaphore mutex; /* protects .live */
+ struct rcu_head rcu; /* kfree_rcu() overhead */
int live;
};
@@ -1193,9 +1283,11 @@ struct ib_udata {
};
struct ib_pd {
+ u32 local_dma_lkey;
struct ib_device *device;
struct ib_uobject *uobject;
atomic_t usecnt; /* count all resources */
+ struct ib_mr *local_mr;
};
struct ib_xrcd {
@@ -1268,6 +1360,9 @@ struct ib_mr {
struct ib_uobject *uobject;
u32 lkey;
u32 rkey;
+ u64 iova;
+ u32 length;
+ unsigned int page_size;
atomic_t usecnt; /* count number of MWs */
};
@@ -1407,7 +1502,7 @@ struct ib_flow {
struct ib_uobject *uobject;
};
-struct ib_mad;
+struct ib_mad_hdr;
struct ib_grh;
enum ib_process_mad_flags {
@@ -1429,7 +1524,7 @@ struct ib_cache {
rwlock_t lock;
struct ib_event_handler event_handler;
struct ib_pkey_cache **pkey_cache;
- struct ib_gid_cache **gid_cache;
+ struct ib_gid_table **gid_cache;
u8 *lmc_cache;
};
@@ -1474,6 +1569,13 @@ struct ib_dma_mapping_ops {
struct iw_cm_verbs;
+struct ib_port_immutable {
+ int pkey_tbl_len;
+ int gid_tbl_len;
+ u32 core_cap_flags;
+ u32 max_mad_size;
+};
+
struct ib_device {
struct device *dma_device;
@@ -1484,11 +1586,15 @@ struct ib_device {
spinlock_t client_data_lock;
struct list_head core_list;
+ /* Access to the client_data_list is protected by the client_data_lock
+ * spinlock and the lists_rwsem read-write semaphore */
struct list_head client_data_list;
struct ib_cache cache;
- int *pkey_tbl_len;
- int *gid_tbl_len;
+ /**
+ * port_immutable is indexed by port number
+ */
+ struct ib_port_immutable *port_immutable;
int num_comp_vectors;
@@ -1497,15 +1603,54 @@ struct ib_device {
int (*get_protocol_stats)(struct ib_device *device,
union rdma_protocol_stats *stats);
int (*query_device)(struct ib_device *device,
- struct ib_device_attr *device_attr);
+ struct ib_device_attr *device_attr,
+ struct ib_udata *udata);
int (*query_port)(struct ib_device *device,
u8 port_num,
struct ib_port_attr *port_attr);
enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
u8 port_num);
+ /* When calling get_netdev, the HW vendor's driver should return the
+ * net device of device @device at port @port_num or NULL if such
+ * a net device doesn't exist. The vendor driver should call dev_hold
+ * on this net device. The HW vendor's device driver must guarantee
+ * that this function returns NULL before the net device reaches
+ * NETDEV_UNREGISTER_FINAL state.
+ */
+ struct net_device *(*get_netdev)(struct ib_device *device,
+ u8 port_num);
int (*query_gid)(struct ib_device *device,
u8 port_num, int index,
union ib_gid *gid);
+ /* When calling add_gid, the HW vendor's driver should
+ * add the gid of device @device at gid index @index of
+ * port @port_num to be @gid. Meta-info of that gid (for example,
+ * the network device related to this gid) is available
+ * at @attr. @context allows the HW vendor driver to store extra
+ * information together with a GID entry. The HW vendor may allocate
+ * memory to contain this information and store it in @context when a
+ * new GID entry is written to. Params are consistent until the next
+ * call of add_gid or delete_gid. The function should return 0 on
+ * success or error otherwise. The function could be called
+ * concurrently for different ports. This function is only called
+ * when roce_gid_table is used.
+ */
+ int (*add_gid)(struct ib_device *device,
+ u8 port_num,
+ unsigned int index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ void **context);
+ /* When calling del_gid, the HW vendor's driver should delete the
+ * gid of device @device at gid index @index of port @port_num.
+ * Upon the deletion of a GID entry, the HW vendor must free any
+ * allocated memory. The caller will clear @context afterwards.
+ * This function is only called when roce_gid_table is used.
+ */
+ int (*del_gid)(struct ib_device *device,
+ u8 port_num,
+ unsigned int index,
+ void **context);
int (*query_pkey)(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey);
int (*modify_device)(struct ib_device *device,
@@ -1561,8 +1706,8 @@ struct ib_device {
int (*post_recv)(struct ib_qp *qp,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr);
- struct ib_cq * (*create_cq)(struct ib_device *device, int cqe,
- int comp_vector,
+ struct ib_cq * (*create_cq)(struct ib_device *device,
+ const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata);
int (*modify_cq)(struct ib_cq *cq, u16 cq_count,
@@ -1599,14 +1744,12 @@ struct ib_device {
int (*query_mr)(struct ib_mr *mr,
struct ib_mr_attr *mr_attr);
int (*dereg_mr)(struct ib_mr *mr);
- int (*destroy_mr)(struct ib_mr *mr);
- struct ib_mr * (*create_mr)(struct ib_pd *pd,
- struct ib_mr_init_attr *mr_init_attr);
- struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd,
- int max_page_list_len);
- struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
- int page_list_len);
- void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list);
+ struct ib_mr * (*alloc_mr)(struct ib_pd *pd,
+ enum ib_mr_type mr_type,
+ u32 max_num_sg);
+ int (*map_mr_sg)(struct ib_mr *mr,
+ struct scatterlist *sg,
+ int sg_nents);
int (*rereg_phys_mr)(struct ib_mr *mr,
int mr_rereg_mask,
struct ib_pd *pd,
@@ -1637,10 +1780,13 @@ struct ib_device {
int (*process_mad)(struct ib_device *device,
int process_mad_flags,
u8 port_num,
- struct ib_wc *in_wc,
- struct ib_grh *in_grh,
- struct ib_mad *in_mad,
- struct ib_mad *out_mad);
+ const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad_hdr *in_mad,
+ size_t in_mad_size,
+ struct ib_mad_hdr *out_mad,
+ size_t *out_mad_size,
+ u16 *out_mad_pkey_index);
struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device,
struct ib_ucontext *ucontext,
struct ib_udata *udata);
@@ -1652,6 +1798,7 @@ struct ib_device {
int (*destroy_flow)(struct ib_flow *flow_id);
int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
+ void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
struct ib_dma_mapping_ops *dma_ops;
@@ -1673,15 +1820,46 @@ struct ib_device {
char node_desc[64];
__be64 node_guid;
u32 local_dma_lkey;
+ u16 is_switch:1;
u8 node_type;
u8 phys_port_cnt;
+
+ /**
+ * The following mandatory functions are used only at device
+ * registration. Keep functions such as these at the end of this
+ * structure to avoid cache line misses when accessing struct ib_device
+ * in fast paths.
+ */
+ int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *);
};
struct ib_client {
char *name;
void (*add) (struct ib_device *);
- void (*remove)(struct ib_device *);
-
+ void (*remove)(struct ib_device *, void *client_data);
+
+ /* Returns the net_dev belonging to this ib_client and matching the
+ * given parameters.
+ * @dev: An RDMA device that the net_dev uses for communication.
+ * @port: A physical port number on the RDMA device.
+ * @pkey: P_Key that the net_dev uses if applicable.
+ * @gid: A GID that the net_dev uses to communicate.
+ * @addr: An IP address the net_dev is configured with.
+ * @client_data: The device's client data set by ib_set_client_data().
+ *
+ * An ib_client that implements a net_dev on top of RDMA devices
+ * (such as IP over IB) should implement this callback, allowing the
+ * rdma_cm module to find the right net_dev for a given request.
+ *
+ * The caller is responsible for calling dev_put on the returned
+ * netdev. */
+ struct net_device *(*get_net_dev_by_params)(
+ struct ib_device *dev,
+ u8 port,
+ u16 pkey,
+ const union ib_gid *gid,
+ const struct sockaddr *addr,
+ void *client_data);
struct list_head list;
};
@@ -1743,8 +1921,292 @@ int ib_query_port(struct ib_device *device,
enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
u8 port_num);
+/**
+ * rdma_cap_ib_switch - Check if the device is IB switch
+ * @device: Device to check
+ *
+ * The device driver is responsible for setting the is_switch bit
+ * in the ib_device structure at init time.
+ *
+ * Return: true if the device is IB switch.
+ */
+static inline bool rdma_cap_ib_switch(const struct ib_device *device)
+{
+ return device->is_switch;
+}
+
+/**
+ * rdma_start_port - Return the first valid port number for the device
+ * specified
+ *
+ * @device: Device to be checked
+ *
+ * Return start port number
+ */
+static inline u8 rdma_start_port(const struct ib_device *device)
+{
+ return rdma_cap_ib_switch(device) ? 0 : 1;
+}
+
+/**
+ * rdma_end_port - Return the last valid port number for the device
+ * specified
+ *
+ * @device: Device to be checked
+ *
+ * Return last port number
+ */
+static inline u8 rdma_end_port(const struct ib_device *device)
+{
+ return rdma_cap_ib_switch(device) ? 0 : device->phys_port_cnt;
+}
+
+static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
+}
+
+static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
+}
+
+static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP;
+}
+
+static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags &
+ (RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_PROT_ROCE);
+}
+
+/**
+ * rdma_cap_ib_mad - Check if the port of a device supports Infiniband
+ * Management Datagrams.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Management Datagrams (MAD) are a required part of the InfiniBand
+ * specification and are supported on all InfiniBand devices. A slightly
+ * extended version is also supported on OPA interfaces.
+ *
+ * Return: true if the port supports sending/receiving of MAD packets.
+ */
+static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_MAD;
+}
+
+/**
+ * rdma_cap_opa_mad - Check if the port of device provides support for OPA
+ * Management Datagrams.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Intel OmniPath devices extend and/or replace the InfiniBand Management
+ * datagrams with their own versions. These OPA MADs share many but not all of
+ * the characteristics of InfiniBand MADs.
+ *
+ * OPA MADs differ in the following ways:
+ *
+ * 1) MADs are variable size up to 2K
+ * IBTA defined MADs remain fixed at 256 bytes
+ * 2) OPA SMPs must carry valid PKeys
+ * 3) OPA SMP packets are a different format
+ *
+ * Return: true if the port supports OPA MAD packet formats.
+ */
+static inline bool rdma_cap_opa_mad(const struct ib_device *device, u8 port_num)
+{
+ return (device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_OPA_MAD)
+ == RDMA_CORE_CAP_OPA_MAD;
+}
+
+/**
+ * rdma_cap_ib_smi - Check if the port of a device provides an Infiniband
+ * Subnet Management Agent (SMA) on the Subnet Management Interface (SMI).
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Each InfiniBand node is required to provide a Subnet Management Agent
+ * that the subnet manager can access. Prior to the fabric being fully
+ * configured by the subnet manager, the SMA is accessed via a well known
+ * interface called the Subnet Management Interface (SMI). This interface
+ * uses directed route packets to communicate with the SM to get around the
+ * chicken and egg problem of the SM needing to know what's on the fabric
+ * in order to configure the fabric, and needing to configure the fabric in
+ * order to send packets to the devices on the fabric. These directed
+ * route packets do not need the fabric fully configured in order to reach
+ * their destination. The SMI is the only method allowed to send
+ * directed route packets on an InfiniBand fabric.
+ *
+ * Return: true if the port provides an SMI.
+ */
+static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SMI;
+}
+
+/**
+ * rdma_cap_ib_cm - Check if the port of device has the capability Infiniband
+ * Communication Manager.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * The InfiniBand Communication Manager is one of many pre-defined General
+ * Service Agents (GSA) that are accessed via the General Service
+ * Interface (GSI). Its role is to facilitate establishment of connections
+ * between nodes as well as other management related tasks for established
+ * connections.
+ *
+ * Return: true if the port supports an IB CM (this does not guarantee that
+ * a CM is actually running however).
+ */
+static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_CM;
+}
+
+/**
+ * rdma_cap_iw_cm - Check if the port of device has the capability IWARP
+ * Communication Manager.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Similar to above, but specific to iWARP connections which have a different
+ * management protocol than InfiniBand.
+ *
+ * Return: true if the port supports an iWARP CM (this does not guarantee that
+ * a CM is actually running however).
+ */
+static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IW_CM;
+}
+
+/**
+ * rdma_cap_ib_sa - Check if the port of device has the capability Infiniband
+ * Subnet Administration.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * An InfiniBand Subnet Administration (SA) service is a pre-defined General
+ * Service Agent (GSA) provided by the Subnet Manager (SM). On InfiniBand
+ * fabrics, devices should resolve routes to other hosts by contacting the
+ * SA to query the proper route.
+ *
+ * Return: true if the port should act as a client to the fabric Subnet
+ * Administration interface. This does not imply that the SA service is
+ * running locally.
+ */
+static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SA;
+}
+
+/**
+ * rdma_cap_ib_mcast - Check if the port of device has the capability Infiniband
+ * Multicast.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * InfiniBand multicast registration is more complex than normal IPv4 or
+ * IPv6 multicast registration. Each Host Channel Adapter must register
+ * with the Subnet Manager when it wishes to join a multicast group. It
+ * should do so only once regardless of how many queue pairs it subscribes
+ * to this group. And it should leave the group only after all queue pairs
+ * attached to the group have been detached.
+ *
+ * Return: true if the port must undertake the additional administrative
+ * overhead of registering/unregistering with the SM and tracking of the
+ * total number of queue pairs attached to the multicast group.
+ */
+static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num)
+{
+ return rdma_cap_ib_sa(device, port_num);
+}
+
+/**
+ * rdma_cap_af_ib - Check if the port of device has the capability
+ * Native Infiniband Address.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * InfiniBand addressing uses a port's GUID + Subnet Prefix to make a default
+ * GID. RoCE uses a different mechanism, but still generates a GID via
+ * a prescribed mechanism and port specific data.
+ *
+ * Return: true if the port uses a GID address to identify devices on the
+ * network.
+ */
+static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_AF_IB;
+}
+
+/**
+ * rdma_cap_eth_ah - Check if the port of device has the capability
+ * Ethernet Address Handle.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * RoCE is InfiniBand over Ethernet, and it uses a well defined technique
+ * to fabricate GIDs over Ethernet/IP specific addresses native to the
+ * port. Normally, packet headers are generated by the sending host
+ * adapter, but when sending connectionless datagrams, we must manually
+ * inject the proper headers for the fabric we are communicating over.
+ *
+ * Return: true if we are running as a RoCE port and must force the
+ * addition of a Global Route Header built from our Ethernet Address
+ * Handle into our header list for connectionless packets.
+ */
+static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_ETH_AH;
+}
+
+/**
+ * rdma_max_mad_size - Return the max MAD size required by this RDMA Port.
+ *
+ * @device: Device
+ * @port_num: Port number
+ *
+ * This MAD size includes the MAD headers and MAD payload. No other headers
+ * are included.
+ *
+ * Return the max MAD size required by the Port. Will return 0 if the port
+ * does not support MADs
+ */
+static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].max_mad_size;
+}
+
+/**
+ * rdma_cap_roce_gid_table - Check if the port of device uses roce_gid_table
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * RoCE GID table mechanism manages the various GIDs for a device.
+ *
+ * NOTE: if allocating the port's GID table has failed, this call will still
+ * return true, but any RoCE GID table API will fail.
+ *
+ * Return: true if the port uses RoCE GID table mechanism in order to manage
+ * its GIDs.
+ */
+static inline bool rdma_cap_roce_gid_table(const struct ib_device *device,
+ u8 port_num)
+{
+ return rdma_protocol_roce(device, port_num) &&
+ device->add_gid && device->del_gid;
+}
+
int ib_query_gid(struct ib_device *device,
- u8 port_num, int index, union ib_gid *gid);
+ u8 port_num, int index, union ib_gid *gid,
+ struct ib_gid_attr *attr);
int ib_query_pkey(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey);
@@ -1758,25 +2220,14 @@ int ib_modify_port(struct ib_device *device,
struct ib_port_modify *port_modify);
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- u8 *port_num, u16 *index);
+ struct net_device *ndev, u8 *port_num, u16 *index);
int ib_find_pkey(struct ib_device *device,
u8 port_num, u16 pkey, u16 *index);
-/**
- * ib_alloc_pd - Allocates an unused protection domain.
- * @device: The device on which to allocate the protection domain.
- *
- * A protection domain object provides an association between QPs, shared
- * receive queues, address handles, memory regions, and memory windows.
- */
struct ib_pd *ib_alloc_pd(struct ib_device *device);
-/**
- * ib_dealloc_pd - Deallocates a protection domain.
- * @pd: The protection domain to deallocate.
- */
-int ib_dealloc_pd(struct ib_pd *pd);
+void ib_dealloc_pd(struct ib_pd *pd);
/**
* ib_create_ah - Creates an address handle for the given address vector.
@@ -1799,8 +2250,9 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
* @ah_attr: Returned attributes that can be used when creating an address
* handle for replying to the message.
*/
-int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
- struct ib_grh *grh, struct ib_ah_attr *ah_attr);
+int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
+ const struct ib_wc *wc, const struct ib_grh *grh,
+ struct ib_ah_attr *ah_attr);
/**
* ib_create_ah_from_wc - Creates an address handle associated with the
@@ -1814,8 +2266,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
* The address handle is used to reference a local or global destination
* in all UD QP post sends.
*/
-struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
- struct ib_grh *grh, u8 port_num);
+struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
+ const struct ib_grh *grh, u8 port_num);
/**
* ib_modify_ah - Modifies the address vector associated with an address
@@ -2011,16 +2463,15 @@ static inline int ib_post_recv(struct ib_qp *qp,
* asynchronous event not associated with a completion occurs on the CQ.
* @cq_context: Context associated with the CQ returned to the user via
* the associated completion and event handlers.
- * @cqe: The minimum size of the CQ.
- * @comp_vector - Completion vector used to signal completion events.
- * Must be >= 0 and < context->num_comp_vectors.
+ * @cq_attr: The attributes the CQ should be created upon.
*
* Users can examine the cq structure to determine the actual CQ size.
*/
struct ib_cq *ib_create_cq(struct ib_device *device,
ib_comp_handler comp_handler,
void (*event_handler)(struct ib_event *, void *),
- void *cq_context, int cqe, int comp_vector);
+ void *cq_context,
+ const struct ib_cq_init_attr *cq_attr);
/**
* ib_resize_cq - Modifies the capacity of the CQ.
@@ -2388,52 +2839,6 @@ static inline void ib_dma_free_coherent(struct ib_device *dev,
}
/**
- * ib_reg_phys_mr - Prepares a virtually addressed memory region for use
- * by an HCA.
- * @pd: The protection domain associated assigned to the registered region.
- * @phys_buf_array: Specifies a list of physical buffers to use in the
- * memory region.
- * @num_phys_buf: Specifies the size of the phys_buf_array.
- * @mr_access_flags: Specifies the memory access rights.
- * @iova_start: The offset of the region's starting I/O virtual address.
- */
-struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags,
- u64 *iova_start);
-
-/**
- * ib_rereg_phys_mr - Modifies the attributes of an existing memory region.
- * Conceptually, this call performs the functions deregister memory region
- * followed by register physical memory region. Where possible,
- * resources are reused instead of deallocated and reallocated.
- * @mr: The memory region to modify.
- * @mr_rereg_mask: A bit-mask used to indicate which of the following
- * properties of the memory region are being modified.
- * @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies
- * the new protection domain to associated with the memory region,
- * otherwise, this parameter is ignored.
- * @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this
- * field specifies a list of physical buffers to use in the new
- * translation, otherwise, this parameter is ignored.
- * @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this
- * field specifies the size of the phys_buf_array, otherwise, this
- * parameter is ignored.
- * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this
- * field specifies the new memory access rights, otherwise, this
- * parameter is ignored.
- * @iova_start: The offset of the region's starting I/O virtual address.
- */
-int ib_rereg_phys_mr(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags,
- u64 *iova_start);
-
-/**
* ib_query_mr - Retrieves information about a specific memory region.
* @mr: The memory region to retrieve information about.
* @mr_attr: The attributes of the specified memory region.
@@ -2449,60 +2854,9 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
*/
int ib_dereg_mr(struct ib_mr *mr);
-
-/**
- * ib_create_mr - Allocates a memory region that may be used for
- * signature handover operations.
- * @pd: The protection domain associated with the region.
- * @mr_init_attr: memory region init attributes.
- */
-struct ib_mr *ib_create_mr(struct ib_pd *pd,
- struct ib_mr_init_attr *mr_init_attr);
-
-/**
- * ib_destroy_mr - Destroys a memory region that was created using
- * ib_create_mr and removes it from HW translation tables.
- * @mr: The memory region to destroy.
- *
- * This function can fail, if the memory region has memory windows bound to it.
- */
-int ib_destroy_mr(struct ib_mr *mr);
-
-/**
- * ib_alloc_fast_reg_mr - Allocates memory region usable with the
- * IB_WR_FAST_REG_MR send work request.
- * @pd: The protection domain associated with the region.
- * @max_page_list_len: requested max physical buffer list length to be
- * used with fast register work requests for this MR.
- */
-struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
-
-/**
- * ib_alloc_fast_reg_page_list - Allocates a page list array
- * @device - ib device pointer.
- * @page_list_len - size of the page list array to be allocated.
- *
- * This allocates and returns a struct ib_fast_reg_page_list * and a
- * page_list array that is at least page_list_len in size. The actual
- * size is returned in max_page_list_len. The caller is responsible
- * for initializing the contents of the page_list array before posting
- * a send work request with the IB_WC_FAST_REG_MR opcode.
- *
- * The page_list array entries must be translated using one of the
- * ib_dma_*() functions just like the addresses passed to
- * ib_map_phys_fmr(). Once the ib_post_send() is issued, the struct
- * ib_fast_reg_page_list must not be modified by the caller until the
- * IB_WC_FAST_REG_MR work request completes.
- */
-struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(
- struct ib_device *device, int page_list_len);
-
-/**
- * ib_free_fast_reg_page_list - Deallocates a previously allocated
- * page list array.
- * @page_list - struct ib_fast_reg_page_list pointer to be deallocated.
- */
-void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
+struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type,
+ u32 max_num_sg);
/**
* ib_update_fast_reg_key - updates the key portion of the fast_reg MR
@@ -2668,4 +3022,32 @@ static inline int ib_check_mr_access(int flags)
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
+struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
+ u16 pkey, const union ib_gid *gid,
+ const struct sockaddr *addr);
+
+int ib_map_mr_sg(struct ib_mr *mr,
+ struct scatterlist *sg,
+ int sg_nents,
+ unsigned int page_size);
+
+static inline int
+ib_map_mr_sg_zbva(struct ib_mr *mr,
+ struct scatterlist *sg,
+ int sg_nents,
+ unsigned int page_size)
+{
+ int n;
+
+ n = ib_map_mr_sg(mr, sg, sg_nents, page_size);
+ mr->iova = 0;
+
+ return n;
+}
+
+int ib_sg_to_pages(struct ib_mr *mr,
+ struct scatterlist *sgl,
+ int sg_nents,
+ int (*set_page)(struct ib_mr *, u64));
+
#endif /* IB_VERBS_H */