diff options
Diffstat (limited to 'qemu/roms/ipxe/src/net/infiniband')
-rw-r--r-- | qemu/roms/ipxe/src/net/infiniband/ib_cm.c | 496 | ||||
-rw-r--r-- | qemu/roms/ipxe/src/net/infiniband/ib_cmrc.c | 445 | ||||
-rw-r--r-- | qemu/roms/ipxe/src/net/infiniband/ib_mcast.c | 213 | ||||
-rw-r--r-- | qemu/roms/ipxe/src/net/infiniband/ib_mi.c | 415 | ||||
-rw-r--r-- | qemu/roms/ipxe/src/net/infiniband/ib_packet.c | 249 | ||||
-rw-r--r-- | qemu/roms/ipxe/src/net/infiniband/ib_pathrec.c | 289 | ||||
-rw-r--r-- | qemu/roms/ipxe/src/net/infiniband/ib_sma.c | 371 | ||||
-rw-r--r-- | qemu/roms/ipxe/src/net/infiniband/ib_smc.c | 256 | ||||
-rw-r--r-- | qemu/roms/ipxe/src/net/infiniband/ib_srp.c | 581 |
9 files changed, 3315 insertions, 0 deletions
diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_cm.c b/qemu/roms/ipxe/src/net/infiniband/ib_cm.c new file mode 100644 index 000000000..797639bc8 --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_cm.c @@ -0,0 +1,496 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <assert.h> +#include <ipxe/infiniband.h> +#include <ipxe/ib_mi.h> +#include <ipxe/ib_pathrec.h> +#include <ipxe/ib_cm.h> + +/** + * @file + * + * Infiniband communication management + * + */ + +/** List of connections */ +static LIST_HEAD ( ib_cm_conns ); + +/** + * Find connection by local communication ID + * + * @v local_id Local communication ID + * @ret conn Connection, or NULL + */ +static struct ib_connection * ib_cm_find ( uint32_t local_id ) { + struct ib_connection *conn; + + list_for_each_entry ( conn, &ib_cm_conns, list ) { + if ( conn->local_id == local_id ) + return conn; + } + return NULL; +} + +/** + * Send "ready to use" response + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v av Address vector + * @v local_id Local communication ID + * @v remote_id Remote communication ID + * @ret rc Return status code + */ +static int ib_cm_send_rtu ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + struct ib_address_vector *av, + uint32_t local_id, uint32_t remote_id ) { + union ib_mad mad; + struct ib_cm_ready_to_use *rtu = &mad.cm.cm_data.ready_to_use; + int rc; + + /* Construct "ready to use" response */ + memset ( &mad, 0, sizeof ( mad ) ); + mad.hdr.mgmt_class = IB_MGMT_CLASS_CM; + mad.hdr.class_version = IB_CM_CLASS_VERSION; + mad.hdr.method = IB_MGMT_METHOD_SEND; + mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE ); + rtu->local_id = htonl ( local_id ); + rtu->remote_id = htonl ( remote_id ); + if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){ + DBG ( "CM could not send RTU: %s\n", strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Handle duplicate connection replies + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + * @ret rc Return status code + * + * If a "ready to use" 
MAD is lost, the peer may resend the connection + * reply. We have to respond to these with duplicate "ready to use" + * MADs, otherwise the peer may time out and drop the connection. + */ +static void ib_cm_recv_rep ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply; + struct ib_connection *conn; + uint32_t local_id = ntohl ( rep->remote_id ); + int rc; + + /* Identify connection */ + conn = ib_cm_find ( local_id ); + if ( conn ) { + /* Try to send "ready to use" reply */ + if ( ( rc = ib_cm_send_rtu ( ibdev, mi, av, conn->local_id, + conn->remote_id ) ) != 0 ) { + /* Ignore errors; the remote end will retry */ + } + } else { + DBG ( "CM unidentified connection %08x\n", local_id ); + } +} + +/** + * Send reply to disconnection request + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v av Address vector + * @v local_id Local communication ID + * @v remote_id Remote communication ID + * @ret rc Return status code + */ +static int ib_cm_send_drep ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + struct ib_address_vector *av, + uint32_t local_id, uint32_t remote_id ) { + union ib_mad mad; + struct ib_cm_disconnect_reply *drep = &mad.cm.cm_data.disconnect_reply; + int rc; + + /* Construct reply to disconnection request */ + memset ( &mad, 0, sizeof ( mad ) ); + mad.hdr.mgmt_class = IB_MGMT_CLASS_CM; + mad.hdr.class_version = IB_CM_CLASS_VERSION; + mad.hdr.method = IB_MGMT_METHOD_SEND; + mad.hdr.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REPLY ); + drep->local_id = htonl ( local_id ); + drep->remote_id = htonl ( remote_id ); + if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){ + DBG ( "CM could not send DREP: %s\n", strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Handle disconnection requests + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av 
Source address vector + * @ret rc Return status code + */ +static void ib_cm_recv_dreq ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_cm_disconnect_request *dreq = + &mad->cm.cm_data.disconnect_request; + struct ib_connection *conn; + uint32_t local_id = ntohl ( dreq->remote_id ); + uint32_t remote_id = ntohl ( dreq->local_id ); + int rc; + + /* Identify connection */ + conn = ib_cm_find ( local_id ); + if ( conn ) { + /* Notify upper layer */ + conn->op->changed ( ibdev, conn->qp, conn, -ENOTCONN, + &dreq->private_data, + sizeof ( dreq->private_data ) ); + } else { + DBG ( "CM unidentified connection %08x\n", local_id ); + } + + /* Send reply */ + if ( ( rc = ib_cm_send_drep ( ibdev, mi, av, local_id, + remote_id ) ) != 0 ) { + /* Ignore errors; the remote end will retry */ + } +}; + +/** Communication management agents */ +struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = { + { + .mgmt_class = IB_MGMT_CLASS_CM, + .class_version = IB_CM_CLASS_VERSION, + .attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ), + .handle = ib_cm_recv_rep, + }, + { + .mgmt_class = IB_MGMT_CLASS_CM, + .class_version = IB_CM_CLASS_VERSION, + .attr_id = htons ( IB_CM_ATTR_DISCONNECT_REQUEST ), + .handle = ib_cm_recv_dreq, + }, +}; + +/** + * Convert connection rejection reason to return status code + * + * @v reason Rejection reason (in network byte order) + * @ret rc Return status code + */ +static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) { + switch ( reason ) { + case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) : + return -ENODEV; + case htons ( IB_CM_REJECT_STALE_CONN ) : + return -EALREADY; + case htons ( IB_CM_REJECT_CONSUMER ) : + return -ENOTTY; + default: + return -EPERM; + } +} + +/** + * Handle connection request transaction completion + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + * @v rc Status code + * @v mad Received MAD (or NULL on error) + * 
@v av Source address vector (or NULL on error) + */ +static void ib_cm_req_complete ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + struct ib_mad_transaction *madx, + int rc, union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_connection *conn = ib_madx_get_ownerdata ( madx ); + struct ib_queue_pair *qp = conn->qp; + struct ib_cm_common *common = &mad->cm.cm_data.common; + struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply; + struct ib_cm_connect_reject *rej = &mad->cm.cm_data.connect_reject; + void *private_data = NULL; + size_t private_data_len = 0; + + /* Report failures */ + if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) )) + rc = -EIO; + if ( rc != 0 ) { + DBGC ( conn, "CM %p connection request failed: %s\n", + conn, strerror ( rc ) ); + goto out; + } + + /* Record remote communication ID */ + conn->remote_id = ntohl ( common->local_id ); + + /* Handle response */ + switch ( mad->hdr.attr_id ) { + + case htons ( IB_CM_ATTR_CONNECT_REPLY ) : + /* Extract fields */ + qp->av.qpn = ( ntohl ( rep->local_qpn ) >> 8 ); + qp->send.psn = ( ntohl ( rep->starting_psn ) >> 8 ); + private_data = &rep->private_data; + private_data_len = sizeof ( rep->private_data ); + DBGC ( conn, "CM %p connected to QPN %lx PSN %x\n", + conn, qp->av.qpn, qp->send.psn ); + + /* Modify queue pair */ + if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( conn, "CM %p could not modify queue pair: %s\n", + conn, strerror ( rc ) ); + goto out; + } + + /* Send "ready to use" reply */ + if ( ( rc = ib_cm_send_rtu ( ibdev, mi, av, conn->local_id, + conn->remote_id ) ) != 0 ) { + /* Treat as non-fatal */ + rc = 0; + } + break; + + case htons ( IB_CM_ATTR_CONNECT_REJECT ) : + /* Extract fields */ + DBGC ( conn, "CM %p connection rejected (reason %d)\n", + conn, ntohs ( rej->reason ) ); + /* Private data is valid only for a Consumer Reject */ + if ( rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) { + private_data = 
&rej->private_data; + private_data_len = sizeof ( rej->private_data ); + } + rc = ib_cm_rejection_reason_to_rc ( rej->reason ); + break; + + default: + DBGC ( conn, "CM %p unexpected response (attribute %04x)\n", + conn, ntohs ( mad->hdr.attr_id ) ); + rc = -ENOTSUP; + break; + } + + out: + /* Destroy the completed transaction */ + ib_destroy_madx ( ibdev, ibdev->gsi, madx ); + conn->madx = NULL; + + /* Hand off to the upper completion handler */ + conn->op->changed ( ibdev, qp, conn, rc, private_data, + private_data_len ); +} + +/** Connection request operations */ +static struct ib_mad_transaction_operations ib_cm_req_op = { + .complete = ib_cm_req_complete, +}; + +/** + * Handle connection path transaction completion + * + * @v ibdev Infiniband device + * @v path Path + * @v rc Status code + * @v av Address vector, or NULL on error + */ +static void ib_cm_path_complete ( struct ib_device *ibdev, + struct ib_path *path, int rc, + struct ib_address_vector *av ) { + struct ib_connection *conn = ib_path_get_ownerdata ( path ); + struct ib_queue_pair *qp = conn->qp; + union ib_mad mad; + struct ib_cm_connect_request *req = &mad.cm.cm_data.connect_request; + size_t private_data_len; + + /* Report failures */ + if ( rc != 0 ) { + DBGC ( conn, "CM %p path lookup failed: %s\n", + conn, strerror ( rc ) ); + conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 ); + goto out; + } + + /* Update queue pair peer path */ + memcpy ( &qp->av, av, sizeof ( qp->av ) ); + + /* Construct connection request */ + memset ( &mad, 0, sizeof ( mad ) ); + mad.hdr.mgmt_class = IB_MGMT_CLASS_CM; + mad.hdr.class_version = IB_CM_CLASS_VERSION; + mad.hdr.method = IB_MGMT_METHOD_SEND; + mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST ); + req->local_id = htonl ( conn->local_id ); + memcpy ( &req->service_id, &conn->service_id, + sizeof ( req->service_id ) ); + memcpy ( &req->local_ca, &ibdev->node_guid, sizeof ( req->local_ca ) ); + req->local_qpn__responder_resources = htonl ( ( qp->qpn << 8 ) 
| 1 ); + req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 ); + req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl = + htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) | + ( 0 << 0 ) ); + req->starting_psn__local_timeout__retry_count = + htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) | + ( 0x07 << 0 ) ); + req->pkey = htons ( ibdev->pkey ); + req->payload_mtu__rdc_exists__rnr_retry = + ( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) ); + req->max_cm_retries__srq = ( ( 0x0f << 4 ) | ( 0 << 3 ) ); + req->primary.local_lid = htons ( ibdev->lid ); + req->primary.remote_lid = htons ( conn->qp->av.lid ); + memcpy ( &req->primary.local_gid, &ibdev->gid, + sizeof ( req->primary.local_gid ) ); + memcpy ( &req->primary.remote_gid, &conn->qp->av.gid, + sizeof ( req->primary.remote_gid ) ); + req->primary.flow_label__rate = + htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) ); + req->primary.hop_limit = 0; + req->primary.sl__subnet_local = + ( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) ); + req->primary.local_ack_timeout = ( 0x13 << 3 ); + private_data_len = conn->private_data_len; + if ( private_data_len > sizeof ( req->private_data ) ) + private_data_len = sizeof ( req->private_data ); + memcpy ( &req->private_data, &conn->private_data, private_data_len ); + + /* Create connection request */ + av->qpn = IB_QPN_GSI; + av->qkey = IB_QKEY_GSI; + conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av, + &ib_cm_req_op ); + if ( ! 
conn->madx ) { + DBGC ( conn, "CM %p could not create connection request\n", + conn ); + conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 ); + goto out; + } + ib_madx_set_ownerdata ( conn->madx, conn ); + + out: + /* Destroy the completed transaction */ + ib_destroy_path ( ibdev, path ); + conn->path = NULL; +} + +/** Connection path operations */ +static struct ib_path_operations ib_cm_path_op = { + .complete = ib_cm_path_complete, +}; + +/** + * Create connection to remote QP + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v dgid Target GID + * @v service_id Target service ID + * @v private_data Connection request private data + * @v private_data_len Length of connection request private data + * @v op Connection operations + * @ret conn Connection + */ +struct ib_connection * +ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp, + union ib_gid *dgid, union ib_guid *service_id, + void *private_data, size_t private_data_len, + struct ib_connection_operations *op ) { + struct ib_connection *conn; + + /* Allocate and initialise request */ + conn = zalloc ( sizeof ( *conn ) + private_data_len ); + if ( ! conn ) + goto err_alloc_conn; + conn->ibdev = ibdev; + conn->qp = qp; + memset ( &qp->av, 0, sizeof ( qp->av ) ); + qp->av.gid_present = 1; + memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) ); + conn->local_id = random(); + memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) ); + conn->op = op; + conn->private_data_len = private_data_len; + memcpy ( &conn->private_data, private_data, private_data_len ); + + /* Create path */ + conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op ); + if ( ! 
conn->path ) + goto err_create_path; + ib_path_set_ownerdata ( conn->path, conn ); + + /* Add to list of connections */ + list_add ( &conn->list, &ib_cm_conns ); + + DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n", + conn, ibdev, qp->qpn ); + DBGC ( conn, "CM %p connecting to " IB_GID_FMT " " IB_GUID_FMT "\n", + conn, IB_GID_ARGS ( dgid ), IB_GUID_ARGS ( service_id ) ); + + return conn; + + ib_destroy_path ( ibdev, conn->path ); + err_create_path: + free ( conn ); + err_alloc_conn: + return NULL; +} + +/** + * Destroy connection to remote QP + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v conn Connection + */ +void ib_destroy_conn ( struct ib_device *ibdev, + struct ib_queue_pair *qp __unused, + struct ib_connection *conn ) { + + list_del ( &conn->list ); + if ( conn->madx ) + ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx ); + if ( conn->path ) + ib_destroy_path ( ibdev, conn->path ); + free ( conn ); +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_cmrc.c b/qemu/roms/ipxe/src/net/infiniband/ib_cmrc.c new file mode 100644 index 000000000..1cc0fcfef --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_cmrc.c @@ -0,0 +1,445 @@ +/* + * Copyright (C) 2009 Fen Systems Ltd <mbrown@fensystems.co.uk>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +FILE_LICENCE ( BSD2 ); + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <ipxe/iobuf.h> +#include <ipxe/xfer.h> +#include <ipxe/process.h> +#include <ipxe/infiniband.h> +#include <ipxe/ib_cm.h> +#include <ipxe/ib_cmrc.h> + +/** + * @file + * + * Infiniband Communication-managed Reliable Connections + * + */ + +/** CMRC number of send WQEs + * + * This is a policy decision. + */ +#define IB_CMRC_NUM_SEND_WQES 4 + +/** CMRC number of receive WQEs + * + * This is a policy decision. 
+ */ +#define IB_CMRC_NUM_RECV_WQES 2 + +/** CMRC number of completion queue entries + * + * This is a policy decision + */ +#define IB_CMRC_NUM_CQES 8 + +/** An Infiniband Communication-Managed Reliable Connection */ +struct ib_cmrc_connection { + /** Reference count */ + struct refcnt refcnt; + /** Data transfer interface */ + struct interface xfer; + /** Infiniband device */ + struct ib_device *ibdev; + /** Completion queue */ + struct ib_completion_queue *cq; + /** Queue pair */ + struct ib_queue_pair *qp; + /** Connection */ + struct ib_connection *conn; + /** Destination GID */ + union ib_gid dgid; + /** Service ID */ + union ib_guid service_id; + /** QP is connected */ + int connected; + /** Shutdown process */ + struct process shutdown; +}; + +/** + * Shut down CMRC connection gracefully + * + * @v cmrc Communication-Managed Reliable Connection + * + * The Infiniband data structures are not reference-counted or + * guarded. It is therefore unsafe to shut them down while we may be + * in the middle of a callback from the Infiniband stack (e.g. in a + * receive completion handler). + * + * This shutdown process will run some time after the call to + * ib_cmrc_close(), after control has returned out of the Infiniband + * core, and will shut down the Infiniband interfaces cleanly. + * + * The shutdown process holds an implicit reference on the CMRC + * connection, ensuring that the structure is not freed before the + * shutdown process has run. 
+ */ +static void ib_cmrc_shutdown ( struct ib_cmrc_connection *cmrc ) { + + DBGC ( cmrc, "CMRC %p shutting down\n", cmrc ); + + /* Shut down Infiniband interface. The CM connection is + * created only on the first send, so it may not exist if we + * were closed before any data was delivered. + */ + if ( cmrc->conn ) + ib_destroy_conn ( cmrc->ibdev, cmrc->qp, cmrc->conn ); + ib_destroy_qp ( cmrc->ibdev, cmrc->qp ); + ib_destroy_cq ( cmrc->ibdev, cmrc->cq ); + ib_close ( cmrc->ibdev ); + + /* Drop the remaining reference */ + ref_put ( &cmrc->refcnt ); +} + +/** + * Close CMRC connection + * + * @v cmrc Communication-Managed Reliable Connection + * @v rc Reason for close + */ +static void ib_cmrc_close ( struct ib_cmrc_connection *cmrc, int rc ) { + + /* Close data transfer interface */ + intf_shutdown ( &cmrc->xfer, rc ); + + /* Schedule shutdown process */ + process_add ( &cmrc->shutdown ); +} + +/** + * Handle change of CMRC connection status + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v conn Connection + * @v rc_cm Connection status code + * @v private_data Private data, if available + * @v private_data_len Length of private data + */ +static void ib_cmrc_changed ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_connection *conn __unused, int rc_cm, + void *private_data, size_t private_data_len ) { + struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp ); + int rc_xfer; + + /* Record connection status */ + if ( rc_cm == 0 ) { + DBGC ( cmrc, "CMRC %p connected\n", cmrc ); + cmrc->connected = 1; + } else { + DBGC ( cmrc, "CMRC %p disconnected: %s\n", + cmrc, strerror ( rc_cm ) ); + cmrc->connected = 0; + } + + /* Pass up any private data */ + DBGC2 ( cmrc, "CMRC %p received private data:\n", cmrc ); + DBGC2_HDA ( cmrc, 0, private_data, private_data_len ); + if ( private_data && + ( rc_xfer = xfer_deliver_raw ( &cmrc->xfer, private_data, + private_data_len ) ) != 0 ) { + DBGC ( cmrc, "CMRC %p could not deliver private data: %s\n", + cmrc, strerror ( rc_xfer ) ); + ib_cmrc_close ( cmrc, rc_xfer ); + return; + } + + /* Notify upper connection of window change */ + 
xfer_window_changed ( &cmrc->xfer ); + + /* If we are disconnected, close the upper connection */ + if ( rc_cm != 0 ) { + ib_cmrc_close ( cmrc, rc_cm ); + return; + } +} + +/** CMRC connection operations */ +static struct ib_connection_operations ib_cmrc_conn_op = { + .changed = ib_cmrc_changed, +}; + +/** + * Handle CMRC send completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @v rc Completion status code + */ +static void ib_cmrc_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct io_buffer *iobuf, int rc ) { + struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp ); + + /* Free the completed I/O buffer */ + free_iob ( iobuf ); + + /* Close the connection on any send errors */ + if ( rc != 0 ) { + DBGC ( cmrc, "CMRC %p send error: %s\n", + cmrc, strerror ( rc ) ); + ib_cmrc_close ( cmrc, rc ); + return; + } +} + +/** + * Handle CMRC receive completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v dest Destination address vector, or NULL + * @v source Source address vector, or NULL + * @v iobuf I/O buffer + * @v rc Completion status code + */ +static void ib_cmrc_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_address_vector *dest __unused, + struct ib_address_vector *source __unused, + struct io_buffer *iobuf, int rc ) { + struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp ); + + /* Close the connection on any receive errors */ + if ( rc != 0 ) { + DBGC ( cmrc, "CMRC %p receive error: %s\n", + cmrc, strerror ( rc ) ); + free_iob ( iobuf ); + ib_cmrc_close ( cmrc, rc ); + return; + } + + DBGC2 ( cmrc, "CMRC %p received:\n", cmrc ); + DBGC2_HDA ( cmrc, 0, iobuf->data, iob_len ( iobuf ) ); + + /* Pass up data */ + if ( ( rc = xfer_deliver_iob ( &cmrc->xfer, iobuf ) ) != 0 ) { + DBGC ( cmrc, "CMRC %p could not deliver data: %s\n", + cmrc, strerror ( rc ) ); + ib_cmrc_close ( cmrc, rc ); + return; + } +} + +/** 
Infiniband CMRC completion operations */ +static struct ib_completion_queue_operations ib_cmrc_completion_ops = { + .complete_send = ib_cmrc_complete_send, + .complete_recv = ib_cmrc_complete_recv, +}; + +/** Infiniband CMRC queue pair operations */ +static struct ib_queue_pair_operations ib_cmrc_queue_pair_ops = { + .alloc_iob = alloc_iob, +}; + +/** + * Send data via CMRC + * + * @v cmrc CMRC connection + * @v iobuf Datagram I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + * + * The I/O buffer is consumed on every path: it is handed to the + * queue pair when connected, and freed here otherwise. + */ +static int ib_cmrc_xfer_deliver ( struct ib_cmrc_connection *cmrc, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + int rc; + + /* If no connection has yet been attempted, send this datagram + * as the CM REQ private data. Otherwise, send it via the QP. + */ + if ( ! cmrc->connected ) { + + /* Abort if we have already sent a CM connection request */ + if ( cmrc->conn ) { + DBGC ( cmrc, "CMRC %p attempt to send before " + "connection is complete\n", cmrc ); + rc = -EIO; + goto out; + } + + /* Send via CM connection request */ + cmrc->conn = ib_create_conn ( cmrc->ibdev, cmrc->qp, + &cmrc->dgid, &cmrc->service_id, + iobuf->data, iob_len ( iobuf ), + &ib_cmrc_conn_op ); + if ( ! cmrc->conn ) { + DBGC ( cmrc, "CMRC %p could not connect\n", cmrc ); + rc = -ENOMEM; + goto out; + } + + /* The request private data has been copied by + * ib_create_conn(), so release the buffer that we own. + */ + free_iob ( iobuf ); + + } else { + + /* Send via QP */ + if ( ( rc = ib_post_send ( cmrc->ibdev, cmrc->qp, NULL, + iob_disown ( iobuf ) ) ) != 0 ) { + DBGC ( cmrc, "CMRC %p could not send: %s\n", + cmrc, strerror ( rc ) ); + goto out; + } + + } + return 0; + + out: + /* Free the I/O buffer if necessary */ + free_iob ( iobuf ); + + /* Close the connection on any errors */ + if ( rc != 0 ) + ib_cmrc_close ( cmrc, rc ); + + return rc; +} + +/** + * Check CMRC flow control window + * + * @v cmrc CMRC connection + * @ret len Length of window + */ +static size_t ib_cmrc_xfer_window ( struct ib_cmrc_connection *cmrc ) { + + /* We indicate a window only when we are successfully + * connected. 
+ */ + return ( cmrc->connected ? IB_MAX_PAYLOAD_SIZE : 0 ); +} + +/** + * Identify device underlying CMRC connection + * + * @v cmrc CMRC connection + * @ret device Underlying device + */ +static struct device * +ib_cmrc_identify_device ( struct ib_cmrc_connection *cmrc ) { + return cmrc->ibdev->dev; +} + +/** CMRC data transfer interface operations */ +static struct interface_operation ib_cmrc_xfer_operations[] = { + INTF_OP ( xfer_deliver, struct ib_cmrc_connection *, + ib_cmrc_xfer_deliver ), + INTF_OP ( xfer_window, struct ib_cmrc_connection *, + ib_cmrc_xfer_window ), + INTF_OP ( intf_close, struct ib_cmrc_connection *, ib_cmrc_close ), + INTF_OP ( identify_device, struct ib_cmrc_connection *, + ib_cmrc_identify_device ), +}; + +/** CMRC data transfer interface descriptor */ +static struct interface_descriptor ib_cmrc_xfer_desc = + INTF_DESC ( struct ib_cmrc_connection, xfer, ib_cmrc_xfer_operations ); + +/** CMRC shutdown process descriptor */ +static struct process_descriptor ib_cmrc_shutdown_desc = + PROC_DESC_ONCE ( struct ib_cmrc_connection, shutdown, + ib_cmrc_shutdown ); + +/** + * Open CMRC connection + * + * @v xfer Data transfer interface + * @v ibdev Infiniband device + * @v dgid Destination GID + * @v service_id Service ID + * @ret rc Returns status code + */ +int ib_cmrc_open ( struct interface *xfer, struct ib_device *ibdev, + union ib_gid *dgid, union ib_guid *service_id ) { + struct ib_cmrc_connection *cmrc; + int rc; + + /* Allocate and initialise structure */ + cmrc = zalloc ( sizeof ( *cmrc ) ); + if ( ! 
cmrc ) { + rc = -ENOMEM; + goto err_alloc; + } + ref_init ( &cmrc->refcnt, NULL ); + intf_init ( &cmrc->xfer, &ib_cmrc_xfer_desc, &cmrc->refcnt ); + cmrc->ibdev = ibdev; + memcpy ( &cmrc->dgid, dgid, sizeof ( cmrc->dgid ) ); + memcpy ( &cmrc->service_id, service_id, sizeof ( cmrc->service_id ) ); + process_init_stopped ( &cmrc->shutdown, &ib_cmrc_shutdown_desc, + &cmrc->refcnt ); + + /* Open Infiniband device */ + if ( ( rc = ib_open ( ibdev ) ) != 0 ) { + DBGC ( cmrc, "CMRC %p could not open device: %s\n", + cmrc, strerror ( rc ) ); + goto err_open; + } + + /* Create completion queue */ + cmrc->cq = ib_create_cq ( ibdev, IB_CMRC_NUM_CQES, + &ib_cmrc_completion_ops ); + if ( ! cmrc->cq ) { + DBGC ( cmrc, "CMRC %p could not create completion queue\n", + cmrc ); + rc = -ENOMEM; + goto err_create_cq; + } + + /* Create queue pair */ + cmrc->qp = ib_create_qp ( ibdev, IB_QPT_RC, IB_CMRC_NUM_SEND_WQES, + cmrc->cq, IB_CMRC_NUM_RECV_WQES, cmrc->cq, + &ib_cmrc_queue_pair_ops ); + if ( ! cmrc->qp ) { + DBGC ( cmrc, "CMRC %p could not create queue pair\n", cmrc ); + rc = -ENOMEM; + goto err_create_qp; + } + ib_qp_set_ownerdata ( cmrc->qp, cmrc ); + DBGC ( cmrc, "CMRC %p using QPN %lx\n", cmrc, cmrc->qp->qpn ); + + /* Attach to parent interface, transfer reference (implicitly) + * to our shutdown process, and return. + */ + intf_plug_plug ( &cmrc->xfer, xfer ); + return 0; + + ib_destroy_qp ( ibdev, cmrc->qp ); + err_create_qp: + ib_destroy_cq ( ibdev, cmrc->cq ); + err_create_cq: + ib_close ( ibdev ); + err_open: + ref_put ( &cmrc->refcnt ); + err_alloc: + return rc; +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_mcast.c b/qemu/roms/ipxe/src/net/infiniband/ib_mcast.c new file mode 100644 index 000000000..0a5e72a37 --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_mcast.c @@ -0,0 +1,213 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <ipxe/list.h> +#include <ipxe/infiniband.h> +#include <ipxe/ib_mi.h> +#include <ipxe/ib_mcast.h> + +/** @file + * + * Infiniband multicast groups + * + */ + +/** + * Generate multicast membership MAD + * + * @v ibdev Infiniband device + * @v gid Multicast GID + * @v join Join (rather than leave) group + * @v mad MAD to fill in + */ +static void ib_mcast_mad ( struct ib_device *ibdev, union ib_gid *gid, + int join, union ib_mad *mad ) { + struct ib_mad_sa *sa = &mad->sa; + + /* Construct multicast membership record request */ + memset ( sa, 0, sizeof ( *sa ) ); + sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + sa->mad_hdr.class_version = IB_SA_CLASS_VERSION; + sa->mad_hdr.method = + ( join ? 
IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE ); + sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ); + sa->sa_hdr.comp_mask[1] = + htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_JOIN_STATE ); + sa->sa_data.mc_member_record.scope__join_state = 1; + memcpy ( &sa->sa_data.mc_member_record.mgid, gid, + sizeof ( sa->sa_data.mc_member_record.mgid ) ); + memcpy ( &sa->sa_data.mc_member_record.port_gid, &ibdev->gid, + sizeof ( sa->sa_data.mc_member_record.port_gid ) ); +} + +/** + * Handle multicast membership record join response + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + * @v rc Status code + * @v mad Received MAD (or NULL on error) + * @v av Source address vector (or NULL on error) + */ +static void ib_mcast_complete ( struct ib_device *ibdev, + struct ib_mad_interface *mi __unused, + struct ib_mad_transaction *madx, + int rc, union ib_mad *mad, + struct ib_address_vector *av __unused ) { + struct ib_mc_membership *membership = ib_madx_get_ownerdata ( madx ); + struct ib_queue_pair *qp = membership->qp; + union ib_gid *gid = &membership->gid; + struct ib_mc_member_record *mc_member_record = + &mad->sa.sa_data.mc_member_record; + int joined; + unsigned long qkey; + + /* Report failures */ + if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) )) + rc = -ENOTCONN; + if ( rc != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx join failed: %s\n", + ibdev, qp->qpn, strerror ( rc ) ); + goto out; + } + + /* Extract values from MAD */ + joined = ( mad->hdr.method == IB_MGMT_METHOD_GET_RESP ); + qkey = ntohl ( mc_member_record->qkey ); + DBGC ( ibdev, "IBDEV %p QPN %lx %s " IB_GID_FMT " qkey %lx\n", + ibdev, qp->qpn, ( joined ? 
"joined" : "left" ), + IB_GID_ARGS ( gid ), qkey ); + + /* Set queue key */ + qp->qkey = qkey; + if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not modify qkey: %s\n", + ibdev, qp->qpn, strerror ( rc ) ); + goto out; + } + + out: + /* Destroy the completed transaction */ + ib_destroy_madx ( ibdev, mi, madx ); + membership->madx = NULL; + + /* Hand off to upper completion handler */ + membership->complete ( ibdev, qp, membership, rc, mad ); +} + +/** Multicast membership management transaction completion operations */ +static struct ib_mad_transaction_operations ib_mcast_op = { + .complete = ib_mcast_complete, +}; + +/** + * Join multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v membership Multicast group membership + * @v gid Multicast GID to join + * @v joined Join completion handler + * @ret rc Return status code + */ +int ib_mcast_join ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_mc_membership *membership, union ib_gid *gid, + void ( * complete ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_mc_membership *membership, + int rc, union ib_mad *mad ) ) { + union ib_mad mad; + int rc; + + DBGC ( ibdev, "IBDEV %p QPN %lx joining " IB_GID_FMT "\n", + ibdev, qp->qpn, IB_GID_ARGS ( gid ) ); + + /* Initialise structure */ + membership->qp = qp; + memcpy ( &membership->gid, gid, sizeof ( membership->gid ) ); + membership->complete = complete; + + /* Attach queue pair to multicast GID */ + if ( ( rc = ib_mcast_attach ( ibdev, qp, gid ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not attach: %s\n", + ibdev, qp->qpn, strerror ( rc ) ); + goto err_mcast_attach; + } + + /* Initiate multicast membership join */ + ib_mcast_mad ( ibdev, gid, 1, &mad ); + membership->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, NULL, + &ib_mcast_op ); + if ( ! 
membership->madx ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not create join " + "transaction\n", ibdev, qp->qpn ); + rc = -ENOMEM; + goto err_create_madx; + } + ib_madx_set_ownerdata ( membership->madx, membership ); + + return 0; + + ib_destroy_madx ( ibdev, ibdev->gsi, membership->madx ); + err_create_madx: + ib_mcast_detach ( ibdev, qp, gid ); + err_mcast_attach: + return rc; +} + +/** + * Leave multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v membership Multicast group membership + */ +void ib_mcast_leave ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_mc_membership *membership ) { + union ib_gid *gid = &membership->gid; + union ib_mad mad; + int rc; + + DBGC ( ibdev, "IBDEV %p QPN %lx leaving " IB_GID_FMT "\n", + ibdev, qp->qpn, IB_GID_ARGS ( gid ) ); + + /* Detach from multicast GID */ + ib_mcast_detach ( ibdev, qp, &membership->gid ); + + /* Cancel multicast membership join, if applicable */ + if ( membership->madx ) { + ib_destroy_madx ( ibdev, ibdev->gsi, membership->madx ); + membership->madx = NULL; + } + + /* Send a single group leave MAD */ + ib_mcast_mad ( ibdev, &membership->gid, 0, &mad ); + if ( ( rc = ib_mi_send ( ibdev, ibdev->gsi, &mad, NULL ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not send leave request: " + "%s\n", ibdev, qp->qpn, strerror ( rc ) ); + } +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_mi.c b/qemu/roms/ipxe/src/net/infiniband/ib_mi.c new file mode 100644 index 000000000..ef6d539f1 --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_mi.c @@ -0,0 +1,415 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <byteswap.h> +#include <ipxe/infiniband.h> +#include <ipxe/iobuf.h> +#include <ipxe/ib_mi.h> + +/** + * @file + * + * Infiniband management interfaces + * + */ + +/** Management interface number of send WQEs + * + * This is a policy decision. + */ +#define IB_MI_NUM_SEND_WQES 4 + +/** Management interface number of receive WQEs + * + * This is a policy decision. 
+ */ +#define IB_MI_NUM_RECV_WQES 2 + +/** Management interface number of completion queue entries + * + * This is a policy decision + */ +#define IB_MI_NUM_CQES 8 + +/** TID magic signature */ +#define IB_MI_TID_MAGIC ( ( 'i' << 24 ) | ( 'P' << 16 ) | ( 'X' << 8 ) | 'E' ) + +/** TID to use for next MAD */ +static unsigned int next_tid; + +/** + * Handle received MAD + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + * @ret rc Return status code + */ +static int ib_mi_handle ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_mad_hdr *hdr = &mad->hdr; + struct ib_mad_transaction *madx; + struct ib_mad_agent *agent; + + /* Look for a matching transaction by TID */ + list_for_each_entry ( madx, &mi->madx, list ) { + if ( memcmp ( &hdr->tid, &madx->mad.hdr.tid, + sizeof ( hdr->tid ) ) != 0 ) + continue; + /* Found a matching transaction */ + madx->op->complete ( ibdev, mi, madx, 0, mad, av ); + return 0; + } + + /* If there is no matching transaction, look for a listening agent */ + for_each_table_entry ( agent, IB_MAD_AGENTS ) { + if ( ( ( agent->mgmt_class & IB_MGMT_CLASS_MASK ) != + ( hdr->mgmt_class & IB_MGMT_CLASS_MASK ) ) || + ( agent->class_version != hdr->class_version ) || + ( agent->attr_id != hdr->attr_id ) ) + continue; + /* Found a matching agent */ + agent->handle ( ibdev, mi, mad, av ); + return 0; + } + + /* Otherwise, ignore it */ + DBGC ( mi, "MI %p RX TID %08x%08x ignored\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) ); + return -ENOTSUP; +} + +/** + * Complete receive via management interface + * + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v dest Destination address vector + * @v source Source address vector + * @v iobuf I/O buffer + * @v rc Completion status code + */ +static void ib_mi_complete_recv ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct 
ib_address_vector *dest __unused, + struct ib_address_vector *source, + struct io_buffer *iobuf, int rc ) { + struct ib_mad_interface *mi = ib_qp_get_ownerdata ( qp ); + union ib_mad *mad; + struct ib_mad_hdr *hdr; + + /* Ignore errors */ + if ( rc != 0 ) { + DBGC ( mi, "MI %p RX error: %s\n", mi, strerror ( rc ) ); + goto out; + } + + /* Sanity checks */ + if ( iob_len ( iobuf ) != sizeof ( *mad ) ) { + DBGC ( mi, "MI %p RX bad size (%zd bytes)\n", + mi, iob_len ( iobuf ) ); + DBGC_HDA ( mi, 0, iobuf->data, iob_len ( iobuf ) ); + goto out; + } + mad = iobuf->data; + hdr = &mad->hdr; + if ( hdr->base_version != IB_MGMT_BASE_VERSION ) { + DBGC ( mi, "MI %p RX unsupported base version %x\n", + mi, hdr->base_version ); + DBGC_HDA ( mi, 0, mad, sizeof ( *mad ) ); + goto out; + } + DBGC ( mi, "MI %p RX TID %08x%08x (%02x,%02x,%02x,%04x) status " + "%04x\n", mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ), + hdr->mgmt_class, hdr->class_version, hdr->method, + ntohs ( hdr->attr_id ), ntohs ( hdr->status ) ); + DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) ); + + /* Handle MAD */ + if ( ( rc = ib_mi_handle ( ibdev, mi, mad, source ) ) != 0 ) + goto out; + + out: + free_iob ( iobuf ); +} + +/** Management interface completion operations */ +static struct ib_completion_queue_operations ib_mi_completion_ops = { + .complete_recv = ib_mi_complete_recv, +}; + +/** Management interface queue pair operations */ +static struct ib_queue_pair_operations ib_mi_queue_pair_ops = { + .alloc_iob = alloc_iob, +}; + +/** + * Transmit MAD + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad MAD + * @v av Destination address vector + * @ret rc Return status code + */ +int ib_mi_send ( struct ib_device *ibdev, struct ib_mad_interface *mi, + union ib_mad *mad, struct ib_address_vector *av ) { + struct ib_mad_hdr *hdr = &mad->hdr; + struct io_buffer *iobuf; + int rc; + + /* Set common fields */ + hdr->base_version = IB_MGMT_BASE_VERSION; + if ( ( hdr->tid[0] == 0 ) && ( 
hdr->tid[1] == 0 ) ) { + hdr->tid[0] = htonl ( IB_MI_TID_MAGIC ); + hdr->tid[1] = htonl ( ++next_tid ); + } + DBGC ( mi, "MI %p TX TID %08x%08x (%02x,%02x,%02x,%04x) status " + "%04x\n", mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ), + hdr->mgmt_class, hdr->class_version, hdr->method, + ntohs ( hdr->attr_id ), ntohs ( hdr->status ) ); + DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) ); + + /* Construct directed route portion of response, if necessary */ + if ( hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ) { + struct ib_mad_smp *smp = &mad->smp; + unsigned int hop_pointer; + unsigned int hop_count; + + smp->mad_hdr.status |= htons ( IB_SMP_STATUS_D_INBOUND ); + hop_pointer = smp->mad_hdr.class_specific.smp.hop_pointer; + hop_count = smp->mad_hdr.class_specific.smp.hop_count; + assert ( hop_count == hop_pointer ); + if ( hop_pointer < ( sizeof ( smp->return_path.hops ) / + sizeof ( smp->return_path.hops[0] ) ) ) { + smp->return_path.hops[hop_pointer] = ibdev->port; + } else { + DBGC ( mi, "MI %p TX TID %08x%08x invalid hop pointer " + "%d\n", mi, ntohl ( hdr->tid[0] ), + ntohl ( hdr->tid[1] ), hop_pointer ); + return -EINVAL; + } + } + + /* Construct I/O buffer */ + iobuf = alloc_iob ( sizeof ( *mad ) ); + if ( ! 
iobuf ) { + DBGC ( mi, "MI %p could not allocate buffer for TID " + "%08x%08x\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) ); + return -ENOMEM; + } + memcpy ( iob_put ( iobuf, sizeof ( *mad ) ), mad, sizeof ( *mad ) ); + + /* Send I/O buffer */ + if ( ( rc = ib_post_send ( ibdev, mi->qp, av, iobuf ) ) != 0 ) { + DBGC ( mi, "MI %p TX TID %08x%08x failed: %s\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ), + strerror ( rc ) ); + free_iob ( iobuf ); + return rc; + } + + return 0; +} + +/** + * Handle management transaction timer expiry + * + * @v timer Retry timer + * @v expired Failure indicator + */ +static void ib_mi_timer_expired ( struct retry_timer *timer, int expired ) { + struct ib_mad_transaction *madx = + container_of ( timer, struct ib_mad_transaction, timer ); + struct ib_mad_interface *mi = madx->mi; + struct ib_device *ibdev = mi->ibdev; + struct ib_mad_hdr *hdr = &madx->mad.hdr; + + /* Abandon transaction if we have tried too many times */ + if ( expired ) { + DBGC ( mi, "MI %p abandoning TID %08x%08x\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) ); + madx->op->complete ( ibdev, mi, madx, -ETIMEDOUT, NULL, NULL ); + return; + } + + /* Restart retransmission timer */ + start_timer ( timer ); + + /* Resend MAD */ + ib_mi_send ( ibdev, mi, &madx->mad, &madx->av ); +} + +/** + * Create management transaction + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad MAD to send + * @v av Destination address, or NULL to use SM's GSI + * @v op Management transaction operations + * @ret madx Management transaction, or NULL + */ +struct ib_mad_transaction * +ib_create_madx ( struct ib_device *ibdev, struct ib_mad_interface *mi, + union ib_mad *mad, struct ib_address_vector *av, + struct ib_mad_transaction_operations *op ) { + struct ib_mad_transaction *madx; + + /* Allocate and initialise structure */ + madx = zalloc ( sizeof ( *madx ) ); + if ( ! 
madx ) + return NULL; + timer_init ( &madx->timer, ib_mi_timer_expired, NULL ); + madx->mi = mi; + madx->op = op; + + /* Determine address vector */ + if ( av ) { + memcpy ( &madx->av, av, sizeof ( madx->av ) ); + } else { + madx->av.lid = ibdev->sm_lid; + madx->av.sl = ibdev->sm_sl; + madx->av.qpn = IB_QPN_GSI; + madx->av.qkey = IB_QKEY_GSI; + } + + /* Copy MAD */ + memcpy ( &madx->mad, mad, sizeof ( madx->mad ) ); + + /* Add to list and start timer to send initial MAD */ + list_add ( &madx->list, &mi->madx ); + start_timer_nodelay ( &madx->timer ); + + return madx; +} + +/** + * Destroy management transaction + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + */ +void ib_destroy_madx ( struct ib_device *ibdev __unused, + struct ib_mad_interface *mi __unused, + struct ib_mad_transaction *madx ) { + + /* Stop timer and remove from list */ + stop_timer ( &madx->timer ); + list_del ( &madx->list ); + + /* Free transaction */ + free ( madx ); +} + +/** + * Create management interface + * + * @v ibdev Infiniband device + * @v type Queue pair type + * @ret mi Management agent, or NULL + */ +struct ib_mad_interface * ib_create_mi ( struct ib_device *ibdev, + enum ib_queue_pair_type type ) { + struct ib_mad_interface *mi; + int rc; + + /* Allocate and initialise fields */ + mi = zalloc ( sizeof ( *mi ) ); + if ( ! mi ) + goto err_alloc; + mi->ibdev = ibdev; + INIT_LIST_HEAD ( &mi->madx ); + + /* Create completion queue */ + mi->cq = ib_create_cq ( ibdev, IB_MI_NUM_CQES, &ib_mi_completion_ops ); + if ( ! mi->cq ) { + DBGC ( mi, "MI %p could not allocate completion queue\n", mi ); + goto err_create_cq; + } + + /* Create queue pair */ + mi->qp = ib_create_qp ( ibdev, type, IB_MI_NUM_SEND_WQES, mi->cq, + IB_MI_NUM_RECV_WQES, mi->cq, + &ib_mi_queue_pair_ops ); + if ( ! 
mi->qp ) { + DBGC ( mi, "MI %p could not allocate queue pair\n", mi ); + goto err_create_qp; + } + ib_qp_set_ownerdata ( mi->qp, mi ); + DBGC ( mi, "MI %p (%s) running on QPN %#lx\n", + mi, ( ( type == IB_QPT_SMI ) ? "SMI" : "GSI" ), mi->qp->qpn ); + + /* Set queue key */ + mi->qp->qkey = ( ( type == IB_QPT_SMI ) ? IB_QKEY_SMI : IB_QKEY_GSI ); + if ( ( rc = ib_modify_qp ( ibdev, mi->qp ) ) != 0 ) { + DBGC ( mi, "MI %p could not set queue key: %s\n", + mi, strerror ( rc ) ); + goto err_modify_qp; + } + + /* Fill receive ring */ + ib_refill_recv ( ibdev, mi->qp ); + return mi; + + err_modify_qp: + ib_destroy_qp ( ibdev, mi->qp ); + err_create_qp: + ib_destroy_cq ( ibdev, mi->cq ); + err_create_cq: + free ( mi ); + err_alloc: + return NULL; +} + +/** + * Destroy management interface + * + * @v mi Management interface + */ +void ib_destroy_mi ( struct ib_device *ibdev, struct ib_mad_interface *mi ) { + struct ib_mad_transaction *madx; + struct ib_mad_transaction *tmp; + + /* Flush any outstanding requests */ + list_for_each_entry_safe ( madx, tmp, &mi->madx, list ) { + DBGC ( mi, "MI %p destroyed while TID %08x%08x in progress\n", + mi, ntohl ( madx->mad.hdr.tid[0] ), + ntohl ( madx->mad.hdr.tid[1] ) ); + madx->op->complete ( ibdev, mi, madx, -ECANCELED, NULL, NULL ); + } + + ib_destroy_qp ( ibdev, mi->qp ); + ib_destroy_cq ( ibdev, mi->cq ); + free ( mi ); +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_packet.c b/qemu/roms/ipxe/src/net/infiniband/ib_packet.c new file mode 100644 index 000000000..6c850e39b --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_packet.c @@ -0,0 +1,249 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/iobuf.h> +#include <ipxe/infiniband.h> +#include <ipxe/ib_packet.h> + +/** + * @file + * + * Infiniband Packet Formats + * + */ + +/** + * Add IB headers + * + * @v ibdev Infiniband device + * @v iobuf I/O buffer to contain headers + * @v qp Queue pair + * @v payload_len Payload length + * @v dest Destination address vector + * @ret rc Return status code + */ +int ib_push ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_queue_pair *qp, size_t payload_len, + const struct ib_address_vector *dest ) { + struct ib_local_route_header *lrh; + struct ib_global_route_header *grh; + struct ib_base_transport_header *bth; + struct ib_datagram_extended_transport_header *deth; + size_t orig_iob_len = iob_len ( iobuf ); + size_t pad_len; + size_t lrh_len; + size_t grh_len; + unsigned int vl; + unsigned int lnh; + + DBGC2 ( ibdev, "IBDEV %p TX %04x:%08lx => %04x:%08lx (key %08lx)\n", + ibdev, ibdev->lid, qp->ext_qpn, dest->lid, dest->qpn, + dest->qkey ); + + /* Calculate packet length */ + pad_len = ( (-payload_len) & 0x3 ); + payload_len += pad_len; + payload_len += 4; /* ICRC */ + + /* Reserve space for headers */ + orig_iob_len = iob_len ( iobuf ); + deth = iob_push ( iobuf, sizeof ( *deth ) ); + bth = iob_push ( iobuf, sizeof ( *bth ) ); + grh_len = ( payload_len + iob_len ( iobuf ) - orig_iob_len ); + grh = ( dest->gid_present ? 
+ iob_push ( iobuf, sizeof ( *grh ) ) : NULL ); + lrh = iob_push ( iobuf, sizeof ( *lrh ) ); + lrh_len = ( payload_len + iob_len ( iobuf ) - orig_iob_len ); + + /* Construct LRH */ + vl = ( ( qp->ext_qpn == IB_QPN_SMI ) ? IB_VL_SMP : IB_VL_DEFAULT ); + lrh->vl__lver = ( vl << 4 ); + lnh = ( grh ? IB_LNH_GRH : IB_LNH_BTH ); + lrh->sl__lnh = ( ( dest->sl << 4 ) | lnh ); + lrh->dlid = htons ( dest->lid ); + lrh->length = htons ( lrh_len >> 2 ); + lrh->slid = htons ( ibdev->lid ); + + /* Construct GRH, if required */ + if ( grh ) { + grh->ipver__tclass__flowlabel = + htonl ( IB_GRH_IPVER_IPv6 << 28 ); + grh->paylen = htons ( grh_len ); + grh->nxthdr = IB_GRH_NXTHDR_IBA; + grh->hoplmt = 0; + memcpy ( &grh->sgid, &ibdev->gid, sizeof ( grh->sgid ) ); + memcpy ( &grh->dgid, &dest->gid, sizeof ( grh->dgid ) ); + } + + /* Construct BTH */ + bth->opcode = BTH_OPCODE_UD_SEND; + bth->se__m__padcnt__tver = ( pad_len << 4 ); + bth->pkey = htons ( ibdev->pkey ); + bth->dest_qp = htonl ( dest->qpn ); + bth->ack__psn = htonl ( ( qp->send.psn++ ) & 0xffffffUL ); + + /* Construct DETH */ + deth->qkey = htonl ( dest->qkey ); + deth->src_qp = htonl ( qp->ext_qpn ); + + DBGCP_HDA ( ibdev, 0, iobuf->data, + ( iob_len ( iobuf ) - orig_iob_len ) ); + + return 0; +} + +/** + * Remove IB headers + * + * @v ibdev Infiniband device + * @v iobuf I/O buffer containing headers + * @v qp Queue pair to fill in, or NULL + * @v payload_len Payload length to fill in, or NULL + * @v dest Destination address vector to fill in + * @v source Source address vector to fill in + * @ret rc Return status code + */ +int ib_pull ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_queue_pair **qp, size_t *payload_len, + struct ib_address_vector *dest, + struct ib_address_vector *source ) { + struct ib_local_route_header *lrh; + struct ib_global_route_header *grh; + struct ib_base_transport_header *bth; + struct ib_datagram_extended_transport_header *deth; + size_t orig_iob_len = iob_len ( iobuf ); + 
unsigned int lnh; + size_t pad_len; + + /* Clear return values */ + if ( qp ) + *qp = NULL; + if ( payload_len ) + *payload_len = 0; + memset ( dest, 0, sizeof ( *dest ) ); + memset ( source, 0, sizeof ( *source ) ); + + /* Extract LRH */ + if ( iob_len ( iobuf ) < sizeof ( *lrh ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for LRH\n", + ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + lrh = iobuf->data; + iob_pull ( iobuf, sizeof ( *lrh ) ); + dest->lid = ntohs ( lrh->dlid ); + dest->sl = ( lrh->sl__lnh >> 4 ); + source->lid = ntohs ( lrh->slid ); + source->sl = ( lrh->sl__lnh >> 4 ); + lnh = ( lrh->sl__lnh & 0x3 ); + + /* Reject unsupported packets */ + if ( ! ( ( lnh == IB_LNH_BTH ) || ( lnh == IB_LNH_GRH ) ) ) { + DBGC ( ibdev, "IBDEV %p RX unsupported LNH %x\n", + ibdev, lnh ); + return -ENOTSUP; + } + + /* Extract GRH, if present */ + if ( lnh == IB_LNH_GRH ) { + if ( iob_len ( iobuf ) < sizeof ( *grh ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) " + "for GRH\n", ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + grh = iobuf->data; + iob_pull ( iobuf, sizeof ( *grh ) ); + dest->gid_present = 1; + memcpy ( &dest->gid, &grh->dgid, sizeof ( dest->gid ) ); + source->gid_present = 1; + memcpy ( &source->gid, &grh->sgid, sizeof ( source->gid ) ); + } else { + grh = NULL; + } + + /* Extract BTH */ + if ( iob_len ( iobuf ) < sizeof ( *bth ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for BTH\n", + ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + bth = iobuf->data; + iob_pull ( iobuf, sizeof ( *bth ) ); + if ( bth->opcode != BTH_OPCODE_UD_SEND ) { + DBGC ( ibdev, "IBDEV %p unsupported BTH opcode %x\n", + ibdev, bth->opcode ); + return -ENOTSUP; + } + dest->qpn = ntohl ( bth->dest_qp ); + + /* Extract DETH */ + if ( iob_len ( iobuf ) < sizeof ( *deth ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for DETH\n", + ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + deth = iobuf->data; + iob_pull ( iobuf, sizeof ( *deth ) 
); + source->qpn = ntohl ( deth->src_qp ); + source->qkey = ntohl ( deth->qkey ); + + /* Calculate payload length, if applicable */ + if ( payload_len ) { + pad_len = ( ( bth->se__m__padcnt__tver >> 4 ) & 0x3 ); + *payload_len = ( ( ntohs ( lrh->length ) << 2 ) + - ( orig_iob_len - iob_len ( iobuf ) ) + - pad_len - 4 /* ICRC */ ); + } + + /* Determine destination QP, if applicable */ + if ( qp ) { + if ( IB_LID_MULTICAST ( dest->lid ) && grh ) { + if ( ! ( *qp = ib_find_qp_mgid ( ibdev, &grh->dgid ))){ + DBGC ( ibdev, "IBDEV %p RX for unknown MGID " + IB_GID_FMT "\n", + ibdev, IB_GID_ARGS ( &grh->dgid ) ); + return -ENODEV; + } + } else { + if ( ! ( *qp = ib_find_qp_qpn ( ibdev, dest->qpn ) ) ) { + DBGC ( ibdev, "IBDEV %p RX for nonexistent " + "QPN %lx\n", ibdev, dest->qpn ); + return -ENODEV; + } + } + assert ( *qp ); + } + + DBGC2 ( ibdev, "IBDEV %p RX %04x:%08lx <= %04x:%08lx (key %08x)\n", + ibdev, dest->lid, ( IB_LID_MULTICAST ( dest->lid ) ? + ( qp ? (*qp)->ext_qpn : -1UL ) : dest->qpn ), + source->lid, source->qpn, ntohl ( deth->qkey ) ); + DBGCP_HDA ( ibdev, 0, + ( iobuf->data - ( orig_iob_len - iob_len ( iobuf ) ) ), + ( orig_iob_len - iob_len ( iobuf ) ) ); + + return 0; +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_pathrec.c b/qemu/roms/ipxe/src/net/infiniband/ib_pathrec.c new file mode 100644 index 000000000..1b95cbfa8 --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_pathrec.c @@ -0,0 +1,289 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <ipxe/infiniband.h> +#include <ipxe/ib_mi.h> +#include <ipxe/ib_pathrec.h> + +/** @file + * + * Infiniband path lookups + * + */ + +/** + * Handle path transaction completion + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + * @v rc Status code + * @v mad Received MAD (or NULL on error) + * @v av Source address vector (or NULL on error) + */ +static void ib_path_complete ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + struct ib_mad_transaction *madx, + int rc, union ib_mad *mad, + struct ib_address_vector *av __unused ) { + struct ib_path *path = ib_madx_get_ownerdata ( madx ); + union ib_gid *dgid = &path->av.gid; + struct ib_path_record *pathrec = &mad->sa.sa_data.path_record; + + /* Report failures */ + if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) )) + rc = -ENETUNREACH; + if ( rc != 0 ) { + DBGC ( ibdev, "IBDEV %p path lookup for " IB_GID_FMT + " failed: %s\n", + ibdev, IB_GID_ARGS ( dgid ), strerror ( rc ) ); + goto out; + } + + /* Extract values from MAD */ + path->av.lid = ntohs ( pathrec->dlid ); + path->av.sl = ( pathrec->reserved__sl & 0x0f ); + path->av.rate = ( pathrec->rate_selector__rate & 0x3f ); + DBGC ( ibdev, "IBDEV %p path to " IB_GID_FMT " is %04x sl %d rate " + "%d\n", ibdev, IB_GID_ARGS ( dgid ), path->av.lid, path->av.sl, + path->av.rate ); + + out: + /* Destroy the completed transaction */ + ib_destroy_madx ( ibdev, mi, madx ); + path->madx = NULL; + + /* Hand off to upper completion handler */ + path->op->complete ( 
ibdev, path, rc, &path->av ); +} + +/** Path transaction completion operations */ +static struct ib_mad_transaction_operations ib_path_op = { + .complete = ib_path_complete, +}; + +/** + * Create path + * + * @v ibdev Infiniband device + * @v av Address vector to complete + * @v op Path operations + * @ret path Path + */ +struct ib_path * +ib_create_path ( struct ib_device *ibdev, struct ib_address_vector *av, + struct ib_path_operations *op ) { + struct ib_path *path; + union ib_mad mad; + struct ib_mad_sa *sa = &mad.sa; + + /* Allocate and initialise structure */ + path = zalloc ( sizeof ( *path ) ); + if ( ! path ) + goto err_alloc_path; + path->ibdev = ibdev; + memcpy ( &path->av, av, sizeof ( path->av ) ); + path->op = op; + + /* Construct path request */ + memset ( sa, 0, sizeof ( *sa ) ); + sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + sa->mad_hdr.class_version = IB_SA_CLASS_VERSION; + sa->mad_hdr.method = IB_MGMT_METHOD_GET; + sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); + sa->sa_hdr.comp_mask[1] = + htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID ); + memcpy ( &sa->sa_data.path_record.dgid, &path->av.gid, + sizeof ( sa->sa_data.path_record.dgid ) ); + memcpy ( &sa->sa_data.path_record.sgid, &ibdev->gid, + sizeof ( sa->sa_data.path_record.sgid ) ); + + /* Create management transaction */ + path->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, NULL, + &ib_path_op ); + if ( ! path->madx ) + goto err_create_madx; + ib_madx_set_ownerdata ( path->madx, path ); + + return path; + + ib_destroy_madx ( ibdev, ibdev->gsi, path->madx ); + err_create_madx: + free ( path ); + err_alloc_path: + return NULL; +} + +/** + * Destroy path + * + * @v ibdev Infiniband device + * @v path Path + */ +void ib_destroy_path ( struct ib_device *ibdev, struct ib_path *path ) { + + if ( path->madx ) + ib_destroy_madx ( ibdev, ibdev->gsi, path->madx ); + free ( path ); +} + +/** Number of path cache entries + * + * Must be a power of two. 
+ */ +#define IB_NUM_CACHED_PATHS 4 + +/** A cached path */ +struct ib_cached_path { + /** Path */ + struct ib_path *path; +}; + +/** Path cache */ +static struct ib_cached_path ib_path_cache[IB_NUM_CACHED_PATHS]; + +/** Oldest path cache entry index */ +static unsigned int ib_path_cache_idx; + +/** + * Find path cache entry + * + * @v ibdev Infiniband device + * @v dgid Destination GID + * @ret path Path cache entry, or NULL + */ +static struct ib_cached_path * +ib_find_path_cache_entry ( struct ib_device *ibdev, union ib_gid *dgid ) { + struct ib_cached_path *cached; + unsigned int i; + + for ( i = 0 ; i < IB_NUM_CACHED_PATHS ; i++ ) { + cached = &ib_path_cache[i]; + if ( ! cached->path ) + continue; + if ( cached->path->ibdev != ibdev ) + continue; + if ( memcmp ( &cached->path->av.gid, dgid, + sizeof ( cached->path->av.gid ) ) != 0 ) + continue; + return cached; + } + + return NULL; +} + +/** + * Handle cached path transaction completion + * + * @v ibdev Infiniband device + * @v path Path + * @v rc Status code + * @v av Address vector, or NULL on error + */ +static void ib_cached_path_complete ( struct ib_device *ibdev, + struct ib_path *path, int rc, + struct ib_address_vector *av __unused ) { + struct ib_cached_path *cached = ib_path_get_ownerdata ( path ); + + /* If the transaction failed, erase the cache entry */ + if ( rc != 0 ) { + /* Destroy the old cache entry */ + ib_destroy_path ( ibdev, path ); + memset ( cached, 0, sizeof ( *cached ) ); + return; + } + + /* Do not destroy the completed transaction; we still need to + * refer to the resolved path. + */ +} + +/** Cached path transaction completion operations */ +static struct ib_path_operations ib_cached_path_op = { + .complete = ib_cached_path_complete, +}; + +/** + * Resolve path + * + * @v ibdev Infiniband device + * @v av Address vector to complete + * @ret rc Return status code + * + * This provides a non-transactional way to resolve a path, via a + * cache similar to ARP. 
+ */ +int ib_resolve_path ( struct ib_device *ibdev, struct ib_address_vector *av ) { + union ib_gid *gid = &av->gid; + struct ib_cached_path *cached; + unsigned int cache_idx; + + /* Sanity check */ + if ( ! av->gid_present ) { + DBGC ( ibdev, "IBDEV %p attempt to look up path without GID\n", + ibdev ); + return -EINVAL; + } + + /* Look in cache for a matching entry */ + cached = ib_find_path_cache_entry ( ibdev, gid ); + if ( cached && cached->path->av.lid ) { + /* Populated entry found */ + av->lid = cached->path->av.lid; + av->rate = cached->path->av.rate; + av->sl = cached->path->av.sl; + DBGC2 ( ibdev, "IBDEV %p cache hit for " IB_GID_FMT "\n", + ibdev, IB_GID_ARGS ( gid ) ); + return 0; + } + DBGC ( ibdev, "IBDEV %p cache miss for " IB_GID_FMT "%s\n", ibdev, + IB_GID_ARGS ( gid ), ( cached ? " (in progress)" : "" ) ); + + /* If lookup is already in progress, do nothing */ + if ( cached ) + return -ENOENT; + + /* Locate a new cache entry to use */ + cache_idx = ( (ib_path_cache_idx++) % IB_NUM_CACHED_PATHS ); + cached = &ib_path_cache[cache_idx]; + + /* Destroy the old cache entry */ + if ( cached->path ) + ib_destroy_path ( ibdev, cached->path ); + memset ( cached, 0, sizeof ( *cached ) ); + + /* Create new path */ + cached->path = ib_create_path ( ibdev, av, &ib_cached_path_op ); + if ( ! cached->path ) { + DBGC ( ibdev, "IBDEV %p could not create path\n", + ibdev ); + return -ENOMEM; + } + ib_path_set_ownerdata ( cached->path, cached ); + + /* Not found yet */ + return -ENOENT; +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_sma.c b/qemu/roms/ipxe/src/net/infiniband/ib_sma.c new file mode 100644 index 000000000..86553732a --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_sma.c @@ -0,0 +1,371 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <byteswap.h> +#include <ipxe/settings.h> +#include <ipxe/infiniband.h> +#include <ipxe/iobuf.h> +#include <ipxe/ib_mi.h> +#include <ipxe/ib_sma.h> + +/** + * @file + * + * Infiniband Subnet Management Agent + * + */ + +/** + * Node information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_node_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_node_info *node_info = &mad->smp.smp_data.node_info; + int rc; + + /* Fill in information */ + memset ( node_info, 0, sizeof ( *node_info ) ); + node_info->base_version = IB_MGMT_BASE_VERSION; + node_info->class_version = IB_SMP_CLASS_VERSION; + node_info->node_type = IB_NODE_TYPE_HCA; + node_info->num_ports = ib_count_ports ( ibdev ); + memcpy ( &node_info->sys_guid, &ibdev->node_guid, + sizeof ( node_info->sys_guid ) ); + memcpy ( &node_info->node_guid, &ibdev->node_guid, + sizeof ( node_info->node_guid ) ); + memcpy ( &node_info->port_guid, &ibdev->gid.s.guid, + 
sizeof ( node_info->port_guid ) ); + node_info->partition_cap = htons ( 1 ); + node_info->local_port_num = ibdev->port; + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send NodeInfo GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * Node description + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_node_desc ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_node_desc *node_desc = &mad->smp.smp_data.node_desc; + union ib_guid *guid = &ibdev->node_guid; + char hostname[ sizeof ( node_desc->node_string ) ]; + int hostname_len; + int rc; + + /* Fill in information */ + memset ( node_desc, 0, sizeof ( *node_desc ) ); + hostname_len = fetch_string_setting ( NULL, &hostname_setting, + hostname, sizeof ( hostname ) ); + snprintf ( node_desc->node_string, sizeof ( node_desc->node_string ), + "iPXE %s%s%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x (%s)", + hostname, ( ( hostname_len >= 0 ) ? 
" " : "" ), + guid->bytes[0], guid->bytes[1], guid->bytes[2], + guid->bytes[3], guid->bytes[4], guid->bytes[5], + guid->bytes[6], guid->bytes[7], ibdev->dev->name ); + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send NodeDesc GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * GUID information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_guid_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_guid_info *guid_info = &mad->smp.smp_data.guid_info; + int rc; + + /* Fill in information */ + memset ( guid_info, 0, sizeof ( *guid_info ) ); + memcpy ( guid_info->guid[0], &ibdev->gid.s.guid, + sizeof ( guid_info->guid[0] ) ); + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send GuidInfo GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * Set port information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @ret rc Return status code + */ +static int ib_sma_set_port_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad ) { + const struct ib_port_info *port_info = &mad->smp.smp_data.port_info; + unsigned int link_width_enabled; + unsigned int link_speed_enabled; + int rc; + + /* Set parameters */ + memcpy ( &ibdev->gid.s.prefix, port_info->gid_prefix, + sizeof ( ibdev->gid.s.prefix ) ); + ibdev->lid = ntohs ( port_info->lid ); + ibdev->sm_lid = ntohs ( port_info->mastersm_lid ); + if ( ( link_width_enabled = port_info->link_width_enabled ) ) + ibdev->link_width_enabled = link_width_enabled; + if ( ( link_speed_enabled = + ( 
port_info->link_speed_active__link_speed_enabled & 0xf ) ) ) + ibdev->link_speed_enabled = link_speed_enabled; + ibdev->sm_sl = ( port_info->neighbour_mtu__mastersm_sl & 0xf ); + DBGC ( mi, "SMA %p set LID %04x SMLID %04x link width %02x speed " + "%02x\n", mi, ibdev->lid, ibdev->sm_lid, + ibdev->link_width_enabled, ibdev->link_speed_enabled ); + + /* Update parameters on device */ + if ( ( rc = ib_set_port_info ( ibdev, mad ) ) != 0 ) { + DBGC ( mi, "SMA %p could not set port information: %s\n", + mi, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Port information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_port_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_port_info *port_info = &mad->smp.smp_data.port_info; + int rc; + + /* Set parameters if applicable */ + if ( mad->hdr.method == IB_MGMT_METHOD_SET ) { + if ( ( rc = ib_sma_set_port_info ( ibdev, mi, mad ) ) != 0 ) { + mad->hdr.status = + htons ( IB_MGMT_STATUS_UNSUPPORTED_METHOD_ATTR ); + /* Fall through to generate GetResponse */ + } + } + + /* Fill in information */ + memset ( port_info, 0, sizeof ( *port_info ) ); + memcpy ( port_info->gid_prefix, &ibdev->gid.s.prefix, + sizeof ( port_info->gid_prefix ) ); + port_info->lid = ntohs ( ibdev->lid ); + port_info->mastersm_lid = ntohs ( ibdev->sm_lid ); + port_info->local_port_num = ibdev->port; + port_info->link_width_enabled = ibdev->link_width_enabled; + port_info->link_width_supported = ibdev->link_width_supported; + port_info->link_width_active = ibdev->link_width_active; + port_info->link_speed_supported__port_state = + ( ( ibdev->link_speed_supported << 4 ) | ibdev->port_state ); + port_info->port_phys_state__link_down_def_state = + ( ( IB_PORT_PHYS_STATE_POLLING << 4 ) | + IB_PORT_PHYS_STATE_POLLING ); + port_info->link_speed_active__link_speed_enabled = 
+ ( ( ibdev->link_speed_active << 4 ) | + ibdev->link_speed_enabled ); + port_info->neighbour_mtu__mastersm_sl = + ( ( IB_MTU_2048 << 4 ) | ibdev->sm_sl ); + port_info->vl_cap__init_type = ( IB_VL_0 << 4 ); + port_info->init_type_reply__mtu_cap = IB_MTU_2048; + port_info->operational_vls__enforcement = ( IB_VL_0 << 4 ); + port_info->guid_cap = 1; + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send PortInfo GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * Set partition key table + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @ret rc Return status code + */ +static int ib_sma_set_pkey_table ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad ) { + struct ib_pkey_table *pkey_table = &mad->smp.smp_data.pkey_table; + int rc; + + /* Set parameters */ + ibdev->pkey = ntohs ( pkey_table->pkey[0] ); + DBGC ( mi, "SMA %p set pkey %04x\n", mi, ibdev->pkey ); + + /* Update parameters on device */ + if ( ( rc = ib_set_pkey_table ( ibdev, mad ) ) != 0 ) { + DBGC ( mi, "SMA %p could not set pkey table: %s\n", + mi, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Partition key table + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_pkey_table ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_pkey_table *pkey_table = &mad->smp.smp_data.pkey_table; + int rc; + + /* Set parameters, if applicable */ + if ( mad->hdr.method == IB_MGMT_METHOD_SET ) { + if ( ( rc = ib_sma_set_pkey_table ( ibdev, mi, mad ) ) != 0 ) { + mad->hdr.status = + htons ( IB_MGMT_STATUS_UNSUPPORTED_METHOD_ATTR ); + /* Fall through to generate GetResponse */ + } + } + + /* Fill in information */ + mad->hdr.method = 
IB_MGMT_METHOD_GET_RESP; + memset ( pkey_table, 0, sizeof ( *pkey_table ) ); + pkey_table->pkey[0] = htons ( ibdev->pkey ); + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send PKeyTable GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** Subnet management agent */ +struct ib_mad_agent ib_sma_agent[] __ib_mad_agent = { + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_NODE_INFO ), + .handle = ib_sma_node_info, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_NODE_DESC ), + .handle = ib_sma_node_desc, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_GUID_INFO ), + .handle = ib_sma_guid_info, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_PORT_INFO ), + .handle = ib_sma_port_info, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE ), + .handle = ib_sma_pkey_table, + }, +}; + +/** + * Create subnet management agent and interface + * + * @v ibdev Infiniband device + * @v mi Management interface + * @ret rc Return status code + */ +int ib_create_sma ( struct ib_device *ibdev, struct ib_mad_interface *mi ) { + + /* Nothing to do */ + DBGC ( ibdev, "IBDEV %p SMA using SMI %p\n", ibdev, mi ); + + return 0; +} + +/** + * Destroy subnet management agent and interface + * + * @v ibdev Infiniband device + * @v mi Management interface + */ +void ib_destroy_sma ( struct ib_device *ibdev __unused, + struct ib_mad_interface *mi __unused ) { + /* Nothing to do */ +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_smc.c 
b/qemu/roms/ipxe/src/net/infiniband/ib_smc.c new file mode 100644 index 000000000..4d947d568 --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_smc.c @@ -0,0 +1,256 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <byteswap.h> +#include <ipxe/infiniband.h> +#include <ipxe/ib_smc.h> + +/** + * @file + * + * Infiniband Subnet Management Client + * + */ + +/** + * Issue local MAD + * + * @v ibdev Infiniband device + * @v attr_id Attribute ID, in network byte order + * @v attr_mod Attribute modifier, in network byte order + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_mad ( struct ib_device *ibdev, uint16_t attr_id, + uint32_t attr_mod, ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Construct MAD */ + memset ( mad, 0, sizeof ( *mad ) ); + mad->hdr.base_version = IB_MGMT_BASE_VERSION; + mad->hdr.mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->hdr.class_version = 1; + mad->hdr.method = IB_MGMT_METHOD_GET; + mad->hdr.attr_id = attr_id; + mad->hdr.attr_mod = attr_mod; 
+ + /* Issue MAD */ + if ( ( rc = local_mad ( ibdev, mad ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Get node information + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_get_node_info ( struct ib_device *ibdev, + ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Issue MAD */ + if ( ( rc = ib_smc_mad ( ibdev, htons ( IB_SMP_ATTR_NODE_INFO ), 0, + local_mad, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get node info: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Get port information + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_get_port_info ( struct ib_device *ibdev, + ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Issue MAD */ + if ( ( rc = ib_smc_mad ( ibdev, htons ( IB_SMP_ATTR_PORT_INFO ), + htonl ( ibdev->port ), local_mad, mad )) !=0){ + DBGC ( ibdev, "IBDEV %p could not get port info: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Get GUID information + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_get_guid_info ( struct ib_device *ibdev, + ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Issue MAD */ + if ( ( rc = ib_smc_mad ( ibdev, htons ( IB_SMP_ATTR_GUID_INFO ), 0, + local_mad, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get GUID info: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Get partition key table + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_get_pkey_table 
( struct ib_device *ibdev, + ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Issue MAD */ + if ( ( rc = ib_smc_mad ( ibdev, htons ( IB_SMP_ATTR_PKEY_TABLE ), 0, + local_mad, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get pkey table: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Get Infiniband parameters using SMC + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @ret rc Return status code + */ +static int ib_smc_get ( struct ib_device *ibdev, ib_local_mad_t local_mad ) { + union ib_mad mad; + struct ib_node_info *node_info = &mad.smp.smp_data.node_info; + struct ib_port_info *port_info = &mad.smp.smp_data.port_info; + struct ib_guid_info *guid_info = &mad.smp.smp_data.guid_info; + struct ib_pkey_table *pkey_table = &mad.smp.smp_data.pkey_table; + int rc; + + /* Node info gives us the node GUID */ + if ( ( rc = ib_smc_get_node_info ( ibdev, local_mad, &mad ) ) != 0 ) + return rc; + memcpy ( &ibdev->node_guid, &node_info->node_guid, + sizeof ( ibdev->node_guid ) ); + + /* Port info gives us the link state, the first half of the + * port GID and the SM LID. 
+ */ + if ( ( rc = ib_smc_get_port_info ( ibdev, local_mad, &mad ) ) != 0 ) + return rc; + memcpy ( &ibdev->gid.s.prefix, port_info->gid_prefix, + sizeof ( ibdev->gid.s.prefix ) ); + ibdev->lid = ntohs ( port_info->lid ); + ibdev->sm_lid = ntohs ( port_info->mastersm_lid ); + ibdev->link_width_enabled = port_info->link_width_enabled; + ibdev->link_width_supported = port_info->link_width_supported; + ibdev->link_width_active = port_info->link_width_active; + ibdev->link_speed_supported = + ( port_info->link_speed_supported__port_state >> 4 ); + ibdev->port_state = + ( port_info->link_speed_supported__port_state & 0xf ); + ibdev->link_speed_active = + ( port_info->link_speed_active__link_speed_enabled >> 4 ); + ibdev->link_speed_enabled = + ( port_info->link_speed_active__link_speed_enabled & 0xf ); + ibdev->sm_sl = ( port_info->neighbour_mtu__mastersm_sl & 0xf ); + + /* GUID info gives us the second half of the port GID */ + if ( ( rc = ib_smc_get_guid_info ( ibdev, local_mad, &mad ) ) != 0 ) + return rc; + memcpy ( &ibdev->gid.s.guid, guid_info->guid[0], + sizeof ( ibdev->gid.s.guid ) ); + + /* Get partition key */ + if ( ( rc = ib_smc_get_pkey_table ( ibdev, local_mad, &mad ) ) != 0 ) + return rc; + ibdev->pkey = ntohs ( pkey_table->pkey[0] ); + + DBGC ( ibdev, "IBDEV %p port GID is " IB_GID_FMT "\n", + ibdev, IB_GID_ARGS ( &ibdev->gid ) ); + + return 0; +} + +/** + * Initialise Infiniband parameters using SMC + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @ret rc Return status code + */ +int ib_smc_init ( struct ib_device *ibdev, ib_local_mad_t local_mad ) { + int rc; + + /* Get MAD parameters */ + if ( ( rc = ib_smc_get ( ibdev, local_mad ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Update Infiniband parameters using SMC + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @ret rc Return status code + */ +int ib_smc_update ( struct ib_device *ibdev, ib_local_mad_t local_mad ) { + int 
rc; + + /* Get MAD parameters */ + if ( ( rc = ib_smc_get ( ibdev, local_mad ) ) != 0 ) + return rc; + + /* Notify Infiniband core of potential link state change */ + ib_link_state_changed ( ibdev ); + + return 0; +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_srp.c b/qemu/roms/ipxe/src/net/infiniband/ib_srp.c new file mode 100644 index 000000000..7b2b2b4ea --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_srp.c @@ -0,0 +1,581 @@ +/* + * Copyright (C) 2009 Fen Systems Ltd <mbrown@fensystems.co.uk>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +FILE_LICENCE ( BSD2 ); + +#include <stdlib.h> +#include <errno.h> +#include <ipxe/interface.h> +#include <ipxe/uri.h> +#include <ipxe/open.h> +#include <ipxe/base16.h> +#include <ipxe/acpi.h> +#include <ipxe/srp.h> +#include <ipxe/infiniband.h> +#include <ipxe/ib_cmrc.h> +#include <ipxe/ib_srp.h> + +/** + * @file + * + * SCSI RDMA Protocol over Infiniband + * + */ + +/* Disambiguate the various possible EINVALs */ +#define EINVAL_BYTE_STRING_LEN __einfo_error ( EINFO_EINVAL_BYTE_STRING_LEN ) +#define EINFO_EINVAL_BYTE_STRING_LEN __einfo_uniqify \ + ( EINFO_EINVAL, 0x01, "Invalid byte string length" ) +#define EINVAL_INTEGER __einfo_error ( EINFO_EINVAL_INTEGER ) +#define EINFO_EINVAL_INTEGER __einfo_uniqify \ + ( EINFO_EINVAL, 0x03, "Invalid integer" ) +#define EINVAL_RP_TOO_SHORT __einfo_error ( EINFO_EINVAL_RP_TOO_SHORT ) +#define EINFO_EINVAL_RP_TOO_SHORT __einfo_uniqify \ + ( EINFO_EINVAL, 0x04, "Root path too short" ) + +/****************************************************************************** + * + * IB SRP devices + * + ****************************************************************************** + */ + +/** An Infiniband SRP device */ +struct ib_srp_device { + /** Reference count */ + struct refcnt refcnt; + + /** SRP transport interface */ + struct interface srp; + /** CMRC interface */ + struct interface cmrc; + + /** Infiniband device */ + struct ib_device *ibdev; + + /** Destination GID (for boot firmware table) */ + union ib_gid dgid; + /** Service ID (for boot firmware table) */ + union ib_guid service_id; +}; + +/** + * Free IB SRP device + * + * @v refcnt Reference count + */ +static void ib_srp_free ( struct refcnt *refcnt ) { + struct ib_srp_device *ib_srp = + container_of ( refcnt, struct ib_srp_device, refcnt ); + + ibdev_put ( ib_srp->ibdev ); + free ( ib_srp ); +} + +/** + * Close IB SRP device + * + * @v ib_srp IB SRP device + * @v rc Reason for close + */ +static void ib_srp_close ( struct ib_srp_device *ib_srp, int rc ) { + + 
/* Shut down interfaces */ + intf_shutdown ( &ib_srp->cmrc, rc ); + intf_shutdown ( &ib_srp->srp, rc ); +} + +/** + * Describe IB SRP device in an ACPI table + * + * @v srpdev SRP device + * @v acpi ACPI table + * @v len Length of ACPI table + * @ret rc Return status code + */ +static int ib_srp_describe ( struct ib_srp_device *ib_srp, + struct acpi_description_header *acpi, + size_t len ) { + struct ib_device *ibdev = ib_srp->ibdev; + struct sbft_table *sbft = + container_of ( acpi, struct sbft_table, acpi ); + struct sbft_ib_subtable *ib_sbft; + size_t used; + + /* Sanity check */ + if ( acpi->signature != SBFT_SIG ) + return -EINVAL; + + /* Append IB subtable to existing table */ + used = le32_to_cpu ( sbft->acpi.length ); + sbft->ib_offset = cpu_to_le16 ( used ); + ib_sbft = ( ( ( void * ) sbft ) + used ); + used += sizeof ( *ib_sbft ); + if ( used > len ) + return -ENOBUFS; + sbft->acpi.length = cpu_to_le32 ( used ); + + /* Populate subtable */ + memcpy ( &ib_sbft->sgid, &ibdev->gid, sizeof ( ib_sbft->sgid ) ); + memcpy ( &ib_sbft->dgid, &ib_srp->dgid, sizeof ( ib_sbft->dgid ) ); + memcpy ( &ib_sbft->service_id, &ib_srp->service_id, + sizeof ( ib_sbft->service_id ) ); + ib_sbft->pkey = cpu_to_le16 ( ibdev->pkey ); + + return 0; +} + +/** IB SRP CMRC interface operations */ +static struct interface_operation ib_srp_cmrc_op[] = { + INTF_OP ( intf_close, struct ib_srp_device *, ib_srp_close ), +}; + +/** IB SRP CMRC interface descriptor */ +static struct interface_descriptor ib_srp_cmrc_desc = + INTF_DESC_PASSTHRU ( struct ib_srp_device, cmrc, ib_srp_cmrc_op, srp ); + +/** IB SRP SRP interface operations */ +static struct interface_operation ib_srp_srp_op[] = { + INTF_OP ( acpi_describe, struct ib_srp_device *, ib_srp_describe ), + INTF_OP ( intf_close, struct ib_srp_device *, ib_srp_close ), +}; + +/** IB SRP SRP interface descriptor */ +static struct interface_descriptor ib_srp_srp_desc = + INTF_DESC_PASSTHRU ( struct ib_srp_device, srp, ib_srp_srp_op, cmrc ); 
+ +/** + * Open IB SRP device + * + * @v block Block control interface + * @v ibdev Infiniband device + * @v dgid Destination GID + * @v service_id Service ID + * @v initiator Initiator port ID + * @v target Target port ID + * @v lun SCSI LUN + * @ret rc Return status code + */ +static int ib_srp_open ( struct interface *block, struct ib_device *ibdev, + union ib_gid *dgid, union ib_guid *service_id, + union srp_port_id *initiator, + union srp_port_id *target, struct scsi_lun *lun ) { + struct ib_srp_device *ib_srp; + int rc; + + /* Allocate and initialise structure */ + ib_srp = zalloc ( sizeof ( *ib_srp ) ); + if ( ! ib_srp ) { + rc = -ENOMEM; + goto err_zalloc; + } + ref_init ( &ib_srp->refcnt, ib_srp_free ); + intf_init ( &ib_srp->srp, &ib_srp_srp_desc, &ib_srp->refcnt ); + intf_init ( &ib_srp->cmrc, &ib_srp_cmrc_desc, &ib_srp->refcnt ); + ib_srp->ibdev = ibdev_get ( ibdev ); + DBGC ( ib_srp, "IBSRP %p for " IB_GID_FMT " " IB_GUID_FMT "\n", + ib_srp, IB_GID_ARGS ( dgid ), IB_GUID_ARGS ( service_id ) ); + + /* Preserve parameters required for boot firmware table */ + memcpy ( &ib_srp->dgid, dgid, sizeof ( ib_srp->dgid ) ); + memcpy ( &ib_srp->service_id, service_id, + sizeof ( ib_srp->service_id ) ); + + /* Open CMRC socket */ + if ( ( rc = ib_cmrc_open ( &ib_srp->cmrc, ibdev, dgid, + service_id ) ) != 0 ) { + DBGC ( ib_srp, "IBSRP %p could not open CMRC socket: %s\n", + ib_srp, strerror ( rc ) ); + goto err_cmrc_open; + } + + /* Attach SRP device to parent interface */ + if ( ( rc = srp_open ( block, &ib_srp->srp, initiator, target, + ibdev->rdma_key, lun ) ) != 0 ) { + DBGC ( ib_srp, "IBSRP %p could not create SRP device: %s\n", + ib_srp, strerror ( rc ) ); + goto err_srp_open; + } + + /* Mortalise self and return */ + ref_put ( &ib_srp->refcnt ); + return 0; + + err_srp_open: + err_cmrc_open: + ib_srp_close ( ib_srp, rc ); + ref_put ( &ib_srp->refcnt ); + err_zalloc: + return rc; +} + 
+/****************************************************************************** + * + * IB SRP URIs + * + ****************************************************************************** + */ + +/** IB SRP parse flags */ +enum ib_srp_parse_flags { + IB_SRP_PARSE_REQUIRED = 0x0000, + IB_SRP_PARSE_OPTIONAL = 0x8000, + IB_SRP_PARSE_FLAG_MASK = 0xf000, +}; + +/** IB SRP root path parameters */ +struct ib_srp_root_path { + /** Source GID */ + union ib_gid sgid; + /** Initiator port ID */ + union ib_srp_initiator_port_id initiator; + /** Destination GID */ + union ib_gid dgid; + /** Partition key */ + uint16_t pkey; + /** Service ID */ + union ib_guid service_id; + /** SCSI LUN */ + struct scsi_lun lun; + /** Target port ID */ + union ib_srp_target_port_id target; +}; + +/** + * Parse IB SRP root path byte-string value + * + * @v rp_comp Root path component string + * @v default_value Default value to use if component string is empty + * @ret value Value + */ +static int ib_srp_parse_byte_string ( const char *rp_comp, uint8_t *bytes, + unsigned int size_flags ) { + size_t size = ( size_flags & ~IB_SRP_PARSE_FLAG_MASK ); + size_t rp_comp_len = strlen ( rp_comp ); + int decoded_size; + + /* Allow optional components to be empty */ + if ( ( rp_comp_len == 0 ) && + ( size_flags & IB_SRP_PARSE_OPTIONAL ) ) + return 0; + + /* Check string length */ + if ( rp_comp_len != ( 2 * size ) ) + return -EINVAL_BYTE_STRING_LEN; + + /* Parse byte string */ + decoded_size = base16_decode ( rp_comp, bytes ); + if ( decoded_size < 0 ) + return decoded_size; + + return 0; +} + +/** + * Parse IB SRP root path integer value + * + * @v rp_comp Root path component string + * @v default_value Default value to use if component string is empty + * @ret value Value + */ +static int ib_srp_parse_integer ( const char *rp_comp, int default_value ) { + int value; + char *end; + + value = strtoul ( rp_comp, &end, 16 ); + if ( *end ) + return -EINVAL_INTEGER; + + if ( end == rp_comp ) + return 
default_value; + + return value; +} + +/** + * Parse IB SRP root path source GID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_sgid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + struct ib_device *ibdev; + + /* Default to the GID of the last opened Infiniband device */ + if ( ( ibdev = last_opened_ibdev() ) != NULL ) + memcpy ( &rp->sgid, &ibdev->gid, sizeof ( rp->sgid ) ); + + return ib_srp_parse_byte_string ( rp_comp, rp->sgid.bytes, + ( sizeof ( rp->sgid ) | + IB_SRP_PARSE_OPTIONAL ) ); +} + +/** + * Parse IB SRP root path initiator identifier extension + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_initiator_id_ext ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + union ib_srp_initiator_port_id *port_id = &rp->initiator; + + return ib_srp_parse_byte_string ( rp_comp, port_id->ib.id_ext.bytes, + ( sizeof ( port_id->ib.id_ext ) | + IB_SRP_PARSE_OPTIONAL ) ); +} + +/** + * Parse IB SRP root path initiator HCA GUID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_initiator_hca_guid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + union ib_srp_initiator_port_id *port_id = &rp->initiator; + + /* Default to the GUID portion of the source GID */ + memcpy ( &port_id->ib.hca_guid, &rp->sgid.s.guid, + sizeof ( port_id->ib.hca_guid ) ); + + return ib_srp_parse_byte_string ( rp_comp, port_id->ib.hca_guid.bytes, + ( sizeof ( port_id->ib.hca_guid ) | + IB_SRP_PARSE_OPTIONAL ) ); +} + +/** + * Parse IB SRP root path destination GID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_dgid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + return ib_srp_parse_byte_string ( rp_comp, rp->dgid.bytes, + ( 
sizeof ( rp->dgid ) | + IB_SRP_PARSE_REQUIRED ) ); +} + +/** + * Parse IB SRP root path partition key + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_pkey ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + int pkey; + + if ( ( pkey = ib_srp_parse_integer ( rp_comp, IB_PKEY_DEFAULT ) ) < 0 ) + return pkey; + rp->pkey = pkey; + return 0; +} + +/** + * Parse IB SRP root path service ID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_service_id ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + return ib_srp_parse_byte_string ( rp_comp, rp->service_id.bytes, + ( sizeof ( rp->service_id ) | + IB_SRP_PARSE_REQUIRED ) ); +} + +/** + * Parse IB SRP root path LUN + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_lun ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + return scsi_parse_lun ( rp_comp, &rp->lun ); +} + +/** + * Parse IB SRP root path target identifier extension + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_target_id_ext ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + union ib_srp_target_port_id *port_id = &rp->target; + + return ib_srp_parse_byte_string ( rp_comp, port_id->ib.id_ext.bytes, + ( sizeof ( port_id->ib.id_ext ) | + IB_SRP_PARSE_REQUIRED ) ); +} + +/** + * Parse IB SRP root path target I/O controller GUID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_target_ioc_guid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + union ib_srp_target_port_id *port_id = &rp->target; + + return ib_srp_parse_byte_string ( rp_comp, port_id->ib.ioc_guid.bytes, + ( sizeof ( port_id->ib.ioc_guid ) | + 
IB_SRP_PARSE_REQUIRED ) ); +} + +/** IB SRP root path component parser */ +struct ib_srp_root_path_parser { + /** + * Parse IB SRP root path component + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ + int ( * parse ) ( const char *rp_comp, struct ib_srp_root_path *rp ); +}; + +/** IB SRP root path components */ +static struct ib_srp_root_path_parser ib_srp_rp_parser[] = { + { ib_srp_parse_sgid }, + { ib_srp_parse_initiator_id_ext }, + { ib_srp_parse_initiator_hca_guid }, + { ib_srp_parse_dgid }, + { ib_srp_parse_pkey }, + { ib_srp_parse_service_id }, + { ib_srp_parse_lun }, + { ib_srp_parse_target_id_ext }, + { ib_srp_parse_target_ioc_guid }, +}; + +/** Number of IB SRP root path components */ +#define IB_SRP_NUM_RP_COMPONENTS \ + ( sizeof ( ib_srp_rp_parser ) / sizeof ( ib_srp_rp_parser[0] ) ) + +/** + * Parse IB SRP root path + * + * @v rp_string Root path string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_root_path ( const char *rp_string, + struct ib_srp_root_path *rp ) { + struct ib_srp_root_path_parser *parser; + char rp_string_copy[ strlen ( rp_string ) + 1 ]; + char *rp_comp[IB_SRP_NUM_RP_COMPONENTS]; + char *rp_string_tmp = rp_string_copy; + unsigned int i = 0; + int rc; + + /* Split root path into component parts */ + strcpy ( rp_string_copy, rp_string ); + while ( 1 ) { + rp_comp[i++] = rp_string_tmp; + if ( i == IB_SRP_NUM_RP_COMPONENTS ) + break; + for ( ; *rp_string_tmp != ':' ; rp_string_tmp++ ) { + if ( ! 
*rp_string_tmp ) { + DBG ( "IBSRP root path \"%s\" too short\n", + rp_string ); + return -EINVAL_RP_TOO_SHORT; + } + } + *(rp_string_tmp++) = '\0'; + } + + /* Parse root path components */ + for ( i = 0 ; i < IB_SRP_NUM_RP_COMPONENTS ; i++ ) { + parser = &ib_srp_rp_parser[i]; + if ( ( rc = parser->parse ( rp_comp[i], rp ) ) != 0 ) { + DBG ( "IBSRP could not parse \"%s\" in root path " + "\"%s\": %s\n", rp_comp[i], rp_string, + strerror ( rc ) ); + return rc; + } + } + + return 0; +} + +/** + * Open IB SRP URI + * + * @v parent Parent interface + * @v uri URI + * @ret rc Return status code + */ +static int ib_srp_open_uri ( struct interface *parent, struct uri *uri ) { + struct ib_srp_root_path rp; + struct ib_device *ibdev; + int rc; + + /* Parse URI */ + if ( ! uri->opaque ) + return -EINVAL; + memset ( &rp, 0, sizeof ( rp ) ); + if ( ( rc = ib_srp_parse_root_path ( uri->opaque, &rp ) ) != 0 ) + return rc; + + /* Identify Infiniband device */ + ibdev = find_ibdev ( &rp.sgid ); + if ( ! ibdev ) { + DBG ( "IBSRP could not identify Infiniband device\n" ); + return -ENODEV; + } + + /* Open IB SRP device */ + if ( ( rc = ib_srp_open ( parent, ibdev, &rp.dgid, &rp.service_id, + &rp.initiator.srp, &rp.target.srp, + &rp.lun ) ) != 0 ) + return rc; + + return 0; +} + +/** IB SRP URI opener */ +struct uri_opener ib_srp_uri_opener __uri_opener = { + .scheme = "ib_srp", + .open = ib_srp_open_uri, +}; |