diff options
author | Yang Zhang <yang.z.zhang@intel.com> | 2015-08-28 09:58:54 +0800 |
---|---|---|
committer | Yang Zhang <yang.z.zhang@intel.com> | 2015-09-01 12:44:00 +0800 |
commit | e44e3482bdb4d0ebde2d8b41830ac2cdb07948fb (patch) | |
tree | 66b09f592c55df2878107a468a91d21506104d3f /qemu/roms/ipxe/src/net | |
parent | 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (diff) |
Add qemu 2.4.0
Change-Id: Ic99cbad4b61f8b127b7dc74d04576c0bcbaaf4f5
Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
Diffstat (limited to 'qemu/roms/ipxe/src/net')
72 files changed, 43713 insertions, 0 deletions
diff --git a/qemu/roms/ipxe/src/net/80211/net80211.c b/qemu/roms/ipxe/src/net/80211/net80211.c new file mode 100644 index 000000000..434944523 --- /dev/null +++ b/qemu/roms/ipxe/src/net/80211/net80211.c @@ -0,0 +1,2828 @@ +/* + * The iPXE 802.11 MAC layer. + * + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <string.h> +#include <byteswap.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <ipxe/settings.h> +#include <ipxe/if_arp.h> +#include <ipxe/ethernet.h> +#include <ipxe/ieee80211.h> +#include <ipxe/netdevice.h> +#include <ipxe/net80211.h> +#include <ipxe/sec80211.h> +#include <ipxe/timer.h> +#include <ipxe/nap.h> +#include <ipxe/errortab.h> +#include <ipxe/net80211_err.h> + +/** @file + * + * 802.11 device management + */ + +/** List of 802.11 devices */ +static struct list_head net80211_devices = LIST_HEAD_INIT ( net80211_devices ); + +/** Set of device operations that does nothing */ +static struct net80211_device_operations net80211_null_ops; + +/** Information associated with a received management packet + * + * This is used to keep beacon signal strengths in a parallel queue to + * the beacons themselves. + */ +struct net80211_rx_info { + int signal; + struct list_head list; +}; + +/** Context for a probe operation */ +struct net80211_probe_ctx { + /** 802.11 device to probe on */ + struct net80211_device *dev; + + /** Value of keep_mgmt before probe was started */ + int old_keep_mgmt; + + /** If scanning actively, pointer to probe packet to send */ + struct io_buffer *probe; + + /** If non-"", the ESSID to limit ourselves to */ + const char *essid; + + /** Time probe was started */ + u32 ticks_start; + + /** Time last useful beacon was received */ + u32 ticks_beacon; + + /** Time channel was last changed */ + u32 ticks_channel; + + /** Time to stay on each channel */ + u32 hop_time; + + /** Channels to hop by when changing channel */ + int hop_step; + + /** List of best beacons for each network found so far */ + struct list_head *beacons; +}; + +/** Context for the association task */ +struct net80211_assoc_ctx { + /** Next authentication method to try using */ + int method; + + /** Time (in ticks) of the last sent association-related packet */ + int last_packet; + + /** Number of times we have tried sending it */ + int times_tried; +}; + +/** + * Detect secure 802.11 network when security support is not available + * + * @return -ENOTSUP, always. + */ +__weak int sec80211_detect ( struct io_buffer *iob __unused, + enum net80211_security_proto *secprot __unused, + enum net80211_crypto_alg *crypt __unused ) { + return -ENOTSUP; +} + +/** + * @defgroup net80211_netdev Network device interface functions + * @{ + */ +static int net80211_netdev_open ( struct net_device *netdev ); +static void net80211_netdev_close ( struct net_device *netdev ); +static int net80211_netdev_transmit ( struct net_device *netdev, + struct io_buffer *iobuf ); +static void net80211_netdev_poll ( struct net_device *netdev ); +static void net80211_netdev_irq ( struct net_device *netdev, int enable ); +/** @} */ + +/** + * @defgroup net80211_linklayer 802.11 link-layer protocol functions + * @{ + */ +static int net80211_ll_push ( struct net_device *netdev, + struct io_buffer *iobuf, const void *ll_dest, + const void *ll_source, uint16_t net_proto ); +static int net80211_ll_pull ( struct net_device *netdev, + struct io_buffer *iobuf, const void **ll_dest, + const void **ll_source, uint16_t * net_proto, + unsigned int *flags ); +/** @} */ + +/** + * @defgroup net80211_help 802.11 helper functions + * @{ + */ +static void net80211_add_channels ( struct net80211_device *dev, int start, + int len, int txpower ); +static void net80211_filter_hw_channels ( struct net80211_device *dev ); +static void net80211_set_rtscts_rate ( struct net80211_device *dev ); +static int net80211_process_capab ( struct net80211_device *dev, + u16 capab ); +static int net80211_process_ie ( struct net80211_device *dev, + union ieee80211_ie *ie, void *ie_end ); +static union ieee80211_ie * +net80211_marshal_request_info ( struct net80211_device *dev, + union ieee80211_ie *ie ); +/** @} */ + +/** + * @defgroup net80211_assoc_ll 802.11 association handling functions + * @{ + */ +static void net80211_step_associate ( struct net80211_device *dev ); +static void net80211_handle_auth ( struct net80211_device *dev, + struct io_buffer *iob ); +static void net80211_handle_assoc_reply ( struct net80211_device *dev, + struct io_buffer *iob ); +static int net80211_send_disassoc ( struct net80211_device *dev, int reason, + int deauth ); +static void net80211_handle_mgmt ( struct net80211_device *dev, + struct io_buffer *iob, int signal ); +/** @} */ + +/** + * @defgroup net80211_frag 802.11 fragment handling functions + * @{ + */ +static void net80211_free_frags ( struct net80211_device *dev, int fcid ); +static struct io_buffer *net80211_accum_frags ( struct net80211_device *dev, + int fcid, int nfrags, int size ); +static void net80211_rx_frag ( struct net80211_device *dev, + struct io_buffer *iob, int signal ); +/** @} */ + +/** + * @defgroup net80211_settings 802.11 settings handlers + * @{ + */ +static int net80211_check_settings_update ( void ); + +/** 802.11 settings applicator + * + * When the SSID is changed, this will cause any open devices to + * re-associate; when the encryption key is changed, we similarly + * update their state. + */ +struct settings_applicator net80211_applicator __settings_applicator = { + .apply = net80211_check_settings_update, +}; + +/** The network name to associate with + * + * If this is blank, we scan for all networks and use the one with the + * greatest signal strength. + */ +const struct setting net80211_ssid_setting __setting ( SETTING_NETDEV_EXTRA, + ssid ) = { + .name = "ssid", + .description = "Wireless SSID", + .type = &setting_type_string, +}; + +/** Whether to use active scanning + * + * In order to associate with a hidden SSID, it's necessary to use an + * active scan (send probe packets). If this setting is nonzero, an + * active scan on the 2.4GHz band will be used to associate. + */ +const struct setting net80211_active_setting __setting ( SETTING_NETDEV_EXTRA, + active-scan ) = { + .name = "active-scan", + .description = "Actively scan for wireless networks", + .type = &setting_type_int8, +}; + +/** The cryptographic key to use + * + * For hex WEP keys, as is common, this must be entered using the + * normal iPXE method for entering hex settings; an ASCII string of + * hex characters will not behave as expected. + */ +const struct setting net80211_key_setting __setting ( SETTING_NETDEV_EXTRA, + key ) = { + .name = "key", + .description = "Wireless encryption key", + .type = &setting_type_string, +}; + +/** @} */ + + +/* ---------- net_device wrapper ---------- */ + +/** + * Open 802.11 device and start association + * + * @v netdev Wrapping network device + * @ret rc Return status code + * + * This sets up a default conservative set of channels for probing, + * and starts the auto-association task unless the @c + * NET80211_NO_ASSOC flag is set in the wrapped 802.11 device's @c + * state field. + */ +static int net80211_netdev_open ( struct net_device *netdev ) +{ + struct net80211_device *dev = netdev->priv; + int rc = 0; + + if ( dev->op == &net80211_null_ops ) + return -EFAULT; + + if ( dev->op->open ) + rc = dev->op->open ( dev ); + + if ( rc < 0 ) + return rc; + + if ( ! ( dev->state & NET80211_NO_ASSOC ) ) + net80211_autoassociate ( dev ); + + return 0; +} + +/** + * Close 802.11 device + * + * @v netdev Wrapping network device. + * + * If the association task is running, this will stop it. + */ +static void net80211_netdev_close ( struct net_device *netdev ) +{ + struct net80211_device *dev = netdev->priv; + + if ( dev->state & NET80211_WORKING ) + process_del ( &dev->proc_assoc ); + + /* Send disassociation frame to AP, to be polite */ + if ( dev->state & NET80211_ASSOCIATED ) + net80211_send_disassoc ( dev, IEEE80211_REASON_LEAVING, 0 ); + + if ( dev->handshaker && dev->handshaker->stop && + dev->handshaker->started ) + dev->handshaker->stop ( dev ); + + free ( dev->crypto ); + free ( dev->handshaker ); + dev->crypto = NULL; + dev->handshaker = NULL; + + netdev_link_down ( netdev ); + dev->state = 0; + + if ( dev->op->close ) + dev->op->close ( dev ); +} + +/** + * Transmit packet on 802.11 device + * + * @v netdev Wrapping network device + * @v iobuf I/O buffer + * @ret rc Return status code + * + * If encryption is enabled for the currently associated network, the + * packet will be encrypted prior to transmission. + */ +static int net80211_netdev_transmit ( struct net_device *netdev, + struct io_buffer *iobuf ) +{ + struct net80211_device *dev = netdev->priv; + struct ieee80211_frame *hdr = iobuf->data; + int rc = -ENOSYS; + + if ( dev->crypto && ! ( hdr->fc & IEEE80211_FC_PROTECTED ) && + ( ( hdr->fc & IEEE80211_FC_TYPE ) == IEEE80211_TYPE_DATA ) ) { + struct io_buffer *niob = dev->crypto->encrypt ( dev->crypto, + iobuf ); + if ( ! niob ) + return -ENOMEM; /* only reason encryption could fail */ + + /* Free the non-encrypted iob */ + netdev_tx_complete ( netdev, iobuf ); + + /* Transmit the encrypted iob; the Protected flag is + set, so we won't recurse into here again */ + netdev_tx ( netdev, niob ); + + /* Don't transmit the freed packet */ + return 0; + } + + if ( dev->op->transmit ) + rc = dev->op->transmit ( dev, iobuf ); + + return rc; +} + +/** + * Poll 802.11 device for received packets and completed transmissions + * + * @v netdev Wrapping network device + */ +static void net80211_netdev_poll ( struct net_device *netdev ) +{ + struct net80211_device *dev = netdev->priv; + + if ( dev->op->poll ) + dev->op->poll ( dev ); +} + +/** + * Enable or disable interrupts for 802.11 device + * + * @v netdev Wrapping network device + * @v enable Whether to enable interrupts + */ +static void net80211_netdev_irq ( struct net_device *netdev, int enable ) +{ + struct net80211_device *dev = netdev->priv; + + if ( dev->op->irq ) + dev->op->irq ( dev, enable ); +} + +/** Network device operations for a wrapped 802.11 device */ +static struct net_device_operations net80211_netdev_ops = { + .open = net80211_netdev_open, + .close = net80211_netdev_close, + .transmit = net80211_netdev_transmit, + .poll = net80211_netdev_poll, + .irq = net80211_netdev_irq, +}; + + +/* ---------- 802.11 link-layer protocol ---------- */ + +/** + * Determine whether a transmission rate uses ERP/OFDM + * + * @v rate Rate in 100 kbps units + * @ret is_erp TRUE if the rate is an ERP/OFDM rate + * + * 802.11b supports rates of 1.0, 2.0, 5.5, and 11.0 Mbps; any other + * rate than these on the 2.4GHz spectrum is an ERP (802.11g) rate. + */ +static inline int net80211_rate_is_erp ( u16 rate ) +{ + if ( rate == 10 || rate == 20 || rate == 55 || rate == 110 ) + return 0; + return 1; +} + + +/** + * Calculate one frame's contribution to 802.11 duration field + * + * @v dev 802.11 device + * @v bytes Amount of data to calculate duration for + * @ret dur Duration field in microseconds + * + * To avoid multiple stations attempting to transmit at once, 802.11 + * provides that every packet shall include a duration field + * specifying a length of time for which the wireless medium will be + * reserved after it is transmitted. The duration is measured in + * microseconds and is calculated with respect to the current + * physical-layer parameters of the 802.11 device. + * + * For an unfragmented data or management frame, or the last fragment + * of a fragmented frame, the duration captures only the 10 data bytes + * of one ACK; call once with bytes = 10. + * + * For a fragment of a data or management rame that will be followed + * by more fragments, the duration captures an ACK, the following + * fragment, and its ACK; add the results of three calls, two with + * bytes = 10 and one with bytes set to the next fragment's size. + * + * For an RTS control frame, the duration captures the responding CTS, + * the frame being sent, and its ACK; add the results of three calls, + * two with bytes = 10 and one with bytes set to the next frame's size + * (assuming unfragmented). + * + * For a CTS-to-self control frame, the duration captures the frame + * being protected and its ACK; add the results of two calls, one with + * bytes = 10 and one with bytes set to the next frame's size. + * + * No other frame types are currently supported by iPXE. + */ +u16 net80211_duration ( struct net80211_device *dev, int bytes, u16 rate ) +{ + struct net80211_channel *chan = &dev->channels[dev->channel]; + u32 kbps = rate * 100; + + if ( chan->band == NET80211_BAND_5GHZ || net80211_rate_is_erp ( rate ) ) { + /* OFDM encoding (802.11a/g) */ + int bits_per_symbol = ( kbps * 4 ) / 1000; /* 4us/symbol */ + int bits = 22 + ( bytes << 3 ); /* 22-bit PLCP */ + int symbols = ( bits + bits_per_symbol - 1 ) / bits_per_symbol; + + return 16 + 20 + ( symbols * 4 ); /* 16us SIFS, 20us preamble */ + } else { + /* CCK encoding (802.11b) */ + int phy_time = 144 + 48; /* preamble + PLCP */ + int bits = bytes << 3; + int data_time = ( bits * 1000 + kbps - 1 ) / kbps; + + if ( dev->phy_flags & NET80211_PHY_USE_SHORT_PREAMBLE ) + phy_time >>= 1; + + return 10 + phy_time + data_time; /* 10us SIFS */ + } +} + +/** + * Add 802.11 link-layer header + * + * @v netdev Wrapping network device + * @v iobuf I/O buffer + * @v ll_dest Link-layer destination address + * @v ll_source Link-layer source address + * @v net_proto Network-layer protocol, in network byte order + * @ret rc Return status code + * + * This adds both the 802.11 frame header and the 802.2 LLC/SNAP + * header used on data packets. + * + * We also check here for state of the link that would make it invalid + * to send a data packet; every data packet must pass through here, + * and no non-data packet (e.g. management frame) should. + */ +static int net80211_ll_push ( struct net_device *netdev, + struct io_buffer *iobuf, const void *ll_dest, + const void *ll_source, uint16_t net_proto ) +{ + struct net80211_device *dev = netdev->priv; + struct ieee80211_frame *hdr = iob_push ( iobuf, + IEEE80211_LLC_HEADER_LEN + + IEEE80211_TYP_FRAME_HEADER_LEN ); + struct ieee80211_llc_snap_header *lhdr = + ( void * ) hdr + IEEE80211_TYP_FRAME_HEADER_LEN; + + /* We can't send data packets if we're not associated. */ + if ( ! ( dev->state & NET80211_ASSOCIATED ) ) { + if ( dev->assoc_rc ) + return dev->assoc_rc; + return -ENETUNREACH; + } + + hdr->fc = IEEE80211_THIS_VERSION | IEEE80211_TYPE_DATA | + IEEE80211_STYPE_DATA | IEEE80211_FC_TODS; + + /* We don't send fragmented frames, so duration is the time + for an SIFS + 10-byte ACK. */ + hdr->duration = net80211_duration ( dev, 10, dev->rates[dev->rate] ); + + memcpy ( hdr->addr1, dev->bssid, ETH_ALEN ); + memcpy ( hdr->addr2, ll_source, ETH_ALEN ); + memcpy ( hdr->addr3, ll_dest, ETH_ALEN ); + + hdr->seq = IEEE80211_MAKESEQ ( ++dev->last_tx_seqnr, 0 ); + + lhdr->dsap = IEEE80211_LLC_DSAP; + lhdr->ssap = IEEE80211_LLC_SSAP; + lhdr->ctrl = IEEE80211_LLC_CTRL; + memset ( lhdr->oui, 0x00, 3 ); + lhdr->ethertype = net_proto; + + return 0; +} + +/** + * Remove 802.11 link-layer header + * + * @v netdev Wrapping network device + * @v iobuf I/O buffer + * @ret ll_dest Link-layer destination address + * @ret ll_source Link-layer source + * @ret net_proto Network-layer protocol, in network byte order + * @ret flags Packet flags + * @ret rc Return status code + * + * This expects and removes both the 802.11 frame header and the 802.2 + * LLC/SNAP header that are used on data packets. + */ +static int net80211_ll_pull ( struct net_device *netdev __unused, + struct io_buffer *iobuf, + const void **ll_dest, const void **ll_source, + uint16_t * net_proto, unsigned int *flags ) +{ + struct ieee80211_frame *hdr = iobuf->data; + struct ieee80211_llc_snap_header *lhdr = + ( void * ) hdr + IEEE80211_TYP_FRAME_HEADER_LEN; + + /* Bunch of sanity checks */ + if ( iob_len ( iobuf ) < IEEE80211_TYP_FRAME_HEADER_LEN + + IEEE80211_LLC_HEADER_LEN ) { + DBGC ( netdev->priv, "802.11 %p packet too short (%zd bytes)\n", + netdev->priv, iob_len ( iobuf ) ); + return -EINVAL_PKT_TOO_SHORT; + } + + if ( ( hdr->fc & IEEE80211_FC_VERSION ) != IEEE80211_THIS_VERSION ) { + DBGC ( netdev->priv, "802.11 %p packet invalid version %04x\n", + netdev->priv, hdr->fc & IEEE80211_FC_VERSION ); + return -EINVAL_PKT_VERSION; + } + + if ( ( hdr->fc & IEEE80211_FC_TYPE ) != IEEE80211_TYPE_DATA || + ( hdr->fc & IEEE80211_FC_SUBTYPE ) != IEEE80211_STYPE_DATA ) { + DBGC ( netdev->priv, "802.11 %p packet not data/data (fc=%04x)\n", + netdev->priv, hdr->fc ); + return -EINVAL_PKT_NOT_DATA; + } + + if ( ( hdr->fc & ( IEEE80211_FC_TODS | IEEE80211_FC_FROMDS ) ) != + IEEE80211_FC_FROMDS ) { + DBGC ( netdev->priv, "802.11 %p packet not from DS (fc=%04x)\n", + netdev->priv, hdr->fc ); + return -EINVAL_PKT_NOT_FROMDS; + } + + if ( lhdr->dsap != IEEE80211_LLC_DSAP || lhdr->ssap != IEEE80211_LLC_SSAP || + lhdr->ctrl != IEEE80211_LLC_CTRL || lhdr->oui[0] || lhdr->oui[1] || + lhdr->oui[2] ) { + DBGC ( netdev->priv, "802.11 %p LLC header is not plain EtherType " + "encapsulator: %02x->%02x [%02x] %02x:%02x:%02x %04x\n", + netdev->priv, lhdr->dsap, lhdr->ssap, lhdr->ctrl, + lhdr->oui[0], lhdr->oui[1], lhdr->oui[2], lhdr->ethertype ); + return -EINVAL_PKT_LLC_HEADER; + } + + iob_pull ( iobuf, sizeof ( *hdr ) + sizeof ( *lhdr ) ); + + *ll_dest = hdr->addr1; + *ll_source = hdr->addr3; + *net_proto = lhdr->ethertype; + *flags = ( ( is_multicast_ether_addr ( hdr->addr1 ) ? + LL_MULTICAST : 0 ) | + ( is_broadcast_ether_addr ( hdr->addr1 ) ? + LL_BROADCAST : 0 ) ); + return 0; +} + +/** 802.11 link-layer protocol */ +static struct ll_protocol net80211_ll_protocol __ll_protocol = { + .name = "802.11", + .push = net80211_ll_push, + .pull = net80211_ll_pull, + .init_addr = eth_init_addr, + .ntoa = eth_ntoa, + .mc_hash = eth_mc_hash, + .eth_addr = eth_eth_addr, + .eui64 = eth_eui64, + .ll_proto = htons ( ARPHRD_ETHER ), /* "encapsulated Ethernet" */ + .hw_addr_len = ETH_ALEN, + .ll_addr_len = ETH_ALEN, + .ll_header_len = IEEE80211_TYP_FRAME_HEADER_LEN + + IEEE80211_LLC_HEADER_LEN, +}; + + +/* ---------- 802.11 network management API ---------- */ + +/** + * Get 802.11 device from wrapping network device + * + * @v netdev Wrapping network device + * @ret dev 802.11 device wrapped by network device, or NULL + * + * Returns NULL if the network device does not wrap an 802.11 device. + */ +struct net80211_device * net80211_get ( struct net_device *netdev ) +{ + struct net80211_device *dev; + + list_for_each_entry ( dev, &net80211_devices, list ) { + if ( netdev->priv == dev ) + return netdev->priv; + } + + return NULL; +} + +/** + * Set state of 802.11 device keeping management frames + * + * @v dev 802.11 device + * @v enable Whether to keep management frames + * @ret oldenab Whether management frames were enabled before this call + * + * If enable is TRUE, beacon, probe, and action frames will be kept + * and may be retrieved by calling net80211_mgmt_dequeue(). + */ +int net80211_keep_mgmt ( struct net80211_device *dev, int enable ) +{ + int oldenab = dev->keep_mgmt; + + dev->keep_mgmt = enable; + return oldenab; +} + +/** + * Get 802.11 management frame + * + * @v dev 802.11 device + * @ret signal Signal strength of returned management frame + * @ret iob I/O buffer, or NULL if no management frame is queued + * + * Frames will only be returned by this function if + * net80211_keep_mgmt() has been previously called with enable set to + * TRUE. + * + * The calling function takes ownership of the returned I/O buffer. + */ +struct io_buffer * net80211_mgmt_dequeue ( struct net80211_device *dev, + int *signal ) +{ + struct io_buffer *iobuf; + struct net80211_rx_info *rxi; + + list_for_each_entry ( rxi, &dev->mgmt_info_queue, list ) { + list_del ( &rxi->list ); + if ( signal ) + *signal = rxi->signal; + free ( rxi ); + + assert ( ! list_empty ( &dev->mgmt_queue ) ); + iobuf = list_first_entry ( &dev->mgmt_queue, struct io_buffer, + list ); + list_del ( &iobuf->list ); + return iobuf; + } + + return NULL; +} + +/** + * Transmit 802.11 management frame + * + * @v dev 802.11 device + * @v fc Frame Control flags for management frame + * @v dest Destination access point + * @v iob I/O buffer + * @ret rc Return status code + * + * The @a fc argument must contain at least an IEEE 802.11 management + * subtype number (e.g. IEEE80211_STYPE_PROBE_REQ). If it contains + * IEEE80211_FC_PROTECTED, the frame will be encrypted prior to + * transmission. + * + * It is required that @a iob have at least 24 bytes of headroom + * reserved before its data start. + */ +int net80211_tx_mgmt ( struct net80211_device *dev, u16 fc, u8 dest[6], + struct io_buffer *iob ) +{ + struct ieee80211_frame *hdr = iob_push ( iob, + IEEE80211_TYP_FRAME_HEADER_LEN ); + + hdr->fc = IEEE80211_THIS_VERSION | IEEE80211_TYPE_MGMT | + ( fc & ~IEEE80211_FC_PROTECTED ); + hdr->duration = net80211_duration ( dev, 10, dev->rates[dev->rate] ); + hdr->seq = IEEE80211_MAKESEQ ( ++dev->last_tx_seqnr, 0 ); + + memcpy ( hdr->addr1, dest, ETH_ALEN ); /* DA = RA */ + memcpy ( hdr->addr2, dev->netdev->ll_addr, ETH_ALEN ); /* SA = TA */ + memcpy ( hdr->addr3, dest, ETH_ALEN ); /* BSSID */ + + if ( fc & IEEE80211_FC_PROTECTED ) { + if ( ! dev->crypto ) + return -EINVAL_CRYPTO_REQUEST; + + struct io_buffer *eiob = dev->crypto->encrypt ( dev->crypto, + iob ); + free_iob ( iob ); + iob = eiob; + } + + return netdev_tx ( dev->netdev, iob ); +} + + +/* ---------- Driver API ---------- */ + +/** 802.11 association process descriptor */ +static struct process_descriptor net80211_process_desc = + PROC_DESC ( struct net80211_device, proc_assoc, + net80211_step_associate ); + +/** + * Allocate 802.11 device + * + * @v priv_size Size of driver-private allocation area + * @ret dev Newly allocated 802.11 device + * + * This function allocates a net_device with space in its private area + * for both the net80211_device it will wrap and the driver-private + * data space requested. It initializes the link-layer-specific parts + * of the net_device, and links the net80211_device to the net_device + * appropriately. + */ +struct net80211_device * net80211_alloc ( size_t priv_size ) +{ + struct net80211_device *dev; + struct net_device *netdev = + alloc_netdev ( sizeof ( *dev ) + priv_size ); + + if ( ! netdev ) + return NULL; + + netdev->ll_protocol = &net80211_ll_protocol; + netdev->ll_broadcast = eth_broadcast; + netdev->max_pkt_len = IEEE80211_MAX_DATA_LEN; + netdev_init ( netdev, &net80211_netdev_ops ); + + dev = netdev->priv; + dev->netdev = netdev; + dev->priv = ( u8 * ) dev + sizeof ( *dev ); + dev->op = &net80211_null_ops; + + process_init_stopped ( &dev->proc_assoc, &net80211_process_desc, + &netdev->refcnt ); + INIT_LIST_HEAD ( &dev->mgmt_queue ); + INIT_LIST_HEAD ( &dev->mgmt_info_queue ); + + return dev; +} + +/** + * Register 802.11 device with network stack + * + * @v dev 802.11 device + * @v ops 802.11 device operations + * @v hw 802.11 hardware information + * + * This also registers the wrapping net_device with the higher network + * layers. + */ +int net80211_register ( struct net80211_device *dev, + struct net80211_device_operations *ops, + struct net80211_hw_info *hw ) +{ + dev->op = ops; + dev->hw = malloc ( sizeof ( *hw ) ); + if ( ! dev->hw ) + return -ENOMEM; + + memcpy ( dev->hw, hw, sizeof ( *hw ) ); + memcpy ( dev->netdev->hw_addr, hw->hwaddr, ETH_ALEN ); + + /* Set some sensible channel defaults for driver's open() function */ + memcpy ( dev->channels, dev->hw->channels, + NET80211_MAX_CHANNELS * sizeof ( dev->channels[0] ) ); + dev->channel = 0; + + list_add_tail ( &dev->list, &net80211_devices ); + return register_netdev ( dev->netdev ); +} + +/** + * Unregister 802.11 device from network stack + * + * @v dev 802.11 device + * + * After this call, the device operations are cleared so that they + * will not be called. + */ +void net80211_unregister ( struct net80211_device *dev ) +{ + unregister_netdev ( dev->netdev ); + list_del ( &dev->list ); + dev->op = &net80211_null_ops; +} + +/** + * Free 802.11 device + * + * @v dev 802.11 device + * + * The device should be unregistered before this function is called. + */ +void net80211_free ( struct net80211_device *dev ) +{ + free ( dev->hw ); + rc80211_free ( dev->rctl ); + netdev_nullify ( dev->netdev ); + netdev_put ( dev->netdev ); +} + + +/* ---------- 802.11 network management workhorse code ---------- */ + +/** + * Set state of 802.11 device + * + * @v dev 802.11 device + * @v clear Bitmask of flags to clear + * @v set Bitmask of flags to set + * @v status Status or reason code for most recent operation + * + * If @a status represents a reason code, it should be OR'ed with + * NET80211_IS_REASON. + * + * Clearing authentication also clears association; clearing + * association also clears security handshaking state. Clearing + * association removes the link-up flag from the wrapping net_device, + * but setting it does not automatically set the flag; that is left to + * the judgment of higher-level code. + */ +static inline void net80211_set_state ( struct net80211_device *dev, + short clear, short set, + u16 status ) +{ + /* The conditions in this function are deliberately formulated + to be decidable at compile-time in most cases. Since clear + and set are generally passed as constants, the body of this + function can be reduced down to a few statements by the + compiler. */ + + const int statmsk = NET80211_STATUS_MASK | NET80211_IS_REASON; + + if ( clear & NET80211_PROBED ) + clear |= NET80211_AUTHENTICATED; + + if ( clear & NET80211_AUTHENTICATED ) + clear |= NET80211_ASSOCIATED; + + if ( clear & NET80211_ASSOCIATED ) + clear |= NET80211_CRYPTO_SYNCED; + + dev->state = ( dev->state & ~clear ) | set; + dev->state = ( dev->state & ~statmsk ) | ( status & statmsk ); + + if ( clear & NET80211_ASSOCIATED ) + netdev_link_down ( dev->netdev ); + + if ( ( clear | set ) & NET80211_ASSOCIATED ) + dev->op->config ( dev, NET80211_CFG_ASSOC ); + + if ( status != 0 ) { + if ( status & NET80211_IS_REASON ) + dev->assoc_rc = -E80211_REASON ( status ); + else + dev->assoc_rc = -E80211_STATUS ( status ); + netdev_link_err ( dev->netdev, dev->assoc_rc ); + } +} + +/** + * Add channels to 802.11 device + * + * @v dev 802.11 device + * @v start First channel number to add + * @v len Number of channels to add + * @v txpower TX power (dBm) to allow on added channels + * + * To replace the current list of channels instead of adding to it, + * set the nr_channels field of the 802.11 device to 0 before calling + * this function. + */ +static void net80211_add_channels ( struct net80211_device *dev, int start, + int len, int txpower ) +{ + int i, chan = start; + + for ( i = dev->nr_channels; len-- && i < NET80211_MAX_CHANNELS; i++ ) { + dev->channels[i].channel_nr = chan; + dev->channels[i].maxpower = txpower; + dev->channels[i].hw_value = 0; + + if ( chan >= 1 && chan <= 14 ) { + dev->channels[i].band = NET80211_BAND_2GHZ; + if ( chan == 14 ) + dev->channels[i].center_freq = 2484; + else + dev->channels[i].center_freq = 2407 + 5 * chan; + chan++; + } else { + dev->channels[i].band = NET80211_BAND_5GHZ; + dev->channels[i].center_freq = 5000 + 5 * chan; + chan += 4; + } + } + + dev->nr_channels = i; +} + +/** + * Filter 802.11 device channels for hardware capabilities + * + * @v dev 802.11 device + * + * Hardware may support fewer channels than regulatory restrictions + * allow; this function filters out channels in dev->channels that are + * not supported by the hardware list in dev->hwinfo. It also copies + * over the net80211_channel::hw_value and limits maximum TX power + * appropriately. + * + * Channels are matched based on center frequency, ignoring band and + * channel number. + * + * If the driver specifies no supported channels, the effect will be + * as though all were supported. + */ +static void net80211_filter_hw_channels ( struct net80211_device *dev ) +{ + int delta = 0, i = 0; + int old_freq = dev->channels[dev->channel].center_freq; + struct net80211_channel *chan, *hwchan; + + if ( ! dev->hw->nr_channels ) + return; + + dev->channel = 0; + for ( chan = dev->channels; chan < dev->channels + dev->nr_channels; + chan++, i++ ) { + int ok = 0; + for ( hwchan = dev->hw->channels; + hwchan < dev->hw->channels + dev->hw->nr_channels; + hwchan++ ) { + if ( hwchan->center_freq == chan->center_freq ) { + ok = 1; + break; + } + } + + if ( ! ok ) + delta++; + else { + chan->hw_value = hwchan->hw_value; + if ( hwchan->maxpower != 0 && + chan->maxpower > hwchan->maxpower ) + chan->maxpower = hwchan->maxpower; + if ( old_freq == chan->center_freq ) + dev->channel = i - delta; + if ( delta ) + chan[-delta] = *chan; + } + } + + dev->nr_channels -= delta; + + if ( dev->channels[dev->channel].center_freq != old_freq ) + dev->op->config ( dev, NET80211_CFG_CHANNEL ); +} + +/** + * Update 802.11 device state to reflect received capabilities field + * + * @v dev 802.11 device + * @v capab Capabilities field in beacon, probe, or association frame + * @ret rc Return status code + */ +static int net80211_process_capab ( struct net80211_device *dev, + u16 capab ) +{ + u16 old_phy = dev->phy_flags; + + if ( ( capab & ( IEEE80211_CAPAB_MANAGED | IEEE80211_CAPAB_ADHOC ) ) != + IEEE80211_CAPAB_MANAGED ) { + DBGC ( dev, "802.11 %p cannot handle IBSS network\n", dev ); + return -ENOSYS; + } + + dev->phy_flags &= ~( NET80211_PHY_USE_SHORT_PREAMBLE | + NET80211_PHY_USE_SHORT_SLOT ); + + if ( capab & IEEE80211_CAPAB_SHORT_PMBL ) + dev->phy_flags |= NET80211_PHY_USE_SHORT_PREAMBLE; + + if ( capab & IEEE80211_CAPAB_SHORT_SLOT ) + dev->phy_flags |= NET80211_PHY_USE_SHORT_SLOT; + + if ( old_phy != dev->phy_flags ) + dev->op->config ( dev, NET80211_CFG_PHY_PARAMS ); + + return 0; +} + +/** + * Update 802.11 device state to reflect received information elements + * + * @v dev 802.11 device + * @v ie Pointer to first information element + * @v ie_end Pointer to tail of packet I/O buffer + * @ret rc Return status code + */ +static int net80211_process_ie ( struct net80211_device *dev, + union ieee80211_ie *ie, void *ie_end ) +{ + u16 old_rate = dev->rates[dev->rate]; + u16 old_phy = dev->phy_flags; + int have_rates = 0, i; + int ds_channel = 0; + int changed = 0; + int band = dev->channels[dev->channel].band; + + if ( ! ieee80211_ie_bound ( ie, ie_end ) ) + return 0; + + for ( ; ie; ie = ieee80211_next_ie ( ie, ie_end ) ) { + switch ( ie->id ) { + case IEEE80211_IE_SSID: + if ( ie->len <= 32 ) { + memcpy ( dev->essid, ie->ssid, ie->len ); + dev->essid[ie->len] = 0; + } + break; + + case IEEE80211_IE_RATES: + case IEEE80211_IE_EXT_RATES: + if ( ! have_rates ) { + dev->nr_rates = 0; + dev->basic_rates = 0; + have_rates = 1; + } + for ( i = 0; i < ie->len && + dev->nr_rates < NET80211_MAX_RATES; i++ ) { + u8 rid = ie->rates[i]; + u16 rate = ( rid & 0x7f ) * 5; + + if ( rid & 0x80 ) + dev->basic_rates |= + ( 1 << dev->nr_rates ); + + dev->rates[dev->nr_rates++] = rate; + } + + break; + + case IEEE80211_IE_DS_PARAM: + if ( dev->channel < dev->nr_channels && ds_channel == + dev->channels[dev->channel].channel_nr ) + break; + ds_channel = ie->ds_param.current_channel; + net80211_change_channel ( dev, ds_channel ); + break; + + case IEEE80211_IE_COUNTRY: + dev->nr_channels = 0; + + DBGC ( dev, "802.11 %p setting country regulations " + "for %c%c\n", dev, ie->country.name[0], + ie->country.name[1] ); + for ( i = 0; i < ( ie->len - 3 ) / 3; i++ ) { + union ieee80211_ie_country_triplet *t = + &ie->country.triplet[i]; + if ( t->first > 200 ) { + DBGC ( dev, "802.11 %p ignoring regulatory " + "extension information\n", dev ); + } else { + net80211_add_channels ( dev, + t->band.first_channel, + t->band.nr_channels, + t->band.max_txpower ); + } + } + net80211_filter_hw_channels ( dev ); + break; + + case IEEE80211_IE_ERP_INFO: + dev->phy_flags &= ~( NET80211_PHY_USE_PROTECTION | + NET80211_PHY_USE_SHORT_PREAMBLE ); + if ( ie->erp_info & IEEE80211_ERP_USE_PROTECTION ) + dev->phy_flags |= NET80211_PHY_USE_PROTECTION; + if ( ! ( ie->erp_info & IEEE80211_ERP_BARKER_LONG ) ) + dev->phy_flags |= NET80211_PHY_USE_SHORT_PREAMBLE; + break; + } + } + + if ( have_rates ) { + /* Allow only those rates that are also supported by + the hardware. */ + int delta = 0, j; + + dev->rate = 0; + for ( i = 0; i < dev->nr_rates; i++ ) { + int ok = 0; + for ( j = 0; j < dev->hw->nr_rates[band]; j++ ) { + if ( dev->hw->rates[band][j] == dev->rates[i] ){ + ok = 1; + break; + } + } + + if ( ! ok ) + delta++; + else { + dev->rates[i - delta] = dev->rates[i]; + if ( old_rate == dev->rates[i] ) + dev->rate = i - delta; + } + } + + dev->nr_rates -= delta; + + /* Sort available rates - sorted subclumps tend to already + exist, so insertion sort works well. */ + for ( i = 1; i < dev->nr_rates; i++ ) { + u16 rate = dev->rates[i]; + u32 tmp, br, mask; + + for ( j = i - 1; j >= 0 && dev->rates[j] >= rate; j-- ) + dev->rates[j + 1] = dev->rates[j]; + dev->rates[j + 1] = rate; + + /* Adjust basic_rates to match by rotating the + bits from bit j+1 to bit i left one position. */ + mask = ( ( 1 << i ) - 1 ) & ~( ( 1 << ( j + 1 ) ) - 1 ); + br = dev->basic_rates; + tmp = br & ( 1 << i ); + br = ( br & ~( mask | tmp ) ) | ( ( br & mask ) << 1 ); + br |= ( tmp >> ( i - j - 1 ) ); + dev->basic_rates = br; + } + + net80211_set_rtscts_rate ( dev ); + + if ( dev->rates[dev->rate] != old_rate ) + changed |= NET80211_CFG_RATE; + } + + if ( dev->hw->flags & NET80211_HW_NO_SHORT_PREAMBLE ) + dev->phy_flags &= ~NET80211_PHY_USE_SHORT_PREAMBLE; + if ( dev->hw->flags & NET80211_HW_NO_SHORT_SLOT ) + dev->phy_flags &= ~NET80211_PHY_USE_SHORT_SLOT; + + if ( old_phy != dev->phy_flags ) + changed |= NET80211_CFG_PHY_PARAMS; + + if ( changed ) + dev->op->config ( dev, changed ); + + return 0; +} + +/** + * Create information elements for outgoing probe or association packet + * + * @v dev 802.11 device + * @v ie Pointer to start of information element area + * @ret next_ie Pointer to first byte after added information elements + */ +static union ieee80211_ie * +net80211_marshal_request_info ( struct net80211_device *dev, + union ieee80211_ie *ie ) +{ + int i; + + ie->id = IEEE80211_IE_SSID; + ie->len = strlen ( dev->essid ); + memcpy ( ie->ssid, dev->essid, ie->len ); + + ie = ieee80211_next_ie ( ie, NULL ); + + ie->id = IEEE80211_IE_RATES; + ie->len = dev->nr_rates; + if ( ie->len > 8 ) + ie->len = 8; + + for ( i = 0; i < ie->len; i++ ) { + ie->rates[i] = dev->rates[i] / 5; + if ( dev->basic_rates & ( 1 << i ) ) + ie->rates[i] |= 0x80; + } + + ie = ieee80211_next_ie ( ie, NULL ); + + if ( dev->rsn_ie && dev->rsn_ie->id == IEEE80211_IE_RSN ) { + memcpy ( ie, dev->rsn_ie, dev->rsn_ie->len + 2 ); + ie = ieee80211_next_ie ( ie, NULL ); + } + + if ( dev->nr_rates > 8 ) { + /* 802.11 requires we use an Extended Basic Rates IE + for the rates beyond the eighth. */ + + ie->id = IEEE80211_IE_EXT_RATES; + ie->len = dev->nr_rates - 8; + + for ( ; i < dev->nr_rates; i++ ) { + ie->rates[i - 8] = dev->rates[i] / 5; + if ( dev->basic_rates & ( 1 << i ) ) + ie->rates[i - 8] |= 0x80; + } + + ie = ieee80211_next_ie ( ie, NULL ); + } + + if ( dev->rsn_ie && dev->rsn_ie->id == IEEE80211_IE_VENDOR ) { + memcpy ( ie, dev->rsn_ie, dev->rsn_ie->len + 2 ); + ie = ieee80211_next_ie ( ie, NULL ); + } + + return ie; +} + +/** Seconds to wait after finding a network, to possibly find better APs for it + * + * This is used when a specific SSID to scan for is specified. + */ +#define NET80211_PROBE_GATHER 1 + +/** Seconds to wait after finding a network, to possibly find other networks + * + * This is used when an empty SSID is specified, to scan for all + * networks. + */ +#define NET80211_PROBE_GATHER_ALL 2 + +/** Seconds to allow a probe to take if no network has been found */ +#define NET80211_PROBE_TIMEOUT 6 + +/** + * Begin probe of 802.11 networks + * + * @v dev 802.11 device + * @v essid SSID to probe for, or "" to accept any (may not be NULL) + * @v active Whether to use active scanning + * @ret ctx Probe context + * + * Active scanning may only be used on channels 1-11 in the 2.4GHz + * band, due to iPXE's lack of a complete regulatory database. If + * active scanning is used, probe packets will be sent on each + * channel; this can allow association with hidden-SSID networks if + * the SSID is properly specified. + * + * A @c NULL return indicates an out-of-memory condition. + * + * The returned context must be periodically passed to + * net80211_probe_step() until that function returns zero. + */ +struct net80211_probe_ctx * net80211_probe_start ( struct net80211_device *dev, + const char *essid, + int active ) +{ + struct net80211_probe_ctx *ctx = zalloc ( sizeof ( *ctx ) ); + + if ( ! ctx ) + return NULL; + + assert ( netdev_is_open ( dev->netdev ) ); + + ctx->dev = dev; + ctx->old_keep_mgmt = net80211_keep_mgmt ( dev, 1 ); + ctx->essid = essid; + if ( dev->essid != ctx->essid ) + strcpy ( dev->essid, ctx->essid ); + + if ( active ) { + struct ieee80211_probe_req *probe_req; + union ieee80211_ie *ie; + + ctx->probe = alloc_iob ( 128 ); + iob_reserve ( ctx->probe, IEEE80211_TYP_FRAME_HEADER_LEN ); + probe_req = ctx->probe->data; + + ie = net80211_marshal_request_info ( dev, + probe_req->info_element ); + + iob_put ( ctx->probe, ( void * ) ie - ctx->probe->data ); + } + + ctx->ticks_start = currticks(); + ctx->ticks_beacon = 0; + ctx->ticks_channel = currticks(); + ctx->hop_time = ticks_per_sec() / ( active ? 2 : 6 ); + + /* + * Channels on 2.4GHz overlap, and the most commonly used + * are 1, 6, and 11. We'll get a result faster if we check + * every 5 channels, but in order to hit all of them the + * number of channels must be relatively prime to 5. If it's + * not, tweak the hop. + */ + ctx->hop_step = 5; + while ( dev->nr_channels % ctx->hop_step == 0 && ctx->hop_step > 1 ) + ctx->hop_step--; + + ctx->beacons = malloc ( sizeof ( *ctx->beacons ) ); + INIT_LIST_HEAD ( ctx->beacons ); + + dev->channel = 0; + dev->op->config ( dev, NET80211_CFG_CHANNEL ); + + return ctx; +} + +/** + * Continue probe of 802.11 networks + * + * @v ctx Probe context returned by net80211_probe_start() + * @ret rc Probe status + * + * The return code will be 0 if the probe is still going on (and this + * function should be called again), a positive number if the probe + * completed successfully, or a negative error code if the probe + * failed for that reason. + * + * Whether the probe succeeded or failed, you must call + * net80211_probe_finish_all() or net80211_probe_finish_best() + * (depending on whether you want information on all networks or just + * the best-signal one) in order to release the probe context. A + * failed probe may still have acquired some valid data. + */ +int net80211_probe_step ( struct net80211_probe_ctx *ctx ) +{ + struct net80211_device *dev = ctx->dev; + u32 start_timeout = NET80211_PROBE_TIMEOUT * ticks_per_sec(); + u32 gather_timeout = ticks_per_sec(); + u32 now = currticks(); + struct io_buffer *iob; + int signal; + int rc; + char ssid[IEEE80211_MAX_SSID_LEN + 1]; + + gather_timeout *= ( ctx->essid[0] ? NET80211_PROBE_GATHER : + NET80211_PROBE_GATHER_ALL ); + + /* Time out if necessary */ + if ( now >= ctx->ticks_start + start_timeout ) + return list_empty ( ctx->beacons ) ? -ETIMEDOUT : +1; + + if ( ctx->ticks_beacon > 0 && now >= ctx->ticks_start + gather_timeout ) + return +1; + + /* Change channels if necessary */ + if ( now >= ctx->ticks_channel + ctx->hop_time ) { + dev->channel = ( dev->channel + ctx->hop_step ) + % dev->nr_channels; + dev->op->config ( dev, NET80211_CFG_CHANNEL ); + udelay ( dev->hw->channel_change_time ); + + ctx->ticks_channel = now; + + if ( ctx->probe ) { + struct io_buffer *siob = ctx->probe; /* to send */ + + /* make a copy for future use */ + iob = alloc_iob ( siob->tail - siob->head ); + iob_reserve ( iob, iob_headroom ( siob ) ); + memcpy ( iob_put ( iob, iob_len ( siob ) ), + siob->data, iob_len ( siob ) ); + + ctx->probe = iob; + rc = net80211_tx_mgmt ( dev, IEEE80211_STYPE_PROBE_REQ, + eth_broadcast, + iob_disown ( siob ) ); + if ( rc ) { + DBGC ( dev, "802.11 %p send probe failed: " + "%s\n", dev, strerror ( rc ) ); + return rc; + } + } + } + + /* Check for new management packets */ + while ( ( iob = net80211_mgmt_dequeue ( dev, &signal ) ) != NULL ) { + struct ieee80211_frame *hdr; + struct ieee80211_beacon *beacon; + union ieee80211_ie *ie; + struct net80211_wlan *wlan; + u16 type; + + hdr = iob->data; + type = hdr->fc & IEEE80211_FC_SUBTYPE; + beacon = ( struct ieee80211_beacon * ) hdr->data; + + if ( type != IEEE80211_STYPE_BEACON && + type != IEEE80211_STYPE_PROBE_RESP ) { + DBGC2 ( dev, "802.11 %p probe: non-beacon\n", dev ); + goto drop; + } + + if ( ( void * ) beacon->info_element >= iob->tail ) { + DBGC ( dev, "802.11 %p probe: beacon with no IEs\n", + dev ); + goto drop; + } + + ie = beacon->info_element; + + if ( ! ieee80211_ie_bound ( ie, iob->tail ) ) + ie = NULL; + + while ( ie && ie->id != IEEE80211_IE_SSID ) + ie = ieee80211_next_ie ( ie, iob->tail ); + + if ( ! ie ) { + DBGC ( dev, "802.11 %p probe: beacon with no SSID\n", + dev ); + goto drop; + } + + memcpy ( ssid, ie->ssid, ie->len ); + ssid[ie->len] = 0; + + if ( ctx->essid[0] && strcmp ( ctx->essid, ssid ) != 0 ) { + DBGC2 ( dev, "802.11 %p probe: beacon with wrong SSID " + "(%s)\n", dev, ssid ); + goto drop; + } + + /* See if we've got an entry for this network */ + list_for_each_entry ( wlan, ctx->beacons, list ) { + if ( strcmp ( wlan->essid, ssid ) != 0 ) + continue; + + if ( signal < wlan->signal ) { + DBGC2 ( dev, "802.11 %p probe: beacon for %s " + "(%s) with weaker signal %d\n", dev, + ssid, eth_ntoa ( hdr->addr3 ), signal ); + goto drop; + } + + goto fill; + } + + /* No entry yet - make one */ + wlan = zalloc ( sizeof ( *wlan ) ); + strcpy ( wlan->essid, ssid ); + list_add_tail ( &wlan->list, ctx->beacons ); + + /* Whether we're using an old entry or a new one, fill + it with new data. */ + fill: + memcpy ( wlan->bssid, hdr->addr3, ETH_ALEN ); + wlan->signal = signal; + wlan->channel = dev->channels[dev->channel].channel_nr; + + /* Copy this I/O buffer into a new wlan->beacon; the + * iob we've got probably came from the device driver + * and may have the full 2.4k allocation, which we + * don't want to keep around wasting memory. + */ + free_iob ( wlan->beacon ); + wlan->beacon = alloc_iob ( iob_len ( iob ) ); + memcpy ( iob_put ( wlan->beacon, iob_len ( iob ) ), + iob->data, iob_len ( iob ) ); + + if ( ( rc = sec80211_detect ( wlan->beacon, &wlan->handshaking, + &wlan->crypto ) ) == -ENOTSUP ) { + struct ieee80211_beacon *beacon = + ( struct ieee80211_beacon * ) hdr->data; + + if ( beacon->capability & IEEE80211_CAPAB_PRIVACY ) { + DBG ( "802.11 %p probe: secured network %s but " + "encryption support not compiled in\n", + dev, wlan->essid ); + wlan->handshaking = NET80211_SECPROT_UNKNOWN; + wlan->crypto = NET80211_CRYPT_UNKNOWN; + } else { + wlan->handshaking = NET80211_SECPROT_NONE; + wlan->crypto = NET80211_CRYPT_NONE; + } + } else if ( rc != 0 ) { + DBGC ( dev, "802.11 %p probe warning: network " + "%s with unidentifiable security " + "settings: %s\n", dev, wlan->essid, + strerror ( rc ) ); + } + + ctx->ticks_beacon = now; + + DBGC2 ( dev, "802.11 %p probe: good beacon for %s (%s)\n", + dev, wlan->essid, eth_ntoa ( wlan->bssid ) ); + + drop: + free_iob ( iob ); + } + + return 0; +} + + +/** + * Finish probe of 802.11 networks, returning best-signal network found + * + * @v ctx Probe context + * @ret wlan Best-signal network found, or @c NULL if none were found + * + * If net80211_probe_start() was called with a particular SSID + * parameter as filter, only a network with that SSID (matching + * case-sensitively) can be returned from this function. + */ +struct net80211_wlan * +net80211_probe_finish_best ( struct net80211_probe_ctx *ctx ) +{ + struct net80211_wlan *best = NULL, *wlan; + + if ( ! ctx ) + return NULL; + + list_for_each_entry ( wlan, ctx->beacons, list ) { + if ( ! best || best->signal < wlan->signal ) + best = wlan; + } + + if ( best ) + list_del ( &best->list ); + else + DBGC ( ctx->dev, "802.11 %p probe: found nothing for '%s'\n", + ctx->dev, ctx->essid ); + + net80211_free_wlanlist ( ctx->beacons ); + + net80211_keep_mgmt ( ctx->dev, ctx->old_keep_mgmt ); + + if ( ctx->probe ) + free_iob ( ctx->probe ); + + free ( ctx ); + + return best; +} + + +/** + * Finish probe of 802.11 networks, returning all networks found + * + * @v ctx Probe context + * @ret list List of net80211_wlan detailing networks found + * + * If net80211_probe_start() was called with a particular SSID + * parameter as filter, this will always return either an empty or a + * one-element list. + */ +struct list_head *net80211_probe_finish_all ( struct net80211_probe_ctx *ctx ) +{ + struct list_head *beacons = ctx->beacons; + + if ( ! ctx ) + return NULL; + + net80211_keep_mgmt ( ctx->dev, ctx->old_keep_mgmt ); + + if ( ctx->probe ) + free_iob ( ctx->probe ); + + free ( ctx ); + + return beacons; +} + + +/** + * Free WLAN structure + * + * @v wlan WLAN structure to free + */ +void net80211_free_wlan ( struct net80211_wlan *wlan ) +{ + if ( wlan ) { + free_iob ( wlan->beacon ); + free ( wlan ); + } +} + + +/** + * Free list of WLAN structures + * + * @v list List of WLAN structures to free + */ +void net80211_free_wlanlist ( struct list_head *list ) +{ + struct net80211_wlan *wlan, *tmp; + + if ( ! list ) + return; + + list_for_each_entry_safe ( wlan, tmp, list, list ) { + list_del ( &wlan->list ); + net80211_free_wlan ( wlan ); + } + + free ( list ); +} + + +/** Number of ticks to wait for replies to association management frames */ +#define ASSOC_TIMEOUT TICKS_PER_SEC + +/** Number of times to try sending a particular association management frame */ +#define ASSOC_RETRIES 2 + +/** + * Step 802.11 association process + * + * @v dev 802.11 device + */ +static void net80211_step_associate ( struct net80211_device *dev ) +{ + int rc = 0; + int status = dev->state & NET80211_STATUS_MASK; + + /* + * We use a sort of state machine implemented using bits in + * the dev->state variable. At each call, we take the + * logically first step that has not yet succeeded; either it + * has not been tried yet, it's being retried, or it failed. + * If it failed, we return an error indication; otherwise we + * perform the step. If it succeeds, RX handling code will set + * the appropriate status bit for us. + * + * Probe works a bit differently, since we have to step it + * on every call instead of waiting for a packet to arrive + * that will set the completion bit for us. + */ + + /* If we're waiting for a reply, check for timeout condition */ + if ( dev->state & NET80211_WAITING ) { + /* Sanity check */ + if ( ! dev->associating ) + return; + + if ( currticks() - dev->ctx.assoc->last_packet > ASSOC_TIMEOUT ) { + /* Timed out - fail if too many retries, or retry */ + dev->ctx.assoc->times_tried++; + if ( ++dev->ctx.assoc->times_tried > ASSOC_RETRIES ) { + rc = -ETIMEDOUT; + goto fail; + } + } else { + /* Didn't time out - let it keep going */ + return; + } + } else { + if ( dev->state & NET80211_PROBED ) + dev->ctx.assoc->times_tried = 0; + } + + if ( ! ( dev->state & NET80211_PROBED ) ) { + /* state: probe */ + + if ( ! dev->ctx.probe ) { + /* start probe */ + int active = fetch_intz_setting ( NULL, + &net80211_active_setting ); + int band = dev->hw->bands; + + if ( active ) + band &= ~NET80211_BAND_BIT_5GHZ; + + rc = net80211_prepare_probe ( dev, band, active ); + if ( rc ) + goto fail; + + dev->ctx.probe = net80211_probe_start ( dev, dev->essid, + active ); + if ( ! dev->ctx.probe ) { + dev->assoc_rc = -ENOMEM; + goto fail; + } + } + + rc = net80211_probe_step ( dev->ctx.probe ); + if ( ! rc ) { + return; /* still going */ + } + + dev->associating = net80211_probe_finish_best ( dev->ctx.probe ); + dev->ctx.probe = NULL; + if ( ! dev->associating ) { + if ( rc > 0 ) /* "successful" probe found nothing */ + rc = -ETIMEDOUT; + goto fail; + } + + /* If we probed using a broadcast SSID, record that + fact for the settings applicator before we clobber + it with the specific SSID we've chosen. */ + if ( ! dev->essid[0] ) + dev->state |= NET80211_AUTO_SSID; + + DBGC ( dev, "802.11 %p found network %s (%s)\n", dev, + dev->associating->essid, + eth_ntoa ( dev->associating->bssid ) ); + + dev->ctx.assoc = zalloc ( sizeof ( *dev->ctx.assoc ) ); + if ( ! dev->ctx.assoc ) { + rc = -ENOMEM; + goto fail; + } + + dev->state |= NET80211_PROBED; + dev->ctx.assoc->method = IEEE80211_AUTH_OPEN_SYSTEM; + + return; + } + + /* Record time of sending the packet we're about to send, for timeout */ + dev->ctx.assoc->last_packet = currticks(); + + if ( ! ( dev->state & NET80211_AUTHENTICATED ) ) { + /* state: prepare and authenticate */ + + if ( status != IEEE80211_STATUS_SUCCESS ) { + /* we tried authenticating already, but failed */ + int method = dev->ctx.assoc->method; + + if ( method == IEEE80211_AUTH_OPEN_SYSTEM && + ( status == IEEE80211_STATUS_AUTH_CHALL_INVALID || + status == IEEE80211_STATUS_AUTH_ALGO_UNSUPP ) ) { + /* Maybe this network uses Shared Key? */ + dev->ctx.assoc->method = + IEEE80211_AUTH_SHARED_KEY; + } else { + goto fail; + } + } + + DBGC ( dev, "802.11 %p authenticating with method %d\n", dev, + dev->ctx.assoc->method ); + + rc = net80211_prepare_assoc ( dev, dev->associating ); + if ( rc ) + goto fail; + + rc = net80211_send_auth ( dev, dev->associating, + dev->ctx.assoc->method ); + if ( rc ) + goto fail; + + return; + } + + if ( ! ( dev->state & NET80211_ASSOCIATED ) ) { + /* state: associate */ + + if ( status != IEEE80211_STATUS_SUCCESS ) + goto fail; + + DBGC ( dev, "802.11 %p associating\n", dev ); + + if ( dev->handshaker && dev->handshaker->start && + ! dev->handshaker->started ) { + rc = dev->handshaker->start ( dev ); + if ( rc < 0 ) + goto fail; + dev->handshaker->started = 1; + } + + rc = net80211_send_assoc ( dev, dev->associating ); + if ( rc ) + goto fail; + + return; + } + + if ( ! ( dev->state & NET80211_CRYPTO_SYNCED ) ) { + /* state: crypto sync */ + DBGC ( dev, "802.11 %p security handshaking\n", dev ); + + if ( ! dev->handshaker || ! dev->handshaker->step ) { + dev->state |= NET80211_CRYPTO_SYNCED; + return; + } + + rc = dev->handshaker->step ( dev ); + + if ( rc < 0 ) { + /* Only record the returned error if we're + still marked as associated, because an + asynchronous error will have already been + reported to net80211_deauthenticate() and + assoc_rc thereby set. */ + if ( dev->state & NET80211_ASSOCIATED ) + dev->assoc_rc = rc; + rc = 0; + goto fail; + } + + if ( rc > 0 ) { + dev->assoc_rc = 0; + dev->state |= NET80211_CRYPTO_SYNCED; + } + return; + } + + /* state: done! */ + netdev_link_up ( dev->netdev ); + dev->assoc_rc = 0; + dev->state &= ~NET80211_WORKING; + + free ( dev->ctx.assoc ); + dev->ctx.assoc = NULL; + + net80211_free_wlan ( dev->associating ); + dev->associating = NULL; + + dev->rctl = rc80211_init ( dev ); + + process_del ( &dev->proc_assoc ); + + DBGC ( dev, "802.11 %p associated with %s (%s)\n", dev, + dev->essid, eth_ntoa ( dev->bssid ) ); + + return; + + fail: + dev->state &= ~( NET80211_WORKING | NET80211_WAITING ); + if ( rc ) + dev->assoc_rc = rc; + + netdev_link_err ( dev->netdev, dev->assoc_rc ); + + /* We never reach here from the middle of a probe, so we don't + need to worry about freeing dev->ctx.probe. */ + + if ( dev->state & NET80211_PROBED ) { + free ( dev->ctx.assoc ); + dev->ctx.assoc = NULL; + } + + net80211_free_wlan ( dev->associating ); + dev->associating = NULL; + + process_del ( &dev->proc_assoc ); + + DBGC ( dev, "802.11 %p association failed (state=%04x): " + "%s\n", dev, dev->state, strerror ( dev->assoc_rc ) ); + + /* Try it again: */ + net80211_autoassociate ( dev ); +} + +/** + * Check for 802.11 SSID or key updates + * + * This acts as a settings applicator; if the user changes netX/ssid, + * and netX is currently open, the association task will be invoked + * again. If the user changes the encryption key, the current security + * handshaker will be asked to update its state to match; if that is + * impossible without reassociation, we reassociate. + */ +static int net80211_check_settings_update ( void ) +{ + struct net80211_device *dev; + char ssid[IEEE80211_MAX_SSID_LEN + 1]; + int key_reassoc; + + list_for_each_entry ( dev, &net80211_devices, list ) { + if ( ! netdev_is_open ( dev->netdev ) ) + continue; + + key_reassoc = 0; + if ( dev->handshaker && dev->handshaker->change_key && + dev->handshaker->change_key ( dev ) < 0 ) + key_reassoc = 1; + + fetch_string_setting ( netdev_settings ( dev->netdev ), + &net80211_ssid_setting, ssid, + IEEE80211_MAX_SSID_LEN + 1 ); + + if ( key_reassoc || + ( ! ( ! ssid[0] && ( dev->state & NET80211_AUTO_SSID ) ) && + strcmp ( ssid, dev->essid ) != 0 ) ) { + DBGC ( dev, "802.11 %p updating association: " + "%s -> %s\n", dev, dev->essid, ssid ); + net80211_autoassociate ( dev ); + } + } + + return 0; +} + +/** + * Start 802.11 association process + * + * @v dev 802.11 device + * + * If the association process is running, it will be restarted. + */ +void net80211_autoassociate ( struct net80211_device *dev ) +{ + if ( ! ( dev->state & NET80211_WORKING ) ) { + DBGC2 ( dev, "802.11 %p spawning association process\n", dev ); + process_add ( &dev->proc_assoc ); + } else { + DBGC2 ( dev, "802.11 %p restarting association\n", dev ); + } + + /* Clean up everything an earlier association process might + have been in the middle of using */ + if ( dev->associating ) + net80211_free_wlan ( dev->associating ); + + if ( ! ( dev->state & NET80211_PROBED ) ) + net80211_free_wlan ( + net80211_probe_finish_best ( dev->ctx.probe ) ); + else + free ( dev->ctx.assoc ); + + /* Reset to a clean state */ + fetch_string_setting ( netdev_settings ( dev->netdev ), + &net80211_ssid_setting, dev->essid, + IEEE80211_MAX_SSID_LEN + 1 ); + dev->ctx.probe = NULL; + dev->associating = NULL; + dev->assoc_rc = 0; + net80211_set_state ( dev, NET80211_PROBED, NET80211_WORKING, 0 ); +} + +/** + * Pick TX rate for RTS/CTS packets based on data rate + * + * @v dev 802.11 device + * + * The RTS/CTS rate is the fastest TX rate marked as "basic" that is + * not faster than the data rate. + */ +static void net80211_set_rtscts_rate ( struct net80211_device *dev ) +{ + u16 datarate = dev->rates[dev->rate]; + u16 rtsrate = 0; + int rts_idx = -1; + int i; + + for ( i = 0; i < dev->nr_rates; i++ ) { + u16 rate = dev->rates[i]; + + if ( ! ( dev->basic_rates & ( 1 << i ) ) || rate > datarate ) + continue; + + if ( rate > rtsrate ) { + rtsrate = rate; + rts_idx = i; + } + } + + /* If this is in initialization, we might not have any basic + rates; just use the first data rate in that case. */ + if ( rts_idx < 0 ) + rts_idx = 0; + + dev->rtscts_rate = rts_idx; +} + +/** + * Set data transmission rate for 802.11 device + * + * @v dev 802.11 device + * @v rate Rate to set, as index into @c dev->rates array + */ +void net80211_set_rate_idx ( struct net80211_device *dev, int rate ) +{ + assert ( netdev_is_open ( dev->netdev ) ); + + if ( rate >= 0 && rate < dev->nr_rates && rate != dev->rate ) { + DBGC2 ( dev, "802.11 %p changing rate from %d->%d Mbps\n", + dev, dev->rates[dev->rate] / 10, + dev->rates[rate] / 10 ); + + dev->rate = rate; + net80211_set_rtscts_rate ( dev ); + dev->op->config ( dev, NET80211_CFG_RATE ); + } +} + +/** + * Configure 802.11 device to transmit on a certain channel + * + * @v dev 802.11 device + * @v channel Channel number (1-11 for 2.4GHz) to transmit on + */ +int net80211_change_channel ( struct net80211_device *dev, int channel ) +{ + int i, oldchan = dev->channel; + + assert ( netdev_is_open ( dev->netdev ) ); + + for ( i = 0; i < dev->nr_channels; i++ ) { + if ( dev->channels[i].channel_nr == channel ) { + dev->channel = i; + break; + } + } + + if ( i == dev->nr_channels ) + return -ENOENT; + + if ( i != oldchan ) + return dev->op->config ( dev, NET80211_CFG_CHANNEL ); + + return 0; +} + +/** + * Prepare 802.11 device channel and rate set for scanning + * + * @v dev 802.11 device + * @v band RF band(s) on which to prepare for scanning + * @v active Whether the scanning will be active + * @ret rc Return status code + */ +int net80211_prepare_probe ( struct net80211_device *dev, int band, + int active ) +{ + assert ( netdev_is_open ( dev->netdev ) ); + + if ( active && ( band & NET80211_BAND_BIT_5GHZ ) ) { + DBGC ( dev, "802.11 %p cannot perform active scanning on " + "5GHz band\n", dev ); + return -EINVAL_ACTIVE_SCAN; + } + + if ( band == 0 ) { + /* This can happen for a 5GHz-only card with 5GHz + scanning masked out by an active request. */ + DBGC ( dev, "802.11 %p asked to prepare for scanning nothing\n", + dev ); + return -EINVAL_ACTIVE_SCAN; + } + + dev->nr_channels = 0; + + if ( active ) + net80211_add_channels ( dev, 1, 11, NET80211_REG_TXPOWER ); + else { + if ( band & NET80211_BAND_BIT_2GHZ ) + net80211_add_channels ( dev, 1, 14, + NET80211_REG_TXPOWER ); + if ( band & NET80211_BAND_BIT_5GHZ ) + net80211_add_channels ( dev, 36, 8, + NET80211_REG_TXPOWER ); + } + + net80211_filter_hw_channels ( dev ); + + /* Use channel 1 for now */ + dev->channel = 0; + dev->op->config ( dev, NET80211_CFG_CHANNEL ); + + /* Always do active probes at lowest (presumably first) speed */ + dev->rate = 0; + dev->nr_rates = 1; + dev->rates[0] = dev->hw->rates[dev->channels[0].band][0]; + dev->op->config ( dev, NET80211_CFG_RATE ); + + return 0; +} + +/** + * Prepare 802.11 device channel and rate set for communication + * + * @v dev 802.11 device + * @v wlan WLAN to prepare for communication with + * @ret rc Return status code + */ +int net80211_prepare_assoc ( struct net80211_device *dev, + struct net80211_wlan *wlan ) +{ + struct ieee80211_frame *hdr = wlan->beacon->data; + struct ieee80211_beacon *beacon = + ( struct ieee80211_beacon * ) hdr->data; + struct net80211_handshaker *handshaker; + int rc; + + assert ( netdev_is_open ( dev->netdev ) ); + + net80211_set_state ( dev, NET80211_ASSOCIATED, 0, 0 ); + memcpy ( dev->bssid, wlan->bssid, ETH_ALEN ); + strcpy ( dev->essid, wlan->essid ); + + free ( dev->rsn_ie ); + dev->rsn_ie = NULL; + + dev->last_beacon_timestamp = beacon->timestamp; + dev->tx_beacon_interval = 1024 * beacon->beacon_interval; + + /* Barring an IE that tells us the channel outright, assume + the channel we heard this AP best on is the channel it's + communicating on. */ + net80211_change_channel ( dev, wlan->channel ); + + rc = net80211_process_capab ( dev, beacon->capability ); + if ( rc ) + return rc; + + rc = net80211_process_ie ( dev, beacon->info_element, + wlan->beacon->tail ); + if ( rc ) + return rc; + + /* Associate at the lowest rate so we know it'll get through */ + dev->rate = 0; + dev->op->config ( dev, NET80211_CFG_RATE ); + + /* Free old handshaker and crypto, if they exist */ + if ( dev->handshaker && dev->handshaker->stop && + dev->handshaker->started ) + dev->handshaker->stop ( dev ); + free ( dev->handshaker ); + dev->handshaker = NULL; + free ( dev->crypto ); + free ( dev->gcrypto ); + dev->crypto = dev->gcrypto = NULL; + + /* Find new security handshaker to use */ + for_each_table_entry ( handshaker, NET80211_HANDSHAKERS ) { + if ( handshaker->protocol == wlan->handshaking ) { + dev->handshaker = zalloc ( sizeof ( *handshaker ) + + handshaker->priv_len ); + if ( ! dev->handshaker ) + return -ENOMEM; + + memcpy ( dev->handshaker, handshaker, + sizeof ( *handshaker ) ); + dev->handshaker->priv = ( ( void * ) dev->handshaker + + sizeof ( *handshaker ) ); + break; + } + } + + if ( ( wlan->handshaking != NET80211_SECPROT_NONE ) && + ! dev->handshaker ) { + DBGC ( dev, "802.11 %p no support for handshaking scheme %d\n", + dev, wlan->handshaking ); + return -( ENOTSUP | ( wlan->handshaking << 8 ) ); + } + + /* Initialize security handshaker */ + if ( dev->handshaker ) { + rc = dev->handshaker->init ( dev ); + if ( rc < 0 ) + return rc; + } + + return 0; +} + +/** + * Send 802.11 initial authentication frame + * + * @v dev 802.11 device + * @v wlan WLAN to authenticate with + * @v method Authentication method + * @ret rc Return status code + * + * @a method may be 0 for Open System authentication or 1 for Shared + * Key authentication. Open System provides no security in association + * whatsoever, relying on encryption for confidentiality, but Shared + * Key actively introduces security problems and is very rarely used. + */ +int net80211_send_auth ( struct net80211_device *dev, + struct net80211_wlan *wlan, int method ) +{ + struct io_buffer *iob = alloc_iob ( 64 ); + struct ieee80211_auth *auth; + + net80211_set_state ( dev, 0, NET80211_WAITING, 0 ); + iob_reserve ( iob, IEEE80211_TYP_FRAME_HEADER_LEN ); + auth = iob_put ( iob, sizeof ( *auth ) ); + auth->algorithm = method; + auth->tx_seq = 1; + auth->status = 0; + + return net80211_tx_mgmt ( dev, IEEE80211_STYPE_AUTH, wlan->bssid, iob ); +} + +/** + * Handle receipt of 802.11 authentication frame + * + * @v dev 802.11 device + * @v iob I/O buffer + * + * If the authentication method being used is Shared Key, and the + * frame that was received included challenge text, the frame is + * encrypted using the cryptosystem currently in effect and sent back + * to the AP to complete the authentication. + */ +static void net80211_handle_auth ( struct net80211_device *dev, + struct io_buffer *iob ) +{ + struct ieee80211_frame *hdr = iob->data; + struct ieee80211_auth *auth = + ( struct ieee80211_auth * ) hdr->data; + + if ( auth->tx_seq & 1 ) { + DBGC ( dev, "802.11 %p authentication received improperly " + "directed frame (seq. %d)\n", dev, auth->tx_seq ); + net80211_set_state ( dev, NET80211_WAITING, 0, + IEEE80211_STATUS_FAILURE ); + return; + } + + if ( auth->status != IEEE80211_STATUS_SUCCESS ) { + DBGC ( dev, "802.11 %p authentication failed: status %d\n", + dev, auth->status ); + net80211_set_state ( dev, NET80211_WAITING, 0, + auth->status ); + return; + } + + if ( auth->algorithm == IEEE80211_AUTH_SHARED_KEY && ! dev->crypto ) { + DBGC ( dev, "802.11 %p can't perform shared-key authentication " + "without a cryptosystem\n", dev ); + net80211_set_state ( dev, NET80211_WAITING, 0, + IEEE80211_STATUS_FAILURE ); + return; + } + + if ( auth->algorithm == IEEE80211_AUTH_SHARED_KEY && + auth->tx_seq == 2 ) { + /* Since the iob we got is going to be freed as soon + as we return, we can do some in-place + modification. */ + auth->tx_seq = 3; + auth->status = 0; + + memcpy ( hdr->addr2, hdr->addr1, ETH_ALEN ); + memcpy ( hdr->addr1, hdr->addr3, ETH_ALEN ); + + netdev_tx ( dev->netdev, + dev->crypto->encrypt ( dev->crypto, iob ) ); + return; + } + + net80211_set_state ( dev, NET80211_WAITING, NET80211_AUTHENTICATED, + IEEE80211_STATUS_SUCCESS ); + + return; +} + +/** + * Send 802.11 association frame + * + * @v dev 802.11 device + * @v wlan WLAN to associate with + * @ret rc Return status code + */ +int net80211_send_assoc ( struct net80211_device *dev, + struct net80211_wlan *wlan ) +{ + struct io_buffer *iob = alloc_iob ( 128 ); + struct ieee80211_assoc_req *assoc; + union ieee80211_ie *ie; + + net80211_set_state ( dev, 0, NET80211_WAITING, 0 ); + + iob_reserve ( iob, IEEE80211_TYP_FRAME_HEADER_LEN ); + assoc = iob->data; + + assoc->capability = IEEE80211_CAPAB_MANAGED; + if ( ! ( dev->hw->flags & NET80211_HW_NO_SHORT_PREAMBLE ) ) + assoc->capability |= IEEE80211_CAPAB_SHORT_PMBL; + if ( ! ( dev->hw->flags & NET80211_HW_NO_SHORT_SLOT ) ) + assoc->capability |= IEEE80211_CAPAB_SHORT_SLOT; + if ( wlan->crypto ) + assoc->capability |= IEEE80211_CAPAB_PRIVACY; + + assoc->listen_interval = 1; + + ie = net80211_marshal_request_info ( dev, assoc->info_element ); + + DBGP ( "802.11 %p about to send association request:\n", dev ); + DBGP_HD ( iob->data, ( void * ) ie - iob->data ); + + iob_put ( iob, ( void * ) ie - iob->data ); + + return net80211_tx_mgmt ( dev, IEEE80211_STYPE_ASSOC_REQ, + wlan->bssid, iob ); +} + +/** + * Handle receipt of 802.11 association reply frame + * + * @v dev 802.11 device + * @v iob I/O buffer + */ +static void net80211_handle_assoc_reply ( struct net80211_device *dev, + struct io_buffer *iob ) +{ + struct ieee80211_frame *hdr = iob->data; + struct ieee80211_assoc_resp *assoc = + ( struct ieee80211_assoc_resp * ) hdr->data; + + net80211_process_capab ( dev, assoc->capability ); + net80211_process_ie ( dev, assoc->info_element, iob->tail ); + + if ( assoc->status != IEEE80211_STATUS_SUCCESS ) { + DBGC ( dev, "802.11 %p association failed: status %d\n", + dev, assoc->status ); + net80211_set_state ( dev, NET80211_WAITING, 0, + assoc->status ); + return; + } + + /* ESSID was filled before the association request was sent */ + memcpy ( dev->bssid, hdr->addr3, ETH_ALEN ); + dev->aid = assoc->aid; + + net80211_set_state ( dev, NET80211_WAITING, NET80211_ASSOCIATED, + IEEE80211_STATUS_SUCCESS ); +} + + +/** + * Send 802.11 disassociation frame + * + * @v dev 802.11 device + * @v reason Reason for disassociation + * @v deauth If TRUE, send deauthentication instead of disassociation + * @ret rc Return status code + */ +static int net80211_send_disassoc ( struct net80211_device *dev, int reason, + int deauth ) +{ + struct io_buffer *iob = alloc_iob ( 64 ); + struct ieee80211_disassoc *disassoc; + + if ( ! ( dev->state & NET80211_ASSOCIATED ) ) + return -EINVAL; + + net80211_set_state ( dev, NET80211_ASSOCIATED, 0, 0 ); + iob_reserve ( iob, IEEE80211_TYP_FRAME_HEADER_LEN ); + disassoc = iob_put ( iob, sizeof ( *disassoc ) ); + disassoc->reason = reason; + + return net80211_tx_mgmt ( dev, deauth ? IEEE80211_STYPE_DEAUTH : + IEEE80211_STYPE_DISASSOC, dev->bssid, iob ); +} + + +/** + * Deauthenticate from current network and try again + * + * @v dev 802.11 device + * @v rc Return status code indicating reason + * + * The deauthentication will be sent using an 802.11 "unspecified + * reason", as is common, but @a rc will be set as a link-up + * error to aid the user in debugging. + */ +void net80211_deauthenticate ( struct net80211_device *dev, int rc ) +{ + net80211_send_disassoc ( dev, IEEE80211_REASON_UNSPECIFIED, 1 ); + dev->assoc_rc = rc; + netdev_link_err ( dev->netdev, rc ); + + net80211_autoassociate ( dev ); +} + + +/** Smoothing factor (1-7) for link quality calculation */ +#define LQ_SMOOTH 7 + +/** + * Update link quality information based on received beacon + * + * @v dev 802.11 device + * @v iob I/O buffer containing beacon + * @ret rc Return status code + */ +static void net80211_update_link_quality ( struct net80211_device *dev, + struct io_buffer *iob ) +{ + struct ieee80211_frame *hdr = iob->data; + struct ieee80211_beacon *beacon; + u32 dt, rxi; + + if ( ! ( dev->state & NET80211_ASSOCIATED ) ) + return; + + beacon = ( struct ieee80211_beacon * ) hdr->data; + dt = ( u32 ) ( beacon->timestamp - dev->last_beacon_timestamp ); + rxi = dev->rx_beacon_interval; + + rxi = ( LQ_SMOOTH * rxi ) + ( ( 8 - LQ_SMOOTH ) * dt ); + dev->rx_beacon_interval = rxi >> 3; + + dev->last_beacon_timestamp = beacon->timestamp; +} + + +/** + * Handle receipt of 802.11 management frame + * + * @v dev 802.11 device + * @v iob I/O buffer + * @v signal Signal strength of received frame + */ +static void net80211_handle_mgmt ( struct net80211_device *dev, + struct io_buffer *iob, int signal ) +{ + struct ieee80211_frame *hdr = iob->data; + struct ieee80211_disassoc *disassoc; + u16 stype = hdr->fc & IEEE80211_FC_SUBTYPE; + int keep = 0; + int is_deauth = ( stype == IEEE80211_STYPE_DEAUTH ); + + if ( ( hdr->fc & IEEE80211_FC_TYPE ) != IEEE80211_TYPE_MGMT ) { + free_iob ( iob ); + return; /* only handle management frames */ + } + + switch ( stype ) { + /* We reconnect on deauthentication and disassociation. */ + case IEEE80211_STYPE_DEAUTH: + case IEEE80211_STYPE_DISASSOC: + disassoc = ( struct ieee80211_disassoc * ) hdr->data; + net80211_set_state ( dev, is_deauth ? NET80211_AUTHENTICATED : + NET80211_ASSOCIATED, 0, + NET80211_IS_REASON | disassoc->reason ); + DBGC ( dev, "802.11 %p %s: reason %d\n", + dev, is_deauth ? "deauthenticated" : "disassociated", + disassoc->reason ); + + /* Try to reassociate, in case it's transient. */ + net80211_autoassociate ( dev ); + + break; + + /* We handle authentication and association. */ + case IEEE80211_STYPE_AUTH: + if ( ! ( dev->state & NET80211_AUTHENTICATED ) ) + net80211_handle_auth ( dev, iob ); + break; + + case IEEE80211_STYPE_ASSOC_RESP: + case IEEE80211_STYPE_REASSOC_RESP: + if ( ! ( dev->state & NET80211_ASSOCIATED ) ) + net80211_handle_assoc_reply ( dev, iob ); + break; + + /* We pass probes and beacons onto network scanning + code. Pass actions for future extensibility. */ + case IEEE80211_STYPE_BEACON: + net80211_update_link_quality ( dev, iob ); + /* fall through */ + case IEEE80211_STYPE_PROBE_RESP: + case IEEE80211_STYPE_ACTION: + if ( dev->keep_mgmt ) { + struct net80211_rx_info *rxinf; + rxinf = zalloc ( sizeof ( *rxinf ) ); + if ( ! rxinf ) { + DBGC ( dev, "802.11 %p out of memory\n", dev ); + break; + } + rxinf->signal = signal; + list_add_tail ( &iob->list, &dev->mgmt_queue ); + list_add_tail ( &rxinf->list, &dev->mgmt_info_queue ); + keep = 1; + } + break; + + case IEEE80211_STYPE_PROBE_REQ: + /* Some nodes send these broadcast. Ignore them. */ + break; + + case IEEE80211_STYPE_ASSOC_REQ: + case IEEE80211_STYPE_REASSOC_REQ: + /* We should never receive these, only send them. */ + DBGC ( dev, "802.11 %p received strange management request " + "(%04x)\n", dev, stype ); + break; + + default: + DBGC ( dev, "802.11 %p received unimplemented management " + "packet (%04x)\n", dev, stype ); + break; + } + + if ( ! keep ) + free_iob ( iob ); +} + +/* ---------- Packet handling functions ---------- */ + +/** + * Free buffers used by 802.11 fragment cache entry + * + * @v dev 802.11 device + * @v fcid Fragment cache entry index + * + * After this function, the referenced entry will be marked unused. + */ +static void net80211_free_frags ( struct net80211_device *dev, int fcid ) +{ + int j; + struct net80211_frag_cache *frag = &dev->frags[fcid]; + + for ( j = 0; j < 16; j++ ) { + if ( frag->iob[j] ) { + free_iob ( frag->iob[j] ); + frag->iob[j] = NULL; + } + } + + frag->seqnr = 0; + frag->start_ticks = 0; + frag->in_use = 0; +} + +/** + * Accumulate 802.11 fragments into one I/O buffer + * + * @v dev 802.11 device + * @v fcid Fragment cache entry index + * @v nfrags Number of fragments received + * @v size Sum of sizes of all fragments, including headers + * @ret iob I/O buffer containing reassembled packet + * + * This function does not free the fragment buffers. + */ +static struct io_buffer *net80211_accum_frags ( struct net80211_device *dev, + int fcid, int nfrags, int size ) +{ + struct net80211_frag_cache *frag = &dev->frags[fcid]; + int hdrsize = IEEE80211_TYP_FRAME_HEADER_LEN; + int nsize = size - hdrsize * ( nfrags - 1 ); + int i; + + struct io_buffer *niob = alloc_iob ( nsize ); + struct ieee80211_frame *hdr; + + /* Add the header from the first one... */ + memcpy ( iob_put ( niob, hdrsize ), frag->iob[0]->data, hdrsize ); + + /* ... and all the data from all of them. */ + for ( i = 0; i < nfrags; i++ ) { + int len = iob_len ( frag->iob[i] ) - hdrsize; + memcpy ( iob_put ( niob, len ), + frag->iob[i]->data + hdrsize, len ); + } + + /* Turn off the fragment bit. */ + hdr = niob->data; + hdr->fc &= ~IEEE80211_FC_MORE_FRAG; + + return niob; +} + +/** + * Handle receipt of 802.11 fragment + * + * @v dev 802.11 device + * @v iob I/O buffer containing fragment + * @v signal Signal strength with which fragment was received + */ +static void net80211_rx_frag ( struct net80211_device *dev, + struct io_buffer *iob, int signal ) +{ + struct ieee80211_frame *hdr = iob->data; + int fragnr = IEEE80211_FRAG ( hdr->seq ); + + if ( fragnr == 0 && ( hdr->fc & IEEE80211_FC_MORE_FRAG ) ) { + /* start a frag cache entry */ + int i, newest = -1; + u32 curr_ticks = currticks(), newest_ticks = 0; + u32 timeout = ticks_per_sec() * NET80211_FRAG_TIMEOUT; + + for ( i = 0; i < NET80211_NR_CONCURRENT_FRAGS; i++ ) { + if ( dev->frags[i].in_use == 0 ) + break; + + if ( dev->frags[i].start_ticks + timeout >= + curr_ticks ) { + net80211_free_frags ( dev, i ); + break; + } + + if ( dev->frags[i].start_ticks > newest_ticks ) { + newest = i; + newest_ticks = dev->frags[i].start_ticks; + } + } + + /* If we're being sent more concurrent fragmented + packets than we can handle, drop the newest so the + older ones have time to complete. */ + if ( i == NET80211_NR_CONCURRENT_FRAGS ) { + i = newest; + net80211_free_frags ( dev, i ); + } + + dev->frags[i].in_use = 1; + dev->frags[i].seqnr = IEEE80211_SEQNR ( hdr->seq ); + dev->frags[i].start_ticks = currticks(); + dev->frags[i].iob[0] = iob; + return; + } else { + int i; + for ( i = 0; i < NET80211_NR_CONCURRENT_FRAGS; i++ ) { + if ( dev->frags[i].in_use && dev->frags[i].seqnr == + IEEE80211_SEQNR ( hdr->seq ) ) + break; + } + if ( i == NET80211_NR_CONCURRENT_FRAGS ) { + /* Drop non-first not-in-cache fragments */ + DBGC ( dev, "802.11 %p dropped fragment fc=%04x " + "seq=%04x\n", dev, hdr->fc, hdr->seq ); + free_iob ( iob ); + return; + } + + dev->frags[i].iob[fragnr] = iob; + + if ( ! ( hdr->fc & IEEE80211_FC_MORE_FRAG ) ) { + int j, size = 0; + for ( j = 0; j < fragnr; j++ ) { + size += iob_len ( dev->frags[i].iob[j] ); + if ( dev->frags[i].iob[j] == NULL ) + break; + } + if ( j == fragnr ) { + /* We've got everything */ + struct io_buffer *niob = + net80211_accum_frags ( dev, i, fragnr, + size ); + net80211_free_frags ( dev, i ); + net80211_rx ( dev, niob, signal, 0 ); + } else { + DBGC ( dev, "802.11 %p dropping fragmented " + "packet due to out-of-order arrival, " + "fc=%04x seq=%04x\n", dev, hdr->fc, + hdr->seq ); + net80211_free_frags ( dev, i ); + } + } + } +} + +/** + * Handle receipt of 802.11 frame + * + * @v dev 802.11 device + * @v iob I/O buffer + * @v signal Received signal strength + * @v rate Bitrate at which frame was received, in 100 kbps units + * + * If the rate or signal is unknown, 0 should be passed. + */ +void net80211_rx ( struct net80211_device *dev, struct io_buffer *iob, + int signal, u16 rate ) +{ + struct ieee80211_frame *hdr = iob->data; + u16 type = hdr->fc & IEEE80211_FC_TYPE; + if ( ( hdr->fc & IEEE80211_FC_VERSION ) != IEEE80211_THIS_VERSION ) + goto drop; /* drop invalid-version packets */ + + if ( type == IEEE80211_TYPE_CTRL ) + goto drop; /* we don't handle control packets, + the hardware does */ + + if ( dev->last_rx_seq == hdr->seq ) + goto drop; /* avoid duplicate packet */ + dev->last_rx_seq = hdr->seq; + + if ( dev->hw->flags & NET80211_HW_RX_HAS_FCS ) { + /* discard the FCS */ + iob_unput ( iob, 4 ); + } + + /* Only decrypt packets from our BSSID, to avoid spurious errors */ + if ( ( hdr->fc & IEEE80211_FC_PROTECTED ) && + ! memcmp ( hdr->addr2, dev->bssid, ETH_ALEN ) ) { + /* Decrypt packet; record and drop if it fails */ + struct io_buffer *niob; + struct net80211_crypto *crypto = dev->crypto; + + if ( ! dev->crypto ) { + DBGC ( dev, "802.11 %p cannot decrypt packet " + "without a cryptosystem\n", dev ); + goto drop_crypt; + } + + if ( ( hdr->addr1[0] & 1 ) && dev->gcrypto ) { + /* Use group decryption if needed */ + crypto = dev->gcrypto; + } + + niob = crypto->decrypt ( crypto, iob ); + if ( ! niob ) { + DBGC ( dev, "802.11 %p decryption error\n", dev ); + goto drop_crypt; + } + free_iob ( iob ); + iob = niob; + hdr = iob->data; + } + + dev->last_signal = signal; + + /* Fragments go into the frag cache or get dropped. */ + if ( IEEE80211_FRAG ( hdr->seq ) != 0 + || ( hdr->fc & IEEE80211_FC_MORE_FRAG ) ) { + net80211_rx_frag ( dev, iob, signal ); + return; + } + + /* Management frames get handled, enqueued, or dropped. */ + if ( type == IEEE80211_TYPE_MGMT ) { + net80211_handle_mgmt ( dev, iob, signal ); + return; + } + + /* Data frames get dropped or sent to the net_device. */ + if ( ( hdr->fc & IEEE80211_FC_SUBTYPE ) != IEEE80211_STYPE_DATA ) + goto drop; /* drop QoS, CFP, or null data packets */ + + /* Update rate-control algorithm */ + if ( dev->rctl ) + rc80211_update_rx ( dev, hdr->fc & IEEE80211_FC_RETRY, rate ); + + /* Pass packet onward */ + if ( dev->state & NET80211_ASSOCIATED ) { + netdev_rx ( dev->netdev, iob ); + return; + } + + /* No association? Drop it. */ + goto drop; + + drop_crypt: + netdev_rx_err ( dev->netdev, NULL, EINVAL_CRYPTO_REQUEST ); + drop: + DBGC2 ( dev, "802.11 %p dropped packet fc=%04x seq=%04x\n", dev, + hdr->fc, hdr->seq ); + free_iob ( iob ); + return; +} + +/** Indicate an error in receiving a packet + * + * @v dev 802.11 device + * @v iob I/O buffer with received packet, or NULL + * @v rc Error code + * + * This logs the error with the wrapping net_device, and frees iob if + * it is passed. + */ +void net80211_rx_err ( struct net80211_device *dev, + struct io_buffer *iob, int rc ) +{ + netdev_rx_err ( dev->netdev, iob, rc ); +} + +/** Indicate the completed transmission of a packet + * + * @v dev 802.11 device + * @v iob I/O buffer of transmitted packet + * @v retries Number of times this packet was retransmitted + * @v rc Error code, or 0 for success + * + * This logs an error with the wrapping net_device if one occurred, + * and removes and frees the I/O buffer from its TX queue. The + * provided retry information is used to tune our transmission rate. + * + * If the packet did not need to be retransmitted because it was + * properly ACKed the first time, @a retries should be 0. + */ +void net80211_tx_complete ( struct net80211_device *dev, + struct io_buffer *iob, int retries, int rc ) +{ + /* Update rate-control algorithm */ + if ( dev->rctl ) + rc80211_update_tx ( dev, retries, rc ); + + /* Pass completion onward */ + netdev_tx_complete_err ( dev->netdev, iob, rc ); +} + +/** Common 802.11 errors */ +struct errortab common_wireless_errors[] __errortab = { + __einfo_errortab ( EINFO_EINVAL_CRYPTO_REQUEST ), + __einfo_errortab ( EINFO_ECONNRESET_UNSPECIFIED ), + __einfo_errortab ( EINFO_ECONNRESET_INACTIVITY ), + __einfo_errortab ( EINFO_ECONNRESET_4WAY_TIMEOUT ), + __einfo_errortab ( EINFO_ECONNRESET_8021X_FAILURE ), + __einfo_errortab ( EINFO_ECONNREFUSED_FAILURE ), + __einfo_errortab ( EINFO_ECONNREFUSED_ASSOC_DENIED ), + __einfo_errortab ( EINFO_ECONNREFUSED_AUTH_ALGO_UNSUPP ), +}; diff --git a/qemu/roms/ipxe/src/net/80211/rc80211.c b/qemu/roms/ipxe/src/net/80211/rc80211.c new file mode 100644 index 000000000..eea3bc908 --- /dev/null +++ b/qemu/roms/ipxe/src/net/80211/rc80211.c @@ -0,0 +1,372 @@ +/* + * Simple 802.11 rate-control algorithm for iPXE. + * + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdlib.h> +#include <ipxe/net80211.h> + +/** + * @file + * + * Simple 802.11 rate-control algorithm + */ + +/** @page rc80211 Rate control philosophy + * + * We want to maximize our transmission speed, to the extent that we + * can do that without dropping undue numbers of packets. We also + * don't want to take up very much code space, so our algorithm has to + * be pretty simple + * + * When we receive a packet, we know what rate it was transmitted at, + * and whether it had to be retransmitted to get to us. + * + * When we send a packet, we hear back how many times it had to be + * retried to get through, and whether it got through at all. + * + * Indications of TX success are more reliable than RX success, but RX + * information helps us know where to start. + * + * To handle all of this, we keep for each rate and each direction (TX + * and RX separately) some state information for the most recent + * packets on that rate and the number of packets for which we have + * information. The state is a 32-bit unsigned integer in which two + * bits represent a packet: 11 if it went through well, 10 if it went + * through with one retry, 01 if it went through with more than one + * retry, or 00 if it didn't go through at all. We define the + * "goodness" for a particular (rate, direction) combination as the + * sum of all the 2-bit fields, times 33, divided by the number of + * 2-bit fields containing valid information (16 except when we're + * starting out). The number produced is between 0 and 99; we use -1 + * for rates with less than 4 RX packets or 1 TX, as an indicator that + * we do not have enough information to rely on them. + * + * In deciding which rates are best, we find the weighted average of + * TX and RX goodness, where the weighting is by number of packets + * with data and TX packets are worth 4 times as much as RX packets. + * The weighted average is called "net goodness" and is also a number + * between 0 and 99. If 3 consecutive packets fail transmission + * outright, we automatically ratchet down the rate; otherwise, we + * switch to the best rate whenever the current rate's goodness falls + * below some threshold, and try increasing our rate when the goodness + * is very high. + * + * This system is optimized for iPXE's style of usage. Because normal + * operation always involves receiving something, we'll make our way + * to the best rate pretty quickly. We tend to follow the lead of the + * sending AP in choosing rates, but we won't use rates for long that + * don't work well for us in transmission. We assume iPXE won't be + * running for long enough that rate patterns will change much, so we + * don't have to keep time counters or the like. And if this doesn't + * work well in practice there are many ways it could be tweaked. + * + * To avoid staying at 1Mbps for a long time, we don't track any + * transmitted packets until we've set our rate based on received + * packets. + */ + +/** Two-bit packet status indicator for a packet with no retries */ +#define RC_PKT_OK 0x3 + +/** Two-bit packet status indicator for a packet with one retry */ +#define RC_PKT_RETRIED_ONCE 0x2 + +/** Two-bit packet status indicator for a TX packet with multiple retries + * + * It is not possible to tell whether an RX packet had one or multiple + * retries; we rely instead on the fact that failed RX packets won't + * get to us at all, so if we receive a lot of RX packets on a certain + * rate it must be pretty good. + */ +#define RC_PKT_RETRIED_MULTI 0x1 + +/** Two-bit packet status indicator for a TX packet that was never ACKed + * + * It is not possible to tell whether an RX packet was setn if it + * didn't get through to us, but if we don't see one we won't increase + * the goodness for its rate. This asymmetry is part of why TX packets + * are weighted much more heavily than RX. + */ +#define RC_PKT_FAILED 0x0 + +/** Number of times to weight TX packets more heavily than RX packets */ +#define RC_TX_FACTOR 4 + +/** Number of consecutive failed TX packets that cause an automatic rate drop */ +#define RC_TX_EMERG_FAIL 3 + +/** Minimum net goodness below which we will search for a better rate */ +#define RC_GOODNESS_MIN 85 + +/** Maximum net goodness above which we will try to increase our rate */ +#define RC_GOODNESS_MAX 95 + +/** Minimum (num RX + @c RC_TX_FACTOR * num TX) to use a certain rate */ +#define RC_UNCERTAINTY_THRESH 4 + +/** TX direction */ +#define TX 0 + +/** RX direction */ +#define RX 1 + +/** A rate control context */ +struct rc80211_ctx +{ + /** Goodness state for each rate, TX and RX */ + u32 goodness[2][NET80211_MAX_RATES]; + + /** Number of packets recorded for each rate */ + u8 count[2][NET80211_MAX_RATES]; + + /** Indication of whether we've set the device rate yet */ + int started; + + /** Counter of all packets sent and received */ + int packets; +}; + +/** + * Initialize rate-control algorithm + * + * @v dev 802.11 device + * @ret ctx Rate-control context, to be stored in @c dev->rctl + */ +struct rc80211_ctx * rc80211_init ( struct net80211_device *dev __unused ) +{ + struct rc80211_ctx *ret = zalloc ( sizeof ( *ret ) ); + return ret; +} + +/** + * Calculate net goodness for a certain rate + * + * @v ctx Rate-control context + * @v rate_idx Index of rate to calculate net goodness for + */ +static int rc80211_calc_net_goodness ( struct rc80211_ctx *ctx, + int rate_idx ) +{ + int sum[2], num[2], dir, pkt; + + for ( dir = 0; dir < 2; dir++ ) { + u32 good = ctx->goodness[dir][rate_idx]; + + num[dir] = ctx->count[dir][rate_idx]; + sum[dir] = 0; + + for ( pkt = 0; pkt < num[dir]; pkt++ ) + sum[dir] += ( good >> ( 2 * pkt ) ) & 0x3; + } + + if ( ( num[TX] * RC_TX_FACTOR + num[RX] ) < RC_UNCERTAINTY_THRESH ) + return -1; + + return ( 33 * ( sum[TX] * RC_TX_FACTOR + sum[RX] ) / + ( num[TX] * RC_TX_FACTOR + num[RX] ) ); +} + +/** + * Determine the best rate to switch to and return it + * + * @v dev 802.11 device + * @ret rate_idx Index of the best rate to switch to + */ +static int rc80211_pick_best ( struct net80211_device *dev ) +{ + struct rc80211_ctx *ctx = dev->rctl; + int best_net_good = 0, best_rate = -1, i; + + for ( i = 0; i < dev->nr_rates; i++ ) { + int net_good = rc80211_calc_net_goodness ( ctx, i ); + + if ( net_good > best_net_good || + ( best_net_good > RC_GOODNESS_MIN && + net_good > RC_GOODNESS_MIN ) ) { + best_net_good = net_good; + best_rate = i; + } + } + + if ( best_rate >= 0 ) { + int old_good = rc80211_calc_net_goodness ( ctx, dev->rate ); + if ( old_good != best_net_good ) + DBGC ( ctx, "802.11 RC %p switching from goodness " + "%d to %d\n", ctx, old_good, best_net_good ); + + ctx->started = 1; + return best_rate; + } + + return dev->rate; +} + +/** + * Set 802.11 device rate + * + * @v dev 802.11 device + * @v rate_idx Index of rate to switch to + * + * This is a thin wrapper around net80211_set_rate_idx to insert a + * debugging message where appropriate. + */ +static inline void rc80211_set_rate ( struct net80211_device *dev, + int rate_idx ) +{ + DBGC ( dev->rctl, "802.11 RC %p changing rate %d->%d Mbps\n", dev->rctl, + dev->rates[dev->rate] / 10, dev->rates[rate_idx] / 10 ); + + net80211_set_rate_idx ( dev, rate_idx ); +} + +/** + * Check rate-control state and change rate if necessary + * + * @v dev 802.11 device + */ +static void rc80211_maybe_set_new ( struct net80211_device *dev ) +{ + struct rc80211_ctx *ctx = dev->rctl; + int net_good; + + net_good = rc80211_calc_net_goodness ( ctx, dev->rate ); + + if ( ! ctx->started ) { + rc80211_set_rate ( dev, rc80211_pick_best ( dev ) ); + return; + } + + if ( net_good < 0 ) /* insufficient data */ + return; + + if ( net_good > RC_GOODNESS_MAX && dev->rate + 1 < dev->nr_rates ) { + int higher = rc80211_calc_net_goodness ( ctx, dev->rate + 1 ); + if ( higher > net_good || higher < 0 ) + rc80211_set_rate ( dev, dev->rate + 1 ); + else + rc80211_set_rate ( dev, rc80211_pick_best ( dev ) ); + } + + if ( net_good < RC_GOODNESS_MIN ) { + rc80211_set_rate ( dev, rc80211_pick_best ( dev ) ); + } +} + +/** + * Update rate-control state + * + * @v dev 802.11 device + * @v direction One of the direction constants TX or RX + * @v rate_idx Index of rate at which packet was sent or received + * @v retries Number of times packet was retried before success + * @v failed If nonzero, the packet failed to get through + */ +static void rc80211_update ( struct net80211_device *dev, int direction, + int rate_idx, int retries, int failed ) +{ + struct rc80211_ctx *ctx = dev->rctl; + u32 goodness = ctx->goodness[direction][rate_idx]; + + if ( ctx->count[direction][rate_idx] < 16 ) + ctx->count[direction][rate_idx]++; + + goodness <<= 2; + if ( failed ) + goodness |= RC_PKT_FAILED; + else if ( retries > 1 ) + goodness |= RC_PKT_RETRIED_MULTI; + else if ( retries ) + goodness |= RC_PKT_RETRIED_ONCE; + else + goodness |= RC_PKT_OK; + + ctx->goodness[direction][rate_idx] = goodness; + + ctx->packets++; + + rc80211_maybe_set_new ( dev ); +} + +/** + * Update rate-control state for transmitted packet + * + * @v dev 802.11 device + * @v retries Number of times packet was transmitted before success + * @v rc Return status code for transmission + */ +void rc80211_update_tx ( struct net80211_device *dev, int retries, int rc ) +{ + struct rc80211_ctx *ctx = dev->rctl; + + if ( ! ctx->started ) + return; + + rc80211_update ( dev, TX, dev->rate, retries, rc ); + + /* Check if the last RC_TX_EMERG_FAIL packets have all failed */ + if ( ! ( ctx->goodness[TX][dev->rate] & + ( ( 1 << ( 2 * RC_TX_EMERG_FAIL ) ) - 1 ) ) ) { + if ( dev->rate == 0 ) + DBGC ( dev->rctl, "802.11 RC %p saw %d consecutive " + "failed TX, but cannot lower rate any further\n", + dev->rctl, RC_TX_EMERG_FAIL ); + else { + DBGC ( dev->rctl, "802.11 RC %p lowering rate (%d->%d " + "Mbps) due to %d consecutive TX failures\n", + dev->rctl, dev->rates[dev->rate] / 10, + dev->rates[dev->rate - 1] / 10, + RC_TX_EMERG_FAIL ); + + rc80211_set_rate ( dev, dev->rate - 1 ); + } + } +} + +/** + * Update rate-control state for received packet + * + * @v dev 802.11 device + * @v retry Whether the received packet had been retransmitted + * @v rate Rate at which packet was received, in 100 kbps units + */ +void rc80211_update_rx ( struct net80211_device *dev, int retry, u16 rate ) +{ + int ridx; + + for ( ridx = 0; ridx < dev->nr_rates && dev->rates[ridx] != rate; + ridx++ ) + ; + if ( ridx >= dev->nr_rates ) + return; /* couldn't find the rate */ + + rc80211_update ( dev, RX, ridx, retry, 0 ); +} + +/** + * Free rate-control context + * + * @v ctx Rate-control context + */ +void rc80211_free ( struct rc80211_ctx *ctx ) +{ + free ( ctx ); +} diff --git a/qemu/roms/ipxe/src/net/80211/sec80211.c b/qemu/roms/ipxe/src/net/80211/sec80211.c new file mode 100644 index 000000000..d1bc75e90 --- /dev/null +++ b/qemu/roms/ipxe/src/net/80211/sec80211.c @@ -0,0 +1,518 @@ +/* + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <ipxe/ieee80211.h> +#include <ipxe/net80211.h> +#include <ipxe/sec80211.h> + +/** @file + * + * General secured-network routines required whenever any secure + * network support at all is compiled in. This involves things like + * installing keys, determining the type of security used by a probed + * network, and some small helper functions that take advantage of + * static data in this file. + */ + +/* Unsupported cryptosystem error numbers */ +#define ENOTSUP_WEP __einfo_error ( EINFO_ENOTSUP_WEP ) +#define EINFO_ENOTSUP_WEP __einfo_uniqify ( EINFO_ENOTSUP, \ + ( 0x10 | NET80211_CRYPT_WEP ), "WEP not supported" ) +#define ENOTSUP_TKIP __einfo_error ( EINFO_ENOTSUP_TKIP ) +#define EINFO_ENOTSUP_TKIP __einfo_uniqify ( EINFO_ENOTSUP, \ + ( 0x10 | NET80211_CRYPT_TKIP ), "TKIP not supported" ) +#define ENOTSUP_CCMP __einfo_error ( EINFO_ENOTSUP_CCMP ) +#define EINFO_ENOTSUP_CCMP __einfo_uniqify ( EINFO_ENOTSUP, \ + ( 0x10 | NET80211_CRYPT_CCMP ), "CCMP not supported" ) +#define ENOTSUP_CRYPT( crypt ) \ + EUNIQ ( EINFO_ENOTSUP, ( 0x10 | (crypt) ), \ + ENOTSUP_WEP, ENOTSUP_TKIP, ENOTSUP_CCMP ) + +/** Mapping from net80211 crypto/secprot types to RSN OUI descriptors */ +struct descriptor_map { + /** Value of net80211_crypto_alg or net80211_security_proto */ + u32 net80211_type; + + /** OUI+type in appropriate byte order, masked to exclude vendor */ + u32 oui_type; +}; + +/** Magic number in @a oui_type showing end of list */ +#define END_MAGIC 0xFFFFFFFF + +/** Mapping between net80211 cryptosystems and 802.11i cipher IDs */ +static struct descriptor_map rsn_cipher_map[] = { + { .net80211_type = NET80211_CRYPT_WEP, + .oui_type = IEEE80211_RSN_CTYPE_WEP40 }, + + { .net80211_type = NET80211_CRYPT_WEP, + .oui_type = IEEE80211_RSN_CTYPE_WEP104 }, + + { .net80211_type = NET80211_CRYPT_TKIP, + .oui_type = IEEE80211_RSN_CTYPE_TKIP }, + + { .net80211_type = NET80211_CRYPT_CCMP, + .oui_type = IEEE80211_RSN_CTYPE_CCMP }, + + { .net80211_type = NET80211_CRYPT_UNKNOWN, + .oui_type = END_MAGIC }, +}; + +/** Mapping between net80211 handshakers and 802.11i AKM IDs */ +static struct descriptor_map rsn_akm_map[] = { + { .net80211_type = NET80211_SECPROT_EAP, + .oui_type = IEEE80211_RSN_ATYPE_8021X }, + + { .net80211_type = NET80211_SECPROT_PSK, + .oui_type = IEEE80211_RSN_ATYPE_PSK }, + + { .net80211_type = NET80211_SECPROT_UNKNOWN, + .oui_type = END_MAGIC }, +}; + + +/** + * Install 802.11 cryptosystem + * + * @v which Pointer to the cryptosystem structure to install in + * @v crypt Cryptosystem ID number + * @v key Encryption key to use + * @v len Length of encryption key + * @v rsc Initial receive sequence counter, if applicable + * @ret rc Return status code + * + * The encryption key will not be accessed via the provided pointer + * after this function returns, so you may keep it on the stack. + * + * @a which must point to either @c dev->crypto (for the normal case + * of installing a unicast cryptosystem) or @c dev->gcrypto (to + * install a cryptosystem that will be used only for decrypting + * group-source frames). + */ +int sec80211_install ( struct net80211_crypto **which, + enum net80211_crypto_alg crypt, + const void *key, int len, const void *rsc ) +{ + struct net80211_crypto *crypto = *which; + struct net80211_crypto *tbl_crypto; + + /* Remove old crypto if it exists */ + free ( *which ); + *which = NULL; + + if ( crypt == NET80211_CRYPT_NONE ) { + DBG ( "802.11-Sec not installing null cryptography\n" ); + return 0; + } + + /* Find cryptosystem to use */ + for_each_table_entry ( tbl_crypto, NET80211_CRYPTOS ) { + if ( tbl_crypto->algorithm == crypt ) { + crypto = zalloc ( sizeof ( *crypto ) + + tbl_crypto->priv_len ); + if ( ! crypto ) { + DBG ( "802.11-Sec out of memory\n" ); + return -ENOMEM; + } + + memcpy ( crypto, tbl_crypto, sizeof ( *crypto ) ); + crypto->priv = ( ( void * ) crypto + + sizeof ( *crypto ) ); + break; + } + } + + if ( ! crypto ) { + DBG ( "802.11-Sec no support for cryptosystem %d\n", crypt ); + return -ENOTSUP_CRYPT ( crypt ); + } + + *which = crypto; + + DBG ( "802.11-Sec installing cryptosystem %d as %p with key of " + "length %d\n", crypt, crypto, len ); + + return crypto->init ( crypto, key, len, rsc ); +} + + +/** + * Determine net80211 crypto or handshaking type value to return for RSN info + * + * @v rsnp Pointer to next descriptor count field in RSN IE + * @v rsn_end Pointer to end of RSN IE + * @v map Descriptor map to use + * @v tbl_start Start of linker table to examine for iPXE support + * @v tbl_end End of linker table to examine for iPXE support + * @ret rsnp Updated to point to first byte after descriptors + * @ret map_ent Descriptor map entry of translation to use + * + * The entries in the linker table must be either net80211_crypto or + * net80211_handshaker structures, and @a tbl_stride must be set to + * sizeof() the appropriate one. + * + * This function expects @a rsnp to point at a two-byte descriptor + * count followed by a list of four-byte cipher or AKM descriptors; it + * will return @c NULL if the input packet is malformed, and otherwise + * set @a rsnp to the first byte it has not looked at. It will return + * the first cipher in the list that is supported by the current build + * of iPXE, or the first of all if none are supported. + * + * We play rather fast and loose with type checking, because this + * function is only called from two well-defined places in the + * RSN-checking code. Don't try to use it for anything else. + */ +static struct descriptor_map * rsn_pick_desc ( u8 **rsnp, u8 *rsn_end, + struct descriptor_map *map, + void *tbl_start, void *tbl_end ) +{ + int ndesc; + int ok = 0; + struct descriptor_map *map_ent, *map_ret = NULL; + u8 *rsn = *rsnp; + void *tblp; + size_t tbl_stride = ( map == rsn_cipher_map ? + sizeof ( struct net80211_crypto ) : + sizeof ( struct net80211_handshaker ) ); + + if ( map != rsn_cipher_map && map != rsn_akm_map ) + return NULL; + + /* Determine which types we support */ + for ( tblp = tbl_start; tblp < tbl_end; tblp += tbl_stride ) { + struct net80211_crypto *crypto = tblp; + struct net80211_handshaker *hs = tblp; + + if ( map == rsn_cipher_map ) + ok |= ( 1 << crypto->algorithm ); + else + ok |= ( 1 << hs->protocol ); + } + + /* RSN sanity checks */ + if ( rsn + 2 > rsn_end ) { + DBG ( "RSN detect: malformed descriptor count\n" ); + return NULL; + } + + ndesc = *( u16 * ) rsn; + rsn += 2; + + if ( ! ndesc ) { + DBG ( "RSN detect: no descriptors\n" ); + return NULL; + } + + /* Determine which net80211 crypto types are listed */ + while ( ndesc-- ) { + u32 desc; + + if ( rsn + 4 > rsn_end ) { + DBG ( "RSN detect: malformed descriptor (%d left)\n", + ndesc ); + return NULL; + } + + desc = *( u32 * ) rsn; + rsn += 4; + + for ( map_ent = map; map_ent->oui_type != END_MAGIC; map_ent++ ) + if ( map_ent->oui_type == ( desc & OUI_TYPE_MASK ) ) + break; + + /* Use first cipher as a fallback */ + if ( ! map_ret ) + map_ret = map_ent; + + /* Once we find one we support, use it */ + if ( ok & ( 1 << map_ent->net80211_type ) ) { + map_ret = map_ent; + break; + } + } + + if ( ndesc > 0 ) + rsn += 4 * ndesc; + + *rsnp = rsn; + return map_ret; +} + + +/** + * Find the RSN or WPA information element in the provided beacon frame + * + * @v ie Pointer to first information element to check + * @v ie_end Pointer to end of information element space + * @ret is_rsn TRUE if returned IE is RSN, FALSE if it's WPA + * @ret end Pointer to byte immediately after last byte of data + * @ret data Pointer to first byte of data (the `version' field) + * + * If both an RSN and a WPA information element are found, this + * function will return the first one seen, which by ordering rules + * should always prefer the newer RSN IE. + * + * If no RSN or WPA infomration element is found, returns @c NULL and + * leaves @a is_rsn and @a end in an undefined state. + * + * This function will not return a pointer to an information element + * that states it extends past the tail of the io_buffer, or whose @a + * version field is incorrect. + */ +u8 * sec80211_find_rsn ( union ieee80211_ie *ie, void *ie_end, + int *is_rsn, u8 **end ) +{ + u8 *rsn = NULL; + + if ( ! ieee80211_ie_bound ( ie, ie_end ) ) + return NULL; + + while ( ie ) { + if ( ie->id == IEEE80211_IE_VENDOR && + ie->vendor.oui == IEEE80211_WPA_OUI_VEN ) { + DBG ( "RSN detect: old-style WPA IE found\n" ); + rsn = &ie->vendor.data[0]; + *end = rsn + ie->len - 4; + *is_rsn = 0; + } else if ( ie->id == IEEE80211_IE_RSN ) { + DBG ( "RSN detect: 802.11i RSN IE found\n" ); + rsn = ( u8 * ) &ie->rsn.version; + *end = rsn + ie->len; + *is_rsn = 1; + } + + if ( rsn && ( *end > ( u8 * ) ie_end || rsn >= *end || + *( u16 * ) rsn != IEEE80211_RSN_VERSION ) ) { + DBG ( "RSN detect: malformed RSN IE or unknown " + "version, keep trying\n" ); + rsn = NULL; + } + + if ( rsn ) + break; + + ie = ieee80211_next_ie ( ie, ie_end ); + } + + if ( ! ie ) { + DBG ( "RSN detect: no RSN IE found\n" ); + return NULL; + } + + return rsn; +} + + +/** + * Detect crypto and AKM types from RSN information element + * + * @v is_rsn If TRUE, IE is a new-style RSN information element + * @v start Pointer to first byte of @a version field + * @v end Pointer to first byte not in the RSN IE + * @ret secprot Security handshaking protocol used by network + * @ret crypt Cryptosystem used by network + * @ret rc Return status code + * + * If the IE cannot be parsed, returns an error indication and leaves + * @a secprot and @a crypt unchanged. + */ +int sec80211_detect_ie ( int is_rsn, u8 *start, u8 *end, + enum net80211_security_proto *secprot, + enum net80211_crypto_alg *crypt ) +{ + enum net80211_security_proto sp; + enum net80211_crypto_alg cr; + struct descriptor_map *map; + u8 *rsn = start; + + /* Set some defaults */ + cr = ( is_rsn ? NET80211_CRYPT_CCMP : NET80211_CRYPT_TKIP ); + sp = NET80211_SECPROT_EAP; + + rsn += 2; /* version - already checked */ + rsn += 4; /* group cipher - we don't use it here */ + + if ( rsn >= end ) + goto done; + + /* Pick crypto algorithm */ + map = rsn_pick_desc ( &rsn, end, rsn_cipher_map, + table_start ( NET80211_CRYPTOS ), + table_end ( NET80211_CRYPTOS ) ); + if ( ! map ) + goto invalid_rsn; + + cr = map->net80211_type; + + if ( rsn >= end ) + goto done; + + /* Pick handshaking algorithm */ + map = rsn_pick_desc ( &rsn, end, rsn_akm_map, + table_start ( NET80211_HANDSHAKERS ), + table_end ( NET80211_HANDSHAKERS ) ); + if ( ! map ) + goto invalid_rsn; + + sp = map->net80211_type; + + done: + DBG ( "RSN detect: OK, crypto type %d, secprot type %d\n", cr, sp ); + *secprot = sp; + *crypt = cr; + return 0; + + invalid_rsn: + DBG ( "RSN detect: invalid RSN IE\n" ); + return -EINVAL; +} + + +/** + * Detect the cryptosystem and handshaking protocol used by an 802.11 network + * + * @v iob I/O buffer containing beacon frame + * @ret secprot Security handshaking protocol used by network + * @ret crypt Cryptosystem used by network + * @ret rc Return status code + * + * This function uses weak linkage, as it must be called from generic + * contexts but should only be linked in if some encryption is + * supported; you must test its address against @c NULL before calling + * it. If it does not exist, any network with the PRIVACY bit set in + * beacon->capab should be considered unknown. + */ +int sec80211_detect ( struct io_buffer *iob, + enum net80211_security_proto *secprot, + enum net80211_crypto_alg *crypt ) +{ + struct ieee80211_frame *hdr = iob->data; + struct ieee80211_beacon *beacon = + ( struct ieee80211_beacon * ) hdr->data; + u8 *rsn, *rsn_end; + int is_rsn, rc; + + *crypt = NET80211_CRYPT_UNKNOWN; + *secprot = NET80211_SECPROT_UNKNOWN; + + /* Find RSN or WPA IE */ + if ( ! ( rsn = sec80211_find_rsn ( beacon->info_element, iob->tail, + &is_rsn, &rsn_end ) ) ) { + /* No security IE at all; either WEP or no security. */ + *secprot = NET80211_SECPROT_NONE; + + if ( beacon->capability & IEEE80211_CAPAB_PRIVACY ) + *crypt = NET80211_CRYPT_WEP; + else + *crypt = NET80211_CRYPT_NONE; + + return 0; + } + + /* Determine type of security */ + if ( ( rc = sec80211_detect_ie ( is_rsn, rsn, rsn_end, secprot, + crypt ) ) == 0 ) + return 0; + + /* If we get here, the RSN IE was invalid */ + + *crypt = NET80211_CRYPT_UNKNOWN; + *secprot = NET80211_SECPROT_UNKNOWN; + DBG ( "Failed to handle RSN IE:\n" ); + DBG_HD ( rsn, rsn_end - rsn ); + return rc; +} + + +/** + * Determine RSN descriptor for specified net80211 ID + * + * @v id net80211 ID value + * @v rsnie Whether to return a new-format (RSN IE) descriptor + * @v map Map to use in translation + * @ret desc RSN descriptor, or 0 on error + * + * If @a rsnie is false, returns an old-format (WPA vendor IE) + * descriptor. + */ +static u32 rsn_get_desc ( unsigned id, int rsnie, struct descriptor_map *map ) +{ + u32 vendor = ( rsnie ? IEEE80211_RSN_OUI : IEEE80211_WPA_OUI ); + + for ( ; map->oui_type != END_MAGIC; map++ ) { + if ( map->net80211_type == id ) + return map->oui_type | vendor; + } + + return 0; +} + +/** + * Determine RSN descriptor for specified net80211 cryptosystem number + * + * @v crypt Cryptosystem number + * @v rsnie Whether to return a new-format (RSN IE) descriptor + * @ret desc RSN descriptor + * + * If @a rsnie is false, returns an old-format (WPA vendor IE) + * descriptor. + */ +u32 sec80211_rsn_get_crypto_desc ( enum net80211_crypto_alg crypt, int rsnie ) +{ + return rsn_get_desc ( crypt, rsnie, rsn_cipher_map ); +} + +/** + * Determine RSN descriptor for specified net80211 handshaker number + * + * @v secprot Handshaker number + * @v rsnie Whether to return a new-format (RSN IE) descriptor + * @ret desc RSN descriptor + * + * If @a rsnie is false, returns an old-format (WPA vendor IE) + * descriptor. + */ +u32 sec80211_rsn_get_akm_desc ( enum net80211_security_proto secprot, + int rsnie ) +{ + return rsn_get_desc ( secprot, rsnie, rsn_akm_map ); +} + +/** + * Determine net80211 cryptosystem number from RSN descriptor + * + * @v desc RSN descriptor + * @ret crypt net80211 cryptosystem enumeration value + */ +enum net80211_crypto_alg sec80211_rsn_get_net80211_crypt ( u32 desc ) +{ + struct descriptor_map *map = rsn_cipher_map; + + for ( ; map->oui_type != END_MAGIC; map++ ) { + if ( map->oui_type == ( desc & OUI_TYPE_MASK ) ) + break; + } + + return map->net80211_type; +} diff --git a/qemu/roms/ipxe/src/net/80211/wep.c b/qemu/roms/ipxe/src/net/80211/wep.c new file mode 100644 index 000000000..e22ac8998 --- /dev/null +++ b/qemu/roms/ipxe/src/net/80211/wep.c @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <ipxe/net80211.h> +#include <ipxe/sec80211.h> +#include <ipxe/crypto.h> +#include <ipxe/arc4.h> +#include <ipxe/crc32.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +/** @file + * + * The WEP wireless encryption method (insecure!) + * + * The data field in a WEP-encrypted packet contains a 3-byte + * initialisation vector, one-byte Key ID field (only the bottom two + * bits are ever used), encrypted data, and a 4-byte encrypted CRC of + * the plaintext data, called the ICV. To decrypt it, the IV is + * prepended to the shared key and the data stream (including ICV) is + * run through the ARC4 stream cipher; if the ICV matches a CRC32 + * calculated on the plaintext, the packet is valid. + * + * For efficiency and code-size reasons, this file assumes it is + * running on a little-endian machine. + */ + +/** Length of WEP initialisation vector */ +#define WEP_IV_LEN 3 + +/** Length of WEP key ID byte */ +#define WEP_KID_LEN 1 + +/** Length of WEP ICV checksum */ +#define WEP_ICV_LEN 4 + +/** Maximum length of WEP key */ +#define WEP_MAX_KEY 16 + +/** Amount of data placed before the encrypted bytes */ +#define WEP_HEADER_LEN 4 + +/** Amount of data placed after the encrypted bytes */ +#define WEP_TRAILER_LEN 4 + +/** Total WEP overhead bytes */ +#define WEP_OVERHEAD 8 + +/** Context for WEP encryption and decryption */ +struct wep_ctx +{ + /** Encoded WEP key + * + * The actual key bytes are stored beginning at offset 3, to + * leave room for easily inserting the IV before a particular + * operation. + */ + u8 key[WEP_IV_LEN + WEP_MAX_KEY]; + + /** Length of WEP key (not including IV bytes) */ + int keylen; + + /** ARC4 context */ + struct arc4_ctx arc4; +}; + +/** + * Initialize WEP algorithm + * + * @v crypto 802.11 cryptographic algorithm + * @v key WEP key to use + * @v keylen Length of WEP key + * @v rsc Initial receive sequence counter (unused) + * @ret rc Return status code + * + * Standard key lengths are 5 and 13 bytes; 16-byte keys are + * occasionally supported as an extension to the standard. + */ +static int wep_init ( struct net80211_crypto *crypto, const void *key, + int keylen, const void *rsc __unused ) +{ + struct wep_ctx *ctx = crypto->priv; + + ctx->keylen = ( keylen > WEP_MAX_KEY ? WEP_MAX_KEY : keylen ); + memcpy ( ctx->key + WEP_IV_LEN, key, ctx->keylen ); + + return 0; +} + +/** + * Encrypt packet using WEP + * + * @v crypto 802.11 cryptographic algorithm + * @v iob I/O buffer of plaintext packet + * @ret eiob Newly allocated I/O buffer for encrypted packet, or NULL + * + * If memory allocation fails, @c NULL is returned. + */ +static struct io_buffer * wep_encrypt ( struct net80211_crypto *crypto, + struct io_buffer *iob ) +{ + struct wep_ctx *ctx = crypto->priv; + struct io_buffer *eiob; + struct ieee80211_frame *hdr; + const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN; + int datalen = iob_len ( iob ) - hdrlen; + int newlen = hdrlen + datalen + WEP_OVERHEAD; + u32 iv, icv; + + eiob = alloc_iob ( newlen ); + if ( ! eiob ) + return NULL; + + memcpy ( iob_put ( eiob, hdrlen ), iob->data, hdrlen ); + hdr = eiob->data; + hdr->fc |= IEEE80211_FC_PROTECTED; + + /* Calculate IV, put it in the header (with key ID byte = 0), and + set it up at the start of the encryption key. */ + iv = random() & 0xffffff; /* IV in bottom 3 bytes, top byte = KID = 0 */ + memcpy ( iob_put ( eiob, WEP_HEADER_LEN ), &iv, WEP_HEADER_LEN ); + memcpy ( ctx->key, &iv, WEP_IV_LEN ); + + /* Encrypt the data using RC4 */ + cipher_setkey ( &arc4_algorithm, &ctx->arc4, ctx->key, + ctx->keylen + WEP_IV_LEN ); + cipher_encrypt ( &arc4_algorithm, &ctx->arc4, iob->data + hdrlen, + iob_put ( eiob, datalen ), datalen ); + + /* Add ICV */ + icv = ~crc32_le ( ~0, iob->data + hdrlen, datalen ); + cipher_encrypt ( &arc4_algorithm, &ctx->arc4, &icv, + iob_put ( eiob, WEP_ICV_LEN ), WEP_ICV_LEN ); + + return eiob; +} + +/** + * Decrypt packet using WEP + * + * @v crypto 802.11 cryptographic algorithm + * @v eiob I/O buffer of encrypted packet + * @ret iob Newly allocated I/O buffer for plaintext packet, or NULL + * + * If a consistency check for the decryption fails (usually indicating + * an invalid key), @c NULL is returned. + */ +static struct io_buffer * wep_decrypt ( struct net80211_crypto *crypto, + struct io_buffer *eiob ) +{ + struct wep_ctx *ctx = crypto->priv; + struct io_buffer *iob; + struct ieee80211_frame *hdr; + const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN; + int datalen = iob_len ( eiob ) - hdrlen - WEP_OVERHEAD; + int newlen = hdrlen + datalen; + u32 iv, icv, crc; + + iob = alloc_iob ( newlen ); + if ( ! iob ) + return NULL; + + memcpy ( iob_put ( iob, hdrlen ), eiob->data, hdrlen ); + hdr = iob->data; + hdr->fc &= ~IEEE80211_FC_PROTECTED; + + /* Strip off IV and use it to initialize cryptosystem */ + memcpy ( &iv, eiob->data + hdrlen, 4 ); + iv &= 0xffffff; /* ignore key ID byte */ + memcpy ( ctx->key, &iv, WEP_IV_LEN ); + + /* Decrypt the data using RC4 */ + cipher_setkey ( &arc4_algorithm, &ctx->arc4, ctx->key, + ctx->keylen + WEP_IV_LEN ); + cipher_decrypt ( &arc4_algorithm, &ctx->arc4, eiob->data + hdrlen + + WEP_HEADER_LEN, iob_put ( iob, datalen ), datalen ); + + /* Strip off ICV and verify it */ + cipher_decrypt ( &arc4_algorithm, &ctx->arc4, eiob->data + hdrlen + + WEP_HEADER_LEN + datalen, &icv, WEP_ICV_LEN ); + crc = ~crc32_le ( ~0, iob->data + hdrlen, datalen ); + if ( crc != icv ) { + DBGC ( crypto, "WEP %p CRC mismatch: expect %08x, get %08x\n", + crypto, icv, crc ); + free_iob ( iob ); + return NULL; + } + return iob; +} + +/** WEP cryptosystem for 802.11 */ +struct net80211_crypto wep_crypto __net80211_crypto = { + .algorithm = NET80211_CRYPT_WEP, + .init = wep_init, + .encrypt = wep_encrypt, + .decrypt = wep_decrypt, + .priv_len = sizeof ( struct wep_ctx ), +}; + +/** + * Initialize trivial 802.11 security handshaker + * + * @v dev 802.11 device + * @v ctx Security handshaker + * + * This simply fetches a WEP key from netX/key, and if it exists, + * installs WEP cryptography on the 802.11 device. No real handshaking + * is performed. + */ +static int trivial_init ( struct net80211_device *dev ) +{ + u8 key[WEP_MAX_KEY]; /* support up to 128-bit keys */ + int len; + int rc; + + if ( dev->associating && + dev->associating->crypto == NET80211_CRYPT_NONE ) + return 0; /* no crypto? OK. */ + + len = fetch_raw_setting ( netdev_settings ( dev->netdev ), + &net80211_key_setting, key, WEP_MAX_KEY ); + + if ( len <= 0 ) { + DBGC ( dev, "802.11 %p cannot do WEP without a key\n", dev ); + return -EACCES; + } + + /* Full 128-bit keys are a nonstandard extension, but they're + utterly trivial to support, so we do. */ + if ( len != 5 && len != 13 && len != 16 ) { + DBGC ( dev, "802.11 %p invalid WEP key length %d\n", + dev, len ); + return -EINVAL; + } + + DBGC ( dev, "802.11 %p installing %d-bit WEP\n", dev, len * 8 ); + + rc = sec80211_install ( &dev->crypto, NET80211_CRYPT_WEP, key, len, + NULL ); + if ( rc < 0 ) + return rc; + + return 0; +} + +/** + * Check for key change on trivial 802.11 security handshaker + * + * @v dev 802.11 device + * @v ctx Security handshaker + */ +static int trivial_change_key ( struct net80211_device *dev ) +{ + u8 key[WEP_MAX_KEY]; + int len; + int change = 0; + + /* If going from WEP to clear, or something else to WEP, reassociate. */ + if ( ! dev->crypto || ( dev->crypto->init != wep_init ) ) + change ^= 1; + + len = fetch_raw_setting ( netdev_settings ( dev->netdev ), + &net80211_key_setting, key, WEP_MAX_KEY ); + if ( len <= 0 ) + change ^= 1; + + /* Changing crypto type => return nonzero to reassociate. */ + if ( change ) + return -EINVAL; + + /* Going from no crypto to still no crypto => nothing to do. */ + if ( len <= 0 ) + return 0; + + /* Otherwise, reinitialise WEP with new key. */ + return wep_init ( dev->crypto, key, len, NULL ); +} + +/** Trivial 802.11 security handshaker */ +struct net80211_handshaker trivial_handshaker __net80211_handshaker = { + .protocol = NET80211_SECPROT_NONE, + .init = trivial_init, + .change_key = trivial_change_key, + .priv_len = 0, +}; diff --git a/qemu/roms/ipxe/src/net/80211/wpa.c b/qemu/roms/ipxe/src/net/80211/wpa.c new file mode 100644 index 000000000..e2c4945f9 --- /dev/null +++ b/qemu/roms/ipxe/src/net/80211/wpa.c @@ -0,0 +1,915 @@ +/* + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <ipxe/net80211.h> +#include <ipxe/sec80211.h> +#include <ipxe/wpa.h> +#include <ipxe/eapol.h> +#include <ipxe/crypto.h> +#include <ipxe/arc4.h> +#include <ipxe/crc32.h> +#include <ipxe/sha1.h> +#include <ipxe/hmac.h> +#include <ipxe/list.h> +#include <ipxe/ethernet.h> +#include <ipxe/rbg.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <byteswap.h> + +/** @file + * + * Handler for the aspects of WPA handshaking that are independent of + * 802.1X/PSK or TKIP/CCMP; this mostly involves the 4-Way Handshake. + */ + +/** List of WPA contexts in active use. */ +struct list_head wpa_contexts = LIST_HEAD_INIT ( wpa_contexts ); + + +/** + * Return an error code and deauthenticate + * + * @v ctx WPA common context + * @v rc Return status code + * @ret rc The passed return status code + */ +static int wpa_fail ( struct wpa_common_ctx *ctx, int rc ) +{ + net80211_deauthenticate ( ctx->dev, rc ); + return rc; +} + + +/** + * Find a cryptosystem handler structure from a crypto ID + * + * @v crypt Cryptosystem ID + * @ret crypto Cryptosystem handler structure + * + * If support for @a crypt is not compiled in to iPXE, or if @a crypt + * is NET80211_CRYPT_UNKNOWN, returns @c NULL. + */ +static struct net80211_crypto * +wpa_find_cryptosystem ( enum net80211_crypto_alg crypt ) +{ + struct net80211_crypto *crypto; + + for_each_table_entry ( crypto, NET80211_CRYPTOS ) { + if ( crypto->algorithm == crypt ) + return crypto; + } + + return NULL; +} + + +/** + * Find WPA key integrity and encryption handler from key version field + * + * @v ver Version bits of EAPOL-Key info field + * @ret kie Key integrity and encryption handler + */ +struct wpa_kie * wpa_find_kie ( int version ) +{ + struct wpa_kie *kie; + + for_each_table_entry ( kie, WPA_KIES ) { + if ( kie->version == version ) + return kie; + } + + return NULL; +} + + +/** + * Construct RSN or WPA information element + * + * @v dev 802.11 device + * @ret ie_ret RSN or WPA information element + * @ret rc Return status code + * + * This function allocates, fills, and returns a RSN or WPA + * information element suitable for including in an association + * request frame to the network identified by @c dev->associating. + * If it is impossible to construct an information element consistent + * with iPXE's capabilities that is compatible with that network, or + * if none should be sent because that network's beacon included no + * security information, returns an error indication and leaves + * @a ie_ret unchanged. + * + * The returned IE will be of the same type (RSN or WPA) as was + * included in the beacon for the network it is destined for. + */ +int wpa_make_rsn_ie ( struct net80211_device *dev, union ieee80211_ie **ie_ret ) +{ + u8 *rsn, *rsn_end; + int is_rsn; + u32 group_cipher; + enum net80211_crypto_alg gcrypt; + int ie_len; + u8 *iep; + struct ieee80211_ie_rsn *ie; + struct ieee80211_frame *hdr; + struct ieee80211_beacon *beacon; + + if ( ! dev->associating ) { + DBG ( "WPA: Can't make RSN IE for a non-associating device\n" ); + return -EINVAL; + } + + hdr = dev->associating->beacon->data; + beacon = ( struct ieee80211_beacon * ) hdr->data; + rsn = sec80211_find_rsn ( beacon->info_element, + dev->associating->beacon->tail, &is_rsn, + &rsn_end ); + if ( ! rsn ) { + DBG ( "WPA: Can't make RSN IE when we didn't get one\n" ); + return -EINVAL; + } + + rsn += 2; /* skip version */ + group_cipher = *( u32 * ) rsn; + gcrypt = sec80211_rsn_get_net80211_crypt ( group_cipher ); + + if ( ! wpa_find_cryptosystem ( gcrypt ) || + ! wpa_find_cryptosystem ( dev->associating->crypto ) ) { + DBG ( "WPA: No support for (GC:%d, PC:%d)\n", + gcrypt, dev->associating->crypto ); + return -ENOTSUP; + } + + /* Everything looks good - make our IE. */ + + /* WPA IEs need 4 more bytes for the OUI+type */ + ie_len = ieee80211_rsn_size ( 1, 1, 0, is_rsn ) + ( 4 * ! is_rsn ); + iep = malloc ( ie_len ); + if ( ! iep ) + return -ENOMEM; + + *ie_ret = ( union ieee80211_ie * ) iep; + + /* Store ID and length bytes. */ + *iep++ = ( is_rsn ? IEEE80211_IE_RSN : IEEE80211_IE_VENDOR ); + *iep++ = ie_len - 2; + + /* Store OUI+type for WPA IEs. */ + if ( ! is_rsn ) { + *( u32 * ) iep = IEEE80211_WPA_OUI_VEN; + iep += 4; + } + + /* If this is a WPA IE, the id and len bytes in the + ieee80211_ie_rsn structure will not be valid, but by doing + the cast we can fill all the other fields much more + readily. */ + + ie = ( struct ieee80211_ie_rsn * ) ( iep - 2 ); + ie->version = IEEE80211_RSN_VERSION; + ie->group_cipher = group_cipher; + ie->pairwise_count = 1; + ie->pairwise_cipher[0] = + sec80211_rsn_get_crypto_desc ( dev->associating->crypto, + is_rsn ); + ie->akm_count = 1; + ie->akm_list[0] = + sec80211_rsn_get_akm_desc ( dev->associating->handshaking, + is_rsn ); + if ( is_rsn ) { + ie->rsn_capab = 0; + ie->pmkid_count = 0; + } + + return 0; +} + + +/** + * Set up generic WPA support to handle 4-Way Handshake + * + * @v dev 802.11 device + * @v ctx WPA common context + * @v pmk Pairwise Master Key to use for session + * @v pmk_len Length of PMK, almost always 32 + * @ret rc Return status code + */ +int wpa_start ( struct net80211_device *dev, struct wpa_common_ctx *ctx, + const void *pmk, size_t pmk_len ) +{ + struct io_buffer *iob; + struct ieee80211_frame *hdr; + struct ieee80211_beacon *beacon; + u8 *ap_rsn_ie = NULL, *ap_rsn_ie_end; + + if ( ! dev->rsn_ie || ! dev->associating ) + return -EINVAL; + + ctx->dev = dev; + memcpy ( ctx->pmk, pmk, ctx->pmk_len = pmk_len ); + ctx->state = WPA_READY; + ctx->replay = ~0ULL; + + iob = dev->associating->beacon; + hdr = iob->data; + beacon = ( struct ieee80211_beacon * ) hdr->data; + ap_rsn_ie = sec80211_find_rsn ( beacon->info_element, iob->tail, + &ctx->ap_rsn_is_rsn, &ap_rsn_ie_end ); + if ( ap_rsn_ie ) { + ctx->ap_rsn_ie = malloc ( ap_rsn_ie_end - ap_rsn_ie ); + if ( ! ctx->ap_rsn_ie ) + return -ENOMEM; + memcpy ( ctx->ap_rsn_ie, ap_rsn_ie, ap_rsn_ie_end - ap_rsn_ie ); + ctx->ap_rsn_ie_len = ap_rsn_ie_end - ap_rsn_ie; + } else { + return -ENOENT; + } + + ctx->crypt = dev->associating->crypto; + ctx->gcrypt = NET80211_CRYPT_UNKNOWN; + + list_add_tail ( &ctx->list, &wpa_contexts ); + return 0; +} + + +/** + * Disable handling of received WPA handshake frames + * + * @v dev 802.11 device + */ +void wpa_stop ( struct net80211_device *dev ) +{ + struct wpa_common_ctx *ctx, *tmp; + + list_for_each_entry_safe ( ctx, tmp, &wpa_contexts, list ) { + if ( ctx->dev == dev ) { + free ( ctx->ap_rsn_ie ); + ctx->ap_rsn_ie = NULL; + list_del ( &ctx->list ); + } + } +} + + +/** + * Derive pairwise transient key + * + * @v ctx WPA common context + */ +static void wpa_derive_ptk ( struct wpa_common_ctx *ctx ) +{ + struct { + u8 mac1[ETH_ALEN]; + u8 mac2[ETH_ALEN]; + u8 nonce1[WPA_NONCE_LEN]; + u8 nonce2[WPA_NONCE_LEN]; + } __attribute__ (( packed )) ptk_data; + + /* The addresses and nonces are stored in numerical order (!) */ + + if ( memcmp ( ctx->dev->netdev->ll_addr, ctx->dev->bssid, + ETH_ALEN ) < 0 ) { + memcpy ( ptk_data.mac1, ctx->dev->netdev->ll_addr, ETH_ALEN ); + memcpy ( ptk_data.mac2, ctx->dev->bssid, ETH_ALEN ); + } else { + memcpy ( ptk_data.mac1, ctx->dev->bssid, ETH_ALEN ); + memcpy ( ptk_data.mac2, ctx->dev->netdev->ll_addr, ETH_ALEN ); + } + + if ( memcmp ( ctx->Anonce, ctx->Snonce, WPA_NONCE_LEN ) < 0 ) { + memcpy ( ptk_data.nonce1, ctx->Anonce, WPA_NONCE_LEN ); + memcpy ( ptk_data.nonce2, ctx->Snonce, WPA_NONCE_LEN ); + } else { + memcpy ( ptk_data.nonce1, ctx->Snonce, WPA_NONCE_LEN ); + memcpy ( ptk_data.nonce2, ctx->Anonce, WPA_NONCE_LEN ); + } + + DBGC2 ( ctx, "WPA %p A1 %s, A2 %s\n", ctx, eth_ntoa ( ptk_data.mac1 ), + eth_ntoa ( ptk_data.mac2 ) ); + DBGC2 ( ctx, "WPA %p Nonce1, Nonce2:\n", ctx ); + DBGC2_HD ( ctx, ptk_data.nonce1, WPA_NONCE_LEN ); + DBGC2_HD ( ctx, ptk_data.nonce2, WPA_NONCE_LEN ); + + prf_sha1 ( ctx->pmk, ctx->pmk_len, + "Pairwise key expansion", + &ptk_data, sizeof ( ptk_data ), + &ctx->ptk, sizeof ( ctx->ptk ) ); + + DBGC2 ( ctx, "WPA %p PTK:\n", ctx ); + DBGC2_HD ( ctx, &ctx->ptk, sizeof ( ctx->ptk ) ); +} + + +/** + * Install pairwise transient key + * + * @v ctx WPA common context + * @v len Key length (16 for CCMP, 32 for TKIP) + * @ret rc Return status code + */ +static inline int wpa_install_ptk ( struct wpa_common_ctx *ctx, int len ) +{ + DBGC ( ctx, "WPA %p: installing %d-byte pairwise transient key\n", + ctx, len ); + DBGC2_HD ( ctx, &ctx->ptk.tk, len ); + + return sec80211_install ( &ctx->dev->crypto, ctx->crypt, + &ctx->ptk.tk, len, NULL ); +} + +/** + * Install group transient key + * + * @v ctx WPA common context + * @v len Key length (16 for CCMP, 32 for TKIP) + * @v rsc Receive sequence counter field in EAPOL-Key packet + * @ret rc Return status code + */ +static inline int wpa_install_gtk ( struct wpa_common_ctx *ctx, int len, + const void *rsc ) +{ + DBGC ( ctx, "WPA %p: installing %d-byte group transient key\n", + ctx, len ); + DBGC2_HD ( ctx, &ctx->gtk.tk, len ); + + return sec80211_install ( &ctx->dev->gcrypto, ctx->gcrypt, + &ctx->gtk.tk, len, rsc ); +} + +/** + * Search for group transient key, and install it if found + * + * @v ctx WPA common context + * @v ie Pointer to first IE in key data field + * @v ie_end Pointer to first byte not in key data field + * @v rsc Receive sequence counter field in EAPOL-Key packet + * @ret rc Return status code + */ +static int wpa_maybe_install_gtk ( struct wpa_common_ctx *ctx, + union ieee80211_ie *ie, void *ie_end, + const void *rsc ) +{ + struct wpa_kde *kde; + + if ( ! ieee80211_ie_bound ( ie, ie_end ) ) + return -ENOENT; + + while ( ie ) { + if ( ie->id == IEEE80211_IE_VENDOR && + ie->vendor.oui == WPA_KDE_GTK ) + break; + + ie = ieee80211_next_ie ( ie, ie_end ); + } + + if ( ! ie ) + return -ENOENT; + + if ( ie->len - 6u > sizeof ( ctx->gtk.tk ) ) { + DBGC ( ctx, "WPA %p: GTK KDE is too long (%d bytes, max %zd)\n", + ctx, ie->len - 4, sizeof ( ctx->gtk.tk ) ); + return -EINVAL; + } + + /* XXX We ignore key ID for now. */ + kde = ( struct wpa_kde * ) ie; + memcpy ( &ctx->gtk.tk, &kde->gtk_encap.gtk, kde->len - 6 ); + + return wpa_install_gtk ( ctx, kde->len - 6, rsc ); +} + + +/** + * Allocate I/O buffer for construction of outgoing EAPOL-Key frame + * + * @v kdlen Maximum number of bytes in the Key Data field + * @ret iob Newly allocated I/O buffer + * + * The returned buffer will have space reserved for the link-layer and + * EAPOL headers, and will have @c iob->tail pointing to the start of + * the Key Data field. Thus, it is necessary to use iob_put() in + * filling the Key Data. + */ +static struct io_buffer * wpa_alloc_frame ( int kdlen ) +{ + struct io_buffer *ret = alloc_iob ( sizeof ( struct eapol_key_pkt ) + + kdlen + EAPOL_HDR_LEN + + MAX_LL_HEADER_LEN ); + if ( ! ret ) + return NULL; + + iob_reserve ( ret, MAX_LL_HEADER_LEN + EAPOL_HDR_LEN ); + memset ( iob_put ( ret, sizeof ( struct eapol_key_pkt ) ), 0, + sizeof ( struct eapol_key_pkt ) ); + + return ret; +} + + +/** + * Send EAPOL-Key packet + * + * @v iob I/O buffer, with sufficient headroom for headers + * @v dev 802.11 device + * @v kie Key integrity and encryption handler + * @v is_rsn If TRUE, handshake uses new RSN format + * @ret rc Return status code + * + * If a KIE is specified, the MIC will be filled in before transmission. + */ +static int wpa_send_eapol ( struct io_buffer *iob, struct wpa_common_ctx *ctx, + struct wpa_kie *kie ) +{ + struct eapol_key_pkt *pkt = iob->data; + struct eapol_frame *eapol = iob_push ( iob, EAPOL_HDR_LEN ); + + pkt->info = htons ( pkt->info ); + pkt->keysize = htons ( pkt->keysize ); + pkt->datalen = htons ( pkt->datalen ); + pkt->replay = cpu_to_be64 ( pkt->replay ); + eapol->version = EAPOL_THIS_VERSION; + eapol->type = EAPOL_TYPE_KEY; + eapol->length = htons ( iob->tail - iob->data - sizeof ( *eapol ) ); + + memset ( pkt->mic, 0, sizeof ( pkt->mic ) ); + if ( kie ) + kie->mic ( &ctx->ptk.kck, eapol, EAPOL_HDR_LEN + + sizeof ( *pkt ) + ntohs ( pkt->datalen ), + pkt->mic ); + + return net_tx ( iob, ctx->dev->netdev, &eapol_protocol, + ctx->dev->bssid, ctx->dev->netdev->ll_addr ); +} + + +/** + * Send second frame in 4-Way Handshake + * + * @v ctx WPA common context + * @v pkt First frame, to which this is a reply + * @v is_rsn If TRUE, handshake uses new RSN format + * @v kie Key integrity and encryption handler + * @ret rc Return status code + */ +static int wpa_send_2_of_4 ( struct wpa_common_ctx *ctx, + struct eapol_key_pkt *pkt, int is_rsn, + struct wpa_kie *kie ) +{ + struct io_buffer *iob = wpa_alloc_frame ( ctx->dev->rsn_ie->len + 2 ); + struct eapol_key_pkt *npkt; + + if ( ! iob ) + return -ENOMEM; + + npkt = iob->data; + memcpy ( npkt, pkt, sizeof ( *pkt ) ); + npkt->info &= ~EAPOL_KEY_INFO_KEY_ACK; + npkt->info |= EAPOL_KEY_INFO_KEY_MIC; + if ( is_rsn ) + npkt->keysize = 0; + memcpy ( npkt->nonce, ctx->Snonce, sizeof ( npkt->nonce ) ); + npkt->datalen = ctx->dev->rsn_ie->len + 2; + memcpy ( iob_put ( iob, npkt->datalen ), ctx->dev->rsn_ie, + npkt->datalen ); + + DBGC ( ctx, "WPA %p: sending 2/4\n", ctx ); + + return wpa_send_eapol ( iob, ctx, kie ); +} + + +/** + * Handle receipt of first frame in 4-Way Handshake + * + * @v ctx WPA common context + * @v pkt EAPOL-Key packet + * @v is_rsn If TRUE, frame uses new RSN format + * @v kie Key integrity and encryption handler + * @ret rc Return status code + */ +static int wpa_handle_1_of_4 ( struct wpa_common_ctx *ctx, + struct eapol_key_pkt *pkt, int is_rsn, + struct wpa_kie *kie ) +{ + if ( ctx->state == WPA_WAITING ) + return -EINVAL; + + ctx->state = WPA_WORKING; + memcpy ( ctx->Anonce, pkt->nonce, sizeof ( ctx->Anonce ) ); + if ( ! ctx->have_Snonce ) { + rbg_generate ( NULL, 0, 0, ctx->Snonce, + sizeof ( ctx->Snonce ) ); + ctx->have_Snonce = 1; + } + + DBGC ( ctx, "WPA %p: received 1/4, looks OK\n", ctx ); + + wpa_derive_ptk ( ctx ); + + return wpa_send_2_of_4 ( ctx, pkt, is_rsn, kie ); +} + + +/** + * Send fourth frame in 4-Way Handshake, or second in Group Key Handshake + * + * @v ctx WPA common context + * @v pkt EAPOL-Key packet for frame to which we're replying + * @v is_rsn If TRUE, frame uses new RSN format + * @v kie Key integrity and encryption handler + * @ret rc Return status code + */ +static int wpa_send_final ( struct wpa_common_ctx *ctx, + struct eapol_key_pkt *pkt, int is_rsn, + struct wpa_kie *kie ) +{ + struct io_buffer *iob = wpa_alloc_frame ( 0 ); + struct eapol_key_pkt *npkt; + + if ( ! iob ) + return -ENOMEM; + + npkt = iob->data; + memcpy ( npkt, pkt, sizeof ( *pkt ) ); + npkt->info &= ~( EAPOL_KEY_INFO_KEY_ACK | EAPOL_KEY_INFO_INSTALL | + EAPOL_KEY_INFO_KEY_ENC ); + if ( is_rsn ) + npkt->keysize = 0; + memset ( npkt->nonce, 0, sizeof ( npkt->nonce ) ); + memset ( npkt->iv, 0, sizeof ( npkt->iv ) ); + npkt->datalen = 0; + + if ( npkt->info & EAPOL_KEY_INFO_TYPE ) + DBGC ( ctx, "WPA %p: sending 4/4\n", ctx ); + else + DBGC ( ctx, "WPA %p: sending 2/2\n", ctx ); + + return wpa_send_eapol ( iob, ctx, kie ); + +} + + +/** + * Handle receipt of third frame in 4-Way Handshake + * + * @v ctx WPA common context + * @v pkt EAPOL-Key packet + * @v is_rsn If TRUE, frame uses new RSN format + * @v kie Key integrity and encryption handler + * @ret rc Return status code + */ +static int wpa_handle_3_of_4 ( struct wpa_common_ctx *ctx, + struct eapol_key_pkt *pkt, int is_rsn, + struct wpa_kie *kie ) +{ + int rc; + u8 *this_rsn, *this_rsn_end; + u8 *new_rsn, *new_rsn_end; + int this_is_rsn, new_is_rsn; + + if ( ctx->state == WPA_WAITING ) + return -EINVAL; + + ctx->state = WPA_WORKING; + + /* Check nonce */ + if ( memcmp ( ctx->Anonce, pkt->nonce, WPA_NONCE_LEN ) != 0 ) { + DBGC ( ctx, "WPA %p ALERT: nonce mismatch in 3/4\n", ctx ); + return wpa_fail ( ctx, -EACCES ); + } + + /* Check RSN IE */ + this_rsn = sec80211_find_rsn ( ( union ieee80211_ie * ) pkt->data, + pkt->data + pkt->datalen, + &this_is_rsn, &this_rsn_end ); + if ( this_rsn ) + new_rsn = sec80211_find_rsn ( ( union ieee80211_ie * ) + this_rsn_end, + pkt->data + pkt->datalen, + &new_is_rsn, &new_rsn_end ); + else + new_rsn = NULL; + + if ( ! ctx->ap_rsn_ie || ! this_rsn || + ctx->ap_rsn_ie_len != ( this_rsn_end - this_rsn ) || + ctx->ap_rsn_is_rsn != this_is_rsn || + memcmp ( ctx->ap_rsn_ie, this_rsn, ctx->ap_rsn_ie_len ) != 0 ) { + DBGC ( ctx, "WPA %p ALERT: RSN mismatch in 3/4\n", ctx ); + DBGC2 ( ctx, "WPA %p RSNs (in 3/4, in beacon):\n", ctx ); + DBGC2_HD ( ctx, this_rsn, this_rsn_end - this_rsn ); + DBGC2_HD ( ctx, ctx->ap_rsn_ie, ctx->ap_rsn_ie_len ); + return wpa_fail ( ctx, -EACCES ); + } + + /* Don't switch if they just supplied both styles of IE + simultaneously; we need two RSN IEs or two WPA IEs to + switch ciphers. They'll be immediately consecutive because + of ordering guarantees. */ + if ( new_rsn && this_is_rsn == new_is_rsn ) { + struct net80211_wlan *assoc = ctx->dev->associating; + DBGC ( ctx, "WPA %p: accommodating bait-and-switch tactics\n", + ctx ); + DBGC2 ( ctx, "WPA %p RSNs (in 3/4+beacon, new in 3/4):\n", + ctx ); + DBGC2_HD ( ctx, this_rsn, this_rsn_end - this_rsn ); + DBGC2_HD ( ctx, new_rsn, new_rsn_end - new_rsn ); + + if ( ( rc = sec80211_detect_ie ( new_is_rsn, new_rsn, + new_rsn_end, + &assoc->handshaking, + &assoc->crypto ) ) != 0 ) + DBGC ( ctx, "WPA %p: bait-and-switch invalid, staying " + "with original request\n", ctx ); + } else { + new_rsn = this_rsn; + new_is_rsn = this_is_rsn; + new_rsn_end = this_rsn_end; + } + + /* Grab group cryptosystem ID */ + ctx->gcrypt = sec80211_rsn_get_net80211_crypt ( *( u32 * ) + ( new_rsn + 2 ) ); + + /* Check for a GTK, if info field is encrypted */ + if ( pkt->info & EAPOL_KEY_INFO_KEY_ENC ) { + rc = wpa_maybe_install_gtk ( ctx, + ( union ieee80211_ie * ) pkt->data, + pkt->data + pkt->datalen, + pkt->rsc ); + if ( rc < 0 ) { + DBGC ( ctx, "WPA %p did not install GTK in 3/4: %s\n", + ctx, strerror ( rc ) ); + if ( rc != -ENOENT ) + return wpa_fail ( ctx, rc ); + } + } + + DBGC ( ctx, "WPA %p: received 3/4, looks OK\n", ctx ); + + /* Send final message */ + rc = wpa_send_final ( ctx, pkt, is_rsn, kie ); + if ( rc < 0 ) + return wpa_fail ( ctx, rc ); + + /* Install PTK */ + rc = wpa_install_ptk ( ctx, pkt->keysize ); + if ( rc < 0 ) { + DBGC ( ctx, "WPA %p failed to install PTK: %s\n", ctx, + strerror ( rc ) ); + return wpa_fail ( ctx, rc ); + } + + /* Mark us as needing a new Snonce if we rekey */ + ctx->have_Snonce = 0; + + /* Done! */ + ctx->state = WPA_SUCCESS; + return 0; +} + + +/** + * Handle receipt of first frame in Group Key Handshake + * + * @v ctx WPA common context + * @v pkt EAPOL-Key packet + * @v is_rsn If TRUE, frame uses new RSN format + * @v kie Key integrity and encryption handler + * @ret rc Return status code + */ +static int wpa_handle_1_of_2 ( struct wpa_common_ctx *ctx, + struct eapol_key_pkt *pkt, int is_rsn, + struct wpa_kie *kie ) +{ + int rc; + + /* + * WPA and RSN do this completely differently. + * + * The idea of encoding the GTK (or PMKID, or various other + * things) into a KDE that looks like an information element + * is an RSN innovation; old WPA code never encapsulates + * things like that. If it looks like an info element, it + * really is (for the WPA IE check in frames 2/4 and 3/4). The + * "key data encrypted" bit in the info field is also specific + * to RSN. + * + * So from an old WPA host, 3/4 does not contain an + * encapsulated GTK. The first frame of the GK handshake + * contains it, encrypted, but without a KDE wrapper, and with + * the key ID field (which iPXE doesn't use) shoved away in + * the reserved bits in the info field, and the TxRx bit + * stealing the Install bit's spot. + */ + + if ( is_rsn && ( pkt->info & EAPOL_KEY_INFO_KEY_ENC ) ) { + rc = wpa_maybe_install_gtk ( ctx, + ( union ieee80211_ie * ) pkt->data, + pkt->data + pkt->datalen, + pkt->rsc ); + if ( rc < 0 ) { + DBGC ( ctx, "WPA %p: failed to install GTK in 1/2: " + "%s\n", ctx, strerror ( rc ) ); + return wpa_fail ( ctx, rc ); + } + } else { + rc = kie->decrypt ( &ctx->ptk.kek, pkt->iv, pkt->data, + &pkt->datalen ); + if ( rc < 0 ) { + DBGC ( ctx, "WPA %p: failed to decrypt GTK: %s\n", + ctx, strerror ( rc ) ); + return rc; /* non-fatal */ + } + if ( pkt->datalen > sizeof ( ctx->gtk.tk ) ) { + DBGC ( ctx, "WPA %p: too much GTK data (%d > %zd)\n", + ctx, pkt->datalen, sizeof ( ctx->gtk.tk ) ); + return wpa_fail ( ctx, -EINVAL ); + } + + memcpy ( &ctx->gtk.tk, pkt->data, pkt->datalen ); + wpa_install_gtk ( ctx, pkt->datalen, pkt->rsc ); + } + + DBGC ( ctx, "WPA %p: received 1/2, looks OK\n", ctx ); + + return wpa_send_final ( ctx, pkt, is_rsn, kie ); +} + + +/** + * Handle receipt of EAPOL-Key frame for WPA + * + * @v iob I/O buffer + * @v netdev Network device + * @v ll_dest Link-layer destination address + * @v ll_source Source link-layer address + */ +static int eapol_key_rx ( struct io_buffer *iob, struct net_device *netdev, + const void *ll_dest __unused, + const void *ll_source ) +{ + struct net80211_device *dev = net80211_get ( netdev ); + struct eapol_key_pkt *pkt = iob->data; + int is_rsn, found_ctx; + struct wpa_common_ctx *ctx; + int rc = 0; + struct wpa_kie *kie; + u8 their_mic[16], our_mic[16]; + + if ( pkt->type != EAPOL_KEY_TYPE_WPA && + pkt->type != EAPOL_KEY_TYPE_RSN ) { + DBG ( "EAPOL-Key: packet not of 802.11 type\n" ); + rc = -EINVAL; + goto drop; + } + + is_rsn = ( pkt->type == EAPOL_KEY_TYPE_RSN ); + + if ( ! dev ) { + DBG ( "EAPOL-Key: packet not from 802.11\n" ); + rc = -EINVAL; + goto drop; + } + + if ( memcmp ( dev->bssid, ll_source, ETH_ALEN ) != 0 ) { + DBG ( "EAPOL-Key: packet not from associated AP\n" ); + rc = -EINVAL; + goto drop; + } + + if ( ! ( ntohs ( pkt->info ) & EAPOL_KEY_INFO_KEY_ACK ) ) { + DBG ( "EAPOL-Key: packet sent in wrong direction\n" ); + rc = -EINVAL; + goto drop; + } + + found_ctx = 0; + list_for_each_entry ( ctx, &wpa_contexts, list ) { + if ( ctx->dev == dev ) { + found_ctx = 1; + break; + } + } + + if ( ! found_ctx ) { + DBG ( "EAPOL-Key: no WPA context to handle packet for %p\n", + dev ); + rc = -ENOENT; + goto drop; + } + + if ( ( void * ) ( pkt + 1 ) + ntohs ( pkt->datalen ) > iob->tail ) { + DBGC ( ctx, "WPA %p: packet truncated (has %zd extra bytes, " + "states %d)\n", ctx, iob->tail - ( void * ) ( pkt + 1 ), + ntohs ( pkt->datalen ) ); + rc = -EINVAL; + goto drop; + } + + /* Get a handle on key integrity/encryption handler */ + kie = wpa_find_kie ( ntohs ( pkt->info ) & EAPOL_KEY_INFO_VERSION ); + if ( ! kie ) { + DBGC ( ctx, "WPA %p: no support for packet version %d\n", ctx, + ntohs ( pkt->info ) & EAPOL_KEY_INFO_VERSION ); + rc = wpa_fail ( ctx, -ENOTSUP ); + goto drop; + } + + /* Check MIC */ + if ( ntohs ( pkt->info ) & EAPOL_KEY_INFO_KEY_MIC ) { + memcpy ( their_mic, pkt->mic, sizeof ( pkt->mic ) ); + memset ( pkt->mic, 0, sizeof ( pkt->mic ) ); + kie->mic ( &ctx->ptk.kck, ( void * ) pkt - EAPOL_HDR_LEN, + EAPOL_HDR_LEN + sizeof ( *pkt ) + + ntohs ( pkt->datalen ), our_mic ); + DBGC2 ( ctx, "WPA %p MIC comparison (theirs, ours):\n", ctx ); + DBGC2_HD ( ctx, their_mic, 16 ); + DBGC2_HD ( ctx, our_mic, 16 ); + if ( memcmp ( their_mic, our_mic, sizeof ( pkt->mic ) ) != 0 ) { + DBGC ( ctx, "WPA %p: EAPOL MIC failure\n", ctx ); + goto drop; + } + } + + /* Fix byte order to local */ + pkt->info = ntohs ( pkt->info ); + pkt->keysize = ntohs ( pkt->keysize ); + pkt->datalen = ntohs ( pkt->datalen ); + pkt->replay = be64_to_cpu ( pkt->replay ); + + /* Check replay counter */ + if ( ctx->replay != ~0ULL && ctx->replay >= pkt->replay ) { + DBGC ( ctx, "WPA %p ALERT: Replay detected! " + "(%08x:%08x >= %08x:%08x)\n", ctx, + ( u32 ) ( ctx->replay >> 32 ), ( u32 ) ctx->replay, + ( u32 ) ( pkt->replay >> 32 ), ( u32 ) pkt->replay ); + rc = 0; /* ignore without error */ + goto drop; + } + ctx->replay = pkt->replay; + + /* Decrypt key data */ + if ( pkt->info & EAPOL_KEY_INFO_KEY_ENC ) { + rc = kie->decrypt ( &ctx->ptk.kek, pkt->iv, pkt->data, + &pkt->datalen ); + if ( rc < 0 ) { + DBGC ( ctx, "WPA %p: failed to decrypt packet: %s\n", + ctx, strerror ( rc ) ); + goto drop; + } + } + + /* Hand it off to appropriate handler */ + switch ( pkt->info & ( EAPOL_KEY_INFO_TYPE | + EAPOL_KEY_INFO_KEY_MIC ) ) { + case EAPOL_KEY_TYPE_PTK: + rc = wpa_handle_1_of_4 ( ctx, pkt, is_rsn, kie ); + break; + + case EAPOL_KEY_TYPE_PTK | EAPOL_KEY_INFO_KEY_MIC: + rc = wpa_handle_3_of_4 ( ctx, pkt, is_rsn, kie ); + break; + + case EAPOL_KEY_TYPE_GTK | EAPOL_KEY_INFO_KEY_MIC: + rc = wpa_handle_1_of_2 ( ctx, pkt, is_rsn, kie ); + break; + + default: + DBGC ( ctx, "WPA %p: Invalid combination of key flags %04x\n", + ctx, pkt->info ); + rc = -EINVAL; + break; + } + + drop: + free_iob ( iob ); + return rc; +} + +struct eapol_handler eapol_key_handler __eapol_handler = { + .type = EAPOL_TYPE_KEY, + .rx = eapol_key_rx, +}; + +/* WPA always needs EAPOL in order to be useful */ +REQUIRE_OBJECT ( eapol ); diff --git a/qemu/roms/ipxe/src/net/80211/wpa_ccmp.c b/qemu/roms/ipxe/src/net/80211/wpa_ccmp.c new file mode 100644 index 000000000..f98ebea26 --- /dev/null +++ b/qemu/roms/ipxe/src/net/80211/wpa_ccmp.c @@ -0,0 +1,530 @@ +/* + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <string.h> +#include <ipxe/net80211.h> +#include <ipxe/crypto.h> +#include <ipxe/hmac.h> +#include <ipxe/sha1.h> +#include <ipxe/aes.h> +#include <ipxe/wpa.h> +#include <byteswap.h> +#include <errno.h> + +/** @file + * + * Backend for WPA using the CCMP encryption method + */ + +/** Context for CCMP encryption and decryption */ +struct ccmp_ctx +{ + /** AES context - only ever used for encryption */ + u8 aes_ctx[AES_CTX_SIZE]; + + /** Most recently sent packet number */ + u64 tx_seq; + + /** Most recently received packet number */ + u64 rx_seq; +}; + +/** Header structure at the beginning of CCMP frame data */ +struct ccmp_head +{ + u8 pn_lo[2]; /**< Bytes 0 and 1 of packet number */ + u8 _rsvd; /**< Reserved byte */ + u8 kid; /**< Key ID and ExtIV byte */ + u8 pn_hi[4]; /**< Bytes 2-5 (2 first) of packet number */ +} __attribute__ (( packed )); + + +/** CCMP header overhead */ +#define CCMP_HEAD_LEN 8 + +/** CCMP MIC trailer overhead */ +#define CCMP_MIC_LEN 8 + +/** CCMP nonce length */ +#define CCMP_NONCE_LEN 13 + +/** CCMP nonce structure */ +struct ccmp_nonce +{ + u8 prio; /**< Packet priority, 0 for non-QoS */ + u8 a2[ETH_ALEN]; /**< Address 2 from packet header (sender) */ + u8 pn[6]; /**< Packet number */ +} __attribute__ (( packed )); + +/** CCMP additional authentication data length (for non-QoS, non-WDS frames) */ +#define CCMP_AAD_LEN 22 + +/** CCMP additional authentication data structure */ +struct ccmp_aad +{ + u16 fc; /**< Frame Control field */ + u8 a1[6]; /**< Address 1 */ + u8 a2[6]; /**< Address 2 */ + u8 a3[6]; /**< Address 3 */ + u16 seq; /**< Sequence Control field */ + /* Address 4 and QoS Control are included if present */ +} __attribute__ (( packed )); + +/** Mask for Frame Control field in AAD */ +#define CCMP_AAD_FC_MASK 0xC38F + +/** Mask for Sequence Control field in AAD */ +#define CCMP_AAD_SEQ_MASK 0x000F + + +/** + * Convert 6-byte LSB packet number to 64-bit integer + * + * @v pn Pointer to 6-byte packet number + * @ret v 64-bit integer value of @a pn + */ +static u64 pn_to_u64 ( const u8 *pn ) +{ + int i; + u64 ret = 0; + + for ( i = 5; i >= 0; i-- ) { + ret <<= 8; + ret |= pn[i]; + } + + return ret; +} + +/** + * Convert 64-bit integer to 6-byte packet number + * + * @v v 64-bit integer + * @v msb If TRUE, reverse the output PN to be in MSB order + * @ret pn 6-byte packet number + * + * The PN is stored in LSB order in the packet header and in MSB order + * in the nonce. WHYYYYY? + */ +static void u64_to_pn ( u64 v, u8 *pn, int msb ) +{ + int i; + u8 *pnp = pn + ( msb ? 5 : 0 ); + int delta = ( msb ? -1 : +1 ); + + for ( i = 0; i < 6; i++ ) { + *pnp = v & 0xFF; + pnp += delta; + v >>= 8; + } +} + +/** Value for @a msb argument of u64_to_pn() for MSB output */ +#define PN_MSB 1 + +/** Value for @a msb argument of u64_to_pn() for LSB output */ +#define PN_LSB 0 + + + +/** + * Initialise CCMP state and install key + * + * @v crypto CCMP cryptosystem structure + * @v key Pointer to 16-byte temporal key to install + * @v keylen Length of key (16 bytes) + * @v rsc Initial receive sequence counter + */ +static int ccmp_init ( struct net80211_crypto *crypto, const void *key, + int keylen, const void *rsc ) +{ + struct ccmp_ctx *ctx = crypto->priv; + + if ( keylen != 16 ) + return -EINVAL; + + if ( rsc ) + ctx->rx_seq = pn_to_u64 ( rsc ); + + cipher_setkey ( &aes_algorithm, ctx->aes_ctx, key, keylen ); + + return 0; +} + + +/** + * Encrypt or decrypt data stream using AES in Counter mode + * + * @v ctx CCMP cryptosystem context + * @v nonce Nonce value, 13 bytes + * @v srcv Data to encrypt or decrypt + * @v len Number of bytes pointed to by @a src + * @v msrcv MIC value to encrypt or decrypt (may be NULL) + * @ret destv Encrypted or decrypted data + * @ret mdestv Encrypted or decrypted MIC value + * + * This assumes CCMP parameters of L=2 and M=8. The algorithm is + * defined in RFC 3610. + */ +static void ccmp_ctr_xor ( struct ccmp_ctx *ctx, const void *nonce, + const void *srcv, void *destv, int len, + const void *msrcv, void *mdestv ) +{ + u8 A[16], S[16]; + u16 ctr; + int i; + const u8 *src = srcv, *msrc = msrcv; + u8 *dest = destv, *mdest = mdestv; + + A[0] = 0x01; /* flags, L' = L - 1 = 1, other bits rsvd */ + memcpy ( A + 1, nonce, CCMP_NONCE_LEN ); + + if ( msrcv ) { + A[14] = A[15] = 0; + + cipher_encrypt ( &aes_algorithm, ctx->aes_ctx, A, S, 16 ); + + for ( i = 0; i < 8; i++ ) { + *mdest++ = *msrc++ ^ S[i]; + } + } + + for ( ctr = 1 ;; ctr++ ) { + A[14] = ctr >> 8; + A[15] = ctr & 0xFF; + + cipher_encrypt ( &aes_algorithm, ctx->aes_ctx, A, S, 16 ); + + for ( i = 0; i < len && i < 16; i++ ) + *dest++ = *src++ ^ S[i]; + + if ( len <= 16 ) + break; /* we're done */ + + len -= 16; + } +} + + +/** + * Advance one block in CBC-MAC calculation + * + * @v aes_ctx AES encryption context with key set + * @v B Cleartext block to incorporate (16 bytes) + * @v X Previous ciphertext block (16 bytes) + * @ret B Clobbered + * @ret X New ciphertext block (16 bytes) + * + * This function does X := E[key] ( X ^ B ). + */ +static void ccmp_feed_cbc_mac ( void *aes_ctx, u8 *B, u8 *X ) +{ + int i; + for ( i = 0; i < 16; i++ ) + B[i] ^= X[i]; + cipher_encrypt ( &aes_algorithm, aes_ctx, B, X, 16 ); +} + + +/** + * Calculate MIC on plaintext data using CBC-MAC + * + * @v ctx CCMP cryptosystem context + * @v nonce Nonce value, 13 bytes + * @v data Data to calculate MIC over + * @v datalen Length of @a data + * @v aad Additional authentication data, for MIC but not encryption + * @ret mic MIC value (unencrypted), 8 bytes + * + * @a aadlen is assumed to be 22 bytes long, as it always is for + * 802.11 use when transmitting non-QoS, not-between-APs frames (the + * only type we deal with). + */ +static void ccmp_cbc_mac ( struct ccmp_ctx *ctx, const void *nonce, + const void *data, u16 datalen, + const void *aad, void *mic ) +{ + u8 X[16], B[16]; + + /* Zeroth block: flags, nonce, length */ + + /* Rsv AAD - M'- - L'- + * 0 1 0 1 1 0 0 1 for an 8-byte MAC and 2-byte message length + */ + B[0] = 0x59; + memcpy ( B + 1, nonce, CCMP_NONCE_LEN ); + B[14] = datalen >> 8; + B[15] = datalen & 0xFF; + + cipher_encrypt ( &aes_algorithm, ctx->aes_ctx, B, X, 16 ); + + /* First block: AAD length field and 14 bytes of AAD */ + B[0] = 0; + B[1] = CCMP_AAD_LEN; + memcpy ( B + 2, aad, 14 ); + + ccmp_feed_cbc_mac ( ctx->aes_ctx, B, X ); + + /* Second block: Remaining 8 bytes of AAD, 8 bytes zero pad */ + memcpy ( B, aad + 14, 8 ); + memset ( B + 8, 0, 8 ); + + ccmp_feed_cbc_mac ( ctx->aes_ctx, B, X ); + + /* Message blocks */ + while ( datalen ) { + if ( datalen >= 16 ) { + memcpy ( B, data, 16 ); + datalen -= 16; + } else { + memcpy ( B, data, datalen ); + memset ( B + datalen, 0, 16 - datalen ); + datalen = 0; + } + + ccmp_feed_cbc_mac ( ctx->aes_ctx, B, X ); + + data += 16; + } + + /* Get MIC from final value of X */ + memcpy ( mic, X, 8 ); +} + + +/** + * Encapsulate and encrypt a packet using CCMP + * + * @v crypto CCMP cryptosystem + * @v iob I/O buffer containing cleartext packet + * @ret eiob I/O buffer containing encrypted packet + */ +struct io_buffer * ccmp_encrypt ( struct net80211_crypto *crypto, + struct io_buffer *iob ) +{ + struct ccmp_ctx *ctx = crypto->priv; + struct ieee80211_frame *hdr = iob->data; + struct io_buffer *eiob; + const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN; + int datalen = iob_len ( iob ) - hdrlen; + struct ccmp_head head; + struct ccmp_nonce nonce; + struct ccmp_aad aad; + u8 mic[8], tx_pn[6]; + void *edata, *emic; + + ctx->tx_seq++; + u64_to_pn ( ctx->tx_seq, tx_pn, PN_LSB ); + + /* Allocate memory */ + eiob = alloc_iob ( iob_len ( iob ) + CCMP_HEAD_LEN + CCMP_MIC_LEN ); + if ( ! eiob ) + return NULL; + + /* Copy frame header */ + memcpy ( iob_put ( eiob, hdrlen ), iob->data, hdrlen ); + hdr = eiob->data; + hdr->fc |= IEEE80211_FC_PROTECTED; + + /* Fill in packet number and extended IV */ + memcpy ( head.pn_lo, tx_pn, 2 ); + memcpy ( head.pn_hi, tx_pn + 2, 4 ); + head.kid = 0x20; /* have Extended IV, key ID 0 */ + head._rsvd = 0; + memcpy ( iob_put ( eiob, sizeof ( head ) ), &head, sizeof ( head ) ); + + /* Form nonce */ + nonce.prio = 0; + memcpy ( nonce.a2, hdr->addr2, ETH_ALEN ); + u64_to_pn ( ctx->tx_seq, nonce.pn, PN_MSB ); + + /* Form additional authentication data */ + aad.fc = hdr->fc & CCMP_AAD_FC_MASK; + memcpy ( aad.a1, hdr->addr1, 3 * ETH_ALEN ); /* all 3 at once */ + aad.seq = hdr->seq & CCMP_AAD_SEQ_MASK; + + /* Calculate MIC over the data */ + ccmp_cbc_mac ( ctx, &nonce, iob->data + hdrlen, datalen, &aad, mic ); + + /* Copy and encrypt data and MIC */ + edata = iob_put ( eiob, datalen ); + emic = iob_put ( eiob, CCMP_MIC_LEN ); + ccmp_ctr_xor ( ctx, &nonce, + iob->data + hdrlen, edata, datalen, + mic, emic ); + + /* Done! */ + DBGC2 ( ctx, "WPA-CCMP %p: encrypted packet %p -> %p\n", ctx, + iob, eiob ); + + return eiob; +} + +/** + * Decrypt a packet using CCMP + * + * @v crypto CCMP cryptosystem + * @v eiob I/O buffer containing encrypted packet + * @ret iob I/O buffer containing cleartext packet + */ +static struct io_buffer * ccmp_decrypt ( struct net80211_crypto *crypto, + struct io_buffer *eiob ) +{ + struct ccmp_ctx *ctx = crypto->priv; + struct ieee80211_frame *hdr; + struct io_buffer *iob; + const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN; + int datalen = iob_len ( eiob ) - hdrlen - CCMP_HEAD_LEN - CCMP_MIC_LEN; + struct ccmp_head *head; + struct ccmp_nonce nonce; + struct ccmp_aad aad; + u8 rx_pn[6], their_mic[8], our_mic[8]; + + iob = alloc_iob ( hdrlen + datalen ); + if ( ! iob ) + return NULL; + + /* Copy frame header */ + memcpy ( iob_put ( iob, hdrlen ), eiob->data, hdrlen ); + hdr = iob->data; + hdr->fc &= ~IEEE80211_FC_PROTECTED; + + /* Check and update RX packet number */ + head = eiob->data + hdrlen; + memcpy ( rx_pn, head->pn_lo, 2 ); + memcpy ( rx_pn + 2, head->pn_hi, 4 ); + + if ( pn_to_u64 ( rx_pn ) <= ctx->rx_seq ) { + DBGC ( ctx, "WPA-CCMP %p: packet received out of order " + "(%012llx <= %012llx)\n", ctx, pn_to_u64 ( rx_pn ), + ctx->rx_seq ); + free_iob ( iob ); + return NULL; + } + + ctx->rx_seq = pn_to_u64 ( rx_pn ); + DBGC2 ( ctx, "WPA-CCMP %p: RX packet number %012llx\n", ctx, ctx->rx_seq ); + + /* Form nonce */ + nonce.prio = 0; + memcpy ( nonce.a2, hdr->addr2, ETH_ALEN ); + u64_to_pn ( ctx->rx_seq, nonce.pn, PN_MSB ); + + /* Form additional authentication data */ + aad.fc = ( hdr->fc & CCMP_AAD_FC_MASK ) | IEEE80211_FC_PROTECTED; + memcpy ( aad.a1, hdr->addr1, 3 * ETH_ALEN ); /* all 3 at once */ + aad.seq = hdr->seq & CCMP_AAD_SEQ_MASK; + + /* Copy-decrypt data and MIC */ + ccmp_ctr_xor ( ctx, &nonce, eiob->data + hdrlen + sizeof ( *head ), + iob_put ( iob, datalen ), datalen, + eiob->tail - CCMP_MIC_LEN, their_mic ); + + /* Check MIC */ + ccmp_cbc_mac ( ctx, &nonce, iob->data + hdrlen, datalen, &aad, + our_mic ); + + if ( memcmp ( their_mic, our_mic, CCMP_MIC_LEN ) != 0 ) { + DBGC2 ( ctx, "WPA-CCMP %p: MIC failure\n", ctx ); + free_iob ( iob ); + return NULL; + } + + DBGC2 ( ctx, "WPA-CCMP %p: decrypted packet %p -> %p\n", ctx, + eiob, iob ); + + return iob; +} + + +/** CCMP cryptosystem */ +struct net80211_crypto ccmp_crypto __net80211_crypto = { + .algorithm = NET80211_CRYPT_CCMP, + .init = ccmp_init, + .encrypt = ccmp_encrypt, + .decrypt = ccmp_decrypt, + .priv_len = sizeof ( struct ccmp_ctx ), +}; + + + + +/** + * Calculate HMAC-SHA1 MIC for EAPOL-Key frame + * + * @v kck Key Confirmation Key, 16 bytes + * @v msg Message to calculate MIC over + * @v len Number of bytes to calculate MIC over + * @ret mic Calculated MIC, 16 bytes long + */ +static void ccmp_kie_mic ( const void *kck, const void *msg, size_t len, + void *mic ) +{ + u8 sha1_ctx[SHA1_CTX_SIZE]; + u8 kckb[16]; + u8 hash[SHA1_SIZE]; + size_t kck_len = 16; + + memcpy ( kckb, kck, kck_len ); + + hmac_init ( &sha1_algorithm, sha1_ctx, kckb, &kck_len ); + hmac_update ( &sha1_algorithm, sha1_ctx, msg, len ); + hmac_final ( &sha1_algorithm, sha1_ctx, kckb, &kck_len, hash ); + + memcpy ( mic, hash, 16 ); +} + +/** + * Decrypt key data in EAPOL-Key frame + * + * @v kek Key Encryption Key, 16 bytes + * @v iv Initialisation vector, 16 bytes (unused) + * @v msg Message to decrypt + * @v len Length of message + * @ret msg Decrypted message in place of original + * @ret len Adjusted downward for 8 bytes of overhead + * @ret rc Return status code + * + * The returned message may still contain padding of 0xDD followed by + * zero or more 0x00 octets. It is impossible to remove the padding + * without parsing the IEs in the packet (another design decision that + * tends to make one question the 802.11i committee's intelligence...) + */ +static int ccmp_kie_decrypt ( const void *kek, const void *iv __unused, + void *msg, u16 *len ) +{ + if ( *len % 8 != 0 ) + return -EINVAL; + + if ( aes_unwrap ( kek, msg, msg, *len / 8 - 1 ) != 0 ) + return -EINVAL; + + *len -= 8; + + return 0; +} + +/** CCMP-style key integrity and encryption handler */ +struct wpa_kie ccmp_kie __wpa_kie = { + .version = EAPOL_KEY_VERSION_WPA2, + .mic = ccmp_kie_mic, + .decrypt = ccmp_kie_decrypt, +}; diff --git a/qemu/roms/ipxe/src/net/80211/wpa_psk.c b/qemu/roms/ipxe/src/net/80211/wpa_psk.c new file mode 100644 index 000000000..71190b139 --- /dev/null +++ b/qemu/roms/ipxe/src/net/80211/wpa_psk.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <string.h> +#include <ipxe/net80211.h> +#include <ipxe/sha1.h> +#include <ipxe/wpa.h> +#include <errno.h> + +/** @file + * + * Frontend for WPA using a pre-shared key. + */ + +/** + * Initialise WPA-PSK state + * + * @v dev 802.11 device + * @ret rc Return status code + */ +static int wpa_psk_init ( struct net80211_device *dev ) +{ + return wpa_make_rsn_ie ( dev, &dev->rsn_ie ); +} + +/** + * Start WPA-PSK authentication + * + * @v dev 802.11 device + * @ret rc Return status code + */ +static int wpa_psk_start ( struct net80211_device *dev ) +{ + char passphrase[64+1]; + u8 pmk[WPA_PMK_LEN]; + int len; + struct wpa_common_ctx *ctx = dev->handshaker->priv; + + len = fetch_string_setting ( netdev_settings ( dev->netdev ), + &net80211_key_setting, passphrase, + 64 + 1 ); + + if ( len <= 0 ) { + DBGC ( ctx, "WPA-PSK %p: no passphrase provided!\n", ctx ); + net80211_deauthenticate ( dev, -EACCES ); + return -EACCES; + } + + pbkdf2_sha1 ( passphrase, len, dev->essid, strlen ( dev->essid ), + 4096, pmk, WPA_PMK_LEN ); + + DBGC ( ctx, "WPA-PSK %p: derived PMK from passphrase `%s':\n", ctx, + passphrase ); + DBGC_HD ( ctx, pmk, WPA_PMK_LEN ); + + return wpa_start ( dev, ctx, pmk, WPA_PMK_LEN ); +} + +/** + * Step WPA-PSK authentication + * + * @v dev 802.11 device + * @ret rc Return status code + */ +static int wpa_psk_step ( struct net80211_device *dev ) +{ + struct wpa_common_ctx *ctx = dev->handshaker->priv; + + switch ( ctx->state ) { + case WPA_SUCCESS: + return 1; + case WPA_FAILURE: + return -EACCES; + default: + return 0; + } +} + +/** + * Do-nothing function; you can't change a WPA key post-authentication + * + * @v dev 802.11 device + * @ret rc Return status code + */ +static int wpa_psk_no_change_key ( struct net80211_device *dev __unused ) +{ + return 0; +} + +/** + * Disable handling of received WPA authentication frames + * + * @v dev 802.11 device + */ +static void wpa_psk_stop ( struct net80211_device *dev ) +{ + wpa_stop ( dev ); +} + +/** WPA-PSK security handshaker */ +struct net80211_handshaker wpa_psk_handshaker __net80211_handshaker = { + .protocol = NET80211_SECPROT_PSK, + .init = wpa_psk_init, + .start = wpa_psk_start, + .step = wpa_psk_step, + .change_key = wpa_psk_no_change_key, + .stop = wpa_psk_stop, + .priv_len = sizeof ( struct wpa_common_ctx ), +}; diff --git a/qemu/roms/ipxe/src/net/80211/wpa_tkip.c b/qemu/roms/ipxe/src/net/80211/wpa_tkip.c new file mode 100644 index 000000000..fa3e0763b --- /dev/null +++ b/qemu/roms/ipxe/src/net/80211/wpa_tkip.c @@ -0,0 +1,588 @@ +/* + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <string.h> +#include <ipxe/net80211.h> +#include <ipxe/crypto.h> +#include <ipxe/hmac.h> +#include <ipxe/sha1.h> +#include <ipxe/md5.h> +#include <ipxe/crc32.h> +#include <ipxe/arc4.h> +#include <ipxe/wpa.h> +#include <byteswap.h> +#include <errno.h> + +/** @file + * + * Backend for WPA using the TKIP encryption standard. + */ + +/** Context for one direction of TKIP, either encryption or decryption */ +struct tkip_dir_ctx +{ + /** High 32 bits of last sequence counter value used */ + u32 tsc_hi; + + /** Low 32 bits of last sequence counter value used */ + u16 tsc_lo; + + /** MAC address used to derive TTAK */ + u8 mac[ETH_ALEN]; + + /** If TRUE, TTAK is valid */ + u16 ttak_ok; + + /** TKIP-mixed transmit address and key, depends on tsc_hi and MAC */ + u16 ttak[5]; +}; + +/** Context for TKIP encryption and decryption */ +struct tkip_ctx +{ + /** Temporal key to use */ + struct tkip_tk tk; + + /** State for encryption */ + struct tkip_dir_ctx enc; + + /** State for decryption */ + struct tkip_dir_ctx dec; +}; + +/** Header structure at the beginning of TKIP frame data */ +struct tkip_head +{ + u8 tsc1; /**< High byte of low 16 bits of TSC */ + u8 seed1; /**< Second byte of WEP seed */ + u8 tsc0; /**< Low byte of TSC */ + u8 kid; /**< Key ID and ExtIV byte */ + u32 tsc_hi; /**< High 32 bits of TSC, as an ExtIV */ +} __attribute__ (( packed )); + + +/** TKIP header overhead (IV + KID + ExtIV) */ +#define TKIP_HEAD_LEN 8 + +/** TKIP trailer overhead (MIC + ICV) [assumes unfragmented] */ +#define TKIP_FOOT_LEN 12 + +/** TKIP MIC length */ +#define TKIP_MIC_LEN 8 + +/** TKIP ICV length */ +#define TKIP_ICV_LEN 4 + + +/** TKIP S-box */ +static const u16 Sbox[256] = { + 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154, + 0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A, + 0x8F45, 0x1F9D, 0x8940, 0xFA87, 0xEF15, 0xB2EB, 0x8EC9, 0xFB0B, + 0x41EC, 0xB367, 0x5FFD, 0x45EA, 0x23BF, 0x53F7, 0xE496, 0x9B5B, + 0x75C2, 0xE11C, 0x3DAE, 0x4C6A, 0x6C5A, 0x7E41, 0xF502, 0x834F, + 0x685C, 0x51F4, 0xD134, 0xF908, 0xE293, 0xAB73, 0x6253, 0x2A3F, + 0x080C, 0x9552, 0x4665, 0x9D5E, 0x3028, 0x37A1, 0x0A0F, 0x2FB5, + 0x0E09, 0x2436, 0x1B9B, 0xDF3D, 0xCD26, 0x4E69, 0x7FCD, 0xEA9F, + 0x121B, 0x1D9E, 0x5874, 0x342E, 0x362D, 0xDCB2, 0xB4EE, 0x5BFB, + 0xA4F6, 0x764D, 0xB761, 0x7DCE, 0x527B, 0xDD3E, 0x5E71, 0x1397, + 0xA6F5, 0xB968, 0x0000, 0xC12C, 0x4060, 0xE31F, 0x79C8, 0xB6ED, + 0xD4BE, 0x8D46, 0x67D9, 0x724B, 0x94DE, 0x98D4, 0xB0E8, 0x854A, + 0xBB6B, 0xC52A, 0x4FE5, 0xED16, 0x86C5, 0x9AD7, 0x6655, 0x1194, + 0x8ACF, 0xE910, 0x0406, 0xFE81, 0xA0F0, 0x7844, 0x25BA, 0x4BE3, + 0xA2F3, 0x5DFE, 0x80C0, 0x058A, 0x3FAD, 0x21BC, 0x7048, 0xF104, + 0x63DF, 0x77C1, 0xAF75, 0x4263, 0x2030, 0xE51A, 0xFD0E, 0xBF6D, + 0x814C, 0x1814, 0x2635, 0xC32F, 0xBEE1, 0x35A2, 0x88CC, 0x2E39, + 0x9357, 0x55F2, 0xFC82, 0x7A47, 0xC8AC, 0xBAE7, 0x322B, 0xE695, + 0xC0A0, 0x1998, 0x9ED1, 0xA37F, 0x4466, 0x547E, 0x3BAB, 0x0B83, + 0x8CCA, 0xC729, 0x6BD3, 0x283C, 0xA779, 0xBCE2, 0x161D, 0xAD76, + 0xDB3B, 0x6456, 0x744E, 0x141E, 0x92DB, 0x0C0A, 0x486C, 0xB8E4, + 0x9F5D, 0xBD6E, 0x43EF, 0xC4A6, 0x39A8, 0x31A4, 0xD337, 0xF28B, + 0xD532, 0x8B43, 0x6E59, 0xDAB7, 0x018C, 0xB164, 0x9CD2, 0x49E0, + 0xD8B4, 0xACFA, 0xF307, 0xCF25, 0xCAAF, 0xF48E, 0x47E9, 0x1018, + 0x6FD5, 0xF088, 0x4A6F, 0x5C72, 0x3824, 0x57F1, 0x73C7, 0x9751, + 0xCB23, 0xA17C, 0xE89C, 0x3E21, 0x96DD, 0x61DC, 0x0D86, 0x0F85, + 0xE090, 0x7C42, 0x71C4, 0xCCAA, 0x90D8, 0x0605, 0xF701, 0x1C12, + 0xC2A3, 0x6A5F, 0xAEF9, 0x69D0, 0x1791, 0x9958, 0x3A27, 0x27B9, + 0xD938, 0xEB13, 0x2BB3, 0x2233, 0xD2BB, 0xA970, 0x0789, 0x33A7, + 0x2DB6, 0x3C22, 0x1592, 0xC920, 0x8749, 0xAAFF, 0x5078, 0xA57A, + 0x038F, 0x59F8, 0x0980, 0x1A17, 0x65DA, 0xD731, 0x84C6, 0xD0B8, + 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A, +}; + +/** + * Perform S-box mapping on a 16-bit value + * + * @v v Value to perform S-box mapping on + * @ret Sv S-box mapped value + */ +static inline u16 S ( u16 v ) +{ + return Sbox[v & 0xFF] ^ swap16 ( Sbox[v >> 8] ); +} + +/** + * Rotate 16-bit value right + * + * @v v Value to rotate + * @v bits Number of bits to rotate by + * @ret rotv Rotated value + */ +static inline u16 ror16 ( u16 v, int bits ) +{ + return ( v >> bits ) | ( v << ( 16 - bits ) ); +} + +/** + * Rotate 32-bit value right + * + * @v v Value to rotate + * @v bits Number of bits to rotate by + * @ret rotv Rotated value + */ +static inline u32 ror32 ( u32 v, int bits ) +{ + return ( v >> bits ) | ( v << ( 32 - bits ) ); +} + +/** + * Rotate 32-bit value left + * + * @v v Value to rotate + * @v bits Number of bits to rotate by + * @ret rotv Rotated value + */ +static inline u32 rol32 ( u32 v, int bits ) +{ + return ( v << bits ) | ( v >> ( 32 - bits ) ); +} + + +/** + * Initialise TKIP state and install key + * + * @v crypto TKIP cryptosystem structure + * @v key Pointer to tkip_tk to install + * @v keylen Length of key (32 bytes) + * @v rsc Initial receive sequence counter + */ +static int tkip_init ( struct net80211_crypto *crypto, const void *key, + int keylen, const void *rsc ) +{ + struct tkip_ctx *ctx = crypto->priv; + const u8 *rscb = rsc; + + if ( keylen != sizeof ( ctx->tk ) ) + return -EINVAL; + + if ( rscb ) { + ctx->dec.tsc_lo = ( rscb[1] << 8 ) | rscb[0]; + ctx->dec.tsc_hi = ( ( rscb[5] << 24 ) | ( rscb[4] << 16 ) | + ( rscb[3] << 8 ) | rscb[2] ); + } + + memcpy ( &ctx->tk, key, sizeof ( ctx->tk ) ); + + return 0; +} + +/** + * Perform TKIP key mixing, phase 1 + * + * @v dctx TKIP directional context + * @v tk TKIP temporal key + * @v mac MAC address of transmitter + * + * This recomputes the TTAK in @a dctx if necessary, and sets + * @c dctx->ttak_ok. + */ +static void tkip_mix_1 ( struct tkip_dir_ctx *dctx, struct tkip_tk *tk, u8 *mac ) +{ + int i, j; + + if ( dctx->ttak_ok && ! memcmp ( mac, dctx->mac, ETH_ALEN ) ) + return; + + memcpy ( dctx->mac, mac, ETH_ALEN ); + + dctx->ttak[0] = dctx->tsc_hi & 0xFFFF; + dctx->ttak[1] = dctx->tsc_hi >> 16; + dctx->ttak[2] = ( mac[1] << 8 ) | mac[0]; + dctx->ttak[3] = ( mac[3] << 8 ) | mac[2]; + dctx->ttak[4] = ( mac[5] << 8 ) | mac[4]; + + for ( i = 0; i < 8; i++ ) { + j = 2 * ( i & 1 ); + + dctx->ttak[0] += S ( dctx->ttak[4] ^ ( ( tk->key[1 + j] << 8 ) | + tk->key[0 + j] ) ); + dctx->ttak[1] += S ( dctx->ttak[0] ^ ( ( tk->key[5 + j] << 8 ) | + tk->key[4 + j] ) ); + dctx->ttak[2] += S ( dctx->ttak[1] ^ ( ( tk->key[9 + j] << 8 ) | + tk->key[8 + j] ) ); + dctx->ttak[3] += S ( dctx->ttak[2] ^ ( ( tk->key[13+ j] << 8 ) | + tk->key[12+ j] ) ); + dctx->ttak[4] += S ( dctx->ttak[3] ^ ( ( tk->key[1 + j] << 8 ) | + tk->key[0 + j] ) ) + i; + } + + dctx->ttak_ok = 1; +} + +/** + * Perform TKIP key mixing, phase 2 + * + * @v dctx TKIP directional context + * @v tk TKIP temporal key + * @ret key ARC4 key, 16 bytes long + */ +static void tkip_mix_2 ( struct tkip_dir_ctx *dctx, struct tkip_tk *tk, + void *key ) +{ + u8 *kb = key; + u16 ppk[6]; + int i; + + memcpy ( ppk, dctx->ttak, sizeof ( dctx->ttak ) ); + ppk[5] = dctx->ttak[4] + dctx->tsc_lo; + + ppk[0] += S ( ppk[5] ^ ( ( tk->key[1] << 8 ) | tk->key[0] ) ); + ppk[1] += S ( ppk[0] ^ ( ( tk->key[3] << 8 ) | tk->key[2] ) ); + ppk[2] += S ( ppk[1] ^ ( ( tk->key[5] << 8 ) | tk->key[4] ) ); + ppk[3] += S ( ppk[2] ^ ( ( tk->key[7] << 8 ) | tk->key[6] ) ); + ppk[4] += S ( ppk[3] ^ ( ( tk->key[9] << 8 ) | tk->key[8] ) ); + ppk[5] += S ( ppk[4] ^ ( ( tk->key[11] << 8 ) | tk->key[10] ) ); + + ppk[0] += ror16 ( ppk[5] ^ ( ( tk->key[13] << 8 ) | tk->key[12] ), 1 ); + ppk[1] += ror16 ( ppk[0] ^ ( ( tk->key[15] << 8 ) | tk->key[14] ), 1 ); + ppk[2] += ror16 ( ppk[1], 1 ); + ppk[3] += ror16 ( ppk[2], 1 ); + ppk[4] += ror16 ( ppk[3], 1 ); + ppk[5] += ror16 ( ppk[4], 1 ); + + kb[0] = dctx->tsc_lo >> 8; + kb[1] = ( ( dctx->tsc_lo >> 8 ) | 0x20 ) & 0x7F; + kb[2] = dctx->tsc_lo & 0xFF; + kb[3] = ( ( ppk[5] ^ ( ( tk->key[1] << 8 ) | tk->key[0] ) ) >> 1 ) + & 0xFF; + + for ( i = 0; i < 6; i++ ) { + kb[4 + 2*i] = ppk[i] & 0xFF; + kb[5 + 2*i] = ppk[i] >> 8; + } +} + +/** + * Update Michael message integrity code based on next 32-bit word of data + * + * @v V Michael code state (two 32-bit words) + * @v word Next 32-bit word of data + */ +static void tkip_feed_michael ( u32 *V, u32 word ) +{ + V[0] ^= word; + V[1] ^= rol32 ( V[0], 17 ); + V[0] += V[1]; + V[1] ^= ( ( V[0] & 0xFF00FF00 ) >> 8 ) | ( ( V[0] & 0x00FF00FF ) << 8 ); + V[0] += V[1]; + V[1] ^= rol32 ( V[0], 3 ); + V[0] += V[1]; + V[1] ^= ror32 ( V[0], 2 ); + V[0] += V[1]; +} + +/** + * Calculate Michael message integrity code + * + * @v key MIC key to use (8 bytes) + * @v da Destination link-layer address + * @v sa Source link-layer address + * @v data Start of data to calculate over + * @v len Length of header + data + * @ret mic Calculated Michael MIC (8 bytes) + */ +static void tkip_michael ( const void *key, const void *da, const void *sa, + const void *data, size_t len, void *mic ) +{ + u32 V[2]; /* V[0] = "l", V[1] = "r" in 802.11 */ + union { + u8 byte[12]; + u32 word[3]; + } cap; + const u8 *ptr = data; + const u8 *end = ptr + len; + int i; + + memcpy ( V, key, sizeof ( V ) ); + V[0] = le32_to_cpu ( V[0] ); + V[1] = le32_to_cpu ( V[1] ); + + /* Feed in header (we assume non-QoS, so Priority = 0) */ + memcpy ( &cap.byte[0], da, ETH_ALEN ); + memcpy ( &cap.byte[6], sa, ETH_ALEN ); + tkip_feed_michael ( V, le32_to_cpu ( cap.word[0] ) ); + tkip_feed_michael ( V, le32_to_cpu ( cap.word[1] ) ); + tkip_feed_michael ( V, le32_to_cpu ( cap.word[2] ) ); + tkip_feed_michael ( V, 0 ); + + /* Feed in data */ + while ( ptr + 4 <= end ) { + tkip_feed_michael ( V, le32_to_cpu ( *( u32 * ) ptr ) ); + ptr += 4; + } + + /* Add unaligned part and padding */ + for ( i = 0; ptr < end; i++ ) + cap.byte[i] = *ptr++; + cap.byte[i++] = 0x5a; + for ( ; i < 8; i++ ) + cap.byte[i] = 0; + + /* Feed in padding */ + tkip_feed_michael ( V, le32_to_cpu ( cap.word[0] ) ); + tkip_feed_michael ( V, le32_to_cpu ( cap.word[1] ) ); + + /* Output MIC */ + V[0] = cpu_to_le32 ( V[0] ); + V[1] = cpu_to_le32 ( V[1] ); + memcpy ( mic, V, sizeof ( V ) ); +} + +/** + * Encrypt a packet using TKIP + * + * @v crypto TKIP cryptosystem + * @v iob I/O buffer containing cleartext packet + * @ret eiob I/O buffer containing encrypted packet + */ +static struct io_buffer * tkip_encrypt ( struct net80211_crypto *crypto, + struct io_buffer *iob ) +{ + struct tkip_ctx *ctx = crypto->priv; + struct ieee80211_frame *hdr = iob->data; + struct io_buffer *eiob; + struct arc4_ctx arc4; + u8 key[16]; + struct tkip_head head; + u8 mic[8]; + u32 icv; + const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN; + int datalen = iob_len ( iob ) - hdrlen; + + ctx->enc.tsc_lo++; + if ( ctx->enc.tsc_lo == 0 ) { + ctx->enc.tsc_hi++; + ctx->enc.ttak_ok = 0; + } + + tkip_mix_1 ( &ctx->enc, &ctx->tk, hdr->addr2 ); + tkip_mix_2 ( &ctx->enc, &ctx->tk, key ); + + eiob = alloc_iob ( iob_len ( iob ) + TKIP_HEAD_LEN + TKIP_FOOT_LEN ); + if ( ! eiob ) + return NULL; + + /* Copy frame header */ + memcpy ( iob_put ( eiob, hdrlen ), iob->data, hdrlen ); + hdr = eiob->data; + hdr->fc |= IEEE80211_FC_PROTECTED; + + /* Fill in IV and key ID byte, and extended IV */ + memcpy ( &head, key, 3 ); + head.kid = 0x20; /* have Extended IV, key ID 0 */ + head.tsc_hi = cpu_to_le32 ( ctx->enc.tsc_hi ); + memcpy ( iob_put ( eiob, sizeof ( head ) ), &head, sizeof ( head ) ); + + /* Copy and encrypt the data */ + cipher_setkey ( &arc4_algorithm, &arc4, key, 16 ); + cipher_encrypt ( &arc4_algorithm, &arc4, iob->data + hdrlen, + iob_put ( eiob, datalen ), datalen ); + + /* Add MIC */ + hdr = iob->data; + tkip_michael ( &ctx->tk.mic.tx, hdr->addr3, hdr->addr2, + iob->data + hdrlen, datalen, mic ); + cipher_encrypt ( &arc4_algorithm, &arc4, mic, + iob_put ( eiob, sizeof ( mic ) ), sizeof ( mic ) ); + + /* Add ICV */ + icv = crc32_le ( ~0, iob->data + hdrlen, datalen ); + icv = crc32_le ( icv, mic, sizeof ( mic ) ); + icv = cpu_to_le32 ( ~icv ); + cipher_encrypt ( &arc4_algorithm, &arc4, &icv, + iob_put ( eiob, TKIP_ICV_LEN ), TKIP_ICV_LEN ); + + DBGC2 ( ctx, "WPA-TKIP %p: encrypted packet %p -> %p\n", ctx, + iob, eiob ); + + return eiob; +} + +/** + * Decrypt a packet using TKIP + * + * @v crypto TKIP cryptosystem + * @v eiob I/O buffer containing encrypted packet + * @ret iob I/O buffer containing cleartext packet + */ +static struct io_buffer * tkip_decrypt ( struct net80211_crypto *crypto, + struct io_buffer *eiob ) +{ + struct tkip_ctx *ctx = crypto->priv; + struct ieee80211_frame *hdr; + struct io_buffer *iob; + const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN; + int datalen = iob_len ( eiob ) - hdrlen - TKIP_HEAD_LEN - TKIP_FOOT_LEN; + struct tkip_head *head; + struct arc4_ctx arc4; + u16 rx_tsc_lo; + u8 key[16]; + u8 mic[8]; + u32 icv, crc; + + iob = alloc_iob ( hdrlen + datalen + TKIP_FOOT_LEN ); + if ( ! iob ) + return NULL; + + /* Copy frame header */ + memcpy ( iob_put ( iob, hdrlen ), eiob->data, hdrlen ); + hdr = iob->data; + hdr->fc &= ~IEEE80211_FC_PROTECTED; + + /* Check and update TSC */ + head = eiob->data + hdrlen; + rx_tsc_lo = ( head->tsc1 << 8 ) | head->tsc0; + + if ( head->tsc_hi < ctx->dec.tsc_hi || + ( head->tsc_hi == ctx->dec.tsc_hi && + rx_tsc_lo <= ctx->dec.tsc_lo ) ) { + DBGC ( ctx, "WPA-TKIP %p: packet received out of order " + "(%08x:%04x <= %08x:%04x)\n", ctx, head->tsc_hi, + rx_tsc_lo, ctx->dec.tsc_hi, ctx->dec.tsc_lo ); + free_iob ( iob ); + return NULL; + } + ctx->dec.tsc_lo = rx_tsc_lo; + if ( ctx->dec.tsc_hi != head->tsc_hi ) { + ctx->dec.ttak_ok = 0; + ctx->dec.tsc_hi = head->tsc_hi; + } + + /* Calculate key */ + tkip_mix_1 ( &ctx->dec, &ctx->tk, hdr->addr2 ); + tkip_mix_2 ( &ctx->dec, &ctx->tk, key ); + + /* Copy-decrypt data, MIC, ICV */ + cipher_setkey ( &arc4_algorithm, &arc4, key, 16 ); + cipher_decrypt ( &arc4_algorithm, &arc4, + eiob->data + hdrlen + TKIP_HEAD_LEN, + iob_put ( iob, datalen ), datalen + TKIP_FOOT_LEN ); + + /* Check ICV */ + icv = le32_to_cpu ( *( u32 * ) ( iob->tail + TKIP_MIC_LEN ) ); + crc = ~crc32_le ( ~0, iob->data + hdrlen, datalen + TKIP_MIC_LEN ); + if ( crc != icv ) { + DBGC ( ctx, "WPA-TKIP %p CRC mismatch: expect %08x, get %08x\n", + ctx, icv, crc ); + free_iob ( iob ); + return NULL; + } + + /* Check MIC */ + tkip_michael ( &ctx->tk.mic.rx, hdr->addr1, hdr->addr3, + iob->data + hdrlen, datalen, mic ); + if ( memcmp ( mic, iob->tail, TKIP_MIC_LEN ) != 0 ) { + DBGC ( ctx, "WPA-TKIP %p ALERT! MIC failure\n", ctx ); + /* XXX we should do the countermeasures here */ + free_iob ( iob ); + return NULL; + } + + DBGC2 ( ctx, "WPA-TKIP %p: decrypted packet %p -> %p\n", ctx, + eiob, iob ); + + return iob; +} + +/** TKIP cryptosystem */ +struct net80211_crypto tkip_crypto __net80211_crypto = { + .algorithm = NET80211_CRYPT_TKIP, + .init = tkip_init, + .encrypt = tkip_encrypt, + .decrypt = tkip_decrypt, + .priv_len = sizeof ( struct tkip_ctx ), +}; + + + + +/** + * Calculate HMAC-MD5 MIC for EAPOL-Key frame + * + * @v kck Key Confirmation Key, 16 bytes + * @v msg Message to calculate MIC over + * @v len Number of bytes to calculate MIC over + * @ret mic Calculated MIC, 16 bytes long + */ +static void tkip_kie_mic ( const void *kck, const void *msg, size_t len, + void *mic ) +{ + uint8_t ctx[MD5_CTX_SIZE]; + u8 kckb[16]; + size_t kck_len = 16; + + memcpy ( kckb, kck, kck_len ); + + hmac_init ( &md5_algorithm, ctx, kckb, &kck_len ); + hmac_update ( &md5_algorithm, ctx, msg, len ); + hmac_final ( &md5_algorithm, ctx, kckb, &kck_len, mic ); +} + +/** + * Decrypt key data in EAPOL-Key frame + * + * @v kek Key Encryption Key, 16 bytes + * @v iv Initialisation vector, 16 bytes + * @v msg Message to decrypt + * @v len Length of message + * @ret msg Decrypted message in place of original + * @ret len Unchanged + * @ret rc Always 0 for success + */ +static int tkip_kie_decrypt ( const void *kek, const void *iv, + void *msg, u16 *len ) +{ + u8 key[32]; + memcpy ( key, iv, 16 ); + memcpy ( key + 16, kek, 16 ); + + arc4_skip ( key, 32, 256, msg, msg, *len ); + + return 0; +} + + +/** TKIP-style key integrity and encryption handler */ +struct wpa_kie tkip_kie __wpa_kie = { + .version = EAPOL_KEY_VERSION_WPA, + .mic = tkip_kie_mic, + .decrypt = tkip_kie_decrypt, +}; diff --git a/qemu/roms/ipxe/src/net/aoe.c b/qemu/roms/ipxe/src/net/aoe.c new file mode 100644 index 000000000..a6d7b3e7b --- /dev/null +++ b/qemu/roms/ipxe/src/net/aoe.c @@ -0,0 +1,1057 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stddef.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <assert.h> +#include <byteswap.h> +#include <ipxe/list.h> +#include <ipxe/if_ether.h> +#include <ipxe/iobuf.h> +#include <ipxe/uaccess.h> +#include <ipxe/netdevice.h> +#include <ipxe/features.h> +#include <ipxe/interface.h> +#include <ipxe/xfer.h> +#include <ipxe/uri.h> +#include <ipxe/open.h> +#include <ipxe/ata.h> +#include <ipxe/device.h> +#include <ipxe/aoe.h> + +/** @file + * + * AoE protocol + * + */ + +FEATURE ( FEATURE_PROTOCOL, "AoE", DHCP_EB_FEATURE_AOE, 1 ); + +struct net_protocol aoe_protocol __net_protocol; + +/****************************************************************************** + * + * AoE devices and commands + * + ****************************************************************************** + */ + +/** List of all AoE devices */ +static LIST_HEAD ( aoe_devices ); + +/** List of active AoE commands */ +static LIST_HEAD ( aoe_commands ); + +/** An AoE device */ +struct aoe_device { + /** Reference counter */ + struct refcnt refcnt; + + /** Network device */ + struct net_device *netdev; + /** ATA command issuing interface */ + struct interface ata; + + /** Major number */ + uint16_t major; + /** Minor number */ + uint8_t minor; + /** Target MAC address */ + uint8_t target[MAX_LL_ADDR_LEN]; + + /** Saved timeout value */ + unsigned long timeout; + + /** Configuration command interface */ + struct interface config; + /** Device is configued */ + int configured; +}; + +/** An AoE command */ +struct aoe_command { + /** Reference count */ + struct refcnt refcnt; + /** AOE device */ + struct aoe_device *aoedev; + /** List of active commands */ + struct list_head list; + + /** ATA command interface */ + struct interface ata; + + /** ATA command */ + struct ata_cmd command; + /** Command type */ + struct aoe_command_type *type; + /** Command tag */ + uint32_t tag; + + /** Retransmission timer */ + struct retry_timer timer; +}; + +/** An AoE command type */ +struct aoe_command_type { + /** + * Calculate length of AoE command IU + * + * @v aoecmd AoE command + * @ret len Length of command IU + */ + size_t ( * cmd_len ) ( struct aoe_command *aoecmd ); + /** + * Build AoE command IU + * + * @v aoecmd AoE command + * @v data Command IU + * @v len Length of command IU + */ + void ( * cmd ) ( struct aoe_command *aoecmd, void *data, size_t len ); + /** + * Handle AoE response IU + * + * @v aoecmd AoE command + * @v data Response IU + * @v len Length of response IU + * @v ll_source Link-layer source address + * @ret rc Return status code + */ + int ( * rsp ) ( struct aoe_command *aoecmd, const void *data, + size_t len, const void *ll_source ); +}; + +/** + * Get reference to AoE device + * + * @v aoedev AoE device + * @ret aoedev AoE device + */ +static inline __attribute__ (( always_inline )) struct aoe_device * +aoedev_get ( struct aoe_device *aoedev ) { + ref_get ( &aoedev->refcnt ); + return aoedev; +} + +/** + * Drop reference to AoE device + * + * @v aoedev AoE device + */ +static inline __attribute__ (( always_inline )) void +aoedev_put ( struct aoe_device *aoedev ) { + ref_put ( &aoedev->refcnt ); +} + +/** + * Get reference to AoE command + * + * @v aoecmd AoE command + * @ret aoecmd AoE command + */ +static inline __attribute__ (( always_inline )) struct aoe_command * +aoecmd_get ( struct aoe_command *aoecmd ) { + ref_get ( &aoecmd->refcnt ); + return aoecmd; +} + +/** + * Drop reference to AoE command + * + * @v aoecmd AoE command + */ +static inline __attribute__ (( always_inline )) void +aoecmd_put ( struct aoe_command *aoecmd ) { + ref_put ( &aoecmd->refcnt ); +} + +/** + * Name AoE device + * + * @v aoedev AoE device + * @ret name AoE device name + */ +static const char * aoedev_name ( struct aoe_device *aoedev ) { + static char buf[16]; + + snprintf ( buf, sizeof ( buf ), "%s/e%d.%d", aoedev->netdev->name, + aoedev->major, aoedev->minor ); + return buf; +} + +/** + * Free AoE command + * + * @v refcnt Reference counter + */ +static void aoecmd_free ( struct refcnt *refcnt ) { + struct aoe_command *aoecmd = + container_of ( refcnt, struct aoe_command, refcnt ); + + assert ( ! timer_running ( &aoecmd->timer ) ); + assert ( list_empty ( &aoecmd->list ) ); + + aoedev_put ( aoecmd->aoedev ); + free ( aoecmd ); +} + +/** + * Close AoE command + * + * @v aoecmd AoE command + * @v rc Reason for close + */ +static void aoecmd_close ( struct aoe_command *aoecmd, int rc ) { + struct aoe_device *aoedev = aoecmd->aoedev; + + /* Stop timer */ + stop_timer ( &aoecmd->timer ); + + /* Preserve the timeout value for subsequent commands */ + aoedev->timeout = aoecmd->timer.timeout; + + /* Remove from list of commands */ + if ( ! list_empty ( &aoecmd->list ) ) { + list_del ( &aoecmd->list ); + INIT_LIST_HEAD ( &aoecmd->list ); + aoecmd_put ( aoecmd ); + } + + /* Shut down interfaces */ + intf_shutdown ( &aoecmd->ata, rc ); +} + +/** + * Transmit AoE command request + * + * @v aoecmd AoE command + * @ret rc Return status code + */ +static int aoecmd_tx ( struct aoe_command *aoecmd ) { + struct aoe_device *aoedev = aoecmd->aoedev; + struct net_device *netdev = aoedev->netdev; + struct io_buffer *iobuf; + struct aoehdr *aoehdr; + size_t cmd_len; + int rc; + + /* Sanity check */ + assert ( netdev != NULL ); + + /* If we are transmitting anything that requires a response, + * start the retransmission timer. Do this before attempting + * to allocate the I/O buffer, in case allocation itself + * fails. + */ + start_timer ( &aoecmd->timer ); + + /* Create outgoing I/O buffer */ + cmd_len = aoecmd->type->cmd_len ( aoecmd ); + iobuf = alloc_iob ( MAX_LL_HEADER_LEN + cmd_len ); + if ( ! iobuf ) + return -ENOMEM; + iob_reserve ( iobuf, MAX_LL_HEADER_LEN ); + aoehdr = iob_put ( iobuf, cmd_len ); + + /* Fill AoE header */ + memset ( aoehdr, 0, sizeof ( *aoehdr ) ); + aoehdr->ver_flags = AOE_VERSION; + aoehdr->major = htons ( aoedev->major ); + aoehdr->minor = aoedev->minor; + aoehdr->tag = htonl ( aoecmd->tag ); + aoecmd->type->cmd ( aoecmd, iobuf->data, iob_len ( iobuf ) ); + + /* Send packet */ + if ( ( rc = net_tx ( iobuf, netdev, &aoe_protocol, aoedev->target, + netdev->ll_addr ) ) != 0 ) { + DBGC ( aoedev, "AoE %s/%08x could not transmit: %s\n", + aoedev_name ( aoedev ), aoecmd->tag, + strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Receive AoE command response + * + * @v aoecmd AoE command + * @v iobuf I/O buffer + * @v ll_source Link-layer source address + * @ret rc Return status code + */ +static int aoecmd_rx ( struct aoe_command *aoecmd, struct io_buffer *iobuf, + const void *ll_source ) { + struct aoe_device *aoedev = aoecmd->aoedev; + struct aoehdr *aoehdr = iobuf->data; + int rc; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *aoehdr ) ) { + DBGC ( aoedev, "AoE %s/%08x received underlength response " + "(%zd bytes)\n", aoedev_name ( aoedev ), + aoecmd->tag, iob_len ( iobuf ) ); + rc = -EINVAL; + goto done; + } + if ( ( ntohs ( aoehdr->major ) != aoedev->major ) || + ( aoehdr->minor != aoedev->minor ) ) { + DBGC ( aoedev, "AoE %s/%08x received response for incorrect " + "device e%d.%d\n", aoedev_name ( aoedev ), aoecmd->tag, + ntohs ( aoehdr->major ), aoehdr->minor ); + rc = -EINVAL; + goto done; + } + + /* Catch command failures */ + if ( aoehdr->ver_flags & AOE_FL_ERROR ) { + DBGC ( aoedev, "AoE %s/%08x terminated in error\n", + aoedev_name ( aoedev ), aoecmd->tag ); + aoecmd_close ( aoecmd, -EIO ); + rc = -EIO; + goto done; + } + + /* Hand off to command completion handler */ + if ( ( rc = aoecmd->type->rsp ( aoecmd, iobuf->data, iob_len ( iobuf ), + ll_source ) ) != 0 ) + goto done; + + done: + /* Free I/O buffer */ + free_iob ( iobuf ); + + /* Terminate command */ + aoecmd_close ( aoecmd, rc ); + + return rc; +} + +/** + * Handle AoE retry timer expiry + * + * @v timer AoE retry timer + * @v fail Failure indicator + */ +static void aoecmd_expired ( struct retry_timer *timer, int fail ) { + struct aoe_command *aoecmd = + container_of ( timer, struct aoe_command, timer ); + + if ( fail ) { + aoecmd_close ( aoecmd, -ETIMEDOUT ); + } else { + aoecmd_tx ( aoecmd ); + } +} + +/** + * Calculate length of AoE ATA command IU + * + * @v aoecmd AoE command + * @ret len Length of command IU + */ +static size_t aoecmd_ata_cmd_len ( struct aoe_command *aoecmd ) { + struct ata_cmd *command = &aoecmd->command; + + return ( sizeof ( struct aoehdr ) + sizeof ( struct aoeata ) + + command->data_out_len ); +} + +/** + * Build AoE ATA command IU + * + * @v aoecmd AoE command + * @v data Command IU + * @v len Length of command IU + */ +static void aoecmd_ata_cmd ( struct aoe_command *aoecmd, + void *data, size_t len ) { + struct aoe_device *aoedev = aoecmd->aoedev; + struct ata_cmd *command = &aoecmd->command; + struct aoehdr *aoehdr = data; + struct aoeata *aoeata = &aoehdr->payload[0].ata; + + /* Sanity check */ + linker_assert ( AOE_FL_DEV_HEAD == ATA_DEV_SLAVE, __fix_ata_h__ ); + assert ( len == ( sizeof ( *aoehdr ) + sizeof ( *aoeata ) + + command->data_out_len ) ); + + /* Build IU */ + aoehdr->command = AOE_CMD_ATA; + memset ( aoeata, 0, sizeof ( *aoeata ) ); + aoeata->aflags = ( ( command->cb.lba48 ? AOE_FL_EXTENDED : 0 ) | + ( command->cb.device & ATA_DEV_SLAVE ) | + ( command->data_out_len ? AOE_FL_WRITE : 0 ) ); + aoeata->err_feat = command->cb.err_feat.bytes.cur; + aoeata->count = command->cb.count.native; + aoeata->cmd_stat = command->cb.cmd_stat; + aoeata->lba.u64 = cpu_to_le64 ( command->cb.lba.native ); + if ( ! command->cb.lba48 ) + aoeata->lba.bytes[3] |= + ( command->cb.device & ATA_DEV_MASK ); + copy_from_user ( aoeata->data, command->data_out, 0, + command->data_out_len ); + + DBGC2 ( aoedev, "AoE %s/%08x ATA cmd %02x:%02x:%02x:%02x:%08llx", + aoedev_name ( aoedev ), aoecmd->tag, aoeata->aflags, + aoeata->err_feat, aoeata->count, aoeata->cmd_stat, + aoeata->lba.u64 ); + if ( command->data_out_len ) + DBGC2 ( aoedev, " out %04zx", command->data_out_len ); + if ( command->data_in_len ) + DBGC2 ( aoedev, " in %04zx", command->data_in_len ); + DBGC2 ( aoedev, "\n" ); +} + +/** + * Handle AoE ATA response IU + * + * @v aoecmd AoE command + * @v data Response IU + * @v len Length of response IU + * @v ll_source Link-layer source address + * @ret rc Return status code + */ +static int aoecmd_ata_rsp ( struct aoe_command *aoecmd, const void *data, + size_t len, const void *ll_source __unused ) { + struct aoe_device *aoedev = aoecmd->aoedev; + struct ata_cmd *command = &aoecmd->command; + const struct aoehdr *aoehdr = data; + const struct aoeata *aoeata = &aoehdr->payload[0].ata; + size_t data_len; + + /* Sanity check */ + if ( len < ( sizeof ( *aoehdr ) + sizeof ( *aoeata ) ) ) { + DBGC ( aoedev, "AoE %s/%08x received underlength ATA response " + "(%zd bytes)\n", aoedev_name ( aoedev ), + aoecmd->tag, len ); + return -EINVAL; + } + data_len = ( len - ( sizeof ( *aoehdr ) + sizeof ( *aoeata ) ) ); + DBGC2 ( aoedev, "AoE %s/%08x ATA rsp %02x in %04zx\n", + aoedev_name ( aoedev ), aoecmd->tag, aoeata->cmd_stat, + data_len ); + + /* Check for command failure */ + if ( aoeata->cmd_stat & ATA_STAT_ERR ) { + DBGC ( aoedev, "AoE %s/%08x status %02x\n", + aoedev_name ( aoedev ), aoecmd->tag, aoeata->cmd_stat ); + return -EIO; + } + + /* Check data-in length is sufficient. (There may be trailing + * garbage due to Ethernet minimum-frame-size padding.) + */ + if ( data_len < command->data_in_len ) { + DBGC ( aoedev, "AoE %s/%08x data-in underrun (received %zd, " + "expected %zd)\n", aoedev_name ( aoedev ), aoecmd->tag, + data_len, command->data_in_len ); + return -ERANGE; + } + + /* Copy out data payload */ + copy_to_user ( command->data_in, 0, aoeata->data, + command->data_in_len ); + + return 0; +} + +/** AoE ATA command */ +static struct aoe_command_type aoecmd_ata = { + .cmd_len = aoecmd_ata_cmd_len, + .cmd = aoecmd_ata_cmd, + .rsp = aoecmd_ata_rsp, +}; + +/** + * Calculate length of AoE configuration command IU + * + * @v aoecmd AoE command + * @ret len Length of command IU + */ +static size_t aoecmd_cfg_cmd_len ( struct aoe_command *aoecmd __unused ) { + return ( sizeof ( struct aoehdr ) + sizeof ( struct aoecfg ) ); +} + +/** + * Build AoE configuration command IU + * + * @v aoecmd AoE command + * @v data Command IU + * @v len Length of command IU + */ +static void aoecmd_cfg_cmd ( struct aoe_command *aoecmd, + void *data, size_t len ) { + struct aoe_device *aoedev = aoecmd->aoedev; + struct aoehdr *aoehdr = data; + struct aoecfg *aoecfg = &aoehdr->payload[0].cfg; + + /* Sanity check */ + assert ( len == ( sizeof ( *aoehdr ) + sizeof ( *aoecfg ) ) ); + + /* Build IU */ + aoehdr->command = AOE_CMD_CONFIG; + memset ( aoecfg, 0, sizeof ( *aoecfg ) ); + + DBGC ( aoedev, "AoE %s/%08x CONFIG cmd\n", + aoedev_name ( aoedev ), aoecmd->tag ); +} + +/** + * Handle AoE configuration response IU + * + * @v aoecmd AoE command + * @v data Response IU + * @v len Length of response IU + * @v ll_source Link-layer source address + * @ret rc Return status code + */ +static int aoecmd_cfg_rsp ( struct aoe_command *aoecmd, const void *data, + size_t len, const void *ll_source ) { + struct aoe_device *aoedev = aoecmd->aoedev; + struct ll_protocol *ll_protocol = aoedev->netdev->ll_protocol; + const struct aoehdr *aoehdr = data; + const struct aoecfg *aoecfg = &aoehdr->payload[0].cfg; + + /* Sanity check */ + if ( len < ( sizeof ( *aoehdr ) + sizeof ( *aoecfg ) ) ) { + DBGC ( aoedev, "AoE %s/%08x received underlength " + "configuration response (%zd bytes)\n", + aoedev_name ( aoedev ), aoecmd->tag, len ); + return -EINVAL; + } + DBGC ( aoedev, "AoE %s/%08x CONFIG rsp buf %04x fw %04x scnt %02x\n", + aoedev_name ( aoedev ), aoecmd->tag, ntohs ( aoecfg->bufcnt ), + aoecfg->fwver, aoecfg->scnt ); + + /* Record target MAC address */ + memcpy ( aoedev->target, ll_source, ll_protocol->ll_addr_len ); + DBGC ( aoedev, "AoE %s has MAC address %s\n", + aoedev_name ( aoedev ), ll_protocol->ntoa ( aoedev->target ) ); + + return 0; +} + +/** AoE configuration command */ +static struct aoe_command_type aoecmd_cfg = { + .cmd_len = aoecmd_cfg_cmd_len, + .cmd = aoecmd_cfg_cmd, + .rsp = aoecmd_cfg_rsp, +}; + +/** AoE command ATA interface operations */ +static struct interface_operation aoecmd_ata_op[] = { + INTF_OP ( intf_close, struct aoe_command *, aoecmd_close ), +}; + +/** AoE command ATA interface descriptor */ +static struct interface_descriptor aoecmd_ata_desc = + INTF_DESC ( struct aoe_command, ata, aoecmd_ata_op ); + +/** + * Identify AoE command by tag + * + * @v tag Command tag + * @ret aoecmd AoE command, or NULL + */ +static struct aoe_command * aoecmd_find_tag ( uint32_t tag ) { + struct aoe_command *aoecmd; + + list_for_each_entry ( aoecmd, &aoe_commands, list ) { + if ( aoecmd->tag == tag ) + return aoecmd; + } + return NULL; +} + +/** + * Choose an AoE command tag + * + * @ret tag New tag, or negative error + */ +static int aoecmd_new_tag ( void ) { + static uint16_t tag_idx; + unsigned int i; + + for ( i = 0 ; i < 65536 ; i++ ) { + tag_idx++; + if ( aoecmd_find_tag ( tag_idx ) == NULL ) + return ( AOE_TAG_MAGIC | tag_idx ); + } + return -EADDRINUSE; +} + +/** + * Create AoE command + * + * @v aoedev AoE device + * @v type AoE command type + * @ret aoecmd AoE command + */ +static struct aoe_command * aoecmd_create ( struct aoe_device *aoedev, + struct aoe_command_type *type ) { + struct aoe_command *aoecmd; + int tag; + + /* Allocate command tag */ + tag = aoecmd_new_tag(); + if ( tag < 0 ) + return NULL; + + /* Allocate and initialise structure */ + aoecmd = zalloc ( sizeof ( *aoecmd ) ); + if ( ! aoecmd ) + return NULL; + ref_init ( &aoecmd->refcnt, aoecmd_free ); + list_add ( &aoecmd->list, &aoe_commands ); + intf_init ( &aoecmd->ata, &aoecmd_ata_desc, &aoecmd->refcnt ); + timer_init ( &aoecmd->timer, aoecmd_expired, &aoecmd->refcnt ); + aoecmd->aoedev = aoedev_get ( aoedev ); + aoecmd->type = type; + aoecmd->tag = tag; + + /* Preserve timeout from last completed command */ + aoecmd->timer.timeout = aoedev->timeout; + + /* Return already mortalised. (Reference is held by command list.) */ + return aoecmd; +} + +/** + * Issue AoE ATA command + * + * @v aoedev AoE device + * @v parent Parent interface + * @v command ATA command + * @ret tag Command tag, or negative error + */ +static int aoedev_ata_command ( struct aoe_device *aoedev, + struct interface *parent, + struct ata_cmd *command ) { + struct net_device *netdev = aoedev->netdev; + struct aoe_command *aoecmd; + + /* Fail immediately if net device is closed */ + if ( ! netdev_is_open ( netdev ) ) { + DBGC ( aoedev, "AoE %s cannot issue command while net device " + "is closed\n", aoedev_name ( aoedev ) ); + return -EWOULDBLOCK; + } + + /* Create command */ + aoecmd = aoecmd_create ( aoedev, &aoecmd_ata ); + if ( ! aoecmd ) + return -ENOMEM; + memcpy ( &aoecmd->command, command, sizeof ( aoecmd->command ) ); + + /* Attempt to send command. Allow failures to be handled by + * the retry timer. + */ + aoecmd_tx ( aoecmd ); + + /* Attach to parent interface, leave reference with command + * list, and return. + */ + intf_plug_plug ( &aoecmd->ata, parent ); + return aoecmd->tag; +} + +/** + * Issue AoE configuration command + * + * @v aoedev AoE device + * @v parent Parent interface + * @ret tag Command tag, or negative error + */ +static int aoedev_cfg_command ( struct aoe_device *aoedev, + struct interface *parent ) { + struct aoe_command *aoecmd; + + /* Create command */ + aoecmd = aoecmd_create ( aoedev, &aoecmd_cfg ); + if ( ! aoecmd ) + return -ENOMEM; + + /* Attempt to send command. Allow failures to be handled by + * the retry timer. + */ + aoecmd_tx ( aoecmd ); + + /* Attach to parent interface, leave reference with command + * list, and return. + */ + intf_plug_plug ( &aoecmd->ata, parent ); + return aoecmd->tag; +} + +/** + * Free AoE device + * + * @v refcnt Reference count + */ +static void aoedev_free ( struct refcnt *refcnt ) { + struct aoe_device *aoedev = + container_of ( refcnt, struct aoe_device, refcnt ); + + netdev_put ( aoedev->netdev ); + free ( aoedev ); +} + +/** + * Close AoE device + * + * @v aoedev AoE device + * @v rc Reason for close + */ +static void aoedev_close ( struct aoe_device *aoedev, int rc ) { + struct aoe_command *aoecmd; + struct aoe_command *tmp; + + /* Shut down interfaces */ + intf_shutdown ( &aoedev->ata, rc ); + intf_shutdown ( &aoedev->config, rc ); + + /* Shut down any active commands */ + list_for_each_entry_safe ( aoecmd, tmp, &aoe_commands, list ) { + if ( aoecmd->aoedev != aoedev ) + continue; + aoecmd_get ( aoecmd ); + aoecmd_close ( aoecmd, rc ); + aoecmd_put ( aoecmd ); + } +} + +/** + * Check AoE device flow-control window + * + * @v aoedev AoE device + * @ret len Length of window + */ +static size_t aoedev_window ( struct aoe_device *aoedev ) { + return ( aoedev->configured ? ~( ( size_t ) 0 ) : 0 ); +} + +/** + * Handle AoE device configuration completion + * + * @v aoedev AoE device + * @v rc Reason for completion + */ +static void aoedev_config_done ( struct aoe_device *aoedev, int rc ) { + + /* Shut down interface */ + intf_shutdown ( &aoedev->config, rc ); + + /* Close device on failure */ + if ( rc != 0 ) { + aoedev_close ( aoedev, rc ); + return; + } + + /* Mark device as configured */ + aoedev->configured = 1; + xfer_window_changed ( &aoedev->ata ); +} + +/** + * Identify device underlying AoE device + * + * @v aoedev AoE device + * @ret device Underlying device + */ +static struct device * aoedev_identify_device ( struct aoe_device *aoedev ) { + return aoedev->netdev->dev; +} + +/** + * Describe AoE device in an ACPI table + * + * @v aoedev AoE device + * @v acpi ACPI table + * @v len Length of ACPI table + * @ret rc Return status code + */ +static int aoedev_describe ( struct aoe_device *aoedev, + struct acpi_description_header *acpi, + size_t len ) { + struct abft_table *abft = + container_of ( acpi, struct abft_table, acpi ); + + /* Sanity check */ + if ( len < sizeof ( *abft ) ) + return -ENOBUFS; + + /* Populate table */ + abft->acpi.signature = cpu_to_le32 ( ABFT_SIG ); + abft->acpi.length = cpu_to_le32 ( sizeof ( *abft ) ); + abft->acpi.revision = 1; + abft->shelf = cpu_to_le16 ( aoedev->major ); + abft->slot = aoedev->minor; + memcpy ( abft->mac, aoedev->netdev->ll_addr, sizeof ( abft->mac ) ); + + return 0; +} + +/** AoE device ATA interface operations */ +static struct interface_operation aoedev_ata_op[] = { + INTF_OP ( ata_command, struct aoe_device *, aoedev_ata_command ), + INTF_OP ( xfer_window, struct aoe_device *, aoedev_window ), + INTF_OP ( intf_close, struct aoe_device *, aoedev_close ), + INTF_OP ( acpi_describe, struct aoe_device *, aoedev_describe ), + INTF_OP ( identify_device, struct aoe_device *, + aoedev_identify_device ), +}; + +/** AoE device ATA interface descriptor */ +static struct interface_descriptor aoedev_ata_desc = + INTF_DESC ( struct aoe_device, ata, aoedev_ata_op ); + +/** AoE device configuration interface operations */ +static struct interface_operation aoedev_config_op[] = { + INTF_OP ( intf_close, struct aoe_device *, aoedev_config_done ), +}; + +/** AoE device configuration interface descriptor */ +static struct interface_descriptor aoedev_config_desc = + INTF_DESC ( struct aoe_device, config, aoedev_config_op ); + +/** + * Open AoE device + * + * @v parent Parent interface + * @v netdev Network device + * @v major Device major number + * @v minor Device minor number + * @ret rc Return status code + */ +static int aoedev_open ( struct interface *parent, struct net_device *netdev, + unsigned int major, unsigned int minor ) { + struct aoe_device *aoedev; + int rc; + + /* Allocate and initialise structure */ + aoedev = zalloc ( sizeof ( *aoedev ) ); + if ( ! aoedev ) { + rc = -ENOMEM; + goto err_zalloc; + } + ref_init ( &aoedev->refcnt, aoedev_free ); + intf_init ( &aoedev->ata, &aoedev_ata_desc, &aoedev->refcnt ); + intf_init ( &aoedev->config, &aoedev_config_desc, &aoedev->refcnt ); + aoedev->netdev = netdev_get ( netdev ); + aoedev->major = major; + aoedev->minor = minor; + memcpy ( aoedev->target, netdev->ll_broadcast, + netdev->ll_protocol->ll_addr_len ); + + /* Initiate configuration */ + if ( ( rc = aoedev_cfg_command ( aoedev, &aoedev->config ) ) < 0 ) { + DBGC ( aoedev, "AoE %s could not initiate configuration: %s\n", + aoedev_name ( aoedev ), strerror ( rc ) ); + goto err_config; + } + + /* Attach ATA device to parent interface */ + if ( ( rc = ata_open ( parent, &aoedev->ata, ATA_DEV_MASTER, + AOE_MAX_COUNT ) ) != 0 ) { + DBGC ( aoedev, "AoE %s could not create ATA device: %s\n", + aoedev_name ( aoedev ), strerror ( rc ) ); + goto err_ata_open; + } + + /* Mortalise self and return */ + ref_put ( &aoedev->refcnt ); + return 0; + + err_ata_open: + err_config: + aoedev_close ( aoedev, rc ); + ref_put ( &aoedev->refcnt ); + err_zalloc: + return rc; +} + +/****************************************************************************** + * + * AoE network protocol + * + ****************************************************************************** + */ + +/** + * Process incoming AoE packets + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v ll_dest Link-layer destination address + * @v ll_source Link-layer source address + * @v flags Packet flags + * @ret rc Return status code + */ +static int aoe_rx ( struct io_buffer *iobuf, + struct net_device *netdev __unused, + const void *ll_dest __unused, + const void *ll_source, + unsigned int flags __unused ) { + struct aoehdr *aoehdr = iobuf->data; + struct aoe_command *aoecmd; + int rc; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *aoehdr ) ) { + DBG ( "AoE received underlength packet (%zd bytes)\n", + iob_len ( iobuf ) ); + rc = -EINVAL; + goto err_sanity; + } + if ( ( aoehdr->ver_flags & AOE_VERSION_MASK ) != AOE_VERSION ) { + DBG ( "AoE received packet for unsupported protocol version " + "%02x\n", ( aoehdr->ver_flags & AOE_VERSION_MASK ) ); + rc = -EPROTONOSUPPORT; + goto err_sanity; + } + if ( ! ( aoehdr->ver_flags & AOE_FL_RESPONSE ) ) { + DBG ( "AoE received request packet\n" ); + rc = -EOPNOTSUPP; + goto err_sanity; + } + + /* Demultiplex amongst active AoE commands */ + aoecmd = aoecmd_find_tag ( ntohl ( aoehdr->tag ) ); + if ( ! aoecmd ) { + DBG ( "AoE received packet for unused tag %08x\n", + ntohl ( aoehdr->tag ) ); + rc = -ENOENT; + goto err_demux; + } + + /* Pass received frame to command */ + aoecmd_get ( aoecmd ); + if ( ( rc = aoecmd_rx ( aoecmd, iob_disown ( iobuf ), + ll_source ) ) != 0 ) + goto err_rx; + + err_rx: + aoecmd_put ( aoecmd ); + err_demux: + err_sanity: + free_iob ( iobuf ); + return rc; +} + +/** AoE protocol */ +struct net_protocol aoe_protocol __net_protocol = { + .name = "AoE", + .net_proto = htons ( ETH_P_AOE ), + .rx = aoe_rx, +}; + +/****************************************************************************** + * + * AoE URIs + * + ****************************************************************************** + */ + +/** + * Parse AoE URI + * + * @v uri URI + * @ret major Major device number + * @ret minor Minor device number + * @ret rc Return status code + * + * An AoE URI has the form "aoe:e<major>.<minor>". + */ +static int aoe_parse_uri ( struct uri *uri, unsigned int *major, + unsigned int *minor ) { + const char *ptr; + char *end; + + /* Check for URI with opaque portion */ + if ( ! uri->opaque ) + return -EINVAL; + ptr = uri->opaque; + + /* Check for initial 'e' */ + if ( *ptr != 'e' ) + return -EINVAL; + ptr++; + + /* Parse major device number */ + *major = strtoul ( ptr, &end, 10 ); + if ( *end != '.' ) + return -EINVAL; + ptr = ( end + 1 ); + + /* Parse minor device number */ + *minor = strtoul ( ptr, &end, 10 ); + if ( *end ) + return -EINVAL; + + return 0; +} + +/** + * Open AoE URI + * + * @v parent Parent interface + * @v uri URI + * @ret rc Return status code + */ +static int aoe_open ( struct interface *parent, struct uri *uri ) { + struct net_device *netdev; + unsigned int major; + unsigned int minor; + int rc; + + /* Identify network device. This is something of a hack, but + * the AoE URI scheme that has been in use for some time now + * provides no way to specify a particular device. + */ + netdev = last_opened_netdev(); + if ( ! netdev ) { + DBG ( "AoE cannot identify network device\n" ); + return -ENODEV; + } + + /* Parse URI */ + if ( ( rc = aoe_parse_uri ( uri, &major, &minor ) ) != 0 ) { + DBG ( "AoE cannot parse URI\n" ); + return rc; + } + + /* Open AoE device */ + if ( ( rc = aoedev_open ( parent, netdev, major, minor ) ) != 0 ) + return rc; + + return 0; +} + +/** AoE URI opener */ +struct uri_opener aoe_uri_opener __uri_opener = { + .scheme = "aoe", + .open = aoe_open, +}; diff --git a/qemu/roms/ipxe/src/net/arp.c b/qemu/roms/ipxe/src/net/arp.c new file mode 100644 index 000000000..261e681e1 --- /dev/null +++ b/qemu/roms/ipxe/src/net/arp.c @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <ipxe/if_ether.h> +#include <ipxe/if_arp.h> +#include <ipxe/iobuf.h> +#include <ipxe/netdevice.h> +#include <ipxe/neighbour.h> +#include <ipxe/arp.h> + +/** @file + * + * Address Resolution Protocol + * + * This file implements the address resolution protocol as defined in + * RFC826. The implementation is media-independent and + * protocol-independent; it is not limited to Ethernet or to IPv4. + * + */ + +struct net_protocol arp_protocol __net_protocol; + +/** + * Transmit ARP request + * + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v net_dest Destination network-layer address + * @v net_source Source network-layer address + * @ret rc Return status code + */ +static int arp_tx_request ( struct net_device *netdev, + struct net_protocol *net_protocol, + const void *net_dest, const void *net_source ) { + struct ll_protocol *ll_protocol = netdev->ll_protocol; + struct io_buffer *iobuf; + struct arphdr *arphdr; + int rc; + + /* Allocate ARP packet */ + iobuf = alloc_iob ( MAX_LL_HEADER_LEN + sizeof ( *arphdr ) + + ( 2 * ( MAX_LL_ADDR_LEN + MAX_NET_ADDR_LEN ) ) ); + if ( ! iobuf ) + return -ENOMEM; + iob_reserve ( iobuf, MAX_LL_HEADER_LEN ); + + /* Build up ARP request */ + arphdr = iob_put ( iobuf, sizeof ( *arphdr ) ); + arphdr->ar_hrd = ll_protocol->ll_proto; + arphdr->ar_hln = ll_protocol->ll_addr_len; + arphdr->ar_pro = net_protocol->net_proto; + arphdr->ar_pln = net_protocol->net_addr_len; + arphdr->ar_op = htons ( ARPOP_REQUEST ); + memcpy ( iob_put ( iobuf, ll_protocol->ll_addr_len ), + netdev->ll_addr, ll_protocol->ll_addr_len ); + memcpy ( iob_put ( iobuf, net_protocol->net_addr_len ), + net_source, net_protocol->net_addr_len ); + memset ( iob_put ( iobuf, ll_protocol->ll_addr_len ), + 0, ll_protocol->ll_addr_len ); + memcpy ( iob_put ( iobuf, net_protocol->net_addr_len ), + net_dest, net_protocol->net_addr_len ); + + /* Transmit ARP request */ + if ( ( rc = net_tx ( iobuf, netdev, &arp_protocol, + netdev->ll_broadcast, netdev->ll_addr ) ) != 0 ) { + DBGC ( netdev, "ARP %s %s %s could not transmit request: %s\n", + netdev->name, net_protocol->name, + net_protocol->ntoa ( net_dest ), strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** ARP neighbour discovery protocol */ +struct neighbour_discovery arp_discovery = { + .name = "ARP", + .tx_request = arp_tx_request, +}; + +/** + * Identify ARP protocol + * + * @v net_proto Network-layer protocol, in network-endian order + * @ret arp_net_protocol ARP protocol, or NULL + * + */ +static struct arp_net_protocol * arp_find_protocol ( uint16_t net_proto ) { + struct arp_net_protocol *arp_net_protocol; + + for_each_table_entry ( arp_net_protocol, ARP_NET_PROTOCOLS ) { + if ( arp_net_protocol->net_protocol->net_proto == net_proto ) + return arp_net_protocol; + } + return NULL; +} + +/** + * Process incoming ARP packets + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v ll_source Link-layer source address + * @v flags Packet flags + * @ret rc Return status code + */ +static int arp_rx ( struct io_buffer *iobuf, struct net_device *netdev, + const void *ll_dest __unused, + const void *ll_source __unused, + unsigned int flags __unused ) { + struct arphdr *arphdr = iobuf->data; + struct arp_net_protocol *arp_net_protocol; + struct net_protocol *net_protocol; + struct ll_protocol *ll_protocol; + int rc; + + /* Identify network-layer and link-layer protocols */ + arp_net_protocol = arp_find_protocol ( arphdr->ar_pro ); + if ( ! arp_net_protocol ) { + rc = -EPROTONOSUPPORT; + goto done; + } + net_protocol = arp_net_protocol->net_protocol; + ll_protocol = netdev->ll_protocol; + + /* Sanity checks */ + if ( ( arphdr->ar_hrd != ll_protocol->ll_proto ) || + ( arphdr->ar_hln != ll_protocol->ll_addr_len ) || + ( arphdr->ar_pln != net_protocol->net_addr_len ) ) { + rc = -EINVAL; + goto done; + } + + /* Update neighbour cache entry for this sender, if any */ + neighbour_update ( netdev, net_protocol, arp_sender_pa ( arphdr ), + arp_sender_ha ( arphdr ) ); + + /* If it's not a request, there's nothing more to do */ + if ( arphdr->ar_op != htons ( ARPOP_REQUEST ) ) { + rc = 0; + goto done; + } + + /* See if we own the target protocol address */ + if ( arp_net_protocol->check ( netdev, arp_target_pa ( arphdr ) ) != 0){ + rc = 0; + goto done; + } + + /* Change request to a reply */ + DBGC2 ( netdev, "ARP %s %s %s reply => %s %s\n", + netdev->name, net_protocol->name, + net_protocol->ntoa ( arp_target_pa ( arphdr ) ), + ll_protocol->name, ll_protocol->ntoa ( netdev->ll_addr ) ); + arphdr->ar_op = htons ( ARPOP_REPLY ); + memswap ( arp_sender_ha ( arphdr ), arp_target_ha ( arphdr ), + arphdr->ar_hln + arphdr->ar_pln ); + memcpy ( arp_sender_ha ( arphdr ), netdev->ll_addr, arphdr->ar_hln ); + + /* Send reply */ + if ( ( rc = net_tx ( iob_disown ( iobuf ), netdev, &arp_protocol, + arp_target_ha ( arphdr ), + netdev->ll_addr ) ) != 0 ) { + DBGC ( netdev, "ARP %s %s %s could not transmit reply: %s\n", + netdev->name, net_protocol->name, + net_protocol->ntoa ( arp_target_pa ( arphdr ) ), + strerror ( rc ) ); + goto done; + } + + /* Success */ + rc = 0; + + done: + free_iob ( iobuf ); + return rc; +} + +/** + * Transcribe ARP address + * + * @v net_addr ARP address + * @ret string "<ARP>" + * + * This operation is meaningless for the ARP protocol. + */ +static const char * arp_ntoa ( const void *net_addr __unused ) { + return "<ARP>"; +} + +/** ARP network protocol */ +struct net_protocol arp_protocol __net_protocol = { + .name = "ARP", + .net_proto = htons ( ETH_P_ARP ), + .rx = arp_rx, + .ntoa = arp_ntoa, +}; diff --git a/qemu/roms/ipxe/src/net/dhcpopts.c b/qemu/roms/ipxe/src/net/dhcpopts.c new file mode 100644 index 000000000..8cd19cf80 --- /dev/null +++ b/qemu/roms/ipxe/src/net/dhcpopts.c @@ -0,0 +1,461 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <ipxe/dhcp.h> +#include <ipxe/dhcpopts.h> + +/** @file + * + * DHCP options + * + */ + +/** + * Obtain printable version of a DHCP option tag + * + * @v tag DHCP option tag + * @ret name String representation of the tag + * + */ +static inline char * dhcp_tag_name ( unsigned int tag ) { + static char name[8]; + + if ( DHCP_IS_ENCAP_OPT ( tag ) ) { + snprintf ( name, sizeof ( name ), "%d.%d", + DHCP_ENCAPSULATOR ( tag ), + DHCP_ENCAPSULATED ( tag ) ); + } else { + snprintf ( name, sizeof ( name ), "%d", tag ); + } + return name; +} + +/** + * Get pointer to DHCP option + * + * @v options DHCP options block + * @v offset Offset within options block + * @ret option DHCP option + */ +static inline __attribute__ (( always_inline )) struct dhcp_option * +dhcp_option ( struct dhcp_options *options, unsigned int offset ) { + return ( ( struct dhcp_option * ) ( options->data + offset ) ); +} + +/** + * Get offset of a DHCP option + * + * @v options DHCP options block + * @v option DHCP option + * @ret offset Offset within options block + */ +static inline __attribute__ (( always_inline )) int +dhcp_option_offset ( struct dhcp_options *options, + struct dhcp_option *option ) { + return ( ( ( void * ) option ) - options->data ); +} + +/** + * Calculate length of any DHCP option + * + * @v option DHCP option + * @ret len Length (including tag and length field) + */ +static unsigned int dhcp_option_len ( struct dhcp_option *option ) { + if ( ( option->tag == DHCP_END ) || ( option->tag == DHCP_PAD ) ) { + return 1; + } else { + return ( option->len + DHCP_OPTION_HEADER_LEN ); + } +} + +/** + * Find DHCP option within DHCP options block, and its encapsulator (if any) + * + * @v options DHCP options block + * @v tag DHCP option tag to search for + * @ret encap_offset Offset of encapsulating DHCP option + * @ret offset Offset of DHCP option, or negative error + * + * Searches for the DHCP option matching the specified tag within the + * DHCP option block. Encapsulated options may be searched for by + * using DHCP_ENCAP_OPT() to construct the tag value. + * + * If the option is encapsulated, and @c encap_offset is non-NULL, it + * will be filled in with the offset of the encapsulating option. + * + * This routine is designed to be paranoid. It does not assume that + * the option data is well-formatted, and so must guard against flaws + * such as options missing a @c DHCP_END terminator, or options whose + * length would take them beyond the end of the data block. + */ +static int find_dhcp_option_with_encap ( struct dhcp_options *options, + unsigned int tag, + int *encap_offset ) { + unsigned int original_tag __attribute__ (( unused )) = tag; + struct dhcp_option *option; + int offset = 0; + ssize_t remaining = options->used_len; + unsigned int option_len; + + /* Sanity check */ + if ( tag == DHCP_PAD ) + return -ENOENT; + + /* Search for option */ + while ( remaining ) { + /* Calculate length of this option. Abort processing + * if the length is malformed (i.e. takes us beyond + * the end of the data block). + */ + option = dhcp_option ( options, offset ); + option_len = dhcp_option_len ( option ); + remaining -= option_len; + if ( remaining < 0 ) + break; + /* Check for explicit end marker */ + if ( option->tag == DHCP_END ) { + if ( tag == DHCP_END ) + /* Special case where the caller is interested + * in whether we have this marker or not. + */ + return offset; + else + break; + } + /* Check for matching tag */ + if ( option->tag == tag ) { + DBGC ( options, "DHCPOPT %p found %s (length %d)\n", + options, dhcp_tag_name ( original_tag ), + option_len ); + return offset; + } + /* Check for start of matching encapsulation block */ + if ( DHCP_IS_ENCAP_OPT ( tag ) && + ( option->tag == DHCP_ENCAPSULATOR ( tag ) ) ) { + if ( encap_offset ) + *encap_offset = offset; + /* Continue search within encapsulated option block */ + tag = DHCP_ENCAPSULATED ( tag ); + remaining = option_len; + offset += DHCP_OPTION_HEADER_LEN; + continue; + } + offset += option_len; + } + + return -ENOENT; +} + +/** + * Refuse to reallocate DHCP option block + * + * @v options DHCP option block + * @v len New length + * @ret rc Return status code + */ +int dhcpopt_no_realloc ( struct dhcp_options *options, size_t len ) { + return ( ( len <= options->alloc_len ) ? 0 : -ENOSPC ); +} + +/** + * Resize a DHCP option + * + * @v options DHCP option block + * @v offset Offset of option to resize + * @v encap_offset Offset of encapsulating offset (or -ve for none) + * @v old_len Old length (including header) + * @v new_len New length (including header) + * @ret rc Return status code + */ +static int resize_dhcp_option ( struct dhcp_options *options, + int offset, int encap_offset, + size_t old_len, size_t new_len ) { + struct dhcp_option *encapsulator; + struct dhcp_option *option; + ssize_t delta = ( new_len - old_len ); + size_t old_alloc_len; + size_t new_used_len; + size_t new_encapsulator_len; + void *source; + void *dest; + int rc; + + /* Check for sufficient space */ + if ( new_len > DHCP_MAX_LEN ) { + DBGC ( options, "DHCPOPT %p overlength option\n", options ); + return -ENOSPC; + } + new_used_len = ( options->used_len + delta ); + + /* Expand options block, if necessary */ + if ( new_used_len > options->alloc_len ) { + /* Reallocate options block */ + old_alloc_len = options->alloc_len; + if ( ( rc = options->realloc ( options, new_used_len ) ) != 0 ){ + DBGC ( options, "DHCPOPT %p could not reallocate to " + "%zd bytes\n", options, new_used_len ); + return rc; + } + /* Clear newly allocated space */ + memset ( ( options->data + old_alloc_len ), 0, + ( options->alloc_len - old_alloc_len ) ); + } + + /* Update encapsulator, if applicable */ + if ( encap_offset >= 0 ) { + encapsulator = dhcp_option ( options, encap_offset ); + new_encapsulator_len = ( encapsulator->len + delta ); + if ( new_encapsulator_len > DHCP_MAX_LEN ) { + DBGC ( options, "DHCPOPT %p overlength encapsulator\n", + options ); + return -ENOSPC; + } + encapsulator->len = new_encapsulator_len; + } + + /* Update used length */ + options->used_len = new_used_len; + + /* Move remainder of option data */ + option = dhcp_option ( options, offset ); + source = ( ( ( void * ) option ) + old_len ); + dest = ( ( ( void * ) option ) + new_len ); + memmove ( dest, source, ( new_used_len - offset - new_len ) ); + + /* Shrink options block, if applicable */ + if ( new_used_len < options->alloc_len ) { + if ( ( rc = options->realloc ( options, new_used_len ) ) != 0 ){ + DBGC ( options, "DHCPOPT %p could not reallocate to " + "%zd bytes\n", options, new_used_len ); + return rc; + } + } + + return 0; +} + +/** + * Set value of DHCP option + * + * @v options DHCP option block + * @v tag DHCP option tag + * @v data New value for DHCP option + * @v len Length of value, in bytes + * @ret offset Offset of DHCP option, or negative error + * + * Sets the value of a DHCP option within the options block. The + * option may or may not already exist. Encapsulators will be created + * (and deleted) as necessary. + * + * This call may fail due to insufficient space in the options block. + * If it does fail, and the option existed previously, the option will + * be left with its original value. + */ +static int set_dhcp_option ( struct dhcp_options *options, unsigned int tag, + const void *data, size_t len ) { + static const uint8_t empty_encap[] = { DHCP_END }; + int offset; + int encap_offset = -1; + int creation_offset; + struct dhcp_option *option; + unsigned int encap_tag = DHCP_ENCAPSULATOR ( tag ); + size_t old_len = 0; + size_t new_len = ( len ? ( len + DHCP_OPTION_HEADER_LEN ) : 0 ); + int rc; + + /* Sanity check */ + if ( tag == DHCP_PAD ) + return -ENOTTY; + + creation_offset = find_dhcp_option_with_encap ( options, DHCP_END, + NULL ); + if ( creation_offset < 0 ) + creation_offset = options->used_len; + /* Find old instance of this option, if any */ + offset = find_dhcp_option_with_encap ( options, tag, &encap_offset ); + if ( offset >= 0 ) { + old_len = dhcp_option_len ( dhcp_option ( options, offset ) ); + DBGC ( options, "DHCPOPT %p resizing %s from %zd to %zd\n", + options, dhcp_tag_name ( tag ), old_len, new_len ); + } else { + DBGC ( options, "DHCPOPT %p creating %s (length %zd)\n", + options, dhcp_tag_name ( tag ), new_len ); + } + + /* Ensure that encapsulator exists, if required */ + if ( encap_tag ) { + if ( encap_offset < 0 ) { + encap_offset = + set_dhcp_option ( options, encap_tag, + empty_encap, + sizeof ( empty_encap ) ); + } + if ( encap_offset < 0 ) + return encap_offset; + creation_offset = ( encap_offset + DHCP_OPTION_HEADER_LEN ); + } + + /* Create new option if necessary */ + if ( offset < 0 ) + offset = creation_offset; + + /* Resize option to fit new data */ + if ( ( rc = resize_dhcp_option ( options, offset, encap_offset, + old_len, new_len ) ) != 0 ) + return rc; + + /* Copy new data into option, if applicable */ + if ( len ) { + option = dhcp_option ( options, offset ); + option->tag = tag; + option->len = len; + memcpy ( &option->data, data, len ); + } + + /* Delete encapsulator if there's nothing else left in it */ + if ( encap_offset >= 0 ) { + option = dhcp_option ( options, encap_offset ); + if ( option->len <= 1 ) + set_dhcp_option ( options, encap_tag, NULL, 0 ); + } + + return offset; +} + +/** + * Check applicability of DHCP option setting + * + * @v tag Setting tag number + * @ret applies Setting applies to this option block + */ +int dhcpopt_applies ( unsigned int tag ) { + + return ( tag && ( tag <= DHCP_ENCAP_OPT ( DHCP_MAX_OPTION, + DHCP_MAX_OPTION ) ) ); +} + +/** + * Store value of DHCP option setting + * + * @v options DHCP option block + * @v tag Setting tag number + * @v data Setting data, or NULL to clear setting + * @v len Length of setting data + * @ret rc Return status code + */ +int dhcpopt_store ( struct dhcp_options *options, unsigned int tag, + const void *data, size_t len ) { + int offset; + + offset = set_dhcp_option ( options, tag, data, len ); + if ( offset < 0 ) + return offset; + return 0; +} + +/** + * Fetch value of DHCP option setting + * + * @v options DHCP option block + * @v tag Setting tag number + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ +int dhcpopt_fetch ( struct dhcp_options *options, unsigned int tag, + void *data, size_t len ) { + int offset; + struct dhcp_option *option; + size_t option_len; + + offset = find_dhcp_option_with_encap ( options, tag, NULL ); + if ( offset < 0 ) + return offset; + + option = dhcp_option ( options, offset ); + option_len = option->len; + if ( len > option_len ) + len = option_len; + memcpy ( data, option->data, len ); + + return option_len; +} + +/** + * Recalculate length of DHCP options block + * + * @v options Uninitialised DHCP option block + * + * The "used length" field will be updated based on scanning through + * the block to find the end of the options. + */ +void dhcpopt_update_used_len ( struct dhcp_options *options ) { + struct dhcp_option *option; + int offset = 0; + ssize_t remaining = options->alloc_len; + unsigned int option_len; + + /* Find last non-pad option */ + options->used_len = 0; + while ( remaining ) { + option = dhcp_option ( options, offset ); + option_len = dhcp_option_len ( option ); + remaining -= option_len; + if ( remaining < 0 ) + break; + offset += option_len; + if ( option->tag != DHCP_PAD ) + options->used_len = offset; + } +} + +/** + * Initialise prepopulated block of DHCP options + * + * @v options Uninitialised DHCP option block + * @v data Memory for DHCP option data + * @v alloc_len Length of memory for DHCP option data + * @v realloc DHCP option block reallocator + * + * The memory content must already be filled with valid DHCP options. + * A zeroed block counts as a block of valid DHCP options. + */ +void dhcpopt_init ( struct dhcp_options *options, void *data, size_t alloc_len, + int ( * realloc ) ( struct dhcp_options *options, + size_t len ) ) { + + /* Fill in fields */ + options->data = data; + options->alloc_len = alloc_len; + options->realloc = realloc; + + /* Update length */ + dhcpopt_update_used_len ( options ); + + DBGC ( options, "DHCPOPT %p created (data %p lengths %#zx,%#zx)\n", + options, options->data, options->used_len, options->alloc_len ); +} diff --git a/qemu/roms/ipxe/src/net/dhcppkt.c b/qemu/roms/ipxe/src/net/dhcppkt.c new file mode 100644 index 000000000..a9a6d3a94 --- /dev/null +++ b/qemu/roms/ipxe/src/net/dhcppkt.c @@ -0,0 +1,305 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <ipxe/netdevice.h> +#include <ipxe/dhcp.h> +#include <ipxe/dhcpopts.h> +#include <ipxe/dhcppkt.h> + +/** @file + * + * DHCP packets + * + */ + +/**************************************************************************** + * + * DHCP packet raw interface + * + */ + +/** + * Calculate used length of an IPv4 field within a DHCP packet + * + * @v data Field data + * @v len Length of field + * @ret used Used length of field + */ +static size_t used_len_ipv4 ( const void *data, size_t len __unused ) { + const struct in_addr *in = data; + + return ( in->s_addr ? sizeof ( *in ) : 0 ); +} + +/** + * Calculate used length of a string field within a DHCP packet + * + * @v data Field data + * @v len Length of field + * @ret used Used length of field + */ +static size_t used_len_string ( const void *data, size_t len ) { + return strnlen ( data, len ); +} + +/** A dedicated field within a DHCP packet */ +struct dhcp_packet_field { + /** Settings tag number */ + unsigned int tag; + /** Offset within DHCP packet */ + uint16_t offset; + /** Length of field */ + uint16_t len; + /** Calculate used length of field + * + * @v data Field data + * @v len Length of field + * @ret used Used length of field + */ + size_t ( * used_len ) ( const void *data, size_t len ); +}; + +/** Declare a dedicated field within a DHCP packet + * + * @v _tag Settings tag number + * @v _field Field name + * @v _used_len Function to calculate used length of field + */ +#define DHCP_PACKET_FIELD( _tag, _field, _used_len ) { \ + .tag = (_tag), \ + .offset = offsetof ( struct dhcphdr, _field ), \ + .len = sizeof ( ( ( struct dhcphdr * ) 0 )->_field ), \ + .used_len = _used_len, \ + } + +/** Dedicated fields within a DHCP packet */ +static struct dhcp_packet_field dhcp_packet_fields[] = { + DHCP_PACKET_FIELD ( DHCP_EB_YIADDR, yiaddr, used_len_ipv4 ), + DHCP_PACKET_FIELD ( DHCP_EB_SIADDR, siaddr, used_len_ipv4 ), + DHCP_PACKET_FIELD ( DHCP_TFTP_SERVER_NAME, sname, used_len_string ), + DHCP_PACKET_FIELD ( DHCP_BOOTFILE_NAME, file, used_len_string ), +}; + +/** + * Get address of a DHCP packet field + * + * @v dhcphdr DHCP packet header + * @v field DHCP packet field + * @ret data Packet field data + */ +static inline void * dhcp_packet_field ( struct dhcphdr *dhcphdr, + struct dhcp_packet_field *field ) { + return ( ( ( void * ) dhcphdr ) + field->offset ); +} + +/** + * Find DHCP packet field corresponding to settings tag number + * + * @v tag Settings tag number + * @ret field DHCP packet field, or NULL + */ +static struct dhcp_packet_field * +find_dhcp_packet_field ( unsigned int tag ) { + struct dhcp_packet_field *field; + unsigned int i; + + for ( i = 0 ; i < ( sizeof ( dhcp_packet_fields ) / + sizeof ( dhcp_packet_fields[0] ) ) ; i++ ) { + field = &dhcp_packet_fields[i]; + if ( field->tag == tag ) + return field; + } + return NULL; +} + +/** + * Check applicability of DHCP setting + * + * @v dhcppkt DHCP packet + * @v tag Setting tag number + * @ret applies Setting applies within this settings block + */ +static int dhcppkt_applies ( struct dhcp_packet *dhcppkt __unused, + unsigned int tag ) { + + return dhcpopt_applies ( tag ); +} + +/** + * Store value of DHCP packet setting + * + * @v dhcppkt DHCP packet + * @v tag Setting tag number + * @v data Setting data, or NULL to clear setting + * @v len Length of setting data + * @ret rc Return status code + */ +int dhcppkt_store ( struct dhcp_packet *dhcppkt, unsigned int tag, + const void *data, size_t len ) { + struct dhcp_packet_field *field; + void *field_data; + + /* If this is a special field, fill it in */ + if ( ( field = find_dhcp_packet_field ( tag ) ) != NULL ) { + if ( len > field->len ) + return -ENOSPC; + field_data = dhcp_packet_field ( dhcppkt->dhcphdr, field ); + memset ( field_data, 0, field->len ); + memcpy ( dhcp_packet_field ( dhcppkt->dhcphdr, field ), + data, len ); + /* Erase any equivalent option from the options block */ + dhcpopt_store ( &dhcppkt->options, tag, NULL, 0 ); + return 0; + } + + /* Otherwise, use the generic options block */ + return dhcpopt_store ( &dhcppkt->options, tag, data, len ); +} + +/** + * Fetch value of DHCP packet setting + * + * @v dhcppkt DHCP packet + * @v tag Setting tag number + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ +int dhcppkt_fetch ( struct dhcp_packet *dhcppkt, unsigned int tag, + void *data, size_t len ) { + struct dhcp_packet_field *field; + void *field_data; + size_t field_len = 0; + + /* Identify special field, if any */ + if ( ( field = find_dhcp_packet_field ( tag ) ) != NULL ) { + field_data = dhcp_packet_field ( dhcppkt->dhcphdr, field ); + field_len = field->used_len ( field_data, field->len ); + } + + /* Return special field, if it exists and is populated */ + if ( field_len ) { + if ( len > field_len ) + len = field_len; + memcpy ( data, field_data, len ); + return field_len; + } + + /* Otherwise, use the generic options block */ + return dhcpopt_fetch ( &dhcppkt->options, tag, data, len ); +} + +/**************************************************************************** + * + * DHCP packet settings interface + * + */ + +/** + * Check applicability of DHCP setting + * + * @v settings Settings block + * @v setting Setting + * @ret applies Setting applies within this settings block + */ +static int dhcppkt_settings_applies ( struct settings *settings, + const struct setting *setting ) { + struct dhcp_packet *dhcppkt = + container_of ( settings, struct dhcp_packet, settings ); + + return ( ( setting->scope == NULL ) && + dhcppkt_applies ( dhcppkt, setting->tag ) ); +} + +/** + * Store value of DHCP setting + * + * @v settings Settings block + * @v setting Setting to store + * @v data Setting data, or NULL to clear setting + * @v len Length of setting data + * @ret rc Return status code + */ +static int dhcppkt_settings_store ( struct settings *settings, + const struct setting *setting, + const void *data, size_t len ) { + struct dhcp_packet *dhcppkt = + container_of ( settings, struct dhcp_packet, settings ); + + return dhcppkt_store ( dhcppkt, setting->tag, data, len ); +} + +/** + * Fetch value of DHCP setting + * + * @v settings Settings block, or NULL to search all blocks + * @v setting Setting to fetch + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ +static int dhcppkt_settings_fetch ( struct settings *settings, + struct setting *setting, + void *data, size_t len ) { + struct dhcp_packet *dhcppkt = + container_of ( settings, struct dhcp_packet, settings ); + + return dhcppkt_fetch ( dhcppkt, setting->tag, data, len ); +} + +/** DHCP settings operations */ +static struct settings_operations dhcppkt_settings_operations = { + .applies = dhcppkt_settings_applies, + .store = dhcppkt_settings_store, + .fetch = dhcppkt_settings_fetch, +}; + +/**************************************************************************** + * + * Constructor + * + */ + +/** + * Initialise DHCP packet + * + * @v dhcppkt DHCP packet structure to fill in + * @v data DHCP packet raw data + * @v max_len Length of raw data buffer + * + * Initialise a DHCP packet structure from a data buffer containing a + * DHCP packet. + */ +void dhcppkt_init ( struct dhcp_packet *dhcppkt, struct dhcphdr *data, + size_t len ) { + ref_init ( &dhcppkt->refcnt, NULL ); + dhcppkt->dhcphdr = data; + dhcpopt_init ( &dhcppkt->options, &dhcppkt->dhcphdr->options, + ( len - offsetof ( struct dhcphdr, options ) ), + dhcpopt_no_realloc ); + settings_init ( &dhcppkt->settings, &dhcppkt_settings_operations, + &dhcppkt->refcnt, NULL ); +} diff --git a/qemu/roms/ipxe/src/net/eapol.c b/qemu/roms/ipxe/src/net/eapol.c new file mode 100644 index 000000000..eb0362994 --- /dev/null +++ b/qemu/roms/ipxe/src/net/eapol.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** @file + * + * 802.1X Extensible Authentication Protocol over LANs demultiplexer + * + */ + +#include <ipxe/netdevice.h> +#include <ipxe/iobuf.h> +#include <ipxe/if_ether.h> +#include <ipxe/eapol.h> +#include <errno.h> +#include <byteswap.h> + +/** + * Receive EAPOL network-layer packet + * + * @v iob I/O buffer + * @v netdev Network device + * @v ll_dest Link-layer destination address + * @v ll_source Link-layer source address + * @v flags Packet flags + * + * This function takes ownership of the I/O buffer passed to it. + */ +static int eapol_rx ( struct io_buffer *iob, struct net_device *netdev, + const void *ll_dest, const void *ll_source, + unsigned int flags __unused ) { + struct eapol_frame *eapol = iob->data; + struct eapol_handler *handler; + + if ( iob_len ( iob ) < EAPOL_HDR_LEN ) { + free_iob ( iob ); + return -EINVAL; + } + + for_each_table_entry ( handler, EAPOL_HANDLERS ) { + if ( handler->type == eapol->type ) { + iob_pull ( iob, EAPOL_HDR_LEN ); + return handler->rx ( iob, netdev, ll_dest, ll_source ); + } + } + + free_iob ( iob ); + return -( ENOTSUP | ( ( eapol->type & 0x1f ) << 8 ) ); +} + +/** + * Transcribe EAPOL network-layer address + * + * @v net_addr Network-layer address + * @ret str String representation of network-layer address + * + * EAPOL doesn't have network-layer addresses, so we just return the + * string @c "<EAPOL>". + */ +static const char * eapol_ntoa ( const void *net_addr __unused ) +{ + return "<EAPOL>"; +} + +/** EAPOL network protocol */ +struct net_protocol eapol_protocol __net_protocol = { + .name = "EAPOL", + .rx = eapol_rx, + .ntoa = eapol_ntoa, + .net_proto = htons ( ETH_P_EAPOL ), +}; diff --git a/qemu/roms/ipxe/src/net/eth_slow.c b/qemu/roms/ipxe/src/net/eth_slow.c new file mode 100644 index 000000000..db54b55a4 --- /dev/null +++ b/qemu/roms/ipxe/src/net/eth_slow.c @@ -0,0 +1,274 @@ +/* + * Copyright (C) 2010 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdlib.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <ipxe/iobuf.h> +#include <ipxe/netdevice.h> +#include <ipxe/if_ether.h> +#include <ipxe/ethernet.h> +#include <ipxe/eth_slow.h> + +/** @file + * + * Ethernet slow protocols + * + * We implement a very simple passive LACP entity, that pretends that + * each port is the only port on an individual system. We avoid the + * need for timeout logic (and retaining local state about our + * partner) by requesting the same timeout period (1s or 30s) as our + * partner requests, and then simply responding to every packet the + * partner sends us. + */ + +struct net_protocol eth_slow_protocol __net_protocol; + +/** Slow protocols multicast address */ +static const uint8_t eth_slow_address[ETH_ALEN] = + { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 }; + +/** + * Name LACP TLV type + * + * @v type LACP TLV type + * @ret name Name of LACP TLV type + */ +static inline __attribute__ (( always_inline )) const char * +eth_slow_lacp_tlv_name ( uint8_t type ) { + switch ( type ) { + case ETH_SLOW_TLV_TERMINATOR: return "terminator"; + case ETH_SLOW_TLV_LACP_ACTOR: return "actor"; + case ETH_SLOW_TLV_LACP_PARTNER: return "partner"; + case ETH_SLOW_TLV_LACP_COLLECTOR: return "collector"; + default: return "<invalid>"; + } +} + +/** + * Name marker TLV type + * + * @v type Marker TLV type + * @ret name Name of marker TLV type + */ +static inline __attribute__ (( always_inline )) const char * +eth_slow_marker_tlv_name ( uint8_t type ) { + switch ( type ) { + case ETH_SLOW_TLV_TERMINATOR: return "terminator"; + case ETH_SLOW_TLV_MARKER_REQUEST: return "request"; + case ETH_SLOW_TLV_MARKER_RESPONSE: return "response"; + default: return "<invalid>"; + } +} + +/** + * Name LACP state + * + * @v state LACP state + * @ret name LACP state name + */ +static const char * eth_slow_lacp_state_name ( uint8_t state ) { + static char state_chars[] = "AFGSRTLX"; + unsigned int i; + + for ( i = 0 ; i < 8 ; i++ ) { + state_chars[i] |= 0x20; + if ( state & ( 1 << i ) ) + state_chars[i] &= ~0x20; + } + return state_chars; +} + +/** + * Dump LACP packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v label "RX" or "TX" + */ +static void eth_slow_lacp_dump ( struct io_buffer *iobuf, + struct net_device *netdev, + const char *label ) { + union eth_slow_packet *eth_slow = iobuf->data; + struct eth_slow_lacp *lacp = ð_slow->lacp; + + DBGC ( netdev, + "SLOW %s %s LACP actor (%04x,%s,%04x,%02x,%04x) [%s]\n", + netdev->name, label, ntohs ( lacp->actor.system_priority ), + eth_ntoa ( lacp->actor.system ), + ntohs ( lacp->actor.key ), + ntohs ( lacp->actor.port_priority ), + ntohs ( lacp->actor.port ), + eth_slow_lacp_state_name ( lacp->actor.state ) ); + DBGC ( netdev, + "SLOW %s %s LACP partner (%04x,%s,%04x,%02x,%04x) [%s]\n", + netdev->name, label, ntohs ( lacp->partner.system_priority ), + eth_ntoa ( lacp->partner.system ), + ntohs ( lacp->partner.key ), + ntohs ( lacp->partner.port_priority ), + ntohs ( lacp->partner.port ), + eth_slow_lacp_state_name ( lacp->partner.state ) ); + DBGC ( netdev, "SLOW %s %s LACP collector %04x (%d us)\n", + netdev->name, label, ntohs ( lacp->collector.max_delay ), + ( ntohs ( lacp->collector.max_delay ) * 10 ) ); + DBGC2_HDA ( netdev, 0, iobuf->data, iob_len ( iobuf ) ); +} + +/** + * Process incoming LACP packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @ret rc Return status code + */ +static int eth_slow_lacp_rx ( struct io_buffer *iobuf, + struct net_device *netdev ) { + union eth_slow_packet *eth_slow = iobuf->data; + struct eth_slow_lacp *lacp = ð_slow->lacp; + + eth_slow_lacp_dump ( iobuf, netdev, "RX" ); + + /* Build response */ + memset ( lacp->reserved, 0, sizeof ( lacp->reserved ) ); + memset ( &lacp->terminator, 0, sizeof ( lacp->terminator ) ); + memset ( &lacp->collector, 0, sizeof ( lacp->collector ) ); + lacp->collector.tlv.type = ETH_SLOW_TLV_LACP_COLLECTOR; + lacp->collector.tlv.length = ETH_SLOW_TLV_LACP_COLLECTOR_LEN; + memcpy ( &lacp->partner, &lacp->actor, sizeof ( lacp->partner ) ); + lacp->partner.tlv.type = ETH_SLOW_TLV_LACP_PARTNER; + lacp->partner.tlv.length = ETH_SLOW_TLV_LACP_PARTNER_LEN; + memset ( &lacp->partner.reserved, 0, + sizeof ( lacp->partner.reserved ) ); + memset ( &lacp->actor, 0, sizeof ( lacp->actor ) ); + lacp->actor.tlv.type = ETH_SLOW_TLV_LACP_ACTOR; + lacp->actor.tlv.length = ETH_SLOW_TLV_LACP_ACTOR_LEN; + lacp->actor.system_priority = htons ( LACP_SYSTEM_PRIORITY_MAX ); + memcpy ( lacp->actor.system, netdev->ll_addr, + sizeof ( lacp->actor.system ) ); + lacp->actor.key = htons ( 1 ); + lacp->actor.port_priority = htons ( LACP_PORT_PRIORITY_MAX ); + lacp->actor.port = htons ( 1 ); + lacp->actor.state = ( LACP_STATE_AGGREGATABLE | + LACP_STATE_IN_SYNC | + LACP_STATE_COLLECTING | + LACP_STATE_DISTRIBUTING | + ( lacp->partner.state & LACP_STATE_FAST ) ); + lacp->header.version = ETH_SLOW_LACP_VERSION; + + /* Send response */ + eth_slow_lacp_dump ( iobuf, netdev, "TX" ); + return net_tx ( iobuf, netdev, ð_slow_protocol, eth_slow_address, + netdev->ll_addr ); +} + +/** + * Dump marker packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v label "RX" or "TX" + */ +static void eth_slow_marker_dump ( struct io_buffer *iobuf, + struct net_device *netdev, + const char *label ) { + union eth_slow_packet *eth_slow = iobuf->data; + struct eth_slow_marker *marker = ð_slow->marker; + + DBGC ( netdev, "SLOW %s %s marker %s port %04x system %s xact %08x\n", + netdev->name, label, + eth_slow_marker_tlv_name ( marker->marker.tlv.type ), + ntohs ( marker->marker.port ), + eth_ntoa ( marker->marker.system ), + ntohl ( marker->marker.xact ) ); + DBGC2_HDA ( netdev, 0, iobuf->data, iob_len ( iobuf ) ); +} + +/** + * Process incoming marker packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @ret rc Return status code + */ +static int eth_slow_marker_rx ( struct io_buffer *iobuf, + struct net_device *netdev ) { + union eth_slow_packet *eth_slow = iobuf->data; + struct eth_slow_marker *marker = ð_slow->marker; + + eth_slow_marker_dump ( iobuf, netdev, "RX" ); + + if ( marker->marker.tlv.type == ETH_SLOW_TLV_MARKER_REQUEST ) { + /* Send marker response */ + marker->marker.tlv.type = ETH_SLOW_TLV_MARKER_RESPONSE; + eth_slow_marker_dump ( iobuf, netdev, "TX" ); + return net_tx ( iobuf, netdev, ð_slow_protocol, + eth_slow_address, netdev->ll_addr ); + } else { + /* Discard all other marker packets */ + free_iob ( iobuf ); + return -EINVAL; + } +} + +/** + * Process incoming slow packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v ll_dest Link-layer destination address + * @v ll_source Link-layer source address + * @v flags Packet flags + * @ret rc Return status code + */ +static int eth_slow_rx ( struct io_buffer *iobuf, + struct net_device *netdev, + const void *ll_dest __unused, + const void *ll_source __unused, + unsigned int flags __unused ) { + union eth_slow_packet *eth_slow = iobuf->data; + + /* Sanity checks */ + if ( iob_len ( iobuf ) < sizeof ( *eth_slow ) ) { + free_iob ( iobuf ); + return -EINVAL; + } + + /* Handle according to subtype */ + switch ( eth_slow->header.subtype ) { + case ETH_SLOW_SUBTYPE_LACP: + return eth_slow_lacp_rx ( iobuf, netdev ); + case ETH_SLOW_SUBTYPE_MARKER: + return eth_slow_marker_rx ( iobuf, netdev ); + default: + DBGC ( netdev, "SLOW %s RX unknown subtype %02x\n", + netdev->name, eth_slow->header.subtype ); + free_iob ( iobuf ); + return -EINVAL; + } +} + +/** Slow protocol */ +struct net_protocol eth_slow_protocol __net_protocol = { + .name = "Slow", + .net_proto = htons ( ETH_P_SLOW ), + .rx = eth_slow_rx, +}; diff --git a/qemu/roms/ipxe/src/net/ethernet.c b/qemu/roms/ipxe/src/net/ethernet.c new file mode 100644 index 000000000..03978c2a8 --- /dev/null +++ b/qemu/roms/ipxe/src/net/ethernet.c @@ -0,0 +1,239 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <assert.h> +#include <ipxe/if_arp.h> +#include <ipxe/if_ether.h> +#include <ipxe/in.h> +#include <ipxe/netdevice.h> +#include <ipxe/iobuf.h> +#include <ipxe/ethernet.h> + +/** @file + * + * Ethernet protocol + * + */ + +/** Ethernet broadcast MAC address */ +uint8_t eth_broadcast[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + +/** + * Add Ethernet link-layer header + * + * @v netdev Network device + * @v iobuf I/O buffer + * @v ll_dest Link-layer destination address + * @v ll_source Source link-layer address + * @v net_proto Network-layer protocol, in network-byte order + * @ret rc Return status code + */ +int eth_push ( struct net_device *netdev __unused, struct io_buffer *iobuf, + const void *ll_dest, const void *ll_source, + uint16_t net_proto ) { + struct ethhdr *ethhdr = iob_push ( iobuf, sizeof ( *ethhdr ) ); + + /* Build Ethernet header */ + memcpy ( ethhdr->h_dest, ll_dest, ETH_ALEN ); + memcpy ( ethhdr->h_source, ll_source, ETH_ALEN ); + ethhdr->h_protocol = net_proto; + + return 0; +} + +/** + * Remove Ethernet link-layer header + * + * @v netdev Network device + * @v iobuf I/O buffer + * @ret ll_dest Link-layer destination address + * @ret ll_source Source link-layer address + * @ret net_proto Network-layer protocol, in network-byte order + * @ret flags Packet flags + * @ret rc Return status code + */ +int eth_pull ( struct net_device *netdev __unused, struct io_buffer *iobuf, + const void **ll_dest, const void **ll_source, + uint16_t *net_proto, unsigned int *flags ) { + struct ethhdr *ethhdr = iobuf->data; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *ethhdr ) ) { + DBG ( "Ethernet packet too short (%zd bytes)\n", + iob_len ( iobuf ) ); + return -EINVAL; + } + + /* Strip off Ethernet header */ + iob_pull ( iobuf, sizeof ( *ethhdr ) ); + + /* Fill in required fields */ + *ll_dest = ethhdr->h_dest; + *ll_source = ethhdr->h_source; + *net_proto = ethhdr->h_protocol; + *flags = ( ( is_multicast_ether_addr ( ethhdr->h_dest ) ? + LL_MULTICAST : 0 ) | + ( is_broadcast_ether_addr ( ethhdr->h_dest ) ? + LL_BROADCAST : 0 ) ); + + return 0; +} + +/** + * Initialise Ethernet address + * + * @v hw_addr Hardware address + * @v ll_addr Link-layer address + */ +void eth_init_addr ( const void *hw_addr, void *ll_addr ) { + memcpy ( ll_addr, hw_addr, ETH_ALEN ); +} + +/** + * Generate random Ethernet address + * + * @v hw_addr Generated hardware address + */ +void eth_random_addr ( void *hw_addr ) { + uint8_t *addr = hw_addr; + unsigned int i; + + for ( i = 0 ; i < ETH_ALEN ; i++ ) + addr[i] = random(); + addr[0] &= ~0x01; /* Clear multicast bit */ + addr[0] |= 0x02; /* Set locally-assigned bit */ +} + +/** + * Transcribe Ethernet address + * + * @v ll_addr Link-layer address + * @ret string Link-layer address in human-readable format + */ +const char * eth_ntoa ( const void *ll_addr ) { + static char buf[18]; /* "00:00:00:00:00:00" */ + const uint8_t *eth_addr = ll_addr; + + sprintf ( buf, "%02x:%02x:%02x:%02x:%02x:%02x", + eth_addr[0], eth_addr[1], eth_addr[2], + eth_addr[3], eth_addr[4], eth_addr[5] ); + return buf; +} + +/** + * Hash multicast address + * + * @v af Address family + * @v net_addr Network-layer address + * @v ll_addr Link-layer address to fill in + * @ret rc Return status code + */ +int eth_mc_hash ( unsigned int af, const void *net_addr, void *ll_addr ) { + const uint8_t *net_addr_bytes = net_addr; + uint8_t *ll_addr_bytes = ll_addr; + + switch ( af ) { + case AF_INET: + ll_addr_bytes[0] = 0x01; + ll_addr_bytes[1] = 0x00; + ll_addr_bytes[2] = 0x5e; + ll_addr_bytes[3] = net_addr_bytes[1] & 0x7f; + ll_addr_bytes[4] = net_addr_bytes[2]; + ll_addr_bytes[5] = net_addr_bytes[3]; + return 0; + case AF_INET6: + ll_addr_bytes[0] = 0x33; + ll_addr_bytes[1] = 0x33; + memcpy ( &ll_addr_bytes[2], &net_addr_bytes[12], 4 ); + return 0; + default: + return -ENOTSUP; + } +} + +/** + * Generate Ethernet-compatible compressed link-layer address + * + * @v ll_addr Link-layer address + * @v eth_addr Ethernet-compatible address to fill in + */ +int eth_eth_addr ( const void *ll_addr, void *eth_addr ) { + memcpy ( eth_addr, ll_addr, ETH_ALEN ); + return 0; +} + +/** + * Generate EUI-64 address + * + * @v ll_addr Link-layer address + * @v eui64 EUI-64 address to fill in + * @ret rc Return status code + */ +int eth_eui64 ( const void *ll_addr, void *eui64 ) { + + memcpy ( ( eui64 + 0 ), ( ll_addr + 0 ), 3 ); + memcpy ( ( eui64 + 5 ), ( ll_addr + 3 ), 3 ); + *( ( uint16_t * ) ( eui64 + 3 ) ) = htons ( 0xfffe ); + return 0; +} + +/** Ethernet protocol */ +struct ll_protocol ethernet_protocol __ll_protocol = { + .name = "Ethernet", + .ll_proto = htons ( ARPHRD_ETHER ), + .hw_addr_len = ETH_ALEN, + .ll_addr_len = ETH_ALEN, + .ll_header_len = ETH_HLEN, + .push = eth_push, + .pull = eth_pull, + .init_addr = eth_init_addr, + .ntoa = eth_ntoa, + .mc_hash = eth_mc_hash, + .eth_addr = eth_eth_addr, + .eui64 = eth_eui64, +}; + +/** + * Allocate Ethernet device + * + * @v priv_size Size of driver private data + * @ret netdev Network device, or NULL + */ +struct net_device * alloc_etherdev ( size_t priv_size ) { + struct net_device *netdev; + + netdev = alloc_netdev ( priv_size ); + if ( netdev ) { + netdev->ll_protocol = ðernet_protocol; + netdev->ll_broadcast = eth_broadcast; + netdev->max_pkt_len = ETH_FRAME_LEN; + } + return netdev; +} + +/* Drag in Ethernet slow protocols */ +REQUIRE_OBJECT ( eth_slow ); diff --git a/qemu/roms/ipxe/src/net/fakedhcp.c b/qemu/roms/ipxe/src/net/fakedhcp.c new file mode 100644 index 000000000..3dec88b11 --- /dev/null +++ b/qemu/roms/ipxe/src/net/fakedhcp.c @@ -0,0 +1,215 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <ipxe/settings.h> +#include <ipxe/netdevice.h> +#include <ipxe/dhcppkt.h> +#include <ipxe/fakedhcp.h> + +/** @file + * + * Fake DHCP packets + * + */ + +/** + * Copy settings to DHCP packet + * + * @v dest Destination DHCP packet + * @v source Source settings block + * @v encapsulator Encapsulating setting tag number, or zero + * @ret rc Return status code + */ +static int copy_encap_settings ( struct dhcp_packet *dest, + struct settings *source, + unsigned int encapsulator ) { + struct setting setting = { .name = "" }; + unsigned int subtag; + unsigned int tag; + void *data; + int len; + int rc; + + for ( subtag = DHCP_MIN_OPTION; subtag <= DHCP_MAX_OPTION; subtag++ ) { + tag = DHCP_ENCAP_OPT ( encapsulator, subtag ); + switch ( tag ) { + case DHCP_EB_ENCAP: + case DHCP_VENDOR_ENCAP: + /* Process encapsulated settings */ + if ( ( rc = copy_encap_settings ( dest, source, + tag ) ) != 0 ) + return rc; + break; + default: + /* Copy setting, if present */ + setting.tag = tag; + len = fetch_raw_setting_copy ( source, &setting, &data); + if ( len >= 0 ) { + rc = dhcppkt_store ( dest, tag, data, len ); + free ( data ); + if ( rc != 0 ) + return rc; + } + break; + } + } + + return 0; +} + +/** + * Copy settings to DHCP packet + * + * @v dest Destination DHCP packet + * @v source Source settings block + * @ret rc Return status code + */ +static int copy_settings ( struct dhcp_packet *dest, + struct settings *source ) { + return copy_encap_settings ( dest, source, 0 ); +} + +/** + * Create fake DHCPDISCOVER packet + * + * @v netdev Network device + * @v data Buffer for DHCP packet + * @v max_len Size of DHCP packet buffer + * @ret rc Return status code + * + * Used by external code. + */ +int create_fakedhcpdiscover ( struct net_device *netdev, + void *data, size_t max_len ) { + struct dhcp_packet dhcppkt; + struct in_addr ciaddr = { 0 }; + int rc; + + if ( ( rc = dhcp_create_request ( &dhcppkt, netdev, DHCPDISCOVER, + dhcp_last_xid, ciaddr, data, + max_len ) ) != 0 ) { + DBG ( "Could not create DHCPDISCOVER: %s\n", + strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Create fake DHCPACK packet + * + * @v netdev Network device + * @v data Buffer for DHCP packet + * @v max_len Size of DHCP packet buffer + * @ret rc Return status code + * + * Used by external code. + */ +int create_fakedhcpack ( struct net_device *netdev, + void *data, size_t max_len ) { + struct dhcp_packet dhcppkt; + int rc; + + /* Create base DHCPACK packet */ + if ( ( rc = dhcp_create_packet ( &dhcppkt, netdev, DHCPACK, + dhcp_last_xid, NULL, 0, + data, max_len ) ) != 0 ) { + DBG ( "Could not create DHCPACK: %s\n", strerror ( rc ) ); + return rc; + } + + /* Merge in globally-scoped settings, then netdev-specific + * settings. Do it in this order so that netdev-specific + * settings take precedence regardless of stated priorities. + */ + if ( ( rc = copy_settings ( &dhcppkt, NULL ) ) != 0 ) { + DBG ( "Could not set DHCPACK global settings: %s\n", + strerror ( rc ) ); + return rc; + } + if ( ( rc = copy_settings ( &dhcppkt, + netdev_settings ( netdev ) ) ) != 0 ) { + DBG ( "Could not set DHCPACK netdev settings: %s\n", + strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Create fake PXE Boot Server ACK packet + * + * @v netdev Network device + * @v data Buffer for DHCP packet + * @v max_len Size of DHCP packet buffer + * @ret rc Return status code + * + * Used by external code. + */ +int create_fakepxebsack ( struct net_device *netdev, + void *data, size_t max_len ) { + struct dhcp_packet dhcppkt; + struct settings *proxy_settings; + struct settings *pxebs_settings; + int rc; + + /* Identify available settings */ + proxy_settings = find_settings ( PROXYDHCP_SETTINGS_NAME ); + pxebs_settings = find_settings ( PXEBS_SETTINGS_NAME ); + if ( ( ! proxy_settings ) && ( ! pxebs_settings ) ) { + /* No PXE boot server; return the regular DHCPACK */ + return create_fakedhcpack ( netdev, data, max_len ); + } + + /* Create base DHCPACK packet */ + if ( ( rc = dhcp_create_packet ( &dhcppkt, netdev, DHCPACK, + dhcp_last_xid, NULL, 0, + data, max_len ) ) != 0 ) { + DBG ( "Could not create PXE BS ACK: %s\n", + strerror ( rc ) ); + return rc; + } + + /* Merge in ProxyDHCP options */ + if ( proxy_settings && + ( ( rc = copy_settings ( &dhcppkt, proxy_settings ) ) != 0 ) ) { + DBG ( "Could not copy ProxyDHCP settings: %s\n", + strerror ( rc ) ); + return rc; + } + + /* Merge in BootServerDHCP options, if present */ + if ( pxebs_settings && + ( ( rc = copy_settings ( &dhcppkt, pxebs_settings ) ) != 0 ) ) { + DBG ( "Could not copy PXE BS settings: %s\n", + strerror ( rc ) ); + return rc; + } + + return 0; +} diff --git a/qemu/roms/ipxe/src/net/fc.c b/qemu/roms/ipxe/src/net/fc.c new file mode 100644 index 000000000..58008995c --- /dev/null +++ b/qemu/roms/ipxe/src/net/fc.c @@ -0,0 +1,1937 @@ +/* + * Copyright (C) 2010 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <assert.h> +#include <byteswap.h> +#include <ipxe/refcnt.h> +#include <ipxe/list.h> +#include <ipxe/tables.h> +#include <ipxe/timer.h> +#include <ipxe/retry.h> +#include <ipxe/interface.h> +#include <ipxe/xfer.h> +#include <ipxe/iobuf.h> +#include <ipxe/fc.h> +#include <ipxe/fcels.h> +#include <ipxe/fcns.h> + +/** @file + * + * Fibre Channel + * + */ + +/** List of Fibre Channel ports */ +LIST_HEAD ( fc_ports ); + +/** List of Fibre Channel peers */ +LIST_HEAD ( fc_peers ); + +/****************************************************************************** + * + * Well-known addresses + * + ****************************************************************************** + */ + +/** Unassigned port ID */ +struct fc_port_id fc_empty_port_id = { .bytes = { 0x00, 0x00, 0x00 } }; + +/** F_Port contoller port ID */ +struct fc_port_id fc_f_port_id = { .bytes = { 0xff, 0xff, 0xfe } }; + +/** Generic services port ID */ +struct fc_port_id fc_gs_port_id = { .bytes = { 0xff, 0xff, 0xfc } }; + +/** Point-to-point low port ID */ +struct fc_port_id fc_ptp_low_port_id = { .bytes = { 0x01, 0x01, 0x01 } }; + +/** Point-to-point high port ID */ +struct fc_port_id fc_ptp_high_port_id = { .bytes = { 0x01, 0x01, 0x02 } }; + +/****************************************************************************** + * + * Utility functions + * + ****************************************************************************** + */ + +/** + * Format Fibre Channel port ID + * + * @v id Fibre Channel port ID + * @ret id_text Port ID text + */ +const char * fc_id_ntoa ( const struct fc_port_id *id ) { + static char id_text[ FC_PORT_ID_STRLEN + 1 /* NUL */ ]; + + snprintf ( id_text, sizeof ( id_text ), "%02x.%02x.%02x", + id->bytes[0], id->bytes[1], id->bytes[2] ); + return id_text; +} + +/** + * Parse Fibre Channel port ID + * + * @v id_text Port ID text + * @ret id Fibre Channel port ID + * @ret rc Return status code + */ +int fc_id_aton ( const char *id_text, struct fc_port_id *id ) { + char *ptr = ( ( char * ) id_text ); + unsigned int i = 0; + + while ( 1 ) { + id->bytes[i++] = strtoul ( ptr, &ptr, 16 ); + if ( i == sizeof ( id->bytes ) ) + return ( ( *ptr == '\0' ) ? 0 : -EINVAL ); + if ( *ptr != '.' ) + return -EINVAL; + ptr++; + } +} + +/** + * Format Fibre Channel WWN + * + * @v wwn Fibre Channel WWN + * @ret wwn_text WWN text + */ +const char * fc_ntoa ( const struct fc_name *wwn ) { + static char wwn_text[ FC_NAME_STRLEN + 1 /* NUL */ ]; + + snprintf ( wwn_text, sizeof ( wwn_text ), + "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", + wwn->bytes[0], wwn->bytes[1], wwn->bytes[2], wwn->bytes[3], + wwn->bytes[4], wwn->bytes[5], wwn->bytes[6], wwn->bytes[7] ); + return wwn_text; +} + +/** + * Parse Fibre Channel WWN + * + * @v wwn_text WWN text + * @ret wwn Fibre Channel WWN + * @ret rc Return status code + */ +int fc_aton ( const char *wwn_text, struct fc_name *wwn ) { + char *ptr = ( ( char * ) wwn_text ); + unsigned int i = 0; + + while ( 1 ) { + wwn->bytes[i++] = strtoul ( ptr, &ptr, 16 ); + if ( i == sizeof ( wwn->bytes ) ) + return ( ( *ptr == '\0' ) ? 0 : -EINVAL ); + if ( *ptr != ':' ) + return -EINVAL; + ptr++; + } +} + +/** + * Fill Fibre Channel socket address + * + * @v sa_fc Fibre Channel socket address to fill in + * @v id Fibre Channel port ID + * @ret sa Socket address + */ +struct sockaddr * fc_fill_sockaddr ( struct sockaddr_fc *sa_fc, + struct fc_port_id *id ) { + union { + struct sockaddr sa; + struct sockaddr_fc fc; + } *u = container_of ( sa_fc, typeof ( *u ), fc ); + + memset ( sa_fc, 0, sizeof ( *sa_fc ) ); + sa_fc->sfc_family = AF_FC; + memcpy ( &sa_fc->sfc_port_id, id, sizeof ( sa_fc->sfc_port_id ) ); + return &u->sa; +} + +/****************************************************************************** + * + * Fibre Channel link state + * + ****************************************************************************** + */ + +/** Default link status code */ +#define EUNKNOWN_LINK_STATUS __einfo_error ( EINFO_EUNKNOWN_LINK_STATUS ) +#define EINFO_EUNKNOWN_LINK_STATUS \ + __einfo_uniqify ( EINFO_EINPROGRESS, 0x01, "Unknown" ) + +/** + * Mark Fibre Channel link as up + * + * @v link Fibre Channel link state monitor + */ +static void fc_link_up ( struct fc_link_state *link ) { + + /* Stop retry timer */ + stop_timer ( &link->timer ); + + /* Record link state */ + link->rc = 0; +} + +/** + * Mark Fibre Channel link as down + * + * @v link Fibre Channel link state monitor + * @v rc Link state + */ +static void fc_link_err ( struct fc_link_state *link, int rc ) { + + /* Record link state */ + if ( rc == 0 ) + rc = -EUNKNOWN_LINK_STATUS; + link->rc = rc; + + /* Schedule another link examination */ + start_timer_fixed ( &link->timer, FC_LINK_RETRY_DELAY ); +} + +/** + * Examine Fibre Channel link state + * + * @v link Fibre Channel link state monitor + */ +static void fc_link_examine ( struct fc_link_state *link ) { + + link->examine ( link ); +} + +/** + * Handle Fibre Channel link retry timer expiry + */ +static void fc_link_expired ( struct retry_timer *timer, int over __unused ) { + struct fc_link_state *link = + container_of ( timer, struct fc_link_state, timer ); + + /* Schedule another link examination */ + start_timer_fixed ( &link->timer, FC_LINK_RETRY_DELAY ); + + /* Examine link */ + fc_link_examine ( link ); +} + +/** + * Initialise Fibre Channel link state monitor + * + * @v link Fibre Channel link state monitor + * @v examine Examine link state method + * @v refcnt Reference counter + */ +static void fc_link_init ( struct fc_link_state *link, + void ( * examine ) ( struct fc_link_state *link ), + struct refcnt *refcnt ) { + + link->rc = -EUNKNOWN_LINK_STATUS; + timer_init ( &link->timer, fc_link_expired, refcnt ); + link->examine = examine; +} + +/** + * Start monitoring Fibre Channel link state + * + * @v link Fibre Channel link state monitor + */ +static void fc_link_start ( struct fc_link_state *link ) { + start_timer_nodelay ( &link->timer ); +} + +/** + * Stop monitoring Fibre Channel link state + * + * @v link Fibre Channel link state monitor + */ +static void fc_link_stop ( struct fc_link_state *link ) { + stop_timer ( &link->timer ); +} + +/****************************************************************************** + * + * Fibre Channel exchanges + * + ****************************************************************************** + */ + +/** A Fibre Channel exchange */ +struct fc_exchange { + /** Reference count */ + struct refcnt refcnt; + /** Fibre Channel port */ + struct fc_port *port; + /** List of active exchanges within this port */ + struct list_head list; + + /** Peer port ID */ + struct fc_port_id peer_port_id; + /** Data structure type */ + unsigned int type; + /** Flags */ + unsigned int flags; + /** Local exchange ID */ + uint16_t xchg_id; + /** Peer exchange ID */ + uint16_t peer_xchg_id; + /** Active sequence ID */ + uint8_t seq_id; + /** Active sequence count */ + uint16_t seq_cnt; + + /** Timeout timer */ + struct retry_timer timer; + + /** Upper-layer protocol interface */ + struct interface ulp; +}; + +/** Fibre Channel exchange flags */ +enum fc_exchange_flags { + /** We are the exchange originator */ + FC_XCHG_ORIGINATOR = 0x0001, + /** We have the sequence initiative */ + FC_XCHG_SEQ_INITIATIVE = 0x0002, + /** This is the first sequence of the exchange */ + FC_XCHG_SEQ_FIRST = 0x0004, +}; + +/** Fibre Channel timeout */ +#define FC_TIMEOUT ( 1 * TICKS_PER_SEC ) + +/** + * Create local Fibre Channel exchange identifier + * + * @ret xchg_id Local exchange ID + */ +static unsigned int fc_new_xchg_id ( void ) { + static uint16_t next_id = 0x0000; + + /* We must avoid using FC_RX_ID_UNKNOWN (0xffff) */ + next_id += 2; + return next_id; +} + +/** + * Create local Fibre Channel sequence identifier + * + * @ret seq_id Local sequence identifier + */ +static unsigned int fc_new_seq_id ( void ) { + static uint8_t seq_id = 0x00; + + return (++seq_id); +} + +/** + * Free Fibre Channel exchange + * + * @v refcnt Reference count + */ +static void fc_xchg_free ( struct refcnt *refcnt ) { + struct fc_exchange *xchg = + container_of ( refcnt, struct fc_exchange, refcnt ); + + assert ( ! timer_running ( &xchg->timer ) ); + assert ( list_empty ( &xchg->list ) ); + + fc_port_put ( xchg->port ); + free ( xchg ); +} + +/** + * Close Fibre Channel exchange + * + * @v xchg Fibre Channel exchange + * @v rc Reason for close + */ +static void fc_xchg_close ( struct fc_exchange *xchg, int rc ) { + struct fc_port *port = xchg->port; + + if ( rc != 0 ) { + DBGC2 ( port, "FCXCHG %s/%04x closed: %s\n", + port->name, xchg->xchg_id, strerror ( rc ) ); + } + + /* Stop timer */ + stop_timer ( &xchg->timer ); + + /* If list still holds a reference, remove from list of open + * exchanges and drop list's reference. + */ + if ( ! list_empty ( &xchg->list ) ) { + list_del ( &xchg->list ); + INIT_LIST_HEAD ( &xchg->list ); + ref_put ( &xchg->refcnt ); + } + + /* Shutdown interfaces */ + intf_shutdown ( &xchg->ulp, rc ); +} + +/** + * Handle exchange timeout + * + * @v timer Timeout timer + * @v over Failure indicator + */ +static void fc_xchg_expired ( struct retry_timer *timer, int over __unused ) { + struct fc_exchange *xchg = + container_of ( timer, struct fc_exchange, timer ); + struct fc_port *port = xchg->port; + + DBGC ( port, "FCXCHG %s/%04x timed out\n", port->name, xchg->xchg_id ); + + /* Terminate the exchange */ + fc_xchg_close ( xchg, -ETIMEDOUT ); +} + +/** + * Check Fibre Channel exchange window + * + * @v xchg Fibre Channel exchange + * @ret len Length opf window + */ +static size_t fc_xchg_window ( struct fc_exchange *xchg __unused ) { + + /* We don't currently store the path MTU */ + return FC_LOGIN_DEFAULT_MTU; +} + +/** + * Allocate Fibre Channel I/O buffer + * + * @v xchg Fibre Channel exchange + * @v len Payload length + * @ret iobuf I/O buffer, or NULL + */ +static struct io_buffer * fc_xchg_alloc_iob ( struct fc_exchange *xchg, + size_t len ) { + struct fc_port *port = xchg->port; + struct io_buffer *iobuf; + + iobuf = xfer_alloc_iob ( &port->transport, + ( sizeof ( struct fc_frame_header ) + len ) ); + if ( iobuf ) { + iob_reserve ( iobuf, sizeof ( struct fc_frame_header ) ); + } + return iobuf; +} + +/** + * Transmit data as part of a Fibre Channel exchange + * + * @v xchg Fibre Channel exchange + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int fc_xchg_tx ( struct fc_exchange *xchg, struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + struct fc_port *port = xchg->port; + struct sockaddr_fc *dest = ( ( struct sockaddr_fc * ) meta->dest ); + struct fc_frame_header *fchdr; + unsigned int r_ctl; + unsigned int f_ctl_es; + int rc; + + /* Sanity checks */ + if ( ! ( xchg->flags & FC_XCHG_SEQ_INITIATIVE ) ) { + DBGC ( port, "FCXCHG %s/%04x cannot transmit while not " + "holding sequence initiative\n", + port->name, xchg->xchg_id ); + rc = -EBUSY; + goto done; + } + + /* Calculate routing control */ + switch ( xchg->type ) { + case FC_TYPE_ELS: + r_ctl = FC_R_CTL_ELS; + if ( meta->flags & XFER_FL_RESPONSE ) { + r_ctl |= FC_R_CTL_SOL_CTRL; + } else { + r_ctl |= FC_R_CTL_UNSOL_CTRL; + } + break; + case FC_TYPE_CT: + r_ctl = FC_R_CTL_DATA; + if ( meta->flags & XFER_FL_RESPONSE ) { + r_ctl |= FC_R_CTL_SOL_CTRL; + } else { + r_ctl |= FC_R_CTL_UNSOL_CTRL; + } + break; + default: + r_ctl = FC_R_CTL_DATA; + switch ( meta->flags & + ( XFER_FL_CMD_STAT | XFER_FL_RESPONSE ) ) { + case ( XFER_FL_CMD_STAT | XFER_FL_RESPONSE ): + r_ctl |= FC_R_CTL_CMD_STAT; + break; + case ( XFER_FL_CMD_STAT ): + r_ctl |= FC_R_CTL_UNSOL_CMD; + break; + case ( XFER_FL_RESPONSE ): + r_ctl |= FC_R_CTL_SOL_DATA; + break; + default: + r_ctl |= FC_R_CTL_UNSOL_DATA; + break; + } + break; + } + + /* Calculate exchange and sequence control */ + f_ctl_es = 0; + if ( ! ( xchg->flags & FC_XCHG_ORIGINATOR ) ) + f_ctl_es |= FC_F_CTL_ES_RESPONDER; + if ( xchg->flags & FC_XCHG_SEQ_FIRST ) + f_ctl_es |= FC_F_CTL_ES_FIRST; + if ( meta->flags & XFER_FL_OUT ) + f_ctl_es |= ( FC_F_CTL_ES_END | FC_F_CTL_ES_LAST ); + if ( meta->flags & XFER_FL_OVER ) + f_ctl_es |= ( FC_F_CTL_ES_END | FC_F_CTL_ES_TRANSFER ); + + /* Create frame header */ + fchdr = iob_push ( iobuf, sizeof ( *fchdr ) ); + memset ( fchdr, 0, sizeof ( *fchdr ) ); + fchdr->r_ctl = r_ctl; + memcpy ( &fchdr->d_id, + ( dest ? &dest->sfc_port_id : &xchg->peer_port_id ), + sizeof ( fchdr->d_id ) ); + memcpy ( &fchdr->s_id, &port->port_id, sizeof ( fchdr->s_id ) ); + fchdr->type = xchg->type; + fchdr->f_ctl_es = f_ctl_es; + fchdr->seq_id = xchg->seq_id; + fchdr->seq_cnt = htons ( xchg->seq_cnt++ ); + fchdr->ox_id = htons ( ( xchg->flags & FC_XCHG_ORIGINATOR ) ? + xchg->xchg_id : xchg->peer_xchg_id ); + fchdr->rx_id = htons ( ( xchg->flags & FC_XCHG_ORIGINATOR ) ? + xchg->peer_xchg_id : xchg->xchg_id ); + if ( meta->flags & XFER_FL_ABS_OFFSET ) { + fchdr->f_ctl_misc |= FC_F_CTL_MISC_REL_OFF; + fchdr->parameter = htonl ( meta->offset ); + } + + /* Relinquish sequence initiative if applicable */ + if ( meta->flags & XFER_FL_OVER ) { + xchg->flags &= ~( FC_XCHG_SEQ_INITIATIVE | FC_XCHG_SEQ_FIRST ); + xchg->seq_cnt = 0; + } + + /* Reset timeout */ + start_timer_fixed ( &xchg->timer, FC_TIMEOUT ); + + /* Deliver frame */ + if ( ( rc = xfer_deliver_iob ( &port->transport, + iob_disown ( iobuf ) ) ) != 0 ) { + DBGC ( port, "FCXCHG %s/%04x cannot transmit: %s\n", + port->name, xchg->xchg_id, strerror ( rc ) ); + goto done; + } + + done: + free_iob ( iobuf ); + return rc; +} + +/** Mapping from Fibre Channel routing control information to xfer metadata */ +static const uint8_t fc_r_ctl_info_meta_flags[ FC_R_CTL_INFO_MASK + 1 ] = { + [FC_R_CTL_UNCAT] = ( 0 ), + [FC_R_CTL_SOL_DATA] = ( XFER_FL_RESPONSE ), + [FC_R_CTL_UNSOL_CTRL] = ( XFER_FL_CMD_STAT ), + [FC_R_CTL_SOL_CTRL] = ( XFER_FL_CMD_STAT ), + [FC_R_CTL_UNSOL_DATA] = ( 0 ), + [FC_R_CTL_DATA_DESC] = ( XFER_FL_CMD_STAT ), + [FC_R_CTL_UNSOL_CMD] = ( XFER_FL_CMD_STAT ), + [FC_R_CTL_CMD_STAT] = ( XFER_FL_CMD_STAT | XFER_FL_RESPONSE ), +}; + +/** + * Receive data as part of a Fibre Channel exchange + * + * @v xchg Fibre Channel exchange + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int fc_xchg_rx ( struct fc_exchange *xchg, struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + struct fc_port *port = xchg->port; + struct fc_frame_header *fchdr = iobuf->data; + struct xfer_metadata fc_meta; + struct sockaddr_fc src; + struct sockaddr_fc dest; + int rc; + + /* Record peer exchange ID */ + xchg->peer_xchg_id = + ntohs ( ( fchdr->f_ctl_es & FC_F_CTL_ES_RESPONDER ) ? + fchdr->rx_id : fchdr->ox_id ); + + /* Sequence checks */ + if ( xchg->flags & FC_XCHG_SEQ_INITIATIVE ) { + DBGC ( port, "FCXCHG %s/%04x received frame while holding " + "sequence initiative\n", port->name, xchg->xchg_id ); + rc = -EBUSY; + goto done; + } + if ( ntohs ( fchdr->seq_cnt ) != xchg->seq_cnt ) { + DBGC ( port, "FCXCHG %s/%04x received out-of-order frame %d " + "(expected %d)\n", port->name, xchg->xchg_id, + ntohs ( fchdr->seq_cnt ), xchg->seq_cnt ); + rc = -EPIPE; + goto done; + } + if ( xchg->seq_cnt == 0 ) + xchg->seq_id = fchdr->seq_id; + xchg->seq_cnt++; + if ( fchdr->seq_id != xchg->seq_id ) { + DBGC ( port, "FCXCHG %s/%04x received frame for incorrect " + "sequence %02x (expected %02x)\n", port->name, + xchg->xchg_id, fchdr->seq_id, xchg->seq_id ); + rc = -EPIPE; + goto done; + } + + /* Check for end of sequence and transfer of sequence initiative */ + if ( fchdr->f_ctl_es & FC_F_CTL_ES_END ) { + xchg->seq_cnt = 0; + if ( fchdr->f_ctl_es & FC_F_CTL_ES_TRANSFER ) { + xchg->flags |= FC_XCHG_SEQ_INITIATIVE; + xchg->seq_id = fc_new_seq_id(); + } + } + + /* Construct metadata */ + memset ( &fc_meta, 0, sizeof ( fc_meta ) ); + fc_meta.flags = + fc_r_ctl_info_meta_flags[ fchdr->r_ctl & FC_R_CTL_INFO_MASK ]; + if ( fchdr->f_ctl_es & FC_F_CTL_ES_TRANSFER ) { + fc_meta.flags |= XFER_FL_OVER; + } + if ( ( fchdr->f_ctl_es & FC_F_CTL_ES_LAST ) && + ( fchdr->f_ctl_es & FC_F_CTL_ES_END ) ) { + fc_meta.flags |= XFER_FL_OUT; + } + if ( fchdr->f_ctl_misc & FC_F_CTL_MISC_REL_OFF ) { + fc_meta.flags |= XFER_FL_ABS_OFFSET; + fc_meta.offset = ntohl ( fchdr->parameter ); + } + fc_meta.src = fc_fill_sockaddr ( &src, &fchdr->s_id ); + fc_meta.dest = fc_fill_sockaddr ( &dest, &fchdr->d_id ); + + /* Reset timeout */ + start_timer_fixed ( &xchg->timer, FC_TIMEOUT ); + + /* Deliver via exchange's ULP interface */ + iob_pull ( iobuf, sizeof ( *fchdr ) ); + if ( ( rc = xfer_deliver ( &xchg->ulp, iob_disown ( iobuf ), + &fc_meta ) ) != 0 ) { + DBGC ( port, "FCXCHG %s/%04x cannot deliver frame: %s\n", + port->name, xchg->xchg_id, strerror ( rc ) ); + goto done; + } + + /* Close exchange if applicable */ + if ( ( fchdr->f_ctl_es & FC_F_CTL_ES_LAST ) && + ( fchdr->f_ctl_es & FC_F_CTL_ES_END ) ) { + fc_xchg_close ( xchg, 0 ); + } + + done: + free_iob ( iobuf ); + return rc; +} + +/** Fibre Channel exchange ULP interface operations */ +static struct interface_operation fc_xchg_ulp_op[] = { + INTF_OP ( xfer_deliver, struct fc_exchange *, fc_xchg_tx ), + INTF_OP ( xfer_alloc_iob, struct fc_exchange *, fc_xchg_alloc_iob ), + INTF_OP ( xfer_window, struct fc_exchange *, fc_xchg_window ), + INTF_OP ( intf_close, struct fc_exchange *, fc_xchg_close ), +}; + +/** Fibre Channel exchange ULP interface descriptor */ +static struct interface_descriptor fc_xchg_ulp_desc = + INTF_DESC ( struct fc_exchange, ulp, fc_xchg_ulp_op ); + +/** + * Create new Fibre Channel exchange + * + * @v port Fibre Channel port + * @v peer_port_id Peer port ID + * @ret xchg Exchange, or NULL + */ +static struct fc_exchange * fc_xchg_create ( struct fc_port *port, + struct fc_port_id *peer_port_id, + unsigned int type ) { + struct fc_exchange *xchg; + + /* Allocate and initialise structure */ + xchg = zalloc ( sizeof ( *xchg ) ); + if ( ! xchg ) + return NULL; + ref_init ( &xchg->refcnt, fc_xchg_free ); + intf_init ( &xchg->ulp, &fc_xchg_ulp_desc, &xchg->refcnt ); + timer_init ( &xchg->timer, fc_xchg_expired, &xchg->refcnt ); + xchg->port = fc_port_get ( port ); + memcpy ( &xchg->peer_port_id, peer_port_id, + sizeof ( xchg->peer_port_id ) ); + xchg->type = type; + xchg->xchg_id = fc_new_xchg_id(); + xchg->peer_xchg_id = FC_RX_ID_UNKNOWN; + xchg->seq_id = fc_new_seq_id(); + + /* Transfer reference to list of exchanges and return */ + list_add ( &xchg->list, &port->xchgs ); + return xchg; +} + +/** + * Originate a new Fibre Channel exchange + * + * @v parent Interface to which to attach + * @v port Fibre Channel port + * @v peer_port_id Peer port ID + * @ret xchg_id Exchange ID, or negative error + */ +int fc_xchg_originate ( struct interface *parent, struct fc_port *port, + struct fc_port_id *peer_port_id, unsigned int type ) { + struct fc_exchange *xchg; + + /* Allocate and initialise structure */ + xchg = fc_xchg_create ( port, peer_port_id, type ); + if ( ! xchg ) + return -ENOMEM; + xchg->flags = ( FC_XCHG_ORIGINATOR | FC_XCHG_SEQ_INITIATIVE | + FC_XCHG_SEQ_FIRST ); + + DBGC2 ( port, "FCXCHG %s/%04x originating to %s (type %02x)\n", + port->name, xchg->xchg_id, fc_id_ntoa ( &xchg->peer_port_id ), + xchg->type ); + + /* Attach to parent interface and return */ + intf_plug_plug ( &xchg->ulp, parent ); + return xchg->xchg_id; +} + +/** + * Open a new responder Fibre Channel exchange + * + * @v port Fibre Channel port + * @v fchdr Fibre Channel frame header + * @ret xchg Fibre Channel exchange, or NULL + */ +static struct fc_exchange * fc_xchg_respond ( struct fc_port *port, + struct fc_frame_header *fchdr ) { + struct fc_exchange *xchg; + struct fc_responder *responder; + unsigned int type = fchdr->type; + int rc; + + /* Allocate and initialise structure */ + xchg = fc_xchg_create ( port, &fchdr->s_id, type ); + if ( ! xchg ) + return NULL; + xchg->seq_id = fchdr->seq_id; + + DBGC2 ( port, "FCXCHG %s/%04x responding to %s xchg %04x (type " + "%02x)\n", port->name, xchg->xchg_id, + fc_id_ntoa ( &xchg->peer_port_id ), + ntohs ( fchdr->ox_id ), xchg->type ); + + /* Find a responder, if any */ + for_each_table_entry ( responder, FC_RESPONDERS ) { + if ( responder->type == type ) { + if ( ( rc = responder->respond ( &xchg->ulp, port, + &fchdr->d_id, + &fchdr->s_id ) ) !=0 ){ + DBGC ( port, "FCXCHG %s/%04x could not " + "respond: %s\n", port->name, + xchg->xchg_id, strerror ( rc ) ); + } + } + break; + } + + /* We may or may not have a ULP attached at this point, but + * the exchange does exist. + */ + return xchg; +} + +/****************************************************************************** + * + * Fibre Channel ports + * + ****************************************************************************** + */ + +/** + * Close Fibre Channel port + * + * @v port Fibre Channel port + * @v rc Reason for close + */ +static void fc_port_close ( struct fc_port *port, int rc ) { + struct fc_exchange *xchg; + struct fc_exchange *tmp; + + DBGC ( port, "FCPORT %s closed\n", port->name ); + + /* Log out port, if necessary */ + if ( fc_link_ok ( &port->link ) ) + fc_port_logout ( port, rc ); + + /* Stop link monitor */ + fc_link_stop ( &port->link ); + + /* Shut down interfaces */ + intf_shutdown ( &port->transport, rc ); + intf_shutdown ( &port->flogi, rc ); + intf_shutdown ( &port->ns_plogi, rc ); + + /* Shut down any remaining exchanges */ + list_for_each_entry_safe ( xchg, tmp, &port->xchgs, list ) + fc_xchg_close ( xchg, rc ); + + /* Remove from list of ports */ + list_del ( &port->list ); + INIT_LIST_HEAD ( &port->list ); +} + +/** + * Identify Fibre Channel exchange by local exchange ID + * + * @v port Fibre Channel port + * @v xchg_id Local exchange ID + * @ret xchg Fibre Channel exchange, or NULL + */ +static struct fc_exchange * fc_port_demux ( struct fc_port *port, + unsigned int xchg_id ) { + struct fc_exchange *xchg; + + list_for_each_entry ( xchg, &port->xchgs, list ) { + if ( xchg->xchg_id == xchg_id ) + return xchg; + } + return NULL; +} + +/** + * Handle received frame from Fibre Channel port + * + * @v port Fibre Channel port + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int fc_port_deliver ( struct fc_port *port, struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + struct fc_frame_header *fchdr = iobuf->data; + unsigned int xchg_id; + struct fc_exchange *xchg; + int rc; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *fchdr ) ) { + DBGC ( port, "FCPORT %s received underlength frame (%zd " + "bytes)\n", port->name, iob_len ( iobuf ) ); + rc = -EINVAL; + goto err_sanity; + } + + /* Verify local port ID */ + if ( ( memcmp ( &fchdr->d_id, &port->port_id, + sizeof ( fchdr->d_id ) ) != 0 ) && + ( memcmp ( &fchdr->d_id, &fc_f_port_id, + sizeof ( fchdr->d_id ) ) != 0 ) && + ( memcmp ( &port->port_id, &fc_empty_port_id, + sizeof ( port->port_id ) ) != 0 ) ) { + DBGC ( port, "FCPORT %s received frame for incorrect port ID " + "%s\n", port->name, fc_id_ntoa ( &fchdr->d_id ) ); + rc = -ENOTCONN; + goto err_port_id; + } + + /* Demultiplex amongst active exchanges */ + xchg_id = ntohs ( ( fchdr->f_ctl_es & FC_F_CTL_ES_RESPONDER ) ? + fchdr->ox_id : fchdr->rx_id ); + xchg = fc_port_demux ( port, xchg_id ); + + /* If we have no active exchange and this frame starts a new + * exchange, try to create a new responder exchange + */ + if ( ( fchdr->f_ctl_es & FC_F_CTL_ES_FIRST ) && + ( fchdr->seq_cnt == 0 ) ) { + + /* Create new exchange */ + xchg = fc_xchg_respond ( port, fchdr ); + if ( ! xchg ) { + DBGC ( port, "FCPORT %s cannot create new exchange\n", + port->name ); + rc = -ENOMEM; + goto err_respond; + } + } + + /* Fail if no exchange exists */ + if ( ! xchg ) { + DBGC ( port, "FCPORT %s xchg %04x unknown\n", + port->name, xchg_id ); + rc = -ENOTCONN; + goto err_no_xchg; + } + + /* Pass received frame to exchange */ + ref_get ( &xchg->refcnt ); + if ( ( rc = fc_xchg_rx ( xchg, iob_disown ( iobuf ), meta ) ) != 0 ) + goto err_xchg_rx; + + err_xchg_rx: + ref_put ( &xchg->refcnt ); + err_no_xchg: + err_respond: + err_port_id: + err_sanity: + free_iob ( iobuf ); + return rc; +} + +/** + * Log in Fibre Channel port + * + * @v port Fibre Channel port + * @v port_id Local port ID + * @v link_node_wwn Link node name + * @v link_port_wwn Link port name + * @v has_fabric Link is to a fabric + * @ret rc Return status code + */ +int fc_port_login ( struct fc_port *port, struct fc_port_id *port_id, + const struct fc_name *link_node_wwn, + const struct fc_name *link_port_wwn, int has_fabric ) { + struct fc_peer *peer; + struct fc_peer *tmp; + int rc; + + /* Perform implicit logout if logged in and details differ */ + if ( fc_link_ok ( &port->link ) && + ( ( ( !! ( port->flags & FC_PORT_HAS_FABRIC ) ) != + ( !! has_fabric ) ) || + ( memcmp ( &port->link_node_wwn, link_node_wwn, + sizeof ( port->link_node_wwn ) ) != 0 ) || + ( memcmp ( &port->link_port_wwn, link_port_wwn, + sizeof ( port->link_port_wwn ) ) != 0 ) || + ( has_fabric && + ( memcmp ( &port->port_id, port_id, + sizeof ( port->port_id ) ) != 0 ) ) ) ) { + fc_port_logout ( port, 0 ); + } + + /* Log in, if applicable */ + if ( ! fc_link_ok ( &port->link ) ) { + + /* Record link port name */ + memcpy ( &port->link_node_wwn, link_node_wwn, + sizeof ( port->link_node_wwn ) ); + memcpy ( &port->link_port_wwn, link_port_wwn, + sizeof ( port->link_port_wwn ) ); + DBGC ( port, "FCPORT %s logged in to %s", + port->name, fc_ntoa ( &port->link_node_wwn ) ); + DBGC ( port, " port %s\n", fc_ntoa ( &port->link_port_wwn ) ); + + /* Calculate local (and possibly remote) port IDs */ + if ( has_fabric ) { + port->flags |= FC_PORT_HAS_FABRIC; + memcpy ( &port->port_id, port_id, + sizeof ( port->port_id ) ); + } else { + port->flags &= ~FC_PORT_HAS_FABRIC; + if ( memcmp ( &port->port_wwn, link_port_wwn, + sizeof ( port->port_wwn ) ) > 0 ) { + memcpy ( &port->port_id, &fc_ptp_high_port_id, + sizeof ( port->port_id ) ); + memcpy ( &port->ptp_link_port_id, + &fc_ptp_low_port_id, + sizeof ( port->ptp_link_port_id ) ); + } else { + memcpy ( &port->port_id, &fc_ptp_low_port_id, + sizeof ( port->port_id ) ); + memcpy ( &port->ptp_link_port_id, + &fc_ptp_high_port_id, + sizeof ( port->ptp_link_port_id ) ); + } + } + DBGC ( port, "FCPORT %s logged in via a %s, with local ID " + "%s\n", port->name, + ( ( port->flags & FC_PORT_HAS_FABRIC ) ? + "fabric" : "point-to-point link" ), + fc_id_ntoa ( &port->port_id ) ); + } + + /* Log in to name server, if attached to a fabric */ + if ( has_fabric && ! ( port->flags & FC_PORT_HAS_NS ) ) { + + DBGC ( port, "FCPORT %s attempting login to name server\n", + port->name ); + + intf_restart ( &port->ns_plogi, -ECANCELED ); + if ( ( rc = fc_els_plogi ( &port->ns_plogi, port, + &fc_gs_port_id ) ) != 0 ) { + DBGC ( port, "FCPORT %s could not initiate name " + "server PLOGI: %s\n", + port->name, strerror ( rc ) ); + fc_port_logout ( port, rc ); + return rc; + } + } + + /* Record login */ + fc_link_up ( &port->link ); + + /* Notify peers of link state change */ + list_for_each_entry_safe ( peer, tmp, &fc_peers, list ) { + fc_peer_get ( peer ); + fc_link_examine ( &peer->link ); + fc_peer_put ( peer ); + } + + return 0; +} + +/** + * Log out Fibre Channel port + * + * @v port Fibre Channel port + * @v rc Reason for logout + */ +void fc_port_logout ( struct fc_port *port, int rc ) { + struct fc_peer *peer; + struct fc_peer *tmp; + + DBGC ( port, "FCPORT %s logged out: %s\n", + port->name, strerror ( rc ) ); + + /* Erase port details */ + memset ( &port->port_id, 0, sizeof ( port->port_id ) ); + port->flags = 0; + + /* Record logout */ + fc_link_err ( &port->link, rc ); + + /* Notify peers of link state change */ + list_for_each_entry_safe ( peer, tmp, &fc_peers, list ) { + fc_peer_get ( peer ); + fc_link_examine ( &peer->link ); + fc_peer_put ( peer ); + } +} + +/** + * Handle FLOGI completion + * + * @v port Fibre Channel port + * @v rc Reason for completion + */ +static void fc_port_flogi_done ( struct fc_port *port, int rc ) { + + intf_restart ( &port->flogi, rc ); + + if ( rc != 0 ) + fc_port_logout ( port, rc ); +} + +/** + * Handle name server PLOGI completion + * + * @v port Fibre Channel port + * @v rc Reason for completion + */ +static void fc_port_ns_plogi_done ( struct fc_port *port, int rc ) { + + intf_restart ( &port->ns_plogi, rc ); + + if ( rc == 0 ) { + port->flags |= FC_PORT_HAS_NS; + DBGC ( port, "FCPORT %s logged in to name server\n", + port->name ); + } else { + DBGC ( port, "FCPORT %s could not log in to name server: %s\n", + port->name, strerror ( rc ) ); + /* Absence of a name server is not a fatal error */ + } +} + +/** + * Examine Fibre Channel port link state + * + * @ link Fibre Channel link state monitor + */ +static void fc_port_examine ( struct fc_link_state *link ) { + struct fc_port *port = container_of ( link, struct fc_port, link ); + int rc; + + /* Do nothing if already logged in */ + if ( fc_link_ok ( &port->link ) ) + return; + + DBGC ( port, "FCPORT %s attempting login\n", port->name ); + + /* Try to create FLOGI ELS */ + intf_restart ( &port->flogi, -ECANCELED ); + if ( ( rc = fc_els_flogi ( &port->flogi, port ) ) != 0 ) { + DBGC ( port, "FCPORT %s could not initiate FLOGI: %s\n", + port->name, strerror ( rc ) ); + fc_port_logout ( port, rc ); + return; + } +} + +/** + * Handle change of flow control window + * + * @v port Fibre Channel port + */ +static void fc_port_window_changed ( struct fc_port *port ) { + size_t window; + + /* Check if transport layer is ready */ + window = xfer_window ( &port->transport ); + if ( window > 0 ) { + + /* Transport layer is ready. Start login if the link + * is not already up. + */ + if ( ! fc_link_ok ( &port->link ) ) + fc_link_start ( &port->link ); + + } else { + + /* Transport layer is not ready. Log out port and + * wait for transport layer before attempting log in + * again. + */ + fc_port_logout ( port, -ENOTCONN ); + fc_link_stop ( &port->link ); + } +} + +/** Fibre Channel port transport interface operations */ +static struct interface_operation fc_port_transport_op[] = { + INTF_OP ( xfer_deliver, struct fc_port *, fc_port_deliver ), + INTF_OP ( xfer_window_changed, struct fc_port *, + fc_port_window_changed ), + INTF_OP ( intf_close, struct fc_port *, fc_port_close ), +}; + +/** Fibre Channel port transport interface descriptor */ +static struct interface_descriptor fc_port_transport_desc = + INTF_DESC ( struct fc_port, transport, fc_port_transport_op ); + +/** Fibre Channel port FLOGI interface operations */ +static struct interface_operation fc_port_flogi_op[] = { + INTF_OP ( intf_close, struct fc_port *, fc_port_flogi_done ), +}; + +/** Fibre Channel port FLOGI interface descriptor */ +static struct interface_descriptor fc_port_flogi_desc = + INTF_DESC ( struct fc_port, flogi, fc_port_flogi_op ); + +/** Fibre Channel port name server PLOGI interface operations */ +static struct interface_operation fc_port_ns_plogi_op[] = { + INTF_OP ( intf_close, struct fc_port *, fc_port_ns_plogi_done ), +}; + +/** Fibre Channel port name server PLOGI interface descriptor */ +static struct interface_descriptor fc_port_ns_plogi_desc = + INTF_DESC ( struct fc_port, ns_plogi, fc_port_ns_plogi_op ); + +/** + * Create Fibre Channel port + * + * @v transport Transport interface + * @v node Fibre Channel node name + * @v port Fibre Channel port name + * @v name Symbolic port name + * @ret rc Return status code + */ +int fc_port_open ( struct interface *transport, const struct fc_name *node_wwn, + const struct fc_name *port_wwn, const char *name ) { + struct fc_port *port; + + /* Allocate and initialise structure */ + port = zalloc ( sizeof ( *port ) ); + if ( ! port ) + return -ENOMEM; + ref_init ( &port->refcnt, NULL ); + intf_init ( &port->transport, &fc_port_transport_desc, &port->refcnt ); + fc_link_init ( &port->link, fc_port_examine, &port->refcnt ); + intf_init ( &port->flogi, &fc_port_flogi_desc, &port->refcnt ); + intf_init ( &port->ns_plogi, &fc_port_ns_plogi_desc, &port->refcnt ); + list_add_tail ( &port->list, &fc_ports ); + INIT_LIST_HEAD ( &port->xchgs ); + memcpy ( &port->node_wwn, node_wwn, sizeof ( port->node_wwn ) ); + memcpy ( &port->port_wwn, port_wwn, sizeof ( port->port_wwn ) ); + snprintf ( port->name, sizeof ( port->name ), "%s", name ); + + DBGC ( port, "FCPORT %s opened as %s", + port->name, fc_ntoa ( &port->node_wwn ) ); + DBGC ( port, " port %s\n", fc_ntoa ( &port->port_wwn ) ); + + /* Attach to transport layer, mortalise self, and return */ + intf_plug_plug ( &port->transport, transport ); + ref_put ( &port->refcnt ); + return 0; +} + +/** + * Find Fibre Channel port by name + * + * @v name Fibre Channel port name + * @ret port Fibre Channel port, or NULL + */ +struct fc_port * fc_port_find ( const char *name ) { + struct fc_port *port; + + list_for_each_entry ( port, &fc_ports, list ) { + if ( strcmp ( name, port->name ) == 0 ) + return port; + } + return NULL; +} + +/****************************************************************************** + * + * Fibre Channel peers + * + ****************************************************************************** + */ + +/** + * Close Fibre Channel peer + * + * @v peer Fibre Channel peer + * @v rc Reason for close + */ +static void fc_peer_close ( struct fc_peer *peer, int rc ) { + + DBGC ( peer, "FCPEER %s closed: %s\n", + fc_ntoa ( &peer->port_wwn ) , strerror ( rc ) ); + + /* Sanity check */ + assert ( list_empty ( &peer->ulps ) ); + + /* Stop link timer */ + fc_link_stop ( &peer->link ); + + /* Shut down interfaces */ + intf_shutdown ( &peer->plogi, rc ); + + /* Remove from list of peers */ + list_del ( &peer->list ); + INIT_LIST_HEAD ( &peer->list ); +} + +/** + * Increment Fibre Channel peer active usage count + * + * @v peer Fibre Channel peer + */ +static void fc_peer_increment ( struct fc_peer *peer ) { + + /* Increment our usage count */ + peer->usage++; +} + +/** + * Decrement Fibre Channel peer active usage count + * + * @v peer Fibre Channel peer + */ +static void fc_peer_decrement ( struct fc_peer *peer ) { + + /* Sanity check */ + assert ( peer->usage > 0 ); + + /* Decrement our usage count and log out if we reach zero */ + if ( --(peer->usage) == 0 ) + fc_peer_logout ( peer, 0 ); +} + +/** + * Log in Fibre Channel peer + * + * @v peer Fibre Channel peer + * @v port Fibre Channel port + * @v port_id Port ID + * @ret rc Return status code + */ +int fc_peer_login ( struct fc_peer *peer, struct fc_port *port, + struct fc_port_id *port_id ) { + struct fc_ulp *ulp; + struct fc_ulp *tmp; + + /* Perform implicit logout if logged in and details differ */ + if ( fc_link_ok ( &peer->link ) && + ( ( peer->port != port ) || + ( memcmp ( &peer->port_id, port_id, + sizeof ( peer->port_id ) ) !=0 ) ) ) { + fc_peer_logout ( peer, 0 ); + } + + /* Log in, if applicable */ + if ( ! fc_link_ok ( &peer->link ) ) { + + /* Record peer details */ + assert ( peer->port == NULL ); + peer->port = fc_port_get ( port ); + memcpy ( &peer->port_id, port_id, sizeof ( peer->port_id ) ); + DBGC ( peer, "FCPEER %s logged in via %s as %s\n", + fc_ntoa ( &peer->port_wwn ), peer->port->name, + fc_id_ntoa ( &peer->port_id ) ); + + /* Add login reference */ + fc_peer_get ( peer ); + } + + /* Record login */ + fc_link_up ( &peer->link ); + + /* Notify ULPs of link state change */ + list_for_each_entry_safe ( ulp, tmp, &peer->ulps, list ) { + fc_ulp_get ( ulp ); + fc_link_examine ( &ulp->link ); + fc_ulp_put ( ulp ); + } + + return 0; +} + +/** + * Log out Fibre Channel peer + * + * @v peer Fibre Channel peer + * @v rc Reason for logout + */ +void fc_peer_logout ( struct fc_peer *peer, int rc ) { + struct fc_ulp *ulp; + struct fc_ulp *tmp; + + DBGC ( peer, "FCPEER %s logged out: %s\n", + fc_ntoa ( &peer->port_wwn ), strerror ( rc ) ); + + /* Drop login reference, if applicable */ + if ( fc_link_ok ( &peer->link ) ) + fc_peer_put ( peer ); + + /* Erase peer details */ + fc_port_put ( peer->port ); + peer->port = NULL; + + /* Record logout */ + fc_link_err ( &peer->link, rc ); + + /* Notify ULPs of link state change */ + list_for_each_entry_safe ( ulp, tmp, &peer->ulps, list ) { + fc_ulp_get ( ulp ); + fc_link_examine ( &ulp->link ); + fc_ulp_put ( ulp ); + } + + /* Close peer if there are no active users */ + if ( peer->usage == 0 ) + fc_peer_close ( peer, rc ); +} + +/** + * Handle PLOGI completion + * + * @v peer Fibre Channel peer + * @v rc Reason for completion + */ +static void fc_peer_plogi_done ( struct fc_peer *peer, int rc ) { + + intf_restart ( &peer->plogi, rc ); + + if ( rc != 0 ) + fc_peer_logout ( peer, rc ); +} + +/** + * Initiate PLOGI + * + * @v peer Fibre Channel peer + * @v port Fibre Channel port + * @v peer_port_id Peer port ID + * @ret rc Return status code + */ +static int fc_peer_plogi ( struct fc_peer *peer, struct fc_port *port, + struct fc_port_id *peer_port_id ) { + int rc; + + /* Try to create PLOGI ELS */ + intf_restart ( &peer->plogi, -ECANCELED ); + if ( ( rc = fc_els_plogi ( &peer->plogi, port, peer_port_id ) ) != 0 ) { + DBGC ( peer, "FCPEER %s could not initiate PLOGI: %s\n", + fc_ntoa ( &peer->port_wwn ), strerror ( rc ) ); + fc_peer_logout ( peer, rc ); + return rc; + } + + return 0; +} + +/** + * Examine Fibre Channel peer link state + * + * @ link Fibre Channel link state monitor + */ +static void fc_peer_examine ( struct fc_link_state *link ) { + struct fc_peer *peer = container_of ( link, struct fc_peer, link ); + struct fc_port *port; + int rc; + + /* Check to see if underlying port link has gone down */ + if ( peer->port && ( ! fc_link_ok ( &peer->port->link ) ) ) { + fc_peer_logout ( peer, -ENOTCONN ); + return; + } + + /* Do nothing if already logged in */ + if ( fc_link_ok ( &peer->link ) ) + return; + + DBGC ( peer, "FCPEER %s attempting login\n", + fc_ntoa ( &peer->port_wwn ) ); + + /* Sanity check */ + assert ( peer->port == NULL ); + + /* First, look for a port with the peer attached via a + * point-to-point link. + */ + list_for_each_entry ( port, &fc_ports, list ) { + if ( fc_link_ok ( &port->link ) && + ( ! ( port->flags & FC_PORT_HAS_FABRIC ) ) && + ( memcmp ( &peer->port_wwn, &port->link_port_wwn, + sizeof ( peer->port_wwn ) ) == 0 ) ) { + /* Use this peer port ID, and stop looking */ + fc_peer_plogi ( peer, port, &port->ptp_link_port_id ); + return; + } + } + + /* If the peer is not directly attached, try initiating a name + * server lookup on any suitable ports. + */ + list_for_each_entry ( port, &fc_ports, list ) { + if ( fc_link_ok ( &port->link ) && + ( port->flags & FC_PORT_HAS_FABRIC ) && + ( port->flags & FC_PORT_HAS_NS ) ) { + if ( ( rc = fc_ns_query ( peer, port, + fc_peer_plogi ) ) != 0 ) { + DBGC ( peer, "FCPEER %s could not attempt " + "name server lookup on %s: %s\n", + fc_ntoa ( &peer->port_wwn ), port->name, + strerror ( rc ) ); + /* Non-fatal */ + } + } + } +} + +/** Fibre Channel peer PLOGI interface operations */ +static struct interface_operation fc_peer_plogi_op[] = { + INTF_OP ( intf_close, struct fc_peer *, fc_peer_plogi_done ), +}; + +/** Fibre Channel peer PLOGI interface descriptor */ +static struct interface_descriptor fc_peer_plogi_desc = + INTF_DESC ( struct fc_peer, plogi, fc_peer_plogi_op ); + +/** + * Create Fibre Channel peer + * + * @v port_wwn Node name + * @ret peer Fibre Channel peer, or NULL + */ +static struct fc_peer * fc_peer_create ( const struct fc_name *port_wwn ) { + struct fc_peer *peer; + + /* Allocate and initialise structure */ + peer = zalloc ( sizeof ( *peer ) ); + if ( ! peer ) + return NULL; + ref_init ( &peer->refcnt, NULL ); + fc_link_init ( &peer->link, fc_peer_examine, &peer->refcnt ); + intf_init ( &peer->plogi, &fc_peer_plogi_desc, &peer->refcnt ); + list_add_tail ( &peer->list, &fc_peers ); + memcpy ( &peer->port_wwn, port_wwn, sizeof ( peer->port_wwn ) ); + INIT_LIST_HEAD ( &peer->ulps ); + + /* Start link monitor */ + fc_link_start ( &peer->link ); + + DBGC ( peer, "FCPEER %s created\n", fc_ntoa ( &peer->port_wwn ) ); + return peer; +} + +/** + * Get Fibre Channel peer by node name + * + * @v port_wwn Node name + * @ret peer Fibre Channel peer, or NULL + */ +struct fc_peer * fc_peer_get_wwn ( const struct fc_name *port_wwn ) { + struct fc_peer *peer; + + /* Look for an existing peer */ + list_for_each_entry ( peer, &fc_peers, list ) { + if ( memcmp ( &peer->port_wwn, port_wwn, + sizeof ( peer->port_wwn ) ) == 0 ) + return fc_peer_get ( peer ); + } + + /* Create a new peer */ + peer = fc_peer_create ( port_wwn ); + if ( ! peer ) + return NULL; + + return peer; +} + +/** + * Get Fibre Channel peer by port ID + * + * @v port Fibre Channel port + * @v peer_port_id Peer port ID + * @ret peer Fibre Channel peer, or NULL + */ +struct fc_peer * fc_peer_get_port_id ( struct fc_port *port, + const struct fc_port_id *peer_port_id ){ + struct fc_peer *peer; + + /* Look for an existing peer */ + list_for_each_entry ( peer, &fc_peers, list ) { + if ( ( peer->port == port ) && + ( memcmp ( &peer->port_id, peer_port_id, + sizeof ( peer->port_id ) ) == 0 ) ) + return fc_peer_get ( peer ); + } + + /* Cannot create a new peer, since we have no port name to use */ + return NULL; +} + +/****************************************************************************** + * + * Fibre Channel upper-layer protocols + * + ****************************************************************************** + */ + +/** + * Free Fibre Channel upper-layer protocol + * + * @v refcnt Reference count + */ +static void fc_ulp_free ( struct refcnt *refcnt ) { + struct fc_ulp *ulp = container_of ( refcnt, struct fc_ulp, refcnt ); + + fc_peer_put ( ulp->peer ); + free ( ulp ); +} + +/** + * Close Fibre Channel upper-layer protocol + * + * @v ulp Fibre Channel upper-layer protocol + * @v rc Reason for close + */ +static void fc_ulp_close ( struct fc_ulp *ulp, int rc ) { + + DBGC ( ulp, "FCULP %s/%02x closed: %s\n", + fc_ntoa ( &ulp->peer->port_wwn ), ulp->type, strerror ( rc ) ); + + /* Sanity check */ + assert ( list_empty ( &ulp->users ) ); + + /* Stop link monitor */ + fc_link_stop ( &ulp->link ); + + /* Shut down interfaces */ + intf_shutdown ( &ulp->prli, rc ); + + /* Remove from list of ULPs */ + list_del ( &ulp->list ); + INIT_LIST_HEAD ( &ulp->list ); +} + +/** + * Attach Fibre Channel upper-layer protocol user + * + * @v ulp Fibre Channel upper-layer protocol + * @v user Fibre Channel upper-layer protocol user + */ +void fc_ulp_attach ( struct fc_ulp *ulp, struct fc_ulp_user *user ) { + + /* Sanity check */ + assert ( user->ulp == NULL ); + + /* Increment peer's usage count */ + fc_peer_increment ( ulp->peer ); + + /* Attach user */ + user->ulp = fc_ulp_get ( ulp ); + list_add ( &user->list, &ulp->users ); +} + +/** + * Detach Fibre Channel upper-layer protocol user + * + * @v user Fibre Channel upper-layer protocol user + */ +void fc_ulp_detach ( struct fc_ulp_user *user ) { + struct fc_ulp *ulp = user->ulp; + + /* Do nothing if not attached */ + if ( ! ulp ) + return; + + /* Sanity checks */ + list_check_contains_entry ( user, &ulp->users, list ); + + /* Detach user and log out if no users remain */ + list_del ( &user->list ); + if ( list_empty ( &ulp->users ) ) + fc_ulp_logout ( ulp, 0 ); + + /* Decrement our peer's usage count */ + fc_peer_decrement ( ulp->peer ); + + /* Drop reference */ + user->ulp = NULL; + fc_ulp_put ( ulp ); +} + +/** + * Log in Fibre Channel upper-layer protocol + * + * @v ulp Fibre Channel upper-layer protocol + * @v param Service parameters + * @v param_len Length of service parameters + * @v originated Login was originated by us + * @ret rc Return status code + */ +int fc_ulp_login ( struct fc_ulp *ulp, const void *param, size_t param_len, + int originated ) { + struct fc_ulp_user *user; + struct fc_ulp_user *tmp; + + /* Perform implicit logout if logged in and service parameters differ */ + if ( fc_link_ok ( &ulp->link ) && + ( ( ulp->param_len != param_len ) || + ( memcmp ( ulp->param, param, ulp->param_len ) != 0 ) ) ) { + fc_ulp_logout ( ulp, 0 ); + } + + /* Work around a bug in some versions of the Linux Fibre + * Channel stack, which fail to fully initialise image pairs + * established via a PRLI originated by the Linux stack + * itself. + */ + if ( originated ) + ulp->flags |= FC_ULP_ORIGINATED_LOGIN_OK; + if ( ! ( ulp->flags & FC_ULP_ORIGINATED_LOGIN_OK ) ) { + DBGC ( ulp, "FCULP %s/%02x sending extra PRLI to work around " + "Linux bug\n", + fc_ntoa ( &ulp->peer->port_wwn ), ulp->type ); + fc_link_stop ( &ulp->link ); + fc_link_start ( &ulp->link ); + return 0; + } + + /* Log in, if applicable */ + if ( ! fc_link_ok ( &ulp->link ) ) { + + /* Record service parameters */ + assert ( ulp->param == NULL ); + assert ( ulp->param_len == 0 ); + ulp->param = malloc ( param_len ); + if ( ! ulp->param ) { + DBGC ( ulp, "FCULP %s/%02x could not record " + "parameters\n", + fc_ntoa ( &ulp->peer->port_wwn ), ulp->type ); + return -ENOMEM; + } + memcpy ( ulp->param, param, param_len ); + ulp->param_len = param_len; + DBGC ( ulp, "FCULP %s/%02x logged in with parameters:\n", + fc_ntoa ( &ulp->peer->port_wwn ), ulp->type ); + DBGC_HDA ( ulp, 0, ulp->param, ulp->param_len ); + + /* Add login reference */ + fc_ulp_get ( ulp ); + } + + /* Record login */ + fc_link_up ( &ulp->link ); + + /* Notify users of link state change */ + list_for_each_entry_safe ( user, tmp, &ulp->users, list ) { + fc_ulp_user_get ( user ); + user->examine ( user ); + fc_ulp_user_put ( user ); + } + + return 0; +} + +/** + * Log out Fibre Channel upper-layer protocol + * + * @v ulp Fibre Channel upper-layer protocol + * @v rc Reason for logout + */ +void fc_ulp_logout ( struct fc_ulp *ulp, int rc ) { + struct fc_ulp_user *user; + struct fc_ulp_user *tmp; + + DBGC ( ulp, "FCULP %s/%02x logged out: %s\n", + fc_ntoa ( &ulp->peer->port_wwn ), ulp->type, strerror ( rc ) ); + + /* Drop login reference, if applicable */ + if ( fc_link_ok ( &ulp->link ) ) + fc_ulp_put ( ulp ); + + /* Discard service parameters */ + free ( ulp->param ); + ulp->param = NULL; + ulp->param_len = 0; + ulp->flags = 0; + + /* Record logout */ + fc_link_err ( &ulp->link, rc ); + + /* Notify users of link state change */ + list_for_each_entry_safe ( user, tmp, &ulp->users, list ) { + fc_ulp_user_get ( user ); + user->examine ( user ); + fc_ulp_user_put ( user ); + } + + /* Close ULP if there are no clients attached */ + if ( list_empty ( &ulp->users ) ) + fc_ulp_close ( ulp, rc ); +} + +/** + * Handle PRLI completion + * + * @v ulp Fibre Channel upper-layer protocol + * @v rc Reason for completion + */ +static void fc_ulp_prli_done ( struct fc_ulp *ulp, int rc ) { + + intf_restart ( &ulp->prli, rc ); + + if ( rc != 0 ) + fc_ulp_logout ( ulp, rc ); +} + +/** + * Examine Fibre Channel upper-layer protocol link state + * + * @ link Fibre Channel link state monitor + */ +static void fc_ulp_examine ( struct fc_link_state *link ) { + struct fc_ulp *ulp = container_of ( link, struct fc_ulp, link ); + int rc; + + /* Check to see if underlying peer link has gone down */ + if ( ! fc_link_ok ( &ulp->peer->link ) ) { + fc_ulp_logout ( ulp, -ENOTCONN ); + return; + } + + /* Do nothing if already logged in */ + if ( fc_link_ok ( &ulp->link ) && + ( ulp->flags & FC_ULP_ORIGINATED_LOGIN_OK ) ) + return; + + DBGC ( ulp, "FCULP %s/%02x attempting login\n", + fc_ntoa ( &ulp->peer->port_wwn ), ulp->type ); + + /* Try to create PRLI ELS */ + intf_restart ( &ulp->prli, -ECANCELED ); + if ( ( rc = fc_els_prli ( &ulp->prli, ulp->peer->port, + &ulp->peer->port_id, ulp->type ) ) != 0 ) { + DBGC ( ulp, "FCULP %s/%02x could not initiate PRLI: %s\n", + fc_ntoa ( &ulp->peer->port_wwn ), ulp->type, + strerror ( rc ) ); + fc_ulp_logout ( ulp, rc ); + return; + } +} + +/** Fibre Channel upper-layer protocol PRLI interface operations */ +static struct interface_operation fc_ulp_prli_op[] = { + INTF_OP ( intf_close, struct fc_ulp *, fc_ulp_prli_done ), +}; + +/** Fibre Channel upper-layer protocol PRLI interface descriptor */ +static struct interface_descriptor fc_ulp_prli_desc = + INTF_DESC ( struct fc_ulp, prli, fc_ulp_prli_op ); + +/** + * Create Fibre Channel upper-layer protocl + * + * @v peer Fibre Channel peer + * @v type Type + * @ret ulp Fibre Channel upper-layer protocol, or NULL + */ +static struct fc_ulp * fc_ulp_create ( struct fc_peer *peer, + unsigned int type ) { + struct fc_ulp *ulp; + + /* Allocate and initialise structure */ + ulp = zalloc ( sizeof ( *ulp ) ); + if ( ! ulp ) + return NULL; + ref_init ( &ulp->refcnt, fc_ulp_free ); + fc_link_init ( &ulp->link, fc_ulp_examine, &ulp->refcnt ); + intf_init ( &ulp->prli, &fc_ulp_prli_desc, &ulp->refcnt ); + ulp->peer = fc_peer_get ( peer ); + list_add_tail ( &ulp->list, &peer->ulps ); + ulp->type = type; + INIT_LIST_HEAD ( &ulp->users ); + + /* Start link state monitor */ + fc_link_start ( &ulp->link ); + + DBGC ( ulp, "FCULP %s/%02x created\n", + fc_ntoa ( &ulp->peer->port_wwn ), ulp->type ); + return ulp; +} + +/** + * Get Fibre Channel upper-layer protocol by peer and type + * + * @v peer Fibre Channel peer + * @v type Type + * @ret ulp Fibre Channel upper-layer protocol, or NULL + */ +static struct fc_ulp * fc_ulp_get_type ( struct fc_peer *peer, + unsigned int type ) { + struct fc_ulp *ulp; + + /* Look for an existing ULP */ + list_for_each_entry ( ulp, &peer->ulps, list ) { + if ( ulp->type == type ) + return fc_ulp_get ( ulp ); + } + + /* Create a new ULP */ + ulp = fc_ulp_create ( peer, type ); + if ( ! ulp ) + return NULL; + + return ulp; +} + +/** + * Get Fibre Channel upper-layer protocol by port name and type + * + * @v port_wwn Port name + * @v type Type + * @ret ulp Fibre Channel upper-layer protocol, or NULL + */ +struct fc_ulp * fc_ulp_get_wwn_type ( const struct fc_name *port_wwn, + unsigned int type ) { + struct fc_ulp *ulp; + struct fc_peer *peer; + + /* Get peer */ + peer = fc_peer_get_wwn ( port_wwn ); + if ( ! peer ) + goto err_peer_get_wwn; + + /* Get ULP */ + ulp = fc_ulp_get_type ( peer, type ); + if ( ! ulp ) + goto err_ulp_get_type; + + /* Drop temporary reference to peer */ + fc_peer_put ( peer ); + + return ulp; + + fc_ulp_put ( ulp ); + err_ulp_get_type: + fc_peer_put ( peer ); + err_peer_get_wwn: + return NULL; +} + +/** + * Get Fibre Channel upper-layer protocol by port ID and type + * + * @v port Fibre Channel port + * @v peer_port_id Peer port ID + * @v type Type + * @ret ulp Fibre Channel upper-layer protocol, or NULL + */ +struct fc_ulp * fc_ulp_get_port_id_type ( struct fc_port *port, + const struct fc_port_id *peer_port_id, + unsigned int type ) { + struct fc_ulp *ulp; + struct fc_peer *peer; + + /* Get peer */ + peer = fc_peer_get_port_id ( port, peer_port_id ); + if ( ! peer ) + goto err_peer_get_wwn; + + /* Get ULP */ + ulp = fc_ulp_get_type ( peer, type ); + if ( ! ulp ) + goto err_ulp_get_type; + + /* Drop temporary reference to peer */ + fc_peer_put ( peer ); + + return ulp; + + fc_ulp_put ( ulp ); + err_ulp_get_type: + fc_peer_put ( peer ); + err_peer_get_wwn: + return NULL; +} diff --git a/qemu/roms/ipxe/src/net/fcels.c b/qemu/roms/ipxe/src/net/fcels.c new file mode 100644 index 000000000..1cfe90727 --- /dev/null +++ b/qemu/roms/ipxe/src/net/fcels.c @@ -0,0 +1,1339 @@ +/* + * Copyright (C) 2010 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <assert.h> +#include <byteswap.h> +#include <ipxe/interface.h> +#include <ipxe/xfer.h> +#include <ipxe/iobuf.h> +#include <ipxe/process.h> +#include <ipxe/fc.h> +#include <ipxe/fcels.h> + +/** @file + * + * Fibre Channel Extended Link Services + * + */ + +/** Fibre Channel ELS transaction debug message format */ +#define FCELS_FMT "FCELS %s %s %s %s" + +/** Fibre Channel ELS transaction debug message arguments */ +#define FCELS_ARGS( els ) \ + (els)->port->name, \ + ( (els)->handler ? (els)->handler->name : "unknown ELS" ), \ + ( fc_els_is_request ( els ) ? "to" : "from" ), \ + fc_id_ntoa ( &(els)->peer_port_id ) + +struct fc_els_handler fc_els_unknown_handler __fc_els_handler; + +/** + * Free Fibre Channel ELS transaction + * + * @v refcnt Reference count + */ +static void fc_els_free ( struct refcnt *refcnt ) { + struct fc_els *els = container_of ( refcnt, struct fc_els, refcnt ); + + assert ( ! process_running ( &els->process ) ); + fc_port_put ( els->port ); + free ( els ); +} + +/** + * Close Fibre Channel ELS transaction + * + * @v els Fibre Channel ELS transaction + * @v rc Reason for close + */ +static void fc_els_close ( struct fc_els *els, int rc ) { + + if ( rc != 0 ) { + DBGC ( els, FCELS_FMT " complete (%s)\n", + FCELS_ARGS ( els ), strerror ( rc ) ); + } + + /* Stop process */ + process_del ( &els->process ); + + /* Shut down interfaces */ + intf_shutdown ( &els->xchg, rc ); + intf_shutdown ( &els->job, rc ); +} + +/** + * Detect Fibre Channel ELS frame handler + * + * @v els Fibre Channel ELS transaction + * @v command ELS command code + * @ret handler ELS handler, or NULL + */ +static struct fc_els_handler * fc_els_detect ( struct fc_els *els, + const void *data, + size_t len ) { + const struct fc_els_frame_common *frame = data; + struct fc_els_handler *handler; + int rc; + + /* Sanity check */ + if ( len < sizeof ( *frame ) ) + return NULL; + + /* Try each handler in turn */ + for_each_table_entry ( handler, FC_ELS_HANDLERS ) { + if ( ( rc = handler->detect ( els, data, len ) ) == 0 ) + return handler; + } + + return NULL; +} + +/** + * Transmit Fibre Channel ELS frame + * + * @v els Fibre Channel ELS transaction + * @v data Data to transmit + * @v len Length of data + * @ret rc Return status code + */ +int fc_els_tx ( struct fc_els *els, const void *data, size_t len ) { + struct xfer_metadata meta; + struct sockaddr_fc dest; + int rc; + + DBGC2 ( els, FCELS_FMT " transmitting:\n", FCELS_ARGS ( els ) ); + DBGC2_HDA ( els, 0, data, len ); + + /* Construct metadata */ + memset ( &meta, 0, sizeof ( meta ) ); + meta.flags = ( fc_els_is_request ( els ) ? + XFER_FL_OVER : ( XFER_FL_RESPONSE | XFER_FL_OUT ) ); + meta.dest = fc_fill_sockaddr ( &dest, &els->peer_port_id ); + + /* Transmit frame */ + if ( ( rc = xfer_deliver_raw_meta ( &els->xchg, data, len, + &meta ) ) != 0 ) { + DBGC ( els, FCELS_FMT " could not deliver frame: %s\n", + FCELS_ARGS ( els ), strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Receive Fibre Channel ELS frame + * + * @v els Fibre Channel ELS transaction + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int fc_els_rx ( struct fc_els *els, + struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + struct fc_els_frame_common *frame = iobuf->data; + struct sockaddr_fc *src = ( ( struct sockaddr_fc * ) meta->src ); + struct sockaddr_fc *dest = ( ( struct sockaddr_fc * ) meta->dest ); + size_t len = iob_len ( iobuf ); + int rc; + + /* Sanity check */ + if ( len < sizeof ( *frame ) ) { + DBGC ( els, FCELS_FMT " received underlength frame:\n", + FCELS_ARGS ( els ) ); + DBGC_HDA ( els, 0, frame, len ); + rc = -EINVAL; + goto done; + } + if ( ! src ) { + DBGC ( els, FCELS_FMT " received frame missing source " + "address:\n", FCELS_ARGS ( els ) ); + rc = -EINVAL; + goto done; + } + if ( ! dest ) { + DBGC ( els, FCELS_FMT " received frame missing destination " + "address:\n", FCELS_ARGS ( els ) ); + rc = -EINVAL; + goto done; + } + + /* Check for rejection responses */ + if ( fc_els_is_request ( els ) && + ( frame->command != FC_ELS_LS_ACC ) ) { + DBGC ( els, FCELS_FMT " rejected:\n", FCELS_ARGS ( els ) ); + DBGC_HDA ( els, 0, frame, len ); + rc = -EACCES; + goto done; + } + + /* Update port IDs */ + memcpy ( &els->port_id, &dest->sfc_port_id, sizeof ( els->port_id ) ); + memcpy ( &els->peer_port_id, &src->sfc_port_id, + sizeof ( els->peer_port_id ) ); + + /* Determine handler, if necessary */ + if ( ! els->handler ) + els->handler = fc_els_detect ( els, frame, len ); + if ( ! els->handler ) + els->handler = &fc_els_unknown_handler; + + DBGC2 ( els, FCELS_FMT " received:\n", FCELS_ARGS ( els ) ); + DBGC2_HDA ( els, 0, frame, len ); + + /* Handle received frame */ + if ( ( rc = els->handler->rx ( els, frame, len ) ) != 0 ) { + DBGC ( els, FCELS_FMT " could not handle received frame: " + "%s\n", FCELS_ARGS ( els ), strerror ( rc ) ); + DBGC_HDA ( els, 0, frame, len ); + goto done; + } + + done: + /* Free I/O buffer */ + free_iob ( iobuf ); + + /* Close transaction */ + fc_els_close ( els, rc ); + + return rc; +} + +/** Fibre Channel ELS exchange interface operations */ +static struct interface_operation fc_els_xchg_op[] = { + INTF_OP ( xfer_deliver, struct fc_els *, fc_els_rx ), + INTF_OP ( intf_close, struct fc_els *, fc_els_close ), +}; + +/** Fibre Channel ELS exchange interface descriptor */ +static struct interface_descriptor fc_els_xchg_desc = + INTF_DESC ( struct fc_els, xchg, fc_els_xchg_op ); + +/** Fibre Channel ELS job control interface operations */ +static struct interface_operation fc_els_job_op[] = { + INTF_OP ( intf_close, struct fc_els *, fc_els_close ), +}; + +/** Fibre Channel ELS job control interface descriptor */ +static struct interface_descriptor fc_els_job_desc = + INTF_DESC ( struct fc_els, job, fc_els_job_op ); + +/** + * Fibre Channel ELS process + * + * @v els Fibre Channel ELS transaction + */ +static void fc_els_step ( struct fc_els *els ) { + int xchg_id; + int rc; + + /* Sanity check */ + assert ( fc_els_is_request ( els ) ); + + /* Create exchange */ + if ( ( xchg_id = fc_xchg_originate ( &els->xchg, els->port, + &els->peer_port_id, + FC_TYPE_ELS ) ) < 0 ) { + rc = xchg_id; + DBGC ( els, FCELS_FMT " could not create exchange: %s\n", + FCELS_ARGS ( els ), strerror ( rc ) ); + fc_els_close ( els, rc ); + return; + } + + /* Transmit request */ + if ( ( rc = els->handler->tx ( els ) ) != 0 ) { + DBGC ( els, FCELS_FMT " could not transmit request: %s\n", + FCELS_ARGS ( els ), strerror ( rc ) ); + fc_els_close ( els, rc ); + return; + } +} + +/** Fibre Channel ELS process descriptor */ +static struct process_descriptor fc_els_process_desc = + PROC_DESC_ONCE ( struct fc_els, process, fc_els_step ); + +/** + * Create ELS transaction + * + * @v port Fibre Channel port + * @v port_id Local port ID + * @v peer_port_id Peer port ID + * @ret els Fibre Channel ELS transaction, or NULL + */ +static struct fc_els * fc_els_create ( struct fc_port *port, + struct fc_port_id *port_id, + struct fc_port_id *peer_port_id ) { + struct fc_els *els; + + /* Allocate and initialise structure */ + els = zalloc ( sizeof ( *els ) ); + if ( ! els ) + return NULL; + ref_init ( &els->refcnt, fc_els_free ); + intf_init ( &els->job, &fc_els_job_desc, &els->refcnt ); + intf_init ( &els->xchg, &fc_els_xchg_desc, &els->refcnt ); + process_init_stopped ( &els->process, &fc_els_process_desc, + &els->refcnt ); + els->port = fc_port_get ( port ); + memcpy ( &els->port_id, port_id, sizeof ( els->port_id ) ); + memcpy ( &els->peer_port_id, peer_port_id, + sizeof ( els->peer_port_id ) ); + return els; +} + +/** + * Create ELS request + * + * @v job Parent job-control interface + * @v port Fibre Channel port + * @v peer_port_id Peer port ID + * @v handler ELS handler + * @ret rc Return status code + */ +int fc_els_request ( struct interface *job, struct fc_port *port, + struct fc_port_id *peer_port_id, + struct fc_els_handler *handler ) { + struct fc_els *els; + + /* Allocate and initialise structure */ + els = fc_els_create ( port, &port->port_id, peer_port_id ); + if ( ! els ) + return -ENOMEM; + els->handler = handler; + els->flags = FC_ELS_REQUEST; + process_add ( &els->process ); + + /* Attach to parent job interface, mortalise self, and return */ + intf_plug_plug ( &els->job, job ); + ref_put ( &els->refcnt ); + return 0; +} + +/** + * Create ELS response + * + * @v xchg Exchange interface + * @v port Fibre Channel port + * @v port_id Local port ID + * @v peer_port_id Peer port ID + * @ret rc Return status code + */ +static int fc_els_respond ( struct interface *xchg, struct fc_port *port, + struct fc_port_id *port_id, + struct fc_port_id *peer_port_id ) { + struct fc_els *els; + + /* Allocate and initialise structure */ + els = fc_els_create ( port, port_id, peer_port_id ); + if ( ! els ) + return -ENOMEM; + + /* Attach to exchange interface, mortalise self, and return */ + intf_plug_plug ( &els->xchg, xchg ); + ref_put ( &els->refcnt ); + return 0; +} + +/** Fibre Channel ELS responder */ +struct fc_responder fc_els_responder __fc_responder = { + .type = FC_TYPE_ELS, + .respond = fc_els_respond, +}; + +/****************************************************************************** + * + * Unknown ELS handler + * + ****************************************************************************** + */ + +/** + * Transmit unknown ELS request + * + * @v els Fibre Channel ELS transaction + * @ret rc Return status code + */ +static int fc_els_unknown_tx ( struct fc_els *els __unused ) { + return -ENOTSUP; +} + +/** + * Transmit unknown ELS response + * + * @v els Fibre Channel ELS transaction + * @ret rc Return status code + */ +static int fc_els_unknown_tx_response ( struct fc_els *els ) { + struct fc_ls_rjt_frame ls_rjt; + + /* Construct LS_RJT */ + memset ( &ls_rjt, 0, sizeof ( ls_rjt ) ); + ls_rjt.command = FC_ELS_LS_RJT; + ls_rjt.reason = FC_ELS_RJT_UNSUPPORTED; + + /* Transmit LS_RJT */ + return fc_els_tx ( els, &ls_rjt, sizeof ( ls_rjt ) ); +} + +/** + * Receive unknown ELS + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_unknown_rx ( struct fc_els *els, void *data, size_t len ) { + int rc; + + DBGC ( els, FCELS_FMT ":\n", FCELS_ARGS ( els ) ); + DBGC_HDA ( els, 0, data, len ); + + /* Transmit response, if applicable */ + if ( ! fc_els_is_request ( els ) ) { + if ( ( rc = fc_els_unknown_tx_response ( els ) ) != 0 ) + return rc; + } + + return 0; +} + +/** + * Detect unknown ELS + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_unknown_detect ( struct fc_els *els __unused, + const void *data __unused, + size_t len __unused ) { + return -ENOTSUP; +} + +/** Unknown ELS handler */ +struct fc_els_handler fc_els_unknown_handler __fc_els_handler = { + .name = "UNKNOWN", + .tx = fc_els_unknown_tx, + .rx = fc_els_unknown_rx, + .detect = fc_els_unknown_detect, +}; + +/****************************************************************************** + * + * FLOGI + * + ****************************************************************************** + */ + +/** + * Transmit FLOGI + * + * @v els Fibre Channel ELS transaction + * @ret rc Return status code + */ +static int fc_els_flogi_tx ( struct fc_els *els ) { + struct fc_login_frame flogi; + + /* Construct FLOGI */ + memset ( &flogi, 0, sizeof ( flogi ) ); + flogi.command = fc_els_tx_command ( els, FC_ELS_FLOGI ); + flogi.common.version = htons ( FC_LOGIN_VERSION ); + flogi.common.credit = htons ( FC_LOGIN_DEFAULT_B2B ); + flogi.common.flags = htons ( FC_LOGIN_CONTINUOUS_OFFSET ); + flogi.common.mtu = htons ( FC_LOGIN_DEFAULT_MTU ); + memcpy ( &flogi.port_wwn, &els->port->port_wwn, + sizeof ( flogi.port_wwn ) ); + memcpy ( &flogi.node_wwn, &els->port->node_wwn, + sizeof ( flogi.node_wwn ) ); + flogi.class3.flags = htons ( FC_LOGIN_CLASS_VALID | + FC_LOGIN_CLASS_SEQUENTIAL ); + + /* Transmit FLOGI */ + return fc_els_tx ( els, &flogi, sizeof ( flogi ) ); +} + +/** + * Receive FLOGI + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_flogi_rx ( struct fc_els *els, void *data, size_t len ) { + struct fc_login_frame *flogi = data; + int has_fabric; + int rc; + + /* Sanity check */ + if ( len < sizeof ( *flogi ) ) { + DBGC ( els, FCELS_FMT " received underlength frame:\n", + FCELS_ARGS ( els ) ); + DBGC_HDA ( els, 0, data, len ); + return -EINVAL; + } + + /* Extract parameters */ + has_fabric = ( flogi->common.flags & htons ( FC_LOGIN_F_PORT ) ); + DBGC ( els, FCELS_FMT " has node %s\n", FCELS_ARGS ( els ), + fc_ntoa ( &flogi->node_wwn ) ); + DBGC ( els, FCELS_FMT " has port %s\n", FCELS_ARGS ( els ), + fc_ntoa ( &flogi->port_wwn ) ); + if ( has_fabric ) { + DBGC ( els, FCELS_FMT " has fabric with", FCELS_ARGS ( els ) ); + DBGC ( els, " local ID %s\n", fc_id_ntoa ( &els->port_id ) ); + } else { + DBGC ( els, FCELS_FMT " has point-to-point link\n", + FCELS_ARGS ( els ) ); + } + + /* Log in port */ + if ( ( rc = fc_port_login ( els->port, &els->port_id, &flogi->node_wwn, + &flogi->port_wwn, has_fabric ) ) != 0 ) { + DBGC ( els, FCELS_FMT " could not log in port: %s\n", + FCELS_ARGS ( els ), strerror ( rc ) ); + return rc; + } + + /* Send any responses to the newly-assigned peer port ID, if + * applicable. + */ + if ( ! has_fabric ) { + memcpy ( &els->peer_port_id, &els->port->ptp_link_port_id, + sizeof ( els->peer_port_id ) ); + } + + /* Transmit response, if applicable */ + if ( ! fc_els_is_request ( els ) ) { + if ( ( rc = fc_els_flogi_tx ( els ) ) != 0 ) + return rc; + } + + return 0; +} + +/** + * Detect FLOGI + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_flogi_detect ( struct fc_els *els __unused, const void *data, + size_t len __unused ) { + const struct fc_login_frame *flogi = data; + + /* Check for FLOGI */ + if ( flogi->command != FC_ELS_FLOGI ) + return -EINVAL; + + return 0; +} + +/** FLOGI ELS handler */ +struct fc_els_handler fc_els_flogi_handler __fc_els_handler = { + .name = "FLOGI", + .tx = fc_els_flogi_tx, + .rx = fc_els_flogi_rx, + .detect = fc_els_flogi_detect, +}; + +/** + * Create FLOGI request + * + * @v parent Parent interface + * @v port Fibre Channel port + * @ret rc Return status code + */ +int fc_els_flogi ( struct interface *parent, struct fc_port *port ) { + + return fc_els_request ( parent, port, &fc_f_port_id, + &fc_els_flogi_handler ); +} + +/****************************************************************************** + * + * PLOGI + * + ****************************************************************************** + */ + +/** + * Transmit PLOGI + * + * @v els Fibre Channel ELS transaction + * @ret rc Return status code + */ +static int fc_els_plogi_tx ( struct fc_els *els ) { + struct fc_login_frame plogi; + + /* Construct PLOGI */ + memset ( &plogi, 0, sizeof ( plogi ) ); + plogi.command = fc_els_tx_command ( els, FC_ELS_PLOGI ); + plogi.common.version = htons ( FC_LOGIN_VERSION ); + plogi.common.credit = htons ( FC_LOGIN_DEFAULT_B2B ); + plogi.common.flags = htons ( FC_LOGIN_CONTINUOUS_OFFSET ); + plogi.common.mtu = htons ( FC_LOGIN_DEFAULT_MTU ); + plogi.common.u.plogi.max_seq = htons ( FC_LOGIN_DEFAULT_MAX_SEQ ); + plogi.common.u.plogi.rel_offs = htons ( FC_LOGIN_DEFAULT_REL_OFFS ); + plogi.common.e_d_tov = htonl ( FC_LOGIN_DEFAULT_E_D_TOV ); + memcpy ( &plogi.port_wwn, &els->port->port_wwn, + sizeof ( plogi.port_wwn ) ); + memcpy ( &plogi.node_wwn, &els->port->node_wwn, + sizeof ( plogi.node_wwn ) ); + plogi.class3.flags = htons ( FC_LOGIN_CLASS_VALID | + FC_LOGIN_CLASS_SEQUENTIAL ); + plogi.class3.mtu = htons ( FC_LOGIN_DEFAULT_MTU ); + plogi.class3.max_seq = htons ( FC_LOGIN_DEFAULT_MAX_SEQ ); + plogi.class3.max_seq_per_xchg = 1; + + /* Transmit PLOGI */ + return fc_els_tx ( els, &plogi, sizeof ( plogi ) ); +} + +/** + * Receive PLOGI + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_plogi_rx ( struct fc_els *els, void *data, size_t len ) { + struct fc_login_frame *plogi = data; + struct fc_peer *peer; + int rc; + + /* Sanity checks */ + if ( len < sizeof ( *plogi ) ) { + DBGC ( els, FCELS_FMT " received underlength frame:\n", + FCELS_ARGS ( els ) ); + DBGC_HDA ( els, 0, data, len ); + rc = -EINVAL; + goto err_sanity; + } + if ( ! fc_link_ok ( &els->port->link ) ) { + DBGC ( els, FCELS_FMT " received while port link is down\n", + FCELS_ARGS ( els ) ); + rc = -EINVAL; + goto err_sanity; + } + + /* Extract parameters */ + DBGC ( els, FCELS_FMT " has node %s\n", FCELS_ARGS ( els ), + fc_ntoa ( &plogi->node_wwn ) ); + DBGC ( els, FCELS_FMT " has port %s as %s\n", + FCELS_ARGS ( els ), fc_ntoa ( &plogi->port_wwn ), + fc_id_ntoa ( &els->peer_port_id ) ); + + /* Get peer */ + peer = fc_peer_get_wwn ( &plogi->port_wwn ); + if ( ! peer ) { + DBGC ( els, FCELS_FMT " could not create peer\n", + FCELS_ARGS ( els ) ); + rc = -ENOMEM; + goto err_peer_get_wwn; + } + + /* Record login */ + if ( ( rc = fc_peer_login ( peer, els->port, + &els->peer_port_id ) ) != 0 ) { + DBGC ( els, FCELS_FMT " could not log in peer: %s\n", + FCELS_ARGS ( els ), strerror ( rc ) ); + goto err_login; + } + + /* Transmit response, if applicable */ + if ( ! fc_els_is_request ( els ) ) { + if ( ( rc = fc_els_plogi_tx ( els ) ) != 0 ) + goto err_plogi_tx; + } + + /* Drop temporary reference to peer */ + fc_peer_put ( peer ); + + return 0; + + err_plogi_tx: + err_login: + fc_peer_put ( peer ); + err_peer_get_wwn: + err_sanity: + return rc; +} + +/** + * Detect PLOGI + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_plogi_detect ( struct fc_els *els __unused, const void *data, + size_t len __unused ) { + const struct fc_login_frame *plogi = data; + + /* Check for PLOGI */ + if ( plogi->command != FC_ELS_PLOGI ) + return -EINVAL; + + return 0; +} + +/** PLOGI ELS handler */ +struct fc_els_handler fc_els_plogi_handler __fc_els_handler = { + .name = "PLOGI", + .tx = fc_els_plogi_tx, + .rx = fc_els_plogi_rx, + .detect = fc_els_plogi_detect, +}; + +/** + * Create PLOGI request + * + * @v parent Parent interface + * @v port Fibre Channel port + * @v peer_port_id Peer port ID + * @ret rc Return status code + */ +int fc_els_plogi ( struct interface *parent, struct fc_port *port, + struct fc_port_id *peer_port_id ) { + + return fc_els_request ( parent, port, peer_port_id, + &fc_els_plogi_handler ); +} + +/****************************************************************************** + * + * LOGO + * + ****************************************************************************** + */ + +/** + * Transmit LOGO request + * + * @v els Fibre Channel ELS transaction + * @ret rc Return status code + */ +static int fc_els_logo_tx ( struct fc_els *els ) { + struct fc_logout_request_frame logo; + + /* Construct LOGO */ + memset ( &logo, 0, sizeof ( logo ) ); + logo.command = FC_ELS_LOGO; + memcpy ( &logo.port_id, &els->port->port_id, sizeof ( logo.port_id ) ); + memcpy ( &logo.port_wwn, &els->port->port_wwn, + sizeof ( logo.port_wwn ) ); + + /* Transmit LOGO */ + return fc_els_tx ( els, &logo, sizeof ( logo ) ); +} + +/** + * Transmit LOGO response + * + * @v els Fibre Channel ELS transaction + * @ret rc Return status code + */ +static int fc_els_logo_tx_response ( struct fc_els *els ) { + struct fc_logout_response_frame logo; + + /* Construct LOGO */ + memset ( &logo, 0, sizeof ( logo ) ); + logo.command = FC_ELS_LS_ACC; + + /* Transmit LOGO */ + return fc_els_tx ( els, &logo, sizeof ( logo ) ); +} + +/** + * Log out individual peer or whole port as applicable + * + * @v els Fibre Channel ELS transaction + * @v port_id Peer port ID + */ +static void fc_els_logo_logout ( struct fc_els *els, + struct fc_port_id *peer_port_id ) { + struct fc_peer *peer; + + if ( ( memcmp ( peer_port_id, &fc_f_port_id, + sizeof ( *peer_port_id ) ) == 0 ) || + ( memcmp ( peer_port_id, &els->port->port_id, + sizeof ( *peer_port_id ) ) == 0 ) ) { + fc_port_logout ( els->port, 0 ); + } else { + peer = fc_peer_get_port_id ( els->port, peer_port_id ); + if ( peer ) { + fc_peer_logout ( peer, 0 ); + fc_peer_put ( peer ); + } + } +} + +/** + * Receive LOGO request + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_logo_rx_request ( struct fc_els *els, void *data, + size_t len ) { + struct fc_logout_request_frame *logo = data; + int rc; + + /* Sanity check */ + if ( len < sizeof ( *logo ) ) { + DBGC ( els, FCELS_FMT " received underlength frame:\n", + FCELS_ARGS ( els ) ); + DBGC_HDA ( els, 0, data, len ); + return -EINVAL; + } + + DBGC ( els, FCELS_FMT " has port %s as %s\n", FCELS_ARGS ( els ), + fc_ntoa ( &logo->port_wwn ), fc_id_ntoa ( &logo->port_id ) ); + + /* Log out individual peer or whole port as applicable */ + fc_els_logo_logout ( els, &logo->port_id ); + + /* Transmit repsonse */ + if ( ( rc = fc_els_logo_tx_response ( els ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Receive LOGO response + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_logo_rx_response ( struct fc_els *els, void *data __unused, + size_t len __unused ) { + + /* Log out individual peer or whole port as applicable */ + fc_els_logo_logout ( els, &els->peer_port_id ); + + return 0; +} + +/** + * Receive LOGO + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_logo_rx ( struct fc_els *els, void *data, size_t len ) { + + if ( fc_els_is_request ( els ) ) { + return fc_els_logo_rx_response ( els, data, len ); + } else { + return fc_els_logo_rx_request ( els, data, len ); + } +} + +/** + * Detect LOGO + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_logo_detect ( struct fc_els *els __unused, const void *data, + size_t len __unused ) { + const struct fc_logout_request_frame *logo = data; + + /* Check for LOGO */ + if ( logo->command != FC_ELS_LOGO ) + return -EINVAL; + + return 0; +} + +/** LOGO ELS handler */ +struct fc_els_handler fc_els_logo_handler __fc_els_handler = { + .name = "LOGO", + .tx = fc_els_logo_tx, + .rx = fc_els_logo_rx, + .detect = fc_els_logo_detect, +}; + +/** + * Create LOGO request + * + * @v parent Parent interface + * @v port Fibre Channel port + * @v peer_port_id Peer port ID + * @ret rc Return status code + */ +int fc_els_logo ( struct interface *parent, struct fc_port *port, + struct fc_port_id *peer_port_id ) { + + return fc_els_request ( parent, port, peer_port_id, + &fc_els_logo_handler ); +} + +/****************************************************************************** + * + * PRLI + * + ****************************************************************************** + */ + +/** + * Find PRLI descriptor + * + * @v type Upper-layer protocol type + * @ret descriptor PRLI descriptor, or NULL + */ +static struct fc_els_prli_descriptor * +fc_els_prli_descriptor ( unsigned int type ) { + struct fc_els_prli_descriptor *descriptor; + + for_each_table_entry ( descriptor, FC_ELS_PRLI_DESCRIPTORS ) { + if ( descriptor->type == type ) + return descriptor; + } + return NULL; +} + +/** + * Transmit PRLI + * + * @v els Fibre Channel ELS transaction + * @v descriptor ELS PRLI descriptor + * @v param Service parameters + * @ret rc Return status code + */ +int fc_els_prli_tx ( struct fc_els *els, + struct fc_els_prli_descriptor *descriptor, void *param ) { + struct { + struct fc_prli_frame frame; + uint8_t param[descriptor->param_len]; + } __attribute__ (( packed )) prli; + struct fc_ulp *ulp; + int rc; + + /* Get ULP */ + ulp = fc_ulp_get_port_id_type ( els->port, &els->peer_port_id, + descriptor->type ); + if ( ! ulp ) { + rc = -ENOMEM; + goto err_get_port_id_type; + } + + /* Build frame for transmission */ + memset ( &prli, 0, sizeof ( prli ) ); + prli.frame.command = fc_els_tx_command ( els, FC_ELS_PRLI ); + prli.frame.page_len = + ( sizeof ( prli.frame.page ) + sizeof ( prli.param ) ); + prli.frame.len = htons ( sizeof ( prli ) ); + prli.frame.page.type = descriptor->type; + if ( fc_els_is_request ( els ) ) { + prli.frame.page.flags |= htons ( FC_PRLI_ESTABLISH ); + } else if ( fc_link_ok ( &ulp->link ) ) { + prli.frame.page.flags |= htons ( FC_PRLI_ESTABLISH | + FC_PRLI_RESPONSE_SUCCESS ); + } + memcpy ( &prli.param, param, sizeof ( prli.param ) ); + + /* Transmit frame */ + if ( ( rc = fc_els_tx ( els, &prli, sizeof ( prli ) ) ) != 0 ) + goto err_tx; + + /* Drop temporary reference to ULP */ + fc_ulp_put ( ulp ); + + return 0; + + err_tx: + fc_ulp_put ( ulp ); + err_get_port_id_type: + return rc; +} + +/** + * Receive PRLI + * + * @v els Fibre Channel ELS transaction + * @v descriptor ELS PRLI descriptor + * @v frame ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +int fc_els_prli_rx ( struct fc_els *els, + struct fc_els_prli_descriptor *descriptor, + void *data, size_t len ) { + struct { + struct fc_prli_frame frame; + uint8_t param[descriptor->param_len]; + } __attribute__ (( packed )) *prli = data; + struct fc_ulp *ulp; + int rc; + + /* Sanity check */ + if ( len < sizeof ( *prli ) ) { + DBGC ( els, FCELS_FMT " received underlength frame:\n", + FCELS_ARGS ( els ) ); + DBGC_HDA ( els, 0, data, len ); + rc = -EINVAL; + goto err_sanity; + } + + DBGC ( els, FCELS_FMT " has parameters:\n", FCELS_ARGS ( els ) ); + DBGC_HDA ( els, 0, prli->param, sizeof ( prli->param ) ); + + /* Get ULP */ + ulp = fc_ulp_get_port_id_type ( els->port, &els->peer_port_id, + descriptor->type ); + if ( ! ulp ) { + rc = -ENOMEM; + goto err_get_port_id_type; + } + + /* Sanity check */ + if ( ! fc_link_ok ( &ulp->peer->link ) ) { + DBGC ( els, FCELS_FMT " received while peer link is down\n", + FCELS_ARGS ( els ) ); + rc = -EINVAL; + goto err_link; + } + + /* Log in ULP, if applicable */ + if ( prli->frame.page.flags & htons ( FC_PRLI_ESTABLISH ) ) { + if ( ( rc = fc_ulp_login ( ulp, prli->param, + sizeof ( prli->param ), + fc_els_is_request ( els ) ) ) != 0 ){ + DBGC ( els, FCELS_FMT " could not log in ULP: %s\n", + FCELS_ARGS ( els ), strerror ( rc ) ); + goto err_login; + } + } else { + if ( fc_els_is_request ( els ) ) { + fc_ulp_logout ( ulp, -EACCES ); + } else { + /* This is just an information-gathering PRLI; do not + * log in or out + */ + } + } + + /* Transmit response, if applicable */ + if ( ! fc_els_is_request ( els ) ) { + if ( ( rc = els->handler->tx ( els ) ) != 0 ) + goto err_tx; + } + + /* Drop temporary reference to ULP */ + fc_ulp_put ( ulp ); + + return 0; + + err_tx: + err_login: + err_link: + fc_ulp_put ( ulp ); + err_get_port_id_type: + err_sanity: + return rc; +} + +/** + * Detect PRLI + * + * @v els Fibre Channel ELS transaction + * @v descriptor ELS PRLI descriptor + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +int fc_els_prli_detect ( struct fc_els *els __unused, + struct fc_els_prli_descriptor *descriptor, + const void *data, size_t len ) { + const struct { + struct fc_prli_frame frame; + uint8_t param[descriptor->param_len]; + } __attribute__ (( packed )) *prli = data; + + /* Check for PRLI */ + if ( prli->frame.command != FC_ELS_PRLI ) + return -EINVAL; + + /* Check for sufficient length to contain service parameter page */ + if ( len < sizeof ( *prli ) ) + return -EINVAL; + + /* Check for upper-layer protocol type */ + if ( prli->frame.page.type != descriptor->type ) + return -EINVAL; + + return 0; +} + +/** + * Create PRLI request + * + * @v parent Parent interface + * @v port Fibre Channel port + * @v peer_port_id Peer port ID + * @v type Upper-layer protocol type + * @ret rc Return status code + */ +int fc_els_prli ( struct interface *parent, struct fc_port *port, + struct fc_port_id *peer_port_id, unsigned int type ) { + struct fc_els_prli_descriptor *descriptor; + + /* Find a PRLI descriptor */ + descriptor = fc_els_prli_descriptor ( type ); + if ( ! descriptor ) + return -ENOTSUP; + + return fc_els_request ( parent, port, peer_port_id, + descriptor->handler ); +} + +/****************************************************************************** + * + * RTV + * + ****************************************************************************** + */ + +/** + * Transmit RTV response + * + * @v els Fibre Channel ELS transaction + * @ret rc Return status code + */ +static int fc_els_rtv_tx_response ( struct fc_els *els ) { + struct fc_rtv_response_frame rtv; + + /* Construct RTV */ + memset ( &rtv, 0, sizeof ( rtv ) ); + rtv.command = FC_ELS_LS_ACC; + rtv.e_d_tov = htonl ( FC_LOGIN_DEFAULT_E_D_TOV ); + + /* Transmit RTV */ + return fc_els_tx ( els, &rtv, sizeof ( rtv ) ); +} + +/** + * Receive RTV + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_rtv_rx ( struct fc_els *els, void *data __unused, + size_t len __unused ) { + int rc; + + DBGC ( els, FCELS_FMT "\n", FCELS_ARGS ( els ) ); + + /* Transmit response */ + if ( ! fc_els_is_request ( els ) ) { + if ( ( rc = fc_els_rtv_tx_response ( els ) ) != 0 ) + return rc; + } + + return 0; +} + +/** + * Detect RTV + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_rtv_detect ( struct fc_els *els __unused, const void *data, + size_t len __unused ) { + const struct fc_rtv_request_frame *rtv = data; + + /* Check for RTV */ + if ( rtv->command != FC_ELS_RTV ) + return -EINVAL; + + return 0; +} + +/** RTV ELS handler */ +struct fc_els_handler fc_els_rtv_handler __fc_els_handler = { + .name = "RTV", + .tx = fc_els_unknown_tx, + .rx = fc_els_rtv_rx, + .detect = fc_els_rtv_detect, +}; + +/****************************************************************************** + * + * ECHO + * + ****************************************************************************** + */ + +/** ECHO request data */ +struct fc_echo_request_frame { + /** ECHO frame header */ + struct fc_echo_frame_header echo; + /** Magic marker */ + uint32_t magic; +} __attribute__ (( packed )); + +/** ECHO magic marker */ +#define FC_ECHO_MAGIC 0x69505845 + +/** + * Transmit ECHO + * + * @v els Fibre Channel ELS transaction + * @ret rc Return status code + */ +static int fc_els_echo_tx ( struct fc_els *els ) { + struct fc_echo_request_frame echo; + + /* Construct ECHO */ + memset ( &echo, 0, sizeof ( echo ) ); + echo.echo.command = FC_ELS_ECHO; + echo.magic = htonl ( FC_ECHO_MAGIC ); + + /* Transmit ECHO */ + return fc_els_tx ( els, &echo, sizeof ( echo ) ); +} + +/** + * Receive ECHO request + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_echo_rx_request ( struct fc_els *els, void *data, + size_t len ) { + struct { + struct fc_echo_frame_header echo; + char payload[ len - sizeof ( struct fc_echo_frame_header ) ]; + } *echo = data; + int rc; + + DBGC ( els, FCELS_FMT "\n", FCELS_ARGS ( els ) ); + + /* Transmit response */ + echo->echo.command = FC_ELS_LS_ACC; + if ( ( rc = fc_els_tx ( els, echo, sizeof ( *echo ) ) ) != 0 ) + return rc; + + /* Nothing to do */ + return 0; +} + +/** + * Receive ECHO response + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_echo_rx_response ( struct fc_els *els, void *data, + size_t len ) { + struct fc_echo_request_frame *echo = data; + + DBGC ( els, FCELS_FMT "\n", FCELS_ARGS ( els ) ); + + /* Check response is correct */ + if ( ( len != sizeof ( *echo ) ) || + ( echo->magic != htonl ( FC_ECHO_MAGIC ) ) ) { + DBGC ( els, FCELS_FMT " received bad echo response\n", + FCELS_ARGS ( els ) ); + DBGC_HDA ( els, 0, data, len ); + return -EIO; + } + + return 0; +} + +/** + * Receive ECHO + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_echo_rx ( struct fc_els *els, void *data, size_t len ) { + + if ( fc_els_is_request ( els ) ) { + return fc_els_echo_rx_response ( els, data, len ); + } else { + return fc_els_echo_rx_request ( els, data, len ); + } +} + +/** + * Detect ECHO + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fc_els_echo_detect ( struct fc_els *els __unused, const void *data, + size_t len __unused ) { + const struct fc_echo_frame_header *echo = data; + + /* Check for ECHO */ + if ( echo->command != FC_ELS_ECHO ) + return -EINVAL; + + return 0; +} + +/** ECHO ELS handler */ +struct fc_els_handler fc_els_echo_handler __fc_els_handler = { + .name = "ECHO", + .tx = fc_els_echo_tx, + .rx = fc_els_echo_rx, + .detect = fc_els_echo_detect, +}; diff --git a/qemu/roms/ipxe/src/net/fcns.c b/qemu/roms/ipxe/src/net/fcns.c new file mode 100644 index 000000000..3ca4ad557 --- /dev/null +++ b/qemu/roms/ipxe/src/net/fcns.c @@ -0,0 +1,241 @@ +/* + * Copyright (C) 2010 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/interface.h> +#include <ipxe/iobuf.h> +#include <ipxe/process.h> +#include <ipxe/xfer.h> +#include <ipxe/fc.h> +#include <ipxe/fcns.h> + +/** @file + * + * Fibre Channel name server lookups + * + */ + +/** A Fibre Channel name server query */ +struct fc_ns_query { + /** Reference count */ + struct refcnt refcnt; + /** Fibre Channel exchange */ + struct interface xchg; + + /** Fibre Channel peer */ + struct fc_peer *peer; + /** Fibre Channel port */ + struct fc_port *port; + + /** Process */ + struct process process; + /** Success handler + * + * @v peer Fibre Channel peer + * @v port Fibre Channel port + * @v peer_port_id Peer port ID + * @ret rc Return status code + */ + int ( * done ) ( struct fc_peer *peer, struct fc_port *port, + struct fc_port_id *peer_port_id ); +}; + +/** + * Free name server query + * + * @v refcnt Reference count + */ +static void fc_ns_query_free ( struct refcnt *refcnt ) { + struct fc_ns_query *query = + container_of ( refcnt, struct fc_ns_query, refcnt ); + + fc_peer_put ( query->peer ); + fc_port_put ( query->port ); + free ( query ); +} + +/** + * Close name server query + * + * @v query Name server query + * @v rc Reason for close + */ +static void fc_ns_query_close ( struct fc_ns_query *query, int rc ) { + + /* Stop process */ + process_del ( &query->process ); + + /* Shut down interfaces */ + intf_shutdown ( &query->xchg, rc ); +} + +/** + * Receive name server query response + * + * @v query Name server query + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int fc_ns_query_deliver ( struct fc_ns_query *query, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + union fc_ns_response *resp = iobuf->data; + struct fc_port_id *peer_port_id; + int rc; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( resp->ct ) ) { + DBGC ( query, "FCNS %p received underlength response (%zd " + "bytes)\n", query, iob_len ( iobuf ) ); + rc = -EINVAL; + goto done; + } + + /* Handle response */ + switch ( ntohs ( resp->ct.code ) ) { + case FC_GS_ACCEPT: + if ( iob_len ( iobuf ) < sizeof ( resp->gid_pn ) ) { + DBGC ( query, "FCNS %p received underlength accept " + "response (%zd bytes)\n", + query, iob_len ( iobuf ) ); + rc = -EINVAL; + goto done; + } + peer_port_id = &resp->gid_pn.port_id.port_id; + DBGC ( query, "FCNS %p resolved %s to %s via %s\n", + query, fc_ntoa ( &query->peer->port_wwn ), + fc_id_ntoa ( peer_port_id ), query->port->name ); + if ( ( rc = query->done ( query->peer, query->port, + peer_port_id ) ) != 0 ) + goto done; + break; + case FC_GS_REJECT: + DBGC ( query, "FCNS %p rejected (reason %02x explanation " + "%02x)\n", query, resp->reject.ct.reason, + resp->reject.ct.explanation ); + break; + default: + DBGC ( query, "FCNS %p received invalid response code %04x\n", + query, ntohs ( resp->ct.code ) ); + rc = -ENOTSUP; + goto done; + } + + rc = 0; + done: + free_iob ( iobuf ); + fc_ns_query_close ( query, rc ); + return rc; +} + +/** + * Name server query process + * + * @v query Name server query + */ +static void fc_ns_query_step ( struct fc_ns_query *query ) { + struct xfer_metadata meta; + struct fc_ns_gid_pn_request gid_pn; + int xchg_id; + int rc; + + /* Create exchange */ + if ( ( xchg_id = fc_xchg_originate ( &query->xchg, query->port, + &fc_gs_port_id, + FC_TYPE_CT ) ) < 0 ) { + rc = xchg_id; + DBGC ( query, "FCNS %p could not create exchange: %s\n", + query, strerror ( rc ) ); + fc_ns_query_close ( query, rc ); + return; + } + + /* Construct query request */ + memset ( &gid_pn, 0, sizeof ( gid_pn ) ); + gid_pn.ct.revision = FC_CT_REVISION; + gid_pn.ct.type = FC_GS_TYPE_DS; + gid_pn.ct.subtype = FC_DS_SUBTYPE_NAME; + gid_pn.ct.code = htons ( FC_NS_GET ( FC_NS_PORT_NAME, FC_NS_PORT_ID )); + memcpy ( &gid_pn.port_wwn, &query->peer->port_wwn, + sizeof ( gid_pn.port_wwn ) ); + memset ( &meta, 0, sizeof ( meta ) ); + meta.flags = XFER_FL_OVER; + + /* Send query */ + if ( ( rc = xfer_deliver_raw_meta ( &query->xchg, &gid_pn, + sizeof ( gid_pn ), &meta ) ) != 0){ + DBGC ( query, "FCNS %p could not deliver query: %s\n", + query, strerror ( rc ) ); + fc_ns_query_close ( query, rc ); + return; + } +} + +/** Name server exchange interface operations */ +static struct interface_operation fc_ns_query_xchg_op[] = { + INTF_OP ( xfer_deliver, struct fc_ns_query *, fc_ns_query_deliver ), + INTF_OP ( intf_close, struct fc_ns_query *, fc_ns_query_close ), +}; + +/** Name server exchange interface descriptor */ +static struct interface_descriptor fc_ns_query_xchg_desc = + INTF_DESC ( struct fc_ns_query, xchg, fc_ns_query_xchg_op ); + +/** Name server process descriptor */ +static struct process_descriptor fc_ns_query_process_desc = + PROC_DESC_ONCE ( struct fc_ns_query, process, fc_ns_query_step ); + +/** + * Issue Fibre Channel name server query + * + * @v peer Fibre Channel peer + * @v port Fibre Channel port + * @ret rc Return status code + */ +int fc_ns_query ( struct fc_peer *peer, struct fc_port *port, + int ( * done ) ( struct fc_peer *peer, struct fc_port *port, + struct fc_port_id *peer_port_id ) ) { + struct fc_ns_query *query; + + /* Allocate and initialise structure */ + query = zalloc ( sizeof ( *query ) ); + if ( ! query ) + return -ENOMEM; + ref_init ( &query->refcnt, fc_ns_query_free ); + intf_init ( &query->xchg, &fc_ns_query_xchg_desc, &query->refcnt ); + process_init ( &query->process, &fc_ns_query_process_desc, + &query->refcnt ); + query->peer = fc_peer_get ( peer ); + query->port = fc_port_get ( port ); + query->done = done; + + DBGC ( query, "FCNS %p querying %s via %s\n", + query, fc_ntoa ( &query->peer->port_wwn ), port->name ); + + /* Mortalise self and return */ + ref_put ( &query->refcnt ); + return 0; +} diff --git a/qemu/roms/ipxe/src/net/fcoe.c b/qemu/roms/ipxe/src/net/fcoe.c new file mode 100644 index 000000000..e9e404ec3 --- /dev/null +++ b/qemu/roms/ipxe/src/net/fcoe.c @@ -0,0 +1,1229 @@ +/* + * Copyright (C) 2010 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stddef.h> +#include <stdlib.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/if_ether.h> +#include <ipxe/if_arp.h> +#include <ipxe/iobuf.h> +#include <ipxe/interface.h> +#include <ipxe/xfer.h> +#include <ipxe/netdevice.h> +#include <ipxe/ethernet.h> +#include <ipxe/vlan.h> +#include <ipxe/features.h> +#include <ipxe/errortab.h> +#include <ipxe/device.h> +#include <ipxe/crc32.h> +#include <ipxe/retry.h> +#include <ipxe/timer.h> +#include <ipxe/fc.h> +#include <ipxe/fip.h> +#include <ipxe/fcoe.h> + +/** @file + * + * FCoE protocol + * + */ + +FEATURE ( FEATURE_PROTOCOL, "FCoE", DHCP_EB_FEATURE_FCOE, 1 ); + +/* Disambiguate the various error causes */ +#define EINVAL_UNDERLENGTH __einfo_error ( EINFO_EINVAL_UNDERLENGTH ) +#define EINFO_EINVAL_UNDERLENGTH \ + __einfo_uniqify ( EINFO_EINVAL, 0x01, "Underlength packet" ) +#define EINVAL_SOF __einfo_error ( EINFO_EINVAL_SOF ) +#define EINFO_EINVAL_SOF \ + __einfo_uniqify ( EINFO_EINVAL, 0x02, "Invalid SoF delimiter" ) +#define EINVAL_CRC __einfo_error ( EINFO_EINVAL_CRC ) +#define EINFO_EINVAL_CRC \ + __einfo_uniqify ( EINFO_EINVAL, 0x03, "Invalid CRC (not stripped?)" ) +#define EINVAL_EOF __einfo_error ( EINFO_EINVAL_EOF ) +#define EINFO_EINVAL_EOF \ + __einfo_uniqify ( EINFO_EINVAL, 0x04, "Invalid EoF delimiter" ) + +/** An FCoE port */ +struct fcoe_port { + /** Reference count */ + struct refcnt refcnt; + /** List of FCoE ports */ + struct list_head list; + /** Transport interface */ + struct interface transport; + /** Network device */ + struct net_device *netdev; + + /** Node WWN */ + union fcoe_name node_wwn; + /** Port WWN */ + union fcoe_name port_wwn; + + /** FIP retransmission timer */ + struct retry_timer timer; + /** FIP timeout counter */ + unsigned int timeouts; + /** Flags */ + unsigned int flags; + /** FCoE forwarder priority */ + unsigned int priority; + /** Keepalive delay (in ms) */ + unsigned int keepalive; + /** FCoE forwarder MAC address */ + uint8_t fcf_mac[ETH_ALEN]; + /** Local MAC address */ + uint8_t local_mac[ETH_ALEN]; +}; + +/** FCoE flags */ +enum fcoe_flags { + /** Underlying network device is available */ + FCOE_HAVE_NETWORK = 0x0001, + /** We have selected an FCoE forwarder to use */ + FCOE_HAVE_FCF = 0x0002, + /** We have a FIP-capable FCoE forwarder available to be used */ + FCOE_HAVE_FIP_FCF = 0x0004, + /** FCoE forwarder supports server-provided MAC addresses */ + FCOE_FCF_ALLOWS_SPMA = 0x0008, + /** An alternative VLAN has been found */ + FCOE_VLAN_FOUND = 0x0010, + /** VLAN discovery has timed out */ + FCOE_VLAN_TIMED_OUT = 0x0020, +}; + +struct net_protocol fcoe_protocol __net_protocol; +struct net_protocol fip_protocol __net_protocol; + +/** FCoE All-FCoE-MACs address */ +static uint8_t all_fcoe_macs[ETH_ALEN] = + { 0x01, 0x10, 0x18, 0x01, 0x00, 0x00 }; + +/** FCoE All-ENode-MACs address */ +static uint8_t all_enode_macs[ETH_ALEN] = + { 0x01, 0x10, 0x18, 0x01, 0x00, 0x01 }; + +/** FCoE All-FCF-MACs address */ +static uint8_t all_fcf_macs[ETH_ALEN] = + { 0x01, 0x10, 0x18, 0x01, 0x00, 0x02 }; + +/** Default FCoE forwarded MAC address */ +static uint8_t default_fcf_mac[ETH_ALEN] = + { 0x0e, 0xfc, 0x00, 0xff, 0xff, 0xfe }; + +/** Maximum number of VLAN requests before giving up on VLAN discovery */ +#define FCOE_MAX_VLAN_REQUESTS 2 + +/** Delay between retrying VLAN requests */ +#define FCOE_VLAN_RETRY_DELAY ( TICKS_PER_SEC ) + +/** Delay between retrying polling VLAN requests */ +#define FCOE_VLAN_POLL_DELAY ( 30 * TICKS_PER_SEC ) + +/** Maximum number of FIP solicitations before giving up on FIP */ +#define FCOE_MAX_FIP_SOLICITATIONS 2 + +/** Delay between retrying FIP solicitations */ +#define FCOE_FIP_RETRY_DELAY ( TICKS_PER_SEC ) + +/** Maximum number of missing discovery advertisements */ +#define FCOE_MAX_FIP_MISSING_KEEPALIVES 4 + +/** List of FCoE ports */ +static LIST_HEAD ( fcoe_ports ); + +/****************************************************************************** + * + * FCoE protocol + * + ****************************************************************************** + */ + +/** + * Identify FCoE port by network device + * + * @v netdev Network device + * @ret fcoe FCoE port, or NULL + */ +static struct fcoe_port * fcoe_demux ( struct net_device *netdev ) { + struct fcoe_port *fcoe; + + list_for_each_entry ( fcoe, &fcoe_ports, list ) { + if ( fcoe->netdev == netdev ) + return fcoe; + } + return NULL; +} + +/** + * Reset FCoE port + * + * @v fcoe FCoE port + */ +static void fcoe_reset ( struct fcoe_port *fcoe ) { + + /* Detach FC port, if any */ + intf_restart ( &fcoe->transport, -ECANCELED ); + + /* Reset any FIP state */ + stop_timer ( &fcoe->timer ); + fcoe->timeouts = 0; + fcoe->flags = 0; + fcoe->priority = ( FIP_LOWEST_PRIORITY + 1 ); + fcoe->keepalive = 0; + memcpy ( fcoe->fcf_mac, default_fcf_mac, + sizeof ( fcoe->fcf_mac ) ); + memcpy ( fcoe->local_mac, fcoe->netdev->ll_addr, + sizeof ( fcoe->local_mac ) ); + + /* Start FIP solicitation if network is available */ + if ( netdev_is_open ( fcoe->netdev ) && + netdev_link_ok ( fcoe->netdev ) ) { + fcoe->flags |= FCOE_HAVE_NETWORK; + start_timer_nodelay ( &fcoe->timer ); + DBGC ( fcoe, "FCoE %s starting %s\n", fcoe->netdev->name, + ( vlan_can_be_trunk ( fcoe->netdev ) ? + "VLAN discovery" : "FIP solicitation" ) ); + } + + /* Send notification of window change */ + xfer_window_changed ( &fcoe->transport ); +} + +/** + * Transmit FCoE packet + * + * @v fcoe FCoE port + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int fcoe_deliver ( struct fcoe_port *fcoe, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + struct fc_frame_header *fchdr = iobuf->data; + struct fc_els_frame_common *els = ( iobuf->data + sizeof ( *fchdr ) ); + struct fcoe_header *fcoehdr; + struct fcoe_footer *fcoeftr; + struct fip_header *fiphdr; + struct fip_login *fipflogi; + struct fip_mac_address *fipmac; + uint32_t crc; + struct net_protocol *net_protocol; + void *ll_source; + int rc; + + /* Send as FIP or FCoE as appropriate */ + if ( ( fchdr->r_ctl == ( FC_R_CTL_ELS | FC_R_CTL_UNSOL_CTRL ) ) && + ( els->command == FC_ELS_FLOGI ) && + ( fcoe->flags & FCOE_HAVE_FIP_FCF ) ) { + + /* Create FIP FLOGI descriptor */ + fipflogi = iob_push ( iobuf, + offsetof ( typeof ( *fipflogi ), fc ) ); + memset ( fipflogi, 0, offsetof ( typeof ( *fipflogi ), fc ) ); + fipflogi->type = FIP_FLOGI; + fipflogi->len = ( iob_len ( iobuf ) / 4 ); + + /* Create FIP MAC address descriptor */ + fipmac = iob_put ( iobuf, sizeof ( *fipmac ) ); + memset ( fipmac, 0, sizeof ( *fipmac ) ); + fipmac->type = FIP_MAC_ADDRESS; + fipmac->len = ( sizeof ( *fipmac ) / 4 ); + if ( fcoe->flags & FCOE_FCF_ALLOWS_SPMA ) { + memcpy ( fipmac->mac, fcoe->netdev->ll_addr, + sizeof ( fipmac->mac ) ); + } + + /* Create FIP header */ + fiphdr = iob_push ( iobuf, sizeof ( *fiphdr ) ); + memset ( fiphdr, 0, sizeof ( *fiphdr ) ); + fiphdr->version = FIP_VERSION; + fiphdr->code = htons ( FIP_CODE_ELS ); + fiphdr->subcode = FIP_ELS_REQUEST; + fiphdr->len = + htons ( ( iob_len ( iobuf ) - sizeof ( *fiphdr ) ) / 4); + fiphdr->flags = ( ( fcoe->flags & FCOE_FCF_ALLOWS_SPMA ) ? + htons ( FIP_SP ) : htons ( FIP_FP ) ); + + /* Send as FIP packet from netdev's own MAC address */ + net_protocol = &fip_protocol; + ll_source = fcoe->netdev->ll_addr; + + } else { + + /* Calculate CRC */ + crc = crc32_le ( ~((uint32_t)0), iobuf->data, + iob_len ( iobuf ) ); + + /* Create FCoE header */ + fcoehdr = iob_push ( iobuf, sizeof ( *fcoehdr ) ); + memset ( fcoehdr, 0, sizeof ( *fcoehdr ) ); + fcoehdr->sof = ( ( fchdr->seq_cnt == ntohs ( 0 ) ) ? + FCOE_SOF_I3 : FCOE_SOF_N3 ); + + /* Create FCoE footer */ + fcoeftr = iob_put ( iobuf, sizeof ( *fcoeftr ) ); + memset ( fcoeftr, 0, sizeof ( *fcoeftr ) ); + fcoeftr->crc = cpu_to_le32 ( crc ^ ~((uint32_t)0) ); + fcoeftr->eof = ( ( fchdr->f_ctl_es & FC_F_CTL_ES_END ) ? + FCOE_EOF_T : FCOE_EOF_N ); + + /* Send as FCoE packet from FCoE MAC address */ + net_protocol = &fcoe_protocol; + ll_source = fcoe->local_mac; + } + + /* Transmit packet */ + if ( ( rc = net_tx ( iob_disown ( iobuf ), fcoe->netdev, net_protocol, + fcoe->fcf_mac, ll_source ) ) != 0 ) { + DBGC ( fcoe, "FCoE %s could not transmit: %s\n", + fcoe->netdev->name, strerror ( rc ) ); + goto done; + } + + done: + free_iob ( iobuf ); + return rc; +} + +/** + * Allocate FCoE I/O buffer + * + * @v len Payload length + * @ret iobuf I/O buffer, or NULL + */ +static struct io_buffer * fcoe_alloc_iob ( struct fcoe_port *fcoe __unused, + size_t len ) { + struct io_buffer *iobuf; + + iobuf = alloc_iob ( MAX_LL_HEADER_LEN + sizeof ( struct fcoe_header ) + + len + sizeof ( struct fcoe_footer ) ); + if ( iobuf ) { + iob_reserve ( iobuf, ( MAX_LL_HEADER_LEN + + sizeof ( struct fcoe_header ) ) ); + } + return iobuf; +} + +/** + * Process incoming FCoE packets + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v ll_dest Link-layer destination address + * @v ll_source Link-layer source address + * @v flags Packet flags + * @ret rc Return status code + */ +static int fcoe_rx ( struct io_buffer *iobuf, struct net_device *netdev, + const void *ll_dest, const void *ll_source, + unsigned int flags __unused ) { + struct fcoe_header *fcoehdr; + struct fcoe_footer *fcoeftr; + struct fcoe_port *fcoe; + int rc; + + /* Identify FCoE port */ + if ( ( fcoe = fcoe_demux ( netdev ) ) == NULL ) { + DBG ( "FCoE received frame for net device %s missing FCoE " + "port\n", netdev->name ); + rc = -ENOTCONN; + goto done; + } + + /* Discard packets not destined for us */ + if ( ( memcmp ( fcoe->local_mac, ll_dest, + sizeof ( fcoe->local_mac ) ) != 0 ) && + ( memcmp ( default_fcf_mac, ll_dest, + sizeof ( default_fcf_mac ) ) != 0 ) ) { + DBGC2 ( fcoe, "FCoE %s ignoring packet for %s\n", + fcoe->netdev->name, eth_ntoa ( ll_dest ) ); + rc = -ENOTCONN; + goto done; + } + + /* Sanity check */ + if ( iob_len ( iobuf ) < ( sizeof ( *fcoehdr ) + sizeof ( *fcoeftr ) )){ + DBGC ( fcoe, "FCoE %s received under-length frame (%zd " + "bytes)\n", fcoe->netdev->name, iob_len ( iobuf ) ); + rc = -EINVAL_UNDERLENGTH; + goto done; + } + + /* Strip header and footer */ + fcoehdr = iobuf->data; + iob_pull ( iobuf, sizeof ( *fcoehdr ) ); + fcoeftr = ( iobuf->data + iob_len ( iobuf ) - sizeof ( *fcoeftr ) ); + iob_unput ( iobuf, sizeof ( *fcoeftr ) ); + + /* Validity checks */ + if ( fcoehdr->version != FCOE_FRAME_VER ) { + DBGC ( fcoe, "FCoE %s received unsupported frame version " + "%02x\n", fcoe->netdev->name, fcoehdr->version ); + rc = -EPROTONOSUPPORT; + goto done; + } + if ( ! ( ( fcoehdr->sof == FCOE_SOF_I3 ) || + ( fcoehdr->sof == FCOE_SOF_N3 ) ) ) { + DBGC ( fcoe, "FCoE %s received unsupported start-of-frame " + "delimiter %02x\n", fcoe->netdev->name, fcoehdr->sof ); + rc = -EINVAL_SOF; + goto done; + } + if ( ( le32_to_cpu ( fcoeftr->crc ) ^ ~((uint32_t)0) ) != + crc32_le ( ~((uint32_t)0), iobuf->data, iob_len ( iobuf ) ) ) { + DBGC ( fcoe, "FCoE %s received invalid CRC\n", + fcoe->netdev->name ); + rc = -EINVAL_CRC; + goto done; + } + if ( ! ( ( fcoeftr->eof == FCOE_EOF_N ) || + ( fcoeftr->eof == FCOE_EOF_T ) ) ) { + DBGC ( fcoe, "FCoE %s received unsupported end-of-frame " + "delimiter %02x\n", fcoe->netdev->name, fcoeftr->eof ); + rc = -EINVAL_EOF; + goto done; + } + + /* Record FCF address if applicable */ + if ( ( fcoe->flags & FCOE_HAVE_FCF ) && + ( ! ( fcoe->flags & FCOE_HAVE_FIP_FCF ) ) ) { + memcpy ( &fcoe->fcf_mac, ll_source, sizeof ( fcoe->fcf_mac ) ); + } + + /* Hand off via transport interface */ + if ( ( rc = xfer_deliver_iob ( &fcoe->transport, + iob_disown ( iobuf ) ) ) != 0 ) { + DBGC ( fcoe, "FCoE %s could not deliver frame: %s\n", + fcoe->netdev->name, strerror ( rc ) ); + goto done; + } + + done: + free_iob ( iobuf ); + return rc; +} + +/** + * Check FCoE flow control window + * + * @v fcoe FCoE port + * @ret len Length of window + */ +static size_t fcoe_window ( struct fcoe_port *fcoe ) { + return ( ( fcoe->flags & FCOE_HAVE_FCF ) ? ~( ( size_t ) 0 ) : 0 ); +} + +/** + * Close FCoE port + * + * @v fcoe FCoE port + * @v rc Reason for close + */ +static void fcoe_close ( struct fcoe_port *fcoe, int rc ) { + + stop_timer ( &fcoe->timer ); + intf_shutdown ( &fcoe->transport, rc ); + netdev_put ( fcoe->netdev ); + list_del ( &fcoe->list ); + ref_put ( &fcoe->refcnt ); +} + +/** + * Identify device underlying FCoE port + * + * @v fcoe FCoE port + * @ret device Underlying device + */ +static struct device * fcoe_identify_device ( struct fcoe_port *fcoe ) { + return fcoe->netdev->dev; +} + +/** FCoE transport interface operations */ +static struct interface_operation fcoe_transport_op[] = { + INTF_OP ( xfer_deliver, struct fcoe_port *, fcoe_deliver ), + INTF_OP ( xfer_alloc_iob, struct fcoe_port *, fcoe_alloc_iob ), + INTF_OP ( xfer_window, struct fcoe_port *, fcoe_window ), + INTF_OP ( intf_close, struct fcoe_port *, fcoe_close ), + INTF_OP ( identify_device, struct fcoe_port *, + fcoe_identify_device ), +}; + +/** FCoE transport interface descriptor */ +static struct interface_descriptor fcoe_transport_desc = + INTF_DESC ( struct fcoe_port, transport, fcoe_transport_op ); + +/****************************************************************************** + * + * FIP protocol + * + ****************************************************************************** + */ + +/** + * Parse FIP packet into descriptor set + * + * @v fcoe FCoE port + * @v fiphdr FIP header + * @v len Length of FIP packet + * @v descs Descriptor set to fill in + * @ret rc Return status code + */ +static int fcoe_fip_parse ( struct fcoe_port *fcoe, struct fip_header *fiphdr, + size_t len, struct fip_descriptors *descs ) { + union fip_descriptor *desc; + size_t descs_len; + size_t desc_len; + size_t desc_offset; + unsigned int desc_type; + + /* Check FIP version */ + if ( fiphdr->version != FIP_VERSION ) { + DBGC ( fcoe, "FCoE %s received unsupported FIP version %02x\n", + fcoe->netdev->name, fiphdr->version ); + return -EINVAL; + } + + /* Check length */ + descs_len = ( ntohs ( fiphdr->len ) * 4 ); + if ( ( sizeof ( *fiphdr ) + descs_len ) > len ) { + DBGC ( fcoe, "FCoE %s received bad descriptor list length\n", + fcoe->netdev->name ); + return -EINVAL; + } + + /* Parse descriptor list */ + memset ( descs, 0, sizeof ( *descs ) ); + for ( desc_offset = 0 ; + desc_offset <= ( descs_len - sizeof ( desc->common ) ) ; + desc_offset += desc_len ) { + + /* Find descriptor and validate length */ + desc = ( ( ( void * ) ( fiphdr + 1 ) ) + desc_offset ); + desc_type = desc->common.type; + desc_len = ( desc->common.len * 4 ); + if ( desc_len == 0 ) { + DBGC ( fcoe, "FCoE %s received zero-length " + "descriptor\n", fcoe->netdev->name ); + return -EINVAL; + } + if ( ( desc_offset + desc_len ) > descs_len ) { + DBGC ( fcoe, "FCoE %s descriptor overrun\n", + fcoe->netdev->name ); + return -EINVAL; + } + + /* Handle descriptors that we understand */ + if ( ( desc_type > FIP_RESERVED ) && + ( desc_type < FIP_NUM_DESCRIPTOR_TYPES ) ) { + /* Use only the first instance of a descriptor */ + if ( descs->desc[desc_type] == NULL ) + descs->desc[desc_type] = desc; + continue; + } + + /* Abort if we cannot understand a critical descriptor */ + if ( FIP_IS_CRITICAL ( desc_type ) ) { + DBGC ( fcoe, "FCoE %s cannot understand critical " + "descriptor type %02x\n", + fcoe->netdev->name, desc_type ); + return -ENOTSUP; + } + + /* Ignore non-critical descriptors that we cannot understand */ + } + + return 0; +} + +/** + * Send FIP VLAN request + * + * @v fcoe FCoE port + * @ret rc Return status code + */ +static int fcoe_fip_tx_vlan ( struct fcoe_port *fcoe ) { + struct io_buffer *iobuf; + struct { + struct fip_header hdr; + struct fip_mac_address mac_address; + } __attribute__ (( packed )) *request; + int rc; + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( MAX_LL_HEADER_LEN + sizeof ( *request ) ); + if ( ! iobuf ) + return -ENOMEM; + iob_reserve ( iobuf, MAX_LL_HEADER_LEN ); + + /* Construct VLAN request */ + request = iob_put ( iobuf, sizeof ( *request ) ); + memset ( request, 0, sizeof ( *request ) ); + request->hdr.version = FIP_VERSION; + request->hdr.code = htons ( FIP_CODE_VLAN ); + request->hdr.subcode = FIP_VLAN_REQUEST; + request->hdr.len = htons ( ( sizeof ( *request ) - + sizeof ( request->hdr ) ) / 4 ); + request->mac_address.type = FIP_MAC_ADDRESS; + request->mac_address.len = + ( sizeof ( request->mac_address ) / 4 ); + memcpy ( request->mac_address.mac, fcoe->netdev->ll_addr, + sizeof ( request->mac_address.mac ) ); + + /* Send VLAN request */ + if ( ( rc = net_tx ( iob_disown ( iobuf ), fcoe->netdev, + &fip_protocol, all_fcf_macs, + fcoe->netdev->ll_addr ) ) != 0 ) { + DBGC ( fcoe, "FCoE %s could not send VLAN request: " + "%s\n", fcoe->netdev->name, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Handle received FIP VLAN notification + * + * @v fcoe FCoE port + * @v descs Descriptor list + * @v flags Flags + * @ret rc Return status code + */ +static int fcoe_fip_rx_vlan ( struct fcoe_port *fcoe, + struct fip_descriptors *descs, + unsigned int flags __unused ) { + struct fip_mac_address *mac_address = fip_mac_address ( descs ); + struct fip_vlan *vlan = fip_vlan ( descs ); + unsigned int tag; + int rc; + + /* Sanity checks */ + if ( ! mac_address ) { + DBGC ( fcoe, "FCoE %s received VLAN notification missing MAC " + "address\n", fcoe->netdev->name ); + return -EINVAL; + } + if ( ! vlan ) { + DBGC ( fcoe, "FCoE %s received VLAN notification missing VLAN " + "tag\n", fcoe->netdev->name ); + return -EINVAL; + } + + /* Create VLAN */ + tag = ntohs ( vlan->vlan ); + DBGC ( fcoe, "FCoE %s creating VLAN %d for FCF %s\n", + fcoe->netdev->name, tag, eth_ntoa ( mac_address->mac ) ); + if ( ( rc = vlan_create ( fcoe->netdev, tag, + FCOE_VLAN_PRIORITY ) ) != 0 ) { + DBGC ( fcoe, "FCoE %s could not create VLAN %d: %s\n", + fcoe->netdev->name, tag, strerror ( rc ) ); + return rc; + } + + /* Record that a VLAN was found. This FCoE port will play no + * further active role; the real FCoE traffic will use the + * port automatically created for the new VLAN device. + */ + fcoe->flags |= FCOE_VLAN_FOUND; + + return 0; +} + +/** + * Send FIP discovery solicitation + * + * @v fcoe FCoE port + * @ret rc Return status code + */ +static int fcoe_fip_tx_solicitation ( struct fcoe_port *fcoe ) { + struct io_buffer *iobuf; + struct { + struct fip_header hdr; + struct fip_mac_address mac_address; + struct fip_name_id name_id; + struct fip_max_fcoe_size max_fcoe_size; + } __attribute__ (( packed )) *solicitation; + int rc; + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( MAX_LL_HEADER_LEN + sizeof ( *solicitation ) ); + if ( ! iobuf ) + return -ENOMEM; + iob_reserve ( iobuf, MAX_LL_HEADER_LEN ); + + /* Construct discovery solicitation */ + solicitation = iob_put ( iobuf, sizeof ( *solicitation ) ); + memset ( solicitation, 0, sizeof ( *solicitation ) ); + solicitation->hdr.version = FIP_VERSION; + solicitation->hdr.code = htons ( FIP_CODE_DISCOVERY ); + solicitation->hdr.subcode = FIP_DISCOVERY_SOLICIT; + solicitation->hdr.len = htons ( ( sizeof ( *solicitation ) - + sizeof ( solicitation->hdr ) ) / 4 ); + solicitation->hdr.flags = htons ( FIP_FP | FIP_SP ); + solicitation->mac_address.type = FIP_MAC_ADDRESS; + solicitation->mac_address.len = + ( sizeof ( solicitation->mac_address ) / 4 ); + memcpy ( solicitation->mac_address.mac, fcoe->netdev->ll_addr, + sizeof ( solicitation->mac_address.mac ) ); + solicitation->name_id.type = FIP_NAME_ID; + solicitation->name_id.len = ( sizeof ( solicitation->name_id ) / 4 ); + memcpy ( &solicitation->name_id.name, &fcoe->node_wwn.fc, + sizeof ( solicitation->name_id.name ) ); + solicitation->max_fcoe_size.type = FIP_MAX_FCOE_SIZE; + solicitation->max_fcoe_size.len = + ( sizeof ( solicitation->max_fcoe_size ) / 4 ); + solicitation->max_fcoe_size.mtu = + htons ( ETH_MAX_MTU - sizeof ( struct fcoe_header ) - + sizeof ( struct fcoe_footer ) ); + + /* Send discovery solicitation */ + if ( ( rc = net_tx ( iob_disown ( iobuf ), fcoe->netdev, + &fip_protocol, all_fcf_macs, + fcoe->netdev->ll_addr ) ) != 0 ) { + DBGC ( fcoe, "FCoE %s could not send discovery solicitation: " + "%s\n", fcoe->netdev->name, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Handle received FIP discovery advertisement + * + * @v fcoe FCoE port + * @v descs Descriptor list + * @v flags Flags + * @ret rc Return status code + */ +static int fcoe_fip_rx_advertisement ( struct fcoe_port *fcoe, + struct fip_descriptors *descs, + unsigned int flags ) { + struct fip_priority *priority = fip_priority ( descs ); + struct fip_mac_address *mac_address = fip_mac_address ( descs ); + struct fip_fka_adv_p *fka_adv_p = fip_fka_adv_p ( descs ); + + /* Sanity checks */ + if ( ! priority ) { + DBGC ( fcoe, "FCoE %s received advertisement missing " + "priority\n", fcoe->netdev->name ); + return -EINVAL; + } + if ( ! mac_address ) { + DBGC ( fcoe, "FCoE %s received advertisement missing MAC " + "address\n", fcoe->netdev->name ); + return -EINVAL; + } + if ( ! fka_adv_p ) { + DBGC ( fcoe, "FCoE %s received advertisement missing FKA ADV " + "period\n", fcoe->netdev->name ); + return -EINVAL; + } + + if ( ! ( fcoe->flags & FCOE_HAVE_FCF ) ) { + + /* We are soliciting for an FCF. Store the highest + * (i.e. lowest-valued) priority solicited + * advertisement that we receive. + */ + if ( ( ( flags & ( FIP_A | FIP_S | FIP_F ) ) == + ( FIP_A | FIP_S | FIP_F ) ) && + ( priority->priority < fcoe->priority ) ) { + + fcoe->flags |= FCOE_HAVE_FIP_FCF; + fcoe->priority = priority->priority; + if ( fka_adv_p->flags & FIP_NO_KEEPALIVE ) { + fcoe->keepalive = 0; + } else { + fcoe->keepalive = ntohl ( fka_adv_p->period ); + } + fcoe->flags &= ~FCOE_FCF_ALLOWS_SPMA; + if ( flags & FIP_SP ) + fcoe->flags |= FCOE_FCF_ALLOWS_SPMA; + memcpy ( fcoe->fcf_mac, mac_address->mac, + sizeof ( fcoe->fcf_mac ) ); + DBGC ( fcoe, "FCoE %s selected FCF %s (pri %d", + fcoe->netdev->name, eth_ntoa ( fcoe->fcf_mac ), + fcoe->priority ); + if ( fcoe->keepalive ) { + DBGC ( fcoe, ", FKA ADV %dms", + fcoe->keepalive ); + } + DBGC ( fcoe, ", %cPMA)\n", + ( ( fcoe->flags & FCOE_FCF_ALLOWS_SPMA ) ? + 'S' : 'F' ) ); + } + + } else if ( fcoe->flags & FCOE_HAVE_FIP_FCF ) { + + /* We are checking that the FCF remains alive. Reset + * the timeout counter if this is an advertisement + * from our forwarder. + */ + if ( memcmp ( fcoe->fcf_mac, mac_address->mac, + sizeof ( fcoe->fcf_mac ) ) == 0 ) { + fcoe->timeouts = 0; + } + + } else { + + /* We are operating in non-FIP mode and have received + * a FIP advertisement. Reset the link in order to + * attempt FIP. + */ + fcoe_reset ( fcoe ); + + } + + return 0; +} + +/** + * Handle received FIP ELS response + * + * @v fcoe FCoE port + * @v descs Descriptor list + * @v flags Flags + * @ret rc Return status code + */ +static int fcoe_fip_rx_els_response ( struct fcoe_port *fcoe, + struct fip_descriptors *descs, + unsigned int flags __unused ) { + struct fip_els *flogi = fip_flogi ( descs ); + struct fip_mac_address *mac_address = fip_mac_address ( descs ); + void *frame; + size_t frame_len; + int rc; + + /* Sanity checks */ + if ( ! flogi ) { + DBGC ( fcoe, "FCoE %s received ELS response missing FLOGI\n", + fcoe->netdev->name ); + return -EINVAL; + } + if ( ! mac_address ) { + DBGC ( fcoe, "FCoE %s received ELS response missing MAC " + "address\n", fcoe->netdev->name ); + return -EINVAL; + } + + /* Record local MAC address */ + memcpy ( fcoe->local_mac, mac_address->mac, sizeof ( fcoe->local_mac )); + DBGC ( fcoe, "FCoE %s using local MAC %s\n", + fcoe->netdev->name, eth_ntoa ( fcoe->local_mac ) ); + + /* Hand off via transport interface */ + frame = &flogi->fc; + frame_len = ( ( flogi->len * 4 ) - offsetof ( typeof ( *flogi ), fc ) ); + if ( ( rc = xfer_deliver_raw ( &fcoe->transport, frame, + frame_len ) ) != 0 ) { + DBGC ( fcoe, "FCoE %s could not deliver FIP FLOGI frame: %s\n", + fcoe->netdev->name, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Send FIP keepalive + * + * @v fcoe FCoE port + * @ret rc Return status code + */ +static int fcoe_fip_tx_keepalive ( struct fcoe_port *fcoe ) { + struct io_buffer *iobuf; + struct { + struct fip_header hdr; + struct fip_mac_address mac_address; + } __attribute__ (( packed )) *keepalive; + int rc; + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( MAX_LL_HEADER_LEN + sizeof ( *keepalive ) ); + if ( ! iobuf ) + return -ENOMEM; + iob_reserve ( iobuf, MAX_LL_HEADER_LEN ); + + /* Construct keepalive */ + keepalive = iob_put ( iobuf, sizeof ( *keepalive ) ); + memset ( keepalive, 0, sizeof ( *keepalive ) ); + keepalive->hdr.version = FIP_VERSION; + keepalive->hdr.code = htons ( FIP_CODE_MAINTAIN ); + keepalive->hdr.subcode = FIP_MAINTAIN_KEEP_ALIVE; + keepalive->hdr.len = htons ( ( sizeof ( *keepalive ) - + sizeof ( keepalive->hdr ) ) / 4 ); + keepalive->mac_address.type = FIP_MAC_ADDRESS; + keepalive->mac_address.len = + ( sizeof ( keepalive->mac_address ) / 4 ); + memcpy ( keepalive->mac_address.mac, fcoe->netdev->ll_addr, + sizeof ( keepalive->mac_address.mac ) ); + + /* Send keepalive */ + if ( ( rc = net_tx ( iob_disown ( iobuf ), fcoe->netdev, + &fip_protocol, fcoe->fcf_mac, + fcoe->netdev->ll_addr ) ) != 0 ) { + DBGC ( fcoe, "FCoE %s could not send keepalive: %s\n", + fcoe->netdev->name, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** A FIP handler */ +struct fip_handler { + /** Protocol code */ + uint16_t code; + /** Protocol subcode */ + uint8_t subcode; + /** + * Receive FIP packet + * + * @v fcoe FCoE port + * @v descs Descriptor list + * @v flags Flags + * @ret rc Return status code + */ + int ( * rx ) ( struct fcoe_port *fcoe, struct fip_descriptors *descs, + unsigned int flags ); +}; + +/** FIP handlers */ +static struct fip_handler fip_handlers[] = { + { FIP_CODE_VLAN, FIP_VLAN_NOTIFY, + fcoe_fip_rx_vlan }, + { FIP_CODE_DISCOVERY, FIP_DISCOVERY_ADVERTISE, + fcoe_fip_rx_advertisement }, + { FIP_CODE_ELS, FIP_ELS_RESPONSE, + fcoe_fip_rx_els_response }, +}; + +/** + * Process incoming FIP packets + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v ll_dest Link-layer destination address + * @v ll_source Link-layer source address + * @v flags Packet flags + * @ret rc Return status code + */ +static int fcoe_fip_rx ( struct io_buffer *iobuf, + struct net_device *netdev, + const void *ll_dest, + const void *ll_source __unused, + unsigned int flags __unused ) { + struct fip_header *fiphdr = iobuf->data; + struct fip_descriptors descs; + struct fip_handler *handler; + struct fcoe_port *fcoe; + unsigned int i; + int rc; + + /* Identify FCoE port */ + if ( ( fcoe = fcoe_demux ( netdev ) ) == NULL ) { + DBG ( "FCoE received FIP frame for net device %s missing FCoE " + "port\n", netdev->name ); + rc = -ENOTCONN; + goto done; + } + + /* Discard packets not destined for us */ + if ( ( memcmp ( fcoe->netdev->ll_addr, ll_dest, ETH_ALEN ) != 0 ) && + ( memcmp ( all_fcoe_macs, ll_dest, + sizeof ( all_fcoe_macs ) ) != 0 ) && + ( memcmp ( all_enode_macs, ll_dest, + sizeof ( all_enode_macs ) ) != 0 ) ) { + DBGC2 ( fcoe, "FCoE %s ignoring FIP packet for %s\n", + fcoe->netdev->name, eth_ntoa ( ll_dest ) ); + rc = -ENOTCONN; + goto done; + } + + /* Parse FIP packet */ + if ( ( rc = fcoe_fip_parse ( fcoe, fiphdr, iob_len ( iobuf ), + &descs ) ) != 0 ) + goto done; + + /* Find a suitable handler */ + for ( i = 0 ; i < ( sizeof ( fip_handlers ) / + sizeof ( fip_handlers[0] ) ) ; i++ ) { + handler = &fip_handlers[i]; + if ( ( handler->code == ntohs ( fiphdr->code ) ) && + ( handler->subcode == fiphdr->subcode ) ) { + rc = handler->rx ( fcoe, &descs, + ntohs ( fiphdr->flags ) ); + goto done; + } + } + DBGC ( fcoe, "FCoE %s received unsupported FIP code %04x.%02x\n", + fcoe->netdev->name, ntohs ( fiphdr->code ), fiphdr->subcode ); + rc = -ENOTSUP; + + done: + free_iob ( iobuf ); + return rc; +} + +/****************************************************************************** + * + * FCoE ports + * + ****************************************************************************** + */ + +/** + * Handle FCoE timer expiry + * + * @v timer FIP timer + * @v over Timer expired + */ +static void fcoe_expired ( struct retry_timer *timer, int over __unused ) { + struct fcoe_port *fcoe = + container_of ( timer, struct fcoe_port, timer ); + int rc; + + /* Sanity check */ + assert ( fcoe->flags & FCOE_HAVE_NETWORK ); + + /* Increment the timeout counter */ + fcoe->timeouts++; + + if ( vlan_can_be_trunk ( fcoe->netdev ) && + ! ( fcoe->flags & FCOE_VLAN_TIMED_OUT ) ) { + + /* If we have already found a VLAN, send infrequent + * VLAN requests, in case VLAN information changes. + */ + if ( fcoe->flags & FCOE_VLAN_FOUND ) { + fcoe->flags &= ~FCOE_VLAN_FOUND; + fcoe->timeouts = 0; + start_timer_fixed ( &fcoe->timer, + FCOE_VLAN_POLL_DELAY ); + fcoe_fip_tx_vlan ( fcoe ); + return; + } + + /* If we have not yet found a VLAN, and we have not + * yet timed out and given up on finding one, then + * send a VLAN request and wait. + */ + if ( fcoe->timeouts <= FCOE_MAX_VLAN_REQUESTS ) { + start_timer_fixed ( &fcoe->timer, + FCOE_VLAN_RETRY_DELAY ); + fcoe_fip_tx_vlan ( fcoe ); + return; + } + + /* We have timed out waiting for a VLAN; proceed to + * FIP discovery. + */ + fcoe->flags |= FCOE_VLAN_TIMED_OUT; + fcoe->timeouts = 0; + DBGC ( fcoe, "FCoE %s giving up on VLAN discovery\n", + fcoe->netdev->name ); + start_timer_nodelay ( &fcoe->timer ); + + } else if ( ! ( fcoe->flags & FCOE_HAVE_FCF ) ) { + + /* If we have not yet found a FIP-capable forwarder, + * and we have not yet timed out and given up on + * finding one, then send a FIP solicitation and wait. + */ + start_timer_fixed ( &fcoe->timer, FCOE_FIP_RETRY_DELAY ); + if ( ( ! ( fcoe->flags & FCOE_HAVE_FIP_FCF ) ) && + ( fcoe->timeouts <= FCOE_MAX_FIP_SOLICITATIONS ) ) { + fcoe_fip_tx_solicitation ( fcoe ); + return; + } + + /* Attach Fibre Channel port */ + if ( ( rc = fc_port_open ( &fcoe->transport, &fcoe->node_wwn.fc, + &fcoe->port_wwn.fc, + fcoe->netdev->name ) ) != 0 ) { + DBGC ( fcoe, "FCoE %s could not create FC port: %s\n", + fcoe->netdev->name, strerror ( rc ) ); + /* We will try again on the next timer expiry */ + return; + } + stop_timer ( &fcoe->timer ); + + /* Either we have found a FIP-capable forwarder, or we + * have timed out and will fall back to pre-FIP mode. + */ + fcoe->flags |= FCOE_HAVE_FCF; + fcoe->timeouts = 0; + DBGC ( fcoe, "FCoE %s using %sFIP FCF %s\n", fcoe->netdev->name, + ( ( fcoe->flags & FCOE_HAVE_FIP_FCF ) ? "" : "non-" ), + eth_ntoa ( fcoe->fcf_mac ) ); + + /* Start sending keepalives if applicable */ + if ( fcoe->keepalive ) + start_timer_nodelay ( &fcoe->timer ); + + /* Send notification of window change */ + xfer_window_changed ( &fcoe->transport ); + + } else { + + /* Send keepalive */ + start_timer_fixed ( &fcoe->timer, + ( ( fcoe->keepalive * TICKS_PER_SEC ) / 1000 ) ); + fcoe_fip_tx_keepalive ( fcoe ); + + /* Abandon FCF if we have not seen its advertisements */ + if ( fcoe->timeouts > FCOE_MAX_FIP_MISSING_KEEPALIVES ) { + DBGC ( fcoe, "FCoE %s abandoning FCF %s\n", + fcoe->netdev->name, eth_ntoa ( fcoe->fcf_mac )); + fcoe_reset ( fcoe ); + } + } +} + +/** + * Create FCoE port + * + * @v netdev Network device + * @ret rc Return status code + */ +static int fcoe_probe ( struct net_device *netdev ) { + struct ll_protocol *ll_protocol = netdev->ll_protocol; + struct fcoe_port *fcoe; + int rc; + + /* Sanity check */ + if ( ll_protocol->ll_proto != htons ( ARPHRD_ETHER ) ) { + /* Not an error; simply skip this net device */ + DBG ( "FCoE skipping non-Ethernet device %s\n", netdev->name ); + rc = 0; + goto err_non_ethernet; + } + + /* Allocate and initialise structure */ + fcoe = zalloc ( sizeof ( *fcoe ) ); + if ( ! fcoe ) { + rc = -ENOMEM; + goto err_zalloc; + } + ref_init ( &fcoe->refcnt, NULL ); + intf_init ( &fcoe->transport, &fcoe_transport_desc, &fcoe->refcnt ); + timer_init ( &fcoe->timer, fcoe_expired, &fcoe->refcnt ); + fcoe->netdev = netdev_get ( netdev ); + + /* Construct node and port names */ + fcoe->node_wwn.fcoe.authority = htons ( FCOE_AUTHORITY_IEEE ); + memcpy ( &fcoe->node_wwn.fcoe.mac, netdev->ll_addr, + sizeof ( fcoe->node_wwn.fcoe.mac ) ); + fcoe->port_wwn.fcoe.authority = htons ( FCOE_AUTHORITY_IEEE_EXTENDED ); + memcpy ( &fcoe->port_wwn.fcoe.mac, netdev->ll_addr, + sizeof ( fcoe->port_wwn.fcoe.mac ) ); + + DBGC ( fcoe, "FCoE %s is %s", fcoe->netdev->name, + fc_ntoa ( &fcoe->node_wwn.fc ) ); + DBGC ( fcoe, " port %s\n", fc_ntoa ( &fcoe->port_wwn.fc ) ); + + /* Transfer reference to port list */ + list_add ( &fcoe->list, &fcoe_ports ); + return 0; + + netdev_put ( fcoe->netdev ); + err_zalloc: + err_non_ethernet: + return rc; +} + +/** + * Handle FCoE port device or link state change + * + * @v netdev Network device + */ +static void fcoe_notify ( struct net_device *netdev ) { + struct fcoe_port *fcoe; + + /* Sanity check */ + if ( ( fcoe = fcoe_demux ( netdev ) ) == NULL ) { + DBG ( "FCoE notification for net device %s missing FCoE " + "port\n", netdev->name ); + return; + } + + /* Reset the FCoE link if necessary */ + if ( ! ( netdev_is_open ( netdev ) && + netdev_link_ok ( netdev ) && + ( fcoe->flags & FCOE_HAVE_NETWORK ) ) ) { + fcoe_reset ( fcoe ); + } +} + +/** + * Destroy FCoE port + * + * @v netdev Network device + */ +static void fcoe_remove ( struct net_device *netdev ) { + struct fcoe_port *fcoe; + + /* Sanity check */ + if ( ( fcoe = fcoe_demux ( netdev ) ) == NULL ) { + DBG ( "FCoE removal of net device %s missing FCoE port\n", + netdev->name ); + return; + } + + /* Close FCoE device */ + fcoe_close ( fcoe, 0 ); +} + +/** FCoE driver */ +struct net_driver fcoe_driver __net_driver = { + .name = "FCoE", + .probe = fcoe_probe, + .notify = fcoe_notify, + .remove = fcoe_remove, +}; + +/** FCoE protocol */ +struct net_protocol fcoe_protocol __net_protocol = { + .name = "FCoE", + .net_proto = htons ( ETH_P_FCOE ), + .rx = fcoe_rx, +}; + +/** FIP protocol */ +struct net_protocol fip_protocol __net_protocol = { + .name = "FIP", + .net_proto = htons ( ETH_P_FIP ), + .rx = fcoe_fip_rx, +}; + +/** Human-readable message for CRC errors + * + * It seems as though several drivers neglect to strip the Ethernet + * CRC, which will cause the FCoE footer to be misplaced and result + * (coincidentally) in an "invalid CRC" error from FCoE. + */ +struct errortab fcoe_errors[] __errortab = { + __einfo_errortab ( EINFO_EINVAL_CRC ), +}; diff --git a/qemu/roms/ipxe/src/net/fcp.c b/qemu/roms/ipxe/src/net/fcp.c new file mode 100644 index 000000000..9c36a4c72 --- /dev/null +++ b/qemu/roms/ipxe/src/net/fcp.c @@ -0,0 +1,1092 @@ +/* + * Copyright (C) 2010 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include <assert.h> +#include <byteswap.h> +#include <ipxe/refcnt.h> +#include <ipxe/list.h> +#include <ipxe/interface.h> +#include <ipxe/xfer.h> +#include <ipxe/iobuf.h> +#include <ipxe/open.h> +#include <ipxe/process.h> +#include <ipxe/uri.h> +#include <ipxe/acpi.h> +#include <ipxe/scsi.h> +#include <ipxe/device.h> +#include <ipxe/edd.h> +#include <ipxe/fc.h> +#include <ipxe/fcels.h> +#include <ipxe/fcp.h> + +/** @file + * + * Fibre Channel Protocol + * + */ + +/* Disambiguate the various error causes */ +#define ERANGE_READ_DATA_ORDERING \ + __einfo_error ( EINFO_ERANGE_READ_DATA_ORDERING ) +#define EINFO_ERANGE_READ_DATA_ORDERING \ + __einfo_uniqify ( EINFO_ERANGE, 0x01, "Read data out of order" ) +#define ERANGE_READ_DATA_OVERRUN \ + __einfo_error ( EINFO_ERANGE_READ_DATA_OVERRUN ) +#define EINFO_ERANGE_READ_DATA_OVERRUN \ + __einfo_uniqify ( EINFO_ERANGE, 0x02, "Read data overrun" ) +#define ERANGE_WRITE_DATA_STUCK \ + __einfo_error ( EINFO_ERANGE_WRITE_DATA_STUCK ) +#define EINFO_ERANGE_WRITE_DATA_STUCK \ + __einfo_uniqify ( EINFO_ERANGE, 0x03, "Write data stuck" ) +#define ERANGE_WRITE_DATA_OVERRUN \ + __einfo_error ( EINFO_ERANGE_WRITE_DATA_OVERRUN ) +#define EINFO_ERANGE_WRITE_DATA_OVERRUN \ + __einfo_uniqify ( EINFO_ERANGE, 0x04, "Write data overrun" ) +#define ERANGE_DATA_UNDERRUN \ + __einfo_error ( EINFO_ERANGE_DATA_UNDERRUN ) +#define EINFO_ERANGE_DATA_UNDERRUN \ + __einfo_uniqify ( EINFO_ERANGE, 0x05, "Data underrun" ) + +/****************************************************************************** + * + * PRLI + * + ****************************************************************************** + */ + +struct fc_els_prli_descriptor fcp_prli_descriptor __fc_els_prli_descriptor; + +/** + * Transmit FCP PRLI + * + * @v els Fibre Channel ELS transaction + * @ret rc Return status code + */ +static int fcp_prli_tx ( struct fc_els *els ) { + struct fcp_prli_service_parameters param; + + /* Build service parameter page */ + memset ( ¶m, 0, sizeof ( param ) ); + param.flags = htonl ( FCP_PRLI_NO_READ_RDY | FCP_PRLI_INITIATOR ); + + return fc_els_prli_tx ( els, &fcp_prli_descriptor, ¶m ); +} + +/** + * Receive FCP PRLI + * + * @v els Fibre Channel ELS transaction + * @v frame ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fcp_prli_rx ( struct fc_els *els, void *data, size_t len ) { + return fc_els_prli_rx ( els, &fcp_prli_descriptor, data, len ); +} + +/** + * Detect FCP PRLI + * + * @v els Fibre Channel ELS transaction + * @v data ELS frame + * @v len Length of ELS frame + * @ret rc Return status code + */ +static int fcp_prli_detect ( struct fc_els *els, const void *data, + size_t len ) { + return fc_els_prli_detect ( els, &fcp_prli_descriptor, data, len ); +} + +/** FCP PRLI ELS handler */ +struct fc_els_handler fcp_prli_handler __fc_els_handler = { + .name = "PRLI-FCP", + .tx = fcp_prli_tx, + .rx = fcp_prli_rx, + .detect = fcp_prli_detect, +}; + +/** FCP PRLI descriptor */ +struct fc_els_prli_descriptor fcp_prli_descriptor __fc_els_prli_descriptor = { + .type = FC_TYPE_FCP, + .param_len = sizeof ( struct fcp_prli_service_parameters ), + .handler = &fcp_prli_handler, +}; + +/****************************************************************************** + * + * FCP devices and commands + * + ****************************************************************************** + */ + +/** An FCP device */ +struct fcp_device { + /** Reference count */ + struct refcnt refcnt; + /** Fibre Channel upper-layer protocol user */ + struct fc_ulp_user user; + /** SCSI command issuing interface */ + struct interface scsi; + /** List of active commands */ + struct list_head fcpcmds; + + /** Fibre Channel WWN (for boot firmware table) */ + struct fc_name wwn; + /** SCSI LUN (for boot firmware table) */ + struct scsi_lun lun; +}; + +/** An FCP command */ +struct fcp_command { + /** Reference count */ + struct refcnt refcnt; + /** FCP SCSI device */ + struct fcp_device *fcpdev; + /** List of active commands */ + struct list_head list; + /** SCSI command interface */ + struct interface scsi; + /** Fibre Channel exchange interface */ + struct interface xchg; + /** Send process */ + struct process process; + /** Send current IU + * + * @v fcpcmd FCP command + * @ret rc Return status code + */ + int ( * send ) ( struct fcp_command *fcpcmd ); + /** SCSI command */ + struct scsi_cmd command; + /** Data offset within command */ + size_t offset; + /** Length of data remaining to be sent within this IU */ + size_t remaining; + /** Exchange ID */ + uint16_t xchg_id; +}; + +/** + * Get reference to FCP device + * + * @v fcpdev FCP device + * @ret fcpdev FCP device + */ +static inline __attribute__ (( always_inline )) struct fcp_device * +fcpdev_get ( struct fcp_device *fcpdev ) { + ref_get ( &fcpdev->refcnt ); + return fcpdev; +} + +/** + * Drop reference to FCP device + * + * @v fcpdev FCP device + */ +static inline __attribute__ (( always_inline )) void +fcpdev_put ( struct fcp_device *fcpdev ) { + ref_put ( &fcpdev->refcnt ); +} + +/** + * Get reference to FCP command + * + * @v fcpcmd FCP command + * @ret fcpcmd FCP command + */ +static inline __attribute__ (( always_inline )) struct fcp_command * +fcpcmd_get ( struct fcp_command *fcpcmd ) { + ref_get ( &fcpcmd->refcnt ); + return fcpcmd; +} + +/** + * Drop reference to FCP command + * + * @v fcpcmd FCP command + */ +static inline __attribute__ (( always_inline )) void +fcpcmd_put ( struct fcp_command *fcpcmd ) { + ref_put ( &fcpcmd->refcnt ); +} + +/** + * Start FCP command sending + * + * @v fcpcmd FCP command + * @v send Send method + */ +static inline __attribute__ (( always_inline )) void +fcpcmd_start_send ( struct fcp_command *fcpcmd, + int ( * send ) ( struct fcp_command *fcpcmd ) ) { + fcpcmd->send = send; + process_add ( &fcpcmd->process ); +} + +/** + * Stop FCP command sending + * + * @v fcpcmd FCP command + */ +static inline __attribute__ (( always_inline )) void +fcpcmd_stop_send ( struct fcp_command *fcpcmd ) { + process_del ( &fcpcmd->process ); +} + +/** + * Free FCP command + * + * @v refcnt Reference count + */ +static void fcpcmd_free ( struct refcnt *refcnt ) { + struct fcp_command *fcpcmd = + container_of ( refcnt, struct fcp_command, refcnt ); + + /* Remove from list of commands */ + list_del ( &fcpcmd->list ); + fcpdev_put ( fcpcmd->fcpdev ); + + /* Free command */ + free ( fcpcmd ); +} + +/** + * Close FCP command + * + * @v fcpcmd FCP command + * @v rc Reason for close + */ +static void fcpcmd_close ( struct fcp_command *fcpcmd, int rc ) { + struct fcp_device *fcpdev = fcpcmd->fcpdev; + + if ( rc != 0 ) { + DBGC ( fcpdev, "FCP %p xchg %04x closed: %s\n", + fcpdev, fcpcmd->xchg_id, strerror ( rc ) ); + } + + /* Stop sending */ + fcpcmd_stop_send ( fcpcmd ); + + /* Shut down interfaces */ + intf_shutdown ( &fcpcmd->scsi, rc ); + intf_shutdown ( &fcpcmd->xchg, rc ); +} + +/** + * Close FCP command in error + * + * @v fcpcmd FCP command + * @v rc Reason for close + */ +static void fcpcmd_close_err ( struct fcp_command *fcpcmd, int rc ) { + if ( rc == 0 ) + rc = -EPIPE; + fcpcmd_close ( fcpcmd, rc ); +} + +/** + * Send FCP command IU + * + * @v fcpcmd FCP command + * @ret rc Return status code + */ +static int fcpcmd_send_cmnd ( struct fcp_command *fcpcmd ) { + struct fcp_device *fcpdev = fcpcmd->fcpdev; + struct scsi_cmd *command = &fcpcmd->command; + struct io_buffer *iobuf; + struct fcp_cmnd *cmnd; + struct xfer_metadata meta; + int rc; + + /* Sanity check */ + if ( command->data_in_len && command->data_out_len ) { + DBGC ( fcpdev, "FCP %p xchg %04x cannot handle bidirectional " + "command\n", fcpdev, fcpcmd->xchg_id ); + return -ENOTSUP; + } + + /* Allocate I/O buffer */ + iobuf = xfer_alloc_iob ( &fcpcmd->xchg, sizeof ( *cmnd ) ); + if ( ! iobuf ) { + DBGC ( fcpdev, "FCP %p xchg %04x cannot allocate command IU\n", + fcpdev, fcpcmd->xchg_id ); + return -ENOMEM; + } + + /* Construct command IU frame */ + cmnd = iob_put ( iobuf, sizeof ( *cmnd ) ); + memset ( cmnd, 0, sizeof ( *cmnd ) ); + memcpy ( &cmnd->lun, &command->lun, sizeof ( cmnd->lun ) ); + assert ( ! ( command->data_in_len && command->data_out_len ) ); + if ( command->data_in_len ) + cmnd->dirn |= FCP_CMND_RDDATA; + if ( command->data_out_len ) + cmnd->dirn |= FCP_CMND_WRDATA; + memcpy ( &cmnd->cdb, &fcpcmd->command.cdb, sizeof ( cmnd->cdb ) ); + cmnd->len = htonl ( command->data_in_len + command->data_out_len ); + memset ( &meta, 0, sizeof ( meta ) ); + meta.flags = ( XFER_FL_CMD_STAT | XFER_FL_OVER ); + DBGC2 ( fcpdev, "FCP %p xchg %04x CMND " SCSI_CDB_FORMAT " %04x\n", + fcpdev, fcpcmd->xchg_id, SCSI_CDB_DATA ( cmnd->cdb ), + ntohl ( cmnd->len ) ); + + /* No further data to send within this IU */ + fcpcmd_stop_send ( fcpcmd ); + + /* Send command IU frame */ + if ( ( rc = xfer_deliver ( &fcpcmd->xchg, iob_disown ( iobuf ), + &meta ) ) != 0 ) { + DBGC ( fcpdev, "FCP %p xchg %04x cannot deliver command IU: " + "%s\n", fcpdev, fcpcmd->xchg_id, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Handle FCP read data IU + * + * @v fcpcmd FCP command + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int fcpcmd_recv_rddata ( struct fcp_command *fcpcmd, + struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + struct fcp_device *fcpdev = fcpcmd->fcpdev; + struct scsi_cmd *command = &fcpcmd->command; + size_t offset = meta->offset; + size_t len = iob_len ( iobuf ); + int rc; + + /* Sanity checks */ + if ( ! ( meta->flags & XFER_FL_ABS_OFFSET ) ) { + DBGC ( fcpdev, "FCP %p xchg %04x read data missing offset\n", + fcpdev, fcpcmd->xchg_id ); + rc = -ERANGE_READ_DATA_ORDERING; + goto done; + } + if ( offset != fcpcmd->offset ) { + DBGC ( fcpdev, "FCP %p xchg %04x read data out of order " + "(expected %zd, received %zd)\n", + fcpdev, fcpcmd->xchg_id, fcpcmd->offset, offset ); + rc = -ERANGE_READ_DATA_ORDERING; + goto done; + } + if ( ( offset + len ) > command->data_in_len ) { + DBGC ( fcpdev, "FCP %p xchg %04x read data overrun (max %zd, " + "received %zd)\n", fcpdev, fcpcmd->xchg_id, + command->data_in_len, ( offset + len ) ); + rc = -ERANGE_READ_DATA_OVERRUN; + goto done; + } + DBGC2 ( fcpdev, "FCP %p xchg %04x RDDATA [%08zx,%08zx)\n", + fcpdev, fcpcmd->xchg_id, offset, ( offset + len ) ); + + /* Copy to user buffer */ + copy_to_user ( command->data_in, offset, iobuf->data, len ); + fcpcmd->offset += len; + assert ( fcpcmd->offset <= command->data_in_len ); + + rc = 0; + done: + free_iob ( iobuf ); + return rc; +} + +/** + * Send FCP write data IU + * + * @v fcpcmd FCP command + * @ret rc Return status code + */ +static int fcpcmd_send_wrdata ( struct fcp_command *fcpcmd ) { + struct fcp_device *fcpdev = fcpcmd->fcpdev; + struct scsi_cmd *command = &fcpcmd->command; + struct io_buffer *iobuf; + struct xfer_metadata meta; + size_t len; + int rc; + + /* Calculate length to be sent */ + len = xfer_window ( &fcpcmd->xchg ); + if ( len > fcpcmd->remaining ) + len = fcpcmd->remaining; + + /* Sanity checks */ + if ( len == 0 ) { + DBGC ( fcpdev, "FCP %p xchg %04x write data stuck\n", + fcpdev, fcpcmd->xchg_id ); + return -ERANGE_WRITE_DATA_STUCK; + } + if ( ( fcpcmd->offset + len ) > command->data_out_len ) { + DBGC ( fcpdev, "FCP %p xchg %04x write data overrun (max %zd, " + "requested %zd)\n", fcpdev, fcpcmd->xchg_id, + command->data_out_len, ( fcpcmd->offset + len ) ); + return -ERANGE_WRITE_DATA_OVERRUN; + } + + /* Allocate I/O buffer */ + iobuf = xfer_alloc_iob ( &fcpcmd->xchg, len ); + if ( ! iobuf ) { + DBGC ( fcpdev, "FCP %p xchg %04x cannot allocate write data " + "IU for %zd bytes\n", fcpdev, fcpcmd->xchg_id, len ); + return -ENOMEM; + } + + /* Construct data IU frame */ + copy_from_user ( iob_put ( iobuf, len ), command->data_out, + fcpcmd->offset, len ); + memset ( &meta, 0, sizeof ( meta ) ); + meta.flags = ( XFER_FL_RESPONSE | XFER_FL_ABS_OFFSET ); + meta.offset = fcpcmd->offset; + DBGC2 ( fcpdev, "FCP %p xchg %04x WRDATA [%08zx,%04zx)\n", + fcpdev, fcpcmd->xchg_id, fcpcmd->offset, + ( fcpcmd->offset + iob_len ( iobuf ) ) ); + + /* Calculate amount of data remaining to be sent within this IU */ + assert ( len <= fcpcmd->remaining ); + fcpcmd->offset += len; + fcpcmd->remaining -= len; + assert ( fcpcmd->offset <= command->data_out_len ); + if ( fcpcmd->remaining == 0 ) { + fcpcmd_stop_send ( fcpcmd ); + meta.flags |= XFER_FL_OVER; + } + + /* Send data IU frame */ + if ( ( rc = xfer_deliver ( &fcpcmd->xchg, iob_disown ( iobuf ), + &meta ) ) != 0 ) { + DBGC ( fcpdev, "FCP %p xchg %04x cannot deliver write data " + "IU: %s\n", fcpdev, fcpcmd->xchg_id, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Handle FCP transfer ready IU + * + * @v fcpcmd FCP command + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int fcpcmd_recv_xfer_rdy ( struct fcp_command *fcpcmd, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + struct fcp_device *fcpdev = fcpcmd->fcpdev; + struct fcp_xfer_rdy *xfer_rdy = iobuf->data; + int rc; + + /* Sanity checks */ + if ( iob_len ( iobuf ) != sizeof ( *xfer_rdy ) ) { + DBGC ( fcpdev, "FCP %p xchg %04x received invalid transfer " + "ready IU:\n", fcpdev, fcpcmd->xchg_id ); + DBGC_HDA ( fcpdev, 0, iobuf->data, iob_len ( iobuf ) ); + rc = -EPROTO; + goto done; + } + if ( ntohl ( xfer_rdy->offset ) != fcpcmd->offset ) { + /* We do not advertise out-of-order delivery */ + DBGC ( fcpdev, "FCP %p xchg %04x cannot support out-of-order " + "delivery (expected %zd, requested %d)\n", + fcpdev, fcpcmd->xchg_id, fcpcmd->offset, + ntohl ( xfer_rdy->offset ) ); + rc = -EPROTO; + goto done; + } + DBGC2 ( fcpdev, "FCP %p xchg %04x XFER_RDY [%08x,%08x)\n", + fcpdev, fcpcmd->xchg_id, ntohl ( xfer_rdy->offset ), + ( ntohl ( xfer_rdy->offset ) + ntohl ( xfer_rdy->len ) ) ); + + /* Start sending requested data */ + fcpcmd->remaining = ntohl ( xfer_rdy->len ); + fcpcmd_start_send ( fcpcmd, fcpcmd_send_wrdata ); + + rc = 0; + done: + free_iob ( iobuf ); + return rc; +} + +/** + * Handle FCP response IU + * + * @v fcpcmd FCP command + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int fcpcmd_recv_rsp ( struct fcp_command *fcpcmd, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + struct fcp_device *fcpdev = fcpcmd->fcpdev; + struct scsi_cmd *command = &fcpcmd->command; + struct fcp_rsp *rsp = iobuf->data; + struct scsi_rsp response; + int rc; + + /* Sanity check */ + if ( ( iob_len ( iobuf ) < sizeof ( *rsp ) ) || + ( iob_len ( iobuf ) < ( sizeof ( *rsp ) + + fcp_rsp_response_data_len ( rsp ) + + fcp_rsp_sense_data_len ( rsp ) ) ) ) { + DBGC ( fcpdev, "FCP %p xchg %04x received invalid response " + "IU:\n", fcpdev, fcpcmd->xchg_id ); + DBGC_HDA ( fcpdev, 0, iobuf->data, iob_len ( iobuf ) ); + rc = -EPROTO; + goto done; + } + DBGC2 ( fcpdev, "FCP %p xchg %04x RSP stat %02x resid %08x flags %02x" + "%s%s%s%s\n", fcpdev, fcpcmd->xchg_id, rsp->status, + ntohl ( rsp->residual ), rsp->flags, + ( ( rsp->flags & FCP_RSP_RESPONSE_LEN_VALID ) ? " resp" : "" ), + ( ( rsp->flags & FCP_RSP_SENSE_LEN_VALID ) ? " sense" : "" ), + ( ( rsp->flags & FCP_RSP_RESIDUAL_OVERRUN ) ? " over" : "" ), + ( ( rsp->flags & FCP_RSP_RESIDUAL_UNDERRUN ) ? " under" : "" )); + if ( fcp_rsp_response_data ( rsp ) ) { + DBGC2 ( fcpdev, "FCP %p xchg %04x response data:\n", + fcpdev, fcpcmd->xchg_id ); + DBGC2_HDA ( fcpdev, 0, fcp_rsp_response_data ( rsp ), + fcp_rsp_response_data_len ( rsp ) ); + } + if ( fcp_rsp_sense_data ( rsp ) ) { + DBGC2 ( fcpdev, "FCP %p xchg %04x sense data:\n", + fcpdev, fcpcmd->xchg_id ); + DBGC2_HDA ( fcpdev, 0, fcp_rsp_sense_data ( rsp ), + fcp_rsp_sense_data_len ( rsp ) ); + } + + /* Check for locally-detected command underrun */ + if ( ( rsp->status == 0 ) && + ( fcpcmd->offset != ( command->data_in_len + + command->data_out_len ) ) ) { + DBGC ( fcpdev, "FCP %p xchg %04x data underrun (expected %zd, " + "got %zd)\n", fcpdev, fcpcmd->xchg_id, + ( command->data_in_len + command->data_out_len ), + fcpcmd->offset ); + rc = -ERANGE_DATA_UNDERRUN; + goto done; + } + + /* Build SCSI response */ + memset ( &response, 0, sizeof ( response ) ); + response.status = rsp->status; + if ( rsp->flags & ( FCP_RSP_RESIDUAL_OVERRUN | + FCP_RSP_RESIDUAL_UNDERRUN ) ) { + response.overrun = ntohl ( rsp->residual ); + if ( rsp->flags & FCP_RSP_RESIDUAL_UNDERRUN ) + response.overrun = -response.overrun; + } + scsi_parse_sense ( fcp_rsp_sense_data ( rsp ), + fcp_rsp_sense_data_len ( rsp ), &response.sense ); + + /* Free buffer before sending response, to minimise + * out-of-memory errors. + */ + free_iob ( iob_disown ( iobuf ) ); + + /* Send SCSI response */ + scsi_response ( &fcpcmd->scsi, &response ); + + /* Terminate command */ + fcpcmd_close ( fcpcmd, 0 ); + + rc = 0; + done: + free_iob ( iobuf ); + return rc; +} + +/** + * Handle unknown FCP IU + * + * @v fcpcmd FCP command + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int fcpcmd_recv_unknown ( struct fcp_command *fcpcmd, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + struct fcp_device *fcpdev = fcpcmd->fcpdev; + + DBGC ( fcpdev, "FCP %p xchg %04x received unknown IU:\n", + fcpdev, fcpcmd->xchg_id ); + DBGC_HDA ( fcpdev, 0, iobuf->data, iob_len ( iobuf ) ); + free_iob ( iobuf ); + return -EPROTO; +} + +/** + * Transmit FCP frame + * + * @v fcpcmd FCP command + */ +static void fcpcmd_step ( struct fcp_command *fcpcmd ) { + int rc; + + /* Send the current IU */ + if ( ( rc = fcpcmd->send ( fcpcmd ) ) != 0 ) { + /* Treat failure as a fatal error */ + fcpcmd_close ( fcpcmd, rc ); + } +} + +/** + * Receive FCP frame + * + * @v fcpcmd FCP command + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int fcpcmd_deliver ( struct fcp_command *fcpcmd, + struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + int ( * fcpcmd_recv ) ( struct fcp_command *fcpcmd, + struct io_buffer *iobuf, + struct xfer_metadata *meta ); + int rc; + + /* Determine handler */ + switch ( meta->flags & ( XFER_FL_CMD_STAT | XFER_FL_RESPONSE ) ) { + case ( XFER_FL_RESPONSE ) : + fcpcmd_recv = fcpcmd_recv_rddata; + break; + case ( XFER_FL_CMD_STAT ) : + fcpcmd_recv = fcpcmd_recv_xfer_rdy; + break; + case ( XFER_FL_CMD_STAT | XFER_FL_RESPONSE ) : + fcpcmd_recv = fcpcmd_recv_rsp; + break; + default: + fcpcmd_recv = fcpcmd_recv_unknown; + break; + } + + /* Handle IU */ + if ( ( rc = fcpcmd_recv ( fcpcmd, iob_disown ( iobuf ), meta ) ) != 0 ){ + /* Treat any error as fatal to the command */ + fcpcmd_close ( fcpcmd, rc ); + } + + return rc; +} + +/** FCP command SCSI interface operations */ +static struct interface_operation fcpcmd_scsi_op[] = { + INTF_OP ( intf_close, struct fcp_command *, fcpcmd_close ), +}; + +/** FCP command SCSI interface descriptor */ +static struct interface_descriptor fcpcmd_scsi_desc = + INTF_DESC_PASSTHRU ( struct fcp_command, scsi, fcpcmd_scsi_op, xchg ); + +/** FCP command Fibre Channel exchange interface operations */ +static struct interface_operation fcpcmd_xchg_op[] = { + INTF_OP ( xfer_deliver, struct fcp_command *, fcpcmd_deliver ), + INTF_OP ( intf_close, struct fcp_command *, fcpcmd_close_err ), +}; + +/** FCP command Fibre Channel exchange interface descriptor */ +static struct interface_descriptor fcpcmd_xchg_desc = + INTF_DESC_PASSTHRU ( struct fcp_command, xchg, fcpcmd_xchg_op, scsi ); + +/** FCP command process descriptor */ +static struct process_descriptor fcpcmd_process_desc = + PROC_DESC ( struct fcp_command, process, fcpcmd_step ); + +/** + * Issue FCP SCSI command + * + * @v fcpdev FCP device + * @v parent Parent interface + * @v command SCSI command + * @ret tag Command tag, or negative error + */ +static int fcpdev_scsi_command ( struct fcp_device *fcpdev, + struct interface *parent, + struct scsi_cmd *command ) { + struct fcp_prli_service_parameters *param = fcpdev->user.ulp->param; + struct fcp_command *fcpcmd; + int xchg_id; + int rc; + + /* Check link */ + if ( ( rc = fcpdev->user.ulp->link.rc ) != 0 ) { + DBGC ( fcpdev, "FCP %p could not issue command while link is " + "down: %s\n", fcpdev, strerror ( rc ) ); + goto err_link; + } + + /* Check target capability */ + assert ( param != NULL ); + assert ( fcpdev->user.ulp->param_len >= sizeof ( *param ) ); + if ( ! ( param->flags & htonl ( FCP_PRLI_TARGET ) ) ) { + DBGC ( fcpdev, "FCP %p could not issue command: not a target\n", + fcpdev ); + rc = -ENOTTY; + goto err_target; + } + + /* Allocate and initialise structure */ + fcpcmd = zalloc ( sizeof ( *fcpcmd ) ); + if ( ! fcpcmd ) { + rc = -ENOMEM; + goto err_zalloc; + } + ref_init ( &fcpcmd->refcnt, fcpcmd_free ); + intf_init ( &fcpcmd->scsi, &fcpcmd_scsi_desc, &fcpcmd->refcnt ); + intf_init ( &fcpcmd->xchg, &fcpcmd_xchg_desc, &fcpcmd->refcnt ); + process_init_stopped ( &fcpcmd->process, &fcpcmd_process_desc, + &fcpcmd->refcnt ); + fcpcmd->fcpdev = fcpdev_get ( fcpdev ); + list_add ( &fcpcmd->list, &fcpdev->fcpcmds ); + memcpy ( &fcpcmd->command, command, sizeof ( fcpcmd->command ) ); + + /* Create new exchange */ + if ( ( xchg_id = fc_xchg_originate ( &fcpcmd->xchg, + fcpdev->user.ulp->peer->port, + &fcpdev->user.ulp->peer->port_id, + FC_TYPE_FCP ) ) < 0 ) { + rc = xchg_id; + DBGC ( fcpdev, "FCP %p could not create exchange: %s\n", + fcpdev, strerror ( rc ) ); + goto err_xchg_originate; + } + fcpcmd->xchg_id = xchg_id; + + /* Start sending command IU */ + fcpcmd_start_send ( fcpcmd, fcpcmd_send_cmnd ); + + /* Attach to parent interface, mortalise self, and return */ + intf_plug_plug ( &fcpcmd->scsi, parent ); + ref_put ( &fcpcmd->refcnt ); + return ( FCP_TAG_MAGIC | fcpcmd->xchg_id ); + + err_xchg_originate: + fcpcmd_close ( fcpcmd, rc ); + ref_put ( &fcpcmd->refcnt ); + err_zalloc: + err_target: + err_link: + return rc; +} + +/** + * Close FCP device + * + * @v fcpdev FCP device + * @v rc Reason for close + */ +static void fcpdev_close ( struct fcp_device *fcpdev, int rc ) { + struct fcp_command *fcpcmd; + struct fcp_command *tmp; + + DBGC ( fcpdev, "FCP %p closed: %s\n", fcpdev, strerror ( rc ) ); + + /* Shut down interfaces */ + intf_shutdown ( &fcpdev->scsi, rc ); + + /* Shut down any active commands */ + list_for_each_entry_safe ( fcpcmd, tmp, &fcpdev->fcpcmds, list ) { + fcpcmd_get ( fcpcmd ); + fcpcmd_close ( fcpcmd, rc ); + fcpcmd_put ( fcpcmd ); + } + + /* Drop reference to ULP */ + fc_ulp_detach ( &fcpdev->user ); +} + +/** + * Check FCP device flow-control window + * + * @v fcpdev FCP device + * @ret len Length of window + */ +static size_t fcpdev_window ( struct fcp_device *fcpdev ) { + return ( fc_link_ok ( &fcpdev->user.ulp->link ) ? + ~( ( size_t ) 0 ) : 0 ); +} + +/** + * Describe FCP device in an ACPI table + * + * @v fcpdev FCP device + * @v acpi ACPI table + * @v len Length of ACPI table + * @ret rc Return status code + */ +static int fcpdev_acpi_describe ( struct fcp_device *fcpdev, + struct acpi_description_header *acpi, + size_t len ) { + + DBGC ( fcpdev, "FCP %p cannot yet describe device in an ACPI table\n", + fcpdev ); + ( void ) acpi; + ( void ) len; + return 0; +} + +/** + * Describe FCP device using EDD + * + * @v fcpdev FCP device + * @v type EDD interface type + * @v path EDD device path + * @ret rc Return status code + */ +static int fcpdev_edd_describe ( struct fcp_device *fcpdev, + struct edd_interface_type *type, + union edd_device_path *path ) { + union { + struct fc_name fc; + uint64_t u64; + } wwn; + union { + struct scsi_lun scsi; + uint64_t u64; + } lun; + + type->type = cpu_to_le64 ( EDD_INTF_TYPE_FIBRE ); + memcpy ( &wwn.fc, &fcpdev->wwn, sizeof ( wwn.fc ) ); + path->fibre.wwn = be64_to_cpu ( wwn.u64 ); + memcpy ( &lun.scsi, &fcpdev->lun, sizeof ( lun.scsi ) ); + path->fibre.lun = be64_to_cpu ( lun.u64 ); + return 0; +} + +/** + * Identify device underlying FCP device + * + * @v fcpdev FCP device + * @ret device Underlying device + */ +static struct device * fcpdev_identify_device ( struct fcp_device *fcpdev ) { + + /* We know the underlying device only if the link is up; + * otherwise we don't have a port to examine. + */ + if ( ! fc_link_ok ( &fcpdev->user.ulp->link ) ) { + DBGC ( fcpdev, "FCP %p doesn't know underlying device " + "until link is up\n", fcpdev ); + return NULL; + } + + /* Hand off to port's transport interface */ + assert ( fcpdev->user.ulp->peer->port != NULL ); + return identify_device ( &fcpdev->user.ulp->peer->port->transport ); +} + +/** FCP device SCSI interface operations */ +static struct interface_operation fcpdev_scsi_op[] = { + INTF_OP ( scsi_command, struct fcp_device *, fcpdev_scsi_command ), + INTF_OP ( xfer_window, struct fcp_device *, fcpdev_window ), + INTF_OP ( intf_close, struct fcp_device *, fcpdev_close ), + INTF_OP ( acpi_describe, struct fcp_device *, fcpdev_acpi_describe ), + INTF_OP ( edd_describe, struct fcp_device *, fcpdev_edd_describe ), + INTF_OP ( identify_device, struct fcp_device *, + fcpdev_identify_device ), +}; + +/** FCP device SCSI interface descriptor */ +static struct interface_descriptor fcpdev_scsi_desc = + INTF_DESC ( struct fcp_device, scsi, fcpdev_scsi_op ); + +/** + * Examine FCP ULP link state + * + * @v user Fibre Channel upper-layer protocol user + */ +static void fcpdev_examine ( struct fc_ulp_user *user ) { + struct fcp_device *fcpdev = + container_of ( user, struct fcp_device, user ); + + if ( fc_link_ok ( &fcpdev->user.ulp->link ) ) { + DBGC ( fcpdev, "FCP %p link is up\n", fcpdev ); + } else { + DBGC ( fcpdev, "FCP %p link is down: %s\n", + fcpdev, strerror ( fcpdev->user.ulp->link.rc ) ); + } + + /* Notify SCSI layer of window change */ + xfer_window_changed ( &fcpdev->scsi ); +} + +/** + * Open FCP device + * + * @v parent Parent interface + * @v wwn Fibre Channel WWN + * @v lun SCSI LUN + * @ret rc Return status code + */ +static int fcpdev_open ( struct interface *parent, struct fc_name *wwn, + struct scsi_lun *lun ) { + struct fc_ulp *ulp; + struct fcp_device *fcpdev; + int rc; + + /* Get Fibre Channel ULP interface */ + ulp = fc_ulp_get_wwn_type ( wwn, FC_TYPE_FCP ); + if ( ! ulp ) { + rc = -ENOMEM; + goto err_ulp_get; + } + + /* Allocate and initialise structure */ + fcpdev = zalloc ( sizeof ( *fcpdev ) ); + if ( ! fcpdev ) { + rc = -ENOMEM; + goto err_zalloc; + } + ref_init ( &fcpdev->refcnt, NULL ); + intf_init ( &fcpdev->scsi, &fcpdev_scsi_desc, &fcpdev->refcnt ); + INIT_LIST_HEAD ( &fcpdev->fcpcmds ); + fc_ulp_user_init ( &fcpdev->user, fcpdev_examine, &fcpdev->refcnt ); + + DBGC ( fcpdev, "FCP %p opened for %s\n", fcpdev, fc_ntoa ( wwn ) ); + + /* Attach to Fibre Channel ULP */ + fc_ulp_attach ( ulp, &fcpdev->user ); + + /* Preserve parameters required for boot firmware table */ + memcpy ( &fcpdev->wwn, wwn, sizeof ( fcpdev->wwn ) ); + memcpy ( &fcpdev->lun, lun, sizeof ( fcpdev->lun ) ); + + /* Attach SCSI device to parent interface */ + if ( ( rc = scsi_open ( parent, &fcpdev->scsi, lun ) ) != 0 ) { + DBGC ( fcpdev, "FCP %p could not create SCSI device: %s\n", + fcpdev, strerror ( rc ) ); + goto err_scsi_open; + } + + /* Drop temporary reference to ULP */ + fc_ulp_put ( ulp ); + + /* Mortalise self and return */ + ref_put ( &fcpdev->refcnt ); + return 0; + + err_scsi_open: + fcpdev_close ( fcpdev, rc ); + ref_put ( &fcpdev->refcnt ); + err_zalloc: + fc_ulp_put ( ulp ); + err_ulp_get: + return rc; +} + +/****************************************************************************** + * + * FCP URIs + * + ****************************************************************************** + */ + +/** + * Parse FCP URI + * + * @v uri URI + * @ret wwn Fibre Channel WWN + * @ret lun SCSI LUN + * @ret rc Return status code + * + * An FCP URI has the form "fcp:<wwn>:<lun>" or "fcp://<wwn>/<lun>" + */ +static int fcp_parse_uri ( struct uri *uri, struct fc_name *wwn, + struct scsi_lun *lun ) { + char wwn_buf[ FC_NAME_STRLEN + 1 /* NUL */ ]; + const char *wwn_text; + const char *lun_text; + int rc; + + /* Extract WWN and LUN texts from URI */ + if ( uri->opaque ) { + /* "fcp:<wwn>:<lun>" */ + if ( snprintf ( wwn_buf, sizeof ( wwn_buf ), "%s", + uri->opaque ) < ( FC_NAME_STRLEN + 1 /* : */ ) ) + return -EINVAL; + if ( uri->opaque[FC_NAME_STRLEN] != ':' ) + return -EINVAL; + wwn_text = wwn_buf; + lun_text = &uri->opaque[FC_NAME_STRLEN + 1]; + } else { + /* If host exists, path must also exist */ + if ( ! ( uri->host && uri->path ) ) + return -EINVAL; + if ( uri->path[0] != '/' ) + return -EINVAL; + wwn_text = uri->host; + lun_text = ( uri->path + 1 ); + } + + /* Parse WWN */ + if ( ( rc = fc_aton ( wwn_text, wwn ) ) != 0 ) + return rc; + + /* Parse LUN */ + if ( ( rc = scsi_parse_lun ( lun_text, lun ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Open FCP URI + * + * @v parent Parent interface + * @v uri URI + * @ret rc Return status code + */ +static int fcp_open ( struct interface *parent, struct uri *uri ) { + struct fc_name wwn; + struct scsi_lun lun; + int rc; + + /* Parse URI */ + if ( ( rc = fcp_parse_uri ( uri, &wwn, &lun ) ) != 0 ) + return rc; + + /* Open FCP device */ + if ( ( rc = fcpdev_open ( parent, &wwn, &lun ) ) != 0 ) + return rc; + + return 0; +} + +/** FCP URI opener */ +struct uri_opener fcp_uri_opener __uri_opener = { + .scheme = "fcp", + .open = fcp_open, +}; diff --git a/qemu/roms/ipxe/src/net/fragment.c b/qemu/roms/ipxe/src/net/fragment.c new file mode 100644 index 000000000..410915b3b --- /dev/null +++ b/qemu/roms/ipxe/src/net/fragment.c @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <ipxe/retry.h> +#include <ipxe/timer.h> +#include <ipxe/ipstat.h> +#include <ipxe/fragment.h> + +/** @file + * + * Fragment reassembly + * + */ + +/** + * Expire fragment reassembly buffer + * + * @v timer Retry timer + * @v fail Failure indicator + */ +static void fragment_expired ( struct retry_timer *timer, int fail __unused ) { + struct fragment *fragment = + container_of ( timer, struct fragment, timer ); + + DBGC ( fragment, "FRAG %p expired\n", fragment ); + free_iob ( fragment->iobuf ); + list_del ( &fragment->list ); + fragment->fragments->stats->reasm_fails++; + free ( fragment ); +} + +/** + * Find fragment reassembly buffer + * + * @v fragments Fragment reassembler + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable potion of I/O buffer + * @ret fragment Fragment reassembly buffer, or NULL if not found + */ +static struct fragment * fragment_find ( struct fragment_reassembler *fragments, + struct io_buffer *iobuf, + size_t hdrlen ) { + struct fragment *fragment; + + list_for_each_entry ( fragment, &fragments->list, list ) { + if ( fragments->is_fragment ( fragment, iobuf, hdrlen ) ) + return fragment; + } + return NULL; +} + +/** + * Reassemble packet + * + * @v fragments Fragment reassembler + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable potion of I/O buffer + * @ret iobuf Reassembled packet, or NULL + * + * This function takes ownership of the I/O buffer. Note that the + * length of the non-fragmentable portion may be modified. + */ +struct io_buffer * fragment_reassemble ( struct fragment_reassembler *fragments, + struct io_buffer *iobuf, + size_t *hdrlen ) { + struct fragment *fragment; + struct io_buffer *new_iobuf; + size_t new_len; + size_t offset; + size_t expected_offset; + int more_frags; + + /* Update statistics */ + fragments->stats->reasm_reqds++; + + /* Find matching fragment reassembly buffer, if any */ + fragment = fragment_find ( fragments, iobuf, *hdrlen ); + + /* Drop out-of-order fragments */ + offset = fragments->fragment_offset ( iobuf, *hdrlen ); + expected_offset = ( fragment ? ( iob_len ( fragment->iobuf ) - + fragment->hdrlen ) : 0 ); + if ( offset != expected_offset ) { + DBGC ( fragment, "FRAG %p dropping out-of-sequence fragment " + "[%zd,%zd), expected [%zd,...)\n", fragment, offset, + ( offset + iob_len ( iobuf ) - *hdrlen ), + expected_offset ); + goto drop; + } + + /* Create or extend fragment reassembly buffer as applicable */ + if ( ! fragment ) { + + /* Create new fragment reassembly buffer */ + fragment = zalloc ( sizeof ( *fragment ) ); + if ( ! fragment ) + goto drop; + list_add ( &fragment->list, &fragments->list ); + fragment->iobuf = iobuf; + fragment->hdrlen = *hdrlen; + timer_init ( &fragment->timer, fragment_expired, NULL ); + fragment->fragments = fragments; + DBGC ( fragment, "FRAG %p [0,%zd)\n", fragment, + ( iob_len ( iobuf ) - *hdrlen ) ); + + } else { + + /* Check if this is the final fragment */ + more_frags = fragments->more_fragments ( iobuf, *hdrlen ); + DBGC ( fragment, "FRAG %p [%zd,%zd)%s\n", fragment, + offset, ( offset + iob_len ( iobuf ) - *hdrlen ), + ( more_frags ? "" : " complete" ) ); + + /* Extend fragment reassembly buffer. Preserve I/O + * buffer headroom to allow for code which modifies + * and resends the buffer (e.g. ICMP echo responses). + */ + iob_pull ( iobuf, *hdrlen ); + new_len = ( iob_headroom ( fragment->iobuf ) + + iob_len ( fragment->iobuf ) + iob_len ( iobuf ) ); + new_iobuf = alloc_iob ( new_len ); + if ( ! new_iobuf ) { + DBGC ( fragment, "FRAG %p could not extend reassembly " + "buffer to %zd bytes\n", fragment, new_len ); + goto drop; + } + iob_reserve ( new_iobuf, iob_headroom ( fragment->iobuf ) ); + memcpy ( iob_put ( new_iobuf, iob_len ( fragment->iobuf ) ), + fragment->iobuf->data, iob_len ( fragment->iobuf ) ); + memcpy ( iob_put ( new_iobuf, iob_len ( iobuf ) ), + iobuf->data, iob_len ( iobuf ) ); + free_iob ( fragment->iobuf ); + fragment->iobuf = new_iobuf; + free_iob ( iobuf ); + + /* Stop fragment reassembly timer */ + stop_timer ( &fragment->timer ); + + /* If this is the final fragment, return it */ + if ( ! more_frags ) { + iobuf = fragment->iobuf; + *hdrlen = fragment->hdrlen; + list_del ( &fragment->list ); + free ( fragment ); + fragments->stats->reasm_oks++; + return iobuf; + } + } + + /* (Re)start fragment reassembly timer */ + start_timer_fixed ( &fragment->timer, FRAGMENT_TIMEOUT ); + + return NULL; + + drop: + fragments->stats->reasm_fails++; + free_iob ( iobuf ); + return NULL; +} diff --git a/qemu/roms/ipxe/src/net/icmp.c b/qemu/roms/ipxe/src/net/icmp.c new file mode 100644 index 000000000..1bbf8bd30 --- /dev/null +++ b/qemu/roms/ipxe/src/net/icmp.c @@ -0,0 +1,226 @@ +/* + * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <ipxe/iobuf.h> +#include <ipxe/in.h> +#include <ipxe/tcpip.h> +#include <ipxe/ping.h> +#include <ipxe/crc32.h> +#include <ipxe/icmp.h> + +/** @file + * + * ICMP protocol + * + */ + +/** + * Identify ICMP echo protocol + * + * @v st_family Address family + * @ret echo_protocol ICMP echo protocol, or NULL + */ +static struct icmp_echo_protocol * icmp_echo_protocol ( sa_family_t family ) { + struct icmp_echo_protocol *echo_protocol; + + for_each_table_entry ( echo_protocol, ICMP_ECHO_PROTOCOLS ) { + if ( echo_protocol->family == family ) + return echo_protocol; + } + return NULL; +} + +/** + * + * Determine debugging colour for ICMP debug messages + * + * @v st_peer Peer address + * @ret col Debugging colour (for DBGC()) + */ +static uint32_t icmpcol ( struct sockaddr_tcpip *st_peer ) { + + return crc32_le ( 0, st_peer, sizeof ( *st_peer ) ); +} + +/** + * Transmit ICMP echo packet + * + * @v iobuf I/O buffer + * @v st_dest Destination socket address + * @v echo_protocol ICMP echo protocol + * @ret rc Return status code + */ +static int icmp_tx_echo ( struct io_buffer *iobuf, + struct sockaddr_tcpip *st_dest, + struct icmp_echo_protocol *echo_protocol ) { + struct icmp_echo *echo = iobuf->data; + int rc; + + /* Set ICMP type and (re)calculate checksum */ + echo->icmp.chksum = 0; + echo->icmp.chksum = tcpip_chksum ( echo, iob_len ( iobuf ) ); + + /* Transmit packet */ + if ( ( rc = tcpip_tx ( iobuf, echo_protocol->tcpip_protocol, NULL, + st_dest, NULL, + ( echo_protocol->net_checksum ? + &echo->icmp.chksum : NULL ) ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Transmit ICMP echo request + * + * @v iobuf I/O buffer + * @v st_dest Destination socket address + * @ret rc Return status code + */ +int icmp_tx_echo_request ( struct io_buffer *iobuf, + struct sockaddr_tcpip *st_dest ) { + struct icmp_echo *echo = iobuf->data; + struct icmp_echo_protocol *echo_protocol; + int rc; + + /* Identify ICMP echo protocol */ + echo_protocol = icmp_echo_protocol ( st_dest->st_family ); + if ( ! echo_protocol ) { + DBGC ( icmpcol ( st_dest ), "ICMP TX echo request unknown " + "address family %d\n", st_dest->st_family ); + free_iob ( iobuf ); + return -ENOTSUP; + } + + /* Set type */ + echo->icmp.type = echo_protocol->request; + + /* Transmit request */ + DBGC ( icmpcol ( st_dest ), "ICMP TX echo request id %04x seq %04x\n", + ntohs ( echo->ident ), ntohs ( echo->sequence ) ); + if ( ( rc = icmp_tx_echo ( iobuf, st_dest, echo_protocol ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Transmit ICMP echo reply + * + * @v iobuf I/O buffer + * @v st_dest Destination socket address + * @ret rc Return status code + */ +static int icmp_tx_echo_reply ( struct io_buffer *iobuf, + struct sockaddr_tcpip *st_dest, + struct icmp_echo_protocol *echo_protocol ) { + struct icmp_echo *echo = iobuf->data; + int rc; + + /* Set type */ + echo->icmp.type = echo_protocol->reply; + + /* Transmit reply */ + DBGC ( icmpcol ( st_dest ), "ICMP TX echo reply id %04x seq %04x\n", + ntohs ( echo->ident ), ntohs ( echo->sequence ) ); + if ( ( rc = icmp_tx_echo ( iobuf, st_dest, echo_protocol ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Process a received ICMP echo request + * + * @v iobuf I/O buffer + * @v st_src Source socket address + * @v echo_protocol ICMP echo protocol + * @ret rc Return status code + */ +int icmp_rx_echo_request ( struct io_buffer *iobuf, + struct sockaddr_tcpip *st_src, + struct icmp_echo_protocol *echo_protocol ) { + struct icmp_echo *echo = iobuf->data; + int rc; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *echo ) ) { + DBGC ( icmpcol ( st_src ), "ICMP RX echo request too short at " + "%zd bytes (min %zd bytes)\n", + iob_len ( iobuf ), sizeof ( *echo ) ); + free_iob ( iobuf ); + return -EINVAL; + } + DBGC ( icmpcol ( st_src ), "ICMP RX echo request id %04x seq %04x\n", + ntohs ( echo->ident ), ntohs ( echo->sequence ) ); + + /* Transmit echo reply */ + if ( ( rc = icmp_tx_echo_reply ( iobuf, st_src, echo_protocol ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Process a received ICMP echo request + * + * @v iobuf I/O buffer + * @v st_src Source socket address + * @ret rc Return status code + */ +int icmp_rx_echo_reply ( struct io_buffer *iobuf, + struct sockaddr_tcpip *st_src ) { + struct icmp_echo *echo = iobuf->data; + int rc; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *echo ) ) { + DBGC ( icmpcol ( st_src ), "ICMP RX echo reply too short at " + "%zd bytes (min %zd bytes)\n", + iob_len ( iobuf ), sizeof ( *echo ) ); + free_iob ( iobuf ); + return -EINVAL; + } + DBGC ( icmpcol ( st_src ), "ICMP RX echo reply id %04x seq %04x\n", + ntohs ( echo->ident ), ntohs ( echo->sequence ) ); + + /* Deliver to ping protocol */ + if ( ( rc = ping_rx ( iobuf, st_src ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Receive ping reply (when no ping protocol is present) + * + * @v iobuf I/O buffer + * @v st_src Source socket address + * @ret rc Return status code + */ +__weak int ping_rx ( struct io_buffer *iobuf, + struct sockaddr_tcpip *st_src __unused ) { + free_iob ( iobuf ); + return 0; +} diff --git a/qemu/roms/ipxe/src/net/icmpv4.c b/qemu/roms/ipxe/src/net/icmpv4.c new file mode 100644 index 000000000..996ba1490 --- /dev/null +++ b/qemu/roms/ipxe/src/net/icmpv4.c @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <string.h> +#include <errno.h> +#include <ipxe/iobuf.h> +#include <ipxe/in.h> +#include <ipxe/tcpip.h> +#include <ipxe/icmp.h> + +/** @file + * + * ICMPv4 protocol + * + */ + +struct icmp_echo_protocol icmpv4_echo_protocol __icmp_echo_protocol; + +/** + * Process a received packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v st_src Partially-filled source address + * @v st_dest Partially-filled destination address + * @v pshdr_csum Pseudo-header checksum + * @ret rc Return status code + */ +static int icmpv4_rx ( struct io_buffer *iobuf, + struct net_device *netdev __unused, + struct sockaddr_tcpip *st_src, + struct sockaddr_tcpip *st_dest __unused, + uint16_t pshdr_csum __unused ) { + struct icmp_header *icmp = iobuf->data; + size_t len = iob_len ( iobuf ); + unsigned int csum; + unsigned int type; + int rc; + + /* Sanity check */ + if ( len < sizeof ( *icmp ) ) { + DBG ( "ICMP packet too short at %zd bytes (min %zd bytes)\n", + len, sizeof ( *icmp ) ); + rc = -EINVAL; + goto discard; + } + + /* Verify checksum */ + csum = tcpip_chksum ( icmp, len ); + if ( csum != 0 ) { + DBG ( "ICMP checksum incorrect (is %04x, should be 0000)\n", + csum ); + DBG_HD ( icmp, len ); + rc = -EINVAL; + goto discard; + } + + /* Handle ICMP packet */ + type = icmp->type; + switch ( type ) { + case ICMP_ECHO_REQUEST: + return icmp_rx_echo_request ( iobuf, st_src, + &icmpv4_echo_protocol ); + case ICMP_ECHO_REPLY: + return icmp_rx_echo_reply ( iobuf, st_src ); + default: + DBG ( "ICMP ignoring type %d\n", type ); + rc = 0; + break; + } + + discard: + free_iob ( iobuf ); + return rc; +} + +/** ICMPv4 TCP/IP protocol */ +struct tcpip_protocol icmpv4_protocol __tcpip_protocol = { + .name = "ICMPv4", + .rx = icmpv4_rx, + .tcpip_proto = IP_ICMP, +}; + +/** ICMPv4 echo protocol */ +struct icmp_echo_protocol icmpv4_echo_protocol __icmp_echo_protocol = { + .family = AF_INET, + .request = ICMP_ECHO_REQUEST, + .reply = ICMP_ECHO_REPLY, + .tcpip_protocol = &icmpv4_protocol, + .net_checksum = 0, +}; diff --git a/qemu/roms/ipxe/src/net/icmpv6.c b/qemu/roms/ipxe/src/net/icmpv6.c new file mode 100644 index 000000000..479800e7d --- /dev/null +++ b/qemu/roms/ipxe/src/net/icmpv6.c @@ -0,0 +1,179 @@ +/* + * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <string.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/in.h> +#include <ipxe/iobuf.h> +#include <ipxe/tcpip.h> +#include <ipxe/ping.h> +#include <ipxe/icmpv6.h> + +/** @file + * + * ICMPv6 protocol + * + */ + +struct icmp_echo_protocol icmpv6_echo_protocol __icmp_echo_protocol; + +/** + * Process received ICMPv6 echo request packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v sin6_src Source socket address + * @v sin6_dest Destination socket address + * @ret rc Return status code + */ +static int icmpv6_rx_echo_request ( struct io_buffer *iobuf, + struct net_device *netdev __unused, + struct sockaddr_in6 *sin6_src, + struct sockaddr_in6 *sin6_dest __unused ) { + struct sockaddr_tcpip *st_src = + ( ( struct sockaddr_tcpip * ) sin6_src ); + + return icmp_rx_echo_request ( iobuf, st_src, &icmpv6_echo_protocol ); +} + +/** ICMPv6 echo request handler */ +struct icmpv6_handler icmpv6_echo_request_handler __icmpv6_handler = { + .type = ICMPV6_ECHO_REQUEST, + .rx = icmpv6_rx_echo_request, +}; + +/** + * Process received ICMPv6 echo reply packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v sin6_src Source socket address + * @v sin6_dest Destination socket address + * @ret rc Return status code + */ +static int icmpv6_rx_echo_reply ( struct io_buffer *iobuf, + struct net_device *netdev __unused, + struct sockaddr_in6 *sin6_src, + struct sockaddr_in6 *sin6_dest __unused ) { + struct sockaddr_tcpip *st_src = + ( ( struct sockaddr_tcpip * ) sin6_src ); + + return icmp_rx_echo_reply ( iobuf, st_src ); +} + +/** ICMPv6 echo reply handler */ +struct icmpv6_handler icmpv6_echo_reply_handler __icmpv6_handler = { + .type = ICMPV6_ECHO_REPLY, + .rx = icmpv6_rx_echo_reply, +}; + +/** + * Identify ICMPv6 handler + * + * @v type ICMPv6 type + * @ret handler ICMPv6 handler, or NULL if not found + */ +static struct icmpv6_handler * icmpv6_handler ( unsigned int type ) { + struct icmpv6_handler *handler; + + for_each_table_entry ( handler, ICMPV6_HANDLERS ) { + if ( handler->type == type ) + return handler; + } + return NULL; +} + +/** + * Process a received packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v st_src Partially-filled source address + * @v st_dest Partially-filled destination address + * @v pshdr_csum Pseudo-header checksum + * @ret rc Return status code + */ +static int icmpv6_rx ( struct io_buffer *iobuf, struct net_device *netdev, + struct sockaddr_tcpip *st_src, + struct sockaddr_tcpip *st_dest, uint16_t pshdr_csum ) { + struct sockaddr_in6 *sin6_src = ( ( struct sockaddr_in6 * ) st_src ); + struct sockaddr_in6 *sin6_dest = ( ( struct sockaddr_in6 * ) st_dest ); + struct icmp_header *icmp = iobuf->data; + size_t len = iob_len ( iobuf ); + struct icmpv6_handler *handler; + unsigned int csum; + int rc; + + /* Sanity check */ + if ( len < sizeof ( *icmp ) ) { + DBGC ( netdev, "ICMPv6 packet too short at %zd bytes (min %zd " + "bytes)\n", len, sizeof ( *icmp ) ); + rc = -EINVAL; + goto done; + } + + /* Verify checksum */ + csum = tcpip_continue_chksum ( pshdr_csum, icmp, len ); + if ( csum != 0 ) { + DBGC ( netdev, "ICMPv6 checksum incorrect (is %04x, should be " + "0000)\n", csum ); + DBGC_HDA ( netdev, 0, icmp, len ); + rc = -EINVAL; + goto done; + } + + /* Identify handler */ + handler = icmpv6_handler ( icmp->type ); + if ( ! handler ) { + DBGC ( netdev, "ICMPv6 unrecognised type %d\n", icmp->type ); + rc = -ENOTSUP; + goto done; + } + + /* Pass to handler */ + if ( ( rc = handler->rx ( iob_disown ( iobuf ), netdev, sin6_src, + sin6_dest ) ) != 0 ) { + DBGC ( netdev, "ICMPv6 could not handle type %d: %s\n", + icmp->type, strerror ( rc ) ); + goto done; + } + + done: + free_iob ( iobuf ); + return rc; +} + +/** ICMPv6 TCP/IP protocol */ +struct tcpip_protocol icmpv6_protocol __tcpip_protocol = { + .name = "ICMPv6", + .rx = icmpv6_rx, + .tcpip_proto = IP_ICMP6, +}; + +/** ICMPv6 echo protocol */ +struct icmp_echo_protocol icmpv6_echo_protocol __icmp_echo_protocol = { + .family = AF_INET6, + .request = ICMPV6_ECHO_REQUEST, + .reply = ICMPV6_ECHO_REPLY, + .tcpip_protocol = &icmpv6_protocol, + .net_checksum = 1, +}; diff --git a/qemu/roms/ipxe/src/net/infiniband.c b/qemu/roms/ipxe/src/net/infiniband.c new file mode 100644 index 000000000..12d1d83ce --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband.c @@ -0,0 +1,999 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <byteswap.h> +#include <errno.h> +#include <assert.h> +#include <ipxe/list.h> +#include <ipxe/errortab.h> +#include <ipxe/if_arp.h> +#include <ipxe/netdevice.h> +#include <ipxe/iobuf.h> +#include <ipxe/process.h> +#include <ipxe/infiniband.h> +#include <ipxe/ib_mi.h> +#include <ipxe/ib_sma.h> + +/** @file + * + * Infiniband protocol + * + */ + +/** List of Infiniband devices */ +struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices ); + +/** List of open Infiniband devices, in reverse order of opening */ +static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices ); + +/* Disambiguate the various possible EINPROGRESSes */ +#define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT ) +#define EINFO_EINPROGRESS_INIT __einfo_uniqify \ + ( EINFO_EINPROGRESS, 0x01, "Initialising" ) +#define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED ) +#define EINFO_EINPROGRESS_ARMED __einfo_uniqify \ + ( EINFO_EINPROGRESS, 0x02, "Armed" ) + +/** Human-readable message for the link statuses */ +struct errortab infiniband_errors[] __errortab = { + __einfo_errortab ( EINFO_EINPROGRESS_INIT ), + __einfo_errortab ( EINFO_EINPROGRESS_ARMED ), +}; + +/*************************************************************************** + * + * Completion queues + * + *************************************************************************** + */ + +/** + * Create completion queue + * + * @v ibdev Infiniband device + * @v num_cqes Number of completion queue entries + * @v op Completion queue operations + * @ret cq New completion queue + */ +struct ib_completion_queue * +ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes, + struct ib_completion_queue_operations *op ) { + struct ib_completion_queue *cq; + int rc; + + DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev ); + + /* Allocate and initialise data structure */ + cq = zalloc ( sizeof ( *cq ) ); + if ( ! cq ) + goto err_alloc_cq; + cq->ibdev = ibdev; + list_add ( &cq->list, &ibdev->cqs ); + cq->num_cqes = num_cqes; + INIT_LIST_HEAD ( &cq->work_queues ); + cq->op = op; + + /* Perform device-specific initialisation and get CQN */ + if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not initialise completion " + "queue: %s\n", ibdev, strerror ( rc ) ); + goto err_dev_create_cq; + } + + DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) " + "with CQN %#lx\n", ibdev, num_cqes, cq, + ib_cq_get_drvdata ( cq ), cq->cqn ); + return cq; + + ibdev->op->destroy_cq ( ibdev, cq ); + err_dev_create_cq: + list_del ( &cq->list ); + free ( cq ); + err_alloc_cq: + return NULL; +} + +/** + * Destroy completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ +void ib_destroy_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ) { + DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n", + ibdev, cq->cqn ); + assert ( list_empty ( &cq->work_queues ) ); + ibdev->op->destroy_cq ( ibdev, cq ); + list_del ( &cq->list ); + free ( cq ); +} + +/** + * Poll completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ +void ib_poll_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ) { + struct ib_work_queue *wq; + + /* Poll completion queue */ + ibdev->op->poll_cq ( ibdev, cq ); + + /* Refill receive work queues */ + list_for_each_entry ( wq, &cq->work_queues, list ) { + if ( ! wq->is_send ) + ib_refill_recv ( ibdev, wq->qp ); + } +} + +/*************************************************************************** + * + * Work queues + * + *************************************************************************** + */ + +/** + * Create queue pair + * + * @v ibdev Infiniband device + * @v type Queue pair type + * @v num_send_wqes Number of send work queue entries + * @v send_cq Send completion queue + * @v num_recv_wqes Number of receive work queue entries + * @v recv_cq Receive completion queue + * @v op Queue pair operations + * @ret qp Queue pair + * + * The queue pair will be left in the INIT state; you must call + * ib_modify_qp() before it is ready to use for sending and receiving. + */ +struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, + enum ib_queue_pair_type type, + unsigned int num_send_wqes, + struct ib_completion_queue *send_cq, + unsigned int num_recv_wqes, + struct ib_completion_queue *recv_cq, + struct ib_queue_pair_operations *op ) { + struct ib_queue_pair *qp; + size_t total_size; + int rc; + + DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev ); + + /* Allocate and initialise data structure */ + total_size = ( sizeof ( *qp ) + + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) + + ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) ); + qp = zalloc ( total_size ); + if ( ! qp ) + goto err_alloc_qp; + qp->ibdev = ibdev; + list_add ( &qp->list, &ibdev->qps ); + qp->type = type; + qp->send.qp = qp; + qp->send.is_send = 1; + qp->send.cq = send_cq; + list_add ( &qp->send.list, &send_cq->work_queues ); + qp->send.psn = ( random() & 0xffffffUL ); + qp->send.num_wqes = num_send_wqes; + qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) ); + qp->recv.qp = qp; + qp->recv.cq = recv_cq; + list_add ( &qp->recv.list, &recv_cq->work_queues ); + qp->recv.psn = ( random() & 0xffffffUL ); + qp->recv.num_wqes = num_recv_wqes; + qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) + + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) )); + INIT_LIST_HEAD ( &qp->mgids ); + qp->op = op; + + /* Perform device-specific initialisation and get QPN */ + if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not initialise queue pair: " + "%s\n", ibdev, strerror ( rc ) ); + goto err_dev_create_qp; + } + DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n", + ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn ); + DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n", + ibdev, qp->qpn, num_send_wqes, qp->send.iobufs, + qp->recv.iobufs ); + DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n", + ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs, + ( ( ( void * ) qp ) + total_size ) ); + + /* Calculate externally-visible QPN */ + switch ( type ) { + case IB_QPT_SMI: + qp->ext_qpn = IB_QPN_SMI; + break; + case IB_QPT_GSI: + qp->ext_qpn = IB_QPN_GSI; + break; + default: + qp->ext_qpn = qp->qpn; + break; + } + if ( qp->ext_qpn != qp->qpn ) { + DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n", + ibdev, qp->qpn, qp->ext_qpn ); + } + + return qp; + + ibdev->op->destroy_qp ( ibdev, qp ); + err_dev_create_qp: + list_del ( &qp->send.list ); + list_del ( &qp->recv.list ); + list_del ( &qp->list ); + free ( qp ); + err_alloc_qp: + return NULL; +} + +/** + * Modify queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @ret rc Return status code + */ +int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { + int rc; + + DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn ); + + if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n", + ibdev, qp->qpn, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Destroy queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ +void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { + struct io_buffer *iobuf; + unsigned int i; + + DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n", + ibdev, qp->qpn ); + + assert ( list_empty ( &qp->mgids ) ); + + /* Perform device-specific destruction */ + ibdev->op->destroy_qp ( ibdev, qp ); + + /* Complete any remaining I/O buffers with errors */ + for ( i = 0 ; i < qp->send.num_wqes ; i++ ) { + if ( ( iobuf = qp->send.iobufs[i] ) != NULL ) + ib_complete_send ( ibdev, qp, iobuf, -ECANCELED ); + } + for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) { + if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) { + ib_complete_recv ( ibdev, qp, NULL, NULL, iobuf, + -ECANCELED ); + } + } + + /* Remove work queues from completion queue */ + list_del ( &qp->send.list ); + list_del ( &qp->recv.list ); + + /* Free QP */ + list_del ( &qp->list ); + free ( qp ); +} + +/** + * Find queue pair by QPN + * + * @v ibdev Infiniband device + * @v qpn Queue pair number + * @ret qp Queue pair, or NULL + */ +struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev, + unsigned long qpn ) { + struct ib_queue_pair *qp; + + list_for_each_entry ( qp, &ibdev->qps, list ) { + if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) ) + return qp; + } + return NULL; +} + +/** + * Find queue pair by multicast GID + * + * @v ibdev Infiniband device + * @v gid Multicast GID + * @ret qp Queue pair, or NULL + */ +struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev, + union ib_gid *gid ) { + struct ib_queue_pair *qp; + struct ib_multicast_gid *mgid; + + list_for_each_entry ( qp, &ibdev->qps, list ) { + list_for_each_entry ( mgid, &qp->mgids, list ) { + if ( memcmp ( &mgid->gid, gid, + sizeof ( mgid->gid ) ) == 0 ) { + return qp; + } + } + } + return NULL; +} + +/** + * Find work queue belonging to completion queue + * + * @v cq Completion queue + * @v qpn Queue pair number + * @v is_send Find send work queue (rather than receive) + * @ret wq Work queue, or NULL if not found + */ +struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, + unsigned long qpn, int is_send ) { + struct ib_work_queue *wq; + + list_for_each_entry ( wq, &cq->work_queues, list ) { + if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) ) + return wq; + } + return NULL; +} + +/** + * Post send work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v dest Destination address vector + * @v iobuf I/O buffer + * @ret rc Return status code + */ +int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_address_vector *dest, + struct io_buffer *iobuf ) { + struct ib_address_vector dest_copy; + int rc; + + /* Check queue fill level */ + if ( qp->send.fill >= qp->send.num_wqes ) { + DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n", + ibdev, qp->qpn ); + return -ENOBUFS; + } + + /* Use default address vector if none specified */ + if ( ! dest ) + dest = &qp->av; + + /* Make modifiable copy of address vector */ + memcpy ( &dest_copy, dest, sizeof ( dest_copy ) ); + dest = &dest_copy; + + /* Fill in optional parameters in address vector */ + if ( ! dest->qkey ) + dest->qkey = qp->qkey; + if ( ! dest->rate ) + dest->rate = IB_RATE_2_5; + + /* Post to hardware */ + if ( ( rc = ibdev->op->post_send ( ibdev, qp, dest, iobuf ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: " + "%s\n", ibdev, qp->qpn, strerror ( rc ) ); + return rc; + } + + qp->send.fill++; + return 0; +} + +/** + * Post receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @ret rc Return status code + */ +int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct io_buffer *iobuf ) { + int rc; + + /* Check packet length */ + if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) { + DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n", + ibdev, qp->qpn, iob_tailroom ( iobuf ) ); + return -EINVAL; + } + + /* Check queue fill level */ + if ( qp->recv.fill >= qp->recv.num_wqes ) { + DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n", + ibdev, qp->qpn ); + return -ENOBUFS; + } + + /* Post to hardware */ + if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: " + "%s\n", ibdev, qp->qpn, strerror ( rc ) ); + return rc; + } + + qp->recv.fill++; + return 0; +} + +/** + * Complete send work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @v rc Completion status code + */ +void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct io_buffer *iobuf, int rc ) { + + if ( qp->send.cq->op->complete_send ) { + qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc ); + } else { + free_iob ( iobuf ); + } + qp->send.fill--; +} + +/** + * Complete receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v dest Destination address vector, or NULL + * @v source Source address vector, or NULL + * @v iobuf I/O buffer + * @v rc Completion status code + */ +void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_address_vector *dest, + struct ib_address_vector *source, + struct io_buffer *iobuf, int rc ) { + + if ( qp->recv.cq->op->complete_recv ) { + qp->recv.cq->op->complete_recv ( ibdev, qp, dest, source, + iobuf, rc ); + } else { + free_iob ( iobuf ); + } + qp->recv.fill--; +} + +/** + * Refill receive work queue + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ +void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { + struct io_buffer *iobuf; + int rc; + + /* Keep filling while unfilled entries remain */ + while ( qp->recv.fill < qp->recv.num_wqes ) { + + /* Allocate I/O buffer */ + iobuf = qp->op->alloc_iob ( IB_MAX_PAYLOAD_SIZE ); + if ( ! iobuf ) { + /* Non-fatal; we will refill on next attempt */ + return; + } + + /* Post I/O buffer */ + if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not refill: %s\n", + ibdev, strerror ( rc ) ); + free_iob ( iobuf ); + /* Give up */ + return; + } + } +} + +/*************************************************************************** + * + * Link control + * + *************************************************************************** + */ + +/** + * Get link state + * + * @v ibdev Infiniband device + * @ret rc Link status code + */ +int ib_link_rc ( struct ib_device *ibdev ) { + switch ( ibdev->port_state ) { + case IB_PORT_STATE_DOWN: return -ENOTCONN; + case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT; + case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED; + case IB_PORT_STATE_ACTIVE: return 0; + default: return -EINVAL; + } +} + +/** + * Textual representation of Infiniband link state + * + * @v ibdev Infiniband device + * @ret link_text Link state text + */ +static const char * ib_link_state_text ( struct ib_device *ibdev ) { + switch ( ibdev->port_state ) { + case IB_PORT_STATE_DOWN: return "DOWN"; + case IB_PORT_STATE_INIT: return "INIT"; + case IB_PORT_STATE_ARMED: return "ARMED"; + case IB_PORT_STATE_ACTIVE: return "ACTIVE"; + default: return "UNKNOWN"; + } +} + +/** + * Notify drivers of Infiniband device or link state change + * + * @v ibdev Infiniband device + */ +static void ib_notify ( struct ib_device *ibdev ) { + struct ib_driver *driver; + + for_each_table_entry ( driver, IB_DRIVERS ) + driver->notify ( ibdev ); +} + +/** + * Notify of Infiniband link state change + * + * @v ibdev Infiniband device + */ +void ib_link_state_changed ( struct ib_device *ibdev ) { + + DBGC ( ibdev, "IBDEV %p link state is %s\n", + ibdev, ib_link_state_text ( ibdev ) ); + + /* Notify drivers of link state change */ + ib_notify ( ibdev ); +} + +/** + * Open port + * + * @v ibdev Infiniband device + * @ret rc Return status code + */ +int ib_open ( struct ib_device *ibdev ) { + int rc; + + /* Increment device open request counter */ + if ( ibdev->open_count++ > 0 ) { + /* Device was already open; do nothing */ + return 0; + } + + /* Open device */ + if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not open: %s\n", + ibdev, strerror ( rc ) ); + goto err_open; + } + + /* Create subnet management interface */ + ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI ); + if ( ! ibdev->smi ) { + DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev ); + rc = -ENOMEM; + goto err_create_smi; + } + + /* Create subnet management agent */ + if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n", + ibdev, strerror ( rc ) ); + goto err_create_sma; + } + + /* Create general services interface */ + ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI ); + if ( ! ibdev->gsi ) { + DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev ); + rc = -ENOMEM; + goto err_create_gsi; + } + + /* Add to head of open devices list */ + list_add ( &ibdev->open_list, &open_ib_devices ); + + /* Notify drivers of device state change */ + ib_notify ( ibdev ); + + assert ( ibdev->open_count == 1 ); + return 0; + + ib_destroy_mi ( ibdev, ibdev->gsi ); + err_create_gsi: + ib_destroy_sma ( ibdev, ibdev->smi ); + err_create_sma: + ib_destroy_mi ( ibdev, ibdev->smi ); + err_create_smi: + ibdev->op->close ( ibdev ); + err_open: + assert ( ibdev->open_count == 1 ); + ibdev->open_count = 0; + return rc; +} + +/** + * Close port + * + * @v ibdev Infiniband device + */ +void ib_close ( struct ib_device *ibdev ) { + + /* Decrement device open request counter */ + ibdev->open_count--; + + /* Close device if this was the last remaining requested opening */ + if ( ibdev->open_count == 0 ) { + ib_notify ( ibdev ); + list_del ( &ibdev->open_list ); + ib_destroy_mi ( ibdev, ibdev->gsi ); + ib_destroy_sma ( ibdev, ibdev->smi ); + ib_destroy_mi ( ibdev, ibdev->smi ); + ibdev->op->close ( ibdev ); + ibdev->port_state = IB_PORT_STATE_DOWN; + } +} + +/*************************************************************************** + * + * Multicast + * + *************************************************************************** + */ + +/** + * Attach to multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + * @ret rc Return status code + * + * Note that this function handles only the local device's attachment + * to the multicast GID; it does not issue the relevant MADs to join + * the multicast group on the subnet. + */ +int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp, + union ib_gid *gid ) { + struct ib_multicast_gid *mgid; + int rc; + + /* Add to software multicast GID list */ + mgid = zalloc ( sizeof ( *mgid ) ); + if ( ! mgid ) { + rc = -ENOMEM; + goto err_alloc_mgid; + } + memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) ); + list_add ( &mgid->list, &qp->mgids ); + + /* Add to hardware multicast GID list */ + if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 ) + goto err_dev_mcast_attach; + + return 0; + + err_dev_mcast_attach: + list_del ( &mgid->list ); + free ( mgid ); + err_alloc_mgid: + return rc; +} + +/** + * Detach from multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + */ +void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp, + union ib_gid *gid ) { + struct ib_multicast_gid *mgid; + + /* Remove from hardware multicast GID list */ + ibdev->op->mcast_detach ( ibdev, qp, gid ); + + /* Remove from software multicast GID list */ + list_for_each_entry ( mgid, &qp->mgids, list ) { + if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) { + list_del ( &mgid->list ); + free ( mgid ); + break; + } + } +} + +/*************************************************************************** + * + * Miscellaneous + * + *************************************************************************** + */ + +/** + * Count Infiniband HCA ports + * + * @v ibdev Infiniband device + * @ret num_ports Number of ports + */ +int ib_count_ports ( struct ib_device *ibdev ) { + struct ib_device *tmp; + int num_ports = 0; + + /* Search for IB devices with the same physical device to + * identify port count. + */ + for_each_ibdev ( tmp ) { + if ( tmp->dev == ibdev->dev ) + num_ports++; + } + return num_ports; +} + +/** + * Set port information + * + * @v ibdev Infiniband device + * @v mad Set port information MAD + */ +int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) { + int rc; + + /* Adapters with embedded SMAs do not need to support this method */ + if ( ! ibdev->op->set_port_info ) { + DBGC ( ibdev, "IBDEV %p does not support setting port " + "information\n", ibdev ); + return -ENOTSUP; + } + + if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not set port information: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + + return 0; +}; + +/** + * Set partition key table + * + * @v ibdev Infiniband device + * @v mad Set partition key table MAD + */ +int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) { + int rc; + + /* Adapters with embedded SMAs do not need to support this method */ + if ( ! ibdev->op->set_pkey_table ) { + DBGC ( ibdev, "IBDEV %p does not support setting partition " + "key table\n", ibdev ); + return -ENOTSUP; + } + + if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not set partition key table: " + "%s\n", ibdev, strerror ( rc ) ); + return rc; + } + + return 0; +}; + +/*************************************************************************** + * + * Event queues + * + *************************************************************************** + */ + +/** + * Poll event queue + * + * @v ibdev Infiniband device + */ +void ib_poll_eq ( struct ib_device *ibdev ) { + struct ib_completion_queue *cq; + + /* Poll device's event queue */ + ibdev->op->poll_eq ( ibdev ); + + /* Poll all completion queues */ + list_for_each_entry ( cq, &ibdev->cqs, list ) + ib_poll_cq ( ibdev, cq ); +} + +/** + * Single-step the Infiniband event queue + * + * @v process Infiniband event queue process + */ +static void ib_step ( struct process *process __unused ) { + struct ib_device *ibdev; + + list_for_each_entry ( ibdev, &open_ib_devices, open_list ) + ib_poll_eq ( ibdev ); +} + +/** Infiniband event queue process */ +PERMANENT_PROCESS ( ib_process, ib_step ); + +/*************************************************************************** + * + * Infiniband device creation/destruction + * + *************************************************************************** + */ + +/** + * Allocate Infiniband device + * + * @v priv_size Size of driver private data area + * @ret ibdev Infiniband device, or NULL + */ +struct ib_device * alloc_ibdev ( size_t priv_size ) { + struct ib_device *ibdev; + void *drv_priv; + size_t total_len; + + total_len = ( sizeof ( *ibdev ) + priv_size ); + ibdev = zalloc ( total_len ); + if ( ibdev ) { + drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) ); + ib_set_drvdata ( ibdev, drv_priv ); + INIT_LIST_HEAD ( &ibdev->list ); + INIT_LIST_HEAD ( &ibdev->open_list ); + INIT_LIST_HEAD ( &ibdev->cqs ); + INIT_LIST_HEAD ( &ibdev->qps ); + ibdev->port_state = IB_PORT_STATE_DOWN; + ibdev->lid = IB_LID_NONE; + ibdev->pkey = IB_PKEY_DEFAULT; + } + return ibdev; +} + +/** + * Register Infiniband device + * + * @v ibdev Infiniband device + * @ret rc Return status code + */ +int register_ibdev ( struct ib_device *ibdev ) { + struct ib_driver *driver; + int rc; + + /* Add to device list */ + ibdev_get ( ibdev ); + list_add_tail ( &ibdev->list, &ib_devices ); + DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev, + ibdev->dev->name ); + + /* Probe device */ + for_each_table_entry ( driver, IB_DRIVERS ) { + if ( ( rc = driver->probe ( ibdev ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not add %s device: %s\n", + ibdev, driver->name, strerror ( rc ) ); + goto err_probe; + } + } + + return 0; + + err_probe: + for_each_table_entry_continue_reverse ( driver, IB_DRIVERS ) + driver->remove ( ibdev ); + list_del ( &ibdev->list ); + ibdev_put ( ibdev ); + return rc; +} + +/** + * Unregister Infiniband device + * + * @v ibdev Infiniband device + */ +void unregister_ibdev ( struct ib_device *ibdev ) { + struct ib_driver *driver; + + /* Remove device */ + for_each_table_entry_reverse ( driver, IB_DRIVERS ) + driver->remove ( ibdev ); + + /* Remove from device list */ + list_del ( &ibdev->list ); + ibdev_put ( ibdev ); + DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev ); +} + +/** + * Find Infiniband device by GID + * + * @v gid GID + * @ret ibdev Infiniband device, or NULL + */ +struct ib_device * find_ibdev ( union ib_gid *gid ) { + struct ib_device *ibdev; + + for_each_ibdev ( ibdev ) { + if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 ) + return ibdev; + } + return NULL; +} + +/** + * Get most recently opened Infiniband device + * + * @ret ibdev Most recently opened Infiniband device, or NULL + */ +struct ib_device * last_opened_ibdev ( void ) { + struct ib_device *ibdev; + + ibdev = list_first_entry ( &open_ib_devices, struct ib_device, + open_list ); + if ( ! ibdev ) + return NULL; + + assert ( ibdev->open_count != 0 ); + return ibdev; +} + +/* Drag in IPoIB */ +REQUIRE_OBJECT ( ipoib ); diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_cm.c b/qemu/roms/ipxe/src/net/infiniband/ib_cm.c new file mode 100644 index 000000000..797639bc8 --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_cm.c @@ -0,0 +1,496 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <assert.h> +#include <ipxe/infiniband.h> +#include <ipxe/ib_mi.h> +#include <ipxe/ib_pathrec.h> +#include <ipxe/ib_cm.h> + +/** + * @file + * + * Infiniband communication management + * + */ + +/** List of connections */ +static LIST_HEAD ( ib_cm_conns ); + +/** + * Find connection by local communication ID + * + * @v local_id Local communication ID + * @ret conn Connection, or NULL + */ +static struct ib_connection * ib_cm_find ( uint32_t local_id ) { + struct ib_connection *conn; + + list_for_each_entry ( conn, &ib_cm_conns, list ) { + if ( conn->local_id == local_id ) + return conn; + } + return NULL; +} + +/** + * Send "ready to use" response + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v av Address vector + * @v local_id Local communication ID + * @v remote_id Remote communication ID + * @ret rc Return status code + */ +static int ib_cm_send_rtu ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + struct ib_address_vector *av, + uint32_t local_id, uint32_t remote_id ) { + union ib_mad mad; + struct ib_cm_ready_to_use *rtu = &mad.cm.cm_data.ready_to_use; + int rc; + + /* Construct "ready to use" response */ + memset ( &mad, 0, sizeof ( mad ) ); + mad.hdr.mgmt_class = IB_MGMT_CLASS_CM; + mad.hdr.class_version = IB_CM_CLASS_VERSION; + mad.hdr.method = IB_MGMT_METHOD_SEND; + mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE ); + rtu->local_id = htonl ( local_id ); + rtu->remote_id = htonl ( remote_id ); + if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){ + DBG ( "CM could not send RTU: %s\n", strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Handle duplicate connection replies + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + * @ret rc Return status code + * + * If a "ready to use" MAD is lost, the peer may resend the connection + * reply. We have to respond to these with duplicate "ready to use" + * MADs, otherwise the peer may time out and drop the connection. + */ +static void ib_cm_recv_rep ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply; + struct ib_connection *conn; + uint32_t local_id = ntohl ( rep->remote_id ); + int rc; + + /* Identify connection */ + conn = ib_cm_find ( local_id ); + if ( conn ) { + /* Try to send "ready to use" reply */ + if ( ( rc = ib_cm_send_rtu ( ibdev, mi, av, conn->local_id, + conn->remote_id ) ) != 0 ) { + /* Ignore errors; the remote end will retry */ + } + } else { + DBG ( "CM unidentified connection %08x\n", local_id ); + } +} + +/** + * Send reply to disconnection request + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v av Address vector + * @v local_id Local communication ID + * @v remote_id Remote communication ID + * @ret rc Return status code + */ +static int ib_cm_send_drep ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + struct ib_address_vector *av, + uint32_t local_id, uint32_t remote_id ) { + union ib_mad mad; + struct ib_cm_disconnect_reply *drep = &mad.cm.cm_data.disconnect_reply; + int rc; + + /* Construct reply to disconnection request */ + memset ( &mad, 0, sizeof ( mad ) ); + mad.hdr.mgmt_class = IB_MGMT_CLASS_CM; + mad.hdr.class_version = IB_CM_CLASS_VERSION; + mad.hdr.method = IB_MGMT_METHOD_SEND; + mad.hdr.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REPLY ); + drep->local_id = htonl ( local_id ); + drep->remote_id = htonl ( remote_id ); + if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){ + DBG ( "CM could not send DREP: %s\n", strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Handle disconnection requests + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + * @ret rc Return status code + */ +static void ib_cm_recv_dreq ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_cm_disconnect_request *dreq = + &mad->cm.cm_data.disconnect_request; + struct ib_connection *conn; + uint32_t local_id = ntohl ( dreq->remote_id ); + uint32_t remote_id = ntohl ( dreq->local_id ); + int rc; + + /* Identify connection */ + conn = ib_cm_find ( local_id ); + if ( conn ) { + /* Notify upper layer */ + conn->op->changed ( ibdev, conn->qp, conn, -ENOTCONN, + &dreq->private_data, + sizeof ( dreq->private_data ) ); + } else { + DBG ( "CM unidentified connection %08x\n", local_id ); + } + + /* Send reply */ + if ( ( rc = ib_cm_send_drep ( ibdev, mi, av, local_id, + remote_id ) ) != 0 ) { + /* Ignore errors; the remote end will retry */ + } +}; + +/** Communication management agents */ +struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = { + { + .mgmt_class = IB_MGMT_CLASS_CM, + .class_version = IB_CM_CLASS_VERSION, + .attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ), + .handle = ib_cm_recv_rep, + }, + { + .mgmt_class = IB_MGMT_CLASS_CM, + .class_version = IB_CM_CLASS_VERSION, + .attr_id = htons ( IB_CM_ATTR_DISCONNECT_REQUEST ), + .handle = ib_cm_recv_dreq, + }, +}; + +/** + * Convert connection rejection reason to return status code + * + * @v reason Rejection reason (in network byte order) + * @ret rc Return status code + */ +static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) { + switch ( reason ) { + case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) : + return -ENODEV; + case htons ( IB_CM_REJECT_STALE_CONN ) : + return -EALREADY; + case htons ( IB_CM_REJECT_CONSUMER ) : + return -ENOTTY; + default: + return -EPERM; + } +} + +/** + * Handle connection request transaction completion + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + * @v rc Status code + * @v mad Received MAD (or NULL on error) + * @v av Source address vector (or NULL on error) + */ +static void ib_cm_req_complete ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + struct ib_mad_transaction *madx, + int rc, union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_connection *conn = ib_madx_get_ownerdata ( madx ); + struct ib_queue_pair *qp = conn->qp; + struct ib_cm_common *common = &mad->cm.cm_data.common; + struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply; + struct ib_cm_connect_reject *rej = &mad->cm.cm_data.connect_reject; + void *private_data = NULL; + size_t private_data_len = 0; + + /* Report failures */ + if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) )) + rc = -EIO; + if ( rc != 0 ) { + DBGC ( conn, "CM %p connection request failed: %s\n", + conn, strerror ( rc ) ); + goto out; + } + + /* Record remote communication ID */ + conn->remote_id = ntohl ( common->local_id ); + + /* Handle response */ + switch ( mad->hdr.attr_id ) { + + case htons ( IB_CM_ATTR_CONNECT_REPLY ) : + /* Extract fields */ + qp->av.qpn = ( ntohl ( rep->local_qpn ) >> 8 ); + qp->send.psn = ( ntohl ( rep->starting_psn ) >> 8 ); + private_data = &rep->private_data; + private_data_len = sizeof ( rep->private_data ); + DBGC ( conn, "CM %p connected to QPN %lx PSN %x\n", + conn, qp->av.qpn, qp->send.psn ); + + /* Modify queue pair */ + if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( conn, "CM %p could not modify queue pair: %s\n", + conn, strerror ( rc ) ); + goto out; + } + + /* Send "ready to use" reply */ + if ( ( rc = ib_cm_send_rtu ( ibdev, mi, av, conn->local_id, + conn->remote_id ) ) != 0 ) { + /* Treat as non-fatal */ + rc = 0; + } + break; + + case htons ( IB_CM_ATTR_CONNECT_REJECT ) : + /* Extract fields */ + DBGC ( conn, "CM %p connection rejected (reason %d)\n", + conn, ntohs ( rej->reason ) ); + /* Private data is valid only for a Consumer Reject */ + if ( rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) { + private_data = &rej->private_data; + private_data_len = sizeof ( rej->private_data ); + } + rc = ib_cm_rejection_reason_to_rc ( rej->reason ); + break; + + default: + DBGC ( conn, "CM %p unexpected response (attribute %04x)\n", + conn, ntohs ( mad->hdr.attr_id ) ); + rc = -ENOTSUP; + break; + } + + out: + /* Destroy the completed transaction */ + ib_destroy_madx ( ibdev, ibdev->gsi, madx ); + conn->madx = NULL; + + /* Hand off to the upper completion handler */ + conn->op->changed ( ibdev, qp, conn, rc, private_data, + private_data_len ); +} + +/** Connection request operations */ +static struct ib_mad_transaction_operations ib_cm_req_op = { + .complete = ib_cm_req_complete, +}; + +/** + * Handle connection path transaction completion + * + * @v ibdev Infiniband device + * @v path Path + * @v rc Status code + * @v av Address vector, or NULL on error + */ +static void ib_cm_path_complete ( struct ib_device *ibdev, + struct ib_path *path, int rc, + struct ib_address_vector *av ) { + struct ib_connection *conn = ib_path_get_ownerdata ( path ); + struct ib_queue_pair *qp = conn->qp; + union ib_mad mad; + struct ib_cm_connect_request *req = &mad.cm.cm_data.connect_request; + size_t private_data_len; + + /* Report failures */ + if ( rc != 0 ) { + DBGC ( conn, "CM %p path lookup failed: %s\n", + conn, strerror ( rc ) ); + conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 ); + goto out; + } + + /* Update queue pair peer path */ + memcpy ( &qp->av, av, sizeof ( qp->av ) ); + + /* Construct connection request */ + memset ( &mad, 0, sizeof ( mad ) ); + mad.hdr.mgmt_class = IB_MGMT_CLASS_CM; + mad.hdr.class_version = IB_CM_CLASS_VERSION; + mad.hdr.method = IB_MGMT_METHOD_SEND; + mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST ); + req->local_id = htonl ( conn->local_id ); + memcpy ( &req->service_id, &conn->service_id, + sizeof ( req->service_id ) ); + memcpy ( &req->local_ca, &ibdev->node_guid, sizeof ( req->local_ca ) ); + req->local_qpn__responder_resources = htonl ( ( qp->qpn << 8 ) | 1 ); + req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 ); + req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl = + htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) | + ( 0 << 0 ) ); + req->starting_psn__local_timeout__retry_count = + htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) | + ( 0x07 << 0 ) ); + req->pkey = htons ( ibdev->pkey ); + req->payload_mtu__rdc_exists__rnr_retry = + ( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) ); + req->max_cm_retries__srq = ( ( 0x0f << 4 ) | ( 0 << 3 ) ); + req->primary.local_lid = htons ( ibdev->lid ); + req->primary.remote_lid = htons ( conn->qp->av.lid ); + memcpy ( &req->primary.local_gid, &ibdev->gid, + sizeof ( req->primary.local_gid ) ); + memcpy ( &req->primary.remote_gid, &conn->qp->av.gid, + sizeof ( req->primary.remote_gid ) ); + req->primary.flow_label__rate = + htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) ); + req->primary.hop_limit = 0; + req->primary.sl__subnet_local = + ( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) ); + req->primary.local_ack_timeout = ( 0x13 << 3 ); + private_data_len = conn->private_data_len; + if ( private_data_len > sizeof ( req->private_data ) ) + private_data_len = sizeof ( req->private_data ); + memcpy ( &req->private_data, &conn->private_data, private_data_len ); + + /* Create connection request */ + av->qpn = IB_QPN_GSI; + av->qkey = IB_QKEY_GSI; + conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av, + &ib_cm_req_op ); + if ( ! conn->madx ) { + DBGC ( conn, "CM %p could not create connection request\n", + conn ); + conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 ); + goto out; + } + ib_madx_set_ownerdata ( conn->madx, conn ); + + out: + /* Destroy the completed transaction */ + ib_destroy_path ( ibdev, path ); + conn->path = NULL; +} + +/** Connection path operations */ +static struct ib_path_operations ib_cm_path_op = { + .complete = ib_cm_path_complete, +}; + +/** + * Create connection to remote QP + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v dgid Target GID + * @v service_id Target service ID + * @v private_data Connection request private data + * @v private_data_len Length of connection request private data + * @v op Connection operations + * @ret conn Connection + */ +struct ib_connection * +ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp, + union ib_gid *dgid, union ib_guid *service_id, + void *private_data, size_t private_data_len, + struct ib_connection_operations *op ) { + struct ib_connection *conn; + + /* Allocate and initialise request */ + conn = zalloc ( sizeof ( *conn ) + private_data_len ); + if ( ! conn ) + goto err_alloc_conn; + conn->ibdev = ibdev; + conn->qp = qp; + memset ( &qp->av, 0, sizeof ( qp->av ) ); + qp->av.gid_present = 1; + memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) ); + conn->local_id = random(); + memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) ); + conn->op = op; + conn->private_data_len = private_data_len; + memcpy ( &conn->private_data, private_data, private_data_len ); + + /* Create path */ + conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op ); + if ( ! conn->path ) + goto err_create_path; + ib_path_set_ownerdata ( conn->path, conn ); + + /* Add to list of connections */ + list_add ( &conn->list, &ib_cm_conns ); + + DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n", + conn, ibdev, qp->qpn ); + DBGC ( conn, "CM %p connecting to " IB_GID_FMT " " IB_GUID_FMT "\n", + conn, IB_GID_ARGS ( dgid ), IB_GUID_ARGS ( service_id ) ); + + return conn; + + ib_destroy_path ( ibdev, conn->path ); + err_create_path: + free ( conn ); + err_alloc_conn: + return NULL; +} + +/** + * Destroy connection to remote QP + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v conn Connection + */ +void ib_destroy_conn ( struct ib_device *ibdev, + struct ib_queue_pair *qp __unused, + struct ib_connection *conn ) { + + list_del ( &conn->list ); + if ( conn->madx ) + ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx ); + if ( conn->path ) + ib_destroy_path ( ibdev, conn->path ); + free ( conn ); +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_cmrc.c b/qemu/roms/ipxe/src/net/infiniband/ib_cmrc.c new file mode 100644 index 000000000..1cc0fcfef --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_cmrc.c @@ -0,0 +1,445 @@ +/* + * Copyright (C) 2009 Fen Systems Ltd <mbrown@fensystems.co.uk>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +FILE_LICENCE ( BSD2 ); + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <ipxe/iobuf.h> +#include <ipxe/xfer.h> +#include <ipxe/process.h> +#include <ipxe/infiniband.h> +#include <ipxe/ib_cm.h> +#include <ipxe/ib_cmrc.h> + +/** + * @file + * + * Infiniband Communication-managed Reliable Connections + * + */ + +/** CMRC number of send WQEs + * + * This is a policy decision. + */ +#define IB_CMRC_NUM_SEND_WQES 4 + +/** CMRC number of receive WQEs + * + * This is a policy decision. + */ +#define IB_CMRC_NUM_RECV_WQES 2 + +/** CMRC number of completion queue entries + * + * This is a policy decision + */ +#define IB_CMRC_NUM_CQES 8 + +/** An Infiniband Communication-Managed Reliable Connection */ +struct ib_cmrc_connection { + /** Reference count */ + struct refcnt refcnt; + /** Data transfer interface */ + struct interface xfer; + /** Infiniband device */ + struct ib_device *ibdev; + /** Completion queue */ + struct ib_completion_queue *cq; + /** Queue pair */ + struct ib_queue_pair *qp; + /** Connection */ + struct ib_connection *conn; + /** Destination GID */ + union ib_gid dgid; + /** Service ID */ + union ib_guid service_id; + /** QP is connected */ + int connected; + /** Shutdown process */ + struct process shutdown; +}; + +/** + * Shut down CMRC connection gracefully + * + * @v cmrc Communication-Managed Reliable Connection + * + * The Infiniband data structures are not reference-counted or + * guarded. It is therefore unsafe to shut them down while we may be + * in the middle of a callback from the Infiniband stack (e.g. in a + * receive completion handler). + * + * This shutdown process will run some time after the call to + * ib_cmrc_close(), after control has returned out of the Infiniband + * core, and will shut down the Infiniband interfaces cleanly. + * + * The shutdown process holds an implicit reference on the CMRC + * connection, ensuring that the structure is not freed before the + * shutdown process has run. + */ +static void ib_cmrc_shutdown ( struct ib_cmrc_connection *cmrc ) { + + DBGC ( cmrc, "CMRC %p shutting down\n", cmrc ); + + /* Shut down Infiniband interface */ + ib_destroy_conn ( cmrc->ibdev, cmrc->qp, cmrc->conn ); + ib_destroy_qp ( cmrc->ibdev, cmrc->qp ); + ib_destroy_cq ( cmrc->ibdev, cmrc->cq ); + ib_close ( cmrc->ibdev ); + + /* Drop the remaining reference */ + ref_put ( &cmrc->refcnt ); +} + +/** + * Close CMRC connection + * + * @v cmrc Communication-Managed Reliable Connection + * @v rc Reason for close + */ +static void ib_cmrc_close ( struct ib_cmrc_connection *cmrc, int rc ) { + + /* Close data transfer interface */ + intf_shutdown ( &cmrc->xfer, rc ); + + /* Schedule shutdown process */ + process_add ( &cmrc->shutdown ); +} + +/** + * Handle change of CMRC connection status + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v conn Connection + * @v rc_cm Connection status code + * @v private_data Private data, if available + * @v private_data_len Length of private data + */ +static void ib_cmrc_changed ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_connection *conn __unused, int rc_cm, + void *private_data, size_t private_data_len ) { + struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp ); + int rc_xfer; + + /* Record connection status */ + if ( rc_cm == 0 ) { + DBGC ( cmrc, "CMRC %p connected\n", cmrc ); + cmrc->connected = 1; + } else { + DBGC ( cmrc, "CMRC %p disconnected: %s\n", + cmrc, strerror ( rc_cm ) ); + cmrc->connected = 0; + } + + /* Pass up any private data */ + DBGC2 ( cmrc, "CMRC %p received private data:\n", cmrc ); + DBGC2_HDA ( cmrc, 0, private_data, private_data_len ); + if ( private_data && + ( rc_xfer = xfer_deliver_raw ( &cmrc->xfer, private_data, + private_data_len ) ) != 0 ) { + DBGC ( cmrc, "CMRC %p could not deliver private data: %s\n", + cmrc, strerror ( rc_xfer ) ); + ib_cmrc_close ( cmrc, rc_xfer ); + return; + } + + /* Notify upper connection of window change */ + xfer_window_changed ( &cmrc->xfer ); + + /* If we are disconnected, close the upper connection */ + if ( rc_cm != 0 ) { + ib_cmrc_close ( cmrc, rc_cm ); + return; + } +} + +/** CMRC connection operations */ +static struct ib_connection_operations ib_cmrc_conn_op = { + .changed = ib_cmrc_changed, +}; + +/** + * Handle CMRC send completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @v rc Completion status code + */ +static void ib_cmrc_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct io_buffer *iobuf, int rc ) { + struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp ); + + /* Free the completed I/O buffer */ + free_iob ( iobuf ); + + /* Close the connection on any send errors */ + if ( rc != 0 ) { + DBGC ( cmrc, "CMRC %p send error: %s\n", + cmrc, strerror ( rc ) ); + ib_cmrc_close ( cmrc, rc ); + return; + } +} + +/** + * Handle CMRC receive completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v dest Destination address vector, or NULL + * @v source Source address vector, or NULL + * @v iobuf I/O buffer + * @v rc Completion status code + */ +static void ib_cmrc_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_address_vector *dest __unused, + struct ib_address_vector *source __unused, + struct io_buffer *iobuf, int rc ) { + struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp ); + + /* Close the connection on any receive errors */ + if ( rc != 0 ) { + DBGC ( cmrc, "CMRC %p receive error: %s\n", + cmrc, strerror ( rc ) ); + free_iob ( iobuf ); + ib_cmrc_close ( cmrc, rc ); + return; + } + + DBGC2 ( cmrc, "CMRC %p received:\n", cmrc ); + DBGC2_HDA ( cmrc, 0, iobuf->data, iob_len ( iobuf ) ); + + /* Pass up data */ + if ( ( rc = xfer_deliver_iob ( &cmrc->xfer, iobuf ) ) != 0 ) { + DBGC ( cmrc, "CMRC %p could not deliver data: %s\n", + cmrc, strerror ( rc ) ); + ib_cmrc_close ( cmrc, rc ); + return; + } +} + +/** Infiniband CMRC completion operations */ +static struct ib_completion_queue_operations ib_cmrc_completion_ops = { + .complete_send = ib_cmrc_complete_send, + .complete_recv = ib_cmrc_complete_recv, +}; + +/** Infiniband CMRC queue pair operations */ +static struct ib_queue_pair_operations ib_cmrc_queue_pair_ops = { + .alloc_iob = alloc_iob, +}; + +/** + * Send data via CMRC + * + * @v cmrc CMRC connection + * @v iobuf Datagram I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int ib_cmrc_xfer_deliver ( struct ib_cmrc_connection *cmrc, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + int rc; + + /* If no connection has yet been attempted, send this datagram + * as the CM REQ private data. Otherwise, send it via the QP. + */ + if ( ! cmrc->connected ) { + + /* Abort if we have already sent a CM connection request */ + if ( cmrc->conn ) { + DBGC ( cmrc, "CMRC %p attempt to send before " + "connection is complete\n", cmrc ); + rc = -EIO; + goto out; + } + + /* Send via CM connection request */ + cmrc->conn = ib_create_conn ( cmrc->ibdev, cmrc->qp, + &cmrc->dgid, &cmrc->service_id, + iobuf->data, iob_len ( iobuf ), + &ib_cmrc_conn_op ); + if ( ! cmrc->conn ) { + DBGC ( cmrc, "CMRC %p could not connect\n", cmrc ); + rc = -ENOMEM; + goto out; + } + + } else { + + /* Send via QP */ + if ( ( rc = ib_post_send ( cmrc->ibdev, cmrc->qp, NULL, + iob_disown ( iobuf ) ) ) != 0 ) { + DBGC ( cmrc, "CMRC %p could not send: %s\n", + cmrc, strerror ( rc ) ); + goto out; + } + + } + return 0; + + out: + /* Free the I/O buffer if necessary */ + free_iob ( iobuf ); + + /* Close the connection on any errors */ + if ( rc != 0 ) + ib_cmrc_close ( cmrc, rc ); + + return rc; +} + +/** + * Check CMRC flow control window + * + * @v cmrc CMRC connection + * @ret len Length of window + */ +static size_t ib_cmrc_xfer_window ( struct ib_cmrc_connection *cmrc ) { + + /* We indicate a window only when we are successfully + * connected. + */ + return ( cmrc->connected ? IB_MAX_PAYLOAD_SIZE : 0 ); +} + +/** + * Identify device underlying CMRC connection + * + * @v cmrc CMRC connection + * @ret device Underlying device + */ +static struct device * +ib_cmrc_identify_device ( struct ib_cmrc_connection *cmrc ) { + return cmrc->ibdev->dev; +} + +/** CMRC data transfer interface operations */ +static struct interface_operation ib_cmrc_xfer_operations[] = { + INTF_OP ( xfer_deliver, struct ib_cmrc_connection *, + ib_cmrc_xfer_deliver ), + INTF_OP ( xfer_window, struct ib_cmrc_connection *, + ib_cmrc_xfer_window ), + INTF_OP ( intf_close, struct ib_cmrc_connection *, ib_cmrc_close ), + INTF_OP ( identify_device, struct ib_cmrc_connection *, + ib_cmrc_identify_device ), +}; + +/** CMRC data transfer interface descriptor */ +static struct interface_descriptor ib_cmrc_xfer_desc = + INTF_DESC ( struct ib_cmrc_connection, xfer, ib_cmrc_xfer_operations ); + +/** CMRC shutdown process descriptor */ +static struct process_descriptor ib_cmrc_shutdown_desc = + PROC_DESC_ONCE ( struct ib_cmrc_connection, shutdown, + ib_cmrc_shutdown ); + +/** + * Open CMRC connection + * + * @v xfer Data transfer interface + * @v ibdev Infiniband device + * @v dgid Destination GID + * @v service_id Service ID + * @ret rc Returns status code + */ +int ib_cmrc_open ( struct interface *xfer, struct ib_device *ibdev, + union ib_gid *dgid, union ib_guid *service_id ) { + struct ib_cmrc_connection *cmrc; + int rc; + + /* Allocate and initialise structure */ + cmrc = zalloc ( sizeof ( *cmrc ) ); + if ( ! cmrc ) { + rc = -ENOMEM; + goto err_alloc; + } + ref_init ( &cmrc->refcnt, NULL ); + intf_init ( &cmrc->xfer, &ib_cmrc_xfer_desc, &cmrc->refcnt ); + cmrc->ibdev = ibdev; + memcpy ( &cmrc->dgid, dgid, sizeof ( cmrc->dgid ) ); + memcpy ( &cmrc->service_id, service_id, sizeof ( cmrc->service_id ) ); + process_init_stopped ( &cmrc->shutdown, &ib_cmrc_shutdown_desc, + &cmrc->refcnt ); + + /* Open Infiniband device */ + if ( ( rc = ib_open ( ibdev ) ) != 0 ) { + DBGC ( cmrc, "CMRC %p could not open device: %s\n", + cmrc, strerror ( rc ) ); + goto err_open; + } + + /* Create completion queue */ + cmrc->cq = ib_create_cq ( ibdev, IB_CMRC_NUM_CQES, + &ib_cmrc_completion_ops ); + if ( ! cmrc->cq ) { + DBGC ( cmrc, "CMRC %p could not create completion queue\n", + cmrc ); + rc = -ENOMEM; + goto err_create_cq; + } + + /* Create queue pair */ + cmrc->qp = ib_create_qp ( ibdev, IB_QPT_RC, IB_CMRC_NUM_SEND_WQES, + cmrc->cq, IB_CMRC_NUM_RECV_WQES, cmrc->cq, + &ib_cmrc_queue_pair_ops ); + if ( ! cmrc->qp ) { + DBGC ( cmrc, "CMRC %p could not create queue pair\n", cmrc ); + rc = -ENOMEM; + goto err_create_qp; + } + ib_qp_set_ownerdata ( cmrc->qp, cmrc ); + DBGC ( cmrc, "CMRC %p using QPN %lx\n", cmrc, cmrc->qp->qpn ); + + /* Attach to parent interface, transfer reference (implicitly) + * to our shutdown process, and return. + */ + intf_plug_plug ( &cmrc->xfer, xfer ); + return 0; + + ib_destroy_qp ( ibdev, cmrc->qp ); + err_create_qp: + ib_destroy_cq ( ibdev, cmrc->cq ); + err_create_cq: + ib_close ( ibdev ); + err_open: + ref_put ( &cmrc->refcnt ); + err_alloc: + return rc; +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_mcast.c b/qemu/roms/ipxe/src/net/infiniband/ib_mcast.c new file mode 100644 index 000000000..0a5e72a37 --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_mcast.c @@ -0,0 +1,213 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <ipxe/list.h> +#include <ipxe/infiniband.h> +#include <ipxe/ib_mi.h> +#include <ipxe/ib_mcast.h> + +/** @file + * + * Infiniband multicast groups + * + */ + +/** + * Generate multicast membership MAD + * + * @v ibdev Infiniband device + * @v gid Multicast GID + * @v join Join (rather than leave) group + * @v mad MAD to fill in + */ +static void ib_mcast_mad ( struct ib_device *ibdev, union ib_gid *gid, + int join, union ib_mad *mad ) { + struct ib_mad_sa *sa = &mad->sa; + + /* Construct multicast membership record request */ + memset ( sa, 0, sizeof ( *sa ) ); + sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + sa->mad_hdr.class_version = IB_SA_CLASS_VERSION; + sa->mad_hdr.method = + ( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE ); + sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ); + sa->sa_hdr.comp_mask[1] = + htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_JOIN_STATE ); + sa->sa_data.mc_member_record.scope__join_state = 1; + memcpy ( &sa->sa_data.mc_member_record.mgid, gid, + sizeof ( sa->sa_data.mc_member_record.mgid ) ); + memcpy ( &sa->sa_data.mc_member_record.port_gid, &ibdev->gid, + sizeof ( sa->sa_data.mc_member_record.port_gid ) ); +} + +/** + * Handle multicast membership record join response + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + * @v rc Status code + * @v mad Received MAD (or NULL on error) + * @v av Source address vector (or NULL on error) + */ +static void ib_mcast_complete ( struct ib_device *ibdev, + struct ib_mad_interface *mi __unused, + struct ib_mad_transaction *madx, + int rc, union ib_mad *mad, + struct ib_address_vector *av __unused ) { + struct ib_mc_membership *membership = ib_madx_get_ownerdata ( madx ); + struct ib_queue_pair *qp = membership->qp; + union ib_gid *gid = &membership->gid; + struct ib_mc_member_record *mc_member_record = + &mad->sa.sa_data.mc_member_record; + int joined; + unsigned long qkey; + + /* Report failures */ + if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) )) + rc = -ENOTCONN; + if ( rc != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx join failed: %s\n", + ibdev, qp->qpn, strerror ( rc ) ); + goto out; + } + + /* Extract values from MAD */ + joined = ( mad->hdr.method == IB_MGMT_METHOD_GET_RESP ); + qkey = ntohl ( mc_member_record->qkey ); + DBGC ( ibdev, "IBDEV %p QPN %lx %s " IB_GID_FMT " qkey %lx\n", + ibdev, qp->qpn, ( joined ? "joined" : "left" ), + IB_GID_ARGS ( gid ), qkey ); + + /* Set queue key */ + qp->qkey = qkey; + if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not modify qkey: %s\n", + ibdev, qp->qpn, strerror ( rc ) ); + goto out; + } + + out: + /* Destroy the completed transaction */ + ib_destroy_madx ( ibdev, mi, madx ); + membership->madx = NULL; + + /* Hand off to upper completion handler */ + membership->complete ( ibdev, qp, membership, rc, mad ); +} + +/** Multicast membership management transaction completion operations */ +static struct ib_mad_transaction_operations ib_mcast_op = { + .complete = ib_mcast_complete, +}; + +/** + * Join multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v membership Multicast group membership + * @v gid Multicast GID to join + * @v joined Join completion handler + * @ret rc Return status code + */ +int ib_mcast_join ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_mc_membership *membership, union ib_gid *gid, + void ( * complete ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_mc_membership *membership, + int rc, union ib_mad *mad ) ) { + union ib_mad mad; + int rc; + + DBGC ( ibdev, "IBDEV %p QPN %lx joining " IB_GID_FMT "\n", + ibdev, qp->qpn, IB_GID_ARGS ( gid ) ); + + /* Initialise structure */ + membership->qp = qp; + memcpy ( &membership->gid, gid, sizeof ( membership->gid ) ); + membership->complete = complete; + + /* Attach queue pair to multicast GID */ + if ( ( rc = ib_mcast_attach ( ibdev, qp, gid ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not attach: %s\n", + ibdev, qp->qpn, strerror ( rc ) ); + goto err_mcast_attach; + } + + /* Initiate multicast membership join */ + ib_mcast_mad ( ibdev, gid, 1, &mad ); + membership->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, NULL, + &ib_mcast_op ); + if ( ! membership->madx ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not create join " + "transaction\n", ibdev, qp->qpn ); + rc = -ENOMEM; + goto err_create_madx; + } + ib_madx_set_ownerdata ( membership->madx, membership ); + + return 0; + + ib_destroy_madx ( ibdev, ibdev->gsi, membership->madx ); + err_create_madx: + ib_mcast_detach ( ibdev, qp, gid ); + err_mcast_attach: + return rc; +} + +/** + * Leave multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v membership Multicast group membership + */ +void ib_mcast_leave ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_mc_membership *membership ) { + union ib_gid *gid = &membership->gid; + union ib_mad mad; + int rc; + + DBGC ( ibdev, "IBDEV %p QPN %lx leaving " IB_GID_FMT "\n", + ibdev, qp->qpn, IB_GID_ARGS ( gid ) ); + + /* Detach from multicast GID */ + ib_mcast_detach ( ibdev, qp, &membership->gid ); + + /* Cancel multicast membership join, if applicable */ + if ( membership->madx ) { + ib_destroy_madx ( ibdev, ibdev->gsi, membership->madx ); + membership->madx = NULL; + } + + /* Send a single group leave MAD */ + ib_mcast_mad ( ibdev, &membership->gid, 0, &mad ); + if ( ( rc = ib_mi_send ( ibdev, ibdev->gsi, &mad, NULL ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not send leave request: " + "%s\n", ibdev, qp->qpn, strerror ( rc ) ); + } +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_mi.c b/qemu/roms/ipxe/src/net/infiniband/ib_mi.c new file mode 100644 index 000000000..ef6d539f1 --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_mi.c @@ -0,0 +1,415 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <byteswap.h> +#include <ipxe/infiniband.h> +#include <ipxe/iobuf.h> +#include <ipxe/ib_mi.h> + +/** + * @file + * + * Infiniband management interfaces + * + */ + +/** Management interface number of send WQEs + * + * This is a policy decision. + */ +#define IB_MI_NUM_SEND_WQES 4 + +/** Management interface number of receive WQEs + * + * This is a policy decision. + */ +#define IB_MI_NUM_RECV_WQES 2 + +/** Management interface number of completion queue entries + * + * This is a policy decision + */ +#define IB_MI_NUM_CQES 8 + +/** TID magic signature */ +#define IB_MI_TID_MAGIC ( ( 'i' << 24 ) | ( 'P' << 16 ) | ( 'X' << 8 ) | 'E' ) + +/** TID to use for next MAD */ +static unsigned int next_tid; + +/** + * Handle received MAD + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + * @ret rc Return status code + */ +static int ib_mi_handle ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_mad_hdr *hdr = &mad->hdr; + struct ib_mad_transaction *madx; + struct ib_mad_agent *agent; + + /* Look for a matching transaction by TID */ + list_for_each_entry ( madx, &mi->madx, list ) { + if ( memcmp ( &hdr->tid, &madx->mad.hdr.tid, + sizeof ( hdr->tid ) ) != 0 ) + continue; + /* Found a matching transaction */ + madx->op->complete ( ibdev, mi, madx, 0, mad, av ); + return 0; + } + + /* If there is no matching transaction, look for a listening agent */ + for_each_table_entry ( agent, IB_MAD_AGENTS ) { + if ( ( ( agent->mgmt_class & IB_MGMT_CLASS_MASK ) != + ( hdr->mgmt_class & IB_MGMT_CLASS_MASK ) ) || + ( agent->class_version != hdr->class_version ) || + ( agent->attr_id != hdr->attr_id ) ) + continue; + /* Found a matching agent */ + agent->handle ( ibdev, mi, mad, av ); + return 0; + } + + /* Otherwise, ignore it */ + DBGC ( mi, "MI %p RX TID %08x%08x ignored\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) ); + return -ENOTSUP; +} + +/** + * Complete receive via management interface + * + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v dest Destination address vector + * @v source Source address vector + * @v iobuf I/O buffer + * @v rc Completion status code + */ +static void ib_mi_complete_recv ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_address_vector *dest __unused, + struct ib_address_vector *source, + struct io_buffer *iobuf, int rc ) { + struct ib_mad_interface *mi = ib_qp_get_ownerdata ( qp ); + union ib_mad *mad; + struct ib_mad_hdr *hdr; + + /* Ignore errors */ + if ( rc != 0 ) { + DBGC ( mi, "MI %p RX error: %s\n", mi, strerror ( rc ) ); + goto out; + } + + /* Sanity checks */ + if ( iob_len ( iobuf ) != sizeof ( *mad ) ) { + DBGC ( mi, "MI %p RX bad size (%zd bytes)\n", + mi, iob_len ( iobuf ) ); + DBGC_HDA ( mi, 0, iobuf->data, iob_len ( iobuf ) ); + goto out; + } + mad = iobuf->data; + hdr = &mad->hdr; + if ( hdr->base_version != IB_MGMT_BASE_VERSION ) { + DBGC ( mi, "MI %p RX unsupported base version %x\n", + mi, hdr->base_version ); + DBGC_HDA ( mi, 0, mad, sizeof ( *mad ) ); + goto out; + } + DBGC ( mi, "MI %p RX TID %08x%08x (%02x,%02x,%02x,%04x) status " + "%04x\n", mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ), + hdr->mgmt_class, hdr->class_version, hdr->method, + ntohs ( hdr->attr_id ), ntohs ( hdr->status ) ); + DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) ); + + /* Handle MAD */ + if ( ( rc = ib_mi_handle ( ibdev, mi, mad, source ) ) != 0 ) + goto out; + + out: + free_iob ( iobuf ); +} + +/** Management interface completion operations */ +static struct ib_completion_queue_operations ib_mi_completion_ops = { + .complete_recv = ib_mi_complete_recv, +}; + +/** Management interface queue pair operations */ +static struct ib_queue_pair_operations ib_mi_queue_pair_ops = { + .alloc_iob = alloc_iob, +}; + +/** + * Transmit MAD + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad MAD + * @v av Destination address vector + * @ret rc Return status code + */ +int ib_mi_send ( struct ib_device *ibdev, struct ib_mad_interface *mi, + union ib_mad *mad, struct ib_address_vector *av ) { + struct ib_mad_hdr *hdr = &mad->hdr; + struct io_buffer *iobuf; + int rc; + + /* Set common fields */ + hdr->base_version = IB_MGMT_BASE_VERSION; + if ( ( hdr->tid[0] == 0 ) && ( hdr->tid[1] == 0 ) ) { + hdr->tid[0] = htonl ( IB_MI_TID_MAGIC ); + hdr->tid[1] = htonl ( ++next_tid ); + } + DBGC ( mi, "MI %p TX TID %08x%08x (%02x,%02x,%02x,%04x) status " + "%04x\n", mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ), + hdr->mgmt_class, hdr->class_version, hdr->method, + ntohs ( hdr->attr_id ), ntohs ( hdr->status ) ); + DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) ); + + /* Construct directed route portion of response, if necessary */ + if ( hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ) { + struct ib_mad_smp *smp = &mad->smp; + unsigned int hop_pointer; + unsigned int hop_count; + + smp->mad_hdr.status |= htons ( IB_SMP_STATUS_D_INBOUND ); + hop_pointer = smp->mad_hdr.class_specific.smp.hop_pointer; + hop_count = smp->mad_hdr.class_specific.smp.hop_count; + assert ( hop_count == hop_pointer ); + if ( hop_pointer < ( sizeof ( smp->return_path.hops ) / + sizeof ( smp->return_path.hops[0] ) ) ) { + smp->return_path.hops[hop_pointer] = ibdev->port; + } else { + DBGC ( mi, "MI %p TX TID %08x%08x invalid hop pointer " + "%d\n", mi, ntohl ( hdr->tid[0] ), + ntohl ( hdr->tid[1] ), hop_pointer ); + return -EINVAL; + } + } + + /* Construct I/O buffer */ + iobuf = alloc_iob ( sizeof ( *mad ) ); + if ( ! iobuf ) { + DBGC ( mi, "MI %p could not allocate buffer for TID " + "%08x%08x\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) ); + return -ENOMEM; + } + memcpy ( iob_put ( iobuf, sizeof ( *mad ) ), mad, sizeof ( *mad ) ); + + /* Send I/O buffer */ + if ( ( rc = ib_post_send ( ibdev, mi->qp, av, iobuf ) ) != 0 ) { + DBGC ( mi, "MI %p TX TID %08x%08x failed: %s\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ), + strerror ( rc ) ); + free_iob ( iobuf ); + return rc; + } + + return 0; +} + +/** + * Handle management transaction timer expiry + * + * @v timer Retry timer + * @v expired Failure indicator + */ +static void ib_mi_timer_expired ( struct retry_timer *timer, int expired ) { + struct ib_mad_transaction *madx = + container_of ( timer, struct ib_mad_transaction, timer ); + struct ib_mad_interface *mi = madx->mi; + struct ib_device *ibdev = mi->ibdev; + struct ib_mad_hdr *hdr = &madx->mad.hdr; + + /* Abandon transaction if we have tried too many times */ + if ( expired ) { + DBGC ( mi, "MI %p abandoning TID %08x%08x\n", + mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) ); + madx->op->complete ( ibdev, mi, madx, -ETIMEDOUT, NULL, NULL ); + return; + } + + /* Restart retransmission timer */ + start_timer ( timer ); + + /* Resend MAD */ + ib_mi_send ( ibdev, mi, &madx->mad, &madx->av ); +} + +/** + * Create management transaction + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad MAD to send + * @v av Destination address, or NULL to use SM's GSI + * @v op Management transaction operations + * @ret madx Management transaction, or NULL + */ +struct ib_mad_transaction * +ib_create_madx ( struct ib_device *ibdev, struct ib_mad_interface *mi, + union ib_mad *mad, struct ib_address_vector *av, + struct ib_mad_transaction_operations *op ) { + struct ib_mad_transaction *madx; + + /* Allocate and initialise structure */ + madx = zalloc ( sizeof ( *madx ) ); + if ( ! madx ) + return NULL; + timer_init ( &madx->timer, ib_mi_timer_expired, NULL ); + madx->mi = mi; + madx->op = op; + + /* Determine address vector */ + if ( av ) { + memcpy ( &madx->av, av, sizeof ( madx->av ) ); + } else { + madx->av.lid = ibdev->sm_lid; + madx->av.sl = ibdev->sm_sl; + madx->av.qpn = IB_QPN_GSI; + madx->av.qkey = IB_QKEY_GSI; + } + + /* Copy MAD */ + memcpy ( &madx->mad, mad, sizeof ( madx->mad ) ); + + /* Add to list and start timer to send initial MAD */ + list_add ( &madx->list, &mi->madx ); + start_timer_nodelay ( &madx->timer ); + + return madx; +} + +/** + * Destroy management transaction + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + */ +void ib_destroy_madx ( struct ib_device *ibdev __unused, + struct ib_mad_interface *mi __unused, + struct ib_mad_transaction *madx ) { + + /* Stop timer and remove from list */ + stop_timer ( &madx->timer ); + list_del ( &madx->list ); + + /* Free transaction */ + free ( madx ); +} + +/** + * Create management interface + * + * @v ibdev Infiniband device + * @v type Queue pair type + * @ret mi Management agent, or NULL + */ +struct ib_mad_interface * ib_create_mi ( struct ib_device *ibdev, + enum ib_queue_pair_type type ) { + struct ib_mad_interface *mi; + int rc; + + /* Allocate and initialise fields */ + mi = zalloc ( sizeof ( *mi ) ); + if ( ! mi ) + goto err_alloc; + mi->ibdev = ibdev; + INIT_LIST_HEAD ( &mi->madx ); + + /* Create completion queue */ + mi->cq = ib_create_cq ( ibdev, IB_MI_NUM_CQES, &ib_mi_completion_ops ); + if ( ! mi->cq ) { + DBGC ( mi, "MI %p could not allocate completion queue\n", mi ); + goto err_create_cq; + } + + /* Create queue pair */ + mi->qp = ib_create_qp ( ibdev, type, IB_MI_NUM_SEND_WQES, mi->cq, + IB_MI_NUM_RECV_WQES, mi->cq, + &ib_mi_queue_pair_ops ); + if ( ! mi->qp ) { + DBGC ( mi, "MI %p could not allocate queue pair\n", mi ); + goto err_create_qp; + } + ib_qp_set_ownerdata ( mi->qp, mi ); + DBGC ( mi, "MI %p (%s) running on QPN %#lx\n", + mi, ( ( type == IB_QPT_SMI ) ? "SMI" : "GSI" ), mi->qp->qpn ); + + /* Set queue key */ + mi->qp->qkey = ( ( type == IB_QPT_SMI ) ? IB_QKEY_SMI : IB_QKEY_GSI ); + if ( ( rc = ib_modify_qp ( ibdev, mi->qp ) ) != 0 ) { + DBGC ( mi, "MI %p could not set queue key: %s\n", + mi, strerror ( rc ) ); + goto err_modify_qp; + } + + /* Fill receive ring */ + ib_refill_recv ( ibdev, mi->qp ); + return mi; + + err_modify_qp: + ib_destroy_qp ( ibdev, mi->qp ); + err_create_qp: + ib_destroy_cq ( ibdev, mi->cq ); + err_create_cq: + free ( mi ); + err_alloc: + return NULL; +} + +/** + * Destroy management interface + * + * @v mi Management interface + */ +void ib_destroy_mi ( struct ib_device *ibdev, struct ib_mad_interface *mi ) { + struct ib_mad_transaction *madx; + struct ib_mad_transaction *tmp; + + /* Flush any outstanding requests */ + list_for_each_entry_safe ( madx, tmp, &mi->madx, list ) { + DBGC ( mi, "MI %p destroyed while TID %08x%08x in progress\n", + mi, ntohl ( madx->mad.hdr.tid[0] ), + ntohl ( madx->mad.hdr.tid[1] ) ); + madx->op->complete ( ibdev, mi, madx, -ECANCELED, NULL, NULL ); + } + + ib_destroy_qp ( ibdev, mi->qp ); + ib_destroy_cq ( ibdev, mi->cq ); + free ( mi ); +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_packet.c b/qemu/roms/ipxe/src/net/infiniband/ib_packet.c new file mode 100644 index 000000000..6c850e39b --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_packet.c @@ -0,0 +1,249 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/iobuf.h> +#include <ipxe/infiniband.h> +#include <ipxe/ib_packet.h> + +/** + * @file + * + * Infiniband Packet Formats + * + */ + +/** + * Add IB headers + * + * @v ibdev Infiniband device + * @v iobuf I/O buffer to contain headers + * @v qp Queue pair + * @v payload_len Payload length + * @v dest Destination address vector + * @ret rc Return status code + */ +int ib_push ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_queue_pair *qp, size_t payload_len, + const struct ib_address_vector *dest ) { + struct ib_local_route_header *lrh; + struct ib_global_route_header *grh; + struct ib_base_transport_header *bth; + struct ib_datagram_extended_transport_header *deth; + size_t orig_iob_len = iob_len ( iobuf ); + size_t pad_len; + size_t lrh_len; + size_t grh_len; + unsigned int vl; + unsigned int lnh; + + DBGC2 ( ibdev, "IBDEV %p TX %04x:%08lx => %04x:%08lx (key %08lx)\n", + ibdev, ibdev->lid, qp->ext_qpn, dest->lid, dest->qpn, + dest->qkey ); + + /* Calculate packet length */ + pad_len = ( (-payload_len) & 0x3 ); + payload_len += pad_len; + payload_len += 4; /* ICRC */ + + /* Reserve space for headers */ + orig_iob_len = iob_len ( iobuf ); + deth = iob_push ( iobuf, sizeof ( *deth ) ); + bth = iob_push ( iobuf, sizeof ( *bth ) ); + grh_len = ( payload_len + iob_len ( iobuf ) - orig_iob_len ); + grh = ( dest->gid_present ? + iob_push ( iobuf, sizeof ( *grh ) ) : NULL ); + lrh = iob_push ( iobuf, sizeof ( *lrh ) ); + lrh_len = ( payload_len + iob_len ( iobuf ) - orig_iob_len ); + + /* Construct LRH */ + vl = ( ( qp->ext_qpn == IB_QPN_SMI ) ? IB_VL_SMP : IB_VL_DEFAULT ); + lrh->vl__lver = ( vl << 4 ); + lnh = ( grh ? IB_LNH_GRH : IB_LNH_BTH ); + lrh->sl__lnh = ( ( dest->sl << 4 ) | lnh ); + lrh->dlid = htons ( dest->lid ); + lrh->length = htons ( lrh_len >> 2 ); + lrh->slid = htons ( ibdev->lid ); + + /* Construct GRH, if required */ + if ( grh ) { + grh->ipver__tclass__flowlabel = + htonl ( IB_GRH_IPVER_IPv6 << 28 ); + grh->paylen = htons ( grh_len ); + grh->nxthdr = IB_GRH_NXTHDR_IBA; + grh->hoplmt = 0; + memcpy ( &grh->sgid, &ibdev->gid, sizeof ( grh->sgid ) ); + memcpy ( &grh->dgid, &dest->gid, sizeof ( grh->dgid ) ); + } + + /* Construct BTH */ + bth->opcode = BTH_OPCODE_UD_SEND; + bth->se__m__padcnt__tver = ( pad_len << 4 ); + bth->pkey = htons ( ibdev->pkey ); + bth->dest_qp = htonl ( dest->qpn ); + bth->ack__psn = htonl ( ( qp->send.psn++ ) & 0xffffffUL ); + + /* Construct DETH */ + deth->qkey = htonl ( dest->qkey ); + deth->src_qp = htonl ( qp->ext_qpn ); + + DBGCP_HDA ( ibdev, 0, iobuf->data, + ( iob_len ( iobuf ) - orig_iob_len ) ); + + return 0; +} + +/** + * Remove IB headers + * + * @v ibdev Infiniband device + * @v iobuf I/O buffer containing headers + * @v qp Queue pair to fill in, or NULL + * @v payload_len Payload length to fill in, or NULL + * @v dest Destination address vector to fill in + * @v source Source address vector to fill in + * @ret rc Return status code + */ +int ib_pull ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_queue_pair **qp, size_t *payload_len, + struct ib_address_vector *dest, + struct ib_address_vector *source ) { + struct ib_local_route_header *lrh; + struct ib_global_route_header *grh; + struct ib_base_transport_header *bth; + struct ib_datagram_extended_transport_header *deth; + size_t orig_iob_len = iob_len ( iobuf ); + unsigned int lnh; + size_t pad_len; + + /* Clear return values */ + if ( qp ) + *qp = NULL; + if ( payload_len ) + *payload_len = 0; + memset ( dest, 0, sizeof ( *dest ) ); + memset ( source, 0, sizeof ( *source ) ); + + /* Extract LRH */ + if ( iob_len ( iobuf ) < sizeof ( *lrh ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for LRH\n", + ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + lrh = iobuf->data; + iob_pull ( iobuf, sizeof ( *lrh ) ); + dest->lid = ntohs ( lrh->dlid ); + dest->sl = ( lrh->sl__lnh >> 4 ); + source->lid = ntohs ( lrh->slid ); + source->sl = ( lrh->sl__lnh >> 4 ); + lnh = ( lrh->sl__lnh & 0x3 ); + + /* Reject unsupported packets */ + if ( ! ( ( lnh == IB_LNH_BTH ) || ( lnh == IB_LNH_GRH ) ) ) { + DBGC ( ibdev, "IBDEV %p RX unsupported LNH %x\n", + ibdev, lnh ); + return -ENOTSUP; + } + + /* Extract GRH, if present */ + if ( lnh == IB_LNH_GRH ) { + if ( iob_len ( iobuf ) < sizeof ( *grh ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) " + "for GRH\n", ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + grh = iobuf->data; + iob_pull ( iobuf, sizeof ( *grh ) ); + dest->gid_present = 1; + memcpy ( &dest->gid, &grh->dgid, sizeof ( dest->gid ) ); + source->gid_present = 1; + memcpy ( &source->gid, &grh->sgid, sizeof ( source->gid ) ); + } else { + grh = NULL; + } + + /* Extract BTH */ + if ( iob_len ( iobuf ) < sizeof ( *bth ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for BTH\n", + ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + bth = iobuf->data; + iob_pull ( iobuf, sizeof ( *bth ) ); + if ( bth->opcode != BTH_OPCODE_UD_SEND ) { + DBGC ( ibdev, "IBDEV %p unsupported BTH opcode %x\n", + ibdev, bth->opcode ); + return -ENOTSUP; + } + dest->qpn = ntohl ( bth->dest_qp ); + + /* Extract DETH */ + if ( iob_len ( iobuf ) < sizeof ( *deth ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for DETH\n", + ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + deth = iobuf->data; + iob_pull ( iobuf, sizeof ( *deth ) ); + source->qpn = ntohl ( deth->src_qp ); + source->qkey = ntohl ( deth->qkey ); + + /* Calculate payload length, if applicable */ + if ( payload_len ) { + pad_len = ( ( bth->se__m__padcnt__tver >> 4 ) & 0x3 ); + *payload_len = ( ( ntohs ( lrh->length ) << 2 ) + - ( orig_iob_len - iob_len ( iobuf ) ) + - pad_len - 4 /* ICRC */ ); + } + + /* Determine destination QP, if applicable */ + if ( qp ) { + if ( IB_LID_MULTICAST ( dest->lid ) && grh ) { + if ( ! ( *qp = ib_find_qp_mgid ( ibdev, &grh->dgid ))){ + DBGC ( ibdev, "IBDEV %p RX for unknown MGID " + IB_GID_FMT "\n", + ibdev, IB_GID_ARGS ( &grh->dgid ) ); + return -ENODEV; + } + } else { + if ( ! ( *qp = ib_find_qp_qpn ( ibdev, dest->qpn ) ) ) { + DBGC ( ibdev, "IBDEV %p RX for nonexistent " + "QPN %lx\n", ibdev, dest->qpn ); + return -ENODEV; + } + } + assert ( *qp ); + } + + DBGC2 ( ibdev, "IBDEV %p RX %04x:%08lx <= %04x:%08lx (key %08x)\n", + ibdev, dest->lid, ( IB_LID_MULTICAST ( dest->lid ) ? + ( qp ? (*qp)->ext_qpn : -1UL ) : dest->qpn ), + source->lid, source->qpn, ntohl ( deth->qkey ) ); + DBGCP_HDA ( ibdev, 0, + ( iobuf->data - ( orig_iob_len - iob_len ( iobuf ) ) ), + ( orig_iob_len - iob_len ( iobuf ) ) ); + + return 0; +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_pathrec.c b/qemu/roms/ipxe/src/net/infiniband/ib_pathrec.c new file mode 100644 index 000000000..1b95cbfa8 --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_pathrec.c @@ -0,0 +1,289 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <byteswap.h> +#include <errno.h> +#include <ipxe/infiniband.h> +#include <ipxe/ib_mi.h> +#include <ipxe/ib_pathrec.h> + +/** @file + * + * Infiniband path lookups + * + */ + +/** + * Handle path transaction completion + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v madx Management transaction + * @v rc Status code + * @v mad Received MAD (or NULL on error) + * @v av Source address vector (or NULL on error) + */ +static void ib_path_complete ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + struct ib_mad_transaction *madx, + int rc, union ib_mad *mad, + struct ib_address_vector *av __unused ) { + struct ib_path *path = ib_madx_get_ownerdata ( madx ); + union ib_gid *dgid = &path->av.gid; + struct ib_path_record *pathrec = &mad->sa.sa_data.path_record; + + /* Report failures */ + if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) )) + rc = -ENETUNREACH; + if ( rc != 0 ) { + DBGC ( ibdev, "IBDEV %p path lookup for " IB_GID_FMT + " failed: %s\n", + ibdev, IB_GID_ARGS ( dgid ), strerror ( rc ) ); + goto out; + } + + /* Extract values from MAD */ + path->av.lid = ntohs ( pathrec->dlid ); + path->av.sl = ( pathrec->reserved__sl & 0x0f ); + path->av.rate = ( pathrec->rate_selector__rate & 0x3f ); + DBGC ( ibdev, "IBDEV %p path to " IB_GID_FMT " is %04x sl %d rate " + "%d\n", ibdev, IB_GID_ARGS ( dgid ), path->av.lid, path->av.sl, + path->av.rate ); + + out: + /* Destroy the completed transaction */ + ib_destroy_madx ( ibdev, mi, madx ); + path->madx = NULL; + + /* Hand off to upper completion handler */ + path->op->complete ( ibdev, path, rc, &path->av ); +} + +/** Path transaction completion operations */ +static struct ib_mad_transaction_operations ib_path_op = { + .complete = ib_path_complete, +}; + +/** + * Create path + * + * @v ibdev Infiniband device + * @v av Address vector to complete + * @v op Path operations + * @ret path Path + */ +struct ib_path * +ib_create_path ( struct ib_device *ibdev, struct ib_address_vector *av, + struct ib_path_operations *op ) { + struct ib_path *path; + union ib_mad mad; + struct ib_mad_sa *sa = &mad.sa; + + /* Allocate and initialise structure */ + path = zalloc ( sizeof ( *path ) ); + if ( ! path ) + goto err_alloc_path; + path->ibdev = ibdev; + memcpy ( &path->av, av, sizeof ( path->av ) ); + path->op = op; + + /* Construct path request */ + memset ( sa, 0, sizeof ( *sa ) ); + sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + sa->mad_hdr.class_version = IB_SA_CLASS_VERSION; + sa->mad_hdr.method = IB_MGMT_METHOD_GET; + sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); + sa->sa_hdr.comp_mask[1] = + htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID ); + memcpy ( &sa->sa_data.path_record.dgid, &path->av.gid, + sizeof ( sa->sa_data.path_record.dgid ) ); + memcpy ( &sa->sa_data.path_record.sgid, &ibdev->gid, + sizeof ( sa->sa_data.path_record.sgid ) ); + + /* Create management transaction */ + path->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, NULL, + &ib_path_op ); + if ( ! path->madx ) + goto err_create_madx; + ib_madx_set_ownerdata ( path->madx, path ); + + return path; + + ib_destroy_madx ( ibdev, ibdev->gsi, path->madx ); + err_create_madx: + free ( path ); + err_alloc_path: + return NULL; +} + +/** + * Destroy path + * + * @v ibdev Infiniband device + * @v path Path + */ +void ib_destroy_path ( struct ib_device *ibdev, struct ib_path *path ) { + + if ( path->madx ) + ib_destroy_madx ( ibdev, ibdev->gsi, path->madx ); + free ( path ); +} + +/** Number of path cache entries + * + * Must be a power of two. + */ +#define IB_NUM_CACHED_PATHS 4 + +/** A cached path */ +struct ib_cached_path { + /** Path */ + struct ib_path *path; +}; + +/** Path cache */ +static struct ib_cached_path ib_path_cache[IB_NUM_CACHED_PATHS]; + +/** Oldest path cache entry index */ +static unsigned int ib_path_cache_idx; + +/** + * Find path cache entry + * + * @v ibdev Infiniband device + * @v dgid Destination GID + * @ret path Path cache entry, or NULL + */ +static struct ib_cached_path * +ib_find_path_cache_entry ( struct ib_device *ibdev, union ib_gid *dgid ) { + struct ib_cached_path *cached; + unsigned int i; + + for ( i = 0 ; i < IB_NUM_CACHED_PATHS ; i++ ) { + cached = &ib_path_cache[i]; + if ( ! cached->path ) + continue; + if ( cached->path->ibdev != ibdev ) + continue; + if ( memcmp ( &cached->path->av.gid, dgid, + sizeof ( cached->path->av.gid ) ) != 0 ) + continue; + return cached; + } + + return NULL; +} + +/** + * Handle cached path transaction completion + * + * @v ibdev Infiniband device + * @v path Path + * @v rc Status code + * @v av Address vector, or NULL on error + */ +static void ib_cached_path_complete ( struct ib_device *ibdev, + struct ib_path *path, int rc, + struct ib_address_vector *av __unused ) { + struct ib_cached_path *cached = ib_path_get_ownerdata ( path ); + + /* If the transaction failed, erase the cache entry */ + if ( rc != 0 ) { + /* Destroy the old cache entry */ + ib_destroy_path ( ibdev, path ); + memset ( cached, 0, sizeof ( *cached ) ); + return; + } + + /* Do not destroy the completed transaction; we still need to + * refer to the resolved path. + */ +} + +/** Cached path transaction completion operations */ +static struct ib_path_operations ib_cached_path_op = { + .complete = ib_cached_path_complete, +}; + +/** + * Resolve path + * + * @v ibdev Infiniband device + * @v av Address vector to complete + * @ret rc Return status code + * + * This provides a non-transactional way to resolve a path, via a + * cache similar to ARP. + */ +int ib_resolve_path ( struct ib_device *ibdev, struct ib_address_vector *av ) { + union ib_gid *gid = &av->gid; + struct ib_cached_path *cached; + unsigned int cache_idx; + + /* Sanity check */ + if ( ! av->gid_present ) { + DBGC ( ibdev, "IBDEV %p attempt to look up path without GID\n", + ibdev ); + return -EINVAL; + } + + /* Look in cache for a matching entry */ + cached = ib_find_path_cache_entry ( ibdev, gid ); + if ( cached && cached->path->av.lid ) { + /* Populated entry found */ + av->lid = cached->path->av.lid; + av->rate = cached->path->av.rate; + av->sl = cached->path->av.sl; + DBGC2 ( ibdev, "IBDEV %p cache hit for " IB_GID_FMT "\n", + ibdev, IB_GID_ARGS ( gid ) ); + return 0; + } + DBGC ( ibdev, "IBDEV %p cache miss for " IB_GID_FMT "%s\n", ibdev, + IB_GID_ARGS ( gid ), ( cached ? " (in progress)" : "" ) ); + + /* If lookup is already in progress, do nothing */ + if ( cached ) + return -ENOENT; + + /* Locate a new cache entry to use */ + cache_idx = ( (ib_path_cache_idx++) % IB_NUM_CACHED_PATHS ); + cached = &ib_path_cache[cache_idx]; + + /* Destroy the old cache entry */ + if ( cached->path ) + ib_destroy_path ( ibdev, cached->path ); + memset ( cached, 0, sizeof ( *cached ) ); + + /* Create new path */ + cached->path = ib_create_path ( ibdev, av, &ib_cached_path_op ); + if ( ! cached->path ) { + DBGC ( ibdev, "IBDEV %p could not create path\n", + ibdev ); + return -ENOMEM; + } + ib_path_set_ownerdata ( cached->path, cached ); + + /* Not found yet */ + return -ENOENT; +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_sma.c b/qemu/roms/ipxe/src/net/infiniband/ib_sma.c new file mode 100644 index 000000000..86553732a --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_sma.c @@ -0,0 +1,371 @@ +/* + * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <byteswap.h> +#include <ipxe/settings.h> +#include <ipxe/infiniband.h> +#include <ipxe/iobuf.h> +#include <ipxe/ib_mi.h> +#include <ipxe/ib_sma.h> + +/** + * @file + * + * Infiniband Subnet Management Agent + * + */ + +/** + * Node information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_node_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_node_info *node_info = &mad->smp.smp_data.node_info; + int rc; + + /* Fill in information */ + memset ( node_info, 0, sizeof ( *node_info ) ); + node_info->base_version = IB_MGMT_BASE_VERSION; + node_info->class_version = IB_SMP_CLASS_VERSION; + node_info->node_type = IB_NODE_TYPE_HCA; + node_info->num_ports = ib_count_ports ( ibdev ); + memcpy ( &node_info->sys_guid, &ibdev->node_guid, + sizeof ( node_info->sys_guid ) ); + memcpy ( &node_info->node_guid, &ibdev->node_guid, + sizeof ( node_info->node_guid ) ); + memcpy ( &node_info->port_guid, &ibdev->gid.s.guid, + sizeof ( node_info->port_guid ) ); + node_info->partition_cap = htons ( 1 ); + node_info->local_port_num = ibdev->port; + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send NodeInfo GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * Node description + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_node_desc ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_node_desc *node_desc = &mad->smp.smp_data.node_desc; + union ib_guid *guid = &ibdev->node_guid; + char hostname[ sizeof ( node_desc->node_string ) ]; + int hostname_len; + int rc; + + /* Fill in information */ + memset ( node_desc, 0, sizeof ( *node_desc ) ); + hostname_len = fetch_string_setting ( NULL, &hostname_setting, + hostname, sizeof ( hostname ) ); + snprintf ( node_desc->node_string, sizeof ( node_desc->node_string ), + "iPXE %s%s%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x (%s)", + hostname, ( ( hostname_len >= 0 ) ? " " : "" ), + guid->bytes[0], guid->bytes[1], guid->bytes[2], + guid->bytes[3], guid->bytes[4], guid->bytes[5], + guid->bytes[6], guid->bytes[7], ibdev->dev->name ); + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send NodeDesc GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * GUID information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_guid_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_guid_info *guid_info = &mad->smp.smp_data.guid_info; + int rc; + + /* Fill in information */ + memset ( guid_info, 0, sizeof ( *guid_info ) ); + memcpy ( guid_info->guid[0], &ibdev->gid.s.guid, + sizeof ( guid_info->guid[0] ) ); + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send GuidInfo GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * Set port information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @ret rc Return status code + */ +static int ib_sma_set_port_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad ) { + const struct ib_port_info *port_info = &mad->smp.smp_data.port_info; + unsigned int link_width_enabled; + unsigned int link_speed_enabled; + int rc; + + /* Set parameters */ + memcpy ( &ibdev->gid.s.prefix, port_info->gid_prefix, + sizeof ( ibdev->gid.s.prefix ) ); + ibdev->lid = ntohs ( port_info->lid ); + ibdev->sm_lid = ntohs ( port_info->mastersm_lid ); + if ( ( link_width_enabled = port_info->link_width_enabled ) ) + ibdev->link_width_enabled = link_width_enabled; + if ( ( link_speed_enabled = + ( port_info->link_speed_active__link_speed_enabled & 0xf ) ) ) + ibdev->link_speed_enabled = link_speed_enabled; + ibdev->sm_sl = ( port_info->neighbour_mtu__mastersm_sl & 0xf ); + DBGC ( mi, "SMA %p set LID %04x SMLID %04x link width %02x speed " + "%02x\n", mi, ibdev->lid, ibdev->sm_lid, + ibdev->link_width_enabled, ibdev->link_speed_enabled ); + + /* Update parameters on device */ + if ( ( rc = ib_set_port_info ( ibdev, mad ) ) != 0 ) { + DBGC ( mi, "SMA %p could not set port information: %s\n", + mi, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Port information + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_port_info ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_port_info *port_info = &mad->smp.smp_data.port_info; + int rc; + + /* Set parameters if applicable */ + if ( mad->hdr.method == IB_MGMT_METHOD_SET ) { + if ( ( rc = ib_sma_set_port_info ( ibdev, mi, mad ) ) != 0 ) { + mad->hdr.status = + htons ( IB_MGMT_STATUS_UNSUPPORTED_METHOD_ATTR ); + /* Fall through to generate GetResponse */ + } + } + + /* Fill in information */ + memset ( port_info, 0, sizeof ( *port_info ) ); + memcpy ( port_info->gid_prefix, &ibdev->gid.s.prefix, + sizeof ( port_info->gid_prefix ) ); + port_info->lid = ntohs ( ibdev->lid ); + port_info->mastersm_lid = ntohs ( ibdev->sm_lid ); + port_info->local_port_num = ibdev->port; + port_info->link_width_enabled = ibdev->link_width_enabled; + port_info->link_width_supported = ibdev->link_width_supported; + port_info->link_width_active = ibdev->link_width_active; + port_info->link_speed_supported__port_state = + ( ( ibdev->link_speed_supported << 4 ) | ibdev->port_state ); + port_info->port_phys_state__link_down_def_state = + ( ( IB_PORT_PHYS_STATE_POLLING << 4 ) | + IB_PORT_PHYS_STATE_POLLING ); + port_info->link_speed_active__link_speed_enabled = + ( ( ibdev->link_speed_active << 4 ) | + ibdev->link_speed_enabled ); + port_info->neighbour_mtu__mastersm_sl = + ( ( IB_MTU_2048 << 4 ) | ibdev->sm_sl ); + port_info->vl_cap__init_type = ( IB_VL_0 << 4 ); + port_info->init_type_reply__mtu_cap = IB_MTU_2048; + port_info->operational_vls__enforcement = ( IB_VL_0 << 4 ); + port_info->guid_cap = 1; + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send PortInfo GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** + * Set partition key table + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @ret rc Return status code + */ +static int ib_sma_set_pkey_table ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad ) { + struct ib_pkey_table *pkey_table = &mad->smp.smp_data.pkey_table; + int rc; + + /* Set parameters */ + ibdev->pkey = ntohs ( pkey_table->pkey[0] ); + DBGC ( mi, "SMA %p set pkey %04x\n", mi, ibdev->pkey ); + + /* Update parameters on device */ + if ( ( rc = ib_set_pkey_table ( ibdev, mad ) ) != 0 ) { + DBGC ( mi, "SMA %p could not set pkey table: %s\n", + mi, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Partition key table + * + * @v ibdev Infiniband device + * @v mi Management interface + * @v mad Received MAD + * @v av Source address vector + */ +static void ib_sma_pkey_table ( struct ib_device *ibdev, + struct ib_mad_interface *mi, + union ib_mad *mad, + struct ib_address_vector *av ) { + struct ib_pkey_table *pkey_table = &mad->smp.smp_data.pkey_table; + int rc; + + /* Set parameters, if applicable */ + if ( mad->hdr.method == IB_MGMT_METHOD_SET ) { + if ( ( rc = ib_sma_set_pkey_table ( ibdev, mi, mad ) ) != 0 ) { + mad->hdr.status = + htons ( IB_MGMT_STATUS_UNSUPPORTED_METHOD_ATTR ); + /* Fall through to generate GetResponse */ + } + } + + /* Fill in information */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + memset ( pkey_table, 0, sizeof ( *pkey_table ) ); + pkey_table->pkey[0] = htons ( ibdev->pkey ); + + /* Send GetResponse */ + mad->hdr.method = IB_MGMT_METHOD_GET_RESP; + if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) { + DBGC ( mi, "SMA %p could not send PKeyTable GetResponse: %s\n", + mi, strerror ( rc ) ); + return; + } +} + +/** Subnet management agent */ +struct ib_mad_agent ib_sma_agent[] __ib_mad_agent = { + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_NODE_INFO ), + .handle = ib_sma_node_info, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_NODE_DESC ), + .handle = ib_sma_node_desc, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_GUID_INFO ), + .handle = ib_sma_guid_info, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_PORT_INFO ), + .handle = ib_sma_port_info, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED, + .class_version = IB_SMP_CLASS_VERSION, + .attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE ), + .handle = ib_sma_pkey_table, + }, +}; + +/** + * Create subnet management agent and interface + * + * @v ibdev Infiniband device + * @v mi Management interface + * @ret rc Return status code + */ +int ib_create_sma ( struct ib_device *ibdev, struct ib_mad_interface *mi ) { + + /* Nothing to do */ + DBGC ( ibdev, "IBDEV %p SMA using SMI %p\n", ibdev, mi ); + + return 0; +} + +/** + * Destroy subnet management agent and interface + * + * @v ibdev Infiniband device + * @v mi Management interface + */ +void ib_destroy_sma ( struct ib_device *ibdev __unused, + struct ib_mad_interface *mi __unused ) { + /* Nothing to do */ +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_smc.c b/qemu/roms/ipxe/src/net/infiniband/ib_smc.c new file mode 100644 index 000000000..4d947d568 --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_smc.c @@ -0,0 +1,256 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <byteswap.h> +#include <ipxe/infiniband.h> +#include <ipxe/ib_smc.h> + +/** + * @file + * + * Infiniband Subnet Management Client + * + */ + +/** + * Issue local MAD + * + * @v ibdev Infiniband device + * @v attr_id Attribute ID, in network byte order + * @v attr_mod Attribute modifier, in network byte order + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_mad ( struct ib_device *ibdev, uint16_t attr_id, + uint32_t attr_mod, ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Construct MAD */ + memset ( mad, 0, sizeof ( *mad ) ); + mad->hdr.base_version = IB_MGMT_BASE_VERSION; + mad->hdr.mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->hdr.class_version = 1; + mad->hdr.method = IB_MGMT_METHOD_GET; + mad->hdr.attr_id = attr_id; + mad->hdr.attr_mod = attr_mod; + + /* Issue MAD */ + if ( ( rc = local_mad ( ibdev, mad ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Get node information + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_get_node_info ( struct ib_device *ibdev, + ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Issue MAD */ + if ( ( rc = ib_smc_mad ( ibdev, htons ( IB_SMP_ATTR_NODE_INFO ), 0, + local_mad, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get node info: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Get port information + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_get_port_info ( struct ib_device *ibdev, + ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Issue MAD */ + if ( ( rc = ib_smc_mad ( ibdev, htons ( IB_SMP_ATTR_PORT_INFO ), + htonl ( ibdev->port ), local_mad, mad )) !=0){ + DBGC ( ibdev, "IBDEV %p could not get port info: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Get GUID information + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_get_guid_info ( struct ib_device *ibdev, + ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Issue MAD */ + if ( ( rc = ib_smc_mad ( ibdev, htons ( IB_SMP_ATTR_GUID_INFO ), 0, + local_mad, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get GUID info: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Get partition key table + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @v mad Management datagram to fill in + * @ret rc Return status code + */ +static int ib_smc_get_pkey_table ( struct ib_device *ibdev, + ib_local_mad_t local_mad, + union ib_mad *mad ) { + int rc; + + /* Issue MAD */ + if ( ( rc = ib_smc_mad ( ibdev, htons ( IB_SMP_ATTR_PKEY_TABLE ), 0, + local_mad, mad ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get pkey table: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + return 0; +} + +/** + * Get Infiniband parameters using SMC + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @ret rc Return status code + */ +static int ib_smc_get ( struct ib_device *ibdev, ib_local_mad_t local_mad ) { + union ib_mad mad; + struct ib_node_info *node_info = &mad.smp.smp_data.node_info; + struct ib_port_info *port_info = &mad.smp.smp_data.port_info; + struct ib_guid_info *guid_info = &mad.smp.smp_data.guid_info; + struct ib_pkey_table *pkey_table = &mad.smp.smp_data.pkey_table; + int rc; + + /* Node info gives us the node GUID */ + if ( ( rc = ib_smc_get_node_info ( ibdev, local_mad, &mad ) ) != 0 ) + return rc; + memcpy ( &ibdev->node_guid, &node_info->node_guid, + sizeof ( ibdev->node_guid ) ); + + /* Port info gives us the link state, the first half of the + * port GID and the SM LID. + */ + if ( ( rc = ib_smc_get_port_info ( ibdev, local_mad, &mad ) ) != 0 ) + return rc; + memcpy ( &ibdev->gid.s.prefix, port_info->gid_prefix, + sizeof ( ibdev->gid.s.prefix ) ); + ibdev->lid = ntohs ( port_info->lid ); + ibdev->sm_lid = ntohs ( port_info->mastersm_lid ); + ibdev->link_width_enabled = port_info->link_width_enabled; + ibdev->link_width_supported = port_info->link_width_supported; + ibdev->link_width_active = port_info->link_width_active; + ibdev->link_speed_supported = + ( port_info->link_speed_supported__port_state >> 4 ); + ibdev->port_state = + ( port_info->link_speed_supported__port_state & 0xf ); + ibdev->link_speed_active = + ( port_info->link_speed_active__link_speed_enabled >> 4 ); + ibdev->link_speed_enabled = + ( port_info->link_speed_active__link_speed_enabled & 0xf ); + ibdev->sm_sl = ( port_info->neighbour_mtu__mastersm_sl & 0xf ); + + /* GUID info gives us the second half of the port GID */ + if ( ( rc = ib_smc_get_guid_info ( ibdev, local_mad, &mad ) ) != 0 ) + return rc; + memcpy ( &ibdev->gid.s.guid, guid_info->guid[0], + sizeof ( ibdev->gid.s.guid ) ); + + /* Get partition key */ + if ( ( rc = ib_smc_get_pkey_table ( ibdev, local_mad, &mad ) ) != 0 ) + return rc; + ibdev->pkey = ntohs ( pkey_table->pkey[0] ); + + DBGC ( ibdev, "IBDEV %p port GID is " IB_GID_FMT "\n", + ibdev, IB_GID_ARGS ( &ibdev->gid ) ); + + return 0; +} + +/** + * Initialise Infiniband parameters using SMC + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @ret rc Return status code + */ +int ib_smc_init ( struct ib_device *ibdev, ib_local_mad_t local_mad ) { + int rc; + + /* Get MAD parameters */ + if ( ( rc = ib_smc_get ( ibdev, local_mad ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Update Infiniband parameters using SMC + * + * @v ibdev Infiniband device + * @v local_mad Method for issuing local MADs + * @ret rc Return status code + */ +int ib_smc_update ( struct ib_device *ibdev, ib_local_mad_t local_mad ) { + int rc; + + /* Get MAD parameters */ + if ( ( rc = ib_smc_get ( ibdev, local_mad ) ) != 0 ) + return rc; + + /* Notify Infiniband core of potential link state change */ + ib_link_state_changed ( ibdev ); + + return 0; +} diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_srp.c b/qemu/roms/ipxe/src/net/infiniband/ib_srp.c new file mode 100644 index 000000000..7b2b2b4ea --- /dev/null +++ b/qemu/roms/ipxe/src/net/infiniband/ib_srp.c @@ -0,0 +1,581 @@ +/* + * Copyright (C) 2009 Fen Systems Ltd <mbrown@fensystems.co.uk>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +FILE_LICENCE ( BSD2 ); + +#include <stdlib.h> +#include <errno.h> +#include <ipxe/interface.h> +#include <ipxe/uri.h> +#include <ipxe/open.h> +#include <ipxe/base16.h> +#include <ipxe/acpi.h> +#include <ipxe/srp.h> +#include <ipxe/infiniband.h> +#include <ipxe/ib_cmrc.h> +#include <ipxe/ib_srp.h> + +/** + * @file + * + * SCSI RDMA Protocol over Infiniband + * + */ + +/* Disambiguate the various possible EINVALs */ +#define EINVAL_BYTE_STRING_LEN __einfo_error ( EINFO_EINVAL_BYTE_STRING_LEN ) +#define EINFO_EINVAL_BYTE_STRING_LEN __einfo_uniqify \ + ( EINFO_EINVAL, 0x01, "Invalid byte string length" ) +#define EINVAL_INTEGER __einfo_error ( EINFO_EINVAL_INTEGER ) +#define EINFO_EINVAL_INTEGER __einfo_uniqify \ + ( EINFO_EINVAL, 0x03, "Invalid integer" ) +#define EINVAL_RP_TOO_SHORT __einfo_error ( EINFO_EINVAL_RP_TOO_SHORT ) +#define EINFO_EINVAL_RP_TOO_SHORT __einfo_uniqify \ + ( EINFO_EINVAL, 0x04, "Root path too short" ) + +/****************************************************************************** + * + * IB SRP devices + * + ****************************************************************************** + */ + +/** An Infiniband SRP device */ +struct ib_srp_device { + /** Reference count */ + struct refcnt refcnt; + + /** SRP transport interface */ + struct interface srp; + /** CMRC interface */ + struct interface cmrc; + + /** Infiniband device */ + struct ib_device *ibdev; + + /** Destination GID (for boot firmware table) */ + union ib_gid dgid; + /** Service ID (for boot firmware table) */ + union ib_guid service_id; +}; + +/** + * Free IB SRP device + * + * @v refcnt Reference count + */ +static void ib_srp_free ( struct refcnt *refcnt ) { + struct ib_srp_device *ib_srp = + container_of ( refcnt, struct ib_srp_device, refcnt ); + + ibdev_put ( ib_srp->ibdev ); + free ( ib_srp ); +} + +/** + * Close IB SRP device + * + * @v ib_srp IB SRP device + * @v rc Reason for close + */ +static void ib_srp_close ( struct ib_srp_device *ib_srp, int rc ) { + + /* Shut down interfaces */ + intf_shutdown ( &ib_srp->cmrc, rc ); + intf_shutdown ( &ib_srp->srp, rc ); +} + +/** + * Describe IB SRP device in an ACPI table + * + * @v srpdev SRP device + * @v acpi ACPI table + * @v len Length of ACPI table + * @ret rc Return status code + */ +static int ib_srp_describe ( struct ib_srp_device *ib_srp, + struct acpi_description_header *acpi, + size_t len ) { + struct ib_device *ibdev = ib_srp->ibdev; + struct sbft_table *sbft = + container_of ( acpi, struct sbft_table, acpi ); + struct sbft_ib_subtable *ib_sbft; + size_t used; + + /* Sanity check */ + if ( acpi->signature != SBFT_SIG ) + return -EINVAL; + + /* Append IB subtable to existing table */ + used = le32_to_cpu ( sbft->acpi.length ); + sbft->ib_offset = cpu_to_le16 ( used ); + ib_sbft = ( ( ( void * ) sbft ) + used ); + used += sizeof ( *ib_sbft ); + if ( used > len ) + return -ENOBUFS; + sbft->acpi.length = cpu_to_le32 ( used ); + + /* Populate subtable */ + memcpy ( &ib_sbft->sgid, &ibdev->gid, sizeof ( ib_sbft->sgid ) ); + memcpy ( &ib_sbft->dgid, &ib_srp->dgid, sizeof ( ib_sbft->dgid ) ); + memcpy ( &ib_sbft->service_id, &ib_srp->service_id, + sizeof ( ib_sbft->service_id ) ); + ib_sbft->pkey = cpu_to_le16 ( ibdev->pkey ); + + return 0; +} + +/** IB SRP CMRC interface operations */ +static struct interface_operation ib_srp_cmrc_op[] = { + INTF_OP ( intf_close, struct ib_srp_device *, ib_srp_close ), +}; + +/** IB SRP CMRC interface descriptor */ +static struct interface_descriptor ib_srp_cmrc_desc = + INTF_DESC_PASSTHRU ( struct ib_srp_device, cmrc, ib_srp_cmrc_op, srp ); + +/** IB SRP SRP interface operations */ +static struct interface_operation ib_srp_srp_op[] = { + INTF_OP ( acpi_describe, struct ib_srp_device *, ib_srp_describe ), + INTF_OP ( intf_close, struct ib_srp_device *, ib_srp_close ), +}; + +/** IB SRP SRP interface descriptor */ +static struct interface_descriptor ib_srp_srp_desc = + INTF_DESC_PASSTHRU ( struct ib_srp_device, srp, ib_srp_srp_op, cmrc ); + +/** + * Open IB SRP device + * + * @v block Block control interface + * @v ibdev Infiniband device + * @v dgid Destination GID + * @v service_id Service ID + * @v initiator Initiator port ID + * @v target Target port ID + * @v lun SCSI LUN + * @ret rc Return status code + */ +static int ib_srp_open ( struct interface *block, struct ib_device *ibdev, + union ib_gid *dgid, union ib_guid *service_id, + union srp_port_id *initiator, + union srp_port_id *target, struct scsi_lun *lun ) { + struct ib_srp_device *ib_srp; + int rc; + + /* Allocate and initialise structure */ + ib_srp = zalloc ( sizeof ( *ib_srp ) ); + if ( ! ib_srp ) { + rc = -ENOMEM; + goto err_zalloc; + } + ref_init ( &ib_srp->refcnt, ib_srp_free ); + intf_init ( &ib_srp->srp, &ib_srp_srp_desc, &ib_srp->refcnt ); + intf_init ( &ib_srp->cmrc, &ib_srp_cmrc_desc, &ib_srp->refcnt ); + ib_srp->ibdev = ibdev_get ( ibdev ); + DBGC ( ib_srp, "IBSRP %p for " IB_GID_FMT " " IB_GUID_FMT "\n", + ib_srp, IB_GID_ARGS ( dgid ), IB_GUID_ARGS ( service_id ) ); + + /* Preserve parameters required for boot firmware table */ + memcpy ( &ib_srp->dgid, dgid, sizeof ( ib_srp->dgid ) ); + memcpy ( &ib_srp->service_id, service_id, + sizeof ( ib_srp->service_id ) ); + + /* Open CMRC socket */ + if ( ( rc = ib_cmrc_open ( &ib_srp->cmrc, ibdev, dgid, + service_id ) ) != 0 ) { + DBGC ( ib_srp, "IBSRP %p could not open CMRC socket: %s\n", + ib_srp, strerror ( rc ) ); + goto err_cmrc_open; + } + + /* Attach SRP device to parent interface */ + if ( ( rc = srp_open ( block, &ib_srp->srp, initiator, target, + ibdev->rdma_key, lun ) ) != 0 ) { + DBGC ( ib_srp, "IBSRP %p could not create SRP device: %s\n", + ib_srp, strerror ( rc ) ); + goto err_srp_open; + } + + /* Mortalise self and return */ + ref_put ( &ib_srp->refcnt ); + return 0; + + err_srp_open: + err_cmrc_open: + ib_srp_close ( ib_srp, rc ); + ref_put ( &ib_srp->refcnt ); + err_zalloc: + return rc; +} + +/****************************************************************************** + * + * IB SRP URIs + * + ****************************************************************************** + */ + +/** IB SRP parse flags */ +enum ib_srp_parse_flags { + IB_SRP_PARSE_REQUIRED = 0x0000, + IB_SRP_PARSE_OPTIONAL = 0x8000, + IB_SRP_PARSE_FLAG_MASK = 0xf000, +}; + +/** IB SRP root path parameters */ +struct ib_srp_root_path { + /** Source GID */ + union ib_gid sgid; + /** Initiator port ID */ + union ib_srp_initiator_port_id initiator; + /** Destination GID */ + union ib_gid dgid; + /** Partition key */ + uint16_t pkey; + /** Service ID */ + union ib_guid service_id; + /** SCSI LUN */ + struct scsi_lun lun; + /** Target port ID */ + union ib_srp_target_port_id target; +}; + +/** + * Parse IB SRP root path byte-string value + * + * @v rp_comp Root path component string + * @v default_value Default value to use if component string is empty + * @ret value Value + */ +static int ib_srp_parse_byte_string ( const char *rp_comp, uint8_t *bytes, + unsigned int size_flags ) { + size_t size = ( size_flags & ~IB_SRP_PARSE_FLAG_MASK ); + size_t rp_comp_len = strlen ( rp_comp ); + int decoded_size; + + /* Allow optional components to be empty */ + if ( ( rp_comp_len == 0 ) && + ( size_flags & IB_SRP_PARSE_OPTIONAL ) ) + return 0; + + /* Check string length */ + if ( rp_comp_len != ( 2 * size ) ) + return -EINVAL_BYTE_STRING_LEN; + + /* Parse byte string */ + decoded_size = base16_decode ( rp_comp, bytes ); + if ( decoded_size < 0 ) + return decoded_size; + + return 0; +} + +/** + * Parse IB SRP root path integer value + * + * @v rp_comp Root path component string + * @v default_value Default value to use if component string is empty + * @ret value Value + */ +static int ib_srp_parse_integer ( const char *rp_comp, int default_value ) { + int value; + char *end; + + value = strtoul ( rp_comp, &end, 16 ); + if ( *end ) + return -EINVAL_INTEGER; + + if ( end == rp_comp ) + return default_value; + + return value; +} + +/** + * Parse IB SRP root path source GID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_sgid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + struct ib_device *ibdev; + + /* Default to the GID of the last opened Infiniband device */ + if ( ( ibdev = last_opened_ibdev() ) != NULL ) + memcpy ( &rp->sgid, &ibdev->gid, sizeof ( rp->sgid ) ); + + return ib_srp_parse_byte_string ( rp_comp, rp->sgid.bytes, + ( sizeof ( rp->sgid ) | + IB_SRP_PARSE_OPTIONAL ) ); +} + +/** + * Parse IB SRP root path initiator identifier extension + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_initiator_id_ext ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + union ib_srp_initiator_port_id *port_id = &rp->initiator; + + return ib_srp_parse_byte_string ( rp_comp, port_id->ib.id_ext.bytes, + ( sizeof ( port_id->ib.id_ext ) | + IB_SRP_PARSE_OPTIONAL ) ); +} + +/** + * Parse IB SRP root path initiator HCA GUID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_initiator_hca_guid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + union ib_srp_initiator_port_id *port_id = &rp->initiator; + + /* Default to the GUID portion of the source GID */ + memcpy ( &port_id->ib.hca_guid, &rp->sgid.s.guid, + sizeof ( port_id->ib.hca_guid ) ); + + return ib_srp_parse_byte_string ( rp_comp, port_id->ib.hca_guid.bytes, + ( sizeof ( port_id->ib.hca_guid ) | + IB_SRP_PARSE_OPTIONAL ) ); +} + +/** + * Parse IB SRP root path destination GID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_dgid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + return ib_srp_parse_byte_string ( rp_comp, rp->dgid.bytes, + ( sizeof ( rp->dgid ) | + IB_SRP_PARSE_REQUIRED ) ); +} + +/** + * Parse IB SRP root path partition key + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_pkey ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + int pkey; + + if ( ( pkey = ib_srp_parse_integer ( rp_comp, IB_PKEY_DEFAULT ) ) < 0 ) + return pkey; + rp->pkey = pkey; + return 0; +} + +/** + * Parse IB SRP root path service ID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_service_id ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + return ib_srp_parse_byte_string ( rp_comp, rp->service_id.bytes, + ( sizeof ( rp->service_id ) | + IB_SRP_PARSE_REQUIRED ) ); +} + +/** + * Parse IB SRP root path LUN + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_lun ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + return scsi_parse_lun ( rp_comp, &rp->lun ); +} + +/** + * Parse IB SRP root path target identifier extension + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_target_id_ext ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + union ib_srp_target_port_id *port_id = &rp->target; + + return ib_srp_parse_byte_string ( rp_comp, port_id->ib.id_ext.bytes, + ( sizeof ( port_id->ib.id_ext ) | + IB_SRP_PARSE_REQUIRED ) ); +} + +/** + * Parse IB SRP root path target I/O controller GUID + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_target_ioc_guid ( const char *rp_comp, + struct ib_srp_root_path *rp ) { + union ib_srp_target_port_id *port_id = &rp->target; + + return ib_srp_parse_byte_string ( rp_comp, port_id->ib.ioc_guid.bytes, + ( sizeof ( port_id->ib.ioc_guid ) | + IB_SRP_PARSE_REQUIRED ) ); +} + +/** IB SRP root path component parser */ +struct ib_srp_root_path_parser { + /** + * Parse IB SRP root path component + * + * @v rp_comp Root path component string + * @v rp IB SRP root path + * @ret rc Return status code + */ + int ( * parse ) ( const char *rp_comp, struct ib_srp_root_path *rp ); +}; + +/** IB SRP root path components */ +static struct ib_srp_root_path_parser ib_srp_rp_parser[] = { + { ib_srp_parse_sgid }, + { ib_srp_parse_initiator_id_ext }, + { ib_srp_parse_initiator_hca_guid }, + { ib_srp_parse_dgid }, + { ib_srp_parse_pkey }, + { ib_srp_parse_service_id }, + { ib_srp_parse_lun }, + { ib_srp_parse_target_id_ext }, + { ib_srp_parse_target_ioc_guid }, +}; + +/** Number of IB SRP root path components */ +#define IB_SRP_NUM_RP_COMPONENTS \ + ( sizeof ( ib_srp_rp_parser ) / sizeof ( ib_srp_rp_parser[0] ) ) + +/** + * Parse IB SRP root path + * + * @v rp_string Root path string + * @v rp IB SRP root path + * @ret rc Return status code + */ +static int ib_srp_parse_root_path ( const char *rp_string, + struct ib_srp_root_path *rp ) { + struct ib_srp_root_path_parser *parser; + char rp_string_copy[ strlen ( rp_string ) + 1 ]; + char *rp_comp[IB_SRP_NUM_RP_COMPONENTS]; + char *rp_string_tmp = rp_string_copy; + unsigned int i = 0; + int rc; + + /* Split root path into component parts */ + strcpy ( rp_string_copy, rp_string ); + while ( 1 ) { + rp_comp[i++] = rp_string_tmp; + if ( i == IB_SRP_NUM_RP_COMPONENTS ) + break; + for ( ; *rp_string_tmp != ':' ; rp_string_tmp++ ) { + if ( ! *rp_string_tmp ) { + DBG ( "IBSRP root path \"%s\" too short\n", + rp_string ); + return -EINVAL_RP_TOO_SHORT; + } + } + *(rp_string_tmp++) = '\0'; + } + + /* Parse root path components */ + for ( i = 0 ; i < IB_SRP_NUM_RP_COMPONENTS ; i++ ) { + parser = &ib_srp_rp_parser[i]; + if ( ( rc = parser->parse ( rp_comp[i], rp ) ) != 0 ) { + DBG ( "IBSRP could not parse \"%s\" in root path " + "\"%s\": %s\n", rp_comp[i], rp_string, + strerror ( rc ) ); + return rc; + } + } + + return 0; +} + +/** + * Open IB SRP URI + * + * @v parent Parent interface + * @v uri URI + * @ret rc Return status code + */ +static int ib_srp_open_uri ( struct interface *parent, struct uri *uri ) { + struct ib_srp_root_path rp; + struct ib_device *ibdev; + int rc; + + /* Parse URI */ + if ( ! uri->opaque ) + return -EINVAL; + memset ( &rp, 0, sizeof ( rp ) ); + if ( ( rc = ib_srp_parse_root_path ( uri->opaque, &rp ) ) != 0 ) + return rc; + + /* Identify Infiniband device */ + ibdev = find_ibdev ( &rp.sgid ); + if ( ! ibdev ) { + DBG ( "IBSRP could not identify Infiniband device\n" ); + return -ENODEV; + } + + /* Open IB SRP device */ + if ( ( rc = ib_srp_open ( parent, ibdev, &rp.dgid, &rp.service_id, + &rp.initiator.srp, &rp.target.srp, + &rp.lun ) ) != 0 ) + return rc; + + return 0; +} + +/** IB SRP URI opener */ +struct uri_opener ib_srp_uri_opener __uri_opener = { + .scheme = "ib_srp", + .open = ib_srp_open_uri, +}; diff --git a/qemu/roms/ipxe/src/net/iobpad.c b/qemu/roms/ipxe/src/net/iobpad.c new file mode 100644 index 000000000..9cc8328e9 --- /dev/null +++ b/qemu/roms/ipxe/src/net/iobpad.c @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** + * @file + * + * I/O buffer padding + * + */ + +#include <string.h> +#include <ipxe/iobuf.h> + +/** + * Pad I/O buffer + * + * @v iobuf I/O buffer + * @v min_len Minimum length + * + * This function pads and aligns I/O buffers, for devices that + * aren't capable of padding in hardware, or that require specific + * alignment in TX buffers. The packet data will end up aligned to a + * multiple of @c IOB_ALIGN. + * + * @c min_len must not exceed @v IOB_ZLEN. + */ +void iob_pad ( struct io_buffer *iobuf, size_t min_len ) { + void *data; + size_t len; + size_t headroom; + signed int pad_len; + + assert ( min_len <= IOB_ZLEN ); + + /* Move packet data to start of I/O buffer. This will both + * align the data (since I/O buffers are aligned to + * IOB_ALIGN) and give us sufficient space for the + * zero-padding + */ + data = iobuf->data; + len = iob_len ( iobuf ); + headroom = iob_headroom ( iobuf ); + iob_push ( iobuf, headroom ); + memmove ( iobuf->data, data, len ); + iob_unput ( iobuf, headroom ); + + /* Pad to minimum packet length */ + pad_len = ( min_len - iob_len ( iobuf ) ); + if ( pad_len > 0 ) + memset ( iob_put ( iobuf, pad_len ), 0, pad_len ); +} diff --git a/qemu/roms/ipxe/src/net/ipv4.c b/qemu/roms/ipxe/src/net/ipv4.c new file mode 100644 index 000000000..9c5cf2eb4 --- /dev/null +++ b/qemu/roms/ipxe/src/net/ipv4.c @@ -0,0 +1,789 @@ +#include <string.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/list.h> +#include <ipxe/in.h> +#include <ipxe/arp.h> +#include <ipxe/if_ether.h> +#include <ipxe/iobuf.h> +#include <ipxe/netdevice.h> +#include <ipxe/ip.h> +#include <ipxe/tcpip.h> +#include <ipxe/dhcp.h> +#include <ipxe/settings.h> +#include <ipxe/fragment.h> +#include <ipxe/ipstat.h> +#include <ipxe/profile.h> + +/** @file + * + * IPv4 protocol + * + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/* Unique IP datagram identification number (high byte) */ +static uint8_t next_ident_high = 0; + +/** List of IPv4 miniroutes */ +struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes ); + +/** IPv4 statistics */ +static struct ip_statistics ipv4_stats; + +/** IPv4 statistics family */ +struct ip_statistics_family +ipv4_stats_family __ip_statistics_family ( IP_STATISTICS_IPV4 ) = { + .version = 4, + .stats = &ipv4_stats, +}; + +/** Transmit profiler */ +static struct profiler ipv4_tx_profiler __profiler = { .name = "ipv4.tx" }; + +/** Receive profiler */ +static struct profiler ipv4_rx_profiler __profiler = { .name = "ipv4.rx" }; + +/** + * Add IPv4 minirouting table entry + * + * @v netdev Network device + * @v address IPv4 address + * @v netmask Subnet mask + * @v gateway Gateway address (if any) + * @ret miniroute Routing table entry, or NULL + */ +static struct ipv4_miniroute * __malloc +add_ipv4_miniroute ( struct net_device *netdev, struct in_addr address, + struct in_addr netmask, struct in_addr gateway ) { + struct ipv4_miniroute *miniroute; + + DBGC ( netdev, "IPv4 add %s", inet_ntoa ( address ) ); + DBGC ( netdev, "/%s ", inet_ntoa ( netmask ) ); + if ( gateway.s_addr ) + DBGC ( netdev, "gw %s ", inet_ntoa ( gateway ) ); + DBGC ( netdev, "via %s\n", netdev->name ); + + /* Allocate and populate miniroute structure */ + miniroute = malloc ( sizeof ( *miniroute ) ); + if ( ! miniroute ) { + DBGC ( netdev, "IPv4 could not add miniroute\n" ); + return NULL; + } + + /* Record routing information */ + miniroute->netdev = netdev_get ( netdev ); + miniroute->address = address; + miniroute->netmask = netmask; + miniroute->gateway = gateway; + + /* Add to end of list if we have a gateway, otherwise + * to start of list. + */ + if ( gateway.s_addr ) { + list_add_tail ( &miniroute->list, &ipv4_miniroutes ); + } else { + list_add ( &miniroute->list, &ipv4_miniroutes ); + } + + return miniroute; +} + +/** + * Delete IPv4 minirouting table entry + * + * @v miniroute Routing table entry + */ +static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) { + struct net_device *netdev = miniroute->netdev; + + DBGC ( netdev, "IPv4 del %s", inet_ntoa ( miniroute->address ) ); + DBGC ( netdev, "/%s ", inet_ntoa ( miniroute->netmask ) ); + if ( miniroute->gateway.s_addr ) + DBGC ( netdev, "gw %s ", inet_ntoa ( miniroute->gateway ) ); + DBGC ( netdev, "via %s\n", miniroute->netdev->name ); + + netdev_put ( miniroute->netdev ); + list_del ( &miniroute->list ); + free ( miniroute ); +} + +/** + * Perform IPv4 routing + * + * @v dest Final destination address + * @ret dest Next hop destination address + * @ret miniroute Routing table entry to use, or NULL if no route + * + * If the route requires use of a gateway, the next hop destination + * address will be overwritten with the gateway address. + */ +static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) { + struct ipv4_miniroute *miniroute; + int local; + int has_gw; + + /* Find first usable route in routing table */ + list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) { + if ( ! netdev_is_open ( miniroute->netdev ) ) + continue; + local = ( ( ( dest->s_addr ^ miniroute->address.s_addr ) + & miniroute->netmask.s_addr ) == 0 ); + has_gw = ( miniroute->gateway.s_addr ); + if ( local || has_gw ) { + if ( ! local ) + *dest = miniroute->gateway; + return miniroute; + } + } + + return NULL; +} + +/** + * Determine transmitting network device + * + * @v st_dest Destination network-layer address + * @ret netdev Transmitting network device, or NULL + */ +static struct net_device * ipv4_netdev ( struct sockaddr_tcpip *st_dest ) { + struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest ); + struct in_addr dest = sin_dest->sin_addr; + struct ipv4_miniroute *miniroute; + + /* Find routing table entry */ + miniroute = ipv4_route ( &dest ); + if ( ! miniroute ) + return NULL; + + return miniroute->netdev; +} + +/** + * Check if IPv4 fragment matches fragment reassembly buffer + * + * @v fragment Fragment reassembly buffer + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable potion of I/O buffer + * @ret is_fragment Fragment matches this reassembly buffer + */ +static int ipv4_is_fragment ( struct fragment *fragment, + struct io_buffer *iobuf, + size_t hdrlen __unused ) { + struct iphdr *frag_iphdr = fragment->iobuf->data; + struct iphdr *iphdr = iobuf->data; + + return ( ( iphdr->src.s_addr == frag_iphdr->src.s_addr ) && + ( iphdr->ident == frag_iphdr->ident ) ); +} + +/** + * Get IPv4 fragment offset + * + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable potion of I/O buffer + * @ret offset Offset + */ +static size_t ipv4_fragment_offset ( struct io_buffer *iobuf, + size_t hdrlen __unused ) { + struct iphdr *iphdr = iobuf->data; + + return ( ( ntohs ( iphdr->frags ) & IP_MASK_OFFSET ) << 3 ); +} + +/** + * Check if more fragments exist + * + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable potion of I/O buffer + * @ret more_frags More fragments exist + */ +static int ipv4_more_fragments ( struct io_buffer *iobuf, + size_t hdrlen __unused ) { + struct iphdr *iphdr = iobuf->data; + + return ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ); +} + +/** IPv4 fragment reassembler */ +static struct fragment_reassembler ipv4_reassembler = { + .list = LIST_HEAD_INIT ( ipv4_reassembler.list ), + .is_fragment = ipv4_is_fragment, + .fragment_offset = ipv4_fragment_offset, + .more_fragments = ipv4_more_fragments, + .stats = &ipv4_stats, +}; + +/** + * Add IPv4 pseudo-header checksum to existing checksum + * + * @v iobuf I/O buffer + * @v csum Existing checksum + * @ret csum Updated checksum + */ +static uint16_t ipv4_pshdr_chksum ( struct io_buffer *iobuf, uint16_t csum ) { + struct ipv4_pseudo_header pshdr; + struct iphdr *iphdr = iobuf->data; + size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 ); + + /* Build pseudo-header */ + pshdr.src = iphdr->src; + pshdr.dest = iphdr->dest; + pshdr.zero_padding = 0x00; + pshdr.protocol = iphdr->protocol; + pshdr.len = htons ( iob_len ( iobuf ) - hdrlen ); + + /* Update the checksum value */ + return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) ); +} + +/** + * Transmit IP packet + * + * @v iobuf I/O buffer + * @v tcpip Transport-layer protocol + * @v st_src Source network-layer address + * @v st_dest Destination network-layer address + * @v netdev Network device to use if no route found, or NULL + * @v trans_csum Transport-layer checksum to complete, or NULL + * @ret rc Status + * + * This function expects a transport-layer segment and prepends the IP header + */ +static int ipv4_tx ( struct io_buffer *iobuf, + struct tcpip_protocol *tcpip_protocol, + struct sockaddr_tcpip *st_src, + struct sockaddr_tcpip *st_dest, + struct net_device *netdev, + uint16_t *trans_csum ) { + struct iphdr *iphdr = iob_push ( iobuf, sizeof ( *iphdr ) ); + struct sockaddr_in *sin_src = ( ( struct sockaddr_in * ) st_src ); + struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest ); + struct ipv4_miniroute *miniroute; + struct in_addr next_hop; + struct in_addr netmask = { .s_addr = 0 }; + uint8_t ll_dest_buf[MAX_LL_ADDR_LEN]; + const void *ll_dest; + int rc; + + /* Start profiling */ + profile_start ( &ipv4_tx_profiler ); + + /* Update statistics */ + ipv4_stats.out_requests++; + + /* Fill up the IP header, except source address */ + memset ( iphdr, 0, sizeof ( *iphdr ) ); + iphdr->verhdrlen = ( IP_VER | ( sizeof ( *iphdr ) / 4 ) ); + iphdr->service = IP_TOS; + iphdr->len = htons ( iob_len ( iobuf ) ); + iphdr->ttl = IP_TTL; + iphdr->protocol = tcpip_protocol->tcpip_proto; + iphdr->dest = sin_dest->sin_addr; + + /* Use routing table to identify next hop and transmitting netdev */ + next_hop = iphdr->dest; + if ( sin_src ) + iphdr->src = sin_src->sin_addr; + if ( ( next_hop.s_addr != INADDR_BROADCAST ) && + ( ! IN_MULTICAST ( ntohl ( next_hop.s_addr ) ) ) && + ( ( miniroute = ipv4_route ( &next_hop ) ) != NULL ) ) { + iphdr->src = miniroute->address; + netmask = miniroute->netmask; + netdev = miniroute->netdev; + } + if ( ! netdev ) { + DBGC ( sin_dest->sin_addr, "IPv4 has no route to %s\n", + inet_ntoa ( iphdr->dest ) ); + ipv4_stats.out_no_routes++; + rc = -ENETUNREACH; + goto err; + } + + /* (Ab)use the "ident" field to convey metadata about the + * network device statistics into packet traces. Useful for + * extracting debug information from non-debug builds. + */ + iphdr->ident = htons ( ( (++next_ident_high) << 8 ) | + ( ( netdev->rx_stats.bad & 0xf ) << 4 ) | + ( ( netdev->rx_stats.good & 0xf ) << 0 ) ); + + /* Fix up checksums */ + if ( trans_csum ) + *trans_csum = ipv4_pshdr_chksum ( iobuf, *trans_csum ); + iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) ); + + /* Print IP4 header for debugging */ + DBGC2 ( sin_dest->sin_addr, "IPv4 TX %s->", inet_ntoa ( iphdr->src ) ); + DBGC2 ( sin_dest->sin_addr, "%s len %d proto %d id %04x csum %04x\n", + inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ), + iphdr->protocol, ntohs ( iphdr->ident ), + ntohs ( iphdr->chksum ) ); + + /* Calculate link-layer destination address, if possible */ + if ( ( ( next_hop.s_addr ^ INADDR_BROADCAST ) & ~netmask.s_addr ) == 0){ + /* Broadcast address */ + ipv4_stats.out_bcast_pkts++; + ll_dest = netdev->ll_broadcast; + } else if ( IN_MULTICAST ( ntohl ( next_hop.s_addr ) ) ) { + /* Multicast address */ + ipv4_stats.out_mcast_pkts++; + if ( ( rc = netdev->ll_protocol->mc_hash ( AF_INET, &next_hop, + ll_dest_buf ) ) !=0){ + DBGC ( sin_dest->sin_addr, "IPv4 could not hash " + "multicast %s: %s\n", + inet_ntoa ( next_hop ), strerror ( rc ) ); + goto err; + } + ll_dest = ll_dest_buf; + } else { + /* Unicast address */ + ll_dest = NULL; + } + + /* Update statistics */ + ipv4_stats.out_transmits++; + ipv4_stats.out_octets += iob_len ( iobuf ); + + /* Hand off to link layer (via ARP if applicable) */ + if ( ll_dest ) { + if ( ( rc = net_tx ( iobuf, netdev, &ipv4_protocol, ll_dest, + netdev->ll_addr ) ) != 0 ) { + DBGC ( sin_dest->sin_addr, "IPv4 could not transmit " + "packet via %s: %s\n", + netdev->name, strerror ( rc ) ); + return rc; + } + } else { + if ( ( rc = arp_tx ( iobuf, netdev, &ipv4_protocol, &next_hop, + &iphdr->src, netdev->ll_addr ) ) != 0 ) { + DBGC ( sin_dest->sin_addr, "IPv4 could not transmit " + "packet via %s: %s\n", + netdev->name, strerror ( rc ) ); + return rc; + } + } + + profile_stop ( &ipv4_tx_profiler ); + return 0; + + err: + free_iob ( iobuf ); + return rc; +} + +/** + * Check if network device has any IPv4 address + * + * @v netdev Network device + * @ret has_any_addr Network device has any IPv4 address + */ +int ipv4_has_any_addr ( struct net_device *netdev ) { + struct ipv4_miniroute *miniroute; + + list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) { + if ( miniroute->netdev == netdev ) + return 1; + } + return 0; +} + +/** + * Check if network device has a specific IPv4 address + * + * @v netdev Network device + * @v addr IPv4 address + * @ret has_addr Network device has this IPv4 address + */ +static int ipv4_has_addr ( struct net_device *netdev, struct in_addr addr ) { + struct ipv4_miniroute *miniroute; + + list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) { + if ( ( miniroute->netdev == netdev ) && + ( miniroute->address.s_addr == addr.s_addr ) ) { + /* Found matching address */ + return 1; + } + } + return 0; +} + +/** + * Process incoming packets + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v ll_dest Link-layer destination address + * @v ll_source Link-layer destination source + * @v flags Packet flags + * @ret rc Return status code + * + * This function expects an IP4 network datagram. It processes the headers + * and sends it to the transport layer. + */ +static int ipv4_rx ( struct io_buffer *iobuf, + struct net_device *netdev, + const void *ll_dest __unused, + const void *ll_source __unused, + unsigned int flags ) { + struct iphdr *iphdr = iobuf->data; + size_t hdrlen; + size_t len; + union { + struct sockaddr_in sin; + struct sockaddr_tcpip st; + } src, dest; + uint16_t csum; + uint16_t pshdr_csum; + int rc; + + /* Start profiling */ + profile_start ( &ipv4_rx_profiler ); + + /* Update statistics */ + ipv4_stats.in_receives++; + ipv4_stats.in_octets += iob_len ( iobuf ); + if ( flags & LL_BROADCAST ) { + ipv4_stats.in_bcast_pkts++; + } else if ( flags & LL_MULTICAST ) { + ipv4_stats.in_mcast_pkts++; + } + + /* Sanity check the IPv4 header */ + if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) { + DBGC ( iphdr->src, "IPv4 packet too short at %zd bytes (min " + "%zd bytes)\n", iob_len ( iobuf ), sizeof ( *iphdr ) ); + goto err_header; + } + if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) { + DBGC ( iphdr->src, "IPv4 version %#02x not supported\n", + iphdr->verhdrlen ); + goto err_header; + } + hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 ); + if ( hdrlen < sizeof ( *iphdr ) ) { + DBGC ( iphdr->src, "IPv4 header too short at %zd bytes (min " + "%zd bytes)\n", hdrlen, sizeof ( *iphdr ) ); + goto err_header; + } + if ( hdrlen > iob_len ( iobuf ) ) { + DBGC ( iphdr->src, "IPv4 header too long at %zd bytes " + "(packet is %zd bytes)\n", hdrlen, iob_len ( iobuf ) ); + goto err_header; + } + if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) { + DBGC ( iphdr->src, "IPv4 checksum incorrect (is %04x " + "including checksum field, should be 0000)\n", csum ); + goto err_header; + } + len = ntohs ( iphdr->len ); + if ( len < hdrlen ) { + DBGC ( iphdr->src, "IPv4 length too short at %zd bytes " + "(header is %zd bytes)\n", len, hdrlen ); + goto err_header; + } + if ( len > iob_len ( iobuf ) ) { + DBGC ( iphdr->src, "IPv4 length too long at %zd bytes " + "(packet is %zd bytes)\n", len, iob_len ( iobuf ) ); + ipv4_stats.in_truncated_pkts++; + goto err_other; + } + + /* Truncate packet to correct length */ + iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) ); + + /* Print IPv4 header for debugging */ + DBGC2 ( iphdr->src, "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) ); + DBGC2 ( iphdr->src, "%s len %d proto %d id %04x csum %04x\n", + inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol, + ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) ); + + /* Discard unicast packets not destined for us */ + if ( ( ! ( flags & LL_MULTICAST ) ) && + ipv4_has_any_addr ( netdev ) && + ( ! ipv4_has_addr ( netdev, iphdr->dest ) ) ) { + DBGC ( iphdr->src, "IPv4 discarding non-local unicast packet " + "for %s\n", inet_ntoa ( iphdr->dest ) ); + ipv4_stats.in_addr_errors++; + goto err_other; + } + + /* Perform fragment reassembly if applicable */ + if ( iphdr->frags & htons ( IP_MASK_OFFSET | IP_MASK_MOREFRAGS ) ) { + /* Pass the fragment to fragment_reassemble() which returns + * either a fully reassembled I/O buffer or NULL. + */ + iobuf = fragment_reassemble ( &ipv4_reassembler, iobuf, + &hdrlen ); + if ( ! iobuf ) + return 0; + iphdr = iobuf->data; + } + + /* Construct socket addresses, calculate pseudo-header + * checksum, and hand off to transport layer + */ + memset ( &src, 0, sizeof ( src ) ); + src.sin.sin_family = AF_INET; + src.sin.sin_addr = iphdr->src; + memset ( &dest, 0, sizeof ( dest ) ); + dest.sin.sin_family = AF_INET; + dest.sin.sin_addr = iphdr->dest; + pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM ); + iob_pull ( iobuf, hdrlen ); + if ( ( rc = tcpip_rx ( iobuf, netdev, iphdr->protocol, &src.st, + &dest.st, pshdr_csum, &ipv4_stats ) ) != 0 ) { + DBGC ( src.sin.sin_addr, "IPv4 received packet rejected by " + "stack: %s\n", strerror ( rc ) ); + return rc; + } + + profile_stop ( &ipv4_rx_profiler ); + return 0; + + err_header: + ipv4_stats.in_hdr_errors++; + err_other: + free_iob ( iobuf ); + return -EINVAL; +} + +/** + * Check existence of IPv4 address for ARP + * + * @v netdev Network device + * @v net_addr Network-layer address + * @ret rc Return status code + */ +static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) { + const struct in_addr *address = net_addr; + + if ( ipv4_has_addr ( netdev, *address ) ) + return 0; + + return -ENOENT; +} + +/** + * Convert IPv4 address to dotted-quad notation + * + * @v in IP address + * @ret string IP address in dotted-quad notation + */ +char * inet_ntoa ( struct in_addr in ) { + static char buf[16]; /* "xxx.xxx.xxx.xxx" */ + uint8_t *bytes = ( uint8_t * ) ∈ + + sprintf ( buf, "%d.%d.%d.%d", bytes[0], bytes[1], bytes[2], bytes[3] ); + return buf; +} + +/** + * Transcribe IP address + * + * @v net_addr IP address + * @ret string IP address in dotted-quad notation + * + */ +static const char * ipv4_ntoa ( const void *net_addr ) { + return inet_ntoa ( * ( ( struct in_addr * ) net_addr ) ); +} + +/** + * Transcribe IPv4 socket address + * + * @v sa Socket address + * @ret string Socket address in standard notation + */ +static const char * ipv4_sock_ntoa ( struct sockaddr *sa ) { + struct sockaddr_in *sin = ( ( struct sockaddr_in * ) sa ); + + return inet_ntoa ( sin->sin_addr ); +} + +/** + * Parse IPv4 socket address + * + * @v string Socket address string + * @v sa Socket address to fill in + * @ret rc Return status code + */ +static int ipv4_sock_aton ( const char *string, struct sockaddr *sa ) { + struct sockaddr_in *sin = ( ( struct sockaddr_in * ) sa ); + struct in_addr in; + + if ( inet_aton ( string, &in ) ) { + sin->sin_addr = in; + return 0; + } + return -EINVAL; +} + +/** IPv4 protocol */ +struct net_protocol ipv4_protocol __net_protocol = { + .name = "IP", + .net_proto = htons ( ETH_P_IP ), + .net_addr_len = sizeof ( struct in_addr ), + .rx = ipv4_rx, + .ntoa = ipv4_ntoa, +}; + +/** IPv4 TCPIP net protocol */ +struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = { + .name = "IPv4", + .sa_family = AF_INET, + .header_len = sizeof ( struct iphdr ), + .tx = ipv4_tx, + .netdev = ipv4_netdev, +}; + +/** IPv4 ARP protocol */ +struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = { + .net_protocol = &ipv4_protocol, + .check = ipv4_arp_check, +}; + +/** IPv4 socket address converter */ +struct sockaddr_converter ipv4_sockaddr_converter __sockaddr_converter = { + .family = AF_INET, + .ntoa = ipv4_sock_ntoa, + .aton = ipv4_sock_aton, +}; + +/****************************************************************************** + * + * Settings + * + ****************************************************************************** + */ + +/** + * Parse IPv4 address setting value + * + * @v type Setting type + * @v value Formatted setting value + * @v buf Buffer to contain raw value + * @v len Length of buffer + * @ret len Length of raw value, or negative error + */ +int parse_ipv4_setting ( const struct setting_type *type __unused, + const char *value, void *buf, size_t len ) { + struct in_addr ipv4; + + /* Parse IPv4 address */ + if ( inet_aton ( value, &ipv4 ) == 0 ) + return -EINVAL; + + /* Copy to buffer */ + if ( len > sizeof ( ipv4 ) ) + len = sizeof ( ipv4 ); + memcpy ( buf, &ipv4, len ); + + return ( sizeof ( ipv4 ) ); +} + +/** + * Format IPv4 address setting value + * + * @v type Setting type + * @v raw Raw setting value + * @v raw_len Length of raw setting value + * @v buf Buffer to contain formatted value + * @v len Length of buffer + * @ret len Length of formatted value, or negative error + */ +int format_ipv4_setting ( const struct setting_type *type __unused, + const void *raw, size_t raw_len, char *buf, + size_t len ) { + const struct in_addr *ipv4 = raw; + + if ( raw_len < sizeof ( *ipv4 ) ) + return -EINVAL; + return snprintf ( buf, len, "%s", inet_ntoa ( *ipv4 ) ); +} + +/** IPv4 address setting */ +const struct setting ip_setting __setting ( SETTING_IP, ip ) = { + .name = "ip", + .description = "IP address", + .tag = DHCP_EB_YIADDR, + .type = &setting_type_ipv4, +}; + +/** IPv4 subnet mask setting */ +const struct setting netmask_setting __setting ( SETTING_IP, netmask ) = { + .name = "netmask", + .description = "Subnet mask", + .tag = DHCP_SUBNET_MASK, + .type = &setting_type_ipv4, +}; + +/** Default gateway setting */ +const struct setting gateway_setting __setting ( SETTING_IP, gateway ) = { + .name = "gateway", + .description = "Default gateway", + .tag = DHCP_ROUTERS, + .type = &setting_type_ipv4, +}; + +/** + * Create IPv4 routing table based on configured settings + * + * @ret rc Return status code + */ +static int ipv4_create_routes ( void ) { + struct ipv4_miniroute *miniroute; + struct ipv4_miniroute *tmp; + struct net_device *netdev; + struct settings *settings; + struct in_addr address = { 0 }; + struct in_addr netmask = { 0 }; + struct in_addr gateway = { 0 }; + + /* Delete all existing routes */ + list_for_each_entry_safe ( miniroute, tmp, &ipv4_miniroutes, list ) + del_ipv4_miniroute ( miniroute ); + + /* Create a route for each configured network device */ + for_each_netdev ( netdev ) { + settings = netdev_settings ( netdev ); + /* Get IPv4 address */ + address.s_addr = 0; + fetch_ipv4_setting ( settings, &ip_setting, &address ); + if ( ! address.s_addr ) + continue; + /* Get subnet mask */ + fetch_ipv4_setting ( settings, &netmask_setting, &netmask ); + /* Calculate default netmask, if necessary */ + if ( ! netmask.s_addr ) { + if ( IN_CLASSA ( ntohl ( address.s_addr ) ) ) { + netmask.s_addr = htonl ( IN_CLASSA_NET ); + } else if ( IN_CLASSB ( ntohl ( address.s_addr ) ) ) { + netmask.s_addr = htonl ( IN_CLASSB_NET ); + } else if ( IN_CLASSC ( ntohl ( address.s_addr ) ) ) { + netmask.s_addr = htonl ( IN_CLASSC_NET ); + } + } + /* Get default gateway, if present */ + fetch_ipv4_setting ( settings, &gateway_setting, &gateway ); + /* Configure route */ + miniroute = add_ipv4_miniroute ( netdev, address, + netmask, gateway ); + if ( ! miniroute ) + return -ENOMEM; + } + + return 0; +} + +/** IPv4 settings applicator */ +struct settings_applicator ipv4_settings_applicator __settings_applicator = { + .apply = ipv4_create_routes, +}; + +/* Drag in ICMPv4 */ +REQUIRE_OBJECT ( icmpv4 ); diff --git a/qemu/roms/ipxe/src/net/ipv6.c b/qemu/roms/ipxe/src/net/ipv6.c new file mode 100644 index 000000000..3c374168c --- /dev/null +++ b/qemu/roms/ipxe/src/net/ipv6.c @@ -0,0 +1,1111 @@ +/* + * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <assert.h> +#include <byteswap.h> +#include <ipxe/iobuf.h> +#include <ipxe/tcpip.h> +#include <ipxe/if_ether.h> +#include <ipxe/crc32.h> +#include <ipxe/fragment.h> +#include <ipxe/ipstat.h> +#include <ipxe/ndp.h> +#include <ipxe/ipv6.h> + +/** @file + * + * IPv6 protocol + * + */ + +/* Disambiguate the various error causes */ +#define EINVAL_LEN __einfo_error ( EINFO_EINVAL_LEN ) +#define EINFO_EINVAL_LEN \ + __einfo_uniqify ( EINFO_EINVAL, 0x01, "Invalid length" ) +#define ENOTSUP_VER __einfo_error ( EINFO_ENOTSUP_VER ) +#define EINFO_ENOTSUP_VER \ + __einfo_uniqify ( EINFO_ENOTSUP, 0x01, "Unsupported version" ) +#define ENOTSUP_HDR __einfo_error ( EINFO_ENOTSUP_HDR ) +#define EINFO_ENOTSUP_HDR \ + __einfo_uniqify ( EINFO_ENOTSUP, 0x02, "Unsupported header type" ) +#define ENOTSUP_OPT __einfo_error ( EINFO_ENOTSUP_OPT ) +#define EINFO_ENOTSUP_OPT \ + __einfo_uniqify ( EINFO_ENOTSUP, 0x03, "Unsupported option" ) + +/** List of IPv6 miniroutes */ +struct list_head ipv6_miniroutes = LIST_HEAD_INIT ( ipv6_miniroutes ); + +/** IPv6 statistics */ +static struct ip_statistics ipv6_stats; + +/** IPv6 statistics family */ +struct ip_statistics_family +ipv6_statistics_family __ip_statistics_family ( IP_STATISTICS_IPV6 ) = { + .version = 6, + .stats = &ipv6_stats, +}; + +/** + * Determine debugging colour for IPv6 debug messages + * + * @v in IPv6 address + * @ret col Debugging colour (for DBGC()) + */ +static uint32_t ipv6col ( struct in6_addr *in ) { + return crc32_le ( 0, in, sizeof ( *in ) ); +} + +/** + * Dump IPv6 routing table entry + * + * @v miniroute Routing table entry + */ +static inline __attribute__ (( always_inline )) void +ipv6_dump_miniroute ( struct ipv6_miniroute *miniroute ) { + struct net_device *netdev = miniroute->netdev; + + DBGC ( netdev, "IPv6 %s has %s %s/%d", netdev->name, + ( ( miniroute->flags & IPV6_HAS_ADDRESS ) ? + "address" : "prefix" ), + inet6_ntoa ( &miniroute->address ), miniroute->prefix_len ); + if ( miniroute->flags & IPV6_HAS_ROUTER ) + DBGC ( netdev, " router %s", inet6_ntoa ( &miniroute->router )); + DBGC ( netdev, "\n" ); +} + +/** + * Check if network device has a specific IPv6 address + * + * @v netdev Network device + * @v addr IPv6 address + * @ret has_addr Network device has this IPv6 address + */ +int ipv6_has_addr ( struct net_device *netdev, struct in6_addr *addr ) { + struct ipv6_miniroute *miniroute; + + list_for_each_entry ( miniroute, &ipv6_miniroutes, list ) { + if ( ( miniroute->netdev == netdev ) && + ( miniroute->flags & IPV6_HAS_ADDRESS ) && + ( memcmp ( &miniroute->address, addr, + sizeof ( miniroute->address ) ) == 0 ) ) { + /* Found matching address */ + return 1; + } + } + return 0; +} + +/** + * Check if IPv6 address is within a routing table entry's local network + * + * @v miniroute Routing table entry + * @v address IPv6 address + * @ret is_on_link Address is within this entry's local network + */ +static int ipv6_is_on_link ( struct ipv6_miniroute *miniroute, + struct in6_addr *address ) { + unsigned int i; + + for ( i = 0 ; i < ( sizeof ( address->s6_addr32 ) / + sizeof ( address->s6_addr32[0] ) ) ; i++ ) { + if ( (( address->s6_addr32[i] ^ miniroute->address.s6_addr32[i]) + & miniroute->prefix_mask.s6_addr32[i] ) != 0 ) + return 0; + } + return 1; +} + +/** + * Find IPv6 routing table entry for a given address + * + * @v netdev Network device + * @v address IPv6 address + * @ret miniroute Routing table entry, or NULL if not found + */ +static struct ipv6_miniroute * ipv6_miniroute ( struct net_device *netdev, + struct in6_addr *address ) { + struct ipv6_miniroute *miniroute; + + list_for_each_entry ( miniroute, &ipv6_miniroutes, list ) { + if ( ( miniroute->netdev == netdev ) && + ipv6_is_on_link ( miniroute, address ) ) { + return miniroute; + } + } + return NULL; +} + +/** + * Add IPv6 routing table entry + * + * @v netdev Network device + * @v address IPv6 address (or prefix) + * @v prefix_len Prefix length + * @v flags Flags + * @ret miniroute Routing table entry, or NULL on failure + */ +static struct ipv6_miniroute * ipv6_add_miniroute ( struct net_device *netdev, + struct in6_addr *address, + unsigned int prefix_len, + unsigned int flags ) { + struct ipv6_miniroute *miniroute; + uint8_t *prefix_mask; + + /* Create routing table entry */ + miniroute = zalloc ( sizeof ( *miniroute ) ); + if ( ! miniroute ) + return NULL; + miniroute->netdev = netdev_get ( netdev ); + memcpy ( &miniroute->address, address, sizeof ( miniroute->address ) ); + miniroute->prefix_len = prefix_len; + assert ( prefix_len <= ( 8 * sizeof ( miniroute->prefix_mask ) ) ); + for ( prefix_mask = miniroute->prefix_mask.s6_addr ; prefix_len >= 8 ; + prefix_mask++, prefix_len -= 8 ) { + *prefix_mask = 0xff; + } + if ( prefix_len ) + *prefix_mask <<= ( 8 - prefix_len ); + miniroute->flags = flags; + list_add ( &miniroute->list, &ipv6_miniroutes ); + ipv6_dump_miniroute ( miniroute ); + + return miniroute; +} + +/** + * Define IPv6 on-link prefix + * + * @v netdev Network device + * @v prefix IPv6 address prefix + * @v prefix_len Prefix length + * @v router Router address (or NULL) + * @ret rc Return status code + */ +int ipv6_set_prefix ( struct net_device *netdev, struct in6_addr *prefix, + unsigned int prefix_len, struct in6_addr *router ) { + struct ipv6_miniroute *miniroute; + int changed; + + /* Find or create routing table entry */ + miniroute = ipv6_miniroute ( netdev, prefix ); + if ( ! miniroute ) + miniroute = ipv6_add_miniroute ( netdev, prefix, prefix_len, 0); + if ( ! miniroute ) + return -ENOMEM; + + /* Record router and add to start or end of list as appropriate */ + list_del ( &miniroute->list ); + if ( router ) { + changed = ( ( ! ( miniroute->flags & IPV6_HAS_ROUTER ) ) || + ( memcmp ( &miniroute->router, router, + sizeof ( miniroute->router ) ) != 0 ) ); + miniroute->flags |= IPV6_HAS_ROUTER; + memcpy ( &miniroute->router, router, + sizeof ( miniroute->router ) ); + list_add_tail ( &miniroute->list, &ipv6_miniroutes ); + } else { + changed = ( miniroute->flags & IPV6_HAS_ROUTER ); + miniroute->flags &= ~IPV6_HAS_ROUTER; + list_add ( &miniroute->list, &ipv6_miniroutes ); + } + if ( changed ) + ipv6_dump_miniroute ( miniroute ); + + return 0; +} + +/** + * Add IPv6 on-link address + * + * @v netdev Network device + * @v address IPv6 address + * @ret rc Return status code + * + * An on-link prefix for the address must already exist. + */ +int ipv6_set_address ( struct net_device *netdev, struct in6_addr *address ) { + struct ipv6_miniroute *miniroute; + int changed; + + /* Find routing table entry */ + miniroute = ipv6_miniroute ( netdev, address ); + if ( ! miniroute ) + return -EADDRNOTAVAIL; + + /* Record address */ + changed = ( ( ! ( miniroute->flags & IPV6_HAS_ADDRESS ) ) || + ( memcmp ( &miniroute->address, address, + sizeof ( miniroute->address ) ) != 0 ) ); + memcpy ( &miniroute->address, address, sizeof ( miniroute->address ) ); + miniroute->flags |= IPV6_HAS_ADDRESS; + if ( changed ) + ipv6_dump_miniroute ( miniroute ); + + return 0; +} + +/** + * Perform IPv6 routing + * + * @v scope_id Destination address scope ID (for link-local addresses) + * @v dest Final destination address + * @ret dest Next hop destination address + * @ret miniroute Routing table entry to use, or NULL if no route + */ +static struct ipv6_miniroute * ipv6_route ( unsigned int scope_id, + struct in6_addr **dest ) { + struct ipv6_miniroute *miniroute; + + /* Find first usable route in routing table */ + list_for_each_entry ( miniroute, &ipv6_miniroutes, list ) { + + /* Skip closed network devices */ + if ( ! netdev_is_open ( miniroute->netdev ) ) + continue; + + /* Skip routing table entries with no usable source address */ + if ( ! ( miniroute->flags & IPV6_HAS_ADDRESS ) ) + continue; + + if ( IN6_IS_ADDR_LINKLOCAL ( *dest ) || + IN6_IS_ADDR_MULTICAST ( *dest ) ) { + + /* If destination is non-global, and the scope ID + * matches this network device, then use this route. + */ + if ( miniroute->netdev->index == scope_id ) + return miniroute; + + } else { + + /* If destination is an on-link global + * address, then use this route. + */ + if ( ipv6_is_on_link ( miniroute, *dest ) ) + return miniroute; + + /* If destination is an off-link global + * address, and we have a default gateway, + * then use this route. + */ + if ( miniroute->flags & IPV6_HAS_ROUTER ) { + *dest = &miniroute->router; + return miniroute; + } + } + } + + return NULL; +} + +/** + * Determine transmitting network device + * + * @v st_dest Destination network-layer address + * @ret netdev Transmitting network device, or NULL + */ +static struct net_device * ipv6_netdev ( struct sockaddr_tcpip *st_dest ) { + struct sockaddr_in6 *sin6_dest = ( ( struct sockaddr_in6 * ) st_dest ); + struct in6_addr *dest = &sin6_dest->sin6_addr; + struct ipv6_miniroute *miniroute; + + /* Find routing table entry */ + miniroute = ipv6_route ( sin6_dest->sin6_scope_id, &dest ); + if ( ! miniroute ) + return NULL; + + return miniroute->netdev; +} + +/** + * Check that received options can be safely ignored + * + * @v iphdr IPv6 header + * @v options Options extension header + * @v len Maximum length of header + * @ret rc Return status code + */ +static int ipv6_check_options ( struct ipv6_header *iphdr, + struct ipv6_options_header *options, + size_t len ) { + struct ipv6_option *option = options->options; + struct ipv6_option *end = ( ( ( void * ) options ) + len ); + + while ( option < end ) { + if ( ! IPV6_CAN_IGNORE_OPT ( option->type ) ) { + DBGC ( ipv6col ( &iphdr->src ), "IPv6 unrecognised " + "option type %#02x:\n", option->type ); + DBGC_HDA ( ipv6col ( &iphdr->src ), 0, + options, len ); + return -ENOTSUP_OPT; + } + if ( option->type == IPV6_OPT_PAD1 ) { + option = ( ( ( void * ) option ) + 1 ); + } else { + option = ( ( ( void * ) option->value ) + option->len ); + } + } + return 0; +} + +/** + * Check if fragment matches fragment reassembly buffer + * + * @v fragment Fragment reassembly buffer + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable potion of I/O buffer + * @ret is_fragment Fragment matches this reassembly buffer + */ +static int ipv6_is_fragment ( struct fragment *fragment, + struct io_buffer *iobuf, size_t hdrlen ) { + struct ipv6_header *frag_iphdr = fragment->iobuf->data; + struct ipv6_fragment_header *frag_fhdr = + ( fragment->iobuf->data + fragment->hdrlen - + sizeof ( *frag_fhdr ) ); + struct ipv6_header *iphdr = iobuf->data; + struct ipv6_fragment_header *fhdr = + ( iobuf->data + hdrlen - sizeof ( *fhdr ) ); + + return ( ( memcmp ( &iphdr->src, &frag_iphdr->src, + sizeof ( iphdr->src ) ) == 0 ) && + ( fhdr->ident == frag_fhdr->ident ) ); +} + +/** + * Get fragment offset + * + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable potion of I/O buffer + * @ret offset Offset + */ +static size_t ipv6_fragment_offset ( struct io_buffer *iobuf, size_t hdrlen ) { + struct ipv6_fragment_header *fhdr = + ( iobuf->data + hdrlen - sizeof ( *fhdr ) ); + + return ( ntohs ( fhdr->offset_more ) & IPV6_MASK_OFFSET ); +} + +/** + * Check if more fragments exist + * + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable potion of I/O buffer + * @ret more_frags More fragments exist + */ +static int ipv6_more_fragments ( struct io_buffer *iobuf, size_t hdrlen ) { + struct ipv6_fragment_header *fhdr = + ( iobuf->data + hdrlen - sizeof ( *fhdr ) ); + + return ( fhdr->offset_more & htons ( IPV6_MASK_MOREFRAGS ) ); +} + +/** Fragment reassembler */ +static struct fragment_reassembler ipv6_reassembler = { + .list = LIST_HEAD_INIT ( ipv6_reassembler.list ), + .is_fragment = ipv6_is_fragment, + .fragment_offset = ipv6_fragment_offset, + .more_fragments = ipv6_more_fragments, + .stats = &ipv6_stats, +}; + +/** + * Calculate IPv6 pseudo-header checksum + * + * @v iphdr IPv6 header + * @v len Payload length + * @v next_header Next header type + * @v csum Existing checksum + * @ret csum Updated checksum + */ +static uint16_t ipv6_pshdr_chksum ( struct ipv6_header *iphdr, size_t len, + int next_header, uint16_t csum ) { + struct ipv6_pseudo_header pshdr; + + /* Build pseudo-header */ + memcpy ( &pshdr.src, &iphdr->src, sizeof ( pshdr.src ) ); + memcpy ( &pshdr.dest, &iphdr->dest, sizeof ( pshdr.dest ) ); + pshdr.len = htonl ( len ); + memset ( pshdr.zero, 0, sizeof ( pshdr.zero ) ); + pshdr.next_header = next_header; + + /* Update the checksum value */ + return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) ); +} + +/** + * Transmit IPv6 packet + * + * @v iobuf I/O buffer + * @v tcpip Transport-layer protocol + * @v st_src Source network-layer address + * @v st_dest Destination network-layer address + * @v netdev Network device to use if no route found, or NULL + * @v trans_csum Transport-layer checksum to complete, or NULL + * @ret rc Status + * + * This function expects a transport-layer segment and prepends the + * IPv6 header + */ +static int ipv6_tx ( struct io_buffer *iobuf, + struct tcpip_protocol *tcpip_protocol, + struct sockaddr_tcpip *st_src, + struct sockaddr_tcpip *st_dest, + struct net_device *netdev, + uint16_t *trans_csum ) { + struct sockaddr_in6 *sin6_src = ( ( struct sockaddr_in6 * ) st_src ); + struct sockaddr_in6 *sin6_dest = ( ( struct sockaddr_in6 * ) st_dest ); + struct ipv6_miniroute *miniroute; + struct ipv6_header *iphdr; + struct in6_addr *src = NULL; + struct in6_addr *next_hop; + uint8_t ll_dest_buf[MAX_LL_ADDR_LEN]; + const void *ll_dest; + size_t len; + int rc; + + /* Update statistics */ + ipv6_stats.out_requests++; + + /* Fill up the IPv6 header, except source address */ + len = iob_len ( iobuf ); + iphdr = iob_push ( iobuf, sizeof ( *iphdr ) ); + memset ( iphdr, 0, sizeof ( *iphdr ) ); + iphdr->ver_tc_label = htonl ( IPV6_VER ); + iphdr->len = htons ( len ); + iphdr->next_header = tcpip_protocol->tcpip_proto; + iphdr->hop_limit = IPV6_HOP_LIMIT; + memcpy ( &iphdr->dest, &sin6_dest->sin6_addr, sizeof ( iphdr->dest ) ); + + /* Use routing table to identify next hop and transmitting netdev */ + next_hop = &iphdr->dest; + if ( ( miniroute = ipv6_route ( sin6_dest->sin6_scope_id, + &next_hop ) ) != NULL ) { + src = &miniroute->address; + netdev = miniroute->netdev; + } + if ( ! netdev ) { + DBGC ( ipv6col ( &iphdr->dest ), "IPv6 has no route to %s\n", + inet6_ntoa ( &iphdr->dest ) ); + ipv6_stats.out_no_routes++; + rc = -ENETUNREACH; + goto err; + } + if ( sin6_src && ! IN6_IS_ADDR_UNSPECIFIED ( &sin6_src->sin6_addr ) ) + src = &sin6_src->sin6_addr; + if ( src ) + memcpy ( &iphdr->src, src, sizeof ( iphdr->src ) ); + + /* Fix up checksums */ + if ( trans_csum ) { + *trans_csum = ipv6_pshdr_chksum ( iphdr, len, + tcpip_protocol->tcpip_proto, + *trans_csum ); + } + + /* Print IPv6 header for debugging */ + DBGC2 ( ipv6col ( &iphdr->dest ), "IPv6 TX %s->", + inet6_ntoa ( &iphdr->src ) ); + DBGC2 ( ipv6col ( &iphdr->dest ), "%s len %zd next %d\n", + inet6_ntoa ( &iphdr->dest ), len, iphdr->next_header ); + + /* Calculate link-layer destination address, if possible */ + if ( IN6_IS_ADDR_MULTICAST ( next_hop ) ) { + /* Multicast address */ + ipv6_stats.out_mcast_pkts++; + if ( ( rc = netdev->ll_protocol->mc_hash ( AF_INET6, next_hop, + ll_dest_buf ) ) !=0){ + DBGC ( ipv6col ( &iphdr->dest ), "IPv6 could not hash " + "multicast %s: %s\n", inet6_ntoa ( next_hop ), + strerror ( rc ) ); + goto err; + } + ll_dest = ll_dest_buf; + } else { + /* Unicast address */ + ll_dest = NULL; + } + + /* Update statistics */ + ipv6_stats.out_transmits++; + ipv6_stats.out_octets += iob_len ( iobuf ); + + /* Hand off to link layer (via NDP if applicable) */ + if ( ll_dest ) { + if ( ( rc = net_tx ( iobuf, netdev, &ipv6_protocol, ll_dest, + netdev->ll_addr ) ) != 0 ) { + DBGC ( ipv6col ( &iphdr->dest ), "IPv6 could not " + "transmit packet via %s: %s\n", + netdev->name, strerror ( rc ) ); + return rc; + } + } else { + if ( ( rc = ndp_tx ( iobuf, netdev, next_hop, &iphdr->src, + netdev->ll_addr ) ) != 0 ) { + DBGC ( ipv6col ( &iphdr->dest ), "IPv6 could not " + "transmit packet via %s: %s\n", + netdev->name, strerror ( rc ) ); + return rc; + } + } + + return 0; + + err: + free_iob ( iobuf ); + return rc; +} + +/** + * Process incoming IPv6 packets + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v ll_dest Link-layer destination address + * @v ll_source Link-layer destination source + * @v flags Packet flags + * @ret rc Return status code + * + * This function expects an IPv6 network datagram. It processes the + * headers and sends it to the transport layer. + */ +static int ipv6_rx ( struct io_buffer *iobuf, struct net_device *netdev, + const void *ll_dest __unused, + const void *ll_source __unused, + unsigned int flags __unused ) { + struct ipv6_header *iphdr = iobuf->data; + union ipv6_extension_header *ext; + union { + struct sockaddr_in6 sin6; + struct sockaddr_tcpip st; + } src, dest; + uint16_t pshdr_csum; + size_t len; + size_t hdrlen; + size_t extlen; + int this_header; + int next_header; + int rc; + + /* Update statistics */ + ipv6_stats.in_receives++; + ipv6_stats.in_octets += iob_len ( iobuf ); + if ( flags & LL_BROADCAST ) { + ipv6_stats.in_bcast_pkts++; + } else if ( flags & LL_MULTICAST ) { + ipv6_stats.in_mcast_pkts++; + } + + /* Sanity check the IPv6 header */ + if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) { + DBGC ( ipv6col ( &iphdr->src ), "IPv6 packet too short at %zd " + "bytes (min %zd bytes)\n", iob_len ( iobuf ), + sizeof ( *iphdr ) ); + rc = -EINVAL_LEN; + goto err_header; + } + if ( ( iphdr->ver_tc_label & htonl ( IPV6_MASK_VER ) ) != + htonl ( IPV6_VER ) ) { + DBGC ( ipv6col ( &iphdr->src ), "IPv6 version %#08x not " + "supported\n", ntohl ( iphdr->ver_tc_label ) ); + rc = -ENOTSUP_VER; + goto err_header; + } + + /* Truncate packet to specified length */ + len = ntohs ( iphdr->len ); + if ( len > iob_len ( iobuf ) ) { + DBGC ( ipv6col ( &iphdr->src ), "IPv6 length too long at %zd " + "bytes (packet is %zd bytes)\n", len, iob_len ( iobuf )); + ipv6_stats.in_truncated_pkts++; + rc = -EINVAL_LEN; + goto err_other; + } + iob_unput ( iobuf, ( iob_len ( iobuf ) - len - sizeof ( *iphdr ) ) ); + hdrlen = sizeof ( *iphdr ); + + /* Print IPv6 header for debugging */ + DBGC2 ( ipv6col ( &iphdr->src ), "IPv6 RX %s<-", + inet6_ntoa ( &iphdr->dest ) ); + DBGC2 ( ipv6col ( &iphdr->src ), "%s len %zd next %d\n", + inet6_ntoa ( &iphdr->src ), len, iphdr->next_header ); + + /* Discard unicast packets not destined for us */ + if ( ( ! ( flags & LL_MULTICAST ) ) && + ( ! ipv6_has_addr ( netdev, &iphdr->dest ) ) ) { + DBGC ( ipv6col ( &iphdr->src ), "IPv6 discarding non-local " + "unicast packet for %s\n", inet6_ntoa ( &iphdr->dest ) ); + ipv6_stats.in_addr_errors++; + rc = -EPIPE; + goto err_other; + } + + /* Process any extension headers */ + next_header = iphdr->next_header; + while ( 1 ) { + + /* Extract extension header */ + this_header = next_header; + ext = ( iobuf->data + hdrlen ); + extlen = sizeof ( ext->pad ); + if ( iob_len ( iobuf ) < ( hdrlen + extlen ) ) { + DBGC ( ipv6col ( &iphdr->src ), "IPv6 too short for " + "extension header type %d at %zd bytes (min " + "%zd bytes)\n", this_header, + ( iob_len ( iobuf ) - hdrlen ), extlen ); + rc = -EINVAL_LEN; + goto err_header; + } + + /* Determine size of extension header (if applicable) */ + if ( ( this_header == IPV6_HOPBYHOP ) || + ( this_header == IPV6_DESTINATION ) || + ( this_header == IPV6_ROUTING ) ) { + /* Length field is present */ + extlen += ext->common.len; + } else if ( this_header == IPV6_FRAGMENT ) { + /* Length field is reserved and ignored (RFC2460) */ + } else { + /* Not an extension header; assume rest is payload */ + break; + } + if ( iob_len ( iobuf ) < ( hdrlen + extlen ) ) { + DBGC ( ipv6col ( &iphdr->src ), "IPv6 too short for " + "extension header type %d at %zd bytes (min " + "%zd bytes)\n", this_header, + ( iob_len ( iobuf ) - hdrlen ), extlen ); + rc = -EINVAL_LEN; + goto err_header; + } + hdrlen += extlen; + next_header = ext->common.next_header; + DBGC2 ( ipv6col ( &iphdr->src ), "IPv6 RX %s<-", + inet6_ntoa ( &iphdr->dest ) ); + DBGC2 ( ipv6col ( &iphdr->src ), "%s ext type %d len %zd next " + "%d\n", inet6_ntoa ( &iphdr->src ), this_header, + extlen, next_header ); + + /* Process this extension header */ + if ( ( this_header == IPV6_HOPBYHOP ) || + ( this_header == IPV6_DESTINATION ) ) { + + /* Check that all options can be ignored */ + if ( ( rc = ipv6_check_options ( iphdr, &ext->options, + extlen ) ) != 0 ) + goto err_header; + + } else if ( this_header == IPV6_FRAGMENT ) { + + /* Reassemble fragments */ + iobuf = fragment_reassemble ( &ipv6_reassembler, iobuf, + &hdrlen ); + if ( ! iobuf ) + return 0; + iphdr = iobuf->data; + } + } + + /* Construct socket address, calculate pseudo-header checksum, + * and hand off to transport layer + */ + memset ( &src, 0, sizeof ( src ) ); + src.sin6.sin6_family = AF_INET6; + memcpy ( &src.sin6.sin6_addr, &iphdr->src, + sizeof ( src.sin6.sin6_addr ) ); + src.sin6.sin6_scope_id = netdev->index; + memset ( &dest, 0, sizeof ( dest ) ); + dest.sin6.sin6_family = AF_INET6; + memcpy ( &dest.sin6.sin6_addr, &iphdr->dest, + sizeof ( dest.sin6.sin6_addr ) ); + dest.sin6.sin6_scope_id = netdev->index; + iob_pull ( iobuf, hdrlen ); + pshdr_csum = ipv6_pshdr_chksum ( iphdr, iob_len ( iobuf ), + next_header, TCPIP_EMPTY_CSUM ); + if ( ( rc = tcpip_rx ( iobuf, netdev, next_header, &src.st, &dest.st, + pshdr_csum, &ipv6_stats ) ) != 0 ) { + DBGC ( ipv6col ( &src.sin6.sin6_addr ), "IPv6 received packet " + "rejected by stack: %s\n", strerror ( rc ) ); + return rc; + } + + return 0; + + err_header: + ipv6_stats.in_hdr_errors++; + err_other: + free_iob ( iobuf ); + return rc; +} + +/** + * Parse IPv6 address + * + * @v string IPv6 address string + * @ret in IPv6 address to fill in + * @ret rc Return status code + */ +int inet6_aton ( const char *string, struct in6_addr *in ) { + uint16_t *word = in->s6_addr16; + uint16_t *end = ( word + ( sizeof ( in->s6_addr16 ) / + sizeof ( in->s6_addr16[0] ) ) ); + uint16_t *pad = NULL; + const char *nptr = string; + char *endptr; + unsigned long value; + size_t pad_len; + size_t move_len; + + /* Parse string */ + while ( 1 ) { + + /* Parse current word */ + value = strtoul ( nptr, &endptr, 16 ); + if ( value > 0xffff ) { + DBG ( "IPv6 invalid word value %#lx in \"%s\"\n", + value, string ); + return -EINVAL; + } + *(word++) = htons ( value ); + + /* Parse separator */ + if ( ! *endptr ) + break; + if ( *endptr != ':' ) { + DBG ( "IPv6 invalid separator '%c' in \"%s\"\n", + *endptr, string ); + return -EINVAL; + } + if ( ( endptr == nptr ) && ( nptr != string ) ) { + if ( pad ) { + DBG ( "IPv6 invalid multiple \"::\" in " + "\"%s\"\n", string ); + return -EINVAL; + } + pad = word; + } + nptr = ( endptr + 1 ); + + /* Check for overrun */ + if ( word == end ) { + DBG ( "IPv6 too many words in \"%s\"\n", string ); + return -EINVAL; + } + } + + /* Insert padding if specified */ + if ( pad ) { + move_len = ( ( ( void * ) word ) - ( ( void * ) pad ) ); + pad_len = ( ( ( void * ) end ) - ( ( void * ) word ) ); + memmove ( ( ( ( void * ) pad ) + pad_len ), pad, move_len ); + memset ( pad, 0, pad_len ); + } else if ( word != end ) { + DBG ( "IPv6 underlength address \"%s\"\n", string ); + return -EINVAL; + } + + return 0; +} + +/** + * Convert IPv6 address to standard notation + * + * @v in IPv6 address + * @ret string IPv6 address string in canonical format + * + * RFC5952 defines the canonical format for IPv6 textual representation. + */ +char * inet6_ntoa ( const struct in6_addr *in ) { + static char buf[41]; /* ":xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx" */ + char *out = buf; + char *longest_start = NULL; + char *start = NULL; + int longest_len = 1; + int len = 0; + char *dest; + unsigned int i; + uint16_t value; + + /* Format address, keeping track of longest run of zeros */ + for ( i = 0 ; i < ( sizeof ( in->s6_addr16 ) / + sizeof ( in->s6_addr16[0] ) ) ; i++ ) { + value = ntohs ( in->s6_addr16[i] ); + if ( value == 0 ) { + if ( len++ == 0 ) + start = out; + if ( len > longest_len ) { + longest_start = start; + longest_len = len; + } + } else { + len = 0; + } + out += sprintf ( out, ":%x", value ); + } + + /* Abbreviate longest run of zeros, if applicable */ + if ( longest_start ) { + dest = strcpy ( ( longest_start + 1 ), + ( longest_start + ( 2 * longest_len ) ) ); + if ( dest[0] == '\0' ) + dest[1] = '\0'; + dest[0] = ':'; + } + return ( ( longest_start == buf ) ? buf : ( buf + 1 ) ); +} + +/** + * Transcribe IPv6 address + * + * @v net_addr IPv6 address + * @ret string IPv6 address in standard notation + * + */ +static const char * ipv6_ntoa ( const void *net_addr ) { + return inet6_ntoa ( net_addr ); +} + +/** + * Transcribe IPv6 socket address + * + * @v sa Socket address + * @ret string Socket address in standard notation + */ +static const char * ipv6_sock_ntoa ( struct sockaddr *sa ) { + static char buf[ 39 /* "xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx" */ + + 1 /* "%" */ + NETDEV_NAME_LEN + 1 /* NUL */ ]; + struct sockaddr_in6 *sin6 = ( ( struct sockaddr_in6 * ) sa ); + struct in6_addr *in = &sin6->sin6_addr; + struct net_device *netdev; + const char *netdev_name; + + /* Identify network device, if applicable */ + if ( IN6_IS_ADDR_LINKLOCAL ( in ) || IN6_IS_ADDR_MULTICAST ( in ) ) { + netdev = find_netdev_by_index ( sin6->sin6_scope_id ); + netdev_name = ( netdev ? netdev->name : "UNKNOWN" ); + } else { + netdev_name = NULL; + } + + /* Format socket address */ + snprintf ( buf, sizeof ( buf ), "%s%s%s", inet6_ntoa ( in ), + ( netdev_name ? "%" : "" ), + ( netdev_name ? netdev_name : "" ) ); + return buf; +} + +/** + * Parse IPv6 socket address + * + * @v string Socket address string + * @v sa Socket address to fill in + * @ret rc Return status code + */ +static int ipv6_sock_aton ( const char *string, struct sockaddr *sa ) { + struct sockaddr_in6 *sin6 = ( ( struct sockaddr_in6 * ) sa ); + struct in6_addr in; + struct net_device *netdev; + size_t len; + char *tmp; + char *in_string; + char *netdev_string; + int rc; + + /* Create modifiable copy of string */ + tmp = strdup ( string ); + if ( ! tmp ) { + rc = -ENOMEM; + goto err_alloc; + } + in_string = tmp; + + /* Strip surrounding "[...]", if present */ + len = strlen ( in_string ); + if ( ( in_string[0] == '[' ) && ( in_string[ len - 1 ] == ']' ) ) { + in_string[ len - 1 ] = '\0'; + in_string++; + } + + /* Split at network device name, if present */ + netdev_string = strchr ( in_string, '%' ); + if ( netdev_string ) + *(netdev_string++) = '\0'; + + /* Parse IPv6 address portion */ + if ( ( rc = inet6_aton ( in_string, &in ) ) != 0 ) + goto err_inet6_aton; + + /* Parse network device name, if present */ + if ( netdev_string ) { + netdev = find_netdev ( netdev_string ); + if ( ! netdev ) { + rc = -ENODEV; + goto err_find_netdev; + } + sin6->sin6_scope_id = netdev->index; + } + + /* Copy IPv6 address portion to socket address */ + memcpy ( &sin6->sin6_addr, &in, sizeof ( sin6->sin6_addr ) ); + + err_find_netdev: + err_inet6_aton: + free ( tmp ); + err_alloc: + return rc; +} + +/** IPv6 protocol */ +struct net_protocol ipv6_protocol __net_protocol = { + .name = "IPv6", + .net_proto = htons ( ETH_P_IPV6 ), + .net_addr_len = sizeof ( struct in6_addr ), + .rx = ipv6_rx, + .ntoa = ipv6_ntoa, +}; + +/** IPv6 TCPIP net protocol */ +struct tcpip_net_protocol ipv6_tcpip_protocol __tcpip_net_protocol = { + .name = "IPv6", + .sa_family = AF_INET6, + .header_len = sizeof ( struct ipv6_header ), + .tx = ipv6_tx, + .netdev = ipv6_netdev, +}; + +/** IPv6 socket address converter */ +struct sockaddr_converter ipv6_sockaddr_converter __sockaddr_converter = { + .family = AF_INET6, + .ntoa = ipv6_sock_ntoa, + .aton = ipv6_sock_aton, +}; + +/** + * Parse IPv6 address setting value + * + * @v type Setting type + * @v value Formatted setting value + * @v buf Buffer to contain raw value + * @v len Length of buffer + * @ret len Length of raw value, or negative error + */ +int parse_ipv6_setting ( const struct setting_type *type __unused, + const char *value, void *buf, size_t len ) { + struct in6_addr ipv6; + int rc; + + /* Parse IPv6 address */ + if ( ( rc = inet6_aton ( value, &ipv6 ) ) != 0 ) + return rc; + + /* Copy to buffer */ + if ( len > sizeof ( ipv6 ) ) + len = sizeof ( ipv6 ); + memcpy ( buf, &ipv6, len ); + + return ( sizeof ( ipv6 ) ); +} + +/** + * Format IPv6 address setting value + * + * @v type Setting type + * @v raw Raw setting value + * @v raw_len Length of raw setting value + * @v buf Buffer to contain formatted value + * @v len Length of buffer + * @ret len Length of formatted value, or negative error + */ +int format_ipv6_setting ( const struct setting_type *type __unused, + const void *raw, size_t raw_len, char *buf, + size_t len ) { + const struct in6_addr *ipv6 = raw; + + if ( raw_len < sizeof ( *ipv6 ) ) + return -EINVAL; + return snprintf ( buf, len, "%s", inet6_ntoa ( ipv6 ) ); +} + +/** + * Create IPv6 network device + * + * @v netdev Network device + * @ret rc Return status code + */ +static int ipv6_probe ( struct net_device *netdev ) { + struct ipv6_miniroute *miniroute; + struct in6_addr address; + int prefix_len; + int rc; + + /* Construct link-local address from EUI-64 as per RFC 2464 */ + memset ( &address, 0, sizeof ( address ) ); + prefix_len = ipv6_link_local ( &address, netdev ); + if ( prefix_len < 0 ) { + rc = prefix_len; + DBGC ( netdev, "IPv6 %s could not construct link-local " + "address: %s\n", netdev->name, strerror ( rc ) ); + return rc; + } + + /* Create link-local address for this network device */ + miniroute = ipv6_add_miniroute ( netdev, &address, prefix_len, + IPV6_HAS_ADDRESS ); + if ( ! miniroute ) + return -ENOMEM; + + return 0; +} + +/** + * Destroy IPv6 network device + * + * @v netdev Network device + */ +static void ipv6_remove ( struct net_device *netdev ) { + struct ipv6_miniroute *miniroute; + struct ipv6_miniroute *tmp; + + /* Delete all miniroutes for this network device */ + list_for_each_entry_safe ( miniroute, tmp, &ipv6_miniroutes, list ) { + if ( miniroute->netdev == netdev ) { + netdev_put ( miniroute->netdev ); + list_del ( &miniroute->list ); + free ( miniroute ); + } + } +} + +/** IPv6 network device driver */ +struct net_driver ipv6_driver __net_driver = { + .name = "IPv6", + .probe = ipv6_probe, + .remove = ipv6_remove, +}; + +/* Drag in ICMPv6 */ +REQUIRE_OBJECT ( icmpv6 ); + +/* Drag in NDP */ +REQUIRE_OBJECT ( ndp ); diff --git a/qemu/roms/ipxe/src/net/ndp.c b/qemu/roms/ipxe/src/net/ndp.c new file mode 100644 index 000000000..e62f7d5cb --- /dev/null +++ b/qemu/roms/ipxe/src/net/ndp.c @@ -0,0 +1,1010 @@ +/* + * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/in.h> +#include <ipxe/iobuf.h> +#include <ipxe/tcpip.h> +#include <ipxe/ipv6.h> +#include <ipxe/icmpv6.h> +#include <ipxe/neighbour.h> +#include <ipxe/dhcpv6.h> +#include <ipxe/ndp.h> + +/** @file + * + * IPv6 neighbour discovery protocol + * + */ + +static int +ipv6conf_rx_router_advertisement ( struct net_device *netdev, + struct ndp_router_advertisement_header *radv, + size_t len ); + +/** + * Transmit NDP packet with link-layer address option + * + * @v netdev Network device + * @v sin6_src Source socket address + * @v sin6_dest Destination socket address + * @v data NDP header + * @v len Size of NDP header + * @v option_type NDP option type + * @ret rc Return status code + */ +static int ndp_tx_ll_addr ( struct net_device *netdev, + struct sockaddr_in6 *sin6_src, + struct sockaddr_in6 *sin6_dest, + const void *data, size_t len, + unsigned int option_type ) { + struct sockaddr_tcpip *st_src = + ( ( struct sockaddr_tcpip * ) sin6_src ); + struct sockaddr_tcpip *st_dest = + ( ( struct sockaddr_tcpip * ) sin6_dest ); + struct ll_protocol *ll_protocol = netdev->ll_protocol; + struct io_buffer *iobuf; + struct ndp_ll_addr_option *ll_addr_opt; + union ndp_header *ndp; + size_t option_len; + int rc; + + /* Allocate and populate buffer */ + option_len = ( ( sizeof ( *ll_addr_opt ) + + ll_protocol->ll_addr_len + NDP_OPTION_BLKSZ - 1 ) & + ~( NDP_OPTION_BLKSZ - 1 ) ); + iobuf = alloc_iob ( MAX_LL_NET_HEADER_LEN + len + option_len ); + if ( ! iobuf ) + return -ENOMEM; + iob_reserve ( iobuf, MAX_LL_NET_HEADER_LEN ); + memcpy ( iob_put ( iobuf, len ), data, len ); + ll_addr_opt = iob_put ( iobuf, option_len ); + ll_addr_opt->header.type = option_type; + ll_addr_opt->header.blocks = ( option_len / NDP_OPTION_BLKSZ ); + memcpy ( ll_addr_opt->ll_addr, netdev->ll_addr, + ll_protocol->ll_addr_len ); + ndp = iobuf->data; + ndp->icmp.chksum = tcpip_chksum ( ndp, ( len + option_len ) ); + + /* Transmit packet */ + if ( ( rc = tcpip_tx ( iobuf, &icmpv6_protocol, st_src, st_dest, + netdev, &ndp->icmp.chksum ) ) != 0 ) { + DBGC ( netdev, "NDP %s could not transmit packet: %s\n", + netdev->name, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Transmit NDP neighbour discovery request + * + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v net_dest Destination network-layer address + * @v net_source Source network-layer address + * @ret rc Return status code + */ +static int ndp_tx_request ( struct net_device *netdev, + struct net_protocol *net_protocol __unused, + const void *net_dest, const void *net_source ) { + struct sockaddr_in6 sin6_src; + struct sockaddr_in6 sin6_dest; + struct ndp_neighbour_header neigh; + int rc; + + /* Construct source address */ + memset ( &sin6_src, 0, sizeof ( sin6_src ) ); + sin6_src.sin6_family = AF_INET6; + memcpy ( &sin6_src.sin6_addr, net_source, + sizeof ( sin6_src.sin6_addr ) ); + + /* Construct multicast destination address */ + memset ( &sin6_dest, 0, sizeof ( sin6_dest ) ); + sin6_dest.sin6_family = AF_INET6; + sin6_dest.sin6_scope_id = netdev->index; + ipv6_solicited_node ( &sin6_dest.sin6_addr, net_dest ); + + /* Construct neighbour header */ + memset ( &neigh, 0, sizeof ( neigh ) ); + neigh.icmp.type = ICMPV6_NEIGHBOUR_SOLICITATION; + memcpy ( &neigh.target, net_dest, sizeof ( neigh.target ) ); + + /* Transmit neighbour discovery packet */ + if ( ( rc = ndp_tx_ll_addr ( netdev, &sin6_src, &sin6_dest, &neigh, + sizeof ( neigh ), + NDP_OPT_LL_SOURCE ) ) != 0 ) + return rc; + + return 0; +} + +/** NDP neighbour discovery protocol */ +struct neighbour_discovery ndp_discovery = { + .name = "NDP", + .tx_request = ndp_tx_request, +}; + +/** + * Transmit NDP router solicitation + * + * @v netdev Network device + * @ret rc Return status code + */ +static int ndp_tx_router_solicitation ( struct net_device *netdev ) { + struct ndp_router_solicitation_header rsol; + struct sockaddr_in6 sin6_dest; + int rc; + + /* Construct multicast destination address */ + memset ( &sin6_dest, 0, sizeof ( sin6_dest ) ); + sin6_dest.sin6_family = AF_INET6; + sin6_dest.sin6_scope_id = netdev->index; + ipv6_all_routers ( &sin6_dest.sin6_addr ); + + /* Construct router solicitation */ + memset ( &rsol, 0, sizeof ( rsol ) ); + rsol.icmp.type = ICMPV6_ROUTER_SOLICITATION; + + /* Transmit packet */ + if ( ( rc = ndp_tx_ll_addr ( netdev, NULL, &sin6_dest, &rsol, + sizeof ( rsol ), NDP_OPT_LL_SOURCE ) ) !=0) + return rc; + + return 0; +} + +/** + * Process NDP neighbour solicitation source link-layer address option + * + * @v netdev Network device + * @v sin6_src Source socket address + * @v ndp NDP packet + * @v option NDP option + * @v len NDP option length + * @ret rc Return status code + */ +static int +ndp_rx_neighbour_solicitation_ll_source ( struct net_device *netdev, + struct sockaddr_in6 *sin6_src, + union ndp_header *ndp, + union ndp_option *option, + size_t len ) { + struct ndp_neighbour_header *neigh = &ndp->neigh; + struct ndp_ll_addr_option *ll_addr_opt = &option->ll_addr; + struct ll_protocol *ll_protocol = netdev->ll_protocol; + int rc; + + /* Silently ignore neighbour solicitations for addresses we do + * not own. + */ + if ( ! ipv6_has_addr ( netdev, &neigh->target ) ) + return 0; + + /* Sanity check */ + if ( offsetof ( typeof ( *ll_addr_opt ), + ll_addr[ll_protocol->ll_addr_len] ) > len ) { + DBGC ( netdev, "NDP %s neighbour solicitation link-layer " + "address option too short at %zd bytes\n", + netdev->name, len ); + return -EINVAL; + } + + /* Create or update neighbour cache entry */ + if ( ( rc = neighbour_define ( netdev, &ipv6_protocol, + &sin6_src->sin6_addr, + ll_addr_opt->ll_addr ) ) != 0 ) { + DBGC ( netdev, "NDP %s could not define %s => %s: %s\n", + netdev->name, inet6_ntoa ( &sin6_src->sin6_addr ), + ll_protocol->ntoa ( ll_addr_opt->ll_addr ), + strerror ( rc ) ); + return rc; + } + + /* Convert neighbour header to advertisement */ + memset ( neigh, 0, offsetof ( typeof ( *neigh ), target ) ); + neigh->icmp.type = ICMPV6_NEIGHBOUR_ADVERTISEMENT; + neigh->flags = ( NDP_NEIGHBOUR_SOLICITED | NDP_NEIGHBOUR_OVERRIDE ); + + /* Send neighbour advertisement */ + if ( ( rc = ndp_tx_ll_addr ( netdev, NULL, sin6_src, neigh, + sizeof ( *neigh ), + NDP_OPT_LL_TARGET ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Process NDP neighbour advertisement target link-layer address option + * + * @v netdev Network device + * @v sin6_src Source socket address + * @v ndp NDP packet + * @v option NDP option + * @v len NDP option length + * @ret rc Return status code + */ +static int +ndp_rx_neighbour_advertisement_ll_target ( struct net_device *netdev, + struct sockaddr_in6 *sin6_src + __unused, + union ndp_header *ndp, + union ndp_option *option, + size_t len ) { + struct ndp_neighbour_header *neigh = &ndp->neigh; + struct ndp_ll_addr_option *ll_addr_opt = &option->ll_addr; + struct ll_protocol *ll_protocol = netdev->ll_protocol; + int rc; + + /* Sanity check */ + if ( offsetof ( typeof ( *ll_addr_opt ), + ll_addr[ll_protocol->ll_addr_len] ) > len ) { + DBGC ( netdev, "NDP %s neighbour advertisement link-layer " + "address option too short at %zd bytes\n", + netdev->name, len ); + return -EINVAL; + } + + /* Update neighbour cache entry, if any */ + if ( ( rc = neighbour_update ( netdev, &ipv6_protocol, &neigh->target, + ll_addr_opt->ll_addr ) ) != 0 ) { + DBGC ( netdev, "NDP %s could not update %s => %s: %s\n", + netdev->name, inet6_ntoa ( &neigh->target ), + ll_protocol->ntoa ( ll_addr_opt->ll_addr ), + strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Process NDP router advertisement source link-layer address option + * + * @v netdev Network device + * @v sin6_src Source socket address + * @v ndp NDP packet + * @v option NDP option + * @v len NDP option length + * @ret rc Return status code + */ +static int +ndp_rx_router_advertisement_ll_source ( struct net_device *netdev, + struct sockaddr_in6 *sin6_src, + union ndp_header *ndp __unused, + union ndp_option *option, size_t len ) { + struct ndp_ll_addr_option *ll_addr_opt = &option->ll_addr; + struct ll_protocol *ll_protocol = netdev->ll_protocol; + int rc; + + /* Sanity check */ + if ( offsetof ( typeof ( *ll_addr_opt ), + ll_addr[ll_protocol->ll_addr_len] ) > len ) { + DBGC ( netdev, "NDP %s router advertisement link-layer address " + "option too short at %zd bytes\n", netdev->name, len ); + return -EINVAL; + } + + /* Define neighbour cache entry */ + if ( ( rc = neighbour_define ( netdev, &ipv6_protocol, + &sin6_src->sin6_addr, + ll_addr_opt->ll_addr ) ) != 0 ) { + DBGC ( netdev, "NDP %s could not define %s => %s: %s\n", + netdev->name, inet6_ntoa ( &sin6_src->sin6_addr ), + ll_protocol->ntoa ( ll_addr_opt->ll_addr ), + strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Process NDP router advertisement prefix information option + * + * @v netdev Network device + * @v sin6_src Source socket address + * @v ndp NDP packet + * @v option NDP option + * @v len NDP option length + * @ret rc Return status code + */ +static int +ndp_rx_router_advertisement_prefix ( struct net_device *netdev, + struct sockaddr_in6 *sin6_src, + union ndp_header *ndp, + union ndp_option *option, size_t len ) { + struct ndp_router_advertisement_header *radv = &ndp->radv; + struct ndp_prefix_information_option *prefix_opt = &option->prefix; + struct in6_addr *router = &sin6_src->sin6_addr; + struct in6_addr address; + int prefix_len; + int rc; + + /* Sanity check */ + if ( sizeof ( *prefix_opt ) > len ) { + DBGC ( netdev, "NDP %s router advertisement prefix option too " + "short at %zd bytes\n", netdev->name, len ); + return -EINVAL; + } + DBGC ( netdev, "NDP %s found %sdefault router %s ", + netdev->name, ( radv->lifetime ? "" : "non-" ), + inet6_ntoa ( &sin6_src->sin6_addr ) ); + DBGC ( netdev, "for %s-link %sautonomous prefix %s/%d\n", + ( ( prefix_opt->flags & NDP_PREFIX_ON_LINK ) ? "on" : "off" ), + ( ( prefix_opt->flags & NDP_PREFIX_AUTONOMOUS ) ? "" : "non-" ), + inet6_ntoa ( &prefix_opt->prefix ), + prefix_opt->prefix_len ); + + /* Ignore off-link prefixes */ + if ( ! ( prefix_opt->flags & NDP_PREFIX_ON_LINK ) ) + return 0; + + /* Define prefix */ + if ( ( rc = ipv6_set_prefix ( netdev, &prefix_opt->prefix, + prefix_opt->prefix_len, + ( radv->lifetime ? + router : NULL ) ) ) != 0 ) { + DBGC ( netdev, "NDP %s could not define prefix %s/%d: %s\n", + netdev->name, inet6_ntoa ( &prefix_opt->prefix ), + prefix_opt->prefix_len, strerror ( rc ) ); + return rc; + } + + /* Perform stateless address autoconfiguration, if applicable */ + if ( prefix_opt->flags & NDP_PREFIX_AUTONOMOUS ) { + memcpy ( &address, &prefix_opt->prefix, sizeof ( address ) ); + prefix_len = ipv6_eui64 ( &address, netdev ); + if ( prefix_len < 0 ) { + rc = prefix_len; + DBGC ( netdev, "NDP %s could not construct SLAAC " + "address: %s\n", netdev->name, strerror ( rc ) ); + return rc; + } + if ( prefix_len != prefix_opt->prefix_len ) { + DBGC ( netdev, "NDP %s incorrect SLAAC prefix length " + "%d (expected %d)\n", netdev->name, + prefix_opt->prefix_len, prefix_len ); + return -EINVAL; + } + if ( ( rc = ipv6_set_address ( netdev, &address ) ) != 0 ) { + DBGC ( netdev, "NDP %s could not set address %s: %s\n", + netdev->name, inet6_ntoa ( &address ), + strerror ( rc ) ); + return rc; + } + } + + return 0; +} + +/** An NDP option handler */ +struct ndp_option_handler { + /** ICMPv6 type */ + uint8_t icmp_type; + /** Option type */ + uint8_t option_type; + /** + * Handle received option + * + * @v netdev Network device + * @v sin6_src Source socket address + * @v ndp NDP packet + * @v option NDP option + * @ret rc Return status code + */ + int ( * rx ) ( struct net_device *netdev, struct sockaddr_in6 *sin6_src, + union ndp_header *ndp, union ndp_option *option, + size_t len ); +}; + +/** NDP option handlers */ +static struct ndp_option_handler ndp_option_handlers[] = { + { + .icmp_type = ICMPV6_NEIGHBOUR_SOLICITATION, + .option_type = NDP_OPT_LL_SOURCE, + .rx = ndp_rx_neighbour_solicitation_ll_source, + }, + { + .icmp_type = ICMPV6_NEIGHBOUR_ADVERTISEMENT, + .option_type = NDP_OPT_LL_TARGET, + .rx = ndp_rx_neighbour_advertisement_ll_target, + }, + { + .icmp_type = ICMPV6_ROUTER_ADVERTISEMENT, + .option_type = NDP_OPT_LL_SOURCE, + .rx = ndp_rx_router_advertisement_ll_source, + }, + { + .icmp_type = ICMPV6_ROUTER_ADVERTISEMENT, + .option_type = NDP_OPT_PREFIX, + .rx = ndp_rx_router_advertisement_prefix, + }, +}; + +/** + * Process received NDP option + * + * @v netdev Network device + * @v sin6_src Source socket address + * @v ndp NDP packet + * @v option NDP option + * @v len Option length + * @ret rc Return status code + */ +static int ndp_rx_option ( struct net_device *netdev, + struct sockaddr_in6 *sin6_src, union ndp_header *ndp, + union ndp_option *option, size_t len ) { + struct ndp_option_handler *handler; + unsigned int i; + + /* Locate a suitable option handler, if any */ + for ( i = 0 ; i < ( sizeof ( ndp_option_handlers ) / + sizeof ( ndp_option_handlers[0] ) ) ; i++ ) { + handler = &ndp_option_handlers[i]; + if ( ( handler->icmp_type == ndp->icmp.type ) && + ( handler->option_type == option->header.type ) ) { + return handler->rx ( netdev, sin6_src, ndp, + option, len ); + } + } + + /* Silently ignore unknown options as per RFC 4861 */ + return 0; +} + +/** + * Process received NDP packet options + * + * @v netdev Network device + * @v sin6_src Source socket address + * @v ndp NDP header + * @v offset Offset to NDP options + * @v len Length of NDP packet + * @ret rc Return status code + */ +static int ndp_rx_options ( struct net_device *netdev, + struct sockaddr_in6 *sin6_src, + union ndp_header *ndp, size_t offset, size_t len ) { + union ndp_option *option; + size_t remaining; + size_t option_len; + int rc; + + /* Sanity check */ + if ( len < offset ) { + DBGC ( netdev, "NDP %s packet too short at %zd bytes (min %zd " + "bytes)\n", netdev->name, len, offset ); + return -EINVAL; + } + + /* Search for option */ + option = ( ( ( void * ) ndp ) + offset ); + remaining = ( len - offset ); + while ( remaining ) { + + /* Sanity check */ + if ( ( remaining < sizeof ( option->header ) ) || + ( option->header.blocks == 0 ) || + ( remaining < ( option->header.blocks * + NDP_OPTION_BLKSZ ) ) ) { + DBGC ( netdev, "NDP %s bad option length:\n", + netdev->name ); + DBGC_HDA ( netdev, 0, option, remaining ); + return -EINVAL; + } + option_len = ( option->header.blocks * NDP_OPTION_BLKSZ ); + + /* Handle option */ + if ( ( rc = ndp_rx_option ( netdev, sin6_src, ndp, option, + option_len ) ) != 0 ) + return rc; + + /* Move to next option */ + option = ( ( ( void * ) option ) + option_len ); + remaining -= option_len; + } + + return 0; +} + +/** + * Process received NDP neighbour solicitation or advertisement + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v sin6_src Source socket address + * @v sin6_dest Destination socket address + * @ret rc Return status code + */ +static int ndp_rx_neighbour ( struct io_buffer *iobuf, + struct net_device *netdev, + struct sockaddr_in6 *sin6_src, + struct sockaddr_in6 *sin6_dest __unused ) { + union ndp_header *ndp = iobuf->data; + struct ndp_neighbour_header *neigh = &ndp->neigh; + size_t len = iob_len ( iobuf ); + int rc; + + /* Process options */ + if ( ( rc = ndp_rx_options ( netdev, sin6_src, ndp, + offsetof ( typeof ( *neigh ), option ), + len ) ) != 0 ) + goto err_options; + + err_options: + free_iob ( iobuf ); + return rc; +} + +/** + * Process received NDP router advertisement + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v sin6_src Source socket address + * @v sin6_dest Destination socket address + * @ret rc Return status code + */ +static int +ndp_rx_router_advertisement ( struct io_buffer *iobuf, + struct net_device *netdev, + struct sockaddr_in6 *sin6_src, + struct sockaddr_in6 *sin6_dest __unused ) { + union ndp_header *ndp = iobuf->data; + struct ndp_router_advertisement_header *radv = &ndp->radv; + size_t len = iob_len ( iobuf ); + int rc; + + /* Process options */ + if ( ( rc = ndp_rx_options ( netdev, sin6_src, ndp, + offsetof ( typeof ( *radv ), option ), + len ) ) != 0 ) + goto err_options; + + /* Pass to IPv6 autoconfiguration */ + if ( ( rc = ipv6conf_rx_router_advertisement ( netdev, radv, + len ) ) != 0 ) + goto err_ipv6conf; + + err_ipv6conf: + err_options: + free_iob ( iobuf ); + return rc; +} + +/** NDP ICMPv6 handlers */ +struct icmpv6_handler ndp_handlers[] __icmpv6_handler = { + { + .type = ICMPV6_NEIGHBOUR_SOLICITATION, + .rx = ndp_rx_neighbour, + }, + { + .type = ICMPV6_NEIGHBOUR_ADVERTISEMENT, + .rx = ndp_rx_neighbour, + }, + { + .type = ICMPV6_ROUTER_ADVERTISEMENT, + .rx = ndp_rx_router_advertisement, + }, +}; + +/**************************************************************************** + * + * NDP settings + * + */ + +/** An NDP settings block */ +struct ndp_settings { + /** Reference counter */ + struct refcnt refcnt; + /** Settings interface */ + struct settings settings; + /** Length of NDP options */ + size_t len; + /** NDP options */ + union ndp_option option[0]; +}; + +/** NDP settings scope */ +static const struct settings_scope ndp_settings_scope; + +/** + * Construct NDP tag + * + * @v type NDP option type + * @v offset Starting offset of data + * @ret tag NDP tag + */ +#define NDP_TAG( type, offset ) ( ( (offset) << 8 ) | (type) ) + +/** + * Extract NDP tag type + * + * @v tag NDP tag + * @ret type NDP option type + */ +#define NDP_TAG_TYPE( tag ) ( (tag) & 0xff ) + +/** + * Extract NDP tag offset + * + * @v tag NDP tag + * @ret offset Starting offset of data + */ +#define NDP_TAG_OFFSET( tag ) ( (tag) >> 8 ) + +/** + * Check applicability of NDP setting + * + * @v settings Settings block + * @v setting Setting to fetch + * @ret applies Setting applies within this settings block + */ +static int ndp_applies ( struct settings *settings __unused, + const struct setting *setting ) { + + return ( setting->scope == &ndp_settings_scope ); +} + +/** + * Fetch value of NDP setting + * + * @v settings Settings block + * @v setting Setting to fetch + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ +static int ndp_fetch ( struct settings *settings, + struct setting *setting, + void *data, size_t len ) { + struct ndp_settings *ndpset = + container_of ( settings, struct ndp_settings, settings ); + struct net_device *netdev = + container_of ( settings->parent, struct net_device, + settings.settings ); + union ndp_option *option; + unsigned int type = NDP_TAG_TYPE ( setting->tag ); + unsigned int offset = NDP_TAG_OFFSET ( setting->tag ); + size_t remaining; + size_t option_len; + size_t payload_len; + + /* Scan through NDP options for requested type. We can assume + * that the options are well-formed, otherwise they would have + * been rejected prior to being stored. + */ + option = ndpset->option; + remaining = ndpset->len; + while ( remaining ) { + + /* Calculate option length */ + option_len = ( option->header.blocks * NDP_OPTION_BLKSZ ); + + /* If this is the requested option, return it */ + if ( option->header.type == type ) { + + /* Sanity check */ + if ( offset > option_len ) { + DBGC ( netdev, "NDP %s option %d too short\n", + netdev->name, type ); + return -EINVAL; + } + payload_len = ( option_len - offset ); + + /* Copy data to output buffer */ + if ( len > payload_len ) + len = payload_len; + memcpy ( data, ( ( ( void * ) option ) + offset ), len); + return payload_len; + } + + /* Move to next option */ + option = ( ( ( void * ) option ) + option_len ); + remaining -= option_len; + } + + return -ENOENT; +} + +/** NDP settings operations */ +static struct settings_operations ndp_settings_operations = { + .applies = ndp_applies, + .fetch = ndp_fetch, +}; + +/** + * Register NDP settings + * + * @v netdev Network device + * @v option NDP options + * @v len Length of options + * @ret rc Return status code + */ +static int ndp_register_settings ( struct net_device *netdev, + union ndp_option *option, size_t len ) { + struct settings *parent = netdev_settings ( netdev ); + struct ndp_settings *ndpset; + int rc; + + /* Allocate and initialise structure */ + ndpset = zalloc ( sizeof ( *ndpset ) + len ); + if ( ! ndpset ) { + rc = -ENOMEM; + goto err_alloc; + } + ref_init ( &ndpset->refcnt, NULL ); + settings_init ( &ndpset->settings, &ndp_settings_operations, + &ndpset->refcnt, &ndp_settings_scope ); + ndpset->len = len; + memcpy ( ndpset->option, option, len ); + + /* Register settings */ + if ( ( rc = register_settings ( &ndpset->settings, parent, + NDP_SETTINGS_NAME ) ) != 0 ) + goto err_register; + + err_register: + ref_put ( &ndpset->refcnt ); + err_alloc: + return rc; +} + +/** DNS server setting */ +const struct setting ndp_dns6_setting __setting ( SETTING_IP_EXTRA, dns6 ) = { + .name = "dns6", + .description = "DNS server", + .tag = NDP_TAG ( NDP_OPT_RDNSS, + offsetof ( struct ndp_rdnss_option, addresses ) ), + .type = &setting_type_ipv6, + .scope = &ndp_settings_scope, +}; + +/** DNS search list setting */ +const struct setting ndp_dnssl_setting __setting ( SETTING_IP_EXTRA, dnssl ) = { + .name = "dnssl", + .description = "DNS search list", + .tag = NDP_TAG ( NDP_OPT_DNSSL, + offsetof ( struct ndp_dnssl_option, names ) ), + .type = &setting_type_dnssl, + .scope = &ndp_settings_scope, +}; + +/**************************************************************************** + * + * IPv6 autoconfiguration + * + */ + +/** An IPv6 configurator */ +struct ipv6conf { + /** Reference count */ + struct refcnt refcnt; + /** List of configurators */ + struct list_head list; + + /** Job control interface */ + struct interface job; + /** DHCPv6 interface */ + struct interface dhcp; + + /** Network device being configured */ + struct net_device *netdev; + + /** Retransmission timer */ + struct retry_timer timer; +}; + +/** List of IPv6 configurators */ +static LIST_HEAD ( ipv6confs ); + +/** + * Free IPv6 configurator + * + * @v refcnt Reference count + */ +static void ipv6conf_free ( struct refcnt *refcnt ) { + struct ipv6conf *ipv6conf = + container_of ( refcnt, struct ipv6conf, refcnt ); + + netdev_put ( ipv6conf->netdev ); + free ( ipv6conf ); +} + +/** + * Identify IPv6 configurator by network device + * + * @v netdev Network device + * @ret ipv6 IPv6 configurator, or NULL + */ +static struct ipv6conf * ipv6conf_demux ( struct net_device *netdev ) { + struct ipv6conf *ipv6conf; + + list_for_each_entry ( ipv6conf, &ipv6confs, list ) { + if ( ipv6conf->netdev == netdev ) + return ipv6conf; + } + return NULL; +} + +/** + * Finish IPv6 autoconfiguration + * + * @v ipv6 IPv6 configurator + * @v rc Reason for finishing + */ +static void ipv6conf_done ( struct ipv6conf *ipv6conf, int rc ) { + + /* Shut down interfaces */ + intf_shutdown ( &ipv6conf->job, rc ); + intf_shutdown ( &ipv6conf->dhcp, rc ); + + /* Stop timer */ + stop_timer ( &ipv6conf->timer ); + + /* Remove from list and drop list's reference */ + list_del ( &ipv6conf->list ); + ref_put ( &ipv6conf->refcnt ); +} + +/** + * Handle IPv6 configurator timer expiry + * + * @v timer Retry timer + * @v fail Failure indicator + */ +static void ipv6conf_expired ( struct retry_timer *timer, int fail ) { + struct ipv6conf *ipv6conf = + container_of ( timer, struct ipv6conf, timer ); + + /* If we have failed, terminate autoconfiguration */ + if ( fail ) { + ipv6conf_done ( ipv6conf, -ETIMEDOUT ); + return; + } + + /* Otherwise, transmit router solicitation and restart timer */ + start_timer ( &ipv6conf->timer ); + ndp_tx_router_solicitation ( ipv6conf->netdev ); +} + +/** + * Handle router advertisement during IPv6 autoconfiguration + * + * @v netdev Network device + * @v radv Router advertisement + * @v len Length of router advertisement + * @ret rc Return status code + * + * This function assumes that the router advertisement is well-formed, + * since it must have already passed through option processing. + */ +static int +ipv6conf_rx_router_advertisement ( struct net_device *netdev, + struct ndp_router_advertisement_header *radv, + size_t len ) { + struct ipv6conf *ipv6conf; + size_t option_len; + int stateful; + int rc; + + /* Identify IPv6 configurator, if any */ + ipv6conf = ipv6conf_demux ( netdev ); + if ( ! ipv6conf ) { + /* Not an error; router advertisements are processed + * as a background activity even when no explicit + * autoconfiguration is taking place. + */ + return 0; + } + + /* If this is not the first solicited router advertisement, ignore it */ + if ( ! timer_running ( &ipv6conf->timer ) ) + return 0; + + /* Stop router solicitation timer */ + stop_timer ( &ipv6conf->timer ); + + /* Register NDP settings */ + option_len = ( len - offsetof ( typeof ( *radv ), option ) ); + if ( ( rc = ndp_register_settings ( netdev, radv->option, + option_len ) ) != 0 ) + return rc; + + /* Start DHCPv6 if required */ + if ( radv->flags & ( NDP_ROUTER_MANAGED | NDP_ROUTER_OTHER ) ) { + stateful = ( radv->flags & NDP_ROUTER_MANAGED ); + if ( ( rc = start_dhcpv6 ( &ipv6conf->dhcp, netdev, + stateful ) ) != 0 ) { + DBGC ( netdev, "NDP %s could not start state%s DHCPv6: " + "%s\n", netdev->name, + ( stateful ? "ful" : "less" ), strerror ( rc ) ); + ipv6conf_done ( ipv6conf, rc ); + return rc; + } + return 0; + } + + /* Otherwise, terminate autoconfiguration */ + ipv6conf_done ( ipv6conf, 0 ); + + return 0; +} + +/** IPv6 configurator job interface operations */ +static struct interface_operation ipv6conf_job_op[] = { + INTF_OP ( intf_close, struct ipv6conf *, ipv6conf_done ), +}; + +/** IPv6 configurator job interface descriptor */ +static struct interface_descriptor ipv6conf_job_desc = + INTF_DESC ( struct ipv6conf, job, ipv6conf_job_op ); + +/** IPv6 configurator DHCPv6 interface operations */ +static struct interface_operation ipv6conf_dhcp_op[] = { + INTF_OP ( intf_close, struct ipv6conf *, ipv6conf_done ), +}; + +/** IPv6 configurator DHCPv6 interface descriptor */ +static struct interface_descriptor ipv6conf_dhcp_desc = + INTF_DESC ( struct ipv6conf, dhcp, ipv6conf_dhcp_op ); + +/** + * Start IPv6 autoconfiguration + * + * @v job Job control interface + * @v netdev Network device + * @ret rc Return status code + */ +int start_ipv6conf ( struct interface *job, struct net_device *netdev ) { + struct ipv6conf *ipv6conf; + + /* Allocate and initialise structure */ + ipv6conf = zalloc ( sizeof ( *ipv6conf ) ); + if ( ! ipv6conf ) + return -ENOMEM; + ref_init ( &ipv6conf->refcnt, ipv6conf_free ); + intf_init ( &ipv6conf->job, &ipv6conf_job_desc, &ipv6conf->refcnt ); + intf_init ( &ipv6conf->dhcp, &ipv6conf_dhcp_desc, &ipv6conf->refcnt ); + timer_init ( &ipv6conf->timer, ipv6conf_expired, &ipv6conf->refcnt ); + ipv6conf->netdev = netdev_get ( netdev ); + + /* Start timer to initiate router solicitation */ + start_timer_nodelay ( &ipv6conf->timer ); + + /* Attach parent interface, transfer reference to list, and return */ + intf_plug_plug ( &ipv6conf->job, job ); + list_add ( &ipv6conf->list, &ipv6confs ); + return 0; +} + +/** IPv6 network device configurator */ +struct net_device_configurator ipv6_configurator __net_device_configurator = { + .name = "ipv6", + .start = start_ipv6conf, +}; diff --git a/qemu/roms/ipxe/src/net/neighbour.c b/qemu/roms/ipxe/src/net/neighbour.c new file mode 100644 index 000000000..e3026ce46 --- /dev/null +++ b/qemu/roms/ipxe/src/net/neighbour.c @@ -0,0 +1,428 @@ +/* + * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <ipxe/iobuf.h> +#include <ipxe/retry.h> +#include <ipxe/timer.h> +#include <ipxe/malloc.h> +#include <ipxe/neighbour.h> + +/** @file + * + * Neighbour discovery + * + * This file implements the abstract functions of neighbour discovery, + * independent of the underlying network protocol (e.g. ARP or NDP). + * + */ + +/** Neighbour discovery minimum timeout */ +#define NEIGHBOUR_MIN_TIMEOUT ( TICKS_PER_SEC / 8 ) + +/** Neighbour discovery maximum timeout */ +#define NEIGHBOUR_MAX_TIMEOUT ( TICKS_PER_SEC * 3 ) + +/** The neighbour cache */ +struct list_head neighbours = LIST_HEAD_INIT ( neighbours ); + +static void neighbour_expired ( struct retry_timer *timer, int over ); + +/** + * Free neighbour cache entry + * + * @v refcnt Reference count + */ +static void neighbour_free ( struct refcnt *refcnt ) { + struct neighbour *neighbour = + container_of ( refcnt, struct neighbour, refcnt ); + + /* Sanity check */ + assert ( list_empty ( &neighbour->tx_queue ) ); + + /* Drop reference to network device */ + netdev_put ( neighbour->netdev ); + + /* Free neighbour */ + free ( neighbour ); +} + +/** + * Create neighbour cache entry + * + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v net_dest Destination network-layer address + * @ret neighbour Neighbour cache entry, or NULL if allocation failed + */ +static struct neighbour * neighbour_create ( struct net_device *netdev, + struct net_protocol *net_protocol, + const void *net_dest ) { + struct neighbour *neighbour; + + /* Allocate and initialise entry */ + neighbour = zalloc ( sizeof ( *neighbour ) ); + if ( ! neighbour ) + return NULL; + ref_init ( &neighbour->refcnt, neighbour_free ); + neighbour->netdev = netdev_get ( netdev ); + neighbour->net_protocol = net_protocol; + memcpy ( neighbour->net_dest, net_dest, + net_protocol->net_addr_len ); + timer_init ( &neighbour->timer, neighbour_expired, &neighbour->refcnt ); + neighbour->timer.min_timeout = NEIGHBOUR_MIN_TIMEOUT; + neighbour->timer.max_timeout = NEIGHBOUR_MAX_TIMEOUT; + INIT_LIST_HEAD ( &neighbour->tx_queue ); + + /* Transfer ownership to cache */ + list_add ( &neighbour->list, &neighbours ); + + DBGC ( neighbour, "NEIGHBOUR %s %s %s created\n", netdev->name, + net_protocol->name, net_protocol->ntoa ( net_dest ) ); + return neighbour; +} + +/** + * Find neighbour cache entry + * + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v net_dest Destination network-layer address + * @ret neighbour Neighbour cache entry, or NULL if not found + */ +static struct neighbour * neighbour_find ( struct net_device *netdev, + struct net_protocol *net_protocol, + const void *net_dest ) { + struct neighbour *neighbour; + + list_for_each_entry ( neighbour, &neighbours, list ) { + if ( ( neighbour->netdev == netdev ) && + ( neighbour->net_protocol == net_protocol ) && + ( memcmp ( neighbour->net_dest, net_dest, + net_protocol->net_addr_len ) == 0 ) ) { + + /* Move to start of cache */ + list_del ( &neighbour->list ); + list_add ( &neighbour->list, &neighbours ); + + return neighbour; + } + } + return NULL; +} + +/** + * Start neighbour discovery + * + * @v neighbour Neighbour cache entry + * @v discovery Neighbour discovery protocol + * @v net_source Source network-layer address + */ +static void neighbour_discover ( struct neighbour *neighbour, + struct neighbour_discovery *discovery, + const void *net_source ) { + struct net_device *netdev = neighbour->netdev; + struct net_protocol *net_protocol = neighbour->net_protocol; + + /* Record discovery protocol and source network-layer address */ + neighbour->discovery = discovery; + memcpy ( neighbour->net_source, net_source, + net_protocol->net_addr_len ); + + /* Start timer to trigger neighbour discovery */ + start_timer_nodelay ( &neighbour->timer ); + + DBGC ( neighbour, "NEIGHBOUR %s %s %s discovering via %s\n", + netdev->name, net_protocol->name, + net_protocol->ntoa ( neighbour->net_dest ), + neighbour->discovery->name ); +} + +/** + * Complete neighbour discovery + * + * @v neighbour Neighbour cache entry + * @v ll_dest Destination link-layer address + */ +static void neighbour_discovered ( struct neighbour *neighbour, + const void *ll_dest ) { + struct net_device *netdev = neighbour->netdev; + struct ll_protocol *ll_protocol = netdev->ll_protocol; + struct net_protocol *net_protocol = neighbour->net_protocol; + struct io_buffer *iobuf; + int rc; + + /* Fill in link-layer address */ + memcpy ( neighbour->ll_dest, ll_dest, ll_protocol->ll_addr_len ); + DBGC ( neighbour, "NEIGHBOUR %s %s %s is %s %s\n", netdev->name, + net_protocol->name, net_protocol->ntoa ( neighbour->net_dest ), + ll_protocol->name, ll_protocol->ntoa ( neighbour->ll_dest ) ); + + /* Stop retransmission timer */ + stop_timer ( &neighbour->timer ); + + /* Transmit any packets in queue. Take out a temporary + * reference on the entry to prevent it from going out of + * scope during the call to net_tx(). + */ + ref_get ( &neighbour->refcnt ); + while ( ( iobuf = list_first_entry ( &neighbour->tx_queue, + struct io_buffer, list )) != NULL){ + DBGC2 ( neighbour, "NEIGHBOUR %s %s %s transmitting deferred " + "packet\n", netdev->name, net_protocol->name, + net_protocol->ntoa ( neighbour->net_dest ) ); + list_del ( &iobuf->list ); + if ( ( rc = net_tx ( iobuf, netdev, net_protocol, ll_dest, + netdev->ll_addr ) ) != 0 ) { + DBGC ( neighbour, "NEIGHBOUR %s %s %s could not " + "transmit deferred packet: %s\n", + netdev->name, net_protocol->name, + net_protocol->ntoa ( neighbour->net_dest ), + strerror ( rc ) ); + /* Ignore error and continue */ + } + } + ref_put ( &neighbour->refcnt ); +} + +/** + * Destroy neighbour cache entry + * + * @v neighbour Neighbour cache entry + * @v rc Reason for destruction + */ +static void neighbour_destroy ( struct neighbour *neighbour, int rc ) { + struct net_device *netdev = neighbour->netdev; + struct net_protocol *net_protocol = neighbour->net_protocol; + struct io_buffer *iobuf; + + /* Take ownership from cache */ + list_del ( &neighbour->list ); + + /* Stop timer */ + stop_timer ( &neighbour->timer ); + + /* Discard any outstanding I/O buffers */ + while ( ( iobuf = list_first_entry ( &neighbour->tx_queue, + struct io_buffer, list )) != NULL){ + DBGC2 ( neighbour, "NEIGHBOUR %s %s %s discarding deferred " + "packet: %s\n", netdev->name, net_protocol->name, + net_protocol->ntoa ( neighbour->net_dest ), + strerror ( rc ) ); + list_del ( &iobuf->list ); + netdev_tx_err ( neighbour->netdev, iobuf, rc ); + } + + DBGC ( neighbour, "NEIGHBOUR %s %s %s destroyed: %s\n", netdev->name, + net_protocol->name, net_protocol->ntoa ( neighbour->net_dest ), + strerror ( rc ) ); + + /* Drop remaining reference */ + ref_put ( &neighbour->refcnt ); +} + +/** + * Handle neighbour timer expiry + * + * @v timer Retry timer + * @v fail Failure indicator + */ +static void neighbour_expired ( struct retry_timer *timer, int fail ) { + struct neighbour *neighbour = + container_of ( timer, struct neighbour, timer ); + struct net_device *netdev = neighbour->netdev; + struct net_protocol *net_protocol = neighbour->net_protocol; + struct neighbour_discovery *discovery = + neighbour->discovery; + const void *net_dest = neighbour->net_dest; + const void *net_source = neighbour->net_source; + int rc; + + /* If we have failed, destroy the cache entry */ + if ( fail ) { + neighbour_destroy ( neighbour, -ETIMEDOUT ); + return; + } + + /* Restart the timer */ + start_timer ( &neighbour->timer ); + + /* Transmit neighbour request */ + if ( ( rc = discovery->tx_request ( netdev, net_protocol, net_dest, + net_source ) ) != 0 ) { + DBGC ( neighbour, "NEIGHBOUR %s %s %s could not transmit %s " + "request: %s\n", netdev->name, net_protocol->name, + net_protocol->ntoa ( neighbour->net_dest ), + neighbour->discovery->name, strerror ( rc ) ); + /* Retransmit when timer expires */ + return; + } +} + +/** + * Transmit packet, determining link-layer address via neighbour discovery + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v discovery Neighbour discovery protocol + * @v net_protocol Network-layer protocol + * @v net_dest Destination network-layer address + * @v net_source Source network-layer address + * @v ll_source Source link-layer address + * @ret rc Return status code + */ +int neighbour_tx ( struct io_buffer *iobuf, struct net_device *netdev, + struct net_protocol *net_protocol, const void *net_dest, + struct neighbour_discovery *discovery, + const void *net_source, const void *ll_source ) { + struct neighbour *neighbour; + + /* Find or create neighbour cache entry */ + neighbour = neighbour_find ( netdev, net_protocol, net_dest ); + if ( ! neighbour ) { + neighbour = neighbour_create ( netdev, net_protocol, net_dest ); + if ( ! neighbour ) + return -ENOMEM; + neighbour_discover ( neighbour, discovery, net_source ); + } + + /* If a link-layer address is available then transmit + * immediately, otherwise queue for later transmission. + */ + if ( neighbour_has_ll_dest ( neighbour ) ) { + return net_tx ( iobuf, netdev, net_protocol, neighbour->ll_dest, + ll_source ); + } else { + DBGC2 ( neighbour, "NEIGHBOUR %s %s %s deferring packet\n", + netdev->name, net_protocol->name, + net_protocol->ntoa ( net_dest ) ); + list_add_tail ( &iobuf->list, &neighbour->tx_queue ); + return -EAGAIN; + } +} + +/** + * Update existing neighbour cache entry + * + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v net_dest Destination network-layer address + * @v ll_dest Destination link-layer address + * @ret rc Return status code + */ +int neighbour_update ( struct net_device *netdev, + struct net_protocol *net_protocol, + const void *net_dest, const void *ll_dest ) { + struct neighbour *neighbour; + + /* Find neighbour cache entry */ + neighbour = neighbour_find ( netdev, net_protocol, net_dest ); + if ( ! neighbour ) + return -ENOENT; + + /* Set destination address */ + neighbour_discovered ( neighbour, ll_dest ); + + return 0; +} + +/** + * Define neighbour cache entry + * + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v net_dest Destination network-layer address + * @v ll_dest Destination link-layer address, if known + * @ret rc Return status code + */ +int neighbour_define ( struct net_device *netdev, + struct net_protocol *net_protocol, + const void *net_dest, const void *ll_dest ) { + struct neighbour *neighbour; + + /* Find or create neighbour cache entry */ + neighbour = neighbour_find ( netdev, net_protocol, net_dest ); + if ( ! neighbour ) { + neighbour = neighbour_create ( netdev, net_protocol, net_dest ); + if ( ! neighbour ) + return -ENOMEM; + } + + /* Set destination address */ + neighbour_discovered ( neighbour, ll_dest ); + + return 0; +} + +/** + * Update neighbour cache on network device state change or removal + * + * @v netdev Network device + */ +static void neighbour_flush ( struct net_device *netdev ) { + struct neighbour *neighbour; + struct neighbour *tmp; + + /* Remove all neighbour cache entries when a network device is closed */ + if ( ! netdev_is_open ( netdev ) ) { + list_for_each_entry_safe ( neighbour, tmp, &neighbours, list ) + neighbour_destroy ( neighbour, -ENODEV ); + } +} + +/** Neighbour driver (for net device notifications) */ +struct net_driver neighbour_net_driver __net_driver = { + .name = "Neighbour", + .notify = neighbour_flush, + .remove = neighbour_flush, +}; + +/** + * Discard some cached neighbour entries + * + * @ret discarded Number of cached items discarded + */ +static unsigned int neighbour_discard ( void ) { + struct neighbour *neighbour; + + /* Drop oldest cache entry, if any */ + neighbour = list_last_entry ( &neighbours, struct neighbour, list ); + if ( neighbour ) { + neighbour_destroy ( neighbour, -ENOBUFS ); + return 1; + } else { + return 0; + } +} + +/** + * Neighbour cache discarder + * + * Neighbour cache entries are deemed to have a high replacement cost, + * since flushing an active neighbour cache entry midway through a TCP + * transfer will cause substantial disruption. + */ +struct cache_discarder neighbour_discarder __cache_discarder (CACHE_EXPENSIVE)={ + .discard = neighbour_discard, +}; diff --git a/qemu/roms/ipxe/src/net/netdev_settings.c b/qemu/roms/ipxe/src/net/netdev_settings.c new file mode 100644 index 000000000..b3b2e68d8 --- /dev/null +++ b/qemu/roms/ipxe/src/net/netdev_settings.c @@ -0,0 +1,348 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <string.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/dhcp.h> +#include <ipxe/dhcpopts.h> +#include <ipxe/settings.h> +#include <ipxe/device.h> +#include <ipxe/netdevice.h> +#include <ipxe/init.h> + +/** @file + * + * Network device configuration settings + * + */ + +/** Network device predefined settings */ +const struct setting mac_setting __setting ( SETTING_NETDEV, mac ) = { + .name = "mac", + .description = "MAC address", + .type = &setting_type_hex, +}; +const struct setting bustype_setting __setting ( SETTING_NETDEV, bustype ) = { + .name = "bustype", + .description = "Bus type", + .type = &setting_type_string, +}; +const struct setting busloc_setting __setting ( SETTING_NETDEV, busloc ) = { + .name = "busloc", + .description = "Bus location", + .type = &setting_type_uint32, +}; +const struct setting busid_setting __setting ( SETTING_NETDEV, busid ) = { + .name = "busid", + .description = "Bus ID", + .type = &setting_type_hex, +}; +const struct setting chip_setting __setting ( SETTING_NETDEV, chip ) = { + .name = "chip", + .description = "Chip", + .type = &setting_type_string, +}; + +/** + * Store MAC address setting + * + * @v netdev Network device + * @v data Setting data, or NULL to clear setting + * @v len Length of setting data + * @ret rc Return status code + */ +static int netdev_store_mac ( struct net_device *netdev, + const void *data, size_t len ) { + struct ll_protocol *ll_protocol = netdev->ll_protocol; + + /* Record new MAC address */ + if ( data ) { + if ( len != netdev->ll_protocol->ll_addr_len ) + return -EINVAL; + memcpy ( netdev->ll_addr, data, len ); + } else { + /* Reset MAC address if clearing setting */ + ll_protocol->init_addr ( netdev->hw_addr, netdev->ll_addr ); + } + + return 0; +} + +/** + * Fetch MAC address setting + * + * @v netdev Network device + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ +static int netdev_fetch_mac ( struct net_device *netdev, void *data, + size_t len ) { + + if ( len > netdev->ll_protocol->ll_addr_len ) + len = netdev->ll_protocol->ll_addr_len; + memcpy ( data, netdev->ll_addr, len ); + return netdev->ll_protocol->ll_addr_len; +} + +/** + * Fetch bus type setting + * + * @v netdev Network device + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ +static int netdev_fetch_bustype ( struct net_device *netdev, void *data, + size_t len ) { + static const char *bustypes[] = { + [BUS_TYPE_PCI] = "PCI", + [BUS_TYPE_ISAPNP] = "ISAPNP", + [BUS_TYPE_EISA] = "EISA", + [BUS_TYPE_MCA] = "MCA", + [BUS_TYPE_ISA] = "ISA", + [BUS_TYPE_TAP] = "TAP", + }; + struct device_description *desc = &netdev->dev->desc; + const char *bustype; + + assert ( desc->bus_type < ( sizeof ( bustypes ) / + sizeof ( bustypes[0] ) ) ); + bustype = bustypes[desc->bus_type]; + assert ( bustype != NULL ); + strncpy ( data, bustype, len ); + return strlen ( bustype ); +} + +/** + * Fetch bus location setting + * + * @v netdev Network device + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ +static int netdev_fetch_busloc ( struct net_device *netdev, void *data, + size_t len ) { + struct device_description *desc = &netdev->dev->desc; + uint32_t busloc; + + busloc = cpu_to_be32 ( desc->location ); + if ( len > sizeof ( busloc ) ) + len = sizeof ( busloc ); + memcpy ( data, &busloc, len ); + return sizeof ( busloc ); +} + +/** + * Fetch bus ID setting + * + * @v netdev Network device + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ +static int netdev_fetch_busid ( struct net_device *netdev, void *data, + size_t len ) { + struct device_description *desc = &netdev->dev->desc; + struct dhcp_netdev_desc dhcp_desc; + + dhcp_desc.type = desc->bus_type; + dhcp_desc.vendor = htons ( desc->vendor ); + dhcp_desc.device = htons ( desc->device ); + if ( len > sizeof ( dhcp_desc ) ) + len = sizeof ( dhcp_desc ); + memcpy ( data, &dhcp_desc, len ); + return sizeof ( dhcp_desc ); +} + +/** + * Fetch chip setting + * + * @v netdev Network device + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ +static int netdev_fetch_chip ( struct net_device *netdev, void *data, + size_t len ) { + const char *chip = netdev->dev->driver_name; + + strncpy ( data, chip, len ); + return strlen ( chip ); +} + +/** A network device setting operation */ +struct netdev_setting_operation { + /** Setting */ + const struct setting *setting; + /** Store setting (or NULL if not supported) + * + * @v netdev Network device + * @v data Setting data, or NULL to clear setting + * @v len Length of setting data + * @ret rc Return status code + */ + int ( * store ) ( struct net_device *netdev, const void *data, + size_t len ); + /** Fetch setting + * + * @v netdev Network device + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ + int ( * fetch ) ( struct net_device *netdev, void *data, size_t len ); +}; + +/** Network device settings */ +static struct netdev_setting_operation netdev_setting_operations[] = { + { &mac_setting, netdev_store_mac, netdev_fetch_mac }, + { &bustype_setting, NULL, netdev_fetch_bustype }, + { &busloc_setting, NULL, netdev_fetch_busloc }, + { &busid_setting, NULL, netdev_fetch_busid }, + { &chip_setting, NULL, netdev_fetch_chip }, +}; + +/** + * Store value of network device setting + * + * @v settings Settings block + * @v setting Setting to store + * @v data Setting data, or NULL to clear setting + * @v len Length of setting data + * @ret rc Return status code + */ +static int netdev_store ( struct settings *settings, + const struct setting *setting, + const void *data, size_t len ) { + struct net_device *netdev = container_of ( settings, struct net_device, + settings.settings ); + struct netdev_setting_operation *op; + unsigned int i; + + /* Handle network device-specific settings */ + for ( i = 0 ; i < ( sizeof ( netdev_setting_operations ) / + sizeof ( netdev_setting_operations[0] ) ) ; i++ ) { + op = &netdev_setting_operations[i]; + if ( setting_cmp ( setting, op->setting ) == 0 ) { + if ( op->store ) { + return op->store ( netdev, data, len ); + } else { + return -ENOTSUP; + } + } + } + + return generic_settings_store ( settings, setting, data, len ); +} + +/** + * Fetch value of network device setting + * + * @v settings Settings block + * @v setting Setting to fetch + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ +static int netdev_fetch ( struct settings *settings, struct setting *setting, + void *data, size_t len ) { + struct net_device *netdev = container_of ( settings, struct net_device, + settings.settings ); + struct netdev_setting_operation *op; + unsigned int i; + + /* Handle network device-specific settings */ + for ( i = 0 ; i < ( sizeof ( netdev_setting_operations ) / + sizeof ( netdev_setting_operations[0] ) ) ; i++ ) { + op = &netdev_setting_operations[i]; + if ( setting_cmp ( setting, op->setting ) == 0 ) + return op->fetch ( netdev, data, len ); + } + + return generic_settings_fetch ( settings, setting, data, len ); +} + +/** + * Clear network device settings + * + * @v settings Settings block + */ +static void netdev_clear ( struct settings *settings ) { + generic_settings_clear ( settings ); +} + +/** Network device configuration settings operations */ +struct settings_operations netdev_settings_operations = { + .store = netdev_store, + .fetch = netdev_fetch, + .clear = netdev_clear, +}; + +/** + * Redirect "netX" settings block + * + * @v settings Settings block + * @ret settings Underlying settings block + */ +static struct settings * netdev_redirect ( struct settings *settings ) { + struct net_device *netdev; + + /* Redirect to most recently opened network device */ + netdev = last_opened_netdev(); + if ( netdev ) { + return netdev_settings ( netdev ); + } else { + return settings; + } +} + +/** "netX" settings operations */ +static struct settings_operations netdev_redirect_settings_operations = { + .redirect = netdev_redirect, +}; + +/** "netX" settings */ +static struct settings netdev_redirect_settings = { + .refcnt = NULL, + .siblings = LIST_HEAD_INIT ( netdev_redirect_settings.siblings ), + .children = LIST_HEAD_INIT ( netdev_redirect_settings.children ), + .op = &netdev_redirect_settings_operations, +}; + +/** Initialise "netX" settings */ +static void netdev_redirect_settings_init ( void ) { + int rc; + + if ( ( rc = register_settings ( &netdev_redirect_settings, NULL, + "netX" ) ) != 0 ) { + DBG ( "Could not register netX settings: %s\n", + strerror ( rc ) ); + return; + } +} + +/** "netX" settings initialiser */ +struct init_fn netdev_redirect_settings_init_fn __init_fn ( INIT_LATE ) = { + .initialise = netdev_redirect_settings_init, +}; diff --git a/qemu/roms/ipxe/src/net/netdevice.c b/qemu/roms/ipxe/src/net/netdevice.c new file mode 100644 index 000000000..a55e6b7d7 --- /dev/null +++ b/qemu/roms/ipxe/src/net/netdevice.c @@ -0,0 +1,1220 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <byteswap.h> +#include <string.h> +#include <errno.h> +#include <config/general.h> +#include <ipxe/if_ether.h> +#include <ipxe/iobuf.h> +#include <ipxe/tables.h> +#include <ipxe/process.h> +#include <ipxe/init.h> +#include <ipxe/malloc.h> +#include <ipxe/device.h> +#include <ipxe/errortab.h> +#include <ipxe/profile.h> +#include <ipxe/vlan.h> +#include <ipxe/netdevice.h> + +/** @file + * + * Network device management + * + */ + +/** List of network devices */ +struct list_head net_devices = LIST_HEAD_INIT ( net_devices ); + +/** List of open network devices, in reverse order of opening */ +static struct list_head open_net_devices = LIST_HEAD_INIT ( open_net_devices ); + +/** Network device index */ +static unsigned int netdev_index = 0; + +/** Network polling profiler */ +static struct profiler net_poll_profiler __profiler = { .name = "net.poll" }; + +/** Network receive profiler */ +static struct profiler net_rx_profiler __profiler = { .name = "net.rx" }; + +/** Network transmit profiler */ +static struct profiler net_tx_profiler __profiler = { .name = "net.tx" }; + +/** Default unknown link status code */ +#define EUNKNOWN_LINK_STATUS __einfo_error ( EINFO_EUNKNOWN_LINK_STATUS ) +#define EINFO_EUNKNOWN_LINK_STATUS \ + __einfo_uniqify ( EINFO_EINPROGRESS, 0x01, "Unknown" ) + +/** Default not-yet-attempted-configuration status code */ +#define EUNUSED_CONFIG __einfo_error ( EINFO_EUNUSED_CONFIG ) +#define EINFO_EUNUSED_CONFIG \ + __einfo_uniqify ( EINFO_EINPROGRESS, 0x02, "Unused" ) + +/** Default configuration-in-progress status code */ +#define EINPROGRESS_CONFIG __einfo_error ( EINFO_EINPROGRESS_CONFIG ) +#define EINFO_EINPROGRESS_CONFIG \ + __einfo_uniqify ( EINFO_EINPROGRESS, 0x03, "Incomplete" ) + +/** Default link-down status code */ +#define ENOTCONN_LINK_DOWN __einfo_error ( EINFO_ENOTCONN_LINK_DOWN ) +#define EINFO_ENOTCONN_LINK_DOWN \ + __einfo_uniqify ( EINFO_ENOTCONN, 0x01, "Down" ) + +/** Human-readable message for the default link statuses */ +struct errortab netdev_errors[] __errortab = { + __einfo_errortab ( EINFO_EUNKNOWN_LINK_STATUS ), + __einfo_errortab ( EINFO_ENOTCONN_LINK_DOWN ), + __einfo_errortab ( EINFO_EUNUSED_CONFIG ), + __einfo_errortab ( EINFO_EINPROGRESS_CONFIG ), +}; + +/** + * Check whether or not network device has a link-layer address + * + * @v netdev Network device + * @ret has_ll_addr Network device has a link-layer address + */ +static int netdev_has_ll_addr ( struct net_device *netdev ) { + uint8_t *ll_addr = netdev->ll_addr; + size_t remaining = sizeof ( netdev->ll_addr ); + + while ( remaining-- ) { + if ( *(ll_addr++) != 0 ) + return 1; + } + return 0; +} + +/** + * Notify drivers of network device or link state change + * + * @v netdev Network device + */ +static void netdev_notify ( struct net_device *netdev ) { + struct net_driver *driver; + + for_each_table_entry ( driver, NET_DRIVERS ) { + if ( driver->notify ) + driver->notify ( netdev ); + } +} + +/** + * Freeze network device receive queue processing + * + * @v netdev Network device + */ +void netdev_rx_freeze ( struct net_device *netdev ) { + + /* Mark receive queue processing as frozen */ + netdev->state |= NETDEV_RX_FROZEN; + + /* Notify drivers of change */ + netdev_notify ( netdev ); +} + +/** + * Unfreeze network device receive queue processing + * + * @v netdev Network device + */ +void netdev_rx_unfreeze ( struct net_device *netdev ) { + + /* Mark receive queue processing as not frozen */ + netdev->state &= ~NETDEV_RX_FROZEN; + + /* Notify drivers of change */ + netdev_notify ( netdev ); +} + +/** + * Mark network device as having a specific link state + * + * @v netdev Network device + * @v rc Link status code + */ +void netdev_link_err ( struct net_device *netdev, int rc ) { + + /* Record link state */ + netdev->link_rc = rc; + if ( netdev->link_rc == 0 ) { + DBGC ( netdev, "NETDEV %s link is up\n", netdev->name ); + } else { + DBGC ( netdev, "NETDEV %s link is down: %s\n", + netdev->name, strerror ( netdev->link_rc ) ); + } + + /* Notify drivers of link state change */ + netdev_notify ( netdev ); +} + +/** + * Mark network device as having link down + * + * @v netdev Network device + */ +void netdev_link_down ( struct net_device *netdev ) { + + /* Avoid clobbering a more detailed link status code, if one + * is already set. + */ + if ( ( netdev->link_rc == 0 ) || + ( netdev->link_rc == -EUNKNOWN_LINK_STATUS ) ) { + netdev_link_err ( netdev, -ENOTCONN_LINK_DOWN ); + } +} + +/** + * Record network device statistic + * + * @v stats Network device statistics + * @v rc Status code + */ +static void netdev_record_stat ( struct net_device_stats *stats, int rc ) { + struct net_device_error *error; + struct net_device_error *least_common_error; + unsigned int i; + + /* If this is not an error, just update the good counter */ + if ( rc == 0 ) { + stats->good++; + return; + } + + /* Update the bad counter */ + stats->bad++; + + /* Locate the appropriate error record */ + least_common_error = &stats->errors[0]; + for ( i = 0 ; i < ( sizeof ( stats->errors ) / + sizeof ( stats->errors[0] ) ) ; i++ ) { + error = &stats->errors[i]; + /* Update matching record, if found */ + if ( error->rc == rc ) { + error->count++; + return; + } + if ( error->count < least_common_error->count ) + least_common_error = error; + } + + /* Overwrite the least common error record */ + least_common_error->rc = rc; + least_common_error->count = 1; +} + +/** + * Transmit raw packet via network device + * + * @v netdev Network device + * @v iobuf I/O buffer + * @ret rc Return status code + * + * Transmits the packet via the specified network device. This + * function takes ownership of the I/O buffer. + */ +int netdev_tx ( struct net_device *netdev, struct io_buffer *iobuf ) { + int rc; + + DBGC2 ( netdev, "NETDEV %s transmitting %p (%p+%zx)\n", + netdev->name, iobuf, iobuf->data, iob_len ( iobuf ) ); + profile_start ( &net_tx_profiler ); + + /* Enqueue packet */ + list_add_tail ( &iobuf->list, &netdev->tx_queue ); + + /* Avoid calling transmit() on unopened network devices */ + if ( ! netdev_is_open ( netdev ) ) { + rc = -ENETUNREACH; + goto err; + } + + /* Discard packet (for test purposes) if applicable */ + if ( ( NETDEV_DISCARD_RATE > 0 ) && + ( ( random() % NETDEV_DISCARD_RATE ) == 0 ) ) { + rc = -EAGAIN; + goto err; + } + + /* Transmit packet */ + if ( ( rc = netdev->op->transmit ( netdev, iobuf ) ) != 0 ) + goto err; + + profile_stop ( &net_tx_profiler ); + return 0; + + err: + netdev_tx_complete_err ( netdev, iobuf, rc ); + return rc; +} + +/** + * Defer transmitted packet + * + * @v netdev Network device + * @v iobuf I/O buffer + * + * Drivers may call netdev_tx_defer() if there is insufficient space + * in the transmit descriptor ring. Any packets deferred in this way + * will be automatically retransmitted as soon as space becomes + * available (i.e. as soon as the driver calls netdev_tx_complete()). + * + * The packet must currently be in the network device's TX queue. + * + * Drivers utilising netdev_tx_defer() must ensure that space in the + * transmit descriptor ring is freed up @b before calling + * netdev_tx_complete(). For example, if the ring is modelled using a + * producer counter and a consumer counter, then the consumer counter + * must be incremented before the call to netdev_tx_complete(). + * Failure to do this will cause the retransmitted packet to be + * immediately redeferred (which will result in out-of-order + * transmissions and other nastiness). + */ +void netdev_tx_defer ( struct net_device *netdev, struct io_buffer *iobuf ) { + + /* Catch data corruption as early as possible */ + list_check_contains_entry ( iobuf, &netdev->tx_queue, list ); + + /* Remove from transmit queue */ + list_del ( &iobuf->list ); + + /* Add to deferred transmit queue */ + list_add_tail ( &iobuf->list, &netdev->tx_deferred ); + + /* Record "out of space" statistic */ + netdev_tx_err ( netdev, NULL, -ENOBUFS ); +} + +/** + * Discard transmitted packet + * + * @v netdev Network device + * @v iobuf I/O buffer, or NULL + * @v rc Packet status code + * + * The packet is discarded and a TX error is recorded. This function + * takes ownership of the I/O buffer. + */ +void netdev_tx_err ( struct net_device *netdev, + struct io_buffer *iobuf, int rc ) { + + /* Update statistics counter */ + netdev_record_stat ( &netdev->tx_stats, rc ); + if ( rc == 0 ) { + DBGC2 ( netdev, "NETDEV %s transmission %p complete\n", + netdev->name, iobuf ); + } else { + DBGC ( netdev, "NETDEV %s transmission %p failed: %s\n", + netdev->name, iobuf, strerror ( rc ) ); + } + + /* Discard packet */ + free_iob ( iobuf ); +} + +/** + * Complete network transmission + * + * @v netdev Network device + * @v iobuf I/O buffer + * @v rc Packet status code + * + * The packet must currently be in the network device's TX queue. + */ +void netdev_tx_complete_err ( struct net_device *netdev, + struct io_buffer *iobuf, int rc ) { + + /* Catch data corruption as early as possible */ + list_check_contains_entry ( iobuf, &netdev->tx_queue, list ); + + /* Dequeue and free I/O buffer */ + list_del ( &iobuf->list ); + netdev_tx_err ( netdev, iobuf, rc ); + + /* Transmit first pending packet, if any */ + if ( ( iobuf = list_first_entry ( &netdev->tx_deferred, + struct io_buffer, list ) ) != NULL ) { + list_del ( &iobuf->list ); + netdev_tx ( netdev, iobuf ); + } +} + +/** + * Complete network transmission + * + * @v netdev Network device + * @v rc Packet status code + * + * Completes the oldest outstanding packet in the TX queue. + */ +void netdev_tx_complete_next_err ( struct net_device *netdev, int rc ) { + struct io_buffer *iobuf; + + if ( ( iobuf = list_first_entry ( &netdev->tx_queue, struct io_buffer, + list ) ) != NULL ) { + netdev_tx_complete_err ( netdev, iobuf, rc ); + } +} + +/** + * Flush device's transmit queue + * + * @v netdev Network device + */ +static void netdev_tx_flush ( struct net_device *netdev ) { + + /* Discard any packets in the TX queue. This will also cause + * any packets in the deferred TX queue to be discarded + * automatically. + */ + while ( ! list_empty ( &netdev->tx_queue ) ) { + netdev_tx_complete_next_err ( netdev, -ECANCELED ); + } + assert ( list_empty ( &netdev->tx_queue ) ); + assert ( list_empty ( &netdev->tx_deferred ) ); +} + +/** + * Add packet to receive queue + * + * @v netdev Network device + * @v iobuf I/O buffer, or NULL + * + * The packet is added to the network device's RX queue. This + * function takes ownership of the I/O buffer. + */ +void netdev_rx ( struct net_device *netdev, struct io_buffer *iobuf ) { + + DBGC2 ( netdev, "NETDEV %s received %p (%p+%zx)\n", + netdev->name, iobuf, iobuf->data, iob_len ( iobuf ) ); + + /* Discard packet (for test purposes) if applicable */ + if ( ( NETDEV_DISCARD_RATE > 0 ) && + ( ( random() % NETDEV_DISCARD_RATE ) == 0 ) ) { + netdev_rx_err ( netdev, iobuf, -EAGAIN ); + return; + } + + /* Enqueue packet */ + list_add_tail ( &iobuf->list, &netdev->rx_queue ); + + /* Update statistics counter */ + netdev_record_stat ( &netdev->rx_stats, 0 ); +} + +/** + * Discard received packet + * + * @v netdev Network device + * @v iobuf I/O buffer, or NULL + * @v rc Packet status code + * + * The packet is discarded and an RX error is recorded. This function + * takes ownership of the I/O buffer. @c iobuf may be NULL if, for + * example, the net device wishes to report an error due to being + * unable to allocate an I/O buffer. + */ +void netdev_rx_err ( struct net_device *netdev, + struct io_buffer *iobuf, int rc ) { + + DBGC ( netdev, "NETDEV %s failed to receive %p: %s\n", + netdev->name, iobuf, strerror ( rc ) ); + + /* Discard packet */ + free_iob ( iobuf ); + + /* Update statistics counter */ + netdev_record_stat ( &netdev->rx_stats, rc ); +} + +/** + * Poll for completed and received packets on network device + * + * @v netdev Network device + * + * Polls the network device for completed transmissions and received + * packets. Any received packets will be added to the RX packet queue + * via netdev_rx(). + */ +void netdev_poll ( struct net_device *netdev ) { + + if ( netdev_is_open ( netdev ) ) + netdev->op->poll ( netdev ); +} + +/** + * Remove packet from device's receive queue + * + * @v netdev Network device + * @ret iobuf I/O buffer, or NULL + * + * Removes the first packet from the device's RX queue and returns it. + * Ownership of the packet is transferred to the caller. + */ +struct io_buffer * netdev_rx_dequeue ( struct net_device *netdev ) { + struct io_buffer *iobuf; + + iobuf = list_first_entry ( &netdev->rx_queue, struct io_buffer, list ); + if ( ! iobuf ) + return NULL; + + list_del ( &iobuf->list ); + return iobuf; +} + +/** + * Flush device's receive queue + * + * @v netdev Network device + */ +static void netdev_rx_flush ( struct net_device *netdev ) { + struct io_buffer *iobuf; + + /* Discard any packets in the RX queue */ + while ( ( iobuf = netdev_rx_dequeue ( netdev ) ) ) { + netdev_rx_err ( netdev, iobuf, -ECANCELED ); + } +} + +/** + * Finish network device configuration + * + * @v config Network device configuration + * @v rc Reason for completion + */ +static void netdev_config_close ( struct net_device_configuration *config, + int rc ) { + struct net_device_configurator *configurator = config->configurator; + struct net_device *netdev = config->netdev; + + /* Restart interface */ + intf_restart ( &config->job, rc ); + + /* Record configuration result */ + config->rc = rc; + if ( rc == 0 ) { + DBGC ( netdev, "NETDEV %s configured via %s\n", + netdev->name, configurator->name ); + } else { + DBGC ( netdev, "NETDEV %s configuration via %s failed: %s\n", + netdev->name, configurator->name, strerror ( rc ) ); + } +} + +/** Network device configuration interface operations */ +static struct interface_operation netdev_config_ops[] = { + INTF_OP ( intf_close, struct net_device_configuration *, + netdev_config_close ), +}; + +/** Network device configuration interface descriptor */ +static struct interface_descriptor netdev_config_desc = + INTF_DESC ( struct net_device_configuration, job, netdev_config_ops ); + +/** + * Free network device + * + * @v refcnt Network device reference counter + */ +static void free_netdev ( struct refcnt *refcnt ) { + struct net_device *netdev = + container_of ( refcnt, struct net_device, refcnt ); + + netdev_tx_flush ( netdev ); + netdev_rx_flush ( netdev ); + clear_settings ( netdev_settings ( netdev ) ); + free ( netdev ); +} + +/** + * Allocate network device + * + * @v priv_len Length of private data area (net_device::priv) + * @ret netdev Network device, or NULL + * + * Allocates space for a network device and its private data area. + */ +struct net_device * alloc_netdev ( size_t priv_len ) { + struct net_device *netdev; + struct net_device_configurator *configurator; + struct net_device_configuration *config; + unsigned int num_configs; + size_t confs_len; + size_t total_len; + + num_configs = table_num_entries ( NET_DEVICE_CONFIGURATORS ); + confs_len = ( num_configs * sizeof ( netdev->configs[0] ) ); + total_len = ( sizeof ( *netdev ) + confs_len + priv_len ); + netdev = zalloc ( total_len ); + if ( netdev ) { + ref_init ( &netdev->refcnt, free_netdev ); + netdev->link_rc = -EUNKNOWN_LINK_STATUS; + INIT_LIST_HEAD ( &netdev->tx_queue ); + INIT_LIST_HEAD ( &netdev->tx_deferred ); + INIT_LIST_HEAD ( &netdev->rx_queue ); + netdev_settings_init ( netdev ); + config = netdev->configs; + for_each_table_entry ( configurator, NET_DEVICE_CONFIGURATORS ){ + config->netdev = netdev; + config->configurator = configurator; + config->rc = -EUNUSED_CONFIG; + intf_init ( &config->job, &netdev_config_desc, + &netdev->refcnt ); + config++; + } + netdev->priv = ( ( ( void * ) netdev ) + sizeof ( *netdev ) + + confs_len ); + } + return netdev; +} + +/** + * Register network device + * + * @v netdev Network device + * @ret rc Return status code + * + * Gives the network device a name and adds it to the list of network + * devices. + */ +int register_netdev ( struct net_device *netdev ) { + struct ll_protocol *ll_protocol = netdev->ll_protocol; + struct net_driver *driver; + struct net_device *duplicate; + uint32_t seed; + int rc; + + /* Set initial link-layer address, if not already set */ + if ( ! netdev_has_ll_addr ( netdev ) ) { + ll_protocol->init_addr ( netdev->hw_addr, netdev->ll_addr ); + } + + /* Reject network devices that are already available via a + * different hardware device. + */ + duplicate = find_netdev_by_ll_addr ( ll_protocol, netdev->ll_addr ); + if ( duplicate && ( duplicate->dev != netdev->dev ) ) { + DBGC ( netdev, "NETDEV rejecting duplicate (phys %s) of %s " + "(phys %s)\n", netdev->dev->name, duplicate->name, + duplicate->dev->name ); + rc = -EEXIST; + goto err_duplicate; + } + + /* Record device index and create device name */ + netdev->index = netdev_index++; + if ( netdev->name[0] == '\0' ) { + snprintf ( netdev->name, sizeof ( netdev->name ), "net%d", + netdev->index ); + } + + /* Use least significant bits of the link-layer address to + * improve the randomness of the (non-cryptographic) random + * number generator. + */ + memcpy ( &seed, ( netdev->ll_addr + ll_protocol->ll_addr_len + - sizeof ( seed ) ), sizeof ( seed ) ); + srand ( rand() ^ seed ); + + /* Add to device list */ + netdev_get ( netdev ); + list_add_tail ( &netdev->list, &net_devices ); + DBGC ( netdev, "NETDEV %s registered (phys %s hwaddr %s)\n", + netdev->name, netdev->dev->name, + netdev_addr ( netdev ) ); + + /* Register per-netdev configuration settings */ + if ( ( rc = register_settings ( netdev_settings ( netdev ), + NULL, netdev->name ) ) != 0 ) { + DBGC ( netdev, "NETDEV %s could not register settings: %s\n", + netdev->name, strerror ( rc ) ); + goto err_register_settings; + } + + /* Probe device */ + for_each_table_entry ( driver, NET_DRIVERS ) { + if ( driver->probe && ( rc = driver->probe ( netdev ) ) != 0 ) { + DBGC ( netdev, "NETDEV %s could not add %s device: " + "%s\n", netdev->name, driver->name, + strerror ( rc ) ); + goto err_probe; + } + } + + return 0; + + err_probe: + for_each_table_entry_continue_reverse ( driver, NET_DRIVERS ) { + if ( driver->remove ) + driver->remove ( netdev ); + } + clear_settings ( netdev_settings ( netdev ) ); + unregister_settings ( netdev_settings ( netdev ) ); + err_register_settings: + err_duplicate: + return rc; +} + +/** + * Open network device + * + * @v netdev Network device + * @ret rc Return status code + */ +int netdev_open ( struct net_device *netdev ) { + int rc; + + /* Do nothing if device is already open */ + if ( netdev->state & NETDEV_OPEN ) + return 0; + + DBGC ( netdev, "NETDEV %s opening\n", netdev->name ); + + /* Mark as opened */ + netdev->state |= NETDEV_OPEN; + + /* Open the device */ + if ( ( rc = netdev->op->open ( netdev ) ) != 0 ) + goto err; + + /* Add to head of open devices list */ + list_add ( &netdev->open_list, &open_net_devices ); + + /* Notify drivers of device state change */ + netdev_notify ( netdev ); + + return 0; + + err: + netdev->state &= ~NETDEV_OPEN; + return rc; +} + +/** + * Close network device + * + * @v netdev Network device + */ +void netdev_close ( struct net_device *netdev ) { + unsigned int num_configs; + unsigned int i; + + /* Do nothing if device is already closed */ + if ( ! ( netdev->state & NETDEV_OPEN ) ) + return; + + DBGC ( netdev, "NETDEV %s closing\n", netdev->name ); + + /* Terminate any ongoing configurations. Use intf_close() + * rather than intf_restart() to allow the cancellation to be + * reported back to us if a configuration is actually in + * progress. + */ + num_configs = table_num_entries ( NET_DEVICE_CONFIGURATORS ); + for ( i = 0 ; i < num_configs ; i++ ) + intf_close ( &netdev->configs[i].job, -ECANCELED ); + + /* Remove from open devices list */ + list_del ( &netdev->open_list ); + + /* Mark as closed */ + netdev->state &= ~NETDEV_OPEN; + + /* Notify drivers of device state change */ + netdev_notify ( netdev ); + + /* Close the device */ + netdev->op->close ( netdev ); + + /* Flush TX and RX queues */ + netdev_tx_flush ( netdev ); + netdev_rx_flush ( netdev ); +} + +/** + * Unregister network device + * + * @v netdev Network device + * + * Removes the network device from the list of network devices. + */ +void unregister_netdev ( struct net_device *netdev ) { + struct net_driver *driver; + + /* Ensure device is closed */ + netdev_close ( netdev ); + + /* Remove device */ + for_each_table_entry_reverse ( driver, NET_DRIVERS ) { + if ( driver->remove ) + driver->remove ( netdev ); + } + + /* Unregister per-netdev configuration settings */ + clear_settings ( netdev_settings ( netdev ) ); + unregister_settings ( netdev_settings ( netdev ) ); + + /* Remove from device list */ + DBGC ( netdev, "NETDEV %s unregistered\n", netdev->name ); + list_del ( &netdev->list ); + netdev_put ( netdev ); + + /* Reset network device index if no devices remain */ + if ( list_empty ( &net_devices ) ) + netdev_index = 0; +} + +/** Enable or disable interrupts + * + * @v netdev Network device + * @v enable Interrupts should be enabled + */ +void netdev_irq ( struct net_device *netdev, int enable ) { + + /* Do nothing if device does not support interrupts */ + if ( ! netdev_irq_supported ( netdev ) ) + return; + + /* Enable or disable device interrupts */ + netdev->op->irq ( netdev, enable ); + + /* Record interrupt enabled state */ + netdev->state &= ~NETDEV_IRQ_ENABLED; + if ( enable ) + netdev->state |= NETDEV_IRQ_ENABLED; +} + +/** + * Get network device by name + * + * @v name Network device name + * @ret netdev Network device, or NULL + */ +struct net_device * find_netdev ( const char *name ) { + struct net_device *netdev; + + /* Allow "netX" shortcut */ + if ( strcmp ( name, "netX" ) == 0 ) + return last_opened_netdev(); + + /* Identify network device by name */ + list_for_each_entry ( netdev, &net_devices, list ) { + if ( strcmp ( netdev->name, name ) == 0 ) + return netdev; + } + + return NULL; +} + +/** + * Get network device by index + * + * @v index Network device index + * @ret netdev Network device, or NULL + */ +struct net_device * find_netdev_by_index ( unsigned int index ) { + struct net_device *netdev; + + /* Identify network device by index */ + list_for_each_entry ( netdev, &net_devices, list ) { + if ( netdev->index == index ) + return netdev; + } + + return NULL; +} + +/** + * Get network device by PCI bus:dev.fn address + * + * @v bus_type Bus type + * @v location Bus location + * @ret netdev Network device, or NULL + */ +struct net_device * find_netdev_by_location ( unsigned int bus_type, + unsigned int location ) { + struct net_device *netdev; + + list_for_each_entry ( netdev, &net_devices, list ) { + if ( ( netdev->dev->desc.bus_type == bus_type ) && + ( netdev->dev->desc.location == location ) ) + return netdev; + } + + return NULL; +} + +/** + * Get network device by link-layer address + * + * @v ll_protocol Link-layer protocol + * @v ll_addr Link-layer address + * @ret netdev Network device, or NULL + */ +struct net_device * find_netdev_by_ll_addr ( struct ll_protocol *ll_protocol, + const void *ll_addr ) { + struct net_device *netdev; + + list_for_each_entry ( netdev, &net_devices, list ) { + if ( ( netdev->ll_protocol == ll_protocol ) && + ( memcmp ( netdev->ll_addr, ll_addr, + ll_protocol->ll_addr_len ) == 0 ) ) + return netdev; + } + + return NULL; +} + +/** + * Get most recently opened network device + * + * @ret netdev Most recently opened network device, or NULL + */ +struct net_device * last_opened_netdev ( void ) { + struct net_device *netdev; + + netdev = list_first_entry ( &open_net_devices, struct net_device, + open_list ); + if ( ! netdev ) + return NULL; + + assert ( netdev_is_open ( netdev ) ); + return netdev; +} + +/** + * Transmit network-layer packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v ll_dest Destination link-layer address + * @v ll_source Source link-layer address + * @ret rc Return status code + * + * Prepends link-layer headers to the I/O buffer and transmits the + * packet via the specified network device. This function takes + * ownership of the I/O buffer. + */ +int net_tx ( struct io_buffer *iobuf, struct net_device *netdev, + struct net_protocol *net_protocol, const void *ll_dest, + const void *ll_source ) { + struct ll_protocol *ll_protocol = netdev->ll_protocol; + int rc; + + /* Add link-layer header */ + if ( ( rc = ll_protocol->push ( netdev, iobuf, ll_dest, ll_source, + net_protocol->net_proto ) ) != 0 ) { + /* Record error for diagnosis */ + netdev_tx_err ( netdev, iobuf, rc ); + return rc; + } + + /* Transmit packet */ + return netdev_tx ( netdev, iobuf ); +} + +/** + * Process received network-layer packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v net_proto Network-layer protocol, in network-byte order + * @v ll_dest Destination link-layer address + * @v ll_source Source link-layer address + * @v flags Packet flags + * @ret rc Return status code + */ +int net_rx ( struct io_buffer *iobuf, struct net_device *netdev, + uint16_t net_proto, const void *ll_dest, const void *ll_source, + unsigned int flags ) { + struct net_protocol *net_protocol; + + /* Hand off to network-layer protocol, if any */ + for_each_table_entry ( net_protocol, NET_PROTOCOLS ) { + if ( net_protocol->net_proto == net_proto ) + return net_protocol->rx ( iobuf, netdev, ll_dest, + ll_source, flags ); + } + + DBGC ( netdev, "NETDEV %s unknown network protocol %04x\n", + netdev->name, ntohs ( net_proto ) ); + free_iob ( iobuf ); + return -ENOTSUP; +} + +/** + * Poll the network stack + * + * This polls all interfaces for received packets, and processes + * packets from the RX queue. + */ +void net_poll ( void ) { + struct net_device *netdev; + struct io_buffer *iobuf; + struct ll_protocol *ll_protocol; + const void *ll_dest; + const void *ll_source; + uint16_t net_proto; + unsigned int flags; + int rc; + + /* Poll and process each network device */ + list_for_each_entry ( netdev, &net_devices, list ) { + + /* Poll for new packets */ + profile_start ( &net_poll_profiler ); + netdev_poll ( netdev ); + profile_stop ( &net_poll_profiler ); + + /* Leave received packets on the queue if receive + * queue processing is currently frozen. This will + * happen when the raw packets are to be manually + * dequeued using netdev_rx_dequeue(), rather than + * processed via the usual networking stack. + */ + if ( netdev_rx_frozen ( netdev ) ) + continue; + + /* Process all received packets */ + while ( ( iobuf = netdev_rx_dequeue ( netdev ) ) ) { + + DBGC2 ( netdev, "NETDEV %s processing %p (%p+%zx)\n", + netdev->name, iobuf, iobuf->data, + iob_len ( iobuf ) ); + profile_start ( &net_rx_profiler ); + + /* Remove link-layer header */ + ll_protocol = netdev->ll_protocol; + if ( ( rc = ll_protocol->pull ( netdev, iobuf, + &ll_dest, &ll_source, + &net_proto, + &flags ) ) != 0 ) { + free_iob ( iobuf ); + continue; + } + + /* Hand packet to network layer */ + if ( ( rc = net_rx ( iob_disown ( iobuf ), netdev, + net_proto, ll_dest, + ll_source, flags ) ) != 0 ) { + /* Record error for diagnosis */ + netdev_rx_err ( netdev, NULL, rc ); + } + profile_stop ( &net_rx_profiler ); + } + } +} + +/** + * Single-step the network stack + * + * @v process Network stack process + */ +static void net_step ( struct process *process __unused ) { + net_poll(); +} + +/** + * Get the VLAN tag (when VLAN support is not present) + * + * @v netdev Network device + * @ret tag 0, indicating that device is not a VLAN device + */ +__weak unsigned int vlan_tag ( struct net_device *netdev __unused ) { + return 0; +} + +/** + * Identify VLAN device (when VLAN support is not present) + * + * @v trunk Trunk network device + * @v tag VLAN tag + * @ret netdev VLAN device, if any + */ +__weak struct net_device * vlan_find ( struct net_device *trunk __unused, + unsigned int tag __unused ) { + return NULL; +} + +/** Networking stack process */ +PERMANENT_PROCESS ( net_process, net_step ); + +/** + * Discard some cached network device data + * + * @ret discarded Number of cached items discarded + */ +static unsigned int net_discard ( void ) { + struct net_device *netdev; + struct io_buffer *iobuf; + unsigned int discarded = 0; + + /* Try to drop one deferred TX packet from each network device */ + for_each_netdev ( netdev ) { + if ( ( iobuf = list_first_entry ( &netdev->tx_deferred, + struct io_buffer, + list ) ) != NULL ) { + + /* Discard first deferred packet */ + list_del ( &iobuf->list ); + free_iob ( iobuf ); + + /* Report discard */ + discarded++; + } + } + + return discarded; +} + +/** Network device cache discarder */ +struct cache_discarder net_discarder __cache_discarder ( CACHE_NORMAL ) = { + .discard = net_discard, +}; + +/** + * Find network device configurator + * + * @v name Name + * @ret configurator Network device configurator, or NULL + */ +struct net_device_configurator * find_netdev_configurator ( const char *name ) { + struct net_device_configurator *configurator; + + for_each_table_entry ( configurator, NET_DEVICE_CONFIGURATORS ) { + if ( strcmp ( configurator->name, name ) == 0 ) + return configurator; + } + return NULL; +} + +/** + * Start network device configuration + * + * @v netdev Network device + * @v configurator Network device configurator + * @ret rc Return status code + */ +int netdev_configure ( struct net_device *netdev, + struct net_device_configurator *configurator ) { + struct net_device_configuration *config = + netdev_configuration ( netdev, configurator ); + int rc; + + /* Check applicability of configurator */ + if ( ! netdev_configurator_applies ( netdev, configurator ) ) { + DBGC ( netdev, "NETDEV %s does not support configuration via " + "%s\n", netdev->name, configurator->name ); + return -ENOTSUP; + } + + /* Terminate any ongoing configuration */ + intf_restart ( &config->job, -ECANCELED ); + + /* Mark configuration as being in progress */ + config->rc = -EINPROGRESS_CONFIG; + + DBGC ( netdev, "NETDEV %s starting configuration via %s\n", + netdev->name, configurator->name ); + + /* Start configuration */ + if ( ( rc = configurator->start ( &config->job, netdev ) ) != 0 ) { + DBGC ( netdev, "NETDEV %s could not start configuration via " + "%s: %s\n", netdev->name, configurator->name, + strerror ( rc ) ); + config->rc = rc; + return rc; + } + + return 0; +} + +/** + * Start network device configuration via all supported configurators + * + * @v netdev Network device + * @ret rc Return status code + */ +int netdev_configure_all ( struct net_device *netdev ) { + struct net_device_configurator *configurator; + int rc; + + /* Start configuration for each configurator */ + for_each_table_entry ( configurator, NET_DEVICE_CONFIGURATORS ) { + + /* Skip any inapplicable configurators */ + if ( ! netdev_configurator_applies ( netdev, configurator ) ) + continue; + + /* Start configuration */ + if ( ( rc = netdev_configure ( netdev, configurator ) ) != 0 ) + return rc; + } + + return 0; +} + +/** + * Check if network device has a configuration with a specified status code + * + * @v netdev Network device + * @v rc Status code + * @ret has_rc Network device has a configuration with this status code + */ +static int netdev_has_configuration_rc ( struct net_device *netdev, int rc ) { + unsigned int num_configs; + unsigned int i; + + num_configs = table_num_entries ( NET_DEVICE_CONFIGURATORS ); + for ( i = 0 ; i < num_configs ; i++ ) { + if ( netdev->configs[i].rc == rc ) + return 1; + } + return 0; +} + +/** + * Check if network device configuration is in progress + * + * @v netdev Network device + * @ret is_in_progress Network device configuration is in progress + */ +int netdev_configuration_in_progress ( struct net_device *netdev ) { + + return netdev_has_configuration_rc ( netdev, -EINPROGRESS_CONFIG ); +} + +/** + * Check if network device has at least one successful configuration + * + * @v netdev Network device + * @v configurator Configurator + * @ret rc Return status code + */ +int netdev_configuration_ok ( struct net_device *netdev ) { + + return netdev_has_configuration_rc ( netdev, 0 ); +} diff --git a/qemu/roms/ipxe/src/net/nullnet.c b/qemu/roms/ipxe/src/net/nullnet.c new file mode 100644 index 000000000..4ac50f64b --- /dev/null +++ b/qemu/roms/ipxe/src/net/nullnet.c @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <errno.h> +#include <ipxe/iobuf.h> +#include <ipxe/netdevice.h> + +/** @file + * + * Null network device + * + */ + +static int null_open ( struct net_device *netdev __unused ) { + return -ENODEV; +}; + +static void null_close ( struct net_device *netdev __unused ) { + /* Do nothing */ +}; + +static int null_transmit ( struct net_device *netdev __unused, + struct io_buffer *iobuf __unused ) { + return -ENODEV; +}; + +static void null_poll ( struct net_device *netdev __unused ) { + /* Do nothing */ +} + +static void null_irq ( struct net_device *netdev __unused, + int enable __unused ) { + /* Do nothing */ +} + +struct net_device_operations null_netdev_operations = { + .open = null_open, + .close = null_close, + .transmit = null_transmit, + .poll = null_poll, + .irq = null_irq, +}; diff --git a/qemu/roms/ipxe/src/net/oncrpc/mount.c b/qemu/roms/ipxe/src/net/oncrpc/mount.c new file mode 100644 index 000000000..8838a147c --- /dev/null +++ b/qemu/roms/ipxe/src/net/oncrpc/mount.c @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2013 Marin Hannache <ipxe@mareo.fr>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <libgen.h> +#include <byteswap.h> +#include <ipxe/time.h> +#include <ipxe/iobuf.h> +#include <ipxe/open.h> +#include <ipxe/features.h> +#include <ipxe/oncrpc.h> +#include <ipxe/oncrpc_iob.h> +#include <ipxe/nfs.h> +#include <ipxe/mount.h> + +/** @file + * + * NFS MOUNT protocol + * + */ + +/** MNT procedure number */ +#define MOUNT_MNT 1 +/** UMNT procedure number */ +#define MOUNT_UMNT 3 + +/** + * Send a MNT request + * + * @v intf Interface to send the request on + * @v session ONC RPC session + * @v mountpoinrt The path of the directory to mount. + * @ret rc Return status code + */ +int mount_mnt ( struct interface *intf, struct oncrpc_session *session, + const char *mountpoint ) { + struct oncrpc_field fields[] = { + ONCRPC_FIELD ( str, mountpoint ), + ONCRPC_FIELD_END, + }; + + return oncrpc_call ( intf, session, MOUNT_MNT, fields ); +} + +/** + * Send a UMNT request + * + * @v intf Interface to send the request on + * @v session ONC RPC session + * @v mountpoinrt The path of the directory to unmount. + * @ret rc Return status code + */ +int mount_umnt ( struct interface *intf, struct oncrpc_session *session, + const char *mountpoint ) { + struct oncrpc_field fields[] = { + ONCRPC_FIELD ( str, mountpoint ), + ONCRPC_FIELD_END, + }; + + return oncrpc_call ( intf, session, MOUNT_UMNT, fields ); +} + +/** + * Parse an MNT reply + * + * @v mnt_reply A structure where the data will be saved + * @v reply The ONC RPC reply to get data from + * @ret rc Return status code + */ +int mount_get_mnt_reply ( struct mount_mnt_reply *mnt_reply, + struct oncrpc_reply *reply ) { + if ( ! mnt_reply || ! reply ) + return -EINVAL; + + mnt_reply->status = oncrpc_iob_get_int ( reply->data ); + + switch ( mnt_reply->status ) + { + case MNT3_OK: + break; + case MNT3ERR_NOENT: + return -ENOENT; + case MNT3ERR_IO: + return -EIO; + case MNT3ERR_ACCES: + return -EACCES; + case MNT3ERR_NOTDIR: + return -ENOTDIR; + case MNT3ERR_NAMETOOLONG: + return -ENAMETOOLONG; + default: + return -EPROTO; + } + + nfs_iob_get_fh ( reply->data, &mnt_reply->fh ); + + return 0; +} diff --git a/qemu/roms/ipxe/src/net/oncrpc/nfs.c b/qemu/roms/ipxe/src/net/oncrpc/nfs.c new file mode 100644 index 000000000..b6118f91a --- /dev/null +++ b/qemu/roms/ipxe/src/net/oncrpc/nfs.c @@ -0,0 +1,288 @@ +/* + * Copyright (C) 2013 Marin Hannache <ipxe@mareo.fr>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <libgen.h> +#include <byteswap.h> +#include <ipxe/time.h> +#include <ipxe/iobuf.h> +#include <ipxe/open.h> +#include <ipxe/features.h> +#include <ipxe/nfs.h> +#include <ipxe/oncrpc.h> +#include <ipxe/oncrpc_iob.h> +#include <ipxe/portmap.h> +#include <ipxe/mount.h> +#include <ipxe/settings.h> + +/** @file + * + * Network File System protocol + * + */ + +/** NFS LOOKUP procedure */ +#define NFS_LOOKUP 3 +/** NFS READLINK procedure */ +#define NFS_READLINK 5 +/** NFS READ procedure */ +#define NFS_READ 6 + +/** + * Extract a file handle from the beginning of an I/O buffer + * + * @v io_buf I/O buffer + * @v fh File handle + * @ret size Size of the data read + */ +size_t nfs_iob_get_fh ( struct io_buffer *io_buf, struct nfs_fh *fh ) { + fh->size = oncrpc_iob_get_int ( io_buf ); + + if ( fh->size > 64 ) + return sizeof ( uint32_t ); + + memcpy (fh->fh, io_buf->data, fh->size ); + iob_pull ( io_buf, fh->size ); + + return fh->size + sizeof ( uint32_t ); +} + +/** + * Add a file handle to the end of an I/O buffer + * + * @v io_buf I/O buffer + * @v fh File handle + * @ret size Size of the data written + */ +size_t nfs_iob_add_fh ( struct io_buffer *io_buf, const struct nfs_fh *fh ) { + size_t s; + + s = oncrpc_iob_add_int ( io_buf, fh->size ); + memcpy ( iob_put ( io_buf, fh->size ), &fh->fh, fh->size ); + + return s + fh->size; +} + +/** + * Send a LOOKUP request + * + * @v intf Interface to send the request on + * @v session ONC RPC session + * @v fh The file handle of the the directory + * @v filename The file name + * @ret rc Return status code + */ +int nfs_lookup ( struct interface *intf, struct oncrpc_session *session, + const struct nfs_fh *fh, const char *filename ) { + struct oncrpc_field fields[] = { + ONCRPC_SUBFIELD ( array, fh->size, &fh->fh ), + ONCRPC_FIELD ( str, filename ), + ONCRPC_FIELD_END, + }; + + return oncrpc_call ( intf, session, NFS_LOOKUP, fields ); +} + +/** + * Send a READLINK request + * + * @v intf Interface to send the request on + * @v session ONC RPC session + * @v fh The symlink file handle + * @ret rc Return status code + */ +int nfs_readlink ( struct interface *intf, struct oncrpc_session *session, + const struct nfs_fh *fh ) { + struct oncrpc_field fields[] = { + ONCRPC_SUBFIELD ( array, fh->size, &fh->fh ), + ONCRPC_FIELD_END, + }; + + return oncrpc_call ( intf, session, NFS_READLINK, fields ); +} + +/** + * Send a READ request + * + * @v intf Interface to send the request on + * @v session ONC RPC session + * @v fh The file handle + * @v offset Offset + * @v count Byte count + * @ret rc Return status code + */ +int nfs_read ( struct interface *intf, struct oncrpc_session *session, + const struct nfs_fh *fh, uint64_t offset, uint32_t count ) { + struct oncrpc_field fields[] = { + ONCRPC_SUBFIELD ( array, fh->size, &fh->fh ), + ONCRPC_FIELD ( int64, offset ), + ONCRPC_FIELD ( int32, count ), + ONCRPC_FIELD_END, + }; + + return oncrpc_call ( intf, session, NFS_READ, fields ); +} + +/** + * Parse a LOOKUP reply + * + * @v lookup_reply A structure where the data will be saved + * @v reply The ONC RPC reply to get data from + * @ret rc Return status code + */ +int nfs_get_lookup_reply ( struct nfs_lookup_reply *lookup_reply, + struct oncrpc_reply *reply ) { + if ( ! lookup_reply || ! reply ) + return -EINVAL; + + lookup_reply->status = oncrpc_iob_get_int ( reply->data ); + switch ( lookup_reply->status ) + { + case NFS3_OK: + break; + case NFS3ERR_PERM: + return -EPERM; + case NFS3ERR_NOENT: + return -ENOENT; + case NFS3ERR_IO: + return -EIO; + case NFS3ERR_ACCES: + return -EACCES; + case NFS3ERR_NOTDIR: + return -ENOTDIR; + case NFS3ERR_NAMETOOLONG: + return -ENAMETOOLONG; + case NFS3ERR_STALE: + return -ESTALE; + case NFS3ERR_BADHANDLE: + case NFS3ERR_SERVERFAULT: + default: + return -EPROTO; + } + + nfs_iob_get_fh ( reply->data, &lookup_reply->fh ); + + if ( oncrpc_iob_get_int ( reply->data ) == 1 ) + lookup_reply->ent_type = oncrpc_iob_get_int ( reply->data ); + + return 0; +} +/** + * Parse a READLINK reply + * + * @v readlink_reply A structure where the data will be saved + * @v reply The ONC RPC reply to get data from + * @ret rc Return status code + */ +int nfs_get_readlink_reply ( struct nfs_readlink_reply *readlink_reply, + struct oncrpc_reply *reply ) { + if ( ! readlink_reply || ! reply ) + return -EINVAL; + + readlink_reply->status = oncrpc_iob_get_int ( reply->data ); + switch ( readlink_reply->status ) + { + case NFS3_OK: + break; + case NFS3ERR_IO: + return -EIO; + case NFS3ERR_ACCES: + return -EACCES; + case NFS3ERR_INVAL: + return -EINVAL; + case NFS3ERR_NOTSUPP: + return -ENOTSUP; + case NFS3ERR_STALE: + return -ESTALE; + case NFS3ERR_BADHANDLE: + case NFS3ERR_SERVERFAULT: + default: + return -EPROTO; + } + + if ( oncrpc_iob_get_int ( reply->data ) == 1 ) + iob_pull ( reply->data, 5 * sizeof ( uint32_t ) + + 8 * sizeof ( uint64_t ) ); + + readlink_reply->path_len = oncrpc_iob_get_int ( reply->data ); + readlink_reply->path = reply->data->data; + + return 0; +} + +/** + * Parse a READ reply + * + * @v read_reply A structure where the data will be saved + * @v reply The ONC RPC reply to get data from + * @ret rc Return status code + */ +int nfs_get_read_reply ( struct nfs_read_reply *read_reply, + struct oncrpc_reply *reply ) { + if ( ! read_reply || ! reply ) + return -EINVAL; + + read_reply->status = oncrpc_iob_get_int ( reply->data ); + switch ( read_reply->status ) + { + case NFS3_OK: + break; + case NFS3ERR_PERM: + return -EPERM; + case NFS3ERR_NOENT: + return -ENOENT; + case NFS3ERR_IO: + return -EIO; + case NFS3ERR_NXIO: + return -ENXIO; + case NFS3ERR_ACCES: + return -EACCES; + case NFS3ERR_INVAL: + return -EINVAL; + case NFS3ERR_STALE: + return -ESTALE; + case NFS3ERR_BADHANDLE: + case NFS3ERR_SERVERFAULT: + default: + return -EPROTO; + } + + if ( oncrpc_iob_get_int ( reply->data ) == 1 ) + { + iob_pull ( reply->data, 5 * sizeof ( uint32_t ) ); + read_reply->filesize = oncrpc_iob_get_int64 ( reply->data ); + iob_pull ( reply->data, 7 * sizeof ( uint64_t ) ); + } + + read_reply->count = oncrpc_iob_get_int ( reply->data ); + read_reply->eof = oncrpc_iob_get_int ( reply->data ); + read_reply->data_len = oncrpc_iob_get_int ( reply->data ); + read_reply->data = reply->data->data; + + if ( read_reply->count != read_reply->data_len ) + return -EPROTO; + + return 0; +} + diff --git a/qemu/roms/ipxe/src/net/oncrpc/nfs_open.c b/qemu/roms/ipxe/src/net/oncrpc/nfs_open.c new file mode 100644 index 000000000..c0dceb82f --- /dev/null +++ b/qemu/roms/ipxe/src/net/oncrpc/nfs_open.c @@ -0,0 +1,683 @@ +/* + * Copyright (C) 2013 Marin Hannache <ipxe@mareo.fr>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <libgen.h> +#include <byteswap.h> +#include <ipxe/time.h> +#include <ipxe/socket.h> +#include <ipxe/tcpip.h> +#include <ipxe/in.h> +#include <ipxe/iobuf.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/uri.h> +#include <ipxe/features.h> +#include <ipxe/nfs.h> +#include <ipxe/nfs_open.h> +#include <ipxe/oncrpc.h> +#include <ipxe/oncrpc_iob.h> +#include <ipxe/portmap.h> +#include <ipxe/mount.h> +#include <ipxe/nfs_uri.h> + +/** @file + * + * Network File System protocol + * + */ + +FEATURE ( FEATURE_PROTOCOL, "NFS", DHCP_EB_FEATURE_NFS, 1 ); + +#define NFS_RSIZE 100000 + +enum nfs_pm_state { + NFS_PORTMAP_NONE = 0, + NFS_PORTMAP_MOUNTPORT, + NFS_PORTMAP_NFSPORT, + MFS_PORTMAP_CLOSED, +}; + +enum nfs_mount_state { + NFS_MOUNT_NONE = 0, + NFS_MOUNT_MNT, + NFS_MOUNT_UMNT, + NFS_MOUNT_CLOSED, +}; + +enum nfs_state { + NFS_NONE = 0, + NFS_LOOKUP, + NFS_LOOKUP_SENT, + NFS_READLINK, + NFS_READLINK_SENT, + NFS_READ, + NFS_READ_SENT, + NFS_CLOSED, +}; + +/** + * A NFS request + * + */ +struct nfs_request { + /** Reference counter */ + struct refcnt refcnt; + /** Data transfer interface */ + struct interface xfer; + + struct interface pm_intf; + struct interface mount_intf; + struct interface nfs_intf; + + enum nfs_pm_state pm_state; + enum nfs_mount_state mount_state; + enum nfs_state nfs_state; + + struct oncrpc_session pm_session; + struct oncrpc_session mount_session; + struct oncrpc_session nfs_session; + + struct oncrpc_cred_sys auth_sys; + + char * hostname; + struct nfs_uri uri; + + struct nfs_fh readlink_fh; + struct nfs_fh current_fh; + uint64_t file_offset; + + size_t remaining; + int eof; +}; + +static void nfs_step ( struct nfs_request *nfs ); + +/** + * Free NFS request + * + * @v refcnt Reference counter + */ +static void nfs_free ( struct refcnt *refcnt ) { + struct nfs_request *nfs; + + nfs = container_of ( refcnt, struct nfs_request, refcnt ); + DBGC ( nfs, "NFS_OPEN %p freed\n", nfs ); + + nfs_uri_free ( &nfs->uri ); + + free ( nfs->hostname ); + free ( nfs->auth_sys.hostname ); + free ( nfs ); +} + +/** + * Mark NFS operation as complete + * + * @v nfs NFS request + * @v rc Return status code + */ +static void nfs_done ( struct nfs_request *nfs, int rc ) { + if ( rc == 0 && nfs->nfs_state != NFS_CLOSED ) + rc = -ECONNRESET; + + DBGC ( nfs, "NFS_OPEN %p completed (%s)\n", nfs, strerror ( rc ) ); + + intf_shutdown ( &nfs->xfer, rc ); + intf_shutdown ( &nfs->pm_intf, rc ); + intf_shutdown ( &nfs->mount_intf, rc ); + intf_shutdown ( &nfs->nfs_intf, rc ); +} + +static int nfs_connect ( struct interface *intf, uint16_t port, + const char *hostname ) { + struct sockaddr_tcpip peer; + struct sockaddr_tcpip local; + + if ( ! intf || ! hostname || ! port ) + return -EINVAL; + + memset ( &peer, 0, sizeof ( peer ) ); + memset ( &local, 0, sizeof ( local ) ); + peer.st_port = htons ( port ); + + /* Use a local port < 1024 to avoid using the 'insecure' option in + * /etc/exports file. */ + local.st_flags = TCPIP_BIND_PRIVILEGED; + + return xfer_open_named_socket ( intf, SOCK_STREAM, + ( struct sockaddr * ) &peer, hostname, + ( struct sockaddr * ) &local ); +} + +static void nfs_pm_step ( struct nfs_request *nfs ) { + int rc; + + if ( ! xfer_window ( &nfs->pm_intf ) ) + return; + + if ( nfs->pm_state == NFS_PORTMAP_NONE ) { + DBGC ( nfs, "NFS_OPEN %p GETPORT call (mount)\n", nfs ); + + rc = portmap_getport ( &nfs->pm_intf, &nfs->pm_session, + ONCRPC_MOUNT, MOUNT_VERS, + PORTMAP_PROTO_TCP ); + if ( rc != 0 ) + goto err; + + nfs->pm_state++; + return; + } + + if ( nfs->pm_state == NFS_PORTMAP_NFSPORT ) { + DBGC ( nfs, "NFS_OPEN %p GETPORT call (nfs)\n", nfs ); + + rc = portmap_getport ( &nfs->pm_intf, &nfs->pm_session, + ONCRPC_NFS, NFS_VERS, + PORTMAP_PROTO_TCP ); + if ( rc != 0 ) + goto err; + + return; + } + + return; +err: + nfs_done ( nfs, rc ); +} + +static int nfs_pm_deliver ( struct nfs_request *nfs, + struct io_buffer *io_buf, + struct xfer_metadata *meta __unused ) { + int rc; + struct oncrpc_reply reply; + struct portmap_getport_reply getport_reply; + + oncrpc_get_reply ( &nfs->pm_session, &reply, io_buf ); + if ( reply.accept_state != 0 ) + { + rc = -EPROTO; + goto err; + } + + if ( nfs->pm_state == NFS_PORTMAP_MOUNTPORT ) { + DBGC ( nfs, "NFS_OPEN %p got GETPORT reply (mount)\n", nfs ); + + rc = portmap_get_getport_reply ( &getport_reply, &reply ); + if ( rc != 0 ) + goto err; + + rc = nfs_connect ( &nfs->mount_intf, getport_reply.port, + nfs->hostname ); + if ( rc != 0 ) + goto err; + + nfs->pm_state++; + nfs_pm_step ( nfs ); + + goto done; + } + + if ( nfs->pm_state == NFS_PORTMAP_NFSPORT ) { + DBGC ( nfs, "NFS_OPEN %p got GETPORT reply (nfs)\n", nfs ); + + rc = portmap_get_getport_reply ( &getport_reply, &reply ); + if ( rc != 0 ) + goto err; + + rc = nfs_connect ( &nfs->nfs_intf, getport_reply.port, + nfs->hostname ); + if ( rc != 0 ) + goto err; + + intf_shutdown ( &nfs->pm_intf, 0 ); + nfs->pm_state++; + + goto done; + } + + rc = -EPROTO; +err: + nfs_done ( nfs, rc ); +done: + free_iob ( io_buf ); + return 0; +} + +static void nfs_mount_step ( struct nfs_request *nfs ) { + int rc; + + if ( ! xfer_window ( &nfs->mount_intf ) ) + return; + + if ( nfs->mount_state == NFS_MOUNT_NONE ) { + DBGC ( nfs, "NFS_OPEN %p MNT call (%s)\n", nfs, + nfs_uri_mountpoint ( &nfs->uri ) ); + + rc = mount_mnt ( &nfs->mount_intf, &nfs->mount_session, + nfs_uri_mountpoint ( &nfs->uri ) ); + if ( rc != 0 ) + goto err; + + nfs->mount_state++; + return; + } + + if ( nfs->mount_state == NFS_MOUNT_UMNT ) { + DBGC ( nfs, "NFS_OPEN %p UMNT call\n", nfs ); + + rc = mount_umnt ( &nfs->mount_intf, &nfs->mount_session, + nfs_uri_mountpoint ( &nfs->uri ) ); + if ( rc != 0 ) + goto err; + } + + return; +err: + nfs_done ( nfs, rc ); +} + +static int nfs_mount_deliver ( struct nfs_request *nfs, + struct io_buffer *io_buf, + struct xfer_metadata *meta __unused ) { + int rc; + struct oncrpc_reply reply; + struct mount_mnt_reply mnt_reply; + + oncrpc_get_reply ( &nfs->mount_session, &reply, io_buf ); + if ( reply.accept_state != 0 ) + { + rc = -EPROTO; + goto err; + } + + if ( nfs->mount_state == NFS_MOUNT_MNT ) { + DBGC ( nfs, "NFS_OPEN %p got MNT reply\n", nfs ); + rc = mount_get_mnt_reply ( &mnt_reply, &reply ); + if ( rc != 0 ) { + switch ( mnt_reply.status ) { + case MNT3ERR_NOTDIR: + case MNT3ERR_NOENT: + case MNT3ERR_ACCES: + break; + + default: + goto err; + } + + if ( ! strcmp ( nfs_uri_mountpoint ( &nfs->uri ), + "/" ) ) + goto err; + + if ( ( rc = nfs_uri_next_mountpoint ( &nfs->uri ) ) ) + goto err; + + DBGC ( nfs, "NFS_OPEN %p MNT failed retrying with " \ + "%s\n", nfs, nfs_uri_mountpoint ( &nfs->uri ) ); + + nfs->mount_state--; + nfs_mount_step ( nfs ); + + goto done; + } + + nfs->current_fh = mnt_reply.fh; + nfs->nfs_state = NFS_LOOKUP; + nfs_step ( nfs ); + + goto done; + } + + if ( nfs->mount_state == NFS_MOUNT_UMNT ) { + DBGC ( nfs, "NFS_OPEN %p got UMNT reply\n", nfs ); + nfs_done ( nfs, 0 ); + + goto done; + } + + rc = -EPROTO; +err: + nfs_done ( nfs, rc ); +done: + free_iob ( io_buf ); + return 0; +} + +static void nfs_step ( struct nfs_request *nfs ) { + int rc; + char *path_component; + + if ( ! xfer_window ( &nfs->nfs_intf ) ) + return; + + if ( nfs->nfs_state == NFS_LOOKUP ) { + path_component = nfs_uri_next_path_component ( &nfs->uri ); + + DBGC ( nfs, "NFS_OPEN %p LOOKUP call (%s)\n", nfs, + path_component ); + + rc = nfs_lookup ( &nfs->nfs_intf, &nfs->nfs_session, + &nfs->current_fh, path_component ); + if ( rc != 0 ) + goto err; + + nfs->nfs_state++; + return; + } + + + if ( nfs->nfs_state == NFS_READLINK ) { + DBGC ( nfs, "NFS_OPEN %p READLINK call\n", nfs ); + + rc = nfs_readlink ( &nfs->nfs_intf, &nfs->nfs_session, + &nfs->readlink_fh ); + if ( rc != 0 ) + goto err; + + nfs->nfs_state++; + return; + } + + if ( nfs->nfs_state == NFS_READ ) { + DBGC ( nfs, "NFS_OPEN %p READ call\n", nfs ); + + rc = nfs_read ( &nfs->nfs_intf, &nfs->nfs_session, + &nfs->current_fh, nfs->file_offset, + NFS_RSIZE ); + if ( rc != 0 ) + goto err; + + nfs->nfs_state++; + return; + } + + return; +err: + nfs_done ( nfs, rc ); +} + +static int nfs_deliver ( struct nfs_request *nfs, + struct io_buffer *io_buf, + struct xfer_metadata *meta __unused ) { + int rc; + struct oncrpc_reply reply; + + if ( nfs->remaining == 0 ) { + oncrpc_get_reply ( &nfs->nfs_session, &reply, io_buf ); + if ( reply.accept_state != 0 ) { + rc = -EPROTO; + goto err; + } + } + + if ( nfs->nfs_state == NFS_LOOKUP_SENT ) { + struct nfs_lookup_reply lookup_reply; + + DBGC ( nfs, "NFS_OPEN %p got LOOKUP reply\n", nfs ); + + rc = nfs_get_lookup_reply ( &lookup_reply, &reply ); + if ( rc != 0 ) + goto err; + + if ( lookup_reply.ent_type == NFS_ATTR_SYMLINK ) { + nfs->readlink_fh = lookup_reply.fh; + nfs->nfs_state = NFS_READLINK; + } else { + nfs->current_fh = lookup_reply.fh; + + if ( nfs->uri.lookup_pos[0] == '\0' ) + nfs->nfs_state = NFS_READ; + else + nfs->nfs_state--; + } + + nfs_step ( nfs ); + goto done; + } + + if ( nfs->nfs_state == NFS_READLINK_SENT ) { + char *path; + struct nfs_readlink_reply readlink_reply; + + DBGC ( nfs, "NFS_OPEN %p got READLINK reply\n", nfs ); + + rc = nfs_get_readlink_reply ( &readlink_reply, &reply ); + if ( rc != 0 ) + goto err; + + if ( readlink_reply.path_len == 0 ) + { + rc = -EINVAL; + goto err; + } + + if ( ! ( path = strndup ( readlink_reply.path, + readlink_reply.path_len ) ) ) + { + rc = -ENOMEM; + goto err; + } + + nfs_uri_symlink ( &nfs->uri, path ); + free ( path ); + + DBGC ( nfs, "NFS_OPEN %p new path: %s\n", nfs, + nfs->uri.path ); + + nfs->nfs_state = NFS_LOOKUP; + nfs_step ( nfs ); + goto done; + } + + if ( nfs->nfs_state == NFS_READ_SENT ) { + if ( nfs->remaining == 0 ) { + DBGC ( nfs, "NFS_OPEN %p got READ reply\n", nfs ); + + struct nfs_read_reply read_reply; + + rc = nfs_get_read_reply ( &read_reply, &reply ); + if ( rc != 0 ) + goto err; + + if ( nfs->file_offset == 0 ) { + DBGC2 ( nfs, "NFS_OPEN %p size: %llu bytes\n", + nfs, read_reply.filesize ); + + xfer_seek ( &nfs->xfer, read_reply.filesize ); + xfer_seek ( &nfs->xfer, 0 ); + } + + nfs->file_offset += read_reply.count; + nfs->remaining = read_reply.count; + nfs->eof = read_reply.eof; + } + + size_t len = iob_len ( io_buf ); + if ( len > nfs->remaining ) + iob_unput ( io_buf, len - nfs->remaining ); + + nfs->remaining -= iob_len ( io_buf ); + + DBGC ( nfs, "NFS_OPEN %p got %zd bytes\n", nfs, + iob_len ( io_buf ) ); + + rc = xfer_deliver_iob ( &nfs->xfer, iob_disown ( io_buf ) ); + if ( rc != 0 ) + goto err; + + if ( nfs->remaining == 0 ) { + if ( ! nfs->eof ) { + nfs->nfs_state--; + nfs_step ( nfs ); + } else { + intf_shutdown ( &nfs->nfs_intf, 0 ); + nfs->nfs_state++; + nfs->mount_state++; + nfs_mount_step ( nfs ); + } + } + + return 0; + } + + rc = -EPROTO; +err: + nfs_done ( nfs, rc ); +done: + free_iob ( io_buf ); + return 0; +} + +/***************************************************************************** + * Interfaces + * + */ + +static struct interface_operation nfs_xfer_operations[] = { + INTF_OP ( intf_close, struct nfs_request *, nfs_done ), +}; + +/** NFS data transfer interface descriptor */ +static struct interface_descriptor nfs_xfer_desc = + INTF_DESC ( struct nfs_request, xfer, nfs_xfer_operations ); + +static struct interface_operation nfs_pm_operations[] = { + INTF_OP ( intf_close, struct nfs_request *, nfs_done ), + INTF_OP ( xfer_deliver, struct nfs_request *, nfs_pm_deliver ), + INTF_OP ( xfer_window_changed, struct nfs_request *, nfs_pm_step ), +}; + +static struct interface_descriptor nfs_pm_desc = + INTF_DESC ( struct nfs_request, pm_intf, nfs_pm_operations ); + +static struct interface_operation nfs_mount_operations[] = { + INTF_OP ( intf_close, struct nfs_request *, nfs_done ), + INTF_OP ( xfer_deliver, struct nfs_request *, nfs_mount_deliver ), + INTF_OP ( xfer_window_changed, struct nfs_request *, nfs_mount_step ), +}; + +static struct interface_descriptor nfs_mount_desc = + INTF_DESC ( struct nfs_request, mount_intf, nfs_mount_operations ); + +static struct interface_operation nfs_operations[] = { + INTF_OP ( intf_close, struct nfs_request *, nfs_done ), + INTF_OP ( xfer_deliver, struct nfs_request *, nfs_deliver ), + INTF_OP ( xfer_window_changed, struct nfs_request *, nfs_step ), +}; + +static struct interface_descriptor nfs_desc = + INTF_DESC_PASSTHRU ( struct nfs_request, nfs_intf, nfs_operations, + xfer ); + +/***************************************************************************** + * + * URI opener + * + */ + +static int nfs_parse_uri ( struct nfs_request *nfs, const struct uri *uri ) { + int rc; + + if ( ! uri || ! uri->host || ! uri->path ) + return -EINVAL; + + if ( ( rc = nfs_uri_init ( &nfs->uri, uri ) ) != 0 ) + return rc; + + if ( ! ( nfs->hostname = strdup ( uri->host ) ) ) { + rc = -ENOMEM; + goto err_hostname; + } + + DBGC ( nfs, "NFS_OPEN %p URI parsed: (mountpoint=%s, path=%s)\n", + nfs, nfs_uri_mountpoint ( &nfs->uri), nfs->uri.path ); + + return 0; + +err_hostname: + nfs_uri_free ( &nfs->uri ); + return rc; +} + +/** + * Initiate a NFS connection + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int nfs_open ( struct interface *xfer, struct uri *uri ) { + int rc; + struct nfs_request *nfs; + + nfs = zalloc ( sizeof ( *nfs ) ); + if ( ! nfs ) + return -ENOMEM; + + rc = nfs_parse_uri( nfs, uri ); + if ( rc != 0 ) + goto err_uri; + + rc = oncrpc_init_cred_sys ( &nfs->auth_sys ); + if ( rc != 0 ) + goto err_cred; + + ref_init ( &nfs->refcnt, nfs_free ); + intf_init ( &nfs->xfer, &nfs_xfer_desc, &nfs->refcnt ); + intf_init ( &nfs->pm_intf, &nfs_pm_desc, &nfs->refcnt ); + intf_init ( &nfs->mount_intf, &nfs_mount_desc, &nfs->refcnt ); + intf_init ( &nfs->nfs_intf, &nfs_desc, &nfs->refcnt ); + + portmap_init_session ( &nfs->pm_session, &nfs->auth_sys.credential ); + mount_init_session ( &nfs->mount_session, &nfs->auth_sys.credential ); + nfs_init_session ( &nfs->nfs_session, &nfs->auth_sys.credential ); + + DBGC ( nfs, "NFS_OPEN %p connecting to port mapper (%s:%d)...\n", nfs, + nfs->hostname, PORTMAP_PORT ); + + rc = nfs_connect ( &nfs->pm_intf, PORTMAP_PORT, nfs->hostname ); + if ( rc != 0 ) + goto err_connect; + + /* Attach to parent interface, mortalise self, and return */ + intf_plug_plug ( &nfs->xfer, xfer ); + ref_put ( &nfs->refcnt ); + + return 0; + +err_connect: + free ( nfs->auth_sys.hostname ); +err_cred: + nfs_uri_free ( &nfs->uri ); + free ( nfs->hostname ); +err_uri: + free ( nfs ); + return rc; +} + +/** NFS URI opener */ +struct uri_opener nfs_uri_opener __uri_opener = { + .scheme = "nfs", + .open = nfs_open, +}; diff --git a/qemu/roms/ipxe/src/net/oncrpc/nfs_uri.c b/qemu/roms/ipxe/src/net/oncrpc/nfs_uri.c new file mode 100644 index 000000000..c4c3f21e9 --- /dev/null +++ b/qemu/roms/ipxe/src/net/oncrpc/nfs_uri.c @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2014 Marin Hannache <ipxe@mareo.fr>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <libgen.h> +#include <ipxe/nfs_uri.h> + +/** @file + * + * Network File System protocol URI handling functions + * + */ + +int nfs_uri_init ( struct nfs_uri *nfs_uri, const struct uri *uri ) { + if ( ! ( nfs_uri->mountpoint = strdup ( uri->path ) ) ) + return -ENOMEM; + + nfs_uri->filename = basename ( nfs_uri->mountpoint ); + if ( strchr ( uri->path, '/' ) != NULL ) + nfs_uri->mountpoint = dirname ( nfs_uri->mountpoint ); + + if ( nfs_uri->filename[0] == '\0' ) { + free ( nfs_uri->mountpoint ); + return -EINVAL; + } + + if ( ! ( nfs_uri->path = strdup ( nfs_uri->filename ) ) ) { + free ( nfs_uri->mountpoint ); + return -ENOMEM; + } + nfs_uri->lookup_pos = nfs_uri->path; + + return 0; +} + +char *nfs_uri_mountpoint ( const struct nfs_uri *uri ) { + if ( uri->mountpoint + 1 == uri->filename || + uri->mountpoint == uri->filename ) + return "/"; + + return uri->mountpoint; +} + +int nfs_uri_next_mountpoint ( struct nfs_uri *uri ) { + char *sep; + + if ( uri->mountpoint + 1 == uri->filename || + uri->mountpoint == uri->filename ) + return -ENOENT; + + sep = strrchr ( uri->mountpoint, '/' ); + uri->filename[-1] = '/'; + uri->filename = sep + 1; + *sep = '\0'; + + free ( uri->path ); + if ( ! ( uri->path = strdup ( uri->filename ) ) ) { + uri->path = NULL; + return -ENOMEM; + } + uri->lookup_pos = uri->path; + + return 0; +} + +int nfs_uri_symlink ( struct nfs_uri *uri, const char *symlink ) { + size_t len; + char *new_path; + + if ( ! uri->path ) + return -EINVAL; + + if ( *symlink == '/' ) + { + if ( strncmp ( symlink, uri->mountpoint, + strlen ( uri->mountpoint ) ) != 0 ) + return -EINVAL; + + len = strlen ( uri->lookup_pos ) + strlen ( symlink ) - \ + strlen ( uri->mountpoint ); + if ( ! ( new_path = malloc ( len * sizeof ( char ) ) ) ) + return -ENOMEM; + + strcpy ( new_path, symlink + strlen ( uri->mountpoint ) ); + strcpy ( new_path + strlen ( new_path ), uri->lookup_pos ); + + } else { + len = strlen ( uri->lookup_pos ) + strlen ( symlink ); + if ( ! ( new_path = malloc ( len * sizeof ( char ) ) ) ) + return -ENOMEM; + + + strcpy ( new_path, symlink ); + strcpy ( new_path + strlen ( new_path ), uri->lookup_pos ); + } + + free ( uri->path ); + uri->lookup_pos = uri->path = new_path; + + return 0; +} + +char *nfs_uri_next_path_component ( struct nfs_uri *uri ) { + char *sep; + char *start; + + if ( ! uri->path ) + return NULL; + + for ( sep = uri->lookup_pos ; *sep != '\0' && *sep != '/'; sep++ ) + ; + + start = uri->lookup_pos; + uri->lookup_pos = sep; + if ( *sep != '\0' ) { + uri->lookup_pos++; + *sep = '\0'; + if ( *start == '\0' ) + return nfs_uri_next_path_component ( uri ); + } + + return start; +} + +void nfs_uri_free ( struct nfs_uri *uri ) { + free ( uri->mountpoint ); + free ( uri->path ); + uri->mountpoint = NULL; + uri->path = NULL; +} diff --git a/qemu/roms/ipxe/src/net/oncrpc/oncrpc_iob.c b/qemu/roms/ipxe/src/net/oncrpc/oncrpc_iob.c new file mode 100644 index 000000000..be51805e7 --- /dev/null +++ b/qemu/roms/ipxe/src/net/oncrpc/oncrpc_iob.c @@ -0,0 +1,200 @@ +/* + * Copyright (C) 2013 Marin Hannache <ipxe@mareo.fr>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/socket.h> +#include <ipxe/tcpip.h> +#include <ipxe/in.h> +#include <ipxe/iobuf.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/uri.h> +#include <ipxe/features.h> +#include <ipxe/oncrpc.h> +#include <ipxe/oncrpc_iob.h> + +/** @file + * + * SUN ONC RPC protocol + * + */ + +size_t oncrpc_iob_add_fields ( struct io_buffer *io_buf, + const struct oncrpc_field fields[] ) { + size_t i; + size_t s = 0; + + struct oncrpc_field f; + + if ( ! io_buf ) + return 0; + + for ( i = 0; fields[i].type != oncrpc_none; i++ ) { + f = fields[i]; + switch ( f.type ) { + case oncrpc_int32: + s += oncrpc_iob_add_int ( io_buf, f.value.int32 ); + break; + + case oncrpc_int64: + s += oncrpc_iob_add_int64 ( io_buf, f.value.int64 ); + break; + + case oncrpc_str: + s += oncrpc_iob_add_string ( io_buf, f.value.str ); + break; + + case oncrpc_array: + s += oncrpc_iob_add_array ( io_buf, + f.value.array.length, + f.value.array.ptr ); + break; + + case oncrpc_intarray: + s += oncrpc_iob_add_intarray ( io_buf, + f.value.intarray.length, + f.value.intarray.ptr ); + break; + + case oncrpc_cred: + s += oncrpc_iob_add_cred ( io_buf, f.value.cred); + break; + + default: + return s; + } + } + + return s; +} + +/** + * Add an array of bytes to the end of an I/O buffer + * + * @v io_buf I/O buffer + * @v val String + * @ret size Size of the data written + * + * In the ONC RPC protocol, every data is four byte paded, we add padding when + * necessary by using oncrpc_align() + */ +size_t oncrpc_iob_add_array ( struct io_buffer *io_buf, size_t length, + const void *data ) { + size_t padding = oncrpc_align ( length ) - length; + + oncrpc_iob_add_int ( io_buf, length ); + memcpy ( iob_put ( io_buf, length ), data, length ); + memset ( iob_put ( io_buf, padding ), 0, padding ); + + return length + padding + sizeof ( uint32_t ); +} + +/** + * Add an int array to the end of an I/O buffer + * + * @v io_buf I/O buffer + * @v length Length od the array + * @v val Int array + * @ret size Size of the data written + */ +size_t oncrpc_iob_add_intarray ( struct io_buffer *io_buf, size_t length, + const uint32_t *array ) { + size_t i; + + oncrpc_iob_add_int ( io_buf, length ); + + for ( i = 0; i < length; ++i ) + oncrpc_iob_add_int ( io_buf, array[i] ); + + return ( ( length + 1 ) * sizeof ( uint32_t ) ); +} + +/** + * Add credential information to the end of an I/O buffer + * + * @v io_buf I/O buffer + * @v cred Credential information + * @ret size Size of the data written + */ +size_t oncrpc_iob_add_cred ( struct io_buffer *io_buf, + const struct oncrpc_cred *cred ) { + struct oncrpc_cred_sys *syscred; + size_t s; + + struct oncrpc_field credfields[] = { + ONCRPC_FIELD ( int32, cred->flavor ), + ONCRPC_FIELD ( int32, cred->length ), + ONCRPC_FIELD_END, + }; + + if ( ! io_buf || ! cred ) + return 0; + + s = oncrpc_iob_add_fields ( io_buf, credfields); + + switch ( cred->flavor ) { + case ONCRPC_AUTH_NONE: + break; + + case ONCRPC_AUTH_SYS: + syscred = container_of ( cred, struct oncrpc_cred_sys, + credential ); + + struct oncrpc_field syscredfields[] = { + ONCRPC_FIELD ( int32, syscred->stamp ), + ONCRPC_FIELD ( str, syscred->hostname ), + ONCRPC_FIELD ( int32, syscred->uid ), + ONCRPC_FIELD ( int32, syscred->gid ), + ONCRPC_SUBFIELD ( intarray, syscred->aux_gid_len, + syscred->aux_gid ), + ONCRPC_FIELD_END, + }; + + s += oncrpc_iob_add_fields ( io_buf, syscredfields ); + break; + } + + return s; +} + +/** + * Get credential information from the beginning of an I/O buffer + * + * @v io_buf I/O buffer + * @v cred Struct where the information will be saved + * @ret size Size of the data read + */ +size_t oncrpc_iob_get_cred ( struct io_buffer *io_buf, + struct oncrpc_cred *cred ) { + if ( cred == NULL ) + return * ( uint32_t * ) io_buf->data; + + cred->flavor = oncrpc_iob_get_int ( io_buf ); + cred->length = oncrpc_iob_get_int ( io_buf ); + + iob_pull ( io_buf, cred->length ); + + return ( 2 * sizeof ( uint32_t ) + cred->length ); +} diff --git a/qemu/roms/ipxe/src/net/oncrpc/portmap.c b/qemu/roms/ipxe/src/net/oncrpc/portmap.c new file mode 100644 index 000000000..df62221dc --- /dev/null +++ b/qemu/roms/ipxe/src/net/oncrpc/portmap.c @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2013 Marin Hannache <ipxe@mareo.fr>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/socket.h> +#include <ipxe/tcpip.h> +#include <ipxe/in.h> +#include <ipxe/iobuf.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/uri.h> +#include <ipxe/features.h> +#include <ipxe/timer.h> +#include <ipxe/oncrpc.h> +#include <ipxe/oncrpc_iob.h> +#include <ipxe/portmap.h> + +/** @file + * + * PORTMAPPER protocol. + * + */ + +/** PORTMAP GETPORT procedure. */ +#define PORTMAP_GETPORT 3 + +/** + * Send a GETPORT request + * + * @v intf Interface to send the request on + * @v session ONC RPC session + * @v prog ONC RPC program number + * @v vers ONC RPC rogram version number + * @v proto Protocol (TCP or UDP) + * @ret rc Return status code + */ +int portmap_getport ( struct interface *intf, struct oncrpc_session *session, + uint32_t prog, uint32_t vers, uint32_t proto ) { + struct oncrpc_field fields[] = { + ONCRPC_FIELD ( int32, prog ), + ONCRPC_FIELD ( int32, vers ), + ONCRPC_FIELD ( int32, proto ), + ONCRPC_FIELD ( int32, 0 ), /* The port field is only meaningful + in GETPORT reply */ + ONCRPC_FIELD_END, + }; + + return oncrpc_call ( intf, session, PORTMAP_GETPORT, fields ); +} + +/** + * Parse a GETPORT reply + * + * @v getport_reply A structure where the data will be saved + * @v reply The ONC RPC reply to get data from + * @ret rc Return status code + */ +int portmap_get_getport_reply ( struct portmap_getport_reply *getport_reply, + struct oncrpc_reply *reply ) { + if ( ! getport_reply || ! reply ) + return -EINVAL; + + getport_reply->port = oncrpc_iob_get_int ( reply->data ); + if ( getport_reply == 0 || getport_reply->port >= 65536 ) + return -EINVAL; + + return 0; +} diff --git a/qemu/roms/ipxe/src/net/ping.c b/qemu/roms/ipxe/src/net/ping.c new file mode 100644 index 000000000..d9da87ade --- /dev/null +++ b/qemu/roms/ipxe/src/net/ping.c @@ -0,0 +1,273 @@ +/* + * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/refcnt.h> +#include <ipxe/list.h> +#include <ipxe/iobuf.h> +#include <ipxe/tcpip.h> +#include <ipxe/icmp.h> +#include <ipxe/interface.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/netdevice.h> +#include <ipxe/ping.h> + +/** @file + * + * ICMP ping protocol + * + */ + +/** + * A ping connection + * + */ +struct ping_connection { + /** Reference counter */ + struct refcnt refcnt; + /** List of ping connections */ + struct list_head list; + + /** Remote socket address */ + struct sockaddr_tcpip peer; + /** Local port number */ + uint16_t port; + + /** Data transfer interface */ + struct interface xfer; +}; + +/** List of registered ping connections */ +static LIST_HEAD ( ping_conns ); + +/** + * Identify ping connection by local port number + * + * @v port Local port number + * @ret ping Ping connection, or NULL + */ +static struct ping_connection * ping_demux ( unsigned int port ) { + struct ping_connection *ping; + + list_for_each_entry ( ping, &ping_conns, list ) { + if ( ping->port == port ) + return ping; + } + return NULL; +} + +/** + * Check if local port number is available + * + * @v port Local port number + * @ret port Local port number, or negative error + */ +static int ping_port_available ( int port ) { + + return ( ping_demux ( port ) ? -EADDRINUSE : port ); +} + +/** + * Process ICMP ping reply + * + * @v iobuf I/O buffer + * @v st_src Source address + * @ret rc Return status code + */ +int ping_rx ( struct io_buffer *iobuf, struct sockaddr_tcpip *st_src ) { + struct icmp_echo *echo = iobuf->data; + struct ping_connection *ping; + struct xfer_metadata meta; + int rc; + + /* Sanity check: should already have been checked by ICMP layer */ + assert ( iob_len ( iobuf ) >= sizeof ( *echo ) ); + + /* Identify connection */ + ping = ping_demux ( ntohs ( echo->ident ) ); + DBGC ( ping, "PING %p reply id %#04x seq %#04x\n", + ping, ntohs ( echo->ident ), ntohs ( echo->sequence ) ); + if ( ! ping ) { + rc = -ENOTCONN; + goto discard; + } + + /* Strip header, construct metadata, and pass data to upper layer */ + iob_pull ( iobuf, sizeof ( *echo ) ); + memset ( &meta, 0, sizeof ( meta ) ); + meta.src = ( ( struct sockaddr * ) st_src ); + meta.flags = XFER_FL_ABS_OFFSET; + meta.offset = ntohs ( echo->sequence ); + return xfer_deliver ( &ping->xfer, iob_disown ( iobuf ), &meta ); + + discard: + free_iob ( iobuf ); + return rc; +} + +/** + * Allocate I/O buffer for ping + * + * @v ping Ping connection + * @v len Payload size + * @ret iobuf I/O buffer, or NULL + */ +static struct io_buffer * +ping_alloc_iob ( struct ping_connection *ping __unused, size_t len ) { + size_t header_len; + struct io_buffer *iobuf; + + header_len = ( MAX_LL_NET_HEADER_LEN + sizeof ( struct icmp_echo ) ); + iobuf = alloc_iob ( header_len + len ); + if ( iobuf ) + iob_reserve ( iobuf, header_len ); + return iobuf; +} + +/** + * Deliver datagram as I/O buffer + * + * @v ping Ping connection + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int ping_deliver ( struct ping_connection *ping, struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + struct icmp_echo *echo = iob_push ( iobuf, sizeof ( *echo ) ); + int rc; + + /* Construct header */ + memset ( echo, 0, sizeof ( *echo ) ); + echo->ident = htons ( ping->port ); + echo->sequence = htons ( meta->offset ); + + /* Transmit echo request */ + if ( ( rc = icmp_tx_echo_request ( iob_disown ( iobuf ), + &ping->peer ) ) != 0 ) { + DBGC ( ping, "PING %p could not transmit: %s\n", + ping, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Close ping connection + * + * @v ping Ping connection + * @v rc Reason for close + */ +static void ping_close ( struct ping_connection *ping, int rc ) { + + /* Close data transfer interface */ + intf_shutdown ( &ping->xfer, rc ); + + /* Remove from list of connections and drop list's reference */ + list_del ( &ping->list ); + ref_put ( &ping->refcnt ); + + DBGC ( ping, "PING %p closed\n", ping ); +} + +/** Ping data transfer interface operations */ +static struct interface_operation ping_xfer_operations[] = { + INTF_OP ( xfer_deliver, struct ping_connection *, ping_deliver ), + INTF_OP ( xfer_alloc_iob, struct ping_connection *, ping_alloc_iob ), + INTF_OP ( intf_close, struct ping_connection *, ping_close ), +}; + +/** Ping data transfer interface descriptor */ +static struct interface_descriptor ping_xfer_desc = + INTF_DESC ( struct ping_connection, xfer, ping_xfer_operations ); + +/** + * Open a ping connection + * + * @v xfer Data transfer interface + * @v peer Peer socket address + * @v local Local socket address, or NULL + * @ret rc Return status code + */ +static int ping_open ( struct interface *xfer, struct sockaddr *peer, + struct sockaddr *local ) { + struct sockaddr_tcpip *st_peer = ( struct sockaddr_tcpip * ) peer; + struct sockaddr_tcpip *st_local = ( struct sockaddr_tcpip * ) local; + struct ping_connection *ping; + int port; + int rc; + + /* Allocate and initialise structure */ + ping = zalloc ( sizeof ( *ping ) ); + if ( ! ping ) { + rc = -ENOMEM; + goto err_alloc; + } + DBGC ( ping, "PING %p allocated\n", ping ); + ref_init ( &ping->refcnt, NULL ); + intf_init ( &ping->xfer, &ping_xfer_desc, &ping->refcnt ); + memcpy ( &ping->peer, st_peer, sizeof ( ping->peer ) ); + + /* Bind to local port */ + port = tcpip_bind ( st_local, ping_port_available ); + if ( port < 0 ) { + rc = port; + DBGC ( ping, "PING %p could not bind: %s\n", + ping, strerror ( rc ) ); + goto err_bind; + } + ping->port = port; + DBGC ( ping, "PING %p bound to id %#04x\n", ping, port ); + + /* Attach parent interface, transfer reference to connection + * list, and return + */ + intf_plug_plug ( &ping->xfer, xfer ); + list_add ( &ping->list, &ping_conns ); + return 0; + + err_bind: + ref_put ( &ping->refcnt ); + err_alloc: + return rc; +} + +/** Ping IPv4 socket opener */ +struct socket_opener ping_ipv4_socket_opener __socket_opener = { + .semantics = PING_SOCK_ECHO, + .family = AF_INET, + .open = ping_open, +}; + +/** Ping IPv6 socket opener */ +struct socket_opener ping_ipv6_socket_opener __socket_opener = { + .semantics = PING_SOCK_ECHO, + .family = AF_INET6, + .open = ping_open, +}; + +/** Linkage hack */ +int ping_sock_echo = PING_SOCK_ECHO; diff --git a/qemu/roms/ipxe/src/net/rarp.c b/qemu/roms/ipxe/src/net/rarp.c new file mode 100644 index 000000000..371145015 --- /dev/null +++ b/qemu/roms/ipxe/src/net/rarp.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <byteswap.h> +#include <ipxe/netdevice.h> +#include <ipxe/iobuf.h> +#include <ipxe/if_ether.h> +#include <ipxe/rarp.h> + +/** @file + * + * Reverse Address Resolution Protocol + * + */ + +/** + * Process incoming ARP packets + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v ll_dest Link-layer destination address + * @v ll_source Link-layer source address + * @v flags Packet flags + * @ret rc Return status code + * + * This is a dummy method which simply discards RARP packets. + */ +static int rarp_rx ( struct io_buffer *iobuf, + struct net_device *netdev __unused, + const void *ll_dest __unused, + const void *ll_source __unused, + unsigned int flags __unused ) { + free_iob ( iobuf ); + return 0; +} + + +/** + * Transcribe RARP address + * + * @v net_addr RARP address + * @ret string "<RARP>" + * + * This operation is meaningless for the RARP protocol. + */ +static const char * rarp_ntoa ( const void *net_addr __unused ) { + return "<RARP>"; +} + +/** RARP protocol */ +struct net_protocol rarp_protocol __net_protocol = { + .name = "RARP", + .net_proto = htons ( ETH_P_RARP ), + .rx = rarp_rx, + .ntoa = rarp_ntoa, +}; diff --git a/qemu/roms/ipxe/src/net/retry.c b/qemu/roms/ipxe/src/net/retry.c new file mode 100644 index 000000000..8f210bdcc --- /dev/null +++ b/qemu/roms/ipxe/src/net/retry.c @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stddef.h> +#include <ipxe/timer.h> +#include <ipxe/list.h> +#include <ipxe/process.h> +#include <ipxe/init.h> +#include <ipxe/retry.h> + +/** @file + * + * Retry timers + * + * A retry timer is a binary exponential backoff timer. It can be + * used to build automatic retransmission into network protocols. + * + * This implementation of the timer is designed to satisfy RFC 2988 + * and therefore be usable as a TCP retransmission timer. + * + * + */ + +/* The theoretical minimum that the algorithm in stop_timer() can + * adjust the timeout back down to is seven ticks, so set the minimum + * timeout to at least that value for the sake of consistency. + */ +#define MIN_TIMEOUT 7 + +/** List of running timers */ +static LIST_HEAD ( timers ); + +/** + * Start timer + * + * @v timer Retry timer + * + * This starts the timer running with the current timeout value. If + * stop_timer() is not called before the timer expires, the timer will + * be stopped and the timer's callback function will be called. + */ +void start_timer ( struct retry_timer *timer ) { + if ( ! timer->running ) { + list_add ( &timer->list, &timers ); + ref_get ( timer->refcnt ); + } + timer->start = currticks(); + timer->running = 1; + + /* 0 means "use default timeout" */ + if ( timer->min_timeout == 0 ) + timer->min_timeout = DEFAULT_MIN_TIMEOUT; + /* We must never be less than MIN_TIMEOUT under any circumstances */ + if ( timer->min_timeout < MIN_TIMEOUT ) + timer->min_timeout = MIN_TIMEOUT; + /* Honor user-specified minimum timeout */ + if ( timer->timeout < timer->min_timeout ) + timer->timeout = timer->min_timeout; + + DBG2 ( "Timer %p started at time %ld (expires at %ld)\n", + timer, timer->start, ( timer->start + timer->timeout ) ); +} + +/** + * Start timer with a specified fixed timeout + * + * @v timer Retry timer + * @v timeout Timeout, in ticks + */ +void start_timer_fixed ( struct retry_timer *timer, unsigned long timeout ) { + start_timer ( timer ); + timer->timeout = timeout; + DBG2 ( "Timer %p expiry time changed to %ld\n", + timer, ( timer->start + timer->timeout ) ); +} + +/** + * Stop timer + * + * @v timer Retry timer + * + * This stops the timer and updates the timer's timeout value. + */ +void stop_timer ( struct retry_timer *timer ) { + unsigned long old_timeout = timer->timeout; + unsigned long now = currticks(); + unsigned long runtime; + + /* If timer was already stopped, do nothing */ + if ( ! timer->running ) + return; + + list_del ( &timer->list ); + runtime = ( now - timer->start ); + timer->running = 0; + DBG2 ( "Timer %p stopped at time %ld (ran for %ld)\n", + timer, now, runtime ); + + /* Update timer. Variables are: + * + * r = round-trip time estimate (i.e. runtime) + * t = timeout value (i.e. timer->timeout) + * s = smoothed round-trip time + * + * By choice, we set t = 4s, i.e. allow for four times the + * normal round-trip time to pass before retransmitting. + * + * We want to smooth according to s := ( 7 s + r ) / 8 + * + * Since we don't actually store s, this reduces to + * t := ( 7 t / 8 ) + ( r / 2 ) + * + */ + if ( timer->count ) { + timer->count--; + } else { + timer->timeout -= ( timer->timeout >> 3 ); + timer->timeout += ( runtime >> 1 ); + if ( timer->timeout != old_timeout ) { + DBG ( "Timer %p timeout updated to %ld\n", + timer, timer->timeout ); + } + } + + ref_put ( timer->refcnt ); +} + +/** + * Handle expired timer + * + * @v timer Retry timer + */ +static void timer_expired ( struct retry_timer *timer ) { + struct refcnt *refcnt = timer->refcnt; + int fail; + + /* Stop timer without performing RTT calculations */ + DBG2 ( "Timer %p stopped at time %ld on expiry\n", + timer, currticks() ); + assert ( timer->running ); + list_del ( &timer->list ); + timer->running = 0; + timer->count++; + + /* Back off the timeout value */ + timer->timeout <<= 1; + if ( timer->max_timeout == 0 ) /* 0 means "use default timeout" */ + timer->max_timeout = DEFAULT_MAX_TIMEOUT; + if ( ( fail = ( timer->timeout > timer->max_timeout ) ) ) + timer->timeout = timer->max_timeout; + DBG ( "Timer %p timeout backed off to %ld\n", + timer, timer->timeout ); + + /* Call expiry callback */ + timer->expired ( timer, fail ); + /* If refcnt is NULL, then timer may already have been freed */ + + ref_put ( refcnt ); +} + +/** + * Poll the retry timer list + * + */ +void retry_poll ( void ) { + struct retry_timer *timer; + unsigned long now = currticks(); + unsigned long used; + + /* Process at most one timer expiry. We cannot process + * multiple expiries in one pass, because one timer expiring + * may end up triggering another timer's deletion from the + * list. + */ + list_for_each_entry ( timer, &timers, list ) { + used = ( now - timer->start ); + if ( used >= timer->timeout ) { + timer_expired ( timer ); + break; + } + } +} + +/** + * Single-step the retry timer list + * + * @v process Retry timer process + */ +static void retry_step ( struct process *process __unused ) { + retry_poll(); +} + +/** Retry timer process */ +PERMANENT_PROCESS ( retry_process, retry_step ); diff --git a/qemu/roms/ipxe/src/net/socket.c b/qemu/roms/ipxe/src/net/socket.c new file mode 100644 index 000000000..24f6a0892 --- /dev/null +++ b/qemu/roms/ipxe/src/net/socket.c @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stddef.h> +#include <errno.h> +#include <ipxe/socket.h> + +/** @file + * + * Sockets + * + */ + +/** + * Transcribe socket address + * + * @v sa Socket address + * @ret string Socket address string + */ +const char * sock_ntoa ( struct sockaddr *sa ) { + struct sockaddr_converter *converter; + + for_each_table_entry ( converter, SOCKADDR_CONVERTERS ) { + if ( converter->family == sa->sa_family ) + return converter->ntoa ( sa ); + } + return NULL; +} + +/** + * Parse socket address + * + * @v string Socket address string + * @v sa Socket address to fill in + * @ret rc Return status code + */ +int sock_aton ( const char *string, struct sockaddr *sa ) { + struct sockaddr_converter *converter; + + for_each_table_entry ( converter, SOCKADDR_CONVERTERS ) { + if ( converter->aton ( string, sa ) == 0 ) { + sa->sa_family = converter->family; + return 0; + } + } + return -EINVAL; +} diff --git a/qemu/roms/ipxe/src/net/tcp.c b/qemu/roms/ipxe/src/net/tcp.c new file mode 100644 index 000000000..987cb63e1 --- /dev/null +++ b/qemu/roms/ipxe/src/net/tcp.c @@ -0,0 +1,1494 @@ +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/timer.h> +#include <ipxe/iobuf.h> +#include <ipxe/malloc.h> +#include <ipxe/init.h> +#include <ipxe/retry.h> +#include <ipxe/refcnt.h> +#include <ipxe/pending.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/uri.h> +#include <ipxe/netdevice.h> +#include <ipxe/profile.h> +#include <ipxe/process.h> +#include <ipxe/tcpip.h> +#include <ipxe/tcp.h> + +/** @file + * + * TCP protocol + * + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** A TCP connection */ +struct tcp_connection { + /** Reference counter */ + struct refcnt refcnt; + /** List of TCP connections */ + struct list_head list; + + /** Flags */ + unsigned int flags; + + /** Data transfer interface */ + struct interface xfer; + + /** Remote socket address */ + struct sockaddr_tcpip peer; + /** Local port */ + unsigned int local_port; + /** Maximum segment size */ + size_t mss; + + /** Current TCP state */ + unsigned int tcp_state; + /** Previous TCP state + * + * Maintained only for debug messages + */ + unsigned int prev_tcp_state; + /** Current sequence number + * + * Equivalent to SND.UNA in RFC 793 terminology. + */ + uint32_t snd_seq; + /** Unacknowledged sequence count + * + * Equivalent to (SND.NXT-SND.UNA) in RFC 793 terminology. + */ + uint32_t snd_sent; + /** Send window + * + * Equivalent to SND.WND in RFC 793 terminology + */ + uint32_t snd_win; + /** Current acknowledgement number + * + * Equivalent to RCV.NXT in RFC 793 terminology. + */ + uint32_t rcv_ack; + /** Receive window + * + * Equivalent to RCV.WND in RFC 793 terminology. + */ + uint32_t rcv_win; + /** Received timestamp value + * + * Updated when a packet is received; copied to ts_recent when + * the window is advanced. + */ + uint32_t ts_val; + /** Most recent received timestamp that advanced the window + * + * Equivalent to TS.Recent in RFC 1323 terminology. + */ + uint32_t ts_recent; + /** Send window scale + * + * Equivalent to Snd.Wind.Scale in RFC 1323 terminology + */ + uint8_t snd_win_scale; + /** Receive window scale + * + * Equivalent to Rcv.Wind.Scale in RFC 1323 terminology + */ + uint8_t rcv_win_scale; + /** Maximum receive window */ + uint32_t max_rcv_win; + + /** Transmit queue */ + struct list_head tx_queue; + /** Receive queue */ + struct list_head rx_queue; + /** Transmission process */ + struct process process; + /** Retransmission timer */ + struct retry_timer timer; + /** Shutdown (TIME_WAIT) timer */ + struct retry_timer wait; + + /** Pending operations for SYN and FIN */ + struct pending_operation pending_flags; + /** Pending operations for transmit queue */ + struct pending_operation pending_data; +}; + +/** TCP flags */ +enum tcp_flags { + /** TCP data transfer interface has been closed */ + TCP_XFER_CLOSED = 0x0001, + /** TCP timestamps are enabled */ + TCP_TS_ENABLED = 0x0002, + /** TCP acknowledgement is pending */ + TCP_ACK_PENDING = 0x0004, +}; + +/** TCP internal header + * + * This is the header that replaces the TCP header for packets + * enqueued on the receive queue. + */ +struct tcp_rx_queued_header { + /** SEQ value, in host-endian order + * + * This represents the SEQ value at the time the packet is + * enqueued, and so excludes the SYN, if present. + */ + uint32_t seq; + /** Flags + * + * Only FIN is valid within this flags byte; all other flags + * have already been processed by the time the packet is + * enqueued. + */ + uint8_t flags; + /** Reserved */ + uint8_t reserved[3]; +}; + +/** + * List of registered TCP connections + */ +static LIST_HEAD ( tcp_conns ); + +/** Transmit profiler */ +static struct profiler tcp_tx_profiler __profiler = { .name = "tcp.tx" }; + +/** Receive profiler */ +static struct profiler tcp_rx_profiler __profiler = { .name = "tcp.rx" }; + +/** Data transfer profiler */ +static struct profiler tcp_xfer_profiler __profiler = { .name = "tcp.xfer" }; + +/* Forward declarations */ +static struct process_descriptor tcp_process_desc; +static struct interface_descriptor tcp_xfer_desc; +static void tcp_expired ( struct retry_timer *timer, int over ); +static void tcp_wait_expired ( struct retry_timer *timer, int over ); +static struct tcp_connection * tcp_demux ( unsigned int local_port ); +static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack, + uint32_t win ); + +/** + * Name TCP state + * + * @v state TCP state + * @ret name Name of TCP state + */ +static inline __attribute__ (( always_inline )) const char * +tcp_state ( int state ) { + switch ( state ) { + case TCP_CLOSED: return "CLOSED"; + case TCP_LISTEN: return "LISTEN"; + case TCP_SYN_SENT: return "SYN_SENT"; + case TCP_SYN_RCVD: return "SYN_RCVD"; + case TCP_ESTABLISHED: return "ESTABLISHED"; + case TCP_FIN_WAIT_1: return "FIN_WAIT_1"; + case TCP_FIN_WAIT_2: return "FIN_WAIT_2"; + case TCP_CLOSING_OR_LAST_ACK: return "CLOSING/LAST_ACK"; + case TCP_TIME_WAIT: return "TIME_WAIT"; + case TCP_CLOSE_WAIT: return "CLOSE_WAIT"; + default: return "INVALID"; + } +} + +/** + * Dump TCP state transition + * + * @v tcp TCP connection + */ +static inline __attribute__ (( always_inline )) void +tcp_dump_state ( struct tcp_connection *tcp ) { + + if ( tcp->tcp_state != tcp->prev_tcp_state ) { + DBGC ( tcp, "TCP %p transitioned from %s to %s\n", tcp, + tcp_state ( tcp->prev_tcp_state ), + tcp_state ( tcp->tcp_state ) ); + } + tcp->prev_tcp_state = tcp->tcp_state; +} + +/** + * Dump TCP flags + * + * @v flags TCP flags + */ +static inline __attribute__ (( always_inline )) void +tcp_dump_flags ( struct tcp_connection *tcp, unsigned int flags ) { + if ( flags & TCP_RST ) + DBGC2 ( tcp, " RST" ); + if ( flags & TCP_SYN ) + DBGC2 ( tcp, " SYN" ); + if ( flags & TCP_PSH ) + DBGC2 ( tcp, " PSH" ); + if ( flags & TCP_FIN ) + DBGC2 ( tcp, " FIN" ); + if ( flags & TCP_ACK ) + DBGC2 ( tcp, " ACK" ); +} + +/*************************************************************************** + * + * Open and close + * + *************************************************************************** + */ + +/** + * Check if local TCP port is available + * + * @v port Local port number + * @ret port Local port number, or negative error + */ +static int tcp_port_available ( int port ) { + + return ( tcp_demux ( port ) ? -EADDRINUSE : port ); +} + +/** + * Open a TCP connection + * + * @v xfer Data transfer interface + * @v peer Peer socket address + * @v local Local socket address, or NULL + * @ret rc Return status code + */ +static int tcp_open ( struct interface *xfer, struct sockaddr *peer, + struct sockaddr *local ) { + struct sockaddr_tcpip *st_peer = ( struct sockaddr_tcpip * ) peer; + struct sockaddr_tcpip *st_local = ( struct sockaddr_tcpip * ) local; + struct tcp_connection *tcp; + size_t mtu; + int port; + int rc; + + /* Allocate and initialise structure */ + tcp = zalloc ( sizeof ( *tcp ) ); + if ( ! tcp ) + return -ENOMEM; + DBGC ( tcp, "TCP %p allocated\n", tcp ); + ref_init ( &tcp->refcnt, NULL ); + intf_init ( &tcp->xfer, &tcp_xfer_desc, &tcp->refcnt ); + process_init_stopped ( &tcp->process, &tcp_process_desc, &tcp->refcnt ); + timer_init ( &tcp->timer, tcp_expired, &tcp->refcnt ); + timer_init ( &tcp->wait, tcp_wait_expired, &tcp->refcnt ); + tcp->prev_tcp_state = TCP_CLOSED; + tcp->tcp_state = TCP_STATE_SENT ( TCP_SYN ); + tcp_dump_state ( tcp ); + tcp->snd_seq = random(); + tcp->max_rcv_win = TCP_MAX_WINDOW_SIZE; + INIT_LIST_HEAD ( &tcp->tx_queue ); + INIT_LIST_HEAD ( &tcp->rx_queue ); + memcpy ( &tcp->peer, st_peer, sizeof ( tcp->peer ) ); + + /* Calculate MSS */ + mtu = tcpip_mtu ( &tcp->peer ); + if ( ! mtu ) { + DBGC ( tcp, "TCP %p has no route to %s\n", + tcp, sock_ntoa ( peer ) ); + rc = -ENETUNREACH; + goto err; + } + tcp->mss = ( mtu - sizeof ( struct tcp_header ) ); + + /* Bind to local port */ + port = tcpip_bind ( st_local, tcp_port_available ); + if ( port < 0 ) { + rc = port; + DBGC ( tcp, "TCP %p could not bind: %s\n", + tcp, strerror ( rc ) ); + goto err; + } + tcp->local_port = port; + DBGC ( tcp, "TCP %p bound to port %d\n", tcp, tcp->local_port ); + + /* Start timer to initiate SYN */ + start_timer_nodelay ( &tcp->timer ); + + /* Add a pending operation for the SYN */ + pending_get ( &tcp->pending_flags ); + + /* Attach parent interface, transfer reference to connection + * list and return + */ + intf_plug_plug ( &tcp->xfer, xfer ); + list_add ( &tcp->list, &tcp_conns ); + return 0; + + err: + ref_put ( &tcp->refcnt ); + return rc; +} + +/** + * Close TCP connection + * + * @v tcp TCP connection + * @v rc Reason for close + * + * Closes the data transfer interface. If the TCP state machine is in + * a suitable state, the connection will be deleted. + */ +static void tcp_close ( struct tcp_connection *tcp, int rc ) { + struct io_buffer *iobuf; + struct io_buffer *tmp; + + /* Close data transfer interface */ + intf_shutdown ( &tcp->xfer, rc ); + tcp->flags |= TCP_XFER_CLOSED; + + /* If we are in CLOSED, or have otherwise not yet received a + * SYN (i.e. we are in LISTEN or SYN_SENT), just delete the + * connection. + */ + if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) { + + /* Transition to CLOSED for the sake of debugging messages */ + tcp->tcp_state = TCP_CLOSED; + tcp_dump_state ( tcp ); + + /* Free any unprocessed I/O buffers */ + list_for_each_entry_safe ( iobuf, tmp, &tcp->rx_queue, list ) { + list_del ( &iobuf->list ); + free_iob ( iobuf ); + } + + /* Free any unsent I/O buffers */ + list_for_each_entry_safe ( iobuf, tmp, &tcp->tx_queue, list ) { + list_del ( &iobuf->list ); + free_iob ( iobuf ); + pending_put ( &tcp->pending_data ); + } + assert ( ! is_pending ( &tcp->pending_data ) ); + + /* Remove pending operations for SYN and FIN, if applicable */ + pending_put ( &tcp->pending_flags ); + pending_put ( &tcp->pending_flags ); + + /* Remove from list and drop reference */ + process_del ( &tcp->process ); + stop_timer ( &tcp->timer ); + stop_timer ( &tcp->wait ); + list_del ( &tcp->list ); + ref_put ( &tcp->refcnt ); + DBGC ( tcp, "TCP %p connection deleted\n", tcp ); + return; + } + + /* If we have not had our SYN acknowledged (i.e. we are in + * SYN_RCVD), pretend that it has been acknowledged so that we + * can send a FIN without breaking things. + */ + if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) ) + tcp_rx_ack ( tcp, ( tcp->snd_seq + 1 ), 0 ); + + /* If we have no data remaining to send, start sending FIN */ + if ( list_empty ( &tcp->tx_queue ) && + ! ( tcp->tcp_state & TCP_STATE_SENT ( TCP_FIN ) ) ) { + + tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN ); + tcp_dump_state ( tcp ); + + /* Add a pending operation for the FIN */ + pending_get ( &tcp->pending_flags ); + } +} + +/*************************************************************************** + * + * Transmit data path + * + *************************************************************************** + */ + +/** + * Calculate transmission window + * + * @v tcp TCP connection + * @ret len Maximum length that can be sent in a single packet + */ +static size_t tcp_xmit_win ( struct tcp_connection *tcp ) { + size_t len; + + /* Not ready if we're not in a suitable connection state */ + if ( ! TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) + return 0; + + /* Length is the minimum of the receiver's window and the path MTU */ + len = tcp->snd_win; + if ( len > TCP_PATH_MTU ) + len = TCP_PATH_MTU; + + return len; +} + +/** + * Check data-transfer flow control window + * + * @v tcp TCP connection + * @ret len Length of window + */ +static size_t tcp_xfer_window ( struct tcp_connection *tcp ) { + + /* Not ready if data queue is non-empty. This imposes a limit + * of only one unACKed packet in the TX queue at any time; we + * do this to conserve memory usage. + */ + if ( ! list_empty ( &tcp->tx_queue ) ) + return 0; + + /* Return TCP window length */ + return tcp_xmit_win ( tcp ); +} + +/** + * Process TCP transmit queue + * + * @v tcp TCP connection + * @v max_len Maximum length to process + * @v dest I/O buffer to fill with data, or NULL + * @v remove Remove data from queue + * @ret len Length of data processed + * + * This processes at most @c max_len bytes from the TCP connection's + * transmit queue. Data will be copied into the @c dest I/O buffer + * (if provided) and, if @c remove is true, removed from the transmit + * queue. + */ +static size_t tcp_process_tx_queue ( struct tcp_connection *tcp, size_t max_len, + struct io_buffer *dest, int remove ) { + struct io_buffer *iobuf; + struct io_buffer *tmp; + size_t frag_len; + size_t len = 0; + + list_for_each_entry_safe ( iobuf, tmp, &tcp->tx_queue, list ) { + frag_len = iob_len ( iobuf ); + if ( frag_len > max_len ) + frag_len = max_len; + if ( dest ) { + memcpy ( iob_put ( dest, frag_len ), iobuf->data, + frag_len ); + } + if ( remove ) { + iob_pull ( iobuf, frag_len ); + if ( ! iob_len ( iobuf ) ) { + list_del ( &iobuf->list ); + free_iob ( iobuf ); + pending_put ( &tcp->pending_data ); + } + } + len += frag_len; + max_len -= frag_len; + } + return len; +} + +/** + * Transmit any outstanding data + * + * @v tcp TCP connection + * + * Transmits any outstanding data on the connection. + * + * Note that even if an error is returned, the retransmission timer + * will have been started if necessary, and so the stack will + * eventually attempt to retransmit the failed packet. + */ +static void tcp_xmit ( struct tcp_connection *tcp ) { + struct io_buffer *iobuf; + struct tcp_header *tcphdr; + struct tcp_mss_option *mssopt; + struct tcp_window_scale_padded_option *wsopt; + struct tcp_timestamp_padded_option *tsopt; + void *payload; + unsigned int flags; + size_t len = 0; + uint32_t seq_len; + uint32_t app_win; + uint32_t max_rcv_win; + uint32_t max_representable_win; + int rc; + + /* Start profiling */ + profile_start ( &tcp_tx_profiler ); + + /* If retransmission timer is already running, do nothing */ + if ( timer_running ( &tcp->timer ) ) + return; + + /* Calculate both the actual (payload) and sequence space + * lengths that we wish to transmit. + */ + if ( TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) { + len = tcp_process_tx_queue ( tcp, tcp_xmit_win ( tcp ), + NULL, 0 ); + } + seq_len = len; + flags = TCP_FLAGS_SENDING ( tcp->tcp_state ); + if ( flags & ( TCP_SYN | TCP_FIN ) ) { + /* SYN or FIN consume one byte, and we can never send both */ + assert ( ! ( ( flags & TCP_SYN ) && ( flags & TCP_FIN ) ) ); + seq_len++; + } + tcp->snd_sent = seq_len; + + /* If we have nothing to transmit, stop now */ + if ( ( seq_len == 0 ) && ! ( tcp->flags & TCP_ACK_PENDING ) ) + return; + + /* If we are transmitting anything that requires + * acknowledgement (i.e. consumes sequence space), start the + * retransmission timer. Do this before attempting to + * allocate the I/O buffer, in case allocation itself fails. + */ + if ( seq_len ) + start_timer ( &tcp->timer ); + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( len + TCP_MAX_HEADER_LEN ); + if ( ! iobuf ) { + DBGC ( tcp, "TCP %p could not allocate iobuf for %08x..%08x " + "%08x\n", tcp, tcp->snd_seq, ( tcp->snd_seq + seq_len ), + tcp->rcv_ack ); + return; + } + iob_reserve ( iobuf, TCP_MAX_HEADER_LEN ); + + /* Fill data payload from transmit queue */ + tcp_process_tx_queue ( tcp, len, iobuf, 0 ); + + /* Expand receive window if possible */ + max_rcv_win = tcp->max_rcv_win; + app_win = xfer_window ( &tcp->xfer ); + if ( max_rcv_win > app_win ) + max_rcv_win = app_win; + max_representable_win = ( 0xffff << tcp->rcv_win_scale ); + if ( max_rcv_win > max_representable_win ) + max_rcv_win = max_representable_win; + max_rcv_win &= ~0x03; /* Keep everything dword-aligned */ + if ( tcp->rcv_win < max_rcv_win ) + tcp->rcv_win = max_rcv_win; + + /* Fill up the TCP header */ + payload = iobuf->data; + if ( flags & TCP_SYN ) { + mssopt = iob_push ( iobuf, sizeof ( *mssopt ) ); + mssopt->kind = TCP_OPTION_MSS; + mssopt->length = sizeof ( *mssopt ); + mssopt->mss = htons ( tcp->mss ); + wsopt = iob_push ( iobuf, sizeof ( *wsopt ) ); + wsopt->nop = TCP_OPTION_NOP; + wsopt->wsopt.kind = TCP_OPTION_WS; + wsopt->wsopt.length = sizeof ( wsopt->wsopt ); + wsopt->wsopt.scale = TCP_RX_WINDOW_SCALE; + } + if ( ( flags & TCP_SYN ) || ( tcp->flags & TCP_TS_ENABLED ) ) { + tsopt = iob_push ( iobuf, sizeof ( *tsopt ) ); + memset ( tsopt->nop, TCP_OPTION_NOP, sizeof ( tsopt->nop ) ); + tsopt->tsopt.kind = TCP_OPTION_TS; + tsopt->tsopt.length = sizeof ( tsopt->tsopt ); + tsopt->tsopt.tsval = htonl ( currticks() ); + tsopt->tsopt.tsecr = htonl ( tcp->ts_recent ); + } + if ( len != 0 ) + flags |= TCP_PSH; + tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) ); + memset ( tcphdr, 0, sizeof ( *tcphdr ) ); + tcphdr->src = htons ( tcp->local_port ); + tcphdr->dest = tcp->peer.st_port; + tcphdr->seq = htonl ( tcp->snd_seq ); + tcphdr->ack = htonl ( tcp->rcv_ack ); + tcphdr->hlen = ( ( payload - iobuf->data ) << 2 ); + tcphdr->flags = flags; + tcphdr->win = htons ( tcp->rcv_win >> tcp->rcv_win_scale ); + tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) ); + + /* Dump header */ + DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4zd", + tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), + ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) + seq_len ), + ntohl ( tcphdr->ack ), len ); + tcp_dump_flags ( tcp, tcphdr->flags ); + DBGC2 ( tcp, "\n" ); + + /* Transmit packet */ + if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, &tcp->peer, NULL, + &tcphdr->csum ) ) != 0 ) { + DBGC ( tcp, "TCP %p could not transmit %08x..%08x %08x: %s\n", + tcp, tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), + tcp->rcv_ack, strerror ( rc ) ); + return; + } + + /* Clear ACK-pending flag */ + tcp->flags &= ~TCP_ACK_PENDING; + + profile_stop ( &tcp_tx_profiler ); +} + +/** TCP process descriptor */ +static struct process_descriptor tcp_process_desc = + PROC_DESC_ONCE ( struct tcp_connection, process, tcp_xmit ); + +/** + * Retransmission timer expired + * + * @v timer Retransmission timer + * @v over Failure indicator + */ +static void tcp_expired ( struct retry_timer *timer, int over ) { + struct tcp_connection *tcp = + container_of ( timer, struct tcp_connection, timer ); + + DBGC ( tcp, "TCP %p timer %s in %s for %08x..%08x %08x\n", tcp, + ( over ? "expired" : "fired" ), tcp_state ( tcp->tcp_state ), + tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack ); + + assert ( ( tcp->tcp_state == TCP_SYN_SENT ) || + ( tcp->tcp_state == TCP_SYN_RCVD ) || + ( tcp->tcp_state == TCP_ESTABLISHED ) || + ( tcp->tcp_state == TCP_FIN_WAIT_1 ) || + ( tcp->tcp_state == TCP_CLOSE_WAIT ) || + ( tcp->tcp_state == TCP_CLOSING_OR_LAST_ACK ) ); + + if ( over ) { + /* If we have finally timed out and given up, + * terminate the connection + */ + tcp->tcp_state = TCP_CLOSED; + tcp_dump_state ( tcp ); + tcp_close ( tcp, -ETIMEDOUT ); + } else { + /* Otherwise, retransmit the packet */ + tcp_xmit ( tcp ); + } +} + +/** + * Shutdown timer expired + * + * @v timer Shutdown timer + * @v over Failure indicator + */ +static void tcp_wait_expired ( struct retry_timer *timer, int over __unused ) { + struct tcp_connection *tcp = + container_of ( timer, struct tcp_connection, wait ); + + assert ( tcp->tcp_state == TCP_TIME_WAIT ); + + DBGC ( tcp, "TCP %p wait complete in %s for %08x..%08x %08x\n", tcp, + tcp_state ( tcp->tcp_state ), tcp->snd_seq, + ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack ); + + tcp->tcp_state = TCP_CLOSED; + tcp_dump_state ( tcp ); + tcp_close ( tcp, 0 ); +} + +/** + * Send RST response to incoming packet + * + * @v in_tcphdr TCP header of incoming packet + * @ret rc Return status code + */ +static int tcp_xmit_reset ( struct tcp_connection *tcp, + struct sockaddr_tcpip *st_dest, + struct tcp_header *in_tcphdr ) { + struct io_buffer *iobuf; + struct tcp_header *tcphdr; + int rc; + + /* Allocate space for dataless TX buffer */ + iobuf = alloc_iob ( TCP_MAX_HEADER_LEN ); + if ( ! iobuf ) { + DBGC ( tcp, "TCP %p could not allocate iobuf for RST " + "%08x..%08x %08x\n", tcp, ntohl ( in_tcphdr->ack ), + ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ) ); + return -ENOMEM; + } + iob_reserve ( iobuf, TCP_MAX_HEADER_LEN ); + + /* Construct RST response */ + tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) ); + memset ( tcphdr, 0, sizeof ( *tcphdr ) ); + tcphdr->src = in_tcphdr->dest; + tcphdr->dest = in_tcphdr->src; + tcphdr->seq = in_tcphdr->ack; + tcphdr->ack = in_tcphdr->seq; + tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 ); + tcphdr->flags = ( TCP_RST | TCP_ACK ); + tcphdr->win = htons ( 0 ); + tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) ); + + /* Dump header */ + DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4d", + tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), + ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) ), + ntohl ( tcphdr->ack ), 0 ); + tcp_dump_flags ( tcp, tcphdr->flags ); + DBGC2 ( tcp, "\n" ); + + /* Transmit packet */ + if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, st_dest, + NULL, &tcphdr->csum ) ) != 0 ) { + DBGC ( tcp, "TCP %p could not transmit RST %08x..%08x %08x: " + "%s\n", tcp, ntohl ( in_tcphdr->ack ), + ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ), + strerror ( rc ) ); + return rc; + } + + return 0; +} + +/*************************************************************************** + * + * Receive data path + * + *************************************************************************** + */ + +/** + * Identify TCP connection by local port number + * + * @v local_port Local port + * @ret tcp TCP connection, or NULL + */ +static struct tcp_connection * tcp_demux ( unsigned int local_port ) { + struct tcp_connection *tcp; + + list_for_each_entry ( tcp, &tcp_conns, list ) { + if ( tcp->local_port == local_port ) + return tcp; + } + return NULL; +} + +/** + * Parse TCP received options + * + * @v tcp TCP connection + * @v data Raw options data + * @v len Raw options length + * @v options Options structure to fill in + */ +static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data, + size_t len, struct tcp_options *options ) { + const void *end = ( data + len ); + const struct tcp_option *option; + unsigned int kind; + + memset ( options, 0, sizeof ( *options ) ); + while ( data < end ) { + option = data; + kind = option->kind; + if ( kind == TCP_OPTION_END ) + return; + if ( kind == TCP_OPTION_NOP ) { + data++; + continue; + } + switch ( kind ) { + case TCP_OPTION_MSS: + options->mssopt = data; + break; + case TCP_OPTION_WS: + options->wsopt = data; + break; + case TCP_OPTION_TS: + options->tsopt = data; + break; + default: + DBGC ( tcp, "TCP %p received unknown option %d\n", + tcp, kind ); + break; + } + data += option->length; + } +} + +/** + * Consume received sequence space + * + * @v tcp TCP connection + * @v seq_len Sequence space length to consume + */ +static void tcp_rx_seq ( struct tcp_connection *tcp, uint32_t seq_len ) { + + /* Sanity check */ + assert ( seq_len > 0 ); + + /* Update acknowledgement number */ + tcp->rcv_ack += seq_len; + + /* Update window */ + if ( tcp->rcv_win > seq_len ) { + tcp->rcv_win -= seq_len; + } else { + tcp->rcv_win = 0; + } + + /* Update timestamp */ + tcp->ts_recent = tcp->ts_val; + + /* Mark ACK as pending */ + tcp->flags |= TCP_ACK_PENDING; +} + +/** + * Handle TCP received SYN + * + * @v tcp TCP connection + * @v seq SEQ value (in host-endian order) + * @v options TCP options + * @ret rc Return status code + */ +static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq, + struct tcp_options *options ) { + + /* Synchronise sequence numbers on first SYN */ + if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) { + tcp->rcv_ack = seq; + if ( options->tsopt ) + tcp->flags |= TCP_TS_ENABLED; + if ( options->wsopt ) { + tcp->snd_win_scale = options->wsopt->scale; + tcp->rcv_win_scale = TCP_RX_WINDOW_SCALE; + } + } + + /* Ignore duplicate SYN */ + if ( seq != tcp->rcv_ack ) + return 0; + + /* Acknowledge SYN */ + tcp_rx_seq ( tcp, 1 ); + + /* Mark SYN as received and start sending ACKs with each packet */ + tcp->tcp_state |= ( TCP_STATE_SENT ( TCP_ACK ) | + TCP_STATE_RCVD ( TCP_SYN ) ); + + return 0; +} + +/** + * Handle TCP received ACK + * + * @v tcp TCP connection + * @v ack ACK value (in host-endian order) + * @v win WIN value (in host-endian order) + * @ret rc Return status code + */ +static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack, + uint32_t win ) { + uint32_t ack_len = ( ack - tcp->snd_seq ); + size_t len; + unsigned int acked_flags; + + /* Check for out-of-range or old duplicate ACKs */ + if ( ack_len > tcp->snd_sent ) { + DBGC ( tcp, "TCP %p received ACK for %08x..%08x, " + "sent only %08x..%08x\n", tcp, tcp->snd_seq, + ( tcp->snd_seq + ack_len ), tcp->snd_seq, + ( tcp->snd_seq + tcp->snd_sent ) ); + + if ( TCP_HAS_BEEN_ESTABLISHED ( tcp->tcp_state ) ) { + /* Just ignore what might be old duplicate ACKs */ + return 0; + } else { + /* Send RST if an out-of-range ACK is received + * on a not-yet-established connection, as per + * RFC 793. + */ + return -EINVAL; + } + } + + /* Update window size */ + tcp->snd_win = win; + + /* Ignore ACKs that don't actually acknowledge any new data. + * (In particular, do not stop the retransmission timer; this + * avoids creating a sorceror's apprentice syndrome when a + * duplicate ACK is received and we still have data in our + * transmit queue.) + */ + if ( ack_len == 0 ) + return 0; + + /* Stop the retransmission timer */ + stop_timer ( &tcp->timer ); + + /* Determine acknowledged flags and data length */ + len = ack_len; + acked_flags = ( TCP_FLAGS_SENDING ( tcp->tcp_state ) & + ( TCP_SYN | TCP_FIN ) ); + if ( acked_flags ) { + len--; + pending_put ( &tcp->pending_flags ); + } + + /* Update SEQ and sent counters */ + tcp->snd_seq = ack; + tcp->snd_sent = 0; + + /* Remove any acknowledged data from transmit queue */ + tcp_process_tx_queue ( tcp, len, NULL, 1 ); + + /* Mark SYN/FIN as acknowledged if applicable. */ + if ( acked_flags ) + tcp->tcp_state |= TCP_STATE_ACKED ( acked_flags ); + + /* Start sending FIN if we've had all possible data ACKed */ + if ( list_empty ( &tcp->tx_queue ) && + ( tcp->flags & TCP_XFER_CLOSED ) && + ! ( tcp->tcp_state & TCP_STATE_SENT ( TCP_FIN ) ) ) { + tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN ); + pending_get ( &tcp->pending_flags ); + } + + return 0; +} + +/** + * Handle TCP received data + * + * @v tcp TCP connection + * @v seq SEQ value (in host-endian order) + * @v iobuf I/O buffer + * @ret rc Return status code + * + * This function takes ownership of the I/O buffer. + */ +static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq, + struct io_buffer *iobuf ) { + uint32_t already_rcvd; + uint32_t len; + int rc; + + /* Ignore duplicate or out-of-order data */ + already_rcvd = ( tcp->rcv_ack - seq ); + len = iob_len ( iobuf ); + if ( already_rcvd >= len ) { + free_iob ( iobuf ); + return 0; + } + iob_pull ( iobuf, already_rcvd ); + len -= already_rcvd; + + /* Acknowledge new data */ + tcp_rx_seq ( tcp, len ); + + /* Deliver data to application */ + profile_start ( &tcp_xfer_profiler ); + if ( ( rc = xfer_deliver_iob ( &tcp->xfer, iobuf ) ) != 0 ) { + DBGC ( tcp, "TCP %p could not deliver %08x..%08x: %s\n", + tcp, seq, ( seq + len ), strerror ( rc ) ); + return rc; + } + profile_stop ( &tcp_xfer_profiler ); + + return 0; +} + +/** + * Handle TCP received FIN + * + * @v tcp TCP connection + * @v seq SEQ value (in host-endian order) + * @ret rc Return status code + */ +static int tcp_rx_fin ( struct tcp_connection *tcp, uint32_t seq ) { + + /* Ignore duplicate or out-of-order FIN */ + if ( seq != tcp->rcv_ack ) + return 0; + + /* Acknowledge FIN */ + tcp_rx_seq ( tcp, 1 ); + + /* Mark FIN as received */ + tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN ); + + /* Close connection */ + tcp_close ( tcp, 0 ); + + return 0; +} + +/** + * Handle TCP received RST + * + * @v tcp TCP connection + * @v seq SEQ value (in host-endian order) + * @ret rc Return status code + */ +static int tcp_rx_rst ( struct tcp_connection *tcp, uint32_t seq ) { + + /* Accept RST only if it falls within the window. If we have + * not yet received a SYN, then we have no window to test + * against, so fall back to checking that our SYN has been + * ACKed. + */ + if ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) { + if ( ! tcp_in_window ( seq, tcp->rcv_ack, tcp->rcv_win ) ) + return 0; + } else { + if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) ) + return 0; + } + + /* Abort connection */ + tcp->tcp_state = TCP_CLOSED; + tcp_dump_state ( tcp ); + tcp_close ( tcp, -ECONNRESET ); + + DBGC ( tcp, "TCP %p connection reset by peer\n", tcp ); + return -ECONNRESET; +} + +/** + * Enqueue received TCP packet + * + * @v tcp TCP connection + * @v seq SEQ value (in host-endian order) + * @v flags TCP flags + * @v iobuf I/O buffer + */ +static void tcp_rx_enqueue ( struct tcp_connection *tcp, uint32_t seq, + uint8_t flags, struct io_buffer *iobuf ) { + struct tcp_rx_queued_header *tcpqhdr; + struct io_buffer *queued; + size_t len; + uint32_t seq_len; + + /* Calculate remaining flags and sequence length. Note that + * SYN, if present, has already been processed by this point. + */ + flags &= TCP_FIN; + len = iob_len ( iobuf ); + seq_len = ( len + ( flags ? 1 : 0 ) ); + + /* Discard immediately (to save memory) if: + * + * a) we have not yet received a SYN (and so have no defined + * receive window), or + * b) the packet lies entirely outside the receive window, or + * c) there is no further content to process. + */ + if ( ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) || + ( tcp_cmp ( seq, tcp->rcv_ack + tcp->rcv_win ) >= 0 ) || + ( tcp_cmp ( seq + seq_len, tcp->rcv_ack ) < 0 ) || + ( seq_len == 0 ) ) { + free_iob ( iobuf ); + return; + } + + /* Add internal header */ + tcpqhdr = iob_push ( iobuf, sizeof ( *tcpqhdr ) ); + tcpqhdr->seq = seq; + tcpqhdr->flags = flags; + + /* Add to RX queue */ + list_for_each_entry ( queued, &tcp->rx_queue, list ) { + tcpqhdr = queued->data; + if ( tcp_cmp ( seq, tcpqhdr->seq ) < 0 ) + break; + } + list_add_tail ( &iobuf->list, &queued->list ); +} + +/** + * Process receive queue + * + * @v tcp TCP connection + */ +static void tcp_process_rx_queue ( struct tcp_connection *tcp ) { + struct io_buffer *iobuf; + struct tcp_rx_queued_header *tcpqhdr; + uint32_t seq; + unsigned int flags; + size_t len; + + /* Process all applicable received buffers. Note that we + * cannot use list_for_each_entry() to iterate over the RX + * queue, since tcp_discard() may remove packets from the RX + * queue while we are processing. + */ + while ( ( iobuf = list_first_entry ( &tcp->rx_queue, struct io_buffer, + list ) ) ) { + + /* Stop processing when we hit the first gap */ + tcpqhdr = iobuf->data; + if ( tcp_cmp ( tcpqhdr->seq, tcp->rcv_ack ) > 0 ) + break; + + /* Strip internal header and remove from RX queue */ + list_del ( &iobuf->list ); + seq = tcpqhdr->seq; + flags = tcpqhdr->flags; + iob_pull ( iobuf, sizeof ( *tcpqhdr ) ); + len = iob_len ( iobuf ); + + /* Handle new data, if any */ + tcp_rx_data ( tcp, seq, iob_disown ( iobuf ) ); + seq += len; + + /* Handle FIN, if present */ + if ( flags & TCP_FIN ) { + tcp_rx_fin ( tcp, seq ); + seq++; + } + } +} + +/** + * Process received packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v st_src Partially-filled source address + * @v st_dest Partially-filled destination address + * @v pshdr_csum Pseudo-header checksum + * @ret rc Return status code + */ +static int tcp_rx ( struct io_buffer *iobuf, + struct net_device *netdev __unused, + struct sockaddr_tcpip *st_src, + struct sockaddr_tcpip *st_dest __unused, + uint16_t pshdr_csum ) { + struct tcp_header *tcphdr = iobuf->data; + struct tcp_connection *tcp; + struct tcp_options options; + size_t hlen; + uint16_t csum; + uint32_t seq; + uint32_t ack; + uint16_t raw_win; + uint32_t win; + unsigned int flags; + size_t len; + uint32_t seq_len; + size_t old_xfer_window; + int rc; + + /* Start profiling */ + profile_start ( &tcp_rx_profiler ); + + /* Sanity check packet */ + if ( iob_len ( iobuf ) < sizeof ( *tcphdr ) ) { + DBG ( "TCP packet too short at %zd bytes (min %zd bytes)\n", + iob_len ( iobuf ), sizeof ( *tcphdr ) ); + rc = -EINVAL; + goto discard; + } + hlen = ( ( tcphdr->hlen & TCP_MASK_HLEN ) / 16 ) * 4; + if ( hlen < sizeof ( *tcphdr ) ) { + DBG ( "TCP header too short at %zd bytes (min %zd bytes)\n", + hlen, sizeof ( *tcphdr ) ); + rc = -EINVAL; + goto discard; + } + if ( hlen > iob_len ( iobuf ) ) { + DBG ( "TCP header too long at %zd bytes (max %zd bytes)\n", + hlen, iob_len ( iobuf ) ); + rc = -EINVAL; + goto discard; + } + csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data, + iob_len ( iobuf ) ); + if ( csum != 0 ) { + DBG ( "TCP checksum incorrect (is %04x including checksum " + "field, should be 0000)\n", csum ); + rc = -EINVAL; + goto discard; + } + + /* Parse parameters from header and strip header */ + tcp = tcp_demux ( ntohs ( tcphdr->dest ) ); + seq = ntohl ( tcphdr->seq ); + ack = ntohl ( tcphdr->ack ); + raw_win = ntohs ( tcphdr->win ); + flags = tcphdr->flags; + tcp_rx_opts ( tcp, ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) ), + ( hlen - sizeof ( *tcphdr ) ), &options ); + if ( tcp && options.tsopt ) + tcp->ts_val = ntohl ( options.tsopt->tsval ); + iob_pull ( iobuf, hlen ); + len = iob_len ( iobuf ); + seq_len = ( len + ( ( flags & TCP_SYN ) ? 1 : 0 ) + + ( ( flags & TCP_FIN ) ? 1 : 0 ) ); + + /* Dump header */ + DBGC2 ( tcp, "TCP %p RX %d<-%d %08x %08x..%08x %4zd", + tcp, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ), + ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ), + ( ntohl ( tcphdr->seq ) + seq_len ), len ); + tcp_dump_flags ( tcp, tcphdr->flags ); + DBGC2 ( tcp, "\n" ); + + /* If no connection was found, silently drop packet */ + if ( ! tcp ) { + rc = -ENOTCONN; + goto discard; + } + + /* Record old data-transfer window */ + old_xfer_window = tcp_xfer_window ( tcp ); + + /* Handle ACK, if present */ + if ( flags & TCP_ACK ) { + win = ( raw_win << tcp->snd_win_scale ); + if ( ( rc = tcp_rx_ack ( tcp, ack, win ) ) != 0 ) { + tcp_xmit_reset ( tcp, st_src, tcphdr ); + goto discard; + } + } + + /* Force an ACK if this packet is out of order */ + if ( ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) && + ( seq != tcp->rcv_ack ) ) { + tcp->flags |= TCP_ACK_PENDING; + } + + /* Handle SYN, if present */ + if ( flags & TCP_SYN ) { + tcp_rx_syn ( tcp, seq, &options ); + seq++; + } + + /* Handle RST, if present */ + if ( flags & TCP_RST ) { + if ( ( rc = tcp_rx_rst ( tcp, seq ) ) != 0 ) + goto discard; + } + + /* Enqueue received data */ + tcp_rx_enqueue ( tcp, seq, flags, iob_disown ( iobuf ) ); + + /* Process receive queue */ + tcp_process_rx_queue ( tcp ); + + /* Dump out any state change as a result of the received packet */ + tcp_dump_state ( tcp ); + + /* Schedule transmission of ACK (and any pending data). If we + * have received any out-of-order packets (i.e. if the receive + * queue remains non-empty after processing) then send the ACK + * immediately in order to trigger Fast Retransmission. + */ + if ( list_empty ( &tcp->rx_queue ) ) { + process_add ( &tcp->process ); + } else { + tcp_xmit ( tcp ); + } + + /* If this packet was the last we expect to receive, set up + * timer to expire and cause the connection to be freed. + */ + if ( TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ) ) { + stop_timer ( &tcp->wait ); + start_timer_fixed ( &tcp->wait, ( 2 * TCP_MSL ) ); + } + + /* Notify application if window has changed */ + if ( tcp_xfer_window ( tcp ) != old_xfer_window ) + xfer_window_changed ( &tcp->xfer ); + + profile_stop ( &tcp_rx_profiler ); + return 0; + + discard: + /* Free received packet */ + free_iob ( iobuf ); + return rc; +} + +/** TCP protocol */ +struct tcpip_protocol tcp_protocol __tcpip_protocol = { + .name = "TCP", + .rx = tcp_rx, + .tcpip_proto = IP_TCP, +}; + +/** + * Discard some cached TCP data + * + * @ret discarded Number of cached items discarded + */ +static unsigned int tcp_discard ( void ) { + struct tcp_connection *tcp; + struct io_buffer *iobuf; + struct tcp_rx_queued_header *tcpqhdr; + uint32_t max_win; + unsigned int discarded = 0; + + /* Try to drop one queued RX packet from each connection */ + list_for_each_entry ( tcp, &tcp_conns, list ) { + list_for_each_entry_reverse ( iobuf, &tcp->rx_queue, list ) { + + /* Limit window to prevent future discards */ + tcpqhdr = iobuf->data; + max_win = ( tcpqhdr->seq - tcp->rcv_ack ); + if ( max_win < tcp->max_rcv_win ) { + DBGC ( tcp, "TCP %p reducing maximum window " + "from %d to %d\n", + tcp, tcp->max_rcv_win, max_win ); + tcp->max_rcv_win = max_win; + } + + /* Remove packet from queue */ + list_del ( &iobuf->list ); + free_iob ( iobuf ); + + /* Report discard */ + discarded++; + break; + } + } + + return discarded; +} + +/** TCP cache discarder */ +struct cache_discarder tcp_discarder __cache_discarder ( CACHE_NORMAL ) = { + .discard = tcp_discard, +}; + +/** + * Shut down all TCP connections + * + */ +static void tcp_shutdown ( int booting __unused ) { + struct tcp_connection *tcp; + + while ( ( tcp = list_first_entry ( &tcp_conns, struct tcp_connection, + list ) ) != NULL ) { + tcp->tcp_state = TCP_CLOSED; + tcp_dump_state ( tcp ); + tcp_close ( tcp, -ECANCELED ); + } +} + +/** TCP shutdown function */ +struct startup_fn tcp_startup_fn __startup_fn ( STARTUP_EARLY ) = { + .shutdown = tcp_shutdown, +}; + +/*************************************************************************** + * + * Data transfer interface + * + *************************************************************************** + */ + +/** + * Close interface + * + * @v tcp TCP connection + * @v rc Reason for close + */ +static void tcp_xfer_close ( struct tcp_connection *tcp, int rc ) { + + /* Close data transfer interface */ + tcp_close ( tcp, rc ); + + /* Transmit FIN, if possible */ + tcp_xmit ( tcp ); +} + +/** + * Deliver datagram as I/O buffer + * + * @v tcp TCP connection + * @v iobuf Datagram I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int tcp_xfer_deliver ( struct tcp_connection *tcp, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + + /* Enqueue packet */ + list_add_tail ( &iobuf->list, &tcp->tx_queue ); + + /* Each enqueued packet is a pending operation */ + pending_get ( &tcp->pending_data ); + + /* Transmit data, if possible */ + tcp_xmit ( tcp ); + + return 0; +} + +/** TCP data transfer interface operations */ +static struct interface_operation tcp_xfer_operations[] = { + INTF_OP ( xfer_deliver, struct tcp_connection *, tcp_xfer_deliver ), + INTF_OP ( xfer_window, struct tcp_connection *, tcp_xfer_window ), + INTF_OP ( intf_close, struct tcp_connection *, tcp_xfer_close ), +}; + +/** TCP data transfer interface descriptor */ +static struct interface_descriptor tcp_xfer_desc = + INTF_DESC ( struct tcp_connection, xfer, tcp_xfer_operations ); + +/*************************************************************************** + * + * Openers + * + *************************************************************************** + */ + +/** TCP IPv4 socket opener */ +struct socket_opener tcp_ipv4_socket_opener __socket_opener = { + .semantics = TCP_SOCK_STREAM, + .family = AF_INET, + .open = tcp_open, +}; + +/** TCP IPv6 socket opener */ +struct socket_opener tcp_ipv6_socket_opener __socket_opener = { + .semantics = TCP_SOCK_STREAM, + .family = AF_INET6, + .open = tcp_open, +}; + +/** Linkage hack */ +int tcp_sock_stream = TCP_SOCK_STREAM; + +/** + * Open TCP URI + * + * @v xfer Data transfer interface + * @v uri URI + * @ret rc Return status code + */ +static int tcp_open_uri ( struct interface *xfer, struct uri *uri ) { + struct sockaddr_tcpip peer; + + /* Sanity check */ + if ( ! uri->host ) + return -EINVAL; + + memset ( &peer, 0, sizeof ( peer ) ); + peer.st_port = htons ( uri_port ( uri, 0 ) ); + return xfer_open_named_socket ( xfer, SOCK_STREAM, + ( struct sockaddr * ) &peer, + uri->host, NULL ); +} + +/** TCP URI opener */ +struct uri_opener tcp_uri_opener __uri_opener = { + .scheme = "tcp", + .open = tcp_open_uri, +}; + diff --git a/qemu/roms/ipxe/src/net/tcp/ftp.c b/qemu/roms/ipxe/src/net/tcp/ftp.c new file mode 100644 index 000000000..be7a7c3b5 --- /dev/null +++ b/qemu/roms/ipxe/src/net/tcp/ftp.c @@ -0,0 +1,546 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <ctype.h> +#include <byteswap.h> +#include <ipxe/socket.h> +#include <ipxe/tcpip.h> +#include <ipxe/in.h> +#include <ipxe/iobuf.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/uri.h> +#include <ipxe/features.h> +#include <ipxe/ftp.h> + +/** @file + * + * File transfer protocol + * + */ + +FEATURE ( FEATURE_PROTOCOL, "FTP", DHCP_EB_FEATURE_FTP, 1 ); + +/** + * FTP states + * + * These @b must be sequential, i.e. a successful FTP session must + * pass through each of these states in order. + */ +enum ftp_state { + FTP_CONNECT = 0, + FTP_USER, + FTP_PASS, + FTP_TYPE, + FTP_SIZE, + FTP_PASV, + FTP_RETR, + FTP_WAIT, + FTP_QUIT, + FTP_DONE, +}; + +/** + * An FTP request + * + */ +struct ftp_request { + /** Reference counter */ + struct refcnt refcnt; + /** Data transfer interface */ + struct interface xfer; + + /** URI being fetched */ + struct uri *uri; + /** FTP control channel interface */ + struct interface control; + /** FTP data channel interface */ + struct interface data; + + /** Current state */ + enum ftp_state state; + /** Buffer to be filled with data received via the control channel */ + char *recvbuf; + /** Remaining size of recvbuf */ + size_t recvsize; + /** FTP status code, as text */ + char status_text[5]; + /** Passive-mode parameters, as text */ + char passive_text[24]; /* "aaa,bbb,ccc,ddd,eee,fff" */ + /** File size, as text */ + char filesize[20]; +}; + +/** + * Free FTP request + * + * @v refcnt Reference counter + */ +static void ftp_free ( struct refcnt *refcnt ) { + struct ftp_request *ftp = + container_of ( refcnt, struct ftp_request, refcnt ); + + DBGC ( ftp, "FTP %p freed\n", ftp ); + + uri_put ( ftp->uri ); + free ( ftp ); +} + +/** + * Mark FTP operation as complete + * + * @v ftp FTP request + * @v rc Return status code + */ +static void ftp_done ( struct ftp_request *ftp, int rc ) { + + DBGC ( ftp, "FTP %p completed (%s)\n", ftp, strerror ( rc ) ); + + /* Close all data transfer interfaces */ + intf_shutdown ( &ftp->data, rc ); + intf_shutdown ( &ftp->control, rc ); + intf_shutdown ( &ftp->xfer, rc ); +} + +/***************************************************************************** + * + * FTP control channel + * + */ + +/** An FTP control channel string */ +struct ftp_control_string { + /** Literal portion */ + const char *literal; + /** Variable portion + * + * @v ftp FTP request + * @ret string Variable portion of string + */ + const char * ( *variable ) ( struct ftp_request *ftp ); +}; + +/** + * Retrieve FTP pathname + * + * @v ftp FTP request + * @ret path FTP pathname + */ +static const char * ftp_uri_path ( struct ftp_request *ftp ) { + return ftp->uri->path; +} + +/** + * Retrieve FTP user + * + * @v ftp FTP request + * @ret user FTP user + */ +static const char * ftp_user ( struct ftp_request *ftp ) { + static char *ftp_default_user = "anonymous"; + return ftp->uri->user ? ftp->uri->user : ftp_default_user; +} + +/** + * Retrieve FTP password + * + * @v ftp FTP request + * @ret password FTP password + */ +static const char * ftp_password ( struct ftp_request *ftp ) { + static char *ftp_default_password = "ipxe@ipxe.org"; + return ftp->uri->password ? ftp->uri->password : ftp_default_password; +} + +/** FTP control channel strings */ +static struct ftp_control_string ftp_strings[] = { + [FTP_CONNECT] = { NULL, NULL }, + [FTP_USER] = { "USER ", ftp_user }, + [FTP_PASS] = { "PASS ", ftp_password }, + [FTP_TYPE] = { "TYPE I", NULL }, + [FTP_SIZE] = { "SIZE ", ftp_uri_path }, + [FTP_PASV] = { "PASV", NULL }, + [FTP_RETR] = { "RETR ", ftp_uri_path }, + [FTP_WAIT] = { NULL, NULL }, + [FTP_QUIT] = { "QUIT", NULL }, + [FTP_DONE] = { NULL, NULL }, +}; + +/** + * Parse FTP byte sequence value + * + * @v text Text string + * @v value Value buffer + * @v len Length of value buffer + * + * This parses an FTP byte sequence value (e.g. the "aaa,bbb,ccc,ddd" + * form for IP addresses in PORT commands) into a byte sequence. @c + * *text will be updated to point beyond the end of the parsed byte + * sequence. + * + * This function is safe in the presence of malformed data, though the + * output is undefined. + */ +static void ftp_parse_value ( char **text, uint8_t *value, size_t len ) { + do { + *(value++) = strtoul ( *text, text, 10 ); + if ( **text ) + (*text)++; + } while ( --len ); +} + +/** + * Move to next state and send the appropriate FTP control string + * + * @v ftp FTP request + * + */ +static void ftp_next_state ( struct ftp_request *ftp ) { + struct ftp_control_string *ftp_string; + const char *literal; + const char *variable; + + /* Move to next state */ + if ( ftp->state < FTP_DONE ) + ftp->state++; + + /* Send control string if needed */ + ftp_string = &ftp_strings[ftp->state]; + literal = ftp_string->literal; + variable = ( ftp_string->variable ? + ftp_string->variable ( ftp ) : "" ); + if ( literal ) { + DBGC ( ftp, "FTP %p sending %s%s\n", ftp, literal, variable ); + xfer_printf ( &ftp->control, "%s%s\r\n", literal, variable ); + } +} + +/** + * Handle an FTP control channel response + * + * @v ftp FTP request + * + * This is called once we have received a complete response line. + */ +static void ftp_reply ( struct ftp_request *ftp ) { + char status_major = ftp->status_text[0]; + char separator = ftp->status_text[3]; + + DBGC ( ftp, "FTP %p received status %s\n", ftp, ftp->status_text ); + + /* Ignore malformed lines */ + if ( separator != ' ' ) + return; + + /* Ignore "intermediate" responses (1xx codes) */ + if ( status_major == '1' ) + return; + + /* If the SIZE command is not supported by the server, we go to + * the next step. + */ + if ( ( status_major == '5' ) && ( ftp->state == FTP_SIZE ) ) { + ftp_next_state ( ftp ); + return; + } + + /* Anything other than success (2xx) or, in the case of a + * repsonse to a "USER" command, a password prompt (3xx), is a + * fatal error. + */ + if ( ! ( ( status_major == '2' ) || + ( ( status_major == '3' ) && ( ftp->state == FTP_USER ) ) ) ){ + /* Flag protocol error and close connections */ + ftp_done ( ftp, -EPROTO ); + return; + } + + /* Parse file size */ + if ( ftp->state == FTP_SIZE ) { + size_t filesize; + char *endptr; + + /* Parse size */ + filesize = strtoul ( ftp->filesize, &endptr, 10 ); + if ( *endptr != '\0' ) { + DBGC ( ftp, "FTP %p invalid SIZE \"%s\"\n", + ftp, ftp->filesize ); + ftp_done ( ftp, -EPROTO ); + return; + } + + /* Use seek() to notify recipient of filesize */ + DBGC ( ftp, "FTP %p file size is %zd bytes\n", ftp, filesize ); + xfer_seek ( &ftp->xfer, filesize ); + xfer_seek ( &ftp->xfer, 0 ); + } + + /* Open passive connection when we get "PASV" response */ + if ( ftp->state == FTP_PASV ) { + char *ptr = ftp->passive_text; + union { + struct sockaddr_in sin; + struct sockaddr sa; + } sa; + int rc; + + sa.sin.sin_family = AF_INET; + ftp_parse_value ( &ptr, ( uint8_t * ) &sa.sin.sin_addr, + sizeof ( sa.sin.sin_addr ) ); + ftp_parse_value ( &ptr, ( uint8_t * ) &sa.sin.sin_port, + sizeof ( sa.sin.sin_port ) ); + if ( ( rc = xfer_open_socket ( &ftp->data, SOCK_STREAM, + &sa.sa, NULL ) ) != 0 ) { + DBGC ( ftp, "FTP %p could not open data connection\n", + ftp ); + ftp_done ( ftp, rc ); + return; + } + } + + /* Move to next state and send control string */ + ftp_next_state ( ftp ); + +} + +/** + * Handle new data arriving on FTP control channel + * + * @v ftp FTP request + * @v iob I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + * + * Data is collected until a complete line is received, at which point + * its information is passed to ftp_reply(). + */ +static int ftp_control_deliver ( struct ftp_request *ftp, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + char *data = iobuf->data; + size_t len = iob_len ( iobuf ); + char *recvbuf = ftp->recvbuf; + size_t recvsize = ftp->recvsize; + char c; + + while ( len-- ) { + c = *(data++); + if ( ( c == '\r' ) || ( c == '\n' ) ) { + /* End of line: call ftp_reply() to handle + * completed reply. Avoid calling ftp_reply() + * twice if we receive both \r and \n. + */ + if ( recvbuf != ftp->status_text ) + ftp_reply ( ftp ); + /* Start filling up the status code buffer */ + recvbuf = ftp->status_text; + recvsize = sizeof ( ftp->status_text ) - 1; + } else if ( ( ftp->state == FTP_PASV ) && ( c == '(' ) ) { + /* Start filling up the passive parameter buffer */ + recvbuf = ftp->passive_text; + recvsize = sizeof ( ftp->passive_text ) - 1; + } else if ( ( ftp->state == FTP_PASV ) && ( c == ')' ) ) { + /* Stop filling the passive parameter buffer */ + recvsize = 0; + } else if ( ( ftp->state == FTP_SIZE ) && ( c == ' ' ) ) { + /* Start filling up the file size buffer */ + recvbuf = ftp->filesize; + recvsize = sizeof ( ftp->filesize ) - 1; + } else { + /* Fill up buffer if applicable */ + if ( recvsize > 0 ) { + *(recvbuf++) = c; + recvsize--; + } + } + } + + /* Store for next invocation */ + ftp->recvbuf = recvbuf; + ftp->recvsize = recvsize; + + /* Free I/O buffer */ + free_iob ( iobuf ); + + return 0; +} + +/** FTP control channel interface operations */ +static struct interface_operation ftp_control_operations[] = { + INTF_OP ( xfer_deliver, struct ftp_request *, ftp_control_deliver ), + INTF_OP ( intf_close, struct ftp_request *, ftp_done ), +}; + +/** FTP control channel interface descriptor */ +static struct interface_descriptor ftp_control_desc = + INTF_DESC ( struct ftp_request, control, ftp_control_operations ); + +/***************************************************************************** + * + * FTP data channel + * + */ + +/** + * Handle FTP data channel being closed + * + * @v ftp FTP request + * @v rc Reason for closure + * + * When the data channel is closed, the control channel should be left + * alone; the server will send a completion message via the control + * channel which we'll pick up. + * + * If the data channel is closed due to an error, we abort the request. + */ +static void ftp_data_closed ( struct ftp_request *ftp, int rc ) { + + DBGC ( ftp, "FTP %p data connection closed: %s\n", + ftp, strerror ( rc ) ); + + /* If there was an error, close control channel and record status */ + if ( rc ) { + ftp_done ( ftp, rc ); + } else { + ftp_next_state ( ftp ); + } +} + +/** FTP data channel interface operations */ +static struct interface_operation ftp_data_operations[] = { + INTF_OP ( intf_close, struct ftp_request *, ftp_data_closed ), +}; + +/** FTP data channel interface descriptor */ +static struct interface_descriptor ftp_data_desc = + INTF_DESC_PASSTHRU ( struct ftp_request, data, ftp_data_operations, + xfer ); + +/***************************************************************************** + * + * Data transfer interface + * + */ + +/** FTP data transfer interface operations */ +static struct interface_operation ftp_xfer_operations[] = { + INTF_OP ( intf_close, struct ftp_request *, ftp_done ), +}; + +/** FTP data transfer interface descriptor */ +static struct interface_descriptor ftp_xfer_desc = + INTF_DESC_PASSTHRU ( struct ftp_request, xfer, ftp_xfer_operations, + data ); + +/***************************************************************************** + * + * URI opener + * + */ + +/** + * Check validity of FTP control channel string + * + * @v string String + * @ret rc Return status code + */ +static int ftp_check_string ( const char *string ) { + char c; + + /* The FTP control channel is line-based. Check for invalid + * non-printable characters (e.g. newlines). + */ + while ( ( c = *(string++) ) ) { + if ( ! isprint ( c ) ) + return -EINVAL; + } + return 0; +} + +/** + * Initiate an FTP connection + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int ftp_open ( struct interface *xfer, struct uri *uri ) { + struct ftp_request *ftp; + struct sockaddr_tcpip server; + int rc; + + /* Sanity checks */ + if ( ! uri->host ) + return -EINVAL; + if ( ! uri->path ) + return -EINVAL; + if ( ( rc = ftp_check_string ( uri->path ) ) != 0 ) + return rc; + if ( uri->user && ( ( rc = ftp_check_string ( uri->user ) ) != 0 ) ) + return rc; + if ( uri->password && + ( ( rc = ftp_check_string ( uri->password ) ) != 0 ) ) + return rc; + + /* Allocate and populate structure */ + ftp = zalloc ( sizeof ( *ftp ) ); + if ( ! ftp ) + return -ENOMEM; + ref_init ( &ftp->refcnt, ftp_free ); + intf_init ( &ftp->xfer, &ftp_xfer_desc, &ftp->refcnt ); + intf_init ( &ftp->control, &ftp_control_desc, &ftp->refcnt ); + intf_init ( &ftp->data, &ftp_data_desc, &ftp->refcnt ); + ftp->uri = uri_get ( uri ); + ftp->recvbuf = ftp->status_text; + ftp->recvsize = sizeof ( ftp->status_text ) - 1; + + DBGC ( ftp, "FTP %p fetching %s\n", ftp, ftp->uri->path ); + + /* Open control connection */ + memset ( &server, 0, sizeof ( server ) ); + server.st_port = htons ( uri_port ( uri, FTP_PORT ) ); + if ( ( rc = xfer_open_named_socket ( &ftp->control, SOCK_STREAM, + ( struct sockaddr * ) &server, + uri->host, NULL ) ) != 0 ) + goto err; + + /* Attach to parent interface, mortalise self, and return */ + intf_plug_plug ( &ftp->xfer, xfer ); + ref_put ( &ftp->refcnt ); + return 0; + + err: + DBGC ( ftp, "FTP %p could not create request: %s\n", + ftp, strerror ( rc ) ); + ftp_done ( ftp, rc ); + ref_put ( &ftp->refcnt ); + return rc; +} + +/** FTP URI opener */ +struct uri_opener ftp_uri_opener __uri_opener = { + .scheme = "ftp", + .open = ftp_open, +}; diff --git a/qemu/roms/ipxe/src/net/tcp/http.c b/qemu/roms/ipxe/src/net/tcp/http.c new file mode 100644 index 000000000..90bae9d7a --- /dev/null +++ b/qemu/roms/ipxe/src/net/tcp/http.c @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** + * @file + * + * Hyper Text Transfer Protocol (HTTP) + * + */ + +#include <stddef.h> +#include <ipxe/open.h> +#include <ipxe/http.h> +#include <ipxe/features.h> + +FEATURE ( FEATURE_PROTOCOL, "HTTP", DHCP_EB_FEATURE_HTTP, 1 ); + +/** + * Initiate an HTTP connection + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int http_open ( struct interface *xfer, struct uri *uri ) { + return http_open_filter ( xfer, uri, HTTP_PORT, NULL ); +} + +/** HTTP URI opener */ +struct uri_opener http_uri_opener __uri_opener = { + .scheme = "http", + .open = http_open, +}; diff --git a/qemu/roms/ipxe/src/net/tcp/httpcore.c b/qemu/roms/ipxe/src/net/tcp/httpcore.c new file mode 100644 index 000000000..1d1953e61 --- /dev/null +++ b/qemu/roms/ipxe/src/net/tcp/httpcore.c @@ -0,0 +1,1574 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** + * @file + * + * Hyper Text Transfer Protocol (HTTP) core functionality + * + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <strings.h> +#include <byteswap.h> +#include <errno.h> +#include <ctype.h> +#include <assert.h> +#include <ipxe/uri.h> +#include <ipxe/refcnt.h> +#include <ipxe/iobuf.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/socket.h> +#include <ipxe/tcpip.h> +#include <ipxe/process.h> +#include <ipxe/retry.h> +#include <ipxe/timer.h> +#include <ipxe/linebuf.h> +#include <ipxe/base64.h> +#include <ipxe/base16.h> +#include <ipxe/md5.h> +#include <ipxe/blockdev.h> +#include <ipxe/acpi.h> +#include <ipxe/version.h> +#include <ipxe/params.h> +#include <ipxe/profile.h> +#include <ipxe/http.h> + +/* Disambiguate the various error causes */ +#define EACCES_401 __einfo_error ( EINFO_EACCES_401 ) +#define EINFO_EACCES_401 \ + __einfo_uniqify ( EINFO_EACCES, 0x01, "HTTP 401 Unauthorized" ) +#define EIO_OTHER __einfo_error ( EINFO_EIO_OTHER ) +#define EINFO_EIO_OTHER \ + __einfo_uniqify ( EINFO_EIO, 0x01, "Unrecognised HTTP response code" ) +#define EIO_CONTENT_LENGTH __einfo_error ( EINFO_EIO_CONTENT_LENGTH ) +#define EINFO_EIO_CONTENT_LENGTH \ + __einfo_uniqify ( EINFO_EIO, 0x02, "Content length mismatch" ) +#define EINVAL_RESPONSE __einfo_error ( EINFO_EINVAL_RESPONSE ) +#define EINFO_EINVAL_RESPONSE \ + __einfo_uniqify ( EINFO_EINVAL, 0x01, "Invalid content length" ) +#define EINVAL_HEADER __einfo_error ( EINFO_EINVAL_HEADER ) +#define EINFO_EINVAL_HEADER \ + __einfo_uniqify ( EINFO_EINVAL, 0x02, "Invalid header" ) +#define EINVAL_CONTENT_LENGTH __einfo_error ( EINFO_EINVAL_CONTENT_LENGTH ) +#define EINFO_EINVAL_CONTENT_LENGTH \ + __einfo_uniqify ( EINFO_EINVAL, 0x03, "Invalid content length" ) +#define EINVAL_CHUNK_LENGTH __einfo_error ( EINFO_EINVAL_CHUNK_LENGTH ) +#define EINFO_EINVAL_CHUNK_LENGTH \ + __einfo_uniqify ( EINFO_EINVAL, 0x04, "Invalid chunk length" ) +#define ENOENT_404 __einfo_error ( EINFO_ENOENT_404 ) +#define EINFO_ENOENT_404 \ + __einfo_uniqify ( EINFO_ENOENT, 0x01, "HTTP 404 Not Found" ) +#define EPERM_403 __einfo_error ( EINFO_EPERM_403 ) +#define EINFO_EPERM_403 \ + __einfo_uniqify ( EINFO_EPERM, 0x01, "HTTP 403 Forbidden" ) +#define EPROTO_UNSOLICITED __einfo_error ( EINFO_EPROTO_UNSOLICITED ) +#define EINFO_EPROTO_UNSOLICITED \ + __einfo_uniqify ( EINFO_EPROTO, 0x01, "Unsolicited data" ) + +/** Block size used for HTTP block device request */ +#define HTTP_BLKSIZE 512 + +/** Retry delay used when we cannot understand the Retry-After header */ +#define HTTP_RETRY_SECONDS 5 + +/** Receive profiler */ +static struct profiler http_rx_profiler __profiler = { .name = "http.rx" }; + +/** Data transfer profiler */ +static struct profiler http_xfer_profiler __profiler = { .name = "http.xfer" }; + +/** HTTP flags */ +enum http_flags { + /** Request is waiting to be transmitted */ + HTTP_TX_PENDING = 0x0001, + /** Fetch header only */ + HTTP_HEAD_ONLY = 0x0002, + /** Client would like to keep connection alive */ + HTTP_CLIENT_KEEPALIVE = 0x0004, + /** Server will keep connection alive */ + HTTP_SERVER_KEEPALIVE = 0x0008, + /** Discard the current request and try again */ + HTTP_TRY_AGAIN = 0x0010, + /** Provide Basic authentication details */ + HTTP_BASIC_AUTH = 0x0020, + /** Provide Digest authentication details */ + HTTP_DIGEST_AUTH = 0x0040, + /** Socket must be reopened */ + HTTP_REOPEN_SOCKET = 0x0080, +}; + +/** HTTP receive state */ +enum http_rx_state { + HTTP_RX_RESPONSE = 0, + HTTP_RX_HEADER, + HTTP_RX_CHUNK_LEN, + /* In HTTP_RX_DATA, it is acceptable for the server to close + * the connection (unless we are in the middle of a chunked + * transfer). + */ + HTTP_RX_DATA, + /* In the following states, it is acceptable for the server to + * close the connection. + */ + HTTP_RX_TRAILER, + HTTP_RX_IDLE, + HTTP_RX_DEAD, +}; + +/** + * An HTTP request + * + */ +struct http_request { + /** Reference count */ + struct refcnt refcnt; + /** Data transfer interface */ + struct interface xfer; + /** Partial transfer interface */ + struct interface partial; + + /** URI being fetched */ + struct uri *uri; + /** Default port */ + unsigned int default_port; + /** Filter (if any) */ + int ( * filter ) ( struct interface *xfer, + const char *name, + struct interface **next ); + /** Transport layer interface */ + struct interface socket; + + /** Flags */ + unsigned int flags; + /** Starting offset of partial transfer (if applicable) */ + size_t partial_start; + /** Length of partial transfer (if applicable) */ + size_t partial_len; + + /** TX process */ + struct process process; + + /** RX state */ + enum http_rx_state rx_state; + /** Response code */ + unsigned int code; + /** Received length */ + size_t rx_len; + /** Length remaining (or 0 if unknown) */ + size_t remaining; + /** HTTP is using Transfer-Encoding: chunked */ + int chunked; + /** Current chunk length remaining (if applicable) */ + size_t chunk_remaining; + /** Line buffer for received header lines */ + struct line_buffer linebuf; + /** Receive data buffer (if applicable) */ + userptr_t rx_buffer; + + /** Authentication realm (if any) */ + char *auth_realm; + /** Authentication nonce (if any) */ + char *auth_nonce; + /** Authentication opaque string (if any) */ + char *auth_opaque; + + /** Request retry timer */ + struct retry_timer timer; + /** Retry delay (in timer ticks) */ + unsigned long retry_delay; +}; + +/** + * Free HTTP request + * + * @v refcnt Reference counter + */ +static void http_free ( struct refcnt *refcnt ) { + struct http_request *http = + container_of ( refcnt, struct http_request, refcnt ); + + uri_put ( http->uri ); + empty_line_buffer ( &http->linebuf ); + free ( http->auth_realm ); + free ( http->auth_nonce ); + free ( http->auth_opaque ); + free ( http ); +}; + +/** + * Close HTTP request + * + * @v http HTTP request + * @v rc Return status code + */ +static void http_close ( struct http_request *http, int rc ) { + + /* Prevent further processing of any current packet */ + http->rx_state = HTTP_RX_DEAD; + + /* Prevent reconnection */ + http->flags &= ~HTTP_CLIENT_KEEPALIVE; + + /* Remove process */ + process_del ( &http->process ); + + /* Close all data transfer interfaces */ + intf_shutdown ( &http->socket, rc ); + intf_shutdown ( &http->partial, rc ); + intf_shutdown ( &http->xfer, rc ); +} + +/** + * Open HTTP socket + * + * @v http HTTP request + * @ret rc Return status code + */ +static int http_socket_open ( struct http_request *http ) { + struct uri *uri = http->uri; + struct sockaddr_tcpip server; + struct interface *socket; + int rc; + + /* Open socket */ + memset ( &server, 0, sizeof ( server ) ); + server.st_port = htons ( uri_port ( uri, http->default_port ) ); + socket = &http->socket; + if ( http->filter ) { + if ( ( rc = http->filter ( socket, uri->host, &socket ) ) != 0 ) + return rc; + } + if ( ( rc = xfer_open_named_socket ( socket, SOCK_STREAM, + ( struct sockaddr * ) &server, + uri->host, NULL ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Retry HTTP request + * + * @v timer Retry timer + * @v fail Failure indicator + */ +static void http_retry ( struct retry_timer *timer, int fail __unused ) { + struct http_request *http = + container_of ( timer, struct http_request, timer ); + int rc; + + /* Reopen socket if required */ + if ( http->flags & HTTP_REOPEN_SOCKET ) { + http->flags &= ~HTTP_REOPEN_SOCKET; + DBGC ( http, "HTTP %p reopening connection\n", http ); + if ( ( rc = http_socket_open ( http ) ) != 0 ) { + http_close ( http, rc ); + return; + } + } + + /* Retry the request if applicable */ + if ( http->flags & HTTP_TRY_AGAIN ) { + http->flags &= ~HTTP_TRY_AGAIN; + DBGC ( http, "HTTP %p retrying request\n", http ); + http->flags |= HTTP_TX_PENDING; + http->rx_state = HTTP_RX_RESPONSE; + process_add ( &http->process ); + } +} + +/** + * Mark HTTP request as completed successfully + * + * @v http HTTP request + */ +static void http_done ( struct http_request *http ) { + + /* If we are not at an appropriate stage of the protocol + * (including being in the middle of a chunked transfer), + * force an error. + */ + if ( ( http->rx_state < HTTP_RX_DATA ) || ( http->chunked != 0 ) ) { + DBGC ( http, "HTTP %p connection closed unexpectedly in state " + "%d\n", http, http->rx_state ); + http_close ( http, -ECONNRESET ); + return; + } + + /* If we had a Content-Length, and the received content length + * isn't correct, force an error + */ + if ( http->remaining != 0 ) { + DBGC ( http, "HTTP %p incorrect length %zd, should be %zd\n", + http, http->rx_len, ( http->rx_len + http->remaining ) ); + http_close ( http, -EIO_CONTENT_LENGTH ); + return; + } + + /* Enter idle state */ + http->rx_state = HTTP_RX_IDLE; + http->rx_len = 0; + assert ( http->remaining == 0 ); + assert ( http->chunked == 0 ); + assert ( http->chunk_remaining == 0 ); + + /* Close partial transfer interface */ + if ( ! ( http->flags & HTTP_TRY_AGAIN ) ) + intf_restart ( &http->partial, 0 ); + + /* Close everything unless we want to keep the connection alive */ + if ( ! ( http->flags & ( HTTP_CLIENT_KEEPALIVE | HTTP_TRY_AGAIN ) ) ) { + http_close ( http, 0 ); + return; + } + + /* If the server is not intending to keep the connection + * alive, then close the socket and mark it as requiring + * reopening. + */ + if ( ! ( http->flags & HTTP_SERVER_KEEPALIVE ) ) { + intf_restart ( &http->socket, 0 ); + http->flags &= ~HTTP_SERVER_KEEPALIVE; + http->flags |= HTTP_REOPEN_SOCKET; + } + + /* Start request retry timer */ + start_timer_fixed ( &http->timer, http->retry_delay ); + http->retry_delay = 0; +} + +/** + * Convert HTTP response code to return status code + * + * @v response HTTP response code + * @ret rc Return status code + */ +static int http_response_to_rc ( unsigned int response ) { + switch ( response ) { + case 200: + case 206: + case 301: + case 302: + case 303: + return 0; + case 404: + return -ENOENT_404; + case 403: + return -EPERM_403; + case 401: + return -EACCES_401; + default: + return -EIO_OTHER; + } +} + +/** + * Handle HTTP response + * + * @v http HTTP request + * @v response HTTP response + * @ret rc Return status code + */ +static int http_rx_response ( struct http_request *http, char *response ) { + char *spc; + + DBGC ( http, "HTTP %p response \"%s\"\n", http, response ); + + /* Check response starts with "HTTP/" */ + if ( strncmp ( response, "HTTP/", 5 ) != 0 ) + return -EINVAL_RESPONSE; + + /* Locate and store response code */ + spc = strchr ( response, ' ' ); + if ( ! spc ) + return -EINVAL_RESPONSE; + http->code = strtoul ( spc, NULL, 10 ); + + /* Move to receive headers */ + http->rx_state = ( ( http->flags & HTTP_HEAD_ONLY ) ? + HTTP_RX_TRAILER : HTTP_RX_HEADER ); + return 0; +} + +/** + * Handle HTTP Location header + * + * @v http HTTP request + * @v value HTTP header value + * @ret rc Return status code + */ +static int http_rx_location ( struct http_request *http, char *value ) { + int rc; + + /* Redirect to new location */ + DBGC ( http, "HTTP %p redirecting to %s\n", http, value ); + if ( ( rc = xfer_redirect ( &http->xfer, LOCATION_URI_STRING, + value ) ) != 0 ) { + DBGC ( http, "HTTP %p could not redirect: %s\n", + http, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Handle HTTP Content-Length header + * + * @v http HTTP request + * @v value HTTP header value + * @ret rc Return status code + */ +static int http_rx_content_length ( struct http_request *http, char *value ) { + struct block_device_capacity capacity; + size_t content_len; + char *endp; + + /* Parse content length */ + content_len = strtoul ( value, &endp, 10 ); + if ( ! ( ( *endp == '\0' ) || isspace ( *endp ) ) ) { + DBGC ( http, "HTTP %p invalid Content-Length \"%s\"\n", + http, value ); + return -EINVAL_CONTENT_LENGTH; + } + + /* If we already have an expected content length, and this + * isn't it, then complain + */ + if ( http->remaining && ( http->remaining != content_len ) ) { + DBGC ( http, "HTTP %p incorrect Content-Length %zd (expected " + "%zd)\n", http, content_len, http->remaining ); + return -EIO_CONTENT_LENGTH; + } + if ( ! ( http->flags & HTTP_HEAD_ONLY ) ) + http->remaining = content_len; + + /* Do nothing more if we are retrying the request */ + if ( http->flags & HTTP_TRY_AGAIN ) + return 0; + + /* Use seek() to notify recipient of filesize */ + xfer_seek ( &http->xfer, http->remaining ); + xfer_seek ( &http->xfer, 0 ); + + /* Report block device capacity if applicable */ + if ( http->flags & HTTP_HEAD_ONLY ) { + capacity.blocks = ( content_len / HTTP_BLKSIZE ); + capacity.blksize = HTTP_BLKSIZE; + capacity.max_count = -1U; + block_capacity ( &http->partial, &capacity ); + } + return 0; +} + +/** + * Handle HTTP Transfer-Encoding header + * + * @v http HTTP request + * @v value HTTP header value + * @ret rc Return status code + */ +static int http_rx_transfer_encoding ( struct http_request *http, char *value ){ + + if ( strcasecmp ( value, "chunked" ) == 0 ) { + /* Mark connection as using chunked transfer encoding */ + http->chunked = 1; + } + + return 0; +} + +/** + * Handle HTTP Connection header + * + * @v http HTTP request + * @v value HTTP header value + * @ret rc Return status code + */ +static int http_rx_connection ( struct http_request *http, char *value ) { + + if ( strcasecmp ( value, "keep-alive" ) == 0 ) { + /* Mark connection as being kept alive by the server */ + http->flags |= HTTP_SERVER_KEEPALIVE; + } + + return 0; +} + +/** + * Handle WWW-Authenticate Basic header + * + * @v http HTTP request + * @v params Parameters + * @ret rc Return status code + */ +static int http_rx_basic_auth ( struct http_request *http, char *params ) { + + DBGC ( http, "HTTP %p Basic authentication required (%s)\n", + http, params ); + + /* If we received a 401 Unauthorized response, then retry + * using Basic authentication + */ + if ( ( http->code == 401 ) && + ( ! ( http->flags & HTTP_BASIC_AUTH ) ) && + ( http->uri->user != NULL ) ) { + http->flags |= ( HTTP_TRY_AGAIN | HTTP_BASIC_AUTH ); + } + + return 0; +} + +/** + * Parse Digest authentication parameter + * + * @v params Parameters + * @v name Parameter name (including trailing "=\"") + * @ret value Parameter value, or NULL + */ +static char * http_digest_param ( char *params, const char *name ) { + char *key; + char *value; + char *terminator; + + /* Locate parameter */ + key = strstr ( params, name ); + if ( ! key ) + return NULL; + + /* Extract value */ + value = ( key + strlen ( name ) ); + terminator = strchr ( value, '"' ); + if ( ! terminator ) + return NULL; + return strndup ( value, ( terminator - value ) ); +} + +/** + * Handle WWW-Authenticate Digest header + * + * @v http HTTP request + * @v params Parameters + * @ret rc Return status code + */ +static int http_rx_digest_auth ( struct http_request *http, char *params ) { + + DBGC ( http, "HTTP %p Digest authentication required (%s)\n", + http, params ); + + /* If we received a 401 Unauthorized response, then retry + * using Digest authentication + */ + if ( ( http->code == 401 ) && + ( ! ( http->flags & HTTP_DIGEST_AUTH ) ) && + ( http->uri->user != NULL ) ) { + + /* Extract realm */ + free ( http->auth_realm ); + http->auth_realm = http_digest_param ( params, "realm=\"" ); + if ( ! http->auth_realm ) { + DBGC ( http, "HTTP %p Digest prompt missing realm\n", + http ); + return -EINVAL_HEADER; + } + + /* Extract nonce */ + free ( http->auth_nonce ); + http->auth_nonce = http_digest_param ( params, "nonce=\"" ); + if ( ! http->auth_nonce ) { + DBGC ( http, "HTTP %p Digest prompt missing nonce\n", + http ); + return -EINVAL_HEADER; + } + + /* Extract opaque */ + free ( http->auth_opaque ); + http->auth_opaque = http_digest_param ( params, "opaque=\"" ); + if ( ! http->auth_opaque ) { + /* Not an error; "opaque" is optional */ + } + + http->flags |= ( HTTP_TRY_AGAIN | HTTP_DIGEST_AUTH ); + } + + return 0; +} + +/** An HTTP WWW-Authenticate header handler */ +struct http_auth_header_handler { + /** Scheme (e.g. "Basic") */ + const char *scheme; + /** Handle received parameters + * + * @v http HTTP request + * @v params Parameters + * @ret rc Return status code + */ + int ( * rx ) ( struct http_request *http, char *params ); +}; + +/** List of HTTP WWW-Authenticate header handlers */ +static struct http_auth_header_handler http_auth_header_handlers[] = { + { + .scheme = "Basic", + .rx = http_rx_basic_auth, + }, + { + .scheme = "Digest", + .rx = http_rx_digest_auth, + }, + { NULL, NULL }, +}; + +/** + * Handle HTTP WWW-Authenticate header + * + * @v http HTTP request + * @v value HTTP header value + * @ret rc Return status code + */ +static int http_rx_www_authenticate ( struct http_request *http, char *value ) { + struct http_auth_header_handler *handler; + char *separator; + char *scheme; + char *params; + int rc; + + /* Extract scheme */ + separator = strchr ( value, ' ' ); + if ( ! separator ) { + DBGC ( http, "HTTP %p malformed WWW-Authenticate header\n", + http ); + return -EINVAL_HEADER; + } + *separator = '\0'; + scheme = value; + params = ( separator + 1 ); + + /* Hand off to header handler, if one exists */ + for ( handler = http_auth_header_handlers; handler->scheme; handler++ ){ + if ( strcasecmp ( scheme, handler->scheme ) == 0 ) { + if ( ( rc = handler->rx ( http, params ) ) != 0 ) + return rc; + break; + } + } + return 0; +} + +/** + * Handle HTTP Retry-After header + * + * @v http HTTP request + * @v value HTTP header value + * @ret rc Return status code + */ +static int http_rx_retry_after ( struct http_request *http, char *value ) { + unsigned long seconds; + char *endp; + + DBGC ( http, "HTTP %p retry requested (%s)\n", http, value ); + + /* If we received a 503 Service Unavailable response, then + * retry after the specified number of seconds. If the value + * is not a simple number of seconds (e.g. a full HTTP date), + * then retry after a fixed delay, since we don't have code + * able to parse full HTTP dates. + */ + if ( http->code == 503 ) { + seconds = strtoul ( value, &endp, 10 ); + if ( *endp != '\0' ) { + seconds = HTTP_RETRY_SECONDS; + DBGC ( http, "HTTP %p cannot understand \"%s\"; " + "using %ld seconds\n", http, value, seconds ); + } + http->flags |= HTTP_TRY_AGAIN; + http->retry_delay = ( seconds * TICKS_PER_SEC ); + } + + return 0; +} + +/** An HTTP header handler */ +struct http_header_handler { + /** Name (e.g. "Content-Length") */ + const char *header; + /** Handle received header + * + * @v http HTTP request + * @v value HTTP header value + * @ret rc Return status code + * + * If an error is returned, the download will be aborted. + */ + int ( * rx ) ( struct http_request *http, char *value ); +}; + +/** List of HTTP header handlers */ +static struct http_header_handler http_header_handlers[] = { + { + .header = "Location", + .rx = http_rx_location, + }, + { + .header = "Content-Length", + .rx = http_rx_content_length, + }, + { + .header = "Transfer-Encoding", + .rx = http_rx_transfer_encoding, + }, + { + .header = "Connection", + .rx = http_rx_connection, + }, + { + .header = "WWW-Authenticate", + .rx = http_rx_www_authenticate, + }, + { + .header = "Retry-After", + .rx = http_rx_retry_after, + }, + { NULL, NULL } +}; + +/** + * Handle HTTP header + * + * @v http HTTP request + * @v header HTTP header + * @ret rc Return status code + */ +static int http_rx_header ( struct http_request *http, char *header ) { + struct http_header_handler *handler; + char *separator; + char *value; + int rc; + + /* An empty header line marks the end of this phase */ + if ( ! header[0] ) { + empty_line_buffer ( &http->linebuf ); + + /* Handle response code */ + if ( ! ( http->flags & HTTP_TRY_AGAIN ) ) { + if ( ( rc = http_response_to_rc ( http->code ) ) != 0 ) + return rc; + } + + /* Move to next state */ + if ( http->rx_state == HTTP_RX_HEADER ) { + DBGC ( http, "HTTP %p start of data\n", http ); + http->rx_state = ( http->chunked ? + HTTP_RX_CHUNK_LEN : HTTP_RX_DATA ); + if ( ( http->partial_len != 0 ) && + ( ! ( http->flags & HTTP_TRY_AGAIN ) ) ) { + http->remaining = http->partial_len; + } + return 0; + } else { + DBGC ( http, "HTTP %p end of trailer\n", http ); + http_done ( http ); + return 0; + } + } + + DBGC ( http, "HTTP %p header \"%s\"\n", http, header ); + + /* Split header at the ": " */ + separator = strstr ( header, ": " ); + if ( ! separator ) { + DBGC ( http, "HTTP %p malformed header\n", http ); + return -EINVAL_HEADER; + } + *separator = '\0'; + value = ( separator + 2 ); + + /* Hand off to header handler, if one exists */ + for ( handler = http_header_handlers ; handler->header ; handler++ ) { + if ( strcasecmp ( header, handler->header ) == 0 ) { + if ( ( rc = handler->rx ( http, value ) ) != 0 ) + return rc; + break; + } + } + return 0; +} + +/** + * Handle HTTP chunk length + * + * @v http HTTP request + * @v length HTTP chunk length + * @ret rc Return status code + */ +static int http_rx_chunk_len ( struct http_request *http, char *length ) { + char *endp; + + /* Skip blank lines between chunks */ + if ( length[0] == '\0' ) + return 0; + + /* Parse chunk length */ + http->chunk_remaining = strtoul ( length, &endp, 16 ); + if ( *endp != '\0' ) { + DBGC ( http, "HTTP %p invalid chunk length \"%s\"\n", + http, length ); + return -EINVAL_CHUNK_LENGTH; + } + + /* Terminate chunked encoding if applicable */ + if ( http->chunk_remaining == 0 ) { + DBGC ( http, "HTTP %p end of chunks\n", http ); + http->chunked = 0; + http->rx_state = HTTP_RX_TRAILER; + return 0; + } + + /* Use seek() to notify recipient of new filesize */ + DBGC ( http, "HTTP %p start of chunk of length %zd\n", + http, http->chunk_remaining ); + if ( ! ( http->flags & HTTP_TRY_AGAIN ) ) { + xfer_seek ( &http->xfer, + ( http->rx_len + http->chunk_remaining ) ); + xfer_seek ( &http->xfer, http->rx_len ); + } + + /* Start receiving data */ + http->rx_state = HTTP_RX_DATA; + + return 0; +} + +/** An HTTP line-based data handler */ +struct http_line_handler { + /** Handle line + * + * @v http HTTP request + * @v line Line to handle + * @ret rc Return status code + */ + int ( * rx ) ( struct http_request *http, char *line ); +}; + +/** List of HTTP line-based data handlers */ +static struct http_line_handler http_line_handlers[] = { + [HTTP_RX_RESPONSE] = { .rx = http_rx_response }, + [HTTP_RX_HEADER] = { .rx = http_rx_header }, + [HTTP_RX_CHUNK_LEN] = { .rx = http_rx_chunk_len }, + [HTTP_RX_TRAILER] = { .rx = http_rx_header }, +}; + +/** + * Handle new data arriving via HTTP connection + * + * @v http HTTP request + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int http_socket_deliver ( struct http_request *http, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + struct http_line_handler *lh; + char *line; + size_t data_len; + ssize_t line_len; + int rc = 0; + + profile_start ( &http_rx_profiler ); + while ( iobuf && iob_len ( iobuf ) ) { + + switch ( http->rx_state ) { + case HTTP_RX_IDLE: + /* Receiving any data in this state is an error */ + DBGC ( http, "HTTP %p received %zd bytes while %s\n", + http, iob_len ( iobuf ), + ( ( http->rx_state == HTTP_RX_IDLE ) ? + "idle" : "dead" ) ); + rc = -EPROTO_UNSOLICITED; + goto done; + case HTTP_RX_DEAD: + /* Do no further processing */ + goto done; + case HTTP_RX_DATA: + /* Pass received data to caller */ + data_len = iob_len ( iobuf ); + if ( http->chunk_remaining && + ( http->chunk_remaining < data_len ) ) { + data_len = http->chunk_remaining; + } + if ( http->remaining && + ( http->remaining < data_len ) ) { + data_len = http->remaining; + } + if ( http->flags & HTTP_TRY_AGAIN ) { + /* Discard all received data */ + iob_pull ( iobuf, data_len ); + } else if ( http->rx_buffer != UNULL ) { + /* Copy to partial transfer buffer */ + copy_to_user ( http->rx_buffer, http->rx_len, + iobuf->data, data_len ); + iob_pull ( iobuf, data_len ); + } else if ( data_len < iob_len ( iobuf ) ) { + /* Deliver partial buffer as raw data */ + profile_start ( &http_xfer_profiler ); + rc = xfer_deliver_raw ( &http->xfer, + iobuf->data, data_len ); + iob_pull ( iobuf, data_len ); + if ( rc != 0 ) + goto done; + profile_stop ( &http_xfer_profiler ); + } else { + /* Deliver whole I/O buffer */ + profile_start ( &http_xfer_profiler ); + if ( ( rc = xfer_deliver_iob ( &http->xfer, + iob_disown ( iobuf ) ) ) != 0 ) + goto done; + profile_stop ( &http_xfer_profiler ); + } + http->rx_len += data_len; + if ( http->chunk_remaining ) { + http->chunk_remaining -= data_len; + if ( http->chunk_remaining == 0 ) + http->rx_state = HTTP_RX_CHUNK_LEN; + } + if ( http->remaining ) { + http->remaining -= data_len; + if ( ( http->remaining == 0 ) && + ( http->rx_state == HTTP_RX_DATA ) ) { + http_done ( http ); + } + } + break; + case HTTP_RX_RESPONSE: + case HTTP_RX_HEADER: + case HTTP_RX_CHUNK_LEN: + case HTTP_RX_TRAILER: + /* In the other phases, buffer and process a + * line at a time + */ + line_len = line_buffer ( &http->linebuf, iobuf->data, + iob_len ( iobuf ) ); + if ( line_len < 0 ) { + rc = line_len; + DBGC ( http, "HTTP %p could not buffer line: " + "%s\n", http, strerror ( rc ) ); + goto done; + } + iob_pull ( iobuf, line_len ); + line = buffered_line ( &http->linebuf ); + if ( line ) { + lh = &http_line_handlers[http->rx_state]; + if ( ( rc = lh->rx ( http, line ) ) != 0 ) + goto done; + } + break; + default: + assert ( 0 ); + break; + } + } + + done: + if ( rc ) + http_close ( http, rc ); + free_iob ( iobuf ); + profile_stop ( &http_rx_profiler ); + return rc; +} + +/** + * Check HTTP socket flow control window + * + * @v http HTTP request + * @ret len Length of window + */ +static size_t http_socket_window ( struct http_request *http __unused ) { + + /* Window is always open. This is to prevent TCP from + * stalling if our parent window is not currently open. + */ + return ( ~( ( size_t ) 0 ) ); +} + +/** + * Close HTTP socket + * + * @v http HTTP request + * @v rc Reason for close + */ +static void http_socket_close ( struct http_request *http, int rc ) { + + /* If we have an error, terminate */ + if ( rc != 0 ) { + http_close ( http, rc ); + return; + } + + /* Mark HTTP request as complete */ + http_done ( http ); +} + +/** + * Generate HTTP Basic authorisation string + * + * @v http HTTP request + * @ret auth Authorisation string, or NULL on error + * + * The authorisation string is dynamically allocated, and must be + * freed by the caller. + */ +static char * http_basic_auth ( struct http_request *http ) { + const char *user = http->uri->user; + const char *password = ( http->uri->password ? + http->uri->password : "" ); + size_t user_pw_len = + ( strlen ( user ) + 1 /* ":" */ + strlen ( password ) ); + char user_pw[ user_pw_len + 1 /* NUL */ ]; + size_t user_pw_base64_len = base64_encoded_len ( user_pw_len ); + char user_pw_base64[ user_pw_base64_len + 1 /* NUL */ ]; + char *auth; + int len; + + /* Sanity check */ + assert ( user != NULL ); + + /* Make "user:password" string from decoded fields */ + snprintf ( user_pw, sizeof ( user_pw ), "%s:%s", user, password ); + + /* Base64-encode the "user:password" string */ + base64_encode ( ( void * ) user_pw, user_pw_len, user_pw_base64 ); + + /* Generate the authorisation string */ + len = asprintf ( &auth, "Authorization: Basic %s\r\n", + user_pw_base64 ); + if ( len < 0 ) + return NULL; + + return auth; +} + +/** + * Generate HTTP Digest authorisation string + * + * @v http HTTP request + * @v method HTTP method (e.g. "GET") + * @v uri HTTP request URI (e.g. "/index.html") + * @ret auth Authorisation string, or NULL on error + * + * The authorisation string is dynamically allocated, and must be + * freed by the caller. + */ +static char * http_digest_auth ( struct http_request *http, + const char *method, const char *uri ) { + const char *user = http->uri->user; + const char *password = ( http->uri->password ? + http->uri->password : "" ); + const char *realm = http->auth_realm; + const char *nonce = http->auth_nonce; + const char *opaque = http->auth_opaque; + static const char colon = ':'; + uint8_t ctx[MD5_CTX_SIZE]; + uint8_t digest[MD5_DIGEST_SIZE]; + char ha1[ base16_encoded_len ( sizeof ( digest ) ) + 1 /* NUL */ ]; + char ha2[ base16_encoded_len ( sizeof ( digest ) ) + 1 /* NUL */ ]; + char response[ base16_encoded_len ( sizeof ( digest ) ) + 1 /* NUL */ ]; + char *auth; + int len; + + /* Sanity checks */ + assert ( user != NULL ); + assert ( realm != NULL ); + assert ( nonce != NULL ); + + /* Generate HA1 */ + digest_init ( &md5_algorithm, ctx ); + digest_update ( &md5_algorithm, ctx, user, strlen ( user ) ); + digest_update ( &md5_algorithm, ctx, &colon, sizeof ( colon ) ); + digest_update ( &md5_algorithm, ctx, realm, strlen ( realm ) ); + digest_update ( &md5_algorithm, ctx, &colon, sizeof ( colon ) ); + digest_update ( &md5_algorithm, ctx, password, strlen ( password ) ); + digest_final ( &md5_algorithm, ctx, digest ); + base16_encode ( digest, sizeof ( digest ), ha1 ); + + /* Generate HA2 */ + digest_init ( &md5_algorithm, ctx ); + digest_update ( &md5_algorithm, ctx, method, strlen ( method ) ); + digest_update ( &md5_algorithm, ctx, &colon, sizeof ( colon ) ); + digest_update ( &md5_algorithm, ctx, uri, strlen ( uri ) ); + digest_final ( &md5_algorithm, ctx, digest ); + base16_encode ( digest, sizeof ( digest ), ha2 ); + + /* Generate response */ + digest_init ( &md5_algorithm, ctx ); + digest_update ( &md5_algorithm, ctx, ha1, strlen ( ha1 ) ); + digest_update ( &md5_algorithm, ctx, &colon, sizeof ( colon ) ); + digest_update ( &md5_algorithm, ctx, nonce, strlen ( nonce ) ); + digest_update ( &md5_algorithm, ctx, &colon, sizeof ( colon ) ); + digest_update ( &md5_algorithm, ctx, ha2, strlen ( ha2 ) ); + digest_final ( &md5_algorithm, ctx, digest ); + base16_encode ( digest, sizeof ( digest ), response ); + + /* Generate the authorisation string */ + len = asprintf ( &auth, "Authorization: Digest username=\"%s\", " + "realm=\"%s\", nonce=\"%s\", uri=\"%s\", " + "%s%s%sresponse=\"%s\"\r\n", user, realm, nonce, uri, + ( opaque ? "opaque=\"" : "" ), + ( opaque ? opaque : "" ), + ( opaque ? "\", " : "" ), response ); + if ( len < 0 ) + return NULL; + + return auth; +} + +/** + * Generate HTTP POST parameter list + * + * @v http HTTP request + * @v buf Buffer to contain HTTP POST parameters + * @v len Length of buffer + * @ret len Length of parameter list (excluding terminating NUL) + */ +static size_t http_post_params ( struct http_request *http, + char *buf, size_t len ) { + struct parameter *param; + ssize_t remaining = len; + size_t frag_len; + + /* Add each parameter in the form "key=value", joined with "&" */ + len = 0; + for_each_param ( param, http->uri->params ) { + + /* Add the "&", if applicable */ + if ( len ) { + if ( remaining > 0 ) + *buf = '&'; + buf++; + len++; + remaining--; + } + + /* URI-encode the key */ + frag_len = uri_encode ( param->key, 0, buf, remaining ); + buf += frag_len; + len += frag_len; + remaining -= frag_len; + + /* Add the "=" */ + if ( remaining > 0 ) + *buf = '='; + buf++; + len++; + remaining--; + + /* URI-encode the value */ + frag_len = uri_encode ( param->value, 0, buf, remaining ); + buf += frag_len; + len += frag_len; + remaining -= frag_len; + } + + /* Ensure string is NUL-terminated even if no parameters are present */ + if ( remaining > 0 ) + *buf = '\0'; + + return len; +} + +/** + * Generate HTTP POST body + * + * @v http HTTP request + * @ret post I/O buffer containing POST body, or NULL on error + */ +static struct io_buffer * http_post ( struct http_request *http ) { + struct io_buffer *post; + size_t len; + size_t check_len; + + /* Calculate length of parameter list */ + len = http_post_params ( http, NULL, 0 ); + + /* Allocate parameter list */ + post = alloc_iob ( len + 1 /* NUL */ ); + if ( ! post ) + return NULL; + + /* Fill parameter list */ + check_len = http_post_params ( http, iob_put ( post, len ), + ( len + 1 /* NUL */ ) ); + assert ( len == check_len ); + DBGC ( http, "HTTP %p POST %s\n", http, ( ( char * ) post->data ) ); + + return post; +} + +/** + * HTTP process + * + * @v http HTTP request + */ +static void http_step ( struct http_request *http ) { + struct io_buffer *post; + struct uri host_uri; + struct uri path_uri; + char *host_uri_string; + char *path_uri_string; + char *method; + char *range; + char *auth; + char *content; + int len; + int rc; + + /* Do nothing if we have already transmitted the request */ + if ( ! ( http->flags & HTTP_TX_PENDING ) ) + return; + + /* Do nothing until socket is ready */ + if ( ! xfer_window ( &http->socket ) ) + return; + + /* Force a HEAD request if we have nowhere to send any received data */ + if ( ( xfer_window ( &http->xfer ) == 0 ) && + ( http->rx_buffer == UNULL ) ) { + http->flags |= ( HTTP_HEAD_ONLY | HTTP_CLIENT_KEEPALIVE ); + } + + /* Determine method */ + method = ( ( http->flags & HTTP_HEAD_ONLY ) ? "HEAD" : + ( http->uri->params ? "POST" : "GET" ) ); + + /* Construct host URI */ + memset ( &host_uri, 0, sizeof ( host_uri ) ); + host_uri.host = http->uri->host; + host_uri.port = http->uri->port; + host_uri_string = format_uri_alloc ( &host_uri ); + if ( ! host_uri_string ) { + rc = -ENOMEM; + goto err_host_uri; + } + + /* Construct path URI */ + memset ( &path_uri, 0, sizeof ( path_uri ) ); + path_uri.path = ( http->uri->path ? http->uri->path : "/" ); + path_uri.query = http->uri->query; + path_uri_string = format_uri_alloc ( &path_uri ); + if ( ! path_uri_string ) { + rc = -ENOMEM; + goto err_path_uri; + } + + /* Calculate range request parameters if applicable */ + if ( http->partial_len ) { + len = asprintf ( &range, "Range: bytes=%zd-%zd\r\n", + http->partial_start, + ( http->partial_start + http->partial_len + - 1 ) ); + if ( len < 0 ) { + rc = len; + goto err_range; + } + } else { + range = NULL; + } + + /* Construct authorisation, if applicable */ + if ( http->flags & HTTP_BASIC_AUTH ) { + auth = http_basic_auth ( http ); + if ( ! auth ) { + rc = -ENOMEM; + goto err_auth; + } + } else if ( http->flags & HTTP_DIGEST_AUTH ) { + auth = http_digest_auth ( http, method, path_uri_string ); + if ( ! auth ) { + rc = -ENOMEM; + goto err_auth; + } + } else { + auth = NULL; + } + + /* Construct POST content, if applicable */ + if ( http->uri->params ) { + post = http_post ( http ); + if ( ! post ) { + rc = -ENOMEM; + goto err_post; + } + len = asprintf ( &content, "Content-Type: " + "application/x-www-form-urlencoded\r\n" + "Content-Length: %zd\r\n", iob_len ( post ) ); + if ( len < 0 ) { + rc = len; + goto err_content; + } + } else { + post = NULL; + content = NULL; + } + + /* Mark request as transmitted */ + http->flags &= ~HTTP_TX_PENDING; + + /* Send request */ + if ( ( rc = xfer_printf ( &http->socket, + "%s %s HTTP/1.1\r\n" + "User-Agent: iPXE/%s\r\n" + "Host: %s\r\n" + "%s%s%s%s" + "\r\n", + method, path_uri_string, product_version, + host_uri_string, + ( ( http->flags & HTTP_CLIENT_KEEPALIVE ) ? + "Connection: keep-alive\r\n" : "" ), + ( range ? range : "" ), + ( auth ? auth : "" ), + ( content ? content : "" ) ) ) != 0 ) { + goto err_xfer; + } + + /* Send POST content, if applicable */ + if ( post ) { + if ( ( rc = xfer_deliver_iob ( &http->socket, + iob_disown ( post ) ) ) != 0 ) + goto err_xfer_post; + } + + err_xfer_post: + err_xfer: + free ( content ); + err_content: + free ( post ); + err_post: + free ( auth ); + err_auth: + free ( range ); + err_range: + free ( path_uri_string ); + err_path_uri: + free ( host_uri_string ); + err_host_uri: + if ( rc != 0 ) + http_close ( http, rc ); +} + +/** + * Check HTTP data transfer flow control window + * + * @v http HTTP request + * @ret len Length of window + */ +static size_t http_xfer_window ( struct http_request *http ) { + + /* New block commands may be issued only when we are idle */ + return ( ( http->rx_state == HTTP_RX_IDLE ) ? 1 : 0 ); +} + +/** + * Initiate HTTP partial read + * + * @v http HTTP request + * @v partial Partial transfer interface + * @v offset Starting offset + * @v buffer Data buffer + * @v len Length + * @ret rc Return status code + */ +static int http_partial_read ( struct http_request *http, + struct interface *partial, + size_t offset, userptr_t buffer, size_t len ) { + + /* Sanity check */ + if ( http_xfer_window ( http ) == 0 ) + return -EBUSY; + + /* Initialise partial transfer parameters */ + http->rx_buffer = buffer; + http->partial_start = offset; + http->partial_len = len; + + /* Schedule request */ + http->rx_state = HTTP_RX_RESPONSE; + http->flags = ( HTTP_TX_PENDING | HTTP_CLIENT_KEEPALIVE ); + if ( ! len ) + http->flags |= HTTP_HEAD_ONLY; + process_add ( &http->process ); + + /* Attach to parent interface and return */ + intf_plug_plug ( &http->partial, partial ); + + return 0; +} + +/** + * Issue HTTP block device read + * + * @v http HTTP request + * @v block Block data interface + * @v lba Starting logical block address + * @v count Number of blocks to transfer + * @v buffer Data buffer + * @v len Length of data buffer + * @ret rc Return status code + */ +static int http_block_read ( struct http_request *http, + struct interface *block, + uint64_t lba, unsigned int count, + userptr_t buffer, size_t len __unused ) { + + return http_partial_read ( http, block, ( lba * HTTP_BLKSIZE ), + buffer, ( count * HTTP_BLKSIZE ) ); +} + +/** + * Read HTTP block device capacity + * + * @v http HTTP request + * @v block Block data interface + * @ret rc Return status code + */ +static int http_block_read_capacity ( struct http_request *http, + struct interface *block ) { + + return http_partial_read ( http, block, 0, 0, 0 ); +} + +/** + * Describe HTTP device in an ACPI table + * + * @v http HTTP request + * @v acpi ACPI table + * @v len Length of ACPI table + * @ret rc Return status code + */ +static int http_acpi_describe ( struct http_request *http, + struct acpi_description_header *acpi, + size_t len ) { + + DBGC ( http, "HTTP %p cannot yet describe device in an ACPI table\n", + http ); + ( void ) acpi; + ( void ) len; + return 0; +} + +/** HTTP socket interface operations */ +static struct interface_operation http_socket_operations[] = { + INTF_OP ( xfer_window, struct http_request *, http_socket_window ), + INTF_OP ( xfer_deliver, struct http_request *, http_socket_deliver ), + INTF_OP ( xfer_window_changed, struct http_request *, http_step ), + INTF_OP ( intf_close, struct http_request *, http_socket_close ), +}; + +/** HTTP socket interface descriptor */ +static struct interface_descriptor http_socket_desc = + INTF_DESC_PASSTHRU ( struct http_request, socket, + http_socket_operations, xfer ); + +/** HTTP partial transfer interface operations */ +static struct interface_operation http_partial_operations[] = { + INTF_OP ( intf_close, struct http_request *, http_close ), +}; + +/** HTTP partial transfer interface descriptor */ +static struct interface_descriptor http_partial_desc = + INTF_DESC ( struct http_request, partial, http_partial_operations ); + +/** HTTP data transfer interface operations */ +static struct interface_operation http_xfer_operations[] = { + INTF_OP ( xfer_window, struct http_request *, http_xfer_window ), + INTF_OP ( block_read, struct http_request *, http_block_read ), + INTF_OP ( block_read_capacity, struct http_request *, + http_block_read_capacity ), + INTF_OP ( intf_close, struct http_request *, http_close ), + INTF_OP ( acpi_describe, struct http_request *, http_acpi_describe ), +}; + +/** HTTP data transfer interface descriptor */ +static struct interface_descriptor http_xfer_desc = + INTF_DESC_PASSTHRU ( struct http_request, xfer, + http_xfer_operations, socket ); + +/** HTTP process descriptor */ +static struct process_descriptor http_process_desc = + PROC_DESC_ONCE ( struct http_request, process, http_step ); + +/** + * Initiate an HTTP connection, with optional filter + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @v default_port Default port number + * @v filter Filter to apply to socket, or NULL + * @ret rc Return status code + */ +int http_open_filter ( struct interface *xfer, struct uri *uri, + unsigned int default_port, + int ( * filter ) ( struct interface *xfer, + const char *name, + struct interface **next ) ) { + struct http_request *http; + int rc; + + /* Sanity checks */ + if ( ! uri->host ) + return -EINVAL; + + /* Allocate and populate HTTP structure */ + http = zalloc ( sizeof ( *http ) ); + if ( ! http ) + return -ENOMEM; + ref_init ( &http->refcnt, http_free ); + intf_init ( &http->xfer, &http_xfer_desc, &http->refcnt ); + intf_init ( &http->partial, &http_partial_desc, &http->refcnt ); + http->uri = uri_get ( uri ); + http->default_port = default_port; + http->filter = filter; + intf_init ( &http->socket, &http_socket_desc, &http->refcnt ); + process_init ( &http->process, &http_process_desc, &http->refcnt ); + timer_init ( &http->timer, http_retry, &http->refcnt ); + http->flags = HTTP_TX_PENDING; + + /* Open socket */ + if ( ( rc = http_socket_open ( http ) ) != 0 ) + goto err; + + /* Attach to parent interface, mortalise self, and return */ + intf_plug_plug ( &http->xfer, xfer ); + ref_put ( &http->refcnt ); + return 0; + + err: + DBGC ( http, "HTTP %p could not create request: %s\n", + http, strerror ( rc ) ); + http_close ( http, rc ); + ref_put ( &http->refcnt ); + return rc; +} diff --git a/qemu/roms/ipxe/src/net/tcp/https.c b/qemu/roms/ipxe/src/net/tcp/https.c new file mode 100644 index 000000000..6112acdae --- /dev/null +++ b/qemu/roms/ipxe/src/net/tcp/https.c @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** + * @file + * + * Secure Hyper Text Transfer Protocol (HTTPS) + * + */ + +#include <stddef.h> +#include <ipxe/open.h> +#include <ipxe/tls.h> +#include <ipxe/http.h> +#include <ipxe/features.h> + +FEATURE ( FEATURE_PROTOCOL, "HTTPS", DHCP_EB_FEATURE_HTTPS, 1 ); + +/** + * Initiate an HTTPS connection + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int https_open ( struct interface *xfer, struct uri *uri ) { + return http_open_filter ( xfer, uri, HTTPS_PORT, add_tls ); +} + +/** HTTPS URI opener */ +struct uri_opener https_uri_opener __uri_opener = { + .scheme = "https", + .open = https_open, +}; diff --git a/qemu/roms/ipxe/src/net/tcp/iscsi.c b/qemu/roms/ipxe/src/net/tcp/iscsi.c new file mode 100644 index 000000000..03c6d0f23 --- /dev/null +++ b/qemu/roms/ipxe/src/net/tcp/iscsi.c @@ -0,0 +1,2126 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stddef.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <assert.h> +#include <byteswap.h> +#include <ipxe/vsprintf.h> +#include <ipxe/socket.h> +#include <ipxe/iobuf.h> +#include <ipxe/uri.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/scsi.h> +#include <ipxe/process.h> +#include <ipxe/uaccess.h> +#include <ipxe/tcpip.h> +#include <ipxe/settings.h> +#include <ipxe/features.h> +#include <ipxe/base16.h> +#include <ipxe/base64.h> +#include <ipxe/ibft.h> +#include <ipxe/iscsi.h> + +/** @file + * + * iSCSI protocol + * + */ + +FEATURE ( FEATURE_PROTOCOL, "iSCSI", DHCP_EB_FEATURE_ISCSI, 1 ); + +/* Disambiguate the various error causes */ +#define EACCES_INCORRECT_TARGET_USERNAME \ + __einfo_error ( EINFO_EACCES_INCORRECT_TARGET_USERNAME ) +#define EINFO_EACCES_INCORRECT_TARGET_USERNAME \ + __einfo_uniqify ( EINFO_EACCES, 0x01, "Incorrect target username" ) +#define EACCES_INCORRECT_TARGET_PASSWORD \ + __einfo_error ( EINFO_EACCES_INCORRECT_TARGET_PASSWORD ) +#define EINFO_EACCES_INCORRECT_TARGET_PASSWORD \ + __einfo_uniqify ( EINFO_EACCES, 0x02, "Incorrect target password" ) +#define EINVAL_ROOT_PATH_TOO_SHORT \ + __einfo_error ( EINFO_EINVAL_ROOT_PATH_TOO_SHORT ) +#define EINFO_EINVAL_ROOT_PATH_TOO_SHORT \ + __einfo_uniqify ( EINFO_EINVAL, 0x01, "Root path too short" ) +#define EINVAL_BAD_CREDENTIAL_MIX \ + __einfo_error ( EINFO_EINVAL_BAD_CREDENTIAL_MIX ) +#define EINFO_EINVAL_BAD_CREDENTIAL_MIX \ + __einfo_uniqify ( EINFO_EINVAL, 0x02, "Bad credential mix" ) +#define EINVAL_NO_ROOT_PATH \ + __einfo_error ( EINFO_EINVAL_NO_ROOT_PATH ) +#define EINFO_EINVAL_NO_ROOT_PATH \ + __einfo_uniqify ( EINFO_EINVAL, 0x03, "No root path" ) +#define EINVAL_NO_TARGET_IQN \ + __einfo_error ( EINFO_EINVAL_NO_TARGET_IQN ) +#define EINFO_EINVAL_NO_TARGET_IQN \ + __einfo_uniqify ( EINFO_EINVAL, 0x04, "No target IQN" ) +#define EINVAL_NO_INITIATOR_IQN \ + __einfo_error ( EINFO_EINVAL_NO_INITIATOR_IQN ) +#define EINFO_EINVAL_NO_INITIATOR_IQN \ + __einfo_uniqify ( EINFO_EINVAL, 0x05, "No initiator IQN" ) +#define EIO_TARGET_UNAVAILABLE \ + __einfo_error ( EINFO_EIO_TARGET_UNAVAILABLE ) +#define EINFO_EIO_TARGET_UNAVAILABLE \ + __einfo_uniqify ( EINFO_EIO, 0x01, "Target not currently operational" ) +#define EIO_TARGET_NO_RESOURCES \ + __einfo_error ( EINFO_EIO_TARGET_NO_RESOURCES ) +#define EINFO_EIO_TARGET_NO_RESOURCES \ + __einfo_uniqify ( EINFO_EIO, 0x02, "Target out of resources" ) +#define ENOTSUP_INITIATOR_STATUS \ + __einfo_error ( EINFO_ENOTSUP_INITIATOR_STATUS ) +#define EINFO_ENOTSUP_INITIATOR_STATUS \ + __einfo_uniqify ( EINFO_ENOTSUP, 0x01, "Unsupported initiator status" ) +#define ENOTSUP_OPCODE \ + __einfo_error ( EINFO_ENOTSUP_OPCODE ) +#define EINFO_ENOTSUP_OPCODE \ + __einfo_uniqify ( EINFO_ENOTSUP, 0x02, "Unsupported opcode" ) +#define ENOTSUP_DISCOVERY \ + __einfo_error ( EINFO_ENOTSUP_DISCOVERY ) +#define EINFO_ENOTSUP_DISCOVERY \ + __einfo_uniqify ( EINFO_ENOTSUP, 0x03, "Discovery not supported" ) +#define ENOTSUP_TARGET_STATUS \ + __einfo_error ( EINFO_ENOTSUP_TARGET_STATUS ) +#define EINFO_ENOTSUP_TARGET_STATUS \ + __einfo_uniqify ( EINFO_ENOTSUP, 0x04, "Unsupported target status" ) +#define ENOTSUP_NOP_IN \ + __einfo_error ( EINFO_ENOTSUP_NOP_IN ) +#define EINFO_ENOTSUP_NOP_IN \ + __einfo_uniqify ( EINFO_ENOTSUP, 0x05, "Unsupported NOP-In received" ) +#define EPERM_INITIATOR_AUTHENTICATION \ + __einfo_error ( EINFO_EPERM_INITIATOR_AUTHENTICATION ) +#define EINFO_EPERM_INITIATOR_AUTHENTICATION \ + __einfo_uniqify ( EINFO_EPERM, 0x01, "Initiator authentication failed" ) +#define EPERM_INITIATOR_AUTHORISATION \ + __einfo_error ( EINFO_EPERM_INITIATOR_AUTHORISATION ) +#define EINFO_EPERM_INITIATOR_AUTHORISATION \ + __einfo_uniqify ( EINFO_EPERM, 0x02, "Initiator not authorised" ) +#define EPROTO_INVALID_CHAP_ALGORITHM \ + __einfo_error ( EINFO_EPROTO_INVALID_CHAP_ALGORITHM ) +#define EINFO_EPROTO_INVALID_CHAP_ALGORITHM \ + __einfo_uniqify ( EINFO_EPROTO, 0x01, "Invalid CHAP algorithm" ) +#define EPROTO_INVALID_CHAP_IDENTIFIER \ + __einfo_error ( EINFO_EPROTO_INVALID_CHAP_IDENTIFIER ) +#define EINFO_EPROTO_INVALID_CHAP_IDENTIFIER \ + __einfo_uniqify ( EINFO_EPROTO, 0x02, "Invalid CHAP identifier" ) +#define EPROTO_INVALID_LARGE_BINARY \ + __einfo_error ( EINFO_EPROTO_INVALID_LARGE_BINARY ) +#define EINFO_EPROTO_INVALID_LARGE_BINARY \ + __einfo_uniqify ( EINFO_EPROTO, 0x03, "Invalid large binary" ) +#define EPROTO_INVALID_CHAP_RESPONSE \ + __einfo_error ( EINFO_EPROTO_INVALID_CHAP_RESPONSE ) +#define EINFO_EPROTO_INVALID_CHAP_RESPONSE \ + __einfo_uniqify ( EINFO_EPROTO, 0x04, "Invalid CHAP response" ) +#define EPROTO_INVALID_KEY_VALUE_PAIR \ + __einfo_error ( EINFO_EPROTO_INVALID_KEY_VALUE_PAIR ) +#define EINFO_EPROTO_INVALID_KEY_VALUE_PAIR \ + __einfo_uniqify ( EINFO_EPROTO, 0x05, "Invalid key/value pair" ) +#define EPROTO_VALUE_REJECTED \ + __einfo_error ( EINFO_EPROTO_VALUE_REJECTED ) +#define EINFO_EPROTO_VALUE_REJECTED \ + __einfo_uniqify ( EINFO_EPROTO, 0x06, "Parameter rejected" ) + +static void iscsi_start_tx ( struct iscsi_session *iscsi ); +static void iscsi_start_login ( struct iscsi_session *iscsi ); +static void iscsi_start_data_out ( struct iscsi_session *iscsi, + unsigned int datasn ); + +/** + * Finish receiving PDU data into buffer + * + * @v iscsi iSCSI session + */ +static void iscsi_rx_buffered_data_done ( struct iscsi_session *iscsi ) { + free ( iscsi->rx_buffer ); + iscsi->rx_buffer = NULL; +} + +/** + * Receive PDU data into buffer + * + * @v iscsi iSCSI session + * @v data Data to receive + * @v len Length of data + * @ret rc Return status code + * + * This can be used when the RX PDU type handler wishes to buffer up + * all received data and process the PDU as a single unit. The caller + * is repsonsible for calling iscsi_rx_buffered_data_done() after + * processing the data. + */ +static int iscsi_rx_buffered_data ( struct iscsi_session *iscsi, + const void *data, size_t len ) { + + /* Allocate buffer on first call */ + if ( ! iscsi->rx_buffer ) { + iscsi->rx_buffer = malloc ( iscsi->rx_len ); + if ( ! iscsi->rx_buffer ) + return -ENOMEM; + } + + /* Copy data to buffer */ + assert ( ( iscsi->rx_offset + len ) <= iscsi->rx_len ); + memcpy ( ( iscsi->rx_buffer + iscsi->rx_offset ), data, len ); + + return 0; +} + +/** + * Free iSCSI session + * + * @v refcnt Reference counter + */ +static void iscsi_free ( struct refcnt *refcnt ) { + struct iscsi_session *iscsi = + container_of ( refcnt, struct iscsi_session, refcnt ); + + free ( iscsi->initiator_iqn ); + free ( iscsi->target_address ); + free ( iscsi->target_iqn ); + free ( iscsi->initiator_username ); + free ( iscsi->initiator_password ); + free ( iscsi->target_username ); + free ( iscsi->target_password ); + chap_finish ( &iscsi->chap ); + iscsi_rx_buffered_data_done ( iscsi ); + free ( iscsi->command ); + free ( iscsi ); +} + +/** + * Shut down iSCSI interface + * + * @v iscsi iSCSI session + * @v rc Reason for close + */ +static void iscsi_close ( struct iscsi_session *iscsi, int rc ) { + + /* A TCP graceful close is still an error from our point of view */ + if ( rc == 0 ) + rc = -ECONNRESET; + + DBGC ( iscsi, "iSCSI %p closed: %s\n", iscsi, strerror ( rc ) ); + + /* Stop transmission process */ + process_del ( &iscsi->process ); + + /* Shut down interfaces */ + intf_shutdown ( &iscsi->socket, rc ); + intf_shutdown ( &iscsi->control, rc ); + intf_shutdown ( &iscsi->data, rc ); +} + +/** + * Assign new iSCSI initiator task tag + * + * @v iscsi iSCSI session + */ +static void iscsi_new_itt ( struct iscsi_session *iscsi ) { + static uint16_t itt_idx; + + iscsi->itt = ( ISCSI_TAG_MAGIC | (++itt_idx) ); +} + +/** + * Open iSCSI transport-layer connection + * + * @v iscsi iSCSI session + * @ret rc Return status code + */ +static int iscsi_open_connection ( struct iscsi_session *iscsi ) { + struct sockaddr_tcpip target; + int rc; + + assert ( iscsi->tx_state == ISCSI_TX_IDLE ); + assert ( iscsi->rx_state == ISCSI_RX_BHS ); + assert ( iscsi->rx_offset == 0 ); + + /* Open socket */ + memset ( &target, 0, sizeof ( target ) ); + target.st_port = htons ( iscsi->target_port ); + if ( ( rc = xfer_open_named_socket ( &iscsi->socket, SOCK_STREAM, + ( struct sockaddr * ) &target, + iscsi->target_address, + NULL ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not open socket: %s\n", + iscsi, strerror ( rc ) ); + return rc; + } + + /* Enter security negotiation phase */ + iscsi->status = ( ISCSI_STATUS_SECURITY_NEGOTIATION_PHASE | + ISCSI_STATUS_STRINGS_SECURITY ); + if ( iscsi->target_username ) + iscsi->status |= ISCSI_STATUS_AUTH_REVERSE_REQUIRED; + + /* Assign new ISID */ + iscsi->isid_iana_qual = ( random() & 0xffff ); + + /* Assign fresh initiator task tag */ + iscsi_new_itt ( iscsi ); + + /* Initiate login */ + iscsi_start_login ( iscsi ); + + return 0; +} + +/** + * Close iSCSI transport-layer connection + * + * @v iscsi iSCSI session + * @v rc Reason for close + * + * Closes the transport-layer connection and resets the session state + * ready to attempt a fresh login. + */ +static void iscsi_close_connection ( struct iscsi_session *iscsi, int rc ) { + + /* Close all data transfer interfaces */ + intf_restart ( &iscsi->socket, rc ); + + /* Clear connection status */ + iscsi->status = 0; + + /* Reset TX and RX state machines */ + iscsi->tx_state = ISCSI_TX_IDLE; + iscsi->rx_state = ISCSI_RX_BHS; + iscsi->rx_offset = 0; + + /* Free any temporary dynamically allocated memory */ + chap_finish ( &iscsi->chap ); + iscsi_rx_buffered_data_done ( iscsi ); +} + +/** + * Mark iSCSI SCSI operation as complete + * + * @v iscsi iSCSI session + * @v rc Return status code + * @v rsp SCSI response, if any + * + * Note that iscsi_scsi_done() will not close the connection, and must + * therefore be called only when the internal state machines are in an + * appropriate state, otherwise bad things may happen on the next call + * to iscsi_scsi_command(). The general rule is to call + * iscsi_scsi_done() only at the end of receiving a PDU; at this point + * the TX and RX engines should both be idle. + */ +static void iscsi_scsi_done ( struct iscsi_session *iscsi, int rc, + struct scsi_rsp *rsp ) { + uint32_t itt = iscsi->itt; + + assert ( iscsi->tx_state == ISCSI_TX_IDLE ); + + /* Clear command */ + free ( iscsi->command ); + iscsi->command = NULL; + + /* Send SCSI response, if any */ + if ( rsp ) + scsi_response ( &iscsi->data, rsp ); + + /* Close SCSI command, if this is still the same command. (It + * is possible that the command interface has already been + * closed as a result of the SCSI response we sent.) + */ + if ( iscsi->itt == itt ) + intf_restart ( &iscsi->data, rc ); +} + +/**************************************************************************** + * + * iSCSI SCSI command issuing + * + */ + +/** + * Build iSCSI SCSI command BHS + * + * @v iscsi iSCSI session + * + * We don't currently support bidirectional commands (i.e. with both + * Data-In and Data-Out segments); these would require providing code + * to generate an AHS, and there doesn't seem to be any need for it at + * the moment. + */ +static void iscsi_start_command ( struct iscsi_session *iscsi ) { + struct iscsi_bhs_scsi_command *command = &iscsi->tx_bhs.scsi_command; + + assert ( ! ( iscsi->command->data_in && iscsi->command->data_out ) ); + + /* Construct BHS and initiate transmission */ + iscsi_start_tx ( iscsi ); + command->opcode = ISCSI_OPCODE_SCSI_COMMAND; + command->flags = ( ISCSI_FLAG_FINAL | + ISCSI_COMMAND_ATTR_SIMPLE ); + if ( iscsi->command->data_in ) + command->flags |= ISCSI_COMMAND_FLAG_READ; + if ( iscsi->command->data_out ) + command->flags |= ISCSI_COMMAND_FLAG_WRITE; + /* lengths left as zero */ + memcpy ( &command->lun, &iscsi->command->lun, + sizeof ( command->lun ) ); + command->itt = htonl ( iscsi->itt ); + command->exp_len = htonl ( iscsi->command->data_in_len | + iscsi->command->data_out_len ); + command->cmdsn = htonl ( iscsi->cmdsn ); + command->expstatsn = htonl ( iscsi->statsn + 1 ); + memcpy ( &command->cdb, &iscsi->command->cdb, sizeof ( command->cdb )); + DBGC2 ( iscsi, "iSCSI %p start " SCSI_CDB_FORMAT " %s %#zx\n", + iscsi, SCSI_CDB_DATA ( command->cdb ), + ( iscsi->command->data_in ? "in" : "out" ), + ( iscsi->command->data_in ? + iscsi->command->data_in_len : + iscsi->command->data_out_len ) ); +} + +/** + * Receive data segment of an iSCSI SCSI response PDU + * + * @v iscsi iSCSI session + * @v data Received data + * @v len Length of received data + * @v remaining Data remaining after this data + * @ret rc Return status code + */ +static int iscsi_rx_scsi_response ( struct iscsi_session *iscsi, + const void *data, size_t len, + size_t remaining ) { + struct iscsi_bhs_scsi_response *response + = &iscsi->rx_bhs.scsi_response; + struct scsi_rsp rsp; + uint32_t residual_count; + size_t data_len; + int rc; + + /* Buffer up the PDU data */ + if ( ( rc = iscsi_rx_buffered_data ( iscsi, data, len ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not buffer SCSI response: %s\n", + iscsi, strerror ( rc ) ); + return rc; + } + if ( remaining ) + return 0; + + /* Parse SCSI response and discard buffer */ + memset ( &rsp, 0, sizeof ( rsp ) ); + rsp.status = response->status; + residual_count = ntohl ( response->residual_count ); + if ( response->flags & ISCSI_DATA_FLAG_OVERFLOW ) { + rsp.overrun = residual_count; + } else if ( response->flags & ISCSI_DATA_FLAG_UNDERFLOW ) { + rsp.overrun = -(residual_count); + } + data_len = ISCSI_DATA_LEN ( response->lengths ); + if ( data_len ) { + scsi_parse_sense ( ( iscsi->rx_buffer + 2 ), ( data_len - 2 ), + &rsp.sense ); + } + iscsi_rx_buffered_data_done ( iscsi ); + + /* Check for errors */ + if ( response->response != ISCSI_RESPONSE_COMMAND_COMPLETE ) + return -EIO; + + /* Mark as completed */ + iscsi_scsi_done ( iscsi, 0, &rsp ); + return 0; +} + +/** + * Receive data segment of an iSCSI data-in PDU + * + * @v iscsi iSCSI session + * @v data Received data + * @v len Length of received data + * @v remaining Data remaining after this data + * @ret rc Return status code + */ +static int iscsi_rx_data_in ( struct iscsi_session *iscsi, + const void *data, size_t len, + size_t remaining ) { + struct iscsi_bhs_data_in *data_in = &iscsi->rx_bhs.data_in; + unsigned long offset; + + /* Copy data to data-in buffer */ + offset = ntohl ( data_in->offset ) + iscsi->rx_offset; + assert ( iscsi->command != NULL ); + assert ( iscsi->command->data_in ); + assert ( ( offset + len ) <= iscsi->command->data_in_len ); + copy_to_user ( iscsi->command->data_in, offset, data, len ); + + /* Wait for whole SCSI response to arrive */ + if ( remaining ) + return 0; + + /* Mark as completed if status is present */ + if ( data_in->flags & ISCSI_DATA_FLAG_STATUS ) { + assert ( ( offset + len ) == iscsi->command->data_in_len ); + assert ( data_in->flags & ISCSI_FLAG_FINAL ); + /* iSCSI cannot return an error status via a data-in */ + iscsi_scsi_done ( iscsi, 0, NULL ); + } + + return 0; +} + +/** + * Receive data segment of an iSCSI R2T PDU + * + * @v iscsi iSCSI session + * @v data Received data + * @v len Length of received data + * @v remaining Data remaining after this data + * @ret rc Return status code + */ +static int iscsi_rx_r2t ( struct iscsi_session *iscsi, + const void *data __unused, size_t len __unused, + size_t remaining __unused ) { + struct iscsi_bhs_r2t *r2t = &iscsi->rx_bhs.r2t; + + /* Record transfer parameters and trigger first data-out */ + iscsi->ttt = ntohl ( r2t->ttt ); + iscsi->transfer_offset = ntohl ( r2t->offset ); + iscsi->transfer_len = ntohl ( r2t->len ); + iscsi_start_data_out ( iscsi, 0 ); + + return 0; +} + +/** + * Build iSCSI data-out BHS + * + * @v iscsi iSCSI session + * @v datasn Data sequence number within the transfer + * + */ +static void iscsi_start_data_out ( struct iscsi_session *iscsi, + unsigned int datasn ) { + struct iscsi_bhs_data_out *data_out = &iscsi->tx_bhs.data_out; + unsigned long offset; + unsigned long remaining; + unsigned long len; + + /* We always send 512-byte Data-Out PDUs; this removes the + * need to worry about the target's MaxRecvDataSegmentLength. + */ + offset = datasn * 512; + remaining = iscsi->transfer_len - offset; + len = remaining; + if ( len > 512 ) + len = 512; + + /* Construct BHS and initiate transmission */ + iscsi_start_tx ( iscsi ); + data_out->opcode = ISCSI_OPCODE_DATA_OUT; + if ( len == remaining ) + data_out->flags = ( ISCSI_FLAG_FINAL ); + ISCSI_SET_LENGTHS ( data_out->lengths, 0, len ); + data_out->lun = iscsi->command->lun; + data_out->itt = htonl ( iscsi->itt ); + data_out->ttt = htonl ( iscsi->ttt ); + data_out->expstatsn = htonl ( iscsi->statsn + 1 ); + data_out->datasn = htonl ( datasn ); + data_out->offset = htonl ( iscsi->transfer_offset + offset ); + DBGC ( iscsi, "iSCSI %p start data out DataSN %#x len %#lx\n", + iscsi, datasn, len ); +} + +/** + * Complete iSCSI data-out PDU transmission + * + * @v iscsi iSCSI session + * + */ +static void iscsi_data_out_done ( struct iscsi_session *iscsi ) { + struct iscsi_bhs_data_out *data_out = &iscsi->tx_bhs.data_out; + + /* If we haven't reached the end of the sequence, start + * sending the next data-out PDU. + */ + if ( ! ( data_out->flags & ISCSI_FLAG_FINAL ) ) + iscsi_start_data_out ( iscsi, ntohl ( data_out->datasn ) + 1 ); +} + +/** + * Send iSCSI data-out data segment + * + * @v iscsi iSCSI session + * @ret rc Return status code + */ +static int iscsi_tx_data_out ( struct iscsi_session *iscsi ) { + struct iscsi_bhs_data_out *data_out = &iscsi->tx_bhs.data_out; + struct io_buffer *iobuf; + unsigned long offset; + size_t len; + size_t pad_len; + + offset = ntohl ( data_out->offset ); + len = ISCSI_DATA_LEN ( data_out->lengths ); + pad_len = ISCSI_DATA_PAD_LEN ( data_out->lengths ); + + assert ( iscsi->command != NULL ); + assert ( iscsi->command->data_out ); + assert ( ( offset + len ) <= iscsi->command->data_out_len ); + + iobuf = xfer_alloc_iob ( &iscsi->socket, ( len + pad_len ) ); + if ( ! iobuf ) + return -ENOMEM; + + copy_from_user ( iob_put ( iobuf, len ), + iscsi->command->data_out, offset, len ); + memset ( iob_put ( iobuf, pad_len ), 0, pad_len ); + + return xfer_deliver_iob ( &iscsi->socket, iobuf ); +} + +/** + * Receive data segment of an iSCSI NOP-In + * + * @v iscsi iSCSI session + * @v data Received data + * @v len Length of received data + * @v remaining Data remaining after this data + * @ret rc Return status code + */ +static int iscsi_rx_nop_in ( struct iscsi_session *iscsi, + const void *data __unused, size_t len __unused, + size_t remaining __unused ) { + struct iscsi_nop_in *nop_in = &iscsi->rx_bhs.nop_in; + + DBGC2 ( iscsi, "iSCSI %p received NOP-In\n", iscsi ); + + /* We don't currently have the ability to respond to NOP-Ins + * sent as ping requests, but we can happily accept NOP-Ins + * sent merely to update CmdSN. + */ + if ( nop_in->ttt != htonl ( ISCSI_TAG_RESERVED ) ) { + DBGC ( iscsi, "iSCSI %p received unsupported NOP-In with TTT " + "%08x\n", iscsi, ntohl ( nop_in->ttt ) ); + return -ENOTSUP_NOP_IN; + } + + return 0; +} + +/**************************************************************************** + * + * iSCSI login + * + */ + +/** + * Build iSCSI login request strings + * + * @v iscsi iSCSI session + * + * These are the initial set of strings sent in the first login + * request PDU. We want the following settings: + * + * HeaderDigest=None + * DataDigest=None + * MaxConnections is irrelevant; we make only one connection anyway [4] + * InitialR2T=Yes [1] + * ImmediateData is irrelevant; we never send immediate data [4] + * MaxRecvDataSegmentLength=8192 (default; we don't care) [3] + * MaxBurstLength=262144 (default; we don't care) [3] + * FirstBurstLength=262144 (default; we don't care) + * DefaultTime2Wait=0 [2] + * DefaultTime2Retain=0 [2] + * MaxOutstandingR2T=1 + * DataPDUInOrder=Yes + * DataSequenceInOrder=Yes + * ErrorRecoveryLevel=0 + * + * [1] InitialR2T has an OR resolution function, so the target may + * force us to use it. We therefore simplify our logic by always + * using it. + * + * [2] These ensure that we can safely start a new task once we have + * reconnected after a failure, without having to manually tidy up + * after the old one. + * + * [3] We are quite happy to use the RFC-defined default values for + * these parameters, but some targets (notably OpenSolaris) + * incorrectly assume a default value of zero, so we explicitly + * specify the default values. + * + * [4] We are quite happy to use the RFC-defined default values for + * these parameters, but some targets (notably a QNAP TS-639Pro) fail + * unless they are supplied, so we explicitly specify the default + * values. + */ +static int iscsi_build_login_request_strings ( struct iscsi_session *iscsi, + void *data, size_t len ) { + unsigned int used = 0; + const char *auth_method; + + if ( iscsi->status & ISCSI_STATUS_STRINGS_SECURITY ) { + /* Default to allowing no authentication */ + auth_method = "None"; + /* If we have a credential to supply, permit CHAP */ + if ( iscsi->initiator_username ) + auth_method = "CHAP,None"; + /* If we have a credential to check, force CHAP */ + if ( iscsi->target_username ) + auth_method = "CHAP"; + used += ssnprintf ( data + used, len - used, + "InitiatorName=%s%c" + "TargetName=%s%c" + "SessionType=Normal%c" + "AuthMethod=%s%c", + iscsi->initiator_iqn, 0, + iscsi->target_iqn, 0, 0, + auth_method, 0 ); + } + + if ( iscsi->status & ISCSI_STATUS_STRINGS_CHAP_ALGORITHM ) { + used += ssnprintf ( data + used, len - used, "CHAP_A=5%c", 0 ); + } + + if ( ( iscsi->status & ISCSI_STATUS_STRINGS_CHAP_RESPONSE ) ) { + char buf[ base16_encoded_len ( iscsi->chap.response_len ) + 1 ]; + assert ( iscsi->initiator_username != NULL ); + base16_encode ( iscsi->chap.response, iscsi->chap.response_len, + buf ); + used += ssnprintf ( data + used, len - used, + "CHAP_N=%s%cCHAP_R=0x%s%c", + iscsi->initiator_username, 0, buf, 0 ); + } + + if ( ( iscsi->status & ISCSI_STATUS_STRINGS_CHAP_CHALLENGE ) ) { + size_t challenge_len = ( sizeof ( iscsi->chap_challenge ) - 1 ); + char buf[ base16_encoded_len ( challenge_len ) + 1 ]; + base16_encode ( ( iscsi->chap_challenge + 1 ), challenge_len, + buf ); + used += ssnprintf ( data + used, len - used, + "CHAP_I=%d%cCHAP_C=0x%s%c", + iscsi->chap_challenge[0], 0, buf, 0 ); + } + + if ( iscsi->status & ISCSI_STATUS_STRINGS_OPERATIONAL ) { + used += ssnprintf ( data + used, len - used, + "HeaderDigest=None%c" + "DataDigest=None%c" + "MaxConnections=1%c" + "InitialR2T=Yes%c" + "ImmediateData=No%c" + "MaxRecvDataSegmentLength=8192%c" + "MaxBurstLength=262144%c" + "DefaultTime2Wait=0%c" + "DefaultTime2Retain=0%c" + "MaxOutstandingR2T=1%c" + "DataPDUInOrder=Yes%c" + "DataSequenceInOrder=Yes%c" + "ErrorRecoveryLevel=0%c", + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ); + } + + return used; +} + +/** + * Build iSCSI login request BHS + * + * @v iscsi iSCSI session + */ +static void iscsi_start_login ( struct iscsi_session *iscsi ) { + struct iscsi_bhs_login_request *request = &iscsi->tx_bhs.login_request; + int len; + + switch ( iscsi->status & ISCSI_LOGIN_CSG_MASK ) { + case ISCSI_LOGIN_CSG_SECURITY_NEGOTIATION: + DBGC ( iscsi, "iSCSI %p entering security negotiation\n", + iscsi ); + break; + case ISCSI_LOGIN_CSG_OPERATIONAL_NEGOTIATION: + DBGC ( iscsi, "iSCSI %p entering operational negotiation\n", + iscsi ); + break; + default: + assert ( 0 ); + } + + /* Construct BHS and initiate transmission */ + iscsi_start_tx ( iscsi ); + request->opcode = ( ISCSI_OPCODE_LOGIN_REQUEST | + ISCSI_FLAG_IMMEDIATE ); + request->flags = ( ( iscsi->status & ISCSI_STATUS_PHASE_MASK ) | + ISCSI_LOGIN_FLAG_TRANSITION ); + /* version_max and version_min left as zero */ + len = iscsi_build_login_request_strings ( iscsi, NULL, 0 ); + ISCSI_SET_LENGTHS ( request->lengths, 0, len ); + request->isid_iana_en = htonl ( ISCSI_ISID_IANA | + IANA_EN_FEN_SYSTEMS ); + request->isid_iana_qual = htons ( iscsi->isid_iana_qual ); + /* tsih left as zero */ + request->itt = htonl ( iscsi->itt ); + /* cid left as zero */ + request->cmdsn = htonl ( iscsi->cmdsn ); + request->expstatsn = htonl ( iscsi->statsn + 1 ); +} + +/** + * Complete iSCSI login request PDU transmission + * + * @v iscsi iSCSI session + * + */ +static void iscsi_login_request_done ( struct iscsi_session *iscsi ) { + + /* Clear any "strings to send" flags */ + iscsi->status &= ~ISCSI_STATUS_STRINGS_MASK; + + /* Free any dynamically allocated storage used for login */ + chap_finish ( &iscsi->chap ); +} + +/** + * Transmit data segment of an iSCSI login request PDU + * + * @v iscsi iSCSI session + * @ret rc Return status code + * + * For login requests, the data segment consists of the login strings. + */ +static int iscsi_tx_login_request ( struct iscsi_session *iscsi ) { + struct iscsi_bhs_login_request *request = &iscsi->tx_bhs.login_request; + struct io_buffer *iobuf; + size_t len; + size_t pad_len; + + len = ISCSI_DATA_LEN ( request->lengths ); + pad_len = ISCSI_DATA_PAD_LEN ( request->lengths ); + iobuf = xfer_alloc_iob ( &iscsi->socket, ( len + pad_len ) ); + if ( ! iobuf ) + return -ENOMEM; + iob_put ( iobuf, len ); + iscsi_build_login_request_strings ( iscsi, iobuf->data, len ); + memset ( iob_put ( iobuf, pad_len ), 0, pad_len ); + + return xfer_deliver_iob ( &iscsi->socket, iobuf ); +} + +/** + * Calculate maximum length of decoded large binary value + * + * @v encoded Encoded large binary value + * @v max_raw_len Maximum length of raw data + */ +static inline size_t +iscsi_large_binary_decoded_max_len ( const char *encoded ) { + return ( strlen ( encoded ) ); /* Decoding never expands data */ +} + +/** + * Decode large binary value + * + * @v encoded Encoded large binary value + * @v raw Raw data + * @ret len Length of raw data, or negative error + */ +static int iscsi_large_binary_decode ( const char *encoded, uint8_t *raw ) { + + if ( encoded[0] != '0' ) + return -EPROTO_INVALID_LARGE_BINARY; + + switch ( encoded[1] ) { + case 'x' : + case 'X' : + return base16_decode ( ( encoded + 2 ), raw ); + case 'b' : + case 'B' : + return base64_decode ( ( encoded + 2 ), raw ); + default: + return -EPROTO_INVALID_LARGE_BINARY; + } +} + +/** + * Handle iSCSI TargetAddress text value + * + * @v iscsi iSCSI session + * @v value TargetAddress value + * @ret rc Return status code + */ +static int iscsi_handle_targetaddress_value ( struct iscsi_session *iscsi, + const char *value ) { + char *separator; + + DBGC ( iscsi, "iSCSI %p will redirect to %s\n", iscsi, value ); + + /* Replace target address */ + free ( iscsi->target_address ); + iscsi->target_address = strdup ( value ); + if ( ! iscsi->target_address ) + return -ENOMEM; + + /* Replace target port */ + iscsi->target_port = htons ( ISCSI_PORT ); + separator = strchr ( iscsi->target_address, ':' ); + if ( separator ) { + *separator = '\0'; + iscsi->target_port = strtoul ( ( separator + 1 ), NULL, 0 ); + } + + return 0; +} + +/** + * Handle iSCSI AuthMethod text value + * + * @v iscsi iSCSI session + * @v value AuthMethod value + * @ret rc Return status code + */ +static int iscsi_handle_authmethod_value ( struct iscsi_session *iscsi, + const char *value ) { + + /* If server requests CHAP, send the CHAP_A string */ + if ( strcmp ( value, "CHAP" ) == 0 ) { + DBGC ( iscsi, "iSCSI %p initiating CHAP authentication\n", + iscsi ); + iscsi->status |= ( ISCSI_STATUS_STRINGS_CHAP_ALGORITHM | + ISCSI_STATUS_AUTH_FORWARD_REQUIRED ); + } + + return 0; +} + +/** + * Handle iSCSI CHAP_A text value + * + * @v iscsi iSCSI session + * @v value CHAP_A value + * @ret rc Return status code + */ +static int iscsi_handle_chap_a_value ( struct iscsi_session *iscsi, + const char *value ) { + + /* We only ever offer "5" (i.e. MD5) as an algorithm, so if + * the server responds with anything else it is a protocol + * violation. + */ + if ( strcmp ( value, "5" ) != 0 ) { + DBGC ( iscsi, "iSCSI %p got invalid CHAP algorithm \"%s\"\n", + iscsi, value ); + return -EPROTO_INVALID_CHAP_ALGORITHM; + } + + return 0; +} + +/** + * Handle iSCSI CHAP_I text value + * + * @v iscsi iSCSI session + * @v value CHAP_I value + * @ret rc Return status code + */ +static int iscsi_handle_chap_i_value ( struct iscsi_session *iscsi, + const char *value ) { + unsigned int identifier; + char *endp; + int rc; + + /* The CHAP identifier is an integer value */ + identifier = strtoul ( value, &endp, 0 ); + if ( *endp != '\0' ) { + DBGC ( iscsi, "iSCSI %p saw invalid CHAP identifier \"%s\"\n", + iscsi, value ); + return -EPROTO_INVALID_CHAP_IDENTIFIER; + } + + /* Prepare for CHAP with MD5 */ + chap_finish ( &iscsi->chap ); + if ( ( rc = chap_init ( &iscsi->chap, &md5_algorithm ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not initialise CHAP: %s\n", + iscsi, strerror ( rc ) ); + return rc; + } + + /* Identifier and secret are the first two components of the + * challenge. + */ + chap_set_identifier ( &iscsi->chap, identifier ); + if ( iscsi->initiator_password ) { + chap_update ( &iscsi->chap, iscsi->initiator_password, + strlen ( iscsi->initiator_password ) ); + } + + return 0; +} + +/** + * Handle iSCSI CHAP_C text value + * + * @v iscsi iSCSI session + * @v value CHAP_C value + * @ret rc Return status code + */ +static int iscsi_handle_chap_c_value ( struct iscsi_session *iscsi, + const char *value ) { + uint8_t buf[ iscsi_large_binary_decoded_max_len ( value ) ]; + unsigned int i; + size_t len; + int rc; + + /* Process challenge */ + rc = iscsi_large_binary_decode ( value, buf ); + if ( rc < 0 ) { + DBGC ( iscsi, "iSCSI %p invalid CHAP challenge \"%s\": %s\n", + iscsi, value, strerror ( rc ) ); + return rc; + } + len = rc; + chap_update ( &iscsi->chap, buf, len ); + + /* Build CHAP response */ + DBGC ( iscsi, "iSCSI %p sending CHAP response\n", iscsi ); + chap_respond ( &iscsi->chap ); + iscsi->status |= ISCSI_STATUS_STRINGS_CHAP_RESPONSE; + + /* Send CHAP challenge, if applicable */ + if ( iscsi->target_username ) { + iscsi->status |= ISCSI_STATUS_STRINGS_CHAP_CHALLENGE; + /* Generate CHAP challenge data */ + for ( i = 0 ; i < sizeof ( iscsi->chap_challenge ) ; i++ ) { + iscsi->chap_challenge[i] = random(); + } + } + + return 0; +} + +/** + * Handle iSCSI CHAP_N text value + * + * @v iscsi iSCSI session + * @v value CHAP_N value + * @ret rc Return status code + */ +static int iscsi_handle_chap_n_value ( struct iscsi_session *iscsi, + const char *value ) { + + /* The target username isn't actually involved at any point in + * the authentication process; it merely serves to identify + * which password the target is using to generate the CHAP + * response. We unnecessarily verify that the username is as + * expected, in order to provide mildly helpful diagnostics if + * the target is supplying the wrong username/password + * combination. + */ + if ( iscsi->target_username && + ( strcmp ( iscsi->target_username, value ) != 0 ) ) { + DBGC ( iscsi, "iSCSI %p target username \"%s\" incorrect " + "(wanted \"%s\")\n", + iscsi, value, iscsi->target_username ); + return -EACCES_INCORRECT_TARGET_USERNAME; + } + + return 0; +} + +/** + * Handle iSCSI CHAP_R text value + * + * @v iscsi iSCSI session + * @v value CHAP_R value + * @ret rc Return status code + */ +static int iscsi_handle_chap_r_value ( struct iscsi_session *iscsi, + const char *value ) { + uint8_t buf[ iscsi_large_binary_decoded_max_len ( value ) ]; + size_t len; + int rc; + + /* Generate CHAP response for verification */ + chap_finish ( &iscsi->chap ); + if ( ( rc = chap_init ( &iscsi->chap, &md5_algorithm ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not initialise CHAP: %s\n", + iscsi, strerror ( rc ) ); + return rc; + } + chap_set_identifier ( &iscsi->chap, iscsi->chap_challenge[0] ); + if ( iscsi->target_password ) { + chap_update ( &iscsi->chap, iscsi->target_password, + strlen ( iscsi->target_password ) ); + } + chap_update ( &iscsi->chap, &iscsi->chap_challenge[1], + ( sizeof ( iscsi->chap_challenge ) - 1 ) ); + chap_respond ( &iscsi->chap ); + + /* Process response */ + rc = iscsi_large_binary_decode ( value, buf ); + if ( rc < 0 ) { + DBGC ( iscsi, "iSCSI %p invalid CHAP response \"%s\": %s\n", + iscsi, value, strerror ( rc ) ); + return rc; + } + len = rc; + + /* Check CHAP response */ + if ( len != iscsi->chap.response_len ) { + DBGC ( iscsi, "iSCSI %p invalid CHAP response length\n", + iscsi ); + return -EPROTO_INVALID_CHAP_RESPONSE; + } + if ( memcmp ( buf, iscsi->chap.response, len ) != 0 ) { + DBGC ( iscsi, "iSCSI %p incorrect CHAP response \"%s\"\n", + iscsi, value ); + return -EACCES_INCORRECT_TARGET_PASSWORD; + } + + /* Mark session as authenticated */ + iscsi->status |= ISCSI_STATUS_AUTH_REVERSE_OK; + + return 0; +} + +/** An iSCSI text string that we want to handle */ +struct iscsi_string_type { + /** String key + * + * This is the portion preceding the "=" sign, + * e.g. "InitiatorName", "CHAP_A", etc. + */ + const char *key; + /** Handle iSCSI string value + * + * @v iscsi iSCSI session + * @v value iSCSI string value + * @ret rc Return status code + */ + int ( * handle ) ( struct iscsi_session *iscsi, const char *value ); +}; + +/** iSCSI text strings that we want to handle */ +static struct iscsi_string_type iscsi_string_types[] = { + { "TargetAddress", iscsi_handle_targetaddress_value }, + { "AuthMethod", iscsi_handle_authmethod_value }, + { "CHAP_A", iscsi_handle_chap_a_value }, + { "CHAP_I", iscsi_handle_chap_i_value }, + { "CHAP_C", iscsi_handle_chap_c_value }, + { "CHAP_N", iscsi_handle_chap_n_value }, + { "CHAP_R", iscsi_handle_chap_r_value }, + { NULL, NULL } +}; + +/** + * Handle iSCSI string + * + * @v iscsi iSCSI session + * @v string iSCSI string (in "key=value" format) + * @ret rc Return status code + */ +static int iscsi_handle_string ( struct iscsi_session *iscsi, + const char *string ) { + struct iscsi_string_type *type; + const char *separator; + const char *value; + size_t key_len; + int rc; + + /* Find separator */ + separator = strchr ( string, '=' ); + if ( ! separator ) { + DBGC ( iscsi, "iSCSI %p malformed string %s\n", + iscsi, string ); + return -EPROTO_INVALID_KEY_VALUE_PAIR; + } + key_len = ( separator - string ); + value = ( separator + 1 ); + + /* Check for rejections. Since we send only non-rejectable + * values, any rejection is a fatal protocol error. + */ + if ( strcmp ( value, "Reject" ) == 0 ) { + DBGC ( iscsi, "iSCSI %p rejection: %s\n", iscsi, string ); + return -EPROTO_VALUE_REJECTED; + } + + /* Handle key/value pair */ + for ( type = iscsi_string_types ; type->key ; type++ ) { + if ( strncmp ( string, type->key, key_len ) != 0 ) + continue; + DBGC ( iscsi, "iSCSI %p handling %s\n", iscsi, string ); + if ( ( rc = type->handle ( iscsi, value ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not handle %s: %s\n", + iscsi, string, strerror ( rc ) ); + return rc; + } + return 0; + } + DBGC ( iscsi, "iSCSI %p ignoring %s\n", iscsi, string ); + return 0; +} + +/** + * Handle iSCSI strings + * + * @v iscsi iSCSI session + * @v string iSCSI string buffer + * @v len Length of string buffer + * @ret rc Return status code + */ +static int iscsi_handle_strings ( struct iscsi_session *iscsi, + const char *strings, size_t len ) { + size_t string_len; + int rc; + + /* Handle each string in turn, taking care not to overrun the + * data buffer in case of badly-terminated data. + */ + while ( 1 ) { + string_len = ( strnlen ( strings, len ) + 1 ); + if ( string_len > len ) + break; + if ( ( rc = iscsi_handle_string ( iscsi, strings ) ) != 0 ) + return rc; + strings += string_len; + len -= string_len; + } + return 0; +} + +/** + * Convert iSCSI response status to return status code + * + * @v status_class iSCSI status class + * @v status_detail iSCSI status detail + * @ret rc Return status code + */ +static int iscsi_status_to_rc ( unsigned int status_class, + unsigned int status_detail ) { + switch ( status_class ) { + case ISCSI_STATUS_INITIATOR_ERROR : + switch ( status_detail ) { + case ISCSI_STATUS_INITIATOR_ERROR_AUTHENTICATION : + return -EPERM_INITIATOR_AUTHENTICATION; + case ISCSI_STATUS_INITIATOR_ERROR_AUTHORISATION : + return -EPERM_INITIATOR_AUTHORISATION; + case ISCSI_STATUS_INITIATOR_ERROR_NOT_FOUND : + case ISCSI_STATUS_INITIATOR_ERROR_REMOVED : + return -ENODEV; + default : + return -ENOTSUP_INITIATOR_STATUS; + } + case ISCSI_STATUS_TARGET_ERROR : + switch ( status_detail ) { + case ISCSI_STATUS_TARGET_ERROR_UNAVAILABLE: + return -EIO_TARGET_UNAVAILABLE; + case ISCSI_STATUS_TARGET_ERROR_NO_RESOURCES: + return -EIO_TARGET_NO_RESOURCES; + default: + return -ENOTSUP_TARGET_STATUS; + } + default : + return -EINVAL; + } +} + +/** + * Receive data segment of an iSCSI login response PDU + * + * @v iscsi iSCSI session + * @v data Received data + * @v len Length of received data + * @v remaining Data remaining after this data + * @ret rc Return status code + */ +static int iscsi_rx_login_response ( struct iscsi_session *iscsi, + const void *data, size_t len, + size_t remaining ) { + struct iscsi_bhs_login_response *response + = &iscsi->rx_bhs.login_response; + int rc; + + /* Buffer up the PDU data */ + if ( ( rc = iscsi_rx_buffered_data ( iscsi, data, len ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not buffer login response: %s\n", + iscsi, strerror ( rc ) ); + return rc; + } + if ( remaining ) + return 0; + + /* Process string data and discard string buffer */ + if ( ( rc = iscsi_handle_strings ( iscsi, iscsi->rx_buffer, + iscsi->rx_len ) ) != 0 ) + return rc; + iscsi_rx_buffered_data_done ( iscsi ); + + /* Check for login redirection */ + if ( response->status_class == ISCSI_STATUS_REDIRECT ) { + DBGC ( iscsi, "iSCSI %p redirecting to new server\n", iscsi ); + iscsi_close_connection ( iscsi, 0 ); + if ( ( rc = iscsi_open_connection ( iscsi ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not redirect: %s\n ", + iscsi, strerror ( rc ) ); + return rc; + } + return 0; + } + + /* Check for fatal errors */ + if ( response->status_class != 0 ) { + DBGC ( iscsi, "iSCSI login failure: class %02x detail %02x\n", + response->status_class, response->status_detail ); + rc = iscsi_status_to_rc ( response->status_class, + response->status_detail ); + return rc; + } + + /* Handle login transitions */ + if ( response->flags & ISCSI_LOGIN_FLAG_TRANSITION ) { + iscsi->status &= ~( ISCSI_STATUS_PHASE_MASK | + ISCSI_STATUS_STRINGS_MASK ); + switch ( response->flags & ISCSI_LOGIN_NSG_MASK ) { + case ISCSI_LOGIN_NSG_OPERATIONAL_NEGOTIATION: + iscsi->status |= + ( ISCSI_STATUS_OPERATIONAL_NEGOTIATION_PHASE | + ISCSI_STATUS_STRINGS_OPERATIONAL ); + break; + case ISCSI_LOGIN_NSG_FULL_FEATURE_PHASE: + iscsi->status |= ISCSI_STATUS_FULL_FEATURE_PHASE; + break; + default: + DBGC ( iscsi, "iSCSI %p got invalid response flags " + "%02x\n", iscsi, response->flags ); + return -EIO; + } + } + + /* Send next login request PDU if we haven't reached the full + * feature phase yet. + */ + if ( ( iscsi->status & ISCSI_STATUS_PHASE_MASK ) != + ISCSI_STATUS_FULL_FEATURE_PHASE ) { + iscsi_start_login ( iscsi ); + return 0; + } + + /* Check that target authentication was successful (if required) */ + if ( ( iscsi->status & ISCSI_STATUS_AUTH_REVERSE_REQUIRED ) && + ! ( iscsi->status & ISCSI_STATUS_AUTH_REVERSE_OK ) ) { + DBGC ( iscsi, "iSCSI %p nefarious target tried to bypass " + "authentication\n", iscsi ); + return -EPROTO; + } + + /* Notify SCSI layer of window change */ + DBGC ( iscsi, "iSCSI %p entering full feature phase\n", iscsi ); + xfer_window_changed ( &iscsi->control ); + + return 0; +} + +/**************************************************************************** + * + * iSCSI to socket interface + * + */ + +/** + * Pause TX engine + * + * @v iscsi iSCSI session + */ +static void iscsi_tx_pause ( struct iscsi_session *iscsi ) { + process_del ( &iscsi->process ); +} + +/** + * Resume TX engine + * + * @v iscsi iSCSI session + */ +static void iscsi_tx_resume ( struct iscsi_session *iscsi ) { + process_add ( &iscsi->process ); +} + +/** + * Start up a new TX PDU + * + * @v iscsi iSCSI session + * + * This initiates the process of sending a new PDU. Only one PDU may + * be in transit at any one time. + */ +static void iscsi_start_tx ( struct iscsi_session *iscsi ) { + + assert ( iscsi->tx_state == ISCSI_TX_IDLE ); + + /* Initialise TX BHS */ + memset ( &iscsi->tx_bhs, 0, sizeof ( iscsi->tx_bhs ) ); + + /* Flag TX engine to start transmitting */ + iscsi->tx_state = ISCSI_TX_BHS; + + /* Start transmission process */ + iscsi_tx_resume ( iscsi ); +} + +/** + * Transmit nothing + * + * @v iscsi iSCSI session + * @ret rc Return status code + */ +static int iscsi_tx_nothing ( struct iscsi_session *iscsi __unused ) { + return 0; +} + +/** + * Transmit basic header segment of an iSCSI PDU + * + * @v iscsi iSCSI session + * @ret rc Return status code + */ +static int iscsi_tx_bhs ( struct iscsi_session *iscsi ) { + return xfer_deliver_raw ( &iscsi->socket, &iscsi->tx_bhs, + sizeof ( iscsi->tx_bhs ) ); +} + +/** + * Transmit data segment of an iSCSI PDU + * + * @v iscsi iSCSI session + * @ret rc Return status code + * + * Handle transmission of part of a PDU data segment. iscsi::tx_bhs + * will be valid when this is called. + */ +static int iscsi_tx_data ( struct iscsi_session *iscsi ) { + struct iscsi_bhs_common *common = &iscsi->tx_bhs.common; + + switch ( common->opcode & ISCSI_OPCODE_MASK ) { + case ISCSI_OPCODE_DATA_OUT: + return iscsi_tx_data_out ( iscsi ); + case ISCSI_OPCODE_LOGIN_REQUEST: + return iscsi_tx_login_request ( iscsi ); + default: + /* Nothing to send in other states */ + return 0; + } +} + +/** + * Complete iSCSI PDU transmission + * + * @v iscsi iSCSI session + * + * Called when a PDU has been completely transmitted and the TX state + * machine is about to enter the idle state. iscsi::tx_bhs will be + * valid for the just-completed PDU when this is called. + */ +static void iscsi_tx_done ( struct iscsi_session *iscsi ) { + struct iscsi_bhs_common *common = &iscsi->tx_bhs.common; + + /* Stop transmission process */ + iscsi_tx_pause ( iscsi ); + + switch ( common->opcode & ISCSI_OPCODE_MASK ) { + case ISCSI_OPCODE_DATA_OUT: + iscsi_data_out_done ( iscsi ); + case ISCSI_OPCODE_LOGIN_REQUEST: + iscsi_login_request_done ( iscsi ); + default: + /* No action */ + break; + } +} + +/** + * Transmit iSCSI PDU + * + * @v iscsi iSCSI session + * @v buf Temporary data buffer + * @v len Length of temporary data buffer + * + * Constructs data to be sent for the current TX state + */ +static void iscsi_tx_step ( struct iscsi_session *iscsi ) { + struct iscsi_bhs_common *common = &iscsi->tx_bhs.common; + int ( * tx ) ( struct iscsi_session *iscsi ); + enum iscsi_tx_state next_state; + size_t tx_len; + int rc; + + /* Select fragment to transmit */ + while ( 1 ) { + switch ( iscsi->tx_state ) { + case ISCSI_TX_BHS: + tx = iscsi_tx_bhs; + tx_len = sizeof ( iscsi->tx_bhs ); + next_state = ISCSI_TX_AHS; + break; + case ISCSI_TX_AHS: + tx = iscsi_tx_nothing; + tx_len = 0; + next_state = ISCSI_TX_DATA; + break; + case ISCSI_TX_DATA: + tx = iscsi_tx_data; + tx_len = ISCSI_DATA_LEN ( common->lengths ); + next_state = ISCSI_TX_IDLE; + break; + case ISCSI_TX_IDLE: + /* Nothing to do; pause processing */ + iscsi_tx_pause ( iscsi ); + return; + default: + assert ( 0 ); + return; + } + + /* Check for window availability, if needed */ + if ( tx_len && ( xfer_window ( &iscsi->socket ) == 0 ) ) { + /* Cannot transmit at this point; pause + * processing and wait for window to reopen + */ + iscsi_tx_pause ( iscsi ); + return; + } + + /* Transmit data */ + if ( ( rc = tx ( iscsi ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not transmit: %s\n", + iscsi, strerror ( rc ) ); + /* Transmission errors are fatal */ + iscsi_close ( iscsi, rc ); + return; + } + + /* Move to next state */ + iscsi->tx_state = next_state; + + /* If we have moved to the idle state, mark + * transmission as complete + */ + if ( iscsi->tx_state == ISCSI_TX_IDLE ) + iscsi_tx_done ( iscsi ); + } +} + +/** iSCSI TX process descriptor */ +static struct process_descriptor iscsi_process_desc = + PROC_DESC ( struct iscsi_session, process, iscsi_tx_step ); + +/** + * Receive basic header segment of an iSCSI PDU + * + * @v iscsi iSCSI session + * @v data Received data + * @v len Length of received data + * @v remaining Data remaining after this data + * @ret rc Return status code + * + * This fills in iscsi::rx_bhs with the data from the BHS portion of + * the received PDU. + */ +static int iscsi_rx_bhs ( struct iscsi_session *iscsi, const void *data, + size_t len, size_t remaining __unused ) { + memcpy ( &iscsi->rx_bhs.bytes[iscsi->rx_offset], data, len ); + if ( ( iscsi->rx_offset + len ) >= sizeof ( iscsi->rx_bhs ) ) { + DBGC2 ( iscsi, "iSCSI %p received PDU opcode %#x len %#x\n", + iscsi, iscsi->rx_bhs.common.opcode, + ISCSI_DATA_LEN ( iscsi->rx_bhs.common.lengths ) ); + } + return 0; +} + +/** + * Discard portion of an iSCSI PDU. + * + * @v iscsi iSCSI session + * @v data Received data + * @v len Length of received data + * @v remaining Data remaining after this data + * @ret rc Return status code + * + * This discards data from a portion of a received PDU. + */ +static int iscsi_rx_discard ( struct iscsi_session *iscsi __unused, + const void *data __unused, size_t len __unused, + size_t remaining __unused ) { + /* Do nothing */ + return 0; +} + +/** + * Receive data segment of an iSCSI PDU + * + * @v iscsi iSCSI session + * @v data Received data + * @v len Length of received data + * @v remaining Data remaining after this data + * @ret rc Return status code + * + * Handle processing of part of a PDU data segment. iscsi::rx_bhs + * will be valid when this is called. + */ +static int iscsi_rx_data ( struct iscsi_session *iscsi, const void *data, + size_t len, size_t remaining ) { + struct iscsi_bhs_common_response *response + = &iscsi->rx_bhs.common_response; + + /* Update cmdsn and statsn */ + iscsi->cmdsn = ntohl ( response->expcmdsn ); + iscsi->statsn = ntohl ( response->statsn ); + + switch ( response->opcode & ISCSI_OPCODE_MASK ) { + case ISCSI_OPCODE_LOGIN_RESPONSE: + return iscsi_rx_login_response ( iscsi, data, len, remaining ); + case ISCSI_OPCODE_SCSI_RESPONSE: + return iscsi_rx_scsi_response ( iscsi, data, len, remaining ); + case ISCSI_OPCODE_DATA_IN: + return iscsi_rx_data_in ( iscsi, data, len, remaining ); + case ISCSI_OPCODE_R2T: + return iscsi_rx_r2t ( iscsi, data, len, remaining ); + case ISCSI_OPCODE_NOP_IN: + return iscsi_rx_nop_in ( iscsi, data, len, remaining ); + default: + if ( remaining ) + return 0; + DBGC ( iscsi, "iSCSI %p unknown opcode %02x\n", iscsi, + response->opcode ); + return -ENOTSUP_OPCODE; + } +} + +/** + * Receive new data + * + * @v iscsi iSCSI session + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + * + * This handles received PDUs. The receive strategy is to fill in + * iscsi::rx_bhs with the contents of the BHS portion of the PDU, + * throw away any AHS portion, and then process each part of the data + * portion as it arrives. The data processing routine therefore + * always has a full copy of the BHS available, even for portions of + * the data in different packets to the BHS. + */ +static int iscsi_socket_deliver ( struct iscsi_session *iscsi, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + struct iscsi_bhs_common *common = &iscsi->rx_bhs.common; + int ( * rx ) ( struct iscsi_session *iscsi, const void *data, + size_t len, size_t remaining ); + enum iscsi_rx_state next_state; + size_t frag_len; + size_t remaining; + int rc; + + while ( 1 ) { + switch ( iscsi->rx_state ) { + case ISCSI_RX_BHS: + rx = iscsi_rx_bhs; + iscsi->rx_len = sizeof ( iscsi->rx_bhs ); + next_state = ISCSI_RX_AHS; + break; + case ISCSI_RX_AHS: + rx = iscsi_rx_discard; + iscsi->rx_len = 4 * ISCSI_AHS_LEN ( common->lengths ); + next_state = ISCSI_RX_DATA; + break; + case ISCSI_RX_DATA: + rx = iscsi_rx_data; + iscsi->rx_len = ISCSI_DATA_LEN ( common->lengths ); + next_state = ISCSI_RX_DATA_PADDING; + break; + case ISCSI_RX_DATA_PADDING: + rx = iscsi_rx_discard; + iscsi->rx_len = ISCSI_DATA_PAD_LEN ( common->lengths ); + next_state = ISCSI_RX_BHS; + break; + default: + assert ( 0 ); + rc = -EINVAL; + goto done; + } + + frag_len = iscsi->rx_len - iscsi->rx_offset; + if ( frag_len > iob_len ( iobuf ) ) + frag_len = iob_len ( iobuf ); + remaining = iscsi->rx_len - iscsi->rx_offset - frag_len; + if ( ( rc = rx ( iscsi, iobuf->data, frag_len, + remaining ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not process received " + "data: %s\n", iscsi, strerror ( rc ) ); + goto done; + } + + iscsi->rx_offset += frag_len; + iob_pull ( iobuf, frag_len ); + + /* If all the data for this state has not yet been + * received, stay in this state for now. + */ + if ( iscsi->rx_offset != iscsi->rx_len ) { + rc = 0; + goto done; + } + + iscsi->rx_state = next_state; + iscsi->rx_offset = 0; + } + + done: + /* Free I/O buffer */ + free_iob ( iobuf ); + + /* Destroy session on error */ + if ( rc != 0 ) + iscsi_close ( iscsi, rc ); + + return rc; +} + +/** + * Handle redirection event + * + * @v iscsi iSCSI session + * @v type Location type + * @v args Remaining arguments depend upon location type + * @ret rc Return status code + */ +static int iscsi_vredirect ( struct iscsi_session *iscsi, int type, + va_list args ) { + va_list tmp; + struct sockaddr *peer; + + /* Intercept redirects to a LOCATION_SOCKET and record the IP + * address for the iBFT. This is a bit of a hack, but avoids + * inventing an ioctl()-style call to retrieve the socket + * address from a data-xfer interface. + */ + if ( type == LOCATION_SOCKET ) { + va_copy ( tmp, args ); + ( void ) va_arg ( tmp, int ); /* Discard "semantics" */ + peer = va_arg ( tmp, struct sockaddr * ); + memcpy ( &iscsi->target_sockaddr, peer, + sizeof ( iscsi->target_sockaddr ) ); + va_end ( tmp ); + } + + return xfer_vreopen ( &iscsi->socket, type, args ); +} + +/** iSCSI socket interface operations */ +static struct interface_operation iscsi_socket_operations[] = { + INTF_OP ( xfer_deliver, struct iscsi_session *, iscsi_socket_deliver ), + INTF_OP ( xfer_window_changed, struct iscsi_session *, + iscsi_tx_resume ), + INTF_OP ( xfer_vredirect, struct iscsi_session *, iscsi_vredirect ), + INTF_OP ( intf_close, struct iscsi_session *, iscsi_close ), +}; + +/** iSCSI socket interface descriptor */ +static struct interface_descriptor iscsi_socket_desc = + INTF_DESC ( struct iscsi_session, socket, iscsi_socket_operations ); + +/**************************************************************************** + * + * iSCSI command issuing + * + */ + +/** + * Check iSCSI flow-control window + * + * @v iscsi iSCSI session + * @ret len Length of window + */ +static size_t iscsi_scsi_window ( struct iscsi_session *iscsi ) { + + if ( ( ( iscsi->status & ISCSI_STATUS_PHASE_MASK ) == + ISCSI_STATUS_FULL_FEATURE_PHASE ) && + ( iscsi->command == NULL ) ) { + /* We cannot handle concurrent commands */ + return 1; + } else { + return 0; + } +} + +/** + * Issue iSCSI SCSI command + * + * @v iscsi iSCSI session + * @v parent Parent interface + * @v command SCSI command + * @ret tag Command tag, or negative error + */ +static int iscsi_scsi_command ( struct iscsi_session *iscsi, + struct interface *parent, + struct scsi_cmd *command ) { + + /* This iSCSI implementation cannot handle multiple concurrent + * commands or commands arriving before login is complete. + */ + if ( iscsi_scsi_window ( iscsi ) == 0 ) { + DBGC ( iscsi, "iSCSI %p cannot handle concurrent commands\n", + iscsi ); + return -EOPNOTSUPP; + } + + /* Store command */ + iscsi->command = malloc ( sizeof ( *command ) ); + if ( ! iscsi->command ) + return -ENOMEM; + memcpy ( iscsi->command, command, sizeof ( *command ) ); + + /* Assign new ITT */ + iscsi_new_itt ( iscsi ); + + /* Start sending command */ + iscsi_start_command ( iscsi ); + + /* Attach to parent interface and return */ + intf_plug_plug ( &iscsi->data, parent ); + return iscsi->itt; +} + +/** iSCSI SCSI command-issuing interface operations */ +static struct interface_operation iscsi_control_op[] = { + INTF_OP ( scsi_command, struct iscsi_session *, iscsi_scsi_command ), + INTF_OP ( xfer_window, struct iscsi_session *, iscsi_scsi_window ), + INTF_OP ( intf_close, struct iscsi_session *, iscsi_close ), + INTF_OP ( acpi_describe, struct iscsi_session *, ibft_describe ), +}; + +/** iSCSI SCSI command-issuing interface descriptor */ +static struct interface_descriptor iscsi_control_desc = + INTF_DESC ( struct iscsi_session, control, iscsi_control_op ); + +/** + * Close iSCSI command + * + * @v iscsi iSCSI session + * @v rc Reason for close + */ +static void iscsi_command_close ( struct iscsi_session *iscsi, int rc ) { + + /* Restart interface */ + intf_restart ( &iscsi->data, rc ); + + /* Treat unsolicited command closures mid-command as fatal, + * because we have no code to handle partially-completed PDUs. + */ + if ( iscsi->command != NULL ) + iscsi_close ( iscsi, ( ( rc == 0 ) ? -ECANCELED : rc ) ); +} + +/** iSCSI SCSI command interface operations */ +static struct interface_operation iscsi_data_op[] = { + INTF_OP ( intf_close, struct iscsi_session *, iscsi_command_close ), +}; + +/** iSCSI SCSI command interface descriptor */ +static struct interface_descriptor iscsi_data_desc = + INTF_DESC ( struct iscsi_session, data, iscsi_data_op ); + +/**************************************************************************** + * + * Instantiator + * + */ + +/** iSCSI root path components (as per RFC4173) */ +enum iscsi_root_path_component { + RP_SERVERNAME = 0, + RP_PROTOCOL, + RP_PORT, + RP_LUN, + RP_TARGETNAME, + NUM_RP_COMPONENTS +}; + +/** iSCSI initiator IQN setting */ +const struct setting initiator_iqn_setting __setting ( SETTING_SANBOOT_EXTRA, + initiator-iqn ) = { + .name = "initiator-iqn", + .description = "iSCSI initiator name", + .tag = DHCP_ISCSI_INITIATOR_IQN, + .type = &setting_type_string, +}; + +/** iSCSI reverse username setting */ +const struct setting reverse_username_setting __setting ( SETTING_AUTH_EXTRA, + reverse-username ) = { + .name = "reverse-username", + .description = "Reverse user name", + .tag = DHCP_EB_REVERSE_USERNAME, + .type = &setting_type_string, +}; + +/** iSCSI reverse password setting */ +const struct setting reverse_password_setting __setting ( SETTING_AUTH_EXTRA, + reverse-password ) = { + .name = "reverse-password", + .description = "Reverse password", + .tag = DHCP_EB_REVERSE_PASSWORD, + .type = &setting_type_string, +}; + +/** + * Parse iSCSI root path + * + * @v iscsi iSCSI session + * @v root_path iSCSI root path (as per RFC4173) + * @ret rc Return status code + */ +static int iscsi_parse_root_path ( struct iscsi_session *iscsi, + const char *root_path ) { + char rp_copy[ strlen ( root_path ) + 1 ]; + char *rp_comp[NUM_RP_COMPONENTS]; + char *rp = rp_copy; + int i = 0; + int rc; + + /* Split root path into component parts */ + strcpy ( rp_copy, root_path ); + while ( 1 ) { + rp_comp[i++] = rp; + if ( i == NUM_RP_COMPONENTS ) + break; + for ( ; *rp != ':' ; rp++ ) { + if ( ! *rp ) { + DBGC ( iscsi, "iSCSI %p root path \"%s\" " + "too short\n", iscsi, root_path ); + return -EINVAL_ROOT_PATH_TOO_SHORT; + } + } + *(rp++) = '\0'; + } + + /* Use root path components to configure iSCSI session */ + iscsi->target_address = strdup ( rp_comp[RP_SERVERNAME] ); + if ( ! iscsi->target_address ) + return -ENOMEM; + iscsi->target_port = strtoul ( rp_comp[RP_PORT], NULL, 10 ); + if ( ! iscsi->target_port ) + iscsi->target_port = ISCSI_PORT; + if ( ( rc = scsi_parse_lun ( rp_comp[RP_LUN], &iscsi->lun ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p invalid LUN \"%s\"\n", + iscsi, rp_comp[RP_LUN] ); + return rc; + } + iscsi->target_iqn = strdup ( rp_comp[RP_TARGETNAME] ); + if ( ! iscsi->target_iqn ) + return -ENOMEM; + + return 0; +} + +/** + * Fetch iSCSI settings + * + * @v iscsi iSCSI session + * @ret rc Return status code + */ +static int iscsi_fetch_settings ( struct iscsi_session *iscsi ) { + char *hostname; + union uuid uuid; + int len; + + /* Fetch relevant settings. Don't worry about freeing on + * error, since iscsi_free() will take care of that anyway. + */ + fetch_string_setting_copy ( NULL, &username_setting, + &iscsi->initiator_username ); + fetch_string_setting_copy ( NULL, &password_setting, + &iscsi->initiator_password ); + fetch_string_setting_copy ( NULL, &reverse_username_setting, + &iscsi->target_username ); + fetch_string_setting_copy ( NULL, &reverse_password_setting, + &iscsi->target_password ); + + /* Use explicit initiator IQN if provided */ + fetch_string_setting_copy ( NULL, &initiator_iqn_setting, + &iscsi->initiator_iqn ); + if ( iscsi->initiator_iqn ) + return 0; + + /* Otherwise, try to construct an initiator IQN from the hostname */ + fetch_string_setting_copy ( NULL, &hostname_setting, &hostname ); + if ( hostname ) { + len = asprintf ( &iscsi->initiator_iqn, + ISCSI_DEFAULT_IQN_PREFIX ":%s", hostname ); + free ( hostname ); + if ( len < 0 ) { + DBGC ( iscsi, "iSCSI %p could not allocate initiator " + "IQN\n", iscsi ); + return -ENOMEM; + } + assert ( iscsi->initiator_iqn ); + return 0; + } + + /* Otherwise, try to construct an initiator IQN from the UUID */ + if ( ( len = fetch_uuid_setting ( NULL, &uuid_setting, &uuid ) ) < 0 ) { + DBGC ( iscsi, "iSCSI %p has no suitable initiator IQN\n", + iscsi ); + return -EINVAL_NO_INITIATOR_IQN; + } + if ( ( len = asprintf ( &iscsi->initiator_iqn, + ISCSI_DEFAULT_IQN_PREFIX ":%s", + uuid_ntoa ( &uuid ) ) ) < 0 ) { + DBGC ( iscsi, "iSCSI %p could not allocate initiator IQN\n", + iscsi ); + return -ENOMEM; + } + assert ( iscsi->initiator_iqn ); + + return 0; +} + + +/** + * Check iSCSI authentication details + * + * @v iscsi iSCSI session + * @ret rc Return status code + */ +static int iscsi_check_auth ( struct iscsi_session *iscsi ) { + + /* Check for invalid authentication combinations */ + if ( ( /* Initiator username without password (or vice-versa) */ + ( !! iscsi->initiator_username ) ^ + ( !! iscsi->initiator_password ) ) || + ( /* Target username without password (or vice-versa) */ + ( !! iscsi->target_username ) ^ + ( !! iscsi->target_password ) ) || + ( /* Target (reverse) without initiator (forward) */ + ( iscsi->target_username && + ( ! iscsi->initiator_username ) ) ) ) { + DBGC ( iscsi, "iSCSI %p invalid credentials: initiator " + "%sname,%spw, target %sname,%spw\n", iscsi, + ( iscsi->initiator_username ? "" : "no " ), + ( iscsi->initiator_password ? "" : "no " ), + ( iscsi->target_username ? "" : "no " ), + ( iscsi->target_password ? "" : "no " ) ); + return -EINVAL_BAD_CREDENTIAL_MIX; + } + + return 0; +} + +/** + * Open iSCSI URI + * + * @v parent Parent interface + * @v uri URI + * @ret rc Return status code + */ +static int iscsi_open ( struct interface *parent, struct uri *uri ) { + struct iscsi_session *iscsi; + int rc; + + /* Sanity check */ + if ( ! uri->opaque ) { + rc = -EINVAL_NO_ROOT_PATH; + goto err_sanity_uri; + } + + /* Allocate and initialise structure */ + iscsi = zalloc ( sizeof ( *iscsi ) ); + if ( ! iscsi ) { + rc = -ENOMEM; + goto err_zalloc; + } + ref_init ( &iscsi->refcnt, iscsi_free ); + intf_init ( &iscsi->control, &iscsi_control_desc, &iscsi->refcnt ); + intf_init ( &iscsi->data, &iscsi_data_desc, &iscsi->refcnt ); + intf_init ( &iscsi->socket, &iscsi_socket_desc, &iscsi->refcnt ); + process_init_stopped ( &iscsi->process, &iscsi_process_desc, + &iscsi->refcnt ); + + /* Parse root path */ + if ( ( rc = iscsi_parse_root_path ( iscsi, uri->opaque ) ) != 0 ) + goto err_parse_root_path; + /* Set fields not specified by root path */ + if ( ( rc = iscsi_fetch_settings ( iscsi ) ) != 0 ) + goto err_fetch_settings; + /* Validate authentication */ + if ( ( rc = iscsi_check_auth ( iscsi ) ) != 0 ) + goto err_check_auth; + + /* Sanity checks */ + if ( ! iscsi->target_address ) { + DBGC ( iscsi, "iSCSI %p does not yet support discovery\n", + iscsi ); + rc = -ENOTSUP_DISCOVERY; + goto err_sanity_address; + } + if ( ! iscsi->target_iqn ) { + DBGC ( iscsi, "iSCSI %p no target address supplied in %s\n", + iscsi, uri->opaque ); + rc = -EINVAL_NO_TARGET_IQN; + goto err_sanity_iqn; + } + DBGC ( iscsi, "iSCSI %p initiator %s\n",iscsi, iscsi->initiator_iqn ); + DBGC ( iscsi, "iSCSI %p target %s %s\n", + iscsi, iscsi->target_address, iscsi->target_iqn ); + + /* Open socket */ + if ( ( rc = iscsi_open_connection ( iscsi ) ) != 0 ) + goto err_open_connection; + + /* Attach SCSI device to parent interface */ + if ( ( rc = scsi_open ( parent, &iscsi->control, + &iscsi->lun ) ) != 0 ) { + DBGC ( iscsi, "iSCSI %p could not create SCSI device: %s\n", + iscsi, strerror ( rc ) ); + goto err_scsi_open; + } + + /* Mortalise self, and return */ + ref_put ( &iscsi->refcnt ); + return 0; + + err_scsi_open: + err_open_connection: + err_sanity_iqn: + err_sanity_address: + err_check_auth: + err_fetch_settings: + err_parse_root_path: + iscsi_close ( iscsi, rc ); + ref_put ( &iscsi->refcnt ); + err_zalloc: + err_sanity_uri: + return rc; +} + +/** iSCSI URI opener */ +struct uri_opener iscsi_uri_opener __uri_opener = { + .scheme = "iscsi", + .open = iscsi_open, +}; diff --git a/qemu/roms/ipxe/src/net/tcp/oncrpc.c b/qemu/roms/ipxe/src/net/tcp/oncrpc.c new file mode 100644 index 000000000..6469867e9 --- /dev/null +++ b/qemu/roms/ipxe/src/net/tcp/oncrpc.c @@ -0,0 +1,250 @@ +/* + * Copyright (C) 2013 Marin Hannache <ipxe@mareo.fr>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/socket.h> +#include <ipxe/tcpip.h> +#include <ipxe/in.h> +#include <ipxe/iobuf.h> +#include <ipxe/dhcp.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/uri.h> +#include <ipxe/features.h> +#include <ipxe/oncrpc.h> +#include <ipxe/oncrpc_iob.h> +#include <ipxe/init.h> +#include <ipxe/settings.h> +#include <ipxe/version.h> + +/** @file + * + * SUN ONC RPC protocol + * + */ + +/** Set most significant bit to 1. */ +#define SET_LAST_FRAME( x ) ( (x) | 1 << 31 ) +#define GET_FRAME_SIZE( x ) ( (x) & ~( 1 << 31 ) ) + +#define ONCRPC_CALL 0 +#define ONCRPC_REPLY 1 + +/** AUTH NONE authentication flavor */ +struct oncrpc_cred oncrpc_auth_none = { + .flavor = ONCRPC_AUTH_NONE, + .length = 0 +}; + +const struct setting uid_setting __setting ( SETTING_AUTH, uid ) = { + .name = "uid", + .description = "User ID", + .tag = DHCP_EB_UID, + .type = &setting_type_uint32 +}; + +const struct setting gid_setting __setting ( SETTING_AUTH, gid ) = { + .name = "gid", + .description = "Group ID", + .tag = DHCP_EB_GID, + .type = &setting_type_uint32 +}; + +/** + * Initialize an ONC RPC AUTH SYS credential structure + * + * @v auth_sys The structure to initialize + * + * The hostname field is filled with the value of the hostname setting, if the + * hostname setting is empty, PRODUCT_SHORT_NAME (usually "iPXE") is used + * instead. + */ +int oncrpc_init_cred_sys ( struct oncrpc_cred_sys *auth_sys ) { + if ( ! auth_sys ) + return -EINVAL; + + fetch_string_setting_copy ( NULL, &hostname_setting, + &auth_sys->hostname ); + if ( ! auth_sys->hostname ) + if ( ! ( auth_sys->hostname = strdup ( product_short_name ) ) ) + return -ENOMEM; + + auth_sys->uid = fetch_uintz_setting ( NULL, &uid_setting ); + auth_sys->gid = fetch_uintz_setting ( NULL, &uid_setting ); + auth_sys->aux_gid_len = 0; + auth_sys->stamp = 0; + + auth_sys->credential.flavor = ONCRPC_AUTH_SYS; + auth_sys->credential.length = 16 + + oncrpc_strlen ( auth_sys->hostname ); + + return 0; +} + +/** + * Prepare an ONC RPC session structure to be used by the ONC RPC layer + * + * @v session ONC RPC session + * @v credential Credential structure pointer + * @v verifier Verifier structure pointer + * @v prog_name ONC RPC program number + * @v prog_vers ONC RPC program version number + */ +void oncrpc_init_session ( struct oncrpc_session *session, + struct oncrpc_cred *credential, + struct oncrpc_cred *verifier, uint32_t prog_name, + uint32_t prog_vers ) { + if ( ! session ) + return; + + session->rpc_id = rand(); + session->credential = credential; + session->verifier = verifier; + session->prog_name = prog_name; + session->prog_vers = prog_vers; +} + +int oncrpc_call ( struct interface *intf, struct oncrpc_session *session, + uint32_t proc_name, const struct oncrpc_field fields[] ) { + int rc; + size_t frame_size; + struct io_buffer *io_buf; + + if ( ! session ) + return -EINVAL; + + struct oncrpc_field header[] = { + ONCRPC_FIELD ( int32, 0 ), + ONCRPC_FIELD ( int32, ++session->rpc_id ), + ONCRPC_FIELD ( int32, ONCRPC_CALL ), + ONCRPC_FIELD ( int32, ONCRPC_VERS ), + ONCRPC_FIELD ( int32, session->prog_name ), + ONCRPC_FIELD ( int32, session->prog_vers ), + ONCRPC_FIELD ( int32, proc_name ), + ONCRPC_FIELD ( cred, session->credential ), + ONCRPC_FIELD ( cred, session->verifier ), + ONCRPC_FIELD_END, + }; + + frame_size = oncrpc_compute_size ( header ); + frame_size += oncrpc_compute_size ( fields ); + + io_buf = alloc_iob ( frame_size ); + if ( ! io_buf ) + return -ENOBUFS; + + header[0].value.int32 = SET_LAST_FRAME ( frame_size - + sizeof ( uint32_t ) ); + + oncrpc_iob_add_fields ( io_buf, header ); + oncrpc_iob_add_fields ( io_buf, fields ); + + rc = xfer_deliver_iob ( intf, io_buf ); + if ( rc != 0 ) + free_iob ( io_buf ); + + return rc; +} + +size_t oncrpc_compute_size ( const struct oncrpc_field fields[] ) { + + size_t i; + size_t size = 0; + + for ( i = 0; fields[i].type != oncrpc_none; i++ ) { + switch ( fields[i].type ) { + case oncrpc_int32: + size += sizeof ( uint32_t ); + break; + + case oncrpc_int64: + size += sizeof ( uint64_t ); + break; + + case oncrpc_str: + size += oncrpc_strlen ( fields[i].value.str ); + break; + + case oncrpc_array: + size += oncrpc_align ( fields[i].value.array.length ); + size += sizeof ( uint32_t ); + break; + + case oncrpc_intarray: + size += sizeof ( uint32_t ) * + fields[i].value.intarray.length; + size += sizeof ( uint32_t ); + break; + + case oncrpc_cred: + size += fields[i].value.cred->length; + size += 2 * sizeof ( uint32_t ); + break; + + default: + return size; + } + } + + return size; +} + +/** + * Parse an I/O buffer to extract a ONC RPC REPLY + * @v session ONC RPC session + * @v reply Reply structure where data will be saved + * @v io_buf I/O buffer + */ +int oncrpc_get_reply ( struct oncrpc_session *session __unused, + struct oncrpc_reply *reply, struct io_buffer *io_buf ) { + if ( ! reply || ! io_buf ) + return -EINVAL; + + reply->frame_size = GET_FRAME_SIZE ( oncrpc_iob_get_int ( io_buf ) ); + reply->rpc_id = oncrpc_iob_get_int ( io_buf ); + + /* iPXE has no support for handling ONC RPC call */ + if ( oncrpc_iob_get_int ( io_buf ) != ONCRPC_REPLY ) + return -ENOSYS; + + reply->reply_state = oncrpc_iob_get_int ( io_buf ); + + if ( reply->reply_state == 0 ) + { + /* verifier.flavor */ + oncrpc_iob_get_int ( io_buf ); + /* verifier.length */ + iob_pull ( io_buf, oncrpc_iob_get_int ( io_buf )); + + /* We don't use the verifier in iPXE, let it be an empty + verifier. */ + reply->verifier = &oncrpc_auth_none; + } + + reply->accept_state = oncrpc_iob_get_int ( io_buf ); + reply->data = io_buf; + + return 0; +} diff --git a/qemu/roms/ipxe/src/net/tcp/syslogs.c b/qemu/roms/ipxe/src/net/tcp/syslogs.c new file mode 100644 index 000000000..095afc543 --- /dev/null +++ b/qemu/roms/ipxe/src/net/tcp/syslogs.c @@ -0,0 +1,269 @@ +/* + * Copyright (C) 2012 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** @file + * + * Encrypted syslog protocol + * + */ + +#include <stdint.h> +#include <stdlib.h> +#include <byteswap.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/tcpip.h> +#include <ipxe/dhcp.h> +#include <ipxe/settings.h> +#include <ipxe/console.h> +#include <ipxe/lineconsole.h> +#include <ipxe/tls.h> +#include <ipxe/syslog.h> +#include <config/console.h> + +/* Set default console usage if applicable */ +#if ! ( defined ( CONSOLE_SYSLOGS ) && CONSOLE_EXPLICIT ( CONSOLE_SYSLOGS ) ) +#undef CONSOLE_SYSLOGS +#define CONSOLE_SYSLOGS ( CONSOLE_USAGE_ALL & ~CONSOLE_USAGE_TUI ) +#endif + +struct console_driver syslogs_console __console_driver; + +/** The encrypted syslog server */ +static struct sockaddr_tcpip logserver = { + .st_port = htons ( SYSLOG_PORT ), +}; + +/** + * Handle encrypted syslog TLS interface close + * + * @v intf Interface + * @v rc Reason for close + */ +static void syslogs_close ( struct interface *intf __unused, int rc ) { + + DBG ( "SYSLOGS console disconnected: %s\n", strerror ( rc ) ); +} + +/** + * Handle encrypted syslog TLS interface window change + * + * @v intf Interface + */ +static void syslogs_window_changed ( struct interface *intf ) { + + /* Mark console as enabled when window first opens, indicating + * that TLS negotiation is complete. (Do not disable console + * when window closes again, since TCP will close the window + * whenever there is unACKed data.) + */ + if ( xfer_window ( intf ) ) { + if ( syslogs_console.disabled ) + DBG ( "SYSLOGS console connected\n" ); + syslogs_console.disabled = 0; + } +} + +/** Encrypted syslog TLS interface operations */ +static struct interface_operation syslogs_operations[] = { + INTF_OP ( xfer_window_changed, struct interface *, + syslogs_window_changed ), + INTF_OP ( intf_close, struct interface *, syslogs_close ), +}; + +/** Encrypted syslog TLS interface descriptor */ +static struct interface_descriptor syslogs_desc = + INTF_DESC_PURE ( syslogs_operations ); + +/** The encrypted syslog TLS interface */ +static struct interface syslogs = INTF_INIT ( syslogs_desc ); + +/****************************************************************************** + * + * Console driver + * + ****************************************************************************** + */ + +/** Encrypted syslog line buffer */ +static char syslogs_buffer[SYSLOG_BUFSIZE]; + +/** Encrypted syslog severity */ +static unsigned int syslogs_severity = SYSLOG_DEFAULT_SEVERITY; + +/** + * Handle ANSI set encrypted syslog priority (private sequence) + * + * @v ctx ANSI escape sequence context + * @v count Parameter count + * @v params List of graphic rendition aspects + */ +static void syslogs_handle_priority ( struct ansiesc_context *ctx __unused, + unsigned int count __unused, + int params[] ) { + if ( params[0] >= 0 ) { + syslogs_severity = params[0]; + } else { + syslogs_severity = SYSLOG_DEFAULT_SEVERITY; + } +} + +/** Encrypted syslog ANSI escape sequence handlers */ +static struct ansiesc_handler syslogs_handlers[] = { + { ANSIESC_LOG_PRIORITY, syslogs_handle_priority }, + { 0, NULL } +}; + +/** Encrypted syslog line console */ +static struct line_console syslogs_line = { + .buffer = syslogs_buffer, + .len = sizeof ( syslogs_buffer ), + .ctx = { + .handlers = syslogs_handlers, + }, +}; + +/** Encrypted syslog recursion marker */ +static int syslogs_entered; + +/** + * Print a character to encrypted syslog console + * + * @v character Character to be printed + */ +static void syslogs_putchar ( int character ) { + int rc; + + /* Ignore if we are already mid-logging */ + if ( syslogs_entered ) + return; + + /* Fill line buffer */ + if ( line_putchar ( &syslogs_line, character ) == 0 ) + return; + + /* Guard against re-entry */ + syslogs_entered = 1; + + /* Send log message */ + if ( ( rc = syslog_send ( &syslogs, syslogs_severity, + syslogs_buffer, "\n" ) ) != 0 ) { + DBG ( "SYSLOGS could not send log message: %s\n", + strerror ( rc ) ); + } + + /* Clear re-entry flag */ + syslogs_entered = 0; +} + +/** Encrypted syslog console driver */ +struct console_driver syslogs_console __console_driver = { + .putchar = syslogs_putchar, + .disabled = CONSOLE_DISABLED, + .usage = CONSOLE_SYSLOGS, +}; + +/****************************************************************************** + * + * Settings + * + ****************************************************************************** + */ + +/** Encrypted syslog server setting */ +const struct setting syslogs_setting __setting ( SETTING_MISC, syslogs ) = { + .name = "syslogs", + .description = "Encrypted syslog server", + .tag = DHCP_EB_SYSLOGS_SERVER, + .type = &setting_type_string, +}; + +/** + * Apply encrypted syslog settings + * + * @ret rc Return status code + */ +static int apply_syslogs_settings ( void ) { + static char *old_server; + char *server; + struct interface *socket; + int rc; + + /* Fetch log server */ + fetch_string_setting_copy ( NULL, &syslogs_setting, &server ); + + /* Do nothing unless log server has changed */ + if ( ( ( server == NULL ) && ( old_server == NULL ) ) || + ( ( server != NULL ) && ( old_server != NULL ) && + ( strcmp ( server, old_server ) == 0 ) ) ) { + rc = 0; + goto out_no_change; + } + free ( old_server ); + old_server = NULL; + + /* Reset encrypted syslog connection */ + syslogs_console.disabled = CONSOLE_DISABLED; + intf_restart ( &syslogs, 0 ); + + /* Do nothing unless we have a log server */ + if ( ! server ) { + DBG ( "SYSLOGS has no log server\n" ); + rc = 0; + goto out_no_server; + } + + /* Add TLS filter */ + if ( ( rc = add_tls ( &syslogs, server, &socket ) ) != 0 ) { + DBG ( "SYSLOGS cannot create TLS filter: %s\n", + strerror ( rc ) ); + goto err_add_tls; + } + + /* Connect to log server */ + if ( ( rc = xfer_open_named_socket ( socket, SOCK_STREAM, + (( struct sockaddr *) &logserver ), + server, NULL ) ) != 0 ) { + DBG ( "SYSLOGS cannot connect to log server: %s\n", + strerror ( rc ) ); + goto err_open_named_socket; + } + DBG ( "SYSLOGS using log server %s\n", server ); + + /* Record log server */ + old_server = server; + server = NULL; + + /* Success */ + rc = 0; + + err_open_named_socket: + err_add_tls: + out_no_server: + out_no_change: + free ( server ); + return rc; +} + +/** Encrypted syslog settings applicator */ +struct settings_applicator syslogs_applicator __settings_applicator = { + .apply = apply_syslogs_settings, +}; diff --git a/qemu/roms/ipxe/src/net/tcpip.c b/qemu/roms/ipxe/src/net/tcpip.c new file mode 100644 index 000000000..4bcbe64bb --- /dev/null +++ b/qemu/roms/ipxe/src/net/tcpip.c @@ -0,0 +1,250 @@ +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/iobuf.h> +#include <ipxe/tables.h> +#include <ipxe/ipstat.h> +#include <ipxe/netdevice.h> +#include <ipxe/tcpip.h> + +/** @file + * + * Transport-network layer interface + * + * This file contains functions and utilities for the + * TCP/IP transport-network layer interface + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** + * Process a received TCP/IP packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v tcpip_proto Transport-layer protocol number + * @v st_src Partially-filled source address + * @v st_dest Partially-filled destination address + * @v pshdr_csum Pseudo-header checksum + * @v stats IP statistics + * @ret rc Return status code + * + * This function expects a transport-layer segment from the network + * layer. The network layer should fill in as much as it can of the + * source and destination addresses (i.e. it should fill in the + * address family and the network-layer addresses, but leave the ports + * and the rest of the structures as zero). + */ +int tcpip_rx ( struct io_buffer *iobuf, struct net_device *netdev, + uint8_t tcpip_proto, struct sockaddr_tcpip *st_src, + struct sockaddr_tcpip *st_dest, uint16_t pshdr_csum, + struct ip_statistics *stats ) { + struct tcpip_protocol *tcpip; + + /* Hand off packet to the appropriate transport-layer protocol */ + for_each_table_entry ( tcpip, TCPIP_PROTOCOLS ) { + if ( tcpip->tcpip_proto == tcpip_proto ) { + DBG ( "TCP/IP received %s packet\n", tcpip->name ); + stats->in_delivers++; + return tcpip->rx ( iobuf, netdev, st_src, st_dest, + pshdr_csum ); + } + } + + DBG ( "Unrecognised TCP/IP protocol %d\n", tcpip_proto ); + stats->in_unknown_protos++; + free_iob ( iobuf ); + return -EPROTONOSUPPORT; +} + +/** + * Find TCP/IP network-layer protocol + * + * @v st_dest Destination address + * @ret tcpip_net TCP/IP network-layer protocol, or NULL if not found + */ +static struct tcpip_net_protocol * +tcpip_net_protocol ( struct sockaddr_tcpip *st_dest ) { + struct tcpip_net_protocol *tcpip_net; + + for_each_table_entry ( tcpip_net, TCPIP_NET_PROTOCOLS ) { + if ( tcpip_net->sa_family == st_dest->st_family ) + return tcpip_net; + } + + DBG ( "Unrecognised TCP/IP address family %d\n", st_dest->st_family ); + return NULL; +} + +/** + * Transmit a TCP/IP packet + * + * @v iobuf I/O buffer + * @v tcpip_protocol Transport-layer protocol + * @v st_src Source address, or NULL to use route default + * @v st_dest Destination address + * @v netdev Network device to use if no route found, or NULL + * @v trans_csum Transport-layer checksum to complete, or NULL + * @ret rc Return status code + */ +int tcpip_tx ( struct io_buffer *iobuf, struct tcpip_protocol *tcpip_protocol, + struct sockaddr_tcpip *st_src, struct sockaddr_tcpip *st_dest, + struct net_device *netdev, uint16_t *trans_csum ) { + struct tcpip_net_protocol *tcpip_net; + + /* Hand off packet to the appropriate network-layer protocol */ + tcpip_net = tcpip_net_protocol ( st_dest ); + if ( tcpip_net ) { + DBG ( "TCP/IP sending %s packet\n", tcpip_net->name ); + return tcpip_net->tx ( iobuf, tcpip_protocol, st_src, st_dest, + netdev, trans_csum ); + } + + free_iob ( iobuf ); + return -EAFNOSUPPORT; +} + +/** + * Determine transmitting network device + * + * @v st_dest Destination address + * @ret netdev Network device, or NULL + */ +struct net_device * tcpip_netdev ( struct sockaddr_tcpip *st_dest ) { + struct tcpip_net_protocol *tcpip_net; + + /* Hand off to the appropriate network-layer protocol */ + tcpip_net = tcpip_net_protocol ( st_dest ); + if ( tcpip_net ) + return tcpip_net->netdev ( st_dest ); + + return NULL; +} + +/** + * Determine maximum transmission unit + * + * @v st_dest Destination address + * @ret mtu Maximum transmission unit + */ +size_t tcpip_mtu ( struct sockaddr_tcpip *st_dest ) { + struct tcpip_net_protocol *tcpip_net; + struct net_device *netdev; + size_t mtu; + + /* Find appropriate network-layer protocol */ + tcpip_net = tcpip_net_protocol ( st_dest ); + if ( ! tcpip_net ) + return 0; + + /* Find transmitting network device */ + netdev = tcpip_net->netdev ( st_dest ); + if ( ! netdev ) + return 0; + + /* Calculate MTU */ + mtu = ( netdev->max_pkt_len - netdev->ll_protocol->ll_header_len - + tcpip_net->header_len ); + + return mtu; +} + +/** + * Calculate continued TCP/IP checkum + * + * @v partial Checksum of already-summed data, in network byte order + * @v data Data buffer + * @v len Length of data buffer + * @ret cksum Updated checksum, in network byte order + * + * Calculates a TCP/IP-style 16-bit checksum over the data block. The + * checksum is returned in network byte order. + * + * This function may be used to add new data to an existing checksum. + * The function assumes that both the old data and the new data start + * on even byte offsets; if this is not the case then you will need to + * byte-swap either the input partial checksum, the output checksum, + * or both. Deciding which to swap is left as an exercise for the + * interested reader. + */ +uint16_t generic_tcpip_continue_chksum ( uint16_t partial, + const void *data, size_t len ) { + unsigned int cksum = ( ( ~partial ) & 0xffff ); + unsigned int value; + unsigned int i; + + for ( i = 0 ; i < len ; i++ ) { + value = * ( ( uint8_t * ) data + i ); + if ( i & 1 ) { + /* Odd bytes: swap on little-endian systems */ + value = be16_to_cpu ( value ); + } else { + /* Even bytes: swap on big-endian systems */ + value = le16_to_cpu ( value ); + } + cksum += value; + if ( cksum > 0xffff ) + cksum -= 0xffff; + } + + return ( ~cksum ); +} + +/** + * Calculate TCP/IP checkum + * + * @v data Data buffer + * @v len Length of data buffer + * @ret cksum Checksum, in network byte order + * + * Calculates a TCP/IP-style 16-bit checksum over the data block. The + * checksum is returned in network byte order. + */ +uint16_t tcpip_chksum ( const void *data, size_t len ) { + return tcpip_continue_chksum ( TCPIP_EMPTY_CSUM, data, len ); +} + +/** + * Bind to local TCP/IP port + * + * @v st_local Local TCP/IP socket address, or NULL + * @v available Function to check port availability + * @ret port Local port number, or negative error + */ +int tcpip_bind ( struct sockaddr_tcpip *st_local, + int ( * available ) ( int port ) ) { + uint16_t flags = 0; + uint16_t try_port = 0; + uint16_t min_port; + uint16_t max_port; + unsigned int offset; + unsigned int i; + + /* Extract parameters from local socket address */ + if ( st_local ) { + flags = st_local->st_flags; + try_port = ntohs ( st_local->st_port ); + } + + /* If an explicit port is specified, check its availability */ + if ( try_port ) + return available ( try_port ); + + /* Otherwise, find an available port in the range [1,1023] or + * [1025,65535] as appropriate. + */ + min_port = ( ( ( ! flags ) & TCPIP_BIND_PRIVILEGED ) + 1 ); + max_port = ( ( flags & TCPIP_BIND_PRIVILEGED ) - 1 ); + offset = random(); + for ( i = 0 ; i <= max_port ; i++ ) { + try_port = ( ( i + offset ) & max_port ); + if ( try_port < min_port ) + continue; + if ( available ( try_port ) < 0 ) + continue; + return try_port; + } + return -EADDRINUSE; +} diff --git a/qemu/roms/ipxe/src/net/tls.c b/qemu/roms/ipxe/src/net/tls.c new file mode 100644 index 000000000..30ccc932e --- /dev/null +++ b/qemu/roms/ipxe/src/net/tls.c @@ -0,0 +1,2639 @@ +/* + * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** + * @file + * + * Transport Layer Security Protocol + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <time.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/pending.h> +#include <ipxe/hmac.h> +#include <ipxe/md5.h> +#include <ipxe/sha1.h> +#include <ipxe/sha256.h> +#include <ipxe/aes.h> +#include <ipxe/rsa.h> +#include <ipxe/iobuf.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/x509.h> +#include <ipxe/privkey.h> +#include <ipxe/certstore.h> +#include <ipxe/rbg.h> +#include <ipxe/validator.h> +#include <ipxe/tls.h> + +/* Disambiguate the various error causes */ +#define EINVAL_CHANGE_CIPHER __einfo_error ( EINFO_EINVAL_CHANGE_CIPHER ) +#define EINFO_EINVAL_CHANGE_CIPHER \ + __einfo_uniqify ( EINFO_EINVAL, 0x01, \ + "Invalid Change Cipher record" ) +#define EINVAL_ALERT __einfo_error ( EINFO_EINVAL_ALERT ) +#define EINFO_EINVAL_ALERT \ + __einfo_uniqify ( EINFO_EINVAL, 0x02, \ + "Invalid Alert record" ) +#define EINVAL_HELLO __einfo_error ( EINFO_EINVAL_HELLO ) +#define EINFO_EINVAL_HELLO \ + __einfo_uniqify ( EINFO_EINVAL, 0x03, \ + "Invalid Server Hello record" ) +#define EINVAL_CERTIFICATE __einfo_error ( EINFO_EINVAL_CERTIFICATE ) +#define EINFO_EINVAL_CERTIFICATE \ + __einfo_uniqify ( EINFO_EINVAL, 0x04, \ + "Invalid Certificate" ) +#define EINVAL_CERTIFICATES __einfo_error ( EINFO_EINVAL_CERTIFICATES ) +#define EINFO_EINVAL_CERTIFICATES \ + __einfo_uniqify ( EINFO_EINVAL, 0x05, \ + "Invalid Server Certificate record" ) +#define EINVAL_HELLO_DONE __einfo_error ( EINFO_EINVAL_HELLO_DONE ) +#define EINFO_EINVAL_HELLO_DONE \ + __einfo_uniqify ( EINFO_EINVAL, 0x06, \ + "Invalid Server Hello Done record" ) +#define EINVAL_FINISHED __einfo_error ( EINFO_EINVAL_FINISHED ) +#define EINFO_EINVAL_FINISHED \ + __einfo_uniqify ( EINFO_EINVAL, 0x07, \ + "Invalid Server Finished record" ) +#define EINVAL_HANDSHAKE __einfo_error ( EINFO_EINVAL_HANDSHAKE ) +#define EINFO_EINVAL_HANDSHAKE \ + __einfo_uniqify ( EINFO_EINVAL, 0x08, \ + "Invalid Handshake record" ) +#define EINVAL_STREAM __einfo_error ( EINFO_EINVAL_STREAM ) +#define EINFO_EINVAL_STREAM \ + __einfo_uniqify ( EINFO_EINVAL, 0x09, \ + "Invalid stream-ciphered record" ) +#define EINVAL_BLOCK __einfo_error ( EINFO_EINVAL_BLOCK ) +#define EINFO_EINVAL_BLOCK \ + __einfo_uniqify ( EINFO_EINVAL, 0x0a, \ + "Invalid block-ciphered record" ) +#define EINVAL_PADDING __einfo_error ( EINFO_EINVAL_PADDING ) +#define EINFO_EINVAL_PADDING \ + __einfo_uniqify ( EINFO_EINVAL, 0x0b, \ + "Invalid block padding" ) +#define EINVAL_RX_STATE __einfo_error ( EINFO_EINVAL_RX_STATE ) +#define EINFO_EINVAL_RX_STATE \ + __einfo_uniqify ( EINFO_EINVAL, 0x0c, \ + "Invalid receive state" ) +#define EINVAL_MAC __einfo_error ( EINFO_EINVAL_MAC ) +#define EINFO_EINVAL_MAC \ + __einfo_uniqify ( EINFO_EINVAL, 0x0d, \ + "Invalid MAC" ) +#define EIO_ALERT __einfo_error ( EINFO_EIO_ALERT ) +#define EINFO_EIO_ALERT \ + __einfo_uniqify ( EINFO_EINVAL, 0x01, \ + "Unknown alert level" ) +#define ENOMEM_CONTEXT __einfo_error ( EINFO_ENOMEM_CONTEXT ) +#define EINFO_ENOMEM_CONTEXT \ + __einfo_uniqify ( EINFO_ENOMEM, 0x01, \ + "Not enough space for crypto context" ) +#define ENOMEM_CERTIFICATE __einfo_error ( EINFO_ENOMEM_CERTIFICATE ) +#define EINFO_ENOMEM_CERTIFICATE \ + __einfo_uniqify ( EINFO_ENOMEM, 0x02, \ + "Not enough space for certificate" ) +#define ENOMEM_CHAIN __einfo_error ( EINFO_ENOMEM_CHAIN ) +#define EINFO_ENOMEM_CHAIN \ + __einfo_uniqify ( EINFO_ENOMEM, 0x03, \ + "Not enough space for certificate chain" ) +#define ENOMEM_TX_PLAINTEXT __einfo_error ( EINFO_ENOMEM_TX_PLAINTEXT ) +#define EINFO_ENOMEM_TX_PLAINTEXT \ + __einfo_uniqify ( EINFO_ENOMEM, 0x04, \ + "Not enough space for transmitted plaintext" ) +#define ENOMEM_TX_CIPHERTEXT __einfo_error ( EINFO_ENOMEM_TX_CIPHERTEXT ) +#define EINFO_ENOMEM_TX_CIPHERTEXT \ + __einfo_uniqify ( EINFO_ENOMEM, 0x05, \ + "Not enough space for transmitted ciphertext" ) +#define ENOMEM_RX_DATA __einfo_error ( EINFO_ENOMEM_RX_DATA ) +#define EINFO_ENOMEM_RX_DATA \ + __einfo_uniqify ( EINFO_ENOMEM, 0x07, \ + "Not enough space for received data" ) +#define ENOMEM_RX_CONCAT __einfo_error ( EINFO_ENOMEM_RX_CONCAT ) +#define EINFO_ENOMEM_RX_CONCAT \ + __einfo_uniqify ( EINFO_ENOMEM, 0x08, \ + "Not enough space to concatenate received data" ) +#define ENOTSUP_CIPHER __einfo_error ( EINFO_ENOTSUP_CIPHER ) +#define EINFO_ENOTSUP_CIPHER \ + __einfo_uniqify ( EINFO_ENOTSUP, 0x01, \ + "Unsupported cipher" ) +#define ENOTSUP_NULL __einfo_error ( EINFO_ENOTSUP_NULL ) +#define EINFO_ENOTSUP_NULL \ + __einfo_uniqify ( EINFO_ENOTSUP, 0x02, \ + "Refusing to use null cipher" ) +#define ENOTSUP_SIG_HASH __einfo_error ( EINFO_ENOTSUP_SIG_HASH ) +#define EINFO_ENOTSUP_SIG_HASH \ + __einfo_uniqify ( EINFO_ENOTSUP, 0x03, \ + "Unsupported signature and hash algorithm" ) +#define ENOTSUP_VERSION __einfo_error ( EINFO_ENOTSUP_VERSION ) +#define EINFO_ENOTSUP_VERSION \ + __einfo_uniqify ( EINFO_ENOTSUP, 0x04, \ + "Unsupported protocol version" ) +#define EPERM_ALERT __einfo_error ( EINFO_EPERM_ALERT ) +#define EINFO_EPERM_ALERT \ + __einfo_uniqify ( EINFO_EPERM, 0x01, \ + "Received fatal alert" ) +#define EPERM_VERIFY __einfo_error ( EINFO_EPERM_VERIFY ) +#define EINFO_EPERM_VERIFY \ + __einfo_uniqify ( EINFO_EPERM, 0x02, \ + "Handshake verification failed" ) +#define EPERM_CLIENT_CERT __einfo_error ( EINFO_EPERM_CLIENT_CERT ) +#define EINFO_EPERM_CLIENT_CERT \ + __einfo_uniqify ( EINFO_EPERM, 0x03, \ + "No suitable client certificate available" ) +#define EPROTO_VERSION __einfo_error ( EINFO_EPROTO_VERSION ) +#define EINFO_EPROTO_VERSION \ + __einfo_uniqify ( EINFO_EPROTO, 0x01, \ + "Illegal protocol version upgrade" ) + +static int tls_send_plaintext ( struct tls_session *tls, unsigned int type, + const void *data, size_t len ); +static void tls_clear_cipher ( struct tls_session *tls, + struct tls_cipherspec *cipherspec ); + +/****************************************************************************** + * + * Utility functions + * + ****************************************************************************** + */ + +/** + * Extract 24-bit field value + * + * @v field24 24-bit field + * @ret value Field value + * + * TLS uses 24-bit integers in several places, which are awkward to + * parse in C. + */ +static inline __attribute__ (( always_inline )) unsigned long +tls_uint24 ( const uint8_t field24[3] ) { + const uint32_t *field32 __attribute__ (( may_alias )) = + ( ( const void * ) field24 ); + return ( be32_to_cpu ( *field32 ) >> 8 ); +} + +/** + * Set 24-bit field value + * + * @v field24 24-bit field + * @v value Field value + * + * The field must be pre-zeroed. + */ +static void tls_set_uint24 ( uint8_t field24[3], unsigned long value ) { + uint32_t *field32 __attribute__ (( may_alias )) = + ( ( void * ) field24 ); + *field32 |= cpu_to_be32 ( value << 8 ); +} + +/** + * Determine if TLS session is ready for application data + * + * @v tls TLS session + * @ret is_ready TLS session is ready + */ +static int tls_ready ( struct tls_session *tls ) { + return ( ( ! is_pending ( &tls->client_negotiation ) ) && + ( ! is_pending ( &tls->server_negotiation ) ) ); +} + +/****************************************************************************** + * + * Hybrid MD5+SHA1 hash as used by TLSv1.1 and earlier + * + ****************************************************************************** + */ + +/** + * Initialise MD5+SHA1 algorithm + * + * @v ctx MD5+SHA1 context + */ +static void md5_sha1_init ( void *ctx ) { + struct md5_sha1_context *context = ctx; + + digest_init ( &md5_algorithm, context->md5 ); + digest_init ( &sha1_algorithm, context->sha1 ); +} + +/** + * Accumulate data with MD5+SHA1 algorithm + * + * @v ctx MD5+SHA1 context + * @v data Data + * @v len Length of data + */ +static void md5_sha1_update ( void *ctx, const void *data, size_t len ) { + struct md5_sha1_context *context = ctx; + + digest_update ( &md5_algorithm, context->md5, data, len ); + digest_update ( &sha1_algorithm, context->sha1, data, len ); +} + +/** + * Generate MD5+SHA1 digest + * + * @v ctx MD5+SHA1 context + * @v out Output buffer + */ +static void md5_sha1_final ( void *ctx, void *out ) { + struct md5_sha1_context *context = ctx; + struct md5_sha1_digest *digest = out; + + digest_final ( &md5_algorithm, context->md5, digest->md5 ); + digest_final ( &sha1_algorithm, context->sha1, digest->sha1 ); +} + +/** Hybrid MD5+SHA1 digest algorithm */ +static struct digest_algorithm md5_sha1_algorithm = { + .name = "md5+sha1", + .ctxsize = sizeof ( struct md5_sha1_context ), + .blocksize = 0, /* Not applicable */ + .digestsize = sizeof ( struct md5_sha1_digest ), + .init = md5_sha1_init, + .update = md5_sha1_update, + .final = md5_sha1_final, +}; + +/** RSA digestInfo prefix for MD5+SHA1 algorithm */ +struct rsa_digestinfo_prefix rsa_md5_sha1_prefix __rsa_digestinfo_prefix = { + .digest = &md5_sha1_algorithm, + .data = NULL, /* MD5+SHA1 signatures have no digestInfo */ + .len = 0, +}; + +/****************************************************************************** + * + * Cleanup functions + * + ****************************************************************************** + */ + +/** + * Free TLS session + * + * @v refcnt Reference counter + */ +static void free_tls ( struct refcnt *refcnt ) { + struct tls_session *tls = + container_of ( refcnt, struct tls_session, refcnt ); + struct io_buffer *iobuf; + struct io_buffer *tmp; + + /* Free dynamically-allocated resources */ + tls_clear_cipher ( tls, &tls->tx_cipherspec ); + tls_clear_cipher ( tls, &tls->tx_cipherspec_pending ); + tls_clear_cipher ( tls, &tls->rx_cipherspec ); + tls_clear_cipher ( tls, &tls->rx_cipherspec_pending ); + list_for_each_entry_safe ( iobuf, tmp, &tls->rx_data, list ) { + list_del ( &iobuf->list ); + free_iob ( iobuf ); + } + x509_put ( tls->cert ); + x509_chain_put ( tls->chain ); + + /* Free TLS structure itself */ + free ( tls ); +} + +/** + * Finish with TLS session + * + * @v tls TLS session + * @v rc Status code + */ +static void tls_close ( struct tls_session *tls, int rc ) { + + /* Remove pending operations, if applicable */ + pending_put ( &tls->client_negotiation ); + pending_put ( &tls->server_negotiation ); + + /* Remove process */ + process_del ( &tls->process ); + + /* Close all interfaces */ + intf_shutdown ( &tls->cipherstream, rc ); + intf_shutdown ( &tls->plainstream, rc ); + intf_shutdown ( &tls->validator, rc ); +} + +/****************************************************************************** + * + * Random number generation + * + ****************************************************************************** + */ + +/** + * Generate random data + * + * @v tls TLS session + * @v data Buffer to fill + * @v len Length of buffer + * @ret rc Return status code + */ +static int tls_generate_random ( struct tls_session *tls, + void *data, size_t len ) { + int rc; + + /* Generate random bits with no additional input and without + * prediction resistance + */ + if ( ( rc = rbg_generate ( NULL, 0, 0, data, len ) ) != 0 ) { + DBGC ( tls, "TLS %p could not generate random data: %s\n", + tls, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Update HMAC with a list of ( data, len ) pairs + * + * @v digest Hash function to use + * @v digest_ctx Digest context + * @v args ( data, len ) pairs of data, terminated by NULL + */ +static void tls_hmac_update_va ( struct digest_algorithm *digest, + void *digest_ctx, va_list args ) { + void *data; + size_t len; + + while ( ( data = va_arg ( args, void * ) ) ) { + len = va_arg ( args, size_t ); + hmac_update ( digest, digest_ctx, data, len ); + } +} + +/** + * Generate secure pseudo-random data using a single hash function + * + * @v tls TLS session + * @v digest Hash function to use + * @v secret Secret + * @v secret_len Length of secret + * @v out Output buffer + * @v out_len Length of output buffer + * @v seeds ( data, len ) pairs of seed data, terminated by NULL + */ +static void tls_p_hash_va ( struct tls_session *tls, + struct digest_algorithm *digest, + void *secret, size_t secret_len, + void *out, size_t out_len, + va_list seeds ) { + uint8_t secret_copy[secret_len]; + uint8_t digest_ctx[digest->ctxsize]; + uint8_t digest_ctx_partial[digest->ctxsize]; + uint8_t a[digest->digestsize]; + uint8_t out_tmp[digest->digestsize]; + size_t frag_len = digest->digestsize; + va_list tmp; + + /* Copy the secret, in case HMAC modifies it */ + memcpy ( secret_copy, secret, secret_len ); + secret = secret_copy; + DBGC2 ( tls, "TLS %p %s secret:\n", tls, digest->name ); + DBGC2_HD ( tls, secret, secret_len ); + + /* Calculate A(1) */ + hmac_init ( digest, digest_ctx, secret, &secret_len ); + va_copy ( tmp, seeds ); + tls_hmac_update_va ( digest, digest_ctx, tmp ); + va_end ( tmp ); + hmac_final ( digest, digest_ctx, secret, &secret_len, a ); + DBGC2 ( tls, "TLS %p %s A(1):\n", tls, digest->name ); + DBGC2_HD ( tls, &a, sizeof ( a ) ); + + /* Generate as much data as required */ + while ( out_len ) { + /* Calculate output portion */ + hmac_init ( digest, digest_ctx, secret, &secret_len ); + hmac_update ( digest, digest_ctx, a, sizeof ( a ) ); + memcpy ( digest_ctx_partial, digest_ctx, digest->ctxsize ); + va_copy ( tmp, seeds ); + tls_hmac_update_va ( digest, digest_ctx, tmp ); + va_end ( tmp ); + hmac_final ( digest, digest_ctx, + secret, &secret_len, out_tmp ); + + /* Copy output */ + if ( frag_len > out_len ) + frag_len = out_len; + memcpy ( out, out_tmp, frag_len ); + DBGC2 ( tls, "TLS %p %s output:\n", tls, digest->name ); + DBGC2_HD ( tls, out, frag_len ); + + /* Calculate A(i) */ + hmac_final ( digest, digest_ctx_partial, + secret, &secret_len, a ); + DBGC2 ( tls, "TLS %p %s A(n):\n", tls, digest->name ); + DBGC2_HD ( tls, &a, sizeof ( a ) ); + + out += frag_len; + out_len -= frag_len; + } +} + +/** + * Generate secure pseudo-random data + * + * @v tls TLS session + * @v secret Secret + * @v secret_len Length of secret + * @v out Output buffer + * @v out_len Length of output buffer + * @v ... ( data, len ) pairs of seed data, terminated by NULL + */ +static void tls_prf ( struct tls_session *tls, void *secret, size_t secret_len, + void *out, size_t out_len, ... ) { + va_list seeds; + va_list tmp; + size_t subsecret_len; + void *md5_secret; + void *sha1_secret; + uint8_t buf[out_len]; + unsigned int i; + + va_start ( seeds, out_len ); + + if ( tls->version >= TLS_VERSION_TLS_1_2 ) { + /* Use P_SHA256 for TLSv1.2 and later */ + tls_p_hash_va ( tls, &sha256_algorithm, secret, secret_len, + out, out_len, seeds ); + } else { + /* Use combination of P_MD5 and P_SHA-1 for TLSv1.1 + * and earlier + */ + + /* Split secret into two, with an overlap of up to one byte */ + subsecret_len = ( ( secret_len + 1 ) / 2 ); + md5_secret = secret; + sha1_secret = ( secret + secret_len - subsecret_len ); + + /* Calculate MD5 portion */ + va_copy ( tmp, seeds ); + tls_p_hash_va ( tls, &md5_algorithm, md5_secret, + subsecret_len, out, out_len, seeds ); + va_end ( tmp ); + + /* Calculate SHA1 portion */ + va_copy ( tmp, seeds ); + tls_p_hash_va ( tls, &sha1_algorithm, sha1_secret, + subsecret_len, buf, out_len, seeds ); + va_end ( tmp ); + + /* XOR the two portions together into the final output buffer */ + for ( i = 0 ; i < out_len ; i++ ) + *( ( uint8_t * ) out + i ) ^= buf[i]; + } + + va_end ( seeds ); +} + +/** + * Generate secure pseudo-random data + * + * @v secret Secret + * @v secret_len Length of secret + * @v out Output buffer + * @v out_len Length of output buffer + * @v label String literal label + * @v ... ( data, len ) pairs of seed data + */ +#define tls_prf_label( tls, secret, secret_len, out, out_len, label, ... ) \ + tls_prf ( (tls), (secret), (secret_len), (out), (out_len), \ + label, ( sizeof ( label ) - 1 ), __VA_ARGS__, NULL ) + +/****************************************************************************** + * + * Secret management + * + ****************************************************************************** + */ + +/** + * Generate master secret + * + * @v tls TLS session + * + * The pre-master secret and the client and server random values must + * already be known. + */ +static void tls_generate_master_secret ( struct tls_session *tls ) { + DBGC ( tls, "TLS %p pre-master-secret:\n", tls ); + DBGC_HD ( tls, &tls->pre_master_secret, + sizeof ( tls->pre_master_secret ) ); + DBGC ( tls, "TLS %p client random bytes:\n", tls ); + DBGC_HD ( tls, &tls->client_random, sizeof ( tls->client_random ) ); + DBGC ( tls, "TLS %p server random bytes:\n", tls ); + DBGC_HD ( tls, &tls->server_random, sizeof ( tls->server_random ) ); + + tls_prf_label ( tls, &tls->pre_master_secret, + sizeof ( tls->pre_master_secret ), + &tls->master_secret, sizeof ( tls->master_secret ), + "master secret", + &tls->client_random, sizeof ( tls->client_random ), + &tls->server_random, sizeof ( tls->server_random ) ); + + DBGC ( tls, "TLS %p generated master secret:\n", tls ); + DBGC_HD ( tls, &tls->master_secret, sizeof ( tls->master_secret ) ); +} + +/** + * Generate key material + * + * @v tls TLS session + * + * The master secret must already be known. + */ +static int tls_generate_keys ( struct tls_session *tls ) { + struct tls_cipherspec *tx_cipherspec = &tls->tx_cipherspec_pending; + struct tls_cipherspec *rx_cipherspec = &tls->rx_cipherspec_pending; + size_t hash_size = tx_cipherspec->suite->digest->digestsize; + size_t key_size = tx_cipherspec->suite->key_len; + size_t iv_size = tx_cipherspec->suite->cipher->blocksize; + size_t total = ( 2 * ( hash_size + key_size + iv_size ) ); + uint8_t key_block[total]; + uint8_t *key; + int rc; + + /* Generate key block */ + tls_prf_label ( tls, &tls->master_secret, sizeof ( tls->master_secret ), + key_block, sizeof ( key_block ), "key expansion", + &tls->server_random, sizeof ( tls->server_random ), + &tls->client_random, sizeof ( tls->client_random ) ); + + /* Split key block into portions */ + key = key_block; + + /* TX MAC secret */ + memcpy ( tx_cipherspec->mac_secret, key, hash_size ); + DBGC ( tls, "TLS %p TX MAC secret:\n", tls ); + DBGC_HD ( tls, key, hash_size ); + key += hash_size; + + /* RX MAC secret */ + memcpy ( rx_cipherspec->mac_secret, key, hash_size ); + DBGC ( tls, "TLS %p RX MAC secret:\n", tls ); + DBGC_HD ( tls, key, hash_size ); + key += hash_size; + + /* TX key */ + if ( ( rc = cipher_setkey ( tx_cipherspec->suite->cipher, + tx_cipherspec->cipher_ctx, + key, key_size ) ) != 0 ) { + DBGC ( tls, "TLS %p could not set TX key: %s\n", + tls, strerror ( rc ) ); + return rc; + } + DBGC ( tls, "TLS %p TX key:\n", tls ); + DBGC_HD ( tls, key, key_size ); + key += key_size; + + /* RX key */ + if ( ( rc = cipher_setkey ( rx_cipherspec->suite->cipher, + rx_cipherspec->cipher_ctx, + key, key_size ) ) != 0 ) { + DBGC ( tls, "TLS %p could not set TX key: %s\n", + tls, strerror ( rc ) ); + return rc; + } + DBGC ( tls, "TLS %p RX key:\n", tls ); + DBGC_HD ( tls, key, key_size ); + key += key_size; + + /* TX initialisation vector */ + cipher_setiv ( tx_cipherspec->suite->cipher, + tx_cipherspec->cipher_ctx, key ); + DBGC ( tls, "TLS %p TX IV:\n", tls ); + DBGC_HD ( tls, key, iv_size ); + key += iv_size; + + /* RX initialisation vector */ + cipher_setiv ( rx_cipherspec->suite->cipher, + rx_cipherspec->cipher_ctx, key ); + DBGC ( tls, "TLS %p RX IV:\n", tls ); + DBGC_HD ( tls, key, iv_size ); + key += iv_size; + + assert ( ( key_block + total ) == key ); + + return 0; +} + +/****************************************************************************** + * + * Cipher suite management + * + ****************************************************************************** + */ + +/** Null cipher suite */ +struct tls_cipher_suite tls_cipher_suite_null = { + .pubkey = &pubkey_null, + .cipher = &cipher_null, + .digest = &digest_null, +}; + +/** Supported cipher suites, in order of preference */ +struct tls_cipher_suite tls_cipher_suites[] = { + { + .code = htons ( TLS_RSA_WITH_AES_256_CBC_SHA256 ), + .key_len = ( 256 / 8 ), + .pubkey = &rsa_algorithm, + .cipher = &aes_cbc_algorithm, + .digest = &sha256_algorithm, + }, + { + .code = htons ( TLS_RSA_WITH_AES_128_CBC_SHA256 ), + .key_len = ( 128 / 8 ), + .pubkey = &rsa_algorithm, + .cipher = &aes_cbc_algorithm, + .digest = &sha256_algorithm, + }, + { + .code = htons ( TLS_RSA_WITH_AES_256_CBC_SHA ), + .key_len = ( 256 / 8 ), + .pubkey = &rsa_algorithm, + .cipher = &aes_cbc_algorithm, + .digest = &sha1_algorithm, + }, + { + .code = htons ( TLS_RSA_WITH_AES_128_CBC_SHA ), + .key_len = ( 128 / 8 ), + .pubkey = &rsa_algorithm, + .cipher = &aes_cbc_algorithm, + .digest = &sha1_algorithm, + }, +}; + +/** Number of supported cipher suites */ +#define TLS_NUM_CIPHER_SUITES \ + ( sizeof ( tls_cipher_suites ) / sizeof ( tls_cipher_suites[0] ) ) + +/** + * Identify cipher suite + * + * @v cipher_suite Cipher suite specification + * @ret suite Cipher suite, or NULL + */ +static struct tls_cipher_suite * +tls_find_cipher_suite ( unsigned int cipher_suite ) { + struct tls_cipher_suite *suite; + unsigned int i; + + /* Identify cipher suite */ + for ( i = 0 ; i < TLS_NUM_CIPHER_SUITES ; i++ ) { + suite = &tls_cipher_suites[i]; + if ( suite->code == cipher_suite ) + return suite; + } + + return NULL; +} + +/** + * Clear cipher suite + * + * @v cipherspec TLS cipher specification + */ +static void tls_clear_cipher ( struct tls_session *tls __unused, + struct tls_cipherspec *cipherspec ) { + + if ( cipherspec->suite ) { + pubkey_final ( cipherspec->suite->pubkey, + cipherspec->pubkey_ctx ); + } + free ( cipherspec->dynamic ); + memset ( cipherspec, 0, sizeof ( *cipherspec ) ); + cipherspec->suite = &tls_cipher_suite_null; +} + +/** + * Set cipher suite + * + * @v tls TLS session + * @v cipherspec TLS cipher specification + * @v suite Cipher suite + * @ret rc Return status code + */ +static int tls_set_cipher ( struct tls_session *tls, + struct tls_cipherspec *cipherspec, + struct tls_cipher_suite *suite ) { + struct pubkey_algorithm *pubkey = suite->pubkey; + struct cipher_algorithm *cipher = suite->cipher; + struct digest_algorithm *digest = suite->digest; + size_t total; + void *dynamic; + + /* Clear out old cipher contents, if any */ + tls_clear_cipher ( tls, cipherspec ); + + /* Allocate dynamic storage */ + total = ( pubkey->ctxsize + 2 * cipher->ctxsize + digest->digestsize ); + dynamic = zalloc ( total ); + if ( ! dynamic ) { + DBGC ( tls, "TLS %p could not allocate %zd bytes for crypto " + "context\n", tls, total ); + return -ENOMEM_CONTEXT; + } + + /* Assign storage */ + cipherspec->dynamic = dynamic; + cipherspec->pubkey_ctx = dynamic; dynamic += pubkey->ctxsize; + cipherspec->cipher_ctx = dynamic; dynamic += cipher->ctxsize; + cipherspec->cipher_next_ctx = dynamic; dynamic += cipher->ctxsize; + cipherspec->mac_secret = dynamic; dynamic += digest->digestsize; + assert ( ( cipherspec->dynamic + total ) == dynamic ); + + /* Store parameters */ + cipherspec->suite = suite; + + return 0; +} + +/** + * Select next cipher suite + * + * @v tls TLS session + * @v cipher_suite Cipher suite specification + * @ret rc Return status code + */ +static int tls_select_cipher ( struct tls_session *tls, + unsigned int cipher_suite ) { + struct tls_cipher_suite *suite; + int rc; + + /* Identify cipher suite */ + suite = tls_find_cipher_suite ( cipher_suite ); + if ( ! suite ) { + DBGC ( tls, "TLS %p does not support cipher %04x\n", + tls, ntohs ( cipher_suite ) ); + return -ENOTSUP_CIPHER; + } + + /* Set ciphers */ + if ( ( rc = tls_set_cipher ( tls, &tls->tx_cipherspec_pending, + suite ) ) != 0 ) + return rc; + if ( ( rc = tls_set_cipher ( tls, &tls->rx_cipherspec_pending, + suite ) ) != 0 ) + return rc; + + DBGC ( tls, "TLS %p selected %s-%s-%d-%s\n", tls, suite->pubkey->name, + suite->cipher->name, ( suite->key_len * 8 ), + suite->digest->name ); + + return 0; +} + +/** + * Activate next cipher suite + * + * @v tls TLS session + * @v pending Pending cipher specification + * @v active Active cipher specification to replace + * @ret rc Return status code + */ +static int tls_change_cipher ( struct tls_session *tls, + struct tls_cipherspec *pending, + struct tls_cipherspec *active ) { + + /* Sanity check */ + if ( pending->suite == &tls_cipher_suite_null ) { + DBGC ( tls, "TLS %p refusing to use null cipher\n", tls ); + return -ENOTSUP_NULL; + } + + tls_clear_cipher ( tls, active ); + memswap ( active, pending, sizeof ( *active ) ); + return 0; +} + +/****************************************************************************** + * + * Signature and hash algorithms + * + ****************************************************************************** + */ + +/** Supported signature and hash algorithms + * + * Note that the default (TLSv1.1 and earlier) algorithm using + * MD5+SHA1 is never explicitly specified. + */ +struct tls_signature_hash_algorithm tls_signature_hash_algorithms[] = { + { + .code = { + .signature = TLS_RSA_ALGORITHM, + .hash = TLS_SHA256_ALGORITHM, + }, + .pubkey = &rsa_algorithm, + .digest = &sha256_algorithm, + }, +}; + +/** Number of supported signature and hash algorithms */ +#define TLS_NUM_SIG_HASH_ALGORITHMS \ + ( sizeof ( tls_signature_hash_algorithms ) / \ + sizeof ( tls_signature_hash_algorithms[0] ) ) + +/** + * Find TLS signature and hash algorithm + * + * @v pubkey Public-key algorithm + * @v digest Digest algorithm + * @ret sig_hash Signature and hash algorithm, or NULL + */ +static struct tls_signature_hash_algorithm * +tls_signature_hash_algorithm ( struct pubkey_algorithm *pubkey, + struct digest_algorithm *digest ) { + struct tls_signature_hash_algorithm *sig_hash; + unsigned int i; + + /* Identify signature and hash algorithm */ + for ( i = 0 ; i < TLS_NUM_SIG_HASH_ALGORITHMS ; i++ ) { + sig_hash = &tls_signature_hash_algorithms[i]; + if ( ( sig_hash->pubkey == pubkey ) && + ( sig_hash->digest == digest ) ) { + return sig_hash; + } + } + + return NULL; +} + +/****************************************************************************** + * + * Handshake verification + * + ****************************************************************************** + */ + +/** + * Add handshake record to verification hash + * + * @v tls TLS session + * @v data Handshake record + * @v len Length of handshake record + */ +static void tls_add_handshake ( struct tls_session *tls, + const void *data, size_t len ) { + + digest_update ( &md5_sha1_algorithm, tls->handshake_md5_sha1_ctx, + data, len ); + digest_update ( &sha256_algorithm, tls->handshake_sha256_ctx, + data, len ); +} + +/** + * Calculate handshake verification hash + * + * @v tls TLS session + * @v out Output buffer + * + * Calculates the MD5+SHA1 or SHA256 digest over all handshake + * messages seen so far. + */ +static void tls_verify_handshake ( struct tls_session *tls, void *out ) { + struct digest_algorithm *digest = tls->handshake_digest; + uint8_t ctx[ digest->ctxsize ]; + + memcpy ( ctx, tls->handshake_ctx, sizeof ( ctx ) ); + digest_final ( digest, ctx, out ); +} + +/****************************************************************************** + * + * Record handling + * + ****************************************************************************** + */ + +/** + * Resume TX state machine + * + * @v tls TLS session + */ +static void tls_tx_resume ( struct tls_session *tls ) { + process_add ( &tls->process ); +} + +/** + * Transmit Handshake record + * + * @v tls TLS session + * @v data Plaintext record + * @v len Length of plaintext record + * @ret rc Return status code + */ +static int tls_send_handshake ( struct tls_session *tls, + void *data, size_t len ) { + + /* Add to handshake digest */ + tls_add_handshake ( tls, data, len ); + + /* Send record */ + return tls_send_plaintext ( tls, TLS_TYPE_HANDSHAKE, data, len ); +} + +/** + * Transmit Client Hello record + * + * @v tls TLS session + * @ret rc Return status code + */ +static int tls_send_client_hello ( struct tls_session *tls ) { + struct { + uint32_t type_length; + uint16_t version; + uint8_t random[32]; + uint8_t session_id_len; + uint16_t cipher_suite_len; + uint16_t cipher_suites[TLS_NUM_CIPHER_SUITES]; + uint8_t compression_methods_len; + uint8_t compression_methods[1]; + uint16_t extensions_len; + struct { + uint16_t server_name_type; + uint16_t server_name_len; + struct { + uint16_t len; + struct { + uint8_t type; + uint16_t len; + uint8_t name[ strlen ( tls->name ) ]; + } __attribute__ (( packed )) list[1]; + } __attribute__ (( packed )) server_name; + uint16_t max_fragment_length_type; + uint16_t max_fragment_length_len; + struct { + uint8_t max; + } __attribute__ (( packed )) max_fragment_length; + } __attribute__ (( packed )) extensions; + } __attribute__ (( packed )) hello; + unsigned int i; + + memset ( &hello, 0, sizeof ( hello ) ); + hello.type_length = ( cpu_to_le32 ( TLS_CLIENT_HELLO ) | + htonl ( sizeof ( hello ) - + sizeof ( hello.type_length ) ) ); + hello.version = htons ( tls->version ); + memcpy ( &hello.random, &tls->client_random, sizeof ( hello.random ) ); + hello.cipher_suite_len = htons ( sizeof ( hello.cipher_suites ) ); + for ( i = 0 ; i < TLS_NUM_CIPHER_SUITES ; i++ ) + hello.cipher_suites[i] = tls_cipher_suites[i].code; + hello.compression_methods_len = sizeof ( hello.compression_methods ); + hello.extensions_len = htons ( sizeof ( hello.extensions ) ); + hello.extensions.server_name_type = htons ( TLS_SERVER_NAME ); + hello.extensions.server_name_len + = htons ( sizeof ( hello.extensions.server_name ) ); + hello.extensions.server_name.len + = htons ( sizeof ( hello.extensions.server_name.list ) ); + hello.extensions.server_name.list[0].type = TLS_SERVER_NAME_HOST_NAME; + hello.extensions.server_name.list[0].len + = htons ( sizeof ( hello.extensions.server_name.list[0].name )); + memcpy ( hello.extensions.server_name.list[0].name, tls->name, + sizeof ( hello.extensions.server_name.list[0].name ) ); + hello.extensions.max_fragment_length_type + = htons ( TLS_MAX_FRAGMENT_LENGTH ); + hello.extensions.max_fragment_length_len + = htons ( sizeof ( hello.extensions.max_fragment_length ) ); + hello.extensions.max_fragment_length.max + = TLS_MAX_FRAGMENT_LENGTH_4096; + + return tls_send_handshake ( tls, &hello, sizeof ( hello ) ); +} + +/** + * Transmit Certificate record + * + * @v tls TLS session + * @ret rc Return status code + */ +static int tls_send_certificate ( struct tls_session *tls ) { + struct { + uint32_t type_length; + uint8_t length[3]; + struct { + uint8_t length[3]; + uint8_t data[ tls->cert->raw.len ]; + } __attribute__ (( packed )) certificates[1]; + } __attribute__ (( packed )) *certificate; + int rc; + + /* Allocate storage for Certificate record (which may be too + * large for the stack). + */ + certificate = zalloc ( sizeof ( *certificate ) ); + if ( ! certificate ) + return -ENOMEM_CERTIFICATE; + + /* Populate record */ + certificate->type_length = + ( cpu_to_le32 ( TLS_CERTIFICATE ) | + htonl ( sizeof ( *certificate ) - + sizeof ( certificate->type_length ) ) ); + tls_set_uint24 ( certificate->length, + sizeof ( certificate->certificates ) ); + tls_set_uint24 ( certificate->certificates[0].length, + sizeof ( certificate->certificates[0].data ) ); + memcpy ( certificate->certificates[0].data, + tls->cert->raw.data, + sizeof ( certificate->certificates[0].data ) ); + + /* Transmit record */ + rc = tls_send_handshake ( tls, certificate, sizeof ( *certificate ) ); + + /* Free record */ + free ( certificate ); + + return rc; +} + +/** + * Transmit Client Key Exchange record + * + * @v tls TLS session + * @ret rc Return status code + */ +static int tls_send_client_key_exchange ( struct tls_session *tls ) { + struct tls_cipherspec *cipherspec = &tls->tx_cipherspec_pending; + struct pubkey_algorithm *pubkey = cipherspec->suite->pubkey; + size_t max_len = pubkey_max_len ( pubkey, cipherspec->pubkey_ctx ); + struct { + uint32_t type_length; + uint16_t encrypted_pre_master_secret_len; + uint8_t encrypted_pre_master_secret[max_len]; + } __attribute__ (( packed )) key_xchg; + size_t unused; + int len; + int rc; + + /* Encrypt pre-master secret using server's public key */ + memset ( &key_xchg, 0, sizeof ( key_xchg ) ); + len = pubkey_encrypt ( pubkey, cipherspec->pubkey_ctx, + &tls->pre_master_secret, + sizeof ( tls->pre_master_secret ), + key_xchg.encrypted_pre_master_secret ); + if ( len < 0 ) { + rc = len; + DBGC ( tls, "TLS %p could not encrypt pre-master secret: %s\n", + tls, strerror ( rc ) ); + return rc; + } + unused = ( max_len - len ); + key_xchg.type_length = + ( cpu_to_le32 ( TLS_CLIENT_KEY_EXCHANGE ) | + htonl ( sizeof ( key_xchg ) - + sizeof ( key_xchg.type_length ) - unused ) ); + key_xchg.encrypted_pre_master_secret_len = + htons ( sizeof ( key_xchg.encrypted_pre_master_secret ) - + unused ); + + return tls_send_handshake ( tls, &key_xchg, + ( sizeof ( key_xchg ) - unused ) ); +} + +/** + * Transmit Certificate Verify record + * + * @v tls TLS session + * @ret rc Return status code + */ +static int tls_send_certificate_verify ( struct tls_session *tls ) { + struct digest_algorithm *digest = tls->handshake_digest; + struct x509_certificate *cert = tls->cert; + struct pubkey_algorithm *pubkey = cert->signature_algorithm->pubkey; + uint8_t digest_out[ digest->digestsize ]; + uint8_t ctx[ pubkey->ctxsize ]; + struct tls_signature_hash_algorithm *sig_hash = NULL; + int rc; + + /* Generate digest to be signed */ + tls_verify_handshake ( tls, digest_out ); + + /* Initialise public-key algorithm */ + if ( ( rc = pubkey_init ( pubkey, ctx, private_key.data, + private_key.len ) ) != 0 ) { + DBGC ( tls, "TLS %p could not initialise %s client private " + "key: %s\n", tls, pubkey->name, strerror ( rc ) ); + goto err_pubkey_init; + } + + /* TLSv1.2 and later use explicit algorithm identifiers */ + if ( tls->version >= TLS_VERSION_TLS_1_2 ) { + sig_hash = tls_signature_hash_algorithm ( pubkey, digest ); + if ( ! sig_hash ) { + DBGC ( tls, "TLS %p could not identify (%s,%s) " + "signature and hash algorithm\n", tls, + pubkey->name, digest->name ); + rc = -ENOTSUP_SIG_HASH; + goto err_sig_hash; + } + } + + /* Generate and transmit record */ + { + size_t max_len = pubkey_max_len ( pubkey, ctx ); + int use_sig_hash = ( ( sig_hash == NULL ) ? 0 : 1 ); + struct { + uint32_t type_length; + struct tls_signature_hash_id sig_hash[use_sig_hash]; + uint16_t signature_len; + uint8_t signature[max_len]; + } __attribute__ (( packed )) certificate_verify; + size_t unused; + int len; + + /* Sign digest */ + len = pubkey_sign ( pubkey, ctx, digest, digest_out, + certificate_verify.signature ); + if ( len < 0 ) { + rc = len; + DBGC ( tls, "TLS %p could not sign %s digest using %s " + "client private key: %s\n", tls, digest->name, + pubkey->name, strerror ( rc ) ); + goto err_pubkey_sign; + } + unused = ( max_len - len ); + + /* Construct Certificate Verify record */ + certificate_verify.type_length = + ( cpu_to_le32 ( TLS_CERTIFICATE_VERIFY ) | + htonl ( sizeof ( certificate_verify ) - + sizeof ( certificate_verify.type_length ) - + unused ) ); + if ( use_sig_hash ) { + memcpy ( &certificate_verify.sig_hash[0], + &sig_hash->code, + sizeof ( certificate_verify.sig_hash[0] ) ); + } + certificate_verify.signature_len = + htons ( sizeof ( certificate_verify.signature ) - + unused ); + + /* Transmit record */ + rc = tls_send_handshake ( tls, &certificate_verify, + ( sizeof ( certificate_verify ) - unused ) ); + } + + err_pubkey_sign: + err_sig_hash: + pubkey_final ( pubkey, ctx ); + err_pubkey_init: + return rc; +} + +/** + * Transmit Change Cipher record + * + * @v tls TLS session + * @ret rc Return status code + */ +static int tls_send_change_cipher ( struct tls_session *tls ) { + static const uint8_t change_cipher[1] = { 1 }; + return tls_send_plaintext ( tls, TLS_TYPE_CHANGE_CIPHER, + change_cipher, sizeof ( change_cipher ) ); +} + +/** + * Transmit Finished record + * + * @v tls TLS session + * @ret rc Return status code + */ +static int tls_send_finished ( struct tls_session *tls ) { + struct digest_algorithm *digest = tls->handshake_digest; + struct { + uint32_t type_length; + uint8_t verify_data[12]; + } __attribute__ (( packed )) finished; + uint8_t digest_out[ digest->digestsize ]; + int rc; + + /* Construct record */ + memset ( &finished, 0, sizeof ( finished ) ); + finished.type_length = ( cpu_to_le32 ( TLS_FINISHED ) | + htonl ( sizeof ( finished ) - + sizeof ( finished.type_length ) ) ); + tls_verify_handshake ( tls, digest_out ); + tls_prf_label ( tls, &tls->master_secret, sizeof ( tls->master_secret ), + finished.verify_data, sizeof ( finished.verify_data ), + "client finished", digest_out, sizeof ( digest_out ) ); + + /* Transmit record */ + if ( ( rc = tls_send_handshake ( tls, &finished, + sizeof ( finished ) ) ) != 0 ) + return rc; + + /* Mark client as finished */ + pending_put ( &tls->client_negotiation ); + + return 0; +} + +/** + * Receive new Change Cipher record + * + * @v tls TLS session + * @v data Plaintext record + * @v len Length of plaintext record + * @ret rc Return status code + */ +static int tls_new_change_cipher ( struct tls_session *tls, + const void *data, size_t len ) { + int rc; + + if ( ( len != 1 ) || ( *( ( uint8_t * ) data ) != 1 ) ) { + DBGC ( tls, "TLS %p received invalid Change Cipher\n", tls ); + DBGC_HD ( tls, data, len ); + return -EINVAL_CHANGE_CIPHER; + } + + if ( ( rc = tls_change_cipher ( tls, &tls->rx_cipherspec_pending, + &tls->rx_cipherspec ) ) != 0 ) { + DBGC ( tls, "TLS %p could not activate RX cipher: %s\n", + tls, strerror ( rc ) ); + return rc; + } + tls->rx_seq = ~( ( uint64_t ) 0 ); + + return 0; +} + +/** + * Receive new Alert record + * + * @v tls TLS session + * @v data Plaintext record + * @v len Length of plaintext record + * @ret rc Return status code + */ +static int tls_new_alert ( struct tls_session *tls, const void *data, + size_t len ) { + const struct { + uint8_t level; + uint8_t description; + char next[0]; + } __attribute__ (( packed )) *alert = data; + const void *end = alert->next; + + /* Sanity check */ + if ( end != ( data + len ) ) { + DBGC ( tls, "TLS %p received overlength Alert\n", tls ); + DBGC_HD ( tls, data, len ); + return -EINVAL_ALERT; + } + + switch ( alert->level ) { + case TLS_ALERT_WARNING: + DBGC ( tls, "TLS %p received warning alert %d\n", + tls, alert->description ); + return 0; + case TLS_ALERT_FATAL: + DBGC ( tls, "TLS %p received fatal alert %d\n", + tls, alert->description ); + return -EPERM_ALERT; + default: + DBGC ( tls, "TLS %p received unknown alert level %d" + "(alert %d)\n", tls, alert->level, alert->description ); + return -EIO_ALERT; + } +} + +/** + * Receive new Server Hello handshake record + * + * @v tls TLS session + * @v data Plaintext handshake record + * @v len Length of plaintext handshake record + * @ret rc Return status code + */ +static int tls_new_server_hello ( struct tls_session *tls, + const void *data, size_t len ) { + const struct { + uint16_t version; + uint8_t random[32]; + uint8_t session_id_len; + char next[0]; + } __attribute__ (( packed )) *hello_a = data; + const struct { + uint8_t session_id[hello_a->session_id_len]; + uint16_t cipher_suite; + uint8_t compression_method; + char next[0]; + } __attribute__ (( packed )) *hello_b = ( void * ) &hello_a->next; + const void *end = hello_b->next; + uint16_t version; + int rc; + + /* Sanity check */ + if ( end > ( data + len ) ) { + DBGC ( tls, "TLS %p received underlength Server Hello\n", tls ); + DBGC_HD ( tls, data, len ); + return -EINVAL_HELLO; + } + + /* Check and store protocol version */ + version = ntohs ( hello_a->version ); + if ( version < TLS_VERSION_TLS_1_0 ) { + DBGC ( tls, "TLS %p does not support protocol version %d.%d\n", + tls, ( version >> 8 ), ( version & 0xff ) ); + return -ENOTSUP_VERSION; + } + if ( version > tls->version ) { + DBGC ( tls, "TLS %p server attempted to illegally upgrade to " + "protocol version %d.%d\n", + tls, ( version >> 8 ), ( version & 0xff ) ); + return -EPROTO_VERSION; + } + tls->version = version; + DBGC ( tls, "TLS %p using protocol version %d.%d\n", + tls, ( version >> 8 ), ( version & 0xff ) ); + + /* Use MD5+SHA1 digest algorithm for handshake verification + * for versions earlier than TLSv1.2. + */ + if ( tls->version < TLS_VERSION_TLS_1_2 ) { + tls->handshake_digest = &md5_sha1_algorithm; + tls->handshake_ctx = tls->handshake_md5_sha1_ctx; + } + + /* Copy out server random bytes */ + memcpy ( &tls->server_random, &hello_a->random, + sizeof ( tls->server_random ) ); + + /* Select cipher suite */ + if ( ( rc = tls_select_cipher ( tls, hello_b->cipher_suite ) ) != 0 ) + return rc; + + /* Generate secrets */ + tls_generate_master_secret ( tls ); + if ( ( rc = tls_generate_keys ( tls ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Parse certificate chain + * + * @v tls TLS session + * @v data Certificate chain + * @v len Length of certificate chain + * @ret rc Return status code + */ +static int tls_parse_chain ( struct tls_session *tls, + const void *data, size_t len ) { + const void *end = ( data + len ); + const struct { + uint8_t length[3]; + uint8_t data[0]; + } __attribute__ (( packed )) *certificate; + size_t certificate_len; + struct x509_certificate *cert; + const void *next; + int rc; + + /* Free any existing certificate chain */ + x509_chain_put ( tls->chain ); + tls->chain = NULL; + + /* Create certificate chain */ + tls->chain = x509_alloc_chain(); + if ( ! tls->chain ) { + rc = -ENOMEM_CHAIN; + goto err_alloc_chain; + } + + /* Add certificates to chain */ + while ( data < end ) { + + /* Extract raw certificate data */ + certificate = data; + certificate_len = tls_uint24 ( certificate->length ); + next = ( certificate->data + certificate_len ); + if ( next > end ) { + DBGC ( tls, "TLS %p overlength certificate:\n", tls ); + DBGC_HDA ( tls, 0, data, ( end - data ) ); + rc = -EINVAL_CERTIFICATE; + goto err_overlength; + } + + /* Add certificate to chain */ + if ( ( rc = x509_append_raw ( tls->chain, certificate->data, + certificate_len ) ) != 0 ) { + DBGC ( tls, "TLS %p could not append certificate: %s\n", + tls, strerror ( rc ) ); + DBGC_HDA ( tls, 0, data, ( end - data ) ); + goto err_parse; + } + cert = x509_last ( tls->chain ); + DBGC ( tls, "TLS %p found certificate %s\n", + tls, x509_name ( cert ) ); + + /* Move to next certificate in list */ + data = next; + } + + return 0; + + err_parse: + err_overlength: + x509_chain_put ( tls->chain ); + tls->chain = NULL; + err_alloc_chain: + return rc; +} + +/** + * Receive new Certificate handshake record + * + * @v tls TLS session + * @v data Plaintext handshake record + * @v len Length of plaintext handshake record + * @ret rc Return status code + */ +static int tls_new_certificate ( struct tls_session *tls, + const void *data, size_t len ) { + const struct { + uint8_t length[3]; + uint8_t certificates[0]; + } __attribute__ (( packed )) *certificate = data; + size_t certificates_len = tls_uint24 ( certificate->length ); + const void *end = ( certificate->certificates + certificates_len ); + int rc; + + /* Sanity check */ + if ( end != ( data + len ) ) { + DBGC ( tls, "TLS %p received overlength Server Certificate\n", + tls ); + DBGC_HD ( tls, data, len ); + return -EINVAL_CERTIFICATES; + } + + /* Parse certificate chain */ + if ( ( rc = tls_parse_chain ( tls, certificate->certificates, + certificates_len ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Receive new Certificate Request handshake record + * + * @v tls TLS session + * @v data Plaintext handshake record + * @v len Length of plaintext handshake record + * @ret rc Return status code + */ +static int tls_new_certificate_request ( struct tls_session *tls, + const void *data __unused, + size_t len __unused ) { + + /* We can only send a single certificate, so there is no point + * in parsing the Certificate Request. + */ + + /* Free any existing client certificate */ + x509_put ( tls->cert ); + + /* Determine client certificate to be sent */ + tls->cert = certstore_find_key ( &private_key ); + if ( ! tls->cert ) { + DBGC ( tls, "TLS %p could not find certificate corresponding " + "to private key\n", tls ); + return -EPERM_CLIENT_CERT; + } + x509_get ( tls->cert ); + DBGC ( tls, "TLS %p sending client certificate %s\n", + tls, x509_name ( tls->cert ) ); + + return 0; +} + +/** + * Receive new Server Hello Done handshake record + * + * @v tls TLS session + * @v data Plaintext handshake record + * @v len Length of plaintext handshake record + * @ret rc Return status code + */ +static int tls_new_server_hello_done ( struct tls_session *tls, + const void *data, size_t len ) { + const struct { + char next[0]; + } __attribute__ (( packed )) *hello_done = data; + const void *end = hello_done->next; + int rc; + + /* Sanity check */ + if ( end != ( data + len ) ) { + DBGC ( tls, "TLS %p received overlength Server Hello Done\n", + tls ); + DBGC_HD ( tls, data, len ); + return -EINVAL_HELLO_DONE; + } + + /* Begin certificate validation */ + if ( ( rc = create_validator ( &tls->validator, tls->chain ) ) != 0 ) { + DBGC ( tls, "TLS %p could not start certificate validation: " + "%s\n", tls, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Receive new Finished handshake record + * + * @v tls TLS session + * @v data Plaintext handshake record + * @v len Length of plaintext handshake record + * @ret rc Return status code + */ +static int tls_new_finished ( struct tls_session *tls, + const void *data, size_t len ) { + struct digest_algorithm *digest = tls->handshake_digest; + const struct { + uint8_t verify_data[12]; + char next[0]; + } __attribute__ (( packed )) *finished = data; + const void *end = finished->next; + uint8_t digest_out[ digest->digestsize ]; + uint8_t verify_data[ sizeof ( finished->verify_data ) ]; + + /* Sanity check */ + if ( end != ( data + len ) ) { + DBGC ( tls, "TLS %p received overlength Finished\n", tls ); + DBGC_HD ( tls, data, len ); + return -EINVAL_FINISHED; + } + + /* Verify data */ + tls_verify_handshake ( tls, digest_out ); + tls_prf_label ( tls, &tls->master_secret, sizeof ( tls->master_secret ), + verify_data, sizeof ( verify_data ), "server finished", + digest_out, sizeof ( digest_out ) ); + if ( memcmp ( verify_data, finished->verify_data, + sizeof ( verify_data ) ) != 0 ) { + DBGC ( tls, "TLS %p verification failed\n", tls ); + return -EPERM_VERIFY; + } + + /* Mark server as finished */ + pending_put ( &tls->server_negotiation ); + + /* Send notification of a window change */ + xfer_window_changed ( &tls->plainstream ); + + return 0; +} + +/** + * Receive new Handshake record + * + * @v tls TLS session + * @v data Plaintext record + * @v len Length of plaintext record + * @ret rc Return status code + */ +static int tls_new_handshake ( struct tls_session *tls, + const void *data, size_t len ) { + const void *end = ( data + len ); + int rc; + + while ( data != end ) { + const struct { + uint8_t type; + uint8_t length[3]; + uint8_t payload[0]; + } __attribute__ (( packed )) *handshake = data; + void *payload = &handshake->payload; + size_t payload_len = tls_uint24 ( handshake->length ); + void *next = ( payload + payload_len ); + + /* Sanity check */ + if ( next > end ) { + DBGC ( tls, "TLS %p received overlength Handshake\n", + tls ); + DBGC_HD ( tls, data, len ); + return -EINVAL_HANDSHAKE; + } + + switch ( handshake->type ) { + case TLS_SERVER_HELLO: + rc = tls_new_server_hello ( tls, payload, payload_len ); + break; + case TLS_CERTIFICATE: + rc = tls_new_certificate ( tls, payload, payload_len ); + break; + case TLS_CERTIFICATE_REQUEST: + rc = tls_new_certificate_request ( tls, payload, + payload_len ); + break; + case TLS_SERVER_HELLO_DONE: + rc = tls_new_server_hello_done ( tls, payload, + payload_len ); + break; + case TLS_FINISHED: + rc = tls_new_finished ( tls, payload, payload_len ); + break; + default: + DBGC ( tls, "TLS %p ignoring handshake type %d\n", + tls, handshake->type ); + rc = 0; + break; + } + + /* Add to handshake digest (except for Hello Requests, + * which are explicitly excluded). + */ + if ( handshake->type != TLS_HELLO_REQUEST ) + tls_add_handshake ( tls, data, + sizeof ( *handshake ) + + payload_len ); + + /* Abort on failure */ + if ( rc != 0 ) + return rc; + + /* Move to next handshake record */ + data = next; + } + + return 0; +} + +/** + * Receive new record + * + * @v tls TLS session + * @v type Record type + * @v rx_data List of received data buffers + * @ret rc Return status code + */ +static int tls_new_record ( struct tls_session *tls, unsigned int type, + struct list_head *rx_data ) { + struct io_buffer *iobuf; + int ( * handler ) ( struct tls_session *tls, const void *data, + size_t len ); + int rc; + + /* Deliver data records to the plainstream interface */ + if ( type == TLS_TYPE_DATA ) { + + /* Fail unless we are ready to receive data */ + if ( ! tls_ready ( tls ) ) + return -ENOTCONN; + + /* Deliver each I/O buffer in turn */ + while ( ( iobuf = list_first_entry ( rx_data, struct io_buffer, + list ) ) ) { + list_del ( &iobuf->list ); + if ( ( rc = xfer_deliver_iob ( &tls->plainstream, + iobuf ) ) != 0 ) { + DBGC ( tls, "TLS %p could not deliver data: " + "%s\n", tls, strerror ( rc ) ); + return rc; + } + } + return 0; + } + + /* For all other records, merge into a single I/O buffer */ + iobuf = iob_concatenate ( rx_data ); + if ( ! iobuf ) { + DBGC ( tls, "TLS %p could not concatenate non-data record " + "type %d\n", tls, type ); + return -ENOMEM_RX_CONCAT; + } + + /* Determine handler */ + switch ( type ) { + case TLS_TYPE_CHANGE_CIPHER: + handler = tls_new_change_cipher; + break; + case TLS_TYPE_ALERT: + handler = tls_new_alert; + break; + case TLS_TYPE_HANDSHAKE: + handler = tls_new_handshake; + break; + default: + /* RFC4346 says that we should just ignore unknown + * record types. + */ + handler = NULL; + DBGC ( tls, "TLS %p ignoring record type %d\n", tls, type ); + break; + } + + /* Handle record and free I/O buffer */ + rc = ( handler ? handler ( tls, iobuf->data, iob_len ( iobuf ) ) : 0 ); + free_iob ( iobuf ); + return rc; +} + +/****************************************************************************** + * + * Record encryption/decryption + * + ****************************************************************************** + */ + +/** + * Initialise HMAC + * + * @v cipherspec Cipher specification + * @v ctx Context + * @v seq Sequence number + * @v tlshdr TLS header + */ +static void tls_hmac_init ( struct tls_cipherspec *cipherspec, void *ctx, + uint64_t seq, struct tls_header *tlshdr ) { + struct digest_algorithm *digest = cipherspec->suite->digest; + + hmac_init ( digest, ctx, cipherspec->mac_secret, &digest->digestsize ); + seq = cpu_to_be64 ( seq ); + hmac_update ( digest, ctx, &seq, sizeof ( seq ) ); + hmac_update ( digest, ctx, tlshdr, sizeof ( *tlshdr ) ); +} + +/** + * Update HMAC + * + * @v cipherspec Cipher specification + * @v ctx Context + * @v data Data + * @v len Length of data + */ +static void tls_hmac_update ( struct tls_cipherspec *cipherspec, void *ctx, + const void *data, size_t len ) { + struct digest_algorithm *digest = cipherspec->suite->digest; + + hmac_update ( digest, ctx, data, len ); +} + +/** + * Finalise HMAC + * + * @v cipherspec Cipher specification + * @v ctx Context + * @v mac HMAC to fill in + */ +static void tls_hmac_final ( struct tls_cipherspec *cipherspec, void *ctx, + void *hmac ) { + struct digest_algorithm *digest = cipherspec->suite->digest; + + hmac_final ( digest, ctx, cipherspec->mac_secret, + &digest->digestsize, hmac ); +} + +/** + * Calculate HMAC + * + * @v cipherspec Cipher specification + * @v seq Sequence number + * @v tlshdr TLS header + * @v data Data + * @v len Length of data + * @v mac HMAC to fill in + */ +static void tls_hmac ( struct tls_cipherspec *cipherspec, + uint64_t seq, struct tls_header *tlshdr, + const void *data, size_t len, void *hmac ) { + struct digest_algorithm *digest = cipherspec->suite->digest; + uint8_t ctx[digest->ctxsize]; + + tls_hmac_init ( cipherspec, ctx, seq, tlshdr ); + tls_hmac_update ( cipherspec, ctx, data, len ); + tls_hmac_final ( cipherspec, ctx, hmac ); +} + +/** + * Allocate and assemble stream-ciphered record from data and MAC portions + * + * @v tls TLS session + * @ret data Data + * @ret len Length of data + * @ret digest MAC digest + * @ret plaintext_len Length of plaintext record + * @ret plaintext Allocated plaintext record + */ +static void * __malloc tls_assemble_stream ( struct tls_session *tls, + const void *data, size_t len, + void *digest, size_t *plaintext_len ) { + size_t mac_len = tls->tx_cipherspec.suite->digest->digestsize; + void *plaintext; + void *content; + void *mac; + + /* Calculate stream-ciphered struct length */ + *plaintext_len = ( len + mac_len ); + + /* Allocate stream-ciphered struct */ + plaintext = malloc ( *plaintext_len ); + if ( ! plaintext ) + return NULL; + content = plaintext; + mac = ( content + len ); + + /* Fill in stream-ciphered struct */ + memcpy ( content, data, len ); + memcpy ( mac, digest, mac_len ); + + return plaintext; +} + +/** + * Allocate and assemble block-ciphered record from data and MAC portions + * + * @v tls TLS session + * @ret data Data + * @ret len Length of data + * @ret digest MAC digest + * @ret plaintext_len Length of plaintext record + * @ret plaintext Allocated plaintext record + */ +static void * tls_assemble_block ( struct tls_session *tls, + const void *data, size_t len, + void *digest, size_t *plaintext_len ) { + size_t blocksize = tls->tx_cipherspec.suite->cipher->blocksize; + size_t mac_len = tls->tx_cipherspec.suite->digest->digestsize; + size_t iv_len; + size_t padding_len; + void *plaintext; + void *iv; + void *content; + void *mac; + void *padding; + + /* TLSv1.1 and later use an explicit IV */ + iv_len = ( ( tls->version >= TLS_VERSION_TLS_1_1 ) ? blocksize : 0 ); + + /* Calculate block-ciphered struct length */ + padding_len = ( ( blocksize - 1 ) & -( iv_len + len + mac_len + 1 ) ); + *plaintext_len = ( iv_len + len + mac_len + padding_len + 1 ); + + /* Allocate block-ciphered struct */ + plaintext = malloc ( *plaintext_len ); + if ( ! plaintext ) + return NULL; + iv = plaintext; + content = ( iv + iv_len ); + mac = ( content + len ); + padding = ( mac + mac_len ); + + /* Fill in block-ciphered struct */ + tls_generate_random ( tls, iv, iv_len ); + memcpy ( content, data, len ); + memcpy ( mac, digest, mac_len ); + memset ( padding, padding_len, ( padding_len + 1 ) ); + + return plaintext; +} + +/** + * Send plaintext record + * + * @v tls TLS session + * @v type Record type + * @v data Plaintext record + * @v len Length of plaintext record + * @ret rc Return status code + */ +static int tls_send_plaintext ( struct tls_session *tls, unsigned int type, + const void *data, size_t len ) { + struct tls_header plaintext_tlshdr; + struct tls_header *tlshdr; + struct tls_cipherspec *cipherspec = &tls->tx_cipherspec; + struct cipher_algorithm *cipher = cipherspec->suite->cipher; + void *plaintext = NULL; + size_t plaintext_len; + struct io_buffer *ciphertext = NULL; + size_t ciphertext_len; + size_t mac_len = cipherspec->suite->digest->digestsize; + uint8_t mac[mac_len]; + int rc; + + /* Construct header */ + plaintext_tlshdr.type = type; + plaintext_tlshdr.version = htons ( tls->version ); + plaintext_tlshdr.length = htons ( len ); + + /* Calculate MAC */ + tls_hmac ( cipherspec, tls->tx_seq, &plaintext_tlshdr, data, len, mac ); + + /* Allocate and assemble plaintext struct */ + if ( is_stream_cipher ( cipher ) ) { + plaintext = tls_assemble_stream ( tls, data, len, mac, + &plaintext_len ); + } else { + plaintext = tls_assemble_block ( tls, data, len, mac, + &plaintext_len ); + } + if ( ! plaintext ) { + DBGC ( tls, "TLS %p could not allocate %zd bytes for " + "plaintext\n", tls, plaintext_len ); + rc = -ENOMEM_TX_PLAINTEXT; + goto done; + } + + DBGC2 ( tls, "Sending plaintext data:\n" ); + DBGC2_HD ( tls, plaintext, plaintext_len ); + + /* Allocate ciphertext */ + ciphertext_len = ( sizeof ( *tlshdr ) + plaintext_len ); + ciphertext = xfer_alloc_iob ( &tls->cipherstream, ciphertext_len ); + if ( ! ciphertext ) { + DBGC ( tls, "TLS %p could not allocate %zd bytes for " + "ciphertext\n", tls, ciphertext_len ); + rc = -ENOMEM_TX_CIPHERTEXT; + goto done; + } + + /* Assemble ciphertext */ + tlshdr = iob_put ( ciphertext, sizeof ( *tlshdr ) ); + tlshdr->type = type; + tlshdr->version = htons ( tls->version ); + tlshdr->length = htons ( plaintext_len ); + memcpy ( cipherspec->cipher_next_ctx, cipherspec->cipher_ctx, + cipher->ctxsize ); + cipher_encrypt ( cipher, cipherspec->cipher_next_ctx, plaintext, + iob_put ( ciphertext, plaintext_len ), plaintext_len ); + + /* Free plaintext as soon as possible to conserve memory */ + free ( plaintext ); + plaintext = NULL; + + /* Send ciphertext */ + if ( ( rc = xfer_deliver_iob ( &tls->cipherstream, + iob_disown ( ciphertext ) ) ) != 0 ) { + DBGC ( tls, "TLS %p could not deliver ciphertext: %s\n", + tls, strerror ( rc ) ); + goto done; + } + + /* Update TX state machine to next record */ + tls->tx_seq += 1; + memcpy ( tls->tx_cipherspec.cipher_ctx, + tls->tx_cipherspec.cipher_next_ctx, cipher->ctxsize ); + + done: + free ( plaintext ); + free_iob ( ciphertext ); + return rc; +} + +/** + * Split stream-ciphered record into data and MAC portions + * + * @v tls TLS session + * @v rx_data List of received data buffers + * @v mac MAC to fill in + * @ret rc Return status code + */ +static int tls_split_stream ( struct tls_session *tls, + struct list_head *rx_data, void **mac ) { + size_t mac_len = tls->rx_cipherspec.suite->digest->digestsize; + struct io_buffer *iobuf; + + /* Extract MAC */ + iobuf = list_last_entry ( rx_data, struct io_buffer, list ); + assert ( iobuf != NULL ); + if ( iob_len ( iobuf ) < mac_len ) { + DBGC ( tls, "TLS %p received underlength MAC\n", tls ); + DBGC_HD ( tls, iobuf->data, iob_len ( iobuf ) ); + return -EINVAL_STREAM; + } + iob_unput ( iobuf, mac_len ); + *mac = iobuf->tail; + + return 0; +} + +/** + * Split block-ciphered record into data and MAC portions + * + * @v tls TLS session + * @v rx_data List of received data buffers + * @v mac MAC to fill in + * @ret rc Return status code + */ +static int tls_split_block ( struct tls_session *tls, + struct list_head *rx_data, void **mac ) { + size_t mac_len = tls->rx_cipherspec.suite->digest->digestsize; + struct io_buffer *iobuf; + size_t iv_len; + uint8_t *padding_final; + uint8_t *padding; + size_t padding_len; + + /* TLSv1.1 and later use an explicit IV */ + iobuf = list_first_entry ( rx_data, struct io_buffer, list ); + iv_len = ( ( tls->version >= TLS_VERSION_TLS_1_1 ) ? + tls->rx_cipherspec.suite->cipher->blocksize : 0 ); + if ( iob_len ( iobuf ) < iv_len ) { + DBGC ( tls, "TLS %p received underlength IV\n", tls ); + DBGC_HD ( tls, iobuf->data, iob_len ( iobuf ) ); + return -EINVAL_BLOCK; + } + iob_pull ( iobuf, iv_len ); + + /* Extract and verify padding */ + iobuf = list_last_entry ( rx_data, struct io_buffer, list ); + padding_final = ( iobuf->tail - 1 ); + padding_len = *padding_final; + if ( ( padding_len + 1 ) > iob_len ( iobuf ) ) { + DBGC ( tls, "TLS %p received underlength padding\n", tls ); + DBGC_HD ( tls, iobuf->data, iob_len ( iobuf ) ); + return -EINVAL_BLOCK; + } + iob_unput ( iobuf, ( padding_len + 1 ) ); + for ( padding = iobuf->tail ; padding < padding_final ; padding++ ) { + if ( *padding != padding_len ) { + DBGC ( tls, "TLS %p received bad padding\n", tls ); + DBGC_HD ( tls, padding, padding_len ); + return -EINVAL_PADDING; + } + } + + /* Extract MAC */ + if ( iob_len ( iobuf ) < mac_len ) { + DBGC ( tls, "TLS %p received underlength MAC\n", tls ); + DBGC_HD ( tls, iobuf->data, iob_len ( iobuf ) ); + return -EINVAL_BLOCK; + } + iob_unput ( iobuf, mac_len ); + *mac = iobuf->tail; + + return 0; +} + +/** + * Receive new ciphertext record + * + * @v tls TLS session + * @v tlshdr Record header + * @v rx_data List of received data buffers + * @ret rc Return status code + */ +static int tls_new_ciphertext ( struct tls_session *tls, + struct tls_header *tlshdr, + struct list_head *rx_data ) { + struct tls_header plaintext_tlshdr; + struct tls_cipherspec *cipherspec = &tls->rx_cipherspec; + struct cipher_algorithm *cipher = cipherspec->suite->cipher; + struct digest_algorithm *digest = cipherspec->suite->digest; + uint8_t ctx[digest->ctxsize]; + uint8_t verify_mac[digest->digestsize]; + struct io_buffer *iobuf; + void *mac; + size_t len = 0; + int rc; + + /* Decrypt the received data */ + list_for_each_entry ( iobuf, &tls->rx_data, list ) { + cipher_decrypt ( cipher, cipherspec->cipher_ctx, + iobuf->data, iobuf->data, iob_len ( iobuf ) ); + } + + /* Split record into content and MAC */ + if ( is_stream_cipher ( cipher ) ) { + if ( ( rc = tls_split_stream ( tls, rx_data, &mac ) ) != 0 ) + return rc; + } else { + if ( ( rc = tls_split_block ( tls, rx_data, &mac ) ) != 0 ) + return rc; + } + + /* Calculate total length */ + DBGC2 ( tls, "Received plaintext data:\n" ); + list_for_each_entry ( iobuf, rx_data, list ) { + DBGC2_HD ( tls, iobuf->data, iob_len ( iobuf ) ); + len += iob_len ( iobuf ); + } + + /* Verify MAC */ + plaintext_tlshdr.type = tlshdr->type; + plaintext_tlshdr.version = tlshdr->version; + plaintext_tlshdr.length = htons ( len ); + tls_hmac_init ( cipherspec, ctx, tls->rx_seq, &plaintext_tlshdr ); + list_for_each_entry ( iobuf, rx_data, list ) { + tls_hmac_update ( cipherspec, ctx, iobuf->data, + iob_len ( iobuf ) ); + } + tls_hmac_final ( cipherspec, ctx, verify_mac ); + if ( memcmp ( mac, verify_mac, sizeof ( verify_mac ) ) != 0 ) { + DBGC ( tls, "TLS %p failed MAC verification\n", tls ); + return -EINVAL_MAC; + } + + /* Process plaintext record */ + if ( ( rc = tls_new_record ( tls, tlshdr->type, rx_data ) ) != 0 ) + return rc; + + return 0; +} + +/****************************************************************************** + * + * Plaintext stream operations + * + ****************************************************************************** + */ + +/** + * Check flow control window + * + * @v tls TLS session + * @ret len Length of window + */ +static size_t tls_plainstream_window ( struct tls_session *tls ) { + + /* Block window unless we are ready to accept data */ + if ( ! tls_ready ( tls ) ) + return 0; + + return xfer_window ( &tls->cipherstream ); +} + +/** + * Deliver datagram as raw data + * + * @v tls TLS session + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int tls_plainstream_deliver ( struct tls_session *tls, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + int rc; + + /* Refuse unless we are ready to accept data */ + if ( ! tls_ready ( tls ) ) { + rc = -ENOTCONN; + goto done; + } + + if ( ( rc = tls_send_plaintext ( tls, TLS_TYPE_DATA, iobuf->data, + iob_len ( iobuf ) ) ) != 0 ) + goto done; + + done: + free_iob ( iobuf ); + return rc; +} + +/** TLS plaintext stream interface operations */ +static struct interface_operation tls_plainstream_ops[] = { + INTF_OP ( xfer_deliver, struct tls_session *, tls_plainstream_deliver ), + INTF_OP ( xfer_window, struct tls_session *, tls_plainstream_window ), + INTF_OP ( intf_close, struct tls_session *, tls_close ), +}; + +/** TLS plaintext stream interface descriptor */ +static struct interface_descriptor tls_plainstream_desc = + INTF_DESC_PASSTHRU ( struct tls_session, plainstream, + tls_plainstream_ops, cipherstream ); + +/****************************************************************************** + * + * Ciphertext stream operations + * + ****************************************************************************** + */ + +/** + * Handle received TLS header + * + * @v tls TLS session + * @ret rc Returned status code + */ +static int tls_newdata_process_header ( struct tls_session *tls ) { + size_t data_len = ntohs ( tls->rx_header.length ); + size_t remaining = data_len; + size_t frag_len; + struct io_buffer *iobuf; + struct io_buffer *tmp; + int rc; + + /* Allocate data buffers now that we know the length */ + assert ( list_empty ( &tls->rx_data ) ); + while ( remaining ) { + + /* Calculate fragment length. Ensure that no block is + * smaller than TLS_RX_MIN_BUFSIZE (by increasing the + * allocation length if necessary). + */ + frag_len = remaining; + if ( frag_len > TLS_RX_BUFSIZE ) + frag_len = TLS_RX_BUFSIZE; + remaining -= frag_len; + if ( remaining < TLS_RX_MIN_BUFSIZE ) { + frag_len += remaining; + remaining = 0; + } + + /* Allocate buffer */ + iobuf = alloc_iob_raw ( frag_len, TLS_RX_ALIGN, 0 ); + if ( ! iobuf ) { + DBGC ( tls, "TLS %p could not allocate %zd of %zd " + "bytes for receive buffer\n", tls, + remaining, data_len ); + rc = -ENOMEM_RX_DATA; + goto err; + } + + /* Ensure tailroom is exactly what we asked for. This + * will result in unaligned I/O buffers when the + * fragment length is unaligned, which can happen only + * before we switch to using a block cipher. + */ + iob_reserve ( iobuf, ( iob_tailroom ( iobuf ) - frag_len ) ); + + /* Add I/O buffer to list */ + list_add_tail ( &iobuf->list, &tls->rx_data ); + } + + /* Move to data state */ + tls->rx_state = TLS_RX_DATA; + + return 0; + + err: + list_for_each_entry_safe ( iobuf, tmp, &tls->rx_data, list ) { + list_del ( &iobuf->list ); + free_iob ( iobuf ); + } + return rc; +} + +/** + * Handle received TLS data payload + * + * @v tls TLS session + * @ret rc Returned status code + */ +static int tls_newdata_process_data ( struct tls_session *tls ) { + struct io_buffer *iobuf; + int rc; + + /* Move current buffer to end of list */ + iobuf = list_first_entry ( &tls->rx_data, struct io_buffer, list ); + list_del ( &iobuf->list ); + list_add_tail ( &iobuf->list, &tls->rx_data ); + + /* Continue receiving data if any space remains */ + iobuf = list_first_entry ( &tls->rx_data, struct io_buffer, list ); + if ( iob_tailroom ( iobuf ) ) + return 0; + + /* Process record */ + if ( ( rc = tls_new_ciphertext ( tls, &tls->rx_header, + &tls->rx_data ) ) != 0 ) + return rc; + + /* Increment RX sequence number */ + tls->rx_seq += 1; + + /* Return to header state */ + assert ( list_empty ( &tls->rx_data ) ); + tls->rx_state = TLS_RX_HEADER; + iob_unput ( &tls->rx_header_iobuf, sizeof ( tls->rx_header ) ); + + return 0; +} + +/** + * Receive new ciphertext + * + * @v tls TLS session + * @v iobuf I/O buffer + * @v meta Data transfer metadat + * @ret rc Return status code + */ +static int tls_cipherstream_deliver ( struct tls_session *tls, + struct io_buffer *iobuf, + struct xfer_metadata *xfer __unused ) { + size_t frag_len; + int ( * process ) ( struct tls_session *tls ); + struct io_buffer *dest; + int rc; + + while ( iob_len ( iobuf ) ) { + + /* Select buffer according to current state */ + switch ( tls->rx_state ) { + case TLS_RX_HEADER: + dest = &tls->rx_header_iobuf; + process = tls_newdata_process_header; + break; + case TLS_RX_DATA: + dest = list_first_entry ( &tls->rx_data, + struct io_buffer, list ); + assert ( dest != NULL ); + process = tls_newdata_process_data; + break; + default: + assert ( 0 ); + rc = -EINVAL_RX_STATE; + goto done; + } + + /* Copy data portion to buffer */ + frag_len = iob_len ( iobuf ); + if ( frag_len > iob_tailroom ( dest ) ) + frag_len = iob_tailroom ( dest ); + memcpy ( iob_put ( dest, frag_len ), iobuf->data, frag_len ); + iob_pull ( iobuf, frag_len ); + + /* Process data if buffer is now full */ + if ( iob_tailroom ( dest ) == 0 ) { + if ( ( rc = process ( tls ) ) != 0 ) { + tls_close ( tls, rc ); + goto done; + } + } + } + rc = 0; + + done: + free_iob ( iobuf ); + return rc; +} + +/** TLS ciphertext stream interface operations */ +static struct interface_operation tls_cipherstream_ops[] = { + INTF_OP ( xfer_deliver, struct tls_session *, + tls_cipherstream_deliver ), + INTF_OP ( xfer_window_changed, struct tls_session *, tls_tx_resume ), + INTF_OP ( intf_close, struct tls_session *, tls_close ), +}; + +/** TLS ciphertext stream interface descriptor */ +static struct interface_descriptor tls_cipherstream_desc = + INTF_DESC_PASSTHRU ( struct tls_session, cipherstream, + tls_cipherstream_ops, plainstream ); + +/****************************************************************************** + * + * Certificate validator + * + ****************************************************************************** + */ + +/** + * Handle certificate validation completion + * + * @v tls TLS session + * @v rc Reason for completion + */ +static void tls_validator_done ( struct tls_session *tls, int rc ) { + struct tls_cipherspec *cipherspec = &tls->tx_cipherspec_pending; + struct pubkey_algorithm *pubkey = cipherspec->suite->pubkey; + struct x509_certificate *cert; + + /* Close validator interface */ + intf_restart ( &tls->validator, rc ); + + /* Check for validation failure */ + if ( rc != 0 ) { + DBGC ( tls, "TLS %p certificate validation failed: %s\n", + tls, strerror ( rc ) ); + goto err; + } + DBGC ( tls, "TLS %p certificate validation succeeded\n", tls ); + + /* Extract first certificate */ + cert = x509_first ( tls->chain ); + assert ( cert != NULL ); + + /* Verify server name */ + if ( ( rc = x509_check_name ( cert, tls->name ) ) != 0 ) { + DBGC ( tls, "TLS %p server certificate does not match %s: %s\n", + tls, tls->name, strerror ( rc ) ); + goto err; + } + + /* Initialise public key algorithm */ + if ( ( rc = pubkey_init ( pubkey, cipherspec->pubkey_ctx, + cert->subject.public_key.raw.data, + cert->subject.public_key.raw.len ) ) != 0 ) { + DBGC ( tls, "TLS %p cannot initialise public key: %s\n", + tls, strerror ( rc ) ); + goto err; + } + + /* Schedule Client Key Exchange, Change Cipher, and Finished */ + tls->tx_pending |= ( TLS_TX_CLIENT_KEY_EXCHANGE | + TLS_TX_CHANGE_CIPHER | + TLS_TX_FINISHED ); + if ( tls->cert ) { + tls->tx_pending |= ( TLS_TX_CERTIFICATE | + TLS_TX_CERTIFICATE_VERIFY ); + } + tls_tx_resume ( tls ); + + return; + + err: + tls_close ( tls, rc ); + return; +} + +/** TLS certificate validator interface operations */ +static struct interface_operation tls_validator_ops[] = { + INTF_OP ( intf_close, struct tls_session *, tls_validator_done ), +}; + +/** TLS certificate validator interface descriptor */ +static struct interface_descriptor tls_validator_desc = + INTF_DESC ( struct tls_session, validator, tls_validator_ops ); + +/****************************************************************************** + * + * Controlling process + * + ****************************************************************************** + */ + +/** + * TLS TX state machine + * + * @v tls TLS session + */ +static void tls_tx_step ( struct tls_session *tls ) { + int rc; + + /* Wait for cipherstream to become ready */ + if ( ! xfer_window ( &tls->cipherstream ) ) + return; + + /* Send first pending transmission */ + if ( tls->tx_pending & TLS_TX_CLIENT_HELLO ) { + /* Send Client Hello */ + if ( ( rc = tls_send_client_hello ( tls ) ) != 0 ) { + DBGC ( tls, "TLS %p could not send Client Hello: %s\n", + tls, strerror ( rc ) ); + goto err; + } + tls->tx_pending &= ~TLS_TX_CLIENT_HELLO; + } else if ( tls->tx_pending & TLS_TX_CERTIFICATE ) { + /* Send Certificate */ + if ( ( rc = tls_send_certificate ( tls ) ) != 0 ) { + DBGC ( tls, "TLS %p cold not send Certificate: %s\n", + tls, strerror ( rc ) ); + goto err; + } + tls->tx_pending &= ~TLS_TX_CERTIFICATE; + } else if ( tls->tx_pending & TLS_TX_CLIENT_KEY_EXCHANGE ) { + /* Send Client Key Exchange */ + if ( ( rc = tls_send_client_key_exchange ( tls ) ) != 0 ) { + DBGC ( tls, "TLS %p could not send Client Key " + "Exchange: %s\n", tls, strerror ( rc ) ); + goto err; + } + tls->tx_pending &= ~TLS_TX_CLIENT_KEY_EXCHANGE; + } else if ( tls->tx_pending & TLS_TX_CERTIFICATE_VERIFY ) { + /* Send Certificate Verify */ + if ( ( rc = tls_send_certificate_verify ( tls ) ) != 0 ) { + DBGC ( tls, "TLS %p could not send Certificate " + "Verify: %s\n", tls, strerror ( rc ) ); + goto err; + } + tls->tx_pending &= ~TLS_TX_CERTIFICATE_VERIFY; + } else if ( tls->tx_pending & TLS_TX_CHANGE_CIPHER ) { + /* Send Change Cipher, and then change the cipher in use */ + if ( ( rc = tls_send_change_cipher ( tls ) ) != 0 ) { + DBGC ( tls, "TLS %p could not send Change Cipher: " + "%s\n", tls, strerror ( rc ) ); + goto err; + } + if ( ( rc = tls_change_cipher ( tls, + &tls->tx_cipherspec_pending, + &tls->tx_cipherspec )) != 0 ){ + DBGC ( tls, "TLS %p could not activate TX cipher: " + "%s\n", tls, strerror ( rc ) ); + goto err; + } + tls->tx_seq = 0; + tls->tx_pending &= ~TLS_TX_CHANGE_CIPHER; + } else if ( tls->tx_pending & TLS_TX_FINISHED ) { + /* Send Finished */ + if ( ( rc = tls_send_finished ( tls ) ) != 0 ) { + DBGC ( tls, "TLS %p could not send Finished: %s\n", + tls, strerror ( rc ) ); + goto err; + } + tls->tx_pending &= ~TLS_TX_FINISHED; + } + + /* Reschedule process if pending transmissions remain */ + if ( tls->tx_pending ) + tls_tx_resume ( tls ); + + return; + + err: + tls_close ( tls, rc ); +} + +/** TLS TX process descriptor */ +static struct process_descriptor tls_process_desc = + PROC_DESC_ONCE ( struct tls_session, process, tls_tx_step ); + +/****************************************************************************** + * + * Instantiator + * + ****************************************************************************** + */ + +int add_tls ( struct interface *xfer, const char *name, + struct interface **next ) { + struct tls_session *tls; + int rc; + + /* Allocate and initialise TLS structure */ + tls = malloc ( sizeof ( *tls ) ); + if ( ! tls ) { + rc = -ENOMEM; + goto err_alloc; + } + memset ( tls, 0, sizeof ( *tls ) ); + ref_init ( &tls->refcnt, free_tls ); + tls->name = name; + intf_init ( &tls->plainstream, &tls_plainstream_desc, &tls->refcnt ); + intf_init ( &tls->cipherstream, &tls_cipherstream_desc, &tls->refcnt ); + intf_init ( &tls->validator, &tls_validator_desc, &tls->refcnt ); + process_init ( &tls->process, &tls_process_desc, &tls->refcnt ); + tls->version = TLS_VERSION_TLS_1_2; + tls_clear_cipher ( tls, &tls->tx_cipherspec ); + tls_clear_cipher ( tls, &tls->tx_cipherspec_pending ); + tls_clear_cipher ( tls, &tls->rx_cipherspec ); + tls_clear_cipher ( tls, &tls->rx_cipherspec_pending ); + tls->client_random.gmt_unix_time = time ( NULL ); + if ( ( rc = tls_generate_random ( tls, &tls->client_random.random, + ( sizeof ( tls->client_random.random ) ) ) ) != 0 ) { + goto err_random; + } + tls->pre_master_secret.version = htons ( tls->version ); + if ( ( rc = tls_generate_random ( tls, &tls->pre_master_secret.random, + ( sizeof ( tls->pre_master_secret.random ) ) ) ) != 0 ) { + goto err_random; + } + digest_init ( &md5_sha1_algorithm, tls->handshake_md5_sha1_ctx ); + digest_init ( &sha256_algorithm, tls->handshake_sha256_ctx ); + tls->handshake_digest = &sha256_algorithm; + tls->handshake_ctx = tls->handshake_sha256_ctx; + tls->tx_pending = TLS_TX_CLIENT_HELLO; + iob_populate ( &tls->rx_header_iobuf, &tls->rx_header, 0, + sizeof ( tls->rx_header ) ); + INIT_LIST_HEAD ( &tls->rx_data ); + + /* Add pending operations for server and client Finished messages */ + pending_get ( &tls->client_negotiation ); + pending_get ( &tls->server_negotiation ); + + /* Attach to parent interface, mortalise self, and return */ + intf_plug_plug ( &tls->plainstream, xfer ); + *next = &tls->cipherstream; + ref_put ( &tls->refcnt ); + return 0; + + err_random: + ref_put ( &tls->refcnt ); + err_alloc: + return rc; +} diff --git a/qemu/roms/ipxe/src/net/udp.c b/qemu/roms/ipxe/src/net/udp.c new file mode 100644 index 000000000..76da67ecf --- /dev/null +++ b/qemu/roms/ipxe/src/net/udp.c @@ -0,0 +1,440 @@ +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <byteswap.h> +#include <errno.h> +#include <ipxe/tcpip.h> +#include <ipxe/iobuf.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/uri.h> +#include <ipxe/netdevice.h> +#include <ipxe/udp.h> + +/** @file + * + * UDP protocol + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** + * A UDP connection + * + */ +struct udp_connection { + /** Reference counter */ + struct refcnt refcnt; + /** List of UDP connections */ + struct list_head list; + + /** Data transfer interface */ + struct interface xfer; + + /** Local socket address */ + struct sockaddr_tcpip local; + /** Remote socket address */ + struct sockaddr_tcpip peer; +}; + +/** + * List of registered UDP connections + */ +static LIST_HEAD ( udp_conns ); + +/* Forward declatations */ +static struct interface_descriptor udp_xfer_desc; +struct tcpip_protocol udp_protocol __tcpip_protocol; + +/** + * Check if local UDP port is available + * + * @v port Local port number + * @ret port Local port number, or negative error + */ +static int udp_port_available ( int port ) { + struct udp_connection *udp; + + list_for_each_entry ( udp, &udp_conns, list ) { + if ( udp->local.st_port == htons ( port ) ) + return -EADDRINUSE; + } + return port; +} + +/** + * Open a UDP connection + * + * @v xfer Data transfer interface + * @v peer Peer socket address, or NULL + * @v local Local socket address, or NULL + * @v promisc Socket is promiscuous + * @ret rc Return status code + */ +static int udp_open_common ( struct interface *xfer, + struct sockaddr *peer, struct sockaddr *local, + int promisc ) { + struct sockaddr_tcpip *st_peer = ( struct sockaddr_tcpip * ) peer; + struct sockaddr_tcpip *st_local = ( struct sockaddr_tcpip * ) local; + struct udp_connection *udp; + int port; + int rc; + + /* Allocate and initialise structure */ + udp = zalloc ( sizeof ( *udp ) ); + if ( ! udp ) + return -ENOMEM; + DBGC ( udp, "UDP %p allocated\n", udp ); + ref_init ( &udp->refcnt, NULL ); + intf_init ( &udp->xfer, &udp_xfer_desc, &udp->refcnt ); + if ( st_peer ) + memcpy ( &udp->peer, st_peer, sizeof ( udp->peer ) ); + if ( st_local ) + memcpy ( &udp->local, st_local, sizeof ( udp->local ) ); + + /* Bind to local port */ + if ( ! promisc ) { + port = tcpip_bind ( st_local, udp_port_available ); + if ( port < 0 ) { + rc = port; + DBGC ( udp, "UDP %p could not bind: %s\n", + udp, strerror ( rc ) ); + goto err; + } + udp->local.st_port = htons ( port ); + DBGC ( udp, "UDP %p bound to port %d\n", + udp, ntohs ( udp->local.st_port ) ); + } + + /* Attach parent interface, transfer reference to connection + * list and return + */ + intf_plug_plug ( &udp->xfer, xfer ); + list_add ( &udp->list, &udp_conns ); + return 0; + + err: + ref_put ( &udp->refcnt ); + return rc; +} + +/** + * Open a UDP connection + * + * @v xfer Data transfer interface + * @v peer Peer socket address + * @v local Local socket address, or NULL + * @ret rc Return status code + */ +int udp_open ( struct interface *xfer, struct sockaddr *peer, + struct sockaddr *local ) { + return udp_open_common ( xfer, peer, local, 0 ); +} + +/** + * Open a promiscuous UDP connection + * + * @v xfer Data transfer interface + * @ret rc Return status code + * + * Promiscuous UDP connections are required in order to support the + * PXE API. + */ +int udp_open_promisc ( struct interface *xfer ) { + return udp_open_common ( xfer, NULL, NULL, 1 ); +} + +/** + * Close a UDP connection + * + * @v udp UDP connection + * @v rc Reason for close + */ +static void udp_close ( struct udp_connection *udp, int rc ) { + + /* Close data transfer interface */ + intf_shutdown ( &udp->xfer, rc ); + + /* Remove from list of connections and drop list's reference */ + list_del ( &udp->list ); + ref_put ( &udp->refcnt ); + + DBGC ( udp, "UDP %p closed\n", udp ); +} + +/** + * Transmit data via a UDP connection to a specified address + * + * @v udp UDP connection + * @v iobuf I/O buffer + * @v src Source address, or NULL to use default + * @v dest Destination address, or NULL to use default + * @v netdev Network device, or NULL to use default + * @ret rc Return status code + */ +static int udp_tx ( struct udp_connection *udp, struct io_buffer *iobuf, + struct sockaddr_tcpip *src, struct sockaddr_tcpip *dest, + struct net_device *netdev ) { + struct udp_header *udphdr; + size_t len; + int rc; + + /* Check we can accommodate the header */ + if ( ( rc = iob_ensure_headroom ( iobuf, + MAX_LL_NET_HEADER_LEN ) ) != 0 ) { + free_iob ( iobuf ); + return rc; + } + + /* Fill in default values if not explicitly provided */ + if ( ! src ) + src = &udp->local; + if ( ! dest ) + dest = &udp->peer; + + /* Add the UDP header */ + udphdr = iob_push ( iobuf, sizeof ( *udphdr ) ); + len = iob_len ( iobuf ); + udphdr->dest = dest->st_port; + udphdr->src = src->st_port; + udphdr->len = htons ( len ); + udphdr->chksum = 0; + udphdr->chksum = tcpip_chksum ( udphdr, len ); + + /* Dump debugging information */ + DBGC2 ( udp, "UDP %p TX %d->%d len %d\n", udp, + ntohs ( udphdr->src ), ntohs ( udphdr->dest ), + ntohs ( udphdr->len ) ); + + /* Send it to the next layer for processing */ + if ( ( rc = tcpip_tx ( iobuf, &udp_protocol, src, dest, netdev, + &udphdr->chksum ) ) != 0 ) { + DBGC ( udp, "UDP %p could not transmit packet: %s\n", + udp, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Identify UDP connection by local address + * + * @v local Local address + * @ret udp UDP connection, or NULL + */ +static struct udp_connection * udp_demux ( struct sockaddr_tcpip *local ) { + static const struct sockaddr_tcpip empty_sockaddr = { .pad = { 0, } }; + struct udp_connection *udp; + + list_for_each_entry ( udp, &udp_conns, list ) { + if ( ( ( udp->local.st_family == local->st_family ) || + ( udp->local.st_family == 0 ) ) && + ( ( udp->local.st_port == local->st_port ) || + ( udp->local.st_port == 0 ) ) && + ( ( memcmp ( udp->local.pad, local->pad, + sizeof ( udp->local.pad ) ) == 0 ) || + ( memcmp ( udp->local.pad, empty_sockaddr.pad, + sizeof ( udp->local.pad ) ) == 0 ) ) ) { + return udp; + } + } + return NULL; +} + +/** + * Process a received packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v st_src Partially-filled source address + * @v st_dest Partially-filled destination address + * @v pshdr_csum Pseudo-header checksum + * @ret rc Return status code + */ +static int udp_rx ( struct io_buffer *iobuf, + struct net_device *netdev __unused, + struct sockaddr_tcpip *st_src, + struct sockaddr_tcpip *st_dest, uint16_t pshdr_csum ) { + struct udp_header *udphdr = iobuf->data; + struct udp_connection *udp; + struct xfer_metadata meta; + size_t ulen; + unsigned int csum; + int rc = 0; + + /* Sanity check packet */ + if ( iob_len ( iobuf ) < sizeof ( *udphdr ) ) { + DBG ( "UDP packet too short at %zd bytes (min %zd bytes)\n", + iob_len ( iobuf ), sizeof ( *udphdr ) ); + + rc = -EINVAL; + goto done; + } + ulen = ntohs ( udphdr->len ); + if ( ulen < sizeof ( *udphdr ) ) { + DBG ( "UDP length too short at %zd bytes " + "(header is %zd bytes)\n", ulen, sizeof ( *udphdr ) ); + rc = -EINVAL; + goto done; + } + if ( ulen > iob_len ( iobuf ) ) { + DBG ( "UDP length too long at %zd bytes (packet is %zd " + "bytes)\n", ulen, iob_len ( iobuf ) ); + rc = -EINVAL; + goto done; + } + if ( udphdr->chksum ) { + csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data, ulen ); + if ( csum != 0 ) { + DBG ( "UDP checksum incorrect (is %04x including " + "checksum field, should be 0000)\n", csum ); + rc = -EINVAL; + goto done; + } + } + + /* Parse parameters from header and strip header */ + st_src->st_port = udphdr->src; + st_dest->st_port = udphdr->dest; + udp = udp_demux ( st_dest ); + iob_unput ( iobuf, ( iob_len ( iobuf ) - ulen ) ); + iob_pull ( iobuf, sizeof ( *udphdr ) ); + + /* Dump debugging information */ + DBGC2 ( udp, "UDP %p RX %d<-%d len %zd\n", udp, + ntohs ( udphdr->dest ), ntohs ( udphdr->src ), ulen ); + + /* Ignore if no matching connection found */ + if ( ! udp ) { + DBG ( "No UDP connection listening on port %d\n", + ntohs ( udphdr->dest ) ); + rc = -ENOTCONN; + goto done; + } + + /* Pass data to application */ + memset ( &meta, 0, sizeof ( meta ) ); + meta.src = ( struct sockaddr * ) st_src; + meta.dest = ( struct sockaddr * ) st_dest; + rc = xfer_deliver ( &udp->xfer, iob_disown ( iobuf ), &meta ); + + done: + free_iob ( iobuf ); + return rc; +} + +struct tcpip_protocol udp_protocol __tcpip_protocol = { + .name = "UDP", + .rx = udp_rx, + .tcpip_proto = IP_UDP, +}; + +/*************************************************************************** + * + * Data transfer interface + * + *************************************************************************** + */ + +/** + * Allocate I/O buffer for UDP + * + * @v udp UDP connection + * @v len Payload size + * @ret iobuf I/O buffer, or NULL + */ +static struct io_buffer * udp_xfer_alloc_iob ( struct udp_connection *udp, + size_t len ) { + struct io_buffer *iobuf; + + iobuf = alloc_iob ( MAX_LL_NET_HEADER_LEN + len ); + if ( ! iobuf ) { + DBGC ( udp, "UDP %p cannot allocate buffer of length %zd\n", + udp, len ); + return NULL; + } + iob_reserve ( iobuf, MAX_LL_NET_HEADER_LEN ); + return iobuf; +} + +/** + * Deliver datagram as I/O buffer + * + * @v udp UDP connection + * @v iobuf Datagram I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int udp_xfer_deliver ( struct udp_connection *udp, + struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + + /* Transmit data, if possible */ + return udp_tx ( udp, iobuf, ( ( struct sockaddr_tcpip * ) meta->src ), + ( ( struct sockaddr_tcpip * ) meta->dest ), + meta->netdev ); +} + +/** UDP data transfer interface operations */ +static struct interface_operation udp_xfer_operations[] = { + INTF_OP ( xfer_deliver, struct udp_connection *, udp_xfer_deliver ), + INTF_OP ( xfer_alloc_iob, struct udp_connection *, udp_xfer_alloc_iob ), + INTF_OP ( intf_close, struct udp_connection *, udp_close ), +}; + +/** UDP data transfer interface descriptor */ +static struct interface_descriptor udp_xfer_desc = + INTF_DESC ( struct udp_connection, xfer, udp_xfer_operations ); + +/*************************************************************************** + * + * Openers + * + *************************************************************************** + */ + +/** UDP IPv4 socket opener */ +struct socket_opener udp_ipv4_socket_opener __socket_opener = { + .semantics = UDP_SOCK_DGRAM, + .family = AF_INET, + .open = udp_open, +}; + +/** UDP IPv6 socket opener */ +struct socket_opener udp_ipv6_socket_opener __socket_opener = { + .semantics = UDP_SOCK_DGRAM, + .family = AF_INET6, + .open = udp_open, +}; + +/** Linkage hack */ +int udp_sock_dgram = UDP_SOCK_DGRAM; + +/** + * Open UDP URI + * + * @v xfer Data transfer interface + * @v uri URI + * @ret rc Return status code + */ +static int udp_open_uri ( struct interface *xfer, struct uri *uri ) { + struct sockaddr_tcpip peer; + + /* Sanity check */ + if ( ! uri->host ) + return -EINVAL; + + memset ( &peer, 0, sizeof ( peer ) ); + peer.st_port = htons ( uri_port ( uri, 0 ) ); + return xfer_open_named_socket ( xfer, SOCK_DGRAM, + ( struct sockaddr * ) &peer, + uri->host, NULL ); +} + +/** UDP URI opener */ +struct uri_opener udp_uri_opener __uri_opener = { + .scheme = "udp", + .open = udp_open_uri, +}; diff --git a/qemu/roms/ipxe/src/net/udp/dhcp.c b/qemu/roms/ipxe/src/net/udp/dhcp.c new file mode 100644 index 000000000..04fad04c2 --- /dev/null +++ b/qemu/roms/ipxe/src/net/udp/dhcp.c @@ -0,0 +1,1446 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <ctype.h> +#include <errno.h> +#include <assert.h> +#include <byteswap.h> +#include <ipxe/if_ether.h> +#include <ipxe/iobuf.h> +#include <ipxe/netdevice.h> +#include <ipxe/device.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/job.h> +#include <ipxe/retry.h> +#include <ipxe/tcpip.h> +#include <ipxe/ip.h> +#include <ipxe/uuid.h> +#include <ipxe/timer.h> +#include <ipxe/settings.h> +#include <ipxe/dhcp.h> +#include <ipxe/dhcpopts.h> +#include <ipxe/dhcppkt.h> +#include <ipxe/dhcp_arch.h> +#include <ipxe/features.h> + +/** @file + * + * Dynamic Host Configuration Protocol + * + */ + +struct dhcp_session; +static int dhcp_tx ( struct dhcp_session *dhcp ); + +/** + * DHCP operation types + * + * This table maps from DHCP message types (i.e. values of the @c + * DHCP_MESSAGE_TYPE option) to values of the "op" field within a DHCP + * packet. + */ +static const uint8_t dhcp_op[] = { + [DHCPDISCOVER] = BOOTP_REQUEST, + [DHCPOFFER] = BOOTP_REPLY, + [DHCPREQUEST] = BOOTP_REQUEST, + [DHCPDECLINE] = BOOTP_REQUEST, + [DHCPACK] = BOOTP_REPLY, + [DHCPNAK] = BOOTP_REPLY, + [DHCPRELEASE] = BOOTP_REQUEST, + [DHCPINFORM] = BOOTP_REQUEST, +}; + +/** Raw option data for options common to all DHCP requests */ +static uint8_t dhcp_request_options_data[] = { + DHCP_MESSAGE_TYPE, DHCP_BYTE ( 0 ), + DHCP_MAX_MESSAGE_SIZE, + DHCP_WORD ( ETH_MAX_MTU - 20 /* IP header */ - 8 /* UDP header */ ), + DHCP_CLIENT_ARCHITECTURE, DHCP_ARCH_CLIENT_ARCHITECTURE, + DHCP_CLIENT_NDI, DHCP_ARCH_CLIENT_NDI, + DHCP_VENDOR_CLASS_ID, DHCP_ARCH_VENDOR_CLASS_ID, + DHCP_USER_CLASS_ID, DHCP_STRING ( 'i', 'P', 'X', 'E' ), + DHCP_PARAMETER_REQUEST_LIST, + DHCP_OPTION ( DHCP_SUBNET_MASK, DHCP_ROUTERS, DHCP_DNS_SERVERS, + DHCP_LOG_SERVERS, DHCP_HOST_NAME, DHCP_DOMAIN_NAME, + DHCP_ROOT_PATH, DHCP_VENDOR_ENCAP, DHCP_VENDOR_CLASS_ID, + DHCP_TFTP_SERVER_NAME, DHCP_BOOTFILE_NAME, + DHCP_DOMAIN_SEARCH, + 128, 129, 130, 131, 132, 133, 134, 135, /* for PXE */ + DHCP_EB_ENCAP, DHCP_ISCSI_INITIATOR_IQN ), + DHCP_END +}; + +/** DHCP server address setting */ +const struct setting dhcp_server_setting __setting ( SETTING_MISC, + dhcp-server ) = { + .name = "dhcp-server", + .description = "DHCP server", + .tag = DHCP_SERVER_IDENTIFIER, + .type = &setting_type_ipv4, +}; + +/** + * Most recent DHCP transaction ID + * + * This is exposed for use by the fakedhcp code when reconstructing + * DHCP packets for PXE NBPs. + */ +uint32_t dhcp_last_xid; + +/** + * Name a DHCP packet type + * + * @v msgtype DHCP message type + * @ret string DHCP mesasge type name + */ +static inline const char * dhcp_msgtype_name ( unsigned int msgtype ) { + switch ( msgtype ) { + case DHCPNONE: return "BOOTP"; /* Non-DHCP packet */ + case DHCPDISCOVER: return "DHCPDISCOVER"; + case DHCPOFFER: return "DHCPOFFER"; + case DHCPREQUEST: return "DHCPREQUEST"; + case DHCPDECLINE: return "DHCPDECLINE"; + case DHCPACK: return "DHCPACK"; + case DHCPNAK: return "DHCPNAK"; + case DHCPRELEASE: return "DHCPRELEASE"; + case DHCPINFORM: return "DHCPINFORM"; + default: return "DHCP<invalid>"; + } +} + +/**************************************************************************** + * + * DHCP session + * + */ + +struct dhcp_session; + +/** DHCP session state operations */ +struct dhcp_session_state { + /** State name */ + const char *name; + /** + * Construct transmitted packet + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer Destination address + */ + int ( * tx ) ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer ); + /** Handle received packet + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer DHCP server address + * @v msgtype DHCP message type + * @v server_id DHCP server ID + */ + void ( * rx ) ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer, + uint8_t msgtype, struct in_addr server_id ); + /** Handle timer expiry + * + * @v dhcp DHCP session + */ + void ( * expired ) ( struct dhcp_session *dhcp ); + /** Transmitted message type */ + uint8_t tx_msgtype; + /** Apply minimum timeout */ + uint8_t apply_min_timeout; +}; + +static struct dhcp_session_state dhcp_state_discover; +static struct dhcp_session_state dhcp_state_request; +static struct dhcp_session_state dhcp_state_proxy; +static struct dhcp_session_state dhcp_state_pxebs; + +/** A DHCP session */ +struct dhcp_session { + /** Reference counter */ + struct refcnt refcnt; + /** Job control interface */ + struct interface job; + /** Data transfer interface */ + struct interface xfer; + + /** Network device being configured */ + struct net_device *netdev; + /** Local socket address */ + struct sockaddr_in local; + /** State of the session */ + struct dhcp_session_state *state; + /** Transaction ID (in network-endian order) */ + uint32_t xid; + + /** Offered IP address */ + struct in_addr offer; + /** DHCP server */ + struct in_addr server; + /** DHCP offer priority */ + int priority; + + /** ProxyDHCP protocol extensions should be ignored */ + int no_pxedhcp; + /** ProxyDHCP server */ + struct in_addr proxy_server; + /** ProxyDHCP offer */ + struct dhcp_packet *proxy_offer; + /** ProxyDHCP offer priority */ + int proxy_priority; + + /** PXE Boot Server type */ + uint16_t pxe_type; + /** List of PXE Boot Servers to attempt */ + struct in_addr *pxe_attempt; + /** List of PXE Boot Servers to accept */ + struct in_addr *pxe_accept; + + /** Retransmission timer */ + struct retry_timer timer; + /** Transmission counter */ + unsigned int count; + /** Start time of the current state (in ticks) */ + unsigned long start; +}; + +/** + * Free DHCP session + * + * @v refcnt Reference counter + */ +static void dhcp_free ( struct refcnt *refcnt ) { + struct dhcp_session *dhcp = + container_of ( refcnt, struct dhcp_session, refcnt ); + + netdev_put ( dhcp->netdev ); + dhcppkt_put ( dhcp->proxy_offer ); + free ( dhcp ); +} + +/** + * Mark DHCP session as complete + * + * @v dhcp DHCP session + * @v rc Return status code + */ +static void dhcp_finished ( struct dhcp_session *dhcp, int rc ) { + + /* Stop retry timer */ + stop_timer ( &dhcp->timer ); + + /* Shut down interfaces */ + intf_shutdown ( &dhcp->xfer, rc ); + intf_shutdown ( &dhcp->job, rc ); +} + +/** + * Transition to new DHCP session state + * + * @v dhcp DHCP session + * @v state New session state + */ +static void dhcp_set_state ( struct dhcp_session *dhcp, + struct dhcp_session_state *state ) { + + DBGC ( dhcp, "DHCP %p entering %s state\n", dhcp, state->name ); + dhcp->state = state; + dhcp->start = currticks(); + stop_timer ( &dhcp->timer ); + dhcp->timer.min_timeout = + ( state->apply_min_timeout ? DHCP_MIN_TIMEOUT : 0 ); + dhcp->timer.max_timeout = DHCP_MAX_TIMEOUT; + start_timer_nodelay ( &dhcp->timer ); +} + +/** + * Check if DHCP packet contains PXE options + * + * @v dhcppkt DHCP packet + * @ret has_pxeopts DHCP packet contains PXE options + * + * It is assumed that the packet is already known to contain option 60 + * set to "PXEClient". + */ +static int dhcp_has_pxeopts ( struct dhcp_packet *dhcppkt ) { + + /* Check for a boot filename */ + if ( dhcppkt_fetch ( dhcppkt, DHCP_BOOTFILE_NAME, NULL, 0 ) > 0 ) + return 1; + + /* Check for a PXE boot menu */ + if ( dhcppkt_fetch ( dhcppkt, DHCP_PXE_BOOT_MENU, NULL, 0 ) > 0 ) + return 1; + + return 0; +} + +/**************************************************************************** + * + * DHCP state machine + * + */ + +/** + * Construct transmitted packet for DHCP discovery + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer Destination address + */ +static int dhcp_discovery_tx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt __unused, + struct sockaddr_in *peer ) { + + DBGC ( dhcp, "DHCP %p DHCPDISCOVER\n", dhcp ); + + /* Set server address */ + peer->sin_addr.s_addr = INADDR_BROADCAST; + peer->sin_port = htons ( BOOTPS_PORT ); + + return 0; +} + +/** + * Handle received packet during DHCP discovery + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer DHCP server address + * @v msgtype DHCP message type + * @v server_id DHCP server ID + */ +static void dhcp_discovery_rx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer, uint8_t msgtype, + struct in_addr server_id ) { + struct in_addr ip; + char vci[9]; /* "PXEClient" */ + int vci_len; + int has_pxeclient; + int8_t priority = 0; + uint8_t no_pxedhcp = 0; + unsigned long elapsed; + + DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp, + dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ), + ntohs ( peer->sin_port ) ); + if ( server_id.s_addr != peer->sin_addr.s_addr ) + DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) ); + + /* Identify offered IP address */ + ip = dhcppkt->dhcphdr->yiaddr; + if ( ip.s_addr ) + DBGC ( dhcp, " for %s", inet_ntoa ( ip ) ); + + /* Identify "PXEClient" vendor class */ + vci_len = dhcppkt_fetch ( dhcppkt, DHCP_VENDOR_CLASS_ID, + vci, sizeof ( vci ) ); + has_pxeclient = ( ( vci_len >= ( int ) sizeof ( vci ) ) && + ( strncmp ( "PXEClient", vci, sizeof (vci) ) == 0 )); + if ( has_pxeclient ) { + DBGC ( dhcp, "%s", + ( dhcp_has_pxeopts ( dhcppkt ) ? " pxe" : " proxy" ) ); + } + + /* Identify priority */ + dhcppkt_fetch ( dhcppkt, DHCP_EB_PRIORITY, &priority, + sizeof ( priority ) ); + if ( priority ) + DBGC ( dhcp, " pri %d", priority ); + + /* Identify ignore-PXE flag */ + dhcppkt_fetch ( dhcppkt, DHCP_EB_NO_PXEDHCP, &no_pxedhcp, + sizeof ( no_pxedhcp ) ); + if ( no_pxedhcp ) + DBGC ( dhcp, " nopxe" ); + DBGC ( dhcp, "\n" ); + + /* Select as DHCP offer, if applicable */ + if ( ip.s_addr && ( peer->sin_port == htons ( BOOTPS_PORT ) ) && + ( ( msgtype == DHCPOFFER ) || ( ! msgtype /* BOOTP */ ) ) && + ( priority >= dhcp->priority ) ) { + dhcp->offer = ip; + dhcp->server = server_id; + dhcp->priority = priority; + dhcp->no_pxedhcp = no_pxedhcp; + } + + /* Select as ProxyDHCP offer, if applicable */ + if ( server_id.s_addr && has_pxeclient && + ( priority >= dhcp->proxy_priority ) ) { + dhcppkt_put ( dhcp->proxy_offer ); + dhcp->proxy_server = server_id; + dhcp->proxy_offer = dhcppkt_get ( dhcppkt ); + dhcp->proxy_priority = priority; + } + + /* We can exit the discovery state when we have a valid + * DHCPOFFER, and either: + * + * o The DHCPOFFER instructs us to ignore ProxyDHCPOFFERs, or + * o We have a valid ProxyDHCPOFFER, or + * o We have allowed sufficient time for ProxyDHCPOFFERs. + */ + + /* If we don't yet have a DHCPOFFER, do nothing */ + if ( ! dhcp->offer.s_addr ) + return; + + /* If we can't yet transition to DHCPREQUEST, do nothing */ + elapsed = ( currticks() - dhcp->start ); + if ( ! ( dhcp->no_pxedhcp || dhcp->proxy_offer || + ( elapsed > PROXYDHCP_MAX_TIMEOUT ) ) ) + return; + + /* Transition to DHCPREQUEST */ + dhcp_set_state ( dhcp, &dhcp_state_request ); +} + +/** + * Handle timer expiry during DHCP discovery + * + * @v dhcp DHCP session + */ +static void dhcp_discovery_expired ( struct dhcp_session *dhcp ) { + unsigned long elapsed = ( currticks() - dhcp->start ); + + /* Give up waiting for ProxyDHCP before we reach the failure point */ + if ( dhcp->offer.s_addr && ( elapsed > PROXYDHCP_MAX_TIMEOUT ) ) { + dhcp_set_state ( dhcp, &dhcp_state_request ); + return; + } + + /* Otherwise, retransmit current packet */ + dhcp_tx ( dhcp ); +} + +/** DHCP discovery state operations */ +static struct dhcp_session_state dhcp_state_discover = { + .name = "discovery", + .tx = dhcp_discovery_tx, + .rx = dhcp_discovery_rx, + .expired = dhcp_discovery_expired, + .tx_msgtype = DHCPDISCOVER, + .apply_min_timeout = 1, +}; + +/** + * Construct transmitted packet for DHCP request + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer Destination address + */ +static int dhcp_request_tx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer ) { + int rc; + + DBGC ( dhcp, "DHCP %p DHCPREQUEST to %s:%d", + dhcp, inet_ntoa ( dhcp->server ), BOOTPS_PORT ); + DBGC ( dhcp, " for %s\n", inet_ntoa ( dhcp->offer ) ); + + /* Set server ID */ + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_SERVER_IDENTIFIER, + &dhcp->server, + sizeof ( dhcp->server ) ) ) != 0 ) + return rc; + + /* Set requested IP address */ + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_REQUESTED_ADDRESS, + &dhcp->offer, + sizeof ( dhcp->offer ) ) ) != 0 ) + return rc; + + /* Set server address */ + peer->sin_addr.s_addr = INADDR_BROADCAST; + peer->sin_port = htons ( BOOTPS_PORT ); + + return 0; +} + +/** + * Handle received packet during DHCP request + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer DHCP server address + * @v msgtype DHCP message type + * @v server_id DHCP server ID + */ +static void dhcp_request_rx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer, uint8_t msgtype, + struct in_addr server_id ) { + struct in_addr ip; + struct settings *parent; + struct settings *settings; + int rc; + + DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp, + dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ), + ntohs ( peer->sin_port ) ); + if ( server_id.s_addr != peer->sin_addr.s_addr ) + DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) ); + + /* Identify leased IP address */ + ip = dhcppkt->dhcphdr->yiaddr; + if ( ip.s_addr ) + DBGC ( dhcp, " for %s", inet_ntoa ( ip ) ); + DBGC ( dhcp, "\n" ); + + /* Filter out unacceptable responses */ + if ( peer->sin_port != htons ( BOOTPS_PORT ) ) + return; + if ( msgtype /* BOOTP */ && ( msgtype != DHCPACK ) ) + return; + if ( server_id.s_addr != dhcp->server.s_addr ) + return; + if ( ip.s_addr != dhcp->offer.s_addr ) + return; + + /* Record assigned address */ + dhcp->local.sin_addr = ip; + + /* Register settings */ + parent = netdev_settings ( dhcp->netdev ); + settings = &dhcppkt->settings; + if ( ( rc = register_settings ( settings, parent, + DHCP_SETTINGS_NAME ) ) != 0 ) { + DBGC ( dhcp, "DHCP %p could not register settings: %s\n", + dhcp, strerror ( rc ) ); + dhcp_finished ( dhcp, rc ); + return; + } + + /* Perform ProxyDHCP if applicable */ + if ( dhcp->proxy_offer /* Have ProxyDHCP offer */ && + ( ! dhcp->no_pxedhcp ) /* ProxyDHCP not disabled */ ) { + if ( dhcp_has_pxeopts ( dhcp->proxy_offer ) ) { + /* PXE options already present; register settings + * without performing a ProxyDHCPREQUEST + */ + settings = &dhcp->proxy_offer->settings; + if ( ( rc = register_settings ( settings, NULL, + PROXYDHCP_SETTINGS_NAME ) ) != 0 ) { + DBGC ( dhcp, "DHCP %p could not register " + "proxy settings: %s\n", + dhcp, strerror ( rc ) ); + dhcp_finished ( dhcp, rc ); + return; + } + } else { + /* PXE options not present; use a ProxyDHCPREQUEST */ + dhcp_set_state ( dhcp, &dhcp_state_proxy ); + return; + } + } + + /* Terminate DHCP */ + dhcp_finished ( dhcp, 0 ); +} + +/** + * Handle timer expiry during DHCP discovery + * + * @v dhcp DHCP session + */ +static void dhcp_request_expired ( struct dhcp_session *dhcp ) { + + /* Retransmit current packet */ + dhcp_tx ( dhcp ); +} + +/** DHCP request state operations */ +static struct dhcp_session_state dhcp_state_request = { + .name = "request", + .tx = dhcp_request_tx, + .rx = dhcp_request_rx, + .expired = dhcp_request_expired, + .tx_msgtype = DHCPREQUEST, + .apply_min_timeout = 0, +}; + +/** + * Construct transmitted packet for ProxyDHCP request + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer Destination address + */ +static int dhcp_proxy_tx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer ) { + int rc; + + DBGC ( dhcp, "DHCP %p ProxyDHCP REQUEST to %s\n", dhcp, + inet_ntoa ( dhcp->proxy_server ) ); + + /* Set server ID */ + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_SERVER_IDENTIFIER, + &dhcp->proxy_server, + sizeof ( dhcp->proxy_server ) ) ) != 0 ) + return rc; + + /* Set server address */ + peer->sin_addr = dhcp->proxy_server; + peer->sin_port = htons ( PXE_PORT ); + + return 0; +} + +/** + * Handle received packet during ProxyDHCP request + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer DHCP server address + * @v msgtype DHCP message type + * @v server_id DHCP server ID + */ +static void dhcp_proxy_rx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer, uint8_t msgtype, + struct in_addr server_id ) { + struct settings *settings = &dhcppkt->settings; + int rc; + + DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp, + dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ), + ntohs ( peer->sin_port ) ); + if ( server_id.s_addr != peer->sin_addr.s_addr ) + DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) ); + DBGC ( dhcp, "\n" ); + + /* Filter out unacceptable responses */ + if ( peer->sin_port != ntohs ( PXE_PORT ) ) + return; + if ( ( msgtype != DHCPOFFER ) && ( msgtype != DHCPACK ) ) + return; + if ( server_id.s_addr /* Linux PXE server omits server ID */ && + ( server_id.s_addr != dhcp->proxy_server.s_addr ) ) + return; + + /* Register settings */ + if ( ( rc = register_settings ( settings, NULL, + PROXYDHCP_SETTINGS_NAME ) ) != 0 ) { + DBGC ( dhcp, "DHCP %p could not register proxy settings: %s\n", + dhcp, strerror ( rc ) ); + dhcp_finished ( dhcp, rc ); + return; + } + + /* Terminate DHCP */ + dhcp_finished ( dhcp, 0 ); +} + +/** + * Handle timer expiry during ProxyDHCP request + * + * @v dhcp DHCP session + */ +static void dhcp_proxy_expired ( struct dhcp_session *dhcp ) { + unsigned long elapsed = ( currticks() - dhcp->start ); + + /* Give up waiting for ProxyDHCP before we reach the failure point */ + if ( elapsed > PROXYDHCP_MAX_TIMEOUT ) { + dhcp_finished ( dhcp, 0 ); + return; + } + + /* Retransmit current packet */ + dhcp_tx ( dhcp ); +} + +/** ProxyDHCP request state operations */ +static struct dhcp_session_state dhcp_state_proxy = { + .name = "ProxyDHCP", + .tx = dhcp_proxy_tx, + .rx = dhcp_proxy_rx, + .expired = dhcp_proxy_expired, + .tx_msgtype = DHCPREQUEST, + .apply_min_timeout = 0, +}; + +/** + * Construct transmitted packet for PXE Boot Server Discovery + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer Destination address + */ +static int dhcp_pxebs_tx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer ) { + struct dhcp_pxe_boot_menu_item menu_item = { 0, 0 }; + int rc; + + /* Set server address */ + peer->sin_addr = *(dhcp->pxe_attempt); + peer->sin_port = ( ( peer->sin_addr.s_addr == INADDR_BROADCAST ) ? + htons ( BOOTPS_PORT ) : htons ( PXE_PORT ) ); + + DBGC ( dhcp, "DHCP %p PXEBS REQUEST to %s:%d for type %d\n", + dhcp, inet_ntoa ( peer->sin_addr ), ntohs ( peer->sin_port ), + le16_to_cpu ( dhcp->pxe_type ) ); + + /* Set boot menu item */ + menu_item.type = dhcp->pxe_type; + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_PXE_BOOT_MENU_ITEM, + &menu_item, sizeof ( menu_item ) ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Check to see if PXE Boot Server address is acceptable + * + * @v dhcp DHCP session + * @v bs Boot Server address + * @ret accept Boot Server is acceptable + */ +static int dhcp_pxebs_accept ( struct dhcp_session *dhcp, + struct in_addr bs ) { + struct in_addr *accept; + + /* Accept if we have no acceptance filter */ + if ( ! dhcp->pxe_accept ) + return 1; + + /* Scan through acceptance list */ + for ( accept = dhcp->pxe_accept ; accept->s_addr ; accept++ ) { + if ( accept->s_addr == bs.s_addr ) + return 1; + } + + DBGC ( dhcp, "DHCP %p rejecting server %s\n", + dhcp, inet_ntoa ( bs ) ); + return 0; +} + +/** + * Handle received packet during PXE Boot Server Discovery + * + * @v dhcp DHCP session + * @v dhcppkt DHCP packet + * @v peer DHCP server address + * @v msgtype DHCP message type + * @v server_id DHCP server ID + */ +static void dhcp_pxebs_rx ( struct dhcp_session *dhcp, + struct dhcp_packet *dhcppkt, + struct sockaddr_in *peer, uint8_t msgtype, + struct in_addr server_id ) { + struct dhcp_pxe_boot_menu_item menu_item = { 0, 0 }; + int rc; + + DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp, + dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ), + ntohs ( peer->sin_port ) ); + if ( server_id.s_addr != peer->sin_addr.s_addr ) + DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) ); + + /* Identify boot menu item */ + dhcppkt_fetch ( dhcppkt, DHCP_PXE_BOOT_MENU_ITEM, + &menu_item, sizeof ( menu_item ) ); + if ( menu_item.type ) + DBGC ( dhcp, " for type %d", ntohs ( menu_item.type ) ); + DBGC ( dhcp, "\n" ); + + /* Filter out unacceptable responses */ + if ( ( peer->sin_port != htons ( BOOTPS_PORT ) ) && + ( peer->sin_port != htons ( PXE_PORT ) ) ) + return; + if ( msgtype != DHCPACK ) + return; + if ( menu_item.type != dhcp->pxe_type ) + return; + if ( ! dhcp_pxebs_accept ( dhcp, ( server_id.s_addr ? + server_id : peer->sin_addr ) ) ) + return; + + /* Register settings */ + if ( ( rc = register_settings ( &dhcppkt->settings, NULL, + PXEBS_SETTINGS_NAME ) ) != 0 ) { + DBGC ( dhcp, "DHCP %p could not register settings: %s\n", + dhcp, strerror ( rc ) ); + dhcp_finished ( dhcp, rc ); + return; + } + + /* Terminate DHCP */ + dhcp_finished ( dhcp, 0 ); +} + +/** + * Handle timer expiry during PXE Boot Server Discovery + * + * @v dhcp DHCP session + */ +static void dhcp_pxebs_expired ( struct dhcp_session *dhcp ) { + unsigned long elapsed = ( currticks() - dhcp->start ); + + /* Give up waiting before we reach the failure point, and fail + * over to the next server in the attempt list + */ + if ( elapsed > PXEBS_MAX_TIMEOUT ) { + dhcp->pxe_attempt++; + if ( dhcp->pxe_attempt->s_addr ) { + dhcp_set_state ( dhcp, &dhcp_state_pxebs ); + return; + } else { + dhcp_finished ( dhcp, -ETIMEDOUT ); + return; + } + } + + /* Retransmit current packet */ + dhcp_tx ( dhcp ); +} + +/** PXE Boot Server Discovery state operations */ +static struct dhcp_session_state dhcp_state_pxebs = { + .name = "PXEBS", + .tx = dhcp_pxebs_tx, + .rx = dhcp_pxebs_rx, + .expired = dhcp_pxebs_expired, + .tx_msgtype = DHCPREQUEST, + .apply_min_timeout = 1, +}; + +/**************************************************************************** + * + * Packet construction + * + */ + +/** + * Create a DHCP packet + * + * @v dhcppkt DHCP packet structure to fill in + * @v netdev Network device + * @v msgtype DHCP message type + * @v xid Transaction ID (in network-endian order) + * @v options Initial options to include (or NULL) + * @v options_len Length of initial options + * @v data Buffer for DHCP packet + * @v max_len Size of DHCP packet buffer + * @ret rc Return status code + * + * Creates a DHCP packet in the specified buffer, and initialise a + * DHCP packet structure. + */ +int dhcp_create_packet ( struct dhcp_packet *dhcppkt, + struct net_device *netdev, uint8_t msgtype, + uint32_t xid, const void *options, size_t options_len, + void *data, size_t max_len ) { + struct dhcphdr *dhcphdr = data; + int rc; + + /* Sanity check */ + if ( max_len < ( sizeof ( *dhcphdr ) + options_len ) ) + return -ENOSPC; + + /* Initialise DHCP packet content */ + memset ( dhcphdr, 0, max_len ); + dhcphdr->xid = xid; + dhcphdr->magic = htonl ( DHCP_MAGIC_COOKIE ); + dhcphdr->htype = ntohs ( netdev->ll_protocol->ll_proto ); + dhcphdr->op = dhcp_op[msgtype]; + dhcphdr->hlen = netdev->ll_protocol->ll_addr_len; + memcpy ( dhcphdr->chaddr, netdev->ll_addr, + netdev->ll_protocol->ll_addr_len ); + memcpy ( dhcphdr->options, options, options_len ); + + /* If the local link-layer address functions only as a name + * (i.e. cannot be used as a destination address), then + * request broadcast responses. + */ + if ( netdev->ll_protocol->flags & LL_NAME_ONLY ) + dhcphdr->flags |= htons ( BOOTP_FL_BROADCAST ); + + /* If the network device already has an IPv4 address then + * unicast responses from the DHCP server may be rejected, so + * request broadcast responses. + */ + if ( ipv4_has_any_addr ( netdev ) ) + dhcphdr->flags |= htons ( BOOTP_FL_BROADCAST ); + + /* Initialise DHCP packet structure */ + memset ( dhcppkt, 0, sizeof ( *dhcppkt ) ); + dhcppkt_init ( dhcppkt, data, max_len ); + + /* Set DHCP_MESSAGE_TYPE option */ + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_MESSAGE_TYPE, + &msgtype, sizeof ( msgtype ) ) ) != 0 ) + return rc; + + return 0; +} + +/** + * Create DHCP request packet + * + * @v dhcppkt DHCP packet structure to fill in + * @v netdev Network device + * @v msgtype DHCP message type + * @v xid Transaction ID (in network-endian order) + * @v ciaddr Client IP address + * @v data Buffer for DHCP packet + * @v max_len Size of DHCP packet buffer + * @ret rc Return status code + * + * Creates a DHCP request packet in the specified buffer, and + * initialise a DHCP packet structure. + */ +int dhcp_create_request ( struct dhcp_packet *dhcppkt, + struct net_device *netdev, unsigned int msgtype, + uint32_t xid, struct in_addr ciaddr, + void *data, size_t max_len ) { + struct dhcp_netdev_desc dhcp_desc; + struct dhcp_client_id client_id; + struct dhcp_client_uuid client_uuid; + uint8_t *dhcp_features; + size_t dhcp_features_len; + size_t ll_addr_len; + void *user_class; + ssize_t len; + int rc; + + /* Create DHCP packet */ + if ( ( rc = dhcp_create_packet ( dhcppkt, netdev, msgtype, xid, + dhcp_request_options_data, + sizeof ( dhcp_request_options_data ), + data, max_len ) ) != 0 ) { + DBG ( "DHCP could not create DHCP packet: %s\n", + strerror ( rc ) ); + goto err_create_packet; + } + + /* Set client IP address */ + dhcppkt->dhcphdr->ciaddr = ciaddr; + + /* Add options to identify the feature list */ + dhcp_features = table_start ( DHCP_FEATURES ); + dhcp_features_len = table_num_entries ( DHCP_FEATURES ); + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_EB_ENCAP, dhcp_features, + dhcp_features_len ) ) != 0 ) { + DBG ( "DHCP could not set features list option: %s\n", + strerror ( rc ) ); + goto err_store_features; + } + + /* Add options to identify the network device */ + fetch_raw_setting ( netdev_settings ( netdev ), &busid_setting, + &dhcp_desc, sizeof ( dhcp_desc ) ); + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_EB_BUS_ID, &dhcp_desc, + sizeof ( dhcp_desc ) ) ) != 0 ) { + DBG ( "DHCP could not set bus ID option: %s\n", + strerror ( rc ) ); + goto err_store_busid; + } + + /* Add DHCP client identifier. Required for Infiniband, and + * doesn't hurt other link layers. + */ + client_id.ll_proto = ntohs ( netdev->ll_protocol->ll_proto ); + ll_addr_len = netdev->ll_protocol->ll_addr_len; + assert ( ll_addr_len <= sizeof ( client_id.ll_addr ) ); + memcpy ( client_id.ll_addr, netdev->ll_addr, ll_addr_len ); + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_CLIENT_ID, &client_id, + ( ll_addr_len + 1 ) ) ) != 0 ) { + DBG ( "DHCP could not set client ID: %s\n", + strerror ( rc ) ); + goto err_store_client_id; + } + + /* Add client UUID, if we have one. Required for PXE. The + * PXE spec does not specify a byte ordering for UUIDs, but + * RFC4578 suggests that it follows the EFI spec, in which the + * first three fields are little-endian. + */ + client_uuid.type = DHCP_CLIENT_UUID_TYPE; + if ( ( len = fetch_uuid_setting ( NULL, &uuid_setting, + &client_uuid.uuid ) ) >= 0 ) { + uuid_mangle ( &client_uuid.uuid ); + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_CLIENT_UUID, + &client_uuid, + sizeof ( client_uuid ) ) ) != 0 ) { + DBG ( "DHCP could not set client UUID: %s\n", + strerror ( rc ) ); + goto err_store_client_uuid; + } + } + + /* Add user class, if we have one. */ + if ( ( len = fetch_raw_setting_copy ( NULL, &user_class_setting, + &user_class ) ) >= 0 ) { + if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_USER_CLASS_ID, + user_class, len ) ) != 0 ) { + DBG ( "DHCP could not set user class: %s\n", + strerror ( rc ) ); + goto err_store_user_class; + } + } + + err_store_user_class: + free ( user_class ); + err_store_client_uuid: + err_store_client_id: + err_store_busid: + err_store_features: + err_create_packet: + return rc; +} + +/**************************************************************************** + * + * Data transfer interface + * + */ + +/** + * Transmit DHCP request + * + * @v dhcp DHCP session + * @ret rc Return status code + */ +static int dhcp_tx ( struct dhcp_session *dhcp ) { + static struct sockaddr_in peer = { + .sin_family = AF_INET, + }; + struct xfer_metadata meta = { + .netdev = dhcp->netdev, + .src = ( struct sockaddr * ) &dhcp->local, + .dest = ( struct sockaddr * ) &peer, + }; + struct io_buffer *iobuf; + uint8_t msgtype = dhcp->state->tx_msgtype; + struct dhcp_packet dhcppkt; + int rc; + + /* Start retry timer. Do this first so that failures to + * transmit will be retried. + */ + start_timer ( &dhcp->timer ); + + /* Allocate buffer for packet */ + iobuf = xfer_alloc_iob ( &dhcp->xfer, DHCP_MIN_LEN ); + if ( ! iobuf ) + return -ENOMEM; + + /* Create basic DHCP packet in temporary buffer */ + if ( ( rc = dhcp_create_request ( &dhcppkt, dhcp->netdev, msgtype, + dhcp->xid, dhcp->local.sin_addr, + iobuf->data, + iob_tailroom ( iobuf ) ) ) != 0 ) { + DBGC ( dhcp, "DHCP %p could not construct DHCP request: %s\n", + dhcp, strerror ( rc ) ); + goto done; + } + + /* (Ab)use the "secs" field to convey metadata about the DHCP + * session state into packet traces. Useful for extracting + * debug information from non-debug builds. + */ + dhcppkt.dhcphdr->secs = htons ( ( ++(dhcp->count) << 2 ) | + ( dhcp->offer.s_addr ? 0x02 : 0 ) | + ( dhcp->proxy_offer ? 0x01 : 0 ) ); + + /* Fill in packet based on current state */ + if ( ( rc = dhcp->state->tx ( dhcp, &dhcppkt, &peer ) ) != 0 ) { + DBGC ( dhcp, "DHCP %p could not fill DHCP request: %s\n", + dhcp, strerror ( rc ) ); + goto done; + } + + /* Transmit the packet */ + iob_put ( iobuf, dhcppkt_len ( &dhcppkt ) ); + if ( ( rc = xfer_deliver ( &dhcp->xfer, iob_disown ( iobuf ), + &meta ) ) != 0 ) { + DBGC ( dhcp, "DHCP %p could not transmit UDP packet: %s\n", + dhcp, strerror ( rc ) ); + goto done; + } + + done: + free_iob ( iobuf ); + return rc; +} + +/** + * Receive new data + * + * @v dhcp DHCP session + * @v iobuf I/O buffer + * @v meta Transfer metadata + * @ret rc Return status code + */ +static int dhcp_deliver ( struct dhcp_session *dhcp, + struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + struct net_device *netdev = dhcp->netdev; + struct ll_protocol *ll_protocol = netdev->ll_protocol; + struct sockaddr_in *peer; + size_t data_len; + struct dhcp_packet *dhcppkt; + struct dhcphdr *dhcphdr; + uint8_t msgtype = 0; + struct in_addr server_id = { 0 }; + int rc = 0; + + /* Sanity checks */ + if ( ! meta->src ) { + DBGC ( dhcp, "DHCP %p received packet without source port\n", + dhcp ); + rc = -EINVAL; + goto err_no_src; + } + peer = ( struct sockaddr_in * ) meta->src; + + /* Create a DHCP packet containing the I/O buffer contents. + * Whilst we could just use the original buffer in situ, that + * would waste the unused space in the packet buffer, and also + * waste a relatively scarce fully-aligned I/O buffer. + */ + data_len = iob_len ( iobuf ); + dhcppkt = zalloc ( sizeof ( *dhcppkt ) + data_len ); + if ( ! dhcppkt ) { + rc = -ENOMEM; + goto err_alloc_dhcppkt; + } + dhcphdr = ( ( ( void * ) dhcppkt ) + sizeof ( *dhcppkt ) ); + memcpy ( dhcphdr, iobuf->data, data_len ); + dhcppkt_init ( dhcppkt, dhcphdr, data_len ); + + /* Identify message type */ + dhcppkt_fetch ( dhcppkt, DHCP_MESSAGE_TYPE, &msgtype, + sizeof ( msgtype ) ); + + /* Identify server ID */ + dhcppkt_fetch ( dhcppkt, DHCP_SERVER_IDENTIFIER, + &server_id, sizeof ( server_id ) ); + + /* Check for matching transaction ID */ + if ( dhcphdr->xid != dhcp->xid ) { + DBGC ( dhcp, "DHCP %p %s from %s:%d has bad transaction " + "ID\n", dhcp, dhcp_msgtype_name ( msgtype ), + inet_ntoa ( peer->sin_addr ), + ntohs ( peer->sin_port ) ); + rc = -EINVAL; + goto err_xid; + }; + + /* Check for matching client hardware address */ + if ( memcmp ( dhcphdr->chaddr, netdev->ll_addr, + ll_protocol->ll_addr_len ) != 0 ) { + DBGC ( dhcp, "DHCP %p %s from %s:%d has bad chaddr %s\n", + dhcp, dhcp_msgtype_name ( msgtype ), + inet_ntoa ( peer->sin_addr ), ntohs ( peer->sin_port ), + ll_protocol->ntoa ( dhcphdr->chaddr ) ); + rc = -EINVAL; + goto err_chaddr; + } + + /* Handle packet based on current state */ + dhcp->state->rx ( dhcp, dhcppkt, peer, msgtype, server_id ); + + err_chaddr: + err_xid: + dhcppkt_put ( dhcppkt ); + err_alloc_dhcppkt: + err_no_src: + free_iob ( iobuf ); + return rc; +} + +/** DHCP data transfer interface operations */ +static struct interface_operation dhcp_xfer_operations[] = { + INTF_OP ( xfer_deliver, struct dhcp_session *, dhcp_deliver ), +}; + +/** DHCP data transfer interface descriptor */ +static struct interface_descriptor dhcp_xfer_desc = + INTF_DESC ( struct dhcp_session, xfer, dhcp_xfer_operations ); + +/** + * Handle DHCP retry timer expiry + * + * @v timer DHCP retry timer + * @v fail Failure indicator + */ +static void dhcp_timer_expired ( struct retry_timer *timer, int fail ) { + struct dhcp_session *dhcp = + container_of ( timer, struct dhcp_session, timer ); + + /* If we have failed, terminate DHCP */ + if ( fail ) { + dhcp_finished ( dhcp, -ETIMEDOUT ); + return; + } + + /* Handle timer expiry based on current state */ + dhcp->state->expired ( dhcp ); +} + +/**************************************************************************** + * + * Job control interface + * + */ + +/** DHCP job control interface operations */ +static struct interface_operation dhcp_job_op[] = { + INTF_OP ( intf_close, struct dhcp_session *, dhcp_finished ), +}; + +/** DHCP job control interface descriptor */ +static struct interface_descriptor dhcp_job_desc = + INTF_DESC ( struct dhcp_session, job, dhcp_job_op ); + +/**************************************************************************** + * + * Instantiators + * + */ + +/** + * DHCP peer address for socket opening + * + * This is a dummy address; the only useful portion is the socket + * family (so that we get a UDP connection). The DHCP client will set + * the IP address and source port explicitly on each transmission. + */ +static struct sockaddr dhcp_peer = { + .sa_family = AF_INET, +}; + +/** + * Start DHCP state machine on a network device + * + * @v job Job control interface + * @v netdev Network device + * @ret rc Return status code + * + * Starts DHCP on the specified network device. If successful, the + * DHCPACK (and ProxyDHCPACK, if applicable) will be registered as + * option sources. + */ +int start_dhcp ( struct interface *job, struct net_device *netdev ) { + struct dhcp_session *dhcp; + int rc; + + /* Allocate and initialise structure */ + dhcp = zalloc ( sizeof ( *dhcp ) ); + if ( ! dhcp ) + return -ENOMEM; + ref_init ( &dhcp->refcnt, dhcp_free ); + intf_init ( &dhcp->job, &dhcp_job_desc, &dhcp->refcnt ); + intf_init ( &dhcp->xfer, &dhcp_xfer_desc, &dhcp->refcnt ); + timer_init ( &dhcp->timer, dhcp_timer_expired, &dhcp->refcnt ); + dhcp->netdev = netdev_get ( netdev ); + dhcp->local.sin_family = AF_INET; + dhcp->local.sin_port = htons ( BOOTPC_PORT ); + dhcp->xid = random(); + + /* Store DHCP transaction ID for fakedhcp code */ + dhcp_last_xid = dhcp->xid; + + /* Instantiate child objects and attach to our interfaces */ + if ( ( rc = xfer_open_socket ( &dhcp->xfer, SOCK_DGRAM, &dhcp_peer, + ( struct sockaddr * ) &dhcp->local ) ) != 0 ) + goto err; + + /* Enter DHCPDISCOVER state */ + dhcp_set_state ( dhcp, &dhcp_state_discover ); + + /* Attach parent interface, mortalise self, and return */ + intf_plug_plug ( &dhcp->job, job ); + ref_put ( &dhcp->refcnt ); + return 0; + + err: + dhcp_finished ( dhcp, rc ); + ref_put ( &dhcp->refcnt ); + return rc; +} + +/** + * Retrieve list of PXE boot servers for a given server type + * + * @v dhcp DHCP session + * @v raw DHCP PXE boot server list + * @v raw_len Length of DHCP PXE boot server list + * @v ip IP address list to fill in + * + * The caller must ensure that the IP address list has sufficient + * space. + */ +static void pxebs_list ( struct dhcp_session *dhcp, void *raw, + size_t raw_len, struct in_addr *ip ) { + struct dhcp_pxe_boot_server *server = raw; + size_t server_len; + unsigned int i; + + while ( raw_len ) { + if ( raw_len < sizeof ( *server ) ) { + DBGC ( dhcp, "DHCP %p malformed PXE server list\n", + dhcp ); + break; + } + server_len = offsetof ( typeof ( *server ), + ip[ server->num_ip ] ); + if ( raw_len < server_len ) { + DBGC ( dhcp, "DHCP %p malformed PXE server list\n", + dhcp ); + break; + } + if ( server->type == dhcp->pxe_type ) { + for ( i = 0 ; i < server->num_ip ; i++ ) + *(ip++) = server->ip[i]; + } + server = ( ( ( void * ) server ) + server_len ); + raw_len -= server_len; + } +} + +/** + * Start PXE Boot Server Discovery on a network device + * + * @v job Job control interface + * @v netdev Network device + * @v pxe_type PXE server type + * @ret rc Return status code + * + * Starts PXE Boot Server Discovery on the specified network device. + * If successful, the Boot Server ACK will be registered as an option + * source. + */ +int start_pxebs ( struct interface *job, struct net_device *netdev, + unsigned int pxe_type ) { + struct setting pxe_discovery_control_setting = + { .tag = DHCP_PXE_DISCOVERY_CONTROL }; + struct setting pxe_boot_servers_setting = + { .tag = DHCP_PXE_BOOT_SERVERS }; + struct setting pxe_boot_server_mcast_setting = + { .tag = DHCP_PXE_BOOT_SERVER_MCAST }; + ssize_t pxebs_list_len; + struct dhcp_session *dhcp; + struct in_addr *ip; + unsigned int pxe_discovery_control; + int rc; + + /* Get upper bound for PXE boot server IP address list */ + pxebs_list_len = fetch_raw_setting ( NULL, &pxe_boot_servers_setting, + NULL, 0 ); + if ( pxebs_list_len < 0 ) + pxebs_list_len = 0; + + /* Allocate and initialise structure */ + dhcp = zalloc ( sizeof ( *dhcp ) + sizeof ( *ip ) /* mcast */ + + sizeof ( *ip ) /* bcast */ + pxebs_list_len + + sizeof ( *ip ) /* terminator */ ); + if ( ! dhcp ) + return -ENOMEM; + ref_init ( &dhcp->refcnt, dhcp_free ); + intf_init ( &dhcp->job, &dhcp_job_desc, &dhcp->refcnt ); + intf_init ( &dhcp->xfer, &dhcp_xfer_desc, &dhcp->refcnt ); + timer_init ( &dhcp->timer, dhcp_timer_expired, &dhcp->refcnt ); + dhcp->netdev = netdev_get ( netdev ); + dhcp->local.sin_family = AF_INET; + fetch_ipv4_setting ( netdev_settings ( netdev ), &ip_setting, + &dhcp->local.sin_addr ); + dhcp->local.sin_port = htons ( BOOTPC_PORT ); + dhcp->pxe_type = cpu_to_le16 ( pxe_type ); + + /* Construct PXE boot server IP address lists */ + pxe_discovery_control = + fetch_uintz_setting ( NULL, &pxe_discovery_control_setting ); + ip = ( ( ( void * ) dhcp ) + sizeof ( *dhcp ) ); + dhcp->pxe_attempt = ip; + if ( ! ( pxe_discovery_control & PXEBS_NO_MULTICAST ) ) { + fetch_ipv4_setting ( NULL, &pxe_boot_server_mcast_setting, ip); + if ( ip->s_addr ) + ip++; + } + if ( ! ( pxe_discovery_control & PXEBS_NO_BROADCAST ) ) + (ip++)->s_addr = INADDR_BROADCAST; + if ( pxe_discovery_control & PXEBS_NO_UNKNOWN_SERVERS ) + dhcp->pxe_accept = ip; + if ( pxebs_list_len ) { + uint8_t buf[pxebs_list_len]; + + fetch_raw_setting ( NULL, &pxe_boot_servers_setting, + buf, sizeof ( buf ) ); + pxebs_list ( dhcp, buf, sizeof ( buf ), ip ); + } + if ( ! dhcp->pxe_attempt->s_addr ) { + DBGC ( dhcp, "DHCP %p has no PXE boot servers for type %04x\n", + dhcp, pxe_type ); + rc = -EINVAL; + goto err; + } + + /* Dump out PXE server lists */ + DBGC ( dhcp, "DHCP %p attempting", dhcp ); + for ( ip = dhcp->pxe_attempt ; ip->s_addr ; ip++ ) + DBGC ( dhcp, " %s", inet_ntoa ( *ip ) ); + DBGC ( dhcp, "\n" ); + if ( dhcp->pxe_accept ) { + DBGC ( dhcp, "DHCP %p accepting", dhcp ); + for ( ip = dhcp->pxe_accept ; ip->s_addr ; ip++ ) + DBGC ( dhcp, " %s", inet_ntoa ( *ip ) ); + DBGC ( dhcp, "\n" ); + } + + /* Instantiate child objects and attach to our interfaces */ + if ( ( rc = xfer_open_socket ( &dhcp->xfer, SOCK_DGRAM, &dhcp_peer, + ( struct sockaddr * ) &dhcp->local ) ) != 0 ) + goto err; + + /* Enter PXEBS state */ + dhcp_set_state ( dhcp, &dhcp_state_pxebs ); + + /* Attach parent interface, mortalise self, and return */ + intf_plug_plug ( &dhcp->job, job ); + ref_put ( &dhcp->refcnt ); + return 0; + + err: + dhcp_finished ( dhcp, rc ); + ref_put ( &dhcp->refcnt ); + return rc; +} + +/** DHCP network device configurator */ +struct net_device_configurator dhcp_configurator __net_device_configurator = { + .name = "dhcp", + .start = start_dhcp, +}; diff --git a/qemu/roms/ipxe/src/net/udp/dhcpv6.c b/qemu/roms/ipxe/src/net/udp/dhcpv6.c new file mode 100644 index 000000000..f7736d08e --- /dev/null +++ b/qemu/roms/ipxe/src/net/udp/dhcpv6.c @@ -0,0 +1,989 @@ +/* + * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/interface.h> +#include <ipxe/xfer.h> +#include <ipxe/iobuf.h> +#include <ipxe/open.h> +#include <ipxe/netdevice.h> +#include <ipxe/settings.h> +#include <ipxe/retry.h> +#include <ipxe/timer.h> +#include <ipxe/in.h> +#include <ipxe/crc32.h> +#include <ipxe/errortab.h> +#include <ipxe/ipv6.h> +#include <ipxe/dhcpv6.h> + +/** @file + * + * Dynamic Host Configuration Protocol for IPv6 + * + */ + +/* Disambiguate the various error causes */ +#define EPROTO_UNSPECFAIL __einfo_error ( EINFO_EPROTO_UNSPECFAIL ) +#define EINFO_EPROTO_UNSPECFAIL \ + __einfo_uniqify ( EINFO_EPROTO, 1, "Unspecified server failure" ) +#define EPROTO_NOADDRSAVAIL __einfo_error ( EINFO_EPROTO_NOADDRSAVAIL ) +#define EINFO_EPROTO_NOADDRSAVAIL \ + __einfo_uniqify ( EINFO_EPROTO, 2, "No addresses available" ) +#define EPROTO_NOBINDING __einfo_error ( EINFO_EPROTO_NOBINDING ) +#define EINFO_EPROTO_NOBINDING \ + __einfo_uniqify ( EINFO_EPROTO, 3, "Client record unavailable" ) +#define EPROTO_NOTONLINK __einfo_error ( EINFO_EPROTO_NOTONLINK ) +#define EINFO_EPROTO_NOTONLINK \ + __einfo_uniqify ( EINFO_EPROTO, 4, "Prefix not on link" ) +#define EPROTO_USEMULTICAST __einfo_error ( EINFO_EPROTO_USEMULTICAST ) +#define EINFO_EPROTO_USEMULTICAST \ + __einfo_uniqify ( EINFO_EPROTO, 5, "Use multicast address" ) +#define EPROTO_STATUS( status ) \ + EUNIQ ( EINFO_EPROTO, ( (status) & 0x0f ), EPROTO_UNSPECFAIL, \ + EPROTO_NOADDRSAVAIL, EPROTO_NOBINDING, \ + EPROTO_NOTONLINK, EPROTO_USEMULTICAST ) + +/** Human-readable error messages */ +struct errortab dhcpv6_errors[] __errortab = { + __einfo_errortab ( EINFO_EPROTO_NOADDRSAVAIL ), +}; + +/**************************************************************************** + * + * DHCPv6 option lists + * + */ + +/** A DHCPv6 option list */ +struct dhcpv6_option_list { + /** Data buffer */ + const void *data; + /** Length of data buffer */ + size_t len; +}; + +/** + * Find DHCPv6 option + * + * @v options DHCPv6 option list + * @v code Option code + * @ret option DHCPv6 option, or NULL if not found + */ +static const union dhcpv6_any_option * +dhcpv6_option ( struct dhcpv6_option_list *options, unsigned int code ) { + const union dhcpv6_any_option *option = options->data; + size_t remaining = options->len; + size_t data_len; + + /* Scan through list of options */ + while ( remaining >= sizeof ( option->header ) ) { + + /* Calculate and validate option length */ + remaining -= sizeof ( option->header ); + data_len = ntohs ( option->header.len ); + if ( data_len > remaining ) { + /* Malformed option list */ + return NULL; + } + + /* Return if we have found the specified option */ + if ( option->header.code == htons ( code ) ) + return option; + + /* Otherwise, move to the next option */ + option = ( ( ( void * ) option->header.data ) + data_len ); + remaining -= data_len; + } + + return NULL; +} + +/** + * Check DHCPv6 client or server identifier + * + * @v options DHCPv6 option list + * @v code Option code + * @v expected Expected value + * @v len Length of expected value + * @ret rc Return status code + */ +static int dhcpv6_check_duid ( struct dhcpv6_option_list *options, + unsigned int code, const void *expected, + size_t len ) { + const union dhcpv6_any_option *option; + const struct dhcpv6_duid_option *duid; + + /* Find option */ + option = dhcpv6_option ( options, code ); + if ( ! option ) + return -ENOENT; + duid = &option->duid; + + /* Check option length */ + if ( ntohs ( duid->header.len ) != len ) + return -EINVAL; + + /* Compare option value */ + if ( memcmp ( duid->duid, expected, len ) != 0 ) + return -EINVAL; + + return 0; +} + +/** + * Get DHCPv6 status code + * + * @v options DHCPv6 option list + * @ret rc Return status code + */ +static int dhcpv6_status_code ( struct dhcpv6_option_list *options ) { + const union dhcpv6_any_option *option; + const struct dhcpv6_status_code_option *status_code; + unsigned int status; + + /* Find status code option, if present */ + option = dhcpv6_option ( options, DHCPV6_STATUS_CODE ); + if ( ! option ) { + /* Omitted status code should be treated as "success" */ + return 0; + } + status_code = &option->status_code; + + /* Sanity check */ + if ( ntohs ( status_code->header.len ) < + ( sizeof ( *status_code ) - sizeof ( status_code->header ) ) ) { + return -EINVAL; + } + + /* Calculate iPXE error code from DHCPv6 status code */ + status = ntohs ( status_code->status ); + return ( status ? -EPROTO_STATUS ( status ) : 0 ); +} + +/** + * Get DHCPv6 identity association address + * + * @v options DHCPv6 option list + * @v iaid Identity association ID + * @v address IPv6 address to fill in + * @ret rc Return status code + */ +static int dhcpv6_iaaddr ( struct dhcpv6_option_list *options, uint32_t iaid, + struct in6_addr *address ) { + const union dhcpv6_any_option *option; + const struct dhcpv6_ia_na_option *ia_na; + const struct dhcpv6_iaaddr_option *iaaddr; + struct dhcpv6_option_list suboptions; + size_t len; + int rc; + + /* Find identity association option, if present */ + option = dhcpv6_option ( options, DHCPV6_IA_NA ); + if ( ! option ) + return -ENOENT; + ia_na = &option->ia_na; + + /* Sanity check */ + len = ntohs ( ia_na->header.len ); + if ( len < ( sizeof ( *ia_na ) - sizeof ( ia_na->header ) ) ) + return -EINVAL; + + /* Check identity association ID */ + if ( ia_na->iaid != htonl ( iaid ) ) + return -EINVAL; + + /* Construct IA_NA sub-options list */ + suboptions.data = ia_na->options; + suboptions.len = ( len + sizeof ( ia_na->header ) - + offsetof ( typeof ( *ia_na ), options ) ); + + /* Check IA_NA status code */ + if ( ( rc = dhcpv6_status_code ( &suboptions ) ) != 0 ) + return rc; + + /* Find identity association address, if present */ + option = dhcpv6_option ( &suboptions, DHCPV6_IAADDR ); + if ( ! option ) + return -ENOENT; + iaaddr = &option->iaaddr; + + /* Sanity check */ + len = ntohs ( iaaddr->header.len ); + if ( len < ( sizeof ( *iaaddr ) - sizeof ( iaaddr->header ) ) ) + return -EINVAL; + + /* Construct IAADDR sub-options list */ + suboptions.data = iaaddr->options; + suboptions.len = ( len + sizeof ( iaaddr->header ) - + offsetof ( typeof ( *iaaddr ), options ) ); + + /* Check IAADDR status code */ + if ( ( rc = dhcpv6_status_code ( &suboptions ) ) != 0 ) + return rc; + + /* Extract IPv6 address */ + memcpy ( address, &iaaddr->address, sizeof ( *address ) ); + + return 0; +} + +/**************************************************************************** + * + * DHCPv6 settings blocks + * + */ + +/** A DHCPv6 settings block */ +struct dhcpv6_settings { + /** Reference count */ + struct refcnt refcnt; + /** Settings block */ + struct settings settings; + /** Option list */ + struct dhcpv6_option_list options; +}; + +/** + * Check applicability of DHCPv6 setting + * + * @v settings Settings block + * @v setting Setting + * @ret applies Setting applies within this settings block + */ +static int dhcpv6_applies ( struct settings *settings __unused, + const struct setting *setting ) { + + return ( setting->scope == &ipv6_scope ); +} + +/** + * Fetch value of DHCPv6 setting + * + * @v settings Settings block + * @v setting Setting to fetch + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ +static int dhcpv6_fetch ( struct settings *settings, + struct setting *setting, + void *data, size_t len ) { + struct dhcpv6_settings *dhcpv6set = + container_of ( settings, struct dhcpv6_settings, settings ); + const union dhcpv6_any_option *option; + size_t option_len; + + /* Find option */ + option = dhcpv6_option ( &dhcpv6set->options, setting->tag ); + if ( ! option ) + return -ENOENT; + + /* Copy option to data buffer */ + option_len = ntohs ( option->header.len ); + if ( len > option_len ) + len = option_len; + memcpy ( data, option->header.data, len ); + return option_len; +} + +/** DHCPv6 settings operations */ +static struct settings_operations dhcpv6_settings_operations = { + .applies = dhcpv6_applies, + .fetch = dhcpv6_fetch, +}; + +/** + * Register DHCPv6 options as network device settings + * + * @v options DHCPv6 option list + * @v parent Parent settings block + * @ret rc Return status code + */ +static int dhcpv6_register ( struct dhcpv6_option_list *options, + struct settings *parent ) { + struct dhcpv6_settings *dhcpv6set; + void *data; + size_t len; + int rc; + + /* Allocate and initialise structure */ + dhcpv6set = zalloc ( sizeof ( *dhcpv6set ) + options->len ); + if ( ! dhcpv6set ) { + rc = -ENOMEM; + goto err_alloc; + } + ref_init ( &dhcpv6set->refcnt, NULL ); + settings_init ( &dhcpv6set->settings, &dhcpv6_settings_operations, + &dhcpv6set->refcnt, &ipv6_scope ); + data = ( ( ( void * ) dhcpv6set ) + sizeof ( *dhcpv6set ) ); + len = options->len; + memcpy ( data, options->data, len ); + dhcpv6set->options.data = data; + dhcpv6set->options.len = len; + + /* Register settings */ + if ( ( rc = register_settings ( &dhcpv6set->settings, parent, + DHCPV6_SETTINGS_NAME ) ) != 0 ) + goto err_register; + + err_register: + ref_put ( &dhcpv6set->refcnt ); + err_alloc: + return rc; +} + +/**************************************************************************** + * + * DHCPv6 protocol + * + */ + +/** Options to be requested */ +static uint16_t dhcpv6_requested_options[] = { + htons ( DHCPV6_DNS_SERVERS ), htons ( DHCPV6_DOMAIN_LIST ), + htons ( DHCPV6_BOOTFILE_URL ), htons ( DHCPV6_BOOTFILE_PARAM ), +}; + +/** + * Name a DHCPv6 packet type + * + * @v type DHCPv6 packet type + * @ret name DHCPv6 packet type name + */ +static __attribute__ (( unused )) const char * +dhcpv6_type_name ( unsigned int type ) { + static char buf[ 12 /* "UNKNOWN-xxx" + NUL */ ]; + + switch ( type ) { + case DHCPV6_SOLICIT: return "SOLICIT"; + case DHCPV6_ADVERTISE: return "ADVERTISE"; + case DHCPV6_REQUEST: return "REQUEST"; + case DHCPV6_REPLY: return "REPLY"; + case DHCPV6_INFORMATION_REQUEST: return "INFORMATION-REQUEST"; + default: + snprintf ( buf, sizeof ( buf ), "UNKNOWN-%d", type ); + return buf; + } +} + +/** A DHCPv6 session state */ +struct dhcpv6_session_state { + /** Current transmitted packet type */ + uint8_t tx_type; + /** Current expected received packet type */ + uint8_t rx_type; + /** Flags */ + uint8_t flags; + /** Next state (or NULL to terminate) */ + struct dhcpv6_session_state *next; +}; + +/** DHCPv6 session state flags */ +enum dhcpv6_session_state_flags { + /** Include identity association within request */ + DHCPV6_TX_IA_NA = 0x01, + /** Include leased IPv6 address within request */ + DHCPV6_TX_IAADDR = 0x02, + /** Record received server ID */ + DHCPV6_RX_RECORD_SERVER_ID = 0x04, + /** Record received IPv6 address */ + DHCPV6_RX_RECORD_IAADDR = 0x08, + /** Apply received IPv6 address */ + DHCPV6_RX_APPLY_IAADDR = 0x10, +}; + +/** DHCPv6 request state */ +static struct dhcpv6_session_state dhcpv6_request = { + .tx_type = DHCPV6_REQUEST, + .rx_type = DHCPV6_REPLY, + .flags = ( DHCPV6_TX_IA_NA | DHCPV6_TX_IAADDR | + DHCPV6_RX_RECORD_IAADDR | DHCPV6_RX_APPLY_IAADDR ), + .next = NULL, +}; + +/** DHCPv6 solicitation state */ +static struct dhcpv6_session_state dhcpv6_solicit = { + .tx_type = DHCPV6_SOLICIT, + .rx_type = DHCPV6_ADVERTISE, + .flags = ( DHCPV6_TX_IA_NA | DHCPV6_RX_RECORD_SERVER_ID | + DHCPV6_RX_RECORD_IAADDR ), + .next = &dhcpv6_request, +}; + +/** DHCPv6 information request state */ +static struct dhcpv6_session_state dhcpv6_information_request = { + .tx_type = DHCPV6_INFORMATION_REQUEST, + .rx_type = DHCPV6_REPLY, + .flags = 0, + .next = NULL, +}; + +/** A DHCPv6 session */ +struct dhcpv6_session { + /** Reference counter */ + struct refcnt refcnt; + /** Job control interface */ + struct interface job; + /** Data transfer interface */ + struct interface xfer; + + /** Network device being configured */ + struct net_device *netdev; + /** Transaction ID */ + uint8_t xid[3]; + /** Identity association ID */ + uint32_t iaid; + /** Start time (in ticks) */ + unsigned long start; + /** Client DUID */ + struct dhcpv6_duid_uuid client_duid; + /** Server DUID, if known */ + void *server_duid; + /** Server DUID length */ + size_t server_duid_len; + /** Leased IPv6 address */ + struct in6_addr lease; + + /** Retransmission timer */ + struct retry_timer timer; + + /** Current session state */ + struct dhcpv6_session_state *state; + /** Current timeout status code */ + int rc; +}; + +/** + * Free DHCPv6 session + * + * @v refcnt Reference count + */ +static void dhcpv6_free ( struct refcnt *refcnt ) { + struct dhcpv6_session *dhcpv6 = + container_of ( refcnt, struct dhcpv6_session, refcnt ); + + netdev_put ( dhcpv6->netdev ); + free ( dhcpv6->server_duid ); + free ( dhcpv6 ); +} + +/** + * Terminate DHCPv6 session + * + * @v dhcpv6 DHCPv6 session + * @v rc Reason for close + */ +static void dhcpv6_finished ( struct dhcpv6_session *dhcpv6, int rc ) { + + /* Stop timer */ + stop_timer ( &dhcpv6->timer ); + + /* Shut down interfaces */ + intf_shutdown ( &dhcpv6->xfer, rc ); + intf_shutdown ( &dhcpv6->job, rc ); +} + +/** + * Transition to new DHCPv6 session state + * + * @v dhcpv6 DHCPv6 session + * @v state New session state + */ +static void dhcpv6_set_state ( struct dhcpv6_session *dhcpv6, + struct dhcpv6_session_state *state ) { + + DBGC ( dhcpv6, "DHCPv6 %s entering %s state\n", dhcpv6->netdev->name, + dhcpv6_type_name ( state->tx_type ) ); + + /* Record state */ + dhcpv6->state = state; + + /* Default to -ETIMEDOUT if no more specific error is recorded */ + dhcpv6->rc = -ETIMEDOUT; + + /* Start timer to trigger transmission */ + start_timer_nodelay ( &dhcpv6->timer ); +} + +/** + * Get DHCPv6 user class + * + * @v data Data buffer + * @v len Length of data buffer + * @ret len Length of user class + */ +static size_t dhcpv6_user_class ( void *data, size_t len ) { + static const char default_user_class[4] = { 'i', 'P', 'X', 'E' }; + int actual_len; + + /* Fetch user-class setting, if defined */ + actual_len = fetch_raw_setting ( NULL, &user_class_setting, data, len ); + if ( actual_len >= 0 ) + return actual_len; + + /* Otherwise, use the default user class ("iPXE") */ + if ( len > sizeof ( default_user_class ) ) + len = sizeof ( default_user_class ); + memcpy ( data, default_user_class, len ); + return sizeof ( default_user_class ); +} + +/** + * Transmit current request + * + * @v dhcpv6 DHCPv6 session + * @ret rc Return status code + */ +static int dhcpv6_tx ( struct dhcpv6_session *dhcpv6 ) { + struct dhcpv6_duid_option *client_id; + struct dhcpv6_duid_option *server_id; + struct dhcpv6_ia_na_option *ia_na; + struct dhcpv6_iaaddr_option *iaaddr; + struct dhcpv6_option_request_option *option_request; + struct dhcpv6_user_class_option *user_class; + struct dhcpv6_elapsed_time_option *elapsed; + struct dhcpv6_header *dhcphdr; + struct io_buffer *iobuf; + size_t client_id_len; + size_t server_id_len; + size_t ia_na_len; + size_t option_request_len; + size_t user_class_string_len; + size_t user_class_len; + size_t elapsed_len; + size_t total_len; + int rc; + + /* Calculate lengths */ + client_id_len = ( sizeof ( *client_id ) + + sizeof ( dhcpv6->client_duid ) ); + server_id_len = ( dhcpv6->server_duid ? ( sizeof ( *server_id ) + + dhcpv6->server_duid_len ) :0); + if ( dhcpv6->state->flags & DHCPV6_TX_IA_NA ) { + ia_na_len = sizeof ( *ia_na ); + if ( dhcpv6->state->flags & DHCPV6_TX_IAADDR ) + ia_na_len += sizeof ( *iaaddr ); + } else { + ia_na_len = 0; + } + option_request_len = ( sizeof ( *option_request ) + + sizeof ( dhcpv6_requested_options ) ); + user_class_string_len = dhcpv6_user_class ( NULL, 0 ); + user_class_len = ( sizeof ( *user_class ) + + sizeof ( user_class->user_class[0] ) + + user_class_string_len ); + elapsed_len = sizeof ( *elapsed ); + total_len = ( sizeof ( *dhcphdr ) + client_id_len + server_id_len + + ia_na_len + option_request_len + user_class_len + + elapsed_len ); + + /* Allocate packet */ + iobuf = xfer_alloc_iob ( &dhcpv6->xfer, total_len ); + if ( ! iobuf ) + return -ENOMEM; + + /* Construct header */ + dhcphdr = iob_put ( iobuf, sizeof ( *dhcphdr ) ); + dhcphdr->type = dhcpv6->state->tx_type; + memcpy ( dhcphdr->xid, dhcpv6->xid, sizeof ( dhcphdr->xid ) ); + + /* Construct client identifier */ + client_id = iob_put ( iobuf, client_id_len ); + client_id->header.code = htons ( DHCPV6_CLIENT_ID ); + client_id->header.len = htons ( client_id_len - + sizeof ( client_id->header ) ); + memcpy ( client_id->duid, &dhcpv6->client_duid, + sizeof ( dhcpv6->client_duid ) ); + + /* Construct server identifier, if applicable */ + if ( server_id_len ) { + server_id = iob_put ( iobuf, server_id_len ); + server_id->header.code = htons ( DHCPV6_SERVER_ID ); + server_id->header.len = htons ( server_id_len - + sizeof ( server_id->header ) ); + memcpy ( server_id->duid, dhcpv6->server_duid, + dhcpv6->server_duid_len ); + } + + /* Construct identity association, if applicable */ + if ( ia_na_len ) { + ia_na = iob_put ( iobuf, ia_na_len ); + ia_na->header.code = htons ( DHCPV6_IA_NA ); + ia_na->header.len = htons ( ia_na_len - + sizeof ( ia_na->header ) ); + ia_na->iaid = htonl ( dhcpv6->iaid ); + ia_na->renew = htonl ( 0 ); + ia_na->rebind = htonl ( 0 ); + if ( dhcpv6->state->flags & DHCPV6_TX_IAADDR ) { + iaaddr = ( ( void * ) ia_na->options ); + iaaddr->header.code = htons ( DHCPV6_IAADDR ); + iaaddr->header.len = htons ( sizeof ( *iaaddr ) - + sizeof ( iaaddr->header )); + memcpy ( &iaaddr->address, &dhcpv6->lease, + sizeof ( iaaddr->address ) ); + iaaddr->preferred = htonl ( 0 ); + iaaddr->valid = htonl ( 0 ); + } + } + + /* Construct option request */ + option_request = iob_put ( iobuf, option_request_len ); + option_request->header.code = htons ( DHCPV6_OPTION_REQUEST ); + option_request->header.len = htons ( option_request_len - + sizeof ( option_request->header )); + memcpy ( option_request->requested, dhcpv6_requested_options, + sizeof ( dhcpv6_requested_options ) ); + + /* Construct user class */ + user_class = iob_put ( iobuf, user_class_len ); + user_class->header.code = htons ( DHCPV6_USER_CLASS ); + user_class->header.len = htons ( user_class_len - + sizeof ( user_class->header ) ); + user_class->user_class[0].len = htons ( user_class_string_len ); + dhcpv6_user_class ( user_class->user_class[0].string, + user_class_string_len ); + + /* Construct elapsed time */ + elapsed = iob_put ( iobuf, elapsed_len ); + elapsed->header.code = htons ( DHCPV6_ELAPSED_TIME ); + elapsed->header.len = htons ( elapsed_len - + sizeof ( elapsed->header ) ); + elapsed->elapsed = htons ( ( ( currticks() - dhcpv6->start ) * 100 ) / + TICKS_PER_SEC ); + + /* Sanity check */ + assert ( iob_len ( iobuf ) == total_len ); + + /* Transmit packet */ + if ( ( rc = xfer_deliver_iob ( &dhcpv6->xfer, iobuf ) ) != 0 ) { + DBGC ( dhcpv6, "DHCPv6 %s could not transmit: %s\n", + dhcpv6->netdev->name, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Handle timer expiry + * + * @v timer Retransmission timer + * @v fail Failure indicator + */ +static void dhcpv6_timer_expired ( struct retry_timer *timer, int fail ) { + struct dhcpv6_session *dhcpv6 = + container_of ( timer, struct dhcpv6_session, timer ); + + /* If we have failed, terminate DHCPv6 */ + if ( fail ) { + dhcpv6_finished ( dhcpv6, dhcpv6->rc ); + return; + } + + /* Restart timer */ + start_timer ( &dhcpv6->timer ); + + /* (Re)transmit current request */ + dhcpv6_tx ( dhcpv6 ); +} + +/** + * Receive new data + * + * @v dhcpv6 DHCPv6 session + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int dhcpv6_rx ( struct dhcpv6_session *dhcpv6, + struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + struct settings *parent = netdev_settings ( dhcpv6->netdev ); + struct sockaddr_in6 *src = ( ( struct sockaddr_in6 * ) meta->src ); + struct dhcpv6_header *dhcphdr = iobuf->data; + struct dhcpv6_option_list options; + const union dhcpv6_any_option *option; + int rc; + + /* Sanity checks */ + if ( iob_len ( iobuf ) < sizeof ( *dhcphdr ) ) { + DBGC ( dhcpv6, "DHCPv6 %s received packet too short (%zd " + "bytes, min %zd bytes)\n", dhcpv6->netdev->name, + iob_len ( iobuf ), sizeof ( *dhcphdr ) ); + rc = -EINVAL; + goto done; + } + assert ( src != NULL ); + assert ( src->sin6_family == AF_INET6 ); + DBGC ( dhcpv6, "DHCPv6 %s received %s from %s\n", + dhcpv6->netdev->name, dhcpv6_type_name ( dhcphdr->type ), + inet6_ntoa ( &src->sin6_addr ) ); + + /* Construct option list */ + options.data = dhcphdr->options; + options.len = ( iob_len ( iobuf ) - + offsetof ( typeof ( *dhcphdr ), options ) ); + + /* Verify client identifier */ + if ( ( rc = dhcpv6_check_duid ( &options, DHCPV6_CLIENT_ID, + &dhcpv6->client_duid, + sizeof ( dhcpv6->client_duid ) ) ) !=0){ + DBGC ( dhcpv6, "DHCPv6 %s received %s without correct client " + "ID: %s\n", dhcpv6->netdev->name, + dhcpv6_type_name ( dhcphdr->type ), strerror ( rc ) ); + goto done; + } + + /* Verify server identifier, if applicable */ + if ( dhcpv6->server_duid && + ( ( rc = dhcpv6_check_duid ( &options, DHCPV6_SERVER_ID, + dhcpv6->server_duid, + dhcpv6->server_duid_len ) ) != 0 ) ) { + DBGC ( dhcpv6, "DHCPv6 %s received %s without correct server " + "ID: %s\n", dhcpv6->netdev->name, + dhcpv6_type_name ( dhcphdr->type ), strerror ( rc ) ); + goto done; + } + + /* Check message type */ + if ( dhcphdr->type != dhcpv6->state->rx_type ) { + DBGC ( dhcpv6, "DHCPv6 %s received %s while expecting %s\n", + dhcpv6->netdev->name, dhcpv6_type_name ( dhcphdr->type ), + dhcpv6_type_name ( dhcpv6->state->rx_type ) ); + rc = -ENOTTY; + goto done; + } + + /* Fetch status code, if present */ + if ( ( rc = dhcpv6_status_code ( &options ) ) != 0 ) { + DBGC ( dhcpv6, "DHCPv6 %s received %s with error status: %s\n", + dhcpv6->netdev->name, dhcpv6_type_name ( dhcphdr->type ), + strerror ( rc ) ); + /* This is plausibly the error we want to return */ + dhcpv6->rc = rc; + goto done; + } + + /* Record identity association address, if applicable */ + if ( dhcpv6->state->flags & DHCPV6_RX_RECORD_IAADDR ) { + if ( ( rc = dhcpv6_iaaddr ( &options, dhcpv6->iaid, + &dhcpv6->lease ) ) != 0 ) { + DBGC ( dhcpv6, "DHCPv6 %s received %s with unusable " + "IAADDR: %s\n", dhcpv6->netdev->name, + dhcpv6_type_name ( dhcphdr->type ), + strerror ( rc ) ); + /* This is plausibly the error we want to return */ + dhcpv6->rc = rc; + goto done; + } + DBGC ( dhcpv6, "DHCPv6 %s received %s is for %s\n", + dhcpv6->netdev->name, dhcpv6_type_name ( dhcphdr->type ), + inet6_ntoa ( &dhcpv6->lease ) ); + } + + /* Record server ID, if applicable */ + if ( dhcpv6->state->flags & DHCPV6_RX_RECORD_SERVER_ID ) { + assert ( dhcpv6->server_duid == NULL ); + option = dhcpv6_option ( &options, DHCPV6_SERVER_ID ); + if ( ! option ) { + DBGC ( dhcpv6, "DHCPv6 %s received %s missing server " + "ID\n", dhcpv6->netdev->name, + dhcpv6_type_name ( dhcphdr->type ) ); + rc = -EINVAL; + goto done; + } + dhcpv6->server_duid_len = ntohs ( option->duid.header.len ); + dhcpv6->server_duid = malloc ( dhcpv6->server_duid_len ); + if ( ! dhcpv6->server_duid ) { + rc = -ENOMEM; + goto done; + } + memcpy ( dhcpv6->server_duid, option->duid.duid, + dhcpv6->server_duid_len ); + } + + /* Apply identity association address, if applicable */ + if ( dhcpv6->state->flags & DHCPV6_RX_APPLY_IAADDR ) { + if ( ( rc = ipv6_set_address ( dhcpv6->netdev, + &dhcpv6->lease ) ) != 0 ) { + DBGC ( dhcpv6, "DHCPv6 %s could not apply %s: %s\n", + dhcpv6->netdev->name, + inet6_ntoa ( &dhcpv6->lease ), strerror ( rc ) ); + /* This is plausibly the error we want to return */ + dhcpv6->rc = rc; + goto done; + } + } + + /* Transition to next state or complete DHCPv6, as applicable */ + if ( dhcpv6->state->next ) { + + /* Transition to next state */ + dhcpv6_set_state ( dhcpv6, dhcpv6->state->next ); + rc = 0; + + } else { + + /* Register settings */ + if ( ( rc = dhcpv6_register ( &options, parent ) ) != 0 ) { + DBGC ( dhcpv6, "DHCPv6 %s could not register " + "settings: %s\n", dhcpv6->netdev->name, + strerror ( rc ) ); + goto done; + } + + /* Mark as complete */ + dhcpv6_finished ( dhcpv6, 0 ); + DBGC ( dhcpv6, "DHCPv6 %s complete\n", dhcpv6->netdev->name ); + } + + done: + free_iob ( iobuf ); + return rc; +} + +/** DHCPv6 job control interface operations */ +static struct interface_operation dhcpv6_job_op[] = { + INTF_OP ( intf_close, struct dhcpv6_session *, dhcpv6_finished ), +}; + +/** DHCPv6 job control interface descriptor */ +static struct interface_descriptor dhcpv6_job_desc = + INTF_DESC ( struct dhcpv6_session, job, dhcpv6_job_op ); + +/** DHCPv6 data transfer interface operations */ +static struct interface_operation dhcpv6_xfer_op[] = { + INTF_OP ( xfer_deliver, struct dhcpv6_session *, dhcpv6_rx ), +}; + +/** DHCPv6 data transfer interface descriptor */ +static struct interface_descriptor dhcpv6_xfer_desc = + INTF_DESC ( struct dhcpv6_session, xfer, dhcpv6_xfer_op ); + +/** + * Start DHCPv6 + * + * @v job Job control interface + * @v netdev Network device + * @v stateful Perform stateful address autoconfiguration + * @ret rc Return status code + */ +int start_dhcpv6 ( struct interface *job, struct net_device *netdev, + int stateful ) { + struct ll_protocol *ll_protocol = netdev->ll_protocol; + struct dhcpv6_session *dhcpv6; + struct { + union { + struct sockaddr_in6 sin6; + struct sockaddr sa; + } client; + union { + struct sockaddr_in6 sin6; + struct sockaddr sa; + } server; + } addresses; + uint32_t xid; + int len; + int rc; + + /* Allocate and initialise structure */ + dhcpv6 = zalloc ( sizeof ( *dhcpv6 ) ); + if ( ! dhcpv6 ) + return -ENOMEM; + ref_init ( &dhcpv6->refcnt, dhcpv6_free ); + intf_init ( &dhcpv6->job, &dhcpv6_job_desc, &dhcpv6->refcnt ); + intf_init ( &dhcpv6->xfer, &dhcpv6_xfer_desc, &dhcpv6->refcnt ); + dhcpv6->netdev = netdev_get ( netdev ); + xid = random(); + memcpy ( dhcpv6->xid, &xid, sizeof ( dhcpv6->xid ) ); + dhcpv6->start = currticks(); + timer_init ( &dhcpv6->timer, dhcpv6_timer_expired, &dhcpv6->refcnt ); + + /* Construct client and server addresses */ + memset ( &addresses, 0, sizeof ( addresses ) ); + addresses.client.sin6.sin6_family = AF_INET6; + addresses.client.sin6.sin6_port = htons ( DHCPV6_CLIENT_PORT ); + addresses.server.sin6.sin6_family = AF_INET6; + ipv6_all_dhcp_relay_and_servers ( &addresses.server.sin6.sin6_addr ); + addresses.server.sin6.sin6_scope_id = netdev->index; + addresses.server.sin6.sin6_port = htons ( DHCPV6_SERVER_PORT ); + + /* Construct client DUID from system UUID */ + dhcpv6->client_duid.type = htons ( DHCPV6_DUID_UUID ); + if ( ( len = fetch_uuid_setting ( NULL, &uuid_setting, + &dhcpv6->client_duid.uuid ) ) < 0 ) { + rc = len; + DBGC ( dhcpv6, "DHCPv6 %s could not create DUID-UUID: %s\n", + dhcpv6->netdev->name, strerror ( rc ) ); + goto err_client_duid; + } + + /* Construct IAID from link-layer address */ + dhcpv6->iaid = crc32_le ( 0, netdev->ll_addr, ll_protocol->ll_addr_len); + DBGC ( dhcpv6, "DHCPv6 %s has XID %02x%02x%02x\n", dhcpv6->netdev->name, + dhcpv6->xid[0], dhcpv6->xid[1], dhcpv6->xid[2] ); + + /* Enter initial state */ + dhcpv6_set_state ( dhcpv6, ( stateful ? &dhcpv6_solicit : + &dhcpv6_information_request ) ); + + /* Open socket */ + if ( ( rc = xfer_open_socket ( &dhcpv6->xfer, SOCK_DGRAM, + &addresses.server.sa, + &addresses.client.sa ) ) != 0 ) { + DBGC ( dhcpv6, "DHCPv6 %s could not open socket: %s\n", + dhcpv6->netdev->name, strerror ( rc ) ); + goto err_open_socket; + } + + /* Attach parent interface, mortalise self, and return */ + intf_plug_plug ( &dhcpv6->job, job ); + ref_put ( &dhcpv6->refcnt ); + return 0; + + err_open_socket: + dhcpv6_finished ( dhcpv6, rc ); + err_client_duid: + ref_put ( &dhcpv6->refcnt ); + return rc; +} + +/** Boot filename setting */ +const struct setting filename6_setting __setting ( SETTING_BOOT, filename ) = { + .name = "filename", + .description = "Boot filename", + .tag = DHCPV6_BOOTFILE_URL, + .type = &setting_type_string, + .scope = &ipv6_scope, +}; + +/** DNS search list setting */ +const struct setting dnssl6_setting __setting ( SETTING_IP_EXTRA, dnssl ) = { + .name = "dnssl", + .description = "DNS search list", + .tag = DHCPV6_DOMAIN_LIST, + .type = &setting_type_dnssl, + .scope = &ipv6_scope, +}; diff --git a/qemu/roms/ipxe/src/net/udp/dns.c b/qemu/roms/ipxe/src/net/udp/dns.c new file mode 100644 index 000000000..fffe6e697 --- /dev/null +++ b/qemu/roms/ipxe/src/net/udp/dns.c @@ -0,0 +1,1152 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * Portions copyright (C) 2004 Anselm M. Hoffmeister + * <stockholm@users.sourceforge.net>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <ctype.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/refcnt.h> +#include <ipxe/iobuf.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/resolv.h> +#include <ipxe/retry.h> +#include <ipxe/tcpip.h> +#include <ipxe/settings.h> +#include <ipxe/features.h> +#include <ipxe/dhcp.h> +#include <ipxe/dhcpv6.h> +#include <ipxe/dns.h> + +/** @file + * + * DNS protocol + * + */ + +FEATURE ( FEATURE_PROTOCOL, "DNS", DHCP_EB_FEATURE_DNS, 1 ); + +/* Disambiguate the various error causes */ +#define ENXIO_NO_RECORD __einfo_error ( EINFO_ENXIO_NO_RECORD ) +#define EINFO_ENXIO_NO_RECORD \ + __einfo_uniqify ( EINFO_ENXIO, 0x01, "DNS name does not exist" ) +#define ENXIO_NO_NAMESERVER __einfo_error ( EINFO_ENXIO_NO_NAMESERVER ) +#define EINFO_ENXIO_NO_NAMESERVER \ + __einfo_uniqify ( EINFO_ENXIO, 0x02, "No DNS servers available" ) + +/** The DNS server */ +static union { + struct sockaddr sa; + struct sockaddr_tcpip st; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; +} nameserver = { + .st = { + .st_port = htons ( DNS_PORT ), + }, +}; + +/** The DNS search list */ +static struct dns_name dns_search; + +/** + * Encode a DNS name using RFC1035 encoding + * + * @v string DNS name as a string + * @v name DNS name to fill in + * @ret len Length of DNS name, or negative error + */ +int dns_encode ( const char *string, struct dns_name *name ) { + uint8_t *start = ( name->data + name->offset ); + uint8_t *end = ( name->data + name->len ); + uint8_t *dst = start; + size_t len = 0; + char c; + + /* Encode name */ + while ( ( c = *(string++) ) ) { + + /* Handle '.' separators */ + if ( c == '.' ) { + + /* Reject consecutive '.' */ + if ( ( len == 0 ) && ( dst != start ) ) + return -EINVAL; + + /* Terminate if this is the trailing '.' */ + if ( *string == '\0' ) + break; + + /* Reject initial non-terminating '.' */ + if ( len == 0 ) + return -EINVAL; + + /* Reset length */ + len = 0; + + } else { + + /* Increment length */ + len++; + + /* Check for overflow */ + if ( len > DNS_MAX_LABEL_LEN ) + return -EINVAL; + } + + /* Copy byte, update length */ + if ( ++dst < end ) { + *dst = c; + dst[-len] = len; + } + } + + /* Add terminating root marker */ + if ( len ) + dst++; + if ( dst < end ) + *dst = '\0'; + dst++; + + return ( dst - start ); +} + +/** + * Find start of valid label within an RFC1035-encoded DNS name + * + * @v name DNS name + * @v offset Current offset + * @ret offset Offset of label, or negative error + */ +static int dns_label ( struct dns_name *name, size_t offset ) { + const uint8_t *byte; + const uint16_t *word; + size_t len; + size_t ptr; + + while ( 1 ) { + + /* Fail if we have overrun the DNS name */ + if ( ( offset + sizeof ( *byte) ) > name->len ) + return -EINVAL; + byte = ( name->data + offset ); + + /* Follow compression pointer, if applicable */ + if ( DNS_IS_COMPRESSED ( *byte ) ) { + + /* Fail if we have overrun the DNS name */ + if ( ( offset + sizeof ( *word ) ) > name->len ) + return -EINVAL; + word = ( name->data + offset ); + + /* Extract pointer to new offset */ + ptr = DNS_COMPRESSED_OFFSET ( ntohs ( *word ) ); + + /* Fail if pointer does not point backwards. + * (This guarantees termination of the + * function.) + */ + if ( ptr >= offset ) + return -EINVAL; + + /* Continue from new offset */ + offset = ptr; + continue; + } + + /* Fail if we have overrun the DNS name */ + len = *byte; + if ( ( offset + sizeof ( *byte ) + len ) > name->len ) + return -EINVAL; + + /* We have a valid label */ + return offset; + } +} + +/** + * Decode RFC1035-encoded DNS name + * + * @v name DNS name + * @v data Output buffer + * @v len Length of output buffer + * @ret len Length of decoded DNS name, or negative error + */ +int dns_decode ( struct dns_name *name, char *data, size_t len ) { + unsigned int recursion_limit = name->len; /* Generous upper bound */ + int offset = name->offset; + const uint8_t *label; + size_t decoded_len = 0; + size_t label_len; + size_t copy_len; + + while ( recursion_limit-- ) { + + /* Find valid DNS label */ + offset = dns_label ( name, offset ); + if ( offset < 0 ) + return offset; + + /* Terminate if we have reached the root */ + label = ( name->data + offset ); + label_len = *(label++); + if ( label_len == 0 ) { + if ( decoded_len < len ) + *data = '\0'; + return decoded_len; + } + + /* Prepend '.' if applicable */ + if ( decoded_len && ( decoded_len++ < len ) ) + *(data++) = '.'; + + /* Copy label to output buffer */ + copy_len = ( ( decoded_len < len ) ? ( len - decoded_len ) : 0); + if ( copy_len > label_len ) + copy_len = label_len; + memcpy ( data, label, copy_len ); + data += copy_len; + decoded_len += label_len; + + /* Move to next label */ + offset += ( sizeof ( *label ) + label_len ); + } + + /* Recursion limit exceeded */ + return -EINVAL; +} + +/** + * Compare DNS names for equality + * + * @v first First DNS name + * @v second Second DNS name + * @ret rc Return status code + */ +int dns_compare ( struct dns_name *first, struct dns_name *second ) { + unsigned int recursion_limit = first->len; /* Generous upper bound */ + int first_offset = first->offset; + int second_offset = second->offset; + const uint8_t *first_label; + const uint8_t *second_label; + size_t label_len; + size_t len; + + while ( recursion_limit-- ) { + + /* Find valid DNS labels */ + first_offset = dns_label ( first, first_offset ); + if ( first_offset < 0 ) + return first_offset; + second_offset = dns_label ( second, second_offset ); + if ( second_offset < 0 ) + return second_offset; + + /* Compare label lengths */ + first_label = ( first->data + first_offset ); + second_label = ( second->data + second_offset ); + label_len = *(first_label++); + if ( label_len != *(second_label++) ) + return -ENOENT; + len = ( sizeof ( *first_label ) + label_len ); + + /* Terminate if we have reached the root */ + if ( label_len == 0 ) + return 0; + + /* Compare label contents (case-insensitively) */ + while ( label_len-- ) { + if ( tolower ( *(first_label++) ) != + tolower ( *(second_label++) ) ) + return -ENOENT; + } + + /* Move to next labels */ + first_offset += len; + second_offset += len; + } + + /* Recursion limit exceeded */ + return -EINVAL; +} + +/** + * Copy a DNS name + * + * @v src Source DNS name + * @v dst Destination DNS name + * @ret len Length of copied DNS name, or negative error + */ +int dns_copy ( struct dns_name *src, struct dns_name *dst ) { + unsigned int recursion_limit = src->len; /* Generous upper bound */ + int src_offset = src->offset; + size_t dst_offset = dst->offset; + const uint8_t *label; + size_t label_len; + size_t copy_len; + size_t len; + + while ( recursion_limit-- ) { + + /* Find valid DNS label */ + src_offset = dns_label ( src, src_offset ); + if ( src_offset < 0 ) + return src_offset; + + /* Copy as an uncompressed label */ + label = ( src->data + src_offset ); + label_len = *label; + len = ( sizeof ( *label ) + label_len ); + copy_len = ( ( dst_offset < dst->len ) ? + ( dst->len - dst_offset ) : 0 ); + if ( copy_len > len ) + copy_len = len; + memcpy ( ( dst->data + dst_offset ), label, copy_len ); + src_offset += len; + dst_offset += len; + + /* Terminate if we have reached the root */ + if ( label_len == 0 ) + return ( dst_offset - dst->offset ); + } + + /* Recursion limit exceeded */ + return -EINVAL; +} + +/** + * Skip RFC1035-encoded DNS name + * + * @v name DNS name + * @ret offset Offset to next name, or negative error + */ +int dns_skip ( struct dns_name *name ) { + unsigned int recursion_limit = name->len; /* Generous upper bound */ + int offset = name->offset; + int prev_offset; + const uint8_t *label; + size_t label_len; + + while ( recursion_limit-- ) { + + /* Find valid DNS label */ + prev_offset = offset; + offset = dns_label ( name, prev_offset ); + if ( offset < 0 ) + return offset; + + /* Terminate if we have reached a compression pointer */ + if ( offset != prev_offset ) + return ( prev_offset + sizeof ( uint16_t ) ); + + /* Skip this label */ + label = ( name->data + offset ); + label_len = *label; + offset += ( sizeof ( *label ) + label_len ); + + /* Terminate if we have reached the root */ + if ( label_len == 0 ) + return offset; + } + + /* Recursion limit exceeded */ + return -EINVAL; +} + +/** + * Skip RFC1035-encoded DNS name in search list + * + * @v name DNS name + * @ret offset Offset to next non-empty name, or negative error + */ +static int dns_skip_search ( struct dns_name *name ) { + int offset; + + /* Find next name */ + offset = dns_skip ( name ); + if ( offset < 0 ) + return offset; + + /* Skip over any subsequent empty names (e.g. due to padding + * bytes used in the NDP DNSSL option). + */ + while ( ( offset < ( ( int ) name->len ) ) && + ( *( ( uint8_t * ) ( name->data + offset ) ) == 0 ) ) { + offset++; + } + + return offset; +} + +/** + * Transcribe DNS name (for debugging) + * + * @v name DNS name + * @ret string Transcribed DNS name + */ +static const char * dns_name ( struct dns_name *name ) { + static char buf[256]; + int len; + + len = dns_decode ( name, buf, sizeof ( buf ) ); + return ( ( len < 0 ) ? "<INVALID>" : buf ); +} + +/** + * Name a DNS query type (for debugging) + * + * @v type Query type (in network byte order) + * @ret name Type name + */ +static const char * dns_type ( uint16_t type ) { + switch ( type ) { + case htons ( DNS_TYPE_A ): return "A"; + case htons ( DNS_TYPE_AAAA ): return "AAAA"; + case htons ( DNS_TYPE_CNAME ): return "CNAME"; + default: return "<UNKNOWN>"; + } +} + +/** A DNS request */ +struct dns_request { + /** Reference counter */ + struct refcnt refcnt; + /** Name resolution interface */ + struct interface resolv; + /** Data transfer interface */ + struct interface socket; + /** Retry timer */ + struct retry_timer timer; + + /** Socket address to fill in with resolved address */ + union { + struct sockaddr sa; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + } address; + /** Initial query type */ + uint16_t qtype; + /** Buffer for current query */ + struct { + /** Query header */ + struct dns_header query; + /** Name buffer */ + char name[DNS_MAX_NAME_LEN]; + /** Space for question */ + struct dns_question padding; + } __attribute__ (( packed )) buf; + /** Current query name */ + struct dns_name name; + /** Question within current query */ + struct dns_question *question; + /** Length of current query */ + size_t len; + /** Offset of search suffix within current query */ + size_t offset; + /** Search list */ + struct dns_name search; + /** Recursion counter */ + unsigned int recursion; +}; + +/** + * Mark DNS request as complete + * + * @v dns DNS request + * @v rc Return status code + */ +static void dns_done ( struct dns_request *dns, int rc ) { + + /* Stop the retry timer */ + stop_timer ( &dns->timer ); + + /* Shut down interfaces */ + intf_shutdown ( &dns->socket, rc ); + intf_shutdown ( &dns->resolv, rc ); +} + +/** + * Mark DNS request as resolved and complete + * + * @v dns DNS request + * @v rc Return status code + */ +static void dns_resolved ( struct dns_request *dns ) { + + DBGC ( dns, "DNS %p found address %s\n", + dns, sock_ntoa ( &dns->address.sa ) ); + + /* Return resolved address */ + resolv_done ( &dns->resolv, &dns->address.sa ); + + /* Mark operation as complete */ + dns_done ( dns, 0 ); +} + +/** + * Construct DNS question + * + * @v dns DNS request + * @ret rc Return status code + */ +static int dns_question ( struct dns_request *dns ) { + static struct dns_name search_root = { + .data = "", + .len = 1, + }; + struct dns_name *search = &dns->search; + int len; + size_t offset; + + /* Use root suffix if search list is empty */ + if ( search->offset == search->len ) + search = &search_root; + + /* Overwrite current suffix */ + dns->name.offset = dns->offset; + len = dns_copy ( search, &dns->name ); + if ( len < 0 ) + return len; + + /* Sanity check */ + offset = ( dns->name.offset + len ); + if ( offset > dns->name.len ) { + DBGC ( dns, "DNS %p name is too long\n", dns ); + return -EINVAL; + } + + /* Construct question */ + dns->question = ( ( ( void * ) &dns->buf ) + offset ); + dns->question->qtype = dns->qtype; + dns->question->qclass = htons ( DNS_CLASS_IN ); + + /* Store length */ + dns->len = ( offset + sizeof ( *(dns->question) ) ); + + /* Restore name */ + dns->name.offset = offsetof ( typeof ( dns->buf ), name ); + + DBGC2 ( dns, "DNS %p question is %s type %s\n", dns, + dns_name ( &dns->name ), dns_type ( dns->question->qtype ) ); + + return 0; +} + +/** + * Send DNS query + * + * @v dns DNS request + * @ret rc Return status code + */ +static int dns_send_packet ( struct dns_request *dns ) { + struct dns_header *query = &dns->buf.query; + + /* Start retransmission timer */ + start_timer ( &dns->timer ); + + /* Generate query identifier */ + query->id = random(); + + /* Send query */ + DBGC ( dns, "DNS %p sending query ID %#04x for %s type %s\n", dns, + ntohs ( query->id ), dns_name ( &dns->name ), + dns_type ( dns->question->qtype ) ); + + /* Send the data */ + return xfer_deliver_raw ( &dns->socket, query, dns->len ); +} + +/** + * Handle DNS retransmission timer expiry + * + * @v timer Retry timer + * @v fail Failure indicator + */ +static void dns_timer_expired ( struct retry_timer *timer, int fail ) { + struct dns_request *dns = + container_of ( timer, struct dns_request, timer ); + + if ( fail ) { + dns_done ( dns, -ETIMEDOUT ); + } else { + dns_send_packet ( dns ); + } +} + +/** + * Receive new data + * + * @v dns DNS request + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int dns_xfer_deliver ( struct dns_request *dns, + struct io_buffer *iobuf, + struct xfer_metadata *meta __unused ) { + struct dns_header *response = iobuf->data; + struct dns_header *query = &dns->buf.query; + unsigned int qtype = dns->question->qtype; + struct dns_name buf; + union dns_rr *rr; + int offset; + size_t answer_offset; + size_t next_offset; + size_t rdlength; + size_t name_len; + int rc; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *response ) ) { + DBGC ( dns, "DNS %p received underlength packet length %zd\n", + dns, iob_len ( iobuf ) ); + rc = -EINVAL; + goto done; + } + + /* Check response ID matches query ID */ + if ( response->id != query->id ) { + DBGC ( dns, "DNS %p received unexpected response ID %#04x " + "(wanted %d)\n", dns, ntohs ( response->id ), + ntohs ( query->id ) ); + rc = -EINVAL; + goto done; + } + DBGC ( dns, "DNS %p received response ID %#04x\n", + dns, ntohs ( response->id ) ); + + /* Check that we have exactly one question */ + if ( response->qdcount != htons ( 1 ) ) { + DBGC ( dns, "DNS %p received response with %d questions\n", + dns, ntohs ( response->qdcount ) ); + rc = -EINVAL; + goto done; + } + + /* Skip question section */ + buf.data = iobuf->data; + buf.offset = sizeof ( *response ); + buf.len = iob_len ( iobuf ); + offset = dns_skip ( &buf ); + if ( offset < 0 ) { + rc = offset; + DBGC ( dns, "DNS %p received response with malformed " + "question: %s\n", dns, strerror ( rc ) ); + goto done; + } + answer_offset = ( offset + sizeof ( struct dns_question ) ); + + /* Search through response for useful answers. Do this + * multiple times, to take advantage of useful nameservers + * which send us e.g. the CNAME *and* the A record for the + * pointed-to name. + */ + for ( buf.offset = answer_offset ; buf.offset != buf.len ; + buf.offset = next_offset ) { + + /* Check for valid name */ + offset = dns_skip ( &buf ); + if ( offset < 0 ) { + rc = offset; + DBGC ( dns, "DNS %p received response with malformed " + "answer: %s\n", dns, strerror ( rc ) ); + goto done; + } + + /* Check for sufficient space for resource record */ + rr = ( buf.data + offset ); + if ( ( offset + sizeof ( rr->common ) ) > buf.len ) { + DBGC ( dns, "DNS %p received response with underlength " + "RR\n", dns ); + rc = -EINVAL; + goto done; + } + rdlength = ntohs ( rr->common.rdlength ); + next_offset = ( offset + sizeof ( rr->common ) + rdlength ); + if ( next_offset > buf.len ) { + DBGC ( dns, "DNS %p received response with underlength " + "RR\n", dns ); + rc = -EINVAL; + goto done; + } + + /* Skip non-matching names */ + if ( dns_compare ( &buf, &dns->name ) != 0 ) { + DBGC2 ( dns, "DNS %p ignoring response for %s type " + "%s\n", dns, dns_name ( &buf ), + dns_type ( rr->common.type ) ); + continue; + } + + /* Handle answer */ + switch ( rr->common.type ) { + + case htons ( DNS_TYPE_AAAA ): + + /* Found the target AAAA record */ + if ( rdlength < sizeof ( dns->address.sin6.sin6_addr )){ + DBGC ( dns, "DNS %p received response with " + "underlength AAAA\n", dns ); + rc = -EINVAL; + goto done; + } + dns->address.sin6.sin6_family = AF_INET6; + memcpy ( &dns->address.sin6.sin6_addr, + &rr->aaaa.in6_addr, + sizeof ( dns->address.sin6.sin6_addr ) ); + dns_resolved ( dns ); + rc = 0; + goto done; + + case htons ( DNS_TYPE_A ): + + /* Found the target A record */ + if ( rdlength < sizeof ( dns->address.sin.sin_addr ) ) { + DBGC ( dns, "DNS %p received response with " + "underlength A\n", dns ); + rc = -EINVAL; + goto done; + } + dns->address.sin.sin_family = AF_INET; + dns->address.sin.sin_addr = rr->a.in_addr; + dns_resolved ( dns ); + rc = 0; + goto done; + + case htons ( DNS_TYPE_CNAME ): + + /* Terminate the operation if we recurse too far */ + if ( ++dns->recursion > DNS_MAX_CNAME_RECURSION ) { + DBGC ( dns, "DNS %p recursion exceeded\n", + dns ); + rc = -ELOOP; + dns_done ( dns, rc ); + goto done; + } + + /* Found a CNAME record; update query and recurse */ + buf.offset = ( offset + sizeof ( rr->cname ) ); + DBGC ( dns, "DNS %p found CNAME %s\n", + dns, dns_name ( &buf ) ); + dns->search.offset = dns->search.len; + name_len = dns_copy ( &buf, &dns->name ); + dns->offset = ( offsetof ( typeof ( dns->buf ), name ) + + name_len - 1 /* Strip root label */ ); + if ( ( rc = dns_question ( dns ) ) != 0 ) { + dns_done ( dns, rc ); + goto done; + } + next_offset = answer_offset; + break; + + default: + DBGC ( dns, "DNS %p got unknown record type %d\n", + dns, ntohs ( rr->common.type ) ); + break; + } + } + + /* Stop the retry timer. After this point, each code path + * must either restart the timer by calling dns_send_packet(), + * or mark the DNS operation as complete by calling + * dns_done() + */ + stop_timer ( &dns->timer ); + + /* Determine what to do next based on the type of query we + * issued and the response we received + */ + switch ( qtype ) { + + case htons ( DNS_TYPE_AAAA ): + /* We asked for an AAAA record and got nothing; try + * the A. + */ + DBGC ( dns, "DNS %p found no AAAA record; trying A\n", dns ); + dns->question->qtype = htons ( DNS_TYPE_A ); + dns_send_packet ( dns ); + rc = 0; + goto done; + + case htons ( DNS_TYPE_A ): + /* We asked for an A record and got nothing; + * try the CNAME. + */ + DBGC ( dns, "DNS %p found no A record; trying CNAME\n", dns ); + dns->question->qtype = htons ( DNS_TYPE_CNAME ); + dns_send_packet ( dns ); + rc = 0; + goto done; + + case htons ( DNS_TYPE_CNAME ): + /* We asked for a CNAME record. If we got a response + * (i.e. if the next AAAA/A query is already set up), + * then issue it. + */ + if ( qtype == dns->qtype ) { + dns_send_packet ( dns ); + rc = 0; + goto done; + } + + /* If we have already reached the end of the search list, + * then terminate lookup. + */ + if ( dns->search.offset == dns->search.len ) { + DBGC ( dns, "DNS %p found no CNAME record\n", dns ); + rc = -ENXIO_NO_RECORD; + dns_done ( dns, rc ); + goto done; + } + + /* Move to next entry in search list. This can never fail, + * since we have already used this entry. + */ + DBGC ( dns, "DNS %p found no CNAME record; trying next " + "suffix\n", dns ); + dns->search.offset = dns_skip_search ( &dns->search ); + if ( ( rc = dns_question ( dns ) ) != 0 ) { + dns_done ( dns, rc ); + goto done; + } + dns_send_packet ( dns ); + goto done; + + default: + assert ( 0 ); + rc = -EINVAL; + dns_done ( dns, rc ); + goto done; + } + + done: + /* Free I/O buffer */ + free_iob ( iobuf ); + return rc; +} + +/** + * Receive new data + * + * @v dns DNS request + * @v rc Reason for close + */ +static void dns_xfer_close ( struct dns_request *dns, int rc ) { + + if ( ! rc ) + rc = -ECONNABORTED; + + dns_done ( dns, rc ); +} + +/** DNS socket interface operations */ +static struct interface_operation dns_socket_operations[] = { + INTF_OP ( xfer_deliver, struct dns_request *, dns_xfer_deliver ), + INTF_OP ( intf_close, struct dns_request *, dns_xfer_close ), +}; + +/** DNS socket interface descriptor */ +static struct interface_descriptor dns_socket_desc = + INTF_DESC ( struct dns_request, socket, dns_socket_operations ); + +/** DNS resolver interface operations */ +static struct interface_operation dns_resolv_op[] = { + INTF_OP ( intf_close, struct dns_request *, dns_done ), +}; + +/** DNS resolver interface descriptor */ +static struct interface_descriptor dns_resolv_desc = + INTF_DESC ( struct dns_request, resolv, dns_resolv_op ); + +/** + * Resolve name using DNS + * + * @v resolv Name resolution interface + * @v name Name to resolve + * @v sa Socket address to fill in + * @ret rc Return status code + */ +static int dns_resolv ( struct interface *resolv, + const char *name, struct sockaddr *sa ) { + struct dns_request *dns; + struct dns_header *query; + size_t search_len; + int name_len; + int rc; + + /* Fail immediately if no DNS servers */ + if ( ! nameserver.sa.sa_family ) { + DBG ( "DNS not attempting to resolve \"%s\": " + "no DNS servers\n", name ); + rc = -ENXIO_NO_NAMESERVER; + goto err_no_nameserver; + } + + /* Determine whether or not to use search list */ + search_len = ( strchr ( name, '.' ) ? 0 : dns_search.len ); + + /* Allocate DNS structure */ + dns = zalloc ( sizeof ( *dns ) + search_len ); + if ( ! dns ) { + rc = -ENOMEM; + goto err_alloc_dns; + } + ref_init ( &dns->refcnt, NULL ); + intf_init ( &dns->resolv, &dns_resolv_desc, &dns->refcnt ); + intf_init ( &dns->socket, &dns_socket_desc, &dns->refcnt ); + timer_init ( &dns->timer, dns_timer_expired, &dns->refcnt ); + memcpy ( &dns->address.sa, sa, sizeof ( dns->address.sa ) ); + dns->search.data = ( ( ( void * ) dns ) + sizeof ( *dns ) ); + dns->search.len = search_len; + memcpy ( dns->search.data, dns_search.data, search_len ); + + /* Determine initial query type */ + switch ( nameserver.sa.sa_family ) { + case AF_INET: + dns->qtype = htons ( DNS_TYPE_A ); + break; + case AF_INET6: + dns->qtype = htons ( DNS_TYPE_AAAA ); + break; + default: + rc = -ENOTSUP; + goto err_type; + } + + /* Construct query */ + query = &dns->buf.query; + query->flags = htons ( DNS_FLAG_RD ); + query->qdcount = htons ( 1 ); + dns->name.data = &dns->buf; + dns->name.offset = offsetof ( typeof ( dns->buf ), name ); + dns->name.len = offsetof ( typeof ( dns->buf ), padding ); + name_len = dns_encode ( name, &dns->name ); + if ( name_len < 0 ) { + rc = name_len; + goto err_encode; + } + dns->offset = ( offsetof ( typeof ( dns->buf ), name ) + + name_len - 1 /* Strip root label */ ); + if ( ( rc = dns_question ( dns ) ) != 0 ) + goto err_question; + + /* Open UDP connection */ + if ( ( rc = xfer_open_socket ( &dns->socket, SOCK_DGRAM, + &nameserver.sa, NULL ) ) != 0 ) { + DBGC ( dns, "DNS %p could not open socket: %s\n", + dns, strerror ( rc ) ); + goto err_open_socket; + } + + /* Start timer to trigger first packet */ + start_timer_nodelay ( &dns->timer ); + + /* Attach parent interface, mortalise self, and return */ + intf_plug_plug ( &dns->resolv, resolv ); + ref_put ( &dns->refcnt ); + return 0; + + err_open_socket: + err_question: + err_encode: + err_type: + ref_put ( &dns->refcnt ); + err_alloc_dns: + err_no_nameserver: + return rc; +} + +/** DNS name resolver */ +struct resolver dns_resolver __resolver ( RESOLV_NORMAL ) = { + .name = "DNS", + .resolv = dns_resolv, +}; + +/****************************************************************************** + * + * Settings + * + ****************************************************************************** + */ + +/** + * Format DNS search list setting + * + * @v type Setting type + * @v raw Raw setting value + * @v raw_len Length of raw setting value + * @v buf Buffer to contain formatted value + * @v len Length of buffer + * @ret len Length of formatted value, or negative error + */ +static int format_dnssl_setting ( const struct setting_type *type __unused, + const void *raw, size_t raw_len, + char *buf, size_t len ) { + struct dns_name name = { + .data = ( ( void * ) raw ), + .len = raw_len, + }; + size_t remaining = len; + size_t total = 0; + int name_len; + + while ( name.offset < raw_len ) { + + /* Decode name */ + remaining = ( ( total < len ) ? ( len - total ) : 0 ); + name_len = dns_decode ( &name, ( buf + total ), remaining ); + if ( name_len < 0 ) + return name_len; + total += name_len; + + /* Move to next name */ + name.offset = dns_skip_search ( &name ); + + /* Add separator if applicable */ + if ( name.offset != raw_len ) { + if ( total < len ) + buf[total] = ' '; + total++; + } + } + + return total; +} + +/** A DNS search list setting type */ +const struct setting_type setting_type_dnssl __setting_type = { + .name = "dnssl", + .format = format_dnssl_setting, +}; + +/** IPv4 DNS server setting */ +const struct setting dns_setting __setting ( SETTING_IP_EXTRA, dns ) = { + .name = "dns", + .description = "DNS server", + .tag = DHCP_DNS_SERVERS, + .type = &setting_type_ipv4, +}; + +/** IPv6 DNS server setting */ +const struct setting dns6_setting __setting ( SETTING_IP_EXTRA, dns6 ) = { + .name = "dns6", + .description = "DNS server", + .tag = DHCPV6_DNS_SERVERS, + .type = &setting_type_ipv6, + .scope = &ipv6_scope, +}; + +/** DNS search list */ +const struct setting dnssl_setting __setting ( SETTING_IP_EXTRA, dnssl ) = { + .name = "dnssl", + .description = "DNS search list", + .tag = DHCP_DOMAIN_SEARCH, + .type = &setting_type_dnssl, +}; + +/** + * Apply DNS search list + * + */ +static void apply_dns_search ( void ) { + char *localdomain; + int len; + + /* Free existing search list */ + free ( dns_search.data ); + memset ( &dns_search, 0, sizeof ( dns_search ) ); + + /* Fetch DNS search list */ + len = fetch_setting_copy ( NULL, &dnssl_setting, NULL, NULL, + &dns_search.data ); + if ( len >= 0 ) { + dns_search.len = len; + return; + } + + /* If no DNS search list exists, try to fetch the local domain */ + fetch_string_setting_copy ( NULL, &domain_setting, &localdomain ); + if ( localdomain ) { + len = dns_encode ( localdomain, &dns_search ); + if ( len >= 0 ) { + dns_search.data = malloc ( len ); + if ( dns_search.data ) { + dns_search.len = len; + dns_encode ( localdomain, &dns_search ); + } + } + free ( localdomain ); + return; + } +} + +/** + * Apply DNS settings + * + * @ret rc Return status code + */ +static int apply_dns_settings ( void ) { + + /* Fetch DNS server address */ + nameserver.sa.sa_family = 0; + if ( fetch_ipv6_setting ( NULL, &dns6_setting, + &nameserver.sin6.sin6_addr ) >= 0 ) { + nameserver.sin6.sin6_family = AF_INET6; + } else if ( fetch_ipv4_setting ( NULL, &dns_setting, + &nameserver.sin.sin_addr ) >= 0 ) { + nameserver.sin.sin_family = AF_INET; + } + if ( nameserver.sa.sa_family ) { + DBG ( "DNS using nameserver %s\n", + sock_ntoa ( &nameserver.sa ) ); + } + + /* Fetch DNS search list */ + apply_dns_search(); + if ( DBG_LOG && ( dns_search.len != 0 ) ) { + struct dns_name name; + int offset; + + DBG ( "DNS search list:" ); + memcpy ( &name, &dns_search, sizeof ( name ) ); + while ( name.offset != name.len ) { + DBG ( " %s", dns_name ( &name ) ); + offset = dns_skip_search ( &name ); + if ( offset < 0 ) + break; + name.offset = offset; + } + DBG ( "\n" ); + } + + return 0; +} + +/** DNS settings applicator */ +struct settings_applicator dns_applicator __settings_applicator = { + .apply = apply_dns_settings, +}; diff --git a/qemu/roms/ipxe/src/net/udp/slam.c b/qemu/roms/ipxe/src/net/udp/slam.c new file mode 100644 index 000000000..3cb492d73 --- /dev/null +++ b/qemu/roms/ipxe/src/net/udp/slam.c @@ -0,0 +1,757 @@ +/* + * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <errno.h> +#include <assert.h> +#include <byteswap.h> +#include <ipxe/features.h> +#include <ipxe/iobuf.h> +#include <ipxe/bitmap.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/uri.h> +#include <ipxe/tcpip.h> +#include <ipxe/timer.h> +#include <ipxe/retry.h> + +/** @file + * + * Scalable Local Area Multicast protocol + * + * The SLAM protocol is supported only by Etherboot; it was designed + * and implemented by Eric Biederman. A server implementation is + * available in contrib/mini-slamd. There does not appear to be any + * documentation beyond a few sparse comments in Etherboot's + * proto_slam.c. + * + * SLAM packets use three types of data field: + * + * Nul : A single NUL (0) byte, used as a list terminator + * + * Raw : A block of raw data + * + * Int : A variable-length integer, in big-endian order. The length + * of the integer is encoded in the most significant three bits. + * + * Packets received by the client have the following layout: + * + * Int : Transaction identifier. This is an opaque value. + * + * Int : Total number of bytes in the transfer. + * + * Int : Block size, in bytes. + * + * Int : Packet sequence number within the transfer (if this packet + * contains data). + * + * Raw : Packet data (if this packet contains data). + * + * Packets transmitted by the client consist of a run-length-encoded + * representation of the received-blocks bitmap, looking something + * like: + * + * Int : Number of consecutive successfully-received packets + * Int : Number of consecutive missing packets + * Int : Number of consecutive successfully-received packets + * Int : Number of consecutive missing packets + * .... + * Nul + * + */ + +FEATURE ( FEATURE_PROTOCOL, "SLAM", DHCP_EB_FEATURE_SLAM, 1 ); + +/** Default SLAM server port */ +#define SLAM_DEFAULT_PORT 10000 + +/** Default SLAM multicast IP address */ +#define SLAM_DEFAULT_MULTICAST_IP \ + ( ( 239 << 24 ) | ( 255 << 16 ) | ( 1 << 8 ) | ( 1 << 0 ) ) + +/** Default SLAM multicast port */ +#define SLAM_DEFAULT_MULTICAST_PORT 10000 + +/** Maximum SLAM header length */ +#define SLAM_MAX_HEADER_LEN ( 7 /* transaction id */ + 7 /* total_bytes */ + \ + 7 /* block_size */ ) + +/** Maximum number of blocks to request per NACK + * + * This is a policy decision equivalent to selecting a TCP window + * size. + */ +#define SLAM_MAX_BLOCKS_PER_NACK 4 + +/** Maximum SLAM NACK length + * + * We only ever send a NACK for a single range of up to @c + * SLAM_MAX_BLOCKS_PER_NACK blocks. + */ +#define SLAM_MAX_NACK_LEN ( 7 /* block */ + 7 /* #blocks */ + 1 /* NUL */ ) + +/** SLAM slave timeout */ +#define SLAM_SLAVE_TIMEOUT ( 1 * TICKS_PER_SEC ) + +/** A SLAM request */ +struct slam_request { + /** Reference counter */ + struct refcnt refcnt; + + /** Data transfer interface */ + struct interface xfer; + /** Unicast socket */ + struct interface socket; + /** Multicast socket */ + struct interface mc_socket; + + /** Master client retry timer */ + struct retry_timer master_timer; + /** Slave client retry timer */ + struct retry_timer slave_timer; + + /** Cached header */ + uint8_t header[SLAM_MAX_HEADER_LEN]; + /** Size of cached header */ + size_t header_len; + /** Total number of bytes in transfer */ + unsigned long total_bytes; + /** Transfer block size */ + unsigned long block_size; + /** Number of blocks in transfer */ + unsigned long num_blocks; + /** Block bitmap */ + struct bitmap bitmap; + /** NACK sent flag */ + int nack_sent; +}; + +/** + * Free a SLAM request + * + * @v refcnt Reference counter + */ +static void slam_free ( struct refcnt *refcnt ) { + struct slam_request *slam = + container_of ( refcnt, struct slam_request, refcnt ); + + bitmap_free ( &slam->bitmap ); + free ( slam ); +} + +/** + * Mark SLAM request as complete + * + * @v slam SLAM request + * @v rc Return status code + */ +static void slam_finished ( struct slam_request *slam, int rc ) { + static const uint8_t slam_disconnect[] = { 0 }; + + DBGC ( slam, "SLAM %p finished with status code %d (%s)\n", + slam, rc, strerror ( rc ) ); + + /* Send a disconnect message if we ever sent anything to the + * server. + */ + if ( slam->nack_sent ) { + xfer_deliver_raw ( &slam->socket, slam_disconnect, + sizeof ( slam_disconnect ) ); + } + + /* Stop the retry timers */ + stop_timer ( &slam->master_timer ); + stop_timer ( &slam->slave_timer ); + + /* Close all data transfer interfaces */ + intf_shutdown ( &slam->socket, rc ); + intf_shutdown ( &slam->mc_socket, rc ); + intf_shutdown ( &slam->xfer, rc ); +} + +/**************************************************************************** + * + * TX datapath + * + */ + +/** + * Add a variable-length value to a SLAM packet + * + * @v slam SLAM request + * @v iobuf I/O buffer + * @v value Value to add + * @ret rc Return status code + * + * Adds a variable-length value to the end of an I/O buffer. Will + * always leave at least one byte of tailroom in the I/O buffer (to + * allow space for the terminating NUL). + */ +static int slam_put_value ( struct slam_request *slam, + struct io_buffer *iobuf, unsigned long value ) { + uint8_t *data; + size_t len; + unsigned int i; + + /* Calculate variable length required to store value. Always + * leave at least one byte in the I/O buffer. + */ + len = ( ( flsl ( value ) + 10 ) / 8 ); + if ( len >= iob_tailroom ( iobuf ) ) { + DBGC2 ( slam, "SLAM %p cannot add %zd-byte value\n", + slam, len ); + return -ENOBUFS; + } + /* There is no valid way within the protocol that we can end + * up trying to push a full-sized long (i.e. without space for + * the length encoding). + */ + assert ( len <= sizeof ( value ) ); + + /* Add value */ + data = iob_put ( iobuf, len ); + for ( i = len ; i-- ; ) { + data[i] = value; + value >>= 8; + } + *data |= ( len << 5 ); + assert ( value == 0 ); + + return 0; +} + +/** + * Send SLAM NACK packet + * + * @v slam SLAM request + * @ret rc Return status code + */ +static int slam_tx_nack ( struct slam_request *slam ) { + struct io_buffer *iobuf; + unsigned long first_block; + unsigned long num_blocks; + uint8_t *nul; + int rc; + + /* Mark NACK as sent, so that we know we have to disconnect later */ + slam->nack_sent = 1; + + /* Allocate I/O buffer */ + iobuf = xfer_alloc_iob ( &slam->socket, SLAM_MAX_NACK_LEN ); + if ( ! iobuf ) { + DBGC ( slam, "SLAM %p could not allocate I/O buffer\n", + slam ); + return -ENOMEM; + } + + /* Construct NACK. We always request only a single packet; + * this allows us to force multicast-TFTP-style flow control + * on the SLAM server, which will otherwise just blast the + * data out as fast as it can. On a gigabit network, without + * RX checksumming, this would inevitably cause packet drops. + */ + first_block = bitmap_first_gap ( &slam->bitmap ); + for ( num_blocks = 1 ; ; num_blocks++ ) { + if ( num_blocks >= SLAM_MAX_BLOCKS_PER_NACK ) + break; + if ( ( first_block + num_blocks ) >= slam->num_blocks ) + break; + if ( bitmap_test ( &slam->bitmap, + ( first_block + num_blocks ) ) ) + break; + } + if ( first_block ) { + DBGCP ( slam, "SLAM %p transmitting NACK for blocks " + "%ld-%ld\n", slam, first_block, + ( first_block + num_blocks - 1 ) ); + } else { + DBGC ( slam, "SLAM %p transmitting initial NACK for blocks " + "0-%ld\n", slam, ( num_blocks - 1 ) ); + } + if ( ( rc = slam_put_value ( slam, iobuf, first_block ) ) != 0 ) + return rc; + if ( ( rc = slam_put_value ( slam, iobuf, num_blocks ) ) != 0 ) + return rc; + nul = iob_put ( iobuf, 1 ); + *nul = 0; + + /* Transmit packet */ + return xfer_deliver_iob ( &slam->socket, iobuf ); +} + +/** + * Handle SLAM master client retry timer expiry + * + * @v timer Master retry timer + * @v fail Failure indicator + */ +static void slam_master_timer_expired ( struct retry_timer *timer, + int fail ) { + struct slam_request *slam = + container_of ( timer, struct slam_request, master_timer ); + + if ( fail ) { + /* Allow timer to stop running. We will terminate the + * connection only if the slave timer times out. + */ + DBGC ( slam, "SLAM %p giving up acting as master client\n", + slam ); + } else { + /* Retransmit NACK */ + start_timer ( timer ); + slam_tx_nack ( slam ); + } +} + +/** + * Handle SLAM slave client retry timer expiry + * + * @v timer Master retry timer + * @v fail Failure indicator + */ +static void slam_slave_timer_expired ( struct retry_timer *timer, + int fail ) { + struct slam_request *slam = + container_of ( timer, struct slam_request, slave_timer ); + + if ( fail ) { + /* Terminate connection */ + slam_finished ( slam, -ETIMEDOUT ); + } else { + /* Try sending a NACK */ + DBGC ( slam, "SLAM %p trying to become master client\n", + slam ); + start_timer ( timer ); + slam_tx_nack ( slam ); + } +} + +/**************************************************************************** + * + * RX datapath + * + */ + +/** + * Read and strip a variable-length value from a SLAM packet + * + * @v slam SLAM request + * @v iobuf I/O buffer + * @v value Value to fill in, or NULL to ignore value + * @ret rc Return status code + * + * Reads a variable-length value from the start of the I/O buffer. + */ +static int slam_pull_value ( struct slam_request *slam, + struct io_buffer *iobuf, + unsigned long *value ) { + uint8_t *data; + size_t len; + + /* Sanity check */ + if ( iob_len ( iobuf ) == 0 ) { + DBGC ( slam, "SLAM %p empty value\n", slam ); + return -EINVAL; + } + + /* Read and verify length of value */ + data = iobuf->data; + len = ( *data >> 5 ); + if ( ( len == 0 ) || + ( value && ( len > sizeof ( *value ) ) ) ) { + DBGC ( slam, "SLAM %p invalid value length %zd bytes\n", + slam, len ); + return -EINVAL; + } + if ( len > iob_len ( iobuf ) ) { + DBGC ( slam, "SLAM %p value extends beyond I/O buffer\n", + slam ); + return -EINVAL; + } + + /* Read value */ + iob_pull ( iobuf, len ); + *value = ( *data & 0x1f ); + while ( --len ) { + *value <<= 8; + *value |= *(++data); + } + + return 0; +} + +/** + * Read and strip SLAM header + * + * @v slam SLAM request + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int slam_pull_header ( struct slam_request *slam, + struct io_buffer *iobuf ) { + void *header = iobuf->data; + int rc; + + /* If header matches cached header, just pull it and return */ + if ( ( slam->header_len <= iob_len ( iobuf ) ) && + ( memcmp ( slam->header, iobuf->data, slam->header_len ) == 0 )){ + iob_pull ( iobuf, slam->header_len ); + return 0; + } + + DBGC ( slam, "SLAM %p detected changed header; resetting\n", slam ); + + /* Read and strip transaction ID, total number of bytes, and + * block size. + */ + if ( ( rc = slam_pull_value ( slam, iobuf, NULL ) ) != 0 ) + return rc; + if ( ( rc = slam_pull_value ( slam, iobuf, + &slam->total_bytes ) ) != 0 ) + return rc; + if ( ( rc = slam_pull_value ( slam, iobuf, + &slam->block_size ) ) != 0 ) + return rc; + + /* Update the cached header */ + slam->header_len = ( iobuf->data - header ); + assert ( slam->header_len <= sizeof ( slam->header ) ); + memcpy ( slam->header, header, slam->header_len ); + + /* Calculate number of blocks */ + slam->num_blocks = ( ( slam->total_bytes + slam->block_size - 1 ) / + slam->block_size ); + + DBGC ( slam, "SLAM %p has total bytes %ld, block size %ld, num " + "blocks %ld\n", slam, slam->total_bytes, slam->block_size, + slam->num_blocks ); + + /* Discard and reset the bitmap */ + bitmap_free ( &slam->bitmap ); + memset ( &slam->bitmap, 0, sizeof ( slam->bitmap ) ); + + /* Allocate a new bitmap */ + if ( ( rc = bitmap_resize ( &slam->bitmap, + slam->num_blocks ) ) != 0 ) { + /* Failure to allocate a bitmap is fatal */ + DBGC ( slam, "SLAM %p could not allocate bitmap for %ld " + "blocks: %s\n", slam, slam->num_blocks, + strerror ( rc ) ); + slam_finished ( slam, rc ); + return rc; + } + + /* Notify recipient of file size */ + xfer_seek ( &slam->xfer, slam->total_bytes ); + + return 0; +} + +/** + * Receive SLAM data packet + * + * @v slam SLAM request + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int slam_mc_socket_deliver ( struct slam_request *slam, + struct io_buffer *iobuf, + struct xfer_metadata *rx_meta __unused ) { + struct xfer_metadata meta; + unsigned long packet; + size_t len; + int rc; + + /* Stop the master client timer. Restart the slave client timer. */ + stop_timer ( &slam->master_timer ); + stop_timer ( &slam->slave_timer ); + start_timer_fixed ( &slam->slave_timer, SLAM_SLAVE_TIMEOUT ); + + /* Read and strip packet header */ + if ( ( rc = slam_pull_header ( slam, iobuf ) ) != 0 ) + goto err_discard; + + /* Read and strip packet number */ + if ( ( rc = slam_pull_value ( slam, iobuf, &packet ) ) != 0 ) + goto err_discard; + + /* Sanity check packet number */ + if ( packet >= slam->num_blocks ) { + DBGC ( slam, "SLAM %p received out-of-range packet %ld " + "(num_blocks=%ld)\n", slam, packet, slam->num_blocks ); + rc = -EINVAL; + goto err_discard; + } + + /* Sanity check length */ + len = iob_len ( iobuf ); + if ( len > slam->block_size ) { + DBGC ( slam, "SLAM %p received oversize packet of %zd bytes " + "(block_size=%ld)\n", slam, len, slam->block_size ); + rc = -EINVAL; + goto err_discard; + } + if ( ( packet != ( slam->num_blocks - 1 ) ) && + ( len < slam->block_size ) ) { + DBGC ( slam, "SLAM %p received short packet of %zd bytes " + "(block_size=%ld)\n", slam, len, slam->block_size ); + rc = -EINVAL; + goto err_discard; + } + + /* If we have already seen this packet, discard it */ + if ( bitmap_test ( &slam->bitmap, packet ) ) { + goto discard; + } + + /* Pass to recipient */ + memset ( &meta, 0, sizeof ( meta ) ); + meta.flags = XFER_FL_ABS_OFFSET; + meta.offset = ( packet * slam->block_size ); + if ( ( rc = xfer_deliver ( &slam->xfer, iobuf, &meta ) ) != 0 ) + goto err; + + /* Mark block as received */ + bitmap_set ( &slam->bitmap, packet ); + + /* If we have received all blocks, terminate */ + if ( bitmap_full ( &slam->bitmap ) ) + slam_finished ( slam, 0 ); + + return 0; + + err_discard: + discard: + free_iob ( iobuf ); + err: + return rc; +} + +/** + * Receive SLAM non-data packet + * + * @v slam SLAM request + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int slam_socket_deliver ( struct slam_request *slam, + struct io_buffer *iobuf, + struct xfer_metadata *rx_meta __unused ) { + int rc; + + /* Restart the master client timer */ + stop_timer ( &slam->master_timer ); + start_timer ( &slam->master_timer ); + + /* Read and strip packet header */ + if ( ( rc = slam_pull_header ( slam, iobuf ) ) != 0 ) + goto discard; + + /* Sanity check */ + if ( iob_len ( iobuf ) != 0 ) { + DBGC ( slam, "SLAM %p received trailing garbage:\n", slam ); + DBGC_HD ( slam, iobuf->data, iob_len ( iobuf ) ); + rc = -EINVAL; + goto discard; + } + + /* Discard packet */ + free_iob ( iobuf ); + + /* Send NACK in reply */ + slam_tx_nack ( slam ); + + return 0; + + discard: + free_iob ( iobuf ); + return rc; + +} + +/** SLAM unicast socket interface operations */ +static struct interface_operation slam_socket_operations[] = { + INTF_OP ( xfer_deliver, struct slam_request *, slam_socket_deliver ), + INTF_OP ( intf_close, struct slam_request *, slam_finished ), +}; + +/** SLAM unicast socket interface descriptor */ +static struct interface_descriptor slam_socket_desc = + INTF_DESC ( struct slam_request, socket, slam_socket_operations ); + +/** SLAM multicast socket interface operations */ +static struct interface_operation slam_mc_socket_operations[] = { + INTF_OP ( xfer_deliver, struct slam_request *, slam_mc_socket_deliver ), + INTF_OP ( intf_close, struct slam_request *, slam_finished ), +}; + +/** SLAM multicast socket interface descriptor */ +static struct interface_descriptor slam_mc_socket_desc = + INTF_DESC ( struct slam_request, mc_socket, slam_mc_socket_operations ); + +/**************************************************************************** + * + * Data transfer interface + * + */ + +/** SLAM data transfer interface operations */ +static struct interface_operation slam_xfer_operations[] = { + INTF_OP ( intf_close, struct slam_request *, slam_finished ), +}; + +/** SLAM data transfer interface descriptor */ +static struct interface_descriptor slam_xfer_desc = + INTF_DESC ( struct slam_request, xfer, slam_xfer_operations ); + +/** + * Parse SLAM URI multicast address + * + * @v slam SLAM request + * @v path Path portion of x-slam:// URI + * @v address Socket address to fill in + * @ret rc Return status code + */ +static int slam_parse_multicast_address ( struct slam_request *slam, + const char *path, + struct sockaddr_in *address ) { + char path_dup[ strlen ( path ) /* no +1 */ ]; + char *sep; + char *end; + + /* Create temporary copy of path, minus the leading '/' */ + assert ( *path == '/' ); + memcpy ( path_dup, ( path + 1 ) , sizeof ( path_dup ) ); + + /* Parse port, if present */ + sep = strchr ( path_dup, ':' ); + if ( sep ) { + *(sep++) = '\0'; + address->sin_port = htons ( strtoul ( sep, &end, 0 ) ); + if ( *end != '\0' ) { + DBGC ( slam, "SLAM %p invalid multicast port " + "\"%s\"\n", slam, sep ); + return -EINVAL; + } + } + + /* Parse address */ + if ( inet_aton ( path_dup, &address->sin_addr ) == 0 ) { + DBGC ( slam, "SLAM %p invalid multicast address \"%s\"\n", + slam, path_dup ); + return -EINVAL; + } + + return 0; +} + +/** + * Initiate a SLAM request + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int slam_open ( struct interface *xfer, struct uri *uri ) { + static const struct sockaddr_in default_multicast = { + .sin_family = AF_INET, + .sin_port = htons ( SLAM_DEFAULT_MULTICAST_PORT ), + .sin_addr = { htonl ( SLAM_DEFAULT_MULTICAST_IP ) }, + }; + struct slam_request *slam; + struct sockaddr_tcpip server; + struct sockaddr_in multicast; + int rc; + + /* Sanity checks */ + if ( ! uri->host ) + return -EINVAL; + + /* Allocate and populate structure */ + slam = zalloc ( sizeof ( *slam ) ); + if ( ! slam ) + return -ENOMEM; + ref_init ( &slam->refcnt, slam_free ); + intf_init ( &slam->xfer, &slam_xfer_desc, &slam->refcnt ); + intf_init ( &slam->socket, &slam_socket_desc, &slam->refcnt ); + intf_init ( &slam->mc_socket, &slam_mc_socket_desc, &slam->refcnt ); + timer_init ( &slam->master_timer, slam_master_timer_expired, + &slam->refcnt ); + timer_init ( &slam->slave_timer, slam_slave_timer_expired, + &slam->refcnt ); + /* Fake an invalid cached header of { 0x00, ... } */ + slam->header_len = 1; + /* Fake parameters for initial NACK */ + slam->num_blocks = 1; + if ( ( rc = bitmap_resize ( &slam->bitmap, 1 ) ) != 0 ) { + DBGC ( slam, "SLAM %p could not allocate initial bitmap: " + "%s\n", slam, strerror ( rc ) ); + goto err; + } + + /* Open unicast socket */ + memset ( &server, 0, sizeof ( server ) ); + server.st_port = htons ( uri_port ( uri, SLAM_DEFAULT_PORT ) ); + if ( ( rc = xfer_open_named_socket ( &slam->socket, SOCK_DGRAM, + ( struct sockaddr * ) &server, + uri->host, NULL ) ) != 0 ) { + DBGC ( slam, "SLAM %p could not open unicast socket: %s\n", + slam, strerror ( rc ) ); + goto err; + } + + /* Open multicast socket */ + memcpy ( &multicast, &default_multicast, sizeof ( multicast ) ); + if ( uri->path && + ( ( rc = slam_parse_multicast_address ( slam, uri->path, + &multicast ) ) != 0 ) ) { + goto err; + } + if ( ( rc = xfer_open_socket ( &slam->mc_socket, SOCK_DGRAM, + ( struct sockaddr * ) &multicast, + ( struct sockaddr * ) &multicast ) ) != 0 ) { + DBGC ( slam, "SLAM %p could not open multicast socket: %s\n", + slam, strerror ( rc ) ); + goto err; + } + + /* Start slave retry timer */ + start_timer_fixed ( &slam->slave_timer, SLAM_SLAVE_TIMEOUT ); + + /* Attach to parent interface, mortalise self, and return */ + intf_plug_plug ( &slam->xfer, xfer ); + ref_put ( &slam->refcnt ); + return 0; + + err: + slam_finished ( slam, rc ); + ref_put ( &slam->refcnt ); + return rc; +} + +/** SLAM URI opener */ +struct uri_opener slam_uri_opener __uri_opener = { + .scheme = "x-slam", + .open = slam_open, +}; diff --git a/qemu/roms/ipxe/src/net/udp/syslog.c b/qemu/roms/ipxe/src/net/udp/syslog.c new file mode 100644 index 000000000..d65d19ab8 --- /dev/null +++ b/qemu/roms/ipxe/src/net/udp/syslog.c @@ -0,0 +1,298 @@ +/* + * Copyright (C) 2011 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** @file + * + * Syslog protocol + * + */ + +#include <stdint.h> +#include <stdlib.h> +#include <ctype.h> +#include <byteswap.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/tcpip.h> +#include <ipxe/dhcp.h> +#include <ipxe/dhcpv6.h> +#include <ipxe/settings.h> +#include <ipxe/console.h> +#include <ipxe/lineconsole.h> +#include <ipxe/syslog.h> +#include <config/console.h> + +/* Set default console usage if applicable */ +#if ! ( defined ( CONSOLE_SYSLOG ) && CONSOLE_EXPLICIT ( CONSOLE_SYSLOG ) ) +#undef CONSOLE_SYSLOG +#define CONSOLE_SYSLOG ( CONSOLE_USAGE_ALL & ~CONSOLE_USAGE_TUI ) +#endif + +/** The syslog server */ +static union { + struct sockaddr sa; + struct sockaddr_tcpip st; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; +} logserver = { + .st = { + .st_port = htons ( SYSLOG_PORT ), + }, +}; + +/** Syslog UDP interface operations */ +static struct interface_operation syslogger_operations[] = {}; + +/** Syslog UDP interface descriptor */ +static struct interface_descriptor syslogger_desc = + INTF_DESC_PURE ( syslogger_operations ); + +/** The syslog UDP interface */ +static struct interface syslogger = INTF_INIT ( syslogger_desc ); + +/****************************************************************************** + * + * Console driver + * + ****************************************************************************** + */ + +/** Host name (for log messages) */ +static char *syslog_hostname; + +/** Domain name (for log messages) */ +static char *syslog_domain; + +/** + * Transmit formatted syslog message + * + * @v xfer Data transfer interface + * @v severity Severity + * @v message Message + * @v terminator Message terminator + * @ret rc Return status code + */ +int syslog_send ( struct interface *xfer, unsigned int severity, + const char *message, const char *terminator ) { + const char *hostname = ( syslog_hostname ? syslog_hostname : "" ); + const char *domain = ( ( hostname[0] && syslog_domain ) ? + syslog_domain : "" ); + + return xfer_printf ( xfer, "<%d>%s%s%s%sipxe: %s%s", + SYSLOG_PRIORITY ( SYSLOG_DEFAULT_FACILITY, + severity ), hostname, + ( domain[0] ? "." : "" ), domain, + ( hostname[0] ? " " : "" ), message, terminator ); +} + +/****************************************************************************** + * + * Console driver + * + ****************************************************************************** + */ + +/** Syslog line buffer */ +static char syslog_buffer[SYSLOG_BUFSIZE]; + +/** Syslog severity */ +static unsigned int syslog_severity = SYSLOG_DEFAULT_SEVERITY; + +/** + * Handle ANSI set syslog priority (private sequence) + * + * @v ctx ANSI escape sequence context + * @v count Parameter count + * @v params List of graphic rendition aspects + */ +static void syslog_handle_priority ( struct ansiesc_context *ctx __unused, + unsigned int count __unused, + int params[] ) { + if ( params[0] >= 0 ) { + syslog_severity = params[0]; + } else { + syslog_severity = SYSLOG_DEFAULT_SEVERITY; + } +} + +/** Syslog ANSI escape sequence handlers */ +static struct ansiesc_handler syslog_handlers[] = { + { ANSIESC_LOG_PRIORITY, syslog_handle_priority }, + { 0, NULL } +}; + +/** Syslog line console */ +static struct line_console syslog_line = { + .buffer = syslog_buffer, + .len = sizeof ( syslog_buffer ), + .ctx = { + .handlers = syslog_handlers, + }, +}; + +/** Syslog recursion marker */ +static int syslog_entered; + +/** + * Print a character to syslog console + * + * @v character Character to be printed + */ +static void syslog_putchar ( int character ) { + int rc; + + /* Ignore if we are already mid-logging */ + if ( syslog_entered ) + return; + + /* Fill line buffer */ + if ( line_putchar ( &syslog_line, character ) == 0 ) + return; + + /* Guard against re-entry */ + syslog_entered = 1; + + /* Send log message */ + if ( ( rc = syslog_send ( &syslogger, syslog_severity, + syslog_buffer, "" ) ) != 0 ) { + DBG ( "SYSLOG could not send log message: %s\n", + strerror ( rc ) ); + } + + /* Clear re-entry flag */ + syslog_entered = 0; +} + +/** Syslog console driver */ +struct console_driver syslog_console __console_driver = { + .putchar = syslog_putchar, + .disabled = CONSOLE_DISABLED, + .usage = CONSOLE_SYSLOG, +}; + +/****************************************************************************** + * + * Settings + * + ****************************************************************************** + */ + +/** IPv4 syslog server setting */ +const struct setting syslog_setting __setting ( SETTING_MISC, syslog ) = { + .name = "syslog", + .description = "Syslog server", + .tag = DHCP_LOG_SERVERS, + .type = &setting_type_ipv4, +}; + +/** IPv6 syslog server setting */ +const struct setting syslog6_setting __setting ( SETTING_MISC, syslog6 ) = { + .name = "syslog6", + .description = "Syslog server", + .tag = DHCPV6_LOG_SERVERS, + .type = &setting_type_ipv6, + .scope = &ipv6_scope, +}; + +/** + * Strip invalid characters from host/domain name + * + * @v name Name to strip + */ +static void syslog_fix_name ( char *name ) { + char *fixed = name; + int c; + + /* Do nothing if name does not exist */ + if ( ! name ) + return; + + /* Strip any non-printable or whitespace characters from the name */ + do { + c = *(name++); + *fixed = c; + if ( isprint ( c ) && ! isspace ( c ) ) + fixed++; + } while ( c ); +} + +/** + * Apply syslog settings + * + * @ret rc Return status code + */ +static int apply_syslog_settings ( void ) { + struct sockaddr old_logserver; + int rc; + + /* Fetch hostname and domain name */ + free ( syslog_hostname ); + fetch_string_setting_copy ( NULL, &hostname_setting, &syslog_hostname ); + syslog_fix_name ( syslog_hostname ); + free ( syslog_domain ); + fetch_string_setting_copy ( NULL, &domain_setting, &syslog_domain ); + syslog_fix_name ( syslog_domain ); + + /* Fetch log server */ + syslog_console.disabled = CONSOLE_DISABLED; + memcpy ( &old_logserver, &logserver, sizeof ( old_logserver ) ); + logserver.sa.sa_family = 0; + if ( fetch_ipv6_setting ( NULL, &syslog6_setting, + &logserver.sin6.sin6_addr ) >= 0 ) { + logserver.sin6.sin6_family = AF_INET6; + } else if ( fetch_ipv4_setting ( NULL, &syslog_setting, + &logserver.sin.sin_addr ) >= 0 ) { + logserver.sin.sin_family = AF_INET; + } + if ( logserver.sa.sa_family ) { + syslog_console.disabled = 0; + DBG ( "SYSLOG using log server %s\n", + sock_ntoa ( &logserver.sa ) ); + } + + /* Do nothing unless log server has changed */ + if ( memcmp ( &logserver, &old_logserver, sizeof ( logserver ) ) == 0 ) + return 0; + + /* Reset syslog connection */ + intf_restart ( &syslogger, 0 ); + + /* Do nothing unless we have a log server */ + if ( syslog_console.disabled ) { + DBG ( "SYSLOG has no log server\n" ); + return 0; + } + + /* Connect to log server */ + if ( ( rc = xfer_open_socket ( &syslogger, SOCK_DGRAM, + &logserver.sa, NULL ) ) != 0 ) { + DBG ( "SYSLOG cannot connect to log server: %s\n", + strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** Syslog settings applicator */ +struct settings_applicator syslog_applicator __settings_applicator = { + .apply = apply_syslog_settings, +}; diff --git a/qemu/roms/ipxe/src/net/udp/tftp.c b/qemu/roms/ipxe/src/net/udp/tftp.c new file mode 100644 index 000000000..ee827ae3d --- /dev/null +++ b/qemu/roms/ipxe/src/net/udp/tftp.c @@ -0,0 +1,1236 @@ +/* + * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <strings.h> +#include <byteswap.h> +#include <errno.h> +#include <assert.h> +#include <ipxe/refcnt.h> +#include <ipxe/iobuf.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/uri.h> +#include <ipxe/tcpip.h> +#include <ipxe/retry.h> +#include <ipxe/features.h> +#include <ipxe/bitmap.h> +#include <ipxe/settings.h> +#include <ipxe/dhcp.h> +#include <ipxe/uri.h> +#include <ipxe/tftp.h> + +/** @file + * + * TFTP protocol + * + */ + +FEATURE ( FEATURE_PROTOCOL, "TFTP", DHCP_EB_FEATURE_TFTP, 1 ); + +/* TFTP-specific error codes */ +#define EINVAL_BLKSIZE __einfo_error ( EINFO_EINVAL_BLKSIZE ) +#define EINFO_EINVAL_BLKSIZE __einfo_uniqify \ + ( EINFO_EINVAL, 0x01, "Invalid blksize" ) +#define EINVAL_TSIZE __einfo_error ( EINFO_EINVAL_TSIZE ) +#define EINFO_EINVAL_TSIZE __einfo_uniqify \ + ( EINFO_EINVAL, 0x02, "Invalid tsize" ) +#define EINVAL_MC_NO_PORT __einfo_error ( EINFO_EINVAL_MC_NO_PORT ) +#define EINFO_EINVAL_MC_NO_PORT __einfo_uniqify \ + ( EINFO_EINVAL, 0x03, "Missing multicast port" ) +#define EINVAL_MC_NO_MC __einfo_error ( EINFO_EINVAL_MC_NO_MC ) +#define EINFO_EINVAL_MC_NO_MC __einfo_uniqify \ + ( EINFO_EINVAL, 0x04, "Missing multicast mc" ) +#define EINVAL_MC_INVALID_MC __einfo_error ( EINFO_EINVAL_MC_INVALID_MC ) +#define EINFO_EINVAL_MC_INVALID_MC __einfo_uniqify \ + ( EINFO_EINVAL, 0x05, "Missing multicast IP" ) +#define EINVAL_MC_INVALID_IP __einfo_error ( EINFO_EINVAL_MC_INVALID_IP ) +#define EINFO_EINVAL_MC_INVALID_IP __einfo_uniqify \ + ( EINFO_EINVAL, 0x06, "Invalid multicast IP" ) +#define EINVAL_MC_INVALID_PORT __einfo_error ( EINFO_EINVAL_MC_INVALID_PORT ) +#define EINFO_EINVAL_MC_INVALID_PORT __einfo_uniqify \ + ( EINFO_EINVAL, 0x07, "Invalid multicast port" ) + +/** + * A TFTP request + * + * This data structure holds the state for an ongoing TFTP transfer. + */ +struct tftp_request { + /** Reference count */ + struct refcnt refcnt; + /** Data transfer interface */ + struct interface xfer; + + /** URI being fetched */ + struct uri *uri; + /** Transport layer interface */ + struct interface socket; + /** Multicast transport layer interface */ + struct interface mc_socket; + + /** Data block size + * + * This is the "blksize" option negotiated with the TFTP + * server. (If the TFTP server does not support TFTP options, + * this will default to 512). + */ + unsigned int blksize; + /** File size + * + * This is the value returned in the "tsize" option from the + * TFTP server. If the TFTP server does not support the + * "tsize" option, this value will be zero. + */ + unsigned long tsize; + + /** Server port + * + * This is the port to which RRQ packets are sent. + */ + unsigned int port; + /** Peer address + * + * The peer address is determined by the first response + * received to the TFTP RRQ. + */ + struct sockaddr_tcpip peer; + /** Request flags */ + unsigned int flags; + /** MTFTP timeout count */ + unsigned int mtftp_timeouts; + + /** Block bitmap */ + struct bitmap bitmap; + /** Maximum known length + * + * We don't always know the file length in advance. In + * particular, if the TFTP server doesn't support the tsize + * option, or we are using MTFTP, then we don't know the file + * length until we see the end-of-file block (which, in the + * case of MTFTP, may not be the last block we see). + * + * This value is updated whenever we obtain information about + * the file length. + */ + size_t filesize; + /** Retransmission timer */ + struct retry_timer timer; +}; + +/** TFTP request flags */ +enum { + /** Send ACK packets */ + TFTP_FL_SEND_ACK = 0x0001, + /** Request blksize and tsize options */ + TFTP_FL_RRQ_SIZES = 0x0002, + /** Request multicast option */ + TFTP_FL_RRQ_MULTICAST = 0x0004, + /** Perform MTFTP recovery on timeout */ + TFTP_FL_MTFTP_RECOVERY = 0x0008, + /** Only get filesize and then abort the transfer */ + TFTP_FL_SIZEONLY = 0x0010, +}; + +/** Maximum number of MTFTP open requests before falling back to TFTP */ +#define MTFTP_MAX_TIMEOUTS 3 + +/** + * Free TFTP request + * + * @v refcnt Reference counter + */ +static void tftp_free ( struct refcnt *refcnt ) { + struct tftp_request *tftp = + container_of ( refcnt, struct tftp_request, refcnt ); + + uri_put ( tftp->uri ); + bitmap_free ( &tftp->bitmap ); + free ( tftp ); +} + +/** + * Mark TFTP request as complete + * + * @v tftp TFTP connection + * @v rc Return status code + */ +static void tftp_done ( struct tftp_request *tftp, int rc ) { + + DBGC ( tftp, "TFTP %p finished with status %d (%s)\n", + tftp, rc, strerror ( rc ) ); + + /* Stop the retry timer */ + stop_timer ( &tftp->timer ); + + /* Close all data transfer interfaces */ + intf_shutdown ( &tftp->socket, rc ); + intf_shutdown ( &tftp->mc_socket, rc ); + intf_shutdown ( &tftp->xfer, rc ); +} + +/** + * Reopen TFTP socket + * + * @v tftp TFTP connection + * @ret rc Return status code + */ +static int tftp_reopen ( struct tftp_request *tftp ) { + struct sockaddr_tcpip server; + int rc; + + /* Close socket */ + intf_restart ( &tftp->socket, 0 ); + + /* Disable ACK sending. */ + tftp->flags &= ~TFTP_FL_SEND_ACK; + + /* Reset peer address */ + memset ( &tftp->peer, 0, sizeof ( tftp->peer ) ); + + /* Open socket */ + memset ( &server, 0, sizeof ( server ) ); + server.st_port = htons ( tftp->port ); + if ( ( rc = xfer_open_named_socket ( &tftp->socket, SOCK_DGRAM, + ( struct sockaddr * ) &server, + tftp->uri->host, NULL ) ) != 0 ) { + DBGC ( tftp, "TFTP %p could not open socket: %s\n", + tftp, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Reopen TFTP multicast socket + * + * @v tftp TFTP connection + * @v local Local socket address + * @ret rc Return status code + */ +static int tftp_reopen_mc ( struct tftp_request *tftp, + struct sockaddr *local ) { + int rc; + + /* Close multicast socket */ + intf_restart ( &tftp->mc_socket, 0 ); + + /* Open multicast socket. We never send via this socket, so + * use the local address as the peer address (since the peer + * address cannot be NULL). + */ + if ( ( rc = xfer_open_socket ( &tftp->mc_socket, SOCK_DGRAM, + local, local ) ) != 0 ) { + DBGC ( tftp, "TFTP %p could not open multicast " + "socket: %s\n", tftp, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Presize TFTP receive buffers and block bitmap + * + * @v tftp TFTP connection + * @v filesize Known minimum file size + * @ret rc Return status code + */ +static int tftp_presize ( struct tftp_request *tftp, size_t filesize ) { + unsigned int num_blocks; + int rc; + + /* Do nothing if we are already large enough */ + if ( filesize <= tftp->filesize ) + return 0; + + /* Record filesize */ + tftp->filesize = filesize; + + /* Notify recipient of file size */ + xfer_seek ( &tftp->xfer, filesize ); + xfer_seek ( &tftp->xfer, 0 ); + + /* Calculate expected number of blocks. Note that files whose + * length is an exact multiple of the blocksize will have a + * trailing zero-length block, which must be included. + */ + num_blocks = ( ( filesize / tftp->blksize ) + 1 ); + if ( ( rc = bitmap_resize ( &tftp->bitmap, num_blocks ) ) != 0 ) { + DBGC ( tftp, "TFTP %p could not resize bitmap to %d blocks: " + "%s\n", tftp, num_blocks, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * MTFTP multicast receive address + * + * This is treated as a global configuration parameter. + */ +static struct sockaddr_in tftp_mtftp_socket = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl ( 0xefff0101 ), + .sin_port = htons ( 3001 ), +}; + +/** + * Set MTFTP multicast address + * + * @v address Multicast IPv4 address + */ +void tftp_set_mtftp_address ( struct in_addr address ) { + tftp_mtftp_socket.sin_addr = address; +} + +/** + * Set MTFTP multicast port + * + * @v port Multicast port + */ +void tftp_set_mtftp_port ( unsigned int port ) { + tftp_mtftp_socket.sin_port = htons ( port ); +} + +/** + * Transmit RRQ + * + * @v tftp TFTP connection + * @ret rc Return status code + */ +static int tftp_send_rrq ( struct tftp_request *tftp ) { + const char *path = tftp->uri->path; + struct tftp_rrq *rrq; + size_t len; + struct io_buffer *iobuf; + size_t blksize; + + DBGC ( tftp, "TFTP %p requesting \"%s\"\n", tftp, path ); + + /* Allocate buffer */ + len = ( sizeof ( *rrq ) + strlen ( path ) + 1 /* NUL */ + + 5 + 1 /* "octet" + NUL */ + + 7 + 1 + 5 + 1 /* "blksize" + NUL + ddddd + NUL */ + + 5 + 1 + 1 + 1 /* "tsize" + NUL + "0" + NUL */ + + 9 + 1 + 1 /* "multicast" + NUL + NUL */ ); + iobuf = xfer_alloc_iob ( &tftp->socket, len ); + if ( ! iobuf ) + return -ENOMEM; + + /* Determine block size */ + blksize = xfer_window ( &tftp->xfer ); + if ( blksize > TFTP_MAX_BLKSIZE ) + blksize = TFTP_MAX_BLKSIZE; + + /* Build request */ + rrq = iob_put ( iobuf, sizeof ( *rrq ) ); + rrq->opcode = htons ( TFTP_RRQ ); + iob_put ( iobuf, snprintf ( iobuf->tail, iob_tailroom ( iobuf ), + "%s%coctet", path, 0 ) + 1 ); + if ( tftp->flags & TFTP_FL_RRQ_SIZES ) { + iob_put ( iobuf, snprintf ( iobuf->tail, + iob_tailroom ( iobuf ), + "blksize%c%zd%ctsize%c0", + 0, blksize, 0, 0 ) + 1 ); + } + if ( tftp->flags & TFTP_FL_RRQ_MULTICAST ) { + iob_put ( iobuf, snprintf ( iobuf->tail, + iob_tailroom ( iobuf ), + "multicast%c", 0 ) + 1 ); + } + + /* RRQ always goes to the address specified in the initial + * xfer_open() call + */ + return xfer_deliver_iob ( &tftp->socket, iobuf ); +} + +/** + * Transmit ACK + * + * @v tftp TFTP connection + * @ret rc Return status code + */ +static int tftp_send_ack ( struct tftp_request *tftp ) { + struct tftp_ack *ack; + struct io_buffer *iobuf; + struct xfer_metadata meta = { + .dest = ( struct sockaddr * ) &tftp->peer, + }; + unsigned int block; + + /* Determine next required block number */ + block = bitmap_first_gap ( &tftp->bitmap ); + DBGC2 ( tftp, "TFTP %p sending ACK for block %d\n", tftp, block ); + + /* Allocate buffer */ + iobuf = xfer_alloc_iob ( &tftp->socket, sizeof ( *ack ) ); + if ( ! iobuf ) + return -ENOMEM; + + /* Build ACK */ + ack = iob_put ( iobuf, sizeof ( *ack ) ); + ack->opcode = htons ( TFTP_ACK ); + ack->block = htons ( block ); + + /* ACK always goes to the peer recorded from the RRQ response */ + return xfer_deliver ( &tftp->socket, iobuf, &meta ); +} + +/** + * Transmit ERROR (Abort) + * + * @v tftp TFTP connection + * @v errcode TFTP error code + * @v errmsg Error message string + * @ret rc Return status code + */ +static int tftp_send_error ( struct tftp_request *tftp, int errcode, + const char *errmsg ) { + struct tftp_error *err; + struct io_buffer *iobuf; + struct xfer_metadata meta = { + .dest = ( struct sockaddr * ) &tftp->peer, + }; + size_t msglen; + + DBGC2 ( tftp, "TFTP %p sending ERROR %d: %s\n", tftp, errcode, + errmsg ); + + /* Allocate buffer */ + msglen = sizeof ( *err ) + strlen ( errmsg ) + 1 /* NUL */; + iobuf = xfer_alloc_iob ( &tftp->socket, msglen ); + if ( ! iobuf ) + return -ENOMEM; + + /* Build ERROR */ + err = iob_put ( iobuf, msglen ); + err->opcode = htons ( TFTP_ERROR ); + err->errcode = htons ( errcode ); + strcpy ( err->errmsg, errmsg ); + + /* ERR always goes to the peer recorded from the RRQ response */ + return xfer_deliver ( &tftp->socket, iobuf, &meta ); +} + +/** + * Transmit next relevant packet + * + * @v tftp TFTP connection + * @ret rc Return status code + */ +static int tftp_send_packet ( struct tftp_request *tftp ) { + + /* Update retransmission timer. While name resolution takes place the + * window is zero. Avoid unnecessary delay after name resolution + * completes by retrying immediately. + */ + stop_timer ( &tftp->timer ); + if ( xfer_window ( &tftp->socket ) ) { + start_timer ( &tftp->timer ); + } else { + start_timer_nodelay ( &tftp->timer ); + } + + /* Send RRQ or ACK as appropriate */ + if ( ! tftp->peer.st_family ) { + return tftp_send_rrq ( tftp ); + } else { + if ( tftp->flags & TFTP_FL_SEND_ACK ) { + return tftp_send_ack ( tftp ); + } else { + return 0; + } + } +} + +/** + * Handle TFTP retransmission timer expiry + * + * @v timer Retry timer + * @v fail Failure indicator + */ +static void tftp_timer_expired ( struct retry_timer *timer, int fail ) { + struct tftp_request *tftp = + container_of ( timer, struct tftp_request, timer ); + int rc; + + /* If we are doing MTFTP, attempt the various recovery strategies */ + if ( tftp->flags & TFTP_FL_MTFTP_RECOVERY ) { + if ( tftp->peer.st_family ) { + /* If we have received any response from the server, + * try resending the RRQ to restart the download. + */ + DBGC ( tftp, "TFTP %p attempting reopen\n", tftp ); + if ( ( rc = tftp_reopen ( tftp ) ) != 0 ) + goto err; + } else { + /* Fall back to plain TFTP after several attempts */ + tftp->mtftp_timeouts++; + DBGC ( tftp, "TFTP %p timeout %d waiting for MTFTP " + "open\n", tftp, tftp->mtftp_timeouts ); + + if ( tftp->mtftp_timeouts > MTFTP_MAX_TIMEOUTS ) { + DBGC ( tftp, "TFTP %p falling back to plain " + "TFTP\n", tftp ); + tftp->flags = TFTP_FL_RRQ_SIZES; + + /* Close multicast socket */ + intf_restart ( &tftp->mc_socket, 0 ); + + /* Reset retry timer */ + start_timer_nodelay ( &tftp->timer ); + + /* The blocksize may change: discard + * the block bitmap + */ + bitmap_free ( &tftp->bitmap ); + memset ( &tftp->bitmap, 0, + sizeof ( tftp->bitmap ) ); + + /* Reopen on standard TFTP port */ + tftp->port = TFTP_PORT; + if ( ( rc = tftp_reopen ( tftp ) ) != 0 ) + goto err; + } + } + } else { + /* Not doing MTFTP (or have fallen back to plain + * TFTP); fail as per normal. + */ + if ( fail ) { + rc = -ETIMEDOUT; + goto err; + } + } + tftp_send_packet ( tftp ); + return; + + err: + tftp_done ( tftp, rc ); +} + +/** + * Process TFTP "blksize" option + * + * @v tftp TFTP connection + * @v value Option value + * @ret rc Return status code + */ +static int tftp_process_blksize ( struct tftp_request *tftp, + const char *value ) { + char *end; + + tftp->blksize = strtoul ( value, &end, 10 ); + if ( *end ) { + DBGC ( tftp, "TFTP %p got invalid blksize \"%s\"\n", + tftp, value ); + return -EINVAL_BLKSIZE; + } + DBGC ( tftp, "TFTP %p blksize=%d\n", tftp, tftp->blksize ); + + return 0; +} + +/** + * Process TFTP "tsize" option + * + * @v tftp TFTP connection + * @v value Option value + * @ret rc Return status code + */ +static int tftp_process_tsize ( struct tftp_request *tftp, + const char *value ) { + char *end; + + tftp->tsize = strtoul ( value, &end, 10 ); + if ( *end ) { + DBGC ( tftp, "TFTP %p got invalid tsize \"%s\"\n", + tftp, value ); + return -EINVAL_TSIZE; + } + DBGC ( tftp, "TFTP %p tsize=%ld\n", tftp, tftp->tsize ); + + return 0; +} + +/** + * Process TFTP "multicast" option + * + * @v tftp TFTP connection + * @v value Option value + * @ret rc Return status code + */ +static int tftp_process_multicast ( struct tftp_request *tftp, + const char *value ) { + union { + struct sockaddr sa; + struct sockaddr_in sin; + } socket; + char buf[ strlen ( value ) + 1 ]; + char *addr; + char *port; + char *port_end; + char *mc; + char *mc_end; + int rc; + + /* Split value into "addr,port,mc" fields */ + memcpy ( buf, value, sizeof ( buf ) ); + addr = buf; + port = strchr ( addr, ',' ); + if ( ! port ) { + DBGC ( tftp, "TFTP %p multicast missing port,mc\n", tftp ); + return -EINVAL_MC_NO_PORT; + } + *(port++) = '\0'; + mc = strchr ( port, ',' ); + if ( ! mc ) { + DBGC ( tftp, "TFTP %p multicast missing mc\n", tftp ); + return -EINVAL_MC_NO_MC; + } + *(mc++) = '\0'; + + /* Parse parameters */ + if ( strtoul ( mc, &mc_end, 0 ) == 0 ) + tftp->flags &= ~TFTP_FL_SEND_ACK; + if ( *mc_end ) { + DBGC ( tftp, "TFTP %p multicast invalid mc %s\n", tftp, mc ); + return -EINVAL_MC_INVALID_MC; + } + DBGC ( tftp, "TFTP %p is%s the master client\n", + tftp, ( ( tftp->flags & TFTP_FL_SEND_ACK ) ? "" : " not" ) ); + if ( *addr && *port ) { + socket.sin.sin_family = AF_INET; + if ( inet_aton ( addr, &socket.sin.sin_addr ) == 0 ) { + DBGC ( tftp, "TFTP %p multicast invalid IP address " + "%s\n", tftp, addr ); + return -EINVAL_MC_INVALID_IP; + } + DBGC ( tftp, "TFTP %p multicast IP address %s\n", + tftp, inet_ntoa ( socket.sin.sin_addr ) ); + socket.sin.sin_port = htons ( strtoul ( port, &port_end, 0 ) ); + if ( *port_end ) { + DBGC ( tftp, "TFTP %p multicast invalid port %s\n", + tftp, port ); + return -EINVAL_MC_INVALID_PORT; + } + DBGC ( tftp, "TFTP %p multicast port %d\n", + tftp, ntohs ( socket.sin.sin_port ) ); + if ( ( rc = tftp_reopen_mc ( tftp, &socket.sa ) ) != 0 ) + return rc; + } + + return 0; +} + +/** A TFTP option */ +struct tftp_option { + /** Option name */ + const char *name; + /** Option processor + * + * @v tftp TFTP connection + * @v value Option value + * @ret rc Return status code + */ + int ( * process ) ( struct tftp_request *tftp, const char *value ); +}; + +/** Recognised TFTP options */ +static struct tftp_option tftp_options[] = { + { "blksize", tftp_process_blksize }, + { "tsize", tftp_process_tsize }, + { "multicast", tftp_process_multicast }, + { NULL, NULL } +}; + +/** + * Process TFTP option + * + * @v tftp TFTP connection + * @v name Option name + * @v value Option value + * @ret rc Return status code + */ +static int tftp_process_option ( struct tftp_request *tftp, + const char *name, const char *value ) { + struct tftp_option *option; + + for ( option = tftp_options ; option->name ; option++ ) { + if ( strcasecmp ( name, option->name ) == 0 ) + return option->process ( tftp, value ); + } + + DBGC ( tftp, "TFTP %p received unknown option \"%s\" = \"%s\"\n", + tftp, name, value ); + + /* Unknown options should be silently ignored */ + return 0; +} + +/** + * Receive OACK + * + * @v tftp TFTP connection + * @v buf Temporary data buffer + * @v len Length of temporary data buffer + * @ret rc Return status code + */ +static int tftp_rx_oack ( struct tftp_request *tftp, void *buf, size_t len ) { + struct tftp_oack *oack = buf; + char *end = buf + len; + char *name; + char *value; + char *next; + int rc = 0; + + /* Sanity check */ + if ( len < sizeof ( *oack ) ) { + DBGC ( tftp, "TFTP %p received underlength OACK packet " + "length %zd\n", tftp, len ); + rc = -EINVAL; + goto done; + } + + /* Process each option in turn */ + for ( name = oack->data ; name < end ; name = next ) { + + /* Parse option name and value + * + * We treat parsing errors as non-fatal, because there + * exists at least one TFTP server (IBM Tivoli PXE + * Server 5.1.0.3) that has been observed to send + * malformed OACKs containing trailing garbage bytes. + */ + value = ( name + strnlen ( name, ( end - name ) ) + 1 ); + if ( value > end ) { + DBGC ( tftp, "TFTP %p received OACK with malformed " + "option name:\n", tftp ); + DBGC_HD ( tftp, oack, len ); + break; + } + if ( value == end ) { + DBGC ( tftp, "TFTP %p received OACK missing value " + "for option \"%s\"\n", tftp, name ); + DBGC_HD ( tftp, oack, len ); + break; + } + next = ( value + strnlen ( value, ( end - value ) ) + 1 ); + if ( next > end ) { + DBGC ( tftp, "TFTP %p received OACK with malformed " + "value for option \"%s\":\n", tftp, name ); + DBGC_HD ( tftp, oack, len ); + break; + } + + /* Process option */ + if ( ( rc = tftp_process_option ( tftp, name, value ) ) != 0 ) + goto done; + } + + /* Process tsize information, if available */ + if ( tftp->tsize ) { + if ( ( rc = tftp_presize ( tftp, tftp->tsize ) ) != 0 ) + goto done; + } + + /* Abort request if only trying to determine file size */ + if ( tftp->flags & TFTP_FL_SIZEONLY ) { + rc = 0; + tftp_send_error ( tftp, 0, "TFTP Aborted" ); + tftp_done ( tftp, rc ); + return rc; + } + + /* Request next data block */ + tftp_send_packet ( tftp ); + + done: + if ( rc ) + tftp_done ( tftp, rc ); + return rc; +} + +/** + * Receive DATA + * + * @v tftp TFTP connection + * @v iobuf I/O buffer + * @ret rc Return status code + * + * Takes ownership of I/O buffer. + */ +static int tftp_rx_data ( struct tftp_request *tftp, + struct io_buffer *iobuf ) { + struct tftp_data *data = iobuf->data; + struct xfer_metadata meta; + unsigned int block; + off_t offset; + size_t data_len; + int rc; + + if ( tftp->flags & TFTP_FL_SIZEONLY ) { + /* If we get here then server doesn't support SIZE option */ + rc = -ENOTSUP; + tftp_send_error ( tftp, 0, "TFTP Aborted" ); + goto done; + } + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *data ) ) { + DBGC ( tftp, "TFTP %p received underlength DATA packet " + "length %zd\n", tftp, iob_len ( iobuf ) ); + rc = -EINVAL; + goto done; + } + + /* Calculate block number */ + block = ( ( bitmap_first_gap ( &tftp->bitmap ) + 1 ) & ~0xffff ); + if ( data->block == 0 && block == 0 ) { + DBGC ( tftp, "TFTP %p received data block 0\n", tftp ); + rc = -EINVAL; + goto done; + } + block += ( ntohs ( data->block ) - 1 ); + + /* Extract data */ + offset = ( block * tftp->blksize ); + iob_pull ( iobuf, sizeof ( *data ) ); + data_len = iob_len ( iobuf ); + if ( data_len > tftp->blksize ) { + DBGC ( tftp, "TFTP %p received overlength DATA packet " + "length %zd\n", tftp, data_len ); + rc = -EINVAL; + goto done; + } + + /* Deliver data */ + memset ( &meta, 0, sizeof ( meta ) ); + meta.flags = XFER_FL_ABS_OFFSET; + meta.offset = offset; + if ( ( rc = xfer_deliver ( &tftp->xfer, iob_disown ( iobuf ), + &meta ) ) != 0 ) { + DBGC ( tftp, "TFTP %p could not deliver data: %s\n", + tftp, strerror ( rc ) ); + goto done; + } + + /* Ensure block bitmap is ready */ + if ( ( rc = tftp_presize ( tftp, ( offset + data_len ) ) ) != 0 ) + goto done; + + /* Mark block as received */ + bitmap_set ( &tftp->bitmap, block ); + + /* Acknowledge block */ + tftp_send_packet ( tftp ); + + /* If all blocks have been received, finish. */ + if ( bitmap_full ( &tftp->bitmap ) ) + tftp_done ( tftp, 0 ); + + done: + free_iob ( iobuf ); + if ( rc ) + tftp_done ( tftp, rc ); + return rc; +} + +/** + * Convert TFTP error code to return status code + * + * @v errcode TFTP error code + * @ret rc Return status code + */ +static int tftp_errcode_to_rc ( unsigned int errcode ) { + switch ( errcode ) { + case TFTP_ERR_FILE_NOT_FOUND: return -ENOENT; + case TFTP_ERR_ACCESS_DENIED: return -EACCES; + case TFTP_ERR_ILLEGAL_OP: return -ENOTTY; + default: return -ENOTSUP; + } +} + +/** + * Receive ERROR + * + * @v tftp TFTP connection + * @v buf Temporary data buffer + * @v len Length of temporary data buffer + * @ret rc Return status code + */ +static int tftp_rx_error ( struct tftp_request *tftp, void *buf, size_t len ) { + struct tftp_error *error = buf; + int rc; + + /* Sanity check */ + if ( len < sizeof ( *error ) ) { + DBGC ( tftp, "TFTP %p received underlength ERROR packet " + "length %zd\n", tftp, len ); + return -EINVAL; + } + + DBGC ( tftp, "TFTP %p received ERROR packet with code %d, message " + "\"%s\"\n", tftp, ntohs ( error->errcode ), error->errmsg ); + + /* Determine final operation result */ + rc = tftp_errcode_to_rc ( ntohs ( error->errcode ) ); + + /* Close TFTP request */ + tftp_done ( tftp, rc ); + + return 0; +} + +/** + * Receive new data + * + * @v tftp TFTP connection + * @v iobuf I/O buffer + * @v meta Transfer metadata + * @ret rc Return status code + */ +static int tftp_rx ( struct tftp_request *tftp, + struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + struct sockaddr_tcpip *st_src; + struct tftp_common *common = iobuf->data; + size_t len = iob_len ( iobuf ); + int rc = -EINVAL; + + /* Sanity checks */ + if ( len < sizeof ( *common ) ) { + DBGC ( tftp, "TFTP %p received underlength packet length " + "%zd\n", tftp, len ); + goto done; + } + if ( ! meta->src ) { + DBGC ( tftp, "TFTP %p received packet without source port\n", + tftp ); + goto done; + } + + /* Filter by TID. Set TID on first response received */ + st_src = ( struct sockaddr_tcpip * ) meta->src; + if ( ! tftp->peer.st_family ) { + memcpy ( &tftp->peer, st_src, sizeof ( tftp->peer ) ); + DBGC ( tftp, "TFTP %p using remote port %d\n", tftp, + ntohs ( tftp->peer.st_port ) ); + } else if ( memcmp ( &tftp->peer, st_src, + sizeof ( tftp->peer ) ) != 0 ) { + DBGC ( tftp, "TFTP %p received packet from wrong source (got " + "%d, wanted %d)\n", tftp, ntohs ( st_src->st_port ), + ntohs ( tftp->peer.st_port ) ); + goto done; + } + + switch ( common->opcode ) { + case htons ( TFTP_OACK ): + rc = tftp_rx_oack ( tftp, iobuf->data, len ); + break; + case htons ( TFTP_DATA ): + rc = tftp_rx_data ( tftp, iob_disown ( iobuf ) ); + break; + case htons ( TFTP_ERROR ): + rc = tftp_rx_error ( tftp, iobuf->data, len ); + break; + default: + DBGC ( tftp, "TFTP %p received strange packet type %d\n", + tftp, ntohs ( common->opcode ) ); + break; + }; + + done: + free_iob ( iobuf ); + return rc; +} + +/** + * Receive new data via socket + * + * @v tftp TFTP connection + * @v iobuf I/O buffer + * @v meta Transfer metadata + * @ret rc Return status code + */ +static int tftp_socket_deliver ( struct tftp_request *tftp, + struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + + /* Enable sending ACKs when we receive a unicast packet. This + * covers three cases: + * + * 1. Standard TFTP; we should always send ACKs, and will + * always receive a unicast packet before we need to send the + * first ACK. + * + * 2. RFC2090 multicast TFTP; the only unicast packets we will + * receive are the OACKs; enable sending ACKs here (before + * processing the OACK) and disable it when processing the + * multicast option if we are not the master client. + * + * 3. MTFTP; receiving a unicast datagram indicates that we + * are the "master client" and should send ACKs. + */ + tftp->flags |= TFTP_FL_SEND_ACK; + + return tftp_rx ( tftp, iobuf, meta ); +} + +/** TFTP socket operations */ +static struct interface_operation tftp_socket_operations[] = { + INTF_OP ( xfer_deliver, struct tftp_request *, tftp_socket_deliver ), +}; + +/** TFTP socket interface descriptor */ +static struct interface_descriptor tftp_socket_desc = + INTF_DESC ( struct tftp_request, socket, tftp_socket_operations ); + +/** TFTP multicast socket operations */ +static struct interface_operation tftp_mc_socket_operations[] = { + INTF_OP ( xfer_deliver, struct tftp_request *, tftp_rx ), +}; + +/** TFTP multicast socket interface descriptor */ +static struct interface_descriptor tftp_mc_socket_desc = + INTF_DESC ( struct tftp_request, mc_socket, tftp_mc_socket_operations ); + +/** + * Check flow control window + * + * @v tftp TFTP connection + * @ret len Length of window + */ +static size_t tftp_xfer_window ( struct tftp_request *tftp ) { + + /* We abuse this data-xfer method to convey the blocksize to + * the caller. This really should be done using some kind of + * stat() method, but we don't yet have the facility to do + * that. + */ + return tftp->blksize; +} + +/** TFTP data transfer interface operations */ +static struct interface_operation tftp_xfer_operations[] = { + INTF_OP ( xfer_window, struct tftp_request *, tftp_xfer_window ), + INTF_OP ( intf_close, struct tftp_request *, tftp_done ), +}; + +/** TFTP data transfer interface descriptor */ +static struct interface_descriptor tftp_xfer_desc = + INTF_DESC ( struct tftp_request, xfer, tftp_xfer_operations ); + +/** + * Initiate TFTP/TFTM/MTFTP download + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int tftp_core_open ( struct interface *xfer, struct uri *uri, + unsigned int default_port, + struct sockaddr *multicast, + unsigned int flags ) { + struct tftp_request *tftp; + int rc; + + /* Sanity checks */ + if ( ! uri->host ) + return -EINVAL; + if ( ! uri->path ) + return -EINVAL; + + /* Allocate and populate TFTP structure */ + tftp = zalloc ( sizeof ( *tftp ) ); + if ( ! tftp ) + return -ENOMEM; + ref_init ( &tftp->refcnt, tftp_free ); + intf_init ( &tftp->xfer, &tftp_xfer_desc, &tftp->refcnt ); + intf_init ( &tftp->socket, &tftp_socket_desc, &tftp->refcnt ); + intf_init ( &tftp->mc_socket, &tftp_mc_socket_desc, &tftp->refcnt ); + timer_init ( &tftp->timer, tftp_timer_expired, &tftp->refcnt ); + tftp->uri = uri_get ( uri ); + tftp->blksize = TFTP_DEFAULT_BLKSIZE; + tftp->flags = flags; + + /* Open socket */ + tftp->port = uri_port ( tftp->uri, default_port ); + if ( ( rc = tftp_reopen ( tftp ) ) != 0 ) + goto err; + + /* Open multicast socket */ + if ( multicast ) { + if ( ( rc = tftp_reopen_mc ( tftp, multicast ) ) != 0 ) + goto err; + } + + /* Start timer to initiate RRQ */ + start_timer_nodelay ( &tftp->timer ); + + /* Attach to parent interface, mortalise self, and return */ + intf_plug_plug ( &tftp->xfer, xfer ); + ref_put ( &tftp->refcnt ); + return 0; + + err: + DBGC ( tftp, "TFTP %p could not create request: %s\n", + tftp, strerror ( rc ) ); + tftp_done ( tftp, rc ); + ref_put ( &tftp->refcnt ); + return rc; +} + +/** + * Initiate TFTP download + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int tftp_open ( struct interface *xfer, struct uri *uri ) { + return tftp_core_open ( xfer, uri, TFTP_PORT, NULL, + TFTP_FL_RRQ_SIZES ); + +} + +/** TFTP URI opener */ +struct uri_opener tftp_uri_opener __uri_opener = { + .scheme = "tftp", + .open = tftp_open, +}; + +/** + * Initiate TFTP-size request + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int tftpsize_open ( struct interface *xfer, struct uri *uri ) { + return tftp_core_open ( xfer, uri, TFTP_PORT, NULL, + ( TFTP_FL_RRQ_SIZES | + TFTP_FL_SIZEONLY ) ); + +} + +/** TFTP URI opener */ +struct uri_opener tftpsize_uri_opener __uri_opener = { + .scheme = "tftpsize", + .open = tftpsize_open, +}; + +/** + * Initiate TFTM download + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int tftm_open ( struct interface *xfer, struct uri *uri ) { + return tftp_core_open ( xfer, uri, TFTP_PORT, NULL, + ( TFTP_FL_RRQ_SIZES | + TFTP_FL_RRQ_MULTICAST ) ); + +} + +/** TFTM URI opener */ +struct uri_opener tftm_uri_opener __uri_opener = { + .scheme = "tftm", + .open = tftm_open, +}; + +/** + * Initiate MTFTP download + * + * @v xfer Data transfer interface + * @v uri Uniform Resource Identifier + * @ret rc Return status code + */ +static int mtftp_open ( struct interface *xfer, struct uri *uri ) { + return tftp_core_open ( xfer, uri, MTFTP_PORT, + ( struct sockaddr * ) &tftp_mtftp_socket, + TFTP_FL_MTFTP_RECOVERY ); +} + +/** MTFTP URI opener */ +struct uri_opener mtftp_uri_opener __uri_opener = { + .scheme = "mtftp", + .open = mtftp_open, +}; + +/****************************************************************************** + * + * Settings + * + ****************************************************************************** + */ + +/** + * Apply TFTP configuration settings + * + * @ret rc Return status code + */ +static int tftp_apply_settings ( void ) { + static struct in_addr tftp_server = { 0 }; + struct in_addr last_tftp_server; + char uri_string[32]; + struct uri *uri; + + /* Retrieve TFTP server setting */ + last_tftp_server = tftp_server; + fetch_ipv4_setting ( NULL, &next_server_setting, &tftp_server ); + + /* If TFTP server setting has changed, set the current working + * URI to match. Do it only when the TFTP server has changed + * to try to minimise surprises to the user, who probably + * won't expect the CWURI to change just because they updated + * an unrelated setting and triggered all the settings + * applicators. + */ + if ( tftp_server.s_addr != last_tftp_server.s_addr ) { + if ( tftp_server.s_addr ) { + snprintf ( uri_string, sizeof ( uri_string ), + "tftp://%s/", inet_ntoa ( tftp_server ) ); + uri = parse_uri ( uri_string ); + if ( ! uri ) + return -ENOMEM; + } else { + uri = NULL; + } + churi ( uri ); + uri_put ( uri ); + } + + return 0; +} + +/** TFTP settings applicator */ +struct settings_applicator tftp_settings_applicator __settings_applicator = { + .apply = tftp_apply_settings, +}; diff --git a/qemu/roms/ipxe/src/net/validator.c b/qemu/roms/ipxe/src/net/validator.c new file mode 100644 index 000000000..74d70e312 --- /dev/null +++ b/qemu/roms/ipxe/src/net/validator.c @@ -0,0 +1,568 @@ +/* + * Copyright (C) 2012 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <ipxe/refcnt.h> +#include <ipxe/malloc.h> +#include <ipxe/interface.h> +#include <ipxe/xfer.h> +#include <ipxe/open.h> +#include <ipxe/iobuf.h> +#include <ipxe/xferbuf.h> +#include <ipxe/process.h> +#include <ipxe/x509.h> +#include <ipxe/settings.h> +#include <ipxe/dhcp.h> +#include <ipxe/base64.h> +#include <ipxe/crc32.h> +#include <ipxe/ocsp.h> +#include <ipxe/validator.h> + +/** @file + * + * Certificate validator + * + */ + +/** A certificate validator */ +struct validator { + /** Reference count */ + struct refcnt refcnt; + /** Job control interface */ + struct interface job; + /** Data transfer interface */ + struct interface xfer; + + /** Process */ + struct process process; + + /** X.509 certificate chain */ + struct x509_chain *chain; + /** OCSP check */ + struct ocsp_check *ocsp; + /** Data buffer */ + struct xfer_buffer buffer; + /** Action to take upon completed transfer */ + int ( * done ) ( struct validator *validator, const void *data, + size_t len ); +}; + +/** + * Free certificate validator + * + * @v refcnt Reference count + */ +static void validator_free ( struct refcnt *refcnt ) { + struct validator *validator = + container_of ( refcnt, struct validator, refcnt ); + + DBGC2 ( validator, "VALIDATOR %p freed\n", validator ); + x509_chain_put ( validator->chain ); + ocsp_put ( validator->ocsp ); + xferbuf_done ( &validator->buffer ); + free ( validator ); +} + +/** + * Mark certificate validation as finished + * + * @v validator Certificate validator + * @v rc Reason for finishing + */ +static void validator_finished ( struct validator *validator, int rc ) { + + /* Remove process */ + process_del ( &validator->process ); + + /* Close all interfaces */ + intf_shutdown ( &validator->xfer, rc ); + intf_shutdown ( &validator->job, rc ); +} + +/**************************************************************************** + * + * Job control interface + * + */ + +/** Certificate validator job control interface operations */ +static struct interface_operation validator_job_operations[] = { + INTF_OP ( intf_close, struct validator *, validator_finished ), +}; + +/** Certificate validator job control interface descriptor */ +static struct interface_descriptor validator_job_desc = + INTF_DESC ( struct validator, job, validator_job_operations ); + +/**************************************************************************** + * + * Cross-signing certificates + * + */ + +/** Cross-signed certificate source setting */ +const struct setting crosscert_setting __setting ( SETTING_CRYPTO, crosscert )={ + .name = "crosscert", + .description = "Cross-signed certificate source", + .tag = DHCP_EB_CROSS_CERT, + .type = &setting_type_string, +}; + +/** Default cross-signed certificate source */ +static const char crosscert_default[] = "http://ca.ipxe.org/auto"; + +/** + * Append cross-signing certificates to certificate chain + * + * @v validator Certificate validator + * @v data Raw cross-signing certificate data + * @v len Length of raw data + * @ret rc Return status code + */ +static int validator_append ( struct validator *validator, + const void *data, size_t len ) { + struct asn1_cursor cursor; + struct x509_chain *certs; + struct x509_certificate *cert; + struct x509_certificate *last; + int rc; + + /* Allocate certificate list */ + certs = x509_alloc_chain(); + if ( ! certs ) { + rc = -ENOMEM; + goto err_alloc_certs; + } + + /* Initialise cursor */ + cursor.data = data; + cursor.len = len; + + /* Enter certificateSet */ + if ( ( rc = asn1_enter ( &cursor, ASN1_SET ) ) != 0 ) { + DBGC ( validator, "VALIDATOR %p could not enter " + "certificateSet: %s\n", validator, strerror ( rc ) ); + goto err_certificateset; + } + + /* Add each certificate to list */ + while ( cursor.len ) { + + /* Add certificate to chain */ + if ( ( rc = x509_append_raw ( certs, cursor.data, + cursor.len ) ) != 0 ) { + DBGC ( validator, "VALIDATOR %p could not append " + "certificate: %s\n", + validator, strerror ( rc) ); + DBGC_HDA ( validator, 0, cursor.data, cursor.len ); + return rc; + } + cert = x509_last ( certs ); + DBGC ( validator, "VALIDATOR %p found certificate %s\n", + validator, x509_name ( cert ) ); + + /* Move to next certificate */ + asn1_skip_any ( &cursor ); + } + + /* Append certificates to chain */ + last = x509_last ( validator->chain ); + if ( ( rc = x509_auto_append ( validator->chain, certs ) ) != 0 ) { + DBGC ( validator, "VALIDATOR %p could not append " + "certificates: %s\n", validator, strerror ( rc ) ); + goto err_auto_append; + } + + /* Check that at least one certificate has been added */ + if ( last == x509_last ( validator->chain ) ) { + DBGC ( validator, "VALIDATOR %p failed to append any " + "applicable certificates\n", validator ); + rc = -EACCES; + goto err_no_progress; + } + + /* Drop reference to certificate list */ + x509_chain_put ( certs ); + + return 0; + + err_no_progress: + err_auto_append: + err_certificateset: + x509_chain_put ( certs ); + err_alloc_certs: + return rc; +} + +/** + * Start download of cross-signing certificate + * + * @v validator Certificate validator + * @v issuer Required issuer + * @ret rc Return status code + */ +static int validator_start_download ( struct validator *validator, + const struct asn1_cursor *issuer ) { + const char *crosscert; + char *crosscert_copy; + char *uri_string; + size_t uri_string_len; + uint32_t crc; + int len; + int rc; + + /* Determine cross-signed certificate source */ + fetch_string_setting_copy ( NULL, &crosscert_setting, &crosscert_copy ); + crosscert = ( crosscert_copy ? crosscert_copy : crosscert_default ); + + /* Allocate URI string */ + uri_string_len = ( strlen ( crosscert ) + 22 /* "/%08x.der?subject=" */ + + base64_encoded_len ( issuer->len ) + 1 /* NUL */ ); + uri_string = zalloc ( uri_string_len ); + if ( ! uri_string ) { + rc = -ENOMEM; + goto err_alloc_uri_string; + } + + /* Generate CRC32 */ + crc = crc32_le ( 0xffffffffUL, issuer->data, issuer->len ); + + /* Generate URI string */ + len = snprintf ( uri_string, uri_string_len, "%s/%08x.der?subject=", + crosscert, crc ); + base64_encode ( issuer->data, issuer->len, ( uri_string + len ) ); + DBGC ( validator, "VALIDATOR %p downloading cross-signed certificate " + "from %s\n", validator, uri_string ); + + /* Set completion handler */ + validator->done = validator_append; + + /* Open URI */ + if ( ( rc = xfer_open_uri_string ( &validator->xfer, + uri_string ) ) != 0 ) { + DBGC ( validator, "VALIDATOR %p could not open %s: %s\n", + validator, uri_string, strerror ( rc ) ); + goto err_open_uri_string; + } + + /* Success */ + rc = 0; + + err_open_uri_string: + free ( uri_string ); + err_alloc_uri_string: + free ( crosscert_copy ); + return rc; +} + +/**************************************************************************** + * + * OCSP checks + * + */ + +/** + * Validate OCSP response + * + * @v validator Certificate validator + * @v data Raw OCSP response + * @v len Length of raw data + * @ret rc Return status code + */ +static int validator_ocsp_validate ( struct validator *validator, + const void *data, size_t len ) { + time_t now; + int rc; + + /* Record OCSP response */ + if ( ( rc = ocsp_response ( validator->ocsp, data, len ) ) != 0 ) { + DBGC ( validator, "VALIDATOR %p could not record OCSP " + "response: %s\n", validator, strerror ( rc ) ); + return rc; + } + + /* Validate OCSP response */ + now = time ( NULL ); + if ( ( rc = ocsp_validate ( validator->ocsp, now ) ) != 0 ) { + DBGC ( validator, "VALIDATOR %p could not validate OCSP " + "response: %s\n", validator, strerror ( rc ) ); + return rc; + } + + /* Drop reference to OCSP check */ + ocsp_put ( validator->ocsp ); + validator->ocsp = NULL; + + return 0; +} + +/** + * Start OCSP check + * + * @v validator Certificate validator + * @v cert Certificate to check + * @v issuer Issuing certificate + * @ret rc Return status code + */ +static int validator_start_ocsp ( struct validator *validator, + struct x509_certificate *cert, + struct x509_certificate *issuer ) { + const char *uri_string; + int rc; + + /* Create OCSP check */ + assert ( validator->ocsp == NULL ); + if ( ( rc = ocsp_check ( cert, issuer, &validator->ocsp ) ) != 0 ) { + DBGC ( validator, "VALIDATOR %p could not create OCSP check: " + "%s\n", validator, strerror ( rc ) ); + return rc; + } + + /* Set completion handler */ + validator->done = validator_ocsp_validate; + + /* Open URI */ + uri_string = validator->ocsp->uri_string; + DBGC ( validator, "VALIDATOR %p performing OCSP check at %s\n", + validator, uri_string ); + if ( ( rc = xfer_open_uri_string ( &validator->xfer, + uri_string ) ) != 0 ) { + DBGC ( validator, "VALIDATOR %p could not open %s: %s\n", + validator, uri_string, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/**************************************************************************** + * + * Data transfer interface + * + */ + +/** + * Close data transfer interface + * + * @v validator Certificate validator + * @v rc Reason for close + */ +static void validator_xfer_close ( struct validator *validator, int rc ) { + + /* Close data transfer interface */ + intf_restart ( &validator->xfer, rc ); + + /* Check for errors */ + if ( rc != 0 ) { + DBGC ( validator, "VALIDATOR %p transfer failed: %s\n", + validator, strerror ( rc ) ); + goto err_transfer; + } + DBGC2 ( validator, "VALIDATOR %p transfer complete\n", validator ); + + /* Process completed download */ + assert ( validator->done != NULL ); + if ( ( rc = validator->done ( validator, validator->buffer.data, + validator->buffer.len ) ) != 0 ) + goto err_append; + + /* Free downloaded data */ + xferbuf_done ( &validator->buffer ); + + /* Resume validation process */ + process_add ( &validator->process ); + + return; + + err_append: + err_transfer: + validator_finished ( validator, rc ); +} + +/** + * Receive data + * + * @v validator Certificate validator + * @v iobuf I/O buffer + * @v meta Data transfer metadata + * @ret rc Return status code + */ +static int validator_xfer_deliver ( struct validator *validator, + struct io_buffer *iobuf, + struct xfer_metadata *meta ) { + int rc; + + /* Add data to buffer */ + if ( ( rc = xferbuf_deliver ( &validator->buffer, iob_disown ( iobuf ), + meta ) ) != 0 ) { + DBGC ( validator, "VALIDATOR %p could not receive data: %s\n", + validator, strerror ( rc ) ); + validator_finished ( validator, rc ); + return rc; + } + + return 0; +} + +/** Certificate validator data transfer interface operations */ +static struct interface_operation validator_xfer_operations[] = { + INTF_OP ( xfer_deliver, struct validator *, validator_xfer_deliver ), + INTF_OP ( intf_close, struct validator *, validator_xfer_close ), +}; + +/** Certificate validator data transfer interface descriptor */ +static struct interface_descriptor validator_xfer_desc = + INTF_DESC ( struct validator, xfer, validator_xfer_operations ); + +/**************************************************************************** + * + * Validation process + * + */ + +/** + * Certificate validation process + * + * @v validator Certificate validator + */ +static void validator_step ( struct validator *validator ) { + struct x509_link *link; + struct x509_certificate *cert; + struct x509_certificate *issuer = NULL; + struct x509_certificate *last; + time_t now; + int rc; + + /* Try validating chain. Try even if the chain is incomplete, + * since certificates may already have been validated + * previously. + */ + now = time ( NULL ); + if ( ( rc = x509_validate_chain ( validator->chain, now, NULL, + NULL ) ) == 0 ) { + validator_finished ( validator, 0 ); + return; + } + + /* If there is a certificate that could be validated using + * OCSP, try it. + */ + list_for_each_entry ( link, &validator->chain->links, list ) { + cert = issuer; + issuer = link->cert; + if ( ! cert ) + continue; + if ( ! issuer->valid ) + continue; + /* The issuer is valid, but this certificate is not + * yet valid. If OCSP is applicable, start it. + */ + if ( cert->extensions.auth_info.ocsp.uri.len && + ( ! cert->extensions.auth_info.ocsp.good ) ) { + /* Start OCSP */ + if ( ( rc = validator_start_ocsp ( validator, cert, + issuer ) ) != 0 ) { + validator_finished ( validator, rc ); + return; + } + return; + } + /* Otherwise, this is a permanent failure */ + validator_finished ( validator, rc ); + return; + } + + /* If chain ends with a self-issued certificate, then there is + * nothing more to do. + */ + last = x509_last ( validator->chain ); + if ( asn1_compare ( &last->issuer.raw, &last->subject.raw ) == 0 ) { + validator_finished ( validator, rc ); + return; + } + + /* Otherwise, try to download a suitable cross-signing + * certificate. + */ + if ( ( rc = validator_start_download ( validator, + &last->issuer.raw ) ) != 0 ) { + validator_finished ( validator, rc ); + return; + } +} + +/** Certificate validator process descriptor */ +static struct process_descriptor validator_process_desc = + PROC_DESC_ONCE ( struct validator, process, validator_step ); + +/**************************************************************************** + * + * Instantiator + * + */ + +/** + * Instantiate a certificate validator + * + * @v job Job control interface + * @v chain X.509 certificate chain + * @ret rc Return status code + */ +int create_validator ( struct interface *job, struct x509_chain *chain ) { + struct validator *validator; + int rc; + + /* Sanity check */ + if ( ! chain ) { + rc = -EINVAL; + goto err_sanity; + } + + /* Allocate and initialise structure */ + validator = zalloc ( sizeof ( *validator ) ); + if ( ! validator ) { + rc = -ENOMEM; + goto err_alloc; + } + ref_init ( &validator->refcnt, validator_free ); + intf_init ( &validator->job, &validator_job_desc, + &validator->refcnt ); + intf_init ( &validator->xfer, &validator_xfer_desc, + &validator->refcnt ); + process_init ( &validator->process, &validator_process_desc, + &validator->refcnt ); + validator->chain = x509_chain_get ( chain ); + + /* Attach parent interface, mortalise self, and return */ + intf_plug_plug ( &validator->job, job ); + ref_put ( &validator->refcnt ); + DBGC2 ( validator, "VALIDATOR %p validating X509 chain %p\n", + validator, validator->chain ); + return 0; + + validator_finished ( validator, rc ); + ref_put ( &validator->refcnt ); + err_alloc: + err_sanity: + return rc; +} diff --git a/qemu/roms/ipxe/src/net/vlan.c b/qemu/roms/ipxe/src/net/vlan.c new file mode 100644 index 000000000..b4ddde42d --- /dev/null +++ b/qemu/roms/ipxe/src/net/vlan.c @@ -0,0 +1,500 @@ +/* + * Copyright (C) 2010 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/features.h> +#include <ipxe/if_ether.h> +#include <ipxe/ethernet.h> +#include <ipxe/netdevice.h> +#include <ipxe/iobuf.h> +#include <ipxe/vlan.h> + +/** @file + * + * Virtual LANs + * + */ + +FEATURE ( FEATURE_PROTOCOL, "VLAN", DHCP_EB_FEATURE_VLAN, 1 ); + +struct net_protocol vlan_protocol __net_protocol; + +/** VLAN device private data */ +struct vlan_device { + /** Trunk network device */ + struct net_device *trunk; + /** VLAN tag */ + unsigned int tag; + /** Default priority */ + unsigned int priority; +}; + +/** + * Open VLAN device + * + * @v netdev Network device + * @ret rc Return status code + */ +static int vlan_open ( struct net_device *netdev ) { + struct vlan_device *vlan = netdev->priv; + + return netdev_open ( vlan->trunk ); +} + +/** + * Close VLAN device + * + * @v netdev Network device + */ +static void vlan_close ( struct net_device *netdev ) { + struct vlan_device *vlan = netdev->priv; + + netdev_close ( vlan->trunk ); +} + +/** + * Transmit packet on VLAN device + * + * @v netdev Network device + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int vlan_transmit ( struct net_device *netdev, + struct io_buffer *iobuf ) { + struct vlan_device *vlan = netdev->priv; + struct net_device *trunk = vlan->trunk; + struct ll_protocol *ll_protocol; + struct vlan_header *vlanhdr; + uint8_t ll_dest_copy[ETH_ALEN]; + uint8_t ll_source_copy[ETH_ALEN]; + const void *ll_dest; + const void *ll_source; + uint16_t net_proto; + unsigned int flags; + int rc; + + /* Strip link-layer header and preserve link-layer header fields */ + ll_protocol = netdev->ll_protocol; + if ( ( rc = ll_protocol->pull ( netdev, iobuf, &ll_dest, &ll_source, + &net_proto, &flags ) ) != 0 ) { + DBGC ( netdev, "VLAN %s could not parse link-layer header: " + "%s\n", netdev->name, strerror ( rc ) ); + return rc; + } + memcpy ( ll_dest_copy, ll_dest, ETH_ALEN ); + memcpy ( ll_source_copy, ll_source, ETH_ALEN ); + + /* Construct VLAN header */ + vlanhdr = iob_push ( iobuf, sizeof ( *vlanhdr ) ); + vlanhdr->tci = htons ( VLAN_TCI ( vlan->tag, vlan->priority ) ); + vlanhdr->net_proto = net_proto; + + /* Reclaim I/O buffer from VLAN device's TX queue */ + list_del ( &iobuf->list ); + + /* Transmit packet on trunk device */ + if ( ( rc = net_tx ( iob_disown ( iobuf ), trunk, &vlan_protocol, + ll_dest_copy, ll_source_copy ) ) != 0 ) { + DBGC ( netdev, "VLAN %s could not transmit: %s\n", + netdev->name, strerror ( rc ) ); + /* Cannot return an error status, since that would + * cause the I/O buffer to be double-freed. + */ + return 0; + } + + return 0; +} + +/** + * Poll VLAN device + * + * @v netdev Network device + */ +static void vlan_poll ( struct net_device *netdev ) { + struct vlan_device *vlan = netdev->priv; + + /* Poll trunk device */ + netdev_poll ( vlan->trunk ); +} + +/** + * Enable/disable interrupts on VLAN device + * + * @v netdev Network device + * @v enable Interrupts should be enabled + */ +static void vlan_irq ( struct net_device *netdev, int enable ) { + struct vlan_device *vlan = netdev->priv; + + /* Enable/disable interrupts on trunk device. This is not at + * all robust, but there is no sensible course of action + * available. + */ + netdev_irq ( vlan->trunk, enable ); +} + +/** VLAN device operations */ +static struct net_device_operations vlan_operations = { + .open = vlan_open, + .close = vlan_close, + .transmit = vlan_transmit, + .poll = vlan_poll, + .irq = vlan_irq, +}; + +/** + * Synchronise VLAN device + * + * @v netdev Network device + */ +static void vlan_sync ( struct net_device *netdev ) { + struct vlan_device *vlan = netdev->priv; + struct net_device *trunk = vlan->trunk; + + /* Synchronise link status */ + if ( netdev->link_rc != trunk->link_rc ) + netdev_link_err ( netdev, trunk->link_rc ); + + /* Synchronise open/closed status */ + if ( netdev_is_open ( trunk ) ) { + if ( ! netdev_is_open ( netdev ) ) + netdev_open ( netdev ); + } else { + if ( netdev_is_open ( netdev ) ) + netdev_close ( netdev ); + } +} + +/** + * Identify VLAN device + * + * @v trunk Trunk network device + * @v tag VLAN tag + * @ret netdev VLAN device, if any + */ +struct net_device * vlan_find ( struct net_device *trunk, unsigned int tag ) { + struct net_device *netdev; + struct vlan_device *vlan; + + for_each_netdev ( netdev ) { + if ( netdev->op != &vlan_operations ) + continue; + vlan = netdev->priv; + if ( ( vlan->trunk == trunk ) && ( vlan->tag == tag ) ) + return netdev; + } + return NULL; +} + +/** + * Process incoming VLAN packet + * + * @v iobuf I/O buffer + * @v trunk Trunk network device + * @v ll_dest Link-layer destination address + * @v ll_source Link-layer source address + * @v flags Packet flags + * @ret rc Return status code + */ +static int vlan_rx ( struct io_buffer *iobuf, struct net_device *trunk, + const void *ll_dest, const void *ll_source, + unsigned int flags __unused ) { + struct vlan_header *vlanhdr = iobuf->data; + struct net_device *netdev; + struct ll_protocol *ll_protocol; + uint8_t ll_dest_copy[ETH_ALEN]; + uint8_t ll_source_copy[ETH_ALEN]; + uint16_t tag; + int rc; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *vlanhdr ) ) { + DBGC ( trunk, "VLAN %s received underlength packet (%zd " + "bytes)\n", trunk->name, iob_len ( iobuf ) ); + rc = -EINVAL; + goto err_sanity; + } + + /* Identify VLAN device */ + tag = VLAN_TAG ( ntohs ( vlanhdr->tci ) ); + netdev = vlan_find ( trunk, tag ); + if ( ! netdev ) { + DBGC2 ( trunk, "VLAN %s received packet for unknown VLAN " + "%d\n", trunk->name, tag ); + rc = -EPIPE; + goto err_no_vlan; + } + + /* Strip VLAN header and preserve original link-layer header fields */ + iob_pull ( iobuf, sizeof ( *vlanhdr ) ); + ll_protocol = trunk->ll_protocol; + memcpy ( ll_dest_copy, ll_dest, ETH_ALEN ); + memcpy ( ll_source_copy, ll_source, ETH_ALEN ); + + /* Reconstruct link-layer header for VLAN device */ + ll_protocol = netdev->ll_protocol; + if ( ( rc = ll_protocol->push ( netdev, iobuf, ll_dest_copy, + ll_source_copy, + vlanhdr->net_proto ) ) != 0 ) { + DBGC ( netdev, "VLAN %s could not reconstruct link-layer " + "header: %s\n", netdev->name, strerror ( rc ) ); + goto err_ll_push; + } + + /* Enqueue packet on VLAN device */ + netdev_rx ( netdev, iob_disown ( iobuf ) ); + return 0; + + err_ll_push: + err_no_vlan: + err_sanity: + free_iob ( iobuf ); + return rc; +} + +/** VLAN protocol */ +struct net_protocol vlan_protocol __net_protocol = { + .name = "VLAN", + .net_proto = htons ( ETH_P_8021Q ), + .rx = vlan_rx, +}; + +/** + * Get the VLAN tag + * + * @v netdev Network device + * @ret tag VLAN tag, or 0 if device is not a VLAN device + */ +unsigned int vlan_tag ( struct net_device *netdev ) { + struct vlan_device *vlan; + + if ( netdev->op == &vlan_operations ) { + vlan = netdev->priv; + return vlan->tag; + } else { + return 0; + } +} + +/** + * Check if network device can be used as a VLAN trunk device + * + * @v trunk Trunk network device + * @ret is_ok Trunk network device is usable + * + * VLAN devices will be created as Ethernet devices. (We cannot + * simply clone the link layer of the trunk network device, because + * this link layer may expect the network device structure to contain + * some link-layer-private data.) The trunk network device must + * therefore have a link layer that is in some sense 'compatible' with + * Ethernet; specifically, it must have link-layer addresses that are + * the same length as Ethernet link-layer addresses. + * + * As an additional check, and primarily to assist with the sanity of + * the FCoE code, we refuse to allow nested VLANs. + */ +int vlan_can_be_trunk ( struct net_device *trunk ) { + + return ( ( trunk->ll_protocol->ll_addr_len == ETH_ALEN ) && + ( trunk->op != &vlan_operations ) ); +} + +/** + * Create VLAN device + * + * @v trunk Trunk network device + * @v tag VLAN tag + * @v priority Default VLAN priority + * @ret rc Return status code + */ +int vlan_create ( struct net_device *trunk, unsigned int tag, + unsigned int priority ) { + struct net_device *netdev; + struct vlan_device *vlan; + int rc; + + /* If VLAN already exists, just update the priority */ + if ( ( netdev = vlan_find ( trunk, tag ) ) != NULL ) { + vlan = netdev->priv; + if ( priority != vlan->priority ) { + DBGC ( netdev, "VLAN %s priority changed from %d to " + "%d\n", netdev->name, vlan->priority, priority ); + } + vlan->priority = priority; + return 0; + } + + /* Sanity checks */ + if ( ! vlan_can_be_trunk ( trunk ) ) { + DBGC ( trunk, "VLAN %s cannot create VLAN on non-trunk " + "device\n", trunk->name ); + rc = -ENOTTY; + goto err_sanity; + } + if ( ! VLAN_TAG_IS_VALID ( tag ) ) { + DBGC ( trunk, "VLAN %s cannot create VLAN with invalid tag " + "%d\n", trunk->name, tag ); + rc = -EINVAL; + goto err_sanity; + } + if ( ! VLAN_PRIORITY_IS_VALID ( priority ) ) { + DBGC ( trunk, "VLAN %s cannot create VLAN with invalid " + "priority %d\n", trunk->name, priority ); + rc = -EINVAL; + goto err_sanity; + } + + /* Allocate and initialise structure */ + netdev = alloc_etherdev ( sizeof ( *vlan ) ); + if ( ! netdev ) { + rc = -ENOMEM; + goto err_alloc_etherdev; + } + netdev_init ( netdev, &vlan_operations ); + netdev->dev = trunk->dev; + memcpy ( netdev->hw_addr, trunk->ll_addr, ETH_ALEN ); + vlan = netdev->priv; + vlan->trunk = netdev_get ( trunk ); + vlan->tag = tag; + vlan->priority = priority; + + /* Construct VLAN device name */ + snprintf ( netdev->name, sizeof ( netdev->name ), "%s-%d", + trunk->name, vlan->tag ); + + /* Register VLAN device */ + if ( ( rc = register_netdev ( netdev ) ) != 0 ) { + DBGC ( netdev, "VLAN %s could not register: %s\n", + netdev->name, strerror ( rc ) ); + goto err_register; + } + + /* Synchronise with trunk device */ + vlan_sync ( netdev ); + + DBGC ( netdev, "VLAN %s created with tag %d and priority %d\n", + netdev->name, vlan->tag, vlan->priority ); + + return 0; + + unregister_netdev ( netdev ); + err_register: + netdev_nullify ( netdev ); + netdev_put ( netdev ); + netdev_put ( trunk ); + err_alloc_etherdev: + err_sanity: + return rc; +} + +/** + * Destroy VLAN device + * + * @v netdev Network device + * @ret rc Return status code + */ +int vlan_destroy ( struct net_device *netdev ) { + struct vlan_device *vlan = netdev->priv; + struct net_device *trunk; + + /* Sanity check */ + if ( netdev->op != &vlan_operations ) { + DBGC ( netdev, "VLAN %s cannot destroy non-VLAN device\n", + netdev->name ); + return -ENOTTY; + } + + DBGC ( netdev, "VLAN %s destroyed\n", netdev->name ); + + /* Remove VLAN device */ + unregister_netdev ( netdev ); + trunk = vlan->trunk; + netdev_nullify ( netdev ); + netdev_put ( netdev ); + netdev_put ( trunk ); + + return 0; +} + +/** + * Handle trunk network device link state change + * + * @v trunk Trunk network device + */ +static void vlan_notify ( struct net_device *trunk ) { + struct net_device *netdev; + struct vlan_device *vlan; + + for_each_netdev ( netdev ) { + if ( netdev->op != &vlan_operations ) + continue; + vlan = netdev->priv; + if ( vlan->trunk == trunk ) + vlan_sync ( netdev ); + } +} + +/** + * Destroy first VLAN device for a given trunk + * + * @v trunk Trunk network device + * @ret found A VLAN device was found + */ +static int vlan_remove_first ( struct net_device *trunk ) { + struct net_device *netdev; + struct vlan_device *vlan; + + for_each_netdev ( netdev ) { + if ( netdev->op != &vlan_operations ) + continue; + vlan = netdev->priv; + if ( vlan->trunk == trunk ) { + vlan_destroy ( netdev ); + return 1; + } + } + return 0; +} + +/** + * Destroy all VLAN devices for a given trunk + * + * @v trunk Trunk network device + */ +static void vlan_remove ( struct net_device *trunk ) { + + /* Remove all VLAN devices attached to this trunk, safe + * against arbitrary net device removal. + */ + while ( vlan_remove_first ( trunk ) ) {} +} + +/** VLAN driver */ +struct net_driver vlan_driver __net_driver = { + .name = "VLAN", + .notify = vlan_notify, + .remove = vlan_remove, +}; |