summaryrefslogtreecommitdiffstats
path: root/qemu/roms/ipxe/src/net
diff options
context:
space:
mode:
authorYang Zhang <yang.z.zhang@intel.com>2015-08-28 09:58:54 +0800
committerYang Zhang <yang.z.zhang@intel.com>2015-09-01 12:44:00 +0800
commite44e3482bdb4d0ebde2d8b41830ac2cdb07948fb (patch)
tree66b09f592c55df2878107a468a91d21506104d3f /qemu/roms/ipxe/src/net
parent9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (diff)
Add qemu 2.4.0
Change-Id: Ic99cbad4b61f8b127b7dc74d04576c0bcbaaf4f5 Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
Diffstat (limited to 'qemu/roms/ipxe/src/net')
-rw-r--r--qemu/roms/ipxe/src/net/80211/net80211.c2828
-rw-r--r--qemu/roms/ipxe/src/net/80211/rc80211.c372
-rw-r--r--qemu/roms/ipxe/src/net/80211/sec80211.c518
-rw-r--r--qemu/roms/ipxe/src/net/80211/wep.c304
-rw-r--r--qemu/roms/ipxe/src/net/80211/wpa.c915
-rw-r--r--qemu/roms/ipxe/src/net/80211/wpa_ccmp.c530
-rw-r--r--qemu/roms/ipxe/src/net/80211/wpa_psk.c127
-rw-r--r--qemu/roms/ipxe/src/net/80211/wpa_tkip.c588
-rw-r--r--qemu/roms/ipxe/src/net/aoe.c1057
-rw-r--r--qemu/roms/ipxe/src/net/arp.c220
-rw-r--r--qemu/roms/ipxe/src/net/dhcpopts.c461
-rw-r--r--qemu/roms/ipxe/src/net/dhcppkt.c305
-rw-r--r--qemu/roms/ipxe/src/net/eapol.c88
-rw-r--r--qemu/roms/ipxe/src/net/eth_slow.c274
-rw-r--r--qemu/roms/ipxe/src/net/ethernet.c239
-rw-r--r--qemu/roms/ipxe/src/net/fakedhcp.c215
-rw-r--r--qemu/roms/ipxe/src/net/fc.c1937
-rw-r--r--qemu/roms/ipxe/src/net/fcels.c1339
-rw-r--r--qemu/roms/ipxe/src/net/fcns.c241
-rw-r--r--qemu/roms/ipxe/src/net/fcoe.c1229
-rw-r--r--qemu/roms/ipxe/src/net/fcp.c1092
-rw-r--r--qemu/roms/ipxe/src/net/fragment.c180
-rw-r--r--qemu/roms/ipxe/src/net/icmp.c226
-rw-r--r--qemu/roms/ipxe/src/net/icmpv4.c109
-rw-r--r--qemu/roms/ipxe/src/net/icmpv6.c179
-rw-r--r--qemu/roms/ipxe/src/net/infiniband.c999
-rw-r--r--qemu/roms/ipxe/src/net/infiniband/ib_cm.c496
-rw-r--r--qemu/roms/ipxe/src/net/infiniband/ib_cmrc.c445
-rw-r--r--qemu/roms/ipxe/src/net/infiniband/ib_mcast.c213
-rw-r--r--qemu/roms/ipxe/src/net/infiniband/ib_mi.c415
-rw-r--r--qemu/roms/ipxe/src/net/infiniband/ib_packet.c249
-rw-r--r--qemu/roms/ipxe/src/net/infiniband/ib_pathrec.c289
-rw-r--r--qemu/roms/ipxe/src/net/infiniband/ib_sma.c371
-rw-r--r--qemu/roms/ipxe/src/net/infiniband/ib_smc.c256
-rw-r--r--qemu/roms/ipxe/src/net/infiniband/ib_srp.c581
-rw-r--r--qemu/roms/ipxe/src/net/iobpad.c69
-rw-r--r--qemu/roms/ipxe/src/net/ipv4.c789
-rw-r--r--qemu/roms/ipxe/src/net/ipv6.c1111
-rw-r--r--qemu/roms/ipxe/src/net/ndp.c1010
-rw-r--r--qemu/roms/ipxe/src/net/neighbour.c428
-rw-r--r--qemu/roms/ipxe/src/net/netdev_settings.c348
-rw-r--r--qemu/roms/ipxe/src/net/netdevice.c1220
-rw-r--r--qemu/roms/ipxe/src/net/nullnet.c61
-rw-r--r--qemu/roms/ipxe/src/net/oncrpc/mount.c119
-rw-r--r--qemu/roms/ipxe/src/net/oncrpc/nfs.c288
-rw-r--r--qemu/roms/ipxe/src/net/oncrpc/nfs_open.c683
-rw-r--r--qemu/roms/ipxe/src/net/oncrpc/nfs_uri.c148
-rw-r--r--qemu/roms/ipxe/src/net/oncrpc/oncrpc_iob.c200
-rw-r--r--qemu/roms/ipxe/src/net/oncrpc/portmap.c90
-rw-r--r--qemu/roms/ipxe/src/net/ping.c273
-rw-r--r--qemu/roms/ipxe/src/net/rarp.c75
-rw-r--r--qemu/roms/ipxe/src/net/retry.c212
-rw-r--r--qemu/roms/ipxe/src/net/socket.c65
-rw-r--r--qemu/roms/ipxe/src/net/tcp.c1494
-rw-r--r--qemu/roms/ipxe/src/net/tcp/ftp.c546
-rw-r--r--qemu/roms/ipxe/src/net/tcp/http.c51
-rw-r--r--qemu/roms/ipxe/src/net/tcp/httpcore.c1574
-rw-r--r--qemu/roms/ipxe/src/net/tcp/https.c52
-rw-r--r--qemu/roms/ipxe/src/net/tcp/iscsi.c2126
-rw-r--r--qemu/roms/ipxe/src/net/tcp/oncrpc.c250
-rw-r--r--qemu/roms/ipxe/src/net/tcp/syslogs.c269
-rw-r--r--qemu/roms/ipxe/src/net/tcpip.c250
-rw-r--r--qemu/roms/ipxe/src/net/tls.c2639
-rw-r--r--qemu/roms/ipxe/src/net/udp.c440
-rw-r--r--qemu/roms/ipxe/src/net/udp/dhcp.c1446
-rw-r--r--qemu/roms/ipxe/src/net/udp/dhcpv6.c989
-rw-r--r--qemu/roms/ipxe/src/net/udp/dns.c1152
-rw-r--r--qemu/roms/ipxe/src/net/udp/slam.c757
-rw-r--r--qemu/roms/ipxe/src/net/udp/syslog.c298
-rw-r--r--qemu/roms/ipxe/src/net/udp/tftp.c1236
-rw-r--r--qemu/roms/ipxe/src/net/validator.c568
-rw-r--r--qemu/roms/ipxe/src/net/vlan.c500
72 files changed, 43713 insertions, 0 deletions
diff --git a/qemu/roms/ipxe/src/net/80211/net80211.c b/qemu/roms/ipxe/src/net/80211/net80211.c
new file mode 100644
index 000000000..434944523
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/80211/net80211.c
@@ -0,0 +1,2828 @@
+/*
+ * The iPXE 802.11 MAC layer.
+ *
+ * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <string.h>
+#include <byteswap.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <ipxe/settings.h>
+#include <ipxe/if_arp.h>
+#include <ipxe/ethernet.h>
+#include <ipxe/ieee80211.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/net80211.h>
+#include <ipxe/sec80211.h>
+#include <ipxe/timer.h>
+#include <ipxe/nap.h>
+#include <ipxe/errortab.h>
+#include <ipxe/net80211_err.h>
+
+/** @file
+ *
+ * 802.11 device management
+ */
+
+/** List of 802.11 devices */
+static struct list_head net80211_devices = LIST_HEAD_INIT ( net80211_devices );
+
+/** Set of device operations that does nothing */
+static struct net80211_device_operations net80211_null_ops;
+
+/** Information associated with a received management packet
+ *
+ * This is used to keep beacon signal strengths in a parallel queue to
+ * the beacons themselves.
+ */
+struct net80211_rx_info {
+ /** Signal strength recorded for the matching queued management frame */
+ int signal;
+ /** Link in the owning device's mgmt_info_queue list */
+ struct list_head list;
+};
+
+/** Context for a probe operation
+ *
+ * Groups together the device being probed, the scan parameters (ESSID
+ * filter, per-channel dwell time and hop stride), the timestamps used
+ * to pace the scan, and the beacons collected so far.
+ */
+struct net80211_probe_ctx {
+ /** 802.11 device to probe on */
+ struct net80211_device *dev;
+
+ /** Value of keep_mgmt before probe was started */
+ int old_keep_mgmt;
+
+ /** If scanning actively, pointer to probe packet to send */
+ struct io_buffer *probe;
+
+ /** If non-"", the ESSID to limit ourselves to */
+ const char *essid;
+
+ /** Time probe was started */
+ u32 ticks_start;
+
+ /** Time last useful beacon was received */
+ u32 ticks_beacon;
+
+ /** Time channel was last changed */
+ u32 ticks_channel;
+
+ /** Time to stay on each channel */
+ u32 hop_time;
+
+ /** Channels to hop by when changing channel */
+ int hop_step;
+
+ /** List of best beacons for each network found so far
+ *
+ * Held by pointer; the list head itself is not embedded in
+ * this structure.
+ */
+ struct list_head *beacons;
+};
+
+/** Context for the association task */
+struct net80211_assoc_ctx {
+ /** Next authentication method to try using */
+ int method;
+
+ /** Time (in ticks) of the last sent association-related packet
+ *
+ * NOTE(review): this is a plain int, whereas the tick fields in
+ * struct net80211_probe_ctx are u32 -- confirm the signedness
+ * is intentional.
+ */
+ int last_packet;
+
+ /** Number of times we have tried sending it */
+ int times_tried;
+};
+
+/**
+ * Detect secure 802.11 network when security support is not available
+ *
+ * @v iob Unused
+ * @v secprot Unused; never written by this stub
+ * @v crypt Unused; never written by this stub
+ * @ret rc -ENOTSUP, always
+ *
+ * Declared __weak; presumably a full implementation supplied by the
+ * security support code takes its place when that code is linked in
+ * (TODO confirm).
+ */
+__weak int sec80211_detect ( struct io_buffer *iob __unused,
+ enum net80211_security_proto *secprot __unused,
+ enum net80211_crypto_alg *crypt __unused ) {
+ return -ENOTSUP;
+}
+
+/**
+ * @defgroup net80211_netdev Network device interface functions
+ * @{
+ */
+static int net80211_netdev_open ( struct net_device *netdev );
+static void net80211_netdev_close ( struct net_device *netdev );
+static int net80211_netdev_transmit ( struct net_device *netdev,
+ struct io_buffer *iobuf );
+static void net80211_netdev_poll ( struct net_device *netdev );
+static void net80211_netdev_irq ( struct net_device *netdev, int enable );
+/** @} */
+
+/**
+ * @defgroup net80211_linklayer 802.11 link-layer protocol functions
+ * @{
+ */
+static int net80211_ll_push ( struct net_device *netdev,
+ struct io_buffer *iobuf, const void *ll_dest,
+ const void *ll_source, uint16_t net_proto );
+static int net80211_ll_pull ( struct net_device *netdev,
+ struct io_buffer *iobuf, const void **ll_dest,
+ const void **ll_source, uint16_t * net_proto,
+ unsigned int *flags );
+/** @} */
+
+/**
+ * @defgroup net80211_help 802.11 helper functions
+ * @{
+ */
+static void net80211_add_channels ( struct net80211_device *dev, int start,
+ int len, int txpower );
+static void net80211_filter_hw_channels ( struct net80211_device *dev );
+static void net80211_set_rtscts_rate ( struct net80211_device *dev );
+static int net80211_process_capab ( struct net80211_device *dev,
+ u16 capab );
+static int net80211_process_ie ( struct net80211_device *dev,
+ union ieee80211_ie *ie, void *ie_end );
+static union ieee80211_ie *
+net80211_marshal_request_info ( struct net80211_device *dev,
+ union ieee80211_ie *ie );
+/** @} */
+
+/**
+ * @defgroup net80211_assoc_ll 802.11 association handling functions
+ * @{
+ */
+static void net80211_step_associate ( struct net80211_device *dev );
+static void net80211_handle_auth ( struct net80211_device *dev,
+ struct io_buffer *iob );
+static void net80211_handle_assoc_reply ( struct net80211_device *dev,
+ struct io_buffer *iob );
+static int net80211_send_disassoc ( struct net80211_device *dev, int reason,
+ int deauth );
+static void net80211_handle_mgmt ( struct net80211_device *dev,
+ struct io_buffer *iob, int signal );
+/** @} */
+
+/**
+ * @defgroup net80211_frag 802.11 fragment handling functions
+ * @{
+ */
+static void net80211_free_frags ( struct net80211_device *dev, int fcid );
+static struct io_buffer *net80211_accum_frags ( struct net80211_device *dev,
+ int fcid, int nfrags, int size );
+static void net80211_rx_frag ( struct net80211_device *dev,
+ struct io_buffer *iob, int signal );
+/** @} */
+
+/**
+ * @defgroup net80211_settings 802.11 settings handlers
+ * @{
+ */
+static int net80211_check_settings_update ( void );
+
+/** 802.11 settings applicator
+ *
+ * When the SSID is changed, this will cause any open devices to
+ * re-associate; when the encryption key is changed, we similarly
+ * update their state.
+ */
+struct settings_applicator net80211_applicator __settings_applicator = {
+ /* The work is done by net80211_check_settings_update() */
+ .apply = net80211_check_settings_update,
+};
+
+/** The network name to associate with
+ *
+ * If this is blank, we scan for all networks and use the one with the
+ * greatest signal strength.
+ */
+const struct setting net80211_ssid_setting __setting ( SETTING_NETDEV_EXTRA,
+ ssid ) = {
+ .name = "ssid",
+ .description = "Wireless SSID",
+ /* Held as a string setting */
+ .type = &setting_type_string,
+};
+
+/** Whether to use active scanning
+ *
+ * In order to associate with a hidden SSID, it's necessary to use an
+ * active scan (send probe packets). If this setting is nonzero, an
+ * active scan on the 2.4GHz band will be used to associate.
+ */
+const struct setting net80211_active_setting __setting ( SETTING_NETDEV_EXTRA,
+ active-scan ) = {
+ .name = "active-scan",
+ .description = "Actively scan for wireless networks",
+ /* Held as an 8-bit integer; any nonzero value requests active scanning */
+ .type = &setting_type_int8,
+};
+
+/** The cryptographic key to use
+ *
+ * For hex WEP keys, as is common, this must be entered using the
+ * normal iPXE method for entering hex settings; an ASCII string of
+ * hex characters will not behave as expected.
+ */
+const struct setting net80211_key_setting __setting ( SETTING_NETDEV_EXTRA,
+ key ) = {
+ .name = "key",
+ .description = "Wireless encryption key",
+ /* Held as a string setting, hence the caveat about hex keys */
+ .type = &setting_type_string,
+};
+
+/** @} */
+
+
+/* ---------- net_device wrapper ---------- */
+
+/**
+ * Open 802.11 device and start association
+ *
+ * @v netdev		Wrapping network device
+ * @ret rc		Return status code
+ *
+ * Fails with -EFAULT if the device currently has only the null
+ * operations installed.  Otherwise the driver's open() method is
+ * called if there is one, and a negative result from it is passed
+ * straight back.  Unless the @c NET80211_NO_ASSOC flag is set in the
+ * wrapped 802.11 device's @c state field, auto-association is then
+ * started.
+ */
+static int net80211_netdev_open ( struct net_device *netdev )
+{
+	struct net80211_device *dev = netdev->priv;
+	int rc;
+
+	/* A device with only the null operations cannot be opened */
+	if ( dev->op == &net80211_null_ops )
+		return -EFAULT;
+
+	/* Let the driver bring up the hardware, if it has a hook */
+	if ( dev->op->open ) {
+		rc = dev->op->open ( dev );
+		if ( rc < 0 )
+			return rc;
+	}
+
+	/* Caller asked us not to associate automatically */
+	if ( dev->state & NET80211_NO_ASSOC )
+		return 0;
+
+	net80211_autoassociate ( dev );
+	return 0;
+}
+
+/**
+ * Close 802.11 device
+ *
+ * @v netdev		Wrapping network device
+ *
+ * Stops the association process if it is running, sends a
+ * disassociation frame if we are associated, stops any started
+ * security handshaker, releases the cryptosystem and handshaker,
+ * marks the link down, clears the device state, and finally calls
+ * the driver's close() method if it has one.
+ */
+static void net80211_netdev_close ( struct net_device *netdev )
+{
+	struct net80211_device *dev = netdev->priv;
+
+	/* Halt the association task if it is in progress */
+	if ( dev->state & NET80211_WORKING )
+		process_del ( &dev->proc_assoc );
+
+	/* Politely tell the AP that we are going away */
+	if ( dev->state & NET80211_ASSOCIATED )
+		net80211_send_disassoc ( dev, IEEE80211_REASON_LEAVING, 0 );
+
+	/* Stop the handshaker only if it exists, can be stopped,
+	   and was actually started */
+	if ( dev->handshaker ) {
+		if ( dev->handshaker->stop && dev->handshaker->started )
+			dev->handshaker->stop ( dev );
+	}
+
+	/* Drop security state; clear the pointers so nothing stale
+	   survives the close */
+	free ( dev->crypto );
+	dev->crypto = NULL;
+	free ( dev->handshaker );
+	dev->handshaker = NULL;
+
+	netdev_link_down ( netdev );
+	dev->state = 0;
+
+	/* Driver hook goes last */
+	if ( dev->op->close )
+		dev->op->close ( dev );
+}
+
+/**
+ * Transmit packet on 802.11 device
+ *
+ * @v netdev		Wrapping network device
+ * @v iobuf		I/O buffer
+ * @ret rc		Return status code
+ *
+ * A data frame that does not yet carry the Protected flag is, when a
+ * cryptosystem is installed, encrypted into a new I/O buffer which
+ * is queued through netdev_tx() in place of the original.  Any other
+ * frame is handed to the driver's transmit() method; -ENOSYS is
+ * returned if the driver has none.
+ */
+static int net80211_netdev_transmit ( struct net_device *netdev,
+				      struct io_buffer *iobuf )
+{
+	struct net80211_device *dev = netdev->priv;
+	struct ieee80211_frame *hdr = iobuf->data;
+	struct io_buffer *encrypted;
+
+	if ( dev->crypto && ! ( hdr->fc & IEEE80211_FC_PROTECTED ) &&
+	     ( ( hdr->fc & IEEE80211_FC_TYPE ) == IEEE80211_TYPE_DATA ) ) {
+		encrypted = dev->crypto->encrypt ( dev->crypto, iobuf );
+		if ( ! encrypted )
+			return -ENOMEM; /* only reason encryption could fail */
+
+		/* The plaintext buffer is finished with: complete it */
+		netdev_tx_complete ( netdev, iobuf );
+
+		/* Queue the ciphertext; it has the Protected flag
+		   set, so it will not come back through this branch */
+		netdev_tx ( netdev, encrypted );
+
+		/* The original buffer has already been completed, so
+		   report success regardless */
+		return 0;
+	}
+
+	if ( ! dev->op->transmit )
+		return -ENOSYS;
+
+	return dev->op->transmit ( dev, iobuf );
+}
+
+/**
+ * Poll 802.11 device for received packets and completed transmissions
+ *
+ * @v netdev		Wrapping network device
+ *
+ * Forwards to the driver's poll() method; does nothing if the driver
+ * does not provide one.
+ */
+static void net80211_netdev_poll ( struct net_device *netdev )
+{
+	struct net80211_device *dev = netdev->priv;
+
+	if ( ! dev->op->poll )
+		return;
+
+	dev->op->poll ( dev );
+}
+
+/**
+ * Enable or disable interrupts for 802.11 device
+ *
+ * @v netdev		Wrapping network device
+ * @v enable		Whether to enable interrupts
+ *
+ * Forwards to the driver's irq() method; does nothing if the driver
+ * does not provide one.
+ */
+static void net80211_netdev_irq ( struct net_device *netdev, int enable )
+{
+	struct net80211_device *dev = netdev->priv;
+
+	if ( ! dev->op->irq )
+		return;
+
+	dev->op->irq ( dev, enable );
+}
+
+/** Network device operations for a wrapped 802.11 device
+ *
+ * Each entry is a thin wrapper that recovers the net80211_device from
+ * netdev->priv and forwards to the matching driver operation when the
+ * driver provides one.
+ */
+static struct net_device_operations net80211_netdev_ops = {
+ .open = net80211_netdev_open,
+ .close = net80211_netdev_close,
+ .transmit = net80211_netdev_transmit,
+ .poll = net80211_netdev_poll,
+ .irq = net80211_netdev_irq,
+};
+
+
+/* ---------- 802.11 link-layer protocol ---------- */
+
+/**
+ * Determine whether a transmission rate uses ERP/OFDM
+ *
+ * @v rate		Rate in 100 kbps units
+ * @ret is_erp		TRUE if the rate is an ERP/OFDM rate
+ *
+ * The four 802.11b rates (1.0, 2.0, 5.5 and 11.0 Mbps) are the only
+ * non-ERP rates; every other value is reported as ERP (802.11g).
+ */
+static inline int net80211_rate_is_erp ( u16 rate )
+{
+	switch ( rate ) {
+	case 10:	/* 1.0 Mbps */
+	case 20:	/* 2.0 Mbps */
+	case 55:	/* 5.5 Mbps */
+	case 110:	/* 11.0 Mbps */
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+
+/**
+ * Calculate one frame's contribution to 802.11 duration field
+ *
+ * @v dev		802.11 device
+ * @v bytes		Amount of data to calculate duration for
+ * @v rate		Transmission rate, in 100 kbps units
+ * @ret dur		Duration field in microseconds
+ *
+ * To avoid multiple stations attempting to transmit at once, 802.11
+ * provides that every packet shall include a duration field
+ * specifying a length of time for which the wireless medium will be
+ * reserved after it is transmitted. The duration is measured in
+ * microseconds and is calculated with respect to the current
+ * physical-layer parameters of the 802.11 device.
+ *
+ * For an unfragmented data or management frame, or the last fragment
+ * of a fragmented frame, the duration captures only the 10 data bytes
+ * of one ACK; call once with bytes = 10.
+ *
+ * For a fragment of a data or management frame that will be followed
+ * by more fragments, the duration captures an ACK, the following
+ * fragment, and its ACK; add the results of three calls, two with
+ * bytes = 10 and one with bytes set to the next fragment's size.
+ *
+ * For an RTS control frame, the duration captures the responding CTS,
+ * the frame being sent, and its ACK; add the results of three calls,
+ * two with bytes = 10 and one with bytes set to the next frame's size
+ * (assuming unfragmented).
+ *
+ * For a CTS-to-self control frame, the duration captures the frame
+ * being protected and its ACK; add the results of two calls, one with
+ * bytes = 10 and one with bytes set to the next frame's size.
+ *
+ * No other frame types are currently supported by iPXE.
+ */
+u16 net80211_duration ( struct net80211_device *dev, int bytes, u16 rate )
+{
+	struct net80211_channel *chan = &dev->channels[dev->channel];
+	u32 kbps = rate * 100;
+	int bits;
+	int bits_per_symbol;
+	int symbols;
+	int phy_time;
+	int data_time;
+
+	if ( chan->band != NET80211_BAND_5GHZ &&
+	     ! net80211_rate_is_erp ( rate ) ) {
+		/* CCK encoding (802.11b) */
+		bits = bytes << 3;
+		data_time = ( bits * 1000 + kbps - 1 ) / kbps;
+
+		/* Preamble + PLCP, halved for short preamble */
+		phy_time = 144 + 48;
+		if ( dev->phy_flags & NET80211_PHY_USE_SHORT_PREAMBLE )
+			phy_time >>= 1;
+
+		return 10 + phy_time + data_time; /* 10us SIFS */
+	}
+
+	/* OFDM encoding (802.11a/g): symbols last 4us each, and the
+	   payload is preceded by 22 bits of PLCP */
+	bits_per_symbol = ( kbps * 4 ) / 1000;
+	bits = 22 + ( bytes << 3 );
+	symbols = ( bits + bits_per_symbol - 1 ) / bits_per_symbol;
+
+	return 16 + 20 + ( symbols * 4 ); /* 16us SIFS, 20us preamble */
+}
+
+/**
+ * Add 802.11 link-layer header
+ *
+ * @v netdev		Wrapping network device
+ * @v iobuf		I/O buffer
+ * @v ll_dest		Link-layer destination address
+ * @v ll_source		Link-layer source address
+ * @v net_proto		Network-layer protocol, in network byte order
+ * @ret rc		Return status code
+ *
+ * Prepends the 802.11 frame header together with the 802.2 LLC/SNAP
+ * header used on data packets.
+ *
+ * Data packets cannot be sent while unassociated: in that case the
+ * stored association error is returned if there is one, otherwise
+ * -ENETUNREACH.  Every data packet must pass through here, and no
+ * non-data packet (e.g. management frame) should.
+ */
+static int net80211_ll_push ( struct net_device *netdev,
+			      struct io_buffer *iobuf, const void *ll_dest,
+			      const void *ll_source, uint16_t net_proto )
+{
+	struct net80211_device *dev = netdev->priv;
+	struct ieee80211_frame *hdr;
+	struct ieee80211_llc_snap_header *llc;
+
+	/* Make room for both headers; the LLC/SNAP header sits
+	   immediately after the 802.11 frame header */
+	hdr = iob_push ( iobuf, IEEE80211_LLC_HEADER_LEN +
+			 IEEE80211_TYP_FRAME_HEADER_LEN );
+	llc = ( void * ) hdr + IEEE80211_TYP_FRAME_HEADER_LEN;
+
+	/* We can't send data packets if we're not associated. */
+	if ( ! ( dev->state & NET80211_ASSOCIATED ) )
+		return ( dev->assoc_rc ? dev->assoc_rc : -ENETUNREACH );
+
+	/* 802.11 header: data frame heading to the distribution system */
+	hdr->fc = IEEE80211_THIS_VERSION | IEEE80211_TYPE_DATA |
+		  IEEE80211_STYPE_DATA | IEEE80211_FC_TODS;
+
+	/* No fragmentation, so the duration covers only an SIFS
+	   plus a 10-byte ACK */
+	hdr->duration = net80211_duration ( dev, 10, dev->rates[dev->rate] );
+
+	hdr->seq = IEEE80211_MAKESEQ ( ++dev->last_tx_seqnr, 0 );
+
+	memcpy ( hdr->addr1, dev->bssid, ETH_ALEN );
+	memcpy ( hdr->addr2, ll_source, ETH_ALEN );
+	memcpy ( hdr->addr3, ll_dest, ETH_ALEN );
+
+	/* LLC/SNAP header: all-zero OUI followed by the EtherType */
+	llc->dsap = IEEE80211_LLC_DSAP;
+	llc->ssap = IEEE80211_LLC_SSAP;
+	llc->ctrl = IEEE80211_LLC_CTRL;
+	memset ( llc->oui, 0x00, 3 );
+	llc->ethertype = net_proto;
+
+	return 0;
+}
+
+/**
+ * Remove 802.11 link-layer header
+ *
+ * @v netdev Wrapping network device
+ * @v iobuf I/O buffer
+ * @ret ll_dest Link-layer destination address
+ * @ret ll_source Link-layer source
+ * @ret net_proto Network-layer protocol, in network byte order
+ * @ret flags Packet flags
+ * @ret rc Return status code
+ *
+ * This expects and removes both the 802.11 frame header and the 802.2
+ * LLC/SNAP header that are used on data packets.
+ */
+static int net80211_ll_pull ( struct net_device *netdev __unused,
+ struct io_buffer *iobuf,
+ const void **ll_dest, const void **ll_source,
+ uint16_t * net_proto, unsigned int *flags )
+{
+ /* netdev is marked __unused because it is referenced only from
+ the DBGC() debug statements below */
+ struct ieee80211_frame *hdr = iobuf->data;
+ struct ieee80211_llc_snap_header *lhdr =
+ ( void * ) hdr + IEEE80211_TYP_FRAME_HEADER_LEN;
+
+ /* Bunch of sanity checks */
+
+ /* Must be long enough to hold both headers; this check comes
+ first so that the header fields read below are in bounds */
+ if ( iob_len ( iobuf ) < IEEE80211_TYP_FRAME_HEADER_LEN +
+ IEEE80211_LLC_HEADER_LEN ) {
+ DBGC ( netdev->priv, "802.11 %p packet too short (%zd bytes)\n",
+ netdev->priv, iob_len ( iobuf ) );
+ return -EINVAL_PKT_TOO_SHORT;
+ }
+
+ /* Protocol version in the frame control field must be ours */
+ if ( ( hdr->fc & IEEE80211_FC_VERSION ) != IEEE80211_THIS_VERSION ) {
+ DBGC ( netdev->priv, "802.11 %p packet invalid version %04x\n",
+ netdev->priv, hdr->fc & IEEE80211_FC_VERSION );
+ return -EINVAL_PKT_VERSION;
+ }
+
+ /* Only plain data frames (type data, subtype data) are accepted */
+ if ( ( hdr->fc & IEEE80211_FC_TYPE ) != IEEE80211_TYPE_DATA ||
+ ( hdr->fc & IEEE80211_FC_SUBTYPE ) != IEEE80211_STYPE_DATA ) {
+ DBGC ( netdev->priv, "802.11 %p packet not data/data (fc=%04x)\n",
+ netdev->priv, hdr->fc );
+ return -EINVAL_PKT_NOT_DATA;
+ }
+
+ /* FromDS must be set and ToDS clear */
+ if ( ( hdr->fc & ( IEEE80211_FC_TODS | IEEE80211_FC_FROMDS ) ) !=
+ IEEE80211_FC_FROMDS ) {
+ DBGC ( netdev->priv, "802.11 %p packet not from DS (fc=%04x)\n",
+ netdev->priv, hdr->fc );
+ return -EINVAL_PKT_NOT_FROMDS;
+ }
+
+ /* LLC/SNAP header must be the plain EtherType encapsulation,
+ i.e. expected DSAP/SSAP/control values and an all-zero OUI */
+ if ( lhdr->dsap != IEEE80211_LLC_DSAP || lhdr->ssap != IEEE80211_LLC_SSAP ||
+ lhdr->ctrl != IEEE80211_LLC_CTRL || lhdr->oui[0] || lhdr->oui[1] ||
+ lhdr->oui[2] ) {
+ DBGC ( netdev->priv, "802.11 %p LLC header is not plain EtherType "
+ "encapsulator: %02x->%02x [%02x] %02x:%02x:%02x %04x\n",
+ netdev->priv, lhdr->dsap, lhdr->ssap, lhdr->ctrl,
+ lhdr->oui[0], lhdr->oui[1], lhdr->oui[2], lhdr->ethertype );
+ return -EINVAL_PKT_LLC_HEADER;
+ }
+
+ /* NOTE(review): the amount stripped is computed with sizeof(),
+ while the length check and the lhdr offset above use the
+ IEEE80211_*_LEN constants -- confirm the two always agree */
+ iob_pull ( iobuf, sizeof ( *hdr ) + sizeof ( *lhdr ) );
+
+ /* The returned addresses point into the header that was just
+ pulled, not at copies. For the FromDS frames accepted above,
+ addr1 is reported as the destination and addr3 as the source. */
+ *ll_dest = hdr->addr1;
+ *ll_source = hdr->addr3;
+ *net_proto = lhdr->ethertype;
+ *flags = ( ( is_multicast_ether_addr ( hdr->addr1 ) ?
+ LL_MULTICAST : 0 ) |
+ ( is_broadcast_ether_addr ( hdr->addr1 ) ?
+ LL_BROADCAST : 0 ) );
+ return 0;
+}
+
+/** 802.11 link-layer protocol
+ *
+ * Only push and pull are 802.11-specific; addressing is delegated to
+ * the Ethernet helpers (eth_*) and both address lengths are ETH_ALEN.
+ */
+static struct ll_protocol net80211_ll_protocol __ll_protocol = {
+ .name = "802.11",
+ .push = net80211_ll_push,
+ .pull = net80211_ll_pull,
+ .init_addr = eth_init_addr,
+ .ntoa = eth_ntoa,
+ .mc_hash = eth_mc_hash,
+ .eth_addr = eth_eth_addr,
+ .eui64 = eth_eui64,
+ .ll_proto = htons ( ARPHRD_ETHER ), /* "encapsulated Ethernet" */
+ .hw_addr_len = ETH_ALEN,
+ .ll_addr_len = ETH_ALEN,
+ /* Matches the space net80211_ll_push() prepends: frame header plus LLC/SNAP */
+ .ll_header_len = IEEE80211_TYP_FRAME_HEADER_LEN +
+ IEEE80211_LLC_HEADER_LEN,
+};
+
+
+/* ---------- 802.11 network management API ---------- */
+
+/**
+ * Get 802.11 device from wrapping network device
+ *
+ * @v netdev		Wrapping network device
+ * @ret dev		802.11 device wrapped by network device, or NULL
+ *
+ * The private data pointer of @a netdev is looked up in the list of
+ * registered 802.11 devices; NULL is returned when it is not found
+ * there, i.e. the network device does not wrap an 802.11 device.
+ */
+struct net80211_device * net80211_get ( struct net_device *netdev )
+{
+	struct net80211_device *candidate;
+	struct net80211_device *found = NULL;
+
+	list_for_each_entry ( candidate, &net80211_devices, list ) {
+		if ( candidate == netdev->priv ) {
+			found = candidate;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/**
+ * Set state of 802.11 device keeping management frames
+ *
+ * @v dev		802.11 device
+ * @v enable		Whether to keep management frames
+ * @ret oldenab		Whether management frames were enabled before this call
+ *
+ * If enable is TRUE, beacon, probe, and action frames will be kept
+ * and may be retrieved by calling net80211_mgmt_dequeue().
+ */
+int net80211_keep_mgmt ( struct net80211_device *dev, int enable )
+{
+	int previous;
+
+	/* Swap in the new value, handing back the old one */
+	previous = dev->keep_mgmt;
+	dev->keep_mgmt = enable;
+
+	return previous;
+}
+
+/**
+ * Get 802.11 management frame
+ *
+ * @v dev		802.11 device
+ * @ret signal		Signal strength of returned management frame
+ * @ret iob		I/O buffer, or NULL if no management frame is queued
+ *
+ * Frames will only be returned by this function if
+ * net80211_keep_mgmt() has been previously called with enable set to
+ * TRUE.
+ *
+ * @a signal may be NULL if the caller is not interested in the
+ * signal strength.
+ *
+ * The calling function takes ownership of the returned I/O buffer.
+ */
+struct io_buffer * net80211_mgmt_dequeue ( struct net80211_device *dev,
+					   int *signal )
+{
+	struct net80211_rx_info *rxi;
+	struct io_buffer *iobuf;
+
+	/* Nothing queued */
+	if ( list_empty ( &dev->mgmt_info_queue ) )
+		return NULL;
+
+	/* Take the oldest signal-strength record ... */
+	rxi = list_first_entry ( &dev->mgmt_info_queue,
+				 struct net80211_rx_info, list );
+	list_del ( &rxi->list );
+	if ( signal )
+		*signal = rxi->signal;
+	free ( rxi );
+
+	/* ... and the frame that goes with it; the two queues are
+	   kept in step, so a frame must be present */
+	assert ( ! list_empty ( &dev->mgmt_queue ) );
+	iobuf = list_first_entry ( &dev->mgmt_queue, struct io_buffer,
+				   list );
+	list_del ( &iobuf->list );
+
+	return iobuf;
+}
+
+/**
+ * Transmit 802.11 management frame
+ *
+ * @v dev		802.11 device
+ * @v fc		Frame Control flags for management frame
+ * @v dest		Destination access point
+ * @v iob		I/O buffer
+ * @ret rc		Return status code
+ *
+ * The @a fc argument must contain at least an IEEE 802.11 management
+ * subtype number (e.g. IEEE80211_STYPE_PROBE_REQ). If it contains
+ * IEEE80211_FC_PROTECTED, the frame will be encrypted prior to
+ * transmission.
+ *
+ * It is required that @a iob have at least 24 bytes of headroom
+ * reserved before its data start.
+ *
+ * This function takes ownership of @a iob on every path, including
+ * the error paths; the caller must not touch it afterwards.
+ * -EINVAL_CRYPTO_REQUEST is returned if protection is requested but
+ * no cryptosystem is installed, and -ENOMEM if encryption fails.
+ */
+int net80211_tx_mgmt ( struct net80211_device *dev, u16 fc, u8 dest[6],
+		       struct io_buffer *iob )
+{
+	struct ieee80211_frame *hdr = iob_push ( iob,
+						 IEEE80211_TYP_FRAME_HEADER_LEN );
+	struct io_buffer *eiob;
+
+	/* The Protected bit is masked out here; it is left to the
+	   cryptosystem when the frame is encrypted below */
+	hdr->fc = IEEE80211_THIS_VERSION | IEEE80211_TYPE_MGMT |
+		  ( fc & ~IEEE80211_FC_PROTECTED );
+	hdr->duration = net80211_duration ( dev, 10, dev->rates[dev->rate] );
+	hdr->seq = IEEE80211_MAKESEQ ( ++dev->last_tx_seqnr, 0 );
+
+	memcpy ( hdr->addr1, dest, ETH_ALEN );	/* DA = RA */
+	memcpy ( hdr->addr2, dev->netdev->ll_addr, ETH_ALEN );	/* SA = TA */
+	memcpy ( hdr->addr3, dest, ETH_ALEN );	/* BSSID */
+
+	if ( fc & IEEE80211_FC_PROTECTED ) {
+		if ( ! dev->crypto ) {
+			/* All other paths consume the buffer, so
+			   release it here rather than leaking it */
+			free_iob ( iob );
+			return -EINVAL_CRYPTO_REQUEST;
+		}
+
+		eiob = dev->crypto->encrypt ( dev->crypto, iob );
+		free_iob ( iob );
+
+		/* encrypt() returns NULL when it cannot allocate the
+		   output buffer; never pass that on to netdev_tx() */
+		if ( ! eiob )
+			return -ENOMEM;
+
+		iob = eiob;
+	}
+
+	return netdev_tx ( dev->netdev, iob );
+}
+
+
+/* ---------- Driver API ---------- */
+
+/** 802.11 association process descriptor
+ *
+ * Ties the proc_assoc member embedded in struct net80211_device to
+ * net80211_step_associate().
+ */
+static struct process_descriptor net80211_process_desc =
+ PROC_DESC ( struct net80211_device, proc_assoc,
+ net80211_step_associate );
+
+/**
+ * Allocate 802.11 device
+ *
+ * @v priv_size		Size of driver-private allocation area
+ * @ret dev		Newly allocated 802.11 device, or NULL on failure
+ *
+ * A single net_device is allocated whose private area holds the
+ * net80211_device followed directly by @a priv_size bytes of
+ * driver-private data.  The link-layer-specific parts of the
+ * net_device are initialised, the two structures are linked to each
+ * other, and the device starts out with the null operations, a
+ * stopped association process and empty management queues.
+ */
+struct net80211_device * net80211_alloc ( size_t priv_size )
+{
+	struct net_device *netdev;
+	struct net80211_device *dev;
+
+	netdev = alloc_netdev ( sizeof ( *dev ) + priv_size );
+	if ( ! netdev )
+		return NULL;
+
+	/* Link-layer setup of the wrapping net_device */
+	netdev->ll_protocol = &net80211_ll_protocol;
+	netdev->ll_broadcast = eth_broadcast;
+	netdev->max_pkt_len = IEEE80211_MAX_DATA_LEN;
+	netdev_init ( netdev, &net80211_netdev_ops );
+
+	/* The 802.11 device occupies the start of the private area;
+	   the driver's own data follows immediately after it */
+	dev = netdev->priv;
+	dev->netdev = netdev;
+	dev->priv = ( void * ) ( dev + 1 );
+	dev->op = &net80211_null_ops;
+
+	process_init_stopped ( &dev->proc_assoc, &net80211_process_desc,
+			       &netdev->refcnt );
+	INIT_LIST_HEAD ( &dev->mgmt_queue );
+	INIT_LIST_HEAD ( &dev->mgmt_info_queue );
+
+	return dev;
+}
+
+/**
+ * Register 802.11 device with network stack
+ *
+ * @v dev		802.11 device
+ * @v ops		802.11 device operations
+ * @v hw		802.11 hardware information
+ * @ret rc		Return status code
+ *
+ * This also registers the wrapping net_device with the higher network
+ * layers.
+ *
+ * A private copy of @a hw is made, so the caller's structure need not
+ * outlive this call.  On failure the device is not left on the list
+ * of 802.11 devices; the copy of @a hw (if it was allocated) is
+ * released later by net80211_free().
+ */
+int net80211_register ( struct net80211_device *dev,
+			struct net80211_device_operations *ops,
+			struct net80211_hw_info *hw )
+{
+	int rc;
+
+	dev->op = ops;
+	dev->hw = malloc ( sizeof ( *hw ) );
+	if ( ! dev->hw )
+		return -ENOMEM;
+
+	memcpy ( dev->hw, hw, sizeof ( *hw ) );
+	memcpy ( dev->netdev->hw_addr, hw->hwaddr, ETH_ALEN );
+
+	/* Set some sensible channel defaults for driver's open() function */
+	memcpy ( dev->channels, dev->hw->channels,
+		 NET80211_MAX_CHANNELS * sizeof ( dev->channels[0] ) );
+	dev->channel = 0;
+
+	/* Add to the device list before registering the net_device,
+	   so that net80211_get() already works during registration */
+	list_add_tail ( &dev->list, &net80211_devices );
+
+	rc = register_netdev ( dev->netdev );
+	if ( rc != 0 ) {
+		/* Do not leave a failed device on the list: the driver
+		   will free it without calling net80211_unregister(),
+		   which would leave a dangling list entry */
+		list_del ( &dev->list );
+		return rc;
+	}
+
+	return 0;
+}
+
+/**
+ * Unregister 802.11 device from network stack
+ *
+ * @v dev 802.11 device
+ *
+ * After this call, the device operations are cleared so that they
+ * will not be called.
+ */
+void net80211_unregister ( struct net80211_device *dev )
+{
+ /* Unregister the wrapping net_device first */
+ unregister_netdev ( dev->netdev );
+ /* Remove from net80211_devices so net80211_get() no longer finds it */
+ list_del ( &dev->list );
+ /* Swap in the null operations so the driver's methods are no
+ longer reachable through dev->op */
+ dev->op = &net80211_null_ops;
+}
+
+/**
+ * Free 802.11 device
+ *
+ * @v dev 802.11 device
+ *
+ * The device should be unregistered before this function is called.
+ */
+void net80211_free ( struct net80211_device *dev )
+{
+ /* Release the private copy of the hardware info made by
+ net80211_register() */
+ free ( dev->hw );
+ /* Release the rate-control state held in dev->rctl */
+ rc80211_free ( dev->rctl );
+ /* dev lives inside the net_device's private area (see
+ net80211_alloc()), so the net_device is released last and dev
+ must not be touched once the reference has been dropped */
+ netdev_nullify ( dev->netdev );
+ netdev_put ( dev->netdev );
+}
+
+
+/* ---------- 802.11 network management workhorse code ---------- */
+
+/**
+ * Set state of 802.11 device
+ *
+ * @v dev 802.11 device
+ * @v clear Bitmask of flags to clear
+ * @v set Bitmask of flags to set
+ * @v status Status or reason code for most recent operation
+ *
+ * If @a status represents a reason code, it should be OR'ed with
+ * NET80211_IS_REASON.
+ *
+ * Clearing authentication also clears association; clearing
+ * association also clears security handshaking state. Clearing
+ * association removes the link-up flag from the wrapping net_device,
+ * but setting it does not automatically set the flag; that is left to
+ * the judgment of higher-level code.
+ */
+static inline void net80211_set_state ( struct net80211_device *dev,
+ short clear, short set,
+ u16 status )
+{
+ /* The conditions in this function are deliberately formulated
+ to be decidable at compile-time in most cases. Since clear
+ and set are generally passed as constants, the body of this
+ function can be reduced down to a few statements by the
+ compiler. */
+
+ /* Bits of dev->state that hold the status/reason code */
+ const int statmsk = NET80211_STATUS_MASK | NET80211_IS_REASON;
+
+ /* Cascade the clears: PROBED implies AUTHENTICATED, which
+ implies ASSOCIATED, which implies CRYPTO_SYNCED */
+ if ( clear & NET80211_PROBED )
+ clear |= NET80211_AUTHENTICATED;
+
+ if ( clear & NET80211_AUTHENTICATED )
+ clear |= NET80211_ASSOCIATED;
+
+ if ( clear & NET80211_ASSOCIATED )
+ clear |= NET80211_CRYPTO_SYNCED;
+
+ /* Apply the flag changes, then overwrite the status bits; a
+ flag present in both clear and set ends up set */
+ dev->state = ( dev->state & ~clear ) | set;
+ dev->state = ( dev->state & ~statmsk ) | ( status & statmsk );
+
+ if ( clear & NET80211_ASSOCIATED )
+ netdev_link_down ( dev->netdev );
+
+ /* Tell the driver whenever the association state was touched.
+ NOTE(review): unlike open/close/transmit/poll/irq, the config
+ method is called without a NULL check -- confirm that every
+ operations table reachable here provides it */
+ if ( ( clear | set ) & NET80211_ASSOCIATED )
+ dev->op->config ( dev, NET80211_CFG_ASSOC );
+
+ /* A nonzero status is recorded as the association error and
+ reported on the link; the NET80211_IS_REASON bit selects
+ between reason-code and status-code error values */
+ if ( status != 0 ) {
+ if ( status & NET80211_IS_REASON )
+ dev->assoc_rc = -E80211_REASON ( status );
+ else
+ dev->assoc_rc = -E80211_STATUS ( status );
+ netdev_link_err ( dev->netdev, dev->assoc_rc );
+ }
+}
+
+/**
+ * Add channels to 802.11 device
+ *
+ * @v dev		802.11 device
+ * @v start		First channel number to add
+ * @v len		Number of channels to add
+ * @v txpower		TX power (dBm) to allow on added channels
+ *
+ * Channels are appended after the existing entries until either
+ * @a len channels have been added or the channel table is full.
+ * Channel numbers 1-14 are treated as 2.4GHz channels and advance by
+ * one; any other number is treated as a 5GHz channel and advances by
+ * four.
+ *
+ * To replace the current list of channels instead of adding to it,
+ * set the nr_channels field of the 802.11 device to 0 before calling
+ * this function.
+ */
+static void net80211_add_channels ( struct net80211_device *dev, int start,
+				    int len, int txpower )
+{
+	struct net80211_channel *entry;
+	int idx = dev->nr_channels;
+	int chan = start;
+
+	while ( len-- && idx < NET80211_MAX_CHANNELS ) {
+		entry = &dev->channels[idx];
+
+		entry->channel_nr = chan;
+		entry->maxpower = txpower;
+		entry->hw_value = 0;
+
+		if ( chan < 1 || chan > 14 ) {
+			/* 5GHz: channels are 5MHz units above 5000MHz */
+			entry->band = NET80211_BAND_5GHZ;
+			entry->center_freq = 5000 + 5 * chan;
+			chan += 4;
+		} else {
+			/* 2.4GHz: channel 14 sits off the regular grid */
+			entry->band = NET80211_BAND_2GHZ;
+			entry->center_freq = ( ( chan == 14 ) ?
+					       2484 : 2407 + 5 * chan );
+			chan++;
+		}
+
+		idx++;
+	}
+
+	dev->nr_channels = idx;
+}
+
+/**
+ * Filter 802.11 device channels for hardware capabilities
+ *
+ * @v dev 802.11 device
+ *
+ * Hardware may support fewer channels than regulatory restrictions
+ * allow; this function filters out channels in dev->channels that are
+ * not supported by the hardware list in dev->hwinfo. It also copies
+ * over the net80211_channel::hw_value and limits maximum TX power
+ * appropriately.
+ *
+ * Channels are matched based on center frequency, ignoring band and
+ * channel number.
+ *
+ * If the driver specifies no supported channels, the effect will be
+ * as though all were supported.
+ */
+static void net80211_filter_hw_channels ( struct net80211_device *dev )
+{
+ /* delta counts the entries dropped so far; i is the index of the
+ entry currently being examined in the unfiltered list. */
+ int delta = 0, i = 0;
+ /* Frequency of the channel selected on entry, used both to
+ re-locate that channel after compaction and to decide whether
+ the driver needs to be reconfigured. */
+ int old_freq = dev->channels[dev->channel].center_freq;
+ struct net80211_channel *chan, *hwchan;
+
+ /* An empty hardware list means "no restriction": leave the
+ channel list untouched. */
+ if ( ! dev->hw->nr_channels )
+ return;
+
+ /* Default to the first surviving channel unless the old
+ frequency is found again below. */
+ dev->channel = 0;
+ for ( chan = dev->channels; chan < dev->channels + dev->nr_channels;
+ chan++, i++ ) {
+ int ok = 0;
+ /* Linear search for a hardware channel with the same center
+ frequency; on a match hwchan is left pointing at it. */
+ for ( hwchan = dev->hw->channels;
+ hwchan < dev->hw->channels + dev->hw->nr_channels;
+ hwchan++ ) {
+ if ( hwchan->center_freq == chan->center_freq ) {
+ ok = 1;
+ break;
+ }
+ }
+
+ if ( ! ok )
+ delta++;
+ else {
+ chan->hw_value = hwchan->hw_value;
+ /* A hardware maxpower of 0 is treated as "no limit" */
+ if ( hwchan->maxpower != 0 &&
+ chan->maxpower > hwchan->maxpower )
+ chan->maxpower = hwchan->maxpower;
+ /* i - delta is this entry's index after compaction */
+ if ( old_freq == chan->center_freq )
+ dev->channel = i - delta;
+ /* Slide the kept entry down over the dropped ones */
+ if ( delta )
+ chan[-delta] = *chan;
+ }
+ }
+
+ dev->nr_channels -= delta;
+
+ /* Only bother the driver if the selected frequency changed */
+ if ( dev->channels[dev->channel].center_freq != old_freq )
+ dev->op->config ( dev, NET80211_CFG_CHANNEL );
+}
+
+/**
+ * Apply a received capabilities field to the 802.11 device state
+ *
+ * @v dev	802.11 device
+ * @v capab	Capabilities field from a beacon, probe, or association frame
+ * @ret rc	Return status code
+ *
+ * Only networks advertising the "managed" capability without the
+ * "ad-hoc" capability are accepted; anything else yields -ENOSYS.
+ * The short-preamble and short-slot PHY flags are recomputed from
+ * @a capab, and the driver is reconfigured if they changed.
+ */
+static int net80211_process_capab ( struct net80211_device *dev,
+				    u16 capab )
+{
+	const u16 prev_flags = dev->phy_flags;
+
+	if ( ( capab & ( IEEE80211_CAPAB_MANAGED | IEEE80211_CAPAB_ADHOC ) ) !=
+	     IEEE80211_CAPAB_MANAGED ) {
+		DBGC ( dev, "802.11 %p cannot handle IBSS network\n", dev );
+		return -ENOSYS;
+	}
+
+	/* Start from "long preamble, long slot" and re-enable whatever
+	   the network advertises. */
+	dev->phy_flags &= ~NET80211_PHY_USE_SHORT_PREAMBLE;
+	dev->phy_flags &= ~NET80211_PHY_USE_SHORT_SLOT;
+
+	if ( capab & IEEE80211_CAPAB_SHORT_PMBL )
+		dev->phy_flags |= NET80211_PHY_USE_SHORT_PREAMBLE;
+	if ( capab & IEEE80211_CAPAB_SHORT_SLOT )
+		dev->phy_flags |= NET80211_PHY_USE_SHORT_SLOT;
+
+	if ( dev->phy_flags == prev_flags )
+		return 0;
+
+	dev->op->config ( dev, NET80211_CFG_PHY_PARAMS );
+	return 0;
+}
+
+/**
+ * Update 802.11 device state to reflect received information elements
+ *
+ * @v dev 802.11 device
+ * @v ie Pointer to first information element
+ * @v ie_end Pointer to tail of packet I/O buffer
+ * @ret rc Return status code
+ */
+static int net80211_process_ie ( struct net80211_device *dev,
+ union ieee80211_ie *ie, void *ie_end )
+{
+ /* Snapshots taken on entry so that the driver is only
+ reconfigured for things that actually changed. */
+ u16 old_rate = dev->rates[dev->rate];
+ u16 old_phy = dev->phy_flags;
+ int have_rates = 0, i;
+ int ds_channel = 0;
+ int changed = 0;
+ /* Band of the channel selected on entry; used below to pick the
+ hardware rate table. It is not refreshed if an IE changes the
+ channel during this call. */
+ int band = dev->channels[dev->channel].band;
+
+ /* Nothing to do if the first IE fails the bounds check */
+ if ( ! ieee80211_ie_bound ( ie, ie_end ) )
+ return 0;
+
+ for ( ; ie; ie = ieee80211_next_ie ( ie, ie_end ) ) {
+ switch ( ie->id ) {
+ case IEEE80211_IE_SSID:
+ /* Over-long SSIDs are ignored rather than truncated */
+ if ( ie->len <= 32 ) {
+ memcpy ( dev->essid, ie->ssid, ie->len );
+ dev->essid[ie->len] = 0;
+ }
+ break;
+
+ case IEEE80211_IE_RATES:
+ case IEEE80211_IE_EXT_RATES:
+ /* The first rates IE seen resets the list; any later
+ rates IE appends to it. */
+ if ( ! have_rates ) {
+ dev->nr_rates = 0;
+ dev->basic_rates = 0;
+ have_rates = 1;
+ }
+ for ( i = 0; i < ie->len &&
+ dev->nr_rates < NET80211_MAX_RATES; i++ ) {
+ /* Low 7 bits are the rate, scaled by 5 for
+ dev->rates; the top bit marks a basic rate. */
+ u8 rid = ie->rates[i];
+ u16 rate = ( rid & 0x7f ) * 5;
+
+ if ( rid & 0x80 )
+ dev->basic_rates |=
+ ( 1 << dev->nr_rates );
+
+ dev->rates[dev->nr_rates++] = rate;
+ }
+
+ break;
+
+ case IEEE80211_IE_DS_PARAM:
+ /* NOTE(review): ds_channel is compared before it is
+ loaded from this IE, so the test uses the value
+ from a previous DS IE (0 for the first one) -
+ confirm whether the current IE's channel was
+ meant to be compared instead. */
+ if ( dev->channel < dev->nr_channels && ds_channel ==
+ dev->channels[dev->channel].channel_nr )
+ break;
+ ds_channel = ie->ds_param.current_channel;
+ net80211_change_channel ( dev, ds_channel );
+ break;
+
+ case IEEE80211_IE_COUNTRY:
+ /* Replace the channel list with the one advertised */
+ dev->nr_channels = 0;
+
+ DBGC ( dev, "802.11 %p setting country regulations "
+ "for %c%c\n", dev, ie->country.name[0],
+ ie->country.name[1] );
+ /* Walk the 3-byte triplets that follow the first
+ 3 bytes of the element body */
+ for ( i = 0; i < ( ie->len - 3 ) / 3; i++ ) {
+ union ieee80211_ie_country_triplet *t =
+ &ie->country.triplet[i];
+ if ( t->first > 200 ) {
+ DBGC ( dev, "802.11 %p ignoring regulatory "
+ "extension information\n", dev );
+ } else {
+ net80211_add_channels ( dev,
+ t->band.first_channel,
+ t->band.nr_channels,
+ t->band.max_txpower );
+ }
+ }
+ net80211_filter_hw_channels ( dev );
+ break;
+
+ case IEEE80211_IE_ERP_INFO:
+ /* Recompute protection and short-preamble flags
+ from the ERP bits */
+ dev->phy_flags &= ~( NET80211_PHY_USE_PROTECTION |
+ NET80211_PHY_USE_SHORT_PREAMBLE );
+ if ( ie->erp_info & IEEE80211_ERP_USE_PROTECTION )
+ dev->phy_flags |= NET80211_PHY_USE_PROTECTION;
+ if ( ! ( ie->erp_info & IEEE80211_ERP_BARKER_LONG ) )
+ dev->phy_flags |= NET80211_PHY_USE_SHORT_PREAMBLE;
+ break;
+ }
+ }
+
+ if ( have_rates ) {
+ /* Allow only those rates that are also supported by
+ the hardware. */
+ int delta = 0, j;
+
+ /* Fall back to the first rate unless the old rate is
+ found again below */
+ dev->rate = 0;
+ for ( i = 0; i < dev->nr_rates; i++ ) {
+ int ok = 0;
+ for ( j = 0; j < dev->hw->nr_rates[band]; j++ ) {
+ if ( dev->hw->rates[band][j] == dev->rates[i] ){
+ ok = 1;
+ break;
+ }
+ }
+
+ /* NOTE(review): dropped rates are compacted out of
+ dev->rates here, but the matching bits in
+ dev->basic_rates are not shifted down - confirm
+ whether basic_rates can become misaligned. */
+ if ( ! ok )
+ delta++;
+ else {
+ dev->rates[i - delta] = dev->rates[i];
+ if ( old_rate == dev->rates[i] )
+ dev->rate = i - delta;
+ }
+ }
+
+ dev->nr_rates -= delta;
+
+ /* Sort available rates - sorted subclumps tend to already
+ exist, so insertion sort works well. */
+ for ( i = 1; i < dev->nr_rates; i++ ) {
+ u16 rate = dev->rates[i];
+ u32 tmp, br, mask;
+
+ /* Shift larger-or-equal entries up and drop the
+ rate into slot j + 1 */
+ for ( j = i - 1; j >= 0 && dev->rates[j] >= rate; j-- )
+ dev->rates[j + 1] = dev->rates[j];
+ dev->rates[j + 1] = rate;
+
+ /* Adjust basic_rates to match by rotating the
+ bits from bit j+1 to bit i left one position. */
+ /* mask selects bits j+1 .. i-1; tmp is bit i, which
+ ends up at bit j+1 after the rotation. */
+ mask = ( ( 1 << i ) - 1 ) & ~( ( 1 << ( j + 1 ) ) - 1 );
+ br = dev->basic_rates;
+ tmp = br & ( 1 << i );
+ br = ( br & ~( mask | tmp ) ) | ( ( br & mask ) << 1 );
+ br |= ( tmp >> ( i - j - 1 ) );
+ dev->basic_rates = br;
+ }
+
+ net80211_set_rtscts_rate ( dev );
+
+ if ( dev->rates[dev->rate] != old_rate )
+ changed |= NET80211_CFG_RATE;
+ }
+
+ /* Hardware restrictions override whatever the network asked for */
+ if ( dev->hw->flags & NET80211_HW_NO_SHORT_PREAMBLE )
+ dev->phy_flags &= ~NET80211_PHY_USE_SHORT_PREAMBLE;
+ if ( dev->hw->flags & NET80211_HW_NO_SHORT_SLOT )
+ dev->phy_flags &= ~NET80211_PHY_USE_SHORT_SLOT;
+
+ if ( old_phy != dev->phy_flags )
+ changed |= NET80211_CFG_PHY_PARAMS;
+
+ /* One driver call covering every kind of change accumulated */
+ if ( changed )
+ dev->op->config ( dev, changed );
+
+ return 0;
+}
+
+/**
+ * Write the information elements of an outgoing probe/association request
+ *
+ * @v dev	802.11 device
+ * @v ie	Start of the information element area to fill
+ * @ret next_ie	First byte past the information elements written
+ *
+ * Elements are emitted in this order: SSID, supported rates (at most
+ * eight), the device's RSN element if it has one, extended rates for
+ * any rates past the eighth, and finally the device's security
+ * element if it is a vendor element. No bounds checking is done on
+ * the output area.
+ */
+static union ieee80211_ie *
+net80211_marshal_request_info ( struct net80211_device *dev,
+				union ieee80211_ie *ie )
+{
+	int i;
+
+	/* SSID (copied without its NUL terminator) */
+	ie->id = IEEE80211_IE_SSID;
+	ie->len = strlen ( dev->essid );
+	memcpy ( ie->ssid, dev->essid, ie->len );
+	ie = ieee80211_next_ie ( ie, NULL );
+
+	/* Supported rates: first eight at most */
+	ie->id = IEEE80211_IE_RATES;
+	ie->len = dev->nr_rates;
+	if ( ie->len > 8 )
+		ie->len = 8;
+	for ( i = 0; i < ie->len; i++ ) {
+		u8 rid = dev->rates[i] / 5;
+
+		if ( dev->basic_rates & ( 1 << i ) )
+			rid |= 0x80;
+		ie->rates[i] = rid;
+	}
+	ie = ieee80211_next_ie ( ie, NULL );
+
+	/* RSN element goes before the extended rates */
+	if ( dev->rsn_ie && dev->rsn_ie->id == IEEE80211_IE_RSN ) {
+		memcpy ( ie, dev->rsn_ie, dev->rsn_ie->len + 2 );
+		ie = ieee80211_next_ie ( ie, NULL );
+	}
+
+	/* 802.11 requires an Extended Rates element for any rates
+	   beyond the eighth; i carries on from the loop above. */
+	if ( dev->nr_rates > 8 ) {
+		ie->id = IEEE80211_IE_EXT_RATES;
+		ie->len = dev->nr_rates - 8;
+
+		while ( i < dev->nr_rates ) {
+			u8 rid = dev->rates[i] / 5;
+
+			if ( dev->basic_rates & ( 1 << i ) )
+				rid |= 0x80;
+			ie->rates[i - 8] = rid;
+			i++;
+		}
+
+		ie = ieee80211_next_ie ( ie, NULL );
+	}
+
+	/* A vendor-type security element goes last */
+	if ( dev->rsn_ie && dev->rsn_ie->id == IEEE80211_IE_VENDOR ) {
+		memcpy ( ie, dev->rsn_ie, dev->rsn_ie->len + 2 );
+		ie = ieee80211_next_ie ( ie, NULL );
+	}
+
+	return ie;
+}
+
+/** Seconds to wait after finding a network, to possibly find better APs for it
+ *
+ * This is used when a specific SSID to scan for is specified.
+ *
+ * net80211_probe_step() multiplies this by ticks_per_sec() to form its
+ * gather timeout when the probe's SSID filter is non-empty.
+ * NOTE(review): that timeout is measured from the probe start time
+ * (ctx->ticks_start), not from the last beacon - confirm intended.
+ */
+#define NET80211_PROBE_GATHER 1
+
+/** Seconds to wait after finding a network, to possibly find other networks
+ *
+ * This is used when an empty SSID is specified, to scan for all
+ * networks.
+ *
+ * Used by net80211_probe_step() in place of NET80211_PROBE_GATHER when
+ * the probe's SSID filter is the empty string.
+ */
+#define NET80211_PROBE_GATHER_ALL 2
+
+/** Seconds to allow a probe to take if no network has been found
+ *
+ * net80211_probe_step() ends the probe once this much time has passed
+ * since the probe started, whether or not any beacon was recorded.
+ */
+#define NET80211_PROBE_TIMEOUT 6
+
+/**
+ * Begin probe of 802.11 networks
+ *
+ * @v dev	802.11 device
+ * @v essid	SSID to probe for, or "" to accept any (may not be NULL)
+ * @v active	Whether to use active scanning
+ * @ret ctx	Probe context, or @c NULL on allocation failure
+ *
+ * Active scanning may only be used on channels 1-11 in the 2.4GHz
+ * band, due to iPXE's lack of a complete regulatory database. If
+ * active scanning is used, probe packets will be sent on each
+ * channel; this can allow association with hidden-SSID networks if
+ * the SSID is properly specified.
+ *
+ * A @c NULL return indicates an out-of-memory condition; in that case
+ * the device has not been modified.
+ *
+ * @a essid is stored by reference in the context and copied into
+ * dev->essid with strcpy(), so it must stay valid for the lifetime of
+ * the probe and must fit in dev->essid.
+ *
+ * The returned context must be periodically passed to
+ * net80211_probe_step() until that function returns zero.
+ */
+struct net80211_probe_ctx * net80211_probe_start ( struct net80211_device *dev,
+						   const char *essid,
+						   int active )
+{
+	struct net80211_probe_ctx *ctx;
+
+	assert ( netdev_is_open ( dev->netdev ) );
+
+	/* Perform every allocation that can fail before touching any
+	   device state, so that failure needs no device rollback. */
+	ctx = zalloc ( sizeof ( *ctx ) );
+	if ( ! ctx )
+		goto err_ctx;
+
+	ctx->beacons = malloc ( sizeof ( *ctx->beacons ) );
+	if ( ! ctx->beacons )
+		goto err_beacons;
+	INIT_LIST_HEAD ( ctx->beacons );
+
+	if ( active ) {
+		ctx->probe = alloc_iob ( 128 );
+		if ( ! ctx->probe )
+			goto err_probe;
+		iob_reserve ( ctx->probe, IEEE80211_TYP_FRAME_HEADER_LEN );
+	}
+
+	ctx->dev = dev;
+	ctx->old_keep_mgmt = net80211_keep_mgmt ( dev, 1 );
+	ctx->essid = essid;
+	if ( dev->essid != ctx->essid )
+		strcpy ( dev->essid, ctx->essid );
+
+	if ( active ) {
+		/* Build the probe request body now that dev->essid
+		   holds the SSID being probed for. */
+		struct ieee80211_probe_req *probe_req = ctx->probe->data;
+		union ieee80211_ie *ie;
+
+		ie = net80211_marshal_request_info ( dev,
+						     probe_req->info_element );
+
+		iob_put ( ctx->probe, ( void * ) ie - ctx->probe->data );
+	}
+
+	ctx->ticks_start = currticks();
+	ctx->ticks_beacon = 0;
+	ctx->ticks_channel = currticks();
+	ctx->hop_time = ticks_per_sec() / ( active ? 2 : 6 );
+
+	/*
+	 * Channels on 2.4GHz overlap, and the most commonly used
+	 * are 1, 6, and 11. We'll get a result faster if we check
+	 * every 5 channels, but in order to hit all of them the
+	 * number of channels must be relatively prime to 5. If it's
+	 * not, tweak the hop.
+	 */
+	ctx->hop_step = 5;
+	while ( dev->nr_channels % ctx->hop_step == 0 && ctx->hop_step > 1 )
+		ctx->hop_step--;
+
+	dev->channel = 0;
+	dev->op->config ( dev, NET80211_CFG_CHANNEL );
+
+	return ctx;
+
+ err_probe:
+	free ( ctx->beacons );
+ err_beacons:
+	free ( ctx );
+ err_ctx:
+	return NULL;
+}
+
+/**
+ * Continue probe of 802.11 networks
+ *
+ * @v ctx	Probe context returned by net80211_probe_start()
+ * @ret rc	Probe status
+ *
+ * The return code will be 0 if the probe is still going on (and this
+ * function should be called again), a positive number if the probe
+ * completed successfully, or a negative error code if the probe
+ * failed for that reason.
+ *
+ * Whether the probe succeeded or failed, you must call
+ * net80211_probe_finish_all() or net80211_probe_finish_best()
+ * (depending on whether you want information on all networks or just
+ * the best-signal one) in order to release the probe context. A
+ * failed probe may still have acquired some valid data.
+ *
+ * Received frames are untrusted: a beacon whose SSID element is
+ * longer than IEEE80211_MAX_SSID_LEN is dropped. If memory runs out
+ * while recording a beacon, that beacon is dropped and the list of
+ * networks found so far is left intact; if memory runs out while
+ * duplicating the probe request, -ENOMEM is returned.
+ */
+int net80211_probe_step ( struct net80211_probe_ctx *ctx )
+{
+	struct net80211_device *dev = ctx->dev;
+	u32 start_timeout = NET80211_PROBE_TIMEOUT * ticks_per_sec();
+	u32 gather_timeout = ticks_per_sec();
+	u32 now = currticks();
+	struct io_buffer *iob;
+	int signal;
+	int rc;
+	char ssid[IEEE80211_MAX_SSID_LEN + 1];
+
+	gather_timeout *= ( ctx->essid[0] ? NET80211_PROBE_GATHER :
+			    NET80211_PROBE_GATHER_ALL );
+
+	/* Time out if necessary */
+	if ( now >= ctx->ticks_start + start_timeout )
+		return list_empty ( ctx->beacons ) ? -ETIMEDOUT : +1;
+
+	if ( ctx->ticks_beacon > 0 && now >= ctx->ticks_start + gather_timeout )
+		return +1;
+
+	/* Change channels if necessary */
+	if ( now >= ctx->ticks_channel + ctx->hop_time ) {
+		dev->channel = ( dev->channel + ctx->hop_step )
+			% dev->nr_channels;
+		dev->op->config ( dev, NET80211_CFG_CHANNEL );
+		udelay ( dev->hw->channel_change_time );
+
+		ctx->ticks_channel = now;
+
+		if ( ctx->probe ) {
+			struct io_buffer *siob = ctx->probe; /* to send */
+
+			/* Make a copy for future use; the original is
+			   handed over to the transmit path below. */
+			iob = alloc_iob ( siob->tail - siob->head );
+			if ( ! iob ) {
+				DBGC ( dev, "802.11 %p probe: out of memory "
+				       "copying probe request\n", dev );
+				return -ENOMEM;
+			}
+			iob_reserve ( iob, iob_headroom ( siob ) );
+			memcpy ( iob_put ( iob, iob_len ( siob ) ),
+				 siob->data, iob_len ( siob ) );
+
+			ctx->probe = iob;
+			rc = net80211_tx_mgmt ( dev, IEEE80211_STYPE_PROBE_REQ,
+						eth_broadcast,
+						iob_disown ( siob ) );
+			if ( rc ) {
+				DBGC ( dev, "802.11 %p send probe failed: "
+				       "%s\n", dev, strerror ( rc ) );
+				return rc;
+			}
+		}
+	}
+
+	/* Check for new management packets */
+	while ( ( iob = net80211_mgmt_dequeue ( dev, &signal ) ) != NULL ) {
+		struct ieee80211_frame *hdr;
+		struct ieee80211_beacon *beacon;
+		union ieee80211_ie *ie;
+		struct net80211_wlan *wlan;
+		struct io_buffer *copy;
+		u16 type;
+
+		hdr = iob->data;
+		type = hdr->fc & IEEE80211_FC_SUBTYPE;
+		beacon = ( struct ieee80211_beacon * ) hdr->data;
+
+		if ( type != IEEE80211_STYPE_BEACON &&
+		     type != IEEE80211_STYPE_PROBE_RESP ) {
+			DBGC2 ( dev, "802.11 %p probe: non-beacon\n", dev );
+			goto drop;
+		}
+
+		if ( ( void * ) beacon->info_element >= iob->tail ) {
+			DBGC ( dev, "802.11 %p probe: beacon with no IEs\n",
+			       dev );
+			goto drop;
+		}
+
+		ie = beacon->info_element;
+
+		if ( ! ieee80211_ie_bound ( ie, iob->tail ) )
+			ie = NULL;
+
+		while ( ie && ie->id != IEEE80211_IE_SSID )
+			ie = ieee80211_next_ie ( ie, iob->tail );
+
+		if ( ! ie ) {
+			DBGC ( dev, "802.11 %p probe: beacon with no SSID\n",
+			       dev );
+			goto drop;
+		}
+
+		/* The element length comes straight off the air and may
+		   be anything up to 255; refuse whatever would not fit
+		   in the local SSID buffer. */
+		if ( ie->len > IEEE80211_MAX_SSID_LEN ) {
+			DBGC ( dev, "802.11 %p probe: beacon with overlong "
+			       "SSID (%d bytes)\n", dev, ie->len );
+			goto drop;
+		}
+
+		memcpy ( ssid, ie->ssid, ie->len );
+		ssid[ie->len] = 0;
+
+		if ( ctx->essid[0] && strcmp ( ctx->essid, ssid ) != 0 ) {
+			DBGC2 ( dev, "802.11 %p probe: beacon with wrong SSID "
+				"(%s)\n", dev, ssid );
+			goto drop;
+		}
+
+		/* See if we've got an entry for this network */
+		list_for_each_entry ( wlan, ctx->beacons, list ) {
+			if ( strcmp ( wlan->essid, ssid ) != 0 )
+				continue;
+
+			if ( signal < wlan->signal ) {
+				DBGC2 ( dev, "802.11 %p probe: beacon for %s "
+					"(%s) with weaker signal %d\n", dev,
+					ssid, eth_ntoa ( hdr->addr3 ), signal );
+				goto drop;
+			}
+
+			goto fill;
+		}
+
+		/* No entry yet - make one */
+		wlan = zalloc ( sizeof ( *wlan ) );
+		if ( ! wlan ) {
+			DBGC ( dev, "802.11 %p probe: out of memory "
+			       "recording %s\n", dev, ssid );
+			goto drop;
+		}
+		strcpy ( wlan->essid, ssid );
+		list_add_tail ( &wlan->list, ctx->beacons );
+
+		/* Whether we're using an old entry or a new one, fill
+		   it with new data. */
+	fill:
+		/* Copy this I/O buffer into a new wlan->beacon; the
+		 * iob we've got probably came from the device driver
+		 * and may have the full 2.4k allocation, which we
+		 * don't want to keep around wasting memory.
+		 *
+		 * Allocate the copy before modifying the entry, so
+		 * that on failure an existing entry keeps its previous
+		 * (consistent) data and a brand-new entry, which has no
+		 * beacon yet, is removed again.
+		 */
+		copy = alloc_iob ( iob_len ( iob ) );
+		if ( ! copy ) {
+			DBGC ( dev, "802.11 %p probe: out of memory "
+			       "copying beacon for %s\n", dev, ssid );
+			if ( ! wlan->beacon ) {
+				list_del ( &wlan->list );
+				free ( wlan );
+			}
+			goto drop;
+		}
+		memcpy ( iob_put ( copy, iob_len ( iob ) ),
+			 iob->data, iob_len ( iob ) );
+
+		memcpy ( wlan->bssid, hdr->addr3, ETH_ALEN );
+		wlan->signal = signal;
+		wlan->channel = dev->channels[dev->channel].channel_nr;
+
+		free_iob ( wlan->beacon );
+		wlan->beacon = copy;
+
+		if ( ( rc = sec80211_detect ( wlan->beacon, &wlan->handshaking,
+					      &wlan->crypto ) ) == -ENOTSUP ) {
+			if ( beacon->capability & IEEE80211_CAPAB_PRIVACY ) {
+				DBG ( "802.11 %p probe: secured network %s but "
+				      "encryption support not compiled in\n",
+				      dev, wlan->essid );
+				wlan->handshaking = NET80211_SECPROT_UNKNOWN;
+				wlan->crypto = NET80211_CRYPT_UNKNOWN;
+			} else {
+				wlan->handshaking = NET80211_SECPROT_NONE;
+				wlan->crypto = NET80211_CRYPT_NONE;
+			}
+		} else if ( rc != 0 ) {
+			DBGC ( dev, "802.11 %p probe warning: network "
+			       "%s with unidentifiable security "
+			       "settings: %s\n", dev, wlan->essid,
+			       strerror ( rc ) );
+		}
+
+		ctx->ticks_beacon = now;
+
+		DBGC2 ( dev, "802.11 %p probe: good beacon for %s (%s)\n",
+			dev, wlan->essid, eth_ntoa ( wlan->bssid ) );
+
+	drop:
+		free_iob ( iob );
+	}
+
+	return 0;
+}
+
+
+/**
+ * Finish probe of 802.11 networks, returning best-signal network found
+ *
+ * @v ctx	Probe context (may be @c NULL, in which case nothing is done)
+ * @ret wlan	Best-signal network found, or @c NULL if none were found
+ *
+ * The chosen network is detached from the probe's list and handed to
+ * the caller; every other network recorded, the list itself, any
+ * pending probe request and the context are all released here.
+ *
+ * If net80211_probe_start() was called with a particular SSID
+ * parameter as filter, only a network with that SSID (matching
+ * case-sensitively) can be returned from this function.
+ */
+struct net80211_wlan *
+net80211_probe_finish_best ( struct net80211_probe_ctx *ctx )
+{
+	struct net80211_wlan *winner = NULL;
+	struct net80211_wlan *cur;
+
+	if ( ! ctx )
+		return NULL;
+
+	/* Keep the first entry seen among equally strong signals */
+	list_for_each_entry ( cur, ctx->beacons, list ) {
+		if ( winner && winner->signal >= cur->signal )
+			continue;
+		winner = cur;
+	}
+
+	if ( ! winner ) {
+		DBGC ( ctx->dev, "802.11 %p probe: found nothing for '%s'\n",
+		       ctx->dev, ctx->essid );
+	} else {
+		/* Detach so that the list teardown below spares it */
+		list_del ( &winner->list );
+	}
+
+	net80211_free_wlanlist ( ctx->beacons );
+
+	/* Restore the management-frame setting saved at probe start */
+	net80211_keep_mgmt ( ctx->dev, ctx->old_keep_mgmt );
+
+	if ( ctx->probe )
+		free_iob ( ctx->probe );
+
+	free ( ctx );
+
+	return winner;
+}
+
+
+/**
+ * Finish probe of 802.11 networks, returning all networks found
+ *
+ * @v ctx	Probe context (may be @c NULL)
+ * @ret list	List of net80211_wlan detailing networks found, or
+ *		@c NULL if @a ctx was @c NULL
+ *
+ * Ownership of the returned list passes to the caller, who releases
+ * it with net80211_free_wlanlist(). The probe context itself and any
+ * pending probe request are released here.
+ *
+ * If net80211_probe_start() was called with a particular SSID
+ * parameter as filter, this will always return either an empty or a
+ * one-element list.
+ */
+struct list_head *net80211_probe_finish_all ( struct net80211_probe_ctx *ctx )
+{
+	struct list_head *beacons;
+
+	/* The context must be checked before anything is read from it */
+	if ( ! ctx )
+		return NULL;
+
+	beacons = ctx->beacons;
+
+	net80211_keep_mgmt ( ctx->dev, ctx->old_keep_mgmt );
+
+	if ( ctx->probe )
+		free_iob ( ctx->probe );
+
+	free ( ctx );
+
+	return beacons;
+}
+
+
+/**
+ * Release a WLAN structure and the beacon buffer it holds
+ *
+ * @v wlan	WLAN structure to free (may be @c NULL)
+ */
+void net80211_free_wlan ( struct net80211_wlan *wlan )
+{
+	if ( ! wlan )
+		return;
+
+	free_iob ( wlan->beacon );
+	free ( wlan );
+}
+
+
+/**
+ * Release a list of WLAN structures, including the list head
+ *
+ * @v list	List of WLAN structures to free (may be @c NULL)
+ *
+ * Every entry is unlinked and released with net80211_free_wlan(),
+ * after which the list head itself is freed.
+ */
+void net80211_free_wlanlist ( struct list_head *list )
+{
+	struct net80211_wlan *entry;
+	struct net80211_wlan *next;
+
+	if ( list ) {
+		/* The _safe iterator is needed since entries are
+		   removed while walking */
+		list_for_each_entry_safe ( entry, next, list, list ) {
+			list_del ( &entry->list );
+			net80211_free_wlan ( entry );
+		}
+
+		free ( list );
+	}
+}
+
+
+/** Number of ticks to wait for replies to association management frames
+ *
+ * net80211_step_associate() treats a wait as timed out once more than
+ * this many ticks have elapsed since the last packet was sent.
+ */
+#define ASSOC_TIMEOUT TICKS_PER_SEC
+
+/** Number of times to try sending a particular association management frame */
+#define ASSOC_RETRIES 2
+
+/**
+ * Step 802.11 association process
+ *
+ * @v dev	802.11 device
+ *
+ * Each call performs the first stage that has not yet succeeded:
+ * probe, authenticate, associate, security handshake. On completion
+ * the link is brought up and the association process removes itself;
+ * on failure the error is recorded in dev->assoc_rc and association
+ * is restarted via net80211_autoassociate().
+ *
+ * A management frame that gets no reply within ASSOC_TIMEOUT ticks is
+ * resent; each frame is sent at most ASSOC_RETRIES times before the
+ * stage fails with -ETIMEDOUT.
+ */
+static void net80211_step_associate ( struct net80211_device *dev )
+{
+	int rc = 0;
+	int status = dev->state & NET80211_STATUS_MASK;
+
+	/*
+	 * We use a sort of state machine implemented using bits in
+	 * the dev->state variable. At each call, we take the
+	 * logically first step that has not yet succeeded; either it
+	 * has not been tried yet, it's being retried, or it failed.
+	 * If it failed, we return an error indication; otherwise we
+	 * perform the step. If it succeeds, RX handling code will set
+	 * the appropriate status bit for us.
+	 *
+	 * Probe works a bit differently, since we have to step it
+	 * on every call instead of waiting for a packet to arrive
+	 * that will set the completion bit for us.
+	 */
+
+	/* If we're waiting for a reply, check for timeout condition */
+	if ( dev->state & NET80211_WAITING ) {
+		/* Sanity check */
+		if ( ! dev->associating )
+			return;
+
+		if ( currticks() - dev->ctx.assoc->last_packet > ASSOC_TIMEOUT ) {
+			/* Timed out - fail if too many retries, or retry.
+			   times_tried counts sends that timed out, so
+			   it is bumped exactly once per timeout and
+			   the stage fails once ASSOC_RETRIES sends
+			   have gone unanswered. */
+			if ( ++dev->ctx.assoc->times_tried >= ASSOC_RETRIES ) {
+				rc = -ETIMEDOUT;
+				goto fail;
+			}
+		} else {
+			/* Didn't time out - let it keep going */
+			return;
+		}
+	} else {
+		/* Starting a fresh stage: reset the retry counter.
+		   ctx.assoc only exists once the probe has finished. */
+		if ( dev->state & NET80211_PROBED )
+			dev->ctx.assoc->times_tried = 0;
+	}
+
+	if ( ! ( dev->state & NET80211_PROBED ) ) {
+		/* state: probe */
+
+		if ( ! dev->ctx.probe ) {
+			/* start probe */
+			int active = fetch_intz_setting ( NULL,
+						&net80211_active_setting );
+			int band = dev->hw->bands;
+
+			if ( active )
+				band &= ~NET80211_BAND_BIT_5GHZ;
+
+			rc = net80211_prepare_probe ( dev, band, active );
+			if ( rc )
+				goto fail;
+
+			dev->ctx.probe = net80211_probe_start ( dev, dev->essid,
+								active );
+			if ( ! dev->ctx.probe ) {
+				dev->assoc_rc = -ENOMEM;
+				goto fail;
+			}
+		}
+
+		rc = net80211_probe_step ( dev->ctx.probe );
+		if ( ! rc ) {
+			return;	/* still going */
+		}
+
+		dev->associating = net80211_probe_finish_best ( dev->ctx.probe );
+		dev->ctx.probe = NULL;
+		if ( ! dev->associating ) {
+			if ( rc > 0 ) /* "successful" probe found nothing */
+				rc = -ETIMEDOUT;
+			goto fail;
+		}
+
+		/* If we probed using a broadcast SSID, record that
+		   fact for the settings applicator before we clobber
+		   it with the specific SSID we've chosen. */
+		if ( ! dev->essid[0] )
+			dev->state |= NET80211_AUTO_SSID;
+
+		DBGC ( dev, "802.11 %p found network %s (%s)\n", dev,
+		       dev->associating->essid,
+		       eth_ntoa ( dev->associating->bssid ) );
+
+		dev->ctx.assoc = zalloc ( sizeof ( *dev->ctx.assoc ) );
+		if ( ! dev->ctx.assoc ) {
+			rc = -ENOMEM;
+			goto fail;
+		}
+
+		dev->state |= NET80211_PROBED;
+		dev->ctx.assoc->method = IEEE80211_AUTH_OPEN_SYSTEM;
+
+		return;
+	}
+
+	/* Record time of sending the packet we're about to send, for timeout */
+	dev->ctx.assoc->last_packet = currticks();
+
+	if ( ! ( dev->state & NET80211_AUTHENTICATED ) ) {
+		/* state: prepare and authenticate */
+
+		if ( status != IEEE80211_STATUS_SUCCESS ) {
+			/* we tried authenticating already, but failed */
+			int method = dev->ctx.assoc->method;
+
+			if ( method == IEEE80211_AUTH_OPEN_SYSTEM &&
+			     ( status == IEEE80211_STATUS_AUTH_CHALL_INVALID ||
+			       status == IEEE80211_STATUS_AUTH_ALGO_UNSUPP ) ) {
+				/* Maybe this network uses Shared Key? */
+				dev->ctx.assoc->method =
+					IEEE80211_AUTH_SHARED_KEY;
+			} else {
+				goto fail;
+			}
+		}
+
+		DBGC ( dev, "802.11 %p authenticating with method %d\n", dev,
+		       dev->ctx.assoc->method );
+
+		rc = net80211_prepare_assoc ( dev, dev->associating );
+		if ( rc )
+			goto fail;
+
+		rc = net80211_send_auth ( dev, dev->associating,
+					  dev->ctx.assoc->method );
+		if ( rc )
+			goto fail;
+
+		return;
+	}
+
+	if ( ! ( dev->state & NET80211_ASSOCIATED ) ) {
+		/* state: associate */
+
+		if ( status != IEEE80211_STATUS_SUCCESS )
+			goto fail;
+
+		DBGC ( dev, "802.11 %p associating\n", dev );
+
+		if ( dev->handshaker && dev->handshaker->start &&
+		     ! dev->handshaker->started ) {
+			rc = dev->handshaker->start ( dev );
+			if ( rc < 0 )
+				goto fail;
+			dev->handshaker->started = 1;
+		}
+
+		rc = net80211_send_assoc ( dev, dev->associating );
+		if ( rc )
+			goto fail;
+
+		return;
+	}
+
+	if ( ! ( dev->state & NET80211_CRYPTO_SYNCED ) ) {
+		/* state: crypto sync */
+		DBGC ( dev, "802.11 %p security handshaking\n", dev );
+
+		if ( ! dev->handshaker || ! dev->handshaker->step ) {
+			dev->state |= NET80211_CRYPTO_SYNCED;
+			return;
+		}
+
+		rc = dev->handshaker->step ( dev );
+
+		if ( rc < 0 ) {
+			/* Only record the returned error if we're
+			   still marked as associated, because an
+			   asynchronous error will have already been
+			   reported to net80211_deauthenticate() and
+			   assoc_rc thereby set. */
+			if ( dev->state & NET80211_ASSOCIATED )
+				dev->assoc_rc = rc;
+			rc = 0;
+			goto fail;
+		}
+
+		if ( rc > 0 ) {
+			dev->assoc_rc = 0;
+			dev->state |= NET80211_CRYPTO_SYNCED;
+		}
+		return;
+	}
+
+	/* state: done! */
+	netdev_link_up ( dev->netdev );
+	dev->assoc_rc = 0;
+	dev->state &= ~NET80211_WORKING;
+
+	free ( dev->ctx.assoc );
+	dev->ctx.assoc = NULL;
+
+	net80211_free_wlan ( dev->associating );
+	dev->associating = NULL;
+
+	dev->rctl = rc80211_init ( dev );
+
+	process_del ( &dev->proc_assoc );
+
+	DBGC ( dev, "802.11 %p associated with %s (%s)\n", dev,
+	       dev->essid, eth_ntoa ( dev->bssid ) );
+
+	return;
+
+ fail:
+	dev->state &= ~( NET80211_WORKING | NET80211_WAITING );
+	if ( rc )
+		dev->assoc_rc = rc;
+
+	netdev_link_err ( dev->netdev, dev->assoc_rc );
+
+	/* We never reach here from the middle of a probe, so we don't
+	   need to worry about freeing dev->ctx.probe. */
+
+	if ( dev->state & NET80211_PROBED ) {
+		free ( dev->ctx.assoc );
+		dev->ctx.assoc = NULL;
+	}
+
+	net80211_free_wlan ( dev->associating );
+	dev->associating = NULL;
+
+	process_del ( &dev->proc_assoc );
+
+	DBGC ( dev, "802.11 %p association failed (state=%04x): "
+	       "%s\n", dev, dev->state, strerror ( dev->assoc_rc ) );
+
+	/* Try it again: */
+	net80211_autoassociate ( dev );
+}
+
+/**
+ * Check for 802.11 SSID or key updates
+ *
+ * @ret rc	Always zero
+ *
+ * This acts as a settings applicator. For every open 802.11 device
+ * the current security handshaker (if any) is offered the chance to
+ * pick up a changed key, and the configured SSID is re-read.
+ * Association is restarted when the handshaker reports that it cannot
+ * adapt, or when the configured SSID differs from the one in use -
+ * except that an empty configured SSID is not treated as a change
+ * when the SSID in use was chosen automatically.
+ */
+static int net80211_check_settings_update ( void )
+{
+	struct net80211_device *dev;
+	char ssid[IEEE80211_MAX_SSID_LEN + 1];
+	int key_reassoc;
+
+	list_for_each_entry ( dev, &net80211_devices, list ) {
+		if ( ! netdev_is_open ( dev->netdev ) )
+			continue;
+
+		/* A negative result from change_key means the key
+		   cannot be swapped without reassociating */
+		key_reassoc = ( dev->handshaker &&
+				dev->handshaker->change_key &&
+				( dev->handshaker->change_key ( dev ) < 0 ) );
+
+		fetch_string_setting ( netdev_settings ( dev->netdev ),
+				       &net80211_ssid_setting, ssid,
+				       IEEE80211_MAX_SSID_LEN + 1 );
+
+		if ( ! key_reassoc ) {
+			/* "Any network" requested and that is what we
+			   already picked: nothing to do */
+			if ( ( ! ssid[0] ) &&
+			     ( dev->state & NET80211_AUTO_SSID ) )
+				continue;
+
+			/* SSID unchanged: nothing to do */
+			if ( strcmp ( ssid, dev->essid ) == 0 )
+				continue;
+		}
+
+		DBGC ( dev, "802.11 %p updating association: "
+		       "%s -> %s\n", dev, dev->essid, ssid );
+		net80211_autoassociate ( dev );
+	}
+
+	return 0;
+}
+
+/**
+ * Start 802.11 association process
+ *
+ * @v dev	802.11 device
+ *
+ * If the association process is already running it is restarted from
+ * the probe stage; otherwise it is spawned. Any resources held by an
+ * earlier attempt are released, the SSID is reloaded from the
+ * device's settings, and the device state is reset to "working, not
+ * yet probed".
+ */
+void net80211_autoassociate ( struct net80211_device *dev )
+{
+	if ( dev->state & NET80211_WORKING ) {
+		DBGC2 ( dev, "802.11 %p restarting association\n", dev );
+	} else {
+		DBGC2 ( dev, "802.11 %p spawning association process\n", dev );
+		process_add ( &dev->proc_assoc );
+	}
+
+	/* Release whatever an earlier association attempt might still
+	   be holding. net80211_free_wlan() accepts NULL. */
+	net80211_free_wlan ( dev->associating );
+
+	/* dev->ctx holds the association context once probing has
+	   completed, and the probe context before that */
+	if ( dev->state & NET80211_PROBED ) {
+		free ( dev->ctx.assoc );
+	} else {
+		net80211_free_wlan (
+			net80211_probe_finish_best ( dev->ctx.probe ) );
+	}
+
+	/* Start over from a clean slate */
+	fetch_string_setting ( netdev_settings ( dev->netdev ),
+			       &net80211_ssid_setting, dev->essid,
+			       IEEE80211_MAX_SSID_LEN + 1 );
+	dev->ctx.probe = NULL;
+	dev->associating = NULL;
+	dev->assoc_rc = 0;
+	net80211_set_state ( dev, NET80211_PROBED, NET80211_WORKING, 0 );
+}
+
+/**
+ * Pick TX rate for RTS/CTS packets based on data rate
+ *
+ * @v dev	802.11 device
+ *
+ * Stores in dev->rtscts_rate the index of the fastest rate that is
+ * marked "basic" and is no faster than the current data rate. If no
+ * rate qualifies, index 0 is used.
+ */
+static void net80211_set_rtscts_rate ( struct net80211_device *dev )
+{
+	const u16 ceiling = dev->rates[dev->rate];
+	u16 best_rate = 0;
+	int best_idx = -1;
+	int i;
+
+	for ( i = 0; i < dev->nr_rates; i++ ) {
+		u16 candidate = dev->rates[i];
+		int is_basic = ( ( dev->basic_rates & ( 1 << i ) ) != 0 );
+
+		if ( is_basic && candidate <= ceiling &&
+		     candidate > best_rate ) {
+			best_rate = candidate;
+			best_idx = i;
+		}
+	}
+
+	/* During initialization there may be no basic rates at all;
+	   fall back to the first data rate in that case. */
+	dev->rtscts_rate = ( ( best_idx < 0 ) ? 0 : best_idx );
+}
+
+/**
+ * Set data transmission rate for 802.11 device
+ *
+ * @v dev	802.11 device
+ * @v rate	Rate to set, as index into @c dev->rates array
+ *
+ * Out-of-range indices and requests for the rate already in use are
+ * silently ignored. Otherwise the RTS/CTS rate is recomputed and the
+ * driver is asked to apply the new rate.
+ */
+void net80211_set_rate_idx ( struct net80211_device *dev, int rate )
+{
+	assert ( netdev_is_open ( dev->netdev ) );
+
+	if ( rate < 0 || rate >= dev->nr_rates )
+		return;
+	if ( rate == dev->rate )
+		return;
+
+	DBGC2 ( dev, "802.11 %p changing rate from %d->%d Mbps\n",
+		dev, dev->rates[dev->rate] / 10,
+		dev->rates[rate] / 10 );
+
+	dev->rate = rate;
+	net80211_set_rtscts_rate ( dev );
+	dev->op->config ( dev, NET80211_CFG_RATE );
+}
+
+/**
+ * Configure 802.11 device to transmit on a certain channel
+ *
+ * @v dev	802.11 device
+ * @v channel	Channel number (1-11 for 2.4GHz) to transmit on
+ * @ret rc	Return status code
+ *
+ * Returns -ENOENT if @a channel is not in the device's channel list.
+ * The driver is only reconfigured (and its result returned) when the
+ * selected list index actually changes.
+ */
+int net80211_change_channel ( struct net80211_device *dev, int channel )
+{
+	const int prev_idx = dev->channel;
+	int idx;
+
+	assert ( netdev_is_open ( dev->netdev ) );
+
+	/* Locate the list entry carrying this channel number */
+	for ( idx = 0; idx < dev->nr_channels; idx++ ) {
+		if ( dev->channels[idx].channel_nr == channel )
+			break;
+	}
+
+	if ( idx == dev->nr_channels )
+		return -ENOENT;
+
+	dev->channel = idx;
+
+	if ( idx == prev_idx )
+		return 0;
+
+	return dev->op->config ( dev, NET80211_CFG_CHANNEL );
+}
+
+/**
+ * Prepare 802.11 device channel and rate set for scanning
+ *
+ * @v dev	802.11 device
+ * @v band	RF band(s) on which to prepare for scanning
+ * @v active	Whether the scanning will be active
+ * @ret rc	Return status code
+ *
+ * Rebuilds the device's channel list (channels 1-11 for an active
+ * scan; otherwise 1-14 and/or eight 5GHz channels starting at 36,
+ * depending on @a band), filters it against the hardware's channel
+ * list, selects the first channel, and restricts the rate set to the
+ * hardware's first rate for that channel's band.
+ *
+ * Fails with -EINVAL_ACTIVE_SCAN if an active scan is requested on
+ * the 5GHz band or if @a band is empty.
+ */
+int net80211_prepare_probe ( struct net80211_device *dev, int band,
+			     int active )
+{
+	assert ( netdev_is_open ( dev->netdev ) );
+
+	if ( active && ( band & NET80211_BAND_BIT_5GHZ ) ) {
+		DBGC ( dev, "802.11 %p cannot perform active scanning on "
+		       "5GHz band\n", dev );
+		return -EINVAL_ACTIVE_SCAN;
+	}
+
+	if ( ! band ) {
+		/* Happens for a 5GHz-only card when an active request
+		   has masked out 5GHz scanning. */
+		DBGC ( dev, "802.11 %p asked to prepare for scanning nothing\n",
+		       dev );
+		return -EINVAL_ACTIVE_SCAN;
+	}
+
+	/* Rebuild the channel list from scratch */
+	dev->nr_channels = 0;
+
+	if ( ! active ) {
+		if ( band & NET80211_BAND_BIT_2GHZ ) {
+			net80211_add_channels ( dev, 1, 14,
+						NET80211_REG_TXPOWER );
+		}
+		if ( band & NET80211_BAND_BIT_5GHZ ) {
+			net80211_add_channels ( dev, 36, 8,
+						NET80211_REG_TXPOWER );
+		}
+	} else {
+		net80211_add_channels ( dev, 1, 11, NET80211_REG_TXPOWER );
+	}
+
+	net80211_filter_hw_channels ( dev );
+
+	/* Begin on the first channel in the list */
+	dev->channel = 0;
+	dev->op->config ( dev, NET80211_CFG_CHANNEL );
+
+	/* Probe using only the lowest (presumably first) hardware rate */
+	dev->rate = 0;
+	dev->nr_rates = 1;
+	dev->rates[0] = dev->hw->rates[dev->channels[0].band][0];
+	dev->op->config ( dev, NET80211_CFG_RATE );
+
+	return 0;
+}
+
+/**
+ * Prepare 802.11 device channel and rate set for communication
+ *
+ * @v dev 802.11 device
+ * @v wlan WLAN to prepare for communication with
+ * @ret rc Return status code
+ */
+int net80211_prepare_assoc ( struct net80211_device *dev,
+ struct net80211_wlan *wlan )
+{
+ /* The stored beacon is a full frame: header first, then the
+ beacon body carrying timestamp, interval, capabilities and
+ information elements. */
+ struct ieee80211_frame *hdr = wlan->beacon->data;
+ struct ieee80211_beacon *beacon =
+ ( struct ieee80211_beacon * ) hdr->data;
+ struct net80211_handshaker *handshaker;
+ int rc;
+
+ assert ( netdev_is_open ( dev->netdev ) );
+
+ /* Drop any existing association (and, by implication, crypto
+ sync) before retargeting the device at the new network. */
+ net80211_set_state ( dev, NET80211_ASSOCIATED, 0, 0 );
+ memcpy ( dev->bssid, wlan->bssid, ETH_ALEN );
+ strcpy ( dev->essid, wlan->essid );
+
+ /* Discard the security element saved for a previous network */
+ free ( dev->rsn_ie );
+ dev->rsn_ie = NULL;
+
+ dev->last_beacon_timestamp = beacon->timestamp;
+ /* presumably converts the interval to microseconds - confirm
+ the units of beacon_interval and tx_beacon_interval */
+ dev->tx_beacon_interval = 1024 * beacon->beacon_interval;
+
+ /* Barring an IE that tells us the channel outright, assume
+ the channel we heard this AP best on is the channel it's
+ communicating on. */
+ net80211_change_channel ( dev, wlan->channel );
+
+ rc = net80211_process_capab ( dev, beacon->capability );
+ if ( rc )
+ return rc;
+
+ rc = net80211_process_ie ( dev, beacon->info_element,
+ wlan->beacon->tail );
+ if ( rc )
+ return rc;
+
+ /* Associate at the lowest rate so we know it'll get through */
+ dev->rate = 0;
+ dev->op->config ( dev, NET80211_CFG_RATE );
+
+ /* Free old handshaker and crypto, if they exist */
+ if ( dev->handshaker && dev->handshaker->stop &&
+ dev->handshaker->started )
+ dev->handshaker->stop ( dev );
+ free ( dev->handshaker );
+ dev->handshaker = NULL;
+ free ( dev->crypto );
+ free ( dev->gcrypto );
+ dev->crypto = dev->gcrypto = NULL;
+
+ /* Find new security handshaker to use */
+ for_each_table_entry ( handshaker, NET80211_HANDSHAKERS ) {
+ if ( handshaker->protocol == wlan->handshaking ) {
+ /* The device gets its own copy of the table entry,
+ with priv_len bytes of zeroed private data placed
+ directly after it in the same allocation. */
+ dev->handshaker = zalloc ( sizeof ( *handshaker ) +
+ handshaker->priv_len );
+ if ( ! dev->handshaker )
+ return -ENOMEM;
+
+ memcpy ( dev->handshaker, handshaker,
+ sizeof ( *handshaker ) );
+ dev->handshaker->priv = ( ( void * ) dev->handshaker +
+ sizeof ( *handshaker ) );
+ break;
+ }
+ }
+
+ /* A secured network with no matching handshaker cannot be
+ joined; the scheme number is folded into the error code. */
+ if ( ( wlan->handshaking != NET80211_SECPROT_NONE ) &&
+ ! dev->handshaker ) {
+ DBGC ( dev, "802.11 %p no support for handshaking scheme %d\n",
+ dev, wlan->handshaking );
+ return -( ENOTSUP | ( wlan->handshaking << 8 ) );
+ }
+
+ /* Initialize security handshaker */
+ /* NOTE(review): unlike stop/start/step elsewhere in this file,
+ init is called without a NULL check - confirm every
+ handshaker provides it. */
+ if ( dev->handshaker ) {
+ rc = dev->handshaker->init ( dev );
+ if ( rc < 0 )
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * Send 802.11 initial authentication frame
+ *
+ * @v dev 802.11 device
+ * @v wlan WLAN to authenticate with
+ * @v method Authentication method
+ * @ret rc Return status code
+ *
+ * @a method may be 0 for Open System authentication or 1 for Shared
+ * Key authentication. Open System provides no security in association
+ * whatsoever, relying on encryption for confidentiality, but Shared
+ * Key actively introduces security problems and is very rarely used.
+ *
+ * Returns -ENOMEM, leaving the device state untouched, if no I/O
+ * buffer can be allocated for the frame; otherwise returns the
+ * result of net80211_tx_mgmt().
+ */
+int net80211_send_auth ( struct net80211_device *dev,
+			 struct net80211_wlan *wlan, int method )
+{
+	struct io_buffer *iob = alloc_iob ( 64 );
+	struct ieee80211_auth *auth;
+
+	/* Allocation can fail; bail out before touching device state
+	   or dereferencing the buffer */
+	if ( ! iob )
+		return -ENOMEM;
+
+	net80211_set_state ( dev, 0, NET80211_WAITING, 0 );
+
+	/* Leave headroom for the 802.11 header, then append the
+	   authentication body */
+	iob_reserve ( iob, IEEE80211_TYP_FRAME_HEADER_LEN );
+	auth = iob_put ( iob, sizeof ( *auth ) );
+	auth->algorithm = method;
+	auth->tx_seq = 1;
+	auth->status = 0;
+
+	return net80211_tx_mgmt ( dev, IEEE80211_STYPE_AUTH, wlan->bssid, iob );
+}
+
+/**
+ * Handle receipt of 802.11 authentication frame
+ *
+ * @v dev 802.11 device
+ * @v iob I/O buffer
+ *
+ * If the authentication method being used is Shared Key, and the
+ * frame that was received included challenge text, the frame is
+ * encrypted using the cryptosystem currently in effect and sent back
+ * to the AP to complete the authentication.
+ *
+ * This function does not free @a iob. On every failure path the
+ * device's NET80211_WAITING state is cleared with a failure status.
+ */
+static void net80211_handle_auth ( struct net80211_device *dev,
+				   struct io_buffer *iob )
+{
+	struct ieee80211_frame *hdr = iob->data;
+	struct ieee80211_auth *auth =
+		( struct ieee80211_auth * ) hdr->data;
+
+	/* Frames with an odd sequence number are rejected here; only
+	   even-numbered frames are processed */
+	if ( auth->tx_seq & 1 ) {
+		DBGC ( dev, "802.11 %p authentication received improperly "
+		       "directed frame (seq. %d)\n", dev, auth->tx_seq );
+		net80211_set_state ( dev, NET80211_WAITING, 0,
+				     IEEE80211_STATUS_FAILURE );
+		return;
+	}
+
+	if ( auth->status != IEEE80211_STATUS_SUCCESS ) {
+		DBGC ( dev, "802.11 %p authentication failed: status %d\n",
+		       dev, auth->status );
+		net80211_set_state ( dev, NET80211_WAITING, 0,
+				     auth->status );
+		return;
+	}
+
+	if ( auth->algorithm == IEEE80211_AUTH_SHARED_KEY && ! dev->crypto ) {
+		DBGC ( dev, "802.11 %p can't perform shared-key authentication "
+		       "without a cryptosystem\n", dev );
+		net80211_set_state ( dev, NET80211_WAITING, 0,
+				     IEEE80211_STATUS_FAILURE );
+		return;
+	}
+
+	if ( auth->algorithm == IEEE80211_AUTH_SHARED_KEY &&
+	     auth->tx_seq == 2 ) {
+		struct io_buffer *eiob;
+
+		/* Since the iob we got is going to be freed as soon
+		   as we return, we can do some in-place
+		   modification. */
+		auth->tx_seq = 3;
+		auth->status = 0;
+
+		/* Turn the frame around: we become the sender and the
+		   address taken from addr3 becomes the receiver */
+		memcpy ( hdr->addr2, hdr->addr1, ETH_ALEN );
+		memcpy ( hdr->addr1, hdr->addr3, ETH_ALEN );
+
+		/* Never hand a failed (NULL) encryption result to the
+		   transmit path */
+		eiob = dev->crypto->encrypt ( dev->crypto, iob );
+		if ( ! eiob ) {
+			DBGC ( dev, "802.11 %p could not encrypt shared-key "
+			       "challenge response\n", dev );
+			net80211_set_state ( dev, NET80211_WAITING, 0,
+					     IEEE80211_STATUS_FAILURE );
+			return;
+		}
+
+		netdev_tx ( dev->netdev, eiob );
+		return;
+	}
+
+	net80211_set_state ( dev, NET80211_WAITING, NET80211_AUTHENTICATED,
+			     IEEE80211_STATUS_SUCCESS );
+
+	return;
+}
+
+/**
+ * Send 802.11 association frame
+ *
+ * @v dev 802.11 device
+ * @v wlan WLAN to associate with
+ * @ret rc Return status code
+ *
+ * Returns -ENOMEM, leaving the device state untouched, if no I/O
+ * buffer can be allocated for the frame; otherwise returns the
+ * result of net80211_tx_mgmt().
+ */
+int net80211_send_assoc ( struct net80211_device *dev,
+			  struct net80211_wlan *wlan )
+{
+	struct io_buffer *iob = alloc_iob ( 128 );
+	struct ieee80211_assoc_req *assoc;
+	union ieee80211_ie *ie;
+
+	/* Allocation can fail; bail out before touching device state
+	   or dereferencing the buffer */
+	if ( ! iob )
+		return -ENOMEM;
+
+	net80211_set_state ( dev, 0, NET80211_WAITING, 0 );
+
+	iob_reserve ( iob, IEEE80211_TYP_FRAME_HEADER_LEN );
+	assoc = iob->data;
+
+	/* Advertise the optional capabilities the hardware has not
+	   opted out of, plus privacy for encrypted networks */
+	assoc->capability = IEEE80211_CAPAB_MANAGED;
+	if ( ! ( dev->hw->flags & NET80211_HW_NO_SHORT_PREAMBLE ) )
+		assoc->capability |= IEEE80211_CAPAB_SHORT_PMBL;
+	if ( ! ( dev->hw->flags & NET80211_HW_NO_SHORT_SLOT ) )
+		assoc->capability |= IEEE80211_CAPAB_SHORT_SLOT;
+	if ( wlan->crypto )
+		assoc->capability |= IEEE80211_CAPAB_PRIVACY;
+
+	assoc->listen_interval = 1;
+
+	/* The returned pointer marks the end of the marshalled
+	   information elements, and hence of the frame body */
+	ie = net80211_marshal_request_info ( dev, assoc->info_element );
+
+	DBGP ( "802.11 %p about to send association request:\n", dev );
+	DBGP_HD ( iob->data, ( void * ) ie - iob->data );
+
+	/* The body was written in place; now account for its length */
+	iob_put ( iob, ( void * ) ie - iob->data );
+
+	return net80211_tx_mgmt ( dev, IEEE80211_STYPE_ASSOC_REQ,
+				  wlan->bssid, iob );
+}
+
+/**
+ * Handle receipt of 802.11 association reply frame
+ *
+ * @v dev 802.11 device
+ * @v iob I/O buffer
+ *
+ * The capability field and information elements of the reply are
+ * processed regardless of the reported status. On success the BSSID
+ * and association ID are recorded and the device is marked
+ * associated; otherwise the AP's status code is recorded.
+ */
+static void net80211_handle_assoc_reply ( struct net80211_device *dev,
+					  struct io_buffer *iob )
+{
+	struct ieee80211_frame *frame = iob->data;
+	struct ieee80211_assoc_resp *reply =
+		( struct ieee80211_assoc_resp * ) frame->data;
+
+	net80211_process_capab ( dev, reply->capability );
+	net80211_process_ie ( dev, reply->info_element, iob->tail );
+
+	if ( reply->status == IEEE80211_STATUS_SUCCESS ) {
+		/* ESSID was filled before the association request
+		   was sent */
+		memcpy ( dev->bssid, frame->addr3, ETH_ALEN );
+		dev->aid = reply->aid;
+
+		net80211_set_state ( dev, NET80211_WAITING,
+				     NET80211_ASSOCIATED,
+				     IEEE80211_STATUS_SUCCESS );
+		return;
+	}
+
+	/* The AP turned us down: stop waiting and record its status */
+	DBGC ( dev, "802.11 %p association failed: status %d\n",
+	       dev, reply->status );
+	net80211_set_state ( dev, NET80211_WAITING, 0, reply->status );
+}
+
+
+/**
+ * Send 802.11 disassociation frame
+ *
+ * @v dev 802.11 device
+ * @v reason Reason for disassociation
+ * @v deauth If TRUE, send deauthentication instead of disassociation
+ * @ret rc Return status code
+ *
+ * Returns -EINVAL if the device is not currently associated, -ENOMEM
+ * if no I/O buffer can be allocated, and otherwise the result of
+ * net80211_tx_mgmt().
+ */
+static int net80211_send_disassoc ( struct net80211_device *dev, int reason,
+				    int deauth )
+{
+	struct io_buffer *iob;
+	struct ieee80211_disassoc *disassoc;
+
+	/* Check the state before allocating, so that the early return
+	   cannot leak a buffer */
+	if ( ! ( dev->state & NET80211_ASSOCIATED ) )
+		return -EINVAL;
+
+	iob = alloc_iob ( 64 );
+	if ( ! iob )
+		return -ENOMEM;
+
+	net80211_set_state ( dev, NET80211_ASSOCIATED, 0, 0 );
+	iob_reserve ( iob, IEEE80211_TYP_FRAME_HEADER_LEN );
+	disassoc = iob_put ( iob, sizeof ( *disassoc ) );
+	disassoc->reason = reason;
+
+	return net80211_tx_mgmt ( dev, deauth ? IEEE80211_STYPE_DEAUTH :
+				  IEEE80211_STYPE_DISASSOC, dev->bssid, iob );
+}
+
+
+/**
+ * Deauthenticate from current network and try again
+ *
+ * @v dev 802.11 device
+ * @v rc Return status code indicating reason
+ *
+ * The deauthentication will be sent using an 802.11 "unspecified
+ * reason", as is common, but @a rc will be set as a link-up
+ * error to aid the user in debugging.
+ *
+ * After recording the error, automatic association is started again
+ * via net80211_autoassociate().
+ */
+void net80211_deauthenticate ( struct net80211_device *dev, int rc )
+{
+ /* Result is ignored: sending fails with -EINVAL when we are not
+ associated, and the error is recorded and association retried
+ either way */
+ net80211_send_disassoc ( dev, IEEE80211_REASON_UNSPECIFIED, 1 );
+ dev->assoc_rc = rc;
+ netdev_link_err ( dev->netdev, rc );
+
+ net80211_autoassociate ( dev );
+}
+
+
+/** Smoothing factor (1-7) for link quality calculation
+ *
+ * The previous average is weighted by this many eighths and the new
+ * sample by the remaining eighths.
+ */
+#define LQ_SMOOTH 7
+
+/**
+ * Update link quality information based on received beacon
+ *
+ * @v dev 802.11 device
+ * @v iob I/O buffer containing beacon
+ *
+ * Does nothing unless the device is associated. Otherwise the time
+ * between this beacon's timestamp and the previously recorded one is
+ * folded into the running average @c dev->rx_beacon_interval, and
+ * the recorded timestamp is advanced to this beacon's.
+ */
+static void net80211_update_link_quality ( struct net80211_device *dev,
+ struct io_buffer *iob )
+{
+ struct ieee80211_frame *hdr = iob->data;
+ struct ieee80211_beacon *beacon;
+ u32 dt, rxi;
+
+ if ( ! ( dev->state & NET80211_ASSOCIATED ) )
+ return;
+
+ beacon = ( struct ieee80211_beacon * ) hdr->data;
+ /* Timestamp difference, truncated to 32 bits */
+ dt = ( u32 ) ( beacon->timestamp - dev->last_beacon_timestamp );
+ rxi = dev->rx_beacon_interval;
+
+ /* Weighted average: LQ_SMOOTH parts old value, ( 8 - LQ_SMOOTH )
+ parts new sample, divided by 8 via the shift */
+ rxi = ( LQ_SMOOTH * rxi ) + ( ( 8 - LQ_SMOOTH ) * dt );
+ dev->rx_beacon_interval = rxi >> 3;
+
+ dev->last_beacon_timestamp = beacon->timestamp;
+}
+
+
+/**
+ * Handle receipt of 802.11 management frame
+ *
+ * @v dev 802.11 device
+ * @v iob I/O buffer
+ * @v signal Signal strength of received frame
+ *
+ * The I/O buffer is freed before returning unless it was placed on
+ * the device's management queue (beacons, probe responses and action
+ * frames while @c dev->keep_mgmt is set).
+ */
+static void net80211_handle_mgmt ( struct net80211_device *dev,
+ struct io_buffer *iob, int signal )
+{
+ struct ieee80211_frame *hdr = iob->data;
+ struct ieee80211_disassoc *disassoc;
+ u16 stype = hdr->fc & IEEE80211_FC_SUBTYPE;
+ int keep = 0; /* nonzero once iob has been queued */
+ int is_deauth = ( stype == IEEE80211_STYPE_DEAUTH );
+
+ if ( ( hdr->fc & IEEE80211_FC_TYPE ) != IEEE80211_TYPE_MGMT ) {
+ free_iob ( iob );
+ return; /* only handle management frames */
+ }
+
+ switch ( stype ) {
+ /* We reconnect on deauthentication and disassociation. */
+ case IEEE80211_STYPE_DEAUTH:
+ case IEEE80211_STYPE_DISASSOC:
+ /* The body is read through the disassociation structure
+ for both subtypes */
+ disassoc = ( struct ieee80211_disassoc * ) hdr->data;
+ net80211_set_state ( dev, is_deauth ? NET80211_AUTHENTICATED :
+ NET80211_ASSOCIATED, 0,
+ NET80211_IS_REASON | disassoc->reason );
+ DBGC ( dev, "802.11 %p %s: reason %d\n",
+ dev, is_deauth ? "deauthenticated" : "disassociated",
+ disassoc->reason );
+
+ /* Try to reassociate, in case it's transient. */
+ net80211_autoassociate ( dev );
+
+ break;
+
+ /* We handle authentication and association. */
+ case IEEE80211_STYPE_AUTH:
+ /* Ignored once we are already authenticated */
+ if ( ! ( dev->state & NET80211_AUTHENTICATED ) )
+ net80211_handle_auth ( dev, iob );
+ break;
+
+ case IEEE80211_STYPE_ASSOC_RESP:
+ case IEEE80211_STYPE_REASSOC_RESP:
+ /* Ignored once we are already associated */
+ if ( ! ( dev->state & NET80211_ASSOCIATED ) )
+ net80211_handle_assoc_reply ( dev, iob );
+ break;
+
+ /* We pass probes and beacons onto network scanning
+ code. Pass actions for future extensibility. */
+ case IEEE80211_STYPE_BEACON:
+ net80211_update_link_quality ( dev, iob );
+ /* fall through */
+ case IEEE80211_STYPE_PROBE_RESP:
+ case IEEE80211_STYPE_ACTION:
+ if ( dev->keep_mgmt ) {
+ struct net80211_rx_info *rxinf;
+ rxinf = zalloc ( sizeof ( *rxinf ) );
+ if ( ! rxinf ) {
+ /* keep stays 0, so the frame is freed below */
+ DBGC ( dev, "802.11 %p out of memory\n", dev );
+ break;
+ }
+ rxinf->signal = signal;
+ /* Frame and its RX info are appended to two parallel
+ queues in the same order */
+ list_add_tail ( &iob->list, &dev->mgmt_queue );
+ list_add_tail ( &rxinf->list, &dev->mgmt_info_queue );
+ keep = 1;
+ }
+ break;
+
+ case IEEE80211_STYPE_PROBE_REQ:
+ /* Some nodes send these broadcast. Ignore them. */
+ break;
+
+ case IEEE80211_STYPE_ASSOC_REQ:
+ case IEEE80211_STYPE_REASSOC_REQ:
+ /* We should never receive these, only send them. */
+ DBGC ( dev, "802.11 %p received strange management request "
+ "(%04x)\n", dev, stype );
+ break;
+
+ default:
+ DBGC ( dev, "802.11 %p received unimplemented management "
+ "packet (%04x)\n", dev, stype );
+ break;
+ }
+
+ /* Everything that was not queued is finished with */
+ if ( ! keep )
+ free_iob ( iob );
+}
+
+/* ---------- Packet handling functions ---------- */
+
+/**
+ * Free buffers used by 802.11 fragment cache entry
+ *
+ * @v dev 802.11 device
+ * @v fcid Fragment cache entry index
+ *
+ * Every fragment buffer still held by the entry is released, and the
+ * entry is then reset so that it is marked unused.
+ */
+static void net80211_free_frags ( struct net80211_device *dev, int fcid )
+{
+	struct net80211_frag_cache *entry = &dev->frags[fcid];
+	int slot = 0;
+
+	/* Release each buffered fragment and clear its slot */
+	while ( slot < 16 ) {
+		struct io_buffer *held = entry->iob[slot];
+
+		if ( held != NULL ) {
+			free_iob ( held );
+			entry->iob[slot] = NULL;
+		}
+		slot++;
+	}
+
+	/* Reset the bookkeeping so the entry can be reused */
+	entry->in_use = 0;
+	entry->start_ticks = 0;
+	entry->seqnr = 0;
+}
+
+/**
+ * Accumulate 802.11 fragments into one I/O buffer
+ *
+ * @v dev 802.11 device
+ * @v fcid Fragment cache entry index
+ * @v nfrags Number of fragments received
+ * @v size Sum of sizes of all fragments, including headers
+ * @ret iob I/O buffer containing reassembled packet, or NULL
+ *
+ * The reassembled packet consists of the header of the first
+ * fragment followed by the payloads of fragments 0 to @a nfrags - 1
+ * in order. NULL is returned if the new buffer cannot be allocated.
+ *
+ * This function does not free the fragment buffers.
+ */
+static struct io_buffer *net80211_accum_frags ( struct net80211_device *dev,
+						int fcid, int nfrags, int size )
+{
+	struct net80211_frag_cache *frag = &dev->frags[fcid];
+	int hdrsize = IEEE80211_TYP_FRAME_HEADER_LEN;
+	/* Only one of the nfrags headers survives reassembly */
+	int nsize = size - hdrsize * ( nfrags - 1 );
+	int i;
+
+	struct io_buffer *niob = alloc_iob ( nsize );
+	struct ieee80211_frame *hdr;
+
+	/* Allocation can fail; let the caller drop the packet rather
+	   than write through a NULL buffer */
+	if ( ! niob )
+		return NULL;
+
+	/* Add the header from the first one... */
+	memcpy ( iob_put ( niob, hdrsize ), frag->iob[0]->data, hdrsize );
+
+	/* ... and all the data from all of them. */
+	for ( i = 0; i < nfrags; i++ ) {
+		int len = iob_len ( frag->iob[i] ) - hdrsize;
+		memcpy ( iob_put ( niob, len ),
+			 frag->iob[i]->data + hdrsize, len );
+	}
+
+	/* Turn off the fragment bit. */
+	hdr = niob->data;
+	hdr->fc &= ~IEEE80211_FC_MORE_FRAG;
+
+	return niob;
+}
+
+/**
+ * Handle receipt of 802.11 fragment
+ *
+ * @v dev 802.11 device
+ * @v iob I/O buffer containing fragment
+ * @v signal Signal strength with which fragment was received
+ *
+ * A first fragment (fragment number 0 with the "more fragments" bit
+ * set) opens a fragment cache entry. Any other fragment is stored in
+ * the entry with a matching sequence number, or dropped if there is
+ * none. When the final fragment (no "more fragments" bit) arrives
+ * and all of fragments 0 through the final one are present, the
+ * packet is reassembled and passed back to net80211_rx(); if any
+ * fragment is missing the whole entry is discarded.
+ *
+ * This function takes ownership of @a iob.
+ */
+static void net80211_rx_frag ( struct net80211_device *dev,
+			       struct io_buffer *iob, int signal )
+{
+	struct ieee80211_frame *hdr = iob->data;
+	int fragnr = IEEE80211_FRAG ( hdr->seq );
+
+	if ( fragnr == 0 && ( hdr->fc & IEEE80211_FC_MORE_FRAG ) ) {
+		/* start a frag cache entry */
+		/* newest defaults to a valid index so that the
+		   eviction below can never index out of bounds */
+		int i, newest = 0;
+		u32 curr_ticks = currticks(), newest_ticks = 0;
+		u32 timeout = ticks_per_sec() * NET80211_FRAG_TIMEOUT;
+
+		for ( i = 0; i < NET80211_NR_CONCURRENT_FRAGS; i++ ) {
+			if ( dev->frags[i].in_use == 0 )
+				break;
+
+			/* Reclaim an entry only once its reassembly
+			   has timed out. The unsigned difference
+			   stays correct if the tick counter wraps. */
+			if ( ( u32 ) ( curr_ticks -
+				       dev->frags[i].start_ticks ) >=
+			     timeout ) {
+				net80211_free_frags ( dev, i );
+				break;
+			}
+
+			if ( dev->frags[i].start_ticks > newest_ticks ) {
+				newest = i;
+				newest_ticks = dev->frags[i].start_ticks;
+			}
+		}
+
+		/* If we're being sent more concurrent fragmented
+		   packets than we can handle, drop the newest so the
+		   older ones have time to complete. */
+		if ( i == NET80211_NR_CONCURRENT_FRAGS ) {
+			i = newest;
+			net80211_free_frags ( dev, i );
+		}
+
+		dev->frags[i].in_use = 1;
+		dev->frags[i].seqnr = IEEE80211_SEQNR ( hdr->seq );
+		dev->frags[i].start_ticks = currticks();
+		dev->frags[i].iob[0] = iob;
+		return;
+	} else {
+		int i;
+		for ( i = 0; i < NET80211_NR_CONCURRENT_FRAGS; i++ ) {
+			if ( dev->frags[i].in_use && dev->frags[i].seqnr ==
+			     IEEE80211_SEQNR ( hdr->seq ) )
+				break;
+		}
+		if ( i == NET80211_NR_CONCURRENT_FRAGS ) {
+			/* Drop non-first not-in-cache fragments */
+			DBGC ( dev, "802.11 %p dropped fragment fc=%04x "
+			       "seq=%04x\n", dev, hdr->fc, hdr->seq );
+			free_iob ( iob );
+			return;
+		}
+
+		/* A repeated fragment replaces the copy already held,
+		   which would otherwise be leaked */
+		if ( dev->frags[i].iob[fragnr] )
+			free_iob ( dev->frags[i].iob[fragnr] );
+		dev->frags[i].iob[fragnr] = iob;
+
+		if ( ! ( hdr->fc & IEEE80211_FC_MORE_FRAG ) ) {
+			/* Fragment numbers start at zero, so the final
+			   fragment's number is one less than the total */
+			int nfrags = fragnr + 1;
+			int j, size = 0;
+
+			for ( j = 0; j < nfrags; j++ ) {
+				/* Check for a gap before touching the
+				   buffer */
+				if ( dev->frags[i].iob[j] == NULL )
+					break;
+				size += iob_len ( dev->frags[i].iob[j] );
+			}
+			if ( j == nfrags ) {
+				/* We've got everything */
+				struct io_buffer *niob =
+					net80211_accum_frags ( dev, i, nfrags,
+							       size );
+				if ( ! niob )
+					DBGC ( dev, "802.11 %p out of memory "
+					       "reassembling fragments\n",
+					       dev );
+				/* hdr points into a cached fragment and
+				   must not be used past this point */
+				net80211_free_frags ( dev, i );
+				if ( niob )
+					net80211_rx ( dev, niob, signal, 0 );
+			} else {
+				DBGC ( dev, "802.11 %p dropping fragmented "
+				       "packet due to out-of-order arrival, "
+				       "fc=%04x seq=%04x\n", dev, hdr->fc,
+				       hdr->seq );
+				net80211_free_frags ( dev, i );
+			}
+		}
+	}
+}
+
+/**
+ * Handle receipt of 802.11 frame
+ *
+ * @v dev 802.11 device
+ * @v iob I/O buffer
+ * @v signal Received signal strength
+ * @v rate Bitrate at which frame was received, in 100 kbps units
+ *
+ * If the rate or signal is unknown, 0 should be passed.
+ *
+ * This function takes ownership of @a iob: every path either frees
+ * it or hands it on to the fragment cache, the management frame
+ * handler, or the wrapping net_device.
+ */
+void net80211_rx ( struct net80211_device *dev, struct io_buffer *iob,
+ int signal, u16 rate )
+{
+ struct ieee80211_frame *hdr = iob->data;
+ u16 type = hdr->fc & IEEE80211_FC_TYPE;
+ if ( ( hdr->fc & IEEE80211_FC_VERSION ) != IEEE80211_THIS_VERSION )
+ goto drop; /* drop invalid-version packets */
+
+ if ( type == IEEE80211_TYPE_CTRL )
+ goto drop; /* we don't handle control packets,
+ the hardware does */
+
+ /* Only the immediately preceding sequence field is remembered */
+ if ( dev->last_rx_seq == hdr->seq )
+ goto drop; /* avoid duplicate packet */
+ dev->last_rx_seq = hdr->seq;
+
+ if ( dev->hw->flags & NET80211_HW_RX_HAS_FCS ) {
+ /* discard the FCS */
+ iob_unput ( iob, 4 );
+ }
+
+ /* Only decrypt packets from our BSSID, to avoid spurious errors */
+ if ( ( hdr->fc & IEEE80211_FC_PROTECTED ) &&
+ ! memcmp ( hdr->addr2, dev->bssid, ETH_ALEN ) ) {
+ /* Decrypt packet; record and drop if it fails */
+ struct io_buffer *niob;
+ struct net80211_crypto *crypto = dev->crypto;
+
+ if ( ! dev->crypto ) {
+ DBGC ( dev, "802.11 %p cannot decrypt packet "
+ "without a cryptosystem\n", dev );
+ goto drop_crypt;
+ }
+
+ /* Low bit of the first address byte set: group-addressed
+ frame */
+ if ( ( hdr->addr1[0] & 1 ) && dev->gcrypto ) {
+ /* Use group decryption if needed */
+ crypto = dev->gcrypto;
+ }
+
+ niob = crypto->decrypt ( crypto, iob );
+ if ( ! niob ) {
+ DBGC ( dev, "802.11 %p decryption error\n", dev );
+ goto drop_crypt;
+ }
+ /* Continue with the decrypted copy; hdr must be re-pointed
+ because the original buffer is gone */
+ free_iob ( iob );
+ iob = niob;
+ hdr = iob->data;
+ }
+
+ dev->last_signal = signal;
+
+ /* Fragments go into the frag cache or get dropped. */
+ if ( IEEE80211_FRAG ( hdr->seq ) != 0
+ || ( hdr->fc & IEEE80211_FC_MORE_FRAG ) ) {
+ net80211_rx_frag ( dev, iob, signal );
+ return;
+ }
+
+ /* Management frames get handled, enqueued, or dropped. */
+ if ( type == IEEE80211_TYPE_MGMT ) {
+ net80211_handle_mgmt ( dev, iob, signal );
+ return;
+ }
+
+ /* Data frames get dropped or sent to the net_device. */
+ if ( ( hdr->fc & IEEE80211_FC_SUBTYPE ) != IEEE80211_STYPE_DATA )
+ goto drop; /* drop QoS, CFP, or null data packets */
+
+ /* Update rate-control algorithm */
+ if ( dev->rctl )
+ rc80211_update_rx ( dev, hdr->fc & IEEE80211_FC_RETRY, rate );
+
+ /* Pass packet onward */
+ if ( dev->state & NET80211_ASSOCIATED ) {
+ netdev_rx ( dev->netdev, iob );
+ return;
+ }
+
+ /* No association? Drop it. */
+ goto drop;
+
+ /* Record the decryption failure against the net_device (without
+ handing over the buffer), then fall into the common drop path */
+ drop_crypt:
+ netdev_rx_err ( dev->netdev, NULL, EINVAL_CRYPTO_REQUEST );
+ drop:
+ DBGC2 ( dev, "802.11 %p dropped packet fc=%04x seq=%04x\n", dev,
+ hdr->fc, hdr->seq );
+ free_iob ( iob );
+ return;
+}
+
+/** Indicate an error in receiving a packet
+ *
+ * @v dev 802.11 device
+ * @v iob I/O buffer with received packet, or NULL
+ * @v rc Error code
+ *
+ * This logs the error with the wrapping net_device, and frees iob if
+ * it is passed.
+ *
+ * All of the work is delegated to netdev_rx_err(); no 802.11-specific
+ * state is touched here.
+ */
+void net80211_rx_err ( struct net80211_device *dev,
+ struct io_buffer *iob, int rc )
+{
+ netdev_rx_err ( dev->netdev, iob, rc );
+}
+
+/** Indicate the completed transmission of a packet
+ *
+ * @v dev 802.11 device
+ * @v iob I/O buffer of transmitted packet
+ * @v retries Number of times this packet was retransmitted
+ * @v rc Error code, or 0 for success
+ *
+ * This logs an error with the wrapping net_device if one occurred,
+ * and removes and frees the I/O buffer from its TX queue. The
+ * provided retry information is used to tune our transmission rate.
+ *
+ * If the packet did not need to be retransmitted because it was
+ * properly ACKed the first time, @a retries should be 0.
+ *
+ * The rate-control update is skipped when the device has no
+ * rate-control context ( @c dev->rctl is NULL ).
+ */
+void net80211_tx_complete ( struct net80211_device *dev,
+ struct io_buffer *iob, int retries, int rc )
+{
+ /* Update rate-control algorithm */
+ if ( dev->rctl )
+ rc80211_update_tx ( dev, retries, rc );
+
+ /* Pass completion onward */
+ netdev_tx_complete_err ( dev->netdev, iob, rc );
+}
+
+/** Common 802.11 errors
+ *
+ * Error table entries for the 802.11-specific error variants: one
+ * crypto request error, four connection-reset variants and three
+ * connection-refused variants.
+ */
+struct errortab common_wireless_errors[] __errortab = {
+ __einfo_errortab ( EINFO_EINVAL_CRYPTO_REQUEST ),
+ __einfo_errortab ( EINFO_ECONNRESET_UNSPECIFIED ),
+ __einfo_errortab ( EINFO_ECONNRESET_INACTIVITY ),
+ __einfo_errortab ( EINFO_ECONNRESET_4WAY_TIMEOUT ),
+ __einfo_errortab ( EINFO_ECONNRESET_8021X_FAILURE ),
+ __einfo_errortab ( EINFO_ECONNREFUSED_FAILURE ),
+ __einfo_errortab ( EINFO_ECONNREFUSED_ASSOC_DENIED ),
+ __einfo_errortab ( EINFO_ECONNREFUSED_AUTH_ALGO_UNSUPP ),
+};
diff --git a/qemu/roms/ipxe/src/net/80211/rc80211.c b/qemu/roms/ipxe/src/net/80211/rc80211.c
new file mode 100644
index 000000000..eea3bc908
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/80211/rc80211.c
@@ -0,0 +1,372 @@
+/*
+ * Simple 802.11 rate-control algorithm for iPXE.
+ *
+ * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdlib.h>
+#include <ipxe/net80211.h>
+
+/**
+ * @file
+ *
+ * Simple 802.11 rate-control algorithm
+ */
+
+/** @page rc80211 Rate control philosophy
+ *
+ * We want to maximize our transmission speed, to the extent that we
+ * can do that without dropping undue numbers of packets. We also
+ * don't want to take up very much code space, so our algorithm has to
+ * be pretty simple.
+ *
+ * When we receive a packet, we know what rate it was transmitted at,
+ * and whether it had to be retransmitted to get to us.
+ *
+ * When we send a packet, we hear back how many times it had to be
+ * retried to get through, and whether it got through at all.
+ *
+ * Indications of TX success are more reliable than RX success, but RX
+ * information helps us know where to start.
+ *
+ * To handle all of this, we keep for each rate and each direction (TX
+ * and RX separately) some state information for the most recent
+ * packets on that rate and the number of packets for which we have
+ * information. The state is a 32-bit unsigned integer in which two
+ * bits represent a packet: 11 if it went through well, 10 if it went
+ * through with one retry, 01 if it went through with more than one
+ * retry, or 00 if it didn't go through at all. We define the
+ * "goodness" for a particular (rate, direction) combination as the
+ * sum of all the 2-bit fields, times 33, divided by the number of
+ * 2-bit fields containing valid information (16 except when we're
+ * starting out). The number produced is between 0 and 99; we use -1
+ * for rates with less than 4 RX packets or 1 TX, as an indicator that
+ * we do not have enough information to rely on them.
+ *
+ * In deciding which rates are best, we find the weighted average of
+ * TX and RX goodness, where the weighting is by number of packets
+ * with data and TX packets are worth 4 times as much as RX packets.
+ * The weighted average is called "net goodness" and is also a number
+ * between 0 and 99. If 3 consecutive packets fail transmission
+ * outright, we automatically ratchet down the rate; otherwise, we
+ * switch to the best rate whenever the current rate's goodness falls
+ * below some threshold, and try increasing our rate when the goodness
+ * is very high.
+ *
+ * This system is optimized for iPXE's style of usage. Because normal
+ * operation always involves receiving something, we'll make our way
+ * to the best rate pretty quickly. We tend to follow the lead of the
+ * sending AP in choosing rates, but we won't use rates for long that
+ * don't work well for us in transmission. We assume iPXE won't be
+ * running for long enough that rate patterns will change much, so we
+ * don't have to keep time counters or the like. And if this doesn't
+ * work well in practice there are many ways it could be tweaked.
+ *
+ * To avoid staying at 1Mbps for a long time, we don't track any
+ * transmitted packets until we've set our rate based on received
+ * packets.
+ */
+
+/** Two-bit packet status indicator for a packet with no retries */
+#define RC_PKT_OK 0x3
+
+/** Two-bit packet status indicator for a packet with one retry */
+#define RC_PKT_RETRIED_ONCE 0x2
+
+/** Two-bit packet status indicator for a TX packet with multiple retries
+ *
+ * It is not possible to tell whether an RX packet had one or multiple
+ * retries; we rely instead on the fact that failed RX packets won't
+ * get to us at all, so if we receive a lot of RX packets on a certain
+ * rate it must be pretty good.
+ */
+#define RC_PKT_RETRIED_MULTI 0x1
+
+/** Two-bit packet status indicator for a TX packet that was never ACKed
+ *
+ * It is not possible to tell whether an RX packet was sent if it
+ * didn't get through to us, but if we don't see one we won't increase
+ * the goodness for its rate. This asymmetry is part of why TX packets
+ * are weighted much more heavily than RX.
+ */
+#define RC_PKT_FAILED 0x0
+
+/** Number of times to weight TX packets more heavily than RX packets */
+#define RC_TX_FACTOR 4
+
+/** Number of consecutive failed TX packets that cause an automatic rate drop */
+#define RC_TX_EMERG_FAIL 3
+
+/** Minimum net goodness below which we will search for a better rate */
+#define RC_GOODNESS_MIN 85
+
+/** Maximum net goodness above which we will try to increase our rate */
+#define RC_GOODNESS_MAX 95
+
+/** Minimum (num RX + @c RC_TX_FACTOR * num TX) to use a certain rate */
+#define RC_UNCERTAINTY_THRESH 4
+
+/** TX direction (first index into the per-direction arrays) */
+#define TX 0
+
+/** RX direction (first index into the per-direction arrays) */
+#define RX 1
+
+/** A rate control context */
+struct rc80211_ctx
+{
+ /** Goodness state for each rate, TX and RX
+ *
+ * Indexed by direction ( @c TX or @c RX ) and then rate index.
+ * Each entry is a shift register of two-bit packet status
+ * indicators, with the most recent packet in the low bits.
+ */
+ u32 goodness[2][NET80211_MAX_RATES];
+
+ /** Number of packets recorded for each rate
+ *
+ * Indexed like @c goodness; stops counting at 16, the number
+ * of two-bit fields that fit in one goodness entry.
+ */
+ u8 count[2][NET80211_MAX_RATES];
+
+ /** Indication of whether we've set the device rate yet */
+ int started;
+
+ /** Counter of all packets sent and received */
+ int packets;
+};
+
+/**
+ * Initialize rate-control algorithm
+ *
+ * @v dev 802.11 device
+ * @ret ctx Rate-control context, to be stored in @c dev->rctl
+ *
+ * The device is not consulted; the context is simply obtained from
+ * zalloc() and handed back as-is.
+ */
+struct rc80211_ctx * rc80211_init ( struct net80211_device *dev __unused )
+{
+	return zalloc ( sizeof ( struct rc80211_ctx ) );
+}
+
+/**
+ * Calculate net goodness for a certain rate
+ *
+ * @v ctx Rate-control context
+ * @v rate_idx Index of rate to calculate net goodness for
+ * @ret net_good Net goodness, or -1 if there is too little data
+ *
+ * The two-bit status fields recorded for the rate are summed per
+ * direction, with TX counting @c RC_TX_FACTOR times as much as RX.
+ * The result is 33 times the weighted sum divided by the weighted
+ * packet count, or -1 when that weighted count is below
+ * @c RC_UNCERTAINTY_THRESH.
+ */
+static int rc80211_calc_net_goodness ( struct rc80211_ctx *ctx,
+				       int rate_idx )
+{
+	int weighted_sum = 0;
+	int weighted_num = 0;
+	int dir;
+
+	for ( dir = 0; dir < 2; dir++ ) {
+		int weight = ( ( dir == TX ) ? RC_TX_FACTOR : 1 );
+		u32 history = ctx->goodness[dir][rate_idx];
+		int recorded = ctx->count[dir][rate_idx];
+		int score = 0;
+		int left;
+
+		/* Peel one two-bit status field off the history per
+		   recorded packet, most recent first */
+		for ( left = recorded; left > 0; left-- ) {
+			score += ( history & 0x3 );
+			history >>= 2;
+		}
+
+		weighted_sum += weight * score;
+		weighted_num += weight * recorded;
+	}
+
+	/* Too few samples to say anything reliable about this rate */
+	if ( weighted_num < RC_UNCERTAINTY_THRESH )
+		return -1;
+
+	return ( ( 33 * weighted_sum ) / weighted_num );
+}
+
+/**
+ * Determine the best rate to switch to and return it
+ *
+ * @v dev 802.11 device
+ * @ret rate_idx Index of the best rate to switch to
+ *
+ * Rates are scanned in index order. A rate becomes the candidate if
+ * its net goodness beats the best seen so far, or if both it and the
+ * current candidate exceed @c RC_GOODNESS_MIN (so that among rates
+ * that are all good enough, the later index wins). If no rate
+ * qualifies, the device's current rate index is returned; otherwise
+ * the context is marked as started.
+ */
+static int rc80211_pick_best ( struct net80211_device *dev )
+{
+	struct rc80211_ctx *ctx = dev->rctl;
+	int top_good = 0;
+	int top_rate = -1;
+	int idx;
+
+	for ( idx = 0; idx < dev->nr_rates; idx++ ) {
+		int good = rc80211_calc_net_goodness ( ctx, idx );
+		int both_adequate = ( top_good > RC_GOODNESS_MIN &&
+				      good > RC_GOODNESS_MIN );
+
+		if ( good <= top_good && ! both_adequate )
+			continue;
+
+		top_good = good;
+		top_rate = idx;
+	}
+
+	/* Nothing usable found: stay where we are */
+	if ( top_rate < 0 )
+		return dev->rate;
+
+	{
+		int old_good = rc80211_calc_net_goodness ( ctx, dev->rate );
+
+		if ( old_good != top_good )
+			DBGC ( ctx, "802.11 RC %p switching from goodness "
+			       "%d to %d\n", ctx, old_good, top_good );
+	}
+
+	ctx->started = 1;
+	return top_rate;
+}
+
+/**
+ * Set 802.11 device rate
+ *
+ * @v dev 802.11 device
+ * @v rate_idx Index of rate to switch to
+ *
+ * This is a thin wrapper around net80211_set_rate_idx to insert a
+ * debugging message where appropriate.
+ *
+ * The message is emitted on every call, including calls where
+ * @a rate_idx equals the current rate index.
+ */
+static inline void rc80211_set_rate ( struct net80211_device *dev,
+ int rate_idx )
+{
+ /* Stored rates are divided by 10 for display as Mbps */
+ DBGC ( dev->rctl, "802.11 RC %p changing rate %d->%d Mbps\n", dev->rctl,
+ dev->rates[dev->rate] / 10, dev->rates[rate_idx] / 10 );
+
+ net80211_set_rate_idx ( dev, rate_idx );
+}
+
+/**
+ * Check rate-control state and change rate if necessary
+ *
+ * @v dev 802.11 device
+ *
+ * Until a rate has been chosen once, the best available rate is
+ * always applied. After that, nothing happens while the current
+ * rate has too little data. A goodness below @c RC_GOODNESS_MIN
+ * triggers a search for the best rate. A goodness above
+ * @c RC_GOODNESS_MAX, when a higher rate exists, tries the next rate
+ * up if it looks better or is still untested, and otherwise falls
+ * back to the best known rate.
+ */
+static void rc80211_maybe_set_new ( struct net80211_device *dev )
+{
+	struct rc80211_ctx *ctx = dev->rctl;
+	int current;
+	int next_up;
+
+	/* No rate chosen yet: take whatever looks best right now */
+	if ( ! ctx->started ) {
+		rc80211_set_rate ( dev, rc80211_pick_best ( dev ) );
+		return;
+	}
+
+	current = rc80211_calc_net_goodness ( ctx, dev->rate );
+	if ( current < 0 ) /* insufficient data */
+		return;
+
+	/* Doing badly: look for something better */
+	if ( current < RC_GOODNESS_MIN ) {
+		rc80211_set_rate ( dev, rc80211_pick_best ( dev ) );
+		return;
+	}
+
+	/* Doing acceptably, or already at the top rate: stay put */
+	if ( current <= RC_GOODNESS_MAX || dev->rate + 1 >= dev->nr_rates )
+		return;
+
+	/* Doing very well: consider stepping up */
+	next_up = rc80211_calc_net_goodness ( ctx, dev->rate + 1 );
+	if ( next_up > current || next_up < 0 )
+		rc80211_set_rate ( dev, dev->rate + 1 );
+	else
+		rc80211_set_rate ( dev, rc80211_pick_best ( dev ) );
+}
+
+/**
+ * Update rate-control state
+ *
+ * @v dev 802.11 device
+ * @v direction One of the direction constants TX or RX
+ * @v rate_idx Index of rate at which packet was sent or received
+ * @v retries Number of times packet was retried before success
+ * @v failed If nonzero, the packet failed to get through
+ *
+ * The packet's two-bit status is shifted into the history for the
+ * given direction and rate, the packet counters are advanced, and
+ * the rate selection is re-evaluated.
+ */
+static void rc80211_update ( struct net80211_device *dev, int direction,
+			     int rate_idx, int retries, int failed )
+{
+	struct rc80211_ctx *ctx = dev->rctl;
+	u32 *history = &ctx->goodness[direction][rate_idx];
+	u8 *recorded = &ctx->count[direction][rate_idx];
+	u32 status;
+
+	/* Classify how well this packet went */
+	if ( failed ) {
+		status = RC_PKT_FAILED;
+	} else if ( retries > 1 ) {
+		status = RC_PKT_RETRIED_MULTI;
+	} else if ( retries ) {
+		status = RC_PKT_RETRIED_ONCE;
+	} else {
+		status = RC_PKT_OK;
+	}
+
+	/* The history holds at most 16 two-bit fields */
+	if ( *recorded < 16 )
+		( *recorded )++;
+
+	/* Newest packet goes into the low two bits */
+	*history = ( ( *history << 2 ) | status );
+
+	ctx->packets++;
+
+	rc80211_maybe_set_new ( dev );
+}
+
+/**
+ * Update rate-control state for transmitted packet
+ *
+ * @v dev 802.11 device
+ * @v retries Number of times packet was retransmitted before success
+ * @v rc Return status code for transmission
+ *
+ * Nothing is recorded until an initial rate has been chosen. After
+ * recording the packet, the rate is lowered by one step if the last
+ * @c RC_TX_EMERG_FAIL transmissions at the current rate all failed
+ * outright.
+ */
+void rc80211_update_tx ( struct net80211_device *dev, int retries, int rc )
+{
+	struct rc80211_ctx *ctx = dev->rctl;
+	u32 emerg_mask = ( ( 1 << ( 2 * RC_TX_EMERG_FAIL ) ) - 1 );
+
+	if ( ! ctx->started )
+		return;
+
+	rc80211_update ( dev, TX, dev->rate, retries, rc );
+
+	/* The update above may already have switched rates, so
+	   dev->rate is read afresh from here on. Unrecorded history
+	   reads as all-zero, which is indistinguishable from failed
+	   packets; only trust it once enough transmissions have
+	   actually been recorded at this rate. */
+	if ( ctx->count[TX][dev->rate] < RC_TX_EMERG_FAIL )
+		return;
+
+	/* Check if the last RC_TX_EMERG_FAIL packets have all failed */
+	if ( ctx->goodness[TX][dev->rate] & emerg_mask )
+		return;
+
+	if ( dev->rate == 0 ) {
+		DBGC ( dev->rctl, "802.11 RC %p saw %d consecutive "
+		       "failed TX, but cannot lower rate any further\n",
+		       dev->rctl, RC_TX_EMERG_FAIL );
+	} else {
+		DBGC ( dev->rctl, "802.11 RC %p lowering rate (%d->%d "
+		       "Mbps) due to %d consecutive TX failures\n",
+		       dev->rctl, dev->rates[dev->rate] / 10,
+		       dev->rates[dev->rate - 1] / 10,
+		       RC_TX_EMERG_FAIL );
+
+		rc80211_set_rate ( dev, dev->rate - 1 );
+	}
+}
+
+/**
+ * Update rate-control state for received packet
+ *
+ * @v dev 802.11 device
+ * @v retry Whether the received packet had been retransmitted
+ * @v rate Rate at which packet was received, in 100 kbps units
+ *
+ * The packet is recorded against the first entry of the device's
+ * rate set that equals @a rate; packets received at a rate outside
+ * that set are ignored.
+ */
+void rc80211_update_rx ( struct net80211_device *dev, int retry, u16 rate )
+{
+	int ridx = 0;
+
+	while ( ridx < dev->nr_rates ) {
+		if ( dev->rates[ridx] == rate ) {
+			rc80211_update ( dev, RX, ridx, retry, 0 );
+			return;
+		}
+		ridx++;
+	}
+
+	/* couldn't find the rate */
+}
+
+/**
+ * Free rate-control context
+ *
+ * @v ctx Rate-control context
+ *
+ * The context is passed straight to free(); nothing else is torn
+ * down here.
+ */
+void rc80211_free ( struct rc80211_ctx *ctx )
+{
+ free ( ctx );
+}
diff --git a/qemu/roms/ipxe/src/net/80211/sec80211.c b/qemu/roms/ipxe/src/net/80211/sec80211.c
new file mode 100644
index 000000000..d1bc75e90
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/80211/sec80211.c
@@ -0,0 +1,518 @@
+/*
+ * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <ipxe/ieee80211.h>
+#include <ipxe/net80211.h>
+#include <ipxe/sec80211.h>
+
+/** @file
+ *
+ * General secured-network routines required whenever any secure
+ * network support at all is compiled in. This involves things like
+ * installing keys, determining the type of security used by a probed
+ * network, and some small helper functions that take advantage of
+ * static data in this file.
+ */
+
+/* Unsupported cryptosystem error numbers
+ *
+ * Each cryptosystem gets its own variant of ENOTSUP, distinguished by
+ * ( 0x10 | cryptosystem ID ) and carrying its own message string.
+ * ENOTSUP_CRYPT() selects among the three variants from a
+ * cryptosystem ID using the same ( 0x10 | ID ) encoding.
+ */
+#define ENOTSUP_WEP __einfo_error ( EINFO_ENOTSUP_WEP )
+#define EINFO_ENOTSUP_WEP __einfo_uniqify ( EINFO_ENOTSUP, \
+ ( 0x10 | NET80211_CRYPT_WEP ), "WEP not supported" )
+#define ENOTSUP_TKIP __einfo_error ( EINFO_ENOTSUP_TKIP )
+#define EINFO_ENOTSUP_TKIP __einfo_uniqify ( EINFO_ENOTSUP, \
+ ( 0x10 | NET80211_CRYPT_TKIP ), "TKIP not supported" )
+#define ENOTSUP_CCMP __einfo_error ( EINFO_ENOTSUP_CCMP )
+#define EINFO_ENOTSUP_CCMP __einfo_uniqify ( EINFO_ENOTSUP, \
+ ( 0x10 | NET80211_CRYPT_CCMP ), "CCMP not supported" )
+#define ENOTSUP_CRYPT( crypt ) \
+ EUNIQ ( EINFO_ENOTSUP, ( 0x10 | (crypt) ), \
+ ENOTSUP_WEP, ENOTSUP_TKIP, ENOTSUP_CCMP )
+
+/** Mapping from net80211 crypto/secprot types to RSN OUI descriptors
+ *
+ * Arrays of these entries are terminated by an entry whose
+ * @c oui_type is END_MAGIC.
+ */
+struct descriptor_map {
+ /** Value of net80211_crypto_alg or net80211_security_proto */
+ u32 net80211_type;
+
+ /** OUI+type in appropriate byte order, masked to exclude vendor */
+ u32 oui_type;
+};
+
+/** Magic number in @a oui_type showing end of list */
+#define END_MAGIC 0xFFFFFFFF
+
+/** Mapping between net80211 cryptosystems and 802.11i cipher IDs
+ *
+ * Both WEP cipher IDs map to the single NET80211_CRYPT_WEP type, so a
+ * lookup by net80211 type finds the WEP40 entry first. The final
+ * entry is the END_MAGIC terminator and carries
+ * NET80211_CRYPT_UNKNOWN as the type reported for unrecognised IDs.
+ */
+static struct descriptor_map rsn_cipher_map[] = {
+ { .net80211_type = NET80211_CRYPT_WEP,
+ .oui_type = IEEE80211_RSN_CTYPE_WEP40 },
+
+ { .net80211_type = NET80211_CRYPT_WEP,
+ .oui_type = IEEE80211_RSN_CTYPE_WEP104 },
+
+ { .net80211_type = NET80211_CRYPT_TKIP,
+ .oui_type = IEEE80211_RSN_CTYPE_TKIP },
+
+ { .net80211_type = NET80211_CRYPT_CCMP,
+ .oui_type = IEEE80211_RSN_CTYPE_CCMP },
+
+ { .net80211_type = NET80211_CRYPT_UNKNOWN,
+ .oui_type = END_MAGIC }, /* terminator */
+};
+
+/** Mapping between net80211 handshakers and 802.11i AKM IDs
+ *
+ * The final entry is the END_MAGIC terminator and carries
+ * NET80211_SECPROT_UNKNOWN as the type reported for unrecognised IDs.
+ */
+static struct descriptor_map rsn_akm_map[] = {
+ { .net80211_type = NET80211_SECPROT_EAP,
+ .oui_type = IEEE80211_RSN_ATYPE_8021X },
+
+ { .net80211_type = NET80211_SECPROT_PSK,
+ .oui_type = IEEE80211_RSN_ATYPE_PSK },
+
+ { .net80211_type = NET80211_SECPROT_UNKNOWN,
+ .oui_type = END_MAGIC }, /* terminator */
+};
+
+
+/**
+ * Install 802.11 cryptosystem
+ *
+ * @v which Pointer to the cryptosystem structure to install in
+ * @v crypt Cryptosystem ID number
+ * @v key Encryption key to use
+ * @v len Length of encryption key
+ * @v rsc Initial receive sequence counter, if applicable
+ * @ret rc Return status code
+ *
+ * The encryption key will not be accessed via the provided pointer
+ * after this function returns, so you may keep it on the stack.
+ *
+ * @a which must point to either @c dev->crypto (for the normal case
+ * of installing a unicast cryptosystem) or @c dev->gcrypto (to
+ * install a cryptosystem that will be used only for decrypting
+ * group-source frames).
+ *
+ * Any cryptosystem previously stored in @a *which is freed first.
+ * If @a crypt is NET80211_CRYPT_NONE, if memory allocation fails, or
+ * if no handler for @a crypt is compiled in, @a *which is left as
+ * @c NULL. Otherwise @a *which points at the new cryptosystem and the
+ * return value is that of its init() method.
+ */
+int sec80211_install ( struct net80211_crypto **which,
+		       enum net80211_crypto_alg crypt,
+		       const void *key, int len, const void *rsc )
+{
+	/* Must start out NULL: the old *which is freed below, so it
+	 * must never be mistaken for a newly found cryptosystem when
+	 * the table search comes up empty.
+	 */
+	struct net80211_crypto *crypto = NULL;
+	struct net80211_crypto *tbl_crypto;
+
+	/* Remove old crypto if it exists */
+	free ( *which );
+	*which = NULL;
+
+	if ( crypt == NET80211_CRYPT_NONE ) {
+		DBG ( "802.11-Sec not installing null cryptography\n" );
+		return 0;
+	}
+
+	/* Find cryptosystem to use */
+	for_each_table_entry ( tbl_crypto, NET80211_CRYPTOS ) {
+		if ( tbl_crypto->algorithm == crypt ) {
+			/* Private data lives directly after the structure */
+			crypto = zalloc ( sizeof ( *crypto ) +
+					  tbl_crypto->priv_len );
+			if ( ! crypto ) {
+				DBG ( "802.11-Sec out of memory\n" );
+				return -ENOMEM;
+			}
+
+			memcpy ( crypto, tbl_crypto, sizeof ( *crypto ) );
+			crypto->priv = ( ( void * ) crypto +
+					 sizeof ( *crypto ) );
+			break;
+		}
+	}
+
+	if ( ! crypto ) {
+		DBG ( "802.11-Sec no support for cryptosystem %d\n", crypt );
+		return -ENOTSUP_CRYPT ( crypt );
+	}
+
+	*which = crypto;
+
+	DBG ( "802.11-Sec installing cryptosystem %d as %p with key of "
+	      "length %d\n", crypt, crypto, len );
+
+	return crypto->init ( crypto, key, len, rsc );
+}
+
+
+/**
+ * Determine net80211 crypto or handshaking type value to return for RSN info
+ *
+ * @v rsnp Pointer to next descriptor count field in RSN IE
+ * @v rsn_end Pointer to end of RSN IE
+ * @v map Descriptor map to use
+ * @v tbl_start Start of linker table to examine for iPXE support
+ * @v tbl_end End of linker table to examine for iPXE support
+ * @ret rsnp Updated to point to first byte after descriptors
+ * @ret map_ent Descriptor map entry of translation to use
+ *
+ * The entries in the linker table must be net80211_crypto structures
+ * when @a map is rsn_cipher_map and net80211_handshaker structures
+ * when @a map is rsn_akm_map; the table stride is derived from
+ * @a map. Any other @a map causes @c NULL to be returned.
+ *
+ * This function expects @a rsnp to point at a two-byte descriptor
+ * count followed by a list of four-byte cipher or AKM descriptors; it
+ * will return @c NULL if the input packet is malformed (leaving
+ * @a rsnp untouched), and otherwise set @a rsnp to the first byte it
+ * has not looked at. It will return the first cipher in the list that
+ * is supported by the current build of iPXE, or the first of all if
+ * none are supported. A descriptor that matches no map entry is
+ * represented by the map's END_MAGIC terminator entry.
+ *
+ * Descriptors remaining after a supported one has been found are
+ * skipped without being checked against @a rsn_end, so the updated
+ * @a rsnp may point beyond the IE; callers must compare it against
+ * the end pointer before reading further.
+ *
+ * We play rather fast and loose with type checking, because this
+ * function is only called from two well-defined places in the
+ * RSN-checking code. Don't try to use it for anything else.
+ */
+static struct descriptor_map * rsn_pick_desc ( u8 **rsnp, u8 *rsn_end,
+ struct descriptor_map *map,
+ void *tbl_start, void *tbl_end )
+{
+ int ndesc;
+ int ok = 0; /* bitmask of supported types, bit N = type value N */
+ struct descriptor_map *map_ent, *map_ret = NULL;
+ u8 *rsn = *rsnp;
+ void *tblp;
+ size_t tbl_stride = ( map == rsn_cipher_map ?
+ sizeof ( struct net80211_crypto ) :
+ sizeof ( struct net80211_handshaker ) );
+
+ if ( map != rsn_cipher_map && map != rsn_akm_map )
+ return NULL;
+
+ /* Determine which types we support */
+ for ( tblp = tbl_start; tblp < tbl_end; tblp += tbl_stride ) {
+ struct net80211_crypto *crypto = tblp;
+ struct net80211_handshaker *hs = tblp;
+
+ if ( map == rsn_cipher_map )
+ ok |= ( 1 << crypto->algorithm );
+ else
+ ok |= ( 1 << hs->protocol );
+ }
+
+ /* RSN sanity checks */
+ if ( rsn + 2 > rsn_end ) {
+ DBG ( "RSN detect: malformed descriptor count\n" );
+ return NULL;
+ }
+
+ ndesc = *( u16 * ) rsn; /* descriptor count, read without byte swapping */
+ rsn += 2;
+
+ if ( ! ndesc ) {
+ DBG ( "RSN detect: no descriptors\n" );
+ return NULL;
+ }
+
+ /* Determine which net80211 crypto types are listed */
+ while ( ndesc-- ) {
+ u32 desc;
+
+ if ( rsn + 4 > rsn_end ) {
+ DBG ( "RSN detect: malformed descriptor (%d left)\n",
+ ndesc );
+ return NULL;
+ }
+
+ desc = *( u32 * ) rsn;
+ rsn += 4;
+
+ for ( map_ent = map; map_ent->oui_type != END_MAGIC; map_ent++ )
+ if ( map_ent->oui_type == ( desc & OUI_TYPE_MASK ) )
+ break;
+
+ /* Use first cipher as a fallback */
+ if ( ! map_ret )
+ map_ret = map_ent;
+
+ /* Once we find one we support, use it */
+ if ( ok & ( 1 << map_ent->net80211_type ) ) {
+ map_ret = map_ent;
+ break;
+ }
+ }
+
+ if ( ndesc > 0 ) /* left the loop early: skip the unread descriptors */
+ rsn += 4 * ndesc;
+
+ *rsnp = rsn;
+ return map_ret;
+}
+
+
+/**
+ * Find the RSN or WPA information element in the provided beacon frame
+ *
+ * @v ie Pointer to first information element to check
+ * @v ie_end Pointer to end of information element space
+ * @ret is_rsn TRUE if returned IE is RSN, FALSE if it's WPA
+ * @ret end Pointer to byte immediately after last byte of data
+ * @ret data Pointer to first byte of data (the `version' field)
+ *
+ * If both an RSN and a WPA information element are found, this
+ * function will return the first one seen, which by ordering rules
+ * should always prefer the newer RSN IE.
+ *
+ * If no RSN or WPA information element is found, returns @c NULL and
+ * leaves @a is_rsn and @a end in an undefined state.
+ *
+ * This function will not return a pointer to an information element
+ * that states it extends past the tail of the io_buffer, that is too
+ * short to hold the two-byte @a version field, or whose @a version
+ * field is incorrect.
+ */
+u8 * sec80211_find_rsn ( union ieee80211_ie *ie, void *ie_end,
+			 int *is_rsn, u8 **end )
+{
+	u8 *rsn = NULL;
+
+	if ( ! ieee80211_ie_bound ( ie, ie_end ) )
+		return NULL;
+
+	while ( ie ) {
+		if ( ie->id == IEEE80211_IE_VENDOR &&
+		     ie->vendor.oui == IEEE80211_WPA_OUI_VEN ) {
+			DBG ( "RSN detect: old-style WPA IE found\n" );
+			rsn = &ie->vendor.data[0];
+			/* Length includes the 4-byte OUI+type prefix */
+			*end = rsn + ie->len - 4;
+			*is_rsn = 0;
+		} else if ( ie->id == IEEE80211_IE_RSN ) {
+			DBG ( "RSN detect: 802.11i RSN IE found\n" );
+			rsn = ( u8 * ) &ie->rsn.version;
+			*end = rsn + ie->len;
+			*is_rsn = 1;
+		}
+
+		/* The whole version field must lie inside the IE
+		 * before it may be read; this also rejects IEs whose
+		 * stated length puts the end at or before the start.
+		 */
+		if ( rsn && ( *end > ( u8 * ) ie_end || rsn + 2 > *end ||
+			      *( u16 * ) rsn != IEEE80211_RSN_VERSION ) ) {
+			DBG ( "RSN detect: malformed RSN IE or unknown "
+			      "version, keep trying\n" );
+			rsn = NULL;
+		}
+
+		if ( rsn )
+			break;
+
+		ie = ieee80211_next_ie ( ie, ie_end );
+	}
+
+	if ( ! ie ) {
+		DBG ( "RSN detect: no RSN IE found\n" );
+		return NULL;
+	}
+
+	return rsn;
+}
+
+
+/**
+ * Work out cryptosystem and AKM types from an RSN/WPA information element
+ *
+ * @v is_rsn If TRUE, IE is a new-style RSN information element
+ * @v start Pointer to first byte of @a version field
+ * @v end Pointer to first byte not in the RSN IE
+ * @ret secprot Security handshaking protocol used by network
+ * @ret crypt Cryptosystem used by network
+ * @ret rc Return status code
+ *
+ * An IE that ends before the pairwise cipher list or before the AKM
+ * list is accepted, with defaults (CCMP for RSN, TKIP for WPA, and
+ * EAP handshaking) filling in whatever is missing.
+ *
+ * If the IE cannot be parsed, returns an error indication and leaves
+ * @a secprot and @a crypt unchanged.
+ */
+int sec80211_detect_ie ( int is_rsn, u8 *start, u8 *end,
+			 enum net80211_security_proto *secprot,
+			 enum net80211_crypto_alg *crypt )
+{
+	enum net80211_security_proto proto = NET80211_SECPROT_EAP;
+	enum net80211_crypto_alg alg;
+	struct descriptor_map *picked;
+	/* Skip version (2 bytes, already checked) and the group
+	 * cipher (4 bytes, not used here).
+	 */
+	u8 *pos = start + 2 + 4;
+
+	if ( is_rsn )
+		alg = NET80211_CRYPT_CCMP;
+	else
+		alg = NET80211_CRYPT_TKIP;
+
+	/* Pairwise cipher list, if present */
+	if ( pos < end ) {
+		picked = rsn_pick_desc ( &pos, end, rsn_cipher_map,
+					 table_start ( NET80211_CRYPTOS ),
+					 table_end ( NET80211_CRYPTOS ) );
+		if ( ! picked )
+			goto invalid_rsn;
+		alg = picked->net80211_type;
+	}
+
+	/* AKM list, if present */
+	if ( pos < end ) {
+		picked = rsn_pick_desc ( &pos, end, rsn_akm_map,
+					 table_start ( NET80211_HANDSHAKERS ),
+					 table_end ( NET80211_HANDSHAKERS ) );
+		if ( ! picked )
+			goto invalid_rsn;
+		proto = picked->net80211_type;
+	}
+
+	DBG ( "RSN detect: OK, crypto type %d, secprot type %d\n", alg, proto );
+	*secprot = proto;
+	*crypt = alg;
+	return 0;
+
+ invalid_rsn:
+	DBG ( "RSN detect: invalid RSN IE\n" );
+	return -EINVAL;
+}
+
+
+/**
+ * Detect the cryptosystem and handshaking protocol used by an 802.11 network
+ *
+ * @v iob I/O buffer containing beacon frame
+ * @ret secprot Security handshaking protocol used by network
+ * @ret crypt Cryptosystem used by network
+ * @ret rc Return status code
+ *
+ * A beacon with no RSN or WPA IE is reported as WEP when its PRIVACY
+ * capability bit is set and as unencrypted otherwise. A beacon whose
+ * RSN/WPA IE cannot be parsed is reported as unknown, together with
+ * the parse error.
+ *
+ * This function uses weak linkage, as it must be called from generic
+ * contexts but should only be linked in if some encryption is
+ * supported; you must test its address against @c NULL before calling
+ * it. If it does not exist, any network with the PRIVACY bit set in
+ * beacon->capab should be considered unknown.
+ */
+int sec80211_detect ( struct io_buffer *iob,
+		      enum net80211_security_proto *secprot,
+		      enum net80211_crypto_alg *crypt )
+{
+	struct ieee80211_frame *frame = iob->data;
+	struct ieee80211_beacon *beacon =
+		( struct ieee80211_beacon * ) frame->data;
+	u8 *sec_ie, *sec_ie_end;
+	int is_rsn, rc;
+
+	*crypt = NET80211_CRYPT_UNKNOWN;
+	*secprot = NET80211_SECPROT_UNKNOWN;
+
+	/* Find RSN or WPA IE */
+	sec_ie = sec80211_find_rsn ( beacon->info_element, iob->tail,
+				     &is_rsn, &sec_ie_end );
+	if ( ! sec_ie ) {
+		/* No security IE at all; either WEP or no security. */
+		*secprot = NET80211_SECPROT_NONE;
+		*crypt = ( ( beacon->capability & IEEE80211_CAPAB_PRIVACY ) ?
+			   NET80211_CRYPT_WEP : NET80211_CRYPT_NONE );
+		return 0;
+	}
+
+	/* Determine type of security */
+	rc = sec80211_detect_ie ( is_rsn, sec_ie, sec_ie_end, secprot,
+				  crypt );
+	if ( rc != 0 ) {
+		/* The RSN IE was invalid */
+		*crypt = NET80211_CRYPT_UNKNOWN;
+		*secprot = NET80211_SECPROT_UNKNOWN;
+		DBG ( "Failed to handle RSN IE:\n" );
+		DBG_HD ( sec_ie, sec_ie_end - sec_ie );
+	}
+
+	return rc;
+}
+
+
+/**
+ * Translate a net80211 ID into an RSN descriptor
+ *
+ * @v id net80211 ID value
+ * @v rsnie Whether to return a new-format (RSN IE) descriptor
+ * @v map Map to use in translation
+ * @ret desc RSN descriptor, or 0 on error
+ *
+ * The descriptor is the first matching map entry's OUI type combined
+ * with the RSN OUI, or with the WPA OUI when @a rsnie is false
+ * (old-format WPA vendor IE).
+ */
+static u32 rsn_get_desc ( unsigned id, int rsnie, struct descriptor_map *map )
+{
+	struct descriptor_map *ent;
+	u32 oui;
+
+	if ( rsnie )
+		oui = IEEE80211_RSN_OUI;
+	else
+		oui = IEEE80211_WPA_OUI;
+
+	for ( ent = map; ent->oui_type != END_MAGIC; ent++ ) {
+		if ( ent->net80211_type != id )
+			continue;
+		return ( ent->oui_type | oui );
+	}
+
+	/* Ran into the terminator: no descriptor for this ID */
+	return 0;
+}
+
+/**
+ * Determine RSN descriptor for specified net80211 cryptosystem number
+ *
+ * @v crypt Cryptosystem number
+ * @v rsnie Whether to return a new-format (RSN IE) descriptor
+ * @ret desc RSN descriptor, or 0 if @a crypt has no entry in
+ * rsn_cipher_map
+ *
+ * If @a rsnie is false, returns an old-format (WPA vendor IE)
+ * descriptor.
+ */
+u32 sec80211_rsn_get_crypto_desc ( enum net80211_crypto_alg crypt, int rsnie )
+{
+ return rsn_get_desc ( crypt, rsnie, rsn_cipher_map );
+}
+
+/**
+ * Determine RSN descriptor for specified net80211 handshaker number
+ *
+ * @v secprot Handshaker number
+ * @v rsnie Whether to return a new-format (RSN IE) descriptor
+ * @ret desc RSN descriptor, or 0 if @a secprot has no entry in
+ * rsn_akm_map
+ *
+ * If @a rsnie is false, returns an old-format (WPA vendor IE)
+ * descriptor.
+ */
+u32 sec80211_rsn_get_akm_desc ( enum net80211_security_proto secprot,
+ int rsnie )
+{
+ return rsn_get_desc ( secprot, rsnie, rsn_akm_map );
+}
+
+/**
+ * Translate an RSN cipher descriptor into a net80211 cryptosystem number
+ *
+ * @v desc RSN descriptor
+ * @ret crypt net80211 cryptosystem enumeration value
+ *
+ * Only the bits selected by OUI_TYPE_MASK take part in the lookup. A
+ * descriptor with no match yields the type stored in the map's
+ * terminating entry, NET80211_CRYPT_UNKNOWN.
+ */
+enum net80211_crypto_alg sec80211_rsn_get_net80211_crypt ( u32 desc )
+{
+	struct descriptor_map *ent = rsn_cipher_map;
+
+	/* Stop on the first match or on the terminator */
+	while ( ent->oui_type != END_MAGIC &&
+		ent->oui_type != ( desc & OUI_TYPE_MASK ) )
+		ent++;
+
+	return ent->net80211_type;
+}
diff --git a/qemu/roms/ipxe/src/net/80211/wep.c b/qemu/roms/ipxe/src/net/80211/wep.c
new file mode 100644
index 000000000..e22ac8998
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/80211/wep.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <ipxe/net80211.h>
+#include <ipxe/sec80211.h>
+#include <ipxe/crypto.h>
+#include <ipxe/arc4.h>
+#include <ipxe/crc32.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+/** @file
+ *
+ * The WEP wireless encryption method (insecure!)
+ *
+ * The data field in a WEP-encrypted packet contains a 3-byte
+ * initialisation vector, one-byte Key ID field (only the bottom two
+ * bits are ever used), encrypted data, and a 4-byte encrypted CRC of
+ * the plaintext data, called the ICV. To decrypt it, the IV is
+ * prepended to the shared key and the data stream (including ICV) is
+ * run through the ARC4 stream cipher; if the ICV matches a CRC32
+ * calculated on the plaintext, the packet is valid.
+ *
+ * For efficiency and code-size reasons, this file assumes it is
+ * running on a little-endian machine.
+ */
+
+/** Length of WEP initialisation vector */
+#define WEP_IV_LEN 3
+
+/** Length of WEP key ID byte */
+#define WEP_KID_LEN 1
+
+/** Length of WEP ICV checksum */
+#define WEP_ICV_LEN 4
+
+/** Maximum length of WEP key */
+#define WEP_MAX_KEY 16
+
+/** Amount of data placed before the encrypted bytes (IV plus key ID byte) */
+#define WEP_HEADER_LEN 4
+
+/** Amount of data placed after the encrypted bytes (the ICV) */
+#define WEP_TRAILER_LEN 4
+
+/** Total WEP overhead bytes (header plus trailer) */
+#define WEP_OVERHEAD 8
+
+/** Context for WEP encryption and decryption */
+struct wep_ctx
+{
+ /** Encoded WEP key
+ *
+ * The actual key bytes are stored beginning at offset 3, to
+ * leave room for easily inserting the IV before a particular
+ * operation. The first WEP_IV_LEN bytes are overwritten with
+ * the per-packet IV on every encrypt and decrypt call.
+ */
+ u8 key[WEP_IV_LEN + WEP_MAX_KEY];
+
+ /** Length of WEP key (not including IV bytes) */
+ int keylen;
+
+ /** ARC4 context, re-keyed for every packet */
+ struct arc4_ctx arc4;
+};
+
+/**
+ * Set up WEP with a new key
+ *
+ * @v crypto 802.11 cryptographic algorithm
+ * @v key WEP key to use
+ * @v keylen Length of WEP key
+ * @v rsc Initial receive sequence counter (unused)
+ * @ret rc Return status code (always 0)
+ *
+ * Standard key lengths are 5 and 13 bytes; 16-byte keys are
+ * occasionally supported as an extension to the standard. Keys
+ * longer than WEP_MAX_KEY bytes are silently truncated.
+ */
+static int wep_init ( struct net80211_crypto *crypto, const void *key,
+		      int keylen, const void *rsc __unused )
+{
+	struct wep_ctx *wep = crypto->priv;
+
+	/* Clamp to the space available after the IV slot */
+	if ( keylen > WEP_MAX_KEY )
+		wep->keylen = WEP_MAX_KEY;
+	else
+		wep->keylen = keylen;
+
+	memcpy ( &wep->key[WEP_IV_LEN], key, wep->keylen );
+
+	return 0;
+}
+
+/**
+ * Encrypt packet using WEP
+ *
+ * @v crypto 802.11 cryptographic algorithm
+ * @v iob I/O buffer of plaintext packet
+ * @ret eiob Newly allocated I/O buffer for encrypted packet, or NULL
+ *
+ * The output consists of the copied 802.11 header (with the PROTECTED
+ * flag set), a random 3-byte IV followed by a zero key ID byte, the
+ * encrypted payload, and the encrypted ICV. The input buffer is only
+ * read from; it is neither modified nor freed here.
+ *
+ * If memory allocation fails, @c NULL is returned.
+ */
+static struct io_buffer * wep_encrypt ( struct net80211_crypto *crypto,
+ struct io_buffer *iob )
+{
+ struct wep_ctx *ctx = crypto->priv;
+ struct io_buffer *eiob;
+ struct ieee80211_frame *hdr;
+ const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN;
+ int datalen = iob_len ( iob ) - hdrlen; /* payload bytes after the header */
+ int newlen = hdrlen + datalen + WEP_OVERHEAD;
+ u32 iv, icv;
+
+ eiob = alloc_iob ( newlen );
+ if ( ! eiob )
+ return NULL;
+
+ memcpy ( iob_put ( eiob, hdrlen ), iob->data, hdrlen );
+ hdr = eiob->data;
+ hdr->fc |= IEEE80211_FC_PROTECTED;
+
+ /* Calculate IV, put it in the header (with key ID byte = 0), and
+ set it up at the start of the encryption key. */
+ iv = random() & 0xffffff; /* IV in bottom 3 bytes, top byte = KID = 0 */
+ memcpy ( iob_put ( eiob, WEP_HEADER_LEN ), &iv, WEP_HEADER_LEN );
+ memcpy ( ctx->key, &iv, WEP_IV_LEN );
+
+ /* Encrypt the data using RC4 */
+ cipher_setkey ( &arc4_algorithm, &ctx->arc4, ctx->key,
+ ctx->keylen + WEP_IV_LEN );
+ cipher_encrypt ( &arc4_algorithm, &ctx->arc4, iob->data + hdrlen,
+ iob_put ( eiob, datalen ), datalen );
+
+ /* Add ICV */
+ icv = ~crc32_le ( ~0, iob->data + hdrlen, datalen ); /* CRC of the plaintext payload */
+ cipher_encrypt ( &arc4_algorithm, &ctx->arc4, &icv,
+ iob_put ( eiob, WEP_ICV_LEN ), WEP_ICV_LEN ); /* same ARC4 context as the payload, not re-keyed */
+
+ return eiob;
+}
+
+/**
+ * Decrypt packet using WEP
+ *
+ * @v crypto 802.11 cryptographic algorithm
+ * @v eiob I/O buffer of encrypted packet
+ * @ret iob Newly allocated I/O buffer for plaintext packet, or NULL
+ *
+ * If the packet is too short to hold an 802.11 header plus the WEP
+ * IV/key ID and ICV, if memory allocation fails, or if a consistency
+ * check for the decryption fails (usually indicating an invalid key),
+ * @c NULL is returned. The input buffer is only read from; it is
+ * neither modified nor freed here.
+ */
+static struct io_buffer * wep_decrypt ( struct net80211_crypto *crypto,
+					struct io_buffer *eiob )
+{
+	struct wep_ctx *ctx = crypto->priv;
+	struct io_buffer *iob;
+	struct ieee80211_frame *hdr;
+	const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN;
+	int datalen;
+	int newlen;
+	u32 iv, icv, crc;
+
+	/* Received frames are untrusted: a short frame would make the
+	 * payload length negative and the copies below run out of
+	 * bounds.
+	 */
+	if ( iob_len ( eiob ) < ( size_t ) ( hdrlen + WEP_OVERHEAD ) ) {
+		DBGC ( crypto, "WEP %p dropping short frame\n", crypto );
+		return NULL;
+	}
+
+	datalen = iob_len ( eiob ) - hdrlen - WEP_OVERHEAD;
+	newlen = hdrlen + datalen;
+
+	iob = alloc_iob ( newlen );
+	if ( ! iob )
+		return NULL;
+
+	memcpy ( iob_put ( iob, hdrlen ), eiob->data, hdrlen );
+	hdr = iob->data;
+	hdr->fc &= ~IEEE80211_FC_PROTECTED;
+
+	/* Strip off IV and use it to initialize cryptosystem */
+	memcpy ( &iv, eiob->data + hdrlen, 4 );
+	iv &= 0xffffff; /* ignore key ID byte */
+	memcpy ( ctx->key, &iv, WEP_IV_LEN );
+
+	/* Decrypt the data using RC4 */
+	cipher_setkey ( &arc4_algorithm, &ctx->arc4, ctx->key,
+			ctx->keylen + WEP_IV_LEN );
+	cipher_decrypt ( &arc4_algorithm, &ctx->arc4, eiob->data + hdrlen +
+			 WEP_HEADER_LEN, iob_put ( iob, datalen ), datalen );
+
+	/* Strip off ICV and verify it; the ARC4 stream simply
+	 * continues from the end of the payload.
+	 */
+	cipher_decrypt ( &arc4_algorithm, &ctx->arc4, eiob->data + hdrlen +
+			 WEP_HEADER_LEN + datalen, &icv, WEP_ICV_LEN );
+	crc = ~crc32_le ( ~0, iob->data + hdrlen, datalen );
+	if ( crc != icv ) {
+		DBGC ( crypto, "WEP %p CRC mismatch: expect %08x, get %08x\n",
+		       crypto, icv, crc );
+		free_iob ( iob );
+		return NULL;
+	}
+	return iob;
+}
+
+/** WEP cryptosystem for 802.11
+ *
+ * Registered via __net80211_crypto; @c priv_len reserves room for a
+ * struct wep_ctx as the cryptosystem's private data.
+ */
+struct net80211_crypto wep_crypto __net80211_crypto = {
+ .algorithm = NET80211_CRYPT_WEP,
+ .init = wep_init,
+ .encrypt = wep_encrypt,
+ .decrypt = wep_decrypt,
+ .priv_len = sizeof ( struct wep_ctx ),
+};
+
+/**
+ * Initialize trivial 802.11 security handshaker
+ *
+ * @v dev 802.11 device
+ * @ret rc Return status code
+ *
+ * This simply fetches a WEP key from netX/key, and if it exists,
+ * installs WEP cryptography on the 802.11 device. No real handshaking
+ * is performed.
+ *
+ * Returns 0 without installing anything when the network being
+ * associated with uses no cryptosystem, -EACCES when no key is
+ * configured, and -EINVAL when the key is not 5, 13 or 16 bytes long.
+ */
+static int trivial_init ( struct net80211_device *dev )
+{
+ u8 key[WEP_MAX_KEY]; /* support up to 128-bit keys */
+ int len;
+ int rc;
+
+ if ( dev->associating &&
+ dev->associating->crypto == NET80211_CRYPT_NONE )
+ return 0; /* no crypto? OK. */
+
+ len = fetch_raw_setting ( netdev_settings ( dev->netdev ),
+ &net80211_key_setting, key, WEP_MAX_KEY );
+
+ if ( len <= 0 ) {
+ DBGC ( dev, "802.11 %p cannot do WEP without a key\n", dev );
+ return -EACCES;
+ }
+
+ /* Full 128-bit keys are a nonstandard extension, but they're
+ utterly trivial to support, so we do. */
+ if ( len != 5 && len != 13 && len != 16 ) {
+ DBGC ( dev, "802.11 %p invalid WEP key length %d\n",
+ dev, len );
+ return -EINVAL;
+ }
+
+ DBGC ( dev, "802.11 %p installing %d-bit WEP\n", dev, len * 8 );
+
+ rc = sec80211_install ( &dev->crypto, NET80211_CRYPT_WEP, key, len,
+ NULL );
+ if ( rc < 0 )
+ return rc;
+
+ return 0;
+}
+
+/**
+ * Check for key change on trivial 802.11 security handshaker
+ *
+ * @v dev 802.11 device
+ * @ret rc Return status code; nonzero requests reassociation
+ *
+ * The @c change flag is toggled once if WEP is not currently
+ * installed and once more if no key is configured, so it ends up set
+ * only when exactly one of those holds: WEP is installed but the key
+ * has gone away, or a key has appeared while WEP is not installed.
+ * Unlike trivial_init(), the new key's length is not validated here.
+ */
+static int trivial_change_key ( struct net80211_device *dev )
+{
+ u8 key[WEP_MAX_KEY];
+ int len;
+ int change = 0;
+
+ /* If going from WEP to clear, or something else to WEP, reassociate. */
+ if ( ! dev->crypto || ( dev->crypto->init != wep_init ) )
+ change ^= 1; /* WEP is not what is currently installed */
+
+ len = fetch_raw_setting ( netdev_settings ( dev->netdev ),
+ &net80211_key_setting, key, WEP_MAX_KEY );
+ if ( len <= 0 )
+ change ^= 1; /* no key is configured any more */
+
+ /* Changing crypto type => return nonzero to reassociate. */
+ if ( change )
+ return -EINVAL;
+
+ /* Going from no crypto to still no crypto => nothing to do. */
+ if ( len <= 0 )
+ return 0;
+
+ /* Otherwise, reinitialise WEP with new key. */
+ return wep_init ( dev->crypto, key, len, NULL );
+}
+
+/** Trivial 802.11 security handshaker
+ *
+ * Registered for NET80211_SECPROT_NONE; it keeps no private state
+ * ( @c priv_len is 0 ).
+ */
+struct net80211_handshaker trivial_handshaker __net80211_handshaker = {
+ .protocol = NET80211_SECPROT_NONE,
+ .init = trivial_init,
+ .change_key = trivial_change_key,
+ .priv_len = 0,
+};
diff --git a/qemu/roms/ipxe/src/net/80211/wpa.c b/qemu/roms/ipxe/src/net/80211/wpa.c
new file mode 100644
index 000000000..e2c4945f9
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/80211/wpa.c
@@ -0,0 +1,915 @@
+/*
+ * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <ipxe/net80211.h>
+#include <ipxe/sec80211.h>
+#include <ipxe/wpa.h>
+#include <ipxe/eapol.h>
+#include <ipxe/crypto.h>
+#include <ipxe/arc4.h>
+#include <ipxe/crc32.h>
+#include <ipxe/sha1.h>
+#include <ipxe/hmac.h>
+#include <ipxe/list.h>
+#include <ipxe/ethernet.h>
+#include <ipxe/rbg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <byteswap.h>
+
+/** @file
+ *
+ * Handler for the aspects of WPA handshaking that are independent of
+ * 802.1X/PSK or TKIP/CCMP; this mostly involves the 4-Way Handshake.
+ */
+
+/** List of WPA contexts in active use.
+ *
+ * Contexts are appended by wpa_start() and removed by wpa_stop().
+ */
+struct list_head wpa_contexts = LIST_HEAD_INIT ( wpa_contexts );
+
+
+/**
+ * Return an error code and deauthenticate
+ *
+ * @v ctx WPA common context
+ * @v rc Return status code
+ * @ret rc The passed return status code
+ *
+ * Passes @a rc to net80211_deauthenticate() for the context's device
+ * and hands it back, so callers can write "return wpa_fail ( ... )".
+ */
+static int wpa_fail ( struct wpa_common_ctx *ctx, int rc )
+{
+ net80211_deauthenticate ( ctx->dev, rc );
+ return rc;
+}
+
+
+/**
+ * Look up the cryptosystem handler registered for a crypto ID
+ *
+ * @v crypt Cryptosystem ID
+ * @ret crypto Cryptosystem handler structure
+ *
+ * If support for @a crypt is not compiled in to iPXE, or if @a crypt
+ * is NET80211_CRYPT_UNKNOWN, returns @c NULL.
+ */
+static struct net80211_crypto *
+wpa_find_cryptosystem ( enum net80211_crypto_alg crypt )
+{
+	struct net80211_crypto *candidate;
+	struct net80211_crypto *found = NULL;
+
+	for_each_table_entry ( candidate, NET80211_CRYPTOS ) {
+		if ( candidate->algorithm != crypt )
+			continue;
+		found = candidate;
+		break;
+	}
+
+	return found;
+}
+
+
+/**
+ * Find WPA key integrity and encryption handler from key version field
+ *
+ * @v version Version bits of EAPOL-Key info field
+ * @ret kie Key integrity and encryption handler, or @c NULL if
+ * no handler in the WPA_KIES table has this version
+ */
+struct wpa_kie * wpa_find_kie ( int version )
+{
+ struct wpa_kie *kie;
+
+ for_each_table_entry ( kie, WPA_KIES ) {
+ if ( kie->version == version )
+ return kie;
+ }
+
+ return NULL;
+}
+
+
+/**
+ * Construct RSN or WPA information element
+ *
+ * @v dev 802.11 device
+ * @ret ie_ret RSN or WPA information element
+ * @ret rc Return status code
+ *
+ * This function allocates, fills, and returns a RSN or WPA
+ * information element suitable for including in an association
+ * request frame to the network identified by @c dev->associating.
+ * If it is impossible to construct an information element consistent
+ * with iPXE's capabilities that is compatible with that network, or
+ * if none should be sent because that network's beacon included no
+ * security information, returns an error indication and leaves
+ * @a ie_ret unchanged.
+ *
+ * The returned IE will be of the same type (RSN or WPA) as was
+ * included in the beacon for the network it is destined for. It
+ * advertises exactly one pairwise cipher and one AKM, and echoes the
+ * group cipher from the beacon.
+ *
+ * The IE is allocated with malloc(); presumably the caller takes
+ * ownership and must free it - TODO confirm against callers.
+ */
+int wpa_make_rsn_ie ( struct net80211_device *dev, union ieee80211_ie **ie_ret )
+{
+ u8 *rsn, *rsn_end;
+ int is_rsn;
+ u32 group_cipher;
+ enum net80211_crypto_alg gcrypt;
+ int ie_len;
+ u8 *iep;
+ struct ieee80211_ie_rsn *ie;
+ struct ieee80211_frame *hdr;
+ struct ieee80211_beacon *beacon;
+
+ if ( ! dev->associating ) {
+ DBG ( "WPA: Can't make RSN IE for a non-associating device\n" );
+ return -EINVAL;
+ }
+
+ hdr = dev->associating->beacon->data;
+ beacon = ( struct ieee80211_beacon * ) hdr->data;
+ rsn = sec80211_find_rsn ( beacon->info_element,
+ dev->associating->beacon->tail, &is_rsn,
+ &rsn_end );
+ if ( ! rsn ) {
+ DBG ( "WPA: Can't make RSN IE when we didn't get one\n" );
+ return -EINVAL;
+ }
+
+ rsn += 2; /* skip version */
+ group_cipher = *( u32 * ) rsn; /* NOTE(review): read is not checked against rsn_end */
+ gcrypt = sec80211_rsn_get_net80211_crypt ( group_cipher );
+
+ if ( ! wpa_find_cryptosystem ( gcrypt ) ||
+ ! wpa_find_cryptosystem ( dev->associating->crypto ) ) {
+ DBG ( "WPA: No support for (GC:%d, PC:%d)\n",
+ gcrypt, dev->associating->crypto );
+ return -ENOTSUP;
+ }
+
+ /* Everything looks good - make our IE. */
+
+ /* WPA IEs need 4 more bytes for the OUI+type */
+ ie_len = ieee80211_rsn_size ( 1, 1, 0, is_rsn ) + ( 4 * ! is_rsn );
+ iep = malloc ( ie_len );
+ if ( ! iep )
+ return -ENOMEM;
+
+ *ie_ret = ( union ieee80211_ie * ) iep;
+
+ /* Store ID and length bytes. */
+ *iep++ = ( is_rsn ? IEEE80211_IE_RSN : IEEE80211_IE_VENDOR );
+ *iep++ = ie_len - 2; /* length excludes the ID and length bytes */
+
+ /* Store OUI+type for WPA IEs. */
+ if ( ! is_rsn ) {
+ *( u32 * ) iep = IEEE80211_WPA_OUI_VEN;
+ iep += 4;
+ }
+
+ /* If this is a WPA IE, the id and len bytes in the
+ ieee80211_ie_rsn structure will not be valid, but by doing
+ the cast we can fill all the other fields much more
+ readily. */
+
+ ie = ( struct ieee80211_ie_rsn * ) ( iep - 2 ); /* step back over the two header bytes so the fields line up at iep */
+ ie->version = IEEE80211_RSN_VERSION;
+ ie->group_cipher = group_cipher;
+ ie->pairwise_count = 1;
+ ie->pairwise_cipher[0] =
+ sec80211_rsn_get_crypto_desc ( dev->associating->crypto,
+ is_rsn );
+ ie->akm_count = 1;
+ ie->akm_list[0] =
+ sec80211_rsn_get_akm_desc ( dev->associating->handshaking,
+ is_rsn );
+ if ( is_rsn ) { /* these trailing fields are written only for RSN IEs */
+ ie->rsn_capab = 0;
+ ie->pmkid_count = 0;
+ }
+
+ return 0;
+}
+
+
+/**
+ * Prepare generic WPA support for the 4-Way Handshake
+ *
+ * @v dev 802.11 device
+ * @v ctx WPA common context
+ * @v pmk Pairwise Master Key to use for session
+ * @v pmk_len Length of PMK, almost always 32
+ * @ret rc Return status code
+ *
+ * Stores the PMK and a private copy of the access point's RSN/WPA IE
+ * (taken from the beacon of the network being associated with) in
+ * @a ctx, then adds @a ctx to the list of active WPA contexts.
+ *
+ * Fails with -EINVAL if the device has no RSN IE of its own or is not
+ * associating, -ENOENT if the beacon carries no RSN/WPA IE, and
+ * -ENOMEM if the IE copy cannot be allocated. The context is only
+ * added to the list on success.
+ */
+int wpa_start ( struct net80211_device *dev, struct wpa_common_ctx *ctx,
+		const void *pmk, size_t pmk_len )
+{
+	struct io_buffer *beacon_iob;
+	struct ieee80211_frame *frame;
+	struct ieee80211_beacon *beacon;
+	u8 *ap_ie, *ap_ie_end;
+
+	if ( ! dev->rsn_ie || ! dev->associating )
+		return -EINVAL;
+
+	/* Basic session state */
+	ctx->dev = dev;
+	ctx->pmk_len = pmk_len;
+	memcpy ( ctx->pmk, pmk, ctx->pmk_len );
+	ctx->state = WPA_READY;
+	ctx->replay = ~0ULL;
+
+	/* Locate the AP's security IE in its beacon */
+	beacon_iob = dev->associating->beacon;
+	frame = beacon_iob->data;
+	beacon = ( struct ieee80211_beacon * ) frame->data;
+	ap_ie = sec80211_find_rsn ( beacon->info_element, beacon_iob->tail,
+				    &ctx->ap_rsn_is_rsn, &ap_ie_end );
+	if ( ! ap_ie )
+		return -ENOENT;
+
+	/* Keep a private copy of it */
+	ctx->ap_rsn_ie = malloc ( ap_ie_end - ap_ie );
+	if ( ! ctx->ap_rsn_ie )
+		return -ENOMEM;
+	memcpy ( ctx->ap_rsn_ie, ap_ie, ap_ie_end - ap_ie );
+	ctx->ap_rsn_ie_len = ap_ie_end - ap_ie;
+
+	ctx->crypt = dev->associating->crypto;
+	ctx->gcrypt = NET80211_CRYPT_UNKNOWN;
+
+	list_add_tail ( &ctx->list, &wpa_contexts );
+	return 0;
+}
+
+
+/**
+ * Stop handling received WPA handshake frames for a device
+ *
+ * @v dev 802.11 device
+ *
+ * Every active WPA context belonging to @a dev has its copy of the
+ * access point's RSN IE freed and is taken off the active list. The
+ * contexts themselves are not freed here.
+ */
+void wpa_stop ( struct net80211_device *dev )
+{
+	struct wpa_common_ctx *cur, *next;
+
+	list_for_each_entry_safe ( cur, next, &wpa_contexts, list ) {
+		if ( cur->dev != dev )
+			continue;
+
+		free ( cur->ap_rsn_ie );
+		cur->ap_rsn_ie = NULL;
+		list_del ( &cur->list );
+	}
+}
+
+
+/**
+ * Derive pairwise transient key
+ *
+ * @v ctx WPA common context
+ *
+ * Runs prf_sha1() keyed with the PMK over the label "Pairwise key
+ * expansion" and the two MAC addresses and two nonces, each pair
+ * arranged smaller-first, and stores the result in @c ctx->ptk.
+ */
+static void wpa_derive_ptk ( struct wpa_common_ctx *ctx )
+{
+	struct {
+		u8 mac1[ETH_ALEN];
+		u8 mac2[ETH_ALEN];
+		u8 nonce1[WPA_NONCE_LEN];
+		u8 nonce2[WPA_NONCE_LEN];
+	} __attribute__ (( packed )) ptk_data;
+	const void *sta_mac = ctx->dev->netdev->ll_addr;
+	const void *ap_mac = ctx->dev->bssid;
+	int sta_mac_first;
+	int anonce_first;
+
+	/* The addresses and nonces are stored in numerical order (!) */
+	sta_mac_first = ( memcmp ( sta_mac, ap_mac, ETH_ALEN ) < 0 );
+	anonce_first = ( memcmp ( ctx->Anonce, ctx->Snonce,
+				  WPA_NONCE_LEN ) < 0 );
+
+	memcpy ( ptk_data.mac1, ( sta_mac_first ? sta_mac : ap_mac ),
+		 ETH_ALEN );
+	memcpy ( ptk_data.mac2, ( sta_mac_first ? ap_mac : sta_mac ),
+		 ETH_ALEN );
+
+	if ( anonce_first ) {
+		memcpy ( ptk_data.nonce1, ctx->Anonce, WPA_NONCE_LEN );
+		memcpy ( ptk_data.nonce2, ctx->Snonce, WPA_NONCE_LEN );
+	} else {
+		memcpy ( ptk_data.nonce1, ctx->Snonce, WPA_NONCE_LEN );
+		memcpy ( ptk_data.nonce2, ctx->Anonce, WPA_NONCE_LEN );
+	}
+
+	DBGC2 ( ctx, "WPA %p A1 %s, A2 %s\n", ctx, eth_ntoa ( ptk_data.mac1 ),
+		eth_ntoa ( ptk_data.mac2 ) );
+	DBGC2 ( ctx, "WPA %p Nonce1, Nonce2:\n", ctx );
+	DBGC2_HD ( ctx, ptk_data.nonce1, WPA_NONCE_LEN );
+	DBGC2_HD ( ctx, ptk_data.nonce2, WPA_NONCE_LEN );
+
+	prf_sha1 ( ctx->pmk, ctx->pmk_len,
+		   "Pairwise key expansion",
+		   &ptk_data, sizeof ( ptk_data ),
+		   &ctx->ptk, sizeof ( ctx->ptk ) );
+
+	DBGC2 ( ctx, "WPA %p PTK:\n", ctx );
+	DBGC2_HD ( ctx, &ctx->ptk, sizeof ( ctx->ptk ) );
+}
+
+
+/**
+ * Install pairwise transient key
+ *
+ * @v ctx	WPA common context
+ * @v len	Key length (16 for CCMP, 32 for TKIP)
+ * @ret rc	Return status code
+ *
+ * Hands the temporal-key portion of the PTK to sec80211_install() for
+ * the device's pairwise cryptosystem; no receive sequence counter is
+ * supplied.
+ */
+static inline int wpa_install_ptk ( struct wpa_common_ctx *ctx, int len )
+{
+	struct net80211_device *dev = ctx->dev;
+
+	DBGC ( ctx, "WPA %p: installing %d-byte pairwise transient key\n",
+	       ctx, len );
+	DBGC2_HD ( ctx, &ctx->ptk.tk, len );
+
+	return sec80211_install ( &dev->crypto, ctx->crypt,
+				  &ctx->ptk.tk, len, NULL );
+}
+
+/**
+ * Install group transient key
+ *
+ * @v ctx	WPA common context
+ * @v len	Key length (16 for CCMP, 32 for TKIP)
+ * @v rsc	Receive sequence counter field in EAPOL-Key packet
+ * @ret rc	Return status code
+ *
+ * Hands the temporal-key portion of the stored GTK, together with the
+ * receive sequence counter, to sec80211_install() for the device's
+ * group cryptosystem.
+ */
+static inline int wpa_install_gtk ( struct wpa_common_ctx *ctx, int len,
+				    const void *rsc )
+{
+	struct net80211_device *dev = ctx->dev;
+
+	DBGC ( ctx, "WPA %p: installing %d-byte group transient key\n",
+	       ctx, len );
+	DBGC2_HD ( ctx, &ctx->gtk.tk, len );
+
+	return sec80211_install ( &dev->gcrypto, ctx->gcrypt,
+				  &ctx->gtk.tk, len, rsc );
+}
+
+/**
+ * Search for group transient key, and install it if found
+ *
+ * @v ctx	WPA common context
+ * @v ie	Pointer to first IE in key data field
+ * @v ie_end	Pointer to first byte not in key data field
+ * @v rsc	Receive sequence counter field in EAPOL-Key packet
+ * @ret rc	Return status code
+ *
+ * Returns -ENOENT if the key data holds no GTK KDE, and -EINVAL if a
+ * GTK KDE is present but is too short to contain a key or carries a
+ * key larger than the context can store.
+ */
+static int wpa_maybe_install_gtk ( struct wpa_common_ctx *ctx,
+				   union ieee80211_ie *ie, void *ie_end,
+				   const void *rsc )
+{
+	struct wpa_kde *kde;
+	size_t gtk_len;
+
+	if ( ! ieee80211_ie_bound ( ie, ie_end ) )
+		return -ENOENT;
+
+	/* Walk the elements until a vendor IE tagged as a GTK KDE shows up */
+	while ( ie ) {
+		if ( ie->id == IEEE80211_IE_VENDOR &&
+		     ie->vendor.oui == WPA_KDE_GTK )
+			break;
+
+		ie = ieee80211_next_ie ( ie, ie_end );
+	}
+
+	if ( ! ie )
+		return -ENOENT;
+
+	/* Six bytes of the KDE body precede the key itself (presumably
+	 * the OUI/type selector plus the key ID and reserved octets -
+	 * confirm against struct wpa_kde).  Reject anything shorter
+	 * explicitly rather than relying on unsigned wrap-around.
+	 */
+	if ( ie->len < 6 ) {
+		DBGC ( ctx, "WPA %p: GTK KDE is too short (%d bytes)\n",
+		       ctx, ie->len );
+		return -EINVAL;
+	}
+
+	gtk_len = ( ie->len - 6 );
+	if ( gtk_len > sizeof ( ctx->gtk.tk ) ) {
+		DBGC ( ctx, "WPA %p: GTK KDE is too long (%d bytes, max %zd)\n",
+		       ctx, ( int ) gtk_len, sizeof ( ctx->gtk.tk ) );
+		return -EINVAL;
+	}
+
+	/* XXX We ignore key ID for now. */
+	kde = ( struct wpa_kde * ) ie;
+	memcpy ( &ctx->gtk.tk, &kde->gtk_encap.gtk, gtk_len );
+
+	return wpa_install_gtk ( ctx, gtk_len, rsc );
+}
+
+
+/**
+ * Allocate I/O buffer for construction of outgoing EAPOL-Key frame
+ *
+ * @v kdlen	Maximum number of bytes in the Key Data field
+ * @ret iob	Newly allocated I/O buffer, or NULL on allocation failure
+ *
+ * Headroom is reserved for the link-layer and EAPOL headers, and a
+ * zero-filled EAPOL-Key header is placed at @c iob->data.  On return
+ * @c iob->tail points to the start of the Key Data field, so the Key
+ * Data must be appended with iob_put().
+ */
+static struct io_buffer * wpa_alloc_frame ( int kdlen )
+{
+	const size_t key_hdr_len = sizeof ( struct eapol_key_pkt );
+	const size_t headroom = ( MAX_LL_HEADER_LEN + EAPOL_HDR_LEN );
+	struct io_buffer *iob;
+	void *key_hdr;
+
+	iob = alloc_iob ( key_hdr_len + kdlen + headroom );
+	if ( ! iob )
+		return NULL;
+
+	/* Leave room in front for headers added on the way down */
+	iob_reserve ( iob, headroom );
+
+	/* Start with an all-zero EAPOL-Key header */
+	key_hdr = iob_put ( iob, key_hdr_len );
+	memset ( key_hdr, 0, key_hdr_len );
+
+	return iob;
+}
+
+
+/**
+ * Send EAPOL-Key packet
+ *
+ * @v iob I/O buffer, with sufficient headroom for headers
+ * @v ctx WPA common context
+ * @v kie Key integrity and encryption handler (may be NULL)
+ * @ret rc Return status code
+ *
+ * On entry @c iob->data must point at an EAPOL-Key header whose info,
+ * keysize, datalen and replay fields are in host byte order; they are
+ * converted to network order here and the EAPOL header is prepended.
+ *
+ * If a KIE is specified, the MIC will be filled in before transmission.
+ */
+static int wpa_send_eapol ( struct io_buffer *iob, struct wpa_common_ctx *ctx,
+ struct wpa_kie *kie )
+{
+ struct eapol_key_pkt *pkt = iob->data;
+ struct eapol_frame *eapol = iob_push ( iob, EAPOL_HDR_LEN );
+
+ pkt->info = htons ( pkt->info ); /* host -> network order from here on */
+ pkt->keysize = htons ( pkt->keysize );
+ pkt->datalen = htons ( pkt->datalen );
+ pkt->replay = cpu_to_be64 ( pkt->replay );
+ eapol->version = EAPOL_THIS_VERSION;
+ eapol->type = EAPOL_TYPE_KEY;
+ eapol->length = htons ( iob->tail - iob->data - sizeof ( *eapol ) ); /* body length, EAPOL header excluded */
+
+ memset ( pkt->mic, 0, sizeof ( pkt->mic ) ); /* MIC is computed with the MIC field zeroed */
+ if ( kie )
+ kie->mic ( &ctx->ptk.kck, eapol, EAPOL_HDR_LEN +
+ sizeof ( *pkt ) + ntohs ( pkt->datalen ),
+ pkt->mic );
+
+ return net_tx ( iob, ctx->dev->netdev, &eapol_protocol,
+ ctx->dev->bssid, ctx->dev->netdev->ll_addr );
+}
+
+
+/**
+ * Send second frame in 4-Way Handshake
+ *
+ * @v ctx	WPA common context
+ * @v pkt	First frame, to which this is a reply
+ * @v is_rsn	If TRUE, handshake uses new RSN format
+ * @v kie	Key integrity and encryption handler
+ * @ret rc	Return status code
+ *
+ * The reply reuses the header of frame 1/4 with the ACK flag cleared
+ * and the MIC flag set, carries our Snonce, and has our own RSN IE
+ * (two-byte IE header included) as its Key Data.
+ */
+static int wpa_send_2_of_4 ( struct wpa_common_ctx *ctx,
+			     struct eapol_key_pkt *pkt, int is_rsn,
+			     struct wpa_kie *kie )
+{
+	const int ie_len = ( ctx->dev->rsn_ie->len + 2 );
+	struct eapol_key_pkt *reply;
+	struct io_buffer *iob;
+
+	iob = wpa_alloc_frame ( ie_len );
+	if ( ! iob )
+		return -ENOMEM;
+
+	/* Begin with a copy of the received header, then adjust it */
+	reply = iob->data;
+	memcpy ( reply, pkt, sizeof ( *pkt ) );
+	reply->info = ( ( reply->info & ~EAPOL_KEY_INFO_KEY_ACK ) |
+			EAPOL_KEY_INFO_KEY_MIC );
+	if ( is_rsn )
+		reply->keysize = 0;
+	memcpy ( reply->nonce, ctx->Snonce, sizeof ( reply->nonce ) );
+
+	/* Key Data is our RSN IE */
+	reply->datalen = ie_len;
+	memcpy ( iob_put ( iob, ie_len ), ctx->dev->rsn_ie, ie_len );
+
+	DBGC ( ctx, "WPA %p: sending 2/4\n", ctx );
+
+	return wpa_send_eapol ( iob, ctx, kie );
+}
+
+
+/**
+ * Handle receipt of first frame in 4-Way Handshake
+ *
+ * @v ctx	WPA common context
+ * @v pkt	EAPOL-Key packet
+ * @v is_rsn	If TRUE, frame uses new RSN format
+ * @v kie	Key integrity and encryption handler
+ * @ret rc	Return status code
+ *
+ * Stores the AP's nonce, generates our own nonce unless one is
+ * already pending, derives the PTK and answers with frame 2/4.  The
+ * frame is rejected with -EINVAL while the context is in the
+ * WPA_WAITING state.
+ */
+static int wpa_handle_1_of_4 ( struct wpa_common_ctx *ctx,
+			       struct eapol_key_pkt *pkt, int is_rsn,
+			       struct wpa_kie *kie )
+{
+	int need_snonce;
+
+	if ( ctx->state == WPA_WAITING )
+		return -EINVAL;
+
+	ctx->state = WPA_WORKING;
+
+	/* Remember the authenticator's nonce for the PTK and for 3/4 */
+	memcpy ( ctx->Anonce, pkt->nonce, sizeof ( ctx->Anonce ) );
+
+	/* Keep using the same Snonce until a handshake completes */
+	need_snonce = ( ! ctx->have_Snonce );
+	if ( need_snonce ) {
+		rbg_generate ( NULL, 0, 0, ctx->Snonce,
+			       sizeof ( ctx->Snonce ) );
+		ctx->have_Snonce = 1;
+	}
+
+	DBGC ( ctx, "WPA %p: received 1/4, looks OK\n", ctx );
+
+	wpa_derive_ptk ( ctx );
+
+	return wpa_send_2_of_4 ( ctx, pkt, is_rsn, kie );
+}
+
+
+/**
+ * Send fourth frame in 4-Way Handshake, or second in Group Key Handshake
+ *
+ * @v ctx	WPA common context
+ * @v pkt	EAPOL-Key packet for frame to which we're replying
+ * @v is_rsn	If TRUE, frame uses new RSN format
+ * @v kie	Key integrity and encryption handler
+ * @ret rc	Return status code
+ *
+ * The reply is a copy of the received header with the ACK, Install
+ * and Encrypted-Key-Data flags cleared, the nonce and IV zeroed, and
+ * no Key Data.
+ */
+static int wpa_send_final ( struct wpa_common_ctx *ctx,
+			    struct eapol_key_pkt *pkt, int is_rsn,
+			    struct wpa_kie *kie )
+{
+	const unsigned int drop_flags = ( EAPOL_KEY_INFO_KEY_ACK |
+					  EAPOL_KEY_INFO_INSTALL |
+					  EAPOL_KEY_INFO_KEY_ENC );
+	struct eapol_key_pkt *reply;
+	struct io_buffer *iob;
+
+	iob = wpa_alloc_frame ( 0 );
+	if ( ! iob )
+		return -ENOMEM;
+
+	reply = iob->data;
+	memcpy ( reply, pkt, sizeof ( *pkt ) );
+	reply->info &= ~drop_flags;
+	if ( is_rsn )
+		reply->keysize = 0;
+	memset ( reply->nonce, 0, sizeof ( reply->nonce ) );
+	memset ( reply->iv, 0, sizeof ( reply->iv ) );
+	reply->datalen = 0;
+
+	/* The key-type bit tells the two handshakes apart */
+	if ( reply->info & EAPOL_KEY_INFO_TYPE ) {
+		DBGC ( ctx, "WPA %p: sending 4/4\n", ctx );
+	} else {
+		DBGC ( ctx, "WPA %p: sending 2/2\n", ctx );
+	}
+
+	return wpa_send_eapol ( iob, ctx, kie );
+}
+
+
+/**
+ * Handle receipt of third frame in 4-Way Handshake
+ *
+ * @v ctx WPA common context
+ * @v pkt EAPOL-Key packet
+ * @v is_rsn If TRUE, frame uses new RSN format
+ * @v kie Key integrity and encryption handler
+ * @ret rc Return status code
+ *
+ * Verifies that the nonce matches the one stored from frame 1/4 and
+ * that the RSN/WPA IE in the key data matches the copy saved from the
+ * beacon by wpa_start(), optionally accepts a second IE of the same
+ * style as a change of cipher, installs the GTK if one is present in
+ * encrypted key data, replies with frame 4/4 and finally installs the
+ * PTK. Verification or installation failures are reported through
+ * wpa_fail().
+ */
+static int wpa_handle_3_of_4 ( struct wpa_common_ctx *ctx,
+ struct eapol_key_pkt *pkt, int is_rsn,
+ struct wpa_kie *kie )
+{
+ int rc;
+ u8 *this_rsn, *this_rsn_end;
+ u8 *new_rsn, *new_rsn_end;
+ int this_is_rsn, new_is_rsn;
+
+ if ( ctx->state == WPA_WAITING )
+ return -EINVAL;
+
+ ctx->state = WPA_WORKING;
+
+ /* Check nonce: it must equal the Anonce remembered from frame 1/4 */
+ if ( memcmp ( ctx->Anonce, pkt->nonce, WPA_NONCE_LEN ) != 0 ) {
+ DBGC ( ctx, "WPA %p ALERT: nonce mismatch in 3/4\n", ctx );
+ return wpa_fail ( ctx, -EACCES );
+ }
+
+ /* Check RSN IE: find the first RSN/WPA IE in the key data, and
+ then look for a second one immediately following it */
+ this_rsn = sec80211_find_rsn ( ( union ieee80211_ie * ) pkt->data,
+ pkt->data + pkt->datalen,
+ &this_is_rsn, &this_rsn_end );
+ if ( this_rsn )
+ new_rsn = sec80211_find_rsn ( ( union ieee80211_ie * )
+ this_rsn_end,
+ pkt->data + pkt->datalen,
+ &new_is_rsn, &new_rsn_end );
+ else
+ new_rsn = NULL;
+
+ if ( ! ctx->ap_rsn_ie || ! this_rsn ||
+ ctx->ap_rsn_ie_len != ( this_rsn_end - this_rsn ) ||
+ ctx->ap_rsn_is_rsn != this_is_rsn ||
+ memcmp ( ctx->ap_rsn_ie, this_rsn, ctx->ap_rsn_ie_len ) != 0 ) {
+ DBGC ( ctx, "WPA %p ALERT: RSN mismatch in 3/4\n", ctx );
+ DBGC2 ( ctx, "WPA %p RSNs (in 3/4, in beacon):\n", ctx );
+ DBGC2_HD ( ctx, this_rsn, this_rsn_end - this_rsn );
+ DBGC2_HD ( ctx, ctx->ap_rsn_ie, ctx->ap_rsn_ie_len );
+ return wpa_fail ( ctx, -EACCES );
+ }
+
+ /* Don't switch if they just supplied both styles of IE
+ simultaneously; we need two RSN IEs or two WPA IEs to
+ switch ciphers. They'll be immediately consecutive because
+ of ordering guarantees.
+ NOTE(review): when sec80211_detect_ie() rejects the second
+ IE, new_rsn still points at that rejected IE for the group
+ cipher lookup below - confirm this is intended. */
+ if ( new_rsn && this_is_rsn == new_is_rsn ) {
+ struct net80211_wlan *assoc = ctx->dev->associating;
+ DBGC ( ctx, "WPA %p: accommodating bait-and-switch tactics\n",
+ ctx );
+ DBGC2 ( ctx, "WPA %p RSNs (in 3/4+beacon, new in 3/4):\n",
+ ctx );
+ DBGC2_HD ( ctx, this_rsn, this_rsn_end - this_rsn );
+ DBGC2_HD ( ctx, new_rsn, new_rsn_end - new_rsn );
+
+ if ( ( rc = sec80211_detect_ie ( new_is_rsn, new_rsn,
+ new_rsn_end,
+ &assoc->handshaking,
+ &assoc->crypto ) ) != 0 )
+ DBGC ( ctx, "WPA %p: bait-and-switch invalid, staying "
+ "with original request\n", ctx );
+ } else {
+ new_rsn = this_rsn;
+ new_is_rsn = this_is_rsn;
+ new_rsn_end = this_rsn_end;
+ }
+
+ /* Grab group cryptosystem ID: a 32-bit value read from offset 2
+ of the selected IE.
+ NOTE(review): this is a type-punned and possibly unaligned
+ read through a u8 pointer - confirm that is acceptable on
+ every supported target. */
+ ctx->gcrypt = sec80211_rsn_get_net80211_crypt ( *( u32 * )
+ ( new_rsn + 2 ) );
+
+ /* Check for a GTK, if info field is encrypted; a missing GTK
+ (-ENOENT) is tolerated, any other failure is fatal */
+ if ( pkt->info & EAPOL_KEY_INFO_KEY_ENC ) {
+ rc = wpa_maybe_install_gtk ( ctx,
+ ( union ieee80211_ie * ) pkt->data,
+ pkt->data + pkt->datalen,
+ pkt->rsc );
+ if ( rc < 0 ) {
+ DBGC ( ctx, "WPA %p did not install GTK in 3/4: %s\n",
+ ctx, strerror ( rc ) );
+ if ( rc != -ENOENT )
+ return wpa_fail ( ctx, rc );
+ }
+ }
+
+ DBGC ( ctx, "WPA %p: received 3/4, looks OK\n", ctx );
+
+ /* Send final message; this happens before the PTK is installed */
+ rc = wpa_send_final ( ctx, pkt, is_rsn, kie );
+ if ( rc < 0 )
+ return wpa_fail ( ctx, rc );
+
+ /* Install PTK, using the key size announced in the frame */
+ rc = wpa_install_ptk ( ctx, pkt->keysize );
+ if ( rc < 0 ) {
+ DBGC ( ctx, "WPA %p failed to install PTK: %s\n", ctx,
+ strerror ( rc ) );
+ return wpa_fail ( ctx, rc );
+ }
+
+ /* Mark us as needing a new Snonce if we rekey */
+ ctx->have_Snonce = 0;
+
+ /* Done! */
+ ctx->state = WPA_SUCCESS;
+ return 0;
+}
+
+
+/**
+ * Handle receipt of first frame in Group Key Handshake
+ *
+ * @v ctx	WPA common context
+ * @v pkt	EAPOL-Key packet
+ * @v is_rsn	If TRUE, frame uses new RSN format
+ * @v kie	Key integrity and encryption handler
+ * @ret rc	Return status code
+ *
+ * On success the new GTK has been installed and frame 2/2 has been
+ * sent.  A failure to decrypt a legacy-WPA key is returned to the
+ * caller without tearing down the association; every other failure
+ * is reported through wpa_fail().
+ */
+static int wpa_handle_1_of_2 ( struct wpa_common_ctx *ctx,
+			       struct eapol_key_pkt *pkt, int is_rsn,
+			       struct wpa_kie *kie )
+{
+	int rc;
+
+	/*
+	 * WPA and RSN do this completely differently.
+	 *
+	 * The idea of encoding the GTK (or PMKID, or various other
+	 * things) into a KDE that looks like an information element
+	 * is an RSN innovation; old WPA code never encapsulates
+	 * things like that. If it looks like an info element, it
+	 * really is (for the WPA IE check in frames 2/4 and 3/4). The
+	 * "key data encrypted" bit in the info field is also specific
+	 * to RSN.
+	 *
+	 * So from an old WPA host, 3/4 does not contain an
+	 * encapsulated GTK. The first frame of the GK handshake
+	 * contains it, encrypted, but without a KDE wrapper, and with
+	 * the key ID field (which iPXE doesn't use) shoved away in
+	 * the reserved bits in the info field, and the TxRx bit
+	 * stealing the Install bit's spot.
+	 */
+
+	if ( is_rsn && ( pkt->info & EAPOL_KEY_INFO_KEY_ENC ) ) {
+		/* RSN: key data was already decrypted by our caller;
+		 * the GTK is wrapped in a KDE.
+		 */
+		rc = wpa_maybe_install_gtk ( ctx,
+					     ( union ieee80211_ie * ) pkt->data,
+					     pkt->data + pkt->datalen,
+					     pkt->rsc );
+		if ( rc < 0 ) {
+			DBGC ( ctx, "WPA %p: failed to install GTK in 1/2: "
+			       "%s\n", ctx, strerror ( rc ) );
+			return wpa_fail ( ctx, rc );
+		}
+	} else {
+		/* Legacy WPA: key data is the bare, encrypted GTK */
+		rc = kie->decrypt ( &ctx->ptk.kek, pkt->iv, pkt->data,
+				    &pkt->datalen );
+		if ( rc < 0 ) {
+			DBGC ( ctx, "WPA %p: failed to decrypt GTK: %s\n",
+			       ctx, strerror ( rc ) );
+			return rc; /* non-fatal */
+		}
+		if ( pkt->datalen > sizeof ( ctx->gtk.tk ) ) {
+			DBGC ( ctx, "WPA %p: too much GTK data (%d > %zd)\n",
+			       ctx, pkt->datalen, sizeof ( ctx->gtk.tk ) );
+			return wpa_fail ( ctx, -EINVAL );
+		}
+
+		memcpy ( &ctx->gtk.tk, pkt->data, pkt->datalen );
+
+		/* Do not acknowledge a key we failed to install; treat
+		 * this exactly as the RSN path above does.
+		 */
+		rc = wpa_install_gtk ( ctx, pkt->datalen, pkt->rsc );
+		if ( rc < 0 ) {
+			DBGC ( ctx, "WPA %p: failed to install GTK in 1/2: "
+			       "%s\n", ctx, strerror ( rc ) );
+			return wpa_fail ( ctx, rc );
+		}
+	}
+
+	DBGC ( ctx, "WPA %p: received 1/2, looks OK\n", ctx );
+
+	return wpa_send_final ( ctx, pkt, is_rsn, kie );
+}
+
+
+/**
+ * Handle receipt of EAPOL-Key frame for WPA
+ *
+ * @v iob	I/O buffer
+ * @v netdev	Network device
+ * @v ll_dest	Link-layer destination address
+ * @v ll_source	Source link-layer address
+ * @ret rc	Return status code
+ *
+ * The frame is validated (descriptor type, sender, direction, length,
+ * MIC and replay counter), its key data is decrypted if flagged as
+ * encrypted, and it is then dispatched to the handler for frame 1/4,
+ * 3/4 or 1/2 according to its key-type and MIC flags.  The I/O buffer
+ * is always freed before returning.
+ */
+static int eapol_key_rx ( struct io_buffer *iob, struct net_device *netdev,
+			  const void *ll_dest __unused,
+			  const void *ll_source )
+{
+	struct net80211_device *dev = net80211_get ( netdev );
+	struct eapol_key_pkt *pkt = iob->data;
+	int is_rsn, found_ctx;
+	struct wpa_common_ctx *ctx;
+	int rc = 0;
+	struct wpa_kie *kie;
+	u8 their_mic[16], our_mic[16];
+
+	/* The fixed-size header must be present before we look inside it */
+	if ( iob_len ( iob ) < sizeof ( *pkt ) ) {
+		DBG ( "EAPOL-Key: packet too short for key header\n" );
+		rc = -EINVAL;
+		goto drop;
+	}
+
+	if ( pkt->type != EAPOL_KEY_TYPE_WPA &&
+	     pkt->type != EAPOL_KEY_TYPE_RSN ) {
+		DBG ( "EAPOL-Key: packet not of 802.11 type\n" );
+		rc = -EINVAL;
+		goto drop;
+	}
+
+	is_rsn = ( pkt->type == EAPOL_KEY_TYPE_RSN );
+
+	if ( ! dev ) {
+		DBG ( "EAPOL-Key: packet not from 802.11\n" );
+		rc = -EINVAL;
+		goto drop;
+	}
+
+	if ( memcmp ( dev->bssid, ll_source, ETH_ALEN ) != 0 ) {
+		DBG ( "EAPOL-Key: packet not from associated AP\n" );
+		rc = -EINVAL;
+		goto drop;
+	}
+
+	/* Frames we accept always have the ACK flag set; our own
+	 * replies have it cleared.
+	 */
+	if ( ! ( ntohs ( pkt->info ) & EAPOL_KEY_INFO_KEY_ACK ) ) {
+		DBG ( "EAPOL-Key: packet sent in wrong direction\n" );
+		rc = -EINVAL;
+		goto drop;
+	}
+
+	/* Find the WPA context registered for this device */
+	found_ctx = 0;
+	list_for_each_entry ( ctx, &wpa_contexts, list ) {
+		if ( ctx->dev == dev ) {
+			found_ctx = 1;
+			break;
+		}
+	}
+
+	if ( ! found_ctx ) {
+		DBG ( "EAPOL-Key: no WPA context to handle packet for %p\n",
+		      dev );
+		rc = -ENOENT;
+		goto drop;
+	}
+
+	/* The advertised key data must fit inside the received buffer */
+	if ( ( void * ) ( pkt + 1 ) + ntohs ( pkt->datalen ) > iob->tail ) {
+		DBGC ( ctx, "WPA %p: packet truncated (has %zd extra bytes, "
+		       "states %d)\n", ctx, iob->tail - ( void * ) ( pkt + 1 ),
+		       ntohs ( pkt->datalen ) );
+		rc = -EINVAL;
+		goto drop;
+	}
+
+	/* Get a handle on key integrity/encryption handler */
+	kie = wpa_find_kie ( ntohs ( pkt->info ) & EAPOL_KEY_INFO_VERSION );
+	if ( ! kie ) {
+		DBGC ( ctx, "WPA %p: no support for packet version %d\n", ctx,
+		       ntohs ( pkt->info ) & EAPOL_KEY_INFO_VERSION );
+		rc = wpa_fail ( ctx, -ENOTSUP );
+		goto drop;
+	}
+
+	/* Check MIC (computed over the frame with the MIC field zeroed) */
+	if ( ntohs ( pkt->info ) & EAPOL_KEY_INFO_KEY_MIC ) {
+		memcpy ( their_mic, pkt->mic, sizeof ( pkt->mic ) );
+		memset ( pkt->mic, 0, sizeof ( pkt->mic ) );
+		kie->mic ( &ctx->ptk.kck, ( void * ) pkt - EAPOL_HDR_LEN,
+			   EAPOL_HDR_LEN + sizeof ( *pkt ) +
+			   ntohs ( pkt->datalen ), our_mic );
+		DBGC2 ( ctx, "WPA %p MIC comparison (theirs, ours):\n", ctx );
+		DBGC2_HD ( ctx, their_mic, 16 );
+		DBGC2_HD ( ctx, our_mic, 16 );
+		if ( memcmp ( their_mic, our_mic, sizeof ( pkt->mic ) ) != 0 ) {
+			DBGC ( ctx, "WPA %p: EAPOL MIC failure\n", ctx );
+			/* Report the rejection instead of returning 0 */
+			rc = -EACCES;
+			goto drop;
+		}
+	}
+
+	/* Fix byte order to local */
+	pkt->info = ntohs ( pkt->info );
+	pkt->keysize = ntohs ( pkt->keysize );
+	pkt->datalen = ntohs ( pkt->datalen );
+	pkt->replay = be64_to_cpu ( pkt->replay );
+
+	/* Check replay counter (all-ones means none has been seen yet) */
+	if ( ctx->replay != ~0ULL && ctx->replay >= pkt->replay ) {
+		DBGC ( ctx, "WPA %p ALERT: Replay detected! "
+		       "(%08x:%08x >= %08x:%08x)\n", ctx,
+		       ( u32 ) ( ctx->replay >> 32 ), ( u32 ) ctx->replay,
+		       ( u32 ) ( pkt->replay >> 32 ), ( u32 ) pkt->replay );
+		rc = 0;		/* ignore without error */
+		goto drop;
+	}
+	ctx->replay = pkt->replay;
+
+	/* Decrypt key data */
+	if ( pkt->info & EAPOL_KEY_INFO_KEY_ENC ) {
+		rc = kie->decrypt ( &ctx->ptk.kek, pkt->iv, pkt->data,
+				    &pkt->datalen );
+		if ( rc < 0 ) {
+			DBGC ( ctx, "WPA %p: failed to decrypt packet: %s\n",
+			       ctx, strerror ( rc ) );
+			goto drop;
+		}
+	}
+
+	/* Hand it off to appropriate handler */
+	switch ( pkt->info & ( EAPOL_KEY_INFO_TYPE |
+			       EAPOL_KEY_INFO_KEY_MIC ) ) {
+	case EAPOL_KEY_TYPE_PTK:
+		rc = wpa_handle_1_of_4 ( ctx, pkt, is_rsn, kie );
+		break;
+
+	case EAPOL_KEY_TYPE_PTK | EAPOL_KEY_INFO_KEY_MIC:
+		rc = wpa_handle_3_of_4 ( ctx, pkt, is_rsn, kie );
+		break;
+
+	case EAPOL_KEY_TYPE_GTK | EAPOL_KEY_INFO_KEY_MIC:
+		rc = wpa_handle_1_of_2 ( ctx, pkt, is_rsn, kie );
+		break;
+
+	default:
+		DBGC ( ctx, "WPA %p: Invalid combination of key flags %04x\n",
+		       ctx, pkt->info );
+		rc = -EINVAL;
+		break;
+	}
+
+ drop:
+	free_iob ( iob );
+	return rc;
+}
+
+struct eapol_handler eapol_key_handler __eapol_handler = { /* EAPOL handler entry for EAPOL-Key frames */
+ .type = EAPOL_TYPE_KEY, /* EAPOL packet type this entry applies to */
+ .rx = eapol_key_rx, /* receive routine for packets of that type */
+};
+
+/* WPA always needs EAPOL in order to be useful, so make sure the
+ * eapol object is pulled into any build that includes this file */
+REQUIRE_OBJECT ( eapol );
diff --git a/qemu/roms/ipxe/src/net/80211/wpa_ccmp.c b/qemu/roms/ipxe/src/net/80211/wpa_ccmp.c
new file mode 100644
index 000000000..f98ebea26
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/80211/wpa_ccmp.c
@@ -0,0 +1,530 @@
+/*
+ * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <string.h>
+#include <ipxe/net80211.h>
+#include <ipxe/crypto.h>
+#include <ipxe/hmac.h>
+#include <ipxe/sha1.h>
+#include <ipxe/aes.h>
+#include <ipxe/wpa.h>
+#include <byteswap.h>
+#include <errno.h>
+
+/** @file
+ *
+ * Backend for WPA using the CCMP encryption method
+ */
+
+/** Context for CCMP encryption and decryption
+ *
+ * Stored in the @c priv area of the CCMP @c net80211_crypto structure.
+ */
+struct ccmp_ctx
+{
+ /** AES context - only ever used for encryption
+ *
+ * Both the counter-mode keystream and the CBC-MAC are produced
+ * with cipher_encrypt(), for transmit and receive alike.
+ */
+ u8 aes_ctx[AES_CTX_SIZE];
+
+ /** Most recently sent packet number
+ *
+ * Incremented by ccmp_encrypt() before each frame is built.
+ */
+ u64 tx_seq;
+
+ /** Most recently received packet number
+ *
+ * Set from the receive sequence counter by ccmp_init() when one
+ * is supplied; ccmp_decrypt() drops frames whose packet number
+ * is not greater than this value.
+ */
+ u64 rx_seq;
+};
+
+/** Header structure at the beginning of CCMP frame data
+ *
+ * Placed directly after the 802.11 frame header; the packet number is
+ * split around the reserved and key ID bytes.
+ */
+struct ccmp_head
+{
+ u8 pn_lo[2]; /**< Bytes 0 and 1 of packet number */
+ u8 _rsvd; /**< Reserved byte (written as zero on transmit) */
+ u8 kid; /**< Key ID and ExtIV byte (0x20 on transmit) */
+ u8 pn_hi[4]; /**< Bytes 2-5 (2 first) of packet number */
+} __attribute__ (( packed ));
+
+
+/** CCMP header overhead, in bytes (size of struct ccmp_head) */
+#define CCMP_HEAD_LEN 8
+
+/** CCMP MIC trailer overhead, in bytes */
+#define CCMP_MIC_LEN 8
+
+/** CCMP nonce length, in bytes (size of struct ccmp_nonce) */
+#define CCMP_NONCE_LEN 13
+
+/** CCMP nonce structure
+ *
+ * Copied into bytes 1-13 of both the counter blocks and the first
+ * CBC-MAC block.
+ */
+struct ccmp_nonce
+{
+ u8 prio; /**< Packet priority, 0 for non-QoS */
+ u8 a2[ETH_ALEN]; /**< Address 2 from packet header (sender) */
+ u8 pn[6]; /**< Packet number, most significant byte first */
+} __attribute__ (( packed ));
+
+/** CCMP additional authentication data length (for non-QoS, non-WDS frames) */
+#define CCMP_AAD_LEN 22
+
+/** CCMP additional authentication data structure
+ *
+ * Covered by the MIC but not encrypted.
+ */
+struct ccmp_aad
+{
+ u16 fc; /**< Frame Control field, masked with CCMP_AAD_FC_MASK */
+ u8 a1[6]; /**< Address 1 */
+ u8 a2[6]; /**< Address 2 */
+ u8 a3[6]; /**< Address 3 */
+ u16 seq; /**< Sequence Control field, masked with CCMP_AAD_SEQ_MASK */
+ /* Address 4 and QoS Control are included if present */
+} __attribute__ (( packed ));
+
+/** Mask for Frame Control field in AAD (bits kept when forming the AAD) */
+#define CCMP_AAD_FC_MASK 0xC38F
+
+/** Mask for Sequence Control field in AAD (bits kept when forming the AAD) */
+#define CCMP_AAD_SEQ_MASK 0x000F
+
+
+/**
+ * Convert 6-byte LSB packet number to 64-bit integer
+ *
+ * @v pn	Pointer to 6-byte packet number
+ * @ret v	64-bit integer value of @a pn
+ *
+ * Byte 0 of @a pn supplies the least significant eight bits of the
+ * result and byte 5 the most significant of its 48 bits.
+ */
+static u64 pn_to_u64 ( const u8 *pn )
+{
+	u64 value = 0;
+	unsigned int idx;
+	unsigned int shift;
+
+	for ( idx = 0, shift = 0 ; idx < 6 ; idx++, shift += 8 )
+		value |= ( ( ( u64 ) pn[idx] ) << shift );
+
+	return value;
+}
+
+/**
+ * Convert 64-bit integer to 6-byte packet number
+ *
+ * @v v		64-bit integer
+ * @v msb	If TRUE, reverse the output PN to be in MSB order
+ * @ret pn	6-byte packet number
+ *
+ * Only the low 48 bits of @a v are used.  The PN is stored in LSB
+ * order in the packet header and in MSB order in the nonce, so both
+ * layouts are needed.
+ */
+static void u64_to_pn ( u64 v, u8 *pn, int msb )
+{
+	unsigned int idx;
+	u8 octet;
+
+	for ( idx = 0 ; idx < 6 ; idx++ ) {
+		/* Octet number idx, counting from the least significant */
+		octet = ( ( v >> ( 8 * idx ) ) & 0xFF );
+		if ( msb )
+			pn[ 5 - idx ] = octet;
+		else
+			pn[idx] = octet;
+	}
+}
+
+/** Value for @a msb argument of u64_to_pn() for MSB output */
+#define PN_MSB 1
+
+/** Value for @a msb argument of u64_to_pn() for LSB output */
+#define PN_LSB 0
+
+
+
+/**
+ * Initialise CCMP state and install key
+ *
+ * @v crypto	CCMP cryptosystem structure
+ * @v key	Pointer to 16-byte temporal key to install
+ * @v keylen	Length of key (16 bytes)
+ * @v rsc	Initial receive sequence counter
+ * @ret rc	Return status code
+ *
+ * Any key length other than 16 is rejected with -EINVAL.  The receive
+ * packet number is set only when @a rsc is non-NULL.
+ */
+static int ccmp_init ( struct net80211_crypto *crypto, const void *key,
+		       int keylen, const void *rsc )
+{
+	struct ccmp_ctx *ccmp = crypto->priv;
+
+	if ( keylen != 16 )
+		return -EINVAL;
+
+	/* Seed replay detection from the supplied counter, if any */
+	if ( rsc != NULL )
+		ccmp->rx_seq = pn_to_u64 ( rsc );
+
+	cipher_setkey ( &aes_algorithm, ccmp->aes_ctx, key, keylen );
+
+	return 0;
+}
+
+
+/**
+ * Encrypt or decrypt data stream using AES in Counter mode
+ *
+ * @v ctx	CCMP cryptosystem context
+ * @v nonce	Nonce value, 13 bytes
+ * @v srcv	Data to encrypt or decrypt
+ * @v len	Number of bytes pointed to by @a srcv
+ * @v msrcv	MIC value to encrypt or decrypt (may be NULL)
+ * @ret destv	Encrypted or decrypted data
+ * @ret mdestv	Encrypted or decrypted MIC value
+ *
+ * This assumes CCMP parameters of L=2 and M=8. The algorithm is
+ * defined in RFC 3610.  The keystream block for counter value zero is
+ * applied to the 8-byte MIC; the data uses the blocks for counter
+ * values one and upwards.
+ */
+static void ccmp_ctr_xor ( struct ccmp_ctx *ctx, const void *nonce,
+			   const void *srcv, void *destv, int len,
+			   const void *msrcv, void *mdestv )
+{
+	const u8 *in = srcv;
+	u8 *out = destv;
+	u8 ctr_block[16], keystream[16];
+	u16 counter;
+	int chunk;
+	int i;
+
+	/* Counter block: flags byte, nonce, 16-bit counter */
+	ctr_block[0] = 0x01; /* flags, L' = L - 1 = 1, other bits rsvd */
+	memcpy ( &ctr_block[1], nonce, CCMP_NONCE_LEN );
+
+	if ( msrcv ) {
+		const u8 *mic_in = msrcv;
+		u8 *mic_out = mdestv;
+
+		ctr_block[14] = 0;
+		ctr_block[15] = 0;
+		cipher_encrypt ( &aes_algorithm, ctx->aes_ctx, ctr_block,
+				 keystream, 16 );
+
+		for ( i = 0 ; i < 8 ; i++ )
+			mic_out[i] = ( mic_in[i] ^ keystream[i] );
+	}
+
+	/* At least one keystream block is always generated, exactly
+	 * as many as are needed to cover the data otherwise.
+	 */
+	for ( counter = 1 ; ; counter++, len -= 16 ) {
+		ctr_block[14] = ( counter >> 8 );
+		ctr_block[15] = ( counter & 0xFF );
+		cipher_encrypt ( &aes_algorithm, ctx->aes_ctx, ctr_block,
+				 keystream, 16 );
+
+		chunk = ( ( len < 16 ) ? len : 16 );
+		for ( i = 0 ; i < chunk ; i++ )
+			*out++ = ( *in++ ^ keystream[i] );
+
+		if ( len <= 16 )
+			break;
+	}
+}
+
+
+/**
+ * Advance one block in CBC-MAC calculation
+ *
+ * @v aes_ctx	AES encryption context with key set
+ * @v B		Cleartext block to incorporate (16 bytes)
+ * @v X		Previous ciphertext block (16 bytes)
+ * @ret B	Clobbered
+ * @ret X	New ciphertext block (16 bytes)
+ *
+ * Computes X := E[key] ( X ^ B ), using B as scratch space for the
+ * XORed input.
+ */
+static void ccmp_feed_cbc_mac ( void *aes_ctx, u8 *B, u8 *X )
+{
+	unsigned int idx;
+
+	/* Fold the running MAC into the new block... */
+	for ( idx = 0 ; idx < 16 ; idx++ )
+		B[idx] = ( B[idx] ^ X[idx] );
+
+	/* ...and encrypt the result to give the next MAC state */
+	cipher_encrypt ( &aes_algorithm, aes_ctx, B, X, 16 );
+}
+
+
+/**
+ * Calculate MIC on plaintext data using CBC-MAC
+ *
+ * @v ctx	CCMP cryptosystem context
+ * @v nonce	Nonce value, 13 bytes
+ * @v data	Data to calculate MIC over
+ * @v datalen	Length of @a data
+ * @v aad	Additional authentication data, for MIC but not encryption
+ * @ret mic	MIC value (unencrypted), 8 bytes
+ *
+ * @a aad is assumed to be 22 bytes long, as it always is for 802.11
+ * use when transmitting non-QoS, not-between-APs frames (the only
+ * type we deal with).
+ */
+static void ccmp_cbc_mac ( struct ccmp_ctx *ctx, const void *nonce,
+			   const void *data, u16 datalen,
+			   const void *aad, void *mic )
+{
+	const u8 *aad_bytes = aad;
+	const u8 *msg = data;
+	u8 state[16], block[16];
+	size_t chunk;
+
+	/* Zeroth block: flags, nonce, length */
+
+	/* Rsv AAD - M'- - L'-
+	 *  0   1  0 1 1 0 0 1   for an 8-byte MAC and 2-byte message length
+	 */
+	block[0] = 0x59;
+	memcpy ( &block[1], nonce, CCMP_NONCE_LEN );
+	block[14] = ( datalen >> 8 );
+	block[15] = ( datalen & 0xFF );
+	cipher_encrypt ( &aes_algorithm, ctx->aes_ctx, block, state, 16 );
+
+	/* First block: AAD length field and 14 bytes of AAD */
+	block[0] = 0;
+	block[1] = CCMP_AAD_LEN;
+	memcpy ( &block[2], aad_bytes, 14 );
+	ccmp_feed_cbc_mac ( ctx->aes_ctx, block, state );
+
+	/* Second block: Remaining 8 bytes of AAD, 8 bytes zero pad */
+	memset ( block, 0, sizeof ( block ) );
+	memcpy ( block, ( aad_bytes + 14 ), 8 );
+	ccmp_feed_cbc_mac ( ctx->aes_ctx, block, state );
+
+	/* Message blocks, the last one zero-padded to 16 bytes */
+	for ( ; datalen ; datalen -= chunk, msg += chunk ) {
+		chunk = ( ( datalen < 16 ) ? datalen : 16 );
+		memset ( block, 0, sizeof ( block ) );
+		memcpy ( block, msg, chunk );
+		ccmp_feed_cbc_mac ( ctx->aes_ctx, block, state );
+	}
+
+	/* Get MIC from final value of the MAC state */
+	memcpy ( mic, state, 8 );
+}
+
+
+/**
+ * Encapsulate and encrypt a packet using CCMP
+ *
+ * @v crypto	CCMP cryptosystem
+ * @v iob	I/O buffer containing cleartext packet
+ * @ret eiob	I/O buffer containing encrypted packet
+ *
+ * The transmit packet number is advanced first.  The output frame is
+ * the original 802.11 header with the Protected bit set, the CCMP
+ * header, the encrypted payload and the encrypted MIC.  The input
+ * buffer is left untouched; NULL is returned if the output buffer
+ * cannot be allocated.
+ */
+struct io_buffer * ccmp_encrypt ( struct net80211_crypto *crypto,
+				  struct io_buffer *iob )
+{
+	struct ccmp_ctx *ctx = crypto->priv;
+	const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN;
+	const int datalen = ( iob_len ( iob ) - hdrlen );
+	struct ieee80211_frame *ehdr;
+	struct io_buffer *eiob;
+	struct ccmp_head *head;
+	struct ccmp_nonce nonce;
+	struct ccmp_aad aad;
+	u8 mic[8], tx_pn[6];
+	void *edata, *emic;
+
+	/* Take the next packet number */
+	ctx->tx_seq++;
+	u64_to_pn ( ctx->tx_seq, tx_pn, PN_LSB );
+
+	/* Allocate memory */
+	eiob = alloc_iob ( iob_len ( iob ) + CCMP_HEAD_LEN + CCMP_MIC_LEN );
+	if ( ! eiob )
+		return NULL;
+
+	/* Copy frame header and flag the frame as protected */
+	ehdr = iob_put ( eiob, hdrlen );
+	memcpy ( ehdr, iob->data, hdrlen );
+	ehdr->fc |= IEEE80211_FC_PROTECTED;
+
+	/* Fill in packet number and extended IV */
+	head = iob_put ( eiob, sizeof ( *head ) );
+	memcpy ( head->pn_lo, &tx_pn[0], 2 );
+	head->_rsvd = 0;
+	head->kid = 0x20; /* have Extended IV, key ID 0 */
+	memcpy ( head->pn_hi, &tx_pn[2], 4 );
+
+	/* Form nonce */
+	nonce.prio = 0;
+	memcpy ( nonce.a2, ehdr->addr2, ETH_ALEN );
+	u64_to_pn ( ctx->tx_seq, nonce.pn, PN_MSB );
+
+	/* Form additional authentication data */
+	aad.fc = ( ehdr->fc & CCMP_AAD_FC_MASK );
+	memcpy ( aad.a1, ehdr->addr1, 3 * ETH_ALEN ); /* all 3 at once */
+	aad.seq = ( ehdr->seq & CCMP_AAD_SEQ_MASK );
+
+	/* Calculate MIC over the cleartext data */
+	ccmp_cbc_mac ( ctx, &nonce, iob->data + hdrlen, datalen, &aad, mic );
+
+	/* Copy and encrypt data and MIC */
+	edata = iob_put ( eiob, datalen );
+	emic = iob_put ( eiob, CCMP_MIC_LEN );
+	ccmp_ctr_xor ( ctx, &nonce, iob->data + hdrlen, edata, datalen,
+		       mic, emic );
+
+	/* Done! */
+	DBGC2 ( ctx, "WPA-CCMP %p: encrypted packet %p -> %p\n", ctx,
+		iob, eiob );
+
+	return eiob;
+}
+
+/**
+ * Decrypt a packet using CCMP
+ *
+ * @v crypto	CCMP cryptosystem
+ * @v eiob	I/O buffer containing encrypted packet
+ * @ret iob	I/O buffer containing cleartext packet
+ *
+ * Returns NULL if the frame is too short to hold the CCMP header and
+ * MIC, if memory cannot be allocated, if the packet number is not
+ * newer than the last accepted one, or if the MIC does not verify.
+ * The replay counter is advanced only once the MIC has been verified,
+ * so a forged frame cannot block later genuine traffic.  The input
+ * buffer is never freed here.
+ */
+static struct io_buffer * ccmp_decrypt ( struct net80211_crypto *crypto,
+					 struct io_buffer *eiob )
+{
+	struct ccmp_ctx *ctx = crypto->priv;
+	struct ieee80211_frame *hdr;
+	struct io_buffer *iob;
+	const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN;
+	const size_t overhead = ( hdrlen + CCMP_HEAD_LEN + CCMP_MIC_LEN );
+	int datalen;
+	struct ccmp_head *head;
+	struct ccmp_nonce nonce;
+	struct ccmp_aad aad;
+	u8 rx_pn[6], their_mic[8], our_mic[8];
+	u64 pn;
+
+	/* Reject runt frames before computing any lengths from them */
+	if ( iob_len ( eiob ) < overhead ) {
+		DBGC ( ctx, "WPA-CCMP %p: packet too short (%zd bytes)\n",
+		       ctx, iob_len ( eiob ) );
+		return NULL;
+	}
+	datalen = ( iob_len ( eiob ) - overhead );
+
+	iob = alloc_iob ( hdrlen + datalen );
+	if ( ! iob )
+		return NULL;
+
+	/* Copy frame header */
+	memcpy ( iob_put ( iob, hdrlen ), eiob->data, hdrlen );
+	hdr = iob->data;
+	hdr->fc &= ~IEEE80211_FC_PROTECTED;
+
+	/* Extract and check RX packet number */
+	head = eiob->data + hdrlen;
+	memcpy ( rx_pn, head->pn_lo, 2 );
+	memcpy ( rx_pn + 2, head->pn_hi, 4 );
+	pn = pn_to_u64 ( rx_pn );
+
+	if ( pn <= ctx->rx_seq ) {
+		DBGC ( ctx, "WPA-CCMP %p: packet received out of order "
+		       "(%012llx <= %012llx)\n", ctx, pn, ctx->rx_seq );
+		free_iob ( iob );
+		return NULL;
+	}
+
+	DBGC2 ( ctx, "WPA-CCMP %p: RX packet number %012llx\n", ctx, pn );
+
+	/* Form nonce */
+	nonce.prio = 0;
+	memcpy ( nonce.a2, hdr->addr2, ETH_ALEN );
+	u64_to_pn ( pn, nonce.pn, PN_MSB );
+
+	/* Form additional authentication data; the sender computed its
+	 * MIC with the Protected bit set, so put it back for the AAD.
+	 */
+	aad.fc = ( hdr->fc & CCMP_AAD_FC_MASK ) | IEEE80211_FC_PROTECTED;
+	memcpy ( aad.a1, hdr->addr1, 3 * ETH_ALEN ); /* all 3 at once */
+	aad.seq = hdr->seq & CCMP_AAD_SEQ_MASK;
+
+	/* Copy-decrypt data and MIC */
+	ccmp_ctr_xor ( ctx, &nonce, eiob->data + hdrlen + sizeof ( *head ),
+		       iob_put ( iob, datalen ), datalen,
+		       eiob->tail - CCMP_MIC_LEN, their_mic );
+
+	/* Check MIC */
+	ccmp_cbc_mac ( ctx, &nonce, iob->data + hdrlen, datalen, &aad,
+		       our_mic );
+
+	if ( memcmp ( their_mic, our_mic, CCMP_MIC_LEN ) != 0 ) {
+		DBGC2 ( ctx, "WPA-CCMP %p: MIC failure\n", ctx );
+		free_iob ( iob );
+		return NULL;
+	}
+
+	/* Frame is authentic: only now accept its packet number */
+	ctx->rx_seq = pn;
+
+	DBGC2 ( ctx, "WPA-CCMP %p: decrypted packet %p -> %p\n", ctx,
+		eiob, iob );
+
+	return iob;
+}
+
+
+/** CCMP cryptosystem
+ *
+ * Registers the CCMP routines in this file, and the size of their
+ * private context, under the NET80211_CRYPT_CCMP algorithm ID.
+ */
+struct net80211_crypto ccmp_crypto __net80211_crypto = {
+ .algorithm = NET80211_CRYPT_CCMP,
+ .init = ccmp_init, /* installs a 16-byte temporal key */
+ .encrypt = ccmp_encrypt,
+ .decrypt = ccmp_decrypt,
+ .priv_len = sizeof ( struct ccmp_ctx ), /* size of the priv area used by the routines above */
+};
+
+
+
+
+/**
+ * Calculate HMAC-SHA1 MIC for EAPOL-Key frame
+ *
+ * @v kck	Key Confirmation Key, 16 bytes
+ * @v msg	Message to calculate MIC over
+ * @v len	Number of bytes to calculate MIC over
+ * @ret mic	Calculated MIC, 16 bytes long
+ *
+ * The MIC is the first 16 bytes of the HMAC-SHA1 digest.  The key is
+ * copied to a local buffer because the HMAC routines take a writable
+ * key pointer.
+ */
+static void ccmp_kie_mic ( const void *kck, const void *msg, size_t len,
+			   void *mic )
+{
+	u8 hmac_ctx[SHA1_CTX_SIZE];
+	u8 digest[SHA1_SIZE];
+	u8 key[16];
+	size_t key_len = sizeof ( key );
+
+	memcpy ( key, kck, sizeof ( key ) );
+
+	hmac_init ( &sha1_algorithm, hmac_ctx, key, &key_len );
+	hmac_update ( &sha1_algorithm, hmac_ctx, msg, len );
+	hmac_final ( &sha1_algorithm, hmac_ctx, key, &key_len, digest );
+
+	/* Truncate the digest to the 16-byte MIC field */
+	memcpy ( mic, digest, 16 );
+}
+
+/**
+ * Decrypt key data in EAPOL-Key frame
+ *
+ * @v kek	Key Encryption Key, 16 bytes
+ * @v iv	Initialisation vector, 16 bytes (unused)
+ * @v msg	Message to decrypt
+ * @v len	Length of message
+ * @ret msg	Decrypted message in place of original
+ * @ret len	Adjusted downward for 8 bytes of overhead
+ * @ret rc	Return status code
+ *
+ * The message must be a multiple of 8 bytes long and hold the 8 bytes
+ * of key-wrap overhead plus at least one 8-byte block; anything else
+ * is rejected with -EINVAL before the unwrap routine is called.
+ *
+ * The returned message may still contain padding of 0xDD followed by
+ * zero or more 0x00 octets. It is impossible to remove the padding
+ * without parsing the IEs in the packet (another design decision that
+ * tends to make one question the 802.11i committee's intelligence...)
+ */
+static int ccmp_kie_decrypt ( const void *kek, const void *iv __unused,
+			      void *msg, u16 *len )
+{
+	/* A length below 16 would give a zero or negative block count
+	 * (the length comes straight from the received frame).
+	 */
+	if ( ( *len < 16 ) || ( *len % 8 != 0 ) )
+		return -EINVAL;
+
+	if ( aes_unwrap ( kek, msg, msg, *len / 8 - 1 ) != 0 )
+		return -EINVAL;
+
+	*len -= 8;
+
+	return 0;
+}
+
+/** CCMP-style key integrity and encryption handler
+ *
+ * Entry for EAPOL-Key frames whose descriptor version is
+ * EAPOL_KEY_VERSION_WPA2.
+ */
+struct wpa_kie ccmp_kie __wpa_kie = {
+ .version = EAPOL_KEY_VERSION_WPA2,
+ .mic = ccmp_kie_mic, /* HMAC-SHA1, truncated to 16 bytes */
+ .decrypt = ccmp_kie_decrypt, /* aes_unwrap() of the key data */
+};
diff --git a/qemu/roms/ipxe/src/net/80211/wpa_psk.c b/qemu/roms/ipxe/src/net/80211/wpa_psk.c
new file mode 100644
index 000000000..71190b139
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/80211/wpa_psk.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <string.h>
+#include <ipxe/net80211.h>
+#include <ipxe/sha1.h>
+#include <ipxe/wpa.h>
+#include <errno.h>
+
+/** @file
+ *
+ * Frontend for WPA using a pre-shared key.
+ */
+
+/**
+ * Prepare WPA-PSK state for a device
+ *
+ * Builds the RSN information element for @a dev and stores it in
+ * dev->rsn_ie.
+ *
+ * @v dev	802.11 device
+ * @ret rc	Return status code from wpa_make_rsn_ie()
+ */
+static int wpa_psk_init ( struct net80211_device *dev )
+{
+	int rc;
+
+	rc = wpa_make_rsn_ie ( dev, &dev->rsn_ie );
+
+	return rc;
+}
+
+/**
+ * Start WPA-PSK authentication
+ *
+ * Fetches the passphrase from the net80211_key_setting of the
+ * underlying network device, derives the PMK from the passphrase and
+ * the ESSID using pbkdf2_sha1() with 4096 iterations, and hands the
+ * PMK to wpa_start().  If no passphrase can be fetched, the device is
+ * deauthenticated.
+ *
+ * @v dev 802.11 device
+ * @ret rc Return status code (-EACCES if no passphrase is set)
+ */
+static int wpa_psk_start ( struct net80211_device *dev )
+{
+ /* Room for a 64-character passphrase plus terminating NUL */
+ char passphrase[64+1];
+ u8 pmk[WPA_PMK_LEN];
+ int len;
+ struct wpa_common_ctx *ctx = dev->handshaker->priv;
+
+ len = fetch_string_setting ( netdev_settings ( dev->netdev ),
+ &net80211_key_setting, passphrase,
+ 64 + 1 );
+
+ /* A missing or empty passphrase is treated as an access failure */
+ if ( len <= 0 ) {
+ DBGC ( ctx, "WPA-PSK %p: no passphrase provided!\n", ctx );
+ net80211_deauthenticate ( dev, -EACCES );
+ return -EACCES;
+ }
+
+ pbkdf2_sha1 ( passphrase, len, dev->essid, strlen ( dev->essid ),
+ 4096, pmk, WPA_PMK_LEN );
+
+ /* NOTE(review): these debug messages include the plaintext
+ * passphrase and the derived PMK.
+ */
+ DBGC ( ctx, "WPA-PSK %p: derived PMK from passphrase `%s':\n", ctx,
+ passphrase );
+ DBGC_HD ( ctx, pmk, WPA_PMK_LEN );
+
+ return wpa_start ( dev, ctx, pmk, WPA_PMK_LEN );
+}
+
+/**
+ * Report progress of WPA-PSK authentication
+ *
+ * @v dev	802.11 device
+ * @ret rc	1 once the handshake has succeeded, -EACCES once it
+ *		has failed, 0 in every other state
+ */
+static int wpa_psk_step ( struct net80211_device *dev )
+{
+	struct wpa_common_ctx *wpa = dev->handshaker->priv;
+
+	if ( wpa->state == WPA_SUCCESS )
+		return 1;
+
+	if ( wpa->state == WPA_FAILURE )
+		return -EACCES;
+
+	/* Any other state: nothing to report yet */
+	return 0;
+}
+
+/**
+ * Do-nothing function; you can't change a WPA key post-authentication
+ *
+ * Installed as the change_key method of wpa_psk_handshaker.
+ *
+ * @v dev 802.11 device (unused)
+ * @ret rc Return status code (always 0)
+ */
+static int wpa_psk_no_change_key ( struct net80211_device *dev __unused )
+{
+ return 0;
+}
+
+/**
+ * Disable handling of received WPA authentication frames
+ *
+ * Thin wrapper that forwards to wpa_stop().
+ *
+ * @v dev 802.11 device
+ */
+static void wpa_psk_stop ( struct net80211_device *dev )
+{
+ wpa_stop ( dev );
+}
+
+/**
+ * WPA-PSK security handshaker
+ *
+ * Method table for protocol NET80211_SECPROT_PSK.  The private data
+ * area is sized to hold a struct wpa_common_ctx, which the start and
+ * step methods access through dev->handshaker->priv.
+ */
+struct net80211_handshaker wpa_psk_handshaker __net80211_handshaker = {
+ .protocol = NET80211_SECPROT_PSK,
+ .init = wpa_psk_init,
+ .start = wpa_psk_start,
+ .step = wpa_psk_step,
+ .change_key = wpa_psk_no_change_key,
+ .stop = wpa_psk_stop,
+ .priv_len = sizeof ( struct wpa_common_ctx ),
+};
diff --git a/qemu/roms/ipxe/src/net/80211/wpa_tkip.c b/qemu/roms/ipxe/src/net/80211/wpa_tkip.c
new file mode 100644
index 000000000..fa3e0763b
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/80211/wpa_tkip.c
@@ -0,0 +1,588 @@
+/*
+ * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <string.h>
+#include <ipxe/net80211.h>
+#include <ipxe/crypto.h>
+#include <ipxe/hmac.h>
+#include <ipxe/sha1.h>
+#include <ipxe/md5.h>
+#include <ipxe/crc32.h>
+#include <ipxe/arc4.h>
+#include <ipxe/wpa.h>
+#include <byteswap.h>
+#include <errno.h>
+
+/** @file
+ *
+ * Backend for WPA using the TKIP encryption standard.
+ */
+
+/**
+ * Context for one direction of TKIP, either encryption or decryption
+ *
+ * The 48-bit TKIP sequence counter (TSC) is kept split into a 32-bit
+ * high part and a 16-bit low part.
+ */
+struct tkip_dir_ctx
+{
+ /** High 32 bits of last sequence counter value used */
+ u32 tsc_hi;
+
+ /** Low 16 bits of last sequence counter value used */
+ u16 tsc_lo;
+
+ /** MAC address used to derive TTAK */
+ u8 mac[ETH_ALEN];
+
+ /** If TRUE, TTAK is valid (cleared whenever tsc_hi changes) */
+ u16 ttak_ok;
+
+ /** TKIP-mixed transmit address and key, depends on tsc_hi and MAC */
+ u16 ttak[5];
+};
+
+/**
+ * Context for TKIP encryption and decryption
+ *
+ * Stored in the priv area of the TKIP net80211_crypto instance.  The
+ * two directions keep independent sequence counters and TTAK caches.
+ */
+struct tkip_ctx
+{
+ /** Temporal key to use */
+ struct tkip_tk tk;
+
+ /** State for encryption */
+ struct tkip_dir_ctx enc;
+
+ /** State for decryption */
+ struct tkip_dir_ctx dec;
+};
+
+/**
+ * Header structure at the beginning of TKIP frame data
+ *
+ * The first three bytes are copied by tkip_encrypt() from the first
+ * three bytes of the per-packet key produced by tkip_mix_2().
+ * tkip_encrypt() stores tsc_hi in little-endian byte order.
+ */
+struct tkip_head
+{
+ u8 tsc1; /**< High byte of low 16 bits of TSC */
+ u8 seed1; /**< Second byte of WEP seed */
+ u8 tsc0; /**< Low byte of TSC */
+ u8 kid; /**< Key ID and ExtIV byte */
+ u32 tsc_hi; /**< High 32 bits of TSC, as an ExtIV */
+} __attribute__ (( packed ));
+
+
+/** TKIP header overhead (IV + KID + ExtIV) */
+#define TKIP_HEAD_LEN 8
+
+/** TKIP trailer overhead (MIC + ICV) [assumes unfragmented] */
+#define TKIP_FOOT_LEN 12
+
+/** TKIP MIC length */
+#define TKIP_MIC_LEN 8
+
+/** TKIP ICV length */
+#define TKIP_ICV_LEN 4
+
+
+/**
+ * TKIP S-box
+ *
+ * Table of 256 16-bit entries indexed by a single byte.  S() combines
+ * two lookups (one per byte of its argument) to map a 16-bit value.
+ */
+static const u16 Sbox[256] = {
+ 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154,
+ 0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A,
+ 0x8F45, 0x1F9D, 0x8940, 0xFA87, 0xEF15, 0xB2EB, 0x8EC9, 0xFB0B,
+ 0x41EC, 0xB367, 0x5FFD, 0x45EA, 0x23BF, 0x53F7, 0xE496, 0x9B5B,
+ 0x75C2, 0xE11C, 0x3DAE, 0x4C6A, 0x6C5A, 0x7E41, 0xF502, 0x834F,
+ 0x685C, 0x51F4, 0xD134, 0xF908, 0xE293, 0xAB73, 0x6253, 0x2A3F,
+ 0x080C, 0x9552, 0x4665, 0x9D5E, 0x3028, 0x37A1, 0x0A0F, 0x2FB5,
+ 0x0E09, 0x2436, 0x1B9B, 0xDF3D, 0xCD26, 0x4E69, 0x7FCD, 0xEA9F,
+ 0x121B, 0x1D9E, 0x5874, 0x342E, 0x362D, 0xDCB2, 0xB4EE, 0x5BFB,
+ 0xA4F6, 0x764D, 0xB761, 0x7DCE, 0x527B, 0xDD3E, 0x5E71, 0x1397,
+ 0xA6F5, 0xB968, 0x0000, 0xC12C, 0x4060, 0xE31F, 0x79C8, 0xB6ED,
+ 0xD4BE, 0x8D46, 0x67D9, 0x724B, 0x94DE, 0x98D4, 0xB0E8, 0x854A,
+ 0xBB6B, 0xC52A, 0x4FE5, 0xED16, 0x86C5, 0x9AD7, 0x6655, 0x1194,
+ 0x8ACF, 0xE910, 0x0406, 0xFE81, 0xA0F0, 0x7844, 0x25BA, 0x4BE3,
+ 0xA2F3, 0x5DFE, 0x80C0, 0x058A, 0x3FAD, 0x21BC, 0x7048, 0xF104,
+ 0x63DF, 0x77C1, 0xAF75, 0x4263, 0x2030, 0xE51A, 0xFD0E, 0xBF6D,
+ 0x814C, 0x1814, 0x2635, 0xC32F, 0xBEE1, 0x35A2, 0x88CC, 0x2E39,
+ 0x9357, 0x55F2, 0xFC82, 0x7A47, 0xC8AC, 0xBAE7, 0x322B, 0xE695,
+ 0xC0A0, 0x1998, 0x9ED1, 0xA37F, 0x4466, 0x547E, 0x3BAB, 0x0B83,
+ 0x8CCA, 0xC729, 0x6BD3, 0x283C, 0xA779, 0xBCE2, 0x161D, 0xAD76,
+ 0xDB3B, 0x6456, 0x744E, 0x141E, 0x92DB, 0x0C0A, 0x486C, 0xB8E4,
+ 0x9F5D, 0xBD6E, 0x43EF, 0xC4A6, 0x39A8, 0x31A4, 0xD337, 0xF28B,
+ 0xD532, 0x8B43, 0x6E59, 0xDAB7, 0x018C, 0xB164, 0x9CD2, 0x49E0,
+ 0xD8B4, 0xACFA, 0xF307, 0xCF25, 0xCAAF, 0xF48E, 0x47E9, 0x1018,
+ 0x6FD5, 0xF088, 0x4A6F, 0x5C72, 0x3824, 0x57F1, 0x73C7, 0x9751,
+ 0xCB23, 0xA17C, 0xE89C, 0x3E21, 0x96DD, 0x61DC, 0x0D86, 0x0F85,
+ 0xE090, 0x7C42, 0x71C4, 0xCCAA, 0x90D8, 0x0605, 0xF701, 0x1C12,
+ 0xC2A3, 0x6A5F, 0xAEF9, 0x69D0, 0x1791, 0x9958, 0x3A27, 0x27B9,
+ 0xD938, 0xEB13, 0x2BB3, 0x2233, 0xD2BB, 0xA970, 0x0789, 0x33A7,
+ 0x2DB6, 0x3C22, 0x1592, 0xC920, 0x8749, 0xAAFF, 0x5078, 0xA57A,
+ 0x038F, 0x59F8, 0x0980, 0x1A17, 0x65DA, 0xD731, 0x84C6, 0xD0B8,
+ 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A,
+};
+
+/**
+ * Perform S-box mapping on a 16-bit value
+ *
+ * The low byte of @a v indexes Sbox directly; the high byte indexes
+ * Sbox and the looked-up entry is passed through swap16().  The two
+ * results are XORed together.
+ *
+ * @v v Value to perform S-box mapping on
+ * @ret Sv S-box mapped value
+ */
+static inline u16 S ( u16 v )
+{
+ return Sbox[v & 0xFF] ^ swap16 ( Sbox[v >> 8] );
+}
+
+/**
+ * Rotate 16-bit value right
+ *
+ * The only caller in this file, tkip_mix_2(), rotates by 1 bit.
+ *
+ * @v v Value to rotate
+ * @v bits Number of bits to rotate by
+ * @ret rotv Rotated value
+ */
+static inline u16 ror16 ( u16 v, int bits )
+{
+ /* Bits shifted above bit 15 are discarded by the u16 return type */
+ return ( v >> bits ) | ( v << ( 16 - bits ) );
+}
+
+/**
+ * Rotate 32-bit value right
+ *
+ * @v v		Value to rotate
+ * @v bits	Number of bits to rotate by (taken modulo 32)
+ * @ret rotv	Rotated value
+ */
+static inline u32 ror32 ( u32 v, int bits )
+{
+	/* Mask both shift counts so that a rotation by 0 never
+	 * shifts a 32-bit value by 32 bits, which is undefined.
+	 * For counts 1..31 the result is unchanged.
+	 */
+	return ( v >> ( bits & 31 ) ) | ( v << ( ( 32 - bits ) & 31 ) );
+}
+
+/**
+ * Rotate 32-bit value left
+ *
+ * @v v		Value to rotate
+ * @v bits	Number of bits to rotate by (taken modulo 32)
+ * @ret rotv	Rotated value
+ */
+static inline u32 rol32 ( u32 v, int bits )
+{
+	/* Mask both shift counts so that a rotation by 0 never
+	 * shifts a 32-bit value by 32 bits, which is undefined.
+	 * For counts 1..31 the result is unchanged.
+	 */
+	return ( v << ( bits & 31 ) ) | ( v >> ( ( 32 - bits ) & 31 ) );
+}
+
+
+/**
+ * Initialise TKIP state and install key
+ *
+ * @v crypto	TKIP cryptosystem structure
+ * @v key	Pointer to tkip_tk to install
+ * @v keylen	Length of key (32 bytes)
+ * @v rsc	Initial receive sequence counter (6 bytes, least
+ *		significant byte first), or NULL to leave the receive
+ *		counter as it is
+ * @ret rc	Return status code (-EINVAL if @a keylen is wrong)
+ */
+static int tkip_init ( struct net80211_crypto *crypto, const void *key,
+		       int keylen, const void *rsc )
+{
+	struct tkip_ctx *ctx = crypto->priv;
+	const u8 *rscb = rsc;
+
+	if ( keylen != sizeof ( ctx->tk ) )
+		return -EINVAL;
+
+	if ( rscb ) {
+		ctx->dec.tsc_lo = ( rscb[1] << 8 ) | rscb[0];
+		/* Widen to u32 before shifting: a u8 promotes to
+		 * signed int, and shifting a byte >= 0x80 left by 24
+		 * would overflow into the sign bit.
+		 */
+		ctx->dec.tsc_hi = ( ( ( u32 ) rscb[5] << 24 ) |
+				    ( ( u32 ) rscb[4] << 16 ) |
+				    ( ( u32 ) rscb[3] << 8 ) |
+				    ( ( u32 ) rscb[2] ) );
+	}
+
+	memcpy ( &ctx->tk, key, sizeof ( ctx->tk ) );
+
+	return 0;
+}
+
+/**
+ * Perform TKIP key mixing, phase 1
+ *
+ * @v dctx TKIP directional context
+ * @v tk TKIP temporal key
+ * @v mac MAC address of transmitter
+ *
+ * This recomputes the TTAK in @a dctx if necessary, and sets
+ * @c dctx->ttak_ok.
+ *
+ * The cached TTAK is reused when @c dctx->ttak_ok is set and @a mac
+ * matches the MAC address it was derived from.  The callers clear
+ * @c ttak_ok whenever @c dctx->tsc_hi changes, which forces a
+ * recomputation here.
+ */
+static void tkip_mix_1 ( struct tkip_dir_ctx *dctx, struct tkip_tk *tk, u8 *mac )
+{
+ int i, j;
+
+ /* Nothing to do if the cached TTAK is still valid for this MAC */
+ if ( dctx->ttak_ok && ! memcmp ( mac, dctx->mac, ETH_ALEN ) )
+ return;
+
+ memcpy ( dctx->mac, mac, ETH_ALEN );
+
+ /* Seed TTAK from the high TSC bits and the MAC address, each
+ * taken as little-endian 16-bit words
+ */
+ dctx->ttak[0] = dctx->tsc_hi & 0xFFFF;
+ dctx->ttak[1] = dctx->tsc_hi >> 16;
+ dctx->ttak[2] = ( mac[1] << 8 ) | mac[0];
+ dctx->ttak[3] = ( mac[3] << 8 ) | mac[2];
+ dctx->ttak[4] = ( mac[5] << 8 ) | mac[4];
+
+ /* Eight mixing rounds; j alternates between 0 and 2 so that
+ * even and odd rounds use different 16-bit words of the key
+ */
+ for ( i = 0; i < 8; i++ ) {
+ j = 2 * ( i & 1 );
+
+ dctx->ttak[0] += S ( dctx->ttak[4] ^ ( ( tk->key[1 + j] << 8 ) |
+ tk->key[0 + j] ) );
+ dctx->ttak[1] += S ( dctx->ttak[0] ^ ( ( tk->key[5 + j] << 8 ) |
+ tk->key[4 + j] ) );
+ dctx->ttak[2] += S ( dctx->ttak[1] ^ ( ( tk->key[9 + j] << 8 ) |
+ tk->key[8 + j] ) );
+ dctx->ttak[3] += S ( dctx->ttak[2] ^ ( ( tk->key[13+ j] << 8 ) |
+ tk->key[12+ j] ) );
+ dctx->ttak[4] += S ( dctx->ttak[3] ^ ( ( tk->key[1 + j] << 8 ) |
+ tk->key[0 + j] ) ) + i;
+ }
+
+ dctx->ttak_ok = 1;
+}
+
+/**
+ * Perform TKIP key mixing, phase 2
+ *
+ * Combines the TTAK held in @a dctx with @c dctx->tsc_lo and the
+ * temporal key to produce the per-packet ARC4 key.  The TTAK must
+ * already be valid; the callers run tkip_mix_1() first.
+ *
+ * @v dctx TKIP directional context
+ * @v tk TKIP temporal key
+ * @ret key ARC4 key, 16 bytes long
+ */
+static void tkip_mix_2 ( struct tkip_dir_ctx *dctx, struct tkip_tk *tk,
+ void *key )
+{
+ u8 *kb = key;
+ u16 ppk[6];
+ int i;
+
+ /* First five words come from the TTAK; the sixth adds tsc_lo */
+ memcpy ( ppk, dctx->ttak, sizeof ( dctx->ttak ) );
+ ppk[5] = dctx->ttak[4] + dctx->tsc_lo;
+
+ /* S-box mixing against the first 12 bytes of the temporal key */
+ ppk[0] += S ( ppk[5] ^ ( ( tk->key[1] << 8 ) | tk->key[0] ) );
+ ppk[1] += S ( ppk[0] ^ ( ( tk->key[3] << 8 ) | tk->key[2] ) );
+ ppk[2] += S ( ppk[1] ^ ( ( tk->key[5] << 8 ) | tk->key[4] ) );
+ ppk[3] += S ( ppk[2] ^ ( ( tk->key[7] << 8 ) | tk->key[6] ) );
+ ppk[4] += S ( ppk[3] ^ ( ( tk->key[9] << 8 ) | tk->key[8] ) );
+ ppk[5] += S ( ppk[4] ^ ( ( tk->key[11] << 8 ) | tk->key[10] ) );
+
+ /* Rotation-based mixing; only the first two steps use key bytes */
+ ppk[0] += ror16 ( ppk[5] ^ ( ( tk->key[13] << 8 ) | tk->key[12] ), 1 );
+ ppk[1] += ror16 ( ppk[0] ^ ( ( tk->key[15] << 8 ) | tk->key[14] ), 1 );
+ ppk[2] += ror16 ( ppk[1], 1 );
+ ppk[3] += ror16 ( ppk[2], 1 );
+ ppk[4] += ror16 ( ppk[3], 1 );
+ ppk[5] += ror16 ( ppk[4], 1 );
+
+ /* First three key bytes are derived from tsc_lo alone;
+ * tkip_encrypt() copies them into the TKIP header
+ */
+ kb[0] = dctx->tsc_lo >> 8;
+ kb[1] = ( ( dctx->tsc_lo >> 8 ) | 0x20 ) & 0x7F;
+ kb[2] = dctx->tsc_lo & 0xFF;
+ kb[3] = ( ( ppk[5] ^ ( ( tk->key[1] << 8 ) | tk->key[0] ) ) >> 1 )
+ & 0xFF;
+
+ /* Remaining 12 bytes are the PPK words, low byte first */
+ for ( i = 0; i < 6; i++ ) {
+ kb[4 + 2*i] = ppk[i] & 0xFF;
+ kb[5 + 2*i] = ppk[i] >> 8;
+ }
+}
+
+/**
+ * Update Michael message integrity code based on next 32-bit word of data
+ *
+ * The statement order matters: every step reads the value written by
+ * the step before it.
+ *
+ * @v V Michael code state (two 32-bit words), updated in place
+ * @v word Next 32-bit word of data
+ */
+static void tkip_feed_michael ( u32 *V, u32 word )
+{
+ V[0] ^= word;
+ V[1] ^= rol32 ( V[0], 17 );
+ V[0] += V[1];
+ /* Swap the two bytes within each 16-bit half of V[0] */
+ V[1] ^= ( ( V[0] & 0xFF00FF00 ) >> 8 ) | ( ( V[0] & 0x00FF00FF ) << 8 );
+ V[0] += V[1];
+ V[1] ^= rol32 ( V[0], 3 );
+ V[0] += V[1];
+ V[1] ^= ror32 ( V[0], 2 );
+ V[0] += V[1];
+}
+
+/**
+ * Calculate Michael message integrity code
+ *
+ * The MIC covers a 16-byte pseudo-header (destination address, source
+ * address, and four zero bytes) followed by the data and padding.
+ *
+ * @v key MIC key to use (8 bytes)
+ * @v da Destination link-layer address
+ * @v sa Source link-layer address
+ * @v data Start of data to calculate over
+ * @v len Length of data in bytes (callers pass the payload length
+ * with the 802.11 frame header excluded)
+ * @ret mic Calculated Michael MIC (8 bytes)
+ */
+static void tkip_michael ( const void *key, const void *da, const void *sa,
+ const void *data, size_t len, void *mic )
+{
+ u32 V[2]; /* V[0] = "l", V[1] = "r" in 802.11 */
+ /* Scratch buffer viewed both as bytes and as 32-bit words; used
+ * first for the address header and then for the trailing padding
+ */
+ union {
+ u8 byte[12];
+ u32 word[3];
+ } cap;
+ const u8 *ptr = data;
+ const u8 *end = ptr + len;
+ int i;
+
+ /* Load the key as two little-endian 32-bit words */
+ memcpy ( V, key, sizeof ( V ) );
+ V[0] = le32_to_cpu ( V[0] );
+ V[1] = le32_to_cpu ( V[1] );
+
+ /* Feed in header (we assume non-QoS, so Priority = 0) */
+ memcpy ( &cap.byte[0], da, ETH_ALEN );
+ memcpy ( &cap.byte[6], sa, ETH_ALEN );
+ tkip_feed_michael ( V, le32_to_cpu ( cap.word[0] ) );
+ tkip_feed_michael ( V, le32_to_cpu ( cap.word[1] ) );
+ tkip_feed_michael ( V, le32_to_cpu ( cap.word[2] ) );
+ tkip_feed_michael ( V, 0 );
+
+ /* Feed in data */
+ /* NOTE(review): reads @a data through a u32 pointer, so this
+ * relies on unaligned 32-bit loads being acceptable here
+ */
+ while ( ptr + 4 <= end ) {
+ tkip_feed_michael ( V, le32_to_cpu ( *( u32 * ) ptr ) );
+ ptr += 4;
+ }
+
+ /* Add unaligned part and padding */
+ /* Up to 3 leftover bytes, then 0x5a, then zeros out to 8 bytes */
+ for ( i = 0; ptr < end; i++ )
+ cap.byte[i] = *ptr++;
+ cap.byte[i++] = 0x5a;
+ for ( ; i < 8; i++ )
+ cap.byte[i] = 0;
+
+ /* Feed in padding */
+ tkip_feed_michael ( V, le32_to_cpu ( cap.word[0] ) );
+ tkip_feed_michael ( V, le32_to_cpu ( cap.word[1] ) );
+
+ /* Output MIC */
+ V[0] = cpu_to_le32 ( V[0] );
+ V[1] = cpu_to_le32 ( V[1] );
+ memcpy ( mic, V, sizeof ( V ) );
+}
+
+/**
+ * Encrypt a packet using TKIP
+ *
+ * Advances the transmit sequence counter, derives the per-packet ARC4
+ * key, and builds a new buffer holding the frame header, the TKIP
+ * header, and the ARC4-encrypted data, MIC and ICV.  The input buffer
+ * is not modified or freed here.
+ *
+ * @v crypto TKIP cryptosystem
+ * @v iob I/O buffer containing cleartext packet
+ * @ret eiob I/O buffer containing encrypted packet, or NULL if the
+ * output buffer could not be allocated
+ */
+static struct io_buffer * tkip_encrypt ( struct net80211_crypto *crypto,
+ struct io_buffer *iob )
+{
+ struct tkip_ctx *ctx = crypto->priv;
+ struct ieee80211_frame *hdr = iob->data;
+ struct io_buffer *eiob;
+ struct arc4_ctx arc4;
+ u8 key[16];
+ struct tkip_head head;
+ u8 mic[8];
+ u32 icv;
+ const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN;
+ int datalen = iob_len ( iob ) - hdrlen;
+
+ /* Advance the 48-bit TSC; a carry into the high part
+ * invalidates the cached TTAK.  The counter stays advanced even
+ * if the allocation below fails.
+ */
+ ctx->enc.tsc_lo++;
+ if ( ctx->enc.tsc_lo == 0 ) {
+ ctx->enc.tsc_hi++;
+ ctx->enc.ttak_ok = 0;
+ }
+
+ tkip_mix_1 ( &ctx->enc, &ctx->tk, hdr->addr2 );
+ tkip_mix_2 ( &ctx->enc, &ctx->tk, key );
+
+ eiob = alloc_iob ( iob_len ( iob ) + TKIP_HEAD_LEN + TKIP_FOOT_LEN );
+ if ( ! eiob )
+ return NULL;
+
+ /* Copy frame header */
+ memcpy ( iob_put ( eiob, hdrlen ), iob->data, hdrlen );
+ hdr = eiob->data;
+ hdr->fc |= IEEE80211_FC_PROTECTED;
+
+ /* Fill in IV and key ID byte, and extended IV */
+ memcpy ( &head, key, 3 );
+ head.kid = 0x20; /* have Extended IV, key ID 0 */
+ head.tsc_hi = cpu_to_le32 ( ctx->enc.tsc_hi );
+ memcpy ( iob_put ( eiob, sizeof ( head ) ), &head, sizeof ( head ) );
+
+ /* Copy and encrypt the data */
+ cipher_setkey ( &arc4_algorithm, &arc4, key, 16 );
+ cipher_encrypt ( &arc4_algorithm, &arc4, iob->data + hdrlen,
+ iob_put ( eiob, datalen ), datalen );
+
+ /* Add MIC */
+ /* Computed over the cleartext, using addr3 as the destination
+ * and addr2 as the source address
+ */
+ hdr = iob->data;
+ tkip_michael ( &ctx->tk.mic.tx, hdr->addr3, hdr->addr2,
+ iob->data + hdrlen, datalen, mic );
+ cipher_encrypt ( &arc4_algorithm, &arc4, mic,
+ iob_put ( eiob, sizeof ( mic ) ), sizeof ( mic ) );
+
+ /* Add ICV */
+ /* CRC32 over cleartext data followed by the cleartext MIC */
+ icv = crc32_le ( ~0, iob->data + hdrlen, datalen );
+ icv = crc32_le ( icv, mic, sizeof ( mic ) );
+ icv = cpu_to_le32 ( ~icv );
+ cipher_encrypt ( &arc4_algorithm, &arc4, &icv,
+ iob_put ( eiob, TKIP_ICV_LEN ), TKIP_ICV_LEN );
+
+ DBGC2 ( ctx, "WPA-TKIP %p: encrypted packet %p -> %p\n", ctx,
+ iob, eiob );
+
+ return eiob;
+}
+
+/**
+ * Decrypt a packet using TKIP
+ *
+ * Validates the frame length and the received sequence counter,
+ * derives the per-packet ARC4 key, decrypts data, MIC and ICV into a
+ * new buffer, and then verifies the ICV and the Michael MIC.  The
+ * input buffer is not modified or freed here.
+ *
+ * The receive replay counter is advanced only for frames that pass
+ * both the ICV and MIC checks; a frame that fails either check leaves
+ * the decryption state exactly as it was.
+ *
+ * @v crypto	TKIP cryptosystem
+ * @v eiob	I/O buffer containing encrypted packet
+ * @ret iob	I/O buffer containing cleartext packet, or NULL if
+ *		the frame is too short, replayed, fails the ICV or
+ *		MIC check, or memory could not be allocated
+ */
+static struct io_buffer * tkip_decrypt ( struct net80211_crypto *crypto,
+					 struct io_buffer *eiob )
+{
+	struct tkip_ctx *ctx = crypto->priv;
+	struct tkip_dir_ctx saved_dec;
+	struct ieee80211_frame *hdr;
+	struct io_buffer *iob;
+	const int hdrlen = IEEE80211_TYP_FRAME_HEADER_LEN;
+	int datalen;
+	struct tkip_head *head;
+	struct arc4_ctx arc4;
+	u32 rx_tsc_hi;
+	u16 rx_tsc_lo;
+	u8 key[16];
+	u8 mic[8];
+	u32 icv, crc;
+
+	/* Reject frames too short to hold the frame header, TKIP
+	 * header and TKIP trailer; otherwise the data length below
+	 * would be negative.
+	 */
+	if ( iob_len ( eiob ) <
+	     ( size_t ) ( hdrlen + TKIP_HEAD_LEN + TKIP_FOOT_LEN ) ) {
+		DBGC ( ctx, "WPA-TKIP %p: packet too short (%zd bytes)\n",
+		       ctx, iob_len ( eiob ) );
+		return NULL;
+	}
+	datalen = iob_len ( eiob ) - hdrlen - TKIP_HEAD_LEN - TKIP_FOOT_LEN;
+
+	/* Allocate room for the trailer as well: data, MIC and ICV are
+	 * all decrypted into this buffer, although only the data is
+	 * accounted for in its length.
+	 */
+	iob = alloc_iob ( hdrlen + datalen + TKIP_FOOT_LEN );
+	if ( ! iob )
+		return NULL;
+
+	/* Copy frame header */
+	memcpy ( iob_put ( iob, hdrlen ), eiob->data, hdrlen );
+	hdr = iob->data;
+	hdr->fc &= ~IEEE80211_FC_PROTECTED;
+
+	/* Extract TSC; the high part is little-endian on the wire,
+	 * matching what tkip_encrypt() writes
+	 */
+	head = eiob->data + hdrlen;
+	rx_tsc_lo = ( head->tsc1 << 8 ) | head->tsc0;
+	rx_tsc_hi = le32_to_cpu ( head->tsc_hi );
+
+	/* Replay check: TSC must be strictly greater than the last one */
+	if ( rx_tsc_hi < ctx->dec.tsc_hi ||
+	     ( rx_tsc_hi == ctx->dec.tsc_hi &&
+	       rx_tsc_lo <= ctx->dec.tsc_lo ) ) {
+		DBGC ( ctx, "WPA-TKIP %p: packet received out of order "
+		       "(%08x:%04x <= %08x:%04x)\n", ctx, rx_tsc_hi,
+		       rx_tsc_lo, ctx->dec.tsc_hi, ctx->dec.tsc_lo );
+		free_iob ( iob );
+		return NULL;
+	}
+
+	/* Key mixing reads the TSC from the context, so install the
+	 * received value now, but keep a snapshot so that a frame
+	 * failing verification cannot advance the replay counter.
+	 */
+	saved_dec = ctx->dec;
+	ctx->dec.tsc_lo = rx_tsc_lo;
+	if ( ctx->dec.tsc_hi != rx_tsc_hi ) {
+		ctx->dec.ttak_ok = 0;
+		ctx->dec.tsc_hi = rx_tsc_hi;
+	}
+
+	/* Calculate key */
+	tkip_mix_1 ( &ctx->dec, &ctx->tk, hdr->addr2 );
+	tkip_mix_2 ( &ctx->dec, &ctx->tk, key );
+
+	/* Copy-decrypt data, MIC, ICV */
+	cipher_setkey ( &arc4_algorithm, &arc4, key, 16 );
+	cipher_decrypt ( &arc4_algorithm, &arc4,
+			 eiob->data + hdrlen + TKIP_HEAD_LEN,
+			 iob_put ( iob, datalen ), datalen + TKIP_FOOT_LEN );
+
+	/* Check ICV (CRC32 over data and MIC, stored after the MIC) */
+	icv = le32_to_cpu ( *( u32 * ) ( iob->tail + TKIP_MIC_LEN ) );
+	crc = ~crc32_le ( ~0, iob->data + hdrlen, datalen + TKIP_MIC_LEN );
+	if ( crc != icv ) {
+		DBGC ( ctx, "WPA-TKIP %p CRC mismatch: expect %08x, get %08x\n",
+		       ctx, icv, crc );
+		ctx->dec = saved_dec;
+		free_iob ( iob );
+		return NULL;
+	}
+
+	/* Check MIC */
+	tkip_michael ( &ctx->tk.mic.rx, hdr->addr1, hdr->addr3,
+		       iob->data + hdrlen, datalen, mic );
+	if ( memcmp ( mic, iob->tail, TKIP_MIC_LEN ) != 0 ) {
+		DBGC ( ctx, "WPA-TKIP %p ALERT! MIC failure\n", ctx );
+		/* XXX we should do the countermeasures here */
+		ctx->dec = saved_dec;
+		free_iob ( iob );
+		return NULL;
+	}
+
+	DBGC2 ( ctx, "WPA-TKIP %p: decrypted packet %p -> %p\n", ctx,
+		eiob, iob );
+
+	return iob;
+}
+
+/**
+ * TKIP cryptosystem
+ *
+ * Method table for NET80211_CRYPT_TKIP.  The private data area is
+ * sized to hold a struct tkip_ctx, which the methods access through
+ * crypto->priv.
+ */
+struct net80211_crypto tkip_crypto __net80211_crypto = {
+ .algorithm = NET80211_CRYPT_TKIP,
+ .init = tkip_init,
+ .encrypt = tkip_encrypt,
+ .decrypt = tkip_decrypt,
+ .priv_len = sizeof ( struct tkip_ctx ),
+};
+
+
+
+
+/**
+ * Calculate HMAC-MD5 MIC for EAPOL-Key frame
+ *
+ * Unlike ccmp_kie_mic(), the digest is written straight into @a mic
+ * with no truncation step.
+ *
+ * @v kck Key Confirmation Key, 16 bytes
+ * @v msg Message to calculate MIC over
+ * @v len Number of bytes to calculate MIC over
+ * @ret mic Calculated MIC, 16 bytes long
+ */
+static void tkip_kie_mic ( const void *kck, const void *msg, size_t len,
+ void *mic )
+{
+ uint8_t ctx[MD5_CTX_SIZE];
+ u8 kckb[16];
+ size_t kck_len = 16;
+
+ /* Work on a local copy: the HMAC helpers are handed a writable
+ * key buffer and a pointer to its length
+ */
+ memcpy ( kckb, kck, kck_len );
+
+ hmac_init ( &md5_algorithm, ctx, kckb, &kck_len );
+ hmac_update ( &md5_algorithm, ctx, msg, len );
+ hmac_final ( &md5_algorithm, ctx, kckb, &kck_len, mic );
+}
+
+/**
+ * Decrypt key data in EAPOL-Key frame
+ *
+ * The ARC4 key is the 16-byte IV followed by the 16-byte KEK, and
+ * arc4_skip() is called with a skip count of 256.
+ *
+ * @v kek Key Encryption Key, 16 bytes
+ * @v iv Initialisation vector, 16 bytes
+ * @v msg Message to decrypt
+ * @v len Length of message
+ * @ret msg Decrypted message in place of original
+ * @ret len Unchanged
+ * @ret rc Always 0 for success
+ */
+static int tkip_kie_decrypt ( const void *kek, const void *iv,
+ void *msg, u16 *len )
+{
+ /* 32-byte ARC4 key: IV || KEK */
+ u8 key[32];
+ memcpy ( key, iv, 16 );
+ memcpy ( key + 16, kek, 16 );
+
+ arc4_skip ( key, 32, 256, msg, msg, *len );
+
+ return 0;
+}
+
+
+/**
+ * TKIP-style key integrity and encryption handler
+ *
+ * Associates EAPOL_KEY_VERSION_WPA with the HMAC-MD5 MIC routine
+ * tkip_kie_mic() and the ARC4 routine tkip_kie_decrypt().
+ */
+struct wpa_kie tkip_kie __wpa_kie = {
+ .version = EAPOL_KEY_VERSION_WPA,
+ .mic = tkip_kie_mic,
+ .decrypt = tkip_kie_decrypt,
+};
diff --git a/qemu/roms/ipxe/src/net/aoe.c b/qemu/roms/ipxe/src/net/aoe.c
new file mode 100644
index 000000000..a6d7b3e7b
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/aoe.c
@@ -0,0 +1,1057 @@
+/*
+ * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stddef.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <assert.h>
+#include <byteswap.h>
+#include <ipxe/list.h>
+#include <ipxe/if_ether.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/uaccess.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/features.h>
+#include <ipxe/interface.h>
+#include <ipxe/xfer.h>
+#include <ipxe/uri.h>
+#include <ipxe/open.h>
+#include <ipxe/ata.h>
+#include <ipxe/device.h>
+#include <ipxe/aoe.h>
+
+/** @file
+ *
+ * AoE protocol
+ *
+ */
+
+FEATURE ( FEATURE_PROTOCOL, "AoE", DHCP_EB_FEATURE_AOE, 1 );
+
+struct net_protocol aoe_protocol __net_protocol;
+
+/******************************************************************************
+ *
+ * AoE devices and commands
+ *
+ ******************************************************************************
+ */
+
+/** List of all AoE devices */
+static LIST_HEAD ( aoe_devices );
+
+/** List of active AoE commands */
+static LIST_HEAD ( aoe_commands );
+
+/**
+ * An AoE device
+ *
+ * Identified by its major and minor numbers, which are placed in the
+ * header of every command sent to it and checked on every response.
+ */
+struct aoe_device {
+ /** Reference counter */
+ struct refcnt refcnt;
+
+ /** Network device */
+ struct net_device *netdev;
+ /** ATA command issuing interface */
+ struct interface ata;
+
+ /** Major number */
+ uint16_t major;
+ /** Minor number */
+ uint8_t minor;
+ /** Target MAC address */
+ uint8_t target[MAX_LL_ADDR_LEN];
+
+ /** Saved timeout value (taken from the most recently closed command) */
+ unsigned long timeout;
+
+ /** Configuration command interface */
+ struct interface config;
+ /** Device is configured */
+ int configured;
+};
+
+/**
+ * An AoE command
+ *
+ * Holds a reference to its AoE device, which aoecmd_free() drops
+ * when the command itself is freed.
+ */
+struct aoe_command {
+ /** Reference count */
+ struct refcnt refcnt;
+ /** AOE device */
+ struct aoe_device *aoedev;
+ /** List of active commands */
+ struct list_head list;
+
+ /** ATA command interface */
+ struct interface ata;
+
+ /** ATA command */
+ struct ata_cmd command;
+ /** Command type */
+ struct aoe_command_type *type;
+ /** Command tag (sent in the AoE header of each request) */
+ uint32_t tag;
+
+ /** Retransmission timer */
+ struct retry_timer timer;
+};
+
+/**
+ * An AoE command type
+ *
+ * Method table used by aoecmd_tx() to size and build a request, and
+ * by aoecmd_rx() to process the matching response.
+ */
+struct aoe_command_type {
+ /**
+ * Calculate length of AoE command IU
+ *
+ * @v aoecmd AoE command
+ * @ret len Length of command IU
+ */
+ size_t ( * cmd_len ) ( struct aoe_command *aoecmd );
+ /**
+ * Build AoE command IU
+ *
+ * @v aoecmd AoE command
+ * @v data Command IU
+ * @v len Length of command IU
+ */
+ void ( * cmd ) ( struct aoe_command *aoecmd, void *data, size_t len );
+ /**
+ * Handle AoE response IU
+ *
+ * @v aoecmd AoE command
+ * @v data Response IU
+ * @v len Length of response IU
+ * @v ll_source Link-layer source address
+ * @ret rc Return status code
+ */
+ int ( * rsp ) ( struct aoe_command *aoecmd, const void *data,
+ size_t len, const void *ll_source );
+};
+
+/**
+ * Get reference to AoE device
+ *
+ * Calls ref_get() on the device's reference counter and returns the
+ * same pointer, so the call can be used inside an expression.
+ *
+ * @v aoedev AoE device
+ * @ret aoedev AoE device
+ */
+static inline __attribute__ (( always_inline )) struct aoe_device *
+aoedev_get ( struct aoe_device *aoedev ) {
+ ref_get ( &aoedev->refcnt );
+ return aoedev;
+}
+
+/**
+ * Drop reference to AoE device
+ *
+ * Calls ref_put() on the device's reference counter.
+ *
+ * @v aoedev AoE device
+ */
+static inline __attribute__ (( always_inline )) void
+aoedev_put ( struct aoe_device *aoedev ) {
+ ref_put ( &aoedev->refcnt );
+}
+
+/**
+ * Get reference to AoE command
+ *
+ * Calls ref_get() on the command's reference counter and returns the
+ * same pointer, so the call can be used inside an expression.
+ *
+ * @v aoecmd AoE command
+ * @ret aoecmd AoE command
+ */
+static inline __attribute__ (( always_inline )) struct aoe_command *
+aoecmd_get ( struct aoe_command *aoecmd ) {
+ ref_get ( &aoecmd->refcnt );
+ return aoecmd;
+}
+
+/**
+ * Drop reference to AoE command
+ *
+ * Calls ref_put() on the command's reference counter.
+ *
+ * @v aoecmd AoE command
+ */
+static inline __attribute__ (( always_inline )) void
+aoecmd_put ( struct aoe_command *aoecmd ) {
+ ref_put ( &aoecmd->refcnt );
+}
+
+/**
+ * Name AoE device
+ *
+ * Formats "<netdev name>/e<major>.<minor>" into a static 16-byte
+ * buffer.  The returned pointer refers to that shared buffer, so its
+ * contents are overwritten by the next call, and names that do not
+ * fit are truncated by snprintf().
+ *
+ * @v aoedev AoE device
+ * @ret name AoE device name
+ */
+static const char * aoedev_name ( struct aoe_device *aoedev ) {
+ static char buf[16];
+
+ snprintf ( buf, sizeof ( buf ), "%s/e%d.%d", aoedev->netdev->name,
+ aoedev->major, aoedev->minor );
+ return buf;
+}
+
+/**
+ * Free AoE command
+ *
+ * Asserts that the command's timer is stopped and that it is no
+ * longer on a command list, then drops the command's reference to its
+ * AoE device and frees the command.
+ *
+ * @v refcnt Reference counter
+ */
+static void aoecmd_free ( struct refcnt *refcnt ) {
+ struct aoe_command *aoecmd =
+ container_of ( refcnt, struct aoe_command, refcnt );
+
+ assert ( ! timer_running ( &aoecmd->timer ) );
+ assert ( list_empty ( &aoecmd->list ) );
+
+ aoedev_put ( aoecmd->aoedev );
+ free ( aoecmd );
+}
+
+/**
+ * Close AoE command
+ *
+ * Stops the retransmission timer, records the timer's timeout on the
+ * device, unlinks the command from its list (dropping one reference)
+ * and shuts down the command's ATA interface with @a rc.
+ *
+ * @v aoecmd AoE command
+ * @v rc Reason for close
+ */
+static void aoecmd_close ( struct aoe_command *aoecmd, int rc ) {
+ struct aoe_device *aoedev = aoecmd->aoedev;
+
+ /* Stop timer */
+ stop_timer ( &aoecmd->timer );
+
+ /* Preserve the timeout value for subsequent commands */
+ aoedev->timeout = aoecmd->timer.timeout;
+
+ /* Remove from list of commands */
+ /* The list head is re-initialised so that the emptiness test
+ * above fails on a repeated close, and so that the assertion in
+ * aoecmd_free() holds
+ */
+ if ( ! list_empty ( &aoecmd->list ) ) {
+ list_del ( &aoecmd->list );
+ INIT_LIST_HEAD ( &aoecmd->list );
+ aoecmd_put ( aoecmd );
+ }
+
+ /* Shut down interfaces */
+ intf_shutdown ( &aoecmd->ata, rc );
+}
+
+/**
+ * Transmit AoE command request
+ *
+ * Starts the retransmission timer, builds the request using the
+ * command type's cmd_len() and cmd() methods, and sends it to the
+ * device's target address with net_tx().
+ *
+ * The timer is started before anything that can fail, so it is still
+ * running when this function returns an error.
+ *
+ * @v aoecmd AoE command
+ * @ret rc Return status code (-ENOMEM if no buffer is available,
+ * otherwise the result of net_tx())
+ */
+static int aoecmd_tx ( struct aoe_command *aoecmd ) {
+ struct aoe_device *aoedev = aoecmd->aoedev;
+ struct net_device *netdev = aoedev->netdev;
+ struct io_buffer *iobuf;
+ struct aoehdr *aoehdr;
+ size_t cmd_len;
+ int rc;
+
+ /* Sanity check */
+ assert ( netdev != NULL );
+
+ /* If we are transmitting anything that requires a response,
+ * start the retransmission timer. Do this before attempting
+ * to allocate the I/O buffer, in case allocation itself
+ * fails.
+ */
+ start_timer ( &aoecmd->timer );
+
+ /* Create outgoing I/O buffer */
+ cmd_len = aoecmd->type->cmd_len ( aoecmd );
+ iobuf = alloc_iob ( MAX_LL_HEADER_LEN + cmd_len );
+ if ( ! iobuf )
+ return -ENOMEM;
+ iob_reserve ( iobuf, MAX_LL_HEADER_LEN );
+ aoehdr = iob_put ( iobuf, cmd_len );
+
+ /* Fill AoE header */
+ /* Only the common header is zeroed here; the command type's
+ * cmd() method fills in the command code and the payload
+ */
+ memset ( aoehdr, 0, sizeof ( *aoehdr ) );
+ aoehdr->ver_flags = AOE_VERSION;
+ aoehdr->major = htons ( aoedev->major );
+ aoehdr->minor = aoedev->minor;
+ aoehdr->tag = htonl ( aoecmd->tag );
+ aoecmd->type->cmd ( aoecmd, iobuf->data, iob_len ( iobuf ) );
+
+ /* Send packet */
+ if ( ( rc = net_tx ( iobuf, netdev, &aoe_protocol, aoedev->target,
+ netdev->ll_addr ) ) != 0 ) {
+ DBGC ( aoedev, "AoE %s/%08x could not transmit: %s\n",
+ aoedev_name ( aoedev ), aoecmd->tag,
+ strerror ( rc ) );
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * Receive AoE command response
+ *
+ * Validates the response header, passes the response to the command
+ * type's rsp() method, frees the I/O buffer and closes the command.
+ * The command is closed exactly once, with the final status code,
+ * whichever path is taken.
+ *
+ * @v aoecmd	AoE command
+ * @v iobuf	I/O buffer (always freed by this function)
+ * @v ll_source	Link-layer source address
+ * @ret rc	Return status code
+ */
+static int aoecmd_rx ( struct aoe_command *aoecmd, struct io_buffer *iobuf,
+		       const void *ll_source ) {
+	struct aoe_device *aoedev = aoecmd->aoedev;
+	struct aoehdr *aoehdr = iobuf->data;
+	int rc;
+
+	/* Sanity check */
+	if ( iob_len ( iobuf ) < sizeof ( *aoehdr ) ) {
+		DBGC ( aoedev, "AoE %s/%08x received underlength response "
+		       "(%zd bytes)\n", aoedev_name ( aoedev ),
+		       aoecmd->tag, iob_len ( iobuf ) );
+		rc = -EINVAL;
+		goto done;
+	}
+	if ( ( ntohs ( aoehdr->major ) != aoedev->major ) ||
+	     ( aoehdr->minor != aoedev->minor ) ) {
+		DBGC ( aoedev, "AoE %s/%08x received response for incorrect "
+		       "device e%d.%d\n", aoedev_name ( aoedev ), aoecmd->tag,
+		       ntohs ( aoehdr->major ), aoehdr->minor );
+		rc = -EINVAL;
+		goto done;
+	}
+
+	/* Catch command failures.  The command is closed with -EIO at
+	 * the common exit below; closing it here as well would operate
+	 * on the command a second time after its list reference and
+	 * interface have already been released.
+	 */
+	if ( aoehdr->ver_flags & AOE_FL_ERROR ) {
+		DBGC ( aoedev, "AoE %s/%08x terminated in error\n",
+		       aoedev_name ( aoedev ), aoecmd->tag );
+		rc = -EIO;
+		goto done;
+	}
+
+	/* Hand off to command completion handler */
+	rc = aoecmd->type->rsp ( aoecmd, iobuf->data, iob_len ( iobuf ),
+				 ll_source );
+
+ done:
+	/* Free I/O buffer */
+	free_iob ( iobuf );
+
+	/* Terminate command */
+	aoecmd_close ( aoecmd, rc );
+
+	return rc;
+}
+
+/**
+ * Handle expiry of an AoE command's retry timer
+ *
+ * @v timer	AoE retry timer
+ * @v fail	Non-zero if the timer has given up retrying
+ */
+static void aoecmd_expired ( struct retry_timer *timer, int fail ) {
+	struct aoe_command *aoecmd;
+
+	aoecmd = container_of ( timer, struct aoe_command, timer );
+
+	/* Out of retries: close the command as timed out */
+	if ( fail ) {
+		aoecmd_close ( aoecmd, -ETIMEDOUT );
+		return;
+	}
+
+	/* Otherwise send the request again; the result is deliberately
+	 * not checked here
+	 */
+	aoecmd_tx ( aoecmd );
+}
+
+/**
+ * Compute the size of an AoE ATA command IU
+ *
+ * @v aoecmd	AoE command
+ * @ret len	AoE header plus ATA header plus any data-out payload
+ */
+static size_t aoecmd_ata_cmd_len ( struct aoe_command *aoecmd ) {
+	size_t headers = ( sizeof ( struct aoehdr ) +
+			   sizeof ( struct aoeata ) );
+
+	return ( headers + aoecmd->command.data_out_len );
+}
+
+/**
+ * Build AoE ATA command IU
+ *
+ * Fills in the command code of the AoE header and the ATA sub-header
+ * from the command's ATA control block, then copies in any data-out
+ * payload.  The rest of the AoE header is filled in by aoecmd_tx().
+ *
+ * @v aoecmd AoE command
+ * @v data Command IU
+ * @v len Length of command IU (must equal aoecmd_ata_cmd_len())
+ */
+static void aoecmd_ata_cmd ( struct aoe_command *aoecmd,
+ void *data, size_t len ) {
+ struct aoe_device *aoedev = aoecmd->aoedev;
+ struct ata_cmd *command = &aoecmd->command;
+ struct aoehdr *aoehdr = data;
+ struct aoeata *aoeata = &aoehdr->payload[0].ata;
+
+ /* Sanity check */
+ /* The aflags computation below ORs ATA_DEV_SLAVE straight into
+ * the AoE flags, which is only valid if the two constants match
+ */
+ linker_assert ( AOE_FL_DEV_HEAD == ATA_DEV_SLAVE, __fix_ata_h__ );
+ assert ( len == ( sizeof ( *aoehdr ) + sizeof ( *aoeata ) +
+ command->data_out_len ) );
+
+ /* Build IU */
+ aoehdr->command = AOE_CMD_ATA;
+ memset ( aoeata, 0, sizeof ( *aoeata ) );
+ aoeata->aflags = ( ( command->cb.lba48 ? AOE_FL_EXTENDED : 0 ) |
+ ( command->cb.device & ATA_DEV_SLAVE ) |
+ ( command->data_out_len ? AOE_FL_WRITE : 0 ) );
+ aoeata->err_feat = command->cb.err_feat.bytes.cur;
+ aoeata->count = command->cb.count.native;
+ aoeata->cmd_stat = command->cb.cmd_stat;
+ aoeata->lba.u64 = cpu_to_le64 ( command->cb.lba.native );
+ /* Without LBA48, the masked device bits are merged into byte 3
+ * of the LBA field
+ */
+ if ( ! command->cb.lba48 )
+ aoeata->lba.bytes[3] |=
+ ( command->cb.device & ATA_DEV_MASK );
+ copy_from_user ( aoeata->data, command->data_out, 0,
+ command->data_out_len );
+
+ /* NOTE(review): the LBA is printed as stored, i.e. after the
+ * cpu_to_le64() conversion above
+ */
+ DBGC2 ( aoedev, "AoE %s/%08x ATA cmd %02x:%02x:%02x:%02x:%08llx",
+ aoedev_name ( aoedev ), aoecmd->tag, aoeata->aflags,
+ aoeata->err_feat, aoeata->count, aoeata->cmd_stat,
+ aoeata->lba.u64 );
+ if ( command->data_out_len )
+ DBGC2 ( aoedev, " out %04zx", command->data_out_len );
+ if ( command->data_in_len )
+ DBGC2 ( aoedev, " in %04zx", command->data_in_len );
+ DBGC2 ( aoedev, "\n" );
+}
+
+/**
+ * Handle AoE ATA response IU
+ *
+ * Checks the response length and the ATA status, then copies the
+ * expected amount of data-in payload to the command's data-in buffer.
+ *
+ * @v aoecmd AoE command
+ * @v data Response IU
+ * @v len Length of response IU
+ * @v ll_source Link-layer source address
+ * @ret rc Return status code: -EINVAL if the response is shorter
+ * than the two headers, -EIO if the ATA status has the
+ * error bit set, -ERANGE if less data arrived than the
+ * command expects, otherwise 0
+ */
+static int aoecmd_ata_rsp ( struct aoe_command *aoecmd, const void *data,
+ size_t len, const void *ll_source __unused ) {
+ struct aoe_device *aoedev = aoecmd->aoedev;
+ struct ata_cmd *command = &aoecmd->command;
+ const struct aoehdr *aoehdr = data;
+ const struct aoeata *aoeata = &aoehdr->payload[0].ata;
+ size_t data_len;
+
+ /* Sanity check */
+ if ( len < ( sizeof ( *aoehdr ) + sizeof ( *aoeata ) ) ) {
+ DBGC ( aoedev, "AoE %s/%08x received underlength ATA response "
+ "(%zd bytes)\n", aoedev_name ( aoedev ),
+ aoecmd->tag, len );
+ return -EINVAL;
+ }
+ /* Payload length is whatever follows the two headers */
+ data_len = ( len - ( sizeof ( *aoehdr ) + sizeof ( *aoeata ) ) );
+ DBGC2 ( aoedev, "AoE %s/%08x ATA rsp %02x in %04zx\n",
+ aoedev_name ( aoedev ), aoecmd->tag, aoeata->cmd_stat,
+ data_len );
+
+ /* Check for command failure */
+ if ( aoeata->cmd_stat & ATA_STAT_ERR ) {
+ DBGC ( aoedev, "AoE %s/%08x status %02x\n",
+ aoedev_name ( aoedev ), aoecmd->tag, aoeata->cmd_stat );
+ return -EIO;
+ }
+
+ /* Check data-in length is sufficient. (There may be trailing
+ * garbage due to Ethernet minimum-frame-size padding.)
+ */
+ if ( data_len < command->data_in_len ) {
+ DBGC ( aoedev, "AoE %s/%08x data-in underrun (received %zd, "
+ "expected %zd)\n", aoedev_name ( aoedev ), aoecmd->tag,
+ data_len, command->data_in_len );
+ return -ERANGE;
+ }
+
+ /* Copy out data payload */
+ /* Only the expected length is copied; any excess is ignored */
+ copy_to_user ( command->data_in, 0, aoeata->data,
+ command->data_in_len );
+
+ return 0;
+}
+
+/**
+ * AoE ATA command type
+ *
+ * Groups the ATA IU length calculation, IU builder and response
+ * handler. Passed to aoecmd_create() by aoedev_ata_command().
+ */
+static struct aoe_command_type aoecmd_ata = {
+ .cmd_len = aoecmd_ata_cmd_len,
+ .cmd = aoecmd_ata_cmd,
+ .rsp = aoecmd_ata_rsp,
+};
+
+/**
+ * Calculate length of AoE configuration command IU
+ *
+ * @v aoecmd AoE command (unused)
+ * @ret len Length of command IU
+ *
+ * A configuration command is always an AoE header followed by a
+ * configuration section, so the length is constant.
+ */
+static size_t aoecmd_cfg_cmd_len ( struct aoe_command *aoecmd __unused ) {
+ size_t len;
+
+ len = sizeof ( struct aoehdr );
+ len += sizeof ( struct aoecfg );
+ return len;
+}
+
+/**
+ * Build AoE configuration command IU
+ *
+ * @v aoecmd AoE command
+ * @v data Command IU
+ * @v len Length of command IU
+ *
+ * Sets the command code in the AoE header and zeroes the whole
+ * configuration section.
+ */
+static void aoecmd_cfg_cmd ( struct aoe_command *aoecmd,
+ void *data, size_t len ) {
+ struct aoe_device *aoedev = aoecmd->aoedev;
+ struct aoehdr *aoehdr = data;
+ struct aoecfg *aoecfg = &aoehdr->payload[0].cfg;
+
+ /* The IU must be exactly header + configuration section */
+ assert ( len == ( sizeof ( *aoehdr ) + sizeof ( *aoecfg ) ) );
+
+ /* An all-zero configuration section is what gets sent */
+ memset ( aoecfg, 0, sizeof ( *aoecfg ) );
+ aoehdr->command = AOE_CMD_CONFIG;
+
+ DBGC ( aoedev, "AoE %s/%08x CONFIG cmd\n",
+ aoedev_name ( aoedev ), aoecmd->tag );
+}
+
+/**
+ * Handle AoE configuration response IU
+ *
+ * @v aoecmd AoE command
+ * @v data Response IU
+ * @v len Length of response IU
+ * @v ll_source Link-layer source address
+ * @ret rc Return status code
+ *
+ * On a sufficiently long response, records the responder's link-layer
+ * address as the device's target address. Returns -EINVAL if the IU
+ * is too short to hold the AoE header and configuration section.
+ */
+static int aoecmd_cfg_rsp ( struct aoe_command *aoecmd, const void *data,
+ size_t len, const void *ll_source ) {
+ struct aoe_device *aoedev = aoecmd->aoedev;
+ struct ll_protocol *ll_protocol = aoedev->netdev->ll_protocol;
+ const struct aoehdr *aoehdr = data;
+ const struct aoecfg *aoecfg = &aoehdr->payload[0].cfg;
+ const size_t min_len = ( sizeof ( *aoehdr ) + sizeof ( *aoecfg ) );
+
+ /* Reject truncated responses */
+ if ( len < min_len ) {
+ DBGC ( aoedev, "AoE %s/%08x received underlength "
+ "configuration response (%zd bytes)\n",
+ aoedev_name ( aoedev ), aoecmd->tag, len );
+ return -EINVAL;
+ }
+
+ DBGC ( aoedev, "AoE %s/%08x CONFIG rsp buf %04x fw %04x scnt %02x\n",
+ aoedev_name ( aoedev ), aoecmd->tag, ntohs ( aoecfg->bufcnt ),
+ aoecfg->fwver, aoecfg->scnt );
+
+ /* Remember who answered: later commands go to this address */
+ memcpy ( aoedev->target, ll_source, ll_protocol->ll_addr_len );
+ DBGC ( aoedev, "AoE %s has MAC address %s\n",
+ aoedev_name ( aoedev ), ll_protocol->ntoa ( aoedev->target ) );
+
+ return 0;
+}
+
+/**
+ * AoE configuration command type
+ *
+ * Groups the configuration IU length calculation, IU builder and
+ * response handler. Passed to aoecmd_create() by
+ * aoedev_cfg_command().
+ */
+static struct aoe_command_type aoecmd_cfg = {
+ .cmd_len = aoecmd_cfg_cmd_len,
+ .cmd = aoecmd_cfg_cmd,
+ .rsp = aoecmd_cfg_rsp,
+};
+
+/**
+ * AoE command ATA interface operations
+ *
+ * The only operation listed is intf_close, which is routed to
+ * aoecmd_close().
+ */
+static struct interface_operation aoecmd_ata_op[] = {
+ INTF_OP ( intf_close, struct aoe_command *, aoecmd_close ),
+};
+
+/**
+ * AoE command ATA interface descriptor
+ *
+ * Describes the "ata" interface member of struct aoe_command; used
+ * by aoecmd_create() when initialising that interface.
+ */
+static struct interface_descriptor aoecmd_ata_desc =
+ INTF_DESC ( struct aoe_command, ata, aoecmd_ata_op );
+
+/**
+ * Identify AoE command by tag
+ *
+ * @v tag Command tag
+ * @ret aoecmd AoE command, or NULL
+ *
+ * Performs a linear search of the global list of AoE commands.
+ */
+static struct aoe_command * aoecmd_find_tag ( uint32_t tag ) {
+ struct aoe_command *candidate;
+
+ list_for_each_entry ( candidate, &aoe_commands, list ) {
+ if ( candidate->tag != tag )
+ continue;
+ return candidate;
+ }
+
+ /* No command currently uses this tag */
+ return NULL;
+}
+
+/**
+ * Choose an AoE command tag
+ *
+ * @ret tag New tag, or negative error
+ *
+ * Tags are formed by combining AOE_TAG_MAGIC with a 16-bit rolling
+ * index. Each candidate is checked against the list of active
+ * commands so that a tag still in use is never handed out twice.
+ * Returns -EADDRINUSE if all 65536 candidates are in use.
+ */
+static int aoecmd_new_tag ( void ) {
+ static uint16_t tag_idx;
+ unsigned int i;
+ int tag;
+
+ for ( i = 0 ; i < 65536 ; i++ ) {
+ tag_idx++;
+ /* Commands are stored and looked up by their complete
+ * tag (magic included), so the uniqueness check must
+ * use the complete tag rather than the bare index.
+ */
+ tag = ( AOE_TAG_MAGIC | tag_idx );
+ if ( aoecmd_find_tag ( tag ) == NULL )
+ return tag;
+ }
+ return -EADDRINUSE;
+}
+
+/**
+ * Create AoE command
+ *
+ * @v aoedev AoE device
+ * @v type AoE command type
+ * @ret aoecmd AoE command, or NULL on failure
+ *
+ * Returns NULL if no tag is available or if allocation fails. The
+ * new command is added to the global command list and takes a
+ * reference to @c aoedev. Its retry timer starts from the device's
+ * stored timeout.
+ */
+static struct aoe_command * aoecmd_create ( struct aoe_device *aoedev,
+ struct aoe_command_type *type ) {
+ struct aoe_command *aoecmd;
+ int tag = aoecmd_new_tag();
+
+ /* Give up if no tag could be allocated */
+ if ( tag < 0 )
+ return NULL;
+
+ /* Allocate zeroed structure */
+ aoecmd = zalloc ( sizeof ( *aoecmd ) );
+ if ( aoecmd == NULL )
+ return NULL;
+
+ /* Initialise reference count, list membership and interfaces */
+ ref_init ( &aoecmd->refcnt, aoecmd_free );
+ list_add ( &aoecmd->list, &aoe_commands );
+ intf_init ( &aoecmd->ata, &aoecmd_ata_desc, &aoecmd->refcnt );
+ timer_init ( &aoecmd->timer, aoecmd_expired, &aoecmd->refcnt );
+
+ /* Fill in identity */
+ aoecmd->tag = tag;
+ aoecmd->type = type;
+ aoecmd->aoedev = aoedev_get ( aoedev );
+
+ /* Preserve timeout from last completed command */
+ aoecmd->timer.timeout = aoedev->timeout;
+
+ /* Return already mortalised. (Reference is held by command list.) */
+ return aoecmd;
+}
+
+/**
+ * Issue AoE ATA command
+ *
+ * @v aoedev AoE device
+ * @v parent Parent interface
+ * @v command ATA command
+ * @ret tag Command tag, or negative error
+ *
+ * Returns -EWOULDBLOCK if the network device is closed and -ENOMEM
+ * if the command cannot be created. A transmission failure is not
+ * reported here; it is left for the retry timer to deal with.
+ */
+static int aoedev_ata_command ( struct aoe_device *aoedev,
+ struct interface *parent,
+ struct ata_cmd *command ) {
+ struct aoe_command *aoecmd;
+
+ /* Nothing can be sent while the net device is closed */
+ if ( ! netdev_is_open ( aoedev->netdev ) ) {
+ DBGC ( aoedev, "AoE %s cannot issue command while net device "
+ "is closed\n", aoedev_name ( aoedev ) );
+ return -EWOULDBLOCK;
+ }
+
+ /* Create command and take a private copy of the ATA command */
+ aoecmd = aoecmd_create ( aoedev, &aoecmd_ata );
+ if ( aoecmd == NULL )
+ return -ENOMEM;
+ memcpy ( &aoecmd->command, command, sizeof ( aoecmd->command ) );
+
+ /* First transmission attempt; failures are retried by the timer */
+ aoecmd_tx ( aoecmd );
+
+ /* Attach to parent; the command list keeps the reference */
+ intf_plug_plug ( &aoecmd->ata, parent );
+
+ return aoecmd->tag;
+}
+
+/**
+ * Issue AoE configuration command
+ *
+ * @v aoedev AoE device
+ * @v parent Parent interface
+ * @ret tag Command tag, or negative error
+ *
+ * Returns -ENOMEM if the command cannot be created. A transmission
+ * failure is not reported here; it is left for the retry timer.
+ */
+static int aoedev_cfg_command ( struct aoe_device *aoedev,
+ struct interface *parent ) {
+ struct aoe_command *aoecmd = aoecmd_create ( aoedev, &aoecmd_cfg );
+
+ /* Creation may fail */
+ if ( aoecmd == NULL )
+ return -ENOMEM;
+
+ /* First transmission attempt; failures are retried by the timer */
+ aoecmd_tx ( aoecmd );
+
+ /* Attach to parent; the command list keeps the reference */
+ intf_plug_plug ( &aoecmd->ata, parent );
+
+ return aoecmd->tag;
+}
+
+/**
+ * Free AoE device
+ *
+ * @v refcnt Reference count
+ *
+ * Drops the device's reference to its network device and releases
+ * the device structure itself.
+ */
+static void aoedev_free ( struct refcnt *refcnt ) {
+ struct aoe_device *aoedev;
+
+ /* Recover the device from its embedded reference counter */
+ aoedev = container_of ( refcnt, struct aoe_device, refcnt );
+
+ netdev_put ( aoedev->netdev );
+ free ( aoedev );
+}
+
+/**
+ * Close AoE device
+ *
+ * @v aoedev AoE device
+ * @v rc Reason for close
+ *
+ * Shuts down both device interfaces and closes every command on the
+ * global command list that belongs to this device.
+ */
+static void aoedev_close ( struct aoe_device *aoedev, int rc ) {
+ struct aoe_command *aoecmd;
+ struct aoe_command *next;
+
+ /* Shut down interfaces */
+ intf_shutdown ( &aoedev->ata, rc );
+ intf_shutdown ( &aoedev->config, rc );
+
+ /* Close this device's commands. A temporary reference is held
+ * around each aoecmd_close() call.
+ */
+ list_for_each_entry_safe ( aoecmd, next, &aoe_commands, list ) {
+ if ( aoecmd->aoedev == aoedev ) {
+ aoecmd_get ( aoecmd );
+ aoecmd_close ( aoecmd, rc );
+ aoecmd_put ( aoecmd );
+ }
+ }
+}
+
+/**
+ * Check AoE device flow-control window
+ *
+ * @v aoedev AoE device
+ * @ret len Length of window
+ *
+ * The window is zero until the device has been configured, and the
+ * maximum @c size_t value afterwards.
+ */
+static size_t aoedev_window ( struct aoe_device *aoedev ) {
+
+ /* Closed until configuration has completed */
+ if ( ! aoedev->configured )
+ return 0;
+
+ return ~( ( size_t ) 0 );
+}
+
+/**
+ * Handle AoE device configuration completion
+ *
+ * @v aoedev AoE device
+ * @v rc Reason for completion
+ *
+ * On success the device is marked as configured and the ATA
+ * interface is told that its window has changed. On failure the
+ * whole device is closed.
+ */
+static void aoedev_config_done ( struct aoe_device *aoedev, int rc ) {
+
+ /* The configuration interface is finished either way */
+ intf_shutdown ( &aoedev->config, rc );
+
+ if ( rc == 0 ) {
+ /* Mark device as configured */
+ aoedev->configured = 1;
+ xfer_window_changed ( &aoedev->ata );
+ } else {
+ /* Close device on failure */
+ aoedev_close ( aoedev, rc );
+ }
+}
+
+/**
+ * Identify device underlying AoE device
+ *
+ * @v aoedev AoE device
+ * @ret device Underlying device
+ *
+ * This is the device behind the network device that the AoE device
+ * uses.
+ */
+static struct device * aoedev_identify_device ( struct aoe_device *aoedev ) {
+ struct net_device *netdev = aoedev->netdev;
+
+ return netdev->dev;
+}
+
+/**
+ * Describe AoE device in an ACPI table
+ *
+ * @v aoedev AoE device
+ * @v acpi ACPI table
+ * @v len Length of ACPI table
+ * @ret rc Return status code
+ *
+ * Fills in an aBFT with the device's shelf (major) and slot (minor)
+ * numbers and the network device's link-layer address. Returns
+ * -ENOBUFS if the supplied table is too small.
+ */
+static int aoedev_describe ( struct aoe_device *aoedev,
+ struct acpi_description_header *acpi,
+ size_t len ) {
+ struct abft_table *abft;
+
+ /* The buffer must be able to hold a complete aBFT */
+ if ( len < sizeof ( *abft ) )
+ return -ENOBUFS;
+
+ /* The ACPI header is embedded within the aBFT */
+ abft = container_of ( acpi, struct abft_table, acpi );
+
+ /* ACPI header */
+ abft->acpi.signature = cpu_to_le32 ( ABFT_SIG );
+ abft->acpi.length = cpu_to_le32 ( sizeof ( *abft ) );
+ abft->acpi.revision = 1;
+
+ /* AoE target address and initiator MAC */
+ abft->shelf = cpu_to_le16 ( aoedev->major );
+ abft->slot = aoedev->minor;
+ memcpy ( abft->mac, aoedev->netdev->ll_addr, sizeof ( abft->mac ) );
+
+ return 0;
+}
+
+/**
+ * AoE device ATA interface operations
+ *
+ * Routes ATA command issue, window queries, close, ACPI description
+ * and device identification on the "ata" interface to the
+ * corresponding aoedev_*() handlers.
+ */
+static struct interface_operation aoedev_ata_op[] = {
+ INTF_OP ( ata_command, struct aoe_device *, aoedev_ata_command ),
+ INTF_OP ( xfer_window, struct aoe_device *, aoedev_window ),
+ INTF_OP ( intf_close, struct aoe_device *, aoedev_close ),
+ INTF_OP ( acpi_describe, struct aoe_device *, aoedev_describe ),
+ INTF_OP ( identify_device, struct aoe_device *,
+ aoedev_identify_device ),
+};
+
+/**
+ * AoE device ATA interface descriptor
+ *
+ * Describes the "ata" interface member of struct aoe_device; used by
+ * aoedev_open() when initialising that interface.
+ */
+static struct interface_descriptor aoedev_ata_desc =
+ INTF_DESC ( struct aoe_device, ata, aoedev_ata_op );
+
+/**
+ * AoE device configuration interface operations
+ *
+ * Closing the "config" interface is routed to aoedev_config_done(),
+ * which is how completion of the configuration command is reported.
+ */
+static struct interface_operation aoedev_config_op[] = {
+ INTF_OP ( intf_close, struct aoe_device *, aoedev_config_done ),
+};
+
+/**
+ * AoE device configuration interface descriptor
+ *
+ * Describes the "config" interface member of struct aoe_device; used
+ * by aoedev_open() when initialising that interface.
+ */
+static struct interface_descriptor aoedev_config_desc =
+ INTF_DESC ( struct aoe_device, config, aoedev_config_op );
+
+/**
+ * Open AoE device
+ *
+ * @v parent Parent interface
+ * @v netdev Network device
+ * @v major Device major number
+ * @v minor Device minor number
+ * @ret rc Return status code
+ *
+ * Allocates the device, starts a configuration command (initially
+ * addressed to the link-layer broadcast address) and attaches an ATA
+ * device to the parent interface. On any failure after allocation
+ * the device is closed and the local reference dropped.
+ */
+static int aoedev_open ( struct interface *parent, struct net_device *netdev,
+ unsigned int major, unsigned int minor ) {
+ struct aoe_device *aoedev;
+ int rc;
+
+ /* Allocate structure */
+ aoedev = zalloc ( sizeof ( *aoedev ) );
+ if ( aoedev == NULL )
+ return -ENOMEM;
+
+ /* Initialise reference count and interfaces */
+ ref_init ( &aoedev->refcnt, aoedev_free );
+ intf_init ( &aoedev->ata, &aoedev_ata_desc, &aoedev->refcnt );
+ intf_init ( &aoedev->config, &aoedev_config_desc, &aoedev->refcnt );
+
+ /* Record addressing; target is broadcast until discovered */
+ aoedev->netdev = netdev_get ( netdev );
+ aoedev->major = major;
+ aoedev->minor = minor;
+ memcpy ( aoedev->target, netdev->ll_broadcast,
+ netdev->ll_protocol->ll_addr_len );
+
+ /* Initiate configuration */
+ rc = aoedev_cfg_command ( aoedev, &aoedev->config );
+ if ( rc < 0 ) {
+ DBGC ( aoedev, "AoE %s could not initiate configuration: %s\n",
+ aoedev_name ( aoedev ), strerror ( rc ) );
+ goto err_close;
+ }
+
+ /* Attach ATA device to parent interface */
+ rc = ata_open ( parent, &aoedev->ata, ATA_DEV_MASTER,
+ AOE_MAX_COUNT );
+ if ( rc != 0 ) {
+ DBGC ( aoedev, "AoE %s could not create ATA device: %s\n",
+ aoedev_name ( aoedev ), strerror ( rc ) );
+ goto err_close;
+ }
+
+ /* Mortalise self and return */
+ ref_put ( &aoedev->refcnt );
+ return 0;
+
+ err_close:
+ aoedev_close ( aoedev, rc );
+ ref_put ( &aoedev->refcnt );
+ return rc;
+}
+
+/******************************************************************************
+ *
+ * AoE network protocol
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Process incoming AoE packets
+ *
+ * @v iobuf I/O buffer
+ * @v netdev Network device (unused)
+ * @v ll_dest Link-layer destination address (unused)
+ * @v ll_source Link-layer source address
+ * @v flags Packet flags (unused)
+ * @ret rc Return status code
+ *
+ * Validates the AoE header, looks up the command matching the
+ * packet's tag and passes the buffer to it. Packets that fail
+ * validation or match no command are freed here.
+ */
+static int aoe_rx ( struct io_buffer *iobuf,
+ struct net_device *netdev __unused,
+ const void *ll_dest __unused,
+ const void *ll_source,
+ unsigned int flags __unused ) {
+ struct aoehdr *aoehdr = iobuf->data;
+ struct aoe_command *aoecmd;
+ int rc;
+
+ /* Must contain at least a complete AoE header */
+ if ( iob_len ( iobuf ) < sizeof ( *aoehdr ) ) {
+ DBG ( "AoE received underlength packet (%zd bytes)\n",
+ iob_len ( iobuf ) );
+ rc = -EINVAL;
+ goto done;
+ }
+
+ /* Must be a protocol version we understand */
+ if ( ( aoehdr->ver_flags & AOE_VERSION_MASK ) != AOE_VERSION ) {
+ DBG ( "AoE received packet for unsupported protocol version "
+ "%02x\n", ( aoehdr->ver_flags & AOE_VERSION_MASK ) );
+ rc = -EPROTONOSUPPORT;
+ goto done;
+ }
+
+ /* Must be a response; requests are not handled */
+ if ( ! ( aoehdr->ver_flags & AOE_FL_RESPONSE ) ) {
+ DBG ( "AoE received request packet\n" );
+ rc = -EOPNOTSUPP;
+ goto done;
+ }
+
+ /* Demultiplex amongst active AoE commands */
+ aoecmd = aoecmd_find_tag ( ntohl ( aoehdr->tag ) );
+ if ( aoecmd == NULL ) {
+ DBG ( "AoE received packet for unused tag %08x\n",
+ ntohl ( aoehdr->tag ) );
+ rc = -ENOENT;
+ goto done;
+ }
+
+ /* Pass the frame to the command, holding a reference to the
+ * command for the duration of the call. The buffer is
+ * disowned here, so the free_iob() below no longer sees it.
+ */
+ aoecmd_get ( aoecmd );
+ rc = aoecmd_rx ( aoecmd, iob_disown ( iobuf ), ll_source );
+ aoecmd_put ( aoecmd );
+
+ done:
+ free_iob ( iobuf );
+ return rc;
+}
+
+/**
+ * AoE protocol
+ *
+ * Registers aoe_rx() as the receive handler for frames carrying the
+ * ETH_P_AOE protocol number.
+ */
+struct net_protocol aoe_protocol __net_protocol = {
+ .name = "AoE",
+ .net_proto = htons ( ETH_P_AOE ),
+ .rx = aoe_rx,
+};
+
+/******************************************************************************
+ *
+ * AoE URIs
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Parse AoE URI
+ *
+ * @v uri URI
+ * @ret major Major device number
+ * @ret minor Minor device number
+ * @ret rc Return status code
+ *
+ * An AoE URI has the form "aoe:e<major>.<minor>".
+ *
+ * Returns -EINVAL if the URI has no opaque part, does not start with
+ * 'e', has an empty major or minor number, or has anything other
+ * than '.' between the numbers or any trailing characters.
+ */
+static int aoe_parse_uri ( struct uri *uri, unsigned int *major,
+ unsigned int *minor ) {
+ const char *ptr;
+ char *end;
+
+ /* Check for URI with opaque portion */
+ if ( ! uri->opaque )
+ return -EINVAL;
+ ptr = uri->opaque;
+
+ /* Check for initial 'e' */
+ if ( *ptr != 'e' )
+ return -EINVAL;
+ ptr++;
+
+ /* Parse major device number. An empty field (end == ptr)
+ * must be rejected explicitly, since strtoul() would
+ * otherwise report it as zero.
+ */
+ *major = strtoul ( ptr, &end, 10 );
+ if ( ( end == ptr ) || ( *end != '.' ) )
+ return -EINVAL;
+ ptr = ( end + 1 );
+
+ /* Parse minor device number, again rejecting an empty field */
+ *minor = strtoul ( ptr, &end, 10 );
+ if ( ( end == ptr ) || ( *end != '\0' ) )
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * Open AoE URI
+ *
+ * @v parent Parent interface
+ * @v uri URI
+ * @ret rc Return status code
+ *
+ * Uses the most recently opened network device. Returns -ENODEV if
+ * there is none, or the error from URI parsing or device opening.
+ */
+static int aoe_open ( struct interface *parent, struct uri *uri ) {
+ struct net_device *netdev = last_opened_netdev();
+ unsigned int major;
+ unsigned int minor;
+ int rc;
+
+ /* Identify network device. This is something of a hack, but
+ * the AoE URI scheme that has been in use for some time now
+ * provides no way to specify a particular device.
+ */
+ if ( netdev == NULL ) {
+ DBG ( "AoE cannot identify network device\n" );
+ return -ENODEV;
+ }
+
+ /* Extract major and minor numbers from the URI */
+ rc = aoe_parse_uri ( uri, &major, &minor );
+ if ( rc != 0 ) {
+ DBG ( "AoE cannot parse URI\n" );
+ return rc;
+ }
+
+ /* Open AoE device; its status is our status */
+ return aoedev_open ( parent, netdev, major, minor );
+}
+
+/**
+ * AoE URI opener
+ *
+ * Associates the "aoe" URI scheme with aoe_open().
+ */
+struct uri_opener aoe_uri_opener __uri_opener = {
+ .scheme = "aoe",
+ .open = aoe_open,
+};
diff --git a/qemu/roms/ipxe/src/net/arp.c b/qemu/roms/ipxe/src/net/arp.c
new file mode 100644
index 000000000..261e681e1
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/arp.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <ipxe/if_ether.h>
+#include <ipxe/if_arp.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/neighbour.h>
+#include <ipxe/arp.h>
+
+/** @file
+ *
+ * Address Resolution Protocol
+ *
+ * This file implements the address resolution protocol as defined in
+ * RFC826. The implementation is media-independent and
+ * protocol-independent; it is not limited to Ethernet or to IPv4.
+ *
+ */
+
+struct net_protocol arp_protocol __net_protocol; /* Defined at end of file */
+
+/**
+ * Transmit ARP request
+ *
+ * @v netdev Network device
+ * @v net_protocol Network-layer protocol
+ * @v net_dest Destination network-layer address
+ * @v net_source Source network-layer address
+ * @ret rc Return status code
+ *
+ * Builds a request containing our link-layer address, the source
+ * network-layer address, a zeroed target link-layer address and the
+ * destination network-layer address, then sends it to the link-layer
+ * broadcast address. Returns -ENOMEM if no buffer can be allocated,
+ * or the error from net_tx().
+ */
+static int arp_tx_request ( struct net_device *netdev,
+ struct net_protocol *net_protocol,
+ const void *net_dest, const void *net_source ) {
+ struct ll_protocol *ll_protocol = netdev->ll_protocol;
+ size_t ll_len = ll_protocol->ll_addr_len;
+ size_t net_len = net_protocol->net_addr_len;
+ struct io_buffer *iobuf;
+ struct arphdr *arphdr;
+ int rc;
+
+ /* Allocate a buffer large enough for any address sizes */
+ iobuf = alloc_iob ( MAX_LL_HEADER_LEN + sizeof ( *arphdr ) +
+ ( 2 * ( MAX_LL_ADDR_LEN + MAX_NET_ADDR_LEN ) ) );
+ if ( iobuf == NULL )
+ return -ENOMEM;
+ iob_reserve ( iobuf, MAX_LL_HEADER_LEN );
+
+ /* Fixed-size header */
+ arphdr = iob_put ( iobuf, sizeof ( *arphdr ) );
+ arphdr->ar_op = htons ( ARPOP_REQUEST );
+ arphdr->ar_hrd = ll_protocol->ll_proto;
+ arphdr->ar_pro = net_protocol->net_proto;
+ arphdr->ar_hln = ll_len;
+ arphdr->ar_pln = net_len;
+
+ /* Sender hardware address, sender protocol address */
+ memcpy ( iob_put ( iobuf, ll_len ), netdev->ll_addr, ll_len );
+ memcpy ( iob_put ( iobuf, net_len ), net_source, net_len );
+
+ /* Target hardware address (unknown, so zero), target protocol
+ * address
+ */
+ memset ( iob_put ( iobuf, ll_len ), 0, ll_len );
+ memcpy ( iob_put ( iobuf, net_len ), net_dest, net_len );
+
+ /* Broadcast the request */
+ rc = net_tx ( iobuf, netdev, &arp_protocol,
+ netdev->ll_broadcast, netdev->ll_addr );
+ if ( rc != 0 ) {
+ DBGC ( netdev, "ARP %s %s %s could not transmit request: %s\n",
+ netdev->name, net_protocol->name,
+ net_protocol->ntoa ( net_dest ), strerror ( rc ) );
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * ARP neighbour discovery protocol
+ *
+ * Uses arp_tx_request() as the method for transmitting discovery
+ * requests.
+ */
+struct neighbour_discovery arp_discovery = {
+ .name = "ARP",
+ .tx_request = arp_tx_request,
+};
+
+/**
+ * Identify ARP protocol
+ *
+ * @v net_proto Network-layer protocol, in network-endian order
+ * @ret arp_net_protocol ARP protocol, or NULL
+ *
+ * Searches the ARP_NET_PROTOCOLS table for an entry whose
+ * network-layer protocol number matches.
+ */
+static struct arp_net_protocol * arp_find_protocol ( uint16_t net_proto ) {
+ struct arp_net_protocol *candidate;
+
+ for_each_table_entry ( candidate, ARP_NET_PROTOCOLS ) {
+ if ( candidate->net_protocol->net_proto != net_proto )
+ continue;
+ return candidate;
+ }
+
+ /* No registered protocol uses this number */
+ return NULL;
+}
+
+/**
+ * Process incoming ARP packets
+ *
+ * @v iobuf I/O buffer
+ * @v netdev Network device
+ * @v ll_dest Link-layer destination address (unused)
+ * @v ll_source Link-layer source address (unused)
+ * @v flags Packet flags (unused)
+ * @ret rc Return status code
+ *
+ * Updates the neighbour cache from the sender fields of any valid
+ * ARP packet. If the packet is a request for an address that we
+ * own, the packet is rewritten in place into a reply and sent back.
+ *
+ * Returns -EINVAL for truncated or mismatched packets and
+ * -EPROTONOSUPPORT for unknown network-layer protocols.
+ */
+static int arp_rx ( struct io_buffer *iobuf, struct net_device *netdev,
+ const void *ll_dest __unused,
+ const void *ll_source __unused,
+ unsigned int flags __unused ) {
+ struct arphdr *arphdr = iobuf->data;
+ struct arp_net_protocol *arp_net_protocol;
+ struct net_protocol *net_protocol;
+ struct ll_protocol *ll_protocol;
+ size_t len = iob_len ( iobuf );
+ int rc;
+
+ /* Sanity check: the packet must hold the fixed header plus the
+ * two hardware and two protocol addresses whose lengths the
+ * header declares. Without this, the address accessors below
+ * could read and write beyond the received data.
+ */
+ if ( ( len < sizeof ( *arphdr ) ) ||
+ ( len < ( sizeof ( *arphdr ) + ( 2 * arphdr->ar_hln ) +
+ ( 2 * arphdr->ar_pln ) ) ) ) {
+ rc = -EINVAL;
+ goto done;
+ }
+
+ /* Identify network-layer and link-layer protocols */
+ arp_net_protocol = arp_find_protocol ( arphdr->ar_pro );
+ if ( ! arp_net_protocol ) {
+ rc = -EPROTONOSUPPORT;
+ goto done;
+ }
+ net_protocol = arp_net_protocol->net_protocol;
+ ll_protocol = netdev->ll_protocol;
+
+ /* Declared address types and lengths must match our own */
+ if ( ( arphdr->ar_hrd != ll_protocol->ll_proto ) ||
+ ( arphdr->ar_hln != ll_protocol->ll_addr_len ) ||
+ ( arphdr->ar_pln != net_protocol->net_addr_len ) ) {
+ rc = -EINVAL;
+ goto done;
+ }
+
+ /* Update neighbour cache entry for this sender, if any */
+ neighbour_update ( netdev, net_protocol, arp_sender_pa ( arphdr ),
+ arp_sender_ha ( arphdr ) );
+
+ /* If it's not a request, there's nothing more to do */
+ if ( arphdr->ar_op != htons ( ARPOP_REQUEST ) ) {
+ rc = 0;
+ goto done;
+ }
+
+ /* See if we own the target protocol address */
+ if ( arp_net_protocol->check ( netdev, arp_target_pa ( arphdr ) ) != 0){
+ rc = 0;
+ goto done;
+ }
+
+ /* Change request to a reply: swap the sender and target
+ * address pairs, then overwrite the sender hardware address
+ * with our own.
+ */
+ DBGC2 ( netdev, "ARP %s %s %s reply => %s %s\n",
+ netdev->name, net_protocol->name,
+ net_protocol->ntoa ( arp_target_pa ( arphdr ) ),
+ ll_protocol->name, ll_protocol->ntoa ( netdev->ll_addr ) );
+ arphdr->ar_op = htons ( ARPOP_REPLY );
+ memswap ( arp_sender_ha ( arphdr ), arp_target_ha ( arphdr ),
+ arphdr->ar_hln + arphdr->ar_pln );
+ memcpy ( arp_sender_ha ( arphdr ), netdev->ll_addr, arphdr->ar_hln );
+
+ /* Send reply. The buffer is disowned here, so the free_iob()
+ * at "done" no longer sees it.
+ *
+ * NOTE(review): arphdr still points into the disowned buffer
+ * when the failure message below is printed; confirm whether
+ * net_tx() may already have freed it by then.
+ */
+ if ( ( rc = net_tx ( iob_disown ( iobuf ), netdev, &arp_protocol,
+ arp_target_ha ( arphdr ),
+ netdev->ll_addr ) ) != 0 ) {
+ DBGC ( netdev, "ARP %s %s %s could not transmit reply: %s\n",
+ netdev->name, net_protocol->name,
+ net_protocol->ntoa ( arp_target_pa ( arphdr ) ),
+ strerror ( rc ) );
+ goto done;
+ }
+
+ /* Success */
+ rc = 0;
+
+ done:
+ free_iob ( iobuf );
+ return rc;
+}
+
+/**
+ * Transcribe ARP address
+ *
+ * @v net_addr ARP address (unused)
+ * @ret string "<ARP>"
+ *
+ * This operation is meaningless for the ARP protocol, so a fixed
+ * placeholder string is returned.
+ */
+static const char * arp_ntoa ( const void *net_addr __unused ) {
+ static const char placeholder[] = "<ARP>";
+
+ return placeholder;
+}
+
+/**
+ * ARP network protocol
+ *
+ * Registers arp_rx() as the receive handler for frames carrying the
+ * ETH_P_ARP protocol number, with arp_ntoa() as a placeholder
+ * address transcriber.
+ */
+struct net_protocol arp_protocol __net_protocol = {
+ .name = "ARP",
+ .net_proto = htons ( ETH_P_ARP ),
+ .rx = arp_rx,
+ .ntoa = arp_ntoa,
+};
diff --git a/qemu/roms/ipxe/src/net/dhcpopts.c b/qemu/roms/ipxe/src/net/dhcpopts.c
new file mode 100644
index 000000000..8cd19cf80
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/dhcpopts.c
@@ -0,0 +1,461 @@
+/*
+ * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <ipxe/dhcp.h>
+#include <ipxe/dhcpopts.h>
+
+/** @file
+ *
+ * DHCP options
+ *
+ */
+
+/**
+ * Obtain printable version of a DHCP option tag
+ *
+ * @v tag DHCP option tag
+ * @ret name String representation of the tag
+ *
+ * Encapsulated options are shown as "<encapsulator>.<encapsulated>";
+ * all other options as a single decimal number. The result lives in
+ * a static buffer and is overwritten by the next call.
+ */
+static inline char * dhcp_tag_name ( unsigned int tag ) {
+ static char name[8];
+
+ /* Plain option: just the tag number */
+ if ( ! DHCP_IS_ENCAP_OPT ( tag ) ) {
+ snprintf ( name, sizeof ( name ), "%d", tag );
+ return name;
+ }
+
+ /* Encapsulated option: show both halves */
+ snprintf ( name, sizeof ( name ), "%d.%d",
+ DHCP_ENCAPSULATOR ( tag ),
+ DHCP_ENCAPSULATED ( tag ) );
+ return name;
+}
+
+/**
+ * Get pointer to DHCP option
+ *
+ * @v options DHCP options block
+ * @v offset Offset within options block
+ * @ret option DHCP option
+ *
+ * No bounds checking is performed on @c offset.
+ */
+static inline __attribute__ (( always_inline )) struct dhcp_option *
+dhcp_option ( struct dhcp_options *options, unsigned int offset ) {
+ void *location = ( options->data + offset );
+
+ return location;
+}
+
+/**
+ * Get offset of a DHCP option
+ *
+ * @v options DHCP options block
+ * @v option DHCP option
+ * @ret offset Offset within options block
+ *
+ * This is the byte distance from the start of the options data to
+ * the option.
+ */
+static inline __attribute__ (( always_inline )) int
+dhcp_option_offset ( struct dhcp_options *options,
+ struct dhcp_option *option ) {
+ void *start = options->data;
+ void *location = option;
+
+ return ( location - start );
+}
+
+/**
+ * Calculate length of any DHCP option
+ *
+ * @v option DHCP option
+ * @ret len Length (including tag and length field)
+ *
+ * The end and pad options consist of a tag byte only; every other
+ * option carries a header followed by @c len bytes of data.
+ */
+static unsigned int dhcp_option_len ( struct dhcp_option *option ) {
+
+ /* Single-byte options have no length field */
+ if ( option->tag == DHCP_END )
+ return 1;
+ if ( option->tag == DHCP_PAD )
+ return 1;
+
+ return ( option->len + DHCP_OPTION_HEADER_LEN );
+}
+
+/**
+ * Find DHCP option within DHCP options block, and its encapsulator (if any)
+ *
+ * @v options DHCP options block
+ * @v tag DHCP option tag to search for
+ * @ret encap_offset Offset of encapsulating DHCP option
+ * @ret offset Offset of DHCP option, or negative error
+ *
+ * Searches for the DHCP option matching the specified tag within the
+ * DHCP option block. Encapsulated options may be searched for by
+ * using DHCP_ENCAP_OPT() to construct the tag value.
+ *
+ * If the option is encapsulated, and @c encap_offset is non-NULL, it
+ * will be filled in with the offset of the encapsulating option.
+ *
+ * This routine is designed to be paranoid. It does not assume that
+ * the option data is well-formatted, and so must guard against flaws
+ * such as options missing a @c DHCP_END terminator, or options whose
+ * length would take them beyond the end of the data block.
+ *
+ * Returns -ENOENT if the option is not found, or if @c tag is
+ * @c DHCP_PAD.
+ */
+static int find_dhcp_option_with_encap ( struct dhcp_options *options,
+ unsigned int tag,
+ int *encap_offset ) {
+ unsigned int original_tag __attribute__ (( unused )) = tag;
+ struct dhcp_option *option;
+ int offset = 0;
+ ssize_t remaining = options->used_len;
+ unsigned int option_len;
+
+ /* Sanity check */
+ if ( tag == DHCP_PAD )
+ return -ENOENT;
+
+ /* Search for option */
+ while ( remaining ) {
+ /* Calculate length of this option. Abort processing
+ * if the length is malformed (i.e. takes us beyond
+ * the end of the data block).
+ */
+ option = dhcp_option ( options, offset );
+ option_len = dhcp_option_len ( option );
+ remaining -= option_len;
+ if ( remaining < 0 )
+ break;
+ /* Check for explicit end marker */
+ if ( option->tag == DHCP_END ) {
+ if ( tag == DHCP_END )
+ /* Special case where the caller is interested
+ * in whether we have this marker or not.
+ */
+ return offset;
+ else
+ break;
+ }
+ /* Check for matching tag */
+ if ( option->tag == tag ) {
+ DBGC ( options, "DHCPOPT %p found %s (length %d)\n",
+ options, dhcp_tag_name ( original_tag ),
+ option_len );
+ return offset;
+ }
+ /* Check for start of matching encapsulation block */
+ if ( DHCP_IS_ENCAP_OPT ( tag ) &&
+ ( option->tag == DHCP_ENCAPSULATOR ( tag ) ) ) {
+ if ( encap_offset )
+ *encap_offset = offset;
+ /* Continue search within encapsulated option
+ * block. The offset is advanced past the
+ * encapsulator's header, so only its payload
+ * length remains to be scanned. Using the
+ * full option length here would let the scan
+ * run on into whatever follows the
+ * encapsulator.
+ */
+ tag = DHCP_ENCAPSULATED ( tag );
+ remaining = option->len;
+ offset += DHCP_OPTION_HEADER_LEN;
+ continue;
+ }
+ offset += option_len;
+ }
+
+ return -ENOENT;
+}
+
+/**
+ * Refuse to reallocate DHCP option block
+ *
+ * @v options DHCP option block
+ * @v len New length
+ * @ret rc Return status code
+ *
+ * Succeeds only if the requested length already fits within the
+ * existing allocation; otherwise returns -ENOSPC. The block itself
+ * is never changed.
+ */
+int dhcpopt_no_realloc ( struct dhcp_options *options, size_t len ) {
+
+ /* Anything beyond the current allocation cannot be satisfied */
+ if ( len > options->alloc_len )
+ return -ENOSPC;
+
+ return 0;
+}
+
+/**
+ * Resize a DHCP option
+ *
+ * @v options DHCP option block
+ * @v offset Offset of option to resize
+ * @v encap_offset Offset of encapsulating option (or -ve for none)
+ * @v old_len Old length (including header)
+ * @v new_len New length (including header)
+ * @ret rc Return status code
+ *
+ * Changes the space occupied by the option at @c offset from
+ * @c old_len to @c new_len bytes, moving the data that follows it.
+ * The option's own tag, length field and data are not written here.
+ * The length field of the encapsulating option (if any) is adjusted
+ * by the same amount.
+ *
+ * The block is reallocated via @c options->realloc() before the move
+ * if the new used length exceeds the allocated length, and again
+ * after the move if the used length is below the allocated length.
+ *
+ * Returns -ENOSPC if the option or its encapsulator would exceed
+ * DHCP_MAX_LEN, or the error returned by @c options->realloc().
+ */
+static int resize_dhcp_option ( struct dhcp_options *options,
+ int offset, int encap_offset,
+ size_t old_len, size_t new_len ) {
+ struct dhcp_option *encapsulator;
+ struct dhcp_option *option;
+ ssize_t delta = ( new_len - old_len );
+ size_t old_alloc_len;
+ size_t new_used_len;
+ size_t new_encapsulator_len;
+ void *source;
+ void *dest;
+ int rc;
+
+ /* Check for sufficient space: a single option may not exceed
+ * DHCP_MAX_LEN.
+ */
+ if ( new_len > DHCP_MAX_LEN ) {
+ DBGC ( options, "DHCPOPT %p overlength option\n", options );
+ return -ENOSPC;
+ }
+ new_used_len = ( options->used_len + delta );
+
+ /* Expand options block, if necessary */
+ if ( new_used_len > options->alloc_len ) {
+ /* Reallocate options block */
+ old_alloc_len = options->alloc_len;
+ if ( ( rc = options->realloc ( options, new_used_len ) ) != 0 ){
+ DBGC ( options, "DHCPOPT %p could not reallocate to "
+ "%zd bytes\n", options, new_used_len );
+ return rc;
+ }
+ /* Clear newly allocated space */
+ memset ( ( options->data + old_alloc_len ), 0,
+ ( options->alloc_len - old_alloc_len ) );
+ }
+
+ /* Update encapsulator, if applicable. Note that on failure
+ * here any expansion performed above is not undone.
+ */
+ if ( encap_offset >= 0 ) {
+ encapsulator = dhcp_option ( options, encap_offset );
+ new_encapsulator_len = ( encapsulator->len + delta );
+ if ( new_encapsulator_len > DHCP_MAX_LEN ) {
+ DBGC ( options, "DHCPOPT %p overlength encapsulator\n",
+ options );
+ return -ENOSPC;
+ }
+ encapsulator->len = new_encapsulator_len;
+ }
+
+ /* Update used length */
+ options->used_len = new_used_len;
+
+ /* Move remainder of option data: whatever followed the option
+ * at its old length is moved so that it follows the option at
+ * its new length. The byte count is the amount of used data
+ * lying beyond the end of the resized option.
+ */
+ option = dhcp_option ( options, offset );
+ source = ( ( ( void * ) option ) + old_len );
+ dest = ( ( ( void * ) option ) + new_len );
+ memmove ( dest, source, ( new_used_len - offset - new_len ) );
+
+ /* Shrink options block, if applicable */
+ if ( new_used_len < options->alloc_len ) {
+ if ( ( rc = options->realloc ( options, new_used_len ) ) != 0 ){
+ DBGC ( options, "DHCPOPT %p could not reallocate to "
+ "%zd bytes\n", options, new_used_len );
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Set value of DHCP option
+ *
+ * @v options DHCP option block
+ * @v tag DHCP option tag
+ * @v data New value for DHCP option
+ * @v len Length of value, in bytes
+ * @ret offset Offset of DHCP option, or negative error
+ *
+ * Sets the value of a DHCP option within the options block. The
+ * option may or may not already exist. Encapsulators will be created
+ * (and deleted) as necessary.
+ *
+ * A @c len of zero removes the option: the space it occupies is
+ * resized to nothing and no data is written.
+ *
+ * New options are created at the offset of the @c DHCP_END marker if
+ * there is one, or at the end of the used data otherwise. New
+ * encapsulated options are created at the start of their
+ * encapsulator's data.
+ *
+ * This call may fail due to insufficient space in the options block.
+ * If it does fail, and the option existed previously, the option will
+ * be left with its original value.
+ *
+ * Returns -ENOTTY if @c tag is @c DHCP_PAD.
+ */
+static int set_dhcp_option ( struct dhcp_options *options, unsigned int tag,
+ const void *data, size_t len ) {
+ static const uint8_t empty_encap[] = { DHCP_END };
+ int offset;
+ int encap_offset = -1;
+ int creation_offset;
+ struct dhcp_option *option;
+ unsigned int encap_tag = DHCP_ENCAPSULATOR ( tag );
+ size_t old_len = 0;
+ size_t new_len = ( len ? ( len + DHCP_OPTION_HEADER_LEN ) : 0 );
+ int rc;
+
+ /* Sanity check: the pad option cannot be set */
+ if ( tag == DHCP_PAD )
+ return -ENOTTY;
+
+ creation_offset = find_dhcp_option_with_encap ( options, DHCP_END,
+ NULL );
+ if ( creation_offset < 0 )
+ creation_offset = options->used_len;
+ /* Find old instance of this option, if any. This also fills
+ * in encap_offset if a matching encapsulator exists.
+ */
+ offset = find_dhcp_option_with_encap ( options, tag, &encap_offset );
+ if ( offset >= 0 ) {
+ old_len = dhcp_option_len ( dhcp_option ( options, offset ) );
+ DBGC ( options, "DHCPOPT %p resizing %s from %zd to %zd\n",
+ options, dhcp_tag_name ( tag ), old_len, new_len );
+ } else {
+ DBGC ( options, "DHCPOPT %p creating %s (length %zd)\n",
+ options, dhcp_tag_name ( tag ), new_len );
+ }
+
+ /* Ensure that encapsulator exists, if required. A missing
+ * encapsulator is created by a recursive call, initially
+ * containing only an end marker.
+ */
+ if ( encap_tag ) {
+ if ( encap_offset < 0 ) {
+ encap_offset =
+ set_dhcp_option ( options, encap_tag,
+ empty_encap,
+ sizeof ( empty_encap ) );
+ }
+ if ( encap_offset < 0 )
+ return encap_offset;
+ creation_offset = ( encap_offset + DHCP_OPTION_HEADER_LEN );
+ }
+
+ /* Create new option if necessary */
+ if ( offset < 0 )
+ offset = creation_offset;
+
+ /* Resize option to fit new data */
+ if ( ( rc = resize_dhcp_option ( options, offset, encap_offset,
+ old_len, new_len ) ) != 0 )
+ return rc;
+
+ /* Copy new data into option, if applicable */
+ if ( len ) {
+ option = dhcp_option ( options, offset );
+ option->tag = tag;
+ option->len = len;
+ memcpy ( &option->data, data, len );
+ }
+
+ /* Delete encapsulator if there's nothing else left in it
+ * (i.e. at most one byte of data remains in it).
+ */
+ if ( encap_offset >= 0 ) {
+ option = dhcp_option ( options, encap_offset );
+ if ( option->len <= 1 )
+ set_dhcp_option ( options, encap_tag, NULL, 0 );
+ }
+
+ return offset;
+}
+
+/**
+ * Determine whether a setting tag can be held in a DHCP options block
+ *
+ * @v tag		Setting tag number
+ * @ret applies		Non-zero if the tag is usable with DHCP options
+ *
+ * Tag zero is rejected.  Any other tag is accepted provided that it
+ * does not exceed DHCP_ENCAP_OPT ( DHCP_MAX_OPTION, DHCP_MAX_OPTION ).
+ */
+int dhcpopt_applies ( unsigned int tag ) {
+
+	/* Tag zero never applies */
+	if ( ! tag )
+		return 0;
+
+	return ( tag <= DHCP_ENCAP_OPT ( DHCP_MAX_OPTION, DHCP_MAX_OPTION ) );
+}
+
+/**
+ * Store a setting value into a DHCP options block
+ *
+ * @v options		DHCP option block
+ * @v tag		Setting tag number
+ * @v data		Setting data, or NULL to clear setting
+ * @v len		Length of setting data
+ * @ret rc		Return status code
+ *
+ * Thin wrapper around set_dhcp_option() which discards the offset of
+ * the stored option and reports only success or failure.
+ */
+int dhcpopt_store ( struct dhcp_options *options, unsigned int tag,
+		    const void *data, size_t len ) {
+	int result = set_dhcp_option ( options, tag, data, len );
+
+	/* Non-negative results are option offsets; callers only want 0 */
+	return ( ( result < 0 ) ? result : 0 );
+}
+
+/**
+ * Read a setting value out of a DHCP options block
+ *
+ * @v options		DHCP option block
+ * @v tag		Setting tag number
+ * @v data		Buffer to fill with setting data
+ * @v len		Length of buffer
+ * @ret len		Length of setting data, or negative error
+ *
+ * At most @c len bytes are copied into the buffer.  The returned
+ * length is the full stored length of the option, which may exceed
+ * the number of bytes actually copied.
+ */
+int dhcpopt_fetch ( struct dhcp_options *options, unsigned int tag,
+		    void *data, size_t len ) {
+	struct dhcp_option *found;
+	size_t stored_len;
+	size_t copy_len;
+	int where;
+
+	/* Locate the option, searching encapsulators as required */
+	where = find_dhcp_option_with_encap ( options, tag, NULL );
+	if ( where < 0 )
+		return where;
+	found = dhcp_option ( options, where );
+
+	/* Copy out as much as fits in the caller's buffer */
+	stored_len = found->len;
+	copy_len = ( ( len < stored_len ) ? len : stored_len );
+	memcpy ( data, found->data, copy_len );
+
+	return stored_len;
+}
+
+/**
+ * Recompute the used length of a DHCP options block
+ *
+ * @v options		DHCP option block (allocated length already set)
+ *
+ * Walks the options from the start of the block and sets the "used
+ * length" field to the end of the last option that is not a pad
+ * option.  The walk stops when the allocated length is exhausted, or
+ * when an option would extend beyond the allocated length.
+ */
+void dhcpopt_update_used_len ( struct dhcp_options *options ) {
+	struct dhcp_option *current;
+	unsigned int current_len;
+	ssize_t remaining;
+	int offset = 0;
+
+	/* Find last non-pad option */
+	options->used_len = 0;
+	for ( remaining = options->alloc_len ; remaining ; ) {
+		current = dhcp_option ( options, offset );
+		current_len = dhcp_option_len ( current );
+
+		/* Stop if this option would overrun the block */
+		remaining -= current_len;
+		if ( remaining < 0 )
+			break;
+
+		/* Trailing padding is not counted as used space */
+		offset += current_len;
+		if ( current->tag != DHCP_PAD )
+			options->used_len = offset;
+	}
+}
+
+/**
+ * Set up a DHCP options block over existing option data
+ *
+ * @v options		Uninitialised DHCP option block
+ * @v data		Memory for DHCP option data
+ * @v alloc_len		Length of memory for DHCP option data
+ * @v realloc		DHCP option block reallocator
+ *
+ * The memory must already contain valid DHCP options; an all-zero
+ * block is acceptable.  The used length is derived by scanning the
+ * existing contents.
+ */
+void dhcpopt_init ( struct dhcp_options *options, void *data, size_t alloc_len,
+		    int ( * realloc ) ( struct dhcp_options *options,
+					size_t len ) ) {
+
+	/* Record the reallocator and the backing memory */
+	options->realloc = realloc;
+	options->alloc_len = alloc_len;
+	options->data = data;
+
+	/* Derive the used length from the prepopulated contents */
+	dhcpopt_update_used_len ( options );
+
+	DBGC ( options, "DHCPOPT %p created (data %p lengths %#zx,%#zx)\n",
+	       options, options->data, options->used_len, options->alloc_len );
+}
diff --git a/qemu/roms/ipxe/src/net/dhcppkt.c b/qemu/roms/ipxe/src/net/dhcppkt.c
new file mode 100644
index 000000000..a9a6d3a94
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/dhcppkt.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/dhcp.h>
+#include <ipxe/dhcpopts.h>
+#include <ipxe/dhcppkt.h>
+
+/** @file
+ *
+ * DHCP packets
+ *
+ */
+
+/****************************************************************************
+ *
+ * DHCP packet raw interface
+ *
+ */
+
+/**
+ * Determine how much of an IPv4 address field is in use
+ *
+ * @v data		Field data
+ * @v len		Length of field (unused)
+ * @ret used		Used length of field
+ *
+ * An all-zero address counts as an unpopulated field.
+ */
+static size_t used_len_ipv4 ( const void *data, size_t len __unused ) {
+	const struct in_addr *address = data;
+
+	if ( address->s_addr == 0 )
+		return 0;
+	return sizeof ( *address );
+}
+
+/**
+ * Determine how much of a string field is in use
+ *
+ * @v data		Field data
+ * @v len		Length of field
+ * @ret used		Used length of field
+ *
+ * The used length is the string length, capped at the field length
+ * so that an unterminated field is handled safely.
+ */
+static size_t used_len_string ( const void *data, size_t len ) {
+	const char *string = data;
+	size_t used;
+
+	used = strnlen ( string, len );
+	return used;
+}
+
+/** A dedicated field within a DHCP packet
+ *
+ * Describes a fixed field of the DHCP header that is exposed as a
+ * setting tag in addition to the generic options block.
+ */
+struct dhcp_packet_field {
+ /** Settings tag number */
+ unsigned int tag;
+ /** Offset of the field within the DHCP packet header, in bytes */
+ uint16_t offset;
+ /** Length of field, in bytes */
+ uint16_t len;
+ /** Calculate used length of field
+ *
+ * @v data Field data
+ * @v len Length of field
+ * @ret used Used length of field
+ *
+ * A used length of zero is treated by dhcppkt_fetch() as
+ * "field not populated".
+ */
+ size_t ( * used_len ) ( const void *data, size_t len );
+};
+
+/** Declare a dedicated field within a DHCP packet
+ *
+ * @v _tag Settings tag number
+ * @v _field Field name
+ * @v _used_len Function to calculate used length of field
+ *
+ * Expands to an initialiser for a struct dhcp_packet_field. The
+ * offset and length are taken from the named member of struct
+ * dhcphdr; the null-pointer cast is only used as a sizeof operand.
+ */
+#define DHCP_PACKET_FIELD( _tag, _field, _used_len ) { \
+ .tag = (_tag), \
+ .offset = offsetof ( struct dhcphdr, _field ), \
+ .len = sizeof ( ( ( struct dhcphdr * ) 0 )->_field ), \
+ .used_len = _used_len, \
+ }
+
+/** Dedicated fields within a DHCP packet
+ *
+ * Settings with these tags are stored in the fixed DHCP header fields
+ * rather than in the options block. On fetch, the header field is
+ * used when it is populated; otherwise the options block is searched.
+ */
+static struct dhcp_packet_field dhcp_packet_fields[] = {
+ DHCP_PACKET_FIELD ( DHCP_EB_YIADDR, yiaddr, used_len_ipv4 ),
+ DHCP_PACKET_FIELD ( DHCP_EB_SIADDR, siaddr, used_len_ipv4 ),
+ DHCP_PACKET_FIELD ( DHCP_TFTP_SERVER_NAME, sname, used_len_string ),
+ DHCP_PACKET_FIELD ( DHCP_BOOTFILE_NAME, file, used_len_string ),
+};
+
+/**
+ * Locate a dedicated field within a DHCP packet header
+ *
+ * @v dhcphdr		DHCP packet header
+ * @v field		DHCP packet field
+ * @ret data		Packet field data
+ *
+ * The result is the header address advanced by the field's byte
+ * offset.
+ */
+static inline void * dhcp_packet_field ( struct dhcphdr *dhcphdr,
+					 struct dhcp_packet_field *field ) {
+	uint8_t *base = ( ( uint8_t * ) dhcphdr );
+
+	return &base[ field->offset ];
+}
+
+/**
+ * Look up the dedicated DHCP packet field for a settings tag
+ *
+ * @v tag		Settings tag number
+ * @ret field		DHCP packet field, or NULL
+ *
+ * Performs a linear search of the table of dedicated fields.
+ */
+static struct dhcp_packet_field *
+find_dhcp_packet_field ( unsigned int tag ) {
+	struct dhcp_packet_field *candidate = dhcp_packet_fields;
+	struct dhcp_packet_field *end =
+		( candidate + ( sizeof ( dhcp_packet_fields ) /
+				sizeof ( dhcp_packet_fields[0] ) ) );
+
+	for ( ; candidate < end ; candidate++ ) {
+		if ( candidate->tag == tag )
+			return candidate;
+	}
+
+	/* Not a dedicated field */
+	return NULL;
+}
+
+/**
+ * Check applicability of DHCP setting
+ *
+ * @v dhcppkt DHCP packet (unused)
+ * @v tag Setting tag number
+ * @ret applies Setting applies within this settings block
+ *
+ * The decision does not depend on the packet: it is delegated entirely
+ * to dhcpopt_applies().
+ */
+static int dhcppkt_applies ( struct dhcp_packet *dhcppkt __unused,
+ unsigned int tag ) {
+
+ return dhcpopt_applies ( tag );
+}
+
+/**
+ * Store value of DHCP packet setting
+ *
+ * @v dhcppkt		DHCP packet
+ * @v tag		Setting tag number
+ * @v data		Setting data, or NULL to clear setting
+ * @v len		Length of setting data
+ * @ret rc		Return status code
+ *
+ * Tags which correspond to a dedicated header field are written to
+ * that field, and any equivalent option is removed from the options
+ * block.  All other tags are stored in the options block.
+ *
+ * Returns -ENOSPC if the data is too long for a dedicated field.
+ */
+int dhcppkt_store ( struct dhcp_packet *dhcppkt, unsigned int tag,
+		    const void *data, size_t len ) {
+	struct dhcp_packet_field *field;
+	void *field_data;
+
+	/* Anything that is not a special field uses the options block */
+	field = find_dhcp_packet_field ( tag );
+	if ( ! field )
+		return dhcpopt_store ( &dhcppkt->options, tag, data, len );
+
+	/* Special field: value must fit within the fixed-size field */
+	if ( len > field->len )
+		return -ENOSPC;
+
+	/* Zero the whole field, then copy in the new value.  When
+	 * clearing a setting, data may be NULL (with zero length), so
+	 * skip the copy rather than hand memcpy() a NULL source.
+	 */
+	field_data = dhcp_packet_field ( dhcppkt->dhcphdr, field );
+	memset ( field_data, 0, field->len );
+	if ( len )
+		memcpy ( field_data, data, len );
+
+	/* Erase any equivalent option from the options block.  This
+	 * is best-effort: the result is deliberately not checked.
+	 */
+	dhcpopt_store ( &dhcppkt->options, tag, NULL, 0 );
+
+	return 0;
+}
+
+/**
+ * Read a setting value out of a DHCP packet
+ *
+ * @v dhcppkt		DHCP packet
+ * @v tag		Setting tag number
+ * @v data		Buffer to fill with setting data
+ * @v len		Length of buffer
+ * @ret len		Length of setting data, or negative error
+ *
+ * A populated dedicated header field takes precedence.  If the tag
+ * has no dedicated field, or the field is unpopulated, the options
+ * block is consulted instead.
+ */
+int dhcppkt_fetch ( struct dhcp_packet *dhcppkt, unsigned int tag,
+		    void *data, size_t len ) {
+	struct dhcp_packet_field *field;
+	void *field_data;
+	size_t used;
+
+	/* Try the dedicated header field first, if there is one */
+	field = find_dhcp_packet_field ( tag );
+	if ( field != NULL ) {
+		field_data = dhcp_packet_field ( dhcppkt->dhcphdr, field );
+		used = field->used_len ( field_data, field->len );
+		if ( used ) {
+			/* Copy no more than the caller's buffer holds */
+			if ( len > used )
+				len = used;
+			memcpy ( data, field_data, len );
+			return used;
+		}
+	}
+
+	/* Fall back to the generic options block */
+	return dhcpopt_fetch ( &dhcppkt->options, tag, data, len );
+}
+
+/****************************************************************************
+ *
+ * DHCP packet settings interface
+ *
+ */
+
+/**
+ * Decide whether a setting belongs in a DHCP packet settings block
+ *
+ * @v settings		Settings block
+ * @v setting		Setting
+ * @ret applies		Setting applies within this settings block
+ *
+ * Only settings with no scope are accepted, and then only if the tag
+ * is applicable to the DHCP packet.
+ */
+static int dhcppkt_settings_applies ( struct settings *settings,
+				      const struct setting *setting ) {
+	struct dhcp_packet *dhcppkt =
+		container_of ( settings, struct dhcp_packet, settings );
+
+	/* Scoped settings never apply */
+	if ( setting->scope != NULL )
+		return 0;
+
+	return ( dhcppkt_applies ( dhcppkt, setting->tag ) ? 1 : 0 );
+}
+
+/**
+ * Store value of DHCP setting
+ *
+ * @v settings Settings block
+ * @v setting Setting to store
+ * @v data Setting data, or NULL to clear setting
+ * @v len Length of setting data
+ * @ret rc Return status code
+ *
+ * Settings-interface adapter: recovers the containing DHCP packet and
+ * forwards to dhcppkt_store() using the setting's tag.
+ */
+static int dhcppkt_settings_store ( struct settings *settings,
+ const struct setting *setting,
+ const void *data, size_t len ) {
+ struct dhcp_packet *dhcppkt =
+ container_of ( settings, struct dhcp_packet, settings );
+
+ return dhcppkt_store ( dhcppkt, setting->tag, data, len );
+}
+
+/**
+ * Fetch value of DHCP setting
+ *
+ * @v settings Settings block
+ * @v setting Setting to fetch
+ * @v data Buffer to fill with setting data
+ * @v len Length of buffer
+ * @ret len Length of setting data, or negative error
+ *
+ * Settings-interface adapter: recovers the containing DHCP packet and
+ * forwards to dhcppkt_fetch() using the setting's tag.
+ *
+ * NOTE(review): the settings block is passed straight to
+ * container_of() with no NULL check, so a NULL settings pointer is
+ * not handled here.
+ */
+static int dhcppkt_settings_fetch ( struct settings *settings,
+ struct setting *setting,
+ void *data, size_t len ) {
+ struct dhcp_packet *dhcppkt =
+ container_of ( settings, struct dhcp_packet, settings );
+
+ return dhcppkt_fetch ( dhcppkt, setting->tag, data, len );
+}
+
+/** DHCP settings operations
+ *
+ * Installed on each DHCP packet's settings block by dhcppkt_init().
+ */
+static struct settings_operations dhcppkt_settings_operations = {
+ .applies = dhcppkt_settings_applies,
+ .store = dhcppkt_settings_store,
+ .fetch = dhcppkt_settings_fetch,
+};
+
+/****************************************************************************
+ *
+ * Constructor
+ *
+ */
+
+/**
+ * Initialise DHCP packet
+ *
+ * @v dhcppkt DHCP packet structure to fill in
+ * @v data DHCP packet raw data
+ * @v len Length of raw data buffer
+ *
+ * Initialise a DHCP packet structure from a data buffer containing a
+ * DHCP packet.
+ *
+ * The options block covers the part of the buffer that follows the
+ * fixed header, and is given the dhcpopt_no_realloc reallocator.
+ *
+ * NOTE(review): the options length is computed by an unchecked
+ * unsigned subtraction, so len is assumed to be at least
+ * offsetof ( struct dhcphdr, options ).
+ */
+void dhcppkt_init ( struct dhcp_packet *dhcppkt, struct dhcphdr *data,
+ size_t len ) {
+ ref_init ( &dhcppkt->refcnt, NULL );
+ dhcppkt->dhcphdr = data;
+ dhcpopt_init ( &dhcppkt->options, &dhcppkt->dhcphdr->options,
+ ( len - offsetof ( struct dhcphdr, options ) ),
+ dhcpopt_no_realloc );
+ settings_init ( &dhcppkt->settings, &dhcppkt_settings_operations,
+ &dhcppkt->refcnt, NULL );
+}
diff --git a/qemu/roms/ipxe/src/net/eapol.c b/qemu/roms/ipxe/src/net/eapol.c
new file mode 100644
index 000000000..eb0362994
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/eapol.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2009 Joshua Oreman <oremanj@rwcr.net>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/** @file
+ *
+ * 802.1X Extensible Authentication Protocol over LANs demultiplexer
+ *
+ */
+
+#include <ipxe/netdevice.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/if_ether.h>
+#include <ipxe/eapol.h>
+#include <errno.h>
+#include <byteswap.h>
+
+/**
+ * Receive EAPOL network-layer packet
+ *
+ * @v iob		I/O buffer
+ * @v netdev		Network device
+ * @v ll_dest		Link-layer destination address
+ * @v ll_source		Link-layer source address
+ * @v flags		Packet flags
+ * @ret rc		Return status code
+ *
+ * This function takes ownership of the I/O buffer passed to it.
+ *
+ * The packet is passed to the first registered EAPOL handler whose
+ * type matches the frame type, after the EAPOL header has been
+ * stripped.  Packets that are too short are dropped with -EINVAL.
+ * Packets with no matching handler are dropped with an ENOTSUP-based
+ * error that also encodes the low five bits of the frame type.
+ */
+static int eapol_rx ( struct io_buffer *iob, struct net_device *netdev,
+		      const void *ll_dest, const void *ll_source,
+		      unsigned int flags __unused ) {
+	struct eapol_frame *eapol = iob->data;
+	struct eapol_handler *handler;
+	int rc;
+
+	/* Sanity check */
+	if ( iob_len ( iob ) < EAPOL_HDR_LEN ) {
+		free_iob ( iob );
+		return -EINVAL;
+	}
+
+	/* Hand off to the matching handler, if any */
+	for_each_table_entry ( handler, EAPOL_HANDLERS ) {
+		if ( handler->type == eapol->type ) {
+			iob_pull ( iob, EAPOL_HDR_LEN );
+			return handler->rx ( iob, netdev, ll_dest, ll_source );
+		}
+	}
+
+	/* No handler.  The header lives inside the I/O buffer, so the
+	 * error code must be built before the buffer is freed.
+	 */
+	rc = -( ENOTSUP | ( ( eapol->type & 0x1f ) << 8 ) );
+	free_iob ( iob );
+	return rc;
+}
+
+/**
+ * Transcribe EAPOL network-layer address
+ *
+ * @v net_addr Network-layer address (unused)
+ * @ret str String representation of network-layer address
+ *
+ * EAPOL doesn't have network-layer addresses, so we just return the
+ * string @c "<EAPOL>". The returned string is a constant literal.
+ */
+static const char * eapol_ntoa ( const void *net_addr __unused )
+{
+ return "<EAPOL>";
+}
+
+/** EAPOL network protocol
+ *
+ * Received packets are handled by eapol_rx(), which passes them on to
+ * the registered EAPOL handlers.
+ */
+struct net_protocol eapol_protocol __net_protocol = {
+ .name = "EAPOL",
+ .rx = eapol_rx,
+ .ntoa = eapol_ntoa,
+ .net_proto = htons ( ETH_P_EAPOL ),
+};
diff --git a/qemu/roms/ipxe/src/net/eth_slow.c b/qemu/roms/ipxe/src/net/eth_slow.c
new file mode 100644
index 000000000..db54b55a4
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/eth_slow.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C) 2010 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdlib.h>
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/if_ether.h>
+#include <ipxe/ethernet.h>
+#include <ipxe/eth_slow.h>
+
+/** @file
+ *
+ * Ethernet slow protocols
+ *
+ * We implement a very simple passive LACP entity, that pretends that
+ * each port is the only port on an individual system. We avoid the
+ * need for timeout logic (and retaining local state about our
+ * partner) by requesting the same timeout period (1s or 30s) as our
+ * partner requests, and then simply responding to every packet the
+ * partner sends us.
+ */
+
+/* Forward declaration: the receive handlers transmit their replies
+ * via this protocol, which is defined at the end of the file.
+ */
+struct net_protocol eth_slow_protocol __net_protocol;
+
+/** Slow protocols multicast address
+ *
+ * Used as the link-layer destination for all replies sent here.
+ */
+static const uint8_t eth_slow_address[ETH_ALEN] =
+ { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
+
+/**
+ * Get a printable name for an LACP TLV type
+ *
+ * @v type		LACP TLV type
+ * @ret name		Name of LACP TLV type
+ *
+ * Unrecognised types are reported as "<invalid>".
+ */
+static inline __attribute__ (( always_inline )) const char *
+eth_slow_lacp_tlv_name ( uint8_t type ) {
+
+	if ( type == ETH_SLOW_TLV_TERMINATOR )
+		return "terminator";
+	if ( type == ETH_SLOW_TLV_LACP_ACTOR )
+		return "actor";
+	if ( type == ETH_SLOW_TLV_LACP_PARTNER )
+		return "partner";
+	if ( type == ETH_SLOW_TLV_LACP_COLLECTOR )
+		return "collector";
+	return "<invalid>";
+}
+
+/**
+ * Get a printable name for a marker TLV type
+ *
+ * @v type		Marker TLV type
+ * @ret name		Name of marker TLV type
+ *
+ * Unrecognised types are reported as "<invalid>".
+ */
+static inline __attribute__ (( always_inline )) const char *
+eth_slow_marker_tlv_name ( uint8_t type ) {
+
+	if ( type == ETH_SLOW_TLV_TERMINATOR )
+		return "terminator";
+	if ( type == ETH_SLOW_TLV_MARKER_REQUEST )
+		return "request";
+	if ( type == ETH_SLOW_TLV_MARKER_RESPONSE )
+		return "response";
+	return "<invalid>";
+}
+
+/**
+ * Render an LACP state byte as a string of flag letters
+ *
+ * @v state		LACP state
+ * @ret name		LACP state name
+ *
+ * Each of the eight state bits has its own letter, shown in upper
+ * case when the bit is set and in lower case when it is clear.  The
+ * result is held in a static buffer which is overwritten by the next
+ * call.
+ */
+static const char * eth_slow_lacp_state_name ( uint8_t state ) {
+	static char state_chars[] = "AFGSRTLX";
+	unsigned int bit;
+
+	for ( bit = 0 ; bit < 8 ; bit++ ) {
+		if ( state & ( 1 << bit ) ) {
+			/* Bit set: upper case */
+			state_chars[bit] &= ~0x20;
+		} else {
+			/* Bit clear: lower case */
+			state_chars[bit] |= 0x20;
+		}
+	}
+	return state_chars;
+}
+
+/**
+ * Dump LACP packet
+ *
+ * @v iobuf I/O buffer
+ * @v netdev Network device
+ * @v label "RX" or "TX"
+ *
+ * Emits debug messages only; the packet is not modified. The buffer
+ * length is not checked here.
+ */
+static void eth_slow_lacp_dump ( struct io_buffer *iobuf,
+ struct net_device *netdev,
+ const char *label ) {
+ union eth_slow_packet *eth_slow = iobuf->data;
+ struct eth_slow_lacp *lacp = &eth_slow->lacp;
+
+ /* Actor and partner are logged by separate calls because
+ * eth_slow_lacp_state_name() returns a single static buffer,
+ * so only one state string can be live at a time.
+ */
+ DBGC ( netdev,
+ "SLOW %s %s LACP actor (%04x,%s,%04x,%02x,%04x) [%s]\n",
+ netdev->name, label, ntohs ( lacp->actor.system_priority ),
+ eth_ntoa ( lacp->actor.system ),
+ ntohs ( lacp->actor.key ),
+ ntohs ( lacp->actor.port_priority ),
+ ntohs ( lacp->actor.port ),
+ eth_slow_lacp_state_name ( lacp->actor.state ) );
+ DBGC ( netdev,
+ "SLOW %s %s LACP partner (%04x,%s,%04x,%02x,%04x) [%s]\n",
+ netdev->name, label, ntohs ( lacp->partner.system_priority ),
+ eth_ntoa ( lacp->partner.system ),
+ ntohs ( lacp->partner.key ),
+ ntohs ( lacp->partner.port_priority ),
+ ntohs ( lacp->partner.port ),
+ eth_slow_lacp_state_name ( lacp->partner.state ) );
+ /* The delay is printed raw and again multiplied by ten as "us" */
+ DBGC ( netdev, "SLOW %s %s LACP collector %04x (%d us)\n",
+ netdev->name, label, ntohs ( lacp->collector.max_delay ),
+ ( ntohs ( lacp->collector.max_delay ) * 10 ) );
+ DBGC2_HDA ( netdev, 0, iobuf->data, iob_len ( iobuf ) );
+}
+
+/**
+ * Process incoming LACP packet
+ *
+ * @v iobuf I/O buffer
+ * @v netdev Network device
+ * @ret rc Return status code
+ *
+ * The response is built in place, reusing the received I/O buffer,
+ * and is then transmitted to the slow protocols multicast address.
+ * The buffer is handed to net_tx(), which presumably takes ownership
+ * of it (TODO confirm).
+ */
+static int eth_slow_lacp_rx ( struct io_buffer *iobuf,
+ struct net_device *netdev ) {
+ union eth_slow_packet *eth_slow = iobuf->data;
+ struct eth_slow_lacp *lacp = &eth_slow->lacp;
+
+ eth_slow_lacp_dump ( iobuf, netdev, "RX" );
+
+ /* Build response */
+ memset ( lacp->reserved, 0, sizeof ( lacp->reserved ) );
+ memset ( &lacp->terminator, 0, sizeof ( lacp->terminator ) );
+ memset ( &lacp->collector, 0, sizeof ( lacp->collector ) );
+ lacp->collector.tlv.type = ETH_SLOW_TLV_LACP_COLLECTOR;
+ lacp->collector.tlv.length = ETH_SLOW_TLV_LACP_COLLECTOR_LEN;
+ /* The sender's actor information becomes our partner
+ * information. This copy must happen before the actor TLV is
+ * zeroed below.
+ */
+ memcpy ( &lacp->partner, &lacp->actor, sizeof ( lacp->partner ) );
+ lacp->partner.tlv.type = ETH_SLOW_TLV_LACP_PARTNER;
+ lacp->partner.tlv.length = ETH_SLOW_TLV_LACP_PARTNER_LEN;
+ memset ( &lacp->partner.reserved, 0,
+ sizeof ( lacp->partner.reserved ) );
+ /* Describe ourselves as the actor */
+ memset ( &lacp->actor, 0, sizeof ( lacp->actor ) );
+ lacp->actor.tlv.type = ETH_SLOW_TLV_LACP_ACTOR;
+ lacp->actor.tlv.length = ETH_SLOW_TLV_LACP_ACTOR_LEN;
+ lacp->actor.system_priority = htons ( LACP_SYSTEM_PRIORITY_MAX );
+ memcpy ( lacp->actor.system, netdev->ll_addr,
+ sizeof ( lacp->actor.system ) );
+ lacp->actor.key = htons ( 1 );
+ lacp->actor.port_priority = htons ( LACP_PORT_PRIORITY_MAX );
+ lacp->actor.port = htons ( 1 );
+ /* Echo the LACP_STATE_FAST bit from the partner state (the
+ * sender's state, as copied above).
+ */
+ lacp->actor.state = ( LACP_STATE_AGGREGATABLE |
+ LACP_STATE_IN_SYNC |
+ LACP_STATE_COLLECTING |
+ LACP_STATE_DISTRIBUTING |
+ ( lacp->partner.state & LACP_STATE_FAST ) );
+ lacp->header.version = ETH_SLOW_LACP_VERSION;
+
+ /* Send response */
+ eth_slow_lacp_dump ( iobuf, netdev, "TX" );
+ return net_tx ( iobuf, netdev, &eth_slow_protocol, eth_slow_address,
+ netdev->ll_addr );
+}
+
+/**
+ * Dump marker packet
+ *
+ * @v iobuf I/O buffer
+ * @v netdev Network device
+ * @v label "RX" or "TX"
+ *
+ * Emits debug messages only; the packet is not modified. The buffer
+ * length is not checked here.
+ */
+static void eth_slow_marker_dump ( struct io_buffer *iobuf,
+ struct net_device *netdev,
+ const char *label ) {
+ union eth_slow_packet *eth_slow = iobuf->data;
+ struct eth_slow_marker *marker = &eth_slow->marker;
+
+ DBGC ( netdev, "SLOW %s %s marker %s port %04x system %s xact %08x\n",
+ netdev->name, label,
+ eth_slow_marker_tlv_name ( marker->marker.tlv.type ),
+ ntohs ( marker->marker.port ),
+ eth_ntoa ( marker->marker.system ),
+ ntohl ( marker->marker.xact ) );
+ DBGC2_HDA ( netdev, 0, iobuf->data, iob_len ( iobuf ) );
+}
+
+/**
+ * Handle a received marker packet
+ *
+ * @v iobuf		I/O buffer
+ * @v netdev		Network device
+ * @ret rc		Return status code
+ *
+ * Marker requests are answered by turning the received packet into a
+ * marker response and transmitting it.  Every other kind of marker
+ * packet is freed and rejected with -EINVAL.
+ */
+static int eth_slow_marker_rx ( struct io_buffer *iobuf,
+				struct net_device *netdev ) {
+	union eth_slow_packet *eth_slow = iobuf->data;
+	struct eth_slow_marker *marker = &eth_slow->marker;
+
+	eth_slow_marker_dump ( iobuf, netdev, "RX" );
+
+	/* Discard anything that is not a marker request */
+	if ( marker->marker.tlv.type != ETH_SLOW_TLV_MARKER_REQUEST ) {
+		free_iob ( iobuf );
+		return -EINVAL;
+	}
+
+	/* Send marker response, reusing the received buffer */
+	marker->marker.tlv.type = ETH_SLOW_TLV_MARKER_RESPONSE;
+	eth_slow_marker_dump ( iobuf, netdev, "TX" );
+	return net_tx ( iobuf, netdev, &eth_slow_protocol,
+			eth_slow_address, netdev->ll_addr );
+}
+
+/**
+ * Handle a received slow protocols packet
+ *
+ * @v iobuf		I/O buffer
+ * @v netdev		Network device
+ * @v ll_dest		Link-layer destination address
+ * @v ll_source		Link-layer source address
+ * @v flags		Packet flags
+ * @ret rc		Return status code
+ *
+ * Packets shorter than a full slow protocols packet, and packets with
+ * an unrecognised subtype, are freed and rejected with -EINVAL.
+ */
+static int eth_slow_rx ( struct io_buffer *iobuf,
+			 struct net_device *netdev,
+			 const void *ll_dest __unused,
+			 const void *ll_source __unused,
+			 unsigned int flags __unused ) {
+	union eth_slow_packet *eth_slow = iobuf->data;
+
+	/* Sanity checks */
+	if ( iob_len ( iobuf ) < sizeof ( *eth_slow ) ) {
+		free_iob ( iobuf );
+		return -EINVAL;
+	}
+
+	/* Dispatch the known subtypes */
+	if ( eth_slow->header.subtype == ETH_SLOW_SUBTYPE_LACP )
+		return eth_slow_lacp_rx ( iobuf, netdev );
+	if ( eth_slow->header.subtype == ETH_SLOW_SUBTYPE_MARKER )
+		return eth_slow_marker_rx ( iobuf, netdev );
+
+	/* Anything else is dropped */
+	DBGC ( netdev, "SLOW %s RX unknown subtype %02x\n",
+	       netdev->name, eth_slow->header.subtype );
+	free_iob ( iobuf );
+	return -EINVAL;
+}
+
+/** Slow protocol
+ *
+ * Only a receive handler is provided; no address transcription
+ * function is set.
+ */
+struct net_protocol eth_slow_protocol __net_protocol = {
+ .name = "Slow",
+ .net_proto = htons ( ETH_P_SLOW ),
+ .rx = eth_slow_rx,
+};
diff --git a/qemu/roms/ipxe/src/net/ethernet.c b/qemu/roms/ipxe/src/net/ethernet.c
new file mode 100644
index 000000000..03978c2a8
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/ethernet.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <assert.h>
+#include <ipxe/if_arp.h>
+#include <ipxe/if_ether.h>
+#include <ipxe/in.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/ethernet.h>
+
+/** @file
+ *
+ * Ethernet protocol
+ *
+ */
+
+/** Ethernet broadcast MAC address
+ *
+ * Installed as the link-layer broadcast address of every device
+ * created by alloc_etherdev().
+ */
+uint8_t eth_broadcast[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+/**
+ * Prepend an Ethernet link-layer header to a packet
+ *
+ * @v netdev		Network device (unused)
+ * @v iobuf		I/O buffer
+ * @v ll_dest		Link-layer destination address
+ * @v ll_source		Source link-layer address
+ * @v net_proto		Network-layer protocol, in network-byte order
+ * @ret rc		Return status code
+ *
+ * Always succeeds.
+ */
+int eth_push ( struct net_device *netdev __unused, struct io_buffer *iobuf,
+	       const void *ll_dest, const void *ll_source,
+	       uint16_t net_proto ) {
+	struct ethhdr *header;
+
+	/* Make room for the header at the front of the buffer */
+	header = iob_push ( iobuf, sizeof ( *header ) );
+
+	/* Build Ethernet header */
+	memcpy ( header->h_dest, ll_dest, ETH_ALEN );
+	memcpy ( header->h_source, ll_source, ETH_ALEN );
+	header->h_protocol = net_proto;
+
+	return 0;
+}
+
+/**
+ * Strip the Ethernet link-layer header from a packet
+ *
+ * @v netdev		Network device (unused)
+ * @v iobuf		I/O buffer
+ * @ret ll_dest		Link-layer destination address
+ * @ret ll_source	Source link-layer address
+ * @ret net_proto	Network-layer protocol, in network-byte order
+ * @ret flags		Packet flags
+ * @ret rc		Return status code
+ *
+ * The returned addresses point into the stripped header within the
+ * I/O buffer.  Packets too short to hold an Ethernet header are
+ * rejected with -EINVAL and left untouched.
+ */
+int eth_pull ( struct net_device *netdev __unused, struct io_buffer *iobuf,
+	       const void **ll_dest, const void **ll_source,
+	       uint16_t *net_proto, unsigned int *flags ) {
+	struct ethhdr *header = iobuf->data;
+	unsigned int ll_flags = 0;
+
+	/* Sanity check */
+	if ( iob_len ( iobuf ) < sizeof ( *header ) ) {
+		DBG ( "Ethernet packet too short (%zd bytes)\n",
+		      iob_len ( iobuf ) );
+		return -EINVAL;
+	}
+
+	/* Strip off Ethernet header */
+	iob_pull ( iobuf, sizeof ( *header ) );
+
+	/* Classify the destination address */
+	if ( is_multicast_ether_addr ( header->h_dest ) )
+		ll_flags |= LL_MULTICAST;
+	if ( is_broadcast_ether_addr ( header->h_dest ) )
+		ll_flags |= LL_BROADCAST;
+
+	/* Fill in required fields */
+	*ll_dest = header->h_dest;
+	*ll_source = header->h_source;
+	*net_proto = header->h_protocol;
+	*flags = ll_flags;
+
+	return 0;
+}
+
+/**
+ * Initialise Ethernet address
+ *
+ * @v hw_addr Hardware address
+ * @v ll_addr Link-layer address
+ *
+ * The link-layer address is a straight copy of the ETH_ALEN-byte
+ * hardware address.
+ */
+void eth_init_addr ( const void *hw_addr, void *ll_addr ) {
+ memcpy ( ll_addr, hw_addr, ETH_ALEN );
+}
+
+/**
+ * Generate a random Ethernet address
+ *
+ * @v hw_addr		Generated hardware address
+ *
+ * The result is always a unicast address with the locally-assigned
+ * bit set.
+ */
+void eth_random_addr ( void *hw_addr ) {
+	uint8_t *octet = hw_addr;
+	unsigned int index;
+
+	/* Fill every byte with random data */
+	for ( index = 0 ; index < ETH_ALEN ; index++ )
+		octet[index] = random();
+
+	/* Clear multicast bit, set locally-assigned bit */
+	octet[0] = ( ( octet[0] & ~0x01 ) | 0x02 );
+}
+
+/**
+ * Format an Ethernet address as text
+ *
+ * @v ll_addr		Link-layer address
+ * @ret string		Link-layer address in human-readable format
+ *
+ * The result is held in a static buffer which is overwritten by the
+ * next call.
+ */
+const char * eth_ntoa ( const void *ll_addr ) {
+	static char buf[18]; /* "00:00:00:00:00:00" */
+	const uint8_t *octet = ll_addr;
+
+	sprintf ( buf, "%02x:%02x:%02x:%02x:%02x:%02x",
+		  octet[0], octet[1], octet[2],
+		  octet[3], octet[4], octet[5] );
+
+	return buf;
+}
+
+/**
+ * Map a multicast network address to an Ethernet multicast address
+ *
+ * @v af		Address family
+ * @v net_addr		Network-layer address
+ * @v ll_addr		Link-layer address to fill in
+ * @ret rc		Return status code
+ *
+ * IPv4 addresses map to 01:00:5e followed by the low 23 bits of the
+ * address.  IPv6 addresses map to 33:33 followed by the last four
+ * bytes of the address.  Other address families are rejected with
+ * -ENOTSUP.
+ */
+int eth_mc_hash ( unsigned int af, const void *net_addr, void *ll_addr ) {
+	const uint8_t *net = net_addr;
+	uint8_t *ll = ll_addr;
+
+	if ( af == AF_INET ) {
+		ll[0] = 0x01;
+		ll[1] = 0x00;
+		ll[2] = 0x5e;
+		ll[3] = net[1] & 0x7f;
+		ll[4] = net[2];
+		ll[5] = net[3];
+		return 0;
+	}
+
+	if ( af == AF_INET6 ) {
+		ll[0] = 0x33;
+		ll[1] = 0x33;
+		memcpy ( &ll[2], &net[12], 4 );
+		return 0;
+	}
+
+	return -ENOTSUP;
+}
+
+/**
+ * Generate Ethernet-compatible compressed link-layer address
+ *
+ * @v ll_addr Link-layer address
+ * @v eth_addr Ethernet-compatible address to fill in
+ * @ret rc Return status code (always zero)
+ *
+ * An Ethernet link-layer address is already Ethernet-compatible, so
+ * it is copied unchanged.
+ */
+int eth_eth_addr ( const void *ll_addr, void *eth_addr ) {
+ memcpy ( eth_addr, ll_addr, ETH_ALEN );
+ return 0;
+}
+
+/**
+ * Generate EUI-64 address
+ *
+ * @v ll_addr		Link-layer address
+ * @v eui64		EUI-64 address to fill in
+ * @ret rc		Return status code (always zero)
+ *
+ * The eight-byte result consists of the first three bytes of the
+ * link-layer address, the bytes 0xff and 0xfe, and then the last three
+ * bytes of the link-layer address.
+ */
+int eth_eui64 ( const void *ll_addr, void *eui64 ) {
+	const uint8_t *ll_bytes = ll_addr;
+	uint8_t *eui64_bytes = eui64;
+
+	memcpy ( &eui64_bytes[0], &ll_bytes[0], 3 );
+
+	/* Write the 0xfffe filler one byte at a time.  It sits at an
+	 * odd offset, so a 16-bit store through a cast pointer would
+	 * be misaligned.
+	 */
+	eui64_bytes[3] = 0xff;
+	eui64_bytes[4] = 0xfe;
+
+	memcpy ( &eui64_bytes[5], &ll_bytes[3], 3 );
+	return 0;
+}
+
+/** Ethernet protocol
+ *
+ * Link-layer protocol assigned to every device created by
+ * alloc_etherdev(). Hardware and link-layer addresses are both
+ * ETH_ALEN bytes long.
+ */
+struct ll_protocol ethernet_protocol __ll_protocol = {
+ .name = "Ethernet",
+ .ll_proto = htons ( ARPHRD_ETHER ),
+ .hw_addr_len = ETH_ALEN,
+ .ll_addr_len = ETH_ALEN,
+ .ll_header_len = ETH_HLEN,
+ .push = eth_push,
+ .pull = eth_pull,
+ .init_addr = eth_init_addr,
+ .ntoa = eth_ntoa,
+ .mc_hash = eth_mc_hash,
+ .eth_addr = eth_eth_addr,
+ .eui64 = eth_eui64,
+};
+
+/**
+ * Allocate a network device configured for Ethernet
+ *
+ * @v priv_size		Size of driver private data
+ * @ret netdev		Network device, or NULL
+ *
+ * On success the device is given the Ethernet link-layer protocol,
+ * the Ethernet broadcast address and the Ethernet maximum packet
+ * length.
+ */
+struct net_device * alloc_etherdev ( size_t priv_size ) {
+	struct net_device *netdev = alloc_netdev ( priv_size );
+
+	/* Allocation failed */
+	if ( ! netdev )
+		return NULL;
+
+	netdev->ll_protocol = &ethernet_protocol;
+	netdev->ll_broadcast = eth_broadcast;
+	netdev->max_pkt_len = ETH_FRAME_LEN;
+	return netdev;
+}
+
+/* Drag in Ethernet slow protocols */
+REQUIRE_OBJECT ( eth_slow );
diff --git a/qemu/roms/ipxe/src/net/fakedhcp.c b/qemu/roms/ipxe/src/net/fakedhcp.c
new file mode 100644
index 000000000..3dec88b11
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/fakedhcp.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <ipxe/settings.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/dhcppkt.h>
+#include <ipxe/fakedhcp.h>
+
+/** @file
+ *
+ * Fake DHCP packets
+ *
+ */
+
+/**
+ * Copy settings (including encapsulated settings) to DHCP packet
+ *
+ * @v dest Destination DHCP packet
+ * @v source Source settings block
+ * @v encapsulator Encapsulating setting tag number, or zero
+ * @ret rc Return status code
+ *
+ * Every option number in the DHCP_MIN_OPTION..DHCP_MAX_OPTION range
+ * is tried beneath the given encapsulator.  The two encapsulating
+ * tags are handled by recursion; any other tag is fetched from the
+ * source settings and, if present, stored in the packet.
+ */
+static int copy_encap_settings ( struct dhcp_packet *dest,
+                                 struct settings *source,
+                                 unsigned int encapsulator ) {
+    struct setting setting = { .name = "" };
+    unsigned int subtag;
+
+    for ( subtag = DHCP_MIN_OPTION; subtag <= DHCP_MAX_OPTION; subtag++ ) {
+        unsigned int tag = DHCP_ENCAP_OPT ( encapsulator, subtag );
+        void *data;
+        int len;
+        int rc;
+
+        /* Descend into encapsulated settings */
+        if ( ( tag == DHCP_EB_ENCAP ) ||
+             ( tag == DHCP_VENDOR_ENCAP ) ) {
+            rc = copy_encap_settings ( dest, source, tag );
+            if ( rc != 0 )
+                return rc;
+            continue;
+        }
+
+        /* Ordinary setting: skip it if it is absent */
+        setting.tag = tag;
+        len = fetch_raw_setting_copy ( source, &setting, &data );
+        if ( len < 0 )
+            continue;
+
+        /* Store the copy, releasing it whatever the outcome */
+        rc = dhcppkt_store ( dest, tag, data, len );
+        free ( data );
+        if ( rc != 0 )
+            return rc;
+    }
+
+    return 0;
+}
+
+/**
+ * Copy all settings from a settings block to DHCP packet
+ *
+ * @v dest Destination DHCP packet
+ * @v source Source settings block
+ * @ret rc Return status code
+ */
+static int copy_settings ( struct dhcp_packet *dest,
+                           struct settings *source ) {
+    /* An encapsulator of zero means "no encapsulating tag" */
+    const unsigned int no_encapsulator = 0;
+
+    return copy_encap_settings ( dest, source, no_encapsulator );
+}
+
+/**
+ * Build a fake DHCPDISCOVER packet in a caller-supplied buffer
+ *
+ * @v netdev Network device
+ * @v data Buffer for DHCP packet
+ * @v max_len Size of DHCP packet buffer
+ * @ret rc Return status code
+ *
+ * The request is created with dhcp_last_xid and an all-zero client
+ * address.
+ *
+ * Used by external code.
+ */
+int create_fakedhcpdiscover ( struct net_device *netdev,
+                              void *data, size_t max_len ) {
+    struct in_addr ciaddr = { 0 };
+    struct dhcp_packet dhcppkt;
+    int rc;
+
+    rc = dhcp_create_request ( &dhcppkt, netdev, DHCPDISCOVER,
+                               dhcp_last_xid, ciaddr, data, max_len );
+    if ( rc == 0 )
+        return 0;
+
+    DBG ( "Could not create DHCPDISCOVER: %s\n", strerror ( rc ) );
+    return rc;
+}
+
+/**
+ * Build a fake DHCPACK packet in a caller-supplied buffer
+ *
+ * @v netdev Network device
+ * @v data Buffer for DHCP packet
+ * @v max_len Size of DHCP packet buffer
+ * @ret rc Return status code
+ *
+ * Used by external code.
+ */
+int create_fakedhcpack ( struct net_device *netdev,
+                         void *data, size_t max_len ) {
+    struct dhcp_packet dhcppkt;
+    int rc;
+
+    /* Start from a bare DHCPACK */
+    rc = dhcp_create_packet ( &dhcppkt, netdev, DHCPACK, dhcp_last_xid,
+                              NULL, 0, data, max_len );
+    if ( rc != 0 ) {
+        DBG ( "Could not create DHCPACK: %s\n", strerror ( rc ) );
+        return rc;
+    }
+
+    /* Globally-scoped settings go in first and netdev-specific
+     * settings second, so that the netdev-specific values win
+     * regardless of stated priorities.
+     */
+    rc = copy_settings ( &dhcppkt, NULL );
+    if ( rc != 0 ) {
+        DBG ( "Could not set DHCPACK global settings: %s\n",
+              strerror ( rc ) );
+        return rc;
+    }
+
+    rc = copy_settings ( &dhcppkt, netdev_settings ( netdev ) );
+    if ( rc != 0 ) {
+        DBG ( "Could not set DHCPACK netdev settings: %s\n",
+              strerror ( rc ) );
+        return rc;
+    }
+
+    return 0;
+}
+
+/**
+ * Build a fake PXE Boot Server ACK packet in a caller-supplied buffer
+ *
+ * @v netdev Network device
+ * @v data Buffer for DHCP packet
+ * @v max_len Size of DHCP packet buffer
+ * @ret rc Return status code
+ *
+ * If neither the ProxyDHCP nor the PXE boot server settings block
+ * exists, the regular fake DHCPACK is produced instead.
+ *
+ * Used by external code.
+ */
+int create_fakepxebsack ( struct net_device *netdev,
+                          void *data, size_t max_len ) {
+    struct settings *proxy_settings =
+        find_settings ( PROXYDHCP_SETTINGS_NAME );
+    struct settings *pxebs_settings =
+        find_settings ( PXEBS_SETTINGS_NAME );
+    struct dhcp_packet dhcppkt;
+    int rc;
+
+    /* No PXE boot server; return the regular DHCPACK */
+    if ( ( proxy_settings == NULL ) && ( pxebs_settings == NULL ) )
+        return create_fakedhcpack ( netdev, data, max_len );
+
+    /* Start from a bare DHCPACK */
+    rc = dhcp_create_packet ( &dhcppkt, netdev, DHCPACK, dhcp_last_xid,
+                              NULL, 0, data, max_len );
+    if ( rc != 0 ) {
+        DBG ( "Could not create PXE BS ACK: %s\n",
+              strerror ( rc ) );
+        return rc;
+    }
+
+    /* ProxyDHCP options, if that block exists */
+    if ( proxy_settings ) {
+        rc = copy_settings ( &dhcppkt, proxy_settings );
+        if ( rc != 0 ) {
+            DBG ( "Could not copy ProxyDHCP settings: %s\n",
+                  strerror ( rc ) );
+            return rc;
+        }
+    }
+
+    /* BootServerDHCP options, if that block exists */
+    if ( pxebs_settings ) {
+        rc = copy_settings ( &dhcppkt, pxebs_settings );
+        if ( rc != 0 ) {
+            DBG ( "Could not copy PXE BS settings: %s\n",
+                  strerror ( rc ) );
+            return rc;
+        }
+    }
+
+    return 0;
+}
diff --git a/qemu/roms/ipxe/src/net/fc.c b/qemu/roms/ipxe/src/net/fc.c
new file mode 100644
index 000000000..58008995c
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/fc.c
@@ -0,0 +1,1937 @@
+/*
+ * Copyright (C) 2010 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <assert.h>
+#include <byteswap.h>
+#include <ipxe/refcnt.h>
+#include <ipxe/list.h>
+#include <ipxe/tables.h>
+#include <ipxe/timer.h>
+#include <ipxe/retry.h>
+#include <ipxe/interface.h>
+#include <ipxe/xfer.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/fc.h>
+#include <ipxe/fcels.h>
+#include <ipxe/fcns.h>
+
+/** @file
+ *
+ * Fibre Channel
+ *
+ */
+
+/** List of Fibre Channel ports (presumably linked via fc_port::list) */
+LIST_HEAD ( fc_ports );
+
+/** List of Fibre Channel peers (linked via fc_peer::list) */
+LIST_HEAD ( fc_peers );
+
+/******************************************************************************
+ *
+ * Well-known addresses
+ *
+ ******************************************************************************
+ */
+
+/** Unassigned port ID (00.00.00) */
+struct fc_port_id fc_empty_port_id = { .bytes = { 0x00, 0x00, 0x00 } };
+
+/** F_Port controller port ID (ff.ff.fe) */
+struct fc_port_id fc_f_port_id = { .bytes = { 0xff, 0xff, 0xfe } };
+
+/** Generic services port ID (ff.ff.fc) */
+struct fc_port_id fc_gs_port_id = { .bytes = { 0xff, 0xff, 0xfc } };
+
+/** Point-to-point low port ID (01.01.01) */
+struct fc_port_id fc_ptp_low_port_id = { .bytes = { 0x01, 0x01, 0x01 } };
+
+/** Point-to-point high port ID (01.01.02) */
+struct fc_port_id fc_ptp_high_port_id = { .bytes = { 0x01, 0x01, 0x02 } };
+
+/******************************************************************************
+ *
+ * Utility functions
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Render Fibre Channel port ID as text
+ *
+ * @v id Fibre Channel port ID
+ * @ret id_text Port ID text
+ *
+ * The text is "xx.xx.xx" in lower-case hex.  It is held in a static
+ * buffer, so each call overwrites the result of the previous one.
+ */
+const char * fc_id_ntoa ( const struct fc_port_id *id ) {
+    static char text[ FC_PORT_ID_STRLEN + 1 /* NUL */ ];
+    const size_t text_len = sizeof ( text );
+
+    snprintf ( text, text_len, "%02x.%02x.%02x",
+               id->bytes[0], id->bytes[1], id->bytes[2] );
+    return text;
+}
+
+/**
+ * Parse Fibre Channel port ID
+ *
+ * @v id_text Port ID text
+ * @ret id Fibre Channel port ID
+ * @ret rc Return status code
+ *
+ * The expected form is one hex component per byte of the port ID,
+ * separated by '.' and with nothing following the last component.
+ * -EINVAL is returned for a wrong separator, trailing text, an empty
+ * component or a component that does not fit in one byte.  On error
+ * the contents of @c id are unspecified.
+ */
+int fc_id_aton ( const char *id_text, struct fc_port_id *id ) {
+    char *ptr = ( ( char * ) id_text );
+    char *end;
+    unsigned long value;
+    unsigned int i = 0;
+
+    while ( 1 ) {
+        value = strtoul ( ptr, &end, 16 );
+
+        /* Refuse components with no digits, and values that
+         * would be silently truncated when stored in a byte.
+         */
+        if ( ( end == ptr ) || ( value > 0xff ) )
+            return -EINVAL;
+        id->bytes[i++] = value;
+        ptr = end;
+
+        /* The last component must be followed by end of string */
+        if ( i == sizeof ( id->bytes ) )
+            return ( ( *ptr == '\0' ) ? 0 : -EINVAL );
+
+        /* Any other component must be followed by a separator */
+        if ( *ptr != '.' )
+            return -EINVAL;
+        ptr++;
+    }
+}
+
+/**
+ * Render Fibre Channel WWN as text
+ *
+ * @v wwn Fibre Channel WWN
+ * @ret wwn_text WWN text
+ *
+ * The text is eight colon-separated lower-case hex bytes.  It is held
+ * in a static buffer, so each call overwrites the result of the
+ * previous one.
+ */
+const char * fc_ntoa ( const struct fc_name *wwn ) {
+    static char text[ FC_NAME_STRLEN + 1 /* NUL */ ];
+    const size_t text_len = sizeof ( text );
+
+    snprintf ( text, text_len,
+               "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x",
+               wwn->bytes[0], wwn->bytes[1],
+               wwn->bytes[2], wwn->bytes[3],
+               wwn->bytes[4], wwn->bytes[5],
+               wwn->bytes[6], wwn->bytes[7] );
+    return text;
+}
+
+/**
+ * Parse Fibre Channel WWN
+ *
+ * @v wwn_text WWN text
+ * @ret wwn Fibre Channel WWN
+ * @ret rc Return status code
+ *
+ * The expected form is one hex component per byte of the WWN,
+ * separated by ':' and with nothing following the last component.
+ * -EINVAL is returned for a wrong separator, trailing text, an empty
+ * component or a component that does not fit in one byte.  On error
+ * the contents of @c wwn are unspecified.
+ */
+int fc_aton ( const char *wwn_text, struct fc_name *wwn ) {
+    char *ptr = ( ( char * ) wwn_text );
+    char *end;
+    unsigned long value;
+    unsigned int i = 0;
+
+    while ( 1 ) {
+        value = strtoul ( ptr, &end, 16 );
+
+        /* Refuse components with no digits, and values that
+         * would be silently truncated when stored in a byte.
+         */
+        if ( ( end == ptr ) || ( value > 0xff ) )
+            return -EINVAL;
+        wwn->bytes[i++] = value;
+        ptr = end;
+
+        /* The last component must be followed by end of string */
+        if ( i == sizeof ( wwn->bytes ) )
+            return ( ( *ptr == '\0' ) ? 0 : -EINVAL );
+
+        /* Any other component must be followed by a separator */
+        if ( *ptr != ':' )
+            return -EINVAL;
+        ptr++;
+    }
+}
+
+/**
+ * Fill Fibre Channel socket address
+ *
+ * @v sa_fc Fibre Channel socket address to fill in
+ * @v id Fibre Channel port ID
+ * @ret sa Socket address
+ *
+ * The structure is zeroed, its family is set to AF_FC and the port ID
+ * is copied in.  The return value refers to the same storage as
+ * @c sa_fc, viewed as a generic socket address through a local union
+ * type.
+ */
+struct sockaddr * fc_fill_sockaddr ( struct sockaddr_fc *sa_fc,
+ struct fc_port_id *id ) {
+ union {
+ struct sockaddr sa;
+ struct sockaddr_fc fc;
+ } *u = container_of ( sa_fc, typeof ( *u ), fc );
+
+ memset ( sa_fc, 0, sizeof ( *sa_fc ) );
+ sa_fc->sfc_family = AF_FC;
+ memcpy ( &sa_fc->sfc_port_id, id, sizeof ( sa_fc->sfc_port_id ) );
+ return &u->sa;
+}
+
+/******************************************************************************
+ *
+ * Fibre Channel link state
+ *
+ ******************************************************************************
+ */
+
+/** Default link status code (used when no more specific status is known) */
+#define EUNKNOWN_LINK_STATUS __einfo_error ( EINFO_EUNKNOWN_LINK_STATUS )
+#define EINFO_EUNKNOWN_LINK_STATUS \
+ __einfo_uniqify ( EINFO_EINPROGRESS, 0x01, "Unknown" )
+
+/**
+ * Mark Fibre Channel link as up
+ *
+ * @v link Fibre Channel link state monitor
+ *
+ * Stops the retry timer, so no further periodic examinations are
+ * scheduled, and records a link status of zero.
+ */
+static void fc_link_up ( struct fc_link_state *link ) {
+
+ /* Stop retry timer */
+ stop_timer ( &link->timer );
+
+ /* Record link state (zero, i.e. no error) */
+ link->rc = 0;
+}
+
+/**
+ * Mark Fibre Channel link as down
+ *
+ * @v link Fibre Channel link state monitor
+ * @v rc Link state
+ *
+ * A status of zero is replaced by -EUNKNOWN_LINK_STATUS, so the
+ * recorded state is never zero after this call.  The retry timer is
+ * then (re)started so that the link is examined again after
+ * FC_LINK_RETRY_DELAY.
+ */
+static void fc_link_err ( struct fc_link_state *link, int rc ) {
+
+ /* Record link state (never record zero for a link that is down) */
+ if ( rc == 0 )
+ rc = -EUNKNOWN_LINK_STATUS;
+ link->rc = rc;
+
+ /* Schedule another link examination */
+ start_timer_fixed ( &link->timer, FC_LINK_RETRY_DELAY );
+}
+
+/**
+ * Invoke the link's examination method
+ *
+ * @v link Fibre Channel link state monitor
+ */
+static void fc_link_examine ( struct fc_link_state *link ) {
+    void ( * examine ) ( struct fc_link_state *link ) = link->examine;
+
+    /* Delegate to the method registered via fc_link_init() */
+    examine ( link );
+}
+
+/**
+ * Handle Fibre Channel link retry timer expiry
+ *
+ * @v timer Retry timer (embedded in a Fibre Channel link state monitor)
+ * @v over Failure indicator (unused)
+ *
+ * The timer is restarted before the link is examined, so examinations
+ * keep recurring unless the examination itself stops the timer.
+ */
+static void fc_link_expired ( struct retry_timer *timer, int over __unused ) {
+ struct fc_link_state *link =
+ container_of ( timer, struct fc_link_state, timer );
+
+ /* Schedule another link examination */
+ start_timer_fixed ( &link->timer, FC_LINK_RETRY_DELAY );
+
+ /* Examine link */
+ fc_link_examine ( link );
+}
+
+/**
+ * Prepare a Fibre Channel link state monitor for use
+ *
+ * @v link Fibre Channel link state monitor
+ * @v examine Examine link state method
+ * @v refcnt Reference counter
+ *
+ * The link starts out with status -EUNKNOWN_LINK_STATUS.  The timer
+ * is only initialised here, not started.
+ */
+static void fc_link_init ( struct fc_link_state *link,
+                           void ( * examine ) ( struct fc_link_state *link ),
+                           struct refcnt *refcnt ) {
+
+    /* Method called on each examination */
+    link->examine = examine;
+
+    /* Nothing is known about the link yet */
+    link->rc = -EUNKNOWN_LINK_STATUS;
+
+    /* Periodic re-examination timer */
+    timer_init ( &link->timer, fc_link_expired, refcnt );
+}
+
+/**
+ * Begin monitoring Fibre Channel link state
+ *
+ * @v link Fibre Channel link state monitor
+ *
+ * Starts the link's retry timer with no delay.
+ */
+static void fc_link_start ( struct fc_link_state *link ) {
+    struct retry_timer *timer = &link->timer;
+
+    start_timer_nodelay ( timer );
+}
+
+/**
+ * Cease monitoring Fibre Channel link state
+ *
+ * @v link Fibre Channel link state monitor
+ *
+ * Stops the link's retry timer; the recorded link status is left
+ * untouched.
+ */
+static void fc_link_stop ( struct fc_link_state *link ) {
+    struct retry_timer *timer = &link->timer;
+
+    stop_timer ( timer );
+}
+
+/******************************************************************************
+ *
+ * Fibre Channel exchanges
+ *
+ ******************************************************************************
+ */
+
+/** A Fibre Channel exchange */
+struct fc_exchange {
+ /** Reference count */
+ struct refcnt refcnt;
+ /** Fibre Channel port (a reference is held until the exchange is freed) */
+ struct fc_port *port;
+ /** List of active exchanges within this port */
+ struct list_head list;
+
+ /** Peer port ID */
+ struct fc_port_id peer_port_id;
+ /** Data structure type (written to the type field of transmitted frames) */
+ unsigned int type;
+ /** Flags (see enum fc_exchange_flags) */
+ unsigned int flags;
+ /** Local exchange ID */
+ uint16_t xchg_id;
+ /** Peer exchange ID (FC_RX_ID_UNKNOWN until a frame has been received) */
+ uint16_t peer_xchg_id;
+ /** Active sequence ID */
+ uint8_t seq_id;
+ /** Active sequence count (next frame number within the active sequence) */
+ uint16_t seq_cnt;
+
+ /** Timeout timer (restarted with FC_TIMEOUT on each frame sent or received) */
+ struct retry_timer timer;
+
+ /** Upper-layer protocol interface */
+ struct interface ulp;
+};
+
+/** Fibre Channel exchange flags (bit values for fc_exchange::flags) */
+enum fc_exchange_flags {
+ /** We are the exchange originator */
+ FC_XCHG_ORIGINATOR = 0x0001,
+ /** We have the sequence initiative (required in order to transmit) */
+ FC_XCHG_SEQ_INITIATIVE = 0x0002,
+ /** This is the first sequence of the exchange */
+ FC_XCHG_SEQ_FIRST = 0x0004,
+};
+
+/** Fibre Channel exchange timeout, in timer ticks (one second) */
+#define FC_TIMEOUT ( 1 * TICKS_PER_SEC )
+
+/**
+ * Allocate the next local Fibre Channel exchange identifier
+ *
+ * @ret xchg_id Local exchange ID
+ *
+ * Identifiers advance in steps of two from an even starting point and
+ * wrap modulo 2^16, so every value returned is even.
+ */
+static unsigned int fc_new_xchg_id ( void ) {
+    static uint16_t last_id = 0x0000;
+
+    /* Stepping by two keeps us clear of FC_RX_ID_UNKNOWN (0xffff) */
+    last_id = ( ( uint16_t ) ( last_id + 2 ) );
+    return last_id;
+}
+
+/**
+ * Allocate the next local Fibre Channel sequence identifier
+ *
+ * @ret seq_id Local sequence identifier
+ *
+ * Identifiers come from a single 8-bit counter, so they wrap after
+ * 256 allocations.
+ */
+static unsigned int fc_new_seq_id ( void ) {
+    static uint8_t seq_id = 0x00;
+
+    seq_id = ( ( uint8_t ) ( seq_id + 1 ) );
+    return seq_id;
+}
+
+/**
+ * Release a Fibre Channel exchange
+ *
+ * @v refcnt Reference count
+ *
+ * The exchange must already have been closed: its timer must be
+ * stopped and it must no longer be on a list of exchanges.
+ */
+static void fc_xchg_free ( struct refcnt *refcnt ) {
+    struct fc_exchange *exchange;
+
+    exchange = container_of ( refcnt, struct fc_exchange, refcnt );
+
+    /* Sanity checks */
+    assert ( ! timer_running ( &exchange->timer ) );
+    assert ( list_empty ( &exchange->list ) );
+
+    /* Drop the port reference, then the exchange itself */
+    fc_port_put ( exchange->port );
+    free ( exchange );
+}
+
+/**
+ * Close Fibre Channel exchange
+ *
+ * @v xchg Fibre Channel exchange
+ * @v rc Reason for close
+ *
+ * Stops the timeout timer, removes the exchange from its list of
+ * open exchanges (dropping the list's reference) and shuts down the
+ * upper-layer interface.  The list check means the list's reference
+ * is dropped at most once, even if this is called again.
+ */
+static void fc_xchg_close ( struct fc_exchange *xchg, int rc ) {
+ struct fc_port *port = xchg->port;
+
+ if ( rc != 0 ) {
+ DBGC2 ( port, "FCXCHG %s/%04x closed: %s\n",
+ port->name, xchg->xchg_id, strerror ( rc ) );
+ }
+
+ /* Stop timer */
+ stop_timer ( &xchg->timer );
+
+ /* If list still holds a reference, remove from list of open
+ * exchanges and drop list's reference.
+ *
+ * NOTE(review): the exchange is still used below, so the caller
+ * presumably holds a reference of its own - confirm.
+ */
+ if ( ! list_empty ( &xchg->list ) ) {
+ list_del ( &xchg->list );
+ INIT_LIST_HEAD ( &xchg->list );
+ ref_put ( &xchg->refcnt );
+ }
+
+ /* Shutdown interfaces */
+ intf_shutdown ( &xchg->ulp, rc );
+}
+
+/**
+ * Handle expiry of an exchange's timeout timer
+ *
+ * @v timer Timeout timer
+ * @v over Failure indicator
+ *
+ * The exchange is closed with -ETIMEDOUT.
+ */
+static void fc_xchg_expired ( struct retry_timer *timer, int over __unused ) {
+    struct fc_exchange *exchange;
+    struct fc_port *port;
+
+    exchange = container_of ( timer, struct fc_exchange, timer );
+    port = exchange->port;
+
+    DBGC ( port, "FCXCHG %s/%04x timed out\n",
+           port->name, exchange->xchg_id );
+
+    /* Give up on the exchange */
+    fc_xchg_close ( exchange, -ETIMEDOUT );
+}
+
+/**
+ * Check Fibre Channel exchange window
+ *
+ * @v xchg Fibre Channel exchange
+ * @ret len Length of window
+ *
+ * The path MTU is not currently stored, so the default login MTU is
+ * reported for every exchange.
+ */
+static size_t fc_xchg_window ( struct fc_exchange *xchg __unused ) {
+    const size_t window = FC_LOGIN_DEFAULT_MTU;
+
+    return window;
+}
+
+/**
+ * Allocate Fibre Channel I/O buffer
+ *
+ * @v xchg Fibre Channel exchange
+ * @v len Payload length
+ * @ret iobuf I/O buffer, or NULL
+ *
+ * The buffer is obtained from the port's transport interface with
+ * room for a frame header, which is reserved before returning.
+ */
+static struct io_buffer * fc_xchg_alloc_iob ( struct fc_exchange *xchg,
+                                              size_t len ) {
+    const size_t hdr_len = sizeof ( struct fc_frame_header );
+    struct fc_port *port = xchg->port;
+    struct io_buffer *iobuf;
+
+    iobuf = xfer_alloc_iob ( &port->transport, ( hdr_len + len ) );
+    if ( ! iobuf )
+        return NULL;
+
+    /* Leave space for fc_xchg_tx() to push the frame header */
+    iob_reserve ( iobuf, hdr_len );
+    return iobuf;
+}
+
+/**
+ * Transmit data as part of a Fibre Channel exchange
+ *
+ * @v xchg Fibre Channel exchange
+ * @v iobuf I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ *
+ * Fails with -EBUSY unless we currently hold the sequence initiative.
+ * Otherwise a frame header is prepended to the payload, built from
+ * the exchange state and the metadata flags, and the frame is handed
+ * to the port's transport interface.  The exchange timeout is
+ * restarted on every transmission attempt.
+ *
+ * The I/O buffer is always disposed of: it is either passed on to the
+ * transport or freed here.
+ */
+static int fc_xchg_tx ( struct fc_exchange *xchg, struct io_buffer *iobuf,
+ struct xfer_metadata *meta ) {
+ struct fc_port *port = xchg->port;
+ struct sockaddr_fc *dest = ( ( struct sockaddr_fc * ) meta->dest );
+ struct fc_frame_header *fchdr;
+ unsigned int r_ctl;
+ unsigned int f_ctl_es;
+ int rc;
+
+ /* Sanity checks */
+ if ( ! ( xchg->flags & FC_XCHG_SEQ_INITIATIVE ) ) {
+ DBGC ( port, "FCXCHG %s/%04x cannot transmit while not "
+ "holding sequence initiative\n",
+ port->name, xchg->xchg_id );
+ rc = -EBUSY;
+ goto done;
+ }
+
+ /* Calculate routing control (depends on exchange type and on
+ * whether this is a response and/or command/status frame)
+ */
+ switch ( xchg->type ) {
+ case FC_TYPE_ELS:
+ r_ctl = FC_R_CTL_ELS;
+ if ( meta->flags & XFER_FL_RESPONSE ) {
+ r_ctl |= FC_R_CTL_SOL_CTRL;
+ } else {
+ r_ctl |= FC_R_CTL_UNSOL_CTRL;
+ }
+ break;
+ case FC_TYPE_CT:
+ r_ctl = FC_R_CTL_DATA;
+ if ( meta->flags & XFER_FL_RESPONSE ) {
+ r_ctl |= FC_R_CTL_SOL_CTRL;
+ } else {
+ r_ctl |= FC_R_CTL_UNSOL_CTRL;
+ }
+ break;
+ default:
+ r_ctl = FC_R_CTL_DATA;
+ switch ( meta->flags &
+ ( XFER_FL_CMD_STAT | XFER_FL_RESPONSE ) ) {
+ case ( XFER_FL_CMD_STAT | XFER_FL_RESPONSE ):
+ r_ctl |= FC_R_CTL_CMD_STAT;
+ break;
+ case ( XFER_FL_CMD_STAT ):
+ r_ctl |= FC_R_CTL_UNSOL_CMD;
+ break;
+ case ( XFER_FL_RESPONSE ):
+ r_ctl |= FC_R_CTL_SOL_DATA;
+ break;
+ default:
+ r_ctl |= FC_R_CTL_UNSOL_DATA;
+ break;
+ }
+ break;
+ }
+
+ /* Calculate exchange and sequence control */
+ f_ctl_es = 0;
+ if ( ! ( xchg->flags & FC_XCHG_ORIGINATOR ) )
+ f_ctl_es |= FC_F_CTL_ES_RESPONDER;
+ if ( xchg->flags & FC_XCHG_SEQ_FIRST )
+ f_ctl_es |= FC_F_CTL_ES_FIRST;
+ if ( meta->flags & XFER_FL_OUT )
+ f_ctl_es |= ( FC_F_CTL_ES_END | FC_F_CTL_ES_LAST );
+ if ( meta->flags & XFER_FL_OVER )
+ f_ctl_es |= ( FC_F_CTL_ES_END | FC_F_CTL_ES_TRANSFER );
+
+ /* Create frame header (destination is the explicit metadata
+ * destination if given, otherwise the exchange's peer)
+ */
+ fchdr = iob_push ( iobuf, sizeof ( *fchdr ) );
+ memset ( fchdr, 0, sizeof ( *fchdr ) );
+ fchdr->r_ctl = r_ctl;
+ memcpy ( &fchdr->d_id,
+ ( dest ? &dest->sfc_port_id : &xchg->peer_port_id ),
+ sizeof ( fchdr->d_id ) );
+ memcpy ( &fchdr->s_id, &port->port_id, sizeof ( fchdr->s_id ) );
+ fchdr->type = xchg->type;
+ fchdr->f_ctl_es = f_ctl_es;
+ fchdr->seq_id = xchg->seq_id;
+ fchdr->seq_cnt = htons ( xchg->seq_cnt++ );
+ fchdr->ox_id = htons ( ( xchg->flags & FC_XCHG_ORIGINATOR ) ?
+ xchg->xchg_id : xchg->peer_xchg_id );
+ fchdr->rx_id = htons ( ( xchg->flags & FC_XCHG_ORIGINATOR ) ?
+ xchg->peer_xchg_id : xchg->xchg_id );
+ if ( meta->flags & XFER_FL_ABS_OFFSET ) {
+ fchdr->f_ctl_misc |= FC_F_CTL_MISC_REL_OFF;
+ fchdr->parameter = htonl ( meta->offset );
+ }
+
+ /* Relinquish sequence initiative if applicable (this also
+ * restarts the sequence count for the next sequence)
+ */
+ if ( meta->flags & XFER_FL_OVER ) {
+ xchg->flags &= ~( FC_XCHG_SEQ_INITIATIVE | FC_XCHG_SEQ_FIRST );
+ xchg->seq_cnt = 0;
+ }
+
+ /* Reset timeout */
+ start_timer_fixed ( &xchg->timer, FC_TIMEOUT );
+
+ /* Deliver frame */
+ if ( ( rc = xfer_deliver_iob ( &port->transport,
+ iob_disown ( iobuf ) ) ) != 0 ) {
+ DBGC ( port, "FCXCHG %s/%04x cannot transmit: %s\n",
+ port->name, xchg->xchg_id, strerror ( rc ) );
+ goto done;
+ }
+
+ done:
+ free_iob ( iobuf ); /* presumably a no-op once iob_disown() has run - confirm */
+ return rc;
+}
+
+/** Mapping from Fibre Channel routing control information to xfer metadata
+ *
+ * Indexed by the information bits of a received frame's r_ctl field
+ * (r_ctl & FC_R_CTL_INFO_MASK); each entry holds the XFER_FL_xxx flags
+ * to report for that kind of frame.
+ */
+static const uint8_t fc_r_ctl_info_meta_flags[ FC_R_CTL_INFO_MASK + 1 ] = {
+ [FC_R_CTL_UNCAT] = ( 0 ),
+ [FC_R_CTL_SOL_DATA] = ( XFER_FL_RESPONSE ),
+ [FC_R_CTL_UNSOL_CTRL] = ( XFER_FL_CMD_STAT ),
+ [FC_R_CTL_SOL_CTRL] = ( XFER_FL_CMD_STAT ),
+ [FC_R_CTL_UNSOL_DATA] = ( 0 ),
+ [FC_R_CTL_DATA_DESC] = ( XFER_FL_CMD_STAT ),
+ [FC_R_CTL_UNSOL_CMD] = ( XFER_FL_CMD_STAT ),
+ [FC_R_CTL_CMD_STAT] = ( XFER_FL_CMD_STAT | XFER_FL_RESPONSE ),
+};
+
+/**
+ * Receive data as part of a Fibre Channel exchange
+ *
+ * @v xchg Fibre Channel exchange
+ * @v iobuf I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ *
+ * The frame is rejected with -EBUSY if we hold the sequence
+ * initiative, and with -EPIPE if its sequence count or sequence ID is
+ * not the one expected.  Otherwise the exchange state is updated, the
+ * frame header is stripped and the payload is delivered to the
+ * upper-layer protocol with metadata derived from the header.  If the
+ * frame is marked as the end of the last sequence and delivery
+ * succeeded, the exchange is closed.
+ *
+ * The I/O buffer is always disposed of: it is either passed on to the
+ * upper-layer protocol or freed here.
+ */
+static int fc_xchg_rx ( struct fc_exchange *xchg, struct io_buffer *iobuf,
+                        struct xfer_metadata *meta __unused ) {
+    struct fc_port *port = xchg->port;
+    struct fc_frame_header *fchdr = iobuf->data;
+    struct xfer_metadata fc_meta;
+    struct sockaddr_fc src;
+    struct sockaddr_fc dest;
+    int is_last;
+    int rc;
+
+    /* Record peer exchange ID */
+    xchg->peer_xchg_id =
+        ntohs ( ( fchdr->f_ctl_es & FC_F_CTL_ES_RESPONDER ) ?
+                fchdr->rx_id : fchdr->ox_id );
+
+    /* Sequence checks */
+    if ( xchg->flags & FC_XCHG_SEQ_INITIATIVE ) {
+        DBGC ( port, "FCXCHG %s/%04x received frame while holding "
+               "sequence initiative\n", port->name, xchg->xchg_id );
+        rc = -EBUSY;
+        goto done;
+    }
+    if ( ntohs ( fchdr->seq_cnt ) != xchg->seq_cnt ) {
+        DBGC ( port, "FCXCHG %s/%04x received out-of-order frame %d "
+               "(expected %d)\n", port->name, xchg->xchg_id,
+               ntohs ( fchdr->seq_cnt ), xchg->seq_cnt );
+        rc = -EPIPE;
+        goto done;
+    }
+    if ( xchg->seq_cnt == 0 )
+        xchg->seq_id = fchdr->seq_id;
+    xchg->seq_cnt++;
+    if ( fchdr->seq_id != xchg->seq_id ) {
+        DBGC ( port, "FCXCHG %s/%04x received frame for incorrect "
+               "sequence %02x (expected %02x)\n", port->name,
+               xchg->xchg_id, fchdr->seq_id, xchg->seq_id );
+        rc = -EPIPE;
+        goto done;
+    }
+
+    /* Check for end of sequence and transfer of sequence initiative */
+    if ( fchdr->f_ctl_es & FC_F_CTL_ES_END ) {
+        xchg->seq_cnt = 0;
+        if ( fchdr->f_ctl_es & FC_F_CTL_ES_TRANSFER ) {
+            xchg->flags |= FC_XCHG_SEQ_INITIATIVE;
+            xchg->seq_id = fc_new_seq_id();
+        }
+    }
+
+    /* Note whether this frame ends the exchange.  This has to be
+     * sampled now: the frame header lives inside the I/O buffer,
+     * which is no longer ours to read once it has been handed to
+     * the upper-layer protocol.
+     */
+    is_last = ( ( fchdr->f_ctl_es & FC_F_CTL_ES_LAST ) &&
+                ( fchdr->f_ctl_es & FC_F_CTL_ES_END ) );
+
+    /* Construct metadata */
+    memset ( &fc_meta, 0, sizeof ( fc_meta ) );
+    fc_meta.flags =
+        fc_r_ctl_info_meta_flags[ fchdr->r_ctl & FC_R_CTL_INFO_MASK ];
+    if ( fchdr->f_ctl_es & FC_F_CTL_ES_TRANSFER ) {
+        fc_meta.flags |= XFER_FL_OVER;
+    }
+    if ( is_last ) {
+        fc_meta.flags |= XFER_FL_OUT;
+    }
+    if ( fchdr->f_ctl_misc & FC_F_CTL_MISC_REL_OFF ) {
+        fc_meta.flags |= XFER_FL_ABS_OFFSET;
+        fc_meta.offset = ntohl ( fchdr->parameter );
+    }
+    fc_meta.src = fc_fill_sockaddr ( &src, &fchdr->s_id );
+    fc_meta.dest = fc_fill_sockaddr ( &dest, &fchdr->d_id );
+
+    /* Reset timeout */
+    start_timer_fixed ( &xchg->timer, FC_TIMEOUT );
+
+    /* Deliver via exchange's ULP interface */
+    iob_pull ( iobuf, sizeof ( *fchdr ) );
+    if ( ( rc = xfer_deliver ( &xchg->ulp, iob_disown ( iobuf ),
+                               &fc_meta ) ) != 0 ) {
+        DBGC ( port, "FCXCHG %s/%04x cannot deliver frame: %s\n",
+               port->name, xchg->xchg_id, strerror ( rc ) );
+        goto done;
+    }
+
+    /* Close exchange if applicable */
+    if ( is_last ) {
+        fc_xchg_close ( xchg, 0 );
+    }
+
+ done:
+    free_iob ( iobuf );
+    return rc;
+}
+
+/** Fibre Channel exchange ULP interface operations
+ *
+ * Data delivered by the upper-layer protocol is transmitted via
+ * fc_xchg_tx(); closing the interface closes the exchange.
+ */
+static struct interface_operation fc_xchg_ulp_op[] = {
+ INTF_OP ( xfer_deliver, struct fc_exchange *, fc_xchg_tx ),
+ INTF_OP ( xfer_alloc_iob, struct fc_exchange *, fc_xchg_alloc_iob ),
+ INTF_OP ( xfer_window, struct fc_exchange *, fc_xchg_window ),
+ INTF_OP ( intf_close, struct fc_exchange *, fc_xchg_close ),
+};
+
+/** Fibre Channel exchange ULP interface descriptor (for fc_exchange::ulp) */
+static struct interface_descriptor fc_xchg_ulp_desc =
+ INTF_DESC ( struct fc_exchange, ulp, fc_xchg_ulp_op );
+
+/**
+ * Create new Fibre Channel exchange
+ *
+ * @v port Fibre Channel port
+ * @v peer_port_id Peer port ID
+ * @v type Data structure type
+ * @ret xchg Exchange, or NULL
+ *
+ * The new exchange takes a reference to the port, is given fresh
+ * local exchange and sequence IDs and an unknown peer exchange ID,
+ * and is added to the port's list of exchanges.  Its flags are left
+ * as zero and its timer is not started.
+ */
+static struct fc_exchange * fc_xchg_create ( struct fc_port *port,
+ struct fc_port_id *peer_port_id,
+ unsigned int type ) {
+ struct fc_exchange *xchg;
+
+ /* Allocate and initialise structure */
+ xchg = zalloc ( sizeof ( *xchg ) );
+ if ( ! xchg )
+ return NULL;
+ ref_init ( &xchg->refcnt, fc_xchg_free );
+ intf_init ( &xchg->ulp, &fc_xchg_ulp_desc, &xchg->refcnt );
+ timer_init ( &xchg->timer, fc_xchg_expired, &xchg->refcnt );
+ xchg->port = fc_port_get ( port );
+ memcpy ( &xchg->peer_port_id, peer_port_id,
+ sizeof ( xchg->peer_port_id ) );
+ xchg->type = type;
+ xchg->xchg_id = fc_new_xchg_id();
+ xchg->peer_xchg_id = FC_RX_ID_UNKNOWN;
+ xchg->seq_id = fc_new_seq_id();
+
+ /* Transfer reference to list of exchanges and return (the
+ * list's reference is dropped again by fc_xchg_close())
+ */
+ list_add ( &xchg->list, &port->xchgs );
+ return xchg;
+}
+
+/**
+ * Start a new Fibre Channel exchange as originator
+ *
+ * @v parent Interface to which to attach
+ * @v port Fibre Channel port
+ * @v peer_port_id Peer port ID
+ * @v type Data structure type
+ * @ret xchg_id Exchange ID, or negative error
+ *
+ * The new exchange starts out holding the sequence initiative for the
+ * first sequence, and its upper-layer interface is plugged into the
+ * parent interface.
+ */
+int fc_xchg_originate ( struct interface *parent, struct fc_port *port,
+                        struct fc_port_id *peer_port_id, unsigned int type ) {
+    const unsigned int originator_flags =
+        ( FC_XCHG_ORIGINATOR | FC_XCHG_SEQ_INITIATIVE |
+          FC_XCHG_SEQ_FIRST );
+    struct fc_exchange *exchange;
+
+    /* Allocate and initialise structure */
+    exchange = fc_xchg_create ( port, peer_port_id, type );
+    if ( exchange == NULL )
+        return -ENOMEM;
+    exchange->flags = originator_flags;
+
+    DBGC2 ( port, "FCXCHG %s/%04x originating to %s (type %02x)\n",
+            port->name, exchange->xchg_id,
+            fc_id_ntoa ( &exchange->peer_port_id ), exchange->type );
+
+    /* Attach to parent interface and return */
+    intf_plug_plug ( &exchange->ulp, parent );
+    return exchange->xchg_id;
+}
+
+/**
+ * Open a new responder Fibre Channel exchange
+ *
+ * @v port Fibre Channel port
+ * @v fchdr Fibre Channel frame header
+ * @ret xchg Fibre Channel exchange, or NULL
+ *
+ * The exchange is created towards the frame's source port and adopts
+ * the frame's sequence ID.  The responder table is then searched for
+ * the first responder registered for the frame's type; if there is
+ * one, it is asked to attach itself to the exchange.  A failure to
+ * respond is only logged: the exchange is returned regardless.
+ */
+static struct fc_exchange * fc_xchg_respond ( struct fc_port *port,
+                                              struct fc_frame_header *fchdr ) {
+    struct fc_exchange *xchg;
+    struct fc_responder *responder;
+    unsigned int type = fchdr->type;
+    int rc;
+
+    /* Allocate and initialise structure */
+    xchg = fc_xchg_create ( port, &fchdr->s_id, type );
+    if ( ! xchg )
+        return NULL;
+    xchg->seq_id = fchdr->seq_id;
+
+    DBGC2 ( port, "FCXCHG %s/%04x responding to %s xchg %04x (type "
+            "%02x)\n", port->name, xchg->xchg_id,
+            fc_id_ntoa ( &xchg->peer_port_id ),
+            ntohs ( fchdr->ox_id ), xchg->type );
+
+    /* Find a responder, if any */
+    for_each_table_entry ( responder, FC_RESPONDERS ) {
+
+        /* Keep searching until the type matches; stopping at
+         * the first table entry regardless of its type would
+         * ignore every responder but the first.
+         */
+        if ( responder->type != type )
+            continue;
+
+        if ( ( rc = responder->respond ( &xchg->ulp, port,
+                                         &fchdr->d_id,
+                                         &fchdr->s_id ) ) !=0 ){
+            DBGC ( port, "FCXCHG %s/%04x could not "
+                   "respond: %s\n", port->name,
+                   xchg->xchg_id, strerror ( rc ) );
+        }
+        break;
+    }
+
+    /* We may or may not have a ULP attached at this point, but
+     * the exchange does exist.
+     */
+    return xchg;
+}
+
+/******************************************************************************
+ *
+ * Fibre Channel ports
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Close Fibre Channel port
+ *
+ * @v port Fibre Channel port
+ * @v rc Reason for close
+ *
+ * Logs the port out if its link is currently OK, stops the link
+ * monitor, shuts down the port's interfaces, closes every exchange
+ * still open on the port and finally removes the port from the list
+ * it is on.
+ */
+static void fc_port_close ( struct fc_port *port, int rc ) {
+ struct fc_exchange *xchg;
+ struct fc_exchange *tmp;
+
+ DBGC ( port, "FCPORT %s closed\n", port->name );
+
+ /* Log out port, if necessary */
+ if ( fc_link_ok ( &port->link ) )
+ fc_port_logout ( port, rc );
+
+ /* Stop link monitor */
+ fc_link_stop ( &port->link );
+
+ /* Shut down interfaces */
+ intf_shutdown ( &port->transport, rc );
+ intf_shutdown ( &port->flogi, rc );
+ intf_shutdown ( &port->ns_plogi, rc );
+
+ /* Shut down any remaining exchanges (the safe iterator is
+ * needed because closing removes the exchange from this list)
+ */
+ list_for_each_entry_safe ( xchg, tmp, &port->xchgs, list )
+ fc_xchg_close ( xchg, rc );
+
+ /* Remove from list of ports */
+ list_del ( &port->list );
+ INIT_LIST_HEAD ( &port->list );
+}
+
+/**
+ * Look up an open Fibre Channel exchange by its local exchange ID
+ *
+ * @v port Fibre Channel port
+ * @v xchg_id Local exchange ID
+ * @ret xchg Fibre Channel exchange, or NULL
+ *
+ * Only the port's own list of exchanges is searched.  No reference is
+ * taken on the exchange returned.
+ */
+static struct fc_exchange * fc_port_demux ( struct fc_port *port,
+                                            unsigned int xchg_id ) {
+    struct fc_exchange *candidate;
+
+    list_for_each_entry ( candidate, &port->xchgs, list ) {
+        if ( candidate->xchg_id != xchg_id )
+            continue;
+        return candidate;
+    }
+
+    /* No open exchange has this ID */
+    return NULL;
+}
+
+/**
+ * Handle received frame from Fibre Channel port
+ *
+ * @v port Fibre Channel port
+ * @v iobuf I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ *
+ * Frames shorter than a frame header are rejected with -EINVAL.  A
+ * frame is rejected with -ENOTCONN if its destination is neither our
+ * port ID nor the F_Port controller ID while we have a port ID
+ * assigned, or if it belongs to no known exchange and does not start
+ * a new one.  Accepted frames are passed to fc_xchg_rx().
+ *
+ * The I/O buffer is always disposed of: it is either passed on to the
+ * exchange or freed here.
+ */
+static int fc_port_deliver ( struct fc_port *port, struct io_buffer *iobuf,
+ struct xfer_metadata *meta ) {
+ struct fc_frame_header *fchdr = iobuf->data;
+ unsigned int xchg_id;
+ struct fc_exchange *xchg;
+ int rc;
+
+ /* Sanity check */
+ if ( iob_len ( iobuf ) < sizeof ( *fchdr ) ) {
+ DBGC ( port, "FCPORT %s received underlength frame (%zd "
+ "bytes)\n", port->name, iob_len ( iobuf ) );
+ rc = -EINVAL;
+ goto err_sanity;
+ }
+
+ /* Verify local port ID (any destination is accepted while our
+ * own port ID is still unassigned)
+ */
+ if ( ( memcmp ( &fchdr->d_id, &port->port_id,
+ sizeof ( fchdr->d_id ) ) != 0 ) &&
+ ( memcmp ( &fchdr->d_id, &fc_f_port_id,
+ sizeof ( fchdr->d_id ) ) != 0 ) &&
+ ( memcmp ( &port->port_id, &fc_empty_port_id,
+ sizeof ( port->port_id ) ) != 0 ) ) {
+ DBGC ( port, "FCPORT %s received frame for incorrect port ID "
+ "%s\n", port->name, fc_id_ntoa ( &fchdr->d_id ) );
+ rc = -ENOTCONN;
+ goto err_port_id;
+ }
+
+ /* Demultiplex amongst active exchanges (our local ID is in the
+ * ox_id field if the sender is the responder, otherwise rx_id)
+ */
+ xchg_id = ntohs ( ( fchdr->f_ctl_es & FC_F_CTL_ES_RESPONDER ) ?
+ fchdr->ox_id : fchdr->rx_id );
+ xchg = fc_port_demux ( port, xchg_id );
+
+ /* If we have no active exchange and this frame starts a new
+ * exchange, try to create a new responder exchange
+ *
+ * NOTE(review): the condition below does not actually test
+ * whether an active exchange was found; a first frame always
+ * creates a new exchange - confirm that this is intended.
+ */
+ if ( ( fchdr->f_ctl_es & FC_F_CTL_ES_FIRST ) &&
+ ( fchdr->seq_cnt == 0 ) ) {
+
+ /* Create new exchange */
+ xchg = fc_xchg_respond ( port, fchdr );
+ if ( ! xchg ) {
+ DBGC ( port, "FCPORT %s cannot create new exchange\n",
+ port->name );
+ rc = -ENOMEM;
+ goto err_respond;
+ }
+ }
+
+ /* Fail if no exchange exists */
+ if ( ! xchg ) {
+ DBGC ( port, "FCPORT %s xchg %04x unknown\n",
+ port->name, xchg_id );
+ rc = -ENOTCONN;
+ goto err_no_xchg;
+ }
+
+ /* Pass received frame to exchange (hold a reference across the
+ * call, since the exchange may be closed while handling it)
+ */
+ ref_get ( &xchg->refcnt );
+ if ( ( rc = fc_xchg_rx ( xchg, iob_disown ( iobuf ), meta ) ) != 0 )
+ goto err_xchg_rx;
+
+ err_xchg_rx:
+ ref_put ( &xchg->refcnt );
+ err_no_xchg:
+ err_respond:
+ err_port_id:
+ err_sanity:
+ free_iob ( iobuf ); /* presumably a no-op once iob_disown() has run - confirm */
+ return rc;
+}
+
+/**
+ * Log in Fibre Channel port
+ *
+ * @v port Fibre Channel port
+ * @v port_id Local port ID
+ * @v link_node_wwn Link node name
+ * @v link_port_wwn Link port name
+ * @v has_fabric Link is to a fabric
+ * @ret rc Return status code
+ *
+ * If the port is already logged in with different details, it is
+ * logged out first.  On a fabric the given port ID is used as the
+ * local port ID.  On a point-to-point link the two well-known
+ * point-to-point port IDs are shared out by comparing port names:
+ * the side with the greater name takes the high ID.
+ *
+ * When attached to a fabric and not yet logged in to the name server,
+ * a name server PLOGI is started; if that cannot be initiated the
+ * port is logged out and the error is returned.  Otherwise the link
+ * is marked as up and every known peer's link state is re-examined.
+ */
+int fc_port_login ( struct fc_port *port, struct fc_port_id *port_id,
+ const struct fc_name *link_node_wwn,
+ const struct fc_name *link_port_wwn, int has_fabric ) {
+ struct fc_peer *peer;
+ struct fc_peer *tmp;
+ int rc;
+
+ /* Perform implicit logout if logged in and details differ (the
+ * port ID is only compared when attached to a fabric)
+ */
+ if ( fc_link_ok ( &port->link ) &&
+ ( ( ( !! ( port->flags & FC_PORT_HAS_FABRIC ) ) !=
+ ( !! has_fabric ) ) ||
+ ( memcmp ( &port->link_node_wwn, link_node_wwn,
+ sizeof ( port->link_node_wwn ) ) != 0 ) ||
+ ( memcmp ( &port->link_port_wwn, link_port_wwn,
+ sizeof ( port->link_port_wwn ) ) != 0 ) ||
+ ( has_fabric &&
+ ( memcmp ( &port->port_id, port_id,
+ sizeof ( port->port_id ) ) != 0 ) ) ) ) {
+ fc_port_logout ( port, 0 );
+ }
+
+ /* Log in, if applicable */
+ if ( ! fc_link_ok ( &port->link ) ) {
+
+ /* Record link port name (two separate debug messages
+ * are used since fc_ntoa() returns a static buffer)
+ */
+ memcpy ( &port->link_node_wwn, link_node_wwn,
+ sizeof ( port->link_node_wwn ) );
+ memcpy ( &port->link_port_wwn, link_port_wwn,
+ sizeof ( port->link_port_wwn ) );
+ DBGC ( port, "FCPORT %s logged in to %s",
+ port->name, fc_ntoa ( &port->link_node_wwn ) );
+ DBGC ( port, " port %s\n", fc_ntoa ( &port->link_port_wwn ) );
+
+ /* Calculate local (and possibly remote) port IDs */
+ if ( has_fabric ) {
+ port->flags |= FC_PORT_HAS_FABRIC;
+ memcpy ( &port->port_id, port_id,
+ sizeof ( port->port_id ) );
+ } else {
+ port->flags &= ~FC_PORT_HAS_FABRIC;
+ if ( memcmp ( &port->port_wwn, link_port_wwn,
+ sizeof ( port->port_wwn ) ) > 0 ) {
+ memcpy ( &port->port_id, &fc_ptp_high_port_id,
+ sizeof ( port->port_id ) );
+ memcpy ( &port->ptp_link_port_id,
+ &fc_ptp_low_port_id,
+ sizeof ( port->ptp_link_port_id ) );
+ } else {
+ memcpy ( &port->port_id, &fc_ptp_low_port_id,
+ sizeof ( port->port_id ) );
+ memcpy ( &port->ptp_link_port_id,
+ &fc_ptp_high_port_id,
+ sizeof ( port->ptp_link_port_id ) );
+ }
+ }
+ DBGC ( port, "FCPORT %s logged in via a %s, with local ID "
+ "%s\n", port->name,
+ ( ( port->flags & FC_PORT_HAS_FABRIC ) ?
+ "fabric" : "point-to-point link" ),
+ fc_id_ntoa ( &port->port_id ) );
+ }
+
+ /* Log in to name server, if attached to a fabric */
+ if ( has_fabric && ! ( port->flags & FC_PORT_HAS_NS ) ) {
+
+ DBGC ( port, "FCPORT %s attempting login to name server\n",
+ port->name );
+
+ intf_restart ( &port->ns_plogi, -ECANCELED );
+ if ( ( rc = fc_els_plogi ( &port->ns_plogi, port,
+ &fc_gs_port_id ) ) != 0 ) {
+ DBGC ( port, "FCPORT %s could not initiate name "
+ "server PLOGI: %s\n",
+ port->name, strerror ( rc ) );
+ fc_port_logout ( port, rc );
+ return rc;
+ }
+ }
+
+ /* Record login */
+ fc_link_up ( &port->link );
+
+ /* Notify peers of link state change (each peer is held by a
+ * temporary reference while its link is examined)
+ */
+ list_for_each_entry_safe ( peer, tmp, &fc_peers, list ) {
+ fc_peer_get ( peer );
+ fc_link_examine ( &peer->link );
+ fc_peer_put ( peer );
+ }
+
+ return 0;
+}
+
+/**
+ * Log out Fibre Channel port
+ *
+ * Clears the port ID and all port flags, records the link error and
+ * asks every known peer to re-examine its link.
+ *
+ * @v port		Fibre Channel port
+ * @v rc		Reason for logout
+ */
+void fc_port_logout ( struct fc_port *port, int rc ) {
+	struct fc_peer *peer;
+	struct fc_peer *next;
+
+	DBGC ( port, "FCPORT %s logged out: %s\n",
+	       port->name, strerror ( rc ) );
+
+	/* Forget the login state */
+	port->flags = 0;
+	memset ( &port->port_id, 0, sizeof ( port->port_id ) );
+
+	/* Mark the link as failed */
+	fc_link_err ( &port->link, rc );
+
+	/* Let every peer re-examine its link; hold a reference on
+	 * each peer for the duration of its examination.
+	 */
+	list_for_each_entry_safe ( peer, next, &fc_peers, list ) {
+		fc_peer_get ( peer );
+		fc_link_examine ( &peer->link );
+		fc_peer_put ( peer );
+	}
+}
+
+/**
+ * Handle FLOGI completion
+ *
+ * Restarts the FLOGI interface and, if the FLOGI failed, logs the
+ * port out with the failure reason.
+ *
+ * @v port		Fibre Channel port
+ * @v rc		Reason for completion
+ */
+static void fc_port_flogi_done ( struct fc_port *port, int rc ) {
+
+	/* Reset the interface ready for any later FLOGI */
+	intf_restart ( &port->flogi, rc );
+
+	/* Nothing more to do on success */
+	if ( rc == 0 )
+		return;
+
+	fc_port_logout ( port, rc );
+}
+
+/**
+ * Handle name server PLOGI completion
+ *
+ * Restarts the name server PLOGI interface and, on success, flags
+ * the port as having a name server.  Failure is only logged.
+ *
+ * @v port		Fibre Channel port
+ * @v rc		Reason for completion
+ */
+static void fc_port_ns_plogi_done ( struct fc_port *port, int rc ) {
+
+	/* Reset the interface ready for any later PLOGI */
+	intf_restart ( &port->ns_plogi, rc );
+
+	/* Absence of a name server is not a fatal error */
+	if ( rc != 0 ) {
+		DBGC ( port, "FCPORT %s could not log in to name server: %s\n",
+		       port->name, strerror ( rc ) );
+		return;
+	}
+
+	port->flags |= FC_PORT_HAS_NS;
+	DBGC ( port, "FCPORT %s logged in to name server\n",
+	       port->name );
+}
+
+/**
+ * Examine Fibre Channel port link state
+ *
+ * If the port is not logged in, restarts the FLOGI interface and
+ * initiates a new FLOGI.  The port is logged out with the error if
+ * the FLOGI cannot be initiated.
+ *
+ * @v link		Fibre Channel link state monitor
+ */
+static void fc_port_examine ( struct fc_link_state *link ) {
+	struct fc_port *port;
+	int rc;
+
+	port = container_of ( link, struct fc_port, link );
+
+	/* A port that is already logged in needs no attention */
+	if ( fc_link_ok ( &port->link ) )
+		return;
+
+	DBGC ( port, "FCPORT %s attempting login\n", port->name );
+
+	/* Cancel any FLOGI in progress and start a fresh one */
+	intf_restart ( &port->flogi, -ECANCELED );
+	rc = fc_els_flogi ( &port->flogi, port );
+	if ( rc == 0 )
+		return;
+
+	DBGC ( port, "FCPORT %s could not initiate FLOGI: %s\n",
+	       port->name, strerror ( rc ) );
+	fc_port_logout ( port, rc );
+}
+
+/**
+ * Handle change of flow control window
+ *
+ * A non-zero transport window starts the link monitor (unless the
+ * link is already up); a zero window logs the port out and stops
+ * the link monitor.
+ *
+ * @v port		Fibre Channel port
+ */
+static void fc_port_window_changed ( struct fc_port *port ) {
+
+	/* Transport layer is not ready.  Log out port and wait for
+	 * transport layer before attempting log in again.
+	 */
+	if ( xfer_window ( &port->transport ) == 0 ) {
+		fc_port_logout ( port, -ENOTCONN );
+		fc_link_stop ( &port->link );
+		return;
+	}
+
+	/* Transport layer is ready.  Start login if the link is not
+	 * already up.
+	 */
+	if ( ! fc_link_ok ( &port->link ) )
+		fc_link_start ( &port->link );
+}
+
+/** Fibre Channel port transport interface operations
+ *
+ * Routes xfer_deliver to fc_port_deliver(), xfer_window_changed to
+ * fc_port_window_changed() and intf_close to fc_port_close().
+ */
+static struct interface_operation fc_port_transport_op[] = {
+ INTF_OP ( xfer_deliver, struct fc_port *, fc_port_deliver ),
+ INTF_OP ( xfer_window_changed, struct fc_port *,
+ fc_port_window_changed ),
+ INTF_OP ( intf_close, struct fc_port *, fc_port_close ),
+};
+
+/** Fibre Channel port transport interface descriptor
+ *
+ * Describes the "transport" interface embedded in struct fc_port.
+ */
+static struct interface_descriptor fc_port_transport_desc =
+ INTF_DESC ( struct fc_port, transport, fc_port_transport_op );
+
+/** Fibre Channel port FLOGI interface operations
+ *
+ * Closing the FLOGI interface is reported to fc_port_flogi_done().
+ */
+static struct interface_operation fc_port_flogi_op[] = {
+ INTF_OP ( intf_close, struct fc_port *, fc_port_flogi_done ),
+};
+
+/** Fibre Channel port FLOGI interface descriptor
+ *
+ * Describes the "flogi" interface embedded in struct fc_port.
+ */
+static struct interface_descriptor fc_port_flogi_desc =
+ INTF_DESC ( struct fc_port, flogi, fc_port_flogi_op );
+
+/** Fibre Channel port name server PLOGI interface operations
+ *
+ * Closing the name server PLOGI interface is reported to
+ * fc_port_ns_plogi_done().
+ */
+static struct interface_operation fc_port_ns_plogi_op[] = {
+ INTF_OP ( intf_close, struct fc_port *, fc_port_ns_plogi_done ),
+};
+
+/** Fibre Channel port name server PLOGI interface descriptor
+ *
+ * Describes the "ns_plogi" interface embedded in struct fc_port.
+ */
+static struct interface_descriptor fc_port_ns_plogi_desc =
+ INTF_DESC ( struct fc_port, ns_plogi, fc_port_ns_plogi_op );
+
+/**
+ * Create Fibre Channel port
+ *
+ * Allocates a port, initialises its interfaces and link monitor,
+ * adds it to the global list of ports and plugs it into the given
+ * transport interface.
+ *
+ * @v transport		Transport interface
+ * @v node_wwn		Fibre Channel node name
+ * @v port_wwn		Fibre Channel port name
+ * @v name		Symbolic port name
+ * @ret rc		Return status code (-ENOMEM if allocation fails)
+ */
+int fc_port_open ( struct interface *transport, const struct fc_name *node_wwn,
+		   const struct fc_name *port_wwn, const char *name ) {
+	struct fc_port *port = zalloc ( sizeof ( *port ) );
+
+	if ( ! port )
+		return -ENOMEM;
+
+	/* Reference count, interfaces and link state monitor */
+	ref_init ( &port->refcnt, NULL );
+	intf_init ( &port->transport, &fc_port_transport_desc, &port->refcnt );
+	fc_link_init ( &port->link, fc_port_examine, &port->refcnt );
+	intf_init ( &port->flogi, &fc_port_flogi_desc, &port->refcnt );
+	intf_init ( &port->ns_plogi, &fc_port_ns_plogi_desc, &port->refcnt );
+
+	/* Names (the symbolic name is truncated to fit if necessary) */
+	memcpy ( &port->node_wwn, node_wwn, sizeof ( port->node_wwn ) );
+	memcpy ( &port->port_wwn, port_wwn, sizeof ( port->port_wwn ) );
+	snprintf ( port->name, sizeof ( port->name ), "%s", name );
+
+	/* Exchange list and global list of ports */
+	INIT_LIST_HEAD ( &port->xchgs );
+	list_add_tail ( &port->list, &fc_ports );
+
+	DBGC ( port, "FCPORT %s opened as %s",
+	       port->name, fc_ntoa ( &port->node_wwn ) );
+	DBGC ( port, " port %s\n", fc_ntoa ( &port->port_wwn ) );
+
+	/* Attach to the transport layer, then drop our initial
+	 * reference so that the port no longer keeps itself alive.
+	 */
+	intf_plug_plug ( &port->transport, transport );
+	ref_put ( &port->refcnt );
+	return 0;
+}
+
+/**
+ * Find Fibre Channel port by name
+ *
+ * Searches the global list of ports for one whose symbolic name
+ * matches exactly.  No reference is taken on the returned port.
+ *
+ * @v name		Symbolic port name
+ * @ret port		Fibre Channel port, or NULL
+ */
+struct fc_port * fc_port_find ( const char *name ) {
+	struct fc_port *candidate;
+
+	list_for_each_entry ( candidate, &fc_ports, list ) {
+		if ( strcmp ( name, candidate->name ) != 0 )
+			continue;
+		return candidate;
+	}
+
+	/* No port has this name */
+	return NULL;
+}
+
+/******************************************************************************
+ *
+ * Fibre Channel peers
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Close Fibre Channel peer
+ *
+ * Stops the peer's link monitor, shuts down its PLOGI interface and
+ * removes it from the list of peers.  The peer must have no ULPs.
+ *
+ * @v peer		Fibre Channel peer
+ * @v rc		Reason for close
+ */
+static void fc_peer_close ( struct fc_peer *peer, int rc ) {
+
+	DBGC ( peer, "FCPEER %s closed: %s\n",
+	       fc_ntoa ( &peer->port_wwn ) , strerror ( rc ) );
+
+	/* A peer may be closed only once all its ULPs have gone */
+	assert ( list_empty ( &peer->ulps ) );
+
+	/* Halt the link monitor */
+	fc_link_stop ( &peer->link );
+
+	/* Terminate any PLOGI in progress */
+	intf_shutdown ( &peer->plogi, rc );
+
+	/* Unlink from the list of peers, leaving the list entry
+	 * re-initialised as an empty list head.
+	 */
+	list_del ( &peer->list );
+	INIT_LIST_HEAD ( &peer->list );
+}
+
+/**
+ * Increment Fibre Channel peer active usage count
+ *
+ * The usage count is what fc_peer_decrement() and fc_peer_logout()
+ * consult when deciding whether the peer is still in use.
+ *
+ * @v peer		Fibre Channel peer
+ */
+static void fc_peer_increment ( struct fc_peer *peer ) {
+
+	/* One more active user of this peer */
+	peer->usage += 1;
+}
+
+/**
+ * Decrement Fibre Channel peer active usage count
+ *
+ * Logs the peer out (with a zero reason code) when the last active
+ * user goes away.
+ *
+ * @v peer		Fibre Channel peer
+ */
+static void fc_peer_decrement ( struct fc_peer *peer ) {
+
+	/* There must be a user to remove */
+	assert ( peer->usage > 0 );
+
+	/* One fewer active user of this peer */
+	peer->usage -= 1;
+
+	/* Log out once nobody is using the peer any more */
+	if ( peer->usage == 0 )
+		fc_peer_logout ( peer, 0 );
+}
+
+/**
+ * Log in Fibre Channel peer
+ *
+ * If the peer is already logged in via a different port or with a
+ * different port ID, it is implicitly logged out first.  A fresh
+ * login records the port and port ID and takes a login reference on
+ * the peer.  The link is then marked as up and every ULP attached
+ * to the peer is asked to re-examine its link.
+ *
+ * @v peer		Fibre Channel peer
+ * @v port		Fibre Channel port
+ * @v port_id		Port ID
+ * @ret rc		Return status code (always zero)
+ */
+int fc_peer_login ( struct fc_peer *peer, struct fc_port *port,
+		    struct fc_port_id *port_id ) {
+	struct fc_ulp *ulp;
+	struct fc_ulp *next;
+
+	/* An existing login with different details must be torn down */
+	if ( fc_link_ok ( &peer->link ) ) {
+		if ( ( peer->port != port ) ||
+		     ( memcmp ( &peer->port_id, port_id,
+				sizeof ( peer->port_id ) ) != 0 ) ) {
+			fc_peer_logout ( peer, 0 );
+		}
+	}
+
+	/* Record the new login details unless we are still logged in */
+	if ( ! fc_link_ok ( &peer->link ) ) {
+
+		/* Remember which port reaches the peer, and its ID */
+		assert ( peer->port == NULL );
+		peer->port = fc_port_get ( port );
+		memcpy ( &peer->port_id, port_id, sizeof ( peer->port_id ) );
+		DBGC ( peer, "FCPEER %s logged in via %s as %s\n",
+		       fc_ntoa ( &peer->port_wwn ), peer->port->name,
+		       fc_id_ntoa ( &peer->port_id ) );
+
+		/* The login itself holds a reference to the peer */
+		fc_peer_get ( peer );
+	}
+
+	/* Mark the peer link as up */
+	fc_link_up ( &peer->link );
+
+	/* Let every ULP re-examine its link; hold a reference on
+	 * each ULP for the duration of its examination.
+	 */
+	list_for_each_entry_safe ( ulp, next, &peer->ulps, list ) {
+		fc_ulp_get ( ulp );
+		fc_link_examine ( &ulp->link );
+		fc_ulp_put ( ulp );
+	}
+
+	return 0;
+}
+
+/**
+ * Log out Fibre Channel peer
+ *
+ * Drops the login reference (if the peer link was up), releases the
+ * peer's port, records the link error, asks every ULP attached to
+ * the peer to re-examine its link, and finally closes the peer if
+ * its usage count is zero.
+ *
+ * @v peer Fibre Channel peer
+ * @v rc Reason for logout
+ */
+void fc_peer_logout ( struct fc_peer *peer, int rc ) {
+ struct fc_ulp *ulp;
+ struct fc_ulp *tmp;
+
+ DBGC ( peer, "FCPEER %s logged out: %s\n",
+ fc_ntoa ( &peer->port_wwn ), strerror ( rc ) );
+
+ /* Drop login reference (taken in fc_peer_login()), if applicable */
+ if ( fc_link_ok ( &peer->link ) )
+ fc_peer_put ( peer );
+
+ /* Erase peer details */
+ fc_port_put ( peer->port );
+ peer->port = NULL;
+
+ /* Record logout */
+ fc_link_err ( &peer->link, rc );
+
+ /* Notify ULPs of link state change, holding a reference on each
+ * ULP while it is examined
+ */
+ list_for_each_entry_safe ( ulp, tmp, &peer->ulps, list ) {
+ fc_ulp_get ( ulp );
+ fc_link_examine ( &ulp->link );
+ fc_ulp_put ( ulp );
+ }
+
+ /* Close peer if there are no active users */
+ if ( peer->usage == 0 )
+ fc_peer_close ( peer, rc );
+}
+
+/**
+ * Handle PLOGI completion
+ *
+ * Restarts the PLOGI interface and, if the PLOGI failed, logs the
+ * peer out with the failure reason.
+ *
+ * @v peer		Fibre Channel peer
+ * @v rc		Reason for completion
+ */
+static void fc_peer_plogi_done ( struct fc_peer *peer, int rc ) {
+
+	/* Reset the interface ready for any later PLOGI */
+	intf_restart ( &peer->plogi, rc );
+
+	/* Nothing more to do on success */
+	if ( rc == 0 )
+		return;
+
+	fc_peer_logout ( peer, rc );
+}
+
+/**
+ * Initiate PLOGI
+ *
+ * Cancels any PLOGI already in progress on the peer and starts a
+ * new one via the given port.  The peer is logged out with the
+ * error if the PLOGI cannot be initiated.
+ *
+ * @v peer		Fibre Channel peer
+ * @v port		Fibre Channel port
+ * @v peer_port_id	Peer port ID
+ * @ret rc		Return status code
+ */
+static int fc_peer_plogi ( struct fc_peer *peer, struct fc_port *port,
+			   struct fc_port_id *peer_port_id ) {
+	int rc;
+
+	/* Cancel any PLOGI in progress and start a fresh one */
+	intf_restart ( &peer->plogi, -ECANCELED );
+	rc = fc_els_plogi ( &peer->plogi, port, peer_port_id );
+	if ( rc == 0 )
+		return 0;
+
+	DBGC ( peer, "FCPEER %s could not initiate PLOGI: %s\n",
+	       fc_ntoa ( &peer->port_wwn ), strerror ( rc ) );
+	fc_peer_logout ( peer, rc );
+	return rc;
+}
+
+/**
+ * Examine Fibre Channel peer link state
+ *
+ * Logs the peer out if the port it is using has lost its link.
+ * Otherwise, if the peer is not logged in, attempts a PLOGI over a
+ * point-to-point link whose link port name matches the peer, or
+ * failing that starts a name server query on every fabric-attached
+ * port that has a name server.
+ *
+ * @v link		Fibre Channel link state monitor
+ */
+static void fc_peer_examine ( struct fc_link_state *link ) {
+	struct fc_peer *peer;
+	struct fc_port *port;
+	int rc;
+
+	peer = container_of ( link, struct fc_peer, link );
+
+	/* Log out if the port we depend upon has gone down */
+	if ( peer->port && ( ! fc_link_ok ( &peer->port->link ) ) ) {
+		fc_peer_logout ( peer, -ENOTCONN );
+		return;
+	}
+
+	/* A peer that is already logged in needs no attention */
+	if ( fc_link_ok ( &peer->link ) )
+		return;
+
+	DBGC ( peer, "FCPEER %s attempting login\n",
+	       fc_ntoa ( &peer->port_wwn ) );
+
+	/* A logged-out peer must not have a port */
+	assert ( peer->port == NULL );
+
+	/* First choice: a logged-in port connected directly to the
+	 * peer via a point-to-point link.
+	 */
+	list_for_each_entry ( port, &fc_ports, list ) {
+		if ( ! fc_link_ok ( &port->link ) )
+			continue;
+		if ( port->flags & FC_PORT_HAS_FABRIC )
+			continue;
+		if ( memcmp ( &peer->port_wwn, &port->link_port_wwn,
+			      sizeof ( peer->port_wwn ) ) != 0 )
+			continue;
+
+		/* Use this peer port ID, and stop looking */
+		fc_peer_plogi ( peer, port, &port->ptp_link_port_id );
+		return;
+	}
+
+	/* Otherwise: ask the name server on every logged-in fabric
+	 * port that has one.
+	 */
+	list_for_each_entry ( port, &fc_ports, list ) {
+		if ( ! fc_link_ok ( &port->link ) )
+			continue;
+		if ( ! ( port->flags & FC_PORT_HAS_FABRIC ) )
+			continue;
+		if ( ! ( port->flags & FC_PORT_HAS_NS ) )
+			continue;
+
+		rc = fc_ns_query ( peer, port, fc_peer_plogi );
+		if ( rc == 0 )
+			continue;
+
+		/* Failure to query one port is non-fatal */
+		DBGC ( peer, "FCPEER %s could not attempt "
+		       "name server lookup on %s: %s\n",
+		       fc_ntoa ( &peer->port_wwn ), port->name,
+		       strerror ( rc ) );
+	}
+}
+
+/** Fibre Channel peer PLOGI interface operations
+ *
+ * Closing the PLOGI interface is reported to fc_peer_plogi_done().
+ */
+static struct interface_operation fc_peer_plogi_op[] = {
+ INTF_OP ( intf_close, struct fc_peer *, fc_peer_plogi_done ),
+};
+
+/** Fibre Channel peer PLOGI interface descriptor
+ *
+ * Describes the "plogi" interface embedded in struct fc_peer.
+ */
+static struct interface_descriptor fc_peer_plogi_desc =
+ INTF_DESC ( struct fc_peer, plogi, fc_peer_plogi_op );
+
+/**
+ * Create Fibre Channel peer
+ *
+ * Allocates a peer for the given port name, adds it to the list of
+ * peers and starts its link monitor.
+ *
+ * @v port_wwn		Port name
+ * @ret peer		Fibre Channel peer, or NULL
+ */
+static struct fc_peer * fc_peer_create ( const struct fc_name *port_wwn ) {
+	struct fc_peer *peer = zalloc ( sizeof ( *peer ) );
+
+	if ( ! peer )
+		return NULL;
+
+	/* Reference count, link state monitor and PLOGI interface */
+	ref_init ( &peer->refcnt, NULL );
+	fc_link_init ( &peer->link, fc_peer_examine, &peer->refcnt );
+	intf_init ( &peer->plogi, &fc_peer_plogi_desc, &peer->refcnt );
+
+	/* Identity and (initially empty) list of ULPs */
+	memcpy ( &peer->port_wwn, port_wwn, sizeof ( peer->port_wwn ) );
+	INIT_LIST_HEAD ( &peer->ulps );
+
+	/* Make the peer visible in the list of peers */
+	list_add_tail ( &peer->list, &fc_peers );
+
+	/* Begin monitoring the link */
+	fc_link_start ( &peer->link );
+
+	DBGC ( peer, "FCPEER %s created\n", fc_ntoa ( &peer->port_wwn ) );
+	return peer;
+}
+
+/**
+ * Get Fibre Channel peer by port name
+ *
+ * Returns a new reference to an existing peer with this port name,
+ * or creates a new peer if none exists.
+ *
+ * @v port_wwn		Port name
+ * @ret peer		Fibre Channel peer, or NULL
+ */
+struct fc_peer * fc_peer_get_wwn ( const struct fc_name *port_wwn ) {
+	struct fc_peer *existing;
+
+	/* Prefer a peer that we already know about */
+	list_for_each_entry ( existing, &fc_peers, list ) {
+		if ( memcmp ( &existing->port_wwn, port_wwn,
+			      sizeof ( existing->port_wwn ) ) != 0 )
+			continue;
+		return fc_peer_get ( existing );
+	}
+
+	/* Otherwise create one (NULL if creation fails) */
+	return fc_peer_create ( port_wwn );
+}
+
+/**
+ * Get Fibre Channel peer by port ID
+ *
+ * Returns a new reference to the existing peer that is logged in
+ * via the given port with the given port ID.  Unlike
+ * fc_peer_get_wwn(), this never creates a peer.
+ *
+ * @v port		Fibre Channel port
+ * @v peer_port_id	Peer port ID
+ * @ret peer		Fibre Channel peer, or NULL
+ */
+struct fc_peer * fc_peer_get_port_id ( struct fc_port *port,
+				       const struct fc_port_id *peer_port_id ){
+	struct fc_peer *existing;
+
+	/* Search the peers that we already know about */
+	list_for_each_entry ( existing, &fc_peers, list ) {
+		if ( existing->port != port )
+			continue;
+		if ( memcmp ( &existing->port_id, peer_port_id,
+			      sizeof ( existing->port_id ) ) != 0 )
+			continue;
+		return fc_peer_get ( existing );
+	}
+
+	/* Cannot create a new peer, since we have no port name to use */
+	return NULL;
+}
+
+/******************************************************************************
+ *
+ * Fibre Channel upper-layer protocols
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Free Fibre Channel upper-layer protocol
+ *
+ * Releases the ULP's reference to its peer and frees the ULP.
+ *
+ * @v refcnt		Reference count
+ */
+static void fc_ulp_free ( struct refcnt *refcnt ) {
+	struct fc_ulp *ulp;
+
+	ulp = container_of ( refcnt, struct fc_ulp, refcnt );
+
+	/* Drop the peer reference held since creation */
+	fc_peer_put ( ulp->peer );
+	free ( ulp );
+}
+
+/**
+ * Close Fibre Channel upper-layer protocol
+ *
+ * Stops the ULP's link monitor, shuts down its PRLI interface and
+ * removes it from its peer's list of ULPs.  The ULP must have no
+ * users.
+ *
+ * @v ulp		Fibre Channel upper-layer protocol
+ * @v rc		Reason for close
+ */
+static void fc_ulp_close ( struct fc_ulp *ulp, int rc ) {
+
+	DBGC ( ulp, "FCULP %s/%02x closed: %s\n",
+	       fc_ntoa ( &ulp->peer->port_wwn ), ulp->type, strerror ( rc ) );
+
+	/* A ULP may be closed only once all its users have gone */
+	assert ( list_empty ( &ulp->users ) );
+
+	/* Halt the link monitor */
+	fc_link_stop ( &ulp->link );
+
+	/* Terminate any PRLI in progress */
+	intf_shutdown ( &ulp->prli, rc );
+
+	/* Unlink from the list of ULPs, leaving the list entry
+	 * re-initialised as an empty list head.
+	 */
+	list_del ( &ulp->list );
+	INIT_LIST_HEAD ( &ulp->list );
+}
+
+/**
+ * Attach Fibre Channel upper-layer protocol user
+ *
+ * Counts the user against the ULP's peer, gives the user a
+ * reference to the ULP and adds it to the ULP's list of users.
+ * The user must not already be attached.
+ *
+ * @v ulp		Fibre Channel upper-layer protocol
+ * @v user		Fibre Channel upper-layer protocol user
+ */
+void fc_ulp_attach ( struct fc_ulp *ulp, struct fc_ulp_user *user ) {
+
+	/* The user must currently be detached */
+	assert ( user->ulp == NULL );
+
+	/* The peer gains an active user */
+	fc_peer_increment ( ulp->peer );
+
+	/* Link user and ULP together */
+	user->ulp = fc_ulp_get ( ulp );
+	list_add ( &user->list, &ulp->users );
+}
+
+/**
+ * Detach Fibre Channel upper-layer protocol user
+ *
+ * Does nothing if the user is not attached.  Otherwise removes the
+ * user from its ULP's list of users, logs the ULP out if that was
+ * the last user, decrements the peer's usage count, and drops the
+ * user's reference to the ULP.
+ *
+ * @v user Fibre Channel upper-layer protocol user
+ */
+void fc_ulp_detach ( struct fc_ulp_user *user ) {
+ struct fc_ulp *ulp = user->ulp;
+
+ /* Do nothing if not attached */
+ if ( ! ulp )
+ return;
+
+ /* Sanity checks */
+ list_check_contains_entry ( user, &ulp->users, list );
+
+ /* Detach user and log out if no users remain */
+ list_del ( &user->list );
+ if ( list_empty ( &ulp->users ) )
+ fc_ulp_logout ( ulp, 0 );
+
+ /* Decrement our peer's usage count (undoing the increment made
+ * in fc_ulp_attach())
+ */
+ fc_peer_decrement ( ulp->peer );
+
+ /* Drop reference (taken in fc_ulp_attach()) */
+ user->ulp = NULL;
+ fc_ulp_put ( ulp );
+}
+
+/**
+ * Log in Fibre Channel upper-layer protocol
+ *
+ * If the ULP is already logged in with different service parameters
+ * it is implicitly logged out first.  Until a login originated by us
+ * has been seen (on this call or an earlier one), the link monitor
+ * is restarted and the function returns success without recording
+ * the login; per the debug message this causes an extra PRLI to be
+ * sent.  Otherwise a fresh login copies the service parameters and
+ * takes a login reference; the link is then marked as up and the
+ * examine() method of every attached user is called.
+ *
+ * @v ulp Fibre Channel upper-layer protocol
+ * @v param Service parameters
+ * @v param_len Length of service parameters
+ * @v originated Login was originated by us
+ * @ret rc Return status code (-ENOMEM if the parameters cannot
+ * be copied)
+ */
+int fc_ulp_login ( struct fc_ulp *ulp, const void *param, size_t param_len,
+ int originated ) {
+ struct fc_ulp_user *user;
+ struct fc_ulp_user *tmp;
+
+ /* Perform implicit logout if logged in and service parameters differ
+ * (contents are compared only when the lengths already match)
+ */
+ if ( fc_link_ok ( &ulp->link ) &&
+ ( ( ulp->param_len != param_len ) ||
+ ( memcmp ( ulp->param, param, ulp->param_len ) != 0 ) ) ) {
+ fc_ulp_logout ( ulp, 0 );
+ }
+
+ /* Work around a bug in some versions of the Linux Fibre
+ * Channel stack, which fail to fully initialise image pairs
+ * established via a PRLI originated by the Linux stack
+ * itself.
+ */
+ if ( originated )
+ ulp->flags |= FC_ULP_ORIGINATED_LOGIN_OK;
+ if ( ! ( ulp->flags & FC_ULP_ORIGINATED_LOGIN_OK ) ) {
+ DBGC ( ulp, "FCULP %s/%02x sending extra PRLI to work around "
+ "Linux bug\n",
+ fc_ntoa ( &ulp->peer->port_wwn ), ulp->type );
+ fc_link_stop ( &ulp->link );
+ fc_link_start ( &ulp->link );
+ return 0;
+ }
+
+ /* Log in, if applicable */
+ if ( ! fc_link_ok ( &ulp->link ) ) {
+
+ /* Record service parameters (a private copy, released in
+ * fc_ulp_logout())
+ */
+ assert ( ulp->param == NULL );
+ assert ( ulp->param_len == 0 );
+ ulp->param = malloc ( param_len );
+ if ( ! ulp->param ) {
+ DBGC ( ulp, "FCULP %s/%02x could not record "
+ "parameters\n",
+ fc_ntoa ( &ulp->peer->port_wwn ), ulp->type );
+ return -ENOMEM;
+ }
+ memcpy ( ulp->param, param, param_len );
+ ulp->param_len = param_len;
+ DBGC ( ulp, "FCULP %s/%02x logged in with parameters:\n",
+ fc_ntoa ( &ulp->peer->port_wwn ), ulp->type );
+ DBGC_HDA ( ulp, 0, ulp->param, ulp->param_len );
+
+ /* Add login reference (dropped in fc_ulp_logout()) */
+ fc_ulp_get ( ulp );
+ }
+
+ /* Record login */
+ fc_link_up ( &ulp->link );
+
+ /* Notify users of link state change, holding a reference on each
+ * user while its examine() method runs
+ */
+ list_for_each_entry_safe ( user, tmp, &ulp->users, list ) {
+ fc_ulp_user_get ( user );
+ user->examine ( user );
+ fc_ulp_user_put ( user );
+ }
+
+ return 0;
+}
+
+/**
+ * Log out Fibre Channel upper-layer protocol
+ *
+ * Drops the login reference (if the ULP link was up), frees the
+ * recorded service parameters, clears all ULP flags, records the
+ * link error, calls the examine() method of every attached user,
+ * and finally closes the ULP if it has no users.
+ *
+ * @v ulp Fibre Channel upper-layer protocol
+ * @v rc Reason for logout
+ */
+void fc_ulp_logout ( struct fc_ulp *ulp, int rc ) {
+ struct fc_ulp_user *user;
+ struct fc_ulp_user *tmp;
+
+ DBGC ( ulp, "FCULP %s/%02x logged out: %s\n",
+ fc_ntoa ( &ulp->peer->port_wwn ), ulp->type, strerror ( rc ) );
+
+ /* Drop login reference (taken in fc_ulp_login()), if applicable */
+ if ( fc_link_ok ( &ulp->link ) )
+ fc_ulp_put ( ulp );
+
+ /* Discard service parameters; clearing the flags also clears
+ * FC_ULP_ORIGINATED_LOGIN_OK
+ */
+ free ( ulp->param );
+ ulp->param = NULL;
+ ulp->param_len = 0;
+ ulp->flags = 0;
+
+ /* Record logout */
+ fc_link_err ( &ulp->link, rc );
+
+ /* Notify users of link state change, holding a reference on each
+ * user while its examine() method runs
+ */
+ list_for_each_entry_safe ( user, tmp, &ulp->users, list ) {
+ fc_ulp_user_get ( user );
+ user->examine ( user );
+ fc_ulp_user_put ( user );
+ }
+
+ /* Close ULP if there are no clients attached */
+ if ( list_empty ( &ulp->users ) )
+ fc_ulp_close ( ulp, rc );
+}
+
+/**
+ * Handle PRLI completion
+ *
+ * Restarts the PRLI interface and, if the PRLI failed, logs the ULP
+ * out with the failure reason.
+ *
+ * @v ulp		Fibre Channel upper-layer protocol
+ * @v rc		Reason for completion
+ */
+static void fc_ulp_prli_done ( struct fc_ulp *ulp, int rc ) {
+
+	/* Reset the interface ready for any later PRLI */
+	intf_restart ( &ulp->prli, rc );
+
+	/* Nothing more to do on success */
+	if ( rc == 0 )
+		return;
+
+	fc_ulp_logout ( ulp, rc );
+}
+
+/**
+ * Examine Fibre Channel upper-layer protocol link state
+ *
+ * Logs the ULP out if its peer's link is down.  Otherwise, unless
+ * the ULP is logged in and a login originated by us has succeeded,
+ * cancels any PRLI in progress and initiates a new one.  The ULP is
+ * logged out with the error if the PRLI cannot be initiated.
+ *
+ * @v link		Fibre Channel link state monitor
+ */
+static void fc_ulp_examine ( struct fc_link_state *link ) {
+	struct fc_ulp *ulp;
+	int rc;
+
+	ulp = container_of ( link, struct fc_ulp, link );
+
+	/* Log out if the peer we depend upon has gone down */
+	if ( ! fc_link_ok ( &ulp->peer->link ) ) {
+		fc_ulp_logout ( ulp, -ENOTCONN );
+		return;
+	}
+
+	/* A fully logged-in ULP needs no attention */
+	if ( fc_link_ok ( &ulp->link ) ) {
+		if ( ulp->flags & FC_ULP_ORIGINATED_LOGIN_OK )
+			return;
+	}
+
+	DBGC ( ulp, "FCULP %s/%02x attempting login\n",
+	       fc_ntoa ( &ulp->peer->port_wwn ), ulp->type );
+
+	/* Cancel any PRLI in progress and start a fresh one */
+	intf_restart ( &ulp->prli, -ECANCELED );
+	rc = fc_els_prli ( &ulp->prli, ulp->peer->port,
+			   &ulp->peer->port_id, ulp->type );
+	if ( rc == 0 )
+		return;
+
+	DBGC ( ulp, "FCULP %s/%02x could not initiate PRLI: %s\n",
+	       fc_ntoa ( &ulp->peer->port_wwn ), ulp->type,
+	       strerror ( rc ) );
+	fc_ulp_logout ( ulp, rc );
+}
+
+/** Fibre Channel upper-layer protocol PRLI interface operations
+ *
+ * Closing the PRLI interface is reported to fc_ulp_prli_done().
+ */
+static struct interface_operation fc_ulp_prli_op[] = {
+ INTF_OP ( intf_close, struct fc_ulp *, fc_ulp_prli_done ),
+};
+
+/** Fibre Channel upper-layer protocol PRLI interface descriptor
+ *
+ * Describes the "prli" interface embedded in struct fc_ulp.
+ */
+static struct interface_descriptor fc_ulp_prli_desc =
+ INTF_DESC ( struct fc_ulp, prli, fc_ulp_prli_op );
+
+/**
+ * Create Fibre Channel upper-layer protocol
+ *
+ * Allocates a ULP of the given type, takes a reference to the peer,
+ * adds the ULP to the peer's list of ULPs and starts its link
+ * monitor.
+ *
+ * @v peer		Fibre Channel peer
+ * @v type		Type
+ * @ret ulp		Fibre Channel upper-layer protocol, or NULL
+ */
+static struct fc_ulp * fc_ulp_create ( struct fc_peer *peer,
+				       unsigned int type ) {
+	struct fc_ulp *ulp = zalloc ( sizeof ( *ulp ) );
+
+	if ( ! ulp )
+		return NULL;
+
+	/* Reference count, link state monitor and PRLI interface */
+	ref_init ( &ulp->refcnt, fc_ulp_free );
+	fc_link_init ( &ulp->link, fc_ulp_examine, &ulp->refcnt );
+	intf_init ( &ulp->prli, &fc_ulp_prli_desc, &ulp->refcnt );
+
+	/* Type and (initially empty) list of users */
+	ulp->type = type;
+	INIT_LIST_HEAD ( &ulp->users );
+
+	/* Bind to the peer; the reference is released by fc_ulp_free() */
+	ulp->peer = fc_peer_get ( peer );
+	list_add_tail ( &ulp->list, &peer->ulps );
+
+	/* Begin monitoring the link */
+	fc_link_start ( &ulp->link );
+
+	DBGC ( ulp, "FCULP %s/%02x created\n",
+	       fc_ntoa ( &ulp->peer->port_wwn ), ulp->type );
+	return ulp;
+}
+
+/**
+ * Get Fibre Channel upper-layer protocol by peer and type
+ *
+ * Returns a new reference to the peer's existing ULP of this type,
+ * or creates a new ULP if none exists.
+ *
+ * @v peer		Fibre Channel peer
+ * @v type		Type
+ * @ret ulp		Fibre Channel upper-layer protocol, or NULL
+ */
+static struct fc_ulp * fc_ulp_get_type ( struct fc_peer *peer,
+					 unsigned int type ) {
+	struct fc_ulp *existing;
+
+	/* Prefer a ULP that the peer already has */
+	list_for_each_entry ( existing, &peer->ulps, list ) {
+		if ( existing->type != type )
+			continue;
+		return fc_ulp_get ( existing );
+	}
+
+	/* Otherwise create one (NULL if creation fails) */
+	return fc_ulp_create ( peer, type );
+}
+
+/**
+ * Get Fibre Channel upper-layer protocol by port name and type
+ *
+ * Looks up (or creates) the peer with the given port name, then
+ * looks up (or creates) that peer's ULP of the given type.  The
+ * peer reference obtained here is only temporary and is always
+ * dropped before returning.
+ *
+ * @v port_wwn		Port name
+ * @v type		Type
+ * @ret ulp		Fibre Channel upper-layer protocol, or NULL
+ */
+struct fc_ulp * fc_ulp_get_wwn_type ( const struct fc_name *port_wwn,
+				      unsigned int type ) {
+	struct fc_ulp *ulp;
+	struct fc_peer *peer;
+
+	/* Get peer */
+	peer = fc_peer_get_wwn ( port_wwn );
+	if ( ! peer )
+		return NULL;
+
+	/* Get ULP (NULL on failure) */
+	ulp = fc_ulp_get_type ( peer, type );
+
+	/* Drop temporary reference to peer, whether or not we
+	 * obtained a ULP.
+	 */
+	fc_peer_put ( peer );
+
+	return ulp;
+}
+
+/**
+ * Get Fibre Channel upper-layer protocol by port ID and type
+ *
+ * Looks up the existing peer reachable via the given port with the
+ * given port ID (peers are never created here), then looks up (or
+ * creates) that peer's ULP of the given type.  The peer reference
+ * obtained here is only temporary and is always dropped before
+ * returning.
+ *
+ * @v port		Fibre Channel port
+ * @v peer_port_id	Peer port ID
+ * @v type		Type
+ * @ret ulp		Fibre Channel upper-layer protocol, or NULL
+ */
+struct fc_ulp * fc_ulp_get_port_id_type ( struct fc_port *port,
+					  const struct fc_port_id *peer_port_id,
+					  unsigned int type ) {
+	struct fc_ulp *ulp;
+	struct fc_peer *peer;
+
+	/* Get peer */
+	peer = fc_peer_get_port_id ( port, peer_port_id );
+	if ( ! peer )
+		return NULL;
+
+	/* Get ULP (NULL on failure) */
+	ulp = fc_ulp_get_type ( peer, type );
+
+	/* Drop temporary reference to peer, whether or not we
+	 * obtained a ULP.
+	 */
+	fc_peer_put ( peer );
+
+	return ulp;
+}
diff --git a/qemu/roms/ipxe/src/net/fcels.c b/qemu/roms/ipxe/src/net/fcels.c
new file mode 100644
index 000000000..1cfe90727
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/fcels.c
@@ -0,0 +1,1339 @@
+/*
+ * Copyright (C) 2010 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <byteswap.h>
+#include <ipxe/interface.h>
+#include <ipxe/xfer.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/process.h>
+#include <ipxe/fc.h>
+#include <ipxe/fcels.h>
+
+/** @file
+ *
+ * Fibre Channel Extended Link Services
+ *
+ */
+
+/** Fibre Channel ELS transaction debug message format
+ *
+ * The four string conversions are supplied, in order, by
+ * FCELS_ARGS(): port name, handler name, direction and peer port ID.
+ */
+#define FCELS_FMT "FCELS %s %s %s %s"
+
+/** Fibre Channel ELS transaction debug message arguments
+ *
+ * Expands to four comma-separated arguments matching FCELS_FMT. A
+ * transaction with no handler is reported as "unknown ELS"; the
+ * direction is "to" for a request transaction and "from" otherwise.
+ */
+#define FCELS_ARGS( els ) \
+ (els)->port->name, \
+ ( (els)->handler ? (els)->handler->name : "unknown ELS" ), \
+ ( fc_els_is_request ( els ) ? "to" : "from" ), \
+ fc_id_ntoa ( &(els)->peer_port_id )
+
+/* Tentative declaration; the initialised definition appears later in
+ * this file.
+ */
+struct fc_els_handler fc_els_unknown_handler __fc_els_handler;
+
+/**
+ * Free Fibre Channel ELS transaction
+ *
+ * @v refcnt Reference count
+ */
+static void fc_els_free ( struct refcnt *refcnt ) {
+ struct fc_els *els = container_of ( refcnt, struct fc_els, refcnt );
+
+ /* The process must already have been stopped */
+ assert ( ! process_running ( &els->process ) );
+ /* Release the port, then the transaction itself */
+ fc_port_put ( els->port );
+ free ( els );
+}
+
+/**
+ * Close Fibre Channel ELS transaction
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v rc Reason for close
+ */
+static void fc_els_close ( struct fc_els *els, int rc ) {
+
+ /* Only a non-zero close reason is logged */
+ if ( rc != 0 ) {
+ DBGC ( els, FCELS_FMT " complete (%s)\n",
+ FCELS_ARGS ( els ), strerror ( rc ) );
+ }
+
+ /* Stop process */
+ process_del ( &els->process );
+
+ /* Shut down both interfaces, passing on the close reason */
+ intf_shutdown ( &els->xchg, rc );
+ intf_shutdown ( &els->job, rc );
+}
+
+/**
+ * Detect Fibre Channel ELS frame handler
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret handler ELS handler, or NULL
+ */
+static struct fc_els_handler * fc_els_detect ( struct fc_els *els,
+ const void *data,
+ size_t len ) {
+ const struct fc_els_frame_common *frame = data;
+ struct fc_els_handler *handler;
+ int rc;
+
+ /* Sanity check: frame must hold at least the common header */
+ if ( len < sizeof ( *frame ) )
+ return NULL;
+
+ /* Try each handler in turn; the first whose detect() method
+ * returns zero is used.
+ */
+ for_each_table_entry ( handler, FC_ELS_HANDLERS ) {
+ if ( ( rc = handler->detect ( els, data, len ) ) == 0 )
+ return handler;
+ }
+
+ /* No handler recognised the frame */
+ return NULL;
+}
+
+/**
+ * Transmit Fibre Channel ELS frame
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data Data to transmit
+ * @v len Length of data
+ * @ret rc Return status code
+ */
+int fc_els_tx ( struct fc_els *els, const void *data, size_t len ) {
+ struct xfer_metadata meta;
+ struct sockaddr_fc dest;
+ int rc;
+
+ DBGC2 ( els, FCELS_FMT " transmitting:\n", FCELS_ARGS ( els ) );
+ DBGC2_HDA ( els, 0, data, len );
+
+ /* Construct metadata: a request is flagged XFER_FL_OVER, anything
+ * else XFER_FL_RESPONSE | XFER_FL_OUT; the destination is the
+ * transaction's peer port ID.
+ */
+ memset ( &meta, 0, sizeof ( meta ) );
+ meta.flags = ( fc_els_is_request ( els ) ?
+ XFER_FL_OVER : ( XFER_FL_RESPONSE | XFER_FL_OUT ) );
+ meta.dest = fc_fill_sockaddr ( &dest, &els->peer_port_id );
+
+ /* Transmit frame via the exchange interface */
+ if ( ( rc = xfer_deliver_raw_meta ( &els->xchg, data, len,
+ &meta ) ) != 0 ) {
+ DBGC ( els, FCELS_FMT " could not deliver frame: %s\n",
+ FCELS_ARGS ( els ), strerror ( rc ) );
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * Receive Fibre Channel ELS frame
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v iobuf I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ */
+static int fc_els_rx ( struct fc_els *els,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *meta ) {
+ struct fc_els_frame_common *frame = iobuf->data;
+ struct sockaddr_fc *src = ( ( struct sockaddr_fc * ) meta->src );
+ struct sockaddr_fc *dest = ( ( struct sockaddr_fc * ) meta->dest );
+ size_t len = iob_len ( iobuf );
+ int rc;
+
+ /* Sanity checks: the frame must contain the common header and
+ * the metadata must carry both source and destination addresses.
+ */
+ if ( len < sizeof ( *frame ) ) {
+ DBGC ( els, FCELS_FMT " received underlength frame:\n",
+ FCELS_ARGS ( els ) );
+ DBGC_HDA ( els, 0, frame, len );
+ rc = -EINVAL;
+ goto done;
+ }
+ if ( ! src ) {
+ DBGC ( els, FCELS_FMT " received frame missing source "
+ "address:\n", FCELS_ARGS ( els ) );
+ rc = -EINVAL;
+ goto done;
+ }
+ if ( ! dest ) {
+ DBGC ( els, FCELS_FMT " received frame missing destination "
+ "address:\n", FCELS_ARGS ( els ) );
+ rc = -EINVAL;
+ goto done;
+ }
+
+ /* Check for rejection responses: on a request transaction,
+ * any received command other than LS_ACC fails with -EACCES.
+ */
+ if ( fc_els_is_request ( els ) &&
+ ( frame->command != FC_ELS_LS_ACC ) ) {
+ DBGC ( els, FCELS_FMT " rejected:\n", FCELS_ARGS ( els ) );
+ DBGC_HDA ( els, 0, frame, len );
+ rc = -EACCES;
+ goto done;
+ }
+
+ /* Update port IDs: our own from the destination address, the
+ * peer's from the source address.
+ */
+ memcpy ( &els->port_id, &dest->sfc_port_id, sizeof ( els->port_id ) );
+ memcpy ( &els->peer_port_id, &src->sfc_port_id,
+ sizeof ( els->peer_port_id ) );
+
+ /* Determine handler, if necessary, falling back to the unknown
+ * ELS handler when detection finds nothing.
+ */
+ if ( ! els->handler )
+ els->handler = fc_els_detect ( els, frame, len );
+ if ( ! els->handler )
+ els->handler = &fc_els_unknown_handler;
+
+ DBGC2 ( els, FCELS_FMT " received:\n", FCELS_ARGS ( els ) );
+ DBGC2_HDA ( els, 0, frame, len );
+
+ /* Handle received frame */
+ if ( ( rc = els->handler->rx ( els, frame, len ) ) != 0 ) {
+ DBGC ( els, FCELS_FMT " could not handle received frame: "
+ "%s\n", FCELS_ARGS ( els ), strerror ( rc ) );
+ DBGC_HDA ( els, 0, frame, len );
+ goto done;
+ }
+
+ /* Reached on success as well as on every failure above */
+ done:
+ /* Free I/O buffer */
+ free_iob ( iobuf );
+
+ /* Close transaction with the final status */
+ fc_els_close ( els, rc );
+
+ return rc;
+}
+
+/** Fibre Channel ELS exchange interface operations
+ *
+ * Delivered data is handled by fc_els_rx(); a close on this
+ * interface is handled by fc_els_close().
+ */
+static struct interface_operation fc_els_xchg_op[] = {
+ INTF_OP ( xfer_deliver, struct fc_els *, fc_els_rx ),
+ INTF_OP ( intf_close, struct fc_els *, fc_els_close ),
+};
+
+/** Fibre Channel ELS exchange interface descriptor */
+static struct interface_descriptor fc_els_xchg_desc =
+ INTF_DESC ( struct fc_els, xchg, fc_els_xchg_op );
+
+/** Fibre Channel ELS job control interface operations
+ *
+ * A close on this interface is handled by fc_els_close().
+ */
+static struct interface_operation fc_els_job_op[] = {
+ INTF_OP ( intf_close, struct fc_els *, fc_els_close ),
+};
+
+/** Fibre Channel ELS job control interface descriptor */
+static struct interface_descriptor fc_els_job_desc =
+ INTF_DESC ( struct fc_els, job, fc_els_job_op );
+
+/**
+ * Fibre Channel ELS process
+ *
+ * @v els Fibre Channel ELS transaction
+ */
+static void fc_els_step ( struct fc_els *els ) {
+ int xchg_id;
+ int rc;
+
+ /* Sanity check: this process is only expected to run for
+ * request transactions.
+ */
+ assert ( fc_els_is_request ( els ) );
+
+ /* Create exchange; a negative return value is an error code */
+ if ( ( xchg_id = fc_xchg_originate ( &els->xchg, els->port,
+ &els->peer_port_id,
+ FC_TYPE_ELS ) ) < 0 ) {
+ rc = xchg_id;
+ DBGC ( els, FCELS_FMT " could not create exchange: %s\n",
+ FCELS_ARGS ( els ), strerror ( rc ) );
+ fc_els_close ( els, rc );
+ return;
+ }
+
+ /* Transmit request using the handler's tx method; a failure
+ * closes the transaction.
+ */
+ if ( ( rc = els->handler->tx ( els ) ) != 0 ) {
+ DBGC ( els, FCELS_FMT " could not transmit request: %s\n",
+ FCELS_ARGS ( els ), strerror ( rc ) );
+ fc_els_close ( els, rc );
+ return;
+ }
+}
+
+/** Fibre Channel ELS process descriptor
+ *
+ * Binds the process embedded in struct fc_els to fc_els_step().
+ */
+static struct process_descriptor fc_els_process_desc =
+ PROC_DESC_ONCE ( struct fc_els, process, fc_els_step );
+
+/**
+ * Create ELS transaction
+ *
+ * @v port Fibre Channel port
+ * @v port_id Local port ID
+ * @v peer_port_id Peer port ID
+ * @ret els Fibre Channel ELS transaction, or NULL
+ */
+static struct fc_els * fc_els_create ( struct fc_port *port,
+ struct fc_port_id *port_id,
+ struct fc_port_id *peer_port_id ) {
+ struct fc_els *els;
+
+ /* Allocate and initialise structure.  Both interfaces and the
+ * process are tied to the transaction's reference counter, and
+ * the process is initialised in the stopped state.
+ */
+ els = zalloc ( sizeof ( *els ) );
+ if ( ! els )
+ return NULL;
+ ref_init ( &els->refcnt, fc_els_free );
+ intf_init ( &els->job, &fc_els_job_desc, &els->refcnt );
+ intf_init ( &els->xchg, &fc_els_xchg_desc, &els->refcnt );
+ process_init_stopped ( &els->process, &fc_els_process_desc,
+ &els->refcnt );
+ /* Record the port (released again in fc_els_free()) and take
+ * private copies of both port IDs.
+ */
+ els->port = fc_port_get ( port );
+ memcpy ( &els->port_id, port_id, sizeof ( els->port_id ) );
+ memcpy ( &els->peer_port_id, peer_port_id,
+ sizeof ( els->peer_port_id ) );
+ return els;
+}
+
+/**
+ * Create ELS request
+ *
+ * @v job Parent job-control interface
+ * @v port Fibre Channel port
+ * @v peer_port_id Peer port ID
+ * @v handler ELS handler
+ * @ret rc Return status code
+ */
+int fc_els_request ( struct interface *job, struct fc_port *port,
+ struct fc_port_id *peer_port_id,
+ struct fc_els_handler *handler ) {
+ struct fc_els *els;
+
+ /* Allocate and initialise structure, using the port's own port
+ * ID as the local ID.
+ */
+ els = fc_els_create ( port, &port->port_id, peer_port_id );
+ if ( ! els )
+ return -ENOMEM;
+ /* Mark as a request with the supplied handler and add the
+ * process that runs fc_els_step().
+ */
+ els->handler = handler;
+ els->flags = FC_ELS_REQUEST;
+ process_add ( &els->process );
+
+ /* Attach to parent job interface, mortalise self, and return */
+ intf_plug_plug ( &els->job, job );
+ ref_put ( &els->refcnt );
+ return 0;
+}
+
+/**
+ * Create ELS response
+ *
+ * @v xchg Exchange interface
+ * @v port Fibre Channel port
+ * @v port_id Local port ID
+ * @v peer_port_id Peer port ID
+ * @ret rc Return status code
+ */
+static int fc_els_respond ( struct interface *xchg, struct fc_port *port,
+ struct fc_port_id *port_id,
+ struct fc_port_id *peer_port_id ) {
+ struct fc_els *els;
+
+ /* Allocate and initialise structure.  No handler or request
+ * flag is assigned here; fc_els_rx() selects a handler when
+ * els->handler is unset.
+ */
+ els = fc_els_create ( port, port_id, peer_port_id );
+ if ( ! els )
+ return -ENOMEM;
+
+ /* Attach to exchange interface, mortalise self, and return */
+ intf_plug_plug ( &els->xchg, xchg );
+ ref_put ( &els->refcnt );
+ return 0;
+}
+
+/** Fibre Channel ELS responder
+ *
+ * Associates FC_TYPE_ELS with fc_els_respond().
+ */
+struct fc_responder fc_els_responder __fc_responder = {
+ .type = FC_TYPE_ELS,
+ .respond = fc_els_respond,
+};
+
+/******************************************************************************
+ *
+ * Unknown ELS handler
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Transmit unknown ELS request
+ *
+ * @v els Fibre Channel ELS transaction
+ * @ret rc Return status code
+ */
+static int fc_els_unknown_tx ( struct fc_els *els __unused ) {
+ /* Nothing is transmitted: always fails with -ENOTSUP */
+ return -ENOTSUP;
+}
+
+/**
+ * Transmit unknown ELS response
+ *
+ * @v els Fibre Channel ELS transaction
+ * @ret rc Return status code
+ */
+static int fc_els_unknown_tx_response ( struct fc_els *els ) {
+ struct fc_ls_rjt_frame ls_rjt;
+
+ /* Construct LS_RJT with the "unsupported" reason; every other
+ * field is left zeroed by the memset().
+ */
+ memset ( &ls_rjt, 0, sizeof ( ls_rjt ) );
+ ls_rjt.command = FC_ELS_LS_RJT;
+ ls_rjt.reason = FC_ELS_RJT_UNSUPPORTED;
+
+ /* Transmit LS_RJT */
+ return fc_els_tx ( els, &ls_rjt, sizeof ( ls_rjt ) );
+}
+
+/**
+ * Receive unknown ELS
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_unknown_rx ( struct fc_els *els, void *data, size_t len ) {
+ int rc;
+
+ /* Dump the unrecognised frame to the debug log */
+ DBGC ( els, FCELS_FMT ":\n", FCELS_ARGS ( els ) );
+ DBGC_HDA ( els, 0, data, len );
+
+ /* Transmit response, if applicable: only a non-request
+ * transaction answers, and it answers with LS_RJT.
+ */
+ if ( ! fc_els_is_request ( els ) ) {
+ if ( ( rc = fc_els_unknown_tx_response ( els ) ) != 0 )
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * Detect unknown ELS
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_unknown_detect ( struct fc_els *els __unused,
+ const void *data __unused,
+ size_t len __unused ) {
+ /* Never matches: always fails with -ENOTSUP */
+ return -ENOTSUP;
+}
+
+/** Unknown ELS handler
+ *
+ * Its detect method never matches, so fc_els_detect() cannot select
+ * it; fc_els_rx() installs it explicitly when no other handler is
+ * found.
+ */
+struct fc_els_handler fc_els_unknown_handler __fc_els_handler = {
+ .name = "UNKNOWN",
+ .tx = fc_els_unknown_tx,
+ .rx = fc_els_unknown_rx,
+ .detect = fc_els_unknown_detect,
+};
+
+/******************************************************************************
+ *
+ * FLOGI
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Transmit FLOGI
+ *
+ * @v els Fibre Channel ELS transaction
+ * @ret rc Return status code
+ */
+static int fc_els_flogi_tx ( struct fc_els *els ) {
+ struct fc_login_frame flogi;
+
+ /* Construct FLOGI.  The command code comes from
+ * fc_els_tx_command() (NOTE(review): presumably LS_ACC when
+ * replying - confirm against its definition).  Multi-byte
+ * fields are converted with htons(); our own port and node
+ * names are copied in from the port.
+ */
+ memset ( &flogi, 0, sizeof ( flogi ) );
+ flogi.command = fc_els_tx_command ( els, FC_ELS_FLOGI );
+ flogi.common.version = htons ( FC_LOGIN_VERSION );
+ flogi.common.credit = htons ( FC_LOGIN_DEFAULT_B2B );
+ flogi.common.flags = htons ( FC_LOGIN_CONTINUOUS_OFFSET );
+ flogi.common.mtu = htons ( FC_LOGIN_DEFAULT_MTU );
+ memcpy ( &flogi.port_wwn, &els->port->port_wwn,
+ sizeof ( flogi.port_wwn ) );
+ memcpy ( &flogi.node_wwn, &els->port->node_wwn,
+ sizeof ( flogi.node_wwn ) );
+ flogi.class3.flags = htons ( FC_LOGIN_CLASS_VALID |
+ FC_LOGIN_CLASS_SEQUENTIAL );
+
+ /* Transmit FLOGI */
+ return fc_els_tx ( els, &flogi, sizeof ( flogi ) );
+}
+
+/**
+ * Receive FLOGI
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_flogi_rx ( struct fc_els *els, void *data, size_t len ) {
+ struct fc_login_frame *flogi = data;
+ int has_fabric;
+ int rc;
+
+ /* Sanity check: frame must hold a complete login frame */
+ if ( len < sizeof ( *flogi ) ) {
+ DBGC ( els, FCELS_FMT " received underlength frame:\n",
+ FCELS_ARGS ( els ) );
+ DBGC_HDA ( els, 0, data, len );
+ return -EINVAL;
+ }
+
+ /* Extract parameters; has_fabric is non-zero when the sender
+ * set FC_LOGIN_F_PORT in its common flags.
+ */
+ has_fabric = ( flogi->common.flags & htons ( FC_LOGIN_F_PORT ) );
+ DBGC ( els, FCELS_FMT " has node %s\n", FCELS_ARGS ( els ),
+ fc_ntoa ( &flogi->node_wwn ) );
+ DBGC ( els, FCELS_FMT " has port %s\n", FCELS_ARGS ( els ),
+ fc_ntoa ( &flogi->port_wwn ) );
+ if ( has_fabric ) {
+ DBGC ( els, FCELS_FMT " has fabric with", FCELS_ARGS ( els ) );
+ DBGC ( els, " local ID %s\n", fc_id_ntoa ( &els->port_id ) );
+ } else {
+ DBGC ( els, FCELS_FMT " has point-to-point link\n",
+ FCELS_ARGS ( els ) );
+ }
+
+ /* Log in port, passing the transaction's local port ID and the
+ * names carried in the received frame.
+ */
+ if ( ( rc = fc_port_login ( els->port, &els->port_id, &flogi->node_wwn,
+ &flogi->port_wwn, has_fabric ) ) != 0 ) {
+ DBGC ( els, FCELS_FMT " could not log in port: %s\n",
+ FCELS_ARGS ( els ), strerror ( rc ) );
+ return rc;
+ }
+
+ /* Send any responses to the newly-assigned peer port ID, if
+ * applicable (point-to-point only: taken from the port's
+ * ptp_link_port_id).
+ */
+ if ( ! has_fabric ) {
+ memcpy ( &els->peer_port_id, &els->port->ptp_link_port_id,
+ sizeof ( els->peer_port_id ) );
+ }
+
+ /* Transmit response, if applicable (non-request transactions
+ * only).
+ */
+ if ( ! fc_els_is_request ( els ) ) {
+ if ( ( rc = fc_els_flogi_tx ( els ) ) != 0 )
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * Detect FLOGI
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_flogi_detect ( struct fc_els *els __unused, const void *data,
+ size_t len __unused ) {
+ const struct fc_login_frame *flogi = data;
+
+ /* Check for FLOGI: only the command field is examined */
+ if ( flogi->command != FC_ELS_FLOGI )
+ return -EINVAL;
+
+ return 0;
+}
+
+/** FLOGI ELS handler
+ *
+ * The same transmit routine serves requests and responses:
+ * fc_els_flogi_rx() calls it to answer a received FLOGI.
+ */
+struct fc_els_handler fc_els_flogi_handler __fc_els_handler = {
+ .name = "FLOGI",
+ .tx = fc_els_flogi_tx,
+ .rx = fc_els_flogi_rx,
+ .detect = fc_els_flogi_detect,
+};
+
+/**
+ * Create FLOGI request
+ *
+ * @v parent Parent interface
+ * @v port Fibre Channel port
+ * @ret rc Return status code
+ */
+int fc_els_flogi ( struct interface *parent, struct fc_port *port ) {
+
+ /* The request is always addressed to fc_f_port_id */
+ return fc_els_request ( parent, port, &fc_f_port_id,
+ &fc_els_flogi_handler );
+}
+
+/******************************************************************************
+ *
+ * PLOGI
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Transmit PLOGI
+ *
+ * @v els Fibre Channel ELS transaction
+ * @ret rc Return status code
+ */
+static int fc_els_plogi_tx ( struct fc_els *els ) {
+ struct fc_login_frame plogi;
+
+ /* Construct PLOGI.  The command code comes from
+ * fc_els_tx_command(); 16-bit fields are converted with htons()
+ * and e_d_tov with htonl().
+ */
+ memset ( &plogi, 0, sizeof ( plogi ) );
+ plogi.command = fc_els_tx_command ( els, FC_ELS_PLOGI );
+ plogi.common.version = htons ( FC_LOGIN_VERSION );
+ plogi.common.credit = htons ( FC_LOGIN_DEFAULT_B2B );
+ plogi.common.flags = htons ( FC_LOGIN_CONTINUOUS_OFFSET );
+ plogi.common.mtu = htons ( FC_LOGIN_DEFAULT_MTU );
+ plogi.common.u.plogi.max_seq = htons ( FC_LOGIN_DEFAULT_MAX_SEQ );
+ plogi.common.u.plogi.rel_offs = htons ( FC_LOGIN_DEFAULT_REL_OFFS );
+ plogi.common.e_d_tov = htonl ( FC_LOGIN_DEFAULT_E_D_TOV );
+ memcpy ( &plogi.port_wwn, &els->port->port_wwn,
+ sizeof ( plogi.port_wwn ) );
+ memcpy ( &plogi.node_wwn, &els->port->node_wwn,
+ sizeof ( plogi.node_wwn ) );
+ plogi.class3.flags = htons ( FC_LOGIN_CLASS_VALID |
+ FC_LOGIN_CLASS_SEQUENTIAL );
+ plogi.class3.mtu = htons ( FC_LOGIN_DEFAULT_MTU );
+ plogi.class3.max_seq = htons ( FC_LOGIN_DEFAULT_MAX_SEQ );
+ /* NOTE(review): assigned without a byte swap; presumably a
+ * single-byte field - confirm against struct fc_login_class.
+ */
+ plogi.class3.max_seq_per_xchg = 1;
+
+ /* Transmit PLOGI */
+ return fc_els_tx ( els, &plogi, sizeof ( plogi ) );
+}
+
+/**
+ * Receive PLOGI
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_plogi_rx ( struct fc_els *els, void *data, size_t len ) {
+ struct fc_login_frame *plogi = data;
+ struct fc_peer *peer;
+ int rc;
+
+ /* Sanity checks: complete login frame, and port link must be
+ * up.
+ */
+ if ( len < sizeof ( *plogi ) ) {
+ DBGC ( els, FCELS_FMT " received underlength frame:\n",
+ FCELS_ARGS ( els ) );
+ DBGC_HDA ( els, 0, data, len );
+ rc = -EINVAL;
+ goto err_sanity;
+ }
+ if ( ! fc_link_ok ( &els->port->link ) ) {
+ DBGC ( els, FCELS_FMT " received while port link is down\n",
+ FCELS_ARGS ( els ) );
+ rc = -EINVAL;
+ goto err_sanity;
+ }
+
+ /* Extract parameters */
+ DBGC ( els, FCELS_FMT " has node %s\n", FCELS_ARGS ( els ),
+ fc_ntoa ( &plogi->node_wwn ) );
+ DBGC ( els, FCELS_FMT " has port %s as %s\n",
+ FCELS_ARGS ( els ), fc_ntoa ( &plogi->port_wwn ),
+ fc_id_ntoa ( &els->peer_port_id ) );
+
+ /* Get peer by the port name carried in the frame; a NULL
+ * result is reported as -ENOMEM.
+ */
+ peer = fc_peer_get_wwn ( &plogi->port_wwn );
+ if ( ! peer ) {
+ DBGC ( els, FCELS_FMT " could not create peer\n",
+ FCELS_ARGS ( els ) );
+ rc = -ENOMEM;
+ goto err_peer_get_wwn;
+ }
+
+ /* Record login of the peer on our port under its port ID */
+ if ( ( rc = fc_peer_login ( peer, els->port,
+ &els->peer_port_id ) ) != 0 ) {
+ DBGC ( els, FCELS_FMT " could not log in peer: %s\n",
+ FCELS_ARGS ( els ), strerror ( rc ) );
+ goto err_login;
+ }
+
+ /* Transmit response, if applicable (non-request transactions
+ * only).
+ */
+ if ( ! fc_els_is_request ( els ) ) {
+ if ( ( rc = fc_els_plogi_tx ( els ) ) != 0 )
+ goto err_plogi_tx;
+ }
+
+ /* Drop temporary reference to peer */
+ fc_peer_put ( peer );
+
+ return 0;
+
+ /* Failures after the peer was obtained release it again */
+ err_plogi_tx:
+ err_login:
+ fc_peer_put ( peer );
+ err_peer_get_wwn:
+ err_sanity:
+ return rc;
+}
+
+/**
+ * Detect PLOGI
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_plogi_detect ( struct fc_els *els __unused, const void *data,
+ size_t len __unused ) {
+ const struct fc_login_frame *plogi = data;
+
+ /* Check for PLOGI: only the command field is examined */
+ if ( plogi->command != FC_ELS_PLOGI )
+ return -EINVAL;
+
+ return 0;
+}
+
+/** PLOGI ELS handler
+ *
+ * The same transmit routine serves requests and responses:
+ * fc_els_plogi_rx() calls it to answer a received PLOGI.
+ */
+struct fc_els_handler fc_els_plogi_handler __fc_els_handler = {
+ .name = "PLOGI",
+ .tx = fc_els_plogi_tx,
+ .rx = fc_els_plogi_rx,
+ .detect = fc_els_plogi_detect,
+};
+
+/**
+ * Create PLOGI request
+ *
+ * @v parent Parent interface
+ * @v port Fibre Channel port
+ * @v peer_port_id Peer port ID
+ * @ret rc Return status code
+ */
+int fc_els_plogi ( struct interface *parent, struct fc_port *port,
+ struct fc_port_id *peer_port_id ) {
+
+ /* Thin wrapper: an ELS request using the PLOGI handler */
+ return fc_els_request ( parent, port, peer_port_id,
+ &fc_els_plogi_handler );
+}
+
+/******************************************************************************
+ *
+ * LOGO
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Transmit LOGO request
+ *
+ * @v els Fibre Channel ELS transaction
+ * @ret rc Return status code
+ */
+static int fc_els_logo_tx ( struct fc_els *els ) {
+ struct fc_logout_request_frame logo;
+
+ /* Construct LOGO.  The command is set to FC_ELS_LOGO
+ * unconditionally; responses are built separately by
+ * fc_els_logo_tx_response().  The frame identifies our own port
+ * by port ID and port name.
+ */
+ memset ( &logo, 0, sizeof ( logo ) );
+ logo.command = FC_ELS_LOGO;
+ memcpy ( &logo.port_id, &els->port->port_id, sizeof ( logo.port_id ) );
+ memcpy ( &logo.port_wwn, &els->port->port_wwn,
+ sizeof ( logo.port_wwn ) );
+
+ /* Transmit LOGO */
+ return fc_els_tx ( els, &logo, sizeof ( logo ) );
+}
+
+/**
+ * Transmit LOGO response
+ *
+ * @v els Fibre Channel ELS transaction
+ * @ret rc Return status code
+ */
+static int fc_els_logo_tx_response ( struct fc_els *els ) {
+ struct fc_logout_response_frame logo;
+
+ /* Construct LOGO response: an otherwise zeroed frame carrying
+ * the LS_ACC command.
+ */
+ memset ( &logo, 0, sizeof ( logo ) );
+ logo.command = FC_ELS_LS_ACC;
+
+ /* Transmit LOGO response */
+ return fc_els_tx ( els, &logo, sizeof ( logo ) );
+}
+
+/**
+ * Log out individual peer or whole port as applicable
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v peer_port_id Peer port ID
+ */
+static void fc_els_logo_logout ( struct fc_els *els,
+ struct fc_port_id *peer_port_id ) {
+ struct fc_peer *peer;
+
+ /* An ID equal to fc_f_port_id or to our own port ID logs out
+ * the whole port; any other ID logs out only the matching peer,
+ * and is silently ignored if no such peer is found.
+ */
+ if ( ( memcmp ( peer_port_id, &fc_f_port_id,
+ sizeof ( *peer_port_id ) ) == 0 ) ||
+ ( memcmp ( peer_port_id, &els->port->port_id,
+ sizeof ( *peer_port_id ) ) == 0 ) ) {
+ fc_port_logout ( els->port, 0 );
+ } else {
+ peer = fc_peer_get_port_id ( els->port, peer_port_id );
+ if ( peer ) {
+ fc_peer_logout ( peer, 0 );
+ fc_peer_put ( peer );
+ }
+ }
+}
+
+/**
+ * Receive LOGO request
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_logo_rx_request ( struct fc_els *els, void *data,
+ size_t len ) {
+ struct fc_logout_request_frame *logo = data;
+ int rc;
+
+ /* Sanity check: frame must hold a complete logout request */
+ if ( len < sizeof ( *logo ) ) {
+ DBGC ( els, FCELS_FMT " received underlength frame:\n",
+ FCELS_ARGS ( els ) );
+ DBGC_HDA ( els, 0, data, len );
+ return -EINVAL;
+ }
+
+ DBGC ( els, FCELS_FMT " has port %s as %s\n", FCELS_ARGS ( els ),
+ fc_ntoa ( &logo->port_wwn ), fc_id_ntoa ( &logo->port_id ) );
+
+ /* Log out individual peer or whole port as applicable, keyed
+ * on the port ID carried in the frame.
+ */
+ fc_els_logo_logout ( els, &logo->port_id );
+
+ /* Transmit response */
+ if ( ( rc = fc_els_logo_tx_response ( els ) ) != 0 )
+ return rc;
+
+ return 0;
+}
+
+/**
+ * Receive LOGO response
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_logo_rx_response ( struct fc_els *els, void *data __unused,
+ size_t len __unused ) {
+
+ /* Log out individual peer or whole port as applicable, keyed
+ * on the transaction's peer port ID; the frame contents are not
+ * examined.
+ */
+ fc_els_logo_logout ( els, &els->peer_port_id );
+
+ return 0;
+}
+
+/**
+ * Receive LOGO
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_logo_rx ( struct fc_els *els, void *data, size_t len ) {
+
+ /* A request transaction receives the response; any other
+ * transaction receives the request.
+ */
+ if ( fc_els_is_request ( els ) ) {
+ return fc_els_logo_rx_response ( els, data, len );
+ } else {
+ return fc_els_logo_rx_request ( els, data, len );
+ }
+}
+
+/**
+ * Detect LOGO
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_logo_detect ( struct fc_els *els __unused, const void *data,
+ size_t len __unused ) {
+ const struct fc_logout_request_frame *logo = data;
+
+ /* Check for LOGO: only the command field is examined */
+ if ( logo->command != FC_ELS_LOGO )
+ return -EINVAL;
+
+ return 0;
+}
+
+/** LOGO ELS handler
+ *
+ * The tx method builds requests only; fc_els_logo_rx() dispatches
+ * to the request or response receive routine.
+ */
+struct fc_els_handler fc_els_logo_handler __fc_els_handler = {
+ .name = "LOGO",
+ .tx = fc_els_logo_tx,
+ .rx = fc_els_logo_rx,
+ .detect = fc_els_logo_detect,
+};
+
+/**
+ * Create LOGO request
+ *
+ * @v parent Parent interface
+ * @v port Fibre Channel port
+ * @v peer_port_id Peer port ID
+ * @ret rc Return status code
+ */
+int fc_els_logo ( struct interface *parent, struct fc_port *port,
+ struct fc_port_id *peer_port_id ) {
+
+ /* Thin wrapper: an ELS request using the LOGO handler */
+ return fc_els_request ( parent, port, peer_port_id,
+ &fc_els_logo_handler );
+}
+
+/******************************************************************************
+ *
+ * PRLI
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Find PRLI descriptor
+ *
+ * @v type Upper-layer protocol type
+ * @ret descriptor PRLI descriptor, or NULL
+ */
+static struct fc_els_prli_descriptor *
+fc_els_prli_descriptor ( unsigned int type ) {
+ struct fc_els_prli_descriptor *descriptor;
+
+ /* Return the first table entry whose type matches */
+ for_each_table_entry ( descriptor, FC_ELS_PRLI_DESCRIPTORS ) {
+ if ( descriptor->type == type )
+ return descriptor;
+ }
+ /* No descriptor for this type */
+ return NULL;
+}
+
+/**
+ * Transmit PRLI
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v descriptor ELS PRLI descriptor
+ * @v param Service parameters
+ * @ret rc Return status code
+ */
+int fc_els_prli_tx ( struct fc_els *els,
+ struct fc_els_prli_descriptor *descriptor, void *param ) {
+ /* Frame layout: fixed PRLI frame followed by a service
+ * parameter area whose size is given by the descriptor.
+ */
+ struct {
+ struct fc_prli_frame frame;
+ uint8_t param[descriptor->param_len];
+ } __attribute__ (( packed )) prli;
+ struct fc_ulp *ulp;
+ int rc;
+
+ /* Get ULP for the peer and the descriptor's type; a NULL
+ * result is reported as -ENOMEM.
+ */
+ ulp = fc_ulp_get_port_id_type ( els->port, &els->peer_port_id,
+ descriptor->type );
+ if ( ! ulp ) {
+ rc = -ENOMEM;
+ goto err_get_port_id_type;
+ }
+
+ /* Build frame for transmission.  A request sets only the
+ * ESTABLISH flag; a response sets ESTABLISH and
+ * RESPONSE_SUCCESS only when the ULP link is up, and otherwise
+ * leaves the flags zero.
+ */
+ memset ( &prli, 0, sizeof ( prli ) );
+ prli.frame.command = fc_els_tx_command ( els, FC_ELS_PRLI );
+ prli.frame.page_len =
+ ( sizeof ( prli.frame.page ) + sizeof ( prli.param ) );
+ prli.frame.len = htons ( sizeof ( prli ) );
+ prli.frame.page.type = descriptor->type;
+ if ( fc_els_is_request ( els ) ) {
+ prli.frame.page.flags |= htons ( FC_PRLI_ESTABLISH );
+ } else if ( fc_link_ok ( &ulp->link ) ) {
+ prli.frame.page.flags |= htons ( FC_PRLI_ESTABLISH |
+ FC_PRLI_RESPONSE_SUCCESS );
+ }
+ memcpy ( &prli.param, param, sizeof ( prli.param ) );
+
+ /* Transmit frame */
+ if ( ( rc = fc_els_tx ( els, &prli, sizeof ( prli ) ) ) != 0 )
+ goto err_tx;
+
+ /* Drop temporary reference to ULP */
+ fc_ulp_put ( ulp );
+
+ return 0;
+
+ err_tx:
+ fc_ulp_put ( ulp );
+ err_get_port_id_type:
+ return rc;
+}
+
+/**
+ * Receive PRLI
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v descriptor ELS PRLI descriptor
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+int fc_els_prli_rx ( struct fc_els *els,
+ struct fc_els_prli_descriptor *descriptor,
+ void *data, size_t len ) {
+ /* Frame layout: fixed PRLI frame followed by a service
+ * parameter area whose size is given by the descriptor.
+ */
+ struct {
+ struct fc_prli_frame frame;
+ uint8_t param[descriptor->param_len];
+ } __attribute__ (( packed )) *prli = data;
+ struct fc_ulp *ulp;
+ int rc;
+
+ /* Sanity check: frame must hold the PRLI frame plus the full
+ * service parameter area.
+ */
+ if ( len < sizeof ( *prli ) ) {
+ DBGC ( els, FCELS_FMT " received underlength frame:\n",
+ FCELS_ARGS ( els ) );
+ DBGC_HDA ( els, 0, data, len );
+ rc = -EINVAL;
+ goto err_sanity;
+ }
+
+ DBGC ( els, FCELS_FMT " has parameters:\n", FCELS_ARGS ( els ) );
+ DBGC_HDA ( els, 0, prli->param, sizeof ( prli->param ) );
+
+ /* Get ULP for the peer and the descriptor's type; a NULL
+ * result is reported as -ENOMEM.
+ */
+ ulp = fc_ulp_get_port_id_type ( els->port, &els->peer_port_id,
+ descriptor->type );
+ if ( ! ulp ) {
+ rc = -ENOMEM;
+ goto err_get_port_id_type;
+ }
+
+ /* Sanity check: the peer link must be up */
+ if ( ! fc_link_ok ( &ulp->peer->link ) ) {
+ DBGC ( els, FCELS_FMT " received while peer link is down\n",
+ FCELS_ARGS ( els ) );
+ rc = -EINVAL;
+ goto err_link;
+ }
+
+ /* Log in ULP, if applicable.  Without the ESTABLISH flag, a
+ * request transaction (i.e. a received response) logs the ULP
+ * out with -EACCES.
+ */
+ if ( prli->frame.page.flags & htons ( FC_PRLI_ESTABLISH ) ) {
+ if ( ( rc = fc_ulp_login ( ulp, prli->param,
+ sizeof ( prli->param ),
+ fc_els_is_request ( els ) ) ) != 0 ){
+ DBGC ( els, FCELS_FMT " could not log in ULP: %s\n",
+ FCELS_ARGS ( els ), strerror ( rc ) );
+ goto err_login;
+ }
+ } else {
+ if ( fc_els_is_request ( els ) ) {
+ fc_ulp_logout ( ulp, -EACCES );
+ } else {
+ /* This is just an information-gathering PRLI; do not
+ * log in or out
+ */
+ }
+ }
+
+ /* Transmit response, if applicable, via the handler's tx
+ * method (non-request transactions only).
+ */
+ if ( ! fc_els_is_request ( els ) ) {
+ if ( ( rc = els->handler->tx ( els ) ) != 0 )
+ goto err_tx;
+ }
+
+ /* Drop temporary reference to ULP */
+ fc_ulp_put ( ulp );
+
+ return 0;
+
+ /* Failures after the ULP was obtained release it again */
+ err_tx:
+ err_login:
+ err_link:
+ fc_ulp_put ( ulp );
+ err_get_port_id_type:
+ err_sanity:
+ return rc;
+}
+
+/**
+ * Detect PRLI
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v descriptor ELS PRLI descriptor
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+int fc_els_prli_detect ( struct fc_els *els __unused,
+ struct fc_els_prli_descriptor *descriptor,
+ const void *data, size_t len ) {
+ /* Frame layout: fixed PRLI frame followed by a service
+ * parameter area whose size is given by the descriptor.
+ */
+ const struct {
+ struct fc_prli_frame frame;
+ uint8_t param[descriptor->param_len];
+ } __attribute__ (( packed )) *prli = data;
+
+ /* Check for PRLI */
+ if ( prli->frame.command != FC_ELS_PRLI )
+ return -EINVAL;
+
+ /* Check for sufficient length to contain service parameter page;
+ * this precedes any access to the page itself.
+ */
+ if ( len < sizeof ( *prli ) )
+ return -EINVAL;
+
+ /* Check for upper-layer protocol type */
+ if ( prli->frame.page.type != descriptor->type )
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * Create PRLI request
+ *
+ * @v parent Parent interface
+ * @v port Fibre Channel port
+ * @v peer_port_id Peer port ID
+ * @v type Upper-layer protocol type
+ * @ret rc Return status code
+ */
+int fc_els_prli ( struct interface *parent, struct fc_port *port,
+ struct fc_port_id *peer_port_id, unsigned int type ) {
+ struct fc_els_prli_descriptor *descriptor;
+
+ /* Find a PRLI descriptor; fails with -ENOTSUP when there is
+ * none for this type.
+ */
+ descriptor = fc_els_prli_descriptor ( type );
+ if ( ! descriptor )
+ return -ENOTSUP;
+
+ /* Issue the request using the descriptor's ELS handler */
+ return fc_els_request ( parent, port, peer_port_id,
+ descriptor->handler );
+}
+
+/******************************************************************************
+ *
+ * RTV
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Transmit RTV response
+ *
+ * @v els Fibre Channel ELS transaction
+ * @ret rc Return status code
+ */
+static int fc_els_rtv_tx_response ( struct fc_els *els ) {
+ struct fc_rtv_response_frame rtv;
+
+ /* Construct RTV response: LS_ACC carrying the default E_D_TOV
+ * (converted with htonl()); other fields are left zeroed.
+ */
+ memset ( &rtv, 0, sizeof ( rtv ) );
+ rtv.command = FC_ELS_LS_ACC;
+ rtv.e_d_tov = htonl ( FC_LOGIN_DEFAULT_E_D_TOV );
+
+ /* Transmit RTV response */
+ return fc_els_tx ( els, &rtv, sizeof ( rtv ) );
+}
+
+/**
+ * Receive RTV
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_rtv_rx ( struct fc_els *els, void *data __unused,
+ size_t len __unused ) {
+ int rc;
+
+ DBGC ( els, FCELS_FMT "\n", FCELS_ARGS ( els ) );
+
+ /* Transmit response (non-request transactions only); the
+ * received frame contents are not examined.
+ */
+ if ( ! fc_els_is_request ( els ) ) {
+ if ( ( rc = fc_els_rtv_tx_response ( els ) ) != 0 )
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * Detect RTV
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_rtv_detect ( struct fc_els *els __unused, const void *data,
+ size_t len __unused ) {
+ const struct fc_rtv_request_frame *rtv = data;
+
+ /* Check for RTV: only the command field is examined */
+ if ( rtv->command != FC_ELS_RTV )
+ return -EINVAL;
+
+ return 0;
+}
+
+/** RTV ELS handler
+ *
+ * The tx method is fc_els_unknown_tx(), which always fails with
+ * -ENOTSUP; only the receive side is implemented for RTV.
+ */
+struct fc_els_handler fc_els_rtv_handler __fc_els_handler = {
+ .name = "RTV",
+ .tx = fc_els_unknown_tx,
+ .rx = fc_els_rtv_rx,
+ .detect = fc_els_rtv_detect,
+};
+
+/******************************************************************************
+ *
+ * ECHO
+ *
+ ******************************************************************************
+ */
+
+/** ECHO request data
+ *
+ * The ECHO frame header followed by a single 32-bit magic value as
+ * payload.
+ */
+struct fc_echo_request_frame {
+ /** ECHO frame header */
+ struct fc_echo_frame_header echo;
+ /** Magic marker */
+ uint32_t magic;
+} __attribute__ (( packed ));
+
+/** ECHO magic marker (the bytes 0x69 0x50 0x58 0x45 are ASCII "iPXE") */
+#define FC_ECHO_MAGIC 0x69505845
+
+/**
+ * Transmit ECHO
+ *
+ * @v els Fibre Channel ELS transaction
+ * @ret rc Return status code
+ */
+static int fc_els_echo_tx ( struct fc_els *els ) {
+ struct fc_echo_request_frame echo;
+
+ /* Construct ECHO: the command plus the magic marker (converted
+ * with htonl()) as payload.
+ */
+ memset ( &echo, 0, sizeof ( echo ) );
+ echo.echo.command = FC_ELS_ECHO;
+ echo.magic = htonl ( FC_ECHO_MAGIC );
+
+ /* Transmit ECHO */
+ return fc_els_tx ( els, &echo, sizeof ( echo ) );
+}
+
+/**
+ * Receive ECHO request
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_echo_rx_request ( struct fc_els *els, void *data,
+ size_t len ) {
+ /* View of the received frame: header plus a payload sized as
+ * the received length minus the header size.
+ * NOTE(review): relies on len being at least the header size;
+ * fc_els_rx() checks len against struct fc_els_frame_common -
+ * confirm the two header sizes agree.
+ */
+ struct {
+ struct fc_echo_frame_header echo;
+ char payload[ len - sizeof ( struct fc_echo_frame_header ) ];
+ } *echo = data;
+ int rc;
+
+ DBGC ( els, FCELS_FMT "\n", FCELS_ARGS ( els ) );
+
+ /* Transmit response: the received frame is modified in place
+ * (command becomes LS_ACC) and sent back.
+ */
+ echo->echo.command = FC_ELS_LS_ACC;
+ if ( ( rc = fc_els_tx ( els, echo, sizeof ( *echo ) ) ) != 0 )
+ return rc;
+
+ /* Nothing to do */
+ return 0;
+}
+
+/**
+ * Receive ECHO response
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_echo_rx_response ( struct fc_els *els, void *data,
+ size_t len ) {
+ struct fc_echo_request_frame *echo = data;
+
+ DBGC ( els, FCELS_FMT "\n", FCELS_ARGS ( els ) );
+
+ /* Check response is correct: it must have exactly the length
+ * of our request and carry the same magic marker; otherwise
+ * fail with -EIO.  The length is tested first, so the magic is
+ * only read from a frame of the expected size.
+ */
+ if ( ( len != sizeof ( *echo ) ) ||
+ ( echo->magic != htonl ( FC_ECHO_MAGIC ) ) ) {
+ DBGC ( els, FCELS_FMT " received bad echo response\n",
+ FCELS_ARGS ( els ) );
+ DBGC_HDA ( els, 0, data, len );
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * Receive ECHO
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_echo_rx ( struct fc_els *els, void *data, size_t len ) {
+
+ /* A request transaction receives the response; any other
+ * transaction receives the request.
+ */
+ if ( fc_els_is_request ( els ) ) {
+ return fc_els_echo_rx_response ( els, data, len );
+ } else {
+ return fc_els_echo_rx_request ( els, data, len );
+ }
+}
+
+/**
+ * Detect ECHO
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fc_els_echo_detect ( struct fc_els *els __unused, const void *data,
+ size_t len __unused ) {
+ const struct fc_echo_frame_header *echo = data;
+
+ /* Check for ECHO: only the command field is examined */
+ if ( echo->command != FC_ELS_ECHO )
+ return -EINVAL;
+
+ return 0;
+}
+
+/** ECHO ELS handler
+ *
+ * The tx method builds requests only; fc_els_echo_rx() dispatches
+ * to the request or response receive routine.
+ */
+struct fc_els_handler fc_els_echo_handler __fc_els_handler = {
+ .name = "ECHO",
+ .tx = fc_els_echo_tx,
+ .rx = fc_els_echo_rx,
+ .detect = fc_els_echo_detect,
+};
diff --git a/qemu/roms/ipxe/src/net/fcns.c b/qemu/roms/ipxe/src/net/fcns.c
new file mode 100644
index 000000000..3ca4ad557
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/fcns.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2010 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/interface.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/process.h>
+#include <ipxe/xfer.h>
+#include <ipxe/fc.h>
+#include <ipxe/fcns.h>
+
+/** @file
+ *
+ * Fibre Channel name server lookups
+ *
+ */
+
+/** A Fibre Channel name server query
+ *
+ * Allocated by fc_ns_query(); fc_ns_query_free() is registered as
+ * the free function for the embedded reference counter.
+ */
+struct fc_ns_query {
+ /** Reference count */
+ struct refcnt refcnt;
+ /** Fibre Channel exchange */
+ struct interface xchg;
+
+ /** Fibre Channel peer (reference dropped in fc_ns_query_free()) */
+ struct fc_peer *peer;
+ /** Fibre Channel port (reference dropped in fc_ns_query_free()) */
+ struct fc_port *port;
+
+ /** Process (described by fc_ns_query_process_desc) */
+ struct process process;
+ /** Success handler
+ *
+ * Invoked from fc_ns_query_deliver() when the name server
+ * accepts the query.
+ *
+ * @v peer Fibre Channel peer
+ * @v port Fibre Channel port
+ * @v peer_port_id Peer port ID
+ * @ret rc Return status code
+ */
+ int ( * done ) ( struct fc_peer *peer, struct fc_port *port,
+ struct fc_port_id *peer_port_id );
+};
+
+/**
+ * Free name server query
+ *
+ * Drops the peer and port references held by the query, then
+ * releases the query structure itself.
+ *
+ * @v refcnt		Reference count
+ */
+static void fc_ns_query_free ( struct refcnt *refcnt ) {
+	struct fc_ns_query *ns_query;
+
+	/* Recover the query that embeds this reference counter */
+	ns_query = container_of ( refcnt, struct fc_ns_query, refcnt );
+
+	/* Release held references before freeing the container */
+	fc_peer_put ( ns_query->peer );
+	fc_port_put ( ns_query->port );
+	free ( ns_query );
+}
+
+/**
+ * Close name server query
+ *
+ * Removes the query process and shuts down the exchange interface.
+ *
+ * @v query		Name server query
+ * @v rc		Reason for close
+ */
+static void fc_ns_query_close ( struct fc_ns_query *query, int rc ) {
+	struct process *process = &query->process;
+	struct interface *xchg = &query->xchg;
+
+	/* Halt the query process */
+	process_del ( process );
+
+	/* Shut down the exchange interface, passing on the reason */
+	intf_shutdown ( xchg, rc );
+}
+
+/**
+ * Receive name server query response
+ *
+ * Frees the I/O buffer and closes the query on every path.  For an
+ * accept response of sufficient length, the success handler is
+ * invoked with the port ID carried in the response.
+ *
+ * NOTE(review): a reject response breaks out of the switch and
+ * reaches "rc = 0", so the query is closed with a success status
+ * without the success handler being called - confirm this is
+ * intended.
+ *
+ * @v query Name server query
+ * @v iobuf I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ */
+static int fc_ns_query_deliver ( struct fc_ns_query *query,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *meta __unused ) {
+ union fc_ns_response *resp = iobuf->data;
+ struct fc_port_id *peer_port_id;
+ int rc;
+
+ /* Sanity check: the response must at least cover resp->ct */
+ if ( iob_len ( iobuf ) < sizeof ( resp->ct ) ) {
+ DBGC ( query, "FCNS %p received underlength response (%zd "
+ "bytes)\n", query, iob_len ( iobuf ) );
+ rc = -EINVAL;
+ goto done;
+ }
+
+ /* Handle response according to its response code */
+ switch ( ntohs ( resp->ct.code ) ) {
+ case FC_GS_ACCEPT:
+ if ( iob_len ( iobuf ) < sizeof ( resp->gid_pn ) ) {
+ DBGC ( query, "FCNS %p received underlength accept "
+ "response (%zd bytes)\n",
+ query, iob_len ( iobuf ) );
+ rc = -EINVAL;
+ goto done;
+ }
+ peer_port_id = &resp->gid_pn.port_id.port_id;
+ DBGC ( query, "FCNS %p resolved %s to %s via %s\n",
+ query, fc_ntoa ( &query->peer->port_wwn ),
+ fc_id_ntoa ( peer_port_id ), query->port->name );
+ if ( ( rc = query->done ( query->peer, query->port,
+ peer_port_id ) ) != 0 )
+ goto done;
+ break;
+ case FC_GS_REJECT:
+ DBGC ( query, "FCNS %p rejected (reason %02x explanation "
+ "%02x)\n", query, resp->reject.ct.reason,
+ resp->reject.ct.explanation );
+ break;
+ default:
+ DBGC ( query, "FCNS %p received invalid response code %04x\n",
+ query, ntohs ( resp->ct.code ) );
+ rc = -ENOTSUP;
+ goto done;
+ }
+
+ rc = 0;
+ done:
+ free_iob ( iobuf );
+ fc_ns_query_close ( query, rc );
+ return rc;
+}
+
+/**
+ * Name server query process
+ *
+ * Originates an exchange towards fc_gs_port_id and sends a request
+ * for the port ID matching the peer's port WWN.  Any failure closes
+ * the query with the corresponding status.
+ *
+ * @v query		Name server query
+ */
+static void fc_ns_query_step ( struct fc_ns_query *query ) {
+	struct fc_ns_gid_pn_request request;
+	struct xfer_metadata meta;
+	int xchg_id;
+	int rc;
+
+	/* Create exchange */
+	xchg_id = fc_xchg_originate ( &query->xchg, query->port,
+				      &fc_gs_port_id, FC_TYPE_CT );
+	if ( xchg_id < 0 ) {
+		rc = xchg_id;
+		DBGC ( query, "FCNS %p could not create exchange: %s\n",
+		       query, strerror ( rc ) );
+		goto err;
+	}
+
+	/* Prepare metadata marking the transfer as complete */
+	memset ( &meta, 0, sizeof ( meta ) );
+	meta.flags = XFER_FL_OVER;
+
+	/* Build the request, keyed on the peer's port WWN */
+	memset ( &request, 0, sizeof ( request ) );
+	request.ct.revision = FC_CT_REVISION;
+	request.ct.type = FC_GS_TYPE_DS;
+	request.ct.subtype = FC_DS_SUBTYPE_NAME;
+	request.ct.code =
+		htons ( FC_NS_GET ( FC_NS_PORT_NAME, FC_NS_PORT_ID ) );
+	memcpy ( &request.port_wwn, &query->peer->port_wwn,
+		 sizeof ( request.port_wwn ) );
+
+	/* Send query */
+	rc = xfer_deliver_raw_meta ( &query->xchg, &request,
+				     sizeof ( request ), &meta );
+	if ( rc != 0 ) {
+		DBGC ( query, "FCNS %p could not deliver query: %s\n",
+		       query, strerror ( rc ) );
+		goto err;
+	}
+	return;
+
+ err:
+	fc_ns_query_close ( query, rc );
+}
+
+/** Name server exchange interface operations
+ *
+ * Routes delivered data to fc_ns_query_deliver() and interface
+ * closure to fc_ns_query_close().
+ */
+static struct interface_operation fc_ns_query_xchg_op[] = {
+ INTF_OP ( xfer_deliver, struct fc_ns_query *, fc_ns_query_deliver ),
+ INTF_OP ( intf_close, struct fc_ns_query *, fc_ns_query_close ),
+};
+
+/** Name server exchange interface descriptor (for the "xchg" member) */
+static struct interface_descriptor fc_ns_query_xchg_desc =
+ INTF_DESC ( struct fc_ns_query, xchg, fc_ns_query_xchg_op );
+
+/** Name server process descriptor (runs fc_ns_query_step()) */
+static struct process_descriptor fc_ns_query_process_desc =
+ PROC_DESC_ONCE ( struct fc_ns_query, process, fc_ns_query_step );
+
+/**
+ * Issue Fibre Channel name server query
+ *
+ * Allocates a query holding references to the peer and port.  The
+ * caller's own reference is dropped before returning, so the query
+ * is thereafter kept alive only by its interface and process.
+ *
+ * @v peer		Fibre Channel peer
+ * @v port		Fibre Channel port
+ * @v done		Success handler, given the resolved peer port ID
+ * @ret rc		Return status code (-ENOMEM on allocation failure)
+ */
+int fc_ns_query ( struct fc_peer *peer, struct fc_port *port,
+		  int ( * done ) ( struct fc_peer *peer, struct fc_port *port,
+				   struct fc_port_id *peer_port_id ) ) {
+	struct fc_ns_query *query = zalloc ( sizeof ( *query ) );
+
+	/* Bail out if allocation failed */
+	if ( query == NULL )
+		return -ENOMEM;
+
+	/* Initialise reference count, interface and process */
+	ref_init ( &query->refcnt, fc_ns_query_free );
+	intf_init ( &query->xchg, &fc_ns_query_xchg_desc, &query->refcnt );
+	process_init ( &query->process, &fc_ns_query_process_desc,
+		       &query->refcnt );
+
+	/* Record what to look up, via which port, and whom to notify */
+	query->peer = fc_peer_get ( peer );
+	query->port = fc_port_get ( port );
+	query->done = done;
+
+	DBGC ( query, "FCNS %p querying %s via %s\n",
+	       query, fc_ntoa ( &query->peer->port_wwn ), port->name );
+
+	/* Mortalise self and return */
+	ref_put ( &query->refcnt );
+	return 0;
+}
diff --git a/qemu/roms/ipxe/src/net/fcoe.c b/qemu/roms/ipxe/src/net/fcoe.c
new file mode 100644
index 000000000..e9e404ec3
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/fcoe.c
@@ -0,0 +1,1229 @@
+/*
+ * Copyright (C) 2010 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/if_ether.h>
+#include <ipxe/if_arp.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/interface.h>
+#include <ipxe/xfer.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/ethernet.h>
+#include <ipxe/vlan.h>
+#include <ipxe/features.h>
+#include <ipxe/errortab.h>
+#include <ipxe/device.h>
+#include <ipxe/crc32.h>
+#include <ipxe/retry.h>
+#include <ipxe/timer.h>
+#include <ipxe/fc.h>
+#include <ipxe/fip.h>
+#include <ipxe/fcoe.h>
+
+/** @file
+ *
+ * FCoE protocol
+ *
+ */
+
+FEATURE ( FEATURE_PROTOCOL, "FCoE", DHCP_EB_FEATURE_FCOE, 1 );
+
+/* Disambiguate the various error causes
+ *
+ * Each EINVAL_xxx below is built from an EINFO_EINVAL_xxx descriptor
+ * that pairs EINFO_EINVAL with a distinct identifier and its own
+ * description string.
+ */
+#define EINVAL_UNDERLENGTH __einfo_error ( EINFO_EINVAL_UNDERLENGTH )
+#define EINFO_EINVAL_UNDERLENGTH \
+ __einfo_uniqify ( EINFO_EINVAL, 0x01, "Underlength packet" )
+#define EINVAL_SOF __einfo_error ( EINFO_EINVAL_SOF )
+#define EINFO_EINVAL_SOF \
+ __einfo_uniqify ( EINFO_EINVAL, 0x02, "Invalid SoF delimiter" )
+#define EINVAL_CRC __einfo_error ( EINFO_EINVAL_CRC )
+#define EINFO_EINVAL_CRC \
+ __einfo_uniqify ( EINFO_EINVAL, 0x03, "Invalid CRC (not stripped?)" )
+#define EINVAL_EOF __einfo_error ( EINFO_EINVAL_EOF )
+#define EINFO_EINVAL_EOF \
+ __einfo_uniqify ( EINFO_EINVAL, 0x04, "Invalid EoF delimiter" )
+
+/** An FCoE port
+ *
+ * Ports are linked into the fcoe_ports list and are looked up by
+ * network device in fcoe_demux().
+ */
+struct fcoe_port {
+ /** Reference count */
+ struct refcnt refcnt;
+ /** List of FCoE ports */
+ struct list_head list;
+ /** Transport interface */
+ struct interface transport;
+ /** Network device */
+ struct net_device *netdev;
+
+ /** Node WWN */
+ union fcoe_name node_wwn;
+ /** Port WWN */
+ union fcoe_name port_wwn;
+
+ /** FIP retransmission timer */
+ struct retry_timer timer;
+ /** FIP timeout counter */
+ unsigned int timeouts;
+ /** Flags (see enum fcoe_flags) */
+ unsigned int flags;
+ /** FCoE forwarder priority
+ *
+ * Reset to ( FIP_LOWEST_PRIORITY + 1 ) by fcoe_reset() and
+ * replaced by any numerically lower advertised priority in
+ * fcoe_fip_rx_advertisement().
+ */
+ unsigned int priority;
+ /** Keepalive delay (in ms) */
+ unsigned int keepalive;
+ /** FCoE forwarder MAC address */
+ uint8_t fcf_mac[ETH_ALEN];
+ /** Local MAC address */
+ uint8_t local_mac[ETH_ALEN];
+};
+
+/** FCoE flags
+ *
+ * Bit values stored in the "flags" member of struct fcoe_port.
+ */
+enum fcoe_flags {
+ /** Underlying network device is available (set by fcoe_reset()) */
+ FCOE_HAVE_NETWORK = 0x0001,
+ /** We have selected an FCoE forwarder to use */
+ FCOE_HAVE_FCF = 0x0002,
+ /** We have a FIP-capable FCoE forwarder available to be used */
+ FCOE_HAVE_FIP_FCF = 0x0004,
+ /** FCoE forwarder supports server-provided MAC addresses */
+ FCOE_FCF_ALLOWS_SPMA = 0x0008,
+ /** An alternative VLAN has been found (set by fcoe_fip_rx_vlan()) */
+ FCOE_VLAN_FOUND = 0x0010,
+ /** VLAN discovery has timed out */
+ FCOE_VLAN_TIMED_OUT = 0x0020,
+};
+
+struct net_protocol fcoe_protocol __net_protocol;
+struct net_protocol fip_protocol __net_protocol;
+
+/** FCoE All-FCoE-MACs address */
+static uint8_t all_fcoe_macs[ETH_ALEN] =
+ { 0x01, 0x10, 0x18, 0x01, 0x00, 0x00 };
+
+/** FCoE All-ENode-MACs address */
+static uint8_t all_enode_macs[ETH_ALEN] =
+ { 0x01, 0x10, 0x18, 0x01, 0x00, 0x01 };
+
+/** FCoE All-FCF-MACs address (destination of VLAN requests and
+ * discovery solicitations)
+ */
+static uint8_t all_fcf_macs[ETH_ALEN] =
+ { 0x01, 0x10, 0x18, 0x01, 0x00, 0x02 };
+
+/** Default FCoE forwarder MAC address (restored by fcoe_reset()) */
+static uint8_t default_fcf_mac[ETH_ALEN] =
+ { 0x0e, 0xfc, 0x00, 0xff, 0xff, 0xfe };
+
+/** Maximum number of VLAN requests before giving up on VLAN discovery */
+#define FCOE_MAX_VLAN_REQUESTS 2
+
+/** Delay between retrying VLAN requests (one second) */
+#define FCOE_VLAN_RETRY_DELAY ( TICKS_PER_SEC )
+
+/** Delay between retries of polling VLAN requests (30 seconds) */
+#define FCOE_VLAN_POLL_DELAY ( 30 * TICKS_PER_SEC )
+
+/** Maximum number of FIP solicitations before giving up on FIP */
+#define FCOE_MAX_FIP_SOLICITATIONS 2
+
+/** Delay between retrying FIP solicitations (one second) */
+#define FCOE_FIP_RETRY_DELAY ( TICKS_PER_SEC )
+
+/** Maximum number of missing discovery advertisements */
+#define FCOE_MAX_FIP_MISSING_KEEPALIVES 4
+
+/** List of FCoE ports (searched by fcoe_demux()) */
+static LIST_HEAD ( fcoe_ports );
+
+/******************************************************************************
+ *
+ * FCoE protocol
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Identify FCoE port by network device
+ *
+ * Searches the list of FCoE ports for the first one bound to the
+ * given network device.
+ *
+ * @v netdev		Network device
+ * @ret fcoe		FCoE port, or NULL
+ */
+static struct fcoe_port * fcoe_demux ( struct net_device *netdev ) {
+	struct fcoe_port *candidate;
+	struct fcoe_port *found = NULL;
+
+	list_for_each_entry ( candidate, &fcoe_ports, list ) {
+		if ( candidate->netdev == netdev ) {
+			found = candidate;
+			break;
+		}
+	}
+	return found;
+}
+
+/**
+ * Reset FCoE port
+ *
+ * Restarts the transport interface, returns all FIP state to its
+ * defaults and, if the network device is open with link up, starts
+ * the timer without delay.
+ *
+ * @v fcoe FCoE port
+ */
+static void fcoe_reset ( struct fcoe_port *fcoe ) {
+
+ /* Detach FC port, if any */
+ intf_restart ( &fcoe->transport, -ECANCELED );
+
+ /* Reset any FIP state
+ *
+ * The priority starts one above FIP_LOWEST_PRIORITY so that
+ * any advertised priority up to FIP_LOWEST_PRIORITY compares
+ * as better in fcoe_fip_rx_advertisement().
+ */
+ stop_timer ( &fcoe->timer );
+ fcoe->timeouts = 0;
+ fcoe->flags = 0;
+ fcoe->priority = ( FIP_LOWEST_PRIORITY + 1 );
+ fcoe->keepalive = 0;
+ memcpy ( fcoe->fcf_mac, default_fcf_mac,
+ sizeof ( fcoe->fcf_mac ) );
+ memcpy ( fcoe->local_mac, fcoe->netdev->ll_addr,
+ sizeof ( fcoe->local_mac ) );
+
+ /* Start FIP solicitation if network is available */
+ if ( netdev_is_open ( fcoe->netdev ) &&
+ netdev_link_ok ( fcoe->netdev ) ) {
+ fcoe->flags |= FCOE_HAVE_NETWORK;
+ start_timer_nodelay ( &fcoe->timer );
+ DBGC ( fcoe, "FCoE %s starting %s\n", fcoe->netdev->name,
+ ( vlan_can_be_trunk ( fcoe->netdev ) ?
+ "VLAN discovery" : "FIP solicitation" ) );
+ }
+
+ /* Send notification of window change */
+ xfer_window_changed ( &fcoe->transport );
+}
+
+/**
+ * Transmit FCoE packet
+ *
+ * An unsolicited-control ELS frame whose command is FLOGI is wrapped
+ * as a FIP ELS request when a FIP-capable forwarder is available;
+ * every other frame is encapsulated as an FCoE frame with SoF/EoF
+ * delimiters and a CRC.  Ownership of the I/O buffer is always passed
+ * on to net_tx().
+ *
+ * @v fcoe		FCoE port
+ * @v iobuf		I/O buffer
+ * @v meta		Data transfer metadata
+ * @ret rc		Return status code
+ */
+static int fcoe_deliver ( struct fcoe_port *fcoe,
+			  struct io_buffer *iobuf,
+			  struct xfer_metadata *meta __unused ) {
+	struct fc_frame_header *fchdr = iobuf->data;
+	struct fc_els_frame_common *els = ( iobuf->data + sizeof ( *fchdr ) );
+	struct fcoe_header *fcoehdr;
+	struct fcoe_footer *fcoeftr;
+	struct fip_header *fiphdr;
+	struct fip_login *fipflogi;
+	struct fip_mac_address *fipmac;
+	uint32_t crc;
+	struct net_protocol *net_protocol;
+	void *ll_source;
+	int rc;
+
+	/* Send as FIP or FCoE as appropriate */
+	if ( ( fchdr->r_ctl == ( FC_R_CTL_ELS | FC_R_CTL_UNSOL_CTRL ) ) &&
+	     ( els->command == FC_ELS_FLOGI ) &&
+	     ( fcoe->flags & FCOE_HAVE_FIP_FCF ) ) {
+
+		/* Create FIP FLOGI descriptor; its length covers the
+		 * descriptor prefix plus the whole FC frame
+		 */
+		fipflogi = iob_push ( iobuf,
+				      offsetof ( typeof ( *fipflogi ), fc ) );
+		memset ( fipflogi, 0, offsetof ( typeof ( *fipflogi ), fc ) );
+		fipflogi->type = FIP_FLOGI;
+		fipflogi->len = ( iob_len ( iobuf ) / 4 );
+
+		/* Create FIP MAC address descriptor; the MAC is left
+		 * as zero unless the forwarder allows SPMA
+		 */
+		fipmac = iob_put ( iobuf, sizeof ( *fipmac ) );
+		memset ( fipmac, 0, sizeof ( *fipmac ) );
+		fipmac->type = FIP_MAC_ADDRESS;
+		fipmac->len = ( sizeof ( *fipmac ) / 4 );
+		if ( fcoe->flags & FCOE_FCF_ALLOWS_SPMA ) {
+			memcpy ( fipmac->mac, fcoe->netdev->ll_addr,
+				 sizeof ( fipmac->mac ) );
+		}
+
+		/* Create FIP header */
+		fiphdr = iob_push ( iobuf, sizeof ( *fiphdr ) );
+		memset ( fiphdr, 0, sizeof ( *fiphdr ) );
+		fiphdr->version = FIP_VERSION;
+		fiphdr->code = htons ( FIP_CODE_ELS );
+		fiphdr->subcode = FIP_ELS_REQUEST;
+		fiphdr->len =
+			htons ( ( iob_len ( iobuf ) - sizeof ( *fiphdr ) ) / 4);
+		fiphdr->flags = ( ( fcoe->flags & FCOE_FCF_ALLOWS_SPMA ) ?
+				  htons ( FIP_SP ) : htons ( FIP_FP ) );
+
+		/* Send as FIP packet from netdev's own MAC address */
+		net_protocol = &fip_protocol;
+		ll_source = fcoe->netdev->ll_addr;
+
+	} else {
+
+		/* Calculate CRC over the FC frame, before encapsulation */
+		crc = crc32_le ( ~((uint32_t)0), iobuf->data,
+				 iob_len ( iobuf ) );
+
+		/* Create FCoE header.  seq_cnt is a network-order
+		 * field, so the constant it is compared against is
+		 * converted with htons().
+		 */
+		fcoehdr = iob_push ( iobuf, sizeof ( *fcoehdr ) );
+		memset ( fcoehdr, 0, sizeof ( *fcoehdr ) );
+		fcoehdr->sof = ( ( fchdr->seq_cnt == htons ( 0 ) ) ?
+				 FCOE_SOF_I3 : FCOE_SOF_N3 );
+
+		/* Create FCoE footer */
+		fcoeftr = iob_put ( iobuf, sizeof ( *fcoeftr ) );
+		memset ( fcoeftr, 0, sizeof ( *fcoeftr ) );
+		fcoeftr->crc = cpu_to_le32 ( crc ^ ~((uint32_t)0) );
+		fcoeftr->eof = ( ( fchdr->f_ctl_es & FC_F_CTL_ES_END ) ?
+				 FCOE_EOF_T : FCOE_EOF_N );
+
+		/* Send as FCoE packet from FCoE MAC address */
+		net_protocol = &fcoe_protocol;
+		ll_source = fcoe->local_mac;
+	}
+
+	/* Transmit packet.  The buffer is disowned before the call,
+	 * so there is nothing left for us to free on either outcome.
+	 */
+	if ( ( rc = net_tx ( iob_disown ( iobuf ), fcoe->netdev, net_protocol,
+			     fcoe->fcf_mac, ll_source ) ) != 0 ) {
+		DBGC ( fcoe, "FCoE %s could not transmit: %s\n",
+		       fcoe->netdev->name, strerror ( rc ) );
+	}
+
+	return rc;
+}
+
+/**
+ * Allocate FCoE I/O buffer
+ *
+ * The buffer has room reserved in front of the payload for the
+ * link-layer and FCoE headers, and room after it for the FCoE footer.
+ *
+ * @v fcoe		FCoE port
+ * @v len		Payload length
+ * @ret iobuf		I/O buffer, or NULL
+ */
+static struct io_buffer * fcoe_alloc_iob ( struct fcoe_port *fcoe __unused,
+					   size_t len ) {
+	size_t headroom = ( MAX_LL_HEADER_LEN + sizeof ( struct fcoe_header ) );
+	size_t tailroom = sizeof ( struct fcoe_footer );
+	struct io_buffer *iobuf;
+
+	iobuf = alloc_iob ( headroom + len + tailroom );
+	if ( ! iobuf )
+		return NULL;
+
+	/* Leave space for the headers to be pushed later */
+	iob_reserve ( iobuf, headroom );
+	return iobuf;
+}
+
+/**
+ * Process incoming FCoE packets
+ *
+ * Validates the FCoE encapsulation (version, SoF, CRC and EoF),
+ * strips the header and footer, and hands the remaining frame to the
+ * transport interface.  The I/O buffer is either handed off or freed
+ * on every path.
+ *
+ * @v iobuf I/O buffer
+ * @v netdev Network device
+ * @v ll_dest Link-layer destination address
+ * @v ll_source Link-layer source address
+ * @v flags Packet flags
+ * @ret rc Return status code
+ */
+static int fcoe_rx ( struct io_buffer *iobuf, struct net_device *netdev,
+ const void *ll_dest, const void *ll_source,
+ unsigned int flags __unused ) {
+ struct fcoe_header *fcoehdr;
+ struct fcoe_footer *fcoeftr;
+ struct fcoe_port *fcoe;
+ int rc;
+
+ /* Identify FCoE port */
+ if ( ( fcoe = fcoe_demux ( netdev ) ) == NULL ) {
+ DBG ( "FCoE received frame for net device %s missing FCoE "
+ "port\n", netdev->name );
+ rc = -ENOTCONN;
+ goto done;
+ }
+
+ /* Discard packets not destined for us
+ *
+ * Accepted destinations are our local MAC address and the
+ * default FCF MAC address.
+ */
+ if ( ( memcmp ( fcoe->local_mac, ll_dest,
+ sizeof ( fcoe->local_mac ) ) != 0 ) &&
+ ( memcmp ( default_fcf_mac, ll_dest,
+ sizeof ( default_fcf_mac ) ) != 0 ) ) {
+ DBGC2 ( fcoe, "FCoE %s ignoring packet for %s\n",
+ fcoe->netdev->name, eth_ntoa ( ll_dest ) );
+ rc = -ENOTCONN;
+ goto done;
+ }
+
+ /* Sanity check: must hold at least a header and a footer */
+ if ( iob_len ( iobuf ) < ( sizeof ( *fcoehdr ) + sizeof ( *fcoeftr ) )){
+ DBGC ( fcoe, "FCoE %s received under-length frame (%zd "
+ "bytes)\n", fcoe->netdev->name, iob_len ( iobuf ) );
+ rc = -EINVAL_UNDERLENGTH;
+ goto done;
+ }
+
+ /* Strip header and footer
+ *
+ * The pointers are kept so that the stripped fields can still
+ * be examined by the validity checks below.
+ */
+ fcoehdr = iobuf->data;
+ iob_pull ( iobuf, sizeof ( *fcoehdr ) );
+ fcoeftr = ( iobuf->data + iob_len ( iobuf ) - sizeof ( *fcoeftr ) );
+ iob_unput ( iobuf, sizeof ( *fcoeftr ) );
+
+ /* Validity checks */
+ if ( fcoehdr->version != FCOE_FRAME_VER ) {
+ DBGC ( fcoe, "FCoE %s received unsupported frame version "
+ "%02x\n", fcoe->netdev->name, fcoehdr->version );
+ rc = -EPROTONOSUPPORT;
+ goto done;
+ }
+ if ( ! ( ( fcoehdr->sof == FCOE_SOF_I3 ) ||
+ ( fcoehdr->sof == FCOE_SOF_N3 ) ) ) {
+ DBGC ( fcoe, "FCoE %s received unsupported start-of-frame "
+ "delimiter %02x\n", fcoe->netdev->name, fcoehdr->sof );
+ rc = -EINVAL_SOF;
+ goto done;
+ }
+ if ( ( le32_to_cpu ( fcoeftr->crc ) ^ ~((uint32_t)0) ) !=
+ crc32_le ( ~((uint32_t)0), iobuf->data, iob_len ( iobuf ) ) ) {
+ DBGC ( fcoe, "FCoE %s received invalid CRC\n",
+ fcoe->netdev->name );
+ rc = -EINVAL_CRC;
+ goto done;
+ }
+ if ( ! ( ( fcoeftr->eof == FCOE_EOF_N ) ||
+ ( fcoeftr->eof == FCOE_EOF_T ) ) ) {
+ DBGC ( fcoe, "FCoE %s received unsupported end-of-frame "
+ "delimiter %02x\n", fcoe->netdev->name, fcoeftr->eof );
+ rc = -EINVAL_EOF;
+ goto done;
+ }
+
+ /* Record FCF address if applicable
+ *
+ * Applies only when a forwarder is in use that was not
+ * selected via FIP; the sender's address is then adopted as
+ * the forwarder MAC address.
+ */
+ if ( ( fcoe->flags & FCOE_HAVE_FCF ) &&
+ ( ! ( fcoe->flags & FCOE_HAVE_FIP_FCF ) ) ) {
+ memcpy ( &fcoe->fcf_mac, ll_source, sizeof ( fcoe->fcf_mac ) );
+ }
+
+ /* Hand off via transport interface (buffer is disowned here) */
+ if ( ( rc = xfer_deliver_iob ( &fcoe->transport,
+ iob_disown ( iobuf ) ) ) != 0 ) {
+ DBGC ( fcoe, "FCoE %s could not deliver frame: %s\n",
+ fcoe->netdev->name, strerror ( rc ) );
+ goto done;
+ }
+
+ done:
+ free_iob ( iobuf );
+ return rc;
+}
+
+/**
+ * Check FCoE flow control window
+ *
+ * The window is either fully open (once a forwarder has been
+ * selected) or fully closed.
+ *
+ * @v fcoe		FCoE port
+ * @ret len		Length of window
+ */
+static size_t fcoe_window ( struct fcoe_port *fcoe ) {
+
+	/* No forwarder selected: nothing may be sent */
+	if ( ! ( fcoe->flags & FCOE_HAVE_FCF ) )
+		return 0;
+
+	return ~( ( size_t ) 0 );
+}
+
+/**
+ * Close FCoE port
+ *
+ * Stops the FIP timer, shuts down the transport interface, and
+ * releases the port's references and list membership.
+ *
+ * @v fcoe		FCoE port
+ * @v rc		Reason for close
+ */
+static void fcoe_close ( struct fcoe_port *fcoe, int rc ) {
+	struct net_device *netdev = fcoe->netdev;
+
+	/* Quiesce the FIP timer and the transport interface */
+	stop_timer ( &fcoe->timer );
+	intf_shutdown ( &fcoe->transport, rc );
+
+	/* Drop the network device reference, unlink and release the port */
+	netdev_put ( netdev );
+	list_del ( &fcoe->list );
+	ref_put ( &fcoe->refcnt );
+}
+
+/**
+ * Identify device underlying FCoE port
+ *
+ * @v fcoe		FCoE port
+ * @ret device		Underlying device (that of the network device)
+ */
+static struct device * fcoe_identify_device ( struct fcoe_port *fcoe ) {
+	struct net_device *netdev = fcoe->netdev;
+
+	return netdev->dev;
+}
+
+/** FCoE transport interface operations
+ *
+ * Maps data delivery, buffer allocation, window queries, closure and
+ * device identification onto the fcoe_xxx() routines.
+ */
+static struct interface_operation fcoe_transport_op[] = {
+ INTF_OP ( xfer_deliver, struct fcoe_port *, fcoe_deliver ),
+ INTF_OP ( xfer_alloc_iob, struct fcoe_port *, fcoe_alloc_iob ),
+ INTF_OP ( xfer_window, struct fcoe_port *, fcoe_window ),
+ INTF_OP ( intf_close, struct fcoe_port *, fcoe_close ),
+ INTF_OP ( identify_device, struct fcoe_port *,
+ fcoe_identify_device ),
+};
+
+/** FCoE transport interface descriptor (for the "transport" member) */
+static struct interface_descriptor fcoe_transport_desc =
+ INTF_DESC ( struct fcoe_port, transport, fcoe_transport_op );
+
+/******************************************************************************
+ *
+ * FIP protocol
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Parse FIP packet into descriptor set
+ *
+ * Walks the descriptor list that follows the FIP header, recording
+ * the first instance of each descriptor type that we understand.
+ * Unknown non-critical descriptors are skipped; an unknown critical
+ * descriptor aborts parsing.  An empty descriptor list is valid and
+ * yields an empty descriptor set.
+ *
+ * @v fcoe		FCoE port
+ * @v fiphdr		FIP header
+ * @v len		Length of FIP packet
+ * @v descs		Descriptor set to fill in
+ * @ret rc		Return status code
+ */
+static int fcoe_fip_parse ( struct fcoe_port *fcoe, struct fip_header *fiphdr,
+			    size_t len, struct fip_descriptors *descs ) {
+	union fip_descriptor *desc;
+	size_t descs_len;
+	size_t desc_len;
+	size_t desc_offset;
+	unsigned int desc_type;
+
+	/* Check FIP version */
+	if ( fiphdr->version != FIP_VERSION ) {
+		DBGC ( fcoe, "FCoE %s received unsupported FIP version %02x\n",
+		       fcoe->netdev->name, fiphdr->version );
+		return -EINVAL;
+	}
+
+	/* Check length (the header counts descriptors in 32-bit words) */
+	descs_len = ( ntohs ( fiphdr->len ) * 4 );
+	if ( ( sizeof ( *fiphdr ) + descs_len ) > len ) {
+		DBGC ( fcoe, "FCoE %s received bad descriptor list length\n",
+		       fcoe->netdev->name );
+		return -EINVAL;
+	}
+
+	/* Parse descriptor list
+	 *
+	 * The loop bound is written as an addition rather than as
+	 * "descs_len - sizeof ( desc->common )": that subtraction is
+	 * carried out in size_t and wraps to a huge value when the
+	 * list is shorter than a common descriptor header (e.g. an
+	 * empty list), which would read a descriptor from beyond the
+	 * declared list.
+	 */
+	memset ( descs, 0, sizeof ( *descs ) );
+	for ( desc_offset = 0 ;
+	      ( desc_offset + sizeof ( desc->common ) ) <= descs_len ;
+	      desc_offset += desc_len ) {
+
+		/* Find descriptor and validate length */
+		desc = ( ( ( void * ) ( fiphdr + 1 ) ) + desc_offset );
+		desc_type = desc->common.type;
+		desc_len = ( desc->common.len * 4 );
+		if ( desc_len == 0 ) {
+			DBGC ( fcoe, "FCoE %s received zero-length "
+			       "descriptor\n", fcoe->netdev->name );
+			return -EINVAL;
+		}
+		if ( ( desc_offset + desc_len ) > descs_len ) {
+			DBGC ( fcoe, "FCoE %s descriptor overrun\n",
+			       fcoe->netdev->name );
+			return -EINVAL;
+		}
+
+		/* Handle descriptors that we understand */
+		if ( ( desc_type > FIP_RESERVED ) &&
+		     ( desc_type < FIP_NUM_DESCRIPTOR_TYPES ) ) {
+			/* Use only the first instance of a descriptor */
+			if ( descs->desc[desc_type] == NULL )
+				descs->desc[desc_type] = desc;
+			continue;
+		}
+
+		/* Abort if we cannot understand a critical descriptor */
+		if ( FIP_IS_CRITICAL ( desc_type ) ) {
+			DBGC ( fcoe, "FCoE %s cannot understand critical "
+			       "descriptor type %02x\n",
+			       fcoe->netdev->name, desc_type );
+			return -ENOTSUP;
+		}
+
+		/* Ignore non-critical descriptors that we cannot understand */
+	}
+
+	return 0;
+}
+
+/**
+ * Send FIP VLAN request
+ *
+ * Builds a request consisting of a FIP header and a MAC address
+ * descriptor carrying the network device's link-layer address, and
+ * sends it to the All-FCF-MACs address.
+ *
+ * @v fcoe		FCoE port
+ * @ret rc		Return status code
+ */
+static int fcoe_fip_tx_vlan ( struct fcoe_port *fcoe ) {
+	struct net_device *netdev = fcoe->netdev;
+	struct io_buffer *iobuf;
+	struct {
+		struct fip_header hdr;
+		struct fip_mac_address mac_address;
+	} __attribute__ (( packed )) *request;
+	int rc;
+
+	/* Allocate I/O buffer with room for the link-layer header */
+	iobuf = alloc_iob ( MAX_LL_HEADER_LEN + sizeof ( *request ) );
+	if ( iobuf == NULL )
+		return -ENOMEM;
+	iob_reserve ( iobuf, MAX_LL_HEADER_LEN );
+
+	/* Start from an all-zero request */
+	request = iob_put ( iobuf, sizeof ( *request ) );
+	memset ( request, 0, sizeof ( *request ) );
+
+	/* Fill in MAC address descriptor */
+	request->mac_address.type = FIP_MAC_ADDRESS;
+	request->mac_address.len = ( sizeof ( request->mac_address ) / 4 );
+	memcpy ( request->mac_address.mac, netdev->ll_addr,
+		 sizeof ( request->mac_address.mac ) );
+
+	/* Fill in FIP header; the length counts the 32-bit words
+	 * that follow the header
+	 */
+	request->hdr.version = FIP_VERSION;
+	request->hdr.code = htons ( FIP_CODE_VLAN );
+	request->hdr.subcode = FIP_VLAN_REQUEST;
+	request->hdr.len = htons ( ( sizeof ( *request ) -
+				     sizeof ( request->hdr ) ) / 4 );
+
+	/* Send VLAN request */
+	rc = net_tx ( iob_disown ( iobuf ), netdev, &fip_protocol,
+		      all_fcf_macs, netdev->ll_addr );
+	if ( rc != 0 ) {
+		DBGC ( fcoe, "FCoE %s could not send VLAN request: "
+		       "%s\n", netdev->name, strerror ( rc ) );
+		return rc;
+	}
+
+	return 0;
+}
+
+/**
+ * Handle received FIP VLAN notification
+ *
+ * Requires both a MAC address and a VLAN descriptor.  Creates the
+ * notified VLAN on the underlying network device and marks this port
+ * as having found a VLAN.
+ *
+ * @v fcoe		FCoE port
+ * @v descs		Descriptor list
+ * @v flags		Flags
+ * @ret rc		Return status code
+ */
+static int fcoe_fip_rx_vlan ( struct fcoe_port *fcoe,
+			      struct fip_descriptors *descs,
+			      unsigned int flags __unused ) {
+	struct fip_mac_address *mac_address;
+	struct fip_vlan *vlan;
+	unsigned int tag;
+	int rc;
+
+	/* Locate the descriptors of interest */
+	mac_address = fip_mac_address ( descs );
+	vlan = fip_vlan ( descs );
+
+	/* Sanity checks */
+	if ( mac_address == NULL ) {
+		DBGC ( fcoe, "FCoE %s received VLAN notification missing MAC "
+		       "address\n", fcoe->netdev->name );
+		return -EINVAL;
+	}
+	if ( vlan == NULL ) {
+		DBGC ( fcoe, "FCoE %s received VLAN notification missing VLAN "
+		       "tag\n", fcoe->netdev->name );
+		return -EINVAL;
+	}
+
+	/* Create VLAN */
+	tag = ntohs ( vlan->vlan );
+	DBGC ( fcoe, "FCoE %s creating VLAN %d for FCF %s\n",
+	       fcoe->netdev->name, tag, eth_ntoa ( mac_address->mac ) );
+	rc = vlan_create ( fcoe->netdev, tag, FCOE_VLAN_PRIORITY );
+	if ( rc != 0 ) {
+		DBGC ( fcoe, "FCoE %s could not create VLAN %d: %s\n",
+		       fcoe->netdev->name, tag, strerror ( rc ) );
+		return rc;
+	}
+
+	/* Note the discovery.  From here on this port stays passive:
+	 * the port created automatically for the new VLAN device
+	 * carries the real FCoE traffic.
+	 */
+	fcoe->flags |= FCOE_VLAN_FOUND;
+
+	return 0;
+}
+
+/**
+ * Send FIP discovery solicitation
+ *
+ * Builds a solicitation carrying MAC address, name identifier and
+ * maximum FCoE size descriptors, and sends it to the All-FCF-MACs
+ * address.
+ *
+ * @v fcoe		FCoE port
+ * @ret rc		Return status code
+ */
+static int fcoe_fip_tx_solicitation ( struct fcoe_port *fcoe ) {
+	struct net_device *netdev = fcoe->netdev;
+	struct io_buffer *iobuf;
+	struct {
+		struct fip_header hdr;
+		struct fip_mac_address mac_address;
+		struct fip_name_id name_id;
+		struct fip_max_fcoe_size max_fcoe_size;
+	} __attribute__ (( packed )) *solicitation;
+	int rc;
+
+	/* Allocate I/O buffer with room for the link-layer header */
+	iobuf = alloc_iob ( MAX_LL_HEADER_LEN + sizeof ( *solicitation ) );
+	if ( iobuf == NULL )
+		return -ENOMEM;
+	iob_reserve ( iobuf, MAX_LL_HEADER_LEN );
+
+	/* Start from an all-zero solicitation */
+	solicitation = iob_put ( iobuf, sizeof ( *solicitation ) );
+	memset ( solicitation, 0, sizeof ( *solicitation ) );
+
+	/* MAC address descriptor: the device's link-layer address */
+	solicitation->mac_address.type = FIP_MAC_ADDRESS;
+	solicitation->mac_address.len =
+		( sizeof ( solicitation->mac_address ) / 4 );
+	memcpy ( solicitation->mac_address.mac, netdev->ll_addr,
+		 sizeof ( solicitation->mac_address.mac ) );
+
+	/* Name identifier descriptor: our node WWN */
+	solicitation->name_id.type = FIP_NAME_ID;
+	solicitation->name_id.len = ( sizeof ( solicitation->name_id ) / 4 );
+	memcpy ( &solicitation->name_id.name, &fcoe->node_wwn.fc,
+		 sizeof ( solicitation->name_id.name ) );
+
+	/* Maximum FCoE size descriptor: the maximum Ethernet MTU less
+	 * the FCoE header and footer
+	 */
+	solicitation->max_fcoe_size.type = FIP_MAX_FCOE_SIZE;
+	solicitation->max_fcoe_size.len =
+		( sizeof ( solicitation->max_fcoe_size ) / 4 );
+	solicitation->max_fcoe_size.mtu =
+		htons ( ETH_MAX_MTU - sizeof ( struct fcoe_header ) -
+			sizeof ( struct fcoe_footer ) );
+
+	/* FIP header; the length counts the 32-bit words that follow */
+	solicitation->hdr.version = FIP_VERSION;
+	solicitation->hdr.code = htons ( FIP_CODE_DISCOVERY );
+	solicitation->hdr.subcode = FIP_DISCOVERY_SOLICIT;
+	solicitation->hdr.len = htons ( ( sizeof ( *solicitation ) -
+					  sizeof ( solicitation->hdr ) ) / 4 );
+	solicitation->hdr.flags = htons ( FIP_FP | FIP_SP );
+
+	/* Send discovery solicitation */
+	rc = net_tx ( iob_disown ( iobuf ), netdev, &fip_protocol,
+		      all_fcf_macs, netdev->ll_addr );
+	if ( rc != 0 ) {
+		DBGC ( fcoe, "FCoE %s could not send discovery solicitation: "
+		       "%s\n", netdev->name, strerror ( rc ) );
+		return rc;
+	}
+
+	return 0;
+}
+
+/**
+ * Handle received FIP discovery advertisement
+ *
+ * Requires priority, MAC address and FKA ADV period descriptors.
+ * What happens next depends on the port state: while no forwarder
+ * has been selected, the best-priority suitable advertisement is
+ * recorded; once a FIP forwarder is in use, advertisements from it
+ * reset the timeout counter; in non-FIP mode, the port is reset.
+ *
+ * @v fcoe FCoE port
+ * @v descs Descriptor list
+ * @v flags Flags
+ * @ret rc Return status code
+ */
+static int fcoe_fip_rx_advertisement ( struct fcoe_port *fcoe,
+ struct fip_descriptors *descs,
+ unsigned int flags ) {
+ struct fip_priority *priority = fip_priority ( descs );
+ struct fip_mac_address *mac_address = fip_mac_address ( descs );
+ struct fip_fka_adv_p *fka_adv_p = fip_fka_adv_p ( descs );
+
+ /* Sanity checks */
+ if ( ! priority ) {
+ DBGC ( fcoe, "FCoE %s received advertisement missing "
+ "priority\n", fcoe->netdev->name );
+ return -EINVAL;
+ }
+ if ( ! mac_address ) {
+ DBGC ( fcoe, "FCoE %s received advertisement missing MAC "
+ "address\n", fcoe->netdev->name );
+ return -EINVAL;
+ }
+ if ( ! fka_adv_p ) {
+ DBGC ( fcoe, "FCoE %s received advertisement missing FKA ADV "
+ "period\n", fcoe->netdev->name );
+ return -EINVAL;
+ }
+
+ if ( ! ( fcoe->flags & FCOE_HAVE_FCF ) ) {
+
+ /* We are soliciting for an FCF. Store the highest
+ * (i.e. lowest-valued) priority solicited
+ * advertisement that we receive.
+ *
+ * Only advertisements with all of FIP_A, FIP_S and
+ * FIP_F set are considered.
+ */
+ if ( ( ( flags & ( FIP_A | FIP_S | FIP_F ) ) ==
+ ( FIP_A | FIP_S | FIP_F ) ) &&
+ ( priority->priority < fcoe->priority ) ) {
+
+ fcoe->flags |= FCOE_HAVE_FIP_FCF;
+ fcoe->priority = priority->priority;
+ if ( fka_adv_p->flags & FIP_NO_KEEPALIVE ) {
+ fcoe->keepalive = 0;
+ } else {
+ fcoe->keepalive = ntohl ( fka_adv_p->period );
+ }
+ fcoe->flags &= ~FCOE_FCF_ALLOWS_SPMA;
+ if ( flags & FIP_SP )
+ fcoe->flags |= FCOE_FCF_ALLOWS_SPMA;
+ memcpy ( fcoe->fcf_mac, mac_address->mac,
+ sizeof ( fcoe->fcf_mac ) );
+ DBGC ( fcoe, "FCoE %s selected FCF %s (pri %d",
+ fcoe->netdev->name, eth_ntoa ( fcoe->fcf_mac ),
+ fcoe->priority );
+ if ( fcoe->keepalive ) {
+ DBGC ( fcoe, ", FKA ADV %dms",
+ fcoe->keepalive );
+ }
+ DBGC ( fcoe, ", %cPMA)\n",
+ ( ( fcoe->flags & FCOE_FCF_ALLOWS_SPMA ) ?
+ 'S' : 'F' ) );
+ }
+
+ } else if ( fcoe->flags & FCOE_HAVE_FIP_FCF ) {
+
+ /* We are checking that the FCF remains alive. Reset
+ * the timeout counter if this is an advertisement
+ * from our forwarder.
+ */
+ if ( memcmp ( fcoe->fcf_mac, mac_address->mac,
+ sizeof ( fcoe->fcf_mac ) ) == 0 ) {
+ fcoe->timeouts = 0;
+ }
+
+ } else {
+
+ /* We are operating in non-FIP mode and have received
+ * a FIP advertisement. Reset the link in order to
+ * attempt FIP.
+ */
+ fcoe_reset ( fcoe );
+
+ }
+
+ return 0;
+}
+
+/**
+ * Handle received FIP ELS response
+ *
+ * Requires FLOGI and MAC address descriptors.  Adopts the MAC address
+ * from the response as our local MAC address, then delivers the
+ * Fibre Channel frame embedded in the FLOGI descriptor to the
+ * transport interface.
+ *
+ * @v fcoe		FCoE port
+ * @v descs		Descriptor list
+ * @v flags		Flags
+ * @ret rc		Return status code
+ */
+static int fcoe_fip_rx_els_response ( struct fcoe_port *fcoe,
+				      struct fip_descriptors *descs,
+				      unsigned int flags __unused ) {
+	struct fip_els *flogi;
+	struct fip_mac_address *mac_address;
+	size_t desc_len;
+	size_t prefix_len;
+	int rc;
+
+	/* Locate the descriptors of interest */
+	flogi = fip_flogi ( descs );
+	mac_address = fip_mac_address ( descs );
+
+	/* Sanity checks */
+	if ( flogi == NULL ) {
+		DBGC ( fcoe, "FCoE %s received ELS response missing FLOGI\n",
+		       fcoe->netdev->name );
+		return -EINVAL;
+	}
+	if ( mac_address == NULL ) {
+		DBGC ( fcoe, "FCoE %s received ELS response missing MAC "
+		       "address\n", fcoe->netdev->name );
+		return -EINVAL;
+	}
+
+	/* Record local MAC address */
+	memcpy ( fcoe->local_mac, mac_address->mac, sizeof ( fcoe->local_mac ));
+	DBGC ( fcoe, "FCoE %s using local MAC %s\n",
+	       fcoe->netdev->name, eth_ntoa ( fcoe->local_mac ) );
+
+	/* The frame is whatever follows the descriptor prefix */
+	desc_len = ( flogi->len * 4 );
+	prefix_len = offsetof ( typeof ( *flogi ), fc );
+
+	/* Hand off via transport interface */
+	rc = xfer_deliver_raw ( &fcoe->transport, &flogi->fc,
+				( desc_len - prefix_len ) );
+	if ( rc != 0 ) {
+		DBGC ( fcoe, "FCoE %s could not deliver FIP FLOGI frame: %s\n",
+		       fcoe->netdev->name, strerror ( rc ) );
+		return rc;
+	}
+
+	return 0;
+}
+
+/**
+ * Send FIP keepalive
+ *
+ * Builds a keepalive consisting of a FIP header and a MAC address
+ * descriptor carrying the network device's link-layer address, and
+ * sends it to the current forwarder MAC address.
+ *
+ * @v fcoe		FCoE port
+ * @ret rc		Return status code
+ */
+static int fcoe_fip_tx_keepalive ( struct fcoe_port *fcoe ) {
+	struct net_device *netdev = fcoe->netdev;
+	struct io_buffer *iobuf;
+	struct {
+		struct fip_header hdr;
+		struct fip_mac_address mac_address;
+	} __attribute__ (( packed )) *keepalive;
+	int rc;
+
+	/* Allocate I/O buffer with room for the link-layer header */
+	iobuf = alloc_iob ( MAX_LL_HEADER_LEN + sizeof ( *keepalive ) );
+	if ( iobuf == NULL )
+		return -ENOMEM;
+	iob_reserve ( iobuf, MAX_LL_HEADER_LEN );
+
+	/* Start from an all-zero keepalive */
+	keepalive = iob_put ( iobuf, sizeof ( *keepalive ) );
+	memset ( keepalive, 0, sizeof ( *keepalive ) );
+
+	/* Fill in MAC address descriptor */
+	keepalive->mac_address.type = FIP_MAC_ADDRESS;
+	keepalive->mac_address.len =
+		( sizeof ( keepalive->mac_address ) / 4 );
+	memcpy ( keepalive->mac_address.mac, netdev->ll_addr,
+		 sizeof ( keepalive->mac_address.mac ) );
+
+	/* Fill in FIP header; the length counts the 32-bit words
+	 * that follow the header
+	 */
+	keepalive->hdr.version = FIP_VERSION;
+	keepalive->hdr.code = htons ( FIP_CODE_MAINTAIN );
+	keepalive->hdr.subcode = FIP_MAINTAIN_KEEP_ALIVE;
+	keepalive->hdr.len = htons ( ( sizeof ( *keepalive ) -
+				       sizeof ( keepalive->hdr ) ) / 4 );
+
+	/* Send keepalive */
+	rc = net_tx ( iob_disown ( iobuf ), netdev, &fip_protocol,
+		      fcoe->fcf_mac, netdev->ll_addr );
+	if ( rc != 0 ) {
+		DBGC ( fcoe, "FCoE %s could not send keepalive: %s\n",
+		       netdev->name, strerror ( rc ) );
+		return rc;
+	}
+
+	return 0;
+}
+
+ /**
+ * A FIP handler
+ *
+ * Associates a FIP protocol code and subcode with the function used
+ * to process received packets carrying that code and subcode.
+ */
+ struct fip_handler {
+ /** Protocol code (compared against ntohs ( fiphdr->code ) on receive) */
+ uint16_t code;
+ /** Protocol subcode (compared directly against fiphdr->subcode) */
+ uint8_t subcode;
+ /**
+ * Receive FIP packet
+ *
+ * @v fcoe FCoE port
+ * @v descs Descriptor list
+ * @v flags Flags
+ * @ret rc Return status code
+ */
+ int ( * rx ) ( struct fcoe_port *fcoe, struct fip_descriptors *descs,
+ unsigned int flags );
+ };
+
+ /**
+ * FIP handlers
+ *
+ * Searched in order by fcoe_fip_rx(); the first entry whose code and
+ * subcode both match the received FIP header is invoked.
+ */
+ static struct fip_handler fip_handlers[] = {
+ { FIP_CODE_VLAN, FIP_VLAN_NOTIFY,
+ fcoe_fip_rx_vlan },
+ { FIP_CODE_DISCOVERY, FIP_DISCOVERY_ADVERTISE,
+ fcoe_fip_rx_advertisement },
+ { FIP_CODE_ELS, FIP_ELS_RESPONSE,
+ fcoe_fip_rx_els_response },
+ };
+
+/**
+ * Process incoming FIP packets
+ *
+ * @v iobuf I/O buffer
+ * @v netdev Network device
+ * @v ll_dest Link-layer destination address
+ * @v ll_source Link-layer source address
+ * @v flags Packet flags
+ * @ret rc Return status code
+ */
+static int fcoe_fip_rx ( struct io_buffer *iobuf,
+ struct net_device *netdev,
+ const void *ll_dest,
+ const void *ll_source __unused,
+ unsigned int flags __unused ) {
+ struct fip_header *fiphdr = iobuf->data;
+ struct fip_descriptors descs;
+ struct fip_handler *handler;
+ struct fcoe_port *fcoe;
+ unsigned int i;
+ int rc;
+
+ /* Identify FCoE port */
+ if ( ( fcoe = fcoe_demux ( netdev ) ) == NULL ) {
+ DBG ( "FCoE received FIP frame for net device %s missing FCoE "
+ "port\n", netdev->name );
+ rc = -ENOTCONN;
+ goto done;
+ }
+
+ /* Discard packets not destined for us */
+ if ( ( memcmp ( fcoe->netdev->ll_addr, ll_dest, ETH_ALEN ) != 0 ) &&
+ ( memcmp ( all_fcoe_macs, ll_dest,
+ sizeof ( all_fcoe_macs ) ) != 0 ) &&
+ ( memcmp ( all_enode_macs, ll_dest,
+ sizeof ( all_enode_macs ) ) != 0 ) ) {
+ DBGC2 ( fcoe, "FCoE %s ignoring FIP packet for %s\n",
+ fcoe->netdev->name, eth_ntoa ( ll_dest ) );
+ rc = -ENOTCONN;
+ goto done;
+ }
+
+ /* Parse FIP packet */
+ if ( ( rc = fcoe_fip_parse ( fcoe, fiphdr, iob_len ( iobuf ),
+ &descs ) ) != 0 )
+ goto done;
+
+ /* Find a suitable handler */
+ for ( i = 0 ; i < ( sizeof ( fip_handlers ) /
+ sizeof ( fip_handlers[0] ) ) ; i++ ) {
+ handler = &fip_handlers[i];
+ if ( ( handler->code == ntohs ( fiphdr->code ) ) &&
+ ( handler->subcode == fiphdr->subcode ) ) {
+ rc = handler->rx ( fcoe, &descs,
+ ntohs ( fiphdr->flags ) );
+ goto done;
+ }
+ }
+ DBGC ( fcoe, "FCoE %s received unsupported FIP code %04x.%02x\n",
+ fcoe->netdev->name, ntohs ( fiphdr->code ), fiphdr->subcode );
+ rc = -ENOTSUP;
+
+ done:
+ free_iob ( iobuf );
+ return rc;
+}
+
+/******************************************************************************
+ *
+ * FCoE ports
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Handle FCoE timer expiry
+ *
+ * @v timer FIP timer
+ * @v over Timer expired
+ */
+static void fcoe_expired ( struct retry_timer *timer, int over __unused ) {
+ struct fcoe_port *fcoe =
+ container_of ( timer, struct fcoe_port, timer );
+ int rc;
+
+ /* Sanity check */
+ assert ( fcoe->flags & FCOE_HAVE_NETWORK );
+
+ /* Increment the timeout counter */
+ fcoe->timeouts++;
+
+ if ( vlan_can_be_trunk ( fcoe->netdev ) &&
+ ! ( fcoe->flags & FCOE_VLAN_TIMED_OUT ) ) {
+
+ /* If we have already found a VLAN, send infrequent
+ * VLAN requests, in case VLAN information changes.
+ */
+ if ( fcoe->flags & FCOE_VLAN_FOUND ) {
+ fcoe->flags &= ~FCOE_VLAN_FOUND;
+ fcoe->timeouts = 0;
+ start_timer_fixed ( &fcoe->timer,
+ FCOE_VLAN_POLL_DELAY );
+ fcoe_fip_tx_vlan ( fcoe );
+ return;
+ }
+
+ /* If we have not yet found a VLAN, and we have not
+ * yet timed out and given up on finding one, then
+ * send a VLAN request and wait.
+ */
+ if ( fcoe->timeouts <= FCOE_MAX_VLAN_REQUESTS ) {
+ start_timer_fixed ( &fcoe->timer,
+ FCOE_VLAN_RETRY_DELAY );
+ fcoe_fip_tx_vlan ( fcoe );
+ return;
+ }
+
+ /* We have timed out waiting for a VLAN; proceed to
+ * FIP discovery.
+ */
+ fcoe->flags |= FCOE_VLAN_TIMED_OUT;
+ fcoe->timeouts = 0;
+ DBGC ( fcoe, "FCoE %s giving up on VLAN discovery\n",
+ fcoe->netdev->name );
+ start_timer_nodelay ( &fcoe->timer );
+
+ } else if ( ! ( fcoe->flags & FCOE_HAVE_FCF ) ) {
+
+ /* If we have not yet found a FIP-capable forwarder,
+ * and we have not yet timed out and given up on
+ * finding one, then send a FIP solicitation and wait.
+ */
+ start_timer_fixed ( &fcoe->timer, FCOE_FIP_RETRY_DELAY );
+ if ( ( ! ( fcoe->flags & FCOE_HAVE_FIP_FCF ) ) &&
+ ( fcoe->timeouts <= FCOE_MAX_FIP_SOLICITATIONS ) ) {
+ fcoe_fip_tx_solicitation ( fcoe );
+ return;
+ }
+
+ /* Attach Fibre Channel port */
+ if ( ( rc = fc_port_open ( &fcoe->transport, &fcoe->node_wwn.fc,
+ &fcoe->port_wwn.fc,
+ fcoe->netdev->name ) ) != 0 ) {
+ DBGC ( fcoe, "FCoE %s could not create FC port: %s\n",
+ fcoe->netdev->name, strerror ( rc ) );
+ /* We will try again on the next timer expiry */
+ return;
+ }
+ stop_timer ( &fcoe->timer );
+
+ /* Either we have found a FIP-capable forwarder, or we
+ * have timed out and will fall back to pre-FIP mode.
+ */
+ fcoe->flags |= FCOE_HAVE_FCF;
+ fcoe->timeouts = 0;
+ DBGC ( fcoe, "FCoE %s using %sFIP FCF %s\n", fcoe->netdev->name,
+ ( ( fcoe->flags & FCOE_HAVE_FIP_FCF ) ? "" : "non-" ),
+ eth_ntoa ( fcoe->fcf_mac ) );
+
+ /* Start sending keepalives if applicable */
+ if ( fcoe->keepalive )
+ start_timer_nodelay ( &fcoe->timer );
+
+ /* Send notification of window change */
+ xfer_window_changed ( &fcoe->transport );
+
+ } else {
+
+ /* Send keepalive */
+ start_timer_fixed ( &fcoe->timer,
+ ( ( fcoe->keepalive * TICKS_PER_SEC ) / 1000 ) );
+ fcoe_fip_tx_keepalive ( fcoe );
+
+ /* Abandon FCF if we have not seen its advertisements */
+ if ( fcoe->timeouts > FCOE_MAX_FIP_MISSING_KEEPALIVES ) {
+ DBGC ( fcoe, "FCoE %s abandoning FCF %s\n",
+ fcoe->netdev->name, eth_ntoa ( fcoe->fcf_mac ));
+ fcoe_reset ( fcoe );
+ }
+ }
+}
+
+/**
+ * Create FCoE port
+ *
+ * @v netdev Network device
+ * @ret rc Return status code
+ */
+static int fcoe_probe ( struct net_device *netdev ) {
+ struct ll_protocol *ll_protocol = netdev->ll_protocol;
+ struct fcoe_port *fcoe;
+ int rc;
+
+ /* Sanity check */
+ if ( ll_protocol->ll_proto != htons ( ARPHRD_ETHER ) ) {
+ /* Not an error; simply skip this net device */
+ DBG ( "FCoE skipping non-Ethernet device %s\n", netdev->name );
+ rc = 0;
+ goto err_non_ethernet;
+ }
+
+ /* Allocate and initialise structure */
+ fcoe = zalloc ( sizeof ( *fcoe ) );
+ if ( ! fcoe ) {
+ rc = -ENOMEM;
+ goto err_zalloc;
+ }
+ ref_init ( &fcoe->refcnt, NULL );
+ intf_init ( &fcoe->transport, &fcoe_transport_desc, &fcoe->refcnt );
+ timer_init ( &fcoe->timer, fcoe_expired, &fcoe->refcnt );
+ fcoe->netdev = netdev_get ( netdev );
+
+ /* Construct node and port names */
+ fcoe->node_wwn.fcoe.authority = htons ( FCOE_AUTHORITY_IEEE );
+ memcpy ( &fcoe->node_wwn.fcoe.mac, netdev->ll_addr,
+ sizeof ( fcoe->node_wwn.fcoe.mac ) );
+ fcoe->port_wwn.fcoe.authority = htons ( FCOE_AUTHORITY_IEEE_EXTENDED );
+ memcpy ( &fcoe->port_wwn.fcoe.mac, netdev->ll_addr,
+ sizeof ( fcoe->port_wwn.fcoe.mac ) );
+
+ DBGC ( fcoe, "FCoE %s is %s", fcoe->netdev->name,
+ fc_ntoa ( &fcoe->node_wwn.fc ) );
+ DBGC ( fcoe, " port %s\n", fc_ntoa ( &fcoe->port_wwn.fc ) );
+
+ /* Transfer reference to port list */
+ list_add ( &fcoe->list, &fcoe_ports );
+ return 0;
+
+ netdev_put ( fcoe->netdev );
+ err_zalloc:
+ err_non_ethernet:
+ return rc;
+}
+
+/**
+ * Handle FCoE port device or link state change
+ *
+ * @v netdev Network device
+ */
+static void fcoe_notify ( struct net_device *netdev ) {
+ struct fcoe_port *fcoe;
+
+ /* Sanity check */
+ if ( ( fcoe = fcoe_demux ( netdev ) ) == NULL ) {
+ DBG ( "FCoE notification for net device %s missing FCoE "
+ "port\n", netdev->name );
+ return;
+ }
+
+ /* Reset the FCoE link if necessary */
+ if ( ! ( netdev_is_open ( netdev ) &&
+ netdev_link_ok ( netdev ) &&
+ ( fcoe->flags & FCOE_HAVE_NETWORK ) ) ) {
+ fcoe_reset ( fcoe );
+ }
+}
+
+/**
+ * Destroy FCoE port
+ *
+ * @v netdev Network device
+ */
+static void fcoe_remove ( struct net_device *netdev ) {
+ struct fcoe_port *fcoe;
+
+ /* Sanity check */
+ if ( ( fcoe = fcoe_demux ( netdev ) ) == NULL ) {
+ DBG ( "FCoE removal of net device %s missing FCoE port\n",
+ netdev->name );
+ return;
+ }
+
+ /* Close FCoE device */
+ fcoe_close ( fcoe, 0 );
+}
+
+ /**
+ * FCoE driver
+ *
+ * Network device driver hooks: fcoe_probe() creates a port for a
+ * device, fcoe_notify() handles device or link state changes, and
+ * fcoe_remove() destroys the port.
+ */
+ struct net_driver fcoe_driver __net_driver = {
+ .name = "FCoE",
+ .probe = fcoe_probe,
+ .notify = fcoe_notify,
+ .remove = fcoe_remove,
+ };
+
+ /** FCoE protocol (frames with network-layer protocol ETH_P_FCOE go to fcoe_rx()) */
+ struct net_protocol fcoe_protocol __net_protocol = {
+ .name = "FCoE",
+ .net_proto = htons ( ETH_P_FCOE ),
+ .rx = fcoe_rx,
+ };
+
+ /** FIP protocol (frames with network-layer protocol ETH_P_FIP go to fcoe_fip_rx()) */
+ struct net_protocol fip_protocol __net_protocol = {
+ .name = "FIP",
+ .net_proto = htons ( ETH_P_FIP ),
+ .rx = fcoe_fip_rx,
+ };
+
+ /** Human-readable message for CRC errors
+ *
+ * It seems as though several drivers neglect to strip the Ethernet
+ * CRC, which will cause the FCoE footer to be misplaced and result
+ * (coincidentally) in an "invalid CRC" error from FCoE.
+ *
+ * The table contains a single entry, for EINFO_EINVAL_CRC.
+ */
+ struct errortab fcoe_errors[] __errortab = {
+ __einfo_errortab ( EINFO_EINVAL_CRC ),
+ };
diff --git a/qemu/roms/ipxe/src/net/fcp.c b/qemu/roms/ipxe/src/net/fcp.c
new file mode 100644
index 000000000..9c36a4c72
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/fcp.c
@@ -0,0 +1,1092 @@
+/*
+ * Copyright (C) 2010 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <assert.h>
+#include <byteswap.h>
+#include <ipxe/refcnt.h>
+#include <ipxe/list.h>
+#include <ipxe/interface.h>
+#include <ipxe/xfer.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/open.h>
+#include <ipxe/process.h>
+#include <ipxe/uri.h>
+#include <ipxe/acpi.h>
+#include <ipxe/scsi.h>
+#include <ipxe/device.h>
+#include <ipxe/edd.h>
+#include <ipxe/fc.h>
+#include <ipxe/fcels.h>
+#include <ipxe/fcp.h>
+
+/** @file
+ *
+ * Fibre Channel Protocol
+ *
+ */
+
+ /* Disambiguate the various error causes
+ *
+ * Each pair of macros below defines one variant built from
+ * EINFO_ERANGE, with its own sub-code (0x01 to 0x05) and description,
+ * so that the different range failures reported by this file can be
+ * told apart.
+ */
+ #define ERANGE_READ_DATA_ORDERING \
+ __einfo_error ( EINFO_ERANGE_READ_DATA_ORDERING )
+ #define EINFO_ERANGE_READ_DATA_ORDERING \
+ __einfo_uniqify ( EINFO_ERANGE, 0x01, "Read data out of order" )
+ #define ERANGE_READ_DATA_OVERRUN \
+ __einfo_error ( EINFO_ERANGE_READ_DATA_OVERRUN )
+ #define EINFO_ERANGE_READ_DATA_OVERRUN \
+ __einfo_uniqify ( EINFO_ERANGE, 0x02, "Read data overrun" )
+ #define ERANGE_WRITE_DATA_STUCK \
+ __einfo_error ( EINFO_ERANGE_WRITE_DATA_STUCK )
+ #define EINFO_ERANGE_WRITE_DATA_STUCK \
+ __einfo_uniqify ( EINFO_ERANGE, 0x03, "Write data stuck" )
+ #define ERANGE_WRITE_DATA_OVERRUN \
+ __einfo_error ( EINFO_ERANGE_WRITE_DATA_OVERRUN )
+ #define EINFO_ERANGE_WRITE_DATA_OVERRUN \
+ __einfo_uniqify ( EINFO_ERANGE, 0x04, "Write data overrun" )
+ #define ERANGE_DATA_UNDERRUN \
+ __einfo_error ( EINFO_ERANGE_DATA_UNDERRUN )
+ #define EINFO_ERANGE_DATA_UNDERRUN \
+ __einfo_uniqify ( EINFO_ERANGE, 0x05, "Data underrun" )
+
+/******************************************************************************
+ *
+ * PRLI
+ *
+ ******************************************************************************
+ */
+
+ /* Forward declaration; the descriptor is defined after the PRLI
+ * handler functions, which need to refer to it.
+ */
+ struct fc_els_prli_descriptor fcp_prli_descriptor __fc_els_prli_descriptor;
+
+/**
+ * Transmit FCP PRLI
+ *
+ * @v els Fibre Channel ELS transaction
+ * @ret rc Return status code
+ */
+static int fcp_prli_tx ( struct fc_els *els ) {
+ struct fcp_prli_service_parameters param;
+
+ /* Build service parameter page */
+ memset ( &param, 0, sizeof ( param ) );
+ param.flags = htonl ( FCP_PRLI_NO_READ_RDY | FCP_PRLI_INITIATOR );
+
+ return fc_els_prli_tx ( els, &fcp_prli_descriptor, &param );
+}
+
+/**
+ * Receive FCP PRLI
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v frame ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fcp_prli_rx ( struct fc_els *els, void *data, size_t len ) {
+ return fc_els_prli_rx ( els, &fcp_prli_descriptor, data, len );
+}
+
+/**
+ * Detect FCP PRLI
+ *
+ * @v els Fibre Channel ELS transaction
+ * @v data ELS frame
+ * @v len Length of ELS frame
+ * @ret rc Return status code
+ */
+static int fcp_prli_detect ( struct fc_els *els, const void *data,
+ size_t len ) {
+ return fc_els_prli_detect ( els, &fcp_prli_descriptor, data, len );
+}
+
+ /**
+ * FCP PRLI ELS handler
+ *
+ * Routes PRLI transmit, receive and detection to the FCP-specific
+ * wrappers fcp_prli_tx(), fcp_prli_rx() and fcp_prli_detect().
+ */
+ struct fc_els_handler fcp_prli_handler __fc_els_handler = {
+ .name = "PRLI-FCP",
+ .tx = fcp_prli_tx,
+ .rx = fcp_prli_rx,
+ .detect = fcp_prli_detect,
+ };
+
+ /**
+ * FCP PRLI descriptor
+ *
+ * Describes the PRLI for type FC_TYPE_FCP: the service parameter page
+ * is a struct fcp_prli_service_parameters, handled by
+ * fcp_prli_handler.
+ */
+ struct fc_els_prli_descriptor fcp_prli_descriptor __fc_els_prli_descriptor = {
+ .type = FC_TYPE_FCP,
+ .param_len = sizeof ( struct fcp_prli_service_parameters ),
+ .handler = &fcp_prli_handler,
+ };
+
+/******************************************************************************
+ *
+ * FCP devices and commands
+ *
+ ******************************************************************************
+ */
+
+ /**
+ * An FCP device
+ *
+ * Owns the SCSI command issuing interface and the list of commands
+ * currently in progress on the device.
+ */
+ struct fcp_device {
+ /** Reference count */
+ struct refcnt refcnt;
+ /** Fibre Channel upper-layer protocol user */
+ struct fc_ulp_user user;
+ /** SCSI command issuing interface */
+ struct interface scsi;
+ /** List of active commands (linked via struct fcp_command::list) */
+ struct list_head fcpcmds;
+
+ /** Fibre Channel WWN (for boot firmware table) */
+ struct fc_name wwn;
+ /** SCSI LUN (for boot firmware table) */
+ struct scsi_lun lun;
+ };
+
+ /**
+ * An FCP command
+ *
+ * Tracks one SCSI command in progress, including the Fibre Channel
+ * exchange that carries it and the progress of its data transfer.
+ */
+ struct fcp_command {
+ /** Reference count */
+ struct refcnt refcnt;
+ /** FCP SCSI device (a reference is held until the command is freed) */
+ struct fcp_device *fcpdev;
+ /** List of active commands */
+ struct list_head list;
+ /** SCSI command interface */
+ struct interface scsi;
+ /** Fibre Channel exchange interface */
+ struct interface xchg;
+ /** Send process */
+ struct process process;
+ /** Send current IU
+ *
+ * Invoked by the send process; set by fcpcmd_start_send().
+ *
+ * @v fcpcmd FCP command
+ * @ret rc Return status code
+ */
+ int ( * send ) ( struct fcp_command *fcpcmd );
+ /** SCSI command */
+ struct scsi_cmd command;
+ /** Data offset within command */
+ size_t offset;
+ /** Length of data remaining to be sent within this IU */
+ size_t remaining;
+ /** Exchange ID */
+ uint16_t xchg_id;
+ };
+
+/**
+ * Get reference to FCP device
+ *
+ * @v fcpdev FCP device
+ * @ret fcpdev FCP device
+ */
+static inline __attribute__ (( always_inline )) struct fcp_device *
+fcpdev_get ( struct fcp_device *fcpdev ) {
+ ref_get ( &fcpdev->refcnt );
+ return fcpdev;
+}
+
+/**
+ * Drop reference to FCP device
+ *
+ * @v fcpdev FCP device
+ */
+static inline __attribute__ (( always_inline )) void
+fcpdev_put ( struct fcp_device *fcpdev ) {
+ ref_put ( &fcpdev->refcnt );
+}
+
+/**
+ * Get reference to FCP command
+ *
+ * @v fcpcmd FCP command
+ * @ret fcpcmd FCP command
+ */
+static inline __attribute__ (( always_inline )) struct fcp_command *
+fcpcmd_get ( struct fcp_command *fcpcmd ) {
+ ref_get ( &fcpcmd->refcnt );
+ return fcpcmd;
+}
+
+/**
+ * Drop reference to FCP command
+ *
+ * @v fcpcmd FCP command
+ */
+static inline __attribute__ (( always_inline )) void
+fcpcmd_put ( struct fcp_command *fcpcmd ) {
+ ref_put ( &fcpcmd->refcnt );
+}
+
+/**
+ * Start FCP command sending
+ *
+ * @v fcpcmd FCP command
+ * @v send Send method
+ */
+static inline __attribute__ (( always_inline )) void
+fcpcmd_start_send ( struct fcp_command *fcpcmd,
+ int ( * send ) ( struct fcp_command *fcpcmd ) ) {
+ fcpcmd->send = send;
+ process_add ( &fcpcmd->process );
+}
+
+/**
+ * Stop FCP command sending
+ *
+ * @v fcpcmd FCP command
+ */
+static inline __attribute__ (( always_inline )) void
+fcpcmd_stop_send ( struct fcp_command *fcpcmd ) {
+ process_del ( &fcpcmd->process );
+}
+
+/**
+ * Free FCP command
+ *
+ * @v refcnt Reference count
+ */
+static void fcpcmd_free ( struct refcnt *refcnt ) {
+ struct fcp_command *fcpcmd =
+ container_of ( refcnt, struct fcp_command, refcnt );
+
+ /* Remove from list of commands */
+ list_del ( &fcpcmd->list );
+ fcpdev_put ( fcpcmd->fcpdev );
+
+ /* Free command */
+ free ( fcpcmd );
+}
+
+/**
+ * Close FCP command
+ *
+ * @v fcpcmd FCP command
+ * @v rc Reason for close
+ */
+static void fcpcmd_close ( struct fcp_command *fcpcmd, int rc ) {
+ struct fcp_device *fcpdev = fcpcmd->fcpdev;
+
+ if ( rc != 0 ) {
+ DBGC ( fcpdev, "FCP %p xchg %04x closed: %s\n",
+ fcpdev, fcpcmd->xchg_id, strerror ( rc ) );
+ }
+
+ /* Stop sending */
+ fcpcmd_stop_send ( fcpcmd );
+
+ /* Shut down interfaces */
+ intf_shutdown ( &fcpcmd->scsi, rc );
+ intf_shutdown ( &fcpcmd->xchg, rc );
+}
+
+/**
+ * Close FCP command in error
+ *
+ * @v fcpcmd FCP command
+ * @v rc Reason for close
+ */
+static void fcpcmd_close_err ( struct fcp_command *fcpcmd, int rc ) {
+ if ( rc == 0 )
+ rc = -EPIPE;
+ fcpcmd_close ( fcpcmd, rc );
+}
+
+/**
+ * Send FCP command IU
+ *
+ * @v fcpcmd FCP command
+ * @ret rc Return status code
+ */
+static int fcpcmd_send_cmnd ( struct fcp_command *fcpcmd ) {
+ struct fcp_device *fcpdev = fcpcmd->fcpdev;
+ struct scsi_cmd *command = &fcpcmd->command;
+ struct io_buffer *iobuf;
+ struct fcp_cmnd *cmnd;
+ struct xfer_metadata meta;
+ int rc;
+
+ /* Sanity check */
+ if ( command->data_in_len && command->data_out_len ) {
+ DBGC ( fcpdev, "FCP %p xchg %04x cannot handle bidirectional "
+ "command\n", fcpdev, fcpcmd->xchg_id );
+ return -ENOTSUP;
+ }
+
+ /* Allocate I/O buffer */
+ iobuf = xfer_alloc_iob ( &fcpcmd->xchg, sizeof ( *cmnd ) );
+ if ( ! iobuf ) {
+ DBGC ( fcpdev, "FCP %p xchg %04x cannot allocate command IU\n",
+ fcpdev, fcpcmd->xchg_id );
+ return -ENOMEM;
+ }
+
+ /* Construct command IU frame */
+ cmnd = iob_put ( iobuf, sizeof ( *cmnd ) );
+ memset ( cmnd, 0, sizeof ( *cmnd ) );
+ memcpy ( &cmnd->lun, &command->lun, sizeof ( cmnd->lun ) );
+ assert ( ! ( command->data_in_len && command->data_out_len ) );
+ if ( command->data_in_len )
+ cmnd->dirn |= FCP_CMND_RDDATA;
+ if ( command->data_out_len )
+ cmnd->dirn |= FCP_CMND_WRDATA;
+ memcpy ( &cmnd->cdb, &fcpcmd->command.cdb, sizeof ( cmnd->cdb ) );
+ cmnd->len = htonl ( command->data_in_len + command->data_out_len );
+ memset ( &meta, 0, sizeof ( meta ) );
+ meta.flags = ( XFER_FL_CMD_STAT | XFER_FL_OVER );
+ DBGC2 ( fcpdev, "FCP %p xchg %04x CMND " SCSI_CDB_FORMAT " %04x\n",
+ fcpdev, fcpcmd->xchg_id, SCSI_CDB_DATA ( cmnd->cdb ),
+ ntohl ( cmnd->len ) );
+
+ /* No further data to send within this IU */
+ fcpcmd_stop_send ( fcpcmd );
+
+ /* Send command IU frame */
+ if ( ( rc = xfer_deliver ( &fcpcmd->xchg, iob_disown ( iobuf ),
+ &meta ) ) != 0 ) {
+ DBGC ( fcpdev, "FCP %p xchg %04x cannot deliver command IU: "
+ "%s\n", fcpdev, fcpcmd->xchg_id, strerror ( rc ) );
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * Handle FCP read data IU
+ *
+ * @v fcpcmd FCP command
+ * @v iobuf I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ */
+static int fcpcmd_recv_rddata ( struct fcp_command *fcpcmd,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *meta ) {
+ struct fcp_device *fcpdev = fcpcmd->fcpdev;
+ struct scsi_cmd *command = &fcpcmd->command;
+ size_t offset = meta->offset;
+ size_t len = iob_len ( iobuf );
+ int rc;
+
+ /* Sanity checks */
+ if ( ! ( meta->flags & XFER_FL_ABS_OFFSET ) ) {
+ DBGC ( fcpdev, "FCP %p xchg %04x read data missing offset\n",
+ fcpdev, fcpcmd->xchg_id );
+ rc = -ERANGE_READ_DATA_ORDERING;
+ goto done;
+ }
+ if ( offset != fcpcmd->offset ) {
+ DBGC ( fcpdev, "FCP %p xchg %04x read data out of order "
+ "(expected %zd, received %zd)\n",
+ fcpdev, fcpcmd->xchg_id, fcpcmd->offset, offset );
+ rc = -ERANGE_READ_DATA_ORDERING;
+ goto done;
+ }
+ if ( ( offset + len ) > command->data_in_len ) {
+ DBGC ( fcpdev, "FCP %p xchg %04x read data overrun (max %zd, "
+ "received %zd)\n", fcpdev, fcpcmd->xchg_id,
+ command->data_in_len, ( offset + len ) );
+ rc = -ERANGE_READ_DATA_OVERRUN;
+ goto done;
+ }
+ DBGC2 ( fcpdev, "FCP %p xchg %04x RDDATA [%08zx,%08zx)\n",
+ fcpdev, fcpcmd->xchg_id, offset, ( offset + len ) );
+
+ /* Copy to user buffer */
+ copy_to_user ( command->data_in, offset, iobuf->data, len );
+ fcpcmd->offset += len;
+ assert ( fcpcmd->offset <= command->data_in_len );
+
+ rc = 0;
+ done:
+ free_iob ( iobuf );
+ return rc;
+}
+
+/**
+ * Send FCP write data IU
+ *
+ * @v fcpcmd FCP command
+ * @ret rc Return status code
+ */
+static int fcpcmd_send_wrdata ( struct fcp_command *fcpcmd ) {
+ struct fcp_device *fcpdev = fcpcmd->fcpdev;
+ struct scsi_cmd *command = &fcpcmd->command;
+ struct io_buffer *iobuf;
+ struct xfer_metadata meta;
+ size_t len;
+ int rc;
+
+ /* Calculate length to be sent */
+ len = xfer_window ( &fcpcmd->xchg );
+ if ( len > fcpcmd->remaining )
+ len = fcpcmd->remaining;
+
+ /* Sanity checks */
+ if ( len == 0 ) {
+ DBGC ( fcpdev, "FCP %p xchg %04x write data stuck\n",
+ fcpdev, fcpcmd->xchg_id );
+ return -ERANGE_WRITE_DATA_STUCK;
+ }
+ if ( ( fcpcmd->offset + len ) > command->data_out_len ) {
+ DBGC ( fcpdev, "FCP %p xchg %04x write data overrun (max %zd, "
+ "requested %zd)\n", fcpdev, fcpcmd->xchg_id,
+ command->data_out_len, ( fcpcmd->offset + len ) );
+ return -ERANGE_WRITE_DATA_OVERRUN;
+ }
+
+ /* Allocate I/O buffer */
+ iobuf = xfer_alloc_iob ( &fcpcmd->xchg, len );
+ if ( ! iobuf ) {
+ DBGC ( fcpdev, "FCP %p xchg %04x cannot allocate write data "
+ "IU for %zd bytes\n", fcpdev, fcpcmd->xchg_id, len );
+ return -ENOMEM;
+ }
+
+ /* Construct data IU frame */
+ copy_from_user ( iob_put ( iobuf, len ), command->data_out,
+ fcpcmd->offset, len );
+ memset ( &meta, 0, sizeof ( meta ) );
+ meta.flags = ( XFER_FL_RESPONSE | XFER_FL_ABS_OFFSET );
+ meta.offset = fcpcmd->offset;
+ DBGC2 ( fcpdev, "FCP %p xchg %04x WRDATA [%08zx,%04zx)\n",
+ fcpdev, fcpcmd->xchg_id, fcpcmd->offset,
+ ( fcpcmd->offset + iob_len ( iobuf ) ) );
+
+ /* Calculate amount of data remaining to be sent within this IU */
+ assert ( len <= fcpcmd->remaining );
+ fcpcmd->offset += len;
+ fcpcmd->remaining -= len;
+ assert ( fcpcmd->offset <= command->data_out_len );
+ if ( fcpcmd->remaining == 0 ) {
+ fcpcmd_stop_send ( fcpcmd );
+ meta.flags |= XFER_FL_OVER;
+ }
+
+ /* Send data IU frame */
+ if ( ( rc = xfer_deliver ( &fcpcmd->xchg, iob_disown ( iobuf ),
+ &meta ) ) != 0 ) {
+ DBGC ( fcpdev, "FCP %p xchg %04x cannot deliver write data "
+ "IU: %s\n", fcpdev, fcpcmd->xchg_id, strerror ( rc ) );
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * Handle FCP transfer ready IU
+ *
+ * @v fcpcmd FCP command
+ * @v iobuf I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ */
+static int fcpcmd_recv_xfer_rdy ( struct fcp_command *fcpcmd,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *meta __unused ) {
+ struct fcp_device *fcpdev = fcpcmd->fcpdev;
+ struct fcp_xfer_rdy *xfer_rdy = iobuf->data;
+ int rc;
+
+ /* Sanity checks */
+ if ( iob_len ( iobuf ) != sizeof ( *xfer_rdy ) ) {
+ DBGC ( fcpdev, "FCP %p xchg %04x received invalid transfer "
+ "ready IU:\n", fcpdev, fcpcmd->xchg_id );
+ DBGC_HDA ( fcpdev, 0, iobuf->data, iob_len ( iobuf ) );
+ rc = -EPROTO;
+ goto done;
+ }
+ if ( ntohl ( xfer_rdy->offset ) != fcpcmd->offset ) {
+ /* We do not advertise out-of-order delivery */
+ DBGC ( fcpdev, "FCP %p xchg %04x cannot support out-of-order "
+ "delivery (expected %zd, requested %d)\n",
+ fcpdev, fcpcmd->xchg_id, fcpcmd->offset,
+ ntohl ( xfer_rdy->offset ) );
+ rc = -EPROTO;
+ goto done;
+ }
+ DBGC2 ( fcpdev, "FCP %p xchg %04x XFER_RDY [%08x,%08x)\n",
+ fcpdev, fcpcmd->xchg_id, ntohl ( xfer_rdy->offset ),
+ ( ntohl ( xfer_rdy->offset ) + ntohl ( xfer_rdy->len ) ) );
+
+ /* Start sending requested data */
+ fcpcmd->remaining = ntohl ( xfer_rdy->len );
+ fcpcmd_start_send ( fcpcmd, fcpcmd_send_wrdata );
+
+ rc = 0;
+ done:
+ free_iob ( iobuf );
+ return rc;
+}
+
+/**
+ * Handle FCP response IU
+ *
+ * @v fcpcmd FCP command
+ * @v iobuf I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ */
+static int fcpcmd_recv_rsp ( struct fcp_command *fcpcmd,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *meta __unused ) {
+ struct fcp_device *fcpdev = fcpcmd->fcpdev;
+ struct scsi_cmd *command = &fcpcmd->command;
+ struct fcp_rsp *rsp = iobuf->data;
+ struct scsi_rsp response;
+ int rc;
+
+ /* Sanity check */
+ if ( ( iob_len ( iobuf ) < sizeof ( *rsp ) ) ||
+ ( iob_len ( iobuf ) < ( sizeof ( *rsp ) +
+ fcp_rsp_response_data_len ( rsp ) +
+ fcp_rsp_sense_data_len ( rsp ) ) ) ) {
+ DBGC ( fcpdev, "FCP %p xchg %04x received invalid response "
+ "IU:\n", fcpdev, fcpcmd->xchg_id );
+ DBGC_HDA ( fcpdev, 0, iobuf->data, iob_len ( iobuf ) );
+ rc = -EPROTO;
+ goto done;
+ }
+ DBGC2 ( fcpdev, "FCP %p xchg %04x RSP stat %02x resid %08x flags %02x"
+ "%s%s%s%s\n", fcpdev, fcpcmd->xchg_id, rsp->status,
+ ntohl ( rsp->residual ), rsp->flags,
+ ( ( rsp->flags & FCP_RSP_RESPONSE_LEN_VALID ) ? " resp" : "" ),
+ ( ( rsp->flags & FCP_RSP_SENSE_LEN_VALID ) ? " sense" : "" ),
+ ( ( rsp->flags & FCP_RSP_RESIDUAL_OVERRUN ) ? " over" : "" ),
+ ( ( rsp->flags & FCP_RSP_RESIDUAL_UNDERRUN ) ? " under" : "" ));
+ if ( fcp_rsp_response_data ( rsp ) ) {
+ DBGC2 ( fcpdev, "FCP %p xchg %04x response data:\n",
+ fcpdev, fcpcmd->xchg_id );
+ DBGC2_HDA ( fcpdev, 0, fcp_rsp_response_data ( rsp ),
+ fcp_rsp_response_data_len ( rsp ) );
+ }
+ if ( fcp_rsp_sense_data ( rsp ) ) {
+ DBGC2 ( fcpdev, "FCP %p xchg %04x sense data:\n",
+ fcpdev, fcpcmd->xchg_id );
+ DBGC2_HDA ( fcpdev, 0, fcp_rsp_sense_data ( rsp ),
+ fcp_rsp_sense_data_len ( rsp ) );
+ }
+
+ /* Check for locally-detected command underrun */
+ if ( ( rsp->status == 0 ) &&
+ ( fcpcmd->offset != ( command->data_in_len +
+ command->data_out_len ) ) ) {
+ DBGC ( fcpdev, "FCP %p xchg %04x data underrun (expected %zd, "
+ "got %zd)\n", fcpdev, fcpcmd->xchg_id,
+ ( command->data_in_len + command->data_out_len ),
+ fcpcmd->offset );
+ rc = -ERANGE_DATA_UNDERRUN;
+ goto done;
+ }
+
+ /* Build SCSI response */
+ memset ( &response, 0, sizeof ( response ) );
+ response.status = rsp->status;
+ if ( rsp->flags & ( FCP_RSP_RESIDUAL_OVERRUN |
+ FCP_RSP_RESIDUAL_UNDERRUN ) ) {
+ response.overrun = ntohl ( rsp->residual );
+ if ( rsp->flags & FCP_RSP_RESIDUAL_UNDERRUN )
+ response.overrun = -response.overrun;
+ }
+ scsi_parse_sense ( fcp_rsp_sense_data ( rsp ),
+ fcp_rsp_sense_data_len ( rsp ), &response.sense );
+
+ /* Free buffer before sending response, to minimise
+ * out-of-memory errors.
+ */
+ free_iob ( iob_disown ( iobuf ) );
+
+ /* Send SCSI response */
+ scsi_response ( &fcpcmd->scsi, &response );
+
+ /* Terminate command */
+ fcpcmd_close ( fcpcmd, 0 );
+
+ rc = 0;
+ done:
+ free_iob ( iobuf );
+ return rc;
+}
+
+/**
+ * Handle unknown FCP IU
+ *
+ * @v fcpcmd FCP command
+ * @v iobuf I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ */
+static int fcpcmd_recv_unknown ( struct fcp_command *fcpcmd,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *meta __unused ) {
+ struct fcp_device *fcpdev = fcpcmd->fcpdev;
+
+ DBGC ( fcpdev, "FCP %p xchg %04x received unknown IU:\n",
+ fcpdev, fcpcmd->xchg_id );
+ DBGC_HDA ( fcpdev, 0, iobuf->data, iob_len ( iobuf ) );
+ free_iob ( iobuf );
+ return -EPROTO;
+}
+
+/**
+ * Transmit FCP frame
+ *
+ * @v fcpcmd FCP command
+ */
+static void fcpcmd_step ( struct fcp_command *fcpcmd ) {
+ int rc;
+
+ /* Send the current IU */
+ if ( ( rc = fcpcmd->send ( fcpcmd ) ) != 0 ) {
+ /* Treat failure as a fatal error */
+ fcpcmd_close ( fcpcmd, rc );
+ }
+}
+
+/**
+ * Receive FCP frame
+ *
+ * @v fcpcmd FCP command
+ * @v iobuf I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ */
+static int fcpcmd_deliver ( struct fcp_command *fcpcmd,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *meta ) {
+ int ( * fcpcmd_recv ) ( struct fcp_command *fcpcmd,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *meta );
+ int rc;
+
+ /* Determine handler */
+ switch ( meta->flags & ( XFER_FL_CMD_STAT | XFER_FL_RESPONSE ) ) {
+ case ( XFER_FL_RESPONSE ) :
+ fcpcmd_recv = fcpcmd_recv_rddata;
+ break;
+ case ( XFER_FL_CMD_STAT ) :
+ fcpcmd_recv = fcpcmd_recv_xfer_rdy;
+ break;
+ case ( XFER_FL_CMD_STAT | XFER_FL_RESPONSE ) :
+ fcpcmd_recv = fcpcmd_recv_rsp;
+ break;
+ default:
+ fcpcmd_recv = fcpcmd_recv_unknown;
+ break;
+ }
+
+ /* Handle IU */
+ if ( ( rc = fcpcmd_recv ( fcpcmd, iob_disown ( iobuf ), meta ) ) != 0 ){
+ /* Treat any error as fatal to the command */
+ fcpcmd_close ( fcpcmd, rc );
+ }
+
+ return rc;
+}
+
+ /**
+ * FCP command SCSI interface operations
+ *
+ * A close received on the SCSI interface is handled by fcpcmd_close().
+ */
+ static struct interface_operation fcpcmd_scsi_op[] = {
+ INTF_OP ( intf_close, struct fcp_command *, fcpcmd_close ),
+ };
+
+ /** FCP command SCSI interface descriptor (pass-through to the exchange interface) */
+ static struct interface_descriptor fcpcmd_scsi_desc =
+ INTF_DESC_PASSTHRU ( struct fcp_command, scsi, fcpcmd_scsi_op, xchg );
+
+ /**
+ * FCP command Fibre Channel exchange interface operations
+ *
+ * Received frames are handled by fcpcmd_deliver(). A close received
+ * on the exchange interface is handled by fcpcmd_close_err(), which
+ * always reports an error.
+ */
+ static struct interface_operation fcpcmd_xchg_op[] = {
+ INTF_OP ( xfer_deliver, struct fcp_command *, fcpcmd_deliver ),
+ INTF_OP ( intf_close, struct fcp_command *, fcpcmd_close_err ),
+ };
+
+ /** FCP command Fibre Channel exchange interface descriptor (pass-through to the SCSI interface) */
+ static struct interface_descriptor fcpcmd_xchg_desc =
+ INTF_DESC_PASSTHRU ( struct fcp_command, xchg, fcpcmd_xchg_op, scsi );
+
+ /** FCP command process descriptor (each step of the send process runs fcpcmd_step()) */
+ static struct process_descriptor fcpcmd_process_desc =
+ PROC_DESC ( struct fcp_command, process, fcpcmd_step );
+
+/**
+ * Issue FCP SCSI command
+ *
+ * @v fcpdev FCP device
+ * @v parent Parent interface
+ * @v command SCSI command
+ * @ret tag Command tag, or negative error
+ */
+static int fcpdev_scsi_command ( struct fcp_device *fcpdev,
+ struct interface *parent,
+ struct scsi_cmd *command ) {
+ struct fcp_prli_service_parameters *param = fcpdev->user.ulp->param;
+ struct fcp_command *fcpcmd;
+ int xchg_id;
+ int rc;
+
+ /* Check link */
+ if ( ( rc = fcpdev->user.ulp->link.rc ) != 0 ) {
+ DBGC ( fcpdev, "FCP %p could not issue command while link is "
+ "down: %s\n", fcpdev, strerror ( rc ) );
+ goto err_link;
+ }
+
+ /* Check target capability */
+ assert ( param != NULL );
+ assert ( fcpdev->user.ulp->param_len >= sizeof ( *param ) );
+ if ( ! ( param->flags & htonl ( FCP_PRLI_TARGET ) ) ) {
+ DBGC ( fcpdev, "FCP %p could not issue command: not a target\n",
+ fcpdev );
+ rc = -ENOTTY;
+ goto err_target;
+ }
+
+ /* Allocate and initialise structure */
+ fcpcmd = zalloc ( sizeof ( *fcpcmd ) );
+ if ( ! fcpcmd ) {
+ rc = -ENOMEM;
+ goto err_zalloc;
+ }
+ ref_init ( &fcpcmd->refcnt, fcpcmd_free );
+ intf_init ( &fcpcmd->scsi, &fcpcmd_scsi_desc, &fcpcmd->refcnt );
+ intf_init ( &fcpcmd->xchg, &fcpcmd_xchg_desc, &fcpcmd->refcnt );
+ process_init_stopped ( &fcpcmd->process, &fcpcmd_process_desc,
+ &fcpcmd->refcnt );
+ fcpcmd->fcpdev = fcpdev_get ( fcpdev );
+ list_add ( &fcpcmd->list, &fcpdev->fcpcmds );
+ memcpy ( &fcpcmd->command, command, sizeof ( fcpcmd->command ) );
+
+ /* Create new exchange */
+ if ( ( xchg_id = fc_xchg_originate ( &fcpcmd->xchg,
+ fcpdev->user.ulp->peer->port,
+ &fcpdev->user.ulp->peer->port_id,
+ FC_TYPE_FCP ) ) < 0 ) {
+ rc = xchg_id;
+ DBGC ( fcpdev, "FCP %p could not create exchange: %s\n",
+ fcpdev, strerror ( rc ) );
+ goto err_xchg_originate;
+ }
+ fcpcmd->xchg_id = xchg_id;
+
+ /* Start sending command IU */
+ fcpcmd_start_send ( fcpcmd, fcpcmd_send_cmnd );
+
+ /* Attach to parent interface, mortalise self, and return */
+ intf_plug_plug ( &fcpcmd->scsi, parent );
+ ref_put ( &fcpcmd->refcnt );
+ return ( FCP_TAG_MAGIC | fcpcmd->xchg_id );
+
+ err_xchg_originate:
+ fcpcmd_close ( fcpcmd, rc );
+ ref_put ( &fcpcmd->refcnt );
+ err_zalloc:
+ err_target:
+ err_link:
+ return rc;
+}
+
+/**
+ * Close FCP device
+ *
+ * @v fcpdev FCP device
+ * @v rc Reason for close
+ *
+ * Shuts down the device's SCSI interface, closes every command still
+ * on the device's command list with the same reason code, and
+ * detaches from the Fibre Channel ULP.
+ */
+static void fcpdev_close ( struct fcp_device *fcpdev, int rc ) {
+ struct fcp_command *fcpcmd;
+ struct fcp_command *tmp;
+
+ DBGC ( fcpdev, "FCP %p closed: %s\n", fcpdev, strerror ( rc ) );
+
+ /* Shut down interfaces */
+ intf_shutdown ( &fcpdev->scsi, rc );
+
+ /* Shut down any active commands. The "safe" iterator is used
+ * with a temporary reference held around each close; presumably
+ * fcpcmd_close() may unlink and release the command - confirm
+ * against fcpcmd_close().
+ */
+ list_for_each_entry_safe ( fcpcmd, tmp, &fcpdev->fcpcmds, list ) {
+ fcpcmd_get ( fcpcmd );
+ fcpcmd_close ( fcpcmd, rc );
+ fcpcmd_put ( fcpcmd );
+ }
+
+ /* Drop reference to ULP */
+ fc_ulp_detach ( &fcpdev->user );
+}
+
+/**
+ * Check FCP device flow-control window
+ *
+ * @v fcpdev		FCP device
+ * @ret len		Length of window
+ *
+ * The window is all-or-nothing: the largest size_t value while the
+ * ULP link is OK, and zero otherwise.
+ */
+static size_t fcpdev_window ( struct fcp_device *fcpdev ) {
+
+	if ( fc_link_ok ( &fcpdev->user.ulp->link ) )
+		return ~( ( size_t ) 0 );
+	return 0;
+}
+
+/**
+ * Describe FCP device in an ACPI table
+ *
+ * @v fcpdev		FCP device
+ * @v acpi		ACPI table
+ * @v len		Length of ACPI table
+ * @ret rc		Return status code
+ *
+ * Not yet implemented: the table is left untouched and zero is
+ * returned.
+ */
+static int fcpdev_acpi_describe ( struct fcp_device *fcpdev,
+				  struct acpi_description_header *acpi,
+				  size_t len ) {
+
+	/* Neither the table nor its length is used yet */
+	( void ) acpi;
+	( void ) len;
+
+	DBGC ( fcpdev, "FCP %p cannot yet describe device in an ACPI table\n",
+	       fcpdev );
+	return 0;
+}
+
+/**
+ * Describe FCP device using EDD
+ *
+ * @v fcpdev		FCP device
+ * @v type		EDD interface type
+ * @v path		EDD device path
+ * @ret rc		Return status code
+ *
+ * Sets the interface type to EDD_INTF_TYPE_FIBRE and fills in the
+ * fibre device path from the WWN and LUN stored in the FCP device.
+ * Always returns zero.
+ */
+static int fcpdev_edd_describe ( struct fcp_device *fcpdev,
+				 struct edd_interface_type *type,
+				 union edd_device_path *path ) {
+	union {
+		struct fc_name name;
+		uint64_t raw;
+	} port_name;
+	union {
+		struct scsi_lun unit;
+		uint64_t raw;
+	} logical_unit;
+
+	/* Stage the stored WWN and LUN so that each can be read back
+	 * as a single 64-bit quantity.
+	 */
+	memcpy ( &port_name.name, &fcpdev->wwn, sizeof ( port_name.name ) );
+	memcpy ( &logical_unit.unit, &fcpdev->lun,
+		 sizeof ( logical_unit.unit ) );
+
+	/* Populate the EDD structures */
+	type->type = cpu_to_le64 ( EDD_INTF_TYPE_FIBRE );
+	path->fibre.wwn = be64_to_cpu ( port_name.raw );
+	path->fibre.lun = be64_to_cpu ( logical_unit.raw );
+	return 0;
+}
+
+/**
+ * Identify device underlying FCP device
+ *
+ * @v fcpdev		FCP device
+ * @ret device		Underlying device, or NULL if the link is down
+ */
+static struct device * fcpdev_identify_device ( struct fcp_device *fcpdev ) {
+	struct fc_ulp *ulp = fcpdev->user.ulp;
+
+	/* Without a working link there is no port to examine, so the
+	 * underlying device cannot be determined.
+	 */
+	if ( ! fc_link_ok ( &ulp->link ) ) {
+		DBGC ( fcpdev, "FCP %p doesn't know underlying device "
+		       "until link is up\n", fcpdev );
+		return NULL;
+	}
+
+	/* Delegate to the transport interface of the peer's port */
+	assert ( ulp->peer->port != NULL );
+	return identify_device ( &ulp->peer->port->transport );
+}
+
+/**
+ * FCP device SCSI interface operations
+ *
+ * Maps each operation that can arrive on the FCP device's SCSI
+ * interface to the fcpdev_*() handler implementing it.
+ */
+static struct interface_operation fcpdev_scsi_op[] = {
+ INTF_OP ( scsi_command, struct fcp_device *, fcpdev_scsi_command ),
+ INTF_OP ( xfer_window, struct fcp_device *, fcpdev_window ),
+ INTF_OP ( intf_close, struct fcp_device *, fcpdev_close ),
+ INTF_OP ( acpi_describe, struct fcp_device *, fcpdev_acpi_describe ),
+ INTF_OP ( edd_describe, struct fcp_device *, fcpdev_edd_describe ),
+ INTF_OP ( identify_device, struct fcp_device *,
+ fcpdev_identify_device ),
+};
+
+/**
+ * FCP device SCSI interface descriptor
+ *
+ * Describes the "scsi" interface of struct fcp_device using the
+ * fcpdev_scsi_op table. Unlike the per-command descriptors, this one
+ * is declared with plain INTF_DESC and names no pass-through
+ * interface.
+ */
+static struct interface_descriptor fcpdev_scsi_desc =
+ INTF_DESC ( struct fcp_device, scsi, fcpdev_scsi_op );
+
+/**
+ * Examine FCP ULP link state
+ *
+ * @v user		Fibre Channel upper-layer protocol user
+ *
+ * Logs the current link state and tells the SCSI layer that the
+ * flow-control window may have changed.
+ */
+static void fcpdev_examine ( struct fc_ulp_user *user ) {
+	struct fcp_device *fcpdev;
+
+	/* Recover the FCP device that embeds this ULP user */
+	fcpdev = container_of ( user, struct fcp_device, user );
+
+	/* Report link state */
+	if ( ! fc_link_ok ( &fcpdev->user.ulp->link ) ) {
+		DBGC ( fcpdev, "FCP %p link is down: %s\n",
+		       fcpdev, strerror ( fcpdev->user.ulp->link.rc ) );
+	} else {
+		DBGC ( fcpdev, "FCP %p link is up\n", fcpdev );
+	}
+
+	/* The window reported by fcpdev_window() follows the link state */
+	xfer_window_changed ( &fcpdev->scsi );
+}
+
+/**
+ * Open FCP device
+ *
+ * @v parent Parent interface
+ * @v wwn Fibre Channel WWN
+ * @v lun SCSI LUN
+ * @ret rc Return status code
+ *
+ * Obtains the FC_TYPE_FCP upper-layer protocol for the given WWN,
+ * attaches a newly allocated FCP device to it, and opens a SCSI
+ * device for the LUN on top of the FCP device's SCSI interface.
+ */
+static int fcpdev_open ( struct interface *parent, struct fc_name *wwn,
+ struct scsi_lun *lun ) {
+ struct fc_ulp *ulp;
+ struct fcp_device *fcpdev;
+ int rc;
+
+ /* Get Fibre Channel ULP interface (any failure here is
+ * reported as -ENOMEM)
+ */
+ ulp = fc_ulp_get_wwn_type ( wwn, FC_TYPE_FCP );
+ if ( ! ulp ) {
+ rc = -ENOMEM;
+ goto err_ulp_get;
+ }
+
+ /* Allocate and initialise structure */
+ fcpdev = zalloc ( sizeof ( *fcpdev ) );
+ if ( ! fcpdev ) {
+ rc = -ENOMEM;
+ goto err_zalloc;
+ }
+ /* NOTE(review): no free function is passed to ref_init();
+ * presumably the reference counter falls back to a default
+ * release - confirm.
+ */
+ ref_init ( &fcpdev->refcnt, NULL );
+ intf_init ( &fcpdev->scsi, &fcpdev_scsi_desc, &fcpdev->refcnt );
+ INIT_LIST_HEAD ( &fcpdev->fcpcmds );
+ fc_ulp_user_init ( &fcpdev->user, fcpdev_examine, &fcpdev->refcnt );
+
+ DBGC ( fcpdev, "FCP %p opened for %s\n", fcpdev, fc_ntoa ( wwn ) );
+
+ /* Attach to Fibre Channel ULP */
+ fc_ulp_attach ( ulp, &fcpdev->user );
+
+ /* Preserve parameters required for boot firmware table
+ * (read back by fcpdev_edd_describe())
+ */
+ memcpy ( &fcpdev->wwn, wwn, sizeof ( fcpdev->wwn ) );
+ memcpy ( &fcpdev->lun, lun, sizeof ( fcpdev->lun ) );
+
+ /* Attach SCSI device to parent interface */
+ if ( ( rc = scsi_open ( parent, &fcpdev->scsi, lun ) ) != 0 ) {
+ DBGC ( fcpdev, "FCP %p could not create SCSI device: %s\n",
+ fcpdev, strerror ( rc ) );
+ goto err_scsi_open;
+ }
+
+ /* Drop temporary reference to ULP */
+ fc_ulp_put ( ulp );
+
+ /* Mortalise self and return */
+ ref_put ( &fcpdev->refcnt );
+ return 0;
+
+ err_scsi_open:
+ /* fcpdev_close() detaches from the ULP; the initial reference
+ * from ref_init() is then dropped.
+ */
+ fcpdev_close ( fcpdev, rc );
+ ref_put ( &fcpdev->refcnt );
+ err_zalloc:
+ /* Drop the temporary ULP reference on every path that got one */
+ fc_ulp_put ( ulp );
+ err_ulp_get:
+ return rc;
+}
+
+/******************************************************************************
+ *
+ * FCP URIs
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Parse FCP URI
+ *
+ * @v uri		URI
+ * @ret wwn		Fibre Channel WWN
+ * @ret lun		SCSI LUN
+ * @ret rc		Return status code
+ *
+ * An FCP URI has the form "fcp:<wwn>:<lun>" or "fcp://<wwn>/<lun>".
+ * Returns -EINVAL if the URI matches neither form; otherwise returns
+ * the status of parsing the WWN and then the LUN.
+ */
+static int fcp_parse_uri ( struct uri *uri, struct fc_name *wwn,
+			   struct scsi_lun *lun ) {
+	char wwn_buf[ FC_NAME_STRLEN + 1 /* NUL */ ];
+	const char *wwn_str;
+	const char *lun_str;
+	int rc;
+
+	/* Locate the WWN and LUN texts within the URI */
+	if ( ! uri->opaque ) {
+		/* "fcp://<wwn>/<lun>": both host and path are
+		 * required, and the path must begin with '/'.
+		 */
+		if ( ( ! uri->host ) || ( ! uri->path ) )
+			return -EINVAL;
+		if ( uri->path[0] != '/' )
+			return -EINVAL;
+		wwn_str = uri->host;
+		lun_str = &uri->path[1];
+	} else {
+		/* "fcp:<wwn>:<lun>": copy the leading WWN characters
+		 * into a NUL-terminated buffer.  The length reported
+		 * by snprintf() must cover the WWN and the separator.
+		 */
+		if ( snprintf ( wwn_buf, sizeof ( wwn_buf ), "%s",
+				uri->opaque ) < ( FC_NAME_STRLEN + 1 /* : */ ) )
+			return -EINVAL;
+		if ( uri->opaque[FC_NAME_STRLEN] != ':' )
+			return -EINVAL;
+		wwn_str = wwn_buf;
+		lun_str = &uri->opaque[FC_NAME_STRLEN + 1];
+	}
+
+	/* Parse WWN, then LUN */
+	rc = fc_aton ( wwn_str, wwn );
+	if ( rc != 0 )
+		return rc;
+	return scsi_parse_lun ( lun_str, lun );
+}
+
+/**
+ * Open FCP URI
+ *
+ * @v parent		Parent interface
+ * @v uri		URI
+ * @ret rc		Return status code
+ *
+ * Parses the URI into a WWN and LUN and opens the matching FCP
+ * device on the parent interface.
+ */
+static int fcp_open ( struct interface *parent, struct uri *uri ) {
+	struct fc_name wwn;
+	struct scsi_lun lun;
+	int rc;
+
+	/* Parse URI */
+	rc = fcp_parse_uri ( uri, &wwn, &lun );
+	if ( rc != 0 )
+		return rc;
+
+	/* Open FCP device; its status is the overall result */
+	return fcpdev_open ( parent, &wwn, &lun );
+}
+
+/**
+ * FCP URI opener
+ *
+ * Associates the "fcp" URI scheme with fcp_open().
+ */
+struct uri_opener fcp_uri_opener __uri_opener = {
+ .scheme = "fcp",
+ .open = fcp_open,
+};
diff --git a/qemu/roms/ipxe/src/net/fragment.c b/qemu/roms/ipxe/src/net/fragment.c
new file mode 100644
index 000000000..410915b3b
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/fragment.c
@@ -0,0 +1,180 @@
+/*
+ * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ipxe/retry.h>
+#include <ipxe/timer.h>
+#include <ipxe/ipstat.h>
+#include <ipxe/fragment.h>
+
+/** @file
+ *
+ * Fragment reassembly
+ *
+ */
+
+/**
+ * Expire fragment reassembly buffer
+ *
+ * @v timer		Retry timer
+ * @v fail		Failure indicator
+ *
+ * Discards a partially reassembled packet: the failure is counted,
+ * the buffer is unlinked from its reassembler, and both the I/O
+ * buffer and the fragment structure are freed.
+ */
+static void fragment_expired ( struct retry_timer *timer, int fail __unused ) {
+	struct fragment *frag;
+
+	/* Recover the fragment that embeds this timer */
+	frag = container_of ( timer, struct fragment, timer );
+	DBGC ( frag, "FRAG %p expired\n", frag );
+
+	/* Account for the failed reassembly */
+	frag->fragments->stats->reasm_fails++;
+
+	/* Unlink and release everything owned by the fragment */
+	list_del ( &frag->list );
+	free_iob ( frag->iobuf );
+	free ( frag );
+}
+
+/**
+ * Find fragment reassembly buffer
+ *
+ * @v fragments		Fragment reassembler
+ * @v iobuf		I/O buffer
+ * @v hdrlen		Length of non-fragmentable portion of I/O buffer
+ * @ret fragment	Fragment reassembly buffer, or NULL if not found
+ *
+ * The reassembler's is_fragment() method decides whether the I/O
+ * buffer belongs to a given reassembly buffer; the first match wins.
+ */
+static struct fragment * fragment_find ( struct fragment_reassembler *fragments,
+					 struct io_buffer *iobuf,
+					 size_t hdrlen ) {
+	struct fragment *candidate;
+
+	list_for_each_entry ( candidate, &fragments->list, list ) {
+		if ( ! fragments->is_fragment ( candidate, iobuf, hdrlen ) )
+			continue;
+		return candidate;
+	}
+
+	/* No reassembly in progress for this packet */
+	return NULL;
+}
+
+/**
+ * Reassemble packet
+ *
+ * @v fragments Fragment reassembler
+ * @v iobuf I/O buffer
+ * @v hdrlen Length of non-fragmentable portion of I/O buffer
+ * @ret iobuf Reassembled packet, or NULL
+ *
+ * This function takes ownership of the I/O buffer. Note that the
+ * length of the non-fragmentable portion may be modified: hdrlen is
+ * passed by pointer, and when a completed packet is returned it is
+ * overwritten with the header length recorded from the first
+ * fragment.
+ *
+ * Only in-order fragments are accepted: a fragment whose offset does
+ * not match the amount of payload already held is dropped and counted
+ * as a reassembly failure.
+ */
+struct io_buffer * fragment_reassemble ( struct fragment_reassembler *fragments,
+ struct io_buffer *iobuf,
+ size_t *hdrlen ) {
+ struct fragment *fragment;
+ struct io_buffer *new_iobuf;
+ size_t new_len;
+ size_t offset;
+ size_t expected_offset;
+ int more_frags;
+
+ /* Update statistics */
+ fragments->stats->reasm_reqds++;
+
+ /* Find matching fragment reassembly buffer, if any */
+ fragment = fragment_find ( fragments, iobuf, *hdrlen );
+
+ /* Drop out-of-order fragments. The expected offset is the
+ * payload already accumulated, or zero when no reassembly is
+ * in progress (i.e. only a first fragment can start one).
+ */
+ offset = fragments->fragment_offset ( iobuf, *hdrlen );
+ expected_offset = ( fragment ? ( iob_len ( fragment->iobuf ) -
+ fragment->hdrlen ) : 0 );
+ if ( offset != expected_offset ) {
+ DBGC ( fragment, "FRAG %p dropping out-of-sequence fragment "
+ "[%zd,%zd), expected [%zd,...)\n", fragment, offset,
+ ( offset + iob_len ( iobuf ) - *hdrlen ),
+ expected_offset );
+ goto drop;
+ }
+
+ /* Create or extend fragment reassembly buffer as applicable */
+ if ( ! fragment ) {
+
+ /* Create new fragment reassembly buffer. The incoming
+ * I/O buffer (header included) is adopted as the
+ * initial reassembly buffer.
+ */
+ fragment = zalloc ( sizeof ( *fragment ) );
+ if ( ! fragment )
+ goto drop;
+ list_add ( &fragment->list, &fragments->list );
+ fragment->iobuf = iobuf;
+ fragment->hdrlen = *hdrlen;
+ timer_init ( &fragment->timer, fragment_expired, NULL );
+ fragment->fragments = fragments;
+ DBGC ( fragment, "FRAG %p [0,%zd)\n", fragment,
+ ( iob_len ( iobuf ) - *hdrlen ) );
+
+ } else {
+
+ /* Check if this is the final fragment */
+ more_frags = fragments->more_fragments ( iobuf, *hdrlen );
+ DBGC ( fragment, "FRAG %p [%zd,%zd)%s\n", fragment,
+ offset, ( offset + iob_len ( iobuf ) - *hdrlen ),
+ ( more_frags ? "" : " complete" ) );
+
+ /* Extend fragment reassembly buffer. Preserve I/O
+ * buffer headroom to allow for code which modifies
+ * and resends the buffer (e.g. ICMP echo responses).
+ *
+ * The new fragment's header is stripped first, so only
+ * its payload is appended to the data held so far.
+ */
+ iob_pull ( iobuf, *hdrlen );
+ new_len = ( iob_headroom ( fragment->iobuf ) +
+ iob_len ( fragment->iobuf ) + iob_len ( iobuf ) );
+ new_iobuf = alloc_iob ( new_len );
+ if ( ! new_iobuf ) {
+ DBGC ( fragment, "FRAG %p could not extend reassembly "
+ "buffer to %zd bytes\n", fragment, new_len );
+ goto drop;
+ }
+ iob_reserve ( new_iobuf, iob_headroom ( fragment->iobuf ) );
+ memcpy ( iob_put ( new_iobuf, iob_len ( fragment->iobuf ) ),
+ fragment->iobuf->data, iob_len ( fragment->iobuf ) );
+ memcpy ( iob_put ( new_iobuf, iob_len ( iobuf ) ),
+ iobuf->data, iob_len ( iobuf ) );
+ free_iob ( fragment->iobuf );
+ fragment->iobuf = new_iobuf;
+ free_iob ( iobuf );
+
+ /* Stop fragment reassembly timer */
+ stop_timer ( &fragment->timer );
+
+ /* If this is the final fragment, return it. Ownership
+ * of the reassembled buffer passes to the caller and
+ * the bookkeeping structure is released.
+ */
+ if ( ! more_frags ) {
+ iobuf = fragment->iobuf;
+ *hdrlen = fragment->hdrlen;
+ list_del ( &fragment->list );
+ free ( fragment );
+ fragments->stats->reasm_oks++;
+ return iobuf;
+ }
+ }
+
+ /* (Re)start fragment reassembly timer */
+ start_timer_fixed ( &fragment->timer, FRAGMENT_TIMEOUT );
+
+ return NULL;
+
+ drop:
+ /* Count the failure and discard only the incoming buffer; any
+ * reassembly buffer already in progress is left untouched.
+ */
+ fragments->stats->reasm_fails++;
+ free_iob ( iobuf );
+ return NULL;
+}
diff --git a/qemu/roms/ipxe/src/net/icmp.c b/qemu/roms/ipxe/src/net/icmp.c
new file mode 100644
index 000000000..1bbf8bd30
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/icmp.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/in.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/ping.h>
+#include <ipxe/crc32.h>
+#include <ipxe/icmp.h>
+
+/** @file
+ *
+ * ICMP protocol
+ *
+ */
+
+/**
+ * Identify ICMP echo protocol
+ *
+ * @v family		Address family
+ * @ret echo_protocol	ICMP echo protocol, or NULL
+ *
+ * Searches the ICMP_ECHO_PROTOCOLS table for the first entry
+ * registered for the given address family.
+ */
+static struct icmp_echo_protocol * icmp_echo_protocol ( sa_family_t family ) {
+	struct icmp_echo_protocol *entry;
+
+	for_each_table_entry ( entry, ICMP_ECHO_PROTOCOLS ) {
+		if ( entry->family != family )
+			continue;
+		return entry;
+	}
+
+	/* No echo protocol registered for this family */
+	return NULL;
+}
+
+/**
+ * Determine debugging colour for ICMP debug messages
+ *
+ * @v st_peer		Peer address
+ * @ret col		Debugging colour (for DBGC())
+ *
+ * The colour is the CRC32 of the whole peer socket address
+ * structure, so messages for the same peer share a colour.
+ */
+static uint32_t icmpcol ( struct sockaddr_tcpip *st_peer ) {
+	uint32_t colour;
+
+	colour = crc32_le ( 0, st_peer, sizeof ( *st_peer ) );
+	return colour;
+}
+
+/**
+ * Transmit ICMP echo packet
+ *
+ * @v iobuf		I/O buffer
+ * @v st_dest		Destination socket address
+ * @v echo_protocol	ICMP echo protocol
+ * @ret rc		Return status code
+ *
+ * The ICMP type must already have been set by the caller; this
+ * function only (re)calculates the checksum and transmits.
+ */
+static int icmp_tx_echo ( struct io_buffer *iobuf,
+			  struct sockaddr_tcpip *st_dest,
+			  struct icmp_echo_protocol *echo_protocol ) {
+	struct icmp_echo *echo = iobuf->data;
+
+	/* (Re)calculate checksum over the whole packet, with the
+	 * checksum field zeroed first.
+	 */
+	echo->icmp.chksum = 0;
+	echo->icmp.chksum = tcpip_chksum ( echo, iob_len ( iobuf ) );
+
+	/* Transmit packet.  The checksum field is handed to the
+	 * TCP/IP layer only for protocols with net_checksum set.
+	 */
+	return tcpip_tx ( iobuf, echo_protocol->tcpip_protocol, NULL,
+			  st_dest, NULL,
+			  ( echo_protocol->net_checksum ?
+			    &echo->icmp.chksum : NULL ) );
+}
+
+/**
+ * Transmit ICMP echo request
+ *
+ * @v iobuf		I/O buffer
+ * @v st_dest		Destination socket address
+ * @ret rc		Return status code
+ *
+ * If no echo protocol is registered for the destination's address
+ * family, the I/O buffer is freed and -ENOTSUP is returned.
+ */
+int icmp_tx_echo_request ( struct io_buffer *iobuf,
+			   struct sockaddr_tcpip *st_dest ) {
+	struct icmp_echo *echo = iobuf->data;
+	struct icmp_echo_protocol *proto;
+
+	/* Look up the echo protocol for the destination family */
+	proto = icmp_echo_protocol ( st_dest->st_family );
+	if ( ! proto ) {
+		DBGC ( icmpcol ( st_dest ), "ICMP TX echo request unknown "
+		       "address family %d\n", st_dest->st_family );
+		free_iob ( iobuf );
+		return -ENOTSUP;
+	}
+
+	/* Mark packet as an echo request for this protocol */
+	echo->icmp.type = proto->request;
+
+	/* Hand off for checksumming and transmission */
+	DBGC ( icmpcol ( st_dest ), "ICMP TX echo request id %04x seq %04x\n",
+	       ntohs ( echo->ident ), ntohs ( echo->sequence ) );
+	return icmp_tx_echo ( iobuf, st_dest, proto );
+}
+
+/**
+ * Transmit ICMP echo reply
+ *
+ * @v iobuf		I/O buffer
+ * @v st_dest		Destination socket address
+ * @v echo_protocol	ICMP echo protocol
+ * @ret rc		Return status code
+ */
+static int icmp_tx_echo_reply ( struct io_buffer *iobuf,
+				struct sockaddr_tcpip *st_dest,
+				struct icmp_echo_protocol *echo_protocol ) {
+	struct icmp_echo *echo = iobuf->data;
+
+	/* Mark packet as an echo reply for this protocol */
+	echo->icmp.type = echo_protocol->reply;
+
+	/* Hand off for checksumming and transmission */
+	DBGC ( icmpcol ( st_dest ), "ICMP TX echo reply id %04x seq %04x\n",
+	       ntohs ( echo->ident ), ntohs ( echo->sequence ) );
+	return icmp_tx_echo ( iobuf, st_dest, echo_protocol );
+}
+
+/**
+ * Process a received ICMP echo request
+ *
+ * @v iobuf		I/O buffer
+ * @v st_src		Source socket address
+ * @v echo_protocol	ICMP echo protocol
+ * @ret rc		Return status code
+ *
+ * The received buffer is reused for the reply, which is sent back to
+ * the source address.  Packets shorter than an ICMP echo header are
+ * freed and rejected with -EINVAL.
+ */
+int icmp_rx_echo_request ( struct io_buffer *iobuf,
+			   struct sockaddr_tcpip *st_src,
+			   struct icmp_echo_protocol *echo_protocol ) {
+	struct icmp_echo *echo = iobuf->data;
+
+	/* Reject packets too short to hold an echo header */
+	if ( iob_len ( iobuf ) < sizeof ( *echo ) ) {
+		DBGC ( icmpcol ( st_src ), "ICMP RX echo request too short at "
+		       "%zd bytes (min %zd bytes)\n",
+		       iob_len ( iobuf ), sizeof ( *echo ) );
+		free_iob ( iobuf );
+		return -EINVAL;
+	}
+	DBGC ( icmpcol ( st_src ), "ICMP RX echo request id %04x seq %04x\n",
+	       ntohs ( echo->ident ), ntohs ( echo->sequence ) );
+
+	/* Turn the request around as a reply to its sender */
+	return icmp_tx_echo_reply ( iobuf, st_src, echo_protocol );
+}
+
+/**
+ * Process a received ICMP echo reply
+ *
+ * @v iobuf		I/O buffer
+ * @v st_src		Source socket address
+ * @ret rc		Return status code
+ *
+ * Valid replies are passed on to ping_rx().  Packets shorter than an
+ * ICMP echo header are freed and rejected with -EINVAL.
+ */
+int icmp_rx_echo_reply ( struct io_buffer *iobuf,
+			 struct sockaddr_tcpip *st_src ) {
+	struct icmp_echo *echo = iobuf->data;
+
+	/* Reject packets too short to hold an echo header */
+	if ( iob_len ( iobuf ) < sizeof ( *echo ) ) {
+		DBGC ( icmpcol ( st_src ), "ICMP RX echo reply too short at "
+		       "%zd bytes (min %zd bytes)\n",
+		       iob_len ( iobuf ), sizeof ( *echo ) );
+		free_iob ( iobuf );
+		return -EINVAL;
+	}
+	DBGC ( icmpcol ( st_src ), "ICMP RX echo reply id %04x seq %04x\n",
+	       ntohs ( echo->ident ), ntohs ( echo->sequence ) );
+
+	/* Deliver to ping protocol */
+	return ping_rx ( iobuf, st_src );
+}
+
+/**
+ * Receive ping reply (when no ping protocol is present)
+ *
+ * @v iobuf I/O buffer
+ * @v st_src Source socket address
+ * @ret rc Return status code
+ *
+ * This is a weak definition: it simply frees the I/O buffer and
+ * reports success. Presumably a non-weak ping_rx() supplied by the
+ * ping protocol replaces it when that protocol is linked in.
+ */
+__weak int ping_rx ( struct io_buffer *iobuf,
+ struct sockaddr_tcpip *st_src __unused ) {
+ free_iob ( iobuf );
+ return 0;
+}
diff --git a/qemu/roms/ipxe/src/net/icmpv4.c b/qemu/roms/ipxe/src/net/icmpv4.c
new file mode 100644
index 000000000..996ba1490
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/icmpv4.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <string.h>
+#include <errno.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/in.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/icmp.h>
+
+/** @file
+ *
+ * ICMPv4 protocol
+ *
+ */
+
+/* Forward declaration: icmpv4_rx() refers to the echo protocol, which
+ * is defined at the end of this file.
+ */
+struct icmp_echo_protocol icmpv4_echo_protocol __icmp_echo_protocol;
+
+/**
+ * Process a received packet
+ *
+ * @v iobuf		I/O buffer
+ * @v netdev		Network device
+ * @v st_src		Partially-filled source address
+ * @v st_dest		Partially-filled destination address
+ * @v pshdr_csum	Pseudo-header checksum
+ * @ret rc		Return status code
+ *
+ * Echo requests and replies are handed to the shared ICMP echo code,
+ * which takes over the I/O buffer.  Every other packet is freed here:
+ * malformed ones with -EINVAL, unhandled types with success.
+ */
+static int icmpv4_rx ( struct io_buffer *iobuf,
+		       struct net_device *netdev __unused,
+		       struct sockaddr_tcpip *st_src,
+		       struct sockaddr_tcpip *st_dest __unused,
+		       uint16_t pshdr_csum __unused ) {
+	struct icmp_header *icmp = iobuf->data;
+	size_t len = iob_len ( iobuf );
+	unsigned int csum;
+	unsigned int type;
+
+	/* Reject packets too short to hold an ICMP header */
+	if ( len < sizeof ( *icmp ) ) {
+		DBG ( "ICMP packet too short at %zd bytes (min %zd bytes)\n",
+		      len, sizeof ( *icmp ) );
+		free_iob ( iobuf );
+		return -EINVAL;
+	}
+
+	/* A valid packet checksums to zero */
+	csum = tcpip_chksum ( icmp, len );
+	if ( csum != 0 ) {
+		DBG ( "ICMP checksum incorrect (is %04x, should be 0000)\n",
+		      csum );
+		DBG_HD ( icmp, len );
+		free_iob ( iobuf );
+		return -EINVAL;
+	}
+
+	/* Dispatch on ICMP type */
+	type = icmp->type;
+	if ( type == ICMP_ECHO_REQUEST ) {
+		return icmp_rx_echo_request ( iobuf, st_src,
+					      &icmpv4_echo_protocol );
+	}
+	if ( type == ICMP_ECHO_REPLY )
+		return icmp_rx_echo_reply ( iobuf, st_src );
+
+	/* Anything else is silently ignored */
+	DBG ( "ICMP ignoring type %d\n", type );
+	free_iob ( iobuf );
+	return 0;
+}
+
+/**
+ * ICMPv4 TCP/IP protocol
+ *
+ * Registers icmpv4_rx() as the receive handler for IP protocol
+ * IP_ICMP.
+ */
+struct tcpip_protocol icmpv4_protocol __tcpip_protocol = {
+ .name = "ICMPv4",
+ .rx = icmpv4_rx,
+ .tcpip_proto = IP_ICMP,
+};
+
+/**
+ * ICMPv4 echo protocol
+ *
+ * Echo parameters for the AF_INET address family: the ICMP type
+ * values used for requests and replies, and the TCP/IP protocol used
+ * to transmit them.
+ */
+struct icmp_echo_protocol icmpv4_echo_protocol __icmp_echo_protocol = {
+ .family = AF_INET,
+ .request = ICMP_ECHO_REQUEST,
+ .reply = ICMP_ECHO_REPLY,
+ .tcpip_protocol = &icmpv4_protocol,
+ /* Zero here makes the ICMP echo transmit code pass no checksum
+ * field pointer to tcpip_tx().
+ */
+ .net_checksum = 0,
+};
diff --git a/qemu/roms/ipxe/src/net/icmpv6.c b/qemu/roms/ipxe/src/net/icmpv6.c
new file mode 100644
index 000000000..479800e7d
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/icmpv6.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <string.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/in.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/ping.h>
+#include <ipxe/icmpv6.h>
+
+/** @file
+ *
+ * ICMPv6 protocol
+ *
+ */
+
+/* Forward declaration: icmpv6_rx_echo_request() refers to the echo
+ * protocol, which is defined at the end of this file.
+ */
+struct icmp_echo_protocol icmpv6_echo_protocol __icmp_echo_protocol;
+
+/**
+ * Process received ICMPv6 echo request packet
+ *
+ * @v iobuf		I/O buffer
+ * @v netdev		Network device
+ * @v sin6_src		Source socket address
+ * @v sin6_dest		Destination socket address
+ * @ret rc		Return status code
+ */
+static int icmpv6_rx_echo_request ( struct io_buffer *iobuf,
+				    struct net_device *netdev __unused,
+				    struct sockaddr_in6 *sin6_src,
+				    struct sockaddr_in6 *sin6_dest __unused ) {
+
+	/* The shared echo code takes the source as a generic TCP/IP
+	 * socket address.
+	 */
+	return icmp_rx_echo_request ( iobuf,
+				      ( ( struct sockaddr_tcpip * ) sin6_src ),
+				      &icmpv6_echo_protocol );
+}
+
+/**
+ * ICMPv6 echo request handler
+ *
+ * Routes packets of type ICMPV6_ECHO_REQUEST to
+ * icmpv6_rx_echo_request().
+ */
+struct icmpv6_handler icmpv6_echo_request_handler __icmpv6_handler = {
+ .type = ICMPV6_ECHO_REQUEST,
+ .rx = icmpv6_rx_echo_request,
+};
+
+/**
+ * Process received ICMPv6 echo reply packet
+ *
+ * @v iobuf		I/O buffer
+ * @v netdev		Network device
+ * @v sin6_src		Source socket address
+ * @v sin6_dest		Destination socket address
+ * @ret rc		Return status code
+ */
+static int icmpv6_rx_echo_reply ( struct io_buffer *iobuf,
+				  struct net_device *netdev __unused,
+				  struct sockaddr_in6 *sin6_src,
+				  struct sockaddr_in6 *sin6_dest __unused ) {
+
+	/* The shared echo code takes the source as a generic TCP/IP
+	 * socket address.
+	 */
+	return icmp_rx_echo_reply ( iobuf,
+				    ( ( struct sockaddr_tcpip * ) sin6_src ) );
+}
+
+/**
+ * ICMPv6 echo reply handler
+ *
+ * Routes packets of type ICMPV6_ECHO_REPLY to icmpv6_rx_echo_reply().
+ */
+struct icmpv6_handler icmpv6_echo_reply_handler __icmpv6_handler = {
+ .type = ICMPV6_ECHO_REPLY,
+ .rx = icmpv6_rx_echo_reply,
+};
+
+/**
+ * Identify ICMPv6 handler
+ *
+ * @v type		ICMPv6 type
+ * @ret handler		ICMPv6 handler, or NULL if not found
+ *
+ * Searches the ICMPV6_HANDLERS table for the first entry registered
+ * for the given type.
+ */
+static struct icmpv6_handler * icmpv6_handler ( unsigned int type ) {
+	struct icmpv6_handler *entry;
+
+	for_each_table_entry ( entry, ICMPV6_HANDLERS ) {
+		if ( entry->type != type )
+			continue;
+		return entry;
+	}
+
+	/* No handler registered for this type */
+	return NULL;
+}
+
+/**
+ * Process a received packet
+ *
+ * @v iobuf		I/O buffer
+ * @v netdev		Network device
+ * @v st_src		Partially-filled source address
+ * @v st_dest		Partially-filled destination address
+ * @v pshdr_csum	Pseudo-header checksum
+ * @ret rc		Return status code
+ *
+ * Validates the length and checksum, then hands the packet to the
+ * handler registered for its ICMPv6 type.  Packets that are too
+ * short or fail the checksum are rejected with -EINVAL; packets of an
+ * unregistered type are rejected with -ENOTSUP.
+ */
+static int icmpv6_rx ( struct io_buffer *iobuf, struct net_device *netdev,
+		       struct sockaddr_tcpip *st_src,
+		       struct sockaddr_tcpip *st_dest, uint16_t pshdr_csum ) {
+	struct sockaddr_in6 *sin6_src = ( ( struct sockaddr_in6 * ) st_src );
+	struct sockaddr_in6 *sin6_dest = ( ( struct sockaddr_in6 * ) st_dest );
+	struct icmp_header *icmp = iobuf->data;
+	size_t len = iob_len ( iobuf );
+	struct icmpv6_handler *handler;
+	unsigned int csum;
+	unsigned int type;
+	int rc;
+
+	/* Sanity check */
+	if ( len < sizeof ( *icmp ) ) {
+		DBGC ( netdev, "ICMPv6 packet too short at %zd bytes (min %zd "
+		       "bytes)\n", len, sizeof ( *icmp ) );
+		rc = -EINVAL;
+		goto done;
+	}
+
+	/* Verify checksum (continuing from the pseudo-header checksum) */
+	csum = tcpip_continue_chksum ( pshdr_csum, icmp, len );
+	if ( csum != 0 ) {
+		DBGC ( netdev, "ICMPv6 checksum incorrect (is %04x, should be "
+		       "0000)\n", csum );
+		DBGC_HDA ( netdev, 0, icmp, len );
+		rc = -EINVAL;
+		goto done;
+	}
+
+	/* Record the type while the buffer is still ours: once the
+	 * buffer has been handed to a handler, the header must not be
+	 * read again (not even for debug messages).
+	 */
+	type = icmp->type;
+
+	/* Identify handler */
+	handler = icmpv6_handler ( type );
+	if ( ! handler ) {
+		DBGC ( netdev, "ICMPv6 unrecognised type %d\n", type );
+		rc = -ENOTSUP;
+		goto done;
+	}
+
+	/* Pass to handler, which takes ownership of the I/O buffer */
+	if ( ( rc = handler->rx ( iob_disown ( iobuf ), netdev, sin6_src,
+				  sin6_dest ) ) != 0 ) {
+		DBGC ( netdev, "ICMPv6 could not handle type %d: %s\n",
+		       type, strerror ( rc ) );
+		goto done;
+	}
+
+ done:
+	/* Frees the buffer only if it was never handed to a handler
+	 * (this relies on iob_disown() clearing iobuf - confirm against
+	 * its definition).
+	 */
+	free_iob ( iobuf );
+	return rc;
+}
+
+/**
+ * ICMPv6 TCP/IP protocol
+ *
+ * Registers icmpv6_rx() as the receive handler for IP protocol
+ * IP_ICMP6.
+ */
+struct tcpip_protocol icmpv6_protocol __tcpip_protocol = {
+ .name = "ICMPv6",
+ .rx = icmpv6_rx,
+ .tcpip_proto = IP_ICMP6,
+};
+
+/**
+ * ICMPv6 echo protocol
+ *
+ * Echo parameters for the AF_INET6 address family: the ICMPv6 type
+ * values used for requests and replies, and the TCP/IP protocol used
+ * to transmit them.
+ */
+struct icmp_echo_protocol icmpv6_echo_protocol __icmp_echo_protocol = {
+ .family = AF_INET6,
+ .request = ICMPV6_ECHO_REQUEST,
+ .reply = ICMPV6_ECHO_REPLY,
+ .tcpip_protocol = &icmpv6_protocol,
+ /* Non-zero here makes the ICMP echo transmit code pass the
+ * checksum field pointer to tcpip_tx(); presumably so that the
+ * network layer can fold in the pseudo-header checksum that
+ * icmpv6_rx() verifies on receive - confirm.
+ */
+ .net_checksum = 1,
+};
diff --git a/qemu/roms/ipxe/src/net/infiniband.c b/qemu/roms/ipxe/src/net/infiniband.c
new file mode 100644
index 000000000..12d1d83ce
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/infiniband.c
@@ -0,0 +1,999 @@
+/*
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <assert.h>
+#include <ipxe/list.h>
+#include <ipxe/errortab.h>
+#include <ipxe/if_arp.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/process.h>
+#include <ipxe/infiniband.h>
+#include <ipxe/ib_mi.h>
+#include <ipxe/ib_sma.h>
+
+/** @file
+ *
+ * Infiniband protocol
+ *
+ */
+
+/** List of Infiniband devices
+ *
+ * register_ibdev() appends devices to the tail of this list and
+ * unregister_ibdev() removes them.
+ */
+struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
+
+/** List of open Infiniband devices, in reverse order of opening
+ *
+ * ib_open() adds each device at the head of this list, so the first
+ * entry is always the most recently opened device.
+ */
+static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
+
+/* Disambiguate the various possible EINPROGRESSes
+ *
+ * ib_link_rc() returns these for the INIT and ARMED port states, so
+ * that each state gets its own human-readable message.
+ */
+#define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
+#define EINFO_EINPROGRESS_INIT __einfo_uniqify \
+ ( EINFO_EINPROGRESS, 0x01, "Initialising" )
+#define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
+#define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
+ ( EINFO_EINPROGRESS, 0x02, "Armed" )
+
+/** Human-readable messages for the link statuses defined above */
+struct errortab infiniband_errors[] __errortab = {
+ __einfo_errortab ( EINFO_EINPROGRESS_INIT ),
+ __einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
+};
+
+/***************************************************************************
+ *
+ * Completion queues
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Create completion queue
+ *
+ * @v ibdev Infiniband device
+ * @v num_cqes Number of completion queue entries
+ * @v op Completion queue operations
+ * @ret cq New completion queue, or NULL on failure
+ *
+ * The new completion queue is added to the device's list of
+ * completion queues before the device-specific create_cq() operation
+ * is invoked, and is removed again if that operation fails.
+ */
+struct ib_completion_queue *
+ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
+ struct ib_completion_queue_operations *op ) {
+ struct ib_completion_queue *cq;
+ int rc;
+
+ DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
+
+ /* Allocate and initialise data structure */
+ cq = zalloc ( sizeof ( *cq ) );
+ if ( ! cq )
+ goto err_alloc_cq;
+ cq->ibdev = ibdev;
+ list_add ( &cq->list, &ibdev->cqs );
+ cq->num_cqes = num_cqes;
+ INIT_LIST_HEAD ( &cq->work_queues );
+ cq->op = op;
+
+ /* Perform device-specific initialisation and get CQN */
+ if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
+ DBGC ( ibdev, "IBDEV %p could not initialise completion "
+ "queue: %s\n", ibdev, strerror ( rc ) );
+ goto err_dev_create_cq;
+ }
+
+ DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
+ "with CQN %#lx\n", ibdev, num_cqes, cq,
+ ib_cq_get_drvdata ( cq ), cq->cqn );
+ return cq;
+
+ /* Not reached: follows the return above and has no label.
+ * Presumably kept so that the unwind sequence below mirrors
+ * the setup sequence in full.
+ */
+ ibdev->op->destroy_cq ( ibdev, cq );
+ err_dev_create_cq:
+ list_del ( &cq->list );
+ free ( cq );
+ err_alloc_cq:
+ return NULL;
+}
+
+/**
+ * Destroy completion queue
+ *
+ * @v ibdev Infiniband device
+ * @v cq Completion queue
+ *
+ * All work queues must already have been detached from the
+ * completion queue (this is asserted).  The completion queue is
+ * freed; the caller must not use it afterwards.
+ */
+void ib_destroy_cq ( struct ib_device *ibdev,
+ struct ib_completion_queue *cq ) {
+ DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
+ ibdev, cq->cqn );
+ assert ( list_empty ( &cq->work_queues ) );
+ /* Device-specific teardown happens before the structure is
+ * unlinked from the device's list and freed.
+ */
+ ibdev->op->destroy_cq ( ibdev, cq );
+ list_del ( &cq->list );
+ free ( cq );
+}
+
+/**
+ * Poll completion queue
+ *
+ * @v ibdev		Infiniband device
+ * @v cq		Completion queue
+ *
+ * Asks the device to poll the completion queue, then tops up every
+ * receive work queue attached to it.
+ */
+void ib_poll_cq ( struct ib_device *ibdev,
+		  struct ib_completion_queue *cq ) {
+	struct ib_work_queue *queue;
+
+	/* Let the device poll the completion queue */
+	ibdev->op->poll_cq ( ibdev, cq );
+
+	/* Top up receive work queues; send work queues are skipped */
+	list_for_each_entry ( queue, &cq->work_queues, list ) {
+		if ( queue->is_send )
+			continue;
+		ib_refill_recv ( ibdev, queue->qp );
+	}
+}
+
+/***************************************************************************
+ *
+ * Work queues
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Create queue pair
+ *
+ * @v ibdev Infiniband device
+ * @v type Queue pair type
+ * @v num_send_wqes Number of send work queue entries
+ * @v send_cq Send completion queue
+ * @v num_recv_wqes Number of receive work queue entries
+ * @v recv_cq Receive completion queue
+ * @v op Queue pair operations
+ * @ret qp Queue pair, or NULL on failure
+ *
+ * The queue pair will be left in the INIT state; you must call
+ * ib_modify_qp() before it is ready to use for sending and receiving.
+ *
+ * The queue pair structure and its send and receive I/O buffer
+ * pointer arrays are allocated as one zeroed block, so a single
+ * free() of the queue pair releases all three.
+ */
+struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
+ enum ib_queue_pair_type type,
+ unsigned int num_send_wqes,
+ struct ib_completion_queue *send_cq,
+ unsigned int num_recv_wqes,
+ struct ib_completion_queue *recv_cq,
+ struct ib_queue_pair_operations *op ) {
+ struct ib_queue_pair *qp;
+ size_t total_size;
+ int rc;
+
+ DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
+
+ /* Allocate and initialise data structure */
+ total_size = ( sizeof ( *qp ) +
+ ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
+ ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
+ qp = zalloc ( total_size );
+ if ( ! qp )
+ goto err_alloc_qp;
+ qp->ibdev = ibdev;
+ list_add ( &qp->list, &ibdev->qps );
+ qp->type = type;
+ qp->send.qp = qp;
+ qp->send.is_send = 1;
+ qp->send.cq = send_cq;
+ list_add ( &qp->send.list, &send_cq->work_queues );
+ /* Random initial packet sequence number, masked to 24 bits */
+ qp->send.psn = ( random() & 0xffffffUL );
+ qp->send.num_wqes = num_send_wqes;
+ /* Send buffer array immediately follows the structure */
+ qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
+ qp->recv.qp = qp;
+ qp->recv.cq = recv_cq;
+ list_add ( &qp->recv.list, &recv_cq->work_queues );
+ qp->recv.psn = ( random() & 0xffffffUL );
+ qp->recv.num_wqes = num_recv_wqes;
+ /* Receive buffer array immediately follows the send buffer array */
+ qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
+ ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
+ INIT_LIST_HEAD ( &qp->mgids );
+ qp->op = op;
+
+ /* Perform device-specific initialisation and get QPN */
+ if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
+ DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
+ "%s\n", ibdev, strerror ( rc ) );
+ goto err_dev_create_qp;
+ }
+ DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
+ ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
+ DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
+ ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
+ qp->recv.iobufs );
+ DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
+ ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
+ ( ( ( void * ) qp ) + total_size ) );
+
+ /* Calculate externally-visible QPN: the SMI and GSI queue pair
+ * types use fixed external QPNs; all other types expose the
+ * QPN assigned by the device.
+ */
+ switch ( type ) {
+ case IB_QPT_SMI:
+ qp->ext_qpn = IB_QPN_SMI;
+ break;
+ case IB_QPT_GSI:
+ qp->ext_qpn = IB_QPN_GSI;
+ break;
+ default:
+ qp->ext_qpn = qp->qpn;
+ break;
+ }
+ if ( qp->ext_qpn != qp->qpn ) {
+ DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
+ ibdev, qp->qpn, qp->ext_qpn );
+ }
+
+ return qp;
+
+ /* Not reached: follows the return above and has no label.
+ * Presumably kept so that the unwind sequence below mirrors
+ * the setup sequence in full.
+ */
+ ibdev->op->destroy_qp ( ibdev, qp );
+ err_dev_create_qp:
+ list_del ( &qp->send.list );
+ list_del ( &qp->recv.list );
+ list_del ( &qp->list );
+ free ( qp );
+ err_alloc_qp:
+ return NULL;
+}
+
+/**
+ * Modify queue pair
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @ret rc		Return status code
+ *
+ * Delegates to the device's modify_qp() operation and reports any
+ * failure via the debug log.
+ */
+int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
+	int rc;
+
+	DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
+
+	/* Hand over to the device driver */
+	rc = ibdev->op->modify_qp ( ibdev, qp );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
+		       ibdev, qp->qpn, strerror ( rc ) );
+	}
+
+	/* rc is zero on the success path */
+	return rc;
+}
+
+/**
+ * Destroy queue pair
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ *
+ * The queue pair must no longer be attached to any multicast group
+ * (this is asserted).  Any I/O buffers still held in the send or
+ * receive work queues are completed with -ECANCELED before the queue
+ * pair is freed.
+ */
+void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
+	struct io_buffer *pending;
+	unsigned int idx;
+
+	DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
+	       ibdev, qp->qpn );
+
+	assert ( list_empty ( &qp->mgids ) );
+
+	/* Device-specific destruction comes first */
+	ibdev->op->destroy_qp ( ibdev, qp );
+
+	/* Cancel whatever is still queued for sending */
+	for ( idx = 0 ; idx < qp->send.num_wqes ; idx++ ) {
+		pending = qp->send.iobufs[idx];
+		if ( ! pending )
+			continue;
+		ib_complete_send ( ibdev, qp, pending, -ECANCELED );
+	}
+
+	/* Cancel whatever is still queued for receiving */
+	for ( idx = 0 ; idx < qp->recv.num_wqes ; idx++ ) {
+		pending = qp->recv.iobufs[idx];
+		if ( ! pending )
+			continue;
+		ib_complete_recv ( ibdev, qp, NULL, NULL, pending,
+				   -ECANCELED );
+	}
+
+	/* Detach both work queues from their completion queues */
+	list_del ( &qp->send.list );
+	list_del ( &qp->recv.list );
+
+	/* Unlink from the device and release the memory */
+	list_del ( &qp->list );
+	free ( qp );
+}
+
+/**
+ * Find queue pair by QPN
+ *
+ * @v ibdev		Infiniband device
+ * @v qpn		Queue pair number
+ * @ret qp		Queue pair, or NULL
+ *
+ * A queue pair matches if either its QPN or its externally-visible
+ * QPN equals the requested number.
+ */
+struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
+					unsigned long qpn ) {
+	struct ib_queue_pair *candidate;
+
+	list_for_each_entry ( candidate, &ibdev->qps, list ) {
+		if ( candidate->qpn == qpn )
+			return candidate;
+		if ( candidate->ext_qpn == qpn )
+			return candidate;
+	}
+
+	/* No queue pair on this device uses that number */
+	return NULL;
+}
+
+/**
+ * Find queue pair by multicast GID
+ *
+ * @v ibdev		Infiniband device
+ * @v gid		Multicast GID
+ * @ret qp		Queue pair, or NULL
+ *
+ * Returns the first queue pair on the device whose software
+ * multicast GID list contains the given GID.
+ */
+struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
+					 union ib_gid *gid ) {
+	struct ib_queue_pair *candidate;
+	struct ib_multicast_gid *entry;
+
+	list_for_each_entry ( candidate, &ibdev->qps, list ) {
+		list_for_each_entry ( entry, &candidate->mgids, list ) {
+			if ( memcmp ( &entry->gid, gid,
+				      sizeof ( entry->gid ) ) != 0 )
+				continue;
+			return candidate;
+		}
+	}
+
+	/* No queue pair is attached to this GID */
+	return NULL;
+}
+
+/**
+ * Find work queue belonging to completion queue
+ *
+ * @v cq		Completion queue
+ * @v qpn		Queue pair number
+ * @v is_send		Find send work queue (rather than receive)
+ * @ret wq		Work queue, or NULL if not found
+ *
+ * Only the queue pair's own QPN is compared.
+ */
+struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
+				    unsigned long qpn, int is_send ) {
+	struct ib_work_queue *candidate;
+
+	list_for_each_entry ( candidate, &cq->work_queues, list ) {
+		if ( candidate->qp->qpn != qpn )
+			continue;
+		if ( candidate->is_send != is_send )
+			continue;
+		return candidate;
+	}
+
+	return NULL;
+}
+
+/**
+ * Post send work queue entry
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @v dest		Destination address vector, or NULL to use the
+ *			queue pair's default address vector
+ * @v iobuf		I/O buffer
+ * @ret rc		Return status code
+ *
+ * The address vector is copied before being passed to the device, so
+ * the caller's copy is never modified.  A zero queue key or rate in
+ * the copy is replaced by the queue pair's queue key or IB_RATE_2_5
+ * respectively.
+ */
+int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+		   struct ib_address_vector *dest,
+		   struct io_buffer *iobuf ) {
+	struct ib_address_vector av;
+	int rc;
+
+	/* Refuse if the send queue has no free entries */
+	if ( qp->send.fill >= qp->send.num_wqes ) {
+		DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
+		       ibdev, qp->qpn );
+		return -ENOBUFS;
+	}
+
+	/* Take a private, modifiable copy of the address vector,
+	 * falling back to the queue pair's default if none was given.
+	 */
+	memcpy ( &av, ( dest ? dest : &qp->av ), sizeof ( av ) );
+
+	/* Fill in optional parameters in the copy */
+	if ( ! av.qkey )
+		av.qkey = qp->qkey;
+	if ( ! av.rate )
+		av.rate = IB_RATE_2_5;
+
+	/* Post to hardware */
+	rc = ibdev->op->post_send ( ibdev, qp, &av, iobuf );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
+		       "%s\n", ibdev, qp->qpn, strerror ( rc ) );
+		return rc;
+	}
+
+	/* One more entry is now outstanding */
+	qp->send.fill++;
+	return 0;
+}
+
+/**
+ * Post receive work queue entry
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @v iobuf		I/O buffer
+ * @ret rc		Return status code
+ *
+ * The buffer must have at least IB_MAX_PAYLOAD_SIZE bytes of
+ * tailroom, otherwise -EINVAL is returned.  -ENOBUFS is returned if
+ * the receive queue is already full.
+ */
+int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+		   struct io_buffer *iobuf ) {
+	int rc;
+
+	/* Reject buffers too small for a maximum-sized payload */
+	if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
+		DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
+		       ibdev, qp->qpn, iob_tailroom ( iobuf ) );
+		return -EINVAL;
+	}
+
+	/* Refuse if the receive queue has no free entries */
+	if ( qp->recv.fill >= qp->recv.num_wqes ) {
+		DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
+		       ibdev, qp->qpn );
+		return -ENOBUFS;
+	}
+
+	/* Post to hardware */
+	rc = ibdev->op->post_recv ( ibdev, qp, iobuf );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
+		       "%s\n", ibdev, qp->qpn, strerror ( rc ) );
+		return rc;
+	}
+
+	/* One more entry is now outstanding */
+	qp->recv.fill++;
+	return 0;
+}
+
+/**
+ * Complete send work queue entry
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @v iobuf		I/O buffer
+ * @v rc		Completion status code
+ *
+ * The buffer is passed to the send completion queue's
+ * complete_send() operation if there is one, and is freed here
+ * otherwise.  The send fill level is decremented in both cases.
+ */
+void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+			struct io_buffer *iobuf, int rc ) {
+
+	if ( ! qp->send.cq->op->complete_send ) {
+		/* Nobody wants to hear about it; just discard */
+		free_iob ( iobuf );
+	} else {
+		qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
+	}
+
+	/* The work queue entry is free again */
+	qp->send.fill--;
+}
+
+/**
+ * Complete receive work queue entry
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @v dest		Destination address vector, or NULL
+ * @v source		Source address vector, or NULL
+ * @v iobuf		I/O buffer
+ * @v rc		Completion status code
+ *
+ * The buffer is passed to the receive completion queue's
+ * complete_recv() operation if there is one, and is freed here
+ * otherwise.  The receive fill level is decremented in both cases.
+ */
+void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+			struct ib_address_vector *dest,
+			struct ib_address_vector *source,
+			struct io_buffer *iobuf, int rc ) {
+
+	if ( ! qp->recv.cq->op->complete_recv ) {
+		/* Nobody wants to hear about it; just discard */
+		free_iob ( iobuf );
+	} else {
+		qp->recv.cq->op->complete_recv ( ibdev, qp, dest, source,
+						 iobuf, rc );
+	}
+
+	/* The work queue entry is free again */
+	qp->recv.fill--;
+}
+
+/**
+ * Refill receive work queue
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ *
+ * Posts freshly allocated I/O buffers until the receive queue is
+ * full.  Stops early, without reporting an error, if a buffer cannot
+ * be allocated or posted.
+ */
+void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
+	struct io_buffer *fresh;
+	int rc;
+
+	/* Keep going while unfilled entries remain */
+	while ( qp->recv.fill < qp->recv.num_wqes ) {
+
+		/* Allocation failure is non-fatal; a later refill
+		 * attempt will try again.
+		 */
+		fresh = qp->op->alloc_iob ( IB_MAX_PAYLOAD_SIZE );
+		if ( ! fresh )
+			break;
+
+		/* Hand the buffer to the receive queue */
+		rc = ib_post_recv ( ibdev, qp, fresh );
+		if ( rc == 0 )
+			continue;
+
+		/* Posting failed: we still own the buffer, so free
+		 * it and give up.
+		 */
+		DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
+		       ibdev, strerror ( rc ) );
+		free_iob ( fresh );
+		break;
+	}
+}
+
+/***************************************************************************
+ *
+ * Link control
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Get link state
+ *
+ * @v ibdev		Infiniband device
+ * @ret rc		Link status code
+ *
+ * Maps the port state to a status code: zero for ACTIVE, a
+ * state-specific error for DOWN, INIT and ARMED, and -EINVAL for any
+ * other value.
+ */
+int ib_link_rc ( struct ib_device *ibdev ) {
+	int rc;
+
+	switch ( ibdev->port_state ) {
+	case IB_PORT_STATE_DOWN:
+		rc = -ENOTCONN;
+		break;
+	case IB_PORT_STATE_INIT:
+		rc = -EINPROGRESS_INIT;
+		break;
+	case IB_PORT_STATE_ARMED:
+		rc = -EINPROGRESS_ARMED;
+		break;
+	case IB_PORT_STATE_ACTIVE:
+		rc = 0;
+		break;
+	default:
+		rc = -EINVAL;
+		break;
+	}
+
+	return rc;
+}
+
+/**
+ * Textual representation of Infiniband link state
+ *
+ * @v ibdev		Infiniband device
+ * @ret link_text	Link state text
+ *
+ * Returns "UNKNOWN" for any port state other than DOWN, INIT, ARMED
+ * or ACTIVE.
+ */
+static const char * ib_link_state_text ( struct ib_device *ibdev ) {
+	const char *text;
+
+	switch ( ibdev->port_state ) {
+	case IB_PORT_STATE_DOWN:
+		text = "DOWN";
+		break;
+	case IB_PORT_STATE_INIT:
+		text = "INIT";
+		break;
+	case IB_PORT_STATE_ARMED:
+		text = "ARMED";
+		break;
+	case IB_PORT_STATE_ACTIVE:
+		text = "ACTIVE";
+		break;
+	default:
+		text = "UNKNOWN";
+		break;
+	}
+
+	return text;
+}
+
+/**
+ * Notify drivers of Infiniband device or link state change
+ *
+ * @v ibdev Infiniband device
+ *
+ * Calls the notify() method of every entry in the IB_DRIVERS table.
+ */
+static void ib_notify ( struct ib_device *ibdev ) {
+ struct ib_driver *driver;
+
+ for_each_table_entry ( driver, IB_DRIVERS )
+ driver->notify ( ibdev );
+}
+
+/**
+ * Notify of Infiniband link state change
+ *
+ * @v ibdev Infiniband device
+ *
+ * Logs the current link state and passes the notification on to all
+ * Infiniband drivers.  The port state itself is not modified here.
+ */
+void ib_link_state_changed ( struct ib_device *ibdev ) {
+
+ DBGC ( ibdev, "IBDEV %p link state is %s\n",
+ ibdev, ib_link_state_text ( ibdev ) );
+
+ /* Notify drivers of link state change */
+ ib_notify ( ibdev );
+}
+
+/**
+ * Open port
+ *
+ * @v ibdev Infiniband device
+ * @ret rc Return status code
+ *
+ * Open requests are counted: only the first request actually opens
+ * the device and creates its management interfaces; later requests
+ * just increment the counter and succeed.  Each successful call
+ * should be balanced by a call to ib_close().
+ */
+int ib_open ( struct ib_device *ibdev ) {
+ int rc;
+
+ /* Increment device open request counter */
+ if ( ibdev->open_count++ > 0 ) {
+ /* Device was already open; do nothing */
+ return 0;
+ }
+
+ /* Open device */
+ if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
+ DBGC ( ibdev, "IBDEV %p could not open: %s\n",
+ ibdev, strerror ( rc ) );
+ goto err_open;
+ }
+
+ /* Create subnet management interface */
+ ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
+ if ( ! ibdev->smi ) {
+ DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
+ rc = -ENOMEM;
+ goto err_create_smi;
+ }
+
+ /* Create subnet management agent */
+ if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
+ DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
+ ibdev, strerror ( rc ) );
+ goto err_create_sma;
+ }
+
+ /* Create general services interface */
+ ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
+ if ( ! ibdev->gsi ) {
+ DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
+ rc = -ENOMEM;
+ goto err_create_gsi;
+ }
+
+ /* Add to head of open devices list */
+ list_add ( &ibdev->open_list, &open_ib_devices );
+
+ /* Notify drivers of device state change */
+ ib_notify ( ibdev );
+
+ assert ( ibdev->open_count == 1 );
+ return 0;
+
+ /* Error unwinding, in reverse order of setup.  The first
+ * statement is not reached (it follows the return above and
+ * has no label).
+ */
+ ib_destroy_mi ( ibdev, ibdev->gsi );
+ err_create_gsi:
+ ib_destroy_sma ( ibdev, ibdev->smi );
+ err_create_sma:
+ ib_destroy_mi ( ibdev, ibdev->smi );
+ err_create_smi:
+ ibdev->op->close ( ibdev );
+ err_open:
+ /* Undo the increment made at the top of the function */
+ assert ( ibdev->open_count == 1 );
+ ibdev->open_count = 0;
+ return rc;
+}
+
+/**
+ * Close port
+ *
+ * @v ibdev		Infiniband device
+ *
+ * Drops one open request.  When the last request is dropped, the
+ * drivers are notified, the management interfaces are destroyed, the
+ * device is closed and its port state is set to DOWN.
+ */
+void ib_close ( struct ib_device *ibdev ) {
+
+	/* Drop one open request */
+	ibdev->open_count--;
+
+	/* Nothing more to do while other open requests remain */
+	if ( ibdev->open_count != 0 )
+		return;
+
+	/* Last request gone: tear everything down */
+	ib_notify ( ibdev );
+	list_del ( &ibdev->open_list );
+	ib_destroy_mi ( ibdev, ibdev->gsi );
+	ib_destroy_sma ( ibdev, ibdev->smi );
+	ib_destroy_mi ( ibdev, ibdev->smi );
+	ibdev->op->close ( ibdev );
+	ibdev->port_state = IB_PORT_STATE_DOWN;
+}
+
+/***************************************************************************
+ *
+ * Multicast
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Attach to multicast group
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @v gid		Multicast GID
+ * @ret rc		Return status code
+ *
+ * Note that this function handles only the local device's attachment
+ * to the multicast GID; it does not issue the relevant MADs to join
+ * the multicast group on the subnet.
+ *
+ * The GID is recorded in the queue pair's software list before the
+ * device is asked to attach, and is removed again if the device
+ * refuses.
+ */
+int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+		      union ib_gid *gid ) {
+	struct ib_multicast_gid *entry;
+	int rc;
+
+	/* Record the GID in the software multicast GID list */
+	entry = zalloc ( sizeof ( *entry ) );
+	if ( ! entry )
+		return -ENOMEM;
+	memcpy ( &entry->gid, gid, sizeof ( entry->gid ) );
+	list_add ( &entry->list, &qp->mgids );
+
+	/* Ask the hardware to attach; roll back on failure */
+	rc = ibdev->op->mcast_attach ( ibdev, qp, gid );
+	if ( rc != 0 ) {
+		list_del ( &entry->list );
+		free ( entry );
+		return rc;
+	}
+
+	return 0;
+}
+
+/**
+ * Detach from multicast group
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @v gid		Multicast GID
+ *
+ * Detaches at the device level first, then removes the first
+ * matching entry (if any) from the queue pair's software list.
+ */
+void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+		       union ib_gid *gid ) {
+	struct ib_multicast_gid *entry;
+
+	/* Remove from hardware multicast GID list */
+	ibdev->op->mcast_detach ( ibdev, qp, gid );
+
+	/* Remove the first matching software entry */
+	list_for_each_entry ( entry, &qp->mgids, list ) {
+		if ( memcmp ( &entry->gid, gid,
+			      sizeof ( entry->gid ) ) != 0 )
+			continue;
+		list_del ( &entry->list );
+		free ( entry );
+		break;
+	}
+}
+
+/***************************************************************************
+ *
+ * Miscellaneous
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Count Infiniband HCA ports
+ *
+ * @v ibdev		Infiniband device
+ * @ret num_ports	Number of ports
+ *
+ * Counts the registered Infiniband devices that share this device's
+ * underlying physical device.
+ */
+int ib_count_ports ( struct ib_device *ibdev ) {
+	struct ib_device *other;
+	int count = 0;
+
+	/* Each Infiniband device with the same physical device
+	 * counts as one port.
+	 */
+	for_each_ibdev ( other ) {
+		if ( other->dev != ibdev->dev )
+			continue;
+		count++;
+	}
+
+	return count;
+}
+
+/**
+ * Set port information
+ *
+ * @v ibdev		Infiniband device
+ * @v mad		Set port information MAD
+ * @ret rc		Return status code
+ *
+ * Returns -ENOTSUP if the device provides no set_port_info()
+ * operation; otherwise returns whatever that operation returns.
+ */
+int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
+	int rc;
+
+	/* Adapters with embedded SMAs do not need to support this method */
+	if ( ! ibdev->op->set_port_info ) {
+		DBGC ( ibdev, "IBDEV %p does not support setting port "
+		       "information\n", ibdev );
+		return -ENOTSUP;
+	}
+
+	/* Delegate to the device driver */
+	if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
+		       ibdev, strerror ( rc ) );
+		return rc;
+	}
+
+	return 0;
+}
+
+/**
+ * Set partition key table
+ *
+ * @v ibdev		Infiniband device
+ * @v mad		Set partition key table MAD
+ * @ret rc		Return status code
+ *
+ * Returns -ENOTSUP if the device provides no set_pkey_table()
+ * operation; otherwise returns whatever that operation returns.
+ */
+int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
+	int rc;
+
+	/* Adapters with embedded SMAs do not need to support this method */
+	if ( ! ibdev->op->set_pkey_table ) {
+		DBGC ( ibdev, "IBDEV %p does not support setting partition "
+		       "key table\n", ibdev );
+		return -ENOTSUP;
+	}
+
+	/* Delegate to the device driver */
+	if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not set partition key table: "
+		       "%s\n", ibdev, strerror ( rc ) );
+		return rc;
+	}
+
+	return 0;
+}
+
+/***************************************************************************
+ *
+ * Event queues
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Poll event queue
+ *
+ * @v ibdev Infiniband device
+ *
+ * Polls the device's event queue and then every completion queue
+ * belonging to the device.
+ */
+void ib_poll_eq ( struct ib_device *ibdev ) {
+ struct ib_completion_queue *cq;
+
+ /* Poll device's event queue */
+ ibdev->op->poll_eq ( ibdev );
+
+ /* Poll all completion queues */
+ list_for_each_entry ( cq, &ibdev->cqs, list )
+ ib_poll_cq ( ibdev, cq );
+}
+
+/**
+ * Single-step the Infiniband event queue
+ *
+ * @v process Infiniband event queue process
+ *
+ * Polls the event queue of every currently open Infiniband device.
+ */
+static void ib_step ( struct process *process __unused ) {
+ struct ib_device *ibdev;
+
+ /* Only devices on the open list are polled */
+ list_for_each_entry ( ibdev, &open_ib_devices, open_list )
+ ib_poll_eq ( ibdev );
+}
+
+/** Infiniband event queue process (runs ib_step() as its step function) */
+PERMANENT_PROCESS ( ib_process, ib_step );
+
+/***************************************************************************
+ *
+ * Infiniband device creation/destruction
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Allocate Infiniband device
+ *
+ * @v priv_size		Size of driver private data area
+ * @ret ibdev		Infiniband device, or NULL
+ *
+ * The device structure and the driver private data area are
+ * allocated as one zeroed block, with the private data placed
+ * immediately after the structure.
+ */
+struct ib_device * alloc_ibdev ( size_t priv_size ) {
+	struct ib_device *ibdev;
+
+	/* Allocate structure plus trailing private data area */
+	ibdev = zalloc ( sizeof ( *ibdev ) + priv_size );
+	if ( ! ibdev )
+		return NULL;
+
+	/* Driver private data starts right after the structure */
+	ib_set_drvdata ( ibdev, ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) ) );
+
+	/* Start with empty lists and default port parameters */
+	INIT_LIST_HEAD ( &ibdev->list );
+	INIT_LIST_HEAD ( &ibdev->open_list );
+	INIT_LIST_HEAD ( &ibdev->cqs );
+	INIT_LIST_HEAD ( &ibdev->qps );
+	ibdev->port_state = IB_PORT_STATE_DOWN;
+	ibdev->lid = IB_LID_NONE;
+	ibdev->pkey = IB_PKEY_DEFAULT;
+
+	return ibdev;
+}
+
+/**
+ * Register Infiniband device
+ *
+ * @v ibdev Infiniband device
+ * @ret rc Return status code
+ *
+ * Adds the device to the tail of the global device list and then
+ * calls the probe() method of each entry in the IB_DRIVERS table.  If
+ * any probe fails, registration is undone and the error is returned.
+ */
+int register_ibdev ( struct ib_device *ibdev ) {
+ struct ib_driver *driver;
+ int rc;
+
+ /* Add to device list */
+ ibdev_get ( ibdev );
+ list_add_tail ( &ibdev->list, &ib_devices );
+ DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
+ ibdev->dev->name );
+
+ /* Probe device */
+ for_each_table_entry ( driver, IB_DRIVERS ) {
+ if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
+ DBGC ( ibdev, "IBDEV %p could not add %s device: %s\n",
+ ibdev, driver->name, strerror ( rc ) );
+ goto err_probe;
+ }
+ }
+
+ return 0;
+
+ err_probe:
+ /* NOTE(review): presumably walks backwards from the failing
+ * driver so that only drivers already probed are removed -
+ * confirm against for_each_table_entry_continue_reverse().
+ */
+ for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
+ driver->remove ( ibdev );
+ list_del ( &ibdev->list );
+ ibdev_put ( ibdev );
+ return rc;
+}
+
+/**
+ * Unregister Infiniband device
+ *
+ * @v ibdev Infiniband device
+ *
+ * Calls the remove() method of each entry in the IB_DRIVERS table
+ * (iterating in reverse), then takes the device off the global
+ * device list and calls ibdev_put() to balance the ibdev_get() made
+ * by register_ibdev().
+ */
+void unregister_ibdev ( struct ib_device *ibdev ) {
+ struct ib_driver *driver;
+
+ /* Remove device */
+ for_each_table_entry_reverse ( driver, IB_DRIVERS )
+ driver->remove ( ibdev );
+
+ /* Remove from device list */
+ list_del ( &ibdev->list );
+ ibdev_put ( ibdev );
+ DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
+}
+
+/**
+ * Find Infiniband device by GID
+ *
+ * @v gid		GID
+ * @ret ibdev		Infiniband device, or NULL
+ *
+ * Returns the first registered device whose GID is byte-for-byte
+ * equal to the given GID.
+ */
+struct ib_device * find_ibdev ( union ib_gid *gid ) {
+	struct ib_device *candidate;
+
+	for_each_ibdev ( candidate ) {
+		if ( memcmp ( gid, &candidate->gid, sizeof ( *gid ) ) != 0 )
+			continue;
+		return candidate;
+	}
+
+	/* No registered device has this GID */
+	return NULL;
+}
+
+/**
+ * Get most recently opened Infiniband device
+ *
+ * @ret ibdev		Most recently opened Infiniband device, or NULL
+ *
+ * The open devices list is kept in reverse order of opening, so the
+ * most recently opened device is its first entry.
+ */
+struct ib_device * last_opened_ibdev ( void ) {
+	struct ib_device *latest;
+
+	latest = list_first_entry ( &open_ib_devices, struct ib_device,
+				    open_list );
+
+	/* Anything on the open list must have a non-zero open count */
+	if ( latest ) {
+		assert ( latest->open_count != 0 );
+	}
+
+	return latest;
+}
+
+/* Drag in IPoIB (presumably forces the "ipoib" object to be linked
+ * in alongside this one - confirm against REQUIRE_OBJECT)
+ */
+REQUIRE_OBJECT ( ipoib );
diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_cm.c b/qemu/roms/ipxe/src/net/infiniband/ib_cm.c
new file mode 100644
index 000000000..797639bc8
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/infiniband/ib_cm.c
@@ -0,0 +1,496 @@
+/*
+ * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <assert.h>
+#include <ipxe/infiniband.h>
+#include <ipxe/ib_mi.h>
+#include <ipxe/ib_pathrec.h>
+#include <ipxe/ib_cm.h>
+
+/**
+ * @file
+ *
+ * Infiniband communication management
+ *
+ */
+
+/** List of connections */
+static LIST_HEAD ( ib_cm_conns );
+
+/**
+ * Find connection by local communication ID
+ *
+ * @v local_id Local communication ID
+ * @ret conn Connection, or NULL
+ */
+static struct ib_connection * ib_cm_find ( uint32_t local_id ) {
+	struct ib_connection *entry;
+	struct ib_connection *match = NULL;
+
+	/* Linear search; the first connection with a matching ID wins */
+	list_for_each_entry ( entry, &ib_cm_conns, list ) {
+		if ( entry->local_id == local_id ) {
+			match = entry;
+			break;
+		}
+	}
+
+	return match;
+}
+
+/**
+ * Send "ready to use" response
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v av Address vector
+ * @v local_id Local communication ID
+ * @v remote_id Remote communication ID
+ * @ret rc Return status code
+ */
+static int ib_cm_send_rtu ( struct ib_device *ibdev,
+			    struct ib_mad_interface *mi,
+			    struct ib_address_vector *av,
+			    uint32_t local_id, uint32_t remote_id ) {
+	union ib_mad mad;
+	struct ib_cm_ready_to_use *rtu;
+	int rc;
+
+	/* Start from a zeroed MAD and fill in the CM header */
+	memset ( &mad, 0, sizeof ( mad ) );
+	mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
+	mad.hdr.class_version = IB_CM_CLASS_VERSION;
+	mad.hdr.method = IB_MGMT_METHOD_SEND;
+	mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );
+
+	/* Fill in the communication IDs */
+	rtu = &mad.cm.cm_data.ready_to_use;
+	rtu->local_id = htonl ( local_id );
+	rtu->remote_id = htonl ( remote_id );
+
+	/* Transmit, logging and propagating any failure */
+	rc = ib_mi_send ( ibdev, mi, &mad, av );
+	if ( rc != 0 )
+		DBG ( "CM could not send RTU: %s\n", strerror ( rc ) );
+
+	return rc;
+}
+
+/**
+ * Handle duplicate connection replies
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v mad		Received MAD
+ * @v av		Source address vector
+ *
+ * If a "ready to use" MAD is lost, the peer may resend the connection
+ * reply.  We have to respond to these with duplicate "ready to use"
+ * MADs, otherwise the peer may time out and drop the connection.
+ */
+static void ib_cm_recv_rep ( struct ib_device *ibdev,
+			     struct ib_mad_interface *mi,
+			     union ib_mad *mad,
+			     struct ib_address_vector *av ) {
+	struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
+	/* The peer's "remote" ID is our local communication ID */
+	uint32_t local_id = ntohl ( rep->remote_id );
+	struct ib_connection *conn = ib_cm_find ( local_id );
+
+	/* Nothing to do for a reply that matches no connection */
+	if ( ! conn ) {
+		DBG ( "CM unidentified connection %08x\n", local_id );
+		return;
+	}
+
+	/* Resend "ready to use".  Errors are deliberately ignored;
+	 * the remote end will retry.
+	 */
+	( void ) ib_cm_send_rtu ( ibdev, mi, av, conn->local_id,
+				  conn->remote_id );
+}
+
+/**
+ * Send reply to disconnection request
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v av Address vector
+ * @v local_id Local communication ID
+ * @v remote_id Remote communication ID
+ * @ret rc Return status code
+ */
+static int ib_cm_send_drep ( struct ib_device *ibdev,
+			     struct ib_mad_interface *mi,
+			     struct ib_address_vector *av,
+			     uint32_t local_id, uint32_t remote_id ) {
+	union ib_mad mad;
+	struct ib_cm_disconnect_reply *drep;
+	int rc;
+
+	/* Start from a zeroed MAD and fill in the CM header */
+	memset ( &mad, 0, sizeof ( mad ) );
+	mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
+	mad.hdr.class_version = IB_CM_CLASS_VERSION;
+	mad.hdr.method = IB_MGMT_METHOD_SEND;
+	mad.hdr.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REPLY );
+
+	/* Fill in the communication IDs */
+	drep = &mad.cm.cm_data.disconnect_reply;
+	drep->local_id = htonl ( local_id );
+	drep->remote_id = htonl ( remote_id );
+
+	/* Transmit, logging and propagating any failure */
+	rc = ib_mi_send ( ibdev, mi, &mad, av );
+	if ( rc != 0 )
+		DBG ( "CM could not send DREP: %s\n", strerror ( rc ) );
+
+	return rc;
+}
+
+/**
+ * Handle disconnection requests
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v mad		Received MAD
+ * @v av		Source address vector
+ *
+ * A disconnection reply is sent whether or not the request matches a
+ * known connection.
+ */
+static void ib_cm_recv_dreq ( struct ib_device *ibdev,
+			      struct ib_mad_interface *mi,
+			      union ib_mad *mad,
+			      struct ib_address_vector *av ) {
+	struct ib_cm_disconnect_request *dreq =
+		&mad->cm.cm_data.disconnect_request;
+	struct ib_connection *conn;
+	/* IDs in the request are from the peer's point of view */
+	uint32_t local_id = ntohl ( dreq->remote_id );
+	uint32_t remote_id = ntohl ( dreq->local_id );
+	int rc;
+
+	/* Identify connection */
+	conn = ib_cm_find ( local_id );
+	if ( conn ) {
+		/* Notify upper layer */
+		conn->op->changed ( ibdev, conn->qp, conn, -ENOTCONN,
+				    &dreq->private_data,
+				    sizeof ( dreq->private_data ) );
+	} else {
+		DBG ( "CM unidentified connection %08x\n", local_id );
+	}
+
+	/* Send reply */
+	if ( ( rc = ib_cm_send_drep ( ibdev, mi, av, local_id,
+				      remote_id ) ) != 0 ) {
+		/* Ignore errors; the remote end will retry */
+	}
+}
+
+/** Communication management agents
+ *
+ * One entry per unsolicited CM attribute handled in this file; each
+ * entry names the management class, class version and attribute ID
+ * (in network byte order) that its handler accepts.
+ */
+struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
+ {
+ /* Connection replies: answered by ib_cm_recv_rep() */
+ .mgmt_class = IB_MGMT_CLASS_CM,
+ .class_version = IB_CM_CLASS_VERSION,
+ .attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
+ .handle = ib_cm_recv_rep,
+ },
+ {
+ /* Disconnection requests: answered by ib_cm_recv_dreq() */
+ .mgmt_class = IB_MGMT_CLASS_CM,
+ .class_version = IB_CM_CLASS_VERSION,
+ .attr_id = htons ( IB_CM_ATTR_DISCONNECT_REQUEST ),
+ .handle = ib_cm_recv_dreq,
+ },
+};
+
+/**
+ * Convert connection rejection reason to return status code
+ *
+ * @v reason Rejection reason (in network byte order)
+ * @ret rc Return status code
+ */
+static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) {
+
+	/* The reason is compared in network byte order, so the
+	 * constants are byte-swapped rather than the argument.
+	 */
+	if ( reason == htons ( IB_CM_REJECT_BAD_SERVICE_ID ) )
+		return -ENODEV;
+	if ( reason == htons ( IB_CM_REJECT_STALE_CONN ) )
+		return -EALREADY;
+	if ( reason == htons ( IB_CM_REJECT_CONSUMER ) )
+		return -ENOTTY;
+
+	/* Any other rejection reason */
+	return -EPERM;
+}
+
+/**
+ * Handle connection request transaction completion
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v madx Management transaction
+ * @v rc Status code
+ * @v mad Received MAD (or NULL on error)
+ * @v av Source address vector (or NULL on error)
+ */
+static void ib_cm_req_complete ( struct ib_device *ibdev,
+ struct ib_mad_interface *mi,
+ struct ib_mad_transaction *madx,
+ int rc, union ib_mad *mad,
+ struct ib_address_vector *av ) {
+ struct ib_connection *conn = ib_madx_get_ownerdata ( madx );
+ struct ib_queue_pair *qp = conn->qp;
+ /* These are only address computations; the MAD itself is not
+ * read unless rc is zero.
+ */
+ struct ib_cm_common *common = &mad->cm.cm_data.common;
+ struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
+ struct ib_cm_connect_reject *rej = &mad->cm.cm_data.connect_reject;
+ void *private_data = NULL;
+ size_t private_data_len = 0;
+
+ /* Report failures */
+ /* A transaction that completed but carries a non-OK MAD status
+ * is treated as an I/O error.
+ */
+ if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
+ rc = -EIO;
+ if ( rc != 0 ) {
+ DBGC ( conn, "CM %p connection request failed: %s\n",
+ conn, strerror ( rc ) );
+ goto out;
+ }
+
+ /* Record remote communication ID */
+ conn->remote_id = ntohl ( common->local_id );
+
+ /* Handle response */
+ /* attr_id is compared in network byte order */
+ switch ( mad->hdr.attr_id ) {
+
+ case htons ( IB_CM_ATTR_CONNECT_REPLY ) :
+ /* Extract fields */
+ /* Each value is taken from the upper 24 bits of its 32-bit field */
+ qp->av.qpn = ( ntohl ( rep->local_qpn ) >> 8 );
+ qp->send.psn = ( ntohl ( rep->starting_psn ) >> 8 );
+ private_data = &rep->private_data;
+ private_data_len = sizeof ( rep->private_data );
+ DBGC ( conn, "CM %p connected to QPN %lx PSN %x\n",
+ conn, qp->av.qpn, qp->send.psn );
+
+ /* Modify queue pair */
+ if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
+ DBGC ( conn, "CM %p could not modify queue pair: %s\n",
+ conn, strerror ( rc ) );
+ goto out;
+ }
+
+ /* Send "ready to use" reply */
+ if ( ( rc = ib_cm_send_rtu ( ibdev, mi, av, conn->local_id,
+ conn->remote_id ) ) != 0 ) {
+ /* Treat as non-fatal */
+ rc = 0;
+ }
+ break;
+
+ case htons ( IB_CM_ATTR_CONNECT_REJECT ) :
+ /* Extract fields */
+ DBGC ( conn, "CM %p connection rejected (reason %d)\n",
+ conn, ntohs ( rej->reason ) );
+ /* Private data is valid only for a Consumer Reject */
+ if ( rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) {
+ private_data = &rej->private_data;
+ private_data_len = sizeof ( rej->private_data );
+ }
+ rc = ib_cm_rejection_reason_to_rc ( rej->reason );
+ break;
+
+ default:
+ DBGC ( conn, "CM %p unexpected response (attribute %04x)\n",
+ conn, ntohs ( mad->hdr.attr_id ) );
+ rc = -ENOTSUP;
+ break;
+ }
+
+ out:
+ /* Destroy the completed transaction */
+ ib_destroy_madx ( ibdev, ibdev->gsi, madx );
+ conn->madx = NULL;
+
+ /* Hand off to the upper completion handler */
+ /* Every path through this function reaches this call exactly
+ * once, with rc holding the final status.
+ */
+ conn->op->changed ( ibdev, qp, conn, rc, private_data,
+ private_data_len );
+}
+
+/** Connection request operations
+ *
+ * Passed to ib_create_madx() when the connection request is sent, so
+ * that ib_cm_req_complete() handles the transaction's completion.
+ */
+static struct ib_mad_transaction_operations ib_cm_req_op = {
+ .complete = ib_cm_req_complete,
+};
+
+/**
+ * Handle connection path transaction completion
+ *
+ * @v ibdev Infiniband device
+ * @v path Path
+ * @v rc Status code
+ * @v av Address vector, or NULL on error
+ */
+static void ib_cm_path_complete ( struct ib_device *ibdev,
+				  struct ib_path *path, int rc,
+				  struct ib_address_vector *av ) {
+	struct ib_connection *conn = ib_path_get_ownerdata ( path );
+	struct ib_queue_pair *qp = conn->qp;
+	union ib_mad mad;
+	struct ib_cm_connect_request *req = &mad.cm.cm_data.connect_request;
+	size_t private_data_len;
+
+	/* Report failures */
+	if ( rc != 0 ) {
+		DBGC ( conn, "CM %p path lookup failed: %s\n",
+		       conn, strerror ( rc ) );
+		conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
+		goto out;
+	}
+
+	/* Update queue pair peer path */
+	memcpy ( &qp->av, av, sizeof ( qp->av ) );
+
+	/* Construct connection request */
+	memset ( &mad, 0, sizeof ( mad ) );
+	mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
+	mad.hdr.class_version = IB_CM_CLASS_VERSION;
+	mad.hdr.method = IB_MGMT_METHOD_SEND;
+	mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
+	req->local_id = htonl ( conn->local_id );
+	memcpy ( &req->service_id, &conn->service_id,
+		 sizeof ( req->service_id ) );
+	memcpy ( &req->local_ca, &ibdev->node_guid, sizeof ( req->local_ca ) );
+	req->local_qpn__responder_resources = htonl ( ( qp->qpn << 8 ) | 1 );
+	req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
+	req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
+		htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
+			( 0 << 0 ) );
+	req->starting_psn__local_timeout__retry_count =
+		htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) |
+			( 0x07 << 0 ) );
+	req->pkey = htons ( ibdev->pkey );
+	req->payload_mtu__rdc_exists__rnr_retry =
+		( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
+	req->max_cm_retries__srq = ( ( 0x0f << 4 ) | ( 0 << 3 ) );
+	req->primary.local_lid = htons ( ibdev->lid );
+	req->primary.remote_lid = htons ( conn->qp->av.lid );
+	memcpy ( &req->primary.local_gid, &ibdev->gid,
+		 sizeof ( req->primary.local_gid ) );
+	memcpy ( &req->primary.remote_gid, &conn->qp->av.gid,
+		 sizeof ( req->primary.remote_gid ) );
+	req->primary.flow_label__rate =
+		htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) );
+	req->primary.hop_limit = 0;
+	req->primary.sl__subnet_local =
+		( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) );
+	req->primary.local_ack_timeout = ( 0x13 << 3 );
+
+	/* Copy private data, truncated to fit the request */
+	private_data_len = conn->private_data_len;
+	if ( private_data_len > sizeof ( req->private_data ) )
+		private_data_len = sizeof ( req->private_data );
+	memcpy ( &req->private_data, &conn->private_data, private_data_len );
+
+	/* Create connection request, addressed to the peer's GSI */
+	av->qpn = IB_QPN_GSI;
+	av->qkey = IB_QKEY_GSI;
+	conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av,
+				      &ib_cm_req_op );
+	if ( ! conn->madx ) {
+		DBGC ( conn, "CM %p could not create connection request\n",
+		       conn );
+		/* rc is necessarily zero at this point, so passing it
+		 * through would report success to the upper layer.
+		 * Report an explicit failure instead.
+		 */
+		rc = -ENOMEM;
+		conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
+		goto out;
+	}
+	ib_madx_set_ownerdata ( conn->madx, conn );
+
+ out:
+	/* Destroy the completed transaction */
+	ib_destroy_path ( ibdev, path );
+	conn->path = NULL;
+}
+
+/** Connection path operations
+ *
+ * Passed to ib_create_path() by ib_create_conn(), so that
+ * ib_cm_path_complete() handles the result of the path lookup.
+ */
+static struct ib_path_operations ib_cm_path_op = {
+ .complete = ib_cm_path_complete,
+};
+
+/**
+ * Create connection to remote QP
+ *
+ * @v ibdev Infiniband device
+ * @v qp Queue pair
+ * @v dgid Target GID
+ * @v service_id Target service ID
+ * @v private_data Connection request private data
+ * @v private_data_len Length of connection request private data
+ * @v op Connection operations
+ * @ret conn Connection
+ */
+struct ib_connection *
+ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+		 union ib_gid *dgid, union ib_guid *service_id,
+		 void *private_data, size_t private_data_len,
+		 struct ib_connection_operations *op ) {
+	struct ib_connection *conn;
+
+	/* Allocate the connection with room for the private data
+	 * appended to the structure.
+	 */
+	conn = zalloc ( sizeof ( *conn ) + private_data_len );
+	if ( ! conn )
+		return NULL;
+
+	/* Initialise the connection and reset the queue pair's
+	 * address vector so that it names only the target GID.
+	 */
+	conn->ibdev = ibdev;
+	conn->qp = qp;
+	memset ( &qp->av, 0, sizeof ( qp->av ) );
+	qp->av.gid_present = 1;
+	memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );
+	conn->local_id = random();
+	memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) );
+	conn->op = op;
+	conn->private_data_len = private_data_len;
+	memcpy ( &conn->private_data, private_data, private_data_len );
+
+	/* Start the path lookup; the connection request itself is
+	 * sent from the path completion handler.
+	 */
+	conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op );
+	if ( ! conn->path ) {
+		free ( conn );
+		return NULL;
+	}
+	ib_path_set_ownerdata ( conn->path, conn );
+
+	/* Make the connection visible to ib_cm_find() */
+	list_add ( &conn->list, &ib_cm_conns );
+
+	DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n",
+	       conn, ibdev, qp->qpn );
+	DBGC ( conn, "CM %p connecting to " IB_GID_FMT " " IB_GUID_FMT "\n",
+	       conn, IB_GID_ARGS ( dgid ), IB_GUID_ARGS ( service_id ) );
+
+	return conn;
+}
+
+/**
+ * Destroy connection to remote QP
+ *
+ * @v ibdev Infiniband device
+ * @v qp Queue pair
+ * @v conn Connection
+ */
+void ib_destroy_conn ( struct ib_device *ibdev,
+		       struct ib_queue_pair *qp __unused,
+		       struct ib_connection *conn ) {
+
+	/* Unlink first so that lookups by ID no longer find us */
+	list_del ( &conn->list );
+
+	/* Cancel whichever transaction is still outstanding, if any */
+	if ( conn->madx ) {
+		ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
+	}
+	if ( conn->path ) {
+		ib_destroy_path ( ibdev, conn->path );
+	}
+
+	free ( conn );
+}
diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_cmrc.c b/qemu/roms/ipxe/src/net/infiniband/ib_cmrc.c
new file mode 100644
index 000000000..1cc0fcfef
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/infiniband/ib_cmrc.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright (C) 2009 Fen Systems Ltd <mbrown@fensystems.co.uk>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+FILE_LICENCE ( BSD2 );
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/xfer.h>
+#include <ipxe/process.h>
+#include <ipxe/infiniband.h>
+#include <ipxe/ib_cm.h>
+#include <ipxe/ib_cmrc.h>
+
+/**
+ * @file
+ *
+ * Infiniband Communication-managed Reliable Connections
+ *
+ */
+
+/** CMRC number of send WQEs
+ *
+ * This is a policy decision.
+ */
+#define IB_CMRC_NUM_SEND_WQES 4
+
+/** CMRC number of receive WQEs
+ *
+ * This is a policy decision.
+ */
+#define IB_CMRC_NUM_RECV_WQES 2
+
+/** CMRC number of completion queue entries
+ *
+ * This is a policy decision.
+ */
+#define IB_CMRC_NUM_CQES 8
+
+/** An Infiniband Communication-Managed Reliable Connection */
+struct ib_cmrc_connection {
+ /** Reference count */
+ struct refcnt refcnt;
+ /** Data transfer interface */
+ struct interface xfer;
+ /** Infiniband device */
+ struct ib_device *ibdev;
+ /** Completion queue (shared by the send and receive sides) */
+ struct ib_completion_queue *cq;
+ /** Queue pair */
+ struct ib_queue_pair *qp;
+ /** Connection
+ *
+ * NULL until the first datagram delivered to us is sent as a
+ * CM connection request.
+ */
+ struct ib_connection *conn;
+ /** Destination GID */
+ union ib_gid dgid;
+ /** Service ID */
+ union ib_guid service_id;
+ /** QP is connected
+ *
+ * Set when the CM reports a zero status, cleared on any
+ * non-zero status.
+ */
+ int connected;
+ /** Shutdown process */
+ struct process shutdown;
+};
+
+/**
+ * Shut down CMRC connection gracefully
+ *
+ * @v cmrc Communication-Managed Reliable Connection
+ *
+ * The Infiniband data structures are not reference-counted or
+ * guarded. It is therefore unsafe to shut them down while we may be
+ * in the middle of a callback from the Infiniband stack (e.g. in a
+ * receive completion handler).
+ *
+ * This shutdown process will run some time after the call to
+ * ib_cmrc_close(), after control has returned out of the Infiniband
+ * core, and will shut down the Infiniband interfaces cleanly.
+ *
+ * The shutdown process holds an implicit reference on the CMRC
+ * connection, ensuring that the structure is not freed before the
+ * shutdown process has run.
+ */
+static void ib_cmrc_shutdown ( struct ib_cmrc_connection *cmrc ) {
+
+	DBGC ( cmrc, "CMRC %p shutting down\n", cmrc );
+
+	/* Shut down Infiniband interface.
+	 *
+	 * No CM connection exists if we are closed before the first
+	 * datagram has been delivered, or if creating the connection
+	 * failed.  ib_destroy_conn() dereferences its argument
+	 * unconditionally, so it must not be called with NULL.
+	 */
+	if ( cmrc->conn ) {
+		ib_destroy_conn ( cmrc->ibdev, cmrc->qp, cmrc->conn );
+		cmrc->conn = NULL;
+	}
+	ib_destroy_qp ( cmrc->ibdev, cmrc->qp );
+	ib_destroy_cq ( cmrc->ibdev, cmrc->cq );
+	ib_close ( cmrc->ibdev );
+
+	/* Drop the remaining reference */
+	ref_put ( &cmrc->refcnt );
+}
+
+/**
+ * Close CMRC connection
+ *
+ * @v cmrc Communication-Managed Reliable Connection
+ * @v rc Reason for close
+ */
+static void ib_cmrc_close ( struct ib_cmrc_connection *cmrc, int rc ) {
+
+ /* Close data transfer interface */
+ intf_shutdown ( &cmrc->xfer, rc );
+
+ /* Schedule shutdown process */
+ /* The Infiniband objects are torn down later, from
+ * ib_cmrc_shutdown(), rather than here, because this function
+ * is called from within Infiniband completion handlers.
+ */
+ process_add ( &cmrc->shutdown );
+}
+
+/**
+ * Handle change of CMRC connection status
+ *
+ * @v ibdev Infiniband device
+ * @v qp Queue pair
+ * @v conn Connection
+ * @v rc_cm Connection status code
+ * @v private_data Private data, if available
+ * @v private_data_len Length of private data
+ */
+static void ib_cmrc_changed ( struct ib_device *ibdev __unused,
+			      struct ib_queue_pair *qp,
+			      struct ib_connection *conn __unused, int rc_cm,
+			      void *private_data, size_t private_data_len ) {
+	struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp );
+	int is_up = ( rc_cm == 0 );
+	int rc_xfer;
+
+	/* Log and record the new connection state */
+	if ( is_up ) {
+		DBGC ( cmrc, "CMRC %p connected\n", cmrc );
+	} else {
+		DBGC ( cmrc, "CMRC %p disconnected: %s\n",
+		       cmrc, strerror ( rc_cm ) );
+	}
+	cmrc->connected = is_up;
+
+	/* Forward any private data to the data transfer interface;
+	 * a delivery failure closes the connection immediately.
+	 */
+	DBGC2 ( cmrc, "CMRC %p received private data:\n", cmrc );
+	DBGC2_HDA ( cmrc, 0, private_data, private_data_len );
+	if ( private_data ) {
+		rc_xfer = xfer_deliver_raw ( &cmrc->xfer, private_data,
+					     private_data_len );
+		if ( rc_xfer != 0 ) {
+			DBGC ( cmrc, "CMRC %p could not deliver private "
+			       "data: %s\n", cmrc, strerror ( rc_xfer ) );
+			ib_cmrc_close ( cmrc, rc_xfer );
+			return;
+		}
+	}
+
+	/* The transmit window depends on the connection state */
+	xfer_window_changed ( &cmrc->xfer );
+
+	/* A disconnection also closes the upper connection */
+	if ( ! is_up )
+		ib_cmrc_close ( cmrc, rc_cm );
+}
+
+/** CMRC connection operations
+ *
+ * Passed to ib_create_conn() so that connection status changes are
+ * reported to ib_cmrc_changed().
+ */
+static struct ib_connection_operations ib_cmrc_conn_op = {
+ .changed = ib_cmrc_changed,
+};
+
+/**
+ * Handle CMRC send completion
+ *
+ * @v ibdev Infiniband device
+ * @v qp Queue pair
+ * @v iobuf I/O buffer
+ * @v rc Completion status code
+ */
+static void ib_cmrc_complete_send ( struct ib_device *ibdev __unused,
+				    struct ib_queue_pair *qp,
+				    struct io_buffer *iobuf, int rc ) {
+	struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp );
+
+	/* The buffer is finished with whether or not the send worked */
+	free_iob ( iobuf );
+
+	/* Nothing more to do for a successful send */
+	if ( rc == 0 )
+		return;
+
+	/* Any send error closes the connection */
+	DBGC ( cmrc, "CMRC %p send error: %s\n", cmrc, strerror ( rc ) );
+	ib_cmrc_close ( cmrc, rc );
+}
+
+/**
+ * Handle CMRC receive completion
+ *
+ * @v ibdev Infiniband device
+ * @v qp Queue pair
+ * @v dest Destination address vector, or NULL
+ * @v source Source address vector, or NULL
+ * @v iobuf I/O buffer
+ * @v rc Completion status code
+ */
+static void ib_cmrc_complete_recv ( struct ib_device *ibdev __unused,
+				    struct ib_queue_pair *qp,
+				    struct ib_address_vector *dest __unused,
+				    struct ib_address_vector *source __unused,
+				    struct io_buffer *iobuf, int rc ) {
+	struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp );
+
+	if ( rc == 0 ) {
+		DBGC2 ( cmrc, "CMRC %p received:\n", cmrc );
+		DBGC2_HDA ( cmrc, 0, iobuf->data, iob_len ( iobuf ) );
+
+		/* Hand the buffer to the data transfer interface */
+		rc = xfer_deliver_iob ( &cmrc->xfer, iobuf );
+		if ( rc == 0 )
+			return;
+		DBGC ( cmrc, "CMRC %p could not deliver data: %s\n",
+		       cmrc, strerror ( rc ) );
+	} else {
+		/* Failed receive: discard the buffer ourselves */
+		DBGC ( cmrc, "CMRC %p receive error: %s\n",
+		       cmrc, strerror ( rc ) );
+		free_iob ( iobuf );
+	}
+
+	/* Either kind of failure closes the connection */
+	ib_cmrc_close ( cmrc, rc );
+}
+
+/** Infiniband CMRC completion operations
+ *
+ * Passed to ib_create_cq() in ib_cmrc_open().
+ */
+static struct ib_completion_queue_operations ib_cmrc_completion_ops = {
+ .complete_send = ib_cmrc_complete_send,
+ .complete_recv = ib_cmrc_complete_recv,
+};
+
+/** Infiniband CMRC queue pair operations
+ *
+ * Passed to ib_create_qp() in ib_cmrc_open(); I/O buffers come from
+ * the generic alloc_iob().
+ */
+static struct ib_queue_pair_operations ib_cmrc_queue_pair_ops = {
+ .alloc_iob = alloc_iob,
+};
+
+/**
+ * Send data via CMRC
+ *
+ * @v cmrc CMRC connection
+ * @v iobuf Datagram I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ */
+static int ib_cmrc_xfer_deliver ( struct ib_cmrc_connection *cmrc,
+				  struct io_buffer *iobuf,
+				  struct xfer_metadata *meta __unused ) {
+	int rc;
+
+	/* If no connection has yet been attempted, send this datagram
+	 * as the CM REQ private data.  Otherwise, send it via the QP.
+	 */
+	if ( ! cmrc->connected ) {
+
+		/* Abort if we have already sent a CM connection request */
+		if ( cmrc->conn ) {
+			DBGC ( cmrc, "CMRC %p attempt to send before "
+			       "connection is complete\n", cmrc );
+			rc = -EIO;
+			goto out;
+		}
+
+		/* Send via CM connection request */
+		cmrc->conn = ib_create_conn ( cmrc->ibdev, cmrc->qp,
+					      &cmrc->dgid, &cmrc->service_id,
+					      iobuf->data, iob_len ( iobuf ),
+					      &ib_cmrc_conn_op );
+		if ( ! cmrc->conn ) {
+			DBGC ( cmrc, "CMRC %p could not connect\n", cmrc );
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		/* ib_create_conn() has copied the datagram into the
+		 * connection as private data, so the buffer we were
+		 * handed is no longer needed.  Free it here, since the
+		 * success path does not pass through the cleanup
+		 * code below.
+		 */
+		free_iob ( iobuf );
+		iobuf = NULL;
+
+	} else {
+
+		/* Send via QP */
+		if ( ( rc = ib_post_send ( cmrc->ibdev, cmrc->qp, NULL,
+					   iob_disown ( iobuf ) ) ) != 0 ) {
+			DBGC ( cmrc, "CMRC %p could not send: %s\n",
+			       cmrc, strerror ( rc ) );
+			goto out;
+		}
+
+	}
+	return 0;
+
+ out:
+	/* Free the I/O buffer if necessary */
+	free_iob ( iobuf );
+
+	/* Close the connection on any errors */
+	if ( rc != 0 )
+		ib_cmrc_close ( cmrc, rc );
+
+	return rc;
+}
+
+/**
+ * Check CMRC flow control window
+ *
+ * @v cmrc CMRC connection
+ * @ret len Length of window
+ */
+static size_t ib_cmrc_xfer_window ( struct ib_cmrc_connection *cmrc ) {
+
+	/* No window at all until the connection is established */
+	if ( ! cmrc->connected )
+		return 0;
+
+	/* Once connected, advertise the maximum payload size */
+	return IB_MAX_PAYLOAD_SIZE;
+}
+
+/**
+ * Identify device underlying CMRC connection
+ *
+ * @v cmrc CMRC connection
+ * @ret device Underlying device
+ */
+static struct device *
+ib_cmrc_identify_device ( struct ib_cmrc_connection *cmrc ) {
+	struct ib_device *ibdev = cmrc->ibdev;
+
+	/* Report the generic device behind our Infiniband device */
+	return ibdev->dev;
+}
+
+/** CMRC data transfer interface operations
+ *
+ * Maps each interface operation onto the CMRC function that
+ * implements it.
+ */
+static struct interface_operation ib_cmrc_xfer_operations[] = {
+ INTF_OP ( xfer_deliver, struct ib_cmrc_connection *,
+ ib_cmrc_xfer_deliver ),
+ INTF_OP ( xfer_window, struct ib_cmrc_connection *,
+ ib_cmrc_xfer_window ),
+ INTF_OP ( intf_close, struct ib_cmrc_connection *, ib_cmrc_close ),
+ INTF_OP ( identify_device, struct ib_cmrc_connection *,
+ ib_cmrc_identify_device ),
+};
+
+/** CMRC data transfer interface descriptor
+ *
+ * Describes the "xfer" member of struct ib_cmrc_connection; used by
+ * intf_init() in ib_cmrc_open().
+ */
+static struct interface_descriptor ib_cmrc_xfer_desc =
+ INTF_DESC ( struct ib_cmrc_connection, xfer, ib_cmrc_xfer_operations );
+
+/** CMRC shutdown process descriptor
+ *
+ * Describes the "shutdown" member of struct ib_cmrc_connection and
+ * names ib_cmrc_shutdown() as the function the process runs.
+ */
+static struct process_descriptor ib_cmrc_shutdown_desc =
+ PROC_DESC_ONCE ( struct ib_cmrc_connection, shutdown,
+ ib_cmrc_shutdown );
+
+/**
+ * Open CMRC connection
+ *
+ * @v xfer Data transfer interface
+ * @v ibdev Infiniband device
+ * @v dgid Destination GID
+ * @v service_id Service ID
+ * @ret rc Returns status code
+ */
+int ib_cmrc_open ( struct interface *xfer, struct ib_device *ibdev,
+ union ib_gid *dgid, union ib_guid *service_id ) {
+ struct ib_cmrc_connection *cmrc;
+ int rc;
+
+ /* Allocate and initialise structure */
+ cmrc = zalloc ( sizeof ( *cmrc ) );
+ if ( ! cmrc ) {
+ rc = -ENOMEM;
+ goto err_alloc;
+ }
+ ref_init ( &cmrc->refcnt, NULL );
+ intf_init ( &cmrc->xfer, &ib_cmrc_xfer_desc, &cmrc->refcnt );
+ cmrc->ibdev = ibdev;
+ memcpy ( &cmrc->dgid, dgid, sizeof ( cmrc->dgid ) );
+ memcpy ( &cmrc->service_id, service_id, sizeof ( cmrc->service_id ) );
+ /* The shutdown process is created stopped; ib_cmrc_close()
+ * starts it.
+ */
+ process_init_stopped ( &cmrc->shutdown, &ib_cmrc_shutdown_desc,
+ &cmrc->refcnt );
+
+ /* Open Infiniband device */
+ if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
+ DBGC ( cmrc, "CMRC %p could not open device: %s\n",
+ cmrc, strerror ( rc ) );
+ goto err_open;
+ }
+
+ /* Create completion queue */
+ cmrc->cq = ib_create_cq ( ibdev, IB_CMRC_NUM_CQES,
+ &ib_cmrc_completion_ops );
+ if ( ! cmrc->cq ) {
+ DBGC ( cmrc, "CMRC %p could not create completion queue\n",
+ cmrc );
+ rc = -ENOMEM;
+ goto err_create_cq;
+ }
+
+ /* Create queue pair */
+ /* The same completion queue serves both send and receive */
+ cmrc->qp = ib_create_qp ( ibdev, IB_QPT_RC, IB_CMRC_NUM_SEND_WQES,
+ cmrc->cq, IB_CMRC_NUM_RECV_WQES, cmrc->cq,
+ &ib_cmrc_queue_pair_ops );
+ if ( ! cmrc->qp ) {
+ DBGC ( cmrc, "CMRC %p could not create queue pair\n", cmrc );
+ rc = -ENOMEM;
+ goto err_create_qp;
+ }
+ /* Lets the completion and CM handlers recover cmrc from the QP */
+ ib_qp_set_ownerdata ( cmrc->qp, cmrc );
+ DBGC ( cmrc, "CMRC %p using QPN %lx\n", cmrc, cmrc->qp->qpn );
+
+ /* Attach to parent interface, transfer reference (implicitly)
+ * to our shutdown process, and return.
+ */
+ intf_plug_plug ( &cmrc->xfer, xfer );
+ return 0;
+
+ /* Error unwinding, in reverse order of acquisition.  The
+ * first statement below has no label and follows the return
+ * above, so it is never executed; it completes the ladder.
+ */
+ ib_destroy_qp ( ibdev, cmrc->qp );
+ err_create_qp:
+ ib_destroy_cq ( ibdev, cmrc->cq );
+ err_create_cq:
+ ib_close ( ibdev );
+ err_open:
+ ref_put ( &cmrc->refcnt );
+ err_alloc:
+ return rc;
+}
diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_mcast.c b/qemu/roms/ipxe/src/net/infiniband/ib_mcast.c
new file mode 100644
index 000000000..0a5e72a37
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/infiniband/ib_mcast.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <ipxe/list.h>
+#include <ipxe/infiniband.h>
+#include <ipxe/ib_mi.h>
+#include <ipxe/ib_mcast.h>
+
+/** @file
+ *
+ * Infiniband multicast groups
+ *
+ */
+
+/**
+ * Generate multicast membership MAD
+ *
+ * @v ibdev Infiniband device
+ * @v gid Multicast GID
+ * @v join Join (rather than leave) group
+ * @v mad MAD to fill in
+ */
+static void ib_mcast_mad ( struct ib_device *ibdev, union ib_gid *gid,
+			   int join, union ib_mad *mad ) {
+	struct ib_mad_sa *sa = &mad->sa;
+	struct ib_mc_member_record *rec = &sa->sa_data.mc_member_record;
+
+	/* Start from a zeroed subnet administration MAD */
+	memset ( sa, 0, sizeof ( *sa ) );
+
+	/* MAD header: "set" joins the group, "delete" leaves it */
+	sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+	sa->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+	if ( join ) {
+		sa->mad_hdr.method = IB_MGMT_METHOD_SET;
+	} else {
+		sa->mad_hdr.method = IB_MGMT_METHOD_DELETE;
+	}
+	sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC );
+
+	/* Component mask: flag the record fields filled in below */
+	sa->sa_hdr.comp_mask[1] =
+		htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
+			IB_SA_MCMEMBER_REC_JOIN_STATE );
+
+	/* Membership record: join state, group GID and our port GID */
+	rec->scope__join_state = 1;
+	memcpy ( &rec->mgid, gid, sizeof ( rec->mgid ) );
+	memcpy ( &rec->port_gid, &ibdev->gid, sizeof ( rec->port_gid ) );
+}
+
+/**
+ * Handle multicast membership record join response
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v madx Management transaction
+ * @v rc Status code
+ * @v mad Received MAD (or NULL on error)
+ * @v av Source address vector (or NULL on error)
+ */
+static void ib_mcast_complete ( struct ib_device *ibdev,
+				struct ib_mad_interface *mi,
+				struct ib_mad_transaction *madx,
+				int rc, union ib_mad *mad,
+				struct ib_address_vector *av __unused ) {
+	struct ib_mc_membership *membership = ib_madx_get_ownerdata ( madx );
+	struct ib_queue_pair *qp = membership->qp;
+	union ib_gid *gid = &membership->gid;
+	/* Address computation only; the MAD is not read unless rc is
+	 * zero.
+	 */
+	struct ib_mc_member_record *mc_member_record =
+		&mad->sa.sa_data.mc_member_record;
+	int joined;
+	unsigned long qkey;
+
+	/* Report failures */
+	if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
+		rc = -ENOTCONN;
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p QPN %lx join failed: %s\n",
+		       ibdev, qp->qpn, strerror ( rc ) );
+		goto out;
+	}
+
+	/* Extract values from MAD */
+	joined = ( mad->hdr.method == IB_MGMT_METHOD_GET_RESP );
+	qkey = ntohl ( mc_member_record->qkey );
+	DBGC ( ibdev, "IBDEV %p QPN %lx %s " IB_GID_FMT " qkey %lx\n",
+	       ibdev, qp->qpn, ( joined ? "joined" : "left" ),
+	       IB_GID_ARGS ( gid ), qkey );
+
+	/* Set queue key */
+	qp->qkey = qkey;
+	if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
+		DBGC ( ibdev, "IBDEV %p QPN %lx could not modify qkey: %s\n",
+		       ibdev, qp->qpn, strerror ( rc ) );
+		goto out;
+	}
+
+ out:
+	/* Destroy the completed transaction (this is the one use of
+	 * "mi", which is why it is not marked as unused).
+	 */
+	ib_destroy_madx ( ibdev, mi, madx );
+	membership->madx = NULL;
+
+	/* Hand off to upper completion handler */
+	membership->complete ( ibdev, qp, membership, rc, mad );
+}
+
+/** Multicast membership management transaction completion operations
+ *
+ * Passed to ib_create_madx() by ib_mcast_join().
+ */
+static struct ib_mad_transaction_operations ib_mcast_op = {
+ .complete = ib_mcast_complete,
+};
+
+/**
+ * Join multicast group
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @v membership	Multicast group membership
+ * @v gid		Multicast GID to join
+ * @v complete		Join completion handler
+ * @ret rc		Return status code
+ *
+ * A zero return means only that the join transaction was started;
+ * its outcome is reported through the completion handler.
+ */
+int ib_mcast_join ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+		    struct ib_mc_membership *membership, union ib_gid *gid,
+		    void ( * complete ) ( struct ib_device *ibdev,
+					  struct ib_queue_pair *qp,
+					  struct ib_mc_membership *membership,
+					  int rc, union ib_mad *mad ) ) {
+	union ib_mad join_mad;
+	int rc;
+
+	DBGC ( ibdev, "IBDEV %p QPN %lx joining " IB_GID_FMT "\n",
+	       ibdev, qp->qpn, IB_GID_ARGS ( gid ) );
+
+	/* Record what the completion handler will need */
+	membership->qp = qp;
+	memcpy ( &membership->gid, gid, sizeof ( membership->gid ) );
+	membership->complete = complete;
+
+	/* Attach the queue pair to the multicast GID */
+	rc = ib_mcast_attach ( ibdev, qp, gid );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p QPN %lx could not attach: %s\n",
+		       ibdev, qp->qpn, strerror ( rc ) );
+		return rc;
+	}
+
+	/* Start the membership join transaction, undoing the attach
+	 * if it cannot be created.
+	 */
+	ib_mcast_mad ( ibdev, gid, 1, &join_mad );
+	membership->madx = ib_create_madx ( ibdev, ibdev->gsi, &join_mad,
+					    NULL, &ib_mcast_op );
+	if ( ! membership->madx ) {
+		DBGC ( ibdev, "IBDEV %p QPN %lx could not create join "
+		       "transaction\n", ibdev, qp->qpn );
+		ib_mcast_detach ( ibdev, qp, gid );
+		return -ENOMEM;
+	}
+	ib_madx_set_ownerdata ( membership->madx, membership );
+
+	return 0;
+}
+
+/**
+ * Leave multicast group
+ *
+ * @v ibdev Infiniband device
+ * @v qp Queue pair
+ * @v membership Multicast group membership
+ */
+void ib_mcast_leave ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+		      struct ib_mc_membership *membership ) {
+	union ib_gid *mgid = &membership->gid;
+	union ib_mad leave_mad;
+	int rc;
+
+	DBGC ( ibdev, "IBDEV %p QPN %lx leaving " IB_GID_FMT "\n",
+	       ibdev, qp->qpn, IB_GID_ARGS ( mgid ) );
+
+	/* Stop receiving for this group straight away */
+	ib_mcast_detach ( ibdev, qp, mgid );
+
+	/* Abandon a join that is still in progress */
+	if ( membership->madx ) {
+		ib_destroy_madx ( ibdev, ibdev->gsi, membership->madx );
+		membership->madx = NULL;
+	}
+
+	/* Send one leave request; a failure is logged but not retried */
+	ib_mcast_mad ( ibdev, mgid, 0, &leave_mad );
+	rc = ib_mi_send ( ibdev, ibdev->gsi, &leave_mad, NULL );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p QPN %lx could not send leave request: "
+		       "%s\n", ibdev, qp->qpn, strerror ( rc ) );
+	}
+}
diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_mi.c b/qemu/roms/ipxe/src/net/infiniband/ib_mi.c
new file mode 100644
index 000000000..ef6d539f1
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/infiniband/ib_mi.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <byteswap.h>
+#include <ipxe/infiniband.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/ib_mi.h>
+
+/**
+ * @file
+ *
+ * Infiniband management interfaces
+ *
+ */
+
+/** Management interface number of send WQEs
+ *
+ * This is a policy decision.  ib_create_mi() passes this value to
+ * ib_create_qp() when creating the management queue pair.
+ */
+#define IB_MI_NUM_SEND_WQES 4
+
+/** Management interface number of receive WQEs
+ *
+ * This is a policy decision.  ib_create_mi() passes this value to
+ * ib_create_qp() when creating the management queue pair.
+ */
+#define IB_MI_NUM_RECV_WQES 2
+
+/** Management interface number of completion queue entries
+ *
+ * This is a policy decision.  ib_create_mi() creates one completion
+ * queue of this size and hands the same queue to ib_create_qp() twice.
+ */
+#define IB_MI_NUM_CQES 8
+
+/** TID magic signature
+ *
+ * The characters 'i', 'P', 'X', 'E' packed into one 32-bit value.
+ * ib_mi_send() stores it (via htonl()) in the first TID word of any
+ * MAD whose TID is still all zeroes.
+ */
+#define IB_MI_TID_MAGIC ( ( 'i' << 24 ) | ( 'P' << 16 ) | ( 'X' << 8 ) | 'E' )
+
+/** TID to use for next MAD
+ *
+ * ib_mi_send() pre-increments this counter each time it assigns a
+ * TID and stores the result (via htonl()) in the second TID word.
+ */
+static unsigned int next_tid;
+
+/**
+ * Handle received MAD
+ *
+ * Dispatches the MAD to the first outstanding transaction on this
+ * management interface whose TID matches; failing that, to the first
+ * registered agent whose management class, class version and
+ * attribute ID match.  The callbacks are invoked as plain statements;
+ * nothing they might return is examined here.
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v mad Received MAD
+ * @v av Source address vector
+ * @ret rc Return status code (0 if a transaction or agent
+ * took the MAD, -ENOTSUP if nothing matched)
+ */
+static int ib_mi_handle ( struct ib_device *ibdev,
+ struct ib_mad_interface *mi,
+ union ib_mad *mad,
+ struct ib_address_vector *av ) {
+ struct ib_mad_hdr *hdr = &mad->hdr;
+ struct ib_mad_transaction *madx;
+ struct ib_mad_agent *agent;
+
+ /* Look for a matching transaction by TID.  The TID is compared
+ * bytewise with memcmp(), so no byte-swapping is involved.
+ */
+ list_for_each_entry ( madx, &mi->madx, list ) {
+ if ( memcmp ( &hdr->tid, &madx->mad.hdr.tid,
+ sizeof ( hdr->tid ) ) != 0 )
+ continue;
+ /* Found a matching transaction: complete it with status 0
+ * and stop searching.
+ */
+ madx->op->complete ( ibdev, mi, madx, 0, mad, av );
+ return 0;
+ }
+
+ /* If there is no matching transaction, look for a listening agent.
+ * The management class is compared under IB_MGMT_CLASS_MASK;
+ * class version and attribute ID must match exactly.
+ */
+ for_each_table_entry ( agent, IB_MAD_AGENTS ) {
+ if ( ( ( agent->mgmt_class & IB_MGMT_CLASS_MASK ) !=
+ ( hdr->mgmt_class & IB_MGMT_CLASS_MASK ) ) ||
+ ( agent->class_version != hdr->class_version ) ||
+ ( agent->attr_id != hdr->attr_id ) )
+ continue;
+ /* Found a matching agent: only the first match is invoked */
+ agent->handle ( ibdev, mi, mad, av );
+ return 0;
+ }
+
+ /* Otherwise, ignore it (logged only when debugging is enabled) */
+ DBGC ( mi, "MI %p RX TID %08x%08x ignored\n",
+ mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) );
+ return -ENOTSUP;
+}
+
+/**
+ * Complete receive via management interface
+ *
+ * Checks that the received buffer is exactly one MAD with a supported
+ * base version and passes it to ib_mi_handle().  The I/O buffer is
+ * freed on every path through this function, including the error
+ * paths.
+ *
+ * @v ibdev Infiniband device
+ * @v qp Queue pair
+ * @v dest Destination address vector
+ * @v source Source address vector
+ * @v iobuf I/O buffer
+ * @v rc Completion status code
+ */
+static void ib_mi_complete_recv ( struct ib_device *ibdev,
+ struct ib_queue_pair *qp,
+ struct ib_address_vector *dest __unused,
+ struct ib_address_vector *source,
+ struct io_buffer *iobuf, int rc ) {
+ struct ib_mad_interface *mi = ib_qp_get_ownerdata ( qp );
+ union ib_mad *mad;
+ struct ib_mad_hdr *hdr;
+
+ /* Ignore errors: a failed completion is logged and the buffer
+ * is released without being inspected.
+ */
+ if ( rc != 0 ) {
+ DBGC ( mi, "MI %p RX error: %s\n", mi, strerror ( rc ) );
+ goto out;
+ }
+
+ /* Sanity checks: the buffer length must equal the size of
+ * union ib_mad exactly, and the base version must be
+ * IB_MGMT_BASE_VERSION.
+ */
+ if ( iob_len ( iobuf ) != sizeof ( *mad ) ) {
+ DBGC ( mi, "MI %p RX bad size (%zd bytes)\n",
+ mi, iob_len ( iobuf ) );
+ DBGC_HDA ( mi, 0, iobuf->data, iob_len ( iobuf ) );
+ goto out;
+ }
+ mad = iobuf->data;
+ hdr = &mad->hdr;
+ if ( hdr->base_version != IB_MGMT_BASE_VERSION ) {
+ DBGC ( mi, "MI %p RX unsupported base version %x\n",
+ mi, hdr->base_version );
+ DBGC_HDA ( mi, 0, mad, sizeof ( *mad ) );
+ goto out;
+ }
+ DBGC ( mi, "MI %p RX TID %08x%08x (%02x,%02x,%02x,%04x) status "
+ "%04x\n", mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ),
+ hdr->mgmt_class, hdr->class_version, hdr->method,
+ ntohs ( hdr->attr_id ), ntohs ( hdr->status ) );
+ DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) );
+
+ /* Handle MAD.  The result is stored in rc but otherwise unused:
+ * the jump target is the label that follows immediately.
+ */
+ if ( ( rc = ib_mi_handle ( ibdev, mi, mad, source ) ) != 0 )
+ goto out;
+
+ out:
+ free_iob ( iobuf );
+}
+
+/** Management interface completion operations
+ *
+ * Only a receive completion handler is set here.
+ */
+static struct ib_completion_queue_operations ib_mi_completion_ops = {
+ .complete_recv = ib_mi_complete_recv,
+};
+
+/** Management interface queue pair operations
+ *
+ * I/O buffers for the queue pair are allocated with alloc_iob().
+ */
+static struct ib_queue_pair_operations ib_mi_queue_pair_ops = {
+ .alloc_iob = alloc_iob,
+};
+
+/**
+ * Transmit MAD
+ *
+ * The caller's MAD is modified in place before being copied into a
+ * freshly allocated I/O buffer: the base version is always set, a TID
+ * is assigned if both TID words are zero, and for directed-route SMPs
+ * the inbound direction bit and one return-path hop are filled in.
+ * If posting the send fails, the I/O buffer is freed here.
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v mad MAD
+ * @v av Destination address vector
+ * @ret rc Return status code (0 on success, -EINVAL for an
+ * out-of-range hop pointer, -ENOMEM if no buffer
+ * could be allocated, otherwise the error returned
+ * by ib_post_send())
+ */
+int ib_mi_send ( struct ib_device *ibdev, struct ib_mad_interface *mi,
+ union ib_mad *mad, struct ib_address_vector *av ) {
+ struct ib_mad_hdr *hdr = &mad->hdr;
+ struct io_buffer *iobuf;
+ int rc;
+
+ /* Set common fields.  A TID that is already non-zero is left
+ * untouched; otherwise the magic signature and the next counter
+ * value are stored in network byte order.
+ */
+ hdr->base_version = IB_MGMT_BASE_VERSION;
+ if ( ( hdr->tid[0] == 0 ) && ( hdr->tid[1] == 0 ) ) {
+ hdr->tid[0] = htonl ( IB_MI_TID_MAGIC );
+ hdr->tid[1] = htonl ( ++next_tid );
+ }
+ DBGC ( mi, "MI %p TX TID %08x%08x (%02x,%02x,%02x,%04x) status "
+ "%04x\n", mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ),
+ hdr->mgmt_class, hdr->class_version, hdr->method,
+ ntohs ( hdr->attr_id ), ntohs ( hdr->status ) );
+ DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) );
+
+ /* Construct directed route portion of response, if necessary.
+ * The hop pointer indexes return_path.hops[] and is range-checked
+ * against the number of elements in that array before use.
+ */
+ if ( hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ) {
+ struct ib_mad_smp *smp = &mad->smp;
+ unsigned int hop_pointer;
+ unsigned int hop_count;
+
+ smp->mad_hdr.status |= htons ( IB_SMP_STATUS_D_INBOUND );
+ hop_pointer = smp->mad_hdr.class_specific.smp.hop_pointer;
+ hop_count = smp->mad_hdr.class_specific.smp.hop_count;
+ assert ( hop_count == hop_pointer );
+ if ( hop_pointer < ( sizeof ( smp->return_path.hops ) /
+ sizeof ( smp->return_path.hops[0] ) ) ) {
+ smp->return_path.hops[hop_pointer] = ibdev->port;
+ } else {
+ DBGC ( mi, "MI %p TX TID %08x%08x invalid hop pointer "
+ "%d\n", mi, ntohl ( hdr->tid[0] ),
+ ntohl ( hdr->tid[1] ), hop_pointer );
+ return -EINVAL;
+ }
+ }
+
+ /* Construct I/O buffer holding a copy of the whole MAD */
+ iobuf = alloc_iob ( sizeof ( *mad ) );
+ if ( ! iobuf ) {
+ DBGC ( mi, "MI %p could not allocate buffer for TID "
+ "%08x%08x\n",
+ mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) );
+ return -ENOMEM;
+ }
+ memcpy ( iob_put ( iobuf, sizeof ( *mad ) ), mad, sizeof ( *mad ) );
+
+ /* Send I/O buffer; on failure the buffer is freed here */
+ if ( ( rc = ib_post_send ( ibdev, mi->qp, av, iobuf ) ) != 0 ) {
+ DBGC ( mi, "MI %p TX TID %08x%08x failed: %s\n",
+ mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ),
+ strerror ( rc ) );
+ free_iob ( iobuf );
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * Handle management transaction timer expiry
+ *
+ * While retries remain, the timer is rearmed and the stored MAD is
+ * sent again.  Once the timer reports permanent failure, the
+ * transaction's completion handler is called with -ETIMEDOUT and no
+ * MAD or address vector.
+ *
+ * @v timer		Retry timer
+ * @v expired		Failure indicator
+ */
+static void ib_mi_timer_expired ( struct retry_timer *timer, int expired ) {
+	struct ib_mad_transaction *xact =
+		container_of ( timer, struct ib_mad_transaction, timer );
+	struct ib_mad_interface *mi = xact->mi;
+	struct ib_device *ibdev = mi->ibdev;
+	struct ib_mad_hdr *mad_hdr = &xact->mad.hdr;
+
+	if ( ! expired ) {
+		/* Rearm the retransmission timer, then (re)send the MAD;
+		 * the result of the send is deliberately not checked.
+		 */
+		start_timer ( timer );
+		ib_mi_send ( ibdev, mi, &xact->mad, &xact->av );
+		return;
+	}
+
+	/* Too many attempts: give up and notify the owner */
+	DBGC ( mi, "MI %p abandoning TID %08x%08x\n",
+	       mi, ntohl ( mad_hdr->tid[0] ), ntohl ( mad_hdr->tid[1] ) );
+	xact->op->complete ( ibdev, mi, xact, -ETIMEDOUT, NULL, NULL );
+}
+
+/**
+ * Create management transaction
+ *
+ * Allocates a transaction, stores a private copy of the MAD and of the
+ * destination address, links the transaction into the management
+ * interface's list and starts its timer with no delay so that the
+ * timer handler sends the initial MAD.
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v mad		MAD to send
+ * @v av		Destination address, or NULL to use SM's GSI
+ * @v op		Management transaction operations
+ * @ret madx		Management transaction, or NULL
+ */
+struct ib_mad_transaction *
+ib_create_madx ( struct ib_device *ibdev, struct ib_mad_interface *mi,
+		 union ib_mad *mad, struct ib_address_vector *av,
+		 struct ib_mad_transaction_operations *op ) {
+	struct ib_mad_transaction *xact;
+
+	/* Allocate zeroed transaction */
+	xact = zalloc ( sizeof ( *xact ) );
+	if ( xact == NULL )
+		return NULL;
+
+	/* Record the request and who owns it */
+	memcpy ( &xact->mad, mad, sizeof ( xact->mad ) );
+	xact->mi = mi;
+	xact->op = op;
+	timer_init ( &xact->timer, ib_mi_timer_expired, NULL );
+
+	/* Choose destination: default to the subnet manager's GSI */
+	if ( av == NULL ) {
+		xact->av.lid = ibdev->sm_lid;
+		xact->av.sl = ibdev->sm_sl;
+		xact->av.qpn = IB_QPN_GSI;
+		xact->av.qkey = IB_QKEY_GSI;
+	} else {
+		memcpy ( &xact->av, av, sizeof ( xact->av ) );
+	}
+
+	/* Track the transaction and let the timer send the first MAD */
+	list_add ( &xact->list, &mi->madx );
+	start_timer_nodelay ( &xact->timer );
+
+	return xact;
+}
+
+/**
+ * Destroy management transaction
+ *
+ * Stops the transaction's timer, unlinks it from its list and frees
+ * it; the madx pointer must not be used afterwards.  The ibdev and mi
+ * parameters are not used by this implementation.
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v madx Management transaction
+ */
+void ib_destroy_madx ( struct ib_device *ibdev __unused,
+ struct ib_mad_interface *mi __unused,
+ struct ib_mad_transaction *madx ) {
+
+ /* Stop timer and remove from list */
+ stop_timer ( &madx->timer );
+ list_del ( &madx->list );
+
+ /* Free transaction */
+ free ( madx );
+}
+
+/**
+ * Create management interface
+ *
+ * Allocates the interface, creates one completion queue and one
+ * queue pair (the same completion queue is passed for both the send
+ * and receive sides), sets the queue key according to the queue pair
+ * type and fills the receive ring.  On any failure everything created
+ * so far is torn down in reverse order and NULL is returned; the
+ * specific error code is not reported to the caller.
+ *
+ * @v ibdev Infiniband device
+ * @v type Queue pair type
+ * @ret mi Management interface, or NULL
+ */
+struct ib_mad_interface * ib_create_mi ( struct ib_device *ibdev,
+ enum ib_queue_pair_type type ) {
+ struct ib_mad_interface *mi;
+ int rc;
+
+ /* Allocate and initialise fields */
+ mi = zalloc ( sizeof ( *mi ) );
+ if ( ! mi )
+ goto err_alloc;
+ mi->ibdev = ibdev;
+ INIT_LIST_HEAD ( &mi->madx );
+
+ /* Create completion queue */
+ mi->cq = ib_create_cq ( ibdev, IB_MI_NUM_CQES, &ib_mi_completion_ops );
+ if ( ! mi->cq ) {
+ DBGC ( mi, "MI %p could not allocate completion queue\n", mi );
+ goto err_create_cq;
+ }
+
+ /* Create queue pair, recording the interface as its owner data
+ * so that ib_mi_complete_recv() can find it again.
+ */
+ mi->qp = ib_create_qp ( ibdev, type, IB_MI_NUM_SEND_WQES, mi->cq,
+ IB_MI_NUM_RECV_WQES, mi->cq,
+ &ib_mi_queue_pair_ops );
+ if ( ! mi->qp ) {
+ DBGC ( mi, "MI %p could not allocate queue pair\n", mi );
+ goto err_create_qp;
+ }
+ ib_qp_set_ownerdata ( mi->qp, mi );
+ DBGC ( mi, "MI %p (%s) running on QPN %#lx\n",
+ mi, ( ( type == IB_QPT_SMI ) ? "SMI" : "GSI" ), mi->qp->qpn );
+
+ /* Set queue key: IB_QKEY_SMI for an SMI queue pair, IB_QKEY_GSI
+ * for any other type.
+ */
+ mi->qp->qkey = ( ( type == IB_QPT_SMI ) ? IB_QKEY_SMI : IB_QKEY_GSI );
+ if ( ( rc = ib_modify_qp ( ibdev, mi->qp ) ) != 0 ) {
+ DBGC ( mi, "MI %p could not set queue key: %s\n",
+ mi, strerror ( rc ) );
+ goto err_modify_qp;
+ }
+
+ /* Fill receive ring */
+ ib_refill_recv ( ibdev, mi->qp );
+ return mi;
+
+ err_modify_qp:
+ ib_destroy_qp ( ibdev, mi->qp );
+ err_create_qp:
+ ib_destroy_cq ( ibdev, mi->cq );
+ err_create_cq:
+ free ( mi );
+ err_alloc:
+ return NULL;
+}
+
+/**
+ * Destroy management interface
+ *
+ * Completes every outstanding transaction with -ECANCELED, then
+ * destroys the queue pair and completion queue and frees the
+ * interface.
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ */
+void ib_destroy_mi ( struct ib_device *ibdev, struct ib_mad_interface *mi ) {
+ struct ib_mad_transaction *madx;
+ struct ib_mad_transaction *tmp;
+
+ /* Flush any outstanding requests.  The removal-safe iterator is
+ * used; presumably completion handlers are expected to destroy
+ * the transaction they are given - TODO confirm for all handlers.
+ */
+ list_for_each_entry_safe ( madx, tmp, &mi->madx, list ) {
+ DBGC ( mi, "MI %p destroyed while TID %08x%08x in progress\n",
+ mi, ntohl ( madx->mad.hdr.tid[0] ),
+ ntohl ( madx->mad.hdr.tid[1] ) );
+ madx->op->complete ( ibdev, mi, madx, -ECANCELED, NULL, NULL );
+ }
+
+ ib_destroy_qp ( ibdev, mi->qp );
+ ib_destroy_cq ( ibdev, mi->cq );
+ free ( mi );
+}
diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_packet.c b/qemu/roms/ipxe/src/net/infiniband/ib_packet.c
new file mode 100644
index 000000000..6c850e39b
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/infiniband/ib_packet.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/infiniband.h>
+#include <ipxe/ib_packet.h>
+
+/**
+ * @file
+ *
+ * Infiniband Packet Formats
+ *
+ */
+
+/**
+ * Add IB headers
+ *
+ * Pushes DETH, BTH, an optional GRH (only when the destination has a
+ * GID) and finally the LRH onto the I/O buffer, in that order, then
+ * fills each of them in.  The length fields are derived from the
+ * payload length rounded up to a multiple of four bytes plus four
+ * bytes of ICRC.  The queue pair's send PSN is post-incremented.
+ * This function currently always returns zero.
+ *
+ * @v ibdev Infiniband device
+ * @v iobuf I/O buffer to contain headers
+ * @v qp Queue pair
+ * @v payload_len Payload length
+ * @v dest Destination address vector
+ * @ret rc Return status code
+ */
+int ib_push ( struct ib_device *ibdev, struct io_buffer *iobuf,
+ struct ib_queue_pair *qp, size_t payload_len,
+ const struct ib_address_vector *dest ) {
+ struct ib_local_route_header *lrh;
+ struct ib_global_route_header *grh;
+ struct ib_base_transport_header *bth;
+ struct ib_datagram_extended_transport_header *deth;
+ size_t orig_iob_len = iob_len ( iobuf );
+ size_t pad_len;
+ size_t lrh_len;
+ size_t grh_len;
+ unsigned int vl;
+ unsigned int lnh;
+
+ DBGC2 ( ibdev, "IBDEV %p TX %04x:%08lx => %04x:%08lx (key %08lx)\n",
+ ibdev, ibdev->lid, qp->ext_qpn, dest->lid, dest->qpn,
+ dest->qkey );
+
+ /* Calculate packet length: pad_len is the number of bytes (0-3)
+ * needed to round the payload up to a multiple of four.
+ */
+ pad_len = ( (-payload_len) & 0x3 );
+ payload_len += pad_len;
+ payload_len += 4; /* ICRC */
+
+ /* Reserve space for headers.  grh_len is sampled after BTH and
+ * DETH have been pushed, so it covers payload plus those two
+ * headers; lrh_len is sampled last and additionally covers the
+ * GRH (if any) and the LRH itself.  (The assignment to
+ * orig_iob_len repeats the value from its initialiser.)
+ */
+ orig_iob_len = iob_len ( iobuf );
+ deth = iob_push ( iobuf, sizeof ( *deth ) );
+ bth = iob_push ( iobuf, sizeof ( *bth ) );
+ grh_len = ( payload_len + iob_len ( iobuf ) - orig_iob_len );
+ grh = ( dest->gid_present ?
+ iob_push ( iobuf, sizeof ( *grh ) ) : NULL );
+ lrh = iob_push ( iobuf, sizeof ( *lrh ) );
+ lrh_len = ( payload_len + iob_len ( iobuf ) - orig_iob_len );
+
+ /* Construct LRH.  IB_VL_SMP is used when sending from the SMI
+ * queue pair and IB_VL_DEFAULT otherwise; the length field is
+ * stored in units of four bytes.
+ */
+ vl = ( ( qp->ext_qpn == IB_QPN_SMI ) ? IB_VL_SMP : IB_VL_DEFAULT );
+ lrh->vl__lver = ( vl << 4 );
+ lnh = ( grh ? IB_LNH_GRH : IB_LNH_BTH );
+ lrh->sl__lnh = ( ( dest->sl << 4 ) | lnh );
+ lrh->dlid = htons ( dest->lid );
+ lrh->length = htons ( lrh_len >> 2 );
+ lrh->slid = htons ( ibdev->lid );
+
+ /* Construct GRH, if required */
+ if ( grh ) {
+ grh->ipver__tclass__flowlabel =
+ htonl ( IB_GRH_IPVER_IPv6 << 28 );
+ grh->paylen = htons ( grh_len );
+ grh->nxthdr = IB_GRH_NXTHDR_IBA;
+ grh->hoplmt = 0;
+ memcpy ( &grh->sgid, &ibdev->gid, sizeof ( grh->sgid ) );
+ memcpy ( &grh->dgid, &dest->gid, sizeof ( grh->dgid ) );
+ }
+
+ /* Construct BTH.  The pad count goes in bits 4 and up of the
+ * combined field; the PSN is masked to 24 bits.
+ */
+ bth->opcode = BTH_OPCODE_UD_SEND;
+ bth->se__m__padcnt__tver = ( pad_len << 4 );
+ bth->pkey = htons ( ibdev->pkey );
+ bth->dest_qp = htonl ( dest->qpn );
+ bth->ack__psn = htonl ( ( qp->send.psn++ ) & 0xffffffUL );
+
+ /* Construct DETH */
+ deth->qkey = htonl ( dest->qkey );
+ deth->src_qp = htonl ( qp->ext_qpn );
+
+ DBGCP_HDA ( ibdev, 0, iobuf->data,
+ ( iob_len ( iobuf ) - orig_iob_len ) );
+
+ return 0;
+}
+
+/**
+ * Remove IB headers
+ *
+ * Strips the LRH, an optional GRH, the BTH and the DETH from the
+ * front of the I/O buffer, filling in the destination and source
+ * address vectors as it goes.  Both address vectors are cleared
+ * unconditionally, so neither may be NULL; qp and payload_len are
+ * optional.  When an error is returned, any headers parsed before
+ * the failure have already been pulled from the buffer.
+ *
+ * @v ibdev Infiniband device
+ * @v iobuf I/O buffer containing headers
+ * @v qp Queue pair to fill in, or NULL
+ * @v payload_len Payload length to fill in, or NULL
+ * @v dest Destination address vector to fill in
+ * @v source Source address vector to fill in
+ * @ret rc Return status code (0 on success, -EINVAL if the
+ * buffer is too short for a header, -ENOTSUP for an
+ * unsupported LNH or BTH opcode, -ENODEV if no
+ * matching queue pair exists)
+ */
+int ib_pull ( struct ib_device *ibdev, struct io_buffer *iobuf,
+ struct ib_queue_pair **qp, size_t *payload_len,
+ struct ib_address_vector *dest,
+ struct ib_address_vector *source ) {
+ struct ib_local_route_header *lrh;
+ struct ib_global_route_header *grh;
+ struct ib_base_transport_header *bth;
+ struct ib_datagram_extended_transport_header *deth;
+ size_t orig_iob_len = iob_len ( iobuf );
+ unsigned int lnh;
+ size_t pad_len;
+
+ /* Clear return values */
+ if ( qp )
+ *qp = NULL;
+ if ( payload_len )
+ *payload_len = 0;
+ memset ( dest, 0, sizeof ( *dest ) );
+ memset ( source, 0, sizeof ( *source ) );
+
+ /* Extract LRH.  The same service level value is recorded in
+ * both address vectors.
+ */
+ if ( iob_len ( iobuf ) < sizeof ( *lrh ) ) {
+ DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for LRH\n",
+ ibdev, iob_len ( iobuf ) );
+ return -EINVAL;
+ }
+ lrh = iobuf->data;
+ iob_pull ( iobuf, sizeof ( *lrh ) );
+ dest->lid = ntohs ( lrh->dlid );
+ dest->sl = ( lrh->sl__lnh >> 4 );
+ source->lid = ntohs ( lrh->slid );
+ source->sl = ( lrh->sl__lnh >> 4 );
+ lnh = ( lrh->sl__lnh & 0x3 );
+
+ /* Reject unsupported packets: only LRH followed directly by a
+ * BTH, or by a GRH, is accepted.
+ */
+ if ( ! ( ( lnh == IB_LNH_BTH ) || ( lnh == IB_LNH_GRH ) ) ) {
+ DBGC ( ibdev, "IBDEV %p RX unsupported LNH %x\n",
+ ibdev, lnh );
+ return -ENOTSUP;
+ }
+
+ /* Extract GRH, if present */
+ if ( lnh == IB_LNH_GRH ) {
+ if ( iob_len ( iobuf ) < sizeof ( *grh ) ) {
+ DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) "
+ "for GRH\n", ibdev, iob_len ( iobuf ) );
+ return -EINVAL;
+ }
+ grh = iobuf->data;
+ iob_pull ( iobuf, sizeof ( *grh ) );
+ dest->gid_present = 1;
+ memcpy ( &dest->gid, &grh->dgid, sizeof ( dest->gid ) );
+ source->gid_present = 1;
+ memcpy ( &source->gid, &grh->sgid, sizeof ( source->gid ) );
+ } else {
+ grh = NULL;
+ }
+
+ /* Extract BTH; only the UD send opcode is accepted */
+ if ( iob_len ( iobuf ) < sizeof ( *bth ) ) {
+ DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for BTH\n",
+ ibdev, iob_len ( iobuf ) );
+ return -EINVAL;
+ }
+ bth = iobuf->data;
+ iob_pull ( iobuf, sizeof ( *bth ) );
+ if ( bth->opcode != BTH_OPCODE_UD_SEND ) {
+ DBGC ( ibdev, "IBDEV %p unsupported BTH opcode %x\n",
+ ibdev, bth->opcode );
+ return -ENOTSUP;
+ }
+ dest->qpn = ntohl ( bth->dest_qp );
+
+ /* Extract DETH */
+ if ( iob_len ( iobuf ) < sizeof ( *deth ) ) {
+ DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for DETH\n",
+ ibdev, iob_len ( iobuf ) );
+ return -EINVAL;
+ }
+ deth = iobuf->data;
+ iob_pull ( iobuf, sizeof ( *deth ) );
+ source->qpn = ntohl ( deth->src_qp );
+ source->qkey = ntohl ( deth->qkey );
+
+ /* Calculate payload length, if applicable: the LRH length (in
+ * units of four bytes) minus the headers pulled so far, the
+ * pad count and the ICRC.
+ * NOTE(review): the LRH length is not checked against the
+ * header sizes, and this arithmetic feeds a size_t, so a
+ * too-small length field would appear to wrap - confirm whether
+ * callers validate the result.
+ */
+ if ( payload_len ) {
+ pad_len = ( ( bth->se__m__padcnt__tver >> 4 ) & 0x3 );
+ *payload_len = ( ( ntohs ( lrh->length ) << 2 )
+ - ( orig_iob_len - iob_len ( iobuf ) )
+ - pad_len - 4 /* ICRC */ );
+ }
+
+ /* Determine destination QP, if applicable.  A multicast
+ * destination LID accompanied by a GRH is looked up by
+ * multicast GID; everything else is looked up by QPN.
+ */
+ if ( qp ) {
+ if ( IB_LID_MULTICAST ( dest->lid ) && grh ) {
+ if ( ! ( *qp = ib_find_qp_mgid ( ibdev, &grh->dgid ))){
+ DBGC ( ibdev, "IBDEV %p RX for unknown MGID "
+ IB_GID_FMT "\n",
+ ibdev, IB_GID_ARGS ( &grh->dgid ) );
+ return -ENODEV;
+ }
+ } else {
+ if ( ! ( *qp = ib_find_qp_qpn ( ibdev, dest->qpn ) ) ) {
+ DBGC ( ibdev, "IBDEV %p RX for nonexistent "
+ "QPN %lx\n", ibdev, dest->qpn );
+ return -ENODEV;
+ }
+ }
+ assert ( *qp );
+ }
+
+ DBGC2 ( ibdev, "IBDEV %p RX %04x:%08lx <= %04x:%08lx (key %08x)\n",
+ ibdev, dest->lid, ( IB_LID_MULTICAST ( dest->lid ) ?
+ ( qp ? (*qp)->ext_qpn : -1UL ) : dest->qpn ),
+ source->lid, source->qpn, ntohl ( deth->qkey ) );
+ DBGCP_HDA ( ibdev, 0,
+ ( iobuf->data - ( orig_iob_len - iob_len ( iobuf ) ) ),
+ ( orig_iob_len - iob_len ( iobuf ) ) );
+
+ return 0;
+}
diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_pathrec.c b/qemu/roms/ipxe/src/net/infiniband/ib_pathrec.c
new file mode 100644
index 000000000..1b95cbfa8
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/infiniband/ib_pathrec.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <ipxe/infiniband.h>
+#include <ipxe/ib_mi.h>
+#include <ipxe/ib_pathrec.h>
+
+/** @file
+ *
+ * Infiniband path lookups
+ *
+ */
+
+/**
+ * Handle path transaction completion
+ *
+ * On success the destination LID, service level and rate are copied
+ * from the path record into the path's address vector.  In every
+ * case the management transaction is destroyed and the path owner's
+ * completion handler is then invoked with the final status.
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v madx		Management transaction
+ * @v rc		Status code
+ * @v mad		Received MAD (or NULL on error)
+ * @v av		Source address vector (or NULL on error)
+ */
+static void ib_path_complete ( struct ib_device *ibdev,
+			       struct ib_mad_interface *mi,
+			       struct ib_mad_transaction *madx,
+			       int rc, union ib_mad *mad,
+			       struct ib_address_vector *av __unused ) {
+	struct ib_path *path = ib_madx_get_ownerdata ( madx );
+	union ib_gid *dgid = &path->av.gid;
+	struct ib_path_record *pathrec;
+
+	/* Report failures.  The MAD is inspected only when the
+	 * transaction itself succeeded, since it is NULL on error.
+	 */
+	if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
+		rc = -ENETUNREACH;
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p path lookup for " IB_GID_FMT
+		       " failed: %s\n",
+		       ibdev, IB_GID_ARGS ( dgid ), strerror ( rc ) );
+		goto out;
+	}
+
+	/* Extract values from MAD.  The path record pointer is formed
+	 * only here, once the MAD is known to be present, so that no
+	 * member address is ever derived from a NULL MAD.
+	 */
+	pathrec = &mad->sa.sa_data.path_record;
+	path->av.lid = ntohs ( pathrec->dlid );
+	path->av.sl = ( pathrec->reserved__sl & 0x0f );
+	path->av.rate = ( pathrec->rate_selector__rate & 0x3f );
+	DBGC ( ibdev, "IBDEV %p path to " IB_GID_FMT " is %04x sl %d rate "
+	       "%d\n", ibdev, IB_GID_ARGS ( dgid ), path->av.lid, path->av.sl,
+	       path->av.rate );
+
+ out:
+	/* Destroy the completed transaction */
+	ib_destroy_madx ( ibdev, mi, madx );
+	path->madx = NULL;
+
+	/* Hand off to upper completion handler */
+	path->op->complete ( ibdev, path, rc, &path->av );
+}
+
+/** Path transaction completion operations
+ *
+ * Passed to ib_create_madx() by ib_create_path().
+ */
+static struct ib_mad_transaction_operations ib_path_op = {
+ .complete = ib_path_complete,
+};
+
+/**
+ * Create path
+ *
+ * Allocates a path holding a copy of the supplied address vector and
+ * starts a subnet administration Get(PathRecord) transaction, keyed
+ * on the destination GID from that address vector and on the
+ * device's own GID as source.  The transaction is sent via the
+ * device's GSI with a NULL destination.
+ *
+ * The ib_destroy_madx() call placed after the return statement is
+ * never executed; it only sits at the top of the error-unwinding
+ * ladder.
+ *
+ * @v ibdev Infiniband device
+ * @v av Address vector to complete
+ * @v op Path operations
+ * @ret path Path, or NULL if allocation or transaction
+ * creation failed
+ */
+struct ib_path *
+ib_create_path ( struct ib_device *ibdev, struct ib_address_vector *av,
+ struct ib_path_operations *op ) {
+ struct ib_path *path;
+ union ib_mad mad;
+ struct ib_mad_sa *sa = &mad.sa;
+
+ /* Allocate and initialise structure */
+ path = zalloc ( sizeof ( *path ) );
+ if ( ! path )
+ goto err_alloc_path;
+ path->ibdev = ibdev;
+ memcpy ( &path->av, av, sizeof ( path->av ) );
+ path->op = op;
+
+ /* Construct path request; the component mask marks only the
+ * DGID and SGID fields as supplied.
+ */
+ memset ( sa, 0, sizeof ( *sa ) );
+ sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+ sa->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+ sa->mad_hdr.method = IB_MGMT_METHOD_GET;
+ sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC );
+ sa->sa_hdr.comp_mask[1] =
+ htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID );
+ memcpy ( &sa->sa_data.path_record.dgid, &path->av.gid,
+ sizeof ( sa->sa_data.path_record.dgid ) );
+ memcpy ( &sa->sa_data.path_record.sgid, &ibdev->gid,
+ sizeof ( sa->sa_data.path_record.sgid ) );
+
+ /* Create management transaction and record the path as its
+ * owner data, which ib_path_complete() reads back.
+ */
+ path->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, NULL,
+ &ib_path_op );
+ if ( ! path->madx )
+ goto err_create_madx;
+ ib_madx_set_ownerdata ( path->madx, path );
+
+ return path;
+
+ ib_destroy_madx ( ibdev, ibdev->gsi, path->madx );
+ err_create_madx:
+ free ( path );
+ err_alloc_path:
+ return NULL;
+}
+
+/**
+ * Destroy path
+ *
+ * Cancels the path's lookup transaction if one is still attached, then
+ * frees the path.
+ *
+ * @v ibdev		Infiniband device
+ * @v path		Path
+ */
+void ib_destroy_path ( struct ib_device *ibdev, struct ib_path *path ) {
+	struct ib_mad_transaction *pending = path->madx;
+
+	/* A non-NULL transaction means the lookup has not completed */
+	if ( pending != NULL ) {
+		ib_destroy_madx ( ibdev, ibdev->gsi, pending );
+	}
+
+	free ( path );
+}
+
+/** Number of path cache entries
+ *
+ * Must be a power of two.  ib_resolve_path() picks the slot to reuse
+ * as a running counter modulo this value.
+ */
+#define IB_NUM_CACHED_PATHS 4
+
+/** A cached path */
+struct ib_cached_path {
+ /** Path (NULL while the slot is unused) */
+ struct ib_path *path;
+};
+
+/** Path cache */
+static struct ib_cached_path ib_path_cache[IB_NUM_CACHED_PATHS];
+
+/** Oldest path cache entry index
+ *
+ * Post-incremented by ib_resolve_path() each time a slot is taken;
+ * the value is reduced modulo IB_NUM_CACHED_PATHS at the point of use
+ * rather than being stored wrapped.
+ */
+static unsigned int ib_path_cache_idx;
+
+/**
+ * Find path cache entry
+ *
+ * Scans the cache slots in index order and returns the first occupied
+ * slot whose path belongs to the given device and targets the given
+ * destination GID.
+ *
+ * @v ibdev		Infiniband device
+ * @v dgid		Destination GID
+ * @ret path		Path cache entry, or NULL
+ */
+static struct ib_cached_path *
+ib_find_path_cache_entry ( struct ib_device *ibdev, union ib_gid *dgid ) {
+	struct ib_cached_path *slot;
+	struct ib_path *candidate;
+	unsigned int idx;
+
+	for ( idx = 0 ; idx < IB_NUM_CACHED_PATHS ; idx++ ) {
+		slot = &ib_path_cache[idx];
+		candidate = slot->path;
+
+		/* Occupied, same device, same destination GID */
+		if ( candidate && ( candidate->ibdev == ibdev ) &&
+		     ( memcmp ( &candidate->av.gid, dgid,
+				sizeof ( candidate->av.gid ) ) == 0 ) ) {
+			return slot;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * Handle cached path transaction completion
+ *
+ * A successfully resolved path is left in the cache untouched.  A
+ * failed lookup has its path destroyed and its cache slot cleared.
+ *
+ * @v ibdev		Infiniband device
+ * @v path		Path
+ * @v rc		Status code
+ * @v av		Address vector, or NULL on error
+ */
+static void ib_cached_path_complete ( struct ib_device *ibdev,
+				      struct ib_path *path, int rc,
+				      struct ib_address_vector *av __unused ) {
+	struct ib_cached_path *slot = ib_path_get_ownerdata ( path );
+
+	/* Success: keep the path, since the cache entry still refers
+	 * to the resolved address.
+	 */
+	if ( rc == 0 )
+		return;
+
+	/* Failure: discard the path and free up the cache slot */
+	ib_destroy_path ( ibdev, path );
+	memset ( slot, 0, sizeof ( *slot ) );
+}
+
+/** Cached path transaction completion operations
+ *
+ * Passed to ib_create_path() by ib_resolve_path().
+ */
+static struct ib_path_operations ib_cached_path_op = {
+ .complete = ib_cached_path_complete,
+};
+
+/**
+ * Resolve path
+ *
+ * @v ibdev Infiniband device
+ * @v av Address vector to complete
+ * @ret rc Return status code
+ *
+ * This provides a non-transactional way to resolve a path, via a
+ * cache similar to ARP.
+ *
+ * On a cache hit the LID, rate and service level are copied into the
+ * address vector and zero is returned.  Otherwise -ENOENT is returned,
+ * either because a lookup for this GID is already in progress or
+ * because one has just been started; the caller is expected to try
+ * again later.  -EINVAL is returned if the address vector has no GID,
+ * and -ENOMEM if a new path could not be created.
+ */
+int ib_resolve_path ( struct ib_device *ibdev, struct ib_address_vector *av ) {
+ union ib_gid *gid = &av->gid;
+ struct ib_cached_path *cached;
+ unsigned int cache_idx;
+
+ /* Sanity check */
+ if ( ! av->gid_present ) {
+ DBGC ( ibdev, "IBDEV %p attempt to look up path without GID\n",
+ ibdev );
+ return -EINVAL;
+ }
+
+ /* Look in cache for a matching entry.  An entry whose LID is
+ * still zero is treated as a lookup that has not completed.
+ */
+ cached = ib_find_path_cache_entry ( ibdev, gid );
+ if ( cached && cached->path->av.lid ) {
+ /* Populated entry found */
+ av->lid = cached->path->av.lid;
+ av->rate = cached->path->av.rate;
+ av->sl = cached->path->av.sl;
+ DBGC2 ( ibdev, "IBDEV %p cache hit for " IB_GID_FMT "\n",
+ ibdev, IB_GID_ARGS ( gid ) );
+ return 0;
+ }
+ DBGC ( ibdev, "IBDEV %p cache miss for " IB_GID_FMT "%s\n", ibdev,
+ IB_GID_ARGS ( gid ), ( cached ? " (in progress)" : "" ) );
+
+ /* If lookup is already in progress, do nothing */
+ if ( cached )
+ return -ENOENT;
+
+ /* Locate a new cache entry to use (slots are taken in
+ * round-robin order)
+ */
+ cache_idx = ( (ib_path_cache_idx++) % IB_NUM_CACHED_PATHS );
+ cached = &ib_path_cache[cache_idx];
+
+ /* Destroy the old cache entry.
+ * NOTE(review): the evicted path is destroyed using the
+ * caller's ibdev rather than the path's own; ib_destroy_madx()
+ * marks its device and interface parameters unused, so this
+ * looks harmless today - confirm if that ever changes.
+ */
+ if ( cached->path )
+ ib_destroy_path ( ibdev, cached->path );
+ memset ( cached, 0, sizeof ( *cached ) );
+
+ /* Create new path; on failure the slot is left empty */
+ cached->path = ib_create_path ( ibdev, av, &ib_cached_path_op );
+ if ( ! cached->path ) {
+ DBGC ( ibdev, "IBDEV %p could not create path\n",
+ ibdev );
+ return -ENOMEM;
+ }
+ ib_path_set_ownerdata ( cached->path, cached );
+
+ /* Not found yet */
+ return -ENOENT;
+}
diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_sma.c b/qemu/roms/ipxe/src/net/infiniband/ib_sma.c
new file mode 100644
index 000000000..86553732a
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/infiniband/ib_sma.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <byteswap.h>
+#include <ipxe/settings.h>
+#include <ipxe/infiniband.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/ib_mi.h>
+#include <ipxe/ib_sma.h>
+
+/**
+ * @file
+ *
+ * Infiniband Subnet Management Agent
+ *
+ */
+
+/**
+ * Node information
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v mad Received MAD
+ * @v av Source address vector
+ */
+static void ib_sma_node_info ( struct ib_device *ibdev,
+			       struct ib_mad_interface *mi,
+			       union ib_mad *mad,
+			       struct ib_address_vector *av ) {
+	struct ib_node_info *info = &mad->smp.smp_data.node_info;
+	int rc;
+
+	/* Build the NodeInfo attribute in place within the received MAD */
+	memset ( info, 0, sizeof ( *info ) );
+	info->base_version = IB_MGMT_BASE_VERSION;
+	info->class_version = IB_SMP_CLASS_VERSION;
+	info->node_type = IB_NODE_TYPE_HCA;
+	info->num_ports = ib_count_ports ( ibdev );
+	/* The node GUID is reported as the system GUID as well */
+	memcpy ( &info->sys_guid, &ibdev->node_guid,
+		 sizeof ( info->sys_guid ) );
+	memcpy ( &info->node_guid, &ibdev->node_guid,
+		 sizeof ( info->node_guid ) );
+	memcpy ( &info->port_guid, &ibdev->gid.s.guid,
+		 sizeof ( info->port_guid ) );
+	info->partition_cap = htons ( 1 );
+	info->local_port_num = ibdev->port;
+
+	/* Return the same MAD to the sender as a GetResponse */
+	mad->hdr.method = IB_MGMT_METHOD_GET_RESP;
+	rc = ib_mi_send ( ibdev, mi, mad, av );
+	if ( rc != 0 ) {
+		DBGC ( mi, "SMA %p could not send NodeInfo GetResponse: %s\n",
+		       mi, strerror ( rc ) );
+	}
+}
+
+/**
+ * Node description
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v mad Received MAD
+ * @v av Source address vector
+ */
+static void ib_sma_node_desc ( struct ib_device *ibdev,
+			       struct ib_mad_interface *mi,
+			       union ib_mad *mad,
+			       struct ib_address_vector *av ) {
+	struct ib_node_desc *node_desc = &mad->smp.smp_data.node_desc;
+	union ib_guid *guid = &ibdev->node_guid;
+	char hostname[ sizeof ( node_desc->node_string ) ];
+	int hostname_len;
+	int rc;
+
+	/* Fetch the hostname, if one is configured.  The buffer is
+	 * passed to snprintf() unconditionally below, so make sure
+	 * that it holds a valid (empty) string when the fetch fails,
+	 * rather than relying on the settings code to have cleared it.
+	 */
+	hostname_len = fetch_string_setting ( NULL, &hostname_setting,
+					      hostname, sizeof ( hostname ) );
+	if ( hostname_len < 0 )
+		hostname[0] = '\0';
+
+	/* Fill in information: "iPXE [<hostname> ]<node GUID> (<device>)",
+	 * truncated to the size of the node string field.
+	 */
+	memset ( node_desc, 0, sizeof ( *node_desc ) );
+	snprintf ( node_desc->node_string, sizeof ( node_desc->node_string ),
+		   "iPXE %s%s%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x (%s)",
+		   hostname, ( ( hostname_len >= 0 ) ? " " : "" ),
+		   guid->bytes[0], guid->bytes[1], guid->bytes[2],
+		   guid->bytes[3], guid->bytes[4], guid->bytes[5],
+		   guid->bytes[6], guid->bytes[7], ibdev->dev->name );
+
+	/* Send GetResponse */
+	mad->hdr.method = IB_MGMT_METHOD_GET_RESP;
+	if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) {
+		DBGC ( mi, "SMA %p could not send NodeDesc GetResponse: %s\n",
+		       mi, strerror ( rc ) );
+		return;
+	}
+}
+
+/**
+ * GUID information
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v mad Received MAD
+ * @v av Source address vector
+ */
+static void ib_sma_guid_info ( struct ib_device *ibdev,
+			       struct ib_mad_interface *mi,
+			       union ib_mad *mad,
+			       struct ib_address_vector *av ) {
+	struct ib_guid_info *info = &mad->smp.smp_data.guid_info;
+	int rc;
+
+	/* Only the first GUID slot is populated, using the GUID half
+	 * of the port GID; all other slots are left zeroed.
+	 */
+	memset ( info, 0, sizeof ( *info ) );
+	memcpy ( info->guid[0], &ibdev->gid.s.guid,
+		 sizeof ( info->guid[0] ) );
+
+	/* Return the same MAD to the sender as a GetResponse */
+	mad->hdr.method = IB_MGMT_METHOD_GET_RESP;
+	rc = ib_mi_send ( ibdev, mi, mad, av );
+	if ( rc != 0 ) {
+		DBGC ( mi, "SMA %p could not send GuidInfo GetResponse: %s\n",
+		       mi, strerror ( rc ) );
+	}
+}
+
+/**
+ * Set port information
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v mad Received MAD
+ * @ret rc Return status code
+ */
+static int ib_sma_set_port_info ( struct ib_device *ibdev,
+				  struct ib_mad_interface *mi,
+				  union ib_mad *mad ) {
+	const struct ib_port_info *port_info = &mad->smp.smp_data.port_info;
+	unsigned int width;
+	unsigned int speed;
+	int rc;
+
+	/* Record the addressing parameters carried by the request */
+	memcpy ( &ibdev->gid.s.prefix, port_info->gid_prefix,
+		 sizeof ( ibdev->gid.s.prefix ) );
+	ibdev->lid = ntohs ( port_info->lid );
+	ibdev->sm_lid = ntohs ( port_info->mastersm_lid );
+
+	/* The enabled link width and speed are overwritten only when
+	 * the request carries a non-zero value for them.
+	 */
+	width = port_info->link_width_enabled;
+	if ( width )
+		ibdev->link_width_enabled = width;
+	speed = ( port_info->link_speed_active__link_speed_enabled & 0xf );
+	if ( speed )
+		ibdev->link_speed_enabled = speed;
+
+	ibdev->sm_sl = ( port_info->neighbour_mtu__mastersm_sl & 0xf );
+	DBGC ( mi, "SMA %p set LID %04x SMLID %04x link width %02x speed "
+	       "%02x\n", mi, ibdev->lid, ibdev->sm_lid,
+	       ibdev->link_width_enabled, ibdev->link_speed_enabled );
+
+	/* Push the new parameters down to the device */
+	rc = ib_set_port_info ( ibdev, mad );
+	if ( rc != 0 ) {
+		DBGC ( mi, "SMA %p could not set port information: %s\n",
+		       mi, strerror ( rc ) );
+	}
+	return rc;
+}
+
+/**
+ * Port information
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v mad Received MAD
+ * @v av Source address vector
+ */
+static void ib_sma_port_info ( struct ib_device *ibdev,
+			       struct ib_mad_interface *mi,
+			       union ib_mad *mad,
+			       struct ib_address_vector *av ) {
+	struct ib_port_info *port_info = &mad->smp.smp_data.port_info;
+	int rc;
+
+	/* Set parameters if applicable.  This has to happen before
+	 * the payload is cleared below, since the new values are read
+	 * from the same buffer that the response is built in.
+	 */
+	if ( mad->hdr.method == IB_MGMT_METHOD_SET ) {
+		if ( ( rc = ib_sma_set_port_info ( ibdev, mi, mad ) ) != 0 ) {
+			mad->hdr.status =
+			      htons ( IB_MGMT_STATUS_UNSUPPORTED_METHOD_ATTR );
+			/* Fall through to generate GetResponse */
+		}
+	}
+
+	/* Fill in information.  Multi-byte fields are converted from
+	 * host to network byte order, and several pairs of 4-bit
+	 * values share a single byte (high nibble first).
+	 */
+	memset ( port_info, 0, sizeof ( *port_info ) );
+	memcpy ( port_info->gid_prefix, &ibdev->gid.s.prefix,
+		 sizeof ( port_info->gid_prefix ) );
+	port_info->lid = htons ( ibdev->lid );
+	port_info->mastersm_lid = htons ( ibdev->sm_lid );
+	port_info->local_port_num = ibdev->port;
+	port_info->link_width_enabled = ibdev->link_width_enabled;
+	port_info->link_width_supported = ibdev->link_width_supported;
+	port_info->link_width_active = ibdev->link_width_active;
+	port_info->link_speed_supported__port_state =
+		( ( ibdev->link_speed_supported << 4 ) | ibdev->port_state );
+	port_info->port_phys_state__link_down_def_state =
+		( ( IB_PORT_PHYS_STATE_POLLING << 4 ) |
+		  IB_PORT_PHYS_STATE_POLLING );
+	port_info->link_speed_active__link_speed_enabled =
+		( ( ibdev->link_speed_active << 4 ) |
+		  ibdev->link_speed_enabled );
+	port_info->neighbour_mtu__mastersm_sl =
+		( ( IB_MTU_2048 << 4 ) | ibdev->sm_sl );
+	port_info->vl_cap__init_type = ( IB_VL_0 << 4 );
+	port_info->init_type_reply__mtu_cap = IB_MTU_2048;
+	port_info->operational_vls__enforcement = ( IB_VL_0 << 4 );
+	port_info->guid_cap = 1;
+
+	/* Send GetResponse */
+	mad->hdr.method = IB_MGMT_METHOD_GET_RESP;
+	if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) {
+		DBGC ( mi, "SMA %p could not send PortInfo GetResponse: %s\n",
+		       mi, strerror ( rc ) );
+		return;
+	}
+}
+
+/**
+ * Set partition key table
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v mad Received MAD
+ * @ret rc Return status code
+ */
+static int ib_sma_set_pkey_table ( struct ib_device *ibdev,
+				   struct ib_mad_interface *mi,
+				   union ib_mad *mad ) {
+	struct ib_pkey_table *table = &mad->smp.smp_data.pkey_table;
+	int rc;
+
+	/* Only the first table entry is used */
+	ibdev->pkey = ntohs ( table->pkey[0] );
+	DBGC ( mi, "SMA %p set pkey %04x\n", mi, ibdev->pkey );
+
+	/* Push the new table down to the device */
+	rc = ib_set_pkey_table ( ibdev, mad );
+	if ( rc != 0 ) {
+		DBGC ( mi, "SMA %p could not set pkey table: %s\n",
+		       mi, strerror ( rc ) );
+	}
+	return rc;
+}
+
+/**
+ * Partition key table
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v mad Received MAD
+ * @v av Source address vector
+ */
+static void ib_sma_pkey_table ( struct ib_device *ibdev,
+				struct ib_mad_interface *mi,
+				union ib_mad *mad,
+				struct ib_address_vector *av ) {
+	struct ib_pkey_table *pkey_table = &mad->smp.smp_data.pkey_table;
+	int rc;
+
+	/* Set parameters, if applicable.  This has to happen before
+	 * the payload is cleared below, since the new key is read
+	 * from the same buffer that the response is built in.
+	 */
+	if ( mad->hdr.method == IB_MGMT_METHOD_SET ) {
+		if ( ( rc = ib_sma_set_pkey_table ( ibdev, mi, mad ) ) != 0 ) {
+			mad->hdr.status =
+			      htons ( IB_MGMT_STATUS_UNSUPPORTED_METHOD_ATTR );
+			/* Fall through to generate GetResponse */
+		}
+	}
+
+	/* Fill in information: only the first table entry is reported */
+	memset ( pkey_table, 0, sizeof ( *pkey_table ) );
+	pkey_table->pkey[0] = htons ( ibdev->pkey );
+
+	/* Send GetResponse */
+	mad->hdr.method = IB_MGMT_METHOD_GET_RESP;
+	if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) {
+		DBGC ( mi, "SMA %p could not send PKeyTable GetResponse: %s\n",
+		       mi, strerror ( rc ) );
+		return;
+	}
+}
+
+/** Subnet management agent
+ *
+ * One entry per subnet management attribute handled by this file.
+ * Each entry pairs a management class, class version and attribute
+ * ID (stored in network byte order via htons()) with the handler
+ * function for that attribute.
+ */
+struct ib_mad_agent ib_sma_agent[] __ib_mad_agent = {
+ {
+ .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED,
+ .class_version = IB_SMP_CLASS_VERSION,
+ .attr_id = htons ( IB_SMP_ATTR_NODE_INFO ),
+ .handle = ib_sma_node_info,
+ },
+ {
+ .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED,
+ .class_version = IB_SMP_CLASS_VERSION,
+ .attr_id = htons ( IB_SMP_ATTR_NODE_DESC ),
+ .handle = ib_sma_node_desc,
+ },
+ {
+ .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED,
+ .class_version = IB_SMP_CLASS_VERSION,
+ .attr_id = htons ( IB_SMP_ATTR_GUID_INFO ),
+ .handle = ib_sma_guid_info,
+ },
+ {
+ /* Handles both Get and Set requests (see ib_sma_port_info) */
+ .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED,
+ .class_version = IB_SMP_CLASS_VERSION,
+ .attr_id = htons ( IB_SMP_ATTR_PORT_INFO ),
+ .handle = ib_sma_port_info,
+ },
+ {
+ /* Handles both Get and Set requests (see ib_sma_pkey_table) */
+ .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED,
+ .class_version = IB_SMP_CLASS_VERSION,
+ .attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE ),
+ .handle = ib_sma_pkey_table,
+ },
+};
+
+/**
+ * Create subnet management agent and interface
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @ret rc Return status code
+ */
+int ib_create_sma ( struct ib_device *ibdev, struct ib_mad_interface *mi ) {
+
+ /* Nothing to do: this function allocates nothing and keeps no
+  * state; it only logs which management interface is in use and
+  * always reports success.
+  */
+ DBGC ( ibdev, "IBDEV %p SMA using SMI %p\n", ibdev, mi );
+
+ return 0;
+}
+
+/**
+ * Destroy subnet management agent and interface
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ */
+void ib_destroy_sma ( struct ib_device *ibdev __unused,
+ struct ib_mad_interface *mi __unused ) {
+ /* Nothing to do: ib_create_sma() acquires no resources, so there
+  * is nothing to release here.
+  */
+}
diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_smc.c b/qemu/roms/ipxe/src/net/infiniband/ib_smc.c
new file mode 100644
index 000000000..4d947d568
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/infiniband/ib_smc.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <byteswap.h>
+#include <ipxe/infiniband.h>
+#include <ipxe/ib_smc.h>
+
+/**
+ * @file
+ *
+ * Infiniband Subnet Management Client
+ *
+ */
+
+/**
+ * Issue local MAD
+ *
+ * @v ibdev Infiniband device
+ * @v attr_id Attribute ID, in network byte order
+ * @v attr_mod Attribute modifier, in network byte order
+ * @v local_mad Method for issuing local MADs
+ * @v mad Management datagram to fill in
+ * @ret rc Return status code
+ */
+static int ib_smc_mad ( struct ib_device *ibdev, uint16_t attr_id,
+			uint32_t attr_mod, ib_local_mad_t local_mad,
+			union ib_mad *mad ) {
+
+	/* Start from an all-zero MAD and fill in a Get request for
+	 * the requested attribute.  The attribute ID and modifier are
+	 * stored as given, since callers pass them already in network
+	 * byte order.
+	 */
+	memset ( mad, 0, sizeof ( *mad ) );
+	mad->hdr.base_version = IB_MGMT_BASE_VERSION;
+	mad->hdr.mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+	mad->hdr.class_version = 1;
+	mad->hdr.method = IB_MGMT_METHOD_GET;
+	mad->hdr.attr_id = attr_id;
+	mad->hdr.attr_mod = attr_mod;
+
+	/* Hand the MAD to the caller-supplied local MAD method and
+	 * return its status code unchanged.
+	 */
+	return local_mad ( ibdev, mad );
+}
+
+/**
+ * Get node information
+ *
+ * @v ibdev Infiniband device
+ * @v local_mad Method for issuing local MADs
+ * @v mad Management datagram to fill in
+ * @ret rc Return status code
+ */
+static int ib_smc_get_node_info ( struct ib_device *ibdev,
+				  ib_local_mad_t local_mad,
+				  union ib_mad *mad ) {
+	int rc;
+
+	/* Query the NodeInfo attribute (no attribute modifier) */
+	rc = ib_smc_mad ( ibdev, htons ( IB_SMP_ATTR_NODE_INFO ), 0,
+			  local_mad, mad );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not get node info: %s\n",
+		       ibdev, strerror ( rc ) );
+	}
+	return rc;
+}
+
+/**
+ * Get port information
+ *
+ * @v ibdev Infiniband device
+ * @v local_mad Method for issuing local MADs
+ * @v mad Management datagram to fill in
+ * @ret rc Return status code
+ */
+static int ib_smc_get_port_info ( struct ib_device *ibdev,
+				  ib_local_mad_t local_mad,
+				  union ib_mad *mad ) {
+	int rc;
+
+	/* Query the PortInfo attribute, using the port number as the
+	 * attribute modifier.
+	 */
+	rc = ib_smc_mad ( ibdev, htons ( IB_SMP_ATTR_PORT_INFO ),
+			  htonl ( ibdev->port ), local_mad, mad );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not get port info: %s\n",
+		       ibdev, strerror ( rc ) );
+	}
+	return rc;
+}
+
+/**
+ * Get GUID information
+ *
+ * @v ibdev Infiniband device
+ * @v local_mad Method for issuing local MADs
+ * @v mad Management datagram to fill in
+ * @ret rc Return status code
+ */
+static int ib_smc_get_guid_info ( struct ib_device *ibdev,
+				  ib_local_mad_t local_mad,
+				  union ib_mad *mad ) {
+	int rc;
+
+	/* Query the GuidInfo attribute (no attribute modifier) */
+	rc = ib_smc_mad ( ibdev, htons ( IB_SMP_ATTR_GUID_INFO ), 0,
+			  local_mad, mad );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not get GUID info: %s\n",
+		       ibdev, strerror ( rc ) );
+	}
+	return rc;
+}
+
+/**
+ * Get partition key table
+ *
+ * @v ibdev Infiniband device
+ * @v local_mad Method for issuing local MADs
+ * @v mad Management datagram to fill in
+ * @ret rc Return status code
+ */
+static int ib_smc_get_pkey_table ( struct ib_device *ibdev,
+				   ib_local_mad_t local_mad,
+				   union ib_mad *mad ) {
+	int rc;
+
+	/* Query the PKeyTable attribute (no attribute modifier) */
+	rc = ib_smc_mad ( ibdev, htons ( IB_SMP_ATTR_PKEY_TABLE ), 0,
+			  local_mad, mad );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not get pkey table: %s\n",
+		       ibdev, strerror ( rc ) );
+	}
+	return rc;
+}
+
+/**
+ * Get Infiniband parameters using SMC
+ *
+ * @v ibdev Infiniband device
+ * @v local_mad Method for issuing local MADs
+ * @ret rc Return status code
+ */
+static int ib_smc_get ( struct ib_device *ibdev, ib_local_mad_t local_mad ) {
+ /* A single MAD buffer is reused for all four queries below.  Each
+  * ib_smc_get_*() call clears and refills it (via ib_smc_mad()), so
+  * the results of one query must be copied into ibdev before the
+  * next query is issued.  The four pointers below all point into
+  * this same buffer.
+  */
+ union ib_mad mad;
+ struct ib_node_info *node_info = &mad.smp.smp_data.node_info;
+ struct ib_port_info *port_info = &mad.smp.smp_data.port_info;
+ struct ib_guid_info *guid_info = &mad.smp.smp_data.guid_info;
+ struct ib_pkey_table *pkey_table = &mad.smp.smp_data.pkey_table;
+ int rc;
+
+ /* Node info gives us the node GUID */
+ if ( ( rc = ib_smc_get_node_info ( ibdev, local_mad, &mad ) ) != 0 )
+ return rc;
+ memcpy ( &ibdev->node_guid, &node_info->node_guid,
+ sizeof ( ibdev->node_guid ) );
+
+ /* Port info gives us the link state, the first half of the
+ * port GID and the SM LID.
+ */
+ if ( ( rc = ib_smc_get_port_info ( ibdev, local_mad, &mad ) ) != 0 )
+ return rc;
+ memcpy ( &ibdev->gid.s.prefix, port_info->gid_prefix,
+ sizeof ( ibdev->gid.s.prefix ) );
+ ibdev->lid = ntohs ( port_info->lid );
+ ibdev->sm_lid = ntohs ( port_info->mastersm_lid );
+ ibdev->link_width_enabled = port_info->link_width_enabled;
+ ibdev->link_width_supported = port_info->link_width_supported;
+ ibdev->link_width_active = port_info->link_width_active;
+ /* The following fields pack two 4-bit values into one byte: the
+  * first-named value in the high nibble, the second in the low.
+  */
+ ibdev->link_speed_supported =
+ ( port_info->link_speed_supported__port_state >> 4 );
+ ibdev->port_state =
+ ( port_info->link_speed_supported__port_state & 0xf );
+ ibdev->link_speed_active =
+ ( port_info->link_speed_active__link_speed_enabled >> 4 );
+ ibdev->link_speed_enabled =
+ ( port_info->link_speed_active__link_speed_enabled & 0xf );
+ ibdev->sm_sl = ( port_info->neighbour_mtu__mastersm_sl & 0xf );
+
+ /* GUID info gives us the second half of the port GID */
+ if ( ( rc = ib_smc_get_guid_info ( ibdev, local_mad, &mad ) ) != 0 )
+ return rc;
+ memcpy ( &ibdev->gid.s.guid, guid_info->guid[0],
+ sizeof ( ibdev->gid.s.guid ) );
+
+ /* Get partition key (only the first table entry is used) */
+ if ( ( rc = ib_smc_get_pkey_table ( ibdev, local_mad, &mad ) ) != 0 )
+ return rc;
+ ibdev->pkey = ntohs ( pkey_table->pkey[0] );
+
+ DBGC ( ibdev, "IBDEV %p port GID is " IB_GID_FMT "\n",
+ ibdev, IB_GID_ARGS ( &ibdev->gid ) );
+
+ return 0;
+}
+
+/**
+ * Initialise Infiniband parameters using SMC
+ *
+ * @v ibdev Infiniband device
+ * @v local_mad Method for issuing local MADs
+ * @ret rc Return status code
+ */
+int ib_smc_init ( struct ib_device *ibdev, ib_local_mad_t local_mad ) {
+
+	/* Initialisation consists solely of fetching the MAD
+	 * parameters; no link state notification is generated.
+	 */
+	return ib_smc_get ( ibdev, local_mad );
+}
+
+/**
+ * Update Infiniband parameters using SMC
+ *
+ * @v ibdev Infiniband device
+ * @v local_mad Method for issuing local MADs
+ * @ret rc Return status code
+ */
+int ib_smc_update ( struct ib_device *ibdev, ib_local_mad_t local_mad ) {
+	int rc = ib_smc_get ( ibdev, local_mad );
+
+	/* Leave the link state notification unsent if the refresh failed */
+	if ( rc != 0 )
+		return rc;
+
+	/* The refreshed parameters include the port state, so let the
+	 * Infiniband core know that the link state may have changed.
+	 */
+	ib_link_state_changed ( ibdev );
+
+	return 0;
+}
diff --git a/qemu/roms/ipxe/src/net/infiniband/ib_srp.c b/qemu/roms/ipxe/src/net/infiniband/ib_srp.c
new file mode 100644
index 000000000..7b2b2b4ea
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/infiniband/ib_srp.c
@@ -0,0 +1,581 @@
+/*
+ * Copyright (C) 2009 Fen Systems Ltd <mbrown@fensystems.co.uk>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+FILE_LICENCE ( BSD2 );
+
+#include <stdlib.h>
+#include <errno.h>
+#include <ipxe/interface.h>
+#include <ipxe/uri.h>
+#include <ipxe/open.h>
+#include <ipxe/base16.h>
+#include <ipxe/acpi.h>
+#include <ipxe/srp.h>
+#include <ipxe/infiniband.h>
+#include <ipxe/ib_cmrc.h>
+#include <ipxe/ib_srp.h>
+
+/**
+ * @file
+ *
+ * SCSI RDMA Protocol over Infiniband
+ *
+ */
+
+/* Disambiguate the various possible EINVALs */
+/* Returned by ib_srp_parse_byte_string() when a hex string does not
+ * have exactly the expected number of digits.
+ */
+#define EINVAL_BYTE_STRING_LEN __einfo_error ( EINFO_EINVAL_BYTE_STRING_LEN )
+#define EINFO_EINVAL_BYTE_STRING_LEN __einfo_uniqify \
+ ( EINFO_EINVAL, 0x01, "Invalid byte string length" )
+/* Returned by ib_srp_parse_integer() when the component contains
+ * characters that could not be parsed as part of a hex number.
+ */
+#define EINVAL_INTEGER __einfo_error ( EINFO_EINVAL_INTEGER )
+#define EINFO_EINVAL_INTEGER __einfo_uniqify \
+ ( EINFO_EINVAL, 0x03, "Invalid integer" )
+/* Returned by ib_srp_parse_root_path() when the root path has fewer
+ * ':'-separated components than there are component parsers.
+ */
+#define EINVAL_RP_TOO_SHORT __einfo_error ( EINFO_EINVAL_RP_TOO_SHORT )
+#define EINFO_EINVAL_RP_TOO_SHORT __einfo_uniqify \
+ ( EINFO_EINVAL, 0x04, "Root path too short" )
+
+/******************************************************************************
+ *
+ * IB SRP devices
+ *
+ ******************************************************************************
+ */
+
+/** An Infiniband SRP device
+ *
+ * Allocated by ib_srp_open() and freed by ib_srp_free() once its
+ * reference count drops to zero.  It sits between an SRP interface
+ * and a CMRC interface, and remembers the addressing parameters
+ * that ib_srp_describe() later writes into the boot firmware table.
+ */
+struct ib_srp_device {
+ /** Reference count */
+ struct refcnt refcnt;
+
+ /** SRP transport interface */
+ struct interface srp;
+ /** CMRC interface */
+ struct interface cmrc;
+
+ /** Infiniband device (reference held via ibdev_get()) */
+ struct ib_device *ibdev;
+
+ /** Destination GID (for boot firmware table) */
+ union ib_gid dgid;
+ /** Service ID (for boot firmware table) */
+ union ib_guid service_id;
+};
+
+/**
+ * Free IB SRP device
+ *
+ * @v refcnt Reference count
+ */
+static void ib_srp_free ( struct refcnt *refcnt ) {
+ /* Recover the device from its embedded reference counter */
+ struct ib_srp_device *ib_srp =
+ container_of ( refcnt, struct ib_srp_device, refcnt );
+
+ /* Drop the device reference taken by ibdev_get() in ib_srp_open() */
+ ibdev_put ( ib_srp->ibdev );
+ free ( ib_srp );
+}
+
+/**
+ * Close IB SRP device
+ *
+ * @v ib_srp IB SRP device
+ * @v rc Reason for close
+ */
+static void ib_srp_close ( struct ib_srp_device *ib_srp, int rc ) {
+
+ /* Shut down interfaces.  This function is registered as the
+  * intf_close handler for both interfaces and is also called on
+  * the ib_srp_open() failure path; the same reason code is passed
+  * to both interfaces.
+  */
+ intf_shutdown ( &ib_srp->cmrc, rc );
+ intf_shutdown ( &ib_srp->srp, rc );
+}
+
+/**
+ * Describe IB SRP device in an ACPI table
+ *
+ * @v ib_srp		IB SRP device
+ * @v acpi		ACPI table
+ * @v len		Length of ACPI table
+ * @ret rc		Return status code
+ *
+ * Appends an Infiniband subtable to the end of an existing sBFT.
+ * The table is left unmodified if it has the wrong signature or if
+ * there is not enough room for the subtable.
+ */
+static int ib_srp_describe ( struct ib_srp_device *ib_srp,
+			     struct acpi_description_header *acpi,
+			     size_t len ) {
+	struct ib_device *ibdev = ib_srp->ibdev;
+	struct sbft_table *sbft =
+		container_of ( acpi, struct sbft_table, acpi );
+	struct sbft_ib_subtable *ib_sbft;
+	size_t offset;
+	size_t used;
+
+	/* Sanity check */
+	if ( acpi->signature != SBFT_SIG )
+		return -EINVAL;
+
+	/* Check that the subtable fits before modifying anything.
+	 * The comparison is arranged so that it cannot overflow.
+	 */
+	offset = le32_to_cpu ( sbft->acpi.length );
+	if ( ( offset > len ) || ( sizeof ( *ib_sbft ) > ( len - offset ) ) )
+		return -ENOBUFS;
+	used = ( offset + sizeof ( *ib_sbft ) );
+
+	/* Append IB subtable to existing table */
+	sbft->ib_offset = cpu_to_le16 ( offset );
+	ib_sbft = ( ( ( void * ) sbft ) + offset );
+	sbft->acpi.length = cpu_to_le32 ( used );
+
+	/* Populate subtable */
+	memcpy ( &ib_sbft->sgid, &ibdev->gid, sizeof ( ib_sbft->sgid ) );
+	memcpy ( &ib_sbft->dgid, &ib_srp->dgid, sizeof ( ib_sbft->dgid ) );
+	memcpy ( &ib_sbft->service_id, &ib_srp->service_id,
+		 sizeof ( ib_sbft->service_id ) );
+	ib_sbft->pkey = cpu_to_le16 ( ibdev->pkey );
+
+	return 0;
+}
+
+/** IB SRP CMRC interface operations
+ *
+ * Only close is handled locally; the descriptor below names the
+ * "srp" interface as its pass-through target.
+ * NOTE(review): presumably every other operation arriving on the
+ * CMRC interface is forwarded to the SRP interface - confirm against
+ * INTF_DESC_PASSTHRU.
+ */
+static struct interface_operation ib_srp_cmrc_op[] = {
+ INTF_OP ( intf_close, struct ib_srp_device *, ib_srp_close ),
+};
+
+/** IB SRP CMRC interface descriptor */
+static struct interface_descriptor ib_srp_cmrc_desc =
+ INTF_DESC_PASSTHRU ( struct ib_srp_device, cmrc, ib_srp_cmrc_op, srp );
+
+/** IB SRP SRP interface operations
+ *
+ * Handles ACPI description requests and close locally; the
+ * descriptor below names the "cmrc" interface as its pass-through
+ * target.
+ * NOTE(review): presumably every other operation arriving on the SRP
+ * interface is forwarded to the CMRC interface - confirm against
+ * INTF_DESC_PASSTHRU.
+ */
+static struct interface_operation ib_srp_srp_op[] = {
+ INTF_OP ( acpi_describe, struct ib_srp_device *, ib_srp_describe ),
+ INTF_OP ( intf_close, struct ib_srp_device *, ib_srp_close ),
+};
+
+/** IB SRP SRP interface descriptor */
+static struct interface_descriptor ib_srp_srp_desc =
+ INTF_DESC_PASSTHRU ( struct ib_srp_device, srp, ib_srp_srp_op, cmrc );
+
+/**
+ * Open IB SRP device
+ *
+ * @v block Block control interface
+ * @v ibdev Infiniband device
+ * @v dgid Destination GID
+ * @v service_id Service ID
+ * @v initiator Initiator port ID
+ * @v target Target port ID
+ * @v lun SCSI LUN
+ * @ret rc Return status code
+ */
+static int ib_srp_open ( struct interface *block, struct ib_device *ibdev,
+ union ib_gid *dgid, union ib_guid *service_id,
+ union srp_port_id *initiator,
+ union srp_port_id *target, struct scsi_lun *lun ) {
+ struct ib_srp_device *ib_srp;
+ int rc;
+
+ /* Allocate and initialise structure */
+ ib_srp = zalloc ( sizeof ( *ib_srp ) );
+ if ( ! ib_srp ) {
+ rc = -ENOMEM;
+ goto err_zalloc;
+ }
+ ref_init ( &ib_srp->refcnt, ib_srp_free );
+ intf_init ( &ib_srp->srp, &ib_srp_srp_desc, &ib_srp->refcnt );
+ intf_init ( &ib_srp->cmrc, &ib_srp_cmrc_desc, &ib_srp->refcnt );
+ /* Hold a reference to the device; released in ib_srp_free() */
+ ib_srp->ibdev = ibdev_get ( ibdev );
+ DBGC ( ib_srp, "IBSRP %p for " IB_GID_FMT " " IB_GUID_FMT "\n",
+ ib_srp, IB_GID_ARGS ( dgid ), IB_GUID_ARGS ( service_id ) );
+
+ /* Preserve parameters required for boot firmware table */
+ memcpy ( &ib_srp->dgid, dgid, sizeof ( ib_srp->dgid ) );
+ memcpy ( &ib_srp->service_id, service_id,
+ sizeof ( ib_srp->service_id ) );
+
+ /* Open CMRC socket */
+ if ( ( rc = ib_cmrc_open ( &ib_srp->cmrc, ibdev, dgid,
+ service_id ) ) != 0 ) {
+ DBGC ( ib_srp, "IBSRP %p could not open CMRC socket: %s\n",
+ ib_srp, strerror ( rc ) );
+ goto err_cmrc_open;
+ }
+
+ /* Attach SRP device to parent interface */
+ if ( ( rc = srp_open ( block, &ib_srp->srp, initiator, target,
+ ibdev->rdma_key, lun ) ) != 0 ) {
+ DBGC ( ib_srp, "IBSRP %p could not create SRP device: %s\n",
+ ib_srp, strerror ( rc ) );
+ goto err_srp_open;
+ }
+
+ /* Mortalise self and return: drop the reference that this
+  * function holds on the newly created device.
+  */
+ ref_put ( &ib_srp->refcnt );
+ return 0;
+
+ /* Both failure labels share the same cleanup: shut down both
+  * interfaces, then drop this function's reference.
+  */
+ err_srp_open:
+ err_cmrc_open:
+ ib_srp_close ( ib_srp, rc );
+ ref_put ( &ib_srp->refcnt );
+ err_zalloc:
+ return rc;
+}
+
+/******************************************************************************
+ *
+ * IB SRP URIs
+ *
+ ******************************************************************************
+ */
+
+/** IB SRP parse flags
+ *
+ * These are ORed together with a byte count to form the size_flags
+ * argument of ib_srp_parse_byte_string().
+ */
+enum ib_srp_parse_flags {
+ /** Component must be present (no flag bits set) */
+ IB_SRP_PARSE_REQUIRED = 0x0000,
+ /** Component may be an empty string */
+ IB_SRP_PARSE_OPTIONAL = 0x8000,
+ /** Bits masked off size_flags to recover the byte count */
+ IB_SRP_PARSE_FLAG_MASK = 0xf000,
+};
+
+/** IB SRP root path parameters
+ *
+ * Filled in field by field by the ib_srp_parse_*() functions as
+ * ib_srp_parse_root_path() walks the components of a root path
+ * string.
+ */
+struct ib_srp_root_path {
+ /** Source GID */
+ union ib_gid sgid;
+ /** Initiator port ID */
+ union ib_srp_initiator_port_id initiator;
+ /** Destination GID */
+ union ib_gid dgid;
+ /** Partition key */
+ uint16_t pkey;
+ /** Service ID */
+ union ib_guid service_id;
+ /** SCSI LUN */
+ struct scsi_lun lun;
+ /** Target port ID */
+ union ib_srp_target_port_id target;
+};
+
+/**
+ * Parse IB SRP root path byte-string value
+ *
+ * @v rp_comp		Root path component string (hexadecimal digits)
+ * @v bytes		Buffer to fill with the decoded bytes
+ * @v size_flags	Expected length in bytes, ORed with parse flags
+ * @ret rc		Return status code
+ *
+ * An empty component is accepted (and @c bytes is left untouched)
+ * only if @c IB_SRP_PARSE_OPTIONAL is set in @c size_flags.
+ */
+static int ib_srp_parse_byte_string ( const char *rp_comp, uint8_t *bytes,
+				      unsigned int size_flags ) {
+	size_t expected_size = ( size_flags & ~IB_SRP_PARSE_FLAG_MASK );
+	size_t digits = strlen ( rp_comp );
+	int optional = ( ( size_flags & IB_SRP_PARSE_OPTIONAL ) != 0 );
+	int rc;
+
+	/* An optional component may be left empty */
+	if ( optional && ( digits == 0 ) )
+		return 0;
+
+	/* Each byte must be written as exactly two hex digits */
+	if ( digits != ( expected_size * 2 ) )
+		return -EINVAL_BYTE_STRING_LEN;
+
+	/* Decode into the caller's buffer; a non-negative result is
+	 * reported as success.
+	 */
+	rc = base16_decode ( rp_comp, bytes );
+	return ( ( rc < 0 ) ? rc : 0 );
+}
+
+/**
+ * Parse IB SRP root path integer value
+ *
+ * @v rp_comp		Root path component string (hexadecimal)
+ * @v default_value	Default value to use if component string is empty
+ * @ret value		Value, or negative error
+ *
+ * Since negative return values denote errors, values that cannot be
+ * represented as a non-negative @c int are rejected.
+ */
+static int ib_srp_parse_integer ( const char *rp_comp, int default_value ) {
+	unsigned long parsed;
+	char *end;
+
+	/* Parse as hexadecimal; trailing garbage is an error */
+	parsed = strtoul ( rp_comp, &end, 16 );
+	if ( *end )
+		return -EINVAL_INTEGER;
+
+	/* Nothing was consumed, i.e. the component is empty */
+	if ( end == rp_comp )
+		return default_value;
+
+	/* Refuse values that would turn negative when converted to
+	 * int, since the caller would mistake them for error codes.
+	 * ( ~0U >> 1 ) is the largest value a non-negative int can hold.
+	 */
+	if ( parsed > ( ( unsigned long ) ( ~0U >> 1 ) ) )
+		return -EINVAL_INTEGER;
+
+	return parsed;
+}
+
+/**
+ * Parse IB SRP root path source GID
+ *
+ * @v rp_comp Root path component string
+ * @v rp IB SRP root path
+ * @ret rc Return status code
+ */
+static int ib_srp_parse_sgid ( const char *rp_comp,
+			       struct ib_srp_root_path *rp ) {
+	const unsigned int size_flags =
+		( sizeof ( rp->sgid ) | IB_SRP_PARSE_OPTIONAL );
+	struct ib_device *ibdev = last_opened_ibdev();
+
+	/* Pre-load the GID of the last opened Infiniband device (if
+	 * any) so that it remains in place when the component is empty.
+	 */
+	if ( ibdev != NULL )
+		memcpy ( &rp->sgid, &ibdev->gid, sizeof ( rp->sgid ) );
+
+	return ib_srp_parse_byte_string ( rp_comp, rp->sgid.bytes,
+					  size_flags );
+}
+
+/**
+ * Parse IB SRP root path initiator identifier extension
+ *
+ * @v rp_comp Root path component string
+ * @v rp IB SRP root path
+ * @ret rc Return status code
+ */
+static int ib_srp_parse_initiator_id_ext ( const char *rp_comp,
+					   struct ib_srp_root_path *rp ) {
+	const unsigned int size_flags =
+		( sizeof ( rp->initiator.ib.id_ext ) | IB_SRP_PARSE_OPTIONAL );
+
+	/* Optional: an empty component leaves the field untouched */
+	return ib_srp_parse_byte_string ( rp_comp,
+					  rp->initiator.ib.id_ext.bytes,
+					  size_flags );
+}
+
+/**
+ * Parse IB SRP root path initiator HCA GUID
+ *
+ * @v rp_comp Root path component string
+ * @v rp IB SRP root path
+ * @ret rc Return status code
+ */
+static int ib_srp_parse_initiator_hca_guid ( const char *rp_comp,
+					     struct ib_srp_root_path *rp ) {
+	const unsigned int size_flags =
+		( sizeof ( rp->initiator.ib.hca_guid ) |
+		  IB_SRP_PARSE_OPTIONAL );
+
+	/* Pre-load the GUID half of the source GID so that it remains
+	 * in place when the component is empty.
+	 */
+	memcpy ( &rp->initiator.ib.hca_guid, &rp->sgid.s.guid,
+		 sizeof ( rp->initiator.ib.hca_guid ) );
+
+	return ib_srp_parse_byte_string ( rp_comp,
+					  rp->initiator.ib.hca_guid.bytes,
+					  size_flags );
+}
+
+/**
+ * Parse IB SRP root path destination GID
+ *
+ * @v rp_comp Root path component string
+ * @v rp IB SRP root path
+ * @ret rc Return status code
+ */
+static int ib_srp_parse_dgid ( const char *rp_comp,
+			       struct ib_srp_root_path *rp ) {
+	const unsigned int size_flags =
+		( sizeof ( rp->dgid ) | IB_SRP_PARSE_REQUIRED );
+
+	/* Required: an empty component is rejected by the parser */
+	return ib_srp_parse_byte_string ( rp_comp, rp->dgid.bytes,
+					  size_flags );
+}
+
+/**
+ * Parse IB SRP root path partition key
+ *
+ * @v rp_comp Root path component string
+ * @v rp IB SRP root path
+ * @ret rc Return status code
+ */
+static int ib_srp_parse_pkey ( const char *rp_comp,
+			       struct ib_srp_root_path *rp ) {
+	int pkey;
+
+	/* An empty component selects the default partition key */
+	if ( ( pkey = ib_srp_parse_integer ( rp_comp, IB_PKEY_DEFAULT ) ) < 0 )
+		return pkey;
+
+	/* The partition key field is 16 bits wide; reject anything
+	 * larger rather than silently truncating it.
+	 */
+	if ( pkey > 0xffff )
+		return -EINVAL_INTEGER;
+
+	rp->pkey = pkey;
+	return 0;
+}
+
+/**
+ * Parse IB SRP root path service ID
+ *
+ * @v rp_comp Root path component string
+ * @v rp IB SRP root path
+ * @ret rc Return status code
+ */
+static int ib_srp_parse_service_id ( const char *rp_comp,
+				     struct ib_srp_root_path *rp ) {
+	const unsigned int size_flags =
+		( sizeof ( rp->service_id ) | IB_SRP_PARSE_REQUIRED );
+
+	/* Required: an empty component is rejected by the parser */
+	return ib_srp_parse_byte_string ( rp_comp, rp->service_id.bytes,
+					  size_flags );
+}
+
+/**
+ * Parse IB SRP root path LUN
+ *
+ * @v rp_comp Root path component string
+ * @v rp IB SRP root path
+ * @ret rc Return status code
+ */
+static int ib_srp_parse_lun ( const char *rp_comp,
+ struct ib_srp_root_path *rp ) {
+ /* Delegate to the generic SCSI LUN parser; its status code is
+  * returned unchanged.
+  */
+ return scsi_parse_lun ( rp_comp, &rp->lun );
+}
+
+/**
+ * Parse IB SRP root path target identifier extension
+ *
+ * @v rp_comp Root path component string
+ * @v rp IB SRP root path
+ * @ret rc Return status code
+ */
+static int ib_srp_parse_target_id_ext ( const char *rp_comp,
+					struct ib_srp_root_path *rp ) {
+	const unsigned int size_flags =
+		( sizeof ( rp->target.ib.id_ext ) | IB_SRP_PARSE_REQUIRED );
+
+	/* Required: an empty component is rejected by the parser */
+	return ib_srp_parse_byte_string ( rp_comp,
+					  rp->target.ib.id_ext.bytes,
+					  size_flags );
+}
+
+/**
+ * Parse IB SRP root path target I/O controller GUID
+ *
+ * @v rp_comp Root path component string
+ * @v rp IB SRP root path
+ * @ret rc Return status code
+ */
+static int ib_srp_parse_target_ioc_guid ( const char *rp_comp,
+					  struct ib_srp_root_path *rp ) {
+	const unsigned int size_flags =
+		( sizeof ( rp->target.ib.ioc_guid ) | IB_SRP_PARSE_REQUIRED );
+
+	/* Required: an empty component is rejected by the parser */
+	return ib_srp_parse_byte_string ( rp_comp,
+					  rp->target.ib.ioc_guid.bytes,
+					  size_flags );
+}
+
+/** IB SRP root path component parser
+ *
+ * Wraps a single function pointer so that the per-component parsers
+ * can be listed in the ib_srp_rp_parser[] table.
+ */
+struct ib_srp_root_path_parser {
+ /**
+ * Parse IB SRP root path component
+ *
+ * @v rp_comp Root path component string
+ * @v rp IB SRP root path
+ * @ret rc Return status code
+ */
+ int ( * parse ) ( const char *rp_comp, struct ib_srp_root_path *rp );
+};
+
+/** IB SRP root path components
+ *
+ * The order of this table defines the order of the ':'-separated
+ * components within a root path string: ib_srp_parse_root_path()
+ * applies entry N to the Nth component.  The order also matters
+ * because the initiator HCA GUID parser takes its default from the
+ * source GID, which therefore has to be parsed first.
+ */
+static struct ib_srp_root_path_parser ib_srp_rp_parser[] = {
+ { ib_srp_parse_sgid },
+ { ib_srp_parse_initiator_id_ext },
+ { ib_srp_parse_initiator_hca_guid },
+ { ib_srp_parse_dgid },
+ { ib_srp_parse_pkey },
+ { ib_srp_parse_service_id },
+ { ib_srp_parse_lun },
+ { ib_srp_parse_target_id_ext },
+ { ib_srp_parse_target_ioc_guid },
+};
+
+/** Number of IB SRP root path components (derived from the table) */
+#define IB_SRP_NUM_RP_COMPONENTS \
+ ( sizeof ( ib_srp_rp_parser ) / sizeof ( ib_srp_rp_parser[0] ) )
+
+/**
+ * Parse IB SRP root path
+ *
+ * @v rp_string Root path string
+ * @v rp IB SRP root path
+ * @ret rc Return status code
+ */
+static int ib_srp_parse_root_path ( const char *rp_string,
+				    struct ib_srp_root_path *rp ) {
+	char rp_string_copy[ strlen ( rp_string ) + 1 ];
+	char *rp_comp[IB_SRP_NUM_RP_COMPONENTS];
+	char *cursor = rp_string_copy;
+	unsigned int idx;
+	int rc;
+
+	/* Work on a private copy, since splitting overwrites each
+	 * separator with a NUL terminator.
+	 */
+	strcpy ( rp_string_copy, rp_string );
+
+	/* The first component starts at the beginning of the string;
+	 * each subsequent component starts just after the next ':'.
+	 * The final component takes whatever remains, including any
+	 * further ':' characters.
+	 */
+	rp_comp[0] = cursor;
+	for ( idx = 1 ; idx < IB_SRP_NUM_RP_COMPONENTS ; idx++ ) {
+		while ( *cursor != ':' ) {
+			if ( ! *cursor ) {
+				DBG ( "IBSRP root path \"%s\" too short\n",
+				      rp_string );
+				return -EINVAL_RP_TOO_SHORT;
+			}
+			cursor++;
+		}
+		*cursor = '\0';
+		cursor++;
+		rp_comp[idx] = cursor;
+	}
+
+	/* Hand each component to its parser, in table order, stopping
+	 * at the first failure.
+	 */
+	for ( idx = 0 ; idx < IB_SRP_NUM_RP_COMPONENTS ; idx++ ) {
+		rc = ib_srp_rp_parser[idx].parse ( rp_comp[idx], rp );
+		if ( rc != 0 ) {
+			DBG ( "IBSRP could not parse \"%s\" in root path "
+			      "\"%s\": %s\n", rp_comp[idx], rp_string,
+			      strerror ( rc ) );
+			return rc;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * Open IB SRP URI
+ *
+ * @v parent Parent interface
+ * @v uri URI
+ * @ret rc Return status code
+ */
+static int ib_srp_open_uri ( struct interface *parent, struct uri *uri ) {
+	struct ib_srp_root_path rp;
+	struct ib_device *ibdev;
+	int rc;
+
+	/* The root path is carried in the opaque part of the URI */
+	if ( ! uri->opaque )
+		return -EINVAL;
+
+	/* Start from all-zero parameters and parse the root path */
+	memset ( &rp, 0, sizeof ( rp ) );
+	rc = ib_srp_parse_root_path ( uri->opaque, &rp );
+	if ( rc != 0 )
+		return rc;
+
+	/* The source GID selects the Infiniband device to use */
+	ibdev = find_ibdev ( &rp.sgid );
+	if ( ! ibdev ) {
+		DBG ( "IBSRP could not identify Infiniband device\n" );
+		return -ENODEV;
+	}
+
+	/* Open the IB SRP device and return its status code */
+	return ib_srp_open ( parent, ibdev, &rp.dgid, &rp.service_id,
+			     &rp.initiator.srp, &rp.target.srp, &rp.lun );
+}
+
+/** IB SRP URI opener
+ *
+ * Registers ib_srp_open_uri() as the handler for URIs whose scheme
+ * is "ib_srp".
+ */
+struct uri_opener ib_srp_uri_opener __uri_opener = {
+ .scheme = "ib_srp",
+ .open = ib_srp_open_uri,
+};
diff --git a/qemu/roms/ipxe/src/net/iobpad.c b/qemu/roms/ipxe/src/net/iobpad.c
new file mode 100644
index 000000000..9cc8328e9
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/iobpad.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/**
+ * @file
+ *
+ * I/O buffer padding
+ *
+ */
+
+#include <string.h>
+#include <ipxe/iobuf.h>
+
+/**
+ * Pad I/O buffer
+ *
+ * @v iobuf		I/O buffer
+ * @v min_len		Minimum length
+ *
+ * This function pads and aligns I/O buffers, for devices that
+ * aren't capable of padding in hardware, or that require specific
+ * alignment in TX buffers.  The packet data will end up aligned to a
+ * multiple of @c IOB_ALIGN.
+ *
+ * @c min_len must not exceed @c IOB_ZLEN.
+ */
+void iob_pad ( struct io_buffer *iobuf, size_t min_len ) {
+	void *data;
+	size_t len;
+	size_t headroom;
+	size_t pad_len;
+
+	assert ( min_len <= IOB_ZLEN );
+
+	/* Move packet data to start of I/O buffer.  This will both
+	 * align the data (since I/O buffers are aligned to
+	 * IOB_ALIGN) and give us sufficient space for the
+	 * zero-padding
+	 */
+	data = iobuf->data;
+	len = iob_len ( iobuf );
+	headroom = iob_headroom ( iobuf );
+	iob_push ( iobuf, headroom );
+	memmove ( iobuf->data, data, len );
+	iob_unput ( iobuf, headroom );
+
+	/* Pad to minimum packet length.  The lengths are compared
+	 * directly as size_t values rather than subtracting into a
+	 * signed int, so that a packet longer than min_len can never
+	 * be misread as needing padding through integer narrowing.
+	 */
+	len = iob_len ( iobuf );
+	if ( len < min_len ) {
+		pad_len = ( min_len - len );
+		memset ( iob_put ( iobuf, pad_len ), 0, pad_len );
+	}
+}
diff --git a/qemu/roms/ipxe/src/net/ipv4.c b/qemu/roms/ipxe/src/net/ipv4.c
new file mode 100644
index 000000000..9c5cf2eb4
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/ipv4.c
@@ -0,0 +1,789 @@
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/list.h>
+#include <ipxe/in.h>
+#include <ipxe/arp.h>
+#include <ipxe/if_ether.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/ip.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/dhcp.h>
+#include <ipxe/settings.h>
+#include <ipxe/fragment.h>
+#include <ipxe/ipstat.h>
+#include <ipxe/profile.h>
+
+/** @file
+ *
+ * IPv4 protocol
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/** Unique IP datagram identification number (high byte)
+ *
+ * Pre-incremented by ipv4_tx() for every transmitted packet and placed
+ * in the upper eight bits of the IPv4 "ident" field.
+ */
+static uint8_t next_ident_high = 0;
+
+/** List of IPv4 miniroutes */
+struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes );
+
+/** IPv4 statistics
+ *
+ * Counters updated by ipv4_tx() and ipv4_rx(), and shared with the
+ * fragment reassembler.
+ */
+static struct ip_statistics ipv4_stats;
+
+/** IPv4 statistics family */
+struct ip_statistics_family
+ipv4_stats_family __ip_statistics_family ( IP_STATISTICS_IPV4 ) = {
+ .version = 4,
+ .stats = &ipv4_stats,
+};
+
+/** Transmit profiler (started and stopped in ipv4_tx()) */
+static struct profiler ipv4_tx_profiler __profiler = { .name = "ipv4.tx" };
+
+/** Receive profiler (started and stopped in ipv4_rx()) */
+static struct profiler ipv4_rx_profiler __profiler = { .name = "ipv4.rx" };
+
+/**
+ * Add IPv4 minirouting table entry
+ *
+ * @v netdev		Network device
+ * @v address		IPv4 address
+ * @v netmask		Subnet mask
+ * @v gateway		Gateway address (if any)
+ * @ret miniroute	Routing table entry, or NULL
+ *
+ * The new entry holds a reference to the network device.  Entries
+ * with a gateway are appended to the routing table; entries without
+ * one are inserted at its head.
+ */
+static struct ipv4_miniroute * __malloc
+add_ipv4_miniroute ( struct net_device *netdev, struct in_addr address,
+		     struct in_addr netmask, struct in_addr gateway ) {
+	struct ipv4_miniroute *route;
+	int via_gateway = ( gateway.s_addr != 0 );
+
+	/* inet_ntoa() formats into a single static buffer, so each
+	 * address is printed by its own debug call.
+	 */
+	DBGC ( netdev, "IPv4 add %s", inet_ntoa ( address ) );
+	DBGC ( netdev, "/%s ", inet_ntoa ( netmask ) );
+	if ( via_gateway )
+		DBGC ( netdev, "gw %s ", inet_ntoa ( gateway ) );
+	DBGC ( netdev, "via %s\n", netdev->name );
+
+	/* Allocate the routing table entry */
+	route = malloc ( sizeof ( *route ) );
+	if ( route == NULL ) {
+		DBGC ( netdev, "IPv4 could not add miniroute\n" );
+		return NULL;
+	}
+
+	/* Populate it, taking a reference to the network device */
+	route->address = address;
+	route->netmask = netmask;
+	route->gateway = gateway;
+	route->netdev = netdev_get ( netdev );
+
+	/* Gateway routes go to the end of the list, all others to
+	 * the start.
+	 */
+	if ( via_gateway ) {
+		list_add_tail ( &route->list, &ipv4_miniroutes );
+	} else {
+		list_add ( &route->list, &ipv4_miniroutes );
+	}
+
+	return route;
+}
+
+/**
+ * Delete IPv4 minirouting table entry
+ *
+ * @v miniroute		Routing table entry
+ *
+ * Drops the entry's reference to its network device, unlinks the
+ * entry from the routing table and frees it.
+ */
+static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) {
+	struct net_device *netdev = miniroute->netdev;
+	int via_gateway = ( miniroute->gateway.s_addr != 0 );
+
+	/* One debug call per address: inet_ntoa() reuses its buffer */
+	DBGC ( netdev, "IPv4 del %s", inet_ntoa ( miniroute->address ) );
+	DBGC ( netdev, "/%s ", inet_ntoa ( miniroute->netmask ) );
+	if ( via_gateway )
+		DBGC ( netdev, "gw %s ", inet_ntoa ( miniroute->gateway ) );
+	DBGC ( netdev, "via %s\n", netdev->name );
+
+	/* Release device reference, unlink and free */
+	netdev_put ( netdev );
+	list_del ( &miniroute->list );
+	free ( miniroute );
+}
+
+/**
+ * Perform IPv4 routing
+ *
+ * @v dest		Final destination address
+ * @ret dest		Next hop destination address
+ * @ret miniroute	Routing table entry to use, or NULL if no route
+ *
+ * The first entry whose network device is open and which either
+ * covers the destination directly or has a gateway is selected.  If
+ * the selected entry reaches the destination only via its gateway,
+ * the next hop destination address is overwritten with the gateway
+ * address.
+ */
+static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) {
+	struct ipv4_miniroute *route;
+
+	list_for_each_entry ( route, &ipv4_miniroutes, list ) {
+
+		/* Routes through closed devices are unusable */
+		if ( ! netdev_is_open ( route->netdev ) )
+			continue;
+
+		/* Destination lies within this route's subnet */
+		if ( ( ( dest->s_addr ^ route->address.s_addr )
+		       & route->netmask.s_addr ) == 0 )
+			return route;
+
+		/* Otherwise the route is usable only via a gateway */
+		if ( route->gateway.s_addr ) {
+			*dest = route->gateway;
+			return route;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * Determine transmitting network device
+ *
+ * @v st_dest		Destination network-layer address
+ * @ret netdev		Transmitting network device, or NULL
+ */
+static struct net_device * ipv4_netdev ( struct sockaddr_tcpip *st_dest ) {
+	struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
+	/* Route on a copy: ipv4_route() may overwrite the address it
+	 * is given with a gateway address.
+	 */
+	struct in_addr next_hop = sin_dest->sin_addr;
+	struct ipv4_miniroute *route = ipv4_route ( &next_hop );
+
+	return ( route ? route->netdev : NULL );
+}
+
+/**
+ * Check if IPv4 fragment matches fragment reassembly buffer
+ *
+ * @v fragment		Fragment reassembly buffer
+ * @v iobuf		I/O buffer
+ * @v hdrlen		Length of non-fragmentable portion of I/O buffer
+ * @ret is_fragment	Fragment matches this reassembly buffer
+ *
+ * A match requires both the source address and the identification
+ * field to be equal.
+ */
+static int ipv4_is_fragment ( struct fragment *fragment,
+			      struct io_buffer *iobuf,
+			      size_t hdrlen __unused ) {
+	struct iphdr *held = fragment->iobuf->data;
+	struct iphdr *incoming = iobuf->data;
+
+	if ( incoming->src.s_addr != held->src.s_addr )
+		return 0;
+	return ( incoming->ident == held->ident );
+}
+
+/**
+ * Get IPv4 fragment offset
+ *
+ * @v iobuf		I/O buffer
+ * @v hdrlen		Length of non-fragmentable portion of I/O buffer
+ * @ret offset		Offset
+ *
+ * The offset bits of the header's "frags" field are scaled by eight
+ * to produce the returned offset.
+ */
+static size_t ipv4_fragment_offset ( struct io_buffer *iobuf,
+				     size_t hdrlen __unused ) {
+	struct iphdr *iphdr = iobuf->data;
+	uint16_t frags = ntohs ( iphdr->frags );
+
+	return ( ( frags & IP_MASK_OFFSET ) << 3 );
+}
+
+/**
+ * Check if more fragments exist
+ *
+ * @v iobuf		I/O buffer
+ * @v hdrlen		Length of non-fragmentable portion of I/O buffer
+ * @ret more_frags	More fragments exist
+ *
+ * The result is non-zero when the "more fragments" flag is set in
+ * the header's "frags" field.
+ */
+static int ipv4_more_fragments ( struct io_buffer *iobuf,
+				 size_t hdrlen __unused ) {
+	struct iphdr *iphdr = iobuf->data;
+	uint16_t more_frags_flag = htons ( IP_MASK_MOREFRAGS );
+
+	return ( iphdr->frags & more_frags_flag );
+}
+
+/** IPv4 fragment reassembler
+ *
+ * Supplies the IPv4-specific callbacks defined in this file, along
+ * with the IPv4 statistics block, for use by fragment_reassemble()
+ * when called from ipv4_rx().
+ */
+static struct fragment_reassembler ipv4_reassembler = {
+ .list = LIST_HEAD_INIT ( ipv4_reassembler.list ),
+ .is_fragment = ipv4_is_fragment,
+ .fragment_offset = ipv4_fragment_offset,
+ .more_fragments = ipv4_more_fragments,
+ .stats = &ipv4_stats,
+};
+
+/**
+ * Add IPv4 pseudo-header checksum to existing checksum
+ *
+ * @v iobuf		I/O buffer
+ * @v csum		Existing checksum
+ * @ret csum		Updated checksum
+ *
+ * The I/O buffer must start with the IPv4 header.  The pseudo-header
+ * length field covers everything in the buffer after that header.
+ */
+static uint16_t ipv4_pshdr_chksum ( struct io_buffer *iobuf, uint16_t csum ) {
+	struct iphdr *iphdr = iobuf->data;
+	size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
+	struct ipv4_pseudo_header pshdr;
+
+	/* Assemble the pseudo-header from the real header */
+	pshdr.len = htons ( iob_len ( iobuf ) - hdrlen );
+	pshdr.protocol = iphdr->protocol;
+	pshdr.zero_padding = 0x00;
+	pshdr.dest = iphdr->dest;
+	pshdr.src = iphdr->src;
+
+	/* Fold it into the running checksum */
+	return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) );
+}
+
+/**
+ * Transmit IP packet
+ *
+ * @v iobuf I/O buffer
+ * @v tcpip_protocol Transport-layer protocol
+ * @v st_src Source network-layer address
+ * @v st_dest Destination network-layer address
+ * @v netdev Network device to use if no route found, or NULL
+ * @v trans_csum Transport-layer checksum to complete, or NULL
+ * @ret rc Status
+ *
+ * This function expects a transport-layer segment and prepends the IP header.
+ *
+ * If a route is found, the source address is taken from the routing
+ * table entry, replacing any address supplied via @c st_src.
+ *
+ * On the failures handled at the "err" label (no route, multicast hash
+ * failure) the I/O buffer is freed here. After net_tx() or arp_tx()
+ * has been called the buffer is not freed by this function, even on
+ * error.
+ */
+static int ipv4_tx ( struct io_buffer *iobuf,
+ struct tcpip_protocol *tcpip_protocol,
+ struct sockaddr_tcpip *st_src,
+ struct sockaddr_tcpip *st_dest,
+ struct net_device *netdev,
+ uint16_t *trans_csum ) {
+ struct iphdr *iphdr = iob_push ( iobuf, sizeof ( *iphdr ) );
+ struct sockaddr_in *sin_src = ( ( struct sockaddr_in * ) st_src );
+ struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
+ struct ipv4_miniroute *miniroute;
+ struct in_addr next_hop;
+ struct in_addr netmask = { .s_addr = 0 };
+ uint8_t ll_dest_buf[MAX_LL_ADDR_LEN];
+ const void *ll_dest;
+ int rc;
+
+ /* Start profiling */
+ profile_start ( &ipv4_tx_profiler );
+
+ /* Update statistics */
+ ipv4_stats.out_requests++;
+
+ /* Fill up the IP header, except source address */
+ memset ( iphdr, 0, sizeof ( *iphdr ) );
+ iphdr->verhdrlen = ( IP_VER | ( sizeof ( *iphdr ) / 4 ) );
+ iphdr->service = IP_TOS;
+ iphdr->len = htons ( iob_len ( iobuf ) );
+ iphdr->ttl = IP_TTL;
+ iphdr->protocol = tcpip_protocol->tcpip_proto;
+ iphdr->dest = sin_dest->sin_addr;
+
+ /* Use routing table to identify next hop and transmitting netdev.
+ * The routing table is not consulted for the limited broadcast
+ * address or for multicast destinations; those rely on the
+ * caller-supplied netdev.
+ */
+ next_hop = iphdr->dest;
+ if ( sin_src )
+ iphdr->src = sin_src->sin_addr;
+ if ( ( next_hop.s_addr != INADDR_BROADCAST ) &&
+ ( ! IN_MULTICAST ( ntohl ( next_hop.s_addr ) ) ) &&
+ ( ( miniroute = ipv4_route ( &next_hop ) ) != NULL ) ) {
+ iphdr->src = miniroute->address;
+ netmask = miniroute->netmask;
+ netdev = miniroute->netdev;
+ }
+ if ( ! netdev ) {
+ DBGC ( sin_dest->sin_addr, "IPv4 has no route to %s\n",
+ inet_ntoa ( iphdr->dest ) );
+ ipv4_stats.out_no_routes++;
+ rc = -ENETUNREACH;
+ goto err;
+ }
+
+ /* (Ab)use the "ident" field to convey metadata about the
+ * network device statistics into packet traces. Useful for
+ * extracting debug information from non-debug builds.
+ */
+ iphdr->ident = htons ( ( (++next_ident_high) << 8 ) |
+ ( ( netdev->rx_stats.bad & 0xf ) << 4 ) |
+ ( ( netdev->rx_stats.good & 0xf ) << 0 ) );
+
+ /* Fix up checksums. The pseudo-header checksum depends on the
+ * source address, so this must follow the routing step above.
+ */
+ if ( trans_csum )
+ *trans_csum = ipv4_pshdr_chksum ( iobuf, *trans_csum );
+ iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
+
+ /* Print IP4 header for debugging (two calls, since inet_ntoa()
+ * returns a static buffer)
+ */
+ DBGC2 ( sin_dest->sin_addr, "IPv4 TX %s->", inet_ntoa ( iphdr->src ) );
+ DBGC2 ( sin_dest->sin_addr, "%s len %d proto %d id %04x csum %04x\n",
+ inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ),
+ iphdr->protocol, ntohs ( iphdr->ident ),
+ ntohs ( iphdr->chksum ) );
+
+ /* Calculate link-layer destination address, if possible.
+ * With no route the netmask is still zero, so only the limited
+ * broadcast address matches the first test; with a route, any
+ * address whose host bits are all ones matches.
+ */
+ if ( ( ( next_hop.s_addr ^ INADDR_BROADCAST ) & ~netmask.s_addr ) == 0){
+ /* Broadcast address */
+ ipv4_stats.out_bcast_pkts++;
+ ll_dest = netdev->ll_broadcast;
+ } else if ( IN_MULTICAST ( ntohl ( next_hop.s_addr ) ) ) {
+ /* Multicast address */
+ ipv4_stats.out_mcast_pkts++;
+ if ( ( rc = netdev->ll_protocol->mc_hash ( AF_INET, &next_hop,
+ ll_dest_buf ) ) !=0){
+ DBGC ( sin_dest->sin_addr, "IPv4 could not hash "
+ "multicast %s: %s\n",
+ inet_ntoa ( next_hop ), strerror ( rc ) );
+ goto err;
+ }
+ ll_dest = ll_dest_buf;
+ } else {
+ /* Unicast address */
+ ll_dest = NULL;
+ }
+
+ /* Update statistics */
+ ipv4_stats.out_transmits++;
+ ipv4_stats.out_octets += iob_len ( iobuf );
+
+ /* Hand off to link layer (via ARP if applicable).
+ * NOTE(review): the buffer is not freed on failure below;
+ * presumably net_tx()/arp_tx() take ownership of it - confirm.
+ */
+ if ( ll_dest ) {
+ if ( ( rc = net_tx ( iobuf, netdev, &ipv4_protocol, ll_dest,
+ netdev->ll_addr ) ) != 0 ) {
+ DBGC ( sin_dest->sin_addr, "IPv4 could not transmit "
+ "packet via %s: %s\n",
+ netdev->name, strerror ( rc ) );
+ return rc;
+ }
+ } else {
+ if ( ( rc = arp_tx ( iobuf, netdev, &ipv4_protocol, &next_hop,
+ &iphdr->src, netdev->ll_addr ) ) != 0 ) {
+ DBGC ( sin_dest->sin_addr, "IPv4 could not transmit "
+ "packet via %s: %s\n",
+ netdev->name, strerror ( rc ) );
+ return rc;
+ }
+ }
+
+ /* The profiler is stopped only on the success path */
+ profile_stop ( &ipv4_tx_profiler );
+ return 0;
+
+ err:
+ free_iob ( iobuf );
+ return rc;
+}
+
+/**
+ * Check if network device has any IPv4 address
+ *
+ * @v netdev		Network device
+ * @ret has_any_addr	Network device has any IPv4 address
+ *
+ * A device counts as having an address when at least one routing
+ * table entry refers to it.
+ */
+int ipv4_has_any_addr ( struct net_device *netdev ) {
+	struct ipv4_miniroute *route;
+	int found = 0;
+
+	list_for_each_entry ( route, &ipv4_miniroutes, list ) {
+		if ( route->netdev == netdev ) {
+			found = 1;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/**
+ * Check if network device has a specific IPv4 address
+ *
+ * @v netdev		Network device
+ * @v addr		IPv4 address
+ * @ret has_addr	Network device has this IPv4 address
+ */
+static int ipv4_has_addr ( struct net_device *netdev, struct in_addr addr ) {
+	struct ipv4_miniroute *route;
+
+	list_for_each_entry ( route, &ipv4_miniroutes, list ) {
+		/* Skip entries for other devices or other addresses */
+		if ( route->netdev != netdev )
+			continue;
+		if ( route->address.s_addr != addr.s_addr )
+			continue;
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * Process incoming packets
+ *
+ * @v iobuf I/O buffer
+ * @v netdev Network device
+ * @v ll_dest Link-layer destination address
+ * @v ll_source Link-layer source address
+ * @v flags Packet flags
+ * @ret rc Return status code
+ *
+ * This function expects an IP4 network datagram. It processes the headers
+ * and sends it to the transport layer.
+ *
+ * Packets failing validation are freed here and -EINVAL is returned.
+ * Header-format failures are additionally counted in in_hdr_errors.
+ */
+static int ipv4_rx ( struct io_buffer *iobuf,
+ struct net_device *netdev,
+ const void *ll_dest __unused,
+ const void *ll_source __unused,
+ unsigned int flags ) {
+ struct iphdr *iphdr = iobuf->data;
+ size_t hdrlen;
+ size_t len;
+ union {
+ struct sockaddr_in sin;
+ struct sockaddr_tcpip st;
+ } src, dest;
+ uint16_t csum;
+ uint16_t pshdr_csum;
+ int rc;
+
+ /* Start profiling */
+ profile_start ( &ipv4_rx_profiler );
+
+ /* Update statistics */
+ ipv4_stats.in_receives++;
+ ipv4_stats.in_octets += iob_len ( iobuf );
+ if ( flags & LL_BROADCAST ) {
+ ipv4_stats.in_bcast_pkts++;
+ } else if ( flags & LL_MULTICAST ) {
+ ipv4_stats.in_mcast_pkts++;
+ }
+
+ /* Sanity check the IPv4 header.
+ * NOTE(review): the debug message for the first check passes
+ * iphdr->src even though the packet is known to be shorter than
+ * the header at that point - confirm this is harmless in debug
+ * builds.
+ */
+ if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) {
+ DBGC ( iphdr->src, "IPv4 packet too short at %zd bytes (min "
+ "%zd bytes)\n", iob_len ( iobuf ), sizeof ( *iphdr ) );
+ goto err_header;
+ }
+ if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) {
+ DBGC ( iphdr->src, "IPv4 version %#02x not supported\n",
+ iphdr->verhdrlen );
+ goto err_header;
+ }
+ hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
+ if ( hdrlen < sizeof ( *iphdr ) ) {
+ DBGC ( iphdr->src, "IPv4 header too short at %zd bytes (min "
+ "%zd bytes)\n", hdrlen, sizeof ( *iphdr ) );
+ goto err_header;
+ }
+ if ( hdrlen > iob_len ( iobuf ) ) {
+ DBGC ( iphdr->src, "IPv4 header too long at %zd bytes "
+ "(packet is %zd bytes)\n", hdrlen, iob_len ( iobuf ) );
+ goto err_header;
+ }
+ /* Checksum is taken over the full header, including options */
+ if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) {
+ DBGC ( iphdr->src, "IPv4 checksum incorrect (is %04x "
+ "including checksum field, should be 0000)\n", csum );
+ goto err_header;
+ }
+ len = ntohs ( iphdr->len );
+ if ( len < hdrlen ) {
+ DBGC ( iphdr->src, "IPv4 length too short at %zd bytes "
+ "(header is %zd bytes)\n", len, hdrlen );
+ goto err_header;
+ }
+ /* A truncated packet is not counted as a header error */
+ if ( len > iob_len ( iobuf ) ) {
+ DBGC ( iphdr->src, "IPv4 length too long at %zd bytes "
+ "(packet is %zd bytes)\n", len, iob_len ( iobuf ) );
+ ipv4_stats.in_truncated_pkts++;
+ goto err_other;
+ }
+
+ /* Truncate packet to correct length */
+ iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) );
+
+ /* Print IPv4 header for debugging */
+ DBGC2 ( iphdr->src, "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
+ DBGC2 ( iphdr->src, "%s len %d proto %d id %04x csum %04x\n",
+ inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
+ ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
+
+ /* Discard unicast packets not destined for us. Packets are
+ * accepted regardless of destination while the device has no
+ * IPv4 address at all, or when flagged as link-layer multicast.
+ */
+ if ( ( ! ( flags & LL_MULTICAST ) ) &&
+ ipv4_has_any_addr ( netdev ) &&
+ ( ! ipv4_has_addr ( netdev, iphdr->dest ) ) ) {
+ DBGC ( iphdr->src, "IPv4 discarding non-local unicast packet "
+ "for %s\n", inet_ntoa ( iphdr->dest ) );
+ ipv4_stats.in_addr_errors++;
+ goto err_other;
+ }
+
+ /* Perform fragment reassembly if applicable */
+ if ( iphdr->frags & htons ( IP_MASK_OFFSET | IP_MASK_MOREFRAGS ) ) {
+ /* Pass the fragment to fragment_reassemble() which returns
+ * either a fully reassembled I/O buffer or NULL.
+ * NOTE(review): on NULL the original buffer is not freed
+ * here; presumably fragment_reassemble() has taken
+ * ownership of it - confirm.
+ */
+ iobuf = fragment_reassemble ( &ipv4_reassembler, iobuf,
+ &hdrlen );
+ if ( ! iobuf )
+ return 0;
+ /* The reassembled datagram may be a different buffer */
+ iphdr = iobuf->data;
+ }
+
+ /* Construct socket addresses, calculate pseudo-header
+ * checksum, and hand off to transport layer. The checksum is
+ * computed before the header is pulled, since
+ * ipv4_pshdr_chksum() reads the header from the buffer start.
+ */
+ memset ( &src, 0, sizeof ( src ) );
+ src.sin.sin_family = AF_INET;
+ src.sin.sin_addr = iphdr->src;
+ memset ( &dest, 0, sizeof ( dest ) );
+ dest.sin.sin_family = AF_INET;
+ dest.sin.sin_addr = iphdr->dest;
+ pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM );
+ iob_pull ( iobuf, hdrlen );
+ /* NOTE(review): the buffer is not freed if tcpip_rx() fails;
+ * presumably tcpip_rx() takes ownership of it - confirm.
+ */
+ if ( ( rc = tcpip_rx ( iobuf, netdev, iphdr->protocol, &src.st,
+ &dest.st, pshdr_csum, &ipv4_stats ) ) != 0 ) {
+ DBGC ( src.sin.sin_addr, "IPv4 received packet rejected by "
+ "stack: %s\n", strerror ( rc ) );
+ return rc;
+ }
+
+ /* The profiler is stopped only on the success path */
+ profile_stop ( &ipv4_rx_profiler );
+ return 0;
+
+ /* err_header deliberately falls through into err_other */
+ err_header:
+ ipv4_stats.in_hdr_errors++;
+ err_other:
+ free_iob ( iobuf );
+ return -EINVAL;
+}
+
+/**
+ * Check existence of IPv4 address for ARP
+ *
+ * @v netdev		Network device
+ * @v net_addr		Network-layer address
+ * @ret rc		Return status code
+ *
+ * Succeeds only if the network device has the specified IPv4
+ * address; otherwise returns -ENOENT.
+ */
+static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
+	const struct in_addr *address = net_addr;
+
+	return ( ipv4_has_addr ( netdev, *address ) ? 0 : -ENOENT );
+}
+
+/**
+ * Convert IPv4 address to dotted-quad notation
+ *
+ * @v in		IP address
+ * @ret string		IP address in dotted-quad notation
+ *
+ * The returned string lives in a static buffer which is overwritten
+ * by the next call.
+ */
+char * inet_ntoa ( struct in_addr in ) {
+	static char buf[ sizeof ( "xxx.xxx.xxx.xxx" ) ];
+	uint8_t *octet = ( ( uint8_t * ) &in );
+
+	sprintf ( buf, "%d.%d.%d.%d",
+		  octet[0], octet[1], octet[2], octet[3] );
+	return buf;
+}
+
+/**
+ * Transcribe IP address
+ *
+ * @v net_addr		IP address
+ * @ret string		IP address in dotted-quad notation
+ *
+ * Thin wrapper around inet_ntoa() taking a generic pointer.
+ */
+static const char * ipv4_ntoa ( const void *net_addr ) {
+	const struct in_addr *in = net_addr;
+
+	return inet_ntoa ( *in );
+}
+
+/**
+ * Transcribe IPv4 socket address
+ *
+ * @v sa		Socket address
+ * @ret string		Socket address in standard notation
+ *
+ * Only the address part of the socket address is transcribed.
+ */
+static const char * ipv4_sock_ntoa ( struct sockaddr *sa ) {
+	return inet_ntoa ( ( ( struct sockaddr_in * ) sa )->sin_addr );
+}
+
+/**
+ * Parse IPv4 socket address
+ *
+ * @v string		Socket address string
+ * @v sa		Socket address to fill in
+ * @ret rc		Return status code
+ *
+ * Only the address field of the socket address is written, and only
+ * when parsing succeeds.
+ */
+static int ipv4_sock_aton ( const char *string, struct sockaddr *sa ) {
+	struct sockaddr_in *sin = ( ( struct sockaddr_in * ) sa );
+	struct in_addr parsed;
+
+	if ( inet_aton ( string, &parsed ) == 0 )
+		return -EINVAL;
+
+	sin->sin_addr = parsed;
+	return 0;
+}
+
+/** IPv4 protocol
+ *
+ * Network-layer protocol descriptor: received packets are handled by
+ * ipv4_rx() and addresses are transcribed by ipv4_ntoa().
+ */
+struct net_protocol ipv4_protocol __net_protocol = {
+ .name = "IP",
+ .net_proto = htons ( ETH_P_IP ),
+ .net_addr_len = sizeof ( struct in_addr ),
+ .rx = ipv4_rx,
+ .ntoa = ipv4_ntoa,
+};
+
+/** IPv4 TCPIP net protocol
+ *
+ * Transmit side for the AF_INET family: packets are sent via
+ * ipv4_tx() and the transmitting device is chosen by ipv4_netdev().
+ */
+struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
+ .name = "IPv4",
+ .sa_family = AF_INET,
+ .header_len = sizeof ( struct iphdr ),
+ .tx = ipv4_tx,
+ .netdev = ipv4_netdev,
+};
+
+/** IPv4 ARP protocol
+ *
+ * Lets ARP check address ownership through ipv4_arp_check().
+ */
+struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
+ .net_protocol = &ipv4_protocol,
+ .check = ipv4_arp_check,
+};
+
+/** IPv4 socket address converter
+ *
+ * String conversion for AF_INET socket addresses.
+ */
+struct sockaddr_converter ipv4_sockaddr_converter __sockaddr_converter = {
+ .family = AF_INET,
+ .ntoa = ipv4_sock_ntoa,
+ .aton = ipv4_sock_aton,
+};
+
+/******************************************************************************
+ *
+ * Settings
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Parse IPv4 address setting value
+ *
+ * @v type		Setting type
+ * @v value		Formatted setting value
+ * @v buf		Buffer to contain raw value
+ * @v len		Length of buffer
+ * @ret len		Length of raw value, or negative error
+ *
+ * At most @c len bytes are written to the buffer, but the full
+ * length of an IPv4 address is always reported on success.
+ */
+int parse_ipv4_setting ( const struct setting_type *type __unused,
+			 const char *value, void *buf, size_t len ) {
+	struct in_addr ipv4;
+	size_t copy_len;
+
+	/* Reject anything that is not a valid IPv4 address */
+	if ( ! inet_aton ( value, &ipv4 ) )
+		return -EINVAL;
+
+	/* Copy as much of the address as the buffer can hold */
+	copy_len = ( ( len < sizeof ( ipv4 ) ) ? len : sizeof ( ipv4 ) );
+	memcpy ( buf, &ipv4, copy_len );
+
+	return ( sizeof ( ipv4 ) );
+}
+
+/**
+ * Format IPv4 address setting value
+ *
+ * @v type		Setting type
+ * @v raw		Raw setting value
+ * @v raw_len		Length of raw setting value
+ * @v buf		Buffer to contain formatted value
+ * @v len		Length of buffer
+ * @ret len		Length of formatted value, or negative error
+ *
+ * A raw value shorter than an IPv4 address is rejected with -EINVAL.
+ */
+int format_ipv4_setting ( const struct setting_type *type __unused,
+			  const void *raw, size_t raw_len, char *buf,
+			  size_t len ) {
+	const struct in_addr *ipv4 = raw;
+
+	if ( raw_len >= sizeof ( *ipv4 ) )
+		return snprintf ( buf, len, "%s", inet_ntoa ( *ipv4 ) );
+
+	return -EINVAL;
+}
+
+/** IPv4 address setting
+ *
+ * Read per network device by ipv4_create_routes().
+ */
+const struct setting ip_setting __setting ( SETTING_IP, ip ) = {
+ .name = "ip",
+ .description = "IP address",
+ .tag = DHCP_EB_YIADDR,
+ .type = &setting_type_ipv4,
+};
+
+/** IPv4 subnet mask setting
+ *
+ * Read per network device by ipv4_create_routes().
+ */
+const struct setting netmask_setting __setting ( SETTING_IP, netmask ) = {
+ .name = "netmask",
+ .description = "Subnet mask",
+ .tag = DHCP_SUBNET_MASK,
+ .type = &setting_type_ipv4,
+};
+
+/** Default gateway setting
+ *
+ * Read per network device by ipv4_create_routes().
+ */
+const struct setting gateway_setting __setting ( SETTING_IP, gateway ) = {
+ .name = "gateway",
+ .description = "Default gateway",
+ .tag = DHCP_ROUTERS,
+ .type = &setting_type_ipv4,
+};
+
+/**
+ * Create IPv4 routing table based on configured settings
+ *
+ * @ret rc		Return status code
+ *
+ * The existing routing table is discarded and rebuilt from scratch:
+ * one entry is added for every network device that has a non-zero
+ * IPv4 address setting.  If an entry cannot be allocated, -ENOMEM is
+ * returned and the entries added so far are left in place.
+ */
+static int ipv4_create_routes ( void ) {
+	struct ipv4_miniroute *route;
+	struct ipv4_miniroute *next;
+	struct net_device *netdev;
+	struct settings *settings;
+	struct in_addr address = { 0 };
+	struct in_addr netmask = { 0 };
+	struct in_addr gateway = { 0 };
+
+	/* Throw away the old routing table */
+	list_for_each_entry_safe ( route, next, &ipv4_miniroutes, list )
+		del_ipv4_miniroute ( route );
+
+	/* Build one route per configured network device */
+	for_each_netdev ( netdev ) {
+		settings = netdev_settings ( netdev );
+
+		/* Devices without an IPv4 address get no route */
+		address.s_addr = 0;
+		fetch_ipv4_setting ( settings, &ip_setting, &address );
+		if ( address.s_addr == 0 )
+			continue;
+
+		/* Subnet mask, falling back to the classful default.
+		 * NOTE(review): unlike the address, netmask and
+		 * gateway are not cleared before being fetched;
+		 * presumably fetch_ipv4_setting() overwrites them
+		 * even when the setting is absent - confirm.
+		 */
+		fetch_ipv4_setting ( settings, &netmask_setting, &netmask );
+		if ( netmask.s_addr == 0 ) {
+			if ( IN_CLASSA ( ntohl ( address.s_addr ) ) ) {
+				netmask.s_addr = htonl ( IN_CLASSA_NET );
+			} else if ( IN_CLASSB ( ntohl ( address.s_addr ) ) ) {
+				netmask.s_addr = htonl ( IN_CLASSB_NET );
+			} else if ( IN_CLASSC ( ntohl ( address.s_addr ) ) ) {
+				netmask.s_addr = htonl ( IN_CLASSC_NET );
+			}
+		}
+
+		/* Default gateway, if present */
+		fetch_ipv4_setting ( settings, &gateway_setting, &gateway );
+
+		/* Install the route */
+		if ( add_ipv4_miniroute ( netdev, address, netmask,
+					  gateway ) == NULL )
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/** IPv4 settings applicator
+ *
+ * Registers ipv4_create_routes() as the function used to apply
+ * settings for IPv4.
+ */
+struct settings_applicator ipv4_settings_applicator __settings_applicator = {
+ .apply = ipv4_create_routes,
+};
+
+/* Drag in ICMPv4 */
+REQUIRE_OBJECT ( icmpv4 );
diff --git a/qemu/roms/ipxe/src/net/ipv6.c b/qemu/roms/ipxe/src/net/ipv6.c
new file mode 100644
index 000000000..3c374168c
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/ipv6.c
@@ -0,0 +1,1111 @@
+/*
+ * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <byteswap.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/if_ether.h>
+#include <ipxe/crc32.h>
+#include <ipxe/fragment.h>
+#include <ipxe/ipstat.h>
+#include <ipxe/ndp.h>
+#include <ipxe/ipv6.h>
+
+/** @file
+ *
+ * IPv6 protocol
+ *
+ */
+
+/* Disambiguate the various error causes
+ *
+ * Each pair of macros below derives a distinct error from a generic
+ * one (EINVAL or ENOTSUP) by attaching a sub-code and a dedicated
+ * description string, so that the individual failure paths in this
+ * file can be told apart.
+ */
+#define EINVAL_LEN __einfo_error ( EINFO_EINVAL_LEN )
+#define EINFO_EINVAL_LEN \
+ __einfo_uniqify ( EINFO_EINVAL, 0x01, "Invalid length" )
+#define ENOTSUP_VER __einfo_error ( EINFO_ENOTSUP_VER )
+#define EINFO_ENOTSUP_VER \
+ __einfo_uniqify ( EINFO_ENOTSUP, 0x01, "Unsupported version" )
+#define ENOTSUP_HDR __einfo_error ( EINFO_ENOTSUP_HDR )
+#define EINFO_ENOTSUP_HDR \
+ __einfo_uniqify ( EINFO_ENOTSUP, 0x02, "Unsupported header type" )
+#define ENOTSUP_OPT __einfo_error ( EINFO_ENOTSUP_OPT )
+#define EINFO_ENOTSUP_OPT \
+ __einfo_uniqify ( EINFO_ENOTSUP, 0x03, "Unsupported option" )
+
+/** List of IPv6 miniroutes
+ *
+ * New entries and entries without a router are placed at the head of
+ * the list; entries with a router are moved to the tail (see
+ * ipv6_set_prefix()).  ipv6_route() uses the first usable entry it
+ * finds when walking the list from the head.
+ */
+struct list_head ipv6_miniroutes = LIST_HEAD_INIT ( ipv6_miniroutes );
+
+/** IPv6 statistics
+ *
+ * Updated by ipv6_tx() and ipv6_rx(), and also referenced by the
+ * IPv6 fragment reassembler.
+ */
+static struct ip_statistics ipv6_stats;
+
+/** IPv6 statistics family
+ *
+ * Publishes ipv6_stats as the statistics for IP version 6.
+ */
+struct ip_statistics_family
+ipv6_statistics_family __ip_statistics_family ( IP_STATISTICS_IPV6 ) = {
+ .version = 6,
+ .stats = &ipv6_stats,
+};
+
+/**
+ * Choose a debug colour for an IPv6 address
+ *
+ * @v in		IPv6 address
+ * @ret col		Debugging colour (for DBGC())
+ *
+ * The colour is obtained by running crc32_le() (with a zero seed)
+ * over the raw bytes of the address, so the same address always maps
+ * to the same colour.
+ */
+static uint32_t ipv6col ( struct in6_addr *in ) {
+	uint32_t colour;
+
+	colour = crc32_le ( 0, in, sizeof ( *in ) );
+	return colour;
+}
+
+/**
+ * Print an IPv6 routing table entry to the debug log
+ *
+ * @v miniroute		Routing table entry
+ *
+ * The entry is described as an "address" if it carries a usable
+ * source address and as a "prefix" otherwise; the router is appended
+ * only if one has been recorded.
+ */
+static inline __attribute__ (( always_inline )) void
+ipv6_dump_miniroute ( struct ipv6_miniroute *miniroute ) {
+	struct net_device *netdev = miniroute->netdev;
+	const char *kind;
+
+	/* Describe what this entry holds */
+	if ( miniroute->flags & IPV6_HAS_ADDRESS ) {
+		kind = "address";
+	} else {
+		kind = "prefix";
+	}
+	DBGC ( netdev, "IPv6 %s has %s %s/%d", netdev->name, kind,
+	       inet6_ntoa ( &miniroute->address ), miniroute->prefix_len );
+
+	/* Append router, if any */
+	if ( miniroute->flags & IPV6_HAS_ROUTER ) {
+		DBGC ( netdev, " router %s",
+		       inet6_ntoa ( &miniroute->router ) );
+	}
+
+	DBGC ( netdev, "\n" );
+}
+
+/**
+ * Test whether a network device owns a given IPv6 address
+ *
+ * @v netdev		Network device
+ * @v addr		IPv6 address
+ * @ret has_addr	Network device has this IPv6 address
+ *
+ * Only routing table entries that belong to this device and that
+ * carry an actual address (rather than just a prefix) are considered.
+ */
+int ipv6_has_addr ( struct net_device *netdev, struct in6_addr *addr ) {
+	struct ipv6_miniroute *entry;
+
+	list_for_each_entry ( entry, &ipv6_miniroutes, list ) {
+
+		/* Ignore entries belonging to other devices */
+		if ( entry->netdev != netdev )
+			continue;
+
+		/* Ignore entries that hold only a prefix */
+		if ( ! ( entry->flags & IPV6_HAS_ADDRESS ) )
+			continue;
+
+		/* Succeed on an exact address match */
+		if ( memcmp ( &entry->address, addr,
+			      sizeof ( entry->address ) ) == 0 )
+			return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * Test whether an IPv6 address lies within a routing entry's prefix
+ *
+ * @v miniroute		Routing table entry
+ * @v address		IPv6 address
+ * @ret is_on_link	Address is within this entry's local network
+ *
+ * The address and the entry's own address are compared one 32-bit
+ * word at a time, looking only at the bits selected by the entry's
+ * prefix mask.
+ */
+static int ipv6_is_on_link ( struct ipv6_miniroute *miniroute,
+			     struct in6_addr *address ) {
+	const unsigned int count = ( sizeof ( address->s6_addr32 ) /
+				     sizeof ( address->s6_addr32[0] ) );
+	unsigned int word;
+	uint32_t diff;
+
+	for ( word = 0 ; word < count ; word++ ) {
+		/* Any differing bit under the mask means off-link */
+		diff = ( address->s6_addr32[word] ^
+			 miniroute->address.s6_addr32[word] );
+		if ( diff & miniroute->prefix_mask.s6_addr32[word] )
+			return 0;
+	}
+
+	return 1;
+}
+
+/**
+ * Look up the IPv6 routing table entry covering a given address
+ *
+ * @v netdev		Network device
+ * @v address		IPv6 address
+ * @ret miniroute	Routing table entry, or NULL if not found
+ *
+ * Returns the first entry on this device whose prefix contains the
+ * address.
+ */
+static struct ipv6_miniroute * ipv6_miniroute ( struct net_device *netdev,
+						struct in6_addr *address ) {
+	struct ipv6_miniroute *entry;
+
+	list_for_each_entry ( entry, &ipv6_miniroutes, list ) {
+		if ( entry->netdev != netdev )
+			continue;
+		if ( ipv6_is_on_link ( entry, address ) )
+			return entry;
+	}
+
+	return NULL;
+}
+
+/**
+ * Add IPv6 routing table entry
+ *
+ * @v netdev		Network device
+ * @v address		IPv6 address (or prefix)
+ * @v prefix_len	Prefix length, in bits
+ * @v flags		Flags
+ * @ret miniroute	Routing table entry, or NULL on failure
+ *
+ * The new entry takes a reference to the network device and is added
+ * to the head of the routing table.
+ */
+static struct ipv6_miniroute * ipv6_add_miniroute ( struct net_device *netdev,
+						    struct in6_addr *address,
+						    unsigned int prefix_len,
+						    unsigned int flags ) {
+	struct ipv6_miniroute *miniroute;
+	uint8_t *prefix_mask;
+	unsigned int remaining;
+
+	/* Create routing table entry */
+	miniroute = zalloc ( sizeof ( *miniroute ) );
+	if ( ! miniroute )
+		return NULL;
+	miniroute->netdev = netdev_get ( netdev );
+	memcpy ( &miniroute->address, address, sizeof ( miniroute->address ) );
+	miniroute->prefix_len = prefix_len;
+	miniroute->flags = flags;
+
+	/* Construct prefix mask: whole bytes first */
+	assert ( prefix_len <= ( 8 * sizeof ( miniroute->prefix_mask ) ) );
+	prefix_mask = miniroute->prefix_mask.s6_addr;
+	for ( remaining = prefix_len ; remaining >= 8 ; remaining -= 8 )
+		*(prefix_mask++) = 0xff;
+
+	/* Then set the leading bits of any partial byte.  The entry
+	 * was zero-filled by zalloc(), so this byte must be assigned
+	 * outright: shifting the existing (zero) value in place
+	 * would leave it zero and silently shorten the prefix to a
+	 * multiple of eight bits.
+	 */
+	if ( remaining )
+		*prefix_mask = ( ( 0xff << ( 8 - remaining ) ) & 0xff );
+
+	/* Add to start of routing table */
+	list_add ( &miniroute->list, &ipv6_miniroutes );
+	ipv6_dump_miniroute ( miniroute );
+
+	return miniroute;
+}
+
+/**
+ * Define IPv6 on-link prefix
+ *
+ * @v netdev		Network device
+ * @v prefix		IPv6 address prefix
+ * @v prefix_len	Prefix length
+ * @v router		Router address (or NULL)
+ * @ret rc		Return status code
+ *
+ * An existing routing table entry covering the prefix is reused if
+ * there is one; otherwise a new entry is created.  Entries with a
+ * router are moved to the end of the routing table and entries
+ * without a router to the start.
+ */
+int ipv6_set_prefix ( struct net_device *netdev, struct in6_addr *prefix,
+		      unsigned int prefix_len, struct in6_addr *router ) {
+	struct ipv6_miniroute *miniroute;
+	int had_router;
+	int changed;
+
+	/* Find or create routing table entry */
+	miniroute = ipv6_miniroute ( netdev, prefix );
+	if ( ! miniroute ) {
+		miniroute = ipv6_add_miniroute ( netdev, prefix,
+						 prefix_len, 0 );
+		if ( ! miniroute )
+			return -ENOMEM;
+	}
+	had_router = ( miniroute->flags & IPV6_HAS_ROUTER );
+
+	/* Detach entry, ready to be reinserted at its new position */
+	list_del ( &miniroute->list );
+
+	if ( ! router ) {
+		/* No router: forget any old router, move to start */
+		changed = had_router;
+		miniroute->flags &= ~IPV6_HAS_ROUTER;
+		list_add ( &miniroute->list, &ipv6_miniroutes );
+	} else {
+		/* Router given: record it, move to end */
+		changed = 1;
+		if ( had_router &&
+		     ( memcmp ( &miniroute->router, router,
+				sizeof ( miniroute->router ) ) == 0 ) )
+			changed = 0;
+		miniroute->flags |= IPV6_HAS_ROUTER;
+		memcpy ( &miniroute->router, router,
+			 sizeof ( miniroute->router ) );
+		list_add_tail ( &miniroute->list, &ipv6_miniroutes );
+	}
+
+	/* Log the entry only if something actually changed */
+	if ( changed )
+		ipv6_dump_miniroute ( miniroute );
+
+	return 0;
+}
+
+/**
+ * Add IPv6 on-link address
+ *
+ * @v netdev		Network device
+ * @v address		IPv6 address
+ * @ret rc		Return status code
+ *
+ * An on-link prefix for the address must already exist; if none is
+ * found then -EADDRNOTAVAIL is returned.  The address overwrites
+ * whatever address (or bare prefix) the entry previously held.
+ */
+int ipv6_set_address ( struct net_device *netdev, struct in6_addr *address ) {
+	struct ipv6_miniroute *miniroute;
+	int changed = 1;
+
+	/* Find routing table entry */
+	miniroute = ipv6_miniroute ( netdev, address );
+	if ( ! miniroute )
+		return -EADDRNOTAVAIL;
+
+	/* Nothing changes if this exact address is already recorded */
+	if ( ( miniroute->flags & IPV6_HAS_ADDRESS ) &&
+	     ( memcmp ( &miniroute->address, address,
+			sizeof ( miniroute->address ) ) == 0 ) )
+		changed = 0;
+
+	/* Record address */
+	memcpy ( &miniroute->address, address, sizeof ( miniroute->address ) );
+	miniroute->flags |= IPV6_HAS_ADDRESS;
+
+	if ( changed )
+		ipv6_dump_miniroute ( miniroute );
+
+	return 0;
+}
+
+/**
+ * Perform IPv6 routing
+ *
+ * @v scope_id		Destination address scope ID (for link-local addresses)
+ * @v dest		Final destination address
+ * @ret dest		Next hop destination address
+ * @ret miniroute	Routing table entry to use, or NULL if no route
+ *
+ * The first usable entry in the routing table wins.  An entry is
+ * usable only if its network device is open and it has a source
+ * address.  When a route via a router is chosen, the destination
+ * pointer is redirected to that router's address.
+ */
+static struct ipv6_miniroute * ipv6_route ( unsigned int scope_id,
+					    struct in6_addr **dest ) {
+	struct ipv6_miniroute *miniroute;
+	int scoped;
+
+	/* Link-local and multicast destinations are non-global and
+	 * are matched purely by scope ID.
+	 */
+	scoped = ( IN6_IS_ADDR_LINKLOCAL ( *dest ) ||
+		   IN6_IS_ADDR_MULTICAST ( *dest ) );
+
+	list_for_each_entry ( miniroute, &ipv6_miniroutes, list ) {
+
+		/* Skip closed network devices */
+		if ( ! netdev_is_open ( miniroute->netdev ) )
+			continue;
+
+		/* Skip routing table entries with no usable source address */
+		if ( ! ( miniroute->flags & IPV6_HAS_ADDRESS ) )
+			continue;
+
+		/* Non-global destination: the scope ID must match
+		 * this network device.
+		 */
+		if ( scoped ) {
+			if ( miniroute->netdev->index == scope_id )
+				return miniroute;
+			continue;
+		}
+
+		/* On-link global destination: deliver directly */
+		if ( ipv6_is_on_link ( miniroute, *dest ) )
+			return miniroute;
+
+		/* Off-link global destination: go via the default
+		 * gateway, if this entry has one.
+		 */
+		if ( miniroute->flags & IPV6_HAS_ROUTER ) {
+			*dest = &miniroute->router;
+			return miniroute;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * Determine transmitting network device
+ *
+ * @v st_dest		Destination network-layer address
+ * @ret netdev		Transmitting network device, or NULL
+ *
+ * The destination is run through the IPv6 routing table; the network
+ * device of the selected route (if any) is returned.
+ */
+static struct net_device * ipv6_netdev ( struct sockaddr_tcpip *st_dest ) {
+	struct sockaddr_in6 *sin6_dest = ( ( struct sockaddr_in6 * ) st_dest );
+	struct in6_addr *next_hop = &sin6_dest->sin6_addr;
+	struct ipv6_miniroute *miniroute;
+
+	miniroute = ipv6_route ( sin6_dest->sin6_scope_id, &next_hop );
+	return ( miniroute ? miniroute->netdev : NULL );
+}
+
+/**
+ * Check that received options can be safely ignored
+ *
+ * @v iphdr		IPv6 header
+ * @v options		Options extension header
+ * @v len		Maximum length of header
+ * @ret rc		Return status code
+ *
+ * Walks every option within the extension header.  Returns
+ * -ENOTSUP_OPT if any option is of a type that may not be ignored,
+ * and -EINVAL_LEN if an option's length byte would lie beyond the
+ * end of the header.
+ */
+static int ipv6_check_options ( struct ipv6_header *iphdr,
+				struct ipv6_options_header *options,
+				size_t len ) {
+	struct ipv6_option *option = options->options;
+	struct ipv6_option *end = ( ( ( void * ) options ) + len );
+
+	while ( option < end ) {
+
+		/* Reject options that must not be ignored */
+		if ( ! IPV6_CAN_IGNORE_OPT ( option->type ) ) {
+			DBGC ( ipv6col ( &iphdr->src ), "IPv6 unrecognised "
+			       "option type %#02x:\n", option->type );
+			DBGC_HDA ( ipv6col ( &iphdr->src ), 0,
+				   options, len );
+			return -ENOTSUP_OPT;
+		}
+
+		/* Pad1 is a single byte with no length field */
+		if ( option->type == IPV6_OPT_PAD1 ) {
+			option = ( ( ( void * ) option ) + 1 );
+			continue;
+		}
+
+		/* All other options carry a length field: make sure
+		 * that it lies within the header before reading it.
+		 */
+		if ( ( ( void * ) ( &option->len + 1 ) ) >
+		     ( ( void * ) end ) ) {
+			DBGC ( ipv6col ( &iphdr->src ), "IPv6 truncated "
+			       "option type %#02x:\n", option->type );
+			DBGC_HDA ( ipv6col ( &iphdr->src ), 0,
+				   options, len );
+			return -EINVAL_LEN;
+		}
+
+		/* Skip over option value */
+		option = ( ( ( void * ) option->value ) + option->len );
+	}
+
+	return 0;
+}
+
+/**
+ * Check if fragment matches fragment reassembly buffer
+ *
+ * @v fragment		Fragment reassembly buffer
+ * @v iobuf		I/O buffer
+ * @v hdrlen		Length of non-fragmentable portion of I/O buffer
+ * @ret is_fragment	Fragment matches this reassembly buffer
+ *
+ * Two fragments belong together if they have the same source address
+ * and the same fragment identification value.
+ */
+static int ipv6_is_fragment ( struct fragment *fragment,
+			      struct io_buffer *iobuf, size_t hdrlen ) {
+	struct ipv6_header *stored_iphdr = fragment->iobuf->data;
+	struct ipv6_header *iphdr = iobuf->data;
+	struct ipv6_fragment_header *stored_fhdr;
+	struct ipv6_fragment_header *fhdr;
+
+	/* In each buffer the fragment header occupies the final
+	 * bytes of the non-fragmentable portion.
+	 */
+	stored_fhdr = ( fragment->iobuf->data + fragment->hdrlen -
+			sizeof ( *stored_fhdr ) );
+	fhdr = ( iobuf->data + hdrlen - sizeof ( *fhdr ) );
+
+	/* Source addresses must match */
+	if ( memcmp ( &iphdr->src, &stored_iphdr->src,
+		      sizeof ( iphdr->src ) ) != 0 )
+		return 0;
+
+	/* Identification values must match */
+	return ( fhdr->ident == stored_fhdr->ident );
+}
+
+/**
+ * Get fragment offset
+ *
+ * @v iobuf		I/O buffer
+ * @v hdrlen		Length of non-fragmentable portion of I/O buffer
+ * @ret offset		Offset
+ *
+ * The offset is taken from the fragment header found at the end of
+ * the non-fragmentable portion, after conversion to host byte order.
+ */
+static size_t ipv6_fragment_offset ( struct io_buffer *iobuf, size_t hdrlen ) {
+	struct ipv6_fragment_header *fhdr;
+	uint16_t offset_more;
+
+	fhdr = ( iobuf->data + hdrlen - sizeof ( *fhdr ) );
+	offset_more = ntohs ( fhdr->offset_more );
+
+	return ( offset_more & IPV6_MASK_OFFSET );
+}
+
+/**
+ * Check if more fragments exist
+ *
+ * @v iobuf		I/O buffer
+ * @v hdrlen		Length of non-fragmentable portion of I/O buffer
+ * @ret more_frags	More fragments exist
+ *
+ * The result is non-zero (though not necessarily 1) if the "more
+ * fragments" bit is set in the fragment header.
+ */
+static int ipv6_more_fragments ( struct io_buffer *iobuf, size_t hdrlen ) {
+	struct ipv6_fragment_header *fhdr;
+
+	/* Fragment header ends the non-fragmentable portion */
+	fhdr = ( iobuf->data + hdrlen - sizeof ( *fhdr ) );
+
+	/* Test the flag directly in network byte order */
+	return ( fhdr->offset_more & htons ( IPV6_MASK_MOREFRAGS ) );
+}
+
+/** Fragment reassembler
+ *
+ * Bundles the IPv6-specific fragment callbacks defined above, for
+ * use by fragment_reassemble() in ipv6_rx().  The reassembler also
+ * references the shared IPv6 statistics.
+ */
+static struct fragment_reassembler ipv6_reassembler = {
+ .list = LIST_HEAD_INIT ( ipv6_reassembler.list ),
+ .is_fragment = ipv6_is_fragment,
+ .fragment_offset = ipv6_fragment_offset,
+ .more_fragments = ipv6_more_fragments,
+ .stats = &ipv6_stats,
+};
+
+/**
+ * Calculate IPv6 pseudo-header checksum
+ *
+ * @v iphdr		IPv6 header
+ * @v len		Payload length
+ * @v next_header	Next header type
+ * @v csum		Existing checksum
+ * @ret csum		Updated checksum
+ *
+ * A pseudo-header is assembled from the source and destination
+ * addresses of the IPv6 header together with the payload length and
+ * next header type, and is then folded into the existing checksum.
+ */
+static uint16_t ipv6_pshdr_chksum ( struct ipv6_header *iphdr, size_t len,
+				    int next_header, uint16_t csum ) {
+	struct ipv6_pseudo_header pseudo;
+
+	/* Fill in length, padding and protocol */
+	pseudo.len = htonl ( len );
+	memset ( pseudo.zero, 0, sizeof ( pseudo.zero ) );
+	pseudo.next_header = next_header;
+
+	/* Fill in addresses */
+	memcpy ( &pseudo.src, &iphdr->src, sizeof ( pseudo.src ) );
+	memcpy ( &pseudo.dest, &iphdr->dest, sizeof ( pseudo.dest ) );
+
+	/* Fold pseudo-header into the running checksum */
+	csum = tcpip_continue_chksum ( csum, &pseudo, sizeof ( pseudo ) );
+	return csum;
+}
+
+/**
+ * Transmit IPv6 packet
+ *
+ * @v iobuf I/O buffer
+ * @v tcpip_protocol Transport-layer protocol
+ * @v st_src Source network-layer address
+ * @v st_dest Destination network-layer address
+ * @v netdev Network device to use if no route found, or NULL
+ * @v trans_csum Transport-layer checksum to complete, or NULL
+ * @ret rc Status
+ *
+ * This function expects a transport-layer segment and prepends the
+ * IPv6 header.
+ *
+ * The source address is taken from the selected routing table entry
+ * unless the caller supplies a non-unspecified source address, which
+ * then takes precedence.  If neither is available the source address
+ * is left as all zeros.
+ *
+ * The I/O buffer is freed here if routing or multicast hashing fails.
+ * Once the buffer has been passed to net_tx() or ndp_tx() it is not
+ * freed here, even on failure (presumably because those functions
+ * take ownership of it -- confirm against their definitions).
+ */
+static int ipv6_tx ( struct io_buffer *iobuf,
+ struct tcpip_protocol *tcpip_protocol,
+ struct sockaddr_tcpip *st_src,
+ struct sockaddr_tcpip *st_dest,
+ struct net_device *netdev,
+ uint16_t *trans_csum ) {
+ struct sockaddr_in6 *sin6_src = ( ( struct sockaddr_in6 * ) st_src );
+ struct sockaddr_in6 *sin6_dest = ( ( struct sockaddr_in6 * ) st_dest );
+ struct ipv6_miniroute *miniroute;
+ struct ipv6_header *iphdr;
+ struct in6_addr *src = NULL;
+ struct in6_addr *next_hop;
+ uint8_t ll_dest_buf[MAX_LL_ADDR_LEN];
+ const void *ll_dest;
+ size_t len;
+ int rc;
+
+ /* Update statistics */
+ ipv6_stats.out_requests++;
+
+ /* Fill up the IPv6 header, except source address
+ *
+ * The payload length is sampled before the header is pushed,
+ * so it excludes the IPv6 header itself.
+ */
+ len = iob_len ( iobuf );
+ iphdr = iob_push ( iobuf, sizeof ( *iphdr ) );
+ memset ( iphdr, 0, sizeof ( *iphdr ) );
+ iphdr->ver_tc_label = htonl ( IPV6_VER );
+ iphdr->len = htons ( len );
+ iphdr->next_header = tcpip_protocol->tcpip_proto;
+ iphdr->hop_limit = IPV6_HOP_LIMIT;
+ memcpy ( &iphdr->dest, &sin6_dest->sin6_addr, sizeof ( iphdr->dest ) );
+
+ /* Use routing table to identify next hop and transmitting netdev
+ *
+ * ipv6_route() may redirect next_hop to a router address; the
+ * destination in the IPv6 header is unaffected.  If no route
+ * is found, the caller-supplied netdev (if any) is used.
+ */
+ next_hop = &iphdr->dest;
+ if ( ( miniroute = ipv6_route ( sin6_dest->sin6_scope_id,
+ &next_hop ) ) != NULL ) {
+ src = &miniroute->address;
+ netdev = miniroute->netdev;
+ }
+ if ( ! netdev ) {
+ DBGC ( ipv6col ( &iphdr->dest ), "IPv6 has no route to %s\n",
+ inet6_ntoa ( &iphdr->dest ) );
+ ipv6_stats.out_no_routes++;
+ rc = -ENETUNREACH;
+ goto err;
+ }
+ if ( sin6_src && ! IN6_IS_ADDR_UNSPECIFIED ( &sin6_src->sin6_addr ) )
+ src = &sin6_src->sin6_addr;
+ if ( src )
+ memcpy ( &iphdr->src, src, sizeof ( iphdr->src ) );
+
+ /* Fix up checksums */
+ if ( trans_csum ) {
+ *trans_csum = ipv6_pshdr_chksum ( iphdr, len,
+ tcpip_protocol->tcpip_proto,
+ *trans_csum );
+ }
+
+ /* Print IPv6 header for debugging
+ *
+ * Two separate calls are used since inet6_ntoa() returns a
+ * pointer to a single static buffer.
+ */
+ DBGC2 ( ipv6col ( &iphdr->dest ), "IPv6 TX %s->",
+ inet6_ntoa ( &iphdr->src ) );
+ DBGC2 ( ipv6col ( &iphdr->dest ), "%s len %zd next %d\n",
+ inet6_ntoa ( &iphdr->dest ), len, iphdr->next_header );
+
+ /* Calculate link-layer destination address, if possible */
+ if ( IN6_IS_ADDR_MULTICAST ( next_hop ) ) {
+ /* Multicast address */
+ ipv6_stats.out_mcast_pkts++;
+ if ( ( rc = netdev->ll_protocol->mc_hash ( AF_INET6, next_hop,
+ ll_dest_buf ) ) !=0){
+ DBGC ( ipv6col ( &iphdr->dest ), "IPv6 could not hash "
+ "multicast %s: %s\n", inet6_ntoa ( next_hop ),
+ strerror ( rc ) );
+ goto err;
+ }
+ ll_dest = ll_dest_buf;
+ } else {
+ /* Unicast address */
+ ll_dest = NULL;
+ }
+
+ /* Update statistics */
+ ipv6_stats.out_transmits++;
+ ipv6_stats.out_octets += iob_len ( iobuf );
+
+ /* Hand off to link layer (via NDP if applicable)
+ *
+ * A known link-layer destination (the multicast case) goes
+ * straight to net_tx(); otherwise the packet is handed to
+ * ndp_tx() along with the next hop address.
+ */
+ if ( ll_dest ) {
+ if ( ( rc = net_tx ( iobuf, netdev, &ipv6_protocol, ll_dest,
+ netdev->ll_addr ) ) != 0 ) {
+ DBGC ( ipv6col ( &iphdr->dest ), "IPv6 could not "
+ "transmit packet via %s: %s\n",
+ netdev->name, strerror ( rc ) );
+ return rc;
+ }
+ } else {
+ if ( ( rc = ndp_tx ( iobuf, netdev, next_hop, &iphdr->src,
+ netdev->ll_addr ) ) != 0 ) {
+ DBGC ( ipv6col ( &iphdr->dest ), "IPv6 could not "
+ "transmit packet via %s: %s\n",
+ netdev->name, strerror ( rc ) );
+ return rc;
+ }
+ }
+
+ return 0;
+
+ err:
+ free_iob ( iobuf );
+ return rc;
+}
+
+/**
+ * Process incoming IPv6 packets
+ *
+ * @v iobuf		I/O buffer
+ * @v netdev		Network device
+ * @v ll_dest		Link-layer destination address
+ * @v ll_source		Link-layer source address
+ * @v flags		Packet flags
+ * @ret rc		Return status code
+ *
+ * This function expects an IPv6 network datagram.  It validates the
+ * fixed header, walks any extension headers (reassembling fragments
+ * where required) and hands the payload to the transport layer.
+ *
+ * The I/O buffer is freed here on every validation failure.  Once
+ * the buffer has been passed to fragment_reassemble() or tcpip_rx()
+ * it is no longer freed here (presumably because those functions
+ * take ownership of it -- confirm against their definitions).
+ */
+static int ipv6_rx ( struct io_buffer *iobuf, struct net_device *netdev,
+		     const void *ll_dest __unused,
+		     const void *ll_source __unused,
+		     unsigned int flags ) {
+	struct ipv6_header *iphdr = iobuf->data;
+	union ipv6_extension_header *ext;
+	union {
+		struct sockaddr_in6 sin6;
+		struct sockaddr_tcpip st;
+	} src, dest;
+	uint16_t pshdr_csum;
+	size_t len;
+	size_t hdrlen;
+	size_t extlen;
+	int this_header;
+	int next_header;
+	int rc;
+
+	/* Update statistics */
+	ipv6_stats.in_receives++;
+	ipv6_stats.in_octets += iob_len ( iobuf );
+	if ( flags & LL_BROADCAST ) {
+		ipv6_stats.in_bcast_pkts++;
+	} else if ( flags & LL_MULTICAST ) {
+		ipv6_stats.in_mcast_pkts++;
+	}
+
+	/* Sanity check the IPv6 header */
+	if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) {
+		DBGC ( ipv6col ( &iphdr->src ), "IPv6 packet too short at %zd "
+		       "bytes (min %zd bytes)\n", iob_len ( iobuf ),
+		       sizeof ( *iphdr ) );
+		rc = -EINVAL_LEN;
+		goto err_header;
+	}
+	if ( ( iphdr->ver_tc_label & htonl ( IPV6_MASK_VER ) ) !=
+	     htonl ( IPV6_VER ) ) {
+		DBGC ( ipv6col ( &iphdr->src ), "IPv6 version %#08x not "
+		       "supported\n", ntohl ( iphdr->ver_tc_label ) );
+		rc = -ENOTSUP_VER;
+		goto err_header;
+	}
+
+	/* Truncate packet to specified length
+	 *
+	 * The length field counts only the bytes that follow the
+	 * fixed header, so it must be compared against the buffer
+	 * length minus that header.  (The buffer is already known to
+	 * hold a complete header, so this subtraction cannot wrap.)
+	 * Comparing against the whole buffer length would allow the
+	 * truncation amount below to wrap around and grow the buffer
+	 * beyond the received data.
+	 */
+	len = ntohs ( iphdr->len );
+	if ( len > ( iob_len ( iobuf ) - sizeof ( *iphdr ) ) ) {
+		DBGC ( ipv6col ( &iphdr->src ), "IPv6 length too long at %zd "
+		       "bytes (packet is %zd bytes)\n", len, iob_len ( iobuf ));
+		ipv6_stats.in_truncated_pkts++;
+		rc = -EINVAL_LEN;
+		goto err_other;
+	}
+	iob_unput ( iobuf, ( iob_len ( iobuf ) - len - sizeof ( *iphdr ) ) );
+	hdrlen = sizeof ( *iphdr );
+
+	/* Print IPv6 header for debugging */
+	DBGC2 ( ipv6col ( &iphdr->src ), "IPv6 RX %s<-",
+		inet6_ntoa ( &iphdr->dest ) );
+	DBGC2 ( ipv6col ( &iphdr->src ), "%s len %zd next %d\n",
+		inet6_ntoa ( &iphdr->src ), len, iphdr->next_header );
+
+	/* Discard unicast packets not destined for us */
+	if ( ( ! ( flags & LL_MULTICAST ) ) &&
+	     ( ! ipv6_has_addr ( netdev, &iphdr->dest ) ) ) {
+		DBGC ( ipv6col ( &iphdr->src ), "IPv6 discarding non-local "
+		       "unicast packet for %s\n", inet6_ntoa ( &iphdr->dest ) );
+		ipv6_stats.in_addr_errors++;
+		rc = -EPIPE;
+		goto err_other;
+	}
+
+	/* Process any extension headers */
+	next_header = iphdr->next_header;
+	while ( 1 ) {
+
+		/* Extract extension header */
+		this_header = next_header;
+		ext = ( iobuf->data + hdrlen );
+		extlen = sizeof ( ext->pad );
+		if ( iob_len ( iobuf ) < ( hdrlen + extlen ) ) {
+			DBGC ( ipv6col ( &iphdr->src ), "IPv6 too short for "
+			       "extension header type %d at %zd bytes (min "
+			       "%zd bytes)\n", this_header,
+			       ( iob_len ( iobuf ) - hdrlen ), extlen );
+			rc = -EINVAL_LEN;
+			goto err_header;
+		}
+
+		/* Determine size of extension header (if applicable) */
+		if ( ( this_header == IPV6_HOPBYHOP ) ||
+		     ( this_header == IPV6_DESTINATION ) ||
+		     ( this_header == IPV6_ROUTING ) ) {
+			/* Length field is present
+			 *
+			 * NOTE(review): the length field is added
+			 * here as a byte count.  RFC 2460 expresses
+			 * this field in units of 8 octets -- confirm
+			 * against the header structure definition.
+			 */
+			extlen += ext->common.len;
+		} else if ( this_header == IPV6_FRAGMENT ) {
+			/* Length field is reserved and ignored (RFC2460) */
+		} else {
+			/* Not an extension header; assume rest is payload */
+			break;
+		}
+		if ( iob_len ( iobuf ) < ( hdrlen + extlen ) ) {
+			DBGC ( ipv6col ( &iphdr->src ), "IPv6 too short for "
+			       "extension header type %d at %zd bytes (min "
+			       "%zd bytes)\n", this_header,
+			       ( iob_len ( iobuf ) - hdrlen ), extlen );
+			rc = -EINVAL_LEN;
+			goto err_header;
+		}
+		hdrlen += extlen;
+		next_header = ext->common.next_header;
+		DBGC2 ( ipv6col ( &iphdr->src ), "IPv6 RX %s<-",
+			inet6_ntoa ( &iphdr->dest ) );
+		DBGC2 ( ipv6col ( &iphdr->src ), "%s ext type %d len %zd next "
+			"%d\n", inet6_ntoa ( &iphdr->src ), this_header,
+			extlen, next_header );
+
+		/* Process this extension header */
+		if ( ( this_header == IPV6_HOPBYHOP ) ||
+		     ( this_header == IPV6_DESTINATION ) ) {
+
+			/* Check that all options can be ignored */
+			if ( ( rc = ipv6_check_options ( iphdr, &ext->options,
+							 extlen ) ) != 0 )
+				goto err_header;
+
+		} else if ( this_header == IPV6_FRAGMENT ) {
+
+			/* Reassemble fragments
+			 *
+			 * A NULL result means that there is nothing
+			 * (yet) to process.  Otherwise processing
+			 * continues with the returned buffer, whose
+			 * header must be located afresh.
+			 */
+			iobuf = fragment_reassemble ( &ipv6_reassembler, iobuf,
+						      &hdrlen );
+			if ( ! iobuf )
+				return 0;
+			iphdr = iobuf->data;
+		}
+	}
+
+	/* Construct socket address, calculate pseudo-header checksum,
+	 * and hand off to transport layer
+	 */
+	memset ( &src, 0, sizeof ( src ) );
+	src.sin6.sin6_family = AF_INET6;
+	memcpy ( &src.sin6.sin6_addr, &iphdr->src,
+		 sizeof ( src.sin6.sin6_addr ) );
+	src.sin6.sin6_scope_id = netdev->index;
+	memset ( &dest, 0, sizeof ( dest ) );
+	dest.sin6.sin6_family = AF_INET6;
+	memcpy ( &dest.sin6.sin6_addr, &iphdr->dest,
+		 sizeof ( dest.sin6.sin6_addr ) );
+	dest.sin6.sin6_scope_id = netdev->index;
+	iob_pull ( iobuf, hdrlen );
+	pshdr_csum = ipv6_pshdr_chksum ( iphdr, iob_len ( iobuf ),
+					 next_header, TCPIP_EMPTY_CSUM );
+	if ( ( rc = tcpip_rx ( iobuf, netdev, next_header, &src.st, &dest.st,
+			       pshdr_csum, &ipv6_stats ) ) != 0 ) {
+		DBGC ( ipv6col ( &src.sin6.sin6_addr ), "IPv6 received packet "
+		       "rejected by stack: %s\n", strerror ( rc ) );
+		return rc;
+	}
+
+	return 0;
+
+ err_header:
+	ipv6_stats.in_hdr_errors++;
+ err_other:
+	free_iob ( iobuf );
+	return rc;
+}
+
+/**
+ * Parse IPv6 address
+ *
+ * @v string IPv6 address string
+ * @ret in IPv6 address to fill in
+ * @ret rc Return status code
+ *
+ * The string is parsed as a sequence of hexadecimal words (each at
+ * most 0xffff) separated by ':'.  A single "::" may be used to stand
+ * for a run of zero words.  Any other separator, a second "::", too
+ * many words, or too few words without a "::" yields -EINVAL.
+ *
+ * The address is written word by word as parsing proceeds, so its
+ * contents are not meaningful if an error is returned.
+ *
+ * NOTE(review): a string with seven explicit words followed by a
+ * trailing "::" appears to hit the "too many words" check -- confirm
+ * whether such input needs to be accepted.
+ */
+int inet6_aton ( const char *string, struct in6_addr *in ) {
+ uint16_t *word = in->s6_addr16;
+ uint16_t *end = ( word + ( sizeof ( in->s6_addr16 ) /
+ sizeof ( in->s6_addr16[0] ) ) );
+ uint16_t *pad = NULL;
+ const char *nptr = string;
+ char *endptr;
+ unsigned long value;
+ size_t pad_len;
+ size_t move_len;
+
+ /* Parse string */
+ while ( 1 ) {
+
+ /* Parse current word
+ *
+ * An empty word (as found either side of a "::")
+ * parses as zero with endptr left equal to nptr.
+ */
+ value = strtoul ( nptr, &endptr, 16 );
+ if ( value > 0xffff ) {
+ DBG ( "IPv6 invalid word value %#lx in \"%s\"\n",
+ value, string );
+ return -EINVAL;
+ }
+ *(word++) = htons ( value );
+
+ /* Parse separator
+ *
+ * An empty word anywhere other than at the very start
+ * of the string marks the position of the "::".
+ */
+ if ( ! *endptr )
+ break;
+ if ( *endptr != ':' ) {
+ DBG ( "IPv6 invalid separator '%c' in \"%s\"\n",
+ *endptr, string );
+ return -EINVAL;
+ }
+ if ( ( endptr == nptr ) && ( nptr != string ) ) {
+ if ( pad ) {
+ DBG ( "IPv6 invalid multiple \"::\" in "
+ "\"%s\"\n", string );
+ return -EINVAL;
+ }
+ pad = word;
+ }
+ nptr = ( endptr + 1 );
+
+ /* Check for overrun */
+ if ( word == end ) {
+ DBG ( "IPv6 too many words in \"%s\"\n", string );
+ return -EINVAL;
+ }
+ }
+
+ /* Insert padding if specified
+ *
+ * The words parsed after the "::" are moved up to the end of
+ * the address, and the gap that this opens up is zero-filled.
+ */
+ if ( pad ) {
+ move_len = ( ( ( void * ) word ) - ( ( void * ) pad ) );
+ pad_len = ( ( ( void * ) end ) - ( ( void * ) word ) );
+ memmove ( ( ( ( void * ) pad ) + pad_len ), pad, move_len );
+ memset ( pad, 0, pad_len );
+ } else if ( word != end ) {
+ DBG ( "IPv6 underlength address \"%s\"\n", string );
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * Convert IPv6 address to standard notation
+ *
+ * @v in		IPv6 address
+ * @ret string		IPv6 address string in canonical format
+ *
+ * RFC5952 defines the canonical format for IPv6 textual representation.
+ *
+ * The longest run of two or more zero words is abbreviated to "::";
+ * a lone zero word is never abbreviated.  The returned string lives
+ * in a static buffer, and so is overwritten by the next call.
+ */
+char * inet6_ntoa ( const struct in6_addr *in ) {
+	static char buf[41]; /* ":xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx" */
+	char *out = buf;
+	char *longest_start = NULL;
+	char *start = NULL;
+	int longest_len = 1;
+	int len = 0;
+	char *dest;
+	char *tail;
+	unsigned int i;
+	uint16_t value;
+
+	/* Format address, keeping track of longest run of zeros.
+	 * Every word is written with a leading ':', so each zero
+	 * word occupies exactly two characters (":0").
+	 */
+	for ( i = 0 ; i < ( sizeof ( in->s6_addr16 ) /
+			    sizeof ( in->s6_addr16[0] ) ) ; i++ ) {
+		value = ntohs ( in->s6_addr16[i] );
+		if ( value == 0 ) {
+			if ( len++ == 0 )
+				start = out;
+			if ( len > longest_len ) {
+				longest_start = start;
+				longest_len = len;
+			}
+		} else {
+			len = 0;
+		}
+		out += sprintf ( out, ":%x", value );
+	}
+
+	/* Abbreviate longest run of zeros, if applicable */
+	if ( longest_start ) {
+		/* Close the gap by sliding the remainder of the
+		 * string down over the run.  Source and destination
+		 * overlap within the same buffer, so memmove() must
+		 * be used rather than strcpy().
+		 */
+		dest = ( longest_start + 1 );
+		tail = ( longest_start + ( 2 * longest_len ) );
+		memmove ( dest, tail, ( strlen ( tail ) + 1 ) );
+
+		/* Ensure that the run is represented by "::", even
+		 * if it was at the very end of the address.
+		 */
+		if ( dest[0] == '\0' )
+			dest[1] = '\0';
+		dest[0] = ':';
+	}
+
+	/* Skip the leading ':' unless it is part of a leading "::" */
+	return ( ( longest_start == buf ) ? buf : ( buf + 1 ) );
+}
+
+/**
+ * Transcribe IPv6 address
+ *
+ * @v net_addr		IPv6 address
+ * @ret string		IPv6 address in standard notation
+ *
+ * Thin adapter giving inet6_ntoa() the untyped-pointer signature
+ * used by the IPv6 network-layer protocol descriptor.
+ */
+static const char * ipv6_ntoa ( const void *net_addr ) {
+	const struct in6_addr *in = net_addr;
+
+	return inet6_ntoa ( in );
+}
+
+/**
+ * Transcribe IPv6 socket address
+ *
+ * @v sa		Socket address
+ * @ret string		Socket address in standard notation
+ *
+ * Link-local and multicast addresses are suffixed with "%" and the
+ * name of the network device identified by the scope ID (or
+ * "UNKNOWN" if there is no such device).  The returned string lives
+ * in a static buffer.
+ */
+static const char * ipv6_sock_ntoa ( struct sockaddr *sa ) {
+	static char buf[ 39 /* "xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx" */ +
+			 1 /* "%" */ + NETDEV_NAME_LEN + 1 /* NUL */ ];
+	struct sockaddr_in6 *sin6 = ( ( struct sockaddr_in6 * ) sa );
+	struct in6_addr *in = &sin6->sin6_addr;
+	struct net_device *netdev;
+	const char *separator = "";
+	const char *netdev_name = "";
+
+	/* Identify network device, if applicable */
+	if ( IN6_IS_ADDR_LINKLOCAL ( in ) || IN6_IS_ADDR_MULTICAST ( in ) ) {
+		netdev = find_netdev_by_index ( sin6->sin6_scope_id );
+		separator = "%";
+		netdev_name = ( netdev ? netdev->name : "UNKNOWN" );
+	}
+
+	/* Format socket address */
+	snprintf ( buf, sizeof ( buf ), "%s%s%s", inet6_ntoa ( in ),
+		   separator, netdev_name );
+	return buf;
+}
+
+/**
+ * Parse IPv6 socket address
+ *
+ * @v string		Socket address string
+ * @v sa		Socket address to fill in
+ * @ret rc		Return status code
+ *
+ * Accepts an IPv6 address, optionally wrapped in "[...]" and
+ * optionally followed by "%" and a network device name.  The scope
+ * ID is written only if a device name is given; the address is
+ * written only if the whole string parses successfully.
+ */
+static int ipv6_sock_aton ( const char *string, struct sockaddr *sa ) {
+	struct sockaddr_in6 *sin6 = ( ( struct sockaddr_in6 * ) sa );
+	struct in6_addr in;
+	struct net_device *netdev;
+	size_t len;
+	char *tmp;
+	char *in_string;
+	char *netdev_string;
+	int rc;
+
+	/* Create modifiable copy of string */
+	tmp = strdup ( string );
+	if ( ! tmp )
+		return -ENOMEM;
+	in_string = tmp;
+
+	/* Strip surrounding "[...]", if present */
+	len = strlen ( in_string );
+	if ( ( in_string[0] == '[' ) && ( in_string[ len - 1 ] == ']' ) ) {
+		in_string[ len - 1 ] = '\0';
+		in_string++;
+	}
+
+	/* Split at network device name, if present */
+	netdev_string = strchr ( in_string, '%' );
+	if ( netdev_string ) {
+		*netdev_string = '\0';
+		netdev_string++;
+	}
+
+	/* Parse IPv6 address portion */
+	rc = inet6_aton ( in_string, &in );
+	if ( rc != 0 )
+		goto done;
+
+	/* Parse network device name, if present */
+	if ( netdev_string ) {
+		netdev = find_netdev ( netdev_string );
+		if ( ! netdev ) {
+			rc = -ENODEV;
+			goto done;
+		}
+		sin6->sin6_scope_id = netdev->index;
+	}
+
+	/* Copy IPv6 address portion to socket address */
+	memcpy ( &sin6->sin6_addr, &in, sizeof ( sin6->sin6_addr ) );
+
+ done:
+	free ( tmp );
+	return rc;
+}
+
+/** IPv6 protocol
+ *
+ * Network-layer protocol descriptor: received packets are handled by
+ * ipv6_rx() and addresses are transcribed by ipv6_ntoa().
+ */
+struct net_protocol ipv6_protocol __net_protocol = {
+ .name = "IPv6",
+ .net_proto = htons ( ETH_P_IPV6 ),
+ .net_addr_len = sizeof ( struct in6_addr ),
+ .rx = ipv6_rx,
+ .ntoa = ipv6_ntoa,
+};
+
+/** IPv6 TCPIP net protocol
+ *
+ * Descriptor for the AF_INET6 address family: packets are
+ * transmitted via ipv6_tx() and the transmitting network device is
+ * determined by ipv6_netdev().
+ */
+struct tcpip_net_protocol ipv6_tcpip_protocol __tcpip_net_protocol = {
+ .name = "IPv6",
+ .sa_family = AF_INET6,
+ .header_len = sizeof ( struct ipv6_header ),
+ .tx = ipv6_tx,
+ .netdev = ipv6_netdev,
+};
+
+/** IPv6 socket address converter
+ *
+ * Converts AF_INET6 socket addresses to and from text using
+ * ipv6_sock_ntoa() and ipv6_sock_aton().
+ */
+struct sockaddr_converter ipv6_sockaddr_converter __sockaddr_converter = {
+ .family = AF_INET6,
+ .ntoa = ipv6_sock_ntoa,
+ .aton = ipv6_sock_aton,
+};
+
+/**
+ * Parse IPv6 address setting value
+ *
+ * @v type		Setting type
+ * @v value		Formatted setting value
+ * @v buf		Buffer to contain raw value
+ * @v len		Length of buffer
+ * @ret len		Length of raw value, or negative error
+ *
+ * At most @c len bytes are written to the buffer, but the value
+ * returned is always the full size of an IPv6 address.
+ */
+int parse_ipv6_setting ( const struct setting_type *type __unused,
+			 const char *value, void *buf, size_t len ) {
+	struct in6_addr ipv6;
+	size_t copy_len;
+	int rc;
+
+	/* Parse IPv6 address */
+	rc = inet6_aton ( value, &ipv6 );
+	if ( rc != 0 )
+		return rc;
+
+	/* Copy as much as fits into the buffer */
+	copy_len = ( ( len < sizeof ( ipv6 ) ) ? len : sizeof ( ipv6 ) );
+	memcpy ( buf, &ipv6, copy_len );
+
+	return ( sizeof ( ipv6 ) );
+}
+
+/**
+ * Format IPv6 address setting value
+ *
+ * @v type		Setting type
+ * @v raw		Raw setting value
+ * @v raw_len		Length of raw setting value
+ * @v buf		Buffer to contain formatted value
+ * @v len		Length of buffer
+ * @ret len		Length of formatted value, or negative error
+ *
+ * Raw values shorter than an IPv6 address are rejected with -EINVAL.
+ */
+int format_ipv6_setting ( const struct setting_type *type __unused,
+			  const void *raw, size_t raw_len, char *buf,
+			  size_t len ) {
+	const struct in6_addr *ipv6 = raw;
+	const char *text;
+
+	/* Sanity check */
+	if ( raw_len < sizeof ( *ipv6 ) )
+		return -EINVAL;
+
+	/* Format address */
+	text = inet6_ntoa ( ipv6 );
+	return snprintf ( buf, len, "%s", text );
+}
+
+/**
+ * Create IPv6 network device
+ *
+ * @v netdev		Network device
+ * @ret rc		Return status code
+ *
+ * Gives the network device a link-local address by adding a routing
+ * table entry that is flagged as carrying a usable address.
+ */
+static int ipv6_probe ( struct net_device *netdev ) {
+	struct in6_addr address;
+	int prefix_len;
+
+	/* Construct link-local address from EUI-64 as per RFC 2464 */
+	memset ( &address, 0, sizeof ( address ) );
+	prefix_len = ipv6_link_local ( &address, netdev );
+	if ( prefix_len < 0 ) {
+		DBGC ( netdev, "IPv6 %s could not construct link-local "
+		       "address: %s\n", netdev->name,
+		       strerror ( prefix_len ) );
+		return prefix_len;
+	}
+
+	/* Create link-local address for this network device */
+	if ( ! ipv6_add_miniroute ( netdev, &address, prefix_len,
+				    IPV6_HAS_ADDRESS ) )
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * Destroy IPv6 network device
+ *
+ * @v netdev		Network device
+ *
+ * Every routing table entry belonging to the device is unlinked and
+ * freed, and the device reference held by each entry is dropped.
+ */
+static void ipv6_remove ( struct net_device *netdev ) {
+	struct ipv6_miniroute *miniroute;
+	struct ipv6_miniroute *tmp;
+
+	/* Delete all miniroutes for this network device */
+	list_for_each_entry_safe ( miniroute, tmp, &ipv6_miniroutes, list ) {
+
+		/* Leave other devices' entries alone */
+		if ( miniroute->netdev != netdev )
+			continue;
+
+		netdev_put ( miniroute->netdev );
+		list_del ( &miniroute->list );
+		free ( miniroute );
+	}
+}
+
+/** IPv6 network device driver
+ *
+ * ipv6_probe() gives a network device its link-local routing table
+ * entry, and ipv6_remove() deletes all of the device's entries.
+ */
+struct net_driver ipv6_driver __net_driver = {
+ .name = "IPv6",
+ .probe = ipv6_probe,
+ .remove = ipv6_remove,
+};
+
+/* Drag in ICMPv6 */
+REQUIRE_OBJECT ( icmpv6 );
+
+/* Drag in NDP */
+REQUIRE_OBJECT ( ndp );
diff --git a/qemu/roms/ipxe/src/net/ndp.c b/qemu/roms/ipxe/src/net/ndp.c
new file mode 100644
index 000000000..e62f7d5cb
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/ndp.c
@@ -0,0 +1,1010 @@
+/*
+ * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/in.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/ipv6.h>
+#include <ipxe/icmpv6.h>
+#include <ipxe/neighbour.h>
+#include <ipxe/dhcpv6.h>
+#include <ipxe/ndp.h>
+
+/** @file
+ *
+ * IPv6 neighbour discovery protocol
+ *
+ */
+
+static int
+ipv6conf_rx_router_advertisement ( struct net_device *netdev,
+ struct ndp_router_advertisement_header *radv,
+ size_t len );
+
+/**
+ * Transmit NDP packet with link-layer address option
+ *
+ * The caller's NDP header is copied into a fresh I/O buffer and
+ * followed by a single option carrying the device's own link-layer
+ * address.  The option is padded to a whole number of NDP option
+ * blocks.
+ *
+ * @v netdev		Network device
+ * @v sin6_src		Source socket address
+ * @v sin6_dest		Destination socket address
+ * @v data		NDP header
+ * @v len		Size of NDP header
+ * @v option_type	NDP option type
+ * @ret rc		Return status code
+ */
+static int ndp_tx_ll_addr ( struct net_device *netdev,
+			    struct sockaddr_in6 *sin6_src,
+			    struct sockaddr_in6 *sin6_dest,
+			    const void *data, size_t len,
+			    unsigned int option_type ) {
+	struct ll_protocol *ll_protocol = netdev->ll_protocol;
+	struct ndp_ll_addr_option *ll_addr_opt;
+	struct io_buffer *iobuf;
+	union ndp_header *ndp;
+	size_t option_len;
+	int rc;
+
+	/* Round the option length up to a multiple of the block size */
+	option_len = ( sizeof ( *ll_addr_opt ) + ll_protocol->ll_addr_len +
+		       NDP_OPTION_BLKSZ - 1 );
+	option_len &= ~( NDP_OPTION_BLKSZ - 1 );
+
+	/* Allocate buffer, leaving headroom for lower-layer headers */
+	iobuf = alloc_iob ( MAX_LL_NET_HEADER_LEN + len + option_len );
+	if ( ! iobuf )
+		return -ENOMEM;
+	iob_reserve ( iobuf, MAX_LL_NET_HEADER_LEN );
+
+	/* Copy in the caller's NDP header */
+	memcpy ( iob_put ( iobuf, len ), data, len );
+
+	/* Append the link-layer address option */
+	ll_addr_opt = iob_put ( iobuf, option_len );
+	ll_addr_opt->header.type = option_type;
+	ll_addr_opt->header.blocks = ( option_len / NDP_OPTION_BLKSZ );
+	memcpy ( ll_addr_opt->ll_addr, netdev->ll_addr,
+		 ll_protocol->ll_addr_len );
+
+	/* Checksum the header plus option */
+	ndp = iobuf->data;
+	ndp->icmp.chksum = tcpip_chksum ( ndp, ( len + option_len ) );
+
+	/* Transmit packet */
+	rc = tcpip_tx ( iobuf, &icmpv6_protocol,
+			( ( struct sockaddr_tcpip * ) sin6_src ),
+			( ( struct sockaddr_tcpip * ) sin6_dest ),
+			netdev, &ndp->icmp.chksum );
+	if ( rc != 0 ) {
+		DBGC ( netdev, "NDP %s could not transmit packet: %s\n",
+		       netdev->name, strerror ( rc ) );
+	}
+
+	return rc;
+}
+
+/**
+ * Transmit NDP neighbour discovery request
+ *
+ * Sends a neighbour solicitation for the destination address to the
+ * corresponding solicited-node address, scoped to this device, with
+ * our own link-layer address attached as the source option.
+ *
+ * @v netdev		Network device
+ * @v net_protocol	Network-layer protocol
+ * @v net_dest		Destination network-layer address
+ * @v net_source	Source network-layer address
+ * @ret rc		Return status code
+ */
+static int ndp_tx_request ( struct net_device *netdev,
+			    struct net_protocol *net_protocol __unused,
+			    const void *net_dest, const void *net_source ) {
+	struct ndp_neighbour_header neigh;
+	struct sockaddr_in6 sin6_dest;
+	struct sockaddr_in6 sin6_src;
+
+	/* Build the neighbour solicitation header */
+	memset ( &neigh, 0, sizeof ( neigh ) );
+	neigh.icmp.type = ICMPV6_NEIGHBOUR_SOLICITATION;
+	memcpy ( &neigh.target, net_dest, sizeof ( neigh.target ) );
+
+	/* Build the source socket address */
+	memset ( &sin6_src, 0, sizeof ( sin6_src ) );
+	sin6_src.sin6_family = AF_INET6;
+	memcpy ( &sin6_src.sin6_addr, net_source,
+		 sizeof ( sin6_src.sin6_addr ) );
+
+	/* Build the solicited-node destination socket address */
+	memset ( &sin6_dest, 0, sizeof ( sin6_dest ) );
+	sin6_dest.sin6_family = AF_INET6;
+	sin6_dest.sin6_scope_id = netdev->index;
+	ipv6_solicited_node ( &sin6_dest.sin6_addr, net_dest );
+
+	/* Hand over to the common transmit path */
+	return ndp_tx_ll_addr ( netdev, &sin6_src, &sin6_dest, &neigh,
+				sizeof ( neigh ), NDP_OPT_LL_SOURCE );
+}
+
+/** NDP neighbour discovery protocol
+ *
+ * Uses ndp_tx_request() as the request transmission method.
+ */
+struct neighbour_discovery ndp_discovery = {
+ .name = "NDP",
+ .tx_request = ndp_tx_request,
+};
+
+/**
+ * Transmit NDP router solicitation
+ *
+ * The solicitation is sent to the all-routers address, scoped to this
+ * device, with no explicit source address and with our link-layer
+ * address attached as the source option.
+ *
+ * @v netdev		Network device
+ * @ret rc		Return status code
+ */
+static int ndp_tx_router_solicitation ( struct net_device *netdev ) {
+	struct sockaddr_in6 sin6_dest;
+	struct ndp_router_solicitation_header rsol;
+
+	/* Build the router solicitation header */
+	memset ( &rsol, 0, sizeof ( rsol ) );
+	rsol.icmp.type = ICMPV6_ROUTER_SOLICITATION;
+
+	/* Build the all-routers destination socket address */
+	memset ( &sin6_dest, 0, sizeof ( sin6_dest ) );
+	sin6_dest.sin6_family = AF_INET6;
+	sin6_dest.sin6_scope_id = netdev->index;
+	ipv6_all_routers ( &sin6_dest.sin6_addr );
+
+	/* Hand over to the common transmit path */
+	return ndp_tx_ll_addr ( netdev, NULL, &sin6_dest, &rsol,
+				sizeof ( rsol ), NDP_OPT_LL_SOURCE );
+}
+
+/**
+ * Process NDP neighbour solicitation source link-layer address option
+ *
+ * Solicitations for addresses this device does not own are ignored.
+ * Otherwise the sender is recorded in the neighbour cache, and the
+ * received header is rewritten in place into a solicited, overriding
+ * neighbour advertisement which is sent back to the sender.
+ *
+ * @v netdev		Network device
+ * @v sin6_src		Source socket address
+ * @v ndp		NDP packet
+ * @v option		NDP option
+ * @v len		NDP option length
+ * @ret rc		Return status code
+ */
+static int
+ndp_rx_neighbour_solicitation_ll_source ( struct net_device *netdev,
+					  struct sockaddr_in6 *sin6_src,
+					  union ndp_header *ndp,
+					  union ndp_option *option,
+					  size_t len ) {
+	struct ll_protocol *ll_protocol = netdev->ll_protocol;
+	struct ndp_ll_addr_option *ll_addr_opt = &option->ll_addr;
+	struct ndp_neighbour_header *neigh = &ndp->neigh;
+	struct in6_addr *sender = &sin6_src->sin6_addr;
+	int rc;
+
+	/* Not one of our addresses: ignore, and do not report an error */
+	if ( ! ipv6_has_addr ( netdev, &neigh->target ) )
+		return 0;
+
+	/* Option must be long enough to hold a full link-layer address */
+	if ( len < offsetof ( typeof ( *ll_addr_opt ),
+			      ll_addr[ll_protocol->ll_addr_len] ) ) {
+		DBGC ( netdev, "NDP %s neighbour solicitation link-layer "
+		       "address option too short at %zd bytes\n",
+		       netdev->name, len );
+		return -EINVAL;
+	}
+
+	/* Create or update the sender's neighbour cache entry */
+	rc = neighbour_define ( netdev, &ipv6_protocol, sender,
+				ll_addr_opt->ll_addr );
+	if ( rc != 0 ) {
+		DBGC ( netdev, "NDP %s could not define %s => %s: %s\n",
+		       netdev->name, inet6_ntoa ( sender ),
+		       ll_protocol->ntoa ( ll_addr_opt->ll_addr ),
+		       strerror ( rc ) );
+		return rc;
+	}
+
+	/* Reuse the received header as the advertisement: clear
+	 * everything in front of the target address, which is kept.
+	 */
+	memset ( neigh, 0, offsetof ( typeof ( *neigh ), target ) );
+	neigh->icmp.type = ICMPV6_NEIGHBOUR_ADVERTISEMENT;
+	neigh->flags = ( NDP_NEIGHBOUR_SOLICITED | NDP_NEIGHBOUR_OVERRIDE );
+
+	/* Send the neighbour advertisement back to the sender */
+	return ndp_tx_ll_addr ( netdev, NULL, sin6_src, neigh,
+				sizeof ( *neigh ), NDP_OPT_LL_TARGET );
+}
+
+/**
+ * Process NDP neighbour advertisement target link-layer address option
+ *
+ * Passes the advertised target address and its link-layer address to
+ * neighbour_update().
+ *
+ * @v netdev		Network device
+ * @v sin6_src		Source socket address
+ * @v ndp		NDP packet
+ * @v option		NDP option
+ * @v len		NDP option length
+ * @ret rc		Return status code
+ */
+static int
+ndp_rx_neighbour_advertisement_ll_target ( struct net_device *netdev,
+					   struct sockaddr_in6 *sin6_src
+						   __unused,
+					   union ndp_header *ndp,
+					   union ndp_option *option,
+					   size_t len ) {
+	struct ll_protocol *ll_protocol = netdev->ll_protocol;
+	struct ndp_ll_addr_option *ll_addr_opt = &option->ll_addr;
+	struct in6_addr *target = &ndp->neigh.target;
+	int rc;
+
+	/* Option must be long enough to hold a full link-layer address */
+	if ( len < offsetof ( typeof ( *ll_addr_opt ),
+			      ll_addr[ll_protocol->ll_addr_len] ) ) {
+		DBGC ( netdev, "NDP %s neighbour advertisement link-layer "
+		       "address option too short at %zd bytes\n",
+		       netdev->name, len );
+		return -EINVAL;
+	}
+
+	/* Update neighbour cache entry, if any */
+	rc = neighbour_update ( netdev, &ipv6_protocol, target,
+				ll_addr_opt->ll_addr );
+	if ( rc != 0 ) {
+		DBGC ( netdev, "NDP %s could not update %s => %s: %s\n",
+		       netdev->name, inet6_ntoa ( target ),
+		       ll_protocol->ntoa ( ll_addr_opt->ll_addr ),
+		       strerror ( rc ) );
+	}
+
+	return rc;
+}
+
+/**
+ * Process NDP router advertisement source link-layer address option
+ *
+ * Records the advertising router's link-layer address in the
+ * neighbour cache.
+ *
+ * @v netdev		Network device
+ * @v sin6_src		Source socket address
+ * @v ndp		NDP packet
+ * @v option		NDP option
+ * @v len		NDP option length
+ * @ret rc		Return status code
+ */
+static int
+ndp_rx_router_advertisement_ll_source ( struct net_device *netdev,
+					struct sockaddr_in6 *sin6_src,
+					union ndp_header *ndp __unused,
+					union ndp_option *option, size_t len ) {
+	struct ll_protocol *ll_protocol = netdev->ll_protocol;
+	struct ndp_ll_addr_option *ll_addr_opt = &option->ll_addr;
+	struct in6_addr *router = &sin6_src->sin6_addr;
+	int rc;
+
+	/* Option must be long enough to hold a full link-layer address */
+	if ( len < offsetof ( typeof ( *ll_addr_opt ),
+			      ll_addr[ll_protocol->ll_addr_len] ) ) {
+		DBGC ( netdev, "NDP %s router advertisement link-layer address "
+		       "option too short at %zd bytes\n", netdev->name, len );
+		return -EINVAL;
+	}
+
+	/* Define neighbour cache entry for the router */
+	rc = neighbour_define ( netdev, &ipv6_protocol, router,
+				ll_addr_opt->ll_addr );
+	if ( rc != 0 ) {
+		DBGC ( netdev, "NDP %s could not define %s => %s: %s\n",
+		       netdev->name, inet6_ntoa ( router ),
+		       ll_protocol->ntoa ( ll_addr_opt->ll_addr ),
+		       strerror ( rc ) );
+	}
+
+	return rc;
+}
+
+/**
+ * Process NDP router advertisement prefix information option
+ *
+ * Off-link prefixes are ignored.  An on-link prefix is installed via
+ * ipv6_set_prefix(), with the advertising router passed as the router
+ * only when the advertisement has a non-zero lifetime.  If the prefix
+ * is also flagged autonomous, an address is derived from the prefix
+ * via ipv6_eui64() and assigned to the device.
+ *
+ * @v netdev		Network device
+ * @v sin6_src		Source socket address
+ * @v ndp		NDP packet
+ * @v option		NDP option
+ * @v len		NDP option length
+ * @ret rc		Return status code
+ */
+static int
+ndp_rx_router_advertisement_prefix ( struct net_device *netdev,
+				     struct sockaddr_in6 *sin6_src,
+				     union ndp_header *ndp,
+				     union ndp_option *option, size_t len ) {
+	struct ndp_prefix_information_option *prefix_opt = &option->prefix;
+	struct ndp_router_advertisement_header *radv = &ndp->radv;
+	struct in6_addr *router;
+	struct in6_addr slaac;
+	int eui64_len;
+	int rc;
+
+	/* Sanity check */
+	if ( len < sizeof ( *prefix_opt ) ) {
+		DBGC ( netdev, "NDP %s router advertisement prefix option too "
+		       "short at %zd bytes\n", netdev->name, len );
+		return -EINVAL;
+	}
+
+	/* Two separate debug statements, each formatting one address */
+	DBGC ( netdev, "NDP %s found %sdefault router %s ",
+	       netdev->name, ( radv->lifetime ? "" : "non-" ),
+	       inet6_ntoa ( &sin6_src->sin6_addr ) );
+	DBGC ( netdev, "for %s-link %sautonomous prefix %s/%d\n",
+	       ( ( prefix_opt->flags & NDP_PREFIX_ON_LINK ) ? "on" : "off" ),
+	       ( ( prefix_opt->flags & NDP_PREFIX_AUTONOMOUS ) ? "" : "non-" ),
+	       inet6_ntoa ( &prefix_opt->prefix ),
+	       prefix_opt->prefix_len );
+
+	/* Ignore off-link prefixes */
+	if ( ! ( prefix_opt->flags & NDP_PREFIX_ON_LINK ) )
+		return 0;
+
+	/* Define prefix; a zero lifetime means no router is passed */
+	router = ( radv->lifetime ? &sin6_src->sin6_addr : NULL );
+	rc = ipv6_set_prefix ( netdev, &prefix_opt->prefix,
+			       prefix_opt->prefix_len, router );
+	if ( rc != 0 ) {
+		DBGC ( netdev, "NDP %s could not define prefix %s/%d: %s\n",
+		       netdev->name, inet6_ntoa ( &prefix_opt->prefix ),
+		       prefix_opt->prefix_len, strerror ( rc ) );
+		return rc;
+	}
+
+	/* Nothing more to do unless the prefix is autonomous */
+	if ( ! ( prefix_opt->flags & NDP_PREFIX_AUTONOMOUS ) )
+		return 0;
+
+	/* Perform stateless address autoconfiguration */
+	memcpy ( &slaac, &prefix_opt->prefix, sizeof ( slaac ) );
+	eui64_len = ipv6_eui64 ( &slaac, netdev );
+	if ( eui64_len < 0 ) {
+		rc = eui64_len;
+		DBGC ( netdev, "NDP %s could not construct SLAAC "
+		       "address: %s\n", netdev->name, strerror ( rc ) );
+		return rc;
+	}
+	if ( eui64_len != prefix_opt->prefix_len ) {
+		DBGC ( netdev, "NDP %s incorrect SLAAC prefix length "
+		       "%d (expected %d)\n", netdev->name,
+		       prefix_opt->prefix_len, eui64_len );
+		return -EINVAL;
+	}
+	rc = ipv6_set_address ( netdev, &slaac );
+	if ( rc != 0 ) {
+		DBGC ( netdev, "NDP %s could not set address %s: %s\n",
+		       netdev->name, inet6_ntoa ( &slaac ),
+		       strerror ( rc ) );
+		return rc;
+	}
+
+	return 0;
+}
+
+/** An NDP option handler
+ *
+ * Associates an (ICMPv6 type, NDP option type) pair with the function
+ * that processes that option.
+ */
+struct ndp_option_handler {
+ /** ICMPv6 type */
+ uint8_t icmp_type;
+ /** Option type */
+ uint8_t option_type;
+ /**
+ * Handle received option
+ *
+ * @v netdev Network device
+ * @v sin6_src Source socket address
+ * @v ndp NDP packet
+ * @v option NDP option
+ * @v len NDP option length
+ * @ret rc Return status code
+ */
+ int ( * rx ) ( struct net_device *netdev, struct sockaddr_in6 *sin6_src,
+ union ndp_header *ndp, union ndp_option *option,
+ size_t len );
+};
+
+/** NDP option handlers
+ *
+ * Searched linearly by ndp_rx_option(); options with no matching
+ * entry are ignored.
+ */
+static struct ndp_option_handler ndp_option_handlers[] = {
+ {
+ .icmp_type = ICMPV6_NEIGHBOUR_SOLICITATION,
+ .option_type = NDP_OPT_LL_SOURCE,
+ .rx = ndp_rx_neighbour_solicitation_ll_source,
+ },
+ {
+ .icmp_type = ICMPV6_NEIGHBOUR_ADVERTISEMENT,
+ .option_type = NDP_OPT_LL_TARGET,
+ .rx = ndp_rx_neighbour_advertisement_ll_target,
+ },
+ {
+ .icmp_type = ICMPV6_ROUTER_ADVERTISEMENT,
+ .option_type = NDP_OPT_LL_SOURCE,
+ .rx = ndp_rx_router_advertisement_ll_source,
+ },
+ {
+ .icmp_type = ICMPV6_ROUTER_ADVERTISEMENT,
+ .option_type = NDP_OPT_PREFIX,
+ .rx = ndp_rx_router_advertisement_prefix,
+ },
+};
+
+/**
+ * Process received NDP option
+ *
+ * Dispatches the option to the first handler registered for this
+ * combination of ICMPv6 type and option type.
+ *
+ * @v netdev		Network device
+ * @v sin6_src		Source socket address
+ * @v ndp		NDP packet
+ * @v option		NDP option
+ * @v len		Option length
+ * @ret rc		Return status code
+ */
+static int ndp_rx_option ( struct net_device *netdev,
+			   struct sockaddr_in6 *sin6_src, union ndp_header *ndp,
+			   union ndp_option *option, size_t len ) {
+	struct ndp_option_handler *handler = ndp_option_handlers;
+	unsigned int count = ( sizeof ( ndp_option_handlers ) /
+			       sizeof ( ndp_option_handlers[0] ) );
+
+	/* Walk the handler table looking for a match on both types */
+	for ( ; count-- ; handler++ ) {
+		if ( handler->icmp_type != ndp->icmp.type )
+			continue;
+		if ( handler->option_type != option->header.type )
+			continue;
+		return handler->rx ( netdev, sin6_src, ndp, option, len );
+	}
+
+	/* Silently ignore unknown options as per RFC 4861 */
+	return 0;
+}
+
+/**
+ * Process received NDP packet options
+ *
+ * Walks the options that follow the fixed part of the NDP header,
+ * validating each option's length before handing it to
+ * ndp_rx_option().  Processing stops at the first malformed option
+ * or the first handler failure.
+ *
+ * @v netdev		Network device
+ * @v sin6_src		Source socket address
+ * @v ndp		NDP header
+ * @v offset		Offset to NDP options
+ * @v len		Length of NDP packet
+ * @ret rc		Return status code
+ */
+static int ndp_rx_options ( struct net_device *netdev,
+			    struct sockaddr_in6 *sin6_src,
+			    union ndp_header *ndp, size_t offset, size_t len ) {
+	union ndp_option *option;
+	size_t option_len;
+	size_t remaining;
+	int rc;
+
+	/* The packet must at least contain the fixed header */
+	if ( offset > len ) {
+		DBGC ( netdev, "NDP %s packet too short at %zd bytes (min %zd "
+		       "bytes)\n", netdev->name, len, offset );
+		return -EINVAL;
+	}
+
+	/* Iterate over options until the packet is exhausted */
+	option = ( ( ( void * ) ndp ) + offset );
+	for ( remaining = ( len - offset ) ; remaining ;
+	      remaining -= option_len ) {
+
+		/* The option header must be present, must declare a
+		 * non-zero length, and must fit within the packet.
+		 * The checks short-circuit so the header is only read
+		 * once it is known to be present.
+		 */
+		if ( ( remaining < sizeof ( option->header ) ) ||
+		     ( option->header.blocks == 0 ) ||
+		     ( remaining < ( option->header.blocks *
+				     NDP_OPTION_BLKSZ ) ) ) {
+			DBGC ( netdev, "NDP %s bad option length:\n",
+			       netdev->name );
+			DBGC_HDA ( netdev, 0, option, remaining );
+			return -EINVAL;
+		}
+		option_len = ( option->header.blocks * NDP_OPTION_BLKSZ );
+
+		/* Handle option */
+		rc = ndp_rx_option ( netdev, sin6_src, ndp, option,
+				     option_len );
+		if ( rc != 0 )
+			return rc;
+
+		/* Move to next option */
+		option = ( ( ( void * ) option ) + option_len );
+	}
+
+	return 0;
+}
+
+/**
+ * Process received NDP neighbour solicitation or advertisement
+ *
+ * The I/O buffer is freed before returning, whether or not option
+ * processing succeeded.
+ *
+ * @v iobuf		I/O buffer
+ * @v netdev		Network device
+ * @v sin6_src		Source socket address
+ * @v sin6_dest		Destination socket address
+ * @ret rc		Return status code
+ */
+static int ndp_rx_neighbour ( struct io_buffer *iobuf,
+			      struct net_device *netdev,
+			      struct sockaddr_in6 *sin6_src,
+			      struct sockaddr_in6 *sin6_dest __unused ) {
+	union ndp_header *ndp = iobuf->data;
+	size_t options_offset = offsetof ( typeof ( ndp->neigh ), option );
+	int rc;
+
+	/* Process options */
+	rc = ndp_rx_options ( netdev, sin6_src, ndp, options_offset,
+			      iob_len ( iobuf ) );
+
+	/* Release the buffer regardless of outcome */
+	free_iob ( iobuf );
+	return rc;
+}
+
+/**
+ * Process received NDP router advertisement
+ *
+ * Options are processed first; only if that succeeds is the
+ * advertisement passed on to IPv6 autoconfiguration.  The I/O buffer
+ * is freed before returning in every case.
+ *
+ * @v iobuf		I/O buffer
+ * @v netdev		Network device
+ * @v sin6_src		Source socket address
+ * @v sin6_dest		Destination socket address
+ * @ret rc		Return status code
+ */
+static int
+ndp_rx_router_advertisement ( struct io_buffer *iobuf,
+			      struct net_device *netdev,
+			      struct sockaddr_in6 *sin6_src,
+			      struct sockaddr_in6 *sin6_dest __unused ) {
+	union ndp_header *ndp = iobuf->data;
+	struct ndp_router_advertisement_header *radv = &ndp->radv;
+	size_t len = iob_len ( iobuf );
+	int rc;
+
+	/* Process options */
+	rc = ndp_rx_options ( netdev, sin6_src, ndp,
+			      offsetof ( typeof ( *radv ), option ), len );
+
+	/* Pass to IPv6 autoconfiguration */
+	if ( rc == 0 )
+		rc = ipv6conf_rx_router_advertisement ( netdev, radv, len );
+
+	/* Release the buffer regardless of outcome */
+	free_iob ( iobuf );
+	return rc;
+}
+
+/** NDP ICMPv6 handlers
+ *
+ * Neighbour solicitations and advertisements share one handler;
+ * router advertisements have their own.
+ */
+struct icmpv6_handler ndp_handlers[] __icmpv6_handler = {
+ {
+ .type = ICMPV6_NEIGHBOUR_SOLICITATION,
+ .rx = ndp_rx_neighbour,
+ },
+ {
+ .type = ICMPV6_NEIGHBOUR_ADVERTISEMENT,
+ .rx = ndp_rx_neighbour,
+ },
+ {
+ .type = ICMPV6_ROUTER_ADVERTISEMENT,
+ .rx = ndp_rx_router_advertisement,
+ },
+};
+
+/****************************************************************************
+ *
+ * NDP settings
+ *
+ */
+
+/** An NDP settings block
+ *
+ * Holds a private copy of NDP options, stored immediately after the
+ * fixed fields, and exposes them through a settings interface.
+ */
+struct ndp_settings {
+ /** Reference counter */
+ struct refcnt refcnt;
+ /** Settings interface */
+ struct settings settings;
+ /** Length of NDP options */
+ size_t len;
+ /** NDP options (variable length; "len" bytes are valid) */
+ union ndp_option option[0];
+};
+
+/** NDP settings scope
+ *
+ * Its address identifies settings that are backed by NDP options.
+ */
+static const struct settings_scope ndp_settings_scope;
+
+/**
+ * Construct NDP tag
+ *
+ * The option type occupies the low 8 bits of the tag; the data offset
+ * is stored in the bits above them.
+ *
+ * @v type NDP option type
+ * @v offset Starting offset of data
+ * @ret tag NDP tag
+ */
+#define NDP_TAG( type, offset ) ( ( (offset) << 8 ) | (type) )
+
+/**
+ * Extract NDP tag type
+ *
+ * Returns the low 8 bits of the tag.
+ *
+ * @v tag NDP tag
+ * @ret type NDP option type
+ */
+#define NDP_TAG_TYPE( tag ) ( (tag) & 0xff )
+
+/**
+ * Extract NDP tag offset
+ *
+ * Returns the tag with its low 8 bits shifted out.
+ *
+ * @v tag NDP tag
+ * @ret offset Starting offset of data
+ */
+#define NDP_TAG_OFFSET( tag ) ( (tag) >> 8 )
+
+/**
+ * Check applicability of NDP setting
+ *
+ * A setting applies if and only if it belongs to the NDP settings
+ * scope.
+ *
+ * @v settings		Settings block
+ * @v setting		Setting to fetch
+ * @ret applies		Setting applies within this settings block
+ */
+static int ndp_applies ( struct settings *settings __unused,
+			 const struct setting *setting ) {
+	const struct settings_scope *wanted = &ndp_settings_scope;
+
+	return ( wanted == setting->scope );
+}
+
+/**
+ * Fetch value of NDP setting
+ *
+ * The setting's tag encodes the NDP option type and the offset of the
+ * payload within that option.  The first stored option of that type
+ * supplies the value.  At most @c len bytes are copied, but the full
+ * payload length is returned.
+ *
+ * @v settings		Settings block
+ * @v setting		Setting to fetch
+ * @v data		Buffer to fill with setting data
+ * @v len		Length of buffer
+ * @ret len		Length of setting data, or negative error
+ */
+static int ndp_fetch ( struct settings *settings,
+		       struct setting *setting,
+		       void *data, size_t len ) {
+	struct ndp_settings *ndpset =
+		container_of ( settings, struct ndp_settings, settings );
+	struct net_device *netdev =
+		container_of ( settings->parent, struct net_device,
+			       settings.settings );
+	unsigned int type = NDP_TAG_TYPE ( setting->tag );
+	unsigned int offset = NDP_TAG_OFFSET ( setting->tag );
+	union ndp_option *option = ndpset->option;
+	size_t payload_len;
+	size_t option_len;
+	size_t remaining;
+
+	/* Scan through NDP options for requested type.  We can assume
+	 * that the options are well-formed, otherwise they would have
+	 * been rejected prior to being stored.
+	 */
+	for ( remaining = ndpset->len ; remaining ;
+	      remaining -= option_len,
+	      option = ( ( ( void * ) option ) + option_len ) ) {
+
+		/* Calculate option length */
+		option_len = ( option->header.blocks * NDP_OPTION_BLKSZ );
+
+		/* Skip options of any other type */
+		if ( option->header.type != type )
+			continue;
+
+		/* The payload offset must lie within the option */
+		if ( offset > option_len ) {
+			DBGC ( netdev, "NDP %s option %d too short\n",
+			       netdev->name, type );
+			return -EINVAL;
+		}
+		payload_len = ( option_len - offset );
+
+		/* Copy as much as fits; report the full length */
+		if ( len > payload_len )
+			len = payload_len;
+		memcpy ( data, ( ( ( void * ) option ) + offset ), len );
+		return payload_len;
+	}
+
+	return -ENOENT;
+}
+
+/** NDP settings operations
+ *
+ * Only the applicability check and fetch methods are provided.
+ */
+static struct settings_operations ndp_settings_operations = {
+ .applies = ndp_applies,
+ .fetch = ndp_fetch,
+};
+
+/**
+ * Register NDP settings
+ *
+ * Copies the options into a newly allocated settings block and
+ * registers it as a child of the network device's settings.  The
+ * local reference is dropped whether or not registration succeeds.
+ *
+ * @v netdev		Network device
+ * @v option		NDP options
+ * @v len		Length of options
+ * @ret rc		Return status code
+ */
+static int ndp_register_settings ( struct net_device *netdev,
+				   union ndp_option *option, size_t len ) {
+	struct settings *parent = netdev_settings ( netdev );
+	struct ndp_settings *ndpset;
+	int rc;
+
+	/* Allocate structure with room for the options */
+	ndpset = zalloc ( sizeof ( *ndpset ) + len );
+	if ( ! ndpset )
+		return -ENOMEM;
+
+	/* Initialise structure and take a copy of the options */
+	ref_init ( &ndpset->refcnt, NULL );
+	settings_init ( &ndpset->settings, &ndp_settings_operations,
+			&ndpset->refcnt, &ndp_settings_scope );
+	ndpset->len = len;
+	memcpy ( ndpset->option, option, len );
+
+	/* Register settings */
+	rc = register_settings ( &ndpset->settings, parent,
+				 NDP_SETTINGS_NAME );
+
+	/* Drop our reference on both the success and failure paths */
+	ref_put ( &ndpset->refcnt );
+	return rc;
+}
+
+/** DNS server setting
+ *
+ * Taken from the RDNSS option, starting at its addresses field.
+ */
+const struct setting ndp_dns6_setting __setting ( SETTING_IP_EXTRA, dns6 ) = {
+ .name = "dns6",
+ .description = "DNS server",
+ .tag = NDP_TAG ( NDP_OPT_RDNSS,
+ offsetof ( struct ndp_rdnss_option, addresses ) ),
+ .type = &setting_type_ipv6,
+ .scope = &ndp_settings_scope,
+};
+
+/** DNS search list setting
+ *
+ * Taken from the DNSSL option, starting at its names field.
+ */
+const struct setting ndp_dnssl_setting __setting ( SETTING_IP_EXTRA, dnssl ) = {
+ .name = "dnssl",
+ .description = "DNS search list",
+ .tag = NDP_TAG ( NDP_OPT_DNSSL,
+ offsetof ( struct ndp_dnssl_option, names ) ),
+ .type = &setting_type_dnssl,
+ .scope = &ndp_settings_scope,
+};
+
+/****************************************************************************
+ *
+ * IPv6 autoconfiguration
+ *
+ */
+
+/** An IPv6 configurator
+ *
+ * Tracks one autoconfiguration attempt on a network device: router
+ * solicitations are sent from the retransmission timer, and DHCPv6
+ * may be started once a router advertisement arrives.
+ */
+struct ipv6conf {
+ /** Reference count */
+ struct refcnt refcnt;
+ /** List of configurators */
+ struct list_head list;
+
+ /** Job control interface */
+ struct interface job;
+ /** DHCPv6 interface */
+ struct interface dhcp;
+
+ /** Network device being configured */
+ struct net_device *netdev;
+
+ /** Retransmission timer */
+ struct retry_timer timer;
+};
+
+/** List of IPv6 configurators */
+static LIST_HEAD ( ipv6confs );
+
+/**
+ * Free IPv6 configurator
+ *
+ * Drops the configurator's reference to its network device and
+ * releases the configurator itself.
+ *
+ * @v refcnt		Reference count
+ */
+static void ipv6conf_free ( struct refcnt *refcnt ) {
+	struct ipv6conf *conf;
+
+	conf = container_of ( refcnt, struct ipv6conf, refcnt );
+	netdev_put ( conf->netdev );
+	free ( conf );
+}
+
+/**
+ * Identify IPv6 configurator by network device
+ *
+ * Returns the first configurator on the list that is configuring the
+ * given device.
+ *
+ * @v netdev		Network device
+ * @ret ipv6		IPv6 configurator, or NULL
+ */
+static struct ipv6conf * ipv6conf_demux ( struct net_device *netdev ) {
+	struct ipv6conf *candidate;
+	struct ipv6conf *found = NULL;
+
+	list_for_each_entry ( candidate, &ipv6confs, list ) {
+		if ( candidate->netdev == netdev ) {
+			found = candidate;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/**
+ * Finish IPv6 autoconfiguration
+ *
+ * Shuts down both interfaces with the given status, stops the
+ * retransmission timer, and removes the configurator from the list.
+ * Also used as the close handler for the job and DHCPv6 interfaces.
+ *
+ * @v ipv6 IPv6 configurator
+ * @v rc Reason for finishing
+ */
+static void ipv6conf_done ( struct ipv6conf *ipv6conf, int rc ) {
+
+ /* Shut down interfaces */
+ intf_shutdown ( &ipv6conf->job, rc );
+ intf_shutdown ( &ipv6conf->dhcp, rc );
+
+ /* Stop timer */
+ stop_timer ( &ipv6conf->timer );
+
+ /* Remove from list and drop list's reference */
+ list_del ( &ipv6conf->list );
+ ref_put ( &ipv6conf->refcnt );
+}
+
+/**
+ * Handle IPv6 configurator timer expiry
+ *
+ * Each expiry short of failure restarts the timer and sends another
+ * router solicitation.  Once the timer reports failure,
+ * autoconfiguration is abandoned with a timeout error.
+ *
+ * @v timer		Retry timer
+ * @v fail		Failure indicator
+ */
+static void ipv6conf_expired ( struct retry_timer *timer, int fail ) {
+	struct ipv6conf *conf = container_of ( timer, struct ipv6conf, timer );
+
+	if ( ! fail ) {
+		/* Restart the timer and transmit a router solicitation;
+		 * the transmit status is deliberately not checked.
+		 */
+		start_timer ( &conf->timer );
+		ndp_tx_router_solicitation ( conf->netdev );
+		return;
+	}
+
+	/* Out of retries: terminate autoconfiguration */
+	ipv6conf_done ( conf, -ETIMEDOUT );
+}
+
+/**
+ * Handle router advertisement during IPv6 autoconfiguration
+ *
+ * Only the first router advertisement received while the router
+ * solicitation timer is running is acted upon.  Its options are
+ * registered as NDP settings, and DHCPv6 is started if the
+ * advertisement's managed or other-configuration flag is set;
+ * otherwise autoconfiguration completes immediately.
+ *
+ * Any failure after the timer has been stopped terminates the
+ * configurator with that error.  Without this, nothing would remain
+ * to complete the job: the timer is stopped and later advertisements
+ * are ignored.
+ *
+ * @v netdev		Network device
+ * @v radv		Router advertisement
+ * @v len		Length of router advertisement
+ * @ret rc		Return status code
+ *
+ * This function assumes that the router advertisement is well-formed,
+ * since it must have already passed through option processing.
+ */
+static int
+ipv6conf_rx_router_advertisement ( struct net_device *netdev,
+				   struct ndp_router_advertisement_header *radv,
+				   size_t len ) {
+	struct ipv6conf *ipv6conf;
+	size_t option_len;
+	int stateful;
+	int rc;
+
+	/* Identify IPv6 configurator, if any */
+	ipv6conf = ipv6conf_demux ( netdev );
+	if ( ! ipv6conf ) {
+		/* Not an error; router advertisements are processed
+		 * as a background activity even when no explicit
+		 * autoconfiguration is taking place.
+		 */
+		return 0;
+	}
+
+	/* If this is not the first solicited router advertisement, ignore it */
+	if ( ! timer_running ( &ipv6conf->timer ) )
+		return 0;
+
+	/* Stop router solicitation timer */
+	stop_timer ( &ipv6conf->timer );
+
+	/* Register NDP settings.  On failure the configurator must be
+	 * terminated: with the timer stopped it would otherwise stay
+	 * on the list forever and the job would never complete.
+	 */
+	option_len = ( len - offsetof ( typeof ( *radv ), option ) );
+	if ( ( rc = ndp_register_settings ( netdev, radv->option,
+					    option_len ) ) != 0 ) {
+		DBGC ( netdev, "NDP %s could not register settings: %s\n",
+		       netdev->name, strerror ( rc ) );
+		ipv6conf_done ( ipv6conf, rc );
+		return rc;
+	}
+
+	/* Start DHCPv6 if required */
+	if ( radv->flags & ( NDP_ROUTER_MANAGED | NDP_ROUTER_OTHER ) ) {
+		stateful = ( radv->flags & NDP_ROUTER_MANAGED );
+		if ( ( rc = start_dhcpv6 ( &ipv6conf->dhcp, netdev,
+					   stateful ) ) != 0 ) {
+			DBGC ( netdev, "NDP %s could not start state%s DHCPv6: "
+			       "%s\n", netdev->name,
+			       ( stateful ? "ful" : "less" ), strerror ( rc ) );
+			ipv6conf_done ( ipv6conf, rc );
+			return rc;
+		}
+		/* DHCPv6 is now responsible for completing the job */
+		return 0;
+	}
+
+	/* Otherwise, terminate autoconfiguration */
+	ipv6conf_done ( ipv6conf, 0 );
+
+	return 0;
+}
+
+/** IPv6 configurator job interface operations
+ *
+ * Closing the job interface finishes autoconfiguration.
+ */
+static struct interface_operation ipv6conf_job_op[] = {
+ INTF_OP ( intf_close, struct ipv6conf *, ipv6conf_done ),
+};
+
+/** IPv6 configurator job interface descriptor */
+static struct interface_descriptor ipv6conf_job_desc =
+ INTF_DESC ( struct ipv6conf, job, ipv6conf_job_op );
+
+/** IPv6 configurator DHCPv6 interface operations
+ *
+ * Closing the DHCPv6 interface finishes autoconfiguration.
+ */
+static struct interface_operation ipv6conf_dhcp_op[] = {
+ INTF_OP ( intf_close, struct ipv6conf *, ipv6conf_done ),
+};
+
+/** IPv6 configurator DHCPv6 interface descriptor */
+static struct interface_descriptor ipv6conf_dhcp_desc =
+ INTF_DESC ( struct ipv6conf, dhcp, ipv6conf_dhcp_op );
+
+/**
+ * Start IPv6 autoconfiguration
+ *
+ * Creates a configurator for the device, starts its timer with no
+ * delay so that the timer handler begins sending router
+ * solicitations, and attaches it to the caller's job interface.
+ *
+ * @v job		Job control interface
+ * @v netdev		Network device
+ * @ret rc		Return status code
+ */
+int start_ipv6conf ( struct interface *job, struct net_device *netdev ) {
+	struct ipv6conf *conf;
+	struct refcnt *refcnt;
+
+	/* Allocate structure */
+	conf = zalloc ( sizeof ( *conf ) );
+	if ( ! conf )
+		return -ENOMEM;
+
+	/* Initialise structure; all members share one reference count */
+	refcnt = &conf->refcnt;
+	ref_init ( refcnt, ipv6conf_free );
+	intf_init ( &conf->job, &ipv6conf_job_desc, refcnt );
+	intf_init ( &conf->dhcp, &ipv6conf_dhcp_desc, refcnt );
+	timer_init ( &conf->timer, ipv6conf_expired, refcnt );
+	conf->netdev = netdev_get ( netdev );
+
+	/* Start timer to initiate router solicitation */
+	start_timer_nodelay ( &conf->timer );
+
+	/* Attach parent interface, transfer reference to list, and return */
+	intf_plug_plug ( &conf->job, job );
+	list_add ( &conf->list, &ipv6confs );
+	return 0;
+}
+
+/** IPv6 network device configurator
+ *
+ * Named "ipv6"; started via start_ipv6conf().
+ */
+struct net_device_configurator ipv6_configurator __net_device_configurator = {
+ .name = "ipv6",
+ .start = start_ipv6conf,
+};
diff --git a/qemu/roms/ipxe/src/net/neighbour.c b/qemu/roms/ipxe/src/net/neighbour.c
new file mode 100644
index 000000000..e3026ce46
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/neighbour.c
@@ -0,0 +1,428 @@
+/*
+ * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/retry.h>
+#include <ipxe/timer.h>
+#include <ipxe/malloc.h>
+#include <ipxe/neighbour.h>
+
+/** @file
+ *
+ * Neighbour discovery
+ *
+ * This file implements the abstract functions of neighbour discovery,
+ * independent of the underlying network protocol (e.g. ARP or NDP).
+ *
+ */
+
+/** Neighbour discovery minimum timeout
+ *
+ * One eighth of TICKS_PER_SEC.  Installed as the minimum timeout of
+ * each cache entry's retry timer by neighbour_create().
+ */
+#define NEIGHBOUR_MIN_TIMEOUT ( TICKS_PER_SEC / 8 )
+
+/** Neighbour discovery maximum timeout
+ *
+ * Three times TICKS_PER_SEC.  Installed as the maximum timeout of
+ * each cache entry's retry timer by neighbour_create().
+ */
+#define NEIGHBOUR_MAX_TIMEOUT ( TICKS_PER_SEC * 3 )
+
+/** The neighbour cache
+ *
+ * Kept in most-recently-used order: new entries and entries returned
+ * by a successful lookup are placed at the head of the list, and the
+ * cache discarder drops the entry at the tail.
+ */
+struct list_head neighbours = LIST_HEAD_INIT ( neighbours );
+
+/* Forward declaration: neighbour_create() installs this timer
+ * handler before its definition appears.  The parameter name
+ * matches the definition and its documentation.
+ */
+static void neighbour_expired ( struct retry_timer *timer, int fail );
+
+/**
+ * Free neighbour cache entry
+ *
+ * @v refcnt		Reference count
+ *
+ * Installed as the reference counter's free method by
+ * neighbour_create().  Drops the entry's reference to its network
+ * device and releases the entry's memory.
+ */
+static void neighbour_free ( struct refcnt *refcnt ) {
+	struct neighbour *neighbour;
+
+	/* Recover the entry from its embedded reference counter */
+	neighbour = container_of ( refcnt, struct neighbour, refcnt );
+
+	/* No deferred packets may remain queued at this point */
+	assert ( list_empty ( &neighbour->tx_queue ) );
+
+	/* Release the network device, then the entry itself */
+	netdev_put ( neighbour->netdev );
+	free ( neighbour );
+}
+
+/**
+ * Create neighbour cache entry
+ *
+ * @v netdev		Network device
+ * @v net_protocol	Network-layer protocol
+ * @v net_dest		Destination network-layer address
+ * @ret neighbour	Neighbour cache entry, or NULL if allocation failed
+ *
+ * The new entry holds a reference to the network device, has its
+ * retry timer configured (but not started) and is inserted at the
+ * head of the neighbour cache.
+ */
+static struct neighbour * neighbour_create ( struct net_device *netdev,
+					     struct net_protocol *net_protocol,
+					     const void *net_dest ) {
+	struct neighbour *entry = zalloc ( sizeof ( *entry ) );
+
+	if ( entry == NULL )
+		return NULL;
+
+	/* Reference counting and identity */
+	ref_init ( &entry->refcnt, neighbour_free );
+	entry->netdev = netdev_get ( netdev );
+	entry->net_protocol = net_protocol;
+	memcpy ( entry->net_dest, net_dest, net_protocol->net_addr_len );
+
+	/* Retry timer */
+	timer_init ( &entry->timer, neighbour_expired, &entry->refcnt );
+	entry->timer.min_timeout = NEIGHBOUR_MIN_TIMEOUT;
+	entry->timer.max_timeout = NEIGHBOUR_MAX_TIMEOUT;
+
+	/* Queue of packets awaiting a link-layer address */
+	INIT_LIST_HEAD ( &entry->tx_queue );
+
+	/* Transfer ownership to cache */
+	list_add ( &entry->list, &neighbours );
+
+	DBGC ( entry, "NEIGHBOUR %s %s %s created\n", netdev->name,
+	       net_protocol->name, net_protocol->ntoa ( net_dest ) );
+	return entry;
+}
+
+/**
+ * Find neighbour cache entry
+ *
+ * @v netdev		Network device
+ * @v net_protocol	Network-layer protocol
+ * @v net_dest		Destination network-layer address
+ * @ret neighbour	Neighbour cache entry, or NULL if not found
+ *
+ * An entry matches when its network device, network-layer protocol
+ * and destination address all match.  A matching entry is moved to
+ * the head of the cache before being returned.
+ */
+static struct neighbour * neighbour_find ( struct net_device *netdev,
+					   struct net_protocol *net_protocol,
+					   const void *net_dest ) {
+	struct neighbour *entry;
+
+	list_for_each_entry ( entry, &neighbours, list ) {
+
+		/* Skip anything that is not an exact match */
+		if ( entry->netdev != netdev )
+			continue;
+		if ( entry->net_protocol != net_protocol )
+			continue;
+		if ( memcmp ( entry->net_dest, net_dest,
+			      net_protocol->net_addr_len ) != 0 )
+			continue;
+
+		/* Move to start of cache */
+		list_del ( &entry->list );
+		list_add ( &entry->list, &neighbours );
+
+		return entry;
+	}
+
+	return NULL;
+}
+
+/**
+ * Start neighbour discovery
+ *
+ * @v neighbour		Neighbour cache entry
+ * @v discovery		Neighbour discovery protocol
+ * @v net_source	Source network-layer address
+ *
+ * Records the discovery protocol and source address in the entry and
+ * starts the entry's timer with no delay; the timer handler
+ * (neighbour_expired()) transmits the discovery requests.
+ */
+static void neighbour_discover ( struct neighbour *neighbour,
+				 struct neighbour_discovery *discovery,
+				 const void *net_source ) {
+	struct net_protocol *net_protocol = neighbour->net_protocol;
+	struct net_device *netdev = neighbour->netdev;
+
+	/* Remember how, and from which source address, to discover */
+	neighbour->discovery = discovery;
+	memcpy ( neighbour->net_source, net_source,
+		 net_protocol->net_addr_len );
+
+	/* Start timer to trigger neighbour discovery */
+	start_timer_nodelay ( &neighbour->timer );
+
+	DBGC ( neighbour, "NEIGHBOUR %s %s %s discovering via %s\n",
+	       netdev->name, net_protocol->name,
+	       net_protocol->ntoa ( neighbour->net_dest ),
+	       discovery->name );
+}
+
+/**
+ * Complete neighbour discovery
+ *
+ * @v neighbour Neighbour cache entry
+ * @v ll_dest Destination link-layer address
+ *
+ * Records the link-layer address in the cache entry, stops the
+ * entry's timer and passes every packet on the entry's deferred
+ * transmit queue to net_tx().  Transmission errors are logged and
+ * otherwise ignored.
+ */
+static void neighbour_discovered ( struct neighbour *neighbour,
+ const void *ll_dest ) {
+ struct net_device *netdev = neighbour->netdev;
+ struct ll_protocol *ll_protocol = netdev->ll_protocol;
+ struct net_protocol *net_protocol = neighbour->net_protocol;
+ struct io_buffer *iobuf;
+ int rc;
+
+ /* Fill in link-layer address (ll_dest is read unconditionally,
+ * so it must point to ll_addr_len bytes)
+ */
+ memcpy ( neighbour->ll_dest, ll_dest, ll_protocol->ll_addr_len );
+ DBGC ( neighbour, "NEIGHBOUR %s %s %s is %s %s\n", netdev->name,
+ net_protocol->name, net_protocol->ntoa ( neighbour->net_dest ),
+ ll_protocol->name, ll_protocol->ntoa ( neighbour->ll_dest ) );
+
+ /* Stop retransmission timer */
+ stop_timer ( &neighbour->timer );
+
+ /* Transmit any packets in queue. Take out a temporary
+ * reference on the entry to prevent it from going out of
+ * scope during the call to net_tx().  Each packet is removed
+ * from the queue before being handed to net_tx(), and is sent
+ * using the device's own link-layer address as the source.
+ */
+ ref_get ( &neighbour->refcnt );
+ while ( ( iobuf = list_first_entry ( &neighbour->tx_queue,
+ struct io_buffer, list )) != NULL){
+ DBGC2 ( neighbour, "NEIGHBOUR %s %s %s transmitting deferred "
+ "packet\n", netdev->name, net_protocol->name,
+ net_protocol->ntoa ( neighbour->net_dest ) );
+ list_del ( &iobuf->list );
+ if ( ( rc = net_tx ( iobuf, netdev, net_protocol, ll_dest,
+ netdev->ll_addr ) ) != 0 ) {
+ DBGC ( neighbour, "NEIGHBOUR %s %s %s could not "
+ "transmit deferred packet: %s\n",
+ netdev->name, net_protocol->name,
+ net_protocol->ntoa ( neighbour->net_dest ),
+ strerror ( rc ) );
+ /* Ignore error and continue */
+ }
+ }
+ ref_put ( &neighbour->refcnt );
+}
+
+/**
+ * Destroy neighbour cache entry
+ *
+ * @v neighbour		Neighbour cache entry
+ * @v rc		Reason for destruction
+ *
+ * Removes the entry from the cache, stops its timer, discards every
+ * deferred packet (recording @c rc as a transmit error against the
+ * network device) and finally drops the reference that the cache
+ * held on the entry.
+ */
+static void neighbour_destroy ( struct neighbour *neighbour, int rc ) {
+	struct net_device *netdev = neighbour->netdev;
+	struct net_protocol *net_protocol = neighbour->net_protocol;
+	struct list_head *queue = &neighbour->tx_queue;
+	struct io_buffer *iobuf;
+
+	/* Take ownership from cache and stop the timer */
+	list_del ( &neighbour->list );
+	stop_timer ( &neighbour->timer );
+
+	/* Discard any outstanding I/O buffers */
+	for ( ; ; ) {
+		iobuf = list_first_entry ( queue, struct io_buffer, list );
+		if ( iobuf == NULL )
+			break;
+		DBGC2 ( neighbour, "NEIGHBOUR %s %s %s discarding deferred "
+			"packet: %s\n", netdev->name, net_protocol->name,
+			net_protocol->ntoa ( neighbour->net_dest ),
+			strerror ( rc ) );
+		list_del ( &iobuf->list );
+		netdev_tx_err ( netdev, iobuf, rc );
+	}
+
+	DBGC ( neighbour, "NEIGHBOUR %s %s %s destroyed: %s\n", netdev->name,
+	       net_protocol->name, net_protocol->ntoa ( neighbour->net_dest ),
+	       strerror ( rc ) );
+
+	/* Drop remaining reference */
+	ref_put ( &neighbour->refcnt );
+}
+
+/**
+ * Handle neighbour timer expiry
+ *
+ * @v timer		Retry timer
+ * @v fail		Failure indicator
+ *
+ * On failure the cache entry is destroyed with -ETIMEDOUT.
+ * Otherwise the timer is restarted and a discovery request is
+ * transmitted; a transmission error is only logged, since the
+ * request will be retried when the timer next expires.
+ */
+static void neighbour_expired ( struct retry_timer *timer, int fail ) {
+	struct neighbour *neighbour =
+		container_of ( timer, struct neighbour, timer );
+	struct neighbour_discovery *discovery = neighbour->discovery;
+	struct net_protocol *net_protocol = neighbour->net_protocol;
+	struct net_device *netdev = neighbour->netdev;
+	int rc;
+
+	/* If we have failed, destroy the cache entry */
+	if ( fail ) {
+		neighbour_destroy ( neighbour, -ETIMEDOUT );
+		return;
+	}
+
+	/* Restart the timer before transmitting */
+	start_timer ( &neighbour->timer );
+
+	/* Transmit neighbour request */
+	rc = discovery->tx_request ( netdev, net_protocol,
+				     neighbour->net_dest,
+				     neighbour->net_source );
+	if ( rc != 0 ) {
+		/* Retransmission happens when the timer expires */
+		DBGC ( neighbour, "NEIGHBOUR %s %s %s could not transmit %s "
+		       "request: %s\n", netdev->name, net_protocol->name,
+		       net_protocol->ntoa ( neighbour->net_dest ),
+		       discovery->name, strerror ( rc ) );
+	}
+}
+
+/**
+ * Transmit packet, determining link-layer address via neighbour discovery
+ *
+ * @v iobuf		I/O buffer
+ * @v netdev		Network device
+ * @v net_protocol	Network-layer protocol
+ * @v net_dest		Destination network-layer address
+ * @v discovery		Neighbour discovery protocol
+ * @v net_source	Source network-layer address
+ * @v ll_source		Source link-layer address
+ * @ret rc		Return status code
+ *
+ * This function consumes the I/O buffer on every path: it is passed
+ * to net_tx(), queued on the cache entry for later transmission (in
+ * which case -EAGAIN is returned), or discarded if no cache entry
+ * can be allocated.  A non-zero return therefore never means that
+ * the caller still owns the buffer.
+ */
+int neighbour_tx ( struct io_buffer *iobuf, struct net_device *netdev,
+		   struct net_protocol *net_protocol, const void *net_dest,
+		   struct neighbour_discovery *discovery,
+		   const void *net_source, const void *ll_source ) {
+	struct neighbour *neighbour;
+
+	/* Find or create neighbour cache entry */
+	neighbour = neighbour_find ( netdev, net_protocol, net_dest );
+	if ( ! neighbour ) {
+		neighbour = neighbour_create ( netdev, net_protocol, net_dest );
+		if ( ! neighbour ) {
+			/* The buffer can be neither sent nor queued:
+			 * discard it (recording the error) rather
+			 * than leaking it.
+			 */
+			netdev_tx_err ( netdev, iobuf, -ENOMEM );
+			return -ENOMEM;
+		}
+		neighbour_discover ( neighbour, discovery, net_source );
+	}
+
+	/* If a link-layer address is available then transmit
+	 * immediately, otherwise queue for later transmission.
+	 */
+	if ( neighbour_has_ll_dest ( neighbour ) ) {
+		return net_tx ( iobuf, netdev, net_protocol, neighbour->ll_dest,
+				ll_source );
+	} else {
+		DBGC2 ( neighbour, "NEIGHBOUR %s %s %s deferring packet\n",
+			netdev->name, net_protocol->name,
+			net_protocol->ntoa ( net_dest ) );
+		list_add_tail ( &iobuf->list, &neighbour->tx_queue );
+		return -EAGAIN;
+	}
+}
+
+/**
+ * Update existing neighbour cache entry
+ *
+ * @v netdev		Network device
+ * @v net_protocol	Network-layer protocol
+ * @v net_dest		Destination network-layer address
+ * @v ll_dest		Destination link-layer address
+ * @ret rc		Return status code
+ *
+ * Never creates a new entry: returns -ENOENT if no matching entry is
+ * already present in the cache.
+ */
+int neighbour_update ( struct net_device *netdev,
+		       struct net_protocol *net_protocol,
+		       const void *net_dest, const void *ll_dest ) {
+	struct neighbour *entry =
+		neighbour_find ( netdev, net_protocol, net_dest );
+
+	/* Only existing entries may be updated */
+	if ( entry == NULL )
+		return -ENOENT;
+
+	/* Set destination address */
+	neighbour_discovered ( entry, ll_dest );
+	return 0;
+}
+
+/**
+ * Define neighbour cache entry
+ *
+ * @v netdev		Network device
+ * @v net_protocol	Network-layer protocol
+ * @v net_dest		Destination network-layer address
+ * @v ll_dest		Destination link-layer address, if known
+ * @ret rc		Return status code
+ *
+ * Unlike neighbour_update(), a missing entry is created.  Returns
+ * -ENOMEM if the entry cannot be allocated.
+ */
+int neighbour_define ( struct net_device *netdev,
+		       struct net_protocol *net_protocol,
+		       const void *net_dest, const void *ll_dest ) {
+	struct neighbour *entry;
+
+	/* Find or create neighbour cache entry */
+	entry = neighbour_find ( netdev, net_protocol, net_dest );
+	if ( entry == NULL )
+		entry = neighbour_create ( netdev, net_protocol, net_dest );
+	if ( entry == NULL )
+		return -ENOMEM;
+
+	/* Set destination address */
+	neighbour_discovered ( entry, ll_dest );
+	return 0;
+}
+
+/**
+ * Update neighbour cache on network device state change or removal
+ *
+ * @v netdev		Network device
+ *
+ * Does nothing while the device is open.  Once the device is not
+ * open, every entry in the cache is destroyed with -ENODEV; the
+ * entries are not filtered by network device.
+ */
+static void neighbour_flush ( struct net_device *netdev ) {
+	struct neighbour *entry;
+	struct neighbour *next;
+
+	/* Nothing to do while the network device remains open */
+	if ( netdev_is_open ( netdev ) )
+		return;
+
+	/* Remove all neighbour cache entries */
+	list_for_each_entry_safe ( entry, next, &neighbours, list )
+		neighbour_destroy ( entry, -ENODEV );
+}
+
+/** Neighbour driver (for net device notifications)
+ *
+ * Both the notify and remove methods are neighbour_flush(), which
+ * empties the neighbour cache once the device is no longer open.
+ */
+struct net_driver neighbour_net_driver __net_driver = {
+ .name = "Neighbour",
+ .notify = neighbour_flush,
+ .remove = neighbour_flush,
+};
+
+/**
+ * Discard some cached neighbour entries
+ *
+ * @ret discarded	Number of cached items discarded
+ *
+ * Destroys at most one entry per call: the one at the tail of the
+ * cache, with -ENOBUFS as the reason.
+ */
+static unsigned int neighbour_discard ( void ) {
+	struct neighbour *oldest;
+
+	/* Locate oldest cache entry, if any */
+	oldest = list_last_entry ( &neighbours, struct neighbour, list );
+	if ( oldest == NULL )
+		return 0;
+
+	/* Drop it */
+	neighbour_destroy ( oldest, -ENOBUFS );
+	return 1;
+}
+
+/**
+ * Neighbour cache discarder
+ *
+ * Neighbour cache entries are deemed to have a high replacement cost,
+ * since flushing an active neighbour cache entry midway through a TCP
+ * transfer will cause substantial disruption.
+ *
+ * Hence the discarder is registered as CACHE_EXPENSIVE; its discard
+ * method is neighbour_discard().
+ */
+struct cache_discarder neighbour_discarder __cache_discarder (CACHE_EXPENSIVE)={
+ .discard = neighbour_discard,
+};
diff --git a/qemu/roms/ipxe/src/net/netdev_settings.c b/qemu/roms/ipxe/src/net/netdev_settings.c
new file mode 100644
index 000000000..b3b2e68d8
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/netdev_settings.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <string.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/dhcp.h>
+#include <ipxe/dhcpopts.h>
+#include <ipxe/settings.h>
+#include <ipxe/device.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/init.h>
+
+/** @file
+ *
+ * Network device configuration settings
+ *
+ */
+
+/** Network device predefined settings
+ *
+ * Five settings are defined here: "mac" (hex), "bustype" (string),
+ * "busloc" (uint32), "busid" (hex) and "chip" (string).  Each one is
+ * paired with its store/fetch handlers in netdev_setting_operations.
+ */
+const struct setting mac_setting __setting ( SETTING_NETDEV, mac ) = {
+ .name = "mac",
+ .description = "MAC address",
+ .type = &setting_type_hex,
+};
+const struct setting bustype_setting __setting ( SETTING_NETDEV, bustype ) = {
+ .name = "bustype",
+ .description = "Bus type",
+ .type = &setting_type_string,
+};
+const struct setting busloc_setting __setting ( SETTING_NETDEV, busloc ) = {
+ .name = "busloc",
+ .description = "Bus location",
+ .type = &setting_type_uint32,
+};
+const struct setting busid_setting __setting ( SETTING_NETDEV, busid ) = {
+ .name = "busid",
+ .description = "Bus ID",
+ .type = &setting_type_hex,
+};
+const struct setting chip_setting __setting ( SETTING_NETDEV, chip ) = {
+ .name = "chip",
+ .description = "Chip",
+ .type = &setting_type_string,
+};
+
+/**
+ * Store MAC address setting
+ *
+ * @v netdev		Network device
+ * @v data		Setting data, or NULL to clear setting
+ * @v len		Length of setting data
+ * @ret rc		Return status code
+ *
+ * Returns -EINVAL if the data length differs from the link-layer
+ * address length.  Clearing the setting resets the link-layer
+ * address via the link-layer protocol's init_addr() method.
+ */
+static int netdev_store_mac ( struct net_device *netdev,
+			      const void *data, size_t len ) {
+	struct ll_protocol *ll_protocol = netdev->ll_protocol;
+
+	/* Reset MAC address if clearing setting */
+	if ( data == NULL ) {
+		ll_protocol->init_addr ( netdev->hw_addr, netdev->ll_addr );
+		return 0;
+	}
+
+	/* Only a complete link-layer address is acceptable */
+	if ( len != ll_protocol->ll_addr_len )
+		return -EINVAL;
+
+	/* Record new MAC address */
+	memcpy ( netdev->ll_addr, data, len );
+	return 0;
+}
+
+/**
+ * Fetch MAC address setting
+ *
+ * @v netdev		Network device
+ * @v data		Buffer to fill with setting data
+ * @v len		Length of buffer
+ * @ret len		Length of setting data, or negative error
+ *
+ * Copies as much of the link-layer address as fits in the buffer,
+ * and always returns the full link-layer address length.
+ */
+static int netdev_fetch_mac ( struct net_device *netdev, void *data,
+			      size_t len ) {
+	size_t ll_addr_len = netdev->ll_protocol->ll_addr_len;
+	size_t copy_len = ( ( len > ll_addr_len ) ? ll_addr_len : len );
+
+	memcpy ( data, netdev->ll_addr, copy_len );
+	return ll_addr_len;
+}
+
+/**
+ * Fetch bus type setting
+ *
+ * @v netdev		Network device
+ * @v data		Buffer to fill with setting data
+ * @v len		Length of buffer
+ * @ret len		Length of setting data, or negative error
+ *
+ * Returns -ENOENT if the device's bus type has no name in the
+ * table below (either beyond the end of the table, or an unnamed
+ * slot within it).  Otherwise copies as much of the name as fits
+ * (via strncpy(), so a truncated copy is not NUL-terminated) and
+ * returns the full length of the name.
+ */
+static int netdev_fetch_bustype ( struct net_device *netdev, void *data,
+				  size_t len ) {
+	static const char *bustypes[] = {
+		[BUS_TYPE_PCI] = "PCI",
+		[BUS_TYPE_ISAPNP] = "ISAPNP",
+		[BUS_TYPE_EISA] = "EISA",
+		[BUS_TYPE_MCA] = "MCA",
+		[BUS_TYPE_ISA] = "ISA",
+		[BUS_TYPE_TAP] = "TAP",
+	};
+	struct device_description *desc = &netdev->dev->desc;
+	const char *bustype = NULL;
+
+	/* Look up the name with a runtime bounds check, so that an
+	 * unlisted bus type can never index past the table or hand
+	 * a NULL pointer to strncpy().
+	 */
+	if ( desc->bus_type < ( sizeof ( bustypes ) /
+				sizeof ( bustypes[0] ) ) )
+		bustype = bustypes[desc->bus_type];
+	if ( bustype == NULL )
+		return -ENOENT;
+
+	strncpy ( data, bustype, len );
+	return strlen ( bustype );
+}
+
+/**
+ * Fetch bus location setting
+ *
+ * @v netdev		Network device
+ * @v data		Buffer to fill with setting data
+ * @v len		Length of buffer
+ * @ret len		Length of setting data, or negative error
+ *
+ * The location is presented as a big-endian 32-bit value.  As much
+ * of it as fits is copied, and its full size is always returned.
+ */
+static int netdev_fetch_busloc ( struct net_device *netdev, void *data,
+				 size_t len ) {
+	uint32_t busloc = cpu_to_be32 ( netdev->dev->desc.location );
+	size_t copy_len = len;
+
+	/* Never copy more than the value itself */
+	if ( copy_len > sizeof ( busloc ) )
+		copy_len = sizeof ( busloc );
+
+	memcpy ( data, &busloc, copy_len );
+	return sizeof ( busloc );
+}
+
+/**
+ * Fetch bus ID setting
+ *
+ * @v netdev		Network device
+ * @v data		Buffer to fill with setting data
+ * @v len		Length of buffer
+ * @ret len		Length of setting data, or negative error
+ *
+ * The bus ID is a struct dhcp_netdev_desc built from the device's
+ * bus type, vendor ID and device ID (the IDs being converted with
+ * htons()).  As much of it as fits is copied, and its full size is
+ * always returned.
+ */
+static int netdev_fetch_busid ( struct net_device *netdev, void *data,
+				size_t len ) {
+	struct device_description *desc = &netdev->dev->desc;
+	struct dhcp_netdev_desc dhcp_desc;
+	size_t copy_len = len;
+
+	/* Construct descriptor */
+	dhcp_desc.type = desc->bus_type;
+	dhcp_desc.vendor = htons ( desc->vendor );
+	dhcp_desc.device = htons ( desc->device );
+
+	/* Never copy more than the descriptor itself */
+	if ( copy_len > sizeof ( dhcp_desc ) )
+		copy_len = sizeof ( dhcp_desc );
+
+	memcpy ( data, &dhcp_desc, copy_len );
+	return sizeof ( dhcp_desc );
+}
+
+/**
+ * Fetch chip setting
+ *
+ * @v netdev		Network device
+ * @v data		Buffer to fill with setting data
+ * @v len		Length of buffer
+ * @ret len		Length of setting data, or negative error
+ *
+ * The value is the driver name of the underlying device.  As much
+ * of it as fits is copied (via strncpy(), so a truncated copy is not
+ * NUL-terminated), and the full length of the name is returned.
+ */
+static int netdev_fetch_chip ( struct net_device *netdev, void *data,
+			       size_t len ) {
+	const char *driver_name = netdev->dev->driver_name;
+
+	strncpy ( data, driver_name, len );
+
+	return strlen ( driver_name );
+}
+
+/** A network device setting operation
+ *
+ * Associates one predefined setting with the handlers used by
+ * netdev_store() and netdev_fetch() for that setting.
+ */
+struct netdev_setting_operation {
+ /** Setting */
+ const struct setting *setting;
+ /** Store setting (or NULL if not supported)
+ *
+ * @v netdev Network device
+ * @v data Setting data, or NULL to clear setting
+ * @v len Length of setting data
+ * @ret rc Return status code
+ *
+ * When this is NULL, netdev_store() returns -ENOTSUP.
+ */
+ int ( * store ) ( struct net_device *netdev, const void *data,
+ size_t len );
+ /** Fetch setting
+ *
+ * @v netdev Network device
+ * @v data Buffer to fill with setting data
+ * @v len Length of buffer
+ * @ret len Length of setting data, or negative error
+ *
+ * Called unconditionally by netdev_fetch(), so must not be NULL.
+ */
+ int ( * fetch ) ( struct net_device *netdev, void *data, size_t len );
+};
+
+/** Network device settings
+ *
+ * Only the MAC address has a store handler; the other four settings
+ * have a NULL store handler and a fetch handler only.
+ */
+static struct netdev_setting_operation netdev_setting_operations[] = {
+ { &mac_setting, netdev_store_mac, netdev_fetch_mac },
+ { &bustype_setting, NULL, netdev_fetch_bustype },
+ { &busloc_setting, NULL, netdev_fetch_busloc },
+ { &busid_setting, NULL, netdev_fetch_busid },
+ { &chip_setting, NULL, netdev_fetch_chip },
+};
+
+/**
+ * Store value of network device setting
+ *
+ * @v settings		Settings block
+ * @v setting		Setting to store
+ * @v data		Setting data, or NULL to clear setting
+ * @v len		Length of setting data
+ * @ret rc		Return status code
+ *
+ * A setting listed in netdev_setting_operations is passed to its
+ * store handler, or rejected with -ENOTSUP if it has none.  Any
+ * other setting is passed to generic_settings_store().
+ */
+static int netdev_store ( struct settings *settings,
+			  const struct setting *setting,
+			  const void *data, size_t len ) {
+	struct net_device *netdev = container_of ( settings, struct net_device,
+						   settings.settings );
+	struct netdev_setting_operation *op = netdev_setting_operations;
+	unsigned int count = ( sizeof ( netdev_setting_operations ) /
+			       sizeof ( netdev_setting_operations[0] ) );
+	unsigned int i;
+
+	/* Handle network device-specific settings */
+	for ( i = 0 ; i < count ; i++, op++ ) {
+		if ( setting_cmp ( setting, op->setting ) != 0 )
+			continue;
+		if ( ! op->store )
+			return -ENOTSUP;
+		return op->store ( netdev, data, len );
+	}
+
+	/* Everything else lives in the generic settings block */
+	return generic_settings_store ( settings, setting, data, len );
+}
+
+/**
+ * Fetch value of network device setting
+ *
+ * @v settings		Settings block
+ * @v setting		Setting to fetch
+ * @v data		Buffer to fill with setting data
+ * @v len		Length of buffer
+ * @ret len		Length of setting data, or negative error
+ *
+ * A setting listed in netdev_setting_operations is passed to its
+ * fetch handler.  Any other setting is passed to
+ * generic_settings_fetch().
+ */
+static int netdev_fetch ( struct settings *settings, struct setting *setting,
+			  void *data, size_t len ) {
+	struct net_device *netdev = container_of ( settings, struct net_device,
+						   settings.settings );
+	struct netdev_setting_operation *op = netdev_setting_operations;
+	unsigned int count = ( sizeof ( netdev_setting_operations ) /
+			       sizeof ( netdev_setting_operations[0] ) );
+	unsigned int i;
+
+	/* Handle network device-specific settings */
+	for ( i = 0 ; i < count ; i++, op++ ) {
+		if ( setting_cmp ( setting, op->setting ) != 0 )
+			continue;
+		return op->fetch ( netdev, data, len );
+	}
+
+	/* Everything else lives in the generic settings block */
+	return generic_settings_fetch ( settings, setting, data, len );
+}
+
+/**
+ * Clear network device settings
+ *
+ * @v settings Settings block
+ *
+ * Simply calls generic_settings_clear(); the device-specific
+ * settings handled by netdev_store() are not touched here.
+ */
+static void netdev_clear ( struct settings *settings ) {
+ generic_settings_clear ( settings );
+}
+
+/** Network device configuration settings operations
+ *
+ * Store, fetch and clear are implemented by netdev_store(),
+ * netdev_fetch() and netdev_clear() respectively.
+ */
+struct settings_operations netdev_settings_operations = {
+ .store = netdev_store,
+ .fetch = netdev_fetch,
+ .clear = netdev_clear,
+};
+
+/**
+ * Redirect "netX" settings block
+ *
+ * @v settings		Settings block
+ * @ret settings	Underlying settings block
+ *
+ * Yields the settings block of the most recently opened network
+ * device, or the original settings block if last_opened_netdev()
+ * returns no device.
+ */
+static struct settings * netdev_redirect ( struct settings *settings ) {
+	struct net_device *netdev = last_opened_netdev();
+
+	/* No redirection target: leave the settings block unchanged */
+	if ( netdev == NULL )
+		return settings;
+
+	/* Redirect to most recently opened network device */
+	return netdev_settings ( netdev );
+}
+
+/** "netX" settings operations
+ *
+ * Only the redirect method is provided (netdev_redirect()).
+ */
+static struct settings_operations netdev_redirect_settings_operations = {
+ .redirect = netdev_redirect,
+};
+
+/** "netX" settings
+ *
+ * A statically allocated settings block (no reference counter) with
+ * empty sibling and child lists, using the redirect-only operations
+ * above.  Registered by netdev_redirect_settings_init().
+ */
+static struct settings netdev_redirect_settings = {
+ .refcnt = NULL,
+ .siblings = LIST_HEAD_INIT ( netdev_redirect_settings.siblings ),
+ .children = LIST_HEAD_INIT ( netdev_redirect_settings.children ),
+ .op = &netdev_redirect_settings_operations,
+};
+
+/** Initialise "netX" settings
+ *
+ * Registers the "netX" settings block with no parent.  A
+ * registration failure is reported via DBG() and otherwise ignored.
+ */
+static void netdev_redirect_settings_init ( void ) {
+	int rc;
+
+	rc = register_settings ( &netdev_redirect_settings, NULL, "netX" );
+	if ( rc != 0 ) {
+		DBG ( "Could not register netX settings: %s\n",
+		      strerror ( rc ) );
+	}
+}
+
+/** "netX" settings initialiser
+ *
+ * Runs netdev_redirect_settings_init() at the INIT_LATE stage.
+ */
+struct init_fn netdev_redirect_settings_init_fn __init_fn ( INIT_LATE ) = {
+ .initialise = netdev_redirect_settings_init,
+};
diff --git a/qemu/roms/ipxe/src/net/netdevice.c b/qemu/roms/ipxe/src/net/netdevice.c
new file mode 100644
index 000000000..a55e6b7d7
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/netdevice.c
@@ -0,0 +1,1220 @@
+/*
+ * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <byteswap.h>
+#include <string.h>
+#include <errno.h>
+#include <config/general.h>
+#include <ipxe/if_ether.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/tables.h>
+#include <ipxe/process.h>
+#include <ipxe/init.h>
+#include <ipxe/malloc.h>
+#include <ipxe/device.h>
+#include <ipxe/errortab.h>
+#include <ipxe/profile.h>
+#include <ipxe/vlan.h>
+#include <ipxe/netdevice.h>
+
+/** @file
+ *
+ * Network device management
+ *
+ */
+
+/** List of network devices */
+struct list_head net_devices = LIST_HEAD_INIT ( net_devices );
+
+/** List of open network devices, in reverse order of opening */
+static struct list_head open_net_devices = LIST_HEAD_INIT ( open_net_devices );
+
+/** Network device index */
+static unsigned int netdev_index = 0;
+
+/** Network polling profiler */
+static struct profiler net_poll_profiler __profiler = { .name = "net.poll" };
+
+/** Network receive profiler */
+static struct profiler net_rx_profiler __profiler = { .name = "net.rx" };
+
+/** Network transmit profiler
+ *
+ * Started on entry to netdev_tx(), and stopped only when the packet
+ * has been accepted for transmission without error.
+ */
+static struct profiler net_tx_profiler __profiler = { .name = "net.tx" };
+
+/** Default unknown link status code
+ *
+ * A uniquified variant (0x01, "Unknown") of EINPROGRESS.
+ * netdev_link_down() treats this status as one that may be replaced
+ * by the default link-down status.
+ */
+#define EUNKNOWN_LINK_STATUS __einfo_error ( EINFO_EUNKNOWN_LINK_STATUS )
+#define EINFO_EUNKNOWN_LINK_STATUS \
+ __einfo_uniqify ( EINFO_EINPROGRESS, 0x01, "Unknown" )
+
+/** Default not-yet-attempted-configuration status code
+ *
+ * A uniquified variant (0x02, "Unused") of EINPROGRESS.
+ */
+#define EUNUSED_CONFIG __einfo_error ( EINFO_EUNUSED_CONFIG )
+#define EINFO_EUNUSED_CONFIG \
+ __einfo_uniqify ( EINFO_EINPROGRESS, 0x02, "Unused" )
+
+/** Default configuration-in-progress status code
+ *
+ * A uniquified variant (0x03, "Incomplete") of EINPROGRESS.
+ */
+#define EINPROGRESS_CONFIG __einfo_error ( EINFO_EINPROGRESS_CONFIG )
+#define EINFO_EINPROGRESS_CONFIG \
+ __einfo_uniqify ( EINFO_EINPROGRESS, 0x03, "Incomplete" )
+
+/** Default link-down status code
+ *
+ * A uniquified variant (0x01, "Down") of ENOTCONN.  Recorded by
+ * netdev_link_down() when no more detailed status is already set.
+ */
+#define ENOTCONN_LINK_DOWN __einfo_error ( EINFO_ENOTCONN_LINK_DOWN )
+#define EINFO_ENOTCONN_LINK_DOWN \
+ __einfo_uniqify ( EINFO_ENOTCONN, 0x01, "Down" )
+
+/** Human-readable message for the default link statuses
+ *
+ * Covers all four of the status codes defined above.
+ */
+struct errortab netdev_errors[] __errortab = {
+ __einfo_errortab ( EINFO_EUNKNOWN_LINK_STATUS ),
+ __einfo_errortab ( EINFO_ENOTCONN_LINK_DOWN ),
+ __einfo_errortab ( EINFO_EUNUSED_CONFIG ),
+ __einfo_errortab ( EINFO_EINPROGRESS_CONFIG ),
+};
+
+/**
+ * Check whether or not network device has a link-layer address
+ *
+ * @v netdev		Network device
+ * @ret has_ll_addr	Network device has a link-layer address
+ *
+ * The device is deemed to have an address if any byte of its
+ * link-layer address buffer is non-zero.  The whole buffer is
+ * examined, not just the protocol's address length.
+ */
+static int netdev_has_ll_addr ( struct net_device *netdev ) {
+	size_t i;
+
+	for ( i = 0 ; i < sizeof ( netdev->ll_addr ) ; i++ ) {
+		if ( netdev->ll_addr[i] != 0 )
+			return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * Notify drivers of network device or link state change
+ *
+ * @v netdev		Network device
+ *
+ * Calls the notify method of every entry in the NET_DRIVERS table
+ * that provides one.
+ */
+static void netdev_notify ( struct net_device *netdev ) {
+	struct net_driver *driver;
+
+	for_each_table_entry ( driver, NET_DRIVERS ) {
+		/* The notify method is optional */
+		if ( ! driver->notify )
+			continue;
+		driver->notify ( netdev );
+	}
+}
+
+/**
+ * Freeze network device receive queue processing
+ *
+ * @v netdev Network device
+ *
+ * Sets NETDEV_RX_FROZEN in the device state, then notifies all
+ * network drivers via netdev_notify().
+ */
+void netdev_rx_freeze ( struct net_device *netdev ) {
+
+ /* Mark receive queue processing as frozen */
+ netdev->state |= NETDEV_RX_FROZEN;
+
+ /* Notify drivers of change */
+ netdev_notify ( netdev );
+}
+
+/**
+ * Unfreeze network device receive queue processing
+ *
+ * @v netdev Network device
+ *
+ * Clears NETDEV_RX_FROZEN in the device state, then notifies all
+ * network drivers via netdev_notify().
+ */
+void netdev_rx_unfreeze ( struct net_device *netdev ) {
+
+ /* Mark receive queue processing as not frozen */
+ netdev->state &= ~NETDEV_RX_FROZEN;
+
+ /* Notify drivers of change */
+ netdev_notify ( netdev );
+}
+
+/**
+ * Mark network device as having a specific link state
+ *
+ * @v netdev		Network device
+ * @v rc		Link status code
+ *
+ * A status code of zero means that the link is up; any other value
+ * is recorded as the reason for the link being down.  Drivers are
+ * notified on every call, whether or not the status has changed.
+ */
+void netdev_link_err ( struct net_device *netdev, int rc ) {
+
+	/* Record link state */
+	netdev->link_rc = rc;
+	if ( rc != 0 ) {
+		DBGC ( netdev, "NETDEV %s link is down: %s\n",
+		       netdev->name, strerror ( rc ) );
+	} else {
+		DBGC ( netdev, "NETDEV %s link is up\n", netdev->name );
+	}
+
+	/* Notify drivers of link state change */
+	netdev_notify ( netdev );
+}
+
+/**
+ * Mark network device as having link down
+ *
+ * @v netdev		Network device
+ *
+ * Records the default link-down status, but only if the current
+ * status is "up" (zero) or the default "unknown" status.  Any other
+ * status is left untouched.
+ */
+void netdev_link_down ( struct net_device *netdev ) {
+
+	/* Avoid clobbering a more detailed link status code, if one
+	 * is already set.
+	 */
+	if ( ( netdev->link_rc != 0 ) &&
+	     ( netdev->link_rc != -EUNKNOWN_LINK_STATUS ) )
+		return;
+
+	netdev_link_err ( netdev, -ENOTCONN_LINK_DOWN );
+}
+
+/**
+ * Record network device statistic
+ *
+ * @v stats		Network device statistics
+ * @v rc		Status code
+ *
+ * A zero status increments the good counter.  Any other status
+ * increments the bad counter and the per-error record for that
+ * status; if there is no such record, the record with the lowest
+ * count (the first such, in case of a tie) is overwritten.
+ */
+static void netdev_record_stat ( struct net_device_stats *stats, int rc ) {
+	unsigned int count = ( sizeof ( stats->errors ) /
+			       sizeof ( stats->errors[0] ) );
+	struct net_device_error *victim = &stats->errors[0];
+	struct net_device_error *slot;
+	unsigned int i;
+
+	/* If this is not an error, just update the good counter */
+	if ( rc == 0 ) {
+		stats->good++;
+		return;
+	}
+
+	/* Update the bad counter */
+	stats->bad++;
+
+	/* Look for a matching record, tracking the least common one */
+	for ( i = 0 ; i < count ; i++ ) {
+		slot = &stats->errors[i];
+		if ( slot->rc == rc ) {
+			slot->count++;
+			return;
+		}
+		if ( slot->count < victim->count )
+			victim = slot;
+	}
+
+	/* Overwrite the least common error record */
+	victim->rc = rc;
+	victim->count = 1;
+}
+
+/**
+ * Transmit raw packet via network device
+ *
+ * @v netdev		Network device
+ * @v iobuf		I/O buffer
+ * @ret rc		Return status code
+ *
+ * Transmits the packet via the specified network device.  This
+ * function takes ownership of the I/O buffer.
+ *
+ * The packet is placed on the transmit queue before anything else
+ * is attempted, so that every failure (device not open, deliberate
+ * test discard, or driver error) is completed through
+ * netdev_tx_complete_err().
+ */
+int netdev_tx ( struct net_device *netdev, struct io_buffer *iobuf ) {
+	int rc;
+
+	DBGC2 ( netdev, "NETDEV %s transmitting %p (%p+%zx)\n",
+		netdev->name, iobuf, iobuf->data, iob_len ( iobuf ) );
+	profile_start ( &net_tx_profiler );
+
+	/* Enqueue packet */
+	list_add_tail ( &iobuf->list, &netdev->tx_queue );
+
+	if ( ! netdev_is_open ( netdev ) ) {
+		/* Avoid calling transmit() on unopened network devices */
+		rc = -ENETUNREACH;
+	} else if ( ( NETDEV_DISCARD_RATE > 0 ) &&
+		    ( ( random() % NETDEV_DISCARD_RATE ) == 0 ) ) {
+		/* Discard packet (for test purposes) if applicable */
+		rc = -EAGAIN;
+	} else {
+		/* Transmit packet */
+		rc = netdev->op->transmit ( netdev, iobuf );
+	}
+
+	/* On any failure, dequeue and discard the packet */
+	if ( rc != 0 ) {
+		netdev_tx_complete_err ( netdev, iobuf, rc );
+		return rc;
+	}
+
+	profile_stop ( &net_tx_profiler );
+	return 0;
+}
+
+/**
+ * Defer transmitted packet
+ *
+ * @v netdev Network device
+ * @v iobuf I/O buffer
+ *
+ * Drivers may call netdev_tx_defer() if there is insufficient space
+ * in the transmit descriptor ring. Any packets deferred in this way
+ * will be automatically retransmitted as soon as space becomes
+ * available (i.e. as soon as the driver calls netdev_tx_complete()).
+ *
+ * The packet must currently be in the network device's TX queue.
+ *
+ * Drivers utilising netdev_tx_defer() must ensure that space in the
+ * transmit descriptor ring is freed up @b before calling
+ * netdev_tx_complete(). For example, if the ring is modelled using a
+ * producer counter and a consumer counter, then the consumer counter
+ * must be incremented before the call to netdev_tx_complete().
+ * Failure to do this will cause the retransmitted packet to be
+ * immediately redeferred (which will result in out-of-order
+ * transmissions and other nastiness).
+ *
+ * The packet is moved from the TX queue to the tail of the deferred
+ * queue.  An -ENOBUFS transmit error is recorded for every deferral.
+ */
+void netdev_tx_defer ( struct net_device *netdev, struct io_buffer *iobuf ) {
+
+ /* Catch data corruption as early as possible */
+ list_check_contains_entry ( iobuf, &netdev->tx_queue, list );
+
+ /* Remove from transmit queue */
+ list_del ( &iobuf->list );
+
+ /* Add to deferred transmit queue */
+ list_add_tail ( &iobuf->list, &netdev->tx_deferred );
+
+ /* Record "out of space" statistic (a NULL I/O buffer is
+ * passed, so the deferred packet itself is not passed on
+ * to be discarded)
+ */
+ netdev_tx_err ( netdev, NULL, -ENOBUFS );
+}
+
+/**
+ * Record transmit outcome and release packet
+ *
+ * @v netdev		Network device
+ * @v iobuf		I/O buffer, or NULL
+ * @v rc		Packet status code
+ *
+ * The status code is added to the TX statistics, the outcome is
+ * logged, and the I/O buffer is passed to free_iob().  This function
+ * takes ownership of the I/O buffer.
+ */
+void netdev_tx_err ( struct net_device *netdev,
+		     struct io_buffer *iobuf, int rc ) {
+
+	/* Account for this packet in the transmit statistics */
+	netdev_record_stat ( &netdev->tx_stats, rc );
+
+	/* Log the outcome */
+	if ( rc != 0 ) {
+		DBGC ( netdev, "NETDEV %s transmission %p failed: %s\n",
+		       netdev->name, iobuf, strerror ( rc ) );
+	} else {
+		DBGC2 ( netdev, "NETDEV %s transmission %p complete\n",
+			netdev->name, iobuf );
+	}
+
+	/* Release the buffer */
+	free_iob ( iobuf );
+}
+
+/**
+ * Complete network transmission
+ *
+ * @v netdev Network device
+ * @v iobuf I/O buffer
+ * @v rc Packet status code
+ *
+ * The packet must currently be in the network device's TX queue.
+ */
+void netdev_tx_complete_err ( struct net_device *netdev,
+ struct io_buffer *iobuf, int rc ) {
+
+ /* Catch data corruption as early as possible */
+ list_check_contains_entry ( iobuf, &netdev->tx_queue, list );
+
+ /* Dequeue and free I/O buffer (netdev_tx_err() records the
+ * status code and frees the buffer)
+ */
+ list_del ( &iobuf->list );
+ netdev_tx_err ( netdev, iobuf, rc );
+
+ /* Transmit first pending packet, if any. The packet is taken
+ * off the deferred queue first because netdev_tx() links it
+ * onto the TX queue itself. If that transmission fails,
+ * netdev_tx() calls back into this function, which then moves
+ * on to the next deferred packet.
+ */
+ if ( ( iobuf = list_first_entry ( &netdev->tx_deferred,
+ struct io_buffer, list ) ) != NULL ) {
+ list_del ( &iobuf->list );
+ netdev_tx ( netdev, iobuf );
+ }
+}
+
+/**
+ * Complete the oldest outstanding transmission
+ *
+ * @v netdev		Network device
+ * @v rc		Packet status code
+ *
+ * Completes the packet at the head of the TX queue with the given
+ * status code.  Does nothing if the TX queue is empty.
+ */
+void netdev_tx_complete_next_err ( struct net_device *netdev, int rc ) {
+	struct io_buffer *oldest;
+
+	/* Nothing to do if no transmissions are outstanding */
+	oldest = list_first_entry ( &netdev->tx_queue, struct io_buffer,
+				    list );
+	if ( oldest == NULL )
+		return;
+
+	netdev_tx_complete_err ( netdev, oldest, rc );
+}
+
+/**
+ * Flush device's transmit queue
+ *
+ * @v netdev Network device
+ */
+static void netdev_tx_flush ( struct net_device *netdev ) {
+
+ /* Discard any packets in the TX queue. This will also cause
+ * any packets in the deferred TX queue to be discarded
+ * automatically: each completion hands the first deferred
+ * packet back to netdev_tx(), which re-queues it on the TX
+ * queue. NOTE(review): this relies on that retransmission
+ * either failing or landing on the TX queue so that this loop
+ * sees it - confirm for callers other than netdev_close().
+ */
+ while ( ! list_empty ( &netdev->tx_queue ) ) {
+ netdev_tx_complete_next_err ( netdev, -ECANCELED );
+ }
+ /* Both queues must be empty once the loop terminates */
+ assert ( list_empty ( &netdev->tx_queue ) );
+ assert ( list_empty ( &netdev->tx_deferred ) );
+}
+
+/**
+ * Add packet to receive queue
+ *
+ * @v netdev Network device
+ * @v iobuf I/O buffer
+ *
+ * The packet is added to the network device's RX queue. This
+ * function takes ownership of the I/O buffer. The buffer must not
+ * be NULL, since it is linked onto the RX queue unconditionally; use
+ * netdev_rx_err() to report a receive error without a buffer.
+ */
+void netdev_rx ( struct net_device *netdev, struct io_buffer *iobuf ) {
+
+ DBGC2 ( netdev, "NETDEV %s received %p (%p+%zx)\n",
+ netdev->name, iobuf, iobuf->data, iob_len ( iobuf ) );
+
+ /* Discard packet (for test purposes) if applicable */
+ if ( ( NETDEV_DISCARD_RATE > 0 ) &&
+ ( ( random() % NETDEV_DISCARD_RATE ) == 0 ) ) {
+ netdev_rx_err ( netdev, iobuf, -EAGAIN );
+ return;
+ }
+
+ /* Enqueue packet */
+ list_add_tail ( &iobuf->list, &netdev->rx_queue );
+
+ /* Update statistics counter (status 0 for a good packet) */
+ netdev_record_stat ( &netdev->rx_stats, 0 );
+}
+
+/**
+ * Discard received packet
+ *
+ * @v netdev Network device
+ * @v iobuf I/O buffer, or NULL
+ * @v rc Packet status code
+ *
+ * The packet is discarded and an RX error is recorded. This function
+ * takes ownership of the I/O buffer. @c iobuf may be NULL if, for
+ * example, the net device wishes to report an error due to being
+ * unable to allocate an I/O buffer.
+ */
+void netdev_rx_err ( struct net_device *netdev,
+ struct io_buffer *iobuf, int rc ) {
+
+ /* Only the buffer's address is logged, so a NULL buffer is
+ * acceptable here
+ */
+ DBGC ( netdev, "NETDEV %s failed to receive %p: %s\n",
+ netdev->name, iobuf, strerror ( rc ) );
+
+ /* Discard packet (iobuf is passed to free_iob() as-is, even
+ * when it is NULL)
+ */
+ free_iob ( iobuf );
+
+ /* Update statistics counter */
+ netdev_record_stat ( &netdev->rx_stats, rc );
+}
+
+/**
+ * Poll network device for completed and received packets
+ *
+ * @v netdev		Network device
+ *
+ * Invokes the device's poll() operation, but only while the device
+ * is open.  Received packets are expected to be queued by the driver
+ * via netdev_rx().
+ */
+void netdev_poll ( struct net_device *netdev ) {
+
+	/* Never poll a device that has not been opened */
+	if ( ! netdev_is_open ( netdev ) )
+		return;
+
+	netdev->op->poll ( netdev );
+}
+
+/**
+ * Take the first packet off the device's receive queue
+ *
+ * @v netdev		Network device
+ * @ret iobuf		I/O buffer, or NULL
+ *
+ * Unlinks the packet at the head of the RX queue and returns it.
+ * Returns NULL if the queue is empty.  Ownership of the packet is
+ * transferred to the caller.
+ */
+struct io_buffer * netdev_rx_dequeue ( struct net_device *netdev ) {
+	struct io_buffer *head;
+
+	head = list_first_entry ( &netdev->rx_queue, struct io_buffer, list );
+
+	/* Unlink only if there was actually a packet queued */
+	if ( head )
+		list_del ( &head->list );
+
+	return head;
+}
+
+/**
+ * Discard everything on the device's receive queue
+ *
+ * @v netdev		Network device
+ *
+ * Each queued packet is dequeued and reported as a receive error
+ * with status -ECANCELED.
+ */
+static void netdev_rx_flush ( struct net_device *netdev ) {
+	struct io_buffer *pending;
+
+	for ( pending = netdev_rx_dequeue ( netdev ) ; pending ;
+	      pending = netdev_rx_dequeue ( netdev ) ) {
+		netdev_rx_err ( netdev, pending, -ECANCELED );
+	}
+}
+
+/**
+ * Handle completion of a network device configuration attempt
+ *
+ * @v config		Network device configuration
+ * @v rc		Reason for completion
+ *
+ * Restarts the configuration job interface and stores the result in
+ * the configuration's status code.
+ */
+static void netdev_config_close ( struct net_device_configuration *config,
+				  int rc ) {
+	struct net_device *netdev = config->netdev;
+	struct net_device_configurator *configurator = config->configurator;
+
+	/* Restart interface */
+	intf_restart ( &config->job, rc );
+
+	/* Remember how the attempt ended */
+	config->rc = rc;
+
+	/* Log the outcome */
+	if ( rc != 0 ) {
+		DBGC ( netdev, "NETDEV %s configuration via %s failed: %s\n",
+		       netdev->name, configurator->name, strerror ( rc ) );
+	} else {
+		DBGC ( netdev, "NETDEV %s configured via %s\n",
+		       netdev->name, configurator->name );
+	}
+}
+
+/** Network device configuration interface operations
+ *
+ * The only operation handled is intf_close, which is routed to
+ * netdev_config_close() so that the configuration result is recorded.
+ */
+static struct interface_operation netdev_config_ops[] = {
+ INTF_OP ( intf_close, struct net_device_configuration *,
+ netdev_config_close ),
+};
+
+/** Network device configuration interface descriptor
+ *
+ * Describes the "job" interface embedded in struct
+ * net_device_configuration, using netdev_config_ops.
+ */
+static struct interface_descriptor netdev_config_desc =
+ INTF_DESC ( struct net_device_configuration, job, netdev_config_ops );
+
+/**
+ * Free network device
+ *
+ * @v refcnt Network device reference counter
+ */
+static void free_netdev ( struct refcnt *refcnt ) {
+ /* The reference counter is embedded in the network device */
+ struct net_device *netdev =
+ container_of ( refcnt, struct net_device, refcnt );
+
+ /* Release any packets still queued, then the settings, and
+ * finally the device structure itself
+ */
+ netdev_tx_flush ( netdev );
+ netdev_rx_flush ( netdev );
+ clear_settings ( netdev_settings ( netdev ) );
+ free ( netdev );
+}
+
+/**
+ * Allocate network device
+ *
+ * @v priv_len		Length of private data area (net_device::priv)
+ * @ret netdev		Network device, or NULL
+ *
+ * A single zeroed allocation holds the network device structure,
+ * followed by one configuration slot per entry in the
+ * NET_DEVICE_CONFIGURATORS table, followed by the private data area.
+ */
+struct net_device * alloc_netdev ( size_t priv_len ) {
+	struct net_device_configurator *configurator;
+	struct net_device_configuration *slot;
+	struct net_device *netdev;
+	unsigned int num_configs;
+	size_t confs_len;
+	size_t total_len;
+
+	/* Work out the size of the combined allocation */
+	num_configs = table_num_entries ( NET_DEVICE_CONFIGURATORS );
+	confs_len = ( num_configs * sizeof ( netdev->configs[0] ) );
+	total_len = ( sizeof ( *netdev ) + confs_len + priv_len );
+
+	/* Allocate, giving up immediately on failure */
+	netdev = zalloc ( total_len );
+	if ( ! netdev )
+		return NULL;
+
+	/* Initialise the core structure */
+	ref_init ( &netdev->refcnt, free_netdev );
+	netdev->link_rc = -EUNKNOWN_LINK_STATUS;
+	INIT_LIST_HEAD ( &netdev->tx_queue );
+	INIT_LIST_HEAD ( &netdev->tx_deferred );
+	INIT_LIST_HEAD ( &netdev->rx_queue );
+	netdev_settings_init ( netdev );
+
+	/* Initialise one configuration slot per configurator */
+	slot = netdev->configs;
+	for_each_table_entry ( configurator, NET_DEVICE_CONFIGURATORS ) {
+		slot->netdev = netdev;
+		slot->configurator = configurator;
+		slot->rc = -EUNUSED_CONFIG;
+		intf_init ( &slot->job, &netdev_config_desc,
+			    &netdev->refcnt );
+		slot++;
+	}
+
+	/* Private data lives immediately after the configuration slots */
+	netdev->priv = ( ( ( void * ) netdev ) + sizeof ( *netdev ) +
+			 confs_len );
+
+	return netdev;
+}
+
+/**
+ * Register network device
+ *
+ * @v netdev		Network device
+ * @ret rc		Return status code
+ *
+ * Gives the network device a name and adds it to the list of network
+ * devices.  If registration fails after the device has been added to
+ * the list, the device is removed from the list again and the
+ * reference taken on behalf of the list is dropped.
+ */
+int register_netdev ( struct net_device *netdev ) {
+	struct ll_protocol *ll_protocol = netdev->ll_protocol;
+	struct net_driver *driver;
+	struct net_device *duplicate;
+	uint32_t seed;
+	int rc;
+
+	/* Set initial link-layer address, if not already set */
+	if ( ! netdev_has_ll_addr ( netdev ) ) {
+		ll_protocol->init_addr ( netdev->hw_addr, netdev->ll_addr );
+	}
+
+	/* Reject network devices that are already available via a
+	 * different hardware device.
+	 */
+	duplicate = find_netdev_by_ll_addr ( ll_protocol, netdev->ll_addr );
+	if ( duplicate && ( duplicate->dev != netdev->dev ) ) {
+		DBGC ( netdev, "NETDEV rejecting duplicate (phys %s) of %s "
+		       "(phys %s)\n", netdev->dev->name, duplicate->name,
+		       duplicate->dev->name );
+		rc = -EEXIST;
+		goto err_duplicate;
+	}
+
+	/* Record device index and create device name */
+	netdev->index = netdev_index++;
+	if ( netdev->name[0] == '\0' ) {
+		snprintf ( netdev->name, sizeof ( netdev->name ), "net%d",
+			   netdev->index );
+	}
+
+	/* Use least significant bits of the link-layer address to
+	 * improve the randomness of the (non-cryptographic) random
+	 * number generator.
+	 */
+	memcpy ( &seed, ( netdev->ll_addr + ll_protocol->ll_addr_len
+			  - sizeof ( seed ) ), sizeof ( seed ) );
+	srand ( rand() ^ seed );
+
+	/* Add to device list (the list holds its own reference) */
+	netdev_get ( netdev );
+	list_add_tail ( &netdev->list, &net_devices );
+	DBGC ( netdev, "NETDEV %s registered (phys %s hwaddr %s)\n",
+	       netdev->name, netdev->dev->name,
+	       netdev_addr ( netdev ) );
+
+	/* Register per-netdev configuration settings */
+	if ( ( rc = register_settings ( netdev_settings ( netdev ),
+					NULL, netdev->name ) ) != 0 ) {
+		DBGC ( netdev, "NETDEV %s could not register settings: %s\n",
+		       netdev->name, strerror ( rc ) );
+		goto err_register_settings;
+	}
+
+	/* Probe device */
+	for_each_table_entry ( driver, NET_DRIVERS ) {
+		if ( driver->probe && ( rc = driver->probe ( netdev ) ) != 0 ) {
+			DBGC ( netdev, "NETDEV %s could not add %s device: "
+			       "%s\n", netdev->name, driver->name,
+			       strerror ( rc ) );
+			goto err_probe;
+		}
+	}
+
+	return 0;
+
+ err_probe:
+	/* Undo the drivers that were probed before the failing one */
+	for_each_table_entry_continue_reverse ( driver, NET_DRIVERS ) {
+		if ( driver->remove )
+			driver->remove ( netdev );
+	}
+	clear_settings ( netdev_settings ( netdev ) );
+	unregister_settings ( netdev_settings ( netdev ) );
+ err_register_settings:
+	/* Undo list_add_tail() and netdev_get(): a device that failed
+	 * to register must not remain visible on the device list,
+	 * nor keep the list's reference.
+	 */
+	list_del ( &netdev->list );
+	netdev_put ( netdev );
+ err_duplicate:
+	return rc;
+}
+
+/**
+ * Open network device
+ *
+ * @v netdev		Network device
+ * @ret rc		Return status code
+ *
+ * Opening an already-open device is a successful no-op.  The device
+ * is marked as open before the driver's open() method runs, and the
+ * mark is removed again if that method fails.
+ */
+int netdev_open ( struct net_device *netdev ) {
+	int rc;
+
+	/* Do nothing if device is already open */
+	if ( netdev->state & NETDEV_OPEN )
+		return 0;
+
+	DBGC ( netdev, "NETDEV %s opening\n", netdev->name );
+
+	/* Mark as opened, then ask the driver to open the device */
+	netdev->state |= NETDEV_OPEN;
+	rc = netdev->op->open ( netdev );
+	if ( rc != 0 ) {
+		/* Driver refused: revert the open mark */
+		netdev->state &= ~NETDEV_OPEN;
+		return rc;
+	}
+
+	/* Add to head of open devices list */
+	list_add ( &netdev->open_list, &open_net_devices );
+
+	/* Notify drivers of device state change */
+	netdev_notify ( netdev );
+
+	return 0;
+}
+
+/**
+ * Close network device
+ *
+ * @v netdev Network device
+ */
+void netdev_close ( struct net_device *netdev ) {
+ unsigned int num_configs;
+ unsigned int i;
+
+ /* Do nothing if device is already closed */
+ if ( ! ( netdev->state & NETDEV_OPEN ) )
+ return;
+
+ DBGC ( netdev, "NETDEV %s closing\n", netdev->name );
+
+ /* Terminate any ongoing configurations. Use intf_close()
+ * rather than intf_restart() to allow the cancellation to be
+ * reported back to us if a configuration is actually in
+ * progress.
+ */
+ num_configs = table_num_entries ( NET_DEVICE_CONFIGURATORS );
+ for ( i = 0 ; i < num_configs ; i++ )
+ intf_close ( &netdev->configs[i].job, -ECANCELED );
+
+ /* Remove from open devices list */
+ list_del ( &netdev->open_list );
+
+ /* Mark as closed (done before the notification and the driver
+ * close() call below, so both run with NETDEV_OPEN cleared)
+ */
+ netdev->state &= ~NETDEV_OPEN;
+
+ /* Notify drivers of device state change */
+ netdev_notify ( netdev );
+
+ /* Close the device */
+ netdev->op->close ( netdev );
+
+ /* Flush TX and RX queues (after the driver has closed) */
+ netdev_tx_flush ( netdev );
+ netdev_rx_flush ( netdev );
+}
+
+/**
+ * Unregister network device
+ *
+ * @v netdev Network device
+ *
+ * Removes the network device from the list of network devices.
+ */
+void unregister_netdev ( struct net_device *netdev ) {
+ struct net_driver *driver;
+
+ /* Ensure device is closed */
+ netdev_close ( netdev );
+
+ /* Remove device (drivers are visited in reverse table order,
+ * the opposite of the probe order in register_netdev())
+ */
+ for_each_table_entry_reverse ( driver, NET_DRIVERS ) {
+ if ( driver->remove )
+ driver->remove ( netdev );
+ }
+
+ /* Unregister per-netdev configuration settings */
+ clear_settings ( netdev_settings ( netdev ) );
+ unregister_settings ( netdev_settings ( netdev ) );
+
+ /* Remove from device list and drop the reference that
+ * register_netdev() took on behalf of the list
+ */
+ DBGC ( netdev, "NETDEV %s unregistered\n", netdev->name );
+ list_del ( &netdev->list );
+ netdev_put ( netdev );
+
+ /* Reset network device index if no devices remain */
+ if ( list_empty ( &net_devices ) )
+ netdev_index = 0;
+}
+
+/**
+ * Enable or disable interrupts
+ *
+ * @v netdev		Network device
+ * @v enable		Interrupts should be enabled
+ *
+ * Has no effect on devices that do not support interrupts.
+ */
+void netdev_irq ( struct net_device *netdev, int enable ) {
+
+	/* Devices without interrupt support are left untouched */
+	if ( ! netdev_irq_supported ( netdev ) )
+		return;
+
+	/* Ask the driver to switch interrupts on or off */
+	netdev->op->irq ( netdev, enable );
+
+	/* Mirror the new setting in the device state flags */
+	if ( enable ) {
+		netdev->state |= NETDEV_IRQ_ENABLED;
+	} else {
+		netdev->state &= ~NETDEV_IRQ_ENABLED;
+	}
+}
+
+/**
+ * Look up network device by name
+ *
+ * @v name		Network device name
+ * @ret netdev		Network device, or NULL
+ *
+ * The special name "netX" resolves to the most recently opened
+ * network device.
+ */
+struct net_device * find_netdev ( const char *name ) {
+	struct net_device *candidate;
+
+	/* "netX" is an alias for the most recently opened device */
+	if ( strcmp ( name, "netX" ) == 0 )
+		return last_opened_netdev();
+
+	/* Otherwise require an exact name match */
+	list_for_each_entry ( candidate, &net_devices, list ) {
+		if ( strcmp ( candidate->name, name ) != 0 )
+			continue;
+		return candidate;
+	}
+
+	return NULL;
+}
+
+/**
+ * Look up network device by index
+ *
+ * @v index		Network device index
+ * @ret netdev		Network device, or NULL
+ */
+struct net_device * find_netdev_by_index ( unsigned int index ) {
+	struct net_device *candidate;
+
+	/* Walk the list of registered devices */
+	list_for_each_entry ( candidate, &net_devices, list ) {
+		if ( candidate->index != index )
+			continue;
+		return candidate;
+	}
+
+	return NULL;
+}
+
+/**
+ * Look up network device by bus type and bus location
+ *
+ * @v bus_type		Bus type
+ * @v location		Bus location
+ * @ret netdev		Network device, or NULL
+ *
+ * Both values are compared against the description of the underlying
+ * hardware device.
+ */
+struct net_device * find_netdev_by_location ( unsigned int bus_type,
+					      unsigned int location ) {
+	struct net_device *candidate;
+
+	list_for_each_entry ( candidate, &net_devices, list ) {
+		if ( candidate->dev->desc.bus_type != bus_type )
+			continue;
+		if ( candidate->dev->desc.location != location )
+			continue;
+		return candidate;
+	}
+
+	return NULL;
+}
+
+/**
+ * Look up network device by link-layer address
+ *
+ * @v ll_protocol	Link-layer protocol
+ * @v ll_addr		Link-layer address
+ * @ret netdev		Network device, or NULL
+ *
+ * A device matches only if it uses the same link-layer protocol and
+ * its address is equal over the protocol's address length.
+ */
+struct net_device * find_netdev_by_ll_addr ( struct ll_protocol *ll_protocol,
+					     const void *ll_addr ) {
+	struct net_device *candidate;
+
+	list_for_each_entry ( candidate, &net_devices, list ) {
+		/* Addresses are only comparable within one protocol */
+		if ( candidate->ll_protocol != ll_protocol )
+			continue;
+		if ( memcmp ( candidate->ll_addr, ll_addr,
+			      ll_protocol->ll_addr_len ) != 0 )
+			continue;
+		return candidate;
+	}
+
+	return NULL;
+}
+
+/**
+ * Get most recently opened network device
+ *
+ * @ret netdev		Most recently opened network device, or NULL
+ *
+ * netdev_open() adds devices at the head of the open devices list,
+ * so the first entry is the most recently opened one.
+ */
+struct net_device * last_opened_netdev ( void ) {
+	struct net_device *newest;
+
+	newest = list_first_entry ( &open_net_devices, struct net_device,
+				    open_list );
+
+	/* Anything on the open list must really be open */
+	if ( newest )
+		assert ( netdev_is_open ( newest ) );
+
+	return newest;
+}
+
+/**
+ * Transmit network-layer packet
+ *
+ * @v iobuf		I/O buffer
+ * @v netdev		Network device
+ * @v net_protocol	Network-layer protocol
+ * @v ll_dest		Destination link-layer address
+ * @v ll_source		Source link-layer address
+ * @ret rc		Return status code
+ *
+ * The link-layer protocol's push() method adds the link-layer header,
+ * after which the packet is handed to netdev_tx().  This function
+ * takes ownership of the I/O buffer.
+ */
+int net_tx ( struct io_buffer *iobuf, struct net_device *netdev,
+	     struct net_protocol *net_protocol, const void *ll_dest,
+	     const void *ll_source ) {
+	struct ll_protocol *ll_protocol = netdev->ll_protocol;
+	int rc;
+
+	/* Add link-layer header and, if that worked, transmit */
+	rc = ll_protocol->push ( netdev, iobuf, ll_dest, ll_source,
+				 net_protocol->net_proto );
+	if ( rc == 0 )
+		return netdev_tx ( netdev, iobuf );
+
+	/* Record error for diagnosis (this also frees the buffer) */
+	netdev_tx_err ( netdev, iobuf, rc );
+	return rc;
+}
+
+/**
+ * Process received network-layer packet
+ *
+ * @v iobuf		I/O buffer
+ * @v netdev		Network device
+ * @v net_proto		Network-layer protocol, in network-byte order
+ * @v ll_dest		Destination link-layer address
+ * @v ll_source		Source link-layer address
+ * @v flags		Packet flags
+ * @ret rc		Return status code
+ *
+ * The packet is passed to the first registered network-layer protocol
+ * whose protocol number matches.  If none matches, the packet is
+ * freed and -ENOTSUP is returned.
+ */
+int net_rx ( struct io_buffer *iobuf, struct net_device *netdev,
+	     uint16_t net_proto, const void *ll_dest, const void *ll_source,
+	     unsigned int flags ) {
+	struct net_protocol *net_protocol;
+
+	/* Hand off to network-layer protocol, if any */
+	for_each_table_entry ( net_protocol, NET_PROTOCOLS ) {
+		if ( net_protocol->net_proto != net_proto )
+			continue;
+		return net_protocol->rx ( iobuf, netdev, ll_dest,
+					  ll_source, flags );
+	}
+
+	/* No handler: drop the packet */
+	DBGC ( netdev, "NETDEV %s unknown network protocol %04x\n",
+	       netdev->name, ntohs ( net_proto ) );
+	free_iob ( iobuf );
+	return -ENOTSUP;
+}
+
+/**
+ * Poll the network stack
+ *
+ * This polls all interfaces for received packets, and processes
+ * packets from the RX queue.
+ */
+void net_poll ( void ) {
+ struct net_device *netdev;
+ struct io_buffer *iobuf;
+ struct ll_protocol *ll_protocol;
+ const void *ll_dest;
+ const void *ll_source;
+ uint16_t net_proto;
+ unsigned int flags;
+ int rc;
+
+ /* Poll and process each network device */
+ list_for_each_entry ( netdev, &net_devices, list ) {
+
+ /* Poll for new packets */
+ profile_start ( &net_poll_profiler );
+ netdev_poll ( netdev );
+ profile_stop ( &net_poll_profiler );
+
+ /* Leave received packets on the queue if receive
+ * queue processing is currently frozen. This will
+ * happen when the raw packets are to be manually
+ * dequeued using netdev_rx_dequeue(), rather than
+ * processed via the usual networking stack.
+ */
+ if ( netdev_rx_frozen ( netdev ) )
+ continue;
+
+ /* Process all received packets */
+ while ( ( iobuf = netdev_rx_dequeue ( netdev ) ) ) {
+
+ DBGC2 ( netdev, "NETDEV %s processing %p (%p+%zx)\n",
+ netdev->name, iobuf, iobuf->data,
+ iob_len ( iobuf ) );
+ profile_start ( &net_rx_profiler );
+
+ /* Remove link-layer header. NOTE(review): on
+ * failure the packet is freed without recording an
+ * RX error and without reaching profile_stop() -
+ * confirm that this is intended.
+ */
+ ll_protocol = netdev->ll_protocol;
+ if ( ( rc = ll_protocol->pull ( netdev, iobuf,
+ &ll_dest, &ll_source,
+ &net_proto,
+ &flags ) ) != 0 ) {
+ free_iob ( iobuf );
+ continue;
+ }
+
+ /* Hand packet to network layer. Ownership of the
+ * buffer passes to net_rx(), so any error is
+ * recorded below with a NULL buffer.
+ */
+ if ( ( rc = net_rx ( iob_disown ( iobuf ), netdev,
+ net_proto, ll_dest,
+ ll_source, flags ) ) != 0 ) {
+ /* Record error for diagnosis */
+ netdev_rx_err ( netdev, NULL, rc );
+ }
+ profile_stop ( &net_rx_profiler );
+ }
+ }
+}
+
+/**
+ * Single-step the network stack
+ *
+ * @v process Network stack process
+ */
+static void net_step ( struct process *process __unused ) {
+ /* The process argument is unused: each step simply polls the
+ * whole network stack
+ */
+ net_poll();
+}
+
+/**
+ * Get the VLAN tag (when VLAN support is not present)
+ *
+ * @v netdev Network device
+ * @ret tag 0, indicating that device is not a VLAN device
+ */
+__weak unsigned int vlan_tag ( struct net_device *netdev __unused ) {
+ /* Weak fallback: presumably replaced by the real
+ * implementation when VLAN support is linked in
+ */
+ return 0;
+}
+
+/**
+ * Identify VLAN device (when VLAN support is not present)
+ *
+ * @v trunk Trunk network device
+ * @v tag VLAN tag
+ * @ret netdev VLAN device, if any
+ */
+__weak struct net_device * vlan_find ( struct net_device *trunk __unused,
+ unsigned int tag __unused ) {
+ /* Weak fallback: no VLAN devices exist without VLAN support */
+ return NULL;
+}
+
+/** Networking stack process
+ *
+ * Runs net_step(), and therefore net_poll(), each time the process
+ * is stepped.
+ */
+PERMANENT_PROCESS ( net_process, net_step );
+
+/**
+ * Discard some cached network device data
+ *
+ * @ret discarded	Number of cached items discarded
+ *
+ * At most one deferred TX packet is dropped from each network
+ * device.
+ */
+static unsigned int net_discard ( void ) {
+	struct net_device *netdev;
+	struct io_buffer *victim;
+	unsigned int discarded = 0;
+
+	/* Try to drop one deferred TX packet from each network device */
+	for_each_netdev ( netdev ) {
+
+		/* Skip devices with nothing deferred */
+		victim = list_first_entry ( &netdev->tx_deferred,
+					    struct io_buffer, list );
+		if ( victim == NULL )
+			continue;
+
+		/* Discard first deferred packet */
+		list_del ( &victim->list );
+		free_iob ( victim );
+
+		/* Report discard */
+		discarded++;
+	}
+
+	return discarded;
+}
+
+/** Network device cache discarder
+ *
+ * Registers net_discard() as a cache discarder at CACHE_NORMAL
+ * priority.
+ */
+struct cache_discarder net_discarder __cache_discarder ( CACHE_NORMAL ) = {
+ .discard = net_discard,
+};
+
+/**
+ * Look up network device configurator by name
+ *
+ * @v name		Name
+ * @ret configurator	Network device configurator, or NULL
+ */
+struct net_device_configurator * find_netdev_configurator ( const char *name ) {
+	struct net_device_configurator *candidate;
+
+	/* Search the table of known configurators for an exact match */
+	for_each_table_entry ( candidate, NET_DEVICE_CONFIGURATORS ) {
+		if ( strcmp ( candidate->name, name ) != 0 )
+			continue;
+		return candidate;
+	}
+
+	return NULL;
+}
+
+/**
+ * Start network device configuration
+ *
+ * @v netdev		Network device
+ * @v configurator	Network device configurator
+ * @ret rc		Return status code
+ *
+ * Any configuration already running via the same configurator is
+ * cancelled first.  The configuration's status code is set to
+ * -EINPROGRESS_CONFIG while the attempt is running, or to the error
+ * if the attempt could not be started.
+ */
+int netdev_configure ( struct net_device *netdev,
+		       struct net_device_configurator *configurator ) {
+	struct net_device_configuration *config =
+		netdev_configuration ( netdev, configurator );
+	int rc;
+
+	/* Check applicability of configurator */
+	if ( ! netdev_configurator_applies ( netdev, configurator ) ) {
+		DBGC ( netdev, "NETDEV %s does not support configuration via "
+		       "%s\n", netdev->name, configurator->name );
+		return -ENOTSUP;
+	}
+
+	/* Terminate any ongoing configuration */
+	intf_restart ( &config->job, -ECANCELED );
+
+	/* Mark configuration as being in progress */
+	config->rc = -EINPROGRESS_CONFIG;
+
+	DBGC ( netdev, "NETDEV %s starting configuration via %s\n",
+	       netdev->name, configurator->name );
+
+	/* Start configuration */
+	rc = configurator->start ( &config->job, netdev );
+	if ( rc == 0 )
+		return 0;
+
+	/* Could not start: record the failure as the result */
+	DBGC ( netdev, "NETDEV %s could not start configuration via "
+	       "%s: %s\n", netdev->name, configurator->name,
+	       strerror ( rc ) );
+	config->rc = rc;
+	return rc;
+}
+
+/**
+ * Start network device configuration via all supported configurators
+ *
+ * @v netdev		Network device
+ * @ret rc		Return status code
+ *
+ * Stops at, and returns the error from, the first applicable
+ * configurator that fails to start.
+ */
+int netdev_configure_all ( struct net_device *netdev ) {
+	struct net_device_configurator *configurator;
+	int rc;
+
+	for_each_table_entry ( configurator, NET_DEVICE_CONFIGURATORS ) {
+
+		/* Skip any inapplicable configurators */
+		if ( ! netdev_configurator_applies ( netdev, configurator ) )
+			continue;
+
+		/* Start configuration, bailing out on the first failure */
+		rc = netdev_configure ( netdev, configurator );
+		if ( rc != 0 )
+			return rc;
+	}
+
+	return 0;
+}
+
+/**
+ * Check if network device has a configuration with a specified status code
+ *
+ * @v netdev		Network device
+ * @v rc		Status code
+ * @ret has_rc		Network device has a configuration with this status code
+ *
+ * Every configuration slot of the device is examined, one per entry
+ * in the NET_DEVICE_CONFIGURATORS table.
+ */
+static int netdev_has_configuration_rc ( struct net_device *netdev, int rc ) {
+	struct net_device_configuration *config = netdev->configs;
+	unsigned int remaining;
+
+	/* Walk the slots by pointer until one matches */
+	for ( remaining = table_num_entries ( NET_DEVICE_CONFIGURATORS ) ;
+	      remaining ; remaining--, config++ ) {
+		if ( config->rc == rc )
+			return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * Check if network device configuration is in progress
+ *
+ * @v netdev Network device
+ * @ret is_in_progress Network device configuration is in progress
+ */
+int netdev_configuration_in_progress ( struct net_device *netdev ) {
+
+ /* netdev_configure() sets this status code while an attempt
+ * is running
+ */
+ return netdev_has_configuration_rc ( netdev, -EINPROGRESS_CONFIG );
+}
+
+/**
+ * Check if network device has at least one successful configuration
+ *
+ * @v netdev Network device
+ * @ret is_ok Network device has at least one successful configuration
+ */
+int netdev_configuration_ok ( struct net_device *netdev ) {
+
+ /* A configuration is successful if its status code is zero */
+ return netdev_has_configuration_rc ( netdev, 0 );
+}
diff --git a/qemu/roms/ipxe/src/net/nullnet.c b/qemu/roms/ipxe/src/net/nullnet.c
new file mode 100644
index 000000000..4ac50f64b
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/nullnet.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <errno.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/netdevice.h>
+
+/** @file
+ *
+ * Null network device
+ *
+ */
+
+/**
+ * Open null network device
+ *
+ * @v netdev		Network device (unused)
+ * @ret rc		Return status code (always -ENODEV)
+ */
+static int null_open ( struct net_device *netdev __unused ) {
+	return -ENODEV;
+}
+
+/**
+ * Close null network device
+ *
+ * @v netdev		Network device (unused)
+ */
+static void null_close ( struct net_device *netdev __unused ) {
+	/* Do nothing */
+}
+
+/**
+ * Transmit packet via null network device
+ *
+ * @v netdev		Network device (unused)
+ * @v iobuf		I/O buffer (unused, never touched)
+ * @ret rc		Return status code (always -ENODEV)
+ */
+static int null_transmit ( struct net_device *netdev __unused,
+			   struct io_buffer *iobuf __unused ) {
+	return -ENODEV;
+}
+
+/** Poll null network device: no completions or packets are reported */
+static void null_poll ( struct net_device *netdev __unused ) {
+ /* Do nothing */
+}
+
+/** Enable or disable interrupts on null network device: request is ignored */
+static void null_irq ( struct net_device *netdev __unused,
+ int enable __unused ) {
+ /* Do nothing */
+}
+
+/** Null network device operations
+ *
+ * open() and transmit() always fail with -ENODEV; close(), poll() and
+ * irq() do nothing.
+ */
+struct net_device_operations null_netdev_operations = {
+ .open = null_open,
+ .close = null_close,
+ .transmit = null_transmit,
+ .poll = null_poll,
+ .irq = null_irq,
+};
diff --git a/qemu/roms/ipxe/src/net/oncrpc/mount.c b/qemu/roms/ipxe/src/net/oncrpc/mount.c
new file mode 100644
index 000000000..8838a147c
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/oncrpc/mount.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2013 Marin Hannache <ipxe@mareo.fr>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <libgen.h>
+#include <byteswap.h>
+#include <ipxe/time.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/open.h>
+#include <ipxe/features.h>
+#include <ipxe/oncrpc.h>
+#include <ipxe/oncrpc_iob.h>
+#include <ipxe/nfs.h>
+#include <ipxe/mount.h>
+
+/** @file
+ *
+ * NFS MOUNT protocol
+ *
+ */
+
+/** MNT procedure number (MOUNTPROC3_MNT in the MOUNT version 3 protocol) */
+#define MOUNT_MNT 1
+/** UMNT procedure number (MOUNTPROC3_UMNT in the MOUNT version 3 protocol) */
+#define MOUNT_UMNT 3
+
+/**
+ * Send a MNT request
+ *
+ * @v intf Interface to send the request on
+ * @v session ONC RPC session
+ * @v mountpoint The path of the directory to mount.
+ * @ret rc Return status code
+ */
+int mount_mnt ( struct interface *intf, struct oncrpc_session *session,
+ const char *mountpoint ) {
+ /* The request carries a single string argument: the path */
+ struct oncrpc_field fields[] = {
+ ONCRPC_FIELD ( str, mountpoint ),
+ ONCRPC_FIELD_END,
+ };
+
+ return oncrpc_call ( intf, session, MOUNT_MNT, fields );
+}
+
+/**
+ * Send a UMNT request
+ *
+ * @v intf Interface to send the request on
+ * @v session ONC RPC session
+ * @v mountpoint The path of the directory to unmount.
+ * @ret rc Return status code
+ */
+int mount_umnt ( struct interface *intf, struct oncrpc_session *session,
+ const char *mountpoint ) {
+ /* The request carries a single string argument: the path */
+ struct oncrpc_field fields[] = {
+ ONCRPC_FIELD ( str, mountpoint ),
+ ONCRPC_FIELD_END,
+ };
+
+ return oncrpc_call ( intf, session, MOUNT_UMNT, fields );
+}
+
+/**
+ * Parse an MNT reply
+ *
+ * @v mnt_reply		A structure where the data will be saved
+ * @v reply		The ONC RPC reply to get data from
+ * @ret rc		Return status code
+ *
+ * The status word is read first and translated into an error code.
+ * The file handle is extracted only when the status is MNT3_OK.
+ */
+int mount_get_mnt_reply ( struct mount_mnt_reply *mnt_reply,
+			  struct oncrpc_reply *reply ) {
+	int rc;
+
+	if ( ( mnt_reply == NULL ) || ( reply == NULL ) )
+		return -EINVAL;
+
+	mnt_reply->status = oncrpc_iob_get_int ( reply->data );
+
+	/* Translate the MOUNT status into a return status code */
+	switch ( mnt_reply->status ) {
+	case MNT3_OK:
+		rc = 0;
+		break;
+	case MNT3ERR_NOENT:
+		rc = -ENOENT;
+		break;
+	case MNT3ERR_IO:
+		rc = -EIO;
+		break;
+	case MNT3ERR_ACCES:
+		rc = -EACCES;
+		break;
+	case MNT3ERR_NOTDIR:
+		rc = -ENOTDIR;
+		break;
+	case MNT3ERR_NAMETOOLONG:
+		rc = -ENAMETOOLONG;
+		break;
+	default:
+		rc = -EPROTO;
+		break;
+	}
+	if ( rc != 0 )
+		return rc;
+
+	/* Success: the file handle follows the status word */
+	nfs_iob_get_fh ( reply->data, &mnt_reply->fh );
+
+	return 0;
+}
diff --git a/qemu/roms/ipxe/src/net/oncrpc/nfs.c b/qemu/roms/ipxe/src/net/oncrpc/nfs.c
new file mode 100644
index 000000000..b6118f91a
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/oncrpc/nfs.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (C) 2013 Marin Hannache <ipxe@mareo.fr>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <libgen.h>
+#include <byteswap.h>
+#include <ipxe/time.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/open.h>
+#include <ipxe/features.h>
+#include <ipxe/nfs.h>
+#include <ipxe/oncrpc.h>
+#include <ipxe/oncrpc_iob.h>
+#include <ipxe/portmap.h>
+#include <ipxe/mount.h>
+#include <ipxe/settings.h>
+
+/** @file
+ *
+ * Network File System protocol
+ *
+ */
+
+/** NFS LOOKUP procedure */
+#define NFS_LOOKUP 3
+/** NFS READLINK procedure */
+#define NFS_READLINK 5
+/** NFS READ procedure */
+#define NFS_READ 6
+
+/**
+ * Extract a file handle from the beginning of an I/O buffer
+ *
+ * @v io_buf		I/O buffer
+ * @v fh		File handle
+ * @ret size		Size of the data read
+ *
+ * The length word is always consumed and stored in @c fh->size.  The
+ * handle bytes are copied (and consumed) only if they fit in the
+ * handle storage and are actually present in the buffer; otherwise
+ * only the size of the length word is returned, which lets callers
+ * detect the failure by comparing the return value against
+ * @c fh->size plus one 32-bit word.
+ */
+size_t nfs_iob_get_fh ( struct io_buffer *io_buf, struct nfs_fh *fh ) {
+	fh->size = oncrpc_iob_get_int ( io_buf );
+
+	/* Handle is larger than the 64 bytes this code supports */
+	if ( fh->size > 64 )
+		return sizeof ( uint32_t );
+
+	/* Handle claims more bytes than remain in the buffer */
+	if ( fh->size > iob_len ( io_buf ) )
+		return sizeof ( uint32_t );
+
+	memcpy ( fh->fh, io_buf->data, fh->size );
+	iob_pull ( io_buf, fh->size );
+
+	return fh->size + sizeof ( uint32_t );
+}
+
+/**
+ * Append a file handle to an I/O buffer
+ *
+ * @v io_buf		I/O buffer
+ * @v fh		File handle
+ * @ret size		Size of the data written
+ *
+ * Writes the handle length as an integer followed by the raw handle
+ * bytes.
+ */
+size_t nfs_iob_add_fh ( struct io_buffer *io_buf, const struct nfs_fh *fh ) {
+	size_t written;
+	void *dest;
+
+	/* Length word first */
+	written = oncrpc_iob_add_int ( io_buf, fh->size );
+
+	/* Then the handle itself */
+	dest = iob_put ( io_buf, fh->size );
+	memcpy ( dest, &fh->fh, fh->size );
+	written += fh->size;
+
+	return written;
+}
+
+/**
+ * Issue an NFS LOOKUP call
+ *
+ * @v intf		Interface to send the request on
+ * @v session		ONC RPC session
+ * @v fh		The file handle of the directory
+ * @v filename		The file name
+ * @ret rc		Return status code
+ */
+int nfs_lookup ( struct interface *intf, struct oncrpc_session *session,
+		 const struct nfs_fh *fh, const char *filename ) {
+	/* Arguments: directory handle, then the name to look up */
+	struct oncrpc_field lookup_args[] = {
+		ONCRPC_SUBFIELD ( array, fh->size, &fh->fh ),
+		ONCRPC_FIELD ( str, filename ),
+		ONCRPC_FIELD_END,
+	};
+	int rc;
+
+	rc = oncrpc_call ( intf, session, NFS_LOOKUP, lookup_args );
+
+	return rc;
+}
+
+/**
+ * Issue an NFS READLINK call
+ *
+ * @v intf		Interface to send the request on
+ * @v session		ONC RPC session
+ * @v fh		The symlink file handle
+ * @ret rc		Return status code
+ */
+int nfs_readlink ( struct interface *intf, struct oncrpc_session *session,
+		   const struct nfs_fh *fh ) {
+	/* The symlink's file handle is the only argument */
+	struct oncrpc_field readlink_args[] = {
+		ONCRPC_SUBFIELD ( array, fh->size, &fh->fh ),
+		ONCRPC_FIELD_END,
+	};
+	int rc;
+
+	rc = oncrpc_call ( intf, session, NFS_READLINK, readlink_args );
+
+	return rc;
+}
+
+/**
+ * Issue an NFS READ call
+ *
+ * @v intf		Interface to send the request on
+ * @v session		ONC RPC session
+ * @v fh		The file handle
+ * @v offset		Offset within the file at which to start reading
+ * @v count		Number of bytes requested
+ * @ret rc		Return status code
+ */
+int nfs_read ( struct interface *intf, struct oncrpc_session *session,
+	       const struct nfs_fh *fh, uint64_t offset, uint32_t count ) {
+	/* Arguments: file handle, 64-bit offset, 32-bit byte count */
+	struct oncrpc_field read_args[] = {
+		ONCRPC_SUBFIELD ( array, fh->size, &fh->fh ),
+		ONCRPC_FIELD ( int64, offset ),
+		ONCRPC_FIELD ( int32, count ),
+		ONCRPC_FIELD_END,
+	};
+	int rc;
+
+	rc = oncrpc_call ( intf, session, NFS_READ, read_args );
+
+	return rc;
+}
+
+/**
+ * Parse a LOOKUP reply
+ *
+ * @v lookup_reply	A structure where the data will be saved
+ * @v reply		The ONC RPC reply to get data from
+ * @ret rc		Return status code
+ *
+ * On success the file handle is stored in @c lookup_reply->fh.  The
+ * entry type is stored in @c lookup_reply->ent_type when the reply
+ * includes attributes, and is zero otherwise.  A reply whose file
+ * handle cannot be extracted is rejected with -EPROTO.
+ */
+int nfs_get_lookup_reply ( struct nfs_lookup_reply *lookup_reply,
+			   struct oncrpc_reply *reply ) {
+	size_t consumed;
+
+	if ( ! lookup_reply || ! reply )
+		return -EINVAL;
+
+	/* Never leave the entry type indeterminate: callers compare it
+	 * against NFS_ATTR_SYMLINK even when no attributes were sent.
+	 */
+	lookup_reply->ent_type = 0;
+
+	lookup_reply->status = oncrpc_iob_get_int ( reply->data );
+	switch ( lookup_reply->status )
+	{
+	case NFS3_OK:
+		break;
+	case NFS3ERR_PERM:
+		return -EPERM;
+	case NFS3ERR_NOENT:
+		return -ENOENT;
+	case NFS3ERR_IO:
+		return -EIO;
+	case NFS3ERR_ACCES:
+		return -EACCES;
+	case NFS3ERR_NOTDIR:
+		return -ENOTDIR;
+	case NFS3ERR_NAMETOOLONG:
+		return -ENAMETOOLONG;
+	case NFS3ERR_STALE:
+		return -ESTALE;
+	case NFS3ERR_BADHANDLE:
+	case NFS3ERR_SERVERFAULT:
+	default:
+		return -EPROTO;
+	}
+
+	/* nfs_iob_get_fh() reports having consumed only the length
+	 * word when it refuses to copy the handle; such a handle must
+	 * not be used for further requests.
+	 */
+	consumed = nfs_iob_get_fh ( reply->data, &lookup_reply->fh );
+	if ( consumed != ( lookup_reply->fh.size + sizeof ( uint32_t ) ) )
+		return -EPROTO;
+
+	/* A flag word of 1 means attributes follow; the first
+	 * attribute word is the entry type.
+	 */
+	if ( oncrpc_iob_get_int ( reply->data ) == 1 )
+		lookup_reply->ent_type = oncrpc_iob_get_int ( reply->data );
+
+	return 0;
+}
+/**
+ * Parse a READLINK reply
+ *
+ * @v readlink_reply	A structure where the data will be saved
+ * @v reply		The ONC RPC reply to get data from
+ * @ret rc		Return status code
+ *
+ * On success @c readlink_reply->path points into the reply buffer (it
+ * is not copied and is not NUL-terminated by this function) and
+ * @c readlink_reply->path_len gives its length.  A path length that
+ * exceeds the data remaining in the buffer is rejected with -EPROTO.
+ */
+int nfs_get_readlink_reply ( struct nfs_readlink_reply *readlink_reply,
+			     struct oncrpc_reply *reply ) {
+	if ( ! readlink_reply || ! reply )
+		return -EINVAL;
+
+	readlink_reply->status = oncrpc_iob_get_int ( reply->data );
+	switch ( readlink_reply->status )
+	{
+	case NFS3_OK:
+		break;
+	case NFS3ERR_IO:
+		return -EIO;
+	case NFS3ERR_ACCES:
+		return -EACCES;
+	case NFS3ERR_INVAL:
+		return -EINVAL;
+	case NFS3ERR_NOTSUPP:
+		return -ENOTSUP;
+	case NFS3ERR_STALE:
+		return -ESTALE;
+	case NFS3ERR_BADHANDLE:
+	case NFS3ERR_SERVERFAULT:
+	default:
+		return -EPROTO;
+	}
+
+	/* A flag word of 1 means attributes follow; skip over them
+	 * (five 32-bit words and eight 64-bit words).
+	 */
+	if ( oncrpc_iob_get_int ( reply->data ) == 1 )
+		iob_pull ( reply->data, 5 * sizeof ( uint32_t ) +
+					8 * sizeof ( uint64_t ) );
+
+	readlink_reply->path_len = oncrpc_iob_get_int ( reply->data );
+
+	/* The path is handed back as a pointer into the buffer, so its
+	 * claimed length must not exceed what the buffer still holds.
+	 */
+	if ( readlink_reply->path_len > iob_len ( reply->data ) )
+		return -EPROTO;
+
+	readlink_reply->path = reply->data->data;
+
+	return 0;
+}
+
+/**
+ * Parse a READ reply
+ *
+ * @v read_reply	A structure where the data will be saved
+ * @v reply		The ONC RPC reply to get data from
+ * @ret rc		Return status code
+ *
+ * On success @c read_reply->data points into the reply buffer at the
+ * start of the file data.  @c read_reply->filesize is taken from the
+ * attributes when the reply includes them, and is zero otherwise.
+ */
+int nfs_get_read_reply ( struct nfs_read_reply *read_reply,
+			 struct oncrpc_reply *reply ) {
+	if ( ! read_reply || ! reply )
+		return -EINVAL;
+
+	read_reply->status = oncrpc_iob_get_int ( reply->data );
+	switch ( read_reply->status )
+	{
+	case NFS3_OK:
+		break;
+	case NFS3ERR_PERM:
+		return -EPERM;
+	case NFS3ERR_NOENT:
+		return -ENOENT;
+	case NFS3ERR_IO:
+		return -EIO;
+	case NFS3ERR_NXIO:
+		return -ENXIO;
+	case NFS3ERR_ACCES:
+		return -EACCES;
+	case NFS3ERR_INVAL:
+		return -EINVAL;
+	case NFS3ERR_STALE:
+		return -ESTALE;
+	case NFS3ERR_BADHANDLE:
+	case NFS3ERR_SERVERFAULT:
+	default:
+		return -EPROTO;
+	}
+
+	/* The file size is only available when attributes follow;
+	 * default it so that callers never read an indeterminate value.
+	 */
+	read_reply->filesize = 0;
+
+	if ( oncrpc_iob_get_int ( reply->data ) == 1 )
+	{
+		/* Skip five 32-bit words, read the 64-bit size, then
+		 * skip the seven remaining 64-bit words.
+		 */
+		iob_pull ( reply->data, 5 * sizeof ( uint32_t ) );
+		read_reply->filesize = oncrpc_iob_get_int64 ( reply->data );
+		iob_pull ( reply->data, 7 * sizeof ( uint64_t ) );
+	}
+
+	read_reply->count = oncrpc_iob_get_int ( reply->data );
+	read_reply->eof = oncrpc_iob_get_int ( reply->data );
+	read_reply->data_len = oncrpc_iob_get_int ( reply->data );
+	read_reply->data = reply->data->data;
+
+	/* The byte count and the data length must agree */
+	if ( read_reply->count != read_reply->data_len )
+		return -EPROTO;
+
+	return 0;
+}
+
diff --git a/qemu/roms/ipxe/src/net/oncrpc/nfs_open.c b/qemu/roms/ipxe/src/net/oncrpc/nfs_open.c
new file mode 100644
index 000000000..c0dceb82f
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/oncrpc/nfs_open.c
@@ -0,0 +1,683 @@
+/*
+ * Copyright (C) 2013 Marin Hannache <ipxe@mareo.fr>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <libgen.h>
+#include <byteswap.h>
+#include <ipxe/time.h>
+#include <ipxe/socket.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/in.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/uri.h>
+#include <ipxe/features.h>
+#include <ipxe/nfs.h>
+#include <ipxe/nfs_open.h>
+#include <ipxe/oncrpc.h>
+#include <ipxe/oncrpc_iob.h>
+#include <ipxe/portmap.h>
+#include <ipxe/mount.h>
+#include <ipxe/nfs_uri.h>
+
+/** @file
+ *
+ * Network File System protocol
+ *
+ */
+
+FEATURE ( FEATURE_PROTOCOL, "NFS", DHCP_EB_FEATURE_NFS, 1 );
+
+#define NFS_RSIZE 100000
+
+/** Progress of the conversation with the port mapper */
+enum nfs_pm_state {
+	/** No request sent yet */
+	NFS_PORTMAP_NONE = 0,
+	/** GETPORT for the MOUNT service sent; awaiting its reply */
+	NFS_PORTMAP_MOUNTPORT,
+	/** MOUNT port known; GETPORT for the NFS service in progress */
+	NFS_PORTMAP_NFSPORT,
+	/** Both ports obtained; port mapper interface shut down */
+	NFS_PORTMAP_CLOSED,
+};
+
+/* Progress of the conversation with the MOUNT service.  The values are
+ * stepped with ++ and -- by the code below, so their order matters.
+ */
+enum nfs_mount_state {
+ /* No MOUNT request sent yet; nfs_mount_step() will send MNT */
+ NFS_MOUNT_NONE = 0,
+ /* MNT sent; nfs_mount_deliver() expects the MNT reply */
+ NFS_MOUNT_MNT,
+ /* Entered once the file has been read; UMNT is sent and awaited */
+ NFS_MOUNT_UMNT,
+ /* Not assigned anywhere in this file -- presumably reserved for a
+ * closed MOUNT connection; TODO confirm */
+ NFS_MOUNT_CLOSED,
+};
+
+/* Progress of the conversation with the NFS service.  Each request
+ * state is immediately followed by its "_SENT" state so that the code
+ * can move between them with ++ and --.
+ */
+enum nfs_state {
+ /* Initial state: nothing to do on the NFS connection yet */
+ NFS_NONE = 0,
+ /* A LOOKUP for the next path component is to be sent */
+ NFS_LOOKUP,
+ /* LOOKUP sent; awaiting reply */
+ NFS_LOOKUP_SENT,
+ /* A READLINK for the symlink handle is to be sent */
+ NFS_READLINK,
+ /* READLINK sent; awaiting reply */
+ NFS_READLINK_SENT,
+ /* A READ at the current file offset is to be sent */
+ NFS_READ,
+ /* READ sent; awaiting reply and file data */
+ NFS_READ_SENT,
+ /* End of file reached; nfs_done() treats this as clean completion */
+ NFS_CLOSED,
+};
+
+/**
+ * A NFS request
+ *
+ * Holds the three connections (port mapper, MOUNT, NFS) used to fetch
+ * one file, together with the state of each conversation.
+ */
+struct nfs_request {
+ /** Reference counter */
+ struct refcnt refcnt;
+ /** Data transfer interface */
+ struct interface xfer;
+
+ /** Port mapper connection interface */
+ struct interface pm_intf;
+ /** MOUNT service connection interface */
+ struct interface mount_intf;
+ /** NFS service connection interface */
+ struct interface nfs_intf;
+
+ /** Port mapper conversation state */
+ enum nfs_pm_state pm_state;
+ /** MOUNT conversation state */
+ enum nfs_mount_state mount_state;
+ /** NFS conversation state */
+ enum nfs_state nfs_state;
+
+ /** ONC RPC session used on the port mapper connection */
+ struct oncrpc_session pm_session;
+ /** ONC RPC session used on the MOUNT connection */
+ struct oncrpc_session mount_session;
+ /** ONC RPC session used on the NFS connection */
+ struct oncrpc_session nfs_session;
+
+ /** Credential shared by all three sessions */
+ struct oncrpc_cred_sys auth_sys;
+
+ /** Server host name (allocated copy, freed in nfs_free()) */
+ char * hostname;
+ /** Mount point and path being resolved */
+ struct nfs_uri uri;
+
+ /** Handle of the symlink to be resolved by READLINK */
+ struct nfs_fh readlink_fh;
+ /** Handle of the directory or file reached so far */
+ struct nfs_fh current_fh;
+ /** Offset of the next READ request */
+ uint64_t file_offset;
+
+ /** Bytes of the current READ reply still to be received */
+ size_t remaining;
+ /** End-of-file flag from the most recent READ reply */
+ int eof;
+};
+
+static void nfs_step ( struct nfs_request *nfs );
+
+/**
+ * Release an NFS request once its last reference is dropped
+ *
+ * @v refcnt		Reference counter
+ */
+static void nfs_free ( struct refcnt *refcnt ) {
+	struct nfs_request *nfs =
+		container_of ( refcnt, struct nfs_request, refcnt );
+
+	DBGC ( nfs, "NFS_OPEN %p freed\n", nfs );
+
+	/* Release everything the request owns, then the request itself */
+	nfs_uri_free ( &nfs->uri );
+	free ( nfs->hostname );
+	free ( nfs->auth_sys.hostname );
+	free ( nfs );
+}
+
+/**
+ * Terminate an NFS operation and shut down all of its interfaces
+ *
+ * @v nfs		NFS request
+ * @v rc		Return status code
+ *
+ * A zero status is only honoured once the NFS state machine has
+ * reached NFS_CLOSED; any earlier "successful" close is reported as a
+ * connection reset.
+ */
+static void nfs_done ( struct nfs_request *nfs, int rc ) {
+	struct interface *intfs[] = {
+		&nfs->xfer,
+		&nfs->pm_intf,
+		&nfs->mount_intf,
+		&nfs->nfs_intf,
+	};
+	unsigned int i;
+
+	if ( ( nfs->nfs_state != NFS_CLOSED ) && ( rc == 0 ) )
+		rc = -ECONNRESET;
+
+	DBGC ( nfs, "NFS_OPEN %p completed (%s)\n", nfs, strerror ( rc ) );
+
+	for ( i = 0 ; i < ( sizeof ( intfs ) / sizeof ( intfs[0] ) ) ; i++ )
+		intf_shutdown ( intfs[i], rc );
+}
+
+/**
+ * Open a TCP connection to a service on the NFS server
+ *
+ * @v intf		Interface to attach the connection to
+ * @v port		Remote port (host byte order, must be non-zero)
+ * @v hostname		Server host name
+ * @ret rc		Return status code
+ */
+static int nfs_connect ( struct interface *intf, uint16_t port,
+			 const char *hostname ) {
+	struct sockaddr_tcpip remote;
+	struct sockaddr_tcpip bind_addr;
+	int rc;
+
+	if ( ! intf )
+		return -EINVAL;
+	if ( ! hostname )
+		return -EINVAL;
+	if ( ! port )
+		return -EINVAL;
+
+	memset ( &remote, 0, sizeof ( remote ) );
+	remote.st_port = htons ( port );
+
+	/* Use a local port < 1024 to avoid using the 'insecure' option in
+	 * /etc/exports file. */
+	memset ( &bind_addr, 0, sizeof ( bind_addr ) );
+	bind_addr.st_flags = TCPIP_BIND_PRIVILEGED;
+
+	rc = xfer_open_named_socket ( intf, SOCK_STREAM,
+				      ( struct sockaddr * ) &remote, hostname,
+				      ( struct sockaddr * ) &bind_addr );
+
+	return rc;
+}
+
+/**
+ * Send the next port mapper request, if one is due
+ *
+ * @v nfs		NFS request
+ *
+ * Does nothing while the port mapper connection has no transmit
+ * window.  Any failure to send terminates the whole request.
+ */
+static void nfs_pm_step ( struct nfs_request *nfs ) {
+	int rc;
+
+	if ( xfer_window ( &nfs->pm_intf ) == 0 )
+		return;
+
+	switch ( nfs->pm_state ) {
+	case NFS_PORTMAP_NONE:
+		DBGC ( nfs, "NFS_OPEN %p GETPORT call (mount)\n", nfs );
+
+		rc = portmap_getport ( &nfs->pm_intf, &nfs->pm_session,
+				       ONCRPC_MOUNT, MOUNT_VERS,
+				       PORTMAP_PROTO_TCP );
+		if ( rc != 0 )
+			break;
+
+		/* Now waiting for the MOUNT port */
+		nfs->pm_state++;
+		return;
+
+	case NFS_PORTMAP_NFSPORT:
+		DBGC ( nfs, "NFS_OPEN %p GETPORT call (nfs)\n", nfs );
+
+		rc = portmap_getport ( &nfs->pm_intf, &nfs->pm_session,
+				       ONCRPC_NFS, NFS_VERS,
+				       PORTMAP_PROTO_TCP );
+		if ( rc != 0 )
+			break;
+
+		/* State is advanced when the reply arrives */
+		return;
+
+	default:
+		/* Nothing to send in any other state */
+		return;
+	}
+
+	/* Only reached when a request could not be sent */
+	nfs_done ( nfs, rc );
+}
+
+/*
+ * Handle a reply received on the port mapper connection.
+ *
+ * The first GETPORT reply yields the MOUNT service port, the second the
+ * NFS service port; a connection to each is opened as soon as its port
+ * is known.  The I/O buffer is always freed here, and zero is always
+ * returned: failures are reported by terminating the request through
+ * nfs_done().
+ */
+static int nfs_pm_deliver ( struct nfs_request *nfs,
+ struct io_buffer *io_buf,
+ struct xfer_metadata *meta __unused ) {
+ int rc;
+ struct oncrpc_reply reply;
+ struct portmap_getport_reply getport_reply;
+
+ /* Any non-zero accept state is treated as a protocol error */
+ oncrpc_get_reply ( &nfs->pm_session, &reply, io_buf );
+ if ( reply.accept_state != 0 )
+ {
+ rc = -EPROTO;
+ goto err;
+ }
+
+ if ( nfs->pm_state == NFS_PORTMAP_MOUNTPORT ) {
+ DBGC ( nfs, "NFS_OPEN %p got GETPORT reply (mount)\n", nfs );
+
+ rc = portmap_get_getport_reply ( &getport_reply, &reply );
+ if ( rc != 0 )
+ goto err;
+
+ /* Connect to the MOUNT service on the port just learned */
+ rc = nfs_connect ( &nfs->mount_intf, getport_reply.port,
+ nfs->hostname );
+ if ( rc != 0 )
+ goto err;
+
+ /* Move on to asking for the NFS service port */
+ nfs->pm_state++;
+ nfs_pm_step ( nfs );
+
+ goto done;
+ }
+
+ if ( nfs->pm_state == NFS_PORTMAP_NFSPORT ) {
+ DBGC ( nfs, "NFS_OPEN %p got GETPORT reply (nfs)\n", nfs );
+
+ rc = portmap_get_getport_reply ( &getport_reply, &reply );
+ if ( rc != 0 )
+ goto err;
+
+ /* Connect to the NFS service on the port just learned */
+ rc = nfs_connect ( &nfs->nfs_intf, getport_reply.port,
+ nfs->hostname );
+ if ( rc != 0 )
+ goto err;
+
+ /* Both ports are known: the port mapper is no longer needed */
+ intf_shutdown ( &nfs->pm_intf, 0 );
+ nfs->pm_state++;
+
+ goto done;
+ }
+
+ /* A reply in any other state was not solicited */
+ rc = -EPROTO;
+err:
+ nfs_done ( nfs, rc );
+ /* Fall through: the buffer must be freed on the error path too */
+done:
+ free_iob ( io_buf );
+ return 0;
+}
+
+/**
+ * Send the next MOUNT protocol request, if one is due
+ *
+ * @v nfs		NFS request
+ *
+ * Does nothing while the MOUNT connection has no transmit window.
+ * Any failure to send terminates the whole request.
+ */
+static void nfs_mount_step ( struct nfs_request *nfs ) {
+	int rc;
+
+	if ( xfer_window ( &nfs->mount_intf ) == 0 )
+		return;
+
+	switch ( nfs->mount_state ) {
+	case NFS_MOUNT_NONE:
+		DBGC ( nfs, "NFS_OPEN %p MNT call (%s)\n", nfs,
+		       nfs_uri_mountpoint ( &nfs->uri ) );
+
+		rc = mount_mnt ( &nfs->mount_intf, &nfs->mount_session,
+				 nfs_uri_mountpoint ( &nfs->uri ) );
+		if ( rc != 0 )
+			break;
+
+		/* Now waiting for the MNT reply */
+		nfs->mount_state++;
+		return;
+
+	case NFS_MOUNT_UMNT:
+		DBGC ( nfs, "NFS_OPEN %p UMNT call\n", nfs );
+
+		rc = mount_umnt ( &nfs->mount_intf, &nfs->mount_session,
+				  nfs_uri_mountpoint ( &nfs->uri ) );
+		if ( rc != 0 )
+			break;
+
+		/* State is left unchanged while awaiting the reply */
+		return;
+
+	default:
+		/* Nothing to send in any other state */
+		return;
+	}
+
+	/* Only reached when a request could not be sent */
+	nfs_done ( nfs, rc );
+}
+
+/*
+ * Handle a reply received on the MOUNT connection.
+ *
+ * A successful MNT reply provides the root file handle and starts the
+ * LOOKUP phase.  A MNT failure of type "not a directory", "no such
+ * entry" or "access denied" causes a retry with the parent directory as
+ * mount point, until "/" itself has been tried.  A UMNT reply completes
+ * the request.  The I/O buffer is always freed here and zero is always
+ * returned: failures terminate the request through nfs_done().
+ */
+static int nfs_mount_deliver ( struct nfs_request *nfs,
+ struct io_buffer *io_buf,
+ struct xfer_metadata *meta __unused ) {
+ int rc;
+ struct oncrpc_reply reply;
+ struct mount_mnt_reply mnt_reply;
+
+ /* Any non-zero accept state is treated as a protocol error */
+ oncrpc_get_reply ( &nfs->mount_session, &reply, io_buf );
+ if ( reply.accept_state != 0 )
+ {
+ rc = -EPROTO;
+ goto err;
+ }
+
+ if ( nfs->mount_state == NFS_MOUNT_MNT ) {
+ DBGC ( nfs, "NFS_OPEN %p got MNT reply\n", nfs );
+ rc = mount_get_mnt_reply ( &mnt_reply, &reply );
+ if ( rc != 0 ) {
+ /* Only these server errors justify trying a shorter
+ * mount point; anything else is fatal */
+ switch ( mnt_reply.status ) {
+ case MNT3ERR_NOTDIR:
+ case MNT3ERR_NOENT:
+ case MNT3ERR_ACCES:
+ break;
+
+ default:
+ goto err;
+ }
+
+ /* "/" was already the mount point: nothing left to try */
+ if ( ! strcmp ( nfs_uri_mountpoint ( &nfs->uri ),
+ "/" ) )
+ goto err;
+
+ /* Move the last mount point component into the file path */
+ if ( ( rc = nfs_uri_next_mountpoint ( &nfs->uri ) ) )
+ goto err;
+
+ DBGC ( nfs, "NFS_OPEN %p MNT failed retrying with " \
+ "%s\n", nfs, nfs_uri_mountpoint ( &nfs->uri ) );
+
+ /* Step back to NFS_MOUNT_NONE so that MNT is sent again */
+ nfs->mount_state--;
+ nfs_mount_step ( nfs );
+
+ goto done;
+ }
+
+ /* Mounted: path resolution starts from the returned handle */
+ nfs->current_fh = mnt_reply.fh;
+ nfs->nfs_state = NFS_LOOKUP;
+ nfs_step ( nfs );
+
+ goto done;
+ }
+
+ if ( nfs->mount_state == NFS_MOUNT_UMNT ) {
+ DBGC ( nfs, "NFS_OPEN %p got UMNT reply\n", nfs );
+ /* Unmounted: the whole request is complete */
+ nfs_done ( nfs, 0 );
+
+ goto done;
+ }
+
+ /* A reply in any other state was not solicited */
+ rc = -EPROTO;
+err:
+ nfs_done ( nfs, rc );
+ /* Fall through: the buffer must be freed on the error path too */
+done:
+ free_iob ( io_buf );
+ return 0;
+}
+
+/**
+ * Send the next NFS request, if one is due
+ *
+ * @v nfs		NFS request
+ *
+ * Does nothing while the NFS connection has no transmit window.  After
+ * a request has been sent the state advances to the matching "_SENT"
+ * state; a failure to send terminates the whole request.
+ */
+static void nfs_step ( struct nfs_request *nfs ) {
+	char *component;
+	int rc;
+
+	if ( xfer_window ( &nfs->nfs_intf ) == 0 )
+		return;
+
+	switch ( nfs->nfs_state ) {
+	case NFS_LOOKUP:
+		component = nfs_uri_next_path_component ( &nfs->uri );
+
+		DBGC ( nfs, "NFS_OPEN %p LOOKUP call (%s)\n", nfs,
+		       component );
+
+		rc = nfs_lookup ( &nfs->nfs_intf, &nfs->nfs_session,
+				  &nfs->current_fh, component );
+		break;
+
+	case NFS_READLINK:
+		DBGC ( nfs, "NFS_OPEN %p READLINK call\n", nfs );
+
+		rc = nfs_readlink ( &nfs->nfs_intf, &nfs->nfs_session,
+				    &nfs->readlink_fh );
+		break;
+
+	case NFS_READ:
+		DBGC ( nfs, "NFS_OPEN %p READ call\n", nfs );
+
+		rc = nfs_read ( &nfs->nfs_intf, &nfs->nfs_session,
+				&nfs->current_fh, nfs->file_offset,
+				NFS_RSIZE );
+		break;
+
+	default:
+		/* Nothing to send in any other state */
+		return;
+	}
+
+	if ( rc != 0 ) {
+		nfs_done ( nfs, rc );
+		return;
+	}
+
+	/* Request sent: move to the corresponding "_SENT" state */
+	nfs->nfs_state++;
+}
+
+/**
+ * Handle data received on the NFS connection
+ *
+ * @v nfs		NFS request
+ * @v io_buf		Received I/O buffer (always consumed)
+ * @v meta		Data transfer metadata (unused)
+ * @ret rc		Always zero; failures terminate the request
+ *			through nfs_done()
+ *
+ * LOOKUP replies advance path resolution, READLINK replies splice the
+ * link target into the path, and READ replies are passed on to the
+ * data transfer interface.
+ */
+static int nfs_deliver ( struct nfs_request *nfs,
+			 struct io_buffer *io_buf,
+			 struct xfer_metadata *meta __unused ) {
+	int rc;
+	struct oncrpc_reply reply;
+
+	/* A non-zero "remaining" count means that this buffer continues
+	 * the data of a READ reply whose header was already parsed, so
+	 * there is no RPC header to decode.
+	 */
+	if ( nfs->remaining == 0 ) {
+		oncrpc_get_reply ( &nfs->nfs_session, &reply, io_buf );
+		if ( reply.accept_state != 0 ) {
+			rc = -EPROTO;
+			goto err;
+		}
+	}
+
+	if ( nfs->nfs_state == NFS_LOOKUP_SENT ) {
+		struct nfs_lookup_reply lookup_reply;
+
+		DBGC ( nfs, "NFS_OPEN %p got LOOKUP reply\n", nfs );
+
+		rc = nfs_get_lookup_reply ( &lookup_reply, &reply );
+		if ( rc != 0 )
+			goto err;
+
+		if ( lookup_reply.ent_type == NFS_ATTR_SYMLINK ) {
+			/* Resolve the link before going any further */
+			nfs->readlink_fh = lookup_reply.fh;
+			nfs->nfs_state = NFS_READLINK;
+		} else {
+			nfs->current_fh = lookup_reply.fh;
+
+			/* Read the file once the path is exhausted,
+			 * otherwise look up the next component.
+			 */
+			if ( nfs->uri.lookup_pos[0] == '\0' )
+				nfs->nfs_state = NFS_READ;
+			else
+				nfs->nfs_state--;
+		}
+
+		nfs_step ( nfs );
+		goto done;
+	}
+
+	if ( nfs->nfs_state == NFS_READLINK_SENT ) {
+		char *path;
+		struct nfs_readlink_reply readlink_reply;
+
+		DBGC ( nfs, "NFS_OPEN %p got READLINK reply\n", nfs );
+
+		rc = nfs_get_readlink_reply ( &readlink_reply, &reply );
+		if ( rc != 0 )
+			goto err;
+
+		if ( readlink_reply.path_len == 0 )
+		{
+			rc = -EINVAL;
+			goto err;
+		}
+
+		/* The target is not NUL-terminated inside the buffer */
+		if ( ! ( path = strndup ( readlink_reply.path,
+					  readlink_reply.path_len ) ) )
+		{
+			rc = -ENOMEM;
+			goto err;
+		}
+
+		/* If the target cannot be spliced into the path then
+		 * the path is left unchanged; carrying on would look up
+		 * the wrong entries, so abort instead.
+		 */
+		rc = nfs_uri_symlink ( &nfs->uri, path );
+		free ( path );
+		if ( rc != 0 )
+			goto err;
+
+		DBGC ( nfs, "NFS_OPEN %p new path: %s\n", nfs,
+		       nfs->uri.path );
+
+		nfs->nfs_state = NFS_LOOKUP;
+		nfs_step ( nfs );
+		goto done;
+	}
+
+	if ( nfs->nfs_state == NFS_READ_SENT ) {
+		if ( nfs->remaining == 0 ) {
+			DBGC ( nfs, "NFS_OPEN %p got READ reply\n", nfs );
+
+			struct nfs_read_reply read_reply;
+
+			rc = nfs_get_read_reply ( &read_reply, &reply );
+			if ( rc != 0 )
+				goto err;
+
+			/* On the first reply, seek to the end of the
+			 * file and back to announce the total size.
+			 */
+			if ( nfs->file_offset == 0 ) {
+				DBGC2 ( nfs, "NFS_OPEN %p size: %llu bytes\n",
+					nfs, read_reply.filesize );
+
+				xfer_seek ( &nfs->xfer, read_reply.filesize );
+				xfer_seek ( &nfs->xfer, 0 );
+			}
+
+			nfs->file_offset += read_reply.count;
+			nfs->remaining = read_reply.count;
+			nfs->eof = read_reply.eof;
+		}
+
+		/* Trim anything beyond the announced data length */
+		size_t len = iob_len ( io_buf );
+		if ( len > nfs->remaining )
+			iob_unput ( io_buf, len - nfs->remaining );
+
+		nfs->remaining -= iob_len ( io_buf );
+
+		DBGC ( nfs, "NFS_OPEN %p got %zd bytes\n", nfs,
+		       iob_len ( io_buf ) );
+
+		/* Ownership of the buffer passes to the consumer; the
+		 * disowned pointer makes the later free_iob() harmless.
+		 */
+		rc = xfer_deliver_iob ( &nfs->xfer, iob_disown ( io_buf ) );
+		if ( rc != 0 )
+			goto err;
+
+		if ( nfs->remaining == 0 ) {
+			if ( ! nfs->eof ) {
+				/* Request the next chunk */
+				nfs->nfs_state--;
+				nfs_step ( nfs );
+			} else {
+				/* File complete: close NFS and unmount */
+				intf_shutdown ( &nfs->nfs_intf, 0 );
+				nfs->nfs_state++;
+				nfs->mount_state++;
+				nfs_mount_step ( nfs );
+			}
+		}
+
+		return 0;
+	}
+
+	/* Data in any other state was not solicited */
+	rc = -EPROTO;
+err:
+	nfs_done ( nfs, rc );
+	/* Fall through: the buffer must be freed on the error path too */
+done:
+	free_iob ( io_buf );
+	return 0;
+}
+
+/*****************************************************************************
+ * Interfaces
+ *
+ */
+
+/* Data transfer interface operations: closing it ends the request */
+static struct interface_operation nfs_xfer_operations[] = {
+ INTF_OP ( intf_close, struct nfs_request *, nfs_done ),
+};
+
+/** NFS data transfer interface descriptor */
+static struct interface_descriptor nfs_xfer_desc =
+ INTF_DESC ( struct nfs_request, xfer, nfs_xfer_operations );
+
+/* Port mapper connection operations: replies go to nfs_pm_deliver()
+ * and window changes trigger nfs_pm_step() */
+static struct interface_operation nfs_pm_operations[] = {
+ INTF_OP ( intf_close, struct nfs_request *, nfs_done ),
+ INTF_OP ( xfer_deliver, struct nfs_request *, nfs_pm_deliver ),
+ INTF_OP ( xfer_window_changed, struct nfs_request *, nfs_pm_step ),
+};
+
+/* Port mapper connection interface descriptor */
+static struct interface_descriptor nfs_pm_desc =
+ INTF_DESC ( struct nfs_request, pm_intf, nfs_pm_operations );
+
+/* MOUNT connection operations: replies go to nfs_mount_deliver() and
+ * window changes trigger nfs_mount_step() */
+static struct interface_operation nfs_mount_operations[] = {
+ INTF_OP ( intf_close, struct nfs_request *, nfs_done ),
+ INTF_OP ( xfer_deliver, struct nfs_request *, nfs_mount_deliver ),
+ INTF_OP ( xfer_window_changed, struct nfs_request *, nfs_mount_step ),
+};
+
+/* MOUNT connection interface descriptor */
+static struct interface_descriptor nfs_mount_desc =
+ INTF_DESC ( struct nfs_request, mount_intf, nfs_mount_operations );
+
+/* NFS connection operations: replies go to nfs_deliver() and window
+ * changes trigger nfs_step() */
+static struct interface_operation nfs_operations[] = {
+ INTF_OP ( intf_close, struct nfs_request *, nfs_done ),
+ INTF_OP ( xfer_deliver, struct nfs_request *, nfs_deliver ),
+ INTF_OP ( xfer_window_changed, struct nfs_request *, nfs_step ),
+};
+
+/* NFS connection interface descriptor; unlike the others it is declared
+ * with INTF_DESC_PASSTHRU naming the xfer interface */
+static struct interface_descriptor nfs_desc =
+ INTF_DESC_PASSTHRU ( struct nfs_request, nfs_intf, nfs_operations,
+ xfer );
+
+/*****************************************************************************
+ *
+ * URI opener
+ *
+ */
+
+/**
+ * Extract the host name and path information from a URI
+ *
+ * @v nfs		NFS request
+ * @v uri		Uniform Resource Identifier
+ * @ret rc		Return status code
+ *
+ * On failure nothing allocated by this function is left behind.
+ */
+static int nfs_parse_uri ( struct nfs_request *nfs, const struct uri *uri ) {
+	int rc;
+
+	/* Both a host and a path are required */
+	if ( ! uri || ! uri->host || ! uri->path )
+		return -EINVAL;
+
+	rc = nfs_uri_init ( &nfs->uri, uri );
+	if ( rc != 0 )
+		return rc;
+
+	nfs->hostname = strdup ( uri->host );
+	if ( ! nfs->hostname ) {
+		nfs_uri_free ( &nfs->uri );
+		return -ENOMEM;
+	}
+
+	DBGC ( nfs, "NFS_OPEN %p URI parsed: (mountpoint=%s, path=%s)\n",
+	       nfs, nfs_uri_mountpoint ( &nfs->uri), nfs->uri.path );
+
+	return 0;
+}
+
+/**
+ * Initiate a NFS connection
+ *
+ * @v xfer Data transfer interface
+ * @v uri Uniform Resource Identifier
+ * @ret rc Return status code
+ *
+ * Allocates the request, parses the URI, prepares the credential and
+ * the three ONC RPC sessions, and opens the connection to the port
+ * mapper; everything else is driven by the interface callbacks.
+ */
+static int nfs_open ( struct interface *xfer, struct uri *uri ) {
+ int rc;
+ struct nfs_request *nfs;
+
+ /* Zeroed allocation: all state machines start in their NONE state */
+ nfs = zalloc ( sizeof ( *nfs ) );
+ if ( ! nfs )
+ return -ENOMEM;
+
+ rc = nfs_parse_uri( nfs, uri );
+ if ( rc != 0 )
+ goto err_uri;
+
+ rc = oncrpc_init_cred_sys ( &nfs->auth_sys );
+ if ( rc != 0 )
+ goto err_cred;
+
+ /* All four interfaces share the request's reference counter */
+ ref_init ( &nfs->refcnt, nfs_free );
+ intf_init ( &nfs->xfer, &nfs_xfer_desc, &nfs->refcnt );
+ intf_init ( &nfs->pm_intf, &nfs_pm_desc, &nfs->refcnt );
+ intf_init ( &nfs->mount_intf, &nfs_mount_desc, &nfs->refcnt );
+ intf_init ( &nfs->nfs_intf, &nfs_desc, &nfs->refcnt );
+
+ /* All three sessions use the same credential */
+ portmap_init_session ( &nfs->pm_session, &nfs->auth_sys.credential );
+ mount_init_session ( &nfs->mount_session, &nfs->auth_sys.credential );
+ nfs_init_session ( &nfs->nfs_session, &nfs->auth_sys.credential );
+
+ DBGC ( nfs, "NFS_OPEN %p connecting to port mapper (%s:%d)...\n", nfs,
+ nfs->hostname, PORTMAP_PORT );
+
+ rc = nfs_connect ( &nfs->pm_intf, PORTMAP_PORT, nfs->hostname );
+ if ( rc != 0 )
+ goto err_connect;
+
+ /* Attach to parent interface, mortalise self, and return */
+ intf_plug_plug ( &nfs->xfer, xfer );
+ ref_put ( &nfs->refcnt );
+
+ return 0;
+
+ /* Error paths free the pieces directly rather than via nfs_free() */
+err_connect:
+ free ( nfs->auth_sys.hostname );
+err_cred:
+ nfs_uri_free ( &nfs->uri );
+ free ( nfs->hostname );
+err_uri:
+ free ( nfs );
+ return rc;
+}
+
+/** NFS URI opener: registers nfs_open() as the handler for "nfs" URIs */
+struct uri_opener nfs_uri_opener __uri_opener = {
+ .scheme = "nfs",
+ .open = nfs_open,
+};
diff --git a/qemu/roms/ipxe/src/net/oncrpc/nfs_uri.c b/qemu/roms/ipxe/src/net/oncrpc/nfs_uri.c
new file mode 100644
index 000000000..c4c3f21e9
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/oncrpc/nfs_uri.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2014 Marin Hannache <ipxe@mareo.fr>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <libgen.h>
+#include <ipxe/nfs_uri.h>
+
+/** @file
+ *
+ * Network File System protocol URI handling functions
+ *
+ */
+
+/*
+ * Initialise NFS path information from a URI.
+ *
+ * The URI path is duplicated and split into a mount point (its
+ * directory part) and a file name (its last component); the file name
+ * is then duplicated again as the path to be looked up.
+ *
+ * Returns 0 on success, -ENOMEM if a copy cannot be allocated, or
+ * -EINVAL if the URI path has an empty last component.
+ */
+int nfs_uri_init ( struct nfs_uri *nfs_uri, const struct uri *uri ) {
+ if ( ! ( nfs_uri->mountpoint = strdup ( uri->path ) ) )
+ return -ENOMEM;
+
+ /* NOTE(review): the rest of this file compares "filename" against
+ * "mountpoint" by address, so basename() presumably returns a
+ * pointer into the duplicated string and dirname() presumably
+ * truncates it in place -- confirm against their implementation */
+ nfs_uri->filename = basename ( nfs_uri->mountpoint );
+ if ( strchr ( uri->path, '/' ) != NULL )
+ nfs_uri->mountpoint = dirname ( nfs_uri->mountpoint );
+
+ /* A path with nothing after its last separator names no file */
+ if ( nfs_uri->filename[0] == '\0' ) {
+ free ( nfs_uri->mountpoint );
+ return -EINVAL;
+ }
+
+ /* The lookup path starts out as just the file name */
+ if ( ! ( nfs_uri->path = strdup ( nfs_uri->filename ) ) ) {
+ free ( nfs_uri->mountpoint );
+ return -ENOMEM;
+ }
+ nfs_uri->lookup_pos = nfs_uri->path;
+
+ return 0;
+}
+
+/**
+ * Get the current mount point
+ *
+ * @v uri		NFS URI information
+ * @ret mountpoint	Mount point path
+ *
+ * When the file name starts at, or one character after, the start of
+ * the mount point string, "/" is returned instead of the stored
+ * string.
+ */
+char *nfs_uri_mountpoint ( const struct nfs_uri *uri ) {
+	int at_root;
+
+	at_root = ( ( uri->mountpoint + 1 == uri->filename ) ||
+		    ( uri->mountpoint == uri->filename ) );
+
+	return ( at_root ? "/" : uri->mountpoint );
+}
+
+/**
+ * Shorten the mount point by one path component
+ *
+ * @v uri		NFS URI information
+ * @ret rc		Return status code
+ *
+ * The last component of the mount point is moved to the front of the
+ * file path, and the lookup path is reset to that new file path.
+ * Returns -ENOENT when the mount point cannot be shortened any
+ * further, or -ENOMEM if the new lookup path cannot be allocated (in
+ * which case no lookup path remains).
+ */
+int nfs_uri_next_mountpoint ( struct nfs_uri *uri ) {
+	char *sep;
+
+	/* Already at the root: nothing left to strip */
+	if ( uri->mountpoint + 1 == uri->filename ||
+	     uri->mountpoint == uri->filename )
+		return -ENOENT;
+
+	/* A mount point without any separator cannot be shortened;
+	 * check before modifying anything.
+	 */
+	sep = strrchr ( uri->mountpoint, '/' );
+	if ( ! sep )
+		return -ENOENT;
+
+	/* Rejoin mount point and file name, then split again one
+	 * component earlier.
+	 */
+	uri->filename[-1] = '/';
+	uri->filename = sep + 1;
+	*sep = '\0';
+
+	free ( uri->path );
+	uri->path = strdup ( uri->filename );
+	if ( ! uri->path ) {
+		/* Do not leave a pointer into the freed path behind */
+		uri->lookup_pos = NULL;
+		return -ENOMEM;
+	}
+	uri->lookup_pos = uri->path;
+
+	return 0;
+}
+
+/**
+ * Replace the path component just looked up by a symlink target
+ *
+ * @v uri		NFS URI information
+ * @v symlink		Symlink target (NUL-terminated)
+ * @ret rc		Return status code
+ *
+ * The new lookup path is the symlink target followed by whatever part
+ * of the old path had not been looked up yet.  An absolute target must
+ * start with the mount point string, which is stripped from it;
+ * otherwise -EINVAL is returned.  On failure the existing path is left
+ * untouched.
+ */
+int nfs_uri_symlink ( struct nfs_uri *uri, const char *symlink ) {
+	const char *target = symlink;
+	const char *rest;
+	size_t target_len;
+	size_t rest_len;
+	size_t mnt_len;
+	char *new_path;
+	char *pos;
+
+	if ( ! uri->path )
+		return -EINVAL;
+
+	if ( *symlink == '/' )
+	{
+		/* Absolute target: only usable inside the mount point */
+		mnt_len = strlen ( uri->mountpoint );
+		if ( strncmp ( symlink, uri->mountpoint, mnt_len ) != 0 )
+			return -EINVAL;
+		target += mnt_len;
+	}
+
+	rest = uri->lookup_pos;
+	target_len = strlen ( target );
+	rest_len = strlen ( rest );
+
+	/* Room for the target, one separator, the rest and the NUL */
+	new_path = malloc ( target_len + 1 + rest_len + 1 );
+	if ( ! new_path )
+		return -ENOMEM;
+
+	/* The separator that preceded the remaining path was consumed
+	 * when the previous component was extracted, so it has to be
+	 * put back between the target and the remainder.
+	 */
+	strcpy ( new_path, target );
+	pos = ( new_path + target_len );
+	if ( rest_len != 0 )
+		*(pos++) = '/';
+	strcpy ( pos, rest );
+
+	free ( uri->path );
+	uri->lookup_pos = uri->path = new_path;
+
+	return 0;
+}
+
+/**
+ * Extract the next component of the lookup path
+ *
+ * @v uri		NFS URI information
+ * @ret component	Next path component, or NULL if there is no path
+ *
+ * The component is NUL-terminated in place and the lookup position is
+ * moved past it.  Empty components produced by a separator are
+ * skipped; the final component may be empty.
+ */
+char *nfs_uri_next_path_component ( struct nfs_uri *uri ) {
+	char *component;
+	char *end;
+
+	if ( ! uri->path )
+		return NULL;
+
+	for ( ; ; ) {
+		/* Find the end of the component */
+		component = uri->lookup_pos;
+		end = component;
+		while ( ( *end != '\0' ) && ( *end != '/' ) )
+			end++;
+
+		uri->lookup_pos = end;
+
+		/* Last component: return it even if it is empty */
+		if ( *end == '\0' )
+			return component;
+
+		/* Terminate the component and step over the separator */
+		uri->lookup_pos++;
+		*end = '\0';
+
+		if ( *component != '\0' )
+			return component;
+
+		/* Empty component: try again from the new position */
+	}
+}
+
+/**
+ * Release the strings held by NFS URI information
+ *
+ * @v uri		NFS URI information
+ *
+ * The pointers are cleared afterwards.
+ */
+void nfs_uri_free ( struct nfs_uri *uri ) {
+	free ( uri->mountpoint );
+	uri->mountpoint = NULL;
+
+	free ( uri->path );
+	uri->path = NULL;
+}
diff --git a/qemu/roms/ipxe/src/net/oncrpc/oncrpc_iob.c b/qemu/roms/ipxe/src/net/oncrpc/oncrpc_iob.c
new file mode 100644
index 000000000..be51805e7
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/oncrpc/oncrpc_iob.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2013 Marin Hannache <ipxe@mareo.fr>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/socket.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/in.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/uri.h>
+#include <ipxe/features.h>
+#include <ipxe/oncrpc.h>
+#include <ipxe/oncrpc_iob.h>
+
+/** @file
+ *
+ * SUN ONC RPC protocol
+ *
+ */
+
+/**
+ * Append a list of fields to an I/O buffer
+ *
+ * @v io_buf		I/O buffer
+ * @v fields		Fields, terminated by an entry of type oncrpc_none
+ * @ret size		Size of the data written
+ *
+ * Encoding stops early, returning the size written so far, if a field
+ * of unknown type is encountered.
+ */
+size_t oncrpc_iob_add_fields ( struct io_buffer *io_buf,
+			       const struct oncrpc_field fields[] ) {
+	const struct oncrpc_field *field;
+	size_t total = 0;
+
+	if ( ! io_buf )
+		return 0;
+
+	for ( field = fields ; field->type != oncrpc_none ; field++ ) {
+		switch ( field->type ) {
+		case oncrpc_int32:
+			total += oncrpc_iob_add_int ( io_buf,
+						      field->value.int32 );
+			break;
+
+		case oncrpc_int64:
+			total += oncrpc_iob_add_int64 ( io_buf,
+							field->value.int64 );
+			break;
+
+		case oncrpc_str:
+			total += oncrpc_iob_add_string ( io_buf,
+							 field->value.str );
+			break;
+
+		case oncrpc_array:
+			total += oncrpc_iob_add_array ( io_buf,
+						field->value.array.length,
+						field->value.array.ptr );
+			break;
+
+		case oncrpc_intarray:
+			total += oncrpc_iob_add_intarray ( io_buf,
+						field->value.intarray.length,
+						field->value.intarray.ptr );
+			break;
+
+		case oncrpc_cred:
+			total += oncrpc_iob_add_cred ( io_buf,
+						       field->value.cred );
+			break;
+
+		default:
+			return total;
+		}
+	}
+
+	return total;
+}
+
+/**
+ * Add an array of bytes to the end of an I/O buffer
+ *
+ * @v io_buf		I/O buffer
+ * @v length		Length of the array in bytes
+ * @v data		Array contents
+ * @ret size		Size of the data written
+ *
+ * The array is written as its length followed by its contents, then
+ * zero-filled up to the size given by oncrpc_align().
+ */
+size_t oncrpc_iob_add_array ( struct io_buffer *io_buf, size_t length,
+			      const void *data ) {
+	size_t padded_len = oncrpc_align ( length );
+	size_t pad_len = ( padded_len - length );
+
+	/* Length word, contents, then zero padding */
+	oncrpc_iob_add_int ( io_buf, length );
+	memcpy ( iob_put ( io_buf, length ), data, length );
+	memset ( iob_put ( io_buf, pad_len ), 0, pad_len );
+
+	return ( sizeof ( uint32_t ) + padded_len );
+}
+
+/**
+ * Add an int array to the end of an I/O buffer
+ *
+ * @v io_buf		I/O buffer
+ * @v length		Number of elements in the array
+ * @v array		Int array
+ * @ret size		Size of the data written
+ *
+ * The array is written as its element count followed by each element.
+ */
+size_t oncrpc_iob_add_intarray ( struct io_buffer *io_buf, size_t length,
+				 const uint32_t *array ) {
+	const uint32_t *element = array;
+	size_t left = length;
+
+	oncrpc_iob_add_int ( io_buf, length );
+
+	while ( left-- )
+		oncrpc_iob_add_int ( io_buf, *(element++) );
+
+	/* One word for the count plus one word per element */
+	return ( sizeof ( uint32_t ) * ( length + 1 ) );
+}
+
+/**
+ * Add credential information to the end of an I/O buffer
+ *
+ * @v io_buf		I/O buffer
+ * @v cred		Credential information
+ * @ret size		Size of the data written (zero if either
+ *			argument is NULL)
+ *
+ * The flavor and length are always written.  For ONCRPC_AUTH_SYS the
+ * credential is assumed to be embedded in a struct oncrpc_cred_sys,
+ * whose fields are written as well.
+ */
+size_t oncrpc_iob_add_cred ( struct io_buffer *io_buf,
+			     const struct oncrpc_cred *cred ) {
+	struct oncrpc_cred_sys *syscred;
+	size_t s;
+
+	/* Validate the arguments before "cred" is dereferenced by the
+	 * field initialisers below.
+	 */
+	if ( ! io_buf || ! cred )
+		return 0;
+
+	struct oncrpc_field credfields[] = {
+		ONCRPC_FIELD ( int32, cred->flavor ),
+		ONCRPC_FIELD ( int32, cred->length ),
+		ONCRPC_FIELD_END,
+	};
+
+	s = oncrpc_iob_add_fields ( io_buf, credfields);
+
+	switch ( cred->flavor ) {
+	case ONCRPC_AUTH_NONE:
+		/* No body */
+		break;
+
+	case ONCRPC_AUTH_SYS:
+		syscred = container_of ( cred, struct oncrpc_cred_sys,
+					 credential );
+
+		struct oncrpc_field syscredfields[] = {
+			ONCRPC_FIELD ( int32, syscred->stamp ),
+			ONCRPC_FIELD ( str, syscred->hostname ),
+			ONCRPC_FIELD ( int32, syscred->uid ),
+			ONCRPC_FIELD ( int32, syscred->gid ),
+			ONCRPC_SUBFIELD ( intarray, syscred->aux_gid_len,
+					  syscred->aux_gid ),
+			ONCRPC_FIELD_END,
+		};
+
+		s += oncrpc_iob_add_fields ( io_buf, syscredfields );
+		break;
+	}
+
+	return s;
+}
+
+/**
+ * Get credential information from the beginning of an I/O buffer
+ *
+ * @v io_buf		I/O buffer
+ * @v cred		Struct where the information will be saved
+ * @ret size		Size of the data read
+ *
+ * The flavor and length are stored in @c cred and the credential body
+ * is skipped.  If @c cred is NULL nothing is consumed and the first
+ * 32-bit word of the buffer is returned as-is.
+ */
+size_t oncrpc_iob_get_cred ( struct io_buffer *io_buf,
+			     struct oncrpc_cred *cred ) {
+	size_t consumed;
+
+	/* NOTE(review): this returns the raw first word, without any
+	 * byte-order conversion, rather than a size -- confirm that
+	 * this is what callers expect.
+	 */
+	if ( cred == NULL )
+		return * ( uint32_t * ) io_buf->data;
+
+	cred->flavor = oncrpc_iob_get_int ( io_buf );
+	cred->length = oncrpc_iob_get_int ( io_buf );
+
+	/* Skip the credential body */
+	iob_pull ( io_buf, cred->length );
+
+	consumed = ( 2 * sizeof ( uint32_t ) + cred->length );
+
+	return consumed;
+}
diff --git a/qemu/roms/ipxe/src/net/oncrpc/portmap.c b/qemu/roms/ipxe/src/net/oncrpc/portmap.c
new file mode 100644
index 000000000..df62221dc
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/oncrpc/portmap.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2013 Marin Hannache <ipxe@mareo.fr>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/socket.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/in.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/uri.h>
+#include <ipxe/features.h>
+#include <ipxe/timer.h>
+#include <ipxe/oncrpc.h>
+#include <ipxe/oncrpc_iob.h>
+#include <ipxe/portmap.h>
+
+/** @file
+ *
+ * PORTMAPPER protocol.
+ *
+ */
+
+/** PORTMAP GETPORT procedure number
+ *
+ * Passed to oncrpc_call() by portmap_getport().
+ */
+#define PORTMAP_GETPORT 3
+
+/**
+ * Send a GETPORT request
+ *
+ * @v intf		Interface to send the request on
+ * @v session		ONC RPC session
+ * @v prog		ONC RPC program number
+ * @v vers		ONC RPC program version number
+ * @v proto		Protocol (TCP or UDP)
+ * @ret rc		Return status code
+ *
+ * The request carries four 32-bit words: program, version, protocol
+ * and a zero port.
+ */
+int portmap_getport ( struct interface *intf, struct oncrpc_session *session,
+		      uint32_t prog, uint32_t vers, uint32_t proto ) {
+	struct oncrpc_field getport_args[] = {
+		ONCRPC_FIELD ( int32, prog ),
+		ONCRPC_FIELD ( int32, vers ),
+		ONCRPC_FIELD ( int32, proto ),
+		/* The port field is only meaningful in a GETPORT
+		 * reply, so it is sent as zero.
+		 */
+		ONCRPC_FIELD ( int32, 0 ),
+		ONCRPC_FIELD_END,
+	};
+
+	return oncrpc_call ( intf, session, PORTMAP_GETPORT, getport_args );
+}
+
+/**
+ * Parse a GETPORT reply
+ *
+ * @v getport_reply	A structure where the data will be saved
+ * @v reply		The ONC RPC reply to get data from
+ * @ret rc		Return status code
+ *
+ * Returns -EINVAL if either argument is NULL, or if the port read
+ * from the reply is zero or does not fit in 16 bits.
+ */
+int portmap_get_getport_reply ( struct portmap_getport_reply *getport_reply,
+				struct oncrpc_reply *reply ) {
+	if ( ! getport_reply || ! reply )
+		return -EINVAL;
+
+	getport_reply->port = oncrpc_iob_get_int ( reply->data );
+
+	/* Check the port value itself (not the structure pointer,
+	 * which has already been validated above).
+	 */
+	if ( getport_reply->port == 0 || getport_reply->port >= 65536 )
+		return -EINVAL;
+
+	return 0;
+}
diff --git a/qemu/roms/ipxe/src/net/ping.c b/qemu/roms/ipxe/src/net/ping.c
new file mode 100644
index 000000000..d9da87ade
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/ping.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/refcnt.h>
+#include <ipxe/list.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/icmp.h>
+#include <ipxe/interface.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/ping.h>
+
+/** @file
+ *
+ * ICMP ping protocol
+ *
+ */
+
+/**
+ * A ping connection
+ *
+ * Allocated by ping_open() and kept on the ping_conns list until
+ * ping_close() removes it.
+ */
+struct ping_connection {
+ /** Reference counter */
+ struct refcnt refcnt;
+ /** List of ping connections */
+ struct list_head list;
+
+ /** Remote socket address */
+ struct sockaddr_tcpip peer;
+ /** Local port number
+ *
+ * Sent as the echo identifier by ping_deliver() and matched
+ * against the identifier of incoming replies by ping_rx().
+ */
+ uint16_t port;
+
+ /** Data transfer interface */
+ struct interface xfer;
+};
+
+/** List of registered ping connections
+ *
+ * Entries are added by ping_open() and removed by ping_close().
+ */
+static LIST_HEAD ( ping_conns );
+
+/**
+ * Identify ping connection by local port number
+ *
+ * @v port		Local port number
+ * @ret ping		Ping connection, or NULL
+ *
+ * Walks the list of registered connections and returns the first one
+ * bound to @c port.
+ */
+static struct ping_connection * ping_demux ( unsigned int port ) {
+	struct ping_connection *candidate;
+	struct ping_connection *match = NULL;
+
+	list_for_each_entry ( candidate, &ping_conns, list ) {
+		if ( candidate->port == port ) {
+			match = candidate;
+			break;
+		}
+	}
+	return match;
+}
+
+/**
+ * Check if local port number is available
+ *
+ * @v port		Local port number
+ * @ret port		Local port number, or negative error
+ *
+ * A port is unavailable (-EADDRINUSE) when an existing ping
+ * connection is already bound to it.
+ */
+static int ping_port_available ( int port ) {
+
+	if ( ping_demux ( port ) )
+		return -EADDRINUSE;
+
+	return port;
+}
+
+/**
+ * Process ICMP ping reply
+ *
+ * @v iobuf		I/O buffer
+ * @v st_src		Source address
+ * @ret rc		Return status code
+ *
+ * The reply is matched to a connection by its echo identifier.  If
+ * no connection matches, the buffer is freed and -ENOTCONN is
+ * returned; otherwise the payload is delivered to the connection's
+ * data transfer interface with the echo sequence number as the
+ * absolute offset.
+ */
+int ping_rx ( struct io_buffer *iobuf, struct sockaddr_tcpip *st_src ) {
+	struct icmp_echo *echo = iobuf->data;
+	struct ping_connection *ping;
+	struct xfer_metadata meta;
+
+	/* Sanity check: should already have been checked by ICMP layer */
+	assert ( iob_len ( iobuf ) >= sizeof ( *echo ) );
+
+	/* Look up the owning connection from the echo identifier */
+	ping = ping_demux ( ntohs ( echo->ident ) );
+	DBGC ( ping, "PING %p reply id %#04x seq %#04x\n",
+	       ping, ntohs ( echo->ident ), ntohs ( echo->sequence ) );
+
+	/* Nobody is waiting for this reply: discard it */
+	if ( ! ping ) {
+		free_iob ( iobuf );
+		return -ENOTCONN;
+	}
+
+	/* Remove the echo header, describe the payload, and hand
+	 * ownership of the buffer to the upper layer.
+	 */
+	iob_pull ( iobuf, sizeof ( *echo ) );
+	memset ( &meta, 0, sizeof ( meta ) );
+	meta.src = ( ( struct sockaddr * ) st_src );
+	meta.flags = XFER_FL_ABS_OFFSET;
+	meta.offset = ntohs ( echo->sequence );
+	return xfer_deliver ( &ping->xfer, iob_disown ( iobuf ), &meta );
+}
+
+/**
+ * Allocate I/O buffer for ping
+ *
+ * @v ping		Ping connection
+ * @v len		Payload size
+ * @ret iobuf		I/O buffer, or NULL
+ *
+ * The buffer has room for @c len bytes of payload plus reserved
+ * headroom of MAX_LL_NET_HEADER_LEN and one ICMP echo header.
+ */
+static struct io_buffer *
+ping_alloc_iob ( struct ping_connection *ping __unused, size_t len ) {
+	size_t headroom = ( MAX_LL_NET_HEADER_LEN +
+			    sizeof ( struct icmp_echo ) );
+	struct io_buffer *iobuf = alloc_iob ( headroom + len );
+
+	if ( ! iobuf )
+		return NULL;
+
+	iob_reserve ( iobuf, headroom );
+	return iobuf;
+}
+
+/**
+ * Deliver datagram as I/O buffer
+ *
+ * @v ping		Ping connection
+ * @v iobuf		I/O buffer
+ * @v meta		Data transfer metadata
+ * @ret rc		Return status code
+ *
+ * Prepends an ICMP echo header whose identifier is the connection's
+ * local port and whose sequence number is taken from meta->offset,
+ * then transmits the result as an echo request to the peer.
+ */
+static int ping_deliver ( struct ping_connection *ping, struct io_buffer *iobuf,
+			  struct xfer_metadata *meta ) {
+	struct icmp_echo *echo;
+	int rc;
+
+	/* Build the echo header in front of the payload */
+	echo = iob_push ( iobuf, sizeof ( *echo ) );
+	memset ( echo, 0, sizeof ( *echo ) );
+	echo->ident = htons ( ping->port );
+	echo->sequence = htons ( meta->offset );
+
+	/* Send the echo request, giving up ownership of the buffer */
+	rc = icmp_tx_echo_request ( iob_disown ( iobuf ), &ping->peer );
+	if ( rc != 0 ) {
+		DBGC ( ping, "PING %p could not transmit: %s\n",
+		       ping, strerror ( rc ) );
+	}
+
+	return rc;
+}
+
+/**
+ * Close ping connection
+ *
+ * @v ping Ping connection
+ * @v rc Reason for close
+ *
+ * Shuts down the data transfer interface, unlinks the connection
+ * from ping_conns and drops the reference that the list held.
+ */
+static void ping_close ( struct ping_connection *ping, int rc ) {
+
+ /* Close data transfer interface */
+ intf_shutdown ( &ping->xfer, rc );
+
+ /* Remove from list of connections and drop list's reference */
+ list_del ( &ping->list );
+ ref_put ( &ping->refcnt );
+
+ /* Only the pointer value is logged here; ping is not dereferenced */
+ DBGC ( ping, "PING %p closed\n", ping );
+}
+
+/** Ping data transfer interface operations
+ *
+ * Maps xfer_deliver, xfer_alloc_iob and intf_close onto
+ * ping_deliver(), ping_alloc_iob() and ping_close() respectively.
+ */
+static struct interface_operation ping_xfer_operations[] = {
+ INTF_OP ( xfer_deliver, struct ping_connection *, ping_deliver ),
+ INTF_OP ( xfer_alloc_iob, struct ping_connection *, ping_alloc_iob ),
+ INTF_OP ( intf_close, struct ping_connection *, ping_close ),
+};
+
+/** Ping data transfer interface descriptor
+ *
+ * Used by ping_open() to initialise the connection's xfer interface.
+ */
+static struct interface_descriptor ping_xfer_desc =
+ INTF_DESC ( struct ping_connection, xfer, ping_xfer_operations );
+
+/**
+ * Open a ping connection
+ *
+ * @v xfer		Data transfer interface
+ * @v peer		Peer socket address
+ * @v local		Local socket address, or NULL
+ * @ret rc		Return status code
+ *
+ * Allocates a connection, binds it to a local port (used as the echo
+ * identifier), plugs it into @c xfer and registers it on the list of
+ * ping connections.
+ */
+static int ping_open ( struct interface *xfer, struct sockaddr *peer,
+		       struct sockaddr *local ) {
+	struct sockaddr_tcpip *st_peer = ( struct sockaddr_tcpip * ) peer;
+	struct sockaddr_tcpip *st_local = ( struct sockaddr_tcpip * ) local;
+	struct ping_connection *ping;
+	int port;
+
+	/* Allocate and initialise structure */
+	ping = zalloc ( sizeof ( *ping ) );
+	if ( ! ping )
+		return -ENOMEM;
+	DBGC ( ping, "PING %p allocated\n", ping );
+	ref_init ( &ping->refcnt, NULL );
+	intf_init ( &ping->xfer, &ping_xfer_desc, &ping->refcnt );
+	memcpy ( &ping->peer, st_peer, sizeof ( ping->peer ) );
+
+	/* Bind to local port; on failure drop the only reference */
+	port = tcpip_bind ( st_local, ping_port_available );
+	if ( port < 0 ) {
+		DBGC ( ping, "PING %p could not bind: %s\n",
+		       ping, strerror ( port ) );
+		ref_put ( &ping->refcnt );
+		return port;
+	}
+	ping->port = port;
+	DBGC ( ping, "PING %p bound to id %#04x\n", ping, port );
+
+	/* Attach parent interface and hand our reference over to the
+	 * connection list.
+	 */
+	intf_plug_plug ( &ping->xfer, xfer );
+	list_add ( &ping->list, &ping_conns );
+	return 0;
+}
+
+/** Ping IPv4 socket opener
+ *
+ * Routes PING_SOCK_ECHO sockets in the AF_INET family to ping_open().
+ */
+struct socket_opener ping_ipv4_socket_opener __socket_opener = {
+ .semantics = PING_SOCK_ECHO,
+ .family = AF_INET,
+ .open = ping_open,
+};
+
+/** Ping IPv6 socket opener
+ *
+ * Routes PING_SOCK_ECHO sockets in the AF_INET6 family to ping_open().
+ */
+struct socket_opener ping_ipv6_socket_opener __socket_opener = {
+ .semantics = PING_SOCK_ECHO,
+ .family = AF_INET6,
+ .open = ping_open,
+};
+
+/** Linkage hack
+ *
+ * NOTE(review): presumably referenced from elsewhere so that this
+ * object file is pulled into the link - confirm against the users of
+ * ping_sock_echo.
+ */
+int ping_sock_echo = PING_SOCK_ECHO;
diff --git a/qemu/roms/ipxe/src/net/rarp.c b/qemu/roms/ipxe/src/net/rarp.c
new file mode 100644
index 000000000..371145015
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/rarp.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <byteswap.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/if_ether.h>
+#include <ipxe/rarp.h>
+
+/** @file
+ *
+ * Reverse Address Resolution Protocol
+ *
+ */
+
+/**
+ * Process incoming RARP packets
+ *
+ * @v iobuf I/O buffer
+ * @v netdev Network device
+ * @v ll_dest Link-layer destination address
+ * @v ll_source Link-layer source address
+ * @v flags Packet flags
+ * @ret rc Return status code
+ *
+ * This is a dummy method which simply discards RARP packets: the
+ * buffer is freed and 0 is always returned.
+ */
+static int rarp_rx ( struct io_buffer *iobuf,
+ struct net_device *netdev __unused,
+ const void *ll_dest __unused,
+ const void *ll_source __unused,
+ unsigned int flags __unused ) {
+ free_iob ( iobuf );
+ return 0;
+}
+
+
+/**
+ * Transcribe RARP address
+ *
+ * @v net_addr RARP address
+ * @ret string "<RARP>"
+ *
+ * This operation is meaningless for the RARP protocol, so the
+ * address is ignored and a fixed placeholder string is returned.
+ */
+static const char * rarp_ntoa ( const void *net_addr __unused ) {
+ return "<RARP>";
+}
+
+/** RARP protocol
+ *
+ * Registers rarp_rx() and rarp_ntoa() as the handlers for ETH_P_RARP.
+ */
+struct net_protocol rarp_protocol __net_protocol = {
+ .name = "RARP",
+ .net_proto = htons ( ETH_P_RARP ),
+ .rx = rarp_rx,
+ .ntoa = rarp_ntoa,
+};
diff --git a/qemu/roms/ipxe/src/net/retry.c b/qemu/roms/ipxe/src/net/retry.c
new file mode 100644
index 000000000..8f210bdcc
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/retry.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stddef.h>
+#include <ipxe/timer.h>
+#include <ipxe/list.h>
+#include <ipxe/process.h>
+#include <ipxe/init.h>
+#include <ipxe/retry.h>
+
+/** @file
+ *
+ * Retry timers
+ *
+ * A retry timer is a binary exponential backoff timer. It can be
+ * used to build automatic retransmission into network protocols.
+ *
+ * This implementation of the timer is designed to satisfy RFC 2988
+ * and therefore be usable as a TCP retransmission timer.
+ *
+ *
+ */
+
+/* Absolute minimum timeout, in ticks.
+ *
+ * The theoretical minimum that the algorithm in stop_timer() can
+ * adjust the timeout back down to is seven ticks, so set the minimum
+ * timeout to at least that value for the sake of consistency.
+ * start_timer() raises any smaller min_timeout to this value.
+ */
+#define MIN_TIMEOUT 7
+
+/** List of running timers
+ *
+ * Timers are added by start_timer() and removed by stop_timer() or
+ * timer_expired(); retry_poll() scans this list for expiries.
+ */
+static LIST_HEAD ( timers );
+
+/**
+ * Start timer
+ *
+ * @v timer Retry timer
+ *
+ * This starts the timer running with the current timeout value. If
+ * stop_timer() is not called before the timer expires, the timer will
+ * be stopped and the timer's callback function will be called.
+ *
+ * The timeout is raised to the timer's minimum timeout if it is
+ * currently lower. Calling this on a timer that is already running
+ * just resets its start time.
+ */
+void start_timer ( struct retry_timer *timer ) {
+ /* Only a stopped timer joins the list and takes a reference */
+ if ( ! timer->running ) {
+ list_add ( &timer->list, &timers );
+ ref_get ( timer->refcnt );
+ }
+ timer->start = currticks();
+ timer->running = 1;
+
+ /* 0 means "use default timeout" */
+ if ( timer->min_timeout == 0 )
+ timer->min_timeout = DEFAULT_MIN_TIMEOUT;
+ /* We must never be less than MIN_TIMEOUT under any circumstances */
+ if ( timer->min_timeout < MIN_TIMEOUT )
+ timer->min_timeout = MIN_TIMEOUT;
+ /* Honor user-specified minimum timeout */
+ if ( timer->timeout < timer->min_timeout )
+ timer->timeout = timer->min_timeout;
+
+ DBG2 ( "Timer %p started at time %ld (expires at %ld)\n",
+ timer, timer->start, ( timer->start + timer->timeout ) );
+}
+
+/**
+ * Start timer with a specified fixed timeout
+ *
+ * @v timer Retry timer
+ * @v timeout Timeout, in ticks
+ *
+ * The timeout is assigned after start_timer() has run, so the
+ * minimum-timeout clamping performed there does not apply to the
+ * value given here.
+ */
+void start_timer_fixed ( struct retry_timer *timer, unsigned long timeout ) {
+ start_timer ( timer );
+ /* Override whatever timeout start_timer() settled on */
+ timer->timeout = timeout;
+ DBG2 ( "Timer %p expiry time changed to %ld\n",
+ timer, ( timer->start + timer->timeout ) );
+}
+
+/**
+ * Stop timer
+ *
+ * @v timer Retry timer
+ *
+ * This stops the timer and updates the timer's timeout value.
+ * Stopping a timer that is not running has no effect.
+ */
+void stop_timer ( struct retry_timer *timer ) {
+ unsigned long old_timeout = timer->timeout;
+ unsigned long now = currticks();
+ unsigned long runtime;
+
+ /* If timer was already stopped, do nothing */
+ if ( ! timer->running )
+ return;
+
+ list_del ( &timer->list );
+ runtime = ( now - timer->start );
+ timer->running = 0;
+ DBG2 ( "Timer %p stopped at time %ld (ran for %ld)\n",
+ timer, now, runtime );
+
+ /* Update timer. Variables are:
+ *
+ * r = round-trip time estimate (i.e. runtime)
+ * t = timeout value (i.e. timer->timeout)
+ * s = smoothed round-trip time
+ *
+ * By choice, we set t = 4s, i.e. allow for four times the
+ * normal round-trip time to pass before retransmitting.
+ *
+ * We want to smooth according to s := ( 7 s + r ) / 8
+ *
+ * Since we don't actually store s, this reduces to
+ * t := ( 7 t / 8 ) + ( r / 2 )
+ *
+ * The count field is incremented by timer_expired(); while it
+ * is non-zero, a stop only decrements it and leaves the
+ * timeout untouched.
+ */
+ if ( timer->count ) {
+ timer->count--;
+ } else {
+ timer->timeout -= ( timer->timeout >> 3 );
+ timer->timeout += ( runtime >> 1 );
+ if ( timer->timeout != old_timeout ) {
+ DBG ( "Timer %p timeout updated to %ld\n",
+ timer, timer->timeout );
+ }
+ }
+
+ /* Drop the reference taken by start_timer() */
+ ref_put ( timer->refcnt );
+}
+
+/**
+ * Handle expired timer
+ *
+ * @v timer Retry timer
+ *
+ * Removes the timer from the running list, doubles its timeout
+ * (capped at the maximum timeout) and invokes the expiry callback.
+ * The callback's second argument is non-zero when the doubled
+ * timeout exceeded the maximum.
+ */
+static void timer_expired ( struct retry_timer *timer ) {
+ /* Saved up front: timer must not be touched after the callback */
+ struct refcnt *refcnt = timer->refcnt;
+ int fail;
+
+ /* Stop timer without performing RTT calculations */
+ DBG2 ( "Timer %p stopped at time %ld on expiry\n",
+ timer, currticks() );
+ assert ( timer->running );
+ list_del ( &timer->list );
+ timer->running = 0;
+ timer->count++;
+
+ /* Back off the timeout value */
+ timer->timeout <<= 1;
+ if ( timer->max_timeout == 0 ) /* 0 means "use default timeout" */
+ timer->max_timeout = DEFAULT_MAX_TIMEOUT;
+ if ( ( fail = ( timer->timeout > timer->max_timeout ) ) )
+ timer->timeout = timer->max_timeout;
+ DBG ( "Timer %p timeout backed off to %ld\n",
+ timer, timer->timeout );
+
+ /* Call expiry callback */
+ timer->expired ( timer, fail );
+ /* If refcnt is NULL, then timer may already have been freed */
+
+ /* Drop the reference taken by start_timer() */
+ ref_put ( refcnt );
+}
+
+/**
+ * Poll the retry timer list
+ *
+ * Expires the first running timer found whose elapsed time has
+ * reached its timeout.
+ */
+void retry_poll ( void ) {
+	struct retry_timer *timer;
+	unsigned long now = currticks();
+	unsigned long elapsed;
+
+	/* Handle no more than one expiry per call: expiring a timer
+	 * may cause other timers to be removed from the list, so it
+	 * is not safe to keep iterating afterwards.
+	 */
+	list_for_each_entry ( timer, &timers, list ) {
+		elapsed = ( now - timer->start );
+		if ( elapsed < timer->timeout )
+			continue;
+		timer_expired ( timer );
+		return;
+	}
+}
+
+/**
+ * Single-step the retry timer list
+ *
+ * @v process Retry timer process
+ *
+ * Thin wrapper that calls retry_poll(); the process argument is
+ * unused.
+ */
+static void retry_step ( struct process *process __unused ) {
+ retry_poll();
+}
+
+/** Retry timer process
+ *
+ * Declares retry_process with retry_step() as its step function.
+ */
+PERMANENT_PROCESS ( retry_process, retry_step );
diff --git a/qemu/roms/ipxe/src/net/socket.c b/qemu/roms/ipxe/src/net/socket.c
new file mode 100644
index 000000000..24f6a0892
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/socket.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stddef.h>
+#include <errno.h>
+#include <ipxe/socket.h>
+
+/** @file
+ *
+ * Sockets
+ *
+ */
+
+/**
+ * Transcribe socket address
+ *
+ * @v sa		Socket address
+ * @ret string		Socket address string
+ *
+ * Uses the first registered converter whose family matches the
+ * address; returns NULL if there is none.
+ */
+const char * sock_ntoa ( struct sockaddr *sa ) {
+	struct sockaddr_converter *conv;
+
+	for_each_table_entry ( conv, SOCKADDR_CONVERTERS ) {
+		if ( conv->family != sa->sa_family )
+			continue;
+		return conv->ntoa ( sa );
+	}
+	return NULL;
+}
+
+/**
+ * Parse socket address
+ *
+ * @v string		Socket address string
+ * @v sa		Socket address to fill in
+ * @ret rc		Return status code
+ *
+ * Each registered converter is tried in turn; the first one that
+ * accepts the string also determines the address family.  Returns
+ * -EINVAL if no converter accepts the string.
+ */
+int sock_aton ( const char *string, struct sockaddr *sa ) {
+	struct sockaddr_converter *conv;
+
+	for_each_table_entry ( conv, SOCKADDR_CONVERTERS ) {
+		if ( conv->aton ( string, sa ) != 0 )
+			continue;
+		sa->sa_family = conv->family;
+		return 0;
+	}
+	return -EINVAL;
+}
diff --git a/qemu/roms/ipxe/src/net/tcp.c b/qemu/roms/ipxe/src/net/tcp.c
new file mode 100644
index 000000000..987cb63e1
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/tcp.c
@@ -0,0 +1,1494 @@
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/timer.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/malloc.h>
+#include <ipxe/init.h>
+#include <ipxe/retry.h>
+#include <ipxe/refcnt.h>
+#include <ipxe/pending.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/uri.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/profile.h>
+#include <ipxe/process.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/tcp.h>
+
+/** @file
+ *
+ * TCP protocol
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/** A TCP connection
+ *
+ * Allocated and initialised by tcp_open().
+ */
+struct tcp_connection {
+ /** Reference counter */
+ struct refcnt refcnt;
+ /** List of TCP connections */
+ struct list_head list;
+
+ /** Flags
+ *
+ * Bitmask of values from enum tcp_flags.
+ */
+ unsigned int flags;
+
+ /** Data transfer interface */
+ struct interface xfer;
+
+ /** Remote socket address */
+ struct sockaddr_tcpip peer;
+ /** Local port */
+ unsigned int local_port;
+ /** Maximum segment size
+ *
+ * Set by tcp_open() to the MTU reported for the peer minus the
+ * size of the TCP header.
+ */
+ size_t mss;
+
+ /** Current TCP state */
+ unsigned int tcp_state;
+ /** Previous TCP state
+ *
+ * Maintained only for debug messages
+ */
+ unsigned int prev_tcp_state;
+ /** Current sequence number
+ *
+ * Equivalent to SND.UNA in RFC 793 terminology.
+ */
+ uint32_t snd_seq;
+ /** Unacknowledged sequence count
+ *
+ * Equivalent to (SND.NXT-SND.UNA) in RFC 793 terminology.
+ */
+ uint32_t snd_sent;
+ /** Send window
+ *
+ * Equivalent to SND.WND in RFC 793 terminology.
+ */
+ uint32_t snd_win;
+ /** Current acknowledgement number
+ *
+ * Equivalent to RCV.NXT in RFC 793 terminology.
+ */
+ uint32_t rcv_ack;
+ /** Receive window
+ *
+ * Equivalent to RCV.WND in RFC 793 terminology.
+ */
+ uint32_t rcv_win;
+ /** Received timestamp value
+ *
+ * Updated when a packet is received; copied to ts_recent when
+ * the window is advanced.
+ */
+ uint32_t ts_val;
+ /** Most recent received timestamp that advanced the window
+ *
+ * Equivalent to TS.Recent in RFC 1323 terminology.
+ */
+ uint32_t ts_recent;
+ /** Send window scale
+ *
+ * Equivalent to Snd.Wind.Scale in RFC 1323 terminology.
+ */
+ uint8_t snd_win_scale;
+ /** Receive window scale
+ *
+ * Equivalent to Rcv.Wind.Scale in RFC 1323 terminology.
+ */
+ uint8_t rcv_win_scale;
+ /** Maximum receive window
+ *
+ * Initialised to TCP_MAX_WINDOW_SIZE by tcp_open().
+ */
+ uint32_t max_rcv_win;
+
+ /** Transmit queue */
+ struct list_head tx_queue;
+ /** Receive queue */
+ struct list_head rx_queue;
+ /** Transmission process */
+ struct process process;
+ /** Retransmission timer */
+ struct retry_timer timer;
+ /** Shutdown (TIME_WAIT) timer */
+ struct retry_timer wait;
+
+ /** Pending operations for SYN and FIN */
+ struct pending_operation pending_flags;
+ /** Pending operations for transmit queue */
+ struct pending_operation pending_data;
+};
+
+/** TCP flags
+ *
+ * Bit values stored in the flags field of struct tcp_connection.
+ */
+enum tcp_flags {
+ /** TCP data transfer interface has been closed */
+ TCP_XFER_CLOSED = 0x0001,
+ /** TCP timestamps are enabled */
+ TCP_TS_ENABLED = 0x0002,
+ /** TCP acknowledgement is pending */
+ TCP_ACK_PENDING = 0x0004,
+};
+
+/** TCP internal header
+ *
+ * This is the header that replaces the TCP header for packets
+ * enqueued on the receive queue.
+ */
+struct tcp_rx_queued_header {
+ /** SEQ value, in host-endian order
+ *
+ * This represents the SEQ value at the time the packet is
+ * enqueued, and so excludes the SYN, if present.
+ */
+ uint32_t seq;
+ /** Flags
+ *
+ * Only FIN is valid within this flags byte; all other flags
+ * have already been processed by the time the packet is
+ * enqueued.
+ */
+ uint8_t flags;
+ /** Reserved
+ *
+ * Three bytes following the single flags byte.
+ */
+ uint8_t reserved[3];
+};
+
+/**
+ * List of registered TCP connections
+ *
+ * Connections are added by tcp_open() and removed by tcp_close().
+ */
+static LIST_HEAD ( tcp_conns );
+
+/** Transmit profiler (named "tcp.tx") */
+static struct profiler tcp_tx_profiler __profiler = { .name = "tcp.tx" };
+
+/** Receive profiler (named "tcp.rx") */
+static struct profiler tcp_rx_profiler __profiler = { .name = "tcp.rx" };
+
+/** Data transfer profiler (named "tcp.xfer") */
+static struct profiler tcp_xfer_profiler __profiler = { .name = "tcp.xfer" };
+
+/* Forward declarations
+ *
+ * These are referenced by tcp_port_available(), tcp_open() and
+ * tcp_close() before their definitions appear.
+ */
+static struct process_descriptor tcp_process_desc;
+static struct interface_descriptor tcp_xfer_desc;
+static void tcp_expired ( struct retry_timer *timer, int over );
+static void tcp_wait_expired ( struct retry_timer *timer, int over );
+static struct tcp_connection * tcp_demux ( unsigned int local_port );
+static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack,
+ uint32_t win );
+
+/**
+ * Name TCP state
+ *
+ * @v state		TCP state
+ * @ret name		Name of TCP state
+ *
+ * Unrecognised state values are reported as "INVALID".
+ */
+static inline __attribute__ (( always_inline )) const char *
+tcp_state ( int state ) {
+	const char *name;
+
+	switch ( state ) {
+	case TCP_CLOSED:
+		name = "CLOSED";
+		break;
+	case TCP_LISTEN:
+		name = "LISTEN";
+		break;
+	case TCP_SYN_SENT:
+		name = "SYN_SENT";
+		break;
+	case TCP_SYN_RCVD:
+		name = "SYN_RCVD";
+		break;
+	case TCP_ESTABLISHED:
+		name = "ESTABLISHED";
+		break;
+	case TCP_FIN_WAIT_1:
+		name = "FIN_WAIT_1";
+		break;
+	case TCP_FIN_WAIT_2:
+		name = "FIN_WAIT_2";
+		break;
+	case TCP_CLOSING_OR_LAST_ACK:
+		name = "CLOSING/LAST_ACK";
+		break;
+	case TCP_TIME_WAIT:
+		name = "TIME_WAIT";
+		break;
+	case TCP_CLOSE_WAIT:
+		name = "CLOSE_WAIT";
+		break;
+	default:
+		name = "INVALID";
+		break;
+	}
+
+	return name;
+}
+
+/**
+ * Dump TCP state transition
+ *
+ * @v tcp		TCP connection
+ *
+ * Logs a message if the state differs from the previously recorded
+ * state, then records the current state as the previous one.
+ */
+static inline __attribute__ (( always_inline )) void
+tcp_dump_state ( struct tcp_connection *tcp ) {
+	unsigned int previous = tcp->prev_tcp_state;
+	unsigned int current = tcp->tcp_state;
+
+	if ( current != previous ) {
+		DBGC ( tcp, "TCP %p transitioned from %s to %s\n", tcp,
+		       tcp_state ( previous ), tcp_state ( current ) );
+	}
+	tcp->prev_tcp_state = current;
+}
+
+/**
+ * Dump TCP flags
+ *
+ * @v tcp TCP connection (used as the debug message colour key)
+ * @v flags TCP flags
+ *
+ * Emits one space-prefixed name for each of RST, SYN, PSH, FIN and
+ * ACK that is set in @c flags, in that order.
+ */
+static inline __attribute__ (( always_inline )) void
+tcp_dump_flags ( struct tcp_connection *tcp, unsigned int flags ) {
+ if ( flags & TCP_RST )
+ DBGC2 ( tcp, " RST" );
+ if ( flags & TCP_SYN )
+ DBGC2 ( tcp, " SYN" );
+ if ( flags & TCP_PSH )
+ DBGC2 ( tcp, " PSH" );
+ if ( flags & TCP_FIN )
+ DBGC2 ( tcp, " FIN" );
+ if ( flags & TCP_ACK )
+ DBGC2 ( tcp, " ACK" );
+}
+
+/***************************************************************************
+ *
+ * Open and close
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Check if local TCP port is available
+ *
+ * @v port		Local port number
+ * @ret port		Local port number, or negative error
+ *
+ * A port is unavailable (-EADDRINUSE) when tcp_demux() finds an
+ * existing connection for it.
+ */
+static int tcp_port_available ( int port ) {
+
+	if ( tcp_demux ( port ) )
+		return -EADDRINUSE;
+
+	return port;
+}
+
+/**
+ * Open a TCP connection
+ *
+ * @v xfer Data transfer interface
+ * @v peer Peer socket address
+ * @v local Local socket address, or NULL
+ * @ret rc Return status code
+ *
+ * Returns -ENOMEM if the connection cannot be allocated,
+ * -ENETUNREACH if no MTU is available for the peer, or the error
+ * from tcpip_bind() if no local port can be bound.
+ */
+static int tcp_open ( struct interface *xfer, struct sockaddr *peer,
+ struct sockaddr *local ) {
+ struct sockaddr_tcpip *st_peer = ( struct sockaddr_tcpip * ) peer;
+ struct sockaddr_tcpip *st_local = ( struct sockaddr_tcpip * ) local;
+ struct tcp_connection *tcp;
+ size_t mtu;
+ int port;
+ int rc;
+
+ /* Allocate and initialise structure */
+ tcp = zalloc ( sizeof ( *tcp ) );
+ if ( ! tcp )
+ return -ENOMEM;
+ DBGC ( tcp, "TCP %p allocated\n", tcp );
+ ref_init ( &tcp->refcnt, NULL );
+ intf_init ( &tcp->xfer, &tcp_xfer_desc, &tcp->refcnt );
+ process_init_stopped ( &tcp->process, &tcp_process_desc, &tcp->refcnt );
+ timer_init ( &tcp->timer, tcp_expired, &tcp->refcnt );
+ timer_init ( &tcp->wait, tcp_wait_expired, &tcp->refcnt );
+ /* Start out as "SYN sent", logging the transition from CLOSED */
+ tcp->prev_tcp_state = TCP_CLOSED;
+ tcp->tcp_state = TCP_STATE_SENT ( TCP_SYN );
+ tcp_dump_state ( tcp );
+ tcp->snd_seq = random();
+ tcp->max_rcv_win = TCP_MAX_WINDOW_SIZE;
+ INIT_LIST_HEAD ( &tcp->tx_queue );
+ INIT_LIST_HEAD ( &tcp->rx_queue );
+ memcpy ( &tcp->peer, st_peer, sizeof ( tcp->peer ) );
+
+ /* Calculate MSS */
+ mtu = tcpip_mtu ( &tcp->peer );
+ if ( ! mtu ) {
+ DBGC ( tcp, "TCP %p has no route to %s\n",
+ tcp, sock_ntoa ( peer ) );
+ rc = -ENETUNREACH;
+ goto err;
+ }
+ tcp->mss = ( mtu - sizeof ( struct tcp_header ) );
+
+ /* Bind to local port */
+ port = tcpip_bind ( st_local, tcp_port_available );
+ if ( port < 0 ) {
+ rc = port;
+ DBGC ( tcp, "TCP %p could not bind: %s\n",
+ tcp, strerror ( rc ) );
+ goto err;
+ }
+ tcp->local_port = port;
+ DBGC ( tcp, "TCP %p bound to port %d\n", tcp, tcp->local_port );
+
+ /* Start timer to initiate SYN */
+ start_timer_nodelay ( &tcp->timer );
+
+ /* Add a pending operation for the SYN */
+ pending_get ( &tcp->pending_flags );
+
+ /* Attach parent interface, transfer reference to connection
+ * list and return
+ */
+ intf_plug_plug ( &tcp->xfer, xfer );
+ list_add ( &tcp->list, &tcp_conns );
+ return 0;
+
+ err:
+ /* Drop the initial reference taken by ref_init() */
+ ref_put ( &tcp->refcnt );
+ return rc;
+}
+
+/**
+ * Close TCP connection
+ *
+ * @v tcp TCP connection
+ * @v rc Reason for close
+ *
+ * Closes the data transfer interface. If the TCP state machine is in
+ * a suitable state, the connection will be deleted.
+ *
+ * If a SYN has been received, the connection is kept and a FIN is
+ * scheduled once the transmit queue is empty.
+ */
+static void tcp_close ( struct tcp_connection *tcp, int rc ) {
+ struct io_buffer *iobuf;
+ struct io_buffer *tmp;
+
+ /* Close data transfer interface */
+ intf_shutdown ( &tcp->xfer, rc );
+ tcp->flags |= TCP_XFER_CLOSED;
+
+ /* If we are in CLOSED, or have otherwise not yet received a
+ * SYN (i.e. we are in LISTEN or SYN_SENT), just delete the
+ * connection.
+ */
+ if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) {
+
+ /* Transition to CLOSED for the sake of debugging messages */
+ tcp->tcp_state = TCP_CLOSED;
+ tcp_dump_state ( tcp );
+
+ /* Free any unprocessed I/O buffers */
+ list_for_each_entry_safe ( iobuf, tmp, &tcp->rx_queue, list ) {
+ list_del ( &iobuf->list );
+ free_iob ( iobuf );
+ }
+
+ /* Free any unsent I/O buffers */
+ list_for_each_entry_safe ( iobuf, tmp, &tcp->tx_queue, list ) {
+ list_del ( &iobuf->list );
+ free_iob ( iobuf );
+ pending_put ( &tcp->pending_data );
+ }
+ assert ( ! is_pending ( &tcp->pending_data ) );
+
+ /* Remove pending operations for SYN and FIN, if applicable
+ * (one call for each of the two flags)
+ */
+ pending_put ( &tcp->pending_flags );
+ pending_put ( &tcp->pending_flags );
+
+ /* Remove from list and drop reference */
+ process_del ( &tcp->process );
+ stop_timer ( &tcp->timer );
+ stop_timer ( &tcp->wait );
+ list_del ( &tcp->list );
+ ref_put ( &tcp->refcnt );
+ DBGC ( tcp, "TCP %p connection deleted\n", tcp );
+ return;
+ }
+
+ /* If we have not had our SYN acknowledged (i.e. we are in
+ * SYN_RCVD), pretend that it has been acknowledged so that we
+ * can send a FIN without breaking things.
+ *
+ * NOTE(review): the +1 presumably accounts for the sequence
+ * number consumed by the SYN - confirm against tcp_rx_ack().
+ */
+ if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
+ tcp_rx_ack ( tcp, ( tcp->snd_seq + 1 ), 0 );
+
+ /* If we have no data remaining to send, start sending FIN */
+ if ( list_empty ( &tcp->tx_queue ) &&
+ ! ( tcp->tcp_state & TCP_STATE_SENT ( TCP_FIN ) ) ) {
+
+ tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN );
+ tcp_dump_state ( tcp );
+
+ /* Add a pending operation for the FIN */
+ pending_get ( &tcp->pending_flags );
+ }
+}
+
+/***************************************************************************
+ *
+ * Transmit data path
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Calculate transmission window
+ *
+ * @v tcp		TCP connection
+ * @ret len		Maximum length that can be sent in a single packet
+ *
+ * Returns zero when the connection state does not allow data to be
+ * sent; otherwise the send window, capped at TCP_PATH_MTU.
+ */
+static size_t tcp_xmit_win ( struct tcp_connection *tcp ) {
+	size_t window;
+
+	/* Nothing can be sent outside a data-capable state */
+	if ( ! TCP_CAN_SEND_DATA ( tcp->tcp_state ) )
+		return 0;
+
+	/* Smaller of the receiver's window and the path MTU */
+	window = tcp->snd_win;
+	return ( ( window > TCP_PATH_MTU ) ? TCP_PATH_MTU : window );
+}
+
+/**
+ * Check data-transfer flow control window
+ *
+ * @v tcp		TCP connection
+ * @ret len		Length of window
+ *
+ * The window is reported as closed (zero) while the transmit queue
+ * holds any data.  This limits the TX queue to a single unACKed
+ * packet at any time, in order to conserve memory.  With an empty
+ * queue the TCP transmission window is returned.
+ */
+static size_t tcp_xfer_window ( struct tcp_connection *tcp ) {
+
+	return ( list_empty ( &tcp->tx_queue ) ? tcp_xmit_win ( tcp ) : 0 );
+}
+
+/**
+ * Walk the TCP transmit queue
+ *
+ * @v tcp		TCP connection
+ * @v max_len		Maximum length to process
+ * @v dest		I/O buffer to fill with data, or NULL
+ * @v remove		Remove data from queue
+ * @ret len		Length of data processed
+ *
+ * Up to @c max_len bytes are taken from the front of the connection's
+ * transmit queue.  When @c dest is provided the bytes are appended to
+ * it; when @c remove is true the bytes are stripped from the queue,
+ * and each buffer that becomes empty is freed and its pending-data
+ * operation released.
+ */
+static size_t tcp_process_tx_queue ( struct tcp_connection *tcp, size_t max_len,
+				     struct io_buffer *dest, int remove ) {
+	struct io_buffer *frag;
+	struct io_buffer *next;
+	size_t chunk;
+	size_t total = 0;
+
+	list_for_each_entry_safe ( frag, next, &tcp->tx_queue, list ) {
+
+		/* Take as much of this buffer as the budget allows */
+		chunk = iob_len ( frag );
+		if ( chunk > max_len )
+			chunk = max_len;
+
+		/* Copy out, if a destination was supplied */
+		if ( dest )
+			memcpy ( iob_put ( dest, chunk ), frag->data, chunk );
+
+		/* Consume, freeing the buffer once it has been drained */
+		if ( remove ) {
+			iob_pull ( frag, chunk );
+			if ( iob_len ( frag ) == 0 ) {
+				list_del ( &frag->list );
+				free_iob ( frag );
+				pending_put ( &tcp->pending_data );
+			}
+		}
+
+		/* Account for what was taken */
+		total += chunk;
+		max_len -= chunk;
+	}
+	return total;
+}
+
+/**
+ * Transmit any outstanding data
+ *
+ * @v tcp TCP connection
+ *
+ * Transmits any outstanding data on the connection, together with
+ * whatever SYN/FIN flag the current state calls for and any pending
+ * ACK. Does nothing while the retransmission timer is running.
+ *
+ * This function returns no status. Even if buffer allocation or
+ * transmission fails, the retransmission timer will have been started
+ * if necessary, and so the stack will eventually attempt to
+ * retransmit the failed packet.
+ */
+static void tcp_xmit ( struct tcp_connection *tcp ) {
+ struct io_buffer *iobuf;
+ struct tcp_header *tcphdr;
+ struct tcp_mss_option *mssopt;
+ struct tcp_window_scale_padded_option *wsopt;
+ struct tcp_timestamp_padded_option *tsopt;
+ void *payload;
+ unsigned int flags;
+ size_t len = 0;
+ uint32_t seq_len;
+ uint32_t app_win;
+ uint32_t max_rcv_win;
+ uint32_t max_representable_win;
+ int rc;
+
+ /* Start profiling (note that profile_stop() is reached only
+ * on the successful-transmission path at the end of this
+ * function; every early return skips it)
+ */
+ profile_start ( &tcp_tx_profiler );
+
+ /* If retransmission timer is already running, do nothing */
+ if ( timer_running ( &tcp->timer ) )
+ return;
+
+ /* Calculate both the actual (payload) and sequence space
+ * lengths that we wish to transmit. The queue is only
+ * measured here (no destination buffer, nothing removed).
+ */
+ if ( TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) {
+ len = tcp_process_tx_queue ( tcp, tcp_xmit_win ( tcp ),
+ NULL, 0 );
+ }
+ seq_len = len;
+ flags = TCP_FLAGS_SENDING ( tcp->tcp_state );
+ if ( flags & ( TCP_SYN | TCP_FIN ) ) {
+ /* SYN or FIN consume one byte, and we can never send both */
+ assert ( ! ( ( flags & TCP_SYN ) && ( flags & TCP_FIN ) ) );
+ seq_len++;
+ }
+ /* Record how much sequence space this transmission covers */
+ tcp->snd_sent = seq_len;
+
+ /* If we have nothing to transmit, stop now */
+ if ( ( seq_len == 0 ) && ! ( tcp->flags & TCP_ACK_PENDING ) )
+ return;
+
+ /* If we are transmitting anything that requires
+ * acknowledgement (i.e. consumes sequence space), start the
+ * retransmission timer. Do this before attempting to
+ * allocate the I/O buffer, in case allocation itself fails.
+ */
+ if ( seq_len )
+ start_timer ( &tcp->timer );
+
+ /* Allocate I/O buffer */
+ iobuf = alloc_iob ( len + TCP_MAX_HEADER_LEN );
+ if ( ! iobuf ) {
+ DBGC ( tcp, "TCP %p could not allocate iobuf for %08x..%08x "
+ "%08x\n", tcp, tcp->snd_seq, ( tcp->snd_seq + seq_len ),
+ tcp->rcv_ack );
+ return;
+ }
+ iob_reserve ( iobuf, TCP_MAX_HEADER_LEN );
+
+ /* Fill data payload from transmit queue. The data is copied
+ * but left on the queue (remove == 0); it is removed by
+ * tcp_rx_ack() once acknowledged.
+ */
+ tcp_process_tx_queue ( tcp, len, iobuf, 0 );
+
+ /* Expand receive window if possible. The candidate window is
+ * limited by the connection's maximum, by the application's
+ * own window, and by what the 16-bit window field can express
+ * at the current scale. The window is only ever grown here.
+ */
+ max_rcv_win = tcp->max_rcv_win;
+ app_win = xfer_window ( &tcp->xfer );
+ if ( max_rcv_win > app_win )
+ max_rcv_win = app_win;
+ max_representable_win = ( 0xffff << tcp->rcv_win_scale );
+ if ( max_rcv_win > max_representable_win )
+ max_rcv_win = max_representable_win;
+ max_rcv_win &= ~0x03; /* Keep everything dword-aligned */
+ if ( tcp->rcv_win < max_rcv_win )
+ tcp->rcv_win = max_rcv_win;
+
+ /* Fill up the TCP header. Options and header are pushed in
+ * front of the payload, so "payload" marks where the header
+ * region ends.
+ */
+ payload = iobuf->data;
+ if ( flags & TCP_SYN ) {
+ mssopt = iob_push ( iobuf, sizeof ( *mssopt ) );
+ mssopt->kind = TCP_OPTION_MSS;
+ mssopt->length = sizeof ( *mssopt );
+ mssopt->mss = htons ( tcp->mss );
+ wsopt = iob_push ( iobuf, sizeof ( *wsopt ) );
+ wsopt->nop = TCP_OPTION_NOP;
+ wsopt->wsopt.kind = TCP_OPTION_WS;
+ wsopt->wsopt.length = sizeof ( wsopt->wsopt );
+ wsopt->wsopt.scale = TCP_RX_WINDOW_SCALE;
+ }
+ /* Timestamps are offered on every SYN, and thereafter sent
+ * only if enabled for this connection.
+ */
+ if ( ( flags & TCP_SYN ) || ( tcp->flags & TCP_TS_ENABLED ) ) {
+ tsopt = iob_push ( iobuf, sizeof ( *tsopt ) );
+ memset ( tsopt->nop, TCP_OPTION_NOP, sizeof ( tsopt->nop ) );
+ tsopt->tsopt.kind = TCP_OPTION_TS;
+ tsopt->tsopt.length = sizeof ( tsopt->tsopt );
+ tsopt->tsopt.tsval = htonl ( currticks() );
+ tsopt->tsopt.tsecr = htonl ( tcp->ts_recent );
+ }
+ if ( len != 0 )
+ flags |= TCP_PSH;
+ tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) );
+ memset ( tcphdr, 0, sizeof ( *tcphdr ) );
+ tcphdr->src = htons ( tcp->local_port );
+ tcphdr->dest = tcp->peer.st_port;
+ tcphdr->seq = htonl ( tcp->snd_seq );
+ tcphdr->ack = htonl ( tcp->rcv_ack );
+ /* Header length is held in the upper four bits, in units of
+ * 32-bit words: ( bytes / 4 ) << 4 is the same as bytes << 2.
+ */
+ tcphdr->hlen = ( ( payload - iobuf->data ) << 2 );
+ tcphdr->flags = flags;
+ tcphdr->win = htons ( tcp->rcv_win >> tcp->rcv_win_scale );
+ tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
+
+ /* Dump header */
+ DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4zd",
+ tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ),
+ ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) + seq_len ),
+ ntohl ( tcphdr->ack ), len );
+ tcp_dump_flags ( tcp, tcphdr->flags );
+ DBGC2 ( tcp, "\n" );
+
+ /* Transmit packet */
+ if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, &tcp->peer, NULL,
+ &tcphdr->csum ) ) != 0 ) {
+ DBGC ( tcp, "TCP %p could not transmit %08x..%08x %08x: %s\n",
+ tcp, tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ),
+ tcp->rcv_ack, strerror ( rc ) );
+ return;
+ }
+
+ /* Clear ACK-pending flag (only after a successful transmit,
+ * so that a failed ACK remains pending)
+ */
+ tcp->flags &= ~TCP_ACK_PENDING;
+
+ profile_stop ( &tcp_tx_profiler );
+}
+
+/** TCP process descriptor
+ *
+ * Associates the @c process member of struct tcp_connection with
+ * tcp_xmit(). tcp_rx() schedules this process via process_add() to
+ * transmit ACKs (and any pending data) after handling a packet.
+ */
+static struct process_descriptor tcp_process_desc =
+ PROC_DESC_ONCE ( struct tcp_connection, process, tcp_xmit );
+
+/**
+ * Handle retransmission timer event
+ *
+ * @v timer		Retransmission timer
+ * @v over		Failure indicator
+ *
+ * While the timer has merely fired, the outstanding packet is
+ * retransmitted.  Once the timer has given up (@c over is set), the
+ * connection is forced into the closed state and torn down with
+ * -ETIMEDOUT.
+ */
+static void tcp_expired ( struct retry_timer *timer, int over ) {
+	struct tcp_connection *tcp =
+		container_of ( timer, struct tcp_connection, timer );
+
+	DBGC ( tcp, "TCP %p timer %s in %s for %08x..%08x %08x\n", tcp,
+	       ( over ? "expired" : "fired" ), tcp_state ( tcp->tcp_state ),
+	       tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack );
+
+	/* The timer should only be running in these states */
+	assert ( ( tcp->tcp_state == TCP_SYN_SENT ) ||
+		 ( tcp->tcp_state == TCP_SYN_RCVD ) ||
+		 ( tcp->tcp_state == TCP_ESTABLISHED ) ||
+		 ( tcp->tcp_state == TCP_FIN_WAIT_1 ) ||
+		 ( tcp->tcp_state == TCP_CLOSE_WAIT ) ||
+		 ( tcp->tcp_state == TCP_CLOSING_OR_LAST_ACK ) );
+
+	/* Not yet given up: retransmit the packet */
+	if ( ! over ) {
+		tcp_xmit ( tcp );
+		return;
+	}
+
+	/* Finally timed out: terminate the connection */
+	tcp->tcp_state = TCP_CLOSED;
+	tcp_dump_state ( tcp );
+	tcp_close ( tcp, -ETIMEDOUT );
+}
+
+/**
+ * Shutdown timer expired
+ *
+ * @v timer Shutdown timer
+ * @v over Failure indicator (unused)
+ *
+ * Invoked when the connection's @c wait timer runs out. tcp_rx()
+ * starts that timer for 2 * TCP_MSL once the connection has closed
+ * gracefully; on expiry the connection is moved to TCP_CLOSED and
+ * closed with a success status.
+ */
+static void tcp_wait_expired ( struct retry_timer *timer, int over __unused ) {
+ struct tcp_connection *tcp =
+ container_of ( timer, struct tcp_connection, wait );
+
+ /* The wait timer is expected to run only in TIME_WAIT */
+ assert ( tcp->tcp_state == TCP_TIME_WAIT );
+
+ DBGC ( tcp, "TCP %p wait complete in %s for %08x..%08x %08x\n", tcp,
+ tcp_state ( tcp->tcp_state ), tcp->snd_seq,
+ ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack );
+
+ /* Force the closed state before calling tcp_close() */
+ tcp->tcp_state = TCP_CLOSED;
+ tcp_dump_state ( tcp );
+ tcp_close ( tcp, 0 );
+}
+
+/**
+ * Send RST response to incoming packet
+ *
+ * @v tcp TCP connection (used here only for debug messages)
+ * @v st_dest Address to which the RST is sent
+ * @v in_tcphdr TCP header of incoming packet
+ * @ret rc Return status code
+ *
+ * The response is built purely from the incoming header: ports are
+ * swapped, and the incoming ACK and SEQ values become the outgoing
+ * SEQ and ACK values respectively. No connection state is modified.
+ */
+static int tcp_xmit_reset ( struct tcp_connection *tcp,
+ struct sockaddr_tcpip *st_dest,
+ struct tcp_header *in_tcphdr ) {
+ struct io_buffer *iobuf;
+ struct tcp_header *tcphdr;
+ int rc;
+
+ /* Allocate space for dataless TX buffer */
+ iobuf = alloc_iob ( TCP_MAX_HEADER_LEN );
+ if ( ! iobuf ) {
+ DBGC ( tcp, "TCP %p could not allocate iobuf for RST "
+ "%08x..%08x %08x\n", tcp, ntohl ( in_tcphdr->ack ),
+ ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ) );
+ return -ENOMEM;
+ }
+ iob_reserve ( iobuf, TCP_MAX_HEADER_LEN );
+
+ /* Construct RST response. Fields copied from the incoming
+ * header are already in network byte order, so no byte
+ * swapping is applied to them.
+ */
+ tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) );
+ memset ( tcphdr, 0, sizeof ( *tcphdr ) );
+ tcphdr->src = in_tcphdr->dest;
+ tcphdr->dest = in_tcphdr->src;
+ tcphdr->seq = in_tcphdr->ack;
+ tcphdr->ack = in_tcphdr->seq;
+ /* Header length in 32-bit words, held in the upper four bits */
+ tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 );
+ tcphdr->flags = ( TCP_RST | TCP_ACK );
+ tcphdr->win = htons ( 0 );
+ tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
+
+ /* Dump header */
+ DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4d",
+ tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ),
+ ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) ),
+ ntohl ( tcphdr->ack ), 0 );
+ tcp_dump_flags ( tcp, tcphdr->flags );
+ DBGC2 ( tcp, "\n" );
+
+ /* Transmit packet */
+ if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, st_dest,
+ NULL, &tcphdr->csum ) ) != 0 ) {
+ DBGC ( tcp, "TCP %p could not transmit RST %08x..%08x %08x: "
+ "%s\n", tcp, ntohl ( in_tcphdr->ack ),
+ ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ),
+ strerror ( rc ) );
+ return rc;
+ }
+
+ return 0;
+}
+
+/***************************************************************************
+ *
+ * Receive data path
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Look up a TCP connection by its local port number
+ *
+ * @v local_port	Local port
+ * @ret tcp		TCP connection, or NULL
+ *
+ * The first connection in the global connection list whose local
+ * port matches is returned.
+ */
+static struct tcp_connection * tcp_demux ( unsigned int local_port ) {
+	struct tcp_connection *candidate;
+
+	list_for_each_entry ( candidate, &tcp_conns, list ) {
+		if ( candidate->local_port != local_port )
+			continue;
+		return candidate;
+	}
+
+	/* No connection is bound to this port */
+	return NULL;
+}
+
+/**
+ * Parse TCP received options
+ *
+ * @v tcp		TCP connection
+ * @v data		Raw options data
+ * @v len		Raw options length
+ * @v options		Options structure to fill in
+ *
+ * The options structure is zeroed and then populated with pointers
+ * into the raw options data for each recognised option.  Parsing
+ * stops at an end-of-options marker or at the first malformed option
+ * (one that is truncated, that claims a length shorter than its own
+ * header or than the structure it will be read as, or that claims to
+ * extend beyond the options area).  Options recorded before that
+ * point are retained; a malformed option is never recorded, so
+ * callers may safely dereference any non-NULL pointer.
+ *
+ * The connection is used only for debug messages.
+ */
+static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data,
+			  size_t len, struct tcp_options *options ) {
+	const void *end = ( data + len );
+	const struct tcp_option *option;
+	unsigned int kind;
+	size_t remaining;
+
+	memset ( options, 0, sizeof ( *options ) );
+	while ( data < end ) {
+		remaining = ( end - data );
+		option = data;
+		kind = option->kind;
+
+		/* Single-byte options carry no length field */
+		if ( kind == TCP_OPTION_END )
+			return;
+		if ( kind == TCP_OPTION_NOP ) {
+			data++;
+			continue;
+		}
+
+		/* Validate the length field before trusting it.  A
+		 * zero length would otherwise loop forever, and a
+		 * truncated or oversized option would be read from
+		 * beyond the end of the options area.
+		 */
+		if ( ( remaining < sizeof ( *option ) ) ||
+		     ( option->length < sizeof ( *option ) ) ||
+		     ( option->length > remaining ) )
+			goto malformed;
+
+		/* Record recognised options, provided that they are
+		 * large enough to be read as the expected structure.
+		 */
+		switch ( kind ) {
+		case TCP_OPTION_MSS:
+			if ( option->length < sizeof ( *options->mssopt ) )
+				goto malformed;
+			options->mssopt = data;
+			break;
+		case TCP_OPTION_WS:
+			if ( option->length < sizeof ( *options->wsopt ) )
+				goto malformed;
+			options->wsopt = data;
+			break;
+		case TCP_OPTION_TS:
+			if ( option->length < sizeof ( *options->tsopt ) )
+				goto malformed;
+			options->tsopt = data;
+			break;
+		default:
+			DBGC ( tcp, "TCP %p received unknown option %d\n",
+			       tcp, kind );
+			break;
+		}
+		data += option->length;
+	}
+	return;
+
+ malformed:
+	DBGC ( tcp, "TCP %p received malformed option %d\n", tcp, kind );
+}
+
+/**
+ * Account for newly received sequence space
+ *
+ * @v tcp		TCP connection
+ * @v seq_len		Sequence space length to consume
+ *
+ * Advances the acknowledgement number, shrinks the receive window by
+ * the same amount (never below zero), latches the most recently seen
+ * timestamp value and flags that an ACK must be sent.
+ */
+static void tcp_rx_seq ( struct tcp_connection *tcp, uint32_t seq_len ) {
+
+	/* Callers must consume at least one unit of sequence space */
+	assert ( seq_len > 0 );
+
+	/* Advance acknowledgement number */
+	tcp->rcv_ack += seq_len;
+
+	/* Shrink window, saturating at zero */
+	tcp->rcv_win = ( ( tcp->rcv_win > seq_len ) ?
+			 ( tcp->rcv_win - seq_len ) : 0 );
+
+	/* Remember timestamp to be echoed */
+	tcp->ts_recent = tcp->ts_val;
+
+	/* An ACK is now owed to the peer */
+	tcp->flags |= TCP_ACK_PENDING;
+}
+
+/**
+ * Handle TCP received SYN
+ *
+ * @v tcp TCP connection
+ * @v seq SEQ value (in host-endian order)
+ * @v options TCP options
+ * @ret rc Return status code (always zero in this implementation)
+ *
+ * On the first SYN seen for the connection, the receive sequence
+ * number is synchronised to @c seq and the timestamp and window
+ * scaling features are enabled according to the options the peer
+ * included. A SYN whose sequence number does not match the expected
+ * value is treated as a duplicate and ignored.
+ */
+static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq,
+ struct tcp_options *options ) {
+
+ /* Synchronise sequence numbers on first SYN */
+ if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) {
+ tcp->rcv_ack = seq;
+ if ( options->tsopt )
+ tcp->flags |= TCP_TS_ENABLED;
+ /* Window scaling is used in both directions only if
+ * the peer offered the option.
+ */
+ if ( options->wsopt ) {
+ tcp->snd_win_scale = options->wsopt->scale;
+ tcp->rcv_win_scale = TCP_RX_WINDOW_SCALE;
+ }
+ }
+
+ /* Ignore duplicate SYN (for a first SYN, rcv_ack was set to
+ * seq just above, so this check always passes)
+ */
+ if ( seq != tcp->rcv_ack )
+ return 0;
+
+ /* Acknowledge SYN */
+ tcp_rx_seq ( tcp, 1 );
+
+ /* Mark SYN as received and start sending ACKs with each packet */
+ tcp->tcp_state |= ( TCP_STATE_SENT ( TCP_ACK ) |
+ TCP_STATE_RCVD ( TCP_SYN ) );
+
+ return 0;
+}
+
+/**
+ * Handle TCP received ACK
+ *
+ * @v tcp TCP connection
+ * @v ack ACK value (in host-endian order)
+ * @v win WIN value (in host-endian order)
+ * @ret rc Return status code
+ *
+ * Returns -EINVAL for an out-of-range ACK on a connection that has
+ * not yet been established (tcp_rx() responds to this by sending a
+ * RST), and zero in all other cases, including ACKs that are ignored.
+ */
+static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack,
+ uint32_t win ) {
+ /* Unsigned subtraction: an ACK older than snd_seq wraps to a
+ * large value and is caught by the range check below.
+ */
+ uint32_t ack_len = ( ack - tcp->snd_seq );
+ size_t len;
+ unsigned int acked_flags;
+
+ /* Check for out-of-range or old duplicate ACKs */
+ if ( ack_len > tcp->snd_sent ) {
+ DBGC ( tcp, "TCP %p received ACK for %08x..%08x, "
+ "sent only %08x..%08x\n", tcp, tcp->snd_seq,
+ ( tcp->snd_seq + ack_len ), tcp->snd_seq,
+ ( tcp->snd_seq + tcp->snd_sent ) );
+
+ if ( TCP_HAS_BEEN_ESTABLISHED ( tcp->tcp_state ) ) {
+ /* Just ignore what might be old duplicate ACKs */
+ return 0;
+ } else {
+ /* Send RST if an out-of-range ACK is received
+ * on a not-yet-established connection, as per
+ * RFC 793.
+ */
+ return -EINVAL;
+ }
+ }
+
+ /* Update window size */
+ tcp->snd_win = win;
+
+ /* Ignore ACKs that don't actually acknowledge any new data.
+ * (In particular, do not stop the retransmission timer; this
+ * avoids creating a sorcerer's apprentice syndrome when a
+ * duplicate ACK is received and we still have data in our
+ * transmit queue.)
+ */
+ if ( ack_len == 0 )
+ return 0;
+
+ /* Stop the retransmission timer */
+ stop_timer ( &tcp->timer );
+
+ /* Determine acknowledged flags and data length. A SYN or FIN
+ * in flight accounts for one unit of the acknowledged
+ * sequence space, so it is subtracted from the data length.
+ */
+ len = ack_len;
+ acked_flags = ( TCP_FLAGS_SENDING ( tcp->tcp_state ) &
+ ( TCP_SYN | TCP_FIN ) );
+ if ( acked_flags ) {
+ len--;
+ pending_put ( &tcp->pending_flags );
+ }
+
+ /* Update SEQ and sent counters */
+ tcp->snd_seq = ack;
+ tcp->snd_sent = 0;
+
+ /* Remove any acknowledged data from transmit queue */
+ tcp_process_tx_queue ( tcp, len, NULL, 1 );
+
+ /* Mark SYN/FIN as acknowledged if applicable. */
+ if ( acked_flags )
+ tcp->tcp_state |= TCP_STATE_ACKED ( acked_flags );
+
+ /* Start sending FIN if we've had all possible data ACKed */
+ if ( list_empty ( &tcp->tx_queue ) &&
+ ( tcp->flags & TCP_XFER_CLOSED ) &&
+ ! ( tcp->tcp_state & TCP_STATE_SENT ( TCP_FIN ) ) ) {
+ tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN );
+ /* The FIN is a pending operation until it is ACKed */
+ pending_get ( &tcp->pending_flags );
+ }
+
+ return 0;
+}
+
+/**
+ * Handle TCP received data
+ *
+ * @v tcp		TCP connection
+ * @v seq		SEQ value (in host-endian order)
+ * @v iobuf		I/O buffer
+ * @ret rc		Return status code
+ *
+ * Ownership of the I/O buffer passes to this function: the buffer is
+ * either freed (when it holds nothing new) or handed on to the data
+ * transfer interface.  Any leading bytes that have already been
+ * received are trimmed before delivery.
+ */
+static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq,
+			 struct io_buffer *iobuf ) {
+	uint32_t overlap = ( tcp->rcv_ack - seq );
+	uint32_t fresh = iob_len ( iobuf );
+	int rc;
+
+	/* Drop buffers that contribute nothing new.  The unsigned
+	 * subtraction above means that data lying ahead of the
+	 * expected sequence number is also rejected here.
+	 */
+	if ( overlap >= fresh ) {
+		free_iob ( iobuf );
+		return 0;
+	}
+
+	/* Trim the portion that has already been received */
+	iob_pull ( iobuf, overlap );
+	fresh -= overlap;
+
+	/* Acknowledge the new data */
+	tcp_rx_seq ( tcp, fresh );
+
+	/* Pass the data up to the application */
+	profile_start ( &tcp_xfer_profiler );
+	rc = xfer_deliver_iob ( &tcp->xfer, iobuf );
+	if ( rc != 0 ) {
+		DBGC ( tcp, "TCP %p could not deliver %08x..%08x: %s\n",
+		       tcp, seq, ( seq + fresh ), strerror ( rc ) );
+		return rc;
+	}
+	profile_stop ( &tcp_xfer_profiler );
+
+	return 0;
+}
+
+/**
+ * Handle TCP received FIN
+ *
+ * @v tcp TCP connection
+ * @v seq SEQ value (in host-endian order)
+ * @ret rc Return status code (always zero in this implementation)
+ *
+ * A FIN is accepted only if it arrives exactly at the next expected
+ * sequence number; it then consumes one unit of sequence space, is
+ * recorded in the connection state, and triggers tcp_close() with a
+ * success status.
+ */
+static int tcp_rx_fin ( struct tcp_connection *tcp, uint32_t seq ) {
+
+ /* Ignore duplicate or out-of-order FIN */
+ if ( seq != tcp->rcv_ack )
+ return 0;
+
+ /* Acknowledge FIN */
+ tcp_rx_seq ( tcp, 1 );
+
+ /* Mark FIN as received */
+ tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN );
+
+ /* Close connection */
+ tcp_close ( tcp, 0 );
+
+ return 0;
+}
+
+/**
+ * Handle TCP received RST
+ *
+ * @v tcp		TCP connection
+ * @v seq		SEQ value (in host-endian order)
+ * @ret rc		Return status code
+ *
+ * An unacceptable RST is ignored and zero is returned.  An acceptable
+ * RST aborts the connection and -ECONNRESET is returned.
+ */
+static int tcp_rx_rst ( struct tcp_connection *tcp, uint32_t seq ) {
+	int acceptable;
+
+	/* Once a SYN has been received, the RST must fall within the
+	 * receive window.  Before that there is no window to test
+	 * against, so instead require that our own SYN has been
+	 * ACKed.
+	 */
+	if ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) {
+		acceptable = ( tcp_in_window ( seq, tcp->rcv_ack,
+					       tcp->rcv_win ) ? 1 : 0 );
+	} else {
+		acceptable = ( ( tcp->tcp_state &
+				 TCP_STATE_ACKED ( TCP_SYN ) ) ? 1 : 0 );
+	}
+	if ( ! acceptable )
+		return 0;
+
+	/* Abort connection */
+	tcp->tcp_state = TCP_CLOSED;
+	tcp_dump_state ( tcp );
+	tcp_close ( tcp, -ECONNRESET );
+
+	DBGC ( tcp, "TCP %p connection reset by peer\n", tcp );
+	return -ECONNRESET;
+}
+
+/**
+ * Enqueue received TCP packet
+ *
+ * @v tcp TCP connection
+ * @v seq SEQ value (in host-endian order)
+ * @v flags TCP flags
+ * @v iobuf I/O buffer
+ *
+ * The I/O buffer is always consumed: it is either freed immediately
+ * or prefixed with an internal header (recording its sequence number
+ * and FIN flag) and inserted into the connection's receive queue,
+ * which is kept ordered by sequence number.
+ */
+static void tcp_rx_enqueue ( struct tcp_connection *tcp, uint32_t seq,
+ uint8_t flags, struct io_buffer *iobuf ) {
+ struct tcp_rx_queued_header *tcpqhdr;
+ struct io_buffer *queued;
+ size_t len;
+ uint32_t seq_len;
+
+ /* Calculate remaining flags and sequence length. Note that
+ * SYN, if present, has already been processed by this point.
+ */
+ flags &= TCP_FIN;
+ len = iob_len ( iobuf );
+ seq_len = ( len + ( flags ? 1 : 0 ) );
+
+ /* Discard immediately (to save memory) if:
+ *
+ * a) we have not yet received a SYN (and so have no defined
+ * receive window), or
+ * b) the packet lies entirely outside the receive window, or
+ * c) there is no further content to process.
+ */
+ if ( ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) ||
+ ( tcp_cmp ( seq, tcp->rcv_ack + tcp->rcv_win ) >= 0 ) ||
+ ( tcp_cmp ( seq + seq_len, tcp->rcv_ack ) < 0 ) ||
+ ( seq_len == 0 ) ) {
+ free_iob ( iobuf );
+ return;
+ }
+
+ /* Add internal header */
+ tcpqhdr = iob_push ( iobuf, sizeof ( *tcpqhdr ) );
+ tcpqhdr->seq = seq;
+ tcpqhdr->flags = flags;
+
+ /* Add to RX queue: find the first queued buffer with a later
+ * sequence number and insert in front of it. If no such
+ * buffer exists, the loop runs to completion and the new
+ * buffer is added at the end of the queue.
+ */
+ list_for_each_entry ( queued, &tcp->rx_queue, list ) {
+ tcpqhdr = queued->data;
+ if ( tcp_cmp ( seq, tcpqhdr->seq ) < 0 )
+ break;
+ }
+ list_add_tail ( &iobuf->list, &queued->list );
+}
+
+/**
+ * Process receive queue
+ *
+ * @v tcp TCP connection
+ *
+ * Dequeues and handles buffers from the front of the receive queue
+ * for as long as they start at or before the next expected sequence
+ * number, stopping at the first gap. The return values of
+ * tcp_rx_data() and tcp_rx_fin() are not examined.
+ */
+static void tcp_process_rx_queue ( struct tcp_connection *tcp ) {
+ struct io_buffer *iobuf;
+ struct tcp_rx_queued_header *tcpqhdr;
+ uint32_t seq;
+ unsigned int flags;
+ size_t len;
+
+ /* Process all applicable received buffers. Note that we
+ * cannot use list_for_each_entry() to iterate over the RX
+ * queue, since tcp_discard() may remove packets from the RX
+ * queue while we are processing.
+ */
+ while ( ( iobuf = list_first_entry ( &tcp->rx_queue, struct io_buffer,
+ list ) ) ) {
+
+ /* Stop processing when we hit the first gap */
+ tcpqhdr = iobuf->data;
+ if ( tcp_cmp ( tcpqhdr->seq, tcp->rcv_ack ) > 0 )
+ break;
+
+ /* Strip internal header and remove from RX queue. The
+ * header fields and payload length are captured now,
+ * since tcp_rx_data() takes ownership of the buffer.
+ */
+ list_del ( &iobuf->list );
+ seq = tcpqhdr->seq;
+ flags = tcpqhdr->flags;
+ iob_pull ( iobuf, sizeof ( *tcpqhdr ) );
+ len = iob_len ( iobuf );
+
+ /* Handle new data, if any */
+ tcp_rx_data ( tcp, seq, iob_disown ( iobuf ) );
+ seq += len;
+
+ /* Handle FIN, if present */
+ if ( flags & TCP_FIN ) {
+ tcp_rx_fin ( tcp, seq );
+ seq++;
+ }
+ }
+}
+
+/**
+ * Process received packet
+ *
+ * @v iobuf I/O buffer
+ * @v netdev Network device
+ * @v st_src Partially-filled source address
+ * @v st_dest Partially-filled destination address
+ * @v pshdr_csum Pseudo-header checksum
+ * @ret rc Return status code
+ *
+ * The I/O buffer is always consumed: it is either handed to the
+ * receive queue or freed on the discard path. Processing order is
+ * validation, connection lookup, ACK, SYN, RST, then queued data and
+ * FIN handling.
+ */
+static int tcp_rx ( struct io_buffer *iobuf,
+ struct net_device *netdev __unused,
+ struct sockaddr_tcpip *st_src,
+ struct sockaddr_tcpip *st_dest __unused,
+ uint16_t pshdr_csum ) {
+ struct tcp_header *tcphdr = iobuf->data;
+ struct tcp_connection *tcp;
+ struct tcp_options options;
+ size_t hlen;
+ uint16_t csum;
+ uint32_t seq;
+ uint32_t ack;
+ uint16_t raw_win;
+ uint32_t win;
+ unsigned int flags;
+ size_t len;
+ uint32_t seq_len;
+ size_t old_xfer_window;
+ int rc;
+
+ /* Start profiling */
+ profile_start ( &tcp_rx_profiler );
+
+ /* Sanity check packet */
+ if ( iob_len ( iobuf ) < sizeof ( *tcphdr ) ) {
+ DBG ( "TCP packet too short at %zd bytes (min %zd bytes)\n",
+ iob_len ( iobuf ), sizeof ( *tcphdr ) );
+ rc = -EINVAL;
+ goto discard;
+ }
+ /* Header length field counts 32-bit words in its upper bits */
+ hlen = ( ( tcphdr->hlen & TCP_MASK_HLEN ) / 16 ) * 4;
+ if ( hlen < sizeof ( *tcphdr ) ) {
+ DBG ( "TCP header too short at %zd bytes (min %zd bytes)\n",
+ hlen, sizeof ( *tcphdr ) );
+ rc = -EINVAL;
+ goto discard;
+ }
+ if ( hlen > iob_len ( iobuf ) ) {
+ DBG ( "TCP header too long at %zd bytes (max %zd bytes)\n",
+ hlen, iob_len ( iobuf ) );
+ rc = -EINVAL;
+ goto discard;
+ }
+ csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data,
+ iob_len ( iobuf ) );
+ if ( csum != 0 ) {
+ DBG ( "TCP checksum incorrect (is %04x including checksum "
+ "field, should be 0000)\n", csum );
+ rc = -EINVAL;
+ goto discard;
+ }
+
+ /* Parse parameters from header and strip header. Note that
+ * tcp may still be NULL at this point; tcp_rx_opts() uses it
+ * only for debug messages.
+ */
+ tcp = tcp_demux ( ntohs ( tcphdr->dest ) );
+ seq = ntohl ( tcphdr->seq );
+ ack = ntohl ( tcphdr->ack );
+ raw_win = ntohs ( tcphdr->win );
+ flags = tcphdr->flags;
+ tcp_rx_opts ( tcp, ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) ),
+ ( hlen - sizeof ( *tcphdr ) ), &options );
+ if ( tcp && options.tsopt )
+ tcp->ts_val = ntohl ( options.tsopt->tsval );
+ iob_pull ( iobuf, hlen );
+ len = iob_len ( iobuf );
+ seq_len = ( len + ( ( flags & TCP_SYN ) ? 1 : 0 ) +
+ ( ( flags & TCP_FIN ) ? 1 : 0 ) );
+
+ /* Dump header */
+ DBGC2 ( tcp, "TCP %p RX %d<-%d %08x %08x..%08x %4zd",
+ tcp, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ),
+ ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ),
+ ( ntohl ( tcphdr->seq ) + seq_len ), len );
+ tcp_dump_flags ( tcp, tcphdr->flags );
+ DBGC2 ( tcp, "\n" );
+
+ /* If no connection was found, silently drop packet */
+ if ( ! tcp ) {
+ rc = -ENOTCONN;
+ goto discard;
+ }
+
+ /* Record old data-transfer window */
+ old_xfer_window = tcp_xfer_window ( tcp );
+
+ /* Handle ACK, if present. A failure here (an out-of-range
+ * ACK on a not-yet-established connection) is answered with a
+ * RST and the packet is dropped.
+ */
+ if ( flags & TCP_ACK ) {
+ win = ( raw_win << tcp->snd_win_scale );
+ if ( ( rc = tcp_rx_ack ( tcp, ack, win ) ) != 0 ) {
+ tcp_xmit_reset ( tcp, st_src, tcphdr );
+ goto discard;
+ }
+ }
+
+ /* Force an ACK if this packet is out of order */
+ if ( ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) &&
+ ( seq != tcp->rcv_ack ) ) {
+ tcp->flags |= TCP_ACK_PENDING;
+ }
+
+ /* Handle SYN, if present. The SYN occupies one unit of
+ * sequence space, so step past it before handling the rest.
+ */
+ if ( flags & TCP_SYN ) {
+ tcp_rx_syn ( tcp, seq, &options );
+ seq++;
+ }
+
+ /* Handle RST, if present. tcp_rx_rst() returns non-zero only
+ * when it has aborted the connection.
+ */
+ if ( flags & TCP_RST ) {
+ if ( ( rc = tcp_rx_rst ( tcp, seq ) ) != 0 )
+ goto discard;
+ }
+
+ /* Enqueue received data (ownership of the buffer passes to
+ * tcp_rx_enqueue())
+ */
+ tcp_rx_enqueue ( tcp, seq, flags, iob_disown ( iobuf ) );
+
+ /* Process receive queue */
+ tcp_process_rx_queue ( tcp );
+
+ /* Dump out any state change as a result of the received packet */
+ tcp_dump_state ( tcp );
+
+ /* Schedule transmission of ACK (and any pending data). If we
+ * have received any out-of-order packets (i.e. if the receive
+ * queue remains non-empty after processing) then send the ACK
+ * immediately in order to trigger Fast Retransmission.
+ */
+ if ( list_empty ( &tcp->rx_queue ) ) {
+ process_add ( &tcp->process );
+ } else {
+ tcp_xmit ( tcp );
+ }
+
+ /* If this packet was the last we expect to receive, set up
+ * timer to expire and cause the connection to be freed.
+ */
+ if ( TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ) ) {
+ stop_timer ( &tcp->wait );
+ start_timer_fixed ( &tcp->wait, ( 2 * TCP_MSL ) );
+ }
+
+ /* Notify application if window has changed */
+ if ( tcp_xfer_window ( tcp ) != old_xfer_window )
+ xfer_window_changed ( &tcp->xfer );
+
+ profile_stop ( &tcp_rx_profiler );
+ return 0;
+
+ discard:
+ /* Free received packet */
+ free_iob ( iobuf );
+ return rc;
+}
+
+/** TCP protocol
+ *
+ * Registers tcp_rx() as the receive handler for IP_TCP packets. This
+ * structure is also passed to tcpip_tx() by the transmit paths.
+ */
+struct tcpip_protocol tcp_protocol __tcpip_protocol = {
+ .name = "TCP",
+ .rx = tcp_rx,
+ .tcpip_proto = IP_TCP,
+};
+
+/**
+ * Discard some cached TCP data
+ *
+ * @ret discarded Number of cached items discarded
+ *
+ * At most one buffer (the last one in the receive queue) is dropped
+ * from each connection. The connection's maximum receive window is
+ * reduced so that it no longer reaches the discarded buffer.
+ */
+static unsigned int tcp_discard ( void ) {
+ struct tcp_connection *tcp;
+ struct io_buffer *iobuf;
+ struct tcp_rx_queued_header *tcpqhdr;
+ uint32_t max_win;
+ unsigned int discarded = 0;
+
+ /* Try to drop one queued RX packet from each connection */
+ list_for_each_entry ( tcp, &tcp_conns, list ) {
+ /* The unconditional break below means that this inner
+ * loop visits only the final entry of the queue, if
+ * there is one.
+ */
+ list_for_each_entry_reverse ( iobuf, &tcp->rx_queue, list ) {
+
+ /* Limit window to prevent future discards */
+ tcpqhdr = iobuf->data;
+ max_win = ( tcpqhdr->seq - tcp->rcv_ack );
+ if ( max_win < tcp->max_rcv_win ) {
+ DBGC ( tcp, "TCP %p reducing maximum window "
+ "from %d to %d\n",
+ tcp, tcp->max_rcv_win, max_win );
+ tcp->max_rcv_win = max_win;
+ }
+
+ /* Remove packet from queue */
+ list_del ( &iobuf->list );
+ free_iob ( iobuf );
+
+ /* Report discard */
+ discarded++;
+ break;
+ }
+ }
+
+ return discarded;
+}
+
+/** TCP cache discarder
+ *
+ * Offers tcp_discard() as a way of releasing queued receive buffers.
+ */
+struct cache_discarder tcp_discarder __cache_discarder ( CACHE_NORMAL ) = {
+ .discard = tcp_discard,
+};
+
+/**
+ * Shut down all TCP connections
+ *
+ * @v booting		System is shutting down for OS boot (unused)
+ *
+ * Every connection still on the global connection list is forced
+ * into the closed state and then closed with -ECANCELED.
+ */
+static void tcp_shutdown ( int booting __unused ) {
+	struct tcp_connection *tcp;
+
+	/* Keep taking the head of the list until none remain */
+	for ( ; ; ) {
+		tcp = list_first_entry ( &tcp_conns, struct tcp_connection,
+					 list );
+		if ( tcp == NULL )
+			break;
+
+		tcp->tcp_state = TCP_CLOSED;
+		tcp_dump_state ( tcp );
+		tcp_close ( tcp, -ECANCELED );
+	}
+}
+
+/** TCP shutdown function
+ *
+ * Only a shutdown handler is provided; no startup handler is set.
+ */
+struct startup_fn tcp_startup_fn __startup_fn ( STARTUP_EARLY ) = {
+ .shutdown = tcp_shutdown,
+};
+
+/***************************************************************************
+ *
+ * Data transfer interface
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Close interface
+ *
+ * @v tcp TCP connection
+ * @v rc Reason for close
+ *
+ * Handler for the data transfer interface's close operation: closes
+ * the connection and then immediately attempts a transmission.
+ */
+static void tcp_xfer_close ( struct tcp_connection *tcp, int rc ) {
+
+ /* Close data transfer interface */
+ tcp_close ( tcp, rc );
+
+ /* Transmit FIN, if possible */
+ tcp_xmit ( tcp );
+}
+
+/**
+ * Deliver datagram as I/O buffer
+ *
+ * @v tcp TCP connection
+ * @v iobuf Datagram I/O buffer
+ * @v meta Data transfer metadata (unused)
+ * @ret rc Return status code (always zero in this implementation)
+ *
+ * The buffer is appended to the connection's transmit queue, where it
+ * remains until tcp_process_tx_queue() removes and frees it.
+ */
+static int tcp_xfer_deliver ( struct tcp_connection *tcp,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *meta __unused ) {
+
+ /* Enqueue packet */
+ list_add_tail ( &iobuf->list, &tcp->tx_queue );
+
+ /* Each enqueued packet is a pending operation (released by
+ * tcp_process_tx_queue() when the buffer is freed)
+ */
+ pending_get ( &tcp->pending_data );
+
+ /* Transmit data, if possible */
+ tcp_xmit ( tcp );
+
+ return 0;
+}
+
+/** TCP data transfer interface operations
+ *
+ * Maps the deliver, window and close operations of the data transfer
+ * interface onto tcp_xfer_deliver(), tcp_xfer_window() and
+ * tcp_xfer_close() respectively.
+ */
+static struct interface_operation tcp_xfer_operations[] = {
+ INTF_OP ( xfer_deliver, struct tcp_connection *, tcp_xfer_deliver ),
+ INTF_OP ( xfer_window, struct tcp_connection *, tcp_xfer_window ),
+ INTF_OP ( intf_close, struct tcp_connection *, tcp_xfer_close ),
+};
+
+/** TCP data transfer interface descriptor
+ *
+ * Ties the operations above to the @c xfer member of
+ * struct tcp_connection.
+ */
+static struct interface_descriptor tcp_xfer_desc =
+ INTF_DESC ( struct tcp_connection, xfer, tcp_xfer_operations );
+
+/***************************************************************************
+ *
+ * Openers
+ *
+ ***************************************************************************
+ */
+
+/** TCP IPv4 socket opener
+ *
+ * Opens TCP_SOCK_STREAM sockets in the AF_INET family via tcp_open().
+ */
+struct socket_opener tcp_ipv4_socket_opener __socket_opener = {
+ .semantics = TCP_SOCK_STREAM,
+ .family = AF_INET,
+ .open = tcp_open,
+};
+
+/** TCP IPv6 socket opener
+ *
+ * Opens TCP_SOCK_STREAM sockets in the AF_INET6 family via tcp_open().
+ */
+struct socket_opener tcp_ipv6_socket_opener __socket_opener = {
+ .semantics = TCP_SOCK_STREAM,
+ .family = AF_INET6,
+ .open = tcp_open,
+};
+
+/** Linkage hack
+ *
+ * NOTE(review): presumably exists so that code using SOCK_STREAM
+ * pulls this object into the link; confirm against the definition of
+ * SOCK_STREAM in the socket headers.
+ */
+int tcp_sock_stream = TCP_SOCK_STREAM;
+
+/**
+ * Open a TCP connection described by a URI
+ *
+ * @v xfer		Data transfer interface
+ * @v uri		URI
+ * @ret rc		Return status code
+ *
+ * The URI must name a host, otherwise -EINVAL is returned.  The port
+ * is taken from the URI, with zero as the default, and the host name
+ * is handed to the named-socket opener.
+ */
+static int tcp_open_uri ( struct interface *xfer, struct uri *uri ) {
+	struct sockaddr_tcpip remote;
+	int rc;
+
+	/* A host name is mandatory */
+	if ( uri->host == NULL )
+		return -EINVAL;
+
+	/* Only the port is filled in; the rest stays zeroed */
+	memset ( &remote, 0, sizeof ( remote ) );
+	remote.st_port = htons ( uri_port ( uri, 0 ) );
+
+	rc = xfer_open_named_socket ( xfer, SOCK_STREAM,
+				      ( struct sockaddr * ) &remote,
+				      uri->host, NULL );
+	return rc;
+}
+
+/** TCP URI opener
+ *
+ * Handles URIs with the "tcp" scheme by calling tcp_open_uri().
+ */
+struct uri_opener tcp_uri_opener __uri_opener = {
+ .scheme = "tcp",
+ .open = tcp_open_uri,
+};
+
diff --git a/qemu/roms/ipxe/src/net/tcp/ftp.c b/qemu/roms/ipxe/src/net/tcp/ftp.c
new file mode 100644
index 000000000..be7a7c3b5
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/tcp/ftp.c
@@ -0,0 +1,546 @@
+/*
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <ctype.h>
+#include <byteswap.h>
+#include <ipxe/socket.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/in.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/uri.h>
+#include <ipxe/features.h>
+#include <ipxe/ftp.h>
+
+/** @file
+ *
+ * File transfer protocol
+ *
+ */
+
+FEATURE ( FEATURE_PROTOCOL, "FTP", DHCP_EB_FEATURE_FTP, 1 );
+
+/**
+ * FTP states
+ *
+ * These @b must be sequential, i.e. a successful FTP session must
+ * pass through each of these states in order.
+ *
+ * The values are also used as indices into the ftp_strings[] table,
+ * and ftp_next_state() advances simply by incrementing the state.
+ */
+enum ftp_state {
+ FTP_CONNECT = 0, /* initial state (zalloc() leaves it at 0); no command */
+ FTP_USER, /* "USER " plus user name sent */
+ FTP_PASS, /* "PASS " plus password sent */
+ FTP_TYPE, /* "TYPE I" sent */
+ FTP_SIZE, /* "SIZE " plus path sent */
+ FTP_PASV, /* "PASV" sent */
+ FTP_RETR, /* "RETR " plus path sent */
+ FTP_WAIT, /* no command string for this state */
+ FTP_QUIT, /* "QUIT" sent */
+ FTP_DONE, /* final state; ftp_next_state() never moves past it */
+};
+
+/**
+ * An FTP request
+ *
+ * Allocated zeroed by ftp_open(), which also registers ftp_free() as
+ * the free routine of the embedded reference counter.
+ */
+struct ftp_request {
+ /** Reference counter */
+ struct refcnt refcnt;
+ /** Data transfer interface */
+ struct interface xfer;
+
+ /** URI being fetched */
+ struct uri *uri;
+ /** FTP control channel interface */
+ struct interface control;
+ /** FTP data channel interface */
+ struct interface data;
+
+ /** Current state */
+ enum ftp_state state;
+ /** Buffer to be filled with data received via the control channel
+ *
+ * Points into one of status_text, passive_text or filesize,
+ * as selected by ftp_control_deliver().
+ */
+ char *recvbuf;
+ /** Remaining size of recvbuf */
+ size_t recvsize;
+ /** FTP status code, as text
+ *
+ * At most four characters are stored (sizeof - 1); ftp_reply()
+ * reads index 0 as the major status digit and index 3 as the
+ * separator.
+ */
+ char status_text[5];
+ /** Passive-mode parameters, as text */
+ char passive_text[24]; /* "aaa,bbb,ccc,ddd,eee,fff" */
+ /** File size, as text */
+ char filesize[20];
+};
+
+/**
+ * Free FTP request
+ *
+ * @v refcnt		Reference counter
+ *
+ * Releases the URI reference held by the request and then the
+ * request structure itself.
+ */
+static void ftp_free ( struct refcnt *refcnt ) {
+	struct ftp_request *request;
+
+	/* Recover the request that embeds this reference counter */
+	request = container_of ( refcnt, struct ftp_request, refcnt );
+	DBGC ( request, "FTP %p freed\n", request );
+
+	/* Drop the URI first, then the request memory */
+	uri_put ( request->uri );
+	free ( request );
+}
+
+/**
+ * Mark FTP operation as complete
+ *
+ * @v ftp FTP request
+ * @v rc Return status code
+ *
+ * Shuts down the data channel, the control channel and the data
+ * transfer interface, in that order, passing the same status code to
+ * each.  Also installed as the intf_close handler for the control
+ * and data transfer interfaces.
+ */
+static void ftp_done ( struct ftp_request *ftp, int rc ) {
+
+ DBGC ( ftp, "FTP %p completed (%s)\n", ftp, strerror ( rc ) );
+
+ /* Close all data transfer interfaces */
+ intf_shutdown ( &ftp->data, rc );
+ intf_shutdown ( &ftp->control, rc );
+ intf_shutdown ( &ftp->xfer, rc );
+}
+
+/*****************************************************************************
+ *
+ * FTP control channel
+ *
+ */
+
+/** An FTP control channel string
+ *
+ * ftp_next_state() sends the literal portion followed by the
+ * variable portion (if any) and a CRLF.
+ */
+struct ftp_control_string {
+ /** Literal portion
+ *
+ * NULL if nothing is to be sent for the state.
+ */
+ const char *literal;
+ /** Variable portion
+ *
+ * @v ftp FTP request
+ * @ret string Variable portion of string
+ *
+ * May be NULL, in which case an empty string is used.
+ */
+ const char * ( *variable ) ( struct ftp_request *ftp );
+};
+
+/**
+ * Retrieve FTP pathname
+ *
+ * @v ftp FTP request
+ * @ret path FTP pathname
+ *
+ * Returns the path component of the request's URI unmodified; used
+ * as the variable portion of the SIZE and RETR commands.
+ */
+static const char * ftp_uri_path ( struct ftp_request *ftp ) {
+ return ftp->uri->path;
+}
+
+/**
+ * Retrieve FTP user
+ *
+ * @v ftp		FTP request
+ * @ret user		FTP user
+ *
+ * Uses the user name from the URI if there is one, and falls back to
+ * "anonymous" otherwise.
+ */
+static const char * ftp_user ( struct ftp_request *ftp ) {
+	const char *user = ftp->uri->user;
+
+	if ( user )
+		return user;
+	return "anonymous";
+}
+
+/**
+ * Retrieve FTP password
+ *
+ * @v ftp		FTP request
+ * @ret password	FTP password
+ *
+ * Uses the password from the URI if there is one, and falls back to
+ * "ipxe@ipxe.org" otherwise.
+ */
+static const char * ftp_password ( struct ftp_request *ftp ) {
+	const char *password = ftp->uri->password;
+
+	if ( password )
+		return password;
+	return "ipxe@ipxe.org";
+}
+
+/** FTP control channel strings
+ *
+ * Indexed by enum ftp_state.  Entries with a NULL literal cause
+ * ftp_next_state() to send nothing on entering that state.
+ */
+static struct ftp_control_string ftp_strings[] = {
+ [FTP_CONNECT] = { NULL, NULL },
+ [FTP_USER] = { "USER ", ftp_user },
+ [FTP_PASS] = { "PASS ", ftp_password },
+ [FTP_TYPE] = { "TYPE I", NULL },
+ [FTP_SIZE] = { "SIZE ", ftp_uri_path },
+ [FTP_PASV] = { "PASV", NULL },
+ [FTP_RETR] = { "RETR ", ftp_uri_path },
+ [FTP_WAIT] = { NULL, NULL },
+ [FTP_QUIT] = { "QUIT", NULL },
+ [FTP_DONE] = { NULL, NULL },
+};
+
+/**
+ * Parse FTP byte sequence value
+ *
+ * @v text		Text string
+ * @v value		Value buffer
+ * @v len		Length of value buffer
+ *
+ * This parses an FTP byte sequence value (e.g. the "aaa,bbb,ccc,ddd"
+ * form for IP addresses in PORT commands) into a byte sequence.  @c
+ * *text will be updated to point beyond the end of the parsed byte
+ * sequence.
+ *
+ * Exactly @c len bytes are written to @c value.  After each number
+ * one separator character is skipped, unless the end of the string
+ * has been reached.  A zero @c len writes nothing and leaves @c *text
+ * untouched.
+ *
+ * This function is safe in the presence of malformed data, though the
+ * output is undefined.
+ */
+static void ftp_parse_value ( char **text, uint8_t *value, size_t len ) {
+
+	/* Test before decrementing, so that a zero length cannot wrap
+	 * around and overrun the value buffer.
+	 */
+	while ( len-- ) {
+		*(value++) = strtoul ( *text, text, 10 );
+		/* Step over the separator, but never past the NUL */
+		if ( **text )
+			(*text)++;
+	}
+}
+
+/**
+ * Move to next state and send the appropriate FTP control string
+ *
+ * @v ftp		FTP request
+ *
+ * The state is incremented unless it has already reached FTP_DONE.
+ * The control string for the new state is then looked up in
+ * ftp_strings[] and, if it has a literal portion, sent on the control
+ * channel followed by CRLF.
+ */
+static void ftp_next_state ( struct ftp_request *ftp ) {
+	const struct ftp_control_string *entry;
+	const char *suffix = "";
+
+	/* Advance, saturating at the final state */
+	if ( ftp->state < FTP_DONE )
+		ftp->state++;
+
+	/* Look up the command for the new state */
+	entry = &ftp_strings[ftp->state];
+	if ( entry->variable )
+		suffix = entry->variable ( ftp );
+
+	/* States without a literal portion have nothing to send */
+	if ( ! entry->literal )
+		return;
+
+	DBGC ( ftp, "FTP %p sending %s%s\n", ftp, entry->literal, suffix );
+	xfer_printf ( &ftp->control, "%s%s\r\n", entry->literal, suffix );
+}
+
+/**
+ * Handle an FTP control channel response
+ *
+ * @v ftp		FTP request
+ *
+ * This is called once we have received a complete response line.
+ *
+ * The first character of ftp->status_text is the major status digit
+ * and the fourth is the separator.  Lines whose separator is not a
+ * space, and 1xx responses, are ignored.  A 5xx response to SIZE is
+ * tolerated.  Any other response that is neither 2xx nor (for USER)
+ * 3xx terminates the request with -EPROTO.  On success the SIZE and
+ * PASV results are acted upon and the state machine is advanced via
+ * ftp_next_state().
+ */
+static void ftp_reply ( struct ftp_request *ftp ) {
+	char status_major = ftp->status_text[0];
+	char separator = ftp->status_text[3];
+
+	DBGC ( ftp, "FTP %p received status %s\n", ftp, ftp->status_text );
+
+	/* Ignore malformed lines */
+	if ( separator != ' ' )
+		return;
+
+	/* Ignore "intermediate" responses (1xx codes) */
+	if ( status_major == '1' )
+		return;
+
+	/* If the SIZE command is not supported by the server, we go to
+	 * the next step.
+	 */
+	if ( ( status_major == '5' ) && ( ftp->state == FTP_SIZE ) ) {
+		ftp_next_state ( ftp );
+		return;
+	}
+
+	/* Anything other than success (2xx) or, in the case of a
+	 * response to a "USER" command, a password prompt (3xx), is a
+	 * fatal error.
+	 */
+	if ( ! ( ( status_major == '2' ) ||
+		 ( ( status_major == '3' ) && ( ftp->state == FTP_USER ) ) ) ){
+		/* Flag protocol error and close connections */
+		ftp_done ( ftp, -EPROTO );
+		return;
+	}
+
+	/* Parse file size */
+	if ( ftp->state == FTP_SIZE ) {
+		size_t filesize;
+		char *endptr;
+
+		/* Parse size; trailing non-digits make it invalid */
+		filesize = strtoul ( ftp->filesize, &endptr, 10 );
+		if ( *endptr != '\0' ) {
+			DBGC ( ftp, "FTP %p invalid SIZE \"%s\"\n",
+			       ftp, ftp->filesize );
+			ftp_done ( ftp, -EPROTO );
+			return;
+		}
+
+		/* Use seek() to notify recipient of filesize */
+		DBGC ( ftp, "FTP %p file size is %zd bytes\n", ftp, filesize );
+		xfer_seek ( &ftp->xfer, filesize );
+		xfer_seek ( &ftp->xfer, 0 );
+	}
+
+	/* Open passive connection when we get "PASV" response */
+	if ( ftp->state == FTP_PASV ) {
+		char *ptr = ftp->passive_text;
+		union {
+			struct sockaddr_in sin;
+			struct sockaddr sa;
+		} sa;
+		int rc;
+
+		/* Zero the whole address before filling it in, as the
+		 * other socket openers in this file do, so that no
+		 * field is handed to the socket layer uninitialised.
+		 */
+		memset ( &sa, 0, sizeof ( sa ) );
+		sa.sin.sin_family = AF_INET;
+
+		/* The address and port bytes arrive in transmission
+		 * order and are stored byte by byte.
+		 */
+		ftp_parse_value ( &ptr, ( uint8_t * ) &sa.sin.sin_addr,
+				  sizeof ( sa.sin.sin_addr ) );
+		ftp_parse_value ( &ptr, ( uint8_t * ) &sa.sin.sin_port,
+				  sizeof ( sa.sin.sin_port ) );
+		if ( ( rc = xfer_open_socket ( &ftp->data, SOCK_STREAM,
+					       &sa.sa, NULL ) ) != 0 ) {
+			DBGC ( ftp, "FTP %p could not open data connection\n",
+			       ftp );
+			ftp_done ( ftp, rc );
+			return;
+		}
+	}
+
+	/* Move to next state and send control string */
+	ftp_next_state ( ftp );
+
+}
+
+/**
+ * Handle new data arriving on FTP control channel
+ *
+ * @v ftp FTP request
+ * @v iobuf I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ *
+ * Data is collected until a complete line is received, at which point
+ * its information is passed to ftp_reply().
+ *
+ * Each received character is either a control character that selects
+ * the destination buffer (status code, passive parameters or file
+ * size) or is appended to the current buffer while space remains.
+ * The buffer position is kept in the request between invocations, so
+ * a line may be split across several I/O buffers.  The I/O buffer is
+ * always freed and the function always returns zero.
+ */
+static int ftp_control_deliver ( struct ftp_request *ftp,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *meta __unused ) {
+ char *data = iobuf->data;
+ size_t len = iob_len ( iobuf );
+ char *recvbuf = ftp->recvbuf;
+ size_t recvsize = ftp->recvsize;
+ char c;
+
+ while ( len-- ) {
+ c = *(data++);
+ if ( ( c == '\r' ) || ( c == '\n' ) ) {
+ /* End of line: call ftp_reply() to handle
+ * completed reply. Avoid calling ftp_reply()
+ * twice if we receive both \r and \n.
+ *
+ * The test holds whenever anything has been
+ * stored since the last line end, or another
+ * buffer has been selected.
+ */
+ if ( recvbuf != ftp->status_text )
+ ftp_reply ( ftp );
+ /* Start filling up the status code buffer */
+ recvbuf = ftp->status_text;
+ recvsize = sizeof ( ftp->status_text ) - 1;
+ } else if ( ( ftp->state == FTP_PASV ) && ( c == '(' ) ) {
+ /* Start filling up the passive parameter buffer */
+ recvbuf = ftp->passive_text;
+ recvsize = sizeof ( ftp->passive_text ) - 1;
+ } else if ( ( ftp->state == FTP_PASV ) && ( c == ')' ) ) {
+ /* Stop filling the passive parameter buffer */
+ recvsize = 0;
+ } else if ( ( ftp->state == FTP_SIZE ) && ( c == ' ' ) ) {
+ /* Start filling up the file size buffer
+ *
+ * Every space seen in this state restarts the
+ * buffer, so the space itself is never stored.
+ */
+ recvbuf = ftp->filesize;
+ recvsize = sizeof ( ftp->filesize ) - 1;
+ } else {
+ /* Fill up buffer if applicable; excess
+ * characters are silently dropped
+ */
+ if ( recvsize > 0 ) {
+ *(recvbuf++) = c;
+ recvsize--;
+ }
+ }
+ }
+
+ /* Store for next invocation */
+ ftp->recvbuf = recvbuf;
+ ftp->recvsize = recvsize;
+
+ /* Free I/O buffer */
+ free_iob ( iobuf );
+
+ return 0;
+}
+
+/** FTP control channel interface operations
+ *
+ * Received data goes to ftp_control_deliver(); closure of the control
+ * channel completes the whole request via ftp_done().
+ */
+static struct interface_operation ftp_control_operations[] = {
+ INTF_OP ( xfer_deliver, struct ftp_request *, ftp_control_deliver ),
+ INTF_OP ( intf_close, struct ftp_request *, ftp_done ),
+};
+
+/** FTP control channel interface descriptor
+ *
+ * Built with INTF_DESC() from struct ftp_request, its "control"
+ * member and the ftp_control_operations table.
+ */
+static struct interface_descriptor ftp_control_desc =
+ INTF_DESC ( struct ftp_request, control, ftp_control_operations );
+
+/*****************************************************************************
+ *
+ * FTP data channel
+ *
+ */
+
+/**
+ * Handle FTP data channel being closed
+ *
+ * @v ftp		FTP request
+ * @v rc		Reason for closure
+ *
+ * A clean closure of the data channel leaves the control channel
+ * alone and merely advances the state machine; the server's
+ * completion message will then arrive via the control channel.
+ *
+ * A closure caused by an error aborts the whole request.
+ */
+static void ftp_data_closed ( struct ftp_request *ftp, int rc ) {
+
+	DBGC ( ftp, "FTP %p data connection closed: %s\n",
+	       ftp, strerror ( rc ) );
+
+	/* A clean close just moves on to the next state */
+	if ( rc == 0 ) {
+		ftp_next_state ( ftp );
+		return;
+	}
+
+	/* Otherwise close everything and record the status */
+	ftp_done ( ftp, rc );
+}
+
+/** FTP data channel interface operations
+ *
+ * Only intf_close is handled here, by ftp_data_closed().
+ */
+static struct interface_operation ftp_data_operations[] = {
+ INTF_OP ( intf_close, struct ftp_request *, ftp_data_closed ),
+};
+
+/** FTP data channel interface descriptor
+ *
+ * Built with INTF_DESC_PASSTHRU() for the "data" member of struct
+ * ftp_request, naming "xfer" as the pass-through interface.
+ * NOTE(review): presumably operations not listed in
+ * ftp_data_operations are forwarded to "xfer" - confirm against the
+ * INTF_DESC_PASSTHRU definition.
+ */
+static struct interface_descriptor ftp_data_desc =
+ INTF_DESC_PASSTHRU ( struct ftp_request, data, ftp_data_operations,
+ xfer );
+
+/*****************************************************************************
+ *
+ * Data transfer interface
+ *
+ */
+
+/** FTP data transfer interface operations
+ *
+ * Only intf_close is handled here; it completes the whole request
+ * via ftp_done().
+ */
+static struct interface_operation ftp_xfer_operations[] = {
+ INTF_OP ( intf_close, struct ftp_request *, ftp_done ),
+};
+
+/** FTP data transfer interface descriptor
+ *
+ * Built with INTF_DESC_PASSTHRU() for the "xfer" member of struct
+ * ftp_request, naming "data" as the pass-through interface (the
+ * mirror image of ftp_data_desc).
+ */
+static struct interface_descriptor ftp_xfer_desc =
+ INTF_DESC_PASSTHRU ( struct ftp_request, xfer, ftp_xfer_operations,
+ data );
+
+/*****************************************************************************
+ *
+ * URI opener
+ *
+ */
+
+/**
+ * Check validity of FTP control channel string
+ *
+ * @v string		String
+ * @ret rc		Return status code
+ *
+ * Returns zero if every character of the string is printable
+ * (trivially so for an empty string), or -EINVAL as soon as a
+ * non-printable character is found.
+ */
+static int ftp_check_string ( const char *string ) {
+	char c;
+
+	/* The FTP control channel is line-based.  Check for invalid
+	 * non-printable characters (e.g. newlines).
+	 */
+	while ( ( c = *(string++) ) ) {
+		/* Convert via unsigned char so that bytes with the top
+		 * bit set never reach isprint() as negative values.
+		 */
+		if ( ! isprint ( ( unsigned char ) c ) )
+			return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * Initiate an FTP connection
+ *
+ * @v xfer		Data transfer interface
+ * @v uri		Uniform Resource Identifier
+ * @ret rc		Return status code
+ *
+ * The URI must have a host and a path, and its path, user and
+ * password (where present) must pass ftp_check_string().  A new
+ * request is then allocated, the control connection is opened by
+ * name (port from the URI, defaulting to FTP_PORT), and the request
+ * is plugged into the parent interface.
+ */
+static int ftp_open ( struct interface *xfer, struct uri *uri ) {
+	struct sockaddr_tcpip server;
+	struct ftp_request *ftp;
+	int rc;
+
+	/* Both a host and a path are mandatory */
+	if ( ( ! uri->host ) || ( ! uri->path ) )
+		return -EINVAL;
+
+	/* Refuse anything unsuitable for the line-based control channel */
+	rc = ftp_check_string ( uri->path );
+	if ( rc != 0 )
+		return rc;
+	if ( uri->user ) {
+		rc = ftp_check_string ( uri->user );
+		if ( rc != 0 )
+			return rc;
+	}
+	if ( uri->password ) {
+		rc = ftp_check_string ( uri->password );
+		if ( rc != 0 )
+			return rc;
+	}
+
+	/* Allocate and populate structure */
+	ftp = zalloc ( sizeof ( *ftp ) );
+	if ( ftp == NULL )
+		return -ENOMEM;
+	ref_init ( &ftp->refcnt, ftp_free );
+	intf_init ( &ftp->xfer, &ftp_xfer_desc, &ftp->refcnt );
+	intf_init ( &ftp->control, &ftp_control_desc, &ftp->refcnt );
+	intf_init ( &ftp->data, &ftp_data_desc, &ftp->refcnt );
+	ftp->uri = uri_get ( uri );
+
+	/* The first thing received will be a status code */
+	ftp->recvbuf = ftp->status_text;
+	ftp->recvsize = ( sizeof ( ftp->status_text ) - 1 );
+
+	DBGC ( ftp, "FTP %p fetching %s\n", ftp, ftp->uri->path );
+
+	/* Open control connection */
+	memset ( &server, 0, sizeof ( server ) );
+	server.st_port = htons ( uri_port ( uri, FTP_PORT ) );
+	rc = xfer_open_named_socket ( &ftp->control, SOCK_STREAM,
+				      ( struct sockaddr * ) &server,
+				      uri->host, NULL );
+	if ( rc != 0 ) {
+		DBGC ( ftp, "FTP %p could not create request: %s\n",
+		       ftp, strerror ( rc ) );
+		ftp_done ( ftp, rc );
+		ref_put ( &ftp->refcnt );
+		return rc;
+	}
+
+	/* Attach to parent interface, mortalise self, and return */
+	intf_plug_plug ( &ftp->xfer, xfer );
+	ref_put ( &ftp->refcnt );
+	return 0;
+}
+
+/** FTP URI opener
+ *
+ * Associates the "ftp" URI scheme with ftp_open().
+ */
+struct uri_opener ftp_uri_opener __uri_opener = {
+ .scheme = "ftp",
+ .open = ftp_open,
+};
diff --git a/qemu/roms/ipxe/src/net/tcp/http.c b/qemu/roms/ipxe/src/net/tcp/http.c
new file mode 100644
index 000000000..90bae9d7a
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/tcp/http.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/**
+ * @file
+ *
+ * Hyper Text Transfer Protocol (HTTP)
+ *
+ */
+
+#include <stddef.h>
+#include <ipxe/open.h>
+#include <ipxe/http.h>
+#include <ipxe/features.h>
+
+FEATURE ( FEATURE_PROTOCOL, "HTTP", DHCP_EB_FEATURE_HTTP, 1 );
+
+/**
+ * Initiate an HTTP connection
+ *
+ * @v xfer Data transfer interface
+ * @v uri Uniform Resource Identifier
+ * @ret rc Return status code
+ *
+ * Thin wrapper around http_open_filter(), passing HTTP_PORT as the
+ * port argument and NULL as the final (filter) argument; the return
+ * value is passed through unchanged.
+ */
+static int http_open ( struct interface *xfer, struct uri *uri ) {
+ return http_open_filter ( xfer, uri, HTTP_PORT, NULL );
+}
+
+/** HTTP URI opener
+ *
+ * Associates the "http" URI scheme with http_open().
+ */
+struct uri_opener http_uri_opener __uri_opener = {
+ .scheme = "http",
+ .open = http_open,
+};
diff --git a/qemu/roms/ipxe/src/net/tcp/httpcore.c b/qemu/roms/ipxe/src/net/tcp/httpcore.c
new file mode 100644
index 000000000..1d1953e61
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/tcp/httpcore.c
@@ -0,0 +1,1574 @@
+/*
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/**
+ * @file
+ *
+ * Hyper Text Transfer Protocol (HTTP) core functionality
+ *
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <ctype.h>
+#include <assert.h>
+#include <ipxe/uri.h>
+#include <ipxe/refcnt.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/socket.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/process.h>
+#include <ipxe/retry.h>
+#include <ipxe/timer.h>
+#include <ipxe/linebuf.h>
+#include <ipxe/base64.h>
+#include <ipxe/base16.h>
+#include <ipxe/md5.h>
+#include <ipxe/blockdev.h>
+#include <ipxe/acpi.h>
+#include <ipxe/version.h>
+#include <ipxe/params.h>
+#include <ipxe/profile.h>
+#include <ipxe/http.h>
+
+/* Disambiguate the various error causes */
+#define EACCES_401 __einfo_error ( EINFO_EACCES_401 )
+#define EINFO_EACCES_401 \
+ __einfo_uniqify ( EINFO_EACCES, 0x01, "HTTP 401 Unauthorized" )
+#define EIO_OTHER __einfo_error ( EINFO_EIO_OTHER )
+#define EINFO_EIO_OTHER \
+ __einfo_uniqify ( EINFO_EIO, 0x01, "Unrecognised HTTP response code" )
+#define EIO_CONTENT_LENGTH __einfo_error ( EINFO_EIO_CONTENT_LENGTH )
+#define EINFO_EIO_CONTENT_LENGTH \
+ __einfo_uniqify ( EINFO_EIO, 0x02, "Content length mismatch" )
+#define EINVAL_RESPONSE __einfo_error ( EINFO_EINVAL_RESPONSE )
+#define EINFO_EINVAL_RESPONSE \
+ __einfo_uniqify ( EINFO_EINVAL, 0x01, "Invalid content length" )
+#define EINVAL_HEADER __einfo_error ( EINFO_EINVAL_HEADER )
+#define EINFO_EINVAL_HEADER \
+ __einfo_uniqify ( EINFO_EINVAL, 0x02, "Invalid header" )
+#define EINVAL_CONTENT_LENGTH __einfo_error ( EINFO_EINVAL_CONTENT_LENGTH )
+#define EINFO_EINVAL_CONTENT_LENGTH \
+ __einfo_uniqify ( EINFO_EINVAL, 0x03, "Invalid content length" )
+#define EINVAL_CHUNK_LENGTH __einfo_error ( EINFO_EINVAL_CHUNK_LENGTH )
+#define EINFO_EINVAL_CHUNK_LENGTH \
+ __einfo_uniqify ( EINFO_EINVAL, 0x04, "Invalid chunk length" )
+#define ENOENT_404 __einfo_error ( EINFO_ENOENT_404 )
+#define EINFO_ENOENT_404 \
+ __einfo_uniqify ( EINFO_ENOENT, 0x01, "HTTP 404 Not Found" )
+#define EPERM_403 __einfo_error ( EINFO_EPERM_403 )
+#define EINFO_EPERM_403 \
+ __einfo_uniqify ( EINFO_EPERM, 0x01, "HTTP 403 Forbidden" )
+#define EPROTO_UNSOLICITED __einfo_error ( EINFO_EPROTO_UNSOLICITED )
+#define EINFO_EPROTO_UNSOLICITED \
+ __einfo_uniqify ( EINFO_EPROTO, 0x01, "Unsolicited data" )
+
+/** Block size used for HTTP block device request
+ *
+ * http_rx_content_length() divides the content length by this value
+ * to obtain the number of blocks it reports.
+ */
+#define HTTP_BLKSIZE 512
+
+/** Retry delay used when we cannot understand the Retry-After header */
+#define HTTP_RETRY_SECONDS 5
+
+/** Receive profiler (named "http.rx") */
+static struct profiler http_rx_profiler __profiler = { .name = "http.rx" };
+
+/** Data transfer profiler (named "http.xfer") */
+static struct profiler http_xfer_profiler __profiler = { .name = "http.xfer" };
+
+/** HTTP flags
+ *
+ * Single-bit values that are combined in the "flags" field of struct
+ * http_request.
+ */
+enum http_flags {
+ /** Request is waiting to be transmitted */
+ HTTP_TX_PENDING = 0x0001,
+ /** Fetch header only */
+ HTTP_HEAD_ONLY = 0x0002,
+ /** Client would like to keep connection alive */
+ HTTP_CLIENT_KEEPALIVE = 0x0004,
+ /** Server will keep connection alive */
+ HTTP_SERVER_KEEPALIVE = 0x0008,
+ /** Discard the current request and try again */
+ HTTP_TRY_AGAIN = 0x0010,
+ /** Provide Basic authentication details */
+ HTTP_BASIC_AUTH = 0x0020,
+ /** Provide Digest authentication details */
+ HTTP_DIGEST_AUTH = 0x0040,
+ /** Socket must be reopened */
+ HTTP_REOPEN_SOCKET = 0x0080,
+};
+
+/** HTTP receive state
+ *
+ * The numeric order matters: http_done() treats any state below
+ * HTTP_RX_DATA as a point at which the connection must not yet have
+ * been closed.
+ */
+enum http_rx_state {
+ HTTP_RX_RESPONSE = 0,
+ HTTP_RX_HEADER,
+ HTTP_RX_CHUNK_LEN,
+ /* In HTTP_RX_DATA, it is acceptable for the server to close
+ * the connection (unless we are in the middle of a chunked
+ * transfer).
+ */
+ HTTP_RX_DATA,
+ /* In the following states, it is acceptable for the server to
+ * close the connection.
+ */
+ HTTP_RX_TRAILER,
+ HTTP_RX_IDLE,
+ HTTP_RX_DEAD,
+};
+
+/**
+ * An HTTP request
+ *
+ * Released by http_free(), which also frees the URI reference, the
+ * line buffer contents and the three authentication strings.
+ */
+struct http_request {
+ /** Reference count */
+ struct refcnt refcnt;
+ /** Data transfer interface */
+ struct interface xfer;
+ /** Partial transfer interface */
+ struct interface partial;
+
+ /** URI being fetched */
+ struct uri *uri;
+ /** Default port */
+ unsigned int default_port;
+ /** Filter (if any)
+ *
+ * Called by http_socket_open() with the socket interface and
+ * the URI host; it may replace the interface on which the
+ * socket is subsequently opened.
+ */
+ int ( * filter ) ( struct interface *xfer,
+ const char *name,
+ struct interface **next );
+ /** Transport layer interface */
+ struct interface socket;
+
+ /** Flags (bitwise OR of enum http_flags values) */
+ unsigned int flags;
+ /** Starting offset of partial transfer (if applicable) */
+ size_t partial_start;
+ /** Length of partial transfer (if applicable) */
+ size_t partial_len;
+
+ /** TX process */
+ struct process process;
+
+ /** RX state */
+ enum http_rx_state rx_state;
+ /** Response code */
+ unsigned int code;
+ /** Received length */
+ size_t rx_len;
+ /** Length remaining (or 0 if unknown) */
+ size_t remaining;
+ /** HTTP is using Transfer-Encoding: chunked */
+ int chunked;
+ /** Current chunk length remaining (if applicable) */
+ size_t chunk_remaining;
+ /** Line buffer for received header lines */
+ struct line_buffer linebuf;
+ /** Receive data buffer (if applicable) */
+ userptr_t rx_buffer;
+
+ /** Authentication realm (if any) */
+ char *auth_realm;
+ /** Authentication nonce (if any) */
+ char *auth_nonce;
+ /** Authentication opaque string (if any) */
+ char *auth_opaque;
+
+ /** Request retry timer */
+ struct retry_timer timer;
+ /** Retry delay (in timer ticks) */
+ unsigned long retry_delay;
+};
+
+/**
+ * Free HTTP request
+ *
+ * @v refcnt		Reference counter
+ *
+ * Releases everything owned by the request: the URI reference, the
+ * contents of the header line buffer, the authentication strings
+ * and finally the request structure itself.
+ */
+static void http_free ( struct refcnt *refcnt ) {
+	struct http_request *http =
+		container_of ( refcnt, struct http_request, refcnt );
+
+	uri_put ( http->uri );
+	empty_line_buffer ( &http->linebuf );
+
+	/* Any of these may still be NULL; free() accepts that */
+	free ( http->auth_realm );
+	free ( http->auth_nonce );
+	free ( http->auth_opaque );
+
+	free ( http );
+}
+
+/**
+ * Close HTTP request
+ *
+ * @v http HTTP request
+ * @v rc Return status code
+ *
+ * Marks the receive side as dead, withdraws the client keep-alive
+ * flag, removes the transmit process and then shuts down the socket,
+ * partial transfer and data transfer interfaces with the given
+ * status.
+ */
+static void http_close ( struct http_request *http, int rc ) {
+
+ /* Prevent further processing of any current packet */
+ http->rx_state = HTTP_RX_DEAD;
+
+ /* Prevent reconnection */
+ http->flags &= ~HTTP_CLIENT_KEEPALIVE;
+
+ /* Remove process */
+ process_del ( &http->process );
+
+ /* Close all data transfer interfaces */
+ intf_shutdown ( &http->socket, rc );
+ intf_shutdown ( &http->partial, rc );
+ intf_shutdown ( &http->xfer, rc );
+}
+
+/**
+ * Open HTTP socket
+ *
+ * @v http		HTTP request
+ * @ret rc		Return status code
+ *
+ * Opens a stream socket by name to the URI's host.  The port comes
+ * from the URI, falling back to the request's default port.  If the
+ * request has a filter, the filter is invoked first and may
+ * substitute the interface on which the socket is opened.
+ */
+static int http_socket_open ( struct http_request *http ) {
+	struct interface *transport = &http->socket;
+	struct uri *uri = http->uri;
+	struct sockaddr_tcpip server;
+	int rc;
+
+	/* Construct server address (only the port is specified) */
+	memset ( &server, 0, sizeof ( server ) );
+	server.st_port = htons ( uri_port ( uri, http->default_port ) );
+
+	/* Give the filter (if any) the chance to interpose itself */
+	if ( http->filter ) {
+		rc = http->filter ( transport, uri->host, &transport );
+		if ( rc != 0 )
+			return rc;
+	}
+
+	/* Open socket on whichever interface we ended up with */
+	return xfer_open_named_socket ( transport, SOCK_STREAM,
+					( struct sockaddr * ) &server,
+					uri->host, NULL );
+}
+
+/**
+ * Retry HTTP request
+ *
+ * @v timer		Retry timer
+ * @v fail		Failure indicator
+ *
+ * Reopens the socket if HTTP_REOPEN_SOCKET is set (closing the
+ * request if that fails), and then reschedules transmission of the
+ * request if HTTP_TRY_AGAIN is set.  Both flags are cleared as they
+ * are acted upon.
+ */
+static void http_retry ( struct retry_timer *timer, int fail __unused ) {
+	struct http_request *http;
+	int rc;
+
+	http = container_of ( timer, struct http_request, timer );
+
+	/* Reopen socket if required */
+	if ( http->flags & HTTP_REOPEN_SOCKET ) {
+		http->flags &= ~HTTP_REOPEN_SOCKET;
+		DBGC ( http, "HTTP %p reopening connection\n", http );
+		rc = http_socket_open ( http );
+		if ( rc != 0 ) {
+			http_close ( http, rc );
+			return;
+		}
+	}
+
+	/* Nothing more to do unless a retry was requested */
+	if ( ! ( http->flags & HTTP_TRY_AGAIN ) )
+		return;
+
+	/* Rearm the request: pending transmission, expecting a response */
+	http->flags &= ~HTTP_TRY_AGAIN;
+	DBGC ( http, "HTTP %p retrying request\n", http );
+	http->flags |= HTTP_TX_PENDING;
+	http->rx_state = HTTP_RX_RESPONSE;
+	process_add ( &http->process );
+}
+
+/**
+ * Mark HTTP request as completed successfully
+ *
+ * @v http HTTP request
+ *
+ * Despite the name, this first validates that completion is
+ * legitimate: a premature close yields -ECONNRESET and a short body
+ * yields -EIO_CONTENT_LENGTH.  Otherwise the request becomes idle
+ * and is either closed with success or, when a keep-alive or a retry
+ * is wanted, handed to the retry timer.
+ */
+static void http_done ( struct http_request *http ) {
+
+ /* If we are not at an appropriate stage of the protocol
+ * (including being in the middle of a chunked transfer),
+ * force an error.
+ */
+ if ( ( http->rx_state < HTTP_RX_DATA ) || ( http->chunked != 0 ) ) {
+ DBGC ( http, "HTTP %p connection closed unexpectedly in state "
+ "%d\n", http, http->rx_state );
+ http_close ( http, -ECONNRESET );
+ return;
+ }
+
+ /* If we had a Content-Length, and the received content length
+ * isn't correct, force an error
+ */
+ if ( http->remaining != 0 ) {
+ DBGC ( http, "HTTP %p incorrect length %zd, should be %zd\n",
+ http, http->rx_len, ( http->rx_len + http->remaining ) );
+ http_close ( http, -EIO_CONTENT_LENGTH );
+ return;
+ }
+
+ /* Enter idle state */
+ http->rx_state = HTTP_RX_IDLE;
+ http->rx_len = 0;
+ assert ( http->remaining == 0 );
+ assert ( http->chunked == 0 );
+ assert ( http->chunk_remaining == 0 );
+
+ /* Close partial transfer interface
+ *
+ * (skipped while a retry is outstanding)
+ */
+ if ( ! ( http->flags & HTTP_TRY_AGAIN ) )
+ intf_restart ( &http->partial, 0 );
+
+ /* Close everything unless we want to keep the connection alive */
+ if ( ! ( http->flags & ( HTTP_CLIENT_KEEPALIVE | HTTP_TRY_AGAIN ) ) ) {
+ http_close ( http, 0 );
+ return;
+ }
+
+ /* If the server is not intending to keep the connection
+ * alive, then close the socket and mark it as requiring
+ * reopening.
+ *
+ * NOTE(review): clearing HTTP_SERVER_KEEPALIVE inside this
+ * branch has no effect, since the branch is entered only when
+ * that flag is already clear - confirm whether the clear was
+ * meant to be unconditional.
+ */
+ if ( ! ( http->flags & HTTP_SERVER_KEEPALIVE ) ) {
+ intf_restart ( &http->socket, 0 );
+ http->flags &= ~HTTP_SERVER_KEEPALIVE;
+ http->flags |= HTTP_REOPEN_SOCKET;
+ }
+
+ /* Start request retry timer
+ *
+ * The delay is consumed: it is reset to zero once used.
+ */
+ start_timer_fixed ( &http->timer, http->retry_delay );
+ http->retry_delay = 0;
+}
+
+/**
+ * Convert HTTP response code to return status code
+ *
+ * @v response		HTTP response code
+ * @ret rc		Return status code
+ *
+ * 200, 206, 301, 302 and 303 map to success.  401, 403 and 404 map
+ * to their dedicated errors, and every other code maps to
+ * -EIO_OTHER.
+ */
+static int http_response_to_rc ( unsigned int response ) {
+
+	/* Success and redirection codes that we handle */
+	if ( ( response == 200 ) || ( response == 206 ) ||
+	     ( ( response >= 301 ) && ( response <= 303 ) ) )
+		return 0;
+
+	/* Errors that have a dedicated status code */
+	if ( response == 401 )
+		return -EACCES_401;
+	if ( response == 403 )
+		return -EPERM_403;
+	if ( response == 404 )
+		return -ENOENT_404;
+
+	/* Everything else */
+	return -EIO_OTHER;
+}
+
+/**
+ * Handle HTTP response
+ *
+ * @v http		HTTP request
+ * @v response		HTTP response
+ * @ret rc		Return status code
+ *
+ * The line must begin with "HTTP/" and contain a space; the numeric
+ * value following the first space is stored as the response code.
+ * The receive state then moves on to the headers, or straight to
+ * the trailer for a header-only request.
+ */
+static int http_rx_response ( struct http_request *http, char *response ) {
+	static const char prefix[] = "HTTP/";
+	char *code;
+
+	DBGC ( http, "HTTP %p response \"%s\"\n", http, response );
+
+	/* Check response starts with "HTTP/" */
+	if ( strncmp ( response, prefix, ( sizeof ( prefix ) - 1 ) ) != 0 )
+		return -EINVAL_RESPONSE;
+
+	/* Locate and store response code */
+	code = strchr ( response, ' ' );
+	if ( code == NULL )
+		return -EINVAL_RESPONSE;
+	http->code = strtoul ( code, NULL, 10 );
+
+	/* Select the next receive state */
+	if ( http->flags & HTTP_HEAD_ONLY ) {
+		http->rx_state = HTTP_RX_TRAILER;
+	} else {
+		http->rx_state = HTTP_RX_HEADER;
+	}
+	return 0;
+}
+
+/**
+ * Handle HTTP Location header
+ *
+ * @v http		HTTP request
+ * @v value		HTTP header value
+ * @ret rc		Return status code
+ *
+ * Asks the data transfer interface to redirect to the URI string
+ * given in the header, and returns the outcome of that redirection.
+ */
+static int http_rx_location ( struct http_request *http, char *value ) {
+	int rc;
+
+	/* Redirect to new location */
+	DBGC ( http, "HTTP %p redirecting to %s\n", http, value );
+	rc = xfer_redirect ( &http->xfer, LOCATION_URI_STRING, value );
+	if ( rc != 0 ) {
+		DBGC ( http, "HTTP %p could not redirect: %s\n",
+		       http, strerror ( rc ) );
+	}
+
+	return rc;
+}
+
+/**
+ * Handle HTTP Content-Length header
+ *
+ * @v http		HTTP request
+ * @v value		HTTP header value
+ * @ret rc		Return status code
+ *
+ * The value must be a decimal number followed by either the end of
+ * the string or whitespace.  It must agree with any length already
+ * expected.  Unless the request is being retried, the recipient is
+ * told the length via a pair of seeks, and a header-only request
+ * additionally reports a block device capacity.
+ */
+static int http_rx_content_length ( struct http_request *http, char *value ) {
+	struct block_device_capacity capacity;
+	size_t content_len;
+	char *endp;
+
+	/* Parse content length */
+	content_len = strtoul ( value, &endp, 10 );
+	if ( ( *endp != '\0' ) && ( ! isspace ( *endp ) ) ) {
+		DBGC ( http, "HTTP %p invalid Content-Length \"%s\"\n",
+		       http, value );
+		return -EINVAL_CONTENT_LENGTH;
+	}
+
+	/* Complain if this contradicts a length we already expect */
+	if ( ( http->remaining != 0 ) &&
+	     ( http->remaining != content_len ) ) {
+		DBGC ( http, "HTTP %p incorrect Content-Length %zd (expected "
+		       "%zd)\n", http, content_len, http->remaining );
+		return -EIO_CONTENT_LENGTH;
+	}
+
+	/* A header-only request has no body to wait for */
+	if ( ! ( http->flags & HTTP_HEAD_ONLY ) )
+		http->remaining = content_len;
+
+	/* Do nothing more if we are retrying the request */
+	if ( http->flags & HTTP_TRY_AGAIN )
+		return 0;
+
+	/* Use seek() to notify recipient of filesize */
+	xfer_seek ( &http->xfer, http->remaining );
+	xfer_seek ( &http->xfer, 0 );
+
+	/* Only header-only requests report a block device capacity */
+	if ( ! ( http->flags & HTTP_HEAD_ONLY ) )
+		return 0;
+	capacity.blocks = ( content_len / HTTP_BLKSIZE );
+	capacity.blksize = HTTP_BLKSIZE;
+	capacity.max_count = -1U;
+	block_capacity ( &http->partial, &capacity );
+
+	return 0;
+}
+
+/**
+ * Handle HTTP Transfer-Encoding header
+ *
+ * @v http		HTTP request
+ * @v value		HTTP header value
+ * @ret rc		Return status code
+ *
+ * Only "chunked" (compared case-insensitively) is recognised; any
+ * other value is silently ignored.  Always returns zero.
+ */
+static int http_rx_transfer_encoding ( struct http_request *http, char *value ){
+
+	/* Ignore encodings that we do not understand */
+	if ( strcasecmp ( value, "chunked" ) != 0 )
+		return 0;
+
+	/* Mark connection as using chunked transfer encoding */
+	http->chunked = 1;
+	return 0;
+}
+
+/**
+ * Handle HTTP Connection header
+ *
+ * @v http		HTTP request
+ * @v value		HTTP header value
+ * @ret rc		Return status code
+ *
+ * Only "keep-alive" (compared case-insensitively) is recognised; any
+ * other value is silently ignored.  Always returns zero.
+ */
+static int http_rx_connection ( struct http_request *http, char *value ) {
+
+	/* Ignore anything other than a keep-alive promise */
+	if ( strcasecmp ( value, "keep-alive" ) != 0 )
+		return 0;
+
+	/* Mark connection as being kept alive by the server */
+	http->flags |= HTTP_SERVER_KEEPALIVE;
+	return 0;
+}
+
+/**
+ * Handle WWW-Authenticate Basic header
+ *
+ * @v http		HTTP request
+ * @v params		Parameters
+ * @ret rc		Return status code
+ *
+ * Requests a retry with Basic authentication, provided that the
+ * response was a 401, Basic authentication has not already been
+ * tried, and the URI supplies a user name.  Always returns zero.
+ */
+static int http_rx_basic_auth ( struct http_request *http, char *params ) {
+
+	DBGC ( http, "HTTP %p Basic authentication required (%s)\n",
+	       http, params );
+
+	/* Only a 401 Unauthorized response warrants a retry */
+	if ( http->code != 401 )
+		return 0;
+
+	/* Do not retry if Basic authentication was already tried */
+	if ( http->flags & HTTP_BASIC_AUTH )
+		return 0;
+
+	/* Without a user name there is nothing to offer */
+	if ( http->uri->user == NULL )
+		return 0;
+
+	/* Retry using Basic authentication */
+	http->flags |= ( HTTP_TRY_AGAIN | HTTP_BASIC_AUTH );
+	return 0;
+}
+
+/**
+ * Parse Digest authentication parameter
+ *
+ * @v params		Parameters
+ * @v name		Parameter name (including trailing "=\"")
+ * @ret value		Parameter value, or NULL
+ *
+ * The value runs from the end of the first occurrence of @c name up
+ * to (but excluding) the next double quote.  It is returned as a
+ * newly allocated copy, which the caller must free.  NULL is
+ * returned if the name or the closing quote is absent, or if the
+ * allocation fails.
+ */
+static char * http_digest_param ( char *params, const char *name ) {
+	char *start;
+	char *end;
+
+	/* Locate parameter and skip over its name */
+	start = strstr ( params, name );
+	if ( start == NULL )
+		return NULL;
+	start += strlen ( name );
+
+	/* Locate the closing quote */
+	end = strchr ( start, '"' );
+	if ( end == NULL )
+		return NULL;
+
+	/* Extract value */
+	return strndup ( start, ( end - start ) );
+}
+
+/**
+ * Handle WWW-Authenticate Digest header
+ *
+ * @v http		HTTP request
+ * @v params		Parameters
+ * @ret rc		Return status code
+ *
+ * Acts only if the response was a 401, Digest authentication has
+ * not already been tried, and the URI supplies a user name.  In that
+ * case the realm, nonce and (optional) opaque parameters replace any
+ * previously stored values and a retry with Digest authentication
+ * is requested.  A missing realm or nonce yields -EINVAL_HEADER.
+ */
+static int http_rx_digest_auth ( struct http_request *http, char *params ) {
+
+	DBGC ( http, "HTTP %p Digest authentication required (%s)\n",
+	       http, params );
+
+	/* Nothing to do unless this is a 401 Unauthorized response
+	 * that we are able to answer and have not yet answered
+	 */
+	if ( ( http->code != 401 ) ||
+	     ( http->flags & HTTP_DIGEST_AUTH ) ||
+	     ( http->uri->user == NULL ) )
+		return 0;
+
+	/* Extract realm (mandatory) */
+	free ( http->auth_realm );
+	http->auth_realm = http_digest_param ( params, "realm=\"" );
+	if ( http->auth_realm == NULL ) {
+		DBGC ( http, "HTTP %p Digest prompt missing realm\n",
+		       http );
+		return -EINVAL_HEADER;
+	}
+
+	/* Extract nonce (mandatory) */
+	free ( http->auth_nonce );
+	http->auth_nonce = http_digest_param ( params, "nonce=\"" );
+	if ( http->auth_nonce == NULL ) {
+		DBGC ( http, "HTTP %p Digest prompt missing nonce\n",
+		       http );
+		return -EINVAL_HEADER;
+	}
+
+	/* Extract opaque (optional, so NULL is not an error) */
+	free ( http->auth_opaque );
+	http->auth_opaque = http_digest_param ( params, "opaque=\"" );
+
+	/* Retry using Digest authentication */
+	http->flags |= ( HTTP_TRY_AGAIN | HTTP_DIGEST_AUTH );
+	return 0;
+}
+
+/** An HTTP WWW-Authenticate header handler
+ *
+ * Associates an authentication scheme name with the function that
+ * processes the parameters following that scheme in the header.
+ */
+struct http_auth_header_handler {
+ /** Scheme (e.g. "Basic"); matched case-insensitively by the caller */
+ const char *scheme;
+ /** Handle received parameters
+ *
+ * @v http HTTP request
+ * @v params Parameters (the header text after the scheme name)
+ * @ret rc Return status code
+ */
+ int ( * rx ) ( struct http_request *http, char *params );
+};
+
+/** Table of supported WWW-Authenticate schemes
+ *
+ * The table is scanned in order and is terminated by an entry with a
+ * NULL scheme.
+ */
+static struct http_auth_header_handler http_auth_header_handlers[] = {
+	{ .scheme = "Basic", .rx = http_rx_basic_auth },
+	{ .scheme = "Digest", .rx = http_rx_digest_auth },
+	{ .scheme = NULL, .rx = NULL },
+};
+
+/**
+ * Process a received "WWW-Authenticate" header
+ *
+ * @v http		HTTP request
+ * @v value		HTTP header value
+ * @ret rc		Return status code
+ *
+ * The value is split in place at its first space into a scheme and
+ * its parameters.  The first handler whose scheme matches
+ * (case-insensitively) is invoked; unknown schemes are ignored.
+ */
+static int http_rx_www_authenticate ( struct http_request *http, char *value ) {
+	struct http_auth_header_handler *handler;
+	char *space;
+	char *params;
+
+	/* Locate the space separating scheme from parameters */
+	space = strchr ( value, ' ' );
+	if ( space == NULL ) {
+		DBGC ( http, "HTTP %p malformed WWW-Authenticate header\n",
+		       http );
+		return -EINVAL_HEADER;
+	}
+
+	/* Terminate the scheme in place; parameters follow the space */
+	*space = '\0';
+	params = ( space + 1 );
+
+	/* Dispatch to the first matching scheme handler, if any */
+	for ( handler = http_auth_header_handlers ; handler->scheme ;
+	      handler++ ) {
+		if ( strcasecmp ( value, handler->scheme ) != 0 )
+			continue;
+		return handler->rx ( http, params );
+	}
+
+	return 0;
+}
+
+/**
+ * Process a received "Retry-After" header
+ *
+ * @v http		HTTP request
+ * @v value		HTTP header value
+ * @ret rc		Return status code
+ *
+ * Acts only on a 503 response.  The value is parsed as a decimal
+ * number of seconds; if anything other than the terminating NUL
+ * follows the digits (e.g. a full HTTP date), the fixed delay
+ * HTTP_RETRY_SECONDS is used instead, since there is no code here
+ * to parse full HTTP dates.  This handler always returns zero.
+ */
+static int http_rx_retry_after ( struct http_request *http, char *value ) {
+	unsigned long delay_secs;
+	char *end;
+
+	DBGC ( http, "HTTP %p retry requested (%s)\n", http, value );
+
+	/* Ignore the header unless the service is unavailable */
+	if ( http->code != 503 )
+		return 0;
+
+	/* Parse the delay, falling back to the fixed default */
+	delay_secs = strtoul ( value, &end, 10 );
+	if ( *end != '\0' ) {
+		delay_secs = HTTP_RETRY_SECONDS;
+		DBGC ( http, "HTTP %p cannot understand \"%s\"; "
+		       "using %ld seconds\n", http, value, delay_secs );
+	}
+
+	/* Schedule the retry */
+	http->flags |= HTTP_TRY_AGAIN;
+	http->retry_delay = ( delay_secs * TICKS_PER_SEC );
+
+	return 0;
+}
+
+/** An HTTP header handler
+ *
+ * Associates a header name with the function that processes the
+ * value of that header.
+ */
+struct http_header_handler {
+ /** Name (e.g. "Content-Length"); matched case-insensitively by the caller */
+ const char *header;
+ /** Handle received header
+ *
+ * @v http HTTP request
+ * @v value HTTP header value
+ * @ret rc Return status code
+ *
+ * If an error is returned, the download will be aborted.
+ */
+ int ( * rx ) ( struct http_request *http, char *value );
+};
+
+/** Table of recognised HTTP response headers
+ *
+ * The table is scanned in order and is terminated by an entry with a
+ * NULL header name.
+ */
+static struct http_header_handler http_header_handlers[] = {
+	{ .header = "Location", .rx = http_rx_location },
+	{ .header = "Content-Length", .rx = http_rx_content_length },
+	{ .header = "Transfer-Encoding", .rx = http_rx_transfer_encoding },
+	{ .header = "Connection", .rx = http_rx_connection },
+	{ .header = "WWW-Authenticate", .rx = http_rx_www_authenticate },
+	{ .header = "Retry-After", .rx = http_rx_retry_after },
+	{ .header = NULL, .rx = NULL }
+};
+
+/**
+ * Process one line of the header (or trailer) section
+ *
+ * @v http		HTTP request
+ * @v header		HTTP header
+ * @ret rc		Return status code
+ *
+ * An empty line ends the section: in the header state it moves the
+ * request on to receiving data (or chunk lengths); in any other
+ * state it completes the request.  A non-empty line is split in
+ * place at the first ": " and passed to the matching handler, if
+ * one exists.
+ */
+static int http_rx_header ( struct http_request *http, char *header ) {
+	struct http_header_handler *handler;
+	char *colon;
+	char *value;
+	int rc;
+
+	/* Blank line: end of headers or end of trailer */
+	if ( header[0] == '\0' ) {
+		empty_line_buffer ( &http->linebuf );
+
+		/* Convert the response code to a status, unless a
+		 * retry has already been requested
+		 */
+		if ( ! ( http->flags & HTTP_TRY_AGAIN ) ) {
+			rc = http_response_to_rc ( http->code );
+			if ( rc != 0 )
+				return rc;
+		}
+
+		/* A blank line outside the header state ends the trailer */
+		if ( http->rx_state != HTTP_RX_HEADER ) {
+			DBGC ( http, "HTTP %p end of trailer\n", http );
+			http_done ( http );
+			return 0;
+		}
+
+		/* Otherwise, start receiving the body */
+		DBGC ( http, "HTTP %p start of data\n", http );
+		if ( http->chunked ) {
+			http->rx_state = HTTP_RX_CHUNK_LEN;
+		} else {
+			http->rx_state = HTTP_RX_DATA;
+		}
+
+		/* A partial read expects exactly the requested length */
+		if ( ( http->partial_len != 0 ) &&
+		     ( ! ( http->flags & HTTP_TRY_AGAIN ) ) ) {
+			http->remaining = http->partial_len;
+		}
+		return 0;
+	}
+
+	DBGC ( http, "HTTP %p header \"%s\"\n", http, header );
+
+	/* Separate the name from the value at the first ": " */
+	colon = strstr ( header, ": " );
+	if ( colon == NULL ) {
+		DBGC ( http, "HTTP %p malformed header\n", http );
+		return -EINVAL_HEADER;
+	}
+	*colon = '\0';
+	value = ( colon + 2 );
+
+	/* Dispatch to the first matching header handler, if any */
+	for ( handler = http_header_handlers ; handler->header ; handler++ ) {
+		if ( strcasecmp ( header, handler->header ) != 0 )
+			continue;
+		return handler->rx ( http, value );
+	}
+
+	return 0;
+}
+
+/**
+ * Process a chunk-length line
+ *
+ * @v http		HTTP request
+ * @v length		HTTP chunk length
+ * @ret rc		Return status code
+ *
+ * Blank lines are ignored.  The length is parsed as hexadecimal and
+ * must be followed directly by the end of the line, otherwise
+ * -EINVAL_CHUNK_LENGTH is returned.  A zero length ends the chunked
+ * body and moves to the trailer state.
+ */
+static int http_rx_chunk_len ( struct http_request *http, char *length ) {
+	char *end;
+
+	/* Ignore the blank line that separates chunks */
+	if ( ! length[0] )
+		return 0;
+
+	/* Parse the hexadecimal chunk length */
+	http->chunk_remaining = strtoul ( length, &end, 16 );
+	if ( *end ) {
+		DBGC ( http, "HTTP %p invalid chunk length \"%s\"\n",
+		       http, length );
+		return -EINVAL_CHUNK_LENGTH;
+	}
+
+	/* A zero-length chunk marks the end of the chunked body */
+	if ( ! http->chunk_remaining ) {
+		DBGC ( http, "HTTP %p end of chunks\n", http );
+		http->chunked = 0;
+		http->rx_state = HTTP_RX_TRAILER;
+		return 0;
+	}
+
+	/* Seek to the end of the new chunk and back again, so that
+	 * the recipient learns the new file size
+	 */
+	DBGC ( http, "HTTP %p start of chunk of length %zd\n",
+	       http, http->chunk_remaining );
+	if ( ! ( http->flags & HTTP_TRY_AGAIN ) ) {
+		xfer_seek ( &http->xfer,
+			    ( http->rx_len + http->chunk_remaining ) );
+		xfer_seek ( &http->xfer, http->rx_len );
+	}
+
+	/* Chunk payload follows */
+	http->rx_state = HTTP_RX_DATA;
+
+	return 0;
+}
+
+/** An HTTP line-based data handler
+ *
+ * Wraps the function used to process one complete line of input
+ * while the request is in a given line-oriented receive state.
+ */
+struct http_line_handler {
+ /** Handle line
+ *
+ * @v http HTTP request
+ * @v line Line to handle
+ * @ret rc Return status code
+ */
+ int ( * rx ) ( struct http_request *http, char *line );
+};
+
+/** Line handlers, indexed by receive state
+ *
+ * Header and trailer lines share the same handler.
+ */
+static struct http_line_handler http_line_handlers[] = {
+	[HTTP_RX_RESPONSE] = {
+		.rx = http_rx_response,
+	},
+	[HTTP_RX_HEADER] = {
+		.rx = http_rx_header,
+	},
+	[HTTP_RX_CHUNK_LEN] = {
+		.rx = http_rx_chunk_len,
+	},
+	[HTTP_RX_TRAILER] = {
+		.rx = http_rx_header,
+	},
+};
+
+/**
+ * Handle new data arriving via HTTP connection
+ *
+ * @v http HTTP request
+ * @v iobuf I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ *
+ * Consumes the I/O buffer piece by piece according to the current
+ * receive state: body data is discarded, copied to the partial
+ * transfer buffer or delivered onwards, while all other active
+ * states accumulate the input into lines and pass each complete
+ * line to the handler for that state. On any error the request is
+ * closed with that error. free_iob() is called on the buffer
+ * pointer on every exit path.
+ */
+static int http_socket_deliver ( struct http_request *http,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *meta __unused ) {
+ struct http_line_handler *lh;
+ char *line;
+ size_t data_len;
+ ssize_t line_len;
+ int rc = 0;
+
+ profile_start ( &http_rx_profiler );
+ /* Loop until the buffer is empty or has been handed on */
+ while ( iobuf && iob_len ( iobuf ) ) {
+
+ switch ( http->rx_state ) {
+ case HTTP_RX_IDLE:
+ /* Receiving any data in this state is an error */
+ /* NOTE(review): rx_state is always HTTP_RX_IDLE in this
+ * case, so the "dead" arm below is never selected.
+ */
+ DBGC ( http, "HTTP %p received %zd bytes while %s\n",
+ http, iob_len ( iobuf ),
+ ( ( http->rx_state == HTTP_RX_IDLE ) ?
+ "idle" : "dead" ) );
+ rc = -EPROTO_UNSOLICITED;
+ goto done;
+ case HTTP_RX_DEAD:
+ /* Do no further processing */
+ goto done;
+ case HTTP_RX_DATA:
+ /* Pass received data to caller */
+ /* Limit the amount handled in this pass to whatever is
+ * left of the current chunk and of the overall body
+ * (a zero counter means "no limit").
+ */
+ data_len = iob_len ( iobuf );
+ if ( http->chunk_remaining &&
+ ( http->chunk_remaining < data_len ) ) {
+ data_len = http->chunk_remaining;
+ }
+ if ( http->remaining &&
+ ( http->remaining < data_len ) ) {
+ data_len = http->remaining;
+ }
+ if ( http->flags & HTTP_TRY_AGAIN ) {
+ /* Discard all received data */
+ iob_pull ( iobuf, data_len );
+ } else if ( http->rx_buffer != UNULL ) {
+ /* Copy to partial transfer buffer */
+ copy_to_user ( http->rx_buffer, http->rx_len,
+ iobuf->data, data_len );
+ iob_pull ( iobuf, data_len );
+ } else if ( data_len < iob_len ( iobuf ) ) {
+ /* Deliver partial buffer as raw data */
+ /* NOTE(review): on failure the goto skips the matching
+ * profile_stop() for http_xfer_profiler.
+ */
+ profile_start ( &http_xfer_profiler );
+ rc = xfer_deliver_raw ( &http->xfer,
+ iobuf->data, data_len );
+ iob_pull ( iobuf, data_len );
+ if ( rc != 0 )
+ goto done;
+ profile_stop ( &http_xfer_profiler );
+ } else {
+ /* Deliver whole I/O buffer */
+ /* Presumably iob_disown() clears our pointer, so that
+ * the loop condition and the free_iob() below then see
+ * NULL -- confirm against its definition.
+ */
+ profile_start ( &http_xfer_profiler );
+ if ( ( rc = xfer_deliver_iob ( &http->xfer,
+ iob_disown ( iobuf ) ) ) != 0 )
+ goto done;
+ profile_stop ( &http_xfer_profiler );
+ }
+ /* Account for the data just handled */
+ http->rx_len += data_len;
+ if ( http->chunk_remaining ) {
+ http->chunk_remaining -= data_len;
+ /* End of chunk: expect the next chunk length */
+ if ( http->chunk_remaining == 0 )
+ http->rx_state = HTTP_RX_CHUNK_LEN;
+ }
+ if ( http->remaining ) {
+ http->remaining -= data_len;
+ /* End of body, unless a chunk length is now expected */
+ if ( ( http->remaining == 0 ) &&
+ ( http->rx_state == HTTP_RX_DATA ) ) {
+ http_done ( http );
+ }
+ }
+ break;
+ case HTTP_RX_RESPONSE:
+ case HTTP_RX_HEADER:
+ case HTTP_RX_CHUNK_LEN:
+ case HTTP_RX_TRAILER:
+ /* In the other phases, buffer and process a
+ * line at a time
+ */
+ line_len = line_buffer ( &http->linebuf, iobuf->data,
+ iob_len ( iobuf ) );
+ if ( line_len < 0 ) {
+ rc = line_len;
+ DBGC ( http, "HTTP %p could not buffer line: "
+ "%s\n", http, strerror ( rc ) );
+ goto done;
+ }
+ /* Drop the bytes consumed by the line buffer */
+ iob_pull ( iobuf, line_len );
+ line = buffered_line ( &http->linebuf );
+ if ( line ) {
+ /* Complete line: dispatch on the current state */
+ lh = &http_line_handlers[http->rx_state];
+ if ( ( rc = lh->rx ( http, line ) ) != 0 )
+ goto done;
+ }
+ break;
+ default:
+ assert ( 0 );
+ break;
+ }
+ }
+
+ done:
+ /* Close the request on error, then release the buffer pointer */
+ if ( rc )
+ http_close ( http, rc );
+ free_iob ( iobuf );
+ profile_stop ( &http_rx_profiler );
+ return rc;
+}
+
+/**
+ * Report the flow control window of the HTTP socket
+ *
+ * @v http		HTTP request
+ * @ret len		Length of window
+ *
+ * The window is reported as permanently open (the maximum size_t
+ * value), so that TCP does not stall while the parent window is
+ * closed.
+ */
+static size_t http_socket_window ( struct http_request *http __unused ) {
+	const size_t no_bits = 0;
+
+	/* All bits set: the largest representable window */
+	return ( ~no_bits );
+}
+
+/**
+ * Handle closure of the HTTP socket
+ *
+ * @v http		HTTP request
+ * @v rc		Reason for close
+ *
+ * A clean close (zero reason) marks the request as complete; any
+ * other reason terminates the request with that error.
+ */
+static void http_socket_close ( struct http_request *http, int rc ) {
+
+	if ( rc == 0 ) {
+		/* Clean close: the request is complete */
+		http_done ( http );
+	} else {
+		/* Error: terminate the request */
+		http_close ( http, rc );
+	}
+}
+
+/**
+ * Build the "Authorization: Basic" request header
+ *
+ * @v http		HTTP request
+ * @ret auth		Authorisation string, or NULL on error
+ *
+ * The header carries the Base64 encoding of "user:password", using
+ * an empty password if the URI has none.  The string is allocated
+ * by asprintf() and must be freed by the caller.
+ */
+static char * http_basic_auth ( struct http_request *http ) {
+	const char *user = http->uri->user;
+	const char *password = ( http->uri->password ?
+				 http->uri->password : "" );
+	size_t plain_len =
+		( strlen ( user ) + 1 /* ":" */ + strlen ( password ) );
+	char plain[ plain_len + 1 /* NUL */ ];
+	size_t encoded_len = base64_encoded_len ( plain_len );
+	char encoded[ encoded_len + 1 /* NUL */ ];
+	char *auth;
+	int len;
+
+	/* Sanity check */
+	assert ( user != NULL );
+
+	/* Join the credentials as "user:password" */
+	snprintf ( plain, sizeof ( plain ), "%s:%s", user, password );
+
+	/* Base64-encode the joined credentials */
+	base64_encode ( ( void * ) plain, plain_len, encoded );
+
+	/* Format the complete header line */
+	len = asprintf ( &auth, "Authorization: Basic %s\r\n", encoded );
+
+	return ( ( len < 0 ) ? NULL : auth );
+}
+
+/**
+ * Build the "Authorization: Digest" request header
+ *
+ * @v http		HTTP request
+ * @v method		HTTP method (e.g. "GET")
+ * @v uri		HTTP request URI (e.g. "/index.html")
+ * @ret auth		Authorisation string, or NULL on error
+ *
+ * Computes, using MD5 and base16 encoding:
+ *
+ *   HA1      = MD5 ( user ":" realm ":" password )
+ *   HA2      = MD5 ( method ":" uri )
+ *   response = MD5 ( HA1 ":" nonce ":" HA2 )
+ *
+ * The "opaque" parameter is included only if one was stored.  The
+ * string is allocated by asprintf() and must be freed by the caller.
+ */
+static char * http_digest_auth ( struct http_request *http,
+				 const char *method, const char *uri ) {
+	const char *user = http->uri->user;
+	const char *password = ( http->uri->password ?
+				 http->uri->password : "" );
+	const char *realm = http->auth_realm;
+	const char *nonce = http->auth_nonce;
+	const char *opaque = http->auth_opaque;
+	const char *opaque_open = ( opaque ? "opaque=\"" : "" );
+	const char *opaque_value = ( opaque ? opaque : "" );
+	const char *opaque_close = ( opaque ? "\", " : "" );
+	static const char colon = ':';
+	uint8_t ctx[MD5_CTX_SIZE];
+	uint8_t digest[MD5_DIGEST_SIZE];
+	char ha1[ base16_encoded_len ( sizeof ( digest ) ) + 1 /* NUL */ ];
+	char ha2[ base16_encoded_len ( sizeof ( digest ) ) + 1 /* NUL */ ];
+	char response[ base16_encoded_len ( sizeof ( digest ) ) + 1 /* NUL */ ];
+	char *auth;
+	int len;
+
+	/* Sanity checks */
+	assert ( user != NULL );
+	assert ( realm != NULL );
+	assert ( nonce != NULL );
+
+	/* HA1: hash of user, realm and password */
+	digest_init ( &md5_algorithm, ctx );
+	digest_update ( &md5_algorithm, ctx, user, strlen ( user ) );
+	digest_update ( &md5_algorithm, ctx, &colon, sizeof ( colon ) );
+	digest_update ( &md5_algorithm, ctx, realm, strlen ( realm ) );
+	digest_update ( &md5_algorithm, ctx, &colon, sizeof ( colon ) );
+	digest_update ( &md5_algorithm, ctx, password, strlen ( password ) );
+	digest_final ( &md5_algorithm, ctx, digest );
+	base16_encode ( digest, sizeof ( digest ), ha1 );
+
+	/* HA2: hash of method and request URI */
+	digest_init ( &md5_algorithm, ctx );
+	digest_update ( &md5_algorithm, ctx, method, strlen ( method ) );
+	digest_update ( &md5_algorithm, ctx, &colon, sizeof ( colon ) );
+	digest_update ( &md5_algorithm, ctx, uri, strlen ( uri ) );
+	digest_final ( &md5_algorithm, ctx, digest );
+	base16_encode ( digest, sizeof ( digest ), ha2 );
+
+	/* Response: hash of HA1, nonce and HA2 */
+	digest_init ( &md5_algorithm, ctx );
+	digest_update ( &md5_algorithm, ctx, ha1, strlen ( ha1 ) );
+	digest_update ( &md5_algorithm, ctx, &colon, sizeof ( colon ) );
+	digest_update ( &md5_algorithm, ctx, nonce, strlen ( nonce ) );
+	digest_update ( &md5_algorithm, ctx, &colon, sizeof ( colon ) );
+	digest_update ( &md5_algorithm, ctx, ha2, strlen ( ha2 ) );
+	digest_final ( &md5_algorithm, ctx, digest );
+	base16_encode ( digest, sizeof ( digest ), response );
+
+	/* Format the complete header line */
+	len = asprintf ( &auth, "Authorization: Digest username=\"%s\", "
+			 "realm=\"%s\", nonce=\"%s\", uri=\"%s\", "
+			 "%s%s%sresponse=\"%s\"\r\n", user, realm, nonce, uri,
+			 opaque_open, opaque_value, opaque_close, response );
+
+	return ( ( len < 0 ) ? NULL : auth );
+}
+
+/**
+ * Format the URI parameters as an HTTP POST parameter list
+ *
+ * @v http		HTTP request
+ * @v buf		Buffer to contain HTTP POST parameters
+ * @v len		Length of buffer
+ * @ret len		Length of parameter list (excluding terminating NUL)
+ *
+ * Parameters are written as URI-encoded "key=value" pairs joined
+ * with "&".  Literal characters are stored only while space
+ * remains, but the returned length always counts the complete list,
+ * so the function can be called with a zero-length buffer to
+ * measure the space required.
+ */
+static size_t http_post_params ( struct http_request *http,
+				 char *buf, size_t len ) {
+	struct parameter *param;
+	ssize_t space = len;
+	size_t total = 0;
+	size_t encoded;
+
+	for_each_param ( param, http->uri->params ) {
+
+		/* Separate from the previous pair with "&" */
+		if ( total != 0 ) {
+			if ( space > 0 )
+				*buf = '&';
+			buf++;
+			total++;
+			space--;
+		}
+
+		/* Key, URI-encoded */
+		encoded = uri_encode ( param->key, 0, buf, space );
+		buf += encoded;
+		total += encoded;
+		space -= encoded;
+
+		/* "=" between key and value */
+		if ( space > 0 )
+			*buf = '=';
+		buf++;
+		total++;
+		space--;
+
+		/* Value, URI-encoded */
+		encoded = uri_encode ( param->value, 0, buf, space );
+		buf += encoded;
+		total += encoded;
+		space -= encoded;
+	}
+
+	/* NUL-terminate, which also covers an empty parameter list */
+	if ( space > 0 )
+		*buf = '\0';
+
+	return total;
+}
+
+/**
+ * Build the HTTP POST body
+ *
+ * @v http		HTTP request
+ * @ret post		I/O buffer containing POST body, or NULL on error
+ *
+ * The parameter list is measured first, then formatted into a newly
+ * allocated I/O buffer with room for a terminating NUL.  Only the
+ * parameter list itself is added to the buffer length.
+ */
+static struct io_buffer * http_post ( struct http_request *http ) {
+	struct io_buffer *iobuf;
+	size_t needed;
+	size_t written;
+
+	/* Measure the parameter list */
+	needed = http_post_params ( http, NULL, 0 );
+
+	/* Allocate a buffer for the list plus its NUL */
+	iobuf = alloc_iob ( needed + 1 /* NUL */ );
+	if ( iobuf == NULL )
+		return NULL;
+
+	/* Format the list into the buffer */
+	written = http_post_params ( http, iob_put ( iobuf, needed ),
+				     ( needed + 1 /* NUL */ ) );
+	assert ( needed == written );
+	DBGC ( http, "HTTP %p POST %s\n", http, ( ( char * ) iobuf->data ) );
+
+	return iobuf;
+}
+
+/**
+ * HTTP process
+ *
+ * @v http		HTTP request
+ *
+ * Transmits the pending request, if any, once the socket window is
+ * open.  The request consists of the request line, User-Agent and
+ * Host headers, optional Connection / Range / Authorization /
+ * Content-* headers and, for a POST, the URI-encoded parameter list
+ * as the body.  On any failure the request is closed with the
+ * corresponding error.
+ */
+static void http_step ( struct http_request *http ) {
+	struct io_buffer *post;
+	struct uri host_uri;
+	struct uri path_uri;
+	char *host_uri_string;
+	char *path_uri_string;
+	char *method;
+	char *range;
+	char *auth;
+	char *content;
+	int len;
+	int rc;
+
+	/* Do nothing if we have already transmitted the request */
+	if ( ! ( http->flags & HTTP_TX_PENDING ) )
+		return;
+
+	/* Do nothing until socket is ready */
+	if ( ! xfer_window ( &http->socket ) )
+		return;
+
+	/* Force a HEAD request if we have nowhere to send any received data */
+	if ( ( xfer_window ( &http->xfer ) == 0 ) &&
+	     ( http->rx_buffer == UNULL ) ) {
+		http->flags |= ( HTTP_HEAD_ONLY | HTTP_CLIENT_KEEPALIVE );
+	}
+
+	/* Determine method: HEAD takes priority, and the presence of
+	 * URI parameters selects POST over GET
+	 */
+	method = ( ( http->flags & HTTP_HEAD_ONLY ) ? "HEAD" :
+		   ( http->uri->params ? "POST" : "GET" ) );
+
+	/* Construct host URI (host and port only) */
+	memset ( &host_uri, 0, sizeof ( host_uri ) );
+	host_uri.host = http->uri->host;
+	host_uri.port = http->uri->port;
+	host_uri_string = format_uri_alloc ( &host_uri );
+	if ( ! host_uri_string ) {
+		rc = -ENOMEM;
+		goto err_host_uri;
+	}
+
+	/* Construct path URI (path and query only, defaulting to "/") */
+	memset ( &path_uri, 0, sizeof ( path_uri ) );
+	path_uri.path = ( http->uri->path ? http->uri->path : "/" );
+	path_uri.query = http->uri->query;
+	path_uri_string = format_uri_alloc ( &path_uri );
+	if ( ! path_uri_string ) {
+		rc = -ENOMEM;
+		goto err_path_uri;
+	}
+
+	/* Calculate range request parameters if applicable */
+	if ( http->partial_len ) {
+		len = asprintf ( &range, "Range: bytes=%zd-%zd\r\n",
+				 http->partial_start,
+				 ( http->partial_start + http->partial_len
+				   - 1 ) );
+		if ( len < 0 ) {
+			rc = len;
+			goto err_range;
+		}
+	} else {
+		range = NULL;
+	}
+
+	/* Construct authorisation, if applicable */
+	if ( http->flags & HTTP_BASIC_AUTH ) {
+		auth = http_basic_auth ( http );
+		if ( ! auth ) {
+			rc = -ENOMEM;
+			goto err_auth;
+		}
+	} else if ( http->flags & HTTP_DIGEST_AUTH ) {
+		auth = http_digest_auth ( http, method, path_uri_string );
+		if ( ! auth ) {
+			rc = -ENOMEM;
+			goto err_auth;
+		}
+	} else {
+		auth = NULL;
+	}
+
+	/* Construct POST content, if applicable */
+	if ( http->uri->params ) {
+		post = http_post ( http );
+		if ( ! post ) {
+			rc = -ENOMEM;
+			goto err_post;
+		}
+		len = asprintf ( &content, "Content-Type: "
+				 "application/x-www-form-urlencoded\r\n"
+				 "Content-Length: %zd\r\n", iob_len ( post ) );
+		if ( len < 0 ) {
+			rc = len;
+			goto err_content;
+		}
+	} else {
+		post = NULL;
+		content = NULL;
+	}
+
+	/* Mark request as transmitted */
+	http->flags &= ~HTTP_TX_PENDING;
+
+	/* Send request */
+	if ( ( rc = xfer_printf ( &http->socket,
+				  "%s %s HTTP/1.1\r\n"
+				  "User-Agent: iPXE/%s\r\n"
+				  "Host: %s\r\n"
+				  "%s%s%s%s"
+				  "\r\n",
+				  method, path_uri_string, product_version,
+				  host_uri_string,
+				  ( ( http->flags & HTTP_CLIENT_KEEPALIVE ) ?
+				    "Connection: keep-alive\r\n" : "" ),
+				  ( range ? range : "" ),
+				  ( auth ? auth : "" ),
+				  ( content ? content : "" ) ) ) != 0 ) {
+		goto err_xfer;
+	}
+
+	/* Send POST content, if applicable */
+	if ( post ) {
+		if ( ( rc = xfer_deliver_iob ( &http->socket,
+					       iob_disown ( post ) ) ) != 0 )
+			goto err_xfer_post;
+	}
+
+	/* Success and failure share the cleanup below; each label
+	 * releases only what had been allocated before the jump.
+	 */
+ err_xfer_post:
+ err_xfer:
+	free ( content );
+ err_content:
+	/* The POST body is an I/O buffer, so it must be released with
+	 * free_iob() rather than free().  This matters when the
+	 * headers could not be built or sent and we still own the
+	 * buffer.  After a delivery attempt, iob_disown() is relied
+	 * upon to have cleared the pointer, making this a no-op.
+	 */
+	free_iob ( post );
+ err_post:
+	free ( auth );
+ err_auth:
+	free ( range );
+ err_range:
+	free ( path_uri_string );
+ err_path_uri:
+	free ( host_uri_string );
+ err_host_uri:
+	if ( rc != 0 )
+		http_close ( http, rc );
+}
+
+/**
+ * Report the flow control window of the data transfer interface
+ *
+ * @v http		HTTP request
+ * @ret len		Length of window
+ *
+ * The window is one while the request is idle and zero otherwise,
+ * since new block commands may be issued only when idle.
+ */
+static size_t http_xfer_window ( struct http_request *http ) {
+
+	if ( http->rx_state == HTTP_RX_IDLE )
+		return 1;
+
+	return 0;
+}
+
+/**
+ * Start a partial (ranged) HTTP read
+ *
+ * @v http		HTTP request
+ * @v partial		Partial transfer interface
+ * @v offset		Starting offset
+ * @v buffer		Data buffer
+ * @v len		Length
+ * @ret rc		Return status code
+ *
+ * Returns -EBUSY unless the request is currently idle.  A zero
+ * length results in a HEAD-only request.
+ */
+static int http_partial_read ( struct http_request *http,
+			       struct interface *partial,
+			       size_t offset, userptr_t buffer, size_t len ) {
+	unsigned int flags = ( HTTP_TX_PENDING | HTTP_CLIENT_KEEPALIVE );
+
+	/* Refuse a new read while another is in progress */
+	if ( ! http_xfer_window ( http ) )
+		return -EBUSY;
+
+	/* Record where the data comes from and where it goes */
+	http->rx_buffer = buffer;
+	http->partial_start = offset;
+	http->partial_len = len;
+
+	/* A zero-length read needs only the response headers */
+	if ( len == 0 )
+		flags |= HTTP_HEAD_ONLY;
+
+	/* Arm the receive state machine and schedule transmission */
+	http->rx_state = HTTP_RX_RESPONSE;
+	http->flags = flags;
+	process_add ( &http->process );
+
+	/* Attach to the parent interface */
+	intf_plug_plug ( &http->partial, partial );
+
+	return 0;
+}
+
+/**
+ * Read blocks from the HTTP block device
+ *
+ * @v http		HTTP request
+ * @v block		Block data interface
+ * @v lba		Starting logical block address
+ * @v count		Number of blocks to transfer
+ * @v buffer		Data buffer
+ * @v len		Length of data buffer
+ * @ret rc		Return status code
+ *
+ * The block address and count are converted to a byte offset and
+ * byte length using HTTP_BLKSIZE, then issued as a partial read.
+ */
+static int http_block_read ( struct http_request *http,
+			     struct interface *block,
+			     uint64_t lba, unsigned int count,
+			     userptr_t buffer, size_t len __unused ) {
+	size_t byte_offset = ( lba * HTTP_BLKSIZE );
+	size_t byte_count = ( count * HTTP_BLKSIZE );
+
+	return http_partial_read ( http, block, byte_offset, buffer,
+				   byte_count );
+}
+
+/**
+ * Read the capacity of the HTTP block device
+ *
+ * @v http		HTTP request
+ * @v block		Block data interface
+ * @ret rc		Return status code
+ *
+ * Issued as a zero-length partial read with no data buffer, which
+ * http_partial_read() turns into a HEAD-only request.
+ */
+static int http_block_read_capacity ( struct http_request *http,
+				      struct interface *block ) {
+	int rc;
+
+	rc = http_partial_read ( http, block, 0, UNULL, 0 );
+	return rc;
+}
+
+/**
+ * Describe the HTTP device in an ACPI table
+ *
+ * @v http		HTTP request
+ * @v acpi		ACPI table
+ * @v len		Length of ACPI table
+ * @ret rc		Return status code
+ *
+ * Not yet implemented: the table is left untouched and success is
+ * reported.
+ */
+static int http_acpi_describe ( struct http_request *http,
+				struct acpi_description_header *acpi,
+				size_t len ) {
+
+	/* Neither the table nor its length is used */
+	( void ) acpi;
+	( void ) len;
+
+	DBGC ( http, "HTTP %p cannot yet describe device in an ACPI table\n",
+	       http );
+
+	return 0;
+}
+
+/** HTTP socket interface operations
+ *
+ * Window queries, delivered data, window changes and closure on the
+ * socket are routed to the handlers named below.
+ */
+static struct interface_operation http_socket_operations[] = {
+ INTF_OP ( xfer_window, struct http_request *, http_socket_window ),
+ INTF_OP ( xfer_deliver, struct http_request *, http_socket_deliver ),
+ INTF_OP ( xfer_window_changed, struct http_request *, http_step ),
+ INTF_OP ( intf_close, struct http_request *, http_socket_close ),
+};
+
+/** HTTP socket interface descriptor
+ *
+ * NOTE(review): presumably operations not listed above are passed
+ * through to the "xfer" interface -- confirm against
+ * INTF_DESC_PASSTHRU.
+ */
+static struct interface_descriptor http_socket_desc =
+ INTF_DESC_PASSTHRU ( struct http_request, socket,
+ http_socket_operations, xfer );
+
+/** HTTP partial transfer interface operations
+ *
+ * Closing the partial transfer interface closes the whole request.
+ */
+static struct interface_operation http_partial_operations[] = {
+ INTF_OP ( intf_close, struct http_request *, http_close ),
+};
+
+/** HTTP partial transfer interface descriptor */
+static struct interface_descriptor http_partial_desc =
+ INTF_DESC ( struct http_request, partial, http_partial_operations );
+
+/** HTTP data transfer interface operations
+ *
+ * Covers the window query, the block device operations, closure and
+ * ACPI description.
+ */
+static struct interface_operation http_xfer_operations[] = {
+ INTF_OP ( xfer_window, struct http_request *, http_xfer_window ),
+ INTF_OP ( block_read, struct http_request *, http_block_read ),
+ INTF_OP ( block_read_capacity, struct http_request *,
+ http_block_read_capacity ),
+ INTF_OP ( intf_close, struct http_request *, http_close ),
+ INTF_OP ( acpi_describe, struct http_request *, http_acpi_describe ),
+};
+
+/** HTTP data transfer interface descriptor
+ *
+ * NOTE(review): presumably operations not listed above are passed
+ * through to the "socket" interface -- confirm against
+ * INTF_DESC_PASSTHRU.
+ */
+static struct interface_descriptor http_xfer_desc =
+ INTF_DESC_PASSTHRU ( struct http_request, xfer,
+ http_xfer_operations, socket );
+
+/** HTTP process descriptor
+ *
+ * The process step function is http_step().
+ */
+static struct process_descriptor http_process_desc =
+ PROC_DESC_ONCE ( struct http_request, process, http_step );
+
+/**
+ * Open an HTTP connection, optionally through a socket filter
+ *
+ * @v xfer		Data transfer interface
+ * @v uri		Uniform Resource Identifier
+ * @v default_port	Default port number
+ * @v filter		Filter to apply to socket, or NULL
+ * @ret rc		Return status code
+ *
+ * Returns -EINVAL if the URI has no host and -ENOMEM if the request
+ * cannot be allocated.  If the socket cannot be opened, the request
+ * is closed and that error is returned.
+ */
+int http_open_filter ( struct interface *xfer, struct uri *uri,
+		       unsigned int default_port,
+		       int ( * filter ) ( struct interface *xfer,
+					  const char *name,
+					  struct interface **next ) ) {
+	struct http_request *http;
+	int rc;
+
+	/* A host name is mandatory */
+	if ( uri->host == NULL )
+		return -EINVAL;
+
+	/* Allocate a zeroed request structure */
+	http = zalloc ( sizeof ( *http ) );
+	if ( http == NULL )
+		return -ENOMEM;
+
+	/* Initialise reference count, interfaces, process and timer */
+	ref_init ( &http->refcnt, http_free );
+	intf_init ( &http->xfer, &http_xfer_desc, &http->refcnt );
+	intf_init ( &http->partial, &http_partial_desc, &http->refcnt );
+	http->uri = uri_get ( uri );
+	http->default_port = default_port;
+	http->filter = filter;
+	intf_init ( &http->socket, &http_socket_desc, &http->refcnt );
+	process_init ( &http->process, &http_process_desc, &http->refcnt );
+	timer_init ( &http->timer, http_retry, &http->refcnt );
+	http->flags = HTTP_TX_PENDING;
+
+	/* Open the socket; on failure, close and drop our reference */
+	rc = http_socket_open ( http );
+	if ( rc != 0 ) {
+		DBGC ( http, "HTTP %p could not create request: %s\n",
+		       http, strerror ( rc ) );
+		http_close ( http, rc );
+		ref_put ( &http->refcnt );
+		return rc;
+	}
+
+	/* Attach to parent interface, mortalise self, and return */
+	intf_plug_plug ( &http->xfer, xfer );
+	ref_put ( &http->refcnt );
+	return 0;
+}
diff --git a/qemu/roms/ipxe/src/net/tcp/https.c b/qemu/roms/ipxe/src/net/tcp/https.c
new file mode 100644
index 000000000..6112acdae
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/tcp/https.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/**
+ * @file
+ *
+ * Secure Hyper Text Transfer Protocol (HTTPS)
+ *
+ */
+
+#include <stddef.h>
+#include <ipxe/open.h>
+#include <ipxe/tls.h>
+#include <ipxe/http.h>
+#include <ipxe/features.h>
+
+/* Declare "HTTPS" as a protocol feature, identified by
+ * DHCP_EB_FEATURE_HTTPS.  NOTE(review): the final argument is
+ * presumably a feature version number -- confirm against the
+ * FEATURE() definition.
+ */
+FEATURE ( FEATURE_PROTOCOL, "HTTPS", DHCP_EB_FEATURE_HTTPS, 1 );
+
+/**
+ * Open an HTTPS connection
+ *
+ * @v xfer		Data transfer interface
+ * @v uri		Uniform Resource Identifier
+ * @ret rc		Return status code
+ *
+ * Opens an HTTP connection on the default HTTPS port with add_tls
+ * as the socket filter.
+ */
+static int https_open ( struct interface *xfer, struct uri *uri ) {
+	int rc;
+
+	rc = http_open_filter ( xfer, uri, HTTPS_PORT, add_tls );
+	return rc;
+}
+
+/** HTTPS URI opener
+ *
+ * Associates the "https" URI scheme with https_open().
+ * NOTE(review): __uri_opener presumably places this entry in the
+ * table of URI openers -- confirm against its definition.
+ */
+struct uri_opener https_uri_opener __uri_opener = {
+ .scheme = "https",
+ .open = https_open,
+};
diff --git a/qemu/roms/ipxe/src/net/tcp/iscsi.c b/qemu/roms/ipxe/src/net/tcp/iscsi.c
new file mode 100644
index 000000000..03c6d0f23
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/tcp/iscsi.c
@@ -0,0 +1,2126 @@
+/*
+ * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stddef.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <assert.h>
+#include <byteswap.h>
+#include <ipxe/vsprintf.h>
+#include <ipxe/socket.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/uri.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/scsi.h>
+#include <ipxe/process.h>
+#include <ipxe/uaccess.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/settings.h>
+#include <ipxe/features.h>
+#include <ipxe/base16.h>
+#include <ipxe/base64.h>
+#include <ipxe/ibft.h>
+#include <ipxe/iscsi.h>
+
+/** @file
+ *
+ * iSCSI protocol
+ *
+ */
+
+FEATURE ( FEATURE_PROTOCOL, "iSCSI", DHCP_EB_FEATURE_ISCSI, 1 );
+
+/*
+ * Disambiguate the various error causes
+ *
+ * Each error below is defined as a pair of macros: EINFO_<name>
+ * passes a base error (EINFO_EACCES, EINFO_EINVAL, EINFO_EIO,
+ * EINFO_ENOTSUP, EINFO_EPERM or EINFO_EPROTO), a small numeric code
+ * and a description string to __einfo_uniqify(), and <name> wraps
+ * that EINFO_<name> in __einfo_error().  The numeric codes are
+ * distinct within each base error.
+ */
+#define EACCES_INCORRECT_TARGET_USERNAME \
+ __einfo_error ( EINFO_EACCES_INCORRECT_TARGET_USERNAME )
+#define EINFO_EACCES_INCORRECT_TARGET_USERNAME \
+ __einfo_uniqify ( EINFO_EACCES, 0x01, "Incorrect target username" )
+#define EACCES_INCORRECT_TARGET_PASSWORD \
+ __einfo_error ( EINFO_EACCES_INCORRECT_TARGET_PASSWORD )
+#define EINFO_EACCES_INCORRECT_TARGET_PASSWORD \
+ __einfo_uniqify ( EINFO_EACCES, 0x02, "Incorrect target password" )
+#define EINVAL_ROOT_PATH_TOO_SHORT \
+ __einfo_error ( EINFO_EINVAL_ROOT_PATH_TOO_SHORT )
+#define EINFO_EINVAL_ROOT_PATH_TOO_SHORT \
+ __einfo_uniqify ( EINFO_EINVAL, 0x01, "Root path too short" )
+#define EINVAL_BAD_CREDENTIAL_MIX \
+ __einfo_error ( EINFO_EINVAL_BAD_CREDENTIAL_MIX )
+#define EINFO_EINVAL_BAD_CREDENTIAL_MIX \
+ __einfo_uniqify ( EINFO_EINVAL, 0x02, "Bad credential mix" )
+#define EINVAL_NO_ROOT_PATH \
+ __einfo_error ( EINFO_EINVAL_NO_ROOT_PATH )
+#define EINFO_EINVAL_NO_ROOT_PATH \
+ __einfo_uniqify ( EINFO_EINVAL, 0x03, "No root path" )
+#define EINVAL_NO_TARGET_IQN \
+ __einfo_error ( EINFO_EINVAL_NO_TARGET_IQN )
+#define EINFO_EINVAL_NO_TARGET_IQN \
+ __einfo_uniqify ( EINFO_EINVAL, 0x04, "No target IQN" )
+#define EINVAL_NO_INITIATOR_IQN \
+ __einfo_error ( EINFO_EINVAL_NO_INITIATOR_IQN )
+#define EINFO_EINVAL_NO_INITIATOR_IQN \
+ __einfo_uniqify ( EINFO_EINVAL, 0x05, "No initiator IQN" )
+#define EIO_TARGET_UNAVAILABLE \
+ __einfo_error ( EINFO_EIO_TARGET_UNAVAILABLE )
+#define EINFO_EIO_TARGET_UNAVAILABLE \
+ __einfo_uniqify ( EINFO_EIO, 0x01, "Target not currently operational" )
+#define EIO_TARGET_NO_RESOURCES \
+ __einfo_error ( EINFO_EIO_TARGET_NO_RESOURCES )
+#define EINFO_EIO_TARGET_NO_RESOURCES \
+ __einfo_uniqify ( EINFO_EIO, 0x02, "Target out of resources" )
+#define ENOTSUP_INITIATOR_STATUS \
+ __einfo_error ( EINFO_ENOTSUP_INITIATOR_STATUS )
+#define EINFO_ENOTSUP_INITIATOR_STATUS \
+ __einfo_uniqify ( EINFO_ENOTSUP, 0x01, "Unsupported initiator status" )
+#define ENOTSUP_OPCODE \
+ __einfo_error ( EINFO_ENOTSUP_OPCODE )
+#define EINFO_ENOTSUP_OPCODE \
+ __einfo_uniqify ( EINFO_ENOTSUP, 0x02, "Unsupported opcode" )
+#define ENOTSUP_DISCOVERY \
+ __einfo_error ( EINFO_ENOTSUP_DISCOVERY )
+#define EINFO_ENOTSUP_DISCOVERY \
+ __einfo_uniqify ( EINFO_ENOTSUP, 0x03, "Discovery not supported" )
+#define ENOTSUP_TARGET_STATUS \
+ __einfo_error ( EINFO_ENOTSUP_TARGET_STATUS )
+#define EINFO_ENOTSUP_TARGET_STATUS \
+ __einfo_uniqify ( EINFO_ENOTSUP, 0x04, "Unsupported target status" )
+#define ENOTSUP_NOP_IN \
+ __einfo_error ( EINFO_ENOTSUP_NOP_IN )
+#define EINFO_ENOTSUP_NOP_IN \
+ __einfo_uniqify ( EINFO_ENOTSUP, 0x05, "Unsupported NOP-In received" )
+#define EPERM_INITIATOR_AUTHENTICATION \
+ __einfo_error ( EINFO_EPERM_INITIATOR_AUTHENTICATION )
+#define EINFO_EPERM_INITIATOR_AUTHENTICATION \
+ __einfo_uniqify ( EINFO_EPERM, 0x01, "Initiator authentication failed" )
+#define EPERM_INITIATOR_AUTHORISATION \
+ __einfo_error ( EINFO_EPERM_INITIATOR_AUTHORISATION )
+#define EINFO_EPERM_INITIATOR_AUTHORISATION \
+ __einfo_uniqify ( EINFO_EPERM, 0x02, "Initiator not authorised" )
+#define EPROTO_INVALID_CHAP_ALGORITHM \
+ __einfo_error ( EINFO_EPROTO_INVALID_CHAP_ALGORITHM )
+#define EINFO_EPROTO_INVALID_CHAP_ALGORITHM \
+ __einfo_uniqify ( EINFO_EPROTO, 0x01, "Invalid CHAP algorithm" )
+#define EPROTO_INVALID_CHAP_IDENTIFIER \
+ __einfo_error ( EINFO_EPROTO_INVALID_CHAP_IDENTIFIER )
+#define EINFO_EPROTO_INVALID_CHAP_IDENTIFIER \
+ __einfo_uniqify ( EINFO_EPROTO, 0x02, "Invalid CHAP identifier" )
+#define EPROTO_INVALID_LARGE_BINARY \
+ __einfo_error ( EINFO_EPROTO_INVALID_LARGE_BINARY )
+#define EINFO_EPROTO_INVALID_LARGE_BINARY \
+ __einfo_uniqify ( EINFO_EPROTO, 0x03, "Invalid large binary" )
+#define EPROTO_INVALID_CHAP_RESPONSE \
+ __einfo_error ( EINFO_EPROTO_INVALID_CHAP_RESPONSE )
+#define EINFO_EPROTO_INVALID_CHAP_RESPONSE \
+ __einfo_uniqify ( EINFO_EPROTO, 0x04, "Invalid CHAP response" )
+#define EPROTO_INVALID_KEY_VALUE_PAIR \
+ __einfo_error ( EINFO_EPROTO_INVALID_KEY_VALUE_PAIR )
+#define EINFO_EPROTO_INVALID_KEY_VALUE_PAIR \
+ __einfo_uniqify ( EINFO_EPROTO, 0x05, "Invalid key/value pair" )
+#define EPROTO_VALUE_REJECTED \
+ __einfo_error ( EINFO_EPROTO_VALUE_REJECTED )
+#define EINFO_EPROTO_VALUE_REJECTED \
+ __einfo_uniqify ( EINFO_EPROTO, 0x06, "Parameter rejected" )
+
+static void iscsi_start_tx ( struct iscsi_session *iscsi );
+static void iscsi_start_login ( struct iscsi_session *iscsi );
+static void iscsi_start_data_out ( struct iscsi_session *iscsi,
+ unsigned int datasn );
+
+/**
+ * Finish receiving PDU data into buffer
+ *
+ * @v iscsi iSCSI session
+ *
+ * Frees the receive buffer and resets the pointer to NULL, so that
+ * the next call to iscsi_rx_buffered_data() allocates a fresh buffer.
+ */
+static void iscsi_rx_buffered_data_done ( struct iscsi_session *iscsi ) {
+ free ( iscsi->rx_buffer );
+ iscsi->rx_buffer = NULL;
+}
+
+/**
+ * Accumulate PDU data in the session's receive buffer
+ *
+ * @v iscsi		iSCSI session
+ * @v data		Data to receive
+ * @v len		Length of data
+ * @ret rc		Return status code
+ *
+ * An RX PDU type handler can use this when it wants to collect the
+ * entire data segment and process the PDU as a single unit.  The
+ * buffer is allocated (with room for iscsi->rx_len bytes) the first
+ * time it is needed.  The caller is responsible for calling
+ * iscsi_rx_buffered_data_done() once it has processed the data.
+ */
+static int iscsi_rx_buffered_data ( struct iscsi_session *iscsi,
+				    const void *data, size_t len ) {
+	void *buffer;
+
+	/* Lazily allocate a buffer large enough for the whole segment */
+	if ( iscsi->rx_buffer == NULL ) {
+		buffer = malloc ( iscsi->rx_len );
+		if ( buffer == NULL )
+			return -ENOMEM;
+		iscsi->rx_buffer = buffer;
+	}
+
+	/* Place this fragment at the current receive offset */
+	assert ( ( iscsi->rx_offset + len ) <= iscsi->rx_len );
+	memcpy ( ( iscsi->rx_buffer + iscsi->rx_offset ), data, len );
+
+	return 0;
+}
+
+/**
+ * Release an iSCSI session and everything it owns
+ *
+ * @v refcnt		Reference counter embedded in the session
+ *
+ * Frees the session's dynamically allocated strings, its CHAP and
+ * receive-buffer state, any pending command, and finally the session
+ * structure itself.
+ */
+static void iscsi_free ( struct refcnt *refcnt ) {
+	struct iscsi_session *session =
+		container_of ( refcnt, struct iscsi_session, refcnt );
+
+	/* Names and addresses */
+	free ( session->initiator_iqn );
+	free ( session->target_address );
+	free ( session->target_iqn );
+
+	/* Credentials */
+	free ( session->initiator_username );
+	free ( session->initiator_password );
+	free ( session->target_username );
+	free ( session->target_password );
+
+	/* Transient login, receive and command state */
+	chap_finish ( &session->chap );
+	iscsi_rx_buffered_data_done ( session );
+	free ( session->command );
+
+	/* The session itself */
+	free ( session );
+}
+
+/**
+ * Shut down iSCSI interface
+ *
+ * @v iscsi		iSCSI session
+ * @v rc		Reason for close
+ *
+ * Stops the transmission process and shuts down the socket, control
+ * and data interfaces, in that order.
+ */
+static void iscsi_close ( struct iscsi_session *iscsi, int rc ) {
+	/* Even a graceful TCP close counts as a failure from our
+	 * point of view, so substitute an error when none is given.
+	 */
+	int reason = ( rc ? rc : -ECONNRESET );
+
+	DBGC ( iscsi, "iSCSI %p closed: %s\n", iscsi, strerror ( reason ) );
+
+	/* Stop transmission process */
+	process_del ( &iscsi->process );
+
+	/* Shut down interfaces */
+	intf_shutdown ( &iscsi->socket, reason );
+	intf_shutdown ( &iscsi->control, reason );
+	intf_shutdown ( &iscsi->data, reason );
+}
+
+/**
+ * Assign new iSCSI initiator task tag
+ *
+ * @v iscsi		iSCSI session
+ *
+ * The tag is ISCSI_TAG_MAGIC combined with a 16-bit counter that is
+ * shared by all sessions and incremented on every call.
+ */
+static void iscsi_new_itt ( struct iscsi_session *iscsi ) {
+	static uint16_t counter;
+
+	counter++;
+	iscsi->itt = ( ISCSI_TAG_MAGIC | counter );
+}
+
+/**
+ * Open iSCSI transport-layer connection
+ *
+ * @v iscsi		iSCSI session
+ * @ret rc		Return status code
+ *
+ * Opens a stream socket to the session's target address and port,
+ * then starts the login sequence in the security negotiation phase.
+ */
+static int iscsi_open_connection ( struct iscsi_session *iscsi ) {
+	struct sockaddr_tcpip peer;
+	int rc;
+
+	/* Both state machines must be at rest before connecting */
+	assert ( iscsi->tx_state == ISCSI_TX_IDLE );
+	assert ( iscsi->rx_state == ISCSI_RX_BHS );
+	assert ( iscsi->rx_offset == 0 );
+
+	/* Open socket; the stored port is converted to network order */
+	memset ( &peer, 0, sizeof ( peer ) );
+	peer.st_port = htons ( iscsi->target_port );
+	rc = xfer_open_named_socket ( &iscsi->socket, SOCK_STREAM,
+				      ( struct sockaddr * ) &peer,
+				      iscsi->target_address, NULL );
+	if ( rc != 0 ) {
+		DBGC ( iscsi, "iSCSI %p could not open socket: %s\n",
+		       iscsi, strerror ( rc ) );
+		return rc;
+	}
+
+	/* Enter security negotiation phase, requiring the target to
+	 * authenticate itself if we hold a target username.
+	 */
+	iscsi->status = ( ISCSI_STATUS_SECURITY_NEGOTIATION_PHASE |
+			  ISCSI_STATUS_STRINGS_SECURITY |
+			  ( iscsi->target_username ?
+			    ISCSI_STATUS_AUTH_REVERSE_REQUIRED : 0 ) );
+
+	/* Pick a new ISID qualifier and initiator task tag */
+	iscsi->isid_iana_qual = ( random() & 0xffff );
+	iscsi_new_itt ( iscsi );
+
+	/* Initiate login */
+	iscsi_start_login ( iscsi );
+
+	return 0;
+}
+
+/**
+ * Close iSCSI transport-layer connection
+ *
+ * @v iscsi iSCSI session
+ * @v rc Reason for close
+ *
+ * Closes the transport-layer connection and resets the session state
+ * ready to attempt a fresh login.
+ *
+ * The TX and RX state machines are put back into the states that
+ * iscsi_open_connection() asserts on entry (ISCSI_TX_IDLE,
+ * ISCSI_RX_BHS and a zero RX offset).  Only the socket interface is
+ * restarted here; the control and data interfaces are not touched.
+ */
+static void iscsi_close_connection ( struct iscsi_session *iscsi, int rc ) {
+
+ /* Close the transport-layer (socket) interface */
+ intf_restart ( &iscsi->socket, rc );
+
+ /* Clear connection status */
+ iscsi->status = 0;
+
+ /* Reset TX and RX state machines */
+ iscsi->tx_state = ISCSI_TX_IDLE;
+ iscsi->rx_state = ISCSI_RX_BHS;
+ iscsi->rx_offset = 0;
+
+ /* Free any temporary dynamically allocated memory */
+ chap_finish ( &iscsi->chap );
+ iscsi_rx_buffered_data_done ( iscsi );
+}
+
+/**
+ * Mark iSCSI SCSI operation as complete
+ *
+ * @v iscsi iSCSI session
+ * @v rc Return status code
+ * @v rsp SCSI response, if any
+ *
+ * Note that iscsi_scsi_done() will not close the connection, and must
+ * therefore be called only when the internal state machines are in an
+ * appropriate state, otherwise bad things may happen on the next call
+ * to iscsi_scsi_command(). The general rule is to call
+ * iscsi_scsi_done() only at the end of receiving a PDU; at this point
+ * the TX and RX engines should both be idle.
+ *
+ * The initiator task tag is sampled on entry and compared again after
+ * the response has been delivered; the data interface is restarted
+ * only if iscsi->itt is still unchanged at that point.
+ */
+static void iscsi_scsi_done ( struct iscsi_session *iscsi, int rc,
+ struct scsi_rsp *rsp ) {
+ uint32_t itt = iscsi->itt;
+
+ assert ( iscsi->tx_state == ISCSI_TX_IDLE );
+
+ /* Clear command */
+ free ( iscsi->command );
+ iscsi->command = NULL;
+
+ /* Send SCSI response, if any */
+ if ( rsp )
+ scsi_response ( &iscsi->data, rsp );
+
+ /* Close SCSI command, if this is still the same command. (It
+ * is possible that the command interface has already been
+ * closed as a result of the SCSI response we sent.)
+ */
+ if ( iscsi->itt == itt )
+ intf_restart ( &iscsi->data, rc );
+}
+
+/****************************************************************************
+ *
+ * iSCSI SCSI command issuing
+ *
+ */
+
+/**
+ * Build iSCSI SCSI command BHS
+ *
+ * @v iscsi		iSCSI session
+ *
+ * Bidirectional commands (those with both Data-In and Data-Out
+ * segments) are not currently supported: they would need code to
+ * generate an AHS, and nothing requires that at the moment.  Since at
+ * most one direction is in use, the expected transfer length is
+ * simply the OR of the two direction lengths.
+ */
+static void iscsi_start_command ( struct iscsi_session *iscsi ) {
+	struct iscsi_bhs_scsi_command *bhs = &iscsi->tx_bhs.scsi_command;
+
+	assert ( ! ( iscsi->command->data_in && iscsi->command->data_out ) );
+
+	/* Begin transmission, then fill in the header fields */
+	iscsi_start_tx ( iscsi );
+	bhs->opcode = ISCSI_OPCODE_SCSI_COMMAND;
+	bhs->flags = ( ISCSI_FLAG_FINAL | ISCSI_COMMAND_ATTR_SIMPLE |
+		       ( iscsi->command->data_in ?
+			 ISCSI_COMMAND_FLAG_READ : 0 ) |
+		       ( iscsi->command->data_out ?
+			 ISCSI_COMMAND_FLAG_WRITE : 0 ) );
+	/* lengths left as zero */
+	memcpy ( &bhs->lun, &iscsi->command->lun, sizeof ( bhs->lun ) );
+	bhs->itt = htonl ( iscsi->itt );
+	bhs->exp_len = htonl ( iscsi->command->data_in_len |
+			       iscsi->command->data_out_len );
+	bhs->cmdsn = htonl ( iscsi->cmdsn );
+	bhs->expstatsn = htonl ( iscsi->statsn + 1 );
+	memcpy ( &bhs->cdb, &iscsi->command->cdb, sizeof ( bhs->cdb ) );
+	DBGC2 ( iscsi, "iSCSI %p start " SCSI_CDB_FORMAT " %s %#zx\n",
+		iscsi, SCSI_CDB_DATA ( bhs->cdb ),
+		( iscsi->command->data_in ? "in" : "out" ),
+		( iscsi->command->data_in ?
+		  iscsi->command->data_in_len :
+		  iscsi->command->data_out_len ) );
+}
+
+/**
+ * Receive data segment of an iSCSI SCSI response PDU
+ *
+ * @v iscsi		iSCSI session
+ * @v data		Received data
+ * @v len		Length of received data
+ * @v remaining		Data remaining after this data
+ * @ret rc		Return status code
+ *
+ * The data segment is buffered until it is complete.  It is then
+ * treated as a two-byte prefix followed by sense data, which is
+ * handed to scsi_parse_sense().  A data segment too short to hold the
+ * prefix is treated as carrying no sense data at all.
+ */
+static int iscsi_rx_scsi_response ( struct iscsi_session *iscsi,
+				    const void *data, size_t len,
+				    size_t remaining ) {
+	struct iscsi_bhs_scsi_response *response
+		= &iscsi->rx_bhs.scsi_response;
+	struct scsi_rsp rsp;
+	uint32_t residual_count;
+	size_t data_len;
+	int rc;
+
+	/* Buffer up the PDU data */
+	if ( ( rc = iscsi_rx_buffered_data ( iscsi, data, len ) ) != 0 ) {
+		DBGC ( iscsi, "iSCSI %p could not buffer SCSI response: %s\n",
+		       iscsi, strerror ( rc ) );
+		return rc;
+	}
+	if ( remaining )
+		return 0;
+
+	/* Parse SCSI response and discard buffer */
+	memset ( &rsp, 0, sizeof ( rsp ) );
+	rsp.status = response->status;
+	residual_count = ntohl ( response->residual_count );
+	if ( response->flags & ISCSI_DATA_FLAG_OVERFLOW ) {
+		rsp.overrun = residual_count;
+	} else if ( response->flags & ISCSI_DATA_FLAG_UNDERFLOW ) {
+		rsp.overrun = -(residual_count);
+	}
+	data_len = ISCSI_DATA_LEN ( response->lengths );
+	/* Require the full two-byte prefix before subtracting it:
+	 * data_len is unsigned, so a one-byte segment would otherwise
+	 * wrap around to an enormous sense data length.
+	 */
+	if ( data_len >= 2 ) {
+		scsi_parse_sense ( ( iscsi->rx_buffer + 2 ), ( data_len - 2 ),
+				   &rsp.sense );
+	}
+	iscsi_rx_buffered_data_done ( iscsi );
+
+	/* Check for errors */
+	if ( response->response != ISCSI_RESPONSE_COMMAND_COMPLETE )
+		return -EIO;
+
+	/* Mark as completed */
+	iscsi_scsi_done ( iscsi, 0, &rsp );
+	return 0;
+}
+
+/**
+ * Receive data segment of an iSCSI data-in PDU
+ *
+ * @v iscsi		iSCSI session
+ * @v data		Received data
+ * @v len		Length of received data
+ * @v remaining		Data remaining after this data
+ * @ret rc		Return status code
+ *
+ * The buffer offset comes from the received PDU header, so it is
+ * validated at run time (not merely asserted) before anything is
+ * copied.  -EPROTO is returned if there is no command with a data-in
+ * buffer outstanding, or if the data would not fit in that buffer.
+ */
+static int iscsi_rx_data_in ( struct iscsi_session *iscsi,
+			      const void *data, size_t len,
+			      size_t remaining ) {
+	struct iscsi_bhs_data_in *data_in = &iscsi->rx_bhs.data_in;
+	unsigned long pdu_offset;
+	unsigned long offset;
+	size_t buffer_len;
+
+	/* Reject data-in that does not belong to an outstanding
+	 * command with a data-in buffer.
+	 */
+	if ( ( iscsi->command == NULL ) || ( ! iscsi->command->data_in ) ) {
+		DBGC ( iscsi, "iSCSI %p received unexpected data-in\n",
+		       iscsi );
+		return -EPROTO;
+	}
+
+	/* Reject data falling outside the data-in buffer.  Each
+	 * comparison subtracts from a value already known to be large
+	 * enough, so none of the arithmetic can wrap.
+	 */
+	buffer_len = iscsi->command->data_in_len;
+	pdu_offset = ntohl ( data_in->offset );
+	if ( ( pdu_offset > buffer_len ) ||
+	     ( iscsi->rx_offset > ( buffer_len - pdu_offset ) ) ||
+	     ( len > ( buffer_len - pdu_offset - iscsi->rx_offset ) ) ) {
+		DBGC ( iscsi, "iSCSI %p received out-of-range data-in\n",
+		       iscsi );
+		return -EPROTO;
+	}
+
+	/* Copy data to data-in buffer */
+	offset = ( pdu_offset + iscsi->rx_offset );
+	copy_to_user ( iscsi->command->data_in, offset, data, len );
+
+	/* Wait for whole SCSI response to arrive */
+	if ( remaining )
+		return 0;
+
+	/* Mark as completed if status is present */
+	if ( data_in->flags & ISCSI_DATA_FLAG_STATUS ) {
+		assert ( ( offset + len ) == iscsi->command->data_in_len );
+		assert ( data_in->flags & ISCSI_FLAG_FINAL );
+		/* iSCSI cannot return an error status via a data-in */
+		iscsi_scsi_done ( iscsi, 0, NULL );
+	}
+
+	return 0;
+}
+
+/**
+ * Receive data segment of an iSCSI R2T PDU
+ *
+ * @v iscsi		iSCSI session
+ * @v data		Received data
+ * @v len		Length of received data
+ * @v remaining		Data remaining after this data
+ * @ret rc		Return status code
+ *
+ * Only the R2T header is used; the data segment arguments are
+ * ignored.  The requested transfer parameters are stored in the
+ * session and the first data-out PDU (DataSN 0) is started.
+ */
+static int iscsi_rx_r2t ( struct iscsi_session *iscsi,
+			  const void *data __unused, size_t len __unused,
+			  size_t remaining __unused ) {
+	struct iscsi_bhs_r2t *bhs = &iscsi->rx_bhs.r2t;
+
+	/* Remember what the target asked for, in host byte order */
+	iscsi->ttt = ntohl ( bhs->ttt );
+	iscsi->transfer_offset = ntohl ( bhs->offset );
+	iscsi->transfer_len = ntohl ( bhs->len );
+
+	/* Kick off the data-out sequence */
+	iscsi_start_data_out ( iscsi, 0 );
+
+	return 0;
+}
+
+/**
+ * Build iSCSI data-out BHS
+ *
+ * @v iscsi		iSCSI session
+ * @v datasn		Data sequence number within the transfer
+ *
+ * Data-Out PDUs always carry at most 512 bytes, which avoids having
+ * to take the target's MaxRecvDataSegmentLength into account.  The
+ * PDU covering the last of the requested transfer is marked final.
+ */
+static void iscsi_start_data_out ( struct iscsi_session *iscsi,
+				   unsigned int datasn ) {
+	struct iscsi_bhs_data_out *bhs = &iscsi->tx_bhs.data_out;
+	unsigned long sent;
+	unsigned long left;
+	unsigned long chunk;
+
+	/* Work out how much of the transfer this PDU will carry */
+	sent = datasn * 512;
+	left = iscsi->transfer_len - sent;
+	chunk = ( ( left > 512 ) ? 512 : left );
+
+	/* Construct BHS and initiate transmission */
+	iscsi_start_tx ( iscsi );
+	bhs->opcode = ISCSI_OPCODE_DATA_OUT;
+	if ( left <= 512 )
+		bhs->flags = ( ISCSI_FLAG_FINAL );
+	ISCSI_SET_LENGTHS ( bhs->lengths, 0, chunk );
+	bhs->lun = iscsi->command->lun;
+	bhs->itt = htonl ( iscsi->itt );
+	bhs->ttt = htonl ( iscsi->ttt );
+	bhs->expstatsn = htonl ( iscsi->statsn + 1 );
+	bhs->datasn = htonl ( datasn );
+	bhs->offset = htonl ( iscsi->transfer_offset + sent );
+	DBGC ( iscsi, "iSCSI %p start data out DataSN %#x len %#lx\n",
+	       iscsi, datasn, chunk );
+}
+
+/**
+ * Complete iSCSI data-out PDU transmission
+ *
+ * @v iscsi		iSCSI session
+ *
+ * Unless the PDU just sent was flagged as final, the next data-out
+ * PDU in the sequence is started with the following DataSN.
+ */
+static void iscsi_data_out_done ( struct iscsi_session *iscsi ) {
+	struct iscsi_bhs_data_out *bhs = &iscsi->tx_bhs.data_out;
+
+	/* Nothing more to send once the final PDU has gone out */
+	if ( bhs->flags & ISCSI_FLAG_FINAL )
+		return;
+
+	iscsi_start_data_out ( iscsi, ntohl ( bhs->datasn ) + 1 );
+}
+
+/**
+ * Send iSCSI data-out data segment
+ *
+ * @v iscsi		iSCSI session
+ * @ret rc		Return status code
+ *
+ * The segment offset and lengths are taken from the data-out BHS
+ * currently held in the TX header.  The payload is copied from the
+ * command's data-out buffer and followed by zero padding.
+ */
+static int iscsi_tx_data_out ( struct iscsi_session *iscsi ) {
+	struct iscsi_bhs_data_out *bhs = &iscsi->tx_bhs.data_out;
+	unsigned long offset = ntohl ( bhs->offset );
+	size_t data_len = ISCSI_DATA_LEN ( bhs->lengths );
+	size_t pad_len = ISCSI_DATA_PAD_LEN ( bhs->lengths );
+	struct io_buffer *iobuf;
+	void *payload;
+	void *padding;
+
+	assert ( iscsi->command != NULL );
+	assert ( iscsi->command->data_out );
+	assert ( ( offset + data_len ) <= iscsi->command->data_out_len );
+
+	/* Allocate room for payload plus padding */
+	iobuf = xfer_alloc_iob ( &iscsi->socket, ( data_len + pad_len ) );
+	if ( ! iobuf )
+		return -ENOMEM;
+
+	/* Fill in payload, then padding */
+	payload = iob_put ( iobuf, data_len );
+	copy_from_user ( payload, iscsi->command->data_out, offset,
+			 data_len );
+	padding = iob_put ( iobuf, pad_len );
+	memset ( padding, 0, pad_len );
+
+	return xfer_deliver_iob ( &iscsi->socket, iobuf );
+}
+
+/**
+ * Receive data segment of an iSCSI NOP-In
+ *
+ * @v iscsi		iSCSI session
+ * @v data		Received data
+ * @v len		Length of received data
+ * @v remaining		Data remaining after this data
+ * @ret rc		Return status code
+ *
+ * There is currently no ability to answer NOP-Ins sent as ping
+ * requests, but NOP-Ins sent merely to update CmdSN (those carrying
+ * the reserved TTT) are accepted.
+ */
+static int iscsi_rx_nop_in ( struct iscsi_session *iscsi,
+			     const void *data __unused, size_t len __unused,
+			     size_t remaining __unused ) {
+	struct iscsi_nop_in *nop_in = &iscsi->rx_bhs.nop_in;
+
+	DBGC2 ( iscsi, "iSCSI %p received NOP-In\n", iscsi );
+
+	/* A reserved TTT means that no reply is expected */
+	if ( nop_in->ttt == htonl ( ISCSI_TAG_RESERVED ) )
+		return 0;
+
+	DBGC ( iscsi, "iSCSI %p received unsupported NOP-In with TTT "
+	       "%08x\n", iscsi, ntohl ( nop_in->ttt ) );
+	return -ENOTSUP_NOP_IN;
+}
+
+/****************************************************************************
+ *
+ * iSCSI login
+ *
+ */
+
+/**
+ * Build iSCSI login request strings
+ *
+ * @v iscsi iSCSI session
+ * @v data Buffer to fill with login strings
+ * @v len Length of buffer
+ * @ret used Total length of the strings, as accumulated from the
+ *           ssnprintf() return values
+ *
+ * Which groups of strings are generated is selected by the
+ * ISCSI_STATUS_STRINGS_* bits in iscsi->status.  Each key=value pair
+ * is terminated by a NUL byte (every "%c" conversion is given 0).
+ *
+ * NOTE(review): iscsi_start_login() calls this with a NULL buffer of
+ * length zero and uses the result as the data segment length, which
+ * presumably relies on ssnprintf() reporting the length it would have
+ * written -- confirm against ssnprintf().
+ *
+ * These are the initial set of strings sent in the first login
+ * request PDU. We want the following settings:
+ *
+ * HeaderDigest=None
+ * DataDigest=None
+ * MaxConnections is irrelevant; we make only one connection anyway [4]
+ * InitialR2T=Yes [1]
+ * ImmediateData is irrelevant; we never send immediate data [4]
+ * MaxRecvDataSegmentLength=8192 (default; we don't care) [3]
+ * MaxBurstLength=262144 (default; we don't care) [3]
+ * FirstBurstLength=262144 (default; we don't care)
+ * DefaultTime2Wait=0 [2]
+ * DefaultTime2Retain=0 [2]
+ * MaxOutstandingR2T=1
+ * DataPDUInOrder=Yes
+ * DataSequenceInOrder=Yes
+ * ErrorRecoveryLevel=0
+ *
+ * [1] InitialR2T has an OR resolution function, so the target may
+ * force us to use it. We therefore simplify our logic by always
+ * using it.
+ *
+ * [2] These ensure that we can safely start a new task once we have
+ * reconnected after a failure, without having to manually tidy up
+ * after the old one.
+ *
+ * [3] We are quite happy to use the RFC-defined default values for
+ * these parameters, but some targets (notably OpenSolaris)
+ * incorrectly assume a default value of zero, so we explicitly
+ * specify the default values.
+ *
+ * [4] We are quite happy to use the RFC-defined default values for
+ * these parameters, but some targets (notably a QNAP TS-639Pro) fail
+ * unless they are supplied, so we explicitly specify the default
+ * values.
+ */
+static int iscsi_build_login_request_strings ( struct iscsi_session *iscsi,
+ void *data, size_t len ) {
+ unsigned int used = 0;
+ const char *auth_method;
+
+ if ( iscsi->status & ISCSI_STATUS_STRINGS_SECURITY ) {
+ /* Default to allowing no authentication */
+ auth_method = "None";
+ /* If we have a credential to supply, permit CHAP */
+ if ( iscsi->initiator_username )
+ auth_method = "CHAP,None";
+ /* If we have a credential to check, force CHAP */
+ if ( iscsi->target_username )
+ auth_method = "CHAP";
+ used += ssnprintf ( data + used, len - used,
+ "InitiatorName=%s%c"
+ "TargetName=%s%c"
+ "SessionType=Normal%c"
+ "AuthMethod=%s%c",
+ iscsi->initiator_iqn, 0,
+ iscsi->target_iqn, 0, 0,
+ auth_method, 0 );
+ }
+
+ if ( iscsi->status & ISCSI_STATUS_STRINGS_CHAP_ALGORITHM ) {
+ used += ssnprintf ( data + used, len - used, "CHAP_A=5%c", 0 );
+ }
+
+ if ( ( iscsi->status & ISCSI_STATUS_STRINGS_CHAP_RESPONSE ) ) {
+ char buf[ base16_encoded_len ( iscsi->chap.response_len ) + 1 ];
+ assert ( iscsi->initiator_username != NULL );
+ base16_encode ( iscsi->chap.response, iscsi->chap.response_len,
+ buf );
+ used += ssnprintf ( data + used, len - used,
+ "CHAP_N=%s%cCHAP_R=0x%s%c",
+ iscsi->initiator_username, 0, buf, 0 );
+ }
+
+ if ( ( iscsi->status & ISCSI_STATUS_STRINGS_CHAP_CHALLENGE ) ) {
+ size_t challenge_len = ( sizeof ( iscsi->chap_challenge ) - 1 );
+ char buf[ base16_encoded_len ( challenge_len ) + 1 ];
+ base16_encode ( ( iscsi->chap_challenge + 1 ), challenge_len,
+ buf );
+ used += ssnprintf ( data + used, len - used,
+ "CHAP_I=%d%cCHAP_C=0x%s%c",
+ iscsi->chap_challenge[0], 0, buf, 0 );
+ }
+
+ if ( iscsi->status & ISCSI_STATUS_STRINGS_OPERATIONAL ) {
+ used += ssnprintf ( data + used, len - used,
+ "HeaderDigest=None%c"
+ "DataDigest=None%c"
+ "MaxConnections=1%c"
+ "InitialR2T=Yes%c"
+ "ImmediateData=No%c"
+ "MaxRecvDataSegmentLength=8192%c"
+ "MaxBurstLength=262144%c"
+ "DefaultTime2Wait=0%c"
+ "DefaultTime2Retain=0%c"
+ "MaxOutstandingR2T=1%c"
+ "DataPDUInOrder=Yes%c"
+ "DataSequenceInOrder=Yes%c"
+ "ErrorRecoveryLevel=0%c",
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 );
+ }
+
+ return used;
+}
+
+/**
+ * Build iSCSI login request BHS
+ *
+ * @v iscsi		iSCSI session
+ *
+ * The data segment length is obtained by asking
+ * iscsi_build_login_request_strings() to build the strings into a
+ * NULL buffer of length zero.
+ */
+static void iscsi_start_login ( struct iscsi_session *iscsi ) {
+	struct iscsi_bhs_login_request *bhs = &iscsi->tx_bhs.login_request;
+	int stage = ( iscsi->status & ISCSI_LOGIN_CSG_MASK );
+	int strings_len;
+
+	/* Report which negotiation stage is being entered */
+	if ( stage == ISCSI_LOGIN_CSG_SECURITY_NEGOTIATION ) {
+		DBGC ( iscsi, "iSCSI %p entering security negotiation\n",
+		       iscsi );
+	} else if ( stage == ISCSI_LOGIN_CSG_OPERATIONAL_NEGOTIATION ) {
+		DBGC ( iscsi, "iSCSI %p entering operational negotiation\n",
+		       iscsi );
+	} else {
+		assert ( 0 );
+	}
+
+	/* Construct BHS and initiate transmission */
+	iscsi_start_tx ( iscsi );
+	bhs->opcode = ( ISCSI_OPCODE_LOGIN_REQUEST |
+			ISCSI_FLAG_IMMEDIATE );
+	bhs->flags = ( ( iscsi->status & ISCSI_STATUS_PHASE_MASK ) |
+		       ISCSI_LOGIN_FLAG_TRANSITION );
+	/* version_max and version_min left as zero */
+	strings_len = iscsi_build_login_request_strings ( iscsi, NULL, 0 );
+	ISCSI_SET_LENGTHS ( bhs->lengths, 0, strings_len );
+	bhs->isid_iana_en = htonl ( ISCSI_ISID_IANA |
+				    IANA_EN_FEN_SYSTEMS );
+	bhs->isid_iana_qual = htons ( iscsi->isid_iana_qual );
+	/* tsih left as zero */
+	bhs->itt = htonl ( iscsi->itt );
+	/* cid left as zero */
+	bhs->cmdsn = htonl ( iscsi->cmdsn );
+	bhs->expstatsn = htonl ( iscsi->statsn + 1 );
+}
+
+/**
+ * Complete iSCSI login request PDU transmission
+ *
+ * @v iscsi iSCSI session
+ *
+ * Clears every ISCSI_STATUS_STRINGS_MASK bit in the session status, so
+ * that the strings just sent are not generated again by
+ * iscsi_build_login_request_strings(), and finishes with the CHAP
+ * state.
+ */
+static void iscsi_login_request_done ( struct iscsi_session *iscsi ) {
+
+ /* Clear any "strings to send" flags */
+ iscsi->status &= ~ISCSI_STATUS_STRINGS_MASK;
+
+ /* Free any dynamically allocated storage used for login */
+ chap_finish ( &iscsi->chap );
+}
+
+/**
+ * Transmit data segment of an iSCSI login request PDU
+ *
+ * @v iscsi		iSCSI session
+ * @ret rc		Return status code
+ *
+ * The data segment of a login request is the set of login strings,
+ * followed by zero padding.  Both lengths are taken from the login
+ * request BHS currently held in the TX header.
+ */
+static int iscsi_tx_login_request ( struct iscsi_session *iscsi ) {
+	struct iscsi_bhs_login_request *bhs = &iscsi->tx_bhs.login_request;
+	size_t strings_len = ISCSI_DATA_LEN ( bhs->lengths );
+	size_t pad_len = ISCSI_DATA_PAD_LEN ( bhs->lengths );
+	struct io_buffer *iobuf;
+	void *padding;
+
+	/* Allocate room for strings plus padding */
+	iobuf = xfer_alloc_iob ( &iscsi->socket, ( strings_len + pad_len ) );
+	if ( ! iobuf )
+		return -ENOMEM;
+
+	/* Generate the strings directly into the buffer */
+	iob_put ( iobuf, strings_len );
+	iscsi_build_login_request_strings ( iscsi, iobuf->data, strings_len );
+
+	/* Append padding */
+	padding = iob_put ( iobuf, pad_len );
+	memset ( padding, 0, pad_len );
+
+	return xfer_deliver_iob ( &iscsi->socket, iobuf );
+}
+
+/**
+ * Calculate maximum length of decoded large binary value
+ *
+ * @v encoded Encoded large binary value
+ * @ret max_raw_len Maximum length of raw data
+ *
+ * The bound returned is simply the length of the encoded string,
+ * including any encoding prefix it carries.
+ */
+static inline size_t
+iscsi_large_binary_decoded_max_len ( const char *encoded ) {
+ return ( strlen ( encoded ) ); /* Decoding never expands data */
+}
+
+/**
+ * Decode large binary value
+ *
+ * @v encoded		Encoded large binary value
+ * @v raw		Raw data
+ * @ret len		Length of raw data, or negative error
+ *
+ * The value must begin with "0x"/"0X" (decoded using base16_decode())
+ * or "0b"/"0B" (decoded using base64_decode()); anything else is
+ * rejected as an invalid large binary value.
+ */
+static int iscsi_large_binary_decode ( const char *encoded, uint8_t *raw ) {
+	char radix;
+
+	/* Every supported encoding starts with a literal '0' */
+	if ( encoded[0] != '0' )
+		return -EPROTO_INVALID_LARGE_BINARY;
+
+	/* The second character selects the encoding */
+	radix = encoded[1];
+	if ( ( radix == 'x' ) || ( radix == 'X' ) )
+		return base16_decode ( ( encoded + 2 ), raw );
+	if ( ( radix == 'b' ) || ( radix == 'B' ) )
+		return base64_decode ( ( encoded + 2 ), raw );
+
+	return -EPROTO_INVALID_LARGE_BINARY;
+}
+
+/**
+ * Handle iSCSI TargetAddress text value
+ *
+ * @v iscsi		iSCSI session
+ * @v value		TargetAddress value
+ * @ret rc		Return status code
+ *
+ * Replaces the session's target address and port with those given in
+ * the redirection.  The value is split at the first ':'; the text
+ * before it becomes the new address and the text after it is parsed
+ * as the port.  If there is no ':', the default ISCSI_PORT is used.
+ *
+ * NOTE(review): splitting at the first ':' presumably mishandles
+ * bracketed IPv6 literals, and a value with no port but a trailing
+ * ",portal-group-tag" would keep the tag in the address -- confirm
+ * whether such values need to be supported.
+ */
+static int iscsi_handle_targetaddress_value ( struct iscsi_session *iscsi,
+					      const char *value ) {
+	char *separator;
+
+	DBGC ( iscsi, "iSCSI %p will redirect to %s\n", iscsi, value );
+
+	/* Replace target address */
+	free ( iscsi->target_address );
+	iscsi->target_address = strdup ( value );
+	if ( ! iscsi->target_address )
+		return -ENOMEM;
+
+	/* Replace target port.  iscsi->target_port is held in host
+	 * byte order: iscsi_open_connection() applies htons() when it
+	 * fills in the socket address, and an explicit port is stored
+	 * below exactly as parsed.  The default must therefore not be
+	 * byte-swapped here, or it would be swapped twice.
+	 */
+	iscsi->target_port = ISCSI_PORT;
+	separator = strchr ( iscsi->target_address, ':' );
+	if ( separator ) {
+		*separator = '\0';
+		iscsi->target_port = strtoul ( ( separator + 1 ), NULL, 0 );
+	}
+
+	return 0;
+}
+
+/**
+ * Handle iSCSI AuthMethod text value
+ *
+ * @v iscsi		iSCSI session
+ * @v value		AuthMethod value
+ * @ret rc		Return status code
+ *
+ * If the target selected CHAP, the CHAP_A string is queued for the
+ * next login request and forward authentication is marked as
+ * required.  Any other value is accepted without further action.
+ */
+static int iscsi_handle_authmethod_value ( struct iscsi_session *iscsi,
+					   const char *value ) {
+
+	/* Nothing to do unless the server requests CHAP */
+	if ( strcmp ( value, "CHAP" ) != 0 )
+		return 0;
+
+	DBGC ( iscsi, "iSCSI %p initiating CHAP authentication\n",
+	       iscsi );
+	iscsi->status |= ( ISCSI_STATUS_STRINGS_CHAP_ALGORITHM |
+			   ISCSI_STATUS_AUTH_FORWARD_REQUIRED );
+
+	return 0;
+}
+
+/**
+ * Handle iSCSI CHAP_A text value
+ *
+ * @v iscsi		iSCSI session
+ * @v value		CHAP_A value
+ * @ret rc		Return status code
+ *
+ * The only algorithm ever offered is "5" (i.e. MD5), so any other
+ * answer from the server is a protocol violation.
+ */
+static int iscsi_handle_chap_a_value ( struct iscsi_session *iscsi,
+				       const char *value ) {
+
+	/* Accept the one algorithm that we offered */
+	if ( strcmp ( value, "5" ) == 0 )
+		return 0;
+
+	DBGC ( iscsi, "iSCSI %p got invalid CHAP algorithm \"%s\"\n",
+	       iscsi, value );
+	return -EPROTO_INVALID_CHAP_ALGORITHM;
+}
+
+/**
+ * Handle iSCSI CHAP_I text value
+ *
+ * @v iscsi		iSCSI session
+ * @v value		CHAP_I value
+ * @ret rc		Return status code
+ *
+ * Parses the CHAP identifier and restarts the CHAP computation using
+ * MD5, feeding in the identifier followed by the initiator password
+ * (if there is one).
+ */
+static int iscsi_handle_chap_i_value ( struct iscsi_session *iscsi,
+				       const char *value ) {
+	const char *secret = iscsi->initiator_password;
+	unsigned int id;
+	char *end;
+	int rc;
+
+	/* The CHAP identifier is an integer value */
+	id = strtoul ( value, &end, 0 );
+	if ( *end != '\0' ) {
+		DBGC ( iscsi, "iSCSI %p saw invalid CHAP identifier \"%s\"\n",
+		       iscsi, value );
+		return -EPROTO_INVALID_CHAP_IDENTIFIER;
+	}
+
+	/* Discard any previous CHAP state and prepare for MD5 */
+	chap_finish ( &iscsi->chap );
+	rc = chap_init ( &iscsi->chap, &md5_algorithm );
+	if ( rc != 0 ) {
+		DBGC ( iscsi, "iSCSI %p could not initialise CHAP: %s\n",
+		       iscsi, strerror ( rc ) );
+		return rc;
+	}
+
+	/* Identifier and secret are the first two components of the
+	 * challenge.
+	 */
+	chap_set_identifier ( &iscsi->chap, id );
+	if ( secret )
+		chap_update ( &iscsi->chap, secret, strlen ( secret ) );
+
+	return 0;
+}
+
+/**
+ * Handle iSCSI CHAP_C text value
+ *
+ * @v iscsi iSCSI session
+ * @v value CHAP_C value
+ * @ret rc Return status code
+ *
+ * The decoded challenge is added to the CHAP state with chap_update(),
+ * chap_respond() is called, and the CHAP response strings (CHAP_N and
+ * CHAP_R) are flagged for transmission.  If a target username is
+ * configured, a challenge of our own is also generated: every byte of
+ * iscsi->chap_challenge is filled from random() and the CHAP
+ * challenge strings (CHAP_I and CHAP_C) are flagged for transmission.
+ *
+ * NOTE(review): the decode buffer is a variable-length array sized
+ * from the length of the received value string -- confirm that the
+ * caller bounds the length of the value.
+ */
+static int iscsi_handle_chap_c_value ( struct iscsi_session *iscsi,
+ const char *value ) {
+ uint8_t buf[ iscsi_large_binary_decoded_max_len ( value ) ];
+ unsigned int i;
+ size_t len;
+ int rc;
+
+ /* Process challenge */
+ rc = iscsi_large_binary_decode ( value, buf );
+ if ( rc < 0 ) {
+ DBGC ( iscsi, "iSCSI %p invalid CHAP challenge \"%s\": %s\n",
+ iscsi, value, strerror ( rc ) );
+ return rc;
+ }
+ len = rc;
+ chap_update ( &iscsi->chap, buf, len );
+
+ /* Build CHAP response */
+ DBGC ( iscsi, "iSCSI %p sending CHAP response\n", iscsi );
+ chap_respond ( &iscsi->chap );
+ iscsi->status |= ISCSI_STATUS_STRINGS_CHAP_RESPONSE;
+
+ /* Send CHAP challenge, if applicable */
+ if ( iscsi->target_username ) {
+ iscsi->status |= ISCSI_STATUS_STRINGS_CHAP_CHALLENGE;
+ /* Generate CHAP challenge data */
+ for ( i = 0 ; i < sizeof ( iscsi->chap_challenge ) ; i++ ) {
+ iscsi->chap_challenge[i] = random();
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Handle iSCSI CHAP_N text value
+ *
+ * @v iscsi		iSCSI session
+ * @v value		CHAP_N value
+ * @ret rc		Return status code
+ *
+ * The target username plays no part in the authentication itself; it
+ * only identifies which password the target used to generate its
+ * CHAP response.  It is nevertheless checked against the expected
+ * username (when one is configured), purely so that a target using
+ * the wrong username/password combination produces a mildly helpful
+ * diagnostic.
+ */
+static int iscsi_handle_chap_n_value ( struct iscsi_session *iscsi,
+				       const char *value ) {
+	const char *expected = iscsi->target_username;
+
+	/* Accept if nothing is expected, or if the name matches */
+	if ( ( ! expected ) || ( strcmp ( expected, value ) == 0 ) )
+		return 0;
+
+	DBGC ( iscsi, "iSCSI %p target username \"%s\" incorrect "
+	       "(wanted \"%s\")\n",
+	       iscsi, value, iscsi->target_username );
+	return -EACCES_INCORRECT_TARGET_USERNAME;
+}
+
+/**
+ * Handle iSCSI CHAP_R text value
+ *
+ * @v iscsi iSCSI session
+ * @v value CHAP_R value
+ * @ret rc Return status code
+ *
+ * The expected response is recomputed locally with MD5 from the first
+ * byte of iscsi->chap_challenge (used as the identifier), the target
+ * password (if there is one) and the remaining challenge bytes.  The
+ * decoded value is then compared against it: a length mismatch gives
+ * -EPROTO_INVALID_CHAP_RESPONSE and a content mismatch gives
+ * -EACCES_INCORRECT_TARGET_PASSWORD.  On a match,
+ * ISCSI_STATUS_AUTH_REVERSE_OK is set in the session status.
+ *
+ * NOTE(review): the decode buffer is a variable-length array sized
+ * from the length of the received value string -- confirm that the
+ * caller bounds the length of the value.
+ */
+static int iscsi_handle_chap_r_value ( struct iscsi_session *iscsi,
+ const char *value ) {
+ uint8_t buf[ iscsi_large_binary_decoded_max_len ( value ) ];
+ size_t len;
+ int rc;
+
+ /* Generate CHAP response for verification */
+ chap_finish ( &iscsi->chap );
+ if ( ( rc = chap_init ( &iscsi->chap, &md5_algorithm ) ) != 0 ) {
+ DBGC ( iscsi, "iSCSI %p could not initialise CHAP: %s\n",
+ iscsi, strerror ( rc ) );
+ return rc;
+ }
+ chap_set_identifier ( &iscsi->chap, iscsi->chap_challenge[0] );
+ if ( iscsi->target_password ) {
+ chap_update ( &iscsi->chap, iscsi->target_password,
+ strlen ( iscsi->target_password ) );
+ }
+ chap_update ( &iscsi->chap, &iscsi->chap_challenge[1],
+ ( sizeof ( iscsi->chap_challenge ) - 1 ) );
+ chap_respond ( &iscsi->chap );
+
+ /* Process response */
+ rc = iscsi_large_binary_decode ( value, buf );
+ if ( rc < 0 ) {
+ DBGC ( iscsi, "iSCSI %p invalid CHAP response \"%s\": %s\n",
+ iscsi, value, strerror ( rc ) );
+ return rc;
+ }
+ len = rc;
+
+ /* Check CHAP response */
+ if ( len != iscsi->chap.response_len ) {
+ DBGC ( iscsi, "iSCSI %p invalid CHAP response length\n",
+ iscsi );
+ return -EPROTO_INVALID_CHAP_RESPONSE;
+ }
+ if ( memcmp ( buf, iscsi->chap.response, len ) != 0 ) {
+ DBGC ( iscsi, "iSCSI %p incorrect CHAP response \"%s\"\n",
+ iscsi, value );
+ return -EACCES_INCORRECT_TARGET_PASSWORD;
+ }
+
+ /* Mark session as authenticated */
+ iscsi->status |= ISCSI_STATUS_AUTH_REVERSE_OK;
+
+ return 0;
+}
+
+/** An iSCSI text string that we want to handle
+ *
+ * Entries are collected in the iscsi_string_types table, which is
+ * terminated by an entry whose key is NULL.
+ */
+struct iscsi_string_type {
+ /** String key
+ *
+ * This is the portion preceding the "=" sign,
+ * e.g. "InitiatorName", "CHAP_A", etc.
+ */
+ const char *key;
+ /** Handle iSCSI string value
+ *
+ * @v iscsi iSCSI session
+ * @v value iSCSI string value
+ * @ret rc Return status code
+ *
+ * The value passed is the portion following the "=" sign.
+ */
+ int ( * handle ) ( struct iscsi_session *iscsi, const char *value );
+};
+
+/** iSCSI text strings that we want to handle
+ *
+ * Scanned in order by iscsi_handle_string(); the final entry with a
+ * NULL key terminates the scan. Keys not listed here are ignored.
+ */
+static struct iscsi_string_type iscsi_string_types[] = {
+ { "TargetAddress", iscsi_handle_targetaddress_value },
+ { "AuthMethod", iscsi_handle_authmethod_value },
+ { "CHAP_A", iscsi_handle_chap_a_value },
+ { "CHAP_I", iscsi_handle_chap_i_value },
+ { "CHAP_C", iscsi_handle_chap_c_value },
+ { "CHAP_N", iscsi_handle_chap_n_value },
+ { "CHAP_R", iscsi_handle_chap_r_value },
+ { NULL, NULL }
+};
+
+/**
+ * Handle iSCSI string
+ *
+ * @v iscsi		iSCSI session
+ * @v string		iSCSI string (in "key=value" format)
+ * @ret rc		Return status code
+ *
+ * A string with no "=" separator, or with the value "Reject", is a
+ * protocol error.  Otherwise the key is looked up in
+ * iscsi_string_types and the matching handler is called with the
+ * value; strings with unrecognised keys are ignored.
+ */
+static int iscsi_handle_string ( struct iscsi_session *iscsi,
+				 const char *string ) {
+	struct iscsi_string_type *type;
+	const char *separator;
+	const char *value;
+	size_t key_len;
+	int rc;
+
+	/* Find separator */
+	separator = strchr ( string, '=' );
+	if ( ! separator ) {
+		DBGC ( iscsi, "iSCSI %p malformed string %s\n",
+		       iscsi, string );
+		return -EPROTO_INVALID_KEY_VALUE_PAIR;
+	}
+	key_len = ( separator - string );
+	value = ( separator + 1 );
+
+	/* Check for rejections.  Since we send only non-rejectable
+	 * values, any rejection is a fatal protocol error.
+	 */
+	if ( strcmp ( value, "Reject" ) == 0 ) {
+		DBGC ( iscsi, "iSCSI %p rejection: %s\n", iscsi, string );
+		return -EPROTO_VALUE_REJECTED;
+	}
+
+	/* Handle key/value pair */
+	for ( type = iscsi_string_types ; type->key ; type++ ) {
+		/* Require an exact key match.  Comparing only the
+		 * first key_len characters would let a received key
+		 * that is merely a prefix of a table key (e.g. "C",
+		 * or an empty key) select the wrong handler.
+		 */
+		if ( strncmp ( string, type->key, key_len ) != 0 )
+			continue;
+		/* The received key contains no NUL, so a successful
+		 * strncmp() proves the table key is at least key_len
+		 * characters long and this index is in bounds.
+		 */
+		if ( type->key[key_len] != '\0' )
+			continue;
+		DBGC ( iscsi, "iSCSI %p handling %s\n", iscsi, string );
+		if ( ( rc = type->handle ( iscsi, value ) ) != 0 ) {
+			DBGC ( iscsi, "iSCSI %p could not handle %s: %s\n",
+			       iscsi, string, strerror ( rc ) );
+			return rc;
+		}
+		return 0;
+	}
+	DBGC ( iscsi, "iSCSI %p ignoring %s\n", iscsi, string );
+	return 0;
+}
+
+/**
+ * Handle iSCSI strings
+ *
+ * @v iscsi		iSCSI session
+ * @v strings		iSCSI string buffer
+ * @v len		Length of string buffer
+ * @ret rc		Return status code
+ *
+ * The buffer holds a sequence of NUL-terminated strings.  Processing
+ * stops at the first handler error, or silently at the first string
+ * that is not NUL-terminated within the buffer.
+ */
+static int iscsi_handle_strings ( struct iscsi_session *iscsi,
+				  const char *strings, size_t len ) {
+	size_t consumed;
+	int rc;
+
+	/* Walk the buffer one string at a time.  strnlen() bounds the
+	 * scan, so badly-terminated data cannot take us past the end
+	 * of the buffer.
+	 */
+	for ( ; ; strings += consumed, len -= consumed ) {
+		consumed = ( strnlen ( strings, len ) + 1 );
+		if ( consumed > len )
+			return 0;
+		if ( ( rc = iscsi_handle_string ( iscsi, strings ) ) != 0 )
+			return rc;
+	}
+}
+
+/**
+ * Convert iSCSI response status to return status code
+ *
+ * @v status_class	iSCSI status class
+ * @v status_detail	iSCSI status detail
+ * @ret rc		Return status code
+ *
+ * Only the initiator-error and target-error classes are recognised;
+ * any other class yields -EINVAL.
+ */
+static int iscsi_status_to_rc ( unsigned int status_class,
+				unsigned int status_detail ) {
+
+	/* Errors attributed to the initiator */
+	if ( status_class == ISCSI_STATUS_INITIATOR_ERROR ) {
+		if ( status_detail ==
+		     ISCSI_STATUS_INITIATOR_ERROR_AUTHENTICATION )
+			return -EPERM_INITIATOR_AUTHENTICATION;
+		if ( status_detail ==
+		     ISCSI_STATUS_INITIATOR_ERROR_AUTHORISATION )
+			return -EPERM_INITIATOR_AUTHORISATION;
+		if ( ( status_detail ==
+		       ISCSI_STATUS_INITIATOR_ERROR_NOT_FOUND ) ||
+		     ( status_detail ==
+		       ISCSI_STATUS_INITIATOR_ERROR_REMOVED ) )
+			return -ENODEV;
+		return -ENOTSUP_INITIATOR_STATUS;
+	}
+
+	/* Errors attributed to the target */
+	if ( status_class == ISCSI_STATUS_TARGET_ERROR ) {
+		if ( status_detail == ISCSI_STATUS_TARGET_ERROR_UNAVAILABLE )
+			return -EIO_TARGET_UNAVAILABLE;
+		if ( status_detail == ISCSI_STATUS_TARGET_ERROR_NO_RESOURCES )
+			return -EIO_TARGET_NO_RESOURCES;
+		return -ENOTSUP_TARGET_STATUS;
+	}
+
+	/* Unrecognised status class */
+	return -EINVAL;
+}
+
+/**
+ * Receive data segment of an iSCSI login response PDU
+ *
+ * @v iscsi		iSCSI session
+ * @v data		Received data
+ * @v len		Length of received data
+ * @v remaining		Data remaining after this data
+ * @ret rc		Return status code
+ *
+ * Fragments are buffered until the whole data segment has arrived
+ * (remaining == 0).  The buffered key=value strings are then
+ * processed, followed by redirection, error status and login phase
+ * transitions from the response header.
+ */
+static int iscsi_rx_login_response ( struct iscsi_session *iscsi,
+				     const void *data, size_t len,
+				     size_t remaining ) {
+	struct iscsi_bhs_login_response *response
+		= &iscsi->rx_bhs.login_response;
+	int rc;
+
+	/* Buffer up the PDU data */
+	if ( ( rc = iscsi_rx_buffered_data ( iscsi, data, len ) ) != 0 ) {
+		DBGC ( iscsi, "iSCSI %p could not buffer login response: %s\n",
+		       iscsi, strerror ( rc ) );
+		return rc;
+	}
+	if ( remaining )
+		return 0;
+
+	/* Process string data and discard string buffer */
+	if ( ( rc = iscsi_handle_strings ( iscsi, iscsi->rx_buffer,
+					   iscsi->rx_len ) ) != 0 )
+		return rc;
+	iscsi_rx_buffered_data_done ( iscsi );
+
+	/* Check for login redirection */
+	if ( response->status_class == ISCSI_STATUS_REDIRECT ) {
+		DBGC ( iscsi, "iSCSI %p redirecting to new server\n", iscsi );
+		iscsi_close_connection ( iscsi, 0 );
+		if ( ( rc = iscsi_open_connection ( iscsi ) ) != 0 ) {
+			DBGC ( iscsi, "iSCSI %p could not redirect: %s\n",
+			       iscsi, strerror ( rc ) );
+			return rc;
+		}
+		return 0;
+	}
+
+	/* Check for fatal errors */
+	if ( response->status_class != 0 ) {
+		DBGC ( iscsi, "iSCSI %p login failure: class %02x detail "
+		       "%02x\n", iscsi, response->status_class,
+		       response->status_detail );
+		rc = iscsi_status_to_rc ( response->status_class,
+					  response->status_detail );
+		return rc;
+	}
+
+	/* Handle login transitions */
+	if ( response->flags & ISCSI_LOGIN_FLAG_TRANSITION ) {
+		iscsi->status &= ~( ISCSI_STATUS_PHASE_MASK |
+				    ISCSI_STATUS_STRINGS_MASK );
+		switch ( response->flags & ISCSI_LOGIN_NSG_MASK ) {
+		case ISCSI_LOGIN_NSG_OPERATIONAL_NEGOTIATION:
+			iscsi->status |=
+				( ISCSI_STATUS_OPERATIONAL_NEGOTIATION_PHASE |
+				  ISCSI_STATUS_STRINGS_OPERATIONAL );
+			break;
+		case ISCSI_LOGIN_NSG_FULL_FEATURE_PHASE:
+			iscsi->status |= ISCSI_STATUS_FULL_FEATURE_PHASE;
+			break;
+		default:
+			DBGC ( iscsi, "iSCSI %p got invalid response flags "
+			       "%02x\n", iscsi, response->flags );
+			return -EIO;
+		}
+	}
+
+	/* Send next login request PDU if we haven't reached the full
+	 * feature phase yet.
+	 */
+	if ( ( iscsi->status & ISCSI_STATUS_PHASE_MASK ) !=
+	     ISCSI_STATUS_FULL_FEATURE_PHASE ) {
+		iscsi_start_login ( iscsi );
+		return 0;
+	}
+
+	/* Check that target authentication was successful (if required) */
+	if ( ( iscsi->status & ISCSI_STATUS_AUTH_REVERSE_REQUIRED ) &&
+	     ! ( iscsi->status & ISCSI_STATUS_AUTH_REVERSE_OK ) ) {
+		DBGC ( iscsi, "iSCSI %p nefarious target tried to bypass "
+		       "authentication\n", iscsi );
+		return -EPROTO;
+	}
+
+	/* Notify SCSI layer of window change */
+	DBGC ( iscsi, "iSCSI %p entering full feature phase\n", iscsi );
+	xfer_window_changed ( &iscsi->control );
+
+	return 0;
+}
+
+/****************************************************************************
+ *
+ * iSCSI to socket interface
+ *
+ */
+
+/**
+ * Pause TX engine
+ *
+ * @v iscsi iSCSI session
+ *
+ * Removes the session's TX process with process_del(); the process
+ * is added again by iscsi_tx_resume().
+ */
+static void iscsi_tx_pause ( struct iscsi_session *iscsi ) {
+ process_del ( &iscsi->process );
+}
+
+/**
+ * Resume TX engine
+ *
+ * @v iscsi iSCSI session
+ *
+ * Adds the session's TX process with process_add(). This function is
+ * also registered as the socket interface's xfer_window_changed
+ * handler.
+ */
+static void iscsi_tx_resume ( struct iscsi_session *iscsi ) {
+ process_add ( &iscsi->process );
+}
+
+/**
+ * Start up a new TX PDU
+ *
+ * @v iscsi iSCSI session
+ *
+ * This initiates the process of sending a new PDU. Only one PDU may
+ * be in transit at any one time.
+ *
+ * The TX BHS is zeroed here; presumably the caller fills in the
+ * PDU-specific header fields afterwards (confirm against callers).
+ */
+static void iscsi_start_tx ( struct iscsi_session *iscsi ) {
+
+ /* A PDU may be started only when the previous one has finished */
+ assert ( iscsi->tx_state == ISCSI_TX_IDLE );
+
+ /* Initialise TX BHS */
+ memset ( &iscsi->tx_bhs, 0, sizeof ( iscsi->tx_bhs ) );
+
+ /* Flag TX engine to start transmitting */
+ iscsi->tx_state = ISCSI_TX_BHS;
+
+ /* Start transmission process */
+ iscsi_tx_resume ( iscsi );
+}
+
+/**
+ * Transmit nothing
+ *
+ * @v iscsi iSCSI session
+ * @ret rc Return status code
+ *
+ * Placeholder transmit method used by iscsi_tx_step() for the AHS
+ * state; it always succeeds.
+ */
+static int iscsi_tx_nothing ( struct iscsi_session *iscsi __unused ) {
+ return 0;
+}
+
+/**
+ * Transmit basic header segment of an iSCSI PDU
+ *
+ * @v iscsi iSCSI session
+ * @ret rc Return status code
+ *
+ * The whole of iscsi::tx_bhs is passed to the socket interface in a
+ * single xfer_deliver_raw() call.
+ */
+static int iscsi_tx_bhs ( struct iscsi_session *iscsi ) {
+ return xfer_deliver_raw ( &iscsi->socket, &iscsi->tx_bhs,
+ sizeof ( iscsi->tx_bhs ) );
+}
+
+/**
+ * Transmit data segment of an iSCSI PDU
+ *
+ * @v iscsi		iSCSI session
+ * @ret rc		Return status code
+ *
+ * Dispatches on the opcode held in iscsi::tx_bhs (which is valid when
+ * this is called) to transmit part of the PDU's data segment.
+ */
+static int iscsi_tx_data ( struct iscsi_session *iscsi ) {
+	unsigned int opcode =
+		( iscsi->tx_bhs.common.opcode & ISCSI_OPCODE_MASK );
+
+	if ( opcode == ISCSI_OPCODE_DATA_OUT )
+		return iscsi_tx_data_out ( iscsi );
+	if ( opcode == ISCSI_OPCODE_LOGIN_REQUEST )
+		return iscsi_tx_login_request ( iscsi );
+
+	/* Nothing to send for any other opcode */
+	return 0;
+}
+
+/**
+ * Complete iSCSI PDU transmission
+ *
+ * @v iscsi		iSCSI session
+ *
+ * Called when a PDU has been completely transmitted and the TX state
+ * machine is about to enter the idle state.  iscsi::tx_bhs will be
+ * valid for the just-completed PDU when this is called.
+ */
+static void iscsi_tx_done ( struct iscsi_session *iscsi ) {
+	struct iscsi_bhs_common *common = &iscsi->tx_bhs.common;
+
+	/* Stop transmission process */
+	iscsi_tx_pause ( iscsi );
+
+	/* Run the completion handler for this PDU type only.  Each
+	 * case ends in an explicit break, so that finishing a
+	 * Data-Out PDU does not also run the login-request handler.
+	 */
+	switch ( common->opcode & ISCSI_OPCODE_MASK ) {
+	case ISCSI_OPCODE_DATA_OUT:
+		iscsi_data_out_done ( iscsi );
+		break;
+	case ISCSI_OPCODE_LOGIN_REQUEST:
+		iscsi_login_request_done ( iscsi );
+		break;
+	default:
+		/* No action */
+		break;
+	}
+}
+
+/**
+ * Transmit iSCSI PDU
+ *
+ * @v iscsi iSCSI session
+ *
+ * Step function for the iSCSI TX process. Transmits the fragment
+ * for the current TX state (BHS, then AHS, then data) and advances
+ * the state machine, looping until the PDU is complete, the socket
+ * window is closed, or a transmit error closes the session.
+ */
+static void iscsi_tx_step ( struct iscsi_session *iscsi ) {
+ struct iscsi_bhs_common *common = &iscsi->tx_bhs.common;
+ int ( * tx ) ( struct iscsi_session *iscsi );
+ enum iscsi_tx_state next_state;
+ size_t tx_len;
+ int rc;
+
+ /* Select fragment to transmit */
+ while ( 1 ) {
+ switch ( iscsi->tx_state ) {
+ case ISCSI_TX_BHS:
+ tx = iscsi_tx_bhs;
+ tx_len = sizeof ( iscsi->tx_bhs );
+ next_state = ISCSI_TX_AHS;
+ break;
+ case ISCSI_TX_AHS:
+ /* We never send an AHS; this state is a no-op */
+ tx = iscsi_tx_nothing;
+ tx_len = 0;
+ next_state = ISCSI_TX_DATA;
+ break;
+ case ISCSI_TX_DATA:
+ tx = iscsi_tx_data;
+ tx_len = ISCSI_DATA_LEN ( common->lengths );
+ next_state = ISCSI_TX_IDLE;
+ break;
+ case ISCSI_TX_IDLE:
+ /* Nothing to do; pause processing */
+ iscsi_tx_pause ( iscsi );
+ return;
+ default:
+ assert ( 0 );
+ return;
+ }
+
+ /* Check for window availability, if needed */
+ /* Zero-length fragments skip the window check */
+ if ( tx_len && ( xfer_window ( &iscsi->socket ) == 0 ) ) {
+ /* Cannot transmit at this point; pause
+ * processing and wait for window to reopen
+ */
+ iscsi_tx_pause ( iscsi );
+ return;
+ }
+
+ /* Transmit data */
+ if ( ( rc = tx ( iscsi ) ) != 0 ) {
+ DBGC ( iscsi, "iSCSI %p could not transmit: %s\n",
+ iscsi, strerror ( rc ) );
+ /* Transmission errors are fatal */
+ iscsi_close ( iscsi, rc );
+ return;
+ }
+
+ /* Move to next state */
+ iscsi->tx_state = next_state;
+
+ /* If we have moved to the idle state, mark
+ * transmission as complete
+ */
+ if ( iscsi->tx_state == ISCSI_TX_IDLE )
+ iscsi_tx_done ( iscsi );
+ }
+}
+
+/** iSCSI TX process descriptor
+ *
+ * Binds iscsi_tx_step() to the "process" member of struct
+ * iscsi_session.
+ */
+static struct process_descriptor iscsi_process_desc =
+ PROC_DESC ( struct iscsi_session, process, iscsi_tx_step );
+
+/**
+ * Receive basic header segment of an iSCSI PDU
+ *
+ * @v iscsi		iSCSI session
+ * @v data		Received data
+ * @v len		Length of received data
+ * @v remaining		Data remaining after this data
+ * @ret rc		Return status code
+ *
+ * Copies this fragment of the BHS into iscsi::rx_bhs at the current
+ * receive offset.  Always succeeds.
+ */
+static int iscsi_rx_bhs ( struct iscsi_session *iscsi, const void *data,
+			  size_t len, size_t remaining __unused ) {
+	size_t received = ( iscsi->rx_offset + len );
+
+	memcpy ( &iscsi->rx_bhs.bytes[iscsi->rx_offset], data, len );
+
+	/* Header still incomplete: nothing to report yet */
+	if ( received < sizeof ( iscsi->rx_bhs ) )
+		return 0;
+
+	DBGC2 ( iscsi, "iSCSI %p received PDU opcode %#x len %#x\n",
+		iscsi, iscsi->rx_bhs.common.opcode,
+		ISCSI_DATA_LEN ( iscsi->rx_bhs.common.lengths ) );
+	return 0;
+}
+
+/**
+ * Discard portion of an iSCSI PDU.
+ *
+ * @v iscsi iSCSI session
+ * @v data Received data
+ * @v len Length of received data
+ * @v remaining Data remaining after this data
+ * @ret rc Return status code
+ *
+ * This discards data from a portion of a received PDU. It is the
+ * receive method used by iscsi_socket_deliver() for the AHS and
+ * data-padding states, and always succeeds.
+ */
+static int iscsi_rx_discard ( struct iscsi_session *iscsi __unused,
+ const void *data __unused, size_t len __unused,
+ size_t remaining __unused ) {
+ /* Do nothing */
+ return 0;
+}
+
+/**
+ * Receive data segment of an iSCSI PDU
+ *
+ * @v iscsi iSCSI session
+ * @v data Received data
+ * @v len Length of received data
+ * @v remaining Data remaining after this data
+ * @ret rc Return status code
+ *
+ * Handle processing of part of a PDU data segment. iscsi::rx_bhs
+ * will be valid when this is called.
+ *
+ * The fragment is dispatched to the handler for the opcode in the
+ * received BHS. An unknown opcode yields -ENOTSUP_OPCODE, but only
+ * once the final fragment of the segment has arrived.
+ */
+static int iscsi_rx_data ( struct iscsi_session *iscsi, const void *data,
+ size_t len, size_t remaining ) {
+ struct iscsi_bhs_common_response *response
+ = &iscsi->rx_bhs.common_response;
+
+ /* Update cmdsn and statsn */
+ /* This runs on every call, i.e. once per received fragment */
+ iscsi->cmdsn = ntohl ( response->expcmdsn );
+ iscsi->statsn = ntohl ( response->statsn );
+
+ switch ( response->opcode & ISCSI_OPCODE_MASK ) {
+ case ISCSI_OPCODE_LOGIN_RESPONSE:
+ return iscsi_rx_login_response ( iscsi, data, len, remaining );
+ case ISCSI_OPCODE_SCSI_RESPONSE:
+ return iscsi_rx_scsi_response ( iscsi, data, len, remaining );
+ case ISCSI_OPCODE_DATA_IN:
+ return iscsi_rx_data_in ( iscsi, data, len, remaining );
+ case ISCSI_OPCODE_R2T:
+ return iscsi_rx_r2t ( iscsi, data, len, remaining );
+ case ISCSI_OPCODE_NOP_IN:
+ return iscsi_rx_nop_in ( iscsi, data, len, remaining );
+ default:
+ /* Defer the error until the whole segment is consumed */
+ if ( remaining )
+ return 0;
+ DBGC ( iscsi, "iSCSI %p unknown opcode %02x\n", iscsi,
+ response->opcode );
+ return -ENOTSUP_OPCODE;
+ }
+}
+
+/**
+ * Receive new data
+ *
+ * @v iscsi iSCSI session
+ * @v iobuf I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ *
+ * This handles received PDUs. The receive strategy is to fill in
+ * iscsi::rx_bhs with the contents of the BHS portion of the PDU,
+ * throw away any AHS portion, and then process each part of the data
+ * portion as it arrives. The data processing routine therefore
+ * always has a full copy of the BHS available, even for portions of
+ * the data in different packets to the BHS.
+ *
+ * The I/O buffer is always freed before returning. On error the
+ * session is closed with the error code.
+ */
+static int iscsi_socket_deliver ( struct iscsi_session *iscsi,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *meta __unused ) {
+ struct iscsi_bhs_common *common = &iscsi->rx_bhs.common;
+ int ( * rx ) ( struct iscsi_session *iscsi, const void *data,
+ size_t len, size_t remaining );
+ enum iscsi_rx_state next_state;
+ size_t frag_len;
+ size_t remaining;
+ int rc;
+
+ while ( 1 ) {
+ /* Select receive method, expected length and successor
+ * state for the current RX state.
+ */
+ switch ( iscsi->rx_state ) {
+ case ISCSI_RX_BHS:
+ rx = iscsi_rx_bhs;
+ iscsi->rx_len = sizeof ( iscsi->rx_bhs );
+ next_state = ISCSI_RX_AHS;
+ break;
+ case ISCSI_RX_AHS:
+ rx = iscsi_rx_discard;
+ iscsi->rx_len = 4 * ISCSI_AHS_LEN ( common->lengths );
+ next_state = ISCSI_RX_DATA;
+ break;
+ case ISCSI_RX_DATA:
+ rx = iscsi_rx_data;
+ iscsi->rx_len = ISCSI_DATA_LEN ( common->lengths );
+ next_state = ISCSI_RX_DATA_PADDING;
+ break;
+ case ISCSI_RX_DATA_PADDING:
+ rx = iscsi_rx_discard;
+ iscsi->rx_len = ISCSI_DATA_PAD_LEN ( common->lengths );
+ next_state = ISCSI_RX_BHS;
+ break;
+ default:
+ assert ( 0 );
+ rc = -EINVAL;
+ goto done;
+ }
+
+ /* Consume as much of this state's data as the buffer
+ * holds; "remaining" is what the state still needs after
+ * this fragment.
+ */
+ frag_len = iscsi->rx_len - iscsi->rx_offset;
+ if ( frag_len > iob_len ( iobuf ) )
+ frag_len = iob_len ( iobuf );
+ remaining = iscsi->rx_len - iscsi->rx_offset - frag_len;
+ if ( ( rc = rx ( iscsi, iobuf->data, frag_len,
+ remaining ) ) != 0 ) {
+ DBGC ( iscsi, "iSCSI %p could not process received "
+ "data: %s\n", iscsi, strerror ( rc ) );
+ goto done;
+ }
+
+ iscsi->rx_offset += frag_len;
+ iob_pull ( iobuf, frag_len );
+
+ /* If all the data for this state has not yet been
+ * received, stay in this state for now.
+ */
+ if ( iscsi->rx_offset != iscsi->rx_len ) {
+ rc = 0;
+ goto done;
+ }
+
+ /* State complete: advance and restart the offset */
+ iscsi->rx_state = next_state;
+ iscsi->rx_offset = 0;
+ }
+
+ done:
+ /* Free I/O buffer */
+ free_iob ( iobuf );
+
+ /* Destroy session on error */
+ if ( rc != 0 )
+ iscsi_close ( iscsi, rc );
+
+ return rc;
+}
+
+/**
+ * Handle redirection event
+ *
+ * @v iscsi iSCSI session
+ * @v type Location type
+ * @v args Remaining arguments depend upon location type
+ * @ret rc Return status code
+ *
+ * For LOCATION_SOCKET redirects the peer socket address is copied
+ * into iscsi::target_sockaddr. The redirect itself is then carried
+ * out by xfer_vreopen() on the socket interface.
+ */
+static int iscsi_vredirect ( struct iscsi_session *iscsi, int type,
+ va_list args ) {
+ va_list tmp;
+ struct sockaddr *peer;
+
+ /* Intercept redirects to a LOCATION_SOCKET and record the IP
+ * address for the iBFT. This is a bit of a hack, but avoids
+ * inventing an ioctl()-style call to retrieve the socket
+ * address from a data-xfer interface.
+ */
+ if ( type == LOCATION_SOCKET ) {
+ /* Work on a copy so that "args" is left untouched for
+ * xfer_vreopen() below.
+ */
+ va_copy ( tmp, args );
+ ( void ) va_arg ( tmp, int ); /* Discard "semantics" */
+ peer = va_arg ( tmp, struct sockaddr * );
+ memcpy ( &iscsi->target_sockaddr, peer,
+ sizeof ( iscsi->target_sockaddr ) );
+ va_end ( tmp );
+ }
+
+ return xfer_vreopen ( &iscsi->socket, type, args );
+}
+
+/** iSCSI socket interface operations
+ *
+ * Received data is handled by iscsi_socket_deliver(), a window
+ * change resumes the TX engine, redirects go through
+ * iscsi_vredirect(), and closure of the socket closes the session.
+ */
+static struct interface_operation iscsi_socket_operations[] = {
+ INTF_OP ( xfer_deliver, struct iscsi_session *, iscsi_socket_deliver ),
+ INTF_OP ( xfer_window_changed, struct iscsi_session *,
+ iscsi_tx_resume ),
+ INTF_OP ( xfer_vredirect, struct iscsi_session *, iscsi_vredirect ),
+ INTF_OP ( intf_close, struct iscsi_session *, iscsi_close ),
+};
+
+/** iSCSI socket interface descriptor
+ *
+ * Describes the "socket" member of struct iscsi_session.
+ */
+static struct interface_descriptor iscsi_socket_desc =
+ INTF_DESC ( struct iscsi_session, socket, iscsi_socket_operations );
+
+/****************************************************************************
+ *
+ * iSCSI command issuing
+ *
+ */
+
+/**
+ * Check iSCSI flow-control window
+ *
+ * @v iscsi		iSCSI session
+ * @ret len		Length of window
+ *
+ * The window is 1 when the session is in the full feature phase and
+ * has no command outstanding, and 0 otherwise.
+ */
+static size_t iscsi_scsi_window ( struct iscsi_session *iscsi ) {
+
+	/* No commands may be issued until login has completed */
+	if ( ( iscsi->status & ISCSI_STATUS_PHASE_MASK ) !=
+	     ISCSI_STATUS_FULL_FEATURE_PHASE )
+		return 0;
+
+	/* We cannot handle concurrent commands */
+	if ( iscsi->command != NULL )
+		return 0;
+
+	return 1;
+}
+
+/**
+ * Issue iSCSI SCSI command
+ *
+ * @v iscsi iSCSI session
+ * @v parent Parent interface
+ * @v command SCSI command
+ * @ret tag Command tag, or negative error
+ *
+ * A private copy of the command is stored in iscsi::command. The
+ * returned tag is the newly assigned initiator task tag
+ * (iscsi::itt).
+ */
+static int iscsi_scsi_command ( struct iscsi_session *iscsi,
+ struct interface *parent,
+ struct scsi_cmd *command ) {
+
+ /* This iSCSI implementation cannot handle multiple concurrent
+ * commands or commands arriving before login is complete.
+ */
+ /* Both cases are reported by a zero-length window */
+ if ( iscsi_scsi_window ( iscsi ) == 0 ) {
+ DBGC ( iscsi, "iSCSI %p cannot handle concurrent commands\n",
+ iscsi );
+ return -EOPNOTSUPP;
+ }
+
+ /* Store command */
+ iscsi->command = malloc ( sizeof ( *command ) );
+ if ( ! iscsi->command )
+ return -ENOMEM;
+ memcpy ( iscsi->command, command, sizeof ( *command ) );
+
+ /* Assign new ITT */
+ iscsi_new_itt ( iscsi );
+
+ /* Start sending command */
+ iscsi_start_command ( iscsi );
+
+ /* Attach to parent interface and return */
+ intf_plug_plug ( &iscsi->data, parent );
+ return iscsi->itt;
+}
+
+/** iSCSI SCSI command-issuing interface operations
+ *
+ * Commands are accepted by iscsi_scsi_command(), the command window
+ * is reported by iscsi_scsi_window(), closure closes the session,
+ * and ACPI description is delegated to ibft_describe().
+ */
+static struct interface_operation iscsi_control_op[] = {
+ INTF_OP ( scsi_command, struct iscsi_session *, iscsi_scsi_command ),
+ INTF_OP ( xfer_window, struct iscsi_session *, iscsi_scsi_window ),
+ INTF_OP ( intf_close, struct iscsi_session *, iscsi_close ),
+ INTF_OP ( acpi_describe, struct iscsi_session *, ibft_describe ),
+};
+
+/** iSCSI SCSI command-issuing interface descriptor
+ *
+ * Describes the "control" member of struct iscsi_session.
+ */
+static struct interface_descriptor iscsi_control_desc =
+ INTF_DESC ( struct iscsi_session, control, iscsi_control_op );
+
+/**
+ * Close iSCSI command
+ *
+ * @v iscsi iSCSI session
+ * @v rc Reason for close
+ *
+ * Restarts the data interface. If a command is still outstanding
+ * the whole session is closed, using -ECANCELED when no error
+ * reason was given.
+ */
+static void iscsi_command_close ( struct iscsi_session *iscsi, int rc ) {
+
+ /* Restart interface */
+ intf_restart ( &iscsi->data, rc );
+
+ /* Treat unsolicited command closures mid-command as fatal,
+ * because we have no code to handle partially-completed PDUs.
+ */
+ if ( iscsi->command != NULL )
+ iscsi_close ( iscsi, ( ( rc == 0 ) ? -ECANCELED : rc ) );
+}
+
+/** iSCSI SCSI command interface operations
+ *
+ * The only operation handled is closure, via iscsi_command_close().
+ */
+static struct interface_operation iscsi_data_op[] = {
+ INTF_OP ( intf_close, struct iscsi_session *, iscsi_command_close ),
+};
+
+/** iSCSI SCSI command interface descriptor
+ *
+ * Describes the "data" member of struct iscsi_session.
+ */
+static struct interface_descriptor iscsi_data_desc =
+ INTF_DESC ( struct iscsi_session, data, iscsi_data_op );
+
+/****************************************************************************
+ *
+ * Instantiator
+ *
+ */
+
+/** iSCSI root path components (as per RFC4173)
+ *
+ * The values index the colon-separated fields of the root path in
+ * the order in which they appear:
+ * servername:protocol:port:LUN:targetname
+ */
+enum iscsi_root_path_component {
+ RP_SERVERNAME = 0,
+ RP_PROTOCOL,
+ RP_PORT,
+ RP_LUN,
+ RP_TARGETNAME,
+ /* Number of components; not itself a component */
+ NUM_RP_COMPONENTS
+};
+
+/** iSCSI initiator IQN setting
+ *
+ * Read by iscsi_fetch_settings(); when set, it is used as the
+ * initiator IQN in preference to one built from the hostname or UUID.
+ */
+const struct setting initiator_iqn_setting __setting ( SETTING_SANBOOT_EXTRA,
+ initiator-iqn ) = {
+ .name = "initiator-iqn",
+ .description = "iSCSI initiator name",
+ .tag = DHCP_ISCSI_INITIATOR_IQN,
+ .type = &setting_type_string,
+};
+
+/** iSCSI reverse username setting
+ *
+ * Read by iscsi_fetch_settings() into iscsi::target_username.
+ */
+const struct setting reverse_username_setting __setting ( SETTING_AUTH_EXTRA,
+ reverse-username ) = {
+ .name = "reverse-username",
+ .description = "Reverse user name",
+ .tag = DHCP_EB_REVERSE_USERNAME,
+ .type = &setting_type_string,
+};
+
+/** iSCSI reverse password setting
+ *
+ * Read by iscsi_fetch_settings() into iscsi::target_password.
+ */
+const struct setting reverse_password_setting __setting ( SETTING_AUTH_EXTRA,
+ reverse-password ) = {
+ .name = "reverse-password",
+ .description = "Reverse password",
+ .tag = DHCP_EB_REVERSE_PASSWORD,
+ .type = &setting_type_string,
+};
+
+/**
+ * Parse iSCSI root path
+ *
+ * @v iscsi iSCSI session
+ * @v root_path iSCSI root path (as per RFC4173)
+ * @ret rc Return status code
+ *
+ * Splits the root path at ":" into its components and fills in the
+ * session's target address, port, LUN and target IQN. The protocol
+ * component is parsed but not used. Fields assigned before a
+ * failure are left set; nothing is freed here.
+ */
+static int iscsi_parse_root_path ( struct iscsi_session *iscsi,
+ const char *root_path ) {
+ char rp_copy[ strlen ( root_path ) + 1 ];
+ char *rp_comp[NUM_RP_COMPONENTS];
+ char *rp = rp_copy;
+ int i = 0;
+ int rc;
+
+ /* Split root path into component parts */
+ /* Work on a local copy, since the separators are overwritten
+ * with NULs. The final component is not scanned, so it keeps
+ * any further ":" characters it contains.
+ */
+ strcpy ( rp_copy, root_path );
+ while ( 1 ) {
+ rp_comp[i++] = rp;
+ if ( i == NUM_RP_COMPONENTS )
+ break;
+ for ( ; *rp != ':' ; rp++ ) {
+ if ( ! *rp ) {
+ DBGC ( iscsi, "iSCSI %p root path \"%s\" "
+ "too short\n", iscsi, root_path );
+ return -EINVAL_ROOT_PATH_TOO_SHORT;
+ }
+ }
+ *(rp++) = '\0';
+ }
+
+ /* Use root path components to configure iSCSI session */
+ iscsi->target_address = strdup ( rp_comp[RP_SERVERNAME] );
+ if ( ! iscsi->target_address )
+ return -ENOMEM;
+ /* A port that parses as zero selects the default port */
+ iscsi->target_port = strtoul ( rp_comp[RP_PORT], NULL, 10 );
+ if ( ! iscsi->target_port )
+ iscsi->target_port = ISCSI_PORT;
+ if ( ( rc = scsi_parse_lun ( rp_comp[RP_LUN], &iscsi->lun ) ) != 0 ) {
+ DBGC ( iscsi, "iSCSI %p invalid LUN \"%s\"\n",
+ iscsi, rp_comp[RP_LUN] );
+ return rc;
+ }
+ iscsi->target_iqn = strdup ( rp_comp[RP_TARGETNAME] );
+ if ( ! iscsi->target_iqn )
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * Fetch iSCSI settings
+ *
+ * @v iscsi iSCSI session
+ * @ret rc Return status code
+ *
+ * Fetches the forward and reverse CHAP credentials, then determines
+ * the initiator IQN: the explicit initiator-iqn setting if present,
+ * otherwise one built from the hostname, otherwise one built from
+ * the UUID. Fails if none of these is available.
+ */
+static int iscsi_fetch_settings ( struct iscsi_session *iscsi ) {
+ char *hostname;
+ union uuid uuid;
+ int len;
+
+ /* Fetch relevant settings. Don't worry about freeing on
+ * error, since iscsi_free() will take care of that anyway.
+ */
+ /* Return values are deliberately ignored here */
+ fetch_string_setting_copy ( NULL, &username_setting,
+ &iscsi->initiator_username );
+ fetch_string_setting_copy ( NULL, &password_setting,
+ &iscsi->initiator_password );
+ fetch_string_setting_copy ( NULL, &reverse_username_setting,
+ &iscsi->target_username );
+ fetch_string_setting_copy ( NULL, &reverse_password_setting,
+ &iscsi->target_password );
+
+ /* Use explicit initiator IQN if provided */
+ fetch_string_setting_copy ( NULL, &initiator_iqn_setting,
+ &iscsi->initiator_iqn );
+ if ( iscsi->initiator_iqn )
+ return 0;
+
+ /* Otherwise, try to construct an initiator IQN from the hostname */
+ /* NOTE(review): relies on fetch_string_setting_copy() storing
+ * NULL in "hostname" when the setting is absent - confirm.
+ */
+ fetch_string_setting_copy ( NULL, &hostname_setting, &hostname );
+ if ( hostname ) {
+ len = asprintf ( &iscsi->initiator_iqn,
+ ISCSI_DEFAULT_IQN_PREFIX ":%s", hostname );
+ free ( hostname );
+ if ( len < 0 ) {
+ DBGC ( iscsi, "iSCSI %p could not allocate initiator "
+ "IQN\n", iscsi );
+ return -ENOMEM;
+ }
+ assert ( iscsi->initiator_iqn );
+ return 0;
+ }
+
+ /* Otherwise, try to construct an initiator IQN from the UUID */
+ if ( ( len = fetch_uuid_setting ( NULL, &uuid_setting, &uuid ) ) < 0 ) {
+ DBGC ( iscsi, "iSCSI %p has no suitable initiator IQN\n",
+ iscsi );
+ return -EINVAL_NO_INITIATOR_IQN;
+ }
+ if ( ( len = asprintf ( &iscsi->initiator_iqn,
+ ISCSI_DEFAULT_IQN_PREFIX ":%s",
+ uuid_ntoa ( &uuid ) ) ) < 0 ) {
+ DBGC ( iscsi, "iSCSI %p could not allocate initiator IQN\n",
+ iscsi );
+ return -ENOMEM;
+ }
+ assert ( iscsi->initiator_iqn );
+
+ return 0;
+}
+
+
+/**
+ * Check iSCSI authentication details
+ *
+ * @v iscsi		iSCSI session
+ * @ret rc		Return status code
+ *
+ * Rejects credential sets in which a username and its password are
+ * not both present or both absent, or in which target (reverse)
+ * credentials are given without initiator (forward) credentials.
+ */
+static int iscsi_check_auth ( struct iscsi_session *iscsi ) {
+	int have_fwd_name = ( iscsi->initiator_username != NULL );
+	int have_fwd_pw = ( iscsi->initiator_password != NULL );
+	int have_rev_name = ( iscsi->target_username != NULL );
+	int have_rev_pw = ( iscsi->target_password != NULL );
+	int invalid = 0;
+
+	/* Initiator username without password (or vice-versa) */
+	if ( have_fwd_name != have_fwd_pw )
+		invalid = 1;
+
+	/* Target username without password (or vice-versa) */
+	if ( have_rev_name != have_rev_pw )
+		invalid = 1;
+
+	/* Target (reverse) without initiator (forward) */
+	if ( have_rev_name && ( ! have_fwd_name ) )
+		invalid = 1;
+
+	if ( ! invalid )
+		return 0;
+
+	DBGC ( iscsi, "iSCSI %p invalid credentials: initiator "
+	       "%sname,%spw, target %sname,%spw\n", iscsi,
+	       ( iscsi->initiator_username ? "" : "no " ),
+	       ( iscsi->initiator_password ? "" : "no " ),
+	       ( iscsi->target_username ? "" : "no " ),
+	       ( iscsi->target_password ? "" : "no " ) );
+	return -EINVAL_BAD_CREDENTIAL_MIX;
+}
+
+/**
+ * Open iSCSI URI
+ *
+ * @v parent		Parent interface
+ * @v uri		URI
+ * @ret rc		Return status code
+ *
+ * Allocates a session, configures it from the URI's opaque part (the
+ * iSCSI root path) and from settings, opens the connection to the
+ * target and attaches a SCSI device to the parent interface.  On
+ * failure after allocation the session is closed and our reference
+ * to it is dropped.
+ */
+static int iscsi_open ( struct interface *parent, struct uri *uri ) {
+	struct iscsi_session *iscsi;
+	int rc;
+
+	/* Sanity check */
+	if ( ! uri->opaque ) {
+		rc = -EINVAL_NO_ROOT_PATH;
+		goto err_sanity_uri;
+	}
+
+	/* Allocate and initialise structure */
+	iscsi = zalloc ( sizeof ( *iscsi ) );
+	if ( ! iscsi ) {
+		rc = -ENOMEM;
+		goto err_zalloc;
+	}
+	ref_init ( &iscsi->refcnt, iscsi_free );
+	intf_init ( &iscsi->control, &iscsi_control_desc, &iscsi->refcnt );
+	intf_init ( &iscsi->data, &iscsi_data_desc, &iscsi->refcnt );
+	intf_init ( &iscsi->socket, &iscsi_socket_desc, &iscsi->refcnt );
+	process_init_stopped ( &iscsi->process, &iscsi_process_desc,
+			       &iscsi->refcnt );
+
+	/* Parse root path */
+	if ( ( rc = iscsi_parse_root_path ( iscsi, uri->opaque ) ) != 0 )
+		goto err_parse_root_path;
+	/* Set fields not specified by root path */
+	if ( ( rc = iscsi_fetch_settings ( iscsi ) ) != 0 )
+		goto err_fetch_settings;
+	/* Validate authentication */
+	if ( ( rc = iscsi_check_auth ( iscsi ) ) != 0 )
+		goto err_check_auth;
+
+	/* Sanity checks */
+	if ( ! iscsi->target_address ) {
+		DBGC ( iscsi, "iSCSI %p does not yet support discovery\n",
+		       iscsi );
+		rc = -ENOTSUP_DISCOVERY;
+		goto err_sanity_address;
+	}
+	if ( ! iscsi->target_iqn ) {
+		/* This branch reports a missing IQN, not a missing
+		 * address; the message names the right field.
+		 */
+		DBGC ( iscsi, "iSCSI %p no target IQN supplied in %s\n",
+		       iscsi, uri->opaque );
+		rc = -EINVAL_NO_TARGET_IQN;
+		goto err_sanity_iqn;
+	}
+	DBGC ( iscsi, "iSCSI %p initiator %s\n",iscsi, iscsi->initiator_iqn );
+	DBGC ( iscsi, "iSCSI %p target %s %s\n",
+	       iscsi, iscsi->target_address, iscsi->target_iqn );
+
+	/* Open socket */
+	if ( ( rc = iscsi_open_connection ( iscsi ) ) != 0 )
+		goto err_open_connection;
+
+	/* Attach SCSI device to parent interface */
+	if ( ( rc = scsi_open ( parent, &iscsi->control,
+				&iscsi->lun ) ) != 0 ) {
+		DBGC ( iscsi, "iSCSI %p could not create SCSI device: %s\n",
+		       iscsi, strerror ( rc ) );
+		goto err_scsi_open;
+	}
+
+	/* Mortalise self, and return */
+	ref_put ( &iscsi->refcnt );
+	return 0;
+
+ err_scsi_open:
+ err_open_connection:
+ err_sanity_iqn:
+ err_sanity_address:
+ err_check_auth:
+ err_fetch_settings:
+ err_parse_root_path:
+	iscsi_close ( iscsi, rc );
+	ref_put ( &iscsi->refcnt );
+ err_zalloc:
+ err_sanity_uri:
+	return rc;
+}
+
+/** iSCSI URI opener
+ *
+ * Registers iscsi_open() as the opener for the "iscsi" URI scheme.
+ */
+struct uri_opener iscsi_uri_opener __uri_opener = {
+ .scheme = "iscsi",
+ .open = iscsi_open,
+};
diff --git a/qemu/roms/ipxe/src/net/tcp/oncrpc.c b/qemu/roms/ipxe/src/net/tcp/oncrpc.c
new file mode 100644
index 000000000..6469867e9
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/tcp/oncrpc.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright (C) 2013 Marin Hannache <ipxe@mareo.fr>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/socket.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/in.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/dhcp.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/uri.h>
+#include <ipxe/features.h>
+#include <ipxe/oncrpc.h>
+#include <ipxe/oncrpc_iob.h>
+#include <ipxe/init.h>
+#include <ipxe/settings.h>
+#include <ipxe/version.h>
+
+/** @file
+ *
+ * SUN ONC RPC protocol
+ *
+ */
+
+/**
+ * Record-mark helpers
+ *
+ * SET_LAST_FRAME sets the most significant bit (bit 31) of a 32-bit
+ * value; GET_FRAME_SIZE clears it again. An unsigned constant is used
+ * because shifting a signed 1 into bit 31 is undefined behaviour.
+ */
+#define SET_LAST_FRAME( x ) ( (x) | ( 1U << 31 ) )
+#define GET_FRAME_SIZE( x ) ( (x) & ~( 1U << 31 ) )
+
+/** ONC RPC message types */
+#define ONCRPC_CALL 0
+#define ONCRPC_REPLY 1
+
+/** AUTH NONE authentication flavor
+ *
+ * Zero-length credential. oncrpc_get_reply() also hands this object out
+ * as the verifier of a parsed reply.
+ */
+struct oncrpc_cred oncrpc_auth_none = {
+ .flavor = ONCRPC_AUTH_NONE,
+ .length = 0
+};
+
+/** User ID setting ("uid", tag DHCP_EB_UID), read by oncrpc_init_cred_sys() */
+const struct setting uid_setting __setting ( SETTING_AUTH, uid ) = {
+ .name = "uid",
+ .description = "User ID",
+ .tag = DHCP_EB_UID,
+ .type = &setting_type_uint32
+};
+
+/** Group ID setting ("gid", tag DHCP_EB_GID) */
+const struct setting gid_setting __setting ( SETTING_AUTH, gid ) = {
+ .name = "gid",
+ .description = "Group ID",
+ .tag = DHCP_EB_GID,
+ .type = &setting_type_uint32
+};
+
+/**
+ * Initialize an ONC RPC AUTH SYS credential structure
+ *
+ * @v auth_sys The structure to initialize
+ * @ret rc Return status code (-EINVAL if auth_sys is NULL,
+ * -ENOMEM if the fallback hostname cannot be
+ * allocated)
+ *
+ * The hostname field is filled with the value of the hostname setting, if the
+ * hostname setting is empty, PRODUCT_SHORT_NAME (usually "iPXE") is used
+ * instead. The uid and gid fields are taken from the "uid" and "gid"
+ * settings respectively.
+ */
+int oncrpc_init_cred_sys ( struct oncrpc_cred_sys *auth_sys ) {
+ if ( ! auth_sys )
+ return -EINVAL;
+
+ fetch_string_setting_copy ( NULL, &hostname_setting,
+ &auth_sys->hostname );
+ if ( ! auth_sys->hostname )
+ if ( ! ( auth_sys->hostname = strdup ( product_short_name ) ) )
+ return -ENOMEM;
+
+ auth_sys->uid = fetch_uintz_setting ( NULL, &uid_setting );
+ /* The group ID has its own setting; do not reuse the uid setting */
+ auth_sys->gid = fetch_uintz_setting ( NULL, &gid_setting );
+ auth_sys->aux_gid_len = 0;
+ auth_sys->stamp = 0;
+
+ auth_sys->credential.flavor = ONCRPC_AUTH_SYS;
+ /* NOTE(review): the constant 16 presumably covers the four 32-bit
+ * fields (stamp, uid, gid, aux gid count) - confirm against the
+ * encoder.
+ */
+ auth_sys->credential.length = 16 +
+ oncrpc_strlen ( auth_sys->hostname );
+
+ return 0;
+}
+
+/**
+ * Set up an ONC RPC session for use by the ONC RPC layer
+ *
+ * @v session ONC RPC session (nothing is done if NULL)
+ * @v credential Credential structure pointer
+ * @v verifier Verifier structure pointer
+ * @v prog_name ONC RPC program number
+ * @v prog_vers ONC RPC program version number
+ *
+ * The initial RPC identifier is taken from rand(); the remaining
+ * arguments are stored in the session as given.
+ */
+void oncrpc_init_session ( struct oncrpc_session *session,
+ struct oncrpc_cred *credential,
+ struct oncrpc_cred *verifier, uint32_t prog_name,
+ uint32_t prog_vers ) {
+ if ( session ) {
+ session->rpc_id = rand();
+ session->credential = credential;
+ session->verifier = verifier;
+ session->prog_name = prog_name;
+ session->prog_vers = prog_vers;
+ }
+}
+
+/**
+ * Build and send an ONC RPC CALL message
+ *
+ * @v intf Interface on which to deliver the message
+ * @v session ONC RPC session
+ * @v proc_name ONC RPC procedure number
+ * @v fields Procedure arguments (terminated field list, see
+ * oncrpc_compute_size())
+ * @ret rc Return status code (-EINVAL if session is NULL,
+ * -ENOBUFS if no I/O buffer could be allocated)
+ *
+ * The session's RPC identifier is incremented for every call. The
+ * first header word is the record mark: the frame size excluding the
+ * mark itself, with the last-frame bit set.
+ */
+int oncrpc_call ( struct interface *intf, struct oncrpc_session *session,
+ uint32_t proc_name, const struct oncrpc_field fields[] ) {
+ size_t frame_size;
+ struct io_buffer *io_buf;
+
+ if ( ! session )
+ return -EINVAL;
+
+ struct oncrpc_field header[] = {
+ ONCRPC_FIELD ( int32, 0 ),
+ ONCRPC_FIELD ( int32, ++session->rpc_id ),
+ ONCRPC_FIELD ( int32, ONCRPC_CALL ),
+ ONCRPC_FIELD ( int32, ONCRPC_VERS ),
+ ONCRPC_FIELD ( int32, session->prog_name ),
+ ONCRPC_FIELD ( int32, session->prog_vers ),
+ ONCRPC_FIELD ( int32, proc_name ),
+ ONCRPC_FIELD ( cred, session->credential ),
+ ONCRPC_FIELD ( cred, session->verifier ),
+ ONCRPC_FIELD_END,
+ };
+
+ frame_size = oncrpc_compute_size ( header );
+ frame_size += oncrpc_compute_size ( fields );
+
+ io_buf = alloc_iob ( frame_size );
+ if ( ! io_buf )
+ return -ENOBUFS;
+
+ /* Record mark placeholder can only be filled in once the total
+ * size is known.
+ */
+ header[0].value.int32 = SET_LAST_FRAME ( frame_size -
+ sizeof ( uint32_t ) );
+
+ oncrpc_iob_add_fields ( io_buf, header );
+ oncrpc_iob_add_fields ( io_buf, fields );
+
+ /* The data transfer interface takes ownership of the I/O
+ * buffer whether or not delivery succeeds, so it must not be
+ * freed again here on failure.
+ */
+ return xfer_deliver_iob ( intf, io_buf );
+}
+
+/**
+ * Compute the encoded size of a list of ONC RPC fields
+ *
+ * @v fields Field list, terminated by an oncrpc_none entry
+ * @ret size Total encoded size in bytes
+ *
+ * Counting stops early (returning the size accumulated so far) at the
+ * first field whose type is not recognised.
+ */
+size_t oncrpc_compute_size ( const struct oncrpc_field fields[] ) {
+ const struct oncrpc_field *field;
+ size_t total = 0;
+
+ for ( field = fields ; field->type != oncrpc_none ; field++ ) {
+ switch ( field->type ) {
+ case oncrpc_int32:
+ total += sizeof ( uint32_t );
+ break;
+
+ case oncrpc_int64:
+ total += sizeof ( uint64_t );
+ break;
+
+ case oncrpc_str:
+ total += oncrpc_strlen ( field->value.str );
+ break;
+
+ case oncrpc_array:
+ /* Padded payload plus 32-bit length word */
+ total += oncrpc_align ( field->value.array.length );
+ total += sizeof ( uint32_t );
+ break;
+
+ case oncrpc_intarray:
+ /* One word per element plus 32-bit length word */
+ total += sizeof ( uint32_t ) *
+ field->value.intarray.length;
+ total += sizeof ( uint32_t );
+ break;
+
+ case oncrpc_cred:
+ /* Credential body plus two 32-bit words */
+ total += field->value.cred->length;
+ total += 2 * sizeof ( uint32_t );
+ break;
+
+ default:
+ return total;
+ }
+ }
+
+ return total;
+}
+
+/**
+ * Parse an I/O buffer to extract a ONC RPC REPLY
+ *
+ * @v session ONC RPC session (currently unused)
+ * @v reply Reply structure where data will be saved
+ * @v io_buf I/O buffer
+ * @ret rc Return status code (-EINVAL if reply or io_buf is
+ * NULL, -ENOSYS if the message is not a REPLY)
+ *
+ * Header fields are read from io_buf in wire order. On success
+ * reply->data is io_buf itself (not a copy).
+ *
+ * NOTE(review): no length checks on io_buf are visible in this function;
+ * confirm whether oncrpc_iob_get_int() validates the remaining length.
+ */
+int oncrpc_get_reply ( struct oncrpc_session *session __unused,
+ struct oncrpc_reply *reply, struct io_buffer *io_buf ) {
+ if ( ! reply || ! io_buf )
+ return -EINVAL;
+
+ /* Record mark (with the last-frame bit masked off), then RPC id */
+ reply->frame_size = GET_FRAME_SIZE ( oncrpc_iob_get_int ( io_buf ) );
+ reply->rpc_id = oncrpc_iob_get_int ( io_buf );
+
+ /* iPXE has no support for handling ONC RPC call */
+ if ( oncrpc_iob_get_int ( io_buf ) != ONCRPC_REPLY )
+ return -ENOSYS;
+
+ reply->reply_state = oncrpc_iob_get_int ( io_buf );
+
+ /* The verifier is skipped (and reply->verifier assigned) only when
+ * reply_state is zero; otherwise reply->verifier is left untouched.
+ */
+ if ( reply->reply_state == 0 )
+ {
+ /* verifier.flavor */
+ oncrpc_iob_get_int ( io_buf );
+ /* verifier.length */
+ iob_pull ( io_buf, oncrpc_iob_get_int ( io_buf ));
+
+ /* We don't use the verifier in iPXE, let it be an empty
+ verifier. */
+ reply->verifier = &oncrpc_auth_none;
+ }
+
+ reply->accept_state = oncrpc_iob_get_int ( io_buf );
+ reply->data = io_buf;
+
+ return 0;
+}
diff --git a/qemu/roms/ipxe/src/net/tcp/syslogs.c b/qemu/roms/ipxe/src/net/tcp/syslogs.c
new file mode 100644
index 000000000..095afc543
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/tcp/syslogs.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2012 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/** @file
+ *
+ * Encrypted syslog protocol
+ *
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <byteswap.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/dhcp.h>
+#include <ipxe/settings.h>
+#include <ipxe/console.h>
+#include <ipxe/lineconsole.h>
+#include <ipxe/tls.h>
+#include <ipxe/syslog.h>
+#include <config/console.h>
+
+/* Set default console usage if applicable
+ *
+ * Unless CONSOLE_SYSLOGS has been given an explicit value, default to
+ * every console usage except CONSOLE_USAGE_TUI.
+ */
+#if ! ( defined ( CONSOLE_SYSLOGS ) && CONSOLE_EXPLICIT ( CONSOLE_SYSLOGS ) )
+#undef CONSOLE_SYSLOGS
+#define CONSOLE_SYSLOGS ( CONSOLE_USAGE_ALL & ~CONSOLE_USAGE_TUI )
+#endif
+
+/* Forward declaration: the interface handlers below refer to the
+ * console driver before it is defined.
+ */
+struct console_driver syslogs_console __console_driver;
+
+/** The encrypted syslog server
+ *
+ * Only the port is preset here (SYSLOG_PORT, in network byte order);
+ * the server name is passed separately to xfer_open_named_socket().
+ */
+static struct sockaddr_tcpip logserver = {
+ .st_port = htons ( SYSLOG_PORT ),
+};
+
+/**
+ * Handle closure of the encrypted syslog TLS interface
+ *
+ * @v intf Interface (unused)
+ * @v rc Reason for close
+ *
+ * Only emits a debug message describing the reason.
+ */
+static void syslogs_close ( struct interface *intf __unused, int rc ) {
+ DBG ( "SYSLOGS console disconnected: %s\n", strerror ( rc ) );
+}
+
+/**
+ * Handle a window change on the encrypted syslog TLS interface
+ *
+ * @v intf Interface
+ *
+ * The console is enabled the first time the window opens, which
+ * indicates that TLS negotiation is complete. It is deliberately not
+ * disabled when the window closes again, since TCP closes the window
+ * whenever there is unACKed data.
+ */
+static void syslogs_window_changed ( struct interface *intf ) {
+
+ /* Nothing to do while the window is closed */
+ if ( ! xfer_window ( intf ) )
+ return;
+
+ /* Report only the transition from disabled to enabled */
+ if ( syslogs_console.disabled )
+ DBG ( "SYSLOGS console connected\n" );
+ syslogs_console.disabled = 0;
+}
+
+/** Encrypted syslog TLS interface operations
+ *
+ * Window changes enable the console; closure is only logged.
+ */
+static struct interface_operation syslogs_operations[] = {
+ INTF_OP ( xfer_window_changed, struct interface *,
+ syslogs_window_changed ),
+ INTF_OP ( intf_close, struct interface *, syslogs_close ),
+};
+
+/** Encrypted syslog TLS interface descriptor */
+static struct interface_descriptor syslogs_desc =
+ INTF_DESC_PURE ( syslogs_operations );
+
+/** The encrypted syslog TLS interface
+ *
+ * Log messages are sent through this interface by syslogs_putchar().
+ */
+static struct interface syslogs = INTF_INIT ( syslogs_desc );
+
+/******************************************************************************
+ *
+ * Console driver
+ *
+ ******************************************************************************
+ */
+
+/** Encrypted syslog line buffer (backing store for syslogs_line) */
+static char syslogs_buffer[SYSLOG_BUFSIZE];
+
+/** Encrypted syslog severity
+ *
+ * Severity used for the next message; updated by
+ * syslogs_handle_priority().
+ */
+static unsigned int syslogs_severity = SYSLOG_DEFAULT_SEVERITY;
+
+/**
+ * Handle the private ANSI sequence that sets the encrypted syslog priority
+ *
+ * @v ctx ANSI escape sequence context (unused)
+ * @v count Parameter count (unused)
+ * @v params Escape sequence parameters; only params[0] is examined
+ *
+ * A negative first parameter restores the default severity.
+ */
+static void syslogs_handle_priority ( struct ansiesc_context *ctx __unused,
+ unsigned int count __unused,
+ int params[] ) {
+ if ( params[0] < 0 ) {
+ syslogs_severity = SYSLOG_DEFAULT_SEVERITY;
+ } else {
+ syslogs_severity = params[0];
+ }
+}
+
+/** Encrypted syslog ANSI escape sequence handlers
+ *
+ * Only the log priority sequence is handled; the all-zero entry ends
+ * the list.
+ */
+static struct ansiesc_handler syslogs_handlers[] = {
+ { ANSIESC_LOG_PRIORITY, syslogs_handle_priority },
+ { 0, NULL }
+};
+
+/** Encrypted syslog line console
+ *
+ * Ties the line buffer to the escape sequence handlers above.
+ */
+static struct line_console syslogs_line = {
+ .buffer = syslogs_buffer,
+ .len = sizeof ( syslogs_buffer ),
+ .ctx = {
+ .handlers = syslogs_handlers,
+ },
+};
+
+/** Encrypted syslog recursion marker (non-zero while a message is being sent) */
+static int syslogs_entered;
+
+/**
+ * Print a character to encrypted syslog console
+ *
+ * @v character Character to be printed
+ *
+ * Characters are accumulated via line_putchar(); a log message is sent
+ * only when line_putchar() returns non-zero. Output produced while a
+ * message is being sent is dropped.
+ */
+static void syslogs_putchar ( int character ) {
+ int status;
+
+ /* Drop anything printed while we are already mid-logging */
+ if ( syslogs_entered )
+ return;
+
+ /* Keep accumulating until there is something to send */
+ if ( line_putchar ( &syslogs_line, character ) == 0 )
+ return;
+
+ /* Send the buffered line with the re-entry guard held */
+ syslogs_entered = 1;
+ status = syslog_send ( &syslogs, syslogs_severity,
+ syslogs_buffer, "\n" );
+ if ( status != 0 ) {
+ DBG ( "SYSLOGS could not send log message: %s\n",
+ strerror ( status ) );
+ }
+ syslogs_entered = 0;
+}
+
+/** Encrypted syslog console driver
+ *
+ * Starts out disabled; syslogs_window_changed() enables it once the
+ * transmit window opens.
+ */
+struct console_driver syslogs_console __console_driver = {
+ .putchar = syslogs_putchar,
+ .disabled = CONSOLE_DISABLED,
+ .usage = CONSOLE_SYSLOGS,
+};
+
+/******************************************************************************
+ *
+ * Settings
+ *
+ ******************************************************************************
+ */
+
+/** Encrypted syslog server setting
+ *
+ * String setting "syslogs" (tag DHCP_EB_SYSLOGS_SERVER), read by
+ * apply_syslogs_settings().
+ */
+const struct setting syslogs_setting __setting ( SETTING_MISC, syslogs ) = {
+ .name = "syslogs",
+ .description = "Encrypted syslog server",
+ .tag = DHCP_EB_SYSLOGS_SERVER,
+ .type = &setting_type_string,
+};
+
+/**
+ * Apply encrypted syslog settings
+ *
+ * @ret rc Return status code
+ *
+ * Compares the current "syslogs" setting with the server recorded by
+ * the previous successful call. If it has changed, the console is
+ * disabled, the interface is restarted and (if a server is configured)
+ * a new TLS-filtered connection is opened. The new server name is
+ * recorded only once the connection has been opened successfully.
+ */
+static int apply_syslogs_settings ( void ) {
+ /* Server recorded by the last successful call; persists across calls */
+ static char *old_server;
+ char *server;
+ struct interface *socket;
+ int rc;
+
+ /* Fetch log server */
+ /* NOTE(review): the return value is ignored, so this relies on
+ * fetch_string_setting_copy() storing NULL in server on failure -
+ * confirm.
+ */
+ fetch_string_setting_copy ( NULL, &syslogs_setting, &server );
+
+ /* Do nothing unless log server has changed */
+ if ( ( ( server == NULL ) && ( old_server == NULL ) ) ||
+ ( ( server != NULL ) && ( old_server != NULL ) &&
+ ( strcmp ( server, old_server ) == 0 ) ) ) {
+ rc = 0;
+ goto out_no_change;
+ }
+ free ( old_server );
+ old_server = NULL;
+
+ /* Reset encrypted syslog connection */
+ syslogs_console.disabled = CONSOLE_DISABLED;
+ intf_restart ( &syslogs, 0 );
+
+ /* Do nothing unless we have a log server */
+ if ( ! server ) {
+ DBG ( "SYSLOGS has no log server\n" );
+ rc = 0;
+ goto out_no_server;
+ }
+
+ /* Add TLS filter */
+ if ( ( rc = add_tls ( &syslogs, server, &socket ) ) != 0 ) {
+ DBG ( "SYSLOGS cannot create TLS filter: %s\n",
+ strerror ( rc ) );
+ goto err_add_tls;
+ }
+
+ /* Connect to log server */
+ if ( ( rc = xfer_open_named_socket ( socket, SOCK_STREAM,
+ (( struct sockaddr *) &logserver ),
+ server, NULL ) ) != 0 ) {
+ DBG ( "SYSLOGS cannot connect to log server: %s\n",
+ strerror ( rc ) );
+ goto err_open_named_socket;
+ }
+ DBG ( "SYSLOGS using log server %s\n", server );
+
+ /* Record log server: ownership of the string moves to old_server,
+ * so clear server to stop the common exit path from freeing it.
+ */
+ old_server = server;
+ server = NULL;
+
+ /* Success */
+ rc = 0;
+
+ /* Common exit: frees the fetched copy unless it was recorded above */
+ err_open_named_socket:
+ err_add_tls:
+ out_no_server:
+ out_no_change:
+ free ( server );
+ return rc;
+}
+
+/** Encrypted syslog settings applicator
+ *
+ * Registers apply_syslogs_settings() as a settings applicator.
+ */
+struct settings_applicator syslogs_applicator __settings_applicator = {
+ .apply = apply_syslogs_settings,
+};
diff --git a/qemu/roms/ipxe/src/net/tcpip.c b/qemu/roms/ipxe/src/net/tcpip.c
new file mode 100644
index 000000000..4bcbe64bb
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/tcpip.c
@@ -0,0 +1,250 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/tables.h>
+#include <ipxe/ipstat.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/tcpip.h>
+
+/** @file
+ *
+ * Transport-network layer interface
+ *
+ * This file contains functions and utilities for the
+ * TCP/IP transport-network layer interface
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/**
+ * Process a received TCP/IP packet
+ *
+ * @v iobuf I/O buffer
+ * @v netdev Network device
+ * @v tcpip_proto Transport-layer protocol number
+ * @v st_src Partially-filled source address
+ * @v st_dest Partially-filled destination address
+ * @v pshdr_csum Pseudo-header checksum
+ * @v stats IP statistics
+ * @ret rc Return status code
+ *
+ * The network layer passes a transport-layer segment here, having
+ * filled in as much of the source and destination addresses as it can
+ * (the address family and network-layer addresses), leaving the ports
+ * and the rest of the structures as zero. If no registered transport
+ * protocol matches, the buffer is freed and -EPROTONOSUPPORT returned.
+ */
+int tcpip_rx ( struct io_buffer *iobuf, struct net_device *netdev,
+ uint8_t tcpip_proto, struct sockaddr_tcpip *st_src,
+ struct sockaddr_tcpip *st_dest, uint16_t pshdr_csum,
+ struct ip_statistics *stats ) {
+ struct tcpip_protocol *proto;
+
+ /* Look for a transport-layer protocol with a matching number */
+ for_each_table_entry ( proto, TCPIP_PROTOCOLS ) {
+ if ( proto->tcpip_proto != tcpip_proto )
+ continue;
+ DBG ( "TCP/IP received %s packet\n", proto->name );
+ stats->in_delivers++;
+ return proto->rx ( iobuf, netdev, st_src, st_dest,
+ pshdr_csum );
+ }
+
+ /* No handler: count it and discard the packet */
+ DBG ( "Unrecognised TCP/IP protocol %d\n", tcpip_proto );
+ stats->in_unknown_protos++;
+ free_iob ( iobuf );
+ return -EPROTONOSUPPORT;
+}
+
+/**
+ * Look up the TCP/IP network-layer protocol for an address
+ *
+ * @v st_dest Destination address
+ * @ret tcpip_net TCP/IP network-layer protocol, or NULL if not found
+ *
+ * Matching is on the address family of the destination.
+ */
+static struct tcpip_net_protocol *
+tcpip_net_protocol ( struct sockaddr_tcpip *st_dest ) {
+ struct tcpip_net_protocol *candidate;
+
+ for_each_table_entry ( candidate, TCPIP_NET_PROTOCOLS ) {
+ if ( candidate->sa_family != st_dest->st_family )
+ continue;
+ return candidate;
+ }
+
+ DBG ( "Unrecognised TCP/IP address family %d\n", st_dest->st_family );
+ return NULL;
+}
+
+/**
+ * Transmit a TCP/IP packet
+ *
+ * @v iobuf I/O buffer
+ * @v tcpip_protocol Transport-layer protocol
+ * @v st_src Source address, or NULL to use route default
+ * @v st_dest Destination address
+ * @v netdev Network device to use if no route found, or NULL
+ * @v trans_csum Transport-layer checksum to complete, or NULL
+ * @ret rc Return status code
+ *
+ * If no network-layer protocol handles the destination address family,
+ * the buffer is freed and -EAFNOSUPPORT is returned.
+ */
+int tcpip_tx ( struct io_buffer *iobuf, struct tcpip_protocol *tcpip_protocol,
+ struct sockaddr_tcpip *st_src, struct sockaddr_tcpip *st_dest,
+ struct net_device *netdev, uint16_t *trans_csum ) {
+ struct tcpip_net_protocol *net_proto;
+
+ /* Find the network-layer protocol for this destination */
+ net_proto = tcpip_net_protocol ( st_dest );
+ if ( ! net_proto ) {
+ free_iob ( iobuf );
+ return -EAFNOSUPPORT;
+ }
+
+ /* Hand off packet to the network layer */
+ DBG ( "TCP/IP sending %s packet\n", net_proto->name );
+ return net_proto->tx ( iobuf, tcpip_protocol, st_src, st_dest,
+ netdev, trans_csum );
+}
+
+/**
+ * Determine transmitting network device
+ *
+ * @v st_dest Destination address
+ * @ret netdev Network device, or NULL
+ *
+ * The decision is delegated to the network-layer protocol matching the
+ * destination address family; NULL is returned if there is none.
+ */
+struct net_device * tcpip_netdev ( struct sockaddr_tcpip *st_dest ) {
+ struct tcpip_net_protocol *net_proto;
+
+ net_proto = tcpip_net_protocol ( st_dest );
+ if ( ! net_proto )
+ return NULL;
+
+ return net_proto->netdev ( st_dest );
+}
+
+/**
+ * Determine maximum transmission unit
+ *
+ * @v st_dest Destination address
+ * @ret mtu Maximum transmission unit
+ *
+ * Returns zero if there is no network-layer protocol or transmitting
+ * network device for the destination. Otherwise the result is the
+ * device's maximum packet length less the link-layer and network-layer
+ * header lengths.
+ */
+size_t tcpip_mtu ( struct sockaddr_tcpip *st_dest ) {
+ struct tcpip_net_protocol *net_proto;
+ struct net_device *tx_dev;
+ size_t mtu = 0;
+
+ net_proto = tcpip_net_protocol ( st_dest );
+ if ( net_proto ) {
+ tx_dev = net_proto->netdev ( st_dest );
+ if ( tx_dev ) {
+ mtu = ( tx_dev->max_pkt_len -
+ tx_dev->ll_protocol->ll_header_len -
+ net_proto->header_len );
+ }
+ }
+
+ return mtu;
+}
+
+/**
+ * Calculate continued TCP/IP checksum
+ *
+ * @v partial Checksum of already-summed data, in network byte order
+ * @v data Data buffer
+ * @v len Length of data buffer
+ * @ret cksum Updated checksum, in network byte order
+ *
+ * Calculates a TCP/IP-style 16-bit checksum over the data block. The
+ * checksum is returned in network byte order.
+ *
+ * This function may be used to add new data to an existing checksum.
+ * The function assumes that both the old data and the new data start
+ * on even byte offsets; if this is not the case then you will need to
+ * byte-swap either the input partial checksum, the output checksum,
+ * or both. Deciding which to swap is left as an exercise for the
+ * interested reader.
+ */
+uint16_t generic_tcpip_continue_chksum ( uint16_t partial,
+ const void *data, size_t len ) {
+ /* Work on the running sum, i.e. the complement of the checksum */
+ unsigned int cksum = ( ( ~partial ) & 0xffff );
+ unsigned int value;
+ unsigned int i;
+
+ /* Add the data one byte at a time, placing each byte in the half
+ * of the 16-bit word that its offset parity dictates.
+ */
+ for ( i = 0 ; i < len ; i++ ) {
+ value = * ( ( uint8_t * ) data + i );
+ if ( i & 1 ) {
+ /* Odd bytes: swap on little-endian systems */
+ value = be16_to_cpu ( value );
+ } else {
+ /* Even bytes: swap on big-endian systems */
+ value = le16_to_cpu ( value );
+ }
+ cksum += value;
+ /* Keep the sum within 16 bits; subtracting 0xffff removes
+ * the overflow bit and adds one back in.
+ */
+ if ( cksum > 0xffff )
+ cksum -= 0xffff;
+ }
+
+ return ( ~cksum );
+}
+
+/**
+ * Calculate TCP/IP checksum
+ *
+ * @v data Data buffer
+ * @v len Length of data buffer
+ * @ret cksum Checksum, in network byte order
+ *
+ * Calculates a TCP/IP-style 16-bit checksum over the data block. The
+ * checksum is returned in network byte order.
+ *
+ * This is tcpip_continue_chksum() started from TCPIP_EMPTY_CSUM.
+ */
+uint16_t tcpip_chksum ( const void *data, size_t len ) {
+ return tcpip_continue_chksum ( TCPIP_EMPTY_CSUM, data, len );
+}
+
+/**
+ * Bind to local TCP/IP port
+ *
+ * @v st_local Local TCP/IP socket address, or NULL
+ * @v available Function to check port availability
+ * @ret port Local port number, or negative error
+ *
+ * If st_local specifies a non-zero port, the result of available() for
+ * that port is returned directly. Otherwise a port is chosen starting
+ * from a random offset within the privileged or unprivileged range,
+ * according to the TCPIP_BIND_PRIVILEGED flag; -EADDRINUSE is returned
+ * if no port in the range is available.
+ */
+int tcpip_bind ( struct sockaddr_tcpip *st_local,
+ int ( * available ) ( int port ) ) {
+ uint16_t flags = 0;
+ uint16_t try_port = 0;
+ uint16_t min_port;
+ uint16_t max_port;
+ unsigned int offset;
+ unsigned int i;
+
+ /* Extract parameters from local socket address */
+ if ( st_local ) {
+ flags = st_local->st_flags;
+ try_port = ntohs ( st_local->st_port );
+ }
+
+ /* If an explicit port is specified, check its availability */
+ if ( try_port )
+ return available ( try_port );
+
+ /* Otherwise, find an available port in the range [1,1023] or
+ * [1025,65535] as appropriate.
+ *
+ * The bitwise complement of the flags is required for the lower
+ * bound: a logical negation yields only 0 or 1, which never has
+ * the TCPIP_BIND_PRIVILEGED bit set, and would therefore allow
+ * unprivileged binds to select privileged ports. (This assumes
+ * TCPIP_BIND_PRIVILEGED is 1024, as the ranges above imply.)
+ */
+ min_port = ( ( ( ~flags ) & TCPIP_BIND_PRIVILEGED ) + 1 );
+ max_port = ( ( flags & TCPIP_BIND_PRIVILEGED ) - 1 );
+ offset = random();
+ for ( i = 0 ; i <= max_port ; i++ ) {
+ /* max_port is all-ones in its low bits, so it doubles as a mask */
+ try_port = ( ( i + offset ) & max_port );
+ if ( try_port < min_port )
+ continue;
+ if ( available ( try_port ) < 0 )
+ continue;
+ return try_port;
+ }
+ return -EADDRINUSE;
+}
diff --git a/qemu/roms/ipxe/src/net/tls.c b/qemu/roms/ipxe/src/net/tls.c
new file mode 100644
index 000000000..30ccc932e
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/tls.c
@@ -0,0 +1,2639 @@
+/*
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/**
+ * @file
+ *
+ * Transport Layer Security Protocol
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <time.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/pending.h>
+#include <ipxe/hmac.h>
+#include <ipxe/md5.h>
+#include <ipxe/sha1.h>
+#include <ipxe/sha256.h>
+#include <ipxe/aes.h>
+#include <ipxe/rsa.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/x509.h>
+#include <ipxe/privkey.h>
+#include <ipxe/certstore.h>
+#include <ipxe/rbg.h>
+#include <ipxe/validator.h>
+#include <ipxe/tls.h>
+
+/* Disambiguate the various error causes
+ *
+ * Each cause is a pair of macros: EINFO_xxx derives a distinct error
+ * (with its own description) from a base error via __einfo_uniqify(),
+ * and xxx is the corresponding error value.
+ */
+#define EINVAL_CHANGE_CIPHER __einfo_error ( EINFO_EINVAL_CHANGE_CIPHER )
+#define EINFO_EINVAL_CHANGE_CIPHER \
+ __einfo_uniqify ( EINFO_EINVAL, 0x01, \
+ "Invalid Change Cipher record" )
+#define EINVAL_ALERT __einfo_error ( EINFO_EINVAL_ALERT )
+#define EINFO_EINVAL_ALERT \
+ __einfo_uniqify ( EINFO_EINVAL, 0x02, \
+ "Invalid Alert record" )
+#define EINVAL_HELLO __einfo_error ( EINFO_EINVAL_HELLO )
+#define EINFO_EINVAL_HELLO \
+ __einfo_uniqify ( EINFO_EINVAL, 0x03, \
+ "Invalid Server Hello record" )
+#define EINVAL_CERTIFICATE __einfo_error ( EINFO_EINVAL_CERTIFICATE )
+#define EINFO_EINVAL_CERTIFICATE \
+ __einfo_uniqify ( EINFO_EINVAL, 0x04, \
+ "Invalid Certificate" )
+#define EINVAL_CERTIFICATES __einfo_error ( EINFO_EINVAL_CERTIFICATES )
+#define EINFO_EINVAL_CERTIFICATES \
+ __einfo_uniqify ( EINFO_EINVAL, 0x05, \
+ "Invalid Server Certificate record" )
+#define EINVAL_HELLO_DONE __einfo_error ( EINFO_EINVAL_HELLO_DONE )
+#define EINFO_EINVAL_HELLO_DONE \
+ __einfo_uniqify ( EINFO_EINVAL, 0x06, \
+ "Invalid Server Hello Done record" )
+#define EINVAL_FINISHED __einfo_error ( EINFO_EINVAL_FINISHED )
+#define EINFO_EINVAL_FINISHED \
+ __einfo_uniqify ( EINFO_EINVAL, 0x07, \
+ "Invalid Server Finished record" )
+#define EINVAL_HANDSHAKE __einfo_error ( EINFO_EINVAL_HANDSHAKE )
+#define EINFO_EINVAL_HANDSHAKE \
+ __einfo_uniqify ( EINFO_EINVAL, 0x08, \
+ "Invalid Handshake record" )
+#define EINVAL_STREAM __einfo_error ( EINFO_EINVAL_STREAM )
+#define EINFO_EINVAL_STREAM \
+ __einfo_uniqify ( EINFO_EINVAL, 0x09, \
+ "Invalid stream-ciphered record" )
+#define EINVAL_BLOCK __einfo_error ( EINFO_EINVAL_BLOCK )
+#define EINFO_EINVAL_BLOCK \
+ __einfo_uniqify ( EINFO_EINVAL, 0x0a, \
+ "Invalid block-ciphered record" )
+#define EINVAL_PADDING __einfo_error ( EINFO_EINVAL_PADDING )
+#define EINFO_EINVAL_PADDING \
+ __einfo_uniqify ( EINFO_EINVAL, 0x0b, \
+ "Invalid block padding" )
+#define EINVAL_RX_STATE __einfo_error ( EINFO_EINVAL_RX_STATE )
+#define EINFO_EINVAL_RX_STATE \
+ __einfo_uniqify ( EINFO_EINVAL, 0x0c, \
+ "Invalid receive state" )
+#define EINVAL_MAC __einfo_error ( EINFO_EINVAL_MAC )
+#define EINFO_EINVAL_MAC \
+ __einfo_uniqify ( EINFO_EINVAL, 0x0d, \
+ "Invalid MAC" )
+#define EIO_ALERT __einfo_error ( EINFO_EIO_ALERT )
+#define EINFO_EIO_ALERT \
+ __einfo_uniqify ( EINFO_EINVAL, 0x01, \
+ "Unknown alert level" )
+/* Out-of-memory, unsupported-feature, permission and protocol error
+ * causes. (Code 0x06 is not used among the ENOMEM causes listed here.)
+ */
+#define ENOMEM_CONTEXT __einfo_error ( EINFO_ENOMEM_CONTEXT )
+#define EINFO_ENOMEM_CONTEXT \
+ __einfo_uniqify ( EINFO_ENOMEM, 0x01, \
+ "Not enough space for crypto context" )
+#define ENOMEM_CERTIFICATE __einfo_error ( EINFO_ENOMEM_CERTIFICATE )
+#define EINFO_ENOMEM_CERTIFICATE \
+ __einfo_uniqify ( EINFO_ENOMEM, 0x02, \
+ "Not enough space for certificate" )
+#define ENOMEM_CHAIN __einfo_error ( EINFO_ENOMEM_CHAIN )
+#define EINFO_ENOMEM_CHAIN \
+ __einfo_uniqify ( EINFO_ENOMEM, 0x03, \
+ "Not enough space for certificate chain" )
+#define ENOMEM_TX_PLAINTEXT __einfo_error ( EINFO_ENOMEM_TX_PLAINTEXT )
+#define EINFO_ENOMEM_TX_PLAINTEXT \
+ __einfo_uniqify ( EINFO_ENOMEM, 0x04, \
+ "Not enough space for transmitted plaintext" )
+#define ENOMEM_TX_CIPHERTEXT __einfo_error ( EINFO_ENOMEM_TX_CIPHERTEXT )
+#define EINFO_ENOMEM_TX_CIPHERTEXT \
+ __einfo_uniqify ( EINFO_ENOMEM, 0x05, \
+ "Not enough space for transmitted ciphertext" )
+#define ENOMEM_RX_DATA __einfo_error ( EINFO_ENOMEM_RX_DATA )
+#define EINFO_ENOMEM_RX_DATA \
+ __einfo_uniqify ( EINFO_ENOMEM, 0x07, \
+ "Not enough space for received data" )
+#define ENOMEM_RX_CONCAT __einfo_error ( EINFO_ENOMEM_RX_CONCAT )
+#define EINFO_ENOMEM_RX_CONCAT \
+ __einfo_uniqify ( EINFO_ENOMEM, 0x08, \
+ "Not enough space to concatenate received data" )
+#define ENOTSUP_CIPHER __einfo_error ( EINFO_ENOTSUP_CIPHER )
+#define EINFO_ENOTSUP_CIPHER \
+ __einfo_uniqify ( EINFO_ENOTSUP, 0x01, \
+ "Unsupported cipher" )
+#define ENOTSUP_NULL __einfo_error ( EINFO_ENOTSUP_NULL )
+#define EINFO_ENOTSUP_NULL \
+ __einfo_uniqify ( EINFO_ENOTSUP, 0x02, \
+ "Refusing to use null cipher" )
+#define ENOTSUP_SIG_HASH __einfo_error ( EINFO_ENOTSUP_SIG_HASH )
+#define EINFO_ENOTSUP_SIG_HASH \
+ __einfo_uniqify ( EINFO_ENOTSUP, 0x03, \
+ "Unsupported signature and hash algorithm" )
+#define ENOTSUP_VERSION __einfo_error ( EINFO_ENOTSUP_VERSION )
+#define EINFO_ENOTSUP_VERSION \
+ __einfo_uniqify ( EINFO_ENOTSUP, 0x04, \
+ "Unsupported protocol version" )
+#define EPERM_ALERT __einfo_error ( EINFO_EPERM_ALERT )
+#define EINFO_EPERM_ALERT \
+ __einfo_uniqify ( EINFO_EPERM, 0x01, \
+ "Received fatal alert" )
+#define EPERM_VERIFY __einfo_error ( EINFO_EPERM_VERIFY )
+#define EINFO_EPERM_VERIFY \
+ __einfo_uniqify ( EINFO_EPERM, 0x02, \
+ "Handshake verification failed" )
+#define EPERM_CLIENT_CERT __einfo_error ( EINFO_EPERM_CLIENT_CERT )
+#define EINFO_EPERM_CLIENT_CERT \
+ __einfo_uniqify ( EINFO_EPERM, 0x03, \
+ "No suitable client certificate available" )
+#define EPROTO_VERSION __einfo_error ( EINFO_EPROTO_VERSION )
+#define EINFO_EPROTO_VERSION \
+ __einfo_uniqify ( EINFO_EPROTO, 0x01, \
+ "Illegal protocol version upgrade" )
+
+static int tls_send_plaintext ( struct tls_session *tls, unsigned int type,
+ const void *data, size_t len );
+static void tls_clear_cipher ( struct tls_session *tls,
+ struct tls_cipherspec *cipherspec );
+
+/******************************************************************************
+ *
+ * Utility functions
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Extract 24-bit field value
+ *
+ * @v field24 24-bit field
+ * @ret value Field value
+ *
+ * TLS uses 24-bit integers in several places, which are awkward to
+ * parse in C.
+ *
+ * The field is read through a 32-bit big-endian load and the lowest
+ * byte is then shifted out, so the byte immediately following the
+ * 24-bit field is also read (and discarded).
+ */
+static inline __attribute__ (( always_inline )) unsigned long
+tls_uint24 ( const uint8_t field24[3] ) {
+ /* may_alias permits the 32-bit access to the byte array */
+ const uint32_t *field32 __attribute__ (( may_alias )) =
+ ( ( const void * ) field24 );
+ return ( be32_to_cpu ( *field32 ) >> 8 );
+}
+
+/**
+ * Set 24-bit field value
+ *
+ * @v field24 24-bit field
+ * @v value Field value
+ *
+ * The field must be pre-zeroed.
+ *
+ * The value is ORed in through a 32-bit big-endian access, which is why
+ * the field must start out as zero. The byte following the field is
+ * part of that access, but only zero bits are ORed into it.
+ */
+static void tls_set_uint24 ( uint8_t field24[3], unsigned long value ) {
+ /* may_alias permits the 32-bit access to the byte array */
+ uint32_t *field32 __attribute__ (( may_alias )) =
+ ( ( void * ) field24 );
+ *field32 |= cpu_to_be32 ( value << 8 );
+}
+
+/**
+ * Check whether the TLS session can carry application data
+ *
+ * @v tls TLS session
+ * @ret is_ready TLS session is ready
+ *
+ * The session is ready once neither the client nor the server
+ * negotiation is still pending.
+ */
+static int tls_ready ( struct tls_session *tls ) {
+ return ( ! ( is_pending ( &tls->client_negotiation ) ||
+ is_pending ( &tls->server_negotiation ) ) );
+}
+
+/******************************************************************************
+ *
+ * Hybrid MD5+SHA1 hash as used by TLSv1.1 and earlier
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Initialise the hybrid MD5+SHA1 digest
+ *
+ * @v ctx MD5+SHA1 context
+ *
+ * Initialises both the MD5 and the SHA-1 halves of the context.
+ */
+static void md5_sha1_init ( void *ctx ) {
+ struct md5_sha1_context *hybrid = ctx;
+
+ digest_init ( &md5_algorithm, hybrid->md5 );
+ digest_init ( &sha1_algorithm, hybrid->sha1 );
+}
+
+/**
+ * Feed data into the hybrid MD5+SHA1 digest
+ *
+ * @v ctx MD5+SHA1 context
+ * @v data Data
+ * @v len Length of data
+ *
+ * The same data is accumulated into both the MD5 and SHA-1 halves.
+ */
+static void md5_sha1_update ( void *ctx, const void *data, size_t len ) {
+ struct md5_sha1_context *hybrid = ctx;
+
+ digest_update ( &md5_algorithm, hybrid->md5, data, len );
+ digest_update ( &sha1_algorithm, hybrid->sha1, data, len );
+}
+
+/**
+ * Produce the hybrid MD5+SHA1 digest
+ *
+ * @v ctx MD5+SHA1 context
+ * @v out Output buffer (a struct md5_sha1_digest)
+ *
+ * Finalises the MD5 half into the md5 member of the output and the
+ * SHA-1 half into its sha1 member.
+ */
+static void md5_sha1_final ( void *ctx, void *out ) {
+ struct md5_sha1_context *hybrid = ctx;
+ struct md5_sha1_digest *result = out;
+
+ digest_final ( &md5_algorithm, hybrid->md5, result->md5 );
+ digest_final ( &sha1_algorithm, hybrid->sha1, result->sha1 );
+}
+
+/** Hybrid MD5+SHA1 digest algorithm
+ *
+ * Wraps the three md5_sha1_*() functions above; context and digest
+ * sizes are those of the combined MD5+SHA1 structures.
+ */
+static struct digest_algorithm md5_sha1_algorithm = {
+ .name = "md5+sha1",
+ .ctxsize = sizeof ( struct md5_sha1_context ),
+ .blocksize = 0, /* Not applicable */
+ .digestsize = sizeof ( struct md5_sha1_digest ),
+ .init = md5_sha1_init,
+ .update = md5_sha1_update,
+ .final = md5_sha1_final,
+};
+
+/** RSA digestInfo prefix for MD5+SHA1 algorithm
+ *
+ * Registers an empty (zero-length) prefix for the hybrid digest.
+ */
+struct rsa_digestinfo_prefix rsa_md5_sha1_prefix __rsa_digestinfo_prefix = {
+ .digest = &md5_sha1_algorithm,
+ .data = NULL, /* MD5+SHA1 signatures have no digestInfo */
+ .len = 0,
+};
+
+/******************************************************************************
+ *
+ * Cleanup functions
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Free TLS session
+ *
+ * @v refcnt Reference counter
+ *
+ * Releases all four cipher specifications (active and pending, for
+ * both directions), any queued received data, the certificate and the
+ * certificate chain, and finally the session structure itself.
+ */
+static void free_tls ( struct refcnt *refcnt ) {
+ struct tls_session *tls;
+ struct io_buffer *rx_iob;
+ struct io_buffer *next_iob;
+
+ /* Recover the session from its embedded reference counter */
+ tls = container_of ( refcnt, struct tls_session, refcnt );
+
+ /* Release cipher state */
+ tls_clear_cipher ( tls, &tls->tx_cipherspec );
+ tls_clear_cipher ( tls, &tls->tx_cipherspec_pending );
+ tls_clear_cipher ( tls, &tls->rx_cipherspec );
+ tls_clear_cipher ( tls, &tls->rx_cipherspec_pending );
+
+ /* Discard any received data still queued */
+ list_for_each_entry_safe ( rx_iob, next_iob, &tls->rx_data, list ) {
+ list_del ( &rx_iob->list );
+ free_iob ( rx_iob );
+ }
+
+ /* Drop certificate references */
+ x509_put ( tls->cert );
+ x509_chain_put ( tls->chain );
+
+ /* Free TLS structure itself */
+ free ( tls );
+}
+
+/**
+ * Finish with TLS session
+ *
+ * @v tls TLS session
+ * @v rc Status code
+ *
+ * Drops both negotiation pending operations, removes the session's
+ * process, and shuts down the cipherstream, plainstream and validator
+ * interfaces, passing the status code to each shutdown.
+ */
+static void tls_close ( struct tls_session *tls, int rc ) {
+
+ /* Remove pending operations, if applicable */
+ pending_put ( &tls->client_negotiation );
+ pending_put ( &tls->server_negotiation );
+
+ /* Remove process */
+ process_del ( &tls->process );
+
+ /* Close all interfaces */
+ intf_shutdown ( &tls->cipherstream, rc );
+ intf_shutdown ( &tls->plainstream, rc );
+ intf_shutdown ( &tls->validator, rc );
+}
+
+/******************************************************************************
+ *
+ * Random number generation
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Generate random data
+ *
+ * @v tls		TLS session
+ * @v data		Buffer to fill
+ * @v len		Length of buffer
+ * @ret rc		Return status code
+ */
+static int tls_generate_random ( struct tls_session *tls,
+				 void *data, size_t len ) {
+	int rc;
+
+	/* Request random bits with no additional input and without
+	 * prediction resistance
+	 */
+	rc = rbg_generate ( NULL, 0, 0, data, len );
+	if ( rc != 0 ) {
+		DBGC ( tls, "TLS %p could not generate random data: %s\n",
+		       tls, strerror ( rc ) );
+	}
+
+	return rc;
+}
+
+/**
+ * Update HMAC with a list of ( data, len ) pairs
+ *
+ * @v digest		Hash function to use
+ * @v digest_ctx	Digest context
+ * @v args		( data, len ) pairs of data, terminated by NULL
+ *
+ * Arguments are consumed pairwise until a NULL data pointer is
+ * fetched; no length is read for the terminator.
+ */
+static void tls_hmac_update_va ( struct digest_algorithm *digest,
+				 void *digest_ctx, va_list args ) {
+	void *chunk;
+	size_t chunk_len;
+
+	for ( ; ; ) {
+		/* Fetch next data pointer; NULL terminates the list */
+		chunk = va_arg ( args, void * );
+		if ( ! chunk )
+			break;
+
+		/* Fetch matching length and accumulate */
+		chunk_len = va_arg ( args, size_t );
+		hmac_update ( digest, digest_ctx, chunk, chunk_len );
+	}
+}
+
+/**
+ * Generate secure pseudo-random data using a single hash function
+ *
+ * @v tls TLS session
+ * @v digest Hash function to use
+ * @v secret Secret
+ * @v secret_len Length of secret
+ * @v out Output buffer
+ * @v out_len Length of output buffer
+ * @v seeds ( data, len ) pairs of seed data, terminated by NULL
+ *
+ * Iterated-HMAC expansion: A(1) is the HMAC of the seeds, each output
+ * fragment is the HMAC of the current A value followed by the seeds,
+ * and the next A value is the HMAC of the current A value.  Fragments
+ * are one digest long, except that the final fragment is truncated so
+ * that exactly out_len bytes are written.
+ *
+ * The seed list is only ever walked via va_copy(), so the caller's
+ * va_list is not advanced by this function.
+ *
+ * NOTE(review): this looks like the P_hash construction from the TLS
+ * PRF definition - confirm against the RFC.
+ */
+static void tls_p_hash_va ( struct tls_session *tls,
+ struct digest_algorithm *digest,
+ void *secret, size_t secret_len,
+ void *out, size_t out_len,
+ va_list seeds ) {
+ uint8_t secret_copy[secret_len];
+ uint8_t digest_ctx[digest->ctxsize];
+ uint8_t digest_ctx_partial[digest->ctxsize];
+ uint8_t a[digest->digestsize];
+ uint8_t out_tmp[digest->digestsize];
+ size_t frag_len = digest->digestsize;
+ va_list tmp;
+
+ /* Copy the secret, in case HMAC modifies it.  From here on
+ * "secret" refers to the local copy, and secret_len is passed
+ * by reference to the HMAC calls.
+ */
+ memcpy ( secret_copy, secret, secret_len );
+ secret = secret_copy;
+ DBGC2 ( tls, "TLS %p %s secret:\n", tls, digest->name );
+ DBGC2_HD ( tls, secret, secret_len );
+
+ /* Calculate A(1) as the HMAC over the seed data alone */
+ hmac_init ( digest, digest_ctx, secret, &secret_len );
+ va_copy ( tmp, seeds );
+ tls_hmac_update_va ( digest, digest_ctx, tmp );
+ va_end ( tmp );
+ hmac_final ( digest, digest_ctx, secret, &secret_len, a );
+ DBGC2 ( tls, "TLS %p %s A(1):\n", tls, digest->name );
+ DBGC2_HD ( tls, &a, sizeof ( a ) );
+
+ /* Generate as much data as required */
+ while ( out_len ) {
+ /* Calculate output portion.  The context is snapshotted
+ * into digest_ctx_partial after hashing the current A
+ * value but before hashing the seeds, so that the
+ * snapshot can be finalised below to give the next A
+ * value.
+ */
+ hmac_init ( digest, digest_ctx, secret, &secret_len );
+ hmac_update ( digest, digest_ctx, a, sizeof ( a ) );
+ memcpy ( digest_ctx_partial, digest_ctx, digest->ctxsize );
+ va_copy ( tmp, seeds );
+ tls_hmac_update_va ( digest, digest_ctx, tmp );
+ va_end ( tmp );
+ hmac_final ( digest, digest_ctx,
+ secret, &secret_len, out_tmp );
+
+ /* Copy output, clamping the fragment to the space that
+ * remains in the output buffer
+ */
+ if ( frag_len > out_len )
+ frag_len = out_len;
+ memcpy ( out, out_tmp, frag_len );
+ DBGC2 ( tls, "TLS %p %s output:\n", tls, digest->name );
+ DBGC2_HD ( tls, out, frag_len );
+
+ /* Calculate A(i) by finalising the snapshot taken above,
+ * which has hashed only the previous A value
+ */
+ hmac_final ( digest, digest_ctx_partial,
+ secret, &secret_len, a );
+ DBGC2 ( tls, "TLS %p %s A(n):\n", tls, digest->name );
+ DBGC2_HD ( tls, &a, sizeof ( a ) );
+
+ out += frag_len;
+ out_len -= frag_len;
+ }
+}
+
+/**
+ * Generate secure pseudo-random data
+ *
+ * @v tls		TLS session
+ * @v secret		Secret
+ * @v secret_len	Length of secret
+ * @v out		Output buffer
+ * @v out_len		Length of output buffer
+ * @v ...		( data, len ) pairs of seed data, terminated by NULL
+ *
+ * For TLSv1.2 and later the output is P_SHA256 over the whole secret.
+ * For earlier versions the secret is split into two halves (which
+ * overlap by one byte when the secret length is odd); P_MD5 of the
+ * first half and P_SHA-1 of the second half are XORed together.
+ */
+static void tls_prf ( struct tls_session *tls, void *secret, size_t secret_len,
+		      void *out, size_t out_len, ... ) {
+	va_list seeds;
+	va_list tmp;
+	size_t subsecret_len;
+	void *md5_secret;
+	void *sha1_secret;
+	uint8_t buf[out_len];
+	unsigned int i;
+
+	va_start ( seeds, out_len );
+
+	if ( tls->version >= TLS_VERSION_TLS_1_2 ) {
+		/* Use P_SHA256 for TLSv1.2 and later */
+		tls_p_hash_va ( tls, &sha256_algorithm, secret, secret_len,
+				out, out_len, seeds );
+	} else {
+		/* Use combination of P_MD5 and P_SHA-1 for TLSv1.1
+		 * and earlier
+		 */
+
+		/* Split secret into two, with an overlap of up to one byte */
+		subsecret_len = ( ( secret_len + 1 ) / 2 );
+		md5_secret = secret;
+		sha1_secret = ( secret + secret_len - subsecret_len );
+
+		/* Calculate MD5 portion.  Hand the callee a private
+		 * copy of the seed list so that the original list is
+		 * left untouched for the SHA1 pass.
+		 */
+		va_copy ( tmp, seeds );
+		tls_p_hash_va ( tls, &md5_algorithm, md5_secret,
+				subsecret_len, out, out_len, tmp );
+		va_end ( tmp );
+
+		/* Calculate SHA1 portion into the scratch buffer */
+		va_copy ( tmp, seeds );
+		tls_p_hash_va ( tls, &sha1_algorithm, sha1_secret,
+				subsecret_len, buf, out_len, tmp );
+		va_end ( tmp );
+
+		/* XOR the two portions together into the final output buffer */
+		for ( i = 0 ; i < out_len ; i++ )
+			*( ( uint8_t * ) out + i ) ^= buf[i];
+	}
+
+	va_end ( seeds );
+}
+
+/**
+ * Generate secure pseudo-random data
+ *
+ * @v tls TLS session
+ * @v secret Secret
+ * @v secret_len Length of secret
+ * @v out Output buffer
+ * @v out_len Length of output buffer
+ * @v label String literal label
+ * @v ... ( data, len ) pairs of seed data
+ *
+ * The label is passed to tls_prf() as the first seed pair, with its
+ * length taken as sizeof ( label ) - 1 (i.e. excluding the
+ * terminating NUL, which is why it must be a string literal).  The
+ * NULL terminator for the seed list is appended automatically.  At
+ * least one ( data, len ) pair must be supplied after the label.
+ */
+#define tls_prf_label( tls, secret, secret_len, out, out_len, label, ... ) \
+ tls_prf ( (tls), (secret), (secret_len), (out), (out_len), \
+ label, ( sizeof ( label ) - 1 ), __VA_ARGS__, NULL )
+
+/******************************************************************************
+ *
+ * Secret management
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Generate master secret
+ *
+ * @v tls TLS session
+ *
+ * The pre-master secret and the client and server random values must
+ * already be known.
+ *
+ * The master secret is written to tls->master_secret, derived via the
+ * PRF from the pre-master secret using the label "master secret"
+ * followed by the client random and then the server random as seed
+ * data.  The inputs and the result are dumped via the debug macros.
+ */
+static void tls_generate_master_secret ( struct tls_session *tls ) {
+ DBGC ( tls, "TLS %p pre-master-secret:\n", tls );
+ DBGC_HD ( tls, &tls->pre_master_secret,
+ sizeof ( tls->pre_master_secret ) );
+ DBGC ( tls, "TLS %p client random bytes:\n", tls );
+ DBGC_HD ( tls, &tls->client_random, sizeof ( tls->client_random ) );
+ DBGC ( tls, "TLS %p server random bytes:\n", tls );
+ DBGC_HD ( tls, &tls->server_random, sizeof ( tls->server_random ) );
+
+ tls_prf_label ( tls, &tls->pre_master_secret,
+ sizeof ( tls->pre_master_secret ),
+ &tls->master_secret, sizeof ( tls->master_secret ),
+ "master secret",
+ &tls->client_random, sizeof ( tls->client_random ),
+ &tls->server_random, sizeof ( tls->server_random ) );
+
+ DBGC ( tls, "TLS %p generated master secret:\n", tls );
+ DBGC_HD ( tls, &tls->master_secret, sizeof ( tls->master_secret ) );
+}
+
+/**
+ * Generate key material
+ *
+ * @v tls		TLS session
+ * @ret rc		Return status code
+ *
+ * The master secret must already be known.
+ *
+ * A key block is derived via the PRF (label "key expansion", seeded
+ * with the server random followed by the client random) and split, in
+ * order, into: TX MAC secret, RX MAC secret, TX key, RX key, TX IV and
+ * RX IV.  All sizes are taken from the pending TX cipher suite.
+ */
+static int tls_generate_keys ( struct tls_session *tls ) {
+	struct tls_cipherspec *tx_cipherspec = &tls->tx_cipherspec_pending;
+	struct tls_cipherspec *rx_cipherspec = &tls->rx_cipherspec_pending;
+	size_t hash_size = tx_cipherspec->suite->digest->digestsize;
+	size_t key_size = tx_cipherspec->suite->key_len;
+	size_t iv_size = tx_cipherspec->suite->cipher->blocksize;
+	size_t total = ( 2 * ( hash_size + key_size + iv_size ) );
+	uint8_t key_block[total];
+	uint8_t *key;
+	int rc;
+
+	/* Generate key block */
+	tls_prf_label ( tls, &tls->master_secret, sizeof ( tls->master_secret ),
+			key_block, sizeof ( key_block ), "key expansion",
+			&tls->server_random, sizeof ( tls->server_random ),
+			&tls->client_random, sizeof ( tls->client_random ) );
+
+	/* Split key block into portions */
+	key = key_block;
+
+	/* TX MAC secret */
+	memcpy ( tx_cipherspec->mac_secret, key, hash_size );
+	DBGC ( tls, "TLS %p TX MAC secret:\n", tls );
+	DBGC_HD ( tls, key, hash_size );
+	key += hash_size;
+
+	/* RX MAC secret */
+	memcpy ( rx_cipherspec->mac_secret, key, hash_size );
+	DBGC ( tls, "TLS %p RX MAC secret:\n", tls );
+	DBGC_HD ( tls, key, hash_size );
+	key += hash_size;
+
+	/* TX key */
+	if ( ( rc = cipher_setkey ( tx_cipherspec->suite->cipher,
+				    tx_cipherspec->cipher_ctx,
+				    key, key_size ) ) != 0 ) {
+		DBGC ( tls, "TLS %p could not set TX key: %s\n",
+		       tls, strerror ( rc ) );
+		return rc;
+	}
+	DBGC ( tls, "TLS %p TX key:\n", tls );
+	DBGC_HD ( tls, key, key_size );
+	key += key_size;
+
+	/* RX key (the failure message names the RX key, so that it
+	 * can be told apart from a TX key failure)
+	 */
+	if ( ( rc = cipher_setkey ( rx_cipherspec->suite->cipher,
+				    rx_cipherspec->cipher_ctx,
+				    key, key_size ) ) != 0 ) {
+		DBGC ( tls, "TLS %p could not set RX key: %s\n",
+		       tls, strerror ( rc ) );
+		return rc;
+	}
+	DBGC ( tls, "TLS %p RX key:\n", tls );
+	DBGC_HD ( tls, key, key_size );
+	key += key_size;
+
+	/* TX initialisation vector */
+	cipher_setiv ( tx_cipherspec->suite->cipher,
+		       tx_cipherspec->cipher_ctx, key );
+	DBGC ( tls, "TLS %p TX IV:\n", tls );
+	DBGC_HD ( tls, key, iv_size );
+	key += iv_size;
+
+	/* RX initialisation vector */
+	cipher_setiv ( rx_cipherspec->suite->cipher,
+		       rx_cipherspec->cipher_ctx, key );
+	DBGC ( tls, "TLS %p RX IV:\n", tls );
+	DBGC_HD ( tls, key, iv_size );
+	key += iv_size;
+
+	/* The whole key block must have been consumed */
+	assert ( ( key_block + total ) == key );
+
+	return 0;
+}
+
+/******************************************************************************
+ *
+ * Cipher suite management
+ *
+ ******************************************************************************
+ */
+
+/** Null cipher suite
+ *
+ * Built entirely from the null public-key, cipher and digest
+ * algorithms.
+ */
+struct tls_cipher_suite tls_cipher_suite_null = {
+	.digest = &digest_null,
+	.cipher = &cipher_null,
+	.pubkey = &pubkey_null,
+};
+
+/** Supported cipher suites, in order of preference
+ *
+ * Each code is stored already converted with htons(), and each key
+ * length is expressed in bytes.  All listed suites use RSA with
+ * AES-CBC, differing in key length and digest algorithm.
+ */
+struct tls_cipher_suite tls_cipher_suites[] = {
+ {
+ .code = htons ( TLS_RSA_WITH_AES_256_CBC_SHA256 ),
+ .key_len = ( 256 / 8 ),
+ .pubkey = &rsa_algorithm,
+ .cipher = &aes_cbc_algorithm,
+ .digest = &sha256_algorithm,
+ },
+ {
+ .code = htons ( TLS_RSA_WITH_AES_128_CBC_SHA256 ),
+ .key_len = ( 128 / 8 ),
+ .pubkey = &rsa_algorithm,
+ .cipher = &aes_cbc_algorithm,
+ .digest = &sha256_algorithm,
+ },
+ {
+ .code = htons ( TLS_RSA_WITH_AES_256_CBC_SHA ),
+ .key_len = ( 256 / 8 ),
+ .pubkey = &rsa_algorithm,
+ .cipher = &aes_cbc_algorithm,
+ .digest = &sha1_algorithm,
+ },
+ {
+ .code = htons ( TLS_RSA_WITH_AES_128_CBC_SHA ),
+ .key_len = ( 128 / 8 ),
+ .pubkey = &rsa_algorithm,
+ .cipher = &aes_cbc_algorithm,
+ .digest = &sha1_algorithm,
+ },
+};
+
+/** Number of supported cipher suites
+ *
+ * Computed from the size of the tls_cipher_suites array.
+ */
+#define TLS_NUM_CIPHER_SUITES \
+ ( sizeof ( tls_cipher_suites ) / sizeof ( tls_cipher_suites[0] ) )
+
+/**
+ * Identify cipher suite
+ *
+ * @v cipher_suite	Cipher suite specification
+ * @ret suite		Cipher suite, or NULL
+ *
+ * The specification is compared directly against the stored suite
+ * codes, which are held in htons() form.
+ */
+static struct tls_cipher_suite *
+tls_find_cipher_suite ( unsigned int cipher_suite ) {
+	struct tls_cipher_suite *candidate = tls_cipher_suites;
+	struct tls_cipher_suite *table_end =
+		( tls_cipher_suites + TLS_NUM_CIPHER_SUITES );
+
+	/* Walk the table in preference order; first match wins */
+	for ( ; candidate < table_end ; candidate++ ) {
+		if ( candidate->code == cipher_suite )
+			return candidate;
+	}
+
+	/* No supported suite has this code */
+	return NULL;
+}
+
+/**
+ * Clear cipher suite
+ *
+ * @v tls		TLS session (unused)
+ * @v cipherspec	TLS cipher specification
+ *
+ * Finalises the public-key context (if a suite is set), frees the
+ * dynamic storage, zeroes the specification and leaves it pointing at
+ * the null cipher suite.
+ */
+static void tls_clear_cipher ( struct tls_session *tls __unused,
+			       struct tls_cipherspec *cipherspec ) {
+	struct tls_cipher_suite *old_suite = cipherspec->suite;
+
+	/* Shut down the public-key context of any previous suite */
+	if ( old_suite )
+		pubkey_final ( old_suite->pubkey, cipherspec->pubkey_ctx );
+
+	/* Release storage and reset to the null suite */
+	free ( cipherspec->dynamic );
+	memset ( cipherspec, 0, sizeof ( *cipherspec ) );
+	cipherspec->suite = &tls_cipher_suite_null;
+}
+
+/**
+ * Set cipher suite
+ *
+ * @v tls TLS session
+ * @v cipherspec TLS cipher specification
+ * @v suite Cipher suite
+ * @ret rc Return status code
+ *
+ * Any previous contents of the cipher specification are cleared
+ * first, so on allocation failure the specification is left set to
+ * the null cipher suite.  A single zeroed allocation is carved up, in
+ * order, into the public-key context, the cipher context, the "next"
+ * cipher context and the MAC secret.
+ */
+static int tls_set_cipher ( struct tls_session *tls,
+ struct tls_cipherspec *cipherspec,
+ struct tls_cipher_suite *suite ) {
+ struct pubkey_algorithm *pubkey = suite->pubkey;
+ struct cipher_algorithm *cipher = suite->cipher;
+ struct digest_algorithm *digest = suite->digest;
+ size_t total;
+ void *dynamic;
+
+ /* Clear out old cipher contents, if any */
+ tls_clear_cipher ( tls, cipherspec );
+
+ /* Allocate dynamic storage: one public-key context, two cipher
+ * contexts and one digest-sized MAC secret
+ */
+ total = ( pubkey->ctxsize + 2 * cipher->ctxsize + digest->digestsize );
+ dynamic = zalloc ( total );
+ if ( ! dynamic ) {
+ DBGC ( tls, "TLS %p could not allocate %zd bytes for crypto "
+ "context\n", tls, total );
+ return -ENOMEM_CONTEXT;
+ }
+
+ /* Assign storage.  The base pointer is recorded first so that
+ * it can later be freed; "dynamic" is then advanced past each
+ * region as it is handed out.
+ */
+ cipherspec->dynamic = dynamic;
+ cipherspec->pubkey_ctx = dynamic; dynamic += pubkey->ctxsize;
+ cipherspec->cipher_ctx = dynamic; dynamic += cipher->ctxsize;
+ cipherspec->cipher_next_ctx = dynamic; dynamic += cipher->ctxsize;
+ cipherspec->mac_secret = dynamic; dynamic += digest->digestsize;
+ assert ( ( cipherspec->dynamic + total ) == dynamic );
+
+ /* Store parameters */
+ cipherspec->suite = suite;
+
+ return 0;
+}
+
+/**
+ * Select next cipher suite
+ *
+ * @v tls		TLS session
+ * @v cipher_suite	Cipher suite specification
+ * @ret rc		Return status code
+ *
+ * Installs the identified suite into both the pending TX and the
+ * pending RX cipher specifications.
+ */
+static int tls_select_cipher ( struct tls_session *tls,
+			       unsigned int cipher_suite ) {
+	struct tls_cipher_suite *chosen;
+	int rc;
+
+	/* Look up the requested suite */
+	chosen = tls_find_cipher_suite ( cipher_suite );
+	if ( chosen == NULL ) {
+		DBGC ( tls, "TLS %p does not support cipher %04x\n",
+		       tls, ntohs ( cipher_suite ) );
+		return -ENOTSUP_CIPHER;
+	}
+
+	/* Prepare pending TX cipher specification */
+	rc = tls_set_cipher ( tls, &tls->tx_cipherspec_pending, chosen );
+	if ( rc != 0 )
+		return rc;
+
+	/* Prepare pending RX cipher specification */
+	rc = tls_set_cipher ( tls, &tls->rx_cipherspec_pending, chosen );
+	if ( rc != 0 )
+		return rc;
+
+	DBGC ( tls, "TLS %p selected %s-%s-%d-%s\n", tls, chosen->pubkey->name,
+	       chosen->cipher->name, ( chosen->key_len * 8 ),
+	       chosen->digest->name );
+
+	return 0;
+}
+
+/**
+ * Activate next cipher suite
+ *
+ * @v tls		TLS session
+ * @v pending		Pending cipher specification
+ * @v active		Active cipher specification to replace
+ * @ret rc		Return status code
+ *
+ * The active specification is cleared and then swapped with the
+ * pending one, so the pending slot ends up holding the cleared (null
+ * suite) specification.
+ */
+static int tls_change_cipher ( struct tls_session *tls,
+			       struct tls_cipherspec *pending,
+			       struct tls_cipherspec *active ) {
+
+	/* Only a real (non-null) pending suite may be activated */
+	if ( pending->suite != &tls_cipher_suite_null ) {
+		tls_clear_cipher ( tls, active );
+		memswap ( active, pending, sizeof ( *active ) );
+		return 0;
+	}
+
+	DBGC ( tls, "TLS %p refusing to use null cipher\n", tls );
+	return -ENOTSUP_NULL;
+}
+
+/******************************************************************************
+ *
+ * Signature and hash algorithms
+ *
+ ******************************************************************************
+ */
+
+/** Supported signature and hash algorithms
+ *
+ * Note that the default (TLSv1.1 and earlier) algorithm using
+ * MD5+SHA1 is never explicitly specified.
+ *
+ * Each entry pairs the identifier code (signature and hash
+ * identifiers) with the corresponding public-key and digest
+ * algorithms.  Only RSA with SHA-256 is listed.
+ */
+struct tls_signature_hash_algorithm tls_signature_hash_algorithms[] = {
+ {
+ .code = {
+ .signature = TLS_RSA_ALGORITHM,
+ .hash = TLS_SHA256_ALGORITHM,
+ },
+ .pubkey = &rsa_algorithm,
+ .digest = &sha256_algorithm,
+ },
+};
+
+/** Number of supported signature and hash algorithms
+ *
+ * Computed from the size of the tls_signature_hash_algorithms array.
+ */
+#define TLS_NUM_SIG_HASH_ALGORITHMS \
+ ( sizeof ( tls_signature_hash_algorithms ) / \
+ sizeof ( tls_signature_hash_algorithms[0] ) )
+
+/**
+ * Find TLS signature and hash algorithm
+ *
+ * @v pubkey		Public-key algorithm
+ * @v digest		Digest algorithm
+ * @ret sig_hash	Signature and hash algorithm, or NULL
+ *
+ * Algorithms are matched by pointer identity against the table of
+ * supported signature and hash algorithms.
+ */
+static struct tls_signature_hash_algorithm *
+tls_signature_hash_algorithm ( struct pubkey_algorithm *pubkey,
+			       struct digest_algorithm *digest ) {
+	struct tls_signature_hash_algorithm *entry =
+		tls_signature_hash_algorithms;
+	struct tls_signature_hash_algorithm *table_end =
+		( tls_signature_hash_algorithms +
+		  TLS_NUM_SIG_HASH_ALGORITHMS );
+
+	/* Return the first entry matching both algorithms */
+	for ( ; entry < table_end ; entry++ ) {
+		if ( entry->pubkey != pubkey )
+			continue;
+		if ( entry->digest != digest )
+			continue;
+		return entry;
+	}
+
+	return NULL;
+}
+
+/******************************************************************************
+ *
+ * Handshake verification
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Add handshake record to verification hash
+ *
+ * @v tls		TLS session
+ * @v data		Handshake record
+ * @v len		Length of handshake record
+ *
+ * Both the MD5+SHA1 and the SHA256 running digests are updated, so
+ * that either may later be used for handshake verification.
+ */
+static void tls_add_handshake ( struct tls_session *tls,
+				const void *data, size_t len ) {
+
+	/* Accumulate into the MD5+SHA1 handshake digest */
+	digest_update ( &md5_sha1_algorithm,
+			tls->handshake_md5_sha1_ctx, data, len );
+
+	/* Accumulate into the SHA256 handshake digest */
+	digest_update ( &sha256_algorithm,
+			tls->handshake_sha256_ctx, data, len );
+}
+
+/**
+ * Calculate handshake verification hash
+ *
+ * @v tls		TLS session
+ * @v out		Output buffer
+ *
+ * Calculates the MD5+SHA1 or SHA256 digest over all handshake
+ * messages seen so far.  The digest is finalised on a scratch copy of
+ * the selected handshake context, leaving the running context intact.
+ */
+static void tls_verify_handshake ( struct tls_session *tls, void *out ) {
+	struct digest_algorithm *algorithm = tls->handshake_digest;
+	uint8_t scratch[ algorithm->ctxsize ];
+
+	/* Finalise a copy so further records can still be added */
+	memcpy ( scratch, tls->handshake_ctx, algorithm->ctxsize );
+	digest_final ( algorithm, scratch, out );
+}
+
+/******************************************************************************
+ *
+ * Record handling
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Resume TX state machine
+ *
+ * @v tls TLS session
+ *
+ * Done by (re-)adding the session's process via process_add().
+ */
+static void tls_tx_resume ( struct tls_session *tls ) {
+ process_add ( &tls->process );
+}
+
+/**
+ * Transmit Handshake record
+ *
+ * @v tls		TLS session
+ * @v data		Plaintext record
+ * @v len		Length of plaintext record
+ * @ret rc		Return status code
+ *
+ * The record is folded into the handshake verification digests
+ * before transmission is attempted.
+ */
+static int tls_send_handshake ( struct tls_session *tls,
+				void *data, size_t len ) {
+	int rc;
+
+	/* Account for this record in the handshake digests */
+	tls_add_handshake ( tls, data, len );
+
+	/* Hand the record over for transmission */
+	rc = tls_send_plaintext ( tls, TLS_TYPE_HANDSHAKE, data, len );
+
+	return rc;
+}
+
+/**
+ * Transmit Client Hello record
+ *
+ * @v tls TLS session
+ * @ret rc Return status code
+ *
+ * The record is built on the stack as a packed structure whose server
+ * name field is sized by strlen ( tls->name ), so the name is sent
+ * without a terminating NUL.  All entries of tls_cipher_suites are
+ * offered, in table order.  The session ID length and the single
+ * compression method are left as zero by the initial memset().  Two
+ * extensions are included: server name and maximum fragment length.
+ *
+ * The type_length field combines cpu_to_le32() of the handshake type
+ * with htonl() of the body length.  NOTE(review): this presumably
+ * yields a one-byte type followed by a 24-bit big-endian length,
+ * assuming both values fit - confirm against the constants.
+ */
+static int tls_send_client_hello ( struct tls_session *tls ) {
+ struct {
+ uint32_t type_length;
+ uint16_t version;
+ uint8_t random[32];
+ uint8_t session_id_len;
+ uint16_t cipher_suite_len;
+ uint16_t cipher_suites[TLS_NUM_CIPHER_SUITES];
+ uint8_t compression_methods_len;
+ uint8_t compression_methods[1];
+ uint16_t extensions_len;
+ struct {
+ uint16_t server_name_type;
+ uint16_t server_name_len;
+ struct {
+ uint16_t len;
+ struct {
+ uint8_t type;
+ uint16_t len;
+ uint8_t name[ strlen ( tls->name ) ];
+ } __attribute__ (( packed )) list[1];
+ } __attribute__ (( packed )) server_name;
+ uint16_t max_fragment_length_type;
+ uint16_t max_fragment_length_len;
+ struct {
+ uint8_t max;
+ } __attribute__ (( packed )) max_fragment_length;
+ } __attribute__ (( packed )) extensions;
+ } __attribute__ (( packed )) hello;
+ unsigned int i;
+
+ memset ( &hello, 0, sizeof ( hello ) );
+ hello.type_length = ( cpu_to_le32 ( TLS_CLIENT_HELLO ) |
+ htonl ( sizeof ( hello ) -
+ sizeof ( hello.type_length ) ) );
+ hello.version = htons ( tls->version );
+ memcpy ( &hello.random, &tls->client_random, sizeof ( hello.random ) );
+ hello.cipher_suite_len = htons ( sizeof ( hello.cipher_suites ) );
+ for ( i = 0 ; i < TLS_NUM_CIPHER_SUITES ; i++ )
+ hello.cipher_suites[i] = tls_cipher_suites[i].code;
+ hello.compression_methods_len = sizeof ( hello.compression_methods );
+ hello.extensions_len = htons ( sizeof ( hello.extensions ) );
+ hello.extensions.server_name_type = htons ( TLS_SERVER_NAME );
+ hello.extensions.server_name_len
+ = htons ( sizeof ( hello.extensions.server_name ) );
+ hello.extensions.server_name.len
+ = htons ( sizeof ( hello.extensions.server_name.list ) );
+ hello.extensions.server_name.list[0].type = TLS_SERVER_NAME_HOST_NAME;
+ hello.extensions.server_name.list[0].len
+ = htons ( sizeof ( hello.extensions.server_name.list[0].name ));
+ memcpy ( hello.extensions.server_name.list[0].name, tls->name,
+ sizeof ( hello.extensions.server_name.list[0].name ) );
+ hello.extensions.max_fragment_length_type
+ = htons ( TLS_MAX_FRAGMENT_LENGTH );
+ hello.extensions.max_fragment_length_len
+ = htons ( sizeof ( hello.extensions.max_fragment_length ) );
+ hello.extensions.max_fragment_length.max
+ = TLS_MAX_FRAGMENT_LENGTH_4096;
+
+ return tls_send_handshake ( tls, &hello, sizeof ( hello ) );
+}
+
+/**
+ * Transmit Certificate record
+ *
+ * @v tls TLS session
+ * @ret rc Return status code
+ *
+ * Sends a certificate list containing exactly one certificate: the
+ * raw data of tls->cert.  The record is heap-allocated and is freed
+ * after the transmission attempt regardless of its outcome.
+ */
+static int tls_send_certificate ( struct tls_session *tls ) {
+ struct {
+ uint32_t type_length;
+ uint8_t length[3];
+ struct {
+ uint8_t length[3];
+ uint8_t data[ tls->cert->raw.len ];
+ } __attribute__ (( packed )) certificates[1];
+ } __attribute__ (( packed )) *certificate;
+ int rc;
+
+ /* Allocate storage for Certificate record (which may be too
+ * large for the stack).
+ */
+ certificate = zalloc ( sizeof ( *certificate ) );
+ if ( ! certificate )
+ return -ENOMEM_CERTIFICATE;
+
+ /* Populate record: overall list length, then the single
+ * certificate's own length and data
+ */
+ certificate->type_length =
+ ( cpu_to_le32 ( TLS_CERTIFICATE ) |
+ htonl ( sizeof ( *certificate ) -
+ sizeof ( certificate->type_length ) ) );
+ tls_set_uint24 ( certificate->length,
+ sizeof ( certificate->certificates ) );
+ tls_set_uint24 ( certificate->certificates[0].length,
+ sizeof ( certificate->certificates[0].data ) );
+ memcpy ( certificate->certificates[0].data,
+ tls->cert->raw.data,
+ sizeof ( certificate->certificates[0].data ) );
+
+ /* Transmit record */
+ rc = tls_send_handshake ( tls, certificate, sizeof ( *certificate ) );
+
+ /* Free record */
+ free ( certificate );
+
+ return rc;
+}
+
+/**
+ * Transmit Client Key Exchange record
+ *
+ * @v tls TLS session
+ * @ret rc Return status code
+ *
+ * The record buffer is sized for the maximum output length reported
+ * by the public-key algorithm.  Any part of that buffer not filled by
+ * the encryption ("unused") is subtracted from both length fields and
+ * from the number of bytes transmitted.
+ */
+static int tls_send_client_key_exchange ( struct tls_session *tls ) {
+ struct tls_cipherspec *cipherspec = &tls->tx_cipherspec_pending;
+ struct pubkey_algorithm *pubkey = cipherspec->suite->pubkey;
+ size_t max_len = pubkey_max_len ( pubkey, cipherspec->pubkey_ctx );
+ struct {
+ uint32_t type_length;
+ uint16_t encrypted_pre_master_secret_len;
+ uint8_t encrypted_pre_master_secret[max_len];
+ } __attribute__ (( packed )) key_xchg;
+ size_t unused;
+ int len;
+ int rc;
+
+ /* Encrypt pre-master secret using server's public key.  A
+ * negative return value is an error code; otherwise it is the
+ * length of the encrypted data.
+ */
+ memset ( &key_xchg, 0, sizeof ( key_xchg ) );
+ len = pubkey_encrypt ( pubkey, cipherspec->pubkey_ctx,
+ &tls->pre_master_secret,
+ sizeof ( tls->pre_master_secret ),
+ key_xchg.encrypted_pre_master_secret );
+ if ( len < 0 ) {
+ rc = len;
+ DBGC ( tls, "TLS %p could not encrypt pre-master secret: %s\n",
+ tls, strerror ( rc ) );
+ return rc;
+ }
+ unused = ( max_len - len );
+ key_xchg.type_length =
+ ( cpu_to_le32 ( TLS_CLIENT_KEY_EXCHANGE ) |
+ htonl ( sizeof ( key_xchg ) -
+ sizeof ( key_xchg.type_length ) - unused ) );
+ key_xchg.encrypted_pre_master_secret_len =
+ htons ( sizeof ( key_xchg.encrypted_pre_master_secret ) -
+ unused );
+
+ return tls_send_handshake ( tls, &key_xchg,
+ ( sizeof ( key_xchg ) - unused ) );
+}
+
+/**
+ * Transmit Certificate Verify record
+ *
+ * @v tls TLS session
+ * @ret rc Return status code
+ *
+ * Signs the current handshake verification digest with the client
+ * private key, using the public-key algorithm named by the client
+ * certificate's signature algorithm.  For TLSv1.2 and later the
+ * record additionally carries an explicit signature and hash
+ * algorithm identifier, and the function fails if no such identifier
+ * is known for the ( pubkey, digest ) pair.
+ */
+static int tls_send_certificate_verify ( struct tls_session *tls ) {
+ struct digest_algorithm *digest = tls->handshake_digest;
+ struct x509_certificate *cert = tls->cert;
+ struct pubkey_algorithm *pubkey = cert->signature_algorithm->pubkey;
+ uint8_t digest_out[ digest->digestsize ];
+ uint8_t ctx[ pubkey->ctxsize ];
+ struct tls_signature_hash_algorithm *sig_hash = NULL;
+ int rc;
+
+ /* Generate digest to be signed */
+ tls_verify_handshake ( tls, digest_out );
+
+ /* Initialise public-key algorithm */
+ if ( ( rc = pubkey_init ( pubkey, ctx, private_key.data,
+ private_key.len ) ) != 0 ) {
+ DBGC ( tls, "TLS %p could not initialise %s client private "
+ "key: %s\n", tls, pubkey->name, strerror ( rc ) );
+ goto err_pubkey_init;
+ }
+
+ /* TLSv1.2 and later use explicit algorithm identifiers */
+ if ( tls->version >= TLS_VERSION_TLS_1_2 ) {
+ sig_hash = tls_signature_hash_algorithm ( pubkey, digest );
+ if ( ! sig_hash ) {
+ DBGC ( tls, "TLS %p could not identify (%s,%s) "
+ "signature and hash algorithm\n", tls,
+ pubkey->name, digest->name );
+ rc = -ENOTSUP_SIG_HASH;
+ goto err_sig_hash;
+ }
+ }
+
+ /* Generate and transmit record.  The identifier field is
+ * declared as an array of zero or one elements, so that it is
+ * present in the record only when an identifier was found
+ * above.
+ */
+ {
+ size_t max_len = pubkey_max_len ( pubkey, ctx );
+ int use_sig_hash = ( ( sig_hash == NULL ) ? 0 : 1 );
+ struct {
+ uint32_t type_length;
+ struct tls_signature_hash_id sig_hash[use_sig_hash];
+ uint16_t signature_len;
+ uint8_t signature[max_len];
+ } __attribute__ (( packed )) certificate_verify;
+ size_t unused;
+ int len;
+
+ /* Sign digest.  A negative return value is an error
+ * code; otherwise it is the signature length.
+ */
+ len = pubkey_sign ( pubkey, ctx, digest, digest_out,
+ certificate_verify.signature );
+ if ( len < 0 ) {
+ rc = len;
+ DBGC ( tls, "TLS %p could not sign %s digest using %s "
+ "client private key: %s\n", tls, digest->name,
+ pubkey->name, strerror ( rc ) );
+ goto err_pubkey_sign;
+ }
+ unused = ( max_len - len );
+
+ /* Construct Certificate Verify record, excluding the
+ * unused tail of the signature buffer from all lengths
+ */
+ certificate_verify.type_length =
+ ( cpu_to_le32 ( TLS_CERTIFICATE_VERIFY ) |
+ htonl ( sizeof ( certificate_verify ) -
+ sizeof ( certificate_verify.type_length ) -
+ unused ) );
+ if ( use_sig_hash ) {
+ memcpy ( &certificate_verify.sig_hash[0],
+ &sig_hash->code,
+ sizeof ( certificate_verify.sig_hash[0] ) );
+ }
+ certificate_verify.signature_len =
+ htons ( sizeof ( certificate_verify.signature ) -
+ unused );
+
+ /* Transmit record */
+ rc = tls_send_handshake ( tls, &certificate_verify,
+ ( sizeof ( certificate_verify ) - unused ) );
+ }
+
+ err_pubkey_sign:
+ err_sig_hash:
+ pubkey_final ( pubkey, ctx );
+ err_pubkey_init:
+ return rc;
+}
+
+/**
+ * Transmit Change Cipher record
+ *
+ * @v tls		TLS session
+ * @ret rc		Return status code
+ *
+ * The record body is a single byte with value 1.
+ */
+static int tls_send_change_cipher ( struct tls_session *tls ) {
+	static const uint8_t payload[1] = { 1 };
+
+	return tls_send_plaintext ( tls, TLS_TYPE_CHANGE_CIPHER,
+				    payload, sizeof ( payload ) );
+}
+
+/**
+ * Transmit Finished record
+ *
+ * @v tls		TLS session
+ * @ret rc		Return status code
+ *
+ * The verify data is the PRF of the master secret with the label
+ * "client finished", seeded with the current handshake verification
+ * digest.  On successful transmission the client negotiation pending
+ * operation is dropped.
+ */
+static int tls_send_finished ( struct tls_session *tls ) {
+	struct digest_algorithm *algorithm = tls->handshake_digest;
+	struct {
+		uint32_t type_length;
+		uint8_t verify_data[12];
+	} __attribute__ (( packed )) finished;
+	uint8_t handshake_hash[ algorithm->digestsize ];
+	int rc;
+
+	/* Build record header */
+	memset ( &finished, 0, sizeof ( finished ) );
+	finished.type_length = ( cpu_to_le32 ( TLS_FINISHED ) |
+				 htonl ( sizeof ( finished ) -
+					 sizeof ( finished.type_length ) ) );
+
+	/* Derive verify data from the handshake digest */
+	tls_verify_handshake ( tls, handshake_hash );
+	tls_prf_label ( tls, &tls->master_secret, sizeof ( tls->master_secret ),
+			finished.verify_data, sizeof ( finished.verify_data ),
+			"client finished", handshake_hash,
+			sizeof ( handshake_hash ) );
+
+	/* Transmit record; mark client as finished only on success */
+	rc = tls_send_handshake ( tls, &finished, sizeof ( finished ) );
+	if ( rc == 0 )
+		pending_put ( &tls->client_negotiation );
+
+	return rc;
+}
+
+/**
+ * Receive new Change Cipher record
+ *
+ * @v tls		TLS session
+ * @v data		Plaintext record
+ * @v len		Length of plaintext record
+ * @ret rc		Return status code
+ *
+ * A valid record is exactly one byte with value 1.  On success the
+ * pending RX cipher specification becomes active and the RX sequence
+ * number is set to all-ones.
+ */
+static int tls_new_change_cipher ( struct tls_session *tls,
+				   const void *data, size_t len ) {
+	const uint8_t *payload = data;
+	int rc;
+
+	/* Validate record (length is checked before the byte is read) */
+	if ( ! ( ( len == 1 ) && ( payload[0] == 1 ) ) ) {
+		DBGC ( tls, "TLS %p received invalid Change Cipher\n", tls );
+		DBGC_HD ( tls, data, len );
+		return -EINVAL_CHANGE_CIPHER;
+	}
+
+	/* Activate pending RX cipher specification */
+	rc = tls_change_cipher ( tls, &tls->rx_cipherspec_pending,
+				 &tls->rx_cipherspec );
+	if ( rc != 0 ) {
+		DBGC ( tls, "TLS %p could not activate RX cipher: %s\n",
+		       tls, strerror ( rc ) );
+		return rc;
+	}
+
+	/* Reset RX sequence number */
+	tls->rx_seq = ~( ( uint64_t ) 0 );
+
+	return 0;
+}
+
+/**
+ * Receive new Alert record
+ *
+ * @v tls		TLS session
+ * @v data		Plaintext record
+ * @v len		Length of plaintext record
+ * @ret rc		Return status code
+ *
+ * Warning alerts are logged and accepted; fatal alerts and alerts of
+ * unknown level each produce a distinct error.
+ */
+static int tls_new_alert ( struct tls_session *tls, const void *data,
+			   size_t len ) {
+	const struct {
+		uint8_t level;
+		uint8_t description;
+		char next[0];
+	} __attribute__ (( packed )) *alert = data;
+	const void *expected_end = alert->next;
+
+	/* The record must consist of exactly the level and description */
+	if ( expected_end != ( data + len ) ) {
+		DBGC ( tls, "TLS %p received overlength Alert\n", tls );
+		DBGC_HD ( tls, data, len );
+		return -EINVAL_ALERT;
+	}
+
+	/* Warnings are not treated as errors */
+	if ( alert->level == TLS_ALERT_WARNING ) {
+		DBGC ( tls, "TLS %p received warning alert %d\n",
+		       tls, alert->description );
+		return 0;
+	}
+
+	/* Fatal alerts */
+	if ( alert->level == TLS_ALERT_FATAL ) {
+		DBGC ( tls, "TLS %p received fatal alert %d\n",
+		       tls, alert->description );
+		return -EPERM_ALERT;
+	}
+
+	/* Anything else is an unrecognised alert level */
+	DBGC ( tls, "TLS %p received unknown alert level %d"
+	       "(alert %d)\n", tls, alert->level, alert->description );
+	return -EIO_ALERT;
+}
+
+/**
+ * Receive new Server Hello handshake record
+ *
+ * @v tls		TLS session
+ * @v data		Plaintext handshake record
+ * @v len		Length of plaintext handshake record
+ * @ret rc		Return status code
+ *
+ * The record consists of a fixed header (ending with the session ID
+ * length), the variable-length session ID, and then the chosen cipher
+ * suite and compression method.  The record length is validated
+ * against the fixed header before the session ID length is read, and
+ * against the full layout before any later field is read.
+ *
+ * On success the negotiated protocol version, the server random
+ * bytes and the pending cipher suite are recorded, and the master
+ * secret and key material are generated.
+ */
+static int tls_new_server_hello ( struct tls_session *tls,
+				  const void *data, size_t len ) {
+	const struct {
+		uint16_t version;
+		uint8_t random[32];
+		uint8_t session_id_len;
+		char next[0];
+	} __attribute__ (( packed )) *hello_a = data;
+	const struct {
+		uint16_t cipher_suite;
+		uint8_t compression_method;
+		char next[0];
+	} __attribute__ (( packed )) *hello_b;
+	uint16_t version;
+	int rc;
+
+	/* Sanity check.  Evaluation order matters: the session ID
+	 * length is read only once the fixed header is known to be
+	 * present, and each subtraction is guarded by the preceding
+	 * comparison.
+	 */
+	if ( ( len < sizeof ( *hello_a ) ) ||
+	     ( hello_a->session_id_len > ( len - sizeof ( *hello_a ) ) ) ||
+	     ( sizeof ( *hello_b ) > ( len - sizeof ( *hello_a ) -
+				       hello_a->session_id_len ) ) ) {
+		DBGC ( tls, "TLS %p received underlength Server Hello\n", tls );
+		DBGC_HD ( tls, data, len );
+		return -EINVAL_HELLO;
+	}
+
+	/* Locate the fields following the (skipped) session ID */
+	hello_b = ( data + sizeof ( *hello_a ) + hello_a->session_id_len );
+
+	/* Check and store protocol version */
+	version = ntohs ( hello_a->version );
+	if ( version < TLS_VERSION_TLS_1_0 ) {
+		DBGC ( tls, "TLS %p does not support protocol version %d.%d\n",
+		       tls, ( version >> 8 ), ( version & 0xff ) );
+		return -ENOTSUP_VERSION;
+	}
+	if ( version > tls->version ) {
+		DBGC ( tls, "TLS %p server attempted to illegally upgrade to "
+		       "protocol version %d.%d\n",
+		       tls, ( version >> 8 ), ( version & 0xff ) );
+		return -EPROTO_VERSION;
+	}
+	tls->version = version;
+	DBGC ( tls, "TLS %p using protocol version %d.%d\n",
+	       tls, ( version >> 8 ), ( version & 0xff ) );
+
+	/* Use MD5+SHA1 digest algorithm for handshake verification
+	 * for versions earlier than TLSv1.2.
+	 */
+	if ( tls->version < TLS_VERSION_TLS_1_2 ) {
+		tls->handshake_digest = &md5_sha1_algorithm;
+		tls->handshake_ctx = tls->handshake_md5_sha1_ctx;
+	}
+
+	/* Copy out server random bytes */
+	memcpy ( &tls->server_random, &hello_a->random,
+		 sizeof ( tls->server_random ) );
+
+	/* Select cipher suite */
+	if ( ( rc = tls_select_cipher ( tls, hello_b->cipher_suite ) ) != 0 )
+		return rc;
+
+	/* Generate secrets */
+	tls_generate_master_secret ( tls );
+	if ( ( rc = tls_generate_keys ( tls ) ) != 0 )
+		return rc;
+
+	return 0;
+}
+
+/**
+ * Parse certificate chain
+ *
+ * @v tls		TLS session
+ * @v data		Certificate chain
+ * @v len		Length of certificate chain
+ * @ret rc		Return status code
+ *
+ * The chain is a sequence of entries, each a 24-bit length followed
+ * by that many bytes of raw certificate data.  Any existing chain is
+ * released first.  On any failure the partially-built chain is
+ * released and tls->chain is left NULL.
+ */
+static int tls_parse_chain ( struct tls_session *tls,
+			     const void *data, size_t len ) {
+	const void *end = ( data + len );
+	const struct {
+		uint8_t length[3];
+		uint8_t data[0];
+	} __attribute__ (( packed )) *certificate;
+	size_t certificate_len;
+	size_t remaining;
+	struct x509_certificate *cert;
+	const void *next;
+	int rc;
+
+	/* Free any existing certificate chain */
+	x509_chain_put ( tls->chain );
+	tls->chain = NULL;
+
+	/* Create certificate chain */
+	tls->chain = x509_alloc_chain();
+	if ( ! tls->chain ) {
+		rc = -ENOMEM_CHAIN;
+		goto err_alloc_chain;
+	}
+
+	/* Add certificates to chain */
+	while ( data < end ) {
+
+		/* Ensure the length header itself is present before
+		 * reading it
+		 */
+		certificate = data;
+		remaining = ( end - data );
+		if ( remaining < sizeof ( *certificate ) ) {
+			DBGC ( tls, "TLS %p underlength certificate:\n", tls );
+			DBGC_HDA ( tls, 0, data, remaining );
+			rc = -EINVAL_CERTIFICATE;
+			goto err_underlength;
+		}
+
+		/* Extract raw certificate data, comparing lengths
+		 * rather than pointers so that an oversized length
+		 * field cannot wrap the address computation
+		 */
+		certificate_len = tls_uint24 ( certificate->length );
+		if ( certificate_len > ( remaining - sizeof ( *certificate ) ) ) {
+			DBGC ( tls, "TLS %p overlength certificate:\n", tls );
+			DBGC_HDA ( tls, 0, data, ( end - data ) );
+			rc = -EINVAL_CERTIFICATE;
+			goto err_overlength;
+		}
+		next = ( certificate->data + certificate_len );
+
+		/* Add certificate to chain */
+		if ( ( rc = x509_append_raw ( tls->chain, certificate->data,
+					      certificate_len ) ) != 0 ) {
+			DBGC ( tls, "TLS %p could not append certificate: %s\n",
+			       tls, strerror ( rc ) );
+			DBGC_HDA ( tls, 0, data, ( end - data ) );
+			goto err_parse;
+		}
+		cert = x509_last ( tls->chain );
+		DBGC ( tls, "TLS %p found certificate %s\n",
+		       tls, x509_name ( cert ) );
+
+		/* Move to next certificate in list */
+		data = next;
+	}
+
+	return 0;
+
+ err_parse:
+ err_overlength:
+ err_underlength:
+	x509_chain_put ( tls->chain );
+	tls->chain = NULL;
+ err_alloc_chain:
+	return rc;
+}
+
+/**
+ * Receive new Certificate handshake record
+ *
+ * @v tls TLS session
+ * @v data Plaintext handshake record
+ * @v len Length of plaintext handshake record
+ * @ret rc Return status code
+ */
+static int tls_new_certificate ( struct tls_session *tls,
+				 const void *data, size_t len ) {
+	const struct {
+		uint8_t length[3];
+		uint8_t certificates[0];
+	} __attribute__ (( packed )) *certificate = data;
+	size_t certificates_len;
+
+	/* Sanity check: the three-byte list length must be present
+	 * before it can safely be read.
+	 */
+	if ( len < sizeof ( *certificate ) ) {
+		DBGC ( tls, "TLS %p received underlength Server Certificate\n",
+		       tls );
+		DBGC_HD ( tls, data, len );
+		return -EINVAL_CERTIFICATES;
+	}
+
+	/* Sanity check: the certificate list must fill the remainder
+	 * of the handshake record exactly.
+	 */
+	certificates_len = tls_uint24 ( certificate->length );
+	if ( certificates_len != ( len - sizeof ( *certificate ) ) ) {
+		DBGC ( tls, "TLS %p received overlength Server Certificate\n",
+		       tls );
+		DBGC_HD ( tls, data, len );
+		return -EINVAL_CERTIFICATES;
+	}
+
+	/* Parse certificate chain */
+	return tls_parse_chain ( tls, certificate->certificates,
+				 certificates_len );
+}
+
+/**
+ * Receive new Certificate Request handshake record
+ *
+ * @v tls TLS session
+ * @v data Plaintext handshake record
+ * @v len Length of plaintext handshake record
+ * @ret rc Return status code
+ */
+static int tls_new_certificate_request ( struct tls_session *tls,
+					 const void *data __unused,
+					 size_t len __unused ) {
+	struct x509_certificate *client_cert;
+
+	/* The request body is deliberately not parsed: only a single
+	 * client certificate can ever be offered, so its contents
+	 * could not influence the choice.
+	 */
+
+	/* Drop the reference to any previously selected certificate */
+	x509_put ( tls->cert );
+
+	/* Select the certificate matching our private key */
+	client_cert = certstore_find_key ( &private_key );
+	tls->cert = client_cert;
+	if ( client_cert == NULL ) {
+		DBGC ( tls, "TLS %p could not find certificate corresponding "
+		       "to private key\n", tls );
+		return -EPERM_CLIENT_CERT;
+	}
+
+	/* Hold our own reference for as long as the session keeps it */
+	x509_get ( client_cert );
+	DBGC ( tls, "TLS %p sending client certificate %s\n",
+	       tls, x509_name ( client_cert ) );
+
+	return 0;
+}
+
+/**
+ * Receive new Server Hello Done handshake record
+ *
+ * @v tls TLS session
+ * @v data Plaintext handshake record
+ * @v len Length of plaintext handshake record
+ * @ret rc Return status code
+ */
+static int tls_new_server_hello_done ( struct tls_session *tls,
+				       const void *data, size_t len ) {
+	int rc;
+
+	/* Sanity check: a Server Hello Done message has an empty body */
+	if ( len != 0 ) {
+		DBGC ( tls, "TLS %p received overlength Server Hello Done\n",
+		       tls );
+		DBGC_HD ( tls, data, len );
+		return -EINVAL_HELLO_DONE;
+	}
+
+	/* Begin certificate validation */
+	rc = create_validator ( &tls->validator, tls->chain );
+	if ( rc != 0 ) {
+		DBGC ( tls, "TLS %p could not start certificate validation: "
+		       "%s\n", tls, strerror ( rc ) );
+	}
+
+	return rc;
+}
+
+/**
+ * Receive new Finished handshake record
+ *
+ * @v tls TLS session
+ * @v data Plaintext handshake record
+ * @v len Length of plaintext handshake record
+ * @ret rc Return status code
+ */
+static int tls_new_finished ( struct tls_session *tls,
+			      const void *data, size_t len ) {
+	struct digest_algorithm *digest = tls->handshake_digest;
+	const struct {
+		uint8_t verify_data[12];
+	} __attribute__ (( packed )) *finished = data;
+	uint8_t handshake_hash[ digest->digestsize ];
+	uint8_t expected[ sizeof ( finished->verify_data ) ];
+
+	/* Sanity check: body must be exactly the verify data */
+	if ( len != sizeof ( *finished ) ) {
+		DBGC ( tls, "TLS %p received overlength Finished\n", tls );
+		DBGC_HD ( tls, data, len );
+		return -EINVAL_FINISHED;
+	}
+
+	/* Derive the verify data we expect from the running handshake
+	 * digest and the master secret.
+	 */
+	tls_verify_handshake ( tls, handshake_hash );
+	tls_prf_label ( tls, &tls->master_secret, sizeof ( tls->master_secret ),
+			expected, sizeof ( expected ), "server finished",
+			handshake_hash, sizeof ( handshake_hash ) );
+
+	/* Compare against what the server sent */
+	if ( memcmp ( expected, finished->verify_data,
+		      sizeof ( expected ) ) != 0 ) {
+		DBGC ( tls, "TLS %p verification failed\n", tls );
+		return -EPERM_VERIFY;
+	}
+
+	/* Mark server as finished */
+	pending_put ( &tls->server_negotiation );
+
+	/* Send notification of a window change */
+	xfer_window_changed ( &tls->plainstream );
+
+	return 0;
+}
+
+/**
+ * Receive new Handshake record
+ *
+ * @v tls TLS session
+ * @v data Plaintext record
+ * @v len Length of plaintext record
+ * @ret rc Return status code
+ */
+static int tls_new_handshake ( struct tls_session *tls,
+			       const void *data, size_t len ) {
+	const void *end = ( data + len );
+	int rc;
+
+	/* A single record may carry several handshake messages */
+	while ( data != end ) {
+		const struct {
+			uint8_t type;
+			uint8_t length[3];
+			uint8_t payload[0];
+		} __attribute__ (( packed )) *handshake = data;
+		size_t remaining = ( end - data );
+		const void *payload;
+		size_t payload_len;
+		const void *next;
+
+		/* Sanity check: the fixed header must be fully present
+		 * before its length field is read.
+		 */
+		if ( remaining < sizeof ( *handshake ) ) {
+			DBGC ( tls, "TLS %p received underlength Handshake\n",
+			       tls );
+			DBGC_HD ( tls, data, remaining );
+			return -EINVAL_HANDSHAKE;
+		}
+
+		/* Sanity check: the payload must fit within what
+		 * remains.  Lengths are compared (rather than
+		 * pointers) so that no out-of-range pointer is formed.
+		 */
+		payload = handshake->payload;
+		payload_len = tls_uint24 ( handshake->length );
+		if ( payload_len > ( remaining - sizeof ( *handshake ) ) ) {
+			DBGC ( tls, "TLS %p received overlength Handshake\n",
+			       tls );
+			DBGC_HD ( tls, data, remaining );
+			return -EINVAL_HANDSHAKE;
+		}
+		next = ( payload + payload_len );
+
+		switch ( handshake->type ) {
+		case TLS_SERVER_HELLO:
+			rc = tls_new_server_hello ( tls, payload, payload_len );
+			break;
+		case TLS_CERTIFICATE:
+			rc = tls_new_certificate ( tls, payload, payload_len );
+			break;
+		case TLS_CERTIFICATE_REQUEST:
+			rc = tls_new_certificate_request ( tls, payload,
+							   payload_len );
+			break;
+		case TLS_SERVER_HELLO_DONE:
+			rc = tls_new_server_hello_done ( tls, payload,
+							 payload_len );
+			break;
+		case TLS_FINISHED:
+			rc = tls_new_finished ( tls, payload, payload_len );
+			break;
+		default:
+			DBGC ( tls, "TLS %p ignoring handshake type %d\n",
+			       tls, handshake->type );
+			rc = 0;
+			break;
+		}
+
+		/* Add to handshake digest (except for Hello Requests,
+		 * which are explicitly excluded).  This happens after
+		 * the handler has run, and regardless of whether the
+		 * handler succeeded.
+		 */
+		if ( handshake->type != TLS_HELLO_REQUEST )
+			tls_add_handshake ( tls, data,
+					    sizeof ( *handshake ) +
+					    payload_len );
+
+		/* Abort on failure */
+		if ( rc != 0 )
+			return rc;
+
+		/* Move to next handshake record */
+		data = next;
+	}
+
+	return 0;
+}
+
+/**
+ * Receive new record
+ *
+ * @v tls TLS session
+ * @v type Record type
+ * @v rx_data List of received data buffers
+ * @ret rc Return status code
+ */
+static int tls_new_record ( struct tls_session *tls, unsigned int type,
+ struct list_head *rx_data ) {
+ struct io_buffer *iobuf;
+ /* Handler chosen below for non-data record types (NULL to ignore) */
+ int ( * handler ) ( struct tls_session *tls, const void *data,
+ size_t len );
+ int rc;
+
+ /* Deliver data records to the plainstream interface */
+ if ( type == TLS_TYPE_DATA ) {
+
+ /* Fail unless we are ready to receive data */
+ if ( ! tls_ready ( tls ) )
+ return -ENOTCONN;
+
+ /* Deliver each I/O buffer in turn */
+ /* Each buffer is unlinked from rx_data before being passed to
+ * xfer_deliver_iob(); if delivery fails, any buffers not yet
+ * delivered are left on the list.
+ */
+ while ( ( iobuf = list_first_entry ( rx_data, struct io_buffer,
+ list ) ) ) {
+ list_del ( &iobuf->list );
+ if ( ( rc = xfer_deliver_iob ( &tls->plainstream,
+ iobuf ) ) != 0 ) {
+ DBGC ( tls, "TLS %p could not deliver data: "
+ "%s\n", tls, strerror ( rc ) );
+ return rc;
+ }
+ }
+ return 0;
+ }
+
+ /* For all other records, merge into a single I/O buffer */
+ /* The handlers below take a single contiguous ( data, len ) pair */
+ iobuf = iob_concatenate ( rx_data );
+ if ( ! iobuf ) {
+ DBGC ( tls, "TLS %p could not concatenate non-data record "
+ "type %d\n", tls, type );
+ return -ENOMEM_RX_CONCAT;
+ }
+
+ /* Determine handler */
+ switch ( type ) {
+ case TLS_TYPE_CHANGE_CIPHER:
+ handler = tls_new_change_cipher;
+ break;
+ case TLS_TYPE_ALERT:
+ handler = tls_new_alert;
+ break;
+ case TLS_TYPE_HANDSHAKE:
+ handler = tls_new_handshake;
+ break;
+ default:
+ /* RFC4346 says that we should just ignore unknown
+ * record types.
+ */
+ handler = NULL;
+ DBGC ( tls, "TLS %p ignoring record type %d\n", tls, type );
+ break;
+ }
+
+ /* Handle record and free I/O buffer */
+ /* An ignored record type (no handler) is reported as success */
+ rc = ( handler ? handler ( tls, iobuf->data, iob_len ( iobuf ) ) : 0 );
+ free_iob ( iobuf );
+ return rc;
+}
+
+/******************************************************************************
+ *
+ * Record encryption/decryption
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Initialise HMAC
+ *
+ * @v cipherspec Cipher specification
+ * @v ctx Context
+ * @v seq Sequence number
+ * @v tlshdr TLS header
+ */
+static void tls_hmac_init ( struct tls_cipherspec *cipherspec, void *ctx,
+			    uint64_t seq, struct tls_header *tlshdr ) {
+	struct digest_algorithm *mac_digest = cipherspec->suite->digest;
+	uint64_t seq_be = cpu_to_be64 ( seq );
+
+	/* Key the HMAC with the MAC secret; the key length passed is
+	 * the digest size.
+	 */
+	hmac_init ( mac_digest, ctx, cipherspec->mac_secret,
+		    &mac_digest->digestsize );
+
+	/* Feed in the big-endian sequence number, then the header */
+	hmac_update ( mac_digest, ctx, &seq_be, sizeof ( seq_be ) );
+	hmac_update ( mac_digest, ctx, tlshdr, sizeof ( *tlshdr ) );
+}
+
+/**
+ * Update HMAC
+ *
+ * @v cipherspec Cipher specification
+ * @v ctx Context
+ * @v data Data
+ * @v len Length of data
+ */
+static void tls_hmac_update ( struct tls_cipherspec *cipherspec, void *ctx,
+			      const void *data, size_t len ) {
+
+	/* Thin wrapper: use the digest of the cipher suite in effect */
+	hmac_update ( cipherspec->suite->digest, ctx, data, len );
+}
+
+/**
+ * Finalise HMAC
+ *
+ * @v cipherspec Cipher specification
+ * @v ctx Context
+ * @v mac HMAC to fill in
+ */
+static void tls_hmac_final ( struct tls_cipherspec *cipherspec, void *ctx,
+			     void *hmac ) {
+	struct digest_algorithm *mac_digest = cipherspec->suite->digest;
+	size_t *key_len = &mac_digest->digestsize;
+
+	/* Same key and key length as were given to hmac_init() */
+	hmac_final ( mac_digest, ctx, cipherspec->mac_secret, key_len, hmac );
+}
+
+/**
+ * Calculate HMAC
+ *
+ * @v cipherspec Cipher specification
+ * @v seq Sequence number
+ * @v tlshdr TLS header
+ * @v data Data
+ * @v len Length of data
+ * @v mac HMAC to fill in
+ */
+static void tls_hmac ( struct tls_cipherspec *cipherspec,
+		       uint64_t seq, struct tls_header *tlshdr,
+		       const void *data, size_t len, void *hmac ) {
+	/* Scratch HMAC context sized for the suite's digest */
+	uint8_t hmac_ctx[ cipherspec->suite->digest->ctxsize ];
+
+	/* One-shot MAC: sequence number + header, then payload */
+	tls_hmac_init ( cipherspec, hmac_ctx, seq, tlshdr );
+	tls_hmac_update ( cipherspec, hmac_ctx, data, len );
+	tls_hmac_final ( cipherspec, hmac_ctx, hmac );
+}
+
+/**
+ * Allocate and assemble stream-ciphered record from data and MAC portions
+ *
+ * The assembled record is the data immediately followed by the MAC.
+ *
+ * @v tls TLS session
+ * @v data Data
+ * @v len Length of data
+ * @v digest MAC digest
+ * @ret plaintext_len Length of plaintext record (filled in even on failure)
+ * @ret plaintext Allocated plaintext record, or NULL on allocation failure
+ */
+static void * __malloc tls_assemble_stream ( struct tls_session *tls,
+ const void *data, size_t len,
+ void *digest, size_t *plaintext_len ) {
+ size_t mac_len = tls->tx_cipherspec.suite->digest->digestsize;
+ void *plaintext;
+ void *content;
+ void *mac;
+
+ /* Calculate stream-ciphered struct length */
+ *plaintext_len = ( len + mac_len );
+
+ /* Allocate stream-ciphered struct */
+ plaintext = malloc ( *plaintext_len );
+ if ( ! plaintext )
+ return NULL;
+ content = plaintext;
+ mac = ( content + len );
+
+ /* Fill in stream-ciphered struct */
+ memcpy ( content, data, len );
+ memcpy ( mac, digest, mac_len );
+
+ return plaintext;
+}
+
+/**
+ * Allocate and assemble block-ciphered record from data and MAC portions
+ *
+ * The assembled record is laid out as: explicit IV (TLSv1.1 and later
+ * only), data, MAC, padding bytes, padding length byte.
+ *
+ * @v tls TLS session
+ * @v data Data
+ * @v len Length of data
+ * @v digest MAC digest
+ * @ret plaintext_len Length of plaintext record (filled in even on failure)
+ * @ret plaintext Allocated plaintext record, or NULL on allocation failure
+ */
+static void * tls_assemble_block ( struct tls_session *tls,
+ const void *data, size_t len,
+ void *digest, size_t *plaintext_len ) {
+ size_t blocksize = tls->tx_cipherspec.suite->cipher->blocksize;
+ size_t mac_len = tls->tx_cipherspec.suite->digest->digestsize;
+ size_t iv_len;
+ size_t padding_len;
+ void *plaintext;
+ void *iv;
+ void *content;
+ void *mac;
+ void *padding;
+
+ /* TLSv1.1 and later use an explicit IV */
+ iv_len = ( ( tls->version >= TLS_VERSION_TLS_1_1 ) ? blocksize : 0 );
+
+ /* Calculate block-ciphered struct length */
+ /* padding_len is the number of bytes needed to round
+ * ( iv + data + MAC + one length byte ) up to a multiple of the
+ * block size; the mask form assumes blocksize is a power of two.
+ */
+ padding_len = ( ( blocksize - 1 ) & -( iv_len + len + mac_len + 1 ) );
+ *plaintext_len = ( iv_len + len + mac_len + padding_len + 1 );
+
+ /* Allocate block-ciphered struct */
+ plaintext = malloc ( *plaintext_len );
+ if ( ! plaintext )
+ return NULL;
+ iv = plaintext;
+ content = ( iv + iv_len );
+ mac = ( content + len );
+ padding = ( mac + mac_len );
+
+ /* Fill in block-ciphered struct */
+ /* NOTE(review): the return value of tls_generate_random() is
+ * ignored here although other callers in this file check it;
+ * confirm whether a failure could leave the IV unfilled.
+ */
+ tls_generate_random ( tls, iv, iv_len );
+ memcpy ( content, data, len );
+ memcpy ( mac, digest, mac_len );
+ /* Every padding byte, and the final length byte, holds padding_len */
+ memset ( padding, padding_len, ( padding_len + 1 ) );
+
+ return plaintext;
+}
+
+/**
+ * Send plaintext record
+ *
+ * @v tls TLS session
+ * @v type Record type
+ * @v data Plaintext record
+ * @v len Length of plaintext record
+ * @ret rc Return status code
+ */
+static int tls_send_plaintext ( struct tls_session *tls, unsigned int type,
+ const void *data, size_t len ) {
+ struct tls_header plaintext_tlshdr;
+ struct tls_header *tlshdr;
+ struct tls_cipherspec *cipherspec = &tls->tx_cipherspec;
+ struct cipher_algorithm *cipher = cipherspec->suite->cipher;
+ void *plaintext = NULL;
+ size_t plaintext_len;
+ struct io_buffer *ciphertext = NULL;
+ size_t ciphertext_len;
+ size_t mac_len = cipherspec->suite->digest->digestsize;
+ uint8_t mac[mac_len];
+ int rc;
+
+ /* Construct header */
+ /* This header is used only as MAC input: it carries the length
+ * of the unprotected data, whereas the header actually
+ * transmitted (below) carries the assembled record length.
+ */
+ plaintext_tlshdr.type = type;
+ plaintext_tlshdr.version = htons ( tls->version );
+ plaintext_tlshdr.length = htons ( len );
+
+ /* Calculate MAC */
+ tls_hmac ( cipherspec, tls->tx_seq, &plaintext_tlshdr, data, len, mac );
+
+ /* Allocate and assemble plaintext struct */
+ if ( is_stream_cipher ( cipher ) ) {
+ plaintext = tls_assemble_stream ( tls, data, len, mac,
+ &plaintext_len );
+ } else {
+ plaintext = tls_assemble_block ( tls, data, len, mac,
+ &plaintext_len );
+ }
+ if ( ! plaintext ) {
+ DBGC ( tls, "TLS %p could not allocate %zd bytes for "
+ "plaintext\n", tls, plaintext_len );
+ rc = -ENOMEM_TX_PLAINTEXT;
+ goto done;
+ }
+
+ DBGC2 ( tls, "Sending plaintext data:\n" );
+ DBGC2_HD ( tls, plaintext, plaintext_len );
+
+ /* Allocate ciphertext */
+ ciphertext_len = ( sizeof ( *tlshdr ) + plaintext_len );
+ ciphertext = xfer_alloc_iob ( &tls->cipherstream, ciphertext_len );
+ if ( ! ciphertext ) {
+ DBGC ( tls, "TLS %p could not allocate %zd bytes for "
+ "ciphertext\n", tls, ciphertext_len );
+ rc = -ENOMEM_TX_CIPHERTEXT;
+ goto done;
+ }
+
+ /* Assemble ciphertext */
+ tlshdr = iob_put ( ciphertext, sizeof ( *tlshdr ) );
+ tlshdr->type = type;
+ tlshdr->version = htons ( tls->version );
+ tlshdr->length = htons ( plaintext_len );
+ /* Encrypt using a scratch copy of the cipher context, so that
+ * the live context is advanced only once the record has
+ * actually been delivered (see the end of this function).
+ */
+ memcpy ( cipherspec->cipher_next_ctx, cipherspec->cipher_ctx,
+ cipher->ctxsize );
+ cipher_encrypt ( cipher, cipherspec->cipher_next_ctx, plaintext,
+ iob_put ( ciphertext, plaintext_len ), plaintext_len );
+
+ /* Free plaintext as soon as possible to conserve memory */
+ free ( plaintext );
+ plaintext = NULL;
+
+ /* Send ciphertext */
+ if ( ( rc = xfer_deliver_iob ( &tls->cipherstream,
+ iob_disown ( ciphertext ) ) ) != 0 ) {
+ DBGC ( tls, "TLS %p could not deliver ciphertext: %s\n",
+ tls, strerror ( rc ) );
+ goto done;
+ }
+
+ /* Update TX state machine to next record */
+ tls->tx_seq += 1;
+ memcpy ( tls->tx_cipherspec.cipher_ctx,
+ tls->tx_cipherspec.cipher_next_ctx, cipher->ctxsize );
+
+ /* Common exit path for success and failure; on success rc is
+ * the zero returned by xfer_deliver_iob().
+ * NOTE(review): presumably iob_disown() has cleared ciphertext
+ * by this point, so the free_iob() below does not touch a
+ * buffer that was handed off - confirm against iobuf.h.
+ */
+ done:
+ free ( plaintext );
+ free_iob ( ciphertext );
+ return rc;
+}
+
+/**
+ * Split stream-ciphered record into data and MAC portions
+ *
+ * @v tls TLS session
+ * @v rx_data List of received data buffers
+ * @v mac MAC to fill in
+ * @ret rc Return status code
+ */
+static int tls_split_stream ( struct tls_session *tls,
+			      struct list_head *rx_data, void **mac ) {
+	struct io_buffer *last;
+	size_t mac_len;
+
+	/* The MAC is taken from the end of the final buffer */
+	mac_len = tls->rx_cipherspec.suite->digest->digestsize;
+	last = list_last_entry ( rx_data, struct io_buffer, list );
+	assert ( last != NULL );
+
+	/* Trim the MAC off the data and report where it starts */
+	if ( iob_len ( last ) >= mac_len ) {
+		iob_unput ( last, mac_len );
+		*mac = last->tail;
+		return 0;
+	}
+
+	/* Final buffer is too short to contain a MAC */
+	DBGC ( tls, "TLS %p received underlength MAC\n", tls );
+	DBGC_HD ( tls, last->data, iob_len ( last ) );
+	return -EINVAL_STREAM;
+}
+
+/**
+ * Split block-ciphered record into data and MAC portions
+ *
+ * @v tls TLS session
+ * @v rx_data List of received data buffers
+ * @v mac MAC to fill in
+ * @ret rc Return status code
+ */
+static int tls_split_block ( struct tls_session *tls,
+ struct list_head *rx_data, void **mac ) {
+ size_t mac_len = tls->rx_cipherspec.suite->digest->digestsize;
+ struct io_buffer *iobuf;
+ size_t iv_len;
+ uint8_t *padding_final;
+ uint8_t *padding;
+ size_t padding_len;
+
+ /* TLSv1.1 and later use an explicit IV */
+ /* The IV is stripped from the front of the first buffer */
+ iobuf = list_first_entry ( rx_data, struct io_buffer, list );
+ iv_len = ( ( tls->version >= TLS_VERSION_TLS_1_1 ) ?
+ tls->rx_cipherspec.suite->cipher->blocksize : 0 );
+ if ( iob_len ( iobuf ) < iv_len ) {
+ DBGC ( tls, "TLS %p received underlength IV\n", tls );
+ DBGC_HD ( tls, iobuf->data, iob_len ( iobuf ) );
+ return -EINVAL_BLOCK;
+ }
+ iob_pull ( iobuf, iv_len );
+
+ /* Extract and verify padding */
+ /* The final byte of the last buffer gives the padding length;
+ * that many padding bytes precede it.
+ * NOTE(review): the final byte is read before any length check
+ * on the last buffer - confirm that callers never leave it empty.
+ */
+ iobuf = list_last_entry ( rx_data, struct io_buffer, list );
+ padding_final = ( iobuf->tail - 1 );
+ padding_len = *padding_final;
+ if ( ( padding_len + 1 ) > iob_len ( iobuf ) ) {
+ DBGC ( tls, "TLS %p received underlength padding\n", tls );
+ DBGC_HD ( tls, iobuf->data, iob_len ( iobuf ) );
+ return -EINVAL_BLOCK;
+ }
+ /* After trimming, iobuf->tail points at the first padding byte */
+ iob_unput ( iobuf, ( padding_len + 1 ) );
+ for ( padding = iobuf->tail ; padding < padding_final ; padding++ ) {
+ if ( *padding != padding_len ) {
+ DBGC ( tls, "TLS %p received bad padding\n", tls );
+ DBGC_HD ( tls, padding, padding_len );
+ return -EINVAL_PADDING;
+ }
+ }
+
+ /* Extract MAC */
+ /* The MAC is taken from the end of what remains of the last buffer */
+ if ( iob_len ( iobuf ) < mac_len ) {
+ DBGC ( tls, "TLS %p received underlength MAC\n", tls );
+ DBGC_HD ( tls, iobuf->data, iob_len ( iobuf ) );
+ return -EINVAL_BLOCK;
+ }
+ iob_unput ( iobuf, mac_len );
+ *mac = iobuf->tail;
+
+ return 0;
+}
+
+/**
+ * Receive new ciphertext record
+ *
+ * @v tls TLS session
+ * @v tlshdr Record header
+ * @v rx_data List of received data buffers
+ * @ret rc Return status code
+ */
+static int tls_new_ciphertext ( struct tls_session *tls,
+				struct tls_header *tlshdr,
+				struct list_head *rx_data ) {
+	struct tls_header plaintext_tlshdr;
+	struct tls_cipherspec *cipherspec = &tls->rx_cipherspec;
+	struct cipher_algorithm *cipher = cipherspec->suite->cipher;
+	struct digest_algorithm *digest = cipherspec->suite->digest;
+	uint8_t ctx[digest->ctxsize];
+	uint8_t verify_mac[digest->digestsize];
+	struct io_buffer *iobuf;
+	void *mac;
+	size_t len = 0;
+	int rc;
+
+	/* Decrypt the received data in place.  Operate on the list
+	 * supplied by the caller, as every later step of this
+	 * function does, rather than reaching into the session.
+	 */
+	list_for_each_entry ( iobuf, rx_data, list ) {
+		cipher_decrypt ( cipher, cipherspec->cipher_ctx,
+				 iobuf->data, iobuf->data, iob_len ( iobuf ) );
+	}
+
+	/* Split record into content and MAC */
+	if ( is_stream_cipher ( cipher ) ) {
+		if ( ( rc = tls_split_stream ( tls, rx_data, &mac ) ) != 0 )
+			return rc;
+	} else {
+		if ( ( rc = tls_split_block ( tls, rx_data, &mac ) ) != 0 )
+			return rc;
+	}
+
+	/* Calculate total length of the remaining content */
+	DBGC2 ( tls, "Received plaintext data:\n" );
+	list_for_each_entry ( iobuf, rx_data, list ) {
+		DBGC2_HD ( tls, iobuf->data, iob_len ( iobuf ) );
+		len += iob_len ( iobuf );
+	}
+
+	/* Verify MAC.  The MAC covers a header carrying the content
+	 * length, not the on-the-wire record length.
+	 */
+	plaintext_tlshdr.type = tlshdr->type;
+	plaintext_tlshdr.version = tlshdr->version;
+	plaintext_tlshdr.length = htons ( len );
+	tls_hmac_init ( cipherspec, ctx, tls->rx_seq, &plaintext_tlshdr );
+	list_for_each_entry ( iobuf, rx_data, list ) {
+		tls_hmac_update ( cipherspec, ctx, iobuf->data,
+				  iob_len ( iobuf ) );
+	}
+	tls_hmac_final ( cipherspec, ctx, verify_mac );
+	if ( memcmp ( mac, verify_mac, sizeof ( verify_mac ) ) != 0 ) {
+		DBGC ( tls, "TLS %p failed MAC verification\n", tls );
+		return -EINVAL_MAC;
+	}
+
+	/* Process plaintext record */
+	if ( ( rc = tls_new_record ( tls, tlshdr->type, rx_data ) ) != 0 )
+		return rc;
+
+	return 0;
+}
+
+/******************************************************************************
+ *
+ * Plaintext stream operations
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Check flow control window
+ *
+ * @v tls TLS session
+ * @ret len Length of window
+ */
+static size_t tls_plainstream_window ( struct tls_session *tls ) {
+
+	/* Report a closed window until the session is ready to accept
+	 * data; thereafter mirror the cipherstream's window.
+	 */
+	return ( tls_ready ( tls ) ? xfer_window ( &tls->cipherstream ) : 0 );
+}
+
+/**
+ * Deliver datagram as raw data
+ *
+ * @v tls TLS session
+ * @v iobuf I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ */
+static int tls_plainstream_deliver ( struct tls_session *tls,
+				     struct io_buffer *iobuf,
+				     struct xfer_metadata *meta __unused ) {
+	int rc;
+
+	if ( tls_ready ( tls ) ) {
+		/* Wrap the buffer contents in a TLS data record */
+		rc = tls_send_plaintext ( tls, TLS_TYPE_DATA, iobuf->data,
+					  iob_len ( iobuf ) );
+	} else {
+		/* Refuse unless we are ready to accept data */
+		rc = -ENOTCONN;
+	}
+
+	/* The buffer is freed here whatever the outcome */
+	free_iob ( iobuf );
+	return rc;
+}
+
+/** TLS plaintext stream interface operations
+ *
+ * Delivery, window queries and close are handled by the TLS session.
+ */
+static struct interface_operation tls_plainstream_ops[] = {
+ INTF_OP ( xfer_deliver, struct tls_session *, tls_plainstream_deliver ),
+ INTF_OP ( xfer_window, struct tls_session *, tls_plainstream_window ),
+ INTF_OP ( intf_close, struct tls_session *, tls_close ),
+};
+
+/** TLS plaintext stream interface descriptor
+ *
+ * Declared with the cipherstream as its pass-through interface.
+ */
+static struct interface_descriptor tls_plainstream_desc =
+ INTF_DESC_PASSTHRU ( struct tls_session, plainstream,
+ tls_plainstream_ops, cipherstream );
+
+/******************************************************************************
+ *
+ * Ciphertext stream operations
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Handle received TLS header
+ *
+ * @v tls TLS session
+ * @ret rc Returned status code
+ */
+static int tls_newdata_process_header ( struct tls_session *tls ) {
+ /* Record payload length as declared in the received header */
+ size_t data_len = ntohs ( tls->rx_header.length );
+ size_t remaining = data_len;
+ size_t frag_len;
+ struct io_buffer *iobuf;
+ struct io_buffer *tmp;
+ int rc;
+
+ /* Allocate data buffers now that we know the length */
+ assert ( list_empty ( &tls->rx_data ) );
+ while ( remaining ) {
+
+ /* Calculate fragment length. Ensure that no block is
+ * smaller than TLS_RX_MIN_BUFSIZE (by increasing the
+ * allocation length if necessary).
+ */
+ frag_len = remaining;
+ if ( frag_len > TLS_RX_BUFSIZE )
+ frag_len = TLS_RX_BUFSIZE;
+ remaining -= frag_len;
+ if ( remaining < TLS_RX_MIN_BUFSIZE ) {
+ frag_len += remaining;
+ remaining = 0;
+ }
+
+ /* Allocate buffer */
+ iobuf = alloc_iob_raw ( frag_len, TLS_RX_ALIGN, 0 );
+ if ( ! iobuf ) {
+ /* NOTE(review): remaining has already been reduced by
+ * this fragment, so the first value printed is what is
+ * still outstanding after it, not the size of the
+ * allocation that failed.
+ */
+ DBGC ( tls, "TLS %p could not allocate %zd of %zd "
+ "bytes for receive buffer\n", tls,
+ remaining, data_len );
+ rc = -ENOMEM_RX_DATA;
+ goto err;
+ }
+
+ /* Ensure tailroom is exactly what we asked for. This
+ * will result in unaligned I/O buffers when the
+ * fragment length is unaligned, which can happen only
+ * before we switch to using a block cipher.
+ */
+ iob_reserve ( iobuf, ( iob_tailroom ( iobuf ) - frag_len ) );
+
+ /* Add I/O buffer to list */
+ list_add_tail ( &iobuf->list, &tls->rx_data );
+ }
+
+ /* Move to data state */
+ tls->rx_state = TLS_RX_DATA;
+
+ return 0;
+
+ /* On allocation failure, release every buffer queued so far */
+ err:
+ list_for_each_entry_safe ( iobuf, tmp, &tls->rx_data, list ) {
+ list_del ( &iobuf->list );
+ free_iob ( iobuf );
+ }
+ return rc;
+}
+
+/**
+ * Handle received TLS data payload
+ *
+ * @v tls TLS session
+ * @ret rc Returned status code
+ */
+static int tls_newdata_process_data ( struct tls_session *tls ) {
+ struct io_buffer *iobuf;
+ int rc;
+
+ /* Move current buffer to end of list */
+ /* The buffer at the head of the list is the one being filled
+ * (see tls_cipherstream_deliver()); rotating it to the tail
+ * brings the next buffer to the head.
+ */
+ iobuf = list_first_entry ( &tls->rx_data, struct io_buffer, list );
+ list_del ( &iobuf->list );
+ list_add_tail ( &iobuf->list, &tls->rx_data );
+
+ /* Continue receiving data if any space remains */
+ /* If the new head still has tailroom, the record is incomplete */
+ iobuf = list_first_entry ( &tls->rx_data, struct io_buffer, list );
+ if ( iob_tailroom ( iobuf ) )
+ return 0;
+
+ /* Process record */
+ if ( ( rc = tls_new_ciphertext ( tls, &tls->rx_header,
+ &tls->rx_data ) ) != 0 )
+ return rc;
+
+ /* Increment RX sequence number */
+ tls->rx_seq += 1;
+
+ /* Return to header state */
+ /* Emptying the header buffer lets the next header be received */
+ assert ( list_empty ( &tls->rx_data ) );
+ tls->rx_state = TLS_RX_HEADER;
+ iob_unput ( &tls->rx_header_iobuf, sizeof ( tls->rx_header ) );
+
+ return 0;
+}
+
+/**
+ * Receive new ciphertext
+ *
+ * Copies the received bytes into the header buffer or the current
+ * data buffer according to the receive state, and processes each
+ * buffer as it becomes full.
+ *
+ * @v tls TLS session
+ * @v iobuf I/O buffer
+ * @v xfer Data transfer metadata (unused)
+ * @ret rc Return status code
+ */
+static int tls_cipherstream_deliver ( struct tls_session *tls,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *xfer __unused ) {
+ size_t frag_len;
+ int ( * process ) ( struct tls_session *tls );
+ struct io_buffer *dest;
+ int rc;
+
+ /* Consume the incoming buffer piece by piece */
+ while ( iob_len ( iobuf ) ) {
+
+ /* Select buffer according to current state */
+ switch ( tls->rx_state ) {
+ case TLS_RX_HEADER:
+ dest = &tls->rx_header_iobuf;
+ process = tls_newdata_process_header;
+ break;
+ case TLS_RX_DATA:
+ dest = list_first_entry ( &tls->rx_data,
+ struct io_buffer, list );
+ assert ( dest != NULL );
+ process = tls_newdata_process_data;
+ break;
+ default:
+ assert ( 0 );
+ rc = -EINVAL_RX_STATE;
+ goto done;
+ }
+
+ /* Copy data portion to buffer */
+ /* Copy no more than the destination has room for */
+ frag_len = iob_len ( iobuf );
+ if ( frag_len > iob_tailroom ( dest ) )
+ frag_len = iob_tailroom ( dest );
+ memcpy ( iob_put ( dest, frag_len ), iobuf->data, frag_len );
+ iob_pull ( iobuf, frag_len );
+
+ /* Process data if buffer is now full */
+ /* A processing failure closes the whole session */
+ if ( iob_tailroom ( dest ) == 0 ) {
+ if ( ( rc = process ( tls ) ) != 0 ) {
+ tls_close ( tls, rc );
+ goto done;
+ }
+ }
+ }
+ rc = 0;
+
+ /* The incoming buffer is freed on every exit path */
+ done:
+ free_iob ( iobuf );
+ return rc;
+}
+
+/** TLS ciphertext stream interface operations
+ *
+ * A window change on the cipherstream resumes the TX state machine.
+ */
+static struct interface_operation tls_cipherstream_ops[] = {
+ INTF_OP ( xfer_deliver, struct tls_session *,
+ tls_cipherstream_deliver ),
+ INTF_OP ( xfer_window_changed, struct tls_session *, tls_tx_resume ),
+ INTF_OP ( intf_close, struct tls_session *, tls_close ),
+};
+
+/** TLS ciphertext stream interface descriptor
+ *
+ * Declared with the plainstream as its pass-through interface.
+ */
+static struct interface_descriptor tls_cipherstream_desc =
+ INTF_DESC_PASSTHRU ( struct tls_session, cipherstream,
+ tls_cipherstream_ops, plainstream );
+
+/******************************************************************************
+ *
+ * Certificate validator
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Handle certificate validation completion
+ *
+ * @v tls TLS session
+ * @v rc Reason for completion
+ */
+static void tls_validator_done ( struct tls_session *tls, int rc ) {
+ /* The pending TX cipherspec supplies the public-key algorithm */
+ struct tls_cipherspec *cipherspec = &tls->tx_cipherspec_pending;
+ struct pubkey_algorithm *pubkey = cipherspec->suite->pubkey;
+ struct x509_certificate *cert;
+
+ /* Close validator interface */
+ intf_restart ( &tls->validator, rc );
+
+ /* Check for validation failure */
+ if ( rc != 0 ) {
+ DBGC ( tls, "TLS %p certificate validation failed: %s\n",
+ tls, strerror ( rc ) );
+ goto err;
+ }
+ DBGC ( tls, "TLS %p certificate validation succeeded\n", tls );
+
+ /* Extract first certificate */
+ /* The first certificate in the chain is treated as the server's own */
+ cert = x509_first ( tls->chain );
+ assert ( cert != NULL );
+
+ /* Verify server name */
+ if ( ( rc = x509_check_name ( cert, tls->name ) ) != 0 ) {
+ DBGC ( tls, "TLS %p server certificate does not match %s: %s\n",
+ tls, tls->name, strerror ( rc ) );
+ goto err;
+ }
+
+ /* Initialise public key algorithm */
+ /* Uses the raw public key taken from the certificate subject */
+ if ( ( rc = pubkey_init ( pubkey, cipherspec->pubkey_ctx,
+ cert->subject.public_key.raw.data,
+ cert->subject.public_key.raw.len ) ) != 0 ) {
+ DBGC ( tls, "TLS %p cannot initialise public key: %s\n",
+ tls, strerror ( rc ) );
+ goto err;
+ }
+
+ /* Schedule Client Key Exchange, Change Cipher, and Finished */
+ tls->tx_pending |= ( TLS_TX_CLIENT_KEY_EXCHANGE |
+ TLS_TX_CHANGE_CIPHER |
+ TLS_TX_FINISHED );
+ /* A client certificate is sent only if one has been selected */
+ if ( tls->cert ) {
+ tls->tx_pending |= ( TLS_TX_CERTIFICATE |
+ TLS_TX_CERTIFICATE_VERIFY );
+ }
+ tls_tx_resume ( tls );
+
+ return;
+
+ /* Any failure closes the session with the failing status */
+ err:
+ tls_close ( tls, rc );
+ return;
+}
+
+/** TLS certificate validator interface operations
+ *
+ * Closure of the validator interface reports the validation result.
+ */
+static struct interface_operation tls_validator_ops[] = {
+ INTF_OP ( intf_close, struct tls_session *, tls_validator_done ),
+};
+
+/** TLS certificate validator interface descriptor */
+static struct interface_descriptor tls_validator_desc =
+ INTF_DESC ( struct tls_session, validator, tls_validator_ops );
+
+/******************************************************************************
+ *
+ * Controlling process
+ *
+ ******************************************************************************
+ */
+
+/**
+ * TLS TX state machine
+ *
+ * @v tls TLS session
+ */
+static void tls_tx_step ( struct tls_session *tls ) {
+	int rc;
+
+	/* Wait for cipherstream to become ready */
+	if ( ! xfer_window ( &tls->cipherstream ) )
+		return;
+
+	/* Send first pending transmission.  At most one message is
+	 * sent per invocation; the order of the tests below fixes the
+	 * order in which pending messages go out.
+	 */
+	if ( tls->tx_pending & TLS_TX_CLIENT_HELLO ) {
+		/* Send Client Hello */
+		if ( ( rc = tls_send_client_hello ( tls ) ) != 0 ) {
+			DBGC ( tls, "TLS %p could not send Client Hello: %s\n",
+			       tls, strerror ( rc ) );
+			goto err;
+		}
+		tls->tx_pending &= ~TLS_TX_CLIENT_HELLO;
+	} else if ( tls->tx_pending & TLS_TX_CERTIFICATE ) {
+		/* Send Certificate */
+		if ( ( rc = tls_send_certificate ( tls ) ) != 0 ) {
+			DBGC ( tls, "TLS %p could not send Certificate: %s\n",
+			       tls, strerror ( rc ) );
+			goto err;
+		}
+		tls->tx_pending &= ~TLS_TX_CERTIFICATE;
+	} else if ( tls->tx_pending & TLS_TX_CLIENT_KEY_EXCHANGE ) {
+		/* Send Client Key Exchange */
+		if ( ( rc = tls_send_client_key_exchange ( tls ) ) != 0 ) {
+			DBGC ( tls, "TLS %p could not send Client Key "
+			       "Exchange: %s\n", tls, strerror ( rc ) );
+			goto err;
+		}
+		tls->tx_pending &= ~TLS_TX_CLIENT_KEY_EXCHANGE;
+	} else if ( tls->tx_pending & TLS_TX_CERTIFICATE_VERIFY ) {
+		/* Send Certificate Verify */
+		if ( ( rc = tls_send_certificate_verify ( tls ) ) != 0 ) {
+			DBGC ( tls, "TLS %p could not send Certificate "
+			       "Verify: %s\n", tls, strerror ( rc ) );
+			goto err;
+		}
+		tls->tx_pending &= ~TLS_TX_CERTIFICATE_VERIFY;
+	} else if ( tls->tx_pending & TLS_TX_CHANGE_CIPHER ) {
+		/* Send Change Cipher, and then change the cipher in use */
+		if ( ( rc = tls_send_change_cipher ( tls ) ) != 0 ) {
+			DBGC ( tls, "TLS %p could not send Change Cipher: "
+			       "%s\n", tls, strerror ( rc ) );
+			goto err;
+		}
+		if ( ( rc = tls_change_cipher ( tls,
+						&tls->tx_cipherspec_pending,
+						&tls->tx_cipherspec )) != 0 ){
+			DBGC ( tls, "TLS %p could not activate TX cipher: "
+			       "%s\n", tls, strerror ( rc ) );
+			goto err;
+		}
+		/* Sequence numbering restarts under the new cipher */
+		tls->tx_seq = 0;
+		tls->tx_pending &= ~TLS_TX_CHANGE_CIPHER;
+	} else if ( tls->tx_pending & TLS_TX_FINISHED ) {
+		/* Send Finished */
+		if ( ( rc = tls_send_finished ( tls ) ) != 0 ) {
+			DBGC ( tls, "TLS %p could not send Finished: %s\n",
+			       tls, strerror ( rc ) );
+			goto err;
+		}
+		tls->tx_pending &= ~TLS_TX_FINISHED;
+	}
+
+	/* Reschedule process if pending transmissions remain */
+	if ( tls->tx_pending )
+		tls_tx_resume ( tls );
+
+	return;
+
+	/* Any transmission failure closes the session */
+ err:
+	tls_close ( tls, rc );
+}
+
+/** TLS TX process descriptor
+ *
+ * Declared as a run-once process whose step function is tls_tx_step().
+ */
+static struct process_descriptor tls_process_desc =
+ PROC_DESC_ONCE ( struct tls_session, process, tls_tx_step );
+
+/******************************************************************************
+ *
+ * Instantiator
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Add TLS on top of a data transfer interface
+ *
+ * @v xfer		Data transfer interface to attach to the plaintext stream
+ * @v name		Server name (stored by reference, not copied)
+ * @v next		Filled in with the ciphertext stream interface
+ * @ret rc		Return status code
+ */
+int add_tls ( struct interface *xfer, const char *name,
+	      struct interface **next ) {
+	struct tls_session *tls;
+	int rc;
+
+	/* Allocate and initialise TLS structure */
+	tls = malloc ( sizeof ( *tls ) );
+	if ( ! tls ) {
+		rc = -ENOMEM;
+		goto err_alloc;
+	}
+	memset ( tls, 0, sizeof ( *tls ) );
+	ref_init ( &tls->refcnt, free_tls );
+	tls->name = name;
+	intf_init ( &tls->plainstream, &tls_plainstream_desc, &tls->refcnt );
+	intf_init ( &tls->cipherstream, &tls_cipherstream_desc, &tls->refcnt );
+	intf_init ( &tls->validator, &tls_validator_desc, &tls->refcnt );
+	process_init ( &tls->process, &tls_process_desc, &tls->refcnt );
+
+	/* Initialise the receive buffers before the first point of
+	 * failure.  The err_random path drops the last reference,
+	 * which runs free_tls() on this structure, so the received
+	 * data list must already be a valid (empty) list by then
+	 * rather than the all-zero bytes left by memset().
+	 * NOTE(review): free_tls() is defined outside this view;
+	 * confirm that it walks tls->rx_data.
+	 */
+	iob_populate ( &tls->rx_header_iobuf, &tls->rx_header, 0,
+		       sizeof ( tls->rx_header ) );
+	INIT_LIST_HEAD ( &tls->rx_data );
+
+	tls->version = TLS_VERSION_TLS_1_2;
+	tls_clear_cipher ( tls, &tls->tx_cipherspec );
+	tls_clear_cipher ( tls, &tls->tx_cipherspec_pending );
+	tls_clear_cipher ( tls, &tls->rx_cipherspec );
+	tls_clear_cipher ( tls, &tls->rx_cipherspec_pending );
+	tls->client_random.gmt_unix_time = time ( NULL );
+	if ( ( rc = tls_generate_random ( tls, &tls->client_random.random,
+			( sizeof ( tls->client_random.random ) ) ) ) != 0 ) {
+		goto err_random;
+	}
+	tls->pre_master_secret.version = htons ( tls->version );
+	if ( ( rc = tls_generate_random ( tls, &tls->pre_master_secret.random,
+		      ( sizeof ( tls->pre_master_secret.random ) ) ) ) != 0 ) {
+		goto err_random;
+	}
+
+	/* Both candidate handshake digests are started now; SHA-256 is
+	 * selected as the initial handshake digest.
+	 */
+	digest_init ( &md5_sha1_algorithm, tls->handshake_md5_sha1_ctx );
+	digest_init ( &sha256_algorithm, tls->handshake_sha256_ctx );
+	tls->handshake_digest = &sha256_algorithm;
+	tls->handshake_ctx = tls->handshake_sha256_ctx;
+	tls->tx_pending = TLS_TX_CLIENT_HELLO;
+
+	/* Add pending operations for server and client Finished messages */
+	pending_get ( &tls->client_negotiation );
+	pending_get ( &tls->server_negotiation );
+
+	/* Attach to parent interface, mortalise self, and return */
+	intf_plug_plug ( &tls->plainstream, xfer );
+	*next = &tls->cipherstream;
+	ref_put ( &tls->refcnt );
+	return 0;
+
+ err_random:
+	ref_put ( &tls->refcnt );
+ err_alloc:
+	return rc;
+}
diff --git a/qemu/roms/ipxe/src/net/udp.c b/qemu/roms/ipxe/src/net/udp.c
new file mode 100644
index 000000000..76da67ecf
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/udp.c
@@ -0,0 +1,440 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/uri.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/udp.h>
+
+/** @file
+ *
+ * UDP protocol
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/**
+ * A UDP connection
+ *
+ */
+struct udp_connection {
+ /** Reference counter */
+ struct refcnt refcnt;
+ /** List of UDP connections */
+ struct list_head list;
+
+ /** Data transfer interface */
+ struct interface xfer;
+
+ /** Local socket address */
+ struct sockaddr_tcpip local;
+ /** Remote socket address */
+ struct sockaddr_tcpip peer;
+};
+
+/**
+ * List of registered UDP connections
+ */
+static LIST_HEAD ( udp_conns );
+
+/* Forward declarations */
+static struct interface_descriptor udp_xfer_desc;
+struct tcpip_protocol udp_protocol __tcpip_protocol;
+
+/**
+ * Check if local UDP port is available
+ *
+ * @v port Local port number
+ * @ret port Local port number, or negative error
+ */
+static int udp_port_available ( int port ) {
+ struct udp_connection *udp;
+
+ list_for_each_entry ( udp, &udp_conns, list ) {
+ if ( udp->local.st_port == htons ( port ) ) /* st_port is compared against the htons() form of port */
+ return -EADDRINUSE;
+ }
+ return port; /* no registered connection uses this port: hand it back unchanged */
+}
+
+/**
+ * Open a UDP connection
+ *
+ * @v xfer Data transfer interface
+ * @v peer Peer socket address, or NULL
+ * @v local Local socket address, or NULL
+ * @v promisc Socket is promiscuous
+ * @ret rc Return status code
+ */
+static int udp_open_common ( struct interface *xfer,
+ struct sockaddr *peer, struct sockaddr *local,
+ int promisc ) {
+ struct sockaddr_tcpip *st_peer = ( struct sockaddr_tcpip * ) peer;
+ struct sockaddr_tcpip *st_local = ( struct sockaddr_tcpip * ) local;
+ struct udp_connection *udp;
+ int port;
+ int rc;
+
+ /* Allocate and initialise structure */
+ udp = zalloc ( sizeof ( *udp ) );
+ if ( ! udp )
+ return -ENOMEM;
+ DBGC ( udp, "UDP %p allocated\n", udp );
+ ref_init ( &udp->refcnt, NULL ); /* NULL: no custom free function is supplied */
+ intf_init ( &udp->xfer, &udp_xfer_desc, &udp->refcnt );
+ if ( st_peer ) /* peer and local are both optional */
+ memcpy ( &udp->peer, st_peer, sizeof ( udp->peer ) );
+ if ( st_local )
+ memcpy ( &udp->local, st_local, sizeof ( udp->local ) );
+
+ /* Bind to local port (skipped entirely for promiscuous sockets) */
+ if ( ! promisc ) {
+ port = tcpip_bind ( st_local, udp_port_available );
+ if ( port < 0 ) { /* negative value is an error code */
+ rc = port;
+ DBGC ( udp, "UDP %p could not bind: %s\n",
+ udp, strerror ( rc ) );
+ goto err;
+ }
+ udp->local.st_port = htons ( port ); /* overrides any port copied from st_local above */
+ DBGC ( udp, "UDP %p bound to port %d\n",
+ udp, ntohs ( udp->local.st_port ) );
+ }
+
+ /* Attach parent interface, transfer reference to connection
+ * list and return
+ */
+ intf_plug_plug ( &udp->xfer, xfer );
+ list_add ( &udp->list, &udp_conns );
+ return 0;
+
+ err: /* connection was never added to udp_conns; just drop the reference */
+ ref_put ( &udp->refcnt );
+ return rc;
+}
+
+/**
+ * Open a UDP connection
+ *
+ * @v xfer Data transfer interface
+ * @v peer Peer socket address
+ * @v local Local socket address, or NULL
+ * @ret rc Return status code
+ */
+int udp_open ( struct interface *xfer, struct sockaddr *peer,
+ struct sockaddr *local ) {
+ return udp_open_common ( xfer, peer, local, 0 );
+}
+
+/**
+ * Open a promiscuous UDP connection
+ *
+ * @v xfer Data transfer interface
+ * @ret rc Return status code
+ *
+ * Promiscuous UDP connections are required in order to support the
+ * PXE API.
+ */
+int udp_open_promisc ( struct interface *xfer ) {
+ return udp_open_common ( xfer, NULL, NULL, 1 );
+}
+
+/**
+ * Close a UDP connection
+ *
+ * @v udp UDP connection
+ * @v rc Reason for close
+ */
+static void udp_close ( struct udp_connection *udp, int rc ) {
+
+ /* Close data transfer interface */
+ intf_shutdown ( &udp->xfer, rc );
+
+ /* Remove from list of connections and drop list's reference */
+ list_del ( &udp->list );
+ ref_put ( &udp->refcnt );
+
+ DBGC ( udp, "UDP %p closed\n", udp );
+}
+
+/**
+ * Transmit data via a UDP connection to a specified address
+ *
+ * @v udp UDP connection
+ * @v iobuf I/O buffer
+ * @v src Source address, or NULL to use default
+ * @v dest Destination address, or NULL to use default
+ * @v netdev Network device, or NULL to use default
+ * @ret rc Return status code
+ */
+static int udp_tx ( struct udp_connection *udp, struct io_buffer *iobuf,
+ struct sockaddr_tcpip *src, struct sockaddr_tcpip *dest,
+ struct net_device *netdev ) {
+ struct udp_header *udphdr;
+ size_t len;
+ int rc;
+
+ /* Check we can accommodate the header */
+ if ( ( rc = iob_ensure_headroom ( iobuf,
+ MAX_LL_NET_HEADER_LEN ) ) != 0 ) {
+ free_iob ( iobuf ); /* buffer is released on this failure path */
+ return rc;
+ }
+
+ /* Fill in default values if not explicitly provided */
+ if ( ! src )
+ src = &udp->local;
+ if ( ! dest )
+ dest = &udp->peer;
+
+ /* Add the UDP header */
+ udphdr = iob_push ( iobuf, sizeof ( *udphdr ) );
+ len = iob_len ( iobuf ); /* measured after the push, so it includes the UDP header */
+ udphdr->dest = dest->st_port; /* ports are copied as stored, with no byte swap */
+ udphdr->src = src->st_port;
+ udphdr->len = htons ( len ); /* NOTE(review): size_t narrowed to 16 bits; no range check is visible here */
+ udphdr->chksum = 0; /* zeroed first so the field itself does not contribute to the sum */
+ udphdr->chksum = tcpip_chksum ( udphdr, len );
+
+ /* Dump debugging information */
+ DBGC2 ( udp, "UDP %p TX %d->%d len %d\n", udp,
+ ntohs ( udphdr->src ), ntohs ( udphdr->dest ),
+ ntohs ( udphdr->len ) );
+
+ /* Send it to the next layer for processing */
+ if ( ( rc = tcpip_tx ( iobuf, &udp_protocol, src, dest, netdev,
+ &udphdr->chksum ) ) != 0 ) {
+ DBGC ( udp, "UDP %p could not transmit packet: %s\n",
+ udp, strerror ( rc ) );
+ return rc; /* iobuf is not freed here; presumably tcpip_tx() owns it even on failure - confirm */
+ }
+
+ return 0;
+}
+
+/**
+ * Identify UDP connection by local address
+ *
+ * @v local Local address
+ * @ret udp UDP connection, or NULL
+ */
+static struct udp_connection * udp_demux ( struct sockaddr_tcpip *local ) {
+ static const struct sockaddr_tcpip empty_sockaddr = { .pad = { 0, } }; /* all-zero reference for the wildcard test */
+ struct udp_connection *udp;
+
+ list_for_each_entry ( udp, &udp_conns, list ) {
+ if ( ( ( udp->local.st_family == local->st_family ) ||
+ ( udp->local.st_family == 0 ) ) && /* a zero family matches any family */
+ ( ( udp->local.st_port == local->st_port ) ||
+ ( udp->local.st_port == 0 ) ) && /* a zero port matches any port */
+ ( ( memcmp ( udp->local.pad, local->pad,
+ sizeof ( udp->local.pad ) ) == 0 ) ||
+ ( memcmp ( udp->local.pad, empty_sockaddr.pad,
+ sizeof ( udp->local.pad ) ) == 0 ) ) ) { /* an all-zero pad matches any pad */
+ return udp; /* first match in list order wins */
+ }
+ }
+ return NULL;
+}
+
+/**
+ * Process a received packet
+ *
+ * @v iobuf I/O buffer
+ * @v netdev Network device
+ * @v st_src Partially-filled source address
+ * @v st_dest Partially-filled destination address
+ * @v pshdr_csum Pseudo-header checksum
+ * @ret rc Return status code
+ */
+static int udp_rx ( struct io_buffer *iobuf,
+ struct net_device *netdev __unused,
+ struct sockaddr_tcpip *st_src,
+ struct sockaddr_tcpip *st_dest, uint16_t pshdr_csum ) {
+ struct udp_header *udphdr = iobuf->data;
+ struct udp_connection *udp;
+ struct xfer_metadata meta;
+ size_t ulen;
+ unsigned int csum;
+ int rc = 0;
+
+ /* Sanity check packet */
+ if ( iob_len ( iobuf ) < sizeof ( *udphdr ) ) { /* must hold a whole header before any field is read */
+ DBG ( "UDP packet too short at %zd bytes (min %zd bytes)\n",
+ iob_len ( iobuf ), sizeof ( *udphdr ) );
+
+ rc = -EINVAL;
+ goto done;
+ }
+ ulen = ntohs ( udphdr->len );
+ if ( ulen < sizeof ( *udphdr ) ) { /* the length field counts the header itself */
+ DBG ( "UDP length too short at %zd bytes "
+ "(header is %zd bytes)\n", ulen, sizeof ( *udphdr ) );
+ rc = -EINVAL;
+ goto done;
+ }
+ if ( ulen > iob_len ( iobuf ) ) { /* claimed length may not exceed what was received */
+ DBG ( "UDP length too long at %zd bytes (packet is %zd "
+ "bytes)\n", ulen, iob_len ( iobuf ) );
+ rc = -EINVAL;
+ goto done;
+ }
+ if ( udphdr->chksum ) { /* a zero checksum field skips verification */
+ csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data, ulen );
+ if ( csum != 0 ) {
+ DBG ( "UDP checksum incorrect (is %04x including "
+ "checksum field, should be 0000)\n", csum );
+ rc = -EINVAL;
+ goto done;
+ }
+ }
+
+ /* Parse parameters from header and strip header */
+ st_src->st_port = udphdr->src;
+ st_dest->st_port = udphdr->dest;
+ udp = udp_demux ( st_dest ); /* may be NULL; checked after the debug dump below */
+ iob_unput ( iobuf, ( iob_len ( iobuf ) - ulen ) ); /* trim anything beyond the UDP length */
+ iob_pull ( iobuf, sizeof ( *udphdr ) ); /* udphdr is still read below, after the pull */
+
+ /* Dump debugging information */
+ DBGC2 ( udp, "UDP %p RX %d<-%d len %zd\n", udp,
+ ntohs ( udphdr->dest ), ntohs ( udphdr->src ), ulen );
+
+ /* Ignore if no matching connection found */
+ if ( ! udp ) {
+ DBG ( "No UDP connection listening on port %d\n",
+ ntohs ( udphdr->dest ) );
+ rc = -ENOTCONN;
+ goto done;
+ }
+
+ /* Pass data to application */
+ memset ( &meta, 0, sizeof ( meta ) );
+ meta.src = ( struct sockaddr * ) st_src;
+ meta.dest = ( struct sockaddr * ) st_dest;
+ rc = xfer_deliver ( &udp->xfer, iob_disown ( iobuf ), &meta ); /* presumably iob_disown() leaves iobuf NULL - confirm */
+
+ done: /* every path, success or error, ends with this free_iob() call */
+ free_iob ( iobuf );
+ return rc;
+}
+
+struct tcpip_protocol udp_protocol __tcpip_protocol = {
+ .name = "UDP",
+ .rx = udp_rx,
+ .tcpip_proto = IP_UDP,
+};
+
+/***************************************************************************
+ *
+ * Data transfer interface
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Allocate I/O buffer for UDP
+ *
+ * @v udp UDP connection
+ * @v len Payload size
+ * @ret iobuf I/O buffer, or NULL
+ */
+static struct io_buffer * udp_xfer_alloc_iob ( struct udp_connection *udp,
+ size_t len ) {
+ struct io_buffer *iobuf;
+
+ iobuf = alloc_iob ( MAX_LL_NET_HEADER_LEN + len ); /* payload plus room for the headers added later */
+ if ( ! iobuf ) {
+ DBGC ( udp, "UDP %p cannot allocate buffer of length %zd\n",
+ udp, len );
+ return NULL;
+ }
+ iob_reserve ( iobuf, MAX_LL_NET_HEADER_LEN ); /* same headroom that udp_tx() asks iob_ensure_headroom() for */
+ return iobuf;
+}
+
+/**
+ * Deliver datagram as I/O buffer
+ *
+ * @v udp UDP connection
+ * @v iobuf Datagram I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ */
+static int udp_xfer_deliver ( struct udp_connection *udp,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *meta ) {
+
+ /* Transmit data, if possible */
+ return udp_tx ( udp, iobuf, ( ( struct sockaddr_tcpip * ) meta->src ),
+ ( ( struct sockaddr_tcpip * ) meta->dest ),
+ meta->netdev );
+}
+
+/** UDP data transfer interface operations */
+static struct interface_operation udp_xfer_operations[] = {
+ INTF_OP ( xfer_deliver, struct udp_connection *, udp_xfer_deliver ),
+ INTF_OP ( xfer_alloc_iob, struct udp_connection *, udp_xfer_alloc_iob ),
+ INTF_OP ( intf_close, struct udp_connection *, udp_close ),
+};
+
+/** UDP data transfer interface descriptor */
+static struct interface_descriptor udp_xfer_desc =
+ INTF_DESC ( struct udp_connection, xfer, udp_xfer_operations );
+
+/***************************************************************************
+ *
+ * Openers
+ *
+ ***************************************************************************
+ */
+
+/** UDP IPv4 socket opener */
+struct socket_opener udp_ipv4_socket_opener __socket_opener = {
+ .semantics = UDP_SOCK_DGRAM,
+ .family = AF_INET,
+ .open = udp_open,
+};
+
+/** UDP IPv6 socket opener */
+struct socket_opener udp_ipv6_socket_opener __socket_opener = {
+ .semantics = UDP_SOCK_DGRAM,
+ .family = AF_INET6,
+ .open = udp_open,
+};
+
+/** Linkage hack */
+int udp_sock_dgram = UDP_SOCK_DGRAM;
+
+/**
+ * Open UDP URI
+ *
+ * @v xfer Data transfer interface
+ * @v uri URI
+ * @ret rc Return status code
+ */
+static int udp_open_uri ( struct interface *xfer, struct uri *uri ) {
+ struct sockaddr_tcpip peer;
+
+ /* Sanity check */
+ if ( ! uri->host )
+ return -EINVAL;
+
+ memset ( &peer, 0, sizeof ( peer ) );
+ peer.st_port = htons ( uri_port ( uri, 0 ) );
+ return xfer_open_named_socket ( xfer, SOCK_DGRAM,
+ ( struct sockaddr * ) &peer,
+ uri->host, NULL );
+}
+
+/** UDP URI opener */
+struct uri_opener udp_uri_opener __uri_opener = {
+ .scheme = "udp",
+ .open = udp_open_uri,
+};
diff --git a/qemu/roms/ipxe/src/net/udp/dhcp.c b/qemu/roms/ipxe/src/net/udp/dhcp.c
new file mode 100644
index 000000000..04fad04c2
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/udp/dhcp.c
@@ -0,0 +1,1446 @@
+/*
+ * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <errno.h>
+#include <assert.h>
+#include <byteswap.h>
+#include <ipxe/if_ether.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/device.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/job.h>
+#include <ipxe/retry.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/ip.h>
+#include <ipxe/uuid.h>
+#include <ipxe/timer.h>
+#include <ipxe/settings.h>
+#include <ipxe/dhcp.h>
+#include <ipxe/dhcpopts.h>
+#include <ipxe/dhcppkt.h>
+#include <ipxe/dhcp_arch.h>
+#include <ipxe/features.h>
+
+/** @file
+ *
+ * Dynamic Host Configuration Protocol
+ *
+ */
+
+struct dhcp_session;
+static int dhcp_tx ( struct dhcp_session *dhcp );
+
+/**
+ * DHCP operation types
+ *
+ * This table maps from DHCP message types (i.e. values of the @c
+ * DHCP_MESSAGE_TYPE option) to values of the "op" field within a DHCP
+ * packet.
+ */
+static const uint8_t dhcp_op[] = {
+ [DHCPDISCOVER] = BOOTP_REQUEST,
+ [DHCPOFFER] = BOOTP_REPLY,
+ [DHCPREQUEST] = BOOTP_REQUEST,
+ [DHCPDECLINE] = BOOTP_REQUEST,
+ [DHCPACK] = BOOTP_REPLY,
+ [DHCPNAK] = BOOTP_REPLY,
+ [DHCPRELEASE] = BOOTP_REQUEST,
+ [DHCPINFORM] = BOOTP_REQUEST,
+};
+
+/** Raw option data for options common to all DHCP requests */
+static uint8_t dhcp_request_options_data[] = {
+ DHCP_MESSAGE_TYPE, DHCP_BYTE ( 0 ),
+ DHCP_MAX_MESSAGE_SIZE,
+ DHCP_WORD ( ETH_MAX_MTU - 20 /* IP header */ - 8 /* UDP header */ ),
+ DHCP_CLIENT_ARCHITECTURE, DHCP_ARCH_CLIENT_ARCHITECTURE,
+ DHCP_CLIENT_NDI, DHCP_ARCH_CLIENT_NDI,
+ DHCP_VENDOR_CLASS_ID, DHCP_ARCH_VENDOR_CLASS_ID,
+ DHCP_USER_CLASS_ID, DHCP_STRING ( 'i', 'P', 'X', 'E' ),
+ DHCP_PARAMETER_REQUEST_LIST,
+ DHCP_OPTION ( DHCP_SUBNET_MASK, DHCP_ROUTERS, DHCP_DNS_SERVERS,
+ DHCP_LOG_SERVERS, DHCP_HOST_NAME, DHCP_DOMAIN_NAME,
+ DHCP_ROOT_PATH, DHCP_VENDOR_ENCAP, DHCP_VENDOR_CLASS_ID,
+ DHCP_TFTP_SERVER_NAME, DHCP_BOOTFILE_NAME,
+ DHCP_DOMAIN_SEARCH,
+ 128, 129, 130, 131, 132, 133, 134, 135, /* for PXE */
+ DHCP_EB_ENCAP, DHCP_ISCSI_INITIATOR_IQN ),
+ DHCP_END
+};
+
+/** DHCP server address setting */
+const struct setting dhcp_server_setting __setting ( SETTING_MISC,
+ dhcp-server ) = {
+ .name = "dhcp-server",
+ .description = "DHCP server",
+ .tag = DHCP_SERVER_IDENTIFIER,
+ .type = &setting_type_ipv4,
+};
+
+/**
+ * Most recent DHCP transaction ID
+ *
+ * This is exposed for use by the fakedhcp code when reconstructing
+ * DHCP packets for PXE NBPs.
+ */
+uint32_t dhcp_last_xid;
+
+/**
+ * Name a DHCP packet type
+ *
+ * @v msgtype DHCP message type
+ * @ret string DHCP message type name
+ */
+static inline const char * dhcp_msgtype_name ( unsigned int msgtype ) {
+ switch ( msgtype ) {
+ case DHCPNONE: return "BOOTP"; /* Non-DHCP packet */
+ case DHCPDISCOVER: return "DHCPDISCOVER";
+ case DHCPOFFER: return "DHCPOFFER";
+ case DHCPREQUEST: return "DHCPREQUEST";
+ case DHCPDECLINE: return "DHCPDECLINE";
+ case DHCPACK: return "DHCPACK";
+ case DHCPNAK: return "DHCPNAK";
+ case DHCPRELEASE: return "DHCPRELEASE";
+ case DHCPINFORM: return "DHCPINFORM";
+ default: return "DHCP<invalid>";
+ }
+}
+
+/****************************************************************************
+ *
+ * DHCP session
+ *
+ */
+
+struct dhcp_session;
+
+/** DHCP session state operations */
+struct dhcp_session_state {
+ /** State name */
+ const char *name;
+ /**
+ * Construct transmitted packet
+ *
+ * @v dhcp DHCP session
+ * @v dhcppkt DHCP packet
+ * @v peer Destination address
+ */
+ int ( * tx ) ( struct dhcp_session *dhcp,
+ struct dhcp_packet *dhcppkt,
+ struct sockaddr_in *peer );
+ /** Handle received packet
+ *
+ * @v dhcp DHCP session
+ * @v dhcppkt DHCP packet
+ * @v peer DHCP server address
+ * @v msgtype DHCP message type
+ * @v server_id DHCP server ID
+ */
+ void ( * rx ) ( struct dhcp_session *dhcp,
+ struct dhcp_packet *dhcppkt,
+ struct sockaddr_in *peer,
+ uint8_t msgtype, struct in_addr server_id );
+ /** Handle timer expiry
+ *
+ * @v dhcp DHCP session
+ */
+ void ( * expired ) ( struct dhcp_session *dhcp );
+ /** Transmitted message type */
+ uint8_t tx_msgtype;
+ /** Apply minimum timeout */
+ uint8_t apply_min_timeout;
+};
+
+static struct dhcp_session_state dhcp_state_discover;
+static struct dhcp_session_state dhcp_state_request;
+static struct dhcp_session_state dhcp_state_proxy;
+static struct dhcp_session_state dhcp_state_pxebs;
+
+/** A DHCP session */
+struct dhcp_session {
+ /** Reference counter */
+ struct refcnt refcnt;
+ /** Job control interface */
+ struct interface job;
+ /** Data transfer interface */
+ struct interface xfer;
+
+ /** Network device being configured */
+ struct net_device *netdev;
+ /** Local socket address */
+ struct sockaddr_in local;
+ /** State of the session */
+ struct dhcp_session_state *state;
+ /** Transaction ID (in network-endian order) */
+ uint32_t xid;
+
+ /** Offered IP address */
+ struct in_addr offer;
+ /** DHCP server */
+ struct in_addr server;
+ /** DHCP offer priority */
+ int priority;
+
+ /** ProxyDHCP protocol extensions should be ignored */
+ int no_pxedhcp;
+ /** ProxyDHCP server */
+ struct in_addr proxy_server;
+ /** ProxyDHCP offer */
+ struct dhcp_packet *proxy_offer;
+ /** ProxyDHCP offer priority */
+ int proxy_priority;
+
+ /** PXE Boot Server type */
+ uint16_t pxe_type;
+ /** List of PXE Boot Servers to attempt */
+ struct in_addr *pxe_attempt;
+ /** List of PXE Boot Servers to accept */
+ struct in_addr *pxe_accept;
+
+ /** Retransmission timer */
+ struct retry_timer timer;
+ /** Transmission counter */
+ unsigned int count;
+ /** Start time of the current state (in ticks) */
+ unsigned long start;
+};
+
+/**
+ * Free DHCP session
+ *
+ * @v refcnt Reference counter
+ */
+static void dhcp_free ( struct refcnt *refcnt ) {
+ struct dhcp_session *dhcp =
+ container_of ( refcnt, struct dhcp_session, refcnt );
+
+ netdev_put ( dhcp->netdev );
+ dhcppkt_put ( dhcp->proxy_offer );
+ free ( dhcp );
+}
+
+/**
+ * Mark DHCP session as complete
+ *
+ * @v dhcp DHCP session
+ * @v rc Return status code
+ */
+static void dhcp_finished ( struct dhcp_session *dhcp, int rc ) {
+
+ /* Stop retry timer */
+ stop_timer ( &dhcp->timer );
+
+ /* Shut down interfaces */
+ intf_shutdown ( &dhcp->xfer, rc );
+ intf_shutdown ( &dhcp->job, rc );
+}
+
+/**
+ * Transition to new DHCP session state
+ *
+ * @v dhcp DHCP session
+ * @v state New session state
+ */
+static void dhcp_set_state ( struct dhcp_session *dhcp,
+ struct dhcp_session_state *state ) {
+
+ DBGC ( dhcp, "DHCP %p entering %s state\n", dhcp, state->name );
+ dhcp->state = state;
+ dhcp->start = currticks(); /* reference point for the elapsed-time checks in the rx/expired handlers */
+ stop_timer ( &dhcp->timer ); /* timer is stopped before its limits are rewritten */
+ dhcp->timer.min_timeout =
+ ( state->apply_min_timeout ? DHCP_MIN_TIMEOUT : 0 ); /* only states that set apply_min_timeout get DHCP_MIN_TIMEOUT */
+ dhcp->timer.max_timeout = DHCP_MAX_TIMEOUT;
+ start_timer_nodelay ( &dhcp->timer ); /* presumably expires at once, triggering the new state's first action - confirm */
+}
+
+/**
+ * Check if DHCP packet contains PXE options
+ *
+ * @v dhcppkt DHCP packet
+ * @ret has_pxeopts DHCP packet contains PXE options
+ *
+ * It is assumed that the packet is already known to contain option 60
+ * set to "PXEClient".
+ */
+static int dhcp_has_pxeopts ( struct dhcp_packet *dhcppkt ) {
+
+ /* Check for a boot filename */
+ if ( dhcppkt_fetch ( dhcppkt, DHCP_BOOTFILE_NAME, NULL, 0 ) > 0 )
+ return 1;
+
+ /* Check for a PXE boot menu */
+ if ( dhcppkt_fetch ( dhcppkt, DHCP_PXE_BOOT_MENU, NULL, 0 ) > 0 )
+ return 1;
+
+ return 0;
+}
+
+/****************************************************************************
+ *
+ * DHCP state machine
+ *
+ */
+
+/**
+ * Construct transmitted packet for DHCP discovery
+ *
+ * @v dhcp DHCP session
+ * @v dhcppkt DHCP packet
+ * @v peer Destination address
+ */
+static int dhcp_discovery_tx ( struct dhcp_session *dhcp,
+ struct dhcp_packet *dhcppkt __unused,
+ struct sockaddr_in *peer ) {
+
+ DBGC ( dhcp, "DHCP %p DHCPDISCOVER\n", dhcp );
+
+ /* Set server address */
+ peer->sin_addr.s_addr = INADDR_BROADCAST;
+ peer->sin_port = htons ( BOOTPS_PORT );
+
+ return 0;
+}
+
+/**
+ * Handle received packet during DHCP discovery
+ *
+ * @v dhcp DHCP session
+ * @v dhcppkt DHCP packet
+ * @v peer DHCP server address
+ * @v msgtype DHCP message type
+ * @v server_id DHCP server ID
+ */
+static void dhcp_discovery_rx ( struct dhcp_session *dhcp,
+ struct dhcp_packet *dhcppkt,
+ struct sockaddr_in *peer, uint8_t msgtype,
+ struct in_addr server_id ) {
+ struct in_addr ip;
+ char vci[9]; /* exactly fits "PXEClient" (9 chars), with no NUL terminator */
+ int vci_len;
+ int has_pxeclient;
+ int8_t priority = 0; /* fetch result is unchecked below; 0 is the fallback */
+ uint8_t no_pxedhcp = 0; /* fetch result is unchecked below; 0 is the fallback */
+ unsigned long elapsed;
+
+ DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp,
+ dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ),
+ ntohs ( peer->sin_port ) );
+ if ( server_id.s_addr != peer->sin_addr.s_addr )
+ DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) );
+
+ /* Identify offered IP address */
+ ip = dhcppkt->dhcphdr->yiaddr;
+ if ( ip.s_addr )
+ DBGC ( dhcp, " for %s", inet_ntoa ( ip ) );
+
+ /* Identify "PXEClient" vendor class */
+ vci_len = dhcppkt_fetch ( dhcppkt, DHCP_VENDOR_CLASS_ID,
+ vci, sizeof ( vci ) );
+ has_pxeclient = ( ( vci_len >= ( int ) sizeof ( vci ) ) &&
+ ( strncmp ( "PXEClient", vci, sizeof (vci) ) == 0 )); /* only the first 9 bytes are compared */
+ if ( has_pxeclient ) {
+ DBGC ( dhcp, "%s",
+ ( dhcp_has_pxeopts ( dhcppkt ) ? " pxe" : " proxy" ) );
+ }
+
+ /* Identify priority */
+ dhcppkt_fetch ( dhcppkt, DHCP_EB_PRIORITY, &priority,
+ sizeof ( priority ) );
+ if ( priority )
+ DBGC ( dhcp, " pri %d", priority );
+
+ /* Identify ignore-PXE flag */
+ dhcppkt_fetch ( dhcppkt, DHCP_EB_NO_PXEDHCP, &no_pxedhcp,
+ sizeof ( no_pxedhcp ) );
+ if ( no_pxedhcp )
+ DBGC ( dhcp, " nopxe" );
+ DBGC ( dhcp, "\n" );
+
+ /* Select as DHCP offer, if applicable */
+ if ( ip.s_addr && ( peer->sin_port == htons ( BOOTPS_PORT ) ) &&
+ ( ( msgtype == DHCPOFFER ) || ( ! msgtype /* BOOTP */ ) ) &&
+ ( priority >= dhcp->priority ) ) { /* ">=": a later offer of equal priority replaces the earlier one */
+ dhcp->offer = ip;
+ dhcp->server = server_id;
+ dhcp->priority = priority;
+ dhcp->no_pxedhcp = no_pxedhcp;
+ }
+
+ /* Select as ProxyDHCP offer, if applicable */
+ if ( server_id.s_addr && has_pxeclient &&
+ ( priority >= dhcp->proxy_priority ) ) { /* one packet may satisfy both this test and the one above */
+ dhcppkt_put ( dhcp->proxy_offer ); /* drop the reference to any previously held offer */
+ dhcp->proxy_server = server_id;
+ dhcp->proxy_offer = dhcppkt_get ( dhcppkt );
+ dhcp->proxy_priority = priority;
+ }
+
+ /* We can exit the discovery state when we have a valid
+ * DHCPOFFER, and either:
+ *
+ * o The DHCPOFFER instructs us to ignore ProxyDHCPOFFERs, or
+ * o We have a valid ProxyDHCPOFFER, or
+ * o We have allowed sufficient time for ProxyDHCPOFFERs.
+ */
+
+ /* If we don't yet have a DHCPOFFER, do nothing */
+ if ( ! dhcp->offer.s_addr )
+ return;
+
+ /* If we can't yet transition to DHCPREQUEST, do nothing */
+ elapsed = ( currticks() - dhcp->start ); /* ticks since dhcp_set_state() entered this state */
+ if ( ! ( dhcp->no_pxedhcp || dhcp->proxy_offer ||
+ ( elapsed > PROXYDHCP_MAX_TIMEOUT ) ) )
+ return;
+
+ /* Transition to DHCPREQUEST */
+ dhcp_set_state ( dhcp, &dhcp_state_request );
+}
+
+/**
+ * Handle timer expiry during DHCP discovery
+ *
+ * @v dhcp DHCP session
+ */
+static void dhcp_discovery_expired ( struct dhcp_session *dhcp ) {
+ unsigned long elapsed = ( currticks() - dhcp->start );
+
+ /* Give up waiting for ProxyDHCP before we reach the failure point */
+ if ( dhcp->offer.s_addr && ( elapsed > PROXYDHCP_MAX_TIMEOUT ) ) {
+ dhcp_set_state ( dhcp, &dhcp_state_request );
+ return;
+ }
+
+ /* Otherwise, retransmit current packet */
+ dhcp_tx ( dhcp );
+}
+
+/** DHCP discovery state operations */
+static struct dhcp_session_state dhcp_state_discover = {
+ .name = "discovery",
+ .tx = dhcp_discovery_tx,
+ .rx = dhcp_discovery_rx,
+ .expired = dhcp_discovery_expired,
+ .tx_msgtype = DHCPDISCOVER,
+ .apply_min_timeout = 1,
+};
+
+/**
+ * Construct transmitted packet for DHCP request
+ *
+ * @v dhcp DHCP session
+ * @v dhcppkt DHCP packet
+ * @v peer Destination address
+ */
+static int dhcp_request_tx ( struct dhcp_session *dhcp,
+ struct dhcp_packet *dhcppkt,
+ struct sockaddr_in *peer ) {
+ int rc;
+
+ DBGC ( dhcp, "DHCP %p DHCPREQUEST to %s:%d",
+ dhcp, inet_ntoa ( dhcp->server ), BOOTPS_PORT );
+ DBGC ( dhcp, " for %s\n", inet_ntoa ( dhcp->offer ) );
+
+ /* Set server ID */
+ if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_SERVER_IDENTIFIER,
+ &dhcp->server,
+ sizeof ( dhcp->server ) ) ) != 0 )
+ return rc;
+
+ /* Set requested IP address */
+ if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_REQUESTED_ADDRESS,
+ &dhcp->offer,
+ sizeof ( dhcp->offer ) ) ) != 0 )
+ return rc;
+
+ /* Set server address */
+ peer->sin_addr.s_addr = INADDR_BROADCAST;
+ peer->sin_port = htons ( BOOTPS_PORT );
+
+ return 0;
+}
+
+/**
+ * Handle received packet during DHCP request
+ *
+ * @v dhcp DHCP session
+ * @v dhcppkt DHCP packet
+ * @v peer DHCP server address
+ * @v msgtype DHCP message type
+ * @v server_id DHCP server ID
+ */
+static void dhcp_request_rx ( struct dhcp_session *dhcp,
+ struct dhcp_packet *dhcppkt,
+ struct sockaddr_in *peer, uint8_t msgtype,
+ struct in_addr server_id ) {
+ struct in_addr ip;
+ struct settings *parent;
+ struct settings *settings;
+ int rc;
+
+ DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp,
+ dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ),
+ ntohs ( peer->sin_port ) );
+ if ( server_id.s_addr != peer->sin_addr.s_addr )
+ DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) );
+
+ /* Identify leased IP address */
+ ip = dhcppkt->dhcphdr->yiaddr;
+ if ( ip.s_addr )
+ DBGC ( dhcp, " for %s", inet_ntoa ( ip ) );
+ DBGC ( dhcp, "\n" );
+
+ /* Filter out unacceptable responses */
+ if ( peer->sin_port != htons ( BOOTPS_PORT ) )
+ return;
+ if ( msgtype /* BOOTP */ && ( msgtype != DHCPACK ) )
+ return;
+ if ( server_id.s_addr != dhcp->server.s_addr )
+ return;
+ if ( ip.s_addr != dhcp->offer.s_addr )
+ return;
+
+ /* Record assigned address */
+ dhcp->local.sin_addr = ip;
+
+ /* Register settings */
+ parent = netdev_settings ( dhcp->netdev );
+ settings = &dhcppkt->settings;
+ if ( ( rc = register_settings ( settings, parent,
+ DHCP_SETTINGS_NAME ) ) != 0 ) {
+ DBGC ( dhcp, "DHCP %p could not register settings: %s\n",
+ dhcp, strerror ( rc ) );
+ dhcp_finished ( dhcp, rc );
+ return;
+ }
+
+ /* Perform ProxyDHCP if applicable */
+ if ( dhcp->proxy_offer /* Have ProxyDHCP offer */ &&
+ ( ! dhcp->no_pxedhcp ) /* ProxyDHCP not disabled */ ) {
+ if ( dhcp_has_pxeopts ( dhcp->proxy_offer ) ) {
+ /* PXE options already present; register settings
+ * without performing a ProxyDHCPREQUEST
+ */
+ settings = &dhcp->proxy_offer->settings;
+ if ( ( rc = register_settings ( settings, NULL,
+ PROXYDHCP_SETTINGS_NAME ) ) != 0 ) {
+ DBGC ( dhcp, "DHCP %p could not register "
+ "proxy settings: %s\n",
+ dhcp, strerror ( rc ) );
+ dhcp_finished ( dhcp, rc );
+ return;
+ }
+ } else {
+ /* PXE options not present; use a ProxyDHCPREQUEST */
+ dhcp_set_state ( dhcp, &dhcp_state_proxy );
+ return;
+ }
+ }
+
+ /* Terminate DHCP */
+ dhcp_finished ( dhcp, 0 );
+}
+
+/**
+ * Handle timer expiry during DHCP request
+ *
+ * @v dhcp DHCP session
+ */
+static void dhcp_request_expired ( struct dhcp_session *dhcp ) {
+
+ /* Retransmit current packet */
+ dhcp_tx ( dhcp );
+}
+
+/** DHCP request state operations */
+static struct dhcp_session_state dhcp_state_request = {
+ .name = "request",
+ .tx = dhcp_request_tx,
+ .rx = dhcp_request_rx,
+ .expired = dhcp_request_expired,
+ .tx_msgtype = DHCPREQUEST,
+ .apply_min_timeout = 0,
+};
+
+/**
+ * Construct transmitted packet for ProxyDHCP request
+ *
+ * @v dhcp DHCP session
+ * @v dhcppkt DHCP packet
+ * @v peer Destination address
+ */
+static int dhcp_proxy_tx ( struct dhcp_session *dhcp,
+ struct dhcp_packet *dhcppkt,
+ struct sockaddr_in *peer ) {
+ int rc;
+
+ DBGC ( dhcp, "DHCP %p ProxyDHCP REQUEST to %s\n", dhcp,
+ inet_ntoa ( dhcp->proxy_server ) );
+
+ /* Set server ID */
+ if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_SERVER_IDENTIFIER,
+ &dhcp->proxy_server,
+ sizeof ( dhcp->proxy_server ) ) ) != 0 )
+ return rc;
+
+ /* Set server address */
+ peer->sin_addr = dhcp->proxy_server;
+ peer->sin_port = htons ( PXE_PORT );
+
+ return 0;
+}
+
+/**
+ * Handle received packet during ProxyDHCP request
+ *
+ * @v dhcp DHCP session
+ * @v dhcppkt DHCP packet
+ * @v peer DHCP server address
+ * @v msgtype DHCP message type
+ * @v server_id DHCP server ID
+ */
+static void dhcp_proxy_rx ( struct dhcp_session *dhcp,
+ struct dhcp_packet *dhcppkt,
+ struct sockaddr_in *peer, uint8_t msgtype,
+ struct in_addr server_id ) {
+ struct settings *settings = &dhcppkt->settings;
+ int rc;
+
+ DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp,
+ dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ),
+ ntohs ( peer->sin_port ) );
+ if ( server_id.s_addr != peer->sin_addr.s_addr )
+ DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) );
+ DBGC ( dhcp, "\n" );
+
+ /* Filter out unacceptable responses */
+ if ( peer->sin_port != htons ( PXE_PORT ) ) /* host constant converted to network order, as in the other rx handlers */
+ return;
+ if ( ( msgtype != DHCPOFFER ) && ( msgtype != DHCPACK ) ) /* either reply type is accepted */
+ return;
+ if ( server_id.s_addr /* Linux PXE server omits server ID */ &&
+ ( server_id.s_addr != dhcp->proxy_server.s_addr ) )
+ return;
+
+ /* Register settings */
+ if ( ( rc = register_settings ( settings, NULL,
+ PROXYDHCP_SETTINGS_NAME ) ) != 0 ) {
+ DBGC ( dhcp, "DHCP %p could not register proxy settings: %s\n",
+ dhcp, strerror ( rc ) );
+ dhcp_finished ( dhcp, rc ); /* registration failure ends the whole session with rc */
+ return;
+ }
+
+ /* Terminate DHCP */
+ dhcp_finished ( dhcp, 0 );
+}
+
+/**
+ * Handle timer expiry during ProxyDHCP request
+ *
+ * @v dhcp DHCP session
+ */
+static void dhcp_proxy_expired ( struct dhcp_session *dhcp ) {
+ unsigned long elapsed = ( currticks() - dhcp->start );
+
+ /* Give up waiting for ProxyDHCP before we reach the failure point */
+ if ( elapsed > PROXYDHCP_MAX_TIMEOUT ) {
+ dhcp_finished ( dhcp, 0 );
+ return;
+ }
+
+ /* Retransmit current packet */
+ dhcp_tx ( dhcp );
+}
+
+/** ProxyDHCP request state operations */
+static struct dhcp_session_state dhcp_state_proxy = {
+ .name = "ProxyDHCP",
+ .tx = dhcp_proxy_tx,
+ .rx = dhcp_proxy_rx,
+ .expired = dhcp_proxy_expired,
+ .tx_msgtype = DHCPREQUEST,
+ .apply_min_timeout = 0,
+};
+
+/**
+ * Construct transmitted packet for PXE Boot Server Discovery
+ *
+ * @v dhcp DHCP session
+ * @v dhcppkt DHCP packet
+ * @v peer Destination address
+ */
+static int dhcp_pxebs_tx ( struct dhcp_session *dhcp,
+ struct dhcp_packet *dhcppkt,
+ struct sockaddr_in *peer ) {
+ struct dhcp_pxe_boot_menu_item menu_item = { 0, 0 };
+ int rc;
+
+ /* Set server address */
+ peer->sin_addr = *(dhcp->pxe_attempt);
+ peer->sin_port = ( ( peer->sin_addr.s_addr == INADDR_BROADCAST ) ?
+ htons ( BOOTPS_PORT ) : htons ( PXE_PORT ) );
+
+ DBGC ( dhcp, "DHCP %p PXEBS REQUEST to %s:%d for type %d\n",
+ dhcp, inet_ntoa ( peer->sin_addr ), ntohs ( peer->sin_port ),
+ le16_to_cpu ( dhcp->pxe_type ) );
+
+ /* Set boot menu item */
+ menu_item.type = dhcp->pxe_type;
+ if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_PXE_BOOT_MENU_ITEM,
+ &menu_item, sizeof ( menu_item ) ) ) != 0 )
+ return rc;
+
+ return 0;
+}
+
+/**
+ * Check to see if PXE Boot Server address is acceptable
+ *
+ * @v dhcp DHCP session
+ * @v bs Boot Server address
+ * @ret accept Boot Server is acceptable
+ */
+static int dhcp_pxebs_accept ( struct dhcp_session *dhcp,
+ struct in_addr bs ) {
+ struct in_addr *accept;
+
+ /* Accept if we have no acceptance filter */
+ if ( ! dhcp->pxe_accept )
+ return 1;
+
+ /* Scan through acceptance list (ends at the first all-zero address) */
+ for ( accept = dhcp->pxe_accept ; accept->s_addr ; accept++ ) {
+ if ( accept->s_addr == bs.s_addr )
+ return 1;
+ }
+
+ DBGC ( dhcp, "DHCP %p rejecting server %s\n",
+ dhcp, inet_ntoa ( bs ) );
+ return 0; /* a filter exists and bs is not in it */
+}
+
+/**
+ * Handle received packet during PXE Boot Server Discovery
+ *
+ * @v dhcp		DHCP session
+ * @v dhcppkt		DHCP packet
+ * @v peer		DHCP server address
+ * @v msgtype		DHCP message type
+ * @v server_id		DHCP server ID
+ *
+ * A response is used only if it comes from the BOOTP server port or
+ * the PXE port, is a DHCPACK, carries the boot menu item type that
+ * this session asked for, and passes the acceptance filter.  The
+ * first such response is registered as a settings block and ends the
+ * session.
+ */
+static void dhcp_pxebs_rx ( struct dhcp_session *dhcp,
+			    struct dhcp_packet *dhcppkt,
+			    struct sockaddr_in *peer, uint8_t msgtype,
+			    struct in_addr server_id ) {
+	struct dhcp_pxe_boot_menu_item menu_item = { 0, 0 };
+	int rc;
+
+	DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp,
+	       dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ),
+	       ntohs ( peer->sin_port ) );
+	if ( server_id.s_addr != peer->sin_addr.s_addr )
+		DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) );
+
+	/* Identify boot menu item.  The type is decoded for display
+	 * with le16_to_cpu(), matching the way the session's own
+	 * pxe_type is stored and the way the transmit path prints it,
+	 * so that request and response traces show the same number.
+	 */
+	dhcppkt_fetch ( dhcppkt, DHCP_PXE_BOOT_MENU_ITEM,
+			&menu_item, sizeof ( menu_item ) );
+	if ( menu_item.type )
+		DBGC ( dhcp, " for type %d", le16_to_cpu ( menu_item.type ) );
+	DBGC ( dhcp, "\n" );
+
+	/* Filter out unacceptable responses */
+	if ( ( peer->sin_port != htons ( BOOTPS_PORT ) ) &&
+	     ( peer->sin_port != htons ( PXE_PORT ) ) )
+		return;
+	if ( msgtype != DHCPACK )
+		return;
+	if ( menu_item.type != dhcp->pxe_type )
+		return;
+	/* Filter on the server ID if one was supplied, otherwise on
+	 * the address that the packet actually came from.
+	 */
+	if ( ! dhcp_pxebs_accept ( dhcp, ( server_id.s_addr ?
+					   server_id : peer->sin_addr ) ) )
+		return;
+
+	/* Register settings */
+	if ( ( rc = register_settings ( &dhcppkt->settings, NULL,
+					PXEBS_SETTINGS_NAME ) ) != 0 ) {
+		DBGC ( dhcp, "DHCP %p could not register settings: %s\n",
+		       dhcp, strerror ( rc ) );
+		dhcp_finished ( dhcp, rc );
+		return;
+	}
+
+	/* Terminate DHCP */
+	dhcp_finished ( dhcp, 0 );
+}
+
+/**
+ * Handle timer expiry during PXE Boot Server Discovery
+ *
+ * @v dhcp		DHCP session
+ *
+ * While the current boot server is still within its time allowance
+ * the request is simply retransmitted.  Once the allowance is used
+ * up, discovery moves on to the next server in the attempt list, or
+ * fails with a timeout if the list is exhausted.
+ */
+static void dhcp_pxebs_expired ( struct dhcp_session *dhcp ) {
+	unsigned long waited = ( currticks() - dhcp->start );
+
+	/* Still within the allowance: retransmit current packet */
+	if ( waited <= PXEBS_MAX_TIMEOUT ) {
+		dhcp_tx ( dhcp );
+		return;
+	}
+
+	/* Give up on this server before we reach the failure point,
+	 * and fail over to the next server in the attempt list
+	 */
+	dhcp->pxe_attempt++;
+	if ( ! dhcp->pxe_attempt->s_addr ) {
+		dhcp_finished ( dhcp, -ETIMEDOUT );
+		return;
+	}
+	dhcp_set_state ( dhcp, &dhcp_state_pxebs );
+}
+
+/**
+ * PXE Boot Server Discovery state operations
+ *
+ * Packets sent in this state are DHCPREQUESTs, and the
+ * minimum-timeout flag is set.  dhcp_pxebs_expired() re-enters this
+ * same state each time it moves on to the next boot server in the
+ * attempt list.
+ */
+static struct dhcp_session_state dhcp_state_pxebs = {
+ .name = "PXEBS",
+ .tx = dhcp_pxebs_tx,
+ .rx = dhcp_pxebs_rx,
+ .expired = dhcp_pxebs_expired,
+ .tx_msgtype = DHCPREQUEST,
+ .apply_min_timeout = 1,
+};
+
+/****************************************************************************
+ *
+ * Packet construction
+ *
+ */
+
+/**
+ * Create a DHCP packet
+ *
+ * @v dhcppkt		DHCP packet structure to fill in
+ * @v netdev		Network device
+ * @v msgtype		DHCP message type
+ * @v xid		Transaction ID (in network-endian order)
+ * @v options		Initial options to include (or NULL)
+ * @v options_len	Length of initial options
+ * @v data		Buffer for DHCP packet
+ * @v max_len		Size of DHCP packet buffer
+ * @ret rc		Return status code
+ *
+ * Zeroes the whole buffer, writes a DHCP header plus the initial
+ * options into it, initialises the DHCP packet structure to describe
+ * the buffer, and finally stores the message type option.  Fails with
+ * -ENOSPC if the buffer cannot hold the header and initial options.
+ */
+int dhcp_create_packet ( struct dhcp_packet *dhcppkt,
+			 struct net_device *netdev, uint8_t msgtype,
+			 uint32_t xid, const void *options, size_t options_len,
+			 void *data, size_t max_len ) {
+	struct ll_protocol *ll_protocol = netdev->ll_protocol;
+	struct dhcphdr *hdr = data;
+
+	/* The buffer must hold at least the header and initial options */
+	if ( ( sizeof ( *hdr ) + options_len ) > max_len )
+		return -ENOSPC;
+
+	/* Start from an all-zeroes buffer and fill in the header */
+	memset ( hdr, 0, max_len );
+	hdr->op = dhcp_op[msgtype];
+	hdr->htype = ntohs ( ll_protocol->ll_proto );
+	hdr->hlen = ll_protocol->ll_addr_len;
+	hdr->xid = xid;
+	hdr->magic = htonl ( DHCP_MAGIC_COOKIE );
+	memcpy ( hdr->chaddr, netdev->ll_addr, ll_protocol->ll_addr_len );
+	memcpy ( hdr->options, options, options_len );
+
+	/* A link-layer address that functions only as a name cannot
+	 * be used as a destination address, so responses must be
+	 * broadcast.
+	 */
+	if ( ll_protocol->flags & LL_NAME_ONLY )
+		hdr->flags |= htons ( BOOTP_FL_BROADCAST );
+
+	/* Unicast responses may be rejected once the network device
+	 * already has an IPv4 address, so ask for broadcast responses
+	 * in that case as well.
+	 */
+	if ( ipv4_has_any_addr ( netdev ) )
+		hdr->flags |= htons ( BOOTP_FL_BROADCAST );
+
+	/* Describe the buffer through the DHCP packet structure */
+	memset ( dhcppkt, 0, sizeof ( *dhcppkt ) );
+	dhcppkt_init ( dhcppkt, data, max_len );
+
+	/* Every packet carries a DHCP_MESSAGE_TYPE option */
+	return dhcppkt_store ( dhcppkt, DHCP_MESSAGE_TYPE,
+			       &msgtype, sizeof ( msgtype ) );
+}
+
+/**
+ * Create DHCP request packet
+ *
+ * @v dhcppkt DHCP packet structure to fill in
+ * @v netdev Network device
+ * @v msgtype DHCP message type
+ * @v xid Transaction ID (in network-endian order)
+ * @v ciaddr Client IP address
+ * @v data Buffer for DHCP packet
+ * @v max_len Size of DHCP packet buffer
+ * @ret rc Return status code
+ *
+ * Creates a DHCP request packet in the specified buffer, and
+ * initialises a DHCP packet structure.
+ *
+ * On top of the basic packet built by dhcp_create_packet(), this
+ * adds, in order: the feature list, the bus ID, the client
+ * identifier, the client UUID (if one can be fetched) and the user
+ * class (if one can be fetched). The first option that cannot be
+ * stored aborts construction and its error is returned.
+ */
+int dhcp_create_request ( struct dhcp_packet *dhcppkt,
+ struct net_device *netdev, unsigned int msgtype,
+ uint32_t xid, struct in_addr ciaddr,
+ void *data, size_t max_len ) {
+ struct dhcp_netdev_desc dhcp_desc;
+ struct dhcp_client_id client_id;
+ struct dhcp_client_uuid client_uuid;
+ uint8_t *dhcp_features;
+ size_t dhcp_features_len;
+ size_t ll_addr_len;
+ void *user_class;
+ ssize_t len;
+ int rc;
+
+ /* Create DHCP packet, seeded with the fixed request options */
+ if ( ( rc = dhcp_create_packet ( dhcppkt, netdev, msgtype, xid,
+ dhcp_request_options_data,
+ sizeof ( dhcp_request_options_data ),
+ data, max_len ) ) != 0 ) {
+ DBG ( "DHCP could not create DHCP packet: %s\n",
+ strerror ( rc ) );
+ goto err_create_packet;
+ }
+
+ /* Set client IP address */
+ dhcppkt->dhcphdr->ciaddr = ciaddr;
+
+ /* Add options to identify the feature list. The option value
+ * is the content of the DHCP_FEATURES table, with the number
+ * of table entries used as the length in bytes.
+ */
+ dhcp_features = table_start ( DHCP_FEATURES );
+ dhcp_features_len = table_num_entries ( DHCP_FEATURES );
+ if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_EB_ENCAP, dhcp_features,
+ dhcp_features_len ) ) != 0 ) {
+ DBG ( "DHCP could not set features list option: %s\n",
+ strerror ( rc ) );
+ goto err_store_features;
+ }
+
+ /* Add options to identify the network device. The result of
+ * the fetch is not checked.
+ */
+ fetch_raw_setting ( netdev_settings ( netdev ), &busid_setting,
+ &dhcp_desc, sizeof ( dhcp_desc ) );
+ if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_EB_BUS_ID, &dhcp_desc,
+ sizeof ( dhcp_desc ) ) ) != 0 ) {
+ DBG ( "DHCP could not set bus ID option: %s\n",
+ strerror ( rc ) );
+ goto err_store_busid;
+ }
+
+ /* Add DHCP client identifier. Required for Infiniband, and
+ * doesn't hurt other link layers.
+ *
+ * The stored length is the link-layer address length plus one
+ * (presumably accounting for the ll_proto field that precedes
+ * ll_addr in struct dhcp_client_id; confirm against the
+ * structure definition).
+ */
+ client_id.ll_proto = ntohs ( netdev->ll_protocol->ll_proto );
+ ll_addr_len = netdev->ll_protocol->ll_addr_len;
+ assert ( ll_addr_len <= sizeof ( client_id.ll_addr ) );
+ memcpy ( client_id.ll_addr, netdev->ll_addr, ll_addr_len );
+ if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_CLIENT_ID, &client_id,
+ ( ll_addr_len + 1 ) ) ) != 0 ) {
+ DBG ( "DHCP could not set client ID: %s\n",
+ strerror ( rc ) );
+ goto err_store_client_id;
+ }
+
+ /* Add client UUID, if we have one. Required for PXE. The
+ * PXE spec does not specify a byte ordering for UUIDs, but
+ * RFC4578 suggests that it follows the EFI spec, in which the
+ * first three fields are little-endian.
+ */
+ client_uuid.type = DHCP_CLIENT_UUID_TYPE;
+ if ( ( len = fetch_uuid_setting ( NULL, &uuid_setting,
+ &client_uuid.uuid ) ) >= 0 ) {
+ uuid_mangle ( &client_uuid.uuid );
+ if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_CLIENT_UUID,
+ &client_uuid,
+ sizeof ( client_uuid ) ) ) != 0 ) {
+ DBG ( "DHCP could not set client UUID: %s\n",
+ strerror ( rc ) );
+ goto err_store_client_uuid;
+ }
+ }
+
+ /* Add user class, if we have one. */
+ if ( ( len = fetch_raw_setting_copy ( NULL, &user_class_setting,
+ &user_class ) ) >= 0 ) {
+ if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_USER_CLASS_ID,
+ user_class, len ) ) != 0 ) {
+ DBG ( "DHCP could not set user class: %s\n",
+ strerror ( rc ) );
+ goto err_store_user_class;
+ }
+ }
+
+ /* The success path falls through to here, with rc holding
+ * the (zero) result of the most recent successful store.
+ *
+ * NOTE(review): user_class is freed here even when
+ * fetch_raw_setting_copy() reported failure, so this relies on
+ * that helper always leaving the pointer safe to free; confirm.
+ * The earlier error labels deliberately sit below the free(),
+ * since user_class has not been assigned on those paths.
+ */
+ err_store_user_class:
+ free ( user_class );
+ err_store_client_uuid:
+ err_store_client_id:
+ err_store_busid:
+ err_store_features:
+ err_create_packet:
+ return rc;
+}
+
+/****************************************************************************
+ *
+ * Data transfer interface
+ *
+ */
+
+/**
+ * Transmit DHCP request
+ *
+ * @v dhcp DHCP session
+ * @ret rc Return status code
+ *
+ * Builds a request of the current state's message type, lets the
+ * current state's tx() method fill in the state-specific options and
+ * the destination address, and delivers the result via the session's
+ * data transfer interface. The retry timer is (re)started before
+ * anything that can fail.
+ */
+static int dhcp_tx ( struct dhcp_session *dhcp ) {
+ /* Destination address: only the family is fixed here; the
+ * state's tx() method is handed this structure to fill in
+ * (dhcp_pxebs_tx(), for example, sets both address and port).
+ */
+ static struct sockaddr_in peer = {
+ .sin_family = AF_INET,
+ };
+ struct xfer_metadata meta = {
+ .netdev = dhcp->netdev,
+ .src = ( struct sockaddr * ) &dhcp->local,
+ .dest = ( struct sockaddr * ) &peer,
+ };
+ struct io_buffer *iobuf;
+ uint8_t msgtype = dhcp->state->tx_msgtype;
+ struct dhcp_packet dhcppkt;
+ int rc;
+
+ /* Start retry timer. Do this first so that failures to
+ * transmit will be retried.
+ */
+ start_timer ( &dhcp->timer );
+
+ /* Allocate buffer for packet */
+ iobuf = xfer_alloc_iob ( &dhcp->xfer, DHCP_MIN_LEN );
+ if ( ! iobuf )
+ return -ENOMEM;
+
+ /* Create basic DHCP packet directly in the I/O buffer, using
+ * all of the buffer's tailroom as the available space
+ */
+ if ( ( rc = dhcp_create_request ( &dhcppkt, dhcp->netdev, msgtype,
+ dhcp->xid, dhcp->local.sin_addr,
+ iobuf->data,
+ iob_tailroom ( iobuf ) ) ) != 0 ) {
+ DBGC ( dhcp, "DHCP %p could not construct DHCP request: %s\n",
+ dhcp, strerror ( rc ) );
+ goto done;
+ }
+
+ /* (Ab)use the "secs" field to convey metadata about the DHCP
+ * session state into packet traces. Useful for extracting
+ * debug information from non-debug builds.
+ *
+ * Bit 0 is set if a ProxyDHCP offer is held, bit 1 if an
+ * address offer is held, and the remaining bits carry the
+ * (pre-incremented) transmission count.
+ */
+ dhcppkt.dhcphdr->secs = htons ( ( ++(dhcp->count) << 2 ) |
+ ( dhcp->offer.s_addr ? 0x02 : 0 ) |
+ ( dhcp->proxy_offer ? 0x01 : 0 ) );
+
+ /* Fill in packet based on current state */
+ if ( ( rc = dhcp->state->tx ( dhcp, &dhcppkt, &peer ) ) != 0 ) {
+ DBGC ( dhcp, "DHCP %p could not fill DHCP request: %s\n",
+ dhcp, strerror ( rc ) );
+ goto done;
+ }
+
+ /* Transmit the packet, trimmed to the length actually used */
+ iob_put ( iobuf, dhcppkt_len ( &dhcppkt ) );
+ if ( ( rc = xfer_deliver ( &dhcp->xfer, iob_disown ( iobuf ),
+ &meta ) ) != 0 ) {
+ DBGC ( dhcp, "DHCP %p could not transmit UDP packet: %s\n",
+ dhcp, strerror ( rc ) );
+ goto done;
+ }
+
+ /* NOTE(review): when this is reached after xfer_deliver(), the
+ * buffer has already been passed through iob_disown();
+ * presumably that clears the local pointer so that the
+ * free_iob() below has nothing to free; confirm.
+ */
+ done:
+ free_iob ( iobuf );
+ return rc;
+}
+
+/**
+ * Receive new data
+ *
+ * @v dhcp		DHCP session
+ * @v iobuf		I/O buffer
+ * @v meta		Transfer metadata
+ * @ret rc		Return status code
+ *
+ * The I/O buffer is always freed before returning.  Packets with no
+ * source address, packets too short to contain a DHCP header, and
+ * packets whose transaction ID or client hardware address do not
+ * match this session are rejected with -EINVAL.  Anything else is
+ * copied into a freshly allocated DHCP packet and handed to the
+ * current state's rx() method.
+ */
+static int dhcp_deliver ( struct dhcp_session *dhcp,
+			  struct io_buffer *iobuf,
+			  struct xfer_metadata *meta ) {
+	struct net_device *netdev = dhcp->netdev;
+	struct ll_protocol *ll_protocol = netdev->ll_protocol;
+	struct sockaddr_in *peer;
+	size_t data_len;
+	struct dhcp_packet *dhcppkt;
+	struct dhcphdr *dhcphdr;
+	uint8_t msgtype = 0;
+	struct in_addr server_id = { 0 };
+	int rc = 0;
+
+	/* Sanity checks */
+	if ( ! meta->src ) {
+		DBGC ( dhcp, "DHCP %p received packet without source port\n",
+		       dhcp );
+		rc = -EINVAL;
+		goto err_no_src;
+	}
+	peer = ( struct sockaddr_in * ) meta->src;
+
+	/* Reject packets that cannot contain a complete DHCP header.
+	 * The header fields (xid, chaddr) are read below from a copy
+	 * that is only as large as the received data, so a shorter
+	 * packet would otherwise be read out of bounds.
+	 */
+	data_len = iob_len ( iobuf );
+	if ( data_len < sizeof ( *dhcphdr ) ) {
+		DBGC ( dhcp, "DHCP %p underlength packet from %s:%d\n",
+		       dhcp, inet_ntoa ( peer->sin_addr ),
+		       ntohs ( peer->sin_port ) );
+		rc = -EINVAL;
+		goto err_too_short;
+	}
+
+	/* Create a DHCP packet containing the I/O buffer contents.
+	 * Whilst we could just use the original buffer in situ, that
+	 * would waste the unused space in the packet buffer, and also
+	 * waste a relatively scarce fully-aligned I/O buffer.
+	 */
+	dhcppkt = zalloc ( sizeof ( *dhcppkt ) + data_len );
+	if ( ! dhcppkt ) {
+		rc = -ENOMEM;
+		goto err_alloc_dhcppkt;
+	}
+	/* The packet data lives immediately after the structure */
+	dhcphdr = ( ( ( void * ) dhcppkt ) + sizeof ( *dhcppkt ) );
+	memcpy ( dhcphdr, iobuf->data, data_len );
+	dhcppkt_init ( dhcppkt, dhcphdr, data_len );
+
+	/* Identify message type (stays zero if the option is absent) */
+	dhcppkt_fetch ( dhcppkt, DHCP_MESSAGE_TYPE, &msgtype,
+			sizeof ( msgtype ) );
+
+	/* Identify server ID (stays zero if the option is absent) */
+	dhcppkt_fetch ( dhcppkt, DHCP_SERVER_IDENTIFIER,
+			&server_id, sizeof ( server_id ) );
+
+	/* Check for matching transaction ID */
+	if ( dhcphdr->xid != dhcp->xid ) {
+		DBGC ( dhcp, "DHCP %p %s from %s:%d has bad transaction "
+		       "ID\n", dhcp, dhcp_msgtype_name ( msgtype ),
+		       inet_ntoa ( peer->sin_addr ),
+		       ntohs ( peer->sin_port ) );
+		rc = -EINVAL;
+		goto err_xid;
+	}
+
+	/* Check for matching client hardware address */
+	if ( memcmp ( dhcphdr->chaddr, netdev->ll_addr,
+		      ll_protocol->ll_addr_len ) != 0 ) {
+		DBGC ( dhcp, "DHCP %p %s from %s:%d has bad chaddr %s\n",
+		       dhcp, dhcp_msgtype_name ( msgtype ),
+		       inet_ntoa ( peer->sin_addr ), ntohs ( peer->sin_port ),
+		       ll_protocol->ntoa ( dhcphdr->chaddr ) );
+		rc = -EINVAL;
+		goto err_chaddr;
+	}
+
+	/* Handle packet based on current state */
+	dhcp->state->rx ( dhcp, dhcppkt, peer, msgtype, server_id );
+
+ err_chaddr:
+ err_xid:
+	/* Drop our reference to the copied packet */
+	dhcppkt_put ( dhcppkt );
+ err_alloc_dhcppkt:
+ err_too_short:
+ err_no_src:
+	free_iob ( iobuf );
+	return rc;
+}
+
+/**
+ * DHCP data transfer interface operations
+ *
+ * Received data (xfer_deliver) is routed to dhcp_deliver(); no other
+ * operation is listed here.
+ */
+static struct interface_operation dhcp_xfer_operations[] = {
+ INTF_OP ( xfer_deliver, struct dhcp_session *, dhcp_deliver ),
+};
+
+/** DHCP data transfer interface descriptor (for the session's "xfer" member) */
+static struct interface_descriptor dhcp_xfer_desc =
+ INTF_DESC ( struct dhcp_session, xfer, dhcp_xfer_operations );
+
+/**
+ * Handle DHCP retry timer expiry
+ *
+ * @v timer		DHCP retry timer
+ * @v fail		Failure indicator
+ *
+ * A failed timer ends the session with a timeout error; an ordinary
+ * expiry is passed on to the current state's expired() method.
+ */
+static void dhcp_timer_expired ( struct retry_timer *timer, int fail ) {
+	struct dhcp_session *dhcp;
+
+	/* The timer is embedded within the session structure */
+	dhcp = container_of ( timer, struct dhcp_session, timer );
+
+	if ( fail ) {
+		/* Out of retries: terminate DHCP */
+		dhcp_finished ( dhcp, -ETIMEDOUT );
+	} else {
+		/* Let the current state decide what happens next */
+		dhcp->state->expired ( dhcp );
+	}
+}
+
+/****************************************************************************
+ *
+ * Job control interface
+ *
+ */
+
+/**
+ * DHCP job control interface operations
+ *
+ * Closing the job interface (intf_close) is routed to
+ * dhcp_finished(); no other operation is listed here.
+ */
+static struct interface_operation dhcp_job_op[] = {
+ INTF_OP ( intf_close, struct dhcp_session *, dhcp_finished ),
+};
+
+/** DHCP job control interface descriptor (for the session's "job" member) */
+static struct interface_descriptor dhcp_job_desc =
+ INTF_DESC ( struct dhcp_session, job, dhcp_job_op );
+
+/****************************************************************************
+ *
+ * Instantiators
+ *
+ */
+
+/**
+ * DHCP peer address for socket opening
+ *
+ * This is a dummy address; the only useful portion is the socket
+ * family (so that we get a UDP connection). The DHCP client supplies
+ * the real destination address and port explicitly with each
+ * transmission, via the transfer metadata built in dhcp_tx().
+ */
+static struct sockaddr dhcp_peer = {
+ .sa_family = AF_INET,
+};
+
+/**
+ * Start DHCP state machine on a network device
+ *
+ * @v job Job control interface
+ * @v netdev Network device
+ * @ret rc Return status code
+ *
+ * Starts DHCP on the specified network device. If successful, the
+ * DHCPACK (and ProxyDHCPACK, if applicable) will be registered as
+ * option sources.
+ *
+ * Returns -ENOMEM if the session cannot be allocated, or the error
+ * from opening the socket. On success the session has entered the
+ * DHCPDISCOVER state and is plugged into the job interface.
+ */
+int start_dhcp ( struct interface *job, struct net_device *netdev ) {
+ struct dhcp_session *dhcp;
+ int rc;
+
+ /* Allocate and initialise structure */
+ dhcp = zalloc ( sizeof ( *dhcp ) );
+ if ( ! dhcp )
+ return -ENOMEM;
+ ref_init ( &dhcp->refcnt, dhcp_free );
+ intf_init ( &dhcp->job, &dhcp_job_desc, &dhcp->refcnt );
+ intf_init ( &dhcp->xfer, &dhcp_xfer_desc, &dhcp->refcnt );
+ timer_init ( &dhcp->timer, dhcp_timer_expired, &dhcp->refcnt );
+ dhcp->netdev = netdev_get ( netdev );
+ /* Local address: only family and port are set at this point */
+ dhcp->local.sin_family = AF_INET;
+ dhcp->local.sin_port = htons ( BOOTPC_PORT );
+ dhcp->xid = random();
+
+ /* Store DHCP transaction ID for fakedhcp code */
+ dhcp_last_xid = dhcp->xid;
+
+ /* Instantiate child objects and attach to our interfaces */
+ if ( ( rc = xfer_open_socket ( &dhcp->xfer, SOCK_DGRAM, &dhcp_peer,
+ ( struct sockaddr * ) &dhcp->local ) ) != 0 )
+ goto err;
+
+ /* Enter DHCPDISCOVER state */
+ dhcp_set_state ( dhcp, &dhcp_state_discover );
+
+ /* Attach parent interface, mortalise self, and return.
+ * The ref_put() drops this function's own reference; the
+ * session presumably stays alive through references held
+ * elsewhere (e.g. by its interfaces and timer); confirm.
+ */
+ intf_plug_plug ( &dhcp->job, job );
+ ref_put ( &dhcp->refcnt );
+ return 0;
+
+ err:
+ /* Shut the session down with the error, then drop our reference */
+ dhcp_finished ( dhcp, rc );
+ ref_put ( &dhcp->refcnt );
+ return rc;
+}
+
+/**
+ * Collect the PXE boot server addresses for the session's server type
+ *
+ * @v dhcp		DHCP session
+ * @v raw		DHCP PXE boot server list
+ * @v raw_len		Length of DHCP PXE boot server list
+ * @v ip		IP address list to fill in
+ *
+ * Walks the variable-length records of the raw list and appends the
+ * addresses of every record whose type equals the session's PXE
+ * server type.  Parsing stops at the first truncated record.  No
+ * terminator is written, and the caller must ensure that the IP
+ * address list has sufficient space.
+ */
+static void pxebs_list ( struct dhcp_session *dhcp, void *raw,
+			 size_t raw_len, struct in_addr *ip ) {
+	struct dhcp_pxe_boot_server *entry = raw;
+	size_t entry_len;
+	unsigned int idx;
+
+	while ( raw_len ) {
+
+		/* The fixed part must be present before num_ip is read */
+		if ( raw_len < sizeof ( *entry ) )
+			goto malformed;
+
+		/* A record extends to the end of its address array */
+		entry_len = offsetof ( typeof ( *entry ),
+				       ip[ entry->num_ip ] );
+		if ( raw_len < entry_len )
+			goto malformed;
+
+		/* Copy out the addresses of matching records */
+		if ( entry->type == dhcp->pxe_type ) {
+			for ( idx = 0 ; idx < entry->num_ip ; idx++ )
+				ip[idx] = entry->ip[idx];
+			ip += entry->num_ip;
+		}
+
+		/* Step over this record */
+		raw_len -= entry_len;
+		entry = ( ( ( void * ) entry ) + entry_len );
+	}
+	return;
+
+ malformed:
+	DBGC ( dhcp, "DHCP %p malformed PXE server list\n",
+	       dhcp );
+}
+
+/**
+ * Start PXE Boot Server Discovery on a network device
+ *
+ * @v job Job control interface
+ * @v netdev Network device
+ * @v pxe_type PXE server type
+ * @ret rc Return status code
+ *
+ * Starts PXE Boot Server Discovery on the specified network device.
+ * If successful, the Boot Server ACK will be registered as an option
+ * source.
+ *
+ * The list of servers to try is stored in the same allocation as the
+ * session, immediately after the session structure. It is built from
+ * (in order) the multicast address, the broadcast address, and the
+ * listed boot servers of the requested type, subject to the PXE
+ * discovery control flags. Fails with -EINVAL if that list ends up
+ * empty.
+ */
+int start_pxebs ( struct interface *job, struct net_device *netdev,
+ unsigned int pxe_type ) {
+ /* Temporary settings used only to look options up by tag */
+ struct setting pxe_discovery_control_setting =
+ { .tag = DHCP_PXE_DISCOVERY_CONTROL };
+ struct setting pxe_boot_servers_setting =
+ { .tag = DHCP_PXE_BOOT_SERVERS };
+ struct setting pxe_boot_server_mcast_setting =
+ { .tag = DHCP_PXE_BOOT_SERVER_MCAST };
+ ssize_t pxebs_list_len;
+ struct dhcp_session *dhcp;
+ struct in_addr *ip;
+ unsigned int pxe_discovery_control;
+ int rc;
+
+ /* Get upper bound for PXE boot server IP address list. The
+ * raw option length is used as the bound; a fetch error is
+ * treated as an empty list.
+ */
+ pxebs_list_len = fetch_raw_setting ( NULL, &pxe_boot_servers_setting,
+ NULL, 0 );
+ if ( pxebs_list_len < 0 )
+ pxebs_list_len = 0;
+
+ /* Allocate and initialise structure, with room after it for
+ * the address list and its terminating entry
+ */
+ dhcp = zalloc ( sizeof ( *dhcp ) + sizeof ( *ip ) /* mcast */ +
+ sizeof ( *ip ) /* bcast */ + pxebs_list_len +
+ sizeof ( *ip ) /* terminator */ );
+ if ( ! dhcp )
+ return -ENOMEM;
+ ref_init ( &dhcp->refcnt, dhcp_free );
+ intf_init ( &dhcp->job, &dhcp_job_desc, &dhcp->refcnt );
+ intf_init ( &dhcp->xfer, &dhcp_xfer_desc, &dhcp->refcnt );
+ timer_init ( &dhcp->timer, dhcp_timer_expired, &dhcp->refcnt );
+ dhcp->netdev = netdev_get ( netdev );
+ dhcp->local.sin_family = AF_INET;
+ fetch_ipv4_setting ( netdev_settings ( netdev ), &ip_setting,
+ &dhcp->local.sin_addr );
+ dhcp->local.sin_port = htons ( BOOTPC_PORT );
+ /* The type is held in little-endian form within the session */
+ dhcp->pxe_type = cpu_to_le16 ( pxe_type );
+
+ /* Construct PXE boot server IP address lists */
+ pxe_discovery_control =
+ fetch_uintz_setting ( NULL, &pxe_discovery_control_setting );
+ ip = ( ( ( void * ) dhcp ) + sizeof ( *dhcp ) );
+ dhcp->pxe_attempt = ip;
+ /* Multicast address first, unless disabled or not configured */
+ if ( ! ( pxe_discovery_control & PXEBS_NO_MULTICAST ) ) {
+ fetch_ipv4_setting ( NULL, &pxe_boot_server_mcast_setting, ip);
+ if ( ip->s_addr )
+ ip++;
+ }
+ /* Then the broadcast address, unless disabled */
+ if ( ! ( pxe_discovery_control & PXEBS_NO_BROADCAST ) )
+ (ip++)->s_addr = INADDR_BROADCAST;
+ /* When unknown servers are not allowed, the acceptance list
+ * starts where the listed servers are about to be written, so
+ * it is the tail of the attempt list.
+ */
+ if ( pxe_discovery_control & PXEBS_NO_UNKNOWN_SERVERS )
+ dhcp->pxe_accept = ip;
+ if ( pxebs_list_len ) {
+ uint8_t buf[pxebs_list_len];
+
+ fetch_raw_setting ( NULL, &pxe_boot_servers_setting,
+ buf, sizeof ( buf ) );
+ pxebs_list ( dhcp, buf, sizeof ( buf ), ip );
+ }
+ /* Nothing written anywhere above: the first entry is still zero */
+ if ( ! dhcp->pxe_attempt->s_addr ) {
+ DBGC ( dhcp, "DHCP %p has no PXE boot servers for type %04x\n",
+ dhcp, pxe_type );
+ rc = -EINVAL;
+ goto err;
+ }
+
+ /* Dump out PXE server lists */
+ DBGC ( dhcp, "DHCP %p attempting", dhcp );
+ for ( ip = dhcp->pxe_attempt ; ip->s_addr ; ip++ )
+ DBGC ( dhcp, " %s", inet_ntoa ( *ip ) );
+ DBGC ( dhcp, "\n" );
+ if ( dhcp->pxe_accept ) {
+ DBGC ( dhcp, "DHCP %p accepting", dhcp );
+ for ( ip = dhcp->pxe_accept ; ip->s_addr ; ip++ )
+ DBGC ( dhcp, " %s", inet_ntoa ( *ip ) );
+ DBGC ( dhcp, "\n" );
+ }
+
+ /* Instantiate child objects and attach to our interfaces */
+ if ( ( rc = xfer_open_socket ( &dhcp->xfer, SOCK_DGRAM, &dhcp_peer,
+ ( struct sockaddr * ) &dhcp->local ) ) != 0 )
+ goto err;
+
+ /* Enter PXEBS state */
+ dhcp_set_state ( dhcp, &dhcp_state_pxebs );
+
+ /* Attach parent interface, mortalise self, and return */
+ intf_plug_plug ( &dhcp->job, job );
+ ref_put ( &dhcp->refcnt );
+ return 0;
+
+ err:
+ /* Shut the session down with the error, then drop our reference */
+ dhcp_finished ( dhcp, rc );
+ ref_put ( &dhcp->refcnt );
+ return rc;
+}
+
+/**
+ * DHCP network device configurator
+ *
+ * Exposes start_dhcp() as the start method of the configurator named
+ * "dhcp".
+ */
+struct net_device_configurator dhcp_configurator __net_device_configurator = {
+ .name = "dhcp",
+ .start = start_dhcp,
+};
diff --git a/qemu/roms/ipxe/src/net/udp/dhcpv6.c b/qemu/roms/ipxe/src/net/udp/dhcpv6.c
new file mode 100644
index 000000000..f7736d08e
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/udp/dhcpv6.c
@@ -0,0 +1,989 @@
+/*
+ * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/interface.h>
+#include <ipxe/xfer.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/open.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/settings.h>
+#include <ipxe/retry.h>
+#include <ipxe/timer.h>
+#include <ipxe/in.h>
+#include <ipxe/crc32.h>
+#include <ipxe/errortab.h>
+#include <ipxe/ipv6.h>
+#include <ipxe/dhcpv6.h>
+
+/** @file
+ *
+ * Dynamic Host Configuration Protocol for IPv6
+ *
+ */
+
+/* Disambiguate the various error causes
+ *
+ * Each EPROTO_xxx error below is a distinct variant of EPROTO with
+ * its own description string, numbered 1 to 5.
+ */
+#define EPROTO_UNSPECFAIL __einfo_error ( EINFO_EPROTO_UNSPECFAIL )
+#define EINFO_EPROTO_UNSPECFAIL \
+ __einfo_uniqify ( EINFO_EPROTO, 1, "Unspecified server failure" )
+#define EPROTO_NOADDRSAVAIL __einfo_error ( EINFO_EPROTO_NOADDRSAVAIL )
+#define EINFO_EPROTO_NOADDRSAVAIL \
+ __einfo_uniqify ( EINFO_EPROTO, 2, "No addresses available" )
+#define EPROTO_NOBINDING __einfo_error ( EINFO_EPROTO_NOBINDING )
+#define EINFO_EPROTO_NOBINDING \
+ __einfo_uniqify ( EINFO_EPROTO, 3, "Client record unavailable" )
+#define EPROTO_NOTONLINK __einfo_error ( EINFO_EPROTO_NOTONLINK )
+#define EINFO_EPROTO_NOTONLINK \
+ __einfo_uniqify ( EINFO_EPROTO, 4, "Prefix not on link" )
+#define EPROTO_USEMULTICAST __einfo_error ( EINFO_EPROTO_USEMULTICAST )
+#define EINFO_EPROTO_USEMULTICAST \
+ __einfo_uniqify ( EINFO_EPROTO, 5, "Use multicast address" )
+/* Select the error for a DHCPv6 status code. Only the low four bits
+ * of the status take part in the selection.
+ */
+#define EPROTO_STATUS( status ) \
+ EUNIQ ( EINFO_EPROTO, ( (status) & 0x0f ), EPROTO_UNSPECFAIL, \
+ EPROTO_NOADDRSAVAIL, EPROTO_NOBINDING, \
+ EPROTO_NOTONLINK, EPROTO_USEMULTICAST )
+
+/**
+ * Human-readable error messages
+ *
+ * Only the "No addresses available" error is entered in this table;
+ * the other DHCPv6 status errors defined above are not listed here.
+ */
+struct errortab dhcpv6_errors[] __errortab = {
+ __einfo_errortab ( EINFO_EPROTO_NOADDRSAVAIL ),
+};
+
+/****************************************************************************
+ *
+ * DHCPv6 option lists
+ *
+ */
+
+/**
+ * A DHCPv6 option list
+ *
+ * Describes a read-only region of memory holding a sequence of
+ * options, each consisting of a header followed by that option's
+ * data.
+ */
+struct dhcpv6_option_list {
+ /** Data buffer (start of the first option header) */
+ const void *data;
+ /** Length of data buffer, in bytes */
+ size_t len;
+};
+
+/**
+ * Locate the first DHCPv6 option with a given code
+ *
+ * @v options		DHCPv6 option list
+ * @v code		Option code
+ * @ret option		DHCPv6 option, or NULL if not found
+ *
+ * The search also ends (returning NULL) as soon as an option claims
+ * more data than remains in the list, even if a matching option
+ * might have followed.
+ */
+static const union dhcpv6_any_option *
+dhcpv6_option ( struct dhcpv6_option_list *options, unsigned int code ) {
+	const union dhcpv6_any_option *cursor = options->data;
+	size_t left = options->len;
+	size_t payload_len;
+
+	/* Visit options for as long as a whole header remains; each
+	 * completed iteration consumes that option's payload.
+	 */
+	for ( ; left >= sizeof ( cursor->header ) ; left -= payload_len ) {
+
+		/* Consume the header and validate the claimed length */
+		left -= sizeof ( cursor->header );
+		payload_len = ntohs ( cursor->header.len );
+		if ( payload_len > left ) {
+			/* Malformed option list */
+			break;
+		}
+
+		/* Found it? */
+		if ( cursor->header.code == htons ( code ) )
+			return cursor;
+
+		/* The next header starts right after this payload */
+		cursor = ( ( ( void * ) cursor->header.data ) + payload_len );
+	}
+
+	return NULL;
+}
+
+/**
+ * Verify a DHCPv6 client or server identifier
+ *
+ * @v options		DHCPv6 option list
+ * @v code		Option code
+ * @v expected		Expected value
+ * @v len		Length of expected value
+ * @ret rc		Return status code
+ *
+ * Returns -ENOENT if the option is absent, -EINVAL if its length or
+ * content differs from the expected value, and zero on a match.
+ */
+static int dhcpv6_check_duid ( struct dhcpv6_option_list *options,
+			       unsigned int code, const void *expected,
+			       size_t len ) {
+	const union dhcpv6_any_option *found;
+
+	/* The option must be present */
+	found = dhcpv6_option ( options, code );
+	if ( ! found )
+		return -ENOENT;
+
+	/* Lengths must agree before the contents are compared */
+	if ( ( ntohs ( found->duid.header.len ) != len ) ||
+	     ( memcmp ( found->duid.duid, expected, len ) != 0 ) )
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * Convert the DHCPv6 status code option to a return status
+ *
+ * @v options		DHCPv6 option list
+ * @ret rc		Return status code
+ *
+ * An absent status code option, or a status of zero, counts as
+ * success.  A status code option too short to hold a status yields
+ * -EINVAL.  Any other status is mapped to an EPROTO variant.
+ */
+static int dhcpv6_status_code ( struct dhcpv6_option_list *options ) {
+	const union dhcpv6_any_option *found;
+	size_t min_len;
+	unsigned int status;
+
+	/* Omitted status code should be treated as "success" */
+	found = dhcpv6_option ( options, DHCPV6_STATUS_CODE );
+	if ( ! found )
+		return 0;
+
+	/* The option data must at least cover the fixed fields */
+	min_len = ( sizeof ( found->status_code ) -
+		    sizeof ( found->status_code.header ) );
+	if ( ntohs ( found->status_code.header.len ) < min_len )
+		return -EINVAL;
+
+	/* Calculate iPXE error code from DHCPv6 status code */
+	status = ntohs ( found->status_code.status );
+	if ( status == 0 )
+		return 0;
+	return -EPROTO_STATUS ( status );
+}
+
+/**
+ * Get DHCPv6 identity association address
+ *
+ * @v options DHCPv6 option list
+ * @v iaid Identity association ID
+ * @v address IPv6 address to fill in
+ * @ret rc Return status code
+ *
+ * Looks at the first IA_NA option in the list, and within it at the
+ * first IAADDR sub-option. Returns -ENOENT if either is absent,
+ * -EINVAL if either is too short or the IA_NA carries a different
+ * identity association ID, or the error derived from a status code
+ * found inside the IA_NA or the IAADDR. The address is written only
+ * on success.
+ */
+static int dhcpv6_iaaddr ( struct dhcpv6_option_list *options, uint32_t iaid,
+ struct in6_addr *address ) {
+ const union dhcpv6_any_option *option;
+ const struct dhcpv6_ia_na_option *ia_na;
+ const struct dhcpv6_iaaddr_option *iaaddr;
+ struct dhcpv6_option_list suboptions;
+ size_t len;
+ int rc;
+
+ /* Find identity association option, if present */
+ option = dhcpv6_option ( options, DHCPV6_IA_NA );
+ if ( ! option )
+ return -ENOENT;
+ ia_na = &option->ia_na;
+
+ /* Sanity check: option data must cover the fixed IA_NA fields */
+ len = ntohs ( ia_na->header.len );
+ if ( len < ( sizeof ( *ia_na ) - sizeof ( ia_na->header ) ) )
+ return -EINVAL;
+
+ /* Check identity association ID */
+ if ( ia_na->iaid != htonl ( iaid ) )
+ return -EINVAL;
+
+ /* Construct IA_NA sub-options list: whatever follows the
+ * fixed fields, up to the end of the option
+ */
+ suboptions.data = ia_na->options;
+ suboptions.len = ( len + sizeof ( ia_na->header ) -
+ offsetof ( typeof ( *ia_na ), options ) );
+
+ /* Check IA_NA status code */
+ if ( ( rc = dhcpv6_status_code ( &suboptions ) ) != 0 )
+ return rc;
+
+ /* Find identity association address, if present */
+ option = dhcpv6_option ( &suboptions, DHCPV6_IAADDR );
+ if ( ! option )
+ return -ENOENT;
+ iaaddr = &option->iaaddr;
+
+ /* Sanity check: option data must cover the fixed IAADDR fields */
+ len = ntohs ( iaaddr->header.len );
+ if ( len < ( sizeof ( *iaaddr ) - sizeof ( iaaddr->header ) ) )
+ return -EINVAL;
+
+ /* Construct IAADDR sub-options list (reusing the same
+ * variable, now that the IA_NA level has been dealt with)
+ */
+ suboptions.data = iaaddr->options;
+ suboptions.len = ( len + sizeof ( iaaddr->header ) -
+ offsetof ( typeof ( *iaaddr ), options ) );
+
+ /* Check IAADDR status code */
+ if ( ( rc = dhcpv6_status_code ( &suboptions ) ) != 0 )
+ return rc;
+
+ /* Extract IPv6 address */
+ memcpy ( address, &iaaddr->address, sizeof ( *address ) );
+
+ return 0;
+}
+
+/****************************************************************************
+ *
+ * DHCPv6 settings blocks
+ *
+ */
+
+/**
+ * A DHCPv6 settings block
+ *
+ * Wraps a DHCPv6 option list so that its options can be read as
+ * settings. dhcpv6_register() allocates the option data in the same
+ * allocation, directly after this structure.
+ */
+struct dhcpv6_settings {
+ /** Reference count */
+ struct refcnt refcnt;
+ /** Settings block */
+ struct settings settings;
+ /** Option list (the source of the setting values) */
+ struct dhcpv6_option_list options;
+};
+
+/**
+ * Decide whether a setting belongs in a DHCPv6 settings block
+ *
+ * @v settings		Settings block
+ * @v setting		Setting
+ * @ret applies		Setting applies within this settings block
+ *
+ * Only settings in the IPv6 scope apply.
+ */
+static int dhcpv6_applies ( struct settings *settings __unused,
+			    const struct setting *setting ) {
+
+	if ( setting->scope != &ipv6_scope )
+		return 0;
+	return 1;
+}
+
+/**
+ * Read a DHCPv6 setting out of the stored option list
+ *
+ * @v settings		Settings block
+ * @v setting		Setting to fetch
+ * @v data		Buffer to fill with setting data
+ * @v len		Length of buffer
+ * @ret len		Length of setting data, or negative error
+ *
+ * The setting's tag is used as the DHCPv6 option code.  At most
+ * @c len bytes are copied, but the full option length is returned
+ * even when the copy was truncated.
+ */
+static int dhcpv6_fetch ( struct settings *settings,
+			  struct setting *setting,
+			  void *data, size_t len ) {
+	struct dhcpv6_settings *dhcpv6set;
+	const union dhcpv6_any_option *found;
+	size_t value_len;
+	size_t copy_len;
+
+	/* Recover the enclosing DHCPv6 settings block */
+	dhcpv6set = container_of ( settings, struct dhcpv6_settings,
+				   settings );
+
+	/* Find option */
+	found = dhcpv6_option ( &dhcpv6set->options, setting->tag );
+	if ( ! found )
+		return -ENOENT;
+
+	/* Copy as much of the option as fits in the buffer */
+	value_len = ntohs ( found->header.len );
+	copy_len = ( ( len < value_len ) ? len : value_len );
+	memcpy ( data, found->header.data, copy_len );
+	return value_len;
+}
+
+/**
+ * DHCPv6 settings operations
+ *
+ * Provides only the applicability check and the fetch method; no
+ * other operation is set here.
+ */
+static struct settings_operations dhcpv6_settings_operations = {
+ .applies = dhcpv6_applies,
+ .fetch = dhcpv6_fetch,
+};
+
+/**
+ * Register DHCPv6 options as network device settings
+ *
+ * @v options DHCPv6 option list
+ * @v parent Parent settings block
+ * @ret rc Return status code
+ *
+ * Takes a private copy of the option data, so the caller's buffer
+ * need not outlive this call. Returns -ENOMEM if the copy cannot be
+ * allocated, or the result of registering the settings block.
+ */
+static int dhcpv6_register ( struct dhcpv6_option_list *options,
+ struct settings *parent ) {
+ struct dhcpv6_settings *dhcpv6set;
+ void *data;
+ size_t len;
+ int rc;
+
+ /* Allocate and initialise structure, with room for the option
+ * data directly after it
+ */
+ dhcpv6set = zalloc ( sizeof ( *dhcpv6set ) + options->len );
+ if ( ! dhcpv6set ) {
+ rc = -ENOMEM;
+ goto err_alloc;
+ }
+ ref_init ( &dhcpv6set->refcnt, NULL );
+ settings_init ( &dhcpv6set->settings, &dhcpv6_settings_operations,
+ &dhcpv6set->refcnt, &ipv6_scope );
+ data = ( ( ( void * ) dhcpv6set ) + sizeof ( *dhcpv6set ) );
+ len = options->len;
+ memcpy ( data, options->data, len );
+ dhcpv6set->options.data = data;
+ dhcpv6set->options.len = len;
+
+ /* Register settings */
+ if ( ( rc = register_settings ( &dhcpv6set->settings, parent,
+ DHCPV6_SETTINGS_NAME ) ) != 0 )
+ goto err_register;
+
+ /* Success deliberately falls through: our own reference is
+ * dropped on both paths. Presumably a successfully registered
+ * settings block is kept alive by a reference held by the
+ * settings core; confirm.
+ */
+ err_register:
+ ref_put ( &dhcpv6set->refcnt );
+ err_alloc:
+ return rc;
+}
+
+/****************************************************************************
+ *
+ * DHCPv6 protocol
+ *
+ */
+
+/**
+ * Options to be requested
+ *
+ * Option codes are held in network byte order: DNS servers, domain
+ * list, boot file URL and boot file parameters.
+ */
+static uint16_t dhcpv6_requested_options[] = {
+ htons ( DHCPV6_DNS_SERVERS ), htons ( DHCPV6_DOMAIN_LIST ),
+ htons ( DHCPV6_BOOTFILE_URL ), htons ( DHCPV6_BOOTFILE_PARAM ),
+};
+
+/**
+ * Give a printable name for a DHCPv6 packet type
+ *
+ * @v type		DHCPv6 packet type
+ * @ret name		DHCPv6 packet type name
+ *
+ * Unrecognised types are formatted into a static buffer, so the
+ * returned string for such a type is overwritten by the next call
+ * that also hits an unrecognised type.
+ */
+static __attribute__ (( unused )) const char *
+dhcpv6_type_name ( unsigned int type ) {
+	static char unknown[ 12 /* "UNKNOWN-xxx" + NUL */ ];
+	const char *name;
+
+	switch ( type ) {
+	case DHCPV6_SOLICIT:
+		name = "SOLICIT";
+		break;
+	case DHCPV6_ADVERTISE:
+		name = "ADVERTISE";
+		break;
+	case DHCPV6_REQUEST:
+		name = "REQUEST";
+		break;
+	case DHCPV6_REPLY:
+		name = "REPLY";
+		break;
+	case DHCPV6_INFORMATION_REQUEST:
+		name = "INFORMATION-REQUEST";
+		break;
+	default:
+		snprintf ( unknown, sizeof ( unknown ), "UNKNOWN-%d", type );
+		name = unknown;
+		break;
+	}
+
+	return name;
+}
+
+/**
+ * A DHCPv6 session state
+ *
+ * States form a singly linked chain via the next pointer; a NULL
+ * next pointer marks the final state.
+ */
+struct dhcpv6_session_state {
+ /** Current transmitted packet type */
+ uint8_t tx_type;
+ /** Current expected received packet type */
+ uint8_t rx_type;
+ /** Flags (a combination of enum dhcpv6_session_state_flags values) */
+ uint8_t flags;
+ /** Next state (or NULL to terminate) */
+ struct dhcpv6_session_state *next;
+};
+
+/** DHCPv6 session state flags
+ *
+ * These are combined as a bitmask in the "flags" field of a session
+ * state.  The TX flags select options added by dhcpv6_tx(); the RX
+ * flags select processing performed by dhcpv6_rx().
+ */
+enum dhcpv6_session_state_flags {
+ /** Include identity association within request */
+ DHCPV6_TX_IA_NA = 0x01,
+ /** Include leased IPv6 address within request */
+ DHCPV6_TX_IAADDR = 0x02,
+ /** Record received server ID */
+ DHCPV6_RX_RECORD_SERVER_ID = 0x04,
+ /** Record received IPv6 address */
+ DHCPV6_RX_RECORD_IAADDR = 0x08,
+ /** Apply received IPv6 address */
+ DHCPV6_RX_APPLY_IAADDR = 0x10,
+};
+
+/** DHCPv6 request state
+ *
+ * Transmits REQUEST (including the identity association and the
+ * address currently held in the session's lease field) and expects
+ * REPLY.  The address in the reply is recorded and applied.  There
+ * is no next state, so an acceptable reply completes the session.
+ */
+static struct dhcpv6_session_state dhcpv6_request = {
+ .tx_type = DHCPV6_REQUEST,
+ .rx_type = DHCPV6_REPLY,
+ .flags = ( DHCPV6_TX_IA_NA | DHCPV6_TX_IAADDR |
+ DHCPV6_RX_RECORD_IAADDR | DHCPV6_RX_APPLY_IAADDR ),
+ .next = NULL,
+};
+
+/** DHCPv6 solicitation state
+ *
+ * Transmits SOLICIT (including the identity association) and expects
+ * ADVERTISE.  The server ID and offered address are recorded, after
+ * which the session moves on to the request state.
+ */
+static struct dhcpv6_session_state dhcpv6_solicit = {
+ .tx_type = DHCPV6_SOLICIT,
+ .rx_type = DHCPV6_ADVERTISE,
+ .flags = ( DHCPV6_TX_IA_NA | DHCPV6_RX_RECORD_SERVER_ID |
+ DHCPV6_RX_RECORD_IAADDR ),
+ .next = &dhcpv6_request,
+};
+
+/** DHCPv6 information request state
+ *
+ * Transmits INFORMATION-REQUEST and expects REPLY.  No identity
+ * association is sent and no address is recorded or applied.  There
+ * is no next state.  This is the initial state chosen by
+ * start_dhcpv6() when stateful operation is not requested.
+ */
+static struct dhcpv6_session_state dhcpv6_information_request = {
+ .tx_type = DHCPV6_INFORMATION_REQUEST,
+ .rx_type = DHCPV6_REPLY,
+ .flags = 0,
+ .next = NULL,
+};
+
+/** A DHCPv6 session
+ *
+ * Created by start_dhcpv6() and released by dhcpv6_free() once the
+ * reference count drops to zero.
+ */
+struct dhcpv6_session {
+ /** Reference counter */
+ struct refcnt refcnt;
+ /** Job control interface */
+ struct interface job;
+ /** Data transfer interface */
+ struct interface xfer;
+
+ /** Network device being configured (reference held) */
+ struct net_device *netdev;
+ /** Transaction ID (taken from random() when the session starts) */
+ uint8_t xid[3];
+ /** Identity association ID (CRC32 of the link-layer address) */
+ uint32_t iaid;
+ /** Start time (in ticks) */
+ unsigned long start;
+ /** Client DUID */
+ struct dhcpv6_duid_uuid client_duid;
+ /** Server DUID, if known (heap-allocated copy, or NULL) */
+ void *server_duid;
+ /** Server DUID length */
+ size_t server_duid_len;
+ /** Leased IPv6 address */
+ struct in6_addr lease;
+
+ /** Retransmission timer */
+ struct retry_timer timer;
+
+ /** Current session state */
+ struct dhcpv6_session_state *state;
+ /** Current timeout status code (reported if the timer fails) */
+ int rc;
+};
+
+/**
+ * Free DHCPv6 session
+ *
+ * @v refcnt		Reference count
+ *
+ * Releases the network device reference and the recorded server
+ * DUID (if any) before freeing the session itself.
+ */
+static void dhcpv6_free ( struct refcnt *refcnt ) {
+	struct dhcpv6_session *session;
+
+	session = container_of ( refcnt, struct dhcpv6_session, refcnt );
+	netdev_put ( session->netdev );
+	free ( session->server_duid );
+	free ( session );
+}
+
+/**
+ * Terminate DHCPv6 session
+ *
+ * @v dhcpv6 DHCPv6 session
+ * @v rc Reason for close
+ *
+ * Used both for normal completion (rc of zero) and for failures.  It
+ * is also the handler for closure of the job control interface.
+ */
+static void dhcpv6_finished ( struct dhcpv6_session *dhcpv6, int rc ) {
+
+ /* Stop timer, so that no further (re)transmissions occur */
+ stop_timer ( &dhcpv6->timer );
+
+ /* Shut down interfaces, passing on the reason for close */
+ intf_shutdown ( &dhcpv6->xfer, rc );
+ intf_shutdown ( &dhcpv6->job, rc );
+}
+
+/**
+ * Transition to new DHCPv6 session state
+ *
+ * @v dhcpv6 DHCPv6 session
+ * @v state New session state
+ *
+ * Nothing is transmitted directly from here: the retransmission
+ * timer is (re)started and the request for the new state is sent
+ * from dhcpv6_timer_expired().
+ */
+static void dhcpv6_set_state ( struct dhcpv6_session *dhcpv6,
+ struct dhcpv6_session_state *state ) {
+
+ DBGC ( dhcpv6, "DHCPv6 %s entering %s state\n", dhcpv6->netdev->name,
+ dhcpv6_type_name ( state->tx_type ) );
+
+ /* Record state */
+ dhcpv6->state = state;
+
+ /* Default to -ETIMEDOUT if no more specific error is recorded
+ * (dhcpv6_rx() may later replace this with a received error)
+ */
+ dhcpv6->rc = -ETIMEDOUT;
+
+ /* Start timer to trigger transmission */
+ start_timer_nodelay ( &dhcpv6->timer );
+}
+
+/**
+ * Get DHCPv6 user class
+ *
+ * @v data		Data buffer
+ * @v len		Length of data buffer
+ * @ret len		Length of user class
+ *
+ * The returned length is that of the complete user class, which may
+ * exceed the buffer length; dhcpv6_tx() relies on this by passing an
+ * empty buffer to measure the user class before building the packet.
+ */
+static size_t dhcpv6_user_class ( void *data, size_t len ) {
+	static const char fallback[4] = { 'i', 'P', 'X', 'E' };
+	size_t copy_len;
+	int fetched;
+
+	/* Prefer the user-class setting whenever it can be fetched */
+	fetched = fetch_raw_setting ( NULL, &user_class_setting, data, len );
+	if ( fetched < 0 ) {
+		/* No usable setting: copy as much of the default
+		 * ("iPXE") as fits, but report its full length.
+		 */
+		copy_len = ( ( len < sizeof ( fallback ) ) ?
+			     len : sizeof ( fallback ) );
+		memcpy ( data, fallback, copy_len );
+		return sizeof ( fallback );
+	}
+
+	return fetched;
+}
+
+/**
+ * Transmit current request
+ *
+ * @v dhcpv6		DHCPv6 session
+ * @ret rc		Return status code
+ *
+ * The packet consists of the DHCPv6 header followed, in order, by
+ * the client ID, the server ID (only once one has been recorded),
+ * the identity association (only if the current state requests it),
+ * the option request, the user class and the elapsed time.
+ *
+ * All option lengths are calculated up front so that the I/O buffer
+ * can be allocated with exactly the right size.
+ */
+static int dhcpv6_tx ( struct dhcpv6_session *dhcpv6 ) {
+	struct dhcpv6_duid_option *client_id;
+	struct dhcpv6_duid_option *server_id;
+	struct dhcpv6_ia_na_option *ia_na;
+	struct dhcpv6_iaaddr_option *iaaddr;
+	struct dhcpv6_option_request_option *option_request;
+	struct dhcpv6_user_class_option *user_class;
+	struct dhcpv6_elapsed_time_option *elapsed;
+	struct dhcpv6_header *dhcphdr;
+	struct io_buffer *iobuf;
+	size_t client_id_len;
+	size_t server_id_len;
+	size_t ia_na_len;
+	size_t option_request_len;
+	size_t user_class_string_len;
+	size_t user_class_len;
+	size_t elapsed_len;
+	size_t total_len;
+	unsigned long elapsed_cs;
+	int rc;
+
+	/* Calculate lengths */
+	client_id_len = ( sizeof ( *client_id ) +
+			  sizeof ( dhcpv6->client_duid ) );
+	server_id_len = ( dhcpv6->server_duid ? ( sizeof ( *server_id ) +
+				dhcpv6->server_duid_len ) : 0 );
+	if ( dhcpv6->state->flags & DHCPV6_TX_IA_NA ) {
+		ia_na_len = sizeof ( *ia_na );
+		if ( dhcpv6->state->flags & DHCPV6_TX_IAADDR )
+			ia_na_len += sizeof ( *iaaddr );
+	} else {
+		ia_na_len = 0;
+	}
+	option_request_len = ( sizeof ( *option_request ) +
+			       sizeof ( dhcpv6_requested_options ) );
+	/* Measure the user class using an empty buffer */
+	user_class_string_len = dhcpv6_user_class ( NULL, 0 );
+	user_class_len = ( sizeof ( *user_class ) +
+			   sizeof ( user_class->user_class[0] ) +
+			   user_class_string_len );
+	elapsed_len = sizeof ( *elapsed );
+	total_len = ( sizeof ( *dhcphdr ) + client_id_len + server_id_len +
+		      ia_na_len + option_request_len + user_class_len +
+		      elapsed_len );
+
+	/* Allocate packet */
+	iobuf = xfer_alloc_iob ( &dhcpv6->xfer, total_len );
+	if ( ! iobuf )
+		return -ENOMEM;
+
+	/* Construct header */
+	dhcphdr = iob_put ( iobuf, sizeof ( *dhcphdr ) );
+	dhcphdr->type = dhcpv6->state->tx_type;
+	memcpy ( dhcphdr->xid, dhcpv6->xid, sizeof ( dhcphdr->xid ) );
+
+	/* Construct client identifier */
+	client_id = iob_put ( iobuf, client_id_len );
+	client_id->header.code = htons ( DHCPV6_CLIENT_ID );
+	client_id->header.len = htons ( client_id_len -
+					sizeof ( client_id->header ) );
+	memcpy ( client_id->duid, &dhcpv6->client_duid,
+		 sizeof ( dhcpv6->client_duid ) );
+
+	/* Construct server identifier, if applicable */
+	if ( server_id_len ) {
+		server_id = iob_put ( iobuf, server_id_len );
+		server_id->header.code = htons ( DHCPV6_SERVER_ID );
+		server_id->header.len = htons ( server_id_len -
+						sizeof ( server_id->header ) );
+		memcpy ( server_id->duid, dhcpv6->server_duid,
+			 dhcpv6->server_duid_len );
+	}
+
+	/* Construct identity association, if applicable */
+	if ( ia_na_len ) {
+		ia_na = iob_put ( iobuf, ia_na_len );
+		ia_na->header.code = htons ( DHCPV6_IA_NA );
+		ia_na->header.len = htons ( ia_na_len -
+					    sizeof ( ia_na->header ) );
+		ia_na->iaid = htonl ( dhcpv6->iaid );
+		ia_na->renew = htonl ( 0 );
+		ia_na->rebind = htonl ( 0 );
+		if ( dhcpv6->state->flags & DHCPV6_TX_IAADDR ) {
+			/* The address option is nested within the
+			 * identity association option.
+			 */
+			iaaddr = ( ( void * ) ia_na->options );
+			iaaddr->header.code = htons ( DHCPV6_IAADDR );
+			iaaddr->header.len = htons ( sizeof ( *iaaddr ) -
+						     sizeof ( iaaddr->header ));
+			memcpy ( &iaaddr->address, &dhcpv6->lease,
+				 sizeof ( iaaddr->address ) );
+			iaaddr->preferred = htonl ( 0 );
+			iaaddr->valid = htonl ( 0 );
+		}
+	}
+
+	/* Construct option request */
+	option_request = iob_put ( iobuf, option_request_len );
+	option_request->header.code = htons ( DHCPV6_OPTION_REQUEST );
+	option_request->header.len = htons ( option_request_len -
+					     sizeof ( option_request->header ));
+	memcpy ( option_request->requested, dhcpv6_requested_options,
+		 sizeof ( dhcpv6_requested_options ) );
+
+	/* Construct user class */
+	user_class = iob_put ( iobuf, user_class_len );
+	user_class->header.code = htons ( DHCPV6_USER_CLASS );
+	user_class->header.len = htons ( user_class_len -
+					 sizeof ( user_class->header ) );
+	user_class->user_class[0].len = htons ( user_class_string_len );
+	dhcpv6_user_class ( user_class->user_class[0].string,
+			    user_class_string_len );
+
+	/* Construct elapsed time (in hundredths of a second).  The
+	 * field is only 16 bits wide: saturate at 0xffff, as required
+	 * by RFC 3315 section 22.9, rather than letting the value
+	 * silently wrap back towards zero.
+	 */
+	elapsed_cs = ( ( ( currticks() - dhcpv6->start ) * 100 ) /
+		       TICKS_PER_SEC );
+	if ( elapsed_cs > 0xffff )
+		elapsed_cs = 0xffff;
+	elapsed = iob_put ( iobuf, elapsed_len );
+	elapsed->header.code = htons ( DHCPV6_ELAPSED_TIME );
+	elapsed->header.len = htons ( elapsed_len -
+				      sizeof ( elapsed->header ) );
+	elapsed->elapsed = htons ( elapsed_cs );
+
+	/* Sanity check */
+	assert ( iob_len ( iobuf ) == total_len );
+
+	/* Transmit packet */
+	if ( ( rc = xfer_deliver_iob ( &dhcpv6->xfer, iobuf ) ) != 0 ) {
+		DBGC ( dhcpv6, "DHCPv6 %s could not transmit: %s\n",
+		       dhcpv6->netdev->name, strerror ( rc ) );
+		return rc;
+	}
+
+	return 0;
+}
+
+/**
+ * Handle timer expiry
+ *
+ * @v timer		Retransmission timer
+ * @v fail		Failure indicator
+ *
+ * On failure the session is terminated with the status recorded in
+ * the session.  Otherwise the timer is re-armed and the current
+ * request is (re)transmitted; a transmission error is not treated
+ * as fatal, since the next expiry will try again.
+ */
+static void dhcpv6_timer_expired ( struct retry_timer *timer, int fail ) {
+	struct dhcpv6_session *dhcpv6;
+
+	dhcpv6 = container_of ( timer, struct dhcpv6_session, timer );
+
+	if ( ! fail ) {
+		/* Re-arm the timer, then (re)transmit */
+		start_timer ( &dhcpv6->timer );
+		dhcpv6_tx ( dhcpv6 );
+	} else {
+		/* Out of retries: give up */
+		dhcpv6_finished ( dhcpv6, dhcpv6->rc );
+	}
+}
+
+/**
+ * Receive new data
+ *
+ * @v dhcpv6 DHCPv6 session
+ * @v iobuf I/O buffer
+ * @v meta Data transfer metadata
+ * @ret rc Return status code
+ *
+ * The I/O buffer is freed before returning on every path, including
+ * errors.  Packets failing the length, client/server ID or message
+ * type checks are rejected without altering the session.  Failures
+ * reported by the status code option, the IAADDR lookup or address
+ * application are stored in dhcpv6->rc, which is the status used if
+ * the retry timer subsequently fails.
+ */
+static int dhcpv6_rx ( struct dhcpv6_session *dhcpv6,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *meta ) {
+ struct settings *parent = netdev_settings ( dhcpv6->netdev );
+ struct sockaddr_in6 *src = ( ( struct sockaddr_in6 * ) meta->src );
+ struct dhcpv6_header *dhcphdr = iobuf->data;
+ struct dhcpv6_option_list options;
+ const union dhcpv6_any_option *option;
+ int rc;
+
+ /* Sanity checks */
+ if ( iob_len ( iobuf ) < sizeof ( *dhcphdr ) ) {
+ DBGC ( dhcpv6, "DHCPv6 %s received packet too short (%zd "
+ "bytes, min %zd bytes)\n", dhcpv6->netdev->name,
+ iob_len ( iobuf ), sizeof ( *dhcphdr ) );
+ rc = -EINVAL;
+ goto done;
+ }
+ assert ( src != NULL );
+ assert ( src->sin6_family == AF_INET6 );
+ DBGC ( dhcpv6, "DHCPv6 %s received %s from %s\n",
+ dhcpv6->netdev->name, dhcpv6_type_name ( dhcphdr->type ),
+ inet6_ntoa ( &src->sin6_addr ) );
+
+ /* Construct option list (everything following the fixed header) */
+ options.data = dhcphdr->options;
+ options.len = ( iob_len ( iobuf ) -
+ offsetof ( typeof ( *dhcphdr ), options ) );
+
+ /* Verify client identifier */
+ if ( ( rc = dhcpv6_check_duid ( &options, DHCPV6_CLIENT_ID,
+ &dhcpv6->client_duid,
+ sizeof ( dhcpv6->client_duid ) ) ) !=0){
+ DBGC ( dhcpv6, "DHCPv6 %s received %s without correct client "
+ "ID: %s\n", dhcpv6->netdev->name,
+ dhcpv6_type_name ( dhcphdr->type ), strerror ( rc ) );
+ goto done;
+ }
+
+ /* Verify server identifier, if applicable (i.e. once a server
+ * ID has been recorded)
+ */
+ if ( dhcpv6->server_duid &&
+ ( ( rc = dhcpv6_check_duid ( &options, DHCPV6_SERVER_ID,
+ dhcpv6->server_duid,
+ dhcpv6->server_duid_len ) ) != 0 ) ) {
+ DBGC ( dhcpv6, "DHCPv6 %s received %s without correct server "
+ "ID: %s\n", dhcpv6->netdev->name,
+ dhcpv6_type_name ( dhcphdr->type ), strerror ( rc ) );
+ goto done;
+ }
+
+ /* Check message type against that expected in the current state */
+ if ( dhcphdr->type != dhcpv6->state->rx_type ) {
+ DBGC ( dhcpv6, "DHCPv6 %s received %s while expecting %s\n",
+ dhcpv6->netdev->name, dhcpv6_type_name ( dhcphdr->type ),
+ dhcpv6_type_name ( dhcpv6->state->rx_type ) );
+ rc = -ENOTTY;
+ goto done;
+ }
+
+ /* Fetch status code, if present */
+ if ( ( rc = dhcpv6_status_code ( &options ) ) != 0 ) {
+ DBGC ( dhcpv6, "DHCPv6 %s received %s with error status: %s\n",
+ dhcpv6->netdev->name, dhcpv6_type_name ( dhcphdr->type ),
+ strerror ( rc ) );
+ /* This is plausibly the error we want to return */
+ dhcpv6->rc = rc;
+ goto done;
+ }
+
+ /* Record identity association address, if applicable */
+ if ( dhcpv6->state->flags & DHCPV6_RX_RECORD_IAADDR ) {
+ if ( ( rc = dhcpv6_iaaddr ( &options, dhcpv6->iaid,
+ &dhcpv6->lease ) ) != 0 ) {
+ DBGC ( dhcpv6, "DHCPv6 %s received %s with unusable "
+ "IAADDR: %s\n", dhcpv6->netdev->name,
+ dhcpv6_type_name ( dhcphdr->type ),
+ strerror ( rc ) );
+ /* This is plausibly the error we want to return */
+ dhcpv6->rc = rc;
+ goto done;
+ }
+ DBGC ( dhcpv6, "DHCPv6 %s received %s is for %s\n",
+ dhcpv6->netdev->name, dhcpv6_type_name ( dhcphdr->type ),
+ inet6_ntoa ( &dhcpv6->lease ) );
+ }
+
+ /* Record server ID, if applicable (as a heap-allocated copy,
+ * which is released by dhcpv6_free())
+ */
+ if ( dhcpv6->state->flags & DHCPV6_RX_RECORD_SERVER_ID ) {
+ assert ( dhcpv6->server_duid == NULL );
+ option = dhcpv6_option ( &options, DHCPV6_SERVER_ID );
+ if ( ! option ) {
+ DBGC ( dhcpv6, "DHCPv6 %s received %s missing server "
+ "ID\n", dhcpv6->netdev->name,
+ dhcpv6_type_name ( dhcphdr->type ) );
+ rc = -EINVAL;
+ goto done;
+ }
+ dhcpv6->server_duid_len = ntohs ( option->duid.header.len );
+ dhcpv6->server_duid = malloc ( dhcpv6->server_duid_len );
+ if ( ! dhcpv6->server_duid ) {
+ rc = -ENOMEM;
+ goto done;
+ }
+ memcpy ( dhcpv6->server_duid, option->duid.duid,
+ dhcpv6->server_duid_len );
+ }
+
+ /* Apply identity association address, if applicable */
+ if ( dhcpv6->state->flags & DHCPV6_RX_APPLY_IAADDR ) {
+ if ( ( rc = ipv6_set_address ( dhcpv6->netdev,
+ &dhcpv6->lease ) ) != 0 ) {
+ DBGC ( dhcpv6, "DHCPv6 %s could not apply %s: %s\n",
+ dhcpv6->netdev->name,
+ inet6_ntoa ( &dhcpv6->lease ), strerror ( rc ) );
+ /* This is plausibly the error we want to return */
+ dhcpv6->rc = rc;
+ goto done;
+ }
+ }
+
+ /* Transition to next state or complete DHCPv6, as applicable */
+ if ( dhcpv6->state->next ) {
+
+ /* Transition to next state */
+ dhcpv6_set_state ( dhcpv6, dhcpv6->state->next );
+ rc = 0;
+
+ } else {
+
+ /* Register settings (as a child of the network device's
+ * settings block)
+ */
+ if ( ( rc = dhcpv6_register ( &options, parent ) ) != 0 ) {
+ DBGC ( dhcpv6, "DHCPv6 %s could not register "
+ "settings: %s\n", dhcpv6->netdev->name,
+ strerror ( rc ) );
+ goto done;
+ }
+
+ /* Mark as complete */
+ dhcpv6_finished ( dhcpv6, 0 );
+ DBGC ( dhcpv6, "DHCPv6 %s complete\n", dhcpv6->netdev->name );
+ }
+
+ done:
+ free_iob ( iobuf );
+ return rc;
+}
+
+/** DHCPv6 job control interface operations
+ *
+ * Closing the job control interface terminates the session via
+ * dhcpv6_finished().
+ */
+static struct interface_operation dhcpv6_job_op[] = {
+ INTF_OP ( intf_close, struct dhcpv6_session *, dhcpv6_finished ),
+};
+
+/** DHCPv6 job control interface descriptor */
+static struct interface_descriptor dhcpv6_job_desc =
+ INTF_DESC ( struct dhcpv6_session, job, dhcpv6_job_op );
+
+/** DHCPv6 data transfer interface operations
+ *
+ * Data delivered to the data transfer interface is handled by
+ * dhcpv6_rx().
+ */
+static struct interface_operation dhcpv6_xfer_op[] = {
+ INTF_OP ( xfer_deliver, struct dhcpv6_session *, dhcpv6_rx ),
+};
+
+/** DHCPv6 data transfer interface descriptor */
+static struct interface_descriptor dhcpv6_xfer_desc =
+ INTF_DESC ( struct dhcpv6_session, xfer, dhcpv6_xfer_op );
+
+/**
+ * Start DHCPv6
+ *
+ * @v job Job control interface
+ * @v netdev Network device
+ * @v stateful Perform stateful address autoconfiguration
+ * @ret rc Return status code
+ *
+ * A stateful session starts in the solicit state; otherwise the
+ * session starts in the information request state.  The session
+ * holds a reference to the network device until it is freed.
+ *
+ * On success the only remaining references to the session are those
+ * held via its interfaces and timer, so the session is freed
+ * automatically once it has finished.
+ */
+int start_dhcpv6 ( struct interface *job, struct net_device *netdev,
+ int stateful ) {
+ struct ll_protocol *ll_protocol = netdev->ll_protocol;
+ struct dhcpv6_session *dhcpv6;
+ struct {
+ union {
+ struct sockaddr_in6 sin6;
+ struct sockaddr sa;
+ } client;
+ union {
+ struct sockaddr_in6 sin6;
+ struct sockaddr sa;
+ } server;
+ } addresses;
+ uint32_t xid;
+ int len;
+ int rc;
+
+ /* Allocate and initialise structure */
+ dhcpv6 = zalloc ( sizeof ( *dhcpv6 ) );
+ if ( ! dhcpv6 )
+ return -ENOMEM;
+ ref_init ( &dhcpv6->refcnt, dhcpv6_free );
+ intf_init ( &dhcpv6->job, &dhcpv6_job_desc, &dhcpv6->refcnt );
+ intf_init ( &dhcpv6->xfer, &dhcpv6_xfer_desc, &dhcpv6->refcnt );
+ dhcpv6->netdev = netdev_get ( netdev );
+ xid = random();
+ memcpy ( dhcpv6->xid, &xid, sizeof ( dhcpv6->xid ) ); /* 3 bytes used */
+ dhcpv6->start = currticks();
+ timer_init ( &dhcpv6->timer, dhcpv6_timer_expired, &dhcpv6->refcnt );
+
+ /* Construct client and server addresses (the client address is
+ * left unspecified apart from its family and port)
+ */
+ memset ( &addresses, 0, sizeof ( addresses ) );
+ addresses.client.sin6.sin6_family = AF_INET6;
+ addresses.client.sin6.sin6_port = htons ( DHCPV6_CLIENT_PORT );
+ addresses.server.sin6.sin6_family = AF_INET6;
+ ipv6_all_dhcp_relay_and_servers ( &addresses.server.sin6.sin6_addr );
+ addresses.server.sin6.sin6_scope_id = netdev->index;
+ addresses.server.sin6.sin6_port = htons ( DHCPV6_SERVER_PORT );
+
+ /* Construct client DUID from system UUID */
+ dhcpv6->client_duid.type = htons ( DHCPV6_DUID_UUID );
+ if ( ( len = fetch_uuid_setting ( NULL, &uuid_setting,
+ &dhcpv6->client_duid.uuid ) ) < 0 ) {
+ rc = len;
+ DBGC ( dhcpv6, "DHCPv6 %s could not create DUID-UUID: %s\n",
+ dhcpv6->netdev->name, strerror ( rc ) );
+ goto err_client_duid;
+ }
+
+ /* Construct IAID from link-layer address */
+ dhcpv6->iaid = crc32_le ( 0, netdev->ll_addr, ll_protocol->ll_addr_len);
+ DBGC ( dhcpv6, "DHCPv6 %s has XID %02x%02x%02x\n", dhcpv6->netdev->name,
+ dhcpv6->xid[0], dhcpv6->xid[1], dhcpv6->xid[2] );
+
+ /* Enter initial state (this also starts the timer, which is
+ * stopped again via dhcpv6_finished() if the socket cannot be
+ * opened)
+ */
+ dhcpv6_set_state ( dhcpv6, ( stateful ? &dhcpv6_solicit :
+ &dhcpv6_information_request ) );
+
+ /* Open socket */
+ if ( ( rc = xfer_open_socket ( &dhcpv6->xfer, SOCK_DGRAM,
+ &addresses.server.sa,
+ &addresses.client.sa ) ) != 0 ) {
+ DBGC ( dhcpv6, "DHCPv6 %s could not open socket: %s\n",
+ dhcpv6->netdev->name, strerror ( rc ) );
+ goto err_open_socket;
+ }
+
+ /* Attach parent interface, mortalise self, and return */
+ intf_plug_plug ( &dhcpv6->job, job );
+ ref_put ( &dhcpv6->refcnt );
+ return 0;
+
+ err_open_socket:
+ dhcpv6_finished ( dhcpv6, rc );
+ err_client_duid:
+ ref_put ( &dhcpv6->refcnt );
+ return rc;
+}
+
+/** Boot filename setting
+ *
+ * IPv6-scope "filename" setting, tagged with the DHCPv6 bootfile URL
+ * option (which is one of the options requested by this client).
+ */
+const struct setting filename6_setting __setting ( SETTING_BOOT, filename ) = {
+ .name = "filename",
+ .description = "Boot filename",
+ .tag = DHCPV6_BOOTFILE_URL,
+ .type = &setting_type_string,
+ .scope = &ipv6_scope,
+};
+
+/** DNS search list setting
+ *
+ * IPv6-scope "dnssl" setting, tagged with the DHCPv6 domain list
+ * option (which is one of the options requested by this client).
+ */
+const struct setting dnssl6_setting __setting ( SETTING_IP_EXTRA, dnssl ) = {
+ .name = "dnssl",
+ .description = "DNS search list",
+ .tag = DHCPV6_DOMAIN_LIST,
+ .type = &setting_type_dnssl,
+ .scope = &ipv6_scope,
+};
diff --git a/qemu/roms/ipxe/src/net/udp/dns.c b/qemu/roms/ipxe/src/net/udp/dns.c
new file mode 100644
index 000000000..fffe6e697
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/udp/dns.c
@@ -0,0 +1,1152 @@
+/*
+ * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * Portions copyright (C) 2004 Anselm M. Hoffmeister
+ * <stockholm@users.sourceforge.net>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/refcnt.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/resolv.h>
+#include <ipxe/retry.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/settings.h>
+#include <ipxe/features.h>
+#include <ipxe/dhcp.h>
+#include <ipxe/dhcpv6.h>
+#include <ipxe/dns.h>
+
+/** @file
+ *
+ * DNS protocol
+ *
+ */
+
+FEATURE ( FEATURE_PROTOCOL, "DNS", DHCP_EB_FEATURE_DNS, 1 );
+
+/* Disambiguate the various error causes
+ *
+ * Both errors are variants of ENXIO, distinguished from one another
+ * by the values 0x01 and 0x02 and by their descriptive text.
+ */
+#define ENXIO_NO_RECORD __einfo_error ( EINFO_ENXIO_NO_RECORD )
+#define EINFO_ENXIO_NO_RECORD \
+ __einfo_uniqify ( EINFO_ENXIO, 0x01, "DNS name does not exist" )
+#define ENXIO_NO_NAMESERVER __einfo_error ( EINFO_ENXIO_NO_NAMESERVER )
+#define EINFO_ENXIO_NO_NAMESERVER \
+ __einfo_uniqify ( EINFO_ENXIO, 0x02, "No DNS servers available" )
+
+/** The DNS server
+ *
+ * Only the port is set statically (to DNS_PORT); all other fields
+ * start out zeroed.  The union allows the same storage to be viewed
+ * as a generic, TCP/IP, IPv4 or IPv6 socket address.
+ */
+static union {
+ struct sockaddr sa;
+ struct sockaddr_tcpip st;
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+} nameserver = {
+ .st = {
+ .st_port = htons ( DNS_PORT ),
+ },
+};
+
+/** The DNS search list (initially empty) */
+static struct dns_name dns_search;
+
+/**
+ * Encode a DNS name using RFC1035 encoding
+ *
+ * @v string DNS name as a string
+ * @v name DNS name to fill in
+ * @ret len Length of DNS name, or negative error
+ *
+ * Encoding starts at name->offset.  Bytes are stored only while they
+ * lie before name->len, but the returned length always counts the
+ * complete encoding, so it may exceed the space available.
+ *
+ * A leading '.' (unless it is the whole string), two consecutive '.'
+ * characters, and labels longer than DNS_MAX_LABEL_LEN are rejected
+ * with -EINVAL.  A single trailing '.' is accepted and ignored.
+ */
+int dns_encode ( const char *string, struct dns_name *name ) {
+ uint8_t *start = ( name->data + name->offset );
+ uint8_t *end = ( name->data + name->len );
+ uint8_t *dst = start;
+ size_t len = 0;
+ char c;
+
+ /* Encode name */
+ while ( ( c = *(string++) ) ) {
+
+ /* Handle '.' separators */
+ if ( c == '.' ) {
+
+ /* Reject consecutive '.' */
+ if ( ( len == 0 ) && ( dst != start ) )
+ return -EINVAL;
+
+ /* Terminate if this is the trailing '.' */
+ if ( *string == '\0' )
+ break;
+
+ /* Reject initial non-terminating '.' */
+ if ( len == 0 )
+ return -EINVAL;
+
+ /* Reset length */
+ len = 0;
+
+ } else {
+
+ /* Increment length */
+ len++;
+
+ /* Check for overflow */
+ if ( len > DNS_MAX_LABEL_LEN )
+ return -EINVAL;
+ }
+
+ /* Copy byte, update length.  The destination is advanced
+ * before storing, which leaves the byte at the start of
+ * each label free to hold the label length; this is
+ * rewritten via dst[-len] as the label grows.  A '.' is
+ * stored with len == 0 and so becomes the (initially
+ * zero) length byte of the following label.
+ */
+ if ( ++dst < end ) {
+ *dst = c;
+ dst[-len] = len;
+ }
+ }
+
+ /* Add terminating root marker */
+ if ( len )
+ dst++;
+ if ( dst < end )
+ *dst = '\0';
+ dst++;
+
+ return ( dst - start );
+}
+
+/**
+ * Find start of valid label within an RFC1035-encoded DNS name
+ *
+ * @v name DNS name
+ * @v offset Current offset
+ * @ret offset Offset of label, or negative error
+ *
+ * Compression pointers are followed until an uncompressed label is
+ * found.  The returned offset refers to a label whose length byte
+ * and contents both lie entirely within name->len; the label may be
+ * the zero-length root label.
+ */
+static int dns_label ( struct dns_name *name, size_t offset ) {
+ const uint8_t *byte;
+ const uint16_t *word;
+ size_t len;
+ size_t ptr;
+
+ while ( 1 ) {
+
+ /* Fail if we have overrun the DNS name */
+ if ( ( offset + sizeof ( *byte) ) > name->len )
+ return -EINVAL;
+ byte = ( name->data + offset );
+
+ /* Follow compression pointer, if applicable */
+ if ( DNS_IS_COMPRESSED ( *byte ) ) {
+
+ /* Fail if we have overrun the DNS name (a
+ * compression pointer occupies two bytes)
+ */
+ if ( ( offset + sizeof ( *word ) ) > name->len )
+ return -EINVAL;
+ word = ( name->data + offset );
+
+ /* Extract pointer to new offset */
+ ptr = DNS_COMPRESSED_OFFSET ( ntohs ( *word ) );
+
+ /* Fail if pointer does not point backwards.
+ * (This guarantees termination of the
+ * function.)
+ */
+ if ( ptr >= offset )
+ return -EINVAL;
+
+ /* Continue from new offset */
+ offset = ptr;
+ continue;
+ }
+
+ /* Fail if we have overrun the DNS name */
+ len = *byte;
+ if ( ( offset + sizeof ( *byte ) + len ) > name->len )
+ return -EINVAL;
+
+ /* We have a valid label */
+ return offset;
+ }
+}
+
+/**
+ * Decode RFC1035-encoded DNS name
+ *
+ * @v name DNS name
+ * @v data Output buffer
+ * @v len Length of output buffer
+ * @ret len Length of decoded DNS name, or negative error
+ *
+ * The returned length excludes the terminating NUL and counts the
+ * complete decoded name even if it does not fit.  At most len bytes
+ * are written.  The terminating NUL is stored only if the decoded
+ * name is shorter than the buffer, so a truncated result is not
+ * NUL-terminated.
+ */
+int dns_decode ( struct dns_name *name, char *data, size_t len ) {
+ unsigned int recursion_limit = name->len; /* Generous upper bound */
+ int offset = name->offset;
+ const uint8_t *label;
+ size_t decoded_len = 0;
+ size_t label_len;
+ size_t copy_len;
+
+ while ( recursion_limit-- ) {
+
+ /* Find valid DNS label */
+ offset = dns_label ( name, offset );
+ if ( offset < 0 )
+ return offset;
+
+ /* Terminate if we have reached the root (storing the
+ * NUL only if there is room for it)
+ */
+ label = ( name->data + offset );
+ label_len = *(label++);
+ if ( label_len == 0 ) {
+ if ( decoded_len < len )
+ *data = '\0';
+ return decoded_len;
+ }
+
+ /* Prepend '.' if applicable (the separator is counted
+ * even when there is no room to store it)
+ */
+ if ( decoded_len && ( decoded_len++ < len ) )
+ *(data++) = '.';
+
+ /* Copy label to output buffer (as much as will fit) */
+ copy_len = ( ( decoded_len < len ) ? ( len - decoded_len ) : 0);
+ if ( copy_len > label_len )
+ copy_len = label_len;
+ memcpy ( data, label, copy_len );
+ data += copy_len;
+ decoded_len += label_len;
+
+ /* Move to next label */
+ offset += ( sizeof ( *label ) + label_len );
+ }
+
+ /* Recursion limit exceeded */
+ return -EINVAL;
+}
+
+/**
+ * Compare DNS names for equality
+ *
+ * @v first First DNS name
+ * @v second Second DNS name
+ * @ret rc Return status code
+ *
+ * Returns zero if the names are equal (ignoring case), -ENOENT if
+ * they differ, or another negative error if either name is
+ * malformed.  Compression pointers are followed in both names.
+ */
+int dns_compare ( struct dns_name *first, struct dns_name *second ) {
+ unsigned int recursion_limit = first->len; /* Generous upper bound */
+ int first_offset = first->offset;
+ int second_offset = second->offset;
+ const uint8_t *first_label;
+ const uint8_t *second_label;
+ size_t label_len;
+ size_t len;
+
+ while ( recursion_limit-- ) {
+
+ /* Find valid DNS labels */
+ first_offset = dns_label ( first, first_offset );
+ if ( first_offset < 0 )
+ return first_offset;
+ second_offset = dns_label ( second, second_offset );
+ if ( second_offset < 0 )
+ return second_offset;
+
+ /* Compare label lengths */
+ first_label = ( first->data + first_offset );
+ second_label = ( second->data + second_offset );
+ label_len = *(first_label++);
+ if ( label_len != *(second_label++) )
+ return -ENOENT;
+ len = ( sizeof ( *first_label ) + label_len );
+
+ /* Terminate if we have reached the root (of both names,
+ * since the label lengths are known to match)
+ */
+ if ( label_len == 0 )
+ return 0;
+
+ /* Compare label contents (case-insensitively) */
+ while ( label_len-- ) {
+ if ( tolower ( *(first_label++) ) !=
+ tolower ( *(second_label++) ) )
+ return -ENOENT;
+ }
+
+ /* Move to next labels */
+ first_offset += len;
+ second_offset += len;
+ }
+
+ /* Recursion limit exceeded */
+ return -EINVAL;
+}
+
+/**
+ * Copy a DNS name
+ *
+ * @v src Source DNS name
+ * @v dst Destination DNS name
+ * @ret len Length of copied DNS name, or negative error
+ *
+ * Compression pointers in the source are followed, so the copy
+ * consists solely of uncompressed labels.  Copying starts at
+ * dst->offset.  Bytes are stored only while they lie before
+ * dst->len, but the returned length always counts the complete
+ * name, so it may exceed the space available.
+ */
+int dns_copy ( struct dns_name *src, struct dns_name *dst ) {
+ unsigned int recursion_limit = src->len; /* Generous upper bound */
+ int src_offset = src->offset;
+ size_t dst_offset = dst->offset;
+ const uint8_t *label;
+ size_t label_len;
+ size_t copy_len;
+ size_t len;
+
+ while ( recursion_limit-- ) {
+
+ /* Find valid DNS label */
+ src_offset = dns_label ( src, src_offset );
+ if ( src_offset < 0 )
+ return src_offset;
+
+ /* Copy as an uncompressed label (length byte included,
+ * truncated to whatever space remains)
+ */
+ label = ( src->data + src_offset );
+ label_len = *label;
+ len = ( sizeof ( *label ) + label_len );
+ copy_len = ( ( dst_offset < dst->len ) ?
+ ( dst->len - dst_offset ) : 0 );
+ if ( copy_len > len )
+ copy_len = len;
+ memcpy ( ( dst->data + dst_offset ), label, copy_len );
+ src_offset += len;
+ dst_offset += len;
+
+ /* Terminate if we have reached the root (which has
+ * itself just been copied)
+ */
+ if ( label_len == 0 )
+ return ( dst_offset - dst->offset );
+ }
+
+ /* Recursion limit exceeded */
+ return -EINVAL;
+}
+
+/**
+ * Skip RFC1035-encoded DNS name
+ *
+ * @v name DNS name
+ * @ret offset Offset to next name, or negative error
+ *
+ * The name ends either at its root label or at the first compression
+ * pointer.  In the latter case the returned offset is the one just
+ * past the two-byte pointer; the pointer's target is validated but
+ * not otherwise traversed.
+ */
+int dns_skip ( struct dns_name *name ) {
+ unsigned int recursion_limit = name->len; /* Generous upper bound */
+ int offset = name->offset;
+ int prev_offset;
+ const uint8_t *label;
+ size_t label_len;
+
+ while ( recursion_limit-- ) {
+
+ /* Find valid DNS label */
+ prev_offset = offset;
+ offset = dns_label ( name, prev_offset );
+ if ( offset < 0 )
+ return offset;
+
+ /* Terminate if we have reached a compression pointer
+ * (detected by the label lying at a different offset)
+ */
+ if ( offset != prev_offset )
+ return ( prev_offset + sizeof ( uint16_t ) );
+
+ /* Skip this label */
+ label = ( name->data + offset );
+ label_len = *label;
+ offset += ( sizeof ( *label ) + label_len );
+
+ /* Terminate if we have reached the root */
+ if ( label_len == 0 )
+ return offset;
+ }
+
+ /* Recursion limit exceeded */
+ return -EINVAL;
+}
+
+/**
+ * Skip RFC1035-encoded DNS name in search list
+ *
+ * @v name		DNS name
+ * @ret offset		Offset to next non-empty name, or negative error
+ *
+ * After skipping the current name, any immediately following zero
+ * bytes (i.e. empty names, such as the padding bytes used in the NDP
+ * DNSSL option) are skipped as well.  The returned offset may equal
+ * the length of the list if no further names remain.
+ */
+static int dns_skip_search ( struct dns_name *name ) {
+	const uint8_t *bytes;
+	int limit;
+	int next;
+
+	/* Find next name */
+	next = dns_skip ( name );
+
+	if ( next >= 0 ) {
+		/* Step over any subsequent empty names */
+		bytes = name->data;
+		limit = ( ( int ) name->len );
+		for ( ; ( next < limit ) && ( bytes[next] == 0 ) ; next++ ) {
+			/* Nothing to do */
+		}
+	}
+
+	return next;
+}
+
+/**
+ * Transcribe DNS name (for debugging)
+ *
+ * @v name		DNS name
+ * @ret string		Transcribed DNS name
+ *
+ * The result lives in a static buffer which is overwritten by the
+ * next call.  Names which cannot be decoded are reported as
+ * "<INVALID>"; names too long for the buffer are shown truncated.
+ */
+static const char * dns_name ( struct dns_name *name ) {
+	static char buf[256];
+	int len;
+
+	len = dns_decode ( name, buf, sizeof ( buf ) );
+	if ( len < 0 )
+		return "<INVALID>";
+
+	/* dns_decode() stores a terminating NUL only when the whole
+	 * name fits, so terminate explicitly in case of truncation
+	 * (possible for names within received packets).  This never
+	 * shortens a name that did fit, since its NUL already lies
+	 * at or before the final byte of the buffer.
+	 */
+	buf[ sizeof ( buf ) - 1 ] = '\0';
+
+	return buf;
+}
+
+/**
+ * Name a DNS query type (for debugging)
+ *
+ * @v type		Query type (in network byte order)
+ * @ret name		Type name
+ *
+ * The comparison constants are byte-swapped rather than the
+ * argument, so the type is matched as it appears on the wire.
+ */
+static const char * dns_type ( uint16_t type ) {
+
+	if ( type == htons ( DNS_TYPE_A ) )
+		return "A";
+	if ( type == htons ( DNS_TYPE_AAAA ) )
+		return "AAAA";
+	if ( type == htons ( DNS_TYPE_CNAME ) )
+		return "CNAME";
+
+	return "<UNKNOWN>";
+}
+
+/** A DNS request
+ *
+ * The query (header, name and question) is built in place within
+ * "buf" and transmitted directly from there.
+ */
+struct dns_request {
+ /** Reference counter */
+ struct refcnt refcnt;
+ /** Name resolution interface */
+ struct interface resolv;
+ /** Data transfer interface */
+ struct interface socket;
+ /** Retry timer */
+ struct retry_timer timer;
+
+ /** Socket address to fill in with resolved address */
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+ } address;
+ /** Initial query type (copied into each constructed question) */
+ uint16_t qtype;
+ /** Buffer for current query */
+ struct {
+ /** Query header */
+ struct dns_header query;
+ /** Name buffer */
+ char name[DNS_MAX_NAME_LEN];
+ /** Space for question */
+ struct dns_question padding;
+ } __attribute__ (( packed )) buf;
+ /** Current query name */
+ struct dns_name name;
+ /** Question within current query (points into the buffer) */
+ struct dns_question *question;
+ /** Length of current query (the number of bytes transmitted) */
+ size_t len;
+ /** Offset of search suffix within current query */
+ size_t offset;
+ /** Search list */
+ struct dns_name search;
+ /** Recursion counter */
+ unsigned int recursion;
+};
+
+/**
+ * Mark DNS request as complete
+ *
+ * @v dns DNS request
+ * @v rc Return status code
+ *
+ * Used for both successful and unsuccessful completion; the status
+ * code is passed on to both interfaces as they are shut down.
+ */
+static void dns_done ( struct dns_request *dns, int rc ) {
+
+ /* Stop the retry timer, so that no further queries are sent */
+ stop_timer ( &dns->timer );
+
+ /* Shut down interfaces */
+ intf_shutdown ( &dns->socket, rc );
+ intf_shutdown ( &dns->resolv, rc );
+}
+
+/**
+ * Mark DNS request as resolved and complete
+ *
+ * @v dns DNS request
+ *
+ * The address held in the request is reported via the name
+ * resolution interface, after which the request is completed with a
+ * status of zero.
+ */
+static void dns_resolved ( struct dns_request *dns ) {
+
+ DBGC ( dns, "DNS %p found address %s\n",
+ dns, sock_ntoa ( &dns->address.sa ) );
+
+ /* Return resolved address */
+ resolv_done ( &dns->resolv, &dns->address.sa );
+
+ /* Mark operation as complete */
+ dns_done ( dns, 0 );
+}
+
+/**
+ * Construct DNS question
+ *
+ * @v dns DNS request
+ * @ret rc Return status code
+ *
+ * The portion of the query name starting at dns->offset is
+ * overwritten with the current entry of the search list (or with the
+ * root if the search list has been exhausted).  The question is then
+ * placed immediately after the resulting name, and the total query
+ * length is recorded in dns->len.
+ */
+static int dns_question ( struct dns_request *dns ) {
+ static struct dns_name search_root = {
+ .data = "",
+ .len = 1,
+ };
+ struct dns_name *search = &dns->search;
+ int len;
+ size_t offset;
+
+ /* Use root suffix if search list is empty */
+ if ( search->offset == search->len )
+ search = &search_root;
+
+ /* Overwrite current suffix */
+ dns->name.offset = dns->offset;
+ len = dns_copy ( search, &dns->name );
+ if ( len < 0 )
+ return len;
+
+ /* Sanity check (dns_copy() reports the full length even when
+ * the name did not fit)
+ */
+ offset = ( dns->name.offset + len );
+ if ( offset > dns->name.len ) {
+ DBGC ( dns, "DNS %p name is too long\n", dns );
+ return -EINVAL;
+ }
+
+ /* Construct question (immediately after the name) */
+ dns->question = ( ( ( void * ) &dns->buf ) + offset );
+ dns->question->qtype = dns->qtype;
+ dns->question->qclass = htons ( DNS_CLASS_IN );
+
+ /* Store length */
+ dns->len = ( offset + sizeof ( *(dns->question) ) );
+
+ /* Restore name (so that it again refers to the whole name
+ * rather than just the suffix)
+ */
+ dns->name.offset = offsetof ( typeof ( dns->buf ), name );
+
+ DBGC2 ( dns, "DNS %p question is %s type %s\n", dns,
+ dns_name ( &dns->name ), dns_type ( dns->question->qtype ) );
+
+ return 0;
+}
+
+/**
+ * Send DNS query
+ *
+ * @v dns DNS request
+ * @ret rc Return status code
+ *
+ * The retransmission timer is restarted and a fresh query identifier
+ * is generated on every call, including retransmissions triggered by
+ * dns_timer_expired().
+ */
+static int dns_send_packet ( struct dns_request *dns ) {
+ struct dns_header *query = &dns->buf.query;
+
+ /* Start retransmission timer */
+ start_timer ( &dns->timer );
+
+ /* Generate query identifier */
+ query->id = random();
+
+ /* Send query */
+ DBGC ( dns, "DNS %p sending query ID %#04x for %s type %s\n", dns,
+ ntohs ( query->id ), dns_name ( &dns->name ),
+ dns_type ( dns->question->qtype ) );
+
+ /* Send the data (dns->len bytes, starting at the query header) */
+ return xfer_deliver_raw ( &dns->socket, query, dns->len );
+}
+
+/**
+ * Handle expiry of the DNS retransmission timer
+ *
+ * @v timer		Retry timer
+ * @v fail		Failure indicator
+ *
+ * A failed timer terminates the request with -ETIMEDOUT; otherwise the
+ * current query is transmitted again.
+ */
+static void dns_timer_expired ( struct retry_timer *timer, int fail ) {
+	struct dns_request *dns;
+
+	dns = container_of ( timer, struct dns_request, timer );
+
+	/* Give up once the timer reports failure */
+	if ( fail ) {
+		dns_done ( dns, -ETIMEDOUT );
+		return;
+	}
+
+	/* Otherwise send the query again */
+	dns_send_packet ( dns );
+}
+
+/**
+ * Receive new data
+ *
+ * @v dns		DNS request
+ * @v iobuf		I/O buffer
+ * @v meta		Data transfer metadata
+ * @ret rc		Return status code
+ *
+ * Validates the response header against the outstanding query, skips
+ * the question section and then walks the remaining records looking
+ * for one whose name matches the name being resolved.  An AAAA or A
+ * record completes the request; a CNAME record replaces the name being
+ * resolved and restarts the scan from the first answer.  If no address
+ * is found, the next query is chosen from the type of query that was
+ * issued (AAAA, then A, then CNAME, then the next search list entry).
+ *
+ * The I/O buffer is always freed before returning.
+ */
+static int dns_xfer_deliver ( struct dns_request *dns,
+			      struct io_buffer *iobuf,
+			      struct xfer_metadata *meta __unused ) {
+	struct dns_header *response = iobuf->data;
+	struct dns_header *query = &dns->buf.query;
+	unsigned int qtype = dns->question->qtype;
+	struct dns_name buf;
+	union dns_rr *rr;
+	int offset;
+	size_t answer_offset;
+	size_t next_offset;
+	size_t rdlength;
+	int name_len;
+	int rc;
+
+	/* Sanity check */
+	if ( iob_len ( iobuf ) < sizeof ( *response ) ) {
+		DBGC ( dns, "DNS %p received underlength packet length %zd\n",
+		       dns, iob_len ( iobuf ) );
+		rc = -EINVAL;
+		goto done;
+	}
+
+	/* Check response ID matches query ID */
+	if ( response->id != query->id ) {
+		DBGC ( dns, "DNS %p received unexpected response ID %#04x "
+		       "(wanted %d)\n", dns, ntohs ( response->id ),
+		       ntohs ( query->id ) );
+		rc = -EINVAL;
+		goto done;
+	}
+	DBGC ( dns, "DNS %p received response ID %#04x\n",
+	       dns, ntohs ( response->id ) );
+
+	/* Check that we have exactly one question */
+	if ( response->qdcount != htons ( 1 ) ) {
+		DBGC ( dns, "DNS %p received response with %d questions\n",
+		       dns, ntohs ( response->qdcount ) );
+		rc = -EINVAL;
+		goto done;
+	}
+
+	/* Skip question section */
+	buf.data = iobuf->data;
+	buf.offset = sizeof ( *response );
+	buf.len = iob_len ( iobuf );
+	offset = dns_skip ( &buf );
+	if ( offset < 0 ) {
+		rc = offset;
+		DBGC ( dns, "DNS %p received response with malformed "
+		       "question: %s\n", dns, strerror ( rc ) );
+		goto done;
+	}
+	answer_offset = ( offset + sizeof ( struct dns_question ) );
+
+	/* Search through response for useful answers.  Do this
+	 * multiple times, to take advantage of useful nameservers
+	 * which send us e.g. the CNAME *and* the A record for the
+	 * pointed-to name.
+	 */
+	for ( buf.offset = answer_offset ; buf.offset != buf.len ;
+	      buf.offset = next_offset ) {
+
+		/* Check for valid name */
+		offset = dns_skip ( &buf );
+		if ( offset < 0 ) {
+			rc = offset;
+			DBGC ( dns, "DNS %p received response with malformed "
+			       "answer: %s\n", dns, strerror ( rc ) );
+			goto done;
+		}
+
+		/* Check for sufficient space for resource record */
+		rr = ( buf.data + offset );
+		if ( ( offset + sizeof ( rr->common ) ) > buf.len ) {
+			DBGC ( dns, "DNS %p received response with underlength "
+			       "RR\n", dns );
+			rc = -EINVAL;
+			goto done;
+		}
+		rdlength = ntohs ( rr->common.rdlength );
+		next_offset = ( offset + sizeof ( rr->common ) + rdlength );
+		if ( next_offset > buf.len ) {
+			DBGC ( dns, "DNS %p received response with underlength "
+			       "RR\n", dns );
+			rc = -EINVAL;
+			goto done;
+		}
+
+		/* Skip non-matching names */
+		if ( dns_compare ( &buf, &dns->name ) != 0 ) {
+			DBGC2 ( dns, "DNS %p ignoring response for %s type "
+				"%s\n", dns, dns_name ( &buf ),
+				dns_type ( rr->common.type ) );
+			continue;
+		}
+
+		/* Handle answer */
+		switch ( rr->common.type ) {
+
+		case htons ( DNS_TYPE_AAAA ):
+
+			/* Found the target AAAA record */
+			if ( rdlength < sizeof ( dns->address.sin6.sin6_addr )){
+				DBGC ( dns, "DNS %p received response with "
+				       "underlength AAAA\n", dns );
+				rc = -EINVAL;
+				goto done;
+			}
+			dns->address.sin6.sin6_family = AF_INET6;
+			memcpy ( &dns->address.sin6.sin6_addr,
+				 &rr->aaaa.in6_addr,
+				 sizeof ( dns->address.sin6.sin6_addr ) );
+			dns_resolved ( dns );
+			rc = 0;
+			goto done;
+
+		case htons ( DNS_TYPE_A ):
+
+			/* Found the target A record */
+			if ( rdlength < sizeof ( dns->address.sin.sin_addr ) ) {
+				DBGC ( dns, "DNS %p received response with "
+				       "underlength A\n", dns );
+				rc = -EINVAL;
+				goto done;
+			}
+			dns->address.sin.sin_family = AF_INET;
+			dns->address.sin.sin_addr = rr->a.in_addr;
+			dns_resolved ( dns );
+			rc = 0;
+			goto done;
+
+		case htons ( DNS_TYPE_CNAME ):
+
+			/* Terminate the operation if we recurse too far */
+			if ( ++dns->recursion > DNS_MAX_CNAME_RECURSION ) {
+				DBGC ( dns, "DNS %p recursion exceeded\n",
+				       dns );
+				rc = -ELOOP;
+				dns_done ( dns, rc );
+				goto done;
+			}
+
+			/* Found a CNAME record; update query and recurse */
+			buf.offset = ( offset + sizeof ( rr->cname ) );
+			DBGC ( dns, "DNS %p found CNAME %s\n",
+			       dns, dns_name ( &buf ) );
+			dns->search.offset = dns->search.len;
+			name_len = dns_copy ( &buf, &dns->name );
+			if ( name_len < 0 ) {
+				/* A failed copy must not be used as a
+				 * length when computing the new base
+				 * name offset.
+				 */
+				rc = name_len;
+				DBGC ( dns, "DNS %p could not copy CNAME: "
+				       "%s\n", dns, strerror ( rc ) );
+				dns_done ( dns, rc );
+				goto done;
+			}
+			dns->offset = ( offsetof ( typeof ( dns->buf ), name ) +
+					name_len - 1 /* Strip root label */ );
+			if ( ( rc = dns_question ( dns ) ) != 0 ) {
+				dns_done ( dns, rc );
+				goto done;
+			}
+			next_offset = answer_offset;
+			break;
+
+		default:
+			DBGC ( dns, "DNS %p got unknown record type %d\n",
+			       dns, ntohs ( rr->common.type ) );
+			break;
+		}
+	}
+
+	/* Stop the retry timer.  After this point, each code path
+	 * must either restart the timer by calling dns_send_packet(),
+	 * or mark the DNS operation as complete by calling
+	 * dns_done()
+	 */
+	stop_timer ( &dns->timer );
+
+	/* Determine what to do next based on the type of query we
+	 * issued and the response we received
+	 */
+	switch ( qtype ) {
+
+	case htons ( DNS_TYPE_AAAA ):
+		/* We asked for an AAAA record and got nothing; try
+		 * the A.
+		 */
+		DBGC ( dns, "DNS %p found no AAAA record; trying A\n", dns );
+		dns->question->qtype = htons ( DNS_TYPE_A );
+		dns_send_packet ( dns );
+		rc = 0;
+		goto done;
+
+	case htons ( DNS_TYPE_A ):
+		/* We asked for an A record and got nothing;
+		 * try the CNAME.
+		 */
+		DBGC ( dns, "DNS %p found no A record; trying CNAME\n", dns );
+		dns->question->qtype = htons ( DNS_TYPE_CNAME );
+		dns_send_packet ( dns );
+		rc = 0;
+		goto done;
+
+	case htons ( DNS_TYPE_CNAME ):
+		/* We asked for a CNAME record.  If we got a response
+		 * (i.e. if the next AAAA/A query is already set up),
+		 * then issue it.
+		 */
+		if ( qtype == dns->qtype ) {
+			dns_send_packet ( dns );
+			rc = 0;
+			goto done;
+		}
+
+		/* If we have already reached the end of the search list,
+		 * then terminate lookup.
+		 */
+		if ( dns->search.offset == dns->search.len ) {
+			DBGC ( dns, "DNS %p found no CNAME record\n", dns );
+			rc = -ENXIO_NO_RECORD;
+			dns_done ( dns, rc );
+			goto done;
+		}
+
+		/* Move to next entry in search list.  This can never fail,
+		 * since we have already used this entry.
+		 */
+		DBGC ( dns, "DNS %p found no CNAME record; trying next "
+		       "suffix\n", dns );
+		dns->search.offset = dns_skip_search ( &dns->search );
+		if ( ( rc = dns_question ( dns ) ) != 0 ) {
+			dns_done ( dns, rc );
+			goto done;
+		}
+		dns_send_packet ( dns );
+		goto done;
+
+	default:
+		assert ( 0 );
+		rc = -EINVAL;
+		dns_done ( dns, rc );
+		goto done;
+	}
+
+ done:
+	/* Free I/O buffer */
+	free_iob ( iobuf );
+	return rc;
+}
+
+/**
+ * Handle closure of the DNS socket
+ *
+ * @v dns		DNS request
+ * @v rc		Reason for close
+ *
+ * A socket that closes with a zero status still ends the lookup
+ * without an answer, so a zero reason is reported as -ECONNABORTED.
+ */
+static void dns_xfer_close ( struct dns_request *dns, int rc ) {
+
+	dns_done ( dns, ( rc ? rc : -ECONNABORTED ) );
+}
+
+/** DNS socket interface operations
+ *
+ * Data delivered to the socket interface is handled by
+ * dns_xfer_deliver() and closure of the interface by dns_xfer_close().
+ */
+static struct interface_operation dns_socket_operations[] = {
+ INTF_OP ( xfer_deliver, struct dns_request *, dns_xfer_deliver ),
+ INTF_OP ( intf_close, struct dns_request *, dns_xfer_close ),
+};
+
+/** DNS socket interface descriptor
+ *
+ * Associates dns_socket_operations with the "socket" member of
+ * struct dns_request.
+ */
+static struct interface_descriptor dns_socket_desc =
+ INTF_DESC ( struct dns_request, socket, dns_socket_operations );
+
+/** DNS resolver interface operations
+ *
+ * Closure of the resolver interface is handled directly by dns_done().
+ */
+static struct interface_operation dns_resolv_op[] = {
+ INTF_OP ( intf_close, struct dns_request *, dns_done ),
+};
+
+/** DNS resolver interface descriptor
+ *
+ * Associates dns_resolv_op with the "resolv" member of
+ * struct dns_request.
+ */
+static struct interface_descriptor dns_resolv_desc =
+ INTF_DESC ( struct dns_request, resolv, dns_resolv_op );
+
+/**
+ * Resolve name using DNS
+ *
+ * @v resolv Name resolution interface
+ * @v name Name to resolve
+ * @v sa Socket address to fill in
+ * @ret rc Return status code
+ *
+ * Allocates a DNS request, builds the initial query, opens a datagram
+ * socket to the configured nameserver and starts the retry timer with
+ * no delay so that the first packet is sent when the timer fires. The
+ * search list is used only when the name contains no '.'. The
+ * initial contents of @c sa are copied into the request. The initial
+ * query type follows the nameserver address family: A for AF_INET and
+ * AAAA for AF_INET6; any other family fails with -ENOTSUP.
+ */
+static int dns_resolv ( struct interface *resolv,
+ const char *name, struct sockaddr *sa ) {
+ struct dns_request *dns;
+ struct dns_header *query;
+ size_t search_len;
+ int name_len;
+ int rc;
+
+ /* Fail immediately if no DNS servers */
+ if ( ! nameserver.sa.sa_family ) {
+ DBG ( "DNS not attempting to resolve \"%s\": "
+ "no DNS servers\n", name );
+ rc = -ENXIO_NO_NAMESERVER;
+ goto err_no_nameserver;
+ }
+
+ /* Determine whether or not to use search list (names containing
+  * a '.' are looked up without it)
+  */
+ search_len = ( strchr ( name, '.' ) ? 0 : dns_search.len );
+
+ /* Allocate DNS structure, with room for a private copy of the
+  * search list immediately after it
+  */
+ dns = zalloc ( sizeof ( *dns ) + search_len );
+ if ( ! dns ) {
+ rc = -ENOMEM;
+ goto err_alloc_dns;
+ }
+ ref_init ( &dns->refcnt, NULL );
+ intf_init ( &dns->resolv, &dns_resolv_desc, &dns->refcnt );
+ intf_init ( &dns->socket, &dns_socket_desc, &dns->refcnt );
+ timer_init ( &dns->timer, dns_timer_expired, &dns->refcnt );
+ memcpy ( &dns->address.sa, sa, sizeof ( dns->address.sa ) );
+ dns->search.data = ( ( ( void * ) dns ) + sizeof ( *dns ) );
+ dns->search.len = search_len;
+ memcpy ( dns->search.data, dns_search.data, search_len );
+
+ /* Determine initial query type from the nameserver address family */
+ switch ( nameserver.sa.sa_family ) {
+ case AF_INET:
+ dns->qtype = htons ( DNS_TYPE_A );
+ break;
+ case AF_INET6:
+ dns->qtype = htons ( DNS_TYPE_AAAA );
+ break;
+ default:
+ rc = -ENOTSUP;
+ goto err_type;
+ }
+
+ /* Construct query (recursion desired, exactly one question) */
+ query = &dns->buf.query;
+ query->flags = htons ( DNS_FLAG_RD );
+ query->qdcount = htons ( 1 );
+ dns->name.data = &dns->buf;
+ dns->name.offset = offsetof ( typeof ( dns->buf ), name );
+ dns->name.len = offsetof ( typeof ( dns->buf ), padding );
+ name_len = dns_encode ( name, &dns->name );
+ if ( name_len < 0 ) {
+ rc = name_len;
+ goto err_encode;
+ }
+ dns->offset = ( offsetof ( typeof ( dns->buf ), name ) +
+ name_len - 1 /* Strip root label */ );
+ if ( ( rc = dns_question ( dns ) ) != 0 )
+ goto err_question;
+
+ /* Open UDP connection */
+ if ( ( rc = xfer_open_socket ( &dns->socket, SOCK_DGRAM,
+ &nameserver.sa, NULL ) ) != 0 ) {
+ DBGC ( dns, "DNS %p could not open socket: %s\n",
+ dns, strerror ( rc ) );
+ goto err_open_socket;
+ }
+
+ /* Start timer to trigger first packet */
+ start_timer_nodelay ( &dns->timer );
+
+ /* Attach parent interface, mortalise self, and return */
+ intf_plug_plug ( &dns->resolv, resolv );
+ ref_put ( &dns->refcnt );
+ return 0;
+
+ err_open_socket:
+ err_question:
+ err_encode:
+ err_type:
+ ref_put ( &dns->refcnt );
+ err_alloc_dns:
+ err_no_nameserver:
+ return rc;
+}
+
+/** DNS name resolver
+ *
+ * Registers dns_resolv() as a resolver named "DNS" at the
+ * RESOLV_NORMAL position.
+ */
+struct resolver dns_resolver __resolver ( RESOLV_NORMAL ) = {
+ .name = "DNS",
+ .resolv = dns_resolv,
+};
+
+/******************************************************************************
+ *
+ * Settings
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Format DNS search list setting
+ *
+ * @v type		Setting type
+ * @v raw		Raw setting value
+ * @v raw_len		Length of raw setting value
+ * @v buf		Buffer to contain formatted value
+ * @v len		Length of buffer
+ * @ret len		Length of formatted value, or negative error
+ *
+ * Decodes each name in the raw search list in turn, separating
+ * consecutive names with a single space.  The returned length counts
+ * every character of the formatted value, including any that did not
+ * fit within the buffer.
+ */
+static int format_dnssl_setting ( const struct setting_type *type __unused,
+				  const void *raw, size_t raw_len,
+				  char *buf, size_t len ) {
+	struct dns_name name = {
+		.data = ( ( void * ) raw ),
+		.len = raw_len,
+	};
+	size_t used = 0;
+	size_t space;
+	int decoded;
+
+	while ( name.offset < raw_len ) {
+
+		/* Decode this name into whatever buffer space is left */
+		space = ( ( used < len ) ? ( len - used ) : 0 );
+		decoded = dns_decode ( &name, ( buf + used ), space );
+		if ( decoded < 0 )
+			return decoded;
+		used += decoded;
+
+		/* Advance to the following name */
+		name.offset = dns_skip_search ( &name );
+
+		/* No separator is needed after the final name */
+		if ( name.offset == raw_len )
+			continue;
+
+		/* Separate this name from the next one */
+		if ( used < len )
+			buf[used] = ' ';
+		used++;
+	}
+
+	return used;
+}
+
+/** A DNS search list setting type
+ *
+ * Provides only a formatter (format_dnssl_setting()); no parser is
+ * defined here.
+ */
+const struct setting_type setting_type_dnssl __setting_type = {
+ .name = "dnssl",
+ .format = format_dnssl_setting,
+};
+
+/** IPv4 DNS server setting (DHCP option DHCP_DNS_SERVERS) */
+const struct setting dns_setting __setting ( SETTING_IP_EXTRA, dns ) = {
+ .name = "dns",
+ .description = "DNS server",
+ .tag = DHCP_DNS_SERVERS,
+ .type = &setting_type_ipv4,
+};
+
+/** IPv6 DNS server setting (DHCPV6_DNS_SERVERS, within the IPv6 scope) */
+const struct setting dns6_setting __setting ( SETTING_IP_EXTRA, dns6 ) = {
+ .name = "dns6",
+ .description = "DNS server",
+ .tag = DHCPV6_DNS_SERVERS,
+ .type = &setting_type_ipv6,
+ .scope = &ipv6_scope,
+};
+
+/** DNS search list (DHCP option DHCP_DOMAIN_SEARCH) */
+const struct setting dnssl_setting __setting ( SETTING_IP_EXTRA, dnssl ) = {
+ .name = "dnssl",
+ .description = "DNS search list",
+ .tag = DHCP_DOMAIN_SEARCH,
+ .type = &setting_type_dnssl,
+};
+
+/**
+ * Apply DNS search list
+ *
+ * Discards the current search list and replaces it with the "dnssl"
+ * setting if one can be fetched.  Otherwise, the local domain setting
+ * (if any) is encoded and used as a single-entry search list.
+ */
+static void apply_dns_search ( void ) {
+	char *localdomain;
+	int len;
+
+	/* Discard the previous search list */
+	free ( dns_search.data );
+	memset ( &dns_search, 0, sizeof ( dns_search ) );
+
+	/* Prefer an explicit DNS search list */
+	len = fetch_setting_copy ( NULL, &dnssl_setting, NULL, NULL,
+				   &dns_search.data );
+	if ( len >= 0 ) {
+		dns_search.len = len;
+		return;
+	}
+
+	/* Otherwise fall back to the local domain, if there is one */
+	fetch_string_setting_copy ( NULL, &domain_setting, &localdomain );
+	if ( ! localdomain )
+		return;
+
+	/* First pass measures the encoded length */
+	len = dns_encode ( localdomain, &dns_search );
+	if ( len < 0 )
+		goto out;
+
+	/* Second pass encodes into a buffer of exactly that length */
+	dns_search.data = malloc ( len );
+	if ( ! dns_search.data )
+		goto out;
+	dns_search.len = len;
+	dns_encode ( localdomain, &dns_search );
+
+ out:
+	free ( localdomain );
+}
+
+/**
+ * Apply DNS settings
+ *
+ * @ret rc		Return status code
+ *
+ * Selects the nameserver address (the IPv6 setting is tried before the
+ * IPv4 setting), refreshes the search list and, when debugging is
+ * enabled, prints the resulting search list.  Always returns zero.
+ */
+static int apply_dns_settings ( void ) {
+	struct dns_name cursor;
+	int next;
+
+	/* Select the nameserver address */
+	nameserver.sa.sa_family = 0;
+	if ( fetch_ipv6_setting ( NULL, &dns6_setting,
+				  &nameserver.sin6.sin6_addr ) >= 0 ) {
+		nameserver.sin6.sin6_family = AF_INET6;
+	} else if ( fetch_ipv4_setting ( NULL, &dns_setting,
+					 &nameserver.sin.sin_addr ) >= 0 ) {
+		nameserver.sin.sin_family = AF_INET;
+	}
+	if ( nameserver.sa.sa_family ) {
+		DBG ( "DNS using nameserver %s\n",
+		      sock_ntoa ( &nameserver.sa ) );
+	}
+
+	/* Refresh the search list */
+	apply_dns_search();
+
+	/* Nothing further to do unless there is a list to print */
+	if ( ( ! DBG_LOG ) || ( dns_search.len == 0 ) )
+		return 0;
+
+	/* Print each search list entry using a private cursor */
+	DBG ( "DNS search list:" );
+	memcpy ( &cursor, &dns_search, sizeof ( cursor ) );
+	while ( cursor.offset != cursor.len ) {
+		DBG ( " %s", dns_name ( &cursor ) );
+		next = dns_skip_search ( &cursor );
+		if ( next < 0 )
+			break;
+		cursor.offset = next;
+	}
+	DBG ( "\n" );
+
+	return 0;
+}
+
+/** DNS settings applicator
+ *
+ * Registers apply_dns_settings() as a settings applicator.
+ */
+struct settings_applicator dns_applicator __settings_applicator = {
+ .apply = apply_dns_settings,
+};
diff --git a/qemu/roms/ipxe/src/net/udp/slam.c b/qemu/roms/ipxe/src/net/udp/slam.c
new file mode 100644
index 000000000..3cb492d73
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/udp/slam.c
@@ -0,0 +1,757 @@
+/*
+ * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <errno.h>
+#include <assert.h>
+#include <byteswap.h>
+#include <ipxe/features.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/bitmap.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/uri.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/timer.h>
+#include <ipxe/retry.h>
+
+/** @file
+ *
+ * Scalable Local Area Multicast protocol
+ *
+ * The SLAM protocol is supported only by Etherboot; it was designed
+ * and implemented by Eric Biederman. A server implementation is
+ * available in contrib/mini-slamd. There does not appear to be any
+ * documentation beyond a few sparse comments in Etherboot's
+ * proto_slam.c.
+ *
+ * SLAM packets use three types of data field:
+ *
+ * Nul : A single NUL (0) byte, used as a list terminator
+ *
+ * Raw : A block of raw data
+ *
+ * Int : A variable-length integer, in big-endian order. The length
+ * of the integer is encoded in the most significant three bits.
+ *
+ * Packets received by the client have the following layout:
+ *
+ * Int : Transaction identifier. This is an opaque value.
+ *
+ * Int : Total number of bytes in the transfer.
+ *
+ * Int : Block size, in bytes.
+ *
+ * Int : Packet sequence number within the transfer (if this packet
+ * contains data).
+ *
+ * Raw : Packet data (if this packet contains data).
+ *
+ * Packets transmitted by the client consist of a run-length-encoded
+ * representation of the received-blocks bitmap, looking something
+ * like:
+ *
+ * Int : Number of consecutive successfully-received packets
+ * Int : Number of consecutive missing packets
+ * Int : Number of consecutive successfully-received packets
+ * Int : Number of consecutive missing packets
+ * ....
+ * Nul
+ *
+ */
+
+FEATURE ( FEATURE_PROTOCOL, "SLAM", DHCP_EB_FEATURE_SLAM, 1 );
+
+/** Default SLAM server port */
+#define SLAM_DEFAULT_PORT 10000
+
+/** Default SLAM multicast IP address (239.255.1.1, host byte order)
+ *
+ * The most significant octet is an unsigned constant so that shifting
+ * it into bit 31 cannot overflow a signed int.
+ */
+#define SLAM_DEFAULT_MULTICAST_IP \
+	( ( 239U << 24 ) | ( 255 << 16 ) | ( 1 << 8 ) | ( 1 << 0 ) )
+
+/** Default SLAM multicast port */
+#define SLAM_DEFAULT_MULTICAST_PORT 10000
+
+/** Maximum SLAM header length */
+#define SLAM_MAX_HEADER_LEN ( 7 /* transaction id */ + 7 /* total_bytes */ + \
+			      7 /* block_size */ )
+
+/** Maximum number of blocks to request per NACK
+ *
+ * This is a policy decision equivalent to selecting a TCP window
+ * size.
+ */
+#define SLAM_MAX_BLOCKS_PER_NACK 4
+
+/** Maximum SLAM NACK length
+ *
+ * We only ever send a NACK for a single range of up to @c
+ * SLAM_MAX_BLOCKS_PER_NACK blocks.
+ */
+#define SLAM_MAX_NACK_LEN ( 7 /* block */ + 7 /* #blocks */ + 1 /* NUL */ )
+
+/** SLAM slave timeout */
+#define SLAM_SLAVE_TIMEOUT ( 1 * TICKS_PER_SEC )
+
+/** A SLAM request */
+struct slam_request {
+ /** Reference counter */
+ struct refcnt refcnt;
+
+ /** Data transfer interface (receives the downloaded blocks) */
+ struct interface xfer;
+ /** Unicast socket (used to transmit NACKs and the disconnect message) */
+ struct interface socket;
+ /** Multicast socket (data packets arrive here) */
+ struct interface mc_socket;
+
+ /** Master client retry timer */
+ struct retry_timer master_timer;
+ /** Slave client retry timer */
+ struct retry_timer slave_timer;
+
+ /** Cached header (raw bytes of the most recently parsed packet header) */
+ uint8_t header[SLAM_MAX_HEADER_LEN];
+ /** Size of cached header */
+ size_t header_len;
+ /** Total number of bytes in transfer */
+ unsigned long total_bytes;
+ /** Transfer block size */
+ unsigned long block_size;
+ /** Number of blocks in transfer (total bytes divided by block size, rounded up) */
+ unsigned long num_blocks;
+ /** Block bitmap (one bit per block, set once the block has been received) */
+ struct bitmap bitmap;
+ /** NACK sent flag (set when a NACK is transmitted; causes a disconnect message on completion) */
+ int nack_sent;
+};
+
+/**
+ * Release a SLAM request
+ *
+ * @v refcnt		Reference counter
+ *
+ * Frees the block bitmap and then the request structure itself.
+ */
+static void slam_free ( struct refcnt *refcnt ) {
+	struct slam_request *slam;
+
+	slam = container_of ( refcnt, struct slam_request, refcnt );
+	bitmap_free ( &slam->bitmap );
+	free ( slam );
+}
+
+/**
+ * Complete a SLAM request
+ *
+ * @v slam		SLAM request
+ * @v rc		Return status code
+ *
+ * Sends a disconnect message if any NACK has been transmitted, stops
+ * both retry timers and shuts down all three interfaces with the
+ * given status.
+ */
+static void slam_finished ( struct slam_request *slam, int rc ) {
+	static const uint8_t disconnect_msg[] = { 0 };
+
+	DBGC ( slam, "SLAM %p finished with status code %d (%s)\n",
+	       slam, rc, strerror ( rc ) );
+
+	/* The server needs a disconnect only if it has heard from us */
+	if ( slam->nack_sent ) {
+		xfer_deliver_raw ( &slam->socket, disconnect_msg,
+				   sizeof ( disconnect_msg ) );
+	}
+
+	/* Neither timer may fire after this point */
+	stop_timer ( &slam->master_timer );
+	stop_timer ( &slam->slave_timer );
+
+	/* Shut down both sockets and the data transfer interface */
+	intf_shutdown ( &slam->socket, rc );
+	intf_shutdown ( &slam->mc_socket, rc );
+	intf_shutdown ( &slam->xfer, rc );
+}
+
+/****************************************************************************
+ *
+ * TX datapath
+ *
+ */
+
+/**
+ * Append a variable-length value to a SLAM packet
+ *
+ * @v slam		SLAM request
+ * @v iobuf		I/O buffer
+ * @v value		Value to add
+ * @ret rc		Return status code
+ *
+ * The value is written big-endian at the end of the I/O buffer, with
+ * its length in bytes stored in the top three bits of the first byte.
+ * At least one byte of tailroom is always left unused, so that the
+ * terminating NUL can still be added.
+ */
+static int slam_put_value ( struct slam_request *slam,
+			    struct io_buffer *iobuf, unsigned long value ) {
+	uint8_t *bytes;
+	size_t nbytes;
+	size_t pos;
+
+	/* Bits required are the significant bits of the value plus
+	 * the three-bit length field, rounded up to whole bytes.
+	 * Refuse to consume the final byte of tailroom.
+	 */
+	nbytes = ( ( flsl ( value ) + 10 ) / 8 );
+	if ( iob_tailroom ( iobuf ) <= nbytes ) {
+		DBGC2 ( slam, "SLAM %p cannot add %zd-byte value\n",
+			slam, nbytes );
+		return -ENOBUFS;
+	}
+
+	/* The protocol never requires a value so large that the
+	 * length field no longer fits alongside it.
+	 */
+	assert ( nbytes <= sizeof ( value ) );
+
+	/* Write the value starting from its least significant byte */
+	bytes = iob_put ( iobuf, nbytes );
+	pos = nbytes;
+	while ( pos > 0 ) {
+		bytes[--pos] = value;
+		value >>= 8;
+	}
+
+	/* Merge the length into the top bits of the first byte */
+	bytes[0] |= ( nbytes << 5 );
+	assert ( value == 0 );
+
+	return 0;
+}
+
+/**
+ * Send SLAM NACK packet
+ *
+ * @v slam		SLAM request
+ * @ret rc		Return status code
+ *
+ * Requests the run of missing blocks that starts at the first gap in
+ * the bitmap.  The run is limited to @c SLAM_MAX_BLOCKS_PER_NACK
+ * blocks and ends early at the end of the transfer or at the first
+ * block that has already been received.
+ */
+static int slam_tx_nack ( struct slam_request *slam ) {
+	struct io_buffer *iobuf;
+	unsigned long first_block;
+	unsigned long num_blocks;
+	uint8_t *nul;
+	int rc;
+
+	/* Mark NACK as sent, so that we know we have to disconnect later */
+	slam->nack_sent = 1;
+
+	/* Allocate I/O buffer */
+	iobuf = xfer_alloc_iob ( &slam->socket, SLAM_MAX_NACK_LEN );
+	if ( ! iobuf ) {
+		DBGC ( slam, "SLAM %p could not allocate I/O buffer\n",
+		       slam );
+		return -ENOMEM;
+	}
+
+	/* Construct NACK.  We only ever request a small, bounded
+	 * range of blocks; this allows us to force
+	 * multicast-TFTP-style flow control on the SLAM server, which
+	 * will otherwise just blast the data out as fast as it can.
+	 * On a gigabit network, without RX checksumming, this would
+	 * inevitably cause packet drops.
+	 */
+	first_block = bitmap_first_gap ( &slam->bitmap );
+	for ( num_blocks = 1 ; ; num_blocks++ ) {
+		if ( num_blocks >= SLAM_MAX_BLOCKS_PER_NACK )
+			break;
+		if ( ( first_block + num_blocks ) >= slam->num_blocks )
+			break;
+		if ( bitmap_test ( &slam->bitmap,
+				   ( first_block + num_blocks ) ) )
+			break;
+	}
+	if ( first_block ) {
+		DBGCP ( slam, "SLAM %p transmitting NACK for blocks "
+			"%ld-%ld\n", slam, first_block,
+			( first_block + num_blocks - 1 ) );
+	} else {
+		DBGC ( slam, "SLAM %p transmitting initial NACK for blocks "
+		       "0-%ld\n", slam, ( num_blocks - 1 ) );
+	}
+	if ( ( rc = slam_put_value ( slam, iobuf, first_block ) ) != 0 )
+		goto err_put_value;
+	if ( ( rc = slam_put_value ( slam, iobuf, num_blocks ) ) != 0 )
+		goto err_put_value;
+	nul = iob_put ( iobuf, 1 );
+	*nul = 0;
+
+	/* Transmit packet */
+	return xfer_deliver_iob ( &slam->socket, iobuf );
+
+ err_put_value:
+	/* The buffer was never handed to the socket, so free it here */
+	free_iob ( iobuf );
+	return rc;
+}
+
+/**
+ * Handle expiry of the SLAM master client retry timer
+ *
+ * @v timer		Master retry timer
+ * @v fail		Failure indicator
+ *
+ * On failure the timer is simply left stopped; the connection is
+ * terminated only if the slave timer times out.  Otherwise the timer
+ * is restarted and the NACK is retransmitted.
+ */
+static void slam_master_timer_expired ( struct retry_timer *timer,
+					int fail ) {
+	struct slam_request *slam;
+
+	slam = container_of ( timer, struct slam_request, master_timer );
+
+	/* Stop acting as master; the slave timer decides when to quit */
+	if ( fail ) {
+		DBGC ( slam, "SLAM %p giving up acting as master client\n",
+		       slam );
+		return;
+	}
+
+	/* Retransmit the NACK */
+	start_timer ( timer );
+	slam_tx_nack ( slam );
+}
+
+/**
+ * Handle expiry of the SLAM slave client retry timer
+ *
+ * @v timer		Slave retry timer
+ * @v fail		Failure indicator
+ *
+ * On failure the request is terminated with -ETIMEDOUT.  Otherwise
+ * the timer is restarted and a NACK is sent in an attempt to become
+ * the master client.
+ */
+static void slam_slave_timer_expired ( struct retry_timer *timer,
+				       int fail ) {
+	struct slam_request *slam;
+
+	slam = container_of ( timer, struct slam_request, slave_timer );
+
+	/* Terminate the connection once retries are exhausted */
+	if ( fail ) {
+		slam_finished ( slam, -ETIMEDOUT );
+		return;
+	}
+
+	/* Send a NACK to try to take over as master */
+	DBGC ( slam, "SLAM %p trying to become master client\n",
+	       slam );
+	start_timer ( timer );
+	slam_tx_nack ( slam );
+}
+
+/****************************************************************************
+ *
+ * RX datapath
+ *
+ */
+
+/**
+ * Read and strip a variable-length value from a SLAM packet
+ *
+ * @v slam		SLAM request
+ * @v iobuf		I/O buffer
+ * @v value		Value to fill in, or NULL to ignore value
+ * @ret rc		Return status code
+ *
+ * Reads a variable-length value from the start of the I/O buffer.
+ * The length in bytes is taken from the top three bits of the first
+ * byte; the remaining bits of that byte and all following bytes hold
+ * the value in big-endian order.  When @c value is NULL the field is
+ * still validated and stripped, but nothing is stored.
+ */
+static int slam_pull_value ( struct slam_request *slam,
+			     struct io_buffer *iobuf,
+			     unsigned long *value ) {
+	unsigned long result;
+	uint8_t *data;
+	size_t len;
+
+	/* Sanity check */
+	if ( iob_len ( iobuf ) == 0 ) {
+		DBGC ( slam, "SLAM %p empty value\n", slam );
+		return -EINVAL;
+	}
+
+	/* Read and verify length of value.  The size limit applies
+	 * only when the caller actually wants the value.
+	 */
+	data = iobuf->data;
+	len = ( *data >> 5 );
+	if ( ( len == 0 ) ||
+	     ( value && ( len > sizeof ( *value ) ) ) ) {
+		DBGC ( slam, "SLAM %p invalid value length %zd bytes\n",
+		       slam, len );
+		return -EINVAL;
+	}
+	if ( len > iob_len ( iobuf ) ) {
+		DBGC ( slam, "SLAM %p value extends beyond I/O buffer\n",
+		       slam );
+		return -EINVAL;
+	}
+
+	/* Read value into a local, so that a NULL destination is
+	 * never dereferenced.
+	 */
+	iob_pull ( iobuf, len );
+	result = ( *data & 0x1f );
+	while ( --len ) {
+		result <<= 8;
+		result |= *(++data);
+	}
+
+	/* Store value only if the caller asked for it */
+	if ( value )
+		*value = result;
+
+	return 0;
+}
+
+/**
+ * Read and strip SLAM header
+ *
+ * @v slam		SLAM request
+ * @v iobuf		I/O buffer
+ * @ret rc		Return status code
+ *
+ * A header identical to the cached copy is simply stripped.  Any other
+ * header is parsed afresh: the transfer size and block size are
+ * updated, the header is cached, the block bitmap is discarded and
+ * reallocated, and the data transfer interface is told the new total
+ * size.  A header with a block size of zero is rejected.  Failure to
+ * allocate the bitmap terminates the request.
+ */
+static int slam_pull_header ( struct slam_request *slam,
+			      struct io_buffer *iobuf ) {
+	void *header = iobuf->data;
+	int rc;
+
+	/* If header matches cached header, just pull it and return */
+	if ( ( slam->header_len <= iob_len ( iobuf ) ) &&
+	     ( memcmp ( slam->header, iobuf->data, slam->header_len ) == 0 )){
+		iob_pull ( iobuf, slam->header_len );
+		return 0;
+	}
+
+	DBGC ( slam, "SLAM %p detected changed header; resetting\n", slam );
+
+	/* Read and strip transaction ID, total number of bytes, and
+	 * block size.
+	 */
+	if ( ( rc = slam_pull_value ( slam, iobuf, NULL ) ) != 0 )
+		return rc;
+	if ( ( rc = slam_pull_value ( slam, iobuf,
+				      &slam->total_bytes ) ) != 0 )
+		return rc;
+	if ( ( rc = slam_pull_value ( slam, iobuf,
+				      &slam->block_size ) ) != 0 )
+		return rc;
+
+	/* Sanity check: the block size is used as a divisor below */
+	if ( ! slam->block_size ) {
+		DBGC ( slam, "SLAM %p ignoring zero block size\n", slam );
+		return -EINVAL;
+	}
+
+	/* Update the cached header */
+	slam->header_len = ( iobuf->data - header );
+	assert ( slam->header_len <= sizeof ( slam->header ) );
+	memcpy ( slam->header, header, slam->header_len );
+
+	/* Calculate number of blocks */
+	slam->num_blocks = ( ( slam->total_bytes + slam->block_size - 1 ) /
+			     slam->block_size );
+
+	DBGC ( slam, "SLAM %p has total bytes %ld, block size %ld, num "
+	       "blocks %ld\n", slam, slam->total_bytes, slam->block_size,
+	       slam->num_blocks );
+
+	/* Discard and reset the bitmap */
+	bitmap_free ( &slam->bitmap );
+	memset ( &slam->bitmap, 0, sizeof ( slam->bitmap ) );
+
+	/* Allocate a new bitmap */
+	if ( ( rc = bitmap_resize ( &slam->bitmap,
+				    slam->num_blocks ) ) != 0 ) {
+		/* Failure to allocate a bitmap is fatal */
+		DBGC ( slam, "SLAM %p could not allocate bitmap for %ld "
+		       "blocks: %s\n", slam, slam->num_blocks,
+		       strerror ( rc ) );
+		slam_finished ( slam, rc );
+		return rc;
+	}
+
+	/* Notify recipient of file size */
+	xfer_seek ( &slam->xfer, slam->total_bytes );
+
+	return 0;
+}
+
+/**
+ * Receive SLAM data packet
+ *
+ * @v slam SLAM request
+ * @v iobuf I/O buffer
+ * @v rx_meta Data transfer metadata (unused)
+ * @ret rc Return status code
+ *
+ * Every packet arriving on the multicast socket stops the master client
+ * timer and restarts the slave client timer with a fixed timeout. A
+ * packet carrying a block not yet recorded in the bitmap is passed to
+ * the data transfer interface at the absolute offset (packet number *
+ * block size); duplicate and malformed packets are freed here. The
+ * request finishes successfully once the bitmap is full.
+ */
+static int slam_mc_socket_deliver ( struct slam_request *slam,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *rx_meta __unused ) {
+ struct xfer_metadata meta;
+ unsigned long packet;
+ size_t len;
+ int rc;
+
+ /* Stop the master client timer. Restart the slave client timer. */
+ stop_timer ( &slam->master_timer );
+ stop_timer ( &slam->slave_timer );
+ start_timer_fixed ( &slam->slave_timer, SLAM_SLAVE_TIMEOUT );
+
+ /* Read and strip packet header */
+ if ( ( rc = slam_pull_header ( slam, iobuf ) ) != 0 )
+ goto err_discard;
+
+ /* Read and strip packet number */
+ if ( ( rc = slam_pull_value ( slam, iobuf, &packet ) ) != 0 )
+ goto err_discard;
+
+ /* Sanity check packet number */
+ if ( packet >= slam->num_blocks ) {
+ DBGC ( slam, "SLAM %p received out-of-range packet %ld "
+ "(num_blocks=%ld)\n", slam, packet, slam->num_blocks );
+ rc = -EINVAL;
+ goto err_discard;
+ }
+
+ /* Sanity check length (only the final block may be short) */
+ len = iob_len ( iobuf );
+ if ( len > slam->block_size ) {
+ DBGC ( slam, "SLAM %p received oversize packet of %zd bytes "
+ "(block_size=%ld)\n", slam, len, slam->block_size );
+ rc = -EINVAL;
+ goto err_discard;
+ }
+ if ( ( packet != ( slam->num_blocks - 1 ) ) &&
+ ( len < slam->block_size ) ) {
+ DBGC ( slam, "SLAM %p received short packet of %zd bytes "
+ "(block_size=%ld)\n", slam, len, slam->block_size );
+ rc = -EINVAL;
+ goto err_discard;
+ }
+
+ /* If we have already seen this packet, discard it (rc is zero here) */
+ if ( bitmap_test ( &slam->bitmap, packet ) ) {
+ goto discard;
+ }
+
+ /* Pass to recipient; the buffer is not freed here on failure */
+ memset ( &meta, 0, sizeof ( meta ) );
+ meta.flags = XFER_FL_ABS_OFFSET;
+ meta.offset = ( packet * slam->block_size );
+ if ( ( rc = xfer_deliver ( &slam->xfer, iobuf, &meta ) ) != 0 )
+ goto err;
+
+ /* Mark block as received */
+ bitmap_set ( &slam->bitmap, packet );
+
+ /* If we have received all blocks, terminate */
+ if ( bitmap_full ( &slam->bitmap ) )
+ slam_finished ( slam, 0 );
+
+ return 0;
+
+ err_discard:
+ discard:
+ free_iob ( iobuf );
+ err:
+ return rc;
+}
+
+/**
+ * Receive SLAM non-data packet
+ *
+ * @v slam		SLAM request
+ * @v iobuf		I/O buffer
+ * @v rx_meta		Data transfer metadata (unused)
+ * @ret rc		Return status code
+ *
+ * A packet on the unicast socket must consist of a header and nothing
+ * else.  The master client timer is restarted on every packet, and a
+ * well-formed packet is answered with a NACK.  The I/O buffer is
+ * always freed.
+ */
+static int slam_socket_deliver ( struct slam_request *slam,
+				 struct io_buffer *iobuf,
+				 struct xfer_metadata *rx_meta __unused ) {
+	int rc;
+
+	/* Restart the master client timer */
+	stop_timer ( &slam->master_timer );
+	start_timer ( &slam->master_timer );
+
+	/* Strip the header; nothing may follow it */
+	rc = slam_pull_header ( slam, iobuf );
+	if ( ( rc == 0 ) && ( iob_len ( iobuf ) != 0 ) ) {
+		DBGC ( slam, "SLAM %p received trailing garbage:\n", slam );
+		DBGC_HD ( slam, iobuf->data, iob_len ( iobuf ) );
+		rc = -EINVAL;
+	}
+
+	/* The packet is no longer needed, whatever the outcome */
+	free_iob ( iobuf );
+	if ( rc != 0 )
+		return rc;
+
+	/* Answer a well-formed packet with a NACK */
+	slam_tx_nack ( slam );
+
+	return 0;
+}
+
+/** SLAM unicast socket interface operations
+ *
+ * Received packets are handled by slam_socket_deliver(); closure of
+ * the interface finishes the whole request via slam_finished().
+ */
+static struct interface_operation slam_socket_operations[] = {
+ INTF_OP ( xfer_deliver, struct slam_request *, slam_socket_deliver ),
+ INTF_OP ( intf_close, struct slam_request *, slam_finished ),
+};
+
+/** SLAM unicast socket interface descriptor
+ *
+ * Associates slam_socket_operations with the "socket" member of
+ * struct slam_request.
+ */
+static struct interface_descriptor slam_socket_desc =
+ INTF_DESC ( struct slam_request, socket, slam_socket_operations );
+
+/** SLAM multicast socket interface operations
+ *
+ * Received packets are handled by slam_mc_socket_deliver(); closure of
+ * the interface finishes the whole request via slam_finished().
+ */
+static struct interface_operation slam_mc_socket_operations[] = {
+ INTF_OP ( xfer_deliver, struct slam_request *, slam_mc_socket_deliver ),
+ INTF_OP ( intf_close, struct slam_request *, slam_finished ),
+};
+
+/** SLAM multicast socket interface descriptor
+ *
+ * Associates slam_mc_socket_operations with the "mc_socket" member of
+ * struct slam_request.
+ */
+static struct interface_descriptor slam_mc_socket_desc =
+ INTF_DESC ( struct slam_request, mc_socket, slam_mc_socket_operations );
+
+/****************************************************************************
+ *
+ * Data transfer interface
+ *
+ */
+
+/** SLAM data transfer interface operations
+ *
+ * Closure of the data transfer interface finishes the whole request
+ * via slam_finished().
+ */
+static struct interface_operation slam_xfer_operations[] = {
+ INTF_OP ( intf_close, struct slam_request *, slam_finished ),
+};
+
+/** SLAM data transfer interface descriptor
+ *
+ * Associates slam_xfer_operations with the "xfer" member of
+ * struct slam_request.
+ */
+static struct interface_descriptor slam_xfer_desc =
+ INTF_DESC ( struct slam_request, xfer, slam_xfer_operations );
+
+/**
+ * Parse SLAM URI multicast address
+ *
+ * @v slam SLAM request
+ * @v path Path portion of x-slam:// URI
+ * @v address Socket address to fill in
+ * @ret rc Return status code
+ *
+ * The path is expected to have the form "/<address>[:<port>]".  The
+ * port in @c address is overwritten only when a ":<port>" suffix is
+ * present, so a caller may preload a default port.  -EINVAL is
+ * returned if the port has trailing characters or if inet_aton()
+ * rejects the address.
+ */
+static int slam_parse_multicast_address ( struct slam_request *slam,
+ const char *path,
+ struct sockaddr_in *address ) {
+ char path_dup[ strlen ( path ) /* no +1 */ ];
+ char *sep;
+ char *end;
+
+ /* Create temporary copy of path, minus the leading '/'.  The
+  * copy occupies strlen ( path ) bytes: the '/' is dropped and
+  * the terminating NUL is kept, hence no +1 in the array size.
+  */
+ assert ( *path == '/' );
+ memcpy ( path_dup, ( path + 1 ) , sizeof ( path_dup ) );
+
+ /* Parse port, if present.  The ':' is overwritten with a NUL so
+  * that path_dup then holds only the address portion.
+  */
+ sep = strchr ( path_dup, ':' );
+ if ( sep ) {
+ *(sep++) = '\0';
+ address->sin_port = htons ( strtoul ( sep, &end, 0 ) );
+ if ( *end != '\0' ) {
+ DBGC ( slam, "SLAM %p invalid multicast port "
+ "\"%s\"\n", slam, sep );
+ return -EINVAL;
+ }
+ }
+
+ /* Parse address */
+ if ( inet_aton ( path_dup, &address->sin_addr ) == 0 ) {
+ DBGC ( slam, "SLAM %p invalid multicast address \"%s\"\n",
+ slam, path_dup );
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * Initiate a SLAM request
+ *
+ * @v xfer Data transfer interface
+ * @v uri Uniform Resource Identifier
+ * @ret rc Return status code
+ *
+ * The URI must include a host, which names the unicast server; the
+ * port defaults to SLAM_DEFAULT_PORT.  An optional path of the form
+ * "/<address>[:<port>]" overrides the default multicast address.
+ * On failure the partially constructed request is torn down via
+ * slam_finished() before the error is returned.
+ */
+static int slam_open ( struct interface *xfer, struct uri *uri ) {
+ static const struct sockaddr_in default_multicast = {
+ .sin_family = AF_INET,
+ .sin_port = htons ( SLAM_DEFAULT_MULTICAST_PORT ),
+ .sin_addr = { htonl ( SLAM_DEFAULT_MULTICAST_IP ) },
+ };
+ struct slam_request *slam;
+ struct sockaddr_tcpip server;
+ struct sockaddr_in multicast;
+ int rc;
+
+ /* Sanity checks: a server host is mandatory */
+ if ( ! uri->host )
+ return -EINVAL;
+
+ /* Allocate and populate structure.  All three interfaces and
+  * both timers share the request's reference counter.
+  */
+ slam = zalloc ( sizeof ( *slam ) );
+ if ( ! slam )
+ return -ENOMEM;
+ ref_init ( &slam->refcnt, slam_free );
+ intf_init ( &slam->xfer, &slam_xfer_desc, &slam->refcnt );
+ intf_init ( &slam->socket, &slam_socket_desc, &slam->refcnt );
+ intf_init ( &slam->mc_socket, &slam_mc_socket_desc, &slam->refcnt );
+ timer_init ( &slam->master_timer, slam_master_timer_expired,
+ &slam->refcnt );
+ timer_init ( &slam->slave_timer, slam_slave_timer_expired,
+ &slam->refcnt );
+ /* Fake an invalid cached header of { 0x00, ... } */
+ slam->header_len = 1;
+ /* Fake parameters for initial NACK */
+ slam->num_blocks = 1;
+ if ( ( rc = bitmap_resize ( &slam->bitmap, 1 ) ) != 0 ) {
+ DBGC ( slam, "SLAM %p could not allocate initial bitmap: "
+ "%s\n", slam, strerror ( rc ) );
+ goto err;
+ }
+
+ /* Open unicast socket to the host named in the URI */
+ memset ( &server, 0, sizeof ( server ) );
+ server.st_port = htons ( uri_port ( uri, SLAM_DEFAULT_PORT ) );
+ if ( ( rc = xfer_open_named_socket ( &slam->socket, SOCK_DGRAM,
+ ( struct sockaddr * ) &server,
+ uri->host, NULL ) ) != 0 ) {
+ DBGC ( slam, "SLAM %p could not open unicast socket: %s\n",
+ slam, strerror ( rc ) );
+ goto err;
+ }
+
+ /* Open multicast socket.  Start from the default address and
+  * let the URI path, if any, override it.  The same address is
+  * passed as both peer and local address.
+  */
+ memcpy ( &multicast, &default_multicast, sizeof ( multicast ) );
+ if ( uri->path &&
+ ( ( rc = slam_parse_multicast_address ( slam, uri->path,
+ &multicast ) ) != 0 ) ) {
+ goto err;
+ }
+ if ( ( rc = xfer_open_socket ( &slam->mc_socket, SOCK_DGRAM,
+ ( struct sockaddr * ) &multicast,
+ ( struct sockaddr * ) &multicast ) ) != 0 ) {
+ DBGC ( slam, "SLAM %p could not open multicast socket: %s\n",
+ slam, strerror ( rc ) );
+ goto err;
+ }
+
+ /* Start slave retry timer */
+ start_timer_fixed ( &slam->slave_timer, SLAM_SLAVE_TIMEOUT );
+
+ /* Attach to parent interface, mortalise self (by dropping the
+  * local reference), and return
+  */
+ intf_plug_plug ( &slam->xfer, xfer );
+ ref_put ( &slam->refcnt );
+ return 0;
+
+ err:
+ slam_finished ( slam, rc );
+ ref_put ( &slam->refcnt );
+ return rc;
+}
+
+/** SLAM URI opener
+ *
+ * Registers slam_open() as the handler for "x-slam" URIs.
+ */
+struct uri_opener slam_uri_opener __uri_opener = {
+ .scheme = "x-slam",
+ .open = slam_open,
+};
diff --git a/qemu/roms/ipxe/src/net/udp/syslog.c b/qemu/roms/ipxe/src/net/udp/syslog.c
new file mode 100644
index 000000000..d65d19ab8
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/udp/syslog.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright (C) 2011 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/** @file
+ *
+ * Syslog protocol
+ *
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <byteswap.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/dhcp.h>
+#include <ipxe/dhcpv6.h>
+#include <ipxe/settings.h>
+#include <ipxe/console.h>
+#include <ipxe/lineconsole.h>
+#include <ipxe/syslog.h>
+#include <config/console.h>
+
+/* Set default console usage if applicable */
+#if ! ( defined ( CONSOLE_SYSLOG ) && CONSOLE_EXPLICIT ( CONSOLE_SYSLOG ) )
+#undef CONSOLE_SYSLOG
+#define CONSOLE_SYSLOG ( CONSOLE_USAGE_ALL & ~CONSOLE_USAGE_TUI )
+#endif
+
+/** The syslog server
+ *
+ * Only the port is initialised statically (to SYSLOG_PORT).  The
+ * address family and address are filled in by
+ * apply_syslog_settings(); a zero address family means that no log
+ * server is configured.
+ */
+static union {
+ struct sockaddr sa;
+ struct sockaddr_tcpip st;
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+} logserver = {
+ .st = {
+ .st_port = htons ( SYSLOG_PORT ),
+ },
+};
+
+/** Syslog UDP interface operations
+ *
+ * Deliberately empty: no operations are handled on this interface.
+ */
+static struct interface_operation syslogger_operations[] = {};
+
+/** Syslog UDP interface descriptor */
+static struct interface_descriptor syslogger_desc =
+ INTF_DESC_PURE ( syslogger_operations );
+
+/** The syslog UDP interface
+ *
+ * Log messages are written to this interface by syslog_putchar();
+ * it is (re)connected to the log server by apply_syslog_settings().
+ */
+static struct interface syslogger = INTF_INIT ( syslogger_desc );
+
+/******************************************************************************
+ *
+ * Message formatting and transmission
+ *
+ ******************************************************************************
+ */
+
+/** Host name (for log messages)
+ *
+ * Allocated copy refreshed by apply_syslog_settings(); may be NULL.
+ */
+static char *syslog_hostname;
+
+/** Domain name (for log messages)
+ *
+ * Allocated copy refreshed by apply_syslog_settings(); may be NULL.
+ * It is used only when a host name is also present.
+ */
+static char *syslog_domain;
+
+/**
+ * Transmit formatted syslog message
+ *
+ * @v xfer		Data transfer interface
+ * @v severity		Severity
+ * @v message		Message
+ * @v terminator	Message terminator
+ * @ret rc		Return status code
+ *
+ * The message is prefixed with the priority and, when a host name is
+ * known, with "hostname[.domain] ".  The domain is included only if
+ * a host name is present.
+ */
+int syslog_send ( struct interface *xfer, unsigned int severity,
+		  const char *message, const char *terminator ) {
+	const char *hostname = "";
+	const char *domain = "";
+	const char *dot = "";
+	const char *space = "";
+
+	/* Use the host name if we have one */
+	if ( syslog_hostname )
+		hostname = syslog_hostname;
+
+	/* A non-empty host name is followed by a space, and is the
+	 * only case in which the domain is reported.
+	 */
+	if ( hostname[0] ) {
+		space = " ";
+		if ( syslog_domain )
+			domain = syslog_domain;
+	}
+
+	/* A non-empty domain is joined to the host name with a dot */
+	if ( domain[0] )
+		dot = ".";
+
+	return xfer_printf ( xfer, "<%d>%s%s%s%sipxe: %s%s",
+			     SYSLOG_PRIORITY ( SYSLOG_DEFAULT_FACILITY,
+					       severity ),
+			     hostname, dot, domain, space,
+			     message, terminator );
+}
+
+/******************************************************************************
+ *
+ * Console driver
+ *
+ ******************************************************************************
+ */
+
+/** Syslog line buffer
+ *
+ * Backing store for the syslog line console; its contents are sent
+ * as the message text by syslog_putchar().
+ */
+static char syslog_buffer[SYSLOG_BUFSIZE];
+
+/** Syslog severity
+ *
+ * Severity applied to outgoing messages.  It can be changed by the
+ * private ANSI sequence handled in syslog_handle_priority().
+ */
+static unsigned int syslog_severity = SYSLOG_DEFAULT_SEVERITY;
+
+/**
+ * Handle ANSI set syslog priority (private sequence)
+ *
+ * @v ctx		ANSI escape sequence context (unused)
+ * @v count		Parameter count (unused)
+ * @v params		Parameter list; params[0] is the requested
+ *			severity, and a negative value selects the
+ *			default severity
+ */
+static void syslog_handle_priority ( struct ansiesc_context *ctx __unused,
+				     unsigned int count __unused,
+				     int params[] ) {
+	int requested = params[0];
+
+	syslog_severity = ( ( requested >= 0 ) ?
+			    requested : SYSLOG_DEFAULT_SEVERITY );
+}
+
+/** Syslog ANSI escape sequence handlers
+ *
+ * Only the private "log priority" sequence is handled.  The list is
+ * terminated by an entry with a NULL handler.
+ */
+static struct ansiesc_handler syslog_handlers[] = {
+ { ANSIESC_LOG_PRIORITY, syslog_handle_priority },
+ { 0, NULL }
+};
+
+/** Syslog line console
+ *
+ * Accumulates characters into syslog_buffer and interprets escape
+ * sequences using syslog_handlers.
+ */
+static struct line_console syslog_line = {
+ .buffer = syslog_buffer,
+ .len = sizeof ( syslog_buffer ),
+ .ctx = {
+ .handlers = syslog_handlers,
+ },
+};
+
+/** Syslog recursion marker
+ *
+ * Non-zero while a log message is being transmitted, so that any
+ * console output generated by the transmission itself is dropped.
+ */
+static int syslog_entered;
+
+/**
+ * Print a character to syslog console
+ *
+ * @v character		Character to be printed
+ *
+ * Characters are accumulated via line_putchar(); a message is sent
+ * only when line_putchar() returns non-zero.
+ */
+static void syslog_putchar ( int character ) {
+	int rc;
+
+	/* Ignore output produced while we are mid-logging; otherwise
+	 * keep filling the line buffer until line_putchar() returns
+	 * non-zero.
+	 */
+	if ( syslog_entered ||
+	     ( line_putchar ( &syslog_line, character ) == 0 ) )
+		return;
+
+	/* Send log message, guarding against re-entry meanwhile */
+	syslog_entered = 1;
+	rc = syslog_send ( &syslogger, syslog_severity, syslog_buffer, "" );
+	if ( rc != 0 ) {
+		DBG ( "SYSLOG could not send log message: %s\n",
+		      strerror ( rc ) );
+	}
+	syslog_entered = 0;
+}
+
+/** Syslog console driver
+ *
+ * Starts out disabled; apply_syslog_settings() enables it once a
+ * log server is configured.
+ */
+struct console_driver syslog_console __console_driver = {
+ .putchar = syslog_putchar,
+ .disabled = CONSOLE_DISABLED,
+ .usage = CONSOLE_SYSLOG,
+};
+
+/******************************************************************************
+ *
+ * Settings
+ *
+ ******************************************************************************
+ */
+
+/** IPv4 syslog server setting
+ *
+ * Consulted by apply_syslog_settings() only when no IPv6 syslog
+ * server setting is available.
+ */
+const struct setting syslog_setting __setting ( SETTING_MISC, syslog ) = {
+ .name = "syslog",
+ .description = "Syslog server",
+ .tag = DHCP_LOG_SERVERS,
+ .type = &setting_type_ipv4,
+};
+
+/** IPv6 syslog server setting
+ *
+ * Takes precedence over the IPv4 setting in apply_syslog_settings().
+ */
+const struct setting syslog6_setting __setting ( SETTING_MISC, syslog6 ) = {
+ .name = "syslog6",
+ .description = "Syslog server",
+ .tag = DHCPV6_LOG_SERVERS,
+ .type = &setting_type_ipv6,
+ .scope = &ipv6_scope,
+};
+
+/**
+ * Strip invalid characters from host/domain name
+ *
+ * @v name		Name to strip (may be NULL)
+ *
+ * The name is compacted in place: every character that is not
+ * printable, or that is whitespace, is removed.  The result remains
+ * NUL-terminated and is never longer than the input.
+ */
+static void syslog_fix_name ( char *name ) {
+	char *fixed = name;
+	unsigned char c;
+
+	/* Do nothing if name does not exist */
+	if ( ! name )
+		return;
+
+	/* Strip any non-printable or whitespace characters from the
+	 * name.  Each character is written at the output position,
+	 * which advances only past characters worth keeping; the
+	 * final NUL is therefore written but not stepped over.
+	 *
+	 * Characters are handled as unsigned char, since the
+	 * character classification functions are not defined for
+	 * negative values (as produced by a signed char holding a
+	 * byte above 0x7f).
+	 */
+	do {
+		c = *(name++);
+		*fixed = c;
+		if ( isprint ( c ) && ! isspace ( c ) )
+			fixed++;
+	} while ( c );
+}
+
+/**
+ * Apply syslog settings
+ *
+ * @ret rc Return status code
+ *
+ * Refreshes the cached host and domain names, selects the log
+ * server (the IPv6 "syslog6" setting takes precedence over the IPv4
+ * "syslog" setting) and enables the syslog console only when a log
+ * server is configured.  The UDP socket is reopened only if the log
+ * server address differs from the previous one.
+ */
+static int apply_syslog_settings ( void ) {
+ struct sockaddr old_logserver;
+ int rc;
+
+ /* Fetch hostname and domain name, replacing (and sanitising)
+  * the previously cached copies
+  */
+ free ( syslog_hostname );
+ fetch_string_setting_copy ( NULL, &hostname_setting, &syslog_hostname );
+ syslog_fix_name ( syslog_hostname );
+ free ( syslog_domain );
+ fetch_string_setting_copy ( NULL, &domain_setting, &syslog_domain );
+ syslog_fix_name ( syslog_domain );
+
+ /* Fetch log server.  The console stays disabled unless a server
+  * is found, and the previous address is saved so that a change
+  * can be detected below.
+  *
+  * NOTE(review): the later comparison spans sizeof ( logserver )
+  * while old_logserver is a struct sockaddr; this presumes that
+  * the two have the same size - confirm.
+  */
+ syslog_console.disabled = CONSOLE_DISABLED;
+ memcpy ( &old_logserver, &logserver, sizeof ( old_logserver ) );
+ logserver.sa.sa_family = 0;
+ if ( fetch_ipv6_setting ( NULL, &syslog6_setting,
+ &logserver.sin6.sin6_addr ) >= 0 ) {
+ logserver.sin6.sin6_family = AF_INET6;
+ } else if ( fetch_ipv4_setting ( NULL, &syslog_setting,
+ &logserver.sin.sin_addr ) >= 0 ) {
+ logserver.sin.sin_family = AF_INET;
+ }
+ if ( logserver.sa.sa_family ) {
+ syslog_console.disabled = 0;
+ DBG ( "SYSLOG using log server %s\n",
+ sock_ntoa ( &logserver.sa ) );
+ }
+
+ /* Do nothing unless log server has changed */
+ if ( memcmp ( &logserver, &old_logserver, sizeof ( logserver ) ) == 0 )
+ return 0;
+
+ /* Reset syslog connection */
+ intf_restart ( &syslogger, 0 );
+
+ /* Do nothing unless we have a log server */
+ if ( syslog_console.disabled ) {
+ DBG ( "SYSLOG has no log server\n" );
+ return 0;
+ }
+
+ /* Connect to log server.  Note that the console is left enabled
+  * if this fails.
+  */
+ if ( ( rc = xfer_open_socket ( &syslogger, SOCK_DGRAM,
+ &logserver.sa, NULL ) ) != 0 ) {
+ DBG ( "SYSLOG cannot connect to log server: %s\n",
+ strerror ( rc ) );
+ return rc;
+ }
+
+ return 0;
+}
+
+/** Syslog settings applicator
+ *
+ * Registers apply_syslog_settings() as a settings applicator.
+ */
+struct settings_applicator syslog_applicator __settings_applicator = {
+ .apply = apply_syslog_settings,
+};
diff --git a/qemu/roms/ipxe/src/net/udp/tftp.c b/qemu/roms/ipxe/src/net/udp/tftp.c
new file mode 100644
index 000000000..ee827ae3d
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/udp/tftp.c
@@ -0,0 +1,1236 @@
+/*
+ * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <assert.h>
+#include <ipxe/refcnt.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/uri.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/retry.h>
+#include <ipxe/features.h>
+#include <ipxe/bitmap.h>
+#include <ipxe/settings.h>
+#include <ipxe/dhcp.h>
+#include <ipxe/uri.h>
+#include <ipxe/tftp.h>
+
+/** @file
+ *
+ * TFTP protocol
+ *
+ */
+
+FEATURE ( FEATURE_PROTOCOL, "TFTP", DHCP_EB_FEATURE_TFTP, 1 );
+
+/* TFTP-specific error codes */
+#define EINVAL_BLKSIZE __einfo_error ( EINFO_EINVAL_BLKSIZE )
+#define EINFO_EINVAL_BLKSIZE __einfo_uniqify \
+ ( EINFO_EINVAL, 0x01, "Invalid blksize" )
+#define EINVAL_TSIZE __einfo_error ( EINFO_EINVAL_TSIZE )
+#define EINFO_EINVAL_TSIZE __einfo_uniqify \
+ ( EINFO_EINVAL, 0x02, "Invalid tsize" )
+#define EINVAL_MC_NO_PORT __einfo_error ( EINFO_EINVAL_MC_NO_PORT )
+#define EINFO_EINVAL_MC_NO_PORT __einfo_uniqify \
+ ( EINFO_EINVAL, 0x03, "Missing multicast port" )
+#define EINVAL_MC_NO_MC __einfo_error ( EINFO_EINVAL_MC_NO_MC )
+#define EINFO_EINVAL_MC_NO_MC __einfo_uniqify \
+ ( EINFO_EINVAL, 0x04, "Missing multicast mc" )
+#define EINVAL_MC_INVALID_MC __einfo_error ( EINFO_EINVAL_MC_INVALID_MC )
+#define EINFO_EINVAL_MC_INVALID_MC __einfo_uniqify \
+ ( EINFO_EINVAL, 0x05, "Missing multicast IP" )
+#define EINVAL_MC_INVALID_IP __einfo_error ( EINFO_EINVAL_MC_INVALID_IP )
+#define EINFO_EINVAL_MC_INVALID_IP __einfo_uniqify \
+ ( EINFO_EINVAL, 0x06, "Invalid multicast IP" )
+#define EINVAL_MC_INVALID_PORT __einfo_error ( EINFO_EINVAL_MC_INVALID_PORT )
+#define EINFO_EINVAL_MC_INVALID_PORT __einfo_uniqify \
+ ( EINFO_EINVAL, 0x07, "Invalid multicast port" )
+
+/**
+ * A TFTP request
+ *
+ * This data structure holds the state for an ongoing TFTP transfer.
+ * It is reference-counted and is released by tftp_free().
+ */
+struct tftp_request {
+ /** Reference count */
+ struct refcnt refcnt;
+ /** Data transfer interface */
+ struct interface xfer;
+
+ /** URI being fetched */
+ struct uri *uri;
+ /** Transport layer interface */
+ struct interface socket;
+ /** Multicast transport layer interface */
+ struct interface mc_socket;
+
+ /** Data block size
+ *
+ * This is the "blksize" option negotiated with the TFTP
+ * server. (If the TFTP server does not support TFTP options,
+ * this will default to 512).
+ */
+ unsigned int blksize;
+ /** File size
+ *
+ * This is the value returned in the "tsize" option from the
+ * TFTP server. If the TFTP server does not support the
+ * "tsize" option, this value will be zero.
+ */
+ unsigned long tsize;
+
+ /** Server port
+ *
+ * This is the port to which RRQ packets are sent.  It is used
+ * each time the socket is reopened.
+ */
+ unsigned int port;
+ /** Peer address
+ *
+ * The peer address is determined by the first response
+ * received to the TFTP RRQ.  A zero address family means that
+ * no response has been received yet.
+ */
+ struct sockaddr_tcpip peer;
+ /** Request flags (bitmask of TFTP_FL_* values) */
+ unsigned int flags;
+ /** MTFTP timeout count
+ *
+ * Number of timeouts that have occurred while waiting for a
+ * response to an MTFTP open (see MTFTP_MAX_TIMEOUTS).
+ */
+ unsigned int mtftp_timeouts;
+
+ /** Block bitmap
+ *
+ * The first gap in this bitmap identifies the next block
+ * required.
+ */
+ struct bitmap bitmap;
+ /** Maximum known length
+ *
+ * We don't always know the file length in advance. In
+ * particular, if the TFTP server doesn't support the tsize
+ * option, or we are using MTFTP, then we don't know the file
+ * length until we see the end-of-file block (which, in the
+ * case of MTFTP, may not be the last block we see).
+ *
+ * This value is updated whenever we obtain information about
+ * the file length.
+ */
+ size_t filesize;
+ /** Retransmission timer */
+ struct retry_timer timer;
+};
+
+/** TFTP request flags
+ *
+ * These are combined as a bitmask in the "flags" member of
+ * struct tftp_request.
+ */
+enum {
+ /** Send ACK packets */
+ TFTP_FL_SEND_ACK = 0x0001,
+ /** Request blksize and tsize options */
+ TFTP_FL_RRQ_SIZES = 0x0002,
+ /** Request multicast option */
+ TFTP_FL_RRQ_MULTICAST = 0x0004,
+ /** Perform MTFTP recovery on timeout */
+ TFTP_FL_MTFTP_RECOVERY = 0x0008,
+ /** Only get filesize and then abort the transfer */
+ TFTP_FL_SIZEONLY = 0x0010,
+};
+
+/** Maximum number of MTFTP open requests before falling back to TFTP
+ *
+ * The fallback is triggered once the number of timeouts exceeds this
+ * value.
+ */
+#define MTFTP_MAX_TIMEOUTS 3
+
+/**
+ * Free TFTP request
+ *
+ * @v refcnt		Reference counter
+ *
+ * Releases the URI reference and the block bitmap before freeing the
+ * request itself.
+ */
+static void tftp_free ( struct refcnt *refcnt ) {
+	struct tftp_request *tftp;
+
+	/* Recover the request from its embedded reference counter */
+	tftp = container_of ( refcnt, struct tftp_request, refcnt );
+
+	/* Release owned resources, then the request */
+	uri_put ( tftp->uri );
+	bitmap_free ( &tftp->bitmap );
+	free ( tftp );
+}
+
+/**
+ * Mark TFTP request as complete
+ *
+ * @v tftp TFTP connection
+ * @v rc Return status code
+ *
+ * Stops the retry timer and shuts down the unicast socket, the
+ * multicast socket and the data transfer interface, passing the
+ * given status to each.
+ */
+static void tftp_done ( struct tftp_request *tftp, int rc ) {
+
+ DBGC ( tftp, "TFTP %p finished with status %d (%s)\n",
+ tftp, rc, strerror ( rc ) );
+
+ /* Stop the retry timer */
+ stop_timer ( &tftp->timer );
+
+ /* Close all data transfer interfaces */
+ intf_shutdown ( &tftp->socket, rc );
+ intf_shutdown ( &tftp->mc_socket, rc );
+ intf_shutdown ( &tftp->xfer, rc );
+}
+
+/**
+ * Reopen TFTP socket
+ *
+ * @v tftp		TFTP connection
+ * @ret rc		Return status code
+ *
+ * Closes the unicast socket, forgets the peer (and stops sending
+ * ACKs), then opens a fresh socket to the URI's host on the port
+ * currently recorded in the request.
+ */
+static int tftp_reopen ( struct tftp_request *tftp ) {
+	struct sockaddr_tcpip server;
+	int rc;
+
+	/* Close socket */
+	intf_restart ( &tftp->socket, 0 );
+
+	/* Return to the "no response seen yet" state: no ACKs are
+	 * sent and there is no known peer address.
+	 */
+	tftp->flags &= ~TFTP_FL_SEND_ACK;
+	memset ( &tftp->peer, 0, sizeof ( tftp->peer ) );
+
+	/* Open socket; only the port of the server address is
+	 * specified, the host being resolved by name.
+	 */
+	memset ( &server, 0, sizeof ( server ) );
+	server.st_port = htons ( tftp->port );
+	rc = xfer_open_named_socket ( &tftp->socket, SOCK_DGRAM,
+				      ( struct sockaddr * ) &server,
+				      tftp->uri->host, NULL );
+	if ( rc != 0 ) {
+		DBGC ( tftp, "TFTP %p could not open socket: %s\n",
+		       tftp, strerror ( rc ) );
+	}
+
+	return rc;
+}
+
+/**
+ * Reopen TFTP multicast socket
+ *
+ * @v tftp		TFTP connection
+ * @v local		Local socket address
+ * @ret rc		Return status code
+ */
+static int tftp_reopen_mc ( struct tftp_request *tftp,
+			    struct sockaddr *local ) {
+	int rc;
+
+	/* Close multicast socket */
+	intf_restart ( &tftp->mc_socket, 0 );
+
+	/* Open multicast socket.  Nothing is ever sent via this
+	 * socket, but the peer address may not be NULL, so the local
+	 * address doubles as the peer address.
+	 */
+	rc = xfer_open_socket ( &tftp->mc_socket, SOCK_DGRAM, local, local );
+	if ( rc != 0 ) {
+		DBGC ( tftp, "TFTP %p could not open multicast "
+		       "socket: %s\n", tftp, strerror ( rc ) );
+	}
+
+	return rc;
+}
+
+/**
+ * Presize TFTP receive buffers and block bitmap
+ *
+ * @v tftp		TFTP connection
+ * @v filesize		Known minimum file size
+ * @ret rc		Return status code
+ *
+ * The recorded file size only ever grows; a size no larger than the
+ * one already known is ignored.
+ */
+static int tftp_presize ( struct tftp_request *tftp, size_t filesize ) {
+	unsigned int num_blocks;
+	int rc;
+
+	/* Do nothing if we are already large enough */
+	if ( tftp->filesize >= filesize )
+		return 0;
+
+	/* Record filesize */
+	tftp->filesize = filesize;
+
+	/* Notify recipient of file size by seeking to the end and
+	 * then back to the start
+	 */
+	xfer_seek ( &tftp->xfer, filesize );
+	xfer_seek ( &tftp->xfer, 0 );
+
+	/* Calculate expected number of blocks.  A file whose length
+	 * is an exact multiple of the blocksize ends with a
+	 * zero-length block, which must be counted too.
+	 */
+	num_blocks = ( 1 + ( filesize / tftp->blksize ) );
+	rc = bitmap_resize ( &tftp->bitmap, num_blocks );
+	if ( rc != 0 ) {
+		DBGC ( tftp, "TFTP %p could not resize bitmap to %d blocks: "
+		       "%s\n", tftp, num_blocks, strerror ( rc ) );
+	}
+
+	return rc;
+}
+
+/**
+ * MTFTP multicast receive address
+ *
+ * This is treated as a global configuration parameter.  It defaults
+ * to 239.255.1.1 port 3001 and may be changed via
+ * tftp_set_mtftp_address() and tftp_set_mtftp_port().
+ */
+static struct sockaddr_in tftp_mtftp_socket = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl ( 0xefff0101 ),
+ .sin_port = htons ( 3001 ),
+};
+
+/**
+ * Set MTFTP multicast address
+ *
+ * @v address Multicast IPv4 address
+ *
+ * Replaces the address of the global MTFTP multicast receive
+ * address; the port is left unchanged.
+ */
+void tftp_set_mtftp_address ( struct in_addr address ) {
+ tftp_mtftp_socket.sin_addr = address;
+}
+
+/**
+ * Set MTFTP multicast port
+ *
+ * @v port Multicast port (in host byte order)
+ *
+ * Replaces the port of the global MTFTP multicast receive address;
+ * the address is left unchanged.
+ */
+void tftp_set_mtftp_port ( unsigned int port ) {
+ tftp_mtftp_socket.sin_port = htons ( port );
+}
+
+/**
+ * Transmit RRQ
+ *
+ * @v tftp TFTP connection
+ * @ret rc Return status code
+ *
+ * The request consists of the opcode, the URI path and the "octet"
+ * mode string.  The "blksize" and "tsize" options are appended when
+ * TFTP_FL_RRQ_SIZES is set, and the "multicast" option when
+ * TFTP_FL_RRQ_MULTICAST is set.  Returns -ENOMEM if no I/O buffer
+ * can be allocated.
+ */
+static int tftp_send_rrq ( struct tftp_request *tftp ) {
+ const char *path = tftp->uri->path;
+ struct tftp_rrq *rrq;
+ size_t len;
+ struct io_buffer *iobuf;
+ size_t blksize;
+
+ DBGC ( tftp, "TFTP %p requesting \"%s\"\n", tftp, path );
+
+ /* Allocate buffer, sized for the largest possible request (all
+  * options present)
+  */
+ len = ( sizeof ( *rrq ) + strlen ( path ) + 1 /* NUL */
+ + 5 + 1 /* "octet" + NUL */
+ + 7 + 1 + 5 + 1 /* "blksize" + NUL + ddddd + NUL */
+ + 5 + 1 + 1 + 1 /* "tsize" + NUL + "0" + NUL */
+ + 9 + 1 + 1 /* "multicast" + NUL + NUL */ );
+ iobuf = xfer_alloc_iob ( &tftp->socket, len );
+ if ( ! iobuf )
+ return -ENOMEM;
+
+ /* Determine block size: the data transfer window, capped at
+  * TFTP_MAX_BLKSIZE
+  */
+ blksize = xfer_window ( &tftp->xfer );
+ if ( blksize > TFTP_MAX_BLKSIZE )
+ blksize = TFTP_MAX_BLKSIZE;
+
+ /* Build request.  Embedded NULs are produced with "%c" and a
+  * zero argument; the "+ 1" after each snprintf() accounts for
+  * the terminating NUL, which is part of the packet.
+  */
+ rrq = iob_put ( iobuf, sizeof ( *rrq ) );
+ rrq->opcode = htons ( TFTP_RRQ );
+ iob_put ( iobuf, snprintf ( iobuf->tail, iob_tailroom ( iobuf ),
+ "%s%coctet", path, 0 ) + 1 );
+ if ( tftp->flags & TFTP_FL_RRQ_SIZES ) {
+ iob_put ( iobuf, snprintf ( iobuf->tail,
+ iob_tailroom ( iobuf ),
+ "blksize%c%zd%ctsize%c0",
+ 0, blksize, 0, 0 ) + 1 );
+ }
+ if ( tftp->flags & TFTP_FL_RRQ_MULTICAST ) {
+ iob_put ( iobuf, snprintf ( iobuf->tail,
+ iob_tailroom ( iobuf ),
+ "multicast%c", 0 ) + 1 );
+ }
+
+ /* RRQ always goes to the address specified in the initial
+ * xfer_open() call
+ */
+ return xfer_deliver_iob ( &tftp->socket, iobuf );
+}
+
+/**
+ * Transmit ACK
+ *
+ * @v tftp		TFTP connection
+ * @ret rc		Return status code
+ *
+ * The block number carried by the ACK is the index of the first gap
+ * in the block bitmap.  Returns -ENOMEM if no I/O buffer can be
+ * allocated.
+ */
+static int tftp_send_ack ( struct tftp_request *tftp ) {
+	struct xfer_metadata meta = {
+		.dest = ( struct sockaddr * ) &tftp->peer,
+	};
+	struct io_buffer *iobuf;
+	struct tftp_ack *ack;
+	unsigned int next_block;
+
+	/* Determine next required block number */
+	next_block = bitmap_first_gap ( &tftp->bitmap );
+	DBGC2 ( tftp, "TFTP %p sending ACK for block %d\n",
+		tftp, next_block );
+
+	/* Allocate buffer */
+	iobuf = xfer_alloc_iob ( &tftp->socket, sizeof ( *ack ) );
+	if ( iobuf == NULL )
+		return -ENOMEM;
+
+	/* Build ACK */
+	ack = iob_put ( iobuf, sizeof ( *ack ) );
+	ack->block = htons ( next_block );
+	ack->opcode = htons ( TFTP_ACK );
+
+	/* ACK always goes to the peer recorded from the RRQ response */
+	return xfer_deliver ( &tftp->socket, iobuf, &meta );
+}
+
+/**
+ * Transmit ERROR (Abort)
+ *
+ * @v tftp		TFTP connection
+ * @v errcode		TFTP error code
+ * @v errmsg		Error message string
+ * @ret rc		Return status code
+ *
+ * Returns -ENOMEM if no I/O buffer can be allocated.
+ */
+static int tftp_send_error ( struct tftp_request *tftp, int errcode,
+			     const char *errmsg ) {
+	struct xfer_metadata meta = {
+		.dest = ( struct sockaddr * ) &tftp->peer,
+	};
+	struct io_buffer *iobuf;
+	struct tftp_error *err;
+	size_t errmsg_size;
+	size_t msglen;
+
+	DBGC2 ( tftp, "TFTP %p sending ERROR %d: %s\n", tftp, errcode,
+		errmsg );
+
+	/* Allocate buffer for the header plus the NUL-terminated
+	 * message
+	 */
+	errmsg_size = ( strlen ( errmsg ) + 1 /* NUL */ );
+	msglen = ( sizeof ( *err ) + errmsg_size );
+	iobuf = xfer_alloc_iob ( &tftp->socket, msglen );
+	if ( iobuf == NULL )
+		return -ENOMEM;
+
+	/* Build ERROR */
+	err = iob_put ( iobuf, msglen );
+	err->opcode = htons ( TFTP_ERROR );
+	err->errcode = htons ( errcode );
+	memcpy ( err->errmsg, errmsg, errmsg_size );
+
+	/* ERR always goes to the peer recorded from the RRQ response */
+	return xfer_deliver ( &tftp->socket, iobuf, &meta );
+}
+
+/**
+ * Transmit next relevant packet
+ *
+ * @v tftp		TFTP connection
+ * @ret rc		Return status code
+ *
+ * An RRQ is sent while no peer is known.  Once a peer is known, an
+ * ACK is sent if ACKs are enabled; otherwise nothing is sent and
+ * zero is returned.
+ */
+static int tftp_send_packet ( struct tftp_request *tftp ) {
+
+	/* Update retransmission timer.  The window is zero while name
+	 * resolution is in progress; retry immediately in that case
+	 * so that no time is wasted once resolution completes.
+	 */
+	stop_timer ( &tftp->timer );
+	if ( ! xfer_window ( &tftp->socket ) ) {
+		start_timer_nodelay ( &tftp->timer );
+	} else {
+		start_timer ( &tftp->timer );
+	}
+
+	/* No peer yet: (re)send the RRQ */
+	if ( ! tftp->peer.st_family )
+		return tftp_send_rrq ( tftp );
+
+	/* Peer known: send an ACK if we are responsible for ACKs */
+	if ( tftp->flags & TFTP_FL_SEND_ACK )
+		return tftp_send_ack ( tftp );
+
+	return 0;
+}
+
+/**
+ * Handle TFTP retransmission timer expiry
+ *
+ * @v timer Retry timer
+ * @v fail Failure indicator
+ *
+ * The failure indicator is consulted only when MTFTP recovery is not
+ * in use.  With MTFTP recovery enabled, the socket is reopened if a
+ * response has already been seen; otherwise timeouts are counted and
+ * the request falls back to plain TFTP once MTFTP_MAX_TIMEOUTS is
+ * exceeded.  Any error terminates the request via tftp_done().
+ */
+static void tftp_timer_expired ( struct retry_timer *timer, int fail ) {
+ struct tftp_request *tftp =
+ container_of ( timer, struct tftp_request, timer );
+ int rc;
+
+ /* If we are doing MTFTP, attempt the various recovery strategies */
+ if ( tftp->flags & TFTP_FL_MTFTP_RECOVERY ) {
+ if ( tftp->peer.st_family ) {
+ /* If we have received any response from the server,
+ * try resending the RRQ to restart the download.
+ */
+ DBGC ( tftp, "TFTP %p attempting reopen\n", tftp );
+ if ( ( rc = tftp_reopen ( tftp ) ) != 0 )
+ goto err;
+ } else {
+ /* Fall back to plain TFTP after several attempts */
+ tftp->mtftp_timeouts++;
+ DBGC ( tftp, "TFTP %p timeout %d waiting for MTFTP "
+ "open\n", tftp, tftp->mtftp_timeouts );
+
+ if ( tftp->mtftp_timeouts > MTFTP_MAX_TIMEOUTS ) {
+ DBGC ( tftp, "TFTP %p falling back to plain "
+ "TFTP\n", tftp );
+ tftp->flags = TFTP_FL_RRQ_SIZES;
+
+ /* Close multicast socket */
+ intf_restart ( &tftp->mc_socket, 0 );
+
+ /* Reset retry timer */
+ start_timer_nodelay ( &tftp->timer );
+
+ /* The blocksize may change: discard
+ * the block bitmap
+ */
+ bitmap_free ( &tftp->bitmap );
+ memset ( &tftp->bitmap, 0,
+ sizeof ( tftp->bitmap ) );
+
+ /* Reopen on standard TFTP port */
+ tftp->port = TFTP_PORT;
+ if ( ( rc = tftp_reopen ( tftp ) ) != 0 )
+ goto err;
+ }
+ }
+ } else {
+ /* Not doing MTFTP (or have fallen back to plain
+ * TFTP); fail as per normal.
+ */
+ if ( fail ) {
+ rc = -ETIMEDOUT;
+ goto err;
+ }
+ }
+ tftp_send_packet ( tftp );
+ return;
+
+ err:
+ tftp_done ( tftp, rc );
+}
+
+/**
+ * Process TFTP "blksize" option
+ *
+ * @v tftp		TFTP connection
+ * @v value		Option value
+ * @ret rc		Return status code
+ *
+ * The value must be a complete, non-zero decimal number that fits
+ * the block size field; anything else is rejected with
+ * -EINVAL_BLKSIZE and leaves the current block size untouched.
+ */
+static int tftp_process_blksize ( struct tftp_request *tftp,
+				  const char *value ) {
+	unsigned long parsed;
+	unsigned int blksize;
+	char *end;
+
+	/* Parse the value.  The block size comes from the server and
+	 * is later used as a divisor when sizing the block bitmap, so
+	 * a zero value (including an empty string, or a value that
+	 * would be truncated to zero) must never be accepted.
+	 */
+	parsed = strtoul ( value, &end, 10 );
+	blksize = parsed;
+	if ( ( end == value ) || *end || ( blksize == 0 ) ||
+	     ( blksize != parsed ) ) {
+		DBGC ( tftp, "TFTP %p got invalid blksize \"%s\"\n",
+		       tftp, value );
+		return -EINVAL_BLKSIZE;
+	}
+
+	/* Record the negotiated block size */
+	tftp->blksize = blksize;
+	DBGC ( tftp, "TFTP %p blksize=%d\n", tftp, tftp->blksize );
+
+	return 0;
+}
+
+/**
+ * Process TFTP "tsize" option
+ *
+ * @v tftp		TFTP connection
+ * @v value		Option value
+ * @ret rc		Return status code
+ *
+ * The parsed number is stored in the request even if trailing
+ * characters cause the value to be rejected with -EINVAL_TSIZE.
+ */
+static int tftp_process_tsize ( struct tftp_request *tftp,
+				const char *value ) {
+	char *end;
+
+	/* Parse the value as a decimal number */
+	tftp->tsize = strtoul ( value, &end, 10 );
+
+	/* Accept only if the whole string was consumed */
+	if ( *end == '\0' ) {
+		DBGC ( tftp, "TFTP %p tsize=%ld\n", tftp, tftp->tsize );
+		return 0;
+	}
+
+	DBGC ( tftp, "TFTP %p got invalid tsize \"%s\"\n",
+	       tftp, value );
+	return -EINVAL_TSIZE;
+}
+
+/**
+ * Process TFTP "multicast" option
+ *
+ * @v tftp		TFTP connection
+ * @v value		Option value
+ * @ret rc		Return status code
+ *
+ * The value has the form "addr,port,mc".  An "mc" of zero means that
+ * we are not the master client, so ACK sending is disabled.  The
+ * multicast socket is reopened only when both the address and the
+ * port fields are non-empty.
+ */
+static int tftp_process_multicast ( struct tftp_request *tftp,
+				    const char *value ) {
+	union {
+		struct sockaddr sa;
+		struct sockaddr_in sin;
+	} socket;
+	char buf[ strlen ( value ) + 1 ];
+	char *addr;
+	char *port;
+	char *port_end;
+	char *mc;
+	char *mc_end;
+	int rc;
+
+	/* Split value into "addr,port,mc" fields, working on a private
+	 * copy since the separators are overwritten with NULs
+	 */
+	memcpy ( buf, value, sizeof ( buf ) );
+	addr = buf;
+	port = strchr ( addr, ',' );
+	if ( ! port ) {
+		DBGC ( tftp, "TFTP %p multicast missing port,mc\n", tftp );
+		return -EINVAL_MC_NO_PORT;
+	}
+	*(port++) = '\0';
+	mc = strchr ( port, ',' );
+	if ( ! mc ) {
+		DBGC ( tftp, "TFTP %p multicast missing mc\n", tftp );
+		return -EINVAL_MC_NO_MC;
+	}
+	*(mc++) = '\0';
+
+	/* Parse parameters */
+	if ( strtoul ( mc, &mc_end, 0 ) == 0 )
+		tftp->flags &= ~TFTP_FL_SEND_ACK;
+	if ( *mc_end ) {
+		DBGC ( tftp, "TFTP %p multicast invalid mc %s\n", tftp, mc );
+		return -EINVAL_MC_INVALID_MC;
+	}
+	DBGC ( tftp, "TFTP %p is%s the master client\n",
+	       tftp, ( ( tftp->flags & TFTP_FL_SEND_ACK ) ? "" : " not" ) );
+	if ( *addr && *port ) {
+		/* Start from an all-zero address, so that no
+		 * uninitialised stack contents are handed to the
+		 * socket layer along with the fields set below.
+		 */
+		memset ( &socket, 0, sizeof ( socket ) );
+		socket.sin.sin_family = AF_INET;
+		if ( inet_aton ( addr, &socket.sin.sin_addr ) == 0 ) {
+			DBGC ( tftp, "TFTP %p multicast invalid IP address "
+			       "%s\n", tftp, addr );
+			return -EINVAL_MC_INVALID_IP;
+		}
+		DBGC ( tftp, "TFTP %p multicast IP address %s\n",
+		       tftp, inet_ntoa ( socket.sin.sin_addr ) );
+		socket.sin.sin_port = htons ( strtoul ( port, &port_end, 0 ) );
+		if ( *port_end ) {
+			DBGC ( tftp, "TFTP %p multicast invalid port %s\n",
+			       tftp, port );
+			return -EINVAL_MC_INVALID_PORT;
+		}
+		DBGC ( tftp, "TFTP %p multicast port %d\n",
+		       tftp, ntohs ( socket.sin.sin_port ) );
+		if ( ( rc = tftp_reopen_mc ( tftp, &socket.sa ) ) != 0 )
+			return rc;
+	}
+
+	return 0;
+}
+
+/** A TFTP option
+ *
+ * Associates an option name, as it appears in an OACK, with the
+ * function that processes its value.
+ */
+struct tftp_option {
+ /** Option name */
+ const char *name;
+ /** Option processor
+ *
+ * @v tftp TFTP connection
+ * @v value Option value
+ * @ret rc Return status code
+ */
+ int ( * process ) ( struct tftp_request *tftp, const char *value );
+};
+
+/** Recognised TFTP options
+ *
+ * The table is terminated by an entry with a NULL name.
+ */
+static struct tftp_option tftp_options[] = {
+ { "blksize", tftp_process_blksize },
+ { "tsize", tftp_process_tsize },
+ { "multicast", tftp_process_multicast },
+ { NULL, NULL }
+};
+
+/**
+ * Process TFTP option
+ *
+ * @v tftp		TFTP connection
+ * @v name		Option name
+ * @v value		Option value
+ * @ret rc		Return status code
+ *
+ * The name is matched case-insensitively against the table of
+ * recognised options.  An unrecognised option is not an error.
+ */
+static int tftp_process_option ( struct tftp_request *tftp,
+				 const char *name, const char *value ) {
+	struct tftp_option *option = tftp_options;
+
+	/* Hand the value to the first matching option processor */
+	while ( option->name ) {
+		if ( strcasecmp ( name, option->name ) == 0 )
+			return option->process ( tftp, value );
+		option++;
+	}
+
+	DBGC ( tftp, "TFTP %p received unknown option \"%s\" = \"%s\"\n",
+	       tftp, name, value );
+
+	/* Unknown options should be silently ignored */
+	return 0;
+}
+
+/**
+ * Receive OACK
+ *
+ * @v tftp TFTP connection
+ * @v buf Temporary data buffer
+ * @v len Length of temporary data buffer
+ * @ret rc Return status code
+ *
+ * Each "name\0value\0" pair in the packet is passed to
+ * tftp_process_option().  Malformed trailing data ends option
+ * parsing without failing the request, whereas an error from an
+ * option processor (or from presizing) terminates the request via
+ * tftp_done().  In size-only mode the transfer is aborted once the
+ * options have been processed, and zero is returned.
+ */
+static int tftp_rx_oack ( struct tftp_request *tftp, void *buf, size_t len ) {
+ struct tftp_oack *oack = buf;
+ char *end = buf + len;
+ char *name;
+ char *value;
+ char *next;
+ int rc = 0;
+
+ /* Sanity check */
+ if ( len < sizeof ( *oack ) ) {
+ DBGC ( tftp, "TFTP %p received underlength OACK packet "
+ "length %zd\n", tftp, len );
+ rc = -EINVAL;
+ goto done;
+ }
+
+ /* Process each option in turn */
+ for ( name = oack->data ; name < end ; name = next ) {
+
+ /* Parse option name and value
+ *
+ * We treat parsing errors as non-fatal, because there
+ * exists at least one TFTP server (IBM Tivoli PXE
+ * Server 5.1.0.3) that has been observed to send
+ * malformed OACKs containing trailing garbage bytes.
+ *
+ * The strnlen() calls are bounded by the end of the
+ * packet, so an unterminated string is detected by the
+ * resulting pointer lying beyond the end.
+ */
+ value = ( name + strnlen ( name, ( end - name ) ) + 1 );
+ if ( value > end ) {
+ DBGC ( tftp, "TFTP %p received OACK with malformed "
+ "option name:\n", tftp );
+ DBGC_HD ( tftp, oack, len );
+ break;
+ }
+ if ( value == end ) {
+ DBGC ( tftp, "TFTP %p received OACK missing value "
+ "for option \"%s\"\n", tftp, name );
+ DBGC_HD ( tftp, oack, len );
+ break;
+ }
+ next = ( value + strnlen ( value, ( end - value ) ) + 1 );
+ if ( next > end ) {
+ DBGC ( tftp, "TFTP %p received OACK with malformed "
+ "value for option \"%s\":\n", tftp, name );
+ DBGC_HD ( tftp, oack, len );
+ break;
+ }
+
+ /* Process option */
+ if ( ( rc = tftp_process_option ( tftp, name, value ) ) != 0 )
+ goto done;
+ }
+
+ /* Process tsize information, if available */
+ if ( tftp->tsize ) {
+ if ( ( rc = tftp_presize ( tftp, tftp->tsize ) ) != 0 )
+ goto done;
+ }
+
+ /* Abort request if only trying to determine file size */
+ if ( tftp->flags & TFTP_FL_SIZEONLY ) {
+ rc = 0;
+ tftp_send_error ( tftp, 0, "TFTP Aborted" );
+ tftp_done ( tftp, rc );
+ return rc;
+ }
+
+ /* Request next data block */
+ tftp_send_packet ( tftp );
+
+ done:
+ if ( rc )
+ tftp_done ( tftp, rc );
+ return rc;
+}
+
+/**
+ * Receive DATA
+ *
+ * @v tftp TFTP connection
+ * @v iobuf I/O buffer
+ * @ret rc Return status code
+ *
+ * Takes ownership of I/O buffer.
+ *
+ * Delivers the payload to the data transfer interface at an absolute
+ * offset derived from the block number, marks the block as received
+ * in the bitmap, sends the next packet, and finishes the request once
+ * the bitmap is full. Any non-zero status terminates the request via
+ * tftp_done().
+ */
+static int tftp_rx_data ( struct tftp_request *tftp,
+ struct io_buffer *iobuf ) {
+ struct tftp_data *data = iobuf->data;
+ struct xfer_metadata meta;
+ unsigned int block;
+ off_t offset;
+ size_t data_len;
+ int rc;
+
+ if ( tftp->flags & TFTP_FL_SIZEONLY ) {
+ /* If we get here then server doesn't support SIZE option */
+ rc = -ENOTSUP;
+ tftp_send_error ( tftp, 0, "TFTP Aborted" );
+ goto done;
+ }
+
+ /* Sanity check */
+ if ( iob_len ( iobuf ) < sizeof ( *data ) ) {
+ DBGC ( tftp, "TFTP %p received underlength DATA packet "
+ "length %zd\n", tftp, iob_len ( iobuf ) );
+ rc = -EINVAL;
+ goto done;
+ }
+
+ /* Calculate block number
+ *
+ * The bits above the low 16 are taken from the first gap in
+ * the bitmap (plus one); the 16-bit on-wire block number then
+ * supplies the low bits. The result is zero-based, hence the
+ * subtraction of one and the rejection of an on-wire block 0
+ * when the high bits are also zero.
+ */
+ block = ( ( bitmap_first_gap ( &tftp->bitmap ) + 1 ) & ~0xffff );
+ if ( data->block == 0 && block == 0 ) {
+ DBGC ( tftp, "TFTP %p received data block 0\n", tftp );
+ rc = -EINVAL;
+ goto done;
+ }
+ block += ( ntohs ( data->block ) - 1 );
+
+ /* Extract data */
+ offset = ( block * tftp->blksize );
+ iob_pull ( iobuf, sizeof ( *data ) );
+ data_len = iob_len ( iobuf );
+ if ( data_len > tftp->blksize ) {
+ DBGC ( tftp, "TFTP %p received overlength DATA packet "
+ "length %zd\n", tftp, data_len );
+ rc = -EINVAL;
+ goto done;
+ }
+
+ /* Deliver data
+ *
+ * NOTE(review): iob_disown() presumably clears the local
+ * pointer so that the free_iob() at "done" does not free the
+ * buffer a second time - confirm against ipxe/iobuf.h.
+ */
+ memset ( &meta, 0, sizeof ( meta ) );
+ meta.flags = XFER_FL_ABS_OFFSET;
+ meta.offset = offset;
+ if ( ( rc = xfer_deliver ( &tftp->xfer, iob_disown ( iobuf ),
+ &meta ) ) != 0 ) {
+ DBGC ( tftp, "TFTP %p could not deliver data: %s\n",
+ tftp, strerror ( rc ) );
+ goto done;
+ }
+
+ /* Ensure block bitmap is ready */
+ if ( ( rc = tftp_presize ( tftp, ( offset + data_len ) ) ) != 0 )
+ goto done;
+
+ /* Mark block as received */
+ bitmap_set ( &tftp->bitmap, block );
+
+ /* Acknowledge block */
+ tftp_send_packet ( tftp );
+
+ /* If all blocks have been received, finish. */
+ if ( bitmap_full ( &tftp->bitmap ) )
+ tftp_done ( tftp, 0 );
+
+ done:
+ /* Reached on success as well as failure; rc is zero on success */
+ free_iob ( iobuf );
+ if ( rc )
+ tftp_done ( tftp, rc );
+ return rc;
+}
+
+/**
+ * Map a TFTP protocol error code onto a return status code
+ *
+ * @v errcode		TFTP error code
+ * @ret rc		Return status code
+ *
+ * Error codes without a dedicated mapping yield -ENOTSUP.
+ */
+static int tftp_errcode_to_rc ( unsigned int errcode ) {
+	int rc;
+
+	switch ( errcode ) {
+	case TFTP_ERR_FILE_NOT_FOUND:
+		rc = -ENOENT;
+		break;
+	case TFTP_ERR_ACCESS_DENIED:
+		rc = -EACCES;
+		break;
+	case TFTP_ERR_ILLEGAL_OP:
+		rc = -ENOTTY;
+		break;
+	default:
+		rc = -ENOTSUP;
+		break;
+	}
+
+	return rc;
+}
+
+/**
+ * Handle a received ERROR packet
+ *
+ * @v tftp		TFTP connection
+ * @v buf		Temporary data buffer
+ * @v len		Length of temporary data buffer
+ * @ret rc		Return status code
+ *
+ * An underlength packet is rejected with -EINVAL.  Otherwise the
+ * request is closed with the status derived from the packet's error
+ * code, and zero is returned.
+ */
+static int tftp_rx_error ( struct tftp_request *tftp, void *buf, size_t len ) {
+	struct tftp_error *error = buf;
+	int errcode;
+
+	/* Reject packets too short to hold the error header */
+	if ( len < sizeof ( *error ) ) {
+		DBGC ( tftp, "TFTP %p received underlength ERROR packet "
+		       "length %zd\n", tftp, len );
+		return -EINVAL;
+	}
+
+	/* Extract error code in host byte order */
+	errcode = ntohs ( error->errcode );
+	DBGC ( tftp, "TFTP %p received ERROR packet with code %d, message "
+	       "\"%s\"\n", tftp, errcode, error->errmsg );
+
+	/* Close the request with the corresponding status */
+	tftp_done ( tftp, tftp_errcode_to_rc ( errcode ) );
+
+	return 0;
+}
+
+/**
+ * Receive new data
+ *
+ * @v tftp TFTP connection
+ * @v iobuf I/O buffer
+ * @v meta Transfer metadata
+ * @ret rc Return status code
+ *
+ * Validates the packet length and source, records the peer address
+ * from the first packet received, then dispatches on the opcode to
+ * the OACK, DATA or ERROR handler. The I/O buffer is released on
+ * every path. The status is -EINVAL unless a handler replaces it.
+ */
+static int tftp_rx ( struct tftp_request *tftp,
+ struct io_buffer *iobuf,
+ struct xfer_metadata *meta ) {
+ struct sockaddr_tcpip *st_src;
+ struct tftp_common *common = iobuf->data;
+ size_t len = iob_len ( iobuf );
+ int rc = -EINVAL;
+
+ /* Sanity checks */
+ if ( len < sizeof ( *common ) ) {
+ DBGC ( tftp, "TFTP %p received underlength packet length "
+ "%zd\n", tftp, len );
+ goto done;
+ }
+ if ( ! meta->src ) {
+ DBGC ( tftp, "TFTP %p received packet without source port\n",
+ tftp );
+ goto done;
+ }
+
+ /* Filter by TID. Set TID on first response received
+ *
+ * A zero st_family in the stored peer address is used as the
+ * "no peer recorded yet" marker.
+ */
+ st_src = ( struct sockaddr_tcpip * ) meta->src;
+ if ( ! tftp->peer.st_family ) {
+ memcpy ( &tftp->peer, st_src, sizeof ( tftp->peer ) );
+ DBGC ( tftp, "TFTP %p using remote port %d\n", tftp,
+ ntohs ( tftp->peer.st_port ) );
+ } else if ( memcmp ( &tftp->peer, st_src,
+ sizeof ( tftp->peer ) ) != 0 ) {
+ DBGC ( tftp, "TFTP %p received packet from wrong source (got "
+ "%d, wanted %d)\n", tftp, ntohs ( st_src->st_port ),
+ ntohs ( tftp->peer.st_port ) );
+ goto done;
+ }
+
+ /* Dispatch on opcode; the on-wire (network byte order) value
+ * is compared against htons() of each opcode constant.
+ *
+ * NOTE(review): the DATA handler is given the buffer via
+ * iob_disown(), which presumably clears the local pointer so
+ * that the free_iob() at "done" is harmless - confirm.
+ */
+ switch ( common->opcode ) {
+ case htons ( TFTP_OACK ):
+ rc = tftp_rx_oack ( tftp, iobuf->data, len );
+ break;
+ case htons ( TFTP_DATA ):
+ rc = tftp_rx_data ( tftp, iob_disown ( iobuf ) );
+ break;
+ case htons ( TFTP_ERROR ):
+ rc = tftp_rx_error ( tftp, iobuf->data, len );
+ break;
+ default:
+ DBGC ( tftp, "TFTP %p received strange packet type %d\n",
+ tftp, ntohs ( common->opcode ) );
+ break;
+ };
+
+ done:
+ free_iob ( iobuf );
+ return rc;
+}
+
+/**
+ * Receive new data via the unicast socket
+ *
+ * @v tftp		TFTP connection
+ * @v iobuf		I/O buffer
+ * @v meta		Transfer metadata
+ * @ret rc		Return status code
+ *
+ * Enables ACK transmission and then hands the packet to tftp_rx().
+ */
+static int tftp_socket_deliver ( struct tftp_request *tftp,
+				 struct io_buffer *iobuf,
+				 struct xfer_metadata *meta ) {
+	int rc;
+
+	/* Receiving a unicast packet turns on the sending of ACKs.
+	 * Three situations are covered by doing so:
+	 *
+	 * 1. Standard TFTP: ACKs must always be sent, and a unicast
+	 *    packet always arrives before the first ACK is due.
+	 *
+	 * 2. RFC2090 multicast TFTP: OACKs are the only unicast
+	 *    packets received.  ACKs are enabled here, before the
+	 *    OACK is processed, and are disabled again while
+	 *    processing the multicast option if we turn out not to
+	 *    be the master client.
+	 *
+	 * 3. MTFTP: a unicast datagram means that we are the
+	 *    "master client" and are expected to send ACKs.
+	 */
+	tftp->flags = ( tftp->flags | TFTP_FL_SEND_ACK );
+
+	/* Process the packet as usual */
+	rc = tftp_rx ( tftp, iobuf, meta );
+	return rc;
+}
+
+/** TFTP socket operations
+ *
+ * Data delivered on this interface is handled by
+ * tftp_socket_deliver().
+ */
+static struct interface_operation tftp_socket_operations[] = {
+ INTF_OP ( xfer_deliver, struct tftp_request *, tftp_socket_deliver ),
+};
+
+/** TFTP socket interface descriptor
+ *
+ * Binds the operations above to the "socket" member of struct
+ * tftp_request.
+ */
+static struct interface_descriptor tftp_socket_desc =
+ INTF_DESC ( struct tftp_request, socket, tftp_socket_operations );
+
+/** TFTP multicast socket operations
+ *
+ * Data delivered on this interface goes straight to tftp_rx(), i.e.
+ * without the ACK-enabling step performed by tftp_socket_deliver().
+ */
+static struct interface_operation tftp_mc_socket_operations[] = {
+ INTF_OP ( xfer_deliver, struct tftp_request *, tftp_rx ),
+};
+
+/** TFTP multicast socket interface descriptor
+ *
+ * Binds the operations above to the "mc_socket" member of struct
+ * tftp_request.
+ */
+static struct interface_descriptor tftp_mc_socket_desc =
+ INTF_DESC ( struct tftp_request, mc_socket, tftp_mc_socket_operations );
+
+/**
+ * Report flow control window
+ *
+ * @v tftp		TFTP connection
+ * @ret len		Length of window
+ *
+ * The value reported is the current TFTP block size.
+ */
+static size_t tftp_xfer_window ( struct tftp_request *tftp ) {
+	size_t window;
+
+	/* This data-xfer method is (ab)used as a way of telling the
+	 * caller what the blocksize is.  A stat()-like method would
+	 * be the proper mechanism, but no such facility exists yet.
+	 */
+	window = tftp->blksize;
+
+	return window;
+}
+
+/** TFTP data transfer interface operations
+ *
+ * Window queries are answered by tftp_xfer_window(); closing the
+ * interface invokes tftp_done().
+ */
+static struct interface_operation tftp_xfer_operations[] = {
+ INTF_OP ( xfer_window, struct tftp_request *, tftp_xfer_window ),
+ INTF_OP ( intf_close, struct tftp_request *, tftp_done ),
+};
+
+/** TFTP data transfer interface descriptor
+ *
+ * Binds the operations above to the "xfer" member of struct
+ * tftp_request.
+ */
+static struct interface_descriptor tftp_xfer_desc =
+ INTF_DESC ( struct tftp_request, xfer, tftp_xfer_operations );
+
+/**
+ * Initiate TFTP/TFTM/MTFTP download
+ *
+ * @v xfer		Data transfer interface
+ * @v uri		Uniform Resource Identifier
+ * @v default_port	Default port, as passed to uri_port()
+ * @v multicast		Multicast socket address, or NULL to skip
+ *			opening a multicast socket
+ * @v flags		Initial request flags
+ * @ret rc		Return status code
+ *
+ * Fails with -EINVAL if the URI lacks a host or a path, and with
+ * -ENOMEM if the request structure cannot be allocated.
+ */
+static int tftp_core_open ( struct interface *xfer, struct uri *uri,
+			    unsigned int default_port,
+			    struct sockaddr *multicast,
+			    unsigned int flags ) {
+	struct tftp_request *tftp;
+	int rc;
+
+	/* Both a host and a path are required */
+	if ( ( ! uri->host ) || ( ! uri->path ) )
+		return -EINVAL;
+
+	/* Create the request structure */
+	tftp = zalloc ( sizeof ( *tftp ) );
+	if ( ! tftp )
+		return -ENOMEM;
+
+	/* Initialise reference count, interfaces and timer */
+	ref_init ( &tftp->refcnt, tftp_free );
+	intf_init ( &tftp->xfer, &tftp_xfer_desc, &tftp->refcnt );
+	intf_init ( &tftp->socket, &tftp_socket_desc, &tftp->refcnt );
+	intf_init ( &tftp->mc_socket, &tftp_mc_socket_desc, &tftp->refcnt );
+	timer_init ( &tftp->timer, tftp_timer_expired, &tftp->refcnt );
+
+	/* Record request parameters */
+	tftp->uri = uri_get ( uri );
+	tftp->blksize = TFTP_DEFAULT_BLKSIZE;
+	tftp->flags = flags;
+	tftp->port = uri_port ( tftp->uri, default_port );
+
+	/* Open the unicast socket */
+	rc = tftp_reopen ( tftp );
+	if ( rc != 0 )
+		goto err;
+
+	/* Open the multicast socket, if one was requested */
+	if ( multicast &&
+	     ( ( rc = tftp_reopen_mc ( tftp, multicast ) ) != 0 ) )
+		goto err;
+
+	/* Kick off the RRQ via an immediately-expiring timer */
+	start_timer_nodelay ( &tftp->timer );
+
+	/* Attach to parent interface and drop our initial reference */
+	intf_plug_plug ( &tftp->xfer, xfer );
+	ref_put ( &tftp->refcnt );
+	return 0;
+
+ err:
+	DBGC ( tftp, "TFTP %p could not create request: %s\n",
+	       tftp, strerror ( rc ) );
+	tftp_done ( tftp, rc );
+	ref_put ( &tftp->refcnt );
+	return rc;
+}
+
+/**
+ * Initiate TFTP download
+ *
+ * @v xfer		Data transfer interface
+ * @v uri		Uniform Resource Identifier
+ * @ret rc		Return status code
+ *
+ * Opens a request on the default TFTP port with no multicast socket.
+ */
+static int tftp_open ( struct interface *xfer, struct uri *uri ) {
+	unsigned int flags = TFTP_FL_RRQ_SIZES;
+
+	return tftp_core_open ( xfer, uri, TFTP_PORT, NULL, flags );
+}
+
+/** TFTP URI opener
+ *
+ * Associates the "tftp" URI scheme with tftp_open().
+ */
+struct uri_opener tftp_uri_opener __uri_opener = {
+ .scheme = "tftp",
+ .open = tftp_open,
+};
+
+/**
+ * Initiate TFTP-size request
+ *
+ * @v xfer		Data transfer interface
+ * @v uri		Uniform Resource Identifier
+ * @ret rc		Return status code
+ *
+ * Opens a request on the default TFTP port, with no multicast socket,
+ * flagged as a size-only request.
+ */
+static int tftpsize_open ( struct interface *xfer, struct uri *uri ) {
+	unsigned int flags = ( TFTP_FL_RRQ_SIZES | TFTP_FL_SIZEONLY );
+
+	return tftp_core_open ( xfer, uri, TFTP_PORT, NULL, flags );
+}
+
+/** TFTP-size URI opener
+ *
+ * Associates the "tftpsize" URI scheme with tftpsize_open().
+ */
+struct uri_opener tftpsize_uri_opener __uri_opener = {
+ .scheme = "tftpsize",
+ .open = tftpsize_open,
+};
+
+/**
+ * Initiate TFTM download
+ *
+ * @v xfer		Data transfer interface
+ * @v uri		Uniform Resource Identifier
+ * @ret rc		Return status code
+ *
+ * Opens a request on the default TFTP port with the multicast option
+ * flag set; no multicast socket is opened at this point.
+ */
+static int tftm_open ( struct interface *xfer, struct uri *uri ) {
+	unsigned int flags = ( TFTP_FL_RRQ_SIZES | TFTP_FL_RRQ_MULTICAST );
+
+	return tftp_core_open ( xfer, uri, TFTP_PORT, NULL, flags );
+}
+
+/** TFTM URI opener
+ *
+ * Associates the "tftm" URI scheme with tftm_open().
+ */
+struct uri_opener tftm_uri_opener __uri_opener = {
+ .scheme = "tftm",
+ .open = tftm_open,
+};
+
+/**
+ * Initiate MTFTP download
+ *
+ * @v xfer		Data transfer interface
+ * @v uri		Uniform Resource Identifier
+ * @ret rc		Return status code
+ *
+ * Opens a request on the MTFTP port, using the MTFTP multicast
+ * socket address and the MTFTP recovery flag.
+ */
+static int mtftp_open ( struct interface *xfer, struct uri *uri ) {
+	struct sockaddr *multicast =
+		( ( struct sockaddr * ) &tftp_mtftp_socket );
+
+	return tftp_core_open ( xfer, uri, MTFTP_PORT, multicast,
+				TFTP_FL_MTFTP_RECOVERY );
+}
+
+/** MTFTP URI opener
+ *
+ * Associates the "mtftp" URI scheme with mtftp_open().
+ */
+struct uri_opener mtftp_uri_opener __uri_opener = {
+ .scheme = "mtftp",
+ .open = mtftp_open,
+};
+
+/******************************************************************************
+ *
+ * Settings
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Apply TFTP configuration settings
+ *
+ * @ret rc		Return status code
+ *
+ * Remembers the TFTP server address between calls and updates the
+ * current working URI only when that address changes.
+ */
+static int tftp_apply_settings ( void ) {
+	static struct in_addr tftp_server = { 0 };
+	struct in_addr previous = tftp_server;
+	struct uri *uri = NULL;
+	char uri_string[32];
+
+	/* Fetch the current TFTP server setting */
+	fetch_ipv4_setting ( NULL, &next_server_setting, &tftp_server );
+
+	/* Leave the current working URI alone unless the TFTP server
+	 * has actually changed.  This keeps surprises to a minimum:
+	 * the user is unlikely to expect the CWURI to move merely
+	 * because an unrelated setting was updated and all of the
+	 * settings applicators were triggered as a result.
+	 */
+	if ( tftp_server.s_addr == previous.s_addr )
+		return 0;
+
+	/* Build a URI for a non-zero server address; a zero address
+	 * results in a NULL URI.
+	 */
+	if ( tftp_server.s_addr ) {
+		snprintf ( uri_string, sizeof ( uri_string ),
+			   "tftp://%s/", inet_ntoa ( tftp_server ) );
+		uri = parse_uri ( uri_string );
+		if ( ! uri )
+			return -ENOMEM;
+	}
+
+	/* Switch to the new URI and drop our reference to it */
+	churi ( uri );
+	uri_put ( uri );
+
+	return 0;
+}
+
+/** TFTP settings applicator
+ *
+ * Registers tftp_apply_settings() as the "apply" method.
+ */
+struct settings_applicator tftp_settings_applicator __settings_applicator = {
+ .apply = tftp_apply_settings,
+};
diff --git a/qemu/roms/ipxe/src/net/validator.c b/qemu/roms/ipxe/src/net/validator.c
new file mode 100644
index 000000000..74d70e312
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/validator.c
@@ -0,0 +1,568 @@
+/*
+ * Copyright (C) 2012 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <ipxe/refcnt.h>
+#include <ipxe/malloc.h>
+#include <ipxe/interface.h>
+#include <ipxe/xfer.h>
+#include <ipxe/open.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/xferbuf.h>
+#include <ipxe/process.h>
+#include <ipxe/x509.h>
+#include <ipxe/settings.h>
+#include <ipxe/dhcp.h>
+#include <ipxe/base64.h>
+#include <ipxe/crc32.h>
+#include <ipxe/ocsp.h>
+#include <ipxe/validator.h>
+
+/** @file
+ *
+ * Certificate validator
+ *
+ */
+
+/** A certificate validator
+ *
+ * Holds the state for one validation attempt: the chain being
+ * validated, any OCSP check in progress, and the buffer and completion
+ * handler used for the current download.
+ */
+struct validator {
+ /** Reference count */
+ struct refcnt refcnt;
+ /** Job control interface */
+ struct interface job;
+ /** Data transfer interface */
+ struct interface xfer;
+
+ /** Process */
+ struct process process;
+
+ /** X.509 certificate chain */
+ struct x509_chain *chain;
+ /** OCSP check */
+ struct ocsp_check *ocsp;
+ /** Data buffer */
+ struct xfer_buffer buffer;
+ /** Action to take upon completed transfer
+ *
+ * Set to validator_append() or validator_ocsp_validate() before
+ * the corresponding download is opened.
+ */
+ int ( * done ) ( struct validator *validator, const void *data,
+ size_t len );
+};
+
+/**
+ * Release a certificate validator
+ *
+ * @v refcnt		Reference count
+ *
+ * Drops the references to the certificate chain and OCSP check,
+ * discards the data buffer, and frees the validator itself.
+ */
+static void validator_free ( struct refcnt *refcnt ) {
+	struct validator *validator;
+
+	/* Recover the validator from its embedded reference count */
+	validator = container_of ( refcnt, struct validator, refcnt );
+	DBGC2 ( validator, "VALIDATOR %p freed\n", validator );
+
+	/* Release everything the validator holds, then the validator */
+	x509_chain_put ( validator->chain );
+	ocsp_put ( validator->ocsp );
+	xferbuf_done ( &validator->buffer );
+	free ( validator );
+}
+
+/**
+ * Terminate certificate validation
+ *
+ * @v validator		Certificate validator
+ * @v rc		Reason for finishing
+ *
+ * Stops the validation process and shuts down both interfaces with
+ * the given status.
+ */
+static void validator_finished ( struct validator *validator, int rc ) {
+
+	/* Stop the validation process */
+	process_del ( &validator->process );
+
+	/* Shut down the data transfer and job control interfaces */
+	intf_shutdown ( &validator->xfer, rc );
+	intf_shutdown ( &validator->job, rc );
+}
+
+/****************************************************************************
+ *
+ * Job control interface
+ *
+ */
+
+/** Certificate validator job control interface operations
+ *
+ * Closing the job control interface invokes validator_finished().
+ */
+static struct interface_operation validator_job_operations[] = {
+ INTF_OP ( intf_close, struct validator *, validator_finished ),
+};
+
+/** Certificate validator job control interface descriptor
+ *
+ * Binds the operations above to the "job" member of struct validator.
+ */
+static struct interface_descriptor validator_job_desc =
+ INTF_DESC ( struct validator, job, validator_job_operations );
+
+/****************************************************************************
+ *
+ * Cross-signing certificates
+ *
+ */
+
+/** Cross-signed certificate source setting
+ *
+ * A string setting named "crosscert"; read by
+ * validator_start_download() to choose the download location.
+ */
+const struct setting crosscert_setting __setting ( SETTING_CRYPTO, crosscert )={
+ .name = "crosscert",
+ .description = "Cross-signed certificate source",
+ .tag = DHCP_EB_CROSS_CERT,
+ .type = &setting_type_string,
+};
+
+/** Default cross-signed certificate source
+ *
+ * Used by validator_start_download() when no copy of the "crosscert"
+ * setting is obtained.
+ */
+static const char crosscert_default[] = "http://ca.ipxe.org/auto";
+
+/**
+ * Append cross-signing certificates to certificate chain
+ *
+ * @v validator		Certificate validator
+ * @v data		Raw cross-signing certificate data
+ * @v len		Length of raw data
+ * @ret rc		Return status code
+ *
+ * The raw data is parsed as an ASN.1 set of certificates, which are
+ * collected in a temporary chain and then offered to the validator's
+ * own chain.  Fails with -EACCES if the validator's chain did not grow
+ * as a result.  The temporary chain is released on every exit path.
+ */
+static int validator_append ( struct validator *validator,
+			      const void *data, size_t len ) {
+	struct asn1_cursor cursor;
+	struct x509_chain *certs;
+	struct x509_certificate *cert;
+	struct x509_certificate *last;
+	int rc;
+
+	/* Allocate certificate list */
+	certs = x509_alloc_chain();
+	if ( ! certs ) {
+		rc = -ENOMEM;
+		goto err_alloc_certs;
+	}
+
+	/* Initialise cursor */
+	cursor.data = data;
+	cursor.len = len;
+
+	/* Enter certificateSet */
+	if ( ( rc = asn1_enter ( &cursor, ASN1_SET ) ) != 0 ) {
+		DBGC ( validator, "VALIDATOR %p could not enter "
+		       "certificateSet: %s\n", validator, strerror ( rc ) );
+		goto err_certificateset;
+	}
+
+	/* Add each certificate to list */
+	while ( cursor.len ) {
+
+		/* Add certificate to chain.  On failure, leave via the
+		 * cleanup labels so that the temporary chain is
+		 * released rather than leaked.
+		 */
+		if ( ( rc = x509_append_raw ( certs, cursor.data,
+					      cursor.len ) ) != 0 ) {
+			DBGC ( validator, "VALIDATOR %p could not append "
+			       "certificate: %s\n",
+			       validator, strerror ( rc) );
+			DBGC_HDA ( validator, 0, cursor.data, cursor.len );
+			goto err_append_raw;
+		}
+		cert = x509_last ( certs );
+		DBGC ( validator, "VALIDATOR %p found certificate %s\n",
+		       validator, x509_name ( cert ) );
+
+		/* Move to next certificate */
+		asn1_skip_any ( &cursor );
+	}
+
+	/* Append certificates to chain */
+	last = x509_last ( validator->chain );
+	if ( ( rc = x509_auto_append ( validator->chain, certs ) ) != 0 ) {
+		DBGC ( validator, "VALIDATOR %p could not append "
+		       "certificates: %s\n", validator, strerror ( rc ) );
+		goto err_auto_append;
+	}
+
+	/* Check that at least one certificate has been added: the
+	 * last certificate in the chain must have changed.
+	 */
+	if ( last == x509_last ( validator->chain ) ) {
+		DBGC ( validator, "VALIDATOR %p failed to append any "
+		       "applicable certificates\n", validator );
+		rc = -EACCES;
+		goto err_no_progress;
+	}
+
+	/* Drop reference to certificate list */
+	x509_chain_put ( certs );
+
+	return 0;
+
+ err_no_progress:
+ err_auto_append:
+ err_append_raw:
+ err_certificateset:
+	x509_chain_put ( certs );
+ err_alloc_certs:
+	return rc;
+}
+
+/**
+ * Start download of cross-signing certificate
+ *
+ * @v validator Certificate validator
+ * @v issuer Required issuer
+ * @ret rc Return status code
+ *
+ * Builds a URI of the form "<source>/<crc32>.der?subject=<base64>"
+ * from the cross-signed certificate source and the raw issuer, sets
+ * validator_append() as the completion handler, and opens the URI on
+ * the data transfer interface. The temporary strings are freed on
+ * every path, including success.
+ */
+static int validator_start_download ( struct validator *validator,
+ const struct asn1_cursor *issuer ) {
+ const char *crosscert;
+ char *crosscert_copy;
+ char *uri_string;
+ size_t uri_string_len;
+ uint32_t crc;
+ int len;
+ int rc;
+
+ /* Determine cross-signed certificate source
+ *
+ * Falls back to the built-in default when no copy of the
+ * setting is obtained.
+ */
+ fetch_string_setting_copy ( NULL, &crosscert_setting, &crosscert_copy );
+ crosscert = ( crosscert_copy ? crosscert_copy : crosscert_default );
+
+ /* Allocate URI string
+ *
+ * The constant 22 covers the fixed text: "/" (1) + eight hex
+ * digits (8) + ".der?subject=" (13).
+ */
+ uri_string_len = ( strlen ( crosscert ) + 22 /* "/%08x.der?subject=" */
+ + base64_encoded_len ( issuer->len ) + 1 /* NUL */ );
+ uri_string = zalloc ( uri_string_len );
+ if ( ! uri_string ) {
+ rc = -ENOMEM;
+ goto err_alloc_uri_string;
+ }
+
+ /* Generate CRC32 */
+ crc = crc32_le ( 0xffffffffUL, issuer->data, issuer->len );
+
+ /* Generate URI string
+ *
+ * The base64-encoded issuer is written immediately after the
+ * formatted prefix, at offset "len".
+ */
+ len = snprintf ( uri_string, uri_string_len, "%s/%08x.der?subject=",
+ crosscert, crc );
+ base64_encode ( issuer->data, issuer->len, ( uri_string + len ) );
+ DBGC ( validator, "VALIDATOR %p downloading cross-signed certificate "
+ "from %s\n", validator, uri_string );
+
+ /* Set completion handler */
+ validator->done = validator_append;
+
+ /* Open URI */
+ if ( ( rc = xfer_open_uri_string ( &validator->xfer,
+ uri_string ) ) != 0 ) {
+ DBGC ( validator, "VALIDATOR %p could not open %s: %s\n",
+ validator, uri_string, strerror ( rc ) );
+ goto err_open_uri_string;
+ }
+
+ /* Success */
+ rc = 0;
+
+ /* Success deliberately falls through the cleanup labels below */
+ err_open_uri_string:
+ free ( uri_string );
+ err_alloc_uri_string:
+ free ( crosscert_copy );
+ return rc;
+}
+
+/****************************************************************************
+ *
+ * OCSP checks
+ *
+ */
+
+/**
+ * Process a downloaded OCSP response
+ *
+ * @v validator		Certificate validator
+ * @v data		Raw OCSP response
+ * @v len		Length of raw data
+ * @ret rc		Return status code
+ *
+ * Records the response against the pending OCSP check and validates
+ * it at the current time.  On success the validator's reference to
+ * the OCSP check is dropped and cleared.
+ */
+static int validator_ocsp_validate ( struct validator *validator,
+				     const void *data, size_t len ) {
+	int rc;
+
+	/* Hand the raw response to the OCSP check */
+	rc = ocsp_response ( validator->ocsp, data, len );
+	if ( rc != 0 ) {
+		DBGC ( validator, "VALIDATOR %p could not record OCSP "
+		       "response: %s\n", validator, strerror ( rc ) );
+		return rc;
+	}
+
+	/* Validate the response against the current time */
+	rc = ocsp_validate ( validator->ocsp, time ( NULL ) );
+	if ( rc != 0 ) {
+		DBGC ( validator, "VALIDATOR %p could not validate OCSP "
+		       "response: %s\n", validator, strerror ( rc ) );
+		return rc;
+	}
+
+	/* The OCSP check is no longer needed */
+	ocsp_put ( validator->ocsp );
+	validator->ocsp = NULL;
+
+	return 0;
+}
+
+/**
+ * Begin an OCSP check for a certificate
+ *
+ * @v validator		Certificate validator
+ * @v cert		Certificate to check
+ * @v issuer		Issuing certificate
+ * @ret rc		Return status code
+ *
+ * Creates the OCSP check, sets validator_ocsp_validate() as the
+ * completion handler, and opens the check's URI on the data transfer
+ * interface.
+ */
+static int validator_start_ocsp ( struct validator *validator,
+				  struct x509_certificate *cert,
+				  struct x509_certificate *issuer ) {
+	const char *ocsp_uri;
+	int rc;
+
+	/* No OCSP check may already be outstanding */
+	assert ( validator->ocsp == NULL );
+
+	/* Create the OCSP check */
+	rc = ocsp_check ( cert, issuer, &validator->ocsp );
+	if ( rc != 0 ) {
+		DBGC ( validator, "VALIDATOR %p could not create OCSP check: "
+		       "%s\n", validator, strerror ( rc ) );
+		return rc;
+	}
+
+	/* The response will be handled by validator_ocsp_validate() */
+	validator->done = validator_ocsp_validate;
+
+	/* Open the OCSP URI; the pointer is captured before opening */
+	ocsp_uri = validator->ocsp->uri_string;
+	DBGC ( validator, "VALIDATOR %p performing OCSP check at %s\n",
+	       validator, ocsp_uri );
+	rc = xfer_open_uri_string ( &validator->xfer, ocsp_uri );
+	if ( rc != 0 ) {
+		DBGC ( validator, "VALIDATOR %p could not open %s: %s\n",
+		       validator, ocsp_uri, strerror ( rc ) );
+	}
+
+	return rc;
+}
+
+/****************************************************************************
+ *
+ * Data transfer interface
+ *
+ */
+
+/**
+ * Handle closure of the data transfer interface
+ *
+ * @v validator		Certificate validator
+ * @v rc		Reason for close
+ *
+ * A failed transfer, or a failure in the completion handler, finishes
+ * the validation with that status.  Otherwise the downloaded data is
+ * discarded and the validation process is rescheduled.
+ */
+static void validator_xfer_close ( struct validator *validator, int rc ) {
+
+	/* Restart the data transfer interface */
+	intf_restart ( &validator->xfer, rc );
+
+	/* A failed transfer ends the validation */
+	if ( rc != 0 ) {
+		DBGC ( validator, "VALIDATOR %p transfer failed: %s\n",
+		       validator, strerror ( rc ) );
+		validator_finished ( validator, rc );
+		return;
+	}
+	DBGC2 ( validator, "VALIDATOR %p transfer complete\n", validator );
+
+	/* Pass the completed download to the completion handler */
+	assert ( validator->done != NULL );
+	rc = validator->done ( validator, validator->buffer.data,
+			       validator->buffer.len );
+	if ( rc != 0 ) {
+		validator_finished ( validator, rc );
+		return;
+	}
+
+	/* Discard the downloaded data */
+	xferbuf_done ( &validator->buffer );
+
+	/* Let the validation process run again */
+	process_add ( &validator->process );
+}
+
+/**
+ * Accept downloaded data
+ *
+ * @v validator		Certificate validator
+ * @v iobuf		I/O buffer
+ * @v meta		Data transfer metadata
+ * @ret rc		Return status code
+ *
+ * The data is added to the validator's buffer.  A failure to do so
+ * finishes the validation with the failure status.
+ */
+static int validator_xfer_deliver ( struct validator *validator,
+				    struct io_buffer *iobuf,
+				    struct xfer_metadata *meta ) {
+	int rc;
+
+	/* Hand the I/O buffer over to the data buffer */
+	rc = xferbuf_deliver ( &validator->buffer, iob_disown ( iobuf ),
+			       meta );
+	if ( rc == 0 )
+		return 0;
+
+	/* Delivery failed: give up on the validation */
+	DBGC ( validator, "VALIDATOR %p could not receive data: %s\n",
+	       validator, strerror ( rc ) );
+	validator_finished ( validator, rc );
+	return rc;
+}
+
+/** Certificate validator data transfer interface operations
+ *
+ * Delivered data is handled by validator_xfer_deliver(); closing the
+ * interface invokes validator_xfer_close().
+ */
+static struct interface_operation validator_xfer_operations[] = {
+ INTF_OP ( xfer_deliver, struct validator *, validator_xfer_deliver ),
+ INTF_OP ( intf_close, struct validator *, validator_xfer_close ),
+};
+
+/** Certificate validator data transfer interface descriptor
+ *
+ * Binds the operations above to the "xfer" member of struct validator.
+ */
+static struct interface_descriptor validator_xfer_desc =
+ INTF_DESC ( struct validator, xfer, validator_xfer_operations );
+
+/****************************************************************************
+ *
+ * Validation process
+ *
+ */
+
+/**
+ * Certificate validation process
+ *
+ * @v validator Certificate validator
+ *
+ * Performs one step of validation. In order, it: finishes
+ * successfully if the chain already validates; otherwise starts an
+ * OCSP check for the first certificate whose issuer is valid (or
+ * fails if OCSP is not applicable to it); otherwise fails if the
+ * chain ends in a self-issued certificate; otherwise starts a
+ * download of a cross-signing certificate for the last certificate's
+ * issuer.
+ */
+static void validator_step ( struct validator *validator ) {
+ struct x509_link *link;
+ struct x509_certificate *cert;
+ struct x509_certificate *issuer = NULL;
+ struct x509_certificate *last;
+ time_t now;
+ int rc;
+
+ /* Try validating chain. Try even if the chain is incomplete,
+ * since certificates may already have been validated
+ * previously.
+ */
+ now = time ( NULL );
+ if ( ( rc = x509_validate_chain ( validator->chain, now, NULL,
+ NULL ) ) == 0 ) {
+ validator_finished ( validator, 0 );
+ return;
+ }
+
+ /* If there is a certificate that could be validated using
+ * OCSP, try it.
+ *
+ * "cert" trails "issuer" by one link: on each iteration "cert"
+ * is the previous link's certificate and "issuer" is the
+ * current link's, so the first iteration has no "cert" yet.
+ */
+ list_for_each_entry ( link, &validator->chain->links, list ) {
+ cert = issuer;
+ issuer = link->cert;
+ if ( ! cert )
+ continue;
+ if ( ! issuer->valid )
+ continue;
+ /* The issuer is valid, but this certificate is not
+ * yet valid. If OCSP is applicable, start it.
+ */
+ if ( cert->extensions.auth_info.ocsp.uri.len &&
+ ( ! cert->extensions.auth_info.ocsp.good ) ) {
+ /* Start OCSP */
+ if ( ( rc = validator_start_ocsp ( validator, cert,
+ issuer ) ) != 0 ) {
+ validator_finished ( validator, rc );
+ return;
+ }
+ return;
+ }
+ /* Otherwise, this is a permanent failure; rc still
+ * holds the status from x509_validate_chain().
+ */
+ validator_finished ( validator, rc );
+ return;
+ }
+
+ /* If chain ends with a self-issued certificate, then there is
+ * nothing more to do. The validation failure status from
+ * x509_validate_chain() is reported.
+ */
+ last = x509_last ( validator->chain );
+ if ( asn1_compare ( &last->issuer.raw, &last->subject.raw ) == 0 ) {
+ validator_finished ( validator, rc );
+ return;
+ }
+
+ /* Otherwise, try to download a suitable cross-signing
+ * certificate.
+ */
+ if ( ( rc = validator_start_download ( validator,
+ &last->issuer.raw ) ) != 0 ) {
+ validator_finished ( validator, rc );
+ return;
+ }
+}
+
+/** Certificate validator process descriptor
+ *
+ * Binds validator_step() to the "process" member of struct validator.
+ * validator_xfer_close() re-adds the process after each completed
+ * download.
+ */
+static struct process_descriptor validator_process_desc =
+ PROC_DESC_ONCE ( struct validator, process, validator_step );
+
+/****************************************************************************
+ *
+ * Instantiator
+ *
+ */
+
+/**
+ * Instantiate a certificate validator
+ *
+ * @v job		Job control interface
+ * @v chain		X.509 certificate chain
+ * @ret rc		Return status code
+ *
+ * Fails with -EINVAL if no chain is supplied, and with -ENOMEM if the
+ * validator cannot be allocated.  Nothing can fail once the validator
+ * has been allocated, so no unwinding path is needed.
+ */
+int create_validator ( struct interface *job, struct x509_chain *chain ) {
+	struct validator *validator;
+
+	/* Sanity check */
+	if ( ! chain )
+		return -EINVAL;
+
+	/* Allocate and initialise structure */
+	validator = zalloc ( sizeof ( *validator ) );
+	if ( ! validator )
+		return -ENOMEM;
+	ref_init ( &validator->refcnt, validator_free );
+	intf_init ( &validator->job, &validator_job_desc,
+		    &validator->refcnt );
+	intf_init ( &validator->xfer, &validator_xfer_desc,
+		    &validator->refcnt );
+	process_init ( &validator->process, &validator_process_desc,
+		       &validator->refcnt );
+	validator->chain = x509_chain_get ( chain );
+
+	/* Attach parent interface, mortalise self, and return */
+	intf_plug_plug ( &validator->job, job );
+	ref_put ( &validator->refcnt );
+	DBGC2 ( validator, "VALIDATOR %p validating X509 chain %p\n",
+		validator, validator->chain );
+	return 0;
+}
diff --git a/qemu/roms/ipxe/src/net/vlan.c b/qemu/roms/ipxe/src/net/vlan.c
new file mode 100644
index 000000000..b4ddde42d
--- /dev/null
+++ b/qemu/roms/ipxe/src/net/vlan.c
@@ -0,0 +1,500 @@
+/*
+ * Copyright (C) 2010 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/features.h>
+#include <ipxe/if_ether.h>
+#include <ipxe/ethernet.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/vlan.h>
+
+/** @file
+ *
+ * Virtual LANs
+ *
+ */
+
+FEATURE ( FEATURE_PROTOCOL, "VLAN", DHCP_EB_FEATURE_VLAN, 1 );
+/* Forward declaration: used by vlan_transmit(), defined after vlan_rx() */
+struct net_protocol vlan_protocol __net_protocol;
+
+/** VLAN device private data (reached through the VLAN net device's priv pointer) */
+struct vlan_device {
+ /** Trunk network device (obtained via netdev_get() in vlan_create()) */
+ struct net_device *trunk;
+ /** VLAN tag (compared against received tags by vlan_find()) */
+ unsigned int tag;
+ /** Default priority (encoded into the TCI of transmitted frames) */
+ unsigned int priority;
+};
+
+/**
+ * Open VLAN device (by opening its trunk device)
+ *
+ * @v netdev		VLAN network device
+ * @ret rc		Status returned by netdev_open() for the trunk
+ */
+static int vlan_open ( struct net_device *netdev ) {
+	struct vlan_device *priv = netdev->priv;
+	struct net_device *trunk = priv->trunk;
+	return netdev_open ( trunk );
+}
+
+/**
+ * Close VLAN device (by closing its trunk device)
+ *
+ * @v netdev		VLAN network device
+ */
+static void vlan_close ( struct net_device *netdev ) {
+	struct vlan_device *priv = netdev->priv;
+	struct net_device *trunk = priv->trunk;
+	netdev_close ( trunk );
+}
+
+/**
+ * Transmit packet on VLAN device
+ *
+ * @v netdev		VLAN network device
+ * @v iobuf		I/O buffer (removed from the VLAN device's TX queue)
+ * @ret rc		0, or the error from parsing the link-layer header
+ */
+static int vlan_transmit ( struct net_device *netdev,
+			   struct io_buffer *iobuf ) {
+	struct vlan_device *priv = netdev->priv;
+	struct vlan_header *hdr;
+	uint8_t dest[ETH_ALEN];
+	uint8_t source[ETH_ALEN];
+	const void *orig_dest;
+	const void *orig_source;
+	unsigned int flags;
+	uint16_t net_proto;
+	int rc;
+
+	/* Remove the VLAN device's own link-layer header */
+	rc = netdev->ll_protocol->pull ( netdev, iobuf, &orig_dest,
+					 &orig_source, &net_proto, &flags );
+	if ( rc != 0 ) {
+		DBGC ( netdev, "VLAN %s could not parse link-layer header: "
+		       "%s\n", netdev->name, strerror ( rc ) );
+		return rc;
+	}
+
+	/* pull() hands back pointers only: copy the addresses before reuse */
+	memcpy ( dest, orig_dest, ETH_ALEN );
+	memcpy ( source, orig_source, ETH_ALEN );
+
+	/* Prepend the VLAN header */
+	hdr = iob_push ( iobuf, sizeof ( *hdr ) );
+	hdr->tci = htons ( VLAN_TCI ( priv->tag, priv->priority ) );
+	hdr->net_proto = net_proto;
+
+	/* Take the buffer off the VLAN device's TX queue */
+	list_del ( &iobuf->list );
+
+	/* Hand the (disowned) buffer to the trunk device for transmission */
+	rc = net_tx ( iob_disown ( iobuf ), priv->trunk, &vlan_protocol,
+		      dest, source );
+	if ( rc != 0 ) {
+		DBGC ( netdev, "VLAN %s could not transmit: %s\n",
+		       netdev->name, strerror ( rc ) );
+	}
+
+	/* Always report success here, since returning an error status
+	 * would cause the I/O buffer to be double-freed.
+	 */
+	return 0;
+}
+
+/**
+ * Poll VLAN device (by polling its trunk device)
+ *
+ * @v netdev		VLAN network device
+ */
+static void vlan_poll ( struct net_device *netdev ) {
+	struct vlan_device *priv = netdev->priv;
+	struct net_device *trunk = priv->trunk;
+
+	netdev_poll ( trunk );
+}
+
+/**
+ * Enable/disable interrupts on VLAN device
+ *
+ * @v netdev		VLAN network device
+ * @v enable		Interrupts should be enabled
+ */
+static void vlan_irq ( struct net_device *netdev, int enable ) {
+	struct vlan_device *priv = netdev->priv;
+	struct net_device *trunk = priv->trunk;
+
+	/* Forward the request to the trunk device.  This is not at all
+	 * robust, but there is no sensible course of action available.
+	 */
+	netdev_irq ( trunk, enable );
+}
+
+/** VLAN device operations (netdev->op is compared against this table's address) */
+static struct net_device_operations vlan_operations = {
+	.transmit = vlan_transmit,
+	.poll = vlan_poll,
+	.irq = vlan_irq,
+	.open = vlan_open,
+	.close = vlan_close,
+};
+
+/**
+ * Synchronise VLAN device with its trunk device
+ *
+ * @v netdev		VLAN network device
+ */
+static void vlan_sync ( struct net_device *netdev ) {
+	struct vlan_device *priv = netdev->priv;
+	struct net_device *trunk = priv->trunk;
+	int trunk_open, self_open;
+
+	/* Mirror the trunk's link status */
+	if ( netdev->link_rc != trunk->link_rc )
+		netdev_link_err ( netdev, trunk->link_rc );
+
+	/* Mirror the trunk's open/closed status */
+	trunk_open = netdev_is_open ( trunk );
+	self_open = netdev_is_open ( netdev );
+	if ( trunk_open && ! self_open )
+		netdev_open ( netdev );
+	else if ( self_open && ! trunk_open )
+		netdev_close ( netdev );
+}
+
+/**
+ * Identify VLAN device
+ *
+ * @v trunk		Trunk network device
+ * @v tag		VLAN tag
+ * @ret netdev		Matching VLAN device, or NULL if there is none
+ */
+struct net_device * vlan_find ( struct net_device *trunk, unsigned int tag ) {
+	struct net_device *candidate;
+	struct vlan_device *priv;
+
+	for_each_netdev ( candidate ) {
+		if ( candidate->op == &vlan_operations ) {
+			priv = candidate->priv;
+			if ( ( priv->tag == tag ) && ( priv->trunk == trunk ) )
+				return candidate;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * Process incoming VLAN packet
+ *
+ * @v iobuf		I/O buffer (handed on or freed; never left to caller)
+ * @v trunk		Trunk network device
+ * @v ll_dest		Link-layer destination address
+ * @v ll_source		Link-layer source address
+ * @v flags		Packet flags (unused)
+ * @ret rc		Return status code
+ */
+static int vlan_rx ( struct io_buffer *iobuf, struct net_device *trunk,
+		     const void *ll_dest, const void *ll_source,
+		     unsigned int flags __unused ) {
+	struct vlan_header *vlanhdr = iobuf->data;
+	struct net_device *netdev;
+	struct ll_protocol *ll_protocol;
+	uint8_t ll_dest_copy[ETH_ALEN];
+	uint8_t ll_source_copy[ETH_ALEN];
+	uint16_t tag;
+	int rc;
+
+	/* Sanity check */
+	if ( iob_len ( iobuf ) < sizeof ( *vlanhdr ) ) {
+		DBGC ( trunk, "VLAN %s received underlength packet (%zd "
+		       "bytes)\n", trunk->name, iob_len ( iobuf ) );
+		rc = -EINVAL;
+		goto err_sanity;
+	}
+
+	/* Identify VLAN device */
+	tag = VLAN_TAG ( ntohs ( vlanhdr->tci ) );
+	netdev = vlan_find ( trunk, tag );
+	if ( ! netdev ) {
+		DBGC2 ( trunk, "VLAN %s received packet for unknown VLAN "
+			"%d\n", trunk->name, tag );
+		rc = -EPIPE;
+		goto err_no_vlan;
+	}
+
+	/* Strip VLAN header and preserve original link-layer header fields */
+	iob_pull ( iobuf, sizeof ( *vlanhdr ) );
+	memcpy ( ll_dest_copy, ll_dest, ETH_ALEN );
+	memcpy ( ll_source_copy, ll_source, ETH_ALEN );
+
+	/* Reconstruct link-layer header for VLAN device */
+	ll_protocol = netdev->ll_protocol;
+	if ( ( rc = ll_protocol->push ( netdev, iobuf, ll_dest_copy,
+					ll_source_copy,
+					vlanhdr->net_proto ) ) != 0 ) {
+		DBGC ( netdev, "VLAN %s could not reconstruct link-layer "
+		       "header: %s\n", netdev->name, strerror ( rc ) );
+		goto err_ll_push;
+	}
+
+	/* Enqueue packet on VLAN device */
+	netdev_rx ( netdev, iob_disown ( iobuf ) );
+	return 0;
+
+ err_ll_push:
+ err_no_vlan:
+ err_sanity:
+	/* Every failure path jumps here before the buffer is disowned */
+	free_iob ( iobuf );
+	return rc;
+}
+
+/** VLAN protocol (ETH_P_8021Q frames are handled by vlan_rx()) */
+struct net_protocol vlan_protocol __net_protocol = {
+	.rx = vlan_rx,
+	.net_proto = htons ( ETH_P_8021Q ),
+	.name = "VLAN",
+};
+
+/**
+ * Get the VLAN tag
+ *
+ * @v netdev		Network device (need not be a VLAN device)
+ * @ret tag		VLAN tag, or 0 if device is not a VLAN device
+ */
+unsigned int vlan_tag ( struct net_device *netdev ) {
+	const struct vlan_device *priv;
+
+	/* Only devices using vlan_operations carry a tag */
+	if ( netdev->op != &vlan_operations )
+		return 0;
+
+	priv = netdev->priv;
+	return priv->tag;
+}
+
+/**
+ * Check if network device can be used as a VLAN trunk device
+ *
+ * @v trunk		Trunk network device
+ * @ret is_ok		Trunk network device is usable
+ *
+ * VLAN devices are always created as Ethernet devices, because the
+ * trunk's own link layer cannot simply be cloned: it may rely on
+ * link-layer-private data held in the network device structure.
+ * A usable trunk must therefore have a link layer that is
+ * 'compatible' with Ethernet in the sense that its link-layer
+ * addresses have the same length as Ethernet link-layer addresses
+ * (ETH_ALEN bytes).
+ *
+ * Nested VLANs are also refused (a VLAN device cannot itself be a
+ * trunk), primarily to assist with the sanity of the FCoE code.
+ */
+int vlan_can_be_trunk ( struct net_device *trunk ) {
+	if ( trunk->ll_protocol->ll_addr_len != ETH_ALEN )
+		return 0;
+	return ( trunk->op != &vlan_operations );
+}
+
+/**
+ * Create VLAN device, or update the priority of an existing one
+ *
+ * @v trunk		Trunk network device
+ * @v tag		VLAN tag
+ * @v priority		Default VLAN priority (validated in both cases)
+ * @ret rc		Return status code
+ */
+int vlan_create ( struct net_device *trunk, unsigned int tag,
+		  unsigned int priority ) {
+	struct net_device *netdev;
+	struct vlan_device *vlan;
+	int rc;
+
+	/* Validate priority first, so that updates are checked too */
+	if ( ! VLAN_PRIORITY_IS_VALID ( priority ) ) {
+		DBGC ( trunk, "VLAN %s cannot create VLAN with invalid "
+		       "priority %d\n", trunk->name, priority );
+		rc = -EINVAL;
+		goto err_sanity;
+	}
+
+	/* If VLAN already exists, just update the priority */
+	if ( ( netdev = vlan_find ( trunk, tag ) ) != NULL ) {
+		vlan = netdev->priv;
+		if ( priority != vlan->priority ) {
+			DBGC ( netdev, "VLAN %s priority changed from %d to "
+			       "%d\n", netdev->name, vlan->priority, priority );
+		}
+		vlan->priority = priority;
+		return 0;
+	}
+
+	/* Remaining sanity checks */
+	if ( ! vlan_can_be_trunk ( trunk ) ) {
+		DBGC ( trunk, "VLAN %s cannot create VLAN on non-trunk "
+		       "device\n", trunk->name );
+		rc = -ENOTTY;
+		goto err_sanity;
+	}
+	if ( ! VLAN_TAG_IS_VALID ( tag ) ) {
+		DBGC ( trunk, "VLAN %s cannot create VLAN with invalid tag "
+		       "%d\n", trunk->name, tag );
+		rc = -EINVAL;
+		goto err_sanity;
+	}
+
+	/* Allocate and initialise structure */
+	netdev = alloc_etherdev ( sizeof ( *vlan ) );
+	if ( ! netdev ) {
+		rc = -ENOMEM;
+		goto err_alloc_etherdev;
+	}
+	netdev_init ( netdev, &vlan_operations );
+	netdev->dev = trunk->dev;
+	memcpy ( netdev->hw_addr, trunk->ll_addr, ETH_ALEN );
+	vlan = netdev->priv;
+	vlan->trunk = netdev_get ( trunk );
+	vlan->tag = tag;
+	vlan->priority = priority;
+
+	/* Construct VLAN device name */
+	snprintf ( netdev->name, sizeof ( netdev->name ), "%s-%d",
+		   trunk->name, vlan->tag );
+
+	/* Register VLAN device */
+	if ( ( rc = register_netdev ( netdev ) ) != 0 ) {
+		DBGC ( netdev, "VLAN %s could not register: %s\n",
+		       netdev->name, strerror ( rc ) );
+		goto err_register;
+	}
+
+	/* Synchronise with trunk device */
+	vlan_sync ( netdev );
+	DBGC ( netdev, "VLAN %s created with tag %d and priority %d\n",
+	       netdev->name, vlan->tag, vlan->priority );
+	return 0;
+
+	unregister_netdev ( netdev );
+ err_register:
+	netdev_nullify ( netdev );
+	netdev_put ( netdev );
+	netdev_put ( trunk );
+ err_alloc_etherdev:
+ err_sanity:
+	return rc;
+}
+
+/**
+ * Destroy VLAN device
+ *
+ * @v netdev		Network device
+ * @ret rc		0 on success, -ENOTTY if not a VLAN device
+ */
+int vlan_destroy ( struct net_device *netdev ) {
+	struct vlan_device *priv;
+	struct net_device *trunk;
+
+	/* Refuse to operate on anything that is not a VLAN device */
+	if ( netdev->op != &vlan_operations ) {
+		DBGC ( netdev, "VLAN %s cannot destroy non-VLAN device\n",
+		       netdev->name );
+		return -ENOTTY;
+	}
+
+	priv = netdev->priv;
+	trunk = priv->trunk;
+	DBGC ( netdev, "VLAN %s destroyed\n", netdev->name );
+
+	/* Unregister and release the VLAN device, then the trunk */
+	unregister_netdev ( netdev );
+	netdev_nullify ( netdev );
+	netdev_put ( netdev );
+	netdev_put ( trunk );
+	return 0;
+}
+
+/**
+ * Handle trunk device state change by resyncing its VLAN devices
+ *
+ * @v trunk		Trunk network device
+ */
+static void vlan_notify ( struct net_device *trunk ) {
+	struct net_device *candidate;
+	struct vlan_device *priv;
+
+	for_each_netdev ( candidate ) {
+		if ( candidate->op == &vlan_operations ) {
+			priv = candidate->priv;
+			if ( priv->trunk == trunk )
+				vlan_sync ( candidate );
+		}
+	}
+}
+
+/**
+ * Destroy first VLAN device for a given trunk
+ *
+ * @v trunk		Trunk network device
+ * @ret found		1 if a VLAN device was found and destroyed, else 0
+ */
+static int vlan_remove_first ( struct net_device *trunk ) {
+	struct net_device *candidate;
+	struct vlan_device *priv;
+
+	for_each_netdev ( candidate ) {
+		if ( candidate->op != &vlan_operations )
+			continue;
+		priv = candidate->priv;
+		if ( priv->trunk != trunk )
+			continue;
+		vlan_destroy ( candidate );
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * Destroy all VLAN devices for a given trunk
+ *
+ * @v trunk		Trunk network device
+ */
+static void vlan_remove ( struct net_device *trunk ) {
+	/* Each pass destroys at most one VLAN device and then searches
+	 * afresh, which is safe against arbitrary net device removal.
+	 */
+	while ( vlan_remove_first ( trunk ) )
+		continue;
+}
+
+/** VLAN driver: supplies vlan_notify() and vlan_remove() as its callbacks */
+struct net_driver vlan_driver __net_driver = {
+	.remove = vlan_remove,
+	.notify = vlan_notify,
+	.name = "VLAN",
+};