summaryrefslogtreecommitdiffstats
path: root/qemu/roms/ipxe/src/include/ipxe/tcp.h
blob: 9baa6391cd621b4e3b84fd4f89b641aea29d7e71 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
#ifndef _IPXE_TCP_H
#define _IPXE_TCP_H

/** @file
 *
 * TCP protocol
 *
 * This file defines the iPXE TCP API.
 *
 */

FILE_LICENCE ( GPL2_OR_LATER );

#include <ipxe/tcpip.h>

/**
 * A TCP header
 */
struct tcp_header {
	uint16_t src;		/* Source port */
	uint16_t dest;		/* Destination port */
	uint32_t seq;		/* Sequence number */
	uint32_t ack;		/* Acknowledgement number */
	uint8_t hlen;		/* Header length (4), Reserved (4) */
	uint8_t flags;		/* Reserved (2), Flags (6) */
	uint16_t win;		/* Advertised window */
	uint16_t csum;		/* Checksum */
	uint16_t urg;		/* Urgent pointer */
};

/** @defgroup tcpopts TCP options
 * @{
 */

/** End of TCP options list */
#define TCP_OPTION_END 0

/** TCP option pad */
#define TCP_OPTION_NOP 1

/** Generic TCP option */
struct tcp_option {
	uint8_t kind;
	uint8_t length;
} __attribute__ (( packed ));

/** TCP MSS option */
struct tcp_mss_option {
	uint8_t kind;
	uint8_t length;
	uint16_t mss;
} __attribute__ (( packed ));

/** Code for the TCP MSS option */
#define TCP_OPTION_MSS 2

/** TCP window scale option */
struct tcp_window_scale_option {
	uint8_t kind;
	uint8_t length;
	uint8_t scale;
} __attribute__ (( packed ));

/** Padded TCP window scale option (used for sending) */
struct tcp_window_scale_padded_option {
	uint8_t nop;
	struct tcp_window_scale_option wsopt;
} __attribute (( packed ));

/** Code for the TCP window scale option */
#define TCP_OPTION_WS 3

/** Advertised TCP window scale
 *
 * Using a scale factor of 2**9 provides for a maximum window of 32MB,
 * which is sufficient to allow Gigabit-speed transfers with a 200ms
 * RTT.  The minimum advertised window is 512 bytes, which is still
 * less than a single packet.
 */
#define TCP_RX_WINDOW_SCALE 9

/** TCP timestamp option */
struct tcp_timestamp_option {
	uint8_t kind;
	uint8_t length;
	uint32_t tsval;
	uint32_t tsecr;
} __attribute__ (( packed ));

/** Padded TCP timestamp option (used for sending) */
struct tcp_timestamp_padded_option {
	uint8_t nop[2];
	struct tcp_timestamp_option tsopt;
} __attribute__ (( packed ));

/** Code for the TCP timestamp option */
#define TCP_OPTION_TS 8

/** Parsed TCP options */
struct tcp_options {
	/** MSS option, if present */
	const struct tcp_mss_option *mssopt;
	/** Window scale option, if present */
	const struct tcp_window_scale_option *wsopt;
	/** Timestamp option, if present */
	const struct tcp_timestamp_option *tsopt;
};

/** @} */

/*
 * TCP flags
 */
#define TCP_CWR		0x80
#define TCP_ECE		0x40
#define TCP_URG		0x20
#define TCP_ACK		0x10
#define TCP_PSH		0x08
#define TCP_RST		0x04
#define TCP_SYN		0x02
#define TCP_FIN		0x01

/**
* @defgroup tcpstates TCP states
*
* The TCP state is defined by a combination of the flags that have
* been sent to the peer, the flags that have been acknowledged by the
* peer, and the flags that have been received from the peer.
*
* @{
*/

/** TCP flags that have been sent in outgoing packets */
#define TCP_STATE_SENT(flags) ( (flags) << 0 )
#define TCP_FLAGS_SENT(state) ( ( (state) >> 0 ) & 0xff )

/** TCP flags that have been acknowledged by the peer
 *
 * Note that this applies only to SYN and FIN.
 */
#define TCP_STATE_ACKED(flags) ( (flags) << 8 )
#define TCP_FLAGS_ACKED(state) ( ( (state) >> 8 ) & 0xff )

/** TCP flags that have been received from the peer
 *
 * Note that this applies only to SYN and FIN, and that once SYN has
 * been received, we should always be sending ACK.
 */
#define TCP_STATE_RCVD(flags) ( (flags) << 16 )
#define TCP_FLAGS_RCVD(state) ( ( (state) >> 16 ) & 0xff )

/** TCP flags that are currently being sent in outgoing packets */
#define TCP_FLAGS_SENDING(state) \
	( TCP_FLAGS_SENT ( state ) & ~TCP_FLAGS_ACKED ( state ) )

/** CLOSED
 *
 * The connection has not yet been used for anything.
 */
#define TCP_CLOSED TCP_RST

/** LISTEN
 *
 * Not currently used as a state; we have no support for listening
 * connections.  Given a unique value to avoid compiler warnings.
 */
#define TCP_LISTEN 0

/** SYN_SENT
 *
 * SYN has been sent, nothing has yet been received or acknowledged.
 */
#define TCP_SYN_SENT	( TCP_STATE_SENT ( TCP_SYN ) )

/** SYN_RCVD
 *
 * SYN has been sent but not acknowledged, SYN has been received.
 */
#define TCP_SYN_RCVD	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) |	    \
			  TCP_STATE_RCVD ( TCP_SYN ) )

/** ESTABLISHED
 *
 * SYN has been sent and acknowledged, SYN has been received.
 */
#define TCP_ESTABLISHED	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) |	    \
			  TCP_STATE_ACKED ( TCP_SYN ) |			    \
			  TCP_STATE_RCVD ( TCP_SYN ) )

/** FIN_WAIT_1
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent but not acknowledged, FIN has not been received.
 *
 * RFC 793 shows that we can enter FIN_WAIT_1 without have had SYN
 * acknowledged, i.e. if the application closes the connection after
 * sending and receiving SYN, but before having had SYN acknowledged.
 * However, we have to *pretend* that SYN has been acknowledged
 * anyway, otherwise we end up sending SYN and FIN in the same
 * sequence number slot.  Therefore, when we transition from SYN_RCVD
 * to FIN_WAIT_1, we have to remember to set TCP_STATE_ACKED(TCP_SYN)
 * and increment our sequence number.
 */
#define TCP_FIN_WAIT_1	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) |  \
			  TCP_STATE_ACKED ( TCP_SYN ) |			    \
			  TCP_STATE_RCVD ( TCP_SYN ) )

/** FIN_WAIT_2
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent and acknowledged, FIN ha not been received.
 */
#define TCP_FIN_WAIT_2	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) |  \
			  TCP_STATE_ACKED ( TCP_SYN | TCP_FIN ) |	    \
			  TCP_STATE_RCVD ( TCP_SYN ) )

/** CLOSING / LAST_ACK
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent but not acknowledged, FIN has been received.
 *
 * This state actually encompasses both CLOSING and LAST_ACK; they are
 * identical with the definition of state that we use.  I don't
 * *believe* that they need to be distinguished.
 */
#define TCP_CLOSING_OR_LAST_ACK						    \
			( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) |  \
			  TCP_STATE_ACKED ( TCP_SYN ) |			    \
			  TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )

/** TIME_WAIT
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent and acknowledged, FIN has been received.
 */
#define TCP_TIME_WAIT	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) |  \
			  TCP_STATE_ACKED ( TCP_SYN | TCP_FIN ) |	    \
			  TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )

/** CLOSE_WAIT
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been received.
 */
#define TCP_CLOSE_WAIT	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) |	    \
			  TCP_STATE_ACKED ( TCP_SYN ) |			    \
			  TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )

/** Can send data in current state
 *
 * We can send data if and only if we have had our SYN acked and we
 * have not yet sent our FIN.
 */
#define TCP_CAN_SEND_DATA(state)					    \
	( ( (state) & ( TCP_STATE_ACKED ( TCP_SYN ) |			    \
			TCP_STATE_SENT ( TCP_FIN ) ) )			    \
	  == TCP_STATE_ACKED ( TCP_SYN ) )

/** Have ever been fully established
 *
 * We have been fully established if we have both received a SYN and
 * had our own SYN acked.
 */
#define TCP_HAS_BEEN_ESTABLISHED(state)					    \
	( ( (state) & ( TCP_STATE_ACKED ( TCP_SYN ) |			    \
			TCP_STATE_RCVD ( TCP_SYN ) ) )			    \
	  == ( TCP_STATE_ACKED ( TCP_SYN ) | TCP_STATE_RCVD ( TCP_SYN ) ) )

/** Have closed gracefully
 *
 * We have closed gracefully if we have both received a FIN and had
 * our own FIN acked.
 */
#define TCP_CLOSED_GRACEFULLY(state)					    \
	( ( (state) & ( TCP_STATE_ACKED ( TCP_FIN ) |			    \
			TCP_STATE_RCVD ( TCP_FIN ) ) )			    \
	  == ( TCP_STATE_ACKED ( TCP_FIN ) | TCP_STATE_RCVD ( TCP_FIN ) ) )

/** @} */

/** Mask for TCP header length field */
#define TCP_MASK_HLEN	0xf0

/** Smallest port number on which a TCP connection can listen */
#define TCP_MIN_PORT 1

/**
 * Maxmimum advertised TCP window size
 *
 * The maximum bandwidth on any link is limited by
 *
 *    max_bandwidth * round_trip_time = tcp_window
 *
 * Some rough expectations for achievable bandwidths over various
 * links are:
 *
 *    a) Gigabit LAN: expected bandwidth 125MB/s, typical RTT 0.5ms,
 *       minimum required window 64kB
 *
 *    b) Home Internet connection: expected bandwidth 10MB/s, typical
 *       RTT 25ms, minimum required window 256kB
 *
 *    c) WAN: expected bandwidth 2MB/s, typical RTT 100ms, minimum
 *       required window 200kB.
 *
 * The maximum possible value for the TCP window size is 1GB (using
 * the maximum window scale of 2**14).  However, it is advisable to
 * keep the window size as small as possible (without limiting
 * bandwidth), since in the event of a lost packet the window size
 * represents the maximum amount that will need to be retransmitted.
 *
 * We therefore choose a maximum window size of 256kB.
 */
#define TCP_MAX_WINDOW_SIZE	( 256 * 1024 )

/**
 * Path MTU
 *
 * IPv6 requires all data link layers to support a datagram size of
 * 1280 bytes.  We choose to use this as our maximum transmitted
 * datagram size, on the assumption that any practical link layer we
 * encounter will allow this size.  This is a very conservative
 * assumption in practice, but the impact of making such a
 * conservative assumption is insignificant since the amount of data
 * that we transmit (rather than receive) is negligible.
 *
 * We allow space within this 1280 bytes for an IPv6 header, a TCP
 * header, and a (padded) TCP timestamp option.
 */
#define TCP_PATH_MTU							\
	( 1280 - 40 /* IPv6 */ - 20 /* TCP */ - 12 /* TCP timestamp */ )

/** TCP maximum segment lifetime
 *
 * Currently set to 2 minutes, as per RFC 793.
 */
#define TCP_MSL ( 2 * 60 * TICKS_PER_SEC )

/**
 * TCP maximum header length
 *
 */
#define TCP_MAX_HEADER_LEN					\
	( MAX_LL_NET_HEADER_LEN +				\
	  sizeof ( struct tcp_header ) +			\
	  sizeof ( struct tcp_mss_option ) +			\
	  sizeof ( struct tcp_window_scale_padded_option ) +	\
	  sizeof ( struct tcp_timestamp_padded_option ) )

/**
 * Compare TCP sequence numbers
 *
 * @v seq1		Sequence number 1
 * @v seq2		Sequence number 2
 * @ret diff		Sequence difference
 *
 * Analogous to memcmp(), returns an integer less than, equal to, or
 * greater than zero if @c seq1 is found, respectively, to be before,
 * equal to, or after @c seq2.
 */
static inline __attribute__ (( always_inline )) int32_t
tcp_cmp ( uint32_t seq1, uint32_t seq2 ) {
	return ( ( int32_t ) ( seq1 - seq2 ) );
}

/**
 * Check if TCP sequence number lies within window
 *
 * @v seq		Sequence number
 * @v start		Start of window
 * @v len		Length of window
 * @ret in_window	Sequence number is within window
 */
static inline int tcp_in_window ( uint32_t seq, uint32_t start,
				  uint32_t len ) {
	return ( ( seq - start ) < len );
}

extern struct tcpip_protocol tcp_protocol __tcpip_protocol;

#endif /* _IPXE_TCP_H */