###############################################################################
# Copyright (c) 2015 Ericsson AB and others.
# szilard.cserey@ericsson.com
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
###############################################################################
import time
import os
import glob
from ssh_client import SSHClient
from dha_adapters.libvirt_adapter import LibvirtAdapter
from common import (
log,
err,
clean,
delete,
)
# File name of the settings-transplant helper script (not referenced in the
# visible part of this module -- presumably used further down; verify).
TRANSPLANT_FUEL_SETTINGS = 'transplant_fuel_settings.py'

# NOTE(review): looks like the name of the Fuel bootstrap script/process;
# its use is not visible here -- confirm against the rest of the file.
BOOTSTRAP_ADMIN = 'bootstrap_admin_node'

# Path of the Fuel client configuration file.
FUEL_CLIENT_CONFIG = '/etc/fuel/client/config.yaml'

# Directory created over SSH by collect_plugin_files() and used as the
# scp destination for plugin RPMs.
PLUGINS_DIR = '~/plugins'

# Second location scanned for plugins by install_plugins().
LOCAL_PLUGIN_FOLDER = '/opt/opnfv'

# Error message fragments that are considered harmless (the consumer of this
# list is not visible in this part of the file).
IGNORABLE_FUEL_ERRORS = [
    'does not update installed package',
    "Couldn't resolve host",
]
class InstallFuelMaster(object):
def __init__(self, dea_file, dha_file, fuel_ip, fuel_username,
             fuel_password, fuel_node_id, iso_file, work_dir,
             fuel_plugins_dir, no_plugins):
    """Store the installation parameters and create the helper objects.

    dea_file          -- stored as-is on the instance
    dha_file          -- handed to LibvirtAdapter to build ``self.dha``
    fuel_ip, fuel_username, fuel_password
                      -- stored, and used to build the SSHClient kept
                         in ``self.ssh``
    fuel_node_id      -- node id passed to every ``self.dha`` call
    iso_file          -- ISO path; its directory is kept in ``iso_dir``
    work_dir          -- stored as-is on the instance
    fuel_plugins_dir  -- local directory searched for ``*.rpm`` plugins
                         (may be empty/None)
    no_plugins        -- when true, plugin collection/installation is
                         skipped
    """
    self.dea_file = dea_file
    self.dha = LibvirtAdapter(dha_file)

    # Connection details of the Fuel master.
    self.fuel_ip, self.fuel_username, self.fuel_password = (
        fuel_ip, fuel_username, fuel_password)
    self.fuel_node_id = fuel_node_id

    # ISO image and the directory that contains it.
    self.iso_file = iso_file
    self.iso_dir = os.path.dirname(iso_file)

    self.work_dir = work_dir
    self.fuel_plugins_dir = fuel_plugins_dir
    self.no_plugins = no_plugins

    # Directory holding this source file (symlinks resolved).
    this_file = os.path.realpath(__file__)
    self.file_dir = os.path.dirname(this_file)

    self.ssh = SSHClient(fuel_ip, fuel_username, fuel_password)
def install(self):
    """Prepare the Fuel node and run the installation.

    The node is powered off, then either the ISO is uploaded to the
    pool (when the LIBVIRT_DEFAULT_URI environment variable is set to a
    non-empty value) or the node's MBR is zeroed. The boot order is set
    to disk first, ISO second, and the actual installation is started.
    Any exception raised by the installation triggers
    post_install_cleanup() and is then reported through err().
    """
    log('Start Fuel Installation')
    node_id = self.fuel_node_id
    self.dha.node_power_off(node_id)

    if not os.environ.get('LIBVIRT_DEFAULT_URI'):
        log('Zero the MBR')
        self.dha.node_zero_mbr(node_id)
    else:
        log('Upload ISO to pool')
        # upload_iso() returns the ISO reference used from here on.
        self.iso_file = self.dha.upload_iso(self.iso_file)

    boot_order = ['disk', 'iso']
    self.dha.node_set_boot_order(node_id, boot_order)

    try:
        self.proceed_with_installation()
    except Exception as e:
        self.post_install_cleanup()
        err(e)
def proceed_with_installation(self):
    """Boot the node from the ISO and drive the Fuel installation.

    Steps, in order: swap the ISO, power the node on, wait for SSH and
    for the Fuel menu, inject the astute/bootstrap settings, kill the
    Fuel menu process so the deployment continues, wait for completion,
    pause for one minute, remove the deprecated client config, install
    plugins unless ``no_plugins`` is set, and finally clean up.
    """
    adapter = self.dha
    node_id = self.fuel_node_id

    log('Eject ISO')
    adapter.node_eject_iso(node_id)

    log('Insert ISO %s' % self.iso_file)
    adapter.node_insert_iso(node_id, self.iso_file)

    adapter.node_power_on(node_id)

    log('Waiting for Fuel master to accept SSH')
    self.wait_for_node_up()

    log('Wait until Fuel menu is up')
    menu_pid = self.wait_until_fuel_menu_up()

    log('Inject our own astute.yaml and fuel_bootstrap_cli.yaml settings')
    self.inject_own_astute_and_bootstrap_yaml()

    log('Let the Fuel deployment continue')
    log('Found FUEL menu as PID %s, now killing it' % menu_pid)
    kill_cmd = 'kill %s' % menu_pid
    self.ssh_exec_cmd(kill_cmd, False)

    log('Wait until installation is complete')
    self.wait_until_installation_completed()

    log('Waiting for one minute for Fuel to stabilize')
    time.sleep(60)

    self.delete_deprecated_fuel_client_config()

    if not self.no_plugins:
        self.collect_plugin_files()
        self.install_plugins()

    self.post_install_cleanup()
    log('Fuel Master installed successfully !')
def collect_plugin_files(self):
    """Create PLUGINS_DIR over SSH and copy the local plugin RPMs into it.

    Every ``*.rpm`` file found directly in ``self.fuel_plugins_dir`` is
    transferred with scp. Nothing is copied when no plugin directory
    was configured; the remote directory is created either way.
    """
    with self.ssh as remote:
        remote.exec_cmd('mkdir %s' % PLUGINS_DIR)
        if not self.fuel_plugins_dir:
            return
        rpm_pattern = '%s/*.rpm' % self.fuel_plugins_dir
        for rpm_path in glob.glob(rpm_pattern):
            remote.scp_put(rpm_path, PLUGINS_DIR)
def install_plugins(self):
log('Installing Fuel Plugins')
plugin_files = []
with self.ssh as s:
for plugin_location in [PLUGINS_DIR, LOCAL_PLUGIN_FOLDER]:
s.exec_cmd(
/* Syntax-highlighting token colours used when the user agent reports a dark
   colour scheme. Each rule targets one token class inside a .highlight
   container; the trailing comment on every rule names the token type. */
@media only all and (prefers-color-scheme: dark) {
.highlight .hll { background-color: #49483e }
.highlight .c { color: #75715e } /* Comment */
.highlight .err { color: #960050; background-color: #1e0010 } /* Error */
.highlight .k { color: #66d9ef } /* Keyword */
.highlight .l { color: #ae81ff } /* Literal */
.highlight .n { color: #f8f8f2 } /* Name */
.highlight .o { color: #f92672 } /* Operator */
.highlight .p { color: #f8f8f2 } /* Punctuation */
.highlight .ch { color: #75715e } /* Comment.Hashbang */
.highlight .cm { color: #75715e } /* Comment.Multiline */
.highlight .cp { color: #75715e } /* Comment.Preproc */
.highlight .cpf { color: #75715e } /* Comment.PreprocFile */
.highlight .c1 { color: #75715e } /* Comment.Single */
.highlight .cs { color: #75715e } /* Comment.Special */
.highlight .gd { color: #f92672 } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gi { color: #a6e22e } /* Generic.Inserted */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #75715e } /* Generic.Subheading */
.highlight .kc { color: #66d9ef } /* Keyword.Constant */
.highlight .kd { color: #66d9ef } /* Keyword.Declaration */
.highlight .kn { color: #f92672 } /* Keyword.Namespace */
.highlight .kp { color: #66d9ef } /* Keyword.Pseudo */
.highlight .kr { color: #66d9ef } /* Keyword.Reserved */
.highlight .kt { color: #66d9ef } /* Keyword.Type */
.highlight .ld { color: #e6db74 } /* Literal.Date */
.highlight .m { color: #ae81ff } /* Literal.Number */
.highlight .s { color: #e6db74 } /* Literal.String */
.highlight .na { color: #a6e22e } /* Name.Attribute */
.highlight .nb { color: #f8f8f2 } /* Name.Builtin */
.highlight .nc { color: #a6e22e } /* Name.Class */
.highlight .no { color: #66d9ef } /* Name.Constant */
.highlight .nd { color: #a6e22e } /* Name.Decorator */
.highlight .ni { color: #f8f8f2 } /* Name.Entity */
.highlight .ne { color: #a6e22e } /* Name.Exception */
.highlight .nf { color: #a6e22e } /* Name.Function */
.highlight .nl { color: #f8f8f2 } /* Name.Label */
.highlight .nn { color: #f8f8f2 } /* Name.Namespace */
.highlight .nx { color: #a6e22e } /* Name.Other */
.highlight .py { color: #f8f8f2 } /* Name.Property */
.highlight .nt { color: #f92672 } /* Name.Tag */
.highlight .nv { color: #f8f8f2 } /* Name.Variable */
.highlight .ow { color: #f92672 } /* Operator.Word */
.highlight .w { color: #f8f8f2 } /* Text.Whitespace */
.highlight .mb { color: #ae81ff } /* Literal.Number.Bin */
.highlight .mf { color: #ae81ff } /* Literal.Number.Float */
.highlight .mh { color: #ae81ff } /* Literal.Number.Hex */
.highlight .mi { color: #ae81ff } /* Literal.Number.Integer */
.highlight .mo { color: #ae81ff } /* Literal.Number.Oct */
.highlight .sa { color: #e6db74 } /* Literal.String.Affix */
.highlight .sb { color: #e6db74 } /* Literal.String.Backtick */
.highlight .sc { color: #e6db74 } /* Literal.String.Char */
.highlight .dl { color: #e6db74 } /* Literal.String.Delimiter */
.highlight .sd { color: #e6db74 } /* Literal.String.Doc */
.highlight .s2 { color: #e6db74 } /* Literal.String.Double */
.highlight .se { color: #ae81ff } /* Literal.String.Escape */
.highlight .sh { color: #e6db74 } /* Literal.String.Heredoc */
.highlight .si { color: #e6db74 } /* Literal.String.Interpol */
.highlight .sx { color: #e6db74 } /* Literal.String.Other */
.highlight .sr { color: #e6db74 } /* Literal.String.Regex */
.highlight .s1 { color: #e6db74 } /* Literal.String.Single */
.highlight .ss { color: #e6db74 } /* Literal.String.Symbol */
.highlight .bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #a6e22e } /* Name.Function.Magic */
.highlight .vc { color: #f8f8f2 } /* Name.Variable.Class */
.highlight .vg { color: #f8f8f2 } /* Name.Variable.Global */
.highlight .vi { color: #f8f8f2 } /* Name.Variable.Instance */
.highlight .vm { color: #f8f8f2 } /* Name.Variable.Magic */
.highlight .il { color: #ae81ff } /* Literal.Number.Integer.Long */
}
/* Syntax-highlighting token colours used when the user agent reports a light
   colour scheme. Each rule targets one token class inside a .highlight
   container; the trailing comment on every rule names the token type. */
@media (prefers-color-scheme: light) {
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
}
/*
// Copyright (c) 2010-2017 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include <rte_cycles.h>
#include <rte_ether.h>
#include <rte_ethdev.h> // required by rte_eth_ctrl.h in 19.05
#include <rte_eth_ctrl.h>
#include "log.h"
#include "genl4_stream_tcp.h"
#include "prox_assert.h"
#include "mbuf_utils.h"
/* Retransmission timeout in TSC cycles: the stream's configured tsc_timeout
   plus the value token_time_tsc_until_full() reports for token_time_other. */
static uint64_t tcp_retx_timeout(const struct stream_ctx *ctx)
{
	const uint64_t until_full = token_time_tsc_until_full(&ctx->token_time_other);

	return ctx->stream_cfg->tsc_timeout + until_full;
}
/* Delay (TSC cycles) before this stream should be rescheduled: the value
   token_time_tsc_until_full() reports for the stream's own token_time. */
static uint64_t tcp_resched_timeout(const struct stream_ctx *ctx)
{
	return token_time_tsc_until_full(&ctx->token_time);
}
/* Arm the retransmission timer: report the relative timeout through
   next_tsc and remember the absolute expiry time in ctx->sched_tsc. */
static void tcp_retx_timeout_start(struct stream_ctx *ctx, uint64_t *next_tsc)
{
	const uint64_t start_tsc = rte_rdtsc();
	const uint64_t timeout = tcp_retx_timeout(ctx);

	*next_tsc = timeout;
	ctx->sched_tsc = start_tsc + timeout;
}
/* Non-zero when the expiry time stored in ctx->sched_tsc lies strictly
   before now. */
static int tcp_retx_timeout_occured(const struct stream_ctx *ctx, uint64_t now)
{
	return now > ctx->sched_tsc;
}
/* Continue waiting for an already armed timer: report through next_tsc how
   many cycles are left between now and ctx->sched_tsc. The subtraction is
   unsigned, so it is only meaningful while the timer has not yet expired. */
static void tcp_retx_timeout_resume(const struct stream_ctx *ctx, uint64_t now, uint64_t *next_tsc)
{
	const uint64_t cycles_left = ctx->sched_tsc - now;

	*next_tsc = cycles_left;
}
/* Account for one more retransmission on this stream. */
static void tcp_set_retransmit(struct stream_ctx *ctx)
{
	ctx->retransmits += 1;
}
/* Two-byte header found at the start of a TCP option: the option kind
   followed by the option length. The code in this file steps from one
   option to the next by adding `len` to the option's start address, i.e.
   `len` is treated as including these two bytes. */
struct tcp_option {
uint8_t kind; /* option kind; this file writes and recognises kind 2 (MSS) */
uint8_t len; /* option length in bytes; 4 for the MSS option used here */
} __attribute__((packed));
void stream_tcp_create_rst(struct rte_mbuf *mbuf, struct l4_meta *l4_meta, struct pkt_tuple *tuple)
{
prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)l4_meta->l4_hdr;
prox_rte_ipv4_hdr *ip = ((prox_rte_ipv4_hdr *)tcp) - 1;
ip->src_addr = tuple->dst_addr;
ip->dst_addr = tuple->src_addr;
tcp->dst_port = tuple->src_port;
tcp->src_port = tuple->dst_port;
ip->total_length = rte_bswap16(sizeof(prox_rte_ipv4_hdr) + sizeof(prox_rte_tcp_hdr));
tcp->tcp_flags = PROX_RTE_TCP_RST_FLAG;
tcp->data_off = ((sizeof(prox_rte_tcp_hdr) / 4) << 4);
rte_pktmbuf_pkt_len(mbuf) = l4_meta->payload - rte_pktmbuf_mtod(mbuf, uint8_t *);
rte_pktmbuf_data_len(mbuf) = l4_meta->payload - rte_pktmbuf_mtod(mbuf, uint8_t *);
}
/* Build a TCP segment for ctx in mbuf.

   The packet starts from the header template of the peer that owns the
   current action; addresses and ports are then filled in from ctx->tuple
   with source and destination swapped.

   tcp_flags  TCP flags to put in the header. A SYN additionally gets a
              4-byte MSS option (value 1460). RST, SYN and FIN each consume
              one sequence number.
   data_beg   offset into the peer's content at which the payload starts.
   data_len   number of payload bytes to copy (0 for a bare segment). When
              non-zero it replaces the sequence length set by the flags.

   Side effects: ctx->next_seq is advanced by the consumed sequence length,
   and for a SYN ctx->seq_first_byte is set to ctx->ackd_seq + 1. The TCP
   checksum field is not written here. */
static void create_tcp_pkt(struct stream_ctx *ctx, struct rte_mbuf *mbuf, uint8_t tcp_flags, int data_beg, int data_len)
{
	uint8_t *pkt;

	const struct peer_action *act = &ctx->stream_cfg->actions[ctx->cur_action];
	const struct stream_cfg *stream_cfg = ctx->stream_cfg;

	pkt = rte_pktmbuf_mtod(mbuf, uint8_t *);
	/* Pre-built headers (up to and including IPv4) of the sending peer. */
	rte_memcpy(pkt, stream_cfg->data[act->peer].hdr, stream_cfg->data[act->peer].hdr_len);

	/* The IPv4 header is the last header of the template; TCP follows it. */
	prox_rte_ipv4_hdr *l3_hdr = (prox_rte_ipv4_hdr*)&pkt[stream_cfg->data[act->peer].hdr_len - sizeof(prox_rte_ipv4_hdr)];
	prox_rte_tcp_hdr *l4_hdr = (prox_rte_tcp_hdr *)&pkt[stream_cfg->data[act->peer].hdr_len];

	l3_hdr->src_addr = ctx->tuple->dst_addr;
	l3_hdr->dst_addr = ctx->tuple->src_addr;
	l3_hdr->next_proto_id = IPPROTO_TCP;

	l4_hdr->src_port = ctx->tuple->dst_port;
	l4_hdr->dst_port = ctx->tuple->src_port;

	uint32_t tcp_len = sizeof(prox_rte_tcp_hdr);
	uint32_t seq_len = 0;
	struct tcp_option *tcp_op;

	if (tcp_flags & PROX_RTE_TCP_RST_FLAG) {
		seq_len = 1;
	}
	else if (tcp_flags & PROX_RTE_TCP_SYN_FLAG) {
		/* Maximum segment size option (kind 2, length 4). */
		/* TODO: make options come from the stream. */
		tcp_op = (struct tcp_option *)(l4_hdr + 1);

		tcp_op->kind = 2;
		tcp_op->len = 4;
		*(uint16_t *)(tcp_op + 1) = rte_bswap16(1460); /* TODO: Save this in this_mss */

		tcp_len += 4;
		seq_len = 1;

		ctx->seq_first_byte = ctx->ackd_seq + 1;
	}
	else if (tcp_flags & PROX_RTE_TCP_FIN_FLAG) {
		seq_len = 1;
	}

	if (tcp_flags & PROX_RTE_TCP_ACK_FLAG) {
		l4_hdr->recv_ack = rte_bswap32(ctx->recv_seq);
	}
	else
		l4_hdr->recv_ack = 0;

	uint16_t l4_payload_offset = stream_cfg->data[act->peer].hdr_len + tcp_len;

	if (data_len) {
		/* Payload overrides the sequence length chosen by the flags. */
		seq_len = data_len;
		plogx_dbg("l4 payload offset = %d\n", l4_payload_offset);
		rte_memcpy(pkt + l4_payload_offset, stream_cfg->data[act->peer].content + data_beg, data_len);
	}

	l4_hdr->sent_seq = rte_bswap32(ctx->next_seq);
	l4_hdr->tcp_flags = tcp_flags;
	l4_hdr->rx_win = rte_bswap16(0x3890); // TODO: make this come from stream (config)
	//l4_hdr->cksum = ...;
	l4_hdr->tcp_urp = 0;
	l4_hdr->data_off = ((tcp_len / 4) << 4); /* Highest 4 bits are TCP header len in units of 32 bit words */

	/* ctx->next_seq = ctx->ackd_seq + seq_len; */
	ctx->next_seq += seq_len;

	/* Packet length: headers (incl. TCP options) plus payload, if any. */
	rte_pktmbuf_pkt_len(mbuf) = l4_payload_offset + data_len;
	rte_pktmbuf_data_len(mbuf) = l4_payload_offset + data_len;

	l3_hdr->total_length = rte_bswap16(sizeof(prox_rte_ipv4_hdr) + tcp_len + data_len);
	plogdx_dbg(mbuf, NULL);

	plogx_dbg("put tcp packet with flags: %s%s%s, (len = %d, seq = %d, ack =%d)\n",
		  tcp_flags & PROX_RTE_TCP_SYN_FLAG? "SYN ":"",
		  tcp_flags & PROX_RTE_TCP_ACK_FLAG? "ACK ":"",
		  tcp_flags & PROX_RTE_TCP_FIN_FLAG? "FIN ":"",
		  data_len, rte_bswap32(l4_hdr->sent_seq), rte_bswap32(l4_hdr->recv_ack));
}
/* Get the length of the reply associated for the next packet. Note
that the packet will come from the other peer. In case the next
packet belongs to the current peer (again), the reply length will
be that of an empty TCP packet (i.e. the ACK). */
uint16_t stream_tcp_reply_len(struct stream_ctx *ctx)
{
if (stream_tcp_is_ended(ctx))
return 0;
else if (ctx->tcp_state != ESTABLISHED) {
if (ctx->tcp_state == SYN_SENT || ctx->tcp_state == LISTEN) {
/* First packet received is a SYN packet. In
the current implementation this packet
contains the TCP option field to set the
MSS. For this, add 4 bytes. */
return ctx->stream_cfg->data[!ctx->peer].hdr_len + sizeof(prox_rte_tcp_hdr) + 4;
}
return ctx->stream_cfg->data[!ctx->peer].hdr_len + sizeof(prox_rte_tcp_hdr);
}
else if (ctx->stream_cfg->actions[ctx->cur_action].peer == ctx->peer) {
/* The reply _could_ (due to races, still possibly
receive an old ack) contain data. This means that
in some cases, the prediction of the reply size
will be an overestimate. */
uint32_t data_beg = ctx->next_seq - ctx->seq_first_byte;
const struct peer_action *act = &ctx->stream_cfg->actions[ctx->cur_action];
uint32_t remaining_len = act->len - (data_beg - act->beg);
if (remaining_len == 0) {
if (ctx->cur_action + 1 != ctx->stream_cfg->n_actions) {
if (ctx->stream_cfg->actions[ctx->cur_action + 1].peer == ctx->peer)
return ctx->stream_cfg->data[ctx->peer].hdr_len + sizeof(prox_rte_tcp_hdr);
else {
uint32_t seq_beg = ctx->recv_seq - ctx->other_seq_first_byte;
uint32_t end = ctx->stream_cfg->actions[ctx->cur_action + 1].beg +
ctx->stream_cfg->actions[ctx->cur_action + 1].len;
uint32_t remaining = end - seq_beg;
uint16_t data_len = remaining > 1460? 1460: remaining;
return ctx->stream_cfg->data[!ctx->peer].hdr_len + sizeof(prox_rte_tcp_hdr) + data_len;
}
}
else {
return ctx->stream_cfg->data[ctx->peer].hdr_len + sizeof(prox_rte_tcp_hdr);
}
}
else {
return ctx->stream_cfg->data[ctx->peer].hdr_len + sizeof(prox_rte_tcp_hdr);
}
}
else if (ctx->stream_cfg->actions[ctx->cur_action].peer != ctx->peer) {
uint32_t seq_beg = ctx->recv_seq - ctx->other_seq_first_byte;
uint32_t end = ctx->stream_cfg->actions[ctx->cur_action].beg +
ctx->stream_cfg->actions[ctx->cur_action].len;
uint32_t remaining = end - seq_beg;
uint16_t data_len = remaining > 1460? 1460: remaining;
return ctx->stream_cfg->data[!ctx->peer].hdr_len + sizeof(prox_rte_tcp_hdr) + data_len;
}
else
return ctx->stream_cfg->data[ctx->peer].hdr_len + sizeof(prox_rte_tcp_hdr);
}
/* Handle a segment whose sequence number equals ctx->recv_seq.

   The payload is compared against the configured content of the current
   action. On a match the receive position is advanced, the current action
   is completed when the payload ends exactly at the action's end,
   *progress_seq is set to 1 and STREAM_CTX_F_NEW_DATA is raised. Segments
   without payload are ignored. Payload that does not fit in the current
   action, or that differs from the expected content, is only logged. */
static void stream_tcp_proc_in_order_data(struct stream_ctx *ctx, struct l4_meta *l4_meta, int *progress_seq)
{
	plogx_dbg("Got data with seq %d (as expected), with len %d\n", ctx->recv_seq, l4_meta->len);

	if (!l4_meta->len)
		return;

	const struct peer_action *act = &ctx->stream_cfg->actions[ctx->cur_action];
	enum l4gen_peer peer = act->peer;
	/* Since we have received the expected sequence number, the start address will not exceed the cfg memory buffer. */
	uint8_t *content = ctx->stream_cfg->data[peer].content;
	uint32_t seq_beg = ctx->recv_seq - ctx->other_seq_first_byte;
	uint32_t end = ctx->stream_cfg->actions[ctx->cur_action].beg + ctx->stream_cfg->actions[ctx->cur_action].len;
	uint32_t remaining = end - seq_beg;

	/* seq_beg > end would make the unsigned `remaining` wrap around to a
	   huge value and let the length check pass, so test it explicitly. */
	if (seq_beg > end || l4_meta->len > remaining) {
		plogx_err("Provided data is too long:\n");
		plogx_err("action.beg = %d, action.len = %d\n", act->beg, act->len);
		plogx_err("tcp seq points at %d in action, l4_meta->len = %d\n", seq_beg, l4_meta->len);
	}
	else {
		if (memcmp(content + seq_beg, l4_meta->payload, l4_meta->len) == 0) {
			plogx_dbg("Good payload in %d: %u -> %u\n", ctx->cur_action, ctx->recv_seq, l4_meta->len);
			ctx->recv_seq += l4_meta->len;
			ctx->cur_pos[peer] += l4_meta->len;
			/* Move forward only when this was the last piece of data within current action (i.e. end of received data == end of action data). */
			if (seq_beg + l4_meta->len == act->beg + act->len) {
				plogx_dbg("Got last piece in action %d\n", ctx->cur_action);
				ctx->cur_action++;
			}
			else {
				plogx_dbg("Got data from %d with len %d, but waiting for more (tot len = %d)!\n", seq_beg, l4_meta->len, act->len);
			}
			*progress_seq = 1;
			ctx->flags |= STREAM_CTX_F_NEW_DATA;
		}
		else {
			/* Dump the first four received and expected bytes to help
			   diagnose the mismatch. */
			plogx_err("ackable = %d, ackd = %d\n", ctx->ackable_data_seq ,ctx->ackd_seq);
			plogx_err("Bad payload action[%d]{.len = %d, .peer = %s}\n", ctx->cur_action, act->len, peer == PEER_SERVER? "s" : "c");
			plogx_err("   pkt payload len = %d, beginning at %u\n", l4_meta->len, seq_beg);
			/* plogx_err("   Payload starts %zu bytes after beginning of l4_hdr\n", l4_meta->payload - l4_meta->l4_hdr); */

			plogx_err("   payload[0-3] = %02x %02x %02x %02x\n",
				  l4_meta->payload[0],
				  l4_meta->payload[1],
				  l4_meta->payload[2],
				  l4_meta->payload[3]);
			plogx_err("   expect[0-3] = %02x %02x %02x %02x\n",
				  content[seq_beg + 0],
				  content[seq_beg + 1],
				  content[seq_beg + 2],
				  content[seq_beg + 3]);
		}
	}
}
static int stream_tcp_proc_in(struct stream_ctx *ctx, struct l4_meta *l4_meta)
{
prox_rte_tcp_hdr *tcp = NULL;
int got_syn = 0;
int got_ack = 0;
int got_fin = 0;
int got_rst = 0;
tcp = (prox_rte_tcp_hdr *)l4_meta->l4_hdr;
got_syn = tcp->tcp_flags & PROX_RTE_TCP_SYN_FLAG;
got_ack = tcp->tcp_flags & PROX_RTE_TCP_ACK_FLAG;
got_fin = tcp->tcp_flags & PROX_RTE_TCP_FIN_FLAG;
got_rst = tcp->tcp_flags & PROX_RTE_TCP_RST_FLAG;
plogx_dbg("TCP, flags: %s%s%s, (len = %d, seq = %d, ack =%d)\n", got_syn? "SYN ":"", got_ack? "ACK ":"", got_fin? "FIN " : "", l4_meta->len, rte_bswap32(tcp->sent_seq), rte_bswap32(tcp->recv_ack));
if (got_syn)
ctx->flags |= STREAM_CTX_F_TCP_GOT_SYN;
if (got_fin)
ctx->flags |= STREAM_CTX_F_TCP_GOT_FIN;
int progress_ack = 0, progress_seq = 0;
/* RST => other side wants to terminate due to
inconsitent state (example: delay of retransmit of
last ACK while other side already closed the
connection. The other side will accept the packet
as a beginning of a new connection but there will
be no SYN. ) */
if (got_rst) {
plogx_dbg("got rst\n");
ctx->flags |= STREAM_CTX_F_TCP_ENDED;
return -1;
}
if (got_ack) {
uint32_t ackd_seq = rte_bswap32(tcp->recv_ack);
if (ackd_seq > ctx->ackd_seq) {
plogx_dbg("Got ACK for outstanding data, from %d to %d\n", ctx->ackd_seq, ackd_seq);
ctx->ackd_seq = ackd_seq;
plogx_dbg("ackable data = %d\n", ctx->ackable_data_seq);
/* Ackable_data_seq set to byte after
current action. */
if (ctx->ackable_data_seq == ctx->ackd_seq) {
/* Due to retransmit in
combination with late acks,
is is possible to ack
future data. In this case,
the assumption that data
was lost is not true and
the next seq is moved
forward. */
if (ctx->next_seq < ctx->ackable_data_seq) {
ctx->next_seq = ctx->ackable_data_seq;
}
ctx->ackable_data_seq = 0;
const struct stream_cfg *stream_cfg = ctx->stream_cfg;
const struct peer_action *act = &stream_cfg->actions[ctx->cur_action];
ctx->cur_pos[act->peer] += act->len;
ctx->cur_action++;
plogx_dbg("Moving to next action %u\n", ctx->ackd_seq);
}
progress_ack = 1;
}
else {
plogx_dbg("Old data acked: acked = %d, ackable =%d\n", ackd_seq, ctx->ackd_seq);
}
}
uint32_t seq = rte_bswap32(tcp->sent_seq);
/* update recv_seq. */
if (got_syn) {
/* When a syn is received, immediately reset recv_seq based on seq from packet. */
ctx->recv_seq = seq + 1;
/* Syn packets have length 1, so the first real data will start after that. */
ctx->other_seq_first_byte = seq + 1;
progress_seq = 1;
}
else if (got_fin) {
if (ctx->recv_seq == seq) {
plogx_dbg("Got fin with correct seq\n");
ctx->recv_seq = seq + 1;
progress_seq = 1;
}
else {
plogx_dbg("Got fin but incorrect seq\n");
}
}
else {
/* Only expect in-order packets. */
if (ctx->recv_seq == seq) {
stream_tcp_proc_in_order_data(ctx, l4_meta, &progress_seq);
}
else if (ctx->recv_seq < seq) {
plogx_dbg("Future data received (got = %d, expected = %d), missing data! (data ignored)\n", seq, ctx->recv_seq);
}
else {
plogx_dbg("Old data received again (state = %s)\n", tcp_state_to_str(ctx->tcp_state));
plogx_dbg("expecting seq %d, got seq %d, len = %d\n",ctx->recv_seq, seq, l4_meta->len);
plogx_dbg("ackd_seq = %d, next_seq = %d, action = %d\n", ctx->ackd_seq, ctx->next_seq, ctx->cur_action);
}
}
/* parse options */
if (((tcp->data_off >> 4)*4) > sizeof(prox_rte_tcp_hdr)) {
struct tcp_option *tcp_op = (struct tcp_option *)(tcp + 1);
uint8_t *payload = (uint8_t *)tcp + ((tcp->data_off >> 4)*4);
do {
if (tcp_op->kind == 2 && tcp_op->len == 4) {
uint16_t mss = rte_bswap16(*(uint16_t *)(tcp_op + 1));
ctx->other_mss = mss;
}
tcp_op = (struct tcp_option *)(((uint8_t*)tcp_op) + tcp_op->len);
} while (((uint8_t*)tcp_op) < payload);
}
if (progress_ack || progress_seq) {
ctx->same_state = 0;
ctx->flags |= STREAM_CTX_F_LAST_RX_PKT_MADE_PROGRESS;
}
else {
ctx->flags &= ~STREAM_CTX_F_LAST_RX_PKT_MADE_PROGRESS;
}
return 0;
}
/* Output handler used to (re)open a connection: emit a SYN and move the
   stream to SYN_SENT.

   Returns -1 without touching mbuf when token_time_tsc_until_full() is
   non-zero for the stream's token_time; *next_tsc then holds that wait.
   Returns 0 when a SYN was written to mbuf; *next_tsc then holds the
   retransmission timeout. */
static int stream_tcp_proc_out_closed(struct stream_ctx *ctx, struct rte_mbuf *mbuf, uint64_t *next_tsc)
{
	const uint64_t tsc_to_wait = token_time_tsc_until_full(&ctx->token_time);

	if (tsc_to_wait) {
		*next_tsc = tsc_to_wait;
		return -1;
	}

	ctx->tcp_state = SYN_SENT;

	/* Fixed initial sequence number for the connecting side. */
	ctx->next_seq = 99;
	ctx->ackd_seq = 99;

	create_tcp_pkt(ctx, mbuf, PROX_RTE_TCP_SYN_FLAG, 0, 0);
	token_time_take(&ctx->token_time, mbuf_wire_size(mbuf));
	*next_tsc = tcp_retx_timeout(ctx);
	return 0;
}
/* Output handler for the LISTEN state.

   If a SYN has been seen on this stream, answer with SYN+ACK and move to
   SYN_RECEIVED. Otherwise the stream is marked as ended and a RST is sent.

   Returns -1 (with *next_tsc set to the wait) when
   token_time_tsc_until_full() is non-zero for the stream's token_time,
   0 when a packet was written to mbuf (*next_tsc = retransmission
   timeout). */
static int stream_tcp_proc_out_listen(struct stream_ctx *ctx, struct rte_mbuf *mbuf, uint64_t *next_tsc)
{
	const uint64_t tsc_to_wait = token_time_tsc_until_full(&ctx->token_time);
	uint8_t reply_flags;

	if (tsc_to_wait) {
		*next_tsc = tsc_to_wait;
		return -1;
	}

	if (ctx->flags & STREAM_CTX_F_TCP_GOT_SYN) {
		/* if syn received _now_, send ack + syn. goto SYN_RECEIVED. */
		plogx_dbg("Got packet while listen\n");

		/* Fixed initial sequence number for the listening side. */
		ctx->next_seq = 200;
		ctx->ackd_seq = 200;
		ctx->tcp_state = SYN_RECEIVED;
		reply_flags = PROX_RTE_TCP_SYN_FLAG | PROX_RTE_TCP_ACK_FLAG;
	}
	else {
		// TODO: keep connection around at end to catch retransmits from client
		plogx_dbg("Got packet while listening without SYN (will send RST)\n");
		pkt_tuple_debug(ctx->tuple);

		ctx->flags |= STREAM_CTX_F_TCP_ENDED;
		reply_flags = PROX_RTE_TCP_RST_FLAG;
	}

	create_tcp_pkt(ctx, mbuf, reply_flags, 0, 0);
	token_time_take(&ctx->token_time, mbuf_wire_size(mbuf));
	*next_tsc = tcp_retx_timeout(ctx);
	return 0;
}
/* Output handler for the SYN_SENT state.

   While our SYN is not acknowledged or no SYN has been received from the
   other side, the SYN is retransmitted through
   stream_tcp_proc_out_closed(). Once both hold, the stream becomes
   ESTABLISHED: if this peer owns the current action the stream is just
   rescheduled (return -1, no packet), otherwise a bare ACK is sent
   (return 0).

   Returns -1 with *next_tsc set to the wait when
   token_time_tsc_until_full() is non-zero for the stream's token_time. */
static int stream_tcp_proc_out_syn_sent(struct stream_ctx *ctx, struct rte_mbuf *mbuf, uint64_t *next_tsc)
{
	const uint64_t tsc_to_wait = token_time_tsc_until_full(&ctx->token_time);

	if (tsc_to_wait) {
		*next_tsc = tsc_to_wait;
		return -1;
	}

	const int handshake_done = ctx->ackd_seq >= ctx->next_seq &&
		(ctx->flags & STREAM_CTX_F_TCP_GOT_SYN);

	if (!handshake_done) {
		plogx_dbg("Retransmit SYN\n");
		/* Did not get packet, send syn again and keep state (waiting for ACK). */
		++ctx->same_state;
		tcp_set_retransmit(ctx);
		return stream_tcp_proc_out_closed(ctx, mbuf, next_tsc);
	}

	plogx_dbg("SYN_SENT and everything ACK'ed\n");
	plogx_dbg("ackd_seq = %d, next_seq = %d\n", ctx->ackd_seq, ctx->next_seq);

	/* If syn received for this stream, send ack and goto
	   ESTABLISHED. If first peer is this peer to send actual
	   data, schedule immediately. */
	ctx->same_state = 0;
	ctx->tcp_state = ESTABLISHED;

	if (ctx->stream_cfg->actions[ctx->cur_action].peer != ctx->peer) {
		/* The other peer sends first: complete the handshake
		   with a bare ACK. */
		create_tcp_pkt(ctx, mbuf, PROX_RTE_TCP_ACK_FLAG, 0, 0);
		token_time_take(&ctx->token_time, mbuf_wire_size(mbuf));
		*next_tsc = tcp_retx_timeout(ctx);
		return 0;
	}

	/* third packet of three-way handshake will also contain
	   data. Don't send separate ACK yet. TODO: only send ACK if
	   data has not yet been ACK'ed. */
	*next_tsc = tcp_resched_timeout(ctx);
	plogx_dbg("immediately resched (%d)\n", ctx->cur_action);
	return -1;
}
/* Output handler for the SYN_RECEIVED state.

   As long as our SYN/ACK has not been acknowledged it is retransmitted.
   Once it has, the stream becomes ESTABLISHED: a bare ACK is sent when the
   other peer owns the current action (return 0); when this peer is to
   send, no packet is produced and the stream is rescheduled (return -1).

   Returns -1 with *next_tsc set to the wait when
   token_time_tsc_until_full() is non-zero for the stream's token_time. */
static int stream_tcp_proc_out_syn_recv(struct stream_ctx *ctx, struct rte_mbuf *mbuf, uint64_t *next_tsc)
{
	const uint64_t tsc_to_wait = token_time_tsc_until_full(&ctx->token_time);

	if (tsc_to_wait) {
		*next_tsc = tsc_to_wait;
		return -1;
	}

	if (ctx->ackd_seq != ctx->next_seq) {
		/* Either this portion is executed due to a time-out
		   or due to packet reception, the SYN that has been
		   sent is not yet ACK'ed. So, retransmit the SYN/ACK. */
		plogx_dbg("Retransmit SYN/ACK\n");
		++ctx->same_state;
		tcp_set_retransmit(ctx);
		/* Rewind so the retransmitted SYN/ACK reuses its sequence number. */
		ctx->next_seq = ctx->ackd_seq;
		create_tcp_pkt(ctx, mbuf, PROX_RTE_TCP_SYN_FLAG | PROX_RTE_TCP_ACK_FLAG, 0, 0);
		token_time_take(&ctx->token_time, mbuf_wire_size(mbuf));
		*next_tsc = tcp_retx_timeout(ctx);
		return 0;
	}

	/* Possible from server side with ctx->cur_action == 1
	   if the current packet received had ACK for syn from
	   server to client and also data completing the first
	   action. */
	ctx->same_state = 0;
	ctx->tcp_state = ESTABLISHED;

	if (ctx->stream_cfg->actions[ctx->cur_action].peer == ctx->peer) {
		/* While at this point an ACK without any data could
		   be sent by the server, it is not really required
		   because the next packet after reschedule will also
		   contain an ACK along with new data.

		   In this implementation, if this is the case, the
		   client is not only expecting an ACK, but also
		   actual data. For this reason, the empty ACK packet
		   should not be sent, otherwise the client will
		   retransmit its data. */
		*next_tsc = tcp_resched_timeout(ctx);
		return -1;
	}

	create_tcp_pkt(ctx, mbuf, PROX_RTE_TCP_ACK_FLAG, 0, 0);
	token_time_take(&ctx->token_time, mbuf_wire_size(mbuf));
	*next_tsc = tcp_retx_timeout(ctx);
	return 0;
}
/* ESTABLISHED state, output side, for the peer that owns (sends the data
   of) the current action.

   Returns 0 when a packet has been built in mbuf: a FIN/ACK when the
   current action has length zero, otherwise a data segment carrying the
   ACK flag. Returns -1 when no packet is built; on those paths *next_tsc
   is either written here or handed to tcp_retx_timeout_start() /
   tcp_retx_timeout_resume(). */
static int stream_tcp_proc_out_estab_tx(struct stream_ctx *ctx, struct rte_mbuf *mbuf, uint64_t *next_tsc)
{
/* Nothing is built while token_time reports a non-zero wait; the caller
   is asked to come back after wait_tsc. */
uint64_t wait_tsc = token_time_tsc_until_full(&ctx->token_time);
if (wait_tsc != 0) {
*next_tsc = wait_tsc;
return -1;
}
const struct peer_action *act = &ctx->stream_cfg->actions[ctx->cur_action];
/* A zero-length action is handled as "close the connection". */
if (act->len == 0) {
plogx_dbg("Closing connection\n");
/* This would be an ACK combined with FIN. To
send a separate ack. keep the state in
established, put_ack and expire
immediately*/
plogx_dbg("Moving to FIN_WAIT\n");
ctx->tcp_state = FIN_WAIT;
ctx->same_state = 0;
create_tcp_pkt(ctx, mbuf, PROX_RTE_TCP_FIN_FLAG | PROX_RTE_TCP_ACK_FLAG, 0, 0);
token_time_take(&ctx->token_time, mbuf_wire_size(mbuf));
*next_tsc = tcp_retx_timeout(ctx);
return 0;
}
/* remaining_len2 will be zero, while in case of
act->len == 0, the connection can be closed
immediately. */
plogx_dbg("This peer to send!\n");
/* Bytes sent but not yet acknowledged. */
uint32_t outstanding_bytes = ctx->next_seq - ctx->ackd_seq;
/* Offset of next_seq relative to seq_first_byte, and the number of bytes
   of this action that lie at or beyond that offset. */
uint32_t data_beg2 = ctx->next_seq - ctx->seq_first_byte;
uint32_t remaining_len2 = act->len - (data_beg2 - act->beg);
/* Hard-coded limit on outstanding bytes (not taken from the peer). */
const uint32_t rx_win = 300000;
/* If still data to be sent and allowed by outstanding amount */
if (outstanding_bytes <= rx_win && remaining_len2) {
plogx_dbg("Outstanding bytes = %d, and remaining_len = %d, next_seq = %d\n", outstanding_bytes, remaining_len2, ctx->next_seq);
/* ackable_data_seq == 0 is treated as "not yet set for this action":
   record the sequence number that follows the whole action. */
if (ctx->ackable_data_seq == 0) {
PROX_ASSERT(outstanding_bytes == 0);
ctx->ackable_data_seq = ctx->next_seq + act->len;
}
else
plogx_dbg("This will not be the first part of the data within an action\n");
}
/* still data yet to be acked || still data to be sent but blocked by RX win. */
else {
if (ctx->flags & STREAM_CTX_F_MORE_DATA) {
/* Don't send any packet. */
ctx->flags &= ~STREAM_CTX_F_MORE_DATA;
*next_tsc = tcp_retx_timeout(ctx);
/* Remember the absolute deadline of the timeout just requested. */
ctx->sched_tsc = rte_rdtsc() + *next_tsc;
return -1;
}
else {
uint64_t now = rte_rdtsc();
/* The last received packet made progress and token_time_other still
   reports a non-zero wait: (re)start the retransmit timeout instead of
   retransmitting now. */
if ((ctx->flags & STREAM_CTX_F_LAST_RX_PKT_MADE_PROGRESS) && token_time_tsc_until_full(&ctx->token_time_other) != 0) {
tcp_retx_timeout_start(ctx, next_tsc);
ctx->flags &= ~STREAM_CTX_F_LAST_RX_PKT_MADE_PROGRESS;
return -1;
}
/* This function might be called due to packet
reception. In that case, cancel here and
wait until the timeout really occurs before
reTX. */
if (!tcp_retx_timeout_occured(ctx, now)) {
tcp_retx_timeout_resume(ctx, now, next_tsc);
return -1;
}
/* Real timeout: count it and rewind next_seq to the last acknowledged
   byte so the code below resends from there. */
ctx->same_state++;
tcp_set_retransmit(ctx);
/* This possibly means that now retransmit is resumed half-way in the action. */
plogx_dbg("Retransmit: outstanding = %d\n", outstanding_bytes);
plogx_dbg("Assuming %d->%d lost\n", ctx->ackd_seq, ctx->next_seq);
ctx->next_seq = ctx->ackd_seq;
plogx_dbg("highest seq from other side = %d\n", ctx->recv_seq);
}
/* When STREAM_CTX_F_MORE_DATA is set, real timeouts
can't occur. If this is needed, timeouts
need to carry additional information. */
}
/* The following code will retransmit the same data if next_seq is not moved forward. */
/* Recomputed here because next_seq may have been rewound above. */
uint32_t data_beg = ctx->next_seq - ctx->seq_first_byte;
uint32_t remaining_len = act->len - (data_beg - act->beg);
/* Payload of this segment: at most other_mss bytes. */
uint32_t data_len = remaining_len > ctx->other_mss? ctx->other_mss: remaining_len;
if (data_len == 0)
plogx_warn("data_len == 0\n");
/* MORE_DATA records whether more of this action remains after this
   segment. */
if (remaining_len > ctx->other_mss)
ctx->flags |= STREAM_CTX_F_MORE_DATA;
else
ctx->flags &= ~STREAM_CTX_F_MORE_DATA;
create_tcp_pkt(ctx, mbuf, PROX_RTE_TCP_ACK_FLAG, data_beg, data_len);
token_time_take(&ctx->token_time, mbuf_wire_size(mbuf));
/* More segments pending: use the reschedule timeout; otherwise start the
   retransmit timeout for the data just sent. */
if (ctx->flags & STREAM_CTX_F_MORE_DATA)
*next_tsc = tcp_resched_timeout(ctx);
else
tcp_retx_timeout_start(ctx, next_tsc);
return 0;
}
/* ESTABLISHED state, output side, while the other peer owns the current
   action (this side only acknowledges).

   Returns 0 when a packet has been built in mbuf (a FIN/ACK once the
   peer's FIN has been seen, otherwise a plain ACK) and -1 when token_time
   reports that sending has to wait; *next_tsc is written on every path. */
static int stream_tcp_proc_out_estab_rx(struct stream_ctx *ctx, struct rte_mbuf *mbuf, uint64_t *next_tsc)
{
	uint64_t wait_tsc = token_time_tsc_until_full(&ctx->token_time);

	if (wait_tsc != 0) {
		*next_tsc = wait_tsc;
		return -1;
	}

	if (ctx->flags & STREAM_CTX_F_TCP_GOT_FIN) {
		plogx_dbg("Got fin!\n");
		/* The peer's FIN is acknowledged and our own FIN is sent in a
		   single segment, so the state goes straight to LAST_ACK.
		   The former alternative (a separate segment with a detour
		   through CLOSE_WAIT) sat behind a constant-false branch and
		   could never execute; it has been dropped. */
		ctx->tcp_state = LAST_ACK;
		create_tcp_pkt(ctx, mbuf, PROX_RTE_TCP_FIN_FLAG | PROX_RTE_TCP_ACK_FLAG, 0, 0);
		token_time_take(&ctx->token_time, mbuf_wire_size(mbuf));
		*next_tsc = tcp_retx_timeout(ctx);
		return 0;
	}

	if (ctx->flags & STREAM_CTX_F_NEW_DATA)
		ctx->flags &= ~STREAM_CTX_F_NEW_DATA;
	else {
		/* No new data since the previous ACK: this ACK is a repeat,
		   so count it towards the same-state limit. */
		ctx->same_state++;
		tcp_set_retransmit(ctx);
		plogx_dbg("state++ (ack = %d)\n", ctx->recv_seq);
	}

	create_tcp_pkt(ctx, mbuf, PROX_RTE_TCP_ACK_FLAG, 0, 0);
	token_time_take(&ctx->token_time, mbuf_wire_size(mbuf));
	*next_tsc = tcp_retx_timeout(ctx);
	return 0;
}
/* ESTABLISHED state dispatcher: the peer named by the current action
   takes the sending (tx) path, the other peer takes the acknowledging
   (rx) path. The handler's return value is passed through unchanged. */
static int stream_tcp_proc_out_estab(struct stream_ctx *ctx, struct rte_mbuf *mbuf, uint64_t *next_tsc)
{
	const int owns_action = ctx->stream_cfg->actions[ctx->cur_action].peer == ctx->peer;

	if (!owns_action)
		return stream_tcp_proc_out_estab_rx(ctx, mbuf, next_tsc);

	return stream_tcp_proc_out_estab_tx(ctx, mbuf, next_tsc);
}
/* CLOSE_WAIT state, output side: builds a FIN/ACK in mbuf and moves to
   LAST_ACK. Returns 0 when the packet was built, -1 (with *next_tsc set
   to the wait) when token_time reports that sending has to wait. */
static int stream_tcp_proc_out_close_wait(struct stream_ctx *ctx, struct rte_mbuf *mbuf, uint64_t *next_tsc)
{
	const uint64_t tsc_to_wait = token_time_tsc_until_full(&ctx->token_time);

	if (tsc_to_wait != 0) {
		*next_tsc = tsc_to_wait;
		return -1;
	}

	/* This state is only a stepping stone, entered when the FIN is sent
	   separately after ACK'ing the incoming FIN. Whether or not a
	   packet triggered this call makes no difference. */
	ctx->tcp_state = LAST_ACK;

	create_tcp_pkt(ctx, mbuf, PROX_RTE_TCP_ACK_FLAG | PROX_RTE_TCP_FIN_FLAG, 0, 0);
	token_time_take(&ctx->token_time, mbuf_wire_size(mbuf));

	*next_tsc = tcp_retx_timeout(ctx);
	return 0;
}
/* LAST_ACK state, output side. Once everything sent has been
   acknowledged the stream is flagged as ended (-1, nothing to send).
   Otherwise the FIN/ACK is retransmitted (0), unless sending has to wait
   or the last received packet made progress (-1 with *next_tsc set). */
static int stream_tcp_proc_out_last_ack(struct stream_ctx *ctx, struct rte_mbuf *mbuf, uint64_t *next_tsc)
{
	/* Our FIN has been acknowledged: the connection is done. */
	if (ctx->ackd_seq == ctx->next_seq) {
		plogx_dbg("Last ACK received\n");
		ctx->flags |= STREAM_CTX_F_TCP_ENDED;
		return -1;
	}

	const uint64_t tsc_to_wait = token_time_tsc_until_full(&ctx->token_time);

	if (tsc_to_wait != 0) {
		*next_tsc = tsc_to_wait;
		return -1;
	}

	/* The last received packet made progress: hold off the retransmit
	   and wait for another timeout period. */
	if (ctx->flags & STREAM_CTX_F_LAST_RX_PKT_MADE_PROGRESS) {
		ctx->flags &= ~STREAM_CTX_F_LAST_RX_PKT_MADE_PROGRESS;
		*next_tsc = tcp_retx_timeout(ctx);
		return -1;
	}

	plogx_dbg("Retransmit!\n");
	/* Rewind to the last acknowledged sequence number before resending. */
	ctx->next_seq = ctx->ackd_seq;
	ctx->same_state++;
	tcp_set_retransmit(ctx);

	create_tcp_pkt(ctx, mbuf, PROX_RTE_TCP_ACK_FLAG | PROX_RTE_TCP_FIN_FLAG, 0, 0);
	token_time_take(&ctx->token_time, mbuf_wire_size(mbuf));

	*next_tsc = tcp_retx_timeout(ctx);
	return 0;
}
/* FIN_WAIT state, output side.
   - Our FIN not yet acknowledged: retransmit the FIN/ACK (0), unless the
     last received packet made progress (-1).
   - Our FIN acknowledged but the peer's FIN not yet seen: keep waiting (-1).
   - Both done: ACK the peer's FIN and enter TIME_WAIT (0).
   Returns -1 with *next_tsc set to the wait when token_time reports that
   sending has to wait. */
static int stream_tcp_proc_out_fin_wait(struct stream_ctx *ctx, struct rte_mbuf *mbuf, uint64_t *next_tsc)
{
	const uint64_t tsc_to_wait = token_time_tsc_until_full(&ctx->token_time);

	if (tsc_to_wait != 0) {
		*next_tsc = tsc_to_wait;
		return -1;
	}

	if (ctx->ackd_seq != ctx->next_seq) {
		/* Our FIN is still outstanding. */
		if (ctx->flags & STREAM_CTX_F_LAST_RX_PKT_MADE_PROGRESS) {
			ctx->flags &= ~STREAM_CTX_F_LAST_RX_PKT_MADE_PROGRESS;
			*next_tsc = tcp_retx_timeout(ctx);
			return -1;
		}

		plogx_dbg("Retransmit!\n");
		ctx->same_state++;
		tcp_set_retransmit(ctx);
		/* Rewind to the last acknowledged sequence number. */
		ctx->next_seq = ctx->ackd_seq;

		create_tcp_pkt(ctx, mbuf, PROX_RTE_TCP_FIN_FLAG | PROX_RTE_TCP_ACK_FLAG, 0, 0);
		token_time_take(&ctx->token_time, mbuf_wire_size(mbuf));

		*next_tsc = tcp_retx_timeout(ctx);
		return 0;
	}

	if (!(ctx->flags & STREAM_CTX_F_TCP_GOT_FIN)) {
		/* The peer's FIN has not arrived yet. */
		*next_tsc = tcp_retx_timeout(ctx);
		return -1;
	}

	/* Both directions are closed: acknowledge and linger in TIME_WAIT. */
	ctx->same_state = 0;
	ctx->tcp_state = TIME_WAIT;
	ctx->sched_tsc = rte_rdtsc() + ctx->stream_cfg->tsc_timeout_time_wait;
	plogx_dbg("from FIN_WAIT to TIME_WAIT\n");

	create_tcp_pkt(ctx, mbuf, PROX_RTE_TCP_ACK_FLAG, 0, 0);
	token_time_take(&ctx->token_time, mbuf_wire_size(mbuf));

	*next_tsc = ctx->stream_cfg->tsc_timeout_time_wait;
	return 0;
}
/* TIME_WAIT state, output side. When the deadline stored in sched_tsc
   has passed, the stream is flagged as ended (-1). Otherwise an ACK is
   built in mbuf and the TIME_WAIT deadline is pushed out again (0).
   Returns -1 with *next_tsc set to the wait when token_time reports that
   sending has to wait. */
static int stream_tcp_proc_out_time_wait(struct stream_ctx *ctx, struct rte_mbuf *mbuf, uint64_t *next_tsc)
{
	if (rte_rdtsc() > ctx->sched_tsc) {
		plogx_dbg("TIME_WAIT expired! for %#x\n", ctx->tuple->dst_addr);
		ctx->flags |= STREAM_CTX_F_TCP_ENDED;
		return -1;
	}

	const uint64_t tsc_to_wait = token_time_tsc_until_full(&ctx->token_time);

	if (tsc_to_wait != 0) {
		*next_tsc = tsc_to_wait;
		return -1;
	}

	plogx_dbg("Got packet while in TIME_WAIT (pkt ACK reTX)\n");
	/* Restart the TIME_WAIT period from now. */
	ctx->sched_tsc = rte_rdtsc() + ctx->stream_cfg->tsc_timeout_time_wait;

	create_tcp_pkt(ctx, mbuf, PROX_RTE_TCP_ACK_FLAG, 0, 0);
	token_time_take(&ctx->token_time, mbuf_wire_size(mbuf));

	*next_tsc = ctx->stream_cfg->tsc_timeout_time_wait;
	return 0;
}
/* Output-side dispatcher: runs the handler that matches the stream's
   current TCP state and returns its result (0: packet built in mbuf,
   -1: nothing to send). A state without a handler yields -1. */
static int stream_tcp_proc_out(struct stream_ctx *ctx, struct rte_mbuf *mbuf, uint64_t *next_tsc)
{
	int ret = -1;

	/* same_state is bumped by the handlers each time they retransmit;
	   a stream that reaches exactly 10 is marked expired and sends
	   nothing more. */
	if (ctx->same_state == 10) {
		ctx->flags |= STREAM_CTX_F_EXPIRED;
		return ret;
	}

	switch (ctx->tcp_state) {
	case CLOSED: /* Initial state of the client. */
		ret = stream_tcp_proc_out_closed(ctx, mbuf, next_tsc);
		break;
	case LISTEN: /* Initial state of the server. */
		ret = stream_tcp_proc_out_listen(ctx, mbuf, next_tsc);
		break;
	case SYN_SENT:
		ret = stream_tcp_proc_out_syn_sent(ctx, mbuf, next_tsc);
		break;
	case SYN_RECEIVED:
		ret = stream_tcp_proc_out_syn_recv(ctx, mbuf, next_tsc);
		break;
	case ESTABLISHED:
		ret = stream_tcp_proc_out_estab(ctx, mbuf, next_tsc);
		break;
	case CLOSE_WAIT:
		ret = stream_tcp_proc_out_close_wait(ctx, mbuf, next_tsc);
		break;
	case LAST_ACK:
		ret = stream_tcp_proc_out_last_ack(ctx, mbuf, next_tsc);
		break;
	case FIN_WAIT:
		ret = stream_tcp_proc_out_fin_wait(ctx, mbuf, next_tsc);
		break;
	case TIME_WAIT:
		ret = stream_tcp_proc_out_time_wait(ctx, mbuf, next_tsc);
		break;
	}

	return ret;
}
/* Entry point of the TCP stream state machine.

   A non-NULL l4_meta means that mbuf holds a received packet, which is
   first run through the input side; a non-zero result from the input
   side is returned as-is. Otherwise (or when called without a packet)
   the output side runs.

   Return: zero when mbuf now holds the reply to transmit; non-zero when
   the data was consumed and there is nothing to send. The latter may mean
   that the connection has ended or that a future event was scheduled. */
int stream_tcp_proc(struct stream_ctx *ctx, struct rte_mbuf *mbuf, struct l4_meta *l4_meta, uint64_t *next_tsc)
{
	/* Bring both token accounts up to date before anything is charged. */
	token_time_update(&ctx->token_time, rte_rdtsc());
	token_time_update(&ctx->token_time_other, rte_rdtsc());

	if (!l4_meta)
		return stream_tcp_proc_out(ctx, mbuf, next_tsc);

	/* Charge the received packet to the other side's token account. */
	token_time_take_clamp(&ctx->token_time_other, mbuf_wire_size(mbuf));

	const int in_ret = stream_tcp_proc_in(ctx, l4_meta);

	return in_ret ? in_ret : stream_tcp_proc_out(ctx, mbuf, next_tsc);
}
/* Returns non-zero when STREAM_CTX_F_TCP_ENDED is set in the stream's
   flags, zero otherwise. The value is the masked flag, not a normalized
   0/1. */
int stream_tcp_is_ended(struct stream_ctx *ctx)
{
	const int ended = ctx->flags & STREAM_CTX_F_TCP_ENDED;

	return ended;
}
/* Accounts for one packet of len bytes: increments *n_pkts and adds the
   packet's size to *n_bytes. Lengths below 60 are rounded up to 60, and
   20 bytes plus PROX_RTE_ETHER_CRC_LEN are added on top.
   NOTE(review): 60 and 20 look like the minimum Ethernet frame size
   without CRC and the preamble + inter-frame gap; confirm. */
static void add_pkt_bytes(uint32_t *n_pkts, uint32_t *n_bytes, uint32_t len)
{
	uint32_t padded_len = len;

	if (padded_len < 60)
		padded_len = 60;

	*n_pkts += 1;
	*n_bytes += padded_len + 20 + PROX_RTE_ETHER_CRC_LEN;
}
/* Computes how many packets and bytes a complete run of the stream
   described by cfg puts on the wire: the three handshake packets, the
   data segments of each action plus one reply packet per action, and the
   three tear-down packets.

   cfg:     stream configuration (per-peer header lengths and actions).
   n_pkts:  out, total number of packets (overwritten).
   n_bytes: out, total number of bytes as accounted by add_pkt_bytes()
            (overwritten).

   Actions are counted up to, but not including, the first action of
   length zero. */
void stream_tcp_calc_len(struct stream_cfg *cfg, uint32_t *n_pkts, uint32_t *n_bytes)
{
	const uint32_t client_hdr_len = cfg->data[PEER_CLIENT].hdr_len;
	const uint32_t server_hdr_len = cfg->data[PEER_SERVER].hdr_len;
	const uint32_t mss = 1440; /* TODO: should come from peer's own mss. */

	*n_pkts = 0;
	*n_bytes = 0;

	/* Connection setup; SYN and SYN/ACK are counted with 4 extra bytes
	   on top of the TCP header. */
	add_pkt_bytes(n_pkts, n_bytes, client_hdr_len + sizeof(prox_rte_tcp_hdr) + 4); /* SYN */
	add_pkt_bytes(n_pkts, n_bytes, server_hdr_len + sizeof(prox_rte_tcp_hdr) + 4); /* SYN/ACK */
	add_pkt_bytes(n_pkts, n_bytes, client_hdr_len + sizeof(prox_rte_tcp_hdr)); /* ACK */

	for (uint32_t i = 0; i < cfg->n_actions; ++i) {
		uint32_t remaining = cfg->actions[i].len;
		const uint32_t send_hdr_len = cfg->actions[i].peer == PEER_CLIENT? client_hdr_len : server_hdr_len;
		const uint32_t reply_hdr_len = cfg->actions[i].peer == PEER_CLIENT? server_hdr_len : client_hdr_len;

		/* A zero-length action ends the data phase. */
		if (remaining == 0)
			break;

		/* The action's data is cut into segments of at most mss bytes. */
		while (remaining) {
			uint32_t seg = remaining > mss? mss: remaining;
			add_pkt_bytes(n_pkts, n_bytes, send_hdr_len + sizeof(prox_rte_tcp_hdr) + seg);
			remaining -= seg;
		}
		/* One data-less reply from the receiving peer per action. */
		add_pkt_bytes(n_pkts, n_bytes, reply_hdr_len + sizeof(prox_rte_tcp_hdr));
	}

	/* Connection Tear-down: initiated by the peer of the last configured
	   action. Without any action there is no last entry to read
	   (indexing n_actions - 1 would run out of bounds), so the client
	   is taken as the initiator in that case. */
	enum l4gen_peer last_peer = PEER_CLIENT;

	if (cfg->n_actions != 0)
		last_peer = cfg->actions[cfg->n_actions - 1].peer;

	const uint32_t init_hdr_len = last_peer == PEER_CLIENT? client_hdr_len : server_hdr_len;
	const uint32_t resp_hdr_len = last_peer == PEER_CLIENT? server_hdr_len : client_hdr_len;

	add_pkt_bytes(n_pkts, n_bytes, init_hdr_len + sizeof(prox_rte_tcp_hdr)); /* FIN */
	add_pkt_bytes(n_pkts, n_bytes, resp_hdr_len + sizeof(prox_rte_tcp_hdr)); /* FIN/ACK */
	add_pkt_bytes(n_pkts, n_bytes, init_hdr_len + sizeof(prox_rte_tcp_hdr)); /* ACK */
}