/*
// Copyright (c) 2010-2017 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_table_hash.h>
#include <rte_ether.h>
#include <rte_version.h>
#include <rte_byteorder.h>

#include "prox_lua.h"
#include "prox_lua_types.h"

#include "tx_pkt.h"
#include "task_init.h"
#include "task_base.h"
#include "prox_port_cfg.h"
#include "prefetch.h"
#include "lconf.h"
#include "hash_utils.h"
#include "etypes.h"
#include "prox_cksum.h"
#include "defines.h"
#include "log.h"
#include "quit.h"
#include "prox_cfg.h"
#include "parse_utils.h"
#include "cfgfile.h"
#include "prox_shared.h"

#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
#define IPPROTO_IPIP IPPROTO_IPV4
#endif

struct ipv6_tun_dest {
	struct ipv6_addr  dst_addr;
	struct ether_addr dst_mac;
};

typedef enum ipv6_tun_dir_t {
	TUNNEL_DIR_ENCAP = 0,
	TUNNEL_DIR_DECAP = 1,
} ipv6_tun_dir_t;

struct task_ipv6_tun_base {
	struct task_base        base;
	struct ether_addr       src_mac;
	uint8_t                 core_nb;
	uint64_t                keys[64];          // Scratch keys for the bulk hash lookups
	struct rte_mbuf*        fake_packets[64];  // Fake mbufs pointing into keys[] (see init_task_ipv6_tun_base)
	uint16_t                lookup_port_mask;  // Mask used before looking up the port
	void*                   lookup_table;      // Fast lookup table for bindings
	uint32_t                runtime_flags;
	int                     offload_crc;
};

struct task_ipv6_decap {
	struct task_ipv6_tun_base   base;
	struct ether_addr           dst_mac;
};

struct task_ipv6_encap {
	struct task_ipv6_tun_base   base;
	uint32_t                    ipaddr;
	struct ipv6_addr            local_endpoint_addr;
	uint8_t                     tunnel_hop_limit;
};

#define IPv6_VERSION 6
#ifndef IPPROTO_IPV4
#define IPPROTO_IPV4	4
#endif

#define MAKE_KEY_FROM_FIELDS(ipv4_addr, port, port_mask) ( ((uint64_t)ipv4_addr << 16) | (port & port_mask) )
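
/* Key layout example: the IPv4 address (network byte order, exactly as read
   from the packet) occupies bits 16..47 and the masked (host order) port
   bits 0..15 of the 64-bit key. E.g. address 0x0A010203 with port 0x1234 and
   a port_mask of 0xffc0 (mask value purely for illustration) gives the key
   0x0A0102031200. */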

static int handle_ipv6_decap_bulk(struct task_base* tbase, struct rte_mbuf** rx_mbuf, const uint16_t n_pkts);
static int handle_ipv6_encap_bulk(struct task_base* tbase, struct rte_mbuf** rx_mbuf, const uint16_t n_pkts);

static void init_lookup_table(struct task_ipv6_tun_base* ptask, struct task_args *targ)
{
	const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);

	/* The lookup table is a per-core data structure to reduce the
	   memory footprint and improve cache utilization. Since
	   operations on the hash table are not thread-safe, the data
	   structure can't be shared on a per-socket or system-wide
	   basis. */
	ptask->lookup_table = prox_sh_find_core(targ->lconf->id, "ipv6_binding_table");
	if (NULL == ptask->lookup_table) {
		struct ipv6_tun_binding_table *table;
		PROX_PANIC(!strcmp(targ->tun_bindings, ""), "No tun bindings specified\n");
		int ret = lua_to_ip6_tun_binding(prox_lua(), GLOBAL, targ->tun_bindings, socket_id, &table);
		PROX_PANIC(ret, "Failed to read tun_bindings config:\n %s\n", get_lua_to_errors());

		struct rte_table_hash_key8_ext_params table_hash_params = {
			.n_entries = (table->num_binding_entries * 4),
			.n_entries_ext = (table->num_binding_entries * 2) >> 1,
			.f_hash = hash_crc32,
			.seed = 0,
			.signature_offset = HASH_METADATA_OFFSET(8),  // Ignored for dosig tables
			.key_offset = HASH_METADATA_OFFSET(0),
		};
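		/* Note: the "dosig" table ops compute each key's signature
		   internally via f_hash at lookup time, which is why
		   signature_offset is ignored. key_offset tells the table where
		   to find the 8-byte key relative to each mbuf pointer (see the
		   fake_packets setup in init_task_ipv6_tun_base). */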
                plogx_info("IPv6 Tunnel allocating lookup table on socket %d\n", socket_id);
		ptask->lookup_table = rte_table_hash_key8_ext_dosig_ops.
				f_create(&table_hash_params, socket_id, sizeof(struct ipv6_tun_dest));
		PROX_PANIC(ptask->lookup_table == NULL, "Error creating IPv6 Tunnel lookup table\n");

		for (unsigned idx = 0; idx < table->num_binding_entries; idx++) {
			int key_found = 0;
			void* entry_in_hash = NULL;
			struct ipv6_tun_dest data;
			struct ipv6_tun_binding_entry* entry = &table->entry[idx];
			uint64_t key = MAKE_KEY_FROM_FIELDS(rte_cpu_to_be_32(entry->public_ipv4), entry->public_port, ptask->lookup_port_mask);
			rte_memcpy(&data.dst_addr, &entry->endpoint_addr, sizeof(struct ipv6_addr));
			rte_memcpy(&data.dst_mac, &entry->next_hop_mac, sizeof(struct ether_addr));

			int ret = rte_table_hash_key8_ext_dosig_ops.f_add(ptask->lookup_table, &key, &data, &key_found, &entry_in_hash);
			PROX_PANIC(ret, "Error adding entry (%d) to binding lookup table\n", idx);
			PROX_PANIC(key_found, "Duplicate binding key for entry idx=%d\n", idx);

#ifdef DBG_IPV6_TUN_BINDING
			plog_info("Bind: %x:0x%x (port_mask 0x%x) key=0x%"PRIx64"\n", entry->public_ipv4, entry->public_port, ptask->lookup_port_mask, key);
			plog_info("  -> "IPv6_BYTES_FMT" ("MAC_BYTES_FMT")\n", IPv6_BYTES(entry->endpoint_addr.bytes), MAC_BYTES(entry->next_hop_mac.addr_bytes));
			plog_info("  -> "IPv6_BYTES_FMT" ("MAC_BYTES_FMT")\n", IPv6_BYTES(data.dst_addr.bytes), MAC_BYTES(data.dst_mac.addr_bytes));
			plog_info("  -> entry_in_hash=%p\n", entry_in_hash);
#endif
		}
                plogx_info("IPv6 Tunnel created %d lookup table entries\n", table->num_binding_entries);

		prox_sh_add_core(targ->lconf->id, "ipv6_binding_table", ptask->lookup_table);
	}
}

static void init_task_ipv6_tun_base(struct task_ipv6_tun_base* tun_base, struct task_args* targ)
{
	memcpy(&tun_base->src_mac, find_reachable_port(targ), sizeof(tun_base->src_mac));

	tun_base->lookup_port_mask = targ->lookup_port_mask;  // Mask used before looking up the port

	init_lookup_table(tun_base, targ);

	for (uint32_t i = 0; i < 64; ++i) {
		tun_base->fake_packets[i] = (struct rte_mbuf*)((uint8_t*)&tun_base->keys[i] - sizeof (struct rte_mbuf));
	}
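	/* The loop above builds "fake packets" for the bulk hash lookups: the
	   table's f_lookup() takes rte_mbuf pointers and reads each 8-byte key
	   at key_offset = HASH_METADATA_OFFSET(0), i.e. sizeof(struct rte_mbuf)
	   bytes past the mbuf pointer. Pointing each fake mbuf that many bytes
	   before keys[i] therefore makes the lookup read keys[i] directly. */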

	plogx_info("IPv6 Tunnel MAC="MAC_BYTES_FMT" port_mask=0x%x\n",
		  MAC_BYTES(tun_base->src_mac.addr_bytes), tun_base->lookup_port_mask);

	struct prox_port_cfg *port = find_reachable_port(targ);
	if (port) {
		tun_base->offload_crc = port->capabilities.tx_offload_cksum;
	}
}

static void init_task_ipv6_decap(struct task_base* tbase, struct task_args* targ)
{
	struct task_ipv6_decap* tun_task = (struct task_ipv6_decap*)tbase;
	struct task_ipv6_tun_base* tun_base = (struct task_ipv6_tun_base*)tun_task;

	init_task_ipv6_tun_base(tun_base, targ);
	tun_base->runtime_flags = targ->runtime_flags;

	memcpy(&tun_task->dst_mac, &targ->edaddr, sizeof(tun_task->dst_mac));
}

static void init_task_ipv6_encap(struct task_base* tbase, struct task_args* targ)
{
	struct task_ipv6_encap* tun_task = (struct task_ipv6_encap*)tbase;
	struct task_ipv6_tun_base *tun_base = (struct task_ipv6_tun_base*)tun_task;

	init_task_ipv6_tun_base(tun_base, targ);

	rte_memcpy(&tun_task->local_endpoint_addr, &targ->local_ipv6, sizeof(tun_task->local_endpoint_addr));
	tun_task->tunnel_hop_limit = targ->tunnel_hop_limit;
	tun_base->runtime_flags = targ->runtime_flags;
}

static struct task_init task_init_ipv6_decap = {
	.mode_str = "ipv6_decap",
	.init = init_task_ipv6_decap,
	.handle = handle_ipv6_decap_bulk,
	.size = sizeof(struct task_ipv6_decap)
};

static struct task_init task_init_ipv6_encap = {
	.mode_str = "ipv6_encap",
	.init = init_task_ipv6_encap,
	.handle = handle_ipv6_encap_bulk,
	.size = sizeof(struct task_ipv6_encap)
};

__attribute__((constructor)) static void reg_task_ipv6_decap(void)
{
	reg_task(&task_init_ipv6_decap);
}

__attribute__((constructor)) static void reg_task_ipv6_encap(void)
{
	reg_task(&task_init_ipv6_encap);
}
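
/* Once registered, these modes can be selected from the PROX configuration
   file, along the lines of (a minimal sketch; the core/task numbers and any
   further per-task parameters are illustrative only):

   [core 1]
   task=0
   mode=ipv6_encap
*/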

static inline uint8_t handle_ipv6_decap(struct task_ipv6_decap* ptask, struct rte_mbuf* rx_mbuf, struct ipv6_tun_dest* tun_dest);
static inline uint8_t handle_ipv6_encap(struct task_ipv6_encap* ptask, struct rte_mbuf* rx_mbuf, struct ipv6_tun_dest* tun_dest);

static inline int extract_key_fields(__attribute__((unused)) struct task_ipv6_tun_base* ptask, struct ipv4_hdr* pip4, ipv6_tun_dir_t dir, uint32_t* pAddr, uint16_t* pPort)
{
	*pAddr = (dir == TUNNEL_DIR_DECAP) ? pip4->src_addr : pip4->dst_addr;

	if (pip4->next_proto_id == IPPROTO_UDP) {
		struct udp_hdr* pudp = (struct udp_hdr *)(pip4 + 1);  // Assumes no IPv4 options (IHL == 5)
		*pPort = rte_be_to_cpu_16((dir == TUNNEL_DIR_DECAP) ? pudp->src_port : pudp->dst_port);
	}
	else if (pip4->next_proto_id == IPPROTO_TCP) {
		struct tcp_hdr* ptcp = (struct tcp_hdr *)(pip4 + 1);
		*pPort = rte_be_to_cpu_16((dir == TUNNEL_DIR_DECAP) ? ptcp->src_port : ptcp->dst_port);
	}
	else {
		plog_warn("IPv6 Tunnel: IPv4 packet of unexpected type proto_id=0x%x\n", pip4->next_proto_id);
		*pPort = 0xffff;
		return -1;
	}

	return 0;
}

static inline void extract_key(struct task_ipv6_tun_base* ptask, struct ipv4_hdr* pip4, ipv6_tun_dir_t dir, uint64_t* pkey)
{
	uint32_t lookup_addr;
	uint16_t lookup_port;

	if (unlikely(extract_key_fields(ptask, pip4, dir, &lookup_addr, &lookup_port))) {
		plog_warn("IPv6 Tunnel: Unable to extract fields from packet\n");
		*pkey = 0xffffffffL;  // Sentinel key, not expected to match any binding
		return;
	}

	*pkey = MAKE_KEY_FROM_FIELDS(lookup_addr, lookup_port, ptask->lookup_port_mask);
}

static inline struct ipv4_hdr* get_ipv4_decap(struct rte_mbuf *mbuf)
{
	struct ether_hdr* peth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
	struct ipv6_hdr* pip6 = (struct ipv6_hdr *)(peth + 1);
	struct ipv4_hdr* pip4 = (struct ipv4_hdr*) (pip6 + 1);  // TODO - Skip IPv6 extension headers

	return pip4;
}

static inline struct ipv4_hdr* get_ipv4_encap(struct rte_mbuf *mbuf)
{
	struct ether_hdr* peth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
	struct ipv4_hdr* pip4 = (struct ipv4_hdr *)(peth + 1);

	return pip4;
}

static inline void extract_key_decap(struct task_ipv6_tun_base* ptask, struct rte_mbuf *mbuf, uint64_t* pkey)
{
	extract_key(ptask, get_ipv4_decap(mbuf), TUNNEL_DIR_DECAP, pkey);
}

static inline void extract_key_decap_bulk(struct task_ipv6_tun_base* ptask, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
	for (uint16_t j = 0; j < n_pkts; ++j) {
		extract_key_decap(ptask, mbufs[j], &ptask->keys[j]);
	}
}

static inline void extract_key_encap(struct task_ipv6_tun_base* ptask, struct rte_mbuf *mbuf, uint64_t* pkey)
{
	extract_key(ptask, get_ipv4_encap(mbuf), TUNNEL_DIR_ENCAP, pkey);
}

static inline void extract_key_encap_bulk(struct task_ipv6_tun_base* ptask, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
	for (uint16_t j = 0; j < n_pkts; ++j) {
		extract_key_encap(ptask, mbufs[j], &ptask->keys[j]);
	}
}

__attribute__((cold)) static void handle_error(struct task_ipv6_tun_base* ptask, struct rte_mbuf* mbuf, ipv6_tun_dir_t dir)
{
	uint32_t lookup_addr;
	uint16_t lookup_port;
	uint64_t key;

	struct ipv4_hdr* pip4 = (dir == TUNNEL_DIR_DECAP) ? get_ipv4_decap(mbuf) : get_ipv4_encap(mbuf);
	extract_key_fields(ptask, pip4, dir, &lookup_addr, &lookup_port);
	extract_key(ptask, pip4, dir, &key);

	plog_warn("IPv6 Tunnel (%s) lookup failed for "IPv4_BYTES_FMT":%d [key=0x%"PRIx64"]\n",
	          (dir == TUNNEL_DIR_DECAP) ? "decap" : "encap",
	          IPv4_BYTES(((unsigned char*)&lookup_addr)), lookup_port, key);
}

static int handle_ipv6_decap_bulk(struct task_base* tbase, struct rte_mbuf** mbufs, const uint16_t n_pkts)
{
	struct task_ipv6_decap* task = (struct task_ipv6_decap *)tbase;
	uint64_t pkts_mask = RTE_LEN2MASK(n_pkts, uint64_t);
	struct ipv6_tun_dest* entries[64];
	uint8_t out[MAX_PKT_BURST];
	uint64_t lookup_hit_mask;

	prefetch_pkts(mbufs, n_pkts);

	// Lookup to verify packets are valid for their respective tunnels (their sending lwB4)
	extract_key_decap_bulk(&task->base, mbufs, n_pkts);
	rte_table_hash_key8_ext_dosig_ops.f_lookup(task->base.lookup_table, task->base.fake_packets, pkts_mask, &lookup_hit_mask, (void**)entries);

	if (likely(lookup_hit_mask == pkts_mask)) {
		for (uint16_t j = 0; j < n_pkts; ++j) {
			out[j] = handle_ipv6_decap(task, mbufs[j], entries[j]);
		}
	}
	else {
		// Bit j of lookup_hit_mask is set iff the key for mbufs[j] was found
		for (uint16_t j = 0; j < n_pkts; ++j) {
			if (unlikely(!((lookup_hit_mask >> j) & 0x1))) {
				handle_error(&task->base, mbufs[j], TUNNEL_DIR_DECAP);
				out[j] = OUT_DISCARD;
				continue;
			}
			out[j] = handle_ipv6_decap(task, mbufs[j], entries[j]);
		}
	}

	return task->base.base.tx_pkt(tbase, mbufs, n_pkts, out);
}

static int handle_ipv6_encap_bulk(struct task_base* tbase, struct rte_mbuf** mbufs, const uint16_t n_pkts)
{
	struct task_ipv6_encap* task = (struct task_ipv6_encap *)tbase;
	uint64_t pkts_mask = RTE_LEN2MASK(n_pkts, uint64_t);
	struct ipv6_tun_dest* entries[64];
	uint64_t lookup_hit_mask;
	uint8_t out[MAX_PKT_BURST];

	prefetch_first(mbufs, n_pkts);

	extract_key_encap_bulk(&task->base, mbufs, n_pkts);
	rte_table_hash_key8_ext_dosig_ops.f_lookup(task->base.lookup_table, task->base.fake_packets, pkts_mask, &lookup_hit_mask, (void**)entries);

	if (likely(lookup_hit_mask == pkts_mask)) {
		for (uint16_t j = 0; j < n_pkts; ++j) {
			out[j] = handle_ipv6_encap(task, mbufs[j], entries[j]);
		}
	}
	else {
		for (uint16_t j = 0; j < n_pkts; ++j) {
			if (unlikely(!((lookup_hit_mask >> j) & 0x1))) {
				handle_error(&task->base, mbufs[j], TUNNEL_DIR_ENCAP);
				out[j] = OUT_DISCARD;
				continue;
			}
			out[j] = handle_ipv6_encap(task, mbufs[j], entries[j]);
		}
	}

	return task->base.base.tx_pkt(tbase, mbufs, n_pkts, out);
}

static inline uint8_t handle_ipv6_decap(struct task_ipv6_decap* ptask, struct rte_mbuf* rx_mbuf, __attribute__((unused)) struct ipv6_tun_dest* tun_dest)
{
	struct ether_hdr* peth = rte_pktmbuf_mtod(rx_mbuf, struct ether_hdr *);

	if (unlikely(peth->ether_type != ETYPE_IPv6)) {
		plog_warn("Received non IPv6 packet on ipv6 tunnel port\n");
		// Drop packet
		return OUT_DISCARD;
	}

	struct ipv6_hdr* pip6 = (struct ipv6_hdr *)(peth + 1);
	int ipv6_hdr_len = sizeof(struct ipv6_hdr);

	// TODO - Skip over any IPv6 extension headers:
	//      While the next-header field is in (0, 43, 44, 50, 51, 60, 135), skip ahead
	//      by that extension header's length and repeat, increasing ipv6_hdr_len each time.
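	// A minimal sketch of that walk, kept as a comment (illustrative only;
	// the generic (hdr_ext_len + 1) * 8 rule holds for hop-by-hop (0),
	// routing (43) and destination options (60) headers, while fragment (44),
	// ESP (50) and AH (51) encode their lengths differently and would need
	// special cases):
	//
	//     uint8_t next_hdr = pip6->proto;
	//     uint8_t* pext = (uint8_t*)(pip6 + 1);
	//     while (next_hdr == 0 || next_hdr == 43 || next_hdr == 60) {
	//         int ext_len = (pext[1] + 1) * 8;  // hdr_ext_len, in 8-byte units excluding the first 8 bytes
	//         next_hdr = pext[0];               // next header field
	//         pext += ext_len;
	//         ipv6_hdr_len += ext_len;
	//     }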

	if (unlikely(pip6->proto != IPPROTO_IPIP)) {
		plog_warn("Received non IPv4 content within IPv6 tunnel packet\n");
		// Drop packet
		return OUT_DISCARD;
	}

	// Discard the IPv6 encapsulation: trimming ipv6_hdr_len bytes from the front
	// leaves exactly sizeof(struct ether_hdr) bytes in front of the inner IPv4
	// header, which are rewritten as a fresh Ethernet header below
	rte_pktmbuf_adj(rx_mbuf, ipv6_hdr_len);
	peth = rte_pktmbuf_mtod(rx_mbuf, struct ether_hdr *);

	// Restore Ethernet header
	ether_addr_copy(&ptask->base.src_mac, &peth->s_addr);
	ether_addr_copy(&ptask->dst_mac, &peth->d_addr);
	peth->ether_type = ETYPE_IPv4;

	return 0;
}

static inline uint8_t handle_ipv6_encap(struct task_ipv6_encap* ptask, struct rte_mbuf* rx_mbuf, __attribute__((unused)) struct ipv6_tun_dest* tun_dest)
{
        //plog_info("Found tunnel endpoint:"IPv6_BYTES_FMT" ("MAC_BYTES_FMT")\n", IPv6_BYTES(tun_dest->dst_addr), MAC_BYTES(tun_dest->dst_mac.addr_bytes));

	struct ether_hdr* peth = (struct ether_hdr *)(rte_pktmbuf_mtod(rx_mbuf, struct ether_hdr *));
	struct ipv4_hdr* pip4 = (struct ipv4_hdr *)(peth + 1);
	uint16_t ipv4_length = rte_be_to_cpu_16(pip4->total_length);
	struct task_ipv6_tun_base* tun_base = (struct task_ipv6_tun_base*)ptask;

	if (unlikely((pip4->version_ihl >> 4) != 4)) {
		plog_warn("Received non IPv4 packet at ipv6 tunnel input\n");
		// Drop packet
		return OUT_DISCARD;
	}

	if (pip4->time_to_live) {
		pip4->time_to_live--;
	}
	else {
		plog_info("TTL = 0 => Dropping\n");
		return OUT_DISCARD;
	}
	pip4->hdr_checksum = 0;

	// Remove padding if any (we don't want to encapsulate garbage at the end of the IPv4 packet).
	// E.g. a 60-byte minimum-size frame carrying a 40-byte IPv4 datagram has 6 bytes of padding.
	int padding = rte_pktmbuf_pkt_len(rx_mbuf) - (ipv4_length + sizeof(struct ether_hdr));
	if (unlikely(padding > 0)) {
	        rte_pktmbuf_trim(rx_mbuf, padding);
	}

	// Encapsulate: prepend room for the IPv6 header (assumes sufficient mbuf
	// headroom; rte_pktmbuf_prepend() would return NULL otherwise)
	const int extra_space = sizeof(struct ipv6_hdr);
	peth = (struct ether_hdr *)rte_pktmbuf_prepend(rx_mbuf, extra_space);

	// Ethernet Header
	ether_addr_copy(&ptask->base.src_mac, &peth->s_addr);
	ether_addr_copy(&tun_dest->dst_mac, &peth->d_addr);
	peth->ether_type = ETYPE_IPv6;

	// Set up IPv6 Header
	struct ipv6_hdr* pip6 = (struct ipv6_hdr *)(peth + 1);
	pip6->vtc_flow = rte_cpu_to_be_32(IPv6_VERSION << 28);
	pip6->proto = IPPROTO_IPIP;
	pip6->payload_len = rte_cpu_to_be_16(ipv4_length);
	pip6->hop_limits = ptask->tunnel_hop_limit;
	rte_memcpy(pip6->dst_addr, &tun_dest->dst_addr, sizeof(pip6->dst_addr));
	rte_memcpy(pip6->src_addr, &ptask->local_endpoint_addr, sizeof(pip6->src_addr));

	if (tun_base->runtime_flags & TASK_TX_CRC) {
		// We modified the TTL in the IPv4 header, hence have to recompute the IPv4 checksum
#define TUNNEL_L2_LEN (sizeof(struct ether_hdr) + sizeof(struct ipv6_hdr))
		prox_ip_cksum(rx_mbuf, pip4, TUNNEL_L2_LEN, sizeof(struct ipv4_hdr), ptask->base.offload_crc);
	}
	return 0;
}