From 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 Mon Sep 17 00:00:00 2001 From: Yunhong Jiang Date: Tue, 4 Aug 2015 12:17:53 -0700 Subject: Add the rt linux 4.1.3-rt3 as base Import the rt linux 4.1.3-rt3 as OPNFV kvm base. It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and the base is: commit 0917f823c59692d751951bf5ea699a2d1e2f26a2 Author: Sebastian Andrzej Siewior Date: Sat Jul 25 12:13:34 2015 +0200 Prepare v4.1.3-rt3 Signed-off-by: Sebastian Andrzej Siewior We lose all the git history this way and it's not good. We should apply another opnfv project repo in future. Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423 Signed-off-by: Yunhong Jiang --- kernel/net/ipv4/netfilter/Kconfig | 406 ++++ kernel/net/ipv4/netfilter/Makefile | 72 + kernel/net/ipv4/netfilter/arp_tables.c | 1926 +++++++++++++++++ kernel/net/ipv4/netfilter/arpt_mangle.c | 91 + kernel/net/ipv4/netfilter/arptable_filter.c | 91 + kernel/net/ipv4/netfilter/ip_tables.c | 2282 ++++++++++++++++++++ kernel/net/ipv4/netfilter/ipt_CLUSTERIP.c | 794 +++++++ kernel/net/ipv4/netfilter/ipt_ECN.c | 138 ++ kernel/net/ipv4/netfilter/ipt_MASQUERADE.c | 91 + kernel/net/ipv4/netfilter/ipt_REJECT.c | 113 + kernel/net/ipv4/netfilter/ipt_SYNPROXY.c | 480 ++++ kernel/net/ipv4/netfilter/ipt_ah.c | 91 + kernel/net/ipv4/netfilter/ipt_rpfilter.c | 144 ++ kernel/net/ipv4/netfilter/iptable_filter.c | 109 + kernel/net/ipv4/netfilter/iptable_mangle.c | 147 ++ kernel/net/ipv4/netfilter/iptable_nat.c | 154 ++ kernel/net/ipv4/netfilter/iptable_raw.c | 89 + kernel/net/ipv4/netfilter/iptable_security.c | 109 + .../net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 542 +++++ .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 469 ++++ .../net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 428 ++++ kernel/net/ipv4/netfilter/nf_defrag_ipv4.c | 129 ++ kernel/net/ipv4/netfilter/nf_log_arp.c | 161 ++ kernel/net/ipv4/netfilter/nf_log_ipv4.c | 397 ++++ kernel/net/ipv4/netfilter/nf_nat_h323.c | 631 ++++++ kernel/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 481 +++++ kernel/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c | 153 ++ kernel/net/ipv4/netfilter/nf_nat_pptp.c | 311 +++ kernel/net/ipv4/netfilter/nf_nat_proto_gre.c | 149 ++ kernel/net/ipv4/netfilter/nf_nat_proto_icmp.c | 83 + kernel/net/ipv4/netfilter/nf_nat_snmp_basic.c | 1313 +++++++++++ kernel/net/ipv4/netfilter/nf_reject_ipv4.c | 192 ++ kernel/net/ipv4/netfilter/nf_tables_arp.c | 102 + kernel/net/ipv4/netfilter/nf_tables_ipv4.c | 125 ++ kernel/net/ipv4/netfilter/nft_chain_nat_ipv4.c | 107 + kernel/net/ipv4/netfilter/nft_chain_route_ipv4.c | 88 + kernel/net/ipv4/netfilter/nft_masq_ipv4.c | 76 + kernel/net/ipv4/netfilter/nft_redir_ipv4.c | 76 + kernel/net/ipv4/netfilter/nft_reject_ipv4.c | 76 + 39 files changed, 13416 insertions(+) create mode 100644 kernel/net/ipv4/netfilter/Kconfig create mode 100644 kernel/net/ipv4/netfilter/Makefile create mode 100644 kernel/net/ipv4/netfilter/arp_tables.c create mode 100644 kernel/net/ipv4/netfilter/arpt_mangle.c create mode 100644 kernel/net/ipv4/netfilter/arptable_filter.c create mode 100644 kernel/net/ipv4/netfilter/ip_tables.c create mode 100644 kernel/net/ipv4/netfilter/ipt_CLUSTERIP.c create mode 100644 kernel/net/ipv4/netfilter/ipt_ECN.c create mode 100644 kernel/net/ipv4/netfilter/ipt_MASQUERADE.c create mode 100644 kernel/net/ipv4/netfilter/ipt_REJECT.c create mode 100644 kernel/net/ipv4/netfilter/ipt_SYNPROXY.c create mode 100644 kernel/net/ipv4/netfilter/ipt_ah.c create mode 100644 kernel/net/ipv4/netfilter/ipt_rpfilter.c create mode 100644 kernel/net/ipv4/netfilter/iptable_filter.c create mode 100644 kernel/net/ipv4/netfilter/iptable_mangle.c create mode 100644 kernel/net/ipv4/netfilter/iptable_nat.c create mode 100644 kernel/net/ipv4/netfilter/iptable_raw.c create mode 100644 kernel/net/ipv4/netfilter/iptable_security.c create mode 100644 kernel/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c create mode 100644 kernel/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c create mode 100644 kernel/net/ipv4/netfilter/nf_conntrack_proto_icmp.c create mode 100644 kernel/net/ipv4/netfilter/nf_defrag_ipv4.c create mode 100644 kernel/net/ipv4/netfilter/nf_log_arp.c create mode 100644 kernel/net/ipv4/netfilter/nf_log_ipv4.c create mode 100644 kernel/net/ipv4/netfilter/nf_nat_h323.c create mode 100644 kernel/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c create mode 100644 kernel/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c create mode 100644 kernel/net/ipv4/netfilter/nf_nat_pptp.c create mode 100644 kernel/net/ipv4/netfilter/nf_nat_proto_gre.c create mode 100644 kernel/net/ipv4/netfilter/nf_nat_proto_icmp.c create mode 100644 kernel/net/ipv4/netfilter/nf_nat_snmp_basic.c create mode 100644 kernel/net/ipv4/netfilter/nf_reject_ipv4.c create mode 100644 kernel/net/ipv4/netfilter/nf_tables_arp.c create mode 100644 kernel/net/ipv4/netfilter/nf_tables_ipv4.c create mode 100644 kernel/net/ipv4/netfilter/nft_chain_nat_ipv4.c create mode 100644 kernel/net/ipv4/netfilter/nft_chain_route_ipv4.c create mode 100644 kernel/net/ipv4/netfilter/nft_masq_ipv4.c create mode 100644 kernel/net/ipv4/netfilter/nft_redir_ipv4.c create mode 100644 kernel/net/ipv4/netfilter/nft_reject_ipv4.c (limited to 'kernel/net/ipv4/netfilter') diff --git a/kernel/net/ipv4/netfilter/Kconfig b/kernel/net/ipv4/netfilter/Kconfig new file mode 100644 index 000000000..fb20f3631 --- /dev/null +++ b/kernel/net/ipv4/netfilter/Kconfig @@ -0,0 +1,406 @@ +# +# IP netfilter configuration +# + +menu "IP: Netfilter Configuration" + depends on INET && NETFILTER + +config NF_DEFRAG_IPV4 + tristate + default n + +config NF_CONNTRACK_IPV4 + tristate "IPv4 connection tracking support (required for NAT)" + depends on NF_CONNTRACK + default m if NETFILTER_ADVANCED=n + select NF_DEFRAG_IPV4 + ---help--- + Connection tracking keeps a record of what packets have passed + through your machine, in order to figure out how they are related + into connections. + + This is IPv4 support on Layer 3 independent connection tracking. + Layer 3 independent connection tracking is experimental scheme + which generalize ip_conntrack to support other layer 3 protocols. + + To compile it as a module, choose M here. If unsure, say N. + +config NF_CONNTRACK_PROC_COMPAT + bool "proc/sysctl compatibility with old connection tracking" + depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4 + default y + help + This option enables /proc and sysctl compatibility with the old + layer 3 dependent connection tracking. This is needed to keep + old programs that have not been adapted to the new names working. + + If unsure, say Y. + +if NF_TABLES + +config NF_TABLES_IPV4 + tristate "IPv4 nf_tables support" + help + This option enables the IPv4 support for nf_tables. + +if NF_TABLES_IPV4 + +config NFT_CHAIN_ROUTE_IPV4 + tristate "IPv4 nf_tables route chain support" + help + This option enables the "route" chain for IPv4 in nf_tables. This + chain type is used to force packet re-routing after mangling header + fields such as the source, destination, type of service and + the packet mark. + +config NFT_REJECT_IPV4 + select NF_REJECT_IPV4 + default NFT_REJECT + tristate + +endif # NF_TABLES_IPV4 + +config NF_TABLES_ARP + tristate "ARP nf_tables support" + help + This option enables the ARP support for nf_tables. + +endif # NF_TABLES + +config NF_LOG_ARP + tristate "ARP packet logging" + default m if NETFILTER_ADVANCED=n + select NF_LOG_COMMON + +config NF_LOG_IPV4 + tristate "IPv4 packet logging" + default m if NETFILTER_ADVANCED=n + select NF_LOG_COMMON + +config NF_REJECT_IPV4 + tristate "IPv4 packet rejection" + default m if NETFILTER_ADVANCED=n + +config NF_NAT_IPV4 + tristate "IPv4 NAT" + depends on NF_CONNTRACK_IPV4 + default m if NETFILTER_ADVANCED=n + select NF_NAT + help + The IPv4 NAT option allows masquerading, port forwarding and other + forms of full Network Address Port Translation. This can be + controlled by iptables or nft. + +if NF_NAT_IPV4 + +config NFT_CHAIN_NAT_IPV4 + depends on NF_TABLES_IPV4 + tristate "IPv4 nf_tables nat chain support" + help + This option enables the "nat" chain for IPv4 in nf_tables. This + chain type is used to perform Network Address Translation (NAT) + packet transformations such as the source, destination address and + source and destination ports. + +config NF_NAT_MASQUERADE_IPV4 + tristate "IPv4 masquerade support" + help + This is the kernel functionality to provide NAT in the masquerade + flavour (automatic source address selection). + +config NFT_MASQ_IPV4 + tristate "IPv4 masquerading support for nf_tables" + depends on NF_TABLES_IPV4 + depends on NFT_MASQ + select NF_NAT_MASQUERADE_IPV4 + help + This is the expression that provides IPv4 masquerading support for + nf_tables. + +config NFT_REDIR_IPV4 + tristate "IPv4 redirect support for nf_tables" + depends on NF_TABLES_IPV4 + depends on NFT_REDIR + select NF_NAT_REDIRECT + help + This is the expression that provides IPv4 redirect support for + nf_tables. + +config NF_NAT_SNMP_BASIC + tristate "Basic SNMP-ALG support" + depends on NF_CONNTRACK_SNMP + depends on NETFILTER_ADVANCED + default NF_NAT && NF_CONNTRACK_SNMP + ---help--- + + This module implements an Application Layer Gateway (ALG) for + SNMP payloads. In conjunction with NAT, it allows a network + management system to access multiple private networks with + conflicting addresses. It works by modifying IP addresses + inside SNMP payloads to match IP-layer NAT mapping. + + This is the "basic" form of SNMP-ALG, as described in RFC 2962 + + To compile it as a module, choose M here. If unsure, say N. + +config NF_NAT_PROTO_GRE + tristate + depends on NF_CT_PROTO_GRE + +config NF_NAT_PPTP + tristate + depends on NF_CONNTRACK + default NF_CONNTRACK_PPTP + select NF_NAT_PROTO_GRE + +config NF_NAT_H323 + tristate + depends on NF_CONNTRACK + default NF_CONNTRACK_H323 + +endif # NF_NAT_IPV4 + +config IP_NF_IPTABLES + tristate "IP tables support (required for filtering/masq/NAT)" + default m if NETFILTER_ADVANCED=n + select NETFILTER_XTABLES + help + iptables is a general, extensible packet identification framework. + The packet filtering and full NAT (masquerading, port forwarding, + etc) subsystems now use this: say `Y' or `M' here if you want to use + either of those. + + To compile it as a module, choose M here. If unsure, say N. + +if IP_NF_IPTABLES + +# The matches. +config IP_NF_MATCH_AH + tristate '"ah" match support' + depends on NETFILTER_ADVANCED + help + This match extension allows you to match a range of SPIs + inside AH header of IPSec packets. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_ECN + tristate '"ecn" match support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MATCH_ECN + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_MATCH_ECN. + +config IP_NF_MATCH_RPFILTER + tristate '"rpfilter" reverse path filter match support' + depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW) + ---help--- + This option allows you to match packets whose replies would + go out via the interface the packet came in. + + To compile it as a module, choose M here. If unsure, say N. + The module will be called ipt_rpfilter. + +config IP_NF_MATCH_TTL + tristate '"ttl" match support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MATCH_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_MATCH_HL. + +# `filter', generic and specific targets +config IP_NF_FILTER + tristate "Packet filtering" + default m if NETFILTER_ADVANCED=n + help + Packet filtering defines a table `filter', which has a series of + rules for simple packet filtering at local input, forwarding and + local output. See the man page for iptables(8). + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_REJECT + tristate "REJECT target support" + depends on IP_NF_FILTER + select NF_REJECT_IPV4 + default m if NETFILTER_ADVANCED=n + help + The REJECT target allows a filtering rule to specify that an ICMP + error should be issued in response to an incoming packet, rather + than silently being dropped. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_SYNPROXY + tristate "SYNPROXY target support" + depends on NF_CONNTRACK && NETFILTER_ADVANCED + select NETFILTER_SYNPROXY + select SYN_COOKIES + help + The SYNPROXY target allows you to intercept TCP connections and + establish them using syncookies before they are passed on to the + server. This allows to avoid conntrack and server resource usage + during SYN-flood attacks. + + To compile it as a module, choose M here. If unsure, say N. + +# NAT + specific targets: nf_conntrack +config IP_NF_NAT + tristate "iptables NAT support" + depends on NF_CONNTRACK_IPV4 + default m if NETFILTER_ADVANCED=n + select NF_NAT + select NF_NAT_IPV4 + select NETFILTER_XT_NAT + help + This enables the `nat' table in iptables. This allows masquerading, + port forwarding and other forms of full Network Address Port + Translation. + + To compile it as a module, choose M here. If unsure, say N. + +if IP_NF_NAT + +config IP_NF_TARGET_MASQUERADE + tristate "MASQUERADE target support" + select NF_NAT_MASQUERADE_IPV4 + default m if NETFILTER_ADVANCED=n + help + Masquerading is a special case of NAT: all outgoing connections are + changed to seem to come from a particular interface's address, and + if the interface goes down, those connections are lost. This is + only useful for dialup accounts with dynamic IP address (ie. your IP + address will be different on next dialup). + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_NETMAP + tristate "NETMAP target support" + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_NETMAP + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_NETMAP. + +config IP_NF_TARGET_REDIRECT + tristate "REDIRECT target support" + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_REDIRECT + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_REDIRECT. + +endif # IP_NF_NAT + +# mangle + specific targets +config IP_NF_MANGLE + tristate "Packet mangling" + default m if NETFILTER_ADVANCED=n + help + This option adds a `mangle' table to iptables: see the man page for + iptables(8). This table is used for various packet alterations + which can effect how the packet is routed. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_CLUSTERIP + tristate "CLUSTERIP target support" + depends on IP_NF_MANGLE + depends on NF_CONNTRACK_IPV4 + depends on NETFILTER_ADVANCED + select NF_CONNTRACK_MARK + help + The CLUSTERIP target allows you to build load-balancing clusters of + network servers without having a dedicated load-balancing + router/server/switch. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_ECN + tristate "ECN target support" + depends on IP_NF_MANGLE + depends on NETFILTER_ADVANCED + ---help--- + This option adds a `ECN' target, which can be used in the iptables mangle + table. + + You can use this target to remove the ECN bits from the IPv4 header of + an IP packet. This is particularly useful, if you need to work around + existing ECN blackholes on the internet, but don't want to disable + ECN support in general. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_TTL + tristate '"TTL" target support' + depends on NETFILTER_ADVANCED && IP_NF_MANGLE + select NETFILTER_XT_TARGET_HL + ---help--- + This is a backwards-compatible option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_HL. + +# raw + specific targets +config IP_NF_RAW + tristate 'raw table support (required for NOTRACK/TRACE)' + help + This option adds a `raw' table to iptables. This table is the very + first in the netfilter framework and hooks in at the PREROUTING + and OUTPUT chains. + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + +# security table for MAC policy +config IP_NF_SECURITY + tristate "Security table" + depends on SECURITY + depends on NETFILTER_ADVANCED + help + This option adds a `security' table to iptables, for use + with Mandatory Access Control (MAC) policy. + + If unsure, say N. + +endif # IP_NF_IPTABLES + +# ARP tables +config IP_NF_ARPTABLES + tristate "ARP tables support" + select NETFILTER_XTABLES + depends on NETFILTER_ADVANCED + help + arptables is a general, extensible packet identification framework. + The ARP packet filtering and mangling (manipulation)subsystems + use this: say Y or M here if you want to use either of those. + + To compile it as a module, choose M here. If unsure, say N. + +if IP_NF_ARPTABLES + +config IP_NF_ARPFILTER + tristate "ARP packet filtering" + help + ARP packet filtering defines a table `filter', which has a series of + rules for simple ARP packet filtering at local input and + local output. On a bridge, you can also specify filtering rules + for forwarded ARP packets. See the man page for arptables(8). + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_ARP_MANGLE + tristate "ARP payload mangling" + help + Allows altering the ARP packet payload: source and destination + hardware and network addresses. + +endif # IP_NF_ARPTABLES + +endmenu + diff --git a/kernel/net/ipv4/netfilter/Makefile b/kernel/net/ipv4/netfilter/Makefile new file mode 100644 index 000000000..7fe6c7035 --- /dev/null +++ b/kernel/net/ipv4/netfilter/Makefile @@ -0,0 +1,72 @@ +# +# Makefile for the netfilter modules on top of IPv4. +# + +# objects for l3 independent conntrack +nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o +ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) +ifeq ($(CONFIG_PROC_FS),y) +nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o +endif +endif + +# connection tracking +obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o + +nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o +obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o + +# defrag +obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o + +# logging +obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o +obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o + +# reject +obj-$(CONFIG_NF_REJECT_IPV4) += nf_reject_ipv4.o + +# NAT helpers (nf_conntrack) +obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o +obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o +obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o +obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o + +# NAT protocols (nf_nat) +obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o + +obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o +obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o +obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o +obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o +obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o +obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o +obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o + +# generic IP tables +obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o + +# the three instances of ip_tables +obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o +obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o +obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o +obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o +obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o + +# matches +obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o +obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o + +# targets +obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o +obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o +obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o +obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o +obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o + +# generic ARP tables +obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o +obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o + +# just filtering instance of ARP tables for now +obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o diff --git a/kernel/net/ipv4/netfilter/arp_tables.c b/kernel/net/ipv4/netfilter/arp_tables.c new file mode 100644 index 000000000..a61200754 --- /dev/null +++ b/kernel/net/ipv4/netfilter/arp_tables.c @@ -0,0 +1,1926 @@ +/* + * Packet matching code for ARP packets. + * + * Based heavily, if not almost entirely, upon ip_tables.c framework. + * + * Some ARP specific bits are: + * + * Copyright (C) 2002 David S. Miller (davem@redhat.com) + * Copyright (C) 2006-2009 Patrick McHardy + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "../../netfilter/xt_repldata.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("David S. Miller "); +MODULE_DESCRIPTION("arptables core"); + +/*#define DEBUG_ARP_TABLES*/ +/*#define DEBUG_ARP_TABLES_USER*/ + +#ifdef DEBUG_ARP_TABLES +#define dprintf(format, args...) printk(format , ## args) +#else +#define dprintf(format, args...) +#endif + +#ifdef DEBUG_ARP_TABLES_USER +#define duprintf(format, args...) printk(format , ## args) +#else +#define duprintf(format, args...) +#endif + +#ifdef CONFIG_NETFILTER_DEBUG +#define ARP_NF_ASSERT(x) WARN_ON(!(x)) +#else +#define ARP_NF_ASSERT(x) +#endif + +void *arpt_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(arpt, ARPT); +} +EXPORT_SYMBOL_GPL(arpt_alloc_initial_table); + +static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, + const char *hdr_addr, int len) +{ + int i, ret; + + if (len > ARPT_DEV_ADDR_LEN_MAX) + len = ARPT_DEV_ADDR_LEN_MAX; + + ret = 0; + for (i = 0; i < len; i++) + ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i]; + + return ret != 0; +} + +/* + * Unfortunately, _b and _mask are not aligned to an int (or long int) + * Some arches dont care, unrolling the loop is a win on them. + * For other arches, we only have a 16bit alignement. + */ +static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask) +{ +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + unsigned long ret = ifname_compare_aligned(_a, _b, _mask); +#else + unsigned long ret = 0; + const u16 *a = (const u16 *)_a; + const u16 *b = (const u16 *)_b; + const u16 *mask = (const u16 *)_mask; + int i; + + for (i = 0; i < IFNAMSIZ/sizeof(u16); i++) + ret |= (a[i] ^ b[i]) & mask[i]; +#endif + return ret; +} + +/* Returns whether packet matches rule or not. */ +static inline int arp_packet_match(const struct arphdr *arphdr, + struct net_device *dev, + const char *indev, + const char *outdev, + const struct arpt_arp *arpinfo) +{ + const char *arpptr = (char *)(arphdr + 1); + const char *src_devaddr, *tgt_devaddr; + __be32 src_ipaddr, tgt_ipaddr; + long ret; + +#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) + + if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop, + ARPT_INV_ARPOP)) { + dprintf("ARP operation field mismatch.\n"); + dprintf("ar_op: %04x info->arpop: %04x info->arpop_mask: %04x\n", + arphdr->ar_op, arpinfo->arpop, arpinfo->arpop_mask); + return 0; + } + + if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd, + ARPT_INV_ARPHRD)) { + dprintf("ARP hardware address format mismatch.\n"); + dprintf("ar_hrd: %04x info->arhrd: %04x info->arhrd_mask: %04x\n", + arphdr->ar_hrd, arpinfo->arhrd, arpinfo->arhrd_mask); + return 0; + } + + if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro, + ARPT_INV_ARPPRO)) { + dprintf("ARP protocol address format mismatch.\n"); + dprintf("ar_pro: %04x info->arpro: %04x info->arpro_mask: %04x\n", + arphdr->ar_pro, arpinfo->arpro, arpinfo->arpro_mask); + return 0; + } + + if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln, + ARPT_INV_ARPHLN)) { + dprintf("ARP hardware address length mismatch.\n"); + dprintf("ar_hln: %02x info->arhln: %02x info->arhln_mask: %02x\n", + arphdr->ar_hln, arpinfo->arhln, arpinfo->arhln_mask); + return 0; + } + + src_devaddr = arpptr; + arpptr += dev->addr_len; + memcpy(&src_ipaddr, arpptr, sizeof(u32)); + arpptr += sizeof(u32); + tgt_devaddr = arpptr; + arpptr += dev->addr_len; + memcpy(&tgt_ipaddr, arpptr, sizeof(u32)); + + if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len), + ARPT_INV_SRCDEVADDR) || + FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len), + ARPT_INV_TGTDEVADDR)) { + dprintf("Source or target device address mismatch.\n"); + + return 0; + } + + if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr, + ARPT_INV_SRCIP) || + FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr), + ARPT_INV_TGTIP)) { + dprintf("Source or target IP address mismatch.\n"); + + dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", + &src_ipaddr, + &arpinfo->smsk.s_addr, + &arpinfo->src.s_addr, + arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : ""); + dprintf("TGT: %pI4 Mask: %pI4 Target: %pI4.%s\n", + &tgt_ipaddr, + &arpinfo->tmsk.s_addr, + &arpinfo->tgt.s_addr, + arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : ""); + return 0; + } + + /* Look for ifname matches. */ + ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask); + + if (FWINV(ret != 0, ARPT_INV_VIA_IN)) { + dprintf("VIA in mismatch (%s vs %s).%s\n", + indev, arpinfo->iniface, + arpinfo->invflags&ARPT_INV_VIA_IN ?" (INV)":""); + return 0; + } + + ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask); + + if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) { + dprintf("VIA out mismatch (%s vs %s).%s\n", + outdev, arpinfo->outiface, + arpinfo->invflags&ARPT_INV_VIA_OUT ?" (INV)":""); + return 0; + } + + return 1; +#undef FWINV +} + +static inline int arp_checkentry(const struct arpt_arp *arp) +{ + if (arp->flags & ~ARPT_F_MASK) { + duprintf("Unknown flag bits set: %08X\n", + arp->flags & ~ARPT_F_MASK); + return 0; + } + if (arp->invflags & ~ARPT_INV_MASK) { + duprintf("Unknown invflag bits set: %08X\n", + arp->invflags & ~ARPT_INV_MASK); + return 0; + } + + return 1; +} + +static unsigned int +arpt_error(struct sk_buff *skb, const struct xt_action_param *par) +{ + net_err_ratelimited("arp_tables: error: '%s'\n", + (const char *)par->targinfo); + + return NF_DROP; +} + +static inline const struct xt_entry_target * +arpt_get_target_c(const struct arpt_entry *e) +{ + return arpt_get_target((struct arpt_entry *)e); +} + +static inline struct arpt_entry * +get_entry(const void *base, unsigned int offset) +{ + return (struct arpt_entry *)(base + offset); +} + +static inline __pure +struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) +{ + return (void *)entry + entry->next_offset; +} + +unsigned int arpt_do_table(struct sk_buff *skb, + unsigned int hook, + const struct nf_hook_state *state, + struct xt_table *table) +{ + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); + unsigned int verdict = NF_DROP; + const struct arphdr *arp; + struct arpt_entry *e, *back; + const char *indev, *outdev; + void *table_base; + const struct xt_table_info *private; + struct xt_action_param acpar; + unsigned int addend; + + if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) + return NF_DROP; + + indev = state->in ? state->in->name : nulldevname; + outdev = state->out ? state->out->name : nulldevname; + + local_bh_disable(); + addend = xt_write_recseq_begin(); + private = table->private; + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); + table_base = private->entries[smp_processor_id()]; + + e = get_entry(table_base, private->hook_entry[hook]); + back = get_entry(table_base, private->underflow[hook]); + + acpar.in = state->in; + acpar.out = state->out; + acpar.hooknum = hook; + acpar.family = NFPROTO_ARP; + acpar.hotdrop = false; + + arp = arp_hdr(skb); + do { + const struct xt_entry_target *t; + + if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { + e = arpt_next_entry(e); + continue; + } + + ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1); + + t = arpt_get_target_c(e); + + /* Standard target? */ + if (!t->u.kernel.target->target) { + int v; + + v = ((struct xt_standard_target *)t)->verdict; + if (v < 0) { + /* Pop from stack? */ + if (v != XT_RETURN) { + verdict = (unsigned int)(-v) - 1; + break; + } + e = back; + back = get_entry(table_base, back->comefrom); + continue; + } + if (table_base + v + != arpt_next_entry(e)) { + /* Save old back ptr in next entry */ + struct arpt_entry *next = arpt_next_entry(e); + next->comefrom = (void *)back - table_base; + + /* set back pointer to next entry */ + back = next; + } + + e = get_entry(table_base, v); + continue; + } + + /* Targets which reenter must return + * abs. verdicts + */ + acpar.target = t->u.kernel.target; + acpar.targinfo = t->data; + verdict = t->u.kernel.target->target(skb, &acpar); + + /* Target might have changed stuff. */ + arp = arp_hdr(skb); + + if (verdict == XT_CONTINUE) + e = arpt_next_entry(e); + else + /* Verdict */ + break; + } while (!acpar.hotdrop); + xt_write_recseq_end(addend); + local_bh_enable(); + + if (acpar.hotdrop) + return NF_DROP; + else + return verdict; +} + +/* All zeroes == unconditional rule. */ +static inline bool unconditional(const struct arpt_arp *arp) +{ + static const struct arpt_arp uncond; + + return memcmp(arp, &uncond, sizeof(uncond)) == 0; +} + +/* Figures out from what hook each rule can be called: returns 0 if + * there are loops. Puts hook bitmask in comefrom. + */ +static int mark_source_chains(const struct xt_table_info *newinfo, + unsigned int valid_hooks, void *entry0) +{ + unsigned int hook; + + /* No recursion; use packet counter to save back ptrs (reset + * to 0 as we leave), and comefrom to save source hook bitmask. + */ + for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) { + unsigned int pos = newinfo->hook_entry[hook]; + struct arpt_entry *e + = (struct arpt_entry *)(entry0 + pos); + + if (!(valid_hooks & (1 << hook))) + continue; + + /* Set initial back pointer. */ + e->counters.pcnt = pos; + + for (;;) { + const struct xt_standard_target *t + = (void *)arpt_get_target_c(e); + int visited = e->comefrom & (1 << hook); + + if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { + pr_notice("arptables: loop hook %u pos %u %08X.\n", + hook, pos, e->comefrom); + return 0; + } + e->comefrom + |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); + + /* Unconditional return/END. */ + if ((e->target_offset == sizeof(struct arpt_entry) && + (strcmp(t->target.u.user.name, + XT_STANDARD_TARGET) == 0) && + t->verdict < 0 && unconditional(&e->arp)) || + visited) { + unsigned int oldpos, size; + + if ((strcmp(t->target.u.user.name, + XT_STANDARD_TARGET) == 0) && + t->verdict < -NF_MAX_VERDICT - 1) { + duprintf("mark_source_chains: bad " + "negative verdict (%i)\n", + t->verdict); + return 0; + } + + /* Return: backtrack through the last + * big jump. + */ + do { + e->comefrom ^= (1<counters.pcnt; + e->counters.pcnt = 0; + + /* We're at the start. */ + if (pos == oldpos) + goto next; + + e = (struct arpt_entry *) + (entry0 + pos); + } while (oldpos == pos + e->next_offset); + + /* Move along one */ + size = e->next_offset; + e = (struct arpt_entry *) + (entry0 + pos + size); + e->counters.pcnt = pos; + pos += size; + } else { + int newpos = t->verdict; + + if (strcmp(t->target.u.user.name, + XT_STANDARD_TARGET) == 0 && + newpos >= 0) { + if (newpos > newinfo->size - + sizeof(struct arpt_entry)) { + duprintf("mark_source_chains: " + "bad verdict (%i)\n", + newpos); + return 0; + } + + /* This a jump; chase it. */ + duprintf("Jump rule %u -> %u\n", + pos, newpos); + } else { + /* ... this is a fallthru */ + newpos = pos + e->next_offset; + } + e = (struct arpt_entry *) + (entry0 + newpos); + e->counters.pcnt = pos; + pos = newpos; + } + } + next: + duprintf("Finished chain %u\n", hook); + } + return 1; +} + +static inline int check_entry(const struct arpt_entry *e, const char *name) +{ + const struct xt_entry_target *t; + + if (!arp_checkentry(&e->arp)) { + duprintf("arp_tables: arp check failed %p %s.\n", e, name); + return -EINVAL; + } + + if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) + return -EINVAL; + + t = arpt_get_target_c(e); + if (e->target_offset + t->u.target_size > e->next_offset) + return -EINVAL; + + return 0; +} + +static inline int check_target(struct arpt_entry *e, const char *name) +{ + struct xt_entry_target *t = arpt_get_target(e); + int ret; + struct xt_tgchk_param par = { + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_ARP, + }; + + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); + if (ret < 0) { + duprintf("arp_tables: check failed for `%s'.\n", + t->u.kernel.target->name); + return ret; + } + return 0; +} + +static inline int +find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) +{ + struct xt_entry_target *t; + struct xt_target *target; + int ret; + + ret = check_entry(e, name); + if (ret) + return ret; + + t = arpt_get_target(e); + target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { + duprintf("find_check_entry: `%s' not found\n", t->u.user.name); + ret = PTR_ERR(target); + goto out; + } + t->u.kernel.target = target; + + ret = check_target(e, name); + if (ret) + goto err; + return 0; +err: + module_put(t->u.kernel.target->me); +out: + return ret; +} + +static bool check_underflow(const struct arpt_entry *e) +{ + const struct xt_entry_target *t; + unsigned int verdict; + + if (!unconditional(&e->arp)) + return false; + t = arpt_get_target_c(e); + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) + return false; + verdict = ((struct xt_standard_target *)t)->verdict; + verdict = -verdict - 1; + return verdict == NF_DROP || verdict == NF_ACCEPT; +} + +static inline int check_entry_size_and_hooks(struct arpt_entry *e, + struct xt_table_info *newinfo, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, + unsigned int valid_hooks) +{ + unsigned int h; + + if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || + (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { + duprintf("Bad offset %p\n", e); + return -EINVAL; + } + + if (e->next_offset + < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) { + duprintf("checking: element %p size %u\n", + e, e->next_offset); + return -EINVAL; + } + + /* Check hooks & underflows */ + for (h = 0; h < NF_ARP_NUMHOOKS; h++) { + if (!(valid_hooks & (1 << h))) + continue; + if ((unsigned char *)e - base == hook_entries[h]) + newinfo->hook_entry[h] = hook_entries[h]; + if ((unsigned char *)e - base == underflows[h]) { + if (!check_underflow(e)) { + pr_err("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); + return -EINVAL; + } + newinfo->underflow[h] = underflows[h]; + } + } + + /* Clear counters and comefrom */ + e->counters = ((struct xt_counters) { 0, 0 }); + e->comefrom = 0; + return 0; +} + +static inline void cleanup_entry(struct arpt_entry *e) +{ + struct xt_tgdtor_param par; + struct xt_entry_target *t; + + t = arpt_get_target(e); + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_ARP; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); +} + +/* Checks and translates the user-supplied table segment (held in + * newinfo). + */ +static int translate_table(struct xt_table_info *newinfo, void *entry0, + const struct arpt_replace *repl) +{ + struct arpt_entry *iter; + unsigned int i; + int ret = 0; + + newinfo->size = repl->size; + newinfo->number = repl->num_entries; + + /* Init all hooks to impossible value. */ + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + newinfo->hook_entry[i] = 0xFFFFFFFF; + newinfo->underflow[i] = 0xFFFFFFFF; + } + + duprintf("translate_table: size %u\n", newinfo->size); + i = 0; + + /* Walk through entries, checking offsets. */ + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = check_entry_size_and_hooks(iter, newinfo, entry0, + entry0 + repl->size, + repl->hook_entry, + repl->underflow, + repl->valid_hooks); + if (ret != 0) + break; + ++i; + if (strcmp(arpt_get_target(iter)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; + } + duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); + if (ret != 0) + return ret; + + if (i != repl->num_entries) { + duprintf("translate_table: %u not %u entries\n", + i, repl->num_entries); + return -EINVAL; + } + + /* Check hooks all assigned */ + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + /* Only hooks which are valid */ + if (!(repl->valid_hooks & (1 << i))) + continue; + if (newinfo->hook_entry[i] == 0xFFFFFFFF) { + duprintf("Invalid hook entry %u %u\n", + i, repl->hook_entry[i]); + return -EINVAL; + } + if (newinfo->underflow[i] == 0xFFFFFFFF) { + duprintf("Invalid underflow %u %u\n", + i, repl->underflow[i]); + return -EINVAL; + } + } + + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) { + duprintf("Looping hook\n"); + return -ELOOP; + } + + /* Finally, each sanity check must pass */ + i = 0; + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = find_check_entry(iter, repl->name, repl->size); + if (ret != 0) + break; + ++i; + } + + if (ret != 0) { + xt_entry_foreach(iter, entry0, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter); + } + return ret; + } + + /* And one copy for every other CPU */ + for_each_possible_cpu(i) { + if (newinfo->entries[i] && newinfo->entries[i] != entry0) + memcpy(newinfo->entries[i], entry0, newinfo->size); + } + + return ret; +} + +static void get_counters(const struct xt_table_info *t, + struct xt_counters counters[]) +{ + struct arpt_entry *iter; + unsigned int cpu; + unsigned int i; + + for_each_possible_cpu(cpu) { + seqcount_t *s = &per_cpu(xt_recseq, cpu); + + i = 0; + xt_entry_foreach(iter, t->entries[cpu], t->size) { + u64 bcnt, pcnt; + unsigned int start; + + do { + start = read_seqcount_begin(s); + bcnt = iter->counters.bcnt; + pcnt = iter->counters.pcnt; + } while (read_seqcount_retry(s, start)); + + ADD_COUNTER(counters[i], bcnt, pcnt); + ++i; + } + } +} + +static struct xt_counters *alloc_counters(const struct xt_table *table) +{ + unsigned int countersize; + struct xt_counters *counters; + const struct xt_table_info *private = table->private; + + /* We need atomic snapshot of counters: rest doesn't change + * (other than comefrom, which userspace doesn't care + * about). + */ + countersize = sizeof(struct xt_counters) * private->number; + counters = vzalloc(countersize); + + if (counters == NULL) + return ERR_PTR(-ENOMEM); + + get_counters(private, counters); + + return counters; +} + +static int copy_entries_to_user(unsigned int total_size, + const struct xt_table *table, + void __user *userptr) +{ + unsigned int off, num; + const struct arpt_entry *e; + struct xt_counters *counters; + struct xt_table_info *private = table->private; + int ret = 0; + void *loc_cpu_entry; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); + + loc_cpu_entry = private->entries[raw_smp_processor_id()]; + /* ... then copy entire thing ... */ + if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { + ret = -EFAULT; + goto free_counters; + } + + /* FIXME: use iterator macros --RR */ + /* ... then go back and fix counters and names */ + for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ + const struct xt_entry_target *t; + + e = (struct arpt_entry *)(loc_cpu_entry + off); + if (copy_to_user(userptr + off + + offsetof(struct arpt_entry, counters), + &counters[num], + sizeof(counters[num])) != 0) { + ret = -EFAULT; + goto free_counters; + } + + t = arpt_get_target_c(e); + if (copy_to_user(userptr + off + e->target_offset + + offsetof(struct xt_entry_target, + u.user.name), + t->u.kernel.target->name, + strlen(t->u.kernel.target->name)+1) != 0) { + ret = -EFAULT; + goto free_counters; + } + } + + free_counters: + vfree(counters); + return ret; +} + +#ifdef CONFIG_COMPAT +static void compat_standard_from_user(void *dst, const void *src) +{ + int v = *(compat_int_t *)src; + + if (v > 0) + v += xt_compat_calc_jump(NFPROTO_ARP, v); + memcpy(dst, &v, sizeof(v)); +} + +static int compat_standard_to_user(void __user *dst, const void *src) +{ + compat_int_t cv = *(int *)src; + + if (cv > 0) + cv -= xt_compat_calc_jump(NFPROTO_ARP, cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; +} + +static int compat_calc_entry(const struct arpt_entry *e, + const struct xt_table_info *info, + const void *base, struct xt_table_info *newinfo) +{ + const struct xt_entry_target *t; + unsigned int entry_offset; + int off, i, ret; + + off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); + entry_offset = (void *)e - base; + + t = arpt_get_target_c(e); + off += xt_compat_target_offset(t->u.kernel.target); + newinfo->size -= off; + ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); + if (ret) + return ret; + + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + if (info->hook_entry[i] && + (e < (struct arpt_entry *)(base + info->hook_entry[i]))) + newinfo->hook_entry[i] -= off; + if (info->underflow[i] && + (e < (struct arpt_entry *)(base + info->underflow[i]))) + newinfo->underflow[i] -= off; + } + return 0; +} + +static int compat_table_info(const struct xt_table_info *info, + struct xt_table_info *newinfo) +{ + struct arpt_entry *iter; + void *loc_cpu_entry; + int ret; + + if (!newinfo || !info) + return -EINVAL; + + /* we dont care about newinfo->entries[] */ + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + newinfo->initial_entries = 0; + loc_cpu_entry = info->entries[raw_smp_processor_id()]; + xt_compat_init_offsets(NFPROTO_ARP, info->number); + xt_entry_foreach(iter, loc_cpu_entry, info->size) { + ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); + if (ret != 0) + return ret; + } + return 0; +} +#endif + +static int get_info(struct net *net, void __user *user, + const int *len, int compat) +{ + char name[XT_TABLE_MAXNAMELEN]; + struct xt_table *t; + int ret; + + if (*len != sizeof(struct arpt_getinfo)) { + duprintf("length %u != %Zu\n", *len, + sizeof(struct arpt_getinfo)); + return -EINVAL; + } + + if (copy_from_user(name, user, sizeof(name)) != 0) + return -EFAULT; + + name[XT_TABLE_MAXNAMELEN-1] = '\0'; +#ifdef CONFIG_COMPAT + if (compat) + xt_compat_lock(NFPROTO_ARP); +#endif + t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), + "arptable_%s", name); + if (!IS_ERR_OR_NULL(t)) { + struct arpt_getinfo info; + const struct xt_table_info *private = t->private; +#ifdef CONFIG_COMPAT + struct xt_table_info tmp; + + if (compat) { + ret = compat_table_info(private, &tmp); + xt_compat_flush_offsets(NFPROTO_ARP); + private = &tmp; + } +#endif + memset(&info, 0, sizeof(info)); + info.valid_hooks = t->valid_hooks; + memcpy(info.hook_entry, private->hook_entry, + sizeof(info.hook_entry)); + memcpy(info.underflow, private->underflow, + sizeof(info.underflow)); + info.num_entries = private->number; + info.size = private->size; + strcpy(info.name, name); + + if (copy_to_user(user, &info, *len) != 0) + ret = -EFAULT; + else + ret = 0; + xt_table_unlock(t); + module_put(t->me); + } else + ret = t ? PTR_ERR(t) : -ENOENT; +#ifdef CONFIG_COMPAT + if (compat) + xt_compat_unlock(NFPROTO_ARP); +#endif + return ret; +} + +static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, + const int *len) +{ + int ret; + struct arpt_get_entries get; + struct xt_table *t; + + if (*len < sizeof(get)) { + duprintf("get_entries: %u < %Zu\n", *len, sizeof(get)); + return -EINVAL; + } + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; + if (*len != sizeof(struct arpt_get_entries) + get.size) { + duprintf("get_entries: %u != %Zu\n", *len, + sizeof(struct arpt_get_entries) + get.size); + return -EINVAL; + } + + t = xt_find_table_lock(net, NFPROTO_ARP, get.name); + if (!IS_ERR_OR_NULL(t)) { + const struct xt_table_info *private = t->private; + + duprintf("t->private->number = %u\n", + private->number); + if (get.size == private->size) + ret = copy_entries_to_user(private->size, + t, uptr->entrytable); + else { + duprintf("get_entries: I've got %u not %u!\n", + private->size, get.size); + ret = -EAGAIN; + } + module_put(t->me); + xt_table_unlock(t); + } else + ret = t ? PTR_ERR(t) : -ENOENT; + + return ret; +} + +static int __do_replace(struct net *net, const char *name, + unsigned int valid_hooks, + struct xt_table_info *newinfo, + unsigned int num_counters, + void __user *counters_ptr) +{ + int ret; + struct xt_table *t; + struct xt_table_info *oldinfo; + struct xt_counters *counters; + void *loc_cpu_old_entry; + struct arpt_entry *iter; + + ret = 0; + counters = vzalloc(num_counters * sizeof(struct xt_counters)); + if (!counters) { + ret = -ENOMEM; + goto out; + } + + t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), + "arptable_%s", name); + if (IS_ERR_OR_NULL(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; + goto free_newinfo_counters_untrans; + } + + /* You lied! */ + if (valid_hooks != t->valid_hooks) { + duprintf("Valid hook crap: %08X vs %08X\n", + valid_hooks, t->valid_hooks); + ret = -EINVAL; + goto put_module; + } + + oldinfo = xt_replace_table(t, num_counters, newinfo, &ret); + if (!oldinfo) + goto put_module; + + /* Update module usage count based on number of rules */ + duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n", + oldinfo->number, oldinfo->initial_entries, newinfo->number); + if ((oldinfo->number > oldinfo->initial_entries) || + (newinfo->number <= oldinfo->initial_entries)) + module_put(t->me); + if ((oldinfo->number > oldinfo->initial_entries) && + (newinfo->number <= oldinfo->initial_entries)) + module_put(t->me); + + /* Get the old counters, and synchronize with replace */ + get_counters(oldinfo, counters); + + /* Decrease module usage counts and free resource */ + loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; + xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + cleanup_entry(iter); + + xt_free_table_info(oldinfo); + if (copy_to_user(counters_ptr, counters, + sizeof(struct xt_counters) * num_counters) != 0) { + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n"); + } + vfree(counters); + xt_table_unlock(t); + return ret; + + put_module: + module_put(t->me); + xt_table_unlock(t); + free_newinfo_counters_untrans: + vfree(counters); + out: + return ret; +} + +static int do_replace(struct net *net, const void __user *user, + unsigned int len) +{ + int ret; + struct arpt_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + struct arpt_entry *iter; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + /* overflow check */ + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + + tmp.name[sizeof(tmp.name)-1] = 0; + + newinfo = xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + ret = translate_table(newinfo, loc_cpu_entry, &tmp); + if (ret != 0) + goto free_newinfo; + + duprintf("arp_tables: Translated table\n"); + + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, tmp.counters); + if (ret) + goto free_newinfo_untrans; + return 0; + + free_newinfo_untrans: + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter); + free_newinfo: + xt_free_table_info(newinfo); + return ret; +} + +static int do_add_counters(struct net *net, const void __user *user, + unsigned int len, int compat) +{ + unsigned int i, curcpu; + struct xt_counters_info tmp; + struct xt_counters *paddc; + unsigned int num_counters; + const char *name; + int size; + void *ptmp; + struct xt_table *t; + const struct xt_table_info *private; + int ret = 0; + void *loc_cpu_entry; + struct arpt_entry *iter; + unsigned int addend; +#ifdef CONFIG_COMPAT + struct compat_xt_counters_info compat_tmp; + + if (compat) { + ptmp = &compat_tmp; + size = sizeof(struct compat_xt_counters_info); + } else +#endif + { + ptmp = &tmp; + size = sizeof(struct xt_counters_info); + } + + if (copy_from_user(ptmp, user, size) != 0) + return -EFAULT; + +#ifdef CONFIG_COMPAT + if (compat) { + num_counters = compat_tmp.num_counters; + name = compat_tmp.name; + } else +#endif + { + num_counters = tmp.num_counters; + name = tmp.name; + } + + if (len != size + num_counters * sizeof(struct xt_counters)) + return -EINVAL; + + paddc = vmalloc(len - size); + if (!paddc) + return -ENOMEM; + + if (copy_from_user(paddc, user + size, len - size) != 0) { + ret = -EFAULT; + goto free; + } + + t = xt_find_table_lock(net, NFPROTO_ARP, name); + if (IS_ERR_OR_NULL(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; + goto free; + } + + local_bh_disable(); + private = t->private; + if (private->number != num_counters) { + ret = -EINVAL; + goto unlock_up_free; + } + + i = 0; + /* Choose the copy that is on our node */ + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + addend = xt_write_recseq_begin(); + xt_entry_foreach(iter, loc_cpu_entry, private->size) { + ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + ++i; + } + xt_write_recseq_end(addend); + unlock_up_free: + local_bh_enable(); + xt_table_unlock(t); + module_put(t->me); + free: + vfree(paddc); + + return ret; +} + +#ifdef CONFIG_COMPAT +static inline void compat_release_entry(struct compat_arpt_entry *e) +{ + struct xt_entry_target *t; + + t = compat_arpt_get_target(e); + module_put(t->u.kernel.target->me); +} + +static inline int +check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, + struct xt_table_info *newinfo, + unsigned int *size, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, + const char *name) +{ + struct xt_entry_target *t; + struct xt_target *target; + unsigned int entry_offset; + int ret, off, h; + + duprintf("check_compat_entry_size_and_hooks %p\n", e); + if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || + (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { + duprintf("Bad offset %p, limit = %p\n", e, limit); + return -EINVAL; + } + + if (e->next_offset < sizeof(struct compat_arpt_entry) + + sizeof(struct compat_xt_entry_target)) { + duprintf("checking: element %p size %u\n", + e, e->next_offset); + return -EINVAL; + } + + /* For purposes of check_entry casting the compat entry is fine */ + ret = check_entry((struct arpt_entry *)e, name); + if (ret) + return ret; + + off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); + entry_offset = (void *)e - (void *)base; + + t = compat_arpt_get_target(e); + target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { + duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", + t->u.user.name); + ret = PTR_ERR(target); + goto out; + } + t->u.kernel.target = target; + + off += xt_compat_target_offset(target); + *size += off; + ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); + if (ret) + goto release_target; + + /* Check hooks & underflows */ + for (h = 0; h < NF_ARP_NUMHOOKS; h++) { + if ((unsigned char *)e - base == hook_entries[h]) + newinfo->hook_entry[h] = hook_entries[h]; + if ((unsigned char *)e - base == underflows[h]) + newinfo->underflow[h] = underflows[h]; + } + + /* Clear counters and comefrom */ + memset(&e->counters, 0, sizeof(e->counters)); + e->comefrom = 0; + return 0; + +release_target: + module_put(t->u.kernel.target->me); +out: + return ret; +} + +static int +compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, + unsigned int *size, const char *name, + struct xt_table_info *newinfo, unsigned char *base) +{ + struct xt_entry_target *t; + struct xt_target *target; + struct arpt_entry *de; + unsigned int origsize; + int ret, h; + + ret = 0; + origsize = *size; + de = (struct arpt_entry *)*dstptr; + memcpy(de, e, sizeof(struct arpt_entry)); + memcpy(&de->counters, &e->counters, sizeof(e->counters)); + + *dstptr += sizeof(struct arpt_entry); + *size += sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); + + de->target_offset = e->target_offset - (origsize - *size); + t = compat_arpt_get_target(e); + target = t->u.kernel.target; + xt_compat_target_from_user(t, dstptr, size); + + de->next_offset = e->next_offset - (origsize - *size); + for (h = 0; h < NF_ARP_NUMHOOKS; h++) { + if ((unsigned char *)de - base < newinfo->hook_entry[h]) + newinfo->hook_entry[h] -= origsize - *size; + if ((unsigned char *)de - base < newinfo->underflow[h]) + newinfo->underflow[h] -= origsize - *size; + } + return ret; +} + +static int translate_compat_table(const char *name, + unsigned int valid_hooks, + struct xt_table_info **pinfo, + void **pentry0, + unsigned int total_size, + unsigned int number, + unsigned int *hook_entries, + unsigned int *underflows) +{ + unsigned int i, j; + struct xt_table_info *newinfo, *info; + void *pos, *entry0, *entry1; + struct compat_arpt_entry *iter0; + struct arpt_entry *iter1; + unsigned int size; + int ret = 0; + + info = *pinfo; + entry0 = *pentry0; + size = total_size; + info->number = number; + + /* Init all hooks to impossible value. */ + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + info->hook_entry[i] = 0xFFFFFFFF; + info->underflow[i] = 0xFFFFFFFF; + } + + duprintf("translate_compat_table: size %u\n", info->size); + j = 0; + xt_compat_lock(NFPROTO_ARP); + xt_compat_init_offsets(NFPROTO_ARP, number); + /* Walk through entries, checking offsets. */ + xt_entry_foreach(iter0, entry0, total_size) { + ret = check_compat_entry_size_and_hooks(iter0, info, &size, + entry0, + entry0 + total_size, + hook_entries, + underflows, + name); + if (ret != 0) + goto out_unlock; + ++j; + } + + ret = -EINVAL; + if (j != number) { + duprintf("translate_compat_table: %u not %u entries\n", + j, number); + goto out_unlock; + } + + /* Check hooks all assigned */ + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + /* Only hooks which are valid */ + if (!(valid_hooks & (1 << i))) + continue; + if (info->hook_entry[i] == 0xFFFFFFFF) { + duprintf("Invalid hook entry %u %u\n", + i, hook_entries[i]); + goto out_unlock; + } + if (info->underflow[i] == 0xFFFFFFFF) { + duprintf("Invalid underflow %u %u\n", + i, underflows[i]); + goto out_unlock; + } + } + + ret = -ENOMEM; + newinfo = xt_alloc_table_info(size); + if (!newinfo) + goto out_unlock; + + newinfo->number = number; + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + newinfo->hook_entry[i] = info->hook_entry[i]; + newinfo->underflow[i] = info->underflow[i]; + } + entry1 = newinfo->entries[raw_smp_processor_id()]; + pos = entry1; + size = total_size; + xt_entry_foreach(iter0, entry0, total_size) { + ret = compat_copy_entry_from_user(iter0, &pos, &size, + name, newinfo, entry1); + if (ret != 0) + break; + } + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); + if (ret) + goto free_newinfo; + + ret = -ELOOP; + if (!mark_source_chains(newinfo, valid_hooks, entry1)) + goto free_newinfo; + + i = 0; + xt_entry_foreach(iter1, entry1, newinfo->size) { + ret = check_target(iter1, name); + if (ret != 0) + break; + ++i; + if (strcmp(arpt_get_target(iter1)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; + } + if (ret) { + /* + * The first i matches need cleanup_entry (calls ->destroy) + * because they had called ->check already. The other j-i + * entries need only release. + */ + int skip = i; + j -= i; + xt_entry_foreach(iter0, entry0, newinfo->size) { + if (skip-- > 0) + continue; + if (j-- == 0) + break; + compat_release_entry(iter0); + } + xt_entry_foreach(iter1, entry1, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter1); + } + xt_free_table_info(newinfo); + return ret; + } + + /* And one copy for every other CPU */ + for_each_possible_cpu(i) + if (newinfo->entries[i] && newinfo->entries[i] != entry1) + memcpy(newinfo->entries[i], entry1, newinfo->size); + + *pinfo = newinfo; + *pentry0 = entry1; + xt_free_table_info(info); + return 0; + +free_newinfo: + xt_free_table_info(newinfo); +out: + xt_entry_foreach(iter0, entry0, total_size) { + if (j-- == 0) + break; + compat_release_entry(iter0); + } + return ret; +out_unlock: + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); + goto out; +} + +struct compat_arpt_replace { + char name[XT_TABLE_MAXNAMELEN]; + u32 valid_hooks; + u32 num_entries; + u32 size; + u32 hook_entry[NF_ARP_NUMHOOKS]; + u32 underflow[NF_ARP_NUMHOOKS]; + u32 num_counters; + compat_uptr_t counters; + struct compat_arpt_entry entries[0]; +}; + +static int compat_do_replace(struct net *net, void __user *user, + unsigned int len) +{ + int ret; + struct compat_arpt_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + struct arpt_entry *iter; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + /* overflow check */ + if (tmp.size >= INT_MAX / num_possible_cpus()) + return -ENOMEM; + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + + tmp.name[sizeof(tmp.name)-1] = 0; + + newinfo = xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + ret = translate_compat_table(tmp.name, tmp.valid_hooks, + &newinfo, &loc_cpu_entry, tmp.size, + tmp.num_entries, tmp.hook_entry, + tmp.underflow); + if (ret != 0) + goto free_newinfo; + + duprintf("compat_do_replace: Translated table\n"); + + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, compat_ptr(tmp.counters)); + if (ret) + goto free_newinfo_untrans; + return 0; + + free_newinfo_untrans: + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter); + free_newinfo: + xt_free_table_info(newinfo); + return ret; +} + +static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, + unsigned int len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case ARPT_SO_SET_REPLACE: + ret = compat_do_replace(sock_net(sk), user, len); + break; + + case ARPT_SO_SET_ADD_COUNTERS: + ret = do_add_counters(sock_net(sk), user, len, 1); + break; + + default: + duprintf("do_arpt_set_ctl: unknown request %i\n", cmd); + ret = -EINVAL; + } + + return ret; +} + +static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, + compat_uint_t *size, + struct xt_counters *counters, + unsigned int i) +{ + struct xt_entry_target *t; + struct compat_arpt_entry __user *ce; + u_int16_t target_offset, next_offset; + compat_uint_t origsize; + int ret; + + origsize = *size; + ce = (struct compat_arpt_entry __user *)*dstptr; + if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 || + copy_to_user(&ce->counters, &counters[i], + sizeof(counters[i])) != 0) + return -EFAULT; + + *dstptr += sizeof(struct compat_arpt_entry); + *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); + + target_offset = e->target_offset - (origsize - *size); + + t = arpt_get_target(e); + ret = xt_compat_target_to_user(t, dstptr, size); + if (ret) + return ret; + next_offset = e->next_offset - (origsize - *size); + if (put_user(target_offset, &ce->target_offset) != 0 || + put_user(next_offset, &ce->next_offset) != 0) + return -EFAULT; + return 0; +} + +static int compat_copy_entries_to_user(unsigned int total_size, + struct xt_table *table, + void __user *userptr) +{ + struct xt_counters *counters; + const struct xt_table_info *private = table->private; + void __user *pos; + unsigned int size; + int ret = 0; + void *loc_cpu_entry; + unsigned int i = 0; + struct arpt_entry *iter; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); + + /* choose the copy on our node/cpu */ + loc_cpu_entry = private->entries[raw_smp_processor_id()]; + pos = userptr; + size = total_size; + xt_entry_foreach(iter, loc_cpu_entry, total_size) { + ret = compat_copy_entry_to_user(iter, &pos, + &size, counters, i++); + if (ret != 0) + break; + } + vfree(counters); + return ret; +} + +struct compat_arpt_get_entries { + char name[XT_TABLE_MAXNAMELEN]; + compat_uint_t size; + struct compat_arpt_entry entrytable[0]; +}; + +static int compat_get_entries(struct net *net, + struct compat_arpt_get_entries __user *uptr, + int *len) +{ + int ret; + struct compat_arpt_get_entries get; + struct xt_table *t; + + if (*len < sizeof(get)) { + duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); + return -EINVAL; + } + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; + if (*len != sizeof(struct compat_arpt_get_entries) + get.size) { + duprintf("compat_get_entries: %u != %zu\n", + *len, sizeof(get) + get.size); + return -EINVAL; + } + + xt_compat_lock(NFPROTO_ARP); + t = xt_find_table_lock(net, NFPROTO_ARP, get.name); + if (!IS_ERR_OR_NULL(t)) { + const struct xt_table_info *private = t->private; + struct xt_table_info info; + + duprintf("t->private->number = %u\n", private->number); + ret = compat_table_info(private, &info); + if (!ret && get.size == info.size) { + ret = compat_copy_entries_to_user(private->size, + t, uptr->entrytable); + } else if (!ret) { + duprintf("compat_get_entries: I've got %u not %u!\n", + private->size, get.size); + ret = -EAGAIN; + } + xt_compat_flush_offsets(NFPROTO_ARP); + module_put(t->me); + xt_table_unlock(t); + } else + ret = t ? PTR_ERR(t) : -ENOENT; + + xt_compat_unlock(NFPROTO_ARP); + return ret; +} + +static int do_arpt_get_ctl(struct sock *, int, void __user *, int *); + +static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, + int *len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case ARPT_SO_GET_INFO: + ret = get_info(sock_net(sk), user, len, 1); + break; + case ARPT_SO_GET_ENTRIES: + ret = compat_get_entries(sock_net(sk), user, len); + break; + default: + ret = do_arpt_get_ctl(sk, cmd, user, len); + } + return ret; +} +#endif + +static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case ARPT_SO_SET_REPLACE: + ret = do_replace(sock_net(sk), user, len); + break; + + case ARPT_SO_SET_ADD_COUNTERS: + ret = do_add_counters(sock_net(sk), user, len, 0); + break; + + default: + duprintf("do_arpt_set_ctl: unknown request %i\n", cmd); + ret = -EINVAL; + } + + return ret; +} + +static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case ARPT_SO_GET_INFO: + ret = get_info(sock_net(sk), user, len, 0); + break; + + case ARPT_SO_GET_ENTRIES: + ret = get_entries(sock_net(sk), user, len); + break; + + case ARPT_SO_GET_REVISION_TARGET: { + struct xt_get_revision rev; + + if (*len != sizeof(rev)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&rev, user, sizeof(rev)) != 0) { + ret = -EFAULT; + break; + } + rev.name[sizeof(rev.name)-1] = 0; + + try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, + rev.revision, 1, &ret), + "arpt_%s", rev.name); + break; + } + + default: + duprintf("do_arpt_get_ctl: unknown request %i\n", cmd); + ret = -EINVAL; + } + + return ret; +} + +struct xt_table *arpt_register_table(struct net *net, + const struct xt_table *table, + const struct arpt_replace *repl) +{ + int ret; + struct xt_table_info *newinfo; + struct xt_table_info bootstrap = {0}; + void *loc_cpu_entry; + struct xt_table *new_table; + + newinfo = xt_alloc_table_info(repl->size); + if (!newinfo) { + ret = -ENOMEM; + goto out; + } + + /* choose the copy on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + memcpy(loc_cpu_entry, repl->entries, repl->size); + + ret = translate_table(newinfo, loc_cpu_entry, repl); + duprintf("arpt_register_table: translate table gives %d\n", ret); + if (ret != 0) + goto out_free; + + new_table = xt_register_table(net, table, &bootstrap, newinfo); + if (IS_ERR(new_table)) { + ret = PTR_ERR(new_table); + goto out_free; + } + return new_table; + +out_free: + xt_free_table_info(newinfo); +out: + return ERR_PTR(ret); +} + +void arpt_unregister_table(struct xt_table *table) +{ + struct xt_table_info *private; + void *loc_cpu_entry; + struct module *table_owner = table->me; + struct arpt_entry *iter; + + private = xt_unregister_table(table); + + /* Decrease module usage counts and free resources */ + loc_cpu_entry = private->entries[raw_smp_processor_id()]; + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter); + if (private->number > private->initial_entries) + module_put(table_owner); + xt_free_table_info(private); +} + +/* The built-in targets: standard (NULL) and error. */ +static struct xt_target arpt_builtin_tg[] __read_mostly = { + { + .name = XT_STANDARD_TARGET, + .targetsize = sizeof(int), + .family = NFPROTO_ARP, +#ifdef CONFIG_COMPAT + .compatsize = sizeof(compat_int_t), + .compat_from_user = compat_standard_from_user, + .compat_to_user = compat_standard_to_user, +#endif + }, + { + .name = XT_ERROR_TARGET, + .target = arpt_error, + .targetsize = XT_FUNCTION_MAXNAMELEN, + .family = NFPROTO_ARP, + }, +}; + +static struct nf_sockopt_ops arpt_sockopts = { + .pf = PF_INET, + .set_optmin = ARPT_BASE_CTL, + .set_optmax = ARPT_SO_SET_MAX+1, + .set = do_arpt_set_ctl, +#ifdef CONFIG_COMPAT + .compat_set = compat_do_arpt_set_ctl, +#endif + .get_optmin = ARPT_BASE_CTL, + .get_optmax = ARPT_SO_GET_MAX+1, + .get = do_arpt_get_ctl, +#ifdef CONFIG_COMPAT + .compat_get = compat_do_arpt_get_ctl, +#endif + .owner = THIS_MODULE, +}; + +static int __net_init arp_tables_net_init(struct net *net) +{ + return xt_proto_init(net, NFPROTO_ARP); +} + +static void __net_exit arp_tables_net_exit(struct net *net) +{ + xt_proto_fini(net, NFPROTO_ARP); +} + +static struct pernet_operations arp_tables_net_ops = { + .init = arp_tables_net_init, + .exit = arp_tables_net_exit, +}; + +static int __init arp_tables_init(void) +{ + int ret; + + ret = register_pernet_subsys(&arp_tables_net_ops); + if (ret < 0) + goto err1; + + /* No one else will be downing sem now, so we won't sleep */ + ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); + if (ret < 0) + goto err2; + + /* Register setsockopt */ + ret = nf_register_sockopt(&arpt_sockopts); + if (ret < 0) + goto err4; + + printk(KERN_INFO "arp_tables: (C) 2002 David S. Miller\n"); + return 0; + +err4: + xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); +err2: + unregister_pernet_subsys(&arp_tables_net_ops); +err1: + return ret; +} + +static void __exit arp_tables_fini(void) +{ + nf_unregister_sockopt(&arpt_sockopts); + xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); + unregister_pernet_subsys(&arp_tables_net_ops); +} + +EXPORT_SYMBOL(arpt_register_table); +EXPORT_SYMBOL(arpt_unregister_table); +EXPORT_SYMBOL(arpt_do_table); + +module_init(arp_tables_init); +module_exit(arp_tables_fini); diff --git a/kernel/net/ipv4/netfilter/arpt_mangle.c b/kernel/net/ipv4/netfilter/arpt_mangle.c new file mode 100644 index 000000000..a5e52a9f0 --- /dev/null +++ b/kernel/net/ipv4/netfilter/arpt_mangle.c @@ -0,0 +1,91 @@ +/* module that allows mangling of the arp payload */ +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Bart De Schuymer "); +MODULE_DESCRIPTION("arptables arp payload mangle target"); + +static unsigned int +target(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct arpt_mangle *mangle = par->targinfo; + const struct arphdr *arp; + unsigned char *arpptr; + int pln, hln; + + if (!skb_make_writable(skb, skb->len)) + return NF_DROP; + + arp = arp_hdr(skb); + arpptr = skb_network_header(skb) + sizeof(*arp); + pln = arp->ar_pln; + hln = arp->ar_hln; + /* We assume that pln and hln were checked in the match */ + if (mangle->flags & ARPT_MANGLE_SDEV) { + if (ARPT_DEV_ADDR_LEN_MAX < hln || + (arpptr + hln > skb_tail_pointer(skb))) + return NF_DROP; + memcpy(arpptr, mangle->src_devaddr, hln); + } + arpptr += hln; + if (mangle->flags & ARPT_MANGLE_SIP) { + if (ARPT_MANGLE_ADDR_LEN_MAX < pln || + (arpptr + pln > skb_tail_pointer(skb))) + return NF_DROP; + memcpy(arpptr, &mangle->u_s.src_ip, pln); + } + arpptr += pln; + if (mangle->flags & ARPT_MANGLE_TDEV) { + if (ARPT_DEV_ADDR_LEN_MAX < hln || + (arpptr + hln > skb_tail_pointer(skb))) + return NF_DROP; + memcpy(arpptr, mangle->tgt_devaddr, hln); + } + arpptr += hln; + if (mangle->flags & ARPT_MANGLE_TIP) { + if (ARPT_MANGLE_ADDR_LEN_MAX < pln || + (arpptr + pln > skb_tail_pointer(skb))) + return NF_DROP; + memcpy(arpptr, &mangle->u_t.tgt_ip, pln); + } + return mangle->target; +} + +static int checkentry(const struct xt_tgchk_param *par) +{ + const struct arpt_mangle *mangle = par->targinfo; + + if (mangle->flags & ~ARPT_MANGLE_MASK || + !(mangle->flags & ARPT_MANGLE_MASK)) + return -EINVAL; + + if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT && + mangle->target != XT_CONTINUE) + return -EINVAL; + return 0; +} + +static struct xt_target arpt_mangle_reg __read_mostly = { + .name = "mangle", + .family = NFPROTO_ARP, + .target = target, + .targetsize = sizeof(struct arpt_mangle), + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init arpt_mangle_init(void) +{ + return xt_register_target(&arpt_mangle_reg); +} + +static void __exit arpt_mangle_fini(void) +{ + xt_unregister_target(&arpt_mangle_reg); +} + +module_init(arpt_mangle_init); +module_exit(arpt_mangle_fini); diff --git a/kernel/net/ipv4/netfilter/arptable_filter.c b/kernel/net/ipv4/netfilter/arptable_filter.c new file mode 100644 index 000000000..93876d031 --- /dev/null +++ b/kernel/net/ipv4/netfilter/arptable_filter.c @@ -0,0 +1,91 @@ +/* + * Filtering ARP tables module. + * + * Copyright (C) 2002 David S. Miller (davem@redhat.com) + * + */ + +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("David S. Miller "); +MODULE_DESCRIPTION("arptables filter table"); + +#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ + (1 << NF_ARP_FORWARD)) + +static const struct xt_table packet_filter = { + .name = "filter", + .valid_hooks = FILTER_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_ARP, + .priority = NF_IP_PRI_FILTER, +}; + +/* The work comes in here from netfilter.c */ +static unsigned int +arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + const struct net *net = dev_net(state->in ? state->in : state->out); + + return arpt_do_table(skb, ops->hooknum, state, + net->ipv4.arptable_filter); +} + +static struct nf_hook_ops *arpfilter_ops __read_mostly; + +static int __net_init arptable_filter_net_init(struct net *net) +{ + struct arpt_replace *repl; + + repl = arpt_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; + net->ipv4.arptable_filter = + arpt_register_table(net, &packet_filter, repl); + kfree(repl); + return PTR_ERR_OR_ZERO(net->ipv4.arptable_filter); +} + +static void __net_exit arptable_filter_net_exit(struct net *net) +{ + arpt_unregister_table(net->ipv4.arptable_filter); +} + +static struct pernet_operations arptable_filter_net_ops = { + .init = arptable_filter_net_init, + .exit = arptable_filter_net_exit, +}; + +static int __init arptable_filter_init(void) +{ + int ret; + + ret = register_pernet_subsys(&arptable_filter_net_ops); + if (ret < 0) + return ret; + + arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook); + if (IS_ERR(arpfilter_ops)) { + ret = PTR_ERR(arpfilter_ops); + goto cleanup_table; + } + return ret; + +cleanup_table: + unregister_pernet_subsys(&arptable_filter_net_ops); + return ret; +} + +static void __exit arptable_filter_fini(void) +{ + xt_hook_unlink(&packet_filter, arpfilter_ops); + unregister_pernet_subsys(&arptable_filter_net_ops); +} + +module_init(arptable_filter_init); +module_exit(arptable_filter_fini); diff --git a/kernel/net/ipv4/netfilter/ip_tables.c b/kernel/net/ipv4/netfilter/ip_tables.c new file mode 100644 index 000000000..2d0e265fe --- /dev/null +++ b/kernel/net/ipv4/netfilter/ip_tables.c @@ -0,0 +1,2282 @@ +/* + * Packet matching code. + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2005 Netfilter Core Team + * Copyright (C) 2006-2010 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "../../netfilter/xt_repldata.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team "); +MODULE_DESCRIPTION("IPv4 packet filter"); + +/*#define DEBUG_IP_FIREWALL*/ +/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ +/*#define DEBUG_IP_FIREWALL_USER*/ + +#ifdef DEBUG_IP_FIREWALL +#define dprintf(format, args...) pr_info(format , ## args) +#else +#define dprintf(format, args...) +#endif + +#ifdef DEBUG_IP_FIREWALL_USER +#define duprintf(format, args...) pr_info(format , ## args) +#else +#define duprintf(format, args...) +#endif + +#ifdef CONFIG_NETFILTER_DEBUG +#define IP_NF_ASSERT(x) WARN_ON(!(x)) +#else +#define IP_NF_ASSERT(x) +#endif + +#if 0 +/* All the better to debug you with... */ +#define static +#define inline +#endif + +void *ipt_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(ipt, IPT); +} +EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); + +/* Returns whether matches rule or not. */ +/* Performance critical - called for every packet */ +static inline bool +ip_packet_match(const struct iphdr *ip, + const char *indev, + const char *outdev, + const struct ipt_ip *ipinfo, + int isfrag) +{ + unsigned long ret; + +#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) + + if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, + IPT_INV_SRCIP) || + FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, + IPT_INV_DSTIP)) { + dprintf("Source or dest mismatch.\n"); + + dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", + &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr, + ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : ""); + dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n", + &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr, + ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : ""); + return false; + } + + ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); + + if (FWINV(ret != 0, IPT_INV_VIA_IN)) { + dprintf("VIA in mismatch (%s vs %s).%s\n", + indev, ipinfo->iniface, + ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":""); + return false; + } + + ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); + + if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { + dprintf("VIA out mismatch (%s vs %s).%s\n", + outdev, ipinfo->outiface, + ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":""); + return false; + } + + /* Check specific protocol */ + if (ipinfo->proto && + FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { + dprintf("Packet protocol %hi does not match %hi.%s\n", + ip->protocol, ipinfo->proto, + ipinfo->invflags&IPT_INV_PROTO ? " (INV)":""); + return false; + } + + /* If we have a fragment rule but the packet is not a fragment + * then we return zero */ + if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) { + dprintf("Fragment rule but not fragment.%s\n", + ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : ""); + return false; + } + + return true; +} + +static bool +ip_checkentry(const struct ipt_ip *ip) +{ + if (ip->flags & ~IPT_F_MASK) { + duprintf("Unknown flag bits set: %08X\n", + ip->flags & ~IPT_F_MASK); + return false; + } + if (ip->invflags & ~IPT_INV_MASK) { + duprintf("Unknown invflag bits set: %08X\n", + ip->invflags & ~IPT_INV_MASK); + return false; + } + return true; +} + +static unsigned int +ipt_error(struct sk_buff *skb, const struct xt_action_param *par) +{ + net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo); + + return NF_DROP; +} + +/* Performance critical */ +static inline struct ipt_entry * +get_entry(const void *base, unsigned int offset) +{ + return (struct ipt_entry *)(base + offset); +} + +/* All zeroes == unconditional rule. */ +/* Mildly perf critical (only if packet tracing is on) */ +static inline bool unconditional(const struct ipt_ip *ip) +{ + static const struct ipt_ip uncond; + + return memcmp(ip, &uncond, sizeof(uncond)) == 0; +#undef FWINV +} + +/* for const-correctness */ +static inline const struct xt_entry_target * +ipt_get_target_c(const struct ipt_entry *e) +{ + return ipt_get_target((struct ipt_entry *)e); +} + +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) +static const char *const hooknames[] = { + [NF_INET_PRE_ROUTING] = "PREROUTING", + [NF_INET_LOCAL_IN] = "INPUT", + [NF_INET_FORWARD] = "FORWARD", + [NF_INET_LOCAL_OUT] = "OUTPUT", + [NF_INET_POST_ROUTING] = "POSTROUTING", +}; + +enum nf_ip_trace_comments { + NF_IP_TRACE_COMMENT_RULE, + NF_IP_TRACE_COMMENT_RETURN, + NF_IP_TRACE_COMMENT_POLICY, +}; + +static const char *const comments[] = { + [NF_IP_TRACE_COMMENT_RULE] = "rule", + [NF_IP_TRACE_COMMENT_RETURN] = "return", + [NF_IP_TRACE_COMMENT_POLICY] = "policy", +}; + +static struct nf_loginfo trace_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 4, + .logflags = NF_LOG_MASK, + }, + }, +}; + +/* Mildly perf critical (only if packet tracing is on) */ +static inline int +get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, + const char *hookname, const char **chainname, + const char **comment, unsigned int *rulenum) +{ + const struct xt_standard_target *t = (void *)ipt_get_target_c(s); + + if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) { + /* Head of user chain: ERROR target with chainname */ + *chainname = t->target.data; + (*rulenum) = 0; + } else if (s == e) { + (*rulenum)++; + + if (s->target_offset == sizeof(struct ipt_entry) && + strcmp(t->target.u.kernel.target->name, + XT_STANDARD_TARGET) == 0 && + t->verdict < 0 && + unconditional(&s->ip)) { + /* Tail of chains: STANDARD target (return/policy) */ + *comment = *chainname == hookname + ? comments[NF_IP_TRACE_COMMENT_POLICY] + : comments[NF_IP_TRACE_COMMENT_RETURN]; + } + return 1; + } else + (*rulenum)++; + + return 0; +} + +static void trace_packet(const struct sk_buff *skb, + unsigned int hook, + const struct net_device *in, + const struct net_device *out, + const char *tablename, + const struct xt_table_info *private, + const struct ipt_entry *e) +{ + const void *table_base; + const struct ipt_entry *root; + const char *hookname, *chainname, *comment; + const struct ipt_entry *iter; + unsigned int rulenum = 0; + struct net *net = dev_net(in ? in : out); + + table_base = private->entries[smp_processor_id()]; + root = get_entry(table_base, private->hook_entry[hook]); + + hookname = chainname = hooknames[hook]; + comment = comments[NF_IP_TRACE_COMMENT_RULE]; + + xt_entry_foreach(iter, root, private->size - private->hook_entry[hook]) + if (get_chainname_rulenum(iter, e, hookname, + &chainname, &comment, &rulenum) != 0) + break; + + nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); +} +#endif + +static inline __pure +struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) +{ + return (void *)entry + entry->next_offset; +} + +/* Returns one of the generic firewall policies, like NF_ACCEPT. */ +unsigned int +ipt_do_table(struct sk_buff *skb, + unsigned int hook, + const struct nf_hook_state *state, + struct xt_table *table) +{ + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); + const struct iphdr *ip; + /* Initializing verdict to NF_DROP keeps gcc happy. */ + unsigned int verdict = NF_DROP; + const char *indev, *outdev; + const void *table_base; + struct ipt_entry *e, **jumpstack; + unsigned int *stackptr, origptr, cpu; + const struct xt_table_info *private; + struct xt_action_param acpar; + unsigned int addend; + + /* Initialization */ + ip = ip_hdr(skb); + indev = state->in ? state->in->name : nulldevname; + outdev = state->out ? state->out->name : nulldevname; + /* We handle fragments by dealing with the first fragment as + * if it was a normal packet. All other fragments are treated + * normally, except that they will NEVER match rules that ask + * things we don't know, ie. tcp syn flag or ports). If the + * rule is also a fragment-specific rule, non-fragments won't + * match it. */ + acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; + acpar.thoff = ip_hdrlen(skb); + acpar.hotdrop = false; + acpar.in = state->in; + acpar.out = state->out; + acpar.family = NFPROTO_IPV4; + acpar.hooknum = hook; + + IP_NF_ASSERT(table->valid_hooks & (1 << hook)); + local_bh_disable(); + addend = xt_write_recseq_begin(); + private = table->private; + cpu = smp_processor_id(); + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); + table_base = private->entries[cpu]; + jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; + stackptr = per_cpu_ptr(private->stackptr, cpu); + origptr = *stackptr; + + e = get_entry(table_base, private->hook_entry[hook]); + + pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n", + table->name, hook, origptr, + get_entry(table_base, private->underflow[hook])); + + do { + const struct xt_entry_target *t; + const struct xt_entry_match *ematch; + + IP_NF_ASSERT(e); + if (!ip_packet_match(ip, indev, outdev, + &e->ip, acpar.fragoff)) { + no_match: + e = ipt_next_entry(e); + continue; + } + + xt_ematch_foreach(ematch, e) { + acpar.match = ematch->u.kernel.match; + acpar.matchinfo = ematch->data; + if (!acpar.match->match(skb, &acpar)) + goto no_match; + } + + ADD_COUNTER(e->counters, skb->len, 1); + + t = ipt_get_target(e); + IP_NF_ASSERT(t->u.kernel.target); + +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) + /* The packet is traced: log it */ + if (unlikely(skb->nf_trace)) + trace_packet(skb, hook, state->in, state->out, + table->name, private, e); +#endif + /* Standard target? */ + if (!t->u.kernel.target->target) { + int v; + + v = ((struct xt_standard_target *)t)->verdict; + if (v < 0) { + /* Pop from stack? */ + if (v != XT_RETURN) { + verdict = (unsigned int)(-v) - 1; + break; + } + if (*stackptr <= origptr) { + e = get_entry(table_base, + private->underflow[hook]); + pr_debug("Underflow (this is normal) " + "to %p\n", e); + } else { + e = jumpstack[--*stackptr]; + pr_debug("Pulled %p out from pos %u\n", + e, *stackptr); + e = ipt_next_entry(e); + } + continue; + } + if (table_base + v != ipt_next_entry(e) && + !(e->ip.flags & IPT_F_GOTO)) { + if (*stackptr >= private->stacksize) { + verdict = NF_DROP; + break; + } + jumpstack[(*stackptr)++] = e; + pr_debug("Pushed %p into pos %u\n", + e, *stackptr - 1); + } + + e = get_entry(table_base, v); + continue; + } + + acpar.target = t->u.kernel.target; + acpar.targinfo = t->data; + + verdict = t->u.kernel.target->target(skb, &acpar); + /* Target might have changed stuff. */ + ip = ip_hdr(skb); + if (verdict == XT_CONTINUE) + e = ipt_next_entry(e); + else + /* Verdict */ + break; + } while (!acpar.hotdrop); + pr_debug("Exiting %s; resetting sp from %u to %u\n", + __func__, *stackptr, origptr); + *stackptr = origptr; + xt_write_recseq_end(addend); + local_bh_enable(); + +#ifdef DEBUG_ALLOW_ALL + return NF_ACCEPT; +#else + if (acpar.hotdrop) + return NF_DROP; + else return verdict; +#endif +} + +/* Figures out from what hook each rule can be called: returns 0 if + there are loops. Puts hook bitmask in comefrom. */ +static int +mark_source_chains(const struct xt_table_info *newinfo, + unsigned int valid_hooks, void *entry0) +{ + unsigned int hook; + + /* No recursion; use packet counter to save back ptrs (reset + to 0 as we leave), and comefrom to save source hook bitmask */ + for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { + unsigned int pos = newinfo->hook_entry[hook]; + struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos); + + if (!(valid_hooks & (1 << hook))) + continue; + + /* Set initial back pointer. */ + e->counters.pcnt = pos; + + for (;;) { + const struct xt_standard_target *t + = (void *)ipt_get_target_c(e); + int visited = e->comefrom & (1 << hook); + + if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { + pr_err("iptables: loop hook %u pos %u %08X.\n", + hook, pos, e->comefrom); + return 0; + } + e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); + + /* Unconditional return/END. */ + if ((e->target_offset == sizeof(struct ipt_entry) && + (strcmp(t->target.u.user.name, + XT_STANDARD_TARGET) == 0) && + t->verdict < 0 && unconditional(&e->ip)) || + visited) { + unsigned int oldpos, size; + + if ((strcmp(t->target.u.user.name, + XT_STANDARD_TARGET) == 0) && + t->verdict < -NF_MAX_VERDICT - 1) { + duprintf("mark_source_chains: bad " + "negative verdict (%i)\n", + t->verdict); + return 0; + } + + /* Return: backtrack through the last + big jump. */ + do { + e->comefrom ^= (1<comefrom + & (1 << NF_INET_NUMHOOKS)) { + duprintf("Back unset " + "on hook %u " + "rule %u\n", + hook, pos); + } +#endif + oldpos = pos; + pos = e->counters.pcnt; + e->counters.pcnt = 0; + + /* We're at the start. */ + if (pos == oldpos) + goto next; + + e = (struct ipt_entry *) + (entry0 + pos); + } while (oldpos == pos + e->next_offset); + + /* Move along one */ + size = e->next_offset; + e = (struct ipt_entry *) + (entry0 + pos + size); + e->counters.pcnt = pos; + pos += size; + } else { + int newpos = t->verdict; + + if (strcmp(t->target.u.user.name, + XT_STANDARD_TARGET) == 0 && + newpos >= 0) { + if (newpos > newinfo->size - + sizeof(struct ipt_entry)) { + duprintf("mark_source_chains: " + "bad verdict (%i)\n", + newpos); + return 0; + } + /* This a jump; chase it. */ + duprintf("Jump rule %u -> %u\n", + pos, newpos); + } else { + /* ... this is a fallthru */ + newpos = pos + e->next_offset; + } + e = (struct ipt_entry *) + (entry0 + newpos); + e->counters.pcnt = pos; + pos = newpos; + } + } + next: + duprintf("Finished chain %u\n", hook); + } + return 1; +} + +static void cleanup_match(struct xt_entry_match *m, struct net *net) +{ + struct xt_mtdtor_param par; + + par.net = net; + par.match = m->u.kernel.match; + par.matchinfo = m->data; + par.family = NFPROTO_IPV4; + if (par.match->destroy != NULL) + par.match->destroy(&par); + module_put(par.match->me); +} + +static int +check_entry(const struct ipt_entry *e, const char *name) +{ + const struct xt_entry_target *t; + + if (!ip_checkentry(&e->ip)) { + duprintf("ip check failed %p %s.\n", e, name); + return -EINVAL; + } + + if (e->target_offset + sizeof(struct xt_entry_target) > + e->next_offset) + return -EINVAL; + + t = ipt_get_target_c(e); + if (e->target_offset + t->u.target_size > e->next_offset) + return -EINVAL; + + return 0; +} + +static int +check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) +{ + const struct ipt_ip *ip = par->entryinfo; + int ret; + + par->match = m->u.kernel.match; + par->matchinfo = m->data; + + ret = xt_check_match(par, m->u.match_size - sizeof(*m), + ip->proto, ip->invflags & IPT_INV_PROTO); + if (ret < 0) { + duprintf("check failed for `%s'.\n", par->match->name); + return ret; + } + return 0; +} + +static int +find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) +{ + struct xt_match *match; + int ret; + + match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, + m->u.user.revision); + if (IS_ERR(match)) { + duprintf("find_check_match: `%s' not found\n", m->u.user.name); + return PTR_ERR(match); + } + m->u.kernel.match = match; + + ret = check_match(m, par); + if (ret) + goto err; + + return 0; +err: + module_put(m->u.kernel.match->me); + return ret; +} + +static int check_target(struct ipt_entry *e, struct net *net, const char *name) +{ + struct xt_entry_target *t = ipt_get_target(e); + struct xt_tgchk_param par = { + .net = net, + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_IPV4, + }; + int ret; + + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), + e->ip.proto, e->ip.invflags & IPT_INV_PROTO); + if (ret < 0) { + duprintf("check failed for `%s'.\n", + t->u.kernel.target->name); + return ret; + } + return 0; +} + +static int +find_check_entry(struct ipt_entry *e, struct net *net, const char *name, + unsigned int size) +{ + struct xt_entry_target *t; + struct xt_target *target; + int ret; + unsigned int j; + struct xt_mtchk_param mtpar; + struct xt_entry_match *ematch; + + ret = check_entry(e, name); + if (ret) + return ret; + + j = 0; + mtpar.net = net; + mtpar.table = name; + mtpar.entryinfo = &e->ip; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV4; + xt_ematch_foreach(ematch, e) { + ret = find_check_match(ematch, &mtpar); + if (ret != 0) + goto cleanup_matches; + ++j; + } + + t = ipt_get_target(e); + target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { + duprintf("find_check_entry: `%s' not found\n", t->u.user.name); + ret = PTR_ERR(target); + goto cleanup_matches; + } + t->u.kernel.target = target; + + ret = check_target(e, net, name); + if (ret) + goto err; + return 0; + err: + module_put(t->u.kernel.target->me); + cleanup_matches: + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + cleanup_match(ematch, net); + } + return ret; +} + +static bool check_underflow(const struct ipt_entry *e) +{ + const struct xt_entry_target *t; + unsigned int verdict; + + if (!unconditional(&e->ip)) + return false; + t = ipt_get_target_c(e); + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) + return false; + verdict = ((struct xt_standard_target *)t)->verdict; + verdict = -verdict - 1; + return verdict == NF_DROP || verdict == NF_ACCEPT; +} + +static int +check_entry_size_and_hooks(struct ipt_entry *e, + struct xt_table_info *newinfo, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, + unsigned int valid_hooks) +{ + unsigned int h; + + if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || + (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { + duprintf("Bad offset %p\n", e); + return -EINVAL; + } + + if (e->next_offset + < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) { + duprintf("checking: element %p size %u\n", + e, e->next_offset); + return -EINVAL; + } + + /* Check hooks & underflows */ + for (h = 0; h < NF_INET_NUMHOOKS; h++) { + if (!(valid_hooks & (1 << h))) + continue; + if ((unsigned char *)e - base == hook_entries[h]) + newinfo->hook_entry[h] = hook_entries[h]; + if ((unsigned char *)e - base == underflows[h]) { + if (!check_underflow(e)) { + pr_err("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); + return -EINVAL; + } + newinfo->underflow[h] = underflows[h]; + } + } + + /* Clear counters and comefrom */ + e->counters = ((struct xt_counters) { 0, 0 }); + e->comefrom = 0; + return 0; +} + +static void +cleanup_entry(struct ipt_entry *e, struct net *net) +{ + struct xt_tgdtor_param par; + struct xt_entry_target *t; + struct xt_entry_match *ematch; + + /* Cleanup all matches */ + xt_ematch_foreach(ematch, e) + cleanup_match(ematch, net); + t = ipt_get_target(e); + + par.net = net; + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_IPV4; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); +} + +/* Checks and translates the user-supplied table segment (held in + newinfo) */ +static int +translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, + const struct ipt_replace *repl) +{ + struct ipt_entry *iter; + unsigned int i; + int ret = 0; + + newinfo->size = repl->size; + newinfo->number = repl->num_entries; + + /* Init all hooks to impossible value. */ + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + newinfo->hook_entry[i] = 0xFFFFFFFF; + newinfo->underflow[i] = 0xFFFFFFFF; + } + + duprintf("translate_table: size %u\n", newinfo->size); + i = 0; + /* Walk through entries, checking offsets. */ + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = check_entry_size_and_hooks(iter, newinfo, entry0, + entry0 + repl->size, + repl->hook_entry, + repl->underflow, + repl->valid_hooks); + if (ret != 0) + return ret; + ++i; + if (strcmp(ipt_get_target(iter)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; + } + + if (i != repl->num_entries) { + duprintf("translate_table: %u not %u entries\n", + i, repl->num_entries); + return -EINVAL; + } + + /* Check hooks all assigned */ + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + /* Only hooks which are valid */ + if (!(repl->valid_hooks & (1 << i))) + continue; + if (newinfo->hook_entry[i] == 0xFFFFFFFF) { + duprintf("Invalid hook entry %u %u\n", + i, repl->hook_entry[i]); + return -EINVAL; + } + if (newinfo->underflow[i] == 0xFFFFFFFF) { + duprintf("Invalid underflow %u %u\n", + i, repl->underflow[i]); + return -EINVAL; + } + } + + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) + return -ELOOP; + + /* Finally, each sanity check must pass */ + i = 0; + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = find_check_entry(iter, net, repl->name, repl->size); + if (ret != 0) + break; + ++i; + } + + if (ret != 0) { + xt_entry_foreach(iter, entry0, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter, net); + } + return ret; + } + + /* And one copy for every other CPU */ + for_each_possible_cpu(i) { + if (newinfo->entries[i] && newinfo->entries[i] != entry0) + memcpy(newinfo->entries[i], entry0, newinfo->size); + } + + return ret; +} + +static void +get_counters(const struct xt_table_info *t, + struct xt_counters counters[]) +{ + struct ipt_entry *iter; + unsigned int cpu; + unsigned int i; + + for_each_possible_cpu(cpu) { + seqcount_t *s = &per_cpu(xt_recseq, cpu); + + i = 0; + xt_entry_foreach(iter, t->entries[cpu], t->size) { + u64 bcnt, pcnt; + unsigned int start; + + do { + start = read_seqcount_begin(s); + bcnt = iter->counters.bcnt; + pcnt = iter->counters.pcnt; + } while (read_seqcount_retry(s, start)); + + ADD_COUNTER(counters[i], bcnt, pcnt); + ++i; /* macro does multi eval of i */ + } + } +} + +static struct xt_counters *alloc_counters(const struct xt_table *table) +{ + unsigned int countersize; + struct xt_counters *counters; + const struct xt_table_info *private = table->private; + + /* We need atomic snapshot of counters: rest doesn't change + (other than comefrom, which userspace doesn't care + about). */ + countersize = sizeof(struct xt_counters) * private->number; + counters = vzalloc(countersize); + + if (counters == NULL) + return ERR_PTR(-ENOMEM); + + get_counters(private, counters); + + return counters; +} + +static int +copy_entries_to_user(unsigned int total_size, + const struct xt_table *table, + void __user *userptr) +{ + unsigned int off, num; + const struct ipt_entry *e; + struct xt_counters *counters; + const struct xt_table_info *private = table->private; + int ret = 0; + const void *loc_cpu_entry; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); + + /* choose the copy that is on our node/cpu, ... + * This choice is lazy (because current thread is + * allowed to migrate to another cpu) + */ + loc_cpu_entry = private->entries[raw_smp_processor_id()]; + if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { + ret = -EFAULT; + goto free_counters; + } + + /* FIXME: use iterator macros --RR */ + /* ... then go back and fix counters and names */ + for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ + unsigned int i; + const struct xt_entry_match *m; + const struct xt_entry_target *t; + + e = (struct ipt_entry *)(loc_cpu_entry + off); + if (copy_to_user(userptr + off + + offsetof(struct ipt_entry, counters), + &counters[num], + sizeof(counters[num])) != 0) { + ret = -EFAULT; + goto free_counters; + } + + for (i = sizeof(struct ipt_entry); + i < e->target_offset; + i += m->u.match_size) { + m = (void *)e + i; + + if (copy_to_user(userptr + off + i + + offsetof(struct xt_entry_match, + u.user.name), + m->u.kernel.match->name, + strlen(m->u.kernel.match->name)+1) + != 0) { + ret = -EFAULT; + goto free_counters; + } + } + + t = ipt_get_target_c(e); + if (copy_to_user(userptr + off + e->target_offset + + offsetof(struct xt_entry_target, + u.user.name), + t->u.kernel.target->name, + strlen(t->u.kernel.target->name)+1) != 0) { + ret = -EFAULT; + goto free_counters; + } + } + + free_counters: + vfree(counters); + return ret; +} + +#ifdef CONFIG_COMPAT +static void compat_standard_from_user(void *dst, const void *src) +{ + int v = *(compat_int_t *)src; + + if (v > 0) + v += xt_compat_calc_jump(AF_INET, v); + memcpy(dst, &v, sizeof(v)); +} + +static int compat_standard_to_user(void __user *dst, const void *src) +{ + compat_int_t cv = *(int *)src; + + if (cv > 0) + cv -= xt_compat_calc_jump(AF_INET, cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; +} + +static int compat_calc_entry(const struct ipt_entry *e, + const struct xt_table_info *info, + const void *base, struct xt_table_info *newinfo) +{ + const struct xt_entry_match *ematch; + const struct xt_entry_target *t; + unsigned int entry_offset; + int off, i, ret; + + off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); + entry_offset = (void *)e - base; + xt_ematch_foreach(ematch, e) + off += xt_compat_match_offset(ematch->u.kernel.match); + t = ipt_get_target_c(e); + off += xt_compat_target_offset(t->u.kernel.target); + newinfo->size -= off; + ret = xt_compat_add_offset(AF_INET, entry_offset, off); + if (ret) + return ret; + + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + if (info->hook_entry[i] && + (e < (struct ipt_entry *)(base + info->hook_entry[i]))) + newinfo->hook_entry[i] -= off; + if (info->underflow[i] && + (e < (struct ipt_entry *)(base + info->underflow[i]))) + newinfo->underflow[i] -= off; + } + return 0; +} + +static int compat_table_info(const struct xt_table_info *info, + struct xt_table_info *newinfo) +{ + struct ipt_entry *iter; + void *loc_cpu_entry; + int ret; + + if (!newinfo || !info) + return -EINVAL; + + /* we dont care about newinfo->entries[] */ + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + newinfo->initial_entries = 0; + loc_cpu_entry = info->entries[raw_smp_processor_id()]; + xt_compat_init_offsets(AF_INET, info->number); + xt_entry_foreach(iter, loc_cpu_entry, info->size) { + ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); + if (ret != 0) + return ret; + } + return 0; +} +#endif + +static int get_info(struct net *net, void __user *user, + const int *len, int compat) +{ + char name[XT_TABLE_MAXNAMELEN]; + struct xt_table *t; + int ret; + + if (*len != sizeof(struct ipt_getinfo)) { + duprintf("length %u != %zu\n", *len, + sizeof(struct ipt_getinfo)); + return -EINVAL; + } + + if (copy_from_user(name, user, sizeof(name)) != 0) + return -EFAULT; + + name[XT_TABLE_MAXNAMELEN-1] = '\0'; +#ifdef CONFIG_COMPAT + if (compat) + xt_compat_lock(AF_INET); +#endif + t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), + "iptable_%s", name); + if (!IS_ERR_OR_NULL(t)) { + struct ipt_getinfo info; + const struct xt_table_info *private = t->private; +#ifdef CONFIG_COMPAT + struct xt_table_info tmp; + + if (compat) { + ret = compat_table_info(private, &tmp); + xt_compat_flush_offsets(AF_INET); + private = &tmp; + } +#endif + memset(&info, 0, sizeof(info)); + info.valid_hooks = t->valid_hooks; + memcpy(info.hook_entry, private->hook_entry, + sizeof(info.hook_entry)); + memcpy(info.underflow, private->underflow, + sizeof(info.underflow)); + info.num_entries = private->number; + info.size = private->size; + strcpy(info.name, name); + + if (copy_to_user(user, &info, *len) != 0) + ret = -EFAULT; + else + ret = 0; + + xt_table_unlock(t); + module_put(t->me); + } else + ret = t ? PTR_ERR(t) : -ENOENT; +#ifdef CONFIG_COMPAT + if (compat) + xt_compat_unlock(AF_INET); +#endif + return ret; +} + +static int +get_entries(struct net *net, struct ipt_get_entries __user *uptr, + const int *len) +{ + int ret; + struct ipt_get_entries get; + struct xt_table *t; + + if (*len < sizeof(get)) { + duprintf("get_entries: %u < %zu\n", *len, sizeof(get)); + return -EINVAL; + } + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; + if (*len != sizeof(struct ipt_get_entries) + get.size) { + duprintf("get_entries: %u != %zu\n", + *len, sizeof(get) + get.size); + return -EINVAL; + } + + t = xt_find_table_lock(net, AF_INET, get.name); + if (!IS_ERR_OR_NULL(t)) { + const struct xt_table_info *private = t->private; + duprintf("t->private->number = %u\n", private->number); + if (get.size == private->size) + ret = copy_entries_to_user(private->size, + t, uptr->entrytable); + else { + duprintf("get_entries: I've got %u not %u!\n", + private->size, get.size); + ret = -EAGAIN; + } + module_put(t->me); + xt_table_unlock(t); + } else + ret = t ? PTR_ERR(t) : -ENOENT; + + return ret; +} + +static int +__do_replace(struct net *net, const char *name, unsigned int valid_hooks, + struct xt_table_info *newinfo, unsigned int num_counters, + void __user *counters_ptr) +{ + int ret; + struct xt_table *t; + struct xt_table_info *oldinfo; + struct xt_counters *counters; + void *loc_cpu_old_entry; + struct ipt_entry *iter; + + ret = 0; + counters = vzalloc(num_counters * sizeof(struct xt_counters)); + if (!counters) { + ret = -ENOMEM; + goto out; + } + + t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), + "iptable_%s", name); + if (IS_ERR_OR_NULL(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; + goto free_newinfo_counters_untrans; + } + + /* You lied! */ + if (valid_hooks != t->valid_hooks) { + duprintf("Valid hook crap: %08X vs %08X\n", + valid_hooks, t->valid_hooks); + ret = -EINVAL; + goto put_module; + } + + oldinfo = xt_replace_table(t, num_counters, newinfo, &ret); + if (!oldinfo) + goto put_module; + + /* Update module usage count based on number of rules */ + duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n", + oldinfo->number, oldinfo->initial_entries, newinfo->number); + if ((oldinfo->number > oldinfo->initial_entries) || + (newinfo->number <= oldinfo->initial_entries)) + module_put(t->me); + if ((oldinfo->number > oldinfo->initial_entries) && + (newinfo->number <= oldinfo->initial_entries)) + module_put(t->me); + + /* Get the old counters, and synchronize with replace */ + get_counters(oldinfo, counters); + + /* Decrease module usage counts and free resource */ + loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; + xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + cleanup_entry(iter, net); + + xt_free_table_info(oldinfo); + if (copy_to_user(counters_ptr, counters, + sizeof(struct xt_counters) * num_counters) != 0) { + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n"); + } + vfree(counters); + xt_table_unlock(t); + return ret; + + put_module: + module_put(t->me); + xt_table_unlock(t); + free_newinfo_counters_untrans: + vfree(counters); + out: + return ret; +} + +static int +do_replace(struct net *net, const void __user *user, unsigned int len) +{ + int ret; + struct ipt_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + struct ipt_entry *iter; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + /* overflow check */ + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + + tmp.name[sizeof(tmp.name)-1] = 0; + + newinfo = xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); + if (ret != 0) + goto free_newinfo; + + duprintf("Translated table\n"); + + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, tmp.counters); + if (ret) + goto free_newinfo_untrans; + return 0; + + free_newinfo_untrans: + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); + free_newinfo: + xt_free_table_info(newinfo); + return ret; +} + +static int +do_add_counters(struct net *net, const void __user *user, + unsigned int len, int compat) +{ + unsigned int i, curcpu; + struct xt_counters_info tmp; + struct xt_counters *paddc; + unsigned int num_counters; + const char *name; + int size; + void *ptmp; + struct xt_table *t; + const struct xt_table_info *private; + int ret = 0; + void *loc_cpu_entry; + struct ipt_entry *iter; + unsigned int addend; +#ifdef CONFIG_COMPAT + struct compat_xt_counters_info compat_tmp; + + if (compat) { + ptmp = &compat_tmp; + size = sizeof(struct compat_xt_counters_info); + } else +#endif + { + ptmp = &tmp; + size = sizeof(struct xt_counters_info); + } + + if (copy_from_user(ptmp, user, size) != 0) + return -EFAULT; + +#ifdef CONFIG_COMPAT + if (compat) { + num_counters = compat_tmp.num_counters; + name = compat_tmp.name; + } else +#endif + { + num_counters = tmp.num_counters; + name = tmp.name; + } + + if (len != size + num_counters * sizeof(struct xt_counters)) + return -EINVAL; + + paddc = vmalloc(len - size); + if (!paddc) + return -ENOMEM; + + if (copy_from_user(paddc, user + size, len - size) != 0) { + ret = -EFAULT; + goto free; + } + + t = xt_find_table_lock(net, AF_INET, name); + if (IS_ERR_OR_NULL(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; + goto free; + } + + local_bh_disable(); + private = t->private; + if (private->number != num_counters) { + ret = -EINVAL; + goto unlock_up_free; + } + + i = 0; + /* Choose the copy that is on our node */ + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + addend = xt_write_recseq_begin(); + xt_entry_foreach(iter, loc_cpu_entry, private->size) { + ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + ++i; + } + xt_write_recseq_end(addend); + unlock_up_free: + local_bh_enable(); + xt_table_unlock(t); + module_put(t->me); + free: + vfree(paddc); + + return ret; +} + +#ifdef CONFIG_COMPAT +struct compat_ipt_replace { + char name[XT_TABLE_MAXNAMELEN]; + u32 valid_hooks; + u32 num_entries; + u32 size; + u32 hook_entry[NF_INET_NUMHOOKS]; + u32 underflow[NF_INET_NUMHOOKS]; + u32 num_counters; + compat_uptr_t counters; /* struct xt_counters * */ + struct compat_ipt_entry entries[0]; +}; + +static int +compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, + unsigned int *size, struct xt_counters *counters, + unsigned int i) +{ + struct xt_entry_target *t; + struct compat_ipt_entry __user *ce; + u_int16_t target_offset, next_offset; + compat_uint_t origsize; + const struct xt_entry_match *ematch; + int ret = 0; + + origsize = *size; + ce = (struct compat_ipt_entry __user *)*dstptr; + if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 || + copy_to_user(&ce->counters, &counters[i], + sizeof(counters[i])) != 0) + return -EFAULT; + + *dstptr += sizeof(struct compat_ipt_entry); + *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); + + xt_ematch_foreach(ematch, e) { + ret = xt_compat_match_to_user(ematch, dstptr, size); + if (ret != 0) + return ret; + } + target_offset = e->target_offset - (origsize - *size); + t = ipt_get_target(e); + ret = xt_compat_target_to_user(t, dstptr, size); + if (ret) + return ret; + next_offset = e->next_offset - (origsize - *size); + if (put_user(target_offset, &ce->target_offset) != 0 || + put_user(next_offset, &ce->next_offset) != 0) + return -EFAULT; + return 0; +} + +static int +compat_find_calc_match(struct xt_entry_match *m, + const char *name, + const struct ipt_ip *ip, + unsigned int hookmask, + int *size) +{ + struct xt_match *match; + + match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, + m->u.user.revision); + if (IS_ERR(match)) { + duprintf("compat_check_calc_match: `%s' not found\n", + m->u.user.name); + return PTR_ERR(match); + } + m->u.kernel.match = match; + *size += xt_compat_match_offset(match); + return 0; +} + +static void compat_release_entry(struct compat_ipt_entry *e) +{ + struct xt_entry_target *t; + struct xt_entry_match *ematch; + + /* Cleanup all matches */ + xt_ematch_foreach(ematch, e) + module_put(ematch->u.kernel.match->me); + t = compat_ipt_get_target(e); + module_put(t->u.kernel.target->me); +} + +static int +check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, + struct xt_table_info *newinfo, + unsigned int *size, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, + const char *name) +{ + struct xt_entry_match *ematch; + struct xt_entry_target *t; + struct xt_target *target; + unsigned int entry_offset; + unsigned int j; + int ret, off, h; + + duprintf("check_compat_entry_size_and_hooks %p\n", e); + if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || + (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { + duprintf("Bad offset %p, limit = %p\n", e, limit); + return -EINVAL; + } + + if (e->next_offset < sizeof(struct compat_ipt_entry) + + sizeof(struct compat_xt_entry_target)) { + duprintf("checking: element %p size %u\n", + e, e->next_offset); + return -EINVAL; + } + + /* For purposes of check_entry casting the compat entry is fine */ + ret = check_entry((struct ipt_entry *)e, name); + if (ret) + return ret; + + off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); + entry_offset = (void *)e - (void *)base; + j = 0; + xt_ematch_foreach(ematch, e) { + ret = compat_find_calc_match(ematch, name, + &e->ip, e->comefrom, &off); + if (ret != 0) + goto release_matches; + ++j; + } + + t = compat_ipt_get_target(e); + target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { + duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", + t->u.user.name); + ret = PTR_ERR(target); + goto release_matches; + } + t->u.kernel.target = target; + + off += xt_compat_target_offset(target); + *size += off; + ret = xt_compat_add_offset(AF_INET, entry_offset, off); + if (ret) + goto out; + + /* Check hooks & underflows */ + for (h = 0; h < NF_INET_NUMHOOKS; h++) { + if ((unsigned char *)e - base == hook_entries[h]) + newinfo->hook_entry[h] = hook_entries[h]; + if ((unsigned char *)e - base == underflows[h]) + newinfo->underflow[h] = underflows[h]; + } + + /* Clear counters and comefrom */ + memset(&e->counters, 0, sizeof(e->counters)); + e->comefrom = 0; + return 0; + +out: + module_put(t->u.kernel.target->me); +release_matches: + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + module_put(ematch->u.kernel.match->me); + } + return ret; +} + +static int +compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, + unsigned int *size, const char *name, + struct xt_table_info *newinfo, unsigned char *base) +{ + struct xt_entry_target *t; + struct xt_target *target; + struct ipt_entry *de; + unsigned int origsize; + int ret, h; + struct xt_entry_match *ematch; + + ret = 0; + origsize = *size; + de = (struct ipt_entry *)*dstptr; + memcpy(de, e, sizeof(struct ipt_entry)); + memcpy(&de->counters, &e->counters, sizeof(e->counters)); + + *dstptr += sizeof(struct ipt_entry); + *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); + + xt_ematch_foreach(ematch, e) { + ret = xt_compat_match_from_user(ematch, dstptr, size); + if (ret != 0) + return ret; + } + de->target_offset = e->target_offset - (origsize - *size); + t = compat_ipt_get_target(e); + target = t->u.kernel.target; + xt_compat_target_from_user(t, dstptr, size); + + de->next_offset = e->next_offset - (origsize - *size); + for (h = 0; h < NF_INET_NUMHOOKS; h++) { + if ((unsigned char *)de - base < newinfo->hook_entry[h]) + newinfo->hook_entry[h] -= origsize - *size; + if ((unsigned char *)de - base < newinfo->underflow[h]) + newinfo->underflow[h] -= origsize - *size; + } + return ret; +} + +static int +compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) +{ + struct xt_entry_match *ematch; + struct xt_mtchk_param mtpar; + unsigned int j; + int ret = 0; + + j = 0; + mtpar.net = net; + mtpar.table = name; + mtpar.entryinfo = &e->ip; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV4; + xt_ematch_foreach(ematch, e) { + ret = check_match(ematch, &mtpar); + if (ret != 0) + goto cleanup_matches; + ++j; + } + + ret = check_target(e, net, name); + if (ret) + goto cleanup_matches; + return 0; + + cleanup_matches: + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + cleanup_match(ematch, net); + } + return ret; +} + +static int +translate_compat_table(struct net *net, + const char *name, + unsigned int valid_hooks, + struct xt_table_info **pinfo, + void **pentry0, + unsigned int total_size, + unsigned int number, + unsigned int *hook_entries, + unsigned int *underflows) +{ + unsigned int i, j; + struct xt_table_info *newinfo, *info; + void *pos, *entry0, *entry1; + struct compat_ipt_entry *iter0; + struct ipt_entry *iter1; + unsigned int size; + int ret; + + info = *pinfo; + entry0 = *pentry0; + size = total_size; + info->number = number; + + /* Init all hooks to impossible value. */ + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + info->hook_entry[i] = 0xFFFFFFFF; + info->underflow[i] = 0xFFFFFFFF; + } + + duprintf("translate_compat_table: size %u\n", info->size); + j = 0; + xt_compat_lock(AF_INET); + xt_compat_init_offsets(AF_INET, number); + /* Walk through entries, checking offsets. */ + xt_entry_foreach(iter0, entry0, total_size) { + ret = check_compat_entry_size_and_hooks(iter0, info, &size, + entry0, + entry0 + total_size, + hook_entries, + underflows, + name); + if (ret != 0) + goto out_unlock; + ++j; + } + + ret = -EINVAL; + if (j != number) { + duprintf("translate_compat_table: %u not %u entries\n", + j, number); + goto out_unlock; + } + + /* Check hooks all assigned */ + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + /* Only hooks which are valid */ + if (!(valid_hooks & (1 << i))) + continue; + if (info->hook_entry[i] == 0xFFFFFFFF) { + duprintf("Invalid hook entry %u %u\n", + i, hook_entries[i]); + goto out_unlock; + } + if (info->underflow[i] == 0xFFFFFFFF) { + duprintf("Invalid underflow %u %u\n", + i, underflows[i]); + goto out_unlock; + } + } + + ret = -ENOMEM; + newinfo = xt_alloc_table_info(size); + if (!newinfo) + goto out_unlock; + + newinfo->number = number; + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + newinfo->hook_entry[i] = info->hook_entry[i]; + newinfo->underflow[i] = info->underflow[i]; + } + entry1 = newinfo->entries[raw_smp_processor_id()]; + pos = entry1; + size = total_size; + xt_entry_foreach(iter0, entry0, total_size) { + ret = compat_copy_entry_from_user(iter0, &pos, &size, + name, newinfo, entry1); + if (ret != 0) + break; + } + xt_compat_flush_offsets(AF_INET); + xt_compat_unlock(AF_INET); + if (ret) + goto free_newinfo; + + ret = -ELOOP; + if (!mark_source_chains(newinfo, valid_hooks, entry1)) + goto free_newinfo; + + i = 0; + xt_entry_foreach(iter1, entry1, newinfo->size) { + ret = compat_check_entry(iter1, net, name); + if (ret != 0) + break; + ++i; + if (strcmp(ipt_get_target(iter1)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; + } + if (ret) { + /* + * The first i matches need cleanup_entry (calls ->destroy) + * because they had called ->check already. The other j-i + * entries need only release. + */ + int skip = i; + j -= i; + xt_entry_foreach(iter0, entry0, newinfo->size) { + if (skip-- > 0) + continue; + if (j-- == 0) + break; + compat_release_entry(iter0); + } + xt_entry_foreach(iter1, entry1, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter1, net); + } + xt_free_table_info(newinfo); + return ret; + } + + /* And one copy for every other CPU */ + for_each_possible_cpu(i) + if (newinfo->entries[i] && newinfo->entries[i] != entry1) + memcpy(newinfo->entries[i], entry1, newinfo->size); + + *pinfo = newinfo; + *pentry0 = entry1; + xt_free_table_info(info); + return 0; + +free_newinfo: + xt_free_table_info(newinfo); +out: + xt_entry_foreach(iter0, entry0, total_size) { + if (j-- == 0) + break; + compat_release_entry(iter0); + } + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET); + xt_compat_unlock(AF_INET); + goto out; +} + +static int +compat_do_replace(struct net *net, void __user *user, unsigned int len) +{ + int ret; + struct compat_ipt_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + struct ipt_entry *iter; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + /* overflow check */ + if (tmp.size >= INT_MAX / num_possible_cpus()) + return -ENOMEM; + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + + tmp.name[sizeof(tmp.name)-1] = 0; + + newinfo = xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, + &newinfo, &loc_cpu_entry, tmp.size, + tmp.num_entries, tmp.hook_entry, + tmp.underflow); + if (ret != 0) + goto free_newinfo; + + duprintf("compat_do_replace: Translated table\n"); + + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, compat_ptr(tmp.counters)); + if (ret) + goto free_newinfo_untrans; + return 0; + + free_newinfo_untrans: + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); + free_newinfo: + xt_free_table_info(newinfo); + return ret; +} + +static int +compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, + unsigned int len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IPT_SO_SET_REPLACE: + ret = compat_do_replace(sock_net(sk), user, len); + break; + + case IPT_SO_SET_ADD_COUNTERS: + ret = do_add_counters(sock_net(sk), user, len, 1); + break; + + default: + duprintf("do_ipt_set_ctl: unknown request %i\n", cmd); + ret = -EINVAL; + } + + return ret; +} + +struct compat_ipt_get_entries { + char name[XT_TABLE_MAXNAMELEN]; + compat_uint_t size; + struct compat_ipt_entry entrytable[0]; +}; + +static int +compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, + void __user *userptr) +{ + struct xt_counters *counters; + const struct xt_table_info *private = table->private; + void __user *pos; + unsigned int size; + int ret = 0; + const void *loc_cpu_entry; + unsigned int i = 0; + struct ipt_entry *iter; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); + + /* choose the copy that is on our node/cpu, ... + * This choice is lazy (because current thread is + * allowed to migrate to another cpu) + */ + loc_cpu_entry = private->entries[raw_smp_processor_id()]; + pos = userptr; + size = total_size; + xt_entry_foreach(iter, loc_cpu_entry, total_size) { + ret = compat_copy_entry_to_user(iter, &pos, + &size, counters, i++); + if (ret != 0) + break; + } + + vfree(counters); + return ret; +} + +static int +compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, + int *len) +{ + int ret; + struct compat_ipt_get_entries get; + struct xt_table *t; + + if (*len < sizeof(get)) { + duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); + return -EINVAL; + } + + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; + + if (*len != sizeof(struct compat_ipt_get_entries) + get.size) { + duprintf("compat_get_entries: %u != %zu\n", + *len, sizeof(get) + get.size); + return -EINVAL; + } + + xt_compat_lock(AF_INET); + t = xt_find_table_lock(net, AF_INET, get.name); + if (!IS_ERR_OR_NULL(t)) { + const struct xt_table_info *private = t->private; + struct xt_table_info info; + duprintf("t->private->number = %u\n", private->number); + ret = compat_table_info(private, &info); + if (!ret && get.size == info.size) { + ret = compat_copy_entries_to_user(private->size, + t, uptr->entrytable); + } else if (!ret) { + duprintf("compat_get_entries: I've got %u not %u!\n", + private->size, get.size); + ret = -EAGAIN; + } + xt_compat_flush_offsets(AF_INET); + module_put(t->me); + xt_table_unlock(t); + } else + ret = t ? PTR_ERR(t) : -ENOENT; + + xt_compat_unlock(AF_INET); + return ret; +} + +static int do_ipt_get_ctl(struct sock *, int, void __user *, int *); + +static int +compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IPT_SO_GET_INFO: + ret = get_info(sock_net(sk), user, len, 1); + break; + case IPT_SO_GET_ENTRIES: + ret = compat_get_entries(sock_net(sk), user, len); + break; + default: + ret = do_ipt_get_ctl(sk, cmd, user, len); + } + return ret; +} +#endif + +static int +do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IPT_SO_SET_REPLACE: + ret = do_replace(sock_net(sk), user, len); + break; + + case IPT_SO_SET_ADD_COUNTERS: + ret = do_add_counters(sock_net(sk), user, len, 0); + break; + + default: + duprintf("do_ipt_set_ctl: unknown request %i\n", cmd); + ret = -EINVAL; + } + + return ret; +} + +static int +do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IPT_SO_GET_INFO: + ret = get_info(sock_net(sk), user, len, 0); + break; + + case IPT_SO_GET_ENTRIES: + ret = get_entries(sock_net(sk), user, len); + break; + + case IPT_SO_GET_REVISION_MATCH: + case IPT_SO_GET_REVISION_TARGET: { + struct xt_get_revision rev; + int target; + + if (*len != sizeof(rev)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&rev, user, sizeof(rev)) != 0) { + ret = -EFAULT; + break; + } + rev.name[sizeof(rev.name)-1] = 0; + + if (cmd == IPT_SO_GET_REVISION_TARGET) + target = 1; + else + target = 0; + + try_then_request_module(xt_find_revision(AF_INET, rev.name, + rev.revision, + target, &ret), + "ipt_%s", rev.name); + break; + } + + default: + duprintf("do_ipt_get_ctl: unknown request %i\n", cmd); + ret = -EINVAL; + } + + return ret; +} + +struct xt_table *ipt_register_table(struct net *net, + const struct xt_table *table, + const struct ipt_replace *repl) +{ + int ret; + struct xt_table_info *newinfo; + struct xt_table_info bootstrap = {0}; + void *loc_cpu_entry; + struct xt_table *new_table; + + newinfo = xt_alloc_table_info(repl->size); + if (!newinfo) { + ret = -ENOMEM; + goto out; + } + + /* choose the copy on our node/cpu, but dont care about preemption */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + memcpy(loc_cpu_entry, repl->entries, repl->size); + + ret = translate_table(net, newinfo, loc_cpu_entry, repl); + if (ret != 0) + goto out_free; + + new_table = xt_register_table(net, table, &bootstrap, newinfo); + if (IS_ERR(new_table)) { + ret = PTR_ERR(new_table); + goto out_free; + } + + return new_table; + +out_free: + xt_free_table_info(newinfo); +out: + return ERR_PTR(ret); +} + +void ipt_unregister_table(struct net *net, struct xt_table *table) +{ + struct xt_table_info *private; + void *loc_cpu_entry; + struct module *table_owner = table->me; + struct ipt_entry *iter; + + private = xt_unregister_table(table); + + /* Decrease module usage counts and free resources */ + loc_cpu_entry = private->entries[raw_smp_processor_id()]; + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter, net); + if (private->number > private->initial_entries) + module_put(table_owner); + xt_free_table_info(private); +} + +/* Returns 1 if the type and code is matched by the range, 0 otherwise */ +static inline bool +icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, + u_int8_t type, u_int8_t code, + bool invert) +{ + return ((test_type == 0xFF) || + (type == test_type && code >= min_code && code <= max_code)) + ^ invert; +} + +static bool +icmp_match(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct icmphdr *ic; + struct icmphdr _icmph; + const struct ipt_icmp *icmpinfo = par->matchinfo; + + /* Must not be a fragment. */ + if (par->fragoff != 0) + return false; + + ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); + if (ic == NULL) { + /* We've been asked to examine this packet, and we + * can't. Hence, no choice but to drop. + */ + duprintf("Dropping evil ICMP tinygram.\n"); + par->hotdrop = true; + return false; + } + + return icmp_type_code_match(icmpinfo->type, + icmpinfo->code[0], + icmpinfo->code[1], + ic->type, ic->code, + !!(icmpinfo->invflags&IPT_ICMP_INV)); +} + +static int icmp_checkentry(const struct xt_mtchk_param *par) +{ + const struct ipt_icmp *icmpinfo = par->matchinfo; + + /* Must specify no unknown invflags */ + return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0; +} + +static struct xt_target ipt_builtin_tg[] __read_mostly = { + { + .name = XT_STANDARD_TARGET, + .targetsize = sizeof(int), + .family = NFPROTO_IPV4, +#ifdef CONFIG_COMPAT + .compatsize = sizeof(compat_int_t), + .compat_from_user = compat_standard_from_user, + .compat_to_user = compat_standard_to_user, +#endif + }, + { + .name = XT_ERROR_TARGET, + .target = ipt_error, + .targetsize = XT_FUNCTION_MAXNAMELEN, + .family = NFPROTO_IPV4, + }, +}; + +static struct nf_sockopt_ops ipt_sockopts = { + .pf = PF_INET, + .set_optmin = IPT_BASE_CTL, + .set_optmax = IPT_SO_SET_MAX+1, + .set = do_ipt_set_ctl, +#ifdef CONFIG_COMPAT + .compat_set = compat_do_ipt_set_ctl, +#endif + .get_optmin = IPT_BASE_CTL, + .get_optmax = IPT_SO_GET_MAX+1, + .get = do_ipt_get_ctl, +#ifdef CONFIG_COMPAT + .compat_get = compat_do_ipt_get_ctl, +#endif + .owner = THIS_MODULE, +}; + +static struct xt_match ipt_builtin_mt[] __read_mostly = { + { + .name = "icmp", + .match = icmp_match, + .matchsize = sizeof(struct ipt_icmp), + .checkentry = icmp_checkentry, + .proto = IPPROTO_ICMP, + .family = NFPROTO_IPV4, + }, +}; + +static int __net_init ip_tables_net_init(struct net *net) +{ + return xt_proto_init(net, NFPROTO_IPV4); +} + +static void __net_exit ip_tables_net_exit(struct net *net) +{ + xt_proto_fini(net, NFPROTO_IPV4); +} + +static struct pernet_operations ip_tables_net_ops = { + .init = ip_tables_net_init, + .exit = ip_tables_net_exit, +}; + +static int __init ip_tables_init(void) +{ + int ret; + + ret = register_pernet_subsys(&ip_tables_net_ops); + if (ret < 0) + goto err1; + + /* No one else will be downing sem now, so we won't sleep */ + ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); + if (ret < 0) + goto err2; + ret = xt_register_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); + if (ret < 0) + goto err4; + + /* Register setsockopt */ + ret = nf_register_sockopt(&ipt_sockopts); + if (ret < 0) + goto err5; + + pr_info("(C) 2000-2006 Netfilter Core Team\n"); + return 0; + +err5: + xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); +err4: + xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); +err2: + unregister_pernet_subsys(&ip_tables_net_ops); +err1: + return ret; +} + +static void __exit ip_tables_fini(void) +{ + nf_unregister_sockopt(&ipt_sockopts); + + xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); + xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); + unregister_pernet_subsys(&ip_tables_net_ops); +} + +EXPORT_SYMBOL(ipt_register_table); +EXPORT_SYMBOL(ipt_unregister_table); +EXPORT_SYMBOL(ipt_do_table); +module_init(ip_tables_init); +module_exit(ip_tables_fini); diff --git a/kernel/net/ipv4/netfilter/ipt_CLUSTERIP.c b/kernel/net/ipv4/netfilter/ipt_CLUSTERIP.c new file mode 100644 index 000000000..771ab3d01 --- /dev/null +++ b/kernel/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -0,0 +1,794 @@ +/* Cluster IP hashmark target + * (C) 2003-2004 by Harald Welte + * based on ideas of Fabio Olive Leite + * + * Development of this code funded by SuSE Linux AG, http://www.suse.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CLUSTERIP_VERSION "0.8" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("Xtables: CLUSTERIP target"); + +struct clusterip_config { + struct list_head list; /* list of all configs */ + atomic_t refcount; /* reference count */ + atomic_t entries; /* number of entries/rules + * referencing us */ + + __be32 clusterip; /* the IP address */ + u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ + struct net_device *dev; /* device */ + u_int16_t num_total_nodes; /* total number of nodes */ + unsigned long local_nodes; /* node number array */ + +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *pde; /* proc dir entry */ +#endif + enum clusterip_hashmode hash_mode; /* which hashing mode */ + u_int32_t hash_initval; /* hash initialization */ + struct rcu_head rcu; +}; + +#ifdef CONFIG_PROC_FS +static const struct file_operations clusterip_proc_fops; +#endif + +static int clusterip_net_id __read_mostly; + +struct clusterip_net { + struct list_head configs; + /* lock protects the configs list */ + spinlock_t lock; + +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *procdir; +#endif +}; + +static inline void +clusterip_config_get(struct clusterip_config *c) +{ + atomic_inc(&c->refcount); +} + + +static void clusterip_config_rcu_free(struct rcu_head *head) +{ + kfree(container_of(head, struct clusterip_config, rcu)); +} + +static inline void +clusterip_config_put(struct clusterip_config *c) +{ + if (atomic_dec_and_test(&c->refcount)) + call_rcu_bh(&c->rcu, clusterip_config_rcu_free); +} + +/* decrease the count of entries using/referencing this config. If last + * entry(rule) is removed, remove the config from lists, but don't free it + * yet, since proc-files could still be holding references */ +static inline void +clusterip_config_entry_put(struct clusterip_config *c) +{ + struct net *net = dev_net(c->dev); + struct clusterip_net *cn = net_generic(net, clusterip_net_id); + + local_bh_disable(); + if (atomic_dec_and_lock(&c->entries, &cn->lock)) { + list_del_rcu(&c->list); + spin_unlock(&cn->lock); + local_bh_enable(); + + dev_mc_del(c->dev, c->clustermac); + dev_put(c->dev); + + /* In case anyone still accesses the file, the open/close + * functions are also incrementing the refcount on their own, + * so it's safe to remove the entry even if it's in use. */ +#ifdef CONFIG_PROC_FS + proc_remove(c->pde); +#endif + return; + } + local_bh_enable(); +} + +static struct clusterip_config * +__clusterip_config_find(struct net *net, __be32 clusterip) +{ + struct clusterip_config *c; + struct clusterip_net *cn = net_generic(net, clusterip_net_id); + + list_for_each_entry_rcu(c, &cn->configs, list) { + if (c->clusterip == clusterip) + return c; + } + + return NULL; +} + +static inline struct clusterip_config * +clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) +{ + struct clusterip_config *c; + + rcu_read_lock_bh(); + c = __clusterip_config_find(net, clusterip); + if (c) { + if (unlikely(!atomic_inc_not_zero(&c->refcount))) + c = NULL; + else if (entry) + atomic_inc(&c->entries); + } + rcu_read_unlock_bh(); + + return c; +} + +static void +clusterip_config_init_nodelist(struct clusterip_config *c, + const struct ipt_clusterip_tgt_info *i) +{ + int n; + + for (n = 0; n < i->num_local_nodes; n++) + set_bit(i->local_nodes[n] - 1, &c->local_nodes); +} + +static struct clusterip_config * +clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, + struct net_device *dev) +{ + struct clusterip_config *c; + struct clusterip_net *cn = net_generic(dev_net(dev), clusterip_net_id); + + c = kzalloc(sizeof(*c), GFP_ATOMIC); + if (!c) + return NULL; + + c->dev = dev; + c->clusterip = ip; + memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); + c->num_total_nodes = i->num_total_nodes; + clusterip_config_init_nodelist(c, i); + c->hash_mode = i->hash_mode; + c->hash_initval = i->hash_initval; + atomic_set(&c->refcount, 1); + atomic_set(&c->entries, 1); + +#ifdef CONFIG_PROC_FS + { + char buffer[16]; + + /* create proc dir entry */ + sprintf(buffer, "%pI4", &ip); + c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, + cn->procdir, + &clusterip_proc_fops, c); + if (!c->pde) { + kfree(c); + return NULL; + } + } +#endif + + spin_lock_bh(&cn->lock); + list_add_rcu(&c->list, &cn->configs); + spin_unlock_bh(&cn->lock); + + return c; +} + +#ifdef CONFIG_PROC_FS +static int +clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) +{ + + if (nodenum == 0 || + nodenum > c->num_total_nodes) + return 1; + + /* check if we already have this number in our bitfield */ + if (test_and_set_bit(nodenum - 1, &c->local_nodes)) + return 1; + + return 0; +} + +static bool +clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) +{ + if (nodenum == 0 || + nodenum > c->num_total_nodes) + return true; + + if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) + return false; + + return true; +} +#endif + +static inline u_int32_t +clusterip_hashfn(const struct sk_buff *skb, + const struct clusterip_config *config) +{ + const struct iphdr *iph = ip_hdr(skb); + unsigned long hashval; + u_int16_t sport = 0, dport = 0; + int poff; + + poff = proto_ports_offset(iph->protocol); + if (poff >= 0) { + const u_int16_t *ports; + u16 _ports[2]; + + ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports); + if (ports) { + sport = ports[0]; + dport = ports[1]; + } + } else { + net_info_ratelimited("unknown protocol %u\n", iph->protocol); + } + + switch (config->hash_mode) { + case CLUSTERIP_HASHMODE_SIP: + hashval = jhash_1word(ntohl(iph->saddr), + config->hash_initval); + break; + case CLUSTERIP_HASHMODE_SIP_SPT: + hashval = jhash_2words(ntohl(iph->saddr), sport, + config->hash_initval); + break; + case CLUSTERIP_HASHMODE_SIP_SPT_DPT: + hashval = jhash_3words(ntohl(iph->saddr), sport, dport, + config->hash_initval); + break; + default: + /* to make gcc happy */ + hashval = 0; + /* This cannot happen, unless the check function wasn't called + * at rule load time */ + pr_info("unknown mode %u\n", config->hash_mode); + BUG(); + break; + } + + /* node numbers are 1..n, not 0..n */ + return reciprocal_scale(hashval, config->num_total_nodes) + 1; +} + +static inline int +clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) +{ + return test_bit(hash - 1, &config->local_nodes); +} + +/*********************************************************************** + * IPTABLES TARGET + ***********************************************************************/ + +static unsigned int +clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + u_int32_t hash; + + /* don't need to clusterip_config_get() here, since refcount + * is only decremented by destroy() - and ip_tables guarantees + * that the ->target() function isn't called after ->destroy() */ + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return NF_DROP; + + /* special case: ICMP error handling. conntrack distinguishes between + * error messages (RELATED) and information requests (see below) */ + if (ip_hdr(skb)->protocol == IPPROTO_ICMP && + (ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY)) + return XT_CONTINUE; + + /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, + * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here + * on, which all have an ID field [relevant for hashing]. */ + + hash = clusterip_hashfn(skb, cipinfo->config); + + switch (ctinfo) { + case IP_CT_NEW: + ct->mark = hash; + break; + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + /* FIXME: we don't handle expectations at the moment. + * They can arrive on a different node than + * the master connection (e.g. FTP passive mode) */ + case IP_CT_ESTABLISHED: + case IP_CT_ESTABLISHED_REPLY: + break; + default: /* Prevent gcc warnings */ + break; + } + +#ifdef DEBUG + nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); +#endif + pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); + if (!clusterip_responsible(cipinfo->config, hash)) { + pr_debug("not responsible\n"); + return NF_DROP; + } + pr_debug("responsible\n"); + + /* despite being received via linklayer multicast, this is + * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ + skb->pkt_type = PACKET_HOST; + + return XT_CONTINUE; +} + +static int clusterip_tg_check(const struct xt_tgchk_param *par) +{ + struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; + struct clusterip_config *config; + int ret; + + if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && + cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && + cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { + pr_info("unknown mode %u\n", cipinfo->hash_mode); + return -EINVAL; + + } + if (e->ip.dmsk.s_addr != htonl(0xffffffff) || + e->ip.dst.s_addr == 0) { + pr_info("Please specify destination IP\n"); + return -EINVAL; + } + + /* FIXME: further sanity checks */ + + config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1); + if (!config) { + if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { + pr_info("no config found for %pI4, need 'new'\n", + &e->ip.dst.s_addr); + return -EINVAL; + } else { + struct net_device *dev; + + if (e->ip.iniface[0] == '\0') { + pr_info("Please specify an interface name\n"); + return -EINVAL; + } + + dev = dev_get_by_name(par->net, e->ip.iniface); + if (!dev) { + pr_info("no such interface %s\n", + e->ip.iniface); + return -ENOENT; + } + + config = clusterip_config_init(cipinfo, + e->ip.dst.s_addr, dev); + if (!config) { + dev_put(dev); + return -ENOMEM; + } + dev_mc_add(config->dev, config->clustermac); + } + } + cipinfo->config = config; + + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) + pr_info("cannot load conntrack support for proto=%u\n", + par->family); + + if (!par->net->xt.clusterip_deprecated_warning) { + pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, " + "use xt_cluster instead\n"); + par->net->xt.clusterip_deprecated_warning = true; + } + + return ret; +} + +/* drop reference count of cluster config when rule is deleted */ +static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) +{ + const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; + + /* if no more entries are referencing the config, remove it + * from the list and destroy the proc entry */ + clusterip_config_entry_put(cipinfo->config); + + clusterip_config_put(cipinfo->config); + + nf_ct_l3proto_module_put(par->family); +} + +#ifdef CONFIG_COMPAT +struct compat_ipt_clusterip_tgt_info +{ + u_int32_t flags; + u_int8_t clustermac[6]; + u_int16_t num_total_nodes; + u_int16_t num_local_nodes; + u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; + u_int32_t hash_mode; + u_int32_t hash_initval; + compat_uptr_t config; +}; +#endif /* CONFIG_COMPAT */ + +static struct xt_target clusterip_tg_reg __read_mostly = { + .name = "CLUSTERIP", + .family = NFPROTO_IPV4, + .target = clusterip_tg, + .checkentry = clusterip_tg_check, + .destroy = clusterip_tg_destroy, + .targetsize = sizeof(struct ipt_clusterip_tgt_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info), +#endif /* CONFIG_COMPAT */ + .me = THIS_MODULE +}; + + +/*********************************************************************** + * ARP MANGLING CODE + ***********************************************************************/ + +/* hardcoded for 48bit ethernet and 32bit ipv4 addresses */ +struct arp_payload { + u_int8_t src_hw[ETH_ALEN]; + __be32 src_ip; + u_int8_t dst_hw[ETH_ALEN]; + __be32 dst_ip; +} __packed; + +#ifdef DEBUG +static void arp_print(struct arp_payload *payload) +{ +#define HBUFFERLEN 30 + char hbuffer[HBUFFERLEN]; + int j,k; + + for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) { + hbuffer[k++] = hex_asc_hi(payload->src_hw[j]); + hbuffer[k++] = hex_asc_lo(payload->src_hw[j]); + hbuffer[k++]=':'; + } + hbuffer[--k]='\0'; + + pr_debug("src %pI4@%s, dst %pI4\n", + &payload->src_ip, hbuffer, &payload->dst_ip); +} +#endif + +static unsigned int +arp_mangle(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct arphdr *arp = arp_hdr(skb); + struct arp_payload *payload; + struct clusterip_config *c; + struct net *net = dev_net(state->in ? state->in : state->out); + + /* we don't care about non-ethernet and non-ipv4 ARP */ + if (arp->ar_hrd != htons(ARPHRD_ETHER) || + arp->ar_pro != htons(ETH_P_IP) || + arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) + return NF_ACCEPT; + + /* we only want to mangle arp requests and replies */ + if (arp->ar_op != htons(ARPOP_REPLY) && + arp->ar_op != htons(ARPOP_REQUEST)) + return NF_ACCEPT; + + payload = (void *)(arp+1); + + /* if there is no clusterip configuration for the arp reply's + * source ip, we don't want to mangle it */ + c = clusterip_config_find_get(net, payload->src_ip, 0); + if (!c) + return NF_ACCEPT; + + /* normally the linux kernel always replies to arp queries of + * addresses on different interfacs. However, in the CLUSTERIP case + * this wouldn't work, since we didn't subscribe the mcast group on + * other interfaces */ + if (c->dev != state->out) { + pr_debug("not mangling arp reply on different " + "interface: cip'%s'-skb'%s'\n", + c->dev->name, state->out->name); + clusterip_config_put(c); + return NF_ACCEPT; + } + + /* mangle reply hardware address */ + memcpy(payload->src_hw, c->clustermac, arp->ar_hln); + +#ifdef DEBUG + pr_debug("mangled arp reply: "); + arp_print(payload); +#endif + + clusterip_config_put(c); + + return NF_ACCEPT; +} + +static struct nf_hook_ops cip_arp_ops __read_mostly = { + .hook = arp_mangle, + .pf = NFPROTO_ARP, + .hooknum = NF_ARP_OUT, + .priority = -1 +}; + +/*********************************************************************** + * PROC DIR HANDLING + ***********************************************************************/ + +#ifdef CONFIG_PROC_FS + +struct clusterip_seq_position { + unsigned int pos; /* position */ + unsigned int weight; /* number of bits set == size */ + unsigned int bit; /* current bit */ + unsigned long val; /* current value */ +}; + +static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) +{ + struct clusterip_config *c = s->private; + unsigned int weight; + u_int32_t local_nodes; + struct clusterip_seq_position *idx; + + /* FIXME: possible race */ + local_nodes = c->local_nodes; + weight = hweight32(local_nodes); + if (*pos >= weight) + return NULL; + + idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); + if (!idx) + return ERR_PTR(-ENOMEM); + + idx->pos = *pos; + idx->weight = weight; + idx->bit = ffs(local_nodes); + idx->val = local_nodes; + clear_bit(idx->bit - 1, &idx->val); + + return idx; +} + +static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct clusterip_seq_position *idx = v; + + *pos = ++idx->pos; + if (*pos >= idx->weight) { + kfree(v); + return NULL; + } + idx->bit = ffs(idx->val); + clear_bit(idx->bit - 1, &idx->val); + return idx; +} + +static void clusterip_seq_stop(struct seq_file *s, void *v) +{ + if (!IS_ERR(v)) + kfree(v); +} + +static int clusterip_seq_show(struct seq_file *s, void *v) +{ + struct clusterip_seq_position *idx = v; + + if (idx->pos != 0) + seq_putc(s, ','); + + seq_printf(s, "%u", idx->bit); + + if (idx->pos == idx->weight - 1) + seq_putc(s, '\n'); + + return 0; +} + +static const struct seq_operations clusterip_seq_ops = { + .start = clusterip_seq_start, + .next = clusterip_seq_next, + .stop = clusterip_seq_stop, + .show = clusterip_seq_show, +}; + +static int clusterip_proc_open(struct inode *inode, struct file *file) +{ + int ret = seq_open(file, &clusterip_seq_ops); + + if (!ret) { + struct seq_file *sf = file->private_data; + struct clusterip_config *c = PDE_DATA(inode); + + sf->private = c; + + clusterip_config_get(c); + } + + return ret; +} + +static int clusterip_proc_release(struct inode *inode, struct file *file) +{ + struct clusterip_config *c = PDE_DATA(inode); + int ret; + + ret = seq_release(inode, file); + + if (!ret) + clusterip_config_put(c); + + return ret; +} + +static ssize_t clusterip_proc_write(struct file *file, const char __user *input, + size_t size, loff_t *ofs) +{ + struct clusterip_config *c = PDE_DATA(file_inode(file)); +#define PROC_WRITELEN 10 + char buffer[PROC_WRITELEN+1]; + unsigned long nodenum; + int rc; + + if (size > PROC_WRITELEN) + return -EIO; + if (copy_from_user(buffer, input, size)) + return -EFAULT; + buffer[size] = 0; + + if (*buffer == '+') { + rc = kstrtoul(buffer+1, 10, &nodenum); + if (rc) + return rc; + if (clusterip_add_node(c, nodenum)) + return -ENOMEM; + } else if (*buffer == '-') { + rc = kstrtoul(buffer+1, 10, &nodenum); + if (rc) + return rc; + if (clusterip_del_node(c, nodenum)) + return -ENOENT; + } else + return -EIO; + + return size; +} + +static const struct file_operations clusterip_proc_fops = { + .owner = THIS_MODULE, + .open = clusterip_proc_open, + .read = seq_read, + .write = clusterip_proc_write, + .llseek = seq_lseek, + .release = clusterip_proc_release, +}; + +#endif /* CONFIG_PROC_FS */ + +static int clusterip_net_init(struct net *net) +{ + struct clusterip_net *cn = net_generic(net, clusterip_net_id); + + INIT_LIST_HEAD(&cn->configs); + + spin_lock_init(&cn->lock); + +#ifdef CONFIG_PROC_FS + cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net); + if (!cn->procdir) { + pr_err("Unable to proc dir entry\n"); + return -ENOMEM; + } +#endif /* CONFIG_PROC_FS */ + + return 0; +} + +static void clusterip_net_exit(struct net *net) +{ +#ifdef CONFIG_PROC_FS + struct clusterip_net *cn = net_generic(net, clusterip_net_id); + proc_remove(cn->procdir); +#endif +} + +static struct pernet_operations clusterip_net_ops = { + .init = clusterip_net_init, + .exit = clusterip_net_exit, + .id = &clusterip_net_id, + .size = sizeof(struct clusterip_net), +}; + +static int __init clusterip_tg_init(void) +{ + int ret; + + ret = register_pernet_subsys(&clusterip_net_ops); + if (ret < 0) + return ret; + + ret = xt_register_target(&clusterip_tg_reg); + if (ret < 0) + goto cleanup_subsys; + + ret = nf_register_hook(&cip_arp_ops); + if (ret < 0) + goto cleanup_target; + + pr_info("ClusterIP Version %s loaded successfully\n", + CLUSTERIP_VERSION); + + return 0; + +cleanup_target: + xt_unregister_target(&clusterip_tg_reg); +cleanup_subsys: + unregister_pernet_subsys(&clusterip_net_ops); + return ret; +} + +static void __exit clusterip_tg_exit(void) +{ + pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); + + nf_unregister_hook(&cip_arp_ops); + xt_unregister_target(&clusterip_tg_reg); + unregister_pernet_subsys(&clusterip_net_ops); + + /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */ + rcu_barrier_bh(); +} + +module_init(clusterip_tg_init); +module_exit(clusterip_tg_exit); diff --git a/kernel/net/ipv4/netfilter/ipt_ECN.c b/kernel/net/ipv4/netfilter/ipt_ECN.c new file mode 100644 index 000000000..4bf3dc49a --- /dev/null +++ b/kernel/net/ipv4/netfilter/ipt_ECN.c @@ -0,0 +1,138 @@ +/* iptables module for the IPv4 and TCP ECN bits, Version 1.5 + * + * (C) 2002 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag modification"); + +/* set ECT codepoint from IP header. + * return false if there was an error. */ +static inline bool +set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo) +{ + struct iphdr *iph = ip_hdr(skb); + + if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { + __u8 oldtos; + if (!skb_make_writable(skb, sizeof(struct iphdr))) + return false; + iph = ip_hdr(skb); + oldtos = iph->tos; + iph->tos &= ~IPT_ECN_IP_MASK; + iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); + csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); + } + return true; +} + +/* Return false if there was an error. */ +static inline bool +set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) +{ + struct tcphdr _tcph, *tcph; + __be16 oldval; + + /* Not enough header? */ + tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); + if (!tcph) + return false; + + if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) || + tcph->ece == einfo->proto.tcp.ece) && + (!(einfo->operation & IPT_ECN_OP_SET_CWR) || + tcph->cwr == einfo->proto.tcp.cwr)) + return true; + + if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) + return false; + tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb); + + oldval = ((__be16 *)tcph)[6]; + if (einfo->operation & IPT_ECN_OP_SET_ECE) + tcph->ece = einfo->proto.tcp.ece; + if (einfo->operation & IPT_ECN_OP_SET_CWR) + tcph->cwr = einfo->proto.tcp.cwr; + + inet_proto_csum_replace2(&tcph->check, skb, + oldval, ((__be16 *)tcph)[6], 0); + return true; +} + +static unsigned int +ecn_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ipt_ECN_info *einfo = par->targinfo; + + if (einfo->operation & IPT_ECN_OP_SET_IP) + if (!set_ect_ip(skb, einfo)) + return NF_DROP; + + if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) && + ip_hdr(skb)->protocol == IPPROTO_TCP) + if (!set_ect_tcp(skb, einfo)) + return NF_DROP; + + return XT_CONTINUE; +} + +static int ecn_tg_check(const struct xt_tgchk_param *par) +{ + const struct ipt_ECN_info *einfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; + + if (einfo->operation & IPT_ECN_OP_MASK) { + pr_info("unsupported ECN operation %x\n", einfo->operation); + return -EINVAL; + } + if (einfo->ip_ect & ~IPT_ECN_IP_MASK) { + pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect); + return -EINVAL; + } + if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && + (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { + pr_info("cannot use TCP operations on a non-tcp rule\n"); + return -EINVAL; + } + return 0; +} + +static struct xt_target ecn_tg_reg __read_mostly = { + .name = "ECN", + .family = NFPROTO_IPV4, + .target = ecn_tg, + .targetsize = sizeof(struct ipt_ECN_info), + .table = "mangle", + .checkentry = ecn_tg_check, + .me = THIS_MODULE, +}; + +static int __init ecn_tg_init(void) +{ + return xt_register_target(&ecn_tg_reg); +} + +static void __exit ecn_tg_exit(void) +{ + xt_unregister_target(&ecn_tg_reg); +} + +module_init(ecn_tg_init); +module_exit(ecn_tg_exit); diff --git a/kernel/net/ipv4/netfilter/ipt_MASQUERADE.c b/kernel/net/ipv4/netfilter/ipt_MASQUERADE.c new file mode 100644 index 000000000..da7f02a0b --- /dev/null +++ b/kernel/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -0,0 +1,91 @@ +/* Masquerade. Simple mapping which alters range to a local IP address + (depending on route). */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team "); +MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); + +/* FIXME: Multiple targets. --RR */ +static int masquerade_tg_check(const struct xt_tgchk_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + + if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { + pr_debug("bad MAP_IPS.\n"); + return -EINVAL; + } + if (mr->rangesize != 1) { + pr_debug("bad rangesize %u\n", mr->rangesize); + return -EINVAL; + } + return 0; +} + +static unsigned int +masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + struct nf_nat_range range; + const struct nf_nat_ipv4_multi_range_compat *mr; + + mr = par->targinfo; + range.flags = mr->range[0].flags; + range.min_proto = mr->range[0].min; + range.max_proto = mr->range[0].max; + + return nf_nat_masquerade_ipv4(skb, par->hooknum, &range, par->out); +} + +static struct xt_target masquerade_tg_reg __read_mostly = { + .name = "MASQUERADE", + .family = NFPROTO_IPV4, + .target = masquerade_tg, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .table = "nat", + .hooks = 1 << NF_INET_POST_ROUTING, + .checkentry = masquerade_tg_check, + .me = THIS_MODULE, +}; + +static int __init masquerade_tg_init(void) +{ + int ret; + + ret = xt_register_target(&masquerade_tg_reg); + + if (ret == 0) + nf_nat_masquerade_ipv4_register_notifier(); + + return ret; +} + +static void __exit masquerade_tg_exit(void) +{ + xt_unregister_target(&masquerade_tg_reg); + nf_nat_masquerade_ipv4_unregister_notifier(); +} + +module_init(masquerade_tg_init); +module_exit(masquerade_tg_exit); diff --git a/kernel/net/ipv4/netfilter/ipt_REJECT.c b/kernel/net/ipv4/netfilter/ipt_REJECT.c new file mode 100644 index 000000000..87907d4bd --- /dev/null +++ b/kernel/net/ipv4/netfilter/ipt_REJECT.c @@ -0,0 +1,113 @@ +/* + * This is a module which is used for rejecting packets. + */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) +#include +#endif + +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team "); +MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4"); + +static unsigned int +reject_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ipt_reject_info *reject = par->targinfo; + int hook = par->hooknum; + + switch (reject->with) { + case IPT_ICMP_NET_UNREACHABLE: + nf_send_unreach(skb, ICMP_NET_UNREACH, hook); + break; + case IPT_ICMP_HOST_UNREACHABLE: + nf_send_unreach(skb, ICMP_HOST_UNREACH, hook); + break; + case IPT_ICMP_PROT_UNREACHABLE: + nf_send_unreach(skb, ICMP_PROT_UNREACH, hook); + break; + case IPT_ICMP_PORT_UNREACHABLE: + nf_send_unreach(skb, ICMP_PORT_UNREACH, hook); + break; + case IPT_ICMP_NET_PROHIBITED: + nf_send_unreach(skb, ICMP_NET_ANO, hook); + break; + case IPT_ICMP_HOST_PROHIBITED: + nf_send_unreach(skb, ICMP_HOST_ANO, hook); + break; + case IPT_ICMP_ADMIN_PROHIBITED: + nf_send_unreach(skb, ICMP_PKT_FILTERED, hook); + break; + case IPT_TCP_RESET: + nf_send_reset(skb, hook); + case IPT_ICMP_ECHOREPLY: + /* Doesn't happen. */ + break; + } + + return NF_DROP; +} + +static int reject_tg_check(const struct xt_tgchk_param *par) +{ + const struct ipt_reject_info *rejinfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; + + if (rejinfo->with == IPT_ICMP_ECHOREPLY) { + pr_info("ECHOREPLY no longer supported.\n"); + return -EINVAL; + } else if (rejinfo->with == IPT_TCP_RESET) { + /* Must specify that it's a TCP packet */ + if (e->ip.proto != IPPROTO_TCP || + (e->ip.invflags & XT_INV_PROTO)) { + pr_info("TCP_RESET invalid for non-tcp\n"); + return -EINVAL; + } + } + return 0; +} + +static struct xt_target reject_tg_reg __read_mostly = { + .name = "REJECT", + .family = NFPROTO_IPV4, + .target = reject_tg, + .targetsize = sizeof(struct ipt_reject_info), + .table = "filter", + .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT), + .checkentry = reject_tg_check, + .me = THIS_MODULE, +}; + +static int __init reject_tg_init(void) +{ + return xt_register_target(&reject_tg_reg); +} + +static void __exit reject_tg_exit(void) +{ + xt_unregister_target(&reject_tg_reg); +} + +module_init(reject_tg_init); +module_exit(reject_tg_exit); diff --git a/kernel/net/ipv4/netfilter/ipt_SYNPROXY.c b/kernel/net/ipv4/netfilter/ipt_SYNPROXY.c new file mode 100644 index 000000000..e9e677930 --- /dev/null +++ b/kernel/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -0,0 +1,480 @@ +/* + * Copyright (c) 2013 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static struct iphdr * +synproxy_build_ip(struct sk_buff *skb, u32 saddr, u32 daddr) +{ + struct iphdr *iph; + + skb_reset_network_header(skb); + iph = (struct iphdr *)skb_put(skb, sizeof(*iph)); + iph->version = 4; + iph->ihl = sizeof(*iph) / 4; + iph->tos = 0; + iph->id = 0; + iph->frag_off = htons(IP_DF); + iph->ttl = sysctl_ip_default_ttl; + iph->protocol = IPPROTO_TCP; + iph->check = 0; + iph->saddr = saddr; + iph->daddr = daddr; + + return iph; +} + +static void +synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb, + struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, + struct iphdr *niph, struct tcphdr *nth, + unsigned int tcp_hdr_size) +{ + nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0); + nskb->ip_summed = CHECKSUM_PARTIAL; + nskb->csum_start = (unsigned char *)nth - nskb->head; + nskb->csum_offset = offsetof(struct tcphdr, check); + + skb_dst_set_noref(nskb, skb_dst(skb)); + nskb->protocol = htons(ETH_P_IP); + if (ip_route_me_harder(nskb, RTN_UNSPEC)) + goto free_nskb; + + if (nfct) { + nskb->nfct = nfct; + nskb->nfctinfo = ctinfo; + nf_conntrack_get(nfct); + } + + ip_local_out(nskb); + return; + +free_nskb: + kfree_skb(nskb); +} + +static void +synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts) +{ + struct sk_buff *nskb; + struct iphdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + u16 mss = opts->mss; + + iph = ip_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->dest; + nth->dest = th->source; + nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss)); + nth->ack_seq = htonl(ntohl(th->seq) + 1); + tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK; + if (opts->options & XT_SYNPROXY_OPT_ECN) + tcp_flag_word(nth) |= TCP_FLAG_ECE; + nth->doff = tcp_hdr_size / 4; + nth->window = 0; + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + niph, nth, tcp_hdr_size); +} + +static void +synproxy_send_server_syn(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts, u32 recv_seq) +{ + struct sk_buff *nskb; + struct iphdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + + iph = ip_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->source; + nth->dest = th->dest; + nth->seq = htonl(recv_seq - 1); + /* ack_seq is used to relay our ISN to the synproxy hook to initialize + * sequence number translation once a connection tracking entry exists. + */ + nth->ack_seq = htonl(ntohl(th->ack_seq) - 1); + tcp_flag_word(nth) = TCP_FLAG_SYN; + if (opts->options & XT_SYNPROXY_OPT_ECN) + tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR; + nth->doff = tcp_hdr_size / 4; + nth->window = th->window; + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, + niph, nth, tcp_hdr_size); +} + +static void +synproxy_send_server_ack(const struct synproxy_net *snet, + const struct ip_ct_tcp *state, + const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts) +{ + struct sk_buff *nskb; + struct iphdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + + iph = ip_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->dest; + nth->dest = th->source; + nth->seq = htonl(ntohl(th->ack_seq)); + nth->ack_seq = htonl(ntohl(th->seq) + 1); + tcp_flag_word(nth) = TCP_FLAG_ACK; + nth->doff = tcp_hdr_size / 4; + nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin); + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); +} + +static void +synproxy_send_client_ack(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts) +{ + struct sk_buff *nskb; + struct iphdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + + iph = ip_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->source; + nth->dest = th->dest; + nth->seq = htonl(ntohl(th->seq) + 1); + nth->ack_seq = th->ack_seq; + tcp_flag_word(nth) = TCP_FLAG_ACK; + nth->doff = tcp_hdr_size / 4; + nth->window = ntohs(htons(th->window) >> opts->wscale); + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); +} + +static bool +synproxy_recv_client_ack(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, + struct synproxy_options *opts, u32 recv_seq) +{ + int mss; + + mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1); + if (mss == 0) { + this_cpu_inc(snet->stats->cookie_invalid); + return false; + } + + this_cpu_inc(snet->stats->cookie_valid); + opts->mss = mss; + opts->options |= XT_SYNPROXY_OPT_MSS; + + if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy_check_timestamp_cookie(opts); + + synproxy_send_server_syn(snet, skb, th, opts, recv_seq); + return true; +} + +static unsigned int +synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_synproxy_info *info = par->targinfo; + struct synproxy_net *snet = synproxy_pernet(dev_net(par->in)); + struct synproxy_options opts = {}; + struct tcphdr *th, _th; + + if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP)) + return NF_DROP; + + th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); + if (th == NULL) + return NF_DROP; + + if (!synproxy_parse_options(skb, par->thoff, th, &opts)) + return NF_DROP; + + if (th->syn && !(th->ack || th->fin || th->rst)) { + /* Initial SYN from client */ + this_cpu_inc(snet->stats->syn_received); + + if (th->ece && th->cwr) + opts.options |= XT_SYNPROXY_OPT_ECN; + + opts.options &= info->options; + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy_init_timestamp_cookie(info, &opts); + else + opts.options &= ~(XT_SYNPROXY_OPT_WSCALE | + XT_SYNPROXY_OPT_SACK_PERM | + XT_SYNPROXY_OPT_ECN); + + synproxy_send_client_synack(skb, th, &opts); + return NF_DROP; + + } else if (th->ack && !(th->fin || th->rst || th->syn)) { + /* ACK from client */ + synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq)); + return NF_DROP; + } + + return XT_CONTINUE; +} + +static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *nhs) +{ + struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out)); + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + struct nf_conn_synproxy *synproxy; + struct synproxy_options opts = {}; + const struct ip_ct_tcp *state; + struct tcphdr *th, _th; + unsigned int thoff; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return NF_ACCEPT; + + synproxy = nfct_synproxy(ct); + if (synproxy == NULL) + return NF_ACCEPT; + + if (nf_is_loopback_packet(skb)) + return NF_ACCEPT; + + thoff = ip_hdrlen(skb); + th = skb_header_pointer(skb, thoff, sizeof(_th), &_th); + if (th == NULL) + return NF_DROP; + + state = &ct->proto.tcp; + switch (state->state) { + case TCP_CONNTRACK_CLOSE: + if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { + nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - + ntohl(th->seq) + 1); + break; + } + + if (!th->syn || th->ack || + CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) + break; + + /* Reopened connection - reset the sequence number and timestamp + * adjustments, they will get initialized once the connection is + * reestablished. + */ + nf_ct_seqadj_init(ct, ctinfo, 0); + synproxy->tsoff = 0; + this_cpu_inc(snet->stats->conn_reopened); + + /* fall through */ + case TCP_CONNTRACK_SYN_SENT: + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; + + if (!th->syn && th->ack && + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { + /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1, + * therefore we need to add 1 to make the SYN sequence + * number match the one of first SYN. + */ + if (synproxy_recv_client_ack(snet, skb, th, &opts, + ntohl(th->seq) + 1)) + this_cpu_inc(snet->stats->cookie_retrans); + + return NF_DROP; + } + + synproxy->isn = ntohl(th->ack_seq); + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy->its = opts.tsecr; + break; + case TCP_CONNTRACK_SYN_RECV: + if (!th->syn || !th->ack) + break; + + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; + + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy->tsoff = opts.tsval - synproxy->its; + + opts.options &= ~(XT_SYNPROXY_OPT_MSS | + XT_SYNPROXY_OPT_WSCALE | + XT_SYNPROXY_OPT_SACK_PERM); + + swap(opts.tsval, opts.tsecr); + synproxy_send_server_ack(snet, state, skb, th, &opts); + + nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)); + + swap(opts.tsval, opts.tsecr); + synproxy_send_client_ack(snet, skb, th, &opts); + + consume_skb(skb); + return NF_STOLEN; + default: + break; + } + + synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy); + return NF_ACCEPT; +} + +static int synproxy_tg4_check(const struct xt_tgchk_param *par) +{ + const struct ipt_entry *e = par->entryinfo; + + if (e->ip.proto != IPPROTO_TCP || + e->ip.invflags & XT_INV_PROTO) + return -EINVAL; + + return nf_ct_l3proto_try_module_get(par->family); +} + +static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par) +{ + nf_ct_l3proto_module_put(par->family); +} + +static struct xt_target synproxy_tg4_reg __read_mostly = { + .name = "SYNPROXY", + .family = NFPROTO_IPV4, + .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD), + .target = synproxy_tg4, + .targetsize = sizeof(struct xt_synproxy_info), + .checkentry = synproxy_tg4_check, + .destroy = synproxy_tg4_destroy, + .me = THIS_MODULE, +}; + +static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = { + { + .hook = ipv4_synproxy_hook, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, + { + .hook = ipv4_synproxy_hook, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, +}; + +static int __init synproxy_tg4_init(void) +{ + int err; + + err = nf_register_hooks(ipv4_synproxy_ops, + ARRAY_SIZE(ipv4_synproxy_ops)); + if (err < 0) + goto err1; + + err = xt_register_target(&synproxy_tg4_reg); + if (err < 0) + goto err2; + + return 0; + +err2: + nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops)); +err1: + return err; +} + +static void __exit synproxy_tg4_exit(void) +{ + xt_unregister_target(&synproxy_tg4_reg); + nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops)); +} + +module_init(synproxy_tg4_init); +module_exit(synproxy_tg4_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); diff --git a/kernel/net/ipv4/netfilter/ipt_ah.c b/kernel/net/ipv4/netfilter/ipt_ah.c new file mode 100644 index 000000000..14a2aa8b8 --- /dev/null +++ b/kernel/net/ipv4/netfilter/ipt_ah.c @@ -0,0 +1,91 @@ +/* Kernel module to match AH parameters. */ +/* (C) 1999-2000 Yon Uriarte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Yon Uriarte "); +MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match"); + +/* Returns 1 if the spi is matched by the range, 0 otherwise */ +static inline bool +spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) +{ + bool r; + pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n", + invert ? '!' : ' ', min, spi, max); + r=(spi >= min && spi <= max) ^ invert; + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); + return r; +} + +static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; + const struct ipt_ah *ahinfo = par->matchinfo; + + /* Must not be a fragment. */ + if (par->fragoff != 0) + return false; + + ah = skb_header_pointer(skb, par->thoff, sizeof(_ahdr), &_ahdr); + if (ah == NULL) { + /* We've been asked to examine this packet, and we + * can't. Hence, no choice but to drop. + */ + pr_debug("Dropping evil AH tinygram.\n"); + par->hotdrop = true; + return 0; + } + + return spi_match(ahinfo->spis[0], ahinfo->spis[1], + ntohl(ah->spi), + !!(ahinfo->invflags & IPT_AH_INV_SPI)); +} + +static int ah_mt_check(const struct xt_mtchk_param *par) +{ + const struct ipt_ah *ahinfo = par->matchinfo; + + /* Must specify no unknown invflags */ + if (ahinfo->invflags & ~IPT_AH_INV_MASK) { + pr_debug("unknown flags %X\n", ahinfo->invflags); + return -EINVAL; + } + return 0; +} + +static struct xt_match ah_mt_reg __read_mostly = { + .name = "ah", + .family = NFPROTO_IPV4, + .match = ah_mt, + .matchsize = sizeof(struct ipt_ah), + .proto = IPPROTO_AH, + .checkentry = ah_mt_check, + .me = THIS_MODULE, +}; + +static int __init ah_mt_init(void) +{ + return xt_register_match(&ah_mt_reg); +} + +static void __exit ah_mt_exit(void) +{ + xt_unregister_match(&ah_mt_reg); +} + +module_init(ah_mt_init); +module_exit(ah_mt_exit); diff --git a/kernel/net/ipv4/netfilter/ipt_rpfilter.c b/kernel/net/ipv4/netfilter/ipt_rpfilter.c new file mode 100644 index 000000000..4bfaedf9b --- /dev/null +++ b/kernel/net/ipv4/netfilter/ipt_rpfilter.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2011 Florian Westphal + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * based on fib_frontend.c; Author: Alexey Kuznetsov, + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal "); +MODULE_DESCRIPTION("iptables: ipv4 reverse path filter match"); + +/* don't try to find route from mcast/bcast/zeronet */ +static __be32 rpfilter_get_saddr(__be32 addr) +{ + if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) || + ipv4_is_zeronet(addr)) + return 0; + return addr; +} + +static bool rpfilter_lookup_reverse(struct flowi4 *fl4, + const struct net_device *dev, u8 flags) +{ + struct fib_result res; + bool dev_match; + struct net *net = dev_net(dev); + int ret __maybe_unused; + + if (fib_lookup(net, fl4, &res)) + return false; + + if (res.type != RTN_UNICAST) { + if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL)) + return false; + } + dev_match = false; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + for (ret = 0; ret < res.fi->fib_nhs; ret++) { + struct fib_nh *nh = &res.fi->fib_nh[ret]; + + if (nh->nh_dev == dev) { + dev_match = true; + break; + } + } +#else + if (FIB_RES_DEV(res) == dev) + dev_match = true; +#endif + if (dev_match || flags & XT_RPFILTER_LOOSE) + return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST; + return dev_match; +} + +static bool rpfilter_is_local(const struct sk_buff *skb) +{ + const struct rtable *rt = skb_rtable(skb); + return rt && (rt->rt_flags & RTCF_LOCAL); +} + +static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_rpfilter_info *info; + const struct iphdr *iph; + struct flowi4 flow; + bool invert; + + info = par->matchinfo; + invert = info->flags & XT_RPFILTER_INVERT; + + if (rpfilter_is_local(skb)) + return true ^ invert; + + iph = ip_hdr(skb); + if (ipv4_is_multicast(iph->daddr)) { + if (ipv4_is_zeronet(iph->saddr)) + return ipv4_is_local_multicast(iph->daddr) ^ invert; + } + flow.flowi4_iif = LOOPBACK_IFINDEX; + flow.daddr = iph->saddr; + flow.saddr = rpfilter_get_saddr(iph->daddr); + flow.flowi4_oif = 0; + flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; + flow.flowi4_tos = RT_TOS(iph->tos); + flow.flowi4_scope = RT_SCOPE_UNIVERSE; + + return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert; +} + +static int rpfilter_check(const struct xt_mtchk_param *par) +{ + const struct xt_rpfilter_info *info = par->matchinfo; + unsigned int options = ~XT_RPFILTER_OPTION_MASK; + if (info->flags & options) { + pr_info("unknown options encountered"); + return -EINVAL; + } + + if (strcmp(par->table, "mangle") != 0 && + strcmp(par->table, "raw") != 0) { + pr_info("match only valid in the \'raw\' " + "or \'mangle\' tables, not \'%s\'.\n", par->table); + return -EINVAL; + } + + return 0; +} + +static struct xt_match rpfilter_mt_reg __read_mostly = { + .name = "rpfilter", + .family = NFPROTO_IPV4, + .checkentry = rpfilter_check, + .match = rpfilter_mt, + .matchsize = sizeof(struct xt_rpfilter_info), + .hooks = (1 << NF_INET_PRE_ROUTING), + .me = THIS_MODULE +}; + +static int __init rpfilter_mt_init(void) +{ + return xt_register_match(&rpfilter_mt_reg); +} + +static void __exit rpfilter_mt_exit(void) +{ + xt_unregister_match(&rpfilter_mt_reg); +} + +module_init(rpfilter_mt_init); +module_exit(rpfilter_mt_exit); diff --git a/kernel/net/ipv4/netfilter/iptable_filter.c b/kernel/net/ipv4/netfilter/iptable_filter.c new file mode 100644 index 000000000..a0f3beca5 --- /dev/null +++ b/kernel/net/ipv4/netfilter/iptable_filter.c @@ -0,0 +1,109 @@ +/* + * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team "); +MODULE_DESCRIPTION("iptables filter table"); + +#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT)) + +static const struct xt_table packet_filter = { + .name = "filter", + .valid_hooks = FILTER_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_FILTER, +}; + +static unsigned int +iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + const struct net *net; + + if (ops->hooknum == NF_INET_LOCAL_OUT && + (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr))) + /* root is playing with raw sockets. */ + return NF_ACCEPT; + + net = dev_net(state->in ? state->in : state->out); + return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_filter); +} + +static struct nf_hook_ops *filter_ops __read_mostly; + +/* Default to forward because I got too much mail already. */ +static bool forward = true; +module_param(forward, bool, 0000); + +static int __net_init iptable_filter_net_init(struct net *net) +{ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; + /* Entry 1 is the FORWARD hook */ + ((struct ipt_standard *)repl->entries)[1].target.verdict = + forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; + + net->ipv4.iptable_filter = + ipt_register_table(net, &packet_filter, repl); + kfree(repl); + return PTR_ERR_OR_ZERO(net->ipv4.iptable_filter); +} + +static void __net_exit iptable_filter_net_exit(struct net *net) +{ + ipt_unregister_table(net, net->ipv4.iptable_filter); +} + +static struct pernet_operations iptable_filter_net_ops = { + .init = iptable_filter_net_init, + .exit = iptable_filter_net_exit, +}; + +static int __init iptable_filter_init(void) +{ + int ret; + + ret = register_pernet_subsys(&iptable_filter_net_ops); + if (ret < 0) + return ret; + + /* Register hooks */ + filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook); + if (IS_ERR(filter_ops)) { + ret = PTR_ERR(filter_ops); + unregister_pernet_subsys(&iptable_filter_net_ops); + } + + return ret; +} + +static void __exit iptable_filter_fini(void) +{ + xt_hook_unlink(&packet_filter, filter_ops); + unregister_pernet_subsys(&iptable_filter_net_ops); +} + +module_init(iptable_filter_init); +module_exit(iptable_filter_fini); diff --git a/kernel/net/ipv4/netfilter/iptable_mangle.c b/kernel/net/ipv4/netfilter/iptable_mangle.c new file mode 100644 index 000000000..62cbb8c5f --- /dev/null +++ b/kernel/net/ipv4/netfilter/iptable_mangle.c @@ -0,0 +1,147 @@ +/* + * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team "); +MODULE_DESCRIPTION("iptables mangle table"); + +#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ + (1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT) | \ + (1 << NF_INET_POST_ROUTING)) + +static const struct xt_table packet_mangler = { + .name = "mangle", + .valid_hooks = MANGLE_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_MANGLE, +}; + +static unsigned int +ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) +{ + struct net_device *out = state->out; + unsigned int ret; + const struct iphdr *iph; + u_int8_t tos; + __be32 saddr, daddr; + u_int32_t mark; + int err; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + /* Save things which could affect route */ + mark = skb->mark; + iph = ip_hdr(skb); + saddr = iph->saddr; + daddr = iph->daddr; + tos = iph->tos; + + ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, state, + dev_net(out)->ipv4.iptable_mangle); + /* Reroute for ANY change. */ + if (ret != NF_DROP && ret != NF_STOLEN) { + iph = ip_hdr(skb); + + if (iph->saddr != saddr || + iph->daddr != daddr || + skb->mark != mark || + iph->tos != tos) { + err = ip_route_me_harder(skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); + } + } + + return ret; +} + +/* The work comes in here from netfilter.c. */ +static unsigned int +iptable_mangle_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + if (ops->hooknum == NF_INET_LOCAL_OUT) + return ipt_mangle_out(skb, state); + if (ops->hooknum == NF_INET_POST_ROUTING) + return ipt_do_table(skb, ops->hooknum, state, + dev_net(state->out)->ipv4.iptable_mangle); + /* PREROUTING/INPUT/FORWARD: */ + return ipt_do_table(skb, ops->hooknum, state, + dev_net(state->in)->ipv4.iptable_mangle); +} + +static struct nf_hook_ops *mangle_ops __read_mostly; + +static int __net_init iptable_mangle_net_init(struct net *net) +{ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_mangler); + if (repl == NULL) + return -ENOMEM; + net->ipv4.iptable_mangle = + ipt_register_table(net, &packet_mangler, repl); + kfree(repl); + return PTR_ERR_OR_ZERO(net->ipv4.iptable_mangle); +} + +static void __net_exit iptable_mangle_net_exit(struct net *net) +{ + ipt_unregister_table(net, net->ipv4.iptable_mangle); +} + +static struct pernet_operations iptable_mangle_net_ops = { + .init = iptable_mangle_net_init, + .exit = iptable_mangle_net_exit, +}; + +static int __init iptable_mangle_init(void) +{ + int ret; + + ret = register_pernet_subsys(&iptable_mangle_net_ops); + if (ret < 0) + return ret; + + /* Register hooks */ + mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); + if (IS_ERR(mangle_ops)) { + ret = PTR_ERR(mangle_ops); + unregister_pernet_subsys(&iptable_mangle_net_ops); + } + + return ret; +} + +static void __exit iptable_mangle_fini(void) +{ + xt_hook_unlink(&packet_mangler, mangle_ops); + unregister_pernet_subsys(&iptable_mangle_net_ops); +} + +module_init(iptable_mangle_init); +module_exit(iptable_mangle_fini); diff --git a/kernel/net/ipv4/netfilter/iptable_nat.c b/kernel/net/ipv4/netfilter/iptable_nat.c new file mode 100644 index 000000000..0d4d9cdf9 --- /dev/null +++ b/kernel/net/ipv4/netfilter/iptable_nat.c @@ -0,0 +1,154 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * (C) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static const struct xt_table nf_nat_ipv4_table = { + .name = "nat", + .valid_hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + .af = NFPROTO_IPV4, +}; + +static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state, + struct nf_conn *ct) +{ + struct net *net = nf_ct_net(ct); + + return ipt_do_table(skb, ops->hooknum, state, net->ipv4.nat_table); +} + +static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return nf_nat_ipv4_fn(ops, skb, state, iptable_nat_do_chain); +} + +static unsigned int iptable_nat_ipv4_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return nf_nat_ipv4_in(ops, skb, state, iptable_nat_do_chain); +} + +static unsigned int iptable_nat_ipv4_out(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return nf_nat_ipv4_out(ops, skb, state, iptable_nat_do_chain); +} + +static unsigned int iptable_nat_ipv4_local_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return nf_nat_ipv4_local_fn(ops, skb, state, iptable_nat_do_chain); +} + +static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { + /* Before packet filtering, change destination */ + { + .hook = iptable_nat_ipv4_in, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = iptable_nat_ipv4_out, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_NAT_SRC, + }, + /* Before packet filtering, change destination */ + { + .hook = iptable_nat_ipv4_local_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = iptable_nat_ipv4_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_NAT_SRC, + }, +}; + +static int __net_init iptable_nat_net_init(struct net *net) +{ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); + if (repl == NULL) + return -ENOMEM; + net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); + kfree(repl); + return PTR_ERR_OR_ZERO(net->ipv4.nat_table); +} + +static void __net_exit iptable_nat_net_exit(struct net *net) +{ + ipt_unregister_table(net, net->ipv4.nat_table); +} + +static struct pernet_operations iptable_nat_net_ops = { + .init = iptable_nat_net_init, + .exit = iptable_nat_net_exit, +}; + +static int __init iptable_nat_init(void) +{ + int err; + + err = register_pernet_subsys(&iptable_nat_net_ops); + if (err < 0) + goto err1; + + err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + if (err < 0) + goto err2; + return 0; + +err2: + unregister_pernet_subsys(&iptable_nat_net_ops); +err1: + return err; +} + +static void __exit iptable_nat_exit(void) +{ + nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + unregister_pernet_subsys(&iptable_nat_net_ops); +} + +module_init(iptable_nat_init); +module_exit(iptable_nat_exit); + +MODULE_LICENSE("GPL"); diff --git a/kernel/net/ipv4/netfilter/iptable_raw.c b/kernel/net/ipv4/netfilter/iptable_raw.c new file mode 100644 index 000000000..0356e6da4 --- /dev/null +++ b/kernel/net/ipv4/netfilter/iptable_raw.c @@ -0,0 +1,89 @@ +/* + * 'raw' table, which is the very first hooked in at PRE_ROUTING and LOCAL_OUT . + * + * Copyright (C) 2003 Jozsef Kadlecsik + */ +#include +#include +#include +#include + +#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) + +static const struct xt_table packet_raw = { + .name = "raw", + .valid_hooks = RAW_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_RAW, +}; + +/* The work comes in here from netfilter.c. */ +static unsigned int +iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + const struct net *net; + + if (ops->hooknum == NF_INET_LOCAL_OUT && + (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr))) + /* root is playing with raw sockets. */ + return NF_ACCEPT; + + net = dev_net(state->in ? state->in : state->out); + return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_raw); +} + +static struct nf_hook_ops *rawtable_ops __read_mostly; + +static int __net_init iptable_raw_net_init(struct net *net) +{ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_raw); + if (repl == NULL) + return -ENOMEM; + net->ipv4.iptable_raw = + ipt_register_table(net, &packet_raw, repl); + kfree(repl); + return PTR_ERR_OR_ZERO(net->ipv4.iptable_raw); +} + +static void __net_exit iptable_raw_net_exit(struct net *net) +{ + ipt_unregister_table(net, net->ipv4.iptable_raw); +} + +static struct pernet_operations iptable_raw_net_ops = { + .init = iptable_raw_net_init, + .exit = iptable_raw_net_exit, +}; + +static int __init iptable_raw_init(void) +{ + int ret; + + ret = register_pernet_subsys(&iptable_raw_net_ops); + if (ret < 0) + return ret; + + /* Register hooks */ + rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); + if (IS_ERR(rawtable_ops)) { + ret = PTR_ERR(rawtable_ops); + unregister_pernet_subsys(&iptable_raw_net_ops); + } + + return ret; +} + +static void __exit iptable_raw_fini(void) +{ + xt_hook_unlink(&packet_raw, rawtable_ops); + unregister_pernet_subsys(&iptable_raw_net_ops); +} + +module_init(iptable_raw_init); +module_exit(iptable_raw_fini); +MODULE_LICENSE("GPL"); diff --git a/kernel/net/ipv4/netfilter/iptable_security.c b/kernel/net/ipv4/netfilter/iptable_security.c new file mode 100644 index 000000000..4bce3980c --- /dev/null +++ b/kernel/net/ipv4/netfilter/iptable_security.c @@ -0,0 +1,109 @@ +/* + * "security" table + * + * This is for use by Mandatory Access Control (MAC) security models, + * which need to be able to manage security policy in separate context + * to DAC. + * + * Based on iptable_mangle.c + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2004 Netfilter Core Team netfilter.org> + * Copyright (C) 2008 Red Hat, Inc., James Morris redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris redhat.com>"); +MODULE_DESCRIPTION("iptables security table, for MAC rules"); + +#define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT) + +static const struct xt_table security_table = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_SECURITY, +}; + +static unsigned int +iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + const struct net *net; + + if (ops->hooknum == NF_INET_LOCAL_OUT && + (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr))) + /* Somebody is playing with raw sockets. */ + return NF_ACCEPT; + + net = dev_net(state->in ? state->in : state->out); + return ipt_do_table(skb, ops->hooknum, state, + net->ipv4.iptable_security); +} + +static struct nf_hook_ops *sectbl_ops __read_mostly; + +static int __net_init iptable_security_net_init(struct net *net) +{ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&security_table); + if (repl == NULL) + return -ENOMEM; + net->ipv4.iptable_security = + ipt_register_table(net, &security_table, repl); + kfree(repl); + return PTR_ERR_OR_ZERO(net->ipv4.iptable_security); +} + +static void __net_exit iptable_security_net_exit(struct net *net) +{ + ipt_unregister_table(net, net->ipv4.iptable_security); +} + +static struct pernet_operations iptable_security_net_ops = { + .init = iptable_security_net_init, + .exit = iptable_security_net_exit, +}; + +static int __init iptable_security_init(void) +{ + int ret; + + ret = register_pernet_subsys(&iptable_security_net_ops); + if (ret < 0) + return ret; + + sectbl_ops = xt_hook_link(&security_table, iptable_security_hook); + if (IS_ERR(sectbl_ops)) { + ret = PTR_ERR(sectbl_ops); + goto cleanup_table; + } + + return ret; + +cleanup_table: + unregister_pernet_subsys(&iptable_security_net_ops); + return ret; +} + +static void __exit iptable_security_fini(void) +{ + xt_hook_unlink(&security_table, sectbl_ops); + unregister_pernet_subsys(&iptable_security_net_ops); +} + +module_init(iptable_security_init); +module_exit(iptable_security_fini); diff --git a/kernel/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/kernel/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c new file mode 100644 index 000000000..30ad9554b --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -0,0 +1,542 @@ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * (C) 2006-2012 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, + struct nf_conntrack_tuple *tuple) +{ + const __be32 *ap; + __be32 _addrs[2]; + ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), + sizeof(u_int32_t) * 2, _addrs); + if (ap == NULL) + return false; + + tuple->src.u3.ip = ap[0]; + tuple->dst.u3.ip = ap[1]; + + return true; +} + +static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + tuple->src.u3.ip = orig->dst.u3.ip; + tuple->dst.u3.ip = orig->src.u3.ip; + + return true; +} + +static void ipv4_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + seq_printf(s, "src=%pI4 dst=%pI4 ", + &tuple->src.u3.ip, &tuple->dst.u3.ip); +} + +static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, + unsigned int *dataoff, u_int8_t *protonum) +{ + const struct iphdr *iph; + struct iphdr _iph; + + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + if (iph == NULL) + return -NF_ACCEPT; + + /* Conntrack defragments packets, we might still see fragments + * inside ICMP packets though. */ + if (iph->frag_off & htons(IP_OFFSET)) + return -NF_ACCEPT; + + *dataoff = nhoff + (iph->ihl << 2); + *protonum = iph->protocol; + + /* Check bogus IP headers */ + if (*dataoff > skb->len) { + pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: " + "nhoff %u, ihl %u, skblen %u\n", + nhoff, iph->ihl << 2, skb->len); + return -NF_ACCEPT; + } + + return NF_ACCEPT; +} + +static unsigned int ipv4_helper(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + const struct nf_conn_help *help; + const struct nf_conntrack_helper *helper; + + /* This is where we call the helper: as the packet goes out. */ + ct = nf_ct_get(skb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED_REPLY) + return NF_ACCEPT; + + help = nfct_help(ct); + if (!help) + return NF_ACCEPT; + + /* rcu_read_lock()ed by nf_hook_slow */ + helper = rcu_dereference(help->helper); + if (!helper) + return NF_ACCEPT; + + return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), + ct, ctinfo); +} + +static unsigned int ipv4_confirm(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED_REPLY) + goto out; + + /* adjust seqs for loopback traffic only in outgoing direction */ + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_is_loopback_packet(skb)) { + if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) { + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); + return NF_DROP; + } + } +out: + /* We've seen it coming out the other side: confirm it */ + return nf_conntrack_confirm(skb); +} + +static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return nf_conntrack_in(dev_net(state->in), PF_INET, ops->hooknum, skb); +} + +static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + return nf_conntrack_in(dev_net(state->out), PF_INET, ops->hooknum, skb); +} + +/* Connection tracking may drop packets, but never alters them, so + make it the first hook. */ +static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { + { + .hook = ipv4_conntrack_in, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK, + }, + { + .hook = ipv4_conntrack_local, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK, + }, + { + .hook = ipv4_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_HELPER, + }, + { + .hook = ipv4_confirm, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, + }, + { + .hook = ipv4_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_HELPER, + }, + { + .hook = ipv4_confirm, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, + }, +}; + +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) +static int log_invalid_proto_min = 0; +static int log_invalid_proto_max = 255; + +static struct ctl_table ip_ct_sysctl_table[] = { + { + .procname = "ip_conntrack_max", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "ip_conntrack_count", + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, + { + .procname = "ip_conntrack_buckets", + .maxlen = sizeof(unsigned int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, + { + .procname = "ip_conntrack_checksum", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "ip_conntrack_log_invalid", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &log_invalid_proto_min, + .extra2 = &log_invalid_proto_max, + }, + { } +}; +#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */ + +/* Fast function for those who don't want to parse /proc (and I don't + blame them). */ +/* Reversing the socket's dst/src point of view gives us the reply + mapping. */ +static int +getorigdst(struct sock *sk, int optval, void __user *user, int *len) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_tuple tuple; + + memset(&tuple, 0, sizeof(tuple)); + tuple.src.u3.ip = inet->inet_rcv_saddr; + tuple.src.u.tcp.port = inet->inet_sport; + tuple.dst.u3.ip = inet->inet_daddr; + tuple.dst.u.tcp.port = inet->inet_dport; + tuple.src.l3num = PF_INET; + tuple.dst.protonum = sk->sk_protocol; + + /* We only do TCP and SCTP at the moment: is there a better way? */ + if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) { + pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n"); + return -ENOPROTOOPT; + } + + if ((unsigned int) *len < sizeof(struct sockaddr_in)) { + pr_debug("SO_ORIGINAL_DST: len %d not %Zu\n", + *len, sizeof(struct sockaddr_in)); + return -EINVAL; + } + + h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); + if (h) { + struct sockaddr_in sin; + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + + sin.sin_family = AF_INET; + sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.u.tcp.port; + sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.u3.ip; + memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); + + pr_debug("SO_ORIGINAL_DST: %pI4 %u\n", + &sin.sin_addr.s_addr, ntohs(sin.sin_port)); + nf_ct_put(ct); + if (copy_to_user(user, &sin, sizeof(sin)) != 0) + return -EFAULT; + else + return 0; + } + pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n", + &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port), + &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port)); + return -ENOENT; +} + +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + +#include +#include + +static int ipv4_tuple_to_nlattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple) +{ + if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || + nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = { + [CTA_IP_V4_SRC] = { .type = NLA_U32 }, + [CTA_IP_V4_DST] = { .type = NLA_U32 }, +}; + +static int ipv4_nlattr_to_tuple(struct nlattr *tb[], + struct nf_conntrack_tuple *t) +{ + if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST]) + return -EINVAL; + + t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]); + t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]); + + return 0; +} + +static int ipv4_nlattr_tuple_size(void) +{ + return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1); +} +#endif + +static struct nf_sockopt_ops so_getorigdst = { + .pf = PF_INET, + .get_optmin = SO_ORIGINAL_DST, + .get_optmax = SO_ORIGINAL_DST+1, + .get = getorigdst, + .owner = THIS_MODULE, +}; + +static int ipv4_init_net(struct net *net) +{ +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + struct nf_ip_net *in = &net->ct.nf_ct_proto; + in->ctl_table = kmemdup(ip_ct_sysctl_table, + sizeof(ip_ct_sysctl_table), + GFP_KERNEL); + if (!in->ctl_table) + return -ENOMEM; + + in->ctl_table[0].data = &nf_conntrack_max; + in->ctl_table[1].data = &net->ct.count; + in->ctl_table[2].data = &net->ct.htable_size; + in->ctl_table[3].data = &net->ct.sysctl_checksum; + in->ctl_table[4].data = &net->ct.sysctl_log_invalid; +#endif + return 0; +} + +struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { + .l3proto = PF_INET, + .name = "ipv4", + .pkt_to_tuple = ipv4_pkt_to_tuple, + .invert_tuple = ipv4_invert_tuple, + .print_tuple = ipv4_print_tuple, + .get_l4proto = ipv4_get_l4proto, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .tuple_to_nlattr = ipv4_tuple_to_nlattr, + .nlattr_tuple_size = ipv4_nlattr_tuple_size, + .nlattr_to_tuple = ipv4_nlattr_to_tuple, + .nla_policy = ipv4_nla_policy, +#endif +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + .ctl_table_path = "net/ipv4/netfilter", +#endif + .init_net = ipv4_init_net, + .me = THIS_MODULE, +}; + +module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, + &nf_conntrack_htable_size, 0600); + +MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); +MODULE_ALIAS("ip_conntrack"); +MODULE_LICENSE("GPL"); + +static int ipv4_net_init(struct net *net) +{ + int ret = 0; + + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4); + if (ret < 0) { + pr_err("nf_conntrack_tcp4: pernet registration failed\n"); + goto out_tcp; + } + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4); + if (ret < 0) { + pr_err("nf_conntrack_udp4: pernet registration failed\n"); + goto out_udp; + } + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp); + if (ret < 0) { + pr_err("nf_conntrack_icmp4: pernet registration failed\n"); + goto out_icmp; + } + ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: pernet registration failed\n"); + goto out_ipv4; + } + return 0; +out_ipv4: + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); +out_icmp: + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); +out_udp: + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); +out_tcp: + return ret; +} + +static void ipv4_net_exit(struct net *net) +{ + nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); +} + +static struct pernet_operations ipv4_net_ops = { + .init = ipv4_net_init, + .exit = ipv4_net_exit, +}; + +static int __init nf_conntrack_l3proto_ipv4_init(void) +{ + int ret = 0; + + need_conntrack(); + nf_defrag_ipv4_enable(); + + ret = nf_register_sockopt(&so_getorigdst); + if (ret < 0) { + printk(KERN_ERR "Unable to register netfilter socket option\n"); + return ret; + } + + ret = register_pernet_subsys(&ipv4_net_ops); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register pernet ops\n"); + goto cleanup_sockopt; + } + + ret = nf_register_hooks(ipv4_conntrack_ops, + ARRAY_SIZE(ipv4_conntrack_ops)); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register hooks.\n"); + goto cleanup_pernet; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n"); + goto cleanup_hooks; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n"); + goto cleanup_tcp4; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n"); + goto cleanup_udp4; + } + + ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n"); + goto cleanup_icmpv4; + } + +#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + ret = nf_conntrack_ipv4_compat_init(); + if (ret < 0) + goto cleanup_proto; +#endif + return ret; +#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + cleanup_proto: + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); +#endif + cleanup_icmpv4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); + cleanup_udp4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); + cleanup_tcp4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); + cleanup_hooks: + nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); + cleanup_pernet: + unregister_pernet_subsys(&ipv4_net_ops); + cleanup_sockopt: + nf_unregister_sockopt(&so_getorigdst); + return ret; +} + +static void __exit nf_conntrack_l3proto_ipv4_fini(void) +{ + synchronize_net(); +#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + nf_conntrack_ipv4_compat_fini(); +#endif + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); + nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); + unregister_pernet_subsys(&ipv4_net_ops); + nf_unregister_sockopt(&so_getorigdst); +} + +module_init(nf_conntrack_l3proto_ipv4_init); +module_exit(nf_conntrack_l3proto_ipv4_fini); diff --git a/kernel/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/kernel/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c new file mode 100644 index 000000000..f0dfe92a0 --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -0,0 +1,469 @@ +/* ip_conntrack proc compat - based on ip_conntrack_standalone.c + * + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * (C) 2006-2010 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +struct ct_iter_state { + struct seq_net_private p; + unsigned int bucket; +}; + +static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) +{ + struct net *net = seq_file_net(seq); + struct ct_iter_state *st = seq->private; + struct hlist_nulls_node *n; + + for (st->bucket = 0; + st->bucket < net->ct.htable_size; + st->bucket++) { + n = rcu_dereference( + hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); + if (!is_a_nulls(n)) + return n; + } + return NULL; +} + +static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, + struct hlist_nulls_node *head) +{ + struct net *net = seq_file_net(seq); + struct ct_iter_state *st = seq->private; + + head = rcu_dereference(hlist_nulls_next_rcu(head)); + while (is_a_nulls(head)) { + if (likely(get_nulls_value(head) == st->bucket)) { + if (++st->bucket >= net->ct.htable_size) + return NULL; + } + head = rcu_dereference( + hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); + } + return head; +} + +static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) +{ + struct hlist_nulls_node *head = ct_get_first(seq); + + if (head) + while (pos && (head = ct_get_next(seq, head))) + pos--; + return pos ? NULL : head; +} + +static void *ct_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + rcu_read_lock(); + return ct_get_idx(seq, *pos); +} + +static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + return ct_get_next(s, v); +} + +static void ct_seq_stop(struct seq_file *s, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + +#ifdef CONFIG_NF_CONNTRACK_SECMARK +static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) +{ + int ret; + u32 len; + char *secctx; + + ret = security_secid_to_secctx(ct->secmark, &secctx, &len); + if (ret) + return; + + seq_printf(s, "secctx=%s ", secctx); + + security_release_secctx(secctx, len); +} +#else +static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) +{ +} +#endif + +static int ct_seq_show(struct seq_file *s, void *v) +{ + struct nf_conntrack_tuple_hash *hash = v; + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; + int ret = 0; + + NF_CT_ASSERT(ct); + if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + return 0; + + + /* we only want to print DIR_ORIGINAL */ + if (NF_CT_DIRECTION(hash)) + goto release; + if (nf_ct_l3num(ct) != AF_INET) + goto release; + + l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); + NF_CT_ASSERT(l3proto); + l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); + NF_CT_ASSERT(l4proto); + + ret = -ENOSPC; + seq_printf(s, "%-8s %u %ld ", + l4proto->name, nf_ct_protonum(ct), + timer_pending(&ct->timeout) + ? (long)(ct->timeout.expires - jiffies)/HZ : 0); + + if (l4proto->print_conntrack) + l4proto->print_conntrack(s, ct); + + if (seq_has_overflowed(s)) + goto release; + + print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + l3proto, l4proto); + + if (seq_has_overflowed(s)) + goto release; + + if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) + goto release; + + if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) + seq_printf(s, "[UNREPLIED] "); + + print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, + l3proto, l4proto); + + if (seq_has_overflowed(s)) + goto release; + + if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) + goto release; + + if (test_bit(IPS_ASSURED_BIT, &ct->status)) + seq_printf(s, "[ASSURED] "); + +#ifdef CONFIG_NF_CONNTRACK_MARK + seq_printf(s, "mark=%u ", ct->mark); +#endif + + ct_show_secctx(s, ct); + + seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)); + + if (seq_has_overflowed(s)) + goto release; + + ret = 0; +release: + nf_ct_put(ct); + return ret; +} + +static const struct seq_operations ct_seq_ops = { + .start = ct_seq_start, + .next = ct_seq_next, + .stop = ct_seq_stop, + .show = ct_seq_show +}; + +static int ct_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &ct_seq_ops, + sizeof(struct ct_iter_state)); +} + +static const struct file_operations ct_file_ops = { + .owner = THIS_MODULE, + .open = ct_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +/* expects */ +struct ct_expect_iter_state { + struct seq_net_private p; + unsigned int bucket; +}; + +static struct hlist_node *ct_expect_get_first(struct seq_file *seq) +{ + struct net *net = seq_file_net(seq); + struct ct_expect_iter_state *st = seq->private; + struct hlist_node *n; + + for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { + n = rcu_dereference( + hlist_first_rcu(&net->ct.expect_hash[st->bucket])); + if (n) + return n; + } + return NULL; +} + +static struct hlist_node *ct_expect_get_next(struct seq_file *seq, + struct hlist_node *head) +{ + struct net *net = seq_file_net(seq); + struct ct_expect_iter_state *st = seq->private; + + head = rcu_dereference(hlist_next_rcu(head)); + while (head == NULL) { + if (++st->bucket >= nf_ct_expect_hsize) + return NULL; + head = rcu_dereference( + hlist_first_rcu(&net->ct.expect_hash[st->bucket])); + } + return head; +} + +static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) +{ + struct hlist_node *head = ct_expect_get_first(seq); + + if (head) + while (pos && (head = ct_expect_get_next(seq, head))) + pos--; + return pos ? NULL : head; +} + +static void *exp_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + rcu_read_lock(); + return ct_expect_get_idx(seq, *pos); +} + +static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + (*pos)++; + return ct_expect_get_next(seq, v); +} + +static void exp_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + +static int exp_seq_show(struct seq_file *s, void *v) +{ + struct nf_conntrack_expect *exp; + const struct hlist_node *n = v; + + exp = hlist_entry(n, struct nf_conntrack_expect, hnode); + + if (exp->tuple.src.l3num != AF_INET) + return 0; + + if (exp->timeout.function) + seq_printf(s, "%ld ", timer_pending(&exp->timeout) + ? (long)(exp->timeout.expires - jiffies)/HZ : 0); + else + seq_printf(s, "- "); + + seq_printf(s, "proto=%u ", exp->tuple.dst.protonum); + + print_tuple(s, &exp->tuple, + __nf_ct_l3proto_find(exp->tuple.src.l3num), + __nf_ct_l4proto_find(exp->tuple.src.l3num, + exp->tuple.dst.protonum)); + seq_putc(s, '\n'); + + return 0; +} + +static const struct seq_operations exp_seq_ops = { + .start = exp_seq_start, + .next = exp_seq_next, + .stop = exp_seq_stop, + .show = exp_seq_show +}; + +static int exp_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &exp_seq_ops, + sizeof(struct ct_expect_iter_state)); +} + +static const struct file_operations ip_exp_file_ops = { + .owner = THIS_MODULE, + .open = exp_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct net *net = seq_file_net(seq); + int cpu; + + if (*pos == 0) + return SEQ_START_TOKEN; + + for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu+1; + return per_cpu_ptr(net->ct.stat, cpu); + } + + return NULL; +} + +static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct net *net = seq_file_net(seq); + int cpu; + + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu+1; + return per_cpu_ptr(net->ct.stat, cpu); + } + + return NULL; +} + +static void ct_cpu_seq_stop(struct seq_file *seq, void *v) +{ +} + +static int ct_cpu_seq_show(struct seq_file *seq, void *v) +{ + struct net *net = seq_file_net(seq); + unsigned int nr_conntracks = atomic_read(&net->ct.count); + const struct ip_conntrack_stat *st = v; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); + return 0; + } + + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " + "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + nr_conntracks, + st->searched, + st->found, + st->new, + st->invalid, + st->ignore, + st->delete, + st->delete_list, + st->insert, + st->insert_failed, + st->drop, + st->early_drop, + st->error, + + st->expect_new, + st->expect_create, + st->expect_delete, + st->search_restart + ); + return 0; +} + +static const struct seq_operations ct_cpu_seq_ops = { + .start = ct_cpu_seq_start, + .next = ct_cpu_seq_next, + .stop = ct_cpu_seq_stop, + .show = ct_cpu_seq_show, +}; + +static int ct_cpu_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &ct_cpu_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations ct_cpu_seq_fops = { + .owner = THIS_MODULE, + .open = ct_cpu_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +static int __net_init ip_conntrack_net_init(struct net *net) +{ + struct proc_dir_entry *proc, *proc_exp, *proc_stat; + + proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops); + if (!proc) + goto err1; + + proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net, + &ip_exp_file_ops); + if (!proc_exp) + goto err2; + + proc_stat = proc_create("ip_conntrack", S_IRUGO, + net->proc_net_stat, &ct_cpu_seq_fops); + if (!proc_stat) + goto err3; + return 0; + +err3: + remove_proc_entry("ip_conntrack_expect", net->proc_net); +err2: + remove_proc_entry("ip_conntrack", net->proc_net); +err1: + return -ENOMEM; +} + +static void __net_exit ip_conntrack_net_exit(struct net *net) +{ + remove_proc_entry("ip_conntrack", net->proc_net_stat); + remove_proc_entry("ip_conntrack_expect", net->proc_net); + remove_proc_entry("ip_conntrack", net->proc_net); +} + +static struct pernet_operations ip_conntrack_net_ops = { + .init = ip_conntrack_net_init, + .exit = ip_conntrack_net_exit, +}; + +int __init nf_conntrack_ipv4_compat_init(void) +{ + return register_pernet_subsys(&ip_conntrack_net_ops); +} + +void __exit nf_conntrack_ipv4_compat_fini(void) +{ + unregister_pernet_subsys(&ip_conntrack_net_ops); +} diff --git a/kernel/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/kernel/net/ipv4/netfilter/nf_conntrack_proto_icmp.c new file mode 100644 index 000000000..80d5554b9 --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -0,0 +1,428 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * (C) 2006-2010 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; + +static inline struct nf_icmp_net *icmp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.icmp; +} + +static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + const struct icmphdr *hp; + struct icmphdr _hdr; + + hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (hp == NULL) + return false; + + tuple->dst.u.icmp.type = hp->type; + tuple->src.u.icmp.id = hp->un.echo.id; + tuple->dst.u.icmp.code = hp->code; + + return true; +} + +/* Add 1; spaces filled with 0. */ +static const u_int8_t invmap[] = { + [ICMP_ECHO] = ICMP_ECHOREPLY + 1, + [ICMP_ECHOREPLY] = ICMP_ECHO + 1, + [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, + [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, + [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, + [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, + [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, + [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1 +}; + +static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + if (orig->dst.u.icmp.type >= sizeof(invmap) || + !invmap[orig->dst.u.icmp.type]) + return false; + + tuple->src.u.icmp.id = orig->src.u.icmp.id; + tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1; + tuple->dst.u.icmp.code = orig->dst.u.icmp.code; + return true; +} + +/* Print out the per-protocol part of the tuple. */ +static void icmp_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + seq_printf(s, "type=%u code=%u id=%u ", + tuple->dst.u.icmp.type, + tuple->dst.u.icmp.code, + ntohs(tuple->src.u.icmp.id)); +} + +static unsigned int *icmp_get_timeouts(struct net *net) +{ + return &icmp_pernet(net)->timeout; +} + +/* Returns verdict for packet, or -1 for invalid. */ +static int icmp_packet(struct nf_conn *ct, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + u_int8_t pf, + unsigned int hooknum, + unsigned int *timeout) +{ + /* Do not immediately delete the connection after the first + successful reply to avoid excessive conntrackd traffic + and also to handle correctly ICMP echo reply duplicates. */ + nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, + unsigned int dataoff, unsigned int *timeouts) +{ + static const u_int8_t valid_new[] = { + [ICMP_ECHO] = 1, + [ICMP_TIMESTAMP] = 1, + [ICMP_INFO_REQUEST] = 1, + [ICMP_ADDRESS] = 1 + }; + + if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || + !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) { + /* Can't create a new ICMP `conn' with this. */ + pr_debug("icmp: can't create new conn with type %u\n", + ct->tuplehash[0].tuple.dst.u.icmp.type); + nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple); + return false; + } + return true; +} + +/* Returns conntrack if it dealt with ICMP, and filled in skb fields */ +static int +icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, + enum ip_conntrack_info *ctinfo, + unsigned int hooknum) +{ + struct nf_conntrack_tuple innertuple, origtuple; + const struct nf_conntrack_l4proto *innerproto; + const struct nf_conntrack_tuple_hash *h; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; + + NF_CT_ASSERT(skb->nfct == NULL); + + /* Are they talking about one of our connections? */ + if (!nf_ct_get_tuplepr(skb, + skb_network_offset(skb) + ip_hdrlen(skb) + + sizeof(struct icmphdr), + PF_INET, &origtuple)) { + pr_debug("icmp_error_message: failed to get tuple\n"); + return -NF_ACCEPT; + } + + /* rcu_read_lock()ed by nf_hook_slow */ + innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum); + + /* Ordinarily, we'd expect the inverted tupleproto, but it's + been preserved inside the ICMP. */ + if (!nf_ct_invert_tuple(&innertuple, &origtuple, + &nf_conntrack_l3proto_ipv4, innerproto)) { + pr_debug("icmp_error_message: no match\n"); + return -NF_ACCEPT; + } + + *ctinfo = IP_CT_RELATED; + + h = nf_conntrack_find_get(net, zone, &innertuple); + if (!h) { + pr_debug("icmp_error_message: no match\n"); + return -NF_ACCEPT; + } + + if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) + *ctinfo += IP_CT_IS_REPLY; + + /* Update skb to refer to this connection */ + skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; + skb->nfctinfo = *ctinfo; + return NF_ACCEPT; +} + +/* Small and modified version of icmp_rcv */ +static int +icmp_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) +{ + const struct icmphdr *icmph; + struct icmphdr _ih; + + /* Not enough header? */ + icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); + if (icmph == NULL) { + if (LOG_INVALID(net, IPPROTO_ICMP)) + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, + NULL, "nf_ct_icmp: short packet "); + return -NF_ACCEPT; + } + + /* See ip_conntrack_proto_tcp.c */ + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && + nf_ip_checksum(skb, hooknum, dataoff, 0)) { + if (LOG_INVALID(net, IPPROTO_ICMP)) + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, + "nf_ct_icmp: bad HW ICMP checksum "); + return -NF_ACCEPT; + } + + /* + * 18 is the highest 'known' ICMP type. Anything else is a mystery + * + * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently + * discarded. + */ + if (icmph->type > NR_ICMP_TYPES) { + if (LOG_INVALID(net, IPPROTO_ICMP)) + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, + "nf_ct_icmp: invalid ICMP type "); + return -NF_ACCEPT; + } + + /* Need to track icmp error message? */ + if (icmph->type != ICMP_DEST_UNREACH && + icmph->type != ICMP_SOURCE_QUENCH && + icmph->type != ICMP_TIME_EXCEEDED && + icmph->type != ICMP_PARAMETERPROB && + icmph->type != ICMP_REDIRECT) + return NF_ACCEPT; + + return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); +} + +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + +#include +#include + +static int icmp_tuple_to_nlattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *t) +{ + if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) || + nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) || + nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = { + [CTA_PROTO_ICMP_TYPE] = { .type = NLA_U8 }, + [CTA_PROTO_ICMP_CODE] = { .type = NLA_U8 }, + [CTA_PROTO_ICMP_ID] = { .type = NLA_U16 }, +}; + +static int icmp_nlattr_to_tuple(struct nlattr *tb[], + struct nf_conntrack_tuple *tuple) +{ + if (!tb[CTA_PROTO_ICMP_TYPE] || + !tb[CTA_PROTO_ICMP_CODE] || + !tb[CTA_PROTO_ICMP_ID]) + return -EINVAL; + + tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]); + tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]); + tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]); + + if (tuple->dst.u.icmp.type >= sizeof(invmap) || + !invmap[tuple->dst.u.icmp.type]) + return -EINVAL; + + return 0; +} + +static int icmp_nlattr_tuple_size(void) +{ + return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1); +} +#endif + +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + +#include +#include + +static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) +{ + unsigned int *timeout = data; + struct nf_icmp_net *in = icmp_pernet(net); + + if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { + *timeout = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ; + } else { + /* Set default ICMP timeout. */ + *timeout = in->timeout; + } + return 0; +} + +static int +icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) +{ + const unsigned int *timeout = data; + + if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy +icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = { + [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NLA_U32 }, +}; +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + +#ifdef CONFIG_SYSCTL +static struct ctl_table icmp_sysctl_table[] = { + { + .procname = "nf_conntrack_icmp_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { } +}; +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT +static struct ctl_table icmp_compat_sysctl_table[] = { + { + .procname = "ip_conntrack_icmp_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { } +}; +#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ +#endif /* CONFIG_SYSCTL */ + +static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, + struct nf_icmp_net *in) +{ +#ifdef CONFIG_SYSCTL + pn->ctl_table = kmemdup(icmp_sysctl_table, + sizeof(icmp_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + + pn->ctl_table[0].data = &in->timeout; +#endif + return 0; +} + +static int icmp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, + struct nf_icmp_net *in) +{ +#ifdef CONFIG_SYSCTL +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table, + sizeof(icmp_compat_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_compat_table) + return -ENOMEM; + + pn->ctl_compat_table[0].data = &in->timeout; +#endif +#endif + return 0; +} + +static int icmp_init_net(struct net *net, u_int16_t proto) +{ + int ret; + struct nf_icmp_net *in = icmp_pernet(net); + struct nf_proto_net *pn = &in->pn; + + in->timeout = nf_ct_icmp_timeout; + + ret = icmp_kmemdup_compat_sysctl_table(pn, in); + if (ret < 0) + return ret; + + ret = icmp_kmemdup_sysctl_table(pn, in); + if (ret < 0) + nf_ct_kfree_compat_sysctl_table(pn); + + return ret; +} + +static struct nf_proto_net *icmp_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.icmp.pn; +} + +struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = +{ + .l3proto = PF_INET, + .l4proto = IPPROTO_ICMP, + .name = "icmp", + .pkt_to_tuple = icmp_pkt_to_tuple, + .invert_tuple = icmp_invert_tuple, + .print_tuple = icmp_print_tuple, + .packet = icmp_packet, + .get_timeouts = icmp_get_timeouts, + .new = icmp_new, + .error = icmp_error, + .destroy = NULL, + .me = NULL, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .tuple_to_nlattr = icmp_tuple_to_nlattr, + .nlattr_tuple_size = icmp_nlattr_tuple_size, + .nlattr_to_tuple = icmp_nlattr_to_tuple, + .nla_policy = icmp_nla_policy, +#endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = icmp_timeout_nlattr_to_obj, + .obj_to_nlattr = icmp_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_ICMP_MAX, + .obj_size = sizeof(unsigned int), + .nla_policy = icmp_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + .init_net = icmp_init_net, + .get_net_proto = icmp_get_net_proto, +}; diff --git a/kernel/net/ipv4/netfilter/nf_defrag_ipv4.c b/kernel/net/ipv4/netfilter/nf_defrag_ipv4.c new file mode 100644 index 000000000..c88b7d434 --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -0,0 +1,129 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include +#endif +#include + +static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +{ + int err; + + skb_orphan(skb); + + local_bh_disable(); + err = ip_defrag(skb, user); + local_bh_enable(); + + if (!err) { + ip_send_check(ip_hdr(skb)); + skb->ignore_df = 1; + } + + return err; +} + +static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, + struct sk_buff *skb) +{ + u16 zone = NF_CT_DEFAULT_ZONE; + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + if (skb->nfct) + zone = nf_ct_zone((struct nf_conn *)skb->nfct); +#endif + +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + if (skb->nf_bridge && + skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) + return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone; +#endif + if (hooknum == NF_INET_PRE_ROUTING) + return IP_DEFRAG_CONNTRACK_IN + zone; + else + return IP_DEFRAG_CONNTRACK_OUT + zone; +} + +static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct sock *sk = skb->sk; + struct inet_sock *inet = inet_sk(skb->sk); + + if (sk && (sk->sk_family == PF_INET) && + inet->nodefrag) + return NF_ACCEPT; + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#if !IS_ENABLED(CONFIG_NF_NAT) + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) + return NF_ACCEPT; +#endif +#endif + /* Gather fragments. */ + if (ip_is_fragment(ip_hdr(skb))) { + enum ip_defrag_users user = + nf_ct_defrag_user(ops->hooknum, skb); + + if (nf_ct_ipv4_gather_frags(skb, user)) + return NF_STOLEN; + } + return NF_ACCEPT; +} + +static struct nf_hook_ops ipv4_defrag_ops[] = { + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, +}; + +static int __init nf_defrag_init(void) +{ + return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +static void __exit nf_defrag_fini(void) +{ + nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +void nf_defrag_ipv4_enable(void) +{ +} +EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); + +module_init(nf_defrag_init); +module_exit(nf_defrag_fini); + +MODULE_LICENSE("GPL"); diff --git a/kernel/net/ipv4/netfilter/nf_log_arp.c b/kernel/net/ipv4/netfilter/nf_log_arp.c new file mode 100644 index 000000000..e7ad950cf --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_log_arp.c @@ -0,0 +1,161 @@ +/* + * (C) 2014 by Pablo Neira Ayuso + * + * Based on code from ebt_log from: + * + * Bart De Schuymer + * Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = LOGLEVEL_NOTICE, + .logflags = NF_LOG_MASK, + }, + }, +}; + +struct arppayload { + unsigned char mac_src[ETH_ALEN]; + unsigned char ip_src[4]; + unsigned char mac_dst[ETH_ALEN]; + unsigned char ip_dst[4]; +}; + +static void dump_arp_packet(struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb, unsigned int nhoff) +{ + const struct arphdr *ah; + struct arphdr _arph; + const struct arppayload *ap; + struct arppayload _arpp; + + ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + if (ah == NULL) { + nf_log_buf_add(m, "TRUNCATED"); + return; + } + nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d", + ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op)); + + /* If it's for Ethernet and the lengths are OK, then log the ARP + * payload. + */ + if (ah->ar_hrd != htons(1) || + ah->ar_hln != ETH_ALEN || + ah->ar_pln != sizeof(__be32)) + return; + + ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp); + if (ap == NULL) { + nf_log_buf_add(m, " INCOMPLETE [%Zu bytes]", + skb->len - sizeof(_arph)); + return; + } + nf_log_buf_add(m, " MACSRC=%pM IPSRC=%pI4 MACDST=%pM IPDST=%pI4", + ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst); +} + +static void nf_log_arp_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + struct nf_log_buf *m; + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; + + m = nf_log_buf_open(); + + if (!loginfo) + loginfo = &default_loginfo; + + nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, + prefix); + dump_arp_packet(m, loginfo, skb, 0); + + nf_log_buf_close(m); +} + +static struct nf_logger nf_arp_logger __read_mostly = { + .name = "nf_log_arp", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_arp_packet, + .me = THIS_MODULE, +}; + +static int __net_init nf_log_arp_net_init(struct net *net) +{ + nf_log_set(net, NFPROTO_ARP, &nf_arp_logger); + return 0; +} + +static void __net_exit nf_log_arp_net_exit(struct net *net) +{ + nf_log_unset(net, &nf_arp_logger); +} + +static struct pernet_operations nf_log_arp_net_ops = { + .init = nf_log_arp_net_init, + .exit = nf_log_arp_net_exit, +}; + +static int __init nf_log_arp_init(void) +{ + int ret; + + ret = register_pernet_subsys(&nf_log_arp_net_ops); + if (ret < 0) + return ret; + + ret = nf_log_register(NFPROTO_ARP, &nf_arp_logger); + if (ret < 0) { + pr_err("failed to register logger\n"); + goto err1; + } + + return 0; + +err1: + unregister_pernet_subsys(&nf_log_arp_net_ops); + return ret; +} + +static void __exit nf_log_arp_exit(void) +{ + unregister_pernet_subsys(&nf_log_arp_net_ops); + nf_log_unregister(&nf_arp_logger); +} + +module_init(nf_log_arp_init); +module_exit(nf_log_arp_exit); + +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_DESCRIPTION("Netfilter ARP packet logging"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NF_LOGGER(3, 0); diff --git a/kernel/net/ipv4/netfilter/nf_log_ipv4.c b/kernel/net/ipv4/netfilter/nf_log_ipv4.c new file mode 100644 index 000000000..076aadda0 --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_log_ipv4.c @@ -0,0 +1,397 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = LOGLEVEL_NOTICE, + .logflags = NF_LOG_MASK, + }, + }, +}; + +/* One level of recursion won't kill us */ +static void dump_ipv4_packet(struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb, unsigned int iphoff) +{ + struct iphdr _iph; + const struct iphdr *ih; + unsigned int logflags; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_MASK; + + ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); + if (ih == NULL) { + nf_log_buf_add(m, "TRUNCATED"); + return; + } + + /* Important fields: + * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ + /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ + nf_log_buf_add(m, "SRC=%pI4 DST=%pI4 ", &ih->saddr, &ih->daddr); + + /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ + nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", + ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK, + ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); + + /* Max length: 6 "CE DF MF " */ + if (ntohs(ih->frag_off) & IP_CE) + nf_log_buf_add(m, "CE "); + if (ntohs(ih->frag_off) & IP_DF) + nf_log_buf_add(m, "DF "); + if (ntohs(ih->frag_off) & IP_MF) + nf_log_buf_add(m, "MF "); + + /* Max length: 11 "FRAG:65535 " */ + if (ntohs(ih->frag_off) & IP_OFFSET) + nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); + + if ((logflags & XT_LOG_IPOPT) && + ih->ihl * 4 > sizeof(struct iphdr)) { + const unsigned char *op; + unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; + unsigned int i, optsize; + + optsize = ih->ihl * 4 - sizeof(struct iphdr); + op = skb_header_pointer(skb, iphoff+sizeof(_iph), + optsize, _opt); + if (op == NULL) { + nf_log_buf_add(m, "TRUNCATED"); + return; + } + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ + nf_log_buf_add(m, "OPT ("); + for (i = 0; i < optsize; i++) + nf_log_buf_add(m, "%02X", op[i]); + nf_log_buf_add(m, ") "); + } + + switch (ih->protocol) { + case IPPROTO_TCP: + if (nf_log_dump_tcp_header(m, skb, ih->protocol, + ntohs(ih->frag_off) & IP_OFFSET, + iphoff+ih->ihl*4, logflags)) + return; + break; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + if (nf_log_dump_udp_header(m, skb, ih->protocol, + ntohs(ih->frag_off) & IP_OFFSET, + iphoff+ih->ihl*4)) + return; + break; + case IPPROTO_ICMP: { + struct icmphdr _icmph; + const struct icmphdr *ich; + static const size_t required_len[NR_ICMP_TYPES+1] + = { [ICMP_ECHOREPLY] = 4, + [ICMP_DEST_UNREACH] + = 8 + sizeof(struct iphdr), + [ICMP_SOURCE_QUENCH] + = 8 + sizeof(struct iphdr), + [ICMP_REDIRECT] + = 8 + sizeof(struct iphdr), + [ICMP_ECHO] = 4, + [ICMP_TIME_EXCEEDED] + = 8 + sizeof(struct iphdr), + [ICMP_PARAMETERPROB] + = 8 + sizeof(struct iphdr), + [ICMP_TIMESTAMP] = 20, + [ICMP_TIMESTAMPREPLY] = 20, + [ICMP_ADDRESS] = 12, + [ICMP_ADDRESSREPLY] = 12 }; + + /* Max length: 11 "PROTO=ICMP " */ + nf_log_buf_add(m, "PROTO=ICMP "); + + if (ntohs(ih->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, + sizeof(_icmph), &_icmph); + if (ich == NULL) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + /* Max length: 18 "TYPE=255 CODE=255 " */ + nf_log_buf_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code); + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (ich->type <= NR_ICMP_TYPES && + required_len[ich->type] && + skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + switch (ich->type) { + case ICMP_ECHOREPLY: + case ICMP_ECHO: + /* Max length: 19 "ID=65535 SEQ=65535 " */ + nf_log_buf_add(m, "ID=%u SEQ=%u ", + ntohs(ich->un.echo.id), + ntohs(ich->un.echo.sequence)); + break; + + case ICMP_PARAMETERPROB: + /* Max length: 14 "PARAMETER=255 " */ + nf_log_buf_add(m, "PARAMETER=%u ", + ntohl(ich->un.gateway) >> 24); + break; + case ICMP_REDIRECT: + /* Max length: 24 "GATEWAY=255.255.255.255 " */ + nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); + /* Fall through */ + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_TIME_EXCEEDED: + /* Max length: 3+maxlen */ + if (!iphoff) { /* Only recurse once. */ + nf_log_buf_add(m, "["); + dump_ipv4_packet(m, info, skb, + iphoff + ih->ihl*4+sizeof(_icmph)); + nf_log_buf_add(m, "] "); + } + + /* Max length: 10 "MTU=65535 " */ + if (ich->type == ICMP_DEST_UNREACH && + ich->code == ICMP_FRAG_NEEDED) { + nf_log_buf_add(m, "MTU=%u ", + ntohs(ich->un.frag.mtu)); + } + } + break; + } + /* Max Length */ + case IPPROTO_AH: { + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; + + if (ntohs(ih->frag_off) & IP_OFFSET) + break; + + /* Max length: 9 "PROTO=AH " */ + nf_log_buf_add(m, "PROTO=AH "); + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + ah = skb_header_pointer(skb, iphoff+ih->ihl*4, + sizeof(_ahdr), &_ahdr); + if (ah == NULL) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + /* Length: 15 "SPI=0xF1234567 " */ + nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi)); + break; + } + case IPPROTO_ESP: { + struct ip_esp_hdr _esph; + const struct ip_esp_hdr *eh; + + /* Max length: 10 "PROTO=ESP " */ + nf_log_buf_add(m, "PROTO=ESP "); + + if (ntohs(ih->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + eh = skb_header_pointer(skb, iphoff+ih->ihl*4, + sizeof(_esph), &_esph); + if (eh == NULL) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + /* Length: 15 "SPI=0xF1234567 " */ + nf_log_buf_add(m, "SPI=0x%x ", ntohl(eh->spi)); + break; + } + /* Max length: 10 "PROTO 255 " */ + default: + nf_log_buf_add(m, "PROTO=%u ", ih->protocol); + } + + /* Max length: 15 "UID=4294967295 " */ + if ((logflags & XT_LOG_UID) && !iphoff) + nf_log_dump_sk_uid_gid(m, skb->sk); + + /* Max length: 16 "MARK=0xFFFFFFFF " */ + if (!iphoff && skb->mark) + nf_log_buf_add(m, "MARK=0x%x ", skb->mark); + + /* Proto Max log string length */ + /* IP: 40+46+6+11+127 = 230 */ + /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ + /* UDP: 10+max(25,20) = 35 */ + /* UDPLITE: 14+max(25,20) = 39 */ + /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ + /* ESP: 10+max(25)+15 = 50 */ + /* AH: 9+max(25)+15 = 49 */ + /* unknown: 10 */ + + /* (ICMP allows recursion one level deep) */ + /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ + /* maxlen = 230+ 91 + 230 + 252 = 803 */ +} + +static void dump_ipv4_mac_header(struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & XT_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + nf_log_buf_add(m, "MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int i; + + nf_log_buf_add(m, "%02x", *p++); + for (i = 1; i < dev->hard_header_len; i++, p++) + nf_log_buf_add(m, ":%02x", *p); + } + nf_log_buf_add(m, " "); +} + +static void nf_log_ip_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + struct nf_log_buf *m; + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; + + m = nf_log_buf_open(); + + if (!loginfo) + loginfo = &default_loginfo; + + nf_log_dump_packet_common(m, pf, hooknum, skb, in, + out, loginfo, prefix); + + if (in != NULL) + dump_ipv4_mac_header(m, loginfo, skb); + + dump_ipv4_packet(m, loginfo, skb, 0); + + nf_log_buf_close(m); +} + +static struct nf_logger nf_ip_logger __read_mostly = { + .name = "nf_log_ipv4", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_ip_packet, + .me = THIS_MODULE, +}; + +static int __net_init nf_log_ipv4_net_init(struct net *net) +{ + nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger); + return 0; +} + +static void __net_exit nf_log_ipv4_net_exit(struct net *net) +{ + nf_log_unset(net, &nf_ip_logger); +} + +static struct pernet_operations nf_log_ipv4_net_ops = { + .init = nf_log_ipv4_net_init, + .exit = nf_log_ipv4_net_exit, +}; + +static int __init nf_log_ipv4_init(void) +{ + int ret; + + ret = register_pernet_subsys(&nf_log_ipv4_net_ops); + if (ret < 0) + return ret; + + ret = nf_log_register(NFPROTO_IPV4, &nf_ip_logger); + if (ret < 0) { + pr_err("failed to register logger\n"); + goto err1; + } + + return 0; + +err1: + unregister_pernet_subsys(&nf_log_ipv4_net_ops); + return ret; +} + +static void __exit nf_log_ipv4_exit(void) +{ + unregister_pernet_subsys(&nf_log_ipv4_net_ops); + nf_log_unregister(&nf_ip_logger); +} + +module_init(nf_log_ipv4_init); +module_exit(nf_log_ipv4_exit); + +MODULE_AUTHOR("Netfilter Core Team "); +MODULE_DESCRIPTION("Netfilter IPv4 packet logging"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NF_LOGGER(AF_INET, 0); diff --git a/kernel/net/ipv4/netfilter/nf_nat_h323.c b/kernel/net/ipv4/netfilter/nf_nat_h323.c new file mode 100644 index 000000000..574f7ebba --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_nat_h323.c @@ -0,0 +1,631 @@ +/* + * H.323 extension for NAT alteration. + * + * Copyright (c) 2006 Jing Min Zhao + * Copyright (c) 2006-2012 Patrick McHardy + * + * This source code is licensed under General Public License version 2. + * + * Based on the 'brute force' H.323 NAT module by + * Jozsef Kadlecsik + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +/****************************************************************************/ +static int set_addr(struct sk_buff *skb, unsigned int protoff, + unsigned char **data, int dataoff, + unsigned int addroff, __be32 ip, __be16 port) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct { + __be32 ip; + __be16 port; + } __attribute__ ((__packed__)) buf; + const struct tcphdr *th; + struct tcphdr _tcph; + + buf.ip = ip; + buf.port = port; + addroff += dataoff; + + if (ip_hdr(skb)->protocol == IPPROTO_TCP) { + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + protoff, addroff, sizeof(buf), + (char *) &buf, sizeof(buf))) { + net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n"); + return -1; + } + + /* Relocate data pointer */ + th = skb_header_pointer(skb, ip_hdrlen(skb), + sizeof(_tcph), &_tcph); + if (th == NULL) + return -1; + *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff; + } else { + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, + protoff, addroff, sizeof(buf), + (char *) &buf, sizeof(buf))) { + net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n"); + return -1; + } + /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy + * or pull everything in a linear buffer, so we can safely + * use the skb pointers now */ + *data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); + } + + return 0; +} + +/****************************************************************************/ +static int set_h225_addr(struct sk_buff *skb, unsigned int protoff, + unsigned char **data, int dataoff, + TransportAddress *taddr, + union nf_inet_addr *addr, __be16 port) +{ + return set_addr(skb, protoff, data, dataoff, taddr->ipAddress.ip, + addr->ip, port); +} + +/****************************************************************************/ +static int set_h245_addr(struct sk_buff *skb, unsigned protoff, + unsigned char **data, int dataoff, + H245_TransportAddress *taddr, + union nf_inet_addr *addr, __be16 port) +{ + return set_addr(skb, protoff, data, dataoff, + taddr->unicastAddress.iPAddress.network, + addr->ip, port); +} + +/****************************************************************************/ +static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, + TransportAddress *taddr, int count) +{ + const struct nf_ct_h323_master *info = nfct_help_data(ct); + int dir = CTINFO2DIR(ctinfo); + int i; + __be16 port; + union nf_inet_addr addr; + + for (i = 0; i < count; i++) { + if (get_h225_addr(ct, *data, &taddr[i], &addr, &port)) { + if (addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && + port == info->sig_port[dir]) { + /* GW->GK */ + + /* Fix for Gnomemeeting */ + if (i > 0 && + get_h225_addr(ct, *data, &taddr[0], + &addr, &port) && + (ntohl(addr.ip) & 0xff000000) == 0x7f000000) + i = 0; + + pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n", + &addr.ip, port, + &ct->tuplehash[!dir].tuple.dst.u3.ip, + info->sig_port[!dir]); + return set_h225_addr(skb, protoff, data, 0, + &taddr[i], + &ct->tuplehash[!dir]. + tuple.dst.u3, + info->sig_port[!dir]); + } else if (addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && + port == info->sig_port[dir]) { + /* GK->GW */ + pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n", + &addr.ip, port, + &ct->tuplehash[!dir].tuple.src.u3.ip, + info->sig_port[!dir]); + return set_h225_addr(skb, protoff, data, 0, + &taddr[i], + &ct->tuplehash[!dir]. + tuple.src.u3, + info->sig_port[!dir]); + } + } + } + + return 0; +} + +/****************************************************************************/ +static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, + TransportAddress *taddr, int count) +{ + int dir = CTINFO2DIR(ctinfo); + int i; + __be16 port; + union nf_inet_addr addr; + + for (i = 0; i < count; i++) { + if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) && + addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && + port == ct->tuplehash[dir].tuple.src.u.udp.port) { + pr_debug("nf_nat_ras: set rasAddress %pI4:%hu->%pI4:%hu\n", + &addr.ip, ntohs(port), + &ct->tuplehash[!dir].tuple.dst.u3.ip, + ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); + return set_h225_addr(skb, protoff, data, 0, &taddr[i], + &ct->tuplehash[!dir].tuple.dst.u3, + ct->tuplehash[!dir].tuple. + dst.u.udp.port); + } + } + + return 0; +} + +/****************************************************************************/ +static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, + H245_TransportAddress *taddr, + __be16 port, __be16 rtp_port, + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp) +{ + struct nf_ct_h323_master *info = nfct_help_data(ct); + int dir = CTINFO2DIR(ctinfo); + int i; + u_int16_t nated_port; + + /* Set expectations for NAT */ + rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port; + rtp_exp->expectfn = nf_nat_follow_master; + rtp_exp->dir = !dir; + rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port; + rtcp_exp->expectfn = nf_nat_follow_master; + rtcp_exp->dir = !dir; + + /* Lookup existing expects */ + for (i = 0; i < H323_RTP_CHANNEL_MAX; i++) { + if (info->rtp_port[i][dir] == rtp_port) { + /* Expected */ + + /* Use allocated ports first. This will refresh + * the expects */ + rtp_exp->tuple.dst.u.udp.port = info->rtp_port[i][dir]; + rtcp_exp->tuple.dst.u.udp.port = + htons(ntohs(info->rtp_port[i][dir]) + 1); + break; + } else if (info->rtp_port[i][dir] == 0) { + /* Not expected */ + break; + } + } + + /* Run out of expectations */ + if (i >= H323_RTP_CHANNEL_MAX) { + net_notice_ratelimited("nf_nat_h323: out of expectations\n"); + return 0; + } + + /* Try to get a pair of ports. */ + for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port); + nated_port != 0; nated_port += 2) { + int ret; + + rtp_exp->tuple.dst.u.udp.port = htons(nated_port); + ret = nf_ct_expect_related(rtp_exp); + if (ret == 0) { + rtcp_exp->tuple.dst.u.udp.port = + htons(nated_port + 1); + ret = nf_ct_expect_related(rtcp_exp); + if (ret == 0) + break; + else if (ret == -EBUSY) { + nf_ct_unexpect_related(rtp_exp); + continue; + } else if (ret < 0) { + nf_ct_unexpect_related(rtp_exp); + nated_port = 0; + break; + } + } else if (ret != -EBUSY) { + nated_port = 0; + break; + } + } + + if (nated_port == 0) { /* No port available */ + net_notice_ratelimited("nf_nat_h323: out of RTP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h245_addr(skb, protoff, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons((port & htons(1)) ? nated_port + 1 : + nated_port)) == 0) { + /* Save ports */ + info->rtp_port[i][dir] = rtp_port; + info->rtp_port[i][!dir] = htons(nated_port); + } else { + nf_ct_unexpect_related(rtp_exp); + nf_ct_unexpect_related(rtcp_exp); + return -1; + } + + /* Success */ + pr_debug("nf_nat_h323: expect RTP %pI4:%hu->%pI4:%hu\n", + &rtp_exp->tuple.src.u3.ip, + ntohs(rtp_exp->tuple.src.u.udp.port), + &rtp_exp->tuple.dst.u3.ip, + ntohs(rtp_exp->tuple.dst.u.udp.port)); + pr_debug("nf_nat_h323: expect RTCP %pI4:%hu->%pI4:%hu\n", + &rtcp_exp->tuple.src.u3.ip, + ntohs(rtcp_exp->tuple.src.u.udp.port), + &rtcp_exp->tuple.dst.u3.ip, + ntohs(rtcp_exp->tuple.dst.u.udp.port)); + + return 0; +} + +/****************************************************************************/ +static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, + H245_TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp) +{ + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port = ntohs(port); + + /* Set expectations for NAT */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = nf_nat_follow_master; + exp->dir = !dir; + + /* Try to get same port: if not, try to change it. */ + for (; nated_port != 0; nated_port++) { + int ret; + + exp->tuple.dst.u.tcp.port = htons(nated_port); + ret = nf_ct_expect_related(exp); + if (ret == 0) + break; + else if (ret != -EBUSY) { + nated_port = 0; + break; + } + } + + if (nated_port == 0) { /* No port available */ + net_notice_ratelimited("nf_nat_h323: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h245_addr(skb, protoff, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port)) < 0) { + nf_ct_unexpect_related(exp); + return -1; + } + + pr_debug("nf_nat_h323: expect T.120 %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, + ntohs(exp->tuple.src.u.tcp.port), + &exp->tuple.dst.u3.ip, + ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +/****************************************************************************/ +static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, + TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp) +{ + struct nf_ct_h323_master *info = nfct_help_data(ct); + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port = ntohs(port); + + /* Set expectations for NAT */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = nf_nat_follow_master; + exp->dir = !dir; + + /* Check existing expects */ + if (info->sig_port[dir] == port) + nated_port = ntohs(info->sig_port[!dir]); + + /* Try to get same port: if not, try to change it. */ + for (; nated_port != 0; nated_port++) { + int ret; + + exp->tuple.dst.u.tcp.port = htons(nated_port); + ret = nf_ct_expect_related(exp); + if (ret == 0) + break; + else if (ret != -EBUSY) { + nated_port = 0; + break; + } + } + + if (nated_port == 0) { /* No port available */ + net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h225_addr(skb, protoff, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port)) == 0) { + /* Save ports */ + info->sig_port[dir] = port; + info->sig_port[!dir] = htons(nated_port); + } else { + nf_ct_unexpect_related(exp); + return -1; + } + + pr_debug("nf_nat_q931: expect H.245 %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, + ntohs(exp->tuple.src.u.tcp.port), + &exp->tuple.dst.u3.ip, + ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +/**************************************************************************** + * This conntrack expect function replaces nf_conntrack_q931_expect() + * which was set by nf_conntrack_h323.c. + ****************************************************************************/ +static void ip_nat_q931_expect(struct nf_conn *new, + struct nf_conntrack_expect *this) +{ + struct nf_nat_range range; + + if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ + nf_nat_follow_master(new, this); + return; + } + + /* This must be a fresh one. */ + BUG_ON(new->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = range.max_addr = + new->tuplehash[!this->dir].tuple.src.u3; + nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); + range.min_proto = range.max_proto = this->saved_proto; + range.min_addr = range.max_addr = + new->master->tuplehash[!this->dir].tuple.src.u3; + nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); +} + +/****************************************************************************/ +static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, + TransportAddress *taddr, int idx, + __be16 port, struct nf_conntrack_expect *exp) +{ + struct nf_ct_h323_master *info = nfct_help_data(ct); + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port = ntohs(port); + union nf_inet_addr addr; + + /* Set expectations for NAT */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = ip_nat_q931_expect; + exp->dir = !dir; + + /* Check existing expects */ + if (info->sig_port[dir] == port) + nated_port = ntohs(info->sig_port[!dir]); + + /* Try to get same port: if not, try to change it. */ + for (; nated_port != 0; nated_port++) { + int ret; + + exp->tuple.dst.u.tcp.port = htons(nated_port); + ret = nf_ct_expect_related(exp); + if (ret == 0) + break; + else if (ret != -EBUSY) { + nated_port = 0; + break; + } + } + + if (nated_port == 0) { /* No port available */ + net_notice_ratelimited("nf_nat_ras: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h225_addr(skb, protoff, data, 0, &taddr[idx], + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port)) == 0) { + /* Save ports */ + info->sig_port[dir] = port; + info->sig_port[!dir] = htons(nated_port); + + /* Fix for Gnomemeeting */ + if (idx > 0 && + get_h225_addr(ct, *data, &taddr[0], &addr, &port) && + (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { + set_h225_addr(skb, protoff, data, 0, &taddr[0], + &ct->tuplehash[!dir].tuple.dst.u3, + info->sig_port[!dir]); + } + } else { + nf_ct_unexpect_related(exp); + return -1; + } + + /* Success */ + pr_debug("nf_nat_ras: expect Q.931 %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, + ntohs(exp->tuple.src.u.tcp.port), + &exp->tuple.dst.u3.ip, + ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +/****************************************************************************/ +static void ip_nat_callforwarding_expect(struct nf_conn *new, + struct nf_conntrack_expect *this) +{ + struct nf_nat_range range; + + /* This must be a fresh one. */ + BUG_ON(new->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = range.max_addr = + new->tuplehash[!this->dir].tuple.src.u3; + nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); + range.min_proto = range.max_proto = this->saved_proto; + range.min_addr = range.max_addr = this->saved_addr; + nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); +} + +/****************************************************************************/ +static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned char **data, int dataoff, + TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp) +{ + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port; + + /* Set expectations for NAT */ + exp->saved_addr = exp->tuple.dst.u3; + exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip; + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = ip_nat_callforwarding_expect; + exp->dir = !dir; + + /* Try to get same port: if not, try to change it. */ + for (nated_port = ntohs(port); nated_port != 0; nated_port++) { + int ret; + + exp->tuple.dst.u.tcp.port = htons(nated_port); + ret = nf_ct_expect_related(exp); + if (ret == 0) + break; + else if (ret != -EBUSY) { + nated_port = 0; + break; + } + } + + if (nated_port == 0) { /* No port available */ + net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (!set_h225_addr(skb, protoff, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port)) == 0) { + nf_ct_unexpect_related(exp); + return -1; + } + + /* Success */ + pr_debug("nf_nat_q931: expect Call Forwarding %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, + ntohs(exp->tuple.src.u.tcp.port), + &exp->tuple.dst.u3.ip, + ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +static struct nf_ct_helper_expectfn q931_nat = { + .name = "Q.931", + .expectfn = ip_nat_q931_expect, +}; + +static struct nf_ct_helper_expectfn callforwarding_nat = { + .name = "callforwarding", + .expectfn = ip_nat_callforwarding_expect, +}; + +/****************************************************************************/ +static int __init init(void) +{ + BUG_ON(set_h245_addr_hook != NULL); + BUG_ON(set_h225_addr_hook != NULL); + BUG_ON(set_sig_addr_hook != NULL); + BUG_ON(set_ras_addr_hook != NULL); + BUG_ON(nat_rtp_rtcp_hook != NULL); + BUG_ON(nat_t120_hook != NULL); + BUG_ON(nat_h245_hook != NULL); + BUG_ON(nat_callforwarding_hook != NULL); + BUG_ON(nat_q931_hook != NULL); + + RCU_INIT_POINTER(set_h245_addr_hook, set_h245_addr); + RCU_INIT_POINTER(set_h225_addr_hook, set_h225_addr); + RCU_INIT_POINTER(set_sig_addr_hook, set_sig_addr); + RCU_INIT_POINTER(set_ras_addr_hook, set_ras_addr); + RCU_INIT_POINTER(nat_rtp_rtcp_hook, nat_rtp_rtcp); + RCU_INIT_POINTER(nat_t120_hook, nat_t120); + RCU_INIT_POINTER(nat_h245_hook, nat_h245); + RCU_INIT_POINTER(nat_callforwarding_hook, nat_callforwarding); + RCU_INIT_POINTER(nat_q931_hook, nat_q931); + nf_ct_helper_expectfn_register(&q931_nat); + nf_ct_helper_expectfn_register(&callforwarding_nat); + return 0; +} + +/****************************************************************************/ +static void __exit fini(void) +{ + RCU_INIT_POINTER(set_h245_addr_hook, NULL); + RCU_INIT_POINTER(set_h225_addr_hook, NULL); + RCU_INIT_POINTER(set_sig_addr_hook, NULL); + RCU_INIT_POINTER(set_ras_addr_hook, NULL); + RCU_INIT_POINTER(nat_rtp_rtcp_hook, NULL); + RCU_INIT_POINTER(nat_t120_hook, NULL); + RCU_INIT_POINTER(nat_h245_hook, NULL); + RCU_INIT_POINTER(nat_callforwarding_hook, NULL); + RCU_INIT_POINTER(nat_q931_hook, NULL); + nf_ct_helper_expectfn_unregister(&q931_nat); + nf_ct_helper_expectfn_unregister(&callforwarding_nat); + synchronize_rcu(); +} + +/****************************************************************************/ +module_init(init); +module_exit(fini); + +MODULE_AUTHOR("Jing Min Zhao "); +MODULE_DESCRIPTION("H.323 NAT helper"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ip_nat_h323"); diff --git a/kernel/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/kernel/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c new file mode 100644 index 000000000..e59cc05c0 --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -0,0 +1,481 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * (C) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv4; + +#ifdef CONFIG_XFRM +static void nf_nat_ipv4_decode_session(struct sk_buff *skb, + const struct nf_conn *ct, + enum ip_conntrack_dir dir, + unsigned long statusbit, + struct flowi *fl) +{ + const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; + struct flowi4 *fl4 = &fl->u.ip4; + + if (ct->status & statusbit) { + fl4->daddr = t->dst.u3.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl4->fl4_dport = t->dst.u.all; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl4->saddr = t->src.u3.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl4->fl4_sport = t->src.u.all; + } +} +#endif /* CONFIG_XFRM */ + +static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t, + const struct nf_nat_range *range) +{ + return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) && + ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip); +} + +static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t, + __be16 dport) +{ + return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport); +} + +static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb, + unsigned int iphdroff, + const struct nf_nat_l4proto *l4proto, + const struct nf_conntrack_tuple *target, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph; + unsigned int hdroff; + + if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) + return false; + + iph = (void *)skb->data + iphdroff; + hdroff = iphdroff + iph->ihl * 4; + + if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff, + target, maniptype)) + return false; + iph = (void *)skb->data + iphdroff; + + if (maniptype == NF_NAT_MANIP_SRC) { + csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); + iph->saddr = target->src.u3.ip; + } else { + csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); + iph->daddr = target->dst.u3.ip; + } + return true; +} + +static void nf_nat_ipv4_csum_update(struct sk_buff *skb, + unsigned int iphdroff, __sum16 *check, + const struct nf_conntrack_tuple *t, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); + __be32 oldip, newip; + + if (maniptype == NF_NAT_MANIP_SRC) { + oldip = iph->saddr; + newip = t->src.u3.ip; + } else { + oldip = iph->daddr; + newip = t->dst.u3.ip; + } + inet_proto_csum_replace4(check, skb, oldip, newip, 1); +} + +static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, + u8 proto, void *data, __sum16 *check, + int datalen, int oldlen) +{ + const struct iphdr *iph = ip_hdr(skb); + struct rtable *rt = skb_rtable(skb); + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + if (!(rt->rt_flags & RTCF_LOCAL) && + (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + + skb_network_offset(skb) + + ip_hdrlen(skb); + skb->csum_offset = (void *)check - data; + *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + datalen, proto, 0); + } else { + *check = 0; + *check = csum_tcpudp_magic(iph->saddr, iph->daddr, + datalen, proto, + csum_partial(data, datalen, + 0)); + if (proto == IPPROTO_UDP && !*check) + *check = CSUM_MANGLED_0; + } + } else + inet_proto_csum_replace2(check, skb, + htons(oldlen), htons(datalen), 1); +} + +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) +static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range *range) +{ + if (tb[CTA_NAT_V4_MINIP]) { + range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]); + range->flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (tb[CTA_NAT_V4_MAXIP]) + range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]); + else + range->max_addr.ip = range->min_addr.ip; + + return 0; +} +#endif + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = { + .l3proto = NFPROTO_IPV4, + .in_range = nf_nat_ipv4_in_range, + .secure_port = nf_nat_ipv4_secure_port, + .manip_pkt = nf_nat_ipv4_manip_pkt, + .csum_update = nf_nat_ipv4_csum_update, + .csum_recalc = nf_nat_ipv4_csum_recalc, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .nlattr_to_range = nf_nat_ipv4_nlattr_to_range, +#endif +#ifdef CONFIG_XFRM + .decode_session = nf_nat_ipv4_decode_session, +#endif +}; + +int nf_nat_icmp_reply_translation(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum) +{ + struct { + struct icmphdr icmp; + struct iphdr ip; + } *inside; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); + unsigned int hdrlen = ip_hdrlen(skb); + const struct nf_nat_l4proto *l4proto; + struct nf_conntrack_tuple target; + unsigned long statusbit; + + NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY); + + if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) + return 0; + if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) + return 0; + + inside = (void *)skb->data + hdrlen; + if (inside->icmp.type == ICMP_REDIRECT) { + if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) + return 0; + if (ct->status & IPS_NAT_MASK) + return 0; + } + + if (manip == NF_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; + + /* Invert if this is reply direction */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; + + if (!(ct->status & statusbit)) + return 1; + + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol); + if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp), + l4proto, &ct->tuplehash[!dir].tuple, !manip)) + return 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + /* Reloading "inside" here since manip_pkt may reallocate */ + inside = (void *)skb->data + hdrlen; + inside->icmp.checksum = 0; + inside->icmp.checksum = + csum_fold(skb_checksum(skb, hdrlen, + skb->len - hdrlen, 0)); + } + + /* Change outer to look like the reply to an incoming packet */ + nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0); + if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip)) + return 0; + + return 1; +} +EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); + +unsigned int +nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct nf_hook_state *state, + unsigned int (*do_chain)(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state, + struct nf_conn *ct)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + struct nf_conn_nat *nat; + /* maniptype == SRC for postrouting. */ + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + + /* We never see fragments: conntrack defrags on pre-routing + * and local-out, and nf_nat_out protects post-routing. + */ + NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); + + ct = nf_ct_get(skb, &ctinfo); + /* Can't track? It's not due to stress, or conntrack would + * have dropped it. Hence it's the user's responsibilty to + * packet filter it out, or implement conntrack/NAT for that + * protocol. 8) --RR + */ + if (!ct) + return NF_ACCEPT; + + /* Don't try to NAT if this packet is not conntracked */ + if (nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { + if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, + ops->hooknum)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + case IP_CT_NEW: + /* Seen it before? This can happen for loopback, retrans, + * or local packets. + */ + if (!nf_nat_initialized(ct, maniptype)) { + unsigned int ret; + + ret = do_chain(ops, skb, state, ct); + if (ret != NF_ACCEPT) + return ret; + + if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum))) + break; + + ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + if (ret != NF_ACCEPT) + return ret; + } else { + pr_debug("Already setup manip %s for ct %p\n", + maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", + ct); + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, + state->out)) + goto oif_changed; + } + break; + + default: + /* ESTABLISHED */ + NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || + ctinfo == IP_CT_ESTABLISHED_REPLY); + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) + goto oif_changed; + } + + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); + +oif_changed: + nf_ct_kill_acct(ct, ctinfo, skb); + return NF_DROP; +} +EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn); + +unsigned int +nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct nf_hook_state *state, + unsigned int (*do_chain)(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state, + struct nf_conn *ct)) +{ + unsigned int ret; + __be32 daddr = ip_hdr(skb)->daddr; + + ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); + if (ret != NF_DROP && ret != NF_STOLEN && + daddr != ip_hdr(skb)->daddr) + skb_dst_drop(skb); + + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_ipv4_in); + +unsigned int +nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct nf_hook_state *state, + unsigned int (*do_chain)(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state, + struct nf_conn *ct)) +{ +#ifdef CONFIG_XFRM + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + int err; +#endif + unsigned int ret; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if ((ct->tuplehash[dir].tuple.src.u3.ip != + ct->tuplehash[!dir].tuple.dst.u3.ip) || + (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && + ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all)) { + err = nf_xfrm_me_harder(skb, AF_INET); + if (err < 0) + ret = NF_DROP_ERR(err); + } + } +#endif + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_ipv4_out); + +unsigned int +nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct nf_hook_state *state, + unsigned int (*do_chain)(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state, + struct nf_conn *ct)) +{ + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned int ret; + int err; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.dst.u3.ip != + ct->tuplehash[!dir].tuple.src.u3.ip) { + err = ip_route_me_harder(skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); + } +#ifdef CONFIG_XFRM + else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && + ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) { + err = nf_xfrm_me_harder(skb, AF_INET); + if (err < 0) + ret = NF_DROP_ERR(err); + } +#endif + } + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_ipv4_local_fn); + +static int __init nf_nat_l3proto_ipv4_init(void) +{ + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp); + if (err < 0) + goto err1; + err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4); + if (err < 0) + goto err2; + return err; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); +err1: + return err; +} + +static void __exit nf_nat_l3proto_ipv4_exit(void) +{ + nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); +} + +MODULE_LICENSE("GPL"); +MODULE_ALIAS("nf-nat-" __stringify(AF_INET)); + +module_init(nf_nat_l3proto_ipv4_init); +module_exit(nf_nat_l3proto_ipv4_exit); diff --git a/kernel/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/kernel/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c new file mode 100644 index 000000000..c6eb42100 --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c @@ -0,0 +1,153 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned int +nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, + const struct nf_nat_range *range, + const struct net_device *out) +{ + struct nf_conn *ct; + struct nf_conn_nat *nat; + enum ip_conntrack_info ctinfo; + struct nf_nat_range newrange; + const struct rtable *rt; + __be32 newsrc, nh; + + NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING); + + ct = nf_ct_get(skb, &ctinfo); + nat = nfct_nat(ct); + + NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY)); + + /* Source address is 0.0.0.0 - locally generated packet that is + * probably not supposed to be masqueraded. + */ + if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) + return NF_ACCEPT; + + rt = skb_rtable(skb); + nh = rt_nexthop(rt, ip_hdr(skb)->daddr); + newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE); + if (!newsrc) { + pr_info("%s ate my IP address\n", out->name); + return NF_DROP; + } + + nat->masq_index = out->ifindex; + + /* Transfer from original range. */ + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = newsrc; + newrange.max_addr.ip = newsrc; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + /* Hand modified range to generic setup. */ + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); +} +EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4); + +static int device_cmp(struct nf_conn *i, void *ifindex) +{ + const struct nf_conn_nat *nat = nfct_nat(i); + + if (!nat) + return 0; + if (nf_ct_l3num(i) != NFPROTO_IPV4) + return 0; + return nat->masq_index == (int)(long)ifindex; +} + +static int masq_device_event(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + const struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); + + if (event == NETDEV_DOWN) { + /* Device was downed. Search entire table for + * conntracks which were associated with that device, + * and forget them. + */ + NF_CT_ASSERT(dev->ifindex != 0); + + nf_ct_iterate_cleanup(net, device_cmp, + (void *)(long)dev->ifindex, 0, 0); + } + + return NOTIFY_DONE; +} + +static int masq_inet_event(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; + struct netdev_notifier_info info; + + netdev_notifier_info_init(&info, dev); + return masq_device_event(this, event, &info); +} + +static struct notifier_block masq_dev_notifier = { + .notifier_call = masq_device_event, +}; + +static struct notifier_block masq_inet_notifier = { + .notifier_call = masq_inet_event, +}; + +static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0); + +void nf_nat_masquerade_ipv4_register_notifier(void) +{ + /* check if the notifier was already set */ + if (atomic_inc_return(&masquerade_notifier_refcount) > 1) + return; + + /* Register for device down reports */ + register_netdevice_notifier(&masq_dev_notifier); + /* Register IP address change reports */ + register_inetaddr_notifier(&masq_inet_notifier); +} +EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier); + +void nf_nat_masquerade_ipv4_unregister_notifier(void) +{ + /* check if the notifier still has clients */ + if (atomic_dec_return(&masquerade_notifier_refcount) > 0) + return; + + unregister_netdevice_notifier(&masq_dev_notifier); + unregister_inetaddr_notifier(&masq_inet_notifier); +} +EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Rusty Russell "); diff --git a/kernel/net/ipv4/netfilter/nf_nat_pptp.c b/kernel/net/ipv4/netfilter/nf_nat_pptp.c new file mode 100644 index 000000000..657d2307f --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_nat_pptp.c @@ -0,0 +1,311 @@ +/* + * nf_nat_pptp.c + * + * NAT support for PPTP (Point to Point Tunneling Protocol). + * PPTP is a a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * (C) 2006-2012 Patrick McHardy + * + * TODO: - NAT to a unique tuple, not to TCP source port + * (needs netfilter tuple reservation) + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define NF_NAT_PPTP_VERSION "3.0" + +#define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off))) + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); +MODULE_ALIAS("ip_nat_pptp"); + +static void pptp_nat_expected(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct net *net = nf_ct_net(ct); + const struct nf_conn *master = ct->master; + struct nf_conntrack_expect *other_exp; + struct nf_conntrack_tuple t; + const struct nf_ct_pptp_master *ct_pptp_info; + const struct nf_nat_pptp *nat_pptp_info; + struct nf_nat_range range; + + ct_pptp_info = nfct_help_data(master); + nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; + + /* And here goes the grand finale of corrosion... */ + if (exp->dir == IP_CT_DIR_ORIGINAL) { + pr_debug("we are PNS->PAC\n"); + /* therefore, build tuple for PAC->PNS */ + t.src.l3num = AF_INET; + t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; + t.src.u.gre.key = ct_pptp_info->pac_call_id; + t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip; + t.dst.u.gre.key = ct_pptp_info->pns_call_id; + t.dst.protonum = IPPROTO_GRE; + } else { + pr_debug("we are PAC->PNS\n"); + /* build tuple for PNS->PAC */ + t.src.l3num = AF_INET; + t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; + t.src.u.gre.key = nat_pptp_info->pns_call_id; + t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip; + t.dst.u.gre.key = nat_pptp_info->pac_call_id; + t.dst.protonum = IPPROTO_GRE; + } + + pr_debug("trying to unexpect other dir: "); + nf_ct_dump_tuple_ip(&t); + other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t); + if (other_exp) { + nf_ct_unexpect_related(other_exp); + nf_ct_expect_put(other_exp); + pr_debug("success\n"); + } else { + pr_debug("not found!\n"); + } + + /* This must be a fresh one. */ + BUG_ON(ct->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.dst.u3; + if (exp->dir == IP_CT_DIR_ORIGINAL) { + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + range.min_proto = range.max_proto = exp->saved_proto; + } + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); + + /* For DST manip, map port here to where it's expected. */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.src.u3; + if (exp->dir == IP_CT_DIR_REPLY) { + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + range.min_proto = range.max_proto = exp->saved_proto; + } + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); +} + +/* outbound packets == from PNS to PAC */ +static int +pptp_outbound_pkt(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) + +{ + struct nf_ct_pptp_master *ct_pptp_info; + struct nf_nat_pptp *nat_pptp_info; + u_int16_t msg; + __be16 new_callid; + unsigned int cid_off; + + ct_pptp_info = nfct_help_data(ct); + nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; + + new_callid = ct_pptp_info->pns_call_id; + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); + /* FIXME: ideally we would want to reserve a call ID + * here. current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ + + /* save original call ID in nat_info */ + nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; + + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + /* save new call ID in ct info */ + ct_pptp_info->pns_call_id = new_callid; + break; + case PPTP_IN_CALL_REPLY: + cid_off = offsetof(union pptp_ctrl_union, icack.callID); + break; + case PPTP_CALL_CLEAR_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); + break; + default: + pr_debug("unknown outbound packet 0x%04x:%s\n", msg, + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : + pptp_msg_name[0]); + /* fall through */ + case PPTP_SET_LINK_INFO: + /* only need to NAT in case PAC is behind NAT box */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass + * down to here */ + pr_debug("altering call id from 0x%04x to 0x%04x\n", + ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); + + /* mangle packet */ + if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, + cid_off + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader), + sizeof(new_callid), (char *)&new_callid, + sizeof(new_callid)) == 0) + return NF_DROP; + return NF_ACCEPT; +} + +static void +pptp_exp_gre(struct nf_conntrack_expect *expect_orig, + struct nf_conntrack_expect *expect_reply) +{ + const struct nf_conn *ct = expect_orig->master; + struct nf_ct_pptp_master *ct_pptp_info; + struct nf_nat_pptp *nat_pptp_info; + + ct_pptp_info = nfct_help_data(ct); + nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; + + /* save original PAC call ID in nat_info */ + nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; + + /* alter expectation for PNS->PAC direction */ + expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id; + expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id; + expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id; + expect_orig->dir = IP_CT_DIR_ORIGINAL; + + /* alter expectation for PAC->PNS direction */ + expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id; + expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id; + expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id; + expect_reply->dir = IP_CT_DIR_REPLY; +} + +/* inbound packets == from PAC to PNS */ +static int +pptp_inbound_pkt(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) +{ + const struct nf_nat_pptp *nat_pptp_info; + u_int16_t msg; + __be16 new_pcid; + unsigned int pcid_off; + + nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; + new_pcid = nat_pptp_info->pns_call_id; + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REPLY: + pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID); + break; + case PPTP_IN_CALL_CONNECT: + pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID); + break; + case PPTP_IN_CALL_REQUEST: + /* only need to nat in case PAC is behind NAT box */ + return NF_ACCEPT; + case PPTP_WAN_ERROR_NOTIFY: + pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID); + break; + case PPTP_CALL_DISCONNECT_NOTIFY: + pcid_off = offsetof(union pptp_ctrl_union, disc.callID); + break; + case PPTP_SET_LINK_INFO: + pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID); + break; + default: + pr_debug("unknown inbound packet %s\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : + pptp_msg_name[0]); + /* fall through */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST, + * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ + + /* mangle packet */ + pr_debug("altering peer call id from 0x%04x to 0x%04x\n", + ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); + + if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, + pcid_off + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader), + sizeof(new_pcid), (char *)&new_pcid, + sizeof(new_pcid)) == 0) + return NF_DROP; + return NF_ACCEPT; +} + +static int __init nf_nat_helper_pptp_init(void) +{ + nf_nat_need_gre(); + + BUG_ON(nf_nat_pptp_hook_outbound != NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt); + + BUG_ON(nf_nat_pptp_hook_inbound != NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, pptp_inbound_pkt); + + BUG_ON(nf_nat_pptp_hook_exp_gre != NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, pptp_exp_gre); + + BUG_ON(nf_nat_pptp_hook_expectfn != NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, pptp_nat_expected); + return 0; +} + +static void __exit nf_nat_helper_pptp_fini(void) +{ + RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, NULL); + synchronize_rcu(); +} + +module_init(nf_nat_helper_pptp_init); +module_exit(nf_nat_helper_pptp_fini); diff --git a/kernel/net/ipv4/netfilter/nf_nat_proto_gre.c b/kernel/net/ipv4/netfilter/nf_nat_proto_gre.c new file mode 100644 index 000000000..9414923f1 --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -0,0 +1,149 @@ +/* + * nf_nat_proto_gre.c + * + * NAT protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. + * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * (C) 2006-2012 Patrick McHardy + * + */ + +#include +#include +#include + +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); + +/* generate unique tuple ... */ +static void +gre_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + static u_int16_t key; + __be16 *keyptr; + unsigned int min, i, range_size; + + /* If there is no master conntrack we are not PPTP, + do not change tuples */ + if (!ct->master) + return; + + if (maniptype == NF_NAT_MANIP_SRC) + keyptr = &tuple->src.u.gre.key; + else + keyptr = &tuple->dst.u.gre.key; + + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { + pr_debug("%p: NATing GRE PPTP\n", ct); + min = 1; + range_size = 0xffff; + } else { + min = ntohs(range->min_proto.gre.key); + range_size = ntohs(range->max_proto.gre.key) - min + 1; + } + + pr_debug("min = %u, range_size = %u\n", min, range_size); + + for (i = 0; ; ++key) { + *keyptr = htons(min + key % range_size); + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) + return; + } + + pr_debug("%p: no NAT mapping\n", ct); + return; +} + +/* manipulate a GRE packet according to maniptype */ +static bool +gre_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + const struct gre_hdr *greh; + struct gre_hdr_pptp *pgreh; + + /* pgreh includes two optional 32bit fields which are not required + * to be there. That's where the magic '8' comes from */ + if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8)) + return false; + + greh = (void *)skb->data + hdroff; + pgreh = (struct gre_hdr_pptp *)greh; + + /* we only have destination manip of a packet, since 'source key' + * is not present in the packet itself */ + if (maniptype != NF_NAT_MANIP_DST) + return true; + switch (greh->version) { + case GRE_VERSION_1701: + /* We do not currently NAT any GREv0 packets. + * Try to behave like "nf_nat_proto_unknown" */ + break; + case GRE_VERSION_PPTP: + pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); + pgreh->call_id = tuple->dst.u.gre.key; + break; + default: + pr_debug("can't nat unknown GRE version\n"); + return false; + } + return true; +} + +static const struct nf_nat_l4proto gre = { + .l4proto = IPPROTO_GRE, + .manip_pkt = gre_manip_pkt, + .in_range = nf_nat_l4proto_in_range, + .unique_tuple = gre_unique_tuple, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; + +static int __init nf_nat_proto_gre_init(void) +{ + return nf_nat_l4proto_register(NFPROTO_IPV4, &gre); +} + +static void __exit nf_nat_proto_gre_fini(void) +{ + nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre); +} + +module_init(nf_nat_proto_gre_init); +module_exit(nf_nat_proto_gre_fini); + +void nf_nat_need_gre(void) +{ + return; +} +EXPORT_SYMBOL_GPL(nf_nat_need_gre); diff --git a/kernel/net/ipv4/netfilter/nf_nat_proto_icmp.c b/kernel/net/ipv4/netfilter/nf_nat_proto_icmp.c new file mode 100644 index 000000000..4557b4ab8 --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -0,0 +1,83 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static bool +icmp_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && + ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); +} + +static void +icmp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + static u_int16_t id; + unsigned int range_size; + unsigned int i; + + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; + /* If no range specified... */ + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) + range_size = 0xFFFF; + + for (i = 0; ; ++id) { + tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) + + (id % range_size)); + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) + return; + } + return; +} + +static bool +icmp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct icmphdr *hdr; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct icmphdr *)(skb->data + hdroff); + inet_proto_csum_replace2(&hdr->checksum, skb, + hdr->un.echo.id, tuple->src.u.icmp.id, 0); + hdr->un.echo.id = tuple->src.u.icmp.id; + return true; +} + +const struct nf_nat_l4proto nf_nat_l4proto_icmp = { + .l4proto = IPPROTO_ICMP, + .manip_pkt = icmp_manip_pkt, + .in_range = icmp_in_range, + .unique_tuple = icmp_unique_tuple, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; diff --git a/kernel/net/ipv4/netfilter/nf_nat_snmp_basic.c b/kernel/net/ipv4/netfilter/nf_nat_snmp_basic.c new file mode 100644 index 000000000..7c6766713 --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -0,0 +1,1313 @@ +/* + * nf_nat_snmp_basic.c + * + * Basic SNMP Application Layer Gateway + * + * This IP NAT module is intended for use with SNMP network + * discovery and monitoring applications where target networks use + * conflicting private address realms. + * + * Static NAT is used to remap the networks from the view of the network + * management system at the IP layer, and this module remaps some application + * layer addresses to match. + * + * The simplest form of ALG is performed, where only tagged IP addresses + * are modified. The module does not need to be MIB aware and only scans + * messages at the ASN.1/BER level. + * + * Currently, only SNMPv1 and SNMPv2 are supported. + * + * More information on ALG and associated issues can be found in + * RFC 2962 + * + * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory + * McLean & Jochen Friedrich, stripped down for use in the kernel. + * + * Copyright (c) 2000 RP Internet (www.rpi.net.au). + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + * Author: James Morris + * + * Copyright (c) 2006-2010 Patrick McHardy + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris "); +MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway"); +MODULE_ALIAS("ip_nat_snmp_basic"); + +#define SNMP_PORT 161 +#define SNMP_TRAP_PORT 162 +#define NOCT1(n) (*(u8 *)(n)) + +static int debug; +static DEFINE_SPINLOCK(snmp_lock); + +/* + * Application layer address mapping mimics the NAT mapping, but + * only for the first octet in this case (a more flexible system + * can be implemented if needed). + */ +struct oct1_map +{ + u_int8_t from; + u_int8_t to; +}; + + +/***************************************************************************** + * + * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse) + * + *****************************************************************************/ + +/* Class */ +#define ASN1_UNI 0 /* Universal */ +#define ASN1_APL 1 /* Application */ +#define ASN1_CTX 2 /* Context */ +#define ASN1_PRV 3 /* Private */ + +/* Tag */ +#define ASN1_EOC 0 /* End Of Contents */ +#define ASN1_BOL 1 /* Boolean */ +#define ASN1_INT 2 /* Integer */ +#define ASN1_BTS 3 /* Bit String */ +#define ASN1_OTS 4 /* Octet String */ +#define ASN1_NUL 5 /* Null */ +#define ASN1_OJI 6 /* Object Identifier */ +#define ASN1_OJD 7 /* Object Description */ +#define ASN1_EXT 8 /* External */ +#define ASN1_SEQ 16 /* Sequence */ +#define ASN1_SET 17 /* Set */ +#define ASN1_NUMSTR 18 /* Numerical String */ +#define ASN1_PRNSTR 19 /* Printable String */ +#define ASN1_TEXSTR 20 /* Teletext String */ +#define ASN1_VIDSTR 21 /* Video String */ +#define ASN1_IA5STR 22 /* IA5 String */ +#define ASN1_UNITIM 23 /* Universal Time */ +#define ASN1_GENTIM 24 /* General Time */ +#define ASN1_GRASTR 25 /* Graphical String */ +#define ASN1_VISSTR 26 /* Visible String */ +#define ASN1_GENSTR 27 /* General String */ + +/* Primitive / Constructed methods*/ +#define ASN1_PRI 0 /* Primitive */ +#define ASN1_CON 1 /* Constructed */ + +/* + * Error codes. + */ +#define ASN1_ERR_NOERROR 0 +#define ASN1_ERR_DEC_EMPTY 2 +#define ASN1_ERR_DEC_EOC_MISMATCH 3 +#define ASN1_ERR_DEC_LENGTH_MISMATCH 4 +#define ASN1_ERR_DEC_BADVALUE 5 + +/* + * ASN.1 context. + */ +struct asn1_ctx +{ + int error; /* Error condition */ + unsigned char *pointer; /* Octet just to be decoded */ + unsigned char *begin; /* First octet */ + unsigned char *end; /* Octet after last octet */ +}; + +/* + * Octet string (not null terminated) + */ +struct asn1_octstr +{ + unsigned char *data; + unsigned int len; +}; + +static void asn1_open(struct asn1_ctx *ctx, + unsigned char *buf, + unsigned int len) +{ + ctx->begin = buf; + ctx->end = buf + len; + ctx->pointer = buf; + ctx->error = ASN1_ERR_NOERROR; +} + +static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch) +{ + if (ctx->pointer >= ctx->end) { + ctx->error = ASN1_ERR_DEC_EMPTY; + return 0; + } + *ch = *(ctx->pointer)++; + return 1; +} + +static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag) +{ + unsigned char ch; + + *tag = 0; + + do + { + if (!asn1_octet_decode(ctx, &ch)) + return 0; + *tag <<= 7; + *tag |= ch & 0x7F; + } while ((ch & 0x80) == 0x80); + return 1; +} + +static unsigned char asn1_id_decode(struct asn1_ctx *ctx, + unsigned int *cls, + unsigned int *con, + unsigned int *tag) +{ + unsigned char ch; + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *cls = (ch & 0xC0) >> 6; + *con = (ch & 0x20) >> 5; + *tag = (ch & 0x1F); + + if (*tag == 0x1F) { + if (!asn1_tag_decode(ctx, tag)) + return 0; + } + return 1; +} + +static unsigned char asn1_length_decode(struct asn1_ctx *ctx, + unsigned int *def, + unsigned int *len) +{ + unsigned char ch, cnt; + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + if (ch == 0x80) + *def = 0; + else { + *def = 1; + + if (ch < 0x80) + *len = ch; + else { + cnt = ch & 0x7F; + *len = 0; + + while (cnt > 0) { + if (!asn1_octet_decode(ctx, &ch)) + return 0; + *len <<= 8; + *len |= ch; + cnt--; + } + } + } + + /* don't trust len bigger than ctx buffer */ + if (*len > ctx->end - ctx->pointer) + return 0; + + return 1; +} + +static unsigned char asn1_header_decode(struct asn1_ctx *ctx, + unsigned char **eoc, + unsigned int *cls, + unsigned int *con, + unsigned int *tag) +{ + unsigned int def, len; + + if (!asn1_id_decode(ctx, cls, con, tag)) + return 0; + + def = len = 0; + if (!asn1_length_decode(ctx, &def, &len)) + return 0; + + /* primitive shall be definite, indefinite shall be constructed */ + if (*con == ASN1_PRI && !def) + return 0; + + if (def) + *eoc = ctx->pointer + len; + else + *eoc = NULL; + return 1; +} + +static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc) +{ + unsigned char ch; + + if (eoc == NULL) { + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + if (ch != 0x00) { + ctx->error = ASN1_ERR_DEC_EOC_MISMATCH; + return 0; + } + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + if (ch != 0x00) { + ctx->error = ASN1_ERR_DEC_EOC_MISMATCH; + return 0; + } + return 1; + } else { + if (ctx->pointer != eoc) { + ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH; + return 0; + } + return 1; + } +} + +static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc) +{ + ctx->pointer = eoc; + return 1; +} + +static unsigned char asn1_long_decode(struct asn1_ctx *ctx, + unsigned char *eoc, + long *integer) +{ + unsigned char ch; + unsigned int len; + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *integer = (signed char) ch; + len = 1; + + while (ctx->pointer < eoc) { + if (++len > sizeof (long)) { + ctx->error = ASN1_ERR_DEC_BADVALUE; + return 0; + } + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *integer <<= 8; + *integer |= ch; + } + return 1; +} + +static unsigned char asn1_uint_decode(struct asn1_ctx *ctx, + unsigned char *eoc, + unsigned int *integer) +{ + unsigned char ch; + unsigned int len; + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *integer = ch; + if (ch == 0) len = 0; + else len = 1; + + while (ctx->pointer < eoc) { + if (++len > sizeof (unsigned int)) { + ctx->error = ASN1_ERR_DEC_BADVALUE; + return 0; + } + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *integer <<= 8; + *integer |= ch; + } + return 1; +} + +static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx, + unsigned char *eoc, + unsigned long *integer) +{ + unsigned char ch; + unsigned int len; + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *integer = ch; + if (ch == 0) len = 0; + else len = 1; + + while (ctx->pointer < eoc) { + if (++len > sizeof (unsigned long)) { + ctx->error = ASN1_ERR_DEC_BADVALUE; + return 0; + } + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *integer <<= 8; + *integer |= ch; + } + return 1; +} + +static unsigned char asn1_octets_decode(struct asn1_ctx *ctx, + unsigned char *eoc, + unsigned char **octets, + unsigned int *len) +{ + unsigned char *ptr; + + *len = 0; + + *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC); + if (*octets == NULL) + return 0; + + ptr = *octets; + while (ctx->pointer < eoc) { + if (!asn1_octet_decode(ctx, ptr++)) { + kfree(*octets); + *octets = NULL; + return 0; + } + (*len)++; + } + return 1; +} + +static unsigned char asn1_subid_decode(struct asn1_ctx *ctx, + unsigned long *subid) +{ + unsigned char ch; + + *subid = 0; + + do { + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *subid <<= 7; + *subid |= ch & 0x7F; + } while ((ch & 0x80) == 0x80); + return 1; +} + +static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, + unsigned char *eoc, + unsigned long **oid, + unsigned int *len) +{ + unsigned long subid; + unsigned long *optr; + size_t size; + + size = eoc - ctx->pointer + 1; + + /* first subid actually encodes first two subids */ + if (size < 2 || size > ULONG_MAX/sizeof(unsigned long)) + return 0; + + *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); + if (*oid == NULL) + return 0; + + optr = *oid; + + if (!asn1_subid_decode(ctx, &subid)) { + kfree(*oid); + *oid = NULL; + return 0; + } + + if (subid < 40) { + optr[0] = 0; + optr[1] = subid; + } else if (subid < 80) { + optr[0] = 1; + optr[1] = subid - 40; + } else { + optr[0] = 2; + optr[1] = subid - 80; + } + + *len = 2; + optr += 2; + + while (ctx->pointer < eoc) { + if (++(*len) > size) { + ctx->error = ASN1_ERR_DEC_BADVALUE; + kfree(*oid); + *oid = NULL; + return 0; + } + + if (!asn1_subid_decode(ctx, optr++)) { + kfree(*oid); + *oid = NULL; + return 0; + } + } + return 1; +} + +/***************************************************************************** + * + * SNMP decoding routines (gxsnmp author Dirk Wisse) + * + *****************************************************************************/ + +/* SNMP Versions */ +#define SNMP_V1 0 +#define SNMP_V2C 1 +#define SNMP_V2 2 +#define SNMP_V3 3 + +/* Default Sizes */ +#define SNMP_SIZE_COMM 256 +#define SNMP_SIZE_OBJECTID 128 +#define SNMP_SIZE_BUFCHR 256 +#define SNMP_SIZE_BUFINT 128 +#define SNMP_SIZE_SMALLOBJECTID 16 + +/* Requests */ +#define SNMP_PDU_GET 0 +#define SNMP_PDU_NEXT 1 +#define SNMP_PDU_RESPONSE 2 +#define SNMP_PDU_SET 3 +#define SNMP_PDU_TRAP1 4 +#define SNMP_PDU_BULK 5 +#define SNMP_PDU_INFORM 6 +#define SNMP_PDU_TRAP2 7 + +/* Errors */ +#define SNMP_NOERROR 0 +#define SNMP_TOOBIG 1 +#define SNMP_NOSUCHNAME 2 +#define SNMP_BADVALUE 3 +#define SNMP_READONLY 4 +#define SNMP_GENERROR 5 +#define SNMP_NOACCESS 6 +#define SNMP_WRONGTYPE 7 +#define SNMP_WRONGLENGTH 8 +#define SNMP_WRONGENCODING 9 +#define SNMP_WRONGVALUE 10 +#define SNMP_NOCREATION 11 +#define SNMP_INCONSISTENTVALUE 12 +#define SNMP_RESOURCEUNAVAILABLE 13 +#define SNMP_COMMITFAILED 14 +#define SNMP_UNDOFAILED 15 +#define SNMP_AUTHORIZATIONERROR 16 +#define SNMP_NOTWRITABLE 17 +#define SNMP_INCONSISTENTNAME 18 + +/* General SNMP V1 Traps */ +#define SNMP_TRAP_COLDSTART 0 +#define SNMP_TRAP_WARMSTART 1 +#define SNMP_TRAP_LINKDOWN 2 +#define SNMP_TRAP_LINKUP 3 +#define SNMP_TRAP_AUTFAILURE 4 +#define SNMP_TRAP_EQPNEIGHBORLOSS 5 +#define SNMP_TRAP_ENTSPECIFIC 6 + +/* SNMPv1 Types */ +#define SNMP_NULL 0 +#define SNMP_INTEGER 1 /* l */ +#define SNMP_OCTETSTR 2 /* c */ +#define SNMP_DISPLAYSTR 2 /* c */ +#define SNMP_OBJECTID 3 /* ul */ +#define SNMP_IPADDR 4 /* uc */ +#define SNMP_COUNTER 5 /* ul */ +#define SNMP_GAUGE 6 /* ul */ +#define SNMP_TIMETICKS 7 /* ul */ +#define SNMP_OPAQUE 8 /* c */ + +/* Additional SNMPv2 Types */ +#define SNMP_UINTEGER 5 /* ul */ +#define SNMP_BITSTR 9 /* uc */ +#define SNMP_NSAP 10 /* uc */ +#define SNMP_COUNTER64 11 /* ul */ +#define SNMP_NOSUCHOBJECT 12 +#define SNMP_NOSUCHINSTANCE 13 +#define SNMP_ENDOFMIBVIEW 14 + +union snmp_syntax +{ + unsigned char uc[0]; /* 8 bit unsigned */ + char c[0]; /* 8 bit signed */ + unsigned long ul[0]; /* 32 bit unsigned */ + long l[0]; /* 32 bit signed */ +}; + +struct snmp_object +{ + unsigned long *id; + unsigned int id_len; + unsigned short type; + unsigned int syntax_len; + union snmp_syntax syntax; +}; + +struct snmp_request +{ + unsigned long id; + unsigned int error_status; + unsigned int error_index; +}; + +struct snmp_v1_trap +{ + unsigned long *id; + unsigned int id_len; + unsigned long ip_address; /* pointer */ + unsigned int general; + unsigned int specific; + unsigned long time; +}; + +/* SNMP types */ +#define SNMP_IPA 0 +#define SNMP_CNT 1 +#define SNMP_GGE 2 +#define SNMP_TIT 3 +#define SNMP_OPQ 4 +#define SNMP_C64 6 + +/* SNMP errors */ +#define SERR_NSO 0 +#define SERR_NSI 1 +#define SERR_EOM 2 + +static inline void mangle_address(unsigned char *begin, + unsigned char *addr, + const struct oct1_map *map, + __sum16 *check); +struct snmp_cnv +{ + unsigned int class; + unsigned int tag; + int syntax; +}; + +static const struct snmp_cnv snmp_conv[] = { + {ASN1_UNI, ASN1_NUL, SNMP_NULL}, + {ASN1_UNI, ASN1_INT, SNMP_INTEGER}, + {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR}, + {ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR}, + {ASN1_UNI, ASN1_OJI, SNMP_OBJECTID}, + {ASN1_APL, SNMP_IPA, SNMP_IPADDR}, + {ASN1_APL, SNMP_CNT, SNMP_COUNTER}, /* Counter32 */ + {ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */ + {ASN1_APL, SNMP_TIT, SNMP_TIMETICKS}, + {ASN1_APL, SNMP_OPQ, SNMP_OPAQUE}, + + /* SNMPv2 data types and errors */ + {ASN1_UNI, ASN1_BTS, SNMP_BITSTR}, + {ASN1_APL, SNMP_C64, SNMP_COUNTER64}, + {ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT}, + {ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE}, + {ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW}, + {0, 0, -1} +}; + +static unsigned char snmp_tag_cls2syntax(unsigned int tag, + unsigned int cls, + unsigned short *syntax) +{ + const struct snmp_cnv *cnv; + + cnv = snmp_conv; + + while (cnv->syntax != -1) { + if (cnv->tag == tag && cnv->class == cls) { + *syntax = cnv->syntax; + return 1; + } + cnv++; + } + return 0; +} + +static unsigned char snmp_object_decode(struct asn1_ctx *ctx, + struct snmp_object **obj) +{ + unsigned int cls, con, tag, len, idlen; + unsigned short type; + unsigned char *eoc, *end, *p; + unsigned long *lp, *id; + unsigned long ul; + long l; + + *obj = NULL; + id = NULL; + + if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag)) + return 0; + + if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) + return 0; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + return 0; + + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI) + return 0; + + if (!asn1_oid_decode(ctx, end, &id, &idlen)) + return 0; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) { + kfree(id); + return 0; + } + + if (con != ASN1_PRI) { + kfree(id); + return 0; + } + + type = 0; + if (!snmp_tag_cls2syntax(tag, cls, &type)) { + kfree(id); + return 0; + } + + l = 0; + switch (type) { + case SNMP_INTEGER: + len = sizeof(long); + if (!asn1_long_decode(ctx, end, &l)) { + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + return 0; + } + (*obj)->syntax.l[0] = l; + break; + case SNMP_OCTETSTR: + case SNMP_OPAQUE: + if (!asn1_octets_decode(ctx, end, &p, &len)) { + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(p); + kfree(id); + return 0; + } + memcpy((*obj)->syntax.c, p, len); + kfree(p); + break; + case SNMP_NULL: + case SNMP_NOSUCHOBJECT: + case SNMP_NOSUCHINSTANCE: + case SNMP_ENDOFMIBVIEW: + len = 0; + *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + return 0; + } + if (!asn1_null_decode(ctx, end)) { + kfree(id); + kfree(*obj); + *obj = NULL; + return 0; + } + break; + case SNMP_OBJECTID: + if (!asn1_oid_decode(ctx, end, &lp, &len)) { + kfree(id); + return 0; + } + len *= sizeof(unsigned long); + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(lp); + kfree(id); + return 0; + } + memcpy((*obj)->syntax.ul, lp, len); + kfree(lp); + break; + case SNMP_IPADDR: + if (!asn1_octets_decode(ctx, end, &p, &len)) { + kfree(id); + return 0; + } + if (len != 4) { + kfree(p); + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(p); + kfree(id); + return 0; + } + memcpy((*obj)->syntax.uc, p, len); + kfree(p); + break; + case SNMP_COUNTER: + case SNMP_GAUGE: + case SNMP_TIMETICKS: + len = sizeof(unsigned long); + if (!asn1_ulong_decode(ctx, end, &ul)) { + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + return 0; + } + (*obj)->syntax.ul[0] = ul; + break; + default: + kfree(id); + return 0; + } + + (*obj)->syntax_len = len; + (*obj)->type = type; + (*obj)->id = id; + (*obj)->id_len = idlen; + + if (!asn1_eoc_decode(ctx, eoc)) { + kfree(id); + kfree(*obj); + *obj = NULL; + return 0; + } + return 1; +} + +static unsigned char snmp_request_decode(struct asn1_ctx *ctx, + struct snmp_request *request) +{ + unsigned int cls, con, tag; + unsigned char *end; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + return 0; + + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) + return 0; + + if (!asn1_ulong_decode(ctx, end, &request->id)) + return 0; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + return 0; + + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) + return 0; + + if (!asn1_uint_decode(ctx, end, &request->error_status)) + return 0; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + return 0; + + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) + return 0; + + if (!asn1_uint_decode(ctx, end, &request->error_index)) + return 0; + + return 1; +} + +/* + * Fast checksum update for possibly oddly-aligned UDP byte, from the + * code example in the draft. + */ +static void fast_csum(__sum16 *csum, + const unsigned char *optr, + const unsigned char *nptr, + int offset) +{ + unsigned char s[4]; + + if (offset & 1) { + s[0] = ~0; + s[1] = ~*optr; + s[2] = 0; + s[3] = *nptr; + } else { + s[0] = ~*optr; + s[1] = ~0; + s[2] = *nptr; + s[3] = 0; + } + + *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum))); +} + +/* + * Mangle IP address. + * - begin points to the start of the snmp messgae + * - addr points to the start of the address + */ +static inline void mangle_address(unsigned char *begin, + unsigned char *addr, + const struct oct1_map *map, + __sum16 *check) +{ + if (map->from == NOCT1(addr)) { + u_int32_t old; + + if (debug) + memcpy(&old, addr, sizeof(old)); + + *addr = map->to; + + /* Update UDP checksum if being used */ + if (*check) { + fast_csum(check, + &map->from, &map->to, addr - begin); + + } + + if (debug) + printk(KERN_DEBUG "bsalg: mapped %pI4 to %pI4\n", + &old, addr); + } +} + +static unsigned char snmp_trap_decode(struct asn1_ctx *ctx, + struct snmp_v1_trap *trap, + const struct oct1_map *map, + __sum16 *check) +{ + unsigned int cls, con, tag, len; + unsigned char *end; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + return 0; + + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI) + return 0; + + if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len)) + return 0; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + goto err_id_free; + + if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) || + (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS))) + goto err_id_free; + + if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len)) + goto err_id_free; + + /* IPv4 only */ + if (len != 4) + goto err_addr_free; + + mangle_address(ctx->begin, ctx->pointer - 4, map, check); + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + goto err_addr_free; + + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) + goto err_addr_free; + + if (!asn1_uint_decode(ctx, end, &trap->general)) + goto err_addr_free; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + goto err_addr_free; + + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) + goto err_addr_free; + + if (!asn1_uint_decode(ctx, end, &trap->specific)) + goto err_addr_free; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + goto err_addr_free; + + if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) || + (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT))) + goto err_addr_free; + + if (!asn1_ulong_decode(ctx, end, &trap->time)) + goto err_addr_free; + + return 1; + +err_addr_free: + kfree((unsigned long *)trap->ip_address); + +err_id_free: + kfree(trap->id); + + return 0; +} + +/***************************************************************************** + * + * Misc. routines + * + *****************************************************************************/ + +static void hex_dump(const unsigned char *buf, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (i && !(i % 16)) + printk("\n"); + printk("%02x ", *(buf + i)); + } + printk("\n"); +} + +/* + * Parse and mangle SNMP message according to mapping. + * (And this is the fucking 'basic' method). + */ +static int snmp_parse_mangle(unsigned char *msg, + u_int16_t len, + const struct oct1_map *map, + __sum16 *check) +{ + unsigned char *eoc, *end; + unsigned int cls, con, tag, vers, pdutype; + struct asn1_ctx ctx; + struct asn1_octstr comm; + struct snmp_object *obj; + + if (debug > 1) + hex_dump(msg, len); + + asn1_open(&ctx, msg, len); + + /* + * Start of SNMP message. + */ + if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag)) + return 0; + if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) + return 0; + + /* + * Version 1 or 2 handled. + */ + if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag)) + return 0; + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) + return 0; + if (!asn1_uint_decode (&ctx, end, &vers)) + return 0; + if (debug > 1) + printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1); + if (vers > 1) + return 1; + + /* + * Community. + */ + if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag)) + return 0; + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS) + return 0; + if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len)) + return 0; + if (debug > 1) { + unsigned int i; + + printk(KERN_DEBUG "bsalg: community: "); + for (i = 0; i < comm.len; i++) + printk("%c", comm.data[i]); + printk("\n"); + } + kfree(comm.data); + + /* + * PDU type + */ + if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype)) + return 0; + if (cls != ASN1_CTX || con != ASN1_CON) + return 0; + if (debug > 1) { + static const unsigned char *const pdus[] = { + [SNMP_PDU_GET] = "get", + [SNMP_PDU_NEXT] = "get-next", + [SNMP_PDU_RESPONSE] = "response", + [SNMP_PDU_SET] = "set", + [SNMP_PDU_TRAP1] = "trapv1", + [SNMP_PDU_BULK] = "bulk", + [SNMP_PDU_INFORM] = "inform", + [SNMP_PDU_TRAP2] = "trapv2" + }; + + if (pdutype > SNMP_PDU_TRAP2) + printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype); + else + printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]); + } + if (pdutype != SNMP_PDU_RESPONSE && + pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2) + return 1; + + /* + * Request header or v1 trap + */ + if (pdutype == SNMP_PDU_TRAP1) { + struct snmp_v1_trap trap; + unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check); + + if (ret) { + kfree(trap.id); + kfree((unsigned long *)trap.ip_address); + } else + return ret; + + } else { + struct snmp_request req; + + if (!snmp_request_decode(&ctx, &req)) + return 0; + + if (debug > 1) + printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u " + "error_index=%u\n", req.id, req.error_status, + req.error_index); + } + + /* + * Loop through objects, look for IP addresses to mangle. + */ + if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag)) + return 0; + + if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) + return 0; + + while (!asn1_eoc_decode(&ctx, eoc)) { + unsigned int i; + + if (!snmp_object_decode(&ctx, &obj)) { + if (obj) { + kfree(obj->id); + kfree(obj); + } + return 0; + } + + if (debug > 1) { + printk(KERN_DEBUG "bsalg: object: "); + for (i = 0; i < obj->id_len; i++) { + if (i > 0) + printk("."); + printk("%lu", obj->id[i]); + } + printk(": type=%u\n", obj->type); + + } + + if (obj->type == SNMP_IPADDR) + mangle_address(ctx.begin, ctx.pointer - 4 , map, check); + + kfree(obj->id); + kfree(obj); + } + + if (!asn1_eoc_decode(&ctx, eoc)) + return 0; + + return 1; +} + +/***************************************************************************** + * + * NAT routines. + * + *****************************************************************************/ + +/* + * SNMP translation routine. + */ +static int snmp_translate(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); + u_int16_t udplen = ntohs(udph->len); + u_int16_t paylen = udplen - sizeof(struct udphdr); + int dir = CTINFO2DIR(ctinfo); + struct oct1_map map; + + /* + * Determine mappping for application layer addresses based + * on NAT manipulations for the packet. + */ + if (dir == IP_CT_DIR_ORIGINAL) { + /* SNAT traps */ + map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip); + map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip); + } else { + /* DNAT replies */ + map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip); + map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip); + } + + if (map.from == map.to) + return NF_ACCEPT; + + if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr), + paylen, &map, &udph->check)) { + net_warn_ratelimited("bsalg: parser failed\n"); + return NF_DROP; + } + return NF_ACCEPT; +} + +/* We don't actually set up expectations, just adjust internal IP + * addresses if this is being NATted */ +static int help(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + int dir = CTINFO2DIR(ctinfo); + unsigned int ret; + const struct iphdr *iph = ip_hdr(skb); + const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); + + /* SNMP replies and originating SNMP traps get mangled */ + if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY) + return NF_ACCEPT; + if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL) + return NF_ACCEPT; + + /* No NAT? */ + if (!(ct->status & IPS_NAT_MASK)) + return NF_ACCEPT; + + /* + * Make sure the packet length is ok. So far, we were only guaranteed + * to have a valid length IP header plus 8 bytes, which means we have + * enough room for a UDP header. Just verify the UDP length field so we + * can mess around with the payload. + */ + if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { + net_warn_ratelimited("SNMP: dropping malformed packet src=%pI4 dst=%pI4\n", + &iph->saddr, &iph->daddr); + return NF_DROP; + } + + if (!skb_make_writable(skb, skb->len)) + return NF_DROP; + + spin_lock_bh(&snmp_lock); + ret = snmp_translate(ct, ctinfo, skb); + spin_unlock_bh(&snmp_lock); + return ret; +} + +static const struct nf_conntrack_expect_policy snmp_exp_policy = { + .max_expected = 0, + .timeout = 180, +}; + +static struct nf_conntrack_helper snmp_helper __read_mostly = { + .me = THIS_MODULE, + .help = help, + .expect_policy = &snmp_exp_policy, + .name = "snmp", + .tuple.src.l3num = AF_INET, + .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), + .tuple.dst.protonum = IPPROTO_UDP, +}; + +static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { + .me = THIS_MODULE, + .help = help, + .expect_policy = &snmp_exp_policy, + .name = "snmp_trap", + .tuple.src.l3num = AF_INET, + .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT), + .tuple.dst.protonum = IPPROTO_UDP, +}; + +/***************************************************************************** + * + * Module stuff. + * + *****************************************************************************/ + +static int __init nf_nat_snmp_basic_init(void) +{ + int ret = 0; + + BUG_ON(nf_nat_snmp_hook != NULL); + RCU_INIT_POINTER(nf_nat_snmp_hook, help); + + ret = nf_conntrack_helper_register(&snmp_trap_helper); + if (ret < 0) { + nf_conntrack_helper_unregister(&snmp_helper); + return ret; + } + return ret; +} + +static void __exit nf_nat_snmp_basic_fini(void) +{ + RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); + nf_conntrack_helper_unregister(&snmp_trap_helper); +} + +module_init(nf_nat_snmp_basic_init); +module_exit(nf_nat_snmp_basic_fini); + +module_param(debug, int, 0600); diff --git a/kernel/net/ipv4/netfilter/nf_reject_ipv4.c b/kernel/net/ipv4/netfilter/nf_reject_ipv4.c new file mode 100644 index 000000000..3262e41ff --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_reject_ipv4.c @@ -0,0 +1,192 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *_oth, int hook) +{ + const struct tcphdr *oth; + + /* IP header checks: fragment. */ + if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) + return NULL; + + oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), + sizeof(struct tcphdr), _oth); + if (oth == NULL) + return NULL; + + /* No RST for RST. */ + if (oth->rst) + return NULL; + + /* Check checksum */ + if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) + return NULL; + + return oth; +} +EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_get); + +struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __u8 protocol, int ttl) +{ + struct iphdr *niph, *oiph = ip_hdr(oldskb); + + skb_reset_network_header(nskb); + niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); + niph->version = 4; + niph->ihl = sizeof(struct iphdr) / 4; + niph->tos = 0; + niph->id = 0; + niph->frag_off = htons(IP_DF); + niph->protocol = protocol; + niph->check = 0; + niph->saddr = oiph->daddr; + niph->daddr = oiph->saddr; + niph->ttl = ttl; + + nskb->protocol = htons(ETH_P_IP); + + return niph; +} +EXPORT_SYMBOL_GPL(nf_reject_iphdr_put); + +void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, + const struct tcphdr *oth) +{ + struct iphdr *niph = ip_hdr(nskb); + struct tcphdr *tcph; + + skb_reset_transport_header(nskb); + tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); + memset(tcph, 0, sizeof(*tcph)); + tcph->source = oth->dest; + tcph->dest = oth->source; + tcph->doff = sizeof(struct tcphdr) / 4; + + if (oth->ack) { + tcph->seq = oth->ack_seq; + } else { + tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + + oldskb->len - ip_hdrlen(oldskb) - + (oth->doff << 2)); + tcph->ack = 1; + } + + tcph->rst = 1; + tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, + niph->daddr, 0); + nskb->ip_summed = CHECKSUM_PARTIAL; + nskb->csum_start = (unsigned char *)tcph - nskb->head; + nskb->csum_offset = offsetof(struct tcphdr, check); +} +EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put); + +/* Send RST reply */ +void nf_send_reset(struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + const struct iphdr *oiph; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _oth; + + oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); + if (!oth) + return; + + if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return; + + oiph = ip_hdr(oldskb); + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + /* ip_route_me_harder expects skb->dst to be set */ + skb_dst_set_noref(nskb, skb_dst(oldskb)); + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + ip4_dst_hoplimit(skb_dst(nskb))); + nf_reject_ip_tcphdr_put(nskb, oldskb, oth); + + if (ip_route_me_harder(nskb, RTN_UNSPEC)) + goto free_nskb; + + /* "Never happens" */ + if (nskb->len > dst_mtu(skb_dst(nskb))) + goto free_nskb; + + nf_ct_attach(nskb, oldskb); + +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + /* If we use ip_local_out for bridged traffic, the MAC source on + * the RST will be ours, instead of the destination's. This confuses + * some routers/firewalls, and they drop the packet. So we need to + * build the eth header using the original destination's MAC as the + * source, and send the RST packet directly. + */ + if (oldskb->nf_bridge) { + struct ethhdr *oeth = eth_hdr(oldskb); + + nskb->dev = nf_bridge_get_physindev(oldskb); + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), + oeth->h_source, oeth->h_dest, nskb->len) < 0) + goto free_nskb; + dev_queue_xmit(nskb); + } else +#endif + ip_local_out(nskb); + + return; + + free_nskb: + kfree_skb(nskb); +} +EXPORT_SYMBOL_GPL(nf_send_reset); + +void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) +{ + struct iphdr *iph = ip_hdr(skb_in); + u8 proto; + + if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET)) + return; + + if (skb_csum_unnecessary(skb_in)) { + icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); + return; + } + + if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP) + proto = iph->protocol; + else + proto = 0; + + if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0) + icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); +} +EXPORT_SYMBOL_GPL(nf_send_unreach); + +MODULE_LICENSE("GPL"); diff --git a/kernel/net/ipv4/netfilter/nf_tables_arp.c b/kernel/net/ipv4/netfilter/nf_tables_arp.c new file mode 100644 index 000000000..8412268bb --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_tables_arp.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2008-2010 Patrick McHardy + * Copyright (c) 2013 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include + +static unsigned int +nft_do_chain_arp(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, ops, skb, state); + + return nft_do_chain(&pkt, ops); +} + +static struct nft_af_info nft_af_arp __read_mostly = { + .family = NFPROTO_ARP, + .nhooks = NF_ARP_NUMHOOKS, + .owner = THIS_MODULE, + .nops = 1, + .hooks = { + [NF_ARP_IN] = nft_do_chain_arp, + [NF_ARP_OUT] = nft_do_chain_arp, + [NF_ARP_FORWARD] = nft_do_chain_arp, + }, +}; + +static int nf_tables_arp_init_net(struct net *net) +{ + net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.arp== NULL) + return -ENOMEM; + + memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp)); + + if (nft_register_afinfo(net, net->nft.arp) < 0) + goto err; + + return 0; +err: + kfree(net->nft.arp); + return -ENOMEM; +} + +static void nf_tables_arp_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.arp); + kfree(net->nft.arp); +} + +static struct pernet_operations nf_tables_arp_net_ops = { + .init = nf_tables_arp_init_net, + .exit = nf_tables_arp_exit_net, +}; + +static const struct nf_chain_type filter_arp = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_ARP, + .owner = THIS_MODULE, + .hook_mask = (1 << NF_ARP_IN) | + (1 << NF_ARP_OUT) | + (1 << NF_ARP_FORWARD), +}; + +static int __init nf_tables_arp_init(void) +{ + int ret; + + nft_register_chain_type(&filter_arp); + ret = register_pernet_subsys(&nf_tables_arp_net_ops); + if (ret < 0) + nft_unregister_chain_type(&filter_arp); + + return ret; +} + +static void __exit nf_tables_arp_exit(void) +{ + unregister_pernet_subsys(&nf_tables_arp_net_ops); + nft_unregister_chain_type(&filter_arp); +} + +module_init(nf_tables_arp_init); +module_exit(nf_tables_arp_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */ diff --git a/kernel/net/ipv4/netfilter/nf_tables_ipv4.c b/kernel/net/ipv4/netfilter/nf_tables_ipv4.c new file mode 100644 index 000000000..aa180d3a6 --- /dev/null +++ b/kernel/net/ipv4/netfilter/nf_tables_ipv4.c @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2012-2013 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo_ipv4(&pkt, ops, skb, state); + + return nft_do_chain(&pkt, ops); +} + +static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + if (unlikely(skb->len < sizeof(struct iphdr) || + ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { + if (net_ratelimit()) + pr_info("nf_tables_ipv4: ignoring short SOCK_RAW " + "packet\n"); + return NF_ACCEPT; + } + + return nft_do_chain_ipv4(ops, skb, state); +} + +struct nft_af_info nft_af_ipv4 __read_mostly = { + .family = NFPROTO_IPV4, + .nhooks = NF_INET_NUMHOOKS, + .owner = THIS_MODULE, + .nops = 1, + .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, + [NF_INET_LOCAL_OUT] = nft_ipv4_output, + [NF_INET_FORWARD] = nft_do_chain_ipv4, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, + }, +}; +EXPORT_SYMBOL_GPL(nft_af_ipv4); + +static int nf_tables_ipv4_init_net(struct net *net) +{ + net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.ipv4 == NULL) + return -ENOMEM; + + memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4)); + + if (nft_register_afinfo(net, net->nft.ipv4) < 0) + goto err; + + return 0; +err: + kfree(net->nft.ipv4); + return -ENOMEM; +} + +static void nf_tables_ipv4_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.ipv4); + kfree(net->nft.ipv4); +} + +static struct pernet_operations nf_tables_ipv4_net_ops = { + .init = nf_tables_ipv4_init_net, + .exit = nf_tables_ipv4_exit_net, +}; + +static const struct nf_chain_type filter_ipv4 = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, + .hook_mask = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING), +}; + +static int __init nf_tables_ipv4_init(void) +{ + int ret; + + nft_register_chain_type(&filter_ipv4); + ret = register_pernet_subsys(&nf_tables_ipv4_net_ops); + if (ret < 0) + nft_unregister_chain_type(&filter_ipv4); + + return ret; +} + +static void __exit nf_tables_ipv4_exit(void) +{ + unregister_pernet_subsys(&nf_tables_ipv4_net_ops); + nft_unregister_chain_type(&filter_ipv4); +} + +module_init(nf_tables_ipv4_init); +module_exit(nf_tables_ipv4_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_FAMILY(AF_INET); diff --git a/kernel/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/kernel/net/ipv4/netfilter/nft_chain_nat_ipv4.c new file mode 100644 index 000000000..bf5c30ae1 --- /dev/null +++ b/kernel/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy + * Copyright (c) 2012 Pablo Neira Ayuso + * Copyright (c) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state, + struct nf_conn *ct) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo_ipv4(&pkt, ops, skb, state); + + return nft_do_chain(&pkt, ops); +} + +static unsigned int nft_nat_ipv4_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return nf_nat_ipv4_fn(ops, skb, state, nft_nat_do_chain); +} + +static unsigned int nft_nat_ipv4_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return nf_nat_ipv4_in(ops, skb, state, nft_nat_do_chain); +} + +static unsigned int nft_nat_ipv4_out(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return nf_nat_ipv4_out(ops, skb, state, nft_nat_do_chain); +} + +static unsigned int nft_nat_ipv4_local_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return nf_nat_ipv4_local_fn(ops, skb, state, nft_nat_do_chain); +} + +static const struct nf_chain_type nft_chain_nat_ipv4 = { + .name = "nat", + .type = NFT_CHAIN_T_NAT, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, + .hook_mask = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .hooks = { + [NF_INET_PRE_ROUTING] = nft_nat_ipv4_in, + [NF_INET_POST_ROUTING] = nft_nat_ipv4_out, + [NF_INET_LOCAL_OUT] = nft_nat_ipv4_local_fn, + [NF_INET_LOCAL_IN] = nft_nat_ipv4_fn, + }, +}; + +static int __init nft_chain_nat_init(void) +{ + int err; + + err = nft_register_chain_type(&nft_chain_nat_ipv4); + if (err < 0) + return err; + + return 0; +} + +static void __exit nft_chain_nat_exit(void) +{ + nft_unregister_chain_type(&nft_chain_nat_ipv4); +} + +module_init(nft_chain_nat_init); +module_exit(nft_chain_nat_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat"); diff --git a/kernel/net/ipv4/netfilter/nft_chain_route_ipv4.c b/kernel/net/ipv4/netfilter/nft_chain_route_ipv4.c new file mode 100644 index 000000000..e335b0afd --- /dev/null +++ b/kernel/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2012 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + unsigned int ret; + struct nft_pktinfo pkt; + u32 mark; + __be32 saddr, daddr; + u_int8_t tos; + const struct iphdr *iph; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + nft_set_pktinfo_ipv4(&pkt, ops, skb, state); + + mark = skb->mark; + iph = ip_hdr(skb); + saddr = iph->saddr; + daddr = iph->daddr; + tos = iph->tos; + + ret = nft_do_chain(&pkt, ops); + if (ret != NF_DROP && ret != NF_QUEUE) { + iph = ip_hdr(skb); + + if (iph->saddr != saddr || + iph->daddr != daddr || + skb->mark != mark || + iph->tos != tos) + if (ip_route_me_harder(skb, RTN_UNSPEC)) + ret = NF_DROP; + } + return ret; +} + +static const struct nf_chain_type nft_chain_route_ipv4 = { + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .hooks = { + [NF_INET_LOCAL_OUT] = nf_route_table_hook, + }, +}; + +static int __init nft_chain_route_init(void) +{ + return nft_register_chain_type(&nft_chain_route_ipv4); +} + +static void __exit nft_chain_route_exit(void) +{ + nft_unregister_chain_type(&nft_chain_route_ipv4); +} + +module_init(nft_chain_route_init); +module_exit(nft_chain_route_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_CHAIN(AF_INET, "route"); diff --git a/kernel/net/ipv4/netfilter/nft_masq_ipv4.c b/kernel/net/ipv4/netfilter/nft_masq_ipv4.c new file mode 100644 index 000000000..40e414c4c --- /dev/null +++ b/kernel/net/ipv4/netfilter/nft_masq_ipv4.c @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void nft_masq_ipv4_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_masq *priv = nft_expr_priv(expr); + struct nf_nat_range range; + + memset(&range, 0, sizeof(range)); + range.flags = priv->flags; + + regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum, + &range, pkt->out); +} + +static struct nft_expr_type nft_masq_ipv4_type; +static const struct nft_expr_ops nft_masq_ipv4_ops = { + .type = &nft_masq_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), + .eval = nft_masq_ipv4_eval, + .init = nft_masq_init, + .dump = nft_masq_dump, + .validate = nft_masq_validate, +}; + +static struct nft_expr_type nft_masq_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "masq", + .ops = &nft_masq_ipv4_ops, + .policy = nft_masq_policy, + .maxattr = NFTA_MASQ_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_masq_ipv4_module_init(void) +{ + int ret; + + ret = nft_register_expr(&nft_masq_ipv4_type); + if (ret < 0) + return ret; + + nf_nat_masquerade_ipv4_register_notifier(); + + return ret; +} + +static void __exit nft_masq_ipv4_module_exit(void) +{ + nft_unregister_expr(&nft_masq_ipv4_type); + nf_nat_masquerade_ipv4_unregister_notifier(); +} + +module_init(nft_masq_ipv4_module_init); +module_exit(nft_masq_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez "); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq"); diff --git a/kernel/net/ipv4/netfilter/nft_redir_ipv4.c b/kernel/net/ipv4/netfilter/nft_redir_ipv4.c new file mode 100644 index 000000000..d8d795df9 --- /dev/null +++ b/kernel/net/ipv4/netfilter/nft_redir_ipv4.c @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void nft_redir_ipv4_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_ipv4_multi_range_compat mr; + + memset(&mr, 0, sizeof(mr)); + if (priv->sreg_proto_min) { + mr.range[0].min.all = + *(__be16 *)®s->data[priv->sreg_proto_min]; + mr.range[0].max.all = + *(__be16 *)®s->data[priv->sreg_proto_max]; + mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + mr.range[0].flags |= priv->flags; + + regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, + pkt->ops->hooknum); +} + +static struct nft_expr_type nft_redir_ipv4_type; +static const struct nft_expr_ops nft_redir_ipv4_ops = { + .type = &nft_redir_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), + .eval = nft_redir_ipv4_eval, + .init = nft_redir_init, + .dump = nft_redir_dump, + .validate = nft_redir_validate, +}; + +static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "redir", + .ops = &nft_redir_ipv4_ops, + .policy = nft_redir_policy, + .maxattr = NFTA_REDIR_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_redir_ipv4_module_init(void) +{ + return nft_register_expr(&nft_redir_ipv4_type); +} + +static void __exit nft_redir_ipv4_module_exit(void) +{ + nft_unregister_expr(&nft_redir_ipv4_type); +} + +module_init(nft_redir_ipv4_module_init); +module_exit(nft_redir_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez "); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir"); diff --git a/kernel/net/ipv4/netfilter/nft_reject_ipv4.c b/kernel/net/ipv4/netfilter/nft_reject_ipv4.c new file mode 100644 index 000000000..b07e58b51 --- /dev/null +++ b/kernel/net/ipv4/netfilter/nft_reject_ipv4.c @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy + * Copyright (c) 2013 Eric Leblond + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void nft_reject_ipv4_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_reject *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nf_send_unreach(pkt->skb, priv->icmp_code, + pkt->ops->hooknum); + break; + case NFT_REJECT_TCP_RST: + nf_send_reset(pkt->skb, pkt->ops->hooknum); + break; + default: + break; + } + + regs->verdict.code = NF_DROP; +} + +static struct nft_expr_type nft_reject_ipv4_type; +static const struct nft_expr_ops nft_reject_ipv4_ops = { + .type = &nft_reject_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .eval = nft_reject_ipv4_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, +}; + +static struct nft_expr_type nft_reject_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "reject", + .ops = &nft_reject_ipv4_ops, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_reject_ipv4_module_init(void) +{ + return nft_register_expr(&nft_reject_ipv4_type); +} + +static void __exit nft_reject_ipv4_module_exit(void) +{ + nft_unregister_expr(&nft_reject_ipv4_type); +} + +module_init(nft_reject_ipv4_module_init); +module_exit(nft_reject_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject"); -- cgit 1.2.3-korg