From 3c56f810a4875c9285cc6d1e72ebd2dd4eb8c42d Mon Sep 17 00:00:00 2001
From: klever1988 <56048681+klever1988@users.noreply.github.com>
Date: Wed, 4 Mar 2020 15:18:19 +0800
Subject: [PATCH] Add files via upload

---
 add_fullconenat.diff | 799 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 799 insertions(+)
 create mode 100644 add_fullconenat.diff

diff --git a/add_fullconenat.diff b/add_fullconenat.diff
new file mode 100644
index 0000000..c9b99df
--- /dev/null
+++ b/add_fullconenat.diff
@@ -0,0 +1,799 @@
+diff --git a/arch/arm64/configs/nanopi-r2_linux_defconfig b/arch/arm64/configs/nanopi-r2_linux_defconfig
+index 3f3b7e32f3f2..794537f2a9df 100644
+--- a/arch/arm64/configs/nanopi-r2_linux_defconfig
++++ b/arch/arm64/configs/nanopi-r2_linux_defconfig
+@@ -1664,3 +1664,4 @@ CONFIG_SCHEDSTATS=y
+ CONFIG_DEBUG_SPINLOCK=y
+ CONFIG_FUNCTION_TRACER=y
+ CONFIG_BLK_DEV_IO_TRACE=y
++CONFIG_NETFILTER_XT_TARGET_FULLCONENAT=y
+diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
+index f17b402111ce..5af7577fd9ff 100644
+--- a/net/ipv4/netfilter/Kconfig
++++ b/net/ipv4/netfilter/Kconfig
+@@ -239,6 +239,15 @@ config IP_NF_TARGET_NETMAP
+ 	  (e.g. when running oldconfig).  It selects
+ 	  CONFIG_NETFILTER_XT_TARGET_NETMAP.
+ 
++config IP_NF_TARGET_FULLCONENAT
++	tristate "FULLCONENAT target support"
++	depends on NETFILTER_ADVANCED
++	select NETFILTER_XT_TARGET_FULLCONENAT
++	---help---
++	This is a backwards-compat option for the user's convenience
++	(e.g. when running oldconfig). It selects
++	CONFIG_NETFILTER_XT_TARGET_FULLCONENAT.
++
+ config IP_NF_TARGET_REDIRECT
+ 	tristate "REDIRECT target support"
+ 	depends on NETFILTER_ADVANCED
+diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
+index 91efae88e8c2..6fd1c3cfdc23 100644
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -956,6 +956,15 @@ config NETFILTER_XT_TARGET_NETMAP
+ 
+ 	  To compile it as a module, choose M here.  If unsure, say N.
+ 
++config NETFILTER_XT_TARGET_FULLCONENAT
++	tristate '"FULLCONENAT" target support'
++	depends on NF_NAT
++	---help---
++	Full Cone NAT
++
++	To compile it as a module, choose M here. If unsure, say N.
++
++
+ config NETFILTER_XT_TARGET_NFLOG
+ 	tristate '"NFLOG" target support'
+ 	default m if NETFILTER_ADVANCED=n
+diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
+index 4fc075b612fe..eea8bfd0b2bf 100644
+--- a/net/netfilter/Makefile
++++ b/net/netfilter/Makefile
+@@ -209,3 +209,6 @@ obj-$(CONFIG_IP_SET) += ipset/
+ 
+ # IPVS
+ obj-$(CONFIG_IP_VS) += ipvs/
++
++obj-$(CONFIG_NETFILTER_XT_TARGET_FULLCONENAT) += xt_FULLCONENAT.o
++
+diff --git a/net/netfilter/xt_FULLCONENAT.c b/net/netfilter/xt_FULLCONENAT.c
+new file mode 100644
+index 000000000000..8555b54e2dc6
+--- /dev/null
++++ b/net/netfilter/xt_FULLCONENAT.c
+@@ -0,0 +1,733 @@
++/*
++ * Copyright (c) 2018 Chion Tang
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include <linux/module.h>
++#include <linux/version.h>
++#include <linux/types.h>
++#include <linux/random.h>
++#include <linux/spinlock.h>
++#include <linux/workqueue.h>
++#include <linux/list.h>
++#include <linux/hashtable.h>
++#include <linux/netdevice.h>
++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
++#include <linux/notifier.h>
++#endif
++#include <linux/inetdevice.h>
++#include <linux/netfilter.h>
++#include <linux/netfilter_ipv4.h>
++#include <linux/netfilter/x_tables.h>
++#include <net/netfilter/nf_nat.h>
++#include <net/netfilter/nf_conntrack.h>
++#include <net/netfilter/nf_conntrack_core.h>
++#include <net/netfilter/nf_conntrack_zones.h>
++#include <net/netfilter/nf_conntrack_ecache.h>
++
++#define HASH_2(x, y) ((x + y) / 2 * (x + y + 1) + y)
++
++#define HASHTABLE_BUCKET_BITS 10
++
++#ifndef NF_NAT_RANGE_PROTO_RANDOM_FULLY
++#define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
++
++static inline int nf_ct_netns_get(struct net *net, u8 nfproto) { return 0; }
++
++static inline void nf_ct_netns_put(struct net *net, u8 nfproto) {}
++
++static inline struct net_device *xt_in(const struct xt_action_param *par) {
++  return par->in;
++}
++
++static inline struct net_device *xt_out(const struct xt_action_param *par) {
++  return par->out;
++}
++
++static inline unsigned int xt_hooknum(const struct xt_action_param *par) {
++  return par->hooknum;
++}
++
++#endif
++
++struct nat_mapping_original_tuple {
++  struct nf_conntrack_tuple tuple;
++
++  struct list_head node;
++};
++
++struct nat_mapping {
++  uint16_t port; /* external UDP port */
++  int ifindex; /* external interface index */
++
++  __be32 int_addr; /* internal source ip address */
++  uint16_t int_port; /* internal source port */
++
++  int refer_count; /* how many references linked to this mapping
++                    * aka. length of original_tuple_list */
++
++  struct list_head original_tuple_list;
++
++  struct hlist_node node_by_ext_port;
++  struct hlist_node node_by_int_src;
++
++};
++
++struct tuple_list {
++  struct nf_conntrack_tuple tuple_original;
++  struct nf_conntrack_tuple tuple_reply;
++  struct list_head list;
++};
++
++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
++struct notifier_block ct_event_notifier;
++#else
++struct nf_ct_event_notifier ct_event_notifier;
++#endif
++int tg_refer_count = 0;
++int ct_event_notifier_registered = 0;
++
++static DEFINE_MUTEX(nf_ct_net_event_lock);
++
++static DEFINE_HASHTABLE(mapping_table_by_ext_port, HASHTABLE_BUCKET_BITS);
++static DEFINE_HASHTABLE(mapping_table_by_int_src, HASHTABLE_BUCKET_BITS);
++
++static DEFINE_SPINLOCK(fullconenat_lock);
++
++static LIST_HEAD(dying_tuple_list);
++static DEFINE_SPINLOCK(dying_tuple_list_lock);
++static void gc_worker(struct work_struct *work);
++static struct workqueue_struct *wq __read_mostly = NULL;
++static DECLARE_DELAYED_WORK(gc_worker_wk, gc_worker);
++
++static char tuple_tmp_string[512];
++/* non-atomic: can only be called serially within lock zones. */
++static char* nf_ct_stringify_tuple(const struct nf_conntrack_tuple *t) {
++  snprintf(tuple_tmp_string, sizeof(tuple_tmp_string), "%pI4:%hu -> %pI4:%hu",
++    &t->src.u3.ip, be16_to_cpu(t->src.u.all),
++    &t->dst.u3.ip, be16_to_cpu(t->dst.u.all));
++  return tuple_tmp_string;
++}
++
++static struct nat_mapping* allocate_mapping(const __be32 int_addr, const uint16_t int_port, const uint16_t port, const int ifindex) {
++  struct nat_mapping *p_new;
++  u32 hash_src;
++
++  p_new = kmalloc(sizeof(struct nat_mapping), GFP_ATOMIC);
++  if (p_new == NULL) {
++    pr_debug("xt_FULLCONENAT: ERROR: kmalloc() for new nat_mapping failed.\n");
++    return NULL;
++  }
++  p_new->port = port;
++  p_new->int_addr = int_addr;
++  p_new->int_port = int_port;
++  p_new->ifindex = ifindex;
++  p_new->refer_count = 0;
++  (p_new->original_tuple_list).next = &(p_new->original_tuple_list);
++  (p_new->original_tuple_list).prev = &(p_new->original_tuple_list);
++
++  hash_src = HASH_2(int_addr, (u32)int_port);
++
++  hash_add(mapping_table_by_ext_port, &p_new->node_by_ext_port, port);
++  hash_add(mapping_table_by_int_src, &p_new->node_by_int_src, hash_src);
++
++  pr_debug("xt_FULLCONENAT: new mapping allocated for %pI4:%d ==> %d\n",
++    &p_new->int_addr, p_new->int_port, p_new->port);
++
++  return p_new;
++}
++
++static void add_original_tuple_to_mapping(struct nat_mapping *mapping, const struct nf_conntrack_tuple* original_tuple) {
++  struct nat_mapping_original_tuple *item = kmalloc(sizeof(struct nat_mapping_original_tuple), GFP_ATOMIC);
++  if (item == NULL) {
++    pr_debug("xt_FULLCONENAT: ERROR: kmalloc() for nat_mapping_original_tuple failed.\n");
++    return;
++  }
++  memcpy(&item->tuple, original_tuple, sizeof(struct nf_conntrack_tuple));
++  list_add(&item->node, &mapping->original_tuple_list);
++  (mapping->refer_count)++;
++}
++
++static struct nat_mapping* get_mapping_by_ext_port(const uint16_t port, const int ifindex) {
++  struct nat_mapping *p_current;
++
++  hash_for_each_possible(mapping_table_by_ext_port, p_current, node_by_ext_port, port) {
++    if (p_current->port == port && p_current->ifindex == ifindex) {
++      return p_current;
++    }
++  }
++
++  return NULL;
++}
++
++static struct nat_mapping* get_mapping_by_int_src(const __be32 src_ip, const uint16_t src_port) {
++  struct nat_mapping *p_current;
++  u32 hash_src = HASH_2(src_ip, (u32)src_port);
++
++  hash_for_each_possible(mapping_table_by_int_src, p_current, node_by_int_src, hash_src) {
++    if (p_current->int_addr == src_ip && p_current->int_port == src_port) {
++      return p_current;
++    }
++  }
++
++  return NULL;
++}
++
++static void kill_mapping(struct nat_mapping *mapping) {
++  struct list_head *iter, *tmp;
++  struct nat_mapping_original_tuple *original_tuple_item;
++
++  if (mapping == NULL) {
++    return;
++  }
++
++  list_for_each_safe(iter, tmp, &mapping->original_tuple_list) {
++    original_tuple_item = list_entry(iter, struct nat_mapping_original_tuple, node);
++    list_del(&original_tuple_item->node);
++    kfree(original_tuple_item);
++  }
++
++  hash_del(&mapping->node_by_ext_port);
++  hash_del(&mapping->node_by_int_src);
++  kfree(mapping);
++}
++
++static void destroy_mappings(void) {
++  struct nat_mapping *p_current;
++  struct hlist_node *tmp;
++  int i;
++
++  spin_lock_bh(&fullconenat_lock);
++
++  hash_for_each_safe(mapping_table_by_ext_port, i, tmp, p_current, node_by_ext_port) {
++    kill_mapping(p_current);
++  }
++
++  spin_unlock_bh(&fullconenat_lock);
++}
++
++/* check if a mapping is valid.
++ * possibly delete and free an invalid mapping.
++ * the mapping should not be used anymore after check_mapping() returns 0. */
++static int check_mapping(struct nat_mapping* mapping, struct net *net, const struct nf_conntrack_zone *zone) {
++  struct list_head *iter, *tmp;
++  struct nat_mapping_original_tuple *original_tuple_item;
++  struct nf_conntrack_tuple_hash *tuple_hash;
++  struct nf_conn *ct;
++
++  if (mapping == NULL) {
++    return 0;
++  }
++
++  if (mapping->port == 0 || mapping->int_addr == 0 || mapping->int_port == 0 || mapping->ifindex == -1) {
++    return 0;
++  }
++
++  /* for dying/unconfirmed conntrack tuples, an IPCT_DESTROY event may NOT be fired.
++   * so we manually kill one of those tuples once we acquire one. */
++
++  list_for_each_safe(iter, tmp, &mapping->original_tuple_list) {
++    original_tuple_item = list_entry(iter, struct nat_mapping_original_tuple, node);
++
++    tuple_hash = nf_conntrack_find_get(net, zone, &original_tuple_item->tuple);
++
++    if (tuple_hash == NULL) {
++      pr_debug("xt_FULLCONENAT: check_mapping(): tuple %s dying/unconfirmed. free this tuple.\n", nf_ct_stringify_tuple(&original_tuple_item->tuple));
++
++      list_del(&original_tuple_item->node);
++      kfree(original_tuple_item);
++      (mapping->refer_count)--;
++    } else {
++      ct = nf_ct_tuplehash_to_ctrack(tuple_hash);
++      if (ct != NULL)
++        nf_ct_put(ct);
++    }
++
++  }
++
++  /* kill the mapping if needed */
++  pr_debug("xt_FULLCONENAT: check_mapping() refer_count for mapping at ext_port %d is now %d\n", mapping->port, mapping->refer_count);
++  if (mapping->refer_count <= 0) {
++    pr_debug("xt_FULLCONENAT: check_mapping(): kill dying/unconfirmed mapping at ext port %d\n", mapping->port);
++    kill_mapping(mapping);
++    return 0;
++  } else {
++    return 1;
++  }
++}
++
++static void handle_dying_tuples(void) {
++  struct list_head *iter, *tmp, *iter_2, *tmp_2;
++  struct tuple_list *item;
++  struct nf_conntrack_tuple *ct_tuple;
++  struct nat_mapping *mapping;
++  __be32 ip;
++  uint16_t port;
++  struct nat_mapping_original_tuple *original_tuple_item;
++
++  spin_lock_bh(&fullconenat_lock);
++  spin_lock_bh(&dying_tuple_list_lock);
++
++  list_for_each_safe(iter, tmp, &dying_tuple_list) {
++    item = list_entry(iter, struct tuple_list, list);
++
++    /* we don't know the conntrack direction for now, so we try both ways. */
++    ct_tuple = &(item->tuple_original);
++    ip = (ct_tuple->src).u3.ip;
++    port = be16_to_cpu((ct_tuple->src).u.udp.port);
++    mapping = get_mapping_by_int_src(ip, port);
++    if (mapping == NULL) {
++      ct_tuple = &(item->tuple_reply);
++      ip = (ct_tuple->src).u3.ip;
++      port = be16_to_cpu((ct_tuple->src).u.udp.port);
++      mapping = get_mapping_by_int_src(ip, port);
++      if (mapping != NULL) {
++        pr_debug("xt_FULLCONENAT: handle_dying_tuples(): INBOUND dying conntrack at ext port %d\n", mapping->port);
++      }
++    } else {
++      pr_debug("xt_FULLCONENAT: handle_dying_tuples(): OUTBOUND dying conntrack at ext port %d\n", mapping->port);
++    }
++
++    if (mapping == NULL) {
++      goto next;
++    }
++
++    /* look for the corresponding out-dated tuple and free it */
++    list_for_each_safe(iter_2, tmp_2, &mapping->original_tuple_list) {
++      original_tuple_item = list_entry(iter_2, struct nat_mapping_original_tuple, node);
++
++      if (nf_ct_tuple_equal(&original_tuple_item->tuple, &(item->tuple_original))) {
++        pr_debug("xt_FULLCONENAT: handle_dying_tuples(): tuple %s expired. free this tuple.\n",
++          nf_ct_stringify_tuple(&original_tuple_item->tuple));
++        list_del(&original_tuple_item->node);
++        kfree(original_tuple_item);
++        (mapping->refer_count)--;
++      }
++    }
++
++    /* then kill the mapping if needed */
++    pr_debug("xt_FULLCONENAT: handle_dying_tuples(): refer_count for mapping at ext_port %d is now %d\n", mapping->port, mapping->refer_count);
++    if (mapping->refer_count <= 0) {
++      pr_debug("xt_FULLCONENAT: handle_dying_tuples(): kill expired mapping at ext port %d\n", mapping->port);
++      kill_mapping(mapping);
++    }
++
++next:
++    list_del(&item->list);
++    kfree(item);
++  }
++
++  spin_unlock_bh(&dying_tuple_list_lock);
++  spin_unlock_bh(&fullconenat_lock);
++}
++
++static void gc_worker(struct work_struct *work) {
++  handle_dying_tuples();
++}
++
++/* conntrack destroy event callback function */
++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
++static int ct_event_cb(struct notifier_block *this, unsigned long events, void *ptr) {
++  struct nf_ct_event *item = ptr;
++#else
++static int ct_event_cb(unsigned int events, struct nf_ct_event *item) {
++#endif
++  struct nf_conn *ct;
++  struct nf_conntrack_tuple *ct_tuple_reply, *ct_tuple_original;
++  uint8_t protonum;
++  struct tuple_list *dying_tuple_item;
++
++  ct = item->ct;
++  /* we handle only conntrack destroy events */
++  if (ct == NULL || !(events & (1 << IPCT_DESTROY))) {
++    return 0;
++  }
++
++  ct_tuple_original = &(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
++
++  ct_tuple_reply = &(ct->tuplehash[IP_CT_DIR_REPLY].tuple);
++
++  protonum = (ct_tuple_original->dst).protonum;
++  if (protonum != IPPROTO_UDP) {
++    return 0;
++  }
++
++  dying_tuple_item = kmalloc(sizeof(struct tuple_list), GFP_ATOMIC);
++
++  if (dying_tuple_item == NULL) {
++    pr_debug("xt_FULLCONENAT: warning: ct_event_cb(): kmalloc failed.\n");
++    return 0;
++  }
++
++  memcpy(&(dying_tuple_item->tuple_original), ct_tuple_original, sizeof(struct nf_conntrack_tuple));
++  memcpy(&(dying_tuple_item->tuple_reply), ct_tuple_reply, sizeof(struct nf_conntrack_tuple));
++
++  spin_lock_bh(&dying_tuple_list_lock);
++
++  list_add(&(dying_tuple_item->list), &dying_tuple_list);
++
++  spin_unlock_bh(&dying_tuple_list_lock);
++
++  if (wq != NULL)
++    queue_delayed_work(wq, &gc_worker_wk, msecs_to_jiffies(100));
++
++  return 0;
++}
++
++static __be32 get_device_ip(const struct net_device* dev) {
++  struct in_device* in_dev;
++  struct in_ifaddr* if_info;
++  __be32 result;
++
++  if (dev == NULL) {
++    return 0;
++  }
++
++  rcu_read_lock();
++  in_dev = dev->ip_ptr;
++  if (in_dev == NULL) {
++    rcu_read_unlock();
++    return 0;
++  }
++  if_info = in_dev->ifa_list;
++  if (if_info) {
++    result = if_info->ifa_local;
++    rcu_read_unlock();
++    return result;
++  } else {
++    rcu_read_unlock();
++    return 0;
++  }
++}
++
++static uint16_t find_appropriate_port(struct net *net, const struct nf_conntrack_zone *zone, const uint16_t original_port, const int ifindex, const struct nf_nat_ipv4_range *range) {
++  uint16_t min, start, selected, range_size, i;
++  struct nat_mapping* mapping = NULL;
++
++  if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
++    min = be16_to_cpu((range->min).udp.port);
++    range_size = be16_to_cpu((range->max).udp.port) - min + 1;
++  } else {
++    /* minimum port is 1024. same behavior as default linux NAT. */
++    min = 1024;
++    range_size = 65535 - min + 1;
++  }
++
++  if ((range->flags & NF_NAT_RANGE_PROTO_RANDOM)
++    || (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY)) {
++    /* for now we do the same thing for both --random and --random-fully */
++
++    /* select a random starting point */
++    start = (uint16_t)(prandom_u32() % (u32)range_size);
++  } else {
++
++    if ((original_port >= min && original_port <= min + range_size - 1)
++      || !(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
++      /* 1. try to preserve the port if it's available */
++      mapping = get_mapping_by_ext_port(original_port, ifindex);
++      if (mapping == NULL || !(check_mapping(mapping, net, zone))) {
++        return original_port;
++      }
++    }
++
++    /* otherwise, we start from zero */
++    start = 0;
++  }
++
++  for (i = 0; i < range_size; i++) {
++    /* 2. try to find an available port */
++    selected = min + ((start + i) % range_size);
++    mapping = get_mapping_by_ext_port(selected, ifindex);
++    if (mapping == NULL || !(check_mapping(mapping, net, zone))) {
++      return selected;
++    }
++  }
++
++  /* 3. at least we tried. override a previous mapping. */
++  selected = min + start;
++  mapping = get_mapping_by_ext_port(selected, ifindex);
++  kill_mapping(mapping);
++
++  return selected;
++}
++
++static unsigned int fullconenat_tg(struct sk_buff *skb, const struct xt_action_param *par)
++{
++  const struct nf_nat_ipv4_multi_range_compat *mr;
++  const struct nf_nat_ipv4_range *range;
++
++  const struct nf_conntrack_zone *zone;
++  struct net *net;
++  struct nf_conn *ct;
++  enum ip_conntrack_info ctinfo;
++  struct nf_conntrack_tuple *ct_tuple, *ct_tuple_origin;
++
++  struct net_device *net_dev;
++
++  struct nat_mapping *mapping, *src_mapping;
++  unsigned int ret;
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
++  struct nf_nat_range2 newrange;
++#else
++  struct nf_nat_range newrange;
++#endif
++
++  __be32 new_ip, ip;
++  uint16_t port, original_port, want_port;
++  uint8_t protonum;
++  int ifindex;
++
++  ip = 0;
++  original_port = 0;
++  src_mapping = NULL;
++
++  mr = par->targinfo;
++  range = &mr->range[0];
++
++  mapping = NULL;
++  ret = XT_CONTINUE;
++
++  ct = nf_ct_get(skb, &ctinfo);
++  net = nf_ct_net(ct);
++  zone = nf_ct_zone(ct);
++
++  memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
++  memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
++  newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS;
++  newrange.min_proto = mr->range[0].min;
++  newrange.max_proto = mr->range[0].max;
++
++  if (xt_hooknum(par) == NF_INET_PRE_ROUTING) {
++    /* inbound packets */
++    ifindex = xt_in(par)->ifindex;
++
++    ct_tuple_origin = &(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
++
++    protonum = (ct_tuple_origin->dst).protonum;
++    if (protonum != IPPROTO_UDP) {
++      return ret;
++    }
++    ip = (ct_tuple_origin->dst).u3.ip;
++    port = be16_to_cpu((ct_tuple_origin->dst).u.udp.port);
++
++    /* get the corresponding ifindex by the dst_ip (aka. external ip of this host),
++     * in case the packet needs to be forwarded from another inbound interface. */
++    net_dev = ip_dev_find(net, ip);
++    if (net_dev != NULL) {
++      ifindex = net_dev->ifindex;
++      dev_put(net_dev);
++    }
++
++    spin_lock_bh(&fullconenat_lock);
++
++    /* find an active mapping based on the inbound port */
++    mapping = get_mapping_by_ext_port(port, ifindex);
++    if (mapping == NULL) {
++      spin_unlock_bh(&fullconenat_lock);
++      return ret;
++    }
++    if (check_mapping(mapping, net, zone)) {
++      newrange.flags = NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED;
++      newrange.min_addr.ip = mapping->int_addr;
++      newrange.max_addr.ip = mapping->int_addr;
++      newrange.min_proto.udp.port = cpu_to_be16(mapping->int_port);
++      newrange.max_proto = newrange.min_proto;
++
++      pr_debug("xt_FULLCONENAT: %s ==> %pI4:%d\n", nf_ct_stringify_tuple(ct_tuple_origin), &mapping->int_addr, mapping->int_port);
++
++      ret = nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par)));
++
++      if (ret == NF_ACCEPT) {
++        add_original_tuple_to_mapping(mapping, ct_tuple_origin);
++        pr_debug("xt_FULLCONENAT: fullconenat_tg(): INBOUND: refer_count for mapping at ext_port %d is now %d\n", mapping->port, mapping->refer_count);
++      }
++    }
++    spin_unlock_bh(&fullconenat_lock);
++    return ret;
++
++
++  } else if (xt_hooknum(par) == NF_INET_POST_ROUTING) {
++    /* outbound packets */
++    ifindex = xt_out(par)->ifindex;
++
++    ct_tuple_origin = &(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
++    protonum = (ct_tuple_origin->dst).protonum;
++
++    spin_lock_bh(&fullconenat_lock);
++
++    if (protonum == IPPROTO_UDP) {
++      ip = (ct_tuple_origin->src).u3.ip;
++      original_port = be16_to_cpu((ct_tuple_origin->src).u.udp.port);
++
++      src_mapping = get_mapping_by_int_src(ip, original_port);
++      if (src_mapping != NULL && check_mapping(src_mapping, net, zone)) {
++
++        /* outbound nat: if a previously established mapping is active,
++         * we will reuse that mapping. */
++
++        newrange.flags = NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED;
++        newrange.min_proto.udp.port = cpu_to_be16(src_mapping->port);
++        newrange.max_proto = newrange.min_proto;
++
++      } else {
++
++        /* if not, we find a new external port to map to.
++         * the SNAT may fail so we should re-check the mapped port later. */
++        want_port = find_appropriate_port(net, zone, original_port, ifindex, range);
++
++        newrange.flags = NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED;
++        newrange.min_proto.udp.port = cpu_to_be16(want_port);
++        newrange.max_proto = newrange.min_proto;
++
++        src_mapping = NULL;
++
++      }
++    }
++
++    if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
++      newrange.min_addr.ip = mr->range[0].min_ip;
++      newrange.max_addr.ip = mr->range[0].max_ip;
++    } else {
++      new_ip = get_device_ip(skb->dev);
++      newrange.min_addr.ip = new_ip;
++      newrange.max_addr.ip = new_ip;
++    }
++
++    /* do SNAT now */
++    ret = nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par)));
++
++    if (protonum != IPPROTO_UDP || ret != NF_ACCEPT) {
++      /* for non-UDP packets and failed SNAT, bail out */
++      spin_unlock_bh(&fullconenat_lock);
++      return ret;
++    }
++
++    /* the reply tuple contains the mapped port. */
++    ct_tuple = &(ct->tuplehash[IP_CT_DIR_REPLY].tuple);
++    /* this is the resulting mapped port. */
++    port = be16_to_cpu((ct_tuple->dst).u.udp.port);
++
++    pr_debug("xt_FULLCONENAT: %s ==> %d\n", nf_ct_stringify_tuple(ct_tuple_origin), port);
++
++    /* save the mapping information into our mapping table */
++    mapping = src_mapping;
++    if (mapping == NULL || !check_mapping(mapping, net, zone)) {
++      mapping = allocate_mapping(ip, original_port, port, ifindex);
++    }
++    if (mapping != NULL) {
++      add_original_tuple_to_mapping(mapping, ct_tuple_origin);
++      pr_debug("xt_FULLCONENAT: fullconenat_tg(): OUTBOUND: refer_count for mapping at ext_port %d is now %d\n", mapping->port, mapping->refer_count);
++    }
++
++    spin_unlock_bh(&fullconenat_lock);
++    return ret;
++  }
++
++  return ret;
++}
++
++static int fullconenat_tg_check(const struct xt_tgchk_param *par)
++{
++  mutex_lock(&nf_ct_net_event_lock);
++
++  tg_refer_count++;
++
++  pr_debug("xt_FULLCONENAT: fullconenat_tg_check(): tg_refer_count is now %d\n", tg_refer_count);
++
++  if (tg_refer_count == 1) {
++    nf_ct_netns_get(par->net, par->family);
++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
++    ct_event_notifier.notifier_call = ct_event_cb;
++#else
++    ct_event_notifier.fcn = ct_event_cb;
++#endif
++
++    if (nf_conntrack_register_notifier(par->net, &ct_event_notifier) == 0) {
++      ct_event_notifier_registered = 1;
++      pr_debug("xt_FULLCONENAT: fullconenat_tg_check(): ct_event_notifier registered\n");
++    } else {
++      printk("xt_FULLCONENAT: warning: failed to register a conntrack notifier. Active GC for mappings is disabled.\n");
++    }
++
++  }
++
++  mutex_unlock(&nf_ct_net_event_lock);
++
++  return 0;
++}
++
++static void fullconenat_tg_destroy(const struct xt_tgdtor_param *par)
++{
++  mutex_lock(&nf_ct_net_event_lock);
++
++  tg_refer_count--;
++
++  pr_debug("xt_FULLCONENAT: fullconenat_tg_destroy(): tg_refer_count is now %d\n", tg_refer_count);
++
++  if (tg_refer_count == 0) {
++    if (ct_event_notifier_registered) {
++      nf_conntrack_unregister_notifier(par->net, &ct_event_notifier);
++      ct_event_notifier_registered = 0;
++
++      pr_debug("xt_FULLCONENAT: fullconenat_tg_destroy(): ct_event_notifier unregistered\n");
++
++    }
++    nf_ct_netns_put(par->net, par->family);
++  }
++
++  mutex_unlock(&nf_ct_net_event_lock);
++}
++
++static struct xt_target tg_reg[] __read_mostly = {
++  {
++    .name = "FULLCONENAT",
++    .family = NFPROTO_IPV4,
++    .revision = 0,
++    .target = fullconenat_tg,
++    .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
++    .table = "nat",
++    .hooks = (1 << NF_INET_PRE_ROUTING) |
++             (1 << NF_INET_POST_ROUTING),
++    .checkentry = fullconenat_tg_check,
++    .destroy = fullconenat_tg_destroy,
++    .me = THIS_MODULE,
++  },
++};
++
++static int __init fullconenat_tg_init(void)
++{
++  wq = create_singlethread_workqueue("xt_FULLCONENAT");
++  if (wq == NULL) {
++    printk("xt_FULLCONENAT: warning: failed to create workqueue\n");
++  }
++
++  return xt_register_targets(tg_reg, ARRAY_SIZE(tg_reg));
++}
++
++static void fullconenat_tg_exit(void)
++{
++  xt_unregister_targets(tg_reg, ARRAY_SIZE(tg_reg));
++
++  if (wq) {
++    cancel_delayed_work_sync(&gc_worker_wk);
++    flush_workqueue(wq);
++    destroy_workqueue(wq);
++  }
++
++  handle_dying_tuples();
++  destroy_mappings();
++}
++
++module_init(fullconenat_tg_init);
++module_exit(fullconenat_tg_exit);
++
++MODULE_LICENSE("GPL");
++MODULE_DESCRIPTION("Xtables: implementation of RFC3489 full cone NAT");
++MODULE_AUTHOR("Chion Tang ");
++MODULE_ALIAS("ipt_FULLCONENAT");
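
For reference, the target registers on both NAT hooks (see the .hooks mask
above): a FULLCONENAT rule on POSTROUTING performs the SNAT and records the
mapping, while a matching rule on PREROUTING translates inbound UDP packets
back through the learned mapping. A minimal ruleset sketch, assuming "eth0"
is the external (WAN) interface and that the matching userspace iptables
extension for FULLCONENAT is installed, would look like:

  iptables -t nat -A POSTROUTING -o eth0 -j FULLCONENAT
  iptables -t nat -A PREROUTING -i eth0 -j FULLCONENAT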