From ab59b19be78aac65cdd599fb5002c9019885e061 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Feb 2010 14:54:05 +0100 Subject: netfilter: nf_conntrack: per netns nf_conntrack_cachep nf_conntrack_cachep is currently shared by all netns instances, but because of SLAB_DESTROY_BY_RCU special semantics, this is wrong. If we use a shared slab cache, one object can instantly flight between one hash table (netns ONE) to another one (netns TWO), and concurrent reader (doing a lookup in netns ONE, 'finding' an object of netns TWO) can be fooled without notice, because no RCU grace period has to be observed between object freeing and its reuse. We dont have this problem with UDP/TCP slab caches because TCP/UDP hashtables are global to the machine (and each object has a pointer to its netns). If we use per netns conntrack hash tables, we also *must* use per netns conntrack slab caches, to guarantee an object can not escape from one namespace to another one. Signed-off-by: Eric Dumazet [Patrick: added unique slab name allocation] Signed-off-by: Patrick McHardy --- include/net/netns/conntrack.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index ba1ba0c5efd..aed23b6c847 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -11,6 +11,7 @@ struct nf_conntrack_ecache; struct netns_ct { atomic_t count; unsigned int expect_count; + struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; struct hlist_nulls_head unconfirmed; @@ -28,5 +29,6 @@ struct netns_ct { #endif int hash_vmalloc; int expect_vmalloc; + char *slabname; }; #endif -- cgit v1.2.3 From 9ab48ddcb144fdee908708669448dd136cf4894a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 8 Feb 2010 17:35:23 +0100 Subject: netfilter: nf_conntrack: fix hash resizing with namespaces As noticed by Jon Masters , the conntrack hash size is global and not per namespace, but modifiable at runtime through /sys/module/nf_conntrack/hashsize. Changing the hash size will only resize the hash in the current namespace however, so other namespaces will use an invalid hash size. This can cause crashes when enlarging the hashsize, or false negative lookups when shrinking it. Move the hash size into the per-namespace data and only use the global hash size to initialize the per-namespace value when instanciating a new namespace. Additionally restrict hash resizing to init_net for now as other namespaces are not handled currently. Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- include/net/netns/conntrack.h | 1 + include/net/netns/ipv4.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index aed23b6c847..63d449807d9 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -11,6 +11,7 @@ struct nf_conntrack_ecache; struct netns_ct { atomic_t count; unsigned int expect_count; + unsigned int htable_size; struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2eb3814d625..9a4b8b71407 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,6 +40,7 @@ struct netns_ipv4 { struct xt_table *iptable_security; struct xt_table *nat_table; struct hlist_head *nat_bysource; + unsigned int nat_htable_size; int nat_vmalloced; #endif -- cgit v1.2.3 From 63e690caf24e8f43ba019fe1107669746b072d80 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Mon, 8 Mar 2010 13:13:07 +0100 Subject: netfilter: include/linux/netfilter/nf_conntrack_tuple_common.h: Checkpatch cleanup include/linux/netfilter/nf_conntrack_tuple_common.h:5: ERROR: open brace '{' following enum go on the same line Signed-off-by: Andrea Gelmini Acked-by: David S. Miller Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_tuple_common.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_tuple_common.h b/include/linux/netfilter/nf_conntrack_tuple_common.h index 8e145f0d61c..2ea22b018a8 100644 --- a/include/linux/netfilter/nf_conntrack_tuple_common.h +++ b/include/linux/netfilter/nf_conntrack_tuple_common.h @@ -1,8 +1,7 @@ #ifndef _NF_CONNTRACK_TUPLE_COMMON_H #define _NF_CONNTRACK_TUPLE_COMMON_H -enum ip_conntrack_dir -{ +enum ip_conntrack_dir { IP_CT_DIR_ORIGINAL, IP_CT_DIR_REPLY, IP_CT_DIR_MAX -- cgit v1.2.3 From 7b4df05537f4e6c0c3524055ece7f99b5c98cc87 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 8 Mar 2010 13:17:01 +0100 Subject: netfilter: remove stale declaration for ip6_masked_addrcmp() Commit f2ffd9ee... ("[NETFILTER]: Move ip6_masked_addrcmp to include/net/ipv6.h") replaced ip6_masked_addrcmp() with ipv6_masked_addr_cmp(). Function definition went away. Let's remove its declaration as well in header file. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Patrick McHardy --- include/linux/netfilter_ipv6/ip6_tables.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index e5ba03d783c..18442ff19c0 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -316,10 +316,6 @@ extern int ip6t_ext_hdr(u8 nexthdr); extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, int target, unsigned short *fragoff); -extern int ip6_masked_addrcmp(const struct in6_addr *addr1, - const struct in6_addr *mask, - const struct in6_addr *addr2); - #define IP6T_ALIGN(s) XT_ALIGN(s) #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 28b949885f80efb87d7cebdcf879c99db12c37bd Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 28 Feb 2009 03:23:57 +0100 Subject: netfilter: xtables: merge xt_MARK into xt_mark Two arguments for combining the two: - xt_mark is pretty useless without xt_MARK - the actual code is so small anyway that the kmod metadata and the module in its loaded state totally outweighs the combined actual code size. i586-before: -rw-r--r-- 1 jengelh users 3821 Feb 10 01:01 xt_MARK.ko -rw-r--r-- 1 jengelh users 2592 Feb 10 00:04 xt_MARK.o -rw-r--r-- 1 jengelh users 3274 Feb 10 01:01 xt_mark.ko -rw-r--r-- 1 jengelh users 2108 Feb 10 00:05 xt_mark.o text data bss dec hex filename 354 264 0 618 26a xt_MARK.o 223 176 0 399 18f xt_mark.o And the runtime size is like 14 KB. i586-after: -rw-r--r-- 1 jengelh users 3264 Feb 18 17:28 xt_mark.o Signed-off-by: Jan Engelhardt --- include/linux/netfilter/xt_MARK.h | 6 +----- include/linux/netfilter/xt_mark.h | 4 ++++ 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/xt_MARK.h b/include/linux/netfilter/xt_MARK.h index bc9561bdef7..41c456deba2 100644 --- a/include/linux/netfilter/xt_MARK.h +++ b/include/linux/netfilter/xt_MARK.h @@ -1,10 +1,6 @@ #ifndef _XT_MARK_H_target #define _XT_MARK_H_target -#include - -struct xt_mark_tginfo2 { - __u32 mark, mask; -}; +#include #endif /*_XT_MARK_H_target */ diff --git a/include/linux/netfilter/xt_mark.h b/include/linux/netfilter/xt_mark.h index 6607c8f38ea..ecadc40d5cd 100644 --- a/include/linux/netfilter/xt_mark.h +++ b/include/linux/netfilter/xt_mark.h @@ -3,6 +3,10 @@ #include +struct xt_mark_tginfo2 { + __u32 mark, mask; +}; + struct xt_mark_mtinfo1 { __u32 mark, mask; __u8 invert; -- cgit v1.2.3 From b8f00ba27e4acc4a2224360ce3aa8a22354434c5 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 26 Feb 2010 14:20:32 +0100 Subject: netfilter: xtables: merge xt_CONNMARK into xt_connmark Signed-off-by: Jan Engelhardt --- include/linux/netfilter/xt_CONNMARK.h | 22 +--------------------- include/linux/netfilter/xt_connmark.h | 11 +++++++++++ 2 files changed, 12 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/xt_CONNMARK.h b/include/linux/netfilter/xt_CONNMARK.h index 0a854586675..2f2e48ec802 100644 --- a/include/linux/netfilter/xt_CONNMARK.h +++ b/include/linux/netfilter/xt_CONNMARK.h @@ -1,26 +1,6 @@ #ifndef _XT_CONNMARK_H_target #define _XT_CONNMARK_H_target -#include - -/* Copyright (C) 2002,2004 MARA Systems AB - * by Henrik Nordstrom - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -enum { - XT_CONNMARK_SET = 0, - XT_CONNMARK_SAVE, - XT_CONNMARK_RESTORE -}; - -struct xt_connmark_tginfo1 { - __u32 ctmark, ctmask, nfmask; - __u8 mode; -}; +#include #endif /*_XT_CONNMARK_H_target*/ diff --git a/include/linux/netfilter/xt_connmark.h b/include/linux/netfilter/xt_connmark.h index 619e47cde01..efc17a8305f 100644 --- a/include/linux/netfilter/xt_connmark.h +++ b/include/linux/netfilter/xt_connmark.h @@ -12,6 +12,17 @@ * (at your option) any later version. */ +enum { + XT_CONNMARK_SET = 0, + XT_CONNMARK_SAVE, + XT_CONNMARK_RESTORE +}; + +struct xt_connmark_tginfo1 { + __u32 ctmark, ctmask, nfmask; + __u8 mode; +}; + struct xt_connmark_mtinfo1 { __u32 mark, mask; __u8 invert; -- cgit v1.2.3 From 0079c5aee34880bcee7feee9960f0502c73dc5fa Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 16 Mar 2010 19:53:13 +0100 Subject: netfilter: xt_recent: add an entry reaper One of the problems with the way xt_recent is implemented is that there is no efficient way to remove expired entries. Of course, one can write a rule '-m recent --remove', but you have to know beforehand which entry to delete. This commit adds reaper logic which checks the head of the LRU list when a rule is invoked that has a '--seconds' value and XT_RECENT_REAP set. If an entry ceases to accumulate time stamps, then it will eventually bubble to the top of the LRU list where it is then reaped. Signed-off-by: Tim Gardner Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_recent.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/xt_recent.h b/include/linux/netfilter/xt_recent.h index d2c27660992..bba990ecb01 100644 --- a/include/linux/netfilter/xt_recent.h +++ b/include/linux/netfilter/xt_recent.h @@ -9,6 +9,7 @@ enum { XT_RECENT_UPDATE = 1 << 2, XT_RECENT_REMOVE = 1 << 3, XT_RECENT_TTL = 1 << 4, + XT_RECENT_REAP = 1 << 5, XT_RECENT_SOURCE = 0, XT_RECENT_DEST = 1, @@ -16,6 +17,9 @@ enum { XT_RECENT_NAME_LEN = 200, }; +/* Only allowed with --rcheck and --update */ +#define XT_RECENT_MODIFIERS (XT_RECENT_TTL|XT_RECENT_REAP) + struct xt_recent_mtinfo { __u32 seconds; __u32 hit_count; -- cgit v1.2.3 From 606a9a02633c02d0e09fc96706f041053dbc57ee Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Wed, 17 Mar 2010 16:18:56 +0100 Subject: netfilter: xt_recent: check for unsupported user space flags Signed-off-by: Tim Gardner Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_recent.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/xt_recent.h b/include/linux/netfilter/xt_recent.h index bba990ecb01..83318e01425 100644 --- a/include/linux/netfilter/xt_recent.h +++ b/include/linux/netfilter/xt_recent.h @@ -20,6 +20,9 @@ enum { /* Only allowed with --rcheck and --update */ #define XT_RECENT_MODIFIERS (XT_RECENT_TTL|XT_RECENT_REAP) +#define XT_RECENT_VALID_FLAGS (XT_RECENT_CHECK|XT_RECENT_SET|XT_RECENT_UPDATE|\ + XT_RECENT_REMOVE|XT_RECENT_TTL|XT_RECENT_REAP) + struct xt_recent_mtinfo { __u32 seconds; __u32 hit_count; -- cgit v1.2.3 From 16599786ae5e9d5f936706d2202d8c7224cd51ed Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 18 Mar 2010 10:30:44 +0100 Subject: netfilter: update documentation fields of x_tables.h Signed-off-by: Jan Engelhardt --- include/linux/netfilter/x_tables.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 84c7c928e9e..c68ff82366b 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -197,6 +197,7 @@ struct xt_counters_info { * @family: Actual NFPROTO_* through which the function is invoked * (helpful when match->family == NFPROTO_UNSPEC) * @hotdrop: drop packet if we had inspection problems + * Network namespace obtainable using dev_net(in/out) */ struct xt_match_param { const struct net_device *in, *out; @@ -213,12 +214,14 @@ struct xt_match_param { * struct xt_mtchk_param - parameters for match extensions' * checkentry functions * + * @net: network namespace through which the check was invoked * @table: table the rule is tried to be inserted into * @entryinfo: the family-specific rule data - * (struct ipt_ip, ip6t_ip, ebt_entry) + * (struct ipt_ip, ip6t_ip, arpt_arp or (note) ebt_entry) * @match: struct xt_match through which this function was invoked * @matchinfo: per-match data * @hook_mask: via which hooks the new rule is reachable + * Other fields as above. */ struct xt_mtchk_param { struct net *net; @@ -230,7 +233,10 @@ struct xt_mtchk_param { u_int8_t family; }; -/* Match destructor parameters */ +/** + * struct xt_mdtor_param - match destructor parameters + * Fields as above. + */ struct xt_mtdtor_param { struct net *net; const struct xt_match *match; -- cgit v1.2.3 From 4f948db1915ff05e4ce0fd98e6323db6a3ec0fc0 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 18 Mar 2010 11:03:51 +0100 Subject: netfilter: xtables: remove almost-unused xt_match_param.data member This member is taking up a "long" per match, yet is only used by one module out of the roughly 90 modules, ip6t_hbh. ip6t_hbh can be restructured a little to accomodate for the lack of the .data member. This variant uses checking the par->match address, which should avoid having to add two extra functions, including calls, i.e. (hbh_mt6: call hbhdst_mt6(skb, par, NEXTHDR_OPT), dst_mt6: call hbhdst_mt6(skb, par, NEXTHDR_DEST)) Signed-off-by: Jan Engelhardt --- include/linux/netfilter/x_tables.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c68ff82366b..cf91473624e 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -315,9 +315,6 @@ struct xt_match { /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; - /* Free to use by each match */ - unsigned long data; - const char *table; unsigned int matchsize; #ifdef CONFIG_COMPAT -- cgit v1.2.3 From f5c511c67aaec323c186543856cfddab31bed1d1 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 18 Mar 2010 14:02:10 +0100 Subject: netfilter: xtables: reduce holes in struct xt_target This will save one full padding chunk (8 bytes on x86_64) per target. Signed-off-by: Jan Engelhardt --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index cf91473624e..f8f55515337 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -331,6 +331,7 @@ struct xt_target { struct list_head list; const char name[XT_FUNCTION_MAXNAMELEN-1]; + u_int8_t revision; /* Returns verdict. Argument order changed since 2.6.9, as this must now handle non-linear skbs, using skb_copy_bits and @@ -363,7 +364,6 @@ struct xt_target { unsigned short proto; unsigned short family; - u_int8_t revision; }; /* Furniture shopping... */ -- cgit v1.2.3 From fd0ec0e6216baea854465bbdb177f2d1b2ccaf22 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 10 Jul 2009 19:27:47 +0200 Subject: netfilter: xtables: consolidate code into xt_request_find_match Signed-off-by: Jan Engelhardt --- include/linux/netfilter/x_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index f8f55515337..dd9d15a73a9 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -436,6 +436,8 @@ extern struct xt_table_info *xt_replace_table(struct xt_table *table, extern struct xt_match *xt_find_match(u8 af, const char *name, u8 revision); extern struct xt_target *xt_find_target(u8 af, const char *name, u8 revision); +extern struct xt_match *xt_request_find_match(u8 af, const char *name, + u8 revision); extern struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision); extern int xt_find_revision(u8 af, const char *name, u8 revision, -- cgit v1.2.3 From b0f38452ff73da7e9e0ddc68cd5c6b93c897ca0d Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 19 Mar 2010 17:16:42 +0100 Subject: netfilter: xtables: change xt_match.checkentry return type Restore function signatures from bool to int so that we can report memory allocation failures or similar using -ENOMEM rather than always having to pass -EINVAL back. This semantic patch may not be too precise (checking for functions that use xt_mtchk_param rather than functions referenced by xt_match.checkentry), but reviewed, it produced the intended result. // @@ type bool; identifier check, par; @@ -bool check +int check (struct xt_mtchk_param *par) { ... } // Signed-off-by: Jan Engelhardt --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index dd9d15a73a9..33c1a62a099 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -303,7 +303,7 @@ struct xt_match { const struct xt_match_param *); /* Called when user tries to insert an entry of this type. */ - bool (*checkentry)(const struct xt_mtchk_param *); + int (*checkentry)(const struct xt_mtchk_param *); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_mtdtor_param *); -- cgit v1.2.3 From 135367b8f6a18507af6b9a6910a14b5699415309 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 19 Mar 2010 17:16:42 +0100 Subject: netfilter: xtables: change xt_target.checkentry return type Restore function signatures from bool to int so that we can report memory allocation failures or similar using -ENOMEM rather than always having to pass -EINVAL back. // @@ type bool; identifier check, par; @@ -bool check +int check (struct xt_tgchk_param *par) { ... } // Minus the change it does to xt_ct_find_proto. Signed-off-by: Jan Engelhardt --- include/linux/netfilter/x_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 33c1a62a099..1a65d45ee4f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -342,8 +342,8 @@ struct xt_target { /* Called when user tries to insert an entry of this type: hook_mask is a bitmask of hooks from which it can be called. */ - /* Should return true or false. */ - bool (*checkentry)(const struct xt_tgchk_param *); + /* Should return true or false, or an error code (-Exxxx). */ + int (*checkentry)(const struct xt_tgchk_param *); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_tgdtor_param *); -- cgit v1.2.3 From a79ff731a1b277d0e92d9453bdf374e04cec717a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 13 Apr 2010 11:21:46 +0200 Subject: netfilter: xtables: make XT_ALIGN() usable in exported headers by exporting __ALIGN_KERNEL() XT_ALIGN() was rewritten through ALIGN() by commit 42107f5009da223daa800d6da6904d77297ae829 "netfilter: xtables: symmetric COMPAT_XT_ALIGN definition". ALIGN() is not exported in userspace headers, which created compile problem for tc(8) and will create problem for iptables(8). We can't export generic looking name ALIGN() but we can export less generic __ALIGN_KERNEL() (suggested by Ben Hutchings). Google knows nothing about __ALIGN_KERNEL(). COMPAT_XT_ALIGN() changed for symmetry. Reported-by: Andreas Henriksson Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/kernel.h | 5 +++-- include/linux/netfilter/x_tables.h | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7f070746336..284ea995646 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -4,6 +4,8 @@ /* * 'kernel.h' contains some often-used function prototypes etc */ +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) #ifdef __KERNEL__ @@ -37,8 +39,7 @@ extern const char linux_proc_banner[]; #define STACK_MAGIC 0xdeadbeef -#define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1) -#define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) +#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) #define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1a65d45ee4f..26ced0c323a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -1,6 +1,6 @@ #ifndef _X_TABLES_H #define _X_TABLES_H - +#include #include #define XT_FUNCTION_MAXNAMELEN 30 @@ -93,7 +93,7 @@ struct _xt_align { __u64 u64; }; -#define XT_ALIGN(s) ALIGN((s), __alignof__(struct _xt_align)) +#define XT_ALIGN(s) __ALIGN_KERNEL((s), __alignof__(struct _xt_align)) /* Standard return verdict, or do jump. */ #define XT_STANDARD_TARGET "" @@ -603,7 +603,7 @@ struct _compat_xt_align { compat_u64 u64; }; -#define COMPAT_XT_ALIGN(s) ALIGN((s), __alignof__(struct _compat_xt_align)) +#define COMPAT_XT_ALIGN(s) __ALIGN_KERNEL((s), __alignof__(struct _compat_xt_align)) extern void xt_compat_lock(u_int8_t af); extern void xt_compat_unlock(u_int8_t af); -- cgit v1.2.3 From 9f93ff5be54108066372d1c4100c515d9d9acc1b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 13 Apr 2010 14:09:15 +0200 Subject: Restore __ALIGN_MASK() Fix lib/bitmap.c compile failure due to __ALIGN_KERNEL changes. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 284ea995646..db6717d0fd6 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -40,6 +40,7 @@ extern const char linux_proc_banner[]; #define STACK_MAGIC 0xdeadbeef #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) +#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) #define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) -- cgit v1.2.3 From 9c6eb28aca52d562f3ffbaebaa56385df9972a43 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 13 Apr 2010 15:32:16 +0200 Subject: netfilter: ipv6: add IPSKB_REROUTED exclusion to NF_HOOK/POSTROUTING invocation Similar to how IPv4's ip_output.c works, have ip6_output also check the IPSKB_REROUTED flag. It will be set from xt_TEE for cloned packets since Xtables can currently only deal with a single packet in flight at a time. Signed-off-by: Jan Engelhardt Acked-by: David S. Miller [Patrick: changed to use an IP6SKB value instead of IPSKB] Signed-off-by: Patrick McHardy --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e0cc9a7db2b..7bdf6ffe2b4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -250,6 +250,7 @@ struct inet6_skb_parm { #define IP6SKB_XFRM_TRANSFORMED 1 #define IP6SKB_FORWARDED 2 +#define IP6SKB_REROUTED 4 }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) -- cgit v1.2.3 From ea2d9b41bd418894d1ee25de1642c3325d71c397 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Thu, 15 Apr 2010 12:14:51 +0200 Subject: netfilter: bridge-netfilter: simplify IP DNAT Remove br_netfilter.c::br_nf_local_out(). The function br_nf_local_out() was needed because the PF_BRIDGE::LOCAL_OUT hook could be called when IP DNAT happens on to-be-bridged traffic. The new scheme eliminates this mess. Signed-off-by: Bart De Schuymer Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index f8105e54716..ffab6c423a5 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -41,9 +41,8 @@ enum nf_br_hook_priorities { #define BRNF_PKT_TYPE 0x01 #define BRNF_BRIDGED_DNAT 0x02 -#define BRNF_DONT_TAKE_PARENT 0x04 -#define BRNF_BRIDGED 0x08 -#define BRNF_NF_BRIDGE_PREROUTING 0x10 +#define BRNF_BRIDGED 0x04 +#define BRNF_NF_BRIDGE_PREROUTING 0x08 /* Only used in br_forward.c */ @@ -68,6 +67,18 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) } } +extern int br_handle_frame_finish(struct sk_buff *skb); +/* Only used in br_device.c */ +static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + + skb_pull(skb, ETH_HLEN); + nf_bridge->mask ^= BRNF_BRIDGED_DNAT; + skb->dev = nf_bridge->physindev; + return br_handle_frame_finish(skb); +} + /* This is called by the IP fragmenting code and it ensures there is * enough room for the encapsulating header (if there is one). */ static inline unsigned int nf_bridge_pad(const struct sk_buff *skb) -- cgit v1.2.3 From e179e6322ac334e21a3c6d669d95bc967e5d0a80 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Thu, 15 Apr 2010 12:26:39 +0200 Subject: netfilter: bridge-netfilter: Fix MAC header handling with IP DNAT - fix IP DNAT on vlan- or pppoe-encapsulated traffic: The functions neigh_hh_output() or dst->neighbour->output() overwrite the complete Ethernet header, although we only need the destination MAC address. For encapsulated packets, they ended up overwriting the encapsulating header. The new code copies the Ethernet source MAC address and protocol number before calling dst->neighbour->output(). The Ethernet source MAC and protocol number are copied back in place in br_nf_pre_routing_finish_bridge_slow(). This also makes the IP DNAT more transparent because in the old scheme the source MAC of the bridge was copied into the source address in the Ethernet header. We also let skb->protocol equal ETH_P_IP resp. ETH_P_IPV6 during the execution of the PF_INET resp. PF_INET6 hooks. - Speed up IP DNAT by calling neigh_hh_bridge() instead of neigh_hh_output(): if dst->hh is available, we already know the MAC address so we can just copy it. Signed-off-by: Bart De Schuymer Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge.h | 5 ++++- include/net/neighbour.h | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index ffab6c423a5..ea0e44b9043 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -43,7 +43,8 @@ enum nf_br_hook_priorities { #define BRNF_BRIDGED_DNAT 0x02 #define BRNF_BRIDGED 0x04 #define BRNF_NF_BRIDGE_PREROUTING 0x08 - +#define BRNF_8021Q 0x10 +#define BRNF_PPPoE 0x20 /* Only used in br_forward.c */ extern int nf_bridge_copy_header(struct sk_buff *skb); @@ -75,6 +76,8 @@ static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) skb_pull(skb, ETH_HLEN); nf_bridge->mask ^= BRNF_BRIDGED_DNAT; + skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), + skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); skb->dev = nf_bridge->physindev; return br_handle_frame_finish(skb); } diff --git a/include/net/neighbour.h b/include/net/neighbour.h index da1d58be31b..eb21340a573 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -299,6 +299,20 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) return 0; } +#ifdef CONFIG_BRIDGE_NETFILTER +static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) +{ + unsigned seq, hh_alen; + + do { + seq = read_seqbegin(&hh->hh_lock); + hh_alen = HH_DATA_ALIGN(ETH_HLEN); + memcpy(skb->data - hh_alen, hh->hh_data, ETH_ALEN + hh_alen - ETH_HLEN); + } while (read_seqretry(&hh->hh_lock, seq)); + return 0; +} +#endif + static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb) { unsigned seq; -- cgit v1.2.3 From e281b19897dc21c1071802808d461627d747a877 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 19 Apr 2010 14:17:47 +0200 Subject: netfilter: xtables: inclusion of xt_TEE xt_TEE can be used to clone and reroute a packet. This can for example be used to copy traffic at a router for logging purposes to another dedicated machine. References: http://www.gossamer-threads.com/lists/iptables/devel/68781 Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_TEE.h | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 include/linux/netfilter/xt_TEE.h (limited to 'include') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index a5a63e41b8a..48767cd1645 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -16,6 +16,7 @@ header-y += xt_RATEEST.h header-y += xt_SECMARK.h header-y += xt_TCPMSS.h header-y += xt_TCPOPTSTRIP.h +header-y += xt_TEE.h header-y += xt_TPROXY.h header-y += xt_comment.h header-y += xt_connbytes.h diff --git a/include/linux/netfilter/xt_TEE.h b/include/linux/netfilter/xt_TEE.h new file mode 100644 index 00000000000..55d4a501152 --- /dev/null +++ b/include/linux/netfilter/xt_TEE.h @@ -0,0 +1,9 @@ +#ifndef _XT_TEE_TARGET_H +#define _XT_TEE_TARGET_H + +struct xt_tee_tginfo { + union nf_inet_addr gw; + char oif[16]; +}; + +#endif /* _XT_TEE_TARGET_H */ -- cgit v1.2.3 From f3c5c1bfd430858d3a05436f82c51e53104feb6b Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 19 Apr 2010 16:05:10 +0200 Subject: netfilter: xtables: make ip_tables reentrant Currently, the table traverser stores return addresses in the ruleset itself (struct ip6t_entry->comefrom). This has a well-known drawback: the jumpstack is overwritten on reentry, making it necessary for targets to return absolute verdicts. Also, the ruleset (which might be heavy memory-wise) needs to be replicated for each CPU that can possibly invoke ip6t_do_table. This patch decouples the jumpstack from struct ip6t_entry and instead puts it into xt_table_info. Not being restricted by 'comefrom' anymore, we can set up a stack as needed. By default, there is room allocated for two entries into the traverser. arp_tables is not touched though, because there is just one/two modules and further patches seek to collapse the table traverser anyhow. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 26ced0c323a..50c867256ca 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -401,6 +401,13 @@ struct xt_table_info { unsigned int hook_entry[NF_INET_NUMHOOKS]; unsigned int underflow[NF_INET_NUMHOOKS]; + /* + * Number of user chains. Since tables cannot have loops, at most + * @stacksize jumps (number of user chains) can possibly be made. + */ + unsigned int stacksize; + unsigned int *stackptr; + void ***jumpstack; /* ipt_entry tables: one per CPU */ /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ void *entries[1]; -- cgit v1.2.3 From 22265a5c3c103cf8c50be62e6c90d045eb649e6d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 20 Apr 2010 15:07:32 +0200 Subject: netfilter: xt_TEE: resolve oif using netdevice notifiers Replace the runtime oif name resolving by netdevice notifier based resolving. When an oif is given, a netdevice notifier is registered to resolve the name on NETDEV_REGISTER or NETDEV_CHANGE and unresolve it again on NETDEV_UNREGISTER or NETDEV_CHANGE to a different name. Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_TEE.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/xt_TEE.h b/include/linux/netfilter/xt_TEE.h index 55d4a501152..5c21d5c829a 100644 --- a/include/linux/netfilter/xt_TEE.h +++ b/include/linux/netfilter/xt_TEE.h @@ -4,6 +4,9 @@ struct xt_tee_tginfo { union nf_inet_addr gw; char oif[16]; + + /* used internally by the kernel */ + struct xt_tee_priv *priv __attribute__((aligned(8))); }; #endif /* _XT_TEE_TARGET_H */ -- cgit v1.2.3 From 6c79bf0f2440fd250c8fce8d9b82fcf03d4e8350 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Tue, 20 Apr 2010 16:22:01 +0200 Subject: netfilter: bridge-netfilter: fix refragmenting IP traffic encapsulated in PPPoE traffic The MTU for IP traffic encapsulated inside PPPoE traffic is smaller than the MTU of the Ethernet device (1500). Connection tracking gathers all IP packets and sometimes will refragment them in ip_fragment(). We then need to subtract the length of the encapsulating header from the mtu used in ip_fragment(). The check in br_nf_dev_queue_xmit() which determines if ip_fragment() has to be called is also updated for the PPPoE-encapsulated packets. nf_bridge_copy_header() is also updated to make sure the PPPoE data length field has the correct value. Signed-off-by: Bart De Schuymer Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index ea0e44b9043..0ddd161f3b0 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -68,6 +68,13 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) } } +static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) +{ + if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) + return PPPOE_SES_HLEN; + return 0; +} + extern int br_handle_frame_finish(struct sk_buff *skb); /* Only used in br_device.c */ static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) -- cgit v1.2.3 From af740b2c8f4521e2c45698ee6040941a82d6349d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 23 Apr 2010 12:34:56 +0200 Subject: netfilter: nf_conntrack: extend with extra stat counter I suspect an unfortunatly series of events occuring under a DDoS attack, in function __nf_conntrack_find() nf_contrack_core.c. Adding a stats counter to see if the search is restarted too often. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_common.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index c608677dda6..14e6d32002c 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -113,6 +113,7 @@ struct ip_conntrack_stat { unsigned int expect_new; unsigned int expect_create; unsigned int expect_delete; + unsigned int search_restart; }; /* call to create an explicit dependency on nf_conntrack. */ -- cgit v1.2.3 From 4b2cbd42bef5a22bb681acd607a7c3fbca1eeb3c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 27 Apr 2010 15:34:34 +0200 Subject: netfilter: x_tables: rectify XT_FUNCTION_MAXNAMELEN usage There has been quite a confusion in userspace about XT_FUNCTION_MAXNAMELEN; because struct xt_entry_match used MAX-1, userspace would have to do an awkward MAX-2 for maximum length checking (due to '\0'). This patch adds a new define that matches the definition of XT_TABLE_MAXNAMELEN - being the size of the actual struct member, not one off. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 50c867256ca..eeb4884c30b 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -4,6 +4,7 @@ #include #define XT_FUNCTION_MAXNAMELEN 30 +#define XT_EXTENSION_MAXNAMELEN 29 #define XT_TABLE_MAXNAMELEN 32 struct xt_entry_match { @@ -12,8 +13,7 @@ struct xt_entry_match { __u16 match_size; /* Used by userspace */ - char name[XT_FUNCTION_MAXNAMELEN-1]; - + char name[XT_EXTENSION_MAXNAMELEN]; __u8 revision; } user; struct { @@ -36,8 +36,7 @@ struct xt_entry_target { __u16 target_size; /* Used by userspace */ - char name[XT_FUNCTION_MAXNAMELEN-1]; - + char name[XT_EXTENSION_MAXNAMELEN]; __u8 revision; } user; struct { @@ -70,8 +69,7 @@ struct xt_standard_target { /* The argument to IPT_SO_GET_REVISION_*. Returns highest revision * kernel supports, if >= revision. */ struct xt_get_revision { - char name[XT_FUNCTION_MAXNAMELEN-1]; - + char name[XT_EXTENSION_MAXNAMELEN]; __u8 revision; }; @@ -291,7 +289,7 @@ struct xt_tgdtor_param { struct xt_match { struct list_head list; - const char name[XT_FUNCTION_MAXNAMELEN-1]; + const char name[XT_EXTENSION_MAXNAMELEN]; u_int8_t revision; /* Return true or false: return FALSE and set *hotdrop = 1 to @@ -330,7 +328,7 @@ struct xt_match { struct xt_target { struct list_head list; - const char name[XT_FUNCTION_MAXNAMELEN-1]; + const char name[XT_EXTENSION_MAXNAMELEN]; u_int8_t revision; /* Returns verdict. Argument order changed since 2.6.9, as this -- cgit v1.2.3