From 8b27b10f5863a5b63e46304a71aa01463d1efac4 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:17:20 +0300 Subject: ipvs: optimize checksums for apps Avoid full checksum calculation for apps that can provide info whether csum was broken after payload mangling. For now only ip_vs_ftp mangles payload and it updates the csum, so the full recalculation is avoided for all packets. Add CHECKSUM_UNNECESSARY for snat_handler (TCP and UDP). It is needed to support SNAT from local address for the case when csum is fully recalculated. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 6e8a6192e57..adcdba9dd18 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -597,11 +597,19 @@ struct ip_vs_app { __be16 port; /* port number in net order */ atomic_t usecnt; /* usage counter */ - /* output hook: return false if can't linearize. diff set for TCP. */ + /* + * output hook: Process packet in inout direction, diff set for TCP. + * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, + * 2=Mangled but checksum was not updated + */ int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *, struct sk_buff *, int *diff); - /* input hook: return false if can't linearize. diff set for TCP. */ + /* + * input hook: Process packet in outin direction, diff set for TCP. + * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, + * 2=Mangled but checksum was not updated + */ int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *, struct sk_buff *, int *diff); -- cgit v1.2.3 From cf356d69db0afef692cd640917bc70f708c27f14 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:21:07 +0300 Subject: ipvs: switch to notrack mode Change skb->ipvs_property semantic. This is preparation to support ip_vs_out processing in LOCAL_OUT. ipvs_property=1 will be used to avoid expensive lookups for traffic sent by transmitters. Now when conntrack support is not used we call ip_vs_notrack method to avoid problems in OUTPUT and POST_ROUTING hooks instead of exiting POST_ROUTING as before. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index adcdba9dd18..0e4618470ce 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -25,7 +25,7 @@ #include #include /* for struct ipv6hdr */ #include /* for ipv6_addr_copy */ -#ifdef CONFIG_IP_VS_NFCT +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include #endif @@ -1021,6 +1021,24 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) return csum_partial(diff, sizeof(diff), oldsum); } +/* + * Forget current conntrack (unconfirmed) and attach notrack entry + */ +static inline void ip_vs_notrack(struct sk_buff *skb) +{ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); + + if (!ct || !nf_ct_is_untracked(ct)) { + nf_reset(skb); + skb->nfct = &nf_ct_untracked_get()->ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); + } +#endif +} + #ifdef CONFIG_IP_VS_NFCT /* * Netfilter connection tracking -- cgit v1.2.3 From 190ecd27cd7294105e3b26ca71663c7d940acbbb Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:24:37 +0300 Subject: ipvs: do not schedule conns from real servers This patch is needed to avoid scheduling of packets from local real server when we add ip_vs_in in LOCAL_OUT hook to support local client. Currently, when ip_vs_in can not find existing connection it tries to create new one by calling ip_vs_schedule. The default indication from ip_vs_schedule was if connection was scheduled to real server. If real server is not available we try to use the bypass forwarding method or to send ICMP error. But in some cases we do not want to use the bypass feature. So, add flag 'ignored' to indicate if the scheduler ignores this packet. Make sure we do not create new connections from replies. We can hit this problem for persistent services and local real server when ip_vs_in is added to LOCAL_OUT hook to handle local clients. Also, make sure ip_vs_schedule ignores SYN packets for Active FTP DATA from local real server. The FTP DATA connection should be created on SYN+ACK from client to assign correct connection daddr. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 0e4618470ce..9d5c1b96530 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -849,7 +849,8 @@ extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * -ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb); +ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, + struct ip_vs_protocol *pp, int *ignored); extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp); -- cgit v1.2.3 From fc604767613b6d2036cdc35b660bc39451040a47 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:38:15 +0300 Subject: ipvs: changes for local real server This patch deals with local real servers: - Add support for DNAT to local address (different real server port). It needs ip_vs_out hook in LOCAL_OUT for both families because skb->protocol is not set for locally generated packets and can not be used to set 'af'. - Skip packets in ip_vs_in marked with skb->ipvs_property because ip_vs_out processing can be executed in LOCAL_OUT but we still have the conn_out_get check in ip_vs_in. - Ignore packets with inet->nodefrag from local stack - Require skb_dst(skb) != NULL because we use it to get struct net - Add support for changing the route to local IPv4 stack after DNAT depending on the source address type. Local client sets output route and the remote client sets input route. It looks like IPv6 does not need such rerouting because the replies use addresses from initial incoming header, not from skb route. - All transmitters now have strict checks for the destination address type: redirect from non-local address to local real server requires NAT method, local address can not be used as source address when talking to remote real server. - Now LOCALNODE is not set explicitly as forwarding method in real server to allow the connections to provide correct forwarding method to the backup server. Not sure if this breaks tools that expect to see 'Local' real server type. If needed, this can be supported with new flag IP_VS_DEST_F_LOCAL. Now it should be possible connections in backup that lost their fwmark information during sync to be forwarded properly to their daddr, even if it is local address in the backup server. By this way backup could be used as real server for DR or TUN, for NAT there are some restrictions because tuple collisions in conntracks can create problems for the traffic. - Call ip_vs_dst_reset when destination is updated in case some real server IP type is changed between local and remote. [ horms@verge.net.au: removed trailing whitespace ] Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 9d5c1b96530..2f88d594233 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -409,6 +409,7 @@ struct ip_vs_conn { /* packet transmitter for different forwarding methods. If it mangles the packet, it must return NF_DROP or better NF_STOLEN, otherwise this must be changed to a sk_buff **. + NF_ACCEPT can be returned when destination is local. */ int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); -- cgit v1.2.3 From 0d79641a96d612aaa6d57a4d4f521d7ed9c9ccdd Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:46:17 +0300 Subject: ipvs: provide address family for debugging As skb->protocol is not valid in LOCAL_OUT add parameter for address family in packet debugging functions. Even if ports are not present in AH and ESP change them to use ip_vs_tcpudp_debug_packet to show at least valid addresses as before. This patch removes the last user of skb->protocol in IPVS. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 2f88d594233..b7bbd6c28cf 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -136,24 +136,24 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, if (net_ratelimit()) \ printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__); \ } while (0) -#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) \ +#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg) \ do { \ if (level <= ip_vs_get_debug_level()) \ - pp->debug_packet(pp, skb, ofs, msg); \ + pp->debug_packet(af, pp, skb, ofs, msg); \ } while (0) -#define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) \ +#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg) \ do { \ if (level <= ip_vs_get_debug_level() && \ net_ratelimit()) \ - pp->debug_packet(pp, skb, ofs, msg); \ + pp->debug_packet(af, pp, skb, ofs, msg); \ } while (0) #else /* NO DEBUGGING at ALL */ #define IP_VS_DBG_BUF(level, msg...) do {} while (0) #define IP_VS_ERR_BUF(msg...) do {} while (0) #define IP_VS_DBG(level, msg...) do {} while (0) #define IP_VS_DBG_RL(msg...) do {} while (0) -#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) do {} while (0) -#define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) do {} while (0) +#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg) do {} while (0) +#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg) do {} while (0) #endif #define IP_VS_BUG() BUG() @@ -345,7 +345,7 @@ struct ip_vs_protocol { int (*app_conn_bind)(struct ip_vs_conn *cp); - void (*debug_packet)(struct ip_vs_protocol *pp, + void (*debug_packet)(int af, struct ip_vs_protocol *pp, const struct sk_buff *skb, int offset, const char *msg); @@ -828,7 +828,8 @@ extern int ip_vs_set_state_timeout(int *table, int num, const char *const *names, const char *name, int to); extern void -ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, +ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, + const struct sk_buff *skb, int offset, const char *msg); extern struct ip_vs_protocol ip_vs_protocol_tcp; -- cgit v1.2.3