From b2111724a639ec31a19fdca62ea3a0a222d59d11 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Dec 2012 06:06:37 +0000 Subject: net: use per task frag allocator in skb_append_datato_frags Use the new per task frag allocator in skb_append_datato_frags(), to reduce number of frags and page allocator overhead. Tested: ifconfig lo mtu 16436 perf record netperf -t UDP_STREAM ; perf report before : Throughput: 32928 Mbit/s 51.79% netperf [kernel.kallsyms] [k] copy_user_generic_string 5.98% netperf [kernel.kallsyms] [k] __alloc_pages_nodemask 5.58% netperf [kernel.kallsyms] [k] get_page_from_freelist 5.01% netperf [kernel.kallsyms] [k] __rmqueue 3.74% netperf [kernel.kallsyms] [k] skb_append_datato_frags 1.87% netperf [kernel.kallsyms] [k] prep_new_page 1.42% netperf [kernel.kallsyms] [k] next_zones_zonelist 1.28% netperf [kernel.kallsyms] [k] __inc_zone_state 1.26% netperf [kernel.kallsyms] [k] alloc_pages_current 0.78% netperf [kernel.kallsyms] [k] sock_alloc_send_pskb 0.74% netperf [kernel.kallsyms] [k] udp_sendmsg 0.72% netperf [kernel.kallsyms] [k] zone_watermark_ok 0.68% netperf [kernel.kallsyms] [k] __cpuset_node_allowed_softwall 0.67% netperf [kernel.kallsyms] [k] fib_table_lookup 0.60% netperf [kernel.kallsyms] [k] memcpy_fromiovecend 0.55% netperf [kernel.kallsyms] [k] __udp4_lib_lookup after: Throughput: 47185 Mbit/s 61.74% netperf [kernel.kallsyms] [k] copy_user_generic_string 2.07% netperf [kernel.kallsyms] [k] prep_new_page 1.98% netperf [kernel.kallsyms] [k] skb_append_datato_frags 1.02% netperf [kernel.kallsyms] [k] sock_alloc_send_pskb 0.97% netperf [kernel.kallsyms] [k] enqueue_task_fair 0.97% netperf [kernel.kallsyms] [k] udp_sendmsg 0.91% netperf [kernel.kallsyms] [k] __ip_route_output_key 0.88% netperf [kernel.kallsyms] [k] __netif_receive_skb 0.87% netperf [kernel.kallsyms] [k] fib_table_lookup 0.85% netperf [kernel.kallsyms] [k] resched_task 0.78% netperf [kernel.kallsyms] [k] __udp4_lib_lookup 0.77% netperf [kernel.kallsyms] [k] 
_raw_spin_lock_irqsave Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 43 ++++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 27 deletions(-) (limited to 'net/core/skbuff.c') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3ab989b0de4..ec8737ec59b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2686,48 +2686,37 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, int len, int odd, struct sk_buff *skb), void *from, int length) { - int frg_cnt = 0; - skb_frag_t *frag = NULL; - struct page *page = NULL; - int copy, left; + int frg_cnt = skb_shinfo(skb)->nr_frags; + int copy; int offset = 0; int ret; + struct page_frag *pfrag = &current->task_frag; do { /* Return error if we don't have space for new frag */ - frg_cnt = skb_shinfo(skb)->nr_frags; if (frg_cnt >= MAX_SKB_FRAGS) - return -EFAULT; - - /* allocate a new page for next frag */ - page = alloc_pages(sk->sk_allocation, 0); + return -EMSGSIZE; - /* If alloc_page fails just return failure and caller will - * free previous allocated pages by doing kfree_skb() - */ - if (page == NULL) + if (!sk_page_frag_refill(sk, pfrag)) return -ENOMEM; - /* initialize the next frag */ - skb_fill_page_desc(skb, frg_cnt, page, 0, 0); - skb->truesize += PAGE_SIZE; - atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); - - /* get the new initialized frag */ - frg_cnt = skb_shinfo(skb)->nr_frags; - frag = &skb_shinfo(skb)->frags[frg_cnt - 1]; - /* copy the user data to page */ - left = PAGE_SIZE - frag->page_offset; - copy = (length > left)? 
left : length; + copy = min_t(int, length, pfrag->size - pfrag->offset); - ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag), - offset, copy, 0, skb); + ret = getfrag(from, page_address(pfrag->page) + pfrag->offset, + offset, copy, 0, skb); if (ret < 0) return -EFAULT; /* copy was successful so update the size parameters */ - skb_frag_size_add(frag, copy); + skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset, + copy); + frg_cnt++; + pfrag->offset += copy; + get_page(pfrag->page); + + skb->truesize += copy; + atomic_add(copy, &sk->sk_wmem_alloc); skb->len += copy; skb->data_len += copy; offset += copy; -- cgit v1.2.3 From 61c5e88aecd6fbf2480f39394bb495964e6d9984 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 28 Dec 2012 18:24:28 +0000 Subject: skbuff: make __kmalloc_reserve static Sparse detected case where this local function should be static. It may even allow some compiler optimizations. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/skbuff.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/core/skbuff.c') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ec8737ec59b..bc96100fe23 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -155,8 +155,9 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) */ #define kmalloc_reserve(size, gfp, node, pfmemalloc) \ __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc) -void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip, - bool *pfmemalloc) + +static void *__kmalloc_reserve(size_t size, gfp_t flags, int node, + unsigned long ip, bool *pfmemalloc) { void *obj; bool ret_pfmemalloc = false; -- cgit v1.2.3 From 9ca1b22d6d228177e6f929f6818a1cd3d5e30c4a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 5 Jan 2013 21:31:18 +0000 Subject: net: splice: avoid high order page splitting splice() can handle pages of any order, but network code tries hard to split them in PAGE_SIZE 
units. Not quite successfully anyway, as __splice_segment() assumed poff < PAGE_SIZE. This is true for the skb->data part, not necessarily for the fragments. This patch removes this logic to give the pages as they are in the skb. Signed-off-by: Eric Dumazet Cc: Willy Tarreau Signed-off-by: David S. Miller --- net/core/skbuff.c | 38 +++++++++----------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) (limited to 'net/core/skbuff.c') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index bc96100fe23..b03fc0c6a95 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1707,20 +1707,6 @@ static bool spd_fill_page(struct splice_pipe_desc *spd, return false; } -static inline void __segment_seek(struct page **page, unsigned int *poff, - unsigned int *plen, unsigned int off) -{ - unsigned long n; - - *poff += off; - n = *poff / PAGE_SIZE; - if (n) - *page = nth_page(*page, n); - - *poff = *poff % PAGE_SIZE; - *plen -= off; -} - static bool __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct sk_buff *skb, @@ -1728,6 +1714,8 @@ static bool __splice_segment(struct page *page, unsigned int poff, struct sock *sk, struct pipe_inode_info *pipe) { + unsigned int flen; + if (!*len) return true; @@ -1738,24 +1726,16 @@ static bool __splice_segment(struct page *page, unsigned int poff, } /* ignore any bits we already processed */ - if (*off) { - __segment_seek(&page, &poff, &plen, *off); - *off = 0; - } - - do { - unsigned int flen = min(*len, plen); + poff += *off; + plen -= *off; + *off = 0; - /* the linear region may spread across several pages */ - flen = min_t(unsigned int, flen, PAGE_SIZE - poff); + flen = min(*len, plen); - if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk)) - return true; - - __segment_seek(&page, &poff, &plen, flen); - *len -= flen; + if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk)) + return true; - } while (*len && plen); + *len -= flen; 
return false; } -- cgit v1.2.3 From fda55eca5a33f33ffcd4192c6b2d75179714a52c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jan 2013 09:28:21 +0000 Subject: net: introduce skb_transport_header_was_set() We have skb_mac_header_was_set() helper to tell if mac_header was set on a skb. We would like the same for transport_header. __netif_receive_skb() doesn't reset the transport header if already set by GRO layer. Note that network stacks usually reset the transport header anyway, after pulling the network header, so this change only allows a followup patch to have more precise qdisc pkt_len computation for GSO packets at ingress side. Signed-off-by: Eric Dumazet Cc: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/core/skbuff.c') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b03fc0c6a95..1e1b9ea0296 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -260,6 +260,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->end = skb->tail + size; #ifdef NET_SKBUFF_DATA_USES_OFFSET skb->mac_header = ~0U; + skb->transport_header = ~0U; #endif /* make sure we initialize shinfo sequentially */ @@ -328,6 +329,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) skb->end = skb->tail + size; #ifdef NET_SKBUFF_DATA_USES_OFFSET skb->mac_header = ~0U; + skb->transport_header = ~0U; #endif /* make sure we initialize shinfo sequentially */ -- cgit v1.2.3 From 18aafc622abf492809723d9c5a3c5dcea287169e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Jan 2013 14:46:37 +0000 Subject: net: splice: fix __splice_segment() commit 9ca1b22d6d2 (net: splice: avoid high order page splitting) forgot that skb->head could need a copy into several page frags. This could be the case for loopback traffic mostly. Also remove now useless skb argument from linear_to_page() and __splice_segment() prototypes. 
Signed-off-by: Eric Dumazet Cc: Willy Tarreau Signed-off-by: David S. Miller --- net/core/skbuff.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'net/core/skbuff.c') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1e1b9ea0296..2568c449fe3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1652,7 +1652,7 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) static struct page *linear_to_page(struct page *page, unsigned int *len, unsigned int *offset, - struct sk_buff *skb, struct sock *sk) + struct sock *sk) { struct page_frag *pfrag = sk_page_frag(sk); @@ -1685,14 +1685,14 @@ static bool spd_can_coalesce(const struct splice_pipe_desc *spd, static bool spd_fill_page(struct splice_pipe_desc *spd, struct pipe_inode_info *pipe, struct page *page, unsigned int *len, unsigned int offset, - struct sk_buff *skb, bool linear, + bool linear, struct sock *sk) { if (unlikely(spd->nr_pages == MAX_SKB_FRAGS)) return true; if (linear) { - page = linear_to_page(page, len, &offset, skb, sk); + page = linear_to_page(page, len, &offset, sk); if (!page) return true; } @@ -1711,13 +1711,11 @@ static bool spd_fill_page(struct splice_pipe_desc *spd, static bool __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, - unsigned int *len, struct sk_buff *skb, + unsigned int *len, struct splice_pipe_desc *spd, bool linear, struct sock *sk, struct pipe_inode_info *pipe) { - unsigned int flen; - if (!*len) return true; @@ -1732,12 +1730,16 @@ static bool __splice_segment(struct page *page, unsigned int poff, plen -= *off; *off = 0; - flen = min(*len, plen); - - if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk)) - return true; + do { + unsigned int flen = min(*len, plen); - *len -= flen; + if (spd_fill_page(spd, pipe, page, &flen, poff, + linear, sk)) + return true; + poff += flen; + plen -= flen; + *len -= flen; + } while (*len && plen); return false; 
} @@ -1760,7 +1762,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), - offset, len, skb, spd, + offset, len, spd, skb_head_is_locked(skb), sk, pipe)) return true; @@ -1773,7 +1775,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, if (__splice_segment(skb_frag_page(f), f->page_offset, skb_frag_size(f), - offset, len, skb, spd, false, sk, pipe)) + offset, len, spd, false, sk, pipe)) return true; } -- cgit v1.2.3 From cef401de7be8c4e155c6746bfccf721a4fa5fab9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 25 Jan 2013 20:34:37 +0000 Subject: net: fix possible wrong checksum generation Pravin Shelar mentioned that GSO could potentially generate wrong TX checksum if skb has fragments that are overwritten by the user between the checksum computation and transmit. He suggested to linearize skbs but this extra copy can be avoided for normal tcp skbs cooked by tcp_sendmsg(). This patch introduces a new SKB_GSO_SHARED_FRAG flag, set in skb_shinfo(skb)->gso_type if at least one frag can be modified by the user. Typical sources of such possible overwrites are {vm}splice(), sendfile(), and macvtap/tun/virtio_net drivers. Tested: $ netperf -H 7.7.8.84 MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.8.84 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 3959.52 $ netperf -H 7.7.8.84 -t TCP_SENDFILE TCP SENDFILE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.8.84 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 3216.80 Performance of the SENDFILE is impacted by the extra allocation and copy, and because we use order-0 pages, while the TCP_STREAM uses bigger pages. 
Reported-by: Pravin Shelar Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/core/skbuff.c') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2568c449fe3..bddc1dd2e7f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2340,6 +2340,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); + skb_shinfo(skb1)->gso_type = skb_shinfo(skb)->gso_type; + if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ @@ -2845,6 +2847,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); + skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; + while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; __skb_frag_ref(frag); -- cgit v1.2.3 From e5e67305885eb12849b5475764b0542f03dc2b59 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 8 Feb 2013 10:17:15 +0000 Subject: skbuff: Move definition of NETDEV_FRAG_PAGE_MAX_SIZE In order to address the fact that some devices cannot support the full 32K frag size we need to have the value accessible somewhere so that we can use it to do comparisons against what the device can support. As such I am moving the values out of skbuff.c and into skbuff.h. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net/core/skbuff.c') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 55f7ef6ada6..6114c114356 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -351,10 +351,6 @@ struct netdev_alloc_cache { }; static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) -#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) -#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE - static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { struct netdev_alloc_cache *nc; -- cgit v1.2.3 From f05de73bf82fbbc00265c06d12efb7273f7dc54a Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Mon, 11 Feb 2013 13:30:38 +0000 Subject: skbuff: create skb_panic() function and its wrappers Create skb_panic() function in lieu of both skb_over_panic() and skb_under_panic() so that code duplication would be avoided. Update type and variable name where necessary. Jiri Pirko suggested using wrappers so that we would be able to keep the fruits of the original code. Signed-off-by: Jean Sacren Cc: Jiri Pirko Signed-off-by: David S. Miller --- net/core/skbuff.c | 48 +++++++++++++++++++----------------------------- 1 file changed, 19 insertions(+), 29 deletions(-) (limited to 'net/core/skbuff.c') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6114c114356..8731c39b7a5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -104,47 +104,37 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = { .get = sock_pipe_buf_get, }; -/* - * Keep out-of-line to prevent kernel bloat. - * __builtin_return_address is not used because it is not always - * reliable. - */ - /** - * skb_over_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_put(). Not user callable. 
+ * skb_panic - private function for out-of-line support + * @skb: buffer + * @sz: size + * @addr: address + * @panic: skb_over_panic or skb_under_panic + * + * Out-of-line support for skb_put() and skb_push(). + * Called via the wrapper skb_over_panic() or skb_under_panic(). + * Keep out of line to prevent kernel bloat. + * __builtin_return_address is not used because it is not always reliable. */ -static void skb_over_panic(struct sk_buff *skb, int sz, void *here) +static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr, + const char panic[]) { pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", - __func__, here, skb->len, sz, skb->head, skb->data, + panic, addr, skb->len, sz, skb->head, skb->data, (unsigned long)skb->tail, (unsigned long)skb->end, skb->dev ? skb->dev->name : "<NULL>"); BUG(); } -/** - * skb_under_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_push(). Not user callable. - */ - -static void skb_under_panic(struct sk_buff *skb, int sz, void *here) +static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr) { - pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", - __func__, here, skb->len, sz, skb->head, skb->data, - (unsigned long)skb->tail, (unsigned long)skb->end, - skb->dev ? 
skb->dev->name : "<NULL>"); - BUG(); + skb_panic(skb, sz, addr, __func__); } +static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) +{ + skb_panic(skb, sz, addr, __func__); +} /* * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells -- cgit v1.2.3 From 99d5851eefe589346c976a4b539ee498267bc5fd Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 13 Feb 2013 11:20:27 +0000 Subject: net: skbuff: fix compile error in skb_panic() I get the following build error on next-20130213 due to the following commit: commit f05de73bf82fbbc00265c06d12efb7273f7dc54a ("skbuff: create skb_panic() function and its wrappers"). It adds an argument called panic to a function that uses the BUG() macro which tries to call panic, but the argument masks the panic() function declaration, resulting in the following error (gcc 4.2.4): net/core/skbuff.c In function 'skb_panic': net/core/skbuff.c +126 : error: called object 'panic' is not a function This is fixed by renaming the argument to msg. Signed-off-by: James Hogan Cc: Jean Sacren Cc: Jiri Pirko Cc: David S. Miller Signed-off-by: David S. Miller --- net/core/skbuff.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core/skbuff.c') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8731c39b7a5..21a22cce6e5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -109,7 +109,7 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = { * @skb: buffer * @sz: size * @addr: address - * @panic: skb_over_panic or skb_under_panic + * @msg: skb_over_panic or skb_under_panic * * Out-of-line support for skb_put() and skb_push(). * Called via the wrapper skb_over_panic() or skb_under_panic(). @@ -117,10 +117,10 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = { * __builtin_return_address is not used because it is not always reliable. 
*/ static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr, - const char panic[]) + const char msg[]) { pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", - panic, addr, skb->len, sz, skb->head, skb->data, + msg, addr, skb->len, sz, skb->head, skb->data, (unsigned long)skb->tail, (unsigned long)skb->end, skb->dev ? skb->dev->name : "<NULL>"); BUG(); -- cgit v1.2.3 From c9af6db4c11ccc6c3e7f19bbc15d54023956f97c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 11 Feb 2013 09:27:41 +0000 Subject: net: Fix possible wrong checksum generation. Patch cef401de7be8c4e (net: fix possible wrong checksum generation) fixed wrong checksum calculation but it broke TSO by defining new GSO type but not a netdev feature for that type. net_gso_ok() would not allow hardware checksum/segmentation offload of such packets without the feature. Following patch fixes TSO and wrong checksum. This patch uses same logic that Eric Dumazet used. Patch introduces new flag SKBTX_SHARED_FRAG if at least one frag can be modified by the user, but the SKBTX_SHARED_FRAG flag is kept in skb shared info tx_flags rather than gso_type. tx_flags is better compared to gso_type since we can have skb with shared frag without gso packet. It does not link SHARED_FRAG to GSO, so there is no need to define netdev feature for this. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/core/skbuff.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/core/skbuff.c') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 21a22cce6e5..6c1ad09f879 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2326,8 +2326,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); - skb_shinfo(skb1)->gso_type = skb_shinfo(skb)->gso_type; - + skb_shinfo(skb)->tx_flags = skb_shinfo(skb1)->tx_flags & SKBTX_SHARED_FRAG; if (len < pos) /* Split line is inside header. 
*/ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ @@ -2833,7 +2832,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); - skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; + skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; -- cgit v1.2.3 From 68c331631143f5f039baac99a650e0b9e1ea02b6 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 14 Feb 2013 14:02:41 +0000 Subject: v4 GRE: Add TCP segmentation offload for GRE Following patch adds GRE protocol offload handler so that skb_gso_segment() can segment GRE packets. SKB GSO CB is added to keep track of total header length so that skb_segment can push entire header. e.g. in case of GRE, skb_segment needs to push inner and outer headers to every segment. New NETIF_F_GRE_GSO feature is added for devices which support HW GRE TSO offload. Currently no devices support it, therefore GRE GSO always falls back to software GSO. [ Compute pkt_len before ip_local_out() invocation. -DaveM ] Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/core/skbuff.c') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6c1ad09f879..2a3ca33c30a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2738,6 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; + unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int headroom; unsigned int len; int sg = !!(features & NETIF_F_SG); @@ -2814,7 +2815,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_set_network_header(nskb, skb->mac_len); nskb->transport_header = (nskb->network_header + skb_network_header_len(skb)); - skb_copy_from_linear_data(skb, nskb->data, doffset); + + skb_copy_from_linear_data_offset(skb, -tnl_hlen, + nskb->data - tnl_hlen, + doffset + tnl_hlen); if (fskb != skb_shinfo(skb)->frag_list) continue; -- cgit v1.2.3 From 68534c682e8f5c333f835818ca5a89d3e6288870 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 19 Feb 2013 22:51:30 +0000 Subject: net: fix a wrong assignment in skb_split() commit c9af6db4c11ccc6c3e7f1 (net: Fix possible wrong checksum generation) has a suspicious piece: - skb_shinfo(skb1)->gso_type = skb_shinfo(skb)->gso_type; - + skb_shinfo(skb)->tx_flags = skb_shinfo(skb1)->tx_flags & SKBTX_SHARED_FRAG; skb1 is the new skb, therefore should be on the left side of the assignment. This patch fixes it. Cc: Pravin B Shelar Cc: David S. Miller Signed-off-by: Cong Wang Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/skbuff.c') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2a3ca33c30a..33245ef54c3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2326,7 +2326,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); - skb_shinfo(skb)->tx_flags = skb_shinfo(skb1)->tx_flags & SKBTX_SHARED_FRAG; + skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ -- cgit v1.2.3