summaryrefslogtreecommitdiff
path: root/net/core/skbuff.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-02-20 18:58:50 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2013-02-20 18:58:50 -0800
commita0b1c42951dd06ec83cc1bc2c9788131d9fefcd8 (patch)
treea572f1523cf904c93020c9cdb32f3bc84ec3ac16 /net/core/skbuff.c
parent8ec4942212a6d337982967778a3dc3b60aea782e (diff)
parentecd9883724b78cc72ed92c98bcb1a46c764fff21 (diff)
downloadlinux-3.10-a0b1c42951dd06ec83cc1bc2c9788131d9fefcd8.tar.gz
linux-3.10-a0b1c42951dd06ec83cc1bc2c9788131d9fefcd8.tar.bz2
linux-3.10-a0b1c42951dd06ec83cc1bc2c9788131d9fefcd8.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking update from David Miller: 1) Checkpoint/restarted TCP sockets now can properly propagate the TCP timestamp offset. From Andrey Vagin. 2) VMWARE VM VSOCK layer, from Andy King. 3) Much improved support for virtual functions and SR-IOV in bnx2x, from Ariel ELior. 4) All protocols on ipv4 and ipv6 are now network namespace aware, and all the compatability checks for initial-namespace-only protocols is removed. Thanks to Tom Parkin for helping deal with the last major holdout, L2TP. 5) IPV6 support in netpoll and network namespace support in pktgen, from Cong Wang. 6) Multiple Registration Protocol (MRP) and Multiple VLAN Registration Protocol (MVRP) support, from David Ward. 7) Compute packet lengths more accurately in the packet scheduler, from Eric Dumazet. 8) Use per-task page fragment allocator in skb_append_datato_frags(), also from Eric Dumazet. 9) Add support for connection tracking labels in netfilter, from Florian Westphal. 10) Fix default multicast group joining on ipv6, and add anti-spoofing checks to 6to4 and 6rd. From Hannes Frederic Sowa. 11) Make ipv4/ipv6 fragmentation memory limits more reasonable in modern times, rearrange inet frag datastructures for better cacheline locality, and move more operations outside of locking. From Jesper Dangaard Brouer. 12) Instead of strict master <--> slave relationships, allow arbitrary scenerios with "upper device lists". From Jiri Pirko. 13) Improve rate limiting accuracy in TBF and act_police, also from Jiri Pirko. 14) Add a BPF filter netfilter match target, from Willem de Bruijn. 15) Orphan and delete a bunch of pre-historic networking drivers from Paul Gortmaker. 16) Add TSO support for GRE tunnels, from Pravin B SHelar. Although this still needs some minor bug fixing before it's %100 correct in all cases. 17) Handle unresolved IPSEC states like ARP, with a resolution packet queue. From Steffen Klassert. 18) Remove TCP Appropriate Byte Count support (ABC), from Stephen Hemminger. This was long overdue. 19) Support SO_REUSEPORT, from Tom Herbert. 20) Allow locking a socket BPF filter, so that it cannot change after a process drops capabilities. 21) Add VLAN filtering to bridge, from Vlad Yasevich. 22) Bring ipv6 on-par with ipv4 and do not cache neighbour entries in the ipv6 routes, from YOSHIFUJI Hideaki. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1538 commits) ipv6: fix race condition regarding dst->expires and dst->from. net: fix a wrong assignment in skb_split() ip_gre: remove an extra dst_release() ppp: set qdisc_tx_busylock to avoid LOCKDEP splat atl1c: restore buffer state net: fix a build failure when !CONFIG_PROC_FS net: ipv4: fix waring -Wunused-variable net: proc: fix build failed when procfs is not configured Revert "xen: netback: remove redundant xenvif_put" net: move procfs code to net/core/net-procfs.c qmi_wwan, cdc-ether: add ADU960S bonding: set sysfs device_type to 'bond' bonding: fix bond_release_all inconsistencies b44: use netdev_alloc_skb_ip_align() xen: netback: remove redundant xenvif_put net: fec: Do a sanity check on the gpio number ip_gre: propogate target device GSO capability to the tunnel device ip_gre: allow CSUM capable devices to handle packets bonding: Fix initialize after use for 3ad machine state spinlock bonding: Fix race condition between bond_enslave() and bond_3ad_update_lacp_rate() ...
Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r--net/core/skbuff.c111
1 files changed, 48 insertions, 63 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 32443ebc3e8..33245ef54c3 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -104,47 +104,37 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = {
.get = sock_pipe_buf_get,
};
-/*
- * Keep out-of-line to prevent kernel bloat.
- * __builtin_return_address is not used because it is not always
- * reliable.
- */
-
/**
- * skb_over_panic - private function
- * @skb: buffer
- * @sz: size
- * @here: address
- *
- * Out of line support code for skb_put(). Not user callable.
+ * skb_panic - private function for out-of-line support
+ * @skb: buffer
+ * @sz: size
+ * @addr: address
+ * @msg: skb_over_panic or skb_under_panic
+ *
+ * Out-of-line support for skb_put() and skb_push().
+ * Called via the wrapper skb_over_panic() or skb_under_panic().
+ * Keep out of line to prevent kernel bloat.
+ * __builtin_return_address is not used because it is not always reliable.
*/
-static void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
+ const char msg[])
{
pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
- __func__, here, skb->len, sz, skb->head, skb->data,
+ msg, addr, skb->len, sz, skb->head, skb->data,
(unsigned long)skb->tail, (unsigned long)skb->end,
skb->dev ? skb->dev->name : "<NULL>");
BUG();
}
-/**
- * skb_under_panic - private function
- * @skb: buffer
- * @sz: size
- * @here: address
- *
- * Out of line support code for skb_push(). Not user callable.
- */
-
-static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr)
{
- pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
- __func__, here, skb->len, sz, skb->head, skb->data,
- (unsigned long)skb->tail, (unsigned long)skb->end,
- skb->dev ? skb->dev->name : "<NULL>");
- BUG();
+ skb_panic(skb, sz, addr, __func__);
}
+static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
+{
+ skb_panic(skb, sz, addr, __func__);
+}
/*
* kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
@@ -155,8 +145,9 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
*/
#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
__kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
-void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip,
- bool *pfmemalloc)
+
+static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
+ unsigned long ip, bool *pfmemalloc)
{
void *obj;
bool ret_pfmemalloc = false;
@@ -259,6 +250,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
skb->end = skb->tail + size;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
skb->mac_header = ~0U;
+ skb->transport_header = ~0U;
#endif
/* make sure we initialize shinfo sequentially */
@@ -327,6 +319,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
skb->end = skb->tail + size;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
skb->mac_header = ~0U;
+ skb->transport_header = ~0U;
#endif
/* make sure we initialize shinfo sequentially */
@@ -348,10 +341,6 @@ struct netdev_alloc_cache {
};
static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
-#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
-#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
-#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE
-
static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
struct netdev_alloc_cache *nc;
@@ -2337,6 +2326,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{
int pos = skb_headlen(skb);
+ skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
if (len < pos) /* Split line is inside header. */
skb_split_inside_header(skb, skb1, len, pos);
else /* Second chunk has no header, nothing to copy. */
@@ -2668,48 +2658,37 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
int len, int odd, struct sk_buff *skb),
void *from, int length)
{
- int frg_cnt = 0;
- skb_frag_t *frag = NULL;
- struct page *page = NULL;
- int copy, left;
+ int frg_cnt = skb_shinfo(skb)->nr_frags;
+ int copy;
int offset = 0;
int ret;
+ struct page_frag *pfrag = &current->task_frag;
do {
/* Return error if we don't have space for new frag */
- frg_cnt = skb_shinfo(skb)->nr_frags;
if (frg_cnt >= MAX_SKB_FRAGS)
- return -EFAULT;
+ return -EMSGSIZE;
- /* allocate a new page for next frag */
- page = alloc_pages(sk->sk_allocation, 0);
-
- /* If alloc_page fails just return failure and caller will
- * free previous allocated pages by doing kfree_skb()
- */
- if (page == NULL)
+ if (!sk_page_frag_refill(sk, pfrag))
return -ENOMEM;
- /* initialize the next frag */
- skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
- skb->truesize += PAGE_SIZE;
- atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
-
- /* get the new initialized frag */
- frg_cnt = skb_shinfo(skb)->nr_frags;
- frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
-
/* copy the user data to page */
- left = PAGE_SIZE - frag->page_offset;
- copy = (length > left)? left : length;
+ copy = min_t(int, length, pfrag->size - pfrag->offset);
- ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag),
- offset, copy, 0, skb);
+ ret = getfrag(from, page_address(pfrag->page) + pfrag->offset,
+ offset, copy, 0, skb);
if (ret < 0)
return -EFAULT;
/* copy was successful so update the size parameters */
- skb_frag_size_add(frag, copy);
+ skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset,
+ copy);
+ frg_cnt++;
+ pfrag->offset += copy;
+ get_page(pfrag->page);
+
+ skb->truesize += copy;
+ atomic_add(copy, &sk->sk_wmem_alloc);
skb->len += copy;
skb->data_len += copy;
offset += copy;
@@ -2759,6 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
unsigned int mss = skb_shinfo(skb)->gso_size;
unsigned int doffset = skb->data - skb_mac_header(skb);
unsigned int offset = doffset;
+ unsigned int tnl_hlen = skb_tnl_header_len(skb);
unsigned int headroom;
unsigned int len;
int sg = !!(features & NETIF_F_SG);
@@ -2835,7 +2815,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
skb_set_network_header(nskb, skb->mac_len);
nskb->transport_header = (nskb->network_header +
skb_network_header_len(skb));
- skb_copy_from_linear_data(skb, nskb->data, doffset);
+
+ skb_copy_from_linear_data_offset(skb, -tnl_hlen,
+ nskb->data - tnl_hlen,
+ doffset + tnl_hlen);
if (fskb != skb_shinfo(skb)->frag_list)
continue;
@@ -2853,6 +2836,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
skb_copy_from_linear_data_offset(skb, offset,
skb_put(nskb, hsize), hsize);
+ skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
+
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
__skb_frag_ref(frag);