summaryrefslogtreecommitdiff
path: root/net/packet/af_packet.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 13:38:27 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 13:38:27 -0700
commitaecdc33e111b2c447b622e287c6003726daa1426 (patch)
tree3e7657eae4b785e1a1fb5dfb225dbae0b2f0cfc6 /net/packet/af_packet.c
parenta20acf99f75e49271381d65db097c9763060a1e8 (diff)
parenta3a6cab5ea10cca64d036851fe0d932448f2fe4f (diff)
downloadlinux-stable-aecdc33e111b2c447b622e287c6003726daa1426.tar.gz
linux-stable-aecdc33e111b2c447b622e287c6003726daa1426.tar.bz2
linux-stable-aecdc33e111b2c447b622e287c6003726daa1426.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David Miller: 1) GRE now works over ipv6, from Dmitry Kozlov. 2) Make SCTP more network namespace aware, from Eric Biederman. 3) TEAM driver now works with non-ethernet devices, from Jiri Pirko. 4) Make openvswitch network namespace aware, from Pravin B Shelar. 5) IPV6 NAT implementation, from Patrick McHardy. 6) Server side support for TCP Fast Open, from Jerry Chu and others. 7) Packet BPF filter supports MOD and XOR, from Eric Dumazet and Daniel Borkmann. 8) Increate the loopback default MTU to 64K, from Eric Dumazet. 9) Use a per-task rather than per-socket page fragment allocator for outgoing networking traffic. This benefits processes that have very many mostly idle sockets, which is quite common. From Eric Dumazet. 10) Use up to 32K for page fragment allocations, with fallbacks to smaller sizes when higher order page allocations fail. Benefits are a) less segments for driver to process b) less calls to page allocator c) less waste of space. From Eric Dumazet. 11) Allow GRO to be used on GRE tunnels, from Eric Dumazet. 12) VXLAN device driver, one way to handle VLAN issues such as the limitation of 4096 VLAN IDs yet still have some level of isolation. From Stephen Hemminger. 13) As usual there is a large boatload of driver changes, with the scale perhaps tilted towards the wireless side this time around. Fix up various fairly trivial conflicts, mostly caused by the user namespace changes. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1012 commits) hyperv: Add buffer for extended info after the RNDIS response message. hyperv: Report actual status in receive completion packet hyperv: Remove extra allocated space for recv_pkt_list elements hyperv: Fix page buffer handling in rndis_filter_send_request() hyperv: Fix the missing return value in rndis_filter_set_packet_filter() hyperv: Fix the max_xfer_size in RNDIS initialization vxlan: put UDP socket in correct namespace vxlan: Depend on CONFIG_INET sfc: Fix the reported priorities of different filter types sfc: Remove EFX_FILTER_FLAG_RX_OVERRIDE_IP sfc: Fix loopback self-test with separate_tx_channels=1 sfc: Fix MCDI structure field lookup sfc: Add parentheses around use of bitfield macro arguments sfc: Fix null function pointer in efx_sriov_channel_type vxlan: virtual extensible lan igmp: export symbol ip_mc_leave_group netlink: add attributes to fdb interface tg3: unconditionally select HWMON support when tg3 is enabled. Revert "net: ti cpsw ethernet: allow reading phy interface mode from DT" gre: fix sparse warning ...
Diffstat (limited to 'net/packet/af_packet.c')
-rw-r--r--net/packet/af_packet.c143
1 files changed, 19 insertions, 124 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 048fba476aa5..94060edbbd70 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -93,6 +93,8 @@
#include <net/inet_common.h>
#endif
+#include "internal.h"
+
/*
Assumptions:
- if device has no dev->hard_header routine, it adds and removes ll header
@@ -146,14 +148,6 @@ dev->hard_header == NULL (ll header is added by device, we cannot control it)
/* Private packet socket structures. */
-struct packet_mclist {
- struct packet_mclist *next;
- int ifindex;
- int count;
- unsigned short type;
- unsigned short alen;
- unsigned char addr[MAX_ADDR_LEN];
-};
/* identical to struct packet_mreq except it has
* a longer address field.
*/
@@ -175,63 +169,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
#define BLK_PLUS_PRIV(sz_of_priv) \
(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
-/* kbdq - kernel block descriptor queue */
-struct tpacket_kbdq_core {
- struct pgv *pkbdq;
- unsigned int feature_req_word;
- unsigned int hdrlen;
- unsigned char reset_pending_on_curr_blk;
- unsigned char delete_blk_timer;
- unsigned short kactive_blk_num;
- unsigned short blk_sizeof_priv;
-
- /* last_kactive_blk_num:
- * trick to see if user-space has caught up
- * in order to avoid refreshing timer when every single pkt arrives.
- */
- unsigned short last_kactive_blk_num;
-
- char *pkblk_start;
- char *pkblk_end;
- int kblk_size;
- unsigned int knum_blocks;
- uint64_t knxt_seq_num;
- char *prev;
- char *nxt_offset;
- struct sk_buff *skb;
-
- atomic_t blk_fill_in_prog;
-
- /* Default is set to 8ms */
-#define DEFAULT_PRB_RETIRE_TOV (8)
-
- unsigned short retire_blk_tov;
- unsigned short version;
- unsigned long tov_in_jiffies;
-
- /* timer to retire an outstanding block */
- struct timer_list retire_blk_timer;
-};
-
#define PGV_FROM_VMALLOC 1
-struct pgv {
- char *buffer;
-};
-
-struct packet_ring_buffer {
- struct pgv *pg_vec;
- unsigned int head;
- unsigned int frames_per_block;
- unsigned int frame_size;
- unsigned int frame_max;
-
- unsigned int pg_vec_order;
- unsigned int pg_vec_pages;
- unsigned int pg_vec_len;
-
- struct tpacket_kbdq_core prb_bdqc;
- atomic_t pending;
-};
#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts)
@@ -269,52 +207,6 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
struct tpacket3_hdr *);
static void packet_flush_mclist(struct sock *sk);
-struct packet_fanout;
-struct packet_sock {
- /* struct sock has to be the first member of packet_sock */
- struct sock sk;
- struct packet_fanout *fanout;
- struct tpacket_stats stats;
- union tpacket_stats_u stats_u;
- struct packet_ring_buffer rx_ring;
- struct packet_ring_buffer tx_ring;
- int copy_thresh;
- spinlock_t bind_lock;
- struct mutex pg_vec_lock;
- unsigned int running:1, /* prot_hook is attached*/
- auxdata:1,
- origdev:1,
- has_vnet_hdr:1;
- int ifindex; /* bound device */
- __be16 num;
- struct packet_mclist *mclist;
- atomic_t mapped;
- enum tpacket_versions tp_version;
- unsigned int tp_hdrlen;
- unsigned int tp_reserve;
- unsigned int tp_loss:1;
- unsigned int tp_tstamp;
- struct packet_type prot_hook ____cacheline_aligned_in_smp;
-};
-
-#define PACKET_FANOUT_MAX 256
-
-struct packet_fanout {
-#ifdef CONFIG_NET_NS
- struct net *net;
-#endif
- unsigned int num_members;
- u16 id;
- u8 type;
- u8 defrag;
- atomic_t rr_cur;
- struct list_head list;
- struct sock *arr[PACKET_FANOUT_MAX];
- spinlock_t lock;
- atomic_t sk_ref;
- struct packet_type prot_hook ____cacheline_aligned_in_smp;
-};
-
struct packet_skb_cb {
unsigned int origlen;
union {
@@ -334,11 +226,6 @@ struct packet_skb_cb {
(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
((x)->kactive_blk_num+1) : 0)
-static struct packet_sock *pkt_sk(struct sock *sk)
-{
- return (struct packet_sock *)sk;
-}
-
static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);
@@ -968,7 +855,8 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb);
ppd->tp_status = TP_STATUS_VLAN_VALID;
} else {
- ppd->hv1.tp_vlan_tci = ppd->tp_status = 0;
+ ppd->hv1.tp_vlan_tci = 0;
+ ppd->tp_status = TP_STATUS_AVAILABLE;
}
}
@@ -1243,7 +1131,8 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
}
-static DEFINE_MUTEX(fanout_mutex);
+DEFINE_MUTEX(fanout_mutex);
+EXPORT_SYMBOL_GPL(fanout_mutex);
static LIST_HEAD(fanout_list);
static void __fanout_link(struct sock *sk, struct packet_sock *po)
@@ -1364,9 +1253,9 @@ static void fanout_release(struct sock *sk)
if (!f)
return;
+ mutex_lock(&fanout_mutex);
po->fanout = NULL;
- mutex_lock(&fanout_mutex);
if (atomic_dec_and_test(&f->sk_ref)) {
list_del(&f->list);
dev_remove_pack(&f->prot_hook);
@@ -2063,7 +1952,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
int tp_len, size_max;
unsigned char *addr;
int len_sum = 0;
- int status = 0;
+ int status = TP_STATUS_AVAILABLE;
int hlen, tlen;
mutex_lock(&po->pg_vec_lock);
@@ -2428,10 +2317,13 @@ static int packet_release(struct socket *sock)
net = sock_net(sk);
po = pkt_sk(sk);
- spin_lock_bh(&net->packet.sklist_lock);
+ mutex_lock(&net->packet.sklist_lock);
sk_del_node_init_rcu(sk);
+ mutex_unlock(&net->packet.sklist_lock);
+
+ preempt_disable();
sock_prot_inuse_add(net, sk->sk_prot, -1);
- spin_unlock_bh(&net->packet.sklist_lock);
+ preempt_enable();
spin_lock(&po->bind_lock);
unregister_prot_hook(sk, false);
@@ -2630,10 +2522,13 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
register_prot_hook(sk);
}
- spin_lock_bh(&net->packet.sklist_lock);
+ mutex_lock(&net->packet.sklist_lock);
sk_add_node_rcu(sk, &net->packet.sklist);
+ mutex_unlock(&net->packet.sklist_lock);
+
+ preempt_disable();
sock_prot_inuse_add(net, &packet_proto, 1);
- spin_unlock_bh(&net->packet.sklist_lock);
+ preempt_enable();
return 0;
out:
@@ -3886,7 +3781,7 @@ static const struct file_operations packet_seq_fops = {
static int __net_init packet_net_init(struct net *net)
{
- spin_lock_init(&net->packet.sklist_lock);
+ mutex_init(&net->packet.sklist_lock);
INIT_HLIST_HEAD(&net->packet.sklist);
if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))