summary | refs | log | tree | commit | diff
path: root/net/ipv4/udp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r-- net/ipv4/udp.c 165
1 files changed, 103 insertions, 62 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0fa9f70e4b1..d5e75e97651 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -106,7 +106,7 @@
#include <net/xfrm.h>
#include "udp_impl.h"
-struct udp_table udp_table;
+struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table);
int sysctl_udp_mem[3] __read_mostly;
@@ -121,14 +121,16 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min);
atomic_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);
-#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE)
+#define MAX_UDP_PORTS 65536
+#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
static int udp_lib_lport_inuse(struct net *net, __u16 num,
const struct udp_hslot *hslot,
unsigned long *bitmap,
struct sock *sk,
int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2))
+ const struct sock *sk2),
+ unsigned int log)
{
struct sock *sk2;
struct hlist_nulls_node *node;
@@ -142,8 +144,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
|| sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
(*saddr_comp)(sk, sk2)) {
if (bitmap)
- __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE,
- bitmap);
+ __set_bit(sk2->sk_hash >> log, bitmap);
else
return 1;
}
@@ -180,13 +181,15 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
/*
* force rand to be an odd multiple of UDP_HTABLE_SIZE
*/
- rand = (rand | 1) * UDP_HTABLE_SIZE;
- for (last = first + UDP_HTABLE_SIZE; first != last; first++) {
- hslot = &udptable->hash[udp_hashfn(net, first)];
+ rand = (rand | 1) * (udptable->mask + 1);
+ for (last = first + udptable->mask + 1;
+ first != last;
+ first++) {
+ hslot = udp_hashslot(udptable, net, first);
bitmap_zero(bitmap, PORTS_PER_CHAIN);
spin_lock_bh(&hslot->lock);
udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
- saddr_comp);
+ saddr_comp, udptable->log);
snum = first;
/*
@@ -196,7 +199,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
*/
do {
if (low <= snum && snum <= high &&
- !test_bit(snum / UDP_HTABLE_SIZE, bitmap))
+ !test_bit(snum >> udptable->log, bitmap))
goto found;
snum += rand;
} while (snum != first);
@@ -204,13 +207,14 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
}
goto fail;
} else {
- hslot = &udptable->hash[udp_hashfn(net, snum)];
+ hslot = udp_hashslot(udptable, net, snum);
spin_lock_bh(&hslot->lock);
- if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp))
+ if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,
+ saddr_comp, 0))
goto fail_unlock;
}
found:
- inet_sk(sk)->num = snum;
+ inet_sk(sk)->inet_num = snum;
sk->sk_hash = snum;
if (sk_unhashed(sk)) {
sk_nulls_add_node_rcu(sk, &hslot->head);
@@ -229,8 +233,8 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
return (!ipv6_only_sock(sk2) &&
- (!inet1->rcv_saddr || !inet2->rcv_saddr ||
- inet1->rcv_saddr == inet2->rcv_saddr));
+ (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
+ inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
}
int udp_v4_get_port(struct sock *sk, unsigned short snum)
@@ -249,18 +253,18 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
struct inet_sock *inet = inet_sk(sk);
score = (sk->sk_family == PF_INET ? 1 : 0);
- if (inet->rcv_saddr) {
- if (inet->rcv_saddr != daddr)
+ if (inet->inet_rcv_saddr) {
+ if (inet->inet_rcv_saddr != daddr)
return -1;
score += 2;
}
- if (inet->daddr) {
- if (inet->daddr != saddr)
+ if (inet->inet_daddr) {
+ if (inet->inet_daddr != saddr)
return -1;
score += 2;
}
- if (inet->dport) {
- if (inet->dport != sport)
+ if (inet->inet_dport) {
+ if (inet->inet_dport != sport)
return -1;
score += 2;
}
@@ -283,7 +287,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
struct sock *sk, *result;
struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport);
- unsigned int hash = udp_hashfn(net, hnum);
+ unsigned int hash = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot = &udptable->hash[hash];
int score, badness;
@@ -356,9 +360,10 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
if (!net_eq(sock_net(s), net) ||
s->sk_hash != hnum ||
- (inet->daddr && inet->daddr != rmt_addr) ||
- (inet->dport != rmt_port && inet->dport) ||
- (inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
+ (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
+ (inet->inet_dport != rmt_port && inet->inet_dport) ||
+ (inet->inet_rcv_saddr &&
+ inet->inet_rcv_saddr != loc_addr) ||
ipv6_only_sock(s) ||
(s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
continue;
@@ -642,14 +647,14 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
- daddr = inet->daddr;
- dport = inet->dport;
+ daddr = inet->inet_daddr;
+ dport = inet->inet_dport;
/* Open fast path for connected socket.
Route will not be used, if at least one option is set.
*/
connected = 1;
}
- ipc.addr = inet->saddr;
+ ipc.addr = inet->inet_saddr;
ipc.oif = sk->sk_bound_dev_if;
err = sock_tx_timestamp(msg, sk, &ipc.shtx);
@@ -704,7 +709,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
.proto = sk->sk_protocol,
.flags = inet_sk_flowi_flags(sk),
.uli_u = { .ports =
- { .sport = inet->sport,
+ { .sport = inet->inet_sport,
.dport = dport } } };
struct net *net = sock_net(sk);
@@ -748,7 +753,7 @@ back_from_confirm:
inet->cork.fl.fl4_dst = daddr;
inet->cork.fl.fl_ip_dport = dport;
inet->cork.fl.fl4_src = saddr;
- inet->cork.fl.fl_ip_sport = inet->sport;
+ inet->cork.fl.fl_ip_sport = inet->inet_sport;
up->pending = AF_INET;
do_append_data:
@@ -862,6 +867,7 @@ static unsigned int first_packet_length(struct sock *sk)
udp_lib_checksum_complete(skb)) {
UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
IS_UDPLITE(sk));
+ atomic_inc(&sk->sk_drops);
__skb_unlink(skb, rcvq);
__skb_queue_tail(&list_kill, skb);
}
@@ -982,7 +988,7 @@ try_again:
UDP_INC_STATS_USER(sock_net(sk),
UDP_MIB_INDATAGRAMS, is_udplite);
- sock_recv_timestamp(msg, sk, skb);
+ sock_recv_ts_and_drops(msg, sk, skb);
/* Copy the address. */
if (sin) {
@@ -1023,15 +1029,15 @@ int udp_disconnect(struct sock *sk, int flags)
*/
sk->sk_state = TCP_CLOSE;
- inet->daddr = 0;
- inet->dport = 0;
+ inet->inet_daddr = 0;
+ inet->inet_dport = 0;
sk->sk_bound_dev_if = 0;
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
inet_reset_saddr(sk);
if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
sk->sk_prot->unhash(sk);
- inet->sport = 0;
+ inet->inet_sport = 0;
}
sk_dst_reset(sk);
return 0;
@@ -1042,12 +1048,12 @@ void udp_lib_unhash(struct sock *sk)
{
if (sk_hashed(sk)) {
struct udp_table *udptable = sk->sk_prot->h.udp_table;
- unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash);
- struct udp_hslot *hslot = &udptable->hash[hash];
+ struct udp_hslot *hslot = udp_hashslot(udptable, sock_net(sk),
+ sk->sk_hash);
spin_lock_bh(&hslot->lock);
if (sk_nulls_del_node_init_rcu(sk)) {
- inet_sk(sk)->num = 0;
+ inet_sk(sk)->inet_num = 0;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
}
spin_unlock_bh(&hslot->lock);
@@ -1057,25 +1063,22 @@ EXPORT_SYMBOL(udp_lib_unhash);
static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
- int is_udplite = IS_UDPLITE(sk);
- int rc;
+ int rc = sock_queue_rcv_skb(sk, skb);
+
+ if (rc < 0) {
+ int is_udplite = IS_UDPLITE(sk);
- if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
/* Note that an ENOMEM error is charged twice */
- if (rc == -ENOMEM) {
+ if (rc == -ENOMEM)
UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
is_udplite);
- atomic_inc(&sk->sk_drops);
- }
- goto drop;
+ UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ kfree_skb(skb);
+ return -1;
}
return 0;
-drop:
- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
- kfree_skb(skb);
- return -1;
}
/* returns:
@@ -1182,6 +1185,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
drop:
UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ atomic_inc(&sk->sk_drops);
kfree_skb(skb);
return -1;
}
@@ -1198,7 +1202,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct udp_table *udptable)
{
struct sock *sk;
- struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))];
+ struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
int dif;
spin_lock(&hslot->lock);
@@ -1620,9 +1624,14 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
struct udp_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
- for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
+ for (state->bucket = start; state->bucket <= state->udp_table->mask;
+ ++state->bucket) {
struct hlist_nulls_node *node;
struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
+
+ if (hlist_nulls_empty(&hslot->head))
+ continue;
+
spin_lock_bh(&hslot->lock);
sk_nulls_for_each(sk, node, &hslot->head) {
if (!net_eq(sock_net(sk), net))
@@ -1647,7 +1656,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
if (!sk) {
- if (state->bucket < UDP_HTABLE_SIZE)
+ if (state->bucket <= state->udp_table->mask)
spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
return udp_get_first(seq, state->bucket + 1);
}
@@ -1667,7 +1676,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
{
struct udp_iter_state *state = seq->private;
- state->bucket = UDP_HTABLE_SIZE;
+ state->bucket = MAX_UDP_PORTS;
return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
}
@@ -1689,7 +1698,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v)
{
struct udp_iter_state *state = seq->private;
- if (state->bucket < UDP_HTABLE_SIZE)
+ if (state->bucket <= state->udp_table->mask)
spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
}
@@ -1744,12 +1753,12 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
int bucket, int *len)
{
struct inet_sock *inet = inet_sk(sp);
- __be32 dest = inet->daddr;
- __be32 src = inet->rcv_saddr;
- __u16 destp = ntohs(inet->dport);
- __u16 srcp = ntohs(inet->sport);
+ __be32 dest = inet->inet_daddr;
+ __be32 src = inet->inet_rcv_saddr;
+ __u16 destp = ntohs(inet->inet_dport);
+ __u16 srcp = ntohs(inet->inet_sport);
- seq_printf(f, "%4d: %08X:%04X %08X:%04X"
+ seq_printf(f, "%5d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
bucket, src, srcp, dest, destp, sp->sk_state,
sk_wmem_alloc_get(sp),
@@ -1815,11 +1824,43 @@ void udp4_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
-void __init udp_table_init(struct udp_table *table)
+static __initdata unsigned long uhash_entries;
+static int __init set_uhash_entries(char *str)
{
- int i;
+ if (!str)
+ return 0;
+ uhash_entries = simple_strtoul(str, &str, 0);
+ if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN)
+ uhash_entries = UDP_HTABLE_SIZE_MIN;
+ return 1;
+}
+__setup("uhash_entries=", set_uhash_entries);
- for (i = 0; i < UDP_HTABLE_SIZE; i++) {
+void __init udp_table_init(struct udp_table *table, const char *name)
+{
+ unsigned int i;
+
+ if (!CONFIG_BASE_SMALL)
+ table->hash = alloc_large_system_hash(name,
+ sizeof(struct udp_hslot),
+ uhash_entries,
+ 21, /* one slot per 2 MB */
+ 0,
+ &table->log,
+ &table->mask,
+ 64 * 1024);
+ /*
+ * Make sure hash table has the minimum size
+ */
+ if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) {
+ table->hash = kmalloc(UDP_HTABLE_SIZE_MIN *
+ sizeof(struct udp_hslot), GFP_KERNEL);
+ if (!table->hash)
+ panic(name);
+ table->log = ilog2(UDP_HTABLE_SIZE_MIN);
+ table->mask = UDP_HTABLE_SIZE_MIN - 1;
+ }
+ for (i = 0; i <= table->mask; i++) {
INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
spin_lock_init(&table->hash[i].lock);
}
@@ -1829,7 +1870,7 @@ void __init udp_init(void)
{
unsigned long nr_pages, limit;
- udp_table_init(&udp_table);
+ udp_table_init(&udp_table, "UDP");
/* Set the pressure threshold up by the same strategy of TCP. It is a
* fraction of global memory that is up to 1/2 at 256 MB, decreasing
* toward zero with the amount of memory, with a floor of 128 pages.