diff options
44 files changed, 488 insertions, 385 deletions
diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 7c5e9817e99..8f86d6b621c 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -176,7 +176,7 @@ extern void icmpv6_send(struct sk_buff *skb, __u32 info, struct net_device *dev); -extern int icmpv6_init(struct net_proto_family *ops); +extern int icmpv6_init(void); extern int icmpv6_err_convert(int type, int code, int *err); extern void icmpv6_cleanup(void); diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index e31b8c84f2c..0c82c80b277 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -113,7 +113,8 @@ enum { XFRM_POLICY_TYPE_MAIN = 0, XFRM_POLICY_TYPE_SUB = 1, - XFRM_POLICY_TYPE_MAX = 2 + XFRM_POLICY_TYPE_MAX = 2, + XFRM_POLICY_TYPE_ANY = 255 }; enum diff --git a/include/net/icmp.h b/include/net/icmp.h index 9f7ef3c8bae..faba64db8ff 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -48,7 +48,7 @@ struct sk_buff; extern void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info); extern int icmp_rcv(struct sk_buff *skb); extern int icmp_ioctl(struct sock *sk, int cmd, unsigned long arg); -extern void icmp_init(struct net_proto_family *ops); +extern int icmp_init(void); extern void icmp_out_count(unsigned char type); /* Move into dst.h ? */ diff --git a/include/net/llc_if.h b/include/net/llc_if.h index c608812a8e8..b595a004d31 100644 --- a/include/net/llc_if.h +++ b/include/net/llc_if.h @@ -74,11 +74,6 @@ static inline int llc_mac_null(const u8 *mac) return is_zero_ether_addr(mac); } -static inline int llc_addrany(const struct llc_addr *addr) -{ - return llc_mac_null(addr->mac) && !addr->lsap; -} - static inline int llc_mac_multicast(const u8 *mac) { return is_multicast_ether_addr(mac); diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 59b70624b05..5aedf324de6 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -77,7 +77,7 @@ struct nd_opt_hdr { } __attribute__((__packed__)); -extern int ndisc_init(struct net_proto_family *ops); +extern int ndisc_init(void); extern void ndisc_cleanup(void); @@ -107,7 +107,7 @@ extern int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *d /* * IGMP */ -extern int igmp6_init(struct net_proto_family *ops); +extern int igmp6_init(void); extern void igmp6_cleanup(void); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a9b4f608629..504fde17452 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -26,6 +26,8 @@ struct netns_ipv4 { struct hlist_head *fib_table_hash; struct sock *fibnl; + struct sock **icmp_sk; + struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 1dd7de4e419..82623d3a8e3 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -36,5 +36,6 @@ struct netns_ipv6 { struct xt_table *ip6table_mangle; struct xt_table *ip6table_raw; #endif + struct sock **icmp_sk; }; #endif diff --git a/include/net/request_sock.h b/include/net/request_sock.h index cff4608179c..040780add35 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -31,8 +31,7 @@ struct request_sock_ops { int obj_size; struct kmem_cache *slab; int (*rtx_syn_ack)(struct sock *sk, - struct request_sock *req, - struct dst_entry *dst); + struct request_sock *req); void (*send_ack)(struct sk_buff *skb, struct request_sock *req); void (*send_reset)(struct sock *sk, diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 57df27f1958..a653eb3e1e7 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -368,11 +368,6 @@ void sctp_sysctl_unregister(void); #else static inline void sctp_sysctl_register(void) { return; } static inline void sctp_sysctl_unregister(void) { return; } -static inline int sctp_sysctl_jiffies_ms(ctl_table *table, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) { - return -ENOSYS; -} #endif /* Size of Supported Address Parameter for 'x' address types. */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 9c827a749b6..8966599ddb9 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1661,6 +1661,9 @@ struct sctp_association { /* Transport to which SHUTDOWN chunk was last sent. */ struct sctp_transport *shutdown_last_sent_to; + /* How many times have we resent a SHUTDOWN */ + int shutdown_retries; + /* Transport to which INIT chunk was last sent. */ struct sctp_transport *init_last_sent_to; @@ -1695,6 +1698,11 @@ struct sctp_association { */ __u16 unack_data; + /* The total number of data chunks that we've had to retransmit + * as the result of a T3 timer expiration + */ + __u32 rtx_data_chunks; + /* This is the association's receive buffer space. This value is used * to set a_rwnd field in an INIT or a SACK chunk. */ diff --git a/include/net/sock.h b/include/net/sock.h index fd987608765..39112e75411 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -850,6 +850,7 @@ extern struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot); extern void sk_free(struct sock *sk); +extern void sk_release_kernel(struct sock *sk); extern struct sock *sk_clone(const struct sock *sk, const gfp_t priority); @@ -1333,6 +1334,18 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_e } #endif +/* + * Kernel sockets, f.e. rtnl or icmp_socket, are a part of a namespace. + * They should not hold a referrence to a namespace in order to allow + * to stop it. + * Sockets after sk_change_net should be released using sk_release_kernel + */ +static inline void sk_change_net(struct sock *sk, struct net *net) +{ + put_net(sk->sk_net); + sk->sk_net = net; +} + extern void sock_enable_timestamp(struct sock *sk); extern int sock_get_timestamp(struct sock *, struct timeval __user *); extern int sock_get_timestampns(struct sock *, struct timespec __user *); diff --git a/include/net/tcp.h b/include/net/tcp.h index 7de4ea3a04d..ae9774b478f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1373,7 +1373,7 @@ struct tcp_request_sock_ops { #endif }; -extern void tcp_v4_init(struct net_proto_family *ops); +extern void tcp_v4_init(void); extern void tcp_init(void); #endif /* _TCP_H */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index eea7785cc75..9b620566519 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -121,6 +121,7 @@ extern struct mutex xfrm_cfg_mutex; struct xfrm_state { /* Note: bydst is re-used during gc */ + struct list_head all; struct hlist_node bydst; struct hlist_node bysrc; struct hlist_node byspi; @@ -424,6 +425,7 @@ struct xfrm_tmpl struct xfrm_policy { struct xfrm_policy *next; + struct list_head bytype; struct hlist_node bydst; struct hlist_node byidx; @@ -1160,6 +1162,18 @@ struct xfrm6_tunnel { int priority; }; +struct xfrm_state_walk { + struct xfrm_state *state; + int count; + u8 proto; +}; + +struct xfrm_policy_walk { + struct xfrm_policy *policy; + int count; + u8 type, cur_type; +}; + extern void xfrm_init(void); extern void xfrm4_init(void); extern void xfrm_state_init(void); @@ -1184,7 +1198,23 @@ static inline void xfrm6_fini(void) extern int xfrm_proc_init(void); #endif -extern int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), void *); +static inline void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) +{ + walk->proto = proto; + walk->state = NULL; + walk->count = 0; +} + +static inline void xfrm_state_walk_done(struct xfrm_state_walk *walk) +{ + if (walk->state != NULL) { + xfrm_state_put(walk->state); + walk->state = NULL; + } +} + +extern int xfrm_state_walk(struct xfrm_state_walk *walk, + int (*func)(struct xfrm_state *, int, void*), void *); extern struct xfrm_state *xfrm_state_alloc(void); extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct flowi *fl, struct xfrm_tmpl *tmpl, @@ -1306,7 +1336,25 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) #endif struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); -extern int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *); + +static inline void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) +{ + walk->cur_type = XFRM_POLICY_TYPE_MAIN; + walk->type = type; + walk->policy = NULL; + walk->count = 0; +} + +static inline void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) +{ + if (walk->policy != NULL) { + xfrm_pol_put(walk->policy); + walk->policy = NULL; + } +} + +extern int xfrm_policy_walk(struct xfrm_policy_walk *walk, + int (*func)(struct xfrm_policy *, int, int, void*), void *); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, struct xfrm_selector *sel, diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 18058bbc796..61166f66479 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -1033,25 +1033,8 @@ static const struct seq_operations aarp_seq_ops = { static int aarp_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct aarp_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &aarp_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; - memset(s, 0, sizeof(*s)); -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &aarp_seq_ops, + sizeof(struct aarp_iter_state)); } const struct file_operations atalk_seq_arp_fops = { diff --git a/net/atm/lec.c b/net/atm/lec.c index 0e450d12f03..e2d800d818e 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1169,32 +1169,7 @@ static const struct seq_operations lec_seq_ops = { static int lec_seq_open(struct inode *inode, struct file *file) { - struct lec_state *state; - struct seq_file *seq; - int rc = -EAGAIN; - - state = kmalloc(sizeof(*state), GFP_KERNEL); - if (!state) { - rc = -ENOMEM; - goto out; - } - - rc = seq_open(file, &lec_seq_ops); - if (rc) - goto out_kfree; - seq = file->private_data; - seq->private = state; -out: - return rc; - -out_kfree: - kfree(state); - goto out; -} - -static int lec_seq_release(struct inode *inode, struct file *file) -{ - return seq_release_private(inode, file); + return seq_open_private(file, &lec_seq_ops, sizeof(struct lec_state)); } static const struct file_operations lec_seq_fops = { @@ -1202,7 +1177,7 @@ static const struct file_operations lec_seq_fops = { .open = lec_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = lec_seq_release, + .release = seq_release_private, }; #endif diff --git a/net/atm/proc.c b/net/atm/proc.c index e9693aed7ef..b995b66b558 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -114,31 +114,13 @@ static int __vcc_seq_open(struct inode *inode, struct file *file, int family, const struct seq_operations *ops) { struct vcc_state *state; - struct seq_file *seq; - int rc = -ENOMEM; - state = kmalloc(sizeof(*state), GFP_KERNEL); - if (!state) - goto out; - - rc = seq_open(file, ops); - if (rc) - goto out_kfree; + state = __seq_open_private(file, ops, sizeof(*state)); + if (state == NULL) + return -ENOMEM; state->family = family; - - seq = file->private_data; - seq->private = state; -out: - return rc; -out_kfree: - kfree(state); - goto out; -} - -static int vcc_seq_release(struct inode *inode, struct file *file) -{ - return seq_release_private(inode, file); + return 0; } static void *vcc_seq_start(struct seq_file *seq, loff_t *pos) @@ -314,7 +296,7 @@ static const struct file_operations pvc_seq_fops = { .open = pvc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = vcc_seq_release, + .release = seq_release_private, }; static int vcc_seq_show(struct seq_file *seq, void *v) @@ -348,7 +330,7 @@ static const struct file_operations vcc_seq_fops = { .open = vcc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = vcc_seq_release, + .release = seq_release_private, }; static int svc_seq_show(struct seq_file *seq, void *v) @@ -383,7 +365,7 @@ static const struct file_operations svc_seq_fops = { .open = svc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = vcc_seq_release, + .release = seq_release_private, }; static ssize_t proc_dev_atm_read(struct file *file, char __user *buf, diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index cec582563e0..f8a3455f449 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -156,39 +156,14 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) EXPORT_SYMBOL(dev_mc_unsync); #ifdef CONFIG_PROC_FS -static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) -{ - struct net *net = seq_file_net(seq); - struct net_device *dev; - loff_t off = 0; - - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { - if (off++ == *pos) - return dev; - } - return NULL; -} - -static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return next_net_device((struct net_device *)v); -} - -static void dev_mc_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) -{ - read_unlock(&dev_base_lock); -} - - static int dev_mc_seq_show(struct seq_file *seq, void *v) { struct dev_addr_list *m; struct net_device *dev = v; + if (v == SEQ_START_TOKEN) + return 0; + netif_tx_lock_bh(dev); for (m = dev->mc_list; m; m = m->next) { int i; @@ -206,9 +181,9 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) } static const struct seq_operations dev_mc_seq_ops = { - .start = dev_mc_seq_start, - .next = dev_mc_seq_next, - .stop = dev_mc_seq_stop, + .start = dev_seq_start, + .next = dev_seq_next, + .stop = dev_seq_stop, .show = dev_mc_seq_show, }; diff --git a/net/core/sock.c b/net/core/sock.c index 09cb3a74de7..0ca06973802 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -987,6 +987,25 @@ void sk_free(struct sock *sk) sk_prot_free(sk->sk_prot_creator, sk); } +/* + * Last sock_put should drop referrence to sk->sk_net. It has already + * been dropped in sk_change_net. Taking referrence to stopping namespace + * is not an option. + * Take referrence to a socket to remove it from hash _alive_ and after that + * destroy it in the context of init_net. + */ +void sk_release_kernel(struct sock *sk) +{ + if (sk == NULL || sk->sk_socket == NULL) + return; + + sock_hold(sk); + sock_release(sk->sk_socket); + sk->sk_net = get_net(&init_net); + sock_put(sk); +} +EXPORT_SYMBOL(sk_release_kernel); + struct sock *sk_clone(const struct sock *sk, const gfp_t priority) { struct sock *newsk; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 474075adbde..514a40b7fc7 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -471,15 +471,14 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, return &rt->u.dst; } -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, - struct dst_entry *dst) +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) { int err = -1; struct sk_buff *skb; + struct dst_entry *dst; - /* First, grab a route. */ - - if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) + dst = inet_csk_route_req(sk, req); + if (dst == NULL) goto out; skb = dccp_make_response(sk, dst, req); @@ -620,7 +619,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_iss = dccp_v4_init_sequence(skb); dreq->dreq_service = service; - if (dccp_v4_send_response(sk, req, NULL)) + if (dccp_v4_send_response(sk, req)) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 490333d47c7..1a5e50b9067 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -224,8 +224,7 @@ out: } -static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, - struct dst_entry *dst) +static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) { struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -234,6 +233,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, struct in6_addr *final_p = NULL, final; struct flowi fl; int err = -1; + struct dst_entry *dst; memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_DCCP; @@ -245,28 +245,26 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, fl.fl_ip_sport = inet_sk(sk)->sport; security_req_classify_flow(req, &fl); - if (dst == NULL) { - opt = np->opt; + opt = np->opt; - if (opt != NULL && opt->srcrt != NULL) { - const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; + if (opt != NULL && opt->srcrt != NULL) { + const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) - goto done; + err = ip6_dst_lookup(sk, &dst, &fl); + if (err) + goto done; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); - err = xfrm_lookup(&dst, &fl, sk, 0); - if (err < 0) - goto done; - } + err = xfrm_lookup(&dst, &fl, sk, 0); + if (err < 0) + goto done; skb = dccp_make_response(sk, dst, req); if (skb != NULL) { @@ -448,7 +446,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_iss = dccp_v6_init_sequence(skb); dreq->dreq_service = service; - if (dccp_v6_send_response(sk, req, NULL)) + if (dccp_v6_send_response(sk, req)) goto drop_and_free; inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 027d1814e1a..33ad48321b0 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -216,7 +216,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, * counter (backoff, monitored by dccp_response_timer). */ req->retrans++; - req->rsk_ops->rtx_syn_ack(sk, req, NULL); + req->rsk_ops->rtx_syn_ack(sk, req); } /* Network Duplicate, discard packet */ return NULL; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index acd48ee522d..23fd95a7ad1 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2320,25 +2320,8 @@ static const struct seq_operations dn_socket_seq_ops = { static int dn_socket_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct dn_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &dn_socket_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; - memset(s, 0, sizeof(*s)); -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &dn_socket_seq_ops, + sizeof(struct dn_iter_state)); } static const struct file_operations dn_socket_seq_fops = { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c270080f370..4f539bd4871 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1415,7 +1415,7 @@ static int __init inet_init(void) ip_init(); - tcp_v4_init(&inet_family_ops); + tcp_v4_init(); /* Setup TCP slab cache for open requests. */ tcp_init(); @@ -1430,7 +1430,8 @@ static int __init inet_init(void) * Set the ICMP layer up */ - icmp_init(&inet_family_ops); + if (icmp_init() < 0) + panic("Failed to create the ICMP control socket.\n"); /* * Initialise the multicast router diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a13c074dac0..b51f4b0a326 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -229,14 +229,16 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; * * On SMP we have one ICMP socket per-cpu. */ -static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL; -#define icmp_socket __get_cpu_var(__icmp_socket) +static struct sock *icmp_sk(struct net *net) +{ + return net->ipv4.icmp_sk[smp_processor_id()]; +} -static inline int icmp_xmit_lock(void) +static inline int icmp_xmit_lock(struct sock *sk) { local_bh_disable(); - if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) { + if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path signals a * dst_link_failure() for an outgoing ICMP packet. */ @@ -246,9 +248,9 @@ static inline int icmp_xmit_lock(void) return 0; } -static inline void icmp_xmit_unlock(void) +static inline void icmp_xmit_unlock(struct sock *sk) { - spin_unlock_bh(&icmp_socket->sk->sk_lock.slock); + spin_unlock_bh(&sk->sk_lock.slock); } /* @@ -346,19 +348,21 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, static void icmp_push_reply(struct icmp_bxm *icmp_param, struct ipcm_cookie *ipc, struct rtable *rt) { + struct sock *sk; struct sk_buff *skb; - if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, + sk = icmp_sk(rt->u.dst.dev->nd_net); + if (ip_append_data(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, ipc, rt, MSG_DONTWAIT) < 0) - ip_flush_pending_frames(icmp_socket->sk); - else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { + ip_flush_pending_frames(sk); + else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { struct icmphdr *icmph = icmp_hdr(skb); __wsum csum = 0; struct sk_buff *skb1; - skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) { + skb_queue_walk(&sk->sk_write_queue, skb1) { csum = csum_add(csum, skb1->csum); } csum = csum_partial_copy_nocheck((void *)&icmp_param->data, @@ -366,7 +370,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, icmp_param->head_len, csum); icmph->checksum = csum_fold(csum); skb->ip_summed = CHECKSUM_NONE; - ip_push_pending_frames(icmp_socket->sk); + ip_push_pending_frames(sk); } } @@ -376,16 +380,17 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { - struct sock *sk = icmp_socket->sk; - struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct rtable *rt = (struct rtable *)skb->dst; + struct net *net = rt->u.dst.dev->nd_net; + struct sock *sk = icmp_sk(net); + struct inet_sock *inet = inet_sk(sk); __be32 daddr; if (ip_options_echo(&icmp_param->replyopts, skb)) return; - if (icmp_xmit_lock()) + if (icmp_xmit_lock(sk)) return; icmp_param->data.icmph.checksum = 0; @@ -405,7 +410,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) .tos = RT_TOS(ip_hdr(skb)->tos) } }, .proto = IPPROTO_ICMP }; security_skb_classify_flow(skb, &fl); - if (ip_route_output_key(rt->u.dst.dev->nd_net, &rt, &fl)) + if (ip_route_output_key(net, &rt, &fl)) goto out_unlock; } if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, @@ -413,7 +418,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) icmp_push_reply(icmp_param, &ipc, rt); ip_rt_put(rt); out_unlock: - icmp_xmit_unlock(); + icmp_xmit_unlock(sk); } @@ -438,10 +443,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) __be32 saddr; u8 tos; struct net *net; + struct sock *sk; if (!rt) goto out; net = rt->u.dst.dev->nd_net; + sk = icmp_sk(net); /* * Find the original header. It is expected to be valid, of course. @@ -505,7 +512,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } } - if (icmp_xmit_lock()) + if (icmp_xmit_lock(sk)) return; /* @@ -544,7 +551,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) icmp_param.data.icmph.checksum = 0; icmp_param.skb = skb_in; icmp_param.offset = skb_network_offset(skb_in); - inet_sk(icmp_socket->sk)->tos = tos; + inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts; @@ -652,7 +659,7 @@ route_done: ende: ip_rt_put(rt); out_unlock: - icmp_xmit_unlock(); + icmp_xmit_unlock(sk); out:; } @@ -1139,29 +1146,46 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { }, }; -void __init icmp_init(struct net_proto_family *ops) +static void __net_exit icmp_sk_exit(struct net *net) { - struct inet_sock *inet; int i; - for_each_possible_cpu(i) { - int err; + for_each_possible_cpu(i) + sk_release_kernel(net->ipv4.icmp_sk[i]); + kfree(net->ipv4.icmp_sk); + net->ipv4.icmp_sk = NULL; +} - err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, - &per_cpu(__icmp_socket, i)); +int __net_init icmp_sk_init(struct net *net) +{ + int i, err; + net->ipv4.icmp_sk = + kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); + if (net->ipv4.icmp_sk == NULL) + return -ENOMEM; + + for_each_possible_cpu(i) { + struct sock *sk; + struct socket *sock; + struct inet_sock *inet; + + err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, &sock); if (err < 0) - panic("Failed to create the ICMP control socket.\n"); + goto fail; + + net->ipv4.icmp_sk[i] = sk = sock->sk; + sk_change_net(sk, net); - per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC; + sk->sk_allocation = GFP_ATOMIC; /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. */ - per_cpu(__icmp_socket, i)->sk->sk_sndbuf = + sk->sk_sndbuf = (2 * ((64 * 1024) + sizeof(struct sk_buff))); - inet = inet_sk(per_cpu(__icmp_socket, i)->sk); + inet = inet_sk(sk); inet->uc_ttl = -1; inet->pmtudisc = IP_PMTUDISC_DONT; @@ -1169,8 +1193,23 @@ void __init icmp_init(struct net_proto_family *ops) * see it, we do not wish this socket to see incoming * packets. */ - per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk); + sk->sk_prot->unhash(sk); } + return 0; + +fail: + icmp_sk_exit(net); + return err; +} + +static struct pernet_operations __net_initdata icmp_sk_ops = { + .init = icmp_sk_init, + .exit = icmp_sk_exit, +}; + +int __init icmp_init(void) +{ + return register_pernet_device(&icmp_sk_ops); } EXPORT_SYMBOL(icmp_err_convert); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b189278c7bc..c0e0fa03fce 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -463,7 +463,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, if (time_after_eq(now, req->expires)) { if ((req->retrans < thresh || (inet_rsk(req)->acked && req->retrans < max_retries)) - && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) { + && !req->rsk_ops->rtx_syn_ack(parent, req)) { unsigned long timeo; if (req->retrans++ == 0) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 089252e82c0..9668c3a23ef 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -379,7 +379,7 @@ static const struct file_operations ct_cpu_seq_fops = { .open = ct_cpu_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release, }; int __init nf_conntrack_ipv4_compat_init(void) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 00156bf421c..3b26f9586dc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -723,8 +723,8 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, * This still operates on a request_sock only, not on a big * socket. */ -static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, - struct dst_entry *dst) +static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, + struct dst_entry *dst) { const struct inet_request_sock *ireq = inet_rsk(req); int err = -1; @@ -732,7 +732,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, /* First, grab a route. */ if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) - goto out; + return -1; skb = tcp_make_synack(sk, dst, req); @@ -751,11 +751,15 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, err = net_xmit_eval(err); } -out: dst_release(dst); return err; } +static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req) +{ + return __tcp_v4_send_synack(sk, req, NULL); +} + /* * IPv4 request_sock destructor. */ @@ -1380,7 +1384,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) } tcp_rsk(req)->snt_isn = isn; - if (tcp_v4_send_synack(sk, req, dst)) + if (__tcp_v4_send_synack(sk, req, dst)) goto drop_and_free; if (want_cookie) { @@ -2443,7 +2447,7 @@ struct proto tcp_prot = { REF_PROTO_INUSE(tcp) }; -void __init tcp_v4_init(struct net_proto_family *ops) +void __init tcp_v4_init(void) { if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, IPPROTO_TCP) < 0) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b61b76847ad..0fdd1db641a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -536,7 +536,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, * Enforce "SYN-ACK" according to figure 8, figure 6 * of RFC793, fixed by RFC1122. */ - req->rsk_ops->rtx_syn_ack(sk, req, NULL); + req->rsk_ops->rtx_syn_ack(sk, req); return NULL; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f0aa9773874..9869f87243c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -808,13 +808,13 @@ static int __init inet6_init(void) if (err) goto sysctl_fail; #endif - err = icmpv6_init(&inet6_family_ops); + err = icmpv6_init(); if (err) goto icmp_fail; - err = ndisc_init(&inet6_family_ops); + err = ndisc_init(); if (err) goto ndisc_fail; - err = igmp6_init(&inet6_family_ops); + err = igmp6_init(); if (err) goto igmp_fail; err = ipv6_netfilter_init(); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 121d517bf91..9f55a965c88 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -80,8 +80,10 @@ EXPORT_SYMBOL(icmpv6msg_statistics); * * On SMP we have one ICMP socket per-cpu. */ -static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL; -#define icmpv6_socket __get_cpu_var(__icmpv6_socket) +static inline struct sock *icmpv6_sk(struct net *net) +{ + return net->ipv6.icmp_sk[smp_processor_id()]; +} static int icmpv6_rcv(struct sk_buff *skb); @@ -90,11 +92,11 @@ static struct inet6_protocol icmpv6_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -static __inline__ int icmpv6_xmit_lock(void) +static __inline__ int icmpv6_xmit_lock(struct sock *sk) { local_bh_disable(); - if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) { + if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path (f.e. SIT or * ip6ip6 tunnel) signals dst_link_failure() for an * outgoing ICMP6 packet. @@ -105,9 +107,9 @@ static __inline__ int icmpv6_xmit_lock(void) return 0; } -static __inline__ void icmpv6_xmit_unlock(void) +static __inline__ void icmpv6_xmit_unlock(struct sock *sk) { - spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock); + spin_unlock_bh(&sk->sk_lock.slock); } /* @@ -389,12 +391,12 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, fl.fl_icmp_code = code; security_skb_classify_flow(skb, &fl); - if (icmpv6_xmit_lock()) - return; - - sk = icmpv6_socket->sk; + sk = icmpv6_sk(&init_net); np = inet6_sk(sk); + if (icmpv6_xmit_lock(sk)) + return; + if (!icmpv6_xrlim_allow(sk, type, &fl)) goto out; @@ -498,7 +500,7 @@ out_put: out_dst_release: dst_release(dst); out: - icmpv6_xmit_unlock(); + icmpv6_xmit_unlock(sk); } EXPORT_SYMBOL(icmpv6_send); @@ -535,12 +537,12 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl.fl_icmp_type = ICMPV6_ECHO_REPLY; security_skb_classify_flow(skb, &fl); - if (icmpv6_xmit_lock()) - return; - - sk = icmpv6_socket->sk; + sk = icmpv6_sk(&init_net); np = inet6_sk(sk); + if (icmpv6_xmit_lock(sk)) + return; + if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; @@ -584,7 +586,7 @@ out_put: in6_dev_put(idev); dst_release(dst); out: - icmpv6_xmit_unlock(); + icmpv6_xmit_unlock(sk); } static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) @@ -776,18 +778,24 @@ drop_no_count: } /* - * Special lock-class for __icmpv6_socket: + * Special lock-class for __icmpv6_sk: */ static struct lock_class_key icmpv6_socket_sk_dst_lock_key; -int __init icmpv6_init(struct net_proto_family *ops) +static int __net_init icmpv6_sk_init(struct net *net) { struct sock *sk; int err, i, j; + net->ipv6.icmp_sk = + kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); + if (net->ipv6.icmp_sk == NULL) + return -ENOMEM; + for_each_possible_cpu(i) { + struct socket *sock; err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, - &per_cpu(__icmpv6_socket, i)); + &sock); if (err < 0) { printk(KERN_ERR "Failed to initialize the ICMP6 control socket " @@ -796,12 +804,14 @@ int __init icmpv6_init(struct net_proto_family *ops) goto fail; } - sk = per_cpu(__icmpv6_socket, i)->sk; + net->ipv6.icmp_sk[i] = sk = sock->sk; + sk_change_net(sk, net); + sk->sk_allocation = GFP_ATOMIC; /* * Split off their lock-class, because sk->sk_dst_lock * gets used from softirqs, which is safe for - * __icmpv6_socket (because those never get directly used + * __icmpv6_sk (because those never get directly used * via userspace syscalls), but unsafe for normal sockets. */ lockdep_set_class(&sk->sk_dst_lock, @@ -815,36 +825,56 @@ int __init icmpv6_init(struct net_proto_family *ops) sk->sk_prot->unhash(sk); } - - - if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) { - printk(KERN_ERR "Failed to register ICMP6 protocol\n"); - err = -EAGAIN; - goto fail; - } - return 0; fail: - for (j = 0; j < i; j++) { - if (!cpu_possible(j)) - continue; - sock_release(per_cpu(__icmpv6_socket, j)); - } - + for (j = 0; j < i; j++) + sk_release_kernel(net->ipv6.icmp_sk[j]); + kfree(net->ipv6.icmp_sk); return err; } -void icmpv6_cleanup(void) +static void __net_exit icmpv6_sk_exit(struct net *net) { int i; for_each_possible_cpu(i) { - sock_release(per_cpu(__icmpv6_socket, i)); + sk_release_kernel(net->ipv6.icmp_sk[i]); } + kfree(net->ipv6.icmp_sk); +} + +static struct pernet_operations __net_initdata icmpv6_sk_ops = { + .init = icmpv6_sk_init, + .exit = icmpv6_sk_exit, +}; + +int __init icmpv6_init(void) +{ + int err; + + err = register_pernet_subsys(&icmpv6_sk_ops); + if (err < 0) + return err; + + err = -EAGAIN; + if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) + goto fail; + return 0; + +fail: + printk(KERN_ERR "Failed to register ICMP6 protocol\n"); + unregister_pernet_subsys(&icmpv6_sk_ops); + return err; +} + +void __exit icmpv6_cleanup(void) +{ + unregister_pernet_subsys(&icmpv6_sk_ops); inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); } + static const struct icmp6_err { int err; int fatal; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8b67ca07467..a59d259cf97 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -596,7 +596,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) return offset; } -EXPORT_SYMBOL_GPL(ip6_find_1stfragopt); static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index ab228d1ea11..8ce894d9006 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2597,7 +2597,7 @@ static const struct file_operations igmp6_mcf_seq_fops = { }; #endif -int __init igmp6_init(struct net_proto_family *ops) +int __init igmp6_init(void) { struct ipv6_pinfo *np; struct sock *sk; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0d33a7d3212..1fc33c8c723 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1733,7 +1733,7 @@ static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, #endif -int __init ndisc_init(struct net_proto_family *ops) +int __init ndisc_init(void) { struct ipv6_pinfo *np; struct sock *sk; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 12750f2b05a..1cbbb87dbad 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -455,8 +455,7 @@ out: } -static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, - struct dst_entry *dst) +static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) { struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -464,6 +463,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, struct ipv6_txoptions *opt = NULL; struct in6_addr * final_p = NULL, final; struct flowi fl; + struct dst_entry *dst; int err = -1; memset(&fl, 0, sizeof(fl)); @@ -476,24 +476,22 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl.fl_ip_sport = inet_sk(sk)->sport; security_req_classify_flow(req, &fl); - if (dst == NULL) { - opt = np->opt; - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) - goto done; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) - goto done; + opt = np->opt; + if (opt && opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; } + err = ip6_dst_lookup(sk, &dst, &fl); + if (err) + goto done; + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + goto done; + skb = tcp_make_synack(sk, dst, req); if (skb) { struct tcphdr *th = tcp_hdr(skb); @@ -1294,7 +1292,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) security_inet_conn_request(sk, skb, req); - if (tcp_v6_send_synack(sk, req, NULL)) + if (tcp_v6_send_synack(sk, req)) goto drop; inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); diff --git a/net/key/af_key.c b/net/key/af_key.c index 8b5f486ac80..7cb6f121336 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1742,12 +1742,18 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr { u8 proto; struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk }; + struct xfrm_state_walk walk; + int rc; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return -EINVAL; - return xfrm_state_walk(proto, dump_sa, &data); + xfrm_state_walk_init(&walk, proto); + rc = xfrm_state_walk(&walk, dump_sa, &data); + xfrm_state_walk_done(&walk); + + return rc; } static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) @@ -1780,7 +1786,9 @@ static int check_reqid(struct xfrm_policy *xp, int dir, int count, void *ptr) static u32 gen_reqid(void) { + struct xfrm_policy_walk walk; u32 start; + int rc; static u32 reqid = IPSEC_MANUAL_REQID_MAX; start = reqid; @@ -1788,8 +1796,10 @@ static u32 gen_reqid(void) ++reqid; if (reqid == 0) reqid = IPSEC_MANUAL_REQID_MAX+1; - if (xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, check_reqid, - (void*)&reqid) != -EEXIST) + xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN); + rc = xfrm_policy_walk(&walk, check_reqid, (void*)&reqid); + xfrm_policy_walk_done(&walk); + if (rc != -EEXIST) return reqid; } while (reqid != start); return 0; @@ -2665,8 +2675,14 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk }; + struct xfrm_policy_walk walk; + int rc; + + xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN); + rc = xfrm_policy_walk(&walk, dump_sp, &data); + xfrm_policy_walk_done(&walk); - return xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_sp, &data); + return rc; } static int key_notify_policy_flush(struct km_event *c) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e88e96af613..a9bf6e4fd0c 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -293,7 +293,7 @@ static const struct file_operations ct_cpu_seq_fops = { .open = ct_cpu_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release, }; #endif /* CONFIG_PROC_FS */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1ab0da2632e..524e826bb97 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1344,22 +1344,6 @@ static void netlink_data_ready(struct sock *sk, int len) * queueing. */ -static void __netlink_release(struct sock *sk) -{ - /* - * Last sock_put should drop referrence to sk->sk_net. It has already - * been dropped in netlink_kernel_create. Taking referrence to stopping - * namespace is not an option. - * Take referrence to a socket to remove it from netlink lookup table - * _alive_ and after that destroy it in the context of init_net. - */ - - sock_hold(sk); - sock_release(sk->sk_socket); - sk->sk_net = get_net(&init_net); - sock_put(sk); -} - struct sock * netlink_kernel_create(struct net *net, int unit, unsigned int groups, void (*input)(struct sk_buff *skb), @@ -1388,8 +1372,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups, goto out_sock_release_nosk; sk = sock->sk; - put_net(sk->sk_net); - sk->sk_net = net; + sk_change_net(sk, net); if (groups < 32) groups = 32; @@ -1424,7 +1407,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups, out_sock_release: kfree(listeners); - __netlink_release(sk); + netlink_kernel_release(sk); return NULL; out_sock_release_nosk: @@ -1437,10 +1420,7 @@ EXPORT_SYMBOL(netlink_kernel_create); void netlink_kernel_release(struct sock *sk) { - if (sk == NULL || sk->sk_socket == NULL) - return; - - __netlink_release(sk); + sk_release_kernel(sk); } EXPORT_SYMBOL(netlink_kernel_release); diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c index 83eda247fe4..017322e2786 100644 --- a/net/rxrpc/ar-proc.c +++ b/net/rxrpc/ar-proc.c @@ -103,7 +103,7 @@ const struct file_operations rxrpc_call_seq_fops = { .open = rxrpc_call_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release, }; /* @@ -188,5 +188,5 @@ const struct file_operations rxrpc_connection_seq_fops = { .open = rxrpc_connection_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release, }; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 1bb3c5c35d2..fd4deefab3c 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -494,6 +494,8 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, */ if (transport == transport->asoc->peer.retran_path) sctp_assoc_update_retran_path(transport->asoc); + transport->asoc->rtx_data_chunks += + transport->asoc->unack_data; break; case SCTP_RTXR_FAST_RTX: SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); @@ -504,6 +506,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, break; case SCTP_RTXR_T1_RTX: SCTP_INC_STATS(SCTP_MIB_T1_RETRANSMITS); + transport->asoc->init_retries++; break; default: BUG(); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 9e214da82d9..82bea300308 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -281,8 +281,10 @@ static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) *pos = 0; if (*pos == 0) - seq_printf(seq, " ASSOC SOCK STY SST ST HBKT ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT " - "RPORT LADDRS <-> RADDRS\n"); + seq_printf(seq, " ASSOC SOCK STY SST ST HBKT " + "ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT " + "RPORT LADDRS <-> RADDRS " + "HBINT INS OUTS MAXRT T1X T2X RTXC\n"); return (void *)pos; } @@ -321,15 +323,21 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) assoc = sctp_assoc(epb); sk = epb->sk; seq_printf(seq, - "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ", + "%8p %8p %-3d %-3d %-2d %-4d " + "%4d %8d %8d %7d %5lu %-5d %5d " + "%8d %5d %5d %4d %4d %4d %8d ", assoc, sk, sctp_sk(sk)->type, sk->sk_state, - assoc->state, hash, assoc->assoc_id, + assoc->state, hash, + assoc->assoc_id, assoc->sndbuf_used, atomic_read(&assoc->rmem_alloc), sock_i_uid(sk), sock_i_ino(sk), epb->bind_addr.port, - assoc->peer.port); - + assoc->peer.port, + assoc->hbinterval, assoc->c.sinit_max_instreams, + assoc->c.sinit_num_ostreams, assoc->max_retrans, + assoc->init_retries, assoc->shutdown_retries, + assoc->rtx_data_chunks); seq_printf(seq, " "); sctp_seq_dump_local_addrs(seq, epb); seq_printf(seq, "<-> "); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index f2ed6473fee..ade0cbd3a52 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -5312,6 +5312,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS); + ((struct sctp_association *)asoc)->shutdown_retries++; + if (asoc->overall_error_count >= asoc->max_retrans) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9fc4c315f6c..bae94a8031a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -46,6 +46,7 @@ EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); +static struct list_head xfrm_policy_bytype[XFRM_POLICY_TYPE_MAX]; unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; EXPORT_SYMBOL(xfrm_policy_count); @@ -208,6 +209,7 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { + INIT_LIST_HEAD(&policy->bytype); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); @@ -230,6 +232,10 @@ void xfrm_policy_destroy(struct xfrm_policy *policy) if (del_timer(&policy->timer)) BUG(); + write_lock_bh(&xfrm_policy_lock); + list_del(&policy->bytype); + write_unlock_bh(&xfrm_policy_lock); + security_xfrm_policy_free(policy); kfree(policy); } @@ -584,6 +590,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); + list_add_tail(&policy->bytype, &xfrm_policy_bytype[policy->type]); write_unlock_bh(&xfrm_policy_lock); if (delpol) @@ -822,57 +829,60 @@ out: } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), +int xfrm_policy_walk(struct xfrm_policy_walk *walk, + int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *pol, *last = NULL; - struct hlist_node *entry; - int dir, last_dir = 0, count, error; + struct xfrm_policy *old, *pol, *last = NULL; + int error = 0; + + if (walk->type >= XFRM_POLICY_TYPE_MAX && + walk->type != XFRM_POLICY_TYPE_ANY) + return -EINVAL; + if (walk->policy == NULL && walk->count != 0) + return 0; + + old = pol = walk->policy; + walk->policy = NULL; read_lock_bh(&xfrm_policy_lock); - count = 0; - for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - struct hlist_head *table = xfrm_policy_bydst[dir].table; - int i; + for (; walk->cur_type < XFRM_POLICY_TYPE_MAX; walk->cur_type++) { + if (walk->type != walk->cur_type && + walk->type != XFRM_POLICY_TYPE_ANY) + continue; - hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { - if (pol->type != type) + if (pol == NULL) { + pol = list_first_entry(&xfrm_policy_bytype[walk->cur_type], + struct xfrm_policy, bytype); + } + list_for_each_entry_from(pol, &xfrm_policy_bytype[walk->cur_type], bytype) { + if (pol->dead) continue; if (last) { - error = func(last, last_dir % XFRM_POLICY_MAX, - count, data); - if (error) + error = func(last, xfrm_policy_id2dir(last->index), + walk->count, data); + if (error) { + xfrm_pol_hold(last); + walk->policy = last; goto out; - } - last = pol; - last_dir = dir; - count++; - } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, table + i, bydst) { - if (pol->type != type) - continue; - if (last) { - error = func(last, last_dir % XFRM_POLICY_MAX, - count, data); - if (error) - goto out; } - last = pol; - last_dir = dir; - count++; } + last = pol; + walk->count++; } + pol = NULL; } - if (count == 0) { + if (walk->count == 0) { error = -ENOENT; goto out; } - error = func(last, last_dir % XFRM_POLICY_MAX, 0, data); + if (last) + error = func(last, xfrm_policy_id2dir(last->index), 0, data); out: read_unlock_bh(&xfrm_policy_lock); + if (old != NULL) + xfrm_pol_put(old); return error; } EXPORT_SYMBOL(xfrm_policy_walk); @@ -2365,6 +2375,9 @@ static void __init xfrm_policy_init(void) panic("XFRM: failed to allocate bydst hash\n"); } + for (dir = 0; dir < XFRM_POLICY_TYPE_MAX; dir++) + INIT_LIST_HEAD(&xfrm_policy_bytype[dir]); + INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task); register_netdevice_notifier(&xfrm_dev_notifier); } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7ba65e82941..9880b792e6a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -50,6 +50,7 @@ static DEFINE_SPINLOCK(xfrm_state_lock); * Main use is finding SA after policy selected tunnel or transport mode. * Also, it can be used by ah/esp icmp error handler to find offending SA. */ +static LIST_HEAD(xfrm_state_all); static struct hlist_head *xfrm_state_bydst __read_mostly; static struct hlist_head *xfrm_state_bysrc __read_mostly; static struct hlist_head *xfrm_state_byspi __read_mostly; @@ -510,6 +511,7 @@ struct xfrm_state *xfrm_state_alloc(void) if (x) { atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); + INIT_LIST_HEAD(&x->all); INIT_HLIST_NODE(&x->bydst); INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); @@ -533,6 +535,10 @@ void __xfrm_state_destroy(struct xfrm_state *x) { BUG_TRAP(x->km.state == XFRM_STATE_DEAD); + spin_lock_bh(&xfrm_state_lock); + list_del(&x->all); + spin_unlock_bh(&xfrm_state_lock); + spin_lock_bh(&xfrm_state_gc_lock); hlist_add_head(&x->bydst, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); @@ -909,6 +915,8 @@ static void __xfrm_state_insert(struct xfrm_state *x) x->genid = ++xfrm_state_genid; + list_add_tail(&x->all, &xfrm_state_all); + h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); @@ -1518,36 +1526,47 @@ unlock: } EXPORT_SYMBOL(xfrm_alloc_spi); -int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), +int xfrm_state_walk(struct xfrm_state_walk *walk, + int (*func)(struct xfrm_state *, int, void*), void *data) { - int i; - struct xfrm_state *x, *last = NULL; - struct hlist_node *entry; - int count = 0; + struct xfrm_state *old, *x, *last = NULL; int err = 0; + if (walk->state == NULL && walk->count != 0) + return 0; + + old = x = walk->state; + walk->state = NULL; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i <= xfrm_state_hmask; i++) { - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { - if (!xfrm_id_proto_match(x->id.proto, proto)) - continue; - if (last) { - err = func(last, count, data); - if (err) - goto out; + if (x == NULL) + x = list_first_entry(&xfrm_state_all, struct xfrm_state, all); + list_for_each_entry_from(x, &xfrm_state_all, all) { + if (x->km.state == XFRM_STATE_DEAD) + continue; + if (!xfrm_id_proto_match(x->id.proto, walk->proto)) + continue; + if (last) { + err = func(last, walk->count, data); + if (err) { + xfrm_state_hold(last); + walk->state = last; + goto out; } - last = x; - count++; } + last = x; + walk->count++; } - if (count == 0) { + if (walk->count == 0) { err = -ENOENT; goto out; } - err = func(last, 0, data); + if (last) + err = func(last, 0, data); out: spin_unlock_bh(&xfrm_state_lock); + if (old != NULL) + xfrm_state_put(old); return err; } EXPORT_SYMBOL(xfrm_state_walk); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f971ca5645f..f5fd5b3147c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -532,8 +532,6 @@ struct xfrm_dump_info { struct sk_buff *out_skb; u32 nlmsg_seq; u16 nlmsg_flags; - int start_idx; - int this_idx; }; static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) @@ -600,9 +598,6 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) struct nlmsghdr *nlh; int err; - if (sp->this_idx < sp->start_idx) - goto out; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) @@ -615,8 +610,6 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) goto nla_put_failure; nlmsg_end(skb, nlh); -out: - sp->this_idx++; return 0; nla_put_failure: @@ -624,18 +617,32 @@ nla_put_failure: return err; } +static int xfrm_dump_sa_done(struct netlink_callback *cb) +{ + struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1]; + xfrm_state_walk_done(walk); + return 0; +} + static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) { + struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1]; struct xfrm_dump_info info; + BUILD_BUG_ON(sizeof(struct xfrm_state_walk) > + sizeof(cb->args) - sizeof(cb->args[0])); + info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; - info.this_idx = 0; - info.start_idx = cb->args[0]; - (void) xfrm_state_walk(0, dump_one_state, &info); - cb->args[0] = info.this_idx; + + if (!cb->args[0]) { + cb->args[0] = 1; + xfrm_state_walk_init(walk, 0); + } + + (void) xfrm_state_walk(walk, dump_one_state, &info); return skb->len; } @@ -654,7 +661,6 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, info.out_skb = skb; info.nlmsg_seq = seq; info.nlmsg_flags = 0; - info.this_idx = info.start_idx = 0; if (dump_one_state(x, 0, &info)) { kfree_skb(skb); @@ -1232,9 +1238,6 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr struct sk_buff *skb = sp->out_skb; struct nlmsghdr *nlh; - if (sp->this_idx < sp->start_idx) - goto out; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) @@ -1250,8 +1253,6 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr goto nlmsg_failure; nlmsg_end(skb, nlh); -out: - sp->this_idx++; return 0; nlmsg_failure: @@ -1259,21 +1260,33 @@ nlmsg_failure: return -EMSGSIZE; } +static int xfrm_dump_policy_done(struct netlink_callback *cb) +{ + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; + + xfrm_policy_walk_done(walk); + return 0; +} + static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) { + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; struct xfrm_dump_info info; + BUILD_BUG_ON(sizeof(struct xfrm_policy_walk) > + sizeof(cb->args) - sizeof(cb->args[0])); + info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; - info.this_idx = 0; - info.start_idx = cb->args[0]; - (void) xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_one_policy, &info); -#ifdef CONFIG_XFRM_SUB_POLICY - (void) xfrm_policy_walk(XFRM_POLICY_TYPE_SUB, dump_one_policy, &info); -#endif - cb->args[0] = info.this_idx; + + if (!cb->args[0]) { + cb->args[0] = 1; + xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); + } + + (void) xfrm_policy_walk(walk, dump_one_policy, &info); return skb->len; } @@ -1293,7 +1306,6 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, info.out_skb = skb; info.nlmsg_seq = seq; info.nlmsg_flags = 0; - info.this_idx = info.start_idx = 0; if (dump_one_policy(xp, dir, 0, &info) < 0) { kfree_skb(skb); @@ -1891,15 +1903,18 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { static struct xfrm_link { int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); int (*dump)(struct sk_buff *, struct netlink_callback *); + int (*done)(struct netlink_callback *); } xfrm_dispatch[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa }, [XFRM_MSG_DELSA - XFRM_MSG_BASE] = { .doit = xfrm_del_sa }, [XFRM_MSG_GETSA - XFRM_MSG_BASE] = { .doit = xfrm_get_sa, - .dump = xfrm_dump_sa }, + .dump = xfrm_dump_sa, + .done = xfrm_dump_sa_done }, [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy }, [XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy }, [XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy, - .dump = xfrm_dump_policy }, + .dump = xfrm_dump_policy, + .done = xfrm_dump_policy_done }, [XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi }, [XFRM_MSG_ACQUIRE - XFRM_MSG_BASE] = { .doit = xfrm_add_acquire }, [XFRM_MSG_EXPIRE - XFRM_MSG_BASE] = { .doit = xfrm_add_sa_expire }, @@ -1938,7 +1953,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (link->dump == NULL) return -EINVAL; - return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, NULL); + return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, link->done); } err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, |