From b595076a180a56d1bb170e6eceda6eb9d76f4cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 1 Nov 2010 15:38:34 -0400 Subject: tree-wide: fix comment/printk typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "gadget", "through", "command", "maintain", "maintain", "controller", "address", "between", "initiali[zs]e", "instead", "function", "select", "already", "equal", "access", "management", "hierarchy", "registration", "interest", "relative", "memory", "offset", "already", Signed-off-by: Uwe Kleine-König Signed-off-by: Jiri Kosina --- net/core/dev.c | 2 +- net/decnet/dn_dev.c | 2 +- net/ipv4/tcp_output.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 35dfb831848..89204e8c0e1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6096,7 +6096,7 @@ static void __net_exit default_device_exit(struct net *net) static void __net_exit default_device_exit_batch(struct list_head *net_list) { /* At exit all network devices most be removed from a network - * namespace. Do this in the reverse order of registeration. + * namespace. Do this in the reverse order of registration. * Do this across as many network namespaces as possible to * improve batching efficiency. */ diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 4c409b46aa3..9b73e0b03e3 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1112,7 +1112,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) /* * This processes a device up event. We only start up * the loopback device & ethernet devices with correct - * MAC addreses automatically. Others must be started + * MAC addresses automatically. Others must be started * specifically. * * FIXME: How should we configure the loopback address ? If we could dispense diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 05b1ecf3676..e9615220716 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1336,7 +1336,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, return 0; } -/* Intialize TSO state of a skb. +/* Initialize TSO state of a skb. * This must be invoked the first time we consider transmitting * SKB onto the wire. */ -- cgit v1.2.3 From c996d8b9a8f37bd1b4dd7823abc42780b20998f8 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Mon, 15 Nov 2010 19:55:34 +0000 Subject: Docs/Kconfig: Update: osdl.org -> linuxfoundation.org Some of the documentation refers to web pages under the domain `osdl.org'. However, `osdl.org' now redirects to `linuxfoundation.org'. Rather than rely on redirections, this patch updates the addresses appropriately; for the most part, only documentation that is meant to be current has been updated. The patch should be pretty quick to scan and check; each new web-page url was gotten by trying out the original URL in a browser and then simply copying the the redirected URL (formatting as necessary). There is some conflict as to which one of these domain names is preferred: linuxfoundation.org linux-foundation.org So, I wrote: info@linuxfoundation.org and got this reply: Message-ID: <4CE17EE6.9040807@linuxfoundation.org> Date: Mon, 15 Nov 2010 10:41:42 -0800 From: David Ames ... linuxfoundation.org is preferred. The canonical name for our web site is www.linuxfoundation.org. Our list site is actually lists.linux-foundation.org. Regarding email linuxfoundation.org is preferred there are a few people who choose to use linux-foundation.org for their own reasons. Consequently, I used `linuxfoundation.org' for web pages and `lists.linux-foundation.org' for mailing-list web pages and email addresses; the only personal email address I updated from `@osdl.org' was that of Andrew Morton, who prefers `linux-foundation.org' according `git log'. Signed-off-by: Michael Witten Signed-off-by: Jiri Kosina --- net/Kconfig | 4 +++- net/dccp/Kconfig | 4 +++- net/ipv4/Kconfig | 4 +++- net/sched/Kconfig | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 55fd82e9ffd..5e203b55870 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -247,7 +247,9 @@ config NET_TCPPROBE what was just said, you don't need it: say N. Documentation on how to use TCP connection probing can be found - at http://linux-net.osdl.org/index.php/TcpProbe + at: + + http://www.linuxfoundation.org/collaborate/workgroups/networking/tcpprobe To compile this code as a module, choose M here: the module will be called tcp_probe. diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index ad6dffd9070..b75968a0401 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -49,7 +49,9 @@ config NET_DCCPPROBE what was just said, you don't need it: say N. Documentation on how to use DCCP connection probing can be found - at http://linux-net.osdl.org/index.php/DccpProbe + at: + + http://www.linuxfoundation.org/collaborate/workgroups/networking/dccpprobe To compile this code as a module, choose M here: the module will be called dccp_probe. diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 9e95d7fb6d5..a5a1050595d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -432,7 +432,9 @@ config INET_DIAG ---help--- Support for INET (TCP, DCCP, etc) socket monitoring interface used by native Linux tools such as ss. ss is included in iproute2, currently - downloadable at . + downloadable at: + + http://www.linuxfoundation.org/collaborate/workgroups/networking/iproute2 If unsure, say Y. diff --git a/net/sched/Kconfig b/net/sched/Kconfig index a36270a994d..f04d4a484d5 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -24,7 +24,7 @@ menuconfig NET_SCHED To administer these schedulers, you'll need the user-level utilities from the package iproute2+tc at . That package also contains some documentation; for more, check out - . + . This Quality of Service (QoS) support will enable you to use Differentiated Services (diffserv) and Resource Reservation Protocol -- cgit v1.2.3 From 62bac4af3d778f6d06d351c0442008967c512588 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 25 Oct 2010 12:50:15 -0400 Subject: svcrpc: don't set then immediately clear XPT_DEFERRED There's no harm to doing this, since the only caller will immediately call svc_enqueue() afterwards, ensuring we don't miss the remaining deferred requests just because XPT_DEFERRED was briefly cleared. But why not just do this the simple way? Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c82fe739fbd..a74cb67f15b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1059,14 +1059,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags)) return NULL; spin_lock(&xprt->xpt_lock); - clear_bit(XPT_DEFERRED, &xprt->xpt_flags); if (!list_empty(&xprt->xpt_deferred)) { dr = list_entry(xprt->xpt_deferred.next, struct svc_deferred_req, handle.recent); list_del_init(&dr->handle.recent); - set_bit(XPT_DEFERRED, &xprt->xpt_flags); - } + } else + clear_bit(XPT_DEFERRED, &xprt->xpt_flags); spin_unlock(&xprt->xpt_lock); return dr; } -- cgit v1.2.3 From ca7896cd83456082b1e78816cdf7e41658ef7bcd Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 25 Oct 2010 14:12:40 -0400 Subject: nfsd4: centralize more calls to svc_xprt_received Follow up on b48fa6b99100dc7772af3cd276035fcec9719ceb by moving all the svc_xprt_received() calls for the main xprt to one place. The clearing of XPT_BUSY here is critical to the correctness of the server, so I'd prefer it to be obvious where we do it. The only substantive result is moving svc_xprt_received() after svc_receive_deferred(). Other than a (likely insignificant) delay waking up the next thread, that should be harmless. Also reshuffle the exit code a little to skip a few other steps that we don't care about the in the svc_delete_xprt() case. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index a74cb67f15b..dd61cd02461 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -716,7 +716,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { dprintk("svc_recv: found XPT_CLOSE\n"); svc_delete_xprt(xprt); - } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + /* Leave XPT_BUSY set on the dead xprt: */ + goto out; + } + if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { struct svc_xprt *newxpt; newxpt = xprt->xpt_ops->xpo_accept(xprt); if (newxpt) { @@ -741,28 +744,23 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) spin_unlock_bh(&serv->sv_lock); svc_xprt_received(newxpt); } - svc_xprt_received(xprt); } else { dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", rqstp, pool->sp_id, xprt, atomic_read(&xprt->xpt_ref.refcount)); rqstp->rq_deferred = svc_deferred_dequeue(xprt); - if (rqstp->rq_deferred) { - svc_xprt_received(xprt); + if (rqstp->rq_deferred) len = svc_deferred_recv(rqstp); - } else { + else len = xprt->xpt_ops->xpo_recvfrom(rqstp); - svc_xprt_received(xprt); - } dprintk("svc: got len=%d\n", len); } + svc_xprt_received(xprt); /* No data, incomplete (TCP) read, or accept() */ - if (len == 0 || len == -EAGAIN) { - rqstp->rq_res.len = 0; - svc_xprt_release(rqstp); - return -EAGAIN; - } + if (len == 0 || len == -EAGAIN) + goto out; + clear_bit(XPT_OLD, &xprt->xpt_flags); rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); @@ -771,6 +769,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (serv->sv_stats) serv->sv_stats->netcnt++; return len; +out: + rqstp->rq_res.len = 0; + svc_xprt_release(rqstp); + return -EAGAIN; } EXPORT_SYMBOL_GPL(svc_recv); -- cgit v1.2.3 From f8c0d226fef05226ff1a85055c8ed663022f40c1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 25 Oct 2010 18:11:21 -0400 Subject: svcrpc: simplify svc_close_all There's no need to be fooling with XPT_BUSY now that all the threads are gone. The list_del_init() here could execute at the same time as the svc_xprt_enqueue()'s list_add_tail(), with undefined results. We don't really care at this point, but it might result in a spurious list-corruption warning or something. And svc_close() isn't adding any value; just call svc_delete_xprt() directly. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index dd61cd02461..8c018df8069 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -941,16 +941,16 @@ void svc_close_all(struct list_head *xprt_list) struct svc_xprt *xprt; struct svc_xprt *tmp; + /* + * The server is shutting down, and no more threads are running. + * svc_xprt_enqueue() might still be running, but at worst it + * will re-add the xprt to sp_sockets, which will soon get + * freed. So we don't bother with any more locking, and don't + * leave the close to the (nonexistent) server threads: + */ list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { set_bit(XPT_CLOSE, &xprt->xpt_flags); - if (test_bit(XPT_BUSY, &xprt->xpt_flags)) { - /* Waiting to be processed, but no threads left, - * So just remove it from the waiting list - */ - list_del_init(&xprt->xpt_ready); - clear_bit(XPT_BUSY, &xprt->xpt_flags); - } - svc_close_xprt(xprt); + svc_delete_xprt(xprt); } } -- cgit v1.2.3 From b176331627fccc726d28f4fc4a357d1f3c19dbf0 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 25 Oct 2010 20:24:48 -0400 Subject: svcrpc: svc_close_xprt comment Neil Brown had to explain to me why we do this here; record the answer for posterity. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 8c018df8069..bfbda676574 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -931,7 +931,12 @@ void svc_close_xprt(struct svc_xprt *xprt) if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) /* someone else will have to effect the close */ return; - + /* + * We expect svc_close_xprt() to work even when no threads are + * running (e.g., while configuring the server before starting + * any threads), so if the transport isn't busy, we delete + * it ourself: + */ svc_delete_xprt(xprt); } EXPORT_SYMBOL_GPL(svc_close_xprt); -- cgit v1.2.3 From 9c335c0b8daf56b9f73479d00b1dd726e1fcca09 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 26 Oct 2010 11:32:03 -0400 Subject: svcrpc: fix wspace-checking race We call svc_xprt_enqueue() after something happens which we think may require handling from a server thread. To avoid such events being lost, svc_xprt_enqueue() must guarantee that there will be a svc_serv() call from a server thread following any such event. It does that by either waking up a server thread itself, or checking that XPT_BUSY is set (in which case somebody else is doing it). But the check of XPT_BUSY could occur just as someone finishes processing some other event, and just before they clear XPT_BUSY. Therefore it's important not to clear XPT_BUSY without subsequently doing another svc_export_enqueue() to check whether the xprt should be requeued. The xpo_wspace() check in svc_xprt_enqueue() breaks this rule, allowing an event to be missed in situations like: data arrives call svc_tcp_data_ready(): call svc_xprt_enqueue(): set BUSY find no write space svc_reserve(): free up write space call svc_enqueue(): test BUSY clear BUSY So, instead, check wspace in the same places that the state flags are checked: before taking BUSY, and in svc_receive(). Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index bfbda676574..b6f47da170b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -302,6 +302,15 @@ static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) list_del(&rqstp->rq_list); } +static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) +{ + if (xprt->xpt_flags & ((1<xpt_flags & ((1<xpt_ops->xpo_has_wspace(xprt); + return false; +} + /* * Queue up a transport with data pending. If there are idle nfsd * processes, wake 'em up. @@ -314,8 +323,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) struct svc_rqst *rqstp; int cpu; - if (!(xprt->xpt_flags & - ((1<xpt_pool != NULL); xprt->xpt_pool = pool; - /* Handle pending connection */ - if (test_bit(XPT_CONN, &xprt->xpt_flags)) - goto process; - - /* Handle close in-progress */ - if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) - goto process; - - /* Check if we have space to reply to a request */ - if (!xprt->xpt_ops->xpo_has_wspace(xprt)) { - /* Don't enqueue while not enough space for reply */ - dprintk("svc: no write space, transport %p not enqueued\n", - xprt); - xprt->xpt_pool = NULL; - clear_bit(XPT_BUSY, &xprt->xpt_flags); - goto out_unlock; - } - - process: if (!list_empty(&pool->sp_threads)) { rqstp = list_entry(pool->sp_threads.next, struct svc_rqst, @@ -744,7 +733,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) spin_unlock_bh(&serv->sv_lock); svc_xprt_received(newxpt); } - } else { + } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", rqstp, pool->sp_id, xprt, atomic_read(&xprt->xpt_ref.refcount)); -- cgit v1.2.3 From 6f107b5861ecb09abfa7e2f9927e3884d1d81f91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 8 Dec 2010 14:35:34 +0800 Subject: net: Add missing lockdep class names for af_alg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miloslav Trmač Signed-off-by: Herbert Xu --- net/core/sock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 3eed5424e65..634d5bc409b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -157,7 +157,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , + "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , "sk_lock-AF_MAX" }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { @@ -173,7 +173,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", "slock-AF_CAIF" , + "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , "slock-AF_MAX" }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { @@ -189,7 +189,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", "clock-AF_CAIF" , + "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , "clock-AF_MAX" }; -- cgit v1.2.3 From a34f0b31398020e2d3be653eb695bd17a9cf3b55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 10 Dec 2010 14:55:42 +0100 Subject: fix comment typos concerning "consistent" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Uwe Kleine-König Signed-off-by: Jiri Kosina --- net/ipv6/af_inet6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 54e8e42f7a8..0935c5a42b1 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -300,7 +300,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - /* Reproduce AF_INET checks to make the bindings consitant */ + /* Reproduce AF_INET checks to make the bindings consistent */ v4addr = addr->sin6_addr.s6_addr32[3]; chk_addr_ret = inet_addr_type(net, v4addr); if (!sysctl_ip_nonlocal_bind && -- cgit v1.2.3 From 4129ccf303593a1922a934697f99e682ff491504 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 14 Dec 2010 14:58:59 +0000 Subject: SUNRPC: Avoid return code checking in rpcbind XDR encoder functions Clean up. The trend in the other XDR encoder functions is to BUG() when encoding problems occur, since a problem here is always due to a local coding error. Then, instead of a status, zero is unconditionally returned. Update the rpcbind XDR encoders to behave this way. To finish the update, use the new-style be32_to_cpup() and cpu_to_be32() macros, and compute the buffer sizes using raw integers instead of sizeof(). This matches the conventions used in other XDR functions. Signed-off-by: Chuck Lever Tested-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 60 ++++++++++++++++++-------------------------------- 1 file changed, 21 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index fa6d7ca2c85..d2a2ea090f9 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -705,14 +705,11 @@ static int rpcb_enc_mapping(struct rpc_rqst *req, __be32 *p, xdr_init_encode(&xdr, &req->rq_snd_buf, p); - p = xdr_reserve_space(&xdr, sizeof(__be32) * RPCB_mappingargs_sz); - if (unlikely(p == NULL)) - return -EIO; - - *p++ = htonl(rpcb->r_prog); - *p++ = htonl(rpcb->r_vers); - *p++ = htonl(rpcb->r_prot); - *p = htonl(rpcb->r_port); + p = xdr_reserve_space(&xdr, RPCB_mappingargs_sz << 2); + *p++ = cpu_to_be32(rpcb->r_prog); + *p++ = cpu_to_be32(rpcb->r_vers); + *p++ = cpu_to_be32(rpcb->r_prot); + *p = cpu_to_be32(rpcb->r_port); return 0; } @@ -728,11 +725,11 @@ static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p, rpcb->r_port = 0; - p = xdr_inline_decode(&xdr, sizeof(__be32)); + p = xdr_inline_decode(&xdr, 4); if (unlikely(p == NULL)) return -EIO; - port = ntohl(*p); + port = be32_to_cpup(p); dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid, task->tk_msg.rpc_proc->p_name, port); if (unlikely(port > USHRT_MAX)) @@ -750,7 +747,7 @@ static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p, xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - p = xdr_inline_decode(&xdr, sizeof(__be32)); + p = xdr_inline_decode(&xdr, 4); if (unlikely(p == NULL)) return -EIO; @@ -764,24 +761,16 @@ static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p, return 0; } -static int encode_rpcb_string(struct xdr_stream *xdr, const char *string, - const u32 maxstrlen) +static void encode_rpcb_string(struct xdr_stream *xdr, const char *string, + const u32 maxstrlen) { - u32 len; __be32 *p; + u32 len; - if (unlikely(string == NULL)) - return -EIO; len = strlen(string); - if (unlikely(len > maxstrlen)) - return -EIO; - - p = xdr_reserve_space(xdr, sizeof(__be32) + len); - if (unlikely(p == NULL)) - return -EIO; + BUG_ON(len > maxstrlen); + p = xdr_reserve_space(xdr, 4 + len); xdr_encode_opaque(p, string, len); - - return 0; } static int rpcb_enc_getaddr(struct rpc_rqst *req, __be32 *p, @@ -797,20 +786,13 @@ static int rpcb_enc_getaddr(struct rpc_rqst *req, __be32 *p, xdr_init_encode(&xdr, &req->rq_snd_buf, p); - p = xdr_reserve_space(&xdr, - sizeof(__be32) * (RPCB_program_sz + RPCB_version_sz)); - if (unlikely(p == NULL)) - return -EIO; - *p++ = htonl(rpcb->r_prog); - *p = htonl(rpcb->r_vers); - - if (encode_rpcb_string(&xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN)) - return -EIO; - if (encode_rpcb_string(&xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN)) - return -EIO; - if (encode_rpcb_string(&xdr, rpcb->r_owner, RPCB_MAXOWNERLEN)) - return -EIO; + p = xdr_reserve_space(&xdr, (RPCB_program_sz + RPCB_version_sz) << 2); + *p++ = cpu_to_be32(rpcb->r_prog); + *p = cpu_to_be32(rpcb->r_vers); + encode_rpcb_string(&xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN); + encode_rpcb_string(&xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN); + encode_rpcb_string(&xdr, rpcb->r_owner, RPCB_MAXOWNERLEN); return 0; } @@ -827,10 +809,10 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p, xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - p = xdr_inline_decode(&xdr, sizeof(__be32)); + p = xdr_inline_decode(&xdr, 4); if (unlikely(p == NULL)) goto out_fail; - len = ntohl(*p); + len = be32_to_cpup(p); /* * If the returned universal address is a null string, -- cgit v1.2.3 From 1ac7c23e4af5e83525137661595000099f1ce94f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 14 Dec 2010 14:59:09 +0000 Subject: SUNRPC: Determine value of "nrprocs" automatically Clean up. Just fixed a panic where the nrprocs field in a different upper layer client was set by hand incorrectly. Use the compiler-generated method used by the other upper layer protocols. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index d2a2ea090f9..43838c72b77 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -57,10 +57,6 @@ enum { RPCBPROC_GETSTAT, }; -#define RPCB_HIGHPROC_2 RPCBPROC_CALLIT -#define RPCB_HIGHPROC_3 RPCBPROC_TADDR2UADDR -#define RPCB_HIGHPROC_4 RPCBPROC_GETSTAT - /* * r_owner * @@ -975,19 +971,19 @@ static struct rpcb_info rpcb_next_version6[] = { static struct rpc_version rpcb_version2 = { .number = RPCBVERS_2, - .nrprocs = RPCB_HIGHPROC_2, + .nrprocs = ARRAY_SIZE(rpcb_procedures2), .procs = rpcb_procedures2 }; static struct rpc_version rpcb_version3 = { .number = RPCBVERS_3, - .nrprocs = RPCB_HIGHPROC_3, + .nrprocs = ARRAY_SIZE(rpcb_procedures3), .procs = rpcb_procedures3 }; static struct rpc_version rpcb_version4 = { .number = RPCBVERS_4, - .nrprocs = RPCB_HIGHPROC_4, + .nrprocs = ARRAY_SIZE(rpcb_procedures4), .procs = rpcb_procedures4 }; -- cgit v1.2.3 From 9f06c719f474be7003763284a990bed6377bb0d4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 14 Dec 2010 14:59:18 +0000 Subject: SUNRPC: New xdr_streams XDR encoder API Now that all client-side XDR encoder routines use xdr_streams, there should be no need to support the legacy calling sequence [rpc_rqst *, __be32 *, RPC arg *] anywhere. We can construct an xdr_stream in the generic RPC code, instead of in each encoder function. Also, all the client-side encoder functions return 0 now, making a return value superfluous. Take this opportunity to convert them to return void instead. This is a refactoring change. It should not cause different behavior. Signed-off-by: Chuck Lever Tested-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 14 +++++++++++-- net/sunrpc/auth_gss/auth_gss.c | 31 ++++++++++++++++++---------- net/sunrpc/clnt.c | 5 ++--- net/sunrpc/rpcb_clnt.c | 47 ++++++++++++++++++------------------------ 4 files changed, 54 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index afe67849269..651c9da703c 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -563,8 +563,17 @@ rpcauth_checkverf(struct rpc_task *task, __be32 *p) return cred->cr_ops->crvalidate(task, p); } +static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp, + __be32 *data, void *obj) +{ + struct xdr_stream xdr; + + xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data); + encode(rqstp, &xdr, obj); +} + int -rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, +rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; @@ -574,7 +583,8 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, if (cred->cr_ops->crwrap_req) return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj); /* By default, we encode the arguments normally. */ - return encode(rqstp, data, obj); + rpcauth_wrap_req_encode(encode, rqstp, data, obj); + return 0; } int diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 3835ce35e22..42b46f9a670 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1231,9 +1231,19 @@ out_bad: return NULL; } +static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp, + __be32 *p, void *obj) +{ + struct xdr_stream xdr; + + xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p); + encode(rqstp, &xdr, obj); +} + static inline int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj) + kxdreproc_t encode, struct rpc_rqst *rqstp, + __be32 *p, void *obj) { struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; struct xdr_buf integ_buf; @@ -1249,9 +1259,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; *p++ = htonl(rqstp->rq_seqno); - status = encode(rqstp, p, obj); - if (status) - return status; + gss_wrap_req_encode(encode, rqstp, p, obj); if (xdr_buf_subsegment(snd_buf, &integ_buf, offset, snd_buf->len - offset)) @@ -1325,7 +1333,8 @@ out: static inline int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj) + kxdreproc_t encode, struct rpc_rqst *rqstp, + __be32 *p, void *obj) { struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; u32 offset; @@ -1342,9 +1351,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; *p++ = htonl(rqstp->rq_seqno); - status = encode(rqstp, p, obj); - if (status) - return status; + gss_wrap_req_encode(encode, rqstp, p, obj); status = alloc_enc_pages(rqstp); if (status) @@ -1394,7 +1401,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, static int gss_wrap_req(struct rpc_task *task, - kxdrproc_t encode, void *rqstp, __be32 *p, void *obj) + kxdreproc_t encode, void *rqstp, __be32 *p, void *obj) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, @@ -1407,12 +1414,14 @@ gss_wrap_req(struct rpc_task *task, /* The spec seems a little ambiguous here, but I think that not * wrapping context destruction requests makes the most sense. */ - status = encode(rqstp, p, obj); + gss_wrap_req_encode(encode, rqstp, p, obj); + status = 0; goto out; } switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: - status = encode(rqstp, p, obj); + gss_wrap_req_encode(encode, rqstp, p, obj); + status = 0; break; case RPC_GSS_SVC_INTEGRITY: status = gss_wrap_req_integ(cred, ctx, encode, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 92ce94f5146..d446a32be66 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1095,7 +1095,7 @@ static void rpc_xdr_encode(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - kxdrproc_t encode; + kxdreproc_t encode; __be32 *p; dprint_status(task); @@ -1776,9 +1776,8 @@ out_overflow: goto out_garbage; } -static int rpcproc_encode_null(void *rqstp, __be32 *data, void *obj) +static void rpcproc_encode_null(void *rqstp, struct xdr_stream *xdr, void *obj) { - return 0; } static int rpcproc_decode_null(void *rqstp, __be32 *data, void *obj) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 43838c72b77..63912a1a298 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -689,25 +689,21 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) * XDR functions for rpcbind */ -static int rpcb_enc_mapping(struct rpc_rqst *req, __be32 *p, - const struct rpcbind_args *rpcb) +static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct rpcbind_args *rpcb) { struct rpc_task *task = req->rq_task; - struct xdr_stream xdr; + __be32 *p; dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n", task->tk_pid, task->tk_msg.rpc_proc->p_name, rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - - p = xdr_reserve_space(&xdr, RPCB_mappingargs_sz << 2); + p = xdr_reserve_space(xdr, RPCB_mappingargs_sz << 2); *p++ = cpu_to_be32(rpcb->r_prog); *p++ = cpu_to_be32(rpcb->r_vers); *p++ = cpu_to_be32(rpcb->r_prot); *p = cpu_to_be32(rpcb->r_port); - - return 0; } static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p, @@ -769,27 +765,24 @@ static void encode_rpcb_string(struct xdr_stream *xdr, const char *string, xdr_encode_opaque(p, string, len); } -static int rpcb_enc_getaddr(struct rpc_rqst *req, __be32 *p, - const struct rpcbind_args *rpcb) +static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct rpcbind_args *rpcb) { struct rpc_task *task = req->rq_task; - struct xdr_stream xdr; + __be32 *p; dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n", task->tk_pid, task->tk_msg.rpc_proc->p_name, rpcb->r_prog, rpcb->r_vers, rpcb->r_netid, rpcb->r_addr); - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - - p = xdr_reserve_space(&xdr, (RPCB_program_sz + RPCB_version_sz) << 2); + p = xdr_reserve_space(xdr, (RPCB_program_sz + RPCB_version_sz) << 2); *p++ = cpu_to_be32(rpcb->r_prog); *p = cpu_to_be32(rpcb->r_vers); - encode_rpcb_string(&xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN); - encode_rpcb_string(&xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN); - encode_rpcb_string(&xdr, rpcb->r_owner, RPCB_MAXOWNERLEN); - return 0; + encode_rpcb_string(xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN); + encode_rpcb_string(xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN); + encode_rpcb_string(xdr, rpcb->r_owner, RPCB_MAXOWNERLEN); } static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p, @@ -849,7 +842,7 @@ out_fail: static struct rpc_procinfo rpcb_procedures2[] = { [RPCBPROC_SET] = { .p_proc = RPCBPROC_SET, - .p_encode = (kxdrproc_t)rpcb_enc_mapping, + .p_encode = (kxdreproc_t)rpcb_enc_mapping, .p_decode = (kxdrproc_t)rpcb_dec_set, .p_arglen = RPCB_mappingargs_sz, .p_replen = RPCB_setres_sz, @@ -859,7 +852,7 @@ static struct rpc_procinfo rpcb_procedures2[] = { }, [RPCBPROC_UNSET] = { .p_proc = RPCBPROC_UNSET, - .p_encode = (kxdrproc_t)rpcb_enc_mapping, + .p_encode = (kxdreproc_t)rpcb_enc_mapping, .p_decode = (kxdrproc_t)rpcb_dec_set, .p_arglen = RPCB_mappingargs_sz, .p_replen = RPCB_setres_sz, @@ -869,7 +862,7 @@ static struct rpc_procinfo rpcb_procedures2[] = { }, [RPCBPROC_GETPORT] = { .p_proc = RPCBPROC_GETPORT, - .p_encode = (kxdrproc_t)rpcb_enc_mapping, + .p_encode = (kxdreproc_t)rpcb_enc_mapping, .p_decode = (kxdrproc_t)rpcb_dec_getport, .p_arglen = RPCB_mappingargs_sz, .p_replen = RPCB_getportres_sz, @@ -882,7 +875,7 @@ static struct rpc_procinfo rpcb_procedures2[] = { static struct rpc_procinfo rpcb_procedures3[] = { [RPCBPROC_SET] = { .p_proc = RPCBPROC_SET, - .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_encode = (kxdreproc_t)rpcb_enc_getaddr, .p_decode = (kxdrproc_t)rpcb_dec_set, .p_arglen = RPCB_getaddrargs_sz, .p_replen = RPCB_setres_sz, @@ -892,7 +885,7 @@ static struct rpc_procinfo rpcb_procedures3[] = { }, [RPCBPROC_UNSET] = { .p_proc = RPCBPROC_UNSET, - .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_encode = (kxdreproc_t)rpcb_enc_getaddr, .p_decode = (kxdrproc_t)rpcb_dec_set, .p_arglen = RPCB_getaddrargs_sz, .p_replen = RPCB_setres_sz, @@ -902,7 +895,7 @@ static struct rpc_procinfo rpcb_procedures3[] = { }, [RPCBPROC_GETADDR] = { .p_proc = RPCBPROC_GETADDR, - .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_encode = (kxdreproc_t)rpcb_enc_getaddr, .p_decode = (kxdrproc_t)rpcb_dec_getaddr, .p_arglen = RPCB_getaddrargs_sz, .p_replen = RPCB_getaddrres_sz, @@ -915,7 +908,7 @@ static struct rpc_procinfo rpcb_procedures3[] = { static struct rpc_procinfo rpcb_procedures4[] = { [RPCBPROC_SET] = { .p_proc = RPCBPROC_SET, - .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_encode = (kxdreproc_t)rpcb_enc_getaddr, .p_decode = (kxdrproc_t)rpcb_dec_set, .p_arglen = RPCB_getaddrargs_sz, .p_replen = RPCB_setres_sz, @@ -925,7 +918,7 @@ static struct rpc_procinfo rpcb_procedures4[] = { }, [RPCBPROC_UNSET] = { .p_proc = RPCBPROC_UNSET, - .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_encode = (kxdreproc_t)rpcb_enc_getaddr, .p_decode = (kxdrproc_t)rpcb_dec_set, .p_arglen = RPCB_getaddrargs_sz, .p_replen = RPCB_setres_sz, @@ -935,7 +928,7 @@ static struct rpc_procinfo rpcb_procedures4[] = { }, [RPCBPROC_GETADDR] = { .p_proc = RPCBPROC_GETADDR, - .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_encode = (kxdreproc_t)rpcb_enc_getaddr, .p_decode = (kxdrproc_t)rpcb_dec_getaddr, .p_arglen = RPCB_getaddrargs_sz, .p_replen = RPCB_getaddrres_sz, -- cgit v1.2.3 From bf2695516db982e90a22fc94f93491b481796bb1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 14 Dec 2010 14:59:29 +0000 Subject: SUNRPC: New xdr_streams XDR decoder API Now that all client-side XDR decoder routines use xdr_streams, there should be no need to support the legacy calling sequence [rpc_rqst *, __be32 *, RPC res *] anywhere. We can construct an xdr_stream in the generic RPC code, instead of in each decoder function. This is a refactoring change. It should not cause different behavior. Signed-off-by: Chuck Lever Tested-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 14 +++++++++++-- net/sunrpc/auth_gss/auth_gss.c | 13 ++++++++++-- net/sunrpc/clnt.c | 4 ++-- net/sunrpc/rpcb_clnt.c | 46 ++++++++++++++++++------------------------ 4 files changed, 45 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 651c9da703c..67e31276682 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -587,8 +587,18 @@ rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, return 0; } +static int +rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp, + __be32 *data, void *obj) +{ + struct xdr_stream xdr; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data); + return decode(rqstp, &xdr, obj); +} + int -rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, +rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; @@ -599,7 +609,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, return cred->cr_ops->crunwrap_resp(task, decode, rqstp, data, obj); /* By default, we decode the arguments normally. */ - return decode(rqstp, data, obj); + return rpcauth_unwrap_req_decode(decode, rqstp, data, obj); } int diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 42b46f9a670..45dbf1521b9 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1503,10 +1503,19 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, return 0; } +static int +gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp, + __be32 *p, void *obj) +{ + struct xdr_stream xdr; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + return decode(rqstp, &xdr, obj); +} static int gss_unwrap_resp(struct rpc_task *task, - kxdrproc_t decode, void *rqstp, __be32 *p, void *obj) + kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, @@ -1537,7 +1546,7 @@ gss_unwrap_resp(struct rpc_task *task, cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp) + (savedlen - head->iov_len); out_decode: - status = decode(rqstp, p, obj); + status = gss_unwrap_req_decode(decode, rqstp, p, obj); out: gss_put_ctx(ctx); dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d446a32be66..4e8210dcda7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1535,7 +1535,7 @@ call_decode(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; - kxdrproc_t decode = task->tk_msg.rpc_proc->p_decode; + kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode; __be32 *p; dprintk("RPC: %5u call_decode (status %d)\n", @@ -1780,7 +1780,7 @@ static void rpcproc_encode_null(void *rqstp, struct xdr_stream *xdr, void *obj) { } -static int rpcproc_decode_null(void *rqstp, __be32 *data, void *obj) +static int rpcproc_decode_null(void *rqstp, struct xdr_stream *xdr, void *obj) { return 0; } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 63912a1a298..c652e4cc9fe 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -706,18 +706,16 @@ static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr, *p = cpu_to_be32(rpcb->r_port); } -static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p, +static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr, struct rpcbind_args *rpcb) { struct rpc_task *task = req->rq_task; - struct xdr_stream xdr; unsigned long port; - - xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + __be32 *p; rpcb->r_port = 0; - p = xdr_inline_decode(&xdr, 4); + p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) return -EIO; @@ -731,20 +729,18 @@ static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p, return 0; } -static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p, +static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr, unsigned int *boolp) { struct rpc_task *task = req->rq_task; - struct xdr_stream xdr; - - xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + __be32 *p; - p = xdr_inline_decode(&xdr, 4); + p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) return -EIO; *boolp = 0; - if (*p) + if (*p != xdr_zero) *boolp = 1; dprintk("RPC: %5u RPCB_%s call %s\n", @@ -785,20 +781,18 @@ static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, encode_rpcb_string(xdr, rpcb->r_owner, RPCB_MAXOWNERLEN); } -static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p, +static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, struct rpcbind_args *rpcb) { struct sockaddr_storage address; struct sockaddr *sap = (struct sockaddr *)&address; struct rpc_task *task = req->rq_task; - struct xdr_stream xdr; + __be32 *p; u32 len; rpcb->r_port = 0; - xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - - p = xdr_inline_decode(&xdr, 4); + p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) goto out_fail; len = be32_to_cpup(p); @@ -816,7 +810,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p, if (unlikely(len > RPCBIND_MAXUADDRLEN)) goto out_fail; - p = xdr_inline_decode(&xdr, len); + p = xdr_inline_decode(xdr, len); if (unlikely(p == NULL)) goto out_fail; dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid, @@ -843,7 +837,7 @@ static struct rpc_procinfo rpcb_procedures2[] = { [RPCBPROC_SET] = { .p_proc = RPCBPROC_SET, .p_encode = (kxdreproc_t)rpcb_enc_mapping, - .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_decode = (kxdrdproc_t)rpcb_dec_set, .p_arglen = RPCB_mappingargs_sz, .p_replen = RPCB_setres_sz, .p_statidx = RPCBPROC_SET, @@ -853,7 +847,7 @@ static struct rpc_procinfo rpcb_procedures2[] = { [RPCBPROC_UNSET] = { .p_proc = RPCBPROC_UNSET, .p_encode = (kxdreproc_t)rpcb_enc_mapping, - .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_decode = (kxdrdproc_t)rpcb_dec_set, .p_arglen = RPCB_mappingargs_sz, .p_replen = RPCB_setres_sz, .p_statidx = RPCBPROC_UNSET, @@ -863,7 +857,7 @@ static struct rpc_procinfo rpcb_procedures2[] = { [RPCBPROC_GETPORT] = { .p_proc = RPCBPROC_GETPORT, .p_encode = (kxdreproc_t)rpcb_enc_mapping, - .p_decode = (kxdrproc_t)rpcb_dec_getport, + .p_decode = (kxdrdproc_t)rpcb_dec_getport, .p_arglen = RPCB_mappingargs_sz, .p_replen = RPCB_getportres_sz, .p_statidx = RPCBPROC_GETPORT, @@ -876,7 +870,7 @@ static struct rpc_procinfo rpcb_procedures3[] = { [RPCBPROC_SET] = { .p_proc = RPCBPROC_SET, .p_encode = (kxdreproc_t)rpcb_enc_getaddr, - .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_decode = (kxdrdproc_t)rpcb_dec_set, .p_arglen = RPCB_getaddrargs_sz, .p_replen = RPCB_setres_sz, .p_statidx = RPCBPROC_SET, @@ -886,7 +880,7 @@ static struct rpc_procinfo rpcb_procedures3[] = { [RPCBPROC_UNSET] = { .p_proc = RPCBPROC_UNSET, .p_encode = (kxdreproc_t)rpcb_enc_getaddr, - .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_decode = (kxdrdproc_t)rpcb_dec_set, .p_arglen = RPCB_getaddrargs_sz, .p_replen = RPCB_setres_sz, .p_statidx = RPCBPROC_UNSET, @@ -896,7 +890,7 @@ static struct rpc_procinfo rpcb_procedures3[] = { [RPCBPROC_GETADDR] = { .p_proc = RPCBPROC_GETADDR, .p_encode = (kxdreproc_t)rpcb_enc_getaddr, - .p_decode = (kxdrproc_t)rpcb_dec_getaddr, + .p_decode = (kxdrdproc_t)rpcb_dec_getaddr, .p_arglen = RPCB_getaddrargs_sz, .p_replen = RPCB_getaddrres_sz, .p_statidx = RPCBPROC_GETADDR, @@ -909,7 +903,7 @@ static struct rpc_procinfo rpcb_procedures4[] = { [RPCBPROC_SET] = { .p_proc = RPCBPROC_SET, .p_encode = (kxdreproc_t)rpcb_enc_getaddr, - .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_decode = (kxdrdproc_t)rpcb_dec_set, .p_arglen = RPCB_getaddrargs_sz, .p_replen = RPCB_setres_sz, .p_statidx = RPCBPROC_SET, @@ -919,7 +913,7 @@ static struct rpc_procinfo rpcb_procedures4[] = { [RPCBPROC_UNSET] = { .p_proc = RPCBPROC_UNSET, .p_encode = (kxdreproc_t)rpcb_enc_getaddr, - .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_decode = (kxdrdproc_t)rpcb_dec_set, .p_arglen = RPCB_getaddrargs_sz, .p_replen = RPCB_setres_sz, .p_statidx = RPCBPROC_UNSET, @@ -929,7 +923,7 @@ static struct rpc_procinfo rpcb_procedures4[] = { [RPCBPROC_GETADDR] = { .p_proc = RPCBPROC_GETADDR, .p_encode = (kxdreproc_t)rpcb_enc_getaddr, - .p_decode = (kxdrproc_t)rpcb_dec_getaddr, + .p_decode = (kxdrdproc_t)rpcb_dec_getaddr, .p_arglen = RPCB_getaddrargs_sz, .p_replen = RPCB_getaddrres_sz, .p_statidx = RPCBPROC_GETADDR, -- cgit v1.2.3 From 7c96aef75949a56ec427fc6a2522dace2af33605 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 15 Nov 2010 11:27:01 +1100 Subject: sunrpc: remove xpt_pool The xpt_pool field is only used for reporting BUGs. And it isn't used correctly. In particular, when it is cleared in svc_xprt_received before XPT_BUSY is cleared, there is no guarantee that either the compiler or the CPU might not re-order to two assignments, just setting xpt_pool to NULL after XPT_BUSY is cleared. If a different cpu were running svc_xprt_enqueue at this moment, it might see XPT_BUSY clear and then xpt_pool non-NULL, and so BUG. This could be fixed by calling smp_mb__before_clear_bit() before the clear_bit. However as xpt_pool isn't really used, it seems safest to simply remove xpt_pool. Another alternate would be to change the clear_bit to clear_bit_unlock, and the test_and_set_bit to test_and_set_bit_lock. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 5a75d23645c..5eae53b1e30 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -351,8 +351,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) dprintk("svc: transport %p busy, not enqueued\n", xprt); goto out_unlock; } - BUG_ON(xprt->xpt_pool != NULL); - xprt->xpt_pool = pool; if (!list_empty(&pool->sp_threads)) { rqstp = list_entry(pool->sp_threads.next, @@ -370,13 +368,11 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); pool->sp_stats.threads_woken++; - BUG_ON(xprt->xpt_pool != pool); wake_up(&rqstp->rq_wait); } else { dprintk("svc: transport %p put into queue\n", xprt); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); pool->sp_stats.sockets_queued++; - BUG_ON(xprt->xpt_pool != pool); } out_unlock: @@ -415,7 +411,6 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) void svc_xprt_received(struct svc_xprt *xprt) { BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags)); - xprt->xpt_pool = NULL; /* As soon as we clear busy, the xprt could be closed and * 'put', so we need a reference to call svc_xprt_enqueue with: */ -- cgit v1.2.3 From 3942302ea9e1dffa933021b20bf1642046e7641b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 15 Nov 2010 11:27:01 +1100 Subject: sunrpc: svc_sock_names should hold ref to socket being closed. Currently svc_sock_names calls svc_close_xprt on a svc_sock to which it does not own a reference. As soon as svc_close_xprt sets XPT_CLOSE, the socket could be freed by a separate thread (though this is a very unlikely race). It is safer to hold a reference while calling svc_close_xprt. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 07919e16be3..52bd1130e83 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -324,19 +324,21 @@ int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen, len = onelen; break; } - if (toclose && strcmp(toclose, buf + len) == 0) + if (toclose && strcmp(toclose, buf + len) == 0) { closesk = svsk; - else + svc_xprt_get(&closesk->sk_xprt); + } else len += onelen; } spin_unlock_bh(&serv->sv_lock); - if (closesk) + if (closesk) { /* Should unregister with portmap, but you cannot * unregister just one protocol... */ svc_close_xprt(&closesk->sk_xprt); - else if (toclose) + svc_xprt_put(&closesk->sk_xprt); + } else if (toclose) return -ENOENT; return len; } -- cgit v1.2.3 From 66c941f4aa8aef397a757001af61073db23b39e5 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 15 Dec 2010 14:47:20 +0800 Subject: net: sunrpc: kill unused macros These macros never be used for several years. Signed-off-by: Shan Wei Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 2 -- net/sunrpc/svcauth.c | 1 - net/sunrpc/svcauth_unix.c | 2 -- 3 files changed, 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index dec2a6fc7c1..bcdae78fdfc 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -67,7 +67,6 @@ static int netobj_equal(struct xdr_netobj *a, struct xdr_netobj *b) #define RSI_HASHBITS 6 #define RSI_HASHMAX (1< Date: Sun, 14 Nov 2010 18:08:11 -0800 Subject: net/sunrpc/auth_gss/gss_krb5_crypto.c: Use normal negative error value return And remove unnecessary double semicolon too. No effect to code, as test is != 0. Signed-off-by: Joe Perches Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 75ee993ea05..9576f35ab70 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -137,7 +137,7 @@ arcfour_hmac_md5_usage_to_salt(unsigned int usage, u8 salt[4]) ms_usage = 13; break; default: - return EINVAL;; + return -EINVAL; } salt[0] = (ms_usage >> 0) & 0xff; salt[1] = (ms_usage >> 8) & 0xff; -- cgit v1.2.3 From b3bcedadf23264c3b7afcbfbfe1965a17ef1352c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 21 Dec 2010 10:52:24 -0500 Subject: net/sunrpc/clnt.c: Convert sprintf_symbol to %ps Signed-off-by: Joe Perches Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4e8210dcda7..57d344cf225 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1829,23 +1829,15 @@ static void rpc_show_task(const struct rpc_clnt *clnt, const struct rpc_task *task) { const char *rpc_waitq = "none"; - char *p, action[KSYM_SYMBOL_LEN]; if (RPC_IS_QUEUED(task)) rpc_waitq = rpc_qname(task->tk_waitqueue); - /* map tk_action pointer to a function name; then trim off - * the "+0x0 [sunrpc]" */ - sprint_symbol(action, (unsigned long)task->tk_action); - p = strchr(action, '+'); - if (p) - *p = '\0'; - - printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%s q:%s\n", + printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n", task->tk_pid, task->tk_flags, task->tk_status, clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops, clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task), - action, rpc_waitq); + task->tk_action, rpc_waitq); } void rpc_show_tasks(void) -- cgit v1.2.3 From beb0f0a9fba1fa98b378329a9a5b0a73f25097ae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Dec 2010 21:19:26 +0000 Subject: kernel panic when mount NFSv4 On Tue, 2010-12-14 at 16:58 +0800, Mi Jinlong wrote: > Hi, > > When testing NFSv4 at RHEL6 with kernel 2.6.32, I got a kernel panic > at NFS client's __rpc_create_common function. > > The panic place is: > rpc_mkpipe > __rpc_lookup_create() <=== find pipefile *idmap* > __rpc_mkpipe() <=== pipefile is *idmap* > __rpc_create_common() > ****** BUG_ON(!d_unhashed(dentry)); ****** *panic* > > It means that the dentry's d_flags have be set DCACHE_UNHASHED, > but it should not be set here. > > Is someone known this bug? or give me some idea? > > A reproduce program is append, but it can't reproduce the bug every time. > the export is: "/nfsroot *(rw,no_root_squash,fsid=0,insecure)" > > And the panic message is append. > > ============================================================================ > #!/bin/sh > > LOOPTOTAL=768 > LOOPCOUNT=0 > ret=0 > > while [ $LOOPCOUNT -ne $LOOPTOTAL ] > do > ((LOOPCOUNT += 1)) > service nfs restart > /usr/sbin/rpc.idmapd > mount -t nfs4 127.0.0.1:/ /mnt|| return 1; > ls -l /var/lib/nfs/rpc_pipefs/nfs/*/ > umount /mnt > echo $LOOPCOUNT > done > > =============================================================================== > Code: af 60 01 00 00 89 fa 89 f0 e8 64 cf 89 f0 e8 5c 7c 64 cf 31 c0 8b 5c 24 10 8b > 74 24 14 8b 7c 24 18 8b 6c 24 1c 83 c4 20 c3 <0f> 0b eb fc 8b 46 28 c7 44 24 08 20 > de ee f0 c7 44 24 04 56 ea > EIP:[] __rpc_create_common+0x8a/0xc0 [sunrpc] SS:ESP 0068:eccb5d28 > ---[ end trace 8f5606cd08928ed2]--- > Kernel panic - not syncing: Fatal exception > Pid:7131, comm: mount.nfs4 Tainted: G D -------------------2.6.32 #1 > Call Trace: > [] ? panic+0x42/0xed > [] ? oops_end+0xbc/0xd0 > [] ? do_invalid_op+0x0/0x90 > [] ? do_invalid_op+0x7f/0x90 > [] ? __rpc_create_common+0x8a/0xc0[sunrpc] > [] ? rpc_free_task+0x33/0x70[sunrpc] > [] ? prc_call_sync+0x48/0x60[sunrpc] > [] ? rpc_ping+0x4e/0x60[sunrpc] > [] ? rpc_create+0x38f/0x4f0[sunrpc] > [] ? error_code+0x73/0x78 > [] ? __rpc_create_common+0x8a/0xc0[sunrpc] > [] ? d_lookup+0x2a/0x40 > [] ? rpc_mkpipe+0x111/0x1b0[sunrpc] > [] ? nfs_create_rpc_client+0xb4/0xf0[nfs] > [] ? nfs_fscache_get_client_cookie+0x1d/0x50[nfs] > [] ? nfs_idmap_new+0x7b/0x140[nfs] > [] ? strlcpy+0x3a/0x60 > [] ? nfs4_set_client+0xea/0x2b0[nfs] > [] ? nfs4_create_server+0xac/0x1b0[nfs] > [] ? krealloc+0x40/0x50 > [] ? nfs4_remote_get_sb+0x6b/0x250[nfs] > [] ? kstrdup+0x3c/0x60 > [] ? vfs_kern_mount+0x69/0x170 > [] ? nfs_do_root_mount+0x6c/0xa0[nfs] > [] ? nfs4_try_mount+0x37/0xa0[nfs] > [] ? nfs4_validate_text_mount_data+-x7d/0xf0[nfs] > [] ? nfs4_get_sb+0x92/0x2f0 > [] ? vfs_kern_mount+0x69/0x170 > [] ? get_fs_type+0x32/0xb0 > [] ? do_kern_mount+0x3f/0xe0 > [] ? do_mount+0x2ef/0x740 > [] ? copy_mount_options+0xb0/0x120 > [] ? sys_mount+0x6e/0xa0 Hi, Does the following patch fix the problem? Cheers Trond -------------------------- SUNRPC: Fix a BUG in __rpc_create_common From: Trond Myklebust Mi Jinlong reports: When testing NFSv4 at RHEL6 with kernel 2.6.32, I got a kernel panic at NFS client's __rpc_create_common function. The panic place is: rpc_mkpipe __rpc_lookup_create() <=== find pipefile *idmap* __rpc_mkpipe() <=== pipefile is *idmap* __rpc_create_common() ****** BUG_ON(!d_unhashed(dentry)); ****** *panic* The test is wrong: we can find ourselves with a hashed negative dentry here if the idmapper tried to look up the file before we got round to creating it. Just replace the BUG_ON() with a d_drop(dentry). Reported-by: Mi Jinlong Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 10a17a37ec4..5356d95343f 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -466,7 +466,7 @@ static int __rpc_create_common(struct inode *dir, struct dentry *dentry, { struct inode *inode; - BUG_ON(!d_unhashed(dentry)); + d_drop(dentry); inode = rpc_get_inode(dir->i_sb, mode); if (!inode) goto out_err; -- cgit v1.2.3 From 31f7aa65f536995c6d933c57230919ae408952a5 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 24 Dec 2010 14:03:38 -0500 Subject: svcrpc: modifying valid sunrpc cache entries is racy Once a sunrpc cache entry is VALID, we should be replacing it (and allowing any concurrent users to destroy it on last put) instead of trying to update it in place. Otherwise someone referencing the ip_map we're modifying here could try to use the m_client just as we're putting the last reference. The bug should only be seen by users of the legacy nfsd interfaces. (Thanks to Neil for suggestion to use sunrpc_invalidate.) Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index a04ac9193d5..59a7c524a8b 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -401,8 +401,7 @@ struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr) return NULL; if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) { - if (test_and_set_bit(CACHE_NEGATIVE, &ipm->h.flags) == 0) - auth_domain_put(&ipm->m_client->h); + sunrpc_invalidate(&ipm->h, sn->ip_map_cache); rv = NULL; } else { rv = &ipm->m_client->h; -- cgit v1.2.3 From bdd5f05d91e8ae68075b812ce244c918d3d752cd Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 4 Jan 2011 13:31:45 -0500 Subject: SUNRPC: Remove more code when NFSD_DEPRECATED is not configured Signed-off-by: NeilBrown [bfields@redhat.com: moved svcauth_unix_purge outside ifdef's.] Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 59a7c524a8b..30916b06c12 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -30,7 +30,9 @@ struct unix_domain { struct auth_domain h; +#ifdef CONFIG_NFSD_DEPRECATED int addr_changes; +#endif /* CONFIG_NFSD_DEPRECATED */ /* other stuff later */ }; @@ -64,7 +66,9 @@ struct auth_domain *unix_domain_find(char *name) return NULL; } new->h.flavour = &svcauth_unix; +#ifdef CONFIG_NFSD_DEPRECATED new->addr_changes = 0; +#endif /* CONFIG_NFSD_DEPRECATED */ rv = auth_domain_lookup(name, &new->h); } } @@ -91,7 +95,9 @@ struct ip_map { char m_class[8]; /* e.g. "nfsd" */ struct in6_addr m_addr; struct unix_domain *m_client; +#ifdef CONFIG_NFSD_DEPRECATED int m_add_change; +#endif /* CONFIG_NFSD_DEPRECATED */ }; static void ip_map_put(struct kref *kref) @@ -145,7 +151,9 @@ static void update(struct cache_head *cnew, struct cache_head *citem) kref_get(&item->m_client->h.ref); new->m_client = item->m_client; +#ifdef CONFIG_NFSD_DEPRECATED new->m_add_change = item->m_add_change; +#endif /* CONFIG_NFSD_DEPRECATED */ } static struct cache_head *ip_map_alloc(void) { @@ -330,6 +338,7 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, ip.h.flags = 0; if (!udom) set_bit(CACHE_NEGATIVE, &ip.h.flags); +#ifdef CONFIG_NFSD_DEPRECATED else { ip.m_add_change = udom->addr_changes; /* if this is from the legacy set_client system call, @@ -338,6 +347,7 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, if (expiry == NEVER) ip.m_add_change++; } +#endif /* CONFIG_NFSD_DEPRECATED */ ip.h.expiry_time = expiry; ch = sunrpc_cache_update(cd, &ip.h, &ipm->h, hash_str(ipm->m_class, IP_HASHBITS) ^ @@ -357,6 +367,7 @@ static inline int ip_map_update(struct net *net, struct ip_map *ipm, return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry); } +#ifdef CONFIG_NFSD_DEPRECATED int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom) { struct unix_domain *udom; @@ -411,6 +422,7 @@ struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr) return rv; } EXPORT_SYMBOL_GPL(auth_unix_lookup); +#endif /* CONFIG_NFSD_DEPRECATED */ void svcauth_unix_purge(void) { -- cgit v1.2.3 From d76d1815f3e72fb627ad7f95ef63120b0a557c9c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 2 Jan 2011 21:28:34 -0500 Subject: svcrpc: avoid double reply caused by deferral race Commit d29068c431599fa "sunrpc: Simplify cache_defer_req and related functions." asserted that cache_check() could determine success or failure of cache_defer_req() by checking the CACHE_PENDING bit. This isn't quite right. We need to know whether cache_defer_req() created a deferred request, in which case sending an rpc reply has become the responsibility of the deferred request, and it is important that we not send our own reply, resulting in two different replies to the same request. And the CACHE_PENDING bit doesn't tell us that; we could have succesfully created a deferred request at the same time as another thread cleared the CACHE_PENDING bit. So, partially revert that commit, to ensure that cache_check() returns -EAGAIN if and only if a deferred request has been created. Signed-off-by: J. Bruce Fields Acked-by: NeilBrown --- net/sunrpc/cache.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index e433e7580e2..0d6002f26d8 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -37,7 +37,7 @@ #define RPCDBG_FACILITY RPCDBG_CACHE -static void cache_defer_req(struct cache_req *req, struct cache_head *item); +static bool cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h) @@ -268,9 +268,11 @@ int cache_check(struct cache_detail *detail, } if (rv == -EAGAIN) { - cache_defer_req(rqstp, h); - if (!test_bit(CACHE_PENDING, &h->flags)) { - /* Request is not deferred */ + if (!cache_defer_req(rqstp, h)) { + /* + * Request was not deferred; handle it as best + * we can ourselves: + */ rv = cache_is_valid(detail, h); if (rv == -EAGAIN) rv = -ETIMEDOUT; @@ -618,18 +620,19 @@ static void cache_limit_defers(void) discard->revisit(discard, 1); } -static void cache_defer_req(struct cache_req *req, struct cache_head *item) +/* Return true if and only if a deferred request is queued. */ +static bool cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq; if (req->thread_wait) { cache_wait_req(req, item); if (!test_bit(CACHE_PENDING, &item->flags)) - return; + return false; } dreq = req->defer(req); if (dreq == NULL) - return; + return false; setup_deferral(dreq, item, 1); if (!test_bit(CACHE_PENDING, &item->flags)) /* Bit could have been cleared before we managed to @@ -638,6 +641,7 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item) cache_revisit_request(item); cache_limit_defers(); + return true; } static void cache_revisit_request(struct cache_head *item) -- cgit v1.2.3 From 9e701c610923aaeac8b38b9202a686d1cc9ee35d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 2 Jan 2011 21:56:36 -0500 Subject: svcrpc: simpler request dropping Currently we use -EAGAIN returns to determine when to drop a deferred request. On its own, that is error-prone, as it makes us treat -EAGAIN returns from other functions specially to prevent inadvertent dropping. So, use a flag on the request instead. Returning an error on request deferral is still required, to prevent further processing, but we no longer need worry that an error return on its own could result in a drop. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc.c | 3 ++- net/sunrpc/svc_xprt.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 6359c42c494..df1931f8ae9 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1005,6 +1005,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) rqstp->rq_splice_ok = 1; /* Will be turned off only when NFSv4 Sessions are used */ rqstp->rq_usedeferral = 1; + rqstp->rq_dropme = false; /* Setup reply header */ rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); @@ -1106,7 +1107,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); /* Encode reply */ - if (*statp == rpc_drop_reply) { + if (rqstp->rq_dropme) { if (procp->pc_release) procp->pc_release(rqstp, NULL, rqstp->rq_resp); goto dropit; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 5eae53b1e30..173f3b975d4 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1019,6 +1019,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) } svc_xprt_get(rqstp->rq_xprt); dr->xprt = rqstp->rq_xprt; + rqstp->rq_dropme = true; dr->handle.revisit = svc_revisit; return &dr->handle; -- cgit v1.2.3 From 6bab93f87ec703bf6650875881b11f9f27d7da56 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 3 Jan 2011 15:10:27 -0500 Subject: svcrpc: take lock on turning entry NEGATIVE in cache_check We attempt to turn a cache entry negative in place. But that entry may already have been filled in by some other task since we last checked whether it was valid, so we could be modifying an already-valid entry. If nothing else there's a likely leak in such a case when the entry is eventually put() and contents are not freed because it has CACHE_NEGATIVE set. So, take the cache_lock just as sunrpc_cache_update() does. Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 0d6002f26d8..a6c57334fa1 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -213,6 +213,23 @@ static inline int cache_is_valid(struct cache_detail *detail, struct cache_head } } +static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h) +{ + int rv; + + write_lock(&detail->hash_lock); + rv = cache_is_valid(detail, h); + if (rv != -EAGAIN) { + write_unlock(&detail->hash_lock); + return rv; + } + set_bit(CACHE_NEGATIVE, &h->flags); + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); + write_unlock(&detail->hash_lock); + cache_fresh_unlocked(h, detail); + return -ENOENT; +} + /* * This is the generic cache management routine for all * the authentication caches. @@ -251,14 +268,8 @@ int cache_check(struct cache_detail *detail, case -EINVAL: clear_bit(CACHE_PENDING, &h->flags); cache_revisit_request(h); - if (rv == -EAGAIN) { - set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); - cache_fresh_unlocked(h, detail); - rv = -ENOENT; - } + rv = try_to_negate_entry(detail, h); break; - case -EAGAIN: clear_bit(CACHE_PENDING, &h->flags); cache_revisit_request(h); -- cgit v1.2.3 From fdef7aa5d4020fd94ffcbf0078d6bd9e5a111e19 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 4 Jan 2011 14:12:47 -0500 Subject: svcrpc: ensure cache_check caller sees updated entry Supposes cache_check runs simultaneously with an update on a different CPU: cache_check task doing update ^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^ 1. test for CACHE_VALID 1'. set entry->data & !CACHE_NEGATIVE 2. use entry->data 2'. set CACHE_VALID If the two memory writes performed in step 1' and 2' appear misordered with respect to the reads in step 1 and 2, then the caller could get stale data at step 2 even though it saw CACHE_VALID set on the cache entry. Add memory barriers to prevent this. Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index a6c57334fa1..72ad836e4fe 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -128,6 +128,7 @@ static void cache_fresh_locked(struct cache_head *head, time_t expiry) { head->expiry_time = expiry; head->last_refresh = seconds_since_boot(); + smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */ set_bit(CACHE_VALID, &head->flags); } @@ -208,8 +209,16 @@ static inline int cache_is_valid(struct cache_detail *detail, struct cache_head /* entry is valid */ if (test_bit(CACHE_NEGATIVE, &h->flags)) return -ENOENT; - else + else { + /* + * In combination with write barrier in + * sunrpc_cache_update, ensures that anyone + * using the cache entry after this sees the + * updated contents: + */ + smp_rmb(); return 0; + } } } -- cgit v1.2.3 From 4b5b3ba16be1b195d2e1161746637acd4b9fed4f Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 6 Jan 2011 02:04:24 +0000 Subject: SUNRPC move svc_drop to caller of svc_process_common The NFSv4.1 shared back channel does not need to call svc_drop because the callback service never outlives the single connection it services, and it reuses it's buffers and keeps the trasport. Signed-off-by: Andy Adamson Acked-by: Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/svc.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 6359c42c494..606d182895a 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1147,7 +1147,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) dropit: svc_authorise(rqstp); /* doesn't hurt to call this twice */ dprintk("svc: svc_process dropit\n"); - svc_drop(rqstp); return 0; err_short_len: @@ -1218,7 +1217,6 @@ svc_process(struct svc_rqst *rqstp) struct kvec *resv = &rqstp->rq_res.head[0]; struct svc_serv *serv = rqstp->rq_server; u32 dir; - int error; /* * Setup response xdr_buf. @@ -1246,11 +1244,13 @@ svc_process(struct svc_rqst *rqstp) return 0; } - error = svc_process_common(rqstp, argv, resv); - if (error <= 0) - return error; - - return svc_send(rqstp); + /* Returns 1 for send, 0 for drop */ + if (svc_process_common(rqstp, argv, resv)) + return svc_send(rqstp); + else { + svc_drop(rqstp); + return 0; + } } #if defined(CONFIG_NFS_V4_1) @@ -1264,7 +1264,6 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; - int error; /* Build the svc_rqst used by the common processing routine */ rqstp->rq_xprt = serv->bc_xprt; @@ -1292,12 +1291,15 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, svc_getu32(argv); /* XID */ svc_getnl(argv); /* CALLDIR */ - error = svc_process_common(rqstp, argv, resv); - if (error <= 0) - return error; - - memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); - return bc_send(req); + /* Returns 1 for send, 0 for drop */ + if (svc_process_common(rqstp, argv, resv)) { + memcpy(&req->rq_snd_buf, &rqstp->rq_res, + sizeof(req->rq_snd_buf)); + return bc_send(req); + } else { + /* Nothing to do to drop request */ + return 0; + } } EXPORT_SYMBOL(bc_svc_process); #endif /* CONFIG_NFS_V4_1 */ -- cgit v1.2.3 From 71e161a6a9fa021a280e564254fcda894e6fbd14 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 6 Jan 2011 02:04:25 +0000 Subject: SUNRPC fix bc_send print Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- net/sunrpc/bc_svc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c index 7dcfe0cc350..1dd1a689000 100644 --- a/net/sunrpc/bc_svc.c +++ b/net/sunrpc/bc_svc.c @@ -59,8 +59,8 @@ int bc_send(struct rpc_rqst *req) ret = task->tk_status; rpc_put_task(task); } - return ret; dprintk("RPC: bc_send ret= %d\n", ret); + return ret; } #endif /* CONFIG_NFS_V4_1 */ -- cgit v1.2.3 From 1f11a034cdc4b45ee56d51b87a9e37cb776fb15b Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 6 Jan 2011 02:04:26 +0000 Subject: SUNRPC new transport for the NFSv4.1 shared back channel Move the current sock create and destroy routines into the new transport ops. Back channel socket will be destroyed by the svc_closs_all call in svc_destroy. Added check: only TCP supported on shared back channel. Signed-off-by: Andy Adamson Acked-by: Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/svc.c | 4 --- net/sunrpc/svcsock.c | 82 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 62 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 606d182895a..261e2d1dff1 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -488,10 +488,6 @@ svc_destroy(struct svc_serv *serv) if (svc_serv_is_pooled(serv)) svc_pool_map_put(); -#if defined(CONFIG_NFS_V4_1) - svc_sock_destroy(serv->bc_xprt); -#endif /* CONFIG_NFS_V4_1 */ - svc_unregister(serv); kfree(serv->sv_pools); kfree(serv); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 07919e16be3..6630f2922f4 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -66,6 +66,13 @@ static void svc_sock_free(struct svc_xprt *); static struct svc_xprt *svc_create_socket(struct svc_serv *, int, struct net *, struct sockaddr *, int, int); +#if defined(CONFIG_NFS_V4_1) +static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int, + struct net *, struct sockaddr *, + int, int); +static void svc_bc_sock_free(struct svc_xprt *xprt); +#endif /* CONFIG_NFS_V4_1 */ + #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key svc_key[2]; static struct lock_class_key svc_slock_key[2]; @@ -1184,6 +1191,39 @@ static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags); } +#if defined(CONFIG_NFS_V4_1) +static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int, + struct net *, struct sockaddr *, + int, int); +static void svc_bc_sock_free(struct svc_xprt *xprt); + +static struct svc_xprt *svc_bc_tcp_create(struct svc_serv *serv, + struct net *net, + struct sockaddr *sa, int salen, + int flags) +{ + return svc_bc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags); +} + +static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt) +{ +} + +static struct svc_xprt_ops svc_tcp_bc_ops = { + .xpo_create = svc_bc_tcp_create, + .xpo_detach = svc_bc_tcp_sock_detach, + .xpo_free = svc_bc_sock_free, + .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, +}; + +static struct svc_xprt_class svc_tcp_bc_class = { + .xcl_name = "tcp-bc", + .xcl_owner = THIS_MODULE, + .xcl_ops = &svc_tcp_bc_ops, + .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, +}; +#endif /* CONFIG_NFS_V4_1 */ + static struct svc_xprt_ops svc_tcp_ops = { .xpo_create = svc_tcp_create, .xpo_recvfrom = svc_tcp_recvfrom, @@ -1509,41 +1549,43 @@ static void svc_sock_free(struct svc_xprt *xprt) kfree(svsk); } +#if defined(CONFIG_NFS_V4_1) /* - * Create a svc_xprt. - * - * For internal use only (e.g. nfsv4.1 backchannel). - * Callers should typically use the xpo_create() method. + * Create a back channel svc_xprt which shares the fore channel socket. */ -struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot) +static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, + int protocol, + struct net *net, + struct sockaddr *sin, int len, + int flags) { struct svc_sock *svsk; - struct svc_xprt *xprt = NULL; + struct svc_xprt *xprt; + + if (protocol != IPPROTO_TCP) { + printk(KERN_WARNING "svc: only TCP sockets" + " supported on shared back channel\n"); + return ERR_PTR(-EINVAL); + } - dprintk("svc: %s\n", __func__); svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); if (!svsk) - goto out; + return ERR_PTR(-ENOMEM); xprt = &svsk->sk_xprt; - if (prot == IPPROTO_TCP) - svc_xprt_init(&svc_tcp_class, xprt, serv); - else if (prot == IPPROTO_UDP) - svc_xprt_init(&svc_udp_class, xprt, serv); - else - BUG(); -out: - dprintk("svc: %s return %p\n", __func__, xprt); + svc_xprt_init(&svc_tcp_bc_class, xprt, serv); + + serv->bc_xprt = xprt; + return xprt; } -EXPORT_SYMBOL_GPL(svc_sock_create); /* - * Destroy a svc_sock. + * Free a back channel svc_sock. */ -void svc_sock_destroy(struct svc_xprt *xprt) +static void svc_bc_sock_free(struct svc_xprt *xprt) { if (xprt) kfree(container_of(xprt, struct svc_sock, sk_xprt)); } -EXPORT_SYMBOL_GPL(svc_sock_destroy); +#endif /* CONFIG_NFS_V4_1 */ -- cgit v1.2.3 From 16b2d1e1d12de000404d7c845d0db1226511f84d Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 6 Jan 2011 02:04:27 +0000 Subject: SUNRPC register and unregister the back channel transport Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- net/sunrpc/svcsock.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 6630f2922f4..e6b66d81115 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1222,6 +1222,24 @@ static struct svc_xprt_class svc_tcp_bc_class = { .xcl_ops = &svc_tcp_bc_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, }; + +static void svc_init_bc_xprt_sock(void) +{ + svc_reg_xprt_class(&svc_tcp_bc_class); +} + +static void svc_cleanup_bc_xprt_sock(void) +{ + svc_unreg_xprt_class(&svc_tcp_bc_class); +} +#else /* CONFIG_NFS_V4_1 */ +static void svc_init_bc_xprt_sock(void) +{ +} + +static void svc_cleanup_bc_xprt_sock(void) +{ +} #endif /* CONFIG_NFS_V4_1 */ static struct svc_xprt_ops svc_tcp_ops = { @@ -1247,12 +1265,14 @@ void svc_init_xprt_sock(void) { svc_reg_xprt_class(&svc_tcp_class); svc_reg_xprt_class(&svc_udp_class); + svc_init_bc_xprt_sock(); } void svc_cleanup_xprt_sock(void) { svc_unreg_xprt_class(&svc_tcp_class); svc_unreg_xprt_class(&svc_udp_class); + svc_cleanup_bc_xprt_sock(); } static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) -- cgit v1.2.3 From 2c2618c6f29c41a0a966f14f05c8bf45fcabb750 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 6 Jan 2011 02:04:31 +0000 Subject: NFS associate sessionid with callback connection The sessions based callback service is started prior to the CREATE_SESSION call so that it can handle CB_NULL requests which can be sent before the CREATE_SESSION call returns and the session ID is known. Set the callback sessionid after a sucessful CREATE_SESSION. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- net/sunrpc/svcsock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index e6b66d81115..db3013e4aa0 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1605,7 +1605,9 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, */ static void svc_bc_sock_free(struct svc_xprt *xprt) { - if (xprt) + if (xprt) { + kfree(xprt->xpt_bc_sid); kfree(container_of(xprt, struct svc_sock, sk_xprt)); + } } #endif /* CONFIG_NFS_V4_1 */ -- cgit v1.2.3 From 4a19de0f4b693139bb10b7cc3cfe1f618576ba67 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 6 Jan 2011 02:04:35 +0000 Subject: NFS rename client back channel transport field Differentiate from server backchannel Signed-off-by: Andy Adamson Acked-by: Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/svc.c | 2 +- net/sunrpc/svcsock.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 261e2d1dff1..0e659c665a8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1262,7 +1262,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, struct kvec *resv = &rqstp->rq_res.head[0]; /* Build the svc_rqst used by the common processing routine */ - rqstp->rq_xprt = serv->bc_xprt; + rqstp->rq_xprt = serv->sv_bc_xprt; rqstp->rq_xid = req->rq_xid; rqstp->rq_prot = req->rq_xprt->prot; rqstp->rq_server = serv; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index db3013e4aa0..d265aa700bb 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1595,7 +1595,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, xprt = &svsk->sk_xprt; svc_xprt_init(&svc_tcp_bc_class, xprt, serv); - serv->bc_xprt = xprt; + serv->sv_bc_xprt = xprt; return xprt; } -- cgit v1.2.3 From 2cf5be93d1b704f342ad423a49f0e78d73939e66 Mon Sep 17 00:00:00 2001 From: Samuel Jero Date: Thu, 30 Dec 2010 12:15:16 +0100 Subject: dccp: fix return value for sequence-invalid packets Currently dccp_check_seqno returns 0 (indicating a valid packet) if the acknowledgment number is out of bounds and the sync that RFC 4340 mandates at this point is currently being rate-limited. This function should return -1, indicating an invalid packet. Signed-off-by: Samuel Jero Acked-by: Gerrit Renker --- net/dccp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/input.c b/net/dccp/input.c index 15af247ea00..8cde009e8b8 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -260,7 +260,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) */ if (time_before(now, (dp->dccps_rate_last + sysctl_dccp_sync_ratelimit))) - return 0; + return -1; DCCP_WARN("Step 6 failed for %s packet, " "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " -- cgit v1.2.3 From 763dadd47c884853a22f2f19ea27e58431303ff3 Mon Sep 17 00:00:00 2001 From: Samuel Jero Date: Thu, 30 Dec 2010 12:15:41 +0100 Subject: dccp: fix bug in updating the GSR Currently dccp_check_seqno allows any valid packet to update the Greatest Sequence Number Received, even if that packet's sequence number is less than the current GSR. This patch adds a check to make sure that the new packet's sequence number is greater than GSR. Signed-off-by: Samuel Jero Signed-off-by: Gerrit Renker --- net/dccp/dccp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 45087052d89..5fdb0722901 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -426,7 +426,8 @@ static inline void dccp_update_gsr(struct sock *sk, u64 seq) { struct dccp_sock *dp = dccp_sk(sk); - dp->dccps_gsr = seq; + if (after48(seq, dp->dccps_gsr)) + dp->dccps_gsr = seq; /* Sequence validity window depends on remote Sequence Window (7.5.1) */ dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4); /* -- cgit v1.2.3 From bfbb23466adcbc77facea3046b44f75530079472 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 2 Jan 2011 18:15:58 +0100 Subject: dccp: make upper bound for seq_window consistent on 32/64 bit The 'seq_window' sysctl sets the initial value for the DCCP Sequence Window, which may range from 32..2^46-1 (RFC 4340, 7.5.2). The patch sets the upper bound consistently to 2^32-1 on both 32 and 64 bit systems, which should be sufficient - with a RTT of 1sec and 1-byte packets, a seq_window of 2^32-1 corresponds to a link speed of 34 Gbps. Signed-off-by: Gerrit Renker --- net/dccp/sysctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 563943822e5..42348824ee3 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -21,7 +21,8 @@ /* Boundary values */ static int zero = 0, u8_max = 0xFF; -static unsigned long seqw_min = 32; +static unsigned long seqw_min = DCCPF_SEQ_WMIN, + seqw_max = 0xFFFFFFFF; /* maximum on 32 bit */ static struct ctl_table dccp_default_table[] = { { @@ -31,6 +32,7 @@ static struct ctl_table dccp_default_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, .extra1 = &seqw_min, /* RFC 4340, 7.5.2 */ + .extra2 = &seqw_max, }, { .procname = "rx_ccid", -- cgit v1.2.3 From 0ab03c2b1478f2438d2c80204f7fef65b1bca9cf Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 7 Jan 2011 03:15:05 +0000 Subject: netlink: test for all flags of the NLM_F_DUMP composite Due to NLM_F_DUMP is composed of two bits, NLM_F_ROOT | NLM_F_MATCH, when doing "if (x & NLM_F_DUMP)", it tests for _either_ of the bits being set. Because NLM_F_MATCH's value overlaps with NLM_F_EXCL, non-dump requests with NLM_F_EXCL set are mistaken as dump requests. Substitute the condition to test for _all_ bits being set. Signed-off-by: Jan Engelhardt Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- net/ipv4/inet_diag.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 4 ++-- net/netlink/genetlink.c | 2 +- net/xfrm/xfrm_user.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 750db57f3bb..a5f7535aab5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1820,7 +1820,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) return -EPERM; - if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { + if (kind == 2 && (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 2ada17129fc..2746c1fa641 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -858,7 +858,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) nlmsg_len(nlh) < hdrlen) return -EINVAL; - if (nlh->nlmsg_flags & NLM_F_DUMP) { + if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { if (nlmsg_attrlen(nlh, hdrlen)) { struct nlattr *attr; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 0cdba50c0d6..746140264b2 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -928,7 +928,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, u16 zone; int err; - if (nlh->nlmsg_flags & NLM_F_DUMP) + if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, ctnetlink_done); @@ -1790,7 +1790,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, u16 zone; int err; - if (nlh->nlmsg_flags & NLM_F_DUMP) { + if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { return netlink_dump_start(ctnl, skb, nlh, ctnetlink_exp_dump_table, ctnetlink_exp_done); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 1781d99145e..f83cb370292 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -519,7 +519,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) security_netlink_recv(skb, CAP_NET_ADMIN)) return -EPERM; - if (nlh->nlmsg_flags & NLM_F_DUMP) { + if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { if (ops->dumpit == NULL) return -EOPNOTSUPP; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8eb88951091..6a8da81ff66 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2187,7 +2187,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) && - (nlh->nlmsg_flags & NLM_F_DUMP)) { + (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { if (link->dump == NULL) return -EINVAL; -- cgit v1.2.3 From 697d0e338c7fd392cf73bf120150ab6e5516a3a3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 8 Jan 2011 17:41:42 +0000 Subject: net: fix kernel-doc warning in core/filter.c Fix new kernel-doc notation warning in net/core/filter.c: Warning(net/core/filter.c:172): No description found for parameter 'fentry' Warning(net/core/filter.c:172): Excess function parameter 'filter' description in 'sk_run_filter' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 2b27d4efdd4..afc58374ca9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -158,7 +158,7 @@ EXPORT_SYMBOL(sk_filter); /** * sk_run_filter - run a filter on a socket * @skb: buffer to run the filter on - * @filter: filter to apply + * @fentry: filter to apply * * Decode and apply filter instructions to the skb->data. * Return length to keep, 0 for none. @skb is the data we are -- cgit v1.2.3 From 9497a0518e8183959e45da05f317f1cb36f2cd7c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:30 +0000 Subject: net offloading: Accept NETIF_F_HW_CSUM for all protocols. We currently only have software fallback for one type of checksum: the TCP/UDP one's complement. This means that a protocol that uses hardware offloading for a different type of checksum (FCoE, SCTP) must directly check the device's features and do the right thing ahead of time. By the time we get to dev_can_checksum(), we're only deciding whether to apply the one algorithm in software or hardware. NETIF_F_HW_CSUM has the same capabilities as the software version, so we should always use it if present. The primary advantage of this is multiply tagged vlans can use hardware checksumming. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a215269d2e3..d8befd06da0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1734,7 +1734,7 @@ EXPORT_SYMBOL(netif_device_attach); static bool can_checksum_protocol(unsigned long features, __be16 protocol) { - return ((features & NETIF_F_NO_CSUM) || + return ((features & NETIF_F_GEN_CSUM) || ((features & NETIF_F_V4_CSUM) && protocol == htons(ETH_P_IP)) || ((features & NETIF_F_V6_CSUM) && -- cgit v1.2.3 From f01a5236bd4b140198fbcc550f085e8361fd73fa Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:31 +0000 Subject: net offloading: Generalize netif_get_vlan_features(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit netif_get_vlan_features() is currently only used by netif_needs_gso(), so it only concerns itself with GSO features. However, several other places also should take into account the contents of the packet when deciding whether to offload to hardware. This generalizes the function to return features about all of the various forms of offloading. Since offloads tend to be linked together, this avoids duplicating the logic in each location (i.e. the scatter/gather code also needs the checksum logic). Suggested-by: Michał Mirosław Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d8befd06da0..a51dfd7b56f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2017,22 +2017,41 @@ static inline void skb_orphan_try(struct sk_buff *skb) } } -int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev) +static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) +{ + if (!can_checksum_protocol(protocol, features)) { + features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_SG; + } else if (illegal_highdma(skb->dev, skb)) { + features &= ~NETIF_F_SG; + } + + return features; +} + +int netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; + int features = skb->dev->features; if (protocol == htons(ETH_P_8021Q)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; - } else if (!skb->vlan_tci) - return dev->features; + } else if (!vlan_tx_tag_present(skb)) { + return harmonize_features(skb, protocol, features); + } - if (protocol != htons(ETH_P_8021Q)) - return dev->features & dev->vlan_features; - else - return 0; + features &= skb->dev->vlan_features; + + if (protocol != htons(ETH_P_8021Q)) { + return harmonize_features(skb, protocol, features); + } else { + features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | + NETIF_F_GEN_CSUM; + return harmonize_features(skb, protocol, features); + } } -EXPORT_SYMBOL(netif_get_vlan_features); +EXPORT_SYMBOL(netif_skb_features); /* * Returns true if either: -- cgit v1.2.3 From fc741216db156994c554ac31c1151fe0e00d8f0e Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:32 +0000 Subject: net offloading: Pass features into netif_needs_gso(). Now that there is a single function that can compute the device features relevant to a packet, we don't want to run it for each offload. This converts netif_needs_gso() to take the features of the device, rather than computing them itself. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a51dfd7b56f..1444ed3861a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2086,6 +2086,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int rc = NETDEV_TX_OK; if (likely(!skb->next)) { + int features; + /* * If device doesnt need skb->dst, release it right now while * its hot in this cpu cache @@ -2098,8 +2100,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb_orphan_try(skb); + features = netif_skb_features(skb); + if (vlan_tx_tag_present(skb) && - !(dev->features & NETIF_F_HW_VLAN_TX)) { + !(features & NETIF_F_HW_VLAN_TX)) { skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); if (unlikely(!skb)) goto out; @@ -2107,7 +2111,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } - if (netif_needs_gso(dev, skb)) { + if (netif_needs_gso(skb, features)) { if (unlikely(dev_gso_segment(skb))) goto out_kfree_skb; if (skb->next) -- cgit v1.2.3 From 91ecb63c074d802f8cf103f1dafb4aed24d0f24c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:33 +0000 Subject: net offloading: Convert dev_gso_segment() to use precomputed features. This switches dev_gso_segment() to use the device features computed by the centralized routine. In doing so, it fixes a problem where it would always use dev->features, instead of those appropriate to the number of vlan tags if any are present. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1444ed3861a..4cd3e84e129 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1971,16 +1971,14 @@ static void dev_gso_skb_destructor(struct sk_buff *skb) /** * dev_gso_segment - Perform emulated hardware segmentation on skb. * @skb: buffer to segment + * @features: device features as applicable to this skb * * This function segments the given skb and stores the list of segments * in skb->next. */ -static int dev_gso_segment(struct sk_buff *skb) +static int dev_gso_segment(struct sk_buff *skb, int features) { - struct net_device *dev = skb->dev; struct sk_buff *segs; - int features = dev->features & ~(illegal_highdma(dev, skb) ? - NETIF_F_SG : 0); segs = skb_gso_segment(skb, features); @@ -2112,7 +2110,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } if (netif_needs_gso(skb, features)) { - if (unlikely(dev_gso_segment(skb))) + if (unlikely(dev_gso_segment(skb, features))) goto out_kfree_skb; if (skb->next) goto gso; -- cgit v1.2.3 From 02932ce9e2c136e6fab2571c8e0dd69ae8ec9853 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:34 +0000 Subject: net offloading: Convert skb_need_linearize() to use precomputed features. This switches skb_need_linearize() to use the features that have been centrally computed. In doing so, this fixes a problem where scatter/gather should not be used because the card does not support checksum offloading on that type of packet. On device registration we only check that some form of checksum offloading is available if scatter/gatther is enabled but we must also check at transmission time. Examples of this include IPv6 or vlan packets on a NIC that only supports IPv4 offloading. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 4cd3e84e129..2f838f1d222 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2059,22 +2059,13 @@ EXPORT_SYMBOL(netif_skb_features); * support DMA from it. */ static inline int skb_needs_linearize(struct sk_buff *skb, - struct net_device *dev) + int features) { - if (skb_is_nonlinear(skb)) { - int features = dev->features; - - if (vlan_tx_tag_present(skb)) - features &= dev->vlan_features; - - return (skb_has_frag_list(skb) && - !(features & NETIF_F_FRAGLIST)) || + return skb_is_nonlinear(skb) && + ((skb_has_frag_list(skb) && + !(features & NETIF_F_FRAGLIST)) || (skb_shinfo(skb)->nr_frags && - (!(features & NETIF_F_SG) || - illegal_highdma(dev, skb))); - } - - return 0; + !(features & NETIF_F_SG))); } int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, @@ -2115,7 +2106,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (skb->next) goto gso; } else { - if (skb_needs_linearize(skb, dev) && + if (skb_needs_linearize(skb, features) && __skb_linearize(skb)) goto out_kfree_skb; -- cgit v1.2.3 From 0363466866d901fbc658f4e63dd61e7cc93dd0af Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:35 +0000 Subject: net offloading: Convert checksums to use centrally computed features. In order to compute the features for other offloads (primarily scatter/gather), we need to first check the ability of the NIC to offload the checksum for the packet. Since we have already computed this, we can directly use the result instead of figuring it out again. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2f838f1d222..3fe443be4b1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1732,33 +1732,6 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); -static bool can_checksum_protocol(unsigned long features, __be16 protocol) -{ - return ((features & NETIF_F_GEN_CSUM) || - ((features & NETIF_F_V4_CSUM) && - protocol == htons(ETH_P_IP)) || - ((features & NETIF_F_V6_CSUM) && - protocol == htons(ETH_P_IPV6)) || - ((features & NETIF_F_FCOE_CRC) && - protocol == htons(ETH_P_FCOE))); -} - -static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) -{ - __be16 protocol = skb->protocol; - int features = dev->features; - - if (vlan_tx_tag_present(skb)) { - features &= dev->vlan_features; - } else if (protocol == htons(ETH_P_8021Q)) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - features &= dev->vlan_features; - } - - return can_checksum_protocol(features, protocol); -} - /** * skb_dev_set -- assign a new device to a buffer * @skb: buffer for the new device @@ -2015,6 +1988,17 @@ static inline void skb_orphan_try(struct sk_buff *skb) } } +static bool can_checksum_protocol(unsigned long features, __be16 protocol) +{ + return ((features & NETIF_F_GEN_CSUM) || + ((features & NETIF_F_V4_CSUM) && + protocol == htons(ETH_P_IP)) || + ((features & NETIF_F_V6_CSUM) && + protocol == htons(ETH_P_IPV6)) || + ((features & NETIF_F_FCOE_CRC) && + protocol == htons(ETH_P_FCOE))); +} + static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) { if (!can_checksum_protocol(protocol, features)) { @@ -2117,7 +2101,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (skb->ip_summed == CHECKSUM_PARTIAL) { skb_set_transport_header(skb, skb_checksum_start_offset(skb)); - if (!dev_can_checksum(dev, skb) && + if (!(features & NETIF_F_ALL_CSUM) && skb_checksum_help(skb)) goto out_kfree_skb; } -- cgit v1.2.3 From 83723d60717f8da0f53f91cf42a845ed56c09662 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Jan 2011 20:11:38 +0100 Subject: netfilter: x_tables: dont block BH while reading counters Using "iptables -L" with a lot of rules have a too big BH latency. Jesper mentioned ~6 ms and worried of frame drops. Switch to a per_cpu seqlock scheme, so that taking a snapshot of counters doesnt need to block BH (for this cpu, but also other cpus). This adds two increments on seqlock sequence per ipt_do_table() call, its a reasonable cost for allowing "iptables -L" not block BH processing. Reported-by: Jesper Dangaard Brouer Signed-off-by: Eric Dumazet CC: Patrick McHardy Acked-by: Stephen Hemminger Acked-by: Jesper Dangaard Brouer Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 45 +++++++++++++---------------------------- net/ipv4/netfilter/ip_tables.c | 45 +++++++++++++---------------------------- net/ipv6/netfilter/ip6_tables.c | 45 +++++++++++++---------------------------- net/netfilter/x_tables.c | 3 ++- 4 files changed, 44 insertions(+), 94 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 3fac340a28d..e855fffaed9 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -710,42 +710,25 @@ static void get_counters(const struct xt_table_info *t, struct arpt_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu = get_cpu(); - - /* Instead of clearing (by a previous call to memset()) - * the counters and using adds, we set the counters - * with data used by 'current' CPU - * - * Bottom half has to be disabled to prevent deadlock - * if new softirq were to run and call ipt_do_table - */ - local_bh_disable(); - i = 0; - xt_entry_foreach(iter, t->entries[curcpu], t->size) { - SET_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); - ++i; - } - local_bh_enable(); - /* Processing counters from other cpus, we can let bottom half enabled, - * (preemption is disabled) - */ for_each_possible_cpu(cpu) { - if (cpu == curcpu) - continue; + seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + i = 0; - local_bh_disable(); - xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { - ADD_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); + u64 bcnt, pcnt; + unsigned int start; + + do { + start = read_seqbegin(lock); + bcnt = iter->counters.bcnt; + pcnt = iter->counters.pcnt; + } while (read_seqretry(lock, start)); + + ADD_COUNTER(counters[i], bcnt, pcnt); ++i; } - xt_info_wrunlock(cpu); - local_bh_enable(); } - put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) @@ -759,7 +742,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) * about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc(countersize); + counters = vzalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1007,7 +990,7 @@ static int __do_replace(struct net *net, const char *name, struct arpt_entry *iter; ret = 0; - counters = vmalloc(num_counters * sizeof(struct xt_counters)); + counters = vzalloc(num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; goto out; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a846d633b3b..652efea013d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -884,42 +884,25 @@ get_counters(const struct xt_table_info *t, struct ipt_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu = get_cpu(); - - /* Instead of clearing (by a previous call to memset()) - * the counters and using adds, we set the counters - * with data used by 'current' CPU. - * - * Bottom half has to be disabled to prevent deadlock - * if new softirq were to run and call ipt_do_table - */ - local_bh_disable(); - i = 0; - xt_entry_foreach(iter, t->entries[curcpu], t->size) { - SET_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); - ++i; - } - local_bh_enable(); - /* Processing counters from other cpus, we can let bottom half enabled, - * (preemption is disabled) - */ for_each_possible_cpu(cpu) { - if (cpu == curcpu) - continue; + seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + i = 0; - local_bh_disable(); - xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { - ADD_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); + u64 bcnt, pcnt; + unsigned int start; + + do { + start = read_seqbegin(lock); + bcnt = iter->counters.bcnt; + pcnt = iter->counters.pcnt; + } while (read_seqretry(lock, start)); + + ADD_COUNTER(counters[i], bcnt, pcnt); ++i; /* macro does multi eval of i */ } - xt_info_wrunlock(cpu); - local_bh_enable(); } - put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) @@ -932,7 +915,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc(countersize); + counters = vzalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1203,7 +1186,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct ipt_entry *iter; ret = 0; - counters = vmalloc(num_counters * sizeof(struct xt_counters)); + counters = vzalloc(num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; goto out; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 455582384ec..7d227c644f7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -897,42 +897,25 @@ get_counters(const struct xt_table_info *t, struct ip6t_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu = get_cpu(); - - /* Instead of clearing (by a previous call to memset()) - * the counters and using adds, we set the counters - * with data used by 'current' CPU - * - * Bottom half has to be disabled to prevent deadlock - * if new softirq were to run and call ipt_do_table - */ - local_bh_disable(); - i = 0; - xt_entry_foreach(iter, t->entries[curcpu], t->size) { - SET_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); - ++i; - } - local_bh_enable(); - /* Processing counters from other cpus, we can let bottom half enabled, - * (preemption is disabled) - */ for_each_possible_cpu(cpu) { - if (cpu == curcpu) - continue; + seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + i = 0; - local_bh_disable(); - xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { - ADD_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); + u64 bcnt, pcnt; + unsigned int start; + + do { + start = read_seqbegin(lock); + bcnt = iter->counters.bcnt; + pcnt = iter->counters.pcnt; + } while (read_seqretry(lock, start)); + + ADD_COUNTER(counters[i], bcnt, pcnt); ++i; } - xt_info_wrunlock(cpu); - local_bh_enable(); } - put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) @@ -945,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc(countersize); + counters = vzalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1216,7 +1199,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct ip6t_entry *iter; ret = 0; - counters = vmalloc(num_counters * sizeof(struct xt_counters)); + counters = vzalloc(num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; goto out; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 80463507420..c9423763107 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1325,7 +1325,8 @@ static int __init xt_init(void) for_each_possible_cpu(i) { struct xt_info_lock *lock = &per_cpu(xt_info_locks, i); - spin_lock_init(&lock->lock); + + seqlock_init(&lock->lock); lock->readers = 0; } -- cgit v1.2.3 From 6650239a4b01077e80d5a4468562756d77afaa59 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 8 Jan 2011 17:45:38 -0500 Subject: NFS: Don't use vm_map_ram() in readdir vm_map_ram() is not available on NOMMU platforms, and causes trouble on incoherrent architectures such as ARM when we access the page data through both the direct and the virtual mapping. The alternative is to use the direct mapping to access page data for the case when we are not crossing a page boundary, but to copy the data into a linear scratch buffer when we are accessing data that spans page boundaries. Signed-off-by: Trond Myklebust Tested-by: Marc Kleine-Budde Cc: stable@kernel.org [2.6.37] --- net/sunrpc/xdr.c | 155 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 124 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index cd9e841e749..679cd674b81 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -552,6 +552,74 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b } EXPORT_SYMBOL_GPL(xdr_write_pages); +static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov, + __be32 *p, unsigned int len) +{ + if (len > iov->iov_len) + len = iov->iov_len; + if (p == NULL) + p = (__be32*)iov->iov_base; + xdr->p = p; + xdr->end = (__be32*)(iov->iov_base + len); + xdr->iov = iov; + xdr->page_ptr = NULL; +} + +static int xdr_set_page_base(struct xdr_stream *xdr, + unsigned int base, unsigned int len) +{ + unsigned int pgnr; + unsigned int maxlen; + unsigned int pgoff; + unsigned int pgend; + void *kaddr; + + maxlen = xdr->buf->page_len; + if (base >= maxlen) + return -EINVAL; + maxlen -= base; + if (len > maxlen) + len = maxlen; + + base += xdr->buf->page_base; + + pgnr = base >> PAGE_SHIFT; + xdr->page_ptr = &xdr->buf->pages[pgnr]; + kaddr = page_address(*xdr->page_ptr); + + pgoff = base & ~PAGE_MASK; + xdr->p = (__be32*)(kaddr + pgoff); + + pgend = pgoff + len; + if (pgend > PAGE_SIZE) + pgend = PAGE_SIZE; + xdr->end = (__be32*)(kaddr + pgend); + xdr->iov = NULL; + return 0; +} + +static void xdr_set_next_page(struct xdr_stream *xdr) +{ + unsigned int newbase; + + newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT; + newbase -= xdr->buf->page_base; + + if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0) + xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len); +} + +static bool xdr_set_next_buffer(struct xdr_stream *xdr) +{ + if (xdr->page_ptr != NULL) + xdr_set_next_page(xdr); + else if (xdr->iov == xdr->buf->head) { + if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0) + xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len); + } + return xdr->p != xdr->end; +} + /** * xdr_init_decode - Initialize an xdr_stream for decoding data. * @xdr: pointer to xdr_stream struct @@ -560,41 +628,67 @@ EXPORT_SYMBOL_GPL(xdr_write_pages); */ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) { - struct kvec *iov = buf->head; - unsigned int len = iov->iov_len; - - if (len > buf->len) - len = buf->len; xdr->buf = buf; - xdr->iov = iov; - xdr->p = p; - xdr->end = (__be32 *)((char *)iov->iov_base + len); + xdr->scratch.iov_base = NULL; + xdr->scratch.iov_len = 0; + if (buf->head[0].iov_len != 0) + xdr_set_iov(xdr, buf->head, p, buf->len); + else if (buf->page_len != 0) + xdr_set_page_base(xdr, 0, buf->len); } EXPORT_SYMBOL_GPL(xdr_init_decode); -/** - * xdr_inline_peek - Allow read-ahead in the XDR data stream - * @xdr: pointer to xdr_stream struct - * @nbytes: number of bytes of data to decode - * - * Check if the input buffer is long enough to enable us to decode - * 'nbytes' more bytes of data starting at the current position. - * If so return the current pointer without updating the current - * pointer position. - */ -__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes) +static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { __be32 *p = xdr->p; __be32 *q = p + XDR_QUADLEN(nbytes); if (unlikely(q > xdr->end || q < p)) return NULL; + xdr->p = q; return p; } -EXPORT_SYMBOL_GPL(xdr_inline_peek); /** - * xdr_inline_decode - Retrieve non-page XDR data to decode + * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. + * @xdr: pointer to xdr_stream struct + * @buf: pointer to an empty buffer + * @buflen: size of 'buf' + * + * The scratch buffer is used when decoding from an array of pages. + * If an xdr_inline_decode() call spans across page boundaries, then + * we copy the data into the scratch buffer in order to allow linear + * access. + */ +void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) +{ + xdr->scratch.iov_base = buf; + xdr->scratch.iov_len = buflen; +} +EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer); + +static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes) +{ + __be32 *p; + void *cpdest = xdr->scratch.iov_base; + size_t cplen = (char *)xdr->end - (char *)xdr->p; + + if (nbytes > xdr->scratch.iov_len) + return NULL; + memcpy(cpdest, xdr->p, cplen); + cpdest += cplen; + nbytes -= cplen; + if (!xdr_set_next_buffer(xdr)) + return NULL; + p = __xdr_inline_decode(xdr, nbytes); + if (p == NULL) + return NULL; + memcpy(cpdest, p, nbytes); + return xdr->scratch.iov_base; +} + +/** + * xdr_inline_decode - Retrieve XDR data to decode * @xdr: pointer to xdr_stream struct * @nbytes: number of bytes of data to decode * @@ -605,13 +699,16 @@ EXPORT_SYMBOL_GPL(xdr_inline_peek); */ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { - __be32 *p = xdr->p; - __be32 *q = p + XDR_QUADLEN(nbytes); + __be32 *p; - if (unlikely(q > xdr->end || q < p)) + if (nbytes == 0) + return xdr->p; + if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr)) return NULL; - xdr->p = q; - return p; + p = __xdr_inline_decode(xdr, nbytes); + if (p != NULL) + return p; + return xdr_copy_to_scratch(xdr, nbytes); } EXPORT_SYMBOL_GPL(xdr_inline_decode); @@ -671,16 +768,12 @@ EXPORT_SYMBOL_GPL(xdr_read_pages); */ void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) { - char * kaddr = page_address(xdr->buf->pages[0]); xdr_read_pages(xdr, len); /* * Position current pointer at beginning of tail, and * set remaining message length. */ - if (len > PAGE_CACHE_SIZE - xdr->buf->page_base) - len = PAGE_CACHE_SIZE - xdr->buf->page_base; - xdr->p = (__be32 *)(kaddr + xdr->buf->page_base); - xdr->end = (__be32 *)((char *)xdr->p + len); + xdr_set_page_base(xdr, 0, len); } EXPORT_SYMBOL_GPL(xdr_enter_page); -- cgit v1.2.3 From facb4edc1e0e849ea98e147a821e60d6d6272c0a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Jan 2011 04:06:58 +0000 Subject: phonet: some signedness bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dan Rosenberg pointed out that there were some signed comparison bugs in the phonet protocol. http://marc.info/?l=full-disclosure&m=129424528425330&w=2 The problem is that we check for array overflows but "protocol" is signed and we don't check for array underflows. If you have already have CAP_SYS_ADMIN then you could use the bugs to get root, or someone could cause an oops by mistake. Signed-off-by: Dan Carpenter Acked-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/af_phonet.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index fd95beb72f5..1072b2c19d3 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -37,7 +37,7 @@ /* Transport protocol registration */ static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; -static struct phonet_protocol *phonet_proto_get(int protocol) +static struct phonet_protocol *phonet_proto_get(unsigned int protocol) { struct phonet_protocol *pp; @@ -458,7 +458,7 @@ static struct packet_type phonet_packet_type __read_mostly = { static DEFINE_MUTEX(proto_tab_lock); -int __init_or_module phonet_proto_register(int protocol, +int __init_or_module phonet_proto_register(unsigned int protocol, struct phonet_protocol *pp) { int err = 0; @@ -481,7 +481,7 @@ int __init_or_module phonet_proto_register(int protocol, } EXPORT_SYMBOL(phonet_proto_register); -void phonet_proto_unregister(int protocol, struct phonet_protocol *pp) +void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp) { mutex_lock(&proto_tab_lock); BUG_ON(proto_tab[protocol] != pp); -- cgit v1.2.3 From 91b5c98c2e062f982423686c77b8bf31f37fa196 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Mon, 10 Jan 2011 16:00:54 -0800 Subject: caif: don't set connection request param size before copying data The size field should not be set until after the data is successfully copied in. Signed-off-by: Dan Rosenberg Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 1bf0cf50379..8184c031d02 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -740,12 +740,12 @@ static int setsockopt(struct socket *sock, if (cf_sk->sk.sk_protocol != CAIFPROTO_UTIL) return -ENOPROTOOPT; lock_sock(&(cf_sk->sk)); - cf_sk->conn_req.param.size = ol; if (ol > sizeof(cf_sk->conn_req.param.data) || copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) { release_sock(&cf_sk->sk); return -EINVAL; } + cf_sk->conn_req.param.size = ol; release_sock(&cf_sk->sk); return 0; -- cgit v1.2.3 From 36909ea43814cba34f7c921e99cba33d770a54e1 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 9 Jan 2011 19:36:31 +0000 Subject: net: Add alloc_netdev_mqs function Added alloc_netdev_mqs function which allows the number of transmit and receive queues to be specified independenty. alloc_netdev_mq was changed to a macro to call the new function. Also added alloc_etherdev_mqs with same purpose. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 32 +++++++++++++++++++++----------- net/ethernet/eth.c | 12 +++++++----- 2 files changed, 28 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 3fe443be4b1..3295b94884a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5617,18 +5617,20 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) } /** - * alloc_netdev_mq - allocate network device + * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for * @name: device name format string * @setup: callback to initialize device - * @queue_count: the number of subqueues to allocate + * @txqs: the number of TX subqueues to allocate + * @rxqs: the number of RX subqueues to allocate * * Allocates a struct net_device with private data area for driver use * and performs basic initialization. Also allocates subquue structs - * for each queue on the device at the end of the netdevice. + * for each queue on the device. */ -struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, - void (*setup)(struct net_device *), unsigned int queue_count) +struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), + unsigned int txqs, unsigned int rxqs) { struct net_device *dev; size_t alloc_size; @@ -5636,12 +5638,20 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, BUG_ON(strlen(name) >= sizeof(dev->name)); - if (queue_count < 1) { + if (txqs < 1) { pr_err("alloc_netdev: Unable to allocate device " "with zero queues.\n"); return NULL; } +#ifdef CONFIG_RPS + if (rxqs < 1) { + pr_err("alloc_netdev: Unable to allocate device " + "with zero RX queues.\n"); + return NULL; + } +#endif + alloc_size = sizeof(struct net_device); if (sizeof_priv) { /* ensure 32-byte alignment of private area */ @@ -5672,14 +5682,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); - dev->num_tx_queues = queue_count; - dev->real_num_tx_queues = queue_count; + dev->num_tx_queues = txqs; + dev->real_num_tx_queues = txqs; if (netif_alloc_netdev_queues(dev)) goto free_pcpu; #ifdef CONFIG_RPS - dev->num_rx_queues = queue_count; - dev->real_num_rx_queues = queue_count; + dev->num_rx_queues = rxqs; + dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) goto free_pcpu; #endif @@ -5707,7 +5717,7 @@ free_p: kfree(p); return NULL; } -EXPORT_SYMBOL(alloc_netdev_mq); +EXPORT_SYMBOL(alloc_netdev_mqs); /** * free_netdev - free network device diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f00ef2f1d81..f9d7ac924f1 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -347,10 +347,11 @@ void ether_setup(struct net_device *dev) EXPORT_SYMBOL(ether_setup); /** - * alloc_etherdev_mq - Allocates and sets up an Ethernet device + * alloc_etherdev_mqs - Allocates and sets up an Ethernet device * @sizeof_priv: Size of additional driver-private structure to be allocated * for this Ethernet device - * @queue_count: The number of queues this device has. + * @txqs: The number of TX queues this device has. + * @txqs: The number of RX queues this device has. * * Fill in the fields of the device structure with Ethernet-generic * values. Basically does everything except registering the device. @@ -360,11 +361,12 @@ EXPORT_SYMBOL(ether_setup); * this private data area. */ -struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count) +struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, + unsigned int rxqs) { - return alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count); + return alloc_netdev_mqs(sizeof_priv, "eth%d", ether_setup, txqs, rxqs); } -EXPORT_SYMBOL(alloc_etherdev_mq); +EXPORT_SYMBOL(alloc_etherdev_mqs); static size_t _format_mac_addr(char *buf, int buflen, const unsigned char *addr, int len) -- cgit v1.2.3 From bfe0d0298f2a67d94d58c39ea904a999aeeb7c3c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 9 Jan 2011 08:30:54 +0000 Subject: net_sched: factorize qdisc stats handling HTB takes into account skb is segmented in stats updates. Generalize this to all schedulers. They should use qdisc_bstats_update() helper instead of manipulating bstats.bytes and bstats.packets Add bstats_update() helper too for classes that use gnet_stats_basic_packed fields. Note : Right now, TCQ_F_CAN_BYPASS shortcurt can be taken only if no stab is setup on qdisc. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 5 ++++- net/sched/act_csum.c | 3 +-- net/sched/act_ipt.c | 3 +-- net/sched/act_mirred.c | 3 +-- net/sched/act_nat.c | 3 +-- net/sched/act_pedit.c | 3 +-- net/sched/act_police.c | 3 +-- net/sched/act_simple.c | 3 +-- net/sched/act_skbedit.c | 3 +-- net/sched/sch_atm.c | 6 ++---- net/sched/sch_cbq.c | 6 ++---- net/sched/sch_drr.c | 8 ++------ net/sched/sch_dsmark.c | 3 +-- net/sched/sch_hfsc.c | 6 ++---- net/sched/sch_htb.c | 17 ++++++----------- net/sched/sch_ingress.c | 3 +-- net/sched/sch_multiq.c | 3 +-- net/sched/sch_netem.c | 6 ++---- net/sched/sch_prio.c | 3 +-- net/sched/sch_red.c | 3 +-- net/sched/sch_sfq.c | 3 +-- net/sched/sch_tbf.c | 3 +-- net/sched/sch_teql.c | 3 +-- 23 files changed, 36 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 3295b94884a..a3ef808b5e3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2297,7 +2297,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, */ if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) skb_dst_force(skb); - __qdisc_update_bstats(q, skb->len); + + qdisc_skb_cb(skb)->pkt_len = skb->len; + qdisc_bstats_update(q, skb); + if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { if (unlikely(contended)) { spin_unlock(&q->busylock); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 67dc7ce9b63..83ddfc07e45 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -508,8 +508,7 @@ static int tcf_csum(struct sk_buff *skb, spin_lock(&p->tcf_lock); p->tcf_tm.lastuse = jiffies; - p->tcf_bstats.bytes += qdisc_pkt_len(skb); - p->tcf_bstats.packets++; + bstats_update(&p->tcf_bstats, skb); action = p->tcf_action; update_flags = p->update_flags; spin_unlock(&p->tcf_lock); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 8daef963225..c2a7c20e81c 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -209,8 +209,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, spin_lock(&ipt->tcf_lock); ipt->tcf_tm.lastuse = jiffies; - ipt->tcf_bstats.bytes += qdisc_pkt_len(skb); - ipt->tcf_bstats.packets++; + bstats_update(&ipt->tcf_bstats, skb); /* yes, we have to worry about both in and out dev worry later - danger - this API seems to have changed diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 0c311be9282..d765067e99d 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -165,8 +165,7 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, spin_lock(&m->tcf_lock); m->tcf_tm.lastuse = jiffies; - m->tcf_bstats.bytes += qdisc_pkt_len(skb); - m->tcf_bstats.packets++; + bstats_update(&m->tcf_bstats, skb); dev = m->tcfm_dev; if (!dev) { diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 186eb837e60..178a4bd7b7c 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -125,8 +125,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, egress = p->flags & TCA_NAT_FLAG_EGRESS; action = p->tcf_action; - p->tcf_bstats.bytes += qdisc_pkt_len(skb); - p->tcf_bstats.packets++; + bstats_update(&p->tcf_bstats, skb); spin_unlock(&p->tcf_lock); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index a0593c9640d..445bef716f7 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -187,8 +187,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, bad: p->tcf_qstats.overlimits++; done: - p->tcf_bstats.bytes += qdisc_pkt_len(skb); - p->tcf_bstats.packets++; + bstats_update(&p->tcf_bstats, skb); spin_unlock(&p->tcf_lock); return p->tcf_action; } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 7ebf7439b47..e2f08b1e2e5 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -298,8 +298,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, spin_lock(&police->tcf_lock); - police->tcf_bstats.bytes += qdisc_pkt_len(skb); - police->tcf_bstats.packets++; + bstats_update(&police->tcf_bstats, skb); if (police->tcfp_ewma_rate && police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 97e84f3ee77..7287cff7af3 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -42,8 +42,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result spin_lock(&d->tcf_lock); d->tcf_tm.lastuse = jiffies; - d->tcf_bstats.bytes += qdisc_pkt_len(skb); - d->tcf_bstats.packets++; + bstats_update(&d->tcf_bstats, skb); /* print policy string followed by _ then packet count * Example if this was the 3rd packet and the string was "hello" diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 66cbf4eb885..836f5fee9e5 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -46,8 +46,7 @@ static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a, spin_lock(&d->tcf_lock); d->tcf_tm.lastuse = jiffies; - d->tcf_bstats.bytes += qdisc_pkt_len(skb); - d->tcf_bstats.packets++; + bstats_update(&d->tcf_bstats, skb); if (d->flags & SKBEDIT_F_PRIORITY) skb->priority = d->priority; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 282540778aa..943d733409d 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -422,10 +422,8 @@ drop: __maybe_unused } return ret; } - sch->bstats.bytes += qdisc_pkt_len(skb); - sch->bstats.packets++; - flow->bstats.bytes += qdisc_pkt_len(skb); - flow->bstats.packets++; + qdisc_bstats_update(sch, skb); + bstats_update(&flow->bstats, skb); /* * Okay, this may seem weird. We pretend we've dropped the packet if * it goes via ATM. The reason for this is that the outer qdisc diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index eb763159086..c80d1c210c5 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -390,8 +390,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, cl->q); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; - sch->bstats.packets++; - sch->bstats.bytes += qdisc_pkt_len(skb); + qdisc_bstats_update(sch, skb); cbq_mark_toplevel(q, cl); if (!cl->next_alive) cbq_activate_class(cl); @@ -650,8 +649,7 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) ret = qdisc_enqueue(skb, cl->q); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; - sch->bstats.packets++; - sch->bstats.bytes += qdisc_pkt_len(skb); + qdisc_bstats_update(sch, skb); if (!cl->next_alive) cbq_activate_class(cl); return 0; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index aa8b5313f8c..de55e642eaf 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -351,7 +351,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; - unsigned int len; int err; cl = drr_classify(skb, sch, &err); @@ -362,7 +361,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) return err; } - len = qdisc_pkt_len(skb); err = qdisc_enqueue(skb, cl->qdisc); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -377,10 +375,8 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->deficit = cl->quantum; } - cl->bstats.packets++; - cl->bstats.bytes += len; - sch->bstats.packets++; - sch->bstats.bytes += len; + bstats_update(&cl->bstats, skb); + qdisc_bstats_update(sch, skb); sch->q.qlen++; return err; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 1d295d62bb5..60f4bdd4408 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -260,8 +260,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) return err; } - sch->bstats.bytes += qdisc_pkt_len(skb); - sch->bstats.packets++; + qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 069c62b7bb3..2e45791d4f6 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1599,10 +1599,8 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (cl->qdisc->q.qlen == 1) set_active(cl, qdisc_pkt_len(skb)); - cl->bstats.packets++; - cl->bstats.bytes += qdisc_pkt_len(skb); - sch->bstats.packets++; - sch->bstats.bytes += qdisc_pkt_len(skb); + bstats_update(&cl->bstats, skb); + qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 01b519d6c52..984c1b0c683 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -569,15 +569,12 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) } return ret; } else { - cl->bstats.packets += - skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; - cl->bstats.bytes += qdisc_pkt_len(skb); + bstats_update(&cl->bstats, skb); htb_activate(q, cl); } sch->q.qlen++; - sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; - sch->bstats.bytes += qdisc_pkt_len(skb); + qdisc_bstats_update(sch, skb); return NET_XMIT_SUCCESS; } @@ -648,12 +645,10 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, htb_add_to_wait_tree(q, cl, diff); } - /* update byte stats except for leaves which are already updated */ - if (cl->level) { - cl->bstats.bytes += bytes; - cl->bstats.packets += skb_is_gso(skb)? - skb_shinfo(skb)->gso_segs:1; - } + /* update basic stats except for leaves which are already updated */ + if (cl->level) + bstats_update(&cl->bstats, skb); + cl = cl->parent; } } diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index f10e34a6844..bce1665239b 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -63,8 +63,7 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) result = tc_classify(skb, p->filter_list, &res); - sch->bstats.packets++; - sch->bstats.bytes += qdisc_pkt_len(skb); + qdisc_bstats_update(sch, skb); switch (result) { case TC_ACT_SHOT: result = TC_ACT_SHOT; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 32690deab5d..21f13da2476 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -83,8 +83,7 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, qdisc); if (ret == NET_XMIT_SUCCESS) { - sch->bstats.bytes += qdisc_pkt_len(skb); - sch->bstats.packets++; + qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index e5593c083a7..1c4bce86347 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -240,8 +240,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (likely(ret == NET_XMIT_SUCCESS)) { sch->q.qlen++; - sch->bstats.bytes += qdisc_pkt_len(skb); - sch->bstats.packets++; + qdisc_bstats_update(sch, skb); } else if (net_xmit_drop_count(ret)) { sch->qstats.drops++; } @@ -477,8 +476,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) __skb_queue_after(list, skb, nskb); sch->qstats.backlog += qdisc_pkt_len(nskb); - sch->bstats.bytes += qdisc_pkt_len(nskb); - sch->bstats.packets++; + qdisc_bstats_update(sch, nskb); return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index b1c95bce33c..966158d49dd 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -84,8 +84,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, qdisc); if (ret == NET_XMIT_SUCCESS) { - sch->bstats.bytes += qdisc_pkt_len(skb); - sch->bstats.packets++; + qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index a67ba3c5a0c..a6009c5a2c9 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -94,8 +94,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) ret = qdisc_enqueue(skb, child); if (likely(ret == NET_XMIT_SUCCESS)) { - sch->bstats.bytes += qdisc_pkt_len(skb); - sch->bstats.packets++; + qdisc_bstats_update(sch, skb); sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index d54ac94066c..239ec53a634 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -403,8 +403,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) slot->allot = q->scaled_quantum; } if (++sch->q.qlen <= q->limit) { - sch->bstats.bytes += qdisc_pkt_len(skb); - sch->bstats.packets++; + qdisc_bstats_update(sch, skb); return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 641a30d6463..77565e72181 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -134,8 +134,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) } sch->q.qlen++; - sch->bstats.bytes += qdisc_pkt_len(skb); - sch->bstats.packets++; + qdisc_bstats_update(sch, skb); return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 106479a7c94..af9360d1f6e 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -83,8 +83,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) if (q->q.qlen < dev->tx_queue_len) { __skb_queue_tail(&q->q, skb); - sch->bstats.bytes += qdisc_pkt_len(skb); - sch->bstats.packets++; + qdisc_bstats_update(sch, skb); return NET_XMIT_SUCCESS; } -- cgit v1.2.3 From 545ecdc3b3a2fe0b54a3053bf8bf85321bbca7da Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Sat, 8 Jan 2011 13:57:12 +0000 Subject: arp: allow to invalidate specific ARP entries IPv4 over firewire needs to be able to remove ARP entries from the ARP cache that belong to nodes that are removed, because IPv4 over firewire uses ARP packets for private information about nodes. This information becomes invalid as soon as node drops off the bus and when it reconnects, its only possible to start talking to it after it responded to an ARP packet. But ARP cache prevents such packets from being sent. Signed-off-by: Maxim Levitsky Signed-off-by: David S. Miller --- net/ipv4/arp.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index a2fc7b961db..04c8b69fd42 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1143,6 +1143,23 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) return err; } +int arp_invalidate(struct net_device *dev, __be32 ip) +{ + struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev); + int err = -ENXIO; + + if (neigh) { + if (neigh->nud_state & ~NUD_NOARP) + err = neigh_update(neigh, NULL, NUD_FAILED, + NEIGH_UPDATE_F_OVERRIDE| + NEIGH_UPDATE_F_ADMIN); + neigh_release(neigh); + } + + return err; +} +EXPORT_SYMBOL(arp_invalidate); + static int arp_req_delete_public(struct net *net, struct arpreq *r, struct net_device *dev) { @@ -1163,7 +1180,6 @@ static int arp_req_delete(struct net *net, struct arpreq *r, { int err; __be32 ip; - struct neighbour *neigh; if (r->arp_flags & ATF_PUBL) return arp_req_delete_public(net, r, dev); @@ -1181,16 +1197,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r, if (!dev) return -EINVAL; } - err = -ENXIO; - neigh = neigh_lookup(&arp_tbl, &ip, dev); - if (neigh) { - if (neigh->nud_state & ~NUD_NOARP) - err = neigh_update(neigh, NULL, NUD_FAILED, - NEIGH_UPDATE_F_OVERRIDE| - NEIGH_UPDATE_F_ADMIN); - neigh_release(neigh); - } - return err; + return arp_invalidate(dev, ip); } /* -- cgit v1.2.3 From d7b92affba524e0ca848a5ab60649fb91190d9b5 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Fri, 7 Jan 2011 01:57:08 +0000 Subject: CAIF: Fix IPv6 support in receive path for GPRS/3G Checks version field of IP in the receive path for GPRS/3G data and appropriately sets the value of skb->protocol. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/chnl_net.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 84a422c9894..fa9dab372b6 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -76,6 +76,8 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); int pktlen; int err = 0; + const u8 *ip_version; + u8 buf; priv = container_of(layr, struct chnl_net, chnl); @@ -90,7 +92,21 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) * send the packet to the net stack. */ skb->dev = priv->netdev; - skb->protocol = htons(ETH_P_IP); + + /* check the version of IP */ + ip_version = skb_header_pointer(skb, 0, 1, &buf); + if (!ip_version) + return -EINVAL; + switch (*ip_version >> 4) { + case 4: + skb->protocol = htons(ETH_P_IP); + break; + case 6: + skb->protocol = htons(ETH_P_IPV6); + break; + default: + return -EINVAL; + } /* If we change the header in loop mode, the checksum is corrupted. */ if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP) -- cgit v1.2.3 From 219fd58be62d01e30224c7af919dea77b7e392a8 Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Mon, 10 Jan 2011 14:23:53 -0600 Subject: net/9p: Use proper data types Use proper data types for storing the count of the binary blob and length of a string. Without this patch length calculation of string will always result in -1 because of comparision between signed and unsigned integer. Signed-off-by: M. Mohan Kumar Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/protocol.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 798beac7f10..1e308f21092 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -178,27 +178,24 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, break; case 's':{ char **sptr = va_arg(ap, char **); - int16_t len; - int size; + uint16_t len; errcode = p9pdu_readf(pdu, proto_version, "w", &len); if (errcode) break; - size = max_t(int16_t, len, 0); - - *sptr = kmalloc(size + 1, GFP_KERNEL); + *sptr = kmalloc(len + 1, GFP_KERNEL); if (*sptr == NULL) { errcode = -EFAULT; break; } - if (pdu_read(pdu, *sptr, size)) { + if (pdu_read(pdu, *sptr, len)) { errcode = -EFAULT; kfree(*sptr); *sptr = NULL; } else - (*sptr)[size] = 0; + (*sptr)[len] = 0; } break; case 'Q':{ @@ -234,14 +231,14 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, } break; case 'D':{ - int32_t *count = va_arg(ap, int32_t *); + uint32_t *count = va_arg(ap, uint32_t *); void **data = va_arg(ap, void **); errcode = p9pdu_readf(pdu, proto_version, "d", count); if (!errcode) { *count = - min_t(int32_t, *count, + min_t(uint32_t, *count, pdu->size - pdu->offset); *data = &pdu->sdata[pdu->offset]; } @@ -404,9 +401,10 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, break; case 's':{ const char *sptr = va_arg(ap, const char *); - int16_t len = 0; + uint16_t len = 0; if (sptr) - len = min_t(int16_t, strlen(sptr), USHRT_MAX); + len = min_t(uint16_t, strlen(sptr), + USHRT_MAX); errcode = p9pdu_writef(pdu, proto_version, "w", len); @@ -438,7 +436,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, stbuf->n_gid, stbuf->n_muid); } break; case 'D':{ - int32_t count = va_arg(ap, int32_t); + uint32_t count = va_arg(ap, uint32_t); const void *data = va_arg(ap, const void *); errcode = p9pdu_writef(pdu, proto_version, "d", -- cgit v1.2.3 From d75faea330dbd1873c9094e9926ae306590c0998 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 30 Nov 2010 19:15:01 -0500 Subject: rpc: move sk_bc_xprt to svc_xprt This seems obviously transport-level information even if it's currently used only by the server socket code. Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 10 ++++++---- net/sunrpc/xprtsock.c | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 52bd1130e83..3bb400f86ea 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -987,15 +987,17 @@ static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, vec[0] = rqstp->rq_arg.head[0]; } else { /* REPLY */ - if (svsk->sk_bc_xprt) - req = xprt_lookup_rqst(svsk->sk_bc_xprt, xid); + struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; + + if (bc_xprt) + req = xprt_lookup_rqst(bc_xprt, xid); if (!req) { printk(KERN_NOTICE "%s: Got unrecognized reply: " - "calldir 0x%x sk_bc_xprt %p xid %08x\n", + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", __func__, ntohl(calldir), - svsk->sk_bc_xprt, xid); + bc_xprt, xid); vec[0] = rqstp->rq_arg.head[0]; goto out; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index dfcab5ac65a..18dc42eb559 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2379,9 +2379,9 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) * The backchannel uses the same socket connection as the * forechannel */ + args->bc_xprt->xpt_bc_xprt = xprt; xprt->bc_xprt = args->bc_xprt; bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt); - bc_sock->sk_bc_xprt = xprt; transport->sock = bc_sock->sk_sock; transport->inet = bc_sock->sk_sk; -- cgit v1.2.3 From 99de8ea962bbc11a51ad4c52e3dc93bee5f6ba70 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 8 Dec 2010 12:45:44 -0500 Subject: rpc: keep backchannel xprt as long as server connection Multiple backchannels can share the same tcp connection; from rfc 5661 section 2.10.3.1: A connection's association with a session is not exclusive. A connection associated with the channel(s) of one session may be simultaneously associated with the channel(s) of other sessions including sessions associated with other client IDs. However, multiple backchannels share a connection, they must all share the same xid stream (hence the same rpc_xprt); the only way we have to match replies with calls at the rpc layer is using the xid. So, keep the rpc_xprt around as long as the connection lasts, in case we're asked to use the connection as a backchannel again. Requests to create new backchannel clients over a given server connection should results in creating new clients that reuse the existing rpc_xprt. But to start, just reject attempts to associate multiple rpc_xprt's with the same underlying bc_xprt. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 4 ++++ net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtsock.c | 34 ++++++++++++++++++++++++---------- 3 files changed, 29 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 173f3b975d4..ab86b7927f8 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -13,6 +13,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -128,6 +129,9 @@ static void svc_xprt_free(struct kref *kref) if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) svcauth_unix_info_release(xprt); put_net(xprt->xpt_net); + /* See comment on corresponding get in xs_setup_bc_tcp(): */ + if (xprt->xpt_bc_xprt) + xprt_put(xprt->xpt_bc_xprt); xprt->xpt_ops->xpo_free(xprt); module_put(owner); } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 4c8f18aff7c..749ad15ae30 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -965,6 +965,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req) xprt = kzalloc(size, GFP_KERNEL); if (xprt == NULL) goto out; + kref_init(&xprt->kref); xprt->max_reqs = max_req; xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL); @@ -1102,7 +1103,6 @@ found: return xprt; } - kref_init(&xprt->kref); spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 18dc42eb559..0ef4dd4414e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2375,16 +2375,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) xprt->reestablish_timeout = 0; xprt->idle_timeout = 0; - /* - * The backchannel uses the same socket connection as the - * forechannel - */ - args->bc_xprt->xpt_bc_xprt = xprt; - xprt->bc_xprt = args->bc_xprt; - bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt); - transport->sock = bc_sock->sk_sock; - transport->inet = bc_sock->sk_sk; - xprt->ops = &bc_tcp_ops; switch (addr->sa_family) { @@ -2406,6 +2396,29 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) xprt->address_strings[RPC_DISPLAY_PORT], xprt->address_strings[RPC_DISPLAY_PROTO]); + /* + * The backchannel uses the same socket connection as the + * forechannel + */ + if (args->bc_xprt->xpt_bc_xprt) { + /* XXX: actually, want to catch this case... */ + ret = ERR_PTR(-EINVAL); + goto out_err; + } + /* + * Once we've associated a backchannel xprt with a connection, + * we want to keep it around as long as long as the connection + * lasts, in case we need to start using it for a backchannel + * again; this reference won't be dropped until bc_xprt is + * destroyed. + */ + xprt_get(xprt); + args->bc_xprt->xpt_bc_xprt = xprt; + xprt->bc_xprt = args->bc_xprt; + bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt); + transport->sock = bc_sock->sk_sock; + transport->inet = bc_sock->sk_sk; + /* * Since we don't want connections for the backchannel, we set * the xprt status to connected @@ -2415,6 +2428,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) if (try_module_get(THIS_MODULE)) return xprt; + xprt_put(xprt); ret = ERR_PTR(-EINVAL); out_err: xprt_free(xprt); -- cgit v1.2.3 From f0418aa4b1103f959d64dc18273efa04ee0140e9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 8 Dec 2010 13:48:19 -0500 Subject: rpc: allow xprt_class->setup to return a preexisting xprt This allows us to reuse the xprt associated with a server connection if one has already been set up. Signed-off-by: J. Bruce Fields --- net/sunrpc/xprt.c | 3 +++ net/sunrpc/xprtsock.c | 18 +++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 749ad15ae30..856274d7e85 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1102,6 +1102,9 @@ found: -PTR_ERR(xprt)); return xprt; } + if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state)) + /* ->setup returned a pre-initialized xprt: */ + return xprt; spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0ef4dd4414e..ee091c869fc 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2359,6 +2359,15 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) struct svc_sock *bc_sock; struct rpc_xprt *ret; + if (args->bc_xprt->xpt_bc_xprt) { + /* + * This server connection already has a backchannel + * export; we can't create a new one, as we wouldn't be + * able to match replies based on xid any more. So, + * reuse the already-existing one: + */ + return args->bc_xprt->xpt_bc_xprt; + } xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; @@ -2396,15 +2405,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) xprt->address_strings[RPC_DISPLAY_PORT], xprt->address_strings[RPC_DISPLAY_PROTO]); - /* - * The backchannel uses the same socket connection as the - * forechannel - */ - if (args->bc_xprt->xpt_bc_xprt) { - /* XXX: actually, want to catch this case... */ - ret = ERR_PTR(-EINVAL); - goto out_err; - } /* * Once we've associated a backchannel xprt with a connection, * we want to keep it around as long as long as the connection -- cgit v1.2.3 From c191a836a908d1dd6b40c503741f91b914de3348 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Jan 2011 01:14:22 +0000 Subject: tcp: disallow bind() to reuse addr/port inet_csk_bind_conflict() logic currently disallows a bind() if it finds a friend socket (a socket bound on same address/port) satisfying a set of conditions : 1) Current (to be bound) socket doesnt have sk_reuse set OR 2) other socket doesnt have sk_reuse set OR 3) other socket is in LISTEN state We should add the CLOSE state in the 3) condition, in order to avoid two REUSEADDR sockets in CLOSE state with same local address/port, since this can deny further operations. Note : a prior patch tried to address the problem in a different (and buggy) way. (commit fda48a0d7a8412ced tcp: bind() fix when many ports are bound). Reported-by: Gaspar Chilingarov Reported-by: Daniel Baluta Tested-by: Daniel Baluta Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 5 +++-- net/ipv6/inet6_connection_sock.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 25e318153f1..97e5fb76526 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -73,7 +73,7 @@ int inet_csk_bind_conflict(const struct sock *sk, !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) { + ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) { const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || sk2_rcv_saddr == sk_rcv_saddr(sk)) @@ -122,7 +122,8 @@ again: (tb->num_owners < smallest_size || smallest_size == -1)) { smallest_size = tb->num_owners; smallest_rover = rover; - if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { + if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 && + !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { spin_unlock(&head->lock); snum = smallest_rover; goto have_snum; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index e46305d1815..d144e629d2b 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -44,7 +44,7 @@ int inet6_csk_bind_conflict(const struct sock *sk, !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && (!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && + ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) && ipv6_rcv_saddr_equal(sk, sk2)) break; } -- cgit v1.2.3 From fa6dd8a2c89861d05621ce7e2880e485bec22fba Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 11 Jan 2011 08:04:12 +0000 Subject: xfrm: check trunc_len in XFRMA_ALG_AUTH_TRUNC Maximum trunc length is defined by MAX_AH_AUTH_LEN (in bytes) and need to be checked when this value is set (in bits) by the user. In ah4.c and ah6.c a BUG_ON() checks this condiftion. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 6a8da81ff66..d5e1e0b0889 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #include @@ -302,7 +303,8 @@ static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props, algo = xfrm_aalg_get_byname(ualg->alg_name, 1); if (!algo) return -ENOSYS; - if (ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits) + if ((ualg->alg_trunc_len / 8) > MAX_AH_AUTH_LEN || + ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits) return -EINVAL; *props = algo->desc.sadb_alg_id; -- cgit v1.2.3 From 4b0ef1f223be4e092632b4152ceec5627ac10f59 Mon Sep 17 00:00:00 2001 From: Dang Hongwu Date: Tue, 11 Jan 2011 07:13:33 +0000 Subject: ah: reload pointers to skb data after calling skb_cow_data() skb_cow_data() may allocate a new data buffer, so pointers on skb should be set after this function. Bug was introduced by commit dff3bb06 ("ah4: convert to ahash") and 8631e9bd ("ah6: convert to ahash"). Signed-off-by: Wang Xuefu Acked-by: Krzysztof Witek Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/ah4.c | 7 ++++--- net/ipv6/ah6.c | 8 +++++--- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 880a5ec6dce..86961bec70a 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -314,14 +314,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; - ah = (struct ip_auth_hdr *)skb->data; - iph = ip_hdr(skb); - ihl = ip_hdrlen(skb); if ((err = skb_cow_data(skb, 0, &trailer)) < 0) goto out; nfrags = err; + ah = (struct ip_auth_hdr *)skb->data; + iph = ip_hdr(skb); + ihl = ip_hdrlen(skb); + work_iph = ah_alloc_tmp(ahash, nfrags, ihl + ahp->icv_trunc_len); if (!work_iph) goto out; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index ee82d4ef26c..1aba54ae53c 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -538,14 +538,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, ah_hlen)) goto out; - ip6h = ipv6_hdr(skb); - - skb_push(skb, hdr_len); if ((err = skb_cow_data(skb, 0, &trailer)) < 0) goto out; nfrags = err; + ah = (struct ip_auth_hdr *)skb->data; + ip6h = ipv6_hdr(skb); + + skb_push(skb, hdr_len); + work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len); if (!work_iph) goto out; -- cgit v1.2.3 From 13ee6ac579574a2a95e982b19920fd2495dce8cd Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 11 Jan 2011 23:54:42 +0100 Subject: netfilter: fix race in conntrack between dump_table and destroy The netlink interface to dump the connection tracking table has a race when entries are deleted at the same time. A customer reported a crash and the backtrace showed thatctnetlink_dump_table was running while a conntrack entry was being destroyed. (see https://bugzilla.vyatta.com/show_bug.cgi?id=6402). According to RCU documentation, when using hlist_nulls the reader must handle the case of seeing a deleted entry and not proceed further down the linked list. The old code would continue which caused the scan to walk into the free list. This patch uses locking (rather than RCU) for this operation which is guaranteed safe, and no longer requires getting reference while doing dump operation. Signed-off-by: Stephen Hemminger Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 746140264b2..5cb8d3027b1 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -645,25 +645,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; - rcu_read_lock(); + spin_lock_bh(&nf_conntrack_lock); last = (struct nf_conn *)cb->args[1]; for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) { restart: - hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[cb->args[0]], + hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]], hnnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); - if (!atomic_inc_not_zero(&ct->ct_general.use)) - continue; /* Dump entries of a given L3 protocol number. * If it is not specified, ie. l3proto == 0, * then dump everything. */ if (l3proto && nf_ct_l3num(ct) != l3proto) - goto releasect; + continue; if (cb->args[1]) { if (ct != last) - goto releasect; + continue; cb->args[1] = 0; } if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, @@ -681,8 +679,6 @@ restart: if (acct) memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX])); } -releasect: - nf_ct_put(ct); } if (cb->args[1]) { cb->args[1] = 0; @@ -690,7 +686,7 @@ releasect: } } out: - rcu_read_unlock(); + spin_unlock_bh(&nf_conntrack_lock); if (last) nf_ct_put(last); -- cgit v1.2.3 From 5b919f833d9d60588d026ad82d17f17e8872c7a9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 12 Jan 2011 00:34:49 -0800 Subject: net: ax25: fix information leak to userland harder Commit fe10ae53384e48c51996941b7720ee16995cbcb7 adds a memset() to clear the structure being sent back to userspace, but accidentally used the wrong size. Reported-by: Brad Spengler Signed-off-by: Kees Cook Cc: stable@kernel.org Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index bb86d293239..6da5daeebab 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1392,7 +1392,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr, ax25_cb *ax25; int err = 0; - memset(fsa, 0, sizeof(fsa)); + memset(fsa, 0, sizeof(*fsa)); lock_sock(sk); ax25 = ax25_sk(sk); -- cgit v1.2.3 From 2fc72c7b84002ffb3c66918e2a7b0ee607d8b5aa Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 12 Jan 2011 20:25:08 +0100 Subject: netfilter: fix compilation when conntrack is disabled but tproxy is enabled The IPv6 tproxy patches split IPv6 defragmentation off of conntrack, but failed to update the #ifdef stanzas guarding the defragmentation related fields and code in skbuff and conntrack related code in nf_defrag_ipv6.c. This patch adds the required #ifdefs so that IPv6 tproxy can truly be used without connection tracking. Original report: http://marc.info/?l=linux-netdev&m=129010118516341&w=2 Reported-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: KOVACS Krisztian Signed-off-by: Pablo Neira Ayuso --- net/core/skbuff.c | 2 ++ net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 19d6c21220f..d31bb36ae0d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -380,6 +380,8 @@ static void skb_release_head_state(struct sk_buff *skb) } #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(skb->nfct); +#endif +#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED nf_conntrack_put_reasm(skb->nfct_reasm); #endif #ifdef CONFIG_BRIDGE_NETFILTER diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 99abfb53bab..97c5b21b967 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -19,13 +19,15 @@ #include #include +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include #include #include #include #include -#include #include +#endif +#include #include static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, @@ -33,8 +35,10 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, { u16 zone = NF_CT_DEFAULT_ZONE; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (skb->nfct) zone = nf_ct_zone((struct nf_conn *)skb->nfct); +#endif #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && @@ -56,9 +60,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum, { struct sk_buff *reasm; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) /* Previously seen (loopback)? */ if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; +#endif reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); /* queued */ -- cgit v1.2.3 From 6c0f3af72cb1622a66962a1180c36ef8c41be8e2 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 16 Nov 2010 11:14:34 -0800 Subject: ceph: add dir_layout to inode Add a ceph_dir_layout to the inode, and calculate dentry hash values based on the parent directory's specified dir_hash function. This is needed because the old default Linux dcache hash function is extremely week and leads to a poor distribution of files among dir fragments. Signed-off-by: Sage Weil --- net/ceph/ceph_hash.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ceph/ceph_hash.c b/net/ceph/ceph_hash.c index 815ef882679..0a1b53bce76 100644 --- a/net/ceph/ceph_hash.c +++ b/net/ceph/ceph_hash.c @@ -1,5 +1,6 @@ #include +#include /* * Robert Jenkin's hash function. @@ -104,6 +105,7 @@ unsigned ceph_str_hash(int type, const char *s, unsigned len) return -1; } } +EXPORT_SYMBOL(ceph_str_hash); const char *ceph_str_hash_name(int type) { @@ -116,3 +118,4 @@ const char *ceph_str_hash_name(int type) return "unknown"; } } +EXPORT_SYMBOL(ceph_str_hash_name); -- cgit v1.2.3 From b0aee3516d84c05240065a53f238ba7a718f56b9 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Fri, 24 Dec 2010 23:01:12 +0100 Subject: ceph: Always free allocated memory in osdmap_decode() Always free memory allocated to 'pi' in net/ceph/osdmap.c::osdmap_decode(). Signed-off-by: Jesper Juhl Signed-off-by: Sage Weil --- net/ceph/osdmap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index d73f3f6efa3..71603ac3dff 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -605,8 +605,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) goto bad; } err = __decode_pool(p, end, pi); - if (err < 0) + if (err < 0) { + kfree(pi); goto bad; + } __insert_pg_pool(&map->pg_pools, pi); } -- cgit v1.2.3 From f363e45fd1184219b472ea549cb7e192e24ef4d2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Jan 2011 14:49:46 +0100 Subject: net/ceph: make ceph_msgr_wq non-reentrant ceph messenger code does a rather complex dancing around multithread workqueue to make sure the same work item isn't executed concurrently on different CPUs. This restriction can be provided by workqueue with WQ_NON_REENTRANT. Make ceph_msgr_wq non-reentrant workqueue with the default concurrency level and remove the QUEUED/BUSY logic. * This removes backoff handling in con_work() but it couldn't reliably block execution of con_work() to begin with - queue_con() can be called after the work started but before BUSY is set. It seems that it was an optimization for a rather cold path and can be safely removed. * The number of concurrent work items is bound by the number of connections and connetions are independent from each other. With the default concurrency level, different connections will be executed independently. Signed-off-by: Tejun Heo Cc: Sage Weil Cc: ceph-devel@vger.kernel.org Signed-off-by: Sage Weil --- net/ceph/messenger.c | 46 ++-------------------------------------------- 1 file changed, 2 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b6ff4a1519a..dff633d62e5 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -96,7 +96,7 @@ struct workqueue_struct *ceph_msgr_wq; int ceph_msgr_init(void) { - ceph_msgr_wq = create_workqueue("ceph-msgr"); + ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); if (!ceph_msgr_wq) { pr_err("msgr_init failed to create workqueue\n"); return -ENOMEM; @@ -1920,20 +1920,6 @@ bad_tag: /* * Atomically queue work on a connection. Bump @con reference to * avoid races with connection teardown. - * - * There is some trickery going on with QUEUED and BUSY because we - * only want a _single_ thread operating on each connection at any - * point in time, but we want to use all available CPUs. - * - * The worker thread only proceeds if it can atomically set BUSY. It - * clears QUEUED and does it's thing. When it thinks it's done, it - * clears BUSY, then rechecks QUEUED.. if it's set again, it loops - * (tries again to set BUSY). - * - * To queue work, we first set QUEUED, _then_ if BUSY isn't set, we - * try to queue work. If that fails (work is already queued, or BUSY) - * we give up (work also already being done or is queued) but leave QUEUED - * set so that the worker thread will loop if necessary. */ static void queue_con(struct ceph_connection *con) { @@ -1948,11 +1934,7 @@ static void queue_con(struct ceph_connection *con) return; } - set_bit(QUEUED, &con->state); - if (test_bit(BUSY, &con->state)) { - dout("queue_con %p - already BUSY\n", con); - con->ops->put(con); - } else if (!queue_work(ceph_msgr_wq, &con->work.work)) { + if (!queue_delayed_work(ceph_msgr_wq, &con->work, 0)) { dout("queue_con %p - already queued\n", con); con->ops->put(con); } else { @@ -1967,15 +1949,6 @@ static void con_work(struct work_struct *work) { struct ceph_connection *con = container_of(work, struct ceph_connection, work.work); - int backoff = 0; - -more: - if (test_and_set_bit(BUSY, &con->state) != 0) { - dout("con_work %p BUSY already set\n", con); - goto out; - } - dout("con_work %p start, clearing QUEUED\n", con); - clear_bit(QUEUED, &con->state); mutex_lock(&con->mutex); @@ -1994,28 +1967,13 @@ more: try_read(con) < 0 || try_write(con) < 0) { mutex_unlock(&con->mutex); - backoff = 1; ceph_fault(con); /* error/fault path */ goto done_unlocked; } done: mutex_unlock(&con->mutex); - done_unlocked: - clear_bit(BUSY, &con->state); - dout("con->state=%lu\n", con->state); - if (test_bit(QUEUED, &con->state)) { - if (!backoff || test_bit(OPENING, &con->state)) { - dout("con_work %p QUEUED reset, looping\n", con); - goto more; - } - dout("con_work %p QUEUED reset, but just faulted\n", con); - clear_bit(QUEUED, &con->state); - } - dout("con_work %p done\n", con); - -out: con->ops->put(con); } -- cgit v1.2.3 From c74a1cbb3cac348f276fabc381758f5b0b4713b2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 12 Jan 2011 16:59:34 -0500 Subject: pass default dentry_operations to mount_pseudo() Signed-off-by: Al Viro --- net/socket.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index ccc576a6a50..ac2219f90d5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -306,20 +306,6 @@ static const struct super_operations sockfs_ops = { .statfs = simple_statfs, }; -static struct dentry *sockfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - return mount_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC); -} - -static struct vfsmount *sock_mnt __read_mostly; - -static struct file_system_type sock_fs_type = { - .name = "sockfs", - .mount = sockfs_mount, - .kill_sb = kill_anon_super, -}; - /* * sockfs_dname() is called from d_path(). */ @@ -333,6 +319,21 @@ static const struct dentry_operations sockfs_dentry_operations = { .d_dname = sockfs_dname, }; +static struct dentry *sockfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_pseudo(fs_type, "socket:", &sockfs_ops, + &sockfs_dentry_operations, SOCKFS_MAGIC); +} + +static struct vfsmount *sock_mnt __read_mostly; + +static struct file_system_type sock_fs_type = { + .name = "sockfs", + .mount = sockfs_mount, + .kill_sb = kill_anon_super, +}; + /* * Obtains the first available file descriptor and sets it up for use. * @@ -368,7 +369,6 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) } path.mnt = mntget(sock_mnt); - d_set_d_op(path.dentry, &sockfs_dentry_operations); d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; -- cgit v1.2.3 From 72b43d0898e97f588293b4a24b33c58c46633d81 Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Wed, 12 Jan 2011 08:34:08 +0000 Subject: inet6: prevent network storms caused by linux IPv6 routers Linux IPv6 forwards unicast packets, which are link layer multicasts... The hole was present since day one. I was 100% this check is there, but it is not. The problem shows itself, f.e. when Microsoft Network Load Balancer runs on a network. This software resolves IPv6 unicast addresses to multicast MAC addresses. Signed-off-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 94b5bf132b2..5f8d242be3f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -401,6 +401,9 @@ int ip6_forward(struct sk_buff *skb) goto drop; } + if (skb->pkt_type != PACKET_HOST) + goto drop; + skb_forward_csum(skb); /* -- cgit v1.2.3 From 3806b4f3b6115ce324b7125844f9e6acc80d34ec Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 12 Jan 2011 14:50:51 +0000 Subject: eth: fix new kernel-doc warning Fix new kernel-doc warning (copy-paste typo): Warning(net/ethernet/eth.c:366): No description found for parameter 'rxqs' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- net/ethernet/eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f9d7ac924f1..44d2b42fda5 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -351,7 +351,7 @@ EXPORT_SYMBOL(ether_setup); * @sizeof_priv: Size of additional driver-private structure to be allocated * for this Ethernet device * @txqs: The number of TX queues this device has. - * @txqs: The number of RX queues this device has. + * @rxqs: The number of RX queues this device has. * * Fill in the fields of the device structure with Ethernet-generic * values. Basically does everything except registering the device. -- cgit v1.2.3 From f31e8d4982653b39fe312f9938be0f49dd9ab5fa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 13 Jan 2011 14:19:55 +0100 Subject: netfilter: ctnetlink: fix loop in ctnetlink_get_conntrack() This patch fixes a loop in ctnetlink_get_conntrack() that can be triggered if you use the same socket to receive events and to perform a GET operation. Under heavy load, netlink_unicast() may return -EAGAIN, this error code is reserved in nfnetlink for the module load-on-demand. Instead, we return -ENOBUFS which is the appropriate error code that has to be propagated to user-space. Reported-by: Holger Eitzenberger Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 5cb8d3027b1..2b7eef37875 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -972,7 +972,8 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, free: kfree_skb(skb2); out: - return err; + /* this avoids a loop in nfnetlink. */ + return err == -EAGAIN ? -ENOBUFS : err; } #ifdef CONFIG_NF_NAT_NEEDED -- cgit v1.2.3 From 681c4d07dd5b2ce2ad9f6dbbf7841e479fbc7754 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jan 2011 13:40:33 +0100 Subject: mac80211: fix lockdep warning Since the introduction of the fixes for the reorder timer, mac80211 will cause lockdep warnings because lockdep confuses local->skb_queue and local->rx_skb_queue and treats their lock as the same. However, their locks are different, and are valid in different contexts (the former is used in IRQ context, the latter in BH only) and the only thing to be done is mark the former as a different lock class so that lockdep can tell the difference. Reported-by: Larry Finger Reported-by: Sujith Reported-by: Miles Lane Tested-by: Sujith Tested-by: Johannes Berg Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/main.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 485d36bc9a4..a46ff06d7cb 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -39,6 +39,8 @@ module_param(ieee80211_disable_40mhz_24ghz, bool, 0644); MODULE_PARM_DESC(ieee80211_disable_40mhz_24ghz, "Disable 40MHz support in the 2.4GHz band"); +static struct lock_class_key ieee80211_rx_skb_queue_class; + void ieee80211_configure_filter(struct ieee80211_local *local) { u64 mc; @@ -569,7 +571,15 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, spin_lock_init(&local->filter_lock); spin_lock_init(&local->queue_stop_reason_lock); - skb_queue_head_init(&local->rx_skb_queue); + /* + * The rx_skb_queue is only accessed from tasklets, + * but other SKB queues are used from within IRQ + * context. Therefore, this one needs a different + * locking class so our direct, non-irq-safe use of + * the queue's lock doesn't throw lockdep warnings. + */ + skb_queue_head_init_class(&local->rx_skb_queue, + &ieee80211_rx_skb_queue_class); INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); -- cgit v1.2.3 From 82694f764dad783a123394e2220b92b9be721b43 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 12 Jan 2011 15:18:11 +0200 Subject: mac80211: use maximum number of AMPDU frames as default in BA RX When the buffer size is set to zero in the block ack parameter set field, we should use the maximum supported number of subframes. The existing code was bogus and was doing some unnecessary calculations that lead to wrong values. Thanks Johannes for helping me figure this one out. Cc: stable@kernel.org Cc: Johannes Berg Signed-off-by: Luciano Coelho Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index f138b195d65..227ca82eef7 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -185,8 +185,6 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, struct ieee80211_mgmt *mgmt, size_t len) { - struct ieee80211_hw *hw = &local->hw; - struct ieee80211_conf *conf = &hw->conf; struct tid_ampdu_rx *tid_agg_rx; u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status; u8 dialog_token; @@ -231,13 +229,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, goto end_no_lock; } /* determine default buffer size */ - if (buf_size == 0) { - struct ieee80211_supported_band *sband; - - sband = local->hw.wiphy->bands[conf->channel->band]; - buf_size = IEEE80211_MIN_AMPDU_BUF; - buf_size = buf_size << sband->ht_cap.ampdu_factor; - } + if (buf_size == 0) + buf_size = IEEE80211_MAX_AMPDU_BUF; /* examine state machine */ -- cgit v1.2.3 From ed7809d9c41b514115ddffaa860694393c2016b3 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Thu, 13 Jan 2011 21:53:38 +0100 Subject: batman-adv: Even Batman should not dereference NULL pointers There's a problem in net/batman-adv/unicast.c::frag_send_skb(). dev_alloc_skb() allocates memory and may fail, thus returning NULL. If this happens we'll pass a NULL pointer on to skb_split() which in turn hands it to skb_split_inside_header() from where it gets passed to skb_put() that lets skb_tail_pointer() play with it and that function dereferences it. And thus the bat dies. While I was at it I also moved the call to dev_alloc_skb() above the assignment to 'unicast_packet' since there's no reason to do that assignment if the memory allocation fails. Signed-off-by: Jesper Juhl Signed-off-by: Sven Eckelmann --- net/batman-adv/unicast.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index dc2e28bed84..ee41fef04b2 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -229,10 +229,12 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, if (!bat_priv->primary_if) goto dropped; - unicast_packet = (struct unicast_packet *) skb->data; + frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len); + if (!frag_skb) + goto dropped; + unicast_packet = (struct unicast_packet *) skb->data; memcpy(&tmp_uc, unicast_packet, uc_hdr_len); - frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len); skb_split(skb, frag_skb, data_len / 2); if (my_skb_head_push(skb, ucf_hdr_len - uc_hdr_len) < 0 || -- cgit v1.2.3 From 1ac9ad1394fa542ac7ae0dc943ee3cda678799fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Jan 2011 12:13:14 +0000 Subject: net: remove dev_txq_stats_fold() After recent changes, (percpu stats on vlan/tunnels...), we dont need anymore per struct netdev_queue tx_bytes/tx_packets/tx_dropped counters. Only remaining users are ixgbe, sch_teql, gianfar & macvlan : 1) ixgbe can be converted to use existing tx_ring counters. 2) macvlan incremented txq->tx_dropped, it can use the dev->stats.tx_dropped counter. 3) sch_teql : almost revert ab35cd4b8f42 (Use net_device internal stats) Now we have ndo_get_stats64(), use it, even for "unsigned long" fields (No need to bring back a struct net_device_stats) 4) gianfar adds a stats structure per tx queue to hold tx_bytes/tx_packets This removes a lockdep warning (and possible lockup) in rndis gadget, calling dev_get_stats() from hard IRQ context. Ref: http://www.spinics.net/lists/netdev/msg149202.html Reported-by: Neil Jones Signed-off-by: Eric Dumazet CC: Jarek Poplawski CC: Alexander Duyck CC: Jeff Kirsher CC: Sandeep Gopalpet CC: Michal Nazarewicz Signed-off-by: David S. Miller --- net/core/dev.c | 29 ----------------------------- net/sched/sch_teql.c | 26 +++++++++++++++++++++----- 2 files changed, 21 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a3ef808b5e3..83507c265e4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5523,34 +5523,6 @@ void netdev_run_todo(void) } } -/** - * dev_txq_stats_fold - fold tx_queues stats - * @dev: device to get statistics from - * @stats: struct rtnl_link_stats64 to hold results - */ -void dev_txq_stats_fold(const struct net_device *dev, - struct rtnl_link_stats64 *stats) -{ - u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0; - unsigned int i; - struct netdev_queue *txq; - - for (i = 0; i < dev->num_tx_queues; i++) { - txq = netdev_get_tx_queue(dev, i); - spin_lock_bh(&txq->_xmit_lock); - tx_bytes += txq->tx_bytes; - tx_packets += txq->tx_packets; - tx_dropped += txq->tx_dropped; - spin_unlock_bh(&txq->_xmit_lock); - } - if (tx_bytes || tx_packets || tx_dropped) { - stats->tx_bytes = tx_bytes; - stats->tx_packets = tx_packets; - stats->tx_dropped = tx_dropped; - } -} -EXPORT_SYMBOL(dev_txq_stats_fold); - /* Convert net_device_stats to rtnl_link_stats64. They have the same * fields in the same order, with only the type differing. */ @@ -5594,7 +5566,6 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); } else { netdev_stats_to_stats64(storage, &dev->stats); - dev_txq_stats_fold(dev, storage); } storage->rx_dropped += atomic_long_read(&dev->rx_dropped); return storage; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index af9360d1f6e..84ce48eadff 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -59,6 +59,10 @@ struct teql_master struct net_device *dev; struct Qdisc *slaves; struct list_head master_list; + unsigned long tx_bytes; + unsigned long tx_packets; + unsigned long tx_errors; + unsigned long tx_dropped; }; struct teql_sched_data @@ -274,7 +278,6 @@ static inline int teql_resolve(struct sk_buff *skb, static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) { struct teql_master *master = netdev_priv(dev); - struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); struct Qdisc *start, *q; int busy; int nores; @@ -314,8 +317,8 @@ restart: __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); - txq->tx_packets++; - txq->tx_bytes += length; + master->tx_packets++; + master->tx_bytes += length; return NETDEV_TX_OK; } __netif_tx_unlock(slave_txq); @@ -342,10 +345,10 @@ restart: netif_stop_queue(dev); return NETDEV_TX_BUSY; } - dev->stats.tx_errors++; + master->tx_errors++; drop: - txq->tx_dropped++; + master->tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; } @@ -398,6 +401,18 @@ static int teql_master_close(struct net_device *dev) return 0; } +static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct teql_master *m = netdev_priv(dev); + + stats->tx_packets = m->tx_packets; + stats->tx_bytes = m->tx_bytes; + stats->tx_errors = m->tx_errors; + stats->tx_dropped = m->tx_dropped; + return stats; +} + static int teql_master_mtu(struct net_device *dev, int new_mtu) { struct teql_master *m = netdev_priv(dev); @@ -422,6 +437,7 @@ static const struct net_device_ops teql_netdev_ops = { .ndo_open = teql_master_open, .ndo_stop = teql_master_close, .ndo_start_xmit = teql_master_xmit, + .ndo_get_stats64 = teql_master_stats64, .ndo_change_mtu = teql_master_mtu, }; -- cgit v1.2.3 From e1fcc7e2a719d139322fab3f47cfbd4340cf3d82 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Jan 2011 15:56:31 +0000 Subject: rxrpc: rxrpc_workqueue isn't used during memory reclaim rxrpc_workqueue isn't depended upon while reclaiming memory. Convert to alloc_workqueue() without WQ_MEM_RECLAIM. Signed-off-by: Tejun Heo Signed-off-by: David Howells Cc: linux-afs@lists.infradead.org Signed-off-by: Linus Torvalds --- net/rxrpc/af_rxrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 0b9bb2085ce..74c064c0dfd 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -808,7 +808,7 @@ static int __init af_rxrpc_init(void) goto error_call_jar; } - rxrpc_workqueue = create_workqueue("krxrpcd"); + rxrpc_workqueue = alloc_workqueue("krxrpcd", 0, 1); if (!rxrpc_workqueue) { printk(KERN_NOTICE "RxRPC: Failed to allocate work queue\n"); goto error_work_queue; -- cgit v1.2.3 From aa0adb1a85e159cf57f0e11282bc6c9e3606a5f3 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 15 Jan 2011 14:39:43 +0000 Subject: batman-adv: Use "__attribute__" shortcut macros Linux 2.6.21 defines different macros for __attribute__ which are also used inside batman-adv. The next version of checkpatch.pl warns about the usage of __attribute__((packed))). Linux 2.6.33 defines an extra macro __always_unused which is used to assist source code analyzers and can be used to removed the last existing __attribute__ inside the source code. Signed-off-by: Sven Eckelmann --- net/batman-adv/main.h | 6 +++--- net/batman-adv/packet.h | 14 +++++++------- net/batman-adv/types.h | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index d4d9926c220..65106fb61b8 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -151,9 +151,9 @@ int debug_log(struct bat_priv *bat_priv, char *fmt, ...); } \ while (0) #else /* !CONFIG_BATMAN_ADV_DEBUG */ -static inline void bat_dbg(char type __attribute__((unused)), - struct bat_priv *bat_priv __attribute__((unused)), - char *fmt __attribute__((unused)), ...) +static inline void bat_dbg(char type __always_unused, + struct bat_priv *bat_priv __always_unused, + char *fmt __always_unused, ...) { } #endif diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index b49fdf70a6d..2284e8129cb 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -63,7 +63,7 @@ struct batman_packet { uint8_t num_hna; uint8_t gw_flags; /* flags related to gateway class */ uint8_t align; -} __attribute__((packed)); +} __packed; #define BAT_PACKET_LEN sizeof(struct batman_packet) @@ -76,7 +76,7 @@ struct icmp_packet { uint8_t orig[6]; uint16_t seqno; uint8_t uid; -} __attribute__((packed)); +} __packed; #define BAT_RR_LEN 16 @@ -93,14 +93,14 @@ struct icmp_packet_rr { uint8_t uid; uint8_t rr_cur; uint8_t rr[BAT_RR_LEN][ETH_ALEN]; -} __attribute__((packed)); +} __packed; struct unicast_packet { uint8_t packet_type; uint8_t version; /* batman version field */ uint8_t dest[6]; uint8_t ttl; -} __attribute__((packed)); +} __packed; struct unicast_frag_packet { uint8_t packet_type; @@ -110,7 +110,7 @@ struct unicast_frag_packet { uint8_t flags; uint8_t orig[6]; uint16_t seqno; -} __attribute__((packed)); +} __packed; struct bcast_packet { uint8_t packet_type; @@ -118,7 +118,7 @@ struct bcast_packet { uint8_t orig[6]; uint8_t ttl; uint32_t seqno; -} __attribute__((packed)); +} __packed; struct vis_packet { uint8_t packet_type; @@ -131,6 +131,6 @@ struct vis_packet { * neighbors */ uint8_t target_orig[6]; /* who should receive this packet */ uint8_t sender_orig[6]; /* who sent or rebroadcasted this packet */ -} __attribute__((packed)); +} __packed; #endif /* _NET_BATMAN_ADV_PACKET_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 97cb23dd3e6..bf3f6f5a12c 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -246,13 +246,13 @@ struct vis_info { /* this packet might be part of the vis send queue. */ struct sk_buff *skb_packet; /* vis_info may follow here*/ -} __attribute__((packed)); +} __packed; struct vis_info_entry { uint8_t src[ETH_ALEN]; uint8_t dest[ETH_ALEN]; uint8_t quality; /* quality = 0 means HNA */ -} __attribute__((packed)); +} __packed; struct recvlist_node { struct list_head list; -- cgit v1.2.3 From 5e5073280379d38e86ade471daa7443b553fc839 Mon Sep 17 00:00:00 2001 From: Kurt Van Dijck Date: Sat, 15 Jan 2011 20:56:42 -0800 Subject: can: test size of struct sockaddr in sendmsg This patch makes the CAN socket code conform to the manpage of sendmsg. Signed-off-by: Kurt Van Dijck Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/bcm.c | 3 +++ net/can/raw.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/can/bcm.c b/net/can/bcm.c index 9d5e8accfab..092dc88a7c6 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1256,6 +1256,9 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, struct sockaddr_can *addr = (struct sockaddr_can *)msg->msg_name; + if (msg->msg_namelen < sizeof(*addr)) + return -EINVAL; + if (addr->can_family != AF_CAN) return -EINVAL; diff --git a/net/can/raw.c b/net/can/raw.c index e88f610fdb7..883e9d74fdd 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -649,6 +649,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, struct sockaddr_can *addr = (struct sockaddr_can *)msg->msg_name; + if (msg->msg_namelen < sizeof(*addr)) + return -EINVAL; + if (addr->can_family != AF_CAN) return -EINVAL; -- cgit v1.2.3 From 01a859014b35deb6cc63b1dc2808ca7a0e10a4de Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 15 Jan 2011 03:06:39 +0000 Subject: caif: checking the wrong variable In the original code we check if (servl == NULL) twice. The first time should print the message that cfmuxl_remove_uplayer() failed and set "ret" correctly, but instead it just returns success. The second check should be checking the value of "ret" instead of "servl". Signed-off-by: Dan Carpenter Acked-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/cfcnfg.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index 21ede141018..c665de778b6 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -191,6 +191,7 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer) struct cflayer *servl = NULL; struct cfcnfg_phyinfo *phyinfo = NULL; u8 phyid = 0; + caif_assert(adap_layer != NULL); channel_id = adap_layer->id; if (adap_layer->dn == NULL || channel_id == 0) { @@ -199,16 +200,16 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer) goto end; } servl = cfmuxl_remove_uplayer(cnfg->mux, channel_id); - if (servl == NULL) - goto end; - layer_set_up(servl, NULL); - ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer); if (servl == NULL) { pr_err("PROTOCOL ERROR - Error removing service_layer Channel_Id(%d)", channel_id); ret = -EINVAL; goto end; } + layer_set_up(servl, NULL); + ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer); + if (ret) + goto end; caif_assert(channel_id == servl->id); if (adap_layer->dn != NULL) { phyid = cfsrvl_getphyid(adap_layer->dn); -- cgit v1.2.3 From 2fdc1c8093255f9da877d7b9ce3f46c2098377dc Mon Sep 17 00:00:00 2001 From: Romain Francoise Date: Mon, 17 Jan 2011 07:59:18 +0000 Subject: ipv6: Silence privacy extensions initialization When a network namespace is created (via CLONE_NEWNET), the loopback interface is automatically added to the new namespace, triggering a printk in ipv6_add_dev() if CONFIG_IPV6_PRIVACY is set. This is problematic for applications which use CLONE_NEWNET as part of a sandbox, like Chromium's suid sandbox or recent versions of vsftpd. On a busy machine, it can lead to thousands of useless "lo: Disabled Privacy Extensions" messages appearing in dmesg. It's easy enough to check the status of privacy extensions via the use_tempaddr sysctl, so just removing the printk seems like the most sensible solution. Signed-off-by: Romain Francoise Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5b189c97c2f..24a1cf110d8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -420,9 +420,6 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) dev->type == ARPHRD_TUNNEL6 || dev->type == ARPHRD_SIT || dev->type == ARPHRD_NONE) { - printk(KERN_INFO - "%s: Disabled Privacy Extensions\n", - dev->name); ndev->cnf.use_tempaddr = -1; } else { in6_dev_hold(ndev); -- cgit v1.2.3 From 6ee400aafb60289b78fcde5ebccd8c4973fc53f4 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 17 Jan 2011 20:46:00 +0000 Subject: net offloading: Do not mask out NETIF_F_HW_VLAN_TX for vlan. In netif_skb_features() we return only the features that are valid for vlans if we have a vlan packet. However, we should not mask out NETIF_F_HW_VLAN_TX since it enables transmission of vlan tags and is obviously valid. Reported-by: Eric Dumazet Signed-off-by: Jesse Gross Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 83507c265e4..4c58d11d3b6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2023,13 +2023,13 @@ int netif_skb_features(struct sk_buff *skb) return harmonize_features(skb, protocol, features); } - features &= skb->dev->vlan_features; + features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX); if (protocol != htons(ETH_P_8021Q)) { return harmonize_features(skb, protocol, features); } else { features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM; + NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; return harmonize_features(skb, protocol, features); } } -- cgit v1.2.3