diff options
Diffstat (limited to 'net')
53 files changed, 774 insertions, 252 deletions
diff --git a/net/9p/Makefile b/net/9p/Makefile index 198a640d53a..a0874cc1f71 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o util.o \ protocol.o \ trans_fd.o \ + trans_common.o \ 9pnet_virtio-objs := \ trans_virtio.o \ diff --git a/net/9p/client.c b/net/9p/client.c index a848bca9fbf..347ec0cd271 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -229,10 +229,23 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) return ERR_PTR(-ENOMEM); } init_waitqueue_head(req->wq); - req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_KERNEL); - req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_KERNEL); + if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) { + int alloc_msize = min(c->msize, 4096); + req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, + GFP_KERNEL); + req->tc->capacity = alloc_msize; + req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, + GFP_KERNEL); + req->rc->capacity = alloc_msize; + } else { + req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, + GFP_KERNEL); + req->tc->capacity = c->msize; + req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, + GFP_KERNEL); + req->rc->capacity = c->msize; + } if ((!req->tc) || (!req->rc)) { printk(KERN_ERR "Couldn't grow tag array\n"); kfree(req->tc); @@ -243,9 +256,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) return ERR_PTR(-ENOMEM); } req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); - req->tc->capacity = c->msize; req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); - req->rc->capacity = c->msize; } p9pdu_reset(req->tc); @@ -443,6 +454,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) { int8_t type; int err; + int ecode; err = p9_parse_header(req->rc, NULL, &type, NULL, 0); if (err) { @@ -450,36 +462,53 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) return err; } - if (type == P9_RERROR || type == P9_RLERROR) { - int ecode; - - if (!p9_is_proto_dotl(c)) { - char *ename; + if (type != P9_RERROR && type != P9_RLERROR) + return 0; - err = p9pdu_readf(req->rc, c->proto_version, "s?d", - &ename, &ecode); - if (err) - goto out_err; + if (!p9_is_proto_dotl(c)) { + char *ename; + + if (req->tc->pbuf_size) { + /* Handle user buffers */ + size_t len = req->rc->size - req->rc->offset; + if (req->tc->pubuf) { + /* User Buffer */ + err = copy_from_user( + &req->rc->sdata[req->rc->offset], + req->tc->pubuf, len); + if (err) { + err = -EFAULT; + goto out_err; + } + } else { + /* Kernel Buffer */ + memmove(&req->rc->sdata[req->rc->offset], + req->tc->pkbuf, len); + } + } + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); + if (err) + goto out_err; - if (p9_is_proto_dotu(c)) - err = -ecode; + if (p9_is_proto_dotu(c)) + err = -ecode; - if (!err || !IS_ERR_VALUE(err)) { - err = p9_errstr2errno(ename, strlen(ename)); + if (!err || !IS_ERR_VALUE(err)) { + err = p9_errstr2errno(ename, strlen(ename)); - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, + ename); - kfree(ename); - } - } else { - err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); - err = -ecode; - - P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + kfree(ename); } + } else { + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = -ecode; + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + } - } else - err = 0; return err; @@ -1191,6 +1220,27 @@ error: } EXPORT_SYMBOL(p9_client_fsync); +int p9_client_sync_fs(struct p9_fid *fid) +{ + int err = 0; + struct p9_req_t *req; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TSYNC_FS fid %d\n", fid->fid); + + clnt = fid->clnt; + req = p9_client_rpc(clnt, P9_TSYNCFS, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RSYNCFS fid %d\n", fid->fid); + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_sync_fs); + int p9_client_clunk(struct p9_fid *fid) { int err; @@ -1270,7 +1320,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, if (count < rsize) rsize = count; - req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); + /* Don't bother zerocopy form small IO (< 1024) */ + if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { + req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, + rsize, data, udata); + } else { + req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, + rsize); + } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1284,13 +1342,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - if (data) { - memmove(data, dataptr, count); - } else { - err = copy_to_user(udata, dataptr, count); - if (err) { - err = -EFAULT; - goto free_and_error; + if (!req->tc->pbuf_size) { + if (data) { + memmove(data, dataptr, count); + } else { + err = copy_to_user(udata, dataptr, count); + if (err) { + err = -EFAULT; + goto free_and_error; + } } } p9_free_req(clnt, req); @@ -1323,12 +1383,21 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, if (count < rsize) rsize = count; - if (data) - req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset, - rsize, data); - else - req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset, - rsize, udata); + + /* Don't bother zerocopy form small IO (< 1024) */ + if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { + req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset, + rsize, data, udata); + } else { + + if (data) + req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, + offset, rsize, data); + else + req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, + offset, rsize, udata); + } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1716,7 +1785,14 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) if (count < rsize) rsize = count; - req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize); + if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) { + req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid, + offset, rsize, data); + } else { + req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, + offset, rsize); + } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1730,7 +1806,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); - if (data) + if (!req->tc->pbuf_size && data) memmove(data, dataptr, count); p9_free_req(clnt, req); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 1e308f21092..2ce515b859b 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -114,6 +114,26 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) return size - len; } +static size_t +pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata, + size_t size) +{ + BUG_ON(pdu->size > P9_IOHDRSZ); + pdu->pubuf = (char __user *)udata; + pdu->pkbuf = (char *)kdata; + pdu->pbuf_size = size; + return 0; +} + +static size_t +pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size) +{ + BUG_ON(pdu->size > P9_READDIRHDRSZ); + pdu->pkbuf = (char *)kdata; + pdu->pbuf_size = size; + return 0; +} + /* b - int8_t w - int16_t @@ -445,6 +465,25 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, errcode = -EFAULT; } break; + case 'E':{ + int32_t cnt = va_arg(ap, int32_t); + const char *k = va_arg(ap, const void *); + const char *u = va_arg(ap, const void *); + errcode = p9pdu_writef(pdu, proto_version, "d", + cnt); + if (!errcode && pdu_write_urw(pdu, k, u, cnt)) + errcode = -EFAULT; + } + break; + case 'F':{ + int32_t cnt = va_arg(ap, int32_t); + const char *k = va_arg(ap, const void *); + errcode = p9pdu_writef(pdu, proto_version, "d", + cnt); + if (!errcode && pdu_write_readdir(pdu, k, cnt)) + errcode = -EFAULT; + } + break; case 'U':{ int32_t count = va_arg(ap, int32_t); const char __user *udata = @@ -579,6 +618,7 @@ EXPORT_SYMBOL(p9stat_read); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) { + pdu->id = type; return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); } @@ -606,6 +646,10 @@ void p9pdu_reset(struct p9_fcall *pdu) { pdu->offset = 0; pdu->size = 0; + pdu->private = NULL; + pdu->pubuf = NULL; + pdu->pkbuf = NULL; + pdu->pbuf_size = 0; } int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c new file mode 100644 index 00000000000..d62b9aa58df --- /dev/null +++ b/net/9p/trans_common.c @@ -0,0 +1,97 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include <linux/slab.h> +#include <linux/module.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include <linux/scatterlist.h> +#include "trans_common.h" + +/** + * p9_release_req_pages - Release pages after the transaction. + * @*private: PDU's private page of struct trans_rpage_info + */ +void +p9_release_req_pages(struct trans_rpage_info *rpinfo) +{ + int i = 0; + + while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) { + put_page(rpinfo->rp_data[i]); + i++; + } +} +EXPORT_SYMBOL(p9_release_req_pages); + +/** + * p9_nr_pages - Return number of pages needed to accomodate the payload. + */ +int +p9_nr_pages(struct p9_req_t *req) +{ + int start_page, end_page; + start_page = (unsigned long long)req->tc->pubuf >> PAGE_SHIFT; + end_page = ((unsigned long long)req->tc->pubuf + req->tc->pbuf_size + + PAGE_SIZE - 1) >> PAGE_SHIFT; + return end_page - start_page; +} +EXPORT_SYMBOL(p9_nr_pages); + +/** + * payload_gup - Translates user buffer into kernel pages and + * pins them either for read/write through get_user_pages_fast(). + * @req: Request to be sent to server. + * @pdata_off: data offset into the first page after translation (gup). + * @pdata_len: Total length of the IO. gup may not return requested # of pages. + * @nr_pages: number of pages to accomodate the payload + * @rw: Indicates if the pages are for read or write. + */ +int +p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, + int nr_pages, u8 rw) +{ + uint32_t first_page_bytes = 0; + uint32_t pdata_mapped_pages; + struct trans_rpage_info *rpinfo; + + *pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1); + + if (*pdata_off) + first_page_bytes = min((PAGE_SIZE - *pdata_off), + req->tc->pbuf_size); + + rpinfo = req->tc->private; + pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, + nr_pages, rw, &rpinfo->rp_data[0]); + + if (pdata_mapped_pages < 0) { + printk(KERN_ERR "get_user_pages_fast failed:%d udata:%p" + "nr_pages:%d\n", pdata_mapped_pages, + req->tc->pubuf, nr_pages); + pdata_mapped_pages = 0; + return -EIO; + } + rpinfo->rp_nr_pages = pdata_mapped_pages; + if (*pdata_off) { + *pdata_len = first_page_bytes; + *pdata_len += min((req->tc->pbuf_size - *pdata_len), + ((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT); + } else { + *pdata_len = min(req->tc->pbuf_size, + (size_t)pdata_mapped_pages << PAGE_SHIFT); + } + return 0; +} +EXPORT_SYMBOL(p9_payload_gup); diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h new file mode 100644 index 00000000000..76309223bb0 --- /dev/null +++ b/net/9p/trans_common.h @@ -0,0 +1,32 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +/* TRUE if it is user context */ +#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS)) + +/** + * struct trans_rpage_info - To store mapped page information in PDU. + * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu. + * @rp_nr_pages: Number of mapped pages + * @rp_data: Array of page pointers + */ +struct trans_rpage_info { + u8 rp_alloc; + int rp_nr_pages; + struct page *rp_data[0]; +}; + +void p9_release_req_pages(struct trans_rpage_info *); +int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8); +int p9_nr_pages(struct p9_req_t *); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 078eb162d9b..a30471e5174 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -153,10 +153,11 @@ struct p9_conn { unsigned long wsched; }; +static void p9_poll_workfn(struct work_struct *work); + static DEFINE_SPINLOCK(p9_poll_lock); static LIST_HEAD(p9_poll_pending_list); -static struct workqueue_struct *p9_mux_wq; -static struct task_struct *p9_poll_task; +static DECLARE_WORK(p9_poll_work, p9_poll_workfn); static void p9_mux_poll_stop(struct p9_conn *m) { @@ -384,7 +385,7 @@ static void p9_read_work(struct work_struct *work) if (n & POLLIN) { P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); - queue_work(p9_mux_wq, &m->rq); + schedule_work(&m->rq); } else clear_bit(Rworksched, &m->wsched); } else @@ -497,7 +498,7 @@ static void p9_write_work(struct work_struct *work) if (n & POLLOUT) { P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); - queue_work(p9_mux_wq, &m->wq); + schedule_work(&m->wq); } else clear_bit(Wworksched, &m->wsched); } else @@ -516,15 +517,14 @@ static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) container_of(wait, struct p9_poll_wait, wait); struct p9_conn *m = pwait->conn; unsigned long flags; - DECLARE_WAITQUEUE(dummy_wait, p9_poll_task); spin_lock_irqsave(&p9_poll_lock, flags); if (list_empty(&m->poll_pending_link)) list_add_tail(&m->poll_pending_link, &p9_poll_pending_list); spin_unlock_irqrestore(&p9_poll_lock, flags); - /* perform the default wake up operation */ - return default_wake_function(&dummy_wait, mode, sync, key); + schedule_work(&p9_poll_work); + return 1; } /** @@ -629,7 +629,7 @@ static void p9_poll_mux(struct p9_conn *m) P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); if (!test_and_set_bit(Rworksched, &m->wsched)) { P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); - queue_work(p9_mux_wq, &m->rq); + schedule_work(&m->rq); } } @@ -639,7 +639,7 @@ static void p9_poll_mux(struct p9_conn *m) if ((m->wsize || !list_empty(&m->unsent_req_list)) && !test_and_set_bit(Wworksched, &m->wsched)) { P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); - queue_work(p9_mux_wq, &m->wq); + schedule_work(&m->wq); } } } @@ -677,7 +677,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) n = p9_fd_poll(m->client, NULL); if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) - queue_work(p9_mux_wq, &m->wq); + schedule_work(&m->wq); return 0; } @@ -1047,12 +1047,12 @@ static struct p9_trans_module p9_fd_trans = { * */ -static int p9_poll_proc(void *a) +static void p9_poll_workfn(struct work_struct *work) { unsigned long flags; P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current); - repeat: + spin_lock_irqsave(&p9_poll_lock, flags); while (!list_empty(&p9_poll_pending_list)) { struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, @@ -1067,35 +1067,11 @@ static int p9_poll_proc(void *a) } spin_unlock_irqrestore(&p9_poll_lock, flags); - set_current_state(TASK_INTERRUPTIBLE); - if (list_empty(&p9_poll_pending_list)) { - P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n"); - schedule(); - } - __set_current_state(TASK_RUNNING); - - if (!kthread_should_stop()) - goto repeat; - P9_DPRINTK(P9_DEBUG_TRANS, "finish\n"); - return 0; } int p9_trans_fd_init(void) { - p9_mux_wq = create_workqueue("v9fs"); - if (!p9_mux_wq) { - printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); - return -ENOMEM; - } - - p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll"); - if (IS_ERR(p9_poll_task)) { - destroy_workqueue(p9_mux_wq); - printk(KERN_WARNING "v9fs: mux: creating poll task failed\n"); - return PTR_ERR(p9_poll_task); - } - v9fs_register_trans(&p9_tcp_trans); v9fs_register_trans(&p9_unix_trans); v9fs_register_trans(&p9_fd_trans); @@ -1105,10 +1081,8 @@ int p9_trans_fd_init(void) void p9_trans_fd_exit(void) { - kthread_stop(p9_poll_task); + flush_work_sync(&p9_poll_work); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); - - destroy_workqueue(p9_mux_wq); } diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index c8f3f72ab20..9b550ed9c71 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -45,6 +45,7 @@ #include <linux/scatterlist.h> #include <linux/virtio.h> #include <linux/virtio_9p.h> +#include "trans_common.h" #define VIRTQUEUE_NUM 128 @@ -155,6 +156,14 @@ static void req_done(struct virtqueue *vq) rc->tag); req = p9_tag_lookup(chan->client, rc->tag); req->status = REQ_STATUS_RCVD; + if (req->tc->private) { + struct trans_rpage_info *rp = req->tc->private; + /*Release pages */ + p9_release_req_pages(rp); + if (rp->rp_alloc) + kfree(rp); + req->tc->private = NULL; + } p9_client_cb(chan->client, req); } else { spin_unlock_irqrestore(&chan->lock, flags); @@ -203,6 +212,38 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) } /** + * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer, + * this takes a list of pages. + * @sg: scatter/gather list to pack into + * @start: which segment of the sg_list to start at + * @pdata_off: Offset into the first page + * @**pdata: a list of pages to add into sg. + * @count: amount of data to pack into the scatter/gather list + */ +static int +pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, + struct page **pdata, int count) +{ + int s; + int i = 0; + int index = start; + + if (pdata_off) { + s = min((int)(PAGE_SIZE - pdata_off), count); + sg_set_page(&sg[index++], pdata[i++], s, pdata_off); + count -= s; + } + + while (count) { + BUG_ON(index > limit); + s = min((int)PAGE_SIZE, count); + sg_set_page(&sg[index++], pdata[i++], s, 0); + count -= s; + } + return index-start; +} + +/** * p9_virtio_request - issue a request * @client: client instance issuing the request * @req: request to be issued @@ -212,22 +253,97 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) static int p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { - int in, out; + int in, out, inp, outp; struct virtio_chan *chan = client->trans; char *rdata = (char *)req->rc+sizeof(struct p9_fcall); unsigned long flags; - int err; + size_t pdata_off = 0; + struct trans_rpage_info *rpinfo = NULL; + int err, pdata_len = 0; P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); req_retry: req->status = REQ_STATUS_SENT; + if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { + int nr_pages = p9_nr_pages(req); + int rpinfo_size = sizeof(struct trans_rpage_info) + + sizeof(struct page *) * nr_pages; + + if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { + /* We can use sdata */ + req->tc->private = req->tc->sdata + req->tc->size; + rpinfo = (struct trans_rpage_info *)req->tc->private; + rpinfo->rp_alloc = 0; + } else { + req->tc->private = kmalloc(rpinfo_size, GFP_NOFS); + if (!req->tc->private) { + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " + "private kmalloc returned NULL"); + return -ENOMEM; + } + rpinfo = (struct trans_rpage_info *)req->tc->private; + rpinfo->rp_alloc = 1; + } + + err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages, + req->tc->id == P9_TREAD ? 1 : 0); + if (err < 0) { + if (rpinfo->rp_alloc) + kfree(rpinfo); + return err; + } + } + spin_lock_irqsave(&chan->lock, flags); + + /* Handle out VirtIO ring buffers */ out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, - req->tc->size); - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, - client->msize); + req->tc->size); + + if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) { + /* We have additional write payload buffer to take care */ + if (req->tc->pubuf && P9_IS_USER_CONTEXT) { + outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, + pdata_off, rpinfo->rp_data, pdata_len); + } else { + char *pbuf = req->tc->pubuf ? req->tc->pubuf : + req->tc->pkbuf; + outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, + req->tc->pbuf_size); + } + out += outp; + } + + /* Handle in VirtIO ring buffers */ + if (req->tc->pbuf_size && + ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) { + /* + * Take care of additional Read payload. + * 11 is the read/write header = PDU Header(7) + IO Size (4). + * Arrange in such a way that server places header in the + * alloced memory and payload onto the user buffer. + */ + inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11); + /* + * Running executables in the filesystem may result in + * a read request with kernel buffer as opposed to user buffer. + */ + if (req->tc->pubuf && P9_IS_USER_CONTEXT) { + in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, + pdata_off, rpinfo->rp_data, pdata_len); + } else { + char *pbuf = req->tc->pubuf ? req->tc->pubuf : + req->tc->pkbuf; + in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, + pbuf, req->tc->pbuf_size); + } + in += inp; + } else { + in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, + client->msize); + } err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); if (err < 0) { @@ -246,6 +362,8 @@ req_retry: P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " "virtio rpc add_buf returned failure"); + if (rpinfo && rpinfo->rp_alloc) + kfree(rpinfo); return -EIO; } } @@ -448,6 +566,7 @@ static struct p9_trans_module p9_virtio_trans = { .request = p9_virtio_request, .cancel = p9_virtio_cancel, .maxsize = PAGE_SIZE*16, + .pref = P9_TRANS_PREF_PAYLOAD_SEP, .def = 0, .owner = THIS_MODULE, }; diff --git a/net/Makefile b/net/Makefile index a3330ebe2c5..a51d9465e62 100644 --- a/net/Makefile +++ b/net/Makefile @@ -19,9 +19,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX) += unix/ -ifneq ($(CONFIG_IPV6),) -obj-y += ipv6/ -endif +obj-$(CONFIG_NET) += ipv6/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index b1805ff9541..c258796313e 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -727,7 +727,9 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) break; } + tty_unlock(); schedule(); + tty_lock(); } set_current_state(TASK_RUNNING); remove_wait_queue(&dev->wait, &wait); diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 9190ae462cb..6dee7bf648a 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -6,6 +6,7 @@ config BRIDGE tristate "802.1d Ethernet Bridging" select LLC select STP + depends on IPV6 || IPV6=n ---help--- If you say Y here, then your Linux box will be able to act as an Ethernet bridge, which means that the different Ethernet segments it diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 09d5c098792..030a002ff8e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -37,10 +37,9 @@ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static inline int ipv6_is_local_multicast(const struct in6_addr *addr) +static inline int ipv6_is_transient_multicast(const struct in6_addr *addr) { - if (ipv6_addr_is_multicast(addr) && - IPV6_ADDR_MC_SCOPE(addr) <= IPV6_ADDR_SCOPE_LINKLOCAL) + if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr)) return 1; return 0; } @@ -435,7 +434,6 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, eth = eth_hdr(skb); memcpy(eth->h_source, br->dev->dev_addr, 6); - ipv6_eth_mc_map(group, eth->h_dest); eth->h_proto = htons(ETH_P_IPV6); skb_put(skb, sizeof(*eth)); @@ -447,8 +445,10 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, ip6h->payload_len = htons(8 + sizeof(*mldq)); ip6h->nexthdr = IPPROTO_HOPOPTS; ip6h->hop_limit = 1; - ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0); + ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, + &ip6h->saddr); ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); + ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); hopopt[0] = IPPROTO_ICMPV6; /* next hdr */ @@ -780,11 +780,11 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, { struct br_ip br_group; - if (ipv6_is_local_multicast(group)) + if (!ipv6_is_transient_multicast(group)) return 0; ipv6_addr_copy(&br_group.u.ip6, group); - br_group.proto = htons(ETH_P_IP); + br_group.proto = htons(ETH_P_IPV6); return br_multicast_add_group(br, port, &br_group); } @@ -1013,18 +1013,19 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, nsrcs = skb_header_pointer(skb, len + offsetof(struct mld2_grec, - grec_mca), + grec_nsrcs), sizeof(_nsrcs), &_nsrcs); if (!nsrcs) return -EINVAL; if (!pskb_may_pull(skb, len + sizeof(*grec) + - sizeof(struct in6_addr) * (*nsrcs))) + sizeof(struct in6_addr) * ntohs(*nsrcs))) return -EINVAL; grec = (struct mld2_grec *)(skb->data + len); - len += sizeof(*grec) + sizeof(struct in6_addr) * (*nsrcs); + len += sizeof(*grec) + + sizeof(struct in6_addr) * ntohs(*nsrcs); /* We treat these as MLDv1 reports for now. */ switch (grec->grec_type) { @@ -1340,7 +1341,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, { struct br_ip br_group; - if (ipv6_is_local_multicast(group)) + if (!ipv6_is_transient_multicast(group)) return; ipv6_addr_copy(&br_group.u.ip6, group); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 35b36b86d76..05f357828a2 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -336,7 +336,6 @@ static void reset_connection(struct ceph_connection *con) ceph_msg_put(con->out_msg); con->out_msg = NULL; } - con->out_keepalive_pending = false; con->in_seq = 0; con->in_seq_acked = 0; } @@ -1248,8 +1247,6 @@ static int process_connect(struct ceph_connection *con) con->auth_retry); if (con->auth_retry == 2) { con->error_msg = "connect authorization failure"; - reset_connection(con); - set_bit(CLOSED, &con->state); return -1; } con->auth_retry = 1; @@ -1715,14 +1712,6 @@ more: /* open the socket first? */ if (con->sock == NULL) { - /* - * if we were STANDBY and are reconnecting _this_ - * connection, bump connect_seq now. Always bump - * global_seq. - */ - if (test_and_clear_bit(STANDBY, &con->state)) - con->connect_seq++; - prepare_write_banner(msgr, con); prepare_write_connect(msgr, con, 1); prepare_read_banner(con); @@ -1951,7 +1940,24 @@ static void con_work(struct work_struct *work) work.work); mutex_lock(&con->mutex); + if (test_and_clear_bit(BACKOFF, &con->state)) { + dout("con_work %p backing off\n", con); + if (queue_delayed_work(ceph_msgr_wq, &con->work, + round_jiffies_relative(con->delay))) { + dout("con_work %p backoff %lu\n", con, con->delay); + mutex_unlock(&con->mutex); + return; + } else { + con->ops->put(con); + dout("con_work %p FAILED to back off %lu\n", con, + con->delay); + } + } + if (test_bit(STANDBY, &con->state)) { + dout("con_work %p STANDBY\n", con); + goto done; + } if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ dout("con_work CLOSED\n"); con_close_socket(con); @@ -2008,10 +2014,12 @@ static void ceph_fault(struct ceph_connection *con) /* Requeue anything that hasn't been acked */ list_splice_init(&con->out_sent, &con->out_queue); - /* If there are no messages in the queue, place the connection - * in a STANDBY state (i.e., don't try to reconnect just yet). */ - if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { - dout("fault setting STANDBY\n"); + /* If there are no messages queued or keepalive pending, place + * the connection in a STANDBY state */ + if (list_empty(&con->out_queue) && + !test_bit(KEEPALIVE_PENDING, &con->state)) { + dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); + clear_bit(WRITE_PENDING, &con->state); set_bit(STANDBY, &con->state); } else { /* retry after a delay. */ @@ -2019,11 +2027,24 @@ static void ceph_fault(struct ceph_connection *con) con->delay = BASE_DELAY_INTERVAL; else if (con->delay < MAX_DELAY_INTERVAL) con->delay *= 2; - dout("fault queueing %p delay %lu\n", con, con->delay); con->ops->get(con); if (queue_delayed_work(ceph_msgr_wq, &con->work, - round_jiffies_relative(con->delay)) == 0) + round_jiffies_relative(con->delay))) { + dout("fault queued %p delay %lu\n", con, con->delay); + } else { con->ops->put(con); + dout("fault failed to queue %p delay %lu, backoff\n", + con, con->delay); + /* + * In many cases we see a socket state change + * while con_work is running and end up + * queuing (non-delayed) work, such that we + * can't backoff with a delay. Set a flag so + * that when con_work restarts we schedule the + * delay then. + */ + set_bit(BACKOFF, &con->state); + } } out_unlock: @@ -2094,6 +2115,19 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr) } EXPORT_SYMBOL(ceph_messenger_destroy); +static void clear_standby(struct ceph_connection *con) +{ + /* come back from STANDBY? */ + if (test_and_clear_bit(STANDBY, &con->state)) { + mutex_lock(&con->mutex); + dout("clear_standby %p and ++connect_seq\n", con); + con->connect_seq++; + WARN_ON(test_bit(WRITE_PENDING, &con->state)); + WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state)); + mutex_unlock(&con->mutex); + } +} + /* * Queue up an outgoing message on the given connection. */ @@ -2126,6 +2160,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) /* if there wasn't anything waiting to send before, queue * new work */ + clear_standby(con); if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) queue_con(con); } @@ -2191,6 +2226,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) */ void ceph_con_keepalive(struct ceph_connection *con) { + dout("con_keepalive %p\n", con); + clear_standby(con); if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && test_and_set_bit(WRITE_PENDING, &con->state) == 0) queue_con(con); diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 1a040e64c69..cd9c21df87d 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -16,22 +16,30 @@ struct page **ceph_get_direct_page_vector(const char __user *data, int num_pages, bool write_page) { struct page **pages; - int rc; + int got = 0; + int rc = 0; pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); if (!pages) return ERR_PTR(-ENOMEM); down_read(¤t->mm->mmap_sem); - rc = get_user_pages(current, current->mm, (unsigned long)data, - num_pages, write_page, 0, pages, NULL); + while (got < num_pages) { + rc = get_user_pages(current, current->mm, + (unsigned long)data + ((unsigned long)got * PAGE_SIZE), + num_pages - got, write_page, 0, pages + got, NULL); + if (rc < 0) + break; + BUG_ON(rc == 0); + got += rc; + } up_read(¤t->mm->mmap_sem); - if (rc < num_pages) + if (rc < 0) goto fail; return pages; fail: - ceph_put_page_vector(pages, rc > 0 ? rc : 0, false); + ceph_put_page_vector(pages, got, false); return ERR_PTR(rc); } EXPORT_SYMBOL(ceph_get_direct_page_vector); diff --git a/net/core/dev.c b/net/core/dev.c index 8ae6631abcc..6561021d22d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1114,13 +1114,21 @@ EXPORT_SYMBOL(netdev_bonding_change); void dev_load(struct net *net, const char *name) { struct net_device *dev; + int no_module; rcu_read_lock(); dev = dev_get_by_name_rcu(net, name); rcu_read_unlock(); - if (!dev && capable(CAP_NET_ADMIN)) - request_module("%s", name); + no_module = !dev; + if (no_module && capable(CAP_NET_ADMIN)) + no_module = request_module("netdev-%s", name); + if (no_module && capable(CAP_SYS_MODULE)) { + if (!request_module("%s", name)) + pr_err("Loading kernel module for a network device " +"with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s " +"instead\n", name); + } } EXPORT_SYMBOL(dev_load); diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 508f9c18992..133fd22ea28 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -144,7 +144,7 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, list_for_each_entry(ha, &from_list->list, list) { type = addr_type ? addr_type : ha->type; - __hw_addr_del(to_list, ha->addr, addr_len, addr_type); + __hw_addr_del(to_list, ha->addr, addr_len, type); } } EXPORT_SYMBOL(__hw_addr_del_multiple); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index a9e7fc4c461..b5bada92f63 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3321,7 +3321,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) pkt_dev->started_at); ktime_t idle = ns_to_ktime(pkt_dev->idle_acc); - p += sprintf(p, "OK: %llu(c%llu+d%llu) nsec, %llu (%dbyte,%dfrags)\n", + p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n", (unsigned long long)ktime_to_us(elapsed), (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)), (unsigned long long)ktime_to_us(idle), diff --git a/net/core/scm.c b/net/core/scm.c index bbe45445080..4c1ef026d69 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -95,7 +95,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) int fd = fdp[i]; struct file *file; - if (fd < 0 || !(file = fget(fd))) + if (fd < 0 || !(file = fget_raw(fd))) return -EBADF; *fpp++ = file; fpl->count++; diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index d5074a56728..c44348adba3 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1193,7 +1193,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb, goto err; } - if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setets) { + if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) { struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); err = ops->ieee_setpfc(netdev, pfc); if (err) diff --git a/net/dccp/input.c b/net/dccp/input.c index 8cde009e8b8..4222e7a654b 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -614,6 +614,9 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* Caller (dccp_v4_do_rcv) will send Reset */ dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; + } else if (sk->sk_state == DCCP_CLOSED) { + dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; + return 1; } if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) { @@ -668,10 +671,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } switch (sk->sk_state) { - case DCCP_CLOSED: - dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - return 1; - case DCCP_REQUESTING: queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); if (queued >= 0) diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 739435a6af3..cfa7a5e1c5c 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -67,8 +67,9 @@ dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen) size_t result_len = 0; const char *data = _data, *end, *opt; - kenter("%%%d,%s,'%s',%zu", - key->serial, key->description, data, datalen); + kenter("%%%d,%s,'%*.*s',%zu", + key->serial, key->description, + (int)datalen, (int)datalen, data, datalen); if (datalen <= 1 || !data || data[datalen - 1] != '\0') return -EINVAL; @@ -217,6 +218,19 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m) seq_printf(m, ": %u", key->datalen); } +/* + * read the DNS data + * - the key's semaphore is read-locked + */ +static long dns_resolver_read(const struct key *key, + char __user *buffer, size_t buflen) +{ + if (key->type_data.x[0]) + return key->type_data.x[0]; + + return user_read(key, buffer, buflen); +} + struct key_type key_type_dns_resolver = { .name = "dns_resolver", .instantiate = dns_resolver_instantiate, @@ -224,7 +238,7 @@ struct key_type key_type_dns_resolver = { .revoke = user_revoke, .destroy = user_destroy, .describe = dns_resolver_describe, - .read = user_read, + .read = dns_resolver_read, }; static int __init init_dns_resolver(void) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index df4616fce92..036652c8166 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -670,7 +670,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) ifap = &ifa->ifa_next) { if (!strcmp(ifr.ifr_name, ifa->ifa_label) && sin_orig.sin_addr.s_addr == - ifa->ifa_address) { + ifa->ifa_local) { break; /* found */ } } @@ -1040,8 +1040,8 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev, return; arp_send(ARPOP_REQUEST, ETH_P_ARP, - ifa->ifa_address, dev, - ifa->ifa_address, NULL, + ifa->ifa_local, dev, + ifa->ifa_local, NULL, dev->dev_addr, NULL); } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index c5af909cf70..3c8dfa16614 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -505,7 +505,9 @@ restart: } rcu_read_unlock(); + local_bh_disable(); inet_twsk_deschedule(tw, twdr); + local_bh_enable(); inet_twsk_put(tw); goto restart_rcu; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 6613edfac28..d1d0e2c256f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1765,4 +1765,4 @@ module_exit(ipgre_fini); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("gre"); MODULE_ALIAS_RTNL_LINK("gretap"); -MODULE_ALIAS("gre0"); +MODULE_ALIAS_NETDEV("gre0"); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 988f52fba54..a5f58e7cbb2 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -913,4 +913,4 @@ static void __exit ipip_fini(void) module_init(ipip_init); module_exit(ipip_fini); MODULE_LICENSE("GPL"); -MODULE_ALIAS("tunl0"); +MODULE_ALIAS_NETDEV("tunl0"); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eb7f82ebf4a..65f6c040624 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1222,7 +1222,7 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, } /* D-SACK for already forgotten data... Do dumb counting. */ - if (dup_sack && + if (dup_sack && tp->undo_marker && tp->undo_retrans && !after(end_seq_0, prior_snd_una) && after(end_seq_0, tp->undo_marker)) tp->undo_retrans--; @@ -1299,7 +1299,8 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, /* Account D-SACK for retransmitted packet. */ if (dup_sack && (sacked & TCPCB_RETRANS)) { - if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) + if (tp->undo_marker && tp->undo_retrans && + after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) tp->undo_retrans--; if (sacked & TCPCB_SACKED_ACKED) state->reord = min(fack_count, state->reord); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 406f320336e..dfa5beb0c1c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2162,7 +2162,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (!tp->retrans_stamp) tp->retrans_stamp = TCP_SKB_CB(skb)->when; - tp->undo_retrans++; + tp->undo_retrans += tcp_skb_pcount(skb); /* snd_nxt is stored to detect loss of retransmitted segment, * see tcp_input.c tcp_sacktag_write_queue(). diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 4f4483e697b..e528a42a52b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -57,6 +57,7 @@ MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETDEV("ip6tnl0"); #ifdef IP6_TNL_DEBUG #define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__) diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 09c88891a75..de338037a73 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -410,7 +410,7 @@ fallback: if (p != NULL) { sb_add(m, "%02x", *p++); for (i = 1; i < len; i++) - sb_add(m, ":%02x", p[i]); + sb_add(m, ":%02x", *p++); } sb_add(m, " "); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a998db6e789..e7db7014e89 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -739,8 +739,10 @@ restart: if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); - else + else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl->fl6_dst); + else + goto out2; dst_release(&rt->dst); rt = nrt ? : net->ipv6.ip6_null_entry; @@ -2557,14 +2559,16 @@ static int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - struct net *net = current->nsproxy->net_ns; - int delay = net->ipv6.sysctl.flush_delay; - if (write) { - proc_dointvec(ctl, write, buffer, lenp, ppos); - fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); - return 0; - } else + struct net *net; + int delay; + if (!write) return -EINVAL; + + net = (struct net *)ctl->extra1; + delay = net->ipv6.sysctl.flush_delay; + proc_dointvec(ctl, write, buffer, lenp, ppos); + fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); + return 0; } ctl_table ipv6_route_table_template[] = { @@ -2651,6 +2655,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) if (table) { table[0].data = &net->ipv6.sysctl.flush_delay; + table[0].extra1 = net; table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 8ce38f10a54..d2c16e10f65 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1290,4 +1290,4 @@ static int __init sit_init(void) module_init(sit_init); module_exit(sit_cleanup); MODULE_LICENSE("GPL"); -MODULE_ALIAS("sit0"); +MODULE_ALIAS_NETDEV("sit0"); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8acba456744..7a10a8d1b2d 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1229,6 +1229,7 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) } mutex_unlock(&local->iflist_mtx); unregister_netdevice_many(&unreg_list); + list_del(&unreg_list); } static u32 ieee80211_idle_off(struct ieee80211_local *local, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 45fbb9e3374..c9ceb4d57ab 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1033,6 +1033,12 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, if (is_multicast_ether_addr(hdr->addr1)) return; + /* + * In case we receive frames after disassociation. + */ + if (!sdata->u.mgd.associated) + return; + ieee80211_sta_reset_conn_monitor(sdata); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 22f7ad5101a..ba98e1308f3 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -808,9 +808,9 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, dest->u_threshold = udest->u_threshold; dest->l_threshold = udest->l_threshold; - spin_lock(&dest->dst_lock); + spin_lock_bh(&dest->dst_lock); ip_vs_dst_reset(dest); - spin_unlock(&dest->dst_lock); + spin_unlock_bh(&dest->dst_lock); if (add) ip_vs_new_estimator(&dest->stats); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index b07393eab88..91816998ed8 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -85,6 +85,8 @@ EXPORT_SYMBOL(nf_log_unregister); int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) { + if (pf >= ARRAY_SIZE(nf_loggers)) + return -EINVAL; mutex_lock(&nf_log_mutex); if (__find_logger(pf, logger->name) == NULL) { mutex_unlock(&nf_log_mutex); @@ -98,6 +100,8 @@ EXPORT_SYMBOL(nf_log_bind_pf); void nf_log_unbind_pf(u_int8_t pf) { + if (pf >= ARRAY_SIZE(nf_loggers)) + return; mutex_lock(&nf_log_mutex); rcu_assign_pointer(nf_loggers[pf], NULL); mutex_unlock(&nf_log_mutex); diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c index 4d87befb04c..474d621cbc2 100644 --- a/net/netfilter/nf_tproxy_core.c +++ b/net/netfilter/nf_tproxy_core.c @@ -28,26 +28,23 @@ nf_tproxy_destructor(struct sk_buff *skb) skb->destructor = NULL; if (sk) - nf_tproxy_put_sock(sk); + sock_put(sk); } /* consumes sk */ -int +void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) { - bool transparent = (sk->sk_state == TCP_TIME_WAIT) ? - inet_twsk(sk)->tw_transparent : - inet_sk(sk)->transparent; - - if (transparent) { - skb_orphan(skb); - skb->sk = sk; - skb->destructor = nf_tproxy_destructor; - return 1; - } else - nf_tproxy_put_sock(sk); - - return 0; + /* assigning tw sockets complicates things; most + * skb->sk->X checks would have to test sk->sk_state first */ + if (sk->sk_state == TCP_TIME_WAIT) { + inet_twsk_put(inet_twsk(sk)); + return; + } + + skb_orphan(skb); + skb->sk = sk; + skb->destructor = nf_tproxy_destructor; } EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock); diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 640678f47a2..dcfd57eb9d0 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -33,6 +33,20 @@ #include <net/netfilter/nf_tproxy_core.h> #include <linux/netfilter/xt_TPROXY.h> +static bool tproxy_sk_is_transparent(struct sock *sk) +{ + if (sk->sk_state != TCP_TIME_WAIT) { + if (inet_sk(sk)->transparent) + return true; + sock_put(sk); + } else { + if (inet_twsk(sk)->tw_transparent) + return true; + inet_twsk_put(inet_twsk(sk)); + } + return false; +} + static inline __be32 tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) { @@ -141,7 +155,7 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, skb->dev, NFT_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ - if (sk && nf_tproxy_assign_sock(skb, sk)) { + if (sk && tproxy_sk_is_transparent(sk)) { /* This should be in a separate target, but we don't do multiple targets on the same rule yet */ skb->mark = (skb->mark & ~mark_mask) ^ mark_value; @@ -149,6 +163,8 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, pr_debug("redirecting: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n", iph->protocol, &iph->daddr, ntohs(hp->dest), &laddr, ntohs(lport), skb->mark); + + nf_tproxy_assign_sock(skb, sk); return NF_ACCEPT; } @@ -306,7 +322,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) par->in, NFT_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ - if (sk && nf_tproxy_assign_sock(skb, sk)) { + if (sk && tproxy_sk_is_transparent(sk)) { /* This should be in a separate target, but we don't do multiple targets on the same rule yet */ skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; @@ -314,6 +330,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n", tproto, &iph->saddr, ntohs(hp->source), laddr, ntohs(lport), skb->mark); + + nf_tproxy_assign_sock(skb, sk); return NF_ACCEPT; } diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 00d6ae83830..9cc46356b57 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -35,6 +35,15 @@ #include <net/netfilter/nf_conntrack.h> #endif +static void +xt_socket_put_sk(struct sock *sk) +{ + if (sk->sk_state == TCP_TIME_WAIT) + inet_twsk_put(inet_twsk(sk)); + else + sock_put(sk); +} + static int extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, @@ -164,7 +173,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, (sk->sk_state == TCP_TIME_WAIT && inet_twsk(sk)->tw_transparent)); - nf_tproxy_put_sock(sk); + xt_socket_put_sk(sk); if (wildcard || !transparent) sk = NULL; @@ -298,7 +307,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) (sk->sk_state == TCP_TIME_WAIT && inet_twsk(sk)->tw_transparent)); - nf_tproxy_put_sock(sk); + xt_socket_put_sk(sk); if (wildcard || !transparent) sk = NULL; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 478181d53c5..1f924595bde 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1407,7 +1407,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, int noblock = flags&MSG_DONTWAIT; size_t copied; struct sk_buff *skb, *data_skb; - int err; + int err, ret; if (flags&MSG_OOB) return -EOPNOTSUPP; @@ -1470,8 +1470,13 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, skb_free_datagram(sk, skb); - if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) - netlink_dump(sk); + if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { + ret = netlink_dump(sk); + if (ret) { + sk->sk_err = ret; + sk->sk_error_report(sk); + } + } scm_recv(sock, msg, siocb->scm, flags); out: @@ -1736,6 +1741,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, struct netlink_callback *cb; struct sock *sk; struct netlink_sock *nlk; + int ret; cb = kzalloc(sizeof(*cb), GFP_KERNEL); if (cb == NULL) @@ -1764,9 +1770,13 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, nlk->cb = cb; mutex_unlock(nlk->cb_mutex); - netlink_dump(sk); + ret = netlink_dump(sk); + sock_put(sk); + if (ret) + return ret; + /* We successfully started a dump, by returning -EINTR we * signal not to send ACK even if it was requested. */ diff --git a/net/rds/ib.c b/net/rds/ib.c index 4123967d4d6..cce19f95c62 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -364,7 +364,6 @@ void rds_ib_exit(void) rds_ib_sysctl_exit(); rds_ib_recv_exit(); rds_trans_unregister(&rds_ib_transport); - rds_ib_fmr_exit(); } struct rds_transport rds_ib_transport = { @@ -400,13 +399,9 @@ int rds_ib_init(void) INIT_LIST_HEAD(&rds_ib_devices); - ret = rds_ib_fmr_init(); - if (ret) - goto out; - ret = ib_register_client(&rds_ib_client); if (ret) - goto out_fmr_exit; + goto out; ret = rds_ib_sysctl_init(); if (ret) @@ -430,8 +425,6 @@ out_sysctl: rds_ib_sysctl_exit(); out_ibreg: rds_ib_unregister_client(); -out_fmr_exit: - rds_ib_fmr_exit(); out: return ret; } diff --git a/net/rds/ib.h b/net/rds/ib.h index e34ad032b66..4297d92788d 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -307,8 +307,6 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, void rds_ib_sync_mr(void *trans_private, int dir); void rds_ib_free_mr(void *trans_private, int invalidate); void rds_ib_flush_mrs(void); -int rds_ib_fmr_init(void); -void rds_ib_fmr_exit(void); /* ib_recv.c */ int rds_ib_recv_init(void); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 18a833c450c..819c35a0d9c 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -38,8 +38,6 @@ #include "ib.h" #include "xlist.h" -static struct workqueue_struct *rds_ib_fmr_wq; - static DEFINE_PER_CPU(unsigned long, clean_list_grace); #define CLEAN_LIST_BUSY_BIT 0 @@ -307,7 +305,7 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) int err = 0, iter = 0; if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) - queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); + schedule_delayed_work(&pool->flush_worker, 10); while (1) { ibmr = rds_ib_reuse_fmr(pool); @@ -696,24 +694,6 @@ out_nolock: return ret; } -int rds_ib_fmr_init(void) -{ - rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd"); - if (!rds_ib_fmr_wq) - return -ENOMEM; - return 0; -} - -/* - * By the time this is called all the IB devices should have been torn down and - * had their pools freed. As each pool is freed its work struct is waited on, - * so the pool flushing work queue should be idle by the time we get here. - */ -void rds_ib_fmr_exit(void) -{ - destroy_workqueue(rds_ib_fmr_wq); -} - static void rds_ib_mr_pool_flush_worker(struct work_struct *work) { struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work); @@ -741,7 +721,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate) /* If we've pinned too many pages, request a flush */ if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || atomic_read(&pool->dirty_count) >= pool->max_items / 10) - queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); + schedule_delayed_work(&pool->flush_worker, 10); if (invalidate) { if (likely(!in_interrupt())) { @@ -749,8 +729,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate) } else { /* We get here if the user created a MR marked * as use_once and invalidate at the same time. */ - queue_delayed_work(rds_ib_fmr_wq, - &pool->flush_worker, 10); + schedule_delayed_work(&pool->flush_worker, 10); } } diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 71f373c421b..c47a511f203 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -551,7 +551,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, if (conn->c_loopback && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); - return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + scat = &rm->data.op_sg[sg]; + ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + ret = min_t(int, ret, scat->length - conn->c_xmit_data_off); + return ret; } /* FIXME we may overallocate here */ diff --git a/net/rds/loop.c b/net/rds/loop.c index aeec1d483b1..bca6761a3ca 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -61,10 +61,15 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off) { + struct scatterlist *sgp = &rm->data.op_sg[sg]; + int ret = sizeof(struct rds_header) + + be32_to_cpu(rm->m_inc.i_hdr.h_len); + /* Do not send cong updates to loopback */ if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); - return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + ret = min_t(int, ret, sgp->length - conn->c_xmit_data_off); + goto out; } BUG_ON(hdr_off || sg || off); @@ -80,8 +85,8 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm, NULL); rds_inc_put(&rm->m_inc); - - return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); +out: + return ret; } /* diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index 89315009bab..1a2b0633fec 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -423,6 +423,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) goto protocol_error; } + case RXRPC_PACKET_TYPE_ACKALL: case RXRPC_PACKET_TYPE_ACK: /* ACK processing is done in process context */ read_lock_bh(&call->state_lock); diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 5ee16f0353f..43ea7de2fc8 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -25,6 +25,7 @@ #include <keys/user-type.h> #include "ar-internal.h" +static int rxrpc_vet_description_s(const char *); static int rxrpc_instantiate(struct key *, const void *, size_t); static int rxrpc_instantiate_s(struct key *, const void *, size_t); static void rxrpc_destroy(struct key *); @@ -52,6 +53,7 @@ EXPORT_SYMBOL(key_type_rxrpc); */ struct key_type key_type_rxrpc_s = { .name = "rxrpc_s", + .vet_description = rxrpc_vet_description_s, .instantiate = rxrpc_instantiate_s, .match = user_match, .destroy = rxrpc_destroy_s, @@ -59,6 +61,23 @@ struct key_type key_type_rxrpc_s = { }; /* + * Vet the description for an RxRPC server key + */ +static int rxrpc_vet_description_s(const char *desc) +{ + unsigned long num; + char *p; + + num = simple_strtoul(desc, &p, 10); + if (*p != ':' || num > 65535) + return -EINVAL; + num = simple_strtoul(p + 1, &p, 10); + if (*p || num < 1 || num > 255) + return -EINVAL; + return 0; +} + +/* * parse an RxKAD type XDR format token * - the caller guarantees we have at least 4 words */ @@ -89,11 +108,11 @@ static int rxrpc_instantiate_xdr_rxkad(struct key *key, const __be32 *xdr, return ret; plen -= sizeof(*token); - token = kmalloc(sizeof(*token), GFP_KERNEL); + token = kzalloc(sizeof(*token), GFP_KERNEL); if (!token) return -ENOMEM; - token->kad = kmalloc(plen, GFP_KERNEL); + token->kad = kzalloc(plen, GFP_KERNEL); if (!token->kad) { kfree(token); return -ENOMEM; @@ -731,10 +750,10 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen) goto error; ret = -ENOMEM; - token = kmalloc(sizeof(*token), GFP_KERNEL); + token = kzalloc(sizeof(*token), GFP_KERNEL); if (!token) goto error; - token->kad = kmalloc(plen, GFP_KERNEL); + token->kad = kzalloc(plen, GFP_KERNEL); if (!token->kad) goto error_free; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 34dc598440a..1bc698039ae 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -839,6 +839,7 @@ void dev_deactivate(struct net_device *dev) list_add(&dev->unreg_list, &single); dev_deactivate_many(&single); + list_del(&single); } static void dev_init_scheduler_queue(struct net_device *dev, diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 2cc46f0962c..b23428f3c0d 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2029,11 +2029,11 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, *errp = sctp_make_op_error_fixed(asoc, chunk); if (*errp) { - sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, - WORD_ROUND(ntohs(param.p->length))); - sctp_addto_chunk_fixed(*errp, - WORD_ROUND(ntohs(param.p->length)), - param.v); + if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, + WORD_ROUND(ntohs(param.p->length)))) + sctp_addto_chunk_fixed(*errp, + WORD_ROUND(ntohs(param.p->length)), + param.v); } else { /* If there is no memory for generating the ERROR * report as specified, an ABORT will be triggered diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 243fc09b164..3fc8624fcd1 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -252,23 +252,37 @@ static void rpc_set_active(struct rpc_task *task) /* * Mark an RPC call as having completed by clearing the 'active' bit + * and then waking up all tasks that were sleeping. */ -static void rpc_mark_complete_task(struct rpc_task *task) +static int rpc_complete_task(struct rpc_task *task) { - smp_mb__before_clear_bit(); + void *m = &task->tk_runstate; + wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE); + struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE); + unsigned long flags; + int ret; + + spin_lock_irqsave(&wq->lock, flags); clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); - smp_mb__after_clear_bit(); - wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE); + ret = atomic_dec_and_test(&task->tk_count); + if (waitqueue_active(wq)) + __wake_up_locked_key(wq, TASK_NORMAL, &k); + spin_unlock_irqrestore(&wq->lock, flags); + return ret; } /* * Allow callers to wait for completion of an RPC call + * + * Note the use of out_of_line_wait_on_bit() rather than wait_on_bit() + * to enforce taking of the wq->lock and hence avoid races with + * rpc_complete_task(). */ int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) { if (action == NULL) action = rpc_wait_bit_killable; - return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, + return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, action, TASK_KILLABLE); } EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); @@ -857,34 +871,67 @@ static void rpc_async_release(struct work_struct *work) rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); } -void rpc_put_task(struct rpc_task *task) +static void rpc_release_resources_task(struct rpc_task *task) { - if (!atomic_dec_and_test(&task->tk_count)) - return; - /* Release resources */ if (task->tk_rqstp) xprt_release(task); if (task->tk_msg.rpc_cred) put_rpccred(task->tk_msg.rpc_cred); rpc_task_release_client(task); - if (task->tk_workqueue != NULL) { +} + +static void rpc_final_put_task(struct rpc_task *task, + struct workqueue_struct *q) +{ + if (q != NULL) { INIT_WORK(&task->u.tk_work, rpc_async_release); - queue_work(task->tk_workqueue, &task->u.tk_work); + queue_work(q, &task->u.tk_work); } else rpc_free_task(task); } + +static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q) +{ + if (atomic_dec_and_test(&task->tk_count)) { + rpc_release_resources_task(task); + rpc_final_put_task(task, q); + } +} + +void rpc_put_task(struct rpc_task *task) +{ + rpc_do_put_task(task, NULL); +} EXPORT_SYMBOL_GPL(rpc_put_task); +void rpc_put_task_async(struct rpc_task *task) +{ + rpc_do_put_task(task, task->tk_workqueue); +} +EXPORT_SYMBOL_GPL(rpc_put_task_async); + static void rpc_release_task(struct rpc_task *task) { dprintk("RPC: %5u release task\n", task->tk_pid); BUG_ON (RPC_IS_QUEUED(task)); - /* Wake up anyone who is waiting for task completion */ - rpc_mark_complete_task(task); + rpc_release_resources_task(task); - rpc_put_task(task); + /* + * Note: at this point we have been removed from rpc_clnt->cl_tasks, + * so it should be safe to use task->tk_count as a test for whether + * or not any other processes still hold references to our rpc_task. + */ + if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) { + /* Wake up anyone who may be waiting for task completion */ + if (!rpc_complete_task(task)) + return; + } else { + if (!atomic_dec_and_test(&task->tk_count)) + return; + } + rpc_final_put_task(task, task->tk_workqueue); } int rpciod_up(void) @@ -908,7 +955,7 @@ static int rpciod_start(void) * Create the rpciod thread and wait for it to start. */ dprintk("RPC: creating workqueue rpciod\n"); - wq = alloc_workqueue("rpciod", WQ_RESCUER, 0); + wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0); rpciod_workqueue = wq; return rpciod_workqueue != NULL; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 9df1eadc912..1a10dcd999e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1335,6 +1335,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, p, 0, length, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) { put_page(p); + svc_rdma_put_context(ctxt, 1); return; } atomic_inc(&xprt->sc_dma_used); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c431f5a5796..be96d429b47 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1631,7 +1631,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, } xs_reclassify_socket(family, sock); - if (xs_bind(transport, sock)) { + err = xs_bind(transport, sock); + if (err) { sock_release(sock); goto out; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index dd419d28620..ba5b8c20849 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -850,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * Get the parent directory, calculate the hash for last * component. */ - err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); + err = kern_path_parent(sunaddr->sun_path, &nd); if (err) goto out_mknod_parent; @@ -1724,7 +1724,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = 0; - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) { + err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); + goto out; + } skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { @@ -1864,7 +1868,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, memset(&tmp_scm, 0, sizeof(tmp_scm)); } - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) { + err = sock_intr_errno(timeo); + goto out; + } do { int chunk; @@ -1895,11 +1903,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, timeo = unix_stream_data_wait(sk, timeo); - if (signal_pending(current)) { + if (signal_pending(current) + || mutex_lock_interruptible(&u->readlock)) { err = sock_intr_errno(timeo); goto out; } - mutex_lock(&u->readlock); + continue; unlock: unix_state_unlock(sk); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index f89f83bf828..b6f4b994eb3 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -104,7 +104,7 @@ struct sock *unix_get_socket(struct file *filp) /* * Socket ? */ - if (S_ISSOCK(inode->i_mode)) { + if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) { struct socket *sock = SOCKET_I(inode); struct sock *s = sock->sk; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 3e5dbd4e4cd..d112f038edf 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -802,11 +802,11 @@ int cfg80211_wext_siwfreq(struct net_device *dev, return freq; if (freq == 0) return -EINVAL; - wdev_lock(wdev); mutex_lock(&rdev->devlist_mtx); + wdev_lock(wdev); err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT); - mutex_unlock(&rdev->devlist_mtx); wdev_unlock(wdev); + mutex_unlock(&rdev->devlist_mtx); return err; default: return -EOPNOTSUPP; |