summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/lockd/clntlock.c112
-rw-r--r--fs/lockd/clntproc.c317
-rw-r--r--fs/lockd/host.c12
-rw-r--r--fs/lockd/mon.c11
-rw-r--r--fs/lockd/svc4proc.c157
-rw-r--r--fs/lockd/svclock.c349
-rw-r--r--fs/lockd/svcproc.c151
-rw-r--r--fs/lockd/svcshare.c4
-rw-r--r--fs/lockd/svcsubs.c7
-rw-r--r--fs/lockd/xdr.c17
-rw-r--r--fs/lockd/xdr4.c21
-rw-r--r--fs/locks.c106
-rw-r--r--fs/namespace.c38
-rw-r--r--fs/nfs/callback.c20
-rw-r--r--fs/nfs/callback_xdr.c28
-rw-r--r--fs/nfs/delegation.c19
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c114
-rw-r--r--fs/nfs/direct.c949
-rw-r--r--fs/nfs/file.c49
-rw-r--r--fs/nfs/idmap.c47
-rw-r--r--fs/nfs/inode.c229
-rw-r--r--fs/nfs/iostat.h164
-rw-r--r--fs/nfs/mount_clnt.c17
-rw-r--r--fs/nfs/nfs2xdr.c4
-rw-r--r--fs/nfs/nfs3acl.c16
-rw-r--r--fs/nfs/nfs3proc.c246
-rw-r--r--fs/nfs/nfs3xdr.c6
-rw-r--r--fs/nfs/nfs4proc.c180
-rw-r--r--fs/nfs/nfs4state.c1
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/pagelist.c16
-rw-r--r--fs/nfs/proc.c156
-rw-r--r--fs/nfs/read.c102
-rw-r--r--fs/nfs/unlink.c3
-rw-r--r--fs/nfs/write.c288
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4state.c13
-rw-r--r--fs/proc/base.c39
-rw-r--r--include/linux/fs.h7
-rw-r--r--include/linux/lockd/lockd.h27
-rw-r--r--include/linux/lockd/share.h2
-rw-r--r--include/linux/lockd/xdr.h1
-rw-r--r--include/linux/nfs_fs.h102
-rw-r--r--include/linux/nfs_fs_i.h8
-rw-r--r--include/linux/nfs_fs_sb.h6
-rw-r--r--include/linux/nfs_xdr.h5
-rw-r--r--include/linux/sunrpc/clnt.h20
-rw-r--r--include/linux/sunrpc/gss_krb5.h2
-rw-r--r--include/linux/sunrpc/metrics.h77
-rw-r--r--include/linux/sunrpc/rpc_pipe_fs.h2
-rw-r--r--include/linux/sunrpc/sched.h9
-rw-r--r--include/linux/sunrpc/xprt.h13
-rw-r--r--net/sunrpc/auth.c16
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c2
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c15
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_unseal.c4
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c17
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_mech.c6
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_seal.c5
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_unseal.c4
-rw-r--r--net/sunrpc/clnt.c53
-rw-r--r--net/sunrpc/pmap_clnt.c41
-rw-r--r--net/sunrpc/rpc_pipe.c31
-rw-r--r--net/sunrpc/sched.c12
-rw-r--r--net/sunrpc/stats.c115
-rw-r--r--net/sunrpc/xprt.c29
-rw-r--r--net/sunrpc/xprtsock.c49
69 files changed, 2825 insertions, 1869 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index c8d0a209120..e207be68d4c 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1555,6 +1555,7 @@ config RPCSEC_GSS_SPKM3
select CRYPTO
select CRYPTO_MD5
select CRYPTO_DES
+ select CRYPTO_CAST5
help
Provides for secure RPC calls by means of a gss-api
mechanism based on the SPKM3 public-key mechanism.
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index da6354baa0b..bce74446870 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -44,32 +44,25 @@ static LIST_HEAD(nlm_blocked);
/*
* Queue up a lock for blocking so that the GRANTED request can see it
*/
-int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl)
+struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl)
{
struct nlm_wait *block;
- BUG_ON(req->a_block != NULL);
block = kmalloc(sizeof(*block), GFP_KERNEL);
- if (block == NULL)
- return -ENOMEM;
- block->b_host = host;
- block->b_lock = fl;
- init_waitqueue_head(&block->b_wait);
- block->b_status = NLM_LCK_BLOCKED;
-
- list_add(&block->b_list, &nlm_blocked);
- req->a_block = block;
-
- return 0;
+ if (block != NULL) {
+ block->b_host = host;
+ block->b_lock = fl;
+ init_waitqueue_head(&block->b_wait);
+ block->b_status = NLM_LCK_BLOCKED;
+ list_add(&block->b_list, &nlm_blocked);
+ }
+ return block;
}
-void nlmclnt_finish_block(struct nlm_rqst *req)
+void nlmclnt_finish_block(struct nlm_wait *block)
{
- struct nlm_wait *block = req->a_block;
-
if (block == NULL)
return;
- req->a_block = NULL;
list_del(&block->b_list);
kfree(block);
}
@@ -77,15 +70,14 @@ void nlmclnt_finish_block(struct nlm_rqst *req)
/*
* Block on a lock
*/
-long nlmclnt_block(struct nlm_rqst *req, long timeout)
+int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
{
- struct nlm_wait *block = req->a_block;
long ret;
/* A borken server might ask us to block even if we didn't
* request it. Just say no!
*/
- if (!req->a_args.block)
+ if (block == NULL)
return -EAGAIN;
/* Go to sleep waiting for GRANT callback. Some servers seem
@@ -99,13 +91,10 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout)
ret = wait_event_interruptible_timeout(block->b_wait,
block->b_status != NLM_LCK_BLOCKED,
timeout);
-
- if (block->b_status != NLM_LCK_BLOCKED) {
- req->a_res.status = block->b_status;
- block->b_status = NLM_LCK_BLOCKED;
- }
-
- return ret;
+ if (ret < 0)
+ return -ERESTARTSYS;
+ req->a_res.status = block->b_status;
+ return 0;
}
/*
@@ -125,7 +114,15 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
list_for_each_entry(block, &nlm_blocked, b_list) {
struct file_lock *fl_blocked = block->b_lock;
- if (!nlm_compare_locks(fl_blocked, fl))
+ if (fl_blocked->fl_start != fl->fl_start)
+ continue;
+ if (fl_blocked->fl_end != fl->fl_end)
+ continue;
+ /*
+ * Careful! The NLM server will return the 32-bit "pid" that
+ * we put on the wire: in this case the lockowner "pid".
+ */
+ if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
continue;
if (!nlm_cmp_addr(&block->b_host->h_addr, addr))
continue;
@@ -147,34 +144,6 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
*/
/*
- * Mark the locks for reclaiming.
- * FIXME: In 2.5 we don't want to iterate through any global file_lock_list.
- * Maintain NLM lock reclaiming lists in the nlm_host instead.
- */
-static
-void nlmclnt_mark_reclaim(struct nlm_host *host)
-{
- struct file_lock *fl;
- struct inode *inode;
- struct list_head *tmp;
-
- list_for_each(tmp, &file_lock_list) {
- fl = list_entry(tmp, struct file_lock, fl_link);
-
- inode = fl->fl_file->f_dentry->d_inode;
- if (inode->i_sb->s_magic != NFS_SUPER_MAGIC)
- continue;
- if (fl->fl_u.nfs_fl.owner == NULL)
- continue;
- if (fl->fl_u.nfs_fl.owner->host != host)
- continue;
- if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED))
- continue;
- fl->fl_u.nfs_fl.flags |= NFS_LCK_RECLAIM;
- }
-}
-
-/*
* Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number,
* that we mark locks for reclaiming, and that we bump the pseudo NSM state.
*/
@@ -186,7 +155,12 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate)
host->h_state++;
host->h_nextrebind = 0;
nlm_rebind_host(host);
- nlmclnt_mark_reclaim(host);
+
+ /*
+ * Mark the locks for reclaiming.
+ */
+ list_splice_init(&host->h_granted, &host->h_reclaim);
+
dprintk("NLM: reclaiming locks for host %s", host->h_name);
}
@@ -215,9 +189,7 @@ reclaimer(void *ptr)
{
struct nlm_host *host = (struct nlm_host *) ptr;
struct nlm_wait *block;
- struct list_head *tmp;
- struct file_lock *fl;
- struct inode *inode;
+ struct file_lock *fl, *next;
daemonize("%s-reclaim", host->h_name);
allow_signal(SIGKILL);
@@ -229,23 +201,13 @@ reclaimer(void *ptr)
/* First, reclaim all locks that have been marked. */
restart:
- list_for_each(tmp, &file_lock_list) {
- fl = list_entry(tmp, struct file_lock, fl_link);
+ list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
+ list_del_init(&fl->fl_u.nfs_fl.list);
- inode = fl->fl_file->f_dentry->d_inode;
- if (inode->i_sb->s_magic != NFS_SUPER_MAGIC)
- continue;
- if (fl->fl_u.nfs_fl.owner == NULL)
- continue;
- if (fl->fl_u.nfs_fl.owner->host != host)
- continue;
- if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_RECLAIM))
- continue;
-
- fl->fl_u.nfs_fl.flags &= ~NFS_LCK_RECLAIM;
- nlmclnt_reclaim(host, fl);
if (signalled())
- break;
+ continue;
+ if (nlmclnt_reclaim(host, fl) == 0)
+ list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
goto restart;
}
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 970b6a6aa33..f96e38155b5 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -132,59 +132,18 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
lock->caller = system_utsname.nodename;
lock->oh.data = req->a_owner;
- lock->oh.len = sprintf(req->a_owner, "%d@%s",
- current->pid, system_utsname.nodename);
- locks_copy_lock(&lock->fl, fl);
+ lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
+ (unsigned int)fl->fl_u.nfs_fl.owner->pid,
+ system_utsname.nodename);
+ lock->svid = fl->fl_u.nfs_fl.owner->pid;
+ lock->fl.fl_start = fl->fl_start;
+ lock->fl.fl_end = fl->fl_end;
+ lock->fl.fl_type = fl->fl_type;
}
static void nlmclnt_release_lockargs(struct nlm_rqst *req)
{
- struct file_lock *fl = &req->a_args.lock.fl;
-
- if (fl->fl_ops && fl->fl_ops->fl_release_private)
- fl->fl_ops->fl_release_private(fl);
-}
-
-/*
- * Initialize arguments for GRANTED call. The nlm_rqst structure
- * has been cleared already.
- */
-int
-nlmclnt_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
-{
- locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
- memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
- call->a_args.lock.caller = system_utsname.nodename;
- call->a_args.lock.oh.len = lock->oh.len;
-
- /* set default data area */
- call->a_args.lock.oh.data = call->a_owner;
-
- if (lock->oh.len > NLMCLNT_OHSIZE) {
- void *data = kmalloc(lock->oh.len, GFP_KERNEL);
- if (!data) {
- nlmclnt_freegrantargs(call);
- return 0;
- }
- call->a_args.lock.oh.data = (u8 *) data;
- }
-
- memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
- return 1;
-}
-
-void
-nlmclnt_freegrantargs(struct nlm_rqst *call)
-{
- struct file_lock *fl = &call->a_args.lock.fl;
- /*
- * Check whether we allocated memory for the owner.
- */
- if (call->a_args.lock.oh.data != (u8 *) call->a_owner) {
- kfree(call->a_args.lock.oh.data);
- }
- if (fl->fl_ops && fl->fl_ops->fl_release_private)
- fl->fl_ops->fl_release_private(fl);
+ BUG_ON(req->a_args.lock.fl.fl_ops != NULL);
}
/*
@@ -193,9 +152,8 @@ nlmclnt_freegrantargs(struct nlm_rqst *call)
int
nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
{
- struct nfs_server *nfssrv = NFS_SERVER(inode);
struct nlm_host *host;
- struct nlm_rqst reqst, *call = &reqst;
+ struct nlm_rqst *call;
sigset_t oldset;
unsigned long flags;
int status, proto, vers;
@@ -209,23 +167,17 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
/* Retrieve transport protocol from NFS client */
proto = NFS_CLIENT(inode)->cl_xprt->prot;
- if (!(host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers)))
+ host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
+ if (host == NULL)
return -ENOLCK;
- /* Create RPC client handle if not there, and copy soft
- * and intr flags from NFS client. */
- if (host->h_rpcclnt == NULL) {
- struct rpc_clnt *clnt;
+ call = nlm_alloc_call(host);
+ if (call == NULL)
+ return -ENOMEM;
- /* Bind an rpc client to this host handle (does not
- * perform a portmapper lookup) */
- if (!(clnt = nlm_bind_host(host))) {
- status = -ENOLCK;
- goto done;
- }
- clnt->cl_softrtry = nfssrv->client->cl_softrtry;
- clnt->cl_intr = nfssrv->client->cl_intr;
- }
+ nlmclnt_locks_init_private(fl, host);
+ /* Set up the argument struct */
+ nlmclnt_setlockargs(call, fl);
/* Keep the old signal mask */
spin_lock_irqsave(&current->sighand->siglock, flags);
@@ -238,26 +190,10 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
&& (current->flags & PF_EXITING)) {
sigfillset(&current->blocked); /* Mask all signals */
recalc_sigpending();
- spin_unlock_irqrestore(&current->sighand->siglock, flags);
- call = nlmclnt_alloc_call();
- if (!call) {
- status = -ENOMEM;
- goto out_restore;
- }
call->a_flags = RPC_TASK_ASYNC;
- } else {
- spin_unlock_irqrestore(&current->sighand->siglock, flags);
- memset(call, 0, sizeof(*call));
- locks_init_lock(&call->a_args.lock.fl);
- locks_init_lock(&call->a_res.lock.fl);
}
- call->a_host = host;
-
- nlmclnt_locks_init_private(fl, host);
-
- /* Set up the argument struct */
- nlmclnt_setlockargs(call, fl);
+ spin_unlock_irqrestore(&current->sighand->siglock, flags);
if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
if (fl->fl_type != F_UNLCK) {
@@ -270,41 +206,58 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
else
status = -EINVAL;
- out_restore:
+ fl->fl_ops->fl_release_private(fl);
+ fl->fl_ops = NULL;
+
spin_lock_irqsave(&current->sighand->siglock, flags);
current->blocked = oldset;
recalc_sigpending();
spin_unlock_irqrestore(&current->sighand->siglock, flags);
-done:
dprintk("lockd: clnt proc returns %d\n", status);
- nlm_release_host(host);
return status;
}
EXPORT_SYMBOL(nlmclnt_proc);
/*
* Allocate an NLM RPC call struct
+ *
+ * Note: the caller must hold a reference to host. In case of failure,
+ * this reference will be released.
*/
-struct nlm_rqst *
-nlmclnt_alloc_call(void)
+struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
{
struct nlm_rqst *call;
- while (!signalled()) {
- call = (struct nlm_rqst *) kmalloc(sizeof(struct nlm_rqst), GFP_KERNEL);
- if (call) {
- memset(call, 0, sizeof(*call));
+ for(;;) {
+ call = kzalloc(sizeof(*call), GFP_KERNEL);
+ if (call != NULL) {
locks_init_lock(&call->a_args.lock.fl);
locks_init_lock(&call->a_res.lock.fl);
+ call->a_host = host;
return call;
}
- printk("nlmclnt_alloc_call: failed, waiting for memory\n");
+ if (signalled())
+ break;
+ printk("nlm_alloc_call: failed, waiting for memory\n");
schedule_timeout_interruptible(5*HZ);
}
+ nlm_release_host(host);
return NULL;
}
+void nlm_release_call(struct nlm_rqst *call)
+{
+ nlm_release_host(call->a_host);
+ nlmclnt_release_lockargs(call);
+ kfree(call);
+}
+
+static void nlmclnt_rpc_release(void *data)
+{
+ return nlm_release_call(data);
+}
+
static int nlm_wait_on_grace(wait_queue_head_t *queue)
{
DEFINE_WAIT(wait);
@@ -401,57 +354,45 @@ in_grace_period:
/*
* Generic NLM call, async version.
*/
-int nlmsvc_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops)
{
struct nlm_host *host = req->a_host;
struct rpc_clnt *clnt;
- struct rpc_message msg = {
- .rpc_argp = &req->a_args,
- .rpc_resp = &req->a_res,
- };
- int status;
+ int status = -ENOLCK;
dprintk("lockd: call procedure %d on %s (async)\n",
(int)proc, host->h_name);
/* If we have no RPC client yet, create one. */
- if ((clnt = nlm_bind_host(host)) == NULL)
- return -ENOLCK;
- msg.rpc_proc = &clnt->cl_procinfo[proc];
+ clnt = nlm_bind_host(host);
+ if (clnt == NULL)
+ goto out_err;
+ msg->rpc_proc = &clnt->cl_procinfo[proc];
/* bootstrap and kick off the async RPC call */
- status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
-
+ status = rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req);
+ if (status == 0)
+ return 0;
+out_err:
+ nlm_release_call(req);
return status;
}
-static int nlmclnt_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
{
- struct nlm_host *host = req->a_host;
- struct rpc_clnt *clnt;
- struct nlm_args *argp = &req->a_args;
- struct nlm_res *resp = &req->a_res;
struct rpc_message msg = {
- .rpc_argp = argp,
- .rpc_resp = resp,
+ .rpc_argp = &req->a_args,
+ .rpc_resp = &req->a_res,
};
- int status;
-
- dprintk("lockd: call procedure %d on %s (async)\n",
- (int)proc, host->h_name);
-
- /* If we have no RPC client yet, create one. */
- if ((clnt = nlm_bind_host(host)) == NULL)
- return -ENOLCK;
- msg.rpc_proc = &clnt->cl_procinfo[proc];
+ return __nlm_async_call(req, proc, &msg, tk_ops);
+}
- /* Increment host refcount */
- nlm_get_host(host);
- /* bootstrap and kick off the async RPC call */
- status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
- if (status < 0)
- nlm_release_host(host);
- return status;
+int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+{
+ struct rpc_message msg = {
+ .rpc_argp = &req->a_res,
+ };
+ return __nlm_async_call(req, proc, &msg, tk_ops);
}
/*
@@ -463,36 +404,41 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
int status;
status = nlmclnt_call(req, NLMPROC_TEST);
- nlmclnt_release_lockargs(req);
if (status < 0)
- return status;
+ goto out;
- status = req->a_res.status;
- if (status == NLM_LCK_GRANTED) {
- fl->fl_type = F_UNLCK;
- } if (status == NLM_LCK_DENIED) {
- /*
- * Report the conflicting lock back to the application.
- */
- locks_copy_lock(fl, &req->a_res.lock.fl);
- fl->fl_pid = 0;
- } else {
- return nlm_stat_to_errno(req->a_res.status);
+ switch (req->a_res.status) {
+ case NLM_LCK_GRANTED:
+ fl->fl_type = F_UNLCK;
+ break;
+ case NLM_LCK_DENIED:
+ /*
+ * Report the conflicting lock back to the application.
+ */
+ fl->fl_start = req->a_res.lock.fl.fl_start;
+ fl->fl_end = req->a_res.lock.fl.fl_start;
+ fl->fl_type = req->a_res.lock.fl.fl_type;
+ fl->fl_pid = 0;
+ break;
+ default:
+ status = nlm_stat_to_errno(req->a_res.status);
}
-
- return 0;
+out:
+ nlm_release_call(req);
+ return status;
}
static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
- memcpy(&new->fl_u.nfs_fl, &fl->fl_u.nfs_fl, sizeof(new->fl_u.nfs_fl));
- nlm_get_lockowner(new->fl_u.nfs_fl.owner);
+ new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state;
+ new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner);
+ list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted);
}
static void nlmclnt_locks_release_private(struct file_lock *fl)
{
+ list_del(&fl->fl_u.nfs_fl.list);
nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
- fl->fl_ops = NULL;
}
static struct file_lock_operations nlmclnt_lock_ops = {
@@ -504,8 +450,8 @@ static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *ho
{
BUG_ON(fl->fl_ops != NULL);
fl->fl_u.nfs_fl.state = 0;
- fl->fl_u.nfs_fl.flags = 0;
fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner);
+ INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list);
fl->fl_ops = &nlmclnt_lock_ops;
}
@@ -552,57 +498,52 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
{
struct nlm_host *host = req->a_host;
struct nlm_res *resp = &req->a_res;
- long timeout;
- int status;
+ struct nlm_wait *block = NULL;
+ int status = -ENOLCK;
if (!host->h_monitored && nsm_monitor(host) < 0) {
printk(KERN_NOTICE "lockd: failed to monitor %s\n",
host->h_name);
- status = -ENOLCK;
goto out;
}
- if (req->a_args.block) {
- status = nlmclnt_prepare_block(req, host, fl);
- if (status < 0)
- goto out;
- }
+ block = nlmclnt_prepare_block(host, fl);
for(;;) {
status = nlmclnt_call(req, NLMPROC_LOCK);
if (status < 0)
goto out_unblock;
- if (resp->status != NLM_LCK_BLOCKED)
+ if (!req->a_args.block)
break;
- /* Wait on an NLM blocking lock */
- timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT);
/* Did a reclaimer thread notify us of a server reboot? */
if (resp->status == NLM_LCK_DENIED_GRACE_PERIOD)
continue;
if (resp->status != NLM_LCK_BLOCKED)
break;
- if (timeout >= 0)
- continue;
- /* We were interrupted. Send a CANCEL request to the server
+ /* Wait on an NLM blocking lock */
+ status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
+ /* if we were interrupted. Send a CANCEL request to the server
* and exit
*/
- status = (int)timeout;
- goto out_unblock;
+ if (status < 0)
+ goto out_unblock;
+ if (resp->status != NLM_LCK_BLOCKED)
+ break;
}
if (resp->status == NLM_LCK_GRANTED) {
fl->fl_u.nfs_fl.state = host->h_state;
- fl->fl_u.nfs_fl.flags |= NFS_LCK_GRANTED;
fl->fl_flags |= FL_SLEEP;
+ /* Ensure the resulting lock will get added to granted list */
do_vfs_lock(fl);
}
status = nlm_stat_to_errno(resp->status);
out_unblock:
- nlmclnt_finish_block(req);
+ nlmclnt_finish_block(block);
/* Cancel the blocked request if it is still pending */
if (resp->status == NLM_LCK_BLOCKED)
nlmclnt_cancel(host, req->a_args.block, fl);
out:
- nlmclnt_release_lockargs(req);
+ nlm_release_call(req);
return status;
}
@@ -658,10 +599,6 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
struct nlm_res *resp = &req->a_res;
int status;
- /* Clean the GRANTED flag now so the lock doesn't get
- * reclaimed while we're stuck in the unlock call. */
- fl->fl_u.nfs_fl.flags &= ~NFS_LCK_GRANTED;
-
/*
* Note: the server is supposed to either grant us the unlock
* request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either
@@ -669,32 +606,24 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
*/
do_vfs_lock(fl);
- if (req->a_flags & RPC_TASK_ASYNC) {
- status = nlmclnt_async_call(req, NLMPROC_UNLOCK,
- &nlmclnt_unlock_ops);
- /* Hrmf... Do the unlock early since locks_remove_posix()
- * really expects us to free the lock synchronously */
- if (status < 0) {
- nlmclnt_release_lockargs(req);
- kfree(req);
- }
- return status;
- }
+ if (req->a_flags & RPC_TASK_ASYNC)
+ return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
status = nlmclnt_call(req, NLMPROC_UNLOCK);
- nlmclnt_release_lockargs(req);
if (status < 0)
- return status;
+ goto out;
+ status = 0;
if (resp->status == NLM_LCK_GRANTED)
- return 0;
+ goto out;
if (resp->status != NLM_LCK_DENIED_NOLOCKS)
printk("lockd: unexpected unlock status: %d\n", resp->status);
-
/* What to do now? I'm out of my depth... */
-
- return -ENOLCK;
+ status = -ENOLCK;
+out:
+ nlm_release_call(req);
+ return status;
}
static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
@@ -716,9 +645,6 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
if (status != NLM_LCK_GRANTED)
printk(KERN_WARNING "lockd: unexpected unlock status: %d\n", status);
die:
- nlm_release_host(req->a_host);
- nlmclnt_release_lockargs(req);
- kfree(req);
return;
retry_rebind:
nlm_rebind_host(req->a_host);
@@ -728,6 +654,7 @@ die:
static const struct rpc_call_ops nlmclnt_unlock_ops = {
.rpc_call_done = nlmclnt_unlock_callback,
+ .rpc_release = nlmclnt_rpc_release,
};
/*
@@ -749,20 +676,15 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
recalc_sigpending();
spin_unlock_irqrestore(&current->sighand->siglock, flags);
- req = nlmclnt_alloc_call();
+ req = nlm_alloc_call(nlm_get_host(host));
if (!req)
return -ENOMEM;
- req->a_host = host;
req->a_flags = RPC_TASK_ASYNC;
nlmclnt_setlockargs(req, fl);
req->a_args.block = block;
- status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
- if (status < 0) {
- nlmclnt_release_lockargs(req);
- kfree(req);
- }
+ status = nlm_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
spin_lock_irqsave(&current->sighand->siglock, flags);
current->blocked = oldset;
@@ -791,6 +713,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
switch (req->a_res.status) {
case NLM_LCK_GRANTED:
case NLM_LCK_DENIED_GRACE_PERIOD:
+ case NLM_LCK_DENIED:
/* Everything's good */
break;
case NLM_LCK_DENIED_NOLOCKS:
@@ -802,9 +725,6 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
}
die:
- nlm_release_host(req->a_host);
- nlmclnt_release_lockargs(req);
- kfree(req);
return;
retry_cancel:
@@ -818,6 +738,7 @@ retry_cancel:
static const struct rpc_call_ops nlmclnt_cancel_ops = {
.rpc_call_done = nlmclnt_cancel_callback,
+ .rpc_release = nlmclnt_rpc_release,
};
/*
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 82f7a0b1d8a..112ebf8b8df 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -123,6 +123,8 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
nlm_hosts[hash] = host;
INIT_LIST_HEAD(&host->h_lockowners);
spin_lock_init(&host->h_lock);
+ INIT_LIST_HEAD(&host->h_granted);
+ INIT_LIST_HEAD(&host->h_reclaim);
if (++nrhosts > NLM_HOST_MAX)
next_gc = 0;
@@ -191,11 +193,12 @@ nlm_bind_host(struct nlm_host *host)
xprt->resvport = 1; /* NLM requires a reserved port */
/* Existing NLM servers accept AUTH_UNIX only */
- clnt = rpc_create_client(xprt, host->h_name, &nlm_program,
+ clnt = rpc_new_client(xprt, host->h_name, &nlm_program,
host->h_version, RPC_AUTH_UNIX);
if (IS_ERR(clnt))
goto forgetit;
clnt->cl_autobind = 1; /* turn on pmap queries */
+ clnt->cl_softrtry = 1; /* All queries are soft */
host->h_rpcclnt = clnt;
}
@@ -242,8 +245,12 @@ void nlm_release_host(struct nlm_host *host)
{
if (host != NULL) {
dprintk("lockd: release host %s\n", host->h_name);
- atomic_dec(&host->h_count);
BUG_ON(atomic_read(&host->h_count) < 0);
+ if (atomic_dec_and_test(&host->h_count)) {
+ BUG_ON(!list_empty(&host->h_lockowners));
+ BUG_ON(!list_empty(&host->h_granted));
+ BUG_ON(!list_empty(&host->h_reclaim));
+ }
}
}
@@ -331,7 +338,6 @@ nlm_gc_hosts(void)
rpc_destroy_client(host->h_rpcclnt);
}
}
- BUG_ON(!list_empty(&host->h_lockowners));
kfree(host);
nrhosts--;
}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index a89cb8aa2c8..3fc683f46b3 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -35,6 +35,10 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
struct rpc_clnt *clnt;
int status;
struct nsm_args args;
+ struct rpc_message msg = {
+ .rpc_argp = &args,
+ .rpc_resp = res,
+ };
clnt = nsm_create();
if (IS_ERR(clnt)) {
@@ -49,7 +53,8 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
args.proc = NLMPROC_NSM_NOTIFY;
memset(res, 0, sizeof(*res));
- status = rpc_call(clnt, proc, &args, res, 0);
+ msg.rpc_proc = &clnt->cl_procinfo[proc];
+ status = rpc_call_sync(clnt, &msg, 0);
if (status < 0)
printk(KERN_DEBUG "nsm_mon_unmon: rpc failed, status=%d\n",
status);
@@ -214,12 +219,16 @@ static struct rpc_procinfo nsm_procedures[] = {
.p_encode = (kxdrproc_t) xdr_encode_mon,
.p_decode = (kxdrproc_t) xdr_decode_stat_res,
.p_bufsiz = MAX(SM_mon_sz, SM_monres_sz) << 2,
+ .p_statidx = SM_MON,
+ .p_name = "MONITOR",
},
[SM_UNMON] = {
.p_proc = SM_UNMON,
.p_encode = (kxdrproc_t) xdr_encode_unmon,
.p_decode = (kxdrproc_t) xdr_decode_stat,
.p_bufsiz = MAX(SM_mon_id_sz, SM_unmonres_sz) << 2,
+ .p_statidx = SM_UNMON,
+ .p_name = "UNMONITOR",
},
};
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index b10f913aa06..a2dd9ccb9b3 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -21,10 +21,6 @@
#define NLMDBG_FACILITY NLMDBG_CLIENT
-static u32 nlm4svc_callback(struct svc_rqst *, u32, struct nlm_res *);
-
-static const struct rpc_call_ops nlm4svc_callback_ops;
-
/*
* Obtain client and file from arguments
*/
@@ -234,83 +230,89 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
}
/*
+ * This is the generic lockd callback for async RPC calls
+ */
+static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
+{
+ dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+ -task->tk_status);
+}
+
+static void nlm4svc_callback_release(void *data)
+{
+ nlm_release_call(data);
+}
+
+static const struct rpc_call_ops nlm4svc_callback_ops = {
+ .rpc_call_done = nlm4svc_callback_exit,
+ .rpc_release = nlm4svc_callback_release,
+};
+
+/*
* `Async' versions of the above service routines. They aren't really,
* because we send the callback before the reply proper. I hope this
* doesn't break any clients.
*/
-static int
-nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
- void *resp)
+static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+ int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res *))
{
- struct nlm_res res;
- u32 stat;
+ struct nlm_host *host;
+ struct nlm_rqst *call;
+ int stat;
- dprintk("lockd: TEST_MSG called\n");
- memset(&res, 0, sizeof(res));
+ host = nlmsvc_lookup_host(rqstp);
+ if (host == NULL)
+ return rpc_system_err;
+
+ call = nlm_alloc_call(host);
+ if (call == NULL)
+ return rpc_system_err;
- if ((stat = nlm4svc_proc_test(rqstp, argp, &res)) == 0)
- stat = nlm4svc_callback(rqstp, NLMPROC_TEST_RES, &res);
- return stat;
+ stat = func(rqstp, argp, &call->a_res);
+ if (stat != 0) {
+ nlm_release_call(call);
+ return stat;
+ }
+
+ call->a_flags = RPC_TASK_ASYNC;
+ if (nlm_async_reply(call, proc, &nlm4svc_callback_ops) < 0)
+ return rpc_system_err;
+ return rpc_success;
}
-static int
-nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
+ dprintk("lockd: TEST_MSG called\n");
+ return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test);
+}
+static int nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+ void *resp)
+{
dprintk("lockd: LOCK_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlm4svc_proc_lock(rqstp, argp, &res)) == 0)
- stat = nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, &res);
- return stat;
+ return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock);
}
-static int
-nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: CANCEL_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlm4svc_proc_cancel(rqstp, argp, &res)) == 0)
- stat = nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
- return stat;
+ return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel);
}
-static int
-nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: UNLOCK_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlm4svc_proc_unlock(rqstp, argp, &res)) == 0)
- stat = nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
- return stat;
+ return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock);
}
-static int
-nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: GRANTED_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlm4svc_proc_granted(rqstp, argp, &res)) == 0)
- stat = nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
- return stat;
+ return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlm4svc_proc_granted);
}
/*
@@ -472,55 +474,6 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp,
/*
- * This is the generic lockd callback for async RPC calls
- */
-static u32
-nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
-{
- struct nlm_host *host;
- struct nlm_rqst *call;
-
- if (!(call = nlmclnt_alloc_call()))
- return rpc_system_err;
-
- host = nlmclnt_lookup_host(&rqstp->rq_addr,
- rqstp->rq_prot, rqstp->rq_vers);
- if (!host) {
- kfree(call);
- return rpc_system_err;
- }
-
- call->a_flags = RPC_TASK_ASYNC;
- call->a_host = host;
- memcpy(&call->a_args, resp, sizeof(*resp));
-
- if (nlmsvc_async_call(call, proc, &nlm4svc_callback_ops) < 0)
- goto error;
-
- return rpc_success;
- error:
- kfree(call);
- nlm_release_host(host);
- return rpc_system_err;
-}
-
-static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
-{
- struct nlm_rqst *call = data;
-
- if (task->tk_status < 0) {
- dprintk("lockd: %4d callback failed (errno = %d)\n",
- task->tk_pid, -task->tk_status);
- }
- nlm_release_host(call->a_host);
- kfree(call);
-}
-
-static const struct rpc_call_ops nlm4svc_callback_ops = {
- .rpc_call_done = nlm4svc_callback_exit,
-};
-
-/*
* NLM Server procedures.
*/
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 9cfced65d4a..d2b66bad7d5 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -39,9 +39,12 @@
#define nlm_deadlock nlm_lck_denied
#endif
+static void nlmsvc_release_block(struct nlm_block *block);
static void nlmsvc_insert_block(struct nlm_block *block, unsigned long);
static int nlmsvc_remove_block(struct nlm_block *block);
+static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
+static void nlmsvc_freegrantargs(struct nlm_rqst *call);
static const struct rpc_call_ops nlmsvc_grant_ops;
/*
@@ -58,6 +61,7 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
struct nlm_block **bp, *b;
dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when);
+ kref_get(&block->b_count);
if (block->b_queued)
nlmsvc_remove_block(block);
bp = &nlm_blocked;
@@ -90,6 +94,7 @@ nlmsvc_remove_block(struct nlm_block *block)
if (b == block) {
*bp = block->b_next;
block->b_queued = 0;
+ nlmsvc_release_block(block);
return 1;
}
}
@@ -98,11 +103,10 @@ nlmsvc_remove_block(struct nlm_block *block)
}
/*
- * Find a block for a given lock and optionally remove it from
- * the list.
+ * Find a block for a given lock
*/
static struct nlm_block *
-nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
+nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
{
struct nlm_block **head, *block;
struct file_lock *fl;
@@ -112,17 +116,14 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end, lock->fl.fl_type);
for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) {
- fl = &block->b_call.a_args.lock.fl;
+ fl = &block->b_call->a_args.lock.fl;
dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
block->b_file, fl->fl_pid,
(long long)fl->fl_start,
(long long)fl->fl_end, fl->fl_type,
- nlmdbg_cookie2a(&block->b_call.a_args.cookie));
+ nlmdbg_cookie2a(&block->b_call->a_args.cookie));
if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) {
- if (remove) {
- *head = block->b_next;
- block->b_queued = 0;
- }
+ kref_get(&block->b_count);
return block;
}
}
@@ -150,11 +151,13 @@ nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin)
for (block = nlm_blocked; block; block = block->b_next) {
dprintk("cookie: head of blocked queue %p, block %p\n",
nlm_blocked, block);
- if (nlm_cookie_match(&block->b_call.a_args.cookie,cookie)
+ if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie)
&& nlm_cmp_addr(sin, &block->b_host->h_addr))
break;
}
+ if (block != NULL)
+ kref_get(&block->b_count);
return block;
}
@@ -174,27 +177,30 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
{
struct nlm_block *block;
struct nlm_host *host;
- struct nlm_rqst *call;
+ struct nlm_rqst *call = NULL;
/* Create host handle for callback */
- host = nlmclnt_lookup_host(&rqstp->rq_addr,
- rqstp->rq_prot, rqstp->rq_vers);
+ host = nlmsvc_lookup_host(rqstp);
if (host == NULL)
return NULL;
+ call = nlm_alloc_call(host);
+ if (call == NULL)
+ return NULL;
+
/* Allocate memory for block, and initialize arguments */
- if (!(block = (struct nlm_block *) kmalloc(sizeof(*block), GFP_KERNEL)))
+ block = kzalloc(sizeof(*block), GFP_KERNEL);
+ if (block == NULL)
goto failed;
- memset(block, 0, sizeof(*block));
- locks_init_lock(&block->b_call.a_args.lock.fl);
- locks_init_lock(&block->b_call.a_res.lock.fl);
+ kref_init(&block->b_count);
- if (!nlmclnt_setgrantargs(&block->b_call, lock))
+ if (!nlmsvc_setgrantargs(call, lock))
goto failed_free;
/* Set notifier function for VFS, and init args */
- block->b_call.a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
- block->b_call.a_args.cookie = *cookie; /* see above */
+ call->a_args.lock.fl.fl_flags |= FL_SLEEP;
+ call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
+ call->a_args.cookie = *cookie; /* see above */
dprintk("lockd: created block %p...\n", block);
@@ -202,22 +208,23 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
block->b_daemon = rqstp->rq_server;
block->b_host = host;
block->b_file = file;
+ file->f_count++;
/* Add to file's list of blocks */
block->b_fnext = file->f_blocks;
file->f_blocks = block;
/* Set up RPC arguments for callback */
- call = &block->b_call;
- call->a_host = host;
+ block->b_call = call;
call->a_flags = RPC_TASK_ASYNC;
+ call->a_block = block;
return block;
failed_free:
kfree(block);
failed:
- nlm_release_host(host);
+ nlm_release_call(call);
return NULL;
}
@@ -227,29 +234,26 @@ failed:
* It is the caller's responsibility to check whether the file
* can be closed hereafter.
*/
-static int
-nlmsvc_delete_block(struct nlm_block *block, int unlock)
+static int nlmsvc_unlink_block(struct nlm_block *block)
{
- struct file_lock *fl = &block->b_call.a_args.lock.fl;
- struct nlm_file *file = block->b_file;
- struct nlm_block **bp;
- int status = 0;
-
- dprintk("lockd: deleting block %p...\n", block);
+ int status;
+ dprintk("lockd: unlinking block %p...\n", block);
/* Remove block from list */
+ status = posix_unblock_lock(block->b_file->f_file, &block->b_call->a_args.lock.fl);
nlmsvc_remove_block(block);
- if (unlock)
- status = posix_unblock_lock(file->f_file, fl);
+ return status;
+}
- /* If the block is in the middle of a GRANT callback,
- * don't kill it yet. */
- if (block->b_incall) {
- nlmsvc_insert_block(block, NLM_NEVER);
- block->b_done = 1;
- return status;
- }
+static void nlmsvc_free_block(struct kref *kref)
+{
+ struct nlm_block *block = container_of(kref, struct nlm_block, b_count);
+ struct nlm_file *file = block->b_file;
+ struct nlm_block **bp;
+ dprintk("lockd: freeing block %p...\n", block);
+
+ down(&file->f_sema);
/* Remove block from file's list of blocks */
for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) {
if (*bp == block) {
@@ -257,36 +261,93 @@ nlmsvc_delete_block(struct nlm_block *block, int unlock)
break;
}
}
+ up(&file->f_sema);
- if (block->b_host)
- nlm_release_host(block->b_host);
- nlmclnt_freegrantargs(&block->b_call);
+ nlmsvc_freegrantargs(block->b_call);
+ nlm_release_call(block->b_call);
+ nlm_release_file(block->b_file);
kfree(block);
- return status;
+}
+
+static void nlmsvc_release_block(struct nlm_block *block)
+{
+ if (block != NULL)
+ kref_put(&block->b_count, nlmsvc_free_block);
+}
+
+static void nlmsvc_act_mark(struct nlm_host *host, struct nlm_file *file)
+{
+ struct nlm_block *block;
+
+ down(&file->f_sema);
+ for (block = file->f_blocks; block != NULL; block = block->b_fnext)
+ block->b_host->h_inuse = 1;
+ up(&file->f_sema);
+}
+
+static void nlmsvc_act_unlock(struct nlm_host *host, struct nlm_file *file)
+{
+ struct nlm_block *block;
+
+restart:
+ down(&file->f_sema);
+ for (block = file->f_blocks; block != NULL; block = block->b_fnext) {
+ if (host != NULL && host != block->b_host)
+ continue;
+ if (!block->b_queued)
+ continue;
+ kref_get(&block->b_count);
+ up(&file->f_sema);
+ nlmsvc_unlink_block(block);
+ nlmsvc_release_block(block);
+ goto restart;
+ }
+ up(&file->f_sema);
}
/*
* Loop over all blocks and perform the action specified.
* (NLM_ACT_CHECK handled by nlmsvc_inspect_file).
*/
-int
+void
nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
{
- struct nlm_block *block, *next;
- /* XXX: Will everything get cleaned up if we don't unlock here? */
+ if (action == NLM_ACT_MARK)
+ nlmsvc_act_mark(host, file);
+ else
+ nlmsvc_act_unlock(host, file);
+}
- down(&file->f_sema);
- for (block = file->f_blocks; block; block = next) {
- next = block->b_fnext;
- if (action == NLM_ACT_MARK)
- block->b_host->h_inuse = 1;
- else if (action == NLM_ACT_UNLOCK) {
- if (host == NULL || host == block->b_host)
- nlmsvc_delete_block(block, 1);
- }
+/*
+ * Initialize arguments for GRANTED call. The nlm_rqst structure
+ * has been cleared already.
+ */
+static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
+{
+ locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
+ memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
+ call->a_args.lock.caller = system_utsname.nodename;
+ call->a_args.lock.oh.len = lock->oh.len;
+
+ /* set default data area */
+ call->a_args.lock.oh.data = call->a_owner;
+ call->a_args.lock.svid = lock->fl.fl_pid;
+
+ if (lock->oh.len > NLMCLNT_OHSIZE) {
+ void *data = kmalloc(lock->oh.len, GFP_KERNEL);
+ if (!data)
+ return 0;
+ call->a_args.lock.oh.data = (u8 *) data;
}
- up(&file->f_sema);
- return 0;
+
+ memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
+ return 1;
+}
+
+static void nlmsvc_freegrantargs(struct nlm_rqst *call)
+{
+ if (call->a_args.lock.oh.data != call->a_owner)
+ kfree(call->a_args.lock.oh.data);
}
/*
@@ -297,9 +358,9 @@ u32
nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
{
- struct file_lock *conflock;
- struct nlm_block *block;
+ struct nlm_block *block, *newblock = NULL;
int error;
+ u32 ret;
dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
file->f_file->f_dentry->d_inode->i_sb->s_id,
@@ -310,69 +371,65 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
wait);
- /* Get existing block (in case client is busy-waiting) */
- block = nlmsvc_lookup_block(file, lock, 0);
-
- lock->fl.fl_flags |= FL_LOCKD;
-
+ lock->fl.fl_flags &= ~FL_SLEEP;
again:
/* Lock file against concurrent access */
down(&file->f_sema);
+ /* Get existing block (in case client is busy-waiting) */
+ block = nlmsvc_lookup_block(file, lock);
+ if (block == NULL) {
+ if (newblock != NULL)
+ lock = &newblock->b_call->a_args.lock;
+ } else
+ lock = &block->b_call->a_args.lock;
- if (!(conflock = posix_test_lock(file->f_file, &lock->fl))) {
- error = posix_lock_file(file->f_file, &lock->fl);
+ error = posix_lock_file(file->f_file, &lock->fl);
+ lock->fl.fl_flags &= ~FL_SLEEP;
- if (block)
- nlmsvc_delete_block(block, 0);
- up(&file->f_sema);
+ dprintk("lockd: posix_lock_file returned %d\n", error);
- dprintk("lockd: posix_lock_file returned %d\n", -error);
- switch(-error) {
+ switch(error) {
case 0:
- return nlm_granted;
- case EDEADLK:
- return nlm_deadlock;
- case EAGAIN:
- return nlm_lck_denied;
+ ret = nlm_granted;
+ goto out;
+ case -EAGAIN:
+ break;
+ case -EDEADLK:
+ ret = nlm_deadlock;
+ goto out;
default: /* includes ENOLCK */
- return nlm_lck_denied_nolocks;
- }
+ ret = nlm_lck_denied_nolocks;
+ goto out;
}
- if (!wait) {
- up(&file->f_sema);
- return nlm_lck_denied;
- }
+ ret = nlm_lck_denied;
+ if (!wait)
+ goto out;
- if (posix_locks_deadlock(&lock->fl, conflock)) {
- up(&file->f_sema);
- return nlm_deadlock;
- }
+ ret = nlm_lck_blocked;
+ if (block != NULL)
+ goto out;
/* If we don't have a block, create and initialize it. Then
* retry because we may have slept in kmalloc. */
/* We have to release f_sema as nlmsvc_create_block may try to
* to claim it while doing host garbage collection */
- if (block == NULL) {
+ if (newblock == NULL) {
up(&file->f_sema);
dprintk("lockd: blocking on this lock (allocating).\n");
- if (!(block = nlmsvc_create_block(rqstp, file, lock, cookie)))
+ if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie)))
return nlm_lck_denied_nolocks;
goto again;
}
/* Append to list of blocked */
- nlmsvc_insert_block(block, NLM_NEVER);
-
- if (list_empty(&block->b_call.a_args.lock.fl.fl_block)) {
- /* Now add block to block list of the conflicting lock
- if we haven't done so. */
- dprintk("lockd: blocking on this lock.\n");
- posix_block_lock(conflock, &block->b_call.a_args.lock.fl);
- }
-
+ nlmsvc_insert_block(newblock, NLM_NEVER);
+out:
up(&file->f_sema);
- return nlm_lck_blocked;
+ nlmsvc_release_block(newblock);
+ nlmsvc_release_block(block);
+ dprintk("lockd: nlmsvc_lock returned %u\n", ret);
+ return ret;
}
/*
@@ -382,8 +439,6 @@ u32
nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
struct nlm_lock *conflock)
{
- struct file_lock *fl;
-
dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
file->f_file->f_dentry->d_inode->i_sb->s_id,
file->f_file->f_dentry->d_inode->i_ino,
@@ -391,13 +446,14 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
- if ((fl = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
+ if (posix_test_lock(file->f_file, &lock->fl, &conflock->fl)) {
dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
- fl->fl_type, (long long)fl->fl_start,
- (long long)fl->fl_end);
+ conflock->fl.fl_type,
+ (long long)conflock->fl.fl_start,
+ (long long)conflock->fl.fl_end);
conflock->caller = "somehost"; /* FIXME */
conflock->oh.len = 0; /* don't return OH info */
- conflock->fl = *fl;
+ conflock->svid = conflock->fl.fl_pid;
return nlm_lck_denied;
}
@@ -453,9 +509,12 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
(long long)lock->fl.fl_end);
down(&file->f_sema);
- if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL)
- status = nlmsvc_delete_block(block, 1);
+ block = nlmsvc_lookup_block(file, lock);
up(&file->f_sema);
+ if (block != NULL) {
+ status = nlmsvc_unlink_block(block);
+ nlmsvc_release_block(block);
+ }
return status ? nlm_lck_denied : nlm_granted;
}
@@ -473,7 +532,7 @@ nlmsvc_notify_blocked(struct file_lock *fl)
dprintk("lockd: VFS unblock notification for block %p\n", fl);
for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) {
- if (nlm_compare_locks(&block->b_call.a_args.lock.fl, fl)) {
+ if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
nlmsvc_insert_block(block, 0);
svc_wake_up(block->b_daemon);
return;
@@ -508,17 +567,13 @@ static void
nlmsvc_grant_blocked(struct nlm_block *block)
{
struct nlm_file *file = block->b_file;
- struct nlm_lock *lock = &block->b_call.a_args.lock;
- struct file_lock *conflock;
+ struct nlm_lock *lock = &block->b_call->a_args.lock;
int error;
dprintk("lockd: grant blocked lock %p\n", block);
- /* First thing is lock the file */
- down(&file->f_sema);
-
/* Unlink block request from list */
- nlmsvc_remove_block(block);
+ nlmsvc_unlink_block(block);
/* If b_granted is true this means we've been here before.
* Just retry the grant callback, possibly refreshing the RPC
@@ -529,24 +584,21 @@ nlmsvc_grant_blocked(struct nlm_block *block)
}
/* Try the lock operation again */
- if ((conflock = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
- /* Bummer, we blocked again */
+ lock->fl.fl_flags |= FL_SLEEP;
+ error = posix_lock_file(file->f_file, &lock->fl);
+ lock->fl.fl_flags &= ~FL_SLEEP;
+
+ switch (error) {
+ case 0:
+ break;
+ case -EAGAIN:
dprintk("lockd: lock still blocked\n");
nlmsvc_insert_block(block, NLM_NEVER);
- posix_block_lock(conflock, &lock->fl);
- up(&file->f_sema);
return;
- }
-
- /* Alright, no conflicting lock. Now lock it for real. If the
- * following yields an error, this is most probably due to low
- * memory. Retry the lock in a few seconds.
- */
- if ((error = posix_lock_file(file->f_file, &lock->fl)) < 0) {
+ default:
printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
-error, __FUNCTION__);
nlmsvc_insert_block(block, 10 * HZ);
- up(&file->f_sema);
return;
}
@@ -554,17 +606,15 @@ callback:
/* Lock was granted by VFS. */
dprintk("lockd: GRANTing blocked lock.\n");
block->b_granted = 1;
- block->b_incall = 1;
/* Schedule next grant callback in 30 seconds */
nlmsvc_insert_block(block, 30 * HZ);
/* Call the client */
- nlm_get_host(block->b_call.a_host);
- if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG,
+ kref_get(&block->b_count);
+ if (nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG,
&nlmsvc_grant_ops) < 0)
- nlm_release_host(block->b_call.a_host);
- up(&file->f_sema);
+ nlmsvc_release_block(block);
}
/*
@@ -578,20 +628,10 @@ callback:
static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
{
struct nlm_rqst *call = data;
- struct nlm_block *block;
+ struct nlm_block *block = call->a_block;
unsigned long timeout;
- struct sockaddr_in *peer_addr = RPC_PEERADDR(task->tk_client);
dprintk("lockd: GRANT_MSG RPC callback\n");
- dprintk("callback: looking for cookie %s, host (%u.%u.%u.%u)\n",
- nlmdbg_cookie2a(&call->a_args.cookie),
- NIPQUAD(peer_addr->sin_addr.s_addr));
- if (!(block = nlmsvc_find_block(&call->a_args.cookie, peer_addr))) {
- dprintk("lockd: no block for cookie %s, host (%u.%u.%u.%u)\n",
- nlmdbg_cookie2a(&call->a_args.cookie),
- NIPQUAD(peer_addr->sin_addr.s_addr));
- return;
- }
/* Technically, we should down the file semaphore here. Since we
* move the block towards the head of the queue only, no harm
@@ -608,13 +648,18 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
}
nlmsvc_insert_block(block, timeout);
svc_wake_up(block->b_daemon);
- block->b_incall = 0;
+}
- nlm_release_host(call->a_host);
+void nlmsvc_grant_release(void *data)
+{
+ struct nlm_rqst *call = data;
+
+ nlmsvc_release_block(call->a_block);
}
static const struct rpc_call_ops nlmsvc_grant_ops = {
.rpc_call_done = nlmsvc_grant_callback,
+ .rpc_release = nlmsvc_grant_release,
};
/*
@@ -634,25 +679,17 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status
return;
file = block->b_file;
- file->f_count++;
- down(&file->f_sema);
- block = nlmsvc_find_block(cookie, &rqstp->rq_addr);
if (block) {
if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
/* Try again in a couple of seconds */
nlmsvc_insert_block(block, 10 * HZ);
- up(&file->f_sema);
} else {
/* Lock is now held by client, or has been rejected.
* In both cases, the block should be removed. */
- up(&file->f_sema);
- if (status == NLM_LCK_GRANTED)
- nlmsvc_delete_block(block, 0);
- else
- nlmsvc_delete_block(block, 1);
+ nlmsvc_unlink_block(block);
}
}
- nlm_release_file(file);
+ nlmsvc_release_block(block);
}
/*
@@ -675,10 +712,12 @@ nlmsvc_retry_blocked(void)
break;
dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n",
block, block->b_when, block->b_done);
+ kref_get(&block->b_count);
if (block->b_done)
- nlmsvc_delete_block(block, 0);
+ nlmsvc_unlink_block(block);
else
nlmsvc_grant_blocked(block);
+ nlmsvc_release_block(block);
}
if ((block = nlm_blocked) && block->b_when != NLM_NEVER)
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 35681d9cf1f..d210cf304e9 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -22,10 +22,6 @@
#define NLMDBG_FACILITY NLMDBG_CLIENT
-static u32 nlmsvc_callback(struct svc_rqst *, u32, struct nlm_res *);
-
-static const struct rpc_call_ops nlmsvc_callback_ops;
-
#ifdef CONFIG_LOCKD_V4
static u32
cast_to_nlm(u32 status, u32 vers)
@@ -262,83 +258,91 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
}
/*
+ * This is the generic lockd callback for async RPC calls
+ */
+static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
+{
+ dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+ -task->tk_status);
+}
+
+static void nlmsvc_callback_release(void *data)
+{
+ nlm_release_call(data);
+}
+
+static const struct rpc_call_ops nlmsvc_callback_ops = {
+ .rpc_call_done = nlmsvc_callback_exit,
+ .rpc_release = nlmsvc_callback_release,
+};
+
+/*
* `Async' versions of the above service routines. They aren't really,
* because we send the callback before the reply proper. I hope this
* doesn't break any clients.
*/
-static int
-nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
- void *resp)
+static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+ int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res *))
{
- struct nlm_res res;
- u32 stat;
+ struct nlm_host *host;
+ struct nlm_rqst *call;
+ int stat;
- dprintk("lockd: TEST_MSG called\n");
- memset(&res, 0, sizeof(res));
+ host = nlmsvc_lookup_host(rqstp);
+ if (host == NULL)
+ return rpc_system_err;
- if ((stat = nlmsvc_proc_test(rqstp, argp, &res)) == 0)
- stat = nlmsvc_callback(rqstp, NLMPROC_TEST_RES, &res);
- return stat;
+ call = nlm_alloc_call(host);
+ if (call == NULL)
+ return rpc_system_err;
+
+ stat = func(rqstp, argp, &call->a_res);
+ if (stat != 0) {
+ nlm_release_call(call);
+ return stat;
+ }
+
+ call->a_flags = RPC_TASK_ASYNC;
+ if (nlm_async_reply(call, proc, &nlmsvc_callback_ops) < 0)
+ return rpc_system_err;
+ return rpc_success;
}
-static int
-nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
+ dprintk("lockd: TEST_MSG called\n");
+ return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test);
+}
+static int nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+ void *resp)
+{
dprintk("lockd: LOCK_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlmsvc_proc_lock(rqstp, argp, &res)) == 0)
- stat = nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, &res);
- return stat;
+ return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock);
}
-static int
-nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: CANCEL_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlmsvc_proc_cancel(rqstp, argp, &res)) == 0)
- stat = nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
- return stat;
+ return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel);
}
static int
nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: UNLOCK_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlmsvc_proc_unlock(rqstp, argp, &res)) == 0)
- stat = nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
- return stat;
+ return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock);
}
static int
nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: GRANTED_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlmsvc_proc_granted(rqstp, argp, &res)) == 0)
- stat = nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
- return stat;
+ return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlmsvc_proc_granted);
}
/*
@@ -497,55 +501,6 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp,
}
/*
- * This is the generic lockd callback for async RPC calls
- */
-static u32
-nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
-{
- struct nlm_host *host;
- struct nlm_rqst *call;
-
- if (!(call = nlmclnt_alloc_call()))
- return rpc_system_err;
-
- host = nlmclnt_lookup_host(&rqstp->rq_addr,
- rqstp->rq_prot, rqstp->rq_vers);
- if (!host) {
- kfree(call);
- return rpc_system_err;
- }
-
- call->a_flags = RPC_TASK_ASYNC;
- call->a_host = host;
- memcpy(&call->a_args, resp, sizeof(*resp));
-
- if (nlmsvc_async_call(call, proc, &nlmsvc_callback_ops) < 0)
- goto error;
-
- return rpc_success;
- error:
- nlm_release_host(host);
- kfree(call);
- return rpc_system_err;
-}
-
-static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
-{
- struct nlm_rqst *call = data;
-
- if (task->tk_status < 0) {
- dprintk("lockd: %4d callback failed (errno = %d)\n",
- task->tk_pid, -task->tk_status);
- }
- nlm_release_host(call->a_host);
- kfree(call);
-}
-
-static const struct rpc_call_ops nlmsvc_callback_ops = {
- .rpc_call_done = nlmsvc_callback_exit,
-};
-
-/*
* NLM Server procedures.
*/
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index 4943fb7836c..27288c83da9 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -88,7 +88,7 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
* Traverse all shares for a given file (and host).
* NLM_ACT_CHECK is handled by nlmsvc_inspect_file.
*/
-int
+void
nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action)
{
struct nlm_share *share, **shpp;
@@ -106,6 +106,4 @@ nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action)
}
shpp = &share->s_next;
}
-
- return 0;
}
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 62f4a385177..c7a6e3ae44d 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -182,7 +182,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, int action)
again:
file->f_locks = 0;
for (fl = inode->i_flock; fl; fl = fl->fl_next) {
- if (!(fl->fl_flags & FL_LOCKD))
+ if (fl->fl_lmops != &nlmsvc_lock_operations)
continue;
/* update current lock count */
@@ -224,9 +224,8 @@ nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action)
if (file->f_count || file->f_blocks || file->f_shares)
return 1;
} else {
- if (nlmsvc_traverse_blocks(host, file, action)
- || nlmsvc_traverse_shares(host, file, action))
- return 1;
+ nlmsvc_traverse_blocks(host, file, action);
+ nlmsvc_traverse_shares(host, file, action);
}
return nlm_traverse_locks(host, file, action);
}
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 1d700a4dd0b..f22a3764461 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -131,10 +131,11 @@ nlm_decode_lock(u32 *p, struct nlm_lock *lock)
|| !(p = nlm_decode_fh(p, &lock->fh))
|| !(p = nlm_decode_oh(p, &lock->oh)))
return NULL;
+ lock->svid = ntohl(*p++);
locks_init_lock(fl);
fl->fl_owner = current->files;
- fl->fl_pid = ntohl(*p++);
+ fl->fl_pid = (pid_t)lock->svid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
start = ntohl(*p++);
@@ -174,7 +175,7 @@ nlm_encode_lock(u32 *p, struct nlm_lock *lock)
else
len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
- *p++ = htonl(fl->fl_pid);
+ *p++ = htonl(lock->svid);
*p++ = htonl(start);
*p++ = htonl(len);
@@ -197,7 +198,7 @@ nlm_encode_testres(u32 *p, struct nlm_res *resp)
struct file_lock *fl = &resp->lock.fl;
*p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
- *p++ = htonl(fl->fl_pid);
+ *p++ = htonl(resp->lock.svid);
/* Encode owner handle. */
if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -298,7 +299,8 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
- lock->fl.fl_pid = ~(u32) 0;
+ lock->svid = ~(u32) 0;
+ lock->fl.fl_pid = (pid_t)lock->svid;
if (!(p = nlm_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -415,7 +417,8 @@ nlmclt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
memset(&resp->lock, 0, sizeof(resp->lock));
locks_init_lock(fl);
excl = ntohl(*p++);
- fl->fl_pid = ntohl(*p++);
+ resp->lock.svid = ntohl(*p++);
+ fl->fl_pid = (pid_t)resp->lock.svid;
if (!(p = nlm_decode_oh(p, &resp->lock.oh)))
return -EIO;
@@ -543,7 +546,9 @@ nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
.p_proc = NLMPROC_##proc, \
.p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \
.p_decode = (kxdrproc_t) nlmclt_decode_##restype, \
- .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2 \
+ .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2, \
+ .p_statidx = NLMPROC_##proc, \
+ .p_name = #proc, \
}
static struct rpc_procinfo nlm_procedures[] = {
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index fdcf105a530..36eb175ec33 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -130,10 +130,11 @@ nlm4_decode_lock(u32 *p, struct nlm_lock *lock)
|| !(p = nlm4_decode_fh(p, &lock->fh))
|| !(p = nlm4_decode_oh(p, &lock->oh)))
return NULL;
+ lock->svid = ntohl(*p++);
locks_init_lock(fl);
fl->fl_owner = current->files;
- fl->fl_pid = ntohl(*p++);
+ fl->fl_pid = (pid_t)lock->svid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
p = xdr_decode_hyper(p, &start);
@@ -167,7 +168,7 @@ nlm4_encode_lock(u32 *p, struct nlm_lock *lock)
|| (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
return NULL;
- *p++ = htonl(fl->fl_pid);
+ *p++ = htonl(lock->svid);
start = loff_t_to_s64(fl->fl_start);
if (fl->fl_end == OFFSET_MAX)
@@ -198,7 +199,7 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
struct file_lock *fl = &resp->lock.fl;
*p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
- *p++ = htonl(fl->fl_pid);
+ *p++ = htonl(resp->lock.svid);
/* Encode owner handle. */
if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -212,8 +213,8 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
p = xdr_encode_hyper(p, start);
p = xdr_encode_hyper(p, len);
- dprintk("xdr: encode_testres (status %d pid %d type %d start %Ld end %Ld)\n",
- resp->status, fl->fl_pid, fl->fl_type,
+ dprintk("xdr: encode_testres (status %u pid %d type %d start %Ld end %Ld)\n",
+ resp->status, (int)resp->lock.svid, fl->fl_type,
(long long)fl->fl_start, (long long)fl->fl_end);
}
@@ -303,7 +304,8 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
- lock->fl.fl_pid = ~(u32) 0;
+ lock->svid = ~(u32) 0;
+ lock->fl.fl_pid = (pid_t)lock->svid;
if (!(p = nlm4_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -420,7 +422,8 @@ nlm4clt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
memset(&resp->lock, 0, sizeof(resp->lock));
locks_init_lock(fl);
excl = ntohl(*p++);
- fl->fl_pid = ntohl(*p++);
+ resp->lock.svid = ntohl(*p++);
+ fl->fl_pid = (pid_t)resp->lock.svid;
if (!(p = nlm4_decode_oh(p, &resp->lock.oh)))
return -EIO;
@@ -548,7 +551,9 @@ nlm4clt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
.p_proc = NLMPROC_##proc, \
.p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \
.p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \
- .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2 \
+ .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2, \
+ .p_statidx = NLMPROC_##proc, \
+ .p_name = #proc, \
}
static struct rpc_procinfo nlm4_procedures[] = {
diff --git a/fs/locks.c b/fs/locks.c
index 909eab8fb1d..56f996e98bb 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -139,10 +139,7 @@ int lease_break_time = 45;
#define for_each_lock(inode, lockp) \
for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
-LIST_HEAD(file_lock_list);
-
-EXPORT_SYMBOL(file_lock_list);
-
+static LIST_HEAD(file_lock_list);
static LIST_HEAD(blocked_list);
static kmem_cache_t *filelock_cache;
@@ -153,6 +150,21 @@ static struct file_lock *locks_alloc_lock(void)
return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
}
+static void locks_release_private(struct file_lock *fl)
+{
+ if (fl->fl_ops) {
+ if (fl->fl_ops->fl_release_private)
+ fl->fl_ops->fl_release_private(fl);
+ fl->fl_ops = NULL;
+ }
+ if (fl->fl_lmops) {
+ if (fl->fl_lmops->fl_release_private)
+ fl->fl_lmops->fl_release_private(fl);
+ fl->fl_lmops = NULL;
+ }
+
+}
+
/* Free a lock which is not in use. */
static void locks_free_lock(struct file_lock *fl)
{
@@ -169,18 +181,7 @@ static void locks_free_lock(struct file_lock *fl)
if (!list_empty(&fl->fl_link))
panic("Attempting to free lock on active lock list");
- if (fl->fl_ops) {
- if (fl->fl_ops->fl_release_private)
- fl->fl_ops->fl_release_private(fl);
- fl->fl_ops = NULL;
- }
-
- if (fl->fl_lmops) {
- if (fl->fl_lmops->fl_release_private)
- fl->fl_lmops->fl_release_private(fl);
- fl->fl_lmops = NULL;
- }
-
+ locks_release_private(fl);
kmem_cache_free(filelock_cache, fl);
}
@@ -218,24 +219,46 @@ static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags)
locks_init_lock(lock);
}
+static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
+{
+ if (fl->fl_ops) {
+ if (fl->fl_ops->fl_copy_lock)
+ fl->fl_ops->fl_copy_lock(new, fl);
+ new->fl_ops = fl->fl_ops;
+ }
+ if (fl->fl_lmops) {
+ if (fl->fl_lmops->fl_copy_lock)
+ fl->fl_lmops->fl_copy_lock(new, fl);
+ new->fl_lmops = fl->fl_lmops;
+ }
+}
+
/*
* Initialize a new lock from an existing file_lock structure.
*/
-void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+static void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
{
new->fl_owner = fl->fl_owner;
new->fl_pid = fl->fl_pid;
- new->fl_file = fl->fl_file;
+ new->fl_file = NULL;
new->fl_flags = fl->fl_flags;
new->fl_type = fl->fl_type;
new->fl_start = fl->fl_start;
new->fl_end = fl->fl_end;
+ new->fl_ops = NULL;
+ new->fl_lmops = NULL;
+}
+
+void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+ locks_release_private(new);
+
+ __locks_copy_lock(new, fl);
+ new->fl_file = fl->fl_file;
new->fl_ops = fl->fl_ops;
new->fl_lmops = fl->fl_lmops;
- if (fl->fl_ops && fl->fl_ops->fl_copy_lock)
- fl->fl_ops->fl_copy_lock(new, fl);
- if (fl->fl_lmops && fl->fl_lmops->fl_copy_lock)
- fl->fl_lmops->fl_copy_lock(new, fl);
+
+ locks_copy_private(new, fl);
}
EXPORT_SYMBOL(locks_copy_lock);
@@ -654,8 +677,9 @@ static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *w
return result;
}
-struct file_lock *
-posix_test_lock(struct file *filp, struct file_lock *fl)
+int
+posix_test_lock(struct file *filp, struct file_lock *fl,
+ struct file_lock *conflock)
{
struct file_lock *cfl;
@@ -666,9 +690,13 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
if (posix_locks_conflict(cfl, fl))
break;
}
+ if (cfl) {
+ __locks_copy_lock(conflock, cfl);
+ unlock_kernel();
+ return 1;
+ }
unlock_kernel();
-
- return (cfl);
+ return 0;
}
EXPORT_SYMBOL(posix_test_lock);
@@ -904,7 +932,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request)
fl->fl_start = request->fl_start;
fl->fl_end = request->fl_end;
fl->fl_type = request->fl_type;
- fl->fl_u = request->fl_u;
+ locks_release_private(fl);
+ locks_copy_private(fl, request);
request = fl;
added = 1;
}
@@ -1544,7 +1573,7 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
*/
int fcntl_getlk(struct file *filp, struct flock __user *l)
{
- struct file_lock *fl, file_lock;
+ struct file_lock *fl, cfl, file_lock;
struct flock flock;
int error;
@@ -1568,7 +1597,7 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
else
fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
} else {
- fl = posix_test_lock(filp, &file_lock);
+ fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
}
flock.l_type = F_UNLCK;
@@ -1698,7 +1727,7 @@ out:
*/
int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
{
- struct file_lock *fl, file_lock;
+ struct file_lock *fl, cfl, file_lock;
struct flock64 flock;
int error;
@@ -1722,7 +1751,7 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
else
fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
} else {
- fl = posix_test_lock(filp, &file_lock);
+ fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
}
flock.l_type = F_UNLCK;
@@ -1936,21 +1965,6 @@ void locks_remove_flock(struct file *filp)
}
/**
- * posix_block_lock - blocks waiting for a file lock
- * @blocker: the lock which is blocking
- * @waiter: the lock which conflicts and has to wait
- *
- * lockd needs to block waiting for locks.
- */
-void
-posix_block_lock(struct file_lock *blocker, struct file_lock *waiter)
-{
- locks_insert_block(blocker, waiter);
-}
-
-EXPORT_SYMBOL(posix_block_lock);
-
-/**
* posix_unblock_lock - stop waiting for a file lock
* @filp: how the file was opened
* @waiter: the lock which was waiting
diff --git a/fs/namespace.c b/fs/namespace.c
index 39c81a8d631..71e75bcf4d2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -399,6 +399,44 @@ struct seq_operations mounts_op = {
.show = show_vfsmnt
};
+static int show_vfsstat(struct seq_file *m, void *v)
+{
+ struct vfsmount *mnt = v;
+ int err = 0;
+
+ /* device */
+ if (mnt->mnt_devname) {
+ seq_puts(m, "device ");
+ mangle(m, mnt->mnt_devname);
+ } else
+ seq_puts(m, "no device");
+
+ /* mount point */
+ seq_puts(m, " mounted on ");
+ seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+ seq_putc(m, ' ');
+
+ /* file system type */
+ seq_puts(m, "with fstype ");
+ mangle(m, mnt->mnt_sb->s_type->name);
+
+ /* optional statistics */
+ if (mnt->mnt_sb->s_op->show_stats) {
+ seq_putc(m, ' ');
+ err = mnt->mnt_sb->s_op->show_stats(m, mnt);
+ }
+
+ seq_putc(m, '\n');
+ return err;
+}
+
+struct seq_operations mountstats_op = {
+ .start = m_start,
+ .next = m_next,
+ .stop = m_stop,
+ .show = show_vfsstat,
+};
+
/**
* may_umount_tree - check if a mount tree is busy
* @mnt: root of mount tree
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index fcd97406a77..99d2cfbce86 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -55,7 +55,12 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
complete(&nfs_callback_info.started);
- while (nfs_callback_info.users != 0 || !signalled()) {
+ for(;;) {
+ if (signalled()) {
+ if (nfs_callback_info.users == 0)
+ break;
+ flush_signals(current);
+ }
/*
* Listen for a request on the socket
*/
@@ -73,6 +78,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
svc_process(serv, rqstp);
}
+ svc_exit_thread(rqstp);
nfs_callback_info.pid = 0;
complete(&nfs_callback_info.stopped);
unlock_kernel();
@@ -134,11 +140,13 @@ int nfs_callback_down(void)
lock_kernel();
down(&nfs_callback_sema);
- if (--nfs_callback_info.users || nfs_callback_info.pid == 0)
- goto out;
- kill_proc(nfs_callback_info.pid, SIGKILL, 1);
- wait_for_completion(&nfs_callback_info.stopped);
-out:
+ nfs_callback_info.users--;
+ do {
+ if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0)
+ break;
+ if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0)
+ break;
+ } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
up(&nfs_callback_sema);
unlock_kernel();
return ret;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 7c33b9a81a9..05c38cf40b6 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -330,7 +330,7 @@ static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
{
- uint32_t *savep;
+ uint32_t *savep = NULL;
unsigned status = res->status;
if (unlikely(status != 0))
@@ -358,23 +358,26 @@ static unsigned process_op(struct svc_rqst *rqstp,
struct xdr_stream *xdr_in, void *argp,
struct xdr_stream *xdr_out, void *resp)
{
- struct callback_op *op;
- unsigned int op_nr;
+ struct callback_op *op = &callback_ops[0];
+ unsigned int op_nr = OP_CB_ILLEGAL;
unsigned int status = 0;
long maxlen;
unsigned res;
dprintk("%s: start\n", __FUNCTION__);
status = decode_op_hdr(xdr_in, &op_nr);
- if (unlikely(status != 0)) {
- op_nr = OP_CB_ILLEGAL;
- op = &callback_ops[0];
- } else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) {
- op_nr = OP_CB_ILLEGAL;
- op = &callback_ops[0];
- status = htonl(NFS4ERR_OP_ILLEGAL);
- } else
- op = &callback_ops[op_nr];
+ if (likely(status == 0)) {
+ switch (op_nr) {
+ case OP_CB_GETATTR:
+ case OP_CB_RECALL:
+ op = &callback_ops[op_nr];
+ break;
+ default:
+ op_nr = OP_CB_ILLEGAL;
+ op = &callback_ops[0];
+ status = htonl(NFS4ERR_OP_ILLEGAL);
+ }
+ }
maxlen = xdr_out->end - xdr_out->p;
if (maxlen > 0 && maxlen < PAGE_SIZE) {
@@ -416,6 +419,7 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp
decode_compound_hdr_arg(&xdr_in, &hdr_arg);
hdr_res.taglen = hdr_arg.taglen;
hdr_res.tag = hdr_arg.tag;
+ hdr_res.nops = NULL;
encode_compound_hdr_res(&xdr_out, &hdr_res);
for (;;) {
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index c6f07c1c71e..d3be923d4e4 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -421,3 +421,22 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
nfs_free_delegation(delegation);
}
}
+
+int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
+{
+ struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_delegation *delegation;
+ int res = 0;
+
+ if (nfsi->delegation_state == 0)
+ return 0;
+ spin_lock(&clp->cl_lock);
+ delegation = nfsi->delegation;
+ if (delegation != NULL) {
+ memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
+ res = 1;
+ }
+ spin_unlock(&clp->cl_lock);
+ return res;
+}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 7a0b2bfce77..3858694652f 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -41,6 +41,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
+int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
static inline int nfs_have_delegation(struct inode *inode, int flags)
{
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a1554bead69..06c48b385c9 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -34,6 +34,7 @@
#include "nfs4_fs.h"
#include "delegation.h"
+#include "iostat.h"
#define NFS_PARANOIA 1
/* #define NFS_DEBUG_VERBOSE 1 */
@@ -129,6 +130,9 @@ nfs_opendir(struct inode *inode, struct file *filp)
{
int res = 0;
+ dfprintk(VFS, "NFS: opendir(%s/%ld)\n",
+ inode->i_sb->s_id, inode->i_ino);
+
lock_kernel();
/* Call generic open code in order to cache credentials */
if (!res)
@@ -172,7 +176,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
unsigned long timestamp;
int error;
- dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index);
+ dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
+ __FUNCTION__, (long long)desc->entry->cookie,
+ page->index);
again:
timestamp = jiffies;
@@ -244,7 +250,8 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
status;
while((status = dir_decode(desc)) == 0) {
- dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie);
+ dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n",
+ __FUNCTION__, (unsigned long long)entry->cookie);
if (entry->prev_cookie == *desc->dir_cookie)
break;
if (loop_count++ > 200) {
@@ -252,7 +259,6 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
schedule();
}
}
- dfprintk(VFS, "NFS: find_dirent() returns %d\n", status);
return status;
}
@@ -276,7 +282,8 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
if (status)
break;
- dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index);
+ dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n",
+ (unsigned long long)entry->cookie, desc->current_index);
if (desc->file->f_pos == desc->current_index) {
*desc->dir_cookie = entry->cookie;
@@ -288,7 +295,6 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
schedule();
}
}
- dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status);
return status;
}
@@ -303,7 +309,9 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
struct page *page;
int status;
- dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index);
+ dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n",
+ __FUNCTION__, desc->page_index,
+ (long long) *desc->dir_cookie);
page = read_cache_page(inode->i_mapping, desc->page_index,
(filler_t *)nfs_readdir_filler, desc);
@@ -324,7 +332,7 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
if (status < 0)
dir_page_release(desc);
out:
- dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status);
+ dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status);
return status;
read_error:
page_cache_release(page);
@@ -346,13 +354,15 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
/* Always search-by-index from the beginning of the cache */
if (*desc->dir_cookie == 0) {
- dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos);
+ dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n",
+ (long long)desc->file->f_pos);
desc->page_index = 0;
desc->entry->cookie = desc->entry->prev_cookie = 0;
desc->entry->eof = 0;
desc->current_index = 0;
} else
- dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+ dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n",
+ (unsigned long long)*desc->dir_cookie);
for (;;) {
res = find_dirent_page(desc);
@@ -365,7 +375,8 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
schedule();
}
}
- dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res);
+
+ dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, res);
return res;
}
@@ -390,7 +401,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
int loop_count = 0,
res;
- dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie);
+ dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n",
+ (unsigned long long)entry->cookie);
for(;;) {
unsigned d_type = DT_UNKNOWN;
@@ -427,7 +439,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
dir_page_release(desc);
if (dentry != NULL)
dput(dentry);
- dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res);
+ dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
+ (unsigned long long)*desc->dir_cookie, res);
return res;
}
@@ -453,7 +466,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
struct page *page = NULL;
int status;
- dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+ dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
+ (unsigned long long)*desc->dir_cookie);
page = alloc_page(GFP_HIGHUSER);
if (!page) {
@@ -485,7 +499,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
desc->entry->cookie = desc->entry->prev_cookie = 0;
desc->entry->eof = 0;
out:
- dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status);
+ dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
+ __FUNCTION__, status);
return status;
out_release:
dir_page_release(desc);
@@ -507,6 +522,11 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
struct nfs_fattr fattr;
long res;
+ dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ (long long)filp->f_pos);
+ nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
+
lock_kernel();
res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
@@ -566,9 +586,12 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
}
}
unlock_kernel();
- if (res < 0)
- return res;
- return 0;
+ if (res > 0)
+ res = 0;
+ dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ res);
+ return res;
}
loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
@@ -599,6 +622,10 @@ out:
*/
int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
{
+ dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ datasync);
+
return 0;
}
@@ -713,6 +740,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
parent = dget_parent(dentry);
lock_kernel();
dir = parent->d_inode;
+ nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = dentry->d_inode;
if (!inode) {
@@ -722,8 +750,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
}
if (is_bad_inode(inode)) {
- dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n",
- dentry->d_parent->d_name.name, dentry->d_name.name);
+ dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
+ __FUNCTION__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
goto out_bad;
}
@@ -755,6 +784,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
out_valid:
unlock_kernel();
dput(parent);
+ dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
+ __FUNCTION__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
return 1;
out_zap_parent:
nfs_zap_caches(dir);
@@ -771,6 +803,9 @@ out_zap_parent:
d_drop(dentry);
unlock_kernel();
dput(parent);
+ dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
+ __FUNCTION__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
return 0;
}
@@ -844,6 +879,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
dfprintk(VFS, "NFS: lookup(%s/%s)\n",
dentry->d_parent->d_name.name, dentry->d_name.name);
+ nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
res = ERR_PTR(-ENAMETOOLONG);
if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
@@ -865,9 +901,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
res = ERR_PTR(error);
goto out_unlock;
}
- res = ERR_PTR(-EACCES);
inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
- if (!inode)
+ res = (struct dentry *)inode;
+ if (IS_ERR(res))
goto out_unlock;
no_entry:
res = d_add_unique(dentry, inode);
@@ -912,6 +948,9 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
struct dentry *res = NULL;
int error;
+ dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+
/* Check that we are indeed trying to open this file */
if (!is_atomic_open(dir, nd))
goto no_open;
@@ -1057,7 +1096,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
return NULL;
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
- if (!inode) {
+ if (IS_ERR(inode)) {
dput(dentry);
return NULL;
}
@@ -1095,9 +1134,9 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
if (error < 0)
goto out_err;
}
- error = -ENOMEM;
inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
- if (inode == NULL)
+ error = PTR_ERR(inode);
+ if (IS_ERR(inode))
goto out_err;
d_instantiate(dentry, inode);
return 0;
@@ -1119,8 +1158,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
int error;
int open_flags = 0;
- dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
attr.ia_mode = mode;
attr.ia_valid = ATTR_MODE;
@@ -1153,8 +1192,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
struct iattr attr;
int status;
- dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
if (!new_valid_dev(rdev))
return -EINVAL;
@@ -1186,8 +1225,8 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct iattr attr;
int error;
- dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
attr.ia_valid = ATTR_MODE;
attr.ia_mode = mode | S_IFDIR;
@@ -1212,8 +1251,8 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
- dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
lock_kernel();
nfs_begin_data_update(dir);
@@ -1241,6 +1280,7 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
atomic_read(&dentry->d_count));
+ nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
#ifdef NFS_PARANOIA
if (!dentry->d_inode)
@@ -1268,8 +1308,8 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
sillycounter++;
sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
- dfprintk(VFS, "trying to rename %s to %s\n",
- dentry->d_name.name, silly);
+ dfprintk(VFS, "NFS: trying to rename %s to %s\n",
+ dentry->d_name.name, silly);
sdentry = lookup_one_len(silly, dentry->d_parent, slen);
/*
@@ -1640,6 +1680,8 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
struct rpc_cred *cred;
int res = 0;
+ nfs_inc_stats(inode, NFSIOS_VFSACCESS);
+
if (mask == 0)
goto out;
/* Is this sys_access() ? */
@@ -1679,13 +1721,15 @@ force_lookup:
res = PTR_ERR(cred);
unlock_kernel();
out:
+ dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
+ inode->i_sb->s_id, inode->i_ino, mask, res);
return res;
out_notsup:
res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (res == 0)
res = generic_permission(inode, mask, NULL);
unlock_kernel();
- return res;
+ goto out;
}
/*
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4ae2f3b33fe..0f583cb16dd 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -7,11 +7,11 @@
*
* There are important applications whose performance or correctness
* depends on uncached access to file data. Database clusters
- * (multiple copies of the same instance running on separate hosts)
+ * (multiple copies of the same instance running on separate hosts)
* implement their own cache coherency protocol that subsumes file
- * system cache protocols. Applications that process datasets
- * considerably larger than the client's memory do not always benefit
- * from a local cache. A streaming video server, for instance, has no
+ * system cache protocols. Applications that process datasets
+ * considerably larger than the client's memory do not always benefit
+ * from a local cache. A streaming video server, for instance, has no
* need to cache the contents of a file.
*
* When an application requests uncached I/O, all read and write requests
@@ -34,6 +34,7 @@
* 08 Jun 2003 Port to 2.5 APIs --cel
* 31 Mar 2004 Handle direct I/O without VFS support --cel
* 15 Sep 2004 Parallel async reads --cel
+ * 04 May 2005 support O_DIRECT with aio --cel
*
*/
@@ -54,10 +55,10 @@
#include <asm/uaccess.h>
#include <asm/atomic.h>
+#include "iostat.h"
+
#define NFSDBG_FACILITY NFSDBG_VFS
-#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT)
-static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty);
static kmem_cache_t *nfs_direct_cachep;
/*
@@ -65,38 +66,78 @@ static kmem_cache_t *nfs_direct_cachep;
*/
struct nfs_direct_req {
struct kref kref; /* release manager */
- struct list_head list; /* nfs_read_data structs */
- wait_queue_head_t wait; /* wait for i/o completion */
+
+ /* I/O parameters */
+ struct list_head list, /* nfs_read/write_data structs */
+ rewrite_list; /* saved nfs_write_data structs */
+ struct nfs_open_context *ctx; /* file open context info */
+ struct kiocb * iocb; /* controlling i/o request */
+ struct inode * inode; /* target file of i/o */
+ unsigned long user_addr; /* location of user's buffer */
+ size_t user_count; /* total bytes to move */
+ loff_t pos; /* starting offset in file */
struct page ** pages; /* pages in our buffer */
unsigned int npages; /* count of pages */
- atomic_t complete, /* i/os we're waiting for */
- count, /* bytes actually processed */
+
+ /* completion state */
+ spinlock_t lock; /* protect completion state */
+ int outstanding; /* i/os we're waiting for */
+ ssize_t count, /* bytes actually processed */
error; /* any reported error */
+ struct completion completion; /* wait for i/o completion */
+
+ /* commit state */
+ struct nfs_write_data * commit_data; /* special write_data for commits */
+ int flags;
+#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
+#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
+ struct nfs_writeverf verf; /* unstable write verifier */
};
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync);
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
/**
- * nfs_get_user_pages - find and set up pages underlying user's buffer
- * rw: direction (read or write)
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * @pages: returned array of page struct pointers underlying user's buffer
+ * nfs_direct_IO - NFS address space operation for direct I/O
+ * @rw: direction (read or write)
+ * @iocb: target I/O control block
+ * @iov: array of vectors that define I/O buffer
+ * @pos: offset in file to begin the operation
+ * @nr_segs: size of iovec array
+ *
+ * The presence of this routine in the address space ops vector means
+ * the NFS client supports direct I/O. However, we shunt off direct
+ * read and write requests before the VFS gets them, so this method
+ * should never be called.
*/
-static inline int
-nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
- struct page ***pages)
+ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
+{
+ struct dentry *dentry = iocb->ki_filp->f_dentry;
+
+ dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
+ dentry->d_name.name, (long long) pos, nr_segs);
+
+ return -EINVAL;
+}
+
+static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+{
+ int i;
+ for (i = 0; i < npages; i++) {
+ struct page *page = pages[i];
+ if (do_dirty && !PageCompound(page))
+ set_page_dirty_lock(page);
+ page_cache_release(page);
+ }
+ kfree(pages);
+}
+
+static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages)
{
int result = -ENOMEM;
unsigned long page_count;
size_t array_size;
- /* set an arbitrary limit to prevent type overflow */
- /* XXX: this can probably be as large as INT_MAX */
- if (size > MAX_DIRECTIO_SIZE) {
- *pages = NULL;
- return -EFBIG;
- }
-
page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
page_count -= user_addr >> PAGE_SHIFT;
@@ -108,75 +149,117 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
page_count, (rw == READ), 0,
*pages, NULL);
up_read(&current->mm->mmap_sem);
- /*
- * If we got fewer pages than expected from get_user_pages(),
- * the user buffer runs off the end of a mapping; return EFAULT.
- */
- if (result >= 0 && result < page_count) {
- nfs_free_user_pages(*pages, result, 0);
+ if (result != page_count) {
+ /*
+ * If we got fewer pages than expected from
+ * get_user_pages(), the user buffer runs off the
+ * end of a mapping; return EFAULT.
+ */
+ if (result >= 0) {
+ nfs_free_user_pages(*pages, result, 0);
+ result = -EFAULT;
+ } else
+ kfree(*pages);
*pages = NULL;
- result = -EFAULT;
}
}
return result;
}
-/**
- * nfs_free_user_pages - tear down page struct array
- * @pages: array of page struct pointers underlying target buffer
- * @npages: number of pages in the array
- * @do_dirty: dirty the pages as we release them
- */
-static void
-nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
{
- int i;
- for (i = 0; i < npages; i++) {
- struct page *page = pages[i];
- if (do_dirty && !PageCompound(page))
- set_page_dirty_lock(page);
- page_cache_release(page);
- }
- kfree(pages);
+ struct nfs_direct_req *dreq;
+
+ dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+ if (!dreq)
+ return NULL;
+
+ kref_init(&dreq->kref);
+ init_completion(&dreq->completion);
+ INIT_LIST_HEAD(&dreq->list);
+ INIT_LIST_HEAD(&dreq->rewrite_list);
+ dreq->iocb = NULL;
+ dreq->ctx = NULL;
+ spin_lock_init(&dreq->lock);
+ dreq->outstanding = 0;
+ dreq->count = 0;
+ dreq->error = 0;
+ dreq->flags = 0;
+
+ return dreq;
}
-/**
- * nfs_direct_req_release - release nfs_direct_req structure for direct read
- * @kref: kref object embedded in an nfs_direct_req structure
- *
- */
static void nfs_direct_req_release(struct kref *kref)
{
struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
+
+ if (dreq->ctx != NULL)
+ put_nfs_open_context(dreq->ctx);
kmem_cache_free(nfs_direct_cachep, dreq);
}
-/**
- * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
- * @count: count of bytes for the read request
- * @rsize: local rsize setting
+/*
+ * Collects and returns the final error value/byte-count.
+ */
+static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
+{
+ ssize_t result = -EIOCBQUEUED;
+
+ /* Async requests don't wait here */
+ if (dreq->iocb)
+ goto out;
+
+ result = wait_for_completion_interruptible(&dreq->completion);
+
+ if (!result)
+ result = dreq->error;
+ if (!result)
+ result = dreq->count;
+
+out:
+ kref_put(&dreq->kref, nfs_direct_req_release);
+ return (ssize_t) result;
+}
+
+/*
+ * We must hold a reference to all the pages in this direct read request
+ * until the RPCs complete. This could be long *after* we are woken up in
+ * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
*
+ * In addition, synchronous I/O uses a stack-allocated iocb. Thus we
+ * can't trust the iocb is still valid here if this is a synchronous
+ * request. If the waiter is woken prematurely, the iocb is long gone.
+ */
+static void nfs_direct_complete(struct nfs_direct_req *dreq)
+{
+ nfs_free_user_pages(dreq->pages, dreq->npages, 1);
+
+ if (dreq->iocb) {
+ long res = (long) dreq->error;
+ if (!res)
+ res = (long) dreq->count;
+ aio_complete(dreq->iocb, res, 0);
+ }
+ complete_all(&dreq->completion);
+
+ kref_put(&dreq->kref, nfs_direct_req_release);
+}
+
+/*
* Note we also set the number of requests we have in the dreq when we are
* done. This prevents races with I/O completion so we will always wait
* until all requests have been dispatched and completed.
*/
-static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
+static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
{
struct list_head *list;
struct nfs_direct_req *dreq;
- unsigned int reads = 0;
unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+ dreq = nfs_direct_req_alloc();
if (!dreq)
return NULL;
- kref_init(&dreq->kref);
- init_waitqueue_head(&dreq->wait);
- INIT_LIST_HEAD(&dreq->list);
- atomic_set(&dreq->count, 0);
- atomic_set(&dreq->error, 0);
-
list = &dreq->list;
for(;;) {
struct nfs_read_data *data = nfs_readdata_alloc(rpages);
@@ -196,72 +279,70 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
list_add(&data->pages, list);
data->req = (struct nfs_page *) dreq;
- reads++;
+ dreq->outstanding++;
if (nbytes <= rsize)
break;
nbytes -= rsize;
}
kref_get(&dreq->kref);
- atomic_set(&dreq->complete, reads);
return dreq;
}
-/**
- * nfs_direct_read_result - handle a read reply for a direct read request
- * @data: address of NFS READ operation control block
- * @status: status of this NFS READ operation
- *
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete. This could be long *after* we are woken up in
- * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
- */
-static void nfs_direct_read_result(struct nfs_read_data *data, int status)
+static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
{
+ struct nfs_read_data *data = calldata;
struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
- if (likely(status >= 0))
- atomic_add(data->res.count, &dreq->count);
+ if (nfs_readpage_result(task, data) != 0)
+ return;
+
+ spin_lock(&dreq->lock);
+
+ if (likely(task->tk_status >= 0))
+ dreq->count += data->res.count;
else
- atomic_set(&dreq->error, status);
+ dreq->error = task->tk_status;
- if (unlikely(atomic_dec_and_test(&dreq->complete))) {
- nfs_free_user_pages(dreq->pages, dreq->npages, 1);
- wake_up(&dreq->wait);
- kref_put(&dreq->kref, nfs_direct_req_release);
+ if (--dreq->outstanding) {
+ spin_unlock(&dreq->lock);
+ return;
}
+
+ spin_unlock(&dreq->lock);
+ nfs_direct_complete(dreq);
}
-/**
- * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
- * @dreq: address of nfs_direct_req struct for this request
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- *
+static const struct rpc_call_ops nfs_read_direct_ops = {
+ .rpc_call_done = nfs_direct_read_result,
+ .rpc_release = nfs_readdata_release,
+};
+
+/*
* For each nfs_read_data struct that was allocated on the list, dispatch
* an NFS READ operation
*/
-static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
- struct inode *inode, struct nfs_open_context *ctx,
- unsigned long user_addr, size_t count, loff_t file_offset)
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
{
+ struct nfs_open_context *ctx = dreq->ctx;
+ struct inode *inode = ctx->dentry->d_inode;
struct list_head *list = &dreq->list;
struct page **pages = dreq->pages;
+ size_t count = dreq->user_count;
+ loff_t pos = dreq->pos;
+ size_t rsize = NFS_SERVER(inode)->rsize;
unsigned int curpage, pgbase;
- unsigned int rsize = NFS_SERVER(inode)->rsize;
curpage = 0;
- pgbase = user_addr & ~PAGE_MASK;
+ pgbase = dreq->user_addr & ~PAGE_MASK;
do {
struct nfs_read_data *data;
- unsigned int bytes;
+ size_t bytes;
bytes = rsize;
if (count < rsize)
bytes = count;
+ BUG_ON(list_empty(list));
data = list_entry(list->next, struct nfs_read_data, pages);
list_del_init(&data->pages);
@@ -269,7 +350,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
data->cred = ctx->cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
- data->args.offset = file_offset;
+ data->args.offset = pos;
data->args.pgbase = pgbase;
data->args.pages = &pages[curpage];
data->args.count = bytes;
@@ -277,77 +358,38 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
data->res.eof = 0;
data->res.count = bytes;
+ rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+ &nfs_read_direct_ops, data);
NFS_PROTO(inode)->read_setup(data);
data->task.tk_cookie = (unsigned long) inode;
- data->complete = nfs_direct_read_result;
lock_kernel();
rpc_execute(&data->task);
unlock_kernel();
- dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+ dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
bytes,
(unsigned long long)data->args.offset);
- file_offset += bytes;
+ pos += bytes;
pgbase += bytes;
curpage += pgbase >> PAGE_SHIFT;
pgbase &= ~PAGE_MASK;
count -= bytes;
} while (count != 0);
+ BUG_ON(!list_empty(list));
}
-/**
- * nfs_direct_read_wait - wait for I/O completion for direct reads
- * @dreq: request on which we are to wait
- * @intr: whether or not this wait can be interrupted
- *
- * Collects and returns the final error value/byte-count.
- */
-static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
-{
- int result = 0;
-
- if (intr) {
- result = wait_event_interruptible(dreq->wait,
- (atomic_read(&dreq->complete) == 0));
- } else {
- wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
- }
-
- if (!result)
- result = atomic_read(&dreq->error);
- if (!result)
- result = atomic_read(&dreq->count);
-
- kref_put(&dreq->kref, nfs_direct_req_release);
- return (ssize_t) result;
-}
-
-/**
- * nfs_direct_read_seg - Read in one iov segment. Generate separate
- * read RPCs for each "rsize" bytes.
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * @nr_pages: number of pages in the array
- *
- */
-static ssize_t nfs_direct_read_seg(struct inode *inode,
- struct nfs_open_context *ctx, unsigned long user_addr,
- size_t count, loff_t file_offset, struct page **pages,
- unsigned int nr_pages)
+static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages)
{
ssize_t result;
sigset_t oldset;
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
struct rpc_clnt *clnt = NFS_CLIENT(inode);
struct nfs_direct_req *dreq;
@@ -355,284 +397,350 @@ static ssize_t nfs_direct_read_seg(struct inode *inode,
if (!dreq)
return -ENOMEM;
+ dreq->user_addr = user_addr;
+ dreq->user_count = count;
+ dreq->pos = pos;
dreq->pages = pages;
dreq->npages = nr_pages;
+ dreq->inode = inode;
+ dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+ if (!is_sync_kiocb(iocb))
+ dreq->iocb = iocb;
+ nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
rpc_clnt_sigmask(clnt, &oldset);
- nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
- file_offset);
- result = nfs_direct_read_wait(dreq, clnt->cl_intr);
+ nfs_direct_read_schedule(dreq);
+ result = nfs_direct_wait(dreq);
rpc_clnt_sigunmask(clnt, &oldset);
return result;
}
-/**
- * nfs_direct_read - For each iov segment, map the user's buffer
- * then generate read RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
- * We've already pushed out any non-direct writes so that this read
- * will see them when we read from the server.
- */
-static ssize_t
-nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
- const struct iovec *iov, loff_t file_offset,
- unsigned long nr_segs)
+static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
{
- ssize_t tot_bytes = 0;
- unsigned long seg = 0;
-
- while ((seg < nr_segs) && (tot_bytes >= 0)) {
- ssize_t result;
- int page_count;
- struct page **pages;
- const struct iovec *vec = &iov[seg++];
- unsigned long user_addr = (unsigned long) vec->iov_base;
- size_t size = vec->iov_len;
-
- page_count = nfs_get_user_pages(READ, user_addr, size, &pages);
- if (page_count < 0) {
- nfs_free_user_pages(pages, 0, 0);
- if (tot_bytes > 0)
- break;
- return page_count;
- }
+ list_splice_init(&dreq->rewrite_list, &dreq->list);
+ while (!list_empty(&dreq->list)) {
+ struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
+ list_del(&data->pages);
+ nfs_writedata_release(data);
+ }
+}
- result = nfs_direct_read_seg(inode, ctx, user_addr, size,
- file_offset, pages, page_count);
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+{
+ struct list_head *pos;
- if (result <= 0) {
- if (tot_bytes > 0)
- break;
- return result;
- }
- tot_bytes += result;
- file_offset += result;
- if (result < size)
- break;
+ list_splice_init(&dreq->rewrite_list, &dreq->list);
+ list_for_each(pos, &dreq->list)
+ dreq->outstanding++;
+ dreq->count = 0;
+
+ nfs_direct_write_schedule(dreq, FLUSH_STABLE);
+}
+
+static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
+{
+ struct nfs_write_data *data = calldata;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+
+ /* Call the NFS version-specific code */
+ if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+ return;
+ if (unlikely(task->tk_status < 0)) {
+ dreq->error = task->tk_status;
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ }
+ if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+ dprintk("NFS: %5u commit verify failed\n", task->tk_pid);
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
}
- return tot_bytes;
+ dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status);
+ nfs_direct_write_complete(dreq, data->inode);
}
-/**
- * nfs_direct_write_seg - Write out one iov segment. Generate separate
- * write RPCs for each "wsize" bytes, then commit.
- * @inode: target inode
- * @ctx: target file open context
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * nr_pages: size of pages array
- */
-static ssize_t nfs_direct_write_seg(struct inode *inode,
- struct nfs_open_context *ctx, unsigned long user_addr,
- size_t count, loff_t file_offset, struct page **pages,
- int nr_pages)
+static const struct rpc_call_ops nfs_commit_direct_ops = {
+ .rpc_call_done = nfs_direct_commit_result,
+ .rpc_release = nfs_commit_release,
+};
+
+static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
{
- const unsigned int wsize = NFS_SERVER(inode)->wsize;
- size_t request;
- int curpage, need_commit;
- ssize_t result, tot_bytes;
- struct nfs_writeverf first_verf;
- struct nfs_write_data *wdata;
-
- wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
- if (!wdata)
- return -ENOMEM;
+ struct nfs_write_data *data = dreq->commit_data;
+ struct rpc_task *task = &data->task;
- wdata->inode = inode;
- wdata->cred = ctx->cred;
- wdata->args.fh = NFS_FH(inode);
- wdata->args.context = ctx;
- wdata->args.stable = NFS_UNSTABLE;
- if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
- wdata->args.stable = NFS_FILE_SYNC;
- wdata->res.fattr = &wdata->fattr;
- wdata->res.verf = &wdata->verf;
+ data->inode = dreq->inode;
+ data->cred = dreq->ctx->cred;
- nfs_begin_data_update(inode);
-retry:
- need_commit = 0;
- tot_bytes = 0;
- curpage = 0;
- request = count;
- wdata->args.pgbase = user_addr & ~PAGE_MASK;
- wdata->args.offset = file_offset;
- do {
- wdata->args.count = request;
- if (wdata->args.count > wsize)
- wdata->args.count = wsize;
- wdata->args.pages = &pages[curpage];
+ data->args.fh = NFS_FH(data->inode);
+ data->args.offset = dreq->pos;
+ data->args.count = dreq->user_count;
+ data->res.count = 0;
+ data->res.fattr = &data->fattr;
+ data->res.verf = &data->verf;
- dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
- wdata->args.count, (long long) wdata->args.offset,
- user_addr + tot_bytes, wdata->args.pgbase, curpage);
+ rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC,
+ &nfs_commit_direct_ops, data);
+ NFS_PROTO(data->inode)->commit_setup(data, 0);
- lock_kernel();
- result = NFS_PROTO(inode)->write(wdata);
- unlock_kernel();
+ data->task.tk_priority = RPC_PRIORITY_NORMAL;
+ data->task.tk_cookie = (unsigned long)data->inode;
+ /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
+ dreq->commit_data = NULL;
- if (result <= 0) {
- if (tot_bytes > 0)
- break;
- goto out;
- }
+ dprintk("NFS: %5u initiated commit call\n", task->tk_pid);
- if (tot_bytes == 0)
- memcpy(&first_verf.verifier, &wdata->verf.verifier,
- sizeof(first_verf.verifier));
- if (wdata->verf.committed != NFS_FILE_SYNC) {
- need_commit = 1;
- if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
- sizeof(first_verf.verifier)))
- goto sync_retry;
- }
+ lock_kernel();
+ rpc_execute(&data->task);
+ unlock_kernel();
+}
- tot_bytes += result;
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+ int flags = dreq->flags;
- /* in case of a short write: stop now, let the app recover */
- if (result < wdata->args.count)
+ dreq->flags = 0;
+ switch (flags) {
+ case NFS_ODIRECT_DO_COMMIT:
+ nfs_direct_commit_schedule(dreq);
break;
+ case NFS_ODIRECT_RESCHED_WRITES:
+ nfs_direct_write_reschedule(dreq);
+ break;
+ default:
+ nfs_end_data_update(inode);
+ if (dreq->commit_data != NULL)
+ nfs_commit_free(dreq->commit_data);
+ nfs_direct_free_writedata(dreq);
+ nfs_direct_complete(dreq);
+ }
+}
- wdata->args.offset += result;
- wdata->args.pgbase += result;
- curpage += wdata->args.pgbase >> PAGE_SHIFT;
- wdata->args.pgbase &= ~PAGE_MASK;
- request -= result;
- } while (request != 0);
+static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+ dreq->commit_data = nfs_commit_alloc(0);
+ if (dreq->commit_data != NULL)
+ dreq->commit_data->req = (struct nfs_page *) dreq;
+}
+#else
+static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+ dreq->commit_data = NULL;
+}
- /*
- * Commit data written so far, even in the event of an error
- */
- if (need_commit) {
- wdata->args.count = tot_bytes;
- wdata->args.offset = file_offset;
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+ nfs_end_data_update(inode);
+ nfs_direct_free_writedata(dreq);
+ nfs_direct_complete(dreq);
+}
+#endif
- lock_kernel();
- result = NFS_PROTO(inode)->commit(wdata);
- unlock_kernel();
+static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
+{
+ struct list_head *list;
+ struct nfs_direct_req *dreq;
+ unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+ dreq = nfs_direct_req_alloc();
+ if (!dreq)
+ return NULL;
+
+ list = &dreq->list;
+ for(;;) {
+ struct nfs_write_data *data = nfs_writedata_alloc(wpages);
- if (result < 0 || memcmp(&first_verf.verifier,
- &wdata->verf.verifier,
- sizeof(first_verf.verifier)) != 0)
- goto sync_retry;
+ if (unlikely(!data)) {
+ while (!list_empty(list)) {
+ data = list_entry(list->next,
+ struct nfs_write_data, pages);
+ list_del(&data->pages);
+ nfs_writedata_free(data);
+ }
+ kref_put(&dreq->kref, nfs_direct_req_release);
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&data->pages);
+ list_add(&data->pages, list);
+
+ data->req = (struct nfs_page *) dreq;
+ dreq->outstanding++;
+ if (nbytes <= wsize)
+ break;
+ nbytes -= wsize;
}
- result = tot_bytes;
-out:
- nfs_end_data_update(inode);
- nfs_writedata_free(wdata);
- return result;
+ nfs_alloc_commit_data(dreq);
-sync_retry:
- wdata->args.stable = NFS_FILE_SYNC;
- goto retry;
+ kref_get(&dreq->kref);
+ return dreq;
}
-/**
- * nfs_direct_write - For each iov segment, map the user's buffer
- * then generate write and commit RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
- * Upon return, generic_file_direct_IO invalidates any cached pages
- * that non-direct readers might access, so they will pick up these
- * writes immediately.
- */
-static ssize_t nfs_direct_write(struct inode *inode,
- struct nfs_open_context *ctx, const struct iovec *iov,
- loff_t file_offset, unsigned long nr_segs)
+static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
{
- ssize_t tot_bytes = 0;
- unsigned long seg = 0;
-
- while ((seg < nr_segs) && (tot_bytes >= 0)) {
- ssize_t result;
- int page_count;
- struct page **pages;
- const struct iovec *vec = &iov[seg++];
- unsigned long user_addr = (unsigned long) vec->iov_base;
- size_t size = vec->iov_len;
-
- page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages);
- if (page_count < 0) {
- nfs_free_user_pages(pages, 0, 0);
- if (tot_bytes > 0)
- break;
- return page_count;
- }
+ struct nfs_write_data *data = calldata;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+ int status = task->tk_status;
+
+ if (nfs_writeback_done(task, data) != 0)
+ return;
+
+ spin_lock(&dreq->lock);
- result = nfs_direct_write_seg(inode, ctx, user_addr, size,
- file_offset, pages, page_count);
- nfs_free_user_pages(pages, page_count, 0);
+ if (likely(status >= 0))
+ dreq->count += data->res.count;
+ else
+ dreq->error = task->tk_status;
- if (result <= 0) {
- if (tot_bytes > 0)
+ if (data->res.verf->committed != NFS_FILE_SYNC) {
+ switch (dreq->flags) {
+ case 0:
+ memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
+ dreq->flags = NFS_ODIRECT_DO_COMMIT;
break;
- return result;
+ case NFS_ODIRECT_DO_COMMIT:
+ if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
+ dprintk("NFS: %5u write verify failed\n", task->tk_pid);
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ }
}
- tot_bytes += result;
- file_offset += result;
- if (result < size)
- break;
}
- return tot_bytes;
+ /* In case we have to resend */
+ data->args.stable = NFS_FILE_SYNC;
+
+ spin_unlock(&dreq->lock);
}
-/**
- * nfs_direct_IO - NFS address space operation for direct I/O
- * rw: direction (read or write)
- * @iocb: target I/O control block
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
+/*
+ * NB: Return the value of the first error return code. Subsequent
+ * errors after the first one are ignored.
*/
-ssize_t
-nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
- loff_t file_offset, unsigned long nr_segs)
+static void nfs_direct_write_release(void *calldata)
{
- ssize_t result = -EINVAL;
- struct file *file = iocb->ki_filp;
- struct nfs_open_context *ctx;
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
+ struct nfs_write_data *data = calldata;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
- /*
- * No support for async yet
- */
- if (!is_sync_kiocb(iocb))
- return result;
-
- ctx = (struct nfs_open_context *)file->private_data;
- switch (rw) {
- case READ:
- dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n",
- dentry->d_name.name, file_offset, nr_segs);
-
- result = nfs_direct_read(inode, ctx, iov,
- file_offset, nr_segs);
- break;
- case WRITE:
- dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n",
- dentry->d_name.name, file_offset, nr_segs);
-
- result = nfs_direct_write(inode, ctx, iov,
- file_offset, nr_segs);
- break;
- default:
- break;
+ spin_lock(&dreq->lock);
+ if (--dreq->outstanding) {
+ spin_unlock(&dreq->lock);
+ return;
}
+ spin_unlock(&dreq->lock);
+
+ nfs_direct_write_complete(dreq, data->inode);
+}
+
+static const struct rpc_call_ops nfs_write_direct_ops = {
+ .rpc_call_done = nfs_direct_write_result,
+ .rpc_release = nfs_direct_write_release,
+};
+
+/*
+ * For each nfs_write_data struct that was allocated on the list, dispatch
+ * an NFS WRITE operation
+ */
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
+{
+ struct nfs_open_context *ctx = dreq->ctx;
+ struct inode *inode = ctx->dentry->d_inode;
+ struct list_head *list = &dreq->list;
+ struct page **pages = dreq->pages;
+ size_t count = dreq->user_count;
+ loff_t pos = dreq->pos;
+ size_t wsize = NFS_SERVER(inode)->wsize;
+ unsigned int curpage, pgbase;
+
+ curpage = 0;
+ pgbase = dreq->user_addr & ~PAGE_MASK;
+ do {
+ struct nfs_write_data *data;
+ size_t bytes;
+
+ bytes = wsize;
+ if (count < wsize)
+ bytes = count;
+
+ BUG_ON(list_empty(list));
+ data = list_entry(list->next, struct nfs_write_data, pages);
+ list_move_tail(&data->pages, &dreq->rewrite_list);
+
+ data->inode = inode;
+ data->cred = ctx->cred;
+ data->args.fh = NFS_FH(inode);
+ data->args.context = ctx;
+ data->args.offset = pos;
+ data->args.pgbase = pgbase;
+ data->args.pages = &pages[curpage];
+ data->args.count = bytes;
+ data->res.fattr = &data->fattr;
+ data->res.count = bytes;
+ data->res.verf = &data->verf;
+
+ rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+ &nfs_write_direct_ops, data);
+ NFS_PROTO(inode)->write_setup(data, sync);
+
+ data->task.tk_priority = RPC_PRIORITY_NORMAL;
+ data->task.tk_cookie = (unsigned long) inode;
+
+ lock_kernel();
+ rpc_execute(&data->task);
+ unlock_kernel();
+
+ dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
+ data->task.tk_pid,
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode),
+ bytes,
+ (unsigned long long)data->args.offset);
+
+ pos += bytes;
+ pgbase += bytes;
+ curpage += pgbase >> PAGE_SHIFT;
+ pgbase &= ~PAGE_MASK;
+
+ count -= bytes;
+ } while (count != 0);
+ BUG_ON(!list_empty(list));
+}
+
+static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages)
+{
+ ssize_t result;
+ sigset_t oldset;
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
+ struct rpc_clnt *clnt = NFS_CLIENT(inode);
+ struct nfs_direct_req *dreq;
+ size_t wsize = NFS_SERVER(inode)->wsize;
+ int sync = 0;
+
+ dreq = nfs_direct_write_alloc(count, wsize);
+ if (!dreq)
+ return -ENOMEM;
+ if (dreq->commit_data == NULL || count < wsize)
+ sync = FLUSH_STABLE;
+
+ dreq->user_addr = user_addr;
+ dreq->user_count = count;
+ dreq->pos = pos;
+ dreq->pages = pages;
+ dreq->npages = nr_pages;
+ dreq->inode = inode;
+ dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+ if (!is_sync_kiocb(iocb))
+ dreq->iocb = iocb;
+
+ nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);
+
+ nfs_begin_data_update(inode);
+
+ rpc_clnt_sigmask(clnt, &oldset);
+ nfs_direct_write_schedule(dreq, sync);
+ result = nfs_direct_wait(dreq);
+ rpc_clnt_sigunmask(clnt, &oldset);
+
return result;
}
@@ -640,49 +748,40 @@ nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
* nfs_file_direct_read - file direct read operation for NFS files
* @iocb: target I/O control block
* @buf: user's buffer into which to read data
- * count: number of bytes to read
- * pos: byte offset in file where reading starts
+ * @count: number of bytes to read
+ * @pos: byte offset in file where reading starts
*
* We use this function for direct reads instead of calling
* generic_file_aio_read() in order to avoid gfar's check to see if
* the request starts before the end of the file. For that check
* to work, we must generate a GETATTR before each direct read, and
* even then there is a window between the GETATTR and the subsequent
- * READ where the file size could change. So our preference is simply
+ * READ where the file size could change. Our preference is simply
* to do all reads the application wants, and the server will take
* care of managing the end of file boundary.
- *
+ *
* This function also eliminates unnecessarily updating the file's
* atime locally, as the NFS server sets the file's atime, and this
* client must read the updated atime from the server back into its
* cache.
*/
-ssize_t
-nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
{
ssize_t retval = -EINVAL;
- loff_t *ppos = &iocb->ki_pos;
+ int page_count;
+ struct page **pages;
struct file *file = iocb->ki_filp;
- struct nfs_open_context *ctx =
- (struct nfs_open_context *) file->private_data;
struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- struct iovec iov = {
- .iov_base = buf,
- .iov_len = count,
- };
dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);
- if (!is_sync_kiocb(iocb))
- goto out;
if (count < 0)
goto out;
retval = -EFAULT;
- if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len))
+ if (!access_ok(VERIFY_WRITE, buf, count))
goto out;
retval = 0;
if (!count)
@@ -692,9 +791,16 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
if (retval)
goto out;
- retval = nfs_direct_read(inode, ctx, &iov, pos, 1);
+ retval = nfs_get_user_pages(READ, (unsigned long) buf,
+ count, &pages);
+ if (retval < 0)
+ goto out;
+ page_count = retval;
+
+ retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos,
+ pages, page_count);
if (retval > 0)
- *ppos = pos + retval;
+ iocb->ki_pos = pos + retval;
out:
return retval;
@@ -704,8 +810,8 @@ out:
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
* @buf: user's buffer from which to write data
- * count: number of bytes to write
- * pos: byte offset in file where writing starts
+ * @count: number of bytes to write
+ * @pos: byte offset in file where writing starts
*
* We use this function for direct writes instead of calling
* generic_file_aio_write() in order to avoid taking the inode
@@ -725,28 +831,19 @@ out:
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
-ssize_t
-nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
{
ssize_t retval;
+ int page_count;
+ struct page **pages;
struct file *file = iocb->ki_filp;
- struct nfs_open_context *ctx =
- (struct nfs_open_context *) file->private_data;
struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- struct iovec iov = {
- .iov_base = (char __user *)buf,
- };
dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);
- retval = -EINVAL;
- if (!is_sync_kiocb(iocb))
- goto out;
-
retval = generic_write_checks(file, &pos, &count, 0);
if (retval)
goto out;
@@ -757,19 +854,35 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
retval = 0;
if (!count)
goto out;
- iov.iov_len = count,
retval = -EFAULT;
- if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
+ if (!access_ok(VERIFY_READ, buf, count))
goto out;
retval = nfs_sync_mapping(mapping);
if (retval)
goto out;
- retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
+ retval = nfs_get_user_pages(WRITE, (unsigned long) buf,
+ count, &pages);
+ if (retval < 0)
+ goto out;
+ page_count = retval;
+
+ retval = nfs_direct_write(iocb, (unsigned long) buf, count,
+ pos, pages, page_count);
+
+ /*
+ * XXX: nfs_end_data_update() already ensures this file's
+ * cached data is subsequently invalidated. Do we really
+ * need to call invalidate_inode_pages2() again here?
+ *
+ * For aio writes, this invalidation will almost certainly
+ * occur before the writes complete. Kind of racey.
+ */
if (mapping->nrpages)
invalidate_inode_pages2(mapping);
+
if (retval > 0)
iocb->ki_pos = pos + retval;
@@ -777,6 +890,10 @@ out:
return retval;
}
+/**
+ * nfs_init_directcache - create a slab cache for nfs_direct_req structures
+ *
+ */
int nfs_init_directcache(void)
{
nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
@@ -790,6 +907,10 @@ int nfs_init_directcache(void)
return 0;
}
+/**
+ * nfs_init_directcache - destroy the slab cache for nfs_direct_req structures
+ *
+ */
void nfs_destroy_directcache(void)
{
if (kmem_cache_destroy(nfs_direct_cachep))
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7a79fbe9f53..5263b2864a4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -32,6 +32,7 @@
#include <asm/system.h>
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_FILE
@@ -102,18 +103,15 @@ static int nfs_check_flags(int flags)
static int
nfs_file_open(struct inode *inode, struct file *filp)
{
- struct nfs_server *server = NFS_SERVER(inode);
- int (*open)(struct inode *, struct file *);
int res;
res = nfs_check_flags(filp->f_flags);
if (res)
return res;
+ nfs_inc_stats(inode, NFSIOS_VFSOPEN);
lock_kernel();
- /* Do NFSv4 open() call */
- if ((open = server->rpc_ops->file_open) != NULL)
- res = open(inode, filp);
+ res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp);
unlock_kernel();
return res;
}
@@ -124,6 +122,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
/* Ensure that dirty pages are flushed out with the right creds */
if (filp->f_mode & FMODE_WRITE)
filemap_fdatawrite(filp->f_mapping);
+ nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
return NFS_PROTO(inode)->file_release(inode, filp);
}
@@ -199,6 +198,7 @@ nfs_file_flush(struct file *file)
if ((file->f_mode & FMODE_WRITE) == 0)
return 0;
+ nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
lock_kernel();
/* Ensure that data+attribute caches are up to date after close() */
status = nfs_wb_all(inode);
@@ -229,6 +229,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
(unsigned long) count, (unsigned long) pos);
result = nfs_revalidate_file(inode, iocb->ki_filp);
+ nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
if (!result)
result = generic_file_aio_read(iocb, buf, count, pos);
return result;
@@ -282,6 +283,7 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+ nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
lock_kernel();
status = nfs_wb_all(inode);
if (!status) {
@@ -316,6 +318,17 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse
return status;
}
+static int nfs_invalidate_page(struct page *page, unsigned long offset)
+{
+ /* FIXME: we really should cancel any unstarted writes on this page */
+ return 1;
+}
+
+static int nfs_release_page(struct page *page, gfp_t gfp)
+{
+ return !nfs_wb_page(page->mapping->host, page);
+}
+
struct address_space_operations nfs_file_aops = {
.readpage = nfs_readpage,
.readpages = nfs_readpages,
@@ -324,6 +337,8 @@ struct address_space_operations nfs_file_aops = {
.writepages = nfs_writepages,
.prepare_write = nfs_prepare_write,
.commit_write = nfs_commit_write,
+ .invalidatepage = nfs_invalidate_page,
+ .releasepage = nfs_release_page,
#ifdef CONFIG_NFS_DIRECTIO
.direct_IO = nfs_direct_IO,
#endif
@@ -365,6 +380,7 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
if (!count)
goto out;
+ nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
result = generic_file_aio_write(iocb, buf, count, pos);
out:
return result;
@@ -376,15 +392,17 @@ out_swapfile:
static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
{
- struct file_lock *cfl;
+ struct file_lock cfl;
struct inode *inode = filp->f_mapping->host;
int status = 0;
lock_kernel();
/* Try local locking first */
- cfl = posix_test_lock(filp, fl);
- if (cfl != NULL) {
- locks_copy_lock(fl, cfl);
+ if (posix_test_lock(filp, fl, &cfl)) {
+ fl->fl_start = cfl.fl_start;
+ fl->fl_end = cfl.fl_end;
+ fl->fl_type = cfl.fl_type;
+ fl->fl_pid = cfl.fl_pid;
goto out;
}
@@ -425,10 +443,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
- sigset_t oldset;
int status;
- rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
/*
* Flush all pending writes before doing anything
* with locks..
@@ -446,17 +462,14 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
else
status = do_vfs_lock(filp, fl);
unlock_kernel();
- rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
return status;
}
static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
- sigset_t oldset;
int status;
- rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
/*
* Flush all pending writes before doing anything
* with locks..
@@ -489,7 +502,6 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
nfs_sync_mapping(filp->f_mapping);
nfs_zap_caches(inode);
out:
- rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
return status;
}
@@ -504,9 +516,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
inode->i_sb->s_id, inode->i_ino,
fl->fl_type, fl->fl_flags,
(long long)fl->fl_start, (long long)fl->fl_end);
-
- if (!inode)
- return -EINVAL;
+ nfs_inc_stats(inode, NFSIOS_VFSLOCK);
/* No mandatory locks over NFS */
if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
@@ -531,9 +541,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
inode->i_sb->s_id, inode->i_ino,
fl->fl_type, fl->fl_flags);
- if (!inode)
- return -EINVAL;
-
/*
* No BSD flocks over NFS allowed.
* Note: we could try to fake a POSIX lock request here by
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 821edd30333..3fab5b0cfc5 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -35,6 +35,7 @@
*/
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
@@ -74,8 +75,8 @@ struct idmap {
struct dentry *idmap_dentry;
wait_queue_head_t idmap_wq;
struct idmap_msg idmap_im;
- struct semaphore idmap_lock; /* Serializes upcalls */
- struct semaphore idmap_im_lock; /* Protects the hashtable */
+ struct mutex idmap_lock; /* Serializes upcalls */
+ struct mutex idmap_im_lock; /* Protects the hashtable */
struct idmap_hashtable idmap_user_hash;
struct idmap_hashtable idmap_group_hash;
};
@@ -101,11 +102,9 @@ nfs_idmap_new(struct nfs4_client *clp)
if (clp->cl_idmap != NULL)
return;
- if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
+ if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
return;
- memset(idmap, 0, sizeof(*idmap));
-
snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
"%s/idmap", clp->cl_rpcclient->cl_pathname);
@@ -116,8 +115,8 @@ nfs_idmap_new(struct nfs4_client *clp)
return;
}
- init_MUTEX(&idmap->idmap_lock);
- init_MUTEX(&idmap->idmap_im_lock);
+ mutex_init(&idmap->idmap_lock);
+ mutex_init(&idmap->idmap_im_lock);
init_waitqueue_head(&idmap->idmap_wq);
idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
@@ -132,6 +131,8 @@ nfs_idmap_delete(struct nfs4_client *clp)
if (!idmap)
return;
+ dput(idmap->idmap_dentry);
+ idmap->idmap_dentry = NULL;
rpc_unlink(idmap->idmap_path);
clp->cl_idmap = NULL;
kfree(idmap);
@@ -232,8 +233,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
if (namelen >= IDMAP_NAMESZ)
return -EINVAL;
- down(&idmap->idmap_lock);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_lock);
+ mutex_lock(&idmap->idmap_im_lock);
he = idmap_lookup_name(h, name, namelen);
if (he != NULL) {
@@ -259,11 +260,11 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
}
set_current_state(TASK_UNINTERRUPTIBLE);
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
schedule();
current->state = TASK_RUNNING;
remove_wait_queue(&idmap->idmap_wq, &wq);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
if (im->im_status & IDMAP_STATUS_SUCCESS) {
*id = im->im_id;
@@ -272,8 +273,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
out:
memset(im, 0, sizeof(*im));
- up(&idmap->idmap_im_lock);
- up(&idmap->idmap_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_lock);
return (ret);
}
@@ -293,8 +294,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
im = &idmap->idmap_im;
- down(&idmap->idmap_lock);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_lock);
+ mutex_lock(&idmap->idmap_im_lock);
he = idmap_lookup_id(h, id);
if (he != 0) {
@@ -320,11 +321,11 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
}
set_current_state(TASK_UNINTERRUPTIBLE);
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
schedule();
current->state = TASK_RUNNING;
remove_wait_queue(&idmap->idmap_wq, &wq);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
if (im->im_status & IDMAP_STATUS_SUCCESS) {
if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
@@ -335,8 +336,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
out:
memset(im, 0, sizeof(*im));
- up(&idmap->idmap_im_lock);
- up(&idmap->idmap_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_lock);
return ret;
}
@@ -380,7 +381,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (copy_from_user(&im_in, src, mlen) != 0)
return (-EFAULT);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
ret = mlen;
im->im_status = im_in.im_status;
@@ -440,7 +441,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
ret = mlen;
out:
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
return ret;
}
@@ -452,10 +453,10 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
if (msg->errno >= 0)
return;
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
im->im_status = IDMAP_STATUS_LOOKUPFAIL;
wake_up(&idmap->idmap_wq);
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
}
/*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3413996f9a8..2f7656b911b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -26,6 +26,7 @@
#include <linux/unistd.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/metrics.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs4_mount.h>
@@ -42,6 +43,7 @@
#include "nfs4_fs.h"
#include "callback.h"
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_VFS
#define NFS_PARANOIA 1
@@ -65,6 +67,7 @@ static void nfs_clear_inode(struct inode *);
static void nfs_umount_begin(struct super_block *);
static int nfs_statfs(struct super_block *, struct kstatfs *);
static int nfs_show_options(struct seq_file *, struct vfsmount *);
+static int nfs_show_stats(struct seq_file *, struct vfsmount *);
static void nfs_zap_acl_cache(struct inode *);
static struct rpc_program nfs_program;
@@ -78,6 +81,7 @@ static struct super_operations nfs_sops = {
.clear_inode = nfs_clear_inode,
.umount_begin = nfs_umount_begin,
.show_options = nfs_show_options,
+ .show_stats = nfs_show_stats,
};
/*
@@ -133,7 +137,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
static int
nfs_write_inode(struct inode *inode, int sync)
{
- int flags = sync ? FLUSH_WAIT : 0;
+ int flags = sync ? FLUSH_SYNC : 0;
int ret;
ret = nfs_commit_inode(inode, flags);
@@ -237,7 +241,6 @@ static struct inode *
nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
{
struct nfs_server *server = NFS_SB(sb);
- struct inode *rooti;
int error;
error = server->rpc_ops->getroot(server, rootfh, fsinfo);
@@ -246,10 +249,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f
return ERR_PTR(error);
}
- rooti = nfs_fhget(sb, rootfh, fsinfo->fattr);
- if (!rooti)
- return ERR_PTR(-ENOMEM);
- return rooti;
+ return nfs_fhget(sb, rootfh, fsinfo->fattr);
}
/*
@@ -277,6 +277,10 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
sb->s_magic = NFS_SUPER_MAGIC;
+ server->io_stats = nfs_alloc_iostats();
+ if (server->io_stats == NULL)
+ return -ENOMEM;
+
root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
/* Did getting the root inode fail? */
if (IS_ERR(root_inode)) {
@@ -290,6 +294,9 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
}
sb->s_root->d_op = server->rpc_ops->dentry_ops;
+ /* mount time stamp, in seconds */
+ server->mount_time = jiffies;
+
/* Get some general file system info */
if (server->namelen == 0 &&
server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
@@ -396,6 +403,9 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
+ server->retrans_timeo = timeparms.to_initval;
+ server->retrans_count = timeparms.to_retries;
+
/* create transport and client */
xprt = xprt_create_proto(proto, &server->addr, &timeparms);
if (IS_ERR(xprt)) {
@@ -579,7 +589,7 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf)
}
-static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
{
static struct proc_nfs_info {
int flag;
@@ -588,28 +598,26 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
} nfs_info[] = {
{ NFS_MOUNT_SOFT, ",soft", ",hard" },
{ NFS_MOUNT_INTR, ",intr", "" },
- { NFS_MOUNT_POSIX, ",posix", "" },
{ NFS_MOUNT_NOCTO, ",nocto", "" },
{ NFS_MOUNT_NOAC, ",noac", "" },
- { NFS_MOUNT_NONLM, ",nolock", ",lock" },
+ { NFS_MOUNT_NONLM, ",nolock", "" },
{ NFS_MOUNT_NOACL, ",noacl", "" },
{ 0, NULL, NULL }
};
struct proc_nfs_info *nfs_infop;
- struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
char buf[12];
char *proto;
- seq_printf(m, ",v%d", nfss->rpc_ops->version);
+ seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
seq_printf(m, ",rsize=%d", nfss->rsize);
seq_printf(m, ",wsize=%d", nfss->wsize);
- if (nfss->acregmin != 3*HZ)
+ if (nfss->acregmin != 3*HZ || showdefaults)
seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
- if (nfss->acregmax != 60*HZ)
+ if (nfss->acregmax != 60*HZ || showdefaults)
seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
- if (nfss->acdirmin != 30*HZ)
+ if (nfss->acdirmin != 30*HZ || showdefaults)
seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
- if (nfss->acdirmax != 60*HZ)
+ if (nfss->acdirmax != 60*HZ || showdefaults)
seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
if (nfss->flags & nfs_infop->flag)
@@ -629,8 +637,96 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
proto = buf;
}
seq_printf(m, ",proto=%s", proto);
+ seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
+ seq_printf(m, ",retrans=%u", nfss->retrans_count);
+}
+
+static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+ struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+
+ nfs_show_mount_options(m, nfss, 0);
+
seq_puts(m, ",addr=");
seq_escape(m, nfss->hostname, " \t\n\\");
+
+ return 0;
+}
+
+static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
+{
+ int i, cpu;
+ struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+ struct rpc_auth *auth = nfss->client->cl_auth;
+ struct nfs_iostats totals = { };
+
+ seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS);
+
+ /*
+ * Display all mount option settings
+ */
+ seq_printf(m, "\n\topts:\t");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : "");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
+ nfs_show_mount_options(m, nfss, 1);
+
+ seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
+
+ seq_printf(m, "\n\tcaps:\t");
+ seq_printf(m, "caps=0x%x", nfss->caps);
+ seq_printf(m, ",wtmult=%d", nfss->wtmult);
+ seq_printf(m, ",dtsize=%d", nfss->dtsize);
+ seq_printf(m, ",bsize=%d", nfss->bsize);
+ seq_printf(m, ",namelen=%d", nfss->namelen);
+
+#ifdef CONFIG_NFS_V4
+ if (nfss->rpc_ops->version == 4) {
+ seq_printf(m, "\n\tnfsv4:\t");
+ seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
+ seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
+ seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
+ }
+#endif
+
+ /*
+ * Display security flavor in effect for this mount
+ */
+ seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor);
+ if (auth->au_flavor)
+ seq_printf(m, ",pseudoflavor=%d", auth->au_flavor);
+
+ /*
+ * Display superblock I/O counters
+ */
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ struct nfs_iostats *stats;
+
+ if (!cpu_possible(cpu))
+ continue;
+
+ preempt_disable();
+ stats = per_cpu_ptr(nfss->io_stats, cpu);
+
+ for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+ totals.events[i] += stats->events[i];
+ for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+ totals.bytes[i] += stats->bytes[i];
+
+ preempt_enable();
+ }
+
+ seq_printf(m, "\n\tevents:\t");
+ for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+ seq_printf(m, "%lu ", totals.events[i]);
+ seq_printf(m, "\n\tbytes:\t");
+ for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+ seq_printf(m, "%Lu ", totals.bytes[i]);
+ seq_printf(m, "\n");
+
+ rpc_print_iostats(m, nfss->client);
+
return 0;
}
@@ -660,6 +756,8 @@ static void nfs_zap_caches_locked(struct inode *inode)
struct nfs_inode *nfsi = NFS_I(inode);
int mode = inode->i_mode;
+ nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
+
NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
@@ -751,7 +849,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
.fh = fh,
.fattr = fattr
};
- struct inode *inode = NULL;
+ struct inode *inode = ERR_PTR(-ENOENT);
unsigned long hash;
if ((fattr->valid & NFS_ATTR_FATTR) == 0)
@@ -764,8 +862,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
hash = nfs_fattr_to_ino_t(fattr);
- if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc)))
+ inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc);
+ if (inode == NULL) {
+ inode = ERR_PTR(-ENOMEM);
goto out_no_inode;
+ }
if (inode->i_state & I_NEW) {
struct nfs_inode *nfsi = NFS_I(inode);
@@ -834,7 +935,7 @@ out:
return inode;
out_no_inode:
- printk("nfs_fhget: iget failed\n");
+ dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode));
goto out;
}
@@ -847,6 +948,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
struct nfs_fattr fattr;
int error;
+ nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
+
if (attr->ia_valid & ATTR_SIZE) {
if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
attr->ia_valid &= ~ATTR_SIZE;
@@ -859,11 +962,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
lock_kernel();
nfs_begin_data_update(inode);
- /* Write all dirty data if we're changing file permissions or size */
- if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
- filemap_write_and_wait(inode->i_mapping);
- nfs_wb_all(inode);
- }
+ /* Write all dirty data */
+ filemap_write_and_wait(inode->i_mapping);
+ nfs_wb_all(inode);
/*
* Return any delegations if we're going to change ACLs
*/
@@ -902,6 +1003,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
spin_unlock(&inode->i_lock);
}
if ((attr->ia_valid & ATTR_SIZE) != 0) {
+ nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
inode->i_size = attr->ia_size;
vmtruncate(inode, attr->ia_size);
}
@@ -949,7 +1051,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
int err;
/* Flush out writes to the server in order to update c/mtime */
- nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT);
+ nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT);
/*
* We may force a getattr if the user cares about atime.
@@ -973,7 +1075,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
return err;
}
-struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred)
+static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred)
{
struct nfs_open_context *ctx;
@@ -981,6 +1083,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp
if (ctx != NULL) {
atomic_set(&ctx->count, 1);
ctx->dentry = dget(dentry);
+ ctx->vfsmnt = mntget(mnt);
ctx->cred = get_rpccred(cred);
ctx->state = NULL;
ctx->lockowner = current->files;
@@ -1011,6 +1114,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
if (ctx->cred != NULL)
put_rpccred(ctx->cred);
dput(ctx->dentry);
+ mntput(ctx->vfsmnt);
kfree(ctx);
}
}
@@ -1019,7 +1123,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
* Ensure that mmap has a recent RPC credential for use when writing out
* shared pages
*/
-void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
+static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
{
struct inode *inode = filp->f_dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(inode);
@@ -1051,7 +1155,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
return ctx;
}
-void nfs_file_clear_open_context(struct file *filp)
+static void nfs_file_clear_open_context(struct file *filp)
{
struct inode *inode = filp->f_dentry->d_inode;
struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data;
@@ -1076,7 +1180,7 @@ int nfs_open(struct inode *inode, struct file *filp)
cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
if (IS_ERR(cred))
return PTR_ERR(cred);
- ctx = alloc_nfs_open_context(filp->f_dentry, cred);
+ ctx = alloc_nfs_open_context(filp->f_vfsmnt, filp->f_dentry, cred);
put_rpccred(cred);
if (ctx == NULL)
return -ENOMEM;
@@ -1185,6 +1289,7 @@ int nfs_attribute_timeout(struct inode *inode)
*/
int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
{
+ nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
&& !nfs_attribute_timeout(inode))
return NFS_STALE(inode) ? -ESTALE : 0;
@@ -1201,6 +1306,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
struct nfs_inode *nfsi = NFS_I(inode);
if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
+ nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
if (S_ISREG(inode->i_mode))
nfs_sync_mapping(mapping);
invalidate_inode_pages2(mapping);
@@ -1299,39 +1405,37 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if ((fattr->valid & NFS_ATTR_FATTR) == 0)
return 0;
+ /* Has the inode gone and changed behind our back? */
+ if (nfsi->fileid != fattr->fileid
+ || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+ return -EIO;
+ }
+
/* Are we in the process of updating data on the server? */
data_unstable = nfs_caches_unstable(inode);
/* Do atomic weak cache consistency updates */
nfs_wcc_update_inode(inode, fattr);
- if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
- nfsi->change_attr != fattr->change_attr) {
+ if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) {
+ if (nfsi->change_attr == fattr->change_attr)
+ goto out;
nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (!data_unstable)
nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
}
- /* Has the inode gone and changed behind our back? */
- if (nfsi->fileid != fattr->fileid
- || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
- return -EIO;
- }
-
- cur_size = i_size_read(inode);
- new_isize = nfs_size_to_loff_t(fattr->size);
-
/* Verify a few of the more important attributes */
if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (!data_unstable)
nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
}
- if (cur_size != new_isize) {
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
- if (nfsi->npages == 0)
- nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
- }
+
+ cur_size = i_size_read(inode);
+ new_isize = nfs_size_to_loff_t(fattr->size);
+ if (cur_size != new_isize && nfsi->npages == 0)
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
/* Have any file permissions changed? */
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
@@ -1343,6 +1447,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if (inode->i_nlink != fattr->nlink)
nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+out:
if (!timespec_equal(&inode->i_atime, &fattr->atime))
nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
@@ -1481,15 +1586,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->cache_change_attribute = jiffies;
}
- if ((fattr->valid & NFS_ATTR_FATTR_V4)
- && nfsi->change_attr != fattr->change_attr) {
- dprintk("NFS: change_attr change on server for file %s/%ld\n",
- inode->i_sb->s_id, inode->i_ino);
- nfsi->change_attr = fattr->change_attr;
- invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
- nfsi->cache_change_attribute = jiffies;
- }
-
/* If ctime has changed we should definitely clear access+acl caches */
if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
@@ -1519,8 +1615,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_blksize = fattr->du.nfs2.blocksize;
}
+ if ((fattr->valid & NFS_ATTR_FATTR_V4)) {
+ if (nfsi->change_attr != fattr->change_attr) {
+ dprintk("NFS: change_attr change on server for file %s/%ld\n",
+ inode->i_sb->s_id, inode->i_ino);
+ nfsi->change_attr = fattr->change_attr;
+ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+ nfsi->cache_change_attribute = jiffies;
+ } else
+ invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA);
+ }
+
/* Update attrtimeo value if we're out of the unstable period */
if (invalid & NFS_INO_INVALID_ATTR) {
+ nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = jiffies;
} else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
@@ -1637,10 +1745,9 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
#endif /* CONFIG_NFS_V3 */
s = ERR_PTR(-ENOMEM);
- server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+ server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
if (!server)
goto out_err;
- memset(server, 0, sizeof(struct nfs_server));
/* Zero out the NFS state stuff */
init_nfsv4_state(server);
server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -1712,6 +1819,7 @@ static void nfs_kill_super(struct super_block *s)
rpciod_down(); /* release rpciod */
+ nfs_free_iostats(server->io_stats);
kfree(server->hostname);
kfree(server);
}
@@ -1738,6 +1846,7 @@ static struct super_operations nfs4_sops = {
.clear_inode = nfs4_clear_inode,
.umount_begin = nfs_umount_begin,
.show_options = nfs_show_options,
+ .show_stats = nfs_show_stats,
};
/*
@@ -1800,6 +1909,9 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
+ server->retrans_timeo = timeparms.to_initval;
+ server->retrans_count = timeparms.to_retries;
+
clp = nfs4_get_client(&server->addr.sin_addr);
if (!clp) {
dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
@@ -1941,10 +2053,9 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
return ERR_PTR(-EINVAL);
}
- server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+ server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
if (!server)
return ERR_PTR(-ENOMEM);
- memset(server, 0, sizeof(struct nfs_server));
/* Zero out the NFS state stuff */
init_nfsv4_state(server);
server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -2024,10 +2135,12 @@ static void nfs4_kill_super(struct super_block *sb)
if (server->client != NULL && !IS_ERR(server->client))
rpc_shutdown_client(server->client);
- rpciod_down(); /* release rpciod */
destroy_nfsv4_state(server);
+ rpciod_down();
+
+ nfs_free_iostats(server->io_stats);
kfree(server->hostname);
kfree(server);
}
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
new file mode 100644
index 00000000000..6350ecbde58
--- /dev/null
+++ b/fs/nfs/iostat.h
@@ -0,0 +1,164 @@
+/*
+ * linux/fs/nfs/iostat.h
+ *
+ * Declarations for NFS client per-mount statistics
+ *
+ * Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com>
+ *
+ * NFS client per-mount statistics provide information about the health of
+ * the NFS client and the health of each NFS mount point. Generally these
+ * are not for detailed problem diagnosis, but simply to indicate that there
+ * is a problem.
+ *
+ * These counters are not meant to be human-readable, but are meant to be
+ * integrated into system monitoring tools such as "sar" and "iostat". As
+ * such, the counters are sampled by the tools over time, and are never
+ * zeroed after a file system is mounted. Moving averages can be computed
+ * by the tools by taking the difference between two instantaneous samples
+ * and dividing that by the time between the samples.
+ */
+
+#ifndef _NFS_IOSTAT
+#define _NFS_IOSTAT
+
+#define NFS_IOSTAT_VERS "1.0"
+
+/*
+ * NFS byte counters
+ *
+ * 1. SERVER - the number of payload bytes read from or written to the
+ * server by the NFS client via an NFS READ or WRITE request.
+ *
+ * 2. NORMAL - the number of bytes read or written by applications via
+ * the read(2) and write(2) system call interfaces.
+ *
+ * 3. DIRECT - the number of bytes read or written from files opened
+ * with the O_DIRECT flag.
+ *
+ * These counters give a view of the data throughput into and out of the NFS
+ * client. Comparing the number of bytes requested by an application with the
+ * number of bytes the client requests from the server can provide an
+ * indication of client efficiency (per-op, cache hits, etc).
+ *
+ * These counters can also help characterize which access methods are in
+ * use. DIRECT by itself shows whether there is any O_DIRECT traffic.
+ * NORMAL + DIRECT shows how much data is going through the system call
+ * interface. A large amount of SERVER traffic without much NORMAL or
+ * DIRECT traffic shows that applications are using mapped files.
+ *
+ * NFS page counters
+ *
+ * These count the number of pages read or written via nfs_readpage(),
+ * nfs_readpages(), or their write equivalents.
+ */
+enum nfs_stat_bytecounters {
+ NFSIOS_NORMALREADBYTES = 0,
+ NFSIOS_NORMALWRITTENBYTES,
+ NFSIOS_DIRECTREADBYTES,
+ NFSIOS_DIRECTWRITTENBYTES,
+ NFSIOS_SERVERREADBYTES,
+ NFSIOS_SERVERWRITTENBYTES,
+ NFSIOS_READPAGES,
+ NFSIOS_WRITEPAGES,
+ __NFSIOS_BYTESMAX,
+};
+
+/*
+ * NFS event counters
+ *
+ * These counters provide a low-overhead way of monitoring client activity
+ * without enabling NFS trace debugging. The counters show the rate at
+ * which VFS requests are made, and how often the client invalidates its
+ * data and attribute caches. This allows system administrators to monitor
+ * such things as how close-to-open is working, and answer questions such
+ * as "why are there so many GETATTR requests on the wire?"
+ *
+ * They also count anamolous events such as short reads and writes, silly
+ * renames due to close-after-delete, and operations that change the size
+ * of a file (such operations can often be the source of data corruption
+ * if applications aren't using file locking properly).
+ */
+enum nfs_stat_eventcounters {
+ NFSIOS_INODEREVALIDATE = 0,
+ NFSIOS_DENTRYREVALIDATE,
+ NFSIOS_DATAINVALIDATE,
+ NFSIOS_ATTRINVALIDATE,
+ NFSIOS_VFSOPEN,
+ NFSIOS_VFSLOOKUP,
+ NFSIOS_VFSACCESS,
+ NFSIOS_VFSUPDATEPAGE,
+ NFSIOS_VFSREADPAGE,
+ NFSIOS_VFSREADPAGES,
+ NFSIOS_VFSWRITEPAGE,
+ NFSIOS_VFSWRITEPAGES,
+ NFSIOS_VFSGETDENTS,
+ NFSIOS_VFSSETATTR,
+ NFSIOS_VFSFLUSH,
+ NFSIOS_VFSFSYNC,
+ NFSIOS_VFSLOCK,
+ NFSIOS_VFSRELEASE,
+ NFSIOS_CONGESTIONWAIT,
+ NFSIOS_SETATTRTRUNC,
+ NFSIOS_EXTENDWRITE,
+ NFSIOS_SILLYRENAME,
+ NFSIOS_SHORTREAD,
+ NFSIOS_SHORTWRITE,
+ NFSIOS_DELAY,
+ __NFSIOS_COUNTSMAX,
+};
+
+#ifdef __KERNEL__
+
+#include <linux/percpu.h>
+#include <linux/cache.h>
+
+struct nfs_iostats {
+ unsigned long long bytes[__NFSIOS_BYTESMAX];
+ unsigned long events[__NFSIOS_COUNTSMAX];
+} ____cacheline_aligned;
+
+static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat)
+{
+ struct nfs_iostats *iostats;
+ int cpu;
+
+ cpu = get_cpu();
+ iostats = per_cpu_ptr(server->io_stats, cpu);
+ iostats->events[stat] ++;
+ put_cpu_no_resched();
+}
+
+static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat)
+{
+ nfs_inc_server_stats(NFS_SERVER(inode), stat);
+}
+
+static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+ struct nfs_iostats *iostats;
+ int cpu;
+
+ cpu = get_cpu();
+ iostats = per_cpu_ptr(server->io_stats, cpu);
+ iostats->bytes[stat] += addend;
+ put_cpu_no_resched();
+}
+
+static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+ nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
+}
+
+static inline struct nfs_iostats *nfs_alloc_iostats(void)
+{
+ return alloc_percpu(struct nfs_iostats);
+}
+
+static inline void nfs_free_iostats(struct nfs_iostats *stats)
+{
+ if (stats != NULL)
+ free_percpu(stats);
+}
+
+#endif
+#endif
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 0b9a78353d6..445abb4d421 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -49,9 +49,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
struct mnt_fhstatus result = {
.fh = fh
};
+ struct rpc_message msg = {
+ .rpc_argp = path,
+ .rpc_resp = &result,
+ };
char hostname[32];
int status;
- int call;
dprintk("NFS: nfs_mount(%08x:%s)\n",
(unsigned)ntohl(addr->sin_addr.s_addr), path);
@@ -61,8 +64,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
if (IS_ERR(mnt_clnt))
return PTR_ERR(mnt_clnt);
- call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT;
- status = rpc_call(mnt_clnt, call, path, &result, 0);
+ if (version == NFS_MNT3_VERSION)
+ msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
+ else
+ msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
+
+ status = rpc_call_sync(mnt_clnt, &msg, 0);
return status < 0? status : (result.status? -EACCES : 0);
}
@@ -137,6 +144,8 @@ static struct rpc_procinfo mnt_procedures[] = {
.p_encode = (kxdrproc_t) xdr_encode_dirpath,
.p_decode = (kxdrproc_t) xdr_decode_fhstatus,
.p_bufsiz = MNT_dirpath_sz << 2,
+ .p_statidx = MNTPROC_MNT,
+ .p_name = "MOUNT",
},
};
@@ -146,6 +155,8 @@ static struct rpc_procinfo mnt3_procedures[] = {
.p_encode = (kxdrproc_t) xdr_encode_dirpath,
.p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
.p_bufsiz = MNT_dirpath_sz << 2,
+ .p_statidx = MOUNTPROC3_MNT,
+ .p_name = "MOUNT",
},
};
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 6548a65de94..f0015fa876e 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -682,7 +682,9 @@ nfs_stat_to_errno(int stat)
.p_encode = (kxdrproc_t) nfs_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs_xdr_##restype, \
.p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \
- .p_timer = timer \
+ .p_timer = timer, \
+ .p_statidx = NFSPROC_##proc, \
+ .p_name = #proc, \
}
struct rpc_procinfo nfs_procedures[] = {
PROC(GETATTR, fhandle, attrstat, 1),
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 6a5bbc0ae94..33287879bd2 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -190,6 +190,10 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
struct nfs3_getaclres res = {
.fattr = &fattr,
};
+ struct rpc_message msg = {
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ };
struct posix_acl *acl;
int status, count;
@@ -218,8 +222,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
return NULL;
dprintk("NFS call getacl\n");
- status = rpc_call(server->client_acl, ACLPROC3_GETACL,
- &args, &res, 0);
+ msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
+ status = rpc_call_sync(server->client_acl, &msg, 0);
dprintk("NFS reply getacl: %d\n", status);
/* pages may have been allocated at the xdr layer. */
@@ -286,6 +290,10 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
.acl_access = acl,
.pages = pages,
};
+ struct rpc_message msg = {
+ .rpc_argp = &args,
+ .rpc_resp = &fattr,
+ };
int status, count;
status = -EOPNOTSUPP;
@@ -306,8 +314,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
dprintk("NFS call setacl\n");
nfs_begin_data_update(inode);
- status = rpc_call(server->client_acl, ACLPROC3_SETACL,
- &args, &fattr, 0);
+ msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
+ status = rpc_call_sync(server->client_acl, &msg, 0);
spin_lock(&inode->i_lock);
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
spin_unlock(&inode->i_lock);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ed67567f055..cf186f0d2b3 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -19,6 +19,8 @@
#include <linux/smp_lock.h>
#include <linux/nfs_mount.h>
+#include "iostat.h"
+
#define NFSDBG_FACILITY NFSDBG_PROC
extern struct rpc_procinfo nfs3_procedures[];
@@ -41,27 +43,14 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
return res;
}
-static inline int
-nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
-{
- struct rpc_message msg = {
- .rpc_proc = &clnt->cl_procinfo[proc],
- .rpc_argp = argp,
- .rpc_resp = resp,
- };
- return nfs3_rpc_wrapper(clnt, &msg, flags);
-}
-
-#define rpc_call(clnt, proc, argp, resp, flags) \
- nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags)
-#define rpc_call_sync(clnt, msg, flags) \
- nfs3_rpc_wrapper(clnt, msg, flags)
+#define rpc_call_sync(clnt, msg, flags) nfs3_rpc_wrapper(clnt, msg, flags)
static int
-nfs3_async_handle_jukebox(struct rpc_task *task)
+nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode)
{
if (task->tk_status != -EJUKEBOX)
return 0;
+ nfs_inc_stats(inode, NFSIOS_DELAY);
task->tk_status = 0;
rpc_restart_call(task);
rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
@@ -72,14 +61,21 @@ static int
do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO],
+ .rpc_argp = fhandle,
+ .rpc_resp = info,
+ };
int status;
dprintk("%s: call fsinfo\n", __FUNCTION__);
nfs_fattr_init(info->fattr);
- status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0);
+ status = rpc_call_sync(client, &msg, 0);
dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
- status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
+ msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+ msg.rpc_resp = info->fattr;
+ status = rpc_call_sync(client, &msg, 0);
dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
}
return status;
@@ -107,12 +103,16 @@ static int
nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR],
+ .rpc_argp = fhandle,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("NFS call getattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(server->client, NFS3PROC_GETATTR,
- fhandle, fattr, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
@@ -126,11 +126,16 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
.fh = NFS_FH(inode),
.sattr = sattr,
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_SETATTR],
+ .rpc_argp = &arg,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("NFS call setattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0)
nfs_setattr_update_inode(inode, sattr);
dprintk("NFS reply setattr: %d\n", status);
@@ -152,15 +157,23 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
.fh = fhandle,
.fattr = fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_LOOKUP],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call lookup %s\n", name->name);
nfs_fattr_init(&dir_attr);
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0);
- if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
- fhandle, fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
+ msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+ msg.rpc_argp = fhandle;
+ msg.rpc_resp = fattr;
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ }
dprintk("NFS reply lookup: %d\n", status);
if (status >= 0)
status = nfs_refresh_inode(dir, &dir_attr);
@@ -180,7 +193,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
.rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS],
.rpc_argp = &arg,
.rpc_resp = &res,
- .rpc_cred = entry->cred
+ .rpc_cred = entry->cred,
};
int mode = entry->mask;
int status;
@@ -226,12 +239,16 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
.pglen = pglen,
.pages = &page
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_READLINK],
+ .rpc_argp = &args,
+ .rpc_resp = &fattr,
+ };
int status;
dprintk("NFS call readlink\n");
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK,
- &args, &fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_refresh_inode(inode, &fattr);
dprintk("NFS reply readlink: %d\n", status);
return status;
@@ -327,6 +344,11 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_CREATE],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
mode_t mode = sattr->ia_mode;
int status;
@@ -343,8 +365,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
again:
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
- nfs_post_op_update_inode(dir, &dir_attr);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ nfs_refresh_inode(dir, &dir_attr);
/* If the server doesn't support the exclusive creation semantics,
* try again with simple 'guarded' mode. */
@@ -447,7 +469,7 @@ nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
struct rpc_message *msg = &task->tk_msg;
struct nfs_fattr *dir_attr;
- if (nfs3_async_handle_jukebox(task))
+ if (nfs3_async_handle_jukebox(task, dir->d_inode))
return 1;
if (msg->rpc_argp) {
dir_attr = (struct nfs_fattr*)msg->rpc_resp;
@@ -474,12 +496,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
.fromattr = &old_dir_attr,
.toattr = &new_dir_attr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
nfs_fattr_init(&old_dir_attr);
nfs_fattr_init(&new_dir_attr);
- status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
nfs_post_op_update_inode(old_dir, &old_dir_attr);
nfs_post_op_update_inode(new_dir, &new_dir_attr);
dprintk("NFS reply rename: %d\n", status);
@@ -500,12 +527,17 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
.dir_attr = &dir_attr,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_LINK],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call link %s\n", name->name);
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
nfs_post_op_update_inode(inode, &fattr);
dprintk("NFS reply link: %d\n", status);
@@ -531,6 +563,11 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
.fh = fhandle,
.fattr = fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
if (path->len > NFS3_MAXPATHLEN)
@@ -538,7 +575,7 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
nfs_fattr_init(&dir_attr);
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
dprintk("NFS reply symlink: %d\n", status);
return status;
@@ -560,6 +597,11 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int mode = sattr->ia_mode;
int status;
@@ -569,7 +611,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
if (status != 0)
goto out;
@@ -591,11 +633,16 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
.name = name->name,
.len = name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_RMDIR],
+ .rpc_argp = &arg,
+ .rpc_resp = &dir_attr,
+ };
int status;
dprintk("NFS call rmdir %s\n", name->name);
nfs_fattr_init(&dir_attr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
dprintk("NFS reply rmdir: %d\n", status);
return status;
@@ -672,6 +719,11 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fh,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
mode_t mode = sattr->ia_mode;
int status;
@@ -690,7 +742,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
if (status != 0)
goto out;
@@ -707,11 +759,16 @@ static int
nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsstat *stat)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_FSSTAT],
+ .rpc_argp = fhandle,
+ .rpc_resp = stat,
+ };
int status;
dprintk("NFS call fsstat\n");
nfs_fattr_init(stat->fattr);
- status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply statfs: %d\n", status);
return status;
}
@@ -720,11 +777,16 @@ static int
nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO],
+ .rpc_argp = fhandle,
+ .rpc_resp = info,
+ };
int status;
dprintk("NFS call fsinfo\n");
nfs_fattr_init(info->fattr);
- status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
+ status = rpc_call_sync(server->client_sys, &msg, 0);
dprintk("NFS reply fsinfo: %d\n", status);
return status;
}
@@ -733,40 +795,34 @@ static int
nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_pathconf *info)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_PATHCONF],
+ .rpc_argp = fhandle,
+ .rpc_resp = info,
+ };
int status;
dprintk("NFS call pathconf\n");
nfs_fattr_init(info->fattr);
- status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply pathconf: %d\n", status);
return status;
}
extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
-static void nfs3_read_done(struct rpc_task *task, void *calldata)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
-
- if (nfs3_async_handle_jukebox(task))
- return;
+ if (nfs3_async_handle_jukebox(task, data->inode))
+ return -EAGAIN;
/* Call back common NFS readpage processing */
if (task->tk_status >= 0)
nfs_refresh_inode(data->inode, &data->fattr);
- nfs_readpage_result(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs3_read_ops = {
- .rpc_call_done = nfs3_read_done,
- .rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs3_proc_read_setup(struct nfs_read_data *data)
+static void nfs3_proc_read_setup(struct nfs_read_data *data)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_READ],
.rpc_argp = &data->args,
@@ -774,37 +830,20 @@ nfs3_proc_read_setup(struct nfs_read_data *data)
.rpc_cred = data->cred,
};
- /* N.B. Do we need to test? Never called for swapfile inode */
- flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs3_write_done(struct rpc_task *task, void *calldata)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
-
- if (nfs3_async_handle_jukebox(task))
- return;
+ if (nfs3_async_handle_jukebox(task, data->inode))
+ return -EAGAIN;
if (task->tk_status >= 0)
nfs_post_op_update_inode(data->inode, data->res.fattr);
- nfs_writeback_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs3_write_ops = {
- .rpc_call_done = nfs3_write_done,
- .rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs3_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_write_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int stable;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_WRITE],
.rpc_argp = &data->args,
@@ -812,45 +851,28 @@ nfs3_proc_write_setup(struct nfs_write_data *data, int how)
.rpc_cred = data->cred,
};
+ data->args.stable = NFS_UNSTABLE;
if (how & FLUSH_STABLE) {
- if (!NFS_I(inode)->ncommit)
- stable = NFS_FILE_SYNC;
- else
- stable = NFS_DATA_SYNC;
- } else
- stable = NFS_UNSTABLE;
- data->args.stable = stable;
-
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ data->args.stable = NFS_FILE_SYNC;
+ if (NFS_I(data->inode)->ncommit)
+ data->args.stable = NFS_DATA_SYNC;
+ }
/* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs3_commit_done(struct rpc_task *task, void *calldata)
+static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
-
- if (nfs3_async_handle_jukebox(task))
- return;
+ if (nfs3_async_handle_jukebox(task, data->inode))
+ return -EAGAIN;
if (task->tk_status >= 0)
nfs_post_op_update_inode(data->inode, data->res.fattr);
- nfs_commit_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs3_commit_ops = {
- .rpc_call_done = nfs3_commit_done,
- .rpc_release = nfs_commit_release,
-};
-
-static void
-nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT],
.rpc_argp = &data->args,
@@ -858,12 +880,7 @@ nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
.rpc_cred = data->cred,
};
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
static int
@@ -902,8 +919,11 @@ struct nfs_rpc_ops nfs_v3_clientops = {
.pathconf = nfs3_proc_pathconf,
.decode_dirent = nfs3_decode_dirent,
.read_setup = nfs3_proc_read_setup,
+ .read_done = nfs3_read_done,
.write_setup = nfs3_proc_write_setup,
+ .write_done = nfs3_write_done,
.commit_setup = nfs3_proc_commit_setup,
+ .commit_done = nfs3_commit_done,
.file_open = nfs_open,
.file_release = nfs_release,
.lock = nfs3_proc_lock,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 5224a191efb..ec233619687 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1109,7 +1109,9 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
.p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs3_xdr_##restype, \
.p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \
- .p_timer = timer \
+ .p_timer = timer, \
+ .p_statidx = NFS3PROC_##proc, \
+ .p_name = #proc, \
}
struct rpc_procinfo nfs3_procedures[] = {
@@ -1150,6 +1152,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
.p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
.p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2,
.p_timer = 1,
+ .p_name = "GETACL",
},
[ACLPROC3_SETACL] = {
.p_proc = ACLPROC3_SETACL,
@@ -1157,6 +1160,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
.p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
.p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2,
.p_timer = 0,
+ .p_name = "SETACL",
},
};
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f8c0066e02e..47ece1dd3c6 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,6 +51,7 @@
#include "nfs4_fs.h"
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_PROC
@@ -335,7 +336,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
if (!(data->f_attr.valid & NFS_ATTR_FATTR))
goto out;
inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
- if (inode == NULL)
+ if (IS_ERR(inode))
goto out;
state = nfs4_get_open_state(inode, data->owner);
if (state == NULL)
@@ -604,11 +605,14 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
int status;
atomic_inc(&data->count);
+ /*
+ * If rpc_run_task() ends up calling ->rpc_release(), we
+ * want to ensure that it takes the 'error' code path.
+ */
+ data->rpc_status = -ENOMEM;
task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
- if (IS_ERR(task)) {
- nfs4_opendata_free(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
status = nfs4_wait_for_completion_rpc_task(task);
if (status != 0) {
data->cancelled = 1;
@@ -707,11 +711,14 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
int status;
atomic_inc(&data->count);
+ /*
+ * If rpc_run_task() ends up calling ->rpc_release(), we
+ * want to ensure that it takes the 'error' code path.
+ */
+ data->rpc_status = -ENOMEM;
task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
- if (IS_ERR(task)) {
- nfs4_opendata_free(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
status = nfs4_wait_for_completion_rpc_task(task);
if (status != 0) {
data->cancelled = 1;
@@ -908,7 +915,7 @@ out_put_state_owner:
static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
{
struct nfs4_exception exception = { };
- struct nfs4_state *res;
+ struct nfs4_state *res = ERR_PTR(-EIO);
int err;
do {
@@ -1017,12 +1024,12 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
return res;
}
-static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
- struct nfs_fh *fhandle, struct iattr *sattr,
- struct nfs4_state *state)
+static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
+ struct iattr *sattr, struct nfs4_state *state)
{
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs_setattrargs arg = {
- .fh = fhandle,
+ .fh = NFS_FH(inode),
.iap = sattr,
.server = server,
.bitmask = server->attr_bitmask,
@@ -1041,7 +1048,9 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
nfs_fattr_init(fattr);
- if (state != NULL) {
+ if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
+ /* Use that stateid */
+ } else if (state != NULL) {
msg.rpc_cred = state->owner->so_cred;
nfs4_copy_stateid(&arg.stateid, state, current->files);
} else
@@ -1053,16 +1062,15 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
return status;
}
-static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
- struct nfs_fh *fhandle, struct iattr *sattr,
- struct nfs4_state *state)
+static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
+ struct iattr *sattr, struct nfs4_state *state)
{
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = { };
int err;
do {
err = nfs4_handle_exception(server,
- _nfs4_do_setattr(server, fattr, fhandle, sattr,
- state),
+ _nfs4_do_setattr(inode, fattr, sattr, state),
&exception);
} while (exception.retry);
return err;
@@ -1503,8 +1511,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
if (ctx != NULL)
state = ctx->state;
- status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
- NFS_FH(inode), sattr, state);
+ status = nfs4_do_setattr(inode, fattr, sattr, state);
if (status == 0)
nfs_setattr_update_inode(inode, sattr);
if (ctx != NULL)
@@ -1823,8 +1830,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
d_instantiate(dentry, igrab(state->inode));
if (flags & O_EXCL) {
struct nfs_fattr fattr;
- status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
- NFS_FH(state->inode), sattr, state);
+ status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
if (status == 0)
nfs_setattr_update_inode(state->inode, sattr);
}
@@ -2344,75 +2350,50 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return err;
}
-static void nfs4_read_done(struct rpc_task *task, void *calldata)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
- struct inode *inode = data->inode;
+ struct nfs_server *server = NFS_SERVER(data->inode);
- if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
+ if (nfs4_async_handle_error(task, server) == -EAGAIN) {
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (task->tk_status > 0)
- renew_lease(NFS_SERVER(inode), data->timestamp);
- /* Call back common NFS readpage processing */
- nfs_readpage_result(task, calldata);
+ renew_lease(server, data->timestamp);
+ return 0;
}
-static const struct rpc_call_ops nfs4_read_ops = {
- .rpc_call_done = nfs4_read_done,
- .rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs4_proc_read_setup(struct nfs_read_data *data)
+static void nfs4_proc_read_setup(struct nfs_read_data *data)
{
- struct rpc_task *task = &data->task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = data->cred,
};
- struct inode *inode = data->inode;
- int flags;
data->timestamp = jiffies;
- /* N.B. Do we need to test? Never called for swapfile inode */
- flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs4_write_done(struct rpc_task *task, void *calldata)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
struct inode *inode = data->inode;
if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (task->tk_status >= 0) {
renew_lease(NFS_SERVER(inode), data->timestamp);
nfs_post_op_update_inode(inode, data->res.fattr);
}
- /* Call back common NFS writeback processing */
- nfs_writeback_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs4_write_ops = {
- .rpc_call_done = nfs4_write_done,
- .rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs4_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_write_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
.rpc_argp = &data->args,
@@ -2422,7 +2403,6 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
struct inode *inode = data->inode;
struct nfs_server *server = NFS_SERVER(inode);
int stable;
- int flags;
if (how & FLUSH_STABLE) {
if (!NFS_I(inode)->ncommit)
@@ -2437,57 +2417,37 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
data->timestamp = jiffies;
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
/* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs4_commit_done(struct rpc_task *task, void *calldata)
+static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
struct inode *inode = data->inode;
if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (task->tk_status >= 0)
nfs_post_op_update_inode(inode, data->res.fattr);
- /* Call back common NFS writeback processing */
- nfs_commit_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs4_commit_ops = {
- .rpc_call_done = nfs4_commit_done,
- .rpc_release = nfs_commit_release,
-};
-
-static void
-nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = data->cred,
};
- struct inode *inode = data->inode;
- struct nfs_server *server = NFS_SERVER(inode);
- int flags;
+ struct nfs_server *server = NFS_SERVER(data->inode);
data->args.bitmask = server->attr_bitmask;
data->res.server = server;
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
/*
@@ -2755,8 +2715,10 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
rpc_wake_up_task(task);
task->tk_status = 0;
return -EAGAIN;
- case -NFS4ERR_GRACE:
case -NFS4ERR_DELAY:
+ nfs_inc_server_stats((struct nfs_server *) server,
+ NFSIOS_DELAY);
+ case -NFS4ERR_GRACE:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
task->tk_status = 0;
return -EAGAIN;
@@ -2893,8 +2855,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
return status;
}
-int
-nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
{
struct nfs_fsinfo fsinfo;
struct rpc_message msg = {
@@ -2918,6 +2879,24 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
return status;
}
+int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+{
+ long timeout;
+ int err;
+ do {
+ err = _nfs4_proc_setclientid_confirm(clp, cred);
+ switch (err) {
+ case 0:
+ return err;
+ case -NFS4ERR_RESOURCE:
+ /* The IBM lawyers misread another document! */
+ case -NFS4ERR_DELAY:
+ err = nfs4_delay(clp->cl_rpcclient, &timeout);
+ }
+ } while (err == 0);
+ return err;
+}
+
struct nfs4_delegreturndata {
struct nfs4_delegreturnargs args;
struct nfs4_delegreturnres res;
@@ -2958,7 +2937,7 @@ static void nfs4_delegreturn_release(void *calldata)
kfree(calldata);
}
-const static struct rpc_call_ops nfs4_delegreturn_ops = {
+static const struct rpc_call_ops nfs4_delegreturn_ops = {
.rpc_call_prepare = nfs4_delegreturn_prepare,
.rpc_call_done = nfs4_delegreturn_done,
.rpc_release = nfs4_delegreturn_release,
@@ -2986,10 +2965,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
data->rpc_status = 0;
task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data);
- if (IS_ERR(task)) {
- nfs4_delegreturn_release(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
status = nfs4_wait_for_completion_rpc_task(task);
if (status == 0) {
status = data->rpc_status;
@@ -3209,7 +3186,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
struct nfs_seqid *seqid)
{
struct nfs4_unlockdata *data;
- struct rpc_task *task;
data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
if (data == NULL) {
@@ -3219,10 +3195,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
/* Unlock _before_ we do the RPC call */
do_vfs_lock(fl->fl_file, fl);
- task = rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
- if (IS_ERR(task))
- nfs4_locku_release_calldata(data);
- return task;
+ return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
}
static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
@@ -3403,10 +3376,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
data->arg.reclaim = 1;
task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC,
&nfs4_lock_ops, data);
- if (IS_ERR(task)) {
- nfs4_lock_release(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
ret = nfs4_wait_for_completion_rpc_task(task);
if (ret == 0) {
ret = data->rpc_status;
@@ -3588,6 +3559,8 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
{
size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1;
+ if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
+ return 0;
if (buf && buflen < len)
return -ERANGE;
if (buf)
@@ -3644,8 +3617,11 @@ struct nfs_rpc_ops nfs_v4_clientops = {
.pathconf = nfs4_proc_pathconf,
.decode_dirent = nfs4_decode_dirent,
.read_setup = nfs4_proc_read_setup,
+ .read_done = nfs4_read_done,
.write_setup = nfs4_proc_write_setup,
+ .write_done = nfs4_write_done,
.commit_setup = nfs4_proc_commit_setup,
+ .commit_done = nfs4_commit_done,
.file_open = nfs_open,
.file_release = nfs_release,
.lock = nfs4_proc_lock,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index afad0255e7d..96e5b82c153 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -977,6 +977,7 @@ out:
out_error:
printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
NIPQUAD(clp->cl_addr.s_addr), -status);
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
goto out;
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0a1bd36a483..7c5d70efe72 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4344,6 +4344,8 @@ nfs_stat_to_errno(int stat)
.p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
.p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \
+ .p_statidx = NFSPROC4_CLNT_##proc, \
+ .p_name = #proc, \
}
struct rpc_procinfo nfs4_procedures[] = {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d53857b148e..106aca388eb 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -85,6 +85,9 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
atomic_set(&req->wb_complete, 0);
req->wb_index = page->index;
page_cache_get(page);
+ BUG_ON(PagePrivate(page));
+ BUG_ON(!PageLocked(page));
+ BUG_ON(page->mapping->host != inode);
req->wb_offset = offset;
req->wb_pgbase = offset;
req->wb_bytes = count;
@@ -132,9 +135,11 @@ void nfs_clear_page_writeback(struct nfs_page *req)
{
struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
- spin_lock(&nfsi->req_lock);
- radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
- spin_unlock(&nfsi->req_lock);
+ if (req->wb_page != NULL) {
+ spin_lock(&nfsi->req_lock);
+ radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+ spin_unlock(&nfsi->req_lock);
+ }
nfs_unlock_request(req);
}
@@ -147,8 +152,9 @@ void nfs_clear_page_writeback(struct nfs_page *req)
*/
void nfs_clear_request(struct nfs_page *req)
{
- if (req->wb_page) {
- page_cache_release(req->wb_page);
+ struct page *page = req->wb_page;
+ if (page != NULL) {
+ page_cache_release(page);
req->wb_page = NULL;
}
}
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f5150d71c03..9dd85cac2df 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -58,16 +58,23 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
{
struct nfs_fattr *fattr = info->fattr;
struct nfs2_fsstat fsinfo;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_GETATTR],
+ .rpc_argp = fhandle,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("%s: call getattr\n", __FUNCTION__);
nfs_fattr_init(fattr);
- status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
+ status = rpc_call_sync(server->client_sys, &msg, 0);
dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
if (status)
return status;
dprintk("%s: call statfs\n", __FUNCTION__);
- status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+ msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
+ msg.rpc_resp = &fsinfo;
+ status = rpc_call_sync(server->client_sys, &msg, 0);
dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
if (status)
return status;
@@ -90,12 +97,16 @@ static int
nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_GETATTR],
+ .rpc_argp = fhandle,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("NFS call getattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(server->client, NFSPROC_GETATTR,
- fhandle, fattr, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
@@ -109,6 +120,11 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
.fh = NFS_FH(inode),
.sattr = sattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_SETATTR],
+ .rpc_argp = &arg,
+ .rpc_resp = fattr,
+ };
int status;
/* Mask out the non-modebit related stuff from attr->ia_mode */
@@ -116,7 +132,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
dprintk("NFS call setattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0)
nfs_setattr_update_inode(inode, sattr);
dprintk("NFS reply setattr: %d\n", status);
@@ -136,11 +152,16 @@ nfs_proc_lookup(struct inode *dir, struct qstr *name,
.fh = fhandle,
.fattr = fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_LOOKUP],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call lookup %s\n", name->name);
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
dprintk("NFS reply lookup: %d\n", status);
return status;
}
@@ -154,10 +175,14 @@ static int nfs_proc_readlink(struct inode *inode, struct page *page,
.pglen = pglen,
.pages = &page
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_READLINK],
+ .rpc_argp = &args,
+ };
int status;
dprintk("NFS call readlink\n");
- status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
dprintk("NFS reply readlink: %d\n", status);
return status;
}
@@ -233,11 +258,16 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_CREATE],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
nfs_fattr_init(&fattr);
dprintk("NFS call create %s\n", dentry->d_name.name);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
dprintk("NFS reply create: %d\n", status);
@@ -263,6 +293,11 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_CREATE],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status, mode;
dprintk("NFS call mknod %s\n", dentry->d_name.name);
@@ -277,13 +312,13 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
}
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
if (status == -EINVAL && S_ISFIFO(mode)) {
sattr->ia_mode = mode;
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
}
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -302,8 +337,6 @@ nfs_proc_remove(struct inode *dir, struct qstr *name)
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_REMOVE],
.rpc_argp = &arg,
- .rpc_resp = NULL,
- .rpc_cred = NULL
};
int status;
@@ -355,10 +388,14 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
.toname = new_name->name,
.tolen = new_name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_RENAME],
+ .rpc_argp = &arg,
+ };
int status;
dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
- status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
nfs_mark_for_revalidate(old_dir);
nfs_mark_for_revalidate(new_dir);
dprintk("NFS reply rename: %d\n", status);
@@ -374,10 +411,14 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
.toname = name->name,
.tolen = name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_LINK],
+ .rpc_argp = &arg,
+ };
int status;
dprintk("NFS call link %s\n", name->name);
- status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_mark_for_revalidate(inode);
nfs_mark_for_revalidate(dir);
dprintk("NFS reply link: %d\n", status);
@@ -397,6 +438,10 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
.tolen = path->len,
.sattr = sattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_SYMLINK],
+ .rpc_argp = &arg,
+ };
int status;
if (path->len > NFS2_MAXPATHLEN)
@@ -404,7 +449,7 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
nfs_fattr_init(fattr);
fhandle->size = 0;
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
dprintk("NFS reply symlink: %d\n", status);
return status;
@@ -425,11 +470,16 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_MKDIR],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call mkdir %s\n", dentry->d_name.name);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -445,10 +495,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
.name = name->name,
.len = name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_RMDIR],
+ .rpc_argp = &arg,
+ };
int status;
dprintk("NFS call rmdir %s\n", name->name);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
dprintk("NFS reply rmdir: %d\n", status);
return status;
@@ -470,13 +524,12 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
.fh = NFS_FH(dir),
.cookie = cookie,
.count = count,
- .pages = &page
+ .pages = &page,
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_READDIR],
.rpc_argp = &arg,
- .rpc_resp = NULL,
- .rpc_cred = cred
+ .rpc_cred = cred,
};
int status;
@@ -495,11 +548,16 @@ nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsstat *stat)
{
struct nfs2_fsstat fsinfo;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_STATFS],
+ .rpc_argp = fhandle,
+ .rpc_resp = &fsinfo,
+ };
int status;
dprintk("NFS call statfs\n");
nfs_fattr_init(stat->fattr);
- status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply statfs: %d\n", status);
if (status)
goto out;
@@ -518,11 +576,16 @@ nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
struct nfs2_fsstat fsinfo;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_STATFS],
+ .rpc_argp = fhandle,
+ .rpc_resp = &fsinfo,
+ };
int status;
dprintk("NFS call fsinfo\n");
nfs_fattr_init(info->fattr);
- status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply fsinfo: %d\n", status);
if (status)
goto out;
@@ -550,10 +613,8 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
-static void nfs_read_done(struct rpc_task *task, void *calldata)
+static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
-
if (task->tk_status >= 0) {
nfs_refresh_inode(data->inode, data->res.fattr);
/* Emulate the eof flag, which isn't normally needed in NFSv2
@@ -562,20 +623,11 @@ static void nfs_read_done(struct rpc_task *task, void *calldata)
if (data->args.offset + data->args.count >= data->res.fattr->size)
data->res.eof = 1;
}
- nfs_readpage_result(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs_read_ops = {
- .rpc_call_done = nfs_read_done,
- .rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs_proc_read_setup(struct nfs_read_data *data)
+static void nfs_proc_read_setup(struct nfs_read_data *data)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_READ],
.rpc_argp = &data->args,
@@ -583,34 +635,18 @@ nfs_proc_read_setup(struct nfs_read_data *data)
.rpc_cred = data->cred,
};
- /* N.B. Do we need to test? Never called for swapfile inode */
- flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs_write_done(struct rpc_task *task, void *calldata)
+static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
-
if (task->tk_status >= 0)
nfs_post_op_update_inode(data->inode, data->res.fattr);
- nfs_writeback_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs_write_ops = {
- .rpc_call_done = nfs_write_done,
- .rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs_proc_write_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_WRITE],
.rpc_argp = &data->args,
@@ -621,12 +657,8 @@ nfs_proc_write_setup(struct nfs_write_data *data, int how)
/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
data->args.stable = NFS_FILE_SYNC;
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
/* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
static void
@@ -672,7 +704,9 @@ struct nfs_rpc_ops nfs_v2_clientops = {
.pathconf = nfs_proc_pathconf,
.decode_dirent = nfs_decode_dirent,
.read_setup = nfs_proc_read_setup,
+ .read_done = nfs_read_done,
.write_setup = nfs_proc_write_setup,
+ .write_done = nfs_write_done,
.commit_setup = nfs_proc_commit_setup,
.file_open = nfs_open,
.file_release = nfs_release,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 05eb43fadf8..3961524fd4a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -31,17 +31,49 @@
#include <asm/system.h>
+#include "iostat.h"
+
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
static int nfs_pagein_one(struct list_head *, struct inode *);
-static void nfs_readpage_result_partial(struct nfs_read_data *, int);
-static void nfs_readpage_result_full(struct nfs_read_data *, int);
+static const struct rpc_call_ops nfs_read_partial_ops;
+static const struct rpc_call_ops nfs_read_full_ops;
static kmem_cache_t *nfs_rdata_cachep;
-mempool_t *nfs_rdata_mempool;
+static mempool_t *nfs_rdata_mempool;
#define MIN_POOL_READ (32)
+struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+{
+ struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
+
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->pages);
+ if (pagecount < NFS_PAGEVEC_SIZE)
+ p->pagevec = &p->page_array[0];
+ else {
+ size_t size = ++pagecount * sizeof(struct page *);
+ p->pagevec = kmalloc(size, GFP_NOFS);
+ if (p->pagevec) {
+ memset(p->pagevec, 0, size);
+ } else {
+ mempool_free(p, nfs_rdata_mempool);
+ p = NULL;
+ }
+ }
+ }
+ return p;
+}
+
+void nfs_readdata_free(struct nfs_read_data *p)
+{
+ if (p && (p->pagevec != &p->page_array[0]))
+ kfree(p->pagevec);
+ mempool_free(p, nfs_rdata_mempool);
+}
+
void nfs_readdata_release(void *data)
{
nfs_readdata_free(data);
@@ -133,6 +165,8 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
}
count -= result;
rdata->args.pgbase += result;
+ nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result);
+
/* Note: result == 0 should only happen if we're caching
* a write that extends the file and punches a hole.
*/
@@ -196,9 +230,11 @@ static void nfs_readpage_release(struct nfs_page *req)
* Set up the NFS read request struct
*/
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+ const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset)
{
struct inode *inode;
+ int flags;
data->req = req;
data->inode = inode = req->wb_context->dentry->d_inode;
@@ -216,6 +252,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
data->res.eof = 0;
nfs_fattr_init(&data->fattr);
+ /* Set up the initial task struct. */
+ flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+ rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
NFS_PROTO(inode)->read_setup(data);
data->task.tk_cookie = (unsigned long)inode;
@@ -303,14 +342,15 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
list_del_init(&data->pages);
data->pagevec[0] = page;
- data->complete = nfs_readpage_result_partial;
if (nbytes > rsize) {
- nfs_read_rpcsetup(req, data, rsize, offset);
+ nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+ rsize, offset);
offset += rsize;
nbytes -= rsize;
} else {
- nfs_read_rpcsetup(req, data, nbytes, offset);
+ nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+ nbytes, offset);
nbytes = 0;
}
nfs_execute_read(data);
@@ -356,8 +396,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
}
req = nfs_list_entry(data->pages.next);
- data->complete = nfs_readpage_result_full;
- nfs_read_rpcsetup(req, data, count, 0);
+ nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
nfs_execute_read(data);
return 0;
@@ -391,12 +430,15 @@ nfs_pagein_list(struct list_head *head, int rpages)
/*
* Handle a read reply that fills part of a page.
*/
-static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
{
+ struct nfs_read_data *data = calldata;
struct nfs_page *req = data->req;
struct page *page = req->wb_page;
- if (status >= 0) {
+ if (nfs_readpage_result(task, data) != 0)
+ return;
+ if (task->tk_status >= 0) {
unsigned int request = data->args.count;
unsigned int result = data->res.count;
@@ -415,20 +457,28 @@ static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
}
}
+static const struct rpc_call_ops nfs_read_partial_ops = {
+ .rpc_call_done = nfs_readpage_result_partial,
+ .rpc_release = nfs_readdata_release,
+};
+
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
-static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
{
+ struct nfs_read_data *data = calldata;
unsigned int count = data->res.count;
+ if (nfs_readpage_result(task, data) != 0)
+ return;
while (!list_empty(&data->pages)) {
struct nfs_page *req = nfs_list_entry(data->pages.next);
struct page *page = req->wb_page;
nfs_list_remove_request(req);
- if (status >= 0) {
+ if (task->tk_status >= 0) {
if (count < PAGE_CACHE_SIZE) {
if (count < req->wb_bytes)
memclear_highpage_flush(page,
@@ -444,22 +494,33 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
}
}
+static const struct rpc_call_ops nfs_read_full_ops = {
+ .rpc_call_done = nfs_readpage_result_full,
+ .rpc_release = nfs_readdata_release,
+};
+
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
-void nfs_readpage_result(struct rpc_task *task, void *calldata)
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
struct nfs_readargs *argp = &data->args;
struct nfs_readres *resp = &data->res;
- int status = task->tk_status;
+ int status;
dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
- task->tk_pid, status);
+ task->tk_pid, task->tk_status);
+
+ status = NFS_PROTO(data->inode)->read_done(task, data);
+ if (status != 0)
+ return status;
+
+ nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
/* Is this a short read? */
if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+ nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
/* Has the server at least made some progress? */
if (resp->count != 0) {
/* Yes, so retry the read at the end of the data */
@@ -467,14 +528,14 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata)
argp->pgbase += resp->count;
argp->count -= resp->count;
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
task->tk_status = -EIO;
}
spin_lock(&data->inode->i_lock);
NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
spin_unlock(&data->inode->i_lock);
- data->complete(data, status);
+ return 0;
}
/*
@@ -491,6 +552,9 @@ int nfs_readpage(struct file *file, struct page *page)
dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
page, PAGE_CACHE_SIZE, page->index);
+ nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
+ nfs_add_stats(inode, NFSIOS_READPAGES, 1);
+
/*
* Try to flush any pending writes to the file..
*
@@ -570,6 +634,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
nr_pages);
+ nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
if (filp == NULL) {
desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
@@ -582,6 +647,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (!list_empty(&head)) {
int err = nfs_pagein_list(&head, server->rpages);
if (!ret)
+ nfs_add_stats(inode, NFSIOS_READPAGES, err);
ret = err;
}
put_nfs_open_context(desc.ctx);
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index a65c7b53d55..0e28189c215 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -163,10 +163,9 @@ nfs_async_unlink(struct dentry *dentry)
struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode);
int status = -ENOMEM;
- data = kmalloc(sizeof(*data), GFP_KERNEL);
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
goto out;
- memset(data, 0, sizeof(*data));
data->cred = rpcauth_lookupcred(clnt->cl_auth, 0);
if (IS_ERR(data->cred)) {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9449b683550..3f5225404c9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -63,6 +63,7 @@
#include <linux/smp_lock.h>
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
@@ -76,20 +77,21 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
struct inode *,
struct page *,
unsigned int, unsigned int);
-static void nfs_writeback_done_partial(struct nfs_write_data *, int);
-static void nfs_writeback_done_full(struct nfs_write_data *, int);
static int nfs_wait_on_write_congestion(struct address_space *, int);
static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
unsigned int npages, int how);
+static const struct rpc_call_ops nfs_write_partial_ops;
+static const struct rpc_call_ops nfs_write_full_ops;
+static const struct rpc_call_ops nfs_commit_ops;
static kmem_cache_t *nfs_wdata_cachep;
-mempool_t *nfs_wdata_mempool;
+static mempool_t *nfs_wdata_mempool;
static mempool_t *nfs_commit_mempool;
static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
-static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
{
struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
@@ -100,11 +102,39 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
p->pagevec = &p->page_array[0];
else {
size_t size = ++pagecount * sizeof(struct page *);
+ p->pagevec = kzalloc(size, GFP_NOFS);
+ if (!p->pagevec) {
+ mempool_free(p, nfs_commit_mempool);
+ p = NULL;
+ }
+ }
+ }
+ return p;
+}
+
+void nfs_commit_free(struct nfs_write_data *p)
+{
+ if (p && (p->pagevec != &p->page_array[0]))
+ kfree(p->pagevec);
+ mempool_free(p, nfs_commit_mempool);
+}
+
+struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+{
+ struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
+
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->pages);
+ if (pagecount < NFS_PAGEVEC_SIZE)
+ p->pagevec = &p->page_array[0];
+ else {
+ size_t size = ++pagecount * sizeof(struct page *);
p->pagevec = kmalloc(size, GFP_NOFS);
if (p->pagevec) {
memset(p->pagevec, 0, size);
} else {
- mempool_free(p, nfs_commit_mempool);
+ mempool_free(p, nfs_wdata_mempool);
p = NULL;
}
}
@@ -112,11 +142,11 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
return p;
}
-static inline void nfs_commit_free(struct nfs_write_data *p)
+void nfs_writedata_free(struct nfs_write_data *p)
{
if (p && (p->pagevec != &p->page_array[0]))
kfree(p->pagevec);
- mempool_free(p, nfs_commit_mempool);
+ mempool_free(p, nfs_wdata_mempool);
}
void nfs_writedata_release(void *wdata)
@@ -136,6 +166,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
if (i_size >= end)
return;
+ nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
i_size_write(inode, end);
}
@@ -225,6 +256,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
wdata->args.pgbase += result;
written += result;
count -= result;
+ nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
} while (count);
/* Update file length */
nfs_grow_file(page, offset, written);
@@ -281,6 +313,9 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
int priority = wb_priority(wbc);
int err;
+ nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
+ nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
+
/*
* Note: We need to ensure that we have a reference to the inode
* if we are to do asynchronous writes. If not, waiting
@@ -345,6 +380,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
struct inode *inode = mapping->host;
int err;
+ nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
+
err = generic_writepages(mapping, wbc);
if (err)
return err;
@@ -356,6 +393,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
if (err < 0)
goto out;
+ nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
wbc->nr_to_write -= err;
if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
err = nfs_wait_on_requests(inode, 0, 0);
@@ -391,6 +429,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
if (nfs_have_delegation(inode, FMODE_WRITE))
nfsi->change_attr++;
}
+ SetPagePrivate(req->wb_page);
nfsi->npages++;
atomic_inc(&req->wb_count);
return 0;
@@ -407,6 +446,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
BUG_ON (!NFS_WBACK_BUSY(req));
spin_lock(&nfsi->req_lock);
+ ClearPagePrivate(req->wb_page);
radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
nfsi->npages--;
if (!nfsi->npages) {
@@ -499,8 +539,7 @@ nfs_mark_request_commit(struct nfs_page *req)
*
* Interruptible by signals only if mounted with intr flag.
*/
-static int
-nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *req;
@@ -513,7 +552,6 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
else
idx_end = idx_start + npages - 1;
- spin_lock(&nfsi->req_lock);
next = idx_start;
while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
if (req->wb_index > idx_end)
@@ -526,15 +564,25 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
spin_unlock(&nfsi->req_lock);
error = nfs_wait_on_request(req);
nfs_release_request(req);
+ spin_lock(&nfsi->req_lock);
if (error < 0)
return error;
- spin_lock(&nfsi->req_lock);
res++;
}
- spin_unlock(&nfsi->req_lock);
return res;
}
+static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ int ret;
+
+ spin_lock(&nfsi->req_lock);
+ ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
+ spin_unlock(&nfsi->req_lock);
+ return ret;
+}
+
/*
* nfs_scan_dirty - Scan an inode for dirty requests
* @inode: NFS inode to scan
@@ -586,6 +634,11 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st
}
return res;
}
+#else
+static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
+{
+ return 0;
+}
#endif
static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
@@ -598,6 +651,9 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
if (!bdi_write_congested(bdi))
return 0;
+
+ nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT);
+
if (intr) {
struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
sigset_t oldset;
@@ -653,8 +709,11 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
spin_unlock(&nfsi->req_lock);
error = nfs_wait_on_request(req);
nfs_release_request(req);
- if (error < 0)
+ if (error < 0) {
+ if (new)
+ nfs_release_request(new);
return ERR_PTR(error);
+ }
continue;
}
spin_unlock(&nfsi->req_lock);
@@ -748,6 +807,8 @@ int nfs_updatepage(struct file *file, struct page *page,
struct nfs_page *req;
int status = 0;
+ nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
+
dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name, count,
@@ -857,10 +918,12 @@ static inline int flush_task_priority(int how)
*/
static void nfs_write_rpcsetup(struct nfs_page *req,
struct nfs_write_data *data,
+ const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset,
int how)
{
struct inode *inode;
+ int flags;
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
@@ -881,6 +944,9 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
+ /* Set up the initial task struct. */
+ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
NFS_PROTO(inode)->write_setup(data, how);
data->task.tk_priority = flush_task_priority(how);
@@ -910,7 +976,7 @@ static void nfs_execute_write(struct nfs_write_data *data)
* Generate multiple small requests to write out a single
* contiguous dirty area on one page.
*/
-static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
+static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
{
struct nfs_page *req = nfs_list_entry(head->next);
struct page *page = req->wb_page;
@@ -944,14 +1010,15 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
list_del_init(&data->pages);
data->pagevec[0] = page;
- data->complete = nfs_writeback_done_partial;
if (nbytes > wsize) {
- nfs_write_rpcsetup(req, data, wsize, offset, how);
+ nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+ wsize, offset, how);
offset += wsize;
nbytes -= wsize;
} else {
- nfs_write_rpcsetup(req, data, nbytes, offset, how);
+ nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+ nbytes, offset, how);
nbytes = 0;
}
nfs_execute_write(data);
@@ -978,16 +1045,13 @@ out_bad:
* This is the case if nfs_updatepage detects a conflicting request
* that has been written but not committed.
*/
-static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
+static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
{
struct nfs_page *req;
struct page **pages;
struct nfs_write_data *data;
unsigned int count;
- if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
- return nfs_flush_multi(head, inode, how);
-
data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
if (!data)
goto out_bad;
@@ -1005,9 +1069,8 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
}
req = nfs_list_entry(data->pages.next);
- data->complete = nfs_writeback_done_full;
/* Set up the argument struct */
- nfs_write_rpcsetup(req, data, count, 0, how);
+ nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
nfs_execute_write(data);
return 0;
@@ -1021,24 +1084,32 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
return -ENOMEM;
}
-static int
-nfs_flush_list(struct list_head *head, int wpages, int how)
+static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how)
{
LIST_HEAD(one_request);
- struct nfs_page *req;
- int error = 0;
- unsigned int pages = 0;
+ int (*flush_one)(struct inode *, struct list_head *, int);
+ struct nfs_page *req;
+ int wpages = NFS_SERVER(inode)->wpages;
+ int wsize = NFS_SERVER(inode)->wsize;
+ int error;
- while (!list_empty(head)) {
- pages += nfs_coalesce_requests(head, &one_request, wpages);
+ flush_one = nfs_flush_one;
+ if (wsize < PAGE_CACHE_SIZE)
+ flush_one = nfs_flush_multi;
+ /* For single writes, FLUSH_STABLE is more efficient */
+ if (npages <= wpages && npages == NFS_I(inode)->npages
+ && nfs_list_entry(head->next)->wb_bytes <= wsize)
+ how |= FLUSH_STABLE;
+
+ do {
+ nfs_coalesce_requests(head, &one_request, wpages);
req = nfs_list_entry(one_request.next);
- error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how);
+ error = flush_one(inode, &one_request, how);
if (error < 0)
- break;
- }
- if (error >= 0)
- return pages;
-
+ goto out_err;
+ } while (!list_empty(head));
+ return 0;
+out_err:
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
@@ -1051,8 +1122,9 @@ nfs_flush_list(struct list_head *head, int wpages, int how)
/*
* Handle a write reply that flushed part of a page.
*/
-static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
{
+ struct nfs_write_data *data = calldata;
struct nfs_page *req = data->req;
struct page *page = req->wb_page;
@@ -1062,11 +1134,14 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
req->wb_bytes,
(long long)req_offset(req));
- if (status < 0) {
+ if (nfs_writeback_done(task, data) != 0)
+ return;
+
+ if (task->tk_status < 0) {
ClearPageUptodate(page);
SetPageError(page);
- req->wb_context->error = status;
- dprintk(", error = %d\n", status);
+ req->wb_context->error = task->tk_status;
+ dprintk(", error = %d\n", task->tk_status);
} else {
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
if (data->verf.committed < NFS_FILE_SYNC) {
@@ -1087,6 +1162,11 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
nfs_writepage_release(req);
}
+static const struct rpc_call_ops nfs_write_partial_ops = {
+ .rpc_call_done = nfs_writeback_done_partial,
+ .rpc_release = nfs_writedata_release,
+};
+
/*
* Handle a write reply that flushes a whole page.
*
@@ -1094,11 +1174,15 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
* writebacks since the page->count is kept > 1 for as long
* as the page has a write request pending.
*/
-static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
+ struct nfs_write_data *data = calldata;
struct nfs_page *req;
struct page *page;
+ if (nfs_writeback_done(task, data) != 0)
+ return;
+
/* Update attributes as result of writeback. */
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
@@ -1111,13 +1195,13 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
req->wb_bytes,
(long long)req_offset(req));
- if (status < 0) {
+ if (task->tk_status < 0) {
ClearPageUptodate(page);
SetPageError(page);
- req->wb_context->error = status;
+ req->wb_context->error = task->tk_status;
end_page_writeback(page);
nfs_inode_remove_request(req);
- dprintk(", error = %d\n", status);
+ dprintk(", error = %d\n", task->tk_status);
goto next;
}
end_page_writeback(page);
@@ -1139,18 +1223,30 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
}
}
+static const struct rpc_call_ops nfs_write_full_ops = {
+ .rpc_call_done = nfs_writeback_done_full,
+ .rpc_release = nfs_writedata_release,
+};
+
+
/*
* This function is called when the WRITE call is complete.
*/
-void nfs_writeback_done(struct rpc_task *task, void *calldata)
+int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
struct nfs_writeargs *argp = &data->args;
struct nfs_writeres *resp = &data->res;
+ int status;
dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
task->tk_pid, task->tk_status);
+ /* Call the NFS version-specific code */
+ status = NFS_PROTO(data->inode)->write_done(task, data);
+ if (status != 0)
+ return status;
+ nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
/* We tried a write call, but the server did not
@@ -1176,6 +1272,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
if (task->tk_status >= 0 && resp->count < argp->count) {
static unsigned long complain;
+ nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+
/* Has the server at least made some progress? */
if (resp->count != 0) {
/* Was this an NFSv2 write or an NFSv3 stable write? */
@@ -1191,7 +1289,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
argp->stable = NFS_FILE_SYNC;
}
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (time_before(complain, jiffies)) {
printk(KERN_WARNING
@@ -1202,11 +1300,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
/* Can't do anything about it except throw an error. */
task->tk_status = -EIO;
}
-
- /*
- * Process the nfs_page list
- */
- data->complete(data, task->tk_status);
+ return 0;
}
@@ -1220,10 +1314,12 @@ void nfs_commit_release(void *wdata)
* Set up the argument/result storage required for the RPC call.
*/
static void nfs_commit_rpcsetup(struct list_head *head,
- struct nfs_write_data *data, int how)
+ struct nfs_write_data *data,
+ int how)
{
struct nfs_page *first;
struct inode *inode;
+ int flags;
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
@@ -1243,7 +1339,10 @@ static void nfs_commit_rpcsetup(struct list_head *head,
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
-
+
+ /* Set up the initial task struct. */
+ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data);
NFS_PROTO(inode)->commit_setup(data, how);
data->task.tk_priority = flush_task_priority(how);
@@ -1284,7 +1383,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
/*
* COMMIT call returned
*/
-void nfs_commit_done(struct rpc_task *task, void *calldata)
+static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;
struct nfs_page *req;
@@ -1293,6 +1392,10 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
dprintk("NFS: %4d nfs_commit_done (status %d)\n",
task->tk_pid, task->tk_status);
+ /* Call the NFS version-specific code */
+ if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+ return;
+
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
@@ -1326,6 +1429,16 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
}
sub_page_state(nr_unstable,res);
}
+
+static const struct rpc_call_ops nfs_commit_ops = {
+ .rpc_call_done = nfs_commit_done,
+ .rpc_release = nfs_commit_release,
+};
+#else
+static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
+{
+ return 0;
+}
#endif
static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
@@ -1333,24 +1446,16 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
{
struct nfs_inode *nfsi = NFS_I(inode);
LIST_HEAD(head);
- int res,
- error = 0;
+ int res;
spin_lock(&nfsi->req_lock);
res = nfs_scan_dirty(inode, &head, idx_start, npages);
spin_unlock(&nfsi->req_lock);
if (res) {
- struct nfs_server *server = NFS_SERVER(inode);
-
- /* For single writes, FLUSH_STABLE is more efficient */
- if (res == nfsi->npages && nfsi->npages <= server->wpages) {
- if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize)
- how |= FLUSH_STABLE;
- }
- error = nfs_flush_list(&head, server->wpages, how);
+ int error = nfs_flush_list(inode, &head, res, how);
+ if (error < 0)
+ return error;
}
- if (error < 0)
- return error;
return res;
}
@@ -1359,14 +1464,13 @@ int nfs_commit_inode(struct inode *inode, int how)
{
struct nfs_inode *nfsi = NFS_I(inode);
LIST_HEAD(head);
- int res,
- error = 0;
+ int res;
spin_lock(&nfsi->req_lock);
res = nfs_scan_commit(inode, &head, 0, 0);
spin_unlock(&nfsi->req_lock);
if (res) {
- error = nfs_commit_list(inode, &head, how);
+ int error = nfs_commit_list(inode, &head, how);
if (error < 0)
return error;
}
@@ -1374,28 +1478,38 @@ int nfs_commit_inode(struct inode *inode, int how)
}
#endif
-int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
- unsigned int npages, int how)
+int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
+ unsigned int npages, int how)
{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ LIST_HEAD(head);
int nocommit = how & FLUSH_NOCOMMIT;
- int wait = how & FLUSH_WAIT;
- int error;
-
- how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT);
+ int pages, ret;
+ how &= ~FLUSH_NOCOMMIT;
+ spin_lock(&nfsi->req_lock);
do {
- if (wait) {
- error = nfs_wait_on_requests(inode, idx_start, npages);
- if (error != 0)
- continue;
- }
- error = nfs_flush_inode(inode, idx_start, npages, how);
- if (error != 0)
+ ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
+ if (ret != 0)
continue;
- if (!nocommit)
- error = nfs_commit_inode(inode, how);
- } while (error > 0);
- return error;
+ pages = nfs_scan_dirty(inode, &head, idx_start, npages);
+ if (pages != 0) {
+ spin_unlock(&nfsi->req_lock);
+ ret = nfs_flush_list(inode, &head, pages, how);
+ spin_lock(&nfsi->req_lock);
+ continue;
+ }
+ if (nocommit)
+ break;
+ pages = nfs_scan_commit(inode, &head, 0, 0);
+ if (pages == 0)
+ break;
+ spin_unlock(&nfsi->req_lock);
+ ret = nfs_commit_list(inode, &head, how);
+ spin_lock(&nfsi->req_lock);
+ } while (ret >= 0);
+ spin_unlock(&nfsi->req_lock);
+ return ret;
}
int nfs_init_writepagecache(void)
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 8d3d23c8a4d..c872bd07fc1 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -326,6 +326,8 @@ out:
.p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
.p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \
+ .p_statidx = NFSPROC4_CB_##call, \
+ .p_name = #proc, \
}
static struct rpc_procinfo nfs4_cb_procedures[] = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1143cfb6454..f6ab762bea9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2639,7 +2639,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
struct nfs4_stateid *lock_stp;
struct file *filp;
struct file_lock file_lock;
- struct file_lock *conflock;
+ struct file_lock conflock;
int status = 0;
unsigned int strhashval;
@@ -2775,11 +2775,11 @@ conflicting_lock:
/* XXX There is a race here. Future patch needed to provide
* an atomic posix_lock_and_test_file
*/
- if (!(conflock = posix_test_lock(filp, &file_lock))) {
+ if (!posix_test_lock(filp, &file_lock, &conflock)) {
status = nfserr_serverfault;
goto out;
}
- nfs4_set_lock_denied(conflock, &lock->lk_denied);
+ nfs4_set_lock_denied(&conflock, &lock->lk_denied);
out:
if (status && lock->lk_is_new && lock_sop)
release_stateowner(lock_sop);
@@ -2800,7 +2800,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
struct inode *inode;
struct file file;
struct file_lock file_lock;
- struct file_lock *conflicting_lock;
+ struct file_lock conflock;
int status;
if (nfs4_in_grace())
@@ -2864,10 +2864,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
file.f_dentry = current_fh->fh_dentry;
status = nfs_ok;
- conflicting_lock = posix_test_lock(&file, &file_lock);
- if (conflicting_lock) {
+ if (posix_test_lock(&file, &file_lock, &conflock)) {
status = nfserr_denied;
- nfs4_set_lock_denied(conflicting_lock, &lockt->lt_denied);
+ nfs4_set_lock_denied(&conflock, &lockt->lt_denied);
}
out:
nfs4_unlock_state();
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 20feb7568de..8f1f49ceebe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -104,6 +104,7 @@ enum pid_directory_inos {
PROC_TGID_MAPS,
PROC_TGID_NUMA_MAPS,
PROC_TGID_MOUNTS,
+ PROC_TGID_MOUNTSTATS,
PROC_TGID_WCHAN,
#ifdef CONFIG_MMU
PROC_TGID_SMAPS,
@@ -144,6 +145,7 @@ enum pid_directory_inos {
PROC_TID_MAPS,
PROC_TID_NUMA_MAPS,
PROC_TID_MOUNTS,
+ PROC_TID_MOUNTSTATS,
PROC_TID_WCHAN,
#ifdef CONFIG_MMU
PROC_TID_SMAPS,
@@ -201,6 +203,7 @@ static struct pid_entry tgid_base_stuff[] = {
E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO),
E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO),
E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
+ E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR),
#ifdef CONFIG_MMU
E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO),
#endif
@@ -732,6 +735,38 @@ static struct file_operations proc_mounts_operations = {
.poll = mounts_poll,
};
+extern struct seq_operations mountstats_op;
+static int mountstats_open(struct inode *inode, struct file *file)
+{
+ struct task_struct *task = proc_task(inode);
+ int ret = seq_open(file, &mountstats_op);
+
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ struct namespace *namespace;
+ task_lock(task);
+ namespace = task->namespace;
+ if (namespace)
+ get_namespace(namespace);
+ task_unlock(task);
+
+ if (namespace)
+ m->private = namespace;
+ else {
+ seq_release(inode, file);
+ ret = -EINVAL;
+ }
+ }
+ return ret;
+}
+
+static struct file_operations proc_mountstats_operations = {
+ .open = mountstats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = mounts_release,
+};
+
#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
static ssize_t proc_info_read(struct file * file, char __user * buf,
@@ -1730,6 +1765,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
inode->i_fop = &proc_smaps_operations;
break;
#endif
+ case PROC_TID_MOUNTSTATS:
+ case PROC_TGID_MOUNTSTATS:
+ inode->i_fop = &proc_mountstats_operations;
+ break;
#ifdef CONFIG_SECURITY
case PROC_TID_ATTR:
inode->i_nlink = 2;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 21e8cf795c3..5adf32b90f3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -678,7 +678,6 @@ extern spinlock_t files_lock;
#define FL_POSIX 1
#define FL_FLOCK 2
#define FL_ACCESS 8 /* not trying to lock, just looking */
-#define FL_LOCKD 16 /* lock held by rpc.lockd */
#define FL_LEASE 32 /* lease held on this file */
#define FL_SLEEP 128 /* A blocking lock */
@@ -742,8 +741,6 @@ struct file_lock {
#define OFFT_OFFSET_MAX INT_LIMIT(off_t)
#endif
-extern struct list_head file_lock_list;
-
#include <linux/fcntl.h>
extern int fcntl_getlk(struct file *, struct flock __user *);
@@ -765,10 +762,9 @@ extern void locks_init_lock(struct file_lock *);
extern void locks_copy_lock(struct file_lock *, struct file_lock *);
extern void locks_remove_posix(struct file *, fl_owner_t);
extern void locks_remove_flock(struct file *);
-extern struct file_lock *posix_test_lock(struct file *, struct file_lock *);
+extern int posix_test_lock(struct file *, struct file_lock *, struct file_lock *);
extern int posix_lock_file(struct file *, struct file_lock *);
extern int posix_lock_file_wait(struct file *, struct file_lock *);
-extern void posix_block_lock(struct file_lock *, struct file_lock *);
extern int posix_unblock_lock(struct file *, struct file_lock *);
extern int posix_locks_deadlock(struct file_lock *, struct file_lock *);
extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
@@ -1097,6 +1093,7 @@ struct super_operations {
void (*umount_begin) (struct super_block *);
int (*show_options)(struct seq_file *, struct vfsmount *);
+ int (*show_stats)(struct seq_file *, struct vfsmount *);
ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index ef21ed29603..995f89dc8c0 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -14,6 +14,7 @@
#include <linux/config.h>
#include <linux/in.h>
#include <linux/fs.h>
+#include <linux/kref.h>
#include <linux/utsname.h>
#include <linux/nfsd/nfsfh.h>
#include <linux/lockd/bind.h>
@@ -58,6 +59,8 @@ struct nlm_host {
unsigned long h_expires; /* eligible for GC */
struct list_head h_lockowners; /* Lockowners for the client */
spinlock_t h_lock;
+ struct list_head h_granted; /* Locks in GRANTED state */
+ struct list_head h_reclaim; /* Locks in RECLAIM state */
};
/*
@@ -83,9 +86,9 @@ struct nlm_rqst {
struct nlm_host * a_host; /* host handle */
struct nlm_args a_args; /* arguments */
struct nlm_res a_res; /* result */
- struct nlm_wait * a_block;
+ struct nlm_block * a_block;
unsigned int a_retries; /* Retry count */
- char a_owner[NLMCLNT_OHSIZE];
+ u8 a_owner[NLMCLNT_OHSIZE];
};
/*
@@ -110,16 +113,16 @@ struct nlm_file {
*/
#define NLM_NEVER (~(unsigned long) 0)
struct nlm_block {
+ struct kref b_count; /* Reference count */
struct nlm_block * b_next; /* linked list (all blocks) */
struct nlm_block * b_fnext; /* linked list (per file) */
- struct nlm_rqst b_call; /* RPC args & callback info */
+ struct nlm_rqst * b_call; /* RPC args & callback info */
struct svc_serv * b_daemon; /* NLM service */
struct nlm_host * b_host; /* host handle for RPC clnt */
unsigned long b_when; /* next re-xmit */
unsigned int b_id; /* block id */
unsigned char b_queued; /* re-queued */
unsigned char b_granted; /* VFS granted lock */
- unsigned char b_incall; /* doing callback */
unsigned char b_done; /* callback complete */
struct nlm_file * b_file; /* file in question */
};
@@ -145,15 +148,16 @@ extern unsigned long nlmsvc_timeout;
/*
* Lockd client functions
*/
-struct nlm_rqst * nlmclnt_alloc_call(void);
-int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl);
-void nlmclnt_finish_block(struct nlm_rqst *req);
-long nlmclnt_block(struct nlm_rqst *req, long timeout);
+struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
+void nlm_release_call(struct nlm_rqst *);
+int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
+int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
+struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl);
+void nlmclnt_finish_block(struct nlm_wait *block);
+int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *);
void nlmclnt_recovery(struct nlm_host *, u32);
int nlmclnt_reclaim(struct nlm_host *, struct file_lock *);
-int nlmclnt_setgrantargs(struct nlm_rqst *, struct nlm_lock *);
-void nlmclnt_freegrantargs(struct nlm_rqst *);
/*
* Host cache
@@ -172,7 +176,6 @@ extern struct nlm_host *nlm_find_client(void);
/*
* Server-side lock handling
*/
-int nlmsvc_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
u32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
struct nlm_lock *, int, struct nlm_cookie *);
u32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *);
@@ -180,7 +183,7 @@ u32 nlmsvc_testlock(struct nlm_file *, struct nlm_lock *,
struct nlm_lock *);
u32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *);
unsigned long nlmsvc_retry_blocked(void);
-int nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
+void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
int action);
void nlmsvc_grant_reply(struct svc_rqst *, struct nlm_cookie *, u32);
diff --git a/include/linux/lockd/share.h b/include/linux/lockd/share.h
index 5d8aa325f14..c75a424ebe4 100644
--- a/include/linux/lockd/share.h
+++ b/include/linux/lockd/share.h
@@ -25,6 +25,6 @@ u32 nlmsvc_share_file(struct nlm_host *, struct nlm_file *,
struct nlm_args *);
u32 nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *,
struct nlm_args *);
-int nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, int);
+void nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, int);
#endif /* LINUX_LOCKD_SHARE_H */
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index d7a5cc4cfa9..bb0a0f1caa9 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -28,6 +28,7 @@ struct nlm_lock {
int len; /* length of "caller" */
struct nfs_fh fh;
struct xdr_netobj oh;
+ u32 svid;
struct file_lock fl;
};
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index b4dc6e2e10c..cbebd7d1b9e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -56,9 +56,7 @@
* When flushing a cluster of dirty pages, there can be different
* strategies:
*/
-#define FLUSH_AGING 0 /* only flush old buffers */
#define FLUSH_SYNC 1 /* file being synced, or contention */
-#define FLUSH_WAIT 2 /* wait for completion */
#define FLUSH_STABLE 4 /* commit to stable storage */
#define FLUSH_LOWPRI 8 /* low priority background flush */
#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */
@@ -78,6 +76,7 @@ struct nfs_access_entry {
struct nfs4_state;
struct nfs_open_context {
atomic_t count;
+ struct vfsmount *vfsmnt;
struct dentry *dentry;
struct rpc_cred *cred;
struct nfs4_state *state;
@@ -118,8 +117,7 @@ struct nfs_inode {
unsigned long cache_validity; /* bit mask */
/*
- * read_cache_jiffies is when we started read-caching this inode,
- * and read_cache_mtime is the mtime of the inode at that time.
+ * read_cache_jiffies is when we started read-caching this inode.
* attrtimeo is for how long the cached information is assumed
* to be valid. A successful attribute revalidation doubles
* attrtimeo (up to acregmax/acdirmax), a failure resets it to
@@ -128,11 +126,6 @@ struct nfs_inode {
* We need to revalidate the cached attrs for this inode if
*
* jiffies - read_cache_jiffies > attrtimeo
- *
- * and invalidate any cached data/flush out any dirty pages if
- * we find that
- *
- * mtime != read_cache_mtime
*/
unsigned long read_cache_jiffies;
unsigned long attrtimeo;
@@ -311,12 +304,9 @@ extern void nfs_begin_attr_update(struct inode *);
extern void nfs_end_attr_update(struct inode *);
extern void nfs_begin_data_update(struct inode *);
extern void nfs_end_data_update(struct inode *);
-extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred);
extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
extern void put_nfs_open_context(struct nfs_open_context *ctx);
-extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx);
extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
-extern void nfs_file_clear_open_context(struct file *filp);
/* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
extern u32 root_nfs_parse_addr(char *name); /*__init*/
@@ -415,21 +405,22 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc);
extern int nfs_writepages(struct address_space *, struct writeback_control *);
extern int nfs_flush_incompatible(struct file *file, struct page *page);
extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
-extern void nfs_writeback_done(struct rpc_task *task, void *data);
-extern void nfs_writedata_release(void *data);
+extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
+extern void nfs_writedata_release(void *);
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-extern void nfs_commit_done(struct rpc_task *, void *data);
-extern void nfs_commit_release(void *data);
+struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount);
+void nfs_commit_free(struct nfs_write_data *p);
#endif
/*
* Try to write back everything synchronously (but check the
* return value!)
*/
-extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int);
+extern int nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int);
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
extern int nfs_commit_inode(struct inode *, int);
+extern void nfs_commit_release(void *wdata);
#else
static inline int
nfs_commit_inode(struct inode *inode, int how)
@@ -447,7 +438,7 @@ nfs_have_writebacks(struct inode *inode)
static inline int
nfs_wb_all(struct inode *inode)
{
- int error = nfs_sync_inode(inode, 0, 0, FLUSH_WAIT);
+ int error = nfs_sync_inode_wait(inode, 0, 0, 0);
return (error < 0) ? error : 0;
}
@@ -456,8 +447,8 @@ nfs_wb_all(struct inode *inode)
*/
static inline int nfs_wb_page_priority(struct inode *inode, struct page* page, int how)
{
- int error = nfs_sync_inode(inode, page->index, 1,
- how | FLUSH_WAIT | FLUSH_STABLE);
+ int error = nfs_sync_inode_wait(inode, page->index, 1,
+ how | FLUSH_STABLE);
return (error < 0) ? error : 0;
}
@@ -469,37 +460,8 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page)
/*
* Allocate and free nfs_write_data structures
*/
-extern mempool_t *nfs_wdata_mempool;
-
-static inline struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
-{
- struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
-
- if (p) {
- memset(p, 0, sizeof(*p));
- INIT_LIST_HEAD(&p->pages);
- if (pagecount < NFS_PAGEVEC_SIZE)
- p->pagevec = &p->page_array[0];
- else {
- size_t size = ++pagecount * sizeof(struct page *);
- p->pagevec = kmalloc(size, GFP_NOFS);
- if (p->pagevec) {
- memset(p->pagevec, 0, size);
- } else {
- mempool_free(p, nfs_wdata_mempool);
- p = NULL;
- }
- }
- }
- return p;
-}
-
-static inline void nfs_writedata_free(struct nfs_write_data *p)
-{
- if (p && (p->pagevec != &p->page_array[0]))
- kfree(p->pagevec);
- mempool_free(p, nfs_wdata_mempool);
-}
+extern struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount);
+extern void nfs_writedata_free(struct nfs_write_data *p);
/*
* linux/fs/nfs/read.c
@@ -507,44 +469,14 @@ static inline void nfs_writedata_free(struct nfs_write_data *p)
extern int nfs_readpage(struct file *, struct page *);
extern int nfs_readpages(struct file *, struct address_space *,
struct list_head *, unsigned);
-extern void nfs_readpage_result(struct rpc_task *, void *);
-extern void nfs_readdata_release(void *data);
-
+extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
+extern void nfs_readdata_release(void *data);
/*
* Allocate and free nfs_read_data structures
*/
-extern mempool_t *nfs_rdata_mempool;
-
-static inline struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
-{
- struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
-
- if (p) {
- memset(p, 0, sizeof(*p));
- INIT_LIST_HEAD(&p->pages);
- if (pagecount < NFS_PAGEVEC_SIZE)
- p->pagevec = &p->page_array[0];
- else {
- size_t size = ++pagecount * sizeof(struct page *);
- p->pagevec = kmalloc(size, GFP_NOFS);
- if (p->pagevec) {
- memset(p->pagevec, 0, size);
- } else {
- mempool_free(p, nfs_rdata_mempool);
- p = NULL;
- }
- }
- }
- return p;
-}
-
-static inline void nfs_readdata_free(struct nfs_read_data *p)
-{
- if (p && (p->pagevec != &p->page_array[0]))
- kfree(p->pagevec);
- mempool_free(p, nfs_rdata_mempool);
-}
+extern struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount);
+extern void nfs_readdata_free(struct nfs_read_data *p);
/*
* linux/fs/nfs3proc.c
diff --git a/include/linux/nfs_fs_i.h b/include/linux/nfs_fs_i.h
index e2c18dabff8..861730275ba 100644
--- a/include/linux/nfs_fs_i.h
+++ b/include/linux/nfs_fs_i.h
@@ -12,8 +12,8 @@ struct nlm_lockowner;
*/
struct nfs_lock_info {
u32 state;
- u32 flags;
struct nlm_lockowner *owner;
+ struct list_head list;
};
struct nfs4_lock_state;
@@ -21,10 +21,4 @@ struct nfs4_lock_info {
struct nfs4_lock_state *owner;
};
-/*
- * Lock flag values
- */
-#define NFS_LCK_GRANTED 0x0001 /* lock has been granted */
-#define NFS_LCK_RECLAIM 0x0002 /* lock marked for reclaiming */
-
#endif
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 3d3a305488c..65dec21af77 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -4,6 +4,8 @@
#include <linux/list.h>
#include <linux/backing-dev.h>
+struct nfs_iostats;
+
/*
* NFS client parameters stored in the superblock.
*/
@@ -12,6 +14,7 @@ struct nfs_server {
struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */
struct rpc_clnt * client_acl; /* ACL RPC client handle */
struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */
+ struct nfs_iostats * io_stats; /* I/O statistics */
struct backing_dev_info backing_dev_info;
int flags; /* various flags */
unsigned int caps; /* server capabilities */
@@ -26,10 +29,13 @@ struct nfs_server {
unsigned int acregmax;
unsigned int acdirmin;
unsigned int acdirmax;
+ unsigned long retrans_timeo; /* retransmit timeout */
+ unsigned int retrans_count; /* number of retransmit tries */
unsigned int namelen;
char * hostname; /* remote hostname */
struct nfs_fh fh;
struct sockaddr_in addr;
+ unsigned long mount_time; /* when this fs was mounted */
#ifdef CONFIG_NFS_V4
/* Our own IP address, as a null-terminated string.
* This is used to generate the clientid, and the callback address.
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6d6f69ec567..7fafc4c546b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -695,7 +695,6 @@ struct nfs_read_data {
#ifdef CONFIG_NFS_V4
unsigned long timestamp; /* For lease renewal */
#endif
- void (*complete) (struct nfs_read_data *, int);
struct page *page_array[NFS_PAGEVEC_SIZE + 1];
};
@@ -714,7 +713,6 @@ struct nfs_write_data {
#ifdef CONFIG_NFS_V4
unsigned long timestamp; /* For lease renewal */
#endif
- void (*complete) (struct nfs_write_data *, int);
struct page *page_array[NFS_PAGEVEC_SIZE + 1];
};
@@ -769,8 +767,11 @@ struct nfs_rpc_ops {
struct nfs_pathconf *);
u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus);
void (*read_setup) (struct nfs_read_data *);
+ int (*read_done) (struct rpc_task *, struct nfs_read_data *);
void (*write_setup) (struct nfs_write_data *, int how);
+ int (*write_done) (struct rpc_task *, struct nfs_write_data *);
void (*commit_setup) (struct nfs_write_data *, int how);
+ int (*commit_done) (struct rpc_task *, struct nfs_write_data *);
int (*file_open) (struct inode *, struct file *);
int (*file_release) (struct inode *, struct file *);
int (*lock)(struct file *, int, struct file_lock *);
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index f147e6b8433..8fe9f35eba3 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -45,7 +45,8 @@ struct rpc_clnt {
char * cl_server; /* server machine name */
char * cl_protname; /* protocol name */
struct rpc_auth * cl_auth; /* authenticator */
- struct rpc_stat * cl_stats; /* statistics */
+ struct rpc_stat * cl_stats; /* per-program statistics */
+ struct rpc_iostats * cl_metrics; /* per-client statistics */
unsigned int cl_softrtry : 1,/* soft timeouts */
cl_intr : 1,/* interruptible */
@@ -59,6 +60,7 @@ struct rpc_clnt {
int cl_nodelen; /* nodename length */
char cl_nodename[UNX_MAXNODENAME];
char cl_pathname[30];/* Path in rpc_pipe_fs */
+ struct vfsmount * cl_vfsmnt;
struct dentry * cl_dentry; /* inode */
struct rpc_clnt * cl_parent; /* Points to parent of clones */
struct rpc_rtt cl_rtt_default;
@@ -100,6 +102,8 @@ struct rpc_procinfo {
unsigned int p_bufsiz; /* req. buffer size */
unsigned int p_count; /* call count */
unsigned int p_timer; /* Which RTT timer to use */
+ u32 p_statidx; /* Which procedure to account */
+ char * p_name; /* name of procedure */
};
#define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt))
@@ -137,20 +141,6 @@ size_t rpc_max_payload(struct rpc_clnt *);
void rpc_force_rebind(struct rpc_clnt *);
int rpc_ping(struct rpc_clnt *clnt, int flags);
-static __inline__
-int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
-{
- struct rpc_message msg = {
- .rpc_proc = &clnt->cl_procinfo[proc],
- .rpc_argp = argp,
- .rpc_resp = resp,
- .rpc_cred = NULL
- };
- return rpc_call_sync(clnt, &msg, flags);
-}
-
-extern void rpciod_wake_up(void);
-
/*
* Helper function for NFSroot support
*/
diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 2c3601d3104..1279280d719 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -53,6 +53,8 @@ struct krb5_ctx {
struct xdr_netobj mech_used;
};
+extern spinlock_t krb5_seq_lock;
+
#define KG_TOK_MIC_MSG 0x0101
#define KG_TOK_WRAP_MSG 0x0201
diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h
new file mode 100644
index 00000000000..8f96e9dc369
--- /dev/null
+++ b/include/linux/sunrpc/metrics.h
@@ -0,0 +1,77 @@
+/*
+ * linux/include/linux/sunrpc/metrics.h
+ *
+ * Declarations for RPC client per-operation metrics
+ *
+ * Copyright (C) 2005 Chuck Lever <cel@netapp.com>
+ *
+ * RPC client per-operation statistics provide latency and retry
+ * information about each type of RPC procedure in a given RPC program.
+ * These statistics are not for detailed problem diagnosis, but simply
+ * to indicate whether the problem is local or remote.
+ *
+ * These counters are not meant to be human-readable, but are meant to be
+ * integrated into system monitoring tools such as "sar" and "iostat". As
+ * such, the counters are sampled by the tools over time, and are never
+ * zeroed after a file system is mounted. Moving averages can be computed
+ * by the tools by taking the difference between two instantaneous samples
+ * and dividing that by the time between the samples.
+ *
+ * The counters are maintained in a single array per RPC client, indexed
+ * by procedure number. There is no need to maintain separate counter
+ * arrays per-CPU because these counters are always modified behind locks.
+ */
+
+#ifndef _LINUX_SUNRPC_METRICS_H
+#define _LINUX_SUNRPC_METRICS_H
+
+#include <linux/seq_file.h>
+
+#define RPC_IOSTATS_VERS "1.0"
+
+struct rpc_iostats {
+ /*
+ * These counters give an idea about how many request
+ * transmissions are required, on average, to complete that
+ * particular procedure. Some procedures may require more
+ * than one transmission because the server is unresponsive,
+ * the client is retransmitting too aggressively, or the
+ * requests are large and the network is congested.
+ */
+ unsigned long om_ops, /* count of operations */
+ om_ntrans, /* count of RPC transmissions */
+ om_timeouts; /* count of major timeouts */
+
+ /*
+ * These count how many bytes are sent and received for a
+ * given RPC procedure type. This indicates how much load a
+ * particular procedure is putting on the network. These
+ * counts include the RPC and ULP headers, and the request
+ * payload.
+ */
+ unsigned long long om_bytes_sent, /* count of bytes out */
+ om_bytes_recv; /* count of bytes in */
+
+ /*
+ * The length of time an RPC request waits in queue before
+ * transmission, the network + server latency of the request,
+ * and the total time the request spent from init to release
+ * are measured.
+ */
+ unsigned long long om_queue, /* jiffies queued for xmit */
+ om_rtt, /* jiffies for RPC RTT */
+ om_execute; /* jiffies for RPC execution */
+} ____cacheline_aligned;
+
+struct rpc_task;
+struct rpc_clnt;
+
+/*
+ * EXPORTed functions for managing rpc_iostats structures
+ */
+struct rpc_iostats * rpc_alloc_iostats(struct rpc_clnt *);
+void rpc_count_iostats(struct rpc_task *);
+void rpc_print_iostats(struct seq_file *, struct rpc_clnt *);
+void rpc_free_iostats(struct rpc_iostats *);
+
+#endif /* _LINUX_SUNRPC_METRICS_H */
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index 63929349571..2c2189cb30a 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -45,6 +45,8 @@ extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *);
extern int rpc_rmdir(char *);
extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags);
extern int rpc_unlink(char *);
+extern struct vfsmount *rpc_get_mount(void);
+extern void rpc_put_mount(void);
#endif
#endif
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 8b25629accd..82a91bb2236 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -86,6 +86,12 @@ struct rpc_task {
struct work_struct tk_work; /* Async task work queue */
struct rpc_wait tk_wait; /* RPC wait */
} u;
+
+ unsigned short tk_timeouts; /* maj timeouts */
+ size_t tk_bytes_sent; /* total bytes sent */
+ unsigned long tk_start; /* RPC task init timestamp */
+ long tk_rtt; /* round-trip time (jiffies) */
+
#ifdef RPC_DEBUG
unsigned short tk_pid; /* debugging aid */
#endif
@@ -203,6 +209,7 @@ struct rpc_wait_queue {
unsigned char priority; /* current priority */
unsigned char count; /* # task groups remaining serviced so far */
unsigned char nr; /* # tasks remaining for cookie */
+ unsigned short qlen; /* total # tasks waiting in queue */
#ifdef RPC_DEBUG
const char * name;
#endif
@@ -269,13 +276,13 @@ void * rpc_malloc(struct rpc_task *, size_t);
void rpc_free(struct rpc_task *);
int rpciod_up(void);
void rpciod_down(void);
-void rpciod_wake_up(void);
int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *));
#ifdef RPC_DEBUG
void rpc_show_tasks(void);
#endif
int rpc_init_mempool(void);
void rpc_destroy_mempool(void);
+extern struct workqueue_struct *rpciod_workqueue;
static inline void rpc_exit(struct rpc_task *task, int status)
{
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 6ef99b14ff0..7eebbab7160 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -114,6 +114,7 @@ struct rpc_xprt_ops {
void (*release_request)(struct rpc_task *task);
void (*close)(struct rpc_xprt *xprt);
void (*destroy)(struct rpc_xprt *xprt);
+ void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq);
};
struct rpc_xprt {
@@ -187,6 +188,18 @@ struct rpc_xprt {
struct list_head recv;
+ struct {
+ unsigned long bind_count, /* total number of binds */
+ connect_count, /* total number of connects */
+ connect_start, /* connect start timestamp */
+ connect_time, /* jiffies waiting for connect */
+ sends, /* how many complete requests */
+ recvs, /* how many complete requests */
+ bad_xids; /* lookup_rqst didn't find XID */
+
+ unsigned long long req_u, /* average requests on the wire */
+ bklog_u; /* backlog queue utilization */
+ } stat;
void (*old_data_ready)(struct sock *, int);
void (*old_state_change)(struct sock *);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 8d6f1a176b1..55163af3dca 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -64,14 +64,26 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
struct rpc_authops *ops;
u32 flavor = pseudoflavor_to_flavor(pseudoflavor);
- if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor]))
- return ERR_PTR(-EINVAL);
+ auth = ERR_PTR(-EINVAL);
+ if (flavor >= RPC_AUTH_MAXFLAVOR)
+ goto out;
+
+ /* FIXME - auth_flavors[] really needs an rw lock,
+ * and module refcounting. */
+#ifdef CONFIG_KMOD
+ if ((ops = auth_flavors[flavor]) == NULL)
+ request_module("rpc-auth-%u", flavor);
+#endif
+ if ((ops = auth_flavors[flavor]) == NULL)
+ goto out;
auth = ops->create(clnt, pseudoflavor);
if (IS_ERR(auth))
return auth;
if (clnt->cl_auth)
rpcauth_destroy(clnt->cl_auth);
clnt->cl_auth = auth;
+
+out:
return auth;
}
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index bb46efd92e5..900ef31f5a0 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -721,6 +721,8 @@ gss_destroy(struct rpc_auth *auth)
gss_auth = container_of(auth, struct gss_auth, rpc_auth);
rpc_unlink(gss_auth->path);
+ dput(gss_auth->dentry);
+ gss_auth->dentry = NULL;
gss_mech_put(gss_auth->mech);
rpcauth_free_credcache(auth);
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index d0dfdfd5e79..f43311221a7 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -70,15 +70,19 @@
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
+spinlock_t krb5_seq_lock = SPIN_LOCK_UNLOCKED;
+
u32
gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
struct xdr_netobj *token)
{
struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
s32 checksum_type;
- struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
+ char cksumdata[16];
+ struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
unsigned char *ptr, *krb5_hdr, *msg_start;
s32 now;
+ u32 seq_send;
dprintk("RPC: gss_krb5_seal\n");
@@ -133,16 +137,15 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
BUG();
}
- kfree(md5cksum.data);
+ spin_lock(&krb5_seq_lock);
+ seq_send = ctx->seq_send++;
+ spin_unlock(&krb5_seq_lock);
if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
- ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)))
+ seq_send, krb5_hdr + 16, krb5_hdr + 8)))
goto out_err;
- ctx->seq_send++;
-
return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
out_err:
- kfree(md5cksum.data);
return GSS_S_FAILURE;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index db055fd7d77..0828cf64100 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -79,7 +79,8 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
int signalg;
int sealalg;
s32 checksum_type;
- struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
+ char cksumdata[16];
+ struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
s32 now;
int direction;
s32 seqnum;
@@ -176,6 +177,5 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
ret = GSS_S_COMPLETE;
out:
- kfree(md5cksum.data);
return ret;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index af777cf9f25..89d1f3e1412 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -121,12 +121,14 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
{
struct krb5_ctx *kctx = ctx->internal_ctx_id;
s32 checksum_type;
- struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
+ char cksumdata[16];
+ struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
int blocksize = 0, plainlen;
unsigned char *ptr, *krb5_hdr, *msg_start;
s32 now;
int headlen;
struct page **tmp_pages;
+ u32 seq_send;
dprintk("RPC: gss_wrap_kerberos\n");
@@ -205,23 +207,22 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
BUG();
}
- kfree(md5cksum.data);
+ spin_lock(&krb5_seq_lock);
+ seq_send = kctx->seq_send++;
+ spin_unlock(&krb5_seq_lock);
/* XXX would probably be more efficient to compute checksum
* and encrypt at the same time: */
if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
- kctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)))
+ seq_send, krb5_hdr + 16, krb5_hdr + 8)))
goto out_err;
if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
pages))
goto out_err;
- kctx->seq_send++;
-
return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
out_err:
- if (md5cksum.data) kfree(md5cksum.data);
return GSS_S_FAILURE;
}
@@ -232,7 +233,8 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
int signalg;
int sealalg;
s32 checksum_type;
- struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
+ char cksumdata[16];
+ struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
s32 now;
int direction;
s32 seqnum;
@@ -358,6 +360,5 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
ret = GSS_S_COMPLETE;
out:
- if (md5cksum.data) kfree(md5cksum.data);
return ret;
}
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index 58400807d4d..5bf11ccba7c 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -102,6 +102,12 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
alg_mode = CRYPTO_TFM_MODE_CBC;
setkey = 1;
break;
+ case NID_cast5_cbc:
+ /* XXXX here in name only, not used */
+ alg_name = "cast5";
+ alg_mode = CRYPTO_TFM_MODE_CBC;
+ setkey = 0; /* XXX will need to set to 1 */
+ break;
case NID_md5:
if (key.len == 0) {
dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n");
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
index 86fbf7c3e39..18c7862bc23 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c
@@ -57,7 +57,8 @@ spkm3_make_token(struct spkm3_ctx *ctx,
{
s32 checksum_type;
char tokhdrbuf[25];
- struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
+ char cksumdata[16];
+ struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf};
int tokenlen = 0;
unsigned char *ptr;
@@ -115,13 +116,11 @@ spkm3_make_token(struct spkm3_ctx *ctx,
dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK not supported\n");
goto out_err;
}
- kfree(md5cksum.data);
/* XXX need to implement sequence numbers, and ctx->expired */
return GSS_S_COMPLETE;
out_err:
- kfree(md5cksum.data);
token->data = NULL;
token->len = 0;
return GSS_S_FAILURE;
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
index 96851b0ba1b..8537f581ef9 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
@@ -56,7 +56,8 @@ spkm3_read_token(struct spkm3_ctx *ctx,
{
s32 code;
struct xdr_netobj wire_cksum = {.len =0, .data = NULL};
- struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
+ char cksumdata[16];
+ struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
unsigned char *ptr = (unsigned char *)read_token->data;
unsigned char *cksum;
int bodysize, md5elen;
@@ -120,7 +121,6 @@ spkm3_read_token(struct spkm3_ctx *ctx,
/* XXX: need to add expiration and sequencing */
ret = GSS_S_COMPLETE;
out:
- kfree(md5cksum.data);
kfree(wire_cksum.data);
return ret;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d7847978204..aa8965e9d30 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -28,12 +28,11 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/utsname.h>
+#include <linux/workqueue.h>
#include <linux/sunrpc/clnt.h>
-#include <linux/workqueue.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
-
-#include <linux/nfs.h>
+#include <linux/sunrpc/metrics.h>
#define RPC_SLACK_SPACE (1024) /* total overkill */
@@ -71,8 +70,15 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
static uint32_t clntid;
int error;
+ clnt->cl_vfsmnt = ERR_PTR(-ENOENT);
+ clnt->cl_dentry = ERR_PTR(-ENOENT);
if (dir_name == NULL)
return 0;
+
+ clnt->cl_vfsmnt = rpc_get_mount();
+ if (IS_ERR(clnt->cl_vfsmnt))
+ return PTR_ERR(clnt->cl_vfsmnt);
+
for (;;) {
snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname),
"%s/clnt%x", dir_name,
@@ -85,6 +91,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
if (error != -EEXIST) {
printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n",
clnt->cl_pathname, error);
+ rpc_put_mount();
return error;
}
}
@@ -147,6 +154,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
clnt->cl_vers = version->number;
clnt->cl_prot = xprt->prot;
clnt->cl_stats = program->stats;
+ clnt->cl_metrics = rpc_alloc_iostats(clnt);
rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait");
if (!clnt->cl_port)
@@ -175,7 +183,11 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
return clnt;
out_no_auth:
- rpc_rmdir(clnt->cl_pathname);
+ if (!IS_ERR(clnt->cl_dentry)) {
+ rpc_rmdir(clnt->cl_pathname);
+ dput(clnt->cl_dentry);
+ rpc_put_mount();
+ }
out_no_path:
if (clnt->cl_server != clnt->cl_inline_name)
kfree(clnt->cl_server);
@@ -240,11 +252,15 @@ rpc_clone_client(struct rpc_clnt *clnt)
new->cl_autobind = 0;
new->cl_oneshot = 0;
new->cl_dead = 0;
+ if (!IS_ERR(new->cl_dentry)) {
+ dget(new->cl_dentry);
+ rpc_get_mount();
+ }
rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
if (new->cl_auth)
atomic_inc(&new->cl_auth->au_count);
new->cl_pmap = &new->cl_pmap_default;
- rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait");
+ new->cl_metrics = rpc_alloc_iostats(clnt);
return new;
out_no_clnt:
printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
@@ -314,6 +330,12 @@ rpc_destroy_client(struct rpc_clnt *clnt)
if (clnt->cl_server != clnt->cl_inline_name)
kfree(clnt->cl_server);
out_free:
+ rpc_free_iostats(clnt->cl_metrics);
+ clnt->cl_metrics = NULL;
+ if (!IS_ERR(clnt->cl_dentry)) {
+ dput(clnt->cl_dentry);
+ rpc_put_mount();
+ }
kfree(clnt);
return 0;
}
@@ -473,15 +495,16 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
int status;
/* If this client is slain all further I/O fails */
+ status = -EIO;
if (clnt->cl_dead)
- return -EIO;
+ goto out_release;
flags |= RPC_TASK_ASYNC;
/* Create/initialize a new RPC task */
status = -ENOMEM;
if (!(task = rpc_new_task(clnt, flags, tk_ops, data)))
- goto out;
+ goto out_release;
/* Mask signals on GSS_AUTH upcalls */
rpc_task_sigmask(task, &oldset);
@@ -496,7 +519,10 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
rpc_release_task(task);
rpc_restore_sigmask(&oldset);
-out:
+ return status;
+out_release:
+ if (tk_ops->rpc_release != NULL)
+ tk_ops->rpc_release(data);
return status;
}
@@ -993,6 +1019,8 @@ call_timeout(struct rpc_task *task)
}
dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
+ task->tk_timeouts++;
+
if (RPC_IS_SOFT(task)) {
printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
clnt->cl_protname, clnt->cl_server);
@@ -1045,6 +1073,11 @@ call_decode(struct rpc_task *task)
return;
}
+ /*
+ * Ensure that we see all writes made by xprt_complete_rqst()
+ * before it changed req->rq_received.
+ */
+ smp_rmb();
req->rq_rcv_buf.len = req->rq_private_buf.len;
/* Check that the softirq receive buffer is valid */
@@ -1194,8 +1227,8 @@ call_verify(struct rpc_task *task)
task->tk_action = call_bind;
goto out_retry;
case RPC_AUTH_TOOWEAK:
- printk(KERN_NOTICE "call_verify: server requires stronger "
- "authentication.\n");
+ printk(KERN_NOTICE "call_verify: server %s requires stronger "
+ "authentication.\n", task->tk_client->cl_server);
break;
default:
printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n);
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index 8139ce68e91..d25b054ec92 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -82,6 +82,7 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
rpc_call_setup(child, &msg, 0);
/* ... and run the child task */
+ task->tk_xprt->stat.bind_count++;
rpc_run_child(task, child, pmap_getport_done);
return;
@@ -103,6 +104,11 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
.pm_prot = prot,
.pm_port = 0
};
+ struct rpc_message msg = {
+ .rpc_proc = &pmap_procedures[PMAP_GETPORT],
+ .rpc_argp = &map,
+ .rpc_resp = &map.pm_port,
+ };
struct rpc_clnt *pmap_clnt;
char hostname[32];
int status;
@@ -116,7 +122,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
return PTR_ERR(pmap_clnt);
/* Setup the call info struct */
- status = rpc_call(pmap_clnt, PMAP_GETPORT, &map, &map.pm_port, 0);
+ status = rpc_call_sync(pmap_clnt, &msg, 0);
if (status >= 0) {
if (map.pm_port != 0)
@@ -161,16 +167,27 @@ pmap_getport_done(struct rpc_task *task)
int
rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
{
- struct sockaddr_in sin;
- struct rpc_portmap map;
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ };
+ struct rpc_portmap map = {
+ .pm_prog = prog,
+ .pm_vers = vers,
+ .pm_prot = prot,
+ .pm_port = port,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &pmap_procedures[port ? PMAP_SET : PMAP_UNSET],
+ .rpc_argp = &map,
+ .rpc_resp = okay,
+ };
struct rpc_clnt *pmap_clnt;
int error = 0;
dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n",
prog, vers, prot, port);
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1);
if (IS_ERR(pmap_clnt)) {
error = PTR_ERR(pmap_clnt);
@@ -178,13 +195,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
return error;
}
- map.pm_prog = prog;
- map.pm_vers = vers;
- map.pm_prot = prot;
- map.pm_port = port;
-
- error = rpc_call(pmap_clnt, port? PMAP_SET : PMAP_UNSET,
- &map, okay, 0);
+ error = rpc_call_sync(pmap_clnt, &msg, 0);
if (error < 0) {
printk(KERN_WARNING
@@ -260,6 +271,8 @@ static struct rpc_procinfo pmap_procedures[] = {
.p_decode = (kxdrproc_t) xdr_decode_bool,
.p_bufsiz = 4,
.p_count = 1,
+ .p_statidx = PMAP_SET,
+ .p_name = "SET",
},
[PMAP_UNSET] = {
.p_proc = PMAP_UNSET,
@@ -267,6 +280,8 @@ static struct rpc_procinfo pmap_procedures[] = {
.p_decode = (kxdrproc_t) xdr_decode_bool,
.p_bufsiz = 4,
.p_count = 1,
+ .p_statidx = PMAP_UNSET,
+ .p_name = "UNSET",
},
[PMAP_GETPORT] = {
.p_proc = PMAP_GETPORT,
@@ -274,6 +289,8 @@ static struct rpc_procinfo pmap_procedures[] = {
.p_decode = (kxdrproc_t) xdr_decode_port,
.p_bufsiz = 4,
.p_count = 1,
+ .p_statidx = PMAP_GETPORT,
+ .p_name = "GETPORT",
},
};
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index ad9d9fc4e73..aa4158be990 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -91,7 +91,8 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
res = 0;
} else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) {
if (list_empty(&rpci->pipe))
- schedule_delayed_work(&rpci->queue_timeout,
+ queue_delayed_work(rpciod_workqueue,
+ &rpci->queue_timeout,
RPC_UPCALL_TIMEOUT);
list_add_tail(&msg->list, &rpci->pipe);
rpci->pipelen += msg->len;
@@ -132,7 +133,7 @@ rpc_close_pipes(struct inode *inode)
if (ops->release_pipe)
ops->release_pipe(inode);
cancel_delayed_work(&rpci->queue_timeout);
- flush_scheduled_work();
+ flush_workqueue(rpciod_workqueue);
}
rpc_inode_setowner(inode, NULL);
mutex_unlock(&inode->i_mutex);
@@ -434,14 +435,17 @@ static struct rpc_filelist authfiles[] = {
},
};
-static int
-rpc_get_mount(void)
+struct vfsmount *rpc_get_mount(void)
{
- return simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count);
+ int err;
+
+ err = simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count);
+ if (err != 0)
+ return ERR_PTR(err);
+ return rpc_mount;
}
-static void
-rpc_put_mount(void)
+void rpc_put_mount(void)
{
simple_release_fs(&rpc_mount, &rpc_mount_count);
}
@@ -451,12 +455,13 @@ rpc_lookup_parent(char *path, struct nameidata *nd)
{
if (path[0] == '\0')
return -ENOENT;
- if (rpc_get_mount()) {
+ nd->mnt = rpc_get_mount();
+ if (IS_ERR(nd->mnt)) {
printk(KERN_WARNING "%s: %s failed to mount "
"pseudofilesystem \n", __FILE__, __FUNCTION__);
- return -ENODEV;
+ return PTR_ERR(nd->mnt);
}
- nd->mnt = mntget(rpc_mount);
+ mntget(nd->mnt);
nd->dentry = dget(rpc_mount->mnt_root);
nd->last_type = LAST_ROOT;
nd->flags = LOOKUP_PARENT;
@@ -593,7 +598,6 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry)
d_instantiate(dentry, inode);
dir->i_nlink++;
inode_dir_notify(dir, DN_CREATE);
- rpc_get_mount();
return 0;
out_err:
printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
@@ -614,7 +618,6 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry)
if (!error) {
inode_dir_notify(dir, DN_DELETE);
d_drop(dentry);
- rpc_put_mount();
}
return 0;
}
@@ -668,7 +671,7 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
out:
mutex_unlock(&dir->i_mutex);
rpc_release_path(&nd);
- return dentry;
+ return dget(dentry);
err_depopulate:
rpc_depopulate(dentry);
__rpc_rmdir(dir, dentry);
@@ -732,7 +735,7 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
out:
mutex_unlock(&dir->i_mutex);
rpc_release_path(&nd);
- return dentry;
+ return dget(dentry);
err_dput:
dput(dentry);
dentry = ERR_PTR(-ENOMEM);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index dff07795bd1..b9969b91a9f 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -65,7 +65,7 @@ static LIST_HEAD(all_tasks);
*/
static DEFINE_MUTEX(rpciod_mutex);
static unsigned int rpciod_users;
-static struct workqueue_struct *rpciod_workqueue;
+struct workqueue_struct *rpciod_workqueue;
/*
* Spinlock for other critical sections of code.
@@ -182,6 +182,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *
else
list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
task->u.tk_wait.rpc_waitq = queue;
+ queue->qlen++;
rpc_set_queued(task);
dprintk("RPC: %4d added to queue %p \"%s\"\n",
@@ -216,6 +217,7 @@ static void __rpc_remove_wait_queue(struct rpc_task *task)
__rpc_remove_wait_queue_priority(task);
else
list_del(&task->u.tk_wait.list);
+ queue->qlen--;
dprintk("RPC: %4d removed from queue %p \"%s\"\n",
task->tk_pid, queue, rpc_qname(queue));
}
@@ -816,6 +818,9 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
BUG_ON(task->tk_ops == NULL);
+ /* starting timestamp */
+ task->tk_start = jiffies;
+
dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
current->pid);
}
@@ -917,8 +922,11 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
{
struct rpc_task *task;
task = rpc_new_task(clnt, flags, ops, data);
- if (task == NULL)
+ if (task == NULL) {
+ if (ops->rpc_release != NULL)
+ ops->rpc_release(data);
return ERR_PTR(-ENOMEM);
+ }
atomic_inc(&task->tk_count);
rpc_execute(task);
return task;
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 4979f226e28..790941e8af4 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -21,6 +21,7 @@
#include <linux/seq_file.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/metrics.h>
#define RPCDBG_FACILITY RPCDBG_MISC
@@ -106,6 +107,120 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
}
}
+/**
+ * rpc_alloc_iostats - allocate an rpc_iostats structure
+ * @clnt: RPC program, version, and xprt
+ *
+ */
+struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
+{
+ unsigned int ops = clnt->cl_maxproc;
+ size_t size = ops * sizeof(struct rpc_iostats);
+ struct rpc_iostats *new;
+
+ new = kmalloc(size, GFP_KERNEL);
+ if (new)
+ memset(new, 0 , size);
+ return new;
+}
+EXPORT_SYMBOL(rpc_alloc_iostats);
+
+/**
+ * rpc_free_iostats - release an rpc_iostats structure
+ * @stats: doomed rpc_iostats structure
+ *
+ */
+void rpc_free_iostats(struct rpc_iostats *stats)
+{
+ kfree(stats);
+}
+EXPORT_SYMBOL(rpc_free_iostats);
+
+/**
+ * rpc_count_iostats - tally up per-task stats
+ * @task: completed rpc_task
+ *
+ * Relies on the caller for serialization.
+ */
+void rpc_count_iostats(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_iostats *stats = task->tk_client->cl_metrics;
+ struct rpc_iostats *op_metrics;
+ long rtt, execute, queue;
+
+ if (!stats || !req)
+ return;
+ op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx];
+
+ op_metrics->om_ops++;
+ op_metrics->om_ntrans += req->rq_ntrans;
+ op_metrics->om_timeouts += task->tk_timeouts;
+
+ op_metrics->om_bytes_sent += task->tk_bytes_sent;
+ op_metrics->om_bytes_recv += req->rq_received;
+
+ queue = (long)req->rq_xtime - task->tk_start;
+ if (queue < 0)
+ queue = -queue;
+ op_metrics->om_queue += queue;
+
+ rtt = task->tk_rtt;
+ if (rtt < 0)
+ rtt = -rtt;
+ op_metrics->om_rtt += rtt;
+
+ execute = (long)jiffies - task->tk_start;
+ if (execute < 0)
+ execute = -execute;
+ op_metrics->om_execute += execute;
+}
+
+void _print_name(struct seq_file *seq, unsigned int op, struct rpc_procinfo *procs)
+{
+ if (procs[op].p_name)
+ seq_printf(seq, "\t%12s: ", procs[op].p_name);
+ else if (op == 0)
+ seq_printf(seq, "\t NULL: ");
+ else
+ seq_printf(seq, "\t%12u: ", op);
+}
+
+#define MILLISECS_PER_JIFFY (1000 / HZ)
+
+void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
+{
+ struct rpc_iostats *stats = clnt->cl_metrics;
+ struct rpc_xprt *xprt = clnt->cl_xprt;
+ unsigned int op, maxproc = clnt->cl_maxproc;
+
+ if (!stats)
+ return;
+
+ seq_printf(seq, "\tRPC iostats version: %s ", RPC_IOSTATS_VERS);
+ seq_printf(seq, "p/v: %u/%u (%s)\n",
+ clnt->cl_prog, clnt->cl_vers, clnt->cl_protname);
+
+ if (xprt)
+ xprt->ops->print_stats(xprt, seq);
+
+ seq_printf(seq, "\tper-op statistics\n");
+ for (op = 0; op < maxproc; op++) {
+ struct rpc_iostats *metrics = &stats[op];
+ _print_name(seq, op, clnt->cl_procinfo);
+ seq_printf(seq, "%lu %lu %lu %Lu %Lu %Lu %Lu %Lu\n",
+ metrics->om_ops,
+ metrics->om_ntrans,
+ metrics->om_timeouts,
+ metrics->om_bytes_sent,
+ metrics->om_bytes_recv,
+ metrics->om_queue * MILLISECS_PER_JIFFY,
+ metrics->om_rtt * MILLISECS_PER_JIFFY,
+ metrics->om_execute * MILLISECS_PER_JIFFY);
+ }
+}
+EXPORT_SYMBOL(rpc_print_iostats);
+
/*
* Register/unregister RPC proc files
*/
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 8ff2c8acb22..4dd5b3cfe75 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -44,13 +44,13 @@
#include <linux/random.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/metrics.h>
/*
* Local variables
*/
#ifdef RPC_DEBUG
-# undef RPC_DEBUG_DATA
# define RPCDBG_FACILITY RPCDBG_XPRT
#endif
@@ -548,6 +548,7 @@ void xprt_connect(struct rpc_task *task)
task->tk_timeout = xprt->connect_timeout;
rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
+ xprt->stat.connect_start = jiffies;
xprt->ops->connect(task);
}
return;
@@ -558,6 +559,8 @@ static void xprt_connect_status(struct rpc_task *task)
struct rpc_xprt *xprt = task->tk_xprt;
if (task->tk_status >= 0) {
+ xprt->stat.connect_count++;
+ xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start;
dprintk("RPC: %4d xprt_connect_status: connection established\n",
task->tk_pid);
return;
@@ -601,16 +604,14 @@ static void xprt_connect_status(struct rpc_task *task)
struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
{
struct list_head *pos;
- struct rpc_rqst *req = NULL;
list_for_each(pos, &xprt->recv) {
struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list);
- if (entry->rq_xid == xid) {
- req = entry;
- break;
- }
+ if (entry->rq_xid == xid)
+ return entry;
}
- return req;
+ xprt->stat.bad_xids++;
+ return NULL;
}
/**
@@ -646,7 +647,12 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
dprintk("RPC: %5u xid %08x complete (%d bytes received)\n",
task->tk_pid, ntohl(req->rq_xid), copied);
+ task->tk_xprt->stat.recvs++;
+ task->tk_rtt = (long)jiffies - req->rq_xtime;
+
list_del_init(&req->rq_list);
+ /* Ensure all writes are done before we update req->rq_received */
+ smp_wmb();
req->rq_received = req->rq_private_buf.len = copied;
rpc_wake_up_task(task);
}
@@ -723,7 +729,6 @@ void xprt_transmit(struct rpc_task *task)
dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
- smp_rmb();
if (!req->rq_received) {
if (list_empty(&req->rq_list)) {
spin_lock_bh(&xprt->transport_lock);
@@ -744,12 +749,19 @@ void xprt_transmit(struct rpc_task *task)
if (status == 0) {
dprintk("RPC: %4d xmit complete\n", task->tk_pid);
spin_lock_bh(&xprt->transport_lock);
+
xprt->ops->set_retrans_timeout(task);
+
+ xprt->stat.sends++;
+ xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
+ xprt->stat.bklog_u += xprt->backlog.qlen;
+
/* Don't race with disconnect */
if (!xprt_connected(xprt))
task->tk_status = -ENOTCONN;
else if (!req->rq_received)
rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
+
xprt->ops->release_xprt(xprt, task);
spin_unlock_bh(&xprt->transport_lock);
return;
@@ -848,6 +860,7 @@ void xprt_release(struct rpc_task *task)
if (!(req = task->tk_rqstp))
return;
+ rpc_count_iostats(task);
spin_lock_bh(&xprt->transport_lock);
xprt->ops->release_xprt(xprt, task);
if (xprt->ops->release_request)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c458f8d1d6d..4b4e7dfdff1 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -382,6 +382,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
/* If we've sent the entire packet, immediately
* reset the count of bytes sent. */
req->rq_bytes_sent += status;
+ task->tk_bytes_sent += status;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
req->rq_bytes_sent = 0;
return 0;
@@ -1114,6 +1115,8 @@ static void xs_tcp_connect_worker(void *args)
}
/* Tell the socket layer to start connecting... */
+ xprt->stat.connect_count++;
+ xprt->stat.connect_start = jiffies;
status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
sizeof(xprt->addr), O_NONBLOCK);
dprintk("RPC: %p connect status %d connected %d sock state %d\n",
@@ -1177,6 +1180,50 @@ static void xs_connect(struct rpc_task *task)
}
}
+/**
+ * xs_udp_print_stats - display UDP socket-specifc stats
+ * @xprt: rpc_xprt struct containing statistics
+ * @seq: output file
+ *
+ */
+static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+{
+ seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n",
+ xprt->port,
+ xprt->stat.bind_count,
+ xprt->stat.sends,
+ xprt->stat.recvs,
+ xprt->stat.bad_xids,
+ xprt->stat.req_u,
+ xprt->stat.bklog_u);
+}
+
+/**
+ * xs_tcp_print_stats - display TCP socket-specifc stats
+ * @xprt: rpc_xprt struct containing statistics
+ * @seq: output file
+ *
+ */
+static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+{
+ long idle_time = 0;
+
+ if (xprt_connected(xprt))
+ idle_time = (long)(jiffies - xprt->last_used) / HZ;
+
+ seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n",
+ xprt->port,
+ xprt->stat.bind_count,
+ xprt->stat.connect_count,
+ xprt->stat.connect_time,
+ idle_time,
+ xprt->stat.sends,
+ xprt->stat.recvs,
+ xprt->stat.bad_xids,
+ xprt->stat.req_u,
+ xprt->stat.bklog_u);
+}
+
static struct rpc_xprt_ops xs_udp_ops = {
.set_buffer_size = xs_udp_set_buffer_size,
.reserve_xprt = xprt_reserve_xprt_cong,
@@ -1191,6 +1238,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
.release_request = xprt_release_rqst_cong,
.close = xs_close,
.destroy = xs_destroy,
+ .print_stats = xs_udp_print_stats,
};
static struct rpc_xprt_ops xs_tcp_ops = {
@@ -1204,6 +1252,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
.set_retrans_timeout = xprt_set_retrans_timeout_def,
.close = xs_close,
.destroy = xs_destroy,
+ .print_stats = xs_tcp_print_stats,
};
/**