From 032ebf2620ef99a4fedaa0f77dc2272095ac5863 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Fri, 12 May 2006 18:42:09 -0700
Subject: Alternative fix for MMC oops on unmount after removal

Make sure to clear the driverfs_dev pointer when we do del_gendisk() (on
disk removal), so that other users that may still have a ref to the disk
won't try to use the stale pointer.

Also move the KOBJ_REMOVE uevent handler up, so that the uevent still
has access to the driverfs_dev data.

This all should hopefully fix the problems with MMC umounts after device
removals that caused commit 56cf6504fc1c0c221b82cebc16a444b684140fb7 and
its reversal (1a2acc9e9214699a99389e323e6686e9e0e2ca67).

Original problem reported by Todd Blumer and others.

Acked-by: Greg KH <gregkh@suse.de>
Cc: Russell King <rmk+lkml@arm.linux.org.uk>
Cc: James Bottomley <James.Bottomley@SteelEye.com>
Cc: Erik Mouw <erik@harddisk-recovery.com>
Cc: Andrew Vasquez <andrew.vasquez@qlogic.com>
Cc: Todd Blumer <todd@sdgsystems.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/partitions/check.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 45ae7dd3c65..7ef1f094de9 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -533,6 +533,7 @@ void del_gendisk(struct gendisk *disk)
 
 	devfs_remove_disk(disk);
 
+	kobject_uevent(&disk->kobj, KOBJ_REMOVE);
 	if (disk->holder_dir)
 		kobject_unregister(disk->holder_dir);
 	if (disk->slave_dir)
@@ -545,7 +546,7 @@ void del_gendisk(struct gendisk *disk)
 			kfree(disk_name);
 		}
 		put_device(disk->driverfs_dev);
+		disk->driverfs_dev = NULL;
 	}
-	kobject_uevent(&disk->kobj, KOBJ_REMOVE);
 	kobject_del(&disk->kobj);
 }
-- 
cgit v1.2.3


From 6aff5cb8ec270db569800b1bb59bd20003a76f07 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 15 May 2006 09:43:50 -0700
Subject: [PATCH] fs/open.c: unexport sys_openat

Remove the unused EXPORT_SYMBOL_GPL(sys_openat).

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/open.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 53ec28c3677..317b7c7f38a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1124,7 +1124,6 @@ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
 	prevent_tail_call(ret);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(sys_openat);
 
 #ifndef __alpha__
 
-- 
cgit v1.2.3


From a5370553952a9a414860d878b67c49eff11313bd Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 15 May 2006 09:43:51 -0700
Subject: [PATCH] autofs4: NFY_NONE wait race fix

This patch fixes two problems.

First, the comparison of entries in the waitq.c was incorrect.

Second, the NFY_NONE check was incorrect. The test of whether the dentry
is mounted if ineffective, for example, if an expire fails then we could
wait forever on a non existant expire. The bug was identified by Jeff
Moyer.

The patch changes autofs4 to wait on expires only as this is all that's
needed.  If there is no existing wait when autofs4_wait is call with a type
of NFY_NONE it delays until either a wait appears or the the expire flag is
cleared.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h |  5 ++--
 fs/autofs4/root.c     | 10 ++-----
 fs/autofs4/waitq.c    | 77 +++++++++++++++++++++++++++++++++++----------------
 3 files changed, 58 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 57c4903614e..d6603d02304 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -74,8 +74,8 @@ struct autofs_wait_queue {
 	struct autofs_wait_queue *next;
 	autofs_wqt_t wait_queue_token;
 	/* We use the following to see what we are waiting for */
-	int hash;
-	int len;
+	unsigned int hash;
+	unsigned int len;
 	char *name;
 	u32 dev;
 	u64 ino;
@@ -85,7 +85,6 @@ struct autofs_wait_queue {
 	pid_t tgid;
 	/* This is for status reporting upon return */
 	int status;
-	atomic_t notify;
 	atomic_t wait_ctr;
 };
 
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 84e030c8ddd..5100f984783 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -327,6 +327,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	int oz_mode = autofs4_oz_mode(sbi);
 	unsigned int lookup_type;
 	int status;
@@ -340,13 +341,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 	if (oz_mode || !lookup_type)
 		goto done;
 
-	/*
-	 * If a request is pending wait for it.
-	 * If it's a mount then it won't be expired till at least
-	 * a liitle later and if it's an expire then we might need
-	 * to mount it again.
-	 */
-	if (autofs4_ispending(dentry)) {
+	/* If an expire request is pending wait for it. */
+	if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
 		DPRINTK("waiting for active request %p name=%.*s",
 			dentry, dentry->d_name.len, dentry->d_name.name);
 
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 142ab6aa2aa..ce103e7b0bc 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -189,14 +189,30 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
 	return len;
 }
 
+static struct autofs_wait_queue *
+autofs4_find_wait(struct autofs_sb_info *sbi,
+		  char *name, unsigned int hash, unsigned int len)
+{
+	struct autofs_wait_queue *wq;
+
+	for (wq = sbi->queues; wq; wq = wq->next) {
+		if (wq->hash == hash &&
+		    wq->len == len &&
+		    wq->name && !memcmp(wq->name, name, len))
+			break;
+	}
+	return wq;
+}
+
 int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		enum autofs_notify notify)
 {
+	struct autofs_info *ino;
 	struct autofs_wait_queue *wq;
 	char *name;
 	unsigned int len = 0;
 	unsigned int hash = 0;
-	int status;
+	int status, type;
 
 	/* In catatonic mode, we don't wait for nobody */
 	if (sbi->catatonic)
@@ -223,21 +239,41 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		return -EINTR;
 	}
 
-	for (wq = sbi->queues ; wq ; wq = wq->next) {
-		if (wq->hash == dentry->d_name.hash &&
-		    wq->len == len &&
-		    wq->name && !memcmp(wq->name, name, len))
-			break;
-	}
+	wq = autofs4_find_wait(sbi, name, hash, len);
+	ino = autofs4_dentry_ino(dentry);
+	if (!wq && ino && notify == NFY_NONE) {
+		/*
+		 * Either we've betean the pending expire to post it's
+		 * wait or it finished while we waited on the mutex.
+		 * So we need to wait till either, the wait appears
+		 * or the expire finishes.
+		 */
+
+		while (ino->flags & AUTOFS_INF_EXPIRING) {
+			mutex_unlock(&sbi->wq_mutex);
+			schedule_timeout_interruptible(HZ/10);
+			if (mutex_lock_interruptible(&sbi->wq_mutex)) {
+				kfree(name);
+				return -EINTR;
+			}
+			wq = autofs4_find_wait(sbi, name, hash, len);
+			if (wq)
+				break;
+		}
 
-	if (!wq) {
-		/* Can't wait for an expire if there's no mount */
-		if (notify == NFY_NONE && !d_mountpoint(dentry)) {
+		/*
+		 * Not ideal but the status has already gone. Of the two
+		 * cases where we wait on NFY_NONE neither depend on the
+		 * return status of the wait.
+		 */
+		if (!wq) {
 			kfree(name);
 			mutex_unlock(&sbi->wq_mutex);
-			return -ENOENT;
+			return 0;
 		}
+	}
 
+	if (!wq) {
 		/* Create a new wait queue */
 		wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
 		if (!wq) {
@@ -263,20 +299,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		wq->tgid = current->tgid;
 		wq->status = -EINTR; /* Status return if interrupted */
 		atomic_set(&wq->wait_ctr, 2);
-		atomic_set(&wq->notify, 1);
-		mutex_unlock(&sbi->wq_mutex);
-	} else {
-		atomic_inc(&wq->wait_ctr);
 		mutex_unlock(&sbi->wq_mutex);
-		kfree(name);
-		DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d",
-			(unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
-	}
-
-	if (notify != NFY_NONE && atomic_read(&wq->notify)) {
-		int type;
-
-		atomic_dec(&wq->notify);
 
 		if (sbi->version < 5) {
 			if (notify == NFY_MOUNT)
@@ -299,6 +322,12 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 
 		/* autofs4_notify_daemon() may block */
 		autofs4_notify_daemon(sbi, wq, type);
+	} else {
+		atomic_inc(&wq->wait_ctr);
+		mutex_unlock(&sbi->wq_mutex);
+		kfree(name);
+		DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d",
+			(unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
 	}
 
 	/* wq->name is NULL if and only if the lock is already released */
-- 
cgit v1.2.3


From 3b7c8108273bed41a2fc04533cc9f2026ff38c8e Mon Sep 17 00:00:00 2001
From: Olaf Kirch <okir@suse.de>
Date: Mon, 15 May 2006 09:43:57 -0700
Subject: [PATCH] smbfs chroot issue (CVE-2006-1864)

Mark Moseley reported that a chroot environment on a SMB share can be left
via "cd ..\\".  Similar to CVE-2006-1863 issue with cifs, this fix is for
smbfs.

Steven French <sfrench@us.ibm.com> wrote:

Looks fine to me.  This should catch the slash on lookup or equivalent,
which will be all obvious paths of interest.

Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/smbfs/dir.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index 34c7a11d91f..70d9c5a37f5 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -434,6 +434,11 @@ smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	if (dentry->d_name.len > SMB_MAXNAMELEN)
 		goto out;
 
+	/* Do not allow lookup of names with backslashes in */
+	error = -EINVAL;
+	if (memchr(dentry->d_name.name, '\\', dentry->d_name.len))
+		goto out;
+
 	lock_kernel();
 	error = smb_proc_getattr(dentry, &finfo);
 #ifdef SMBFS_PARANOIA
-- 
cgit v1.2.3


From 48564e628bd7662d7a0b3ac81c41cd0e4cc36dae Mon Sep 17 00:00:00 2001
From: Jan Niehusmann <jan@gondor.com>
Date: Mon, 15 May 2006 09:44:12 -0700
Subject: [PATCH] smbfs: Fix slab corruption in samba error path

Yesterday, I got the following error with 2.6.16.13 during a file copy from
a smb filesystem over a wireless link.  I guess there was some error on the
wireless link, which in turn caused an error condition for the smb
filesystem.

In the log, smb_file_read reports error=4294966784 (0xfffffe00), which also
shows up in the slab dumps, and also is -ERESTARTSYS.  Error code 27499
corresponds to 0x6b6b, so the rq_errno field seems to be the only one being
set after freeing the slab.

In smb_add_request (which is the only place in smbfs where I found
ERESTARTSYS), I found the following:

        if (!timeleft || signal_pending(current)) {
                /*
                 * On timeout or on interrupt we want to try and remove the
                 * request from the recvq/xmitq.
                 */
                smb_lock_server(server);
                if (!(req->rq_flags & SMB_REQ_RECEIVED)) {
                        list_del_init(&req->rq_queue);
                        smb_rput(req);
                }
                smb_unlock_server(server);
        }
	[...]
        if (signal_pending(current))
                req->rq_errno = -ERESTARTSYS;

I guess that some codepath like smbiod_flush() caused the request to be
removed from the queue, and smb_rput(req) be called, without
SMB_REQ_RECEIVED being set.  This violates an asumption made by the quoted
code.

Then, the above code calls smb_rput(req) again, the req gets freed, and
req->rq_errno = -ERESTARTSYS writes into the already freed slab.  As
list_del_init doesn't cause an error if called multiple times, that does
cause the observed behaviour (freed slab with rq_errno=-ERESTARTSYS).

If this observation is correct, the following patch should fix it.

I wonder why the smb code uses list_del_init everywhere - using list_del
instead would catch such situations by poisoning the next and prev
pointers.

May  4 23:29:21 knautsch kernel: [17180085.456000] ipw2200: Firmware error detected.  Restarting.
May  4 23:29:21 knautsch kernel: [17180085.456000] ipw2200: Sysfs 'error' log captured.
May  4 23:33:02 knautsch kernel: [17180306.316000] ipw2200: Firmware error detected.  Restarting.
May  4 23:33:02 knautsch kernel: [17180306.316000] ipw2200: Sysfs 'error' log already exists.
May  4 23:33:02 knautsch kernel: [17180306.968000] smb_file_read: //some_file validation failed, error=4294966784
May  4 23:34:18 knautsch kernel: [17180383.256000] smb_file_read: //some_file validation failed, error=4294966784
May  4 23:34:18 knautsch kernel: [17180383.284000] SMB connection re-established (-5)
May  4 23:37:19 knautsch kernel: [17180563.956000] smb_file_read: //some_file validation failed, error=4294966784
May  4 23:40:09 knautsch kernel: [17180733.636000] smb_file_read: //some_file validation failed, error=4294966784
May  4 23:40:26 knautsch kernel: [17180750.700000] smb_file_read: //some_file validation failed, error=4294966784
May  4 23:43:02 knautsch kernel: [17180907.304000] smb_file_read: //some_file validation failed, error=4294966784
May  4 23:43:08 knautsch kernel: [17180912.324000] smb_file_read: //some_file validation failed, error=4294966784
May  4 23:43:34 knautsch kernel: [17180938.416000] smb_errno: class Unknown, code 27499 from command 0x6b
May  4 23:43:34 knautsch kernel: [17180938.416000] Slab corruption: start=c4ebe09c, len=244
May  4 23:43:34 knautsch kernel: [17180938.416000] Redzone: 0x5a2cf071/0x5a2cf071.
May  4 23:43:34 knautsch kernel: [17180938.416000] Last user: [<e087b903>](smb_rput+0x53/0x90 [smbfs])
May  4 23:43:34 knautsch kernel: [17180938.416000] 000: 6b 6b 6b 6b 6b 6b 6b 6b 6a 6b 6b 6b 6b 6b 6b 6b
May  4 23:43:34 knautsch kernel: [17180938.416000] 0f0: 00 fe ff ff
May  4 23:43:34 knautsch kernel: [17180938.416000] Next obj: start=c4ebe19c, len=244
May  4 23:43:34 knautsch kernel: [17180938.416000] Redzone: 0x5a2cf071/0x5a2cf071.
May  4 23:43:34 knautsch kernel: [17180938.416000] Last user: [<00000000>](_stext+0x3feffde0/0x30)
May  4 23:43:34 knautsch kernel: [17180938.416000] 000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
May  4 23:43:34 knautsch kernel: [17180938.416000] 010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
May  4 23:43:34 knautsch kernel: [17180938.460000] SMB connection re-established (-5)
May  4 23:43:42 knautsch kernel: [17180946.292000] ipw2200: Firmware error detected.  Restarting.
May  4 23:43:42 knautsch kernel: [17180946.292000] ipw2200: Sysfs 'error' log already exists.
May  4 23:45:04 knautsch kernel: [17181028.752000] ipw2200: Firmware error detected.  Restarting.
May  4 23:45:04 knautsch kernel: [17181028.752000] ipw2200: Sysfs 'error' log already exists.
May  4 23:45:05 knautsch kernel: [17181029.868000] smb_file_read: //some_file validation failed, error=4294966784
May  4 23:45:36 knautsch kernel: [17181060.984000] smb_errno: class Unknown, code 27499 from command 0x6b
May  4 23:45:36 knautsch kernel: [17181060.984000] Slab corruption: start=c4ebe09c, len=244
May  4 23:45:36 knautsch kernel: [17181060.984000] Redzone: 0x5a2cf071/0x5a2cf071.
May  4 23:45:36 knautsch kernel: [17181060.984000] Last user: [<e087b903>](smb_rput+0x53/0x90 [smbfs])
May  4 23:45:36 knautsch kernel: [17181060.984000] 000: 6b 6b 6b 6b 6b 6b 6b 6b 6a 6b 6b 6b 6b 6b 6b 6b
May  4 23:45:36 knautsch kernel: [17181060.984000] 0f0: 00 fe ff ff
May  4 23:45:36 knautsch kernel: [17181060.984000] Next obj: start=c4ebe19c, len=244
May  4 23:45:36 knautsch kernel: [17181060.984000] Redzone: 0x5a2cf071/0x5a2cf071.
May  4 23:45:36 knautsch kernel: [17181060.984000] Last user: [<00000000>](_stext+0x3feffde0/0x30)
May  4 23:45:36 knautsch kernel: [17181060.984000] 000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
May  4 23:45:36 knautsch kernel: [17181060.984000] 010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
May  4 23:45:36 knautsch kernel: [17181061.024000] SMB connection re-established (-5)
May  4 23:46:17 knautsch kernel: [17181102.132000] smb_file_read: //some_file validation failed, error=4294966784
May  4 23:47:46 knautsch kernel: [17181190.468000] smb_errno: class Unknown, code 27499 from command 0x6b
May  4 23:47:46 knautsch kernel: [17181190.468000] Slab corruption: start=c4ebe09c, len=244
May  4 23:47:46 knautsch kernel: [17181190.468000] Redzone: 0x5a2cf071/0x5a2cf071.
May  4 23:47:46 knautsch kernel: [17181190.468000] Last user: [<e087b903>](smb_rput+0x53/0x90 [smbfs])
May  4 23:47:46 knautsch kernel: [17181190.468000] 000: 6b 6b 6b 6b 6b 6b 6b 6b 6a 6b 6b 6b 6b 6b 6b 6b
May  4 23:47:46 knautsch kernel: [17181190.468000] 0f0: 00 fe ff ff
May  4 23:47:46 knautsch kernel: [17181190.468000] Next obj: start=c4ebe19c, len=244
May  4 23:47:46 knautsch kernel: [17181190.468000] Redzone: 0x5a2cf071/0x5a2cf071.
May  4 23:47:46 knautsch kernel: [17181190.468000] Last user: [<00000000>](_stext+0x3feffde0/0x30)
May  4 23:47:46 knautsch kernel: [17181190.468000] 000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
May  4 23:47:46 knautsch kernel: [17181190.468000] 010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
May  4 23:47:46 knautsch kernel: [17181190.492000] SMB connection re-established (-5)
May  4 23:49:20 knautsch kernel: [17181284.828000] smb_file_read: //some_file validation failed, error=4294966784
May  4 23:49:39 knautsch kernel: [17181303.896000] smb_file_read: //some_file validation failed, error=4294966784

Signed-off-by: Jan Niehusmann <jan@gondor.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/smbfs/request.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index c71c375863c..c71dd2760d3 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -339,9 +339,11 @@ int smb_add_request(struct smb_request *req)
 		/*
 		 * On timeout or on interrupt we want to try and remove the
 		 * request from the recvq/xmitq.
+		 * First check if the request is still part of a queue. (May
+		 * have been removed by some error condition)
 		 */
 		smb_lock_server(server);
-		if (!(req->rq_flags & SMB_REQ_RECEIVED)) {
+		if (!list_empty(&req->rq_queue)) {
 			list_del_init(&req->rq_queue);
 			smb_rput(req);
 		}
-- 
cgit v1.2.3


From 343f1fe6f2e3fb4912db241e639b0721c2e14f2e Mon Sep 17 00:00:00 2001
From: Latchesar Ionkov <lucho@ionkov.net>
Date: Mon, 15 May 2006 09:44:18 -0700
Subject: [PATCH] v9fs: Twalk memory leak

v9fs leaks memory if the file server responds with Rerror to a Twalk
message.  The patch fixes the leak.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Cc: Eric Van Hensbergen <ericvh@hera.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/fcall.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/fcall.c b/fs/9p/fcall.c
index 71742ba150c..6f2617820a4 100644
--- a/fs/9p/fcall.c
+++ b/fs/9p/fcall.c
@@ -98,23 +98,20 @@ v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
 static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc,
 	struct v9fs_fcall *rc, int err)
 {
-	int fid;
+	int fid, id;
 	struct v9fs_session_info *v9ses;
 
-	if (err)
-		return;
-
+	id = 0;
 	fid = tc->params.tclunk.fid;
-	kfree(tc);
-
-	if (!rc)
-		return;
-
-	v9ses = a;
-	if (rc->id == RCLUNK)
-		v9fs_put_idpool(fid, &v9ses->fidpool);
+	if (rc)
+		id = rc->id;
 
+	kfree(tc);
 	kfree(rc);
+	if (id == RCLUNK) {
+		v9ses = a;
+		v9fs_put_idpool(fid, &v9ses->fidpool);
+	}
 }
 
 /**
-- 
cgit v1.2.3


From 41e5a6ac80c600e1f8bda0a4871f0b797e097d78 Mon Sep 17 00:00:00 2001
From: Latchesar Ionkov <lucho@ionkov.net>
Date: Mon, 15 May 2006 09:44:21 -0700
Subject: [PATCH] v9fs: signal handling fixes

Multiple races can happen when v9fs is interrupted by a signal and Tflush
message is sent to the server.  After v9fs sends Tflush it doesn't wait
until it receives Rflush, and possibly the response of the original
message.  This behavior may confuse v9fs what fids are allocated by the
file server.

This patch fixes the races and the fid allocation.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Cc: Eric Van Hensbergen <ericvh@hera.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/mux.c       | 222 ++++++++++++++++++++++++++++++++----------------------
 fs/9p/mux.h       |   4 +-
 fs/9p/vfs_file.c  |  13 +++-
 fs/9p/vfs_inode.c |  19 ++++-
 4 files changed, 158 insertions(+), 100 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index 3e5b124a721..f4407eb276c 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -50,15 +50,23 @@ enum {
 	Wpending = 8,		/* can write */
 };
 
+enum {
+	None,
+	Flushing,
+	Flushed,
+};
+
 struct v9fs_mux_poll_task;
 
 struct v9fs_req {
+	spinlock_t lock;
 	int tag;
 	struct v9fs_fcall *tcall;
 	struct v9fs_fcall *rcall;
 	int err;
 	v9fs_mux_req_callback cb;
 	void *cba;
+	int flush;
 	struct list_head req_list;
 };
 
@@ -96,8 +104,8 @@ struct v9fs_mux_poll_task {
 
 struct v9fs_mux_rpc {
 	struct v9fs_mux_data *m;
-	struct v9fs_req *req;
 	int err;
+	struct v9fs_fcall *tcall;
 	struct v9fs_fcall *rcall;
 	wait_queue_head_t wqueue;
 };
@@ -524,10 +532,9 @@ again:
 
 static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
 {
-	int ecode, tag;
+	int ecode;
 	struct v9fs_str *ename;
 
-	tag = req->tag;
 	if (!req->err && req->rcall->id == RERROR) {
 		ecode = req->rcall->params.rerror.errno;
 		ename = &req->rcall->params.rerror.error;
@@ -553,23 +560,6 @@ static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
 		if (!req->err)
 			req->err = -EIO;
 	}
-
-	if (req->err == ERREQFLUSH)
-		return;
-
-	if (req->cb) {
-		dprintk(DEBUG_MUX, "calling callback tcall %p rcall %p\n",
-			req->tcall, req->rcall);
-
-		(*req->cb) (req->cba, req->tcall, req->rcall, req->err);
-		req->cb = NULL;
-	} else
-		kfree(req->rcall);
-
-	v9fs_mux_put_tag(m, tag);
-
-	wake_up(&m->equeue);
-	kfree(req);
 }
 
 /**
@@ -669,17 +659,26 @@ static void v9fs_read_work(void *a)
 		list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
 			if (rreq->tag == rcall->tag) {
 				req = rreq;
-				req->rcall = rcall;
-				list_del(&req->req_list);
-				spin_unlock(&m->lock);
-				process_request(m, req);
+				if (req->flush != Flushing)
+					list_del(&req->req_list);
 				break;
 			}
-
 		}
+		spin_unlock(&m->lock);
 
-		if (!req) {
-			spin_unlock(&m->lock);
+		if (req) {
+			req->rcall = rcall;
+			process_request(m, req);
+
+			if (req->flush != Flushing) {
+				if (req->cb)
+					(*req->cb) (req, req->cba);
+				else
+					kfree(req->rcall);
+
+				wake_up(&m->equeue);
+			}
+		} else {
 			if (err >= 0 && rcall->id != RFLUSH)
 				dprintk(DEBUG_ERROR,
 					"unexpected response mux %p id %d tag %d\n",
@@ -746,7 +745,6 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
 		return ERR_PTR(-ENOMEM);
 
 	v9fs_set_tag(tc, n);
-
 	if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) {
 		char buf[150];
 
@@ -754,12 +752,14 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
 		printk(KERN_NOTICE "<<< %p %s\n", m, buf);
 	}
 
+	spin_lock_init(&req->lock);
 	req->tag = n;
 	req->tcall = tc;
 	req->rcall = NULL;
 	req->err = 0;
 	req->cb = cb;
 	req->cba = cba;
+	req->flush = None;
 
 	spin_lock(&m->lock);
 	list_add_tail(&req->req_list, &m->unsent_req_list);
@@ -776,72 +776,108 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
 	return req;
 }
 
-static void v9fs_mux_flush_cb(void *a, struct v9fs_fcall *tc,
-			      struct v9fs_fcall *rc, int err)
+static void v9fs_mux_free_request(struct v9fs_mux_data *m, struct v9fs_req *req)
+{
+	v9fs_mux_put_tag(m, req->tag);
+	kfree(req);
+}
+
+static void v9fs_mux_flush_cb(struct v9fs_req *freq, void *a)
 {
 	v9fs_mux_req_callback cb;
 	int tag;
 	struct v9fs_mux_data *m;
-	struct v9fs_req *req, *rptr;
+	struct v9fs_req *req, *rreq, *rptr;
 
 	m = a;
-	dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, tc,
-		rc, err, tc->params.tflush.oldtag);
+	dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m,
+		freq->tcall, freq->rcall, freq->err,
+		freq->tcall->params.tflush.oldtag);
 
 	spin_lock(&m->lock);
 	cb = NULL;
-	tag = tc->params.tflush.oldtag;
-	list_for_each_entry_safe(req, rptr, &m->req_list, req_list) {
-		if (req->tag == tag) {
+	tag = freq->tcall->params.tflush.oldtag;
+	req = NULL;
+	list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
+		if (rreq->tag == tag) {
+			req = rreq;
 			list_del(&req->req_list);
-			if (req->cb) {
-				cb = req->cb;
-				req->cb = NULL;
-				spin_unlock(&m->lock);
-				(*cb) (req->cba, req->tcall, req->rcall,
-				       req->err);
-			}
-			kfree(req);
-			wake_up(&m->equeue);
 			break;
 		}
 	}
+	spin_unlock(&m->lock);
 
-	if (!cb)
-		spin_unlock(&m->lock);
+	if (req) {
+		spin_lock(&req->lock);
+		req->flush = Flushed;
+		spin_unlock(&req->lock);
+
+		if (req->cb)
+			(*req->cb) (req, req->cba);
+		else
+			kfree(req->rcall);
+
+		wake_up(&m->equeue);
+	}
 
-	v9fs_mux_put_tag(m, tag);
-	kfree(tc);
-	kfree(rc);
+	kfree(freq->tcall);
+	kfree(freq->rcall);
+	v9fs_mux_free_request(m, freq);
 }
 
-static void
+static int
 v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req)
 {
 	struct v9fs_fcall *fc;
+	struct v9fs_req *rreq, *rptr;
 
 	dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
 
+	/* if a response was received for a request, do nothing */
+	spin_lock(&req->lock);
+	if (req->rcall || req->err) {
+		spin_unlock(&req->lock);
+		dprintk(DEBUG_MUX, "mux %p req %p response already received\n", m, req);
+		return 0;
+	}
+
+	req->flush = Flushing;
+	spin_unlock(&req->lock);
+
+	spin_lock(&m->lock);
+	/* if the request is not sent yet, just remove it from the list */
+	list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) {
+		if (rreq->tag == req->tag) {
+			dprintk(DEBUG_MUX, "mux %p req %p request is not sent yet\n", m, req);
+			list_del(&rreq->req_list);
+			req->flush = Flushed;
+			spin_unlock(&m->lock);
+			if (req->cb)
+				(*req->cb) (req, req->cba);
+			return 0;
+		}
+	}
+	spin_unlock(&m->lock);
+
+	clear_thread_flag(TIF_SIGPENDING);
 	fc = v9fs_create_tflush(req->tag);
 	v9fs_send_request(m, fc, v9fs_mux_flush_cb, m);
+	return 1;
 }
 
 static void
-v9fs_mux_rpc_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, int err)
+v9fs_mux_rpc_cb(struct v9fs_req *req, void *a)
 {
 	struct v9fs_mux_rpc *r;
 
-	if (err == ERREQFLUSH) {
-		kfree(rc);
-		dprintk(DEBUG_MUX, "err req flush\n");
-		return;
-	}
-
+	dprintk(DEBUG_MUX, "req %p r %p\n", req, a);
 	r = a;
-	dprintk(DEBUG_MUX, "mux %p req %p tc %p rc %p err %d\n", r->m, r->req,
-		tc, rc, err);
-	r->rcall = rc;
-	r->err = err;
+	r->rcall = req->rcall;
+	r->err = req->err;
+
+	if (req->flush!=None && !req->err)
+		r->err = -ERESTARTSYS;
+
 	wake_up(&r->wqueue);
 }
 
@@ -856,12 +892,13 @@ int
 v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
 	     struct v9fs_fcall **rc)
 {
-	int err;
+	int err, sigpending;
 	unsigned long flags;
 	struct v9fs_req *req;
 	struct v9fs_mux_rpc r;
 
 	r.err = 0;
+	r.tcall = tc;
 	r.rcall = NULL;
 	r.m = m;
 	init_waitqueue_head(&r.wqueue);
@@ -869,48 +906,50 @@ v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
 	if (rc)
 		*rc = NULL;
 
+	sigpending = 0;
+	if (signal_pending(current)) {
+		sigpending = 1;
+		clear_thread_flag(TIF_SIGPENDING);
+	}
+
 	req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		dprintk(DEBUG_MUX, "error %d\n", err);
-		return PTR_ERR(req);
+		return err;
 	}
 
-	r.req = req;
-	dprintk(DEBUG_MUX, "mux %p tc %p tag %d rpc %p req %p\n", m, tc,
-		req->tag, &r, req);
 	err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0);
 	if (r.err < 0)
 		err = r.err;
 
 	if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) {
-		spin_lock(&m->lock);
-		req->tcall = NULL;
-		req->err = ERREQFLUSH;
-		spin_unlock(&m->lock);
+		if (v9fs_mux_flush_request(m, req)) {
+			/* wait until we get response of the flush message */
+			do {
+				clear_thread_flag(TIF_SIGPENDING);
+				err = wait_event_interruptible(r.wqueue,
+					r.rcall || r.err);
+			} while (!r.rcall && !r.err && err==-ERESTARTSYS &&
+				m->trans->status==Connected && !m->err);
+		}
+		sigpending = 1;
+	}
 
-		clear_thread_flag(TIF_SIGPENDING);
-		v9fs_mux_flush_request(m, req);
+	if (sigpending) {
 		spin_lock_irqsave(&current->sighand->siglock, flags);
 		recalc_sigpending();
 		spin_unlock_irqrestore(&current->sighand->siglock, flags);
 	}
 
-	if (!err) {
-		if (r.rcall)
-			dprintk(DEBUG_MUX, "got response id %d tag %d\n",
-				r.rcall->id, r.rcall->tag);
-
-		if (rc)
-			*rc = r.rcall;
-		else
-			kfree(r.rcall);
-	} else {
+	if (rc)
+		*rc = r.rcall;
+	else
 		kfree(r.rcall);
-		dprintk(DEBUG_MUX, "got error %d\n", err);
-		if (err > 0)
-			err = -EIO;
-	}
+
+	v9fs_mux_free_request(m, req);
+	if (err > 0)
+		err = -EIO;
 
 	return err;
 }
@@ -951,12 +990,15 @@ void v9fs_mux_cancel(struct v9fs_mux_data *m, int err)
 	struct v9fs_req *req, *rtmp;
 	LIST_HEAD(cancel_list);
 
-	dprintk(DEBUG_MUX, "mux %p err %d\n", m, err);
+	dprintk(DEBUG_ERROR, "mux %p err %d\n", m, err);
 	m->err = err;
 	spin_lock(&m->lock);
 	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
 		list_move(&req->req_list, &cancel_list);
 	}
+	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
+		list_move(&req->req_list, &cancel_list);
+	}
 	spin_unlock(&m->lock);
 
 	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
@@ -965,11 +1007,9 @@ void v9fs_mux_cancel(struct v9fs_mux_data *m, int err)
 			req->err = err;
 
 		if (req->cb)
-			(*req->cb) (req->cba, req->tcall, req->rcall, req->err);
+			(*req->cb) (req, req->cba);
 		else
 			kfree(req->rcall);
-
-		kfree(req);
 	}
 
 	wake_up(&m->equeue);
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
index e90bfd32ea4..fb10c50186a 100644
--- a/fs/9p/mux.h
+++ b/fs/9p/mux.h
@@ -24,6 +24,7 @@
  */
 
 struct v9fs_mux_data;
+struct v9fs_req;
 
 /**
  * v9fs_mux_req_callback - callback function that is called when the
@@ -36,8 +37,7 @@ struct v9fs_mux_data;
  * @rc - response call
  * @err - error code (non-zero if error occured)
  */
-typedef void (*v9fs_mux_req_callback)(void *a, struct v9fs_fcall *tc,
-	struct v9fs_fcall *rc, int err);
+typedef void (*v9fs_mux_req_callback)(struct v9fs_req *req, void *a);
 
 int v9fs_mux_global_init(void);
 void v9fs_mux_global_exit(void);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 083dcfcd158..1a8e46084f0 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -72,11 +72,17 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 		return -ENOSPC;
 	}
 
-	err = v9fs_t_walk(v9ses, vfid->fid, fid, NULL, NULL);
+	err = v9fs_t_walk(v9ses, vfid->fid, fid, NULL, &fcall);
 	if (err < 0) {
 		dprintk(DEBUG_ERROR, "rewalk didn't work\n");
-		goto put_fid;
+		if (fcall && fcall->id == RWALK)
+			goto clunk_fid;
+		else {
+			v9fs_put_idpool(fid, &v9ses->fidpool);
+			goto free_fcall;
+		}
 	}
+	kfree(fcall);
 
 	/* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */
 	/* translate open mode appropriately */
@@ -109,8 +115,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 clunk_fid:
 	v9fs_t_clunk(v9ses, fid);
 
-put_fid:
-	v9fs_put_idpool(fid, &v9ses->fidpool);
+free_fcall:
 	kfree(fcall);
 
 	return err;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 133db366d30..2cb87ba4b1c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -270,7 +270,10 @@ v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm,
 	err = v9fs_t_walk(v9ses, pfid, fid, NULL, &fcall);
 	if (err < 0) {
 		PRINT_FCALL_ERROR("clone error", fcall);
-		goto put_fid;
+		if (fcall && fcall->id == RWALK)
+			goto clunk_fid;
+		else
+			goto put_fid;
 	}
 	kfree(fcall);
 
@@ -322,6 +325,9 @@ v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry)
 		&fcall);
 
 	if (err < 0) {
+		if (fcall && fcall->id == RWALK)
+			goto clunk_fid;
+
 		PRINT_FCALL_ERROR("walk error", fcall);
 		v9fs_put_idpool(nfid, &v9ses->fidpool);
 		goto error;
@@ -640,19 +646,26 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 	}
 
 	result = v9fs_t_walk(v9ses, dirfidnum, newfid,
-		(char *)dentry->d_name.name, NULL);
+		(char *)dentry->d_name.name, &fcall);
+
 	if (result < 0) {
-		v9fs_put_idpool(newfid, &v9ses->fidpool);
+		if (fcall && fcall->id == RWALK)
+			v9fs_t_clunk(v9ses, newfid);
+		else
+			v9fs_put_idpool(newfid, &v9ses->fidpool);
+
 		if (result == -ENOENT) {
 			d_add(dentry, NULL);
 			dprintk(DEBUG_VFS,
 				"Return negative dentry %p count %d\n",
 				dentry, atomic_read(&dentry->d_count));
+			kfree(fcall);
 			return NULL;
 		}
 		dprintk(DEBUG_ERROR, "walk error:%d\n", result);
 		goto FreeFcall;
 	}
+	kfree(fcall);
 
 	result = v9fs_t_stat(v9ses, newfid, &fcall);
 	if (result < 0) {
-- 
cgit v1.2.3


From 3835a9bd07778d87dea37fbf190f70883515e8fc Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 15 May 2006 09:44:27 -0700
Subject: [PATCH] fs/compat.c: fix 'if (a |= b )' typo

Mentioned by Mark Armbrust somewhere on Usenet.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index 970888aad84..01f39f87f37 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1913,7 +1913,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
 	}
 
 	if (sigmask) {
-		if (sigsetsize |= sizeof(compat_sigset_t))
+		if (sigsetsize != sizeof(compat_sigset_t))
 			return -EINVAL;
 		if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
 			return -EFAULT;
-- 
cgit v1.2.3


From eee391a66d774e644bf3cbb35403562e09d88bb2 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 15 May 2006 09:44:30 -0700
Subject: [PATCH] revert "vfs: propagate mnt_flags into do_loopback/vfsmount"

Revert commit f6422f17d3a480f21917a3895e2a46b968f56a08, due to

Valdis.Kletnieks@vt.edu wrote:
>
> There seems to have been a bug introduced in this changeset:
>
> Am running 2.6.17-rc3-mm1.  When this changeset is applied, 'mount --bind'
> misbehaves:
>
> > # mkdir /foo
> > # mount -t tmpfs -o rw,nosuid,nodev,noexec,noatime,nodiratime none /foo
> > # mkdir /foo/bar
> > # mount --bind /foo/bar /foo
> > # tail -2 /proc/mounts
> > none /foo tmpfs rw,nosuid,nodev,noexec,noatime,nodiratime 0 0
> > none /foo tmpfs rw 0 0
>
> Reverting this changeset causes both mounts to have the same options.
>
> (Thanks to Stephen Smalley for tracking down the changeset...)
>

Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: <Valdis.Kletnieks@vt.edu>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namespace.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index 2c5f1f80bdc..bf478addb85 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -899,13 +899,11 @@ static int do_change_type(struct nameidata *nd, int flag)
 /*
  * do loopback mount.
  */
-static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags, int mnt_flags)
+static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
 {
 	struct nameidata old_nd;
 	struct vfsmount *mnt = NULL;
-	int recurse = flags & MS_REC;
 	int err = mount_is_safe(nd);
-
 	if (err)
 		return err;
 	if (!old_name || !*old_name)
@@ -939,7 +937,6 @@ static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags
 		spin_unlock(&vfsmount_lock);
 		release_mounts(&umount_list);
 	}
-	mnt->mnt_flags = mnt_flags;
 
 out:
 	up_write(&namespace_sem);
@@ -1353,7 +1350,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
 		retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
 				    data_page);
 	else if (flags & MS_BIND)
-		retval = do_loopback(&nd, dev_name, flags, mnt_flags);
+		retval = do_loopback(&nd, dev_name, flags & MS_REC);
 	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
 		retval = do_change_type(&nd, flags);
 	else if (flags & MS_MOVE)
-- 
cgit v1.2.3


From 194a61b8e09ac526c33777a688ee2a1504d7fbc3 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 15 May 2006 09:44:42 -0700
Subject: [PATCH] jffs2 warning fixes

fs/jffs2/nodelist.c: In function `check_node_data':
fs/jffs2/nodelist.c:441: warning: unsigned int format, different type arg (arg 4)
fs/jffs2/nodelist.c:464: warning: int format, different type arg (arg 5)

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jffs2/nodelist.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index d4d0c41490c..1d46677afd1 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -438,7 +438,8 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
 	if (c->mtd->point) {
 		err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer);
 		if (!err && retlen < tn->csize) {
-			JFFS2_WARNING("MTD point returned len too short: %u instead of %u.\n", retlen, tn->csize);
+			JFFS2_WARNING("MTD point returned len too short: %zu "
+					"instead of %u.\n", retlen, tn->csize);
 			c->mtd->unpoint(c->mtd, buffer, ofs, len);
 		} else if (err)
 			JFFS2_WARNING("MTD point failed: error code %d.\n", err);
@@ -461,7 +462,8 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
 		}
 
 		if (retlen != len) {
-			JFFS2_ERROR("short read at %#08x: %d instead of %d.\n", ofs, retlen, len);
+			JFFS2_ERROR("short read at %#08x: %zd instead of %d.\n",
+					ofs, retlen, len);
 			err = -EIO;
 			goto free_out;
 		}
-- 
cgit v1.2.3


From 53013cba4118a5cfe8f7c7ea5e5bc1c48b160f76 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 5 May 2006 19:04:03 -0700
Subject: ocfs2: take data locks around extend

We need to take a data lock around extends to protect the pages that
ocfs2_zero_extend is going to be pulling into the page cache. Otherwise an
extend on one node might populate the page cache with data pages that have
no lock coverage.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/aops.c | 46 +++++++++++++++++++++++++++++++------
 fs/ocfs2/aops.h |  4 ++--
 fs/ocfs2/file.c | 70 +++++++++++++++++++++++++++++++++++++--------------------
 3 files changed, 87 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 0d858d0b25b..47152bf9a7f 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -276,13 +276,29 @@ static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)
 	return ret;
 }
 
+/* This can also be called from ocfs2_write_zero_page() which has done
+ * it's own cluster locking. */
+int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
+			       unsigned from, unsigned to)
+{
+	int ret;
+
+	down_read(&OCFS2_I(inode)->ip_alloc_sem);
+
+	ret = block_prepare_write(page, from, to, ocfs2_get_block);
+
+	up_read(&OCFS2_I(inode)->ip_alloc_sem);
+
+	return ret;
+}
+
 /*
  * ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called
  * from loopback.  It must be able to perform its own locking around
  * ocfs2_get_block().
  */
-int ocfs2_prepare_write(struct file *file, struct page *page,
-			unsigned from, unsigned to)
+static int ocfs2_prepare_write(struct file *file, struct page *page,
+			       unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
 	int ret;
@@ -295,11 +311,7 @@ int ocfs2_prepare_write(struct file *file, struct page *page,
 		goto out;
 	}
 
-	down_read(&OCFS2_I(inode)->ip_alloc_sem);
-
-	ret = block_prepare_write(page, from, to, ocfs2_get_block);
-
-	up_read(&OCFS2_I(inode)->ip_alloc_sem);
+	ret = ocfs2_prepare_write_nolock(inode, page, from, to);
 
 	ocfs2_meta_unlock(inode, 0);
 out:
@@ -625,11 +637,31 @@ static ssize_t ocfs2_direct_IO(int rw,
 	int ret;
 
 	mlog_entry_void();
+
+	/*
+	 * We get PR data locks even for O_DIRECT.  This allows
+	 * concurrent O_DIRECT I/O but doesn't let O_DIRECT with
+	 * extending and buffered zeroing writes race.  If they did
+	 * race then the buffered zeroing could be written back after
+	 * the O_DIRECT I/O.  It's one thing to tell people not to mix
+	 * buffered and O_DIRECT writes, but expecting them to
+	 * understand that file extension is also an implicit buffered
+	 * write is too much.  By getting the PR we force writeback of
+	 * the buffered zeroing before proceeding.
+	 */
+	ret = ocfs2_data_lock(inode, 0);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+	ocfs2_data_unlock(inode, 0);
+
 	ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
 					    inode->i_sb->s_bdev, iov, offset,
 					    nr_segs, 
 					    ocfs2_direct_IO_get_blocks,
 					    ocfs2_dio_end_io);
+out:
 	mlog_exit(ret);
 	return ret;
 }
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index d40456d509a..e88c3f0b8fa 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,8 +22,8 @@
 #ifndef OCFS2_AOPS_H
 #define OCFS2_AOPS_H
 
-int ocfs2_prepare_write(struct file *file, struct page *page,
-			unsigned from, unsigned to);
+int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
+			       unsigned from, unsigned to);
 
 struct ocfs2_journal_handle *ocfs2_start_walk_page_trans(struct inode *inode,
 							 struct page *page,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 581eb451a41..20fffeed630 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -613,7 +613,8 @@ leave:
 
 /* Some parts of this taken from generic_cont_expand, which turned out
  * to be too fragile to do exactly what we need without us having to
- * worry about recursive locking in ->commit_write(). */
+ * worry about recursive locking in ->prepare_write() and
+ * ->commit_write(). */
 static int ocfs2_write_zero_page(struct inode *inode,
 				 u64 size)
 {
@@ -641,7 +642,7 @@ static int ocfs2_write_zero_page(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_prepare_write(NULL, page, offset, offset);
+	ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out_unlock;
@@ -695,13 +696,26 @@ out:
 	return ret;
 }
 
+/* 
+ * A tail_to_skip value > 0 indicates that we're being called from
+ * ocfs2_file_aio_write(). This has the following implications:
+ *
+ * - we don't want to update i_size
+ * - di_bh will be NULL, which is fine because it's only used in the
+ *   case where we want to update i_size.
+ * - ocfs2_zero_extend() will then only be filling the hole created
+ *   between i_size and the start of the write.
+ */
 static int ocfs2_extend_file(struct inode *inode,
 			     struct buffer_head *di_bh,
-			     u64 new_i_size)
+			     u64 new_i_size,
+			     size_t tail_to_skip)
 {
 	int ret = 0;
 	u32 clusters_to_add;
 
+	BUG_ON(!tail_to_skip && !di_bh);
+
 	/* setattr sometimes calls us like this. */
 	if (new_i_size == 0)
 		goto out;
@@ -714,27 +728,44 @@ static int ocfs2_extend_file(struct inode *inode,
 		OCFS2_I(inode)->ip_clusters;
 
 	if (clusters_to_add) {
-		ret = ocfs2_extend_allocation(inode, clusters_to_add);
+		/* 
+		 * protect the pages that ocfs2_zero_extend is going to
+		 * be pulling into the page cache.. we do this before the
+		 * metadata extend so that we don't get into the situation
+		 * where we've extended the metadata but can't get the data
+		 * lock to zero.
+		 */
+		ret = ocfs2_data_lock(inode, 1);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
 		}
 
-		ret = ocfs2_zero_extend(inode, new_i_size);
+		ret = ocfs2_extend_allocation(inode, clusters_to_add);
 		if (ret < 0) {
 			mlog_errno(ret);
-			goto out;
+			goto out_unlock;
 		}
-	} 
 
-	/* No allocation required, we just use this helper to
-	 * do a trivial update of i_size. */
-	ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
-	if (ret < 0) {
-		mlog_errno(ret);
-		goto out;
+		ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip);
+		if (ret < 0) {
+			mlog_errno(ret);
+			goto out_unlock;
+		}
+	}
+
+	if (!tail_to_skip) {
+		/* We're being called from ocfs2_setattr() which wants
+		 * us to update i_size */
+		ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
+		if (ret < 0)
+			mlog_errno(ret);
 	}
 
+out_unlock:
+	if (clusters_to_add) /* this is the only case in which we lock */
+		ocfs2_data_unlock(inode, 1);
+
 out:
 	return ret;
 }
@@ -793,7 +824,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 		if (i_size_read(inode) > attr->ia_size)
 			status = ocfs2_truncate_file(inode, bh, attr->ia_size);
 		else
-			status = ocfs2_extend_file(inode, bh, attr->ia_size);
+			status = ocfs2_extend_file(inode, bh, attr->ia_size, 0);
 		if (status < 0) {
 			if (status != -ENOSPC)
 				mlog_errno(status);
@@ -1049,21 +1080,12 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 		if (!clusters)
 			break;
 
-		ret = ocfs2_extend_allocation(inode, clusters);
+		ret = ocfs2_extend_file(inode, NULL, newsize, count);
 		if (ret < 0) {
 			if (ret != -ENOSPC)
 				mlog_errno(ret);
 			goto out;
 		}
-
-		/* Fill any holes which would've been created by this
-		 * write. If we're O_APPEND, this will wind up
-		 * (correctly) being a noop. */
-		ret = ocfs2_zero_extend(inode, (u64) newsize - count);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out;
-		}
 		break;
 	}
 
-- 
cgit v1.2.3


From c4374f8a6093fbee42ac4368b3ca180d1d0c7c6d Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 5 May 2006 19:04:35 -0700
Subject: ocfs2: take meta data lock in ocfs2_file_aio_read()

Temporarily take the meta data lock in ocfs2_file_aio_read() to allow us to
update our inode fields.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/file.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 20fffeed630..a9559c87453 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1168,6 +1168,22 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 		ocfs2_iocb_set_rw_locked(iocb);
 	}
 
+	/*
+	 * We're fine letting folks race truncates and extending
+	 * writes with read across the cluster, just like they can
+	 * locally. Hence no rw_lock during read.
+	 * 
+	 * Take and drop the meta data lock to update inode fields
+	 * like i_size. This allows the checks down below
+	 * generic_file_aio_read() a chance of actually working. 
+	 */
+	ret = ocfs2_meta_lock(inode, NULL, NULL, 0);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto bail;
+	}
+	ocfs2_meta_unlock(inode, 0);
+
 	ret = generic_file_aio_read(iocb, buf, count, iocb->ki_pos);
 	if (ret == -EINVAL)
 		mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n");
-- 
cgit v1.2.3


From dd4a2c2bfe159cc39e9672e875c8314563699764 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 12 Apr 2006 14:24:05 -0700
Subject: ocfs2: Don't populate uptodate cache in ocfs2_force_read_journal()

This greatly reduces the amount of memory useded during recovery.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/journal.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 6a610ae5358..c53d505cbd4 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -870,9 +870,11 @@ static int ocfs2_force_read_journal(struct inode *inode)
 		if (p_blocks > CONCURRENT_JOURNAL_FILL)
 			p_blocks = CONCURRENT_JOURNAL_FILL;
 
+		/* We are reading journal data which should not
+		 * be put in the uptodate cache */
 		status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
 					   p_blkno, p_blocks, bhs, 0,
-					   inode);
+					   NULL);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
-- 
cgit v1.2.3


From afae00ab45ea71d89086f924ebee6ca51c81e48e Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Wed, 12 Apr 2006 14:37:00 -0700
Subject: ocfs2: fix gfp mask in some file system paths

We were using GFP_KERNEL in a handful of places which really wanted
GFP_NOFS. Fix this.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/extent_map.c | 6 +++---
 fs/ocfs2/journal.c    | 4 ++--
 fs/ocfs2/uptodate.c   | 4 ++--
 fs/ocfs2/vote.c       | 6 +++---
 4 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 4601fc256f1..1a5c69071df 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -569,7 +569,7 @@ static int ocfs2_extent_map_insert(struct inode *inode,
 
 	ret = -ENOMEM;
 	ctxt.new_ent = kmem_cache_alloc(ocfs2_em_ent_cachep,
-					GFP_KERNEL);
+					GFP_NOFS);
 	if (!ctxt.new_ent) {
 		mlog_errno(ret);
 		return ret;
@@ -583,14 +583,14 @@ static int ocfs2_extent_map_insert(struct inode *inode,
 		if (ctxt.need_left && !ctxt.left_ent) {
 			ctxt.left_ent =
 				kmem_cache_alloc(ocfs2_em_ent_cachep,
-						 GFP_KERNEL);
+						 GFP_NOFS);
 			if (!ctxt.left_ent)
 				break;
 		}
 		if (ctxt.need_right && !ctxt.right_ent) {
 			ctxt.right_ent =
 				kmem_cache_alloc(ocfs2_em_ent_cachep,
-						 GFP_KERNEL);
+						 GFP_NOFS);
 			if (!ctxt.right_ent)
 				break;
 		}
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index c53d505cbd4..eebc3cfa6be 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -117,7 +117,7 @@ struct ocfs2_journal_handle *ocfs2_alloc_handle(struct ocfs2_super *osb)
 {
 	struct ocfs2_journal_handle *retval = NULL;
 
-	retval = kcalloc(1, sizeof(*retval), GFP_KERNEL);
+	retval = kcalloc(1, sizeof(*retval), GFP_NOFS);
 	if (!retval) {
 		mlog(ML_ERROR, "Failed to allocate memory for journal "
 		     "handle!\n");
@@ -984,7 +984,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
 {
 	struct ocfs2_la_recovery_item *item;
 
-	item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_KERNEL);
+	item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);
 	if (!item) {
 		/* Though we wish to avoid it, we are in fact safe in
 		 * skipping local alloc cleanup as fsck.ocfs2 is more
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 04a684dfdd9..b8a00a79332 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -337,7 +337,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 	     (unsigned long long)oi->ip_blkno,
 	     (unsigned long long)block, expand_tree);
 
-	new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_KERNEL);
+	new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS);
 	if (!new) {
 		mlog_errno(-ENOMEM);
 		return;
@@ -349,7 +349,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 		 * has no way of tracking that. */
 		for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) {
 			tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep,
-						   GFP_KERNEL);
+						   GFP_NOFS);
 			if (!tree[i]) {
 				mlog_errno(-ENOMEM);
 				goto out_free;
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 53049a20419..ee42765a855 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -586,7 +586,7 @@ static struct ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(unsigned int response
 {
 	struct ocfs2_net_wait_ctxt *w;
 
-	w = kcalloc(1, sizeof(*w), GFP_KERNEL);
+	w = kcalloc(1, sizeof(*w), GFP_NOFS);
 	if (!w) {
 		mlog_errno(-ENOMEM);
 		goto bail;
@@ -749,7 +749,7 @@ static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb,
 
 	BUG_ON(!ocfs2_is_valid_vote_request(type));
 
-	request = kcalloc(1, sizeof(*request), GFP_KERNEL);
+	request = kcalloc(1, sizeof(*request), GFP_NOFS);
 	if (!request) {
 		mlog_errno(-ENOMEM);
 	} else {
@@ -1129,7 +1129,7 @@ static int ocfs2_handle_vote_message(struct o2net_msg *msg,
 	struct ocfs2_super *osb = data;
 	struct ocfs2_vote_work *work;
 
-	work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_KERNEL);
+	work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_NOFS);
 	if (!work) {
 		status = -ENOMEM;
 		mlog_errno(status);
-- 
cgit v1.2.3


From 84efad1a53dd05969094f9a2562b4e6666571c00 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Mon, 27 Mar 2006 18:46:09 -0800
Subject: configfs: Fix a reference leak in configfs_mkdir().

configfs_mkdir() failed to release the working parent reference in most
exit paths.  Also changed the exit path for readability.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/configfs/dir.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 5638c8f9362..810c1395d6b 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -704,13 +704,18 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	struct module *owner;
 	char *name;
 
-	if (dentry->d_parent == configfs_sb->s_root)
-		return -EPERM;
+	if (dentry->d_parent == configfs_sb->s_root) {
+		ret = -EPERM;
+		goto out;
+	}
 
 	sd = dentry->d_parent->d_fsdata;
-	if (!(sd->s_type & CONFIGFS_USET_DIR))
-		return -EPERM;
+	if (!(sd->s_type & CONFIGFS_USET_DIR)) {
+		ret = -EPERM;
+		goto out;
+	}
 
+	/* Get a working ref for the duration of this function */
 	parent_item = configfs_get_config_item(dentry->d_parent);
 	type = parent_item->ci_type;
 	subsys = to_config_group(parent_item)->cg_subsys;
@@ -719,15 +724,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	if (!type || !type->ct_group_ops ||
 	    (!type->ct_group_ops->make_group &&
 	     !type->ct_group_ops->make_item)) {
-		config_item_put(parent_item);
-		return -EPERM;  /* What lack-of-mkdir returns */
+		ret = -EPERM;  /* Lack-of-mkdir returns -EPERM */
+		goto out_put;
 	}
 
 	name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
 	if (!name) {
-		config_item_put(parent_item);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out_put;
 	}
+
 	snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
 
 	down(&subsys->su_sem);
@@ -748,8 +754,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	kfree(name);
 	if (!item) {
-		config_item_put(parent_item);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out_put;
 	}
 
 	ret = -EINVAL;
@@ -776,12 +782,19 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 				client_drop_item(parent_item, item);
 				up(&subsys->su_sem);
 
-				config_item_put(parent_item);
 				module_put(owner);
 			}
 		}
 	}
 
+out_put:
+	/*
+	 * link_obj()/link_group() took a reference from child->parent.
+	 * Drop our working ref
+	 */
+	config_item_put(parent_item);
+
+out:
 	return ret;
 }
 
@@ -801,6 +814,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 	if (sd->s_type & CONFIGFS_USET_DEFAULT)
 		return -EPERM;
 
+	/* Get a working ref until we have the child */
 	parent_item = configfs_get_config_item(dentry->d_parent);
 	subsys = to_config_group(parent_item)->cg_subsys;
 	BUG_ON(!subsys);
@@ -817,6 +831,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 		return ret;
 	}
 
+	/* Get a working ref for the duration of this function */
 	item = configfs_get_config_item(dentry);
 
 	/* Drop reference from above, item already holds one. */
-- 
cgit v1.2.3


From eed7a0db460595b139428d252798a83f1e1ce1d3 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 11 Apr 2006 21:37:20 -0700
Subject: configfs: configfs_mkdir() failed to cleanup linkage.

If configfs_mkdir() errored in certain ways after the parent<->child
linkage was already created, it would not undo the linkage.  Also,
comment the reference counting for clarity.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/configfs/dir.c | 104 +++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 72 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 810c1395d6b..5f952187fc5 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -505,13 +505,15 @@ static int populate_groups(struct config_group *group)
 	int i;
 
 	if (group->default_groups) {
-		/* FYI, we're faking mkdir here
+		/*
+		 * FYI, we're faking mkdir here
 		 * I'm not sure we need this semaphore, as we're called
 		 * from our parent's mkdir.  That holds our parent's
 		 * i_mutex, so afaik lookup cannot continue through our
 		 * parent to find us, let alone mess with our tree.
 		 * That said, taking our i_mutex is closer to mkdir
-		 * emulation, and shouldn't hurt. */
+		 * emulation, and shouldn't hurt.
+		 */
 		mutex_lock(&dentry->d_inode->i_mutex);
 
 		for (i = 0; group->default_groups[i]; i++) {
@@ -546,20 +548,34 @@ static void unlink_obj(struct config_item *item)
 
 		item->ci_group = NULL;
 		item->ci_parent = NULL;
+
+		/* Drop the reference for ci_entry */
 		config_item_put(item);
 
+		/* Drop the reference for ci_parent */
 		config_group_put(group);
 	}
 }
 
 static void link_obj(struct config_item *parent_item, struct config_item *item)
 {
-	/* Parent seems redundant with group, but it makes certain
-	 * traversals much nicer. */
+	/*
+	 * Parent seems redundant with group, but it makes certain
+	 * traversals much nicer.
+	 */
 	item->ci_parent = parent_item;
+
+	/*
+	 * We hold a reference on the parent for the child's ci_parent
+	 * link.
+	 */
 	item->ci_group = config_group_get(to_config_group(parent_item));
 	list_add_tail(&item->ci_entry, &item->ci_group->cg_children);
 
+	/*
+	 * We hold a reference on the child for ci_entry on the parent's
+	 * cg_children
+	 */
 	config_item_get(item);
 }
 
@@ -684,6 +700,10 @@ static void client_drop_item(struct config_item *parent_item,
 	type = parent_item->ci_type;
 	BUG_ON(!type);
 
+	/*
+	 * If ->drop_item() exists, it is responsible for the
+	 * config_item_put().
+	 */
 	if (type->ct_group_ops && type->ct_group_ops->drop_item)
 		type->ct_group_ops->drop_item(to_config_group(parent_item),
 						item);
@@ -694,14 +714,14 @@ static void client_drop_item(struct config_item *parent_item,
 
 static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
-	int ret;
+	int ret, module_got = 0;
 	struct config_group *group;
 	struct config_item *item;
 	struct config_item *parent_item;
 	struct configfs_subsystem *subsys;
 	struct configfs_dirent *sd;
 	struct config_item_type *type;
-	struct module *owner;
+	struct module *owner = NULL;
 	char *name;
 
 	if (dentry->d_parent == configfs_sb->s_root) {
@@ -754,43 +774,63 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	kfree(name);
 	if (!item) {
+		/*
+		 * If item == NULL, then link_obj() was never called.
+		 * There are no extra references to clean up.
+		 */
 		ret = -ENOMEM;
 		goto out_put;
 	}
 
-	ret = -EINVAL;
+	/*
+	 * link_obj() has been called (via link_group() for groups).
+	 * From here on out, errors must clean that up.
+	 */
+
 	type = item->ci_type;
-	if (type) {
-		owner = type->ct_owner;
-		if (try_module_get(owner)) {
-			if (group) {
-				ret = configfs_attach_group(parent_item,
-							    item,
-							    dentry);
-			} else {
-				ret = configfs_attach_item(parent_item,
-							   item,
-							   dentry);
-			}
+	if (!type) {
+		ret = -EINVAL;
+		goto out_unlink;
+	}
 
-			if (ret) {
-				down(&subsys->su_sem);
-				if (group)
-					unlink_group(group);
-				else
-					unlink_obj(item);
-				client_drop_item(parent_item, item);
-				up(&subsys->su_sem);
+	owner = type->ct_owner;
+	if (!try_module_get(owner)) {
+		ret = -EINVAL;
+		goto out_unlink;
+	}
 
-				module_put(owner);
-			}
-		}
+	/*
+	 * I hate doing it this way, but if there is
+	 * an error,  module_put() probably should
+	 * happen after any cleanup.
+	 */
+	module_got = 1;
+
+	if (group)
+		ret = configfs_attach_group(parent_item, item, dentry);
+	else
+		ret = configfs_attach_item(parent_item, item, dentry);
+
+out_unlink:
+	if (ret) {
+		/* Tear down everything we built up */
+		down(&subsys->su_sem);
+		if (group)
+			unlink_group(group);
+		else
+			unlink_obj(item);
+		client_drop_item(parent_item, item);
+		up(&subsys->su_sem);
+
+		if (module_got)
+			module_put(owner);
 	}
 
 out_put:
 	/*
-	 * link_obj()/link_group() took a reference from child->parent.
-	 * Drop our working ref
+	 * link_obj()/link_group() took a reference from child->parent,
+	 * so the parent is safely pinned.  We can drop our working
+	 * reference.
 	 */
 	config_item_put(parent_item);
 
-- 
cgit v1.2.3


From cef0893dcf1fdf22943aa49e75ee1eb3bfffe5f5 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 3 May 2006 11:38:53 -0700
Subject: configfs: Make sure configfs_init() is called before consumers.

configfs_init() needs to be called first to register configfs before anyconsumers try to access it.  Move up configfs in fs/Makefile to make
sure it is initialized early.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/Makefile b/fs/Makefile
index 83bf478e786..078d3d1191a 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_DNOTIFY)		+= dnotify.o
 obj-$(CONFIG_PROC_FS)		+= proc/
 obj-y				+= partitions/
 obj-$(CONFIG_SYSFS)		+= sysfs/
+obj-$(CONFIG_CONFIGFS_FS)	+= configfs/
 obj-y				+= devpts/
 
 obj-$(CONFIG_PROFILING)		+= dcookies.o
@@ -100,5 +101,4 @@ obj-$(CONFIG_BEFS_FS)		+= befs/
 obj-$(CONFIG_HOSTFS)		+= hostfs/
 obj-$(CONFIG_HPPFS)		+= hppfs/
 obj-$(CONFIG_DEBUG_FS)		+= debugfs/
-obj-$(CONFIG_CONFIGFS_FS)	+= configfs/
 obj-$(CONFIG_OCFS2_FS)		+= ocfs2/
-- 
cgit v1.2.3


From d64b1c878fc1e384ae53d1d40034239bc33848f4 Mon Sep 17 00:00:00 2001
From: Lin Feng Shen <shenlinf@cn.ibm.com>
Date: Sat, 20 May 2006 14:59:49 -0700
Subject: [PATCH] NFS: fix error handling on access_ok in compat_sys_nfsservctl

Functions compat_nfs_svc_trans, compat_nfs_clnt_trans,
compat_nfs_exp_trans, compat_nfs_getfd_trans and compat_nfs_getfs_trans,
which are called by compat_sys_nfsservctl(fs/compat.c), don't handle the
return value of access_ok properly.  access_ok return 1 when the addr is
valid, and 0 when it's not, but these functions have the reversed
understanding.  When the address is valid, they always return -EFAULT to
compat_sys_nfsservctl.

An example is to run /usr/sbin/rpc.nfsd(32bit program on Power5).  It
doesn't function as expected.  strace showes that nfsservctl returns
-EFAULT.

The patch fixes this by correcting the error handling on the return value
of access_ok in the five functions.

Signed-off-by: Lin Feng Shen <shenlinf@cn.ibm.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c | 177 +++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 92 insertions(+), 85 deletions(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index 01f39f87f37..b1f64786a61 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -2030,109 +2030,115 @@ union compat_nfsctl_res {
 	struct knfsd_fh		cr32_getfs;
 };
 
-static int compat_nfs_svc_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg)
-{
-	int err;
-
-	err = access_ok(VERIFY_READ, &arg->ca32_svc, sizeof(arg->ca32_svc));
-	err |= get_user(karg->ca_version, &arg->ca32_version);
-	err |= __get_user(karg->ca_svc.svc_port, &arg->ca32_svc.svc32_port);
-	err |= __get_user(karg->ca_svc.svc_nthreads, &arg->ca32_svc.svc32_nthreads);
-	return (err) ? -EFAULT : 0;
+static int compat_nfs_svc_trans(struct nfsctl_arg *karg,
+				struct compat_nfsctl_arg __user *arg)
+{
+	if (!access_ok(VERIFY_READ, &arg->ca32_svc, sizeof(arg->ca32_svc)) ||
+		get_user(karg->ca_version, &arg->ca32_version) ||
+		__get_user(karg->ca_svc.svc_port, &arg->ca32_svc.svc32_port) ||
+		__get_user(karg->ca_svc.svc_nthreads,
+				&arg->ca32_svc.svc32_nthreads))
+		return -EFAULT;
+	return 0;
 }
 
-static int compat_nfs_clnt_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg)
-{
-	int err;
-
-	err = access_ok(VERIFY_READ, &arg->ca32_client, sizeof(arg->ca32_client));
-	err |= get_user(karg->ca_version, &arg->ca32_version);
-	err |= __copy_from_user(&karg->ca_client.cl_ident[0],
-			  &arg->ca32_client.cl32_ident[0],
-			  NFSCLNT_IDMAX);
-	err |= __get_user(karg->ca_client.cl_naddr, &arg->ca32_client.cl32_naddr);
-	err |= __copy_from_user(&karg->ca_client.cl_addrlist[0],
-			  &arg->ca32_client.cl32_addrlist[0],
-			  (sizeof(struct in_addr) * NFSCLNT_ADDRMAX));
-	err |= __get_user(karg->ca_client.cl_fhkeytype,
-		      &arg->ca32_client.cl32_fhkeytype);
-	err |= __get_user(karg->ca_client.cl_fhkeylen,
-		      &arg->ca32_client.cl32_fhkeylen);
-	err |= __copy_from_user(&karg->ca_client.cl_fhkey[0],
-			  &arg->ca32_client.cl32_fhkey[0],
-			  NFSCLNT_KEYMAX);
+static int compat_nfs_clnt_trans(struct nfsctl_arg *karg,
+				struct compat_nfsctl_arg __user *arg)
+{
+	if (!access_ok(VERIFY_READ, &arg->ca32_client,
+			sizeof(arg->ca32_client)) ||
+		get_user(karg->ca_version, &arg->ca32_version) ||
+		__copy_from_user(&karg->ca_client.cl_ident[0],
+				&arg->ca32_client.cl32_ident[0],
+				NFSCLNT_IDMAX) ||
+		__get_user(karg->ca_client.cl_naddr,
+				&arg->ca32_client.cl32_naddr) ||
+		__copy_from_user(&karg->ca_client.cl_addrlist[0],
+				&arg->ca32_client.cl32_addrlist[0],
+				(sizeof(struct in_addr) * NFSCLNT_ADDRMAX)) ||
+		__get_user(karg->ca_client.cl_fhkeytype,
+				&arg->ca32_client.cl32_fhkeytype) ||
+		__get_user(karg->ca_client.cl_fhkeylen,
+				&arg->ca32_client.cl32_fhkeylen) ||
+		__copy_from_user(&karg->ca_client.cl_fhkey[0],
+				&arg->ca32_client.cl32_fhkey[0],
+				NFSCLNT_KEYMAX))
+		return -EFAULT;
 
-	return (err) ? -EFAULT : 0;
+	return 0;
 }
 
-static int compat_nfs_exp_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg)
-{
-	int err;
-
-	err = access_ok(VERIFY_READ, &arg->ca32_export, sizeof(arg->ca32_export));
-	err |= get_user(karg->ca_version, &arg->ca32_version);
-	err |= __copy_from_user(&karg->ca_export.ex_client[0],
-			  &arg->ca32_export.ex32_client[0],
-			  NFSCLNT_IDMAX);
-	err |= __copy_from_user(&karg->ca_export.ex_path[0],
-			  &arg->ca32_export.ex32_path[0],
-			  NFS_MAXPATHLEN);
-	err |= __get_user(karg->ca_export.ex_dev,
-		      &arg->ca32_export.ex32_dev);
-	err |= __get_user(karg->ca_export.ex_ino,
-		      &arg->ca32_export.ex32_ino);
-	err |= __get_user(karg->ca_export.ex_flags,
-		      &arg->ca32_export.ex32_flags);
-	err |= __get_user(karg->ca_export.ex_anon_uid,
-		      &arg->ca32_export.ex32_anon_uid);
-	err |= __get_user(karg->ca_export.ex_anon_gid,
-		      &arg->ca32_export.ex32_anon_gid);
+static int compat_nfs_exp_trans(struct nfsctl_arg *karg,
+				struct compat_nfsctl_arg __user *arg)
+{
+	if (!access_ok(VERIFY_READ, &arg->ca32_export,
+				sizeof(arg->ca32_export)) ||
+		get_user(karg->ca_version, &arg->ca32_version) ||
+		__copy_from_user(&karg->ca_export.ex_client[0],
+				&arg->ca32_export.ex32_client[0],
+				NFSCLNT_IDMAX) ||
+		__copy_from_user(&karg->ca_export.ex_path[0],
+				&arg->ca32_export.ex32_path[0],
+				NFS_MAXPATHLEN) ||
+		__get_user(karg->ca_export.ex_dev,
+				&arg->ca32_export.ex32_dev) ||
+		__get_user(karg->ca_export.ex_ino,
+				&arg->ca32_export.ex32_ino) ||
+		__get_user(karg->ca_export.ex_flags,
+				&arg->ca32_export.ex32_flags) ||
+		__get_user(karg->ca_export.ex_anon_uid,
+				&arg->ca32_export.ex32_anon_uid) ||
+		__get_user(karg->ca_export.ex_anon_gid,
+				&arg->ca32_export.ex32_anon_gid))
+		return -EFAULT;
 	SET_UID(karg->ca_export.ex_anon_uid, karg->ca_export.ex_anon_uid);
 	SET_GID(karg->ca_export.ex_anon_gid, karg->ca_export.ex_anon_gid);
 
-	return (err) ? -EFAULT : 0;
+	return 0;
 }
 
-static int compat_nfs_getfd_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg)
-{
-	int err;
-
-	err = access_ok(VERIFY_READ, &arg->ca32_getfd, sizeof(arg->ca32_getfd));
-	err |= get_user(karg->ca_version, &arg->ca32_version);
-	err |= __copy_from_user(&karg->ca_getfd.gd_addr,
-			  &arg->ca32_getfd.gd32_addr,
-			  (sizeof(struct sockaddr)));
-	err |= __copy_from_user(&karg->ca_getfd.gd_path,
-			  &arg->ca32_getfd.gd32_path,
-			  (NFS_MAXPATHLEN+1));
-	err |= __get_user(karg->ca_getfd.gd_version,
-		      &arg->ca32_getfd.gd32_version);
+static int compat_nfs_getfd_trans(struct nfsctl_arg *karg,
+				struct compat_nfsctl_arg __user *arg)
+{
+	if (!access_ok(VERIFY_READ, &arg->ca32_getfd,
+			sizeof(arg->ca32_getfd)) ||
+		get_user(karg->ca_version, &arg->ca32_version) ||
+		__copy_from_user(&karg->ca_getfd.gd_addr,
+				&arg->ca32_getfd.gd32_addr,
+				(sizeof(struct sockaddr))) ||
+		__copy_from_user(&karg->ca_getfd.gd_path,
+				&arg->ca32_getfd.gd32_path,
+				(NFS_MAXPATHLEN+1)) ||
+		__get_user(karg->ca_getfd.gd_version,
+				&arg->ca32_getfd.gd32_version))
+		return -EFAULT;
 
-	return (err) ? -EFAULT : 0;
+	return 0;
 }
 
-static int compat_nfs_getfs_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg)
+static int compat_nfs_getfs_trans(struct nfsctl_arg *karg,
+				struct compat_nfsctl_arg __user *arg)
 {
-	int err;
-
-	err = access_ok(VERIFY_READ, &arg->ca32_getfs, sizeof(arg->ca32_getfs));
-	err |= get_user(karg->ca_version, &arg->ca32_version);
-	err |= __copy_from_user(&karg->ca_getfs.gd_addr,
-			  &arg->ca32_getfs.gd32_addr,
-			  (sizeof(struct sockaddr)));
-	err |= __copy_from_user(&karg->ca_getfs.gd_path,
-			  &arg->ca32_getfs.gd32_path,
-			  (NFS_MAXPATHLEN+1));
-	err |= __get_user(karg->ca_getfs.gd_maxlen,
-		      &arg->ca32_getfs.gd32_maxlen);
+	if (!access_ok(VERIFY_READ,&arg->ca32_getfs,sizeof(arg->ca32_getfs)) ||
+		get_user(karg->ca_version, &arg->ca32_version) ||
+		__copy_from_user(&karg->ca_getfs.gd_addr,
+				&arg->ca32_getfs.gd32_addr,
+				(sizeof(struct sockaddr))) ||
+		__copy_from_user(&karg->ca_getfs.gd_path,
+				&arg->ca32_getfs.gd32_path,
+				(NFS_MAXPATHLEN+1)) ||
+		__get_user(karg->ca_getfs.gd_maxlen,
+				&arg->ca32_getfs.gd32_maxlen))
+		return -EFAULT;
 
-	return (err) ? -EFAULT : 0;
+	return 0;
 }
 
 /* This really doesn't need translations, we are only passing
  * back a union which contains opaque nfs file handle data.
  */
-static int compat_nfs_getfh_res_trans(union nfsctl_res *kres, union compat_nfsctl_res __user *res)
+static int compat_nfs_getfh_res_trans(union nfsctl_res *kres,
+				union compat_nfsctl_res __user *res)
 {
 	int err;
 
@@ -2141,8 +2147,9 @@ static int compat_nfs_getfh_res_trans(union nfsctl_res *kres, union compat_nfsct
 	return (err) ? -EFAULT : 0;
 }
 
-asmlinkage long compat_sys_nfsservctl(int cmd, struct compat_nfsctl_arg __user *arg,
-					union compat_nfsctl_res __user *res)
+asmlinkage long compat_sys_nfsservctl(int cmd,
+				struct compat_nfsctl_arg __user *arg,
+				union compat_nfsctl_res __user *res)
 {
 	struct nfsctl_arg *karg;
 	union nfsctl_res *kres;
-- 
cgit v1.2.3


From 8c7b389e532e964f07057dac8a56c43465544759 Mon Sep 17 00:00:00 2001
From: Peter Staubach <staubach@redhat.com>
Date: Sat, 20 May 2006 14:59:56 -0700
Subject: [PATCH] NFS server subtree_check returns dubious value

Address a problem found when a Linux NFS server uses the "subtree_check"
export option.

The "subtree_check" NFS export option was designed to prohibit a client
from using a file handle for which it should not have permission.  The
algorithm used is to ensure that the entire path to the file being
referenced is accessible to the user attempting to use the file handle.  If
some part of the path is not accessible, then the operation is aborted and
the appropriate version of ESTALE is returned to the NFS client.

The error, ESTALE, is unfortunate in that it causes NFS clients to make
certain assumptions about the continued existence of the file.  They assume
that the file no longer exists and refuse to attempt to access it again.
In this case, the file really does exist, but access was denied by the
server for a particular user.

A better error to return would be an EACCES sort of error.  This would
inform the client that the particular operation that it was attempting was
not allowed, without the nasty side effects of the ESTALE error.

Signed-off-by: Peter Staubach <staubach@redhat.com>
Acked-By: NeilBrown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exportfs/expfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index b06b54f1bbb..4c39009350f 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -102,7 +102,7 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
 		if (acceptable(context, result))
 			return result;
 		if (S_ISDIR(result->d_inode->i_mode)) {
-			/* there is no other dentry, so fail */
+			err = -EACCES;
 			goto err_result;
 		}
 
-- 
cgit v1.2.3


From 9ccfc29c671c9d0a83c2a114d4bc5f85f3cd749d Mon Sep 17 00:00:00 2001
From: Florin Malita <fmalita@gmail.com>
Date: Sat, 20 May 2006 14:59:58 -0700
Subject: [PATCH] nfsd: sign conversion obscuring errors in
 nfsd_set_posix_acl()

Assigning the result of posix_acl_to_xattr() to an unsigned data type
(size/size_t) obscures possible errors.

Coverity CID: 1206.

Signed-off-by: Florin Malita <fmalita@gmail.com>
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/vfs.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 6aa92d0e687..1d65f13f458 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1922,11 +1922,10 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
 		value = kmalloc(size, GFP_KERNEL);
 		if (!value)
 			return -ENOMEM;
-		size = posix_acl_to_xattr(acl, value, size);
-		if (size < 0) {
-			error = size;
+		error = posix_acl_to_xattr(acl, value, size);
+		if (error < 0)
 			goto getout;
-		}
+		size = error;
 	} else
 		size = 0;
 
-- 
cgit v1.2.3


From df88912a2165f56a7402db80126cf8ea075221fe Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 20 May 2006 15:00:01 -0700
Subject: [PATCH] binfmt_flat: don't check for EMFILE

Bernd Schmidt points out that binfmt_flat is now leaving the exec file open
while the application runs.  This offsets all the application's fd numbers.
We should have closed the file within exec(), not at exit()-time.

But there doesn't seem to be a lot of point in doing all this just to avoid
going over RLIMIT_NOFILE by one fd for a few microseconds.  So take the EMFILE
checking out again.  This will cause binfmt_flat to again fail LTP's
exec-should-return-EMFILE-when-fdtable-is-full test.  That test appears to be
wrong anyway - Open Group specs say nothing about exec() returning EMFILE.

Cc: Bernd Schmidt <bernd.schmidt@analog.com>
Cc: Greg Ungerer <gerg@uclinux.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_flat.c | 30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 69f44dcdb0b..b1c902e319c 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -428,7 +428,6 @@ static int load_flat_file(struct linux_binprm * bprm,
 	loff_t fpos;
 	unsigned long start_code, end_code;
 	int ret;
-	int exec_fileno;
 
 	hdr = ((struct flat_hdr *) bprm->buf);		/* exec-header */
 	inode = bprm->file->f_dentry->d_inode;
@@ -502,21 +501,12 @@ static int load_flat_file(struct linux_binprm * bprm,
 		goto err;
 	}
 
-	/* check file descriptor */
-	exec_fileno = get_unused_fd();
-	if (exec_fileno < 0) {
-		ret = -EMFILE;
-		goto err;
-	}
-	get_file(bprm->file);
-	fd_install(exec_fileno, bprm->file);
-
 	/* Flush all traces of the currently running executable */
 	if (id == 0) {
 		result = flush_old_exec(bprm);
 		if (result) {
 			ret = result;
-			goto err_close;
+			goto err;
 		}
 
 		/* OK, This is the point of no return */
@@ -548,7 +538,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 				textpos = (unsigned long) -ENOMEM;
 			printk("Unable to mmap process text, errno %d\n", (int)-textpos);
 			ret = textpos;
-			goto err_close;
+			goto err;
 		}
 
 		down_write(&current->mm->mmap_sem);
@@ -564,7 +554,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 					(int)-datapos);
 			do_munmap(current->mm, textpos, text_len);
 			ret = realdatastart;
-			goto err_close;
+			goto err;
 		}
 		datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long);
 
@@ -587,7 +577,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 			do_munmap(current->mm, textpos, text_len);
 			do_munmap(current->mm, realdatastart, data_len + extra);
 			ret = result;
-			goto err_close;
+			goto err;
 		}
 
 		reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len));
@@ -606,7 +596,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 			printk("Unable to allocate RAM for process text/data, errno %d\n",
 					(int)-textpos);
 			ret = textpos;
-			goto err_close;
+			goto err;
 		}
 
 		realdatastart = textpos + ntohl(hdr->data_start);
@@ -652,7 +642,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 			do_munmap(current->mm, textpos, text_len + data_len + extra +
 				MAX_SHARED_LIBS * sizeof(unsigned long));
 			ret = result;
-			goto err_close;
+			goto err;
 		}
 	}
 
@@ -717,7 +707,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 				addr = calc_reloc(*rp, libinfo, id, 0);
 				if (addr == RELOC_FAILED) {
 					ret = -ENOEXEC;
-					goto err_close;
+					goto err;
 				}
 				*rp = addr;
 			}
@@ -747,7 +737,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 			rp = (unsigned long *) calc_reloc(addr, libinfo, id, 1);
 			if (rp == (unsigned long *)RELOC_FAILED) {
 				ret = -ENOEXEC;
-				goto err_close;
+				goto err;
 			}
 
 			/* Get the pointer's value.  */
@@ -762,7 +752,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 				addr = calc_reloc(addr, libinfo, id, 0);
 				if (addr == RELOC_FAILED) {
 					ret = -ENOEXEC;
-					goto err_close;
+					goto err;
 				}
 
 				/* Write back the relocated pointer.  */
@@ -783,8 +773,6 @@ static int load_flat_file(struct linux_binprm * bprm,
 			stack_len);
 
 	return 0;
-err_close:
-	sys_close(exec_fileno);
 err:
 	return ret;
 }
-- 
cgit v1.2.3


From 66055a4e7334b05354c835123ff621c5f700e56a Mon Sep 17 00:00:00 2001
From: Amy Griffis <amy.griffis@hp.com>
Date: Sat, 20 May 2006 15:00:06 -0700
Subject: [PATCH] fix race in inotify_release

While doing some inotify stress testing, I hit the following race.  In
inotify_release(), it's possible for a watch to be removed from the lists
in between dropping dev->mutex and taking inode->inotify_mutex.  The
reference we hold prevents the watch from being freed, but not from being
removed.

Checking the dev's idr mapping will prevent a double list_del of the
same watch.

Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Acked-by: John McCutchan <john@johnmccutchan.com>
Cc: Robert Love <rml@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inotify.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/inotify.c b/fs/inotify.c
index 1f50302849c..7d572533652 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -848,7 +848,11 @@ static int inotify_release(struct inode *ignored, struct file *file)
 		inode = watch->inode;
 		mutex_lock(&inode->inotify_mutex);
 		mutex_lock(&dev->mutex);
-		remove_watch_no_event(watch, dev);
+
+		/* make sure we didn't race with another list removal */
+		if (likely(idr_find(&dev->idr, watch->wd)))
+			remove_watch_no_event(watch, dev);
+
 		mutex_unlock(&dev->mutex);
 		mutex_unlock(&inode->inotify_mutex);
 		put_inotify_watch(watch);
-- 
cgit v1.2.3


From d66fd908acc8ba88541ecc570d89b0243f947c5e Mon Sep 17 00:00:00 2001
From: Amy Griffis <amy.griffis@hp.com>
Date: Sat, 20 May 2006 15:00:07 -0700
Subject: [PATCH] fix NULL dereference in inotify_ignore

Don't reassign to watch.  If idr_find() returns NULL, then
put_inotify_watch() will choke.

Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Cc: John McCutchan <john@johnmccutchan.com>
Cc: Robert Love <rlove@rlove.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inotify.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/inotify.c b/fs/inotify.c
index 7d572533652..732ec4bd577 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -894,8 +894,7 @@ static int inotify_ignore(struct inotify_device *dev, s32 wd)
 	mutex_lock(&dev->mutex);
 
 	/* make sure that we did not race */
-	watch = idr_find(&dev->idr, wd);
-	if (likely(watch))
+	if (likely(idr_find(&dev->idr, wd) == watch))
 		remove_watch(watch, dev);
 
 	mutex_unlock(&dev->mutex);
-- 
cgit v1.2.3


From f2d395865faa2a7cd4620b07178e58cbb160ba08 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 22 May 2006 22:35:25 -0700
Subject: [PATCH] knfsd: Fix two problems that can cause rmmod nfsd to die

Both cause the 'entries' count in the export cache to be non-zero at module
removal time, so unregistering that cache fails and results in an oops.

1/ exp_pseudoroot (used for NFSv4 only) leaks a reference to an export
   entry.
2/ sunrpc_cache_update doesn't increment the entries count when it adds
   an entry.

Thanks to "david m.  richter" <richterd@citi.umich.edu> for triggering the
problem and finding one of the bugs.

Cc: "david m. richter" <richterd@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/export.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 4e0578121d9..3eec30000f3 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1066,9 +1066,11 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
 		rv = nfserr_perm;
 	else if (IS_ERR(exp))
 		rv = nfserrno(PTR_ERR(exp));
-	else
+	else {
 		rv = fh_compose(fhp, exp,
 				fsid_key->ek_dentry, NULL);
+		exp_put(exp);
+	}
 	cache_put(&fsid_key->h, &svc_expkey_cache);
 	return rv;
 }
-- 
cgit v1.2.3


From a2eb0c101d24aca9d3d16c30c4f79f3a70c89208 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 22 May 2006 22:35:27 -0700
Subject: [PATCH] md: Make sure bi_max_vecs is set properly in bio_split

Else a subsequent bio_clone might make a mess.

Signed-off-by: Neil Brown <neilb@suse.de>
Cc: "Don Dupuis" <dondster@gmail.com>
Acked-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/bio.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index eb8fbc53f2c..098c12b2d60 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1116,6 +1116,9 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
 	bp->bio1.bi_io_vec = &bp->bv1;
 	bp->bio2.bi_io_vec = &bp->bv2;
 
+	bp->bio1.bi_max_vecs = 1;
+	bp->bio2.bi_max_vecs = 1;
+
 	bp->bio1.bi_end_io = bio_pair_end_1;
 	bp->bio2.bi_end_io = bio_pair_end_2;
 
-- 
cgit v1.2.3


From b964638ffd59b61c13f02b81e5118a6e573d91cd Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Wed, 24 May 2006 07:43:38 -0500
Subject: JFS: Fix multiple errors in metapage_releasepage

It looks like metapage_releasepage was making in invalid assumption that
the releasepage method would not be called on a dirty page.  Instead of
issuing a warning and releasing the metapage, it should return 0, indicating
that the private data for the page cannot be released.

I also realized that metapage_releasepage had the return code all wrong.  If
it is successful in releasing the private data, it should return 1, otherwise
it needs to return 0.

Lastly, there is no need to call wait_on_page_writeback, since
try_to_release_page will not call us with a page in writback state.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_metapage.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index f28696f235c..2b220dd6b4e 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -542,7 +542,7 @@ add_failed:
 static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
 {
 	struct metapage *mp;
-	int busy = 0;
+	int ret = 1;
 	unsigned int offset;
 
 	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
@@ -552,30 +552,20 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
 			continue;
 
 		jfs_info("metapage_releasepage: mp = 0x%p", mp);
-		if (mp->count || mp->nohomeok) {
+		if (mp->count || mp->nohomeok ||
+		    test_bit(META_dirty, &mp->flag)) {
 			jfs_info("count = %ld, nohomeok = %d", mp->count,
 				 mp->nohomeok);
-			busy = 1;
+			ret = 0;
 			continue;
 		}
-		wait_on_page_writeback(page);
-		//WARN_ON(test_bit(META_dirty, &mp->flag));
-		if (test_bit(META_dirty, &mp->flag)) {
-			dump_mem("dirty mp in metapage_releasepage", mp,
-				 sizeof(struct metapage));
-			dump_mem("page", page, sizeof(struct page));
-			dump_stack();
-		}
 		if (mp->lsn)
 			remove_from_logsync(mp);
 		remove_metapage(page, mp);
 		INCREMENT(mpStat.pagefree);
 		free_metapage(mp);
 	}
-	if (busy)
-		return -1;
-
-	return 0;
+	return ret;
 }
 
 static void metapage_invalidatepage(struct page *page, unsigned long offset)
-- 
cgit v1.2.3