diff options
author | Paul Mackerras <paulus@samba.org> | 2006-08-31 15:45:48 +1000 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2006-08-31 15:45:48 +1000 |
commit | aa43f77939c97bf9d3580c6a5e71a5a40290e451 (patch) | |
tree | 095c0b8b3da4b6554a3f8ef4b39240a5d9216d4d /fs | |
parent | 2818c5dec5e28d65d52afbb7695bbbafe6377ee5 (diff) | |
parent | 4c15343167b5febe7bb0ba96aad5bef42ae94d3b (diff) | |
download | linux-3.10-aa43f77939c97bf9d3580c6a5e71a5a40290e451.tar.gz linux-3.10-aa43f77939c97bf9d3580c6a5e71a5a40290e451.tar.bz2 linux-3.10-aa43f77939c97bf9d3580c6a5e71a5a40290e451.zip |
Merge branch 'merge'
Diffstat (limited to 'fs')
56 files changed, 1398 insertions, 609 deletions
diff --git a/fs/adfs/super.c b/fs/adfs/super.c index ba1c88af49f..82011019494 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -308,7 +308,7 @@ static struct adfs_discmap *adfs_read_map(struct super_block *sb, struct adfs_di if (adfs_checkmap(sb, dm)) return dm; - adfs_error(sb, NULL, "map corrupted"); + adfs_error(sb, "map corrupted"); error_free: while (--zone >= 0) diff --git a/fs/block_dev.c b/fs/block_dev.c index 37534573960..045f98854f1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -884,6 +884,61 @@ void bd_set_size(struct block_device *bdev, loff_t size) } EXPORT_SYMBOL(bd_set_size); +static int __blkdev_put(struct block_device *bdev, unsigned int subclass) +{ + int ret = 0; + struct inode *bd_inode = bdev->bd_inode; + struct gendisk *disk = bdev->bd_disk; + + mutex_lock_nested(&bdev->bd_mutex, subclass); + lock_kernel(); + if (!--bdev->bd_openers) { + sync_blockdev(bdev); + kill_bdev(bdev); + } + if (bdev->bd_contains == bdev) { + if (disk->fops->release) + ret = disk->fops->release(bd_inode, NULL); + } else { + mutex_lock_nested(&bdev->bd_contains->bd_mutex, + subclass + 1); + bdev->bd_contains->bd_part_count--; + mutex_unlock(&bdev->bd_contains->bd_mutex); + } + if (!bdev->bd_openers) { + struct module *owner = disk->fops->owner; + + put_disk(disk); + module_put(owner); + + if (bdev->bd_contains != bdev) { + kobject_put(&bdev->bd_part->kobj); + bdev->bd_part = NULL; + } + bdev->bd_disk = NULL; + bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; + if (bdev != bdev->bd_contains) + __blkdev_put(bdev->bd_contains, subclass + 1); + bdev->bd_contains = NULL; + } + unlock_kernel(); + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); + return ret; +} + +int blkdev_put(struct block_device *bdev) +{ + return __blkdev_put(bdev, BD_MUTEX_NORMAL); +} +EXPORT_SYMBOL(blkdev_put); + +int blkdev_put_partition(struct block_device *bdev) +{ + return __blkdev_put(bdev, BD_MUTEX_PARTITION); +} +EXPORT_SYMBOL(blkdev_put_partition); + static int blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags); @@ -980,7 +1035,7 @@ out_first: bdev->bd_disk = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) - blkdev_put(bdev->bd_contains); + __blkdev_put(bdev->bd_contains, BD_MUTEX_WHOLE); bdev->bd_contains = NULL; put_disk(disk); module_put(owner); @@ -1079,63 +1134,6 @@ static int blkdev_open(struct inode * inode, struct file * filp) return res; } -static int __blkdev_put(struct block_device *bdev, unsigned int subclass) -{ - int ret = 0; - struct inode *bd_inode = bdev->bd_inode; - struct gendisk *disk = bdev->bd_disk; - - mutex_lock_nested(&bdev->bd_mutex, subclass); - lock_kernel(); - if (!--bdev->bd_openers) { - sync_blockdev(bdev); - kill_bdev(bdev); - } - if (bdev->bd_contains == bdev) { - if (disk->fops->release) - ret = disk->fops->release(bd_inode, NULL); - } else { - mutex_lock_nested(&bdev->bd_contains->bd_mutex, - subclass + 1); - bdev->bd_contains->bd_part_count--; - mutex_unlock(&bdev->bd_contains->bd_mutex); - } - if (!bdev->bd_openers) { - struct module *owner = disk->fops->owner; - - put_disk(disk); - module_put(owner); - - if (bdev->bd_contains != bdev) { - kobject_put(&bdev->bd_part->kobj); - bdev->bd_part = NULL; - } - bdev->bd_disk = NULL; - bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; - if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, subclass + 1); - bdev->bd_contains = NULL; - } - unlock_kernel(); - mutex_unlock(&bdev->bd_mutex); - bdput(bdev); - return ret; -} - -int blkdev_put(struct block_device *bdev) -{ - return __blkdev_put(bdev, BD_MUTEX_NORMAL); -} - -EXPORT_SYMBOL(blkdev_put); - -int blkdev_put_partition(struct block_device *bdev) -{ - return __blkdev_put(bdev, BD_MUTEX_PARTITION); -} - -EXPORT_SYMBOL(blkdev_put_partition); - static int blkdev_close(struct inode * inode, struct file * filp) { struct block_device *bdev = I_BDEV(filp->f_mapping->host); diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index a61d17ed182..0feb3bd49cb 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,3 +1,13 @@ +Version 1.45 +------------ +Do not time out lockw calls when using posix extensions. Do not +time out requests if server still responding reasonably fast +on requests on other threads. Improve POSIX locking emulation, +(lock cancel now works, and unlock of merged range works even +to Windows servers now). Fix oops on mount to lanman servers +(win9x, os/2 etc.) when null password. Do not send listxattr +(SMB to query all EAs) if nouser_xattr specified. + Version 1.44 ------------ Rewritten sessionsetup support, including support for legacy SMB diff --git a/fs/cifs/README b/fs/cifs/README index 7986d0d97ac..5f0e1bd64fe 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -408,7 +408,7 @@ A partial list of the supported mount options follows: user_xattr Allow getting and setting user xattrs as OS/2 EAs (extended attributes) to the server (default) e.g. via setfattr and getfattr utilities. - nouser_xattr Do not allow getfattr/setfattr to get/set xattrs + nouser_xattr Do not allow getfattr/setfattr to get/set/list xattrs mapchars Translate six of the seven reserved characters (not backslash) *?<>|: to the remap range (above 0xF000), which also diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index a89efaf78a2..4bc250b2d9f 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -277,7 +277,8 @@ void calc_lanman_hash(struct cifsSesInfo * ses, char * lnm_session_key) return; memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); - strncpy(password_with_pad, ses->password, CIFS_ENCPWD_SIZE); + if(ses->password) + strncpy(password_with_pad, ses->password, CIFS_ENCPWD_SIZE); if((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) if(extended_security & CIFSSEC_MAY_PLNTXT) { diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c28ede59994..3cd750029be 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -402,7 +402,6 @@ static struct quotactl_ops cifs_quotactl_ops = { }; #endif -#ifdef CONFIG_CIFS_EXPERIMENTAL static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) { struct cifs_sb_info *cifs_sb; @@ -422,7 +421,7 @@ static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) tcon->tidStatus = CifsExiting; up(&tcon->tconSem); - /* cancel_brl_requests(tcon); */ + /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ /* cancel_notify_requests(tcon); */ if(tcon->ses && tcon->ses->server) { @@ -438,7 +437,6 @@ static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) return; } -#endif static int cifs_remount(struct super_block *sb, int *flags, char *data) { @@ -457,9 +455,7 @@ struct super_operations cifs_super_ops = { unless later we add lazy close of inodes or unless the kernel forgets to call us with the same number of releases (closes) as opens */ .show_options = cifs_show_options, -#ifdef CONFIG_CIFS_EXPERIMENTAL .umount_begin = cifs_umount_begin, -#endif .remount_fs = cifs_remount, }; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 8f75c6f2470..39ee8ef3bde 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -100,5 +100,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); extern int cifs_ioctl (struct inode * inode, struct file * filep, unsigned int command, unsigned long arg); -#define CIFS_VERSION "1.44" +#define CIFS_VERSION "1.45" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6d7cf5f3bc0..b24006c47df 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -3,6 +3,7 @@ * * Copyright (C) International Business Machines Corp., 2002,2006 * Author(s): Steve French (sfrench@us.ibm.com) + * Jeremy Allison (jra@samba.org) * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -158,7 +159,8 @@ struct TCP_Server_Info { /* 16th byte of RFC1001 workstation name is always null */ char workstation_RFC1001_name[SERVER_NAME_LEN_WITH_NULL]; __u32 sequence_number; /* needed for CIFS PDU signature */ - char mac_signing_key[CIFS_SESS_KEY_SIZE + 16]; + char mac_signing_key[CIFS_SESS_KEY_SIZE + 16]; + unsigned long lstrp; /* when we got last response from this server */ }; /* @@ -266,14 +268,14 @@ struct cifsTconInfo { }; /* - * This info hangs off the cifsFileInfo structure. This is used to track - * byte stream locks on the file + * This info hangs off the cifsFileInfo structure, pointed to by llist. + * This is used to track byte stream locks on the file */ struct cifsLockInfo { - struct cifsLockInfo *next; - int start; - int length; - int type; + struct list_head llist; /* pointer to next cifsLockInfo */ + __u64 offset; + __u64 length; + __u8 type; }; /* @@ -304,6 +306,8 @@ struct cifsFileInfo { /* lock scope id (0 if none) */ struct file * pfile; /* needed for writepage */ struct inode * pInode; /* needed for oplock break */ + struct semaphore lock_sem; + struct list_head llist; /* list of byte range locks we have. */ unsigned closePend:1; /* file is marked to close */ unsigned invalidHandle:1; /* file closed via session abend */ atomic_t wrtPending; /* handle in use - defer close */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index a5ddc62d6fe..b35c55c3c8b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -50,6 +50,10 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *, extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *, struct kvec *, int /* nvec to send */, int * /* type of buf returned */ , const int long_op); +extern int SendReceiveBlockingLock(const unsigned int /* xid */ , struct cifsTconInfo *, + struct smb_hdr * /* input */ , + struct smb_hdr * /* out */ , + int * /* bytes returned */); extern int checkSMBhdr(struct smb_hdr *smb, __u16 mid); extern int checkSMB(struct smb_hdr *smb, __u16 mid, int length); extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 19678c575df..075d8fb3d37 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -477,7 +477,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) /* BB get server time for time conversions and add code to use it and timezone since this is not UTC */ - if (rsp->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { + if (rsp->EncryptionKeyLength == cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { memcpy(server->cryptKey, rsp->EncryptionKey, CIFS_CRYPTO_KEY_SIZE); } else if (server->secMode & SECMODE_PW_ENCRYPT) { @@ -1460,8 +1460,13 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, pSMB->hdr.smb_buf_length += count; pSMB->ByteCount = cpu_to_le16(count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + if (waitFlag) { + rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, + (struct smb_hdr *) pSMBr, &bytes_returned); + } else { + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, timeout); + } cifs_stats_inc(&tcon->num_locks); if (rc) { cFYI(1, ("Send error in Lock = %d", rc)); @@ -1484,6 +1489,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, char *data_offset; struct cifs_posix_lock *parm_data; int rc = 0; + int timeout = 0; int bytes_returned = 0; __u16 params, param_offset, offset, byte_count, count; @@ -1503,7 +1509,6 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; - pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; offset = param_offset + params; @@ -1529,8 +1534,13 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, (((char *) &pSMB->hdr.Protocol) + offset); parm_data->lock_type = cpu_to_le16(lock_type); - if(waitFlag) + if(waitFlag) { + timeout = 3; /* blocking operation, no timeout */ parm_data->lock_flags = cpu_to_le16(1); + pSMB->Timeout = cpu_to_le32(-1); + } else + pSMB->Timeout = 0; + parm_data->pid = cpu_to_le32(current->tgid); parm_data->start = cpu_to_le64(pLockData->fl_start); parm_data->length = cpu_to_le64(len); /* normalize negative numbers */ @@ -1541,8 +1551,14 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, pSMB->Reserved4 = 0; pSMB->hdr.smb_buf_length += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMBr, &bytes_returned, 0); + if (waitFlag) { + rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, + (struct smb_hdr *) pSMBr, &bytes_returned); + } else { + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + (struct smb_hdr *) pSMBr, &bytes_returned, timeout); + } + if (rc) { cFYI(1, ("Send error in Posix Lock = %d", rc)); } else if (get_flag) { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 876eb9ef85f..5d394c72686 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -182,6 +182,7 @@ cifs_reconnect(struct TCP_Server_Info *server) while ((server->tcpStatus != CifsExiting) && (server->tcpStatus != CifsGood)) { + try_to_freeze(); if(server->protocolType == IPV6) { rc = ipv6_connect(&server->addr.sockAddr6,&server->ssocket); } else { @@ -612,6 +613,10 @@ multi_t2_fnd: #ifdef CONFIG_CIFS_STATS2 mid_entry->when_received = jiffies; #endif + /* so we do not time out requests to server + which is still responding (since server could + be busy but not dead) */ + server->lstrp = jiffies; break; } } @@ -1266,33 +1271,35 @@ find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) read_lock(&GlobalSMBSeslock); list_for_each(tmp, &GlobalTreeConnectionList) { - cFYI(1, ("Next tcon - ")); + cFYI(1, ("Next tcon")); tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); if (tcon->ses) { if (tcon->ses->server) { cFYI(1, - (" old ip addr: %x == new ip %x ?", + ("old ip addr: %x == new ip %x ?", tcon->ses->server->addr.sockAddr.sin_addr. s_addr, new_target_ip_addr)); if (tcon->ses->server->addr.sockAddr.sin_addr. s_addr == new_target_ip_addr) { - /* BB lock tcon and server and tcp session and increment use count here? */ + /* BB lock tcon, server and tcp session and increment use count here? */ /* found a match on the TCP session */ /* BB check if reconnection needed */ - cFYI(1,("Matched ip, old UNC: %s == new: %s ?", + cFYI(1,("IP match, old UNC: %s new: %s", tcon->treeName, uncName)); if (strncmp (tcon->treeName, uncName, MAX_TREE_SIZE) == 0) { cFYI(1, - ("Matched UNC, old user: %s == new: %s ?", + ("and old usr: %s new: %s", tcon->treeName, uncName)); if (strncmp (tcon->ses->userName, userName, MAX_USERNAME_SIZE) == 0) { read_unlock(&GlobalSMBSeslock); - return tcon;/* also matched user (smb session)*/ + /* matched smb session + (user name */ + return tcon; } } } @@ -1969,7 +1976,18 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, } cFYI(1,("Negotiate caps 0x%x",(int)cap)); - +#ifdef CONFIG_CIFS_DEBUG2 + if(cap & CIFS_UNIX_FCNTL_CAP) + cFYI(1,("FCNTL cap")); + if(cap & CIFS_UNIX_EXTATTR_CAP) + cFYI(1,("EXTATTR cap")); + if(cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) + cFYI(1,("POSIX path cap")); + if(cap & CIFS_UNIX_XATTR_CAP) + cFYI(1,("XATTR cap")); + if(cap & CIFS_UNIX_POSIX_ACL_CAP) + cFYI(1,("POSIX ACL cap")); +#endif /* CIFS_DEBUG2 */ if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) { cFYI(1,("setting capabilities failed")); } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index ba4cbe9b068..914239d5363 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -267,6 +267,10 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, pCifsFile->invalidHandle = FALSE; pCifsFile->closePend = FALSE; init_MUTEX(&pCifsFile->fh_sem); + init_MUTEX(&pCifsFile->lock_sem); + INIT_LIST_HEAD(&pCifsFile->llist); + atomic_set(&pCifsFile->wrtPending,0); + /* set the following in open now pCifsFile->pfile = file; */ write_lock(&GlobalSMBSeslock); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 944d2b9e092..e9c5ba9084f 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -5,6 +5,7 @@ * * Copyright (C) International Business Machines Corp., 2002,2003 * Author(s): Steve French (sfrench@us.ibm.com) + * Jeremy Allison (jra@samba.org) * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -47,6 +48,8 @@ static inline struct cifsFileInfo *cifs_init_private( private_data->netfid = netfid; private_data->pid = current->tgid; init_MUTEX(&private_data->fh_sem); + init_MUTEX(&private_data->lock_sem); + INIT_LIST_HEAD(&private_data->llist); private_data->pfile = file; /* needed for writepage */ private_data->pInode = inode; private_data->invalidHandle = FALSE; @@ -473,6 +476,8 @@ int cifs_close(struct inode *inode, struct file *file) cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; if (pSMBFile) { + struct cifsLockInfo *li, *tmp; + pSMBFile->closePend = TRUE; if (pTcon) { /* no sense reconnecting to close a file that is @@ -496,6 +501,16 @@ int cifs_close(struct inode *inode, struct file *file) pSMBFile->netfid); } } + + /* Delete any outstanding lock records. + We'll lose them when the file is closed anyway. */ + down(&pSMBFile->lock_sem); + list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) { + list_del(&li->llist); + kfree(li); + } + up(&pSMBFile->lock_sem); + write_lock(&GlobalSMBSeslock); list_del(&pSMBFile->flist); list_del(&pSMBFile->tlist); @@ -570,6 +585,21 @@ int cifs_closedir(struct inode *inode, struct file *file) return rc; } +static int store_file_lock(struct cifsFileInfo *fid, __u64 len, + __u64 offset, __u8 lockType) +{ + struct cifsLockInfo *li = kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); + if (li == NULL) + return -ENOMEM; + li->offset = offset; + li->length = len; + li->type = lockType; + down(&fid->lock_sem); + list_add(&li->llist, &fid->llist); + up(&fid->lock_sem); + return 0; +} + int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) { int rc, xid; @@ -581,6 +611,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) struct cifsTconInfo *pTcon; __u16 netfid; __u8 lockType = LOCKING_ANDX_LARGE_FILES; + int posix_locking; length = 1 + pfLock->fl_end - pfLock->fl_start; rc = -EACCES; @@ -639,15 +670,14 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) } netfid = ((struct cifsFileInfo *)file->private_data)->netfid; + posix_locking = (cifs_sb->tcon->ses->capabilities & CAP_UNIX) && + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability)); /* BB add code here to normalize offset and length to account for negative length which we can not accept over the wire */ if (IS_GETLK(cmd)) { - if(experimEnabled && - (cifs_sb->tcon->ses->capabilities & CAP_UNIX) && - (CIFS_UNIX_FCNTL_CAP & - le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability))) { + if(posix_locking) { int posix_lock_type; if(lockType & LOCKING_ANDX_SHARED_LOCK) posix_lock_type = CIFS_RDLCK; @@ -683,10 +713,15 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) FreeXid(xid); return rc; } - if (experimEnabled && - (cifs_sb->tcon->ses->capabilities & CAP_UNIX) && - (CIFS_UNIX_FCNTL_CAP & - le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability))) { + + if (!numLock && !numUnlock) { + /* if no lock or unlock then nothing + to do since we do not know what it is */ + FreeXid(xid); + return -EOPNOTSUPP; + } + + if (posix_locking) { int posix_lock_type; if(lockType & LOCKING_ANDX_SHARED_LOCK) posix_lock_type = CIFS_RDLCK; @@ -695,18 +730,46 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) if(numUnlock == 1) posix_lock_type = CIFS_UNLCK; - else if(numLock == 0) { - /* if no lock or unlock then nothing - to do since we do not know what it is */ - FreeXid(xid); - return -EOPNOTSUPP; - } + rc = CIFSSMBPosixLock(xid, pTcon, netfid, 0 /* set */, length, pfLock, posix_lock_type, wait_flag); - } else - rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start, - numUnlock, numLock, lockType, wait_flag); + } else { + struct cifsFileInfo *fid = (struct cifsFileInfo *)file->private_data; + + if (numLock) { + rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start, + 0, numLock, lockType, wait_flag); + + if (rc == 0) { + /* For Windows locks we must store them. */ + rc = store_file_lock(fid, length, + pfLock->fl_start, lockType); + } + } else if (numUnlock) { + /* For each stored lock that this unlock overlaps + completely, unlock it. */ + int stored_rc = 0; + struct cifsLockInfo *li, *tmp; + + down(&fid->lock_sem); + list_for_each_entry_safe(li, tmp, &fid->llist, llist) { + if (pfLock->fl_start <= li->offset && + length >= li->length) { + stored_rc = CIFSSMBLock(xid, pTcon, netfid, + li->length, li->offset, + 1, 0, li->type, FALSE); + if (stored_rc) + rc = stored_rc; + + list_del(&li->llist); + kfree(li); + } + } + up(&fid->lock_sem); + } + } + if (pfLock->fl_flags & FL_POSIX) posix_lock_file_wait(file, pfLock); FreeXid(xid); diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index b66eff5dc62..ce87550e918 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -72,6 +72,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = { {ERRinvlevel,-EOPNOTSUPP}, {ERRdirnotempty, -ENOTEMPTY}, {ERRnotlocked, -ENOLCK}, + {ERRcancelviolation, -ENOLCK}, {ERRalreadyexists, -EEXIST}, {ERRmoredata, -EOVERFLOW}, {ERReasnotsupported,-EOPNOTSUPP}, diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 03bbcb37791..105761e3ba0 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -556,7 +556,7 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile) FIND_FILE_STANDARD_INFO * pFindData = (FIND_FILE_STANDARD_INFO *)current_entry; filename = &pFindData->FileName[0]; - len = le32_to_cpu(pFindData->FileNameLength); + len = pFindData->FileNameLength; } else { cFYI(1,("Unknown findfirst level %d",cfile->srch_inf.info_level)); } diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 7202d534ef0..d1705ab8136 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -372,7 +372,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, /* no capabilities flags in old lanman negotiation */ - pSMB->old_req.PasswordLength = CIFS_SESS_KEY_SIZE; + pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); /* BB calculate hash with password */ /* and copy into bcc */ diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h index cd41c67ff8d..212c3c29640 100644 --- a/fs/cifs/smberr.h +++ b/fs/cifs/smberr.h @@ -95,6 +95,7 @@ #define ERRinvlevel 124 #define ERRdirnotempty 145 #define ERRnotlocked 158 +#define ERRcancelviolation 173 #define ERRalreadyexists 183 #define ERRbadpipe 230 #define ERRpipebusy 231 diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 17ba329e2b3..48d47b46b1f 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -3,7 +3,8 @@ * * Copyright (C) International Business Machines Corp., 2002,2005 * Author(s): Steve French (sfrench@us.ibm.com) - * + * Jeremy Allison (jra@samba.org) 2006. + * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation; either version 2.1 of the License, or @@ -36,7 +37,7 @@ extern mempool_t *cifs_mid_poolp; extern kmem_cache_t *cifs_oplock_cachep; static struct mid_q_entry * -AllocMidQEntry(struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) +AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) { struct mid_q_entry *temp; @@ -203,6 +204,10 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, rc = 0; } + /* Don't want to modify the buffer as a + side effect of this call. */ + smb_buffer->smb_buf_length = smb_buf_length; + return rc; } @@ -217,6 +222,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, unsigned int len = iov[0].iov_len; unsigned int total_len; int first_vec = 0; + unsigned int smb_buf_length = smb_buffer->smb_buf_length; if(ssocket == NULL) return -ENOTSOCK; /* BB eventually add reconnect code here */ @@ -293,36 +299,15 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, } else rc = 0; + /* Don't want to modify the buffer as a + side effect of this call. */ + smb_buffer->smb_buf_length = smb_buf_length; + return rc; } -int -SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, - struct kvec *iov, int n_vec, int * pRespBufType /* ret */, - const int long_op) +static int wait_for_free_request(struct cifsSesInfo *ses, const int long_op) { - int rc = 0; - unsigned int receive_len; - unsigned long timeout; - struct mid_q_entry *midQ; - struct smb_hdr *in_buf = iov[0].iov_base; - - *pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */ - - if ((ses == NULL) || (ses->server == NULL)) { - cifs_small_buf_release(in_buf); - cERROR(1,("Null session")); - return -EIO; - } - - if(ses->server->tcpStatus == CifsExiting) { - cifs_small_buf_release(in_buf); - return -ENOENT; - } - - /* Ensure that we do not send more than 50 overlapping requests - to the same server. We may make this configurable later or - use ses->maxReq */ if(long_op == -1) { /* oplock breaks must not be held up */ atomic_inc(&ses->server->inFlight); @@ -345,53 +330,140 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, } else { if(ses->server->tcpStatus == CifsExiting) { spin_unlock(&GlobalMid_Lock); - cifs_small_buf_release(in_buf); return -ENOENT; } - /* can not count locking commands against total since - they are allowed to block on server */ + /* can not count locking commands against total since + they are allowed to block on server */ - if(long_op < 3) { /* update # of requests on the wire to server */ + if (long_op < 3) atomic_inc(&ses->server->inFlight); - } spin_unlock(&GlobalMid_Lock); break; } } } - /* make sure that we sign in the same order that we send on this socket - and avoid races inside tcp sendmsg code that could cause corruption - of smb data */ - - down(&ses->server->tcpSem); + return 0; +} +static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf, + struct mid_q_entry **ppmidQ) +{ if (ses->server->tcpStatus == CifsExiting) { - rc = -ENOENT; - goto out_unlock2; + return -ENOENT; } else if (ses->server->tcpStatus == CifsNeedReconnect) { cFYI(1,("tcp session dead - return to caller to retry")); - rc = -EAGAIN; - goto out_unlock2; + return -EAGAIN; } else if (ses->status != CifsGood) { /* check if SMB session is bad because we are setting it up */ if((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && (in_buf->Command != SMB_COM_NEGOTIATE)) { - rc = -EAGAIN; - goto out_unlock2; + return -EAGAIN; } /* else ok - we are setting up session */ } - midQ = AllocMidQEntry(in_buf, ses); - if (midQ == NULL) { + *ppmidQ = AllocMidQEntry(in_buf, ses); + if (*ppmidQ == NULL) { + return -ENOMEM; + } + return 0; +} + +static int wait_for_response(struct cifsSesInfo *ses, + struct mid_q_entry *midQ, + unsigned long timeout, + unsigned long time_to_wait) +{ + unsigned long curr_timeout; + + for (;;) { + curr_timeout = timeout + jiffies; + wait_event(ses->server->response_q, + (!(midQ->midState == MID_REQUEST_SUBMITTED)) || + time_after(jiffies, curr_timeout) || + ((ses->server->tcpStatus != CifsGood) && + (ses->server->tcpStatus != CifsNew))); + + if (time_after(jiffies, curr_timeout) && + (midQ->midState == MID_REQUEST_SUBMITTED) && + ((ses->server->tcpStatus == CifsGood) || + (ses->server->tcpStatus == CifsNew))) { + + unsigned long lrt; + + /* We timed out. Is the server still + sending replies ? */ + spin_lock(&GlobalMid_Lock); + lrt = ses->server->lstrp; + spin_unlock(&GlobalMid_Lock); + + /* Calculate time_to_wait past last receive time. + Although we prefer not to time out if the + server is still responding - we will time + out if the server takes more than 15 (or 45 + or 180) seconds to respond to this request + and has not responded to any request from + other threads on the client within 10 seconds */ + lrt += time_to_wait; + if (time_after(jiffies, lrt)) { + /* No replies for time_to_wait. */ + cERROR(1,("server not responding")); + return -1; + } + } else { + return 0; + } + } +} + +int +SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, + struct kvec *iov, int n_vec, int * pRespBufType /* ret */, + const int long_op) +{ + int rc = 0; + unsigned int receive_len; + unsigned long timeout; + struct mid_q_entry *midQ; + struct smb_hdr *in_buf = iov[0].iov_base; + + *pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */ + + if ((ses == NULL) || (ses->server == NULL)) { + cifs_small_buf_release(in_buf); + cERROR(1,("Null session")); + return -EIO; + } + + if(ses->server->tcpStatus == CifsExiting) { + cifs_small_buf_release(in_buf); + return -ENOENT; + } + + /* Ensure that we do not send more than 50 overlapping requests + to the same server. We may make this configurable later or + use ses->maxReq */ + + rc = wait_for_free_request(ses, long_op); + if (rc) { + cifs_small_buf_release(in_buf); + return rc; + } + + /* make sure that we sign in the same order that we send on this socket + and avoid races inside tcp sendmsg code that could cause corruption + of smb data */ + + down(&ses->server->tcpSem); + + rc = allocate_mid(ses, in_buf, &midQ); + if (rc) { up(&ses->server->tcpSem); cifs_small_buf_release(in_buf); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } - return -ENOMEM; + /* Update # of requests on wire to server */ + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); + return rc; } rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); @@ -406,32 +478,23 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; #endif - if(rc < 0) { - DeleteMidQEntry(midQ); - up(&ses->server->tcpSem); - cifs_small_buf_release(in_buf); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } - return rc; - } else { - up(&ses->server->tcpSem); - cifs_small_buf_release(in_buf); - } + + up(&ses->server->tcpSem); + cifs_small_buf_release(in_buf); + + if(rc < 0) + goto out; if (long_op == -1) - goto cifs_no_response_exit2; + goto out; else if (long_op == 2) /* writes past end of file can take loong time */ timeout = 180 * HZ; else if (long_op == 1) timeout = 45 * HZ; /* should be greater than servers oplock break timeout (about 43 seconds) */ - else if (long_op > 2) { - timeout = MAX_SCHEDULE_TIMEOUT; - } else + else timeout = 15 * HZ; + /* wait for 15 seconds or until woken up due to response arriving or due to last connection to this server being unmounted */ if (signal_pending(current)) { @@ -441,19 +504,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, } /* No user interrupts in wait - wreaks havoc with performance */ - if(timeout != MAX_SCHEDULE_TIMEOUT) { - timeout += jiffies; - wait_event(ses->server->response_q, - (!(midQ->midState & MID_REQUEST_SUBMITTED)) || - time_after(jiffies, timeout) || - ((ses->server->tcpStatus != CifsGood) && - (ses->server->tcpStatus != CifsNew))); - } else { - wait_event(ses->server->response_q, - (!(midQ->midState & MID_REQUEST_SUBMITTED)) || - ((ses->server->tcpStatus != CifsGood) && - (ses->server->tcpStatus != CifsNew))); - } + wait_for_response(ses, midQ, timeout, 10 * HZ); spin_lock(&GlobalMid_Lock); if (midQ->resp_buf) { @@ -481,11 +532,9 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, } spin_unlock(&GlobalMid_Lock); DeleteMidQEntry(midQ); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } + /* Update # of requests on wire to server */ + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return rc; } @@ -536,24 +585,12 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, cFYI(1,("Bad MID state?")); } } -cifs_no_response_exit2: - DeleteMidQEntry(midQ); - - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } - return rc; +out: -out_unlock2: - up(&ses->server->tcpSem); - cifs_small_buf_release(in_buf); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } + DeleteMidQEntry(midQ); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return rc; } @@ -583,85 +620,34 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, /* Ensure that we do not send more than 50 overlapping requests to the same server. We may make this configurable later or use ses->maxReq */ - if(long_op == -1) { - /* oplock breaks must not be held up */ - atomic_inc(&ses->server->inFlight); - } else { - spin_lock(&GlobalMid_Lock); - while(1) { - if(atomic_read(&ses->server->inFlight) >= - cifs_max_pending){ - spin_unlock(&GlobalMid_Lock); -#ifdef CONFIG_CIFS_STATS2 - atomic_inc(&ses->server->num_waiters); -#endif - wait_event(ses->server->request_q, - atomic_read(&ses->server->inFlight) - < cifs_max_pending); -#ifdef CONFIG_CIFS_STATS2 - atomic_dec(&ses->server->num_waiters); -#endif - spin_lock(&GlobalMid_Lock); - } else { - if(ses->server->tcpStatus == CifsExiting) { - spin_unlock(&GlobalMid_Lock); - return -ENOENT; - } - /* can not count locking commands against total since - they are allowed to block on server */ - - if(long_op < 3) { - /* update # of requests on the wire to server */ - atomic_inc(&ses->server->inFlight); - } - spin_unlock(&GlobalMid_Lock); - break; - } - } - } + rc = wait_for_free_request(ses, long_op); + if (rc) + return rc; + /* make sure that we sign in the same order that we send on this socket and avoid races inside tcp sendmsg code that could cause corruption of smb data */ down(&ses->server->tcpSem); - if (ses->server->tcpStatus == CifsExiting) { - rc = -ENOENT; - goto out_unlock; - } else if (ses->server->tcpStatus == CifsNeedReconnect) { - cFYI(1,("tcp session dead - return to caller to retry")); - rc = -EAGAIN; - goto out_unlock; - } else if (ses->status != CifsGood) { - /* check if SMB session is bad because we are setting it up */ - if((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && - (in_buf->Command != SMB_COM_NEGOTIATE)) { - rc = -EAGAIN; - goto out_unlock; - } /* else ok - we are setting up session */ - } - midQ = AllocMidQEntry(in_buf, ses); - if (midQ == NULL) { + rc = allocate_mid(ses, in_buf, &midQ); + if (rc) { up(&ses->server->tcpSem); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } - return -ENOMEM; + /* Update # of requests on wire to server */ + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); + return rc; } if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { - up(&ses->server->tcpSem); cERROR(1, ("Illegal length, greater than maximum frame, %d", in_buf->smb_buf_length)); DeleteMidQEntry(midQ); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } + up(&ses->server->tcpSem); + /* Update # of requests on wire to server */ + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return -EIO; } @@ -677,27 +663,19 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; #endif - if(rc < 0) { - DeleteMidQEntry(midQ); - up(&ses->server->tcpSem); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } - return rc; - } else - up(&ses->server->tcpSem); + up(&ses->server->tcpSem); + + if(rc < 0) + goto out; + if (long_op == -1) - goto cifs_no_response_exit; + goto out; else if (long_op == 2) /* writes past end of file can take loong time */ timeout = 180 * HZ; else if (long_op == 1) timeout = 45 * HZ; /* should be greater than servers oplock break timeout (about 43 seconds) */ - else if (long_op > 2) { - timeout = MAX_SCHEDULE_TIMEOUT; - } else + else timeout = 15 * HZ; /* wait for 15 seconds or until woken up due to response arriving or due to last connection to this server being unmounted */ @@ -708,19 +686,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, } /* No user interrupts in wait - wreaks havoc with performance */ - if(timeout != MAX_SCHEDULE_TIMEOUT) { - timeout += jiffies; - wait_event(ses->server->response_q, - (!(midQ->midState & MID_REQUEST_SUBMITTED)) || - time_after(jiffies, timeout) || - ((ses->server->tcpStatus != CifsGood) && - (ses->server->tcpStatus != CifsNew))); - } else { - wait_event(ses->server->response_q, - (!(midQ->midState & MID_REQUEST_SUBMITTED)) || - ((ses->server->tcpStatus != CifsGood) && - (ses->server->tcpStatus != CifsNew))); - } + wait_for_response(ses, midQ, timeout, 10 * HZ); spin_lock(&GlobalMid_Lock); if (midQ->resp_buf) { @@ -748,11 +714,9 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, } spin_unlock(&GlobalMid_Lock); DeleteMidQEntry(midQ); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } + /* Update # of requests on wire to server */ + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return rc; } @@ -799,23 +763,253 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, cERROR(1,("Bad MID state?")); } } -cifs_no_response_exit: + +out: + DeleteMidQEntry(midQ); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } + return rc; +} + +/* Send an NT_CANCEL SMB to cause the POSIX blocking lock to return. */ + +static int +send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf, + struct mid_q_entry *midQ) +{ + int rc = 0; + struct cifsSesInfo *ses = tcon->ses; + __u16 mid = in_buf->Mid; + header_assemble(in_buf, SMB_COM_NT_CANCEL, tcon, 0); + in_buf->Mid = mid; + down(&ses->server->tcpSem); + rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); + if (rc) { + up(&ses->server->tcpSem); + return rc; + } + rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, + (struct sockaddr *) &(ses->server->addr.sockAddr)); + up(&ses->server->tcpSem); return rc; +} + +/* We send a LOCKINGX_CANCEL_LOCK to cause the Windows + blocking lock to return. */ + +static int +send_lock_cancel(const unsigned int xid, struct cifsTconInfo *tcon, + struct smb_hdr *in_buf, + struct smb_hdr *out_buf) +{ + int bytes_returned; + struct cifsSesInfo *ses = tcon->ses; + LOCK_REQ *pSMB = (LOCK_REQ *)in_buf; + + /* We just modify the current in_buf to change + the type of lock from LOCKING_ANDX_SHARED_LOCK + or LOCKING_ANDX_EXCLUSIVE_LOCK to + LOCKING_ANDX_CANCEL_LOCK. */ + + pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES; + pSMB->Timeout = 0; + pSMB->hdr.Mid = GetNextMid(ses->server); + + return SendReceive(xid, ses, in_buf, out_buf, + &bytes_returned, 0); +} -out_unlock: +int +SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, + struct smb_hdr *in_buf, struct smb_hdr *out_buf, + int *pbytes_returned) +{ + int rc = 0; + int rstart = 0; + unsigned int receive_len; + struct mid_q_entry *midQ; + struct cifsSesInfo *ses; + + if (tcon == NULL || tcon->ses == NULL) { + cERROR(1,("Null smb session")); + return -EIO; + } + ses = tcon->ses; + + if(ses->server == NULL) { + cERROR(1,("Null tcp session")); + return -EIO; + } + + if(ses->server->tcpStatus == CifsExiting) + return -ENOENT; + + /* Ensure that we do not send more than 50 overlapping requests + to the same server. We may make this configurable later or + use ses->maxReq */ + + rc = wait_for_free_request(ses, 3); + if (rc) + return rc; + + /* make sure that we sign in the same order that we send on this socket + and avoid races inside tcp sendmsg code that could cause corruption + of smb data */ + + down(&ses->server->tcpSem); + + rc = allocate_mid(ses, in_buf, &midQ); + if (rc) { + up(&ses->server->tcpSem); + return rc; + } + + if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { + up(&ses->server->tcpSem); + cERROR(1, ("Illegal length, greater than maximum frame, %d", + in_buf->smb_buf_length)); + DeleteMidQEntry(midQ); + return -EIO; + } + + rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); + + midQ->midState = MID_REQUEST_SUBMITTED; +#ifdef CONFIG_CIFS_STATS2 + atomic_inc(&ses->server->inSend); +#endif + rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, + (struct sockaddr *) &(ses->server->addr.sockAddr)); +#ifdef CONFIG_CIFS_STATS2 + atomic_dec(&ses->server->inSend); + midQ->when_sent = jiffies; +#endif up(&ses->server->tcpSem); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); + + if(rc < 0) { + DeleteMidQEntry(midQ); + return rc; + } + + /* Wait for a reply - allow signals to interrupt. */ + rc = wait_event_interruptible(ses->server->response_q, + (!(midQ->midState == MID_REQUEST_SUBMITTED)) || + ((ses->server->tcpStatus != CifsGood) && + (ses->server->tcpStatus != CifsNew))); + + /* Were we interrupted by a signal ? */ + if ((rc == -ERESTARTSYS) && + (midQ->midState == MID_REQUEST_SUBMITTED) && + ((ses->server->tcpStatus == CifsGood) || + (ses->server->tcpStatus == CifsNew))) { + + if (in_buf->Command == SMB_COM_TRANSACTION2) { + /* POSIX lock. We send a NT_CANCEL SMB to cause the + blocking lock to return. */ + + rc = send_nt_cancel(tcon, in_buf, midQ); + if (rc) { + DeleteMidQEntry(midQ); + return rc; + } + } else { + /* Windows lock. We send a LOCKINGX_CANCEL_LOCK + to cause the blocking lock to return. */ + + rc = send_lock_cancel(xid, tcon, in_buf, out_buf); + + /* If we get -ENOLCK back the lock may have + already been removed. Don't exit in this case. */ + if (rc && rc != -ENOLCK) { + DeleteMidQEntry(midQ); + return rc; + } + } + + /* Wait 5 seconds for the response. */ + if (wait_for_response(ses, midQ, 5 * HZ, 5 * HZ)==0) { + /* We got the response - restart system call. */ + rstart = 1; + } + } + + spin_lock(&GlobalMid_Lock); + if (midQ->resp_buf) { + spin_unlock(&GlobalMid_Lock); + receive_len = midQ->resp_buf->smb_buf_length; + } else { + cERROR(1,("No response for cmd %d mid %d", + midQ->command, midQ->mid)); + if(midQ->midState == MID_REQUEST_SUBMITTED) { + if(ses->server->tcpStatus == CifsExiting) + rc = -EHOSTDOWN; + else { + ses->server->tcpStatus = CifsNeedReconnect; + midQ->midState = MID_RETRY_NEEDED; + } + } + + if (rc != -EHOSTDOWN) { + if(midQ->midState == MID_RETRY_NEEDED) { + rc = -EAGAIN; + cFYI(1,("marking request for retry")); + } else { + rc = -EIO; + } + } + spin_unlock(&GlobalMid_Lock); + DeleteMidQEntry(midQ); + return rc; } + + if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { + cERROR(1, ("Frame too large received. Length: %d Xid: %d", + receive_len, xid)); + rc = -EIO; + } else { /* rcvd frame is ok */ + + if (midQ->resp_buf && out_buf + && (midQ->midState == MID_RESPONSE_RECEIVED)) { + out_buf->smb_buf_length = receive_len; + memcpy((char *)out_buf + 4, + (char *)midQ->resp_buf + 4, + receive_len); + + dump_smb(out_buf, 92); + /* convert the length into a more usable form */ + if((receive_len > 24) && + (ses->server->secMode & (SECMODE_SIGN_REQUIRED | + SECMODE_SIGN_ENABLED))) { + rc = cifs_verify_signature(out_buf, + ses->server->mac_signing_key, + midQ->sequence_number+1); + if(rc) { + cERROR(1,("Unexpected SMB signature")); + /* BB FIXME add code to kill session */ + } + } + + *pbytes_returned = out_buf->smb_buf_length; + + /* BB special case reconnect tid and uid here? */ + rc = map_smb_to_linux_error(out_buf); + /* convert ByteCount if necessary */ + if (receive_len >= + sizeof (struct smb_hdr) - + 4 /* do not count RFC1001 header */ + + (2 * out_buf->WordCount) + 2 /* bcc */ ) + BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); + } else { + rc = -EIO; + cERROR(1,("Bad MID state?")); + } + } + DeleteMidQEntry(midQ); + if (rstart && rc == -EACCES) + return -ERESTARTSYS; return rc; } diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 7754d641775..067648b7179 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -330,11 +330,15 @@ ssize_t cifs_listxattr(struct dentry * direntry, char * data, size_t buf_size) sb = direntry->d_inode->i_sb; if(sb == NULL) return -EIO; - xid = GetXid(); cifs_sb = CIFS_SB(sb); pTcon = cifs_sb->tcon; + if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + return -EOPNOTSUPP; + + xid = GetXid(); + full_path = build_path_from_dentry(direntry); if(full_path == NULL) { FreeXid(xid); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 19ffb043abb..3a3567433b9 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1168,7 +1168,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi) eexit_1: DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n", - current, ep, epi->file, error)); + current, ep, epi->ffd.file, error)); return error; } @@ -1236,7 +1236,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k struct eventpoll *ep = epi->ep; DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", - current, epi->file, epi, ep)); + current, epi->ffd.file, epi, ep)); write_lock_irqsave(&ep->lock, flags); diff --git a/fs/exec.c b/fs/exec.c index 8344ba73a2a..54135df2a96 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -486,8 +486,6 @@ struct file *open_exec(const char *name) if (!(nd.mnt->mnt_flags & MNT_NOEXEC) && S_ISREG(inode->i_mode)) { int err = vfs_permission(&nd, MAY_EXEC); - if (!err && !(inode->i_mode & 0111)) - err = -EACCES; file = ERR_PTR(err); if (!err) { file = nameidata_to_filp(&nd, O_RDONLY); @@ -753,7 +751,7 @@ no_thread_group: write_lock_irq(&tasklist_lock); spin_lock(&oldsighand->siglock); - spin_lock(&newsighand->siglock); + spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING); rcu_assign_pointer(current->sighand, newsighand); recalc_sigpending(); @@ -922,12 +920,6 @@ int prepare_binprm(struct linux_binprm *bprm) int retval; mode = inode->i_mode; - /* - * Check execute perms again - if the caller has CAP_DAC_OVERRIDE, - * generic_permission lets a non-executable through - */ - if (!(mode & 0111)) /* with at least _one_ execute bit set */ - return -EACCES; if (bprm->file->f_op == NULL) return -EACCES; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index f2702cda977..681dea8f953 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -775,7 +775,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (EXT2_INODE_SIZE(sb) == 0) goto cantfind_ext2; sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb); - if (sbi->s_inodes_per_block == 0) + if (sbi->s_inodes_per_block == 0 || sbi->s_inodes_per_group == 0) goto cantfind_ext2; sbi->s_itb_per_group = sbi->s_inodes_per_group / sbi->s_inodes_per_block; diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index a504a40d6d2..063d994bda0 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -1269,12 +1269,12 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, goal = le32_to_cpu(es->s_first_data_block); group_no = (goal - le32_to_cpu(es->s_first_data_block)) / EXT3_BLOCKS_PER_GROUP(sb); + goal_group = group_no; +retry_alloc: gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); if (!gdp) goto io_error; - goal_group = group_no; -retry: free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); /* * if there is not enough free blocks to make a new resevation @@ -1349,7 +1349,7 @@ retry: if (my_rsv) { my_rsv = NULL; group_no = goal_group; - goto retry; + goto retry_alloc; } /* No space left on the device */ *errp = -ENOSPC; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 63614ed1633..5c4fcd1dbf5 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -395,14 +395,16 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, struct fuse_readpages_data data; int err; + err = -EIO; if (is_bad_inode(inode)) - return -EIO; + goto clean_pages_up; data.file = file; data.inode = inode; data.req = fuse_get_req(fc); + err = PTR_ERR(data.req); if (IS_ERR(data.req)) - return PTR_ERR(data.req); + goto clean_pages_up; err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); if (!err) { @@ -412,6 +414,10 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, fuse_put_request(fc, data.req); } return err; + +clean_pages_up: + put_pages_list(pages); + return err; } static size_t fuse_send_write(struct fuse_req *req, struct file *file, diff --git a/fs/ioprio.c b/fs/ioprio.c index 93aa5715f22..78b1deae3fa 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -44,6 +44,9 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) task->ioprio = ioprio; ioc = task->io_context; + /* see wmb() in current_io_context() */ + smp_read_barrier_depends(); + if (ioc && ioc->set_ioprio) ioc->set_ioprio(ioc, ioprio); @@ -111,9 +114,9 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) continue; ret = set_task_ioprio(p, ioprio); if (ret) - break; + goto free_uid; } while_each_thread(g, p); - +free_uid: if (who) free_uid(user); break; @@ -137,6 +140,29 @@ out: return ret; } +int ioprio_best(unsigned short aprio, unsigned short bprio) +{ + unsigned short aclass = IOPRIO_PRIO_CLASS(aprio); + unsigned short bclass = IOPRIO_PRIO_CLASS(bprio); + + if (!ioprio_valid(aprio)) + return bprio; + if (!ioprio_valid(bprio)) + return aprio; + + if (aclass == IOPRIO_CLASS_NONE) + aclass = IOPRIO_CLASS_BE; + if (bclass == IOPRIO_CLASS_NONE) + bclass = IOPRIO_CLASS_BE; + + if (aclass == bclass) + return min(aprio, bprio); + if (aclass > bclass) + return bprio; + else + return aprio; +} + asmlinkage long sys_ioprio_get(int which, int who) { struct task_struct *g, *p; diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 0971814c38b..42da6078431 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -261,7 +261,7 @@ void journal_commit_transaction(journal_t *journal) struct buffer_head *bh = jh2bh(jh); jbd_lock_bh_state(bh); - kfree(jh->b_committed_data); + jbd_slab_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; jbd_unlock_bh_state(bh); } @@ -745,14 +745,14 @@ restart_loop: * Otherwise, we can just throw away the frozen data now. */ if (jh->b_committed_data) { - kfree(jh->b_committed_data); + jbd_slab_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; if (jh->b_frozen_data) { jh->b_committed_data = jh->b_frozen_data; jh->b_frozen_data = NULL; } } else if (jh->b_frozen_data) { - kfree(jh->b_frozen_data); + jbd_slab_free(jh->b_frozen_data, bh->b_size); jh->b_frozen_data = NULL; } diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 8c9b28dff11..f66724ce443 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -84,6 +84,7 @@ EXPORT_SYMBOL(journal_force_commit); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); static void __journal_abort_soft (journal_t *journal, int errno); +static int journal_create_jbd_slab(size_t slab_size); /* * Helper function used to manage commit timeouts @@ -328,10 +329,10 @@ repeat: char *tmp; jbd_unlock_bh_state(bh_in); - tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS); + tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS); jbd_lock_bh_state(bh_in); if (jh_in->b_frozen_data) { - kfree(tmp); + jbd_slab_free(tmp, bh_in->b_size); goto repeat; } @@ -1069,17 +1070,17 @@ static int load_superblock(journal_t *journal) int journal_load(journal_t *journal) { int err; + journal_superblock_t *sb; err = load_superblock(journal); if (err) return err; + sb = journal->j_superblock; /* If this is a V2 superblock, then we have to check the * features flags on it. */ if (journal->j_format_version >= 2) { - journal_superblock_t *sb = journal->j_superblock; - if ((sb->s_feature_ro_compat & ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) || (sb->s_feature_incompat & @@ -1090,6 +1091,13 @@ int journal_load(journal_t *journal) } } + /* + * Create a slab for this blocksize + */ + err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize)); + if (err) + return err; + /* Let the recovery code check whether it needs to recover any * data from the journal. */ if (journal_recover(journal)) @@ -1612,6 +1620,77 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) } /* + * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed + * and allocate frozen and commit buffers from these slabs. + * + * Reason for doing this is to avoid, SLAB_DEBUG - since it could + * cause bh to cross page boundary. + */ + +#define JBD_MAX_SLABS 5 +#define JBD_SLAB_INDEX(size) (size >> 11) + +static kmem_cache_t *jbd_slab[JBD_MAX_SLABS]; +static const char *jbd_slab_names[JBD_MAX_SLABS] = { + "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" +}; + +static void journal_destroy_jbd_slabs(void) +{ + int i; + + for (i = 0; i < JBD_MAX_SLABS; i++) { + if (jbd_slab[i]) + kmem_cache_destroy(jbd_slab[i]); + jbd_slab[i] = NULL; + } +} + +static int journal_create_jbd_slab(size_t slab_size) +{ + int i = JBD_SLAB_INDEX(slab_size); + + BUG_ON(i >= JBD_MAX_SLABS); + + /* + * Check if we already have a slab created for this size + */ + if (jbd_slab[i]) + return 0; + + /* + * Create a slab and force alignment to be same as slabsize - + * this will make sure that allocations won't cross the page + * boundary. + */ + jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], + slab_size, slab_size, 0, NULL, NULL); + if (!jbd_slab[i]) { + printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); + return -ENOMEM; + } + return 0; +} + +void * jbd_slab_alloc(size_t size, gfp_t flags) +{ + int idx; + + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); +} + +void jbd_slab_free(void *ptr, size_t size) +{ + int idx; + + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + kmem_cache_free(jbd_slab[idx], ptr); +} + +/* * Journal_head storage management */ static kmem_cache_t *journal_head_cache; @@ -1799,13 +1878,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh) printk(KERN_WARNING "%s: freeing " "b_frozen_data\n", __FUNCTION__); - kfree(jh->b_frozen_data); + jbd_slab_free(jh->b_frozen_data, bh->b_size); } if (jh->b_committed_data) { printk(KERN_WARNING "%s: freeing " "b_committed_data\n", __FUNCTION__); - kfree(jh->b_committed_data); + jbd_slab_free(jh->b_committed_data, bh->b_size); } bh->b_private = NULL; jh->b_bh = NULL; /* debug, really */ @@ -1961,6 +2040,7 @@ static void journal_destroy_caches(void) journal_destroy_revoke_caches(); journal_destroy_journal_head_cache(); journal_destroy_handle_cache(); + journal_destroy_jbd_slabs(); } static int __init journal_init(void) diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 508b2ea91f4..de2e4cbbf79 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -666,8 +666,9 @@ repeat: if (!frozen_buffer) { JBUFFER_TRACE(jh, "allocate memory for buffer"); jbd_unlock_bh_state(bh); - frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size, - GFP_NOFS); + frozen_buffer = + jbd_slab_alloc(jh2bh(jh)->b_size, + GFP_NOFS); if (!frozen_buffer) { printk(KERN_EMERG "%s: OOM for frozen_buffer\n", @@ -879,7 +880,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh) repeat: if (!jh->b_committed_data) { - committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS); + committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!committed_data) { printk(KERN_EMERG "%s: No memory for committed data\n", __FUNCTION__); @@ -906,7 +907,7 @@ repeat: out: journal_put_journal_head(jh); if (unlikely(committed_data)) - kfree(committed_data); + jbd_slab_free(committed_data, bh->b_size); return err; } diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 43e3f566aad..a223cf4faa9 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -168,16 +168,15 @@ void jfs_dirty_inode(struct inode *inode) set_cflag(COMMIT_Dirty, inode); } -static int -jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, - struct buffer_head *bh_result, int create) +int jfs_get_block(struct inode *ip, sector_t lblock, + struct buffer_head *bh_result, int create) { s64 lblock64 = lblock; int rc = 0; xad_t xad; s64 xaddr; int xflag; - s32 xlen = max_blocks; + s32 xlen = bh_result->b_size >> ip->i_blkbits; /* * Take appropriate lock on inode @@ -188,7 +187,7 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, IREAD_LOCK(ip); if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) && - (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) && + (!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) && xaddr) { if (xflag & XAD_NOTRECORDED) { if (!create) @@ -255,13 +254,6 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, return rc; } -static int jfs_get_block(struct inode *ip, sector_t lblock, - struct buffer_head *bh_result, int create) -{ - return jfs_get_blocks(ip, lblock, bh_result->b_size >> ip->i_blkbits, - bh_result, create); -} - static int jfs_writepage(struct page *page, struct writeback_control *wbc) { return nobh_writepage(page, jfs_get_block, wbc); diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index b5c7da6190d..1fc48df670c 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -32,6 +32,7 @@ extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); extern struct dentry *jfs_get_parent(struct dentry *dentry); extern void jfs_set_inode_flags(struct inode *); +extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); extern const struct address_space_operations jfs_aops; extern struct inode_operations jfs_dir_inode_operations; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 4f6cfebc82d..143bcd1d5ea 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -26,6 +26,7 @@ #include <linux/moduleparam.h> #include <linux/kthread.h> #include <linux/posix_acl.h> +#include <linux/buffer_head.h> #include <asm/uaccess.h> #include <linux/seq_file.h> @@ -298,7 +299,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, break; } -#if defined(CONFIG_QUOTA) +#ifdef CONFIG_QUOTA case Opt_quota: case Opt_usrquota: *flag |= JFS_USRQUOTA; @@ -597,7 +598,7 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) if (sbi->flag & JFS_NOINTEGRITY) seq_puts(seq, ",nointegrity"); -#if defined(CONFIG_QUOTA) +#ifdef CONFIG_QUOTA if (sbi->flag & JFS_USRQUOTA) seq_puts(seq, ",usrquota"); @@ -608,6 +609,113 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) return 0; } +#ifdef CONFIG_QUOTA + +/* Read data from quotafile - avoid pagecache and such because we cannot afford + * acquiring the locks... As quota files are never truncated and quota code + * itself serializes the operations (and noone else should touch the files) + * we don't have to be afraid of races */ +static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data, + size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + sector_t blk = off >> sb->s_blocksize_bits; + int err = 0; + int offset = off & (sb->s_blocksize - 1); + int tocopy; + size_t toread; + struct buffer_head tmp_bh; + struct buffer_head *bh; + loff_t i_size = i_size_read(inode); + + if (off > i_size) + return 0; + if (off+len > i_size) + len = i_size-off; + toread = len; + while (toread > 0) { + tocopy = sb->s_blocksize - offset < toread ? + sb->s_blocksize - offset : toread; + + tmp_bh.b_state = 0; + tmp_bh.b_size = 1 << inode->i_blkbits; + err = jfs_get_block(inode, blk, &tmp_bh, 0); + if (err) + return err; + if (!buffer_mapped(&tmp_bh)) /* A hole? */ + memset(data, 0, tocopy); + else { + bh = sb_bread(sb, tmp_bh.b_blocknr); + if (!bh) + return -EIO; + memcpy(data, bh->b_data+offset, tocopy); + brelse(bh); + } + offset = 0; + toread -= tocopy; + data += tocopy; + blk++; + } + return len; +} + +/* Write to quotafile */ +static ssize_t jfs_quota_write(struct super_block *sb, int type, + const char *data, size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + sector_t blk = off >> sb->s_blocksize_bits; + int err = 0; + int offset = off & (sb->s_blocksize - 1); + int tocopy; + size_t towrite = len; + struct buffer_head tmp_bh; + struct buffer_head *bh; + + mutex_lock(&inode->i_mutex); + while (towrite > 0) { + tocopy = sb->s_blocksize - offset < towrite ? + sb->s_blocksize - offset : towrite; + + tmp_bh.b_state = 0; + tmp_bh.b_size = 1 << inode->i_blkbits; + err = jfs_get_block(inode, blk, &tmp_bh, 1); + if (err) + goto out; + if (offset || tocopy != sb->s_blocksize) + bh = sb_bread(sb, tmp_bh.b_blocknr); + else + bh = sb_getblk(sb, tmp_bh.b_blocknr); + if (!bh) { + err = -EIO; + goto out; + } + lock_buffer(bh); + memcpy(bh->b_data+offset, data, tocopy); + flush_dcache_page(bh->b_page); + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + brelse(bh); + offset = 0; + towrite -= tocopy; + data += tocopy; + blk++; + } +out: + if (len == towrite) + return err; + if (inode->i_size < off+len-towrite) + i_size_write(inode, off+len-towrite); + inode->i_version++; + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + mutex_unlock(&inode->i_mutex); + return len - towrite; +} + +#endif + static struct super_operations jfs_super_operations = { .alloc_inode = jfs_alloc_inode, .destroy_inode = jfs_destroy_inode, @@ -621,7 +729,11 @@ static struct super_operations jfs_super_operations = { .unlockfs = jfs_unlockfs, .statfs = jfs_statfs, .remount_fs = jfs_remount, - .show_options = jfs_show_options + .show_options = jfs_show_options, +#ifdef CONFIG_QUOTA + .quota_read = jfs_quota_read, + .quota_write = jfs_quota_write, +#endif }; static struct export_operations jfs_export_operations = { diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 2a4df9b3779..01b4db9e546 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -237,19 +237,22 @@ static int nlm_traverse_files(struct nlm_host *host, int action) { struct nlm_file *file, **fp; - int i; + int i, ret = 0; mutex_lock(&nlm_file_mutex); for (i = 0; i < FILE_NRHASH; i++) { fp = nlm_files + i; while ((file = *fp) != NULL) { + file->f_count++; + mutex_unlock(&nlm_file_mutex); + /* Traverse locks, blocks and shares of this file * and update file->f_locks count */ - if (nlm_inspect_file(host, file, action)) { - mutex_unlock(&nlm_file_mutex); - return 1; - } + if (nlm_inspect_file(host, file, action)) + ret = 1; + mutex_lock(&nlm_file_mutex); + file->f_count--; /* No more references to this file. Let go of it. */ if (!file->f_blocks && !file->f_locks && !file->f_shares && !file->f_count) { @@ -262,7 +265,7 @@ nlm_traverse_files(struct nlm_host *host, int action) } } mutex_unlock(&nlm_file_mutex); - return 0; + return ret; } /* diff --git a/fs/locks.c b/fs/locks.c index b0b41a64e10..d7c53392cac 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1421,8 +1421,9 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp) if (!leases_enable) goto out; - error = lease_alloc(filp, arg, &fl); - if (error) + error = -ENOMEM; + fl = locks_alloc_lock(); + if (fl == NULL) goto out; locks_copy_lock(fl, lease); @@ -1430,6 +1431,7 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp) locks_insert_lock(before, fl); *flp = fl; + error = 0; out: return error; } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 9ea91c5eeb7..330ff9fc7cf 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -204,6 +204,8 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) /* * Allocate the buffer map to keep the superblock small. */ + if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0) + goto out_illegal_sb; i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh); map = kmalloc(i, GFP_KERNEL); if (!map) @@ -263,7 +265,7 @@ out_no_root: out_no_bitmap: printk("MINIX-fs: bad superblock or unable to read bitmaps\n"); - out_freemap: +out_freemap: for (i = 0; i < sbi->s_imap_blocks; i++) brelse(sbi->s_imap[i]); for (i = 0; i < sbi->s_zmap_blocks; i++) @@ -276,11 +278,16 @@ out_no_map: printk("MINIX-fs: can't allocate map\n"); goto out_release; +out_illegal_sb: + if (!silent) + printk("MINIX-fs: bad superblock\n"); + goto out_release; + out_no_fs: if (!silent) printk("VFS: Can't find a Minix or Minix V2 filesystem " "on device %s\n", s->s_id); - out_release: +out_release: brelse(bh); goto out; @@ -290,7 +297,7 @@ out_bad_hblock: out_bad_sb: printk("MINIX-fs: unable to read superblock\n"); - out: +out: s->s_fs_info = NULL; kfree(sbi); return -EINVAL; diff --git a/fs/namei.c b/fs/namei.c index 55a131230f9..432d6bc6fab 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -227,10 +227,10 @@ int generic_permission(struct inode *inode, int mask, int permission(struct inode *inode, int mask, struct nameidata *nd) { + umode_t mode = inode->i_mode; int retval, submask; if (mask & MAY_WRITE) { - umode_t mode = inode->i_mode; /* * Nobody gets write access to a read-only fs. @@ -247,6 +247,13 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) } + /* + * MAY_EXEC on regular files requires special handling: We override + * filesystem execute permissions if the mode bits aren't set. + */ + if ((mask & MAY_EXEC) && S_ISREG(mode) && !(mode & S_IXUGO)) + return -EACCES; + /* Ordinary permission routines do not understand MAY_APPEND. */ submask = mask & ~MAY_APPEND; if (inode->i_op && inode->i_op->permission) @@ -1767,6 +1774,8 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir) if (nd->last_type != LAST_NORM) goto fail; nd->flags &= ~LOOKUP_PARENT; + nd->flags |= LOOKUP_CREATE; + nd->intent.open.flags = O_EXCL; /* * Do the final lookup. diff --git a/fs/nfs/file.c b/fs/nfs/file.c index cc2b874ad5a..48e892880d5 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -312,7 +312,13 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) static int nfs_release_page(struct page *page, gfp_t gfp) { - return !nfs_wb_page(page->mapping->host, page); + if (gfp & __GFP_FS) + return !nfs_wb_page(page->mapping->host, page); + else + /* + * Avoid deadlock on nfs_wait_on_request(). + */ + return 0; } const struct address_space_operations nfs_file_aops = { diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b81e7ed3c90..07a5dd57646 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -130,9 +130,7 @@ nfs_idmap_delete(struct nfs4_client *clp) if (!idmap) return; - dput(idmap->idmap_dentry); - idmap->idmap_dentry = NULL; - rpc_unlink(idmap->idmap_path); + rpc_unlink(idmap->idmap_dentry); clp->cl_idmap = NULL; kfree(idmap); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e6ee97f19d8..153898e1331 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2668,7 +2668,7 @@ out: nfs4_set_cached_acl(inode, acl); } -static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) { struct page *pages[NFS4ACL_MAXPAGES]; struct nfs_getaclargs args = { @@ -2721,6 +2721,19 @@ out_free: return ret; } +static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +{ + struct nfs4_exception exception = { }; + ssize_t ret; + do { + ret = __nfs4_get_acl_uncached(inode, buf, buflen); + if (ret >= 0) + break; + ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception); + } while (exception.retry); + return ret; +} + static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) { struct nfs_server *server = NFS_SERVER(inode); @@ -2737,7 +2750,7 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) return nfs4_get_acl_uncached(inode, buf, buflen); } -static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) { struct nfs_server *server = NFS_SERVER(inode); struct page *pages[NFS4ACL_MAXPAGES]; @@ -2763,6 +2776,18 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen return ret; } +static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), + __nfs4_proc_set_acl(inode, buf, buflen), + &exception); + } while (exception.retry); + return err; +} + static int nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1750d996f49..730ec8fb31c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3355,7 +3355,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n struct kvec *iov = rcvbuf->head; unsigned int nr, pglen = rcvbuf->page_len; uint32_t *end, *entry, *p, *kaddr; - uint32_t len, attrlen; + uint32_t len, attrlen, xlen; int hdrlen, recvd, status; status = decode_op_hdr(xdr, OP_READDIR); @@ -3377,10 +3377,10 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0); - end = (uint32_t *) ((char *)p + pglen + readdir->pgbase); + end = p + ((pglen + readdir->pgbase) >> 2); entry = p; for (nr = 0; *p++; nr++) { - if (p + 3 > end) + if (end - p < 3) goto short_pkt; dprintk("cookie = %Lu, ", *((unsigned long long *)p)); p += 2; /* cookie */ @@ -3389,18 +3389,19 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len); goto err_unmap; } - dprintk("filename = %*s\n", len, (char *)p); - p += XDR_QUADLEN(len); - if (p + 1 > end) + xlen = XDR_QUADLEN(len); + if (end - p < xlen + 1) goto short_pkt; + dprintk("filename = %*s\n", len, (char *)p); + p += xlen; len = ntohl(*p++); /* bitmap length */ - p += len; - if (p + 1 > end) + if (end - p < len + 1) goto short_pkt; + p += len; attrlen = XDR_QUADLEN(ntohl(*p++)); - p += attrlen; /* attributes */ - if (p + 2 > end) + if (end - p < attrlen + 2) goto short_pkt; + p += attrlen; /* attributes */ entry = p; } if (!nr && (entry[0] != 0 || entry[1] == 0)) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 65c0c5b3235..da9cf11c326 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -116,10 +116,17 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; base &= ~PAGE_CACHE_MASK; pglen = PAGE_CACHE_SIZE - base; - if (pglen < remainder) + for (;;) { + if (remainder <= pglen) { + memclear_highpage_flush(*pages, base, remainder); + break; + } memclear_highpage_flush(*pages, base, pglen); - else - memclear_highpage_flush(*pages, base, remainder); + pages++; + remainder -= pglen; + pglen = PAGE_CACHE_SIZE; + base = 0; + } } /* @@ -476,6 +483,8 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) unsigned int base = data->args.pgbase; struct page **pages; + if (data->res.eof) + count = data->args.count; if (unlikely(count == 0)) return; pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; @@ -483,11 +492,7 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) count += base; for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) SetPageUptodate(*pages); - /* - * Was this an eof or a short read? If the latter, don't mark the page - * as uptodate yet. - */ - if (count > 0 && (data->res.eof || data->args.count == data->res.count)) + if (count != 0) SetPageUptodate(*pages); } @@ -502,6 +507,8 @@ static void nfs_readpage_set_pages_error(struct nfs_read_data *data) count += base; for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) SetPageError(*pages); + if (count != 0) + SetPageError(*pages); } /* diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 1b8346dd057..9503240ef0e 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2375,7 +2375,6 @@ leave: mlog(0, "returning %d\n", ret); return ret; } -EXPORT_SYMBOL_GPL(dlm_migrate_lockres); int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock) { diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index b0c3134f4f7..37be4b2e0d4 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -155,7 +155,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, else status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions); - if (status != DLM_NORMAL) + if (status != DLM_NORMAL && (status != DLM_CANCELGRANT || !master_node)) goto leave; /* By now this has been masked out of cancel requests. */ @@ -183,8 +183,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, spin_lock(&lock->spinlock); /* if the master told us the lock was already granted, * let the ast handle all of these actions */ - if (status == DLM_NORMAL && - lksb->status == DLM_CANCELGRANT) { + if (status == DLM_CANCELGRANT) { actions &= ~(DLM_UNLOCK_REMOVE_LOCK| DLM_UNLOCK_REGRANT_LOCK| DLM_UNLOCK_CLEAR_CONVERT_TYPE); @@ -349,14 +348,9 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm, vec, veclen, owner, &status); if (tmpret >= 0) { // successfully sent and received - if (status == DLM_CANCELGRANT) - ret = DLM_NORMAL; - else if (status == DLM_FORWARD) { + if (status == DLM_FORWARD) mlog(0, "master was in-progress. retry\n"); - ret = DLM_FORWARD; - } else - ret = status; - lksb->status = status; + ret = status; } else { mlog_errno(tmpret); if (dlm_is_host_down(tmpret)) { @@ -372,7 +366,6 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm, /* something bad. this will BUG in ocfs2 */ ret = dlm_err_to_dlm_status(tmpret); } - lksb->status = ret; } return ret; @@ -483,6 +476,10 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data) /* lock was found on queue */ lksb = lock->lksb; + if (flags & (LKM_VALBLK|LKM_PUT_LVB) && + lock->ml.type != LKM_EXMODE) + flags &= ~(LKM_VALBLK|LKM_PUT_LVB); + /* unlockast only called on originating node */ if (flags & LKM_PUT_LVB) { lksb->flags |= DLM_LKSB_PUT_LVB; @@ -507,11 +504,8 @@ not_found: "cookie=%u:%llu\n", dlm_get_lock_cookie_node(unlock->cookie), dlm_get_lock_cookie_seq(unlock->cookie)); - else { - /* send the lksb->status back to the other node */ - status = lksb->status; + else dlm_lock_put(lock); - } leave: if (res) @@ -533,26 +527,22 @@ static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm, if (dlm_lock_on_list(&res->blocked, lock)) { /* cancel this outright */ - lksb->status = DLM_NORMAL; status = DLM_NORMAL; *actions = (DLM_UNLOCK_CALL_AST | DLM_UNLOCK_REMOVE_LOCK); } else if (dlm_lock_on_list(&res->converting, lock)) { /* cancel the request, put back on granted */ - lksb->status = DLM_NORMAL; status = DLM_NORMAL; *actions = (DLM_UNLOCK_CALL_AST | DLM_UNLOCK_REMOVE_LOCK | DLM_UNLOCK_REGRANT_LOCK | DLM_UNLOCK_CLEAR_CONVERT_TYPE); } else if (dlm_lock_on_list(&res->granted, lock)) { - /* too late, already granted. DLM_CANCELGRANT */ - lksb->status = DLM_CANCELGRANT; - status = DLM_NORMAL; + /* too late, already granted. */ + status = DLM_CANCELGRANT; *actions = DLM_UNLOCK_CALL_AST; } else { mlog(ML_ERROR, "lock to cancel is not on any list!\n"); - lksb->status = DLM_IVLOCKID; status = DLM_IVLOCKID; *actions = 0; } @@ -569,13 +559,11 @@ static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm, /* unlock request */ if (!dlm_lock_on_list(&res->granted, lock)) { - lksb->status = DLM_DENIED; status = DLM_DENIED; dlm_error(status); *actions = 0; } else { /* unlock granted lock */ - lksb->status = DLM_NORMAL; status = DLM_NORMAL; *actions = (DLM_UNLOCK_FREE_LOCK | DLM_UNLOCK_CALL_AST | @@ -632,6 +620,8 @@ retry: spin_lock(&res->spinlock); is_master = (res->owner == dlm->node_num); + if (flags & LKM_VALBLK && lock->ml.type != LKM_EXMODE) + flags &= ~LKM_VALBLK; spin_unlock(&res->spinlock); if (is_master) { @@ -665,7 +655,7 @@ retry: } if (call_ast) { - mlog(0, "calling unlockast(%p, %d)\n", data, lksb->status); + mlog(0, "calling unlockast(%p, %d)\n", data, status); if (is_master) { /* it is possible that there is one last bast * pending. make sure it is flushed, then @@ -677,9 +667,12 @@ retry: wait_event(dlm->ast_wq, dlm_lock_basts_flushed(dlm, lock)); } - (*unlockast)(data, lksb->status); + (*unlockast)(data, status); } + if (status == DLM_CANCELGRANT) + status = DLM_NORMAL; + if (status == DLM_NORMAL) { mlog(0, "kicking the thread\n"); dlm_kick_thread(dlm, res); diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 0d1973ea32b..1f17a4d0828 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -840,6 +840,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, mlog(0, "Allocating %u clusters for a new window.\n", ocfs2_local_alloc_window_bits(osb)); + + /* Instruct the allocation code to try the most recently used + * cluster group. We'll re-record the group used this pass + * below. */ + ac->ac_last_group = osb->la_last_gd; + /* we used the generic suballoc reserve function, but we set * everything up nicely, so there's no reason why we can't use * the more specific cluster api to claim bits. */ @@ -852,6 +858,8 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, goto bail; } + osb->la_last_gd = ac->ac_last_group; + la->la_bm_off = cpu_to_le32(cluster_off); alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); /* just in case... In the future when we find space ourselves, diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index cd4a6f253d1..0462a7f4e21 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -197,7 +197,6 @@ struct ocfs2_super struct ocfs2_node_map recovery_map; struct ocfs2_node_map umount_map; - u32 num_clusters; u64 root_blkno; u64 system_dir_blkno; u64 bitmap_blkno; @@ -237,6 +236,7 @@ struct ocfs2_super enum ocfs2_local_alloc_state local_alloc_state; struct buffer_head *local_alloc_bh; + u64 la_last_gd; /* Next two fields are for local node slot recovery during * mount. */ diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 195523090c8..9d91e66f51a 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -70,12 +70,6 @@ static int ocfs2_block_group_search(struct inode *inode, struct buffer_head *group_bh, u32 bits_wanted, u32 min_bits, u16 *bit_off, u16 *bits_found); -static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, - u32 bits_wanted, - u32 min_bits, - u16 *bit_off, - unsigned int *num_bits, - u64 *bg_blkno); static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, struct ocfs2_alloc_context *ac, u32 bits_wanted, @@ -85,11 +79,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, u64 *bg_blkno); static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, int nr); -static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, - struct buffer_head *bg_bh, - unsigned int bits_wanted, - u16 *bit_off, - u16 *bits_found); static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, struct inode *alloc_inode, struct ocfs2_group_desc *bg, @@ -143,6 +132,64 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); } +/* somewhat more expensive than our other checks, so use sparingly. */ +static int ocfs2_check_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct ocfs2_group_desc *gd) +{ + unsigned int max_bits; + + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { + OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd); + return -EIO; + } + + if (di->i_blkno != gd->bg_parent_dinode) { + ocfs2_error(sb, "Group descriptor # %llu has bad parent " + "pointer (%llu, expected %llu)", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), + (unsigned long long)le64_to_cpu(di->i_blkno)); + return -EIO; + } + + max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); + if (le16_to_cpu(gd->bg_bits) > max_bits) { + ocfs2_error(sb, "Group descriptor # %llu has bit count of %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits)); + return -EIO; + } + + if (le16_to_cpu(gd->bg_chain) >= + le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { + ocfs2_error(sb, "Group descriptor # %llu has bad chain %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_chain)); + return -EIO; + } + + if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { + ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " + "claims that %u are free", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + le16_to_cpu(gd->bg_free_bits_count)); + return -EIO; + } + + if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { + ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " + "max bitmap bits of %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + 8 * le16_to_cpu(gd->bg_size)); + return -EIO; + } + + return 0; +} + static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, struct inode *alloc_inode, struct buffer_head *bg_bh, @@ -663,6 +710,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, struct buffer_head *bg_bh, unsigned int bits_wanted, + unsigned int total_bits, u16 *bit_off, u16 *bits_found) { @@ -679,10 +727,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, found = start = best_offset = best_size = 0; bitmap = bg->bg_bitmap; - while((offset = ocfs2_find_next_zero_bit(bitmap, - le16_to_cpu(bg->bg_bits), - start)) != -1) { - if (offset == le16_to_cpu(bg->bg_bits)) + while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) { + if (offset == total_bits) break; if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) { @@ -911,14 +957,35 @@ static int ocfs2_cluster_group_search(struct inode *inode, { int search = -ENOSPC; int ret; - struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; u16 tmp_off, tmp_found; + unsigned int max_bits, gd_cluster_off; BUG_ON(!ocfs2_is_cluster_bitmap(inode)); - if (bg->bg_free_bits_count) { + if (gd->bg_free_bits_count) { + max_bits = le16_to_cpu(gd->bg_bits); + + /* Tail groups in cluster bitmaps which aren't cpg + * aligned are prone to partial extention by a failed + * fs resize. If the file system resize never got to + * update the dinode cluster count, then we don't want + * to trust any clusters past it, regardless of what + * the group descriptor says. */ + gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb, + le64_to_cpu(gd->bg_blkno)); + if ((gd_cluster_off + max_bits) > + OCFS2_I(inode)->ip_clusters) { + max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off; + mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + OCFS2_I(inode)->ip_clusters, max_bits); + } + ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), group_bh, bits_wanted, + max_bits, &tmp_off, &tmp_found); if (ret) return ret; @@ -951,17 +1018,109 @@ static int ocfs2_block_group_search(struct inode *inode, if (bg->bg_free_bits_count) ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), group_bh, bits_wanted, + le16_to_cpu(bg->bg_bits), bit_off, bits_found); return ret; } +static int ocfs2_alloc_dinode_update_counts(struct inode *inode, + struct ocfs2_journal_handle *handle, + struct buffer_head *di_bh, + u32 num_bits, + u16 chain) +{ + int ret; + u32 tmp_used; + struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; + struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain; + + ret = ocfs2_journal_access(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); + di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); + le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); + + ret = ocfs2_journal_dirty(handle, di_bh); + if (ret < 0) + mlog_errno(ret); + +out: + return ret; +} + +static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, + u32 bits_wanted, + u32 min_bits, + u16 *bit_off, + unsigned int *num_bits, + u64 gd_blkno, + u16 *bits_left) +{ + int ret; + u16 found; + struct buffer_head *group_bh = NULL; + struct ocfs2_group_desc *gd; + struct inode *alloc_inode = ac->ac_inode; + struct ocfs2_journal_handle *handle = ac->ac_handle; + + ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno, + &group_bh, OCFS2_BH_CACHED, alloc_inode); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + gd = (struct ocfs2_group_desc *) group_bh->b_data; + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { + OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd); + ret = -EIO; + goto out; + } + + ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, + bit_off, &found); + if (ret < 0) { + if (ret != -ENOSPC) + mlog_errno(ret); + goto out; + } + + *num_bits = found; + + ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, + *num_bits, + le16_to_cpu(gd->bg_chain)); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, + *bit_off, *num_bits); + if (ret < 0) + mlog_errno(ret); + + *bits_left = le16_to_cpu(gd->bg_free_bits_count); + +out: + brelse(group_bh); + + return ret; +} + static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, u32 bits_wanted, u32 min_bits, u16 *bit_off, unsigned int *num_bits, - u64 *bg_blkno) + u64 *bg_blkno, + u16 *bits_left) { int status; u16 chain, tmp_bits; @@ -988,9 +1147,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; + status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); + if (status) { + mlog_errno(status); goto bail; } @@ -1018,9 +1177,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; + status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); + if (status) { + mlog_errno(status); goto bail; } } @@ -1099,6 +1258,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, (unsigned long long)fe->i_blkno); *bg_blkno = le64_to_cpu(bg->bg_blkno); + *bits_left = le16_to_cpu(bg->bg_free_bits_count); bail: if (group_bh) brelse(group_bh); @@ -1120,6 +1280,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, { int status; u16 victim, i; + u16 bits_left = 0; + u64 hint_blkno = ac->ac_last_group; struct ocfs2_chain_list *cl; struct ocfs2_dinode *fe; @@ -1146,6 +1308,28 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, goto bail; } + if (hint_blkno) { + /* Attempt to short-circuit the usual search mechanism + * by jumping straight to the most recently used + * allocation group. This helps us mantain some + * contiguousness across allocations. */ + status = ocfs2_search_one_group(ac, bits_wanted, min_bits, + bit_off, num_bits, + hint_blkno, &bits_left); + if (!status) { + /* Be careful to update *bg_blkno here as the + * caller is expecting it to be filled in, and + * ocfs2_search_one_group() won't do that for + * us. */ + *bg_blkno = hint_blkno; + goto set_hint; + } + if (status < 0 && status != -ENOSPC) { + mlog_errno(status); + goto bail; + } + } + cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; victim = ocfs2_find_victim_chain(cl); @@ -1153,9 +1337,9 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, ac->ac_allow_chain_relink = 1; status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off, - num_bits, bg_blkno); + num_bits, bg_blkno, &bits_left); if (!status) - goto bail; + goto set_hint; if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; @@ -1177,8 +1361,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, ac->ac_chain = i; status = ocfs2_search_chain(ac, bits_wanted, min_bits, - bit_off, num_bits, - bg_blkno); + bit_off, num_bits, bg_blkno, + &bits_left); if (!status) break; if (status < 0 && status != -ENOSPC) { @@ -1186,8 +1370,19 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, goto bail; } } -bail: +set_hint: + if (status != -ENOSPC) { + /* If the next search of this group is not likely to + * yield a suitable extent, then we reset the last + * group hint so as to not waste a disk read */ + if (bits_left < min_bits) + ac->ac_last_group = 0; + else + ac->ac_last_group = *bg_blkno; + } + +bail: mlog_exit(status); return status; } @@ -1341,7 +1536,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, { int status; unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; - u64 bg_blkno; + u64 bg_blkno = 0; u16 bg_bit_off; mlog_entry_void(); @@ -1494,9 +1689,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, } group = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(group)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group); - status = -EIO; + status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group); + if (status) { + mlog_errno(status); goto bail; } BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index a76c82a7cea..c787838d105 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -49,6 +49,8 @@ struct ocfs2_alloc_context { u16 ac_chain; int ac_allow_chain_relink; group_search_t *ac_group_search; + + u64 ac_last_group; }; void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 382706a67ff..d17e33e66a1 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1442,8 +1442,13 @@ static int ocfs2_initialize_super(struct super_block *sb, osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; + /* We don't have a cluster lock on the bitmap here because + * we're only interested in static information and the extra + * complexity at mount time isn't worht it. Don't pass the + * inode in to the read function though as we don't want it to + * be put in the cache. */ status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0, - inode); + NULL); iput(inode); if (status < 0) { mlog_errno(status); @@ -1452,7 +1457,6 @@ static int ocfs2_initialize_super(struct super_block *sb, di = (struct ocfs2_dinode *) bitmap_bh->b_data; osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg); - osb->num_clusters = le32_to_cpu(di->id1.bitmap1.i_total); brelse(bitmap_bh); mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n", (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg); diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c index abe91ca03ed..0a5927c806c 100644 --- a/fs/partitions/sun.c +++ b/fs/partitions/sun.c @@ -74,7 +74,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev) spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect); for (i = 0; i < 8; i++, p++) { unsigned long st_sector; - int num_sectors; + unsigned int num_sectors; st_sector = be32_to_cpu(p->start_cylinder) * spc; num_sectors = be32_to_cpu(p->num_sectors); diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 9f2cfc30f9c..94215622544 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -169,7 +169,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "Mapped: %8lu kB\n" "Slab: %8lu kB\n" "PageTables: %8lu kB\n" - "NFS Unstable: %8lu kB\n" + "NFS_Unstable: %8lu kB\n" "Bounce: %8lu kB\n" "CommitLimit: %8lu kB\n" "Committed_AS: %8lu kB\n" diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 39fedaa88a0..d935fb9394e 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -424,7 +424,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf) int res = -ENOTDIR; if (!file->f_op || !file->f_op->readdir) goto out; - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR); // down(&inode->i_zombie); res = -ENOENT; if (!IS_DEADDIR(inode)) { diff --git a/fs/udf/super.c b/fs/udf/super.c index 4df822c881b..fcce1a21a51 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -115,6 +115,13 @@ static struct inode *udf_alloc_inode(struct super_block *sb) ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, SLAB_KERNEL); if (!ei) return NULL; + + ei->i_unique = 0; + ei->i_lenExtents = 0; + ei->i_next_alloc_block = 0; + ei->i_next_alloc_goal = 0; + ei->i_strat4096 = 0; + return &ei->vfs_inode; } @@ -1652,7 +1659,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) iput(inode); goto error_out; } - sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_maxbytes = 1<<30; return 0; error_out: diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index e1b0e8cfecb..0abd66ce36e 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -239,37 +239,51 @@ void udf_truncate_extents(struct inode * inode) { if (offset) { - extoffset -= adsize; - etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1); - if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) - { - extoffset -= adsize; - elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset); - udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0); + /* + * OK, there is not extent covering inode->i_size and + * no extent above inode->i_size => truncate is + * extending the file by 'offset'. + */ + if ((!bh && extoffset == udf_file_entry_alloc_offset(inode)) || + (bh && extoffset == sizeof(struct allocExtDesc))) { + /* File has no extents at all! */ + memset(&eloc, 0x00, sizeof(kernel_lb_addr)); + elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset; + udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1); } - else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) - { - kernel_lb_addr neloc = { 0, 0 }; + else { extoffset -= adsize; - nelen = EXT_NOT_RECORDED_NOT_ALLOCATED | - ((elen + offset + inode->i_sb->s_blocksize - 1) & - ~(inode->i_sb->s_blocksize - 1)); - udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1); - udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1); - } - else - { - if (elen & (inode->i_sb->s_blocksize - 1)) + etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1); + if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) + { + extoffset -= adsize; + elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset); + udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0); + } + else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { + kernel_lb_addr neloc = { 0, 0 }; extoffset -= adsize; - elen = EXT_RECORDED_ALLOCATED | - ((elen + inode->i_sb->s_blocksize - 1) & + nelen = EXT_NOT_RECORDED_NOT_ALLOCATED | + ((elen + offset + inode->i_sb->s_blocksize - 1) & ~(inode->i_sb->s_blocksize - 1)); - udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1); + udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1); + udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1); + } + else + { + if (elen & (inode->i_sb->s_blocksize - 1)) + { + extoffset -= adsize; + elen = EXT_RECORDED_ALLOCATED | + ((elen + inode->i_sb->s_blocksize - 1) & + ~(inode->i_sb->s_blocksize - 1)); + udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1); + } + memset(&eloc, 0x00, sizeof(kernel_lb_addr)); + elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset; + udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1); } - memset(&eloc, 0x00, sizeof(kernel_lb_addr)); - elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset; - udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1); } } } diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index e7c8615beb6..30c6e8a9446 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -169,18 +169,20 @@ static void ufs_clear_frag(struct inode *inode, struct buffer_head *bh) static struct buffer_head * ufs_clear_frags(struct inode *inode, sector_t beg, - unsigned int n) + unsigned int n, sector_t want) { - struct buffer_head *res, *bh; + struct buffer_head *res = NULL, *bh; sector_t end = beg + n; - res = sb_getblk(inode->i_sb, beg); - ufs_clear_frag(inode, res); - for (++beg; beg < end; ++beg) { + for (; beg < end; ++beg) { bh = sb_getblk(inode->i_sb, beg); ufs_clear_frag(inode, bh); - brelse(bh); + if (want != beg) + brelse(bh); + else + res = bh; } + BUG_ON(!res); return res; } @@ -265,7 +267,9 @@ repeat: lastfrag = ufsi->i_lastfrag; } - goal = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]) + uspi->s_fpb; + tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]); + if (tmp) + goal = tmp + uspi->s_fpb; tmp = ufs_new_fragments (inode, p, fragment - blockoff, goal, required + blockoff, err, locked_page); @@ -277,13 +281,15 @@ repeat: tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff), fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff), err, locked_page); - } + } else /* (lastblock > block) */ { /* * We will allocate new block before last allocated block */ - else /* (lastblock > block) */ { - if (lastblock && (tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock-1]))) - goal = tmp + uspi->s_fpb; + if (block) { + tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[block-1]); + if (tmp) + goal = tmp + uspi->s_fpb; + } tmp = ufs_new_fragments(inode, p, fragment - blockoff, goal, uspi->s_fpb, err, locked_page); } @@ -296,7 +302,7 @@ repeat: } if (!phys) { - result = ufs_clear_frags(inode, tmp + blockoff, required); + result = ufs_clear_frags(inode, tmp, required, tmp + blockoff); } else { *phys = tmp + blockoff; result = NULL; @@ -383,7 +389,7 @@ repeat: } } - if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1]) + uspi->s_fpb)) + if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1]))) goal = tmp + uspi->s_fpb; else goal = bh->b_blocknr + uspi->s_fpb; @@ -397,7 +403,8 @@ repeat: if (!phys) { - result = ufs_clear_frags(inode, tmp + blockoff, uspi->s_fpb); + result = ufs_clear_frags(inode, tmp, uspi->s_fpb, + tmp + blockoff); } else { *phys = tmp + blockoff; *new = 1; diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index c9b55872079..ea11d04c41a 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -375,17 +375,15 @@ static int ufs_alloc_lastblock(struct inode *inode) int err = 0; struct address_space *mapping = inode->i_mapping; struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi; - struct ufs_inode_info *ufsi = UFS_I(inode); unsigned lastfrag, i, end; struct page *lastpage; struct buffer_head *bh; lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift; - if (!lastfrag) { - ufsi->i_lastfrag = 0; + if (!lastfrag) goto out; - } + lastfrag--; lastpage = ufs_get_locked_page(mapping, lastfrag >> @@ -400,25 +398,25 @@ static int ufs_alloc_lastblock(struct inode *inode) for (i = 0; i < end; ++i) bh = bh->b_this_page; - if (!buffer_mapped(bh)) { - err = ufs_getfrag_block(inode, lastfrag, bh, 1); - - if (unlikely(err)) - goto out_unlock; - - if (buffer_new(bh)) { - clear_buffer_new(bh); - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); - /* - * we do not zeroize fragment, because of - * if it maped to hole, it already contains zeroes - */ - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - set_page_dirty(lastpage); - } + + err = ufs_getfrag_block(inode, lastfrag, bh, 1); + + if (unlikely(err)) + goto out_unlock; + + if (buffer_new(bh)) { + clear_buffer_new(bh); + unmap_underlying_metadata(bh->b_bdev, + bh->b_blocknr); + /* + * we do not zeroize fragment, because of + * if it maped to hole, it already contains zeroes + */ + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + set_page_dirty(lastpage); } + out_unlock: ufs_put_locked_page(lastpage); out: @@ -440,23 +438,11 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return -EPERM; - if (inode->i_size > old_i_size) { - /* - * if we expand file we should care about - * allocation of block for last byte first of all - */ - err = ufs_alloc_lastblock(inode); + err = ufs_alloc_lastblock(inode); - if (err) { - i_size_write(inode, old_i_size); - goto out; - } - /* - * go away, because of we expand file, and we do not - * need free blocks, and zeroizes page - */ - lock_kernel(); - goto almost_end; + if (err) { + i_size_write(inode, old_i_size); + goto out; } block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block); @@ -477,21 +463,8 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) yield(); } - if (inode->i_size < old_i_size) { - /* - * now we should have enough space - * to allocate block for last byte - */ - err = ufs_alloc_lastblock(inode); - if (err) - /* - * looks like all the same - we have no space, - * but we truncate file already - */ - inode->i_size = (ufsi->i_lastfrag - 1) * uspi->s_fsize; - } -almost_end: inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; + ufsi->i_lastfrag = DIRECT_FRAGMENT; unlock_kernel(); mark_inode_dirty(inode); out: diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index eef6763f3a6..d2bbcd882a6 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -1835,40 +1835,47 @@ xfs_alloc_fix_freelist( &agbp))) return error; if (!pag->pagf_init) { + ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); + ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); args->agbp = NULL; return 0; } } else agbp = NULL; - /* If this is a metadata preferred pag and we are user data + /* + * If this is a metadata preferred pag and we are user data * then try somewhere else if we are not being asked to * try harder at this point */ - if (pag->pagf_metadata && args->userdata && flags) { + if (pag->pagf_metadata && args->userdata && + (flags & XFS_ALLOC_FLAG_TRYLOCK)) { + ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); args->agbp = NULL; return 0; } - need = XFS_MIN_FREELIST_PAG(pag, mp); - delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0; - /* - * If it looks like there isn't a long enough extent, or enough - * total blocks, reject it. - */ - longest = (pag->pagf_longest > delta) ? - (pag->pagf_longest - delta) : - (pag->pagf_flcount > 0 || pag->pagf_longest > 0); - if (args->minlen + args->alignment + args->minalignslop - 1 > longest || - (!(flags & XFS_ALLOC_FLAG_FREEING) && - (int)(pag->pagf_freeblks + pag->pagf_flcount - - need - args->total) < - (int)args->minleft)) { - if (agbp) - xfs_trans_brelse(tp, agbp); - args->agbp = NULL; - return 0; + if (!(flags & XFS_ALLOC_FLAG_FREEING)) { + need = XFS_MIN_FREELIST_PAG(pag, mp); + delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0; + /* + * If it looks like there isn't a long enough extent, or enough + * total blocks, reject it. + */ + longest = (pag->pagf_longest > delta) ? + (pag->pagf_longest - delta) : + (pag->pagf_flcount > 0 || pag->pagf_longest > 0); + if ((args->minlen + args->alignment + args->minalignslop - 1) > + longest || + ((int)(pag->pagf_freeblks + pag->pagf_flcount - + need - args->total) < (int)args->minleft)) { + if (agbp) + xfs_trans_brelse(tp, agbp); + args->agbp = NULL; + return 0; + } } + /* * Get the a.g. freespace buffer. * Can fail if we're not blocking on locks, and it's held. @@ -1878,6 +1885,8 @@ xfs_alloc_fix_freelist( &agbp))) return error; if (agbp == NULL) { + ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); + ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); args->agbp = NULL; return 0; } @@ -1887,22 +1896,24 @@ xfs_alloc_fix_freelist( */ agf = XFS_BUF_TO_AGF(agbp); need = XFS_MIN_FREELIST(agf, mp); - delta = need > be32_to_cpu(agf->agf_flcount) ? - (need - be32_to_cpu(agf->agf_flcount)) : 0; /* * If there isn't enough total or single-extent, reject it. */ - longest = be32_to_cpu(agf->agf_longest); - longest = (longest > delta) ? (longest - delta) : - (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); - if (args->minlen + args->alignment + args->minalignslop - 1 > longest || - (!(flags & XFS_ALLOC_FLAG_FREEING) && - (int)(be32_to_cpu(agf->agf_freeblks) + - be32_to_cpu(agf->agf_flcount) - need - args->total) < - (int)args->minleft)) { - xfs_trans_brelse(tp, agbp); - args->agbp = NULL; - return 0; + if (!(flags & XFS_ALLOC_FLAG_FREEING)) { + delta = need > be32_to_cpu(agf->agf_flcount) ? + (need - be32_to_cpu(agf->agf_flcount)) : 0; + longest = be32_to_cpu(agf->agf_longest); + longest = (longest > delta) ? (longest - delta) : + (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); + if ((args->minlen + args->alignment + args->minalignslop - 1) > + longest || + ((int)(be32_to_cpu(agf->agf_freeblks) + + be32_to_cpu(agf->agf_flcount) - need - args->total) < + (int)args->minleft)) { + xfs_trans_brelse(tp, agbp); + args->agbp = NULL; + return 0; + } } /* * Make the freelist shorter if it's too long. @@ -1950,12 +1961,11 @@ xfs_alloc_fix_freelist( * on a completely full ag. */ if (targs.agbno == NULLAGBLOCK) { - if (!(flags & XFS_ALLOC_FLAG_FREEING)) { - xfs_trans_brelse(tp, agflbp); - args->agbp = NULL; - return 0; - } - break; + if (flags & XFS_ALLOC_FLAG_FREEING) + break; + xfs_trans_brelse(tp, agflbp); + args->agbp = NULL; + return 0; } /* * Put each allocated block on the list. @@ -2442,31 +2452,26 @@ xfs_free_extent( xfs_fsblock_t bno, /* starting block number of extent */ xfs_extlen_t len) /* length of extent */ { -#ifdef DEBUG - xfs_agf_t *agf; /* a.g. freespace header */ -#endif - xfs_alloc_arg_t args; /* allocation argument structure */ + xfs_alloc_arg_t args; int error; ASSERT(len != 0); + memset(&args, 0, sizeof(xfs_alloc_arg_t)); args.tp = tp; args.mp = tp->t_mountp; args.agno = XFS_FSB_TO_AGNO(args.mp, bno); ASSERT(args.agno < args.mp->m_sb.sb_agcount); args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); - args.alignment = 1; - args.minlen = args.minleft = args.minalignslop = 0; down_read(&args.mp->m_peraglock); args.pag = &args.mp->m_perag[args.agno]; if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) goto error0; #ifdef DEBUG ASSERT(args.agbp != NULL); - agf = XFS_BUF_TO_AGF(args.agbp); - ASSERT(args.agbno + len <= be32_to_cpu(agf->agf_length)); + ASSERT((args.agbno + len) <= + be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); #endif - error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, - len, 0); + error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); error0: up_read(&args.mp->m_peraglock); return error; diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3a613753906..bf46fae303a 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4993,7 +4993,7 @@ xfs_bmapi( bma.firstblock = *firstblock; bma.alen = alen; bma.off = aoff; - bma.conv = (flags & XFS_BMAPI_CONVERT); + bma.conv = !!(flags & XFS_BMAPI_CONVERT); bma.wasdel = wasdelay; bma.minlen = minlen; bma.low = flist->xbf_low; |