summary | refs | log | tree | commit | diff
path: root/fs/ceph
diff options
context:
space:
mode:
author: Alex Elder <elder@inktank.com> 2013-03-14 14:09:05 -0500
committer: Sage Weil <sage@inktank.com> 2013-05-01 21:17:58 -0700
commit: acead002b200569273bed331c93c4a91d25e10b8 (patch)
tree: 779bf689da149acf73bd75f51641f3700469f6b7 /fs/ceph
parent: a19308048182d5f9e16b03b1d1c038d9346c7589 (diff)
download: linux-3.10-acead002b200569273bed331c93c4a91d25e10b8.tar.gz
linux-3.10-acead002b200569273bed331c93c4a91d25e10b8.tar.bz2
linux-3.10-acead002b200569273bed331c93c4a91d25e10b8.zip
libceph: don't build request in ceph_osdc_new_request()
This patch moves the call to ceph_osdc_build_request() out of ceph_osdc_new_request() and into its caller. This is in order to defer formatting osd operation information into the request message until just before the request is started. The only unusual (ab)user of ceph_osdc_build_request() is ceph_writepages_start(), where the final length of the write request may change (downward) based on the current inode size or the oldest snapshot context with dirty data for the inode. The remaining callers don't change anything in the request after it has been built. This means the ops array is now supplied by the caller. It also means there is no need to pass the mtime to ceph_osdc_new_request() (it gets provided to ceph_osdc_build_request()). And rather than passing a do_sync flag, the number of ops supplied in the ops array now implies whether a second STARTSYNC operation is added after the requested READ or WRITE. This and some of the patches that follow are related to having the messenger (only) be responsible for filling the content of the message header, as described here: http://tracker.ceph.com/issues/4589 Signed-off-by: Alex Elder <elder@inktank.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/addr.c 36
-rw-r--r-- fs/ceph/file.c 20
2 files changed, 36 insertions, 20 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index ae438d02a42..681463d5459 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -284,7 +284,9 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
&ceph_inode_to_client(inode)->client->osdc;
struct ceph_inode_info *ci = ceph_inode(inode);
struct page *page = list_entry(page_list->prev, struct page, lru);
+ struct ceph_vino vino;
struct ceph_osd_request *req;
+ struct ceph_osd_req_op op;
u64 off;
u64 len;
int i;
@@ -308,16 +310,17 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
len = nr_pages << PAGE_CACHE_SHIFT;
dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
off, len);
-
- req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode),
- off, &len,
- CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
- NULL, 0,
+ vino = ceph_vino(inode);
+ req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
+ 1, &op, CEPH_OSD_OP_READ,
+ CEPH_OSD_FLAG_READ, NULL,
ci->i_truncate_seq, ci->i_truncate_size,
- NULL, false);
+ false);
if (IS_ERR(req))
return PTR_ERR(req);
+ ceph_osdc_build_request(req, off, 1, &op, NULL, vino.snap, NULL);
+
/* build page vector */
nr_pages = calc_pages_for(0, len);
pages = kmalloc(sizeof(*pages) * nr_pages, GFP_NOFS);
@@ -736,6 +739,7 @@ retry:
last_snapc = snapc;
while (!done && index <= end) {
+ struct ceph_osd_req_op ops[2];
unsigned i;
int first;
pgoff_t next;
@@ -825,20 +829,22 @@ get_more_pages:
/* ok */
if (locked_pages == 0) {
+ struct ceph_vino vino;
+ int num_ops = do_sync ? 2 : 1;
+
/* prepare async write request */
offset = (u64) page_offset(page);
len = wsize;
+ vino = ceph_vino(inode);
+ /* BUG_ON(vino.snap != CEPH_NOSNAP); */
req = ceph_osdc_new_request(&fsc->client->osdc,
- &ci->i_layout,
- ceph_vino(inode),
- offset, &len,
+ &ci->i_layout, vino, offset, &len,
+ num_ops, ops,
CEPH_OSD_OP_WRITE,
CEPH_OSD_FLAG_WRITE |
CEPH_OSD_FLAG_ONDISK,
- snapc, do_sync,
- ci->i_truncate_seq,
- ci->i_truncate_size,
- &inode->i_mtime, true);
+ snapc, ci->i_truncate_seq,
+ ci->i_truncate_size, true);
if (IS_ERR(req)) {
rc = PTR_ERR(req);
@@ -846,6 +852,10 @@ get_more_pages:
break;
}
+ ceph_osdc_build_request(req, offset,
+ num_ops, ops, snapc, vino.snap,
+ &inode->i_mtime);
+
req->r_data_out.type = CEPH_OSD_DATA_TYPE_PAGES;
req->r_data_out.length = len;
req->r_data_out.alignment = 0;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index aeafa67bfe9..3d6dcf23b4a 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -475,14 +475,17 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_snap_context *snapc;
+ struct ceph_vino vino;
struct ceph_osd_request *req;
+ struct ceph_osd_req_op ops[2];
+ int num_ops = 1;
struct page **pages;
int num_pages;
long long unsigned pos;
u64 len;
int written = 0;
int flags;
- int do_sync = 0;
int check_caps = 0;
int page_align, io_align;
unsigned long buf_align;
@@ -516,7 +519,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
if ((file->f_flags & (O_SYNC|O_DIRECT)) == 0)
flags |= CEPH_OSD_FLAG_ACK;
else
- do_sync = 1;
+ num_ops++; /* Also include a 'startsync' command. */
/*
* we may need to do multiple writes here if we span an object
@@ -527,16 +530,19 @@ more:
buf_align = (unsigned long)data & ~PAGE_MASK;
len = left;
+ snapc = ci->i_snap_realm->cached_context;
+ vino = ceph_vino(inode);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
- ceph_vino(inode), pos, &len,
- CEPH_OSD_OP_WRITE, flags,
- ci->i_snap_realm->cached_context,
- do_sync,
+ vino, pos, &len, num_ops, ops,
+ CEPH_OSD_OP_WRITE, flags, snapc,
ci->i_truncate_seq, ci->i_truncate_size,
- &mtime, false);
+ false);
if (IS_ERR(req))
return PTR_ERR(req);
+ ceph_osdc_build_request(req, pos, num_ops, ops,
+ snapc, vino.snap, &mtime);
+
/* write from beginning of first page, regardless of io alignment */
page_align = file->f_flags & O_DIRECT ? buf_align : io_align;
num_pages = calc_pages_for(page_align, len);