From 166e1f901b01872e8b70733a3f2e2c6980389cf8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Sep 2011 12:08:27 +0200 Subject: block: export __make_request Avoid the hacks currently needed for request based device mappers by simply exporting the symbol instead of trying to get it through the back door. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0e67c45b3bc..e9c3d9b0763 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -675,6 +675,8 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); +extern int __make_request(struct request_queue *q, struct bio *bio); + /* * A queue has just exitted congestion. Note this in the global counter of * congested queues, and wake up anyone who was waiting for requests to be -- cgit v1.2.3 From c20e8de27fef9f59869c81c288ad6cf28200e00c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 12 Sep 2011 12:03:37 +0200 Subject: block: rename __make_request() to blk_queue_bio() Now that it's exported, let's put it in a more sane namespace. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e9c3d9b0763..085f95414c7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -675,7 +675,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -extern int __make_request(struct request_queue *q, struct bio *bio); +extern int blk_queue_bio(struct request_queue *q, struct bio *bio); /* * A queue has just exitted congestion. 
Note this in the global counter of -- cgit v1.2.3 From 5a7bbad27a410350e64a2d7f5ec18fc73836c14f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Sep 2011 12:12:01 +0200 Subject: block: remove support for bio remapping from ->make_request There is very little benefit in letting a ->make_request instance update the bio's device and sector and loop around it in __generic_make_request when we can achieve the same through calling generic_make_request from the driver and letting the loop in generic_make_request handle it. Note that various drivers got the return value from ->make_request wrong and returned non-zero values for errors. Signed-off-by: Christoph Hellwig Acked-by: NeilBrown Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 085f95414c7..c712efdafc3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -193,7 +193,7 @@ struct request_pm_state #include typedef void (request_fn_proc) (struct request_queue *q); -typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); +typedef void (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); @@ -675,7 +675,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -extern int blk_queue_bio(struct request_queue *q, struct bio *bio); +extern void blk_queue_bio(struct request_queue *q, struct bio *bio); /* * A queue has just exitted congestion. 
Note this in the global counter of -- cgit v1.2.3 From 75df713627f28f88b901b329c8857747545fd4ab Mon Sep 17 00:00:00 2001 From: Suresh Jayaraman Date: Wed, 21 Sep 2011 10:00:16 +0200 Subject: block: document blk-plug Thus spake Andrew Morton: "And I have the usual maintainability whine. If someone comes up to vmscan.c and sees it calling blk_start_plug(), how are they supposed to work out why that call is there? They go look at the blk_start_plug() definition and it is undocumented. I think we can do better than this?" Adapted from the LWN article - http://lwn.net/Articles/438256/ by Jens Axboe and from an earlier attempt by Shaohua Li to document blk-plug. [akpm@linux-foundation.org: grammatical and spelling tweaks] Signed-off-by: Suresh Jayaraman Cc: Shaohua Li Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c712efdafc3..1978655faa3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -860,17 +860,23 @@ struct request_queue *blk_alloc_queue_node(gfp_t, int); extern void blk_put_queue(struct request_queue *); /* - * Note: Code in between changing the blk_plug list/cb_list or element of such - * lists is preemptable, but such code can't do sleep (or be very careful), - * otherwise data is corrupted. For details, please check schedule() where - * blk_schedule_flush_plug() is called. + * blk_plug permits building a queue of related requests by holding the I/O + * fragments for a short period. This allows merging of sequential requests + * into single larger request. As the requests are moved from a per-task list to + * the device's request_queue in a batch, this results in improved scalability + * as the lock contention for request_queue lock is reduced. 
+ * + * It is ok not to disable preemption when adding the request to the plug list + * or when attempting a merge, because blk_schedule_flush_list() will only flush + * the plug list when the task sleeps by itself. For details, please see + * schedule() where blk_schedule_flush_plug() is called. */ struct blk_plug { - unsigned long magic; - struct list_head list; - struct list_head cb_list; - unsigned int should_sort; - unsigned int count; + unsigned long magic; /* detect uninitialized use-cases */ + struct list_head list; /* requests */ + struct list_head cb_list; /* md requires an unplug callback */ + unsigned int should_sort; /* list to be sorted before flushing? */ + unsigned int count; /* number of queued requests */ }; #define BLK_MAX_REQUEST_COUNT 16 -- cgit v1.2.3 From 456be1484ffc72a24bdb4200b5847c4fa90139d9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Oct 2011 12:57:20 +0200 Subject: loop: remove the incorrect write_begin/write_end shortcut Currently the loop device tries to call directly into write_begin/write_end instead of going through ->write if it can. This is a fairly nasty shortcut as write_begin and write_end are only callbacks for the generic write code and expect to be called with filesystem specific locks held. This code currently causes various issues for clustered filesystems as it doesn't take the required cluster locks, and it also causes issues for XFS as it doesn't properly lock against the swapext ioctl as called by the defragmentation tools. This in turn causes data corruption if defragmentation hits a busy loop device in the wrong time window, as reported by RH QA. The reason why we have this shortcut is that it saves a data copy when doing a transformation on the loop device, which is the technical term for using cryptoloop (or an XOR transformation). 
Given that cryptoloop has been deprecated in favour of dm-crypt my opinion is that we should simply drop this shortcut instead of finding complicated ways to introduce a formal interface for this shortcut. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/loop.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/loop.h b/include/linux/loop.h index 683d6989011..a0688068911 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -73,7 +73,6 @@ struct loop_device { */ enum { LO_FLAGS_READ_ONLY = 1, - LO_FLAGS_USE_AOPS = 2, LO_FLAGS_AUTOCLEAR = 4, }; -- cgit v1.2.3 From bc9fcbf9cb8ec76d340da16fbf48a9a316e14c52 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 19 Oct 2011 14:31:18 +0200 Subject: block: move blk_throtl prototypes to block/blk.h blk_throtl interface is block internal and there's no reason to have them in linux/blkdev.h. Move them to block/blk.h. This patch doesn't introduce any functional change. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0b68044e7ab..5267cd2f20d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1197,20 +1197,6 @@ static inline uint64_t rq_io_start_time_ns(struct request *req) } #endif -#ifdef CONFIG_BLK_DEV_THROTTLING -extern int blk_throtl_init(struct request_queue *q); -extern void blk_throtl_exit(struct request_queue *q); -extern int blk_throtl_bio(struct request_queue *q, struct bio **bio); -#else /* CONFIG_BLK_DEV_THROTTLING */ -static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) -{ - return 0; -} - -static inline int blk_throtl_init(struct request_queue *q) { return 0; } -static inline int blk_throtl_exit(struct request_queue *q) { return 0; } -#endif /* CONFIG_BLK_DEV_THROTTLING */ - #define MODULE_ALIAS_BLOCKDEV(major,minor) 
\ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ -- cgit v1.2.3 From bd87b5898a72b1aef6acf3705c61c9f6372adf0c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 19 Oct 2011 14:33:08 +0200 Subject: block: drop @tsk from attempt_plug_merge() and explain sync rules attempt_plug_merge() accesses elevator without holding queue_lock and may call into ->elevator_bio_merge_fn(). The elevator is guaranteed to be valid because it's accessed iff the plugged list has requests and elevator is never exited with live requests, so as long as the elevator method can deal with unlocked access, this is safe. Explain the sync rules around attempt_plug_merge() and drop the unnecessary @tsk parameter. This patch doesn't introduce any functional change. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/elevator.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index d800d514218..1d0f7a2ff73 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -38,6 +38,12 @@ struct elevator_ops elevator_merged_fn *elevator_merged_fn; elevator_merge_req_fn *elevator_merge_req_fn; elevator_allow_merge_fn *elevator_allow_merge_fn; + + /* + * Used for both plugged list and elevator merging and in the + * former case called without queue_lock. Read comment on top of + * attempt_plug_merge() for details. + */ elevator_bio_merged_fn *elevator_bio_merged_fn; elevator_dispatch_fn *elevator_dispatch_fn; -- cgit v1.2.3 From 9562ad9ab36df7ccef920d119f3b5100025db95f Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 24 Oct 2011 16:11:30 +0200 Subject: block: Remove the control of complete cpu from bio. bio originally has the functionality to set the complete cpu, but it is broken. Christoph said that "This code is unused, and from the all the discussions lately pretty obviously broken. 
The only thing keeping it serves is creating more confusion and possibly more bugs." And Jens replied with "We can kill bio_set_completion_cpu(). I'm fine with leaving cpu control to the request based drivers, they are the only ones that can toggle the setting anyway". So this patch tries to remove all the work of controlling complete cpu from a bio. Cc: Shaohua Li Cc: Christoph Hellwig Signed-off-by: Tao Ma Signed-off-by: Jens Axboe --- include/linux/bio.h | 8 -------- include/linux/blk_types.h | 11 ++++------- 2 files changed, 4 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index ce33e6868a2..a3c071c9e18 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -268,14 +268,6 @@ extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); -/* - * Allow queuer to specify a completion CPU for this bio - */ -static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu) -{ - bio->bi_comp_cpu = cpu; -} - /* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 71fc53bb8f1..4053cbd4490 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -59,8 +59,6 @@ struct bio { unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ - unsigned int bi_comp_cpu; /* completion CPU */ - atomic_t bi_cnt; /* pin count */ struct bio_vec *bi_io_vec; /* the actual vec list */ @@ -93,11 +91,10 @@ struct bio { #define BIO_BOUNCED 5 /* bio is a bounce bio */ #define BIO_USER_MAPPED 6 /* contains user pages */ #define BIO_EOPNOTSUPP 7 /* not supported */ -#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ -#define BIO_NULL_MAPPED 9 /* contains invalid user pages */ -#define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */ -#define BIO_QUIET 11 /* Make BIO Quiet */ -#define BIO_MAPPED_INTEGRITY 12/* integrity metadata has been remapped */ +#define BIO_NULL_MAPPED 8 /* contains invalid user pages */ +#define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */ +#define BIO_QUIET 10 /* Make BIO Quiet */ +#define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) /* -- cgit v1.2.3