summaryrefslogtreecommitdiff
path: root/block/bio.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-02-12 14:13:23 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-12 14:13:23 -0800
commit3e12cefbe143b4947171ff92dd50024c4841e291 (patch)
treef58ec23a4092576ed08843cca5f5443a32106bd1 /block/bio.c
parent6bec0035286119eefc32a5b1102127e6a4032cb2 (diff)
parentd427e3c82ef4fc5fbb22c0cef0b040e6767b1028 (diff)
downloadlinux-exynos-3e12cefbe143b4947171ff92dd50024c4841e291.tar.gz
linux-exynos-3e12cefbe143b4947171ff92dd50024c4841e291.tar.bz2
linux-exynos-3e12cefbe143b4947171ff92dd50024c4841e291.zip
Merge branch 'for-3.20/core' of git://git.kernel.dk/linux-block
Pull core block IO changes from Jens Axboe: "This contains: - A series from Christoph that cleans up and refactors various parts of the REQ_BLOCK_PC handling. Contributions in that series from Dongsu Park and Kent Overstreet as well. - CFQ: - A bug fix for cfq for realtime IO scheduling from Jeff Moyer. - A stable patch fixing a potential crash in CFQ in OOM situations. From Konstantin Khlebnikov. - blk-mq: - Add support for tag allocation policies, from Shaohua. This is a prep patch enabling libata (and other SCSI parts) to use the blk-mq tagging, instead of rolling their own. - Various little tweaks from Keith and Mike, in preparation for DM blk-mq support. - Minor little fixes or tweaks from me. - A double free error fix from Tony Battersby. - The partition 4k issue fixes from Matthew and Boaz. - Add support for zero+unprovision for blkdev_issue_zeroout() from Martin" * 'for-3.20/core' of git://git.kernel.dk/linux-block: (27 commits) block: remove unused function blk_bio_map_sg block: handle the null_mapped flag correctly in blk_rq_map_user_iov blk-mq: fix double-free in error path block: prevent request-to-request merging with gaps if not allowed blk-mq: make blk_mq_run_queues() static dm: fix multipath regression due to initializing wrong request cfq-iosched: handle failure of cfq group allocation block: Quiesce zeroout wrapper block: rewrite and split __bio_copy_iov() block: merge __bio_map_user_iov into bio_map_user_iov block: merge __bio_map_kern into bio_map_kern block: pass iov_iter to the BLOCK_PC mapping functions block: add a helper to free bio bounce buffer pages block: use blk_rq_map_user_iov to implement blk_rq_map_user block: simplify bio_map_kern block: mark blk-mq devices as stackable block: keep established cmd_flags when cloning into a blk-mq request block: add blk-mq support to blk_insert_cloned_request() block: require blk_rq_prep_clone() be given an initialized clone request blk-mq: add tag allocation policy ...
Diffstat (limited to 'block/bio.c')
-rw-r--r--block/bio.c426
1 files changed, 191 insertions, 235 deletions
diff --git a/block/bio.c b/block/bio.c
index 471d7382c7d1..f66a4eae16ee 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -28,7 +28,6 @@
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
-#include <scsi/sg.h> /* for struct sg_iovec */
#include <trace/events/block.h>
@@ -1022,21 +1021,11 @@ void bio_copy_data(struct bio *dst, struct bio *src)
EXPORT_SYMBOL(bio_copy_data);
struct bio_map_data {
- int nr_sgvecs;
int is_our_pages;
- struct sg_iovec sgvecs[];
+ struct iov_iter iter;
+ struct iovec iov[];
};
-static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
- const struct sg_iovec *iov, int iov_count,
- int is_our_pages)
-{
- memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
- bmd->nr_sgvecs = iov_count;
- bmd->is_our_pages = is_our_pages;
- bio->bi_private = bmd;
-}
-
static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
gfp_t gfp_mask)
{
@@ -1044,85 +1033,101 @@ static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
return NULL;
return kmalloc(sizeof(struct bio_map_data) +
- sizeof(struct sg_iovec) * iov_count, gfp_mask);
+ sizeof(struct iovec) * iov_count, gfp_mask);
}
-static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count,
- int to_user, int from_user, int do_free_page)
+/**
+ * bio_copy_from_iter - copy all pages from iov_iter to bio
+ * @bio: The &struct bio which describes the I/O as destination
+ * @iter: iov_iter as source
+ *
+ * Copy all pages from iov_iter to bio.
+ * Returns 0 on success, or error on failure.
+ */
+static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter)
{
- int ret = 0, i;
+ int i;
struct bio_vec *bvec;
- int iov_idx = 0;
- unsigned int iov_off = 0;
bio_for_each_segment_all(bvec, bio, i) {
- char *bv_addr = page_address(bvec->bv_page);
- unsigned int bv_len = bvec->bv_len;
+ ssize_t ret;
- while (bv_len && iov_idx < iov_count) {
- unsigned int bytes;
- char __user *iov_addr;
+ ret = copy_page_from_iter(bvec->bv_page,
+ bvec->bv_offset,
+ bvec->bv_len,
+ &iter);
- bytes = min_t(unsigned int,
- iov[iov_idx].iov_len - iov_off, bv_len);
- iov_addr = iov[iov_idx].iov_base + iov_off;
+ if (!iov_iter_count(&iter))
+ break;
- if (!ret) {
- if (to_user)
- ret = copy_to_user(iov_addr, bv_addr,
- bytes);
+ if (ret < bvec->bv_len)
+ return -EFAULT;
+ }
- if (from_user)
- ret = copy_from_user(bv_addr, iov_addr,
- bytes);
+ return 0;
+}
- if (ret)
- ret = -EFAULT;
- }
+/**
+ * bio_copy_to_iter - copy all pages from bio to iov_iter
+ * @bio: The &struct bio which describes the I/O as source
+ * @iter: iov_iter as destination
+ *
+ * Copy all pages from bio to iov_iter.
+ * Returns 0 on success, or error on failure.
+ */
+static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
+{
+ int i;
+ struct bio_vec *bvec;
- bv_len -= bytes;
- bv_addr += bytes;
- iov_addr += bytes;
- iov_off += bytes;
+ bio_for_each_segment_all(bvec, bio, i) {
+ ssize_t ret;
- if (iov[iov_idx].iov_len == iov_off) {
- iov_idx++;
- iov_off = 0;
- }
- }
+ ret = copy_page_to_iter(bvec->bv_page,
+ bvec->bv_offset,
+ bvec->bv_len,
+ &iter);
+
+ if (!iov_iter_count(&iter))
+ break;
- if (do_free_page)
- __free_page(bvec->bv_page);
+ if (ret < bvec->bv_len)
+ return -EFAULT;
}
- return ret;
+ return 0;
+}
+
+static void bio_free_pages(struct bio *bio)
+{
+ struct bio_vec *bvec;
+ int i;
+
+ bio_for_each_segment_all(bvec, bio, i)
+ __free_page(bvec->bv_page);
}
/**
* bio_uncopy_user - finish previously mapped bio
* @bio: bio being terminated
*
- * Free pages allocated from bio_copy_user() and write back data
+ * Free pages allocated from bio_copy_user_iov() and write back data
* to user space in case of a read.
*/
int bio_uncopy_user(struct bio *bio)
{
struct bio_map_data *bmd = bio->bi_private;
- struct bio_vec *bvec;
- int ret = 0, i;
+ int ret = 0;
if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
/*
* if we're in a workqueue, the request is orphaned, so
* don't copy into a random user address space, just free.
*/
- if (current->mm)
- ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs,
- bio_data_dir(bio) == READ,
- 0, bmd->is_our_pages);
- else if (bmd->is_our_pages)
- bio_for_each_segment_all(bvec, bio, i)
- __free_page(bvec->bv_page);
+ if (current->mm && bio_data_dir(bio) == READ)
+ ret = bio_copy_to_iter(bio, bmd->iter);
+ if (bmd->is_our_pages)
+ bio_free_pages(bio);
}
kfree(bmd);
bio_put(bio);
@@ -1132,12 +1137,10 @@ EXPORT_SYMBOL(bio_uncopy_user);
/**
* bio_copy_user_iov - copy user data to bio
- * @q: destination block queue
- * @map_data: pointer to the rq_map_data holding pages (if necessary)
- * @iov: the iovec.
- * @iov_count: number of elements in the iovec
- * @write_to_vm: bool indicating writing to pages or not
- * @gfp_mask: memory allocation flags
+ * @q: destination block queue
+ * @map_data: pointer to the rq_map_data holding pages (if necessary)
+ * @iter: iovec iterator
+ * @gfp_mask: memory allocation flags
*
* Prepares and returns a bio for indirect user io, bouncing data
* to/from kernel pages as necessary. Must be paired with
@@ -1145,25 +1148,25 @@ EXPORT_SYMBOL(bio_uncopy_user);
*/
struct bio *bio_copy_user_iov(struct request_queue *q,
struct rq_map_data *map_data,
- const struct sg_iovec *iov, int iov_count,
- int write_to_vm, gfp_t gfp_mask)
+ const struct iov_iter *iter,
+ gfp_t gfp_mask)
{
struct bio_map_data *bmd;
- struct bio_vec *bvec;
struct page *page;
struct bio *bio;
int i, ret;
int nr_pages = 0;
- unsigned int len = 0;
+ unsigned int len = iter->count;
unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
- for (i = 0; i < iov_count; i++) {
+ for (i = 0; i < iter->nr_segs; i++) {
unsigned long uaddr;
unsigned long end;
unsigned long start;
- uaddr = (unsigned long)iov[i].iov_base;
- end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ uaddr = (unsigned long) iter->iov[i].iov_base;
+ end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1)
+ >> PAGE_SHIFT;
start = uaddr >> PAGE_SHIFT;
/*
@@ -1173,22 +1176,31 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
return ERR_PTR(-EINVAL);
nr_pages += end - start;
- len += iov[i].iov_len;
}
if (offset)
nr_pages++;
- bmd = bio_alloc_map_data(iov_count, gfp_mask);
+ bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask);
if (!bmd)
return ERR_PTR(-ENOMEM);
+ /*
+ * We need to do a deep copy of the iov_iter including the iovecs.
+ * The caller provided iov might point to an on-stack or otherwise
+ * shortlived one.
+ */
+ bmd->is_our_pages = map_data ? 0 : 1;
+ memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs);
+ iov_iter_init(&bmd->iter, iter->type, bmd->iov,
+ iter->nr_segs, iter->count);
+
ret = -ENOMEM;
bio = bio_kmalloc(gfp_mask, nr_pages);
if (!bio)
goto out_bmd;
- if (!write_to_vm)
+ if (iter->type & WRITE)
bio->bi_rw |= REQ_WRITE;
ret = 0;
@@ -1236,20 +1248,18 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
/*
* success
*/
- if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
+ if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
(map_data && map_data->from_user)) {
- ret = __bio_copy_iov(bio, iov, iov_count, 0, 1, 0);
+ ret = bio_copy_from_iter(bio, *iter);
if (ret)
goto cleanup;
}
- bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
+ bio->bi_private = bmd;
return bio;
cleanup:
if (!map_data)
- bio_for_each_segment_all(bvec, bio, i)
- __free_page(bvec->bv_page);
-
+ bio_free_pages(bio);
bio_put(bio);
out_bmd:
kfree(bmd);
@@ -1257,46 +1267,30 @@ out_bmd:
}
/**
- * bio_copy_user - copy user data to bio
- * @q: destination block queue
- * @map_data: pointer to the rq_map_data holding pages (if necessary)
- * @uaddr: start of user address
- * @len: length in bytes
- * @write_to_vm: bool indicating writing to pages or not
- * @gfp_mask: memory allocation flags
+ * bio_map_user_iov - map user iovec into bio
+ * @q: the struct request_queue for the bio
+ * @iter: iovec iterator
+ * @gfp_mask: memory allocation flags
*
- * Prepares and returns a bio for indirect user io, bouncing data
- * to/from kernel pages as necessary. Must be paired with
- * call bio_uncopy_user() on io completion.
+ * Map the user space address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
*/
-struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
- unsigned long uaddr, unsigned int len,
- int write_to_vm, gfp_t gfp_mask)
+struct bio *bio_map_user_iov(struct request_queue *q,
+ const struct iov_iter *iter,
+ gfp_t gfp_mask)
{
- struct sg_iovec iov;
-
- iov.iov_base = (void __user *)uaddr;
- iov.iov_len = len;
-
- return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
-}
-EXPORT_SYMBOL(bio_copy_user);
-
-static struct bio *__bio_map_user_iov(struct request_queue *q,
- struct block_device *bdev,
- const struct sg_iovec *iov, int iov_count,
- int write_to_vm, gfp_t gfp_mask)
-{
- int i, j;
+ int j;
int nr_pages = 0;
struct page **pages;
struct bio *bio;
int cur_page = 0;
int ret, offset;
+ struct iov_iter i;
+ struct iovec iov;
- for (i = 0; i < iov_count; i++) {
- unsigned long uaddr = (unsigned long)iov[i].iov_base;
- unsigned long len = iov[i].iov_len;
+ iov_for_each(iov, i, *iter) {
+ unsigned long uaddr = (unsigned long) iov.iov_base;
+ unsigned long len = iov.iov_len;
unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long start = uaddr >> PAGE_SHIFT;
@@ -1326,16 +1320,17 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
if (!pages)
goto out;
- for (i = 0; i < iov_count; i++) {
- unsigned long uaddr = (unsigned long)iov[i].iov_base;
- unsigned long len = iov[i].iov_len;
+ iov_for_each(iov, i, *iter) {
+ unsigned long uaddr = (unsigned long) iov.iov_base;
+ unsigned long len = iov.iov_len;
unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long start = uaddr >> PAGE_SHIFT;
const int local_nr_pages = end - start;
const int page_limit = cur_page + local_nr_pages;
ret = get_user_pages_fast(uaddr, local_nr_pages,
- write_to_vm, &pages[cur_page]);
+ (iter->type & WRITE) != WRITE,
+ &pages[cur_page]);
if (ret < local_nr_pages) {
ret = -EFAULT;
goto out_unmap;
@@ -1375,72 +1370,10 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
/*
* set data direction, and check if mapped pages need bouncing
*/
- if (!write_to_vm)
+ if (iter->type & WRITE)
bio->bi_rw |= REQ_WRITE;
- bio->bi_bdev = bdev;
bio->bi_flags |= (1 << BIO_USER_MAPPED);
- return bio;
-
- out_unmap:
- for (i = 0; i < nr_pages; i++) {
- if(!pages[i])
- break;
- page_cache_release(pages[i]);
- }
- out:
- kfree(pages);
- bio_put(bio);
- return ERR_PTR(ret);
-}
-
-/**
- * bio_map_user - map user address into bio
- * @q: the struct request_queue for the bio
- * @bdev: destination block device
- * @uaddr: start of user address
- * @len: length in bytes
- * @write_to_vm: bool indicating writing to pages or not
- * @gfp_mask: memory allocation flags
- *
- * Map the user space address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
- unsigned long uaddr, unsigned int len, int write_to_vm,
- gfp_t gfp_mask)
-{
- struct sg_iovec iov;
-
- iov.iov_base = (void __user *)uaddr;
- iov.iov_len = len;
-
- return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
-}
-EXPORT_SYMBOL(bio_map_user);
-
-/**
- * bio_map_user_iov - map user sg_iovec table into bio
- * @q: the struct request_queue for the bio
- * @bdev: destination block device
- * @iov: the iovec.
- * @iov_count: number of elements in the iovec
- * @write_to_vm: bool indicating writing to pages or not
- * @gfp_mask: memory allocation flags
- *
- * Map the user space address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
- const struct sg_iovec *iov, int iov_count,
- int write_to_vm, gfp_t gfp_mask)
-{
- struct bio *bio;
-
- bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
- gfp_mask);
- if (IS_ERR(bio))
- return bio;
/*
* subtle -- if __bio_map_user() ended up bouncing a bio,
@@ -1449,8 +1382,18 @@ struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
* reference to it
*/
bio_get(bio);
-
return bio;
+
+ out_unmap:
+ for (j = 0; j < nr_pages; j++) {
+ if (!pages[j])
+ break;
+ page_cache_release(pages[j]);
+ }
+ out:
+ kfree(pages);
+ bio_put(bio);
+ return ERR_PTR(ret);
}
static void __bio_unmap_user(struct bio *bio)
@@ -1492,8 +1435,18 @@ static void bio_map_kern_endio(struct bio *bio, int err)
bio_put(bio);
}
-static struct bio *__bio_map_kern(struct request_queue *q, void *data,
- unsigned int len, gfp_t gfp_mask)
+/**
+ * bio_map_kern - map kernel address into bio
+ * @q: the struct request_queue for the bio
+ * @data: pointer to buffer to map
+ * @len: length in bytes
+ * @gfp_mask: allocation flags for bio allocation
+ *
+ * Map the kernel address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
+ */
+struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
+ gfp_t gfp_mask)
{
unsigned long kaddr = (unsigned long)data;
unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -1517,8 +1470,11 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data,
bytes = len;
if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
- offset) < bytes)
- break;
+ offset) < bytes) {
+ /* we don't support partial mappings */
+ bio_put(bio);
+ return ERR_PTR(-EINVAL);
+ }
data += bytes;
len -= bytes;
@@ -1528,57 +1484,26 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data,
bio->bi_end_io = bio_map_kern_endio;
return bio;
}
+EXPORT_SYMBOL(bio_map_kern);
-/**
- * bio_map_kern - map kernel address into bio
- * @q: the struct request_queue for the bio
- * @data: pointer to buffer to map
- * @len: length in bytes
- * @gfp_mask: allocation flags for bio allocation
- *
- * Map the kernel address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
- gfp_t gfp_mask)
+static void bio_copy_kern_endio(struct bio *bio, int err)
{
- struct bio *bio;
-
- bio = __bio_map_kern(q, data, len, gfp_mask);
- if (IS_ERR(bio))
- return bio;
-
- if (bio->bi_iter.bi_size == len)
- return bio;
-
- /*
- * Don't support partial mappings.
- */
+ bio_free_pages(bio);
bio_put(bio);
- return ERR_PTR(-EINVAL);
}
-EXPORT_SYMBOL(bio_map_kern);
-static void bio_copy_kern_endio(struct bio *bio, int err)
+static void bio_copy_kern_endio_read(struct bio *bio, int err)
{
+ char *p = bio->bi_private;
struct bio_vec *bvec;
- const int read = bio_data_dir(bio) == READ;
- struct bio_map_data *bmd = bio->bi_private;
int i;
- char *p = bmd->sgvecs[0].iov_base;
bio_for_each_segment_all(bvec, bio, i) {
- char *addr = page_address(bvec->bv_page);
-
- if (read)
- memcpy(p, addr, bvec->bv_len);
-
- __free_page(bvec->bv_page);
+ memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
p += bvec->bv_len;
}
- kfree(bmd);
- bio_put(bio);
+ bio_copy_kern_endio(bio, err);
}
/**
@@ -1595,28 +1520,59 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
gfp_t gfp_mask, int reading)
{
+ unsigned long kaddr = (unsigned long)data;
+ unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ unsigned long start = kaddr >> PAGE_SHIFT;
struct bio *bio;
- struct bio_vec *bvec;
- int i;
+ void *p = data;
+ int nr_pages = 0;
+
+ /*
+ * Overflow, abort
+ */
+ if (end < start)
+ return ERR_PTR(-EINVAL);
- bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
- if (IS_ERR(bio))
- return bio;
+ nr_pages = end - start;
+ bio = bio_kmalloc(gfp_mask, nr_pages);
+ if (!bio)
+ return ERR_PTR(-ENOMEM);
- if (!reading) {
- void *p = data;
+ while (len) {
+ struct page *page;
+ unsigned int bytes = PAGE_SIZE;
- bio_for_each_segment_all(bvec, bio, i) {
- char *addr = page_address(bvec->bv_page);
+ if (bytes > len)
+ bytes = len;
- memcpy(addr, p, bvec->bv_len);
- p += bvec->bv_len;
- }
+ page = alloc_page(q->bounce_gfp | gfp_mask);
+ if (!page)
+ goto cleanup;
+
+ if (!reading)
+ memcpy(page_address(page), p, bytes);
+
+ if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
+ break;
+
+ len -= bytes;
+ p += bytes;
}
- bio->bi_end_io = bio_copy_kern_endio;
+ if (reading) {
+ bio->bi_end_io = bio_copy_kern_endio_read;
+ bio->bi_private = data;
+ } else {
+ bio->bi_end_io = bio_copy_kern_endio;
+ bio->bi_rw |= REQ_WRITE;
+ }
return bio;
+
+cleanup:
+ bio_free_pages(bio);
+ bio_put(bio);
+ return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(bio_copy_kern);