summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- drivers/md/raid1.c 168
-rw-r--r-- drivers/md/raid1.h 3
2 files changed, 147 insertions, 24 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b16d2ee5e9d..b45d641f4f3 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -318,25 +318,34 @@ static void raid1_end_read_request(struct bio *bio, int error)
rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
}
+static void close_write(r1bio_t *r1_bio)
+{
+ /* it really is the end of this request.
+ * Common write-completion teardown: release the write-behind page
+ * copies (if any), report the end of the write to the bitmap and
+ * then call md_write_end().  r1_bio_write_done() calls this
+ * directly when R1BIO_WriteError is not set; otherwise raid1d()
+ * calls it after the failed writes have been dealt with.
+ */
+ if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+ /* free extra copy of the data pages: drop every page in
+ * behind_bvecs, then free the array itself and clear the
+ * pointer so it cannot be reused
+ */
+ int i = r1_bio->behind_page_count;
+ while (i--)
+ safe_put_page(r1_bio->behind_bvecs[i].bv_page);
+ kfree(r1_bio->behind_bvecs);
+ r1_bio->behind_bvecs = NULL;
+ }
+ /* clear the bitmap if all writes complete successfully.
+ * The last two arguments pass "R1BIO_Degraded is not set" and
+ * "R1BIO_BehindIO is set" respectively.
+ */
+ bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
+ r1_bio->sectors,
+ !test_bit(R1BIO_Degraded, &r1_bio->state),
+ test_bit(R1BIO_BehindIO, &r1_bio->state));
+ md_write_end(r1_bio->mddev);
+}
+
static void r1_bio_write_done(r1bio_t *r1_bio)
{
- if (atomic_dec_and_test(&r1_bio->remaining))
- {
- /* it really is the end of this request */
- if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
- /* free extra copy of the data pages */
- int i = r1_bio->behind_page_count;
- while (i--)
- safe_put_page(r1_bio->behind_bvecs[i].bv_page);
- kfree(r1_bio->behind_bvecs);
- r1_bio->behind_bvecs = NULL;
- }
- /* clear the bitmap if all writes complete successfully */
- bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
- r1_bio->sectors,
- !test_bit(R1BIO_Degraded, &r1_bio->state),
- test_bit(R1BIO_BehindIO, &r1_bio->state));
- md_write_end(r1_bio->mddev);
+ if (!atomic_dec_and_test(&r1_bio->remaining))
+ return;
+
+ if (test_bit(R1BIO_WriteError, &r1_bio->state))
+ reschedule_retry(r1_bio);
+ else {
+ close_write(r1_bio);
if (test_bit(R1BIO_MadeGood, &r1_bio->state))
reschedule_retry(r1_bio);
else
@@ -360,12 +369,10 @@ static void raid1_end_write_request(struct bio *bio, int error)
/*
* 'one mirror IO has finished' event handler:
*/
- r1_bio->bios[mirror] = NULL;
- to_put = bio;
if (!uptodate) {
- md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
- /* an I/O failed, we can't clear the bitmap */
- set_bit(R1BIO_Degraded, &r1_bio->state);
+ set_bit(WriteErrorSeen,
+ &conf->mirrors[mirror].rdev->flags);
+ set_bit(R1BIO_WriteError, &r1_bio->state);
} else {
/*
* Set R1BIO_Uptodate in our master bio, so that we
@@ -380,6 +387,8 @@ static void raid1_end_write_request(struct bio *bio, int error)
sector_t first_bad;
int bad_sectors;
+ r1_bio->bios[mirror] = NULL;
+ to_put = bio;
set_bit(R1BIO_Uptodate, &r1_bio->state);
/* Maybe we can clear some bad blocks. */
@@ -1724,6 +1733,101 @@ static void fix_read_error(conf_t *conf, int read_disk,
}
}
+static void bi_complete(struct bio *bio, int error)
+{ /* bi_end_io callback installed by submit_bio_wait(): signal the
+ * struct completion that was stored in bio->bi_private.  The
+ * 'error' argument is not looked at here.
+ */
+ complete((struct completion *)bio->bi_private);
+}
+
+static int submit_bio_wait(int rw, struct bio *bio)
+{ /* Submit 'bio' with REQ_SYNC added to 'rw' and block until
+ * bi_complete() signals that it has finished.  bio->bi_private and
+ * bio->bi_end_io are overwritten by this function.
+ * Returns non-zero if BIO_UPTODATE is set on the bio afterwards and
+ * zero otherwise; narrow_write_error() treats zero as a failed write.
+ */
+ struct completion event;
+ rw |= REQ_SYNC;
+
+ init_completion(&event);
+ bio->bi_private = &event;
+ bio->bi_end_io = bi_complete;
+ submit_bio(rw, bio);
+ wait_for_completion(&event); /* 'event' is a local: do not return before the callback has run */
+
+ return test_bit(BIO_UPTODATE, &bio->bi_flags);
+}
+
+static int narrow_write_error(r1bio_t *r1_bio, int i)
+{
+ mddev_t *mddev = r1_bio->mddev;
+ conf_t *conf = mddev->private;
+ mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ int vcnt, idx;
+ struct bio_vec *vec;
+
+ /* bio has the data to be written to device 'i' where
+ * we just recently had a write error.
+ * We repeatedly clone the bio and trim down to one block,
+ * then try the write. Where the write fails we record
+ * a bad block.
+ * It is conceivable that the bio doesn't exactly align with
+ * blocks. We must handle this somehow.
+ * This is handled below: the first chunk only runs from
+ * r1_bio->sector up to the next block boundary, every later
+ * chunk is one full block, and each chunk is clamped to the
+ * number of sectors still left to write.
+ *
+ * We currently own a reference on the rdev.
+ *
+ * Returns 0 if rdev->badblocks.shift is negative (nothing is
+ * written in that case) or if any rdev_set_badblocks() call
+ * returned 0; returns 1 otherwise.  On a 0 return raid1d()
+ * calls md_error() on the device and sets R1BIO_Degraded.
+ */
+
+ int block_sectors;
+ sector_t sector;
+ int sectors;
+ int sect_to_write = r1_bio->sectors;
+ int ok = 1;
+
+ if (rdev->badblocks.shift < 0)
+ return 0;
+
+ block_sectors = 1 << rdev->badblocks.shift;
+ sector = r1_bio->sector;
+ sectors = ((sector + block_sectors)
+ & ~(sector_t)(block_sectors - 1))
+ - sector;
+
+ if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+ vcnt = r1_bio->behind_page_count;
+ vec = r1_bio->behind_bvecs;
+ idx = 0;
+ while (vec[idx].bv_page == NULL) /* skip leading slots that have no page */
+ idx++;
+ } else {
+ vcnt = r1_bio->master_bio->bi_vcnt;
+ vec = r1_bio->master_bio->bi_io_vec;
+ idx = r1_bio->master_bio->bi_idx;
+ }
+ while (sect_to_write) {
+ struct bio *wbio;
+ if (sectors > sect_to_write)
+ sectors = sect_to_write;
+ /* Write at 'sector' for 'sectors'.
+ * The new bio is first set up to describe the whole
+ * r1_bio range; md_trim_bio() is then asked to cut it
+ * down to this chunk (offset and length in sectors),
+ * after which the device's data_offset is applied.
+ */
+
+ wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
+ memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
+ wbio->bi_sector = r1_bio->sector;
+ wbio->bi_rw = WRITE;
+ wbio->bi_vcnt = vcnt;
+ wbio->bi_size = r1_bio->sectors << 9;
+ wbio->bi_idx = idx;
+
+ md_trim_bio(wbio, sector - r1_bio->sector, sectors);
+ wbio->bi_sector += rdev->data_offset;
+ wbio->bi_bdev = rdev->bdev;
+ if (submit_bio_wait(WRITE, wbio) == 0)
+ /* failure! record this chunk as bad.
+ * rdev_set_badblocks() is evaluated before 'ok',
+ * so it runs for every failed chunk even after
+ * 'ok' has already dropped to 0.
+ */
+ ok = rdev_set_badblocks(rdev, sector,
+ sectors, 0)
+ && ok;
+
+ bio_put(wbio);
+ sect_to_write -= sectors;
+ sector += sectors;
+ sectors = block_sectors; /* every chunk after the first is one full block */
+ }
+ return ok;
+}
+
static void raid1d(mddev_t *mddev)
{
r1bio_t *r1_bio;
@@ -1775,7 +1879,8 @@ static void raid1d(mddev_t *mddev)
md_done_sync(mddev, s, 1);
} else
sync_request_write(mddev, r1_bio);
- } else if (test_bit(R1BIO_MadeGood, &r1_bio->state)) {
+ } else if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+ test_bit(R1BIO_WriteError, &r1_bio->state)) {
int m;
for (m = 0; m < conf->raid_disks ; m++)
if (r1_bio->bios[m] == IO_MADE_GOOD) {
@@ -1785,7 +1890,24 @@ static void raid1d(mddev_t *mddev)
r1_bio->sector,
r1_bio->sectors);
rdev_dec_pending(rdev, mddev);
+ } else if (r1_bio->bios[m] != NULL) {
+ /* This drive got a write error. We
+ * need to narrow down and record
+ * precise write errors.
+ */
+ if (!narrow_write_error(r1_bio, m)) {
+ md_error(mddev,
+ conf->mirrors[m].rdev);
+ /* an I/O failed, we can't clear
+ * the bitmap */
+ set_bit(R1BIO_Degraded,
+ &r1_bio->state);
+ }
+ rdev_dec_pending(conf->mirrors[m].rdev,
+ mddev);
}
+ if (test_bit(R1BIO_WriteError, &r1_bio->state))
+ close_write(r1_bio);
raid_end_bio_io(r1_bio);
} else if (test_bit(R1BIO_ReadError, &r1_bio->state)) {
int disk;
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index b7885474e96..e0d676b4897 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -145,7 +145,8 @@ struct r1bio_s {
/* If a write for this request means we can clear some
* known-bad-block records, we set this flag
*/
-#define R1BIO_MadeGood 7
+#define R1BIO_MadeGood 7
+#define R1BIO_WriteError 8
extern int md_raid1_congested(mddev_t *mddev, int bits);