summaryrefslogtreecommitdiff |
diff options
author | Takashi Iwai <tiwai@suse.de> | 2017-05-02 16:56:31 +0200 |
---|---|---|
committer | Takashi Iwai <tiwai@suse.de> | 2017-05-02 16:56:31 +0200 |
commit | 27e2849cbe8faa13c9131647d343d87e46f24970 (patch) | |
tree | 4bec981e1e453f788cf1a64a22059976a3dbb2eb | |
parent | d2e1679e1f725e3fc171859001449caab16f701a (diff) |
Revert "- md/raid5: sort bios..." (bsc#1037027)rpm-4.4.63-2
We revert the recent MD changes due to regressions for RAID0.
111 files changed, 337 insertions, 12014 deletions
diff --git a/patches.drivers/0005-md-r5cache-caching-phase-of-r5cache.patch b/patches.drivers/0005-md-r5cache-caching-phase-of-r5cache.patch index 9adcf107fa..bb9945ae06 100644 --- a/patches.drivers/0005-md-r5cache-caching-phase-of-r5cache.patch +++ b/patches.drivers/0005-md-r5cache-caching-phase-of-r5cache.patch @@ -201,7 +201,7 @@ Signed-off-by: Coly Li <colyli@suse.de> continue; if (i == sh->pd_idx || i == sh->qd_idx) continue; -@@ -550,8 +631,10 @@ int r5l_write_stripe(struct r5l_log *log +@@ -551,8 +632,10 @@ int r5l_write_stripe(struct r5l_log *log for (i = 0; i < sh->disks; i++) { void *addr; @@ -213,7 +213,7 @@ Signed-off-by: Coly Li <colyli@suse.de> write_disks++; /* checksum is already calculated in last run */ if (test_bit(STRIPE_LOG_TRAPPED, &sh->state)) -@@ -817,7 +900,6 @@ static void r5l_write_super_and_discard_ +@@ -827,7 +910,6 @@ static void r5l_write_super_and_discard_ } } @@ -221,7 +221,7 @@ Signed-off-by: Coly Li <colyli@suse.de> static void r5l_do_reclaim(struct r5l_log *log) { sector_t reclaim_target = xchg(&log->reclaim_target, 0); -@@ -1218,12 +1300,80 @@ int r5c_try_caching_write(struct r5conf +@@ -1228,12 +1310,80 @@ int r5c_try_caching_write(struct r5conf int disks) { struct r5l_log *log = conf->log; @@ -305,7 +305,7 @@ Signed-off-by: Coly Li <colyli@suse.de> } /* -@@ -1234,6 +1384,9 @@ void r5c_finish_stripe_write_out(struct +@@ -1244,6 +1394,9 @@ void r5c_finish_stripe_write_out(struct struct stripe_head *sh, struct stripe_head_state *s) { @@ -315,7 +315,7 @@ Signed-off-by: Coly Li <colyli@suse.de> if (!conf->log || !test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags)) return; -@@ -1243,7 +1396,78 @@ void r5c_finish_stripe_write_out(struct +@@ -1253,7 +1406,78 @@ void r5c_finish_stripe_write_out(struct if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) return; @@ -397,7 +397,7 @@ Signed-off-by: Coly Li <colyli@suse.de> --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -218,8 +218,17 @@ static void 
raid5_wakeup_stripe_thread(s +@@ -290,8 +290,17 @@ static void raid5_wakeup_stripe_thread(s static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh, struct list_head *temp_inactive_list) { @@ -415,7 +415,7 @@ Signed-off-by: Coly Li <colyli@suse.de> if (test_bit(STRIPE_HANDLE, &sh->state)) { if (test_bit(STRIPE_DELAYED, &sh->state) && !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) -@@ -245,8 +254,29 @@ static void do_release_stripe(struct r5c +@@ -317,8 +326,29 @@ static void do_release_stripe(struct r5c < IO_THRESHOLD) md_wakeup_thread(conf->mddev->thread); atomic_dec(&conf->active_stripes); @@ -447,11 +447,11 @@ Signed-off-by: Coly Li <colyli@suse.de> } } -@@ -830,8 +860,17 @@ static void ops_run_io(struct stripe_hea +@@ -902,8 +932,17 @@ static void ops_run_io(struct stripe_hea might_sleep(); -- if (r5l_write_stripe(conf->log, sh) == 0) +- if (log_stripe(sh, s) == 0) - return; + if (!test_bit(STRIPE_R5C_CACHING, &sh->state)) { + /* writing out phase */ @@ -462,12 +462,12 @@ Signed-off-by: Coly Li <colyli@suse.de> + r5c_cache_data(conf->log, sh, s); + return; + } -+ } ++ } + for (i = disks; i--; ) { int op, op_flags = 0; int replace_only = 0; -@@ -1044,7 +1083,7 @@ again: +@@ -1116,7 +1155,7 @@ again: static struct dma_async_tx_descriptor * async_copy_data(int frombio, struct bio *bio, struct page **page, sector_t sector, struct dma_async_tx_descriptor *tx, @@ -476,7 +476,7 @@ Signed-off-by: Coly Li <colyli@suse.de> { struct bio_vec bvl; struct bvec_iter iter; -@@ -1084,7 +1123,8 @@ async_copy_data(int frombio, struct bio +@@ -1156,7 +1195,8 @@ async_copy_data(int frombio, struct bio if (frombio) { if (sh->raid_conf->skip_copy && b_offset == 0 && page_offset == 0 && @@ -486,7 +486,7 @@ Signed-off-by: Coly Li <colyli@suse.de> *page = bio_page; else tx = async_memcpy(*page, bio_page, page_offset, -@@ -1166,7 +1206,7 @@ static void ops_run_biofill(struct strip +@@ -1238,7 +1278,7 @@ static void ops_run_biofill(struct strip while (rbi && 
rbi->bi_iter.bi_sector < dev->sector + STRIPE_SECTORS) { tx = async_copy_data(0, rbi, &dev->page, @@ -495,7 +495,7 @@ Signed-off-by: Coly Li <colyli@suse.de> rbi = r5_next_bio(rbi, dev->sector); } } -@@ -1293,10 +1333,15 @@ static int set_syndrome_sources(struct p +@@ -1365,10 +1405,15 @@ static int set_syndrome_sources(struct p if (i == sh->qd_idx || i == sh->pd_idx || (srctype == SYNDROME_SRC_ALL) || (srctype == SYNDROME_SRC_WANT_DRAIN && @@ -514,7 +514,7 @@ Signed-off-by: Coly Li <colyli@suse.de> i = raid6_next_disk(i, disks); } while (i != d0_idx); -@@ -1475,6 +1520,13 @@ static void ops_complete_prexor(void *st +@@ -1547,6 +1592,13 @@ static void ops_complete_prexor(void *st pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); @@ -528,7 +528,7 @@ Signed-off-by: Coly Li <colyli@suse.de> } static struct dma_async_tx_descriptor * -@@ -1496,7 +1548,9 @@ ops_run_prexor5(struct stripe_head *sh, +@@ -1568,7 +1620,9 @@ ops_run_prexor5(struct stripe_head *sh, for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; /* Only process blocks that are known to be uptodate */ @@ -539,7 +539,7 @@ Signed-off-by: Coly Li <colyli@suse.de> xor_srcs[count++] = dev->page; } -@@ -1530,6 +1584,7 @@ ops_run_prexor6(struct stripe_head *sh, +@@ -1602,6 +1656,7 @@ ops_run_prexor6(struct stripe_head *sh, static struct dma_async_tx_descriptor * ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) { @@ -547,7 +547,7 @@ Signed-off-by: Coly Li <colyli@suse.de> int disks = sh->disks; int i; struct stripe_head *head_sh = sh; -@@ -1547,6 +1602,11 @@ ops_run_biodrain(struct stripe_head *sh, +@@ -1619,6 +1674,11 @@ ops_run_biodrain(struct stripe_head *sh, again: dev = &sh->dev[i]; @@ -559,7 +559,7 @@ Signed-off-by: Coly Li <colyli@suse.de> spin_lock_irq(&sh->stripe_lock); chosen = dev->towrite; dev->towrite = NULL; -@@ -1566,8 +1626,10 @@ again: +@@ -1638,8 +1698,10 @@ again: set_bit(R5_Discard, &dev->flags); else { tx = async_copy_data(1, wbi, 
&dev->page, @@ -572,7 +572,7 @@ Signed-off-by: Coly Li <colyli@suse.de> set_bit(R5_SkipCopy, &dev->flags); clear_bit(R5_UPTODATE, &dev->flags); clear_bit(R5_OVERWRITE, &dev->flags); -@@ -1675,7 +1737,8 @@ again: +@@ -1747,7 +1809,8 @@ again: xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; @@ -582,7 +582,7 @@ Signed-off-by: Coly Li <colyli@suse.de> xor_srcs[count++] = dev->page; } } else { -@@ -2796,6 +2859,13 @@ schedule_reconstruction(struct stripe_he +@@ -2868,6 +2931,13 @@ schedule_reconstruction(struct stripe_he int level = conf->level; if (rcw) { @@ -596,7 +596,7 @@ Signed-off-by: Coly Li <colyli@suse.de> for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; -@@ -2806,6 +2876,9 @@ schedule_reconstruction(struct stripe_he +@@ -2878,6 +2948,9 @@ schedule_reconstruction(struct stripe_he if (!expand) clear_bit(R5_UPTODATE, &dev->flags); s->locked++; @@ -606,7 +606,7 @@ Signed-off-by: Coly Li <colyli@suse.de> } } /* if we are not expanding this is a proper write request, and -@@ -2845,6 +2918,9 @@ schedule_reconstruction(struct stripe_he +@@ -2917,6 +2990,9 @@ schedule_reconstruction(struct stripe_he set_bit(R5_LOCKED, &dev->flags); clear_bit(R5_UPTODATE, &dev->flags); s->locked++; @@ -616,7 +616,7 @@ Signed-off-by: Coly Li <colyli@suse.de> } } if (!s->locked) -@@ -3516,9 +3592,12 @@ static void handle_stripe_dirtying(struc +@@ -3588,9 +3664,12 @@ static void handle_stripe_dirtying(struc } else for (i = disks; i--; ) { /* would I have to read this buffer for read_modify_write */ struct r5dev *dev = &sh->dev[i]; @@ -631,7 +631,7 @@ Signed-off-by: Coly Li <colyli@suse.de> test_bit(R5_Wantcompute, &dev->flags))) { if (test_bit(R5_Insync, &dev->flags)) rmw++; -@@ -3530,13 +3609,15 @@ static void handle_stripe_dirtying(struc +@@ -3602,13 +3681,15 @@ static void handle_stripe_dirtying(struc i != sh->pd_idx && i != sh->qd_idx && !test_bit(R5_LOCKED, &dev->flags) && !(test_bit(R5_UPTODATE, &dev->flags) || 
@@ -648,7 +648,7 @@ Signed-off-by: Coly Li <colyli@suse.de> pr_debug("for sector %llu, rmw=%d rcw=%d\n", (unsigned long long)sh->sector, rmw, rcw); set_bit(STRIPE_HANDLE, &sh->state); -@@ -3548,10 +3629,24 @@ static void handle_stripe_dirtying(struc +@@ -3620,10 +3701,24 @@ static void handle_stripe_dirtying(struc (unsigned long long)sh->sector, rmw); for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; @@ -676,7 +676,7 @@ Signed-off-by: Coly Li <colyli@suse.de> test_bit(R5_Insync, &dev->flags)) { if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { -@@ -3577,6 +3672,7 @@ static void handle_stripe_dirtying(struc +@@ -3649,6 +3744,7 @@ static void handle_stripe_dirtying(struc i != sh->pd_idx && i != sh->qd_idx && !test_bit(R5_LOCKED, &dev->flags) && !(test_bit(R5_UPTODATE, &dev->flags) || @@ -684,7 +684,7 @@ Signed-off-by: Coly Li <colyli@suse.de> test_bit(R5_Wantcompute, &dev->flags))) { rcw++; if (test_bit(R5_Insync, &dev->flags) && -@@ -3616,7 +3712,7 @@ static void handle_stripe_dirtying(struc +@@ -3688,7 +3784,7 @@ static void handle_stripe_dirtying(struc */ if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && (s->locked == 0 && (rcw == 0 || rmw == 0) && @@ -693,7 +693,7 @@ Signed-off-by: Coly Li <colyli@suse.de> schedule_reconstruction(sh, s, rcw == 0, 0); } -@@ -4110,6 +4206,8 @@ static void analyse_stripe(struct stripe +@@ -4182,6 +4278,8 @@ static void analyse_stripe(struct stripe if (test_bit(R5_InJournal, &dev->flags)) s->injournal++; @@ -702,7 +702,7 @@ Signed-off-by: Coly Li <colyli@suse.de> } if (test_bit(STRIPE_SYNCING, &sh->state)) { /* If there is a failed device being replaced, -@@ -4338,7 +4436,8 @@ static void handle_stripe(struct stripe_ +@@ -4410,7 +4508,8 @@ static void handle_stripe(struct stripe_ struct r5dev *dev = &sh->dev[i]; if (test_bit(R5_LOCKED, &dev->flags) && (i == sh->pd_idx || i == sh->qd_idx || @@ -712,9 +712,9 @@ Signed-off-by: Coly Li <colyli@suse.de> pr_debug("Writing block %d\n", i); set_bit(R5_Wantwrite, 
&dev->flags); if (prexor) -@@ -4378,6 +4477,10 @@ static void handle_stripe(struct stripe_ - test_bit(R5_Discard, &qdev->flags)))))) - handle_stripe_clean_event(conf, sh, disks, &s.return_bi); +@@ -4452,6 +4551,10 @@ static void handle_stripe(struct stripe_ + + log_stripe_write_finished(sh); + if (s.just_cached) + r5c_handle_cached_data_endio(conf, sh, disks, &s.return_bi); @@ -723,7 +723,7 @@ Signed-off-by: Coly Li <colyli@suse.de> /* Now we might consider reading some blocks, either to check/generate * parity, or to satisfy requests * or to load a block that is being partially written. -@@ -6537,6 +6640,11 @@ static struct r5conf *setup_conf(struct +@@ -6611,6 +6714,11 @@ static struct r5conf *setup_conf(struct for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) INIT_LIST_HEAD(conf->temp_inactive_list + i); @@ -759,7 +759,7 @@ Signed-off-by: Coly Li <colyli@suse.de> }; #define STRIPE_EXPAND_SYNC_FLAGS \ -@@ -620,6 +626,12 @@ struct r5conf { +@@ -543,6 +549,12 @@ struct r5conf { */ atomic_t active_stripes; struct list_head inactive_list[NR_STRIPE_HASH_LOCKS]; @@ -772,4 +772,3 @@ Signed-off-by: Coly Li <colyli@suse.de> atomic_t empty_inactive_list_nr; struct llist_head released_stripes; wait_queue_head_t wait_for_quiescent; - diff --git a/patches.drivers/0006-md-r5cache-write-out-phase-and-reclaim-support.patch b/patches.drivers/0006-md-r5cache-write-out-phase-and-reclaim-support.patch index 712c4257cd..358fe100b6 100644 --- a/patches.drivers/0006-md-r5cache-write-out-phase-and-reclaim-support.patch +++ b/patches.drivers/0006-md-r5cache-write-out-phase-and-reclaim-support.patch @@ -1,3 +1,4 @@ +From a39f7afde358ca89e9fc09a5525d3f8631a98a3a Mon Sep 17 00:00:00 2001 From: Song Liu <songliubraving@fb.com> Date: Thu, 17 Nov 2016 15:24:40 -0800 Subject: [PATCH] md/r5cache: write-out phase and reclaim support @@ -64,15 +65,25 @@ Shaohua Li <shli@fb.com>. 
Signed-off-by: Song Liu <songliubraving@fb.com> Signed-off-by: Shaohua Li <shli@fb.com> Signed-off-by: Coly Li <colyli@suse.de> + --- - drivers/md/raid5-cache.c | 411 +++++++++++++++++++++++++++++++++++++++++++---- + drivers/md/raid5-cache.c | 412 +++++++++++++++++++++++++++++++++++++++++++---- + drivers/md/raid5-log.h | 4 drivers/md/raid5.c | 21 ++ - drivers/md/raid5.h | 39 +++- - 3 files changed, 430 insertions(+), 41 deletions(-) + drivers/md/raid5.h | 34 ++- + 4 files changed, 429 insertions(+), 42 deletions(-) --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c -@@ -29,12 +29,21 @@ +@@ -20,6 +20,7 @@ + #include <linux/random.h> + #include "md.h" + #include "raid5.h" ++#include "raid5-log.h" + #include "bitmap.h" + + /* +@@ -29,12 +30,21 @@ #define BLOCK_SECTORS (8) /* @@ -96,7 +107,7 @@ Signed-off-by: Coly Li <colyli@suse.de> /* * We only need 2 bios per I/O unit to make progress, but ensure we * have a few more available to not get too tight. -@@ -141,6 +150,12 @@ struct r5l_log { +@@ -141,6 +151,12 @@ struct r5l_log { /* for r5c_cache */ enum r5c_journal_mode r5c_journal_mode; @@ -109,7 +120,7 @@ Signed-off-by: Coly Li <colyli@suse.de> }; /* -@@ -256,11 +271,109 @@ void r5c_handle_cached_data_endio(struct +@@ -256,11 +272,109 @@ void r5c_handle_cached_data_endio(struct } } @@ -220,7 +231,7 @@ Signed-off-by: Coly Li <colyli@suse.de> { struct r5conf *conf = sh->raid_conf; struct r5l_log *log = conf->log; -@@ -440,6 +553,7 @@ static void r5_reserve_log_entry(struct +@@ -440,6 +554,7 @@ static void r5_reserve_log_entry(struct { log->log_start = r5l_ring_add(log, log->log_start, BLOCK_SECTORS); @@ -228,7 +239,7 @@ Signed-off-by: Coly Li <colyli@suse.de> /* * If we filled up the log device start from the beginning again, * which will require a new bio. 
-@@ -600,21 +714,43 @@ static int r5l_log_stripe(struct r5l_log +@@ -600,21 +715,43 @@ static int r5l_log_stripe(struct r5l_log atomic_inc(&io->pending_stripe); sh->log_io = io; @@ -273,7 +284,7 @@ Signed-off-by: Coly Li <colyli@suse.de> if (!log) return -EAGAIN; -@@ -658,22 +794,49 @@ int r5l_write_stripe(struct r5l_log *log +@@ -658,22 +795,49 @@ int r5l_write_stripe(struct r5l_log *log mutex_lock(&log->io_mutex); /* meta + data */ reserve = (1 + write_disks) << (PAGE_SHIFT - 9); @@ -334,7 +345,7 @@ Signed-off-by: Coly Li <colyli@suse.de> return 0; } -@@ -720,10 +883,40 @@ static void r5l_run_no_space_stripes(str +@@ -720,10 +884,40 @@ static void r5l_run_no_space_stripes(str spin_unlock(&log->no_space_stripes_lock); } @@ -376,7 +387,7 @@ Signed-off-by: Coly Li <colyli@suse.de> } static void r5l_run_no_mem_stripe(struct r5l_log *log) -@@ -769,6 +962,7 @@ static bool r5l_complete_finished_ios(st +@@ -769,6 +963,7 @@ static bool r5l_complete_finished_ios(st static void __r5l_stripe_write_finished(struct r5l_io_unit *io) { struct r5l_log *log = io->log; @@ -384,7 +395,7 @@ Signed-off-by: Coly Li <colyli@suse.de> unsigned long flags; spin_lock_irqsave(&log->io_list_lock, flags); -@@ -779,7 +973,8 @@ static void __r5l_stripe_write_finished( +@@ -779,7 +974,8 @@ static void __r5l_stripe_write_finished( return; } @@ -394,7 +405,7 @@ Signed-off-by: Coly Li <colyli@suse.de> r5l_wake_reclaim(log, 0); spin_unlock_irqrestore(&log->io_list_lock, flags); -@@ -900,14 +1095,146 @@ static void r5l_write_super_and_discard_ +@@ -900,14 +1096,146 @@ static void r5l_write_super_and_discard_ } } @@ -542,7 +553,7 @@ Signed-off-by: Coly Li <colyli@suse.de> /* * move proper io_unit to reclaim list. We should not change the order. 
* reclaimable/unreclaimable io_unit can be mixed in the list, we -@@ -928,12 +1255,12 @@ static void r5l_do_reclaim(struct r5l_lo +@@ -928,12 +1256,12 @@ static void r5l_do_reclaim(struct r5l_lo log->io_list_lock); } @@ -558,7 +569,7 @@ Signed-off-by: Coly Li <colyli@suse.de> return; /* -@@ -945,7 +1272,7 @@ static void r5l_do_reclaim(struct r5l_lo +@@ -945,7 +1273,7 @@ static void r5l_do_reclaim(struct r5l_lo mutex_lock(&log->io_mutex); log->last_checkpoint = next_checkpoint; @@ -567,7 +578,7 @@ Signed-off-by: Coly Li <colyli@suse.de> mutex_unlock(&log->io_mutex); r5l_run_no_space_stripes(log); -@@ -959,14 +1286,17 @@ static void r5l_reclaim_thread(struct md +@@ -959,14 +1287,17 @@ static void r5l_reclaim_thread(struct md if (!log) return; @@ -586,7 +597,7 @@ Signed-off-by: Coly Li <colyli@suse.de> do { target = log->reclaim_target; if (new < target) -@@ -990,11 +1320,12 @@ void r5l_quiesce(struct r5l_log *log, in +@@ -990,11 +1321,12 @@ void r5l_quiesce(struct r5l_log *log, in return; log->reclaim_thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev, "reclaim"); @@ -600,7 +611,7 @@ Signed-off-by: Coly Li <colyli@suse.de> md_unregister_thread(&log->reclaim_thread); r5l_do_reclaim(log); } -@@ -1415,12 +1746,22 @@ void r5c_finish_stripe_write_out(struct +@@ -1415,12 +1747,22 @@ void r5c_finish_stripe_write_out(struct if (do_wakeup) wake_up(&conf->wait_for_overlap); @@ -623,7 +634,7 @@ Signed-off-by: Coly Li <colyli@suse.de> int pages = 0; int reserve; int i; -@@ -1451,12 +1792,15 @@ r5c_cache_data(struct r5l_log *log, stru +@@ -1451,12 +1793,15 @@ r5c_cache_data(struct r5l_log *log, stru mutex_lock(&log->io_mutex); /* meta + data */ reserve = (1 + pages) << (PAGE_SHIFT - 9); @@ -644,7 +655,7 @@ Signed-off-by: Coly Li <colyli@suse.de> } else { ret = r5l_log_stripe(log, sh, pages, 0); if (ret) { -@@ -1470,7 +1814,6 @@ r5c_cache_data(struct r5l_log *log, stru +@@ -1470,7 +1815,6 @@ r5c_cache_data(struct r5l_log *log, stru return 0; } @@ -652,7 +663,7 @@ 
Signed-off-by: Coly Li <colyli@suse.de> static int r5l_load_log(struct r5l_log *log) { struct md_rdev *rdev = log->rdev; -@@ -1530,6 +1873,9 @@ create: +@@ -1530,6 +1874,9 @@ create: log->max_free_space = RECLAIM_MAX_FREE_SPACE; log->last_checkpoint = cp; log->next_checkpoint = cp; @@ -662,7 +673,7 @@ Signed-off-by: Coly Li <colyli@suse.de> __free_page(page); -@@ -1600,6 +1946,8 @@ int r5l_init_log(struct r5conf *conf, st +@@ -1604,6 +1951,8 @@ int r5l_init_log(struct r5conf *conf, st log->rdev->mddev, "reclaim"); if (!log->reclaim_thread) goto reclaim_thread; @@ -671,7 +682,7 @@ Signed-off-by: Coly Li <colyli@suse.de> init_waitqueue_head(&log->iounit_wait); INIT_LIST_HEAD(&log->no_mem_stripes); -@@ -1608,6 +1956,9 @@ int r5l_init_log(struct r5conf *conf, st +@@ -1612,6 +1961,9 @@ int r5l_init_log(struct r5conf *conf, st spin_lock_init(&log->no_space_stripes_lock); log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; @@ -681,9 +692,24 @@ Signed-off-by: Coly Li <colyli@suse.de> if (r5l_load_log(log)) goto error; +--- a/drivers/md/raid5-log.h ++++ b/drivers/md/raid5-log.h +@@ -19,9 +19,11 @@ r5c_finish_stripe_write_out(struct r5con + struct stripe_head_state *s); + extern void r5c_release_extra_page(struct stripe_head *sh); + extern void r5c_use_extra_page(struct stripe_head *sh); ++extern void r5l_wake_reclaim(struct r5l_log *log, sector_t space); + extern void r5c_handle_cached_data_endio(struct r5conf *conf, + struct stripe_head *sh, int disks, struct bio_list *return_bi); +-extern int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh); ++extern int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh, ++ struct stripe_head_state *s); + extern void r5c_make_stripe_write_out(struct stripe_head *sh); + extern void r5c_flush_cache(struct r5conf *conf, int num); + extern void r5c_check_stripe_cache_usage(struct r5conf *conf); --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -228,6 +228,16 @@ static void do_release_stripe(struct r5c +@@ -229,6 
+229,16 @@ static void do_release_stripe(struct r5c for (i = sh->disks; i--; ) if (test_bit(R5_InJournal, &sh->dev[i].flags)) injournal++; @@ -700,7 +726,7 @@ Signed-off-by: Coly Li <colyli@suse.de> if (test_bit(STRIPE_HANDLE, &sh->state)) { if (test_bit(STRIPE_DELAYED, &sh->state) && -@@ -268,6 +278,7 @@ static void do_release_stripe(struct r5c +@@ -269,6 +279,7 @@ static void do_release_stripe(struct r5c if (test_and_clear_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) atomic_dec(&conf->r5c_cached_partial_stripes); list_add_tail(&sh->lru, &conf->r5c_full_stripe_list); @@ -708,7 +734,7 @@ Signed-off-by: Coly Li <colyli@suse.de> } else { /* partial stripe */ if (!test_and_set_bit(STRIPE_R5C_PARTIAL_STRIPE, -@@ -639,9 +650,12 @@ raid5_get_active_stripe(struct r5conf *c +@@ -640,9 +651,12 @@ raid5_get_active_stripe(struct r5conf *c } if (noblock && sh == NULL) break; @@ -721,7 +747,7 @@ Signed-off-by: Coly Li <colyli@suse.de> wait_event_lock_irq( conf->wait_for_stripe, !list_empty(conf->inactive_list + hash) && -@@ -1992,7 +2006,9 @@ static struct stripe_head *alloc_stripe( +@@ -1993,7 +2007,9 @@ static struct stripe_head *alloc_stripe( spin_lock_init(&sh->batch_lock); INIT_LIST_HEAD(&sh->batch_list); INIT_LIST_HEAD(&sh->lru); @@ -731,7 +757,7 @@ Signed-off-by: Coly Li <colyli@suse.de> for (i = 0; i < disks; i++) { struct r5dev *dev = &sh->dev[i]; -@@ -4759,6 +4775,10 @@ static int raid5_congested(struct mddev +@@ -4762,6 +4778,10 @@ static int raid5_congested(struct mddev if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) return 1; @@ -742,7 +768,7 @@ Signed-off-by: Coly Li <colyli@suse.de> if (conf->quiesce) return 1; if (atomic_read(&conf->empty_inactive_list_nr)) -@@ -7706,6 +7726,7 @@ static void raid5_quiesce(struct mddev * +@@ -7697,6 +7717,7 @@ static void raid5_quiesce(struct mddev * /* '2' tells resync/reshape to pause so that all * active stripes can drain */ @@ -807,13 +833,3 @@ Signed-off-by: Coly Li <colyli@suse.de> struct shrinker shrinker; int 
pool_size; /* number of disks in stripeheads in pool */ spinlock_t device_lock; -@@ -746,4 +758,9 @@ extern void r5l_stripe_write_finished(st - extern int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio); - extern void r5l_quiesce(struct r5l_log *log, int state); - extern bool r5l_log_disk_error(struct r5conf *conf); -+extern void r5l_wake_reclaim(struct r5l_log *log, sector_t space); -+extern void r5c_make_stripe_write_out(struct stripe_head *sh); -+extern void r5c_flush_cache(struct r5conf *conf, int num); -+extern void r5c_check_stripe_cache_usage(struct r5conf *conf); -+extern void r5c_check_cached_full_stripe(struct r5conf *conf); - #endif diff --git a/patches.drivers/0007-md-r5cache-sysfs-entry-journal_mode.patch b/patches.drivers/0007-md-r5cache-sysfs-entry-journal_mode.patch index bb2a1b602a..20ffc4b406 100644 --- a/patches.drivers/0007-md-r5cache-sysfs-entry-journal_mode.patch +++ b/patches.drivers/0007-md-r5cache-sysfs-entry-journal_mode.patch @@ -110,7 +110,7 @@ Signed-off-by: Coly Li <colyli@suse.de> * be called in write-back mode. 
--- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -6319,6 +6319,7 @@ static struct attribute *raid5_attrs[] = +@@ -6322,6 +6322,7 @@ static struct attribute *raid5_attrs[] = &raid5_group_thread_cnt.attr, &raid5_skip_copy.attr, &raid5_rmw_level.attr, @@ -120,9 +120,9 @@ Signed-off-by: Coly Li <colyli@suse.de> static struct attribute_group raid5_attrs_group = { --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h -@@ -763,4 +763,5 @@ extern void r5c_make_stripe_write_out(st - extern void r5c_flush_cache(struct r5conf *conf, int num); - extern void r5c_check_stripe_cache_usage(struct r5conf *conf); - extern void r5c_check_cached_full_stripe(struct r5conf *conf); +@@ -749,4 +749,5 @@ extern sector_t raid5_compute_sector(str + extern struct stripe_head * + raid5_get_active_stripe(struct r5conf *conf, sector_t sector, + int previous, int noblock, int noquiesce); +extern struct md_sysfs_entry r5c_journal_mode; #endif diff --git a/patches.drivers/0009-md-r5cache-disable-write-back-for-degraded-array.patch b/patches.drivers/0009-md-r5cache-disable-write-back-for-degraded-array.patch index 1c4a3427fa..b199ca90d8 100644 --- a/patches.drivers/0009-md-r5cache-disable-write-back-for-degraded-array.patch +++ b/patches.drivers/0009-md-r5cache-disable-write-back-for-degraded-array.patch @@ -24,14 +24,15 @@ Signed-off-by: Shaohua Li <shli@fb.com> Signed-off-by: Coly Li <colyli@suse.de> --- - drivers/md/raid5-cache.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ - drivers/md/raid5.c | 15 ++++++++------- - drivers/md/raid5.h | 2 ++ - 3 files changed, 56 insertions(+), 7 deletions(-) + drivers/md/raid5-cache.c | 53 +++++++++++++++++++++++++++++++++++++++++------ + drivers/md/raid5-log.h | 4 +-- + drivers/md/raid5.c | 15 +++++++------ + drivers/md/raid5.h | 2 + + 4 files changed, 59 insertions(+), 15 deletions(-) --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c -@@ -161,6 +161,8 @@ struct r5l_log { +@@ -163,6 +163,8 @@ struct r5l_log { /* to submit async io_units, to 
fulfill ordering of flush */ struct work_struct deferred_io_work; @@ -40,7 +41,7 @@ Signed-off-by: Coly Li <colyli@suse.de> }; /* -@@ -603,6 +605,21 @@ static void r5l_submit_io_async(struct w +@@ -612,6 +614,21 @@ static void r5l_submit_io_async(struct w r5l_do_submit_io(log, io); } @@ -62,7 +63,7 @@ Signed-off-by: Coly Li <colyli@suse.de> static void r5l_submit_current_io(struct r5l_log *log) { struct r5l_io_unit *io = log->current_io; -@@ -1793,6 +1810,10 @@ static ssize_t r5c_journal_mode_store(st +@@ -2270,6 +2287,10 @@ static ssize_t r5c_journal_mode_store(st val > R5C_JOURNAL_MODE_WRITE_BACK) return -EINVAL; @@ -73,7 +74,7 @@ Signed-off-by: Coly Li <colyli@suse.de> mddev_suspend(mddev); conf->log->r5c_journal_mode = val; mddev_resume(mddev); -@@ -1847,6 +1868,16 @@ int r5c_try_caching_write(struct r5conf +@@ -2324,6 +2345,16 @@ int r5c_try_caching_write(struct r5conf set_bit(STRIPE_R5C_CACHING, &sh->state); } @@ -90,7 +91,7 @@ Signed-off-by: Coly Li <colyli@suse.de> for (i = disks; i--; ) { dev = &sh->dev[i]; /* if non-overwrite, use writing-out phase */ -@@ -2074,6 +2105,19 @@ ioerr: +@@ -2580,6 +2611,19 @@ ioerr: return ret; } @@ -110,7 +111,7 @@ Signed-off-by: Coly Li <colyli@suse.de> int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) { struct r5l_log *log; -@@ -2145,6 +2189,7 @@ int r5l_init_log(struct r5conf *conf, st +@@ -2655,6 +2699,7 @@ int r5l_init_log(struct r5conf *conf, st spin_lock_init(&log->no_space_stripes_lock); INIT_WORK(&log->deferred_io_work, r5l_submit_io_async); @@ -118,17 +119,45 @@ Signed-off-by: Coly Li <colyli@suse.de> log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; INIT_LIST_HEAD(&log->stripe_in_journal_list); -@@ -2175,6 +2220,7 @@ io_kc: +@@ -2685,13 +2730,9 @@ io_kc: + return -EINVAL; + } - void r5l_exit_log(struct r5l_log *log) +-void r5l_exit_log(struct r5conf *conf) ++void r5l_exit_log(struct r5l_log *log) { +- struct r5l_log *log = conf->log; +- +- conf->log = NULL; +- synchronize_rcu(); +- + 
flush_work(&log->disable_writeback_work); md_unregister_thread(&log->reclaim_thread); mempool_destroy(log->meta_pool); bioset_free(log->bs); +--- a/drivers/md/raid5-log.h ++++ b/drivers/md/raid5-log.h +@@ -2,7 +2,7 @@ + #define _RAID5_LOG_H + + extern int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev); +-extern void r5l_exit_log(struct r5conf *conf); ++extern void r5l_exit_log(struct r5l_log *log); + extern int r5l_write_stripe(struct r5l_log *log, struct stripe_head *head_sh); + extern void r5l_write_stripe_run(struct r5l_log *log); + extern void r5l_flush_stripe_to_raid(struct r5l_log *log); +@@ -59,7 +59,7 @@ static inline void log_write_stripe_run( + static inline void log_exit(struct r5conf *conf) + { + if (conf->log) +- r5l_exit_log(conf); ++ r5l_exit_log(conf->log); + } + + static inline int log_init(struct r5conf *conf, struct md_rdev *journal_dev) --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -554,7 +554,7 @@ static struct stripe_head *__find_stripe +@@ -555,7 +555,7 @@ static struct stripe_head *__find_stripe * of the two sections, and some non-in_sync devices may * be insync in the section most affected by failed devices. 
*/ @@ -137,7 +166,7 @@ Signed-off-by: Coly Li <colyli@suse.de> { int degraded, degraded2; int i; -@@ -617,7 +617,7 @@ static int has_failed(struct r5conf *con +@@ -618,7 +618,7 @@ static int has_failed(struct r5conf *con if (conf->mddev->reshape_position == MaxSector) return conf->mddev->degraded > conf->max_degraded; @@ -146,7 +175,7 @@ Signed-off-by: Coly Li <colyli@suse.de> if (degraded > conf->max_degraded) return 1; return 0; -@@ -2524,7 +2524,7 @@ static void raid5_error(struct mddev *md +@@ -2559,7 +2559,7 @@ static void raid5_error(struct mddev *md spin_lock_irqsave(&conf->device_lock, flags); clear_bit(In_sync, &rdev->flags); @@ -155,7 +184,7 @@ Signed-off-by: Coly Li <colyli@suse.de> spin_unlock_irqrestore(&conf->device_lock, flags); set_bit(MD_RECOVERY_INTR, &mddev->recovery); -@@ -2538,6 +2538,7 @@ static void raid5_error(struct mddev *md +@@ -2573,6 +2573,7 @@ static void raid5_error(struct mddev *md bdevname(rdev->bdev, b), mdname(mddev), conf->raid_disks - mddev->degraded); @@ -163,7 +192,7 @@ Signed-off-by: Coly Li <colyli@suse.de> } /* -@@ -7012,7 +7013,7 @@ static int raid5_run(struct mddev *mddev +@@ -7135,7 +7136,7 @@ static int raid5_run(struct mddev *mddev /* * 0 for a fully functional array, 1 or 2 for a degraded array. */ @@ -172,7 +201,7 @@ Signed-off-by: Coly Li <colyli@suse.de> if (has_failed(conf)) { pr_crit("md/raid:%s: not enough operational devices (%d/%d failed)\n", -@@ -7258,7 +7259,7 @@ static int raid5_spare_active(struct mdd +@@ -7376,7 +7377,7 @@ static int raid5_spare_active(struct mdd } } spin_lock_irqsave(&conf->device_lock, flags); @@ -181,7 +210,7 @@ Signed-off-by: Coly Li <colyli@suse.de> spin_unlock_irqrestore(&conf->device_lock, flags); print_raid5_conf(conf); return count; -@@ -7616,7 +7617,7 @@ static int raid5_start_reshape(struct md +@@ -7727,7 +7728,7 @@ static int raid5_start_reshape(struct md * pre and post number of devices. 
*/ spin_lock_irqsave(&conf->device_lock, flags); @@ -190,7 +219,7 @@ Signed-off-by: Coly Li <colyli@suse.de> spin_unlock_irqrestore(&conf->device_lock, flags); } mddev->raid_disks = conf->raid_disks; -@@ -7704,7 +7705,7 @@ static void raid5_finish_reshape(struct +@@ -7815,7 +7816,7 @@ static void raid5_finish_reshape(struct } else { int d; spin_lock_irq(&conf->device_lock); @@ -201,17 +230,11 @@ Signed-off-by: Coly Li <colyli@suse.de> d < conf->raid_disks - mddev->delta_disks; --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h -@@ -750,6 +750,7 @@ extern sector_t raid5_compute_sector(str +@@ -760,5 +760,7 @@ extern sector_t raid5_compute_sector(str extern struct stripe_head * raid5_get_active_stripe(struct r5conf *conf, sector_t sector, int previous, int noblock, int noquiesce); +extern int raid5_calc_degraded(struct r5conf *conf); - extern int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev); - extern void r5l_exit_log(struct r5l_log *log); - extern int r5l_write_stripe(struct r5l_log *log, struct stripe_head *head_sh); -@@ -765,4 +766,5 @@ extern void r5c_flush_cache(struct r5con - extern void r5c_check_stripe_cache_usage(struct r5conf *conf); - extern void r5c_check_cached_full_stripe(struct r5conf *conf); extern struct md_sysfs_entry r5c_journal_mode; +extern void r5c_update_on_rdev_error(struct mddev *mddev); #endif diff --git a/patches.drivers/0013-raid5-cache-suspend-reclaim-thread-instead-of-shutdo.patch b/patches.drivers/0013-raid5-cache-suspend-reclaim-thread-instead-of-shutdo.patch index d5be430865..3e42f0bbab 100644 --- a/patches.drivers/0013-raid5-cache-suspend-reclaim-thread-instead-of-shutdo.patch +++ b/patches.drivers/0013-raid5-cache-suspend-reclaim-thread-instead-of-shutdo.patch @@ -21,7 +21,7 @@ Signed-off-by: Coly Li <colyli@suse.de> --- a/drivers/md/md.c +++ b/drivers/md/md.c -@@ -7195,10 +7195,12 @@ static int md_thread(void *arg) +@@ -7203,10 +7203,12 @@ static int md_thread(void *arg) wait_event_interruptible_timeout (thread->wqueue, 
test_bit(THREAD_WAKEUP, &thread->flags) @@ -44,8 +44,8 @@ Signed-off-by: Coly Li <colyli@suse.de> +#include <linux/kthread.h> #include "md.h" #include "raid5.h" - #include "bitmap.h" -@@ -1454,23 +1455,14 @@ void r5l_quiesce(struct r5l_log *log, in + #include "raid5-log.h" +@@ -1438,23 +1439,14 @@ void r5l_quiesce(struct r5l_log *log, in struct mddev *mddev; if (!log || state == 2) return; diff --git a/patches.drivers/0015-md-r5cache-handle-alloc_page-failure.patch b/patches.drivers/0015-md-r5cache-handle-alloc_page-failure.patch index 3d0d78ae3e..01c0e27644 100644 --- a/patches.drivers/0015-md-r5cache-handle-alloc_page-failure.patch +++ b/patches.drivers/0015-md-r5cache-handle-alloc_page-failure.patch @@ -28,7 +28,7 @@ Signed-off-by: Coly Li <colyli@suse.de> --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c -@@ -2357,15 +2357,40 @@ int r5c_try_caching_write(struct r5conf +@@ -2326,15 +2326,40 @@ int r5c_try_caching_write(struct r5conf */ void r5c_release_extra_page(struct stripe_head *sh) { @@ -72,7 +72,7 @@ Signed-off-by: Coly Li <colyli@suse.de> /* --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -876,6 +876,8 @@ static void ops_run_io(struct stripe_hea +@@ -889,6 +889,8 @@ static void ops_run_io(struct stripe_hea if (!test_bit(STRIPE_R5C_CACHING, &sh->state)) { /* writing out phase */ @@ -81,7 +81,7 @@ Signed-off-by: Coly Li <colyli@suse.de> if (r5l_write_stripe(conf->log, sh) == 0) return; } else { /* caching phase */ -@@ -2007,6 +2009,7 @@ static struct stripe_head *alloc_stripe( +@@ -2023,6 +2025,7 @@ static struct stripe_head *alloc_stripe( INIT_LIST_HEAD(&sh->batch_list); INIT_LIST_HEAD(&sh->lru); INIT_LIST_HEAD(&sh->r5c); @@ -89,7 +89,7 @@ Signed-off-by: Coly Li <colyli@suse.de> atomic_set(&sh->count, 1); sh->log_start = MaxSector; for (i = 0; i < disks; i++) { -@@ -2253,10 +2256,24 @@ static int resize_stripes(struct r5conf +@@ -2264,10 +2267,24 @@ static int resize_stripes(struct r5conf */ ndisks = kzalloc(newsize * sizeof(struct 
disk_info), GFP_NOIO); if (ndisks) { @@ -117,7 +117,7 @@ Signed-off-by: Coly Li <colyli@suse.de> } else err = -ENOMEM; -@@ -3581,10 +3598,10 @@ unhash: +@@ -3627,10 +3644,10 @@ unhash: break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS); } @@ -132,7 +132,7 @@ Signed-off-by: Coly Li <colyli@suse.de> { int rmw = 0, rcw = 0, i; sector_t recovery_cp = conf->mddev->recovery_cp; -@@ -3650,12 +3667,32 @@ static void handle_stripe_dirtying(struc +@@ -3696,12 +3713,32 @@ static void handle_stripe_dirtying(struc dev->page == dev->orig_page && !test_bit(R5_LOCKED, &sh->dev[sh->pd_idx].flags)) { /* alloc page for prexor */ @@ -168,7 +168,7 @@ Signed-off-by: Coly Li <colyli@suse.de> if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx || test_bit(R5_InJournal, &dev->flags)) && -@@ -3731,6 +3768,7 @@ static void handle_stripe_dirtying(struc +@@ -3777,6 +3814,7 @@ static void handle_stripe_dirtying(struc (s->locked == 0 && (rcw == 0 || rmw == 0) && !test_bit(STRIPE_BIT_DELAY, &sh->state))) schedule_reconstruction(sh, s, rcw == 0, 0); @@ -176,7 +176,7 @@ Signed-off-by: Coly Li <colyli@suse.de> } static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh, -@@ -4546,8 +4584,12 @@ static void handle_stripe(struct stripe_ +@@ -4594,8 +4632,12 @@ static void handle_stripe(struct stripe_ if (ret == -EAGAIN || /* stripe under reclaim: !caching && injournal */ (!test_bit(STRIPE_R5C_CACHING, &sh->state) && @@ -191,16 +191,16 @@ Signed-off-by: Coly Li <colyli@suse.de> } } -@@ -6461,6 +6503,8 @@ static void raid5_free_percpu(struct r5c +@@ -6509,6 +6551,8 @@ static void raid5_free_percpu(struct r5c static void free_conf(struct r5conf *conf) { + int i; + - if (conf->log) - r5l_exit_log(conf->log); + log_exit(conf); + if (conf->shrinker.nr_deferred) -@@ -6469,6 +6513,9 @@ static void free_conf(struct r5conf *con +@@ -6517,6 +6561,9 @@ static void free_conf(struct r5conf *con free_thread_groups(conf); shrink_stripes(conf); raid5_free_percpu(conf); @@ -210,7 +210,7 
@@ Signed-off-by: Coly Li <colyli@suse.de> kfree(conf->disks); kfree(conf->stripe_hashtbl); kfree(conf); -@@ -6651,9 +6698,16 @@ static struct r5conf *setup_conf(struct +@@ -6699,9 +6746,16 @@ static struct r5conf *setup_conf(struct conf->disks = kzalloc(max_disks * sizeof(struct disk_info), GFP_KERNEL); @@ -229,7 +229,7 @@ Signed-off-by: Coly Li <colyli@suse.de> if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h -@@ -276,6 +276,7 @@ struct stripe_head_state { +@@ -282,6 +282,7 @@ struct stripe_head_state { struct md_rdev *blocked_rdev; int handle_bad_blocks; int log_failed; @@ -237,7 +237,7 @@ Signed-off-by: Coly Li <colyli@suse.de> }; /* Flags for struct r5dev.flags */ -@@ -439,6 +440,7 @@ enum { +@@ -446,6 +447,7 @@ enum { struct disk_info { struct md_rdev *rdev, *replacement; @@ -245,7 +245,7 @@ Signed-off-by: Coly Li <colyli@suse.de> }; /* -@@ -559,6 +561,9 @@ enum r5_cache_state { +@@ -572,6 +574,9 @@ enum r5_cache_state { * only process stripes that are already * occupying the log */ diff --git a/patches.drivers/0039-md-r5cache-enable-chunk_aligned_read-with-write-back.patch b/patches.drivers/0039-md-r5cache-enable-chunk_aligned_read-with-write-back.patch index d926e2709f..ef6b8ccdf4 100644 --- a/patches.drivers/0039-md-r5cache-enable-chunk_aligned_read-with-write-back.patch +++ b/patches.drivers/0039-md-r5cache-enable-chunk_aligned_read-with-write-back.patch @@ -57,8 +57,8 @@ Signed-off-by: Coly Li <colyli@suse.de> +#include <linux/types.h> #include "md.h" #include "raid5.h" - #include "bitmap.h" -@@ -164,9 +165,60 @@ struct r5l_log { + #include "raid5-log.h" +@@ -165,9 +166,60 @@ struct r5l_log { struct work_struct deferred_io_work; /* to disable write back during in degraded mode */ struct work_struct disable_writeback_work; @@ -119,7 +119,7 @@ Signed-off-by: Coly Li <colyli@suse.de> * an IO range starts from a meta data block and end at the next meta data * block. 
The io unit's the meta data block tracks data/parity followed it. io * unit is written to log disk with normal write, as we always flush log disk -@@ -412,16 +464,6 @@ void r5c_make_stripe_write_out(struct st +@@ -413,16 +465,6 @@ void r5c_make_stripe_write_out(struct st if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) atomic_inc(&conf->preread_active_stripes); @@ -136,7 +136,7 @@ Signed-off-by: Coly Li <colyli@suse.de> } static void r5c_handle_data_cached(struct stripe_head *sh) -@@ -2320,6 +2362,10 @@ int r5c_try_caching_write(struct r5conf +@@ -2321,6 +2363,10 @@ int r5c_try_caching_write(struct r5conf int i; struct r5dev *dev; int to_cache = 0; @@ -147,7 +147,7 @@ Signed-off-by: Coly Li <colyli@suse.de> BUG_ON(!r5c_is_writeback(log)); -@@ -2364,6 +2410,44 @@ int r5c_try_caching_write(struct r5conf +@@ -2365,6 +2411,44 @@ int r5c_try_caching_write(struct r5conf } } @@ -192,7 +192,7 @@ Signed-off-by: Coly Li <colyli@suse.de> for (i = disks; i--; ) { dev = &sh->dev[i]; if (dev->towrite) { -@@ -2438,17 +2522,20 @@ void r5c_finish_stripe_write_out(struct +@@ -2439,17 +2523,20 @@ void r5c_finish_stripe_write_out(struct struct stripe_head *sh, struct stripe_head_state *s) { @@ -216,7 +216,7 @@ Signed-off-by: Coly Li <colyli@suse.de> return; for (i = sh->disks; i--; ) { -@@ -2470,12 +2557,43 @@ void r5c_finish_stripe_write_out(struct +@@ -2471,12 +2558,43 @@ void r5c_finish_stripe_write_out(struct if (do_wakeup) wake_up(&conf->wait_for_overlap); @@ -264,7 +264,7 @@ Signed-off-by: Coly Li <colyli@suse.de> } int -@@ -2535,6 +2653,22 @@ r5c_cache_data(struct r5l_log *log, stru +@@ -2536,6 +2654,22 @@ r5c_cache_data(struct r5l_log *log, stru return 0; } @@ -287,7 +287,7 @@ Signed-off-by: Coly Li <colyli@suse.de> static int r5l_load_log(struct r5l_log *log) { struct md_rdev *rdev = log->rdev; -@@ -2680,6 +2814,9 @@ int r5l_init_log(struct r5conf *conf, st +@@ -2685,6 +2819,9 @@ int r5l_init_log(struct r5conf *conf, st if (!log->meta_pool) goto out_mempool; @@ 
-299,7 +299,7 @@ Signed-off-by: Coly Li <colyli@suse.de> if (!log->reclaim_thread) --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -279,13 +279,13 @@ static void do_release_stripe(struct r5c +@@ -280,13 +280,13 @@ static void do_release_stripe(struct r5c atomic_dec(&conf->r5c_cached_partial_stripes); list_add_tail(&sh->lru, &conf->r5c_full_stripe_list); r5c_check_cached_full_stripe(conf); @@ -319,7 +319,7 @@ Signed-off-by: Coly Li <colyli@suse.de> } } } -@@ -5028,6 +5028,13 @@ static int raid5_read_one_chunk(struct m +@@ -5031,6 +5031,13 @@ static int raid5_read_one_chunk(struct m rdev->recovery_offset >= end_sector))) rdev = NULL; } @@ -333,7 +333,7 @@ Signed-off-by: Coly Li <colyli@suse.de> if (rdev) { sector_t first_bad; int bad_sectors; -@@ -5384,7 +5391,6 @@ static void raid5_make_request(struct md +@@ -5387,7 +5394,6 @@ static void raid5_make_request(struct md * data on failed drives. */ if (rw == READ && mddev->degraded == 0 && @@ -343,8 +343,8 @@ Signed-off-by: Coly Li <colyli@suse.de> if (!bi) --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h -@@ -777,4 +777,5 @@ extern void r5c_check_stripe_cache_usage - extern void r5c_check_cached_full_stripe(struct r5conf *conf); +@@ -763,4 +763,5 @@ raid5_get_active_stripe(struct r5conf *c + extern int raid5_calc_degraded(struct r5conf *conf); extern struct md_sysfs_entry r5c_journal_mode; extern void r5c_update_on_rdev_error(struct mddev *mddev); +extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect); diff --git a/patches.drivers/0046-md-raid5-sort-bios.patch b/patches.drivers/0046-md-raid5-sort-bios.patch deleted file mode 100644 index 852e0564e5..0000000000 --- a/patches.drivers/0046-md-raid5-sort-bios.patch +++ /dev/null @@ -1,286 +0,0 @@ -From: Shaohua Li <shli@fb.com> -Date: Fri, 3 Mar 2017 22:06:12 -0800 -Subject: [PATCH] md/raid5: sort bios -Git-commit: aaf9f12ebfafd1ea603d61ead6dbcf456a86e0f3 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in 
subsystem maintainer repository -References: FATE#321488 - -Previous patch (raid5: only dispatch IO from raid5d for harddisk raid) -defers IO dispatching. The goal is to create better IO pattern. At that -time, we don't sort the deffered IO and hope the block layer can do IO -merge and sort. Now the raid5-cache writeback could create large amount -of bios. And if we enable muti-thread for stripe handling, we can't -control when to dispatch IO to raid disks. In a lot of time, we are -dispatching IO which block layer can't do merge effectively. - -This patch moves further for the IO dispatching defer. We accumulate -bios, but we don't dispatch all the bios after a threshold is met. This -'dispatch partial portion of bios' stragety allows bios coming in a -large time window are sent to disks together. At the dispatching time, -there is large chance the block layer can merge the bios. To make this -more effective, we dispatch IO in ascending order. This increases -request merge chance and reduces disk seek. 
- -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> ---- - drivers/md/raid5.c | 138 +++++++++++++++++++++++++++++++++++++++++++---------- - drivers/md/raid5.h | 14 ++++- - 2 files changed, 126 insertions(+), 26 deletions(-) - ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -56,6 +56,7 @@ - #include <linux/nodemask.h> - #include <linux/flex_array.h> - #include <trace/events/block.h> -+#include <linux/list_sort.h> - - #include "md.h" - #include "raid5.h" -@@ -861,41 +862,107 @@ static int use_new_offset(struct r5conf - return 1; - } - --static void flush_deferred_bios(struct r5conf *conf) -+static void dispatch_bio_list(struct bio_list *tmp) - { -- struct bio_list tmp; - struct bio *bio; - -- if (!conf->batch_bio_dispatch || !conf->group_cnt) -+ while ((bio = bio_list_pop(tmp))) -+ generic_make_request(bio); -+} -+ -+static int cmp_stripe(void *priv, struct list_head *a, struct list_head *b) -+{ -+ const struct r5pending_data *da = list_entry(a, -+ struct r5pending_data, sibling); -+ const struct r5pending_data *db = list_entry(b, -+ struct r5pending_data, sibling); -+ if (da->sector > db->sector) -+ return 1; -+ if (da->sector < db->sector) -+ return -1; -+ return 0; -+} -+ -+static void dispatch_defer_bios(struct r5conf *conf, int target, -+ struct bio_list *list) -+{ -+ struct r5pending_data *data; -+ struct list_head *first, *next = NULL; -+ int cnt = 0; -+ -+ if (conf->pending_data_cnt == 0) -+ return; -+ -+ list_sort(NULL, &conf->pending_list, cmp_stripe); -+ -+ first = conf->pending_list.next; -+ -+ /* temporarily move the head */ -+ if (conf->next_pending_data) -+ list_move_tail(&conf->pending_list, -+ &conf->next_pending_data->sibling); -+ -+ while (!list_empty(&conf->pending_list)) { -+ data = list_first_entry(&conf->pending_list, -+ struct r5pending_data, sibling); -+ if (&data->sibling == first) -+ first = data->sibling.next; -+ next = data->sibling.next; -+ -+ bio_list_merge(list, &data->bios); -+ 
list_move(&data->sibling, &conf->free_list); -+ cnt++; -+ if (cnt >= target) -+ break; -+ } -+ conf->pending_data_cnt -= cnt; -+ BUG_ON(conf->pending_data_cnt < 0 || cnt < target); -+ -+ if (next != &conf->pending_list) -+ conf->next_pending_data = list_entry(next, -+ struct r5pending_data, sibling); -+ else -+ conf->next_pending_data = NULL; -+ /* list isn't empty */ -+ if (first != &conf->pending_list) -+ list_move_tail(&conf->pending_list, first); -+} -+ -+static void flush_deferred_bios(struct r5conf *conf) -+{ -+ struct bio_list tmp = BIO_EMPTY_LIST; -+ -+ if (conf->pending_data_cnt == 0) - return; - -- bio_list_init(&tmp); - spin_lock(&conf->pending_bios_lock); -- bio_list_merge(&tmp, &conf->pending_bios); -- bio_list_init(&conf->pending_bios); -+ dispatch_defer_bios(conf, conf->pending_data_cnt, &tmp); -+ BUG_ON(conf->pending_data_cnt != 0); - spin_unlock(&conf->pending_bios_lock); - -- while ((bio = bio_list_pop(&tmp))) -- generic_make_request(bio); -+ dispatch_bio_list(&tmp); - } - --static void defer_bio_issue(struct r5conf *conf, struct bio *bio) -+static void defer_issue_bios(struct r5conf *conf, sector_t sector, -+ struct bio_list *bios) - { -- /* -- * change group_cnt will drain all bios, so this is safe -- * -- * A read generally means a read-modify-write, which usually means a -- * randwrite, so we don't delay it -- */ -- if (!conf->batch_bio_dispatch || !conf->group_cnt || -- bio_op(bio) == REQ_OP_READ) { -- generic_make_request(bio); -- return; -- } -+ struct bio_list tmp = BIO_EMPTY_LIST; -+ struct r5pending_data *ent; -+ - spin_lock(&conf->pending_bios_lock); -- bio_list_add(&conf->pending_bios, bio); -+ ent = list_first_entry(&conf->free_list, struct r5pending_data, -+ sibling); -+ list_move_tail(&ent->sibling, &conf->pending_list); -+ ent->sector = sector; -+ bio_list_init(&ent->bios); -+ bio_list_merge(&ent->bios, bios); -+ conf->pending_data_cnt++; -+ if (conf->pending_data_cnt >= PENDING_IO_MAX) -+ dispatch_defer_bios(conf, 
PENDING_IO_ONE_FLUSH, &tmp); -+ - spin_unlock(&conf->pending_bios_lock); -- md_wakeup_thread(conf->mddev->thread); -+ -+ dispatch_bio_list(&tmp); - } - - static void -@@ -908,6 +975,8 @@ static void ops_run_io(struct stripe_hea - struct r5conf *conf = sh->raid_conf; - int i, disks = sh->disks; - struct stripe_head *head_sh = sh; -+ struct bio_list pending_bios = BIO_EMPTY_LIST; -+ bool should_defer; - - might_sleep(); - -@@ -924,6 +993,8 @@ static void ops_run_io(struct stripe_hea - } - } - -+ should_defer = conf->batch_bio_dispatch && conf->group_cnt; -+ - for (i = disks; i--; ) { - int op, op_flags = 0; - int replace_only = 0; -@@ -1078,7 +1149,10 @@ again: - trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), - bi, disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); -- defer_bio_issue(conf, bi); -+ if (should_defer && op_is_write(op)) -+ bio_list_add(&pending_bios, bi); -+ else -+ generic_make_request(bi); - } - if (rrdev) { - if (s->syncing || s->expanding || s->expanded -@@ -1123,7 +1197,10 @@ again: - trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), - rbi, disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); -- defer_bio_issue(conf, rbi); -+ if (should_defer && op_is_write(op)) -+ bio_list_add(&pending_bios, rbi); -+ else -+ generic_make_request(rbi); - } - if (!rdev && !rrdev) { - if (op_is_write(op)) -@@ -1141,6 +1218,9 @@ again: - if (sh != head_sh) - goto again; - } -+ -+ if (should_defer && !bio_list_empty(&pending_bios)) -+ defer_issue_bios(conf, head_sh->sector, &pending_bios); - } - - static struct dma_async_tx_descriptor * -@@ -6653,6 +6733,7 @@ static void free_conf(struct r5conf *con - put_page(conf->disks[i].extra_page); - kfree(conf->disks); - kfree(conf->stripe_hashtbl); -+ kfree(conf->pending_data); - kfree(conf); - } - -@@ -6798,6 +6879,14 @@ static struct r5conf *setup_conf(struct - conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL); - if (conf == NULL) - goto abort; -+ INIT_LIST_HEAD(&conf->free_list); -+ 
INIT_LIST_HEAD(&conf->pending_list); -+ conf->pending_data = kzalloc(sizeof(struct r5pending_data) * -+ PENDING_IO_MAX, GFP_KERNEL); -+ if (!conf->pending_data) -+ goto abort; -+ for (i = 0; i < PENDING_IO_MAX; i++) -+ list_add(&conf->pending_data[i].sibling, &conf->free_list); - /* Don't enable multi-threading by default*/ - if (!alloc_thread_groups(conf, 0, &group_cnt, &worker_cnt_per_group, - &new_group)) { -@@ -6821,7 +6910,6 @@ static struct r5conf *setup_conf(struct - atomic_set(&conf->active_stripes, 0); - atomic_set(&conf->preread_active_stripes, 0); - atomic_set(&conf->active_aligned_reads, 0); -- bio_list_init(&conf->pending_bios); - spin_lock_init(&conf->pending_bios_lock); - conf->batch_bio_dispatch = true; - rdev_for_each(rdev, mddev) { ---- a/drivers/md/raid5.h -+++ b/drivers/md/raid5.h -@@ -571,6 +571,14 @@ enum r5_cache_state { - */ - }; - -+#define PENDING_IO_MAX 512 -+#define PENDING_IO_ONE_FLUSH 128 -+struct r5pending_data { -+ struct list_head sibling; -+ sector_t sector; /* stripe sector */ -+ struct bio_list bios; -+}; -+ - struct r5conf { - struct hlist_head *stripe_hashtbl; - /* only protect corresponding hash list and inactive_list */ -@@ -689,9 +697,13 @@ struct r5conf { - int worker_cnt_per_group; - struct r5l_log *log; - -- struct bio_list pending_bios; - spinlock_t pending_bios_lock; - bool batch_bio_dispatch; -+ struct r5pending_data *pending_data; -+ struct list_head free_list; -+ struct list_head pending_list; -+ int pending_data_cnt; -+ struct r5pending_data *next_pending_data; - }; - - diff --git a/patches.drivers/0045-raid5-only-dispatch-IO-from-raid5d-for-harddisk-raid.patch b/patches.drivers/0105-raid5-only-dispatch-IO-from-raid5d-for-harddisk-raid.patch index 6cdac3b87b..aee066c638 100644 --- a/patches.drivers/0045-raid5-only-dispatch-IO-from-raid5d-for-harddisk-raid.patch +++ b/patches.drivers/0105-raid5-only-dispatch-IO-from-raid5d-for-harddisk-raid.patch @@ -1,3 +1,4 @@ +From 765d704db1f583630d52dc14c1ea573db6783459 Mon Sep 
17 00:00:00 2001 From: Shaohua Li <shli@fb.com> Date: Wed, 4 Jan 2017 09:33:23 -0800 Subject: [PATCH] raid5: only dispatch IO from raid5d for harddisk raid @@ -38,16 +39,15 @@ Cc: NeilBrown <neilb@suse.com> Cc: Song Liu <songliubraving@fb.com> Signed-off-by: Shaohua Li <shli@fb.com> Signed-off-by: Coly Li <colyli@suse.de> + --- - drivers/md/raid5.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- - drivers/md/raid5.h | 4 ++++ - 2 files changed, 57 insertions(+), 2 deletions(-) + drivers/md/raid5.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++-- + drivers/md/raid5.h | 5 ++++ + 2 files changed, 58 insertions(+), 2 deletions(-) -diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index 3c7e106..9d744a8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -863,6 +863,43 @@ static int use_new_offset(struct r5conf *conf, struct stripe_head *sh) +@@ -876,6 +876,43 @@ static int use_new_offset(struct r5conf return 1; } @@ -91,7 +91,7 @@ index 3c7e106..9d744a8 100644 static void raid5_end_read_request(struct bio *bi); static void -@@ -1043,7 +1080,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) +@@ -1056,7 +1093,7 @@ again: trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), bi, disk_devt(conf->mddev->gendisk), sh->dev[i].sector); @@ -100,7 +100,7 @@ index 3c7e106..9d744a8 100644 } if (rrdev) { if (s->syncing || s->expanding || s->expanded -@@ -1088,7 +1125,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) +@@ -1101,7 +1138,7 @@ again: trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), rbi, disk_devt(conf->mddev->gendisk), sh->dev[i].sector); @@ -109,7 +109,7 @@ index 3c7e106..9d744a8 100644 } if (!rdev && !rrdev) { if (op_is_write(op)) -@@ -6126,6 +6163,8 @@ static void raid5d(struct md_thread *thread) +@@ -6180,6 +6217,8 @@ static void raid5d(struct md_thread *thr mutex_unlock(&conf->cache_size_mutex); } @@ -118,7 +118,7 @@ index 3c7e106..9d744a8 100644 
r5l_flush_stripe_to_raid(conf->log); async_tx_issue_pending_all(); -@@ -6711,6 +6750,18 @@ static struct r5conf *setup_conf(struct mddev *mddev) +@@ -6803,6 +6842,18 @@ static struct r5conf *setup_conf(struct atomic_set(&conf->active_stripes, 0); atomic_set(&conf->preread_active_stripes, 0); atomic_set(&conf->active_aligned_reads, 0); @@ -137,21 +137,17 @@ index 3c7e106..9d744a8 100644 conf->bypass_threshold = BYPASS_THRESHOLD; conf->recovery_disabled = mddev->recovery_disabled - 1; -diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h -index 1440fa2..ebb89bd 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h -@@ -684,6 +684,10 @@ struct r5conf { +@@ -699,6 +699,11 @@ struct r5conf { int group_cnt; int worker_cnt_per_group; struct r5l_log *log; + + struct bio_list pending_bios; -+ spinlock_t pending_bios_lock; -+ bool batch_bio_dispatch; ++ spinlock_t pending_bios_lock; ++ bool batch_bio_dispatch; ++ + void *log_private; }; - --- -2.10.2 - diff --git a/patches.drivers/0108-md-raid1-use-bio_clone_bioset_partial-in-case-of-wri.patch b/patches.drivers/0108-md-raid1-use-bio_clone_bioset_partial-in-case-of-wri.patch deleted file mode 100644 index 85f2d50715..0000000000 --- a/patches.drivers/0108-md-raid1-use-bio_clone_bioset_partial-in-case-of-wri.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 8e58e327e25c7fffc3fb79a24c76637bdda37716 Mon Sep 17 00:00:00 2001 -From: Ming Lei <tom.leiming@gmail.com> -Date: Tue, 14 Feb 2017 23:29:01 +0800 -Subject: [PATCH] md/raid1: use bio_clone_bioset_partial() in case of write - behind -Git-commit: 8e58e327e25c7fffc3fb79a24c76637bdda37716 -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -Write behind need to replace pages in bio's bvecs, and we have -to clone a fresh bio with new bvec table, so use the introduced -bio_clone_bioset_partial() for it. - -For other bio_clone_mddev() cases, we will use fast clone since -they don't need to touch bvec table. 
- -Reviewed-by: Christoph Hellwig <hch@lst.de> -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 20 +++++++++++++++----- - 1 file changed, 15 insertions(+), 5 deletions(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index 7b0f647..691d6d9 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -1345,13 +1345,12 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, - - first_clone = 1; - for (i = 0; i < disks; i++) { -- struct bio *mbio; -+ struct bio *mbio = NULL; -+ sector_t offset; - if (!r1_bio->bios[i]) - continue; - -- mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); -- bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, -- max_sectors); -+ offset = r1_bio->sector - bio->bi_iter.bi_sector; - - if (first_clone) { - /* do behind I/O ? -@@ -1361,8 +1360,13 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, - if (bitmap && - (atomic_read(&bitmap->behind_writes) - < mddev->bitmap_info.max_write_behind) && -- !waitqueue_active(&bitmap->behind_wait)) -+ !waitqueue_active(&bitmap->behind_wait)) { -+ mbio = bio_clone_bioset_partial(bio, GFP_NOIO, -+ mddev->bio_set, -+ offset, -+ max_sectors); - alloc_behind_pages(mbio, r1_bio); -+ } - - bitmap_startwrite(bitmap, r1_bio->sector, - r1_bio->sectors, -@@ -1370,6 +1374,12 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, - &r1_bio->state)); - first_clone = 0; - } -+ -+ if (!mbio) { -+ mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); -+ bio_trim(mbio, offset, max_sectors); -+ } -+ - if (r1_bio->behind_bvecs) { - struct bio_vec *bvec; - int j; --- -2.10.2 - diff --git a/patches.drivers/0109-md-remove-unnecessary-check-on-mddev.patch b/patches.drivers/0109-md-remove-unnecessary-check-on-mddev.patch deleted file mode 100644 index 5df262a34b..0000000000 --- a/patches.drivers/0109-md-remove-unnecessary-check-on-mddev.patch +++ /dev/null @@ 
-1,37 +0,0 @@ -From ed7ef732ca9f7d6f42be8df6cc7bf4ace3534af3 Mon Sep 17 00:00:00 2001 -From: Ming Lei <tom.leiming@gmail.com> -Date: Tue, 14 Feb 2017 23:29:02 +0800 -Subject: [PATCH] md: remove unnecessary check on mddev -Git-commit: ed7ef732ca9f7d6f42be8df6cc7bf4ace3534af3 -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -mddev is never NULL and neither is ->bio_set, so -remove the check. - -Reviewed-by: Christoph Hellwig <hch@lst.de> -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index 137a1fe..0e408bc 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -193,9 +193,6 @@ EXPORT_SYMBOL_GPL(bio_alloc_mddev); - struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, - struct mddev *mddev) - { -- if (!mddev || !mddev->bio_set) -- return bio_clone(bio, gfp_mask); -- - return bio_clone_bioset(bio, gfp_mask, mddev->bio_set); - } - EXPORT_SYMBOL_GPL(bio_clone_mddev); --- -2.10.2 - diff --git a/patches.drivers/0110-md-fast-clone-bio-in-bio_clone_mddev.patch b/patches.drivers/0110-md-fast-clone-bio-in-bio_clone_mddev.patch deleted file mode 100644 index d9901f0812..0000000000 --- a/patches.drivers/0110-md-fast-clone-bio-in-bio_clone_mddev.patch +++ /dev/null @@ -1,175 +0,0 @@ -From d7a1030839d35c04a620e841f406b9b2a8600041 Mon Sep 17 00:00:00 2001 -From: Ming Lei <tom.leiming@gmail.com> -Date: Tue, 14 Feb 2017 23:29:03 +0800 -Subject: [PATCH] md: fast clone bio in bio_clone_mddev() -Git-commit: d7a1030839d35c04a620e841f406b9b2a8600041 -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -Firstly bio_clone_mddev() is used in raid normal I/O and isn't -in resync I/O path. - -Secondly all the direct access to bvec table in raid happens on -resync I/O except for write behind of raid1, in which we still -use bio_clone() for allocating new bvec table. 
- -So this patch replaces bio_clone() with bio_clone_fast() -in bio_clone_mddev(). - -Also kill bio_clone_mddev() and call bio_clone_fast() directly, as -suggested by Christoph Hellwig. - -Reviewed-by: Christoph Hellwig <hch@lst.de> -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/faulty.c | 2 +- - drivers/md/md.c | 7 ------- - drivers/md/md.h | 2 -- - drivers/md/raid1.c | 10 ++++++---- - drivers/md/raid10.c | 11 +++++------ - drivers/md/raid5.c | 4 ++-- - 6 files changed, 14 insertions(+), 22 deletions(-) - ---- a/drivers/md/faulty.c -+++ b/drivers/md/faulty.c -@@ -214,7 +214,7 @@ static void faulty_make_request(struct m - } - } - if (failit) { -- struct bio *b = bio_clone_mddev(bio, GFP_NOIO, mddev); -+ struct bio *b = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); - - b->bi_bdev = conf->rdev->bdev; - b->bi_private = bio; ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -190,13 +190,6 @@ struct bio *bio_alloc_mddev(gfp_t gfp_ma - } - EXPORT_SYMBOL_GPL(bio_alloc_mddev); - --struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, -- struct mddev *mddev) --{ -- return bio_clone_bioset(bio, gfp_mask, mddev->bio_set); --} --EXPORT_SYMBOL_GPL(bio_clone_mddev); -- - /* - * We have a system wide 'event count' that is incremented - * on any 'interesting' event, and readers of /proc/mdstat ---- a/drivers/md/md.h -+++ b/drivers/md/md.h -@@ -685,8 +685,6 @@ extern void md_rdev_clear(struct md_rdev - - extern void mddev_suspend(struct mddev *mddev); - extern void mddev_resume(struct mddev *mddev); --extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, -- struct mddev *mddev); - extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, - struct mddev *mddev); - ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -1224,7 +1224,7 @@ read_again: - } - r1_bio->read_disk = rdisk; - -- read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev); -+ read_bio = 
bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); - bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector, - max_sectors); - -@@ -1482,7 +1482,7 @@ static void raid1_write_request(struct m - } - - if (!mbio) { -- mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); -+ mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); - bio_trim(mbio, offset, max_sectors); - } - -@@ -2373,7 +2373,8 @@ static int narrow_write_error(struct r1b - - wbio->bi_vcnt = vcnt; - } else { -- wbio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev); -+ wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO, -+ mddev->bio_set); - } - - bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); -@@ -2518,7 +2519,8 @@ read_more: - const unsigned long do_sync - = r1_bio->master_bio->bi_opf & REQ_SYNC; - r1_bio->read_disk = disk; -- bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev); -+ bio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO, -+ mddev->bio_set); - bio_trim(bio, r1_bio->sector - bio->bi_iter.bi_sector, - max_sectors); - r1_bio->bios[r1_bio->read_disk] = bio; ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -1133,7 +1133,7 @@ read_again: - } - slot = r10_bio->read_slot; - -- read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev); -+ read_bio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); - bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector, - max_sectors); - -@@ -1407,7 +1407,7 @@ retry_write: - int d = r10_bio->devs[i].devnum; - if (r10_bio->devs[i].bio) { - struct md_rdev *rdev = conf->mirrors[d].rdev; -- mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); -+ mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); - bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector, - max_sectors); - r10_bio->devs[i].bio = mbio; -@@ -1458,7 +1458,7 @@ retry_write: - smp_mb(); - rdev = conf->mirrors[d].rdev; - } -- mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); -+ mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); - bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector, - max_sectors); - 
r10_bio->devs[i].repl_bio = mbio; -@@ -2588,7 +2588,7 @@ static int narrow_write_error(struct r10 - if (sectors > sect_to_write) - sectors = sect_to_write; - /* Write at 'sector' for 'sectors' */ -- wbio = bio_clone_mddev(bio, GFP_NOIO, mddev); -+ wbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); - bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors); - wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector); - wbio->bi_iter.bi_sector = wsector + -@@ -2664,8 +2664,7 @@ read_more: - mdname(mddev), - bdevname(rdev->bdev, b), - (unsigned long long)r10_bio->sector); -- bio = bio_clone_mddev(r10_bio->master_bio, -- GFP_NOIO, mddev); -+ bio = bio_clone_fast(r10_bio->master_bio, GFP_NOIO, mddev->bio_set); - bio_trim(bio, r10_bio->sector - bio->bi_iter.bi_sector, max_sectors); - r10_bio->devs[slot].bio = bio; - r10_bio->devs[slot].rdev = rdev; ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -5110,9 +5110,9 @@ static int raid5_read_one_chunk(struct m - return 0; - } - /* -- * use bio_clone_mddev to make a copy of the bio -+ * use bio_clone_fast to make a copy of the bio - */ -- align_bi = bio_clone_mddev(raid_bio, GFP_NOIO, mddev); -+ align_bi = bio_clone_fast(raid_bio, GFP_NOIO, mddev->bio_set); - if (!align_bi) - return 0; - /* diff --git a/patches.drivers/0111-dm-block-manager-add-unlikely-annotations-on-dm_bufi.patch b/patches.drivers/0111-dm-block-manager-add-unlikely-annotations-on-dm_bufi.patch deleted file mode 100644 index afdf218218..0000000000 --- a/patches.drivers/0111-dm-block-manager-add-unlikely-annotations-on-dm_bufi.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 602548bdd5ac4ed7025d992e3ad61a628af4c500 Mon Sep 17 00:00:00 2001 -From: Joe Thornber <ejt@redhat.com> -Date: Thu, 19 Nov 2015 12:55:58 +0000 -Subject: [PATCH] dm block manager: add unlikely() annotations on dm_bufio - error paths -Git-commit: 602548bdd5ac4ed7025d992e3ad61a628af4c500 -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -Signed-off-by: Joe Thornber <ejt@redhat.com> 
-Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/persistent-data/dm-block-manager.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c -index a6dde7c..8212f14 100644 ---- a/drivers/md/persistent-data/dm-block-manager.c -+++ b/drivers/md/persistent-data/dm-block-manager.c -@@ -462,7 +462,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, - int r; - - p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); -- if (IS_ERR(p)) -+ if (unlikely(IS_ERR(p))) - return PTR_ERR(p); - - aux = dm_bufio_get_aux_data(to_buffer(*result)); -@@ -498,7 +498,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm, - return -EPERM; - - p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); -- if (IS_ERR(p)) -+ if (unlikely(IS_ERR(p))) - return PTR_ERR(p); - - aux = dm_bufio_get_aux_data(to_buffer(*result)); -@@ -531,7 +531,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm, - int r; - - p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result); -- if (IS_ERR(p)) -+ if (unlikely(IS_ERR(p))) - return PTR_ERR(p); - if (unlikely(!p)) - return -EWOULDBLOCK; -@@ -567,7 +567,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, - return -EPERM; - - p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); -- if (IS_ERR(p)) -+ if (unlikely(IS_ERR(p))) - return PTR_ERR(p); - - memset(p, 0, dm_bm_block_size(bm)); --- -2.10.2 - diff --git a/patches.drivers/0112-dm-space-map-common-memcpy-the-disk-root-to-ensure-i.patch b/patches.drivers/0112-dm-space-map-common-memcpy-the-disk-root-to-ensure-i.patch deleted file mode 100644 index 4a41ba8fe9..0000000000 --- a/patches.drivers/0112-dm-space-map-common-memcpy-the-disk-root-to-ensure-i.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 3ba3ba1e8411532dc4a05b4e8932c9e358d70a44 Mon Sep 17 00:00:00 2001 -From: Joe Thornber 
<ejt@redhat.com> -Date: Thu, 19 Nov 2015 13:03:36 +0000 -Subject: [PATCH] dm space map common: memcpy the disk root to ensure it's arch - aligned -Git-commit: 3ba3ba1e8411532dc4a05b4e8932c9e358d70a44 -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -The metadata_space_map_root passed to sm_ll_open_metadata() may or may -not be arch aligned, use memcpy to ensure it is. This is not a fast -path so the extra memcpy doesn't hurt us. - -Long-term it'd be better to use the kernel's alignment infrastructure to -remove the memcpy()s that are littered across persistent-data (btree, -array, space-maps, etc). - -Signed-off-by: Joe Thornber <ejt@redhat.com> -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/persistent-data/dm-space-map-common.c | 16 +++++++++++----- - 1 file changed, 11 insertions(+), 5 deletions(-) - -diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c -index 4c28608..829b4ce 100644 ---- a/drivers/md/persistent-data/dm-space-map-common.c -+++ b/drivers/md/persistent-data/dm-space-map-common.c -@@ -626,13 +626,19 @@ int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm, - void *root_le, size_t len) - { - int r; -- struct disk_sm_root *smr = root_le; -+ struct disk_sm_root smr; - - if (len < sizeof(struct disk_sm_root)) { - DMERR("sm_metadata root too small"); - return -ENOMEM; - } - -+ /* -+ * We don't know the alignment of the root_le buffer, so need to -+ * copy into a new structure. 
-+ */ -+ memcpy(&smr, root_le, sizeof(smr)); -+ - r = sm_ll_init(ll, tm); - if (r < 0) - return r; -@@ -644,10 +650,10 @@ int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm, - ll->max_entries = metadata_ll_max_entries; - ll->commit = metadata_ll_commit; - -- ll->nr_blocks = le64_to_cpu(smr->nr_blocks); -- ll->nr_allocated = le64_to_cpu(smr->nr_allocated); -- ll->bitmap_root = le64_to_cpu(smr->bitmap_root); -- ll->ref_count_root = le64_to_cpu(smr->ref_count_root); -+ ll->nr_blocks = le64_to_cpu(smr.nr_blocks); -+ ll->nr_allocated = le64_to_cpu(smr.nr_allocated); -+ ll->bitmap_root = le64_to_cpu(smr.bitmap_root); -+ ll->ref_count_root = le64_to_cpu(smr.ref_count_root); - - return ll->open_index(ll); - } --- -2.10.2 - diff --git a/patches.drivers/0113-dm-btree-use-GFP_NOFS-in-dm_btree_del.patch b/patches.drivers/0113-dm-btree-use-GFP_NOFS-in-dm_btree_del.patch deleted file mode 100644 index ac78dc8b02..0000000000 --- a/patches.drivers/0113-dm-btree-use-GFP_NOFS-in-dm_btree_del.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 9f9ef0657d53d988dc07b096052b3dd07d6e3c46 Mon Sep 17 00:00:00 2001 -From: Joe Thornber <ejt@redhat.com> -Date: Thu, 19 Nov 2015 13:36:45 +0000 -Subject: [PATCH] dm btree: use GFP_NOFS in dm_btree_del() -Git-commit: 9f9ef0657d53d988dc07b096052b3dd07d6e3c46 -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -dm_btree_del() is called from an ioctl so don't recurse into FS. 
- -Signed-off-by: Joe Thornber <ejt@redhat.com> -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/persistent-data/dm-btree.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c -index 20a4032..1744f36 100644 ---- a/drivers/md/persistent-data/dm-btree.c -+++ b/drivers/md/persistent-data/dm-btree.c -@@ -272,7 +272,12 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) - int r; - struct del_stack *s; - -- s = kmalloc(sizeof(*s), GFP_NOIO); -+ /* -+ * dm_btree_del() is called via an ioctl, as such should be -+ * considered an FS op. We can't recurse back into the FS, so we -+ * allocate GFP_NOFS. -+ */ -+ s = kmalloc(sizeof(*s), GFP_NOFS); - if (!s) - return -ENOMEM; - s->info = info; --- -2.10.2 - diff --git a/patches.drivers/0114-dm-bitset-introduce-cursor-api.patch b/patches.drivers/0114-dm-bitset-introduce-cursor-api.patch deleted file mode 100644 index 2404b578b8..0000000000 --- a/patches.drivers/0114-dm-bitset-introduce-cursor-api.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 6fe28dbf05e329ce136f38219d95a1826ceafebd Mon Sep 17 00:00:00 2001 -From: Joe Thornber <ejt@redhat.com> -Date: Mon, 3 Oct 2016 14:15:02 -0400 -Subject: [PATCH] dm bitset: introduce cursor api -Git-commit: 6fe28dbf05e329ce136f38219d95a1826ceafebd -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -Signed-off-by: Joe Thornber <ejt@redhat.com> -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/persistent-data/dm-bitset.c | 69 ++++++++++++++++++++++++++++++++++ - drivers/md/persistent-data/dm-bitset.h | 22 +++++++++++ - 2 files changed, 91 insertions(+) - -diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c -index 36f7cc2..7cb2851 100644 ---- a/drivers/md/persistent-data/dm-bitset.c -+++ 
b/drivers/md/persistent-data/dm-bitset.c -@@ -168,4 +168,73 @@ int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root, - } - EXPORT_SYMBOL_GPL(dm_bitset_test_bit); - -+static int cursor_next_array_entry(struct dm_bitset_cursor *c) -+{ -+ int r; -+ __le64 *value; -+ -+ r = dm_array_cursor_next(&c->cursor); -+ if (r) -+ return r; -+ -+ dm_array_cursor_get_value(&c->cursor, (void **) &value); -+ c->array_index++; -+ c->bit_index = 0; -+ c->current_bits = le64_to_cpu(*value); -+ return 0; -+} -+ -+int dm_bitset_cursor_begin(struct dm_disk_bitset *info, -+ dm_block_t root, uint32_t nr_entries, -+ struct dm_bitset_cursor *c) -+{ -+ int r; -+ __le64 *value; -+ -+ if (!nr_entries) -+ return -ENODATA; -+ -+ c->info = info; -+ c->entries_remaining = nr_entries; -+ -+ r = dm_array_cursor_begin(&info->array_info, root, &c->cursor); -+ if (r) -+ return r; -+ -+ dm_array_cursor_get_value(&c->cursor, (void **) &value); -+ c->array_index = 0; -+ c->bit_index = 0; -+ c->current_bits = le64_to_cpu(*value); -+ -+ return r; -+} -+EXPORT_SYMBOL_GPL(dm_bitset_cursor_begin); -+ -+void dm_bitset_cursor_end(struct dm_bitset_cursor *c) -+{ -+ return dm_array_cursor_end(&c->cursor); -+} -+EXPORT_SYMBOL_GPL(dm_bitset_cursor_end); -+ -+int dm_bitset_cursor_next(struct dm_bitset_cursor *c) -+{ -+ int r = 0; -+ -+ if (!c->entries_remaining) -+ return -ENODATA; -+ -+ c->entries_remaining--; -+ if (++c->bit_index > 63) -+ r = cursor_next_array_entry(c); -+ -+ return r; -+} -+EXPORT_SYMBOL_GPL(dm_bitset_cursor_next); -+ -+bool dm_bitset_cursor_get_value(struct dm_bitset_cursor *c) -+{ -+ return test_bit(c->bit_index, (unsigned long *) &c->current_bits); -+} -+EXPORT_SYMBOL_GPL(dm_bitset_cursor_get_value); -+ - /*----------------------------------------------------------------*/ -diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h -index c2287d6..017c0d4 100644 ---- a/drivers/md/persistent-data/dm-bitset.h -+++ 
b/drivers/md/persistent-data/dm-bitset.h -@@ -161,6 +161,28 @@ int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root, - int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, - dm_block_t *new_root); - -+struct dm_bitset_cursor { -+ struct dm_disk_bitset *info; -+ struct dm_array_cursor cursor; -+ -+ uint32_t entries_remaining; -+ uint32_t array_index; -+ uint32_t bit_index; -+ uint64_t current_bits; -+}; -+ -+/* -+ * Make sure you've flush any dm_disk_bitset and updated the root before -+ * using this. -+ */ -+int dm_bitset_cursor_begin(struct dm_disk_bitset *info, -+ dm_block_t root, uint32_t nr_entries, -+ struct dm_bitset_cursor *c); -+void dm_bitset_cursor_end(struct dm_bitset_cursor *c); -+ -+int dm_bitset_cursor_next(struct dm_bitset_cursor *c); -+bool dm_bitset_cursor_get_value(struct dm_bitset_cursor *c); -+ - /*----------------------------------------------------------------*/ - - #endif /* _LINUX_DM_BITSET_H */ --- -2.10.2 - diff --git a/patches.drivers/0115-dm-cache-metadata-use-bitset-cursor-api-to-load-disc.patch b/patches.drivers/0115-dm-cache-metadata-use-bitset-cursor-api-to-load-disc.patch deleted file mode 100644 index e0515c54ff..0000000000 --- a/patches.drivers/0115-dm-cache-metadata-use-bitset-cursor-api-to-load-disc.patch +++ /dev/null @@ -1,90 +0,0 @@ -From ae4a46a1f60942263d6fd119fe1da49bb16d2bd5 Mon Sep 17 00:00:00 2001 -From: Joe Thornber <ejt@redhat.com> -Date: Mon, 3 Oct 2016 14:16:20 -0400 -Subject: [PATCH] dm cache metadata: use bitset cursor api to load discard - bitset -Git-commit: ae4a46a1f60942263d6fd119fe1da49bb16d2bd5 -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -Signed-off-by: Joe Thornber <ejt@redhat.com> -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/dm-cache-metadata.c | 48 ++++++++++++++++++++++++------------------ - 1 file changed, 28 insertions(+), 20 deletions(-) - -diff --git a/drivers/md/dm-cache-metadata.c 
b/drivers/md/dm-cache-metadata.c -index 624fe43..9364a02 100644 ---- a/drivers/md/dm-cache-metadata.c -+++ b/drivers/md/dm-cache-metadata.c -@@ -995,14 +995,6 @@ static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b) - from_dblock(b), &cmd->discard_root); - } - --static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b, -- bool *is_discarded) --{ -- return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root, -- from_dblock(b), &cmd->discard_root, -- is_discarded); --} -- - static int __discard(struct dm_cache_metadata *cmd, - dm_dblock_t dblock, bool discard) - { -@@ -1032,22 +1024,38 @@ static int __load_discards(struct dm_cache_metadata *cmd, - load_discard_fn fn, void *context) - { - int r = 0; -- dm_block_t b; -- bool discard; -+ uint32_t b; -+ struct dm_bitset_cursor c; - -- for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) { -- dm_dblock_t dblock = to_dblock(b); -+ if (from_dblock(cmd->discard_nr_blocks) == 0) -+ /* nothing to do */ -+ return 0; - -- if (cmd->clean_when_opened) { -- r = __is_discarded(cmd, dblock, &discard); -- if (r) -- return r; -- } else -- discard = false; -+ if (cmd->clean_when_opened) { -+ r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root, &cmd->discard_root); -+ if (r) -+ return r; - -- r = fn(context, cmd->discard_block_size, dblock, discard); -+ r = dm_bitset_cursor_begin(&cmd->discard_info, cmd->discard_root, -+ from_dblock(cmd->discard_nr_blocks), &c); - if (r) -- break; -+ return r; -+ -+ for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) { -+ r = fn(context, cmd->discard_block_size, to_dblock(b), -+ dm_bitset_cursor_get_value(&c)); -+ if (r) -+ break; -+ } -+ -+ dm_bitset_cursor_end(&c); -+ -+ } else { -+ for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) { -+ r = fn(context, cmd->discard_block_size, to_dblock(b), false); -+ if (r) -+ return r; -+ } - } - - return r; --- -2.10.2 - diff --git a/patches.drivers/0116-dm-cache-metadata-add-metadata2-feature.patch 
b/patches.drivers/0116-dm-cache-metadata-add-metadata2-feature.patch deleted file mode 100644 index 72afbe97aa..0000000000 --- a/patches.drivers/0116-dm-cache-metadata-add-metadata2-feature.patch +++ /dev/null @@ -1,709 +0,0 @@ -From 629d0a8a1a104187db8fbf966e4cc5cfb6aa9a3c Mon Sep 17 00:00:00 2001 -From: Joe Thornber <ejt@redhat.com> -Date: Thu, 22 Sep 2016 06:15:21 -0400 -Subject: [PATCH] dm cache metadata: add "metadata2" feature -Git-commit: 629d0a8a1a104187db8fbf966e4cc5cfb6aa9a3c -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -If "metadata2" is provided as a table argument when creating/loading a -cache target a more compact metadata format, with separate dirty bits, -is used. "metadata2" improves speed of shutting down a cache target. - -Signed-off-by: Joe Thornber <ejt@redhat.com> -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - Documentation/device-mapper/cache.txt | 4 + - drivers/md/dm-cache-metadata.c | 278 ++++++++++++++++++++++++++++++---- - drivers/md/dm-cache-metadata.h | 11 +- - drivers/md/dm-cache-target.c | 38 +++-- - 4 files changed, 278 insertions(+), 53 deletions(-) - -diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt -index 785eab8..f228604 100644 ---- a/Documentation/device-mapper/cache.txt -+++ b/Documentation/device-mapper/cache.txt -@@ -207,6 +207,10 @@ Optional feature arguments are: - block, then the cache block is invalidated. - To enable passthrough mode the cache must be clean. - -+ metadata2 : use version 2 of the metadata. This stores the dirty bits -+ in a separate btree, which improves speed of shutting -+ down the cache. -+ - A policy called 'default' is always registered. This is an alias for - the policy we currently think is giving best all round performance. 
- -diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c -index 9364a02..0610be7 100644 ---- a/drivers/md/dm-cache-metadata.c -+++ b/drivers/md/dm-cache-metadata.c -@@ -25,7 +25,7 @@ - * defines a range of metadata versions that this module can handle. - */ - #define MIN_CACHE_VERSION 1 --#define MAX_CACHE_VERSION 1 -+#define MAX_CACHE_VERSION 2 - - #define CACHE_METADATA_CACHE_SIZE 64 - -@@ -55,6 +55,7 @@ enum mapping_bits { - - /* - * The data on the cache is different from that on the origin. -+ * This flag is only used by metadata format 1. - */ - M_DIRTY = 2 - }; -@@ -93,12 +94,18 @@ struct cache_disk_superblock { - __le32 write_misses; - - __le32 policy_version[CACHE_POLICY_VERSION_SIZE]; -+ -+ /* -+ * Metadata format 2 fields. -+ */ -+ __le64 dirty_root; - } __packed; - - struct dm_cache_metadata { - atomic_t ref_count; - struct list_head list; - -+ unsigned version; - struct block_device *bdev; - struct dm_block_manager *bm; - struct dm_space_map *metadata_sm; -@@ -142,11 +149,18 @@ struct dm_cache_metadata { - bool fail_io:1; - - /* -+ * Metadata format 2 fields. -+ */ -+ dm_block_t dirty_root; -+ struct dm_disk_bitset dirty_info; -+ -+ /* - * These structures are used when loading metadata. They're too - * big to put on the stack. 
- */ - struct dm_array_cursor mapping_cursor; - struct dm_array_cursor hint_cursor; -+ struct dm_bitset_cursor dirty_cursor; - }; - - /*------------------------------------------------------------------- -@@ -170,6 +184,7 @@ static void sb_prepare_for_write(struct dm_block_validator *v, - static int check_metadata_version(struct cache_disk_superblock *disk_super) - { - uint32_t metadata_version = le32_to_cpu(disk_super->version); -+ - if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) { - DMERR("Cache metadata version %u found, but only versions between %u and %u supported.", - metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION); -@@ -310,6 +325,11 @@ static void __copy_sm_root(struct dm_cache_metadata *cmd, - sizeof(cmd->metadata_space_map_root)); - } - -+static bool separate_dirty_bits(struct dm_cache_metadata *cmd) -+{ -+ return cmd->version >= 2; -+} -+ - static int __write_initial_superblock(struct dm_cache_metadata *cmd) - { - int r; -@@ -341,7 +361,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) - disk_super->flags = 0; - memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); - disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC); -- disk_super->version = cpu_to_le32(MAX_CACHE_VERSION); -+ disk_super->version = cpu_to_le32(cmd->version); - memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); - memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); - disk_super->policy_hint_size = 0; -@@ -362,6 +382,9 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) - disk_super->write_hits = cpu_to_le32(0); - disk_super->write_misses = cpu_to_le32(0); - -+ if (separate_dirty_bits(cmd)) -+ disk_super->dirty_root = cpu_to_le64(cmd->dirty_root); -+ - return dm_tm_commit(cmd->tm, sblock); - } - -@@ -382,6 +405,13 @@ static int __format_metadata(struct dm_cache_metadata *cmd) - if (r < 0) - goto bad; - -+ if (separate_dirty_bits(cmd)) { -+ 
dm_disk_bitset_init(cmd->tm, &cmd->dirty_info); -+ r = dm_bitset_empty(&cmd->dirty_info, &cmd->dirty_root); -+ if (r < 0) -+ goto bad; -+ } -+ - dm_disk_bitset_init(cmd->tm, &cmd->discard_info); - r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root); - if (r < 0) -@@ -407,9 +437,10 @@ static int __format_metadata(struct dm_cache_metadata *cmd) - static int __check_incompat_features(struct cache_disk_superblock *disk_super, - struct dm_cache_metadata *cmd) - { -- uint32_t features; -+ uint32_t incompat_flags, features; - -- features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP; -+ incompat_flags = le32_to_cpu(disk_super->incompat_flags); -+ features = incompat_flags & ~DM_CACHE_FEATURE_INCOMPAT_SUPP; - if (features) { - DMERR("could not access metadata due to unsupported optional features (%lx).", - (unsigned long)features); -@@ -470,6 +501,7 @@ static int __open_metadata(struct dm_cache_metadata *cmd) - } - - __setup_mapping_info(cmd); -+ dm_disk_bitset_init(cmd->tm, &cmd->dirty_info); - dm_disk_bitset_init(cmd->tm, &cmd->discard_info); - sb_flags = le32_to_cpu(disk_super->flags); - cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags); -@@ -548,6 +580,7 @@ static unsigned long clear_clean_shutdown(unsigned long flags) - static void read_superblock_fields(struct dm_cache_metadata *cmd, - struct cache_disk_superblock *disk_super) - { -+ cmd->version = le32_to_cpu(disk_super->version); - cmd->flags = le32_to_cpu(disk_super->flags); - cmd->root = le64_to_cpu(disk_super->mapping_root); - cmd->hint_root = le64_to_cpu(disk_super->hint_root); -@@ -567,6 +600,9 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd, - cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits); - cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses); - -+ if (separate_dirty_bits(cmd)) -+ cmd->dirty_root = le64_to_cpu(disk_super->dirty_root); -+ - cmd->changed = false; - } - -@@ -625,6 +661,13 @@ static int 
__commit_transaction(struct dm_cache_metadata *cmd, - */ - BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512); - -+ if (separate_dirty_bits(cmd)) { -+ r = dm_bitset_flush(&cmd->dirty_info, cmd->dirty_root, -+ &cmd->dirty_root); -+ if (r) -+ return r; -+ } -+ - r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root, - &cmd->discard_root); - if (r) -@@ -649,6 +692,8 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, - update_flags(disk_super, mutator); - - disk_super->mapping_root = cpu_to_le64(cmd->root); -+ if (separate_dirty_bits(cmd)) -+ disk_super->dirty_root = cpu_to_le64(cmd->dirty_root); - disk_super->hint_root = cpu_to_le64(cmd->hint_root); - disk_super->discard_root = cpu_to_le64(cmd->discard_root); - disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); -@@ -698,7 +743,8 @@ static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags) - static struct dm_cache_metadata *metadata_open(struct block_device *bdev, - sector_t data_block_size, - bool may_format_device, -- size_t policy_hint_size) -+ size_t policy_hint_size, -+ unsigned metadata_version) - { - int r; - struct dm_cache_metadata *cmd; -@@ -709,6 +755,7 @@ static struct dm_cache_metadata *metadata_open(struct block_device *bdev, - return ERR_PTR(-ENOMEM); - } - -+ cmd->version = metadata_version; - atomic_set(&cmd->ref_count, 1); - init_rwsem(&cmd->root_lock); - cmd->bdev = bdev; -@@ -757,7 +804,8 @@ static struct dm_cache_metadata *lookup(struct block_device *bdev) - static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, - sector_t data_block_size, - bool may_format_device, -- size_t policy_hint_size) -+ size_t policy_hint_size, -+ unsigned metadata_version) - { - struct dm_cache_metadata *cmd, *cmd2; - -@@ -768,7 +816,8 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, - if (cmd) - return cmd; - -- cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size); -+ cmd = 
metadata_open(bdev, data_block_size, may_format_device, -+ policy_hint_size, metadata_version); - if (!IS_ERR(cmd)) { - mutex_lock(&table_lock); - cmd2 = lookup(bdev); -@@ -800,10 +849,11 @@ static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size) - struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, - sector_t data_block_size, - bool may_format_device, -- size_t policy_hint_size) -+ size_t policy_hint_size, -+ unsigned metadata_version) - { -- struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, -- may_format_device, policy_hint_size); -+ struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, may_format_device, -+ policy_hint_size, metadata_version); - - if (!IS_ERR(cmd) && !same_params(cmd, data_block_size)) { - dm_cache_metadata_close(cmd); -@@ -829,8 +879,8 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd) - /* - * Checks that the given cache block is either unmapped or clean. - */ --static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b, -- bool *result) -+static int block_clean_combined_dirty(struct dm_cache_metadata *cmd, dm_cblock_t b, -+ bool *result) - { - int r; - __le64 value; -@@ -838,10 +888,8 @@ static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b, - unsigned flags; - - r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value); -- if (r) { -- DMERR("block_unmapped_or_clean failed"); -+ if (r) - return r; -- } - - unpack_value(value, &ob, &flags); - *result = !((flags & M_VALID) && (flags & M_DIRTY)); -@@ -849,17 +897,19 @@ static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b, - return 0; - } - --static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd, -- dm_cblock_t begin, dm_cblock_t end, -- bool *result) -+static int blocks_are_clean_combined_dirty(struct dm_cache_metadata *cmd, -+ dm_cblock_t begin, dm_cblock_t end, -+ bool *result) - { - int r; - 
*result = true; - - while (begin != end) { -- r = block_unmapped_or_clean(cmd, begin, result); -- if (r) -+ r = block_clean_combined_dirty(cmd, begin, result); -+ if (r) { -+ DMERR("block_clean_combined_dirty failed"); - return r; -+ } - - if (!*result) { - DMERR("cache block %llu is dirty", -@@ -873,6 +923,48 @@ static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd, - return 0; - } - -+static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd, -+ dm_cblock_t begin, dm_cblock_t end, -+ bool *result) -+{ -+ int r; -+ bool dirty_flag; -+ *result = true; -+ -+ // FIXME: use a cursor so we can benefit from preloading metadata. -+ while (begin != end) { -+ /* -+ * We assume that unmapped blocks have their dirty bit -+ * cleared. -+ */ -+ r = dm_bitset_test_bit(&cmd->dirty_info, cmd->dirty_root, -+ from_cblock(begin), &cmd->dirty_root, &dirty_flag); -+ if (r) -+ return r; -+ -+ if (dirty_flag) { -+ DMERR("cache block %llu is dirty", -+ (unsigned long long) from_cblock(begin)); -+ *result = false; -+ return 0; -+ } -+ -+ begin = to_cblock(from_cblock(begin) + 1); -+ } -+ -+ return 0; -+} -+ -+static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd, -+ dm_cblock_t begin, dm_cblock_t end, -+ bool *result) -+{ -+ if (separate_dirty_bits(cmd)) -+ return blocks_are_clean_separate_dirty(cmd, begin, end, result); -+ else -+ return blocks_are_clean_combined_dirty(cmd, begin, end, result); -+} -+ - static bool cmd_write_lock(struct dm_cache_metadata *cmd) - { - down_write(&cmd->root_lock); -@@ -950,8 +1042,18 @@ int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size) - r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks), - from_cblock(new_cache_size), - &null_mapping, &cmd->root); -- if (!r) -- cmd->cache_blocks = new_cache_size; -+ if (r) -+ goto out; -+ -+ if (separate_dirty_bits(cmd)) { -+ r = dm_bitset_resize(&cmd->dirty_info, cmd->dirty_root, -+ from_cblock(cmd->cache_blocks), 
from_cblock(new_cache_size), -+ false, &cmd->dirty_root); -+ if (r) -+ goto out; -+ } -+ -+ cmd->cache_blocks = new_cache_size; - cmd->changed = true; - - out: -@@ -1185,11 +1287,11 @@ static bool hints_array_available(struct dm_cache_metadata *cmd, - hints_array_initialized(cmd); - } - --static int __load_mapping(struct dm_cache_metadata *cmd, -- uint64_t cb, bool hints_valid, -- struct dm_array_cursor *mapping_cursor, -- struct dm_array_cursor *hint_cursor, -- load_mapping_fn fn, void *context) -+static int __load_mapping_v1(struct dm_cache_metadata *cmd, -+ uint64_t cb, bool hints_valid, -+ struct dm_array_cursor *mapping_cursor, -+ struct dm_array_cursor *hint_cursor, -+ load_mapping_fn fn, void *context) - { - int r = 0; - -@@ -1221,6 +1323,45 @@ static int __load_mapping(struct dm_cache_metadata *cmd, - return r; - } - -+static int __load_mapping_v2(struct dm_cache_metadata *cmd, -+ uint64_t cb, bool hints_valid, -+ struct dm_array_cursor *mapping_cursor, -+ struct dm_array_cursor *hint_cursor, -+ struct dm_bitset_cursor *dirty_cursor, -+ load_mapping_fn fn, void *context) -+{ -+ int r = 0; -+ -+ __le64 mapping; -+ __le32 hint = 0; -+ -+ __le64 *mapping_value_le; -+ __le32 *hint_value_le; -+ -+ dm_oblock_t oblock; -+ unsigned flags; -+ bool dirty; -+ -+ dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le); -+ memcpy(&mapping, mapping_value_le, sizeof(mapping)); -+ unpack_value(mapping, &oblock, &flags); -+ -+ if (flags & M_VALID) { -+ if (hints_valid) { -+ dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le); -+ memcpy(&hint, hint_value_le, sizeof(hint)); -+ } -+ -+ dirty = dm_bitset_cursor_get_value(dirty_cursor); -+ r = fn(context, oblock, to_cblock(cb), dirty, -+ le32_to_cpu(hint), hints_valid); -+ if (r) -+ DMERR("policy couldn't load cblock"); -+ } -+ -+ return r; -+} -+ - static int __load_mappings(struct dm_cache_metadata *cmd, - struct dm_cache_policy *policy, - load_mapping_fn fn, void *context) -@@ -1246,10 +1387,28 @@ 
static int __load_mappings(struct dm_cache_metadata *cmd, - } - } - -+ if (separate_dirty_bits(cmd)) { -+ r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root, -+ from_cblock(cmd->cache_blocks), -+ &cmd->dirty_cursor); -+ if (r) { -+ dm_array_cursor_end(&cmd->hint_cursor); -+ dm_array_cursor_end(&cmd->mapping_cursor); -+ return r; -+ } -+ } -+ - for (cb = 0; ; cb++) { -- r = __load_mapping(cmd, cb, hints_valid, -- &cmd->mapping_cursor, &cmd->hint_cursor, -- fn, context); -+ if (separate_dirty_bits(cmd)) -+ r = __load_mapping_v2(cmd, cb, hints_valid, -+ &cmd->mapping_cursor, -+ &cmd->hint_cursor, -+ &cmd->dirty_cursor, -+ fn, context); -+ else -+ r = __load_mapping_v1(cmd, cb, hints_valid, -+ &cmd->mapping_cursor, &cmd->hint_cursor, -+ fn, context); - if (r) - goto out; - -@@ -1272,12 +1431,23 @@ static int __load_mappings(struct dm_cache_metadata *cmd, - goto out; - } - } -+ -+ if (separate_dirty_bits(cmd)) { -+ r = dm_bitset_cursor_next(&cmd->dirty_cursor); -+ if (r) { -+ DMERR("dm_bitset_cursor_next for dirty failed"); -+ goto out; -+ } -+ } - } - out: - dm_array_cursor_end(&cmd->mapping_cursor); - if (hints_valid) - dm_array_cursor_end(&cmd->hint_cursor); - -+ if (separate_dirty_bits(cmd)) -+ dm_bitset_cursor_end(&cmd->dirty_cursor); -+ - return r; - } - -@@ -1360,13 +1530,55 @@ static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty - - } - --int dm_cache_set_dirty(struct dm_cache_metadata *cmd, -- dm_cblock_t cblock, bool dirty) -+static int __set_dirty_bits_v1(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits) -+{ -+ int r; -+ unsigned i; -+ for (i = 0; i < nr_bits; i++) { -+ r = __dirty(cmd, to_cblock(i), test_bit(i, bits)); -+ if (r) -+ return r; -+ } -+ -+ return 0; -+} -+ -+static int __set_dirty_bits_v2(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits) -+{ -+ int r = 0; -+ unsigned i; -+ -+ /* nr_bits is really just a sanity check */ -+ if (nr_bits != from_cblock(cmd->cache_blocks)) 
{ -+ DMERR("dirty bitset is wrong size"); -+ return -EINVAL; -+ } -+ -+ for (i = 0; i < nr_bits; i++) { -+ if (test_bit(i, bits)) -+ r = dm_bitset_set_bit(&cmd->dirty_info, cmd->dirty_root, i, &cmd->dirty_root); -+ else -+ r = dm_bitset_clear_bit(&cmd->dirty_info, cmd->dirty_root, i, &cmd->dirty_root); -+ -+ if (r) -+ return r; -+ } -+ -+ cmd->changed = true; -+ return dm_bitset_flush(&cmd->dirty_info, cmd->dirty_root, &cmd->dirty_root); -+} -+ -+int dm_cache_set_dirty_bits(struct dm_cache_metadata *cmd, -+ unsigned nr_bits, -+ unsigned long *bits) - { - int r; - - WRITE_LOCK(cmd); -- r = __dirty(cmd, cblock, dirty); -+ if (separate_dirty_bits(cmd)) -+ r = __set_dirty_bits_v2(cmd, nr_bits, bits); -+ else -+ r = __set_dirty_bits_v1(cmd, nr_bits, bits); - WRITE_UNLOCK(cmd); - - return r; -diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h -index 8528744..4f07c08 100644 ---- a/drivers/md/dm-cache-metadata.h -+++ b/drivers/md/dm-cache-metadata.h -@@ -45,18 +45,20 @@ - * As these various flags are defined they should be added to the - * following masks. - */ -+ - #define DM_CACHE_FEATURE_COMPAT_SUPP 0UL - #define DM_CACHE_FEATURE_COMPAT_RO_SUPP 0UL - #define DM_CACHE_FEATURE_INCOMPAT_SUPP 0UL - - /* -- * Reopens or creates a new, empty metadata volume. -- * Returns an ERR_PTR on failure. -+ * Reopens or creates a new, empty metadata volume. Returns an ERR_PTR on -+ * failure. If reopening then features must match. 
- */ - struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, - sector_t data_block_size, - bool may_format_device, -- size_t policy_hint_size); -+ size_t policy_hint_size, -+ unsigned metadata_version); - - void dm_cache_metadata_close(struct dm_cache_metadata *cmd); - -@@ -91,7 +93,8 @@ int dm_cache_load_mappings(struct dm_cache_metadata *cmd, - load_mapping_fn fn, - void *context); - --int dm_cache_set_dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty); -+int dm_cache_set_dirty_bits(struct dm_cache_metadata *cmd, -+ unsigned nr_bits, unsigned long *bits); - - struct dm_cache_statistics { - uint32_t read_hits; -diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c -index 897dc72..5813d2a 100644 ---- a/drivers/md/dm-cache-target.c -+++ b/drivers/md/dm-cache-target.c -@@ -179,6 +179,7 @@ enum cache_io_mode { - struct cache_features { - enum cache_metadata_mode mode; - enum cache_io_mode io_mode; -+ unsigned metadata_version; - }; - - struct cache_stats { -@@ -2541,13 +2542,14 @@ static void init_features(struct cache_features *cf) - { - cf->mode = CM_WRITE; - cf->io_mode = CM_IO_WRITEBACK; -+ cf->metadata_version = 1; - } - - static int parse_features(struct cache_args *ca, struct dm_arg_set *as, - char **error) - { - static struct dm_arg _args[] = { -- {0, 1, "Invalid number of cache feature arguments"}, -+ {0, 2, "Invalid number of cache feature arguments"}, - }; - - int r; -@@ -2573,6 +2575,9 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, - else if (!strcasecmp(arg, "passthrough")) - cf->io_mode = CM_IO_PASSTHROUGH; - -+ else if (!strcasecmp(arg, "metadata2")) -+ cf->metadata_version = 2; -+ - else { - *error = "Unrecognised cache feature requested"; - return -EINVAL; -@@ -2827,7 +2832,8 @@ static int cache_create(struct cache_args *ca, struct cache **result) - - cmd = dm_cache_metadata_open(cache->metadata_dev->bdev, - ca->block_size, may_format, -- 
dm_cache_policy_get_hint_size(cache->policy)); -+ dm_cache_policy_get_hint_size(cache->policy), -+ ca->features.metadata_version); - if (IS_ERR(cmd)) { - *error = "Error creating metadata object"; - r = PTR_ERR(cmd); -@@ -3172,21 +3178,16 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) - - static int write_dirty_bitset(struct cache *cache) - { -- unsigned i, r; -+ int r; - - if (get_cache_mode(cache) >= CM_READ_ONLY) - return -EINVAL; - -- for (i = 0; i < from_cblock(cache->cache_size); i++) { -- r = dm_cache_set_dirty(cache->cmd, to_cblock(i), -- is_dirty(cache, to_cblock(i))); -- if (r) { -- metadata_operation_failed(cache, "dm_cache_set_dirty", r); -- return r; -- } -- } -+ r = dm_cache_set_dirty_bits(cache->cmd, from_cblock(cache->cache_size), cache->dirty_bitset); -+ if (r) -+ metadata_operation_failed(cache, "dm_cache_set_dirty_bits", r); - -- return 0; -+ return r; - } - - static int write_discard_bitset(struct cache *cache) -@@ -3562,14 +3563,19 @@ static void cache_status(struct dm_target *ti, status_type_t type, - (unsigned) atomic_read(&cache->stats.promotion), - (unsigned long) atomic_read(&cache->nr_dirty)); - -+ if (cache->features.metadata_version == 2) -+ DMEMIT("2 metadata2 "); -+ else -+ DMEMIT("1 "); -+ - if (writethrough_mode(&cache->features)) -- DMEMIT("1 writethrough "); -+ DMEMIT("writethrough "); - - else if (passthrough_mode(&cache->features)) -- DMEMIT("1 passthrough "); -+ DMEMIT("passthrough "); - - else if (writeback_mode(&cache->features)) -- DMEMIT("1 writeback "); -+ DMEMIT("writeback "); - - else { - DMERR("%s: internal error: unknown io mode: %d", -@@ -3817,7 +3823,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) - - static struct target_type cache_target = { - .name = "cache", -- .version = {1, 9, 0}, -+ .version = {1, 10, 0}, - .module = THIS_MODULE, - .ctr = cache_ctr, - .dtr = cache_dtr, --- -2.10.2 - diff --git 
a/patches.drivers/0117-dm-cache-metadata-name-the-cache-block-that-couldn-t.patch b/patches.drivers/0117-dm-cache-metadata-name-the-cache-block-that-couldn-t.patch deleted file mode 100644 index 2c1b0ac835..0000000000 --- a/patches.drivers/0117-dm-cache-metadata-name-the-cache-block-that-couldn-t.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 48551054fc256285289f6d03abd50cb74fb71819 Mon Sep 17 00:00:00 2001 -From: Mike Snitzer <snitzer@redhat.com> -Date: Tue, 4 Oct 2016 15:22:17 -0400 -Subject: [PATCH] dm cache metadata: name the cache block that couldn't be - loaded -Git-commit: 48551054fc256285289f6d03abd50cb74fb71819 -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -Improves __load_mapping_v1() and __load_mapping_v2() DMERR messages to -explicitly name the cache block number whose mapping couldn't be -loaded. - -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/dm-cache-metadata.c | 12 ++++++++---- - 1 file changed, 8 insertions(+), 4 deletions(-) - -diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c -index 0610be7..5a5ef99 100644 ---- a/drivers/md/dm-cache-metadata.c -+++ b/drivers/md/dm-cache-metadata.c -@@ -1316,8 +1316,10 @@ static int __load_mapping_v1(struct dm_cache_metadata *cmd, - - r = fn(context, oblock, to_cblock(cb), flags & M_DIRTY, - le32_to_cpu(hint), hints_valid); -- if (r) -- DMERR("policy couldn't load cblock"); -+ if (r) { -+ DMERR("policy couldn't load cache block %llu", -+ (unsigned long long) from_cblock(to_cblock(cb))); -+ } - } - - return r; -@@ -1355,8 +1357,10 @@ static int __load_mapping_v2(struct dm_cache_metadata *cmd, - dirty = dm_bitset_cursor_get_value(dirty_cursor); - r = fn(context, oblock, to_cblock(cb), dirty, - le32_to_cpu(hint), hints_valid); -- if (r) -- DMERR("policy couldn't load cblock"); -+ if (r) { -+ DMERR("policy couldn't load cache block %llu", -+ (unsigned long long) from_cblock(to_cblock(cb))); -+ } - } - - return r; --- 
-2.10.2 - diff --git a/patches.drivers/0118-dm-bitset-add-dm_bitset_new.patch b/patches.drivers/0118-dm-bitset-add-dm_bitset_new.patch deleted file mode 100644 index 8f87e125ac..0000000000 --- a/patches.drivers/0118-dm-bitset-add-dm_bitset_new.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 2151249eaabb48151cff6364adb4054b3497d62d Mon Sep 17 00:00:00 2001 -From: Joe Thornber <ejt@redhat.com> -Date: Thu, 22 Sep 2016 10:44:41 -0400 -Subject: [PATCH] dm bitset: add dm_bitset_new() -Git-commit: 2151249eaabb48151cff6364adb4054b3497d62d -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -A more efficient way of creating a populated bitset. - -Signed-off-by: Joe Thornber <ejt@redhat.com> -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/persistent-data/dm-bitset.c | 42 ++++++++++++++++++++++++++++++++++ - drivers/md/persistent-data/dm-bitset.h | 16 +++++++++++++ - 2 files changed, 58 insertions(+) - -diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c -index 7cb2851..fbf8d9b 100644 ---- a/drivers/md/persistent-data/dm-bitset.c -+++ b/drivers/md/persistent-data/dm-bitset.c -@@ -39,6 +39,48 @@ int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *root) - } - EXPORT_SYMBOL_GPL(dm_bitset_empty); - -+struct packer_context { -+ bit_value_fn fn; -+ unsigned nr_bits; -+ void *context; -+}; -+ -+static int pack_bits(uint32_t index, void *value, void *context) -+{ -+ int r; -+ struct packer_context *p = context; -+ unsigned bit, nr = min(64u, p->nr_bits - (index * 64)); -+ uint64_t word = 0; -+ bool bv; -+ -+ for (bit = 0; bit < nr; bit++) { -+ r = p->fn(index * 64 + bit, &bv, p->context); -+ if (r) -+ return r; -+ -+ if (bv) -+ set_bit(bit, (unsigned long *) &word); -+ else -+ clear_bit(bit, (unsigned long *) &word); -+ } -+ -+ *((__le64 *) value) = cpu_to_le64(word); -+ -+ return 0; -+} -+ -+int dm_bitset_new(struct dm_disk_bitset *info, dm_block_t *root, -+ uint32_t size, 
bit_value_fn fn, void *context) -+{ -+ struct packer_context p; -+ p.fn = fn; -+ p.nr_bits = size; -+ p.context = context; -+ -+ return dm_array_new(&info->array_info, root, dm_div_up(size, 64), pack_bits, &p); -+} -+EXPORT_SYMBOL_GPL(dm_bitset_new); -+ - int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t root, - uint32_t old_nr_entries, uint32_t new_nr_entries, - bool default_value, dm_block_t *new_root) -diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h -index 017c0d4..a086368 100644 ---- a/drivers/md/persistent-data/dm-bitset.h -+++ b/drivers/md/persistent-data/dm-bitset.h -@@ -93,6 +93,22 @@ void dm_disk_bitset_init(struct dm_transaction_manager *tm, - int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *new_root); - - /* -+ * Creates a new bitset populated with values provided by a callback -+ * function. This is more efficient than creating an empty bitset, -+ * resizing, and then setting values since that process incurs a lot of -+ * copying. -+ * -+ * info - describes the array -+ * root - the root block of the array on disk -+ * size - the number of entries in the array -+ * fn - the callback -+ * context - passed to the callback -+ */ -+typedef int (*bit_value_fn)(uint32_t index, bool *value, void *context); -+int dm_bitset_new(struct dm_disk_bitset *info, dm_block_t *root, -+ uint32_t size, bit_value_fn fn, void *context); -+ -+/* - * Resize the bitset. 
- * - * info - describes the bitset --- -2.10.2 - diff --git a/patches.drivers/0119-dm-cache-metadata-use-dm_bitset_new-to-create-the-di.patch b/patches.drivers/0119-dm-cache-metadata-use-dm_bitset_new-to-create-the-di.patch deleted file mode 100644 index 54dca1569e..0000000000 --- a/patches.drivers/0119-dm-cache-metadata-use-dm_bitset_new-to-create-the-di.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 683bb1a3742bb0c8768711aa5ff1034d92e447f2 Mon Sep 17 00:00:00 2001 -From: Joe Thornber <ejt@redhat.com> -Date: Thu, 22 Sep 2016 10:45:21 -0400 -Subject: [PATCH] dm cache metadata: use dm_bitset_new() to create the dirty - bitset in format 2 -Git-commit: 683bb1a3742bb0c8768711aa5ff1034d92e447f2 -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -Big speed up with large configs. - -Signed-off-by: Joe Thornber <ejt@redhat.com> -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/dm-cache-metadata.c | 22 +++++++++++----------- - 1 file changed, 11 insertions(+), 11 deletions(-) - -diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c -index 5a5ef99..7e31a4b 100644 ---- a/drivers/md/dm-cache-metadata.c -+++ b/drivers/md/dm-cache-metadata.c -@@ -1547,10 +1547,16 @@ static int __set_dirty_bits_v1(struct dm_cache_metadata *cmd, unsigned nr_bits, - return 0; - } - -+static int is_dirty_callback(uint32_t index, bool *value, void *context) -+{ -+ unsigned long *bits = context; -+ *value = test_bit(index, bits); -+ return 0; -+} -+ - static int __set_dirty_bits_v2(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits) - { - int r = 0; -- unsigned i; - - /* nr_bits is really just a sanity check */ - if (nr_bits != from_cblock(cmd->cache_blocks)) { -@@ -1558,18 +1564,12 @@ static int __set_dirty_bits_v2(struct dm_cache_metadata *cmd, unsigned nr_bits, - return -EINVAL; - } - -- for (i = 0; i < nr_bits; i++) { -- if (test_bit(i, bits)) -- r = dm_bitset_set_bit(&cmd->dirty_info, 
cmd->dirty_root, i, &cmd->dirty_root); -- else -- r = dm_bitset_clear_bit(&cmd->dirty_info, cmd->dirty_root, i, &cmd->dirty_root); -- -- if (r) -- return r; -- } -+ r = dm_bitset_del(&cmd->dirty_info, cmd->dirty_root); -+ if (r) -+ return r; - - cmd->changed = true; -- return dm_bitset_flush(&cmd->dirty_info, cmd->dirty_root, &cmd->dirty_root); -+ return dm_bitset_new(&cmd->dirty_info, &cmd->dirty_root, nr_bits, is_dirty_callback, bits); - } - - int dm_cache_set_dirty_bits(struct dm_cache_metadata *cmd, --- -2.10.2 - diff --git a/patches.drivers/0120-dm-persistent-data-add-cursor-skip-functions-to-the-.patch b/patches.drivers/0120-dm-persistent-data-add-cursor-skip-functions-to-the-.patch deleted file mode 100644 index 1ed6b2ceb0..0000000000 --- a/patches.drivers/0120-dm-persistent-data-add-cursor-skip-functions-to-the-.patch +++ /dev/null @@ -1,161 +0,0 @@ -From 9b696229aa7de356675a938c6c8a70b46085ed66 Mon Sep 17 00:00:00 2001 -From: Joe Thornber <ejt@redhat.com> -Date: Wed, 5 Oct 2016 10:40:39 -0400 -Subject: [PATCH] dm persistent data: add cursor skip functions to the cursor - APIs -Git-commit: 9b696229aa7de356675a938c6c8a70b46085ed66 -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -Signed-off-by: Joe Thornber <ejt@redhat.com> -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/persistent-data/dm-array.c | 21 ++++++++++++++++++++ - drivers/md/persistent-data/dm-array.h | 1 + - drivers/md/persistent-data/dm-bitset.c | 35 ++++++++++++++++++++++++++++++++++ - drivers/md/persistent-data/dm-bitset.h | 1 + - drivers/md/persistent-data/dm-btree.c | 11 +++++++++++ - drivers/md/persistent-data/dm-btree.h | 1 + - 6 files changed, 70 insertions(+) - -diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c -index 7938cd2..185dc60 100644 ---- a/drivers/md/persistent-data/dm-array.c -+++ b/drivers/md/persistent-data/dm-array.c -@@ -976,6 +976,27 @@ int 
dm_array_cursor_next(struct dm_array_cursor *c) - } - EXPORT_SYMBOL_GPL(dm_array_cursor_next); - -+int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count) -+{ -+ int r; -+ -+ do { -+ uint32_t remaining = le32_to_cpu(c->ab->nr_entries) - c->index; -+ -+ if (count < remaining) { -+ c->index += count; -+ return 0; -+ } -+ -+ count -= remaining; -+ r = dm_array_cursor_next(c); -+ -+ } while (!r); -+ -+ return r; -+} -+EXPORT_SYMBOL_GPL(dm_array_cursor_skip); -+ - void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le) - { - *value_le = element_at(c->info, c->ab, c->index); -diff --git a/drivers/md/persistent-data/dm-array.h b/drivers/md/persistent-data/dm-array.h -index 27ee49a..d7d2d57 100644 ---- a/drivers/md/persistent-data/dm-array.h -+++ b/drivers/md/persistent-data/dm-array.h -@@ -207,6 +207,7 @@ void dm_array_cursor_end(struct dm_array_cursor *c); - - uint32_t dm_array_cursor_index(struct dm_array_cursor *c); - int dm_array_cursor_next(struct dm_array_cursor *c); -+int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count); - - /* - * value_le is only valid while the cursor points at the current value. 
-diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c -index fbf8d9b..b7208d8 100644 ---- a/drivers/md/persistent-data/dm-bitset.c -+++ b/drivers/md/persistent-data/dm-bitset.c -@@ -273,6 +273,41 @@ int dm_bitset_cursor_next(struct dm_bitset_cursor *c) - } - EXPORT_SYMBOL_GPL(dm_bitset_cursor_next); - -+int dm_bitset_cursor_skip(struct dm_bitset_cursor *c, uint32_t count) -+{ -+ int r; -+ __le64 *value; -+ uint32_t nr_array_skip; -+ uint32_t remaining_in_word = 64 - c->bit_index; -+ -+ if (c->entries_remaining < count) -+ return -ENODATA; -+ -+ if (count < remaining_in_word) { -+ c->bit_index += count; -+ c->entries_remaining -= count; -+ return 0; -+ -+ } else { -+ c->entries_remaining -= remaining_in_word; -+ count -= remaining_in_word; -+ } -+ -+ nr_array_skip = (count / 64) + 1; -+ r = dm_array_cursor_skip(&c->cursor, nr_array_skip); -+ if (r) -+ return r; -+ -+ dm_array_cursor_get_value(&c->cursor, (void **) &value); -+ c->entries_remaining -= count; -+ c->array_index += nr_array_skip; -+ c->bit_index = count & 63; -+ c->current_bits = le64_to_cpu(*value); -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(dm_bitset_cursor_skip); -+ - bool dm_bitset_cursor_get_value(struct dm_bitset_cursor *c) - { - return test_bit(c->bit_index, (unsigned long *) &c->current_bits); -diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h -index a086368..df888da 100644 ---- a/drivers/md/persistent-data/dm-bitset.h -+++ b/drivers/md/persistent-data/dm-bitset.h -@@ -197,6 +197,7 @@ int dm_bitset_cursor_begin(struct dm_disk_bitset *info, - void dm_bitset_cursor_end(struct dm_bitset_cursor *c); - - int dm_bitset_cursor_next(struct dm_bitset_cursor *c); -+int dm_bitset_cursor_skip(struct dm_bitset_cursor *c, uint32_t count); - bool dm_bitset_cursor_get_value(struct dm_bitset_cursor *c); - - /*----------------------------------------------------------------*/ -diff --git a/drivers/md/persistent-data/dm-btree.c 
b/drivers/md/persistent-data/dm-btree.c -index 1744f36..02e2ee0 100644 ---- a/drivers/md/persistent-data/dm-btree.c -+++ b/drivers/md/persistent-data/dm-btree.c -@@ -1144,6 +1144,17 @@ int dm_btree_cursor_next(struct dm_btree_cursor *c) - } - EXPORT_SYMBOL_GPL(dm_btree_cursor_next); - -+int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count) -+{ -+ int r = 0; -+ -+ while (count-- && !r) -+ r = dm_btree_cursor_next(c); -+ -+ return r; -+} -+EXPORT_SYMBOL_GPL(dm_btree_cursor_skip); -+ - int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le) - { - if (c->depth) { -diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h -index db9bd26..3dc5bb1 100644 ---- a/drivers/md/persistent-data/dm-btree.h -+++ b/drivers/md/persistent-data/dm-btree.h -@@ -209,6 +209,7 @@ int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root, - bool prefetch_leaves, struct dm_btree_cursor *c); - void dm_btree_cursor_end(struct dm_btree_cursor *c); - int dm_btree_cursor_next(struct dm_btree_cursor *c); -+int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count); - int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le); - - #endif /* _LINUX_DM_BTREE_H */ --- -2.10.2 - diff --git a/patches.drivers/0121-dm-cache-metadata-use-cursor-api-in-blocks_are_clean.patch b/patches.drivers/0121-dm-cache-metadata-use-cursor-api-in-blocks_are_clean.patch deleted file mode 100644 index d387cbb4d5..0000000000 --- a/patches.drivers/0121-dm-cache-metadata-use-cursor-api-in-blocks_are_clean.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 7f1b21591a632c6caefd9aa53b630808f4f477e1 Mon Sep 17 00:00:00 2001 -From: Mike Snitzer <snitzer@redhat.com> -Date: Tue, 4 Oct 2016 15:00:47 -0400 -Subject: [PATCH] dm cache metadata: use cursor api in - blocks_are_clean_separate_dirty() -Git-commit: 7f1b21591a632c6caefd9aa53b630808f4f477e1 -Patch-mainline: v4.11-rc1 -References: FATE#321488 - 
-Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/dm-cache-metadata.c | 33 ++++++++++++++++++++++++++------- - 1 file changed, 26 insertions(+), 7 deletions(-) - -diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c -index 7e31a4b..e4c2c1a 100644 ---- a/drivers/md/dm-cache-metadata.c -+++ b/drivers/md/dm-cache-metadata.c -@@ -931,27 +931,46 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd, - bool dirty_flag; - *result = true; - -- // FIXME: use a cursor so we can benefit from preloading metadata. -+ r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root, -+ from_cblock(begin), &cmd->dirty_cursor); -+ if (r) { -+ DMERR("%s: dm_bitset_cursor_begin for dirty failed", __func__); -+ return r; -+ } -+ -+ r = dm_bitset_cursor_skip(&cmd->dirty_cursor, from_cblock(begin)); -+ if (r) { -+ DMERR("%s: dm_bitset_cursor_skip for dirty failed", __func__); -+ dm_bitset_cursor_end(&cmd->dirty_cursor); -+ return r; -+ } -+ - while (begin != end) { - /* - * We assume that unmapped blocks have their dirty bit - * cleared. 
- */ -- r = dm_bitset_test_bit(&cmd->dirty_info, cmd->dirty_root, -- from_cblock(begin), &cmd->dirty_root, &dirty_flag); -- if (r) -- return r; -- -+ dirty_flag = dm_bitset_cursor_get_value(&cmd->dirty_cursor); - if (dirty_flag) { -- DMERR("cache block %llu is dirty", -+ DMERR("%s: cache block %llu is dirty", __func__, - (unsigned long long) from_cblock(begin)); -+ dm_bitset_cursor_end(&cmd->dirty_cursor); - *result = false; - return 0; - } - -+ r = dm_bitset_cursor_next(&cmd->dirty_cursor); -+ if (r) { -+ DMERR("%s: dm_bitset_cursor_next for dirty failed", __func__); -+ dm_bitset_cursor_end(&cmd->dirty_cursor); -+ return r; -+ } -+ - begin = to_cblock(from_cblock(begin) + 1); - } - -+ dm_bitset_cursor_end(&cmd->dirty_cursor); -+ - return 0; - } - --- -2.10.2 - diff --git a/patches.drivers/0122-dm-space-map-metadata-constify-dm_space_map-structur.patch b/patches.drivers/0122-dm-space-map-metadata-constify-dm_space_map-structur.patch deleted file mode 100644 index 1ddb23fc32..0000000000 --- a/patches.drivers/0122-dm-space-map-metadata-constify-dm_space_map-structur.patch +++ /dev/null @@ -1,54 +0,0 @@ -From b79af13efd98ca2908f2df1120e79a7ff70faa0d Mon Sep 17 00:00:00 2001 -From: Bhumika Goyal <bhumirks@gmail.com> -Date: Wed, 15 Feb 2017 23:43:28 +0530 -Subject: [PATCH] dm space map metadata: constify dm_space_map structures -Git-commit: b79af13efd98ca2908f2df1120e79a7ff70faa0d -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -Declare dm_space_map structures as const as they are only passed as an -argument to the function memcpy. This argument is of type const void *, -so dm_space_map structures having this property can be declared as -const. 
- -File size before: - text data bss dec hex filename - 4889 240 0 5129 1409 dm-space-map-metadata.o - -File size after: - text data bss dec hex filename - 5139 0 0 5139 1413 dm-space-map-metadata.o - -Signed-off-by: Bhumika Goyal <bhumirks@gmail.com> -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/persistent-data/dm-space-map-metadata.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c -index 20557e2..4aed69d 100644 ---- a/drivers/md/persistent-data/dm-space-map-metadata.c -+++ b/drivers/md/persistent-data/dm-space-map-metadata.c -@@ -544,7 +544,7 @@ static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t - - static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks); - --static struct dm_space_map ops = { -+static const struct dm_space_map ops = { - .destroy = sm_metadata_destroy, - .extend = sm_metadata_extend, - .get_nr_blocks = sm_metadata_get_nr_blocks, -@@ -671,7 +671,7 @@ static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where, - return -EINVAL; - } - --static struct dm_space_map bootstrap_ops = { -+static const struct dm_space_map bootstrap_ops = { - .destroy = sm_bootstrap_destroy, - .extend = sm_bootstrap_extend, - .get_nr_blocks = sm_bootstrap_get_nr_blocks, --- -2.10.2 - diff --git a/patches.drivers/0123-md-raid5-Don-t-reinvent-the-wheel-but-use-existing-l.patch b/patches.drivers/0123-md-raid5-Don-t-reinvent-the-wheel-but-use-existing-l.patch deleted file mode 100644 index 5709dd7cc7..0000000000 --- a/patches.drivers/0123-md-raid5-Don-t-reinvent-the-wheel-but-use-existing-l.patch +++ /dev/null @@ -1,45 +0,0 @@ -From eae8263fb1f4256460270dd8f42334604dcdfac6 Mon Sep 17 00:00:00 2001 -From: Byungchul Park <byungchul.park@lge.com> -Date: Tue, 14 Feb 2017 16:26:24 +0900 -Subject: [PATCH] md/raid5: Don't 
reinvent the wheel but use existing llist API -Git-commit: eae8263fb1f4256460270dd8f42334604dcdfac6 -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -Although llist provides proper APIs, they are not used. Make them used. - -Signed-off-by: Byungchul Park <byungchul.park@lge.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5.c | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - -diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index b193316..7b7722bb2 100644 ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -353,17 +353,15 @@ static void release_inactive_stripe_list(struct r5conf *conf, - static int release_stripe_list(struct r5conf *conf, - struct list_head *temp_inactive_list) - { -- struct stripe_head *sh; -+ struct stripe_head *sh, *t; - int count = 0; - struct llist_node *head; - - head = llist_del_all(&conf->released_stripes); - head = llist_reverse_order(head); -- while (head) { -+ llist_for_each_entry_safe(sh, t, head, release_list) { - int hash; - -- sh = llist_entry(head, struct stripe_head, release_list); -- head = llist_next(head); - /* sh could be readded after STRIPE_ON_RELEASE_LIST is cleard */ - smp_mb(); - clear_bit(STRIPE_ON_RELEASE_LIST, &sh->state); --- -2.10.2 - diff --git a/patches.drivers/0124-dm-round-robin-revert-use-percpu-repeat_count-and-cu.patch b/patches.drivers/0124-dm-round-robin-revert-use-percpu-repeat_count-and-cu.patch deleted file mode 100644 index d284f47465..0000000000 --- a/patches.drivers/0124-dm-round-robin-revert-use-percpu-repeat_count-and-cu.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 37a098e9d10db6e2efc05fe61e3a6ff2e9802c53 Mon Sep 17 00:00:00 2001 -From: Mike Snitzer <snitzer@redhat.com> -Date: Thu, 16 Feb 2017 23:57:17 -0500 -Subject: [PATCH] dm round robin: revert "use percpu 'repeat_count' and - 'current_path'" -Git-commit: 37a098e9d10db6e2efc05fe61e3a6ff2e9802c53 -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -The sloppy 
nature of lockless access to percpu pointers -(s->current_path) in rr_select_path(), from multiple threads, is -causing some paths to used more than others -- which results in less -IO performance being observed. - -Revert these upstream commits to restore truly symmetric round-robin -IO submission in DM multipath: - -b0b477c dm round robin: use percpu 'repeat_count' and 'current_path' -802934b dm round robin: do not use this_cpu_ptr() without having preemption disabled - -There is no benefit to all this complexity if repeat_count = 1 (which is -the recommended default). - -Cc: stable@vger.kernel.org # 4.6+ -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/dm-round-robin.c | 67 ++++++++++----------------------------------- - 1 file changed, 14 insertions(+), 53 deletions(-) - -diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c -index 6c25213..bdbb7e6 100644 ---- a/drivers/md/dm-round-robin.c -+++ b/drivers/md/dm-round-robin.c -@@ -17,8 +17,8 @@ - #include <linux/module.h> - - #define DM_MSG_PREFIX "multipath round-robin" --#define RR_MIN_IO 1000 --#define RR_VERSION "1.1.0" -+#define RR_MIN_IO 1 -+#define RR_VERSION "1.2.0" - - /*----------------------------------------------------------------- - * Path-handling code, paths are held in lists -@@ -47,44 +47,19 @@ struct selector { - struct list_head valid_paths; - struct list_head invalid_paths; - spinlock_t lock; -- struct dm_path * __percpu *current_path; -- struct percpu_counter repeat_count; - }; - --static void set_percpu_current_path(struct selector *s, struct dm_path *path) --{ -- int cpu; -- -- for_each_possible_cpu(cpu) -- *per_cpu_ptr(s->current_path, cpu) = path; --} -- - static struct selector *alloc_selector(void) - { - struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); - -- if (!s) -- return NULL; -- -- INIT_LIST_HEAD(&s->valid_paths); -- INIT_LIST_HEAD(&s->invalid_paths); -- spin_lock_init(&s->lock); -- -- 
s->current_path = alloc_percpu(struct dm_path *); -- if (!s->current_path) -- goto out_current_path; -- set_percpu_current_path(s, NULL); -- -- if (percpu_counter_init(&s->repeat_count, 0, GFP_KERNEL)) -- goto out_repeat_count; -+ if (s) { -+ INIT_LIST_HEAD(&s->valid_paths); -+ INIT_LIST_HEAD(&s->invalid_paths); -+ spin_lock_init(&s->lock); -+ } - - return s; -- --out_repeat_count: -- free_percpu(s->current_path); --out_current_path: -- kfree(s); -- return NULL;; - } - - static int rr_create(struct path_selector *ps, unsigned argc, char **argv) -@@ -105,8 +80,6 @@ static void rr_destroy(struct path_selector *ps) - - free_paths(&s->valid_paths); - free_paths(&s->invalid_paths); -- free_percpu(s->current_path); -- percpu_counter_destroy(&s->repeat_count); - kfree(s); - ps->context = NULL; - } -@@ -157,6 +130,11 @@ static int rr_add_path(struct path_selector *ps, struct dm_path *path, - return -EINVAL; - } - -+ if (repeat_count > 1) { -+ DMWARN_LIMIT("repeat_count > 1 is deprecated, using 1 instead"); -+ repeat_count = 1; -+ } -+ - /* allocate the path */ - pi = kmalloc(sizeof(*pi), GFP_KERNEL); - if (!pi) { -@@ -183,9 +161,6 @@ static void rr_fail_path(struct path_selector *ps, struct dm_path *p) - struct path_info *pi = p->pscontext; - - spin_lock_irqsave(&s->lock, flags); -- if (p == *this_cpu_ptr(s->current_path)) -- set_percpu_current_path(s, NULL); -- - list_move(&pi->list, &s->invalid_paths); - spin_unlock_irqrestore(&s->lock, flags); - } -@@ -208,29 +183,15 @@ static struct dm_path *rr_select_path(struct path_selector *ps, size_t nr_bytes) - unsigned long flags; - struct selector *s = ps->context; - struct path_info *pi = NULL; -- struct dm_path *current_path = NULL; -- -- local_irq_save(flags); -- current_path = *this_cpu_ptr(s->current_path); -- if (current_path) { -- percpu_counter_dec(&s->repeat_count); -- if (percpu_counter_read_positive(&s->repeat_count) > 0) { -- local_irq_restore(flags); -- return current_path; -- } -- } - -- spin_lock(&s->lock); -+ 
spin_lock_irqsave(&s->lock, flags); - if (!list_empty(&s->valid_paths)) { - pi = list_entry(s->valid_paths.next, struct path_info, list); - list_move_tail(&pi->list, &s->valid_paths); -- percpu_counter_set(&s->repeat_count, pi->repeat_count); -- set_percpu_current_path(s, pi->path); -- current_path = pi->path; - } - spin_unlock_irqrestore(&s->lock, flags); - -- return current_path; -+ return pi ? pi->path : NULL; - } - - static struct path_selector_type rr_ps = { --- -2.10.2 - diff --git a/patches.drivers/0125-md-linear-shutup-lockdep-warnning.patch b/patches.drivers/0125-md-linear-shutup-lockdep-warnning.patch deleted file mode 100644 index 4ceb786551..0000000000 --- a/patches.drivers/0125-md-linear-shutup-lockdep-warnning.patch +++ /dev/null @@ -1,36 +0,0 @@ -From d939cdfde34f50b95254b375f498447c82190b3e Mon Sep 17 00:00:00 2001 -From: Shaohua Li <shli@fb.com> -Date: Tue, 21 Feb 2017 11:57:01 -0800 -Subject: [PATCH] md/linear: shutup lockdep warnning -Git-commit: d939cdfde34f50b95254b375f498447c82190b3e -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -Commit 03a9e24(md linear: fix a race between linear_add() and -linear_congested()) introduces the warnning. - -Acked-by: Coly Li <colyli@suse.de> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/linear.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/drivers/md/linear.c b/drivers/md/linear.c -index 789008b..5b06b0d 100644 ---- a/drivers/md/linear.c -+++ b/drivers/md/linear.c -@@ -224,7 +224,8 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev) - * oldconf until no one uses it anymore. 
- */ - mddev_suspend(mddev); -- oldconf = rcu_dereference(mddev->private); -+ oldconf = rcu_dereference_protected(mddev->private, -+ lockdep_is_held(&mddev->reconfig_mutex)); - mddev->raid_disks++; - WARN_ONCE(mddev->raid_disks != newconf->raid_disks, - "copied raid_disks doesn't match mddev->raid_disks"); --- -2.10.2 - diff --git a/patches.drivers/0126-md-raid1-fix-write-behind-issues-introduced-by-bio_c.patch b/patches.drivers/0126-md-raid1-fix-write-behind-issues-introduced-by-bio_c.patch deleted file mode 100644 index 193c1d95df..0000000000 --- a/patches.drivers/0126-md-raid1-fix-write-behind-issues-introduced-by-bio_c.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 1ec492232ed659acde8cc00b9ecc7529778e03e1 Mon Sep 17 00:00:00 2001 -From: Shaohua Li <shli@fb.com> -Date: Tue, 21 Feb 2017 14:27:57 -0800 -Subject: [PATCH] md/raid1: fix write behind issues introduced by - bio_clone_bioset_partial -Git-commit: 1ec492232ed659acde8cc00b9ecc7529778e03e1 -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -There are two issues, introduced by commit 8e58e32(md/raid1: use -bio_clone_bioset_partial() in case of write behind): -- bio_clone_bioset_partial() uses bytes instead of sectors as parameters -- in writebehind mode, we return bio if all !writemostly disk bios finish, - which could happen before writemostly disk bios run. So all - writemostly disk bios should have their bvec. Here we just make sure - all bios are cloned instead of fast cloned. 
- -Reviewed-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 15 +++++++++++---- - 1 file changed, 11 insertions(+), 4 deletions(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index 8901f0c..d4e8796 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -1472,8 +1472,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio) - !waitqueue_active(&bitmap->behind_wait)) { - mbio = bio_clone_bioset_partial(bio, GFP_NOIO, - mddev->bio_set, -- offset, -- max_sectors); -+ offset << 9, -+ max_sectors << 9); - alloc_behind_pages(mbio, r1_bio); - } - -@@ -1485,8 +1485,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio) - } - - if (!mbio) { -- mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); -- bio_trim(mbio, offset, max_sectors); -+ if (r1_bio->behind_bvecs) -+ mbio = bio_clone_bioset_partial(bio, GFP_NOIO, -+ mddev->bio_set, -+ offset << 9, -+ max_sectors << 9); -+ else { -+ mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); -+ bio_trim(mbio, offset, max_sectors); -+ } - } - - if (r1_bio->behind_bvecs) { --- -2.10.2 - diff --git a/patches.drivers/0127-dm-raid-bump-the-target-version.patch b/patches.drivers/0127-dm-raid-bump-the-target-version.patch deleted file mode 100644 index 6e9d957518..0000000000 --- a/patches.drivers/0127-dm-raid-bump-the-target-version.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 2664f3c94abc7181171b7c05b2aaa76ea7d9d613 Mon Sep 17 00:00:00 2001 -From: Mike Snitzer <snitzer@redhat.com> -Date: Tue, 28 Feb 2017 15:31:44 -0500 -Subject: [PATCH] dm raid: bump the target version -Git-commit: 2664f3c94abc7181171b7c05b2aaa76ea7d9d613 -Patch-mainline: v4.11-rc1 -References: FATE#321488 - -This version bump reflects that the reshape corruption fix (commit -92a39f6cc "dm raid: fix data corruption on reshape request") is -present. 
- -Done as a separate fix because the above referenced commit is marked for -stable and target version bumps in a stable@ fix are a recipe for the -fix to never get backported to stable@ kernels (because of target -version number conflicts). - -Also, move RESUME_STAY_FROZEN_FLAGS up with the reset the the _FLAGS -definitions now that we don't need to worry about stable@ conflicts as a -result of missing context. - -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/dm-raid.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c -index 350527f..f8564d6 100644 ---- a/drivers/md/dm-raid.c -+++ b/drivers/md/dm-raid.c -@@ -101,6 +101,8 @@ struct raid_dev { - #define CTR_FLAG_RAID10_USE_NEAR_SETS (1 << __CTR_FLAG_RAID10_USE_NEAR_SETS) - #define CTR_FLAG_JOURNAL_DEV (1 << __CTR_FLAG_JOURNAL_DEV) - -+#define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET) -+ - /* - * Definitions of various constructor flags to - * be used in checks of valid / invalid flags -@@ -3756,8 +3758,6 @@ static int raid_preresume(struct dm_target *ti) - return r; - } - --#define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET) -- - static void raid_resume(struct dm_target *ti) - { - struct raid_set *rs = ti->private; -@@ -3791,7 +3791,7 @@ static void raid_resume(struct dm_target *ti) - - static struct target_type raid_target = { - .name = "raid", -- .version = {1, 10, 0}, -+ .version = {1, 10, 1}, - .module = THIS_MODULE, - .ctr = raid_ctr, - .dtr = raid_dtr, --- -2.10.2 - diff --git a/patches.drivers/0128-md-raid10-submit-bio-directly-to-replacement-disk.patch b/patches.drivers/0128-md-raid10-submit-bio-directly-to-replacement-disk.patch deleted file mode 100644 index c4fd1f2c57..0000000000 --- a/patches.drivers/0128-md-raid10-submit-bio-directly-to-replacement-disk.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 
6d399783e9d4e9bd44931501948059d24ad96ff8 Mon Sep 17 00:00:00 2001 -From: Shaohua Li <shli@fb.com> -Date: Thu, 23 Feb 2017 12:26:41 -0800 -Subject: [PATCH] md/raid10: submit bio directly to replacement disk -Git-commit: 6d399783e9d4e9bd44931501948059d24ad96ff8 -Patch-mainline: v4.11-rc3 -References: FATE#321488 - -Commit 57c67df(md/raid10: submit IO from originating thread instead of -md thread) submits bio directly for normal disks but not for replacement -disks. There is no point we shouldn't do this for replacement disks. - -Cc: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid10.c | 19 ++++++++++++++++--- - 1 file changed, 16 insertions(+), 3 deletions(-) - -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index 063c43d..1443305 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -1477,11 +1477,24 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, - mbio->bi_bdev = (void*)rdev; - - atomic_inc(&r10_bio->remaining); -+ -+ cb = blk_check_plugged(raid10_unplug, mddev, -+ sizeof(*plug)); -+ if (cb) -+ plug = container_of(cb, struct raid10_plug_cb, -+ cb); -+ else -+ plug = NULL; - spin_lock_irqsave(&conf->device_lock, flags); -- bio_list_add(&conf->pending_bio_list, mbio); -- conf->pending_count++; -+ if (plug) { -+ bio_list_add(&plug->pending, mbio); -+ plug->pending_cnt++; -+ } else { -+ bio_list_add(&conf->pending_bio_list, mbio); -+ conf->pending_count++; -+ } - spin_unlock_irqrestore(&conf->device_lock, flags); -- if (!mddev_check_plugged(mddev)) -+ if (!plug) - md_wakeup_thread(mddev->thread); - } - } --- -2.10.2 - diff --git a/patches.drivers/0129-md-delete-dead-code.patch b/patches.drivers/0129-md-delete-dead-code.patch deleted file mode 100644 index b263b200e5..0000000000 --- a/patches.drivers/0129-md-delete-dead-code.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 99b3d74ec05c4a4c57766a90d65b53d78ab06404 Mon Sep 17 00:00:00 2001 -From: 
Shaohua Li <shli@fb.com> -Date: Thu, 23 Feb 2017 12:31:10 -0800 -Subject: [PATCH] md: delete dead code -Git-commit: 99b3d74ec05c4a4c57766a90d65b53d78ab06404 -Patch-mainline: v4.11-rc3 -References: FATE#321488 - -Nobody is using mddev_check_plugged(), so delete the dead code - -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md.c | 8 -------- - drivers/md/md.h | 6 ------ - 2 files changed, 14 deletions(-) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index 548d1b8..82bd1f3 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -440,14 +440,6 @@ void md_flush_request(struct mddev *mddev, struct bio *bio) - } - EXPORT_SYMBOL(md_flush_request); - --void md_unplug(struct blk_plug_cb *cb, bool from_schedule) --{ -- struct mddev *mddev = cb->data; -- md_wakeup_thread(mddev->thread); -- kfree(cb); --} --EXPORT_SYMBOL(md_unplug); -- - static inline struct mddev *mddev_get(struct mddev *mddev) - { - atomic_inc(&mddev->active); -diff --git a/drivers/md/md.h b/drivers/md/md.h -index b8859cb..dde8ecb 100644 ---- a/drivers/md/md.h -+++ b/drivers/md/md.h -@@ -676,16 +676,10 @@ extern void mddev_resume(struct mddev *mddev); - extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, - struct mddev *mddev); - --extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule); - extern void md_reload_sb(struct mddev *mddev, int raid_disk); - extern void md_update_sb(struct mddev *mddev, int force); - extern void md_kick_rdev_from_array(struct md_rdev * rdev); - struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr); --static inline int mddev_check_plugged(struct mddev *mddev) --{ -- return !!blk_check_plugged(md_unplug, mddev, -- sizeof(struct blk_plug_cb)); --} - - static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev) - { --- -2.10.2 - diff --git a/patches.drivers/0130-md-don-t-impose-the-MD_SB_DISKS-limit-on-arrays-with.patch 
b/patches.drivers/0130-md-don-t-impose-the-MD_SB_DISKS-limit-on-arrays-with.patch deleted file mode 100644 index f874003ed5..0000000000 --- a/patches.drivers/0130-md-don-t-impose-the-MD_SB_DISKS-limit-on-arrays-with.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 1b3bae49fba52f1ec499c36c53bc07761a9f6c4d Mon Sep 17 00:00:00 2001 -From: NeilBrown <neilb@suse.com> -Date: Wed, 1 Mar 2017 07:31:28 +1100 -Subject: [PATCH] md: don't impose the MD_SB_DISKS limit on arrays without - metadata. -Git-commit: 1b3bae49fba52f1ec499c36c53bc07761a9f6c4d -Patch-mainline: v4.11-rc3 -References: FATE#321488 - -These arrays, created with "mdadm --build" don't benefit from a limit. -The default will be used, which is '0' and is interpreted as "don't -impose a limit". - -Reported-by: ian_bruce@mail.ru -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md.c | 7 +++---- - 1 file changed, 3 insertions(+), 4 deletions(-) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index bd15a18..cd89ad3 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -6450,11 +6450,10 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info) - mddev->layout = info->layout; - mddev->chunk_sectors = info->chunk_size >> 9; - -- mddev->max_disks = MD_SB_DISKS; -- - if (mddev->persistent) { -- mddev->flags = 0; -- mddev->sb_flags = 0; -+ mddev->max_disks = MD_SB_DISKS; -+ mddev->flags = 0; -+ mddev->sb_flags = 0; - } - set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); - --- -2.10.2 - diff --git a/patches.drivers/0131-drivers-md-bcache-util.h-remove-duplicate-inclusion-.patch b/patches.drivers/0131-drivers-md-bcache-util.h-remove-duplicate-inclusion-.patch deleted file mode 100644 index fb71a94a5d..0000000000 --- a/patches.drivers/0131-drivers-md-bcache-util.h-remove-duplicate-inclusion-.patch +++ /dev/null @@ -1,31 +0,0 @@ -From f4b7ac68f438fa8521bbbf421f194ff10b0a7577 Mon Sep 17 00:00:00 2001 -From: Masanari Iida 
<standby24x7@gmail.com> -Date: Thu, 9 Mar 2017 16:17:03 -0800 -Subject: [PATCH] drivers/md/bcache/util.h: remove duplicate inclusion of - blkdev.h -Git-commit: f4b7ac68f438fa8521bbbf421f194ff10b0a7577 -Patch-mainline: v4.11-rc2 -References: FATE#321488 - -Link: http://lkml.kernel.org/r/20170226060230.11555-1-standby24x7@gmail.com -Signed-off-by: Masanari Iida <standby24x7@gmail.com> -Acked-by: Coly Li <colyli@suse.de> -Cc: Kent Overstreet <kent.overstreet@gmail.com> -Signed-off-by: Andrew Morton <akpm@linux-foundation.org> -Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/bcache/util.h | 1 - - 1 file changed, 1 deletion(-) - ---- a/drivers/md/bcache/util.h -+++ b/drivers/md/bcache/util.h -@@ -4,7 +4,6 @@ - - #include <linux/blkdev.h> - #include <linux/errno.h> --#include <linux/blkdev.h> - #include <linux/kernel.h> - #include <linux/llist.h> - #include <linux/ratelimit.h> diff --git a/patches.drivers/0132-md-fix-super_offset-endianness-in-super_1_rdev_size_.patch b/patches.drivers/0132-md-fix-super_offset-endianness-in-super_1_rdev_size_.patch deleted file mode 100644 index 0aaf4686e3..0000000000 --- a/patches.drivers/0132-md-fix-super_offset-endianness-in-super_1_rdev_size_.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 3fb632e40d7667d8bedfabc28850ac06d5493f54 Mon Sep 17 00:00:00 2001 -From: Jason Yan <yanaijie@huawei.com> -Date: Fri, 10 Mar 2017 11:27:23 +0800 -Subject: [PATCH] md: fix super_offset endianness in super_1_rdev_size_change -Git-commit: 3fb632e40d7667d8bedfabc28850ac06d5493f54 -Patch-mainline: v4.11-rc3 -References: FATE#321488 - -The sb->super_offset should be big-endian, but the rdev->sb_start is in -host byte order, so fix this by adding cpu_to_le64. 
- -Signed-off-by: Jason Yan <yanaijie@huawei.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index cd89ad3..6e76d97 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -1879,7 +1879,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) - } - sb = page_address(rdev->sb_page); - sb->data_size = cpu_to_le64(num_sectors); -- sb->super_offset = rdev->sb_start; -+ sb->super_offset = cpu_to_le64(rdev->sb_start); - sb->sb_csum = calc_sb_1_csum(sb); - do { - md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, --- -2.10.2 - diff --git a/patches.drivers/0133-md-fix-incorrect-use-of-lexx_to_cpu-in-does_sb_need_.patch b/patches.drivers/0133-md-fix-incorrect-use-of-lexx_to_cpu-in-does_sb_need_.patch deleted file mode 100644 index 1aa60fddad..0000000000 --- a/patches.drivers/0133-md-fix-incorrect-use-of-lexx_to_cpu-in-does_sb_need_.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 1345921393ba23b60d3fcf15933e699232ad25ae Mon Sep 17 00:00:00 2001 -From: Jason Yan <yanaijie@huawei.com> -Date: Fri, 10 Mar 2017 11:49:12 +0800 -Subject: [PATCH] md: fix incorrect use of lexx_to_cpu in does_sb_need_changing -Git-commit: 1345921393ba23b60d3fcf15933e699232ad25ae -Patch-mainline: v4.11-rc3 -References: FATE#321488 - -The sb->layout is of type __le32, so we shoud use le32_to_cpu. 
- -Signed-off-by: Jason Yan <yanaijie@huawei.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index 6e76d97..f6ae1d6 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -2287,7 +2287,7 @@ static bool does_sb_need_changing(struct mddev *mddev) - /* Check if any mddev parameters have changed */ - if ((mddev->dev_sectors != le64_to_cpu(sb->size)) || - (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) || -- (mddev->layout != le64_to_cpu(sb->layout)) || -+ (mddev->layout != le32_to_cpu(sb->layout)) || - (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) || - (mddev->chunk_sectors != le32_to_cpu(sb->chunksize))) - return true; --- -2.10.2 - diff --git a/patches.drivers/0134-md-r5cache-fix-set_syndrome_sources-for-data-in-cach.patch b/patches.drivers/0134-md-r5cache-fix-set_syndrome_sources-for-data-in-cach.patch deleted file mode 100644 index 0ec3558611..0000000000 --- a/patches.drivers/0134-md-r5cache-fix-set_syndrome_sources-for-data-in-cach.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 0977762f6d15f13caccc20d71a5dec47d098907d Mon Sep 17 00:00:00 2001 -From: Song Liu <songliubraving@fb.com> -Date: Mon, 13 Mar 2017 13:44:35 -0700 -Subject: [PATCH] md/r5cache: fix set_syndrome_sources() for data in cache -Git-commit: 0977762f6d15f13caccc20d71a5dec47d098907d -Patch-mainline: v4.11-rc3 -References: FATE#321488 - -Before this patch, device InJournal will be included in prexor -(SYNDROME_SRC_WANT_DRAIN) but not in reconstruct (SYNDROME_SRC_WRITTEN). So it -will break parity calculation. With srctype == SYNDROME_SRC_WRITTEN, we need -include both dev with non-null ->written and dev with R5_InJournal. 
This fixes -logic in 1e6d690(md/r5cache: caching phase of r5cache) - -Cc: stable@vger.kernel.org (v4.10+) -Signed-off-by: Song Liu <songliubraving@fb.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index 6bfedfc..ed5cd70 100644 ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -1401,7 +1401,8 @@ static int set_syndrome_sources(struct page **srcs, - (test_bit(R5_Wantdrain, &dev->flags) || - test_bit(R5_InJournal, &dev->flags))) || - (srctype == SYNDROME_SRC_WRITTEN && -- dev->written)) { -+ (dev->written || -+ test_bit(R5_InJournal, &dev->flags)))) { - if (test_bit(R5_InJournal, &dev->flags)) - srcs[slot] = sh->dev[i].orig_page; - else --- -2.10.2 - diff --git a/patches.drivers/0135-md-raid1-fix-a-trivial-typo-in-comments.patch b/patches.drivers/0135-md-raid1-fix-a-trivial-typo-in-comments.patch deleted file mode 100644 index e75efd7beb..0000000000 --- a/patches.drivers/0135-md-raid1-fix-a-trivial-typo-in-comments.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 11353b9d10392e79e32603d2178e75feb25eaf0d Mon Sep 17 00:00:00 2001 -From: Zhilong Liu <zlliu@suse.com> -Date: Tue, 14 Mar 2017 15:52:26 +0800 -Subject: [PATCH] md/raid1: fix a trivial typo in comments -Git-commit: 11353b9d10392e79e32603d2178e75feb25eaf0d -Patch-mainline: v4.11-rc3 -References: FATE#321488 - -Raid1.c: fix a trivial typo in comments of freeze_array(). 
- -Cc: Jack Wang <jack.wang.usish@gmail.com> -Cc: Guoqing Jiang <gqjiang@suse.com> -Cc: John Stoffel <john@stoffel.org> -Acked-by: Coly Li <colyli@suse.de> -Signed-off-by: Zhilong Liu <zlliu@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index c33e96e..a34f587 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -1027,7 +1027,7 @@ static int get_unqueued_pending(struct r1conf *conf) - static void freeze_array(struct r1conf *conf, int extra) - { - /* Stop sync I/O and normal I/O and wait for everything to -- * go quite. -+ * go quiet. - * This is called in two situations: - * 1) management command handlers (reshape, remove disk, quiesce). - * 2) one normal I/O request failed. --- -2.10.2 - diff --git a/patches.drivers/0136-dm-verity-fec-limit-error-correction-recursion.patch b/patches.drivers/0136-dm-verity-fec-limit-error-correction-recursion.patch deleted file mode 100644 index 383ce9dbfe..0000000000 --- a/patches.drivers/0136-dm-verity-fec-limit-error-correction-recursion.patch +++ /dev/null @@ -1,93 +0,0 @@ -From f1a880a93baaadb14c10a348fd199f1cdb6bcccd Mon Sep 17 00:00:00 2001 -From: Sami Tolvanen <samitolvanen@google.com> -Date: Wed, 15 Mar 2017 15:12:23 -0700 -Subject: [PATCH] dm verity fec: limit error correction recursion -Git-commit: f1a880a93baaadb14c10a348fd199f1cdb6bcccd -Patch-mainline: v4.11-rc6 -References: FATE#321488 - -If the hash tree itself is sufficiently corrupt in addition to data blocks, -it's possible for error correction to end up in a deep recursive loop, -which eventually causes a kernel panic. This change limits the -recursion to a reasonable level during a single I/O operation. 
- -Fixes: a739ff3f543a ("dm verity: add support for forward error correction") -Signed-off-by: Sami Tolvanen <samitolvanen@google.com> -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Cc: stable@vger.kernel.org # v4.5+ -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/dm-verity-fec.c | 12 +++++++++++- - drivers/md/dm-verity-fec.h | 4 ++++ - 2 files changed, 15 insertions(+), 1 deletion(-) - -diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c -index 0f0eb8a..c3cc04d 100644 ---- a/drivers/md/dm-verity-fec.c -+++ b/drivers/md/dm-verity-fec.c -@@ -439,6 +439,13 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, - if (!verity_fec_is_enabled(v)) - return -EOPNOTSUPP; - -+ if (fio->level >= DM_VERITY_FEC_MAX_RECURSION) { -+ DMWARN_LIMIT("%s: FEC: recursion too deep", v->data_dev->name); -+ return -EIO; -+ } -+ -+ fio->level++; -+ - if (type == DM_VERITY_BLOCK_TYPE_METADATA) - block += v->data_blocks; - -@@ -470,7 +477,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, - if (r < 0) { - r = fec_decode_rsb(v, io, fio, rsb, offset, true); - if (r < 0) -- return r; -+ goto done; - } - - if (dest) -@@ -480,6 +487,8 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, - r = verity_for_bv_block(v, io, iter, fec_bv_copy); - } - -+done: -+ fio->level--; - return r; - } - -@@ -520,6 +529,7 @@ void verity_fec_init_io(struct dm_verity_io *io) - memset(fio->bufs, 0, sizeof(fio->bufs)); - fio->nbufs = 0; - fio->output = NULL; -+ fio->level = 0; - } - - /* -diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h -index 7fa0298..bb31ce8 100644 ---- a/drivers/md/dm-verity-fec.h -+++ b/drivers/md/dm-verity-fec.h -@@ -27,6 +27,9 @@ - #define DM_VERITY_FEC_BUF_MAX \ - (1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS)) - -+/* maximum recursion level for verity_fec_decode */ -+#define DM_VERITY_FEC_MAX_RECURSION 4 -+ - #define DM_VERITY_OPT_FEC_DEV "use_fec_from_device" - #define 
DM_VERITY_OPT_FEC_BLOCKS "fec_blocks" - #define DM_VERITY_OPT_FEC_START "fec_start" -@@ -58,6 +61,7 @@ struct dm_verity_fec_io { - unsigned nbufs; /* number of buffers allocated */ - u8 *output; /* buffer for corrected output */ - size_t output_pos; -+ unsigned level; /* recursion level */ - }; - - #ifdef CONFIG_DM_VERITY_FEC --- -2.10.2 - diff --git a/patches.drivers/0137-dm-cache-metadata-fix-metadata2-format-s-blocks_are_.patch b/patches.drivers/0137-dm-cache-metadata-fix-metadata2-format-s-blocks_are_.patch deleted file mode 100644 index 233771fee7..0000000000 --- a/patches.drivers/0137-dm-cache-metadata-fix-metadata2-format-s-blocks_are_.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 0d963b6e650d9d5533223f3dbcde7dda466df65c Mon Sep 17 00:00:00 2001 -From: Joe Thornber <ejt@redhat.com> -Date: Mon, 20 Mar 2017 11:54:11 -0400 -Subject: [PATCH] dm cache metadata: fix metadata2 format's - blocks_are_clean_separate_dirty -Git-commit: 0d963b6e650d9d5533223f3dbcde7dda466df65c -Patch-mainline: v4.11-rc6 -References: FATE#321488 - -The dm_bitset_cursor_begin() call was using the incorrect nr_entries. -Also, the last dm_bitset_cursor_next() must be avoided if we're at the -end of the cursor. 
- -Fixes: 7f1b21591a6 ("dm cache metadata: use cursor api in blocks_are_clean_separate_dirty()") -Signed-off-by: Joe Thornber <ejt@redhat.com> -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/dm-cache-metadata.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c -index e4c2c1a..6735c8d 100644 ---- a/drivers/md/dm-cache-metadata.c -+++ b/drivers/md/dm-cache-metadata.c -@@ -932,7 +932,7 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd, - *result = true; - - r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root, -- from_cblock(begin), &cmd->dirty_cursor); -+ from_cblock(cmd->cache_blocks), &cmd->dirty_cursor); - if (r) { - DMERR("%s: dm_bitset_cursor_begin for dirty failed", __func__); - return r; -@@ -959,14 +959,16 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd, - return 0; - } - -+ begin = to_cblock(from_cblock(begin) + 1); -+ if (begin == end) -+ break; -+ - r = dm_bitset_cursor_next(&cmd->dirty_cursor); - if (r) { - DMERR("%s: dm_bitset_cursor_next for dirty failed", __func__); - dm_bitset_cursor_end(&cmd->dirty_cursor); - return r; - } -- -- begin = to_cblock(from_cblock(begin) + 1); - } - - dm_bitset_cursor_end(&cmd->dirty_cursor); --- -2.10.2 - diff --git a/patches.drivers/0138-dm-raid-fix-NULL-pointer-dereference-for-raid1-witho.patch b/patches.drivers/0138-dm-raid-fix-NULL-pointer-dereference-for-raid1-witho.patch deleted file mode 100644 index 250b0fe973..0000000000 --- a/patches.drivers/0138-dm-raid-fix-NULL-pointer-dereference-for-raid1-witho.patch +++ /dev/null @@ -1,97 +0,0 @@ -From 7a0c5c5b834fb60764b494b0e39c239da3b0774b Mon Sep 17 00:00:00 2001 -From: Dmitry Bilunov <kmeaw@yandex-team.ru> -Date: Thu, 30 Mar 2017 18:14:26 +0300 -Subject: [PATCH] dm raid: fix NULL pointer dereference for raid1 without - bitmap -Git-commit: 
7a0c5c5b834fb60764b494b0e39c239da3b0774b -Patch-mainline: v4.11-rc6 -References: FATE#321488 - -Commit 4257e08 ("dm raid: support to change bitmap region size") -introduced a bitmap resize call during preresume phase. User can create -a DM device with "raid" target configured as raid1 with no metadata -devices to hold superblock/bitmap info. It can be achieved using the -following sequence: - - truncate -s 32M /dev/shm/raid-test - LOOP=$(losetup --show -f /dev/shm/raid-test) - dmsetup create raid-test-linear0 --table "0 1024 linear $LOOP 0" - dmsetup create raid-test-linear1 --table "0 1024 linear $LOOP 1024" - dmsetup create raid-test --table "0 1024 raid raid1 1 2048 2 - /dev/mapper/raid-test-linear0 - /dev/mapper/raid-test-linear1" - -This results in the following crash: - -[ 4029.110216] device-mapper: raid: Ignoring chunk size parameter for RAID 1 -[ 4029.110217] device-mapper: raid: Choosing default region size of 4MiB -[ 4029.111349] md/raid1:mdX: active with 2 out of 2 mirrors -[ 4029.114770] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030 -[ 4029.114802] IP: bitmap_resize+0x25/0x7c0 [md_mod] -[ 4029.114816] PGD 0 -… -[ 4029.115059] Hardware name: Aquarius Pro P30 S85 BUY-866/B85M-E, BIOS 2304 05/25/2015 -[ 4029.115079] task: ffff88015cc29a80 task.stack: ffffc90001a5c000 -[ 4029.115097] RIP: 0010:bitmap_resize+0x25/0x7c0 [md_mod] -[ 4029.115112] RSP: 0018:ffffc90001a5fb68 EFLAGS: 00010246 -[ 4029.115127] RAX: 0000000000000005 RBX: 0000000000000000 RCX: 0000000000000000 -[ 4029.115146] RDX: 0000000000000000 RSI: 0000000000000400 RDI: 0000000000000000 -[ 4029.115166] RBP: ffffc90001a5fc28 R08: 0000000800000000 R09: 00000008ffffffff -[ 4029.115185] R10: ffffea0005661600 R11: ffff88015cc29a80 R12: ffff88021231f058 -[ 4029.115204] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 -[ 4029.115223] FS: 00007fe73a6b4740(0000) GS:ffff88021ea80000(0000) knlGS:0000000000000000 -[ 4029.115245] CS: 0010 DS: 0000 ES: 0000 CR0: 
0000000080050033 -[ 4029.115261] CR2: 0000000000000030 CR3: 0000000159a74000 CR4: 00000000001426e0 -[ 4029.115281] Call Trace: -[ 4029.115291] ? raid_iterate_devices+0x63/0x80 [dm_raid] -[ 4029.115309] ? dm_table_all_devices_attribute.isra.23+0x41/0x70 [dm_mod] -[ 4029.115329] ? dm_table_set_restrictions+0x225/0x2d0 [dm_mod] -[ 4029.115346] raid_preresume+0x81/0x2e0 [dm_raid] -[ 4029.115361] dm_table_resume_targets+0x47/0xe0 [dm_mod] -[ 4029.115378] dm_resume+0xa8/0xd0 [dm_mod] -[ 4029.115391] dev_suspend+0x123/0x250 [dm_mod] -[ 4029.115405] ? table_load+0x350/0x350 [dm_mod] -[ 4029.115419] ctl_ioctl+0x1c2/0x490 [dm_mod] -[ 4029.115433] dm_ctl_ioctl+0xe/0x20 [dm_mod] -[ 4029.115447] do_vfs_ioctl+0x8d/0x5a0 -[ 4029.115459] ? ____fput+0x9/0x10 -[ 4029.115470] ? task_work_run+0x79/0xa0 -[ 4029.115481] SyS_ioctl+0x3c/0x70 -[ 4029.115493] entry_SYSCALL_64_fastpath+0x13/0x94 - -The raid_preresume() function incorrectly assumes that the raid_set has -a bitmap enabled if RT_FLAG_RS_BITMAP_LOADED is set. But -RT_FLAG_RS_BITMAP_LOADED is getting set in __load_dirty_region_bitmap() -even if there is no bitmap present (and bitmap_load() happily returns 0 -even if a bitmap isn't present). So the only way forward in the -near-term is to check if the bitmap is present by seeing if -mddev->bitmap is not NULL after bitmap_load() has been called. - -By doing so the above NULL pointer is avoided. 
- -Fixes: 4257e08 ("dm raid: support to change bitmap region size") -Cc: stable@vger.kernel.org # v4.8+ -Signed-off-by: Dmitry Bilunov <kmeaw@yandex-team.ru> -Signed-off-by: Andrey Smetanin <asmetanin@yandex-team.ru> -Acked-by: Heinz Mauelshagen <heinzm@redhat.com> -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/dm-raid.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c -index f8564d6..1e217ba 100644 ---- a/drivers/md/dm-raid.c -+++ b/drivers/md/dm-raid.c -@@ -3726,7 +3726,7 @@ static int raid_preresume(struct dm_target *ti) - return r; - - /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) */ -- if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && -+ if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap && - mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)) { - r = bitmap_resize(mddev->bitmap, mddev->dev_sectors, - to_bytes(rs->requested_bitmap_chunk_sectors), 0); --- -2.10.2 - diff --git a/patches.drivers/0139-dm-verity-fec-fix-bufio-leaks.patch b/patches.drivers/0139-dm-verity-fec-fix-bufio-leaks.patch deleted file mode 100644 index 830fbbdc77..0000000000 --- a/patches.drivers/0139-dm-verity-fec-fix-bufio-leaks.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 86e3e83b443669dd2bcc5c8a83b23e3aa0694c0d Mon Sep 17 00:00:00 2001 -From: Sami Tolvanen <samitolvanen@google.com> -Date: Fri, 31 Mar 2017 12:32:45 -0700 -Subject: [PATCH] dm verity fec: fix bufio leaks -Git-commit: 86e3e83b443669dd2bcc5c8a83b23e3aa0694c0d -Patch-mainline: v4.11-rc6 -References: FATE#321488 - -Buffers read through dm_bufio_read() were not released in all code paths. 
- -Fixes: a739ff3f543a ("dm verity: add support for forward error correction") -Cc: stable@vger.kernel.org # v4.5+ -Signed-off-by: Sami Tolvanen <samitolvanen@google.com> -Signed-off-by: Mike Snitzer <snitzer@redhat.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/dm-verity-fec.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c -index c3cc04d..78f3601 100644 ---- a/drivers/md/dm-verity-fec.c -+++ b/drivers/md/dm-verity-fec.c -@@ -146,8 +146,6 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, - block = fec_buffer_rs_block(v, fio, n, i); - res = fec_decode_rs8(v, fio, block, &par[offset], neras); - if (res < 0) { -- dm_bufio_release(buf); -- - r = res; - goto error; - } -@@ -172,6 +170,8 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, - done: - r = corrected; - error: -+ dm_bufio_release(buf); -+ - if (r < 0 && neras) - DMERR_LIMIT("%s: FEC %llu: failed to correct: %d", - v->data_dev->name, (unsigned long long)rsb, r); -@@ -269,7 +269,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io, - &is_zero) == 0) { - /* skip known zero blocks entirely */ - if (is_zero) -- continue; -+ goto done; - - /* - * skip if we have already found the theoretical --- -2.10.2 - diff --git a/patches.drivers/0140-dm-rq-Avoid-that-request-processing-stalls-sporadica.patch b/patches.drivers/0140-dm-rq-Avoid-that-request-processing-stalls-sporadica.patch deleted file mode 100644 index ebd493e0a4..0000000000 --- a/patches.drivers/0140-dm-rq-Avoid-that-request-processing-stalls-sporadica.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 6077c2d706097c00d8f2fed57d3f3c45cd228ee8 Mon Sep 17 00:00:00 2001 -From: Bart Van Assche <bart.vanassche@sandisk.com> -Date: Fri, 7 Apr 2017 11:16:54 -0700 -Subject: [PATCH] dm rq: Avoid that request processing stalls sporadically -Git-commit: 6077c2d706097c00d8f2fed57d3f3c45cd228ee8 
-Patch-mainline: v4.11-rc6 -References: FATE#321488 - -While running the srp-test software I noticed that request -processing stalls sporadically at the beginning of a test, namely -when mkfs is run against a dm-mpath device. Every time when that -happened the following command was sufficient to resume request -Processing: - - echo run >/sys/kernel/debug/block/dm-0/state - -This patch avoids that such request processing stalls occur. The -test I ran is as follows: - - while srp-test/run_tests -d -r 30 -t 02-mq; do :; done - -Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com> -Cc: Mike Snitzer <snitzer@redhat.com> -Cc: dm-devel@redhat.com -Signed-off-by: Jens Axboe <axboe@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/dm-rq.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c -index 28955b9..0b081d1 100644 ---- a/drivers/md/dm-rq.c -+++ b/drivers/md/dm-rq.c -@@ -755,6 +755,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, - /* Undo dm_start_request() before requeuing */ - rq_end_stats(md, rq); - rq_completed(md, rq_data_dir(rq), false); -+ blk_mq_delay_run_hw_queue(hctx, 100/*ms*/); - return BLK_MQ_RQ_QUEUE_BUSY; - } - --- -2.10.2 - diff --git a/patches.drivers/0141-md-raid5-prioritize-stripes-for-writeback.patch b/patches.drivers/0141-md-raid5-prioritize-stripes-for-writeback.patch deleted file mode 100644 index 5db7e2ab82..0000000000 --- a/patches.drivers/0141-md-raid5-prioritize-stripes-for-writeback.patch +++ /dev/null @@ -1,165 +0,0 @@ -From: Shaohua Li <shli@fb.com> -Date: Wed, 15 Feb 2017 19:37:32 -0800 -Subject: [PATCH] md/raid5: prioritize stripes for writeback -Git-commit: 535ae4eb1225f19e1d1848c65eafea8b7e9112f4 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -In raid5-cache writeback mode, we have two types of stripes to handle. 
-- stripes which aren't cached yet -- stripes which are cached and flushing out to raid disks - -Upperlayer is more sensistive to latency of the first type of stripes -generally. But we only one handle list for all these stripes, where the -two types of stripes are mixed together. When reclaim flushes a lot of -stripes, the first type of stripes could be noticeably delayed. On the -other hand, if the log space is tight, we'd like to handle the second -type of stripes faster and free log space. - -This patch destinguishes the two types stripes. They are added into -different handle list. When we try to get a stripe to handl, we prefer -the first type of stripes unless log space is tight. - -This should have no impact for !writeback case. - -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5.c | 48 +++++++++++++++++++++++++++++++++++++++--------- - drivers/md/raid5.h | 2 ++ - 2 files changed, 41 insertions(+), 9 deletions(-) - -diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index ed5cd70..5a28bd9 100644 ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -176,6 +176,13 @@ static int stripe_operations_active(struct stripe_head *sh) - test_bit(STRIPE_COMPUTE_RUN, &sh->state); - } - -+static bool stripe_is_lowprio(struct stripe_head *sh) -+{ -+ return (test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) || -+ test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) && -+ !test_bit(STRIPE_R5C_CACHING, &sh->state); -+} -+ - static void raid5_wakeup_stripe_thread(struct stripe_head *sh) - { - struct r5conf *conf = sh->raid_conf; -@@ -191,7 +198,10 @@ static void raid5_wakeup_stripe_thread(struct stripe_head *sh) - if (list_empty(&sh->lru)) { - struct r5worker_group *group; - group = conf->worker_groups + cpu_to_group(cpu); -- list_add_tail(&sh->lru, &group->handle_list); -+ if (stripe_is_lowprio(sh)) -+ list_add_tail(&sh->lru, &group->loprio_list); -+ else -+ list_add_tail(&sh->lru, &group->handle_list); - 
group->stripes_cnt++; - sh->group = group; - } -@@ -254,7 +264,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh, - clear_bit(STRIPE_DELAYED, &sh->state); - clear_bit(STRIPE_BIT_DELAY, &sh->state); - if (conf->worker_cnt_per_group == 0) { -- list_add_tail(&sh->lru, &conf->handle_list); -+ if (stripe_is_lowprio(sh)) -+ list_add_tail(&sh->lru, -+ &conf->loprio_list); -+ else -+ list_add_tail(&sh->lru, -+ &conf->handle_list); - } else { - raid5_wakeup_stripe_thread(sh); - return; -@@ -5172,19 +5187,27 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio) - */ - static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group) - { -- struct stripe_head *sh = NULL, *tmp; -+ struct stripe_head *sh, *tmp; - struct list_head *handle_list = NULL; -- struct r5worker_group *wg = NULL; -+ struct r5worker_group *wg; -+ bool second_try = !r5c_is_writeback(conf->log); -+ bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state); - -+again: -+ wg = NULL; -+ sh = NULL; - if (conf->worker_cnt_per_group == 0) { -- handle_list = &conf->handle_list; -+ handle_list = try_loprio ? &conf->loprio_list : -+ &conf->handle_list; - } else if (group != ANY_GROUP) { -- handle_list = &conf->worker_groups[group].handle_list; -+ handle_list = try_loprio ? &conf->worker_groups[group].loprio_list : -+ &conf->worker_groups[group].handle_list; - wg = &conf->worker_groups[group]; - } else { - int i; - for (i = 0; i < conf->group_cnt; i++) { -- handle_list = &conf->worker_groups[i].handle_list; -+ handle_list = try_loprio ? 
&conf->worker_groups[i].loprio_list : -+ &conf->worker_groups[i].handle_list; - wg = &conf->worker_groups[i]; - if (!list_empty(handle_list)) - break; -@@ -5235,8 +5258,13 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group) - wg = NULL; - } - -- if (!sh) -- return NULL; -+ if (!sh) { -+ if (second_try) -+ return NULL; -+ second_try = true; -+ try_loprio = !try_loprio; -+ goto again; -+ } - - if (wg) { - wg->stripes_cnt--; -@@ -6546,6 +6574,7 @@ static int alloc_thread_groups(struct r5conf *conf, int cnt, - - group = &(*worker_groups)[i]; - INIT_LIST_HEAD(&group->handle_list); -+ INIT_LIST_HEAD(&group->loprio_list); - group->conf = conf; - group->workers = workers + i * cnt; - -@@ -6773,6 +6802,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) - init_waitqueue_head(&conf->wait_for_stripe); - init_waitqueue_head(&conf->wait_for_overlap); - INIT_LIST_HEAD(&conf->handle_list); -+ INIT_LIST_HEAD(&conf->loprio_list); - INIT_LIST_HEAD(&conf->hold_list); - INIT_LIST_HEAD(&conf->delayed_list); - INIT_LIST_HEAD(&conf->bitmap_list); -diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h -index 4bb27b9..6b9d2e8 100644 ---- a/drivers/md/raid5.h -+++ b/drivers/md/raid5.h -@@ -542,6 +542,7 @@ struct r5worker { - - struct r5worker_group { - struct list_head handle_list; -+ struct list_head loprio_list; - struct r5conf *conf; - struct r5worker *workers; - int stripes_cnt; -@@ -608,6 +609,7 @@ struct r5conf { - */ - - struct list_head handle_list; /* stripes needing handling */ -+ struct list_head loprio_list; /* low priority stripes */ - struct list_head hold_list; /* preread ready stripes */ - struct list_head delayed_list; /* stripes that have plugged requests */ - struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ --- -2.10.2 - diff --git a/patches.drivers/0142-md-raid5-cache-bump-flush-stripe-batch-size.patch b/patches.drivers/0142-md-raid5-cache-bump-flush-stripe-batch-size.patch deleted file mode 100644 index 
49dc63a6f9..0000000000 --- a/patches.drivers/0142-md-raid5-cache-bump-flush-stripe-batch-size.patch +++ /dev/null @@ -1,62 +0,0 @@ -From: Shaohua Li <shli@fb.com> -Date: Wed, 15 Feb 2017 19:58:05 -0800 -Subject: [PATCH] md/raid5-cache: bump flush stripe batch size -Git-commit: 84890c03b6c5d7e8d76ea5e20b6aaf7e7ad410f0 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -Bump the flush stripe batch size to 2048. For my 12 disks raid -array, the stripes takes: -12 * 4k * 2048 = 96MB - -This is still quite small. A hardware raid card generally has 1GB size, -which we suggest the raid5-cache has similar cache size. - -The advantage of a big batch size is we can dispatch a lot of IO in the -same time, then we can do some scheduling to make better IO pattern. - -Last patch prioritizes stripes, so we don't worry about a big flush -stripe batch will starve normal stripes. - -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5-cache.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c -index 3f307be..738e180 100644 ---- a/drivers/md/raid5-cache.c -+++ b/drivers/md/raid5-cache.c -@@ -43,7 +43,7 @@ - /* wake up reclaim thread periodically */ - #define R5C_RECLAIM_WAKEUP_INTERVAL (30 * HZ) - /* start flush with these full stripes */ --#define R5C_FULL_STRIPE_FLUSH_BATCH 256 -+#define R5C_FULL_STRIPE_FLUSH_BATCH(conf) (conf->max_nr_stripes / 4) - /* reclaim stripes in groups */ - #define R5C_RECLAIM_STRIPE_GROUP (NR_STRIPE_HASH_LOCKS * 2) - -@@ -381,7 +381,7 @@ void r5c_check_cached_full_stripe(struct r5conf *conf) - * or a full stripe (chunk size / 4k stripes). 
- */ - if (atomic_read(&conf->r5c_cached_full_stripes) >= -- min(R5C_FULL_STRIPE_FLUSH_BATCH, -+ min(R5C_FULL_STRIPE_FLUSH_BATCH(conf), - conf->chunk_sectors >> STRIPE_SHIFT)) - r5l_wake_reclaim(conf->log, 0); - } -@@ -1393,7 +1393,7 @@ static void r5c_do_reclaim(struct r5conf *conf) - stripes_to_flush = R5C_RECLAIM_STRIPE_GROUP; - else if (total_cached > conf->min_nr_stripes * 1 / 2 || - atomic_read(&conf->r5c_cached_full_stripes) - flushing_full > -- R5C_FULL_STRIPE_FLUSH_BATCH) -+ R5C_FULL_STRIPE_FLUSH_BATCH(conf)) - /* - * if stripe cache pressure moderate, or if there is many full - * stripes,flush all full stripes --- -2.10.2 - diff --git a/patches.drivers/0143-md-r5cache-improve-recovery-with-read-ahead-page-poo.patch b/patches.drivers/0143-md-r5cache-improve-recovery-with-read-ahead-page-poo.patch deleted file mode 100644 index 2dc9760b09..0000000000 --- a/patches.drivers/0143-md-r5cache-improve-recovery-with-read-ahead-page-poo.patch +++ /dev/null @@ -1,360 +0,0 @@ -From: Song Liu <songliubraving@fb.com> -Date: Tue, 7 Mar 2017 16:49:17 -0800 -Subject: [PATCH] md/r5cache: improve recovery with read ahead page pool -Git-commit: effe6ee7523aa50d0517bd7da141e112b44d89fa -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -In r5cache recovery, the journal device is scanned page by page. -Currently, we use sync_page_io() to read journal device. This is -not efficient when we have to recovery many stripes from the journal. - -To improve the speed of recovery, this patch introduces a read ahead -page pool (ra_pool) to recovery_ctx. With ra_pool, multiple consecutive -pages are read in one IO. Then the recovery code read the journal from -ra_pool. - -With ra_pool, r5l_recovery_ctx has become much bigger. Therefore, -r5l_recovery_log() is refactored so r5l_recovery_ctx is not using -stack space. 
- -Signed-off-by: Song Liu <songliubraving@fb.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5-cache.c | 221 +++++++++++++++++++++++++++++++++++++---------- - 1 file changed, 175 insertions(+), 46 deletions(-) - -diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c -index 738e180..5c8640c 100644 ---- a/drivers/md/raid5-cache.c -+++ b/drivers/md/raid5-cache.c -@@ -30,6 +30,7 @@ - * underneath hardware sector size. only works with PAGE_SIZE == 4096 - */ - #define BLOCK_SECTORS (8) -+#define BLOCK_SECTOR_SHIFT (3) - - /* - * log->max_free_space is min(1/4 disk size, 10G reclaimable space). -@@ -1552,6 +1553,8 @@ bool r5l_log_disk_error(struct r5conf *conf) - return ret; - } - -+#define R5L_RECOVERY_PAGE_POOL_SIZE 256 -+ - struct r5l_recovery_ctx { - struct page *meta_page; /* current meta */ - sector_t meta_total_blocks; /* total size of current meta and data */ -@@ -1560,18 +1563,131 @@ struct r5l_recovery_ctx { - int data_parity_stripes; /* number of data_parity stripes */ - int data_only_stripes; /* number of data_only stripes */ - struct list_head cached_list; -+ -+ /* -+ * read ahead page pool (ra_pool) -+ * in recovery, log is read sequentially. It is not efficient to -+ * read every page with sync_page_io(). The read ahead page pool -+ * reads multiple pages with one IO, so further log read can -+ * just copy data from the pool. 
-+ */ -+ struct page *ra_pool[R5L_RECOVERY_PAGE_POOL_SIZE]; -+ sector_t pool_offset; /* offset of first page in the pool */ -+ int total_pages; /* total allocated pages */ -+ int valid_pages; /* pages with valid data */ -+ struct bio *ra_bio; /* bio to do the read ahead */ - }; - -+static int r5l_recovery_allocate_ra_pool(struct r5l_log *log, -+ struct r5l_recovery_ctx *ctx) -+{ -+ struct page *page; -+ -+ ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_PAGES, log->bs); -+ if (!ctx->ra_bio) -+ return -ENOMEM; -+ -+ ctx->valid_pages = 0; -+ ctx->total_pages = 0; -+ while (ctx->total_pages < R5L_RECOVERY_PAGE_POOL_SIZE) { -+ page = alloc_page(GFP_KERNEL); -+ -+ if (!page) -+ break; -+ ctx->ra_pool[ctx->total_pages] = page; -+ ctx->total_pages += 1; -+ } -+ -+ if (ctx->total_pages == 0) { -+ bio_put(ctx->ra_bio); -+ return -ENOMEM; -+ } -+ -+ ctx->pool_offset = 0; -+ return 0; -+} -+ -+static void r5l_recovery_free_ra_pool(struct r5l_log *log, -+ struct r5l_recovery_ctx *ctx) -+{ -+ int i; -+ -+ for (i = 0; i < ctx->total_pages; ++i) -+ put_page(ctx->ra_pool[i]); -+ bio_put(ctx->ra_bio); -+} -+ -+/* -+ * fetch ctx->valid_pages pages from offset -+ * In normal cases, ctx->valid_pages == ctx->total_pages after the call. 
-+ * However, if the offset is close to the end of the journal device, -+ * ctx->valid_pages could be smaller than ctx->total_pages -+ */ -+static int r5l_recovery_fetch_ra_pool(struct r5l_log *log, -+ struct r5l_recovery_ctx *ctx, -+ sector_t offset) -+{ -+ bio_reset(ctx->ra_bio); -+ ctx->ra_bio->bi_bdev = log->rdev->bdev; -+ bio_set_op_attrs(ctx->ra_bio, REQ_OP_READ, 0); -+ ctx->ra_bio->bi_iter.bi_sector = log->rdev->data_offset + offset; -+ -+ ctx->valid_pages = 0; -+ ctx->pool_offset = offset; -+ -+ while (ctx->valid_pages < ctx->total_pages) { -+ bio_add_page(ctx->ra_bio, -+ ctx->ra_pool[ctx->valid_pages], PAGE_SIZE, 0); -+ ctx->valid_pages += 1; -+ -+ offset = r5l_ring_add(log, offset, BLOCK_SECTORS); -+ -+ if (offset == 0) /* reached end of the device */ -+ break; -+ } -+ -+ return submit_bio_wait(ctx->ra_bio); -+} -+ -+/* -+ * try read a page from the read ahead page pool, if the page is not in the -+ * pool, call r5l_recovery_fetch_ra_pool -+ */ -+static int r5l_recovery_read_page(struct r5l_log *log, -+ struct r5l_recovery_ctx *ctx, -+ struct page *page, -+ sector_t offset) -+{ -+ int ret; -+ -+ if (offset < ctx->pool_offset || -+ offset >= ctx->pool_offset + ctx->valid_pages * BLOCK_SECTORS) { -+ ret = r5l_recovery_fetch_ra_pool(log, ctx, offset); -+ if (ret) -+ return ret; -+ } -+ -+ BUG_ON(offset < ctx->pool_offset || -+ offset >= ctx->pool_offset + ctx->valid_pages * BLOCK_SECTORS); -+ -+ memcpy(page_address(page), -+ page_address(ctx->ra_pool[(offset - ctx->pool_offset) >> -+ BLOCK_SECTOR_SHIFT]), -+ PAGE_SIZE); -+ return 0; -+} -+ - static int r5l_recovery_read_meta_block(struct r5l_log *log, - struct r5l_recovery_ctx *ctx) - { - struct page *page = ctx->meta_page; - struct r5l_meta_block *mb; - u32 crc, stored_crc; -+ int ret; - -- if (!sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page, REQ_OP_READ, 0, -- false)) -- return -EIO; -+ ret = r5l_recovery_read_page(log, ctx, page, ctx->pos); -+ if (ret != 0) -+ return ret; - - mb = page_address(page); - 
stored_crc = le32_to_cpu(mb->checksum); -@@ -1653,8 +1769,7 @@ static void r5l_recovery_load_data(struct r5l_log *log, - raid5_compute_sector(conf, - le64_to_cpu(payload->location), 0, - &dd_idx, sh); -- sync_page_io(log->rdev, log_offset, PAGE_SIZE, -- sh->dev[dd_idx].page, REQ_OP_READ, 0, false); -+ r5l_recovery_read_page(log, ctx, sh->dev[dd_idx].page, log_offset); - sh->dev[dd_idx].log_checksum = - le32_to_cpu(payload->checksum[0]); - ctx->meta_total_blocks += BLOCK_SECTORS; -@@ -1673,17 +1788,15 @@ static void r5l_recovery_load_parity(struct r5l_log *log, - struct r5conf *conf = mddev->private; - - ctx->meta_total_blocks += BLOCK_SECTORS * conf->max_degraded; -- sync_page_io(log->rdev, log_offset, PAGE_SIZE, -- sh->dev[sh->pd_idx].page, REQ_OP_READ, 0, false); -+ r5l_recovery_read_page(log, ctx, sh->dev[sh->pd_idx].page, log_offset); - sh->dev[sh->pd_idx].log_checksum = - le32_to_cpu(payload->checksum[0]); - set_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags); - - if (sh->qd_idx >= 0) { -- sync_page_io(log->rdev, -- r5l_ring_add(log, log_offset, BLOCK_SECTORS), -- PAGE_SIZE, sh->dev[sh->qd_idx].page, -- REQ_OP_READ, 0, false); -+ r5l_recovery_read_page( -+ log, ctx, sh->dev[sh->qd_idx].page, -+ r5l_ring_add(log, log_offset, BLOCK_SECTORS)); - sh->dev[sh->qd_idx].log_checksum = - le32_to_cpu(payload->checksum[1]); - set_bit(R5_Wantwrite, &sh->dev[sh->qd_idx].flags); -@@ -1814,14 +1927,15 @@ r5c_recovery_replay_stripes(struct list_head *cached_stripe_list, - - /* if matches return 0; otherwise return -EINVAL */ - static int --r5l_recovery_verify_data_checksum(struct r5l_log *log, struct page *page, -+r5l_recovery_verify_data_checksum(struct r5l_log *log, -+ struct r5l_recovery_ctx *ctx, -+ struct page *page, - sector_t log_offset, __le32 log_checksum) - { - void *addr; - u32 checksum; - -- sync_page_io(log->rdev, log_offset, PAGE_SIZE, -- page, REQ_OP_READ, 0, false); -+ r5l_recovery_read_page(log, ctx, page, log_offset); - addr = kmap_atomic(page); - checksum = 
crc32c_le(log->uuid_checksum, addr, PAGE_SIZE); - kunmap_atomic(addr); -@@ -1853,17 +1967,17 @@ r5l_recovery_verify_data_checksum_for_mb(struct r5l_log *log, - - if (payload->header.type == R5LOG_PAYLOAD_DATA) { - if (r5l_recovery_verify_data_checksum( -- log, page, log_offset, -+ log, ctx, page, log_offset, - payload->checksum[0]) < 0) - goto mismatch; - } else if (payload->header.type == R5LOG_PAYLOAD_PARITY) { - if (r5l_recovery_verify_data_checksum( -- log, page, log_offset, -+ log, ctx, page, log_offset, - payload->checksum[0]) < 0) - goto mismatch; - if (conf->max_degraded == 2 && /* q for RAID 6 */ - r5l_recovery_verify_data_checksum( -- log, page, -+ log, ctx, page, - r5l_ring_add(log, log_offset, - BLOCK_SECTORS), - payload->checksum[1]) < 0) -@@ -2241,55 +2355,70 @@ static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log, - static int r5l_recovery_log(struct r5l_log *log) - { - struct mddev *mddev = log->rdev->mddev; -- struct r5l_recovery_ctx ctx; -+ struct r5l_recovery_ctx *ctx; - int ret; - sector_t pos; - -- ctx.pos = log->last_checkpoint; -- ctx.seq = log->last_cp_seq; -- ctx.meta_page = alloc_page(GFP_KERNEL); -- ctx.data_only_stripes = 0; -- ctx.data_parity_stripes = 0; -- INIT_LIST_HEAD(&ctx.cached_list); -- -- if (!ctx.meta_page) -+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); -+ if (!ctx) - return -ENOMEM; - -- ret = r5c_recovery_flush_log(log, &ctx); -- __free_page(ctx.meta_page); -+ ctx->pos = log->last_checkpoint; -+ ctx->seq = log->last_cp_seq; -+ INIT_LIST_HEAD(&ctx->cached_list); -+ ctx->meta_page = alloc_page(GFP_KERNEL); - -- if (ret) -- return ret; -+ if (!ctx->meta_page) { -+ ret = -ENOMEM; -+ goto meta_page; -+ } -+ -+ if (r5l_recovery_allocate_ra_pool(log, ctx) != 0) { -+ ret = -ENOMEM; -+ goto ra_pool; -+ } - -- pos = ctx.pos; -- ctx.seq += 10000; -+ ret = r5c_recovery_flush_log(log, ctx); - -+ if (ret) -+ goto error; - -- if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0)) -+ pos = ctx->pos; -+ ctx->seq += 
10000; -+ -+ if ((ctx->data_only_stripes == 0) && (ctx->data_parity_stripes == 0)) - pr_debug("md/raid:%s: starting from clean shutdown\n", - mdname(mddev)); - else - pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n", -- mdname(mddev), ctx.data_only_stripes, -- ctx.data_parity_stripes); -- -- if (ctx.data_only_stripes == 0) { -- log->next_checkpoint = ctx.pos; -- r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++); -- ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS); -- } else if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) { -+ mdname(mddev), ctx->data_only_stripes, -+ ctx->data_parity_stripes); -+ -+ if (ctx->data_only_stripes == 0) { -+ log->next_checkpoint = ctx->pos; -+ r5l_log_write_empty_meta_block(log, ctx->pos, ctx->seq++); -+ ctx->pos = r5l_ring_add(log, ctx->pos, BLOCK_SECTORS); -+ } else if (r5c_recovery_rewrite_data_only_stripes(log, ctx)) { - pr_err("md/raid:%s: failed to rewrite stripes to journal\n", - mdname(mddev)); -- return -EIO; -+ ret = -EIO; -+ goto error; - } - -- log->log_start = ctx.pos; -- log->seq = ctx.seq; -+ log->log_start = ctx->pos; -+ log->seq = ctx->seq; - log->last_checkpoint = pos; - r5l_write_super(log, pos); - -- r5c_recovery_flush_data_only_stripes(log, &ctx); -- return 0; -+ r5c_recovery_flush_data_only_stripes(log, ctx); -+ ret = 0; -+error: -+ r5l_recovery_free_ra_pool(log, ctx); -+ra_pool: -+ __free_page(ctx->meta_page); -+meta_page: -+ kfree(ctx); -+ return ret; - } - - static void r5l_write_super(struct r5l_log *log, sector_t cp) --- -2.10.2 - diff --git a/patches.drivers/0144-md-r5cache-handle-R5LOG_PAYLOAD_FLUSH-in-recovery.patch b/patches.drivers/0144-md-r5cache-handle-R5LOG_PAYLOAD_FLUSH-in-recovery.patch deleted file mode 100644 index 1a7ae538e5..0000000000 --- a/patches.drivers/0144-md-r5cache-handle-R5LOG_PAYLOAD_FLUSH-in-recovery.patch +++ /dev/null @@ -1,116 +0,0 @@ -From: Song Liu <songliubraving@fb.com> -Date: Tue, 7 Mar 2017 17:44:21 -0800 -Subject: [PATCH] 
md/r5cache: handle R5LOG_PAYLOAD_FLUSH in recovery -Git-commit: 2d4f468753777a29124f779cdffd1ec009d70597 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -This patch adds handling of R5LOG_PAYLOAD_FLUSH in journal recovery. -Next patch will add logic that generate R5LOG_PAYLOAD_FLUSH on flush -finish. - -When R5LOG_PAYLOAD_FLUSH is seen in recovery, pending data and parity -will be dropped from recovery. This will reduce the number of stripes -to replay, and thus accelerate the recovery process. - -Signed-off-by: Song Liu <songliubraving@fb.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5-cache.c | 47 +++++++++++++++++++++++++++++++++++++++++------ - 1 file changed, 41 insertions(+), 6 deletions(-) - -diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c -index 5f82dab..f85ab27 100644 ---- a/drivers/md/raid5-cache.c -+++ b/drivers/md/raid5-cache.c -@@ -1959,6 +1959,7 @@ r5l_recovery_verify_data_checksum_for_mb(struct r5l_log *log, - sector_t log_offset = r5l_ring_add(log, ctx->pos, BLOCK_SECTORS); - struct page *page; - struct r5l_payload_data_parity *payload; -+ struct r5l_payload_flush *payload_flush; - - page = alloc_page(GFP_KERNEL); - if (!page) -@@ -1966,6 +1967,7 @@ r5l_recovery_verify_data_checksum_for_mb(struct r5l_log *log, - - while (mb_offset < le32_to_cpu(mb->meta_size)) { - payload = (void *)mb + mb_offset; -+ payload_flush = (void *)mb + mb_offset; - - if (payload->header.type == R5LOG_PAYLOAD_DATA) { - if (r5l_recovery_verify_data_checksum( -@@ -1984,15 +1986,23 @@ r5l_recovery_verify_data_checksum_for_mb(struct r5l_log *log, - BLOCK_SECTORS), - payload->checksum[1]) < 0) - goto mismatch; -- } else /* not R5LOG_PAYLOAD_DATA or R5LOG_PAYLOAD_PARITY */ -+ } else if (payload->header.type == R5LOG_PAYLOAD_FLUSH) { -+ /* nothing to do for R5LOG_PAYLOAD_FLUSH here */ -+ } else 
/* not R5LOG_PAYLOAD_DATA/PARITY/FLUSH */ - goto mismatch; - -- log_offset = r5l_ring_add(log, log_offset, -- le32_to_cpu(payload->size)); -+ if (payload->header.type == R5LOG_PAYLOAD_FLUSH) { -+ mb_offset += sizeof(struct r5l_payload_flush) + -+ le32_to_cpu(payload_flush->size); -+ } else { -+ /* DATA or PARITY payload */ -+ log_offset = r5l_ring_add(log, log_offset, -+ le32_to_cpu(payload->size)); -+ mb_offset += sizeof(struct r5l_payload_data_parity) + -+ sizeof(__le32) * -+ (le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9)); -+ } - -- mb_offset += sizeof(struct r5l_payload_data_parity) + -- sizeof(__le32) * -- (le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9)); - } - - put_page(page); -@@ -2020,6 +2030,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, - struct r5conf *conf = mddev->private; - struct r5l_meta_block *mb; - struct r5l_payload_data_parity *payload; -+ struct r5l_payload_flush *payload_flush; - int mb_offset; - sector_t log_offset; - sector_t stripe_sect; -@@ -2045,6 +2056,30 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, - int dd; - - payload = (void *)mb + mb_offset; -+ payload_flush = (void *)mb + mb_offset; -+ -+ if (payload->header.type == R5LOG_PAYLOAD_FLUSH) { -+ int i, count; -+ -+ count = le32_to_cpu(payload_flush->size) / sizeof(__le64); -+ for (i = 0; i < count; ++i) { -+ stripe_sect = le64_to_cpu(payload_flush->flush_stripes[i]); -+ sh = r5c_recovery_lookup_stripe(cached_stripe_list, -+ stripe_sect); -+ if (sh) { -+ WARN_ON(test_bit(STRIPE_R5C_CACHING, &sh->state)); -+ r5l_recovery_reset_stripe(sh); -+ list_del_init(&sh->lru); -+ raid5_release_stripe(sh); -+ } -+ } -+ -+ mb_offset += sizeof(struct r5l_payload_flush) + -+ le32_to_cpu(payload_flush->size); -+ continue; -+ } -+ -+ /* DATA or PARITY payload */ - stripe_sect = (payload->header.type == R5LOG_PAYLOAD_DATA) ? 
- raid5_compute_sector( - conf, le64_to_cpu(payload->location), 0, &dd, --- -2.10.2 - diff --git a/patches.drivers/0145-md-r5cache-generate-R5LOG_PAYLOAD_FLUSH.patch b/patches.drivers/0145-md-r5cache-generate-R5LOG_PAYLOAD_FLUSH.patch deleted file mode 100644 index 3ec48bfec0..0000000000 --- a/patches.drivers/0145-md-r5cache-generate-R5LOG_PAYLOAD_FLUSH.patch +++ /dev/null @@ -1,103 +0,0 @@ -From: Song Liu <songliubraving@fb.com> -Date: Thu, 9 Mar 2017 21:23:39 -0800 -Subject: [PATCH] md/r5cache: generate R5LOG_PAYLOAD_FLUSH -Git-commit: ea17481fb48888fa11f412766bde36be9171247e -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -In r5c_finish_stripe_write_out(), R5LOG_PAYLOAD_FLUSH is append to -log->current_io. - -Appending R5LOG_PAYLOAD_FLUSH in quiesce needs extra writes to -journal. To simplify the logic, we just skip R5LOG_PAYLOAD_FLUSH in -quiesce. - -Even R5LOG_PAYLOAD_FLUSH supports multiple stripes per payload. -However, current implementation is one stripe per R5LOG_PAYLOAD_FLUSH, -which is simpler. 
- -Signed-off-by: Song Liu <songliubraving@fb.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5-cache.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- - 1 file changed, 42 insertions(+), 3 deletions(-) - ---- a/drivers/md/raid5-cache.c -+++ b/drivers/md/raid5-cache.c -@@ -593,7 +593,7 @@ static void r5l_log_endio(struct bio *bi - - spin_lock_irqsave(&log->io_list_lock, flags); - __r5l_set_io_unit_state(io, IO_UNIT_IO_END); -- if (log->need_cache_flush) -+ if (log->need_cache_flush && !list_empty(&io->stripe_list)) - r5l_move_to_end_ios(log); - else - r5l_log_run_stripes(log); -@@ -621,9 +621,11 @@ static void r5l_log_endio(struct bio *bi - bio_endio(bi); - atomic_dec(&io->pending_stripe); - } -- if (atomic_read(&io->pending_stripe) == 0) -- __r5l_stripe_write_finished(io); - } -+ -+ /* finish flush only io_unit and PAYLOAD_FLUSH only io_unit */ -+ if (atomic_read(&io->pending_stripe) == 0) -+ __r5l_stripe_write_finished(io); - } - - static void r5l_do_submit_io(struct r5l_log *log, struct r5l_io_unit *io) -@@ -845,6 +847,41 @@ static void r5l_append_payload_page(stru - r5_reserve_log_entry(log, io); - } - -+static void r5l_append_flush_payload(struct r5l_log *log, sector_t sect) -+{ -+ struct mddev *mddev = log->rdev->mddev; -+ struct r5conf *conf = mddev->private; -+ struct r5l_io_unit *io; -+ struct r5l_payload_flush *payload; -+ int meta_size; -+ -+ /* -+ * payload_flush requires extra writes to the journal. 
-+ * To avoid handling the extra IO in quiesce, just skip -+ * flush_payload -+ */ -+ if (conf->quiesce) -+ return; -+ -+ mutex_lock(&log->io_mutex); -+ meta_size = sizeof(struct r5l_payload_flush) + sizeof(__le64); -+ -+ if (r5l_get_meta(log, meta_size)) { -+ mutex_unlock(&log->io_mutex); -+ return; -+ } -+ -+ /* current implementation is one stripe per flush payload */ -+ io = log->current_io; -+ payload = page_address(io->meta_page) + io->meta_offset; -+ payload->header.type = cpu_to_le16(R5LOG_PAYLOAD_FLUSH); -+ payload->header.flags = cpu_to_le16(0); -+ payload->size = cpu_to_le32(sizeof(__le64)); -+ payload->flush_stripes[0] = cpu_to_le64(sect); -+ io->meta_offset += meta_size; -+ mutex_unlock(&log->io_mutex); -+} -+ - static int r5l_log_stripe(struct r5l_log *log, struct stripe_head *sh, - int data_pages, int parity_pages) - { -@@ -2784,6 +2821,8 @@ void r5c_finish_stripe_write_out(struct - atomic_dec(&conf->r5c_flushing_full_stripes); - atomic_dec(&conf->r5c_cached_full_stripes); - } -+ -+ r5l_append_flush_payload(log, sh->sector); - } - - int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh) diff --git a/patches.drivers/0146-md-raid5-use-md_write_start-to-count-stripes-not-bio.patch b/patches.drivers/0146-md-raid5-use-md_write_start-to-count-stripes-not-bio.patch deleted file mode 100644 index 8e08567a17..0000000000 --- a/patches.drivers/0146-md-raid5-use-md_write_start-to-count-stripes-not-bio.patch +++ /dev/null @@ -1,215 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 15 Mar 2017 14:05:12 +1100 -Subject: [PATCH] md/raid5: use md_write_start to count stripes, not bios -Git-commit: 497280509f32340d90feac030bce18006a3e3605 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -We use md_write_start() to increase the count of pending writes, and -md_write_end() to decrement the count. We currently count bios -submitted to md/raid5. 
Change it count stripe_heads that a WRITE bio -has been attached to. - -So now, raid5_make_request() calls md_write_start() and then -md_write_end() to keep the count elevated during the setup of the -request. - -add_stripe_bio() calls md_write_start() for each stripe_head, and the -completion routines always call md_write_end(), instead of only -calling it when raid5_dec_bi_active_stripes() returns 0. -make_discard_request also calls md_write_start/end(). - -The parallel between md_write_{start,end} and use of bi_phys_segments -can be seen in that: - Whenever we set bi_phys_segments to 1, we now call md_write_start. - Whenever we increment it on non-read requests with - raid5_inc_bi_active_stripes(), we now call md_write_start(). - Whenever we decrement bi_phys_segments on non-read requsts with - raid5_dec_bi_active_stripes(), we now call md_write_end(). - -This reduces our dependence on keeping a per-bio count of active -stripes in bi_phys_segments. - -md_write_inc() is added which parallels md_write_start(), but requires -that a write has already been started, and is certain never to sleep. -This can be used inside a spinlocked region when adding to a write -request. - -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md.c | 17 +++++++++++++++++ - drivers/md/md.h | 1 + - drivers/md/raid5-cache.c | 2 +- - drivers/md/raid5.c | 27 +++++++++++++-------------- - 4 files changed, 32 insertions(+), 15 deletions(-) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index 42e68b2..41f766a 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -7907,6 +7907,23 @@ void md_write_start(struct mddev *mddev, struct bio *bi) - } - EXPORT_SYMBOL(md_write_start); - -+/* md_write_inc can only be called when md_write_start() has -+ * already been called at least once of the current request. 
-+ * It increments the counter and is useful when a single request -+ * is split into several parts. Each part causes an increment and -+ * so needs a matching md_write_end(). -+ * Unlike md_write_start(), it is safe to call md_write_inc() inside -+ * a spinlocked region. -+ */ -+void md_write_inc(struct mddev *mddev, struct bio *bi) -+{ -+ if (bio_data_dir(bi) != WRITE) -+ return; -+ WARN_ON_ONCE(mddev->in_sync || mddev->ro); -+ atomic_inc(&mddev->writes_pending); -+} -+EXPORT_SYMBOL(md_write_inc); -+ - void md_write_end(struct mddev *mddev) - { - if (atomic_dec_and_test(&mddev->writes_pending)) { -diff --git a/drivers/md/md.h b/drivers/md/md.h -index e094006..0cd1272 100644 ---- a/drivers/md/md.h -+++ b/drivers/md/md.h -@@ -648,6 +648,7 @@ extern void md_wakeup_thread(struct md_thread *thread); - extern void md_check_recovery(struct mddev *mddev); - extern void md_reap_sync_thread(struct mddev *mddev); - extern void md_write_start(struct mddev *mddev, struct bio *bi); -+extern void md_write_inc(struct mddev *mddev, struct bio *bi); - extern void md_write_end(struct mddev *mddev); - extern void md_done_sync(struct mddev *mddev, int blocks, int ok); - extern void md_error(struct mddev *mddev, struct md_rdev *rdev); -diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c -index 6449313..f5034ec 100644 ---- a/drivers/md/raid5-cache.c -+++ b/drivers/md/raid5-cache.c -@@ -318,8 +318,8 @@ r5c_return_dev_pending_writes(struct r5conf *conf, struct r5dev *dev, - while (wbi && wbi->bi_iter.bi_sector < - dev->sector + STRIPE_SECTORS) { - wbi2 = r5_next_bio(wbi, dev->sector); -+ md_write_end(conf->mddev); - if (!raid5_dec_bi_active_stripes(wbi)) { -- md_write_end(conf->mddev); - bio_list_add(return_bi, wbi); - } - wbi = wbi2; -diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index 88cc898..a684003 100644 ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -3274,6 +3274,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, - 
bi->bi_next = *bip; - *bip = bi; - raid5_inc_bi_active_stripes(bi); -+ md_write_inc(conf->mddev, bi); - - if (forwrite) { - /* check if page is covered */ -@@ -3397,10 +3398,9 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, - struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); - - bi->bi_error = -EIO; -- if (!raid5_dec_bi_active_stripes(bi)) { -- md_write_end(conf->mddev); -+ md_write_end(conf->mddev); -+ if (!raid5_dec_bi_active_stripes(bi)) - bio_list_add(return_bi, bi); -- } - bi = nextbi; - } - if (bitmap_end) -@@ -3421,10 +3421,9 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, - struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); - - bi->bi_error = -EIO; -- if (!raid5_dec_bi_active_stripes(bi)) { -- md_write_end(conf->mddev); -+ md_write_end(conf->mddev); -+ if (!raid5_dec_bi_active_stripes(bi)) - bio_list_add(return_bi, bi); -- } - bi = bi2; - } - -@@ -3781,10 +3780,9 @@ static void handle_stripe_clean_event(struct r5conf *conf, - while (wbi && wbi->bi_iter.bi_sector < - dev->sector + STRIPE_SECTORS) { - wbi2 = r5_next_bio(wbi, dev->sector); -- if (!raid5_dec_bi_active_stripes(wbi)) { -- md_write_end(conf->mddev); -+ md_write_end(conf->mddev); -+ if (!raid5_dec_bi_active_stripes(wbi)) - bio_list_add(return_bi, wbi); -- } - wbi = wbi2; - } - bitmap_endwrite(conf->mddev->bitmap, sh->sector, -@@ -5487,6 +5485,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) - - bi->bi_next = NULL; - bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ -+ md_write_start(mddev, bi); - - stripe_sectors = conf->chunk_sectors * - (conf->raid_disks - conf->max_degraded); -@@ -5533,6 +5532,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) - sh->dev[d].towrite = bi; - set_bit(R5_OVERWRITE, &sh->dev[d].flags); - raid5_inc_bi_active_stripes(bi); -+ md_write_inc(mddev, bi); - sh->overwrite_disks++; - } - spin_unlock_irq(&sh->stripe_lock); -@@ -5555,9 +5555,9 @@ static void 
make_discard_request(struct mddev *mddev, struct bio *bi) - release_stripe_plug(mddev, sh); - } - -+ md_write_end(mddev); - remaining = raid5_dec_bi_active_stripes(bi); - if (remaining == 0) { -- md_write_end(mddev); - bio_endio(bi); - } - } -@@ -5592,8 +5592,6 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) - do_flush = bi->bi_opf & REQ_PREFLUSH; - } - -- md_write_start(mddev, bi); -- - /* - * If array is degraded, better not do chunk aligned read because - * later we might have to read it again in order to reconstruct -@@ -5615,6 +5613,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) - last_sector = bio_end_sector(bi); - bi->bi_next = NULL; - bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ -+ md_write_start(mddev, bi); - - prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); - for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { -@@ -5749,11 +5748,11 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) - } - finish_wait(&conf->wait_for_overlap, &w); - -+ if (rw == WRITE) -+ md_write_end(mddev); - remaining = raid5_dec_bi_active_stripes(bi); - if (remaining == 0) { - -- if ( rw == WRITE ) -- md_write_end(mddev); - - trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), - bi, 0); --- -2.10.2 - diff --git a/patches.drivers/0147-md-raid5-simplfy-delaying-of-writes-while-metadata-i.patch b/patches.drivers/0147-md-raid5-simplfy-delaying-of-writes-while-metadata-i.patch deleted file mode 100644 index 8d47f87d75..0000000000 --- a/patches.drivers/0147-md-raid5-simplfy-delaying-of-writes-while-metadata-i.patch +++ /dev/null @@ -1,132 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 15 Mar 2017 14:05:12 +1100 -Subject: [PATCH] md/raid5: simplfy delaying of writes while metadata is updated. 
-Git-commit: 16d997b78b157315f5c90fcbc2f9ce575cb3879f -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -If a device fails during a write, we must ensure the failure is -recorded in the metadata before the completion of the write is -acknowleged. - -Commit c3cce6cda162 ("md/raid5: ensure device failure recorded before -write request returns.") added code for this, but it was -unnecessarily complicated. We already had similar functionality for -handling updates to the bad-block-list, thanks to Commit de393cdea66c -("md: make it easier to wait for bad blocks to be acknowledged.") - -So revert most of the former commit, and instead avoid collecting -completed writes if MD_CHANGE_PENDING is set. raid5d() will then flush -the metadata and retry the stripe_head. -As this change can leave a stripe_head ready for handling immediately -after handle_active_stripes() returns, we change raid5_do_work() to -pause when MD_CHANGE_PENDING is set, so that it doesn't spin. - -We check MD_CHANGE_PENDING *after* analyse_stripe() as it could be set -asynchronously. After analyse_stripe(), we have collected stable data -about the state of devices, which will be used to make decisions. 
- -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5.c | 31 ++++++++----------------------- - drivers/md/raid5.h | 3 --- - 2 files changed, 8 insertions(+), 26 deletions(-) - -diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index a684003..a2c9ddc 100644 ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -4691,7 +4691,8 @@ static void handle_stripe(struct stripe_head *sh) - if (test_bit(STRIPE_LOG_TRAPPED, &sh->state)) - goto finish; - -- if (s.handle_bad_blocks) { -+ if (s.handle_bad_blocks || -+ test_bit(MD_SB_CHANGE_PENDING, &conf->mddev->sb_flags)) { - set_bit(STRIPE_HANDLE, &sh->state); - goto finish; - } -@@ -5021,15 +5022,8 @@ static void handle_stripe(struct stripe_head *sh) - md_wakeup_thread(conf->mddev->thread); - } - -- if (!bio_list_empty(&s.return_bi)) { -- if (test_bit(MD_SB_CHANGE_PENDING, &conf->mddev->sb_flags)) { -- spin_lock_irq(&conf->device_lock); -- bio_list_merge(&conf->return_bi, &s.return_bi); -- spin_unlock_irq(&conf->device_lock); -- md_wakeup_thread(conf->mddev->thread); -- } else -- return_io(&s.return_bi); -- } -+ if (!bio_list_empty(&s.return_bi)) -+ return_io(&s.return_bi); - - clear_bit_unlock(STRIPE_ACTIVE, &sh->state); - } -@@ -6226,6 +6220,7 @@ static void raid5_do_work(struct work_struct *work) - struct r5worker *worker = container_of(work, struct r5worker, work); - struct r5worker_group *group = worker->group; - struct r5conf *conf = group->conf; -+ struct mddev *mddev = conf->mddev; - int group_id = group - conf->worker_groups; - int handled; - struct blk_plug plug; -@@ -6246,6 +6241,9 @@ static void raid5_do_work(struct work_struct *work) - if (!batch_size && !released) - break; - handled += batch_size; -+ wait_event_lock_irq(mddev->sb_wait, -+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags), -+ conf->device_lock); - } - pr_debug("%d stripes handled\n", handled); - -@@ -6273,18 +6271,6 @@ static void raid5d(struct 
md_thread *thread) - - md_check_recovery(mddev); - -- if (!bio_list_empty(&conf->return_bi) && -- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) { -- struct bio_list tmp = BIO_EMPTY_LIST; -- spin_lock_irq(&conf->device_lock); -- if (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) { -- bio_list_merge(&tmp, &conf->return_bi); -- bio_list_init(&conf->return_bi); -- } -- spin_unlock_irq(&conf->device_lock); -- return_io(&tmp); -- } -- - blk_start_plug(&plug); - handled = 0; - spin_lock_irq(&conf->device_lock); -@@ -6936,7 +6922,6 @@ static struct r5conf *setup_conf(struct mddev *mddev) - INIT_LIST_HEAD(&conf->hold_list); - INIT_LIST_HEAD(&conf->delayed_list); - INIT_LIST_HEAD(&conf->bitmap_list); -- bio_list_init(&conf->return_bi); - init_llist_head(&conf->released_stripes); - atomic_set(&conf->active_stripes, 0); - atomic_set(&conf->preread_active_stripes, 0); -diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h -index ba5b7a3..13800dc 100644 ---- a/drivers/md/raid5.h -+++ b/drivers/md/raid5.h -@@ -638,9 +638,6 @@ struct r5conf { - int skip_copy; /* Don't copy data from bio to stripe cache */ - struct list_head *last_hold; /* detect hold_list promotions */ - -- /* bios to have bi_end_io called after metadata is synced */ -- struct bio_list return_bi; -- - atomic_t reshape_stripes; /* stripes with pending writes for reshape */ - /* unfortunately we need two cache names as we temporarily have - * two caches. --- -2.10.2 - diff --git a/patches.drivers/0148-md-raid5-call-bio_endio-directly-rather-than-queuein.patch b/patches.drivers/0148-md-raid5-call-bio_endio-directly-rather-than-queuein.patch deleted file mode 100644 index 6856b57452..0000000000 --- a/patches.drivers/0148-md-raid5-call-bio_endio-directly-rather-than-queuein.patch +++ /dev/null @@ -1,222 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 15 Mar 2017 14:05:12 +1100 -Subject: [PATCH] md/raid5: call bio_endio() directly rather than queueing for later. 
-Git-commit: bd83d0a28c68bacba88a3193a1bd6a083bb8d9f5 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -We currently gather bios that need to be returned into a bio_list -and call bio_endio() on them all together. -The original reason for this was to avoid making the calls while -holding a spinlock. -Locking has changed a lot since then, and that reason is no longer -valid. - -So discard return_io() and various return_bi lists, and just call -bio_endio() directly as needed. - -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5-cache.c | 13 +++++-------- - drivers/md/raid5-log.h | 2 +- - drivers/md/raid5.c | 38 ++++++++++---------------------------- - drivers/md/raid5.h | 1 - - 4 files changed, 16 insertions(+), 38 deletions(-) - ---- a/drivers/md/raid5-cache.c -+++ b/drivers/md/raid5-cache.c -@@ -308,8 +308,7 @@ static void __r5l_set_io_unit_state(stru - } - - static void --r5c_return_dev_pending_writes(struct r5conf *conf, struct r5dev *dev, -- struct bio_list *return_bi) -+r5c_return_dev_pending_writes(struct r5conf *conf, struct r5dev *dev) - { - struct bio *wbi, *wbi2; - -@@ -319,23 +318,21 @@ r5c_return_dev_pending_writes(struct r5c - dev->sector + STRIPE_SECTORS) { - wbi2 = r5_next_bio(wbi, dev->sector); - md_write_end(conf->mddev); -- if (!raid5_dec_bi_active_stripes(wbi)) { -- bio_list_add(return_bi, wbi); -- } -+ if (!raid5_dec_bi_active_stripes(wbi)) -+ bio_endio(wbi); - wbi = wbi2; - } - } - - void r5c_handle_cached_data_endio(struct r5conf *conf, -- struct stripe_head *sh, int disks, struct bio_list *return_bi) -+ struct stripe_head *sh, int disks) - { - int i; - - for (i = sh->disks; i--; ) { - if (sh->dev[i].written) { - set_bit(R5_UPTODATE, &sh->dev[i].flags); -- r5c_return_dev_pending_writes(conf, &sh->dev[i], -- return_bi); -+ 
r5c_return_dev_pending_writes(conf, &sh->dev[i]); - bitmap_endwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, - !test_bit(STRIPE_DEGRADED, &sh->state), ---- a/drivers/md/raid5-log.h -+++ b/drivers/md/raid5-log.h -@@ -21,7 +21,7 @@ extern void r5c_release_extra_page(struc - extern void r5c_use_extra_page(struct stripe_head *sh); - extern void r5l_wake_reclaim(struct r5l_log *log, sector_t space); - extern void r5c_handle_cached_data_endio(struct r5conf *conf, -- struct stripe_head *sh, int disks, struct bio_list *return_bi); -+ struct stripe_head *sh, int disks); - extern int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh); - extern void r5c_make_stripe_write_out(struct stripe_head *sh); - extern void r5c_flush_cache(struct r5conf *conf, int num); ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -156,17 +156,6 @@ static int raid6_idx_to_slot(int idx, st - return slot; - } - --static void return_io(struct bio_list *return_bi) --{ -- struct bio *bi; -- while ((bi = bio_list_pop(return_bi)) != NULL) { -- bi->bi_iter.bi_size = 0; -- trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), -- bi, 0); -- bio_endio(bi); -- } --} -- - static void print_raid5_conf (struct r5conf *conf); - - static int stripe_operations_active(struct stripe_head *sh) -@@ -1297,7 +1286,6 @@ async_copy_data(int frombio, struct bio - static void ops_complete_biofill(void *stripe_head_ref) - { - struct stripe_head *sh = stripe_head_ref; -- struct bio_list return_bi = BIO_EMPTY_LIST; - int i; - - pr_debug("%s: stripe %llu\n", __func__, -@@ -1322,15 +1310,13 @@ static void ops_complete_biofill(void *s - dev->sector + STRIPE_SECTORS) { - rbi2 = r5_next_bio(rbi, dev->sector); - if (!raid5_dec_bi_active_stripes(rbi)) -- bio_list_add(&return_bi, rbi); -+ bio_endio(rbi); - rbi = rbi2; - } - } - } - clear_bit(STRIPE_BIOFILL_RUN, &sh->state); - -- return_io(&return_bi); -- - set_bit(STRIPE_HANDLE, &sh->state); - raid5_release_stripe(sh); - } -@@ -3348,8 +3334,7 @@ static void 
stripe_set_idx(sector_t stri - - static void - handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, -- struct stripe_head_state *s, int disks, -- struct bio_list *return_bi) -+ struct stripe_head_state *s, int disks) - { - int i; - BUG_ON(sh->batch_head); -@@ -3397,7 +3382,7 @@ handle_failed_stripe(struct r5conf *conf - bi->bi_error = -EIO; - md_write_end(conf->mddev); - if (!raid5_dec_bi_active_stripes(bi)) -- bio_list_add(return_bi, bi); -+ bio_endio(bi); - bi = nextbi; - } - if (bitmap_end) -@@ -3420,7 +3405,7 @@ handle_failed_stripe(struct r5conf *conf - bi->bi_error = -EIO; - md_write_end(conf->mddev); - if (!raid5_dec_bi_active_stripes(bi)) -- bio_list_add(return_bi, bi); -+ bio_endio(bi); - bi = bi2; - } - -@@ -3446,7 +3431,7 @@ handle_failed_stripe(struct r5conf *conf - - bi->bi_error = -EIO; - if (!raid5_dec_bi_active_stripes(bi)) -- bio_list_add(return_bi, bi); -+ bio_endio(bi); - bi = nextbi; - } - } -@@ -3745,7 +3730,7 @@ static void break_stripe_batch_list(stru - * never LOCKED, so we don't need to test 'failed' directly. 
- */ - static void handle_stripe_clean_event(struct r5conf *conf, -- struct stripe_head *sh, int disks, struct bio_list *return_bi) -+ struct stripe_head *sh, int disks) - { - int i; - struct r5dev *dev; -@@ -3779,7 +3764,7 @@ returnbi: - wbi2 = r5_next_bio(wbi, dev->sector); - md_write_end(conf->mddev); - if (!raid5_dec_bi_active_stripes(wbi)) -- bio_list_add(return_bi, wbi); -+ bio_endio(wbi); - wbi = wbi2; - } - bitmap_endwrite(conf->mddev->bitmap, sh->sector, -@@ -4722,7 +4707,7 @@ static void handle_stripe(struct stripe_ - sh->reconstruct_state = 0; - break_stripe_batch_list(sh, 0); - if (s.to_read+s.to_write+s.written) -- handle_failed_stripe(conf, sh, &s, disks, &s.return_bi); -+ handle_failed_stripe(conf, sh, &s, disks); - if (s.syncing + s.replacing) - handle_failed_sync(conf, sh, &s); - } -@@ -4788,10 +4773,10 @@ static void handle_stripe(struct stripe_ - && !test_bit(R5_LOCKED, &qdev->flags) - && (test_bit(R5_UPTODATE, &qdev->flags) || - test_bit(R5_Discard, &qdev->flags)))))) -- handle_stripe_clean_event(conf, sh, disks, &s.return_bi); -+ handle_stripe_clean_event(conf, sh, disks); - - if (s.just_cached) -- r5c_handle_cached_data_endio(conf, sh, disks, &s.return_bi); -+ r5c_handle_cached_data_endio(conf, sh, disks); - log_stripe_write_finished(sh); - - /* Now we might consider reading some blocks, either to check/generate -@@ -5019,9 +5004,6 @@ finish: - md_wakeup_thread(conf->mddev->thread); - } - -- if (!bio_list_empty(&s.return_bi)) -- return_io(&s.return_bi); -- - clear_bit_unlock(STRIPE_ACTIVE, &sh->state); - } - ---- a/drivers/md/raid5.h -+++ b/drivers/md/raid5.h -@@ -278,7 +278,6 @@ struct stripe_head_state { - int dec_preread_active; - unsigned long ops_request; - -- struct bio_list return_bi; - struct md_rdev *blocked_rdev; - int handle_bad_blocks; - int log_failed; diff --git a/patches.drivers/0149-md-raid5-use-bio_inc_remaining-instead-of-repurposin.patch b/patches.drivers/0149-md-raid5-use-bio_inc_remaining-instead-of-repurposin.patch 
deleted file mode 100644 index 6fb227653f..0000000000 --- a/patches.drivers/0149-md-raid5-use-bio_inc_remaining-instead-of-repurposin.patch +++ /dev/null @@ -1,266 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 15 Mar 2017 14:05:13 +1100 -Subject: [PATCH] md/raid5: use bio_inc_remaining() instead of repurposing - bi_phys_segments as a counter -Git-commit: 016c76ac76e4c678b01a75a602dc6be0282f5b29 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -md/raid5 needs to keep track of how many stripe_heads are processing a -bio so that it can delay calling bio_endio() until all stripe_heads -have completed. It currently uses 16 bits of ->bi_phys_segments for -this purpose. - -16 bits is only enough for 256M requests, and it is possible for a -single bio to be larger than this, which causes problems. Also, the -bio struct contains a larger counter, __bi_remaining, which has a -purpose very similar to the purpose of our counter. So stop using -->bi_phys_segments, and instead use __bi_remaining. - -This means we don't need to initialize the counter, as our caller -initializes it to '1'. It also means we can call bio_endio() directly -as it tests this counter internally. 
- -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5-cache.c | 3 +-- - drivers/md/raid5.c | 57 +++++++++++------------------------------------- - drivers/md/raid5.h | 17 +-------------- - 3 files changed, 15 insertions(+), 62 deletions(-) - -diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c -index 5be8dbc..25eb048 100644 ---- a/drivers/md/raid5-cache.c -+++ b/drivers/md/raid5-cache.c -@@ -318,8 +318,7 @@ r5c_return_dev_pending_writes(struct r5conf *conf, struct r5dev *dev) - dev->sector + STRIPE_SECTORS) { - wbi2 = r5_next_bio(wbi, dev->sector); - md_write_end(conf->mddev); -- if (!raid5_dec_bi_active_stripes(wbi)) -- bio_endio(wbi); -+ bio_endio(wbi); - wbi = wbi2; - } - } -diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index 44c8ceb..0ec9e02 100644 ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -1322,8 +1322,7 @@ static void ops_complete_biofill(void *stripe_head_ref) - while (rbi && rbi->bi_iter.bi_sector < - dev->sector + STRIPE_SECTORS) { - rbi2 = r5_next_bio(rbi, dev->sector); -- if (!raid5_dec_bi_active_stripes(rbi)) -- bio_endio(rbi); -+ bio_endio(rbi); - rbi = rbi2; - } - } -@@ -3196,14 +3195,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, - (unsigned long long)bi->bi_iter.bi_sector, - (unsigned long long)sh->sector); - -- /* -- * If several bio share a stripe. The bio bi_phys_segments acts as a -- * reference count to avoid race. The reference count should already be -- * increased before this function is called (for example, in -- * raid5_make_request()), so other bio sharing this stripe will not free the -- * stripe. If a stripe is owned by one stripe, the stripe lock will -- * protect it. 
-- */ - spin_lock_irq(&sh->stripe_lock); - /* Don't allow new IO added to stripes in batch list */ - if (sh->batch_head) -@@ -3259,7 +3250,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, - if (*bip) - bi->bi_next = *bip; - *bip = bi; -- raid5_inc_bi_active_stripes(bi); -+ bio_inc_remaining(bi); - md_write_inc(conf->mddev, bi); - - if (forwrite) { -@@ -3384,8 +3375,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, - - bi->bi_error = -EIO; - md_write_end(conf->mddev); -- if (!raid5_dec_bi_active_stripes(bi)) -- bio_endio(bi); -+ bio_endio(bi); - bi = nextbi; - } - if (bitmap_end) -@@ -3407,8 +3397,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, - - bi->bi_error = -EIO; - md_write_end(conf->mddev); -- if (!raid5_dec_bi_active_stripes(bi)) -- bio_endio(bi); -+ bio_endio(bi); - bi = bi2; - } - -@@ -3433,8 +3422,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, - r5_next_bio(bi, sh->dev[i].sector); - - bi->bi_error = -EIO; -- if (!raid5_dec_bi_active_stripes(bi)) -- bio_endio(bi); -+ bio_endio(bi); - bi = nextbi; - } - } -@@ -3766,8 +3754,7 @@ static void handle_stripe_clean_event(struct r5conf *conf, - dev->sector + STRIPE_SECTORS) { - wbi2 = r5_next_bio(wbi, dev->sector); - md_write_end(conf->mddev); -- if (!raid5_dec_bi_active_stripes(wbi)) -- bio_endio(wbi); -+ bio_endio(wbi); - wbi = wbi2; - } - bitmap_endwrite(conf->mddev->bitmap, sh->sector, -@@ -5112,7 +5099,7 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf) - * this sets the active strip count to 1 and the processed - * strip count to zero (upper 8 bits) - */ -- raid5_set_bi_stripes(bi, 1); /* biased count of active stripes */ -+ raid5_set_bi_processed_stripes(bi, 0); - } - - return bi; -@@ -5449,7 +5436,6 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) - struct r5conf *conf = mddev->private; - sector_t logical_sector, last_sector; - struct stripe_head *sh; -- int 
remaining; - int stripe_sectors; - - if (mddev->reshape_position != MaxSector) -@@ -5460,7 +5446,6 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) - last_sector = bi->bi_iter.bi_sector + (bi->bi_iter.bi_size>>9); - - bi->bi_next = NULL; -- bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ - md_write_start(mddev, bi); - - stripe_sectors = conf->chunk_sectors * -@@ -5507,7 +5492,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) - continue; - sh->dev[d].towrite = bi; - set_bit(R5_OVERWRITE, &sh->dev[d].flags); -- raid5_inc_bi_active_stripes(bi); -+ bio_inc_remaining(bi); - md_write_inc(mddev, bi); - sh->overwrite_disks++; - } -@@ -5532,10 +5517,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) - } - - md_write_end(mddev); -- remaining = raid5_dec_bi_active_stripes(bi); -- if (remaining == 0) { -- bio_endio(bi); -- } -+ bio_endio(bi); - } - - static void raid5_make_request(struct mddev *mddev, struct bio * bi) -@@ -5546,7 +5528,6 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) - sector_t logical_sector, last_sector; - struct stripe_head *sh; - const int rw = bio_data_dir(bi); -- int remaining; - DEFINE_WAIT(w); - bool do_prepare; - bool do_flush = false; -@@ -5588,7 +5569,6 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) - logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1); - last_sector = bio_end_sector(bi); - bi->bi_next = NULL; -- bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ - md_write_start(mddev, bi); - - prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); -@@ -5726,14 +5706,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) - - if (rw == WRITE) - md_write_end(mddev); -- remaining = raid5_dec_bi_active_stripes(bi); -- if (remaining == 0) { -- -- -- trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), -- bi, 0); -- bio_endio(bi); -- } -+ 
bio_endio(bi); - } - - static sector_t raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks); -@@ -6098,7 +6071,6 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) - int dd_idx; - sector_t sector, logical_sector, last_sector; - int scnt = 0; -- int remaining; - int handled = 0; - - logical_sector = raid_bio->bi_iter.bi_sector & -@@ -6137,12 +6109,9 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) - raid5_release_stripe(sh); - handled++; - } -- remaining = raid5_dec_bi_active_stripes(raid_bio); -- if (remaining == 0) { -- trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev), -- raid_bio, 0); -- bio_endio(raid_bio); -- } -+ -+ bio_endio(raid_bio); -+ - if (atomic_dec_and_test(&conf->active_aligned_reads)) - wake_up(&conf->wait_for_quiescent); - return handled; -diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h -index fd5c21c..7d74fb3 100644 ---- a/drivers/md/raid5.h -+++ b/drivers/md/raid5.h -@@ -488,8 +488,7 @@ static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector) - } - - /* -- * We maintain a biased count of active stripes in the bottom 16 bits of -- * bi_phys_segments, and a count of processed stripes in the upper 16 bits -+ * We maintain a count of processed stripes in the upper 16 bits - */ - static inline int raid5_bi_processed_stripes(struct bio *bio) - { -@@ -498,20 +497,6 @@ static inline int raid5_bi_processed_stripes(struct bio *bio) - return (atomic_read(segments) >> 16) & 0xffff; - } - --static inline int raid5_dec_bi_active_stripes(struct bio *bio) --{ -- atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; -- -- return atomic_sub_return(1, segments) & 0xffff; --} -- --static inline void raid5_inc_bi_active_stripes(struct bio *bio) --{ -- atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; -- -- atomic_inc(segments); --} -- - static inline void raid5_set_bi_processed_stripes(struct bio *bio, - unsigned int cnt) - { --- -2.10.2 - diff --git 
a/patches.drivers/0150-md-raid5-remove-over-loading-of-bi_phys_segments.patch b/patches.drivers/0150-md-raid5-remove-over-loading-of-bi_phys_segments.patch deleted file mode 100644 index 3cc61adae7..0000000000 --- a/patches.drivers/0150-md-raid5-remove-over-loading-of-bi_phys_segments.patch +++ /dev/null @@ -1,172 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 15 Mar 2017 14:05:13 +1100 -Subject: [PATCH] md/raid5: remove over-loading of ->bi_phys_segments. -Git-commit: 0472a42ba1f89ec85f070c731f4440d7cc38c44c -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -When a read request, which bypassed the cache, fails, we need to retry -it through the cache. -This involves attaching it to a sequence of stripe_heads, and it may not -be possible to get all the stripe_heads we need at once. -We do what we can, and record how far we got in ->bi_phys_segments so -we can pick up again later. - -There is only ever one bio which may have a non-zero offset stored in -->bi_phys_segments, the one that is either active in the single thread -which calls retry_aligned_read(), or is in conf->retry_read_aligned -waiting for retry_aligned_read() to be called again. - -So we only need to store one offset value. This can be in a local -variable passed between remove_bio_from_retry() and -retry_aligned_read(), or in the r5conf structure next to the -->retry_read_aligned pointer. - -Storing it there allows the last usage of ->bi_phys_segments to be -removed from md/raid5.c. 
- -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5.c | 24 ++++++++++++------------ - drivers/md/raid5.h | 30 +----------------------------- - 2 files changed, 13 insertions(+), 41 deletions(-) - ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -5089,12 +5089,14 @@ static void add_bio_to_retry(struct bio - md_wakeup_thread(conf->mddev->thread); - } - --static struct bio *remove_bio_from_retry(struct r5conf *conf) -+static struct bio *remove_bio_from_retry(struct r5conf *conf, -+ unsigned int *offset) - { - struct bio *bi; - - bi = conf->retry_read_aligned; - if (bi) { -+ *offset = conf->retry_read_offset; - conf->retry_read_aligned = NULL; - return bi; - } -@@ -5102,11 +5104,7 @@ static struct bio *remove_bio_from_retry - if(bi) { - conf->retry_read_aligned_list = bi->bi_next; - bi->bi_next = NULL; -- /* -- * this sets the active strip count to 1 and the processed -- * strip count to zero (upper 8 bits) -- */ -- raid5_set_bi_processed_stripes(bi, 0); -+ *offset = 0; - } - - return bi; -@@ -6062,7 +6060,8 @@ static inline sector_t raid5_sync_reques - return STRIPE_SECTORS; - } - --static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) -+static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio, -+ unsigned int offset) - { - /* We may not be able to submit a whole bio at once as there - * may not be enough stripe_heads available. 
-@@ -6091,7 +6090,7 @@ static int retry_aligned_read(struct r5 - sector += STRIPE_SECTORS, - scnt++) { - -- if (scnt < raid5_bi_processed_stripes(raid_bio)) -+ if (scnt < offset) - /* already done this stripe */ - continue; - -@@ -6099,15 +6098,15 @@ static int retry_aligned_read(struct r5 - - if (!sh) { - /* failed to get a stripe - must wait */ -- raid5_set_bi_processed_stripes(raid_bio, scnt); - conf->retry_read_aligned = raid_bio; -+ conf->retry_read_offset = scnt; - return handled; - } - - if (!add_stripe_bio(sh, raid_bio, dd_idx, 0, 0)) { - raid5_release_stripe(sh); -- raid5_set_bi_processed_stripes(raid_bio, scnt); - conf->retry_read_aligned = raid_bio; -+ conf->retry_read_offset = scnt; - return handled; - } - -@@ -6235,6 +6234,7 @@ static void raid5d(struct md_thread *thr - while (1) { - struct bio *bio; - int batch_size, released; -+ unsigned int offset; - - released = release_stripe_list(conf, conf->temp_inactive_list); - if (released) -@@ -6252,10 +6252,10 @@ static void raid5d(struct md_thread *thr - } - raid5_activate_delayed(conf); - -- while ((bio = remove_bio_from_retry(conf))) { -+ while ((bio = remove_bio_from_retry(conf, &offset))) { - int ok; - spin_unlock_irq(&conf->device_lock); -- ok = retry_aligned_read(conf, bio); -+ ok = retry_aligned_read(conf, bio, offset); - spin_lock_irq(&conf->device_lock); - if (!ok) - break; ---- a/drivers/md/raid5.h -+++ b/drivers/md/raid5.h -@@ -487,35 +487,6 @@ static inline struct bio *r5_next_bio(st - return NULL; - } - --/* -- * We maintain a count of processed stripes in the upper 16 bits -- */ --static inline int raid5_bi_processed_stripes(struct bio *bio) --{ -- atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; -- -- return (atomic_read(segments) >> 16) & 0xffff; --} -- --static inline void raid5_set_bi_processed_stripes(struct bio *bio, -- unsigned int cnt) --{ -- atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; -- int old, new; -- -- do { -- old = atomic_read(segments); -- new = (old & 
0xffff) | (cnt << 16); -- } while (atomic_cmpxchg(segments, old, new) != old); --} -- --static inline void raid5_set_bi_stripes(struct bio *bio, unsigned int cnt) --{ -- atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; -- -- atomic_set(segments, cnt); --} -- - #define NR_STRIPES 256 - #define STRIPE_SIZE PAGE_SIZE - #define STRIPE_SHIFT (PAGE_SHIFT - 9) -@@ -619,6 +590,7 @@ struct r5conf { - struct list_head delayed_list; /* stripes that have plugged requests */ - struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ - struct bio *retry_read_aligned; /* currently retrying aligned bios */ -+ unsigned int retry_read_offset; /* sector offset into retry_read_aligned */ - struct bio *retry_read_aligned_list; /* aligned bios retry list */ - atomic_t preread_active_stripes; /* stripes with scheduled io */ - atomic_t active_aligned_reads; diff --git a/patches.drivers/0151-Revert-md-raid5-limit-request-size-according-to-impl.patch b/patches.drivers/0151-Revert-md-raid5-limit-request-size-according-to-impl.patch deleted file mode 100644 index 7183484b82..0000000000 --- a/patches.drivers/0151-Revert-md-raid5-limit-request-size-according-to-impl.patch +++ /dev/null @@ -1,45 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 15 Mar 2017 14:05:13 +1100 -Subject: [PATCH] Revert "md/raid5: limit request size according to - implementation limits" -Git-commit: 97d53438081edd25ccb1de34051efe084d240828 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -This reverts commit e8d7c33232e5fdfa761c3416539bc5b4acd12db5. - -Now that raid5 doesn't abuse bi_phys_segments any more, we no longer -need to impose these limits. 
- -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5.c | 9 --------- - 1 file changed, 9 deletions(-) - -diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index 1c8be66..00a34fa 100644 ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -7361,15 +7361,6 @@ static int raid5_run(struct mddev *mddev) - stripe = (stripe | (stripe-1)) + 1; - mddev->queue->limits.discard_alignment = stripe; - mddev->queue->limits.discard_granularity = stripe; -- -- /* -- * We use 16-bit counter of active stripes in bi_phys_segments -- * (minus one for over-loaded initialization) -- */ -- blk_queue_max_hw_sectors(mddev->queue, 0xfffe * STRIPE_SECTORS); -- blk_queue_max_discard_sectors(mddev->queue, -- 0xfffe * STRIPE_SECTORS); -- - /* - * unaligned part of discard request will be ignored, so can't - * guarantee discard_zeroes_data --- -2.10.2 - diff --git a/patches.drivers/0152-md-raid1-raid10-move-rXbio-accounting-closer-to-allo.patch b/patches.drivers/0152-md-raid1-raid10-move-rXbio-accounting-closer-to-allo.patch deleted file mode 100644 index 115c7a6a23..0000000000 --- a/patches.drivers/0152-md-raid1-raid10-move-rXbio-accounting-closer-to-allo.patch +++ /dev/null @@ -1,119 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 15 Mar 2017 14:05:13 +1100 -Subject: [PATCH] md/raid1, raid10: move rXbio accounting closer to allocation. -Git-commit: 6b6c8110e173ce10f2b169d82a6670001f7184d1 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -When raid1 or raid10 find they will need to allocate a new -r1bio/r10bio, in order to work around a known bad block, they -account for the allocation well before the allocation is -made. This separation makes the correctness less obvious -and requires comments. 
- -The accounting needs to be a little before: before the first -rXbio is submitted, but that is all. - -So move the accounting down to where it makes more sense. - -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 24 +++++++++++------------- - drivers/md/raid10.c | 22 +++++++++------------- - 2 files changed, 20 insertions(+), 26 deletions(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index 730e572..3afa60e 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -1436,18 +1436,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio) - goto retry_write; - } - -- if (max_sectors < r1_bio->sectors) { -- /* We are splitting this write into multiple parts, so -- * we need to prepare for allocating another r1_bio. -- */ -+ if (max_sectors < r1_bio->sectors) - r1_bio->sectors = max_sectors; -- spin_lock_irq(&conf->device_lock); -- if (bio->bi_phys_segments == 0) -- bio->bi_phys_segments = 2; -- else -- bio->bi_phys_segments++; -- spin_unlock_irq(&conf->device_lock); -- } -+ - sectors_handled = r1_bio->sector + max_sectors - bio->bi_iter.bi_sector; - - atomic_set(&r1_bio->remaining, 1); -@@ -1553,10 +1544,17 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio) - * as it could result in the bio being freed. - */ - if (sectors_handled < bio_sectors(bio)) { -- r1_bio_write_done(r1_bio); -- /* We need another r1_bio. 
It has already been counted -+ /* We need another r1_bio, which must be accounted - * in bio->bi_phys_segments - */ -+ spin_lock_irq(&conf->device_lock); -+ if (bio->bi_phys_segments == 0) -+ bio->bi_phys_segments = 2; -+ else -+ bio->bi_phys_segments++; -+ spin_unlock_irq(&conf->device_lock); -+ -+ r1_bio_write_done(r1_bio); - r1_bio = alloc_r1bio(mddev, bio, sectors_handled); - goto retry_write; - } -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index e89a8d7..c7c5b26 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -1384,18 +1384,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, - goto retry_write; - } - -- if (max_sectors < r10_bio->sectors) { -- /* We are splitting this into multiple parts, so -- * we need to prepare for allocating another r10_bio. -- */ -+ if (max_sectors < r10_bio->sectors) - r10_bio->sectors = max_sectors; -- spin_lock_irq(&conf->device_lock); -- if (bio->bi_phys_segments == 0) -- bio->bi_phys_segments = 2; -- else -- bio->bi_phys_segments++; -- spin_unlock_irq(&conf->device_lock); -- } - sectors_handled = r10_bio->sector + max_sectors - - bio->bi_iter.bi_sector; - -@@ -1505,10 +1495,16 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, - */ - - if (sectors_handled < bio_sectors(bio)) { -- one_write_done(r10_bio); -- /* We need another r10_bio. It has already been counted -+ /* We need another r10_bio and it needs to be counted - * in bio->bi_phys_segments. 
- */ -+ spin_lock_irq(&conf->device_lock); -+ if (bio->bi_phys_segments == 0) -+ bio->bi_phys_segments = 2; -+ else -+ bio->bi_phys_segments++; -+ spin_unlock_irq(&conf->device_lock); -+ one_write_done(r10_bio); - r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); - - r10_bio->master_bio = bio; --- -2.10.2 - diff --git a/patches.drivers/0153-md-raid10-stop-using-bi_phys_segments.patch b/patches.drivers/0153-md-raid10-stop-using-bi_phys_segments.patch deleted file mode 100644 index 6090f26b9a..0000000000 --- a/patches.drivers/0153-md-raid10-stop-using-bi_phys_segments.patch +++ /dev/null @@ -1,195 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 15 Mar 2017 14:05:13 +1100 -Subject: [PATCH] md/raid10: stop using bi_phys_segments -Git-commit: fd16f2e8489100eb8005483ff630856bce51f803 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -raid10 currently repurposes bi_phys_segments on each -incoming bio to count how many r10bio was used to encode the -request. - -We need to know when the number of attached r10bio reaches -zero to: -1/ call bio_endio() when all IO on the bio is finished -2/ decrement ->nr_pending so that resync IO can proceed. - -Now that the bio has its own __bi_remaining counter, that -can be used instead. We can call bio_inc_remaining to -increment the counter and call bio_endio() every time an -r10bio completes, rather than only when bi_phys_segments -reaches zero. - -This addresses point 1, but not point 2. bio_endio() -doesn't (and cannot) report when the last r10bio has -finished, so a different approach is needed. - -So: instead of counting bios in ->nr_pending, count r10bios. -i.e. every time we attach a bio, increment nr_pending. -Every time an r10bio completes, decrement nr_pending. - -Normally we only increment nr_pending after first checking -that ->barrier is zero, or some other non-trivial tests and -possible waiting. 
When attaching multiple r10bios to a bio, -we only need the tests and the waiting once. After the -first increment, subsequent increments can happen -unconditionally as they are really all part of the one -request. - -So introduce inc_pending() which can be used when we know -that nr_pending is already elevated. - -Note that this fixes a bug. freeze_array() contains the line - atomic_read(&conf->nr_pending) == conf->nr_queued+extra, -which implies that the units for ->nr_pending, ->nr_queued and extra -are the same. -->nr_queue and extra count r10_bios, but prior to this patch, -->nr_pending counted bios. If a bio ever resulted in multiple -r10_bios (due to bad blocks), freeze_array() would not work correctly. -Now it does. - -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid10.c | 76 ++++++++++++++++++----------------------------------- - 1 file changed, 25 insertions(+), 51 deletions(-) - -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index c7c5b26..0f1b78b 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -301,27 +301,18 @@ static void reschedule_retry(struct r10bio *r10_bio) - static void raid_end_bio_io(struct r10bio *r10_bio) - { - struct bio *bio = r10_bio->master_bio; -- int done; - struct r10conf *conf = r10_bio->mddev->private; - -- if (bio->bi_phys_segments) { -- unsigned long flags; -- spin_lock_irqsave(&conf->device_lock, flags); -- bio->bi_phys_segments--; -- done = (bio->bi_phys_segments == 0); -- spin_unlock_irqrestore(&conf->device_lock, flags); -- } else -- done = 1; - if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) - bio->bi_error = -EIO; -- if (done) { -- bio_endio(bio); -- /* -- * Wake up any possible resync thread that waits for the device -- * to go idle. -- */ -- allow_barrier(conf); -- } -+ -+ bio_endio(bio); -+ /* -+ * Wake up any possible resync thread that waits for the device -+ * to go idle. 
-+ */ -+ allow_barrier(conf); -+ - free_r10bio(r10_bio); - } - -@@ -985,6 +976,15 @@ static void wait_barrier(struct r10conf *conf) - spin_unlock_irq(&conf->resync_lock); - } - -+static void inc_pending(struct r10conf *conf) -+{ -+ /* The current request requires multiple r10_bio, so -+ * we need to increment the pending count. -+ */ -+ WARN_ON(!atomic_read(&conf->nr_pending)); -+ atomic_inc(&conf->nr_pending); -+} -+ - static void allow_barrier(struct r10conf *conf) - { - if ((atomic_dec_and_test(&conf->nr_pending)) || -@@ -1162,12 +1162,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, - sectors_handled = (r10_bio->sector + max_sectors - - bio->bi_iter.bi_sector); - r10_bio->sectors = max_sectors; -- spin_lock_irq(&conf->device_lock); -- if (bio->bi_phys_segments == 0) -- bio->bi_phys_segments = 2; -- else -- bio->bi_phys_segments++; -- spin_unlock_irq(&conf->device_lock); -+ inc_pending(conf); -+ bio_inc_remaining(bio); - /* - * Cannot call generic_make_request directly as that will be - * queued in __generic_make_request and subsequent -@@ -1262,9 +1258,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, - * on which we have seen a write error, we want to avoid - * writing to those blocks. This potentially requires several - * writes to write around the bad blocks. Each set of writes -- * gets its own r10_bio with a set of bios attached. The number -- * of r10_bios is recored in bio->bi_phys_segments just as with -- * the read case. -+ * gets its own r10_bio with a set of bios attached. - */ - - r10_bio->read_slot = -1; /* make sure repl_bio gets freed */ -@@ -1495,15 +1489,9 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, - */ - - if (sectors_handled < bio_sectors(bio)) { -- /* We need another r10_bio and it needs to be counted -- * in bio->bi_phys_segments. 
-- */ -- spin_lock_irq(&conf->device_lock); -- if (bio->bi_phys_segments == 0) -- bio->bi_phys_segments = 2; -- else -- bio->bi_phys_segments++; -- spin_unlock_irq(&conf->device_lock); -+ /* We need another r10_bio and it needs to be counted */ -+ inc_pending(conf); -+ bio_inc_remaining(bio); - one_write_done(r10_bio); - r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); - -@@ -1532,16 +1520,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio) - r10_bio->sector = bio->bi_iter.bi_sector; - r10_bio->state = 0; - -- /* -- * We might need to issue multiple reads to different devices if there -- * are bad blocks around, so we keep track of the number of reads in -- * bio->bi_phys_segments. If this is 0, there is only one r10_bio and -- * no locking will be needed when the request completes. If it is -- * non-zero, then it is the number of not-completed requests. -- */ -- bio->bi_phys_segments = 0; -- bio_clear_flag(bio, BIO_SEG_VALID); -- - if (bio_data_dir(bio) == READ) - raid10_read_request(mddev, bio, r10_bio); - else -@@ -2693,12 +2671,8 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) - r10_bio->sector + max_sectors - - mbio->bi_iter.bi_sector; - r10_bio->sectors = max_sectors; -- spin_lock_irq(&conf->device_lock); -- if (mbio->bi_phys_segments == 0) -- mbio->bi_phys_segments = 2; -- else -- mbio->bi_phys_segments++; -- spin_unlock_irq(&conf->device_lock); -+ bio_inc_remaining(mbio); -+ inc_pending(conf); - generic_make_request(bio); - - r10_bio = mempool_alloc(conf->r10bio_pool, --- -2.10.2 - diff --git a/patches.drivers/0154-md-raid1-stop-using-bi_phys_segment.patch b/patches.drivers/0154-md-raid1-stop-using-bi_phys_segment.patch deleted file mode 100644 index c59584b0c0..0000000000 --- a/patches.drivers/0154-md-raid1-stop-using-bi_phys_segment.patch +++ /dev/null @@ -1,186 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 15 Mar 2017 14:05:14 +1100 -Subject: [PATCH] md/raid1: stop using bi_phys_segment 
-Git-commit: 37011e3afb0fdc462307dc006246358bddf61e92 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -Change to use bio->__bi_remaining to count number of r1bio attached -to a bio. -See previous raid10 patch for more details. - -Like the raid10.c patch, this fixes a bug as nr_queued and nr_pending -used to measure different things, but were being compared. - -This patch fixes another bug in that nr_pending previously did not -count write-behind requests, so behind writes could continue while -resync was happening. Now that nr_pending counts all r1_bio, -the resync cannot commence until the behind writes have completed. - -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 89 ++++++++++++++---------------------------------------- - 1 file changed, 23 insertions(+), 66 deletions(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index 3afa60e..941f810 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -246,35 +246,17 @@ static void reschedule_retry(struct r1bio *r1_bio) - static void call_bio_endio(struct r1bio *r1_bio) - { - struct bio *bio = r1_bio->master_bio; -- int done; - struct r1conf *conf = r1_bio->mddev->private; -- sector_t bi_sector = bio->bi_iter.bi_sector; -- -- if (bio->bi_phys_segments) { -- unsigned long flags; -- spin_lock_irqsave(&conf->device_lock, flags); -- bio->bi_phys_segments--; -- done = (bio->bi_phys_segments == 0); -- spin_unlock_irqrestore(&conf->device_lock, flags); -- /* -- * make_request() might be waiting for -- * bi_phys_segments to decrease -- */ -- wake_up(&conf->wait_barrier); -- } else -- done = 1; - - if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) - bio->bi_error = -EIO; - -- if (done) { -- bio_endio(bio); -- /* -- * Wake up any possible resync thread that waits for the device -- * to go idle. 
-- */ -- allow_barrier(conf, bi_sector); -- } -+ bio_endio(bio); -+ /* -+ * Wake up any possible resync thread that waits for the device -+ * to go idle. -+ */ -+ allow_barrier(conf, r1_bio->sector); - } - - static void raid_end_bio_io(struct r1bio *r1_bio) -@@ -977,6 +959,16 @@ static void wait_read_barrier(struct r1conf *conf, sector_t sector_nr) - spin_unlock_irq(&conf->resync_lock); - } - -+static void inc_pending(struct r1conf *conf, sector_t bi_sector) -+{ -+ /* The current request requires multiple r1_bio, so -+ * we need to increment the pending count, and the corresponding -+ * window count. -+ */ -+ int idx = sector_to_idx(bi_sector); -+ atomic_inc(&conf->nr_pending[idx]); -+} -+ - static void wait_barrier(struct r1conf *conf, sector_t sector_nr) - { - int idx = sector_to_idx(sector_nr); -@@ -1192,17 +1184,6 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio) - r1_bio = alloc_r1bio(mddev, bio, 0); - - /* -- * We might need to issue multiple reads to different -- * devices if there are bad blocks around, so we keep -- * track of the number of reads in bio->bi_phys_segments. -- * If this is 0, there is only one r1_bio and no locking -- * will be needed when requests complete. If it is -- * non-zero, then it is the number of not-completed requests. -- */ -- bio->bi_phys_segments = 0; -- bio_clear_flag(bio, BIO_SEG_VALID); -- -- /* - * make_request() can abort the operation when read-ahead is being - * used and no empty request is available. 
- */ -@@ -1257,12 +1238,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio) - sectors_handled = (r1_bio->sector + max_sectors - - bio->bi_iter.bi_sector); - r1_bio->sectors = max_sectors; -- spin_lock_irq(&conf->device_lock); -- if (bio->bi_phys_segments == 0) -- bio->bi_phys_segments = 2; -- else -- bio->bi_phys_segments++; -- spin_unlock_irq(&conf->device_lock); -+ bio_inc_remaining(bio); - - /* - * Cannot call generic_make_request directly as that will be -@@ -1329,16 +1305,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio) - - r1_bio = alloc_r1bio(mddev, bio, 0); - -- /* We might need to issue multiple writes to different -- * devices if there are bad blocks around, so we keep -- * track of the number of writes in bio->bi_phys_segments. -- * If this is 0, there is only one r1_bio and no locking -- * will be needed when requests complete. If it is -- * non-zero, then it is the number of not-completed requests. -- */ -- bio->bi_phys_segments = 0; -- bio_clear_flag(bio, BIO_SEG_VALID); -- - if (conf->pending_count >= max_queued_requests) { - md_wakeup_thread(mddev->thread); - raid1_log(mddev, "wait queued"); -@@ -1544,16 +1510,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio) - * as it could result in the bio being freed. 
- */ - if (sectors_handled < bio_sectors(bio)) { -- /* We need another r1_bio, which must be accounted -- * in bio->bi_phys_segments -- */ -- spin_lock_irq(&conf->device_lock); -- if (bio->bi_phys_segments == 0) -- bio->bi_phys_segments = 2; -- else -- bio->bi_phys_segments++; -- spin_unlock_irq(&conf->device_lock); -+ /* We need another r1_bio, which must be counted */ -+ sector_t sect = bio->bi_iter.bi_sector + sectors_handled; - -+ inc_pending(conf, sect); -+ bio_inc_remaining(bio); - r1_bio_write_done(r1_bio); - r1_bio = alloc_r1bio(mddev, bio, sectors_handled); - goto retry_write; -@@ -2573,12 +2534,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) - int sectors_handled = (r1_bio->sector + max_sectors - - mbio->bi_iter.bi_sector); - r1_bio->sectors = max_sectors; -- spin_lock_irq(&conf->device_lock); -- if (mbio->bi_phys_segments == 0) -- mbio->bi_phys_segments = 2; -- else -- mbio->bi_phys_segments++; -- spin_unlock_irq(&conf->device_lock); -+ bio_inc_remaining(mbio); - trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), - bio, bio_dev, bio_sector); - generic_make_request(bio); -@@ -2586,6 +2542,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) - - r1_bio = alloc_r1bio(mddev, mbio, sectors_handled); - set_bit(R1BIO_ReadError, &r1_bio->state); -+ inc_pending(conf, r1_bio->sector); - - goto read_more; - } else { --- -2.10.2 - diff --git a/patches.drivers/0155-md-raid5-don-t-test-writes_pending-in-raid5_remove_d.patch b/patches.drivers/0155-md-raid5-don-t-test-writes_pending-in-raid5_remove_d.patch deleted file mode 100644 index 9629da5219..0000000000 --- a/patches.drivers/0155-md-raid5-don-t-test-writes_pending-in-raid5_remove_d.patch +++ /dev/null @@ -1,46 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 15 Mar 2017 14:05:14 +1100 -Subject: [PATCH] md/raid5: don't test ->writes_pending in raid5_remove_disk -Git-commit: 84dd97a69092cef858483b775f1900d743d796a4 -Git-repo: 
git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -This test on ->writes_pending cannot be safe as the counter -can be incremented at any moment and cannot be locked against. - -Change it to test conf->active_stripes, which at least -can be locked against. More changes are still needed. - -A future patch will change ->writes_pending, and testing it here will -be very inconvenient. - -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index 00a34fa..0b1a433 100644 ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -7532,9 +7532,12 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev) - /* - * we can't wait pending write here, as this is called in - * raid5d, wait will deadlock. -+ * neilb: there is no locking about new writes here, -+ * so this cannot be safe. - */ -- if (atomic_read(&mddev->writes_pending)) -+ if (atomic_read(&conf->active_stripes)) { - return -EBUSY; -+ } - log_exit(conf); - return 0; - } --- -2.10.2 - diff --git a/patches.drivers/0156-md-factor-out-set_in_sync.patch b/patches.drivers/0156-md-factor-out-set_in_sync.patch deleted file mode 100644 index 18a0d6a2d2..0000000000 --- a/patches.drivers/0156-md-factor-out-set_in_sync.patch +++ /dev/null @@ -1,133 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 15 Mar 2017 14:05:14 +1100 -Subject: [PATCH] md: factor out set_in_sync() -Git-commit: 6497709b5d1bccce7de1df678d5f147d614551d1 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -Three separate places in md.c check if the number of active -writes is zero and, if so, sets mddev->in_sync. 
- -There are a few differences, but there shouldn't be: -- it is always appropriate to notify the change in - sysfs_state, and there is no need to do this outside a - spin-locked region. -- we never need to check ->recovery_cp. The state of resync - is not relevant for whether there are any pending writes - or not (which is what ->in_sync reports). - -So create set_in_sync() which does the correct tests and -makes the correct changes, and call this in all three -places. - -Any behaviour changes here are minor and cosmetic. - -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md.c | 54 ++++++++++++++++++++---------------------------------- - 1 file changed, 20 insertions(+), 34 deletions(-) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index 41f766a..5951dea7 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -2252,6 +2252,21 @@ static void export_array(struct mddev *mddev) - mddev->major_version = 0; - } - -+static bool set_in_sync(struct mddev *mddev) -+{ -+ WARN_ON_ONCE(!spin_is_locked(&mddev->lock)); -+ if (atomic_read(&mddev->writes_pending) == 0) { -+ if (mddev->in_sync == 0) { -+ mddev->in_sync = 1; -+ set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); -+ sysfs_notify_dirent_safe(mddev->sysfs_state); -+ } -+ } -+ if (mddev->safemode == 1) -+ mddev->safemode = 0; -+ return mddev->in_sync; -+} -+ - static void sync_sbs(struct mddev *mddev, int nospares) - { - /* Update each superblock (in-memory image), but -@@ -4024,7 +4039,7 @@ static int restart_array(struct mddev *mddev); - static ssize_t - array_state_store(struct mddev *mddev, const char *buf, size_t len) - { -- int err; -+ int err = 0; - enum array_state st = match_word(buf, array_states); - - if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) { -@@ -4037,18 +4052,9 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) - clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags); - 
md_wakeup_thread(mddev->thread); - wake_up(&mddev->sb_wait); -- err = 0; - } else /* st == clean */ { - restart_array(mddev); -- if (atomic_read(&mddev->writes_pending) == 0) { -- if (mddev->in_sync == 0) { -- mddev->in_sync = 1; -- if (mddev->safemode == 1) -- mddev->safemode = 0; -- set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); -- } -- err = 0; -- } else -+ if (!set_in_sync(mddev)) - err = -EBUSY; - } - if (!err) -@@ -4106,15 +4112,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) - if (err) - break; - spin_lock(&mddev->lock); -- if (atomic_read(&mddev->writes_pending) == 0) { -- if (mddev->in_sync == 0) { -- mddev->in_sync = 1; -- if (mddev->safemode == 1) -- mddev->safemode = 0; -- set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); -- } -- err = 0; -- } else -+ if (!set_in_sync(mddev)) - err = -EBUSY; - spin_unlock(&mddev->lock); - } else -@@ -8582,22 +8580,10 @@ void md_check_recovery(struct mddev *mddev) - } - } - -- if (!mddev->external) { -- int did_change = 0; -+ if (!mddev->external && !mddev->in_sync) { - spin_lock(&mddev->lock); -- if (mddev->safemode && -- !atomic_read(&mddev->writes_pending) && -- !mddev->in_sync && -- mddev->recovery_cp == MaxSector) { -- mddev->in_sync = 1; -- did_change = 1; -- set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); -- } -- if (mddev->safemode == 1) -- mddev->safemode = 0; -+ set_in_sync(mddev); - spin_unlock(&mddev->lock); -- if (did_change) -- sysfs_notify_dirent_safe(mddev->sysfs_state); - } - - if (mddev->sb_flags) --- -2.10.2 - diff --git a/patches.drivers/0157-md-close-a-race-with-setting-mddev-in_sync.patch b/patches.drivers/0157-md-close-a-race-with-setting-mddev-in_sync.patch deleted file mode 100644 index 6faa995e70..0000000000 --- a/patches.drivers/0157-md-close-a-race-with-setting-mddev-in_sync.patch +++ /dev/null @@ -1,70 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 15 Mar 2017 14:05:14 +1100 -Subject: [PATCH] md: close a race with setting mddev->in_sync -Git-commit: 
55cc39f345256af241deb6152ff5c06bedd10f11 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -If ->in_sync is being set just as md_write_start() is being called, -it is possible that set_in_sync() won't see the elevated -->writes_pending, and md_write_start() won't see the set ->in_sync. - -To close this race, re-test ->writes_pending after setting ->in_sync, -and add memory barriers to ensure the increment of ->writes_pending -will be seen by the time of this second test, or the new ->in_sync -will be seen by md_write_start(). - -Add a spinlock to array_state_show() to ensure this temporary -instability is never visible from userspace. - -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index 5951dea7..dee6bbf 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -2258,6 +2258,10 @@ static bool set_in_sync(struct mddev *mddev) - if (atomic_read(&mddev->writes_pending) == 0) { - if (mddev->in_sync == 0) { - mddev->in_sync = 1; -+ smp_mb(); -+ if (atomic_read(&mddev->writes_pending)) -+ /* lost a race with md_write_start() */ -+ mddev->in_sync = 0; - set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); - sysfs_notify_dirent_safe(mddev->sysfs_state); - } -@@ -4011,6 +4015,7 @@ array_state_show(struct mddev *mddev, char *page) - st = read_auto; - break; - case 0: -+ spin_lock(&mddev->lock); - if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) - st = write_pending; - else if (mddev->in_sync) -@@ -4019,6 +4024,7 @@ array_state_show(struct mddev *mddev, char *page) - st = active_idle; - else - st = active; -+ spin_unlock(&mddev->lock); - } - else { - if (list_empty(&mddev->disks) && -@@ -7885,6 +7891,7 @@ void md_write_start(struct mddev *mddev, struct bio *bi) - did_change = 
1; - } - atomic_inc(&mddev->writes_pending); -+ smp_mb(); /* Match smp_mb in set_in_sync() */ - if (mddev->safemode == 1) - mddev->safemode = 0; - if (mddev->in_sync) { --- -2.10.2 - diff --git a/patches.drivers/0158-percpu-refcount-support-synchronous-switch-to-atomic.patch b/patches.drivers/0158-percpu-refcount-support-synchronous-switch-to-atomic.patch deleted file mode 100644 index d4395f11f8..0000000000 --- a/patches.drivers/0158-percpu-refcount-support-synchronous-switch-to-atomic.patch +++ /dev/null @@ -1,73 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 15 Mar 2017 14:05:14 +1100 -Subject: [PATCH] percpu-refcount: support synchronous switch to atomic mode. -Git-commit: 210f7cdcf088c304ee0533ffd33d6f71a8821862 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -percpu_ref_switch_to_atomic_sync() schedules the switch to atomic mode, then -waits for it to complete. - -Also export percpu_ref_switch_to_* so they can be used from modules. - -This will be used in md/raid to count the number of pending write -requests to an array. -We occasionally need to check if the count is zero, but most often -we don't care. -We always want updates to the counter to be fast, as in some cases -we count every 4K page. 
- -Signed-off-by: NeilBrown <neilb@suse.com> -Acked-by: Tejun Heo <tj@kernel.org> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - include/linux/percpu-refcount.h | 1 + - lib/percpu-refcount.c | 17 +++++++++++++++++ - 2 files changed, 18 insertions(+) - ---- a/include/linux/percpu-refcount.h -+++ b/include/linux/percpu-refcount.h -@@ -99,6 +99,7 @@ int __must_check percpu_ref_init(struct - void percpu_ref_exit(struct percpu_ref *ref); - void percpu_ref_switch_to_atomic(struct percpu_ref *ref, - percpu_ref_func_t *confirm_switch); -+void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref); - void percpu_ref_switch_to_percpu(struct percpu_ref *ref); - void percpu_ref_kill_and_confirm(struct percpu_ref *ref, - percpu_ref_func_t *confirm_kill); ---- a/lib/percpu-refcount.c -+++ b/lib/percpu-refcount.c -@@ -249,6 +249,22 @@ static void __percpu_ref_switch_to_percp - smp_store_release(&ref->percpu_count_ptr, - ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC); - } -+EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic); -+ -+/** -+ * percpu_ref_switch_to_atomic_sync - switch a percpu_ref to atomic mode -+ * @ref: percpu_ref to switch to atomic mode -+ * -+ * Schedule switching the ref to atomic mode, and wait for the -+ * switch to complete. Caller must ensure that no other thread -+ * will switch back to percpu mode. 
-+ */ -+void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref) -+{ -+ percpu_ref_switch_to_atomic(ref, NULL); -+ wait_event(percpu_ref_switch_waitq, !ref->confirm_switch); -+} -+EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic_sync); - - /** - * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode -@@ -276,6 +292,7 @@ void percpu_ref_switch_to_percpu(struct - if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) - __percpu_ref_switch_to_percpu(ref); - } -+EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu); - - /** - * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation diff --git a/patches.drivers/0159-MD-use-per-cpu-counter-for-writes_pending.patch b/patches.drivers/0159-MD-use-per-cpu-counter-for-writes_pending.patch deleted file mode 100644 index cd22ae038b..0000000000 --- a/patches.drivers/0159-MD-use-per-cpu-counter-for-writes_pending.patch +++ /dev/null @@ -1,248 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 15 Mar 2017 14:05:14 +1100 -Subject: [PATCH] MD: use per-cpu counter for writes_pending -Git-commit: 4ad23a976413aa57fe5ba7a25953dc35ccca5b71 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -The 'writes_pending' counter is used to determine when the -array is stable so that it can be marked in the superblock -as "Clean". Consequently it needs to be updated frequently -but only checked for zero occasionally. Recent changes to -raid5 cause the count to be updated even more often - once -per 4K rather than once per bio. This provided -justification for making the updates more efficient. - -So we replace the atomic counter with a percpu-refcount. -This can be incremented and decremented cheaply most of the -time, and can be switched to "atomic" mode when more -precise counting is needed. 
As it is possible for multiple -threads to want a precise count, we introduce a -"sync_checker" counter to count the number of threads -in "set_in_sync()", and only switch the refcount back -to percpu mode when that is zero. - -We need to be careful about races between set_in_sync() -setting ->in_sync to 1, and md_write_start() setting it -to zero. md_write_start() holds the rcu_read_lock() -while checking if the refcount is in percpu mode. If -it is, then we know a switch to 'atomic' will not happen until -after we call rcu_read_unlock(), in which case set_in_sync() -will see the elevated count, and not set in_sync to 1. -If it is not in percpu mode, we take the mddev->lock to -ensure proper synchronization. - -It is no longer possible to quickly check if the count is zero, which -we previously did to update a timer or to schedule the md_thread. -So now we do these every time we decrement that counter, but make -sure they are fast. - -mod_timer() already optimizes the case where the timeout value doesn't -actually change. We leverage that further by always rounding off the -jiffies to the timeout value. This may delay the marking of 'clean' -slightly, but ensure we only perform atomic operation here when absolutely -needed. - -md_wakeup_thread() current always calls wake_up(), even if -THREAD_WAKEUP is already set. That too can be optimised to avoid -calls to wake_up(). 
- -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md.c | 70 ++++++++++++++++++++++++++++++++++++++------------------- - drivers/md/md.h | 3 ++- - 2 files changed, 49 insertions(+), 24 deletions(-) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index dee6bbf..1db88d7 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -65,6 +65,8 @@ - #include <linux/raid/md_p.h> - #include <linux/raid/md_u.h> - #include <linux/slab.h> -+#include <linux/percpu-refcount.h> -+ - #include <trace/events/block.h> - #include "md.h" - #include "bitmap.h" -@@ -2255,16 +2257,24 @@ static void export_array(struct mddev *mddev) - static bool set_in_sync(struct mddev *mddev) - { - WARN_ON_ONCE(!spin_is_locked(&mddev->lock)); -- if (atomic_read(&mddev->writes_pending) == 0) { -- if (mddev->in_sync == 0) { -+ if (!mddev->in_sync) { -+ mddev->sync_checkers++; -+ spin_unlock(&mddev->lock); -+ percpu_ref_switch_to_atomic_sync(&mddev->writes_pending); -+ spin_lock(&mddev->lock); -+ if (!mddev->in_sync && -+ percpu_ref_is_zero(&mddev->writes_pending)) { - mddev->in_sync = 1; -+ /* -+ * Ensure ->in_sync is visible before we clear -+ * ->sync_checkers. 
-+ */ - smp_mb(); -- if (atomic_read(&mddev->writes_pending)) -- /* lost a race with md_write_start() */ -- mddev->in_sync = 0; - set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); - sysfs_notify_dirent_safe(mddev->sysfs_state); - } -+ if (--mddev->sync_checkers == 0) -+ percpu_ref_switch_to_percpu(&mddev->writes_pending); - } - if (mddev->safemode == 1) - mddev->safemode = 0; -@@ -5120,6 +5130,7 @@ static void md_free(struct kobject *ko) - del_gendisk(mddev->gendisk); - put_disk(mddev->gendisk); - } -+ percpu_ref_exit(&mddev->writes_pending); - - kfree(mddev); - } -@@ -5145,6 +5156,8 @@ static void mddev_delayed_delete(struct work_struct *ws) - kobject_put(&mddev->kobj); - } - -+static void no_op(struct percpu_ref *r) {} -+ - static int md_alloc(dev_t dev, char *name) - { - static DEFINE_MUTEX(disks_mutex); -@@ -5196,6 +5209,10 @@ static int md_alloc(dev_t dev, char *name) - blk_queue_make_request(mddev->queue, md_make_request); - blk_set_stacking_limits(&mddev->queue->limits); - -+ if (percpu_ref_init(&mddev->writes_pending, no_op, 0, GFP_KERNEL) < 0) -+ goto abort; -+ /* We want to start with the refcount at zero */ -+ percpu_ref_put(&mddev->writes_pending); - disk = alloc_disk(1 << shift); - if (!disk) { - blk_cleanup_queue(mddev->queue); -@@ -5279,11 +5296,10 @@ static void md_safemode_timeout(unsigned long data) - { - struct mddev *mddev = (struct mddev *) data; - -- if (!atomic_read(&mddev->writes_pending)) { -- mddev->safemode = 1; -- if (mddev->external) -- sysfs_notify_dirent_safe(mddev->sysfs_state); -- } -+ mddev->safemode = 1; -+ if (mddev->external) -+ sysfs_notify_dirent_safe(mddev->sysfs_state); -+ - md_wakeup_thread(mddev->thread); - } - -@@ -5488,7 +5504,6 @@ int md_run(struct mddev *mddev) - } else if (mddev->ro == 2) /* auto-readonly not meaningful */ - mddev->ro = 0; - -- atomic_set(&mddev->writes_pending,0); - atomic_set(&mddev->max_corr_read_errors, - MD_DEFAULT_MAX_CORRECTED_READ_ERRORS); - mddev->safemode = 0; -@@ -7342,8 +7357,8 @@ void 
md_wakeup_thread(struct md_thread *thread) - { - if (thread) { - pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm); -- set_bit(THREAD_WAKEUP, &thread->flags); -- wake_up(&thread->wqueue); -+ if (!test_and_set_bit(THREAD_WAKEUP, &thread->flags)) -+ wake_up(&thread->wqueue); - } - } - EXPORT_SYMBOL(md_wakeup_thread); -@@ -7890,11 +7905,13 @@ void md_write_start(struct mddev *mddev, struct bio *bi) - md_wakeup_thread(mddev->sync_thread); - did_change = 1; - } -- atomic_inc(&mddev->writes_pending); -+ rcu_read_lock(); -+ percpu_ref_get(&mddev->writes_pending); - smp_mb(); /* Match smp_mb in set_in_sync() */ - if (mddev->safemode == 1) - mddev->safemode = 0; -- if (mddev->in_sync) { -+ /* sync_checkers is always 0 when writes_pending is in per-cpu mode */ -+ if (mddev->in_sync || !mddev->sync_checkers) { - spin_lock(&mddev->lock); - if (mddev->in_sync) { - mddev->in_sync = 0; -@@ -7905,6 +7922,7 @@ void md_write_start(struct mddev *mddev, struct bio *bi) - } - spin_unlock(&mddev->lock); - } -+ rcu_read_unlock(); - if (did_change) - sysfs_notify_dirent_safe(mddev->sysfs_state); - wait_event(mddev->sb_wait, -@@ -7925,19 +7943,25 @@ void md_write_inc(struct mddev *mddev, struct bio *bi) - if (bio_data_dir(bi) != WRITE) - return; - WARN_ON_ONCE(mddev->in_sync || mddev->ro); -- atomic_inc(&mddev->writes_pending); -+ percpu_ref_get(&mddev->writes_pending); - } - EXPORT_SYMBOL(md_write_inc); - - void md_write_end(struct mddev *mddev) - { -- if (atomic_dec_and_test(&mddev->writes_pending)) { -- if (mddev->safemode == 2) -- md_wakeup_thread(mddev->thread); -- else if (mddev->safemode_delay) -- mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay); -- } -+ percpu_ref_put(&mddev->writes_pending); -+ -+ if (mddev->safemode == 2) -+ md_wakeup_thread(mddev->thread); -+ else if (mddev->safemode_delay) -+ /* The roundup() ensures this only performs locking once -+ * every ->safemode_delay jiffies -+ */ -+ mod_timer(&mddev->safemode_timer, -+ roundup(jiffies, 
mddev->safemode_delay) + -+ mddev->safemode_delay); - } -+ - EXPORT_SYMBOL(md_write_end); - - /* md_allow_write(mddev) -@@ -8538,7 +8562,7 @@ void md_check_recovery(struct mddev *mddev) - test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || - test_bit(MD_RECOVERY_DONE, &mddev->recovery) || - (mddev->external == 0 && mddev->safemode == 1) || -- (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending) -+ (mddev->safemode == 2 - && !mddev->in_sync && mddev->recovery_cp == MaxSector) - )) - return; -diff --git a/drivers/md/md.h b/drivers/md/md.h -index 0cd1272..7a7847d 100644 ---- a/drivers/md/md.h -+++ b/drivers/md/md.h -@@ -409,7 +409,8 @@ struct mddev { - */ - unsigned int safemode_delay; - struct timer_list safemode_timer; -- atomic_t writes_pending; -+ struct percpu_ref writes_pending; -+ int sync_checkers; /* # of threads checking writes_pending */ - struct request_queue *queue; /* for plugging ... */ - - struct bitmap *bitmap; /* the bitmap for the device */ --- -2.10.2 - diff --git a/patches.drivers/0160-md-raid10-refactor-some-codes-from-raid10_write_requ.patch b/patches.drivers/0160-md-raid10-refactor-some-codes-from-raid10_write_requ.patch deleted file mode 100644 index 0b964088ce..0000000000 --- a/patches.drivers/0160-md-raid10-refactor-some-codes-from-raid10_write_requ.patch +++ /dev/null @@ -1,227 +0,0 @@ -From: Guoqing Jiang <gqjiang@suse.com> -Date: Mon, 20 Mar 2017 17:46:04 +0800 -Subject: [PATCH] md/raid10: refactor some codes from raid10_write_request -Git-commit: 27f26a0f3767b6688b9a88b9becb6f8e760421f3 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -Previously, we clone both bio and repl_bio in raid10_write_request, -then add the cloned bio to plug->pending or conf->pending_bio_list -based on plug or not, and most of the logics are same for the two -conditions. 
- -So introduce raid10_write_one_disk for it, and use replacement parameter -to distinguish the difference. No functional changes in the patch. - -Signed-off-by: Guoqing Jiang <gqjiang@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid10.c | 175 ++++++++++++++++++++++------------------------------ - 1 file changed, 75 insertions(+), 100 deletions(-) - -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index 0f1b78b..28c62e0 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -1185,18 +1185,82 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, - return; - } - --static void raid10_write_request(struct mddev *mddev, struct bio *bio, -- struct r10bio *r10_bio) -+static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, -+ struct bio *bio, bool replacement, -+ int n_copy, int max_sectors) - { -- struct r10conf *conf = mddev->private; -- int i; - const int op = bio_op(bio); - const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); - const unsigned long do_fua = (bio->bi_opf & REQ_FUA); - unsigned long flags; -- struct md_rdev *blocked_rdev; - struct blk_plug_cb *cb; - struct raid10_plug_cb *plug = NULL; -+ struct r10conf *conf = mddev->private; -+ struct md_rdev *rdev; -+ int devnum = r10_bio->devs[n_copy].devnum; -+ struct bio *mbio; -+ -+ if (replacement) { -+ rdev = conf->mirrors[devnum].replacement; -+ if (rdev == NULL) { -+ /* Replacement just got moved to main 'rdev' */ -+ smp_mb(); -+ rdev = conf->mirrors[devnum].rdev; -+ } -+ } else -+ rdev = conf->mirrors[devnum].rdev; -+ -+ mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); -+ bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector, max_sectors); -+ if (replacement) -+ r10_bio->devs[n_copy].repl_bio = mbio; -+ else -+ r10_bio->devs[n_copy].bio = mbio; -+ -+ mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr + -+ choose_data_offset(r10_bio, rdev)); -+ mbio->bi_bdev = rdev->bdev; 
-+ mbio->bi_end_io = raid10_end_write_request; -+ bio_set_op_attrs(mbio, op, do_sync | do_fua); -+ if (!replacement && test_bit(FailFast, -+ &conf->mirrors[devnum].rdev->flags) -+ && enough(conf, devnum)) -+ mbio->bi_opf |= MD_FAILFAST; -+ mbio->bi_private = r10_bio; -+ -+ if (conf->mddev->gendisk) -+ trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev), -+ mbio, disk_devt(conf->mddev->gendisk), -+ r10_bio->sector); -+ /* flush_pending_writes() needs access to the rdev so...*/ -+ mbio->bi_bdev = (void *)rdev; -+ -+ atomic_inc(&r10_bio->remaining); -+ -+ cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug)); -+ if (cb) -+ plug = container_of(cb, struct raid10_plug_cb, cb); -+ else -+ plug = NULL; -+ spin_lock_irqsave(&conf->device_lock, flags); -+ if (plug) { -+ bio_list_add(&plug->pending, mbio); -+ plug->pending_cnt++; -+ } else { -+ bio_list_add(&conf->pending_bio_list, mbio); -+ conf->pending_count++; -+ } -+ spin_unlock_irqrestore(&conf->device_lock, flags); -+ if (!plug) -+ md_wakeup_thread(mddev->thread); -+} -+ -+static void raid10_write_request(struct mddev *mddev, struct bio *bio, -+ struct r10bio *r10_bio) -+{ -+ struct r10conf *conf = mddev->private; -+ int i; -+ struct md_rdev *blocked_rdev; - sector_t sectors; - int sectors_handled; - int max_sectors; -@@ -1387,101 +1451,12 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, - bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0); - - for (i = 0; i < conf->copies; i++) { -- struct bio *mbio; -- int d = r10_bio->devs[i].devnum; -- if (r10_bio->devs[i].bio) { -- struct md_rdev *rdev = conf->mirrors[d].rdev; -- mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); -- bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector, -- max_sectors); -- r10_bio->devs[i].bio = mbio; -- -- mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+ -- choose_data_offset(r10_bio, rdev)); -- mbio->bi_bdev = rdev->bdev; -- mbio->bi_end_io = raid10_end_write_request; -- 
bio_set_op_attrs(mbio, op, do_sync | do_fua); -- if (test_bit(FailFast, &conf->mirrors[d].rdev->flags) && -- enough(conf, d)) -- mbio->bi_opf |= MD_FAILFAST; -- mbio->bi_private = r10_bio; -- -- if (conf->mddev->gendisk) -- trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev), -- mbio, disk_devt(conf->mddev->gendisk), -- r10_bio->sector); -- /* flush_pending_writes() needs access to the rdev so...*/ -- mbio->bi_bdev = (void*)rdev; -- -- atomic_inc(&r10_bio->remaining); -- -- cb = blk_check_plugged(raid10_unplug, mddev, -- sizeof(*plug)); -- if (cb) -- plug = container_of(cb, struct raid10_plug_cb, -- cb); -- else -- plug = NULL; -- spin_lock_irqsave(&conf->device_lock, flags); -- if (plug) { -- bio_list_add(&plug->pending, mbio); -- plug->pending_cnt++; -- } else { -- bio_list_add(&conf->pending_bio_list, mbio); -- conf->pending_count++; -- } -- spin_unlock_irqrestore(&conf->device_lock, flags); -- if (!plug) -- md_wakeup_thread(mddev->thread); -- } -- -- if (r10_bio->devs[i].repl_bio) { -- struct md_rdev *rdev = conf->mirrors[d].replacement; -- if (rdev == NULL) { -- /* Replacement just got moved to main 'rdev' */ -- smp_mb(); -- rdev = conf->mirrors[d].rdev; -- } -- mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); -- bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector, -- max_sectors); -- r10_bio->devs[i].repl_bio = mbio; -- -- mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr + -- choose_data_offset(r10_bio, rdev)); -- mbio->bi_bdev = rdev->bdev; -- mbio->bi_end_io = raid10_end_write_request; -- bio_set_op_attrs(mbio, op, do_sync | do_fua); -- mbio->bi_private = r10_bio; -- -- if (conf->mddev->gendisk) -- trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev), -- mbio, disk_devt(conf->mddev->gendisk), -- r10_bio->sector); -- /* flush_pending_writes() needs access to the rdev so...*/ -- mbio->bi_bdev = (void*)rdev; -- -- atomic_inc(&r10_bio->remaining); -- -- cb = blk_check_plugged(raid10_unplug, mddev, -- sizeof(*plug)); -- if (cb) -- plug = container_of(cb, 
struct raid10_plug_cb, -- cb); -- else -- plug = NULL; -- spin_lock_irqsave(&conf->device_lock, flags); -- if (plug) { -- bio_list_add(&plug->pending, mbio); -- plug->pending_cnt++; -- } else { -- bio_list_add(&conf->pending_bio_list, mbio); -- conf->pending_count++; -- } -- spin_unlock_irqrestore(&conf->device_lock, flags); -- if (!plug) -- md_wakeup_thread(mddev->thread); -- } -+ if (r10_bio->devs[i].bio) -+ raid10_write_one_disk(mddev, r10_bio, bio, false, -+ i, max_sectors); -+ if (r10_bio->devs[i].repl_bio) -+ raid10_write_one_disk(mddev, r10_bio, bio, true, -+ i, max_sectors); - } - - /* Don't remove the bias on 'remaining' (one_write_done) until --- -2.10.2 - diff --git a/patches.drivers/0161-md-fix-several-trivial-typos-in-comments.patch b/patches.drivers/0161-md-fix-several-trivial-typos-in-comments.patch deleted file mode 100644 index ff24ee88d8..0000000000 --- a/patches.drivers/0161-md-fix-several-trivial-typos-in-comments.patch +++ /dev/null @@ -1,64 +0,0 @@ -From: Zhilong Liu <zlliu@suse.com> -Date: Wed, 15 Mar 2017 16:14:53 +0800 -Subject: [PATCH] md: fix several trivial typos in comments -Git-commit: 3560741e316b3ea52cfb27901ae284921445180f -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -Signed-off-by: Zhilong Liu <zlliu@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/bitmap.c | 2 +- - drivers/md/raid5.c | 6 +++--- - 2 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c -index cc7bbd2..bf7419a 100644 ---- a/drivers/md/bitmap.c -+++ b/drivers/md/bitmap.c -@@ -697,7 +697,7 @@ static int bitmap_read_sb(struct bitmap *bitmap) - - out: - kunmap_atomic(sb); -- /* Assiging chunksize is required for "re_read" */ -+ /* Assigning chunksize is required for "re_read" */ - bitmap->mddev->bitmap_info.chunksize = chunksize; - if (err == 0 && 
nodes && (bitmap->cluster_slot < 0)) { - err = md_setup_cluster(bitmap->mddev, nodes); -diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index 0b1a433..266d661 100644 ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -2296,7 +2296,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) - * pages have been transferred over, and the old kmem_cache is - * freed when all stripes are done. - * 3/ reallocate conf->disks to be suitable bigger. If this fails, -- * we simple return a failre status - no need to clean anything up. -+ * we simple return a failure status - no need to clean anything up. - * 4/ allocate new pages for the new slots in the new stripe_heads. - * If this fails, we don't bother trying the shrink the - * stripe_heads down again, we just leave them as they are. -@@ -3558,7 +3558,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, - !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) - /* Pre-reads at not permitted until after short delay - * to gather multiple requests. However if this -- * device is no Insync, the block could only be be computed -+ * device is no Insync, the block could only be computed - * and there is no need to delay that. - */ - return 0; -@@ -3577,7 +3577,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, - - /* If we are forced to do a reconstruct-write, either because - * the current RAID6 implementation only supports that, or -- * or because parity cannot be trusted and we are currently -+ * because parity cannot be trusted and we are currently - * recovering it, there is extra need to be careful. 
- * If one of the devices that we would need to read, because - * it is not being overwritten (and maybe not written at all) --- -2.10.2 - diff --git a/patches.drivers/0162-md-raid1-raid10-don-t-handle-failure-of-bio_add_page.patch b/patches.drivers/0162-md-raid1-raid10-don-t-handle-failure-of-bio_add_page.patch deleted file mode 100644 index daa5d98551..0000000000 --- a/patches.drivers/0162-md-raid1-raid10-don-t-handle-failure-of-bio_add_page.patch +++ /dev/null @@ -1,134 +0,0 @@ -From: Ming Lei <tom.leiming@gmail.com> -Date: Fri, 17 Mar 2017 00:12:22 +0800 -Subject: [PATCH] md: raid1/raid10: don't handle failure of bio_add_page() -Git-commit: c85ba149de4bd14aa028ac824f9f12aeded28b86 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -All bio_add_page() is for adding one page into resync bio, -which is big enough to hold RESYNC_PAGES pages, and -the current bio_add_page() doesn't check queue limit any more, -so it won't fail at all. 
- -remove unused label (shaohua) - -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 22 ++++++---------------- - drivers/md/raid10.c | 41 ++++++++++------------------------------- - 2 files changed, 16 insertions(+), 47 deletions(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index 941f810..569f501 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -2894,28 +2894,18 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, - bio = r1_bio->bios[i]; - if (bio->bi_end_io) { - page = bio->bi_io_vec[bio->bi_vcnt].bv_page; -- if (bio_add_page(bio, page, len, 0) == 0) { -- /* stop here */ -- bio->bi_io_vec[bio->bi_vcnt].bv_page = page; -- while (i > 0) { -- i--; -- bio = r1_bio->bios[i]; -- if (bio->bi_end_io==NULL) -- continue; -- /* remove last page from this bio */ -- bio->bi_vcnt--; -- bio->bi_iter.bi_size -= len; -- bio_clear_flag(bio, BIO_SEG_VALID); -- } -- goto bio_full; -- } -+ -+ /* -+ * won't fail because the vec table is big -+ * enough to hold all these pages -+ */ -+ bio_add_page(bio, page, len, 0); - } - } - nr_sectors += len>>9; - sector_nr += len>>9; - sync_blocks -= (len>>9); - } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); -- bio_full: - r1_bio->sectors = nr_sectors; - - if (mddev_is_clustered(mddev) && -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index 28c62e0..6c9783b 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -3413,27 +3413,16 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, - if (len == 0) - break; - for (bio= biolist ; bio ; bio=bio->bi_next) { -- struct bio *bio2; - page = bio->bi_io_vec[bio->bi_vcnt].bv_page; -- if (bio_add_page(bio, page, len, 0)) -- continue; -- -- /* stop here */ -- bio->bi_io_vec[bio->bi_vcnt].bv_page = page; -- for (bio2 = biolist; -- bio2 && bio2 != bio; -- bio2 = bio2->bi_next) { -- /* remove last 
page from this bio */ -- bio2->bi_vcnt--; -- bio2->bi_iter.bi_size -= len; -- bio_clear_flag(bio2, BIO_SEG_VALID); -- } -- goto bio_full; -+ /* -+ * won't fail because the vec table is big enough -+ * to hold all these pages -+ */ -+ bio_add_page(bio, page, len, 0); - } - nr_sectors += len>>9; - sector_nr += len>>9; - } while (biolist->bi_vcnt < RESYNC_PAGES); -- bio_full: - r10_bio->sectors = nr_sectors; - - while (biolist) { -@@ -4502,25 +4491,15 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, - if (len > PAGE_SIZE) - len = PAGE_SIZE; - for (bio = blist; bio ; bio = bio->bi_next) { -- struct bio *bio2; -- if (bio_add_page(bio, page, len, 0)) -- continue; -- -- /* Didn't fit, must stop */ -- for (bio2 = blist; -- bio2 && bio2 != bio; -- bio2 = bio2->bi_next) { -- /* Remove last page from this bio */ -- bio2->bi_vcnt--; -- bio2->bi_iter.bi_size -= len; -- bio_clear_flag(bio2, BIO_SEG_VALID); -- } -- goto bio_full; -+ /* -+ * won't fail because the vec table is big enough -+ * to hold all these pages -+ */ -+ bio_add_page(bio, page, len, 0); - } - sector_nr += len >> 9; - nr_sectors += len >> 9; - } --bio_full: - rcu_read_unlock(); - r10_bio->sectors = nr_sectors; - --- -2.10.2 - diff --git a/patches.drivers/0163-md-move-two-macros-into-md.h.patch b/patches.drivers/0163-md-move-two-macros-into-md.h.patch deleted file mode 100644 index 44d8f9901d..0000000000 --- a/patches.drivers/0163-md-move-two-macros-into-md.h.patch +++ /dev/null @@ -1,58 +0,0 @@ -From: Ming Lei <tom.leiming@gmail.com> -Date: Fri, 17 Mar 2017 00:12:23 +0800 -Subject: [PATCH] md: move two macros into md.h -Git-commit: d8e29fbc3bed181f2653fb89ac8c34e40db39c30 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -Both raid1 and raid10 share common resync -block size and page count, so move them into md.h. 
- -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md.h | 5 +++++ - drivers/md/raid1.c | 2 -- - drivers/md/raid10.c | 3 --- - 3 files changed, 5 insertions(+), 5 deletions(-) - ---- a/drivers/md/md.h -+++ b/drivers/md/md.h -@@ -723,4 +723,9 @@ static inline void mddev_clear_unsupport - { - mddev->flags &= ~unsupported_flags; - } -+ -+/* Maximum size of each resync request */ -+#define RESYNC_BLOCK_SIZE (64*1024) -+#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) -+ - #endif /* _MD_MD_H */ ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -91,10 +91,8 @@ static void r1bio_pool_free(void *r1_bio - kfree(r1_bio); - } - --#define RESYNC_BLOCK_SIZE (64*1024) - #define RESYNC_DEPTH 32 - #define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9) --#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) - #define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH) - #define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9) - #define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW) ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -125,9 +125,6 @@ static void r10bio_pool_free(void *r10_b - kfree(r10_bio); - } - --/* Maximum size of each resync request */ --#define RESYNC_BLOCK_SIZE (64*1024) --#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) - /* amount of memory to reserve for resync requests */ - #define RESYNC_WINDOW (1024*1024) - /* maximum number of concurrent requests, memory permitting */ diff --git a/patches.drivers/0164-md-prepare-for-managing-resync-I-O-pages-in-clean-wa.patch b/patches.drivers/0164-md-prepare-for-managing-resync-I-O-pages-in-clean-wa.patch deleted file mode 100644 index 56918039c9..0000000000 --- a/patches.drivers/0164-md-prepare-for-managing-resync-I-O-pages-in-clean-wa.patch +++ /dev/null @@ -1,85 +0,0 @@ -From: Ming Lei <tom.leiming@gmail.com> -Date: Fri, 17 Mar 2017 00:12:24 +0800 -Subject: [PATCH] md: 
prepare for managing resync I/O pages in clean way -Git-commit: 513e2faa0138462ce014e1b0e226ca45c83bc6c1 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -Now resync I/O use bio's bec table to manage pages, -this way is very hacky, and may not work any more -once multipage bvec is introduced. - -So introduce helpers and new data structure for -managing resync I/O pages more cleanly. - -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 50 insertions(+) - -diff --git a/drivers/md/md.h b/drivers/md/md.h -index 31d2d70..0418b29 100644 ---- a/drivers/md/md.h -+++ b/drivers/md/md.h -@@ -723,4 +723,54 @@ static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio) - #define RESYNC_BLOCK_SIZE (64*1024) - #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) - -+/* for managing resync I/O pages */ -+struct resync_pages { -+ unsigned idx; /* for get/put page from the pool */ -+ void *raid_bio; -+ struct page *pages[RESYNC_PAGES]; -+}; -+ -+static inline int resync_alloc_pages(struct resync_pages *rp, -+ gfp_t gfp_flags) -+{ -+ int i; -+ -+ for (i = 0; i < RESYNC_PAGES; i++) { -+ rp->pages[i] = alloc_page(gfp_flags); -+ if (!rp->pages[i]) -+ goto out_free; -+ } -+ -+ return 0; -+ -+out_free: -+ while (--i >= 0) -+ put_page(rp->pages[i]); -+ return -ENOMEM; -+} -+ -+static inline void resync_free_pages(struct resync_pages *rp) -+{ -+ int i; -+ -+ for (i = 0; i < RESYNC_PAGES; i++) -+ put_page(rp->pages[i]); -+} -+ -+static inline void resync_get_all_pages(struct resync_pages *rp) -+{ -+ int i; -+ -+ for (i = 0; i < RESYNC_PAGES; i++) -+ get_page(rp->pages[i]); -+} -+ -+static inline struct page *resync_fetch_page(struct resync_pages *rp, -+ unsigned idx) -+{ -+ if 
(WARN_ON_ONCE(idx >= RESYNC_PAGES)) -+ return NULL; -+ return rp->pages[idx]; -+} -+ - #endif /* _MD_MD_H */ --- -2.10.2 - diff --git a/patches.drivers/0165-md-raid1-simplify-r1buf_pool_free.patch b/patches.drivers/0165-md-raid1-simplify-r1buf_pool_free.patch deleted file mode 100644 index a23e29c44b..0000000000 --- a/patches.drivers/0165-md-raid1-simplify-r1buf_pool_free.patch +++ /dev/null @@ -1,61 +0,0 @@ -From: Ming Lei <tom.leiming@gmail.com> -Date: Fri, 17 Mar 2017 00:12:25 +0800 -Subject: [PATCH] md: raid1: simplify r1buf_pool_free() -Git-commit: a7234234d0d6373d0510582ab632efbf73243403 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -This patch gets each page's reference of each bio for resync, -then r1buf_pool_free() gets simplified a lot. - -The same policy has been taken in raid10's buf pool allocation/free -too. - -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 17 ++++++++--------- - 1 file changed, 8 insertions(+), 9 deletions(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index c31f9e2..7ee0911 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -142,10 +142,13 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) - } - /* If not user-requests, copy the page pointers to all bios */ - if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) { -- for (i=0; i<RESYNC_PAGES ; i++) -- for (j=1; j<pi->raid_disks; j++) -- r1_bio->bios[j]->bi_io_vec[i].bv_page = -+ for (i = 0; i< RESYNC_PAGES; i++) -+ for (j = 1; j < pi->raid_disks; j++) { -+ struct page *page = - r1_bio->bios[0]->bi_io_vec[i].bv_page; -+ get_page(page); -+ r1_bio->bios[j]->bi_io_vec[i].bv_page = page; -+ } - } - - r1_bio->master_bio = NULL; -@@ -170,12 +173,8 @@ static void r1buf_pool_free(void *__r1_bio, void *data) - struct r1bio *r1bio = 
__r1_bio; - - for (i = 0; i < RESYNC_PAGES; i++) -- for (j = pi->raid_disks; j-- ;) { -- if (j == 0 || -- r1bio->bios[j]->bi_io_vec[i].bv_page != -- r1bio->bios[0]->bi_io_vec[i].bv_page) -- safe_put_page(r1bio->bios[j]->bi_io_vec[i].bv_page); -- } -+ for (j = pi->raid_disks; j-- ;) -+ safe_put_page(r1bio->bios[j]->bi_io_vec[i].bv_page); - for (i=0 ; i < pi->raid_disks; i++) - bio_put(r1bio->bios[i]); - --- -2.10.2 - diff --git a/patches.drivers/0166-md-raid1-don-t-use-bio-s-vec-table-to-manage-resync-.patch b/patches.drivers/0166-md-raid1-don-t-use-bio-s-vec-table-to-manage-resync-.patch deleted file mode 100644 index 8380c11dd0..0000000000 --- a/patches.drivers/0166-md-raid1-don-t-use-bio-s-vec-table-to-manage-resync-.patch +++ /dev/null @@ -1,236 +0,0 @@ -From: Ming Lei <tom.leiming@gmail.com> -Date: Fri, 17 Mar 2017 00:12:26 +0800 -Subject: [PATCH] md: raid1: don't use bio's vec table to manage resync pages -Git-commit: 98d30c5812c343c970b5997369b4f6b197c29b3d -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -Now we allocate one page array for managing resync pages, instead -of using bio's vec table to do that, and the old way is very hacky -and won't work any more if multipage bvec is enabled. - -The introduced cost is that we need to allocate (128 + 16) * raid_disks -bytes per r1_bio, and it is fine because the inflight r1_bio for -resync shouldn't be much, as pointed by Shaohua. - -Also the bio_reset() in raid1_sync_request() is removed because -all bios are freshly new now and not necessary to reset any more. 
- -This patch can be thought as a cleanup too - -Suggested-by: Shaohua Li <shli@kernel.org> -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 93 +++++++++++++++++++++++++++++++++++++----------------- - 1 file changed, 64 insertions(+), 29 deletions(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index 7ee0911..89a384b 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -81,6 +81,24 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr); - #define raid1_log(md, fmt, args...) \ - do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0) - -+/* -+ * 'strct resync_pages' stores actual pages used for doing the resync -+ * IO, and it is per-bio, so make .bi_private points to it. -+ */ -+static inline struct resync_pages *get_resync_pages(struct bio *bio) -+{ -+ return bio->bi_private; -+} -+ -+/* -+ * for resync bio, r1bio pointer can be retrieved from the per-bio -+ * 'struct resync_pages'. 
-+ */ -+static inline struct r1bio *get_resync_r1bio(struct bio *bio) -+{ -+ return get_resync_pages(bio)->raid_bio; -+} -+ - static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) - { - struct pool_info *pi = data; -@@ -108,12 +126,18 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) - struct r1bio *r1_bio; - struct bio *bio; - int need_pages; -- int i, j; -+ int j; -+ struct resync_pages *rps; - - r1_bio = r1bio_pool_alloc(gfp_flags, pi); - if (!r1_bio) - return NULL; - -+ rps = kmalloc(sizeof(struct resync_pages) * pi->raid_disks, -+ gfp_flags); -+ if (!rps) -+ goto out_free_r1bio; -+ - /* - * Allocate bios : 1 for reading, n-1 for writing - */ -@@ -133,22 +157,22 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) - need_pages = pi->raid_disks; - else - need_pages = 1; -- for (j = 0; j < need_pages; j++) { -+ for (j = 0; j < pi->raid_disks; j++) { -+ struct resync_pages *rp = &rps[j]; -+ - bio = r1_bio->bios[j]; -- bio->bi_vcnt = RESYNC_PAGES; -- -- if (bio_alloc_pages(bio, gfp_flags)) -- goto out_free_pages; -- } -- /* If not user-requests, copy the page pointers to all bios */ -- if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) { -- for (i = 0; i< RESYNC_PAGES; i++) -- for (j = 1; j < pi->raid_disks; j++) { -- struct page *page = -- r1_bio->bios[0]->bi_io_vec[i].bv_page; -- get_page(page); -- r1_bio->bios[j]->bi_io_vec[i].bv_page = page; -- } -+ -+ if (j < need_pages) { -+ if (resync_alloc_pages(rp, gfp_flags)) -+ goto out_free_pages; -+ } else { -+ memcpy(rp, &rps[0], sizeof(*rp)); -+ resync_get_all_pages(rp); -+ } -+ -+ rp->idx = 0; -+ rp->raid_bio = r1_bio; -+ bio->bi_private = rp; - } - - r1_bio->master_bio = NULL; -@@ -157,11 +181,14 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) - - out_free_pages: - while (--j >= 0) -- bio_free_pages(r1_bio->bios[j]); -+ resync_free_pages(&rps[j]); - - out_free_bio: - while (++j < pi->raid_disks) - bio_put(r1_bio->bios[j]); -+ kfree(rps); -+ -+out_free_r1bio: - 
r1bio_pool_free(r1_bio, data); - return NULL; - } -@@ -169,14 +196,18 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) - static void r1buf_pool_free(void *__r1_bio, void *data) - { - struct pool_info *pi = data; -- int i,j; -+ int i; - struct r1bio *r1bio = __r1_bio; -+ struct resync_pages *rp = NULL; - -- for (i = 0; i < RESYNC_PAGES; i++) -- for (j = pi->raid_disks; j-- ;) -- safe_put_page(r1bio->bios[j]->bi_io_vec[i].bv_page); -- for (i=0 ; i < pi->raid_disks; i++) -+ for (i = pi->raid_disks; i--; ) { -+ rp = get_resync_pages(r1bio->bios[i]); -+ resync_free_pages(rp); - bio_put(r1bio->bios[i]); -+ } -+ -+ /* resync pages array stored in the 1st bio's .bi_private */ -+ kfree(rp); - - r1bio_pool_free(r1bio, data); - } -@@ -1844,7 +1875,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) - - static void end_sync_read(struct bio *bio) - { -- struct r1bio *r1_bio = bio->bi_private; -+ struct r1bio *r1_bio = get_resync_r1bio(bio); - - update_head_pos(r1_bio->read_disk, r1_bio); - -@@ -1863,7 +1894,7 @@ static void end_sync_read(struct bio *bio) - static void end_sync_write(struct bio *bio) - { - int uptodate = !bio->bi_error; -- struct r1bio *r1_bio = bio->bi_private; -+ struct r1bio *r1_bio = get_resync_r1bio(bio); - struct mddev *mddev = r1_bio->mddev; - struct r1conf *conf = mddev->private; - sector_t first_bad; -@@ -2080,6 +2111,7 @@ static void process_checks(struct r1bio *r1_bio) - int size; - int error; - struct bio *b = r1_bio->bios[i]; -+ struct resync_pages *rp = get_resync_pages(b); - if (b->bi_end_io != end_sync_read) - continue; - /* fixup the bio for reuse, but preserve errno */ -@@ -2092,7 +2124,8 @@ static void process_checks(struct r1bio *r1_bio) - conf->mirrors[i].rdev->data_offset; - b->bi_bdev = conf->mirrors[i].rdev->bdev; - b->bi_end_io = end_sync_read; -- b->bi_private = r1_bio; -+ rp->raid_bio = r1_bio; -+ b->bi_private = rp; - - size = b->bi_iter.bi_size; - for (j = 0; j < vcnt ; j++) { -@@ -2746,7 
+2779,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, - for (i = 0; i < conf->raid_disks * 2; i++) { - struct md_rdev *rdev; - bio = r1_bio->bios[i]; -- bio_reset(bio); - - rdev = rcu_dereference(conf->mirrors[i].rdev); - if (rdev == NULL || -@@ -2802,7 +2834,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, - atomic_inc(&rdev->nr_pending); - bio->bi_iter.bi_sector = sector_nr + rdev->data_offset; - bio->bi_bdev = rdev->bdev; -- bio->bi_private = r1_bio; - if (test_bit(FailFast, &rdev->flags)) - bio->bi_opf |= MD_FAILFAST; - } -@@ -2888,9 +2919,12 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, - } - - for (i = 0 ; i < conf->raid_disks * 2; i++) { -+ struct resync_pages *rp; -+ - bio = r1_bio->bios[i]; -+ rp = get_resync_pages(bio); - if (bio->bi_end_io) { -- page = bio->bi_io_vec[bio->bi_vcnt].bv_page; -+ page = resync_fetch_page(rp, rp->idx++); - - /* - * won't fail because the vec table is big -@@ -2902,7 +2936,8 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, - nr_sectors += len>>9; - sector_nr += len>>9; - sync_blocks -= (len>>9); -- } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); -+ } while (get_resync_pages(r1_bio->bios[disk]->bi_private)->idx < RESYNC_PAGES); -+ - r1_bio->sectors = nr_sectors; - - if (mddev_is_clustered(mddev) && --- -2.10.2 - diff --git a/patches.drivers/0167-md-raid1-retrieve-page-from-pre-allocated-resync-pag.patch b/patches.drivers/0167-md-raid1-retrieve-page-from-pre-allocated-resync-pag.patch deleted file mode 100644 index 250b2365e9..0000000000 --- a/patches.drivers/0167-md-raid1-retrieve-page-from-pre-allocated-resync-pag.patch +++ /dev/null @@ -1,84 +0,0 @@ -From: Ming Lei <tom.leiming@gmail.com> -Date: Fri, 17 Mar 2017 00:12:27 +0800 -Subject: [PATCH] md: raid1: retrieve page from pre-allocated resync page array -Git-commit: 44cf0f4dc76b5e44e6a9c727be6902434f99a9bd -Git-repo: 
git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -Now one page array is allocated for each resync bio, and we can -retrieve page from this table directly. - -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 16 ++++++++-------- - 1 file changed, 8 insertions(+), 8 deletions(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index 89a384b..21ef09a 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -1973,6 +1973,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio) - struct mddev *mddev = r1_bio->mddev; - struct r1conf *conf = mddev->private; - struct bio *bio = r1_bio->bios[r1_bio->read_disk]; -+ struct page **pages = get_resync_pages(bio)->pages; - sector_t sect = r1_bio->sector; - int sectors = r1_bio->sectors; - int idx = 0; -@@ -2006,7 +2007,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio) - */ - rdev = conf->mirrors[d].rdev; - if (sync_page_io(rdev, sect, s<<9, -- bio->bi_io_vec[idx].bv_page, -+ pages[idx], - REQ_OP_READ, 0, false)) { - success = 1; - break; -@@ -2061,7 +2062,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio) - continue; - rdev = conf->mirrors[d].rdev; - if (r1_sync_page_io(rdev, sect, s, -- bio->bi_io_vec[idx].bv_page, -+ pages[idx], - WRITE) == 0) { - r1_bio->bios[d]->bi_end_io = NULL; - rdev_dec_pending(rdev, mddev); -@@ -2076,7 +2077,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio) - continue; - rdev = conf->mirrors[d].rdev; - if (r1_sync_page_io(rdev, sect, s, -- bio->bi_io_vec[idx].bv_page, -+ pages[idx], - READ) != 0) - atomic_add(s, &rdev->corrected_errors); - } -@@ -2152,6 +2153,8 @@ static void process_checks(struct r1bio *r1_bio) - struct bio *pbio = r1_bio->bios[primary]; - struct bio *sbio = r1_bio->bios[i]; - int error = sbio->bi_error; -+ struct page **ppages = 
get_resync_pages(pbio)->pages; -+ struct page **spages = get_resync_pages(sbio)->pages; - - if (sbio->bi_end_io != end_sync_read) - continue; -@@ -2160,11 +2163,8 @@ static void process_checks(struct r1bio *r1_bio) - - if (!error) { - for (j = vcnt; j-- ; ) { -- struct page *p, *s; -- p = pbio->bi_io_vec[j].bv_page; -- s = sbio->bi_io_vec[j].bv_page; -- if (memcmp(page_address(p), -- page_address(s), -+ if (memcmp(page_address(ppages[j]), -+ page_address(spages[j]), - sbio->bi_io_vec[j].bv_len)) - break; - } --- -2.10.2 - diff --git a/patches.drivers/0168-md-raid1-use-bio-helper-in-process_checks.patch b/patches.drivers/0168-md-raid1-use-bio-helper-in-process_checks.patch deleted file mode 100644 index a87712105f..0000000000 --- a/patches.drivers/0168-md-raid1-use-bio-helper-in-process_checks.patch +++ /dev/null @@ -1,68 +0,0 @@ -From: Ming Lei <tom.leiming@gmail.com> -Date: Fri, 17 Mar 2017 00:12:28 +0800 -Subject: [PATCH] md: raid1: use bio helper in process_checks() -Git-commit: 60928a91b0b3beca4a1cf2739118f967c783f79a -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -Avoid to direct access to bvec table. 
- -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 12 ++++++++---- - 1 file changed, 8 insertions(+), 4 deletions(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index 21ef09a..d27b846 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -2111,6 +2111,7 @@ static void process_checks(struct r1bio *r1_bio) - int j; - int size; - int error; -+ struct bio_vec *bi; - struct bio *b = r1_bio->bios[i]; - struct resync_pages *rp = get_resync_pages(b); - if (b->bi_end_io != end_sync_read) -@@ -2129,9 +2130,7 @@ static void process_checks(struct r1bio *r1_bio) - b->bi_private = rp; - - size = b->bi_iter.bi_size; -- for (j = 0; j < vcnt ; j++) { -- struct bio_vec *bi; -- bi = &b->bi_io_vec[j]; -+ bio_for_each_segment_all(bi, b, j) { - bi->bv_offset = 0; - if (size > PAGE_SIZE) - bi->bv_len = PAGE_SIZE; -@@ -2155,17 +2154,22 @@ static void process_checks(struct r1bio *r1_bio) - int error = sbio->bi_error; - struct page **ppages = get_resync_pages(pbio)->pages; - struct page **spages = get_resync_pages(sbio)->pages; -+ struct bio_vec *bi; -+ int page_len[RESYNC_PAGES]; - - if (sbio->bi_end_io != end_sync_read) - continue; - /* Now we can 'fixup' the error value */ - sbio->bi_error = 0; - -+ bio_for_each_segment_all(bi, sbio, j) -+ page_len[j] = bi->bv_len; -+ - if (!error) { - for (j = vcnt; j-- ; ) { - if (memcmp(page_address(ppages[j]), - page_address(spages[j]), -- sbio->bi_io_vec[j].bv_len)) -+ page_len[j])) - break; - } - } else --- -2.10.2 - diff --git a/patches.drivers/0169-md-raid1-move-offset-out-of-loop.patch b/patches.drivers/0169-md-raid1-move-offset-out-of-loop.patch deleted file mode 100644 index 959c0d2f68..0000000000 --- a/patches.drivers/0169-md-raid1-move-offset-out-of-loop.patch +++ /dev/null @@ -1,50 +0,0 @@ -From: Ming Lei <tom.leiming@gmail.com> -Date: Fri, 17 Mar 2017 00:12:30 +0800 -Subject: [PATCH] md: raid1: move 'offset' 
out of loop -Git-commit: d8c84c4f8becc1fb993911e18c8aef5ecf7a72ac -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -The 'offset' local variable can't be changed inside the loop, so -move it out. - -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index d27b846..64bf200 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -1294,6 +1294,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio) - int first_clone; - int sectors_handled; - int max_sectors; -+ sector_t offset; - - /* - * Register the new request and wait if the reconstruction -@@ -1439,13 +1440,13 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio) - atomic_set(&r1_bio->behind_remaining, 0); - - first_clone = 1; -+ -+ offset = r1_bio->sector - bio->bi_iter.bi_sector; - for (i = 0; i < disks; i++) { - struct bio *mbio = NULL; -- sector_t offset; - if (!r1_bio->bios[i]) - continue; - -- offset = r1_bio->sector - bio->bi_iter.bi_sector; - - if (first_clone) { - /* do behind I/O ? 
--- -2.10.2 - diff --git a/patches.drivers/0170-md-raid1-improve-write-behind.patch b/patches.drivers/0170-md-raid1-improve-write-behind.patch deleted file mode 100644 index 6e317cf289..0000000000 --- a/patches.drivers/0170-md-raid1-improve-write-behind.patch +++ /dev/null @@ -1,225 +0,0 @@ -From: Ming Lei <tom.leiming@gmail.com> -Date: Fri, 17 Mar 2017 00:12:31 +0800 -Subject: [PATCH] md: raid1: improve write behind -Git-commit: 841c1316c7da6199a7df473893c141943991a756 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -This patch improve handling of write behind in the following ways: - -- introduce behind master bio to hold all write behind pages -- fast clone bios from behind master bio -- avoid to change bvec table directly -- use bio_copy_data() and make code more clean - -Suggested-by: Shaohua Li <shli@fb.com> -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 118 ++++++++++++++++++++++++----------------------------- - drivers/md/raid1.h | 10 +++-- - 2 files changed, 61 insertions(+), 67 deletions(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index 64bf200..c6a671f 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -388,12 +388,9 @@ static void close_write(struct r1bio *r1_bio) - { - /* it really is the end of this request */ - if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { -- /* free extra copy of the data pages */ -- int i = r1_bio->behind_page_count; -- while (i--) -- safe_put_page(r1_bio->behind_bvecs[i].bv_page); -- kfree(r1_bio->behind_bvecs); -- r1_bio->behind_bvecs = NULL; -+ bio_free_pages(r1_bio->behind_master_bio); -+ bio_put(r1_bio->behind_master_bio); -+ r1_bio->behind_master_bio = NULL; - } - /* clear the bitmap if all writes complete successfully */ - bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, 
-@@ -495,6 +492,10 @@ static void raid1_end_write_request(struct bio *bio) - } - - if (behind) { -+ /* we release behind master bio when all write are done */ -+ if (r1_bio->behind_master_bio == bio) -+ to_put = NULL; -+ - if (test_bit(WriteMostly, &rdev->flags)) - atomic_dec(&r1_bio->behind_remaining); - -@@ -1089,39 +1090,46 @@ static void unfreeze_array(struct r1conf *conf) - wake_up(&conf->wait_barrier); - } - --/* duplicate the data pages for behind I/O -- */ --static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio) -+static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio, -+ struct bio *bio, -+ int offset, int size) - { -- int i; -- struct bio_vec *bvec; -- struct bio_vec *bvecs = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec), -- GFP_NOIO); -- if (unlikely(!bvecs)) -- return; -+ unsigned vcnt = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; -+ int i = 0; -+ struct bio *behind_bio = NULL; -+ -+ behind_bio = bio_alloc_mddev(GFP_NOIO, vcnt, r1_bio->mddev); -+ if (!behind_bio) -+ goto fail; -+ -+ while (i < vcnt && size) { -+ struct page *page; -+ int len = min_t(int, PAGE_SIZE, size); -+ -+ page = alloc_page(GFP_NOIO); -+ if (unlikely(!page)) -+ goto free_pages; -+ -+ bio_add_page(behind_bio, page, len, 0); -+ -+ size -= len; -+ i++; -+ } - -- bio_for_each_segment_all(bvec, bio, i) { -- bvecs[i] = *bvec; -- bvecs[i].bv_page = alloc_page(GFP_NOIO); -- if (unlikely(!bvecs[i].bv_page)) -- goto do_sync_io; -- memcpy(kmap(bvecs[i].bv_page) + bvec->bv_offset, -- kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len); -- kunmap(bvecs[i].bv_page); -- kunmap(bvec->bv_page); -- } -- r1_bio->behind_bvecs = bvecs; -- r1_bio->behind_page_count = bio->bi_vcnt; -+ bio_copy_data_partial(behind_bio, bio, offset, -+ behind_bio->bi_iter.bi_size); -+ -+ r1_bio->behind_master_bio = behind_bio;; - set_bit(R1BIO_BehindIO, &r1_bio->state); -- return; - --do_sync_io: -- for (i = 0; i < bio->bi_vcnt; i++) -- if (bvecs[i].bv_page) -- put_page(bvecs[i].bv_page); -- 
kfree(bvecs); -+ return behind_bio; -+ -+free_pages: - pr_debug("%dB behind alloc failed, doing sync I/O\n", - bio->bi_iter.bi_size); -+ bio_free_pages(behind_bio); -+fail: -+ return behind_bio; - } - - struct raid1_plug_cb { -@@ -1457,11 +1465,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio) - (atomic_read(&bitmap->behind_writes) - < mddev->bitmap_info.max_write_behind) && - !waitqueue_active(&bitmap->behind_wait)) { -- mbio = bio_clone_bioset_partial(bio, GFP_NOIO, -- mddev->bio_set, -- offset << 9, -- max_sectors << 9); -- alloc_behind_pages(mbio, r1_bio); -+ mbio = alloc_behind_master_bio(r1_bio, bio, -+ offset << 9, -+ max_sectors << 9); - } - - bitmap_startwrite(bitmap, r1_bio->sector, -@@ -1472,26 +1478,17 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio) - } - - if (!mbio) { -- if (r1_bio->behind_bvecs) -- mbio = bio_clone_bioset_partial(bio, GFP_NOIO, -- mddev->bio_set, -- offset << 9, -- max_sectors << 9); -+ if (r1_bio->behind_master_bio) -+ mbio = bio_clone_fast(r1_bio->behind_master_bio, -+ GFP_NOIO, -+ mddev->bio_set); - else { - mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); - bio_trim(mbio, offset, max_sectors); - } - } - -- if (r1_bio->behind_bvecs) { -- struct bio_vec *bvec; -- int j; -- -- /* -- * We trimmed the bio, so _all is legit -- */ -- bio_for_each_segment_all(bvec, mbio, j) -- bvec->bv_page = r1_bio->behind_bvecs[j].bv_page; -+ if (r1_bio->behind_master_bio) { - if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) - atomic_inc(&r1_bio->behind_remaining); - } -@@ -2386,18 +2383,11 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) - /* Write at 'sector' for 'sectors'*/ - - if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { -- unsigned vcnt = r1_bio->behind_page_count; -- struct bio_vec *vec = r1_bio->behind_bvecs; -- -- while (!vec->bv_page) { -- vec++; -- vcnt--; -- } -- -- wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev); -- memcpy(wbio->bi_io_vec, vec, vcnt * 
sizeof(struct bio_vec)); -- -- wbio->bi_vcnt = vcnt; -+ wbio = bio_clone_fast(r1_bio->behind_master_bio, -+ GFP_NOIO, -+ mddev->bio_set); -+ /* We really need a _all clone */ -+ wbio->bi_iter = (struct bvec_iter){ 0 }; - } else { - wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO, - mddev->bio_set); -diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h -index dd22a37..4271cd7 100644 ---- a/drivers/md/raid1.h -+++ b/drivers/md/raid1.h -@@ -153,9 +153,13 @@ struct r1bio { - int read_disk; - - struct list_head retry_list; -- /* Next two are only valid when R1BIO_BehindIO is set */ -- struct bio_vec *behind_bvecs; -- int behind_page_count; -+ -+ /* -+ * When R1BIO_BehindIO is set, we store pages for write behind -+ * in behind_master_bio. -+ */ -+ struct bio *behind_master_bio; -+ - /* - * if the IO is in WRITE direction, then multiple bios are used. - * We choose the number when they are allocated. --- -2.10.2 - diff --git a/patches.drivers/0171-md-raid10-refactor-code-of-read-reshape-s-.bi_end_io.patch b/patches.drivers/0171-md-raid10-refactor-code-of-read-reshape-s-.bi_end_io.patch deleted file mode 100644 index 5b9ca4f6dd..0000000000 --- a/patches.drivers/0171-md-raid10-refactor-code-of-read-reshape-s-.bi_end_io.patch +++ /dev/null @@ -1,81 +0,0 @@ -From: Ming Lei <tom.leiming@gmail.com> -Date: Fri, 17 Mar 2017 00:12:32 +0800 -Subject: [PATCH] md: raid10: refactor code of read reshape's .bi_end_io -Git-commit: 81fa152008ac903877b59bcc7d16777c3292c206 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -reshape read request is a bit special and requires one extra -bio which isn't allocated from r10buf_pool. - -Refactor the .bi_end_io for read reshape, so that we can use -raid10's resync page mangement approach easily in the following -patches. 
- -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid10.c | 28 ++++++++++++++++++---------- - 1 file changed, 18 insertions(+), 10 deletions(-) - -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index 301e73f..94f5c36 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -1889,17 +1889,9 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) - return err; - } - --static void end_sync_read(struct bio *bio) -+static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d) - { -- struct r10bio *r10_bio = bio->bi_private; - struct r10conf *conf = r10_bio->mddev->private; -- int d; -- -- if (bio == r10_bio->master_bio) { -- /* this is a reshape read */ -- d = r10_bio->read_slot; /* really the read dev */ -- } else -- d = find_bio_disk(conf, r10_bio, bio, NULL, NULL); - - if (!bio->bi_error) - set_bit(R10BIO_Uptodate, &r10_bio->state); -@@ -1923,6 +1915,22 @@ static void end_sync_read(struct bio *bio) - } - } - -+static void end_sync_read(struct bio *bio) -+{ -+ struct r10bio *r10_bio = bio->bi_private; -+ struct r10conf *conf = r10_bio->mddev->private; -+ int d = find_bio_disk(conf, r10_bio, bio, NULL, NULL); -+ -+ __end_sync_read(r10_bio, bio, d); -+} -+ -+static void end_reshape_read(struct bio *bio) -+{ -+ struct r10bio *r10_bio = bio->bi_private; -+ -+ __end_sync_read(r10_bio, bio, r10_bio->read_slot); -+} -+ - static void end_sync_request(struct r10bio *r10_bio) - { - struct mddev *mddev = r10_bio->mddev; -@@ -4438,7 +4446,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, - read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr - + rdev->data_offset); - read_bio->bi_private = r10_bio; -- read_bio->bi_end_io = end_sync_read; -+ read_bio->bi_end_io = end_reshape_read; - bio_set_op_attrs(read_bio, REQ_OP_READ, 0); - read_bio->bi_flags &= (~0UL << BIO_RESET_BITS); - 
read_bio->bi_error = 0; --- -2.10.2 - diff --git a/patches.drivers/0172-md-raid10-don-t-use-bio-s-vec-table-to-manage-resync.patch b/patches.drivers/0172-md-raid10-don-t-use-bio-s-vec-table-to-manage-resync.patch deleted file mode 100644 index 888d3dea2f..0000000000 --- a/patches.drivers/0172-md-raid10-don-t-use-bio-s-vec-table-to-manage-resync.patch +++ /dev/null @@ -1,379 +0,0 @@ -From: Ming Lei <tom.leiming@gmail.com> -Date: Fri, 17 Mar 2017 00:12:33 +0800 -Subject: [PATCH] md: raid10: don't use bio's vec table to manage resync pages -Git-commit: f0250618361db1447d66c494c6dd2df815f42c87 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -Now we allocate one page array for managing resync pages, instead -of using bio's vec table to do that, and the old way is very hacky -and won't work any more if multipage bvec is enabled. - -The introduced cost is that we need to allocate (128 + 16) * copies -bytes per r10_bio, and it is fine because the inflight r10_bio for -resync shouldn't be much, as pointed by Shaohua. - -Also bio_reset() in raid10_sync_request() and reshape_request() -are removed because all bios are freshly new now in these functions -and not necessary to reset any more. - -This patch can be thought as cleanup too. - -Suggested-by: Shaohua Li <shli@kernel.org> -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid10.c | 134 ++++++++++++++++++++++++++++++++-------------------- - 1 file changed, 82 insertions(+), 52 deletions(-) - -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index 94f5c36..629ae45 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -110,6 +110,24 @@ static void end_reshape(struct r10conf *conf); - #define raid10_log(md, fmt, args...) 
\ - do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0) - -+/* -+ * 'strct resync_pages' stores actual pages used for doing the resync -+ * IO, and it is per-bio, so make .bi_private points to it. -+ */ -+static inline struct resync_pages *get_resync_pages(struct bio *bio) -+{ -+ return bio->bi_private; -+} -+ -+/* -+ * for resync bio, r10bio pointer can be retrieved from the per-bio -+ * 'struct resync_pages'. -+ */ -+static inline struct r10bio *get_resync_r10bio(struct bio *bio) -+{ -+ return get_resync_pages(bio)->raid_bio; -+} -+ - static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) - { - struct r10conf *conf = data; -@@ -140,11 +158,11 @@ static void r10bio_pool_free(void *r10_bio, void *data) - static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) - { - struct r10conf *conf = data; -- struct page *page; - struct r10bio *r10_bio; - struct bio *bio; -- int i, j; -- int nalloc; -+ int j; -+ int nalloc, nalloc_rp; -+ struct resync_pages *rps; - - r10_bio = r10bio_pool_alloc(gfp_flags, conf); - if (!r10_bio) -@@ -156,6 +174,15 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) - else - nalloc = 2; /* recovery */ - -+ /* allocate once for all bios */ -+ if (!conf->have_replacement) -+ nalloc_rp = nalloc; -+ else -+ nalloc_rp = nalloc * 2; -+ rps = kmalloc(sizeof(struct resync_pages) * nalloc_rp, gfp_flags); -+ if (!rps) -+ goto out_free_r10bio; -+ - /* - * Allocate bios. - */ -@@ -175,36 +202,40 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) - * Allocate RESYNC_PAGES data pages and attach them - * where needed. 
- */ -- for (j = 0 ; j < nalloc; j++) { -+ for (j = 0; j < nalloc; j++) { - struct bio *rbio = r10_bio->devs[j].repl_bio; -+ struct resync_pages *rp, *rp_repl; -+ -+ rp = &rps[j]; -+ if (rbio) -+ rp_repl = &rps[nalloc + j]; -+ - bio = r10_bio->devs[j].bio; -- for (i = 0; i < RESYNC_PAGES; i++) { -- if (j > 0 && !test_bit(MD_RECOVERY_SYNC, -- &conf->mddev->recovery)) { -- /* we can share bv_page's during recovery -- * and reshape */ -- struct bio *rbio = r10_bio->devs[0].bio; -- page = rbio->bi_io_vec[i].bv_page; -- get_page(page); -- } else -- page = alloc_page(gfp_flags); -- if (unlikely(!page)) -+ -+ if (!j || test_bit(MD_RECOVERY_SYNC, -+ &conf->mddev->recovery)) { -+ if (resync_alloc_pages(rp, gfp_flags)) - goto out_free_pages; -+ } else { -+ memcpy(rp, &rps[0], sizeof(*rp)); -+ resync_get_all_pages(rp); -+ } - -- bio->bi_io_vec[i].bv_page = page; -- if (rbio) -- rbio->bi_io_vec[i].bv_page = page; -+ rp->idx = 0; -+ rp->raid_bio = r10_bio; -+ bio->bi_private = rp; -+ if (rbio) { -+ memcpy(rp_repl, rp, sizeof(*rp)); -+ rbio->bi_private = rp_repl; - } - } - - return r10_bio; - - out_free_pages: -- for ( ; i > 0 ; i--) -- safe_put_page(bio->bi_io_vec[i-1].bv_page); -- while (j--) -- for (i = 0; i < RESYNC_PAGES ; i++) -- safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page); -+ while (--j >= 0) -+ resync_free_pages(&rps[j * 2]); -+ - j = 0; - out_free_bio: - for ( ; j < nalloc; j++) { -@@ -213,30 +244,34 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) - if (r10_bio->devs[j].repl_bio) - bio_put(r10_bio->devs[j].repl_bio); - } -+ kfree(rps); -+out_free_r10bio: - r10bio_pool_free(r10_bio, conf); - return NULL; - } - - static void r10buf_pool_free(void *__r10_bio, void *data) - { -- int i; - struct r10conf *conf = data; - struct r10bio *r10bio = __r10_bio; - int j; -+ struct resync_pages *rp = NULL; - -- for (j=0; j < conf->copies; j++) { -+ for (j = conf->copies; j--; ) { - struct bio *bio = r10bio->devs[j].bio; -- if (bio) { -- for (i = 0; i < 
RESYNC_PAGES; i++) { -- safe_put_page(bio->bi_io_vec[i].bv_page); -- bio->bi_io_vec[i].bv_page = NULL; -- } -- bio_put(bio); -- } -+ -+ rp = get_resync_pages(bio); -+ resync_free_pages(rp); -+ bio_put(bio); -+ - bio = r10bio->devs[j].repl_bio; - if (bio) - bio_put(bio); - } -+ -+ /* resync pages array stored in the 1st bio's .bi_private */ -+ kfree(rp); -+ - r10bio_pool_free(r10bio, conf); - } - -@@ -1917,7 +1952,7 @@ static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d) - - static void end_sync_read(struct bio *bio) - { -- struct r10bio *r10_bio = bio->bi_private; -+ struct r10bio *r10_bio = get_resync_r10bio(bio); - struct r10conf *conf = r10_bio->mddev->private; - int d = find_bio_disk(conf, r10_bio, bio, NULL, NULL); - -@@ -1926,6 +1961,7 @@ static void end_sync_read(struct bio *bio) - - static void end_reshape_read(struct bio *bio) - { -+ /* reshape read bio isn't allocated from r10buf_pool */ - struct r10bio *r10_bio = bio->bi_private; - - __end_sync_read(r10_bio, bio, r10_bio->read_slot); -@@ -1960,7 +1996,7 @@ static void end_sync_request(struct r10bio *r10_bio) - - static void end_sync_write(struct bio *bio) - { -- struct r10bio *r10_bio = bio->bi_private; -+ struct r10bio *r10_bio = get_resync_r10bio(bio); - struct mddev *mddev = r10_bio->mddev; - struct r10conf *conf = mddev->private; - int d; -@@ -2040,6 +2076,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) - for (i=0 ; i < conf->copies ; i++) { - int j, d; - struct md_rdev *rdev; -+ struct resync_pages *rp; - - tbio = r10_bio->devs[i].bio; - -@@ -2081,11 +2118,13 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) - * First we need to fixup bv_offset, bv_len and - * bi_vecs, as the read request might have corrupted these - */ -+ rp = get_resync_pages(tbio); - bio_reset(tbio); - - tbio->bi_vcnt = vcnt; - tbio->bi_iter.bi_size = fbio->bi_iter.bi_size; -- tbio->bi_private = r10_bio; -+ rp->raid_bio = r10_bio; -+ 
tbio->bi_private = rp; - tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; - tbio->bi_end_io = end_sync_write; - bio_set_op_attrs(tbio, REQ_OP_WRITE, 0); -@@ -3149,10 +3188,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, - } - } - bio = r10_bio->devs[0].bio; -- bio_reset(bio); - bio->bi_next = biolist; - biolist = bio; -- bio->bi_private = r10_bio; - bio->bi_end_io = end_sync_read; - bio_set_op_attrs(bio, REQ_OP_READ, 0); - if (test_bit(FailFast, &rdev->flags)) -@@ -3176,10 +3213,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, - - if (!test_bit(In_sync, &mrdev->flags)) { - bio = r10_bio->devs[1].bio; -- bio_reset(bio); - bio->bi_next = biolist; - biolist = bio; -- bio->bi_private = r10_bio; - bio->bi_end_io = end_sync_write; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - bio->bi_iter.bi_sector = to_addr -@@ -3204,10 +3239,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, - if (mreplace == NULL || bio == NULL || - test_bit(Faulty, &mreplace->flags)) - break; -- bio_reset(bio); - bio->bi_next = biolist; - biolist = bio; -- bio->bi_private = r10_bio; - bio->bi_end_io = end_sync_write; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - bio->bi_iter.bi_sector = to_addr + -@@ -3329,7 +3362,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, - r10_bio->devs[i].repl_bio->bi_end_io = NULL; - - bio = r10_bio->devs[i].bio; -- bio_reset(bio); - bio->bi_error = -EIO; - rcu_read_lock(); - rdev = rcu_dereference(conf->mirrors[d].rdev); -@@ -3354,7 +3386,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, - atomic_inc(&r10_bio->remaining); - bio->bi_next = biolist; - biolist = bio; -- bio->bi_private = r10_bio; - bio->bi_end_io = end_sync_read; - bio_set_op_attrs(bio, REQ_OP_READ, 0); - if (test_bit(FailFast, &conf->mirrors[d].rdev->flags)) -@@ -3373,13 +3404,11 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t 
sector_nr, - - /* Need to set up for writing to the replacement */ - bio = r10_bio->devs[i].repl_bio; -- bio_reset(bio); - bio->bi_error = -EIO; - - sector = r10_bio->devs[i].addr; - bio->bi_next = biolist; - biolist = bio; -- bio->bi_private = r10_bio; - bio->bi_end_io = end_sync_write; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - if (test_bit(FailFast, &conf->mirrors[d].rdev->flags)) -@@ -3418,7 +3447,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, - if (len == 0) - break; - for (bio= biolist ; bio ; bio=bio->bi_next) { -- page = bio->bi_io_vec[bio->bi_vcnt].bv_page; -+ struct resync_pages *rp = get_resync_pages(bio); -+ page = resync_fetch_page(rp, rp->idx++); - /* - * won't fail because the vec table is big enough - * to hold all these pages -@@ -3427,7 +3457,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, - } - nr_sectors += len>>9; - sector_nr += len>>9; -- } while (biolist->bi_vcnt < RESYNC_PAGES); -+ } while (get_resync_pages(biolist)->idx < RESYNC_PAGES); - r10_bio->sectors = nr_sectors; - - while (biolist) { -@@ -3435,7 +3465,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, - biolist = biolist->bi_next; - - bio->bi_next = NULL; -- r10_bio = bio->bi_private; -+ r10_bio = get_resync_r10bio(bio); - r10_bio->sectors = nr_sectors; - - if (bio->bi_end_io == end_sync_read) { -@@ -4326,6 +4356,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, - struct bio *blist; - struct bio *bio, *read_bio; - int sectors_done = 0; -+ struct page **pages; - - if (sector_nr == 0) { - /* If restarting in the middle, skip the initial sectors */ -@@ -4476,11 +4507,9 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, - if (!rdev2 || test_bit(Faulty, &rdev2->flags)) - continue; - -- bio_reset(b); - b->bi_bdev = rdev2->bdev; - b->bi_iter.bi_sector = r10_bio->devs[s/2].addr + - rdev2->new_data_offset; -- b->bi_private = r10_bio; - 
b->bi_end_io = end_reshape_write; - bio_set_op_attrs(b, REQ_OP_WRITE, 0); - b->bi_next = blist; -@@ -4490,8 +4519,9 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, - /* Now add as many pages as possible to all of these bios. */ - - nr_sectors = 0; -+ pages = get_resync_pages(r10_bio->devs[0].bio)->pages; - for (s = 0 ; s < max_sectors; s += PAGE_SIZE >> 9) { -- struct page *page = r10_bio->devs[0].bio->bi_io_vec[s/(PAGE_SIZE>>9)].bv_page; -+ struct page *page = pages[s / (PAGE_SIZE >> 9)]; - int len = (max_sectors - s) << 9; - if (len > PAGE_SIZE) - len = PAGE_SIZE; -@@ -4675,7 +4705,7 @@ static int handle_reshape_read_error(struct mddev *mddev, - - static void end_reshape_write(struct bio *bio) - { -- struct r10bio *r10_bio = bio->bi_private; -+ struct r10bio *r10_bio = get_resync_r10bio(bio); - struct mddev *mddev = r10_bio->mddev; - struct r10conf *conf = mddev->private; - int d; --- -2.10.2 - diff --git a/patches.drivers/0173-md-raid10-retrieve-page-from-preallocated-resync-pag.patch b/patches.drivers/0173-md-raid10-retrieve-page-from-preallocated-resync-pag.patch deleted file mode 100644 index 5606cab002..0000000000 --- a/patches.drivers/0173-md-raid10-retrieve-page-from-preallocated-resync-pag.patch +++ /dev/null @@ -1,88 +0,0 @@ -From: Ming Lei <tom.leiming@gmail.com> -Date: Fri, 17 Mar 2017 00:12:34 +0800 -Subject: [PATCH] md: raid10: retrieve page from preallocated resync page array -Git-commit: cdb76be31568604f389f951bd0efadd3f530f2dd -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -Now one page array is allocated for each resync bio, and we can -retrieve page from this table directly. 
- -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid10.c | 13 +++++++++---- - 1 file changed, 9 insertions(+), 4 deletions(-) - -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index 629ae45..827bb5b 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -2055,6 +2055,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) - int i, first; - struct bio *tbio, *fbio; - int vcnt; -+ struct page **tpages, **fpages; - - atomic_set(&r10_bio->remaining, 1); - -@@ -2070,6 +2071,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) - fbio = r10_bio->devs[i].bio; - fbio->bi_iter.bi_size = r10_bio->sectors << 9; - fbio->bi_iter.bi_idx = 0; -+ fpages = get_resync_pages(fbio)->pages; - - vcnt = (r10_bio->sectors + (PAGE_SIZE >> 9) - 1) >> (PAGE_SHIFT - 9); - /* now find blocks with errors */ -@@ -2084,6 +2086,8 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) - continue; - if (i == first) - continue; -+ -+ tpages = get_resync_pages(tbio)->pages; - d = r10_bio->devs[i].devnum; - rdev = conf->mirrors[d].rdev; - if (!r10_bio->devs[i].bio->bi_error) { -@@ -2096,8 +2100,8 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) - int len = PAGE_SIZE; - if (sectors < (len / 512)) - len = sectors * 512; -- if (memcmp(page_address(fbio->bi_io_vec[j].bv_page), -- page_address(tbio->bi_io_vec[j].bv_page), -+ if (memcmp(page_address(fpages[j]), -+ page_address(tpages[j]), - len)) - break; - sectors -= len/512; -@@ -2195,6 +2199,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio) - int idx = 0; - int dr = r10_bio->devs[0].devnum; - int dw = r10_bio->devs[1].devnum; -+ struct page **pages = get_resync_pages(bio)->pages; - - while (sectors) { - int s = sectors; -@@ -2210,7 +2215,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio) - ok = 
sync_page_io(rdev, - addr, - s << 9, -- bio->bi_io_vec[idx].bv_page, -+ pages[idx], - REQ_OP_READ, 0, false); - if (ok) { - rdev = conf->mirrors[dw].rdev; -@@ -2218,7 +2223,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio) - ok = sync_page_io(rdev, - addr, - s << 9, -- bio->bi_io_vec[idx].bv_page, -+ pages[idx], - REQ_OP_WRITE, 0, false); - if (!ok) { - set_bit(WriteErrorSeen, &rdev->flags); --- -2.10.2 - diff --git a/patches.drivers/0174-md-raid10-avoid-direct-access-to-bvec-table-in-handl.patch b/patches.drivers/0174-md-raid10-avoid-direct-access-to-bvec-table-in-handl.patch deleted file mode 100644 index 9ed0431e70..0000000000 --- a/patches.drivers/0174-md-raid10-avoid-direct-access-to-bvec-table-in-handl.patch +++ /dev/null @@ -1,48 +0,0 @@ -From: Ming Lei <tom.leiming@gmail.com> -Date: Fri, 17 Mar 2017 00:12:35 +0800 -Subject: [PATCH] md: raid10: avoid direct access to bvec table in - handle_reshape_read_error -Git-commit: 2d06e3b7145bb08705615e6e7400024d8e36a5c0 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -All reshape I/O share pages from 1st copy device, so just use that pages -for avoiding direct access to bvec table in handle_reshape_read_error. 
- -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid10.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index 827bb5b..0f13d57 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -4653,7 +4653,10 @@ static int handle_reshape_read_error(struct mddev *mddev, - struct r10bio *r10b = &on_stack.r10_bio; - int slot = 0; - int idx = 0; -- struct bio_vec *bvec = r10_bio->master_bio->bi_io_vec; -+ struct page **pages; -+ -+ /* reshape IOs share pages from .devs[0].bio */ -+ pages = get_resync_pages(r10_bio->devs[0].bio)->pages; - - r10b->sector = r10_bio->sector; - __raid10_find_phys(&conf->prev, r10b); -@@ -4682,7 +4685,7 @@ static int handle_reshape_read_error(struct mddev *mddev, - success = sync_page_io(rdev, - addr, - s << 9, -- bvec[idx].bv_page, -+ pages[idx], - REQ_OP_READ, 0, false); - rdev_dec_pending(rdev, mddev); - rcu_read_lock(); --- -2.10.2 - diff --git a/patches.drivers/0175-md-raid1-skip-data-copy-for-behind-io-for-discard-re.patch b/patches.drivers/0175-md-raid1-skip-data-copy-for-behind-io-for-discard-re.patch deleted file mode 100644 index d9074efa82..0000000000 --- a/patches.drivers/0175-md-raid1-skip-data-copy-for-behind-io-for-discard-re.patch +++ /dev/null @@ -1,49 +0,0 @@ -From: Shaohua Li <shli@fb.com> -Date: Fri, 24 Mar 2017 15:20:47 -0700 -Subject: [PATCH] md/raid1: skip data copy for behind io for discard request -Git-commit: 41743c1f046a14c6749fd1808bb3793c08e47a3e -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -discard request doesn't have data attached, so it's meaningless to -allocate memory and copy from original bio for behind IO. And the copy -is bogus because bio_copy_data_partial can't handle discard request. 
- -We don't support writesame/writezeros request so far. - -Reviewed-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index c6a671f..b7d9651 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -1102,6 +1102,10 @@ static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio, - if (!behind_bio) - goto fail; - -+ /* discard op, we don't support writezero/writesame yet */ -+ if (!bio_has_data(bio)) -+ goto skip_copy; -+ - while (i < vcnt && size) { - struct page *page; - int len = min_t(int, PAGE_SIZE, size); -@@ -1118,7 +1122,7 @@ static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio, - - bio_copy_data_partial(behind_bio, bio, offset, - behind_bio->bi_iter.bi_size); -- -+skip_copy: - r1_bio->behind_master_bio = behind_bio;; - set_bit(R1BIO_BehindIO, &r1_bio->state); - --- -2.10.2 - diff --git a/patches.drivers/0176-md-raid5-cache-fix-payload-endianness-problem-in-rai.patch b/patches.drivers/0176-md-raid5-cache-fix-payload-endianness-problem-in-rai.patch deleted file mode 100644 index 1a4f320103..0000000000 --- a/patches.drivers/0176-md-raid5-cache-fix-payload-endianness-problem-in-rai.patch +++ /dev/null @@ -1,102 +0,0 @@ -From: Jason Yan <yanaijie@huawei.com> -Date: Sat, 25 Mar 2017 09:44:39 +0800 -Subject: [PATCH] md/raid5-cache: fix payload endianness problem in raid5-cache -Git-commit: 1ad45a9bc4e0cd5a6e6fb0e6c5d35d6c87f14c76 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -The payload->header.type and payload->size are little-endian, so just -convert them to the right byte order. 
- -Signed-off-by: Jason Yan <yanaijie@huawei.com> -Cc: <stable@vger.kernel.org> #v4.10+ -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5-cache.c | 18 +++++++++--------- - 1 file changed, 9 insertions(+), 9 deletions(-) - -diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c -index 25eb048..b6194e0 100644 ---- a/drivers/md/raid5-cache.c -+++ b/drivers/md/raid5-cache.c -@@ -2002,12 +2002,12 @@ r5l_recovery_verify_data_checksum_for_mb(struct r5l_log *log, - payload = (void *)mb + mb_offset; - payload_flush = (void *)mb + mb_offset; - -- if (payload->header.type == R5LOG_PAYLOAD_DATA) { -+ if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_DATA) { - if (r5l_recovery_verify_data_checksum( - log, ctx, page, log_offset, - payload->checksum[0]) < 0) - goto mismatch; -- } else if (payload->header.type == R5LOG_PAYLOAD_PARITY) { -+ } else if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_PARITY) { - if (r5l_recovery_verify_data_checksum( - log, ctx, page, log_offset, - payload->checksum[0]) < 0) -@@ -2019,12 +2019,12 @@ r5l_recovery_verify_data_checksum_for_mb(struct r5l_log *log, - BLOCK_SECTORS), - payload->checksum[1]) < 0) - goto mismatch; -- } else if (payload->header.type == R5LOG_PAYLOAD_FLUSH) { -+ } else if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_FLUSH) { - /* nothing to do for R5LOG_PAYLOAD_FLUSH here */ - } else /* not R5LOG_PAYLOAD_DATA/PARITY/FLUSH */ - goto mismatch; - -- if (payload->header.type == R5LOG_PAYLOAD_FLUSH) { -+ if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_FLUSH) { - mb_offset += sizeof(struct r5l_payload_flush) + - le32_to_cpu(payload_flush->size); - } else { -@@ -2091,7 +2091,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, - payload = (void *)mb + mb_offset; - payload_flush = (void *)mb + mb_offset; - -- if (payload->header.type == R5LOG_PAYLOAD_FLUSH) { -+ if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_FLUSH) { - int i, count; 
- - count = le32_to_cpu(payload_flush->size) / sizeof(__le64); -@@ -2113,7 +2113,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, - } - - /* DATA or PARITY payload */ -- stripe_sect = (payload->header.type == R5LOG_PAYLOAD_DATA) ? -+ stripe_sect = (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_DATA) ? - raid5_compute_sector( - conf, le64_to_cpu(payload->location), 0, &dd, - NULL) -@@ -2151,7 +2151,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, - list_add_tail(&sh->lru, cached_stripe_list); - } - -- if (payload->header.type == R5LOG_PAYLOAD_DATA) { -+ if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_DATA) { - if (!test_bit(STRIPE_R5C_CACHING, &sh->state) && - test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags)) { - r5l_recovery_replay_one_stripe(conf, sh, ctx); -@@ -2159,7 +2159,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, - } - r5l_recovery_load_data(log, sh, ctx, payload, - log_offset); -- } else if (payload->header.type == R5LOG_PAYLOAD_PARITY) -+ } else if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_PARITY) - r5l_recovery_load_parity(log, sh, ctx, payload, - log_offset); - else -@@ -2361,7 +2361,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, - payload = (void *)mb + offset; - payload->header.type = cpu_to_le16( - R5LOG_PAYLOAD_DATA); -- payload->size = BLOCK_SECTORS; -+ payload->size = cpu_to_le32(BLOCK_SECTORS); - payload->location = cpu_to_le64( - raid5_compute_blocknr(sh, i, 0)); - addr = kmap_atomic(dev->page); --- -2.10.2 - diff --git a/patches.drivers/0177-md-raid1-kill-warning-on-powerpc_pseries.patch b/patches.drivers/0177-md-raid1-kill-warning-on-powerpc_pseries.patch deleted file mode 100644 index b884f1d1fc..0000000000 --- a/patches.drivers/0177-md-raid1-kill-warning-on-powerpc_pseries.patch +++ /dev/null @@ -1,46 +0,0 @@ -From: Ming Lei <tom.leiming@gmail.com> -Date: Tue, 28 Mar 2017 16:17:55 +0800 -Subject: [PATCH] md: raid1: kill warning on powerpc_pseries -Git-commit: 
8fc04e6ea02d631fd344f462002078b8067793de -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -This patch kills the warning reported on powerpc_pseries, -and actually we don't need the initialization. - - After merging the md tree, today's linux-next build (powerpc - pseries_le_defconfig) produced this warning: - - drivers/md/raid1.c: In function 'raid1d': - drivers/md/raid1.c:2172:9: warning: 'page_len$' may be used uninitialized in this function [-Wmaybe-uninitialized] - if (memcmp(page_address(ppages[j]), - ^ - drivers/md/raid1.c:2160:7: note: 'page_len$' was declared here - int page_len[RESYNC_PAGES]; - ^ - -Signed-off-by: Ming Lei <tom.leiming@gmail.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index b7d9651..7d67235 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -2157,7 +2157,7 @@ static void process_checks(struct r1bio *r1_bio) - struct page **ppages = get_resync_pages(pbio)->pages; - struct page **spages = get_resync_pages(sbio)->pages; - struct bio_vec *bi; -- int page_len[RESYNC_PAGES]; -+ int page_len[RESYNC_PAGES] = { 0 }; - - if (sbio->bi_end_io != end_sync_read) - continue; --- -2.10.2 - diff --git a/patches.drivers/0178-md-update-slab_cache-before-releasing-new-stripes-wh.patch b/patches.drivers/0178-md-update-slab_cache-before-releasing-new-stripes-wh.patch deleted file mode 100644 index ad8f200d99..0000000000 --- a/patches.drivers/0178-md-update-slab_cache-before-releasing-new-stripes-wh.patch +++ /dev/null @@ -1,105 +0,0 @@ -From: Dennis Yang <dennisyang@qnap.com> -Date: Wed, 29 Mar 2017 15:46:13 +0800 -Subject: [PATCH] md: update slab_cache before releasing new stripes when - stripes resizing -Git-commit: 583da48e388f472e8818d9bb60ef6a1d40ee9f9d 
-Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -When growing raid5 device on machine with small memory, there is chance that -mdadm will be killed and the following bug report can be observed. The same -bug could also be reproduced in linux-4.10.6. - -[57600.075774] BUG: unable to handle kernel NULL pointer dereference at (null) -[57600.083796] IP: [<ffffffff81a6aa87>] _raw_spin_lock+0x7/0x20 -[57600.110378] PGD 421cf067 PUD 4442d067 PMD 0 -[57600.114678] Oops: 0002 [#1] SMP -[57600.180799] CPU: 1 PID: 25990 Comm: mdadm Tainted: P O 4.2.8 #1 -[57600.187849] Hardware name: To be filled by O.E.M. To be filled by O.E.M./MAHOBAY, BIOS QV05AR66 03/06/2013 -[57600.197490] task: ffff880044e47240 ti: ffff880043070000 task.ti: ffff880043070000 -[57600.204963] RIP: 0010:[<ffffffff81a6aa87>] [<ffffffff81a6aa87>] _raw_spin_lock+0x7/0x20 -[57600.213057] RSP: 0018:ffff880043073810 EFLAGS: 00010046 -[57600.218359] RAX: 0000000000000000 RBX: 000000000000000c RCX: ffff88011e296dd0 -[57600.225486] RDX: 0000000000000001 RSI: ffffe8ffffcb46c0 RDI: 0000000000000000 -[57600.232613] RBP: ffff880043073878 R08: ffff88011e5f8170 R09: 0000000000000282 -[57600.239739] R10: 0000000000000005 R11: 28f5c28f5c28f5c3 R12: ffff880043073838 -[57600.246872] R13: ffffe8ffffcb46c0 R14: 0000000000000000 R15: ffff8800b9706a00 -[57600.253999] FS: 00007f576106c700(0000) GS:ffff88011e280000(0000) knlGS:0000000000000000 -[57600.262078] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 -[57600.267817] CR2: 0000000000000000 CR3: 00000000428fe000 CR4: 00000000001406e0 -[57600.274942] Stack: -[57600.276949] ffffffff8114ee35 ffff880043073868 0000000000000282 000000000000eb3f -[57600.284383] ffffffff81119043 ffff880043073838 ffff880043073838 ffff88003e197b98 -[57600.291820] ffffe8ffffcb46c0 ffff88003e197360 0000000000000286 ffff880043073968 -[57600.299254] Call Trace: -[57600.301698] [<ffffffff8114ee35>] ? 
cache_flusharray+0x35/0xe0 -[57600.307523] [<ffffffff81119043>] ? __page_cache_release+0x23/0x110 -[57600.313779] [<ffffffff8114eb53>] kmem_cache_free+0x63/0xc0 -[57600.319344] [<ffffffff81579942>] drop_one_stripe+0x62/0x90 -[57600.324915] [<ffffffff81579b5b>] raid5_cache_scan+0x8b/0xb0 -[57600.330563] [<ffffffff8111b98a>] shrink_slab.part.36+0x19a/0x250 -[57600.336650] [<ffffffff8111e38c>] shrink_zone+0x23c/0x250 -[57600.342039] [<ffffffff8111e4f3>] do_try_to_free_pages+0x153/0x420 -[57600.348210] [<ffffffff8111e851>] try_to_free_pages+0x91/0xa0 -[57600.353959] [<ffffffff811145b1>] __alloc_pages_nodemask+0x4d1/0x8b0 -[57600.360303] [<ffffffff8157a30b>] check_reshape+0x62b/0x770 -[57600.365866] [<ffffffff8157a4a5>] raid5_check_reshape+0x55/0xa0 -[57600.371778] [<ffffffff81583df7>] update_raid_disks+0xc7/0x110 -[57600.377604] [<ffffffff81592b73>] md_ioctl+0xd83/0x1b10 -[57600.382827] [<ffffffff81385380>] blkdev_ioctl+0x170/0x690 -[57600.388307] [<ffffffff81195238>] block_ioctl+0x38/0x40 -[57600.393525] [<ffffffff811731c5>] do_vfs_ioctl+0x2b5/0x480 -[57600.399010] [<ffffffff8115e07b>] ? vfs_write+0x14b/0x1f0 -[57600.404400] [<ffffffff811733cc>] SyS_ioctl+0x3c/0x70 -[57600.409447] [<ffffffff81a6ad97>] entry_SYSCALL_64_fastpath+0x12/0x6a -[57600.415875] Code: 00 00 00 00 55 48 89 e5 8b 07 85 c0 74 04 31 c0 5d c3 ba 01 00 00 00 f0 0f b1 17 85 c0 75 ef b0 01 5d c3 90 31 c0 ba 01 00 00 00 <f0> 0f b1 17 85 c0 75 01 c3 55 89 c6 48 89 e5 e8 85 d1 63 ff 5d -[57600.435460] RIP [<ffffffff81a6aa87>] _raw_spin_lock+0x7/0x20 -[57600.441208] RSP <ffff880043073810> -[57600.444690] CR2: 0000000000000000 -[57600.448000] ---[ end trace cbc6b5cc4bf9831d ]--- - -The problem is that resize_stripes() releases new stripe_heads before assigning new -slab cache to conf->slab_cache. 
If the shrinker function raid5_cache_scan() gets called -after resize_stripes() starting releasing new stripes but right before new slab cache -being assigned, it is possible that these new stripe_heads will be freed with the old -slab_cache which was already been destoryed and that triggers this bug. - -Signed-off-by: Dennis Yang <dennisyang@qnap.com> -Fixes: edbe83ab4c27 ("md/raid5: allow the stripe_cache to grow and shrink.") -Cc: stable@vger.kernel.org (4.1+) -Reviewed-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index 6036d5e..a567655 100644 ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -2409,6 +2409,10 @@ static int resize_stripes(struct r5conf *conf, int newsize) - err = -ENOMEM; - - mutex_unlock(&conf->cache_size_mutex); -+ -+ conf->slab_cache = sc; -+ conf->active_name = 1-conf->active_name; -+ - /* Step 4, return new stripes to service */ - while(!list_empty(&newstripes)) { - nsh = list_entry(newstripes.next, struct stripe_head, lru); -@@ -2426,8 +2430,6 @@ static int resize_stripes(struct r5conf *conf, int newsize) - } - /* critical section pass, GFP_NOIO no longer needed */ - -- conf->slab_cache = sc; -- conf->active_name = 1-conf->active_name; - if (!err) - conf->pool_size = newsize; - return err; --- -2.10.2 - diff --git a/patches.drivers/0179-md-raid6-Fix-anomily-when-recovering-a-single-device.patch b/patches.drivers/0179-md-raid6-Fix-anomily-when-recovering-a-single-device.patch deleted file mode 100644 index a92dca81ed..0000000000 --- a/patches.drivers/0179-md-raid6-Fix-anomily-when-recovering-a-single-device.patch +++ /dev/null @@ -1,69 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Mon, 3 Apr 2017 12:11:32 +1000 -Subject: [PATCH] md/raid6: Fix anomily when recovering a single device in - RAID6. 
-Git-commit: 7471fb77ce4dc4cb81291189947fcdf621a97987 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -When recoverying a single missing/failed device in a RAID6, -those stripes where the Q block is on the missing device are -handled a bit differently. In these cases it is easy to -check that the P block is correct, so we do. This results -in the P block be destroy. Consequently the P block needs -to be read a second time in order to compute Q. This causes -lots of seeks and hurts performance. - -It shouldn't be necessary to re-read P as it can be computed -from the DATA. But we only compute blocks on missing -devices, since c337869d9501 ("md: do not compute parity -unless it is on a failed drive"). - -So relax the change made in that commit to allow computing -of the P block in a RAID6 which it is the only missing that -block. - -This makes RAID6 recovery run much faster as the disk just -"before" the recovering device is no longer seeking -back-and-forth. - -Reported-by-tested-by: Brad Campbell <lists2009@fnarfbargle.com> -Reviewed-by: Dan Williams <dan.j.williams@intel.com> -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5.c | 13 ++++++++++++- - 1 file changed, 12 insertions(+), 1 deletion(-) - -diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index a567655..09d94ad 100644 ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -3619,9 +3619,20 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, - BUG_ON(test_bit(R5_Wantcompute, &dev->flags)); - BUG_ON(test_bit(R5_Wantread, &dev->flags)); - BUG_ON(sh->batch_head); -+ -+ /* -+ * In the raid6 case if the only non-uptodate disk is P -+ * then we already trusted P to compute the other failed -+ * drives. It is safe to compute rather than re-read P. 
-+ * In other cases we only compute blocks from failed -+ * devices, otherwise check/repair might fail to detect -+ * a real inconsistency. -+ */ -+ - if ((s->uptodate == disks - 1) && -+ ((sh->qd_idx >= 0 && sh->pd_idx == disk_idx) || - (s->failed && (disk_idx == s->failed_num[0] || -- disk_idx == s->failed_num[1]))) { -+ disk_idx == s->failed_num[1])))) { - /* have disk failed, and we're requested to fetch it; - * do compute it - */ --- -2.10.2 - diff --git a/patches.drivers/0180-md-raid10-reset-the-first-at-the-end-of-loop.patch b/patches.drivers/0180-md-raid10-reset-the-first-at-the-end-of-loop.patch deleted file mode 100644 index 4ead4b7b28..0000000000 --- a/patches.drivers/0180-md-raid10-reset-the-first-at-the-end-of-loop.patch +++ /dev/null @@ -1,46 +0,0 @@ -From: Guoqing Jiang <gqjiang@suse.com> -Date: Thu, 6 Apr 2017 09:12:18 +0800 -Subject: [PATCH] md/raid10: reset the 'first' at the end of loop -Git-commit: 6f287ca6046edd34ed83aafb7f9033c9c2e809e2 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -We need to set "first = 0' at the end of rdev_for_each -loop, so we can get the array's min_offset_diff correctly -otherwise min_offset_diff just means the last rdev's -offset diff. 
- -Suggested-by: NeilBrown <neilb@suse.com> -Signed-off-by: Guoqing Jiang <gqjiang@suse.com> -Reviewed-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid10.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index 0f13d57..e055ec9 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -3769,6 +3769,7 @@ static int raid10_run(struct mddev *mddev) - - if (blk_queue_discard(bdev_get_queue(rdev->bdev))) - discard_supported = true; -+ first = 0; - } - - if (mddev->queue) { -@@ -4172,6 +4173,7 @@ static int raid10_start_reshape(struct mddev *mddev) - if (first || diff < min_offset_diff) - min_offset_diff = diff; - } -+ first = 0; - } - - if (max(before_length, after_length) > min_offset_diff) --- -2.10.2 - diff --git a/patches.drivers/0181-md-MD_CLOSING-needs-to-be-cleared-after-called-md_se.patch b/patches.drivers/0181-md-MD_CLOSING-needs-to-be-cleared-after-called-md_se.patch deleted file mode 100644 index eda74b3ebd..0000000000 --- a/patches.drivers/0181-md-MD_CLOSING-needs-to-be-cleared-after-called-md_se.patch +++ /dev/null @@ -1,59 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Thu, 6 Apr 2017 11:16:33 +0800 -Subject: [PATCH] md: MD_CLOSING needs to be cleared after called - md_set_readonly or do_md_stop -Git-commit: 065e519e71b2c1f41936cce75b46b5ab34adb588 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -if called md_set_readonly and set MD_CLOSING bit, the mddev cannot -be opened any more due to the MD_CLOING bit wasn't cleared. Thus it -needs to be cleared in md_ioctl after any call to md_set_readonly() -or do_md_stop(). 
- -Signed-off-by: NeilBrown <neilb@suse.com> -Fixes: af8d8e6f0315 ("md: changes for MD_STILL_CLOSED flag") -Cc: stable@vger.kernel.org (v4.9+) -Signed-off-by: Zhilong Liu <zlliu@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index 1db88d7..a612b9f 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -6925,6 +6925,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, - void __user *argp = (void __user *)arg; - struct mddev *mddev = NULL; - int ro; -+ bool did_set_md_closing = false; - - if (!md_ioctl_valid(cmd)) - return -ENOTTY; -@@ -7014,7 +7015,9 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, - err = -EBUSY; - goto out; - } -+ WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags)); - set_bit(MD_CLOSING, &mddev->flags); -+ did_set_md_closing = true; - mutex_unlock(&mddev->open_mutex); - sync_blockdev(bdev); - } -@@ -7207,6 +7210,8 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, - mddev->hold_active = 0; - mddev_unlock(mddev); - out: -+ if(did_set_md_closing) -+ clear_bit(MD_CLOSING, &mddev->flags); - return err; - } - #ifdef CONFIG_COMPAT --- -2.10.2 - diff --git a/patches.drivers/0182-md.c-didn-t-unlock-the-mddev-before-return-EINVAL-in.patch b/patches.drivers/0182-md.c-didn-t-unlock-the-mddev-before-return-EINVAL-in.patch deleted file mode 100644 index b2845094a5..0000000000 --- a/patches.drivers/0182-md.c-didn-t-unlock-the-mddev-before-return-EINVAL-in.patch +++ /dev/null @@ -1,42 +0,0 @@ -From: Zhilong Liu <zlliu@suse.com> -Date: Mon, 10 Apr 2017 14:15:55 +0800 -Subject: [PATCH] md.c:didn't unlock the mddev before return EINVAL in - array_size_store -Git-commit: b670883bb9e55ba63a278d83e034faefc01ce2cf -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - 
-Md.c: it needs to release the mddev lock before -the array_size_store() returns. - -Fixes: ab5a98b132fd ("md-cluster: change array_sectors and update size are not supported") - -Signed-off-by: Zhilong Liu <zlliu@suse.com> -Reviewed-by: Guoqing Jiang <gqjiang@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index a612b9f..5022be1 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -4950,8 +4950,10 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len) - return err; - - /* cluster raid doesn't support change array_sectors */ -- if (mddev_is_clustered(mddev)) -+ if (mddev_is_clustered(mddev)) { -+ mddev_unlock(mddev); - return -EINVAL; -+ } - - if (strncmp(buf, "default", 7) == 0) { - if (mddev->pers) --- -2.10.2 - diff --git a/patches.drivers/0183-md-raid1-avoid-reusing-a-resync-bio-after-error-hand.patch b/patches.drivers/0183-md-raid1-avoid-reusing-a-resync-bio-after-error-hand.patch deleted file mode 100644 index c9e8b98de3..0000000000 --- a/patches.drivers/0183-md-raid1-avoid-reusing-a-resync-bio-after-error-hand.patch +++ /dev/null @@ -1,54 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Thu, 6 Apr 2017 12:06:37 +1000 -Subject: [PATCH] md/raid1: avoid reusing a resync bio after error handling. -Git-commit: 0c9d5b127f695818c2c5a3868c1f28ca2969e905 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -fix_sync_read_error() modifies a bio on a newly faulty -device by setting bi_end_io to end_sync_write. -This ensure that put_buf() will still call rdev_dec_pending() -as required, but makes sure that subsequent code in -fix_sync_read_error() doesn't try to read from the device. 
- -Unfortunately this interacts badly with sync_request_write() -which assumes that any bio with bi_end_io set to non-NULL -other than end_sync_read is safe to write to. - -As the device is now faulty it doesn't make sense to write. -As the bio was recently used for a read, it is "dirty" -and not suitable for immediate submission. -In particular, ->bi_next might be non-NULL, which will cause -generic_make_request() to complain. - -Break this interaction by refusing to write to devices -which are marked as Faulty. - -Reported-and-tested-by: Michael Wang <yun.wang@profitbricks.com> -Fixes: 2e52d449bcec ("md/raid1: add failfast handling for reads.") -Cc: stable@vger.kernel.org (v4.10+) -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index 7d67235..70278b9 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -2218,6 +2218,8 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) - (i == r1_bio->read_disk || - !test_bit(MD_RECOVERY_SYNC, &mddev->recovery)))) - continue; -+ if (test_bit(Faulty, &conf->mirrors[i].rdev->flags)) -+ continue; - - bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); - if (test_bit(FailFast, &conf->mirrors[i].rdev->flags)) --- -2.10.2 - diff --git a/patches.drivers/0184-md-raid1-simplify-the-splitting-of-requests.patch b/patches.drivers/0184-md-raid1-simplify-the-splitting-of-requests.patch deleted file mode 100644 index d9b480d06e..0000000000 --- a/patches.drivers/0184-md-raid1-simplify-the-splitting-of-requests.patch +++ /dev/null @@ -1,323 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 5 Apr 2017 14:05:50 +1000 -Subject: [PATCH] md/raid1: simplify the splitting of requests. 
-Git-commit: c230e7e53526c223a3e1caf40747d6e37c0e4394 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -raid1 currently splits requests in two different ways for -two different reasons. - -First, bio_split() is used to ensure the bio fits within a -resync accounting region. -Second, multiple r1bios are allocated for each bio to handle -the possiblity of known bad blocks on some devices. - -This can be simplified to just use bio_split() once, and not -use multiple r1bios. -We delay the split until we know a maximum bio size that can -be handled with a single r1bio, and then split the bio and -queue the remainder for later handling. - -This avoids all loops inside raid1.c request handling. Just -a single read, or a single set of writes, is submitted to -lower-level devices for each bio that comes from -generic_make_request(). - -When the bio needs to be split, generic_make_request() will -do the necessary looping and call md_make_request() multiple -times. - -raid1_make_request() no longer queues request for raid1 to handle, -so we can remove that branch from the 'if'. - -This patch also creates a new private bio_set -(conf->bio_split) for splitting bios. Using fs_bio_set -is wrong, as it is meant to be used by filesystems, not -block devices. Using it inside md can lead to deadlocks -under high memory pressure. 
- -Delete unused variable in raid1_write_request() (Shaohua) - -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 120 ++++++++++++++++++++++------------------------------- - drivers/md/raid1.h | 2 - 2 files changed, 52 insertions(+), 70 deletions(-) - ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -1198,7 +1198,8 @@ alloc_r1bio(struct mddev *mddev, struct - return r1_bio; - } - --static void raid1_read_request(struct mddev *mddev, struct bio *bio) -+static void raid1_read_request(struct mddev *mddev, struct bio *bio, -+ int max_read_sectors) - { - struct r1conf *conf = mddev->private; - struct raid1_info *mirror; -@@ -1207,7 +1208,6 @@ static void raid1_read_request(struct md - struct bitmap *bitmap = mddev->bitmap; - const int op = bio_op(bio); - const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); -- int sectors_handled; - int max_sectors; - int rdisk; - -@@ -1218,12 +1218,12 @@ static void raid1_read_request(struct md - wait_read_barrier(conf, bio->bi_iter.bi_sector); - - r1_bio = alloc_r1bio(mddev, bio, 0); -+ r1_bio->sectors = max_read_sectors; - - /* - * make_request() can abort the operation when read-ahead is being - * used and no empty request is available. 
- */ --read_again: - rdisk = read_balance(conf, r1_bio, &max_sectors); - - if (rdisk < 0) { -@@ -1243,11 +1243,20 @@ read_again: - wait_event(bitmap->behind_wait, - atomic_read(&bitmap->behind_writes) == 0); - } -+ -+ if (max_sectors < bio_sectors(bio)) { -+ struct bio *split = bio_split(bio, max_sectors, -+ GFP_NOIO, conf->bio_split); -+ bio_chain(split, bio); -+ generic_make_request(bio); -+ bio = split; -+ r1_bio->master_bio = bio; -+ r1_bio->sectors = max_sectors; -+ } -+ - r1_bio->read_disk = rdisk; - - read_bio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); -- bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector, -- max_sectors); - - r1_bio->bios[rdisk] = read_bio; - -@@ -1266,30 +1275,11 @@ read_again: - read_bio, disk_devt(mddev->gendisk), - r1_bio->sector); - -- if (max_sectors < r1_bio->sectors) { -- /* -- * could not read all from this device, so we will need another -- * r1_bio. -- */ -- sectors_handled = (r1_bio->sector + max_sectors -- - bio->bi_iter.bi_sector); -- r1_bio->sectors = max_sectors; -- bio_inc_remaining(bio); -- -- /* -- * Cannot call generic_make_request directly as that will be -- * queued in __make_request and subsequent mempool_alloc might -- * block waiting for it. So hand bio over to raid1d. 
-- */ -- reschedule_retry(r1_bio); -- -- r1_bio = alloc_r1bio(mddev, bio, sectors_handled); -- goto read_again; -- } else -- generic_make_request(read_bio); -+ generic_make_request(read_bio); - } - --static void raid1_write_request(struct mddev *mddev, struct bio *bio) -+static void raid1_write_request(struct mddev *mddev, struct bio *bio, -+ int max_write_sectors) - { - struct r1conf *conf = mddev->private; - struct r1bio *r1_bio; -@@ -1300,9 +1290,7 @@ static void raid1_write_request(struct m - struct blk_plug_cb *cb; - struct raid1_plug_cb *plug = NULL; - int first_clone; -- int sectors_handled; - int max_sectors; -- sector_t offset; - - /* - * Register the new request and wait if the reconstruction -@@ -1341,6 +1329,7 @@ static void raid1_write_request(struct m - wait_barrier(conf, bio->bi_iter.bi_sector); - - r1_bio = alloc_r1bio(mddev, bio, 0); -+ r1_bio->sectors = max_write_sectors; - - if (conf->pending_count >= max_queued_requests) { - md_wakeup_thread(mddev->thread); -@@ -1439,17 +1428,21 @@ static void raid1_write_request(struct m - goto retry_write; - } - -- if (max_sectors < r1_bio->sectors) -+ if (max_sectors < bio_sectors(bio)) { -+ struct bio *split = bio_split(bio, max_sectors, -+ GFP_NOIO, conf->bio_split); -+ bio_chain(split, bio); -+ generic_make_request(bio); -+ bio = split; -+ r1_bio->master_bio = bio; - r1_bio->sectors = max_sectors; -- -- sectors_handled = r1_bio->sector + max_sectors - bio->bi_iter.bi_sector; -+ } - - atomic_set(&r1_bio->remaining, 1); - atomic_set(&r1_bio->behind_remaining, 0); - - first_clone = 1; - -- offset = r1_bio->sector - bio->bi_iter.bi_sector; - for (i = 0; i < disks; i++) { - struct bio *mbio = NULL; - if (!r1_bio->bios[i]) -@@ -1466,7 +1459,7 @@ static void raid1_write_request(struct m - < mddev->bitmap_info.max_write_behind) && - !waitqueue_active(&bitmap->behind_wait)) { - mbio = alloc_behind_master_bio(r1_bio, bio, -- offset << 9, -+ 0, - max_sectors << 9); - } - -@@ -1482,10 +1475,8 @@ static void 
raid1_write_request(struct m - mbio = bio_clone_fast(r1_bio->behind_master_bio, - GFP_NOIO, - mddev->bio_set); -- else { -+ else - mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); -- bio_trim(mbio, offset, max_sectors); -- } - } - - if (r1_bio->behind_master_bio) { -@@ -1532,19 +1523,6 @@ static void raid1_write_request(struct m - if (!plug) - md_wakeup_thread(mddev->thread); - } -- /* Mustn't call r1_bio_write_done before this next test, -- * as it could result in the bio being freed. -- */ -- if (sectors_handled < bio_sectors(bio)) { -- /* We need another r1_bio, which must be counted */ -- sector_t sect = bio->bi_iter.bi_sector + sectors_handled; -- -- inc_pending(conf, sect); -- bio_inc_remaining(bio); -- r1_bio_write_done(r1_bio); -- r1_bio = alloc_r1bio(mddev, bio, sectors_handled); -- goto retry_write; -- } - - r1_bio_write_done(r1_bio); - -@@ -1554,7 +1532,6 @@ static void raid1_write_request(struct m - - static void raid1_make_request(struct mddev *mddev, struct bio *bio) - { -- struct bio *split; - sector_t sectors; - - if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { -@@ -1562,22 +1539,20 @@ static void raid1_make_request(struct md - return; - } - -- /* if bio exceeds barrier unit boundary, split it */ -- do { -- sectors = align_to_barrier_unit_end( -- bio->bi_iter.bi_sector, bio_sectors(bio)); -- if (sectors < bio_sectors(bio)) { -- split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set); -- bio_chain(split, bio); -- } else { -- split = bio; -- } -+ /* -+ * There is a limit to the maximum size, but -+ * the read/write handler might find a lower limit -+ * due to bad blocks. To avoid multiple splits, -+ * we pass the maximum number of sectors down -+ * and let the lower level perform the split. 
-+ */ -+ sectors = align_to_barrier_unit_end( -+ bio->bi_iter.bi_sector, bio_sectors(bio)); - -- if (bio_data_dir(split) == READ) -- raid1_read_request(mddev, split); -- else -- raid1_write_request(mddev, split); -- } while (split != bio); -+ if (bio_data_dir(bio) == READ) -+ raid1_read_request(mddev, bio, sectors); -+ else -+ raid1_write_request(mddev, bio, sectors); - } - - static void raid1_status(struct seq_file *seq, struct mddev *mddev) -@@ -2622,10 +2597,7 @@ static void raid1d(struct md_thread *thr - else if (test_bit(R1BIO_ReadError, &r1_bio->state)) - handle_read_error(conf, r1_bio); - else -- /* just a partial read to be scheduled from separate -- * context -- */ -- generic_make_request(r1_bio->bios[r1_bio->read_disk]); -+ WARN_ON_ONCE(1); - - cond_resched(); - if (mddev->sb_flags & ~(1<<MD_SB_CHANGE_PENDING)) -@@ -3013,6 +2985,10 @@ static struct r1conf *setup_conf(struct - if (!conf->r1bio_pool) - goto abort; - -+ conf->bio_split = bioset_create(BIO_POOL_SIZE, 0); -+ if (!conf->bio_split) -+ goto abort; -+ - conf->poolinfo->mddev = mddev; - - err = -EINVAL; -@@ -3094,6 +3070,8 @@ static struct r1conf *setup_conf(struct - kfree(conf->nr_waiting); - kfree(conf->nr_queued); - kfree(conf->barrier); -+ if (conf->bio_split) -+ bioset_free(conf->bio_split); - kfree(conf); - } - return ERR_PTR(err); -@@ -3199,6 +3177,8 @@ static void raid1_free(struct mddev *mdd - kfree(conf->nr_waiting); - kfree(conf->nr_queued); - kfree(conf->barrier); -+ if (conf->bio_split) -+ bioset_free(conf->bio_split); - kfree(conf); - } - ---- a/drivers/md/raid1.h -+++ b/drivers/md/raid1.h -@@ -107,6 +107,8 @@ struct r1conf { - mempool_t *r1bio_pool; - mempool_t *r1buf_pool; - -+ struct bio_set *bio_split; -+ - /* temporary buffer to synchronous IO when attempting to repair - * a read error. 
- */ diff --git a/patches.drivers/0185-md-raid1-simplify-alloc_behind_master_bio.patch b/patches.drivers/0185-md-raid1-simplify-alloc_behind_master_bio.patch deleted file mode 100644 index b17704e674..0000000000 --- a/patches.drivers/0185-md-raid1-simplify-alloc_behind_master_bio.patch +++ /dev/null @@ -1,62 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 5 Apr 2017 14:05:50 +1000 -Subject: [PATCH] md/raid1: simplify alloc_behind_master_bio() -Git-commit: cb83efcfd26a28b76eef8815a41158c1896fc5ba -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -Now that we always always pass an offset of 0 and a size -that matches the bio to alloc_behind_master_bio(), -we can remove the offset/size args and simplify the code. - -We could probably remove bio_copy_data_partial() too. - -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 11 ++++------- - 1 file changed, 4 insertions(+), 7 deletions(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index 94f1d75..18af00c 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -1091,9 +1091,9 @@ static void unfreeze_array(struct r1conf *conf) - } - - static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio, -- struct bio *bio, -- int offset, int size) -+ struct bio *bio) - { -+ int size = bio->bi_iter.bi_size; - unsigned vcnt = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; - int i = 0; - struct bio *behind_bio = NULL; -@@ -1120,8 +1120,7 @@ static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio, - i++; - } - -- bio_copy_data_partial(behind_bio, bio, offset, -- behind_bio->bi_iter.bi_size); -+ bio_copy_data(behind_bio, bio); - skip_copy: - r1_bio->behind_master_bio = behind_bio;; - set_bit(R1BIO_BehindIO, &r1_bio->state); -@@ -1462,9 +1461,7 @@ static void raid1_write_request(struct mddev *mddev, 
struct bio *bio, - (atomic_read(&bitmap->behind_writes) - < mddev->bitmap_info.max_write_behind) && - !waitqueue_active(&bitmap->behind_wait)) { -- mbio = alloc_behind_master_bio(r1_bio, bio, -- 0, -- max_sectors << 9); -+ mbio = alloc_behind_master_bio(r1_bio, bio); - } - - bitmap_startwrite(bitmap, r1_bio->sector, --- -2.10.2 - diff --git a/patches.drivers/0186-md-raid1-simplify-handle_read_error.patch b/patches.drivers/0186-md-raid1-simplify-handle_read_error.patch deleted file mode 100644 index 2a52a3c0fc..0000000000 --- a/patches.drivers/0186-md-raid1-simplify-handle_read_error.patch +++ /dev/null @@ -1,296 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 5 Apr 2017 14:05:50 +1000 -Subject: [PATCH] md/raid1: simplify handle_read_error(). -Git-commit: 689389a06ce79fdced85b5115717f71c71e623e0 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -handle_read_error() duplicates a lot of the work that raid1_read_request() -does, so it makes sense to just use that function. -This doesn't quite work as handle_read_error() relies on the same r1bio -being re-used so that, in the case of a read-only array, setting -IO_BLOCKED in r1bio->bios[] ensures read_balance() won't re-use -that device. -So we need to allow a r1bio to be passed to raid1_read_request(), and to -have that function mostly initialise the r1bio, but leave the bios[] -array untouched. - -Two parts of handle_read_error() that need to be preserved are the warning -message it prints, so they are conditionally added to raid1_read_request(). - -Note that this highlights a minor bug on alloc_r1bio(). It doesn't -initalise the bios[] array, so it is possible that old content is there, -which might cause read_balance() to ignore some devices with no good reason. - -With this change, we no longer need inc_pending(), or the sectors_handled -arg to alloc_r1bio(). 
- -As handle_read_error() is called from raid1d() and allocates memory, -there is tiny chance of a deadlock. All element of various pools -could be queued waiting for raid1 to handle them, and there may be no -extra memory free. -Achieving guaranteed forward progress would probably require a second -thread and another mempool. Instead of that complexity, add -__GFP_HIGH to any allocations when read1_read_request() is called -from raid1d. - -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 140 +++++++++++++++++++++++------------------------------ - 1 file changed, 60 insertions(+), 80 deletions(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index 18af00c..29a9aa9 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -988,16 +988,6 @@ static void wait_read_barrier(struct r1conf *conf, sector_t sector_nr) - spin_unlock_irq(&conf->resync_lock); - } - --static void inc_pending(struct r1conf *conf, sector_t bi_sector) --{ -- /* The current request requires multiple r1_bio, so -- * we need to increment the pending count, and the corresponding -- * window count. 
-- */ -- int idx = sector_to_idx(bi_sector); -- atomic_inc(&conf->nr_pending[idx]); --} -- - static void wait_barrier(struct r1conf *conf, sector_t sector_nr) - { - int idx = sector_to_idx(sector_nr); -@@ -1184,35 +1174,60 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) - kfree(plug); - } - -+static void init_r1bio(struct r1bio *r1_bio, struct mddev *mddev, struct bio *bio) -+{ -+ r1_bio->master_bio = bio; -+ r1_bio->sectors = bio_sectors(bio); -+ r1_bio->state = 0; -+ r1_bio->mddev = mddev; -+ r1_bio->sector = bio->bi_iter.bi_sector; -+} -+ - static inline struct r1bio * --alloc_r1bio(struct mddev *mddev, struct bio *bio, sector_t sectors_handled) -+alloc_r1bio(struct mddev *mddev, struct bio *bio) - { - struct r1conf *conf = mddev->private; - struct r1bio *r1_bio; - - r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); -- -- r1_bio->master_bio = bio; -- r1_bio->sectors = bio_sectors(bio) - sectors_handled; -- r1_bio->state = 0; -- r1_bio->mddev = mddev; -- r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled; -- -+ /* Ensure no bio records IO_BLOCKED */ -+ memset(r1_bio->bios, 0, conf->raid_disks * sizeof(r1_bio->bios[0])); -+ init_r1bio(r1_bio, mddev, bio); - return r1_bio; - } - - static void raid1_read_request(struct mddev *mddev, struct bio *bio, -- int max_read_sectors) -+ int max_read_sectors, struct r1bio *r1_bio) - { - struct r1conf *conf = mddev->private; - struct raid1_info *mirror; -- struct r1bio *r1_bio; - struct bio *read_bio; - struct bitmap *bitmap = mddev->bitmap; - const int op = bio_op(bio); - const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); - int max_sectors; - int rdisk; -+ bool print_msg = !!r1_bio; -+ char b[BDEVNAME_SIZE]; -+ -+ /* -+ * If r1_bio is set, we are blocking the raid1d thread -+ * so there is a tiny risk of deadlock. So ask for -+ * emergency memory if needed. -+ */ -+ gfp_t gfp = r1_bio ? 
(GFP_NOIO | __GFP_HIGH) : GFP_NOIO; -+ -+ if (print_msg) { -+ /* Need to get the block device name carefully */ -+ struct md_rdev *rdev; -+ rcu_read_lock(); -+ rdev = rcu_dereference(conf->mirrors[r1_bio->read_disk].rdev); -+ if (rdev) -+ bdevname(rdev->bdev, b); -+ else -+ strcpy(b, "???"); -+ rcu_read_unlock(); -+ } - - /* - * Still need barrier for READ in case that whole -@@ -1220,7 +1235,10 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, - */ - wait_read_barrier(conf, bio->bi_iter.bi_sector); - -- r1_bio = alloc_r1bio(mddev, bio, 0); -+ if (!r1_bio) -+ r1_bio = alloc_r1bio(mddev, bio); -+ else -+ init_r1bio(r1_bio, mddev, bio); - r1_bio->sectors = max_read_sectors; - - /* -@@ -1231,11 +1249,23 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, - - if (rdisk < 0) { - /* couldn't find anywhere to read from */ -+ if (print_msg) { -+ pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n", -+ mdname(mddev), -+ b, -+ (unsigned long long)r1_bio->sector); -+ } - raid_end_bio_io(r1_bio); - return; - } - mirror = conf->mirrors + rdisk; - -+ if (print_msg) -+ pr_info_ratelimited("md/raid1:%s: redirecting sector %llu to other mirror: %s\n", -+ mdname(mddev), -+ (unsigned long long)r1_bio->sector, -+ bdevname(mirror->rdev->bdev, b)); -+ - if (test_bit(WriteMostly, &mirror->rdev->flags) && - bitmap) { - /* -@@ -1249,7 +1279,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, - - if (max_sectors < bio_sectors(bio)) { - struct bio *split = bio_split(bio, max_sectors, -- GFP_NOIO, conf->bio_split); -+ gfp, conf->bio_split); - bio_chain(split, bio); - generic_make_request(bio); - bio = split; -@@ -1259,7 +1289,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, - - r1_bio->read_disk = rdisk; - -- read_bio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); -+ read_bio = bio_clone_fast(bio, gfp, mddev->bio_set); - - r1_bio->bios[rdisk] = read_bio; - -@@ -1331,7 
+1361,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, - } - wait_barrier(conf, bio->bi_iter.bi_sector); - -- r1_bio = alloc_r1bio(mddev, bio, 0); -+ r1_bio = alloc_r1bio(mddev, bio); - r1_bio->sectors = max_write_sectors; - - if (conf->pending_count >= max_queued_requests) { -@@ -1551,7 +1581,7 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio) - bio->bi_iter.bi_sector, bio_sectors(bio)); - - if (bio_data_dir(bio) == READ) -- raid1_read_request(mddev, bio, sectors); -+ raid1_read_request(mddev, bio, sectors, NULL); - else - raid1_write_request(mddev, bio, sectors); - } -@@ -2443,11 +2473,8 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio) - - static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) - { -- int disk; -- int max_sectors; - struct mddev *mddev = conf->mddev; - struct bio *bio; -- char b[BDEVNAME_SIZE]; - struct md_rdev *rdev; - dev_t bio_dev; - sector_t bio_sector; -@@ -2463,7 +2490,6 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) - */ - - bio = r1_bio->bios[r1_bio->read_disk]; -- bdevname(bio->bi_bdev, b); - bio_dev = bio->bi_bdev->bd_dev; - bio_sector = conf->mirrors[r1_bio->read_disk].rdev->data_offset + r1_bio->sector; - bio_put(bio); -@@ -2481,58 +2507,12 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) - } - - rdev_dec_pending(rdev, conf->mddev); -+ allow_barrier(conf, r1_bio->sector); -+ bio = r1_bio->master_bio; - --read_more: -- disk = read_balance(conf, r1_bio, &max_sectors); -- if (disk == -1) { -- pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n", -- mdname(mddev), b, (unsigned long long)r1_bio->sector); -- raid_end_bio_io(r1_bio); -- } else { -- const unsigned long do_sync -- = r1_bio->master_bio->bi_opf & REQ_SYNC; -- r1_bio->read_disk = disk; -- bio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO, -- mddev->bio_set); -- bio_trim(bio, r1_bio->sector - 
bio->bi_iter.bi_sector, -- max_sectors); -- r1_bio->bios[r1_bio->read_disk] = bio; -- rdev = conf->mirrors[disk].rdev; -- pr_info_ratelimited("md/raid1:%s: redirecting sector %llu to other mirror: %s\n", -- mdname(mddev), -- (unsigned long long)r1_bio->sector, -- bdevname(rdev->bdev, b)); -- bio->bi_iter.bi_sector = r1_bio->sector + rdev->data_offset; -- bio->bi_bdev = rdev->bdev; -- bio->bi_end_io = raid1_end_read_request; -- bio_set_op_attrs(bio, REQ_OP_READ, do_sync); -- if (test_bit(FailFast, &rdev->flags) && -- test_bit(R1BIO_FailFast, &r1_bio->state)) -- bio->bi_opf |= MD_FAILFAST; -- bio->bi_private = r1_bio; -- if (max_sectors < r1_bio->sectors) { -- /* Drat - have to split this up more */ -- struct bio *mbio = r1_bio->master_bio; -- int sectors_handled = (r1_bio->sector + max_sectors -- - mbio->bi_iter.bi_sector); -- r1_bio->sectors = max_sectors; -- bio_inc_remaining(mbio); -- trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), -- bio, bio_dev, bio_sector); -- generic_make_request(bio); -- bio = NULL; -- -- r1_bio = alloc_r1bio(mddev, mbio, sectors_handled); -- set_bit(R1BIO_ReadError, &r1_bio->state); -- inc_pending(conf, r1_bio->sector); -- -- goto read_more; -- } else { -- trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), -- bio, bio_dev, bio_sector); -- generic_make_request(bio); -- } -- } -+ /* Reuse the old r1_bio so that the IO_BLOCKED settings are preserved */ -+ r1_bio->state = 0; -+ raid1_read_request(mddev, bio, r1_bio->sectors, r1_bio); - } - - static void raid1d(struct md_thread *thread) --- -2.10.2 - diff --git a/patches.drivers/0187-md-raid1-factor-out-flush_bio_list.patch b/patches.drivers/0187-md-raid1-factor-out-flush_bio_list.patch deleted file mode 100644 index d515ce2dd3..0000000000 --- a/patches.drivers/0187-md-raid1-factor-out-flush_bio_list.patch +++ /dev/null @@ -1,114 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 5 Apr 2017 14:05:51 +1000 -Subject: [PATCH] md/raid1: factor out flush_bio_list() -Git-commit: 
673ca68d93879b9ffbbed874c9e70ca6e37cab15 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -flush_pending_writes() and raid1_unplug() each contain identical -copies of a fairly large slab of code. So factor that out into -new flush_bio_list() to simplify maintenance. - -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 66 +++++++++++++++++++++--------------------------------- - 1 file changed, 26 insertions(+), 40 deletions(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index 29a9aa9..57611f4 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -787,6 +787,30 @@ static int raid1_congested(struct mddev *mddev, int bits) - return ret; - } - -+static void flush_bio_list(struct r1conf *conf, struct bio *bio) -+{ -+ /* flush any pending bitmap writes to disk before proceeding w/ I/O */ -+ bitmap_unplug(conf->mddev->bitmap); -+ wake_up(&conf->wait_barrier); -+ -+ while (bio) { /* submit pending writes */ -+ struct bio *next = bio->bi_next; -+ struct md_rdev *rdev = (void*)bio->bi_bdev; -+ bio->bi_next = NULL; -+ bio->bi_bdev = rdev->bdev; -+ if (test_bit(Faulty, &rdev->flags)) { -+ bio->bi_error = -EIO; -+ bio_endio(bio); -+ } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && -+ !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) -+ /* Just ignore it */ -+ bio_endio(bio); -+ else -+ generic_make_request(bio); -+ bio = next; -+ } -+} -+ - static void flush_pending_writes(struct r1conf *conf) - { - /* Any writes that have been queued but are awaiting -@@ -799,27 +823,7 @@ static void flush_pending_writes(struct r1conf *conf) - bio = bio_list_get(&conf->pending_bio_list); - conf->pending_count = 0; - spin_unlock_irq(&conf->device_lock); -- /* flush any pending bitmap writes to -- * disk before proceeding w/ I/O */ -- 
bitmap_unplug(conf->mddev->bitmap); -- wake_up(&conf->wait_barrier); -- -- while (bio) { /* submit pending writes */ -- struct bio *next = bio->bi_next; -- struct md_rdev *rdev = (void*)bio->bi_bdev; -- bio->bi_next = NULL; -- bio->bi_bdev = rdev->bdev; -- if (test_bit(Faulty, &rdev->flags)) { -- bio->bi_error = -EIO; -- bio_endio(bio); -- } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && -- !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) -- /* Just ignore it */ -- bio_endio(bio); -- else -- generic_make_request(bio); -- bio = next; -- } -+ flush_bio_list(conf, bio); - } else - spin_unlock_irq(&conf->device_lock); - } -@@ -1152,25 +1156,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) - - /* we aren't scheduling, so we can do the write-out directly. */ - bio = bio_list_get(&plug->pending); -- bitmap_unplug(mddev->bitmap); -- wake_up(&conf->wait_barrier); -- -- while (bio) { /* submit pending writes */ -- struct bio *next = bio->bi_next; -- struct md_rdev *rdev = (void*)bio->bi_bdev; -- bio->bi_next = NULL; -- bio->bi_bdev = rdev->bdev; -- if (test_bit(Faulty, &rdev->flags)) { -- bio->bi_error = -EIO; -- bio_endio(bio); -- } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && -- !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) -- /* Just ignore it */ -- bio_endio(bio); -- else -- generic_make_request(bio); -- bio = next; -- } -+ flush_bio_list(conf, bio); - kfree(plug); - } - --- -2.10.2 - diff --git a/patches.drivers/0188-md-raid10-simplify-the-splitting-of-requests.patch b/patches.drivers/0188-md-raid10-simplify-the-splitting-of-requests.patch deleted file mode 100644 index de94693b4c..0000000000 --- a/patches.drivers/0188-md-raid10-simplify-the-splitting-of-requests.patch +++ /dev/null @@ -1,345 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 5 Apr 2017 14:05:51 +1000 -Subject: [PATCH] md/raid10: simplify the splitting of requests. 
-Git-commit: fc9977dd069e4f82fcacb262652117c488647319 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -raid10 splits requests in two different ways for two different -reasons. - -First, bio_split() is used to ensure the bio fits with a chunk. -Second, multiple r10bio structures are allocated to represent the -different sections that need to go to different devices, to avoid -known bad blocks. - -This can be simplified to just use bio_split() once, and not to use -multiple r10bios. -We delay the split until we know a maximum bio size that can -be handled with a single r10bio, and then split the bio and queue -the remainder for later handling. - -As with raid1, we allocate a new bio_set to help with the splitting. -It is not correct to use fs_bio_set in a device driver. - -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid10.c | 164 ++++++++++++++++------------------------------------ - drivers/md/raid10.h | 1 + - 2 files changed, 51 insertions(+), 114 deletions(-) - -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index e055ec9..41845ba 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -1127,7 +1127,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, - struct bio *read_bio; - const int op = bio_op(bio); - const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); -- int sectors_handled; - int max_sectors; - sector_t sectors; - struct md_rdev *rdev; -@@ -1140,7 +1139,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, - */ - wait_barrier(conf); - -- sectors = bio_sectors(bio); -+ sectors = r10_bio->sectors; - while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && - bio->bi_iter.bi_sector < conf->reshape_progress && - bio->bi_iter.bi_sector + sectors > conf->reshape_progress) { -@@ -1157,17 +1156,23 
@@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, - wait_barrier(conf); - } - --read_again: - rdev = read_balance(conf, r10_bio, &max_sectors); - if (!rdev) { - raid_end_bio_io(r10_bio); - return; - } -+ if (max_sectors < bio_sectors(bio)) { -+ struct bio *split = bio_split(bio, max_sectors, -+ GFP_NOIO, conf->bio_split); -+ bio_chain(split, bio); -+ generic_make_request(bio); -+ bio = split; -+ r10_bio->master_bio = bio; -+ r10_bio->sectors = max_sectors; -+ } - slot = r10_bio->read_slot; - - read_bio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); -- bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector, -- max_sectors); - - r10_bio->devs[slot].bio = read_bio; - r10_bio->devs[slot].rdev = rdev; -@@ -1186,40 +1191,13 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, - trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev), - read_bio, disk_devt(mddev->gendisk), - r10_bio->sector); -- if (max_sectors < r10_bio->sectors) { -- /* -- * Could not read all from this device, so we will need another -- * r10_bio. -- */ -- sectors_handled = (r10_bio->sector + max_sectors -- - bio->bi_iter.bi_sector); -- r10_bio->sectors = max_sectors; -- inc_pending(conf); -- bio_inc_remaining(bio); -- /* -- * Cannot call generic_make_request directly as that will be -- * queued in __generic_make_request and subsequent -- * mempool_alloc might block waiting for it. so hand bio over -- * to raid10d. 
-- */ -- reschedule_retry(r10_bio); -- -- r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); -- -- r10_bio->master_bio = bio; -- r10_bio->sectors = bio_sectors(bio) - sectors_handled; -- r10_bio->state = 0; -- r10_bio->mddev = mddev; -- r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled; -- goto read_again; -- } else -- generic_make_request(read_bio); -+ generic_make_request(read_bio); - return; - } - - static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, - struct bio *bio, bool replacement, -- int n_copy, int max_sectors) -+ int n_copy) - { - const int op = bio_op(bio); - const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); -@@ -1243,7 +1221,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, - rdev = conf->mirrors[devnum].rdev; - - mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); -- bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector, max_sectors); - if (replacement) - r10_bio->devs[n_copy].repl_bio = mbio; - else -@@ -1294,7 +1271,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, - int i; - struct md_rdev *blocked_rdev; - sector_t sectors; -- int sectors_handled; - int max_sectors; - - md_write_start(mddev, bio); -@@ -1306,7 +1282,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, - */ - wait_barrier(conf); - -- sectors = bio_sectors(bio); -+ sectors = r10_bio->sectors; - while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && - bio->bi_iter.bi_sector < conf->reshape_progress && - bio->bi_iter.bi_sector + sectors > conf->reshape_progress) { -@@ -1476,44 +1452,29 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, - - if (max_sectors < r10_bio->sectors) - r10_bio->sectors = max_sectors; -- sectors_handled = r10_bio->sector + max_sectors - -- bio->bi_iter.bi_sector; -+ -+ if (r10_bio->sectors < bio_sectors(bio)) { -+ struct bio *split = bio_split(bio, r10_bio->sectors, -+ GFP_NOIO, conf->bio_split); -+ 
bio_chain(split, bio); -+ generic_make_request(bio); -+ bio = split; -+ r10_bio->master_bio = bio; -+ } - - atomic_set(&r10_bio->remaining, 1); - bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0); - - for (i = 0; i < conf->copies; i++) { - if (r10_bio->devs[i].bio) -- raid10_write_one_disk(mddev, r10_bio, bio, false, -- i, max_sectors); -+ raid10_write_one_disk(mddev, r10_bio, bio, false, i); - if (r10_bio->devs[i].repl_bio) -- raid10_write_one_disk(mddev, r10_bio, bio, true, -- i, max_sectors); -- } -- -- /* Don't remove the bias on 'remaining' (one_write_done) until -- * after checking if we need to go around again. -- */ -- -- if (sectors_handled < bio_sectors(bio)) { -- /* We need another r10_bio and it needs to be counted */ -- inc_pending(conf); -- bio_inc_remaining(bio); -- one_write_done(r10_bio); -- r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); -- -- r10_bio->master_bio = bio; -- r10_bio->sectors = bio_sectors(bio) - sectors_handled; -- -- r10_bio->mddev = mddev; -- r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled; -- r10_bio->state = 0; -- goto retry_write; -+ raid10_write_one_disk(mddev, r10_bio, bio, true, i); - } - one_write_done(r10_bio); - } - --static void __make_request(struct mddev *mddev, struct bio *bio) -+static void __make_request(struct mddev *mddev, struct bio *bio, int sectors) - { - struct r10conf *conf = mddev->private; - struct r10bio *r10_bio; -@@ -1521,7 +1482,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio) - r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); - - r10_bio->master_bio = bio; -- r10_bio->sectors = bio_sectors(bio); -+ r10_bio->sectors = sectors; - - r10_bio->mddev = mddev; - r10_bio->sector = bio->bi_iter.bi_sector; -@@ -1538,54 +1499,26 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio) - struct r10conf *conf = mddev->private; - sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask); - int chunk_sects = chunk_mask + 1; -- 
-- struct bio *split; -+ int sectors = bio_sectors(bio); - - if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { - md_flush_request(mddev, bio); - return; - } - -- do { -- -- /* -- * If this request crosses a chunk boundary, we need to split -- * it. -- */ -- if (unlikely((bio->bi_iter.bi_sector & chunk_mask) + -- bio_sectors(bio) > chunk_sects -- && (conf->geo.near_copies < conf->geo.raid_disks -- || conf->prev.near_copies < -- conf->prev.raid_disks))) { -- split = bio_split(bio, chunk_sects - -- (bio->bi_iter.bi_sector & -- (chunk_sects - 1)), -- GFP_NOIO, fs_bio_set); -- bio_chain(split, bio); -- } else { -- split = bio; -- } -- -- /* -- * If a bio is splitted, the first part of bio will pass -- * barrier but the bio is queued in current->bio_list (see -- * generic_make_request). If there is a raise_barrier() called -- * here, the second part of bio can't pass barrier. But since -- * the first part bio isn't dispatched to underlaying disks -- * yet, the barrier is never released, hence raise_barrier will -- * alays wait. We have a deadlock. -- * Note, this only happens in read path. For write path, the -- * first part of bio is dispatched in a schedule() call -- * (because of blk plug) or offloaded to raid10d. -- * Quitting from the function immediately can change the bio -- * order queued in bio_list and avoid the deadlock. -- */ -- __make_request(mddev, split); -- if (split != bio && bio_data_dir(bio) == READ) { -- generic_make_request(bio); -- break; -- } -- } while (split != bio); -+ /* -+ * If this request crosses a chunk boundary, we need to split -+ * it. 
-+ */ -+ if (unlikely((bio->bi_iter.bi_sector & chunk_mask) + -+ sectors > chunk_sects -+ && (conf->geo.near_copies < conf->geo.raid_disks -+ || conf->prev.near_copies < -+ conf->prev.raid_disks))) -+ sectors = chunk_sects - -+ (bio->bi_iter.bi_sector & -+ (chunk_sects - 1)); -+ __make_request(mddev, bio, sectors); - - /* In case raid10d snuck in to freeze_array */ - wake_up(&conf->wait_barrier); -@@ -2873,13 +2806,8 @@ static void raid10d(struct md_thread *thread) - recovery_request_write(mddev, r10_bio); - else if (test_bit(R10BIO_ReadError, &r10_bio->state)) - handle_read_error(mddev, r10_bio); -- else { -- /* just a partial read to be scheduled from a -- * separate context -- */ -- int slot = r10_bio->read_slot; -- generic_make_request(r10_bio->devs[slot].bio); -- } -+ else -+ WARN_ON_ONCE(1); - - cond_resched(); - if (mddev->sb_flags & ~(1<<MD_SB_CHANGE_PENDING)) -@@ -3652,6 +3580,10 @@ static struct r10conf *setup_conf(struct mddev *mddev) - if (!conf->r10bio_pool) - goto out; - -+ conf->bio_split = bioset_create(BIO_POOL_SIZE, 0); -+ if (!conf->bio_split) -+ goto out; -+ - calc_sectors(conf, mddev->dev_sectors); - if (mddev->reshape_position == MaxSector) { - conf->prev = conf->geo; -@@ -3689,6 +3621,8 @@ static struct r10conf *setup_conf(struct mddev *mddev) - mempool_destroy(conf->r10bio_pool); - kfree(conf->mirrors); - safe_put_page(conf->tmppage); -+ if (conf->bio_split) -+ bioset_free(conf->bio_split); - kfree(conf); - } - return ERR_PTR(err); -@@ -3899,6 +3833,8 @@ static void raid10_free(struct mddev *mddev, void *priv) - kfree(conf->mirrors); - kfree(conf->mirrors_old); - kfree(conf->mirrors_new); -+ if (conf->bio_split) -+ bioset_free(conf->bio_split); - kfree(conf); - } - -diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h -index 3162615..735ce1a 100644 ---- a/drivers/md/raid10.h -+++ b/drivers/md/raid10.h -@@ -82,6 +82,7 @@ struct r10conf { - mempool_t *r10bio_pool; - mempool_t *r10buf_pool; - struct page *tmppage; -+ struct bio_set 
*bio_split; - - /* When taking over an array from a different personality, we store - * the new thread here until we fully activate the array. --- -2.10.2 - diff --git a/patches.drivers/0189-md-raid10-simplify-handle_read_error.patch b/patches.drivers/0189-md-raid10-simplify-handle_read_error.patch deleted file mode 100644 index 546fe0b56a..0000000000 --- a/patches.drivers/0189-md-raid10-simplify-handle_read_error.patch +++ /dev/null @@ -1,227 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 5 Apr 2017 14:05:51 +1000 -Subject: [PATCH] md/raid10: simplify handle_read_error() -Git-commit: 545250f2480911f053b092d4229d9f83a9dff222 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -handle_read_error() duplicates a lot of the work that raid10_read_request() -does, so it makes sense to just use that function. - -handle_read_error() relies on the same r10bio being re-used so that, -in the case of a read-only array, setting IO_BLOCKED in r1bio->devs[].bio -ensures read_balance() won't re-use that device. -So when called from raid10_make_request() we clear that array, but not -when called from handle_read_error(). - -Two parts of handle_read_error() that need to be preserved are the warning -message it prints, so they are conditionally added to -raid10_read_request(). If the failing rdev can be found, messages -are printed. Otherwise they aren't. - -Not that as rdev_dec_pending() has already been called on the failing -rdev, we need to use rcu_read_lock() to get a new reference from -the conf. We only use this to get the name of the failing block device. - -With this change, we no longer need inc_pending(). 
- -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid10.c | 122 ++++++++++++++++++++-------------------------------- - 1 file changed, 47 insertions(+), 75 deletions(-) - -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index 41845ba..4167091 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -1008,15 +1008,6 @@ static void wait_barrier(struct r10conf *conf) - spin_unlock_irq(&conf->resync_lock); - } - --static void inc_pending(struct r10conf *conf) --{ -- /* The current request requires multiple r10_bio, so -- * we need to increment the pending count. -- */ -- WARN_ON(!atomic_read(&conf->nr_pending)); -- atomic_inc(&conf->nr_pending); --} -- - static void allow_barrier(struct r10conf *conf) - { - if ((atomic_dec_and_test(&conf->nr_pending)) || -@@ -1130,8 +1121,38 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, - int max_sectors; - sector_t sectors; - struct md_rdev *rdev; -- int slot; -+ char b[BDEVNAME_SIZE]; -+ int slot = r10_bio->read_slot; -+ struct md_rdev *err_rdev = NULL; -+ gfp_t gfp = GFP_NOIO; -+ -+ if (r10_bio->devs[slot].rdev) { -+ /* -+ * This is an error retry, but we cannot -+ * safely dereference the rdev in the r10_bio, -+ * we must use the one in conf. -+ * If it has already been disconnected (unlikely) -+ * we lose the device name in error messages. -+ */ -+ int disk; -+ /* -+ * As we are blocking raid10, it is a little safer to -+ * use __GFP_HIGH. -+ */ -+ gfp = GFP_NOIO | __GFP_HIGH; - -+ rcu_read_lock(); -+ disk = r10_bio->devs[slot].devnum; -+ err_rdev = rcu_dereference(conf->mirrors[disk].rdev); -+ if (err_rdev) -+ bdevname(err_rdev->bdev, b); -+ else { -+ strcpy(b, "???"); -+ /* This never gets dereferenced */ -+ err_rdev = r10_bio->devs[slot].rdev; -+ } -+ rcu_read_unlock(); -+ } - /* - * Register the new request and wait if the reconstruction - * thread has put up a bar for new requests. 
-@@ -1158,12 +1179,22 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, - - rdev = read_balance(conf, r10_bio, &max_sectors); - if (!rdev) { -+ if (err_rdev) { -+ pr_crit_ratelimited("md/raid10:%s: %s: unrecoverable I/O read error for block %llu\n", -+ mdname(mddev), b, -+ (unsigned long long)r10_bio->sector); -+ } - raid_end_bio_io(r10_bio); - return; - } -+ if (err_rdev) -+ pr_err_ratelimited("md/raid10:%s: %s: redirecting sector %llu to another mirror\n", -+ mdname(mddev), -+ bdevname(rdev->bdev, b), -+ (unsigned long long)r10_bio->sector); - if (max_sectors < bio_sectors(bio)) { - struct bio *split = bio_split(bio, max_sectors, -- GFP_NOIO, conf->bio_split); -+ gfp, conf->bio_split); - bio_chain(split, bio); - generic_make_request(bio); - bio = split; -@@ -1172,7 +1203,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, - } - slot = r10_bio->read_slot; - -- read_bio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); -+ read_bio = bio_clone_fast(bio, gfp, mddev->bio_set); - - r10_bio->devs[slot].bio = read_bio; - r10_bio->devs[slot].rdev = rdev; -@@ -1487,6 +1518,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors) - r10_bio->mddev = mddev; - r10_bio->sector = bio->bi_iter.bi_sector; - r10_bio->state = 0; -+ memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->copies); - - if (bio_data_dir(bio) == READ) - raid10_read_request(mddev, bio, r10_bio); -@@ -2556,9 +2588,6 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) - struct bio *bio; - struct r10conf *conf = mddev->private; - struct md_rdev *rdev = r10_bio->devs[slot].rdev; -- char b[BDEVNAME_SIZE]; -- unsigned long do_sync; -- int max_sectors; - dev_t bio_dev; - sector_t bio_last_sector; - -@@ -2571,7 +2600,6 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) - * frozen. 
- */ - bio = r10_bio->devs[slot].bio; -- bdevname(bio->bi_bdev, b); - bio_dev = bio->bi_bdev->bd_dev; - bio_last_sector = r10_bio->devs[slot].addr + rdev->data_offset + r10_bio->sectors; - bio_put(bio); -@@ -2587,65 +2615,9 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) - md_error(mddev, rdev); - - rdev_dec_pending(rdev, mddev); -- --read_more: -- rdev = read_balance(conf, r10_bio, &max_sectors); -- if (rdev == NULL) { -- pr_crit_ratelimited("md/raid10:%s: %s: unrecoverable I/O read error for block %llu\n", -- mdname(mddev), b, -- (unsigned long long)r10_bio->sector); -- raid_end_bio_io(r10_bio); -- return; -- } -- -- do_sync = (r10_bio->master_bio->bi_opf & REQ_SYNC); -- slot = r10_bio->read_slot; -- pr_err_ratelimited("md/raid10:%s: %s: redirecting sector %llu to another mirror\n", -- mdname(mddev), -- bdevname(rdev->bdev, b), -- (unsigned long long)r10_bio->sector); -- bio = bio_clone_fast(r10_bio->master_bio, GFP_NOIO, mddev->bio_set); -- bio_trim(bio, r10_bio->sector - bio->bi_iter.bi_sector, max_sectors); -- r10_bio->devs[slot].bio = bio; -- r10_bio->devs[slot].rdev = rdev; -- bio->bi_iter.bi_sector = r10_bio->devs[slot].addr -- + choose_data_offset(r10_bio, rdev); -- bio->bi_bdev = rdev->bdev; -- bio_set_op_attrs(bio, REQ_OP_READ, do_sync); -- if (test_bit(FailFast, &rdev->flags) && -- test_bit(R10BIO_FailFast, &r10_bio->state)) -- bio->bi_opf |= MD_FAILFAST; -- bio->bi_private = r10_bio; -- bio->bi_end_io = raid10_end_read_request; -- trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), -- bio, bio_dev, -- bio_last_sector - r10_bio->sectors); -- -- if (max_sectors < r10_bio->sectors) { -- /* Drat - have to split this up more */ -- struct bio *mbio = r10_bio->master_bio; -- int sectors_handled = -- r10_bio->sector + max_sectors -- - mbio->bi_iter.bi_sector; -- r10_bio->sectors = max_sectors; -- bio_inc_remaining(mbio); -- inc_pending(conf); -- generic_make_request(bio); -- -- r10_bio = mempool_alloc(conf->r10bio_pool, -- 
GFP_NOIO); -- r10_bio->master_bio = mbio; -- r10_bio->sectors = bio_sectors(mbio) - sectors_handled; -- r10_bio->state = 0; -- set_bit(R10BIO_ReadError, -- &r10_bio->state); -- r10_bio->mddev = mddev; -- r10_bio->sector = mbio->bi_iter.bi_sector -- + sectors_handled; -- -- goto read_more; -- } else -- generic_make_request(bio); -+ allow_barrier(conf); -+ r10_bio->state = 0; -+ raid10_read_request(mddev, r10_bio->master_bio, r10_bio); - } - - static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) --- -2.10.2 - diff --git a/patches.drivers/0190-md-raid5-make-chunk_aligned_read-split-bios-more-cle.patch b/patches.drivers/0190-md-raid5-make-chunk_aligned_read-split-bios-more-cle.patch deleted file mode 100644 index e53ca4ae05..0000000000 --- a/patches.drivers/0190-md-raid5-make-chunk_aligned_read-split-bios-more-cle.patch +++ /dev/null @@ -1,103 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 5 Apr 2017 14:05:51 +1000 -Subject: [PATCH] md/raid5: make chunk_aligned_read() split bios more cleanly. -Git-commit: dd7a8f5dee81ffb1794df1103f07c63fd4f1d766 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -chunk_aligned_read() currently uses fs_bio_set - which is meant for -filesystems to use - and loops if multiple splits are needed, which is -not best practice. -As this is only used for READ requests, not writes, it is unlikely -to cause a problem. However it is best to be consistent in how -we split bios, and to follow the pattern used in raid1/raid10. - -So create a private bioset, bio_split, and use it to perform a single -split, submitting the remainder to generic_make_request() for later -processing. 
- -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid5.c | 33 +++++++++++++++++---------------- - drivers/md/raid5.h | 1 + - 2 files changed, 18 insertions(+), 16 deletions(-) - -diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index f3692ff..356cd9c 100644 ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -5246,24 +5246,20 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) - static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio) - { - struct bio *split; -+ sector_t sector = raid_bio->bi_iter.bi_sector; -+ unsigned chunk_sects = mddev->chunk_sectors; -+ unsigned sectors = chunk_sects - (sector & (chunk_sects-1)); - -- do { -- sector_t sector = raid_bio->bi_iter.bi_sector; -- unsigned chunk_sects = mddev->chunk_sectors; -- unsigned sectors = chunk_sects - (sector & (chunk_sects-1)); -- -- if (sectors < bio_sectors(raid_bio)) { -- split = bio_split(raid_bio, sectors, GFP_NOIO, fs_bio_set); -- bio_chain(split, raid_bio); -- } else -- split = raid_bio; -+ if (sectors < bio_sectors(raid_bio)) { -+ struct r5conf *conf = mddev->private; -+ split = bio_split(raid_bio, sectors, GFP_NOIO, conf->bio_split); -+ bio_chain(split, raid_bio); -+ generic_make_request(raid_bio); -+ raid_bio = split; -+ } - -- if (!raid5_read_one_chunk(mddev, split)) { -- if (split != raid_bio) -- generic_make_request(raid_bio); -- return split; -- } -- } while (split != raid_bio); -+ if (!raid5_read_one_chunk(mddev, raid_bio)) -+ return raid_bio; - - return NULL; - } -@@ -6747,6 +6743,8 @@ static void free_conf(struct r5conf *conf) - if (conf->disks[i].extra_page) - put_page(conf->disks[i].extra_page); - kfree(conf->disks); -+ if (conf->bio_split) -+ bioset_free(conf->bio_split); - kfree(conf->stripe_hashtbl); - kfree(conf->pending_data); - kfree(conf); -@@ -6922,6 +6920,9 @@ static struct r5conf *setup_conf(struct mddev *mddev) - goto 
abort; - } - -+ conf->bio_split = bioset_create(BIO_POOL_SIZE, 0); -+ if (!conf->bio_split) -+ goto abort; - conf->mddev = mddev; - - if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) -diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h -index cdc7f92..625c7f1 100644 ---- a/drivers/md/raid5.h -+++ b/drivers/md/raid5.h -@@ -646,6 +646,7 @@ struct r5conf { - int pool_size; /* number of disks in stripeheads in pool */ - spinlock_t device_lock; - struct disk_info *disks; -+ struct bio_set *bio_split; - - /* When taking over an array from a different personality, we store - * the new thread here until we fully activate the array. --- -2.10.2 - diff --git a/patches.drivers/0191-md-linear-improve-bio-splitting.patch b/patches.drivers/0191-md-linear-improve-bio-splitting.patch deleted file mode 100644 index 8a04ec9836..0000000000 --- a/patches.drivers/0191-md-linear-improve-bio-splitting.patch +++ /dev/null @@ -1,121 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 5 Apr 2017 14:05:51 +1000 -Subject: [PATCH] md/linear: improve bio splitting. -Git-commit: 868f604b1de2ddbdcf194d7792a0d91d7da719c1 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -linear_make_request() uses fs_bio_set, which is meant for filesystems -to use, and loops, possible allocating from the same bio set multiple -times. -These behaviors can theoretically cause deadlocks, though as -linear requests are hardly ever split, it is unlikely in practice. - -Change to use mddev->bio_set - otherwise unused for linear, and submit -the tail of a split request to generic_make_request() for it to -handle. 
- -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/linear.c | 73 +++++++++++++++++++++++++---------------------------- - 1 file changed, 34 insertions(+), 39 deletions(-) - -diff --git a/drivers/md/linear.c b/drivers/md/linear.c -index 3e38e02..f16316f 100644 ---- a/drivers/md/linear.c -+++ b/drivers/md/linear.c -@@ -249,53 +249,48 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) - { - char b[BDEVNAME_SIZE]; - struct dev_info *tmp_dev; -- struct bio *split; - sector_t start_sector, end_sector, data_offset; -+ sector_t bio_sector = bio->bi_iter.bi_sector; - - if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { - md_flush_request(mddev, bio); - return; - } - -- do { -- sector_t bio_sector = bio->bi_iter.bi_sector; -- tmp_dev = which_dev(mddev, bio_sector); -- start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; -- end_sector = tmp_dev->end_sector; -- data_offset = tmp_dev->rdev->data_offset; -- bio->bi_bdev = tmp_dev->rdev->bdev; -- -- if (unlikely(bio_sector >= end_sector || -- bio_sector < start_sector)) -- goto out_of_bounds; -- -- if (unlikely(bio_end_sector(bio) > end_sector)) { -- /* This bio crosses a device boundary, so we have to -- * split it. 
-- */ -- split = bio_split(bio, end_sector - bio_sector, -- GFP_NOIO, fs_bio_set); -- bio_chain(split, bio); -- } else { -- split = bio; -- } -+ tmp_dev = which_dev(mddev, bio_sector); -+ start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; -+ end_sector = tmp_dev->end_sector; -+ data_offset = tmp_dev->rdev->data_offset; -+ -+ if (unlikely(bio_sector >= end_sector || -+ bio_sector < start_sector)) -+ goto out_of_bounds; -+ -+ if (unlikely(bio_end_sector(bio) > end_sector)) { -+ /* This bio crosses a device boundary, so we have to split it */ -+ struct bio *split = bio_split(bio, end_sector - bio_sector, -+ GFP_NOIO, mddev->bio_set); -+ bio_chain(split, bio); -+ generic_make_request(bio); -+ bio = split; -+ } - -- split->bi_iter.bi_sector = split->bi_iter.bi_sector - -- start_sector + data_offset; -- -- if (unlikely((bio_op(split) == REQ_OP_DISCARD) && -- !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) { -- /* Just ignore it */ -- bio_endio(split); -- } else { -- if (mddev->gendisk) -- trace_block_bio_remap(bdev_get_queue(split->bi_bdev), -- split, disk_devt(mddev->gendisk), -- bio_sector); -- mddev_check_writesame(mddev, split); -- generic_make_request(split); -- } -- } while (split != bio); -+ bio->bi_bdev = tmp_dev->rdev->bdev; -+ bio->bi_iter.bi_sector = bio->bi_iter.bi_sector - -+ start_sector + data_offset; -+ -+ if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && -+ !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) { -+ /* Just ignore it */ -+ bio_endio(bio); -+ } else { -+ if (mddev->gendisk) -+ trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), -+ bio, disk_devt(mddev->gendisk), -+ bio_sector); -+ mddev_check_writesame(mddev, bio); -+ generic_make_request(bio); -+ } - return; - - out_of_bounds: --- -2.10.2 - diff --git a/patches.drivers/0192-md-raid0-fix-up-bio-splitting.patch b/patches.drivers/0192-md-raid0-fix-up-bio-splitting.patch deleted file mode 100644 index c351550f81..0000000000 --- 
a/patches.drivers/0192-md-raid0-fix-up-bio-splitting.patch +++ /dev/null @@ -1,122 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 5 Apr 2017 14:05:51 +1000 -Subject: [PATCH] md/raid0: fix up bio splitting. -Git-commit: f00d7c85be9e39752ef87047a019dfc4cefbd299 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -raid0_make_request() should use a private bio_set rather than the -shared fs_bio_set, which is only meant for filesystems to use. - -raid0_make_request() shouldn't loop around using the bio_set -multiple times as that can deadlock. - -So use mddev->bio_set and pass the tail to generic_make_request() -instead of looping on it. - -Signed-off-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid0.c | 73 +++++++++++++++++++++++++++--------------------------- - 1 file changed, 37 insertions(+), 36 deletions(-) - -diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c -index 56f70c3..e777e48 100644 ---- a/drivers/md/raid0.c -+++ b/drivers/md/raid0.c -@@ -462,52 +462,53 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) - { - struct strip_zone *zone; - struct md_rdev *tmp_dev; -- struct bio *split; -+ sector_t bio_sector; -+ sector_t sector; -+ unsigned chunk_sects; -+ unsigned sectors; - - if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { - md_flush_request(mddev, bio); - return; - } - -- do { -- sector_t bio_sector = bio->bi_iter.bi_sector; -- sector_t sector = bio_sector; -- unsigned chunk_sects = mddev->chunk_sectors; -+ bio_sector = bio->bi_iter.bi_sector; -+ sector = bio_sector; -+ chunk_sects = mddev->chunk_sectors; - -- unsigned sectors = chunk_sects - -- (likely(is_power_of_2(chunk_sects)) -- ? (sector & (chunk_sects-1)) -- : sector_div(sector, chunk_sects)); -+ sectors = chunk_sects - -+ (likely(is_power_of_2(chunk_sects)) -+ ? 
(sector & (chunk_sects-1)) -+ : sector_div(sector, chunk_sects)); - -- /* Restore due to sector_div */ -- sector = bio_sector; -+ /* Restore due to sector_div */ -+ sector = bio_sector; - -- if (sectors < bio_sectors(bio)) { -- split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set); -- bio_chain(split, bio); -- } else { -- split = bio; -- } -+ if (sectors < bio_sectors(bio)) { -+ struct bio *split = bio_split(bio, sectors, GFP_NOIO, mddev->bio_set); -+ bio_chain(split, bio); -+ generic_make_request(bio); -+ bio = split; -+ } - -- zone = find_zone(mddev->private, &sector); -- tmp_dev = map_sector(mddev, zone, sector, &sector); -- split->bi_bdev = tmp_dev->bdev; -- split->bi_iter.bi_sector = sector + zone->dev_start + -- tmp_dev->data_offset; -- -- if (unlikely((bio_op(split) == REQ_OP_DISCARD) && -- !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) { -- /* Just ignore it */ -- bio_endio(split); -- } else { -- if (mddev->gendisk) -- trace_block_bio_remap(bdev_get_queue(split->bi_bdev), -- split, disk_devt(mddev->gendisk), -- bio_sector); -- mddev_check_writesame(mddev, split); -- generic_make_request(split); -- } -- } while (split != bio); -+ zone = find_zone(mddev->private, &sector); -+ tmp_dev = map_sector(mddev, zone, sector, &sector); -+ bio->bi_bdev = tmp_dev->bdev; -+ bio->bi_iter.bi_sector = sector + zone->dev_start + -+ tmp_dev->data_offset; -+ -+ if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && -+ !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) { -+ /* Just ignore it */ -+ bio_endio(bio); -+ } else { -+ if (mddev->gendisk) -+ trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), -+ bio, disk_devt(mddev->gendisk), -+ bio_sector); -+ mddev_check_writesame(mddev, bio); -+ generic_make_request(bio); -+ } - } - - static void raid0_status(struct seq_file *seq, struct mddev *mddev) --- -2.10.2 - diff --git a/patches.drivers/0193-md-allow-creation-of-mdNNN-arrays-via-md_mod-paramet.patch b/patches.drivers/0193-md-allow-creation-of-mdNNN-arrays-via-md_mod-paramet.patch deleted 
file mode 100644 index 9d975e020b..0000000000 --- a/patches.drivers/0193-md-allow-creation-of-mdNNN-arrays-via-md_mod-paramet.patch +++ /dev/null @@ -1,124 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 12 Apr 2017 16:26:13 +1000 -Subject: [PATCH] md: allow creation of mdNNN arrays via - md_mod/parameters/new_array -Git-commit: 039b7225e6e98783a7a7e79c52b29c437f29967d -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -The intention when creating the "new_array" parameter and the -possibility of having array names line "md_HOME" was to transition -away from the old way of creating arrays and to eventually only use -this new way. - -The "old" way of creating array is to create a device node in /dev -and then open it. The act of opening creates the array. -This is problematic because sometimes the device node can be opened -when we don't want to create an array. This can easily happen -when some rule triggered by udev looks at a device as it is being -destroyed. The node in /dev continues to exist for a short period -after an array is stopped, and opening it during this time recreates -the array (as an inactive array). - -Unfortunately no clear plan for the transition was created. It is now -time to fix that. - -This patch allows devices with numeric names, like "md999" to be -created by writing to "new_array". This will only work if the minor -number given is not already in use. This will allow mdadm to -support the creation of arrays with numbers > 511 (currently not -possible) by writing to new_array. -mdadm can, at some point, use this approach to create *all* arrays, -which will allow the transition to only using the new-way. 
- -Signed-off-by: NeilBrown <neilb@suse.com> -Acted-by: Coly Li <colyli@suse.de> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md.c | 36 ++++++++++++++++++++++++++++++------ - 1 file changed, 30 insertions(+), 6 deletions(-) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index 5022be1..554bf213 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -5162,6 +5162,15 @@ static void no_op(struct percpu_ref *r) {} - - static int md_alloc(dev_t dev, char *name) - { -+ /* -+ * If dev is zero, name is the name of a device to allocate with -+ * an arbitrary minor number. It will be "md_???" -+ * If dev is non-zero it must be a device number with a MAJOR of -+ * MD_MAJOR or mdp_major. In this case, if "name" is NULL, then -+ * the device is being created by opening a node in /dev. -+ * If "name" is not NULL, the device is being created by -+ * writing to /sys/module/md_mod/parameters/new_array. -+ */ - static DEFINE_MUTEX(disks_mutex); - struct mddev *mddev = mddev_find(dev); - struct gendisk *disk; -@@ -5187,7 +5196,7 @@ static int md_alloc(dev_t dev, char *name) - if (mddev->gendisk) - goto abort; - -- if (name) { -+ if (name && !dev) { - /* Need to ensure that 'name' is not a duplicate. - */ - struct mddev *mddev2; -@@ -5201,6 +5210,11 @@ static int md_alloc(dev_t dev, char *name) - } - spin_unlock(&all_mddevs_lock); - } -+ if (name && dev) -+ /* -+ * Creating /dev/mdNNN via "newarray", so adjust hold_active. -+ */ -+ mddev->hold_active = UNTIL_STOP; - - error = -ENOMEM; - mddev->queue = blk_alloc_queue(GFP_KERNEL); -@@ -5277,21 +5291,31 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) - - static int add_named_array(const char *val, struct kernel_param *kp) - { -- /* val must be "md_*" where * is not all digits. -- * We allocate an array with a large free minor number, and -+ /* -+ * val must be "md_*" or "mdNNN". 
-+ * For "md_*" we allocate an array with a large free minor number, and - * set the name to val. val must not already be an active name. -+ * For "mdNNN" we allocate an array with the minor number NNN -+ * which must not already be in use. - */ - int len = strlen(val); - char buf[DISK_NAME_LEN]; -+ unsigned long devnum; - - while (len && val[len-1] == '\n') - len--; - if (len >= DISK_NAME_LEN) - return -E2BIG; - strlcpy(buf, val, len+1); -- if (strncmp(buf, "md_", 3) != 0) -- return -EINVAL; -- return md_alloc(0, buf); -+ if (strncmp(buf, "md_", 3) == 0) -+ return md_alloc(0, buf); -+ if (strncmp(buf, "md", 2) == 0 && -+ isdigit(buf[2]) && -+ kstrtoul(buf+2, 10, &devnum) == 0 && -+ devnum <= MINORMASK) -+ return md_alloc(MKDEV(MD_MAJOR, devnum), NULL); -+ -+ return -EINVAL; - } - - static void md_safemode_timeout(unsigned long data) --- -2.10.2 - diff --git a/patches.drivers/0194-md-support-disabling-of-create-on-open-semantics.patch b/patches.drivers/0194-md-support-disabling-of-create-on-open-semantics.patch deleted file mode 100644 index 4e9d63abe7..0000000000 --- a/patches.drivers/0194-md-support-disabling-of-create-on-open-semantics.patch +++ /dev/null @@ -1,70 +0,0 @@ -From: NeilBrown <neilb@suse.com> -Date: Wed, 12 Apr 2017 16:26:13 +1000 -Subject: [PATCH] md: support disabling of create-on-open semantics. -Git-commit: 78b6350dcaadb03b4a2970b16387227ba6744876 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -md allows a new array device to be created by simply -opening a device file. This make it difficult to -remove the device and udev is likely to open the device file -as part of processing the REMOVE event. - -There is an alternate mechanism for creating arrays -by writing to the new_array module parameter. -When using tools that work with this parameter, it is -best to disable the old semantics. -This new module parameter allows that. 
- -Signed-off-by: NeilBrown <neilb@suse.com> -Acted-by: Coly Li <colyli@suse.de> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md.c | 14 +++++++++++++- - 1 file changed, 13 insertions(+), 1 deletion(-) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index 554bf213..6cc6dd7 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -174,6 +174,16 @@ static const struct block_device_operations md_fops; - - static int start_readonly; - -+/* -+ * The original mechanism for creating an md device is to create -+ * a device node in /dev and to open it. This causes races with device-close. -+ * The preferred method is to write to the "new_array" module parameter. -+ * This can avoid races. -+ * Setting create_on_open to false disables the original mechanism -+ * so all the races disappear. -+ */ -+static bool create_on_open = true; -+ - /* bio_clone_mddev - * like bio_clone, but with a local bio set - */ -@@ -5285,7 +5295,8 @@ static int md_alloc(dev_t dev, char *name) - - static struct kobject *md_probe(dev_t dev, int *part, void *data) - { -- md_alloc(dev, NULL); -+ if (create_on_open) -+ md_alloc(dev, NULL); - return NULL; - } - -@@ -9200,6 +9211,7 @@ static int set_ro(const char *val, struct kernel_param *kp) - module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); - module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); - module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR); -+module_param(create_on_open, bool, S_IRUSR|S_IWUSR); - - MODULE_LICENSE("GPL"); - MODULE_DESCRIPTION("MD RAID framework"); --- -2.10.2 - diff --git a/patches.drivers/0195-md-cluster-Fix-a-memleak-in-an-error-handling-path.patch b/patches.drivers/0195-md-cluster-Fix-a-memleak-in-an-error-handling-path.patch deleted file mode 100644 index fc2a1957fe..0000000000 --- a/patches.drivers/0195-md-cluster-Fix-a-memleak-in-an-error-handling-path.patch +++ /dev/null @@ -1,39 +0,0 @@ -From: Christophe JAILLET 
<christophe.jaillet@wanadoo.fr> -Date: Fri, 14 Apr 2017 08:08:03 +0200 -Subject: [PATCH] md-cluster: Fix a memleak in an error handling path -Git-commit: 835d89e92fa77b6a1effea3db80202e1ffec413a -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -We know that 'bm_lockres' is NULL here, so 'lockres_free(bm_lockres)' is a -no-op. According to resource handling in case of error a few lines below, -it is likely that 'bitmap_free(bitmap)' was expected instead. - -Fixes: b98938d16a10 ("md-cluster: introduce cluster_check_sync_size") - -Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr> -Reviewed-by: Guoqing Jiang <gqjiang@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/md-cluster.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c -index b21ef58..7299ce2 100644 ---- a/drivers/md/md-cluster.c -+++ b/drivers/md/md-cluster.c -@@ -1127,7 +1127,7 @@ int cluster_check_sync_size(struct mddev *mddev) - bm_lockres = lockres_init(mddev, str, NULL, 1); - if (!bm_lockres) { - pr_err("md-cluster: Cannot initialize %s\n", str); -- lockres_free(bm_lockres); -+ bitmap_free(bitmap); - return -1; - } - bm_lockres->flags |= DLM_LKF_NOQUEUE; --- -2.10.2 - diff --git a/patches.drivers/0196-md-raid10-wait-up-frozen-array-in-handle_write_compl.patch b/patches.drivers/0196-md-raid10-wait-up-frozen-array-in-handle_write_compl.patch deleted file mode 100644 index 76ce4024d2..0000000000 --- a/patches.drivers/0196-md-raid10-wait-up-frozen-array-in-handle_write_compl.patch +++ /dev/null @@ -1,44 +0,0 @@ -From: Guoqing Jiang <gqjiang@suse.com> -Date: Mon, 17 Apr 2017 17:11:05 +0800 -Subject: [PATCH] md/raid10: wait up frozen array in handle_write_completed -Git-commit: cf25ae78fc50010f66b9be945017796da34c434d -Git-repo: 
git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -Since nr_queued is changed, we need to call wake_up here -if the array is already frozen and waiting for condition -"nr_pending == nr_queued + extra" to be true. - -And commit 824e47daddbf ("RAID1: avoid unnecessary spin -locks in I/O barrier code") which has already added the -wake_up for raid1. - -Signed-off-by: Guoqing Jiang <gqjiang@suse.com> -Reviewed-by: NeilBrown <neilb@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid10.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index 4167091..acb3f46 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -2704,6 +2704,11 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) - list_add(&r10_bio->retry_list, &conf->bio_end_io_list); - conf->nr_queued++; - spin_unlock_irq(&conf->device_lock); -+ /* -+ * In case freeze_array() is waiting for condition -+ * nr_pending == nr_queued + extra to be true. 
-+ */ -+ wake_up(&conf->wait_barrier); - md_wakeup_thread(conf->mddev->thread); - } else { - if (test_bit(R10BIO_WriteError, --- -2.10.2 - diff --git a/patches.drivers/0197-md-raid1-10-remove-unused-queue.patch b/patches.drivers/0197-md-raid1-10-remove-unused-queue.patch deleted file mode 100644 index ef1cf85dae..0000000000 --- a/patches.drivers/0197-md-raid1-10-remove-unused-queue.patch +++ /dev/null @@ -1,65 +0,0 @@ -From: Lidong Zhong <lidong.zhong@suse.com> -Date: Fri, 21 Apr 2017 15:21:38 +0800 -Subject: [PATCH] md/raid1/10: remove unused queue -Git-commit: 296617581eac713b3fda588216ae6d16d1e76dd5 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -A queue is declared and get from the disk of the array, but it's not -used anywhere. So removing it from the source. - -Signed-off-by: Lidong Zhong <lzhong@suse.com> -Acted-by: Guoqing Jiang <gqjiang@suse.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> - ---- - drivers/md/raid1.c | 3 --- - drivers/md/raid10.c | 2 -- - 2 files changed, 5 deletions(-) - -diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index 57611f4..14a9d36 100644 ---- a/drivers/md/raid1.c -+++ b/drivers/md/raid1.c -@@ -2961,7 +2961,6 @@ static struct r1conf *setup_conf(struct mddev *mddev) - err = -EINVAL; - spin_lock_init(&conf->device_lock); - rdev_for_each(rdev, mddev) { -- struct request_queue *q; - int disk_idx = rdev->raid_disk; - if (disk_idx >= mddev->raid_disks - || disk_idx < 0) -@@ -2974,8 +2973,6 @@ static struct r1conf *setup_conf(struct mddev *mddev) - if (disk->rdev) - goto abort; - disk->rdev = rdev; -- q = bdev_get_queue(rdev->bdev); -- - disk->head_position = 0; - disk->seq_start = MaxSector; - } -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index acb3f46..5de951b 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -3644,7 +3644,6 @@ static int raid10_run(struct 
mddev *mddev) - - rdev_for_each(rdev, mddev) { - long long diff; -- struct request_queue *q; - - disk_idx = rdev->raid_disk; - if (disk_idx < 0) -@@ -3663,7 +3662,6 @@ static int raid10_run(struct mddev *mddev) - goto out_free_conf; - disk->rdev = rdev; - } -- q = bdev_get_queue(rdev->bdev); - diff = (rdev->new_data_offset - rdev->data_offset); - if (!mddev->reshape_backwards) - diff = -diff; --- -2.10.2 - diff --git a/patches.drivers/0310-block-bio-pass-bvec-table-to-bio_init.patch b/patches.drivers/0310-block-bio-pass-bvec-table-to-bio_init.patch index 8197dc3bc7..4f972fe122 100644 --- a/patches.drivers/0310-block-bio-pass-bvec-table-to-bio_init.patch +++ b/patches.drivers/0310-block-bio-pass-bvec-table-to-bio_init.patch @@ -22,25 +22,27 @@ Fixed up the new O_DIRECT cases. Signed-off-by: Jens Axboe <axboe@fb.com> Signed-off-by: Hannes Reinecke <hare@suse.de> --- - block/bio.c | 8 ++++++-- - drivers/block/floppy.c | 3 +-- - drivers/md/bcache/io.c | 4 +--- - drivers/md/bcache/journal.c | 4 +--- - drivers/md/bcache/movinggc.c | 6 ++---- - drivers/md/bcache/request.c | 2 +- - drivers/md/bcache/super.c | 12 +++--------- - drivers/md/bcache/writeback.c | 5 ++--- - drivers/md/dm-bufio.c | 4 +--- - drivers/md/dm.c | 2 +- - drivers/md/multipath.c | 2 +- - drivers/md/raid5-cache.c | 2 +- - drivers/md/raid5.c | 9 ++------- - drivers/nvme/target/io-cmd.c | 4 +--- - fs/block_dev.c | 4 +--- - fs/logfs/dev_bdev.c | 4 +--- - include/linux/bio.h | 3 ++- + block/bio.c | 8 ++++++-- + drivers/block/floppy.c | 3 +-- + drivers/md/bcache/io.c | 4 +--- + drivers/md/bcache/journal.c | 4 +--- + drivers/md/bcache/movinggc.c | 6 ++---- + drivers/md/bcache/request.c | 2 +- + drivers/md/bcache/super.c | 12 +++--------- + drivers/md/bcache/writeback.c | 5 ++--- + drivers/md/dm-bufio.c | 4 +--- + drivers/md/dm.c | 2 +- + drivers/md/multipath.c | 2 +- + drivers/md/raid5-cache.c | 2 +- + drivers/md/raid5.c | 9 ++------- + drivers/nvme/target/io-cmd.c | 4 +--- + fs/block_dev.c | 4 +--- + 
fs/logfs/dev_bdev.c | 4 +--- + include/linux/bio.h | 3 ++- 17 files changed, 28 insertions(+), 50 deletions(-) +diff --git a/block/bio.c b/block/bio.c +index 01e31a1..2a12705 100644 --- a/block/bio.c +++ b/block/bio.c @@ -270,11 +270,15 @@ static void bio_free(struct bio *bio) @@ -60,7 +62,7 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> } EXPORT_SYMBOL(bio_init); -@@ -486,7 +490,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_m +@@ -480,7 +484,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) return NULL; bio = p + front_pad; @@ -69,9 +71,11 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> if (nr_iovecs > inline_vecs) { unsigned long idx = 0; +diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c +index c5a6385..973ddd2 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c -@@ -3809,8 +3809,7 @@ static int __floppy_read_block_0(struct +@@ -3809,8 +3809,7 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive) cbdata.drive = drive; @@ -81,9 +85,11 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> bio_vec.bv_page = page; bio_vec.bv_len = size; bio_vec.bv_offset = 0; +diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c +index e97b0ac..db45a88 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c -@@ -24,9 +24,7 @@ struct bio *bch_bbio_alloc(struct cache_ +@@ -24,9 +24,7 @@ struct bio *bch_bbio_alloc(struct cache_set *c) struct bbio *b = mempool_alloc(c->bio_meta, GFP_NOIO); struct bio *bio = &b->bio; @@ -94,9 +100,11 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> return bio; } +diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c +index 6925023..1198e53 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c -@@ -448,13 +448,11 @@ static void do_journal_discard(struct ca +@@ -448,13 +448,11 @@ static void do_journal_discard(struct cache *ca) atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT); @@ -111,9 +119,11 @@ Signed-off-by: Hannes 
Reinecke <hare@suse.de> bio->bi_iter.bi_size = bucket_bytes(ca); bio->bi_end_io = journal_discard_endio; +diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c +index 5c4bdde..13b8a90 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c -@@ -77,15 +77,13 @@ static void moving_init(struct moving_io +@@ -77,15 +77,13 @@ static void moving_init(struct moving_io *io) { struct bio *bio = &io->bio.bio; @@ -131,9 +141,11 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> bch_bio_map(bio, NULL); } +diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c +index 9de2f13..76d2087 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c -@@ -621,7 +621,7 @@ static void do_bio_hook(struct search *s +@@ -621,7 +621,7 @@ static void do_bio_hook(struct search *s, struct bio *orig_bio) { struct bio *bio = &s->bio.bio; @@ -142,9 +154,11 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> __bio_clone_fast(bio, orig_bio); bio->bi_end_io = request_endio; bio->bi_private = &s->cl; +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 2d2e2da..42c789e 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c -@@ -1155,9 +1155,7 @@ static void register_bdev(struct cache_s +@@ -1155,9 +1155,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, dc->bdev = bdev; dc->bdev->bd_holder = dc; @@ -155,7 +169,7 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> dc->sb_bio.bi_io_vec[0].bv_page = sb_page; get_page(sb_page); -@@ -1818,9 +1816,7 @@ static int cache_alloc(struct cache_sb * +@@ -1818,9 +1816,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) __module_get(THIS_MODULE); kobject_init(&ca->kobj, &bch_cache_ktype); @@ -166,7 +180,7 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> free = roundup_pow_of_two(ca->sb.nbuckets) >> 10; -@@ -1856,9 +1852,7 @@ static int register_cache(struct cache_s +@@ -1856,9 +1852,7 @@ static int register_cache(struct cache_sb 
*sb, struct page *sb_page, ca->bdev = bdev; ca->bdev->bd_holder = ca; @@ -177,9 +191,11 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> ca->sb_bio.bi_io_vec[0].bv_page = sb_page; get_page(sb_page); +diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c +index dd415eb..d811bfd 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c -@@ -106,14 +106,13 @@ static void dirty_init(struct keybuf_key +@@ -106,14 +106,13 @@ static void dirty_init(struct keybuf_key *w) struct dirty_io *io = w->private; struct bio *bio = &io->bio; @@ -196,9 +212,11 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> bch_bio_map(bio, NULL); } +diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c +index b3ba142..262e753 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c -@@ -611,9 +611,7 @@ static void use_inline_bio(struct dm_buf +@@ -611,9 +611,7 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, char *ptr; int len; @@ -209,9 +227,11 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> b->bio.bi_iter.bi_sector = block << b->c->sectors_per_block_bits; b->bio.bi_bdev = b->c->bdev; b->bio.bi_end_io = inline_endio; +diff --git a/drivers/md/dm.c b/drivers/md/dm.c +index cd762ff..47f4845 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c -@@ -1556,7 +1556,7 @@ static struct mapped_device *alloc_dev(i +@@ -1525,7 +1525,7 @@ static struct mapped_device *alloc_dev(int minor) if (!md->bdev) goto bad; @@ -220,9 +240,11 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> md->flush_bio.bi_bdev = md->bdev; md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; +diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c +index 589b807..34ed939 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c -@@ -130,7 +130,7 @@ static void multipath_make_request(struc +@@ -130,7 +130,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio) } multipath = conf->multipaths + mp_bh->path; @@ -231,9 +253,11 @@ 
Signed-off-by: Hannes Reinecke <hare@suse.de> __bio_clone_fast(&mp_bh->bio, bio); mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset; +diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c +index bf69058..626a3db 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c -@@ -2825,7 +2825,7 @@ int r5l_init_log(struct r5conf *conf, st +@@ -1214,7 +1214,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) INIT_LIST_HEAD(&log->io_end_ios); INIT_LIST_HEAD(&log->flushing_ios); INIT_LIST_HEAD(&log->finished_ios); @@ -242,9 +266,11 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> log->io_kc = KMEM_CACHE(r5l_io_unit, 0); if (!log->io_kc) +diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c +index a378b4f..5ffb6c1 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -2038,13 +2038,8 @@ static struct stripe_head *alloc_stripe( +@@ -2004,13 +2004,8 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, for (i = 0; i < disks; i++) { struct r5dev *dev = &sh->dev[i]; @@ -258,11 +284,13 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> + bio_init(&dev->req, &dev->vec, 1); + bio_init(&dev->rreq, &dev->rvec, 1); } - - if (raid5_has_ppl(conf)) { + } + return sh; +diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c +index ef52b1e..c4dc9ea 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c -@@ -37,9 +37,7 @@ static void nvmet_inline_bio_init(struct +@@ -37,9 +37,7 @@ static void nvmet_inline_bio_init(struct nvmet_req *req) { struct bio *bio = &req->inline_bio; @@ -273,9 +301,11 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> } static void nvmet_execute_rw(struct nvmet_req *req) +diff --git a/fs/block_dev.c b/fs/block_dev.c +index 83ec522..e896f4c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c -@@ -212,9 +212,7 @@ __blkdev_direct_IO_simple(struct kiocb * +@@ -212,9 +212,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, return -ENOMEM; } @@ 
-286,9 +316,11 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> bio.bi_bdev = bdev; bio.bi_iter.bi_sector = pos >> blkbits; bio.bi_private = current; +diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c +index ee46843..cd676bd 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c -@@ -19,9 +19,7 @@ static int sync_request(struct page *pag +@@ -19,9 +19,7 @@ static int sync_request(struct page *page, struct block_device *bdev, int op) struct bio bio; struct bio_vec bio_vec; @@ -299,9 +331,11 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> bio_vec.bv_page = page; bio_vec.bv_len = PAGE_SIZE; bio_vec.bv_offset = 0; +diff --git a/include/linux/bio.h b/include/linux/bio.h +index d367cd3..70a7244 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h -@@ -420,7 +420,8 @@ extern int bio_phys_segments(struct requ +@@ -420,7 +420,8 @@ extern int bio_phys_segments(struct request_queue *, struct bio *); extern int submit_bio_wait(struct bio *bio); extern void bio_advance(struct bio *, unsigned); @@ -311,3 +345,6 @@ Signed-off-by: Hannes Reinecke <hare@suse.de> extern void bio_reset(struct bio *); void bio_chain(struct bio *, struct bio *); +-- +1.8.5.6 + diff --git a/patches.drivers/0398-blk-mq-Introduce-blk_mq_delay_run_hw_queue.patch b/patches.drivers/0398-blk-mq-Introduce-blk_mq_delay_run_hw_queue.patch deleted file mode 100644 index 5c10350f5c..0000000000 --- a/patches.drivers/0398-blk-mq-Introduce-blk_mq_delay_run_hw_queue.patch +++ /dev/null @@ -1,105 +0,0 @@ -From: Bart Van Assche <bart.vanassche@sandisk.com> -Date: Fri, 7 Apr 2017 11:16:52 -0700 -Subject: [PATCH] blk-mq: Introduce blk_mq_delay_run_hw_queue() -Git-commit: 7587a5ae7eef0439f7be31f1b5959af062bbc5ec -Patch-mainline: v4.11-rc6 -References: bsc#1033577 - -Introduce a function that runs a hardware queue unconditionally -after a delay. Note: there is already a function that stops and -restarts a hardware queue after a delay, namely blk_mq_delay_queue(). 
- -This function will be used in the next patch in this series. - -Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com> -Cc: Christoph Hellwig <hch@lst.de> -Cc: Hannes Reinecke <hare@suse.de> -Cc: Long Li <longli@microsoft.com> -Cc: K. Y. Srinivasan <kys@microsoft.com> -Signed-off-by: Jens Axboe <axboe@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> ---- - block/blk-mq.c | 32 ++++++++++++++++++++++++++++++-- - include/linux/blk-mq.h | 2 ++ - 2 files changed, 32 insertions(+), 2 deletions(-) - ---- a/block/blk-mq.c -+++ b/block/blk-mq.c -@@ -981,7 +981,8 @@ static int blk_mq_hctx_next_cpu(struct b - return hctx->next_cpu; - } - --void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) -+static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, -+ unsigned long msecs) - { - if (unlikely(blk_mq_hctx_stopped(hctx) || - !blk_mq_hw_queue_mapped(hctx))) -@@ -998,7 +999,24 @@ void blk_mq_run_hw_queue(struct blk_mq_h - put_cpu(); - } - -- kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work); -+ if (msecs == 0) -+ kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), -+ &hctx->run_work); -+ else -+ kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), -+ &hctx->delayed_run_work, -+ msecs_to_jiffies(msecs)); -+} -+ -+void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) -+{ -+ __blk_mq_delay_run_hw_queue(hctx, true, msecs); -+} -+EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); -+ -+void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) -+{ -+ __blk_mq_delay_run_hw_queue(hctx, async, 0); - } - - void blk_mq_run_hw_queues(struct request_queue *q, bool async) -@@ -1102,6 +1120,15 @@ static void blk_mq_run_work_fn(struct wo - __blk_mq_run_hw_queue(hctx); - } - -+static void blk_mq_delayed_run_work_fn(struct work_struct *work) -+{ -+ struct blk_mq_hw_ctx *hctx; -+ -+ hctx = container_of(work, struct blk_mq_hw_ctx, delayed_run_work.work); -+ -+ __blk_mq_run_hw_queue(hctx); -+} -+ - static 
void blk_mq_delay_work_fn(struct work_struct *work) - { - struct blk_mq_hw_ctx *hctx; -@@ -1785,6 +1812,7 @@ static int blk_mq_init_hctx(struct reque - node = hctx->numa_node = set->numa_node; - - INIT_WORK(&hctx->run_work, blk_mq_run_work_fn); -+ INIT_DELAYED_WORK(&hctx->delayed_run_work, blk_mq_delayed_run_work_fn); - INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn); - spin_lock_init(&hctx->lock); - INIT_LIST_HEAD(&hctx->dispatch); ---- a/include/linux/blk-mq.h -+++ b/include/linux/blk-mq.h -@@ -54,6 +54,7 @@ struct blk_mq_hw_ctx { - - atomic_t nr_active; - -+ struct delayed_work delayed_run_work; - struct delayed_work delay_work; - - struct blk_mq_cpu_notifier cpu_notifier; -@@ -239,6 +240,7 @@ void blk_mq_stop_hw_queues(struct reques - void blk_mq_start_hw_queues(struct request_queue *q); - void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); - void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); -+void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); - void blk_mq_run_hw_queues(struct request_queue *q, bool async); - void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); - void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, diff --git a/patches.suse/0001-raid5-separate-header-for-log-functions.patch b/patches.suse/0001-raid5-separate-header-for-log-functions.patch index a92e5e4544..0544976523 100644 --- a/patches.suse/0001-raid5-separate-header-for-log-functions.patch +++ b/patches.suse/0001-raid5-separate-header-for-log-functions.patch @@ -15,38 +15,19 @@ pr_debug() calls to r5l_init_log(). 
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com> Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.com> +Acked-by: NeilBrown <neilb@suse.com> + --- - drivers/md/raid5-cache.c | 17 +++++++-- - drivers/md/raid5-log.h | 81 +++++++++++++++++++++++++++++++++++++++++++++++ - drivers/md/raid5.c | 47 +++++++-------------------- - drivers/md/raid5.h | 17 --------- - 4 files changed, 107 insertions(+), 55 deletions(-) + drivers/md/raid5-cache.c | 11 ++++++- + drivers/md/raid5-log.h | 71 +++++++++++++++++++++++++++++++++++++++++++++++ + drivers/md/raid5.c | 35 ++++++++--------------- + drivers/md/raid5.h | 9 ----- + 4 files changed, 94 insertions(+), 32 deletions(-) create mode 100644 drivers/md/raid5-log.h --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c -@@ -343,6 +343,8 @@ void r5c_handle_cached_data_endio(struct - } - } - -+void r5l_wake_reclaim(struct r5l_log *log, sector_t space); -+ - /* Check whether we should flush some stripes to free up stripe cache */ - void r5c_check_stripe_cache_usage(struct r5conf *conf) - { -@@ -2620,9 +2622,7 @@ void r5c_finish_stripe_write_out(struct - } - } - --int --r5c_cache_data(struct r5l_log *log, struct stripe_head *sh, -- struct stripe_head_state *s) -+int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh) - { - struct r5conf *conf = sh->raid_conf; - int pages = 0; -@@ -2784,6 +2784,10 @@ void r5c_update_on_rdev_error(struct mdd +@@ -1193,6 +1193,10 @@ ioerr: int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) { struct r5l_log *log; @@ -57,7 +38,7 @@ Signed-off-by: Coly Li <colyli@suse.com> if (PAGE_SIZE != 4096) return -EINVAL; -@@ -2886,8 +2890,13 @@ io_kc: +@@ -1264,8 +1268,13 @@ io_kc: return -EINVAL; } @@ -69,12 +50,12 @@ Signed-off-by: Coly Li <colyli@suse.com> + conf->log = NULL; + synchronize_rcu(); + - flush_work(&log->disable_writeback_work); md_unregister_thread(&log->reclaim_thread); mempool_destroy(log->meta_pool); + bioset_free(log->bs); --- 
/dev/null +++ b/drivers/md/raid5-log.h -@@ -0,0 +1,81 @@ +@@ -0,0 +1,71 @@ +#ifndef _RAID5_LOG_H +#define _RAID5_LOG_H + @@ -96,7 +77,6 @@ Signed-off-by: Coly Li <colyli@suse.com> + struct stripe_head_state *s); +extern void r5c_release_extra_page(struct stripe_head *sh); +extern void r5c_use_extra_page(struct stripe_head *sh); -+extern void r5l_wake_reclaim(struct r5l_log *log, sector_t space); +extern void r5c_handle_cached_data_endio(struct r5conf *conf, + struct stripe_head *sh, int disks, struct bio_list *return_bi); +extern int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh); @@ -112,17 +92,8 @@ Signed-off-by: Coly Li <colyli@suse.com> +{ + struct r5conf *conf = sh->raid_conf; + -+ if (conf->log) { -+ if (!test_bit(STRIPE_R5C_CACHING, &sh->state)) { -+ /* writing out phase */ -+ if (s->waiting_extra_page) -+ return 0; -+ return r5l_write_stripe(conf->log, sh); -+ } else if (test_bit(STRIPE_LOG_TRAPPED, &sh->state)) { -+ /* caching phase */ -+ return r5c_cache_data(conf->log, sh); -+ } -+ } ++ if (conf->log) ++ return r5l_write_stripe(conf->log, sh); + + return -EAGAIN; +} @@ -158,7 +129,7 @@ Signed-off-by: Coly Li <colyli@suse.com> +#endif --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -62,6 +62,7 @@ +@@ -61,6 +61,7 @@ #include "raid5.h" #include "raid0.h" #include "bitmap.h" @@ -166,28 +137,16 @@ Signed-off-by: Coly Li <colyli@suse.com> #define cpu_to_group(cpu) cpu_to_node(cpu) #define ANY_GROUP NUMA_NO_NODE -@@ -980,18 +981,8 @@ static void ops_run_io(struct stripe_hea +@@ -901,7 +902,7 @@ static void ops_run_io(struct stripe_hea might_sleep(); -- if (!test_bit(STRIPE_R5C_CACHING, &sh->state)) { -- /* writing out phase */ -- if (s->waiting_extra_page) -- return; -- if (r5l_write_stripe(conf->log, sh) == 0) -- return; -- } else { /* caching phase */ -- if (test_bit(STRIPE_LOG_TRAPPED, &sh->state)) { -- r5c_cache_data(conf->log, sh, s); -- return; -- } -- } +- if (r5l_write_stripe(conf->log, sh) == 0) + if (log_stripe(sh, s) == 0) -+ 
return; - - should_defer = conf->batch_bio_dispatch && conf->group_cnt; - -@@ -3332,7 +3323,7 @@ handle_failed_stripe(struct r5conf *conf + return; + for (i = disks; i--; ) { + int op, op_flags = 0; +@@ -3111,7 +3112,7 @@ handle_failed_stripe(struct r5conf *conf if (bi) bitmap_end = 1; @@ -196,7 +155,7 @@ Signed-off-by: Coly Li <colyli@suse.com> if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) wake_up(&conf->wait_for_overlap); -@@ -3751,7 +3742,7 @@ returnbi: +@@ -3515,7 +3516,7 @@ returnbi: discard_pending = 1; } @@ -205,16 +164,16 @@ Signed-off-by: Coly Li <colyli@suse.com> if (!discard_pending && test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { -@@ -4741,7 +4732,7 @@ static void handle_stripe(struct stripe_ +@@ -4446,6 +4447,8 @@ static void handle_stripe(struct stripe_ + test_bit(R5_Discard, &qdev->flags)))))) + handle_stripe_clean_event(conf, sh, disks, &s.return_bi); - if (s.just_cached) - r5c_handle_cached_data_endio(conf, sh, disks, &s.return_bi); -- r5l_stripe_write_finished(sh); + log_stripe_write_finished(sh); - ++ /* Now we might consider reading some blocks, either to check/generate * parity, or to satisfy requests -@@ -6142,7 +6133,7 @@ static int handle_active_stripes(struct + * or to load a block that is being partially written. 
+@@ -5785,7 +5788,7 @@ static int handle_active_stripes(struct for (i = 0; i < batch_size; i++) handle_stripe(batch[i]); @@ -223,10 +182,10 @@ Signed-off-by: Coly Li <colyli@suse.com> cond_resched(); -@@ -6720,8 +6711,8 @@ static void free_conf(struct r5conf *con - { - int i; +@@ -6358,8 +6361,8 @@ static void raid5_free_percpu(struct r5c + static void free_conf(struct r5conf *conf) + { - if (conf->log) - r5l_exit_log(conf->log); + log_exit(conf); @@ -234,7 +193,7 @@ Signed-off-by: Coly Li <colyli@suse.com> if (conf->shrinker.nr_deferred) unregister_shrinker(&conf->shrinker); -@@ -7446,13 +7437,8 @@ static int raid5_run(struct mddev *mddev +@@ -7047,13 +7050,8 @@ static int raid5_run(struct mddev *mddev blk_queue_max_hw_sectors(mddev->queue, UINT_MAX); } @@ -250,7 +209,7 @@ Signed-off-by: Coly Li <colyli@suse.com> return 0; abort: -@@ -7566,17 +7552,13 @@ static int raid5_remove_disk(struct mdde +@@ -7167,17 +7165,13 @@ static int raid5_remove_disk(struct mdde print_raid5_conf(conf); if (test_bit(Journal, &rdev->flags) && conf->log) { @@ -269,7 +228,7 @@ Signed-off-by: Coly Li <colyli@suse.com> return 0; } if (rdev == p->rdev) -@@ -7645,7 +7627,6 @@ static int raid5_add_disk(struct mddev * +@@ -7246,7 +7240,6 @@ static int raid5_add_disk(struct mddev * int last = conf->raid_disks - 1; if (test_bit(Journal, &rdev->flags)) { @@ -277,7 +236,7 @@ Signed-off-by: Coly Li <colyli@suse.com> if (conf->log) return -EBUSY; -@@ -7654,9 +7635,7 @@ static int raid5_add_disk(struct mddev * +@@ -7255,9 +7248,7 @@ static int raid5_add_disk(struct mddev * * The array is in readonly mode if journal is missing, so no * write requests running. 
We should be safe */ @@ -290,10 +249,10 @@ Signed-off-by: Coly Li <colyli@suse.com> if (mddev->recovery_disabled == conf->recovery_disabled) --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h -@@ -779,21 +779,4 @@ extern struct stripe_head * +@@ -626,13 +626,4 @@ extern sector_t raid5_compute_sector(str + extern struct stripe_head * raid5_get_active_stripe(struct r5conf *conf, sector_t sector, int previous, int noblock, int noquiesce); - extern int raid5_calc_degraded(struct r5conf *conf); -extern int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev); -extern void r5l_exit_log(struct r5l_log *log); -extern int r5l_write_stripe(struct r5l_log *log, struct stripe_head *head_sh); @@ -303,12 +262,4 @@ Signed-off-by: Coly Li <colyli@suse.com> -extern int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio); -extern void r5l_quiesce(struct r5l_log *log, int state); -extern bool r5l_log_disk_error(struct r5conf *conf); --extern void r5l_wake_reclaim(struct r5l_log *log, sector_t space); --extern void r5c_make_stripe_write_out(struct stripe_head *sh); --extern void r5c_flush_cache(struct r5conf *conf, int num); --extern void r5c_check_stripe_cache_usage(struct r5conf *conf); --extern void r5c_check_cached_full_stripe(struct r5conf *conf); --extern struct md_sysfs_entry r5c_journal_mode; --extern void r5c_update_on_rdev_error(struct mddev *mddev); --extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect); #endif diff --git a/patches.suse/0003-raid5-ppl-Partial-Parity-Log-write-logging-implement.patch b/patches.suse/0003-raid5-ppl-Partial-Parity-Log-write-logging-implement.patch index 3d83e799ec..66a29e4ff6 100644 --- a/patches.suse/0003-raid5-ppl-Partial-Parity-Log-write-logging-implement.patch +++ b/patches.suse/0003-raid5-ppl-Partial-Parity-Log-write-logging-implement.patch @@ -48,9 +48,9 @@ Acked-by: NeilBrown <neilb@suse.com> drivers/md/raid5-log.h | 24 + drivers/md/raid5-ppl.c | 703 +++++++++++++++++++++++++++++++++++++++++ 
drivers/md/raid5.c | 64 +++ - drivers/md/raid5.h | 16 + drivers/md/raid5.h | 17 include/uapi/linux/raid/md_p.h | 27 + - 7 files changed, 875 insertions(+), 5 deletions(-) + 7 files changed, 875 insertions(+), 6 deletions(-) create mode 100644 Documentation/md/raid5-ppl.txt create mode 100644 drivers/md/raid5-ppl.c @@ -114,13 +114,13 @@ Acked-by: NeilBrown <neilb@suse.com> # and must come before md.o, as they each initialise --- a/drivers/md/raid5-log.h +++ b/drivers/md/raid5-log.h -@@ -31,6 +31,20 @@ extern struct md_sysfs_entry r5c_journal +@@ -30,12 +30,28 @@ extern struct md_sysfs_entry r5c_journal extern void r5c_update_on_rdev_error(struct mddev *mddev); extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect); +extern struct dma_async_tx_descriptor * +ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu, -+ struct dma_async_tx_descriptor *tx); ++ struct dma_async_tx_descriptor *tx); +extern int ppl_init_log(struct r5conf *conf); +extern void ppl_exit_log(struct r5conf *conf); +extern int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh); @@ -135,16 +135,15 @@ Acked-by: NeilBrown <neilb@suse.com> static inline int log_stripe(struct stripe_head *sh, struct stripe_head_state *s) { struct r5conf *conf = sh->raid_conf; -@@ -45,6 +59,8 @@ static inline int log_stripe(struct stri - /* caching phase */ - return r5c_cache_data(conf->log, sh); - } -+ } else if (raid5_has_ppl(conf)) { + + if (conf->log) + return r5l_write_stripe(conf->log, sh); ++ else if (raid5_has_ppl(conf)) + return ppl_write_stripe(conf, sh); - } return -EAGAIN; -@@ -56,24 +72,32 @@ static inline void log_stripe_write_fini + } +@@ -46,24 +62,32 @@ static inline void log_stripe_write_fini if (conf->log) r5l_stripe_write_finished(sh); @@ -163,7 +162,7 @@ Acked-by: NeilBrown <neilb@suse.com> static inline void log_exit(struct r5conf *conf) { if (conf->log) - r5l_exit_log(conf); + r5l_exit_log(conf->log); + else if (raid5_has_ppl(conf)) + 
ppl_exit_log(conf); } @@ -885,7 +884,7 @@ Acked-by: NeilBrown <neilb@suse.com> +} --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -465,6 +465,11 @@ static void shrink_buffers(struct stripe +@@ -464,6 +464,11 @@ static void shrink_buffers(struct stripe sh->dev[i].page = NULL; put_page(p); } @@ -897,7 +896,7 @@ Acked-by: NeilBrown <neilb@suse.com> } static int grow_buffers(struct stripe_head *sh, gfp_t gfp) -@@ -481,6 +486,13 @@ static int grow_buffers(struct stripe_he +@@ -480,6 +485,13 @@ static int grow_buffers(struct stripe_he sh->dev[i].page = page; sh->dev[i].orig_page = page; } @@ -911,7 +910,7 @@ Acked-by: NeilBrown <neilb@suse.com> return 0; } -@@ -729,7 +741,7 @@ static bool stripe_can_batch(struct stri +@@ -728,7 +740,7 @@ static bool stripe_can_batch(struct stri { struct r5conf *conf = sh->raid_conf; @@ -920,7 +919,7 @@ Acked-by: NeilBrown <neilb@suse.com> return false; return test_bit(STRIPE_BATCH_READY, &sh->state) && !test_bit(STRIPE_BITMAP_PENDING, &sh->state) && -@@ -2075,6 +2087,9 @@ static void raid_run_ops(struct stripe_h +@@ -1956,6 +1968,9 @@ static void raid_run_ops(struct stripe_h async_tx_ack(tx); } @@ -930,7 +929,7 @@ Acked-by: NeilBrown <neilb@suse.com> if (test_bit(STRIPE_OP_PREXOR, &ops_request)) { if (level < 6) tx = ops_run_prexor5(sh, percpu, tx); -@@ -3155,6 +3170,12 @@ schedule_reconstruction(struct stripe_he +@@ -2965,6 +2980,12 @@ schedule_reconstruction(struct stripe_he s->locked++; } @@ -943,7 +942,7 @@ Acked-by: NeilBrown <neilb@suse.com> pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n", __func__, (unsigned long long)sh->sector, s->locked, s->ops_request); -@@ -3202,6 +3223,36 @@ static int add_stripe_bio(struct stripe_ +@@ -3012,6 +3033,36 @@ static int add_stripe_bio(struct stripe_ if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi)) goto overlap; @@ -980,7 +979,7 @@ Acked-by: NeilBrown <neilb@suse.com> if (!forwrite || previous) clear_bit(STRIPE_BATCH_READY, &sh->state); -@@ -7218,6 +7269,13 @@ static int 
raid5_run(struct mddev *mddev +@@ -6938,6 +6989,13 @@ static int raid5_run(struct mddev *mddev BUG_ON(mddev->delta_disks != 0); } @@ -994,7 +993,7 @@ Acked-by: NeilBrown <neilb@suse.com> if (mddev->private == NULL) conf = setup_conf(mddev); else -@@ -7699,7 +7757,7 @@ static int raid5_resize(struct mddev *md +@@ -7419,7 +7477,7 @@ static int raid5_resize(struct mddev *md sector_t newsize; struct r5conf *conf = mddev->private; @@ -1003,7 +1002,7 @@ Acked-by: NeilBrown <neilb@suse.com> return -EINVAL; sectors &= ~((sector_t)conf->chunk_sectors - 1); newsize = raid5_size(mddev, sectors, mddev->raid_disks); -@@ -7750,7 +7808,7 @@ static int check_reshape(struct mddev *m +@@ -7470,7 +7528,7 @@ static int check_reshape(struct mddev *m { struct r5conf *conf = mddev->private; @@ -1030,7 +1029,7 @@ Acked-by: NeilBrown <neilb@suse.com> sector_t log_start; /* first meta block on the journal */ struct list_head r5c; /* for r5c_cache->stripe_in_journal */ /** -@@ -400,6 +406,7 @@ enum { +@@ -394,6 +400,7 @@ enum { STRIPE_OP_BIODRAIN, STRIPE_OP_RECONSTRUCT, STRIPE_OP_CHECK, @@ -1038,7 +1037,7 @@ Acked-by: NeilBrown <neilb@suse.com> }; /* -@@ -525,6 +532,12 @@ static inline void raid5_set_bi_stripes( +@@ -518,6 +525,12 @@ static inline void raid5_set_bi_stripes( atomic_set(segments, cnt); } @@ -1051,14 +1050,17 @@ Acked-by: NeilBrown <neilb@suse.com> /* NOTE NR_STRIPE_HASH_LOCKS must remain below 64. 
* This is because we sometimes take all the spinlocks * and creating that much locking depth can cause -@@ -696,6 +709,7 @@ struct r5conf { +@@ -676,9 +689,9 @@ struct r5conf { int group_cnt; int worker_cnt_per_group; struct r5l_log *log; + void *log_private; + }; - spinlock_t pending_bios_lock; - bool batch_bio_dispatch; +- + /* + * Our supported algorithms + */ --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -398,4 +398,31 @@ struct r5l_meta_block { diff --git a/patches.suse/0007-md-cluster-introduce-cluster_check_sync_size.patch b/patches.suse/0007-md-cluster-introduce-cluster_check_sync_size.patch index 3715b4386f..c52991c444 100644 --- a/patches.suse/0007-md-cluster-introduce-cluster_check_sync_size.patch +++ b/patches.suse/0007-md-cluster-introduce-cluster_check_sync_size.patch @@ -179,7 +179,7 @@ index 0e99163f9689..94b014b082d7 100644 + if (!bm_lockres) { + pr_err("md-cluster: Cannot initialize %s\n", str); + lockres_free(bm_lockres); -+ return -1; ++ return -ENOMEM; + } + bm_lockres->flags |= DLM_LKF_NOQUEUE; + rv = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); diff --git a/patches.suse/0008-raid5-ppl-silence-a-misleading-warning-message.patch b/patches.suse/0008-raid5-ppl-silence-a-misleading-warning-message.patch deleted file mode 100644 index 94f8064159..0000000000 --- a/patches.suse/0008-raid5-ppl-silence-a-misleading-warning-message.patch +++ /dev/null @@ -1,37 +0,0 @@ -From: Dan Carpenter <dan.carpenter@oracle.com> -Date: Tue, 21 Mar 2017 23:43:05 +0300 -Subject: [PATCH] raid5-ppl: silence a misleading warning message -Git-commit: 0b408baf7f4f3ea94239d021a1f19e60cd8694de -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: fate#321488 - -The "need_cache_flush" variable is never set to false. When the -variable is true that means we print a warning message at the end of -the function. 
- -Fixes: 3418d036c81d ("raid5-ppl: Partial Parity Log write logging implementation") -Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> -Reviewed-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> ---- - drivers/md/raid5-ppl.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c -index 27bad3e..86ea9ad 100644 ---- a/drivers/md/raid5-ppl.c -+++ b/drivers/md/raid5-ppl.c -@@ -1070,7 +1070,7 @@ int ppl_init_log(struct r5conf *conf) - struct mddev *mddev = conf->mddev; - int ret = 0; - int i; -- bool need_cache_flush; -+ bool need_cache_flush = false; - - pr_debug("md/raid:%s: enabling distributed Partial Parity Log\n", - mdname(conf->mddev)); --- -2.10.2 - diff --git a/patches.suse/0009-md-raid5-use-consistency_policy-to-remove-journal-fe.patch b/patches.suse/0009-md-raid5-use-consistency_policy-to-remove-journal-fe.patch deleted file mode 100644 index 426941db37..0000000000 --- a/patches.suse/0009-md-raid5-use-consistency_policy-to-remove-journal-fe.patch +++ /dev/null @@ -1,114 +0,0 @@ -From: Song Liu <songliubraving@fb.com> -Date: Mon, 27 Mar 2017 10:51:33 -0700 -Subject: [PATCH] md/raid5: use consistency_policy to remove journal feature -Git-commit: 0bb0c10500ba634216238c40e1eeddce92b4d488 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: FATE#321488 - -When journal device of an array fails, the array is forced into read-only -mode. To make the array normal without adding another journal device, we -need to remove journal _feature_ from the array. - -This patch allows remove journal _feature_ from an array, For journal -existing journal should be either missing or faulty. 
- -To remove journal feature, it is necessary to remove the journal device -first: - - mdadm --fail /dev/md0 /dev/sdb - mdadm: set /dev/sdb faulty in /dev/md0 - mdadm --remove /dev/md0 /dev/sdb - mdadm: hot removed /dev/sdb from /dev/md0 - -Then the journal feature can be removed by echoing into the sysfs file: - - cat /sys/block/md0/md/consistency_policy - journal - - echo resync > /sys/block/md0/md/consistency_policy - cat /sys/block/md0/md/consistency_policy - resync - -Signed-off-by: Song Liu <songliubraving@fb.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> ---- - drivers/md/raid5.c | 46 ++++++++++++++++++++++++++++++++++++---------- - 1 file changed, 36 insertions(+), 10 deletions(-) - -diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index 266d661..6036d5e 100644 ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -8292,17 +8292,41 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf) - } - - if (strncmp(buf, "ppl", 3) == 0 && !raid5_has_ppl(conf)) { -- mddev_suspend(mddev); -- set_bit(MD_HAS_PPL, &mddev->flags); -- err = log_init(conf, NULL); -- if (!err) -+ /* ppl only works with RAID 5 */ -+ if (conf->level == 5) { -+ mddev_suspend(mddev); -+ set_bit(MD_HAS_PPL, &mddev->flags); -+ err = log_init(conf, NULL); -+ if (!err) -+ raid5_reset_stripe_cache(mddev); -+ mddev_resume(mddev); -+ } else -+ err = -EINVAL; -+ } else if (strncmp(buf, "resync", 6) == 0) { -+ if (raid5_has_ppl(conf)) { -+ mddev_suspend(mddev); -+ log_exit(conf); - raid5_reset_stripe_cache(mddev); -- mddev_resume(mddev); -- } else if (strncmp(buf, "resync", 6) == 0 && raid5_has_ppl(conf)) { -- mddev_suspend(mddev); -- log_exit(conf); -- raid5_reset_stripe_cache(mddev); -- mddev_resume(mddev); -+ mddev_resume(mddev); -+ } else if (test_bit(MD_HAS_JOURNAL, &conf->mddev->flags) && -+ r5l_log_disk_error(conf)) { -+ bool journal_dev_exists = false; -+ struct md_rdev *rdev; -+ -+ rdev_for_each(rdev, mddev) -+ if 
(test_bit(Journal, &rdev->flags)) { -+ journal_dev_exists = true; -+ break; -+ } -+ -+ if (!journal_dev_exists) { -+ mddev_suspend(mddev); -+ clear_bit(MD_HAS_JOURNAL, &mddev->flags); -+ mddev_resume(mddev); -+ } else /* need remove journal device first */ -+ err = -EBUSY; -+ } else -+ err = -EINVAL; - } else { - err = -EINVAL; - } -@@ -8337,6 +8361,7 @@ static struct md_personality raid6_personality = - .quiesce = raid5_quiesce, - .takeover = raid6_takeover, - .congested = raid5_congested, -+ .change_consistency_policy = raid5_change_consistency_policy, - }; - static struct md_personality raid5_personality = - { -@@ -8385,6 +8410,7 @@ static struct md_personality raid4_personality = - .quiesce = raid5_quiesce, - .takeover = raid4_takeover, - .congested = raid5_congested, -+ .change_consistency_policy = raid5_change_consistency_policy, - }; - - static int __init raid5_init(void) --- -2.10.2 - diff --git a/patches.suse/0010-raid5-ppl-move-no_mem_stripes-to-struct-ppl_conf.patch b/patches.suse/0010-raid5-ppl-move-no_mem_stripes-to-struct-ppl_conf.patch deleted file mode 100644 index 093fd7c7e1..0000000000 --- a/patches.suse/0010-raid5-ppl-move-no_mem_stripes-to-struct-ppl_conf.patch +++ /dev/null @@ -1,119 +0,0 @@ -From: Artur Paszkiewicz <artur.paszkiewicz@intel.com> -Date: Tue, 4 Apr 2017 13:13:56 +0200 -Subject: [PATCH] raid5-ppl: move no_mem_stripes to struct ppl_conf -Git-commit: 94568f64af50bb37c418b200449698cfe7e1da5f -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: fate#321488 - -Use a single no_mem_stripes list instead of per member device lists for -handling stripes that need retrying in case of failed io_unit -allocation. Because io_units are allocated from a memory pool shared -between all member disks, the no_mem_stripes list should be checked when -an io_unit for any member is freed. 
This fixes a deadlock that could -happen if there are stripes in more than one no_mem_stripes list. - -Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> ---- - drivers/md/raid5-ppl.c | 36 +++++++++++++++++++++++------------- - 1 file changed, 23 insertions(+), 13 deletions(-) - -diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c -index 86ea9ad..355cf35 100644 ---- a/drivers/md/raid5-ppl.c -+++ b/drivers/md/raid5-ppl.c -@@ -107,6 +107,10 @@ struct ppl_conf { - /* used only for recovery */ - int recovered_entries; - int mismatch_count; -+ -+ /* stripes to retry if failed to allocate io_unit */ -+ struct list_head no_mem_stripes; -+ spinlock_t no_mem_stripes_lock; - }; - - struct ppl_log { -@@ -119,8 +123,6 @@ struct ppl_log { - * always at the end of io_list */ - spinlock_t io_list_lock; - struct list_head io_list; /* all io_units of this log */ -- struct list_head no_mem_stripes;/* stripes to retry if failed to -- * allocate io_unit */ - }; - - #define PPL_IO_INLINE_BVECS 32 -@@ -347,9 +349,9 @@ int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh) - atomic_inc(&sh->count); - - if (ppl_log_stripe(log, sh)) { -- spin_lock_irq(&log->io_list_lock); -- list_add_tail(&sh->log_list, &log->no_mem_stripes); -- spin_unlock_irq(&log->io_list_lock); -+ spin_lock_irq(&ppl_conf->no_mem_stripes_lock); -+ list_add_tail(&sh->log_list, &ppl_conf->no_mem_stripes); -+ spin_unlock_irq(&ppl_conf->no_mem_stripes_lock); - } - - mutex_unlock(&log->io_mutex); -@@ -492,25 +494,32 @@ void ppl_write_stripe_run(struct r5conf *conf) - static void ppl_io_unit_finished(struct ppl_io_unit *io) - { - struct ppl_log *log = io->log; -+ struct ppl_conf *ppl_conf = log->ppl_conf; - unsigned long flags; - - pr_debug("%s: seq: %llu\n", __func__, io->seq); - -- spin_lock_irqsave(&log->io_list_lock, flags); -+ local_irq_save(flags); - -+ spin_lock(&log->io_list_lock); - 
list_del(&io->log_sibling); -- mempool_free(io, log->ppl_conf->io_pool); -+ spin_unlock(&log->io_list_lock); -+ -+ mempool_free(io, ppl_conf->io_pool); -+ -+ spin_lock(&ppl_conf->no_mem_stripes_lock); -+ if (!list_empty(&ppl_conf->no_mem_stripes)) { -+ struct stripe_head *sh; - -- if (!list_empty(&log->no_mem_stripes)) { -- struct stripe_head *sh = list_first_entry(&log->no_mem_stripes, -- struct stripe_head, -- log_list); -+ sh = list_first_entry(&ppl_conf->no_mem_stripes, -+ struct stripe_head, log_list); - list_del_init(&sh->log_list); - set_bit(STRIPE_HANDLE, &sh->state); - raid5_release_stripe(sh); - } -+ spin_unlock(&ppl_conf->no_mem_stripes_lock); - -- spin_unlock_irqrestore(&log->io_list_lock, flags); -+ local_irq_restore(flags); - } - - void ppl_stripe_write_finished(struct stripe_head *sh) -@@ -1135,6 +1144,8 @@ int ppl_init_log(struct r5conf *conf) - } - - atomic64_set(&ppl_conf->seq, 0); -+ INIT_LIST_HEAD(&ppl_conf->no_mem_stripes); -+ spin_lock_init(&ppl_conf->no_mem_stripes_lock); - - if (!mddev->external) { - ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid)); -@@ -1150,7 +1161,6 @@ int ppl_init_log(struct r5conf *conf) - mutex_init(&log->io_mutex); - spin_lock_init(&log->io_list_lock); - INIT_LIST_HEAD(&log->io_list); -- INIT_LIST_HEAD(&log->no_mem_stripes); - - log->ppl_conf = ppl_conf; - log->rdev = rdev; --- -2.10.2 - diff --git a/patches.suse/0011-raid5-ppl-use-resize_stripes-when-enabling-or-disabl.patch b/patches.suse/0011-raid5-ppl-use-resize_stripes-when-enabling-or-disabl.patch deleted file mode 100644 index 60acf31105..0000000000 --- a/patches.suse/0011-raid5-ppl-use-resize_stripes-when-enabling-or-disabl.patch +++ /dev/null @@ -1,305 +0,0 @@ -From: Artur Paszkiewicz <artur.paszkiewicz@intel.com> -Date: Tue, 4 Apr 2017 13:13:57 +0200 -Subject: [PATCH] raid5-ppl: use resize_stripes() when enabling or disabling ppl -Git-commit: 845b9e229fe0716ab6b4d94b4364c99069667b59 -Git-repo: 
git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: fate#321488 - -Use resize_stripes() instead of raid5_reset_stripe_cache() to allocate -or free sh->ppl_page at runtime for all stripes in the stripe cache. -raid5_reset_stripe_cache() required suspending the mddev and could -deadlock because of GFP_KERNEL allocations. - -Move the 'newsize' check to check_reshape() to allow reallocating the -stripes with the same number of disks. Allocate sh->ppl_page in -alloc_stripe() instead of grow_buffers(). Pass 'struct r5conf *conf' as -a parameter to alloc_stripe() because it is needed to check whether to -allocate ppl_page. Add free_stripe() and use it to free stripes rather -than directly call kmem_cache_free(). Also free sh->ppl_page in -free_stripe(). - -Set MD_HAS_PPL at the end of ppl_init_log() instead of explicitly -setting it in advance and add another parameter to log_init() to allow -calling ppl_init_log() without the bit set. Don't try to calculate -partial parity or add a stripe to log if it does not have ppl_page set. - -Enabling ppl can now be performed without suspending the mddev, because -the log won't be used until new stripes are allocated with ppl_page. -Calling mddev_suspend/resume is still necessary when disabling ppl, -because we want all stripes to finish before stopping the log, but -resize_stripes() can be called after mddev_resume() when ppl is no -longer active. 
- -Suggested-by: NeilBrown <neilb@suse.com> -Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> ---- - drivers/md/raid5-log.h | 5 +- - drivers/md/raid5-ppl.c | 3 + - drivers/md/raid5.c | 88 +++++++++++++++++++++---------------------------- - 3 files changed, 43 insertions(+), 53 deletions(-) - ---- a/drivers/md/raid5-log.h -+++ b/drivers/md/raid5-log.h -@@ -85,11 +85,12 @@ static inline void log_exit(struct r5con - ppl_exit_log(conf); - } - --static inline int log_init(struct r5conf *conf, struct md_rdev *journal_dev) -+static inline int log_init(struct r5conf *conf, struct md_rdev *journal_dev, -+ bool ppl) - { - if (journal_dev) - return r5l_init_log(conf, journal_dev); -- else if (raid5_has_ppl(conf)) -+ else if (ppl) - return ppl_init_log(conf); - - return 0; ---- a/drivers/md/raid5-ppl.c -+++ b/drivers/md/raid5-ppl.c -@@ -328,7 +328,7 @@ int ppl_write_stripe(struct r5conf *conf - struct ppl_io_unit *io = sh->ppl_io; - struct ppl_log *log; - -- if (io || test_bit(STRIPE_SYNCING, &sh->state) || -+ if (io || test_bit(STRIPE_SYNCING, &sh->state) || !sh->ppl_page || - !test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags) || - !test_bit(R5_Insync, &sh->dev[sh->pd_idx].flags)) { - clear_bit(STRIPE_LOG_TRAPPED, &sh->state); -@@ -1204,6 +1204,7 @@ int ppl_init_log(struct r5conf *conf) - } - - conf->log_private = ppl_conf; -+ set_bit(MD_HAS_PPL, &ppl_conf->mddev->flags); - - return 0; - err: ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -464,11 +464,6 @@ static void shrink_buffers(struct stripe - sh->dev[i].page = NULL; - put_page(p); - } -- -- if (sh->ppl_page) { -- put_page(sh->ppl_page); -- sh->ppl_page = NULL; -- } - } - - static int grow_buffers(struct stripe_head *sh, gfp_t gfp) -@@ -486,12 +481,6 @@ static int grow_buffers(struct stripe_he - sh->dev[i].orig_page = page; - } - -- if (raid5_has_ppl(sh->raid_conf)) { -- sh->ppl_page = alloc_page(gfp); -- if 
(!sh->ppl_page) -- return 1; -- } -- - return 0; - } - -@@ -2022,8 +2011,15 @@ static void raid_run_ops(struct stripe_h - put_cpu(); - } - -+static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh) -+{ -+ if (sh->ppl_page) -+ __free_page(sh->ppl_page); -+ kmem_cache_free(sc, sh); -+} -+ - static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, -- int disks) -+ int disks, struct r5conf *conf) - { - struct stripe_head *sh; - int i; -@@ -2037,6 +2033,7 @@ static struct stripe_head *alloc_stripe( - INIT_LIST_HEAD(&sh->r5c); - INIT_LIST_HEAD(&sh->log_list); - atomic_set(&sh->count, 1); -+ sh->raid_conf = conf; - sh->log_start = MaxSector; - for (i = 0; i < disks; i++) { - struct r5dev *dev = &sh->dev[i]; -@@ -2049,6 +2046,14 @@ static struct stripe_head *alloc_stripe( - dev->rreq.bi_io_vec = &dev->rvec; - dev->rreq.bi_max_vecs = 1; - } -+ -+ if (raid5_has_ppl(conf)) { -+ sh->ppl_page = alloc_page(gfp); -+ if (!sh->ppl_page) { -+ free_stripe(sc, sh); -+ sh = NULL; -+ } -+ } - } - return sh; - } -@@ -2056,15 +2061,13 @@ static int grow_one_stripe(struct r5conf - { - struct stripe_head *sh; - -- sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size); -+ sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size, conf); - if (!sh) - return 0; - -- sh->raid_conf = conf; -- - if (grow_buffers(sh, gfp)) { - shrink_buffers(sh); -- kmem_cache_free(conf->slab_cache, sh); -+ free_stripe(conf->slab_cache, sh); - return 0; - } - sh->hash_lock_index = -@@ -2209,9 +2212,6 @@ static int resize_stripes(struct r5conf - int i; - int hash, cnt; - -- if (newsize <= conf->pool_size) -- return 0; /* never bother to shrink */ -- - err = md_allow_write(conf->mddev); - if (err) - return err; -@@ -2227,11 +2227,10 @@ static int resize_stripes(struct r5conf - mutex_lock(&conf->cache_size_mutex); - - for (i = conf->max_nr_stripes; i; i--) { -- nsh = alloc_stripe(sc, GFP_KERNEL, newsize); -+ nsh = alloc_stripe(sc, GFP_KERNEL, newsize, conf); - if (!nsh) - break; - -- 
nsh->raid_conf = conf; - list_add(&nsh->lru, &newstripes); - } - if (i) { -@@ -2239,7 +2238,7 @@ static int resize_stripes(struct r5conf - while (!list_empty(&newstripes)) { - nsh = list_entry(newstripes.next, struct stripe_head, lru); - list_del(&nsh->lru); -- kmem_cache_free(sc, nsh); -+ free_stripe(sc, nsh); - } - kmem_cache_destroy(sc); - mutex_unlock(&conf->cache_size_mutex); -@@ -2265,7 +2264,7 @@ static int resize_stripes(struct r5conf - nsh->dev[i].orig_page = osh->dev[i].page; - } - nsh->hash_lock_index = hash; -- kmem_cache_free(conf->slab_cache, osh); -+ free_stripe(conf->slab_cache, osh); - cnt++; - if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS + - !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) { -@@ -2340,7 +2339,7 @@ static int drop_one_stripe(struct r5conf - return 0; - BUG_ON(atomic_read(&sh->count)); - shrink_buffers(sh); -- kmem_cache_free(conf->slab_cache, sh); -+ free_stripe(conf->slab_cache, sh); - atomic_dec(&conf->active_stripes); - conf->max_nr_stripes--; - return 1; -@@ -3063,7 +3062,7 @@ schedule_reconstruction(struct stripe_he - s->locked++; - } - -- if (raid5_has_ppl(sh->raid_conf) && -+ if (raid5_has_ppl(sh->raid_conf) && sh->ppl_page && - test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) && - !test_bit(STRIPE_FULL_WRITE, &sh->state) && - test_bit(R5_Insync, &sh->dev[pd_idx].flags)) -@@ -7371,7 +7370,7 @@ static int raid5_run(struct mddev *mddev - blk_queue_max_hw_sectors(mddev->queue, UINT_MAX); - } - -- if (log_init(conf, journal_dev)) -+ if (log_init(conf, journal_dev, raid5_has_ppl(conf))) - goto abort; - - return 0; -@@ -7577,7 +7576,7 @@ static int raid5_add_disk(struct mddev * - * The array is in readonly mode if journal is missing, so no - * write requests running. 
We should be safe - */ -- log_init(conf, rdev); -+ log_init(conf, rdev, false); - return 0; - } - if (mddev->recovery_disabled == conf->recovery_disabled) -@@ -7727,6 +7726,9 @@ static int check_reshape(struct mddev *m - mddev->chunk_sectors) - ) < 0) - return -ENOMEM; -+ -+ if (conf->previous_raid_disks + mddev->delta_disks <= conf->pool_size) -+ return 0; /* never bother to shrink */ - return resize_stripes(conf, (conf->previous_raid_disks - + mddev->delta_disks)); - } -@@ -8212,20 +8214,6 @@ static void *raid6_takeover(struct mddev - return setup_conf(mddev); - } - --static void raid5_reset_stripe_cache(struct mddev *mddev) --{ -- struct r5conf *conf = mddev->private; -- -- mutex_lock(&conf->cache_size_mutex); -- while (conf->max_nr_stripes && -- drop_one_stripe(conf)) -- ; -- while (conf->min_nr_stripes > conf->max_nr_stripes && -- grow_one_stripe(conf, GFP_KERNEL)) -- ; -- mutex_unlock(&conf->cache_size_mutex); --} -- - static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf) - { - struct r5conf *conf; -@@ -8240,23 +8228,23 @@ static int raid5_change_consistency_poli - return -ENODEV; - } - -- if (strncmp(buf, "ppl", 3) == 0 && !raid5_has_ppl(conf)) { -+ if (strncmp(buf, "ppl", 3) == 0) { - /* ppl only works with RAID 5 */ -- if (conf->level == 5) { -- mddev_suspend(mddev); -- set_bit(MD_HAS_PPL, &mddev->flags); -- err = log_init(conf, NULL); -- if (!err) -- raid5_reset_stripe_cache(mddev); -- mddev_resume(mddev); -+ if (!raid5_has_ppl(conf) && conf->level == 5) { -+ err = log_init(conf, NULL, true); -+ if (!err) { -+ err = resize_stripes(conf, conf->pool_size); -+ if (err) -+ log_exit(conf); -+ } - } else - err = -EINVAL; - } else if (strncmp(buf, "resync", 6) == 0) { - if (raid5_has_ppl(conf)) { - mddev_suspend(mddev); - log_exit(conf); -- raid5_reset_stripe_cache(mddev); - mddev_resume(mddev); -+ err = resize_stripes(conf, conf->pool_size); - } else if (test_bit(MD_HAS_JOURNAL, &conf->mddev->flags) && - r5l_log_disk_error(conf)) { - 
bool journal_dev_exists = false; diff --git a/patches.suse/0012-raid5-ppl-partial-parity-calculation-optimization.patch b/patches.suse/0012-raid5-ppl-partial-parity-calculation-optimization.patch deleted file mode 100644 index bb1410e3e5..0000000000 --- a/patches.suse/0012-raid5-ppl-partial-parity-calculation-optimization.patch +++ /dev/null @@ -1,99 +0,0 @@ -From: Artur Paszkiewicz <artur.paszkiewicz@intel.com> -Date: Tue, 4 Apr 2017 13:13:58 +0200 -Subject: [PATCH] raid5-ppl: partial parity calculation optimization -Git-commit: ae1713e296449caf820635d384a99936ce281a71 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: fate#321488 - -In case of read-modify-write, partial partity is the same as the result -of ops_run_prexor5(), so we can just copy sh->dev[pd_idx].page into -sh->ppl_page instead of calculating it again. - -Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> ---- - drivers/md/raid5-ppl.c | 20 ++++++++++---------- - drivers/md/raid5.c | 6 +++--- - 2 files changed, 13 insertions(+), 13 deletions(-) - -diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c -index 71968cf..4eb0ebc 100644 ---- a/drivers/md/raid5-ppl.c -+++ b/drivers/md/raid5-ppl.c -@@ -153,7 +153,7 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu, - struct dma_async_tx_descriptor *tx) - { - int disks = sh->disks; -- struct page **xor_srcs = flex_array_get(percpu->scribble, 0); -+ struct page **srcs = flex_array_get(percpu->scribble, 0); - int count = 0, pd_idx = sh->pd_idx, i; - struct async_submit_ctl submit; - -@@ -166,18 +166,18 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu, - * differently. 
- */ - if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) { -- /* rmw: xor old data and parity from updated disks */ -- for (i = disks; i--;) { -- struct r5dev *dev = &sh->dev[i]; -- if (test_bit(R5_Wantdrain, &dev->flags) || i == pd_idx) -- xor_srcs[count++] = dev->page; -- } -+ /* -+ * rmw: xor old data and parity from updated disks -+ * This is calculated earlier by ops_run_prexor5() so just copy -+ * the parity dev page. -+ */ -+ srcs[count++] = sh->dev[pd_idx].page; - } else if (sh->reconstruct_state == reconstruct_state_drain_run) { - /* rcw: xor data from all not updated disks */ - for (i = disks; i--;) { - struct r5dev *dev = &sh->dev[i]; - if (test_bit(R5_UPTODATE, &dev->flags)) -- xor_srcs[count++] = dev->page; -+ srcs[count++] = dev->page; - } - } else { - return tx; -@@ -188,10 +188,10 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu, - + sizeof(struct page *) * (sh->disks + 2)); - - if (count == 1) -- tx = async_memcpy(sh->ppl_page, xor_srcs[0], 0, 0, PAGE_SIZE, -+ tx = async_memcpy(sh->ppl_page, srcs[0], 0, 0, PAGE_SIZE, - &submit); - else -- tx = async_xor(sh->ppl_page, xor_srcs, 0, count, PAGE_SIZE, -+ tx = async_xor(sh->ppl_page, srcs, 0, count, PAGE_SIZE, - &submit); - - return tx; -diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index e04d7b1..f3692ff 100644 ---- a/drivers/md/raid5.c -+++ b/drivers/md/raid5.c -@@ -2079,9 +2079,6 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) - async_tx_ack(tx); - } - -- if (test_bit(STRIPE_OP_PARTIAL_PARITY, &ops_request)) -- tx = ops_run_partial_parity(sh, percpu, tx); -- - if (test_bit(STRIPE_OP_PREXOR, &ops_request)) { - if (level < 6) - tx = ops_run_prexor5(sh, percpu, tx); -@@ -2089,6 +2086,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) - tx = ops_run_prexor6(sh, percpu, tx); - } - -+ if (test_bit(STRIPE_OP_PARTIAL_PARITY, &ops_request)) -+ tx = ops_run_partial_parity(sh, percpu, tx); -+ - if 
(test_bit(STRIPE_OP_BIODRAIN, &ops_request)) { - tx = ops_run_biodrain(sh, tx); - overlap_clear++; --- -2.10.2 - diff --git a/patches.suse/0013-raid5-ppl-use-a-single-mempool-for-ppl_io_unit-and-h.patch b/patches.suse/0013-raid5-ppl-use-a-single-mempool-for-ppl_io_unit-and-h.patch deleted file mode 100644 index 150517faa1..0000000000 --- a/patches.suse/0013-raid5-ppl-use-a-single-mempool-for-ppl_io_unit-and-h.patch +++ /dev/null @@ -1,145 +0,0 @@ -From: Artur Paszkiewicz <artur.paszkiewicz@intel.com> -Date: Tue, 11 Apr 2017 20:50:51 +0200 -Subject: [PATCH] raid5-ppl: use a single mempool for ppl_io_unit and header_page -Git-commit: fcd403aff643a120dbb3f1851337df5353ac8970 -Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git -Patch-mainline: Queued in subsystem maintainer repository -References: fate#321488 - -Allocate both struct ppl_io_unit and its header_page from a shared -mempool to avoid a possible deadlock. Implement allocate and free -functions for the mempool, remove the second pool for allocating -header_page. The header_pages are now freed with their io_units, not -when the ppl bio completes. Also, use GFP_NOWAIT instead of GFP_ATOMIC -for allocating ppl_io_unit because we can handle failed allocations and -there is no reason to utilize emergency reserves. 
- -Suggested-by: NeilBrown <neilb@suse.com> -Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com> -Signed-off-by: Shaohua Li <shli@fb.com> -Signed-off-by: Coly Li <colyli@suse.de> ---- - drivers/md/raid5-ppl.c | 53 +++++++++++++++++++++++++++++++++++--------------- - 1 file changed, 37 insertions(+), 16 deletions(-) - -diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c -index 4eb0ebc..5d25beb 100644 ---- a/drivers/md/raid5-ppl.c -+++ b/drivers/md/raid5-ppl.c -@@ -102,7 +102,6 @@ struct ppl_conf { - struct kmem_cache *io_kc; - mempool_t *io_pool; - struct bio_set *bs; -- mempool_t *meta_pool; - - /* used only for recovery */ - int recovered_entries; -@@ -197,25 +196,55 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu, - return tx; - } - -+static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data) -+{ -+ struct kmem_cache *kc = pool_data; -+ struct ppl_io_unit *io; -+ -+ io = kmem_cache_alloc(kc, gfp_mask); -+ if (!io) -+ return NULL; -+ -+ io->header_page = alloc_page(gfp_mask); -+ if (!io->header_page) { -+ kmem_cache_free(kc, io); -+ return NULL; -+ } -+ -+ return io; -+} -+ -+static void ppl_io_pool_free(void *element, void *pool_data) -+{ -+ struct kmem_cache *kc = pool_data; -+ struct ppl_io_unit *io = element; -+ -+ __free_page(io->header_page); -+ kmem_cache_free(kc, io); -+} -+ - static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log, - struct stripe_head *sh) - { - struct ppl_conf *ppl_conf = log->ppl_conf; - struct ppl_io_unit *io; - struct ppl_header *pplhdr; -+ struct page *header_page; - -- io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC); -+ io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT); - if (!io) - return NULL; - -+ header_page = io->header_page; - memset(io, 0, sizeof(*io)); -+ io->header_page = header_page; -+ - io->log = log; - INIT_LIST_HEAD(&io->log_sibling); - INIT_LIST_HEAD(&io->stripe_list); - atomic_set(&io->pending_stripes, 0); - bio_init(&io->bio, io->biovec, 
PPL_IO_INLINE_BVECS); - -- io->header_page = mempool_alloc(ppl_conf->meta_pool, GFP_NOIO); - pplhdr = page_address(io->header_page); - clear_page(pplhdr); - memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED); -@@ -371,8 +400,6 @@ static void ppl_log_endio(struct bio *bio) - if (bio->bi_error) - md_error(ppl_conf->mddev, log->rdev); - -- mempool_free(io->header_page, ppl_conf->meta_pool); -- - list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) { - list_del_init(&sh->log_list); - -@@ -1007,7 +1034,6 @@ static void __ppl_exit_log(struct ppl_conf *ppl_conf) - - kfree(ppl_conf->child_logs); - -- mempool_destroy(ppl_conf->meta_pool); - if (ppl_conf->bs) - bioset_free(ppl_conf->bs); - mempool_destroy(ppl_conf->io_pool); -@@ -1113,25 +1139,20 @@ int ppl_init_log(struct r5conf *conf) - - ppl_conf->io_kc = KMEM_CACHE(ppl_io_unit, 0); - if (!ppl_conf->io_kc) { -- ret = -EINVAL; -+ ret = -ENOMEM; - goto err; - } - -- ppl_conf->io_pool = mempool_create_slab_pool(conf->raid_disks, ppl_conf->io_kc); -+ ppl_conf->io_pool = mempool_create(conf->raid_disks, ppl_io_pool_alloc, -+ ppl_io_pool_free, ppl_conf->io_kc); - if (!ppl_conf->io_pool) { -- ret = -EINVAL; -+ ret = -ENOMEM; - goto err; - } - - ppl_conf->bs = bioset_create(conf->raid_disks, 0); - if (!ppl_conf->bs) { -- ret = -EINVAL; -- goto err; -- } -- -- ppl_conf->meta_pool = mempool_create_page_pool(conf->raid_disks, 0); -- if (!ppl_conf->meta_pool) { -- ret = -EINVAL; -+ ret = -ENOMEM; - goto err; - } - --- -2.10.2 - diff --git a/series.conf b/series.conf index c504201451..f08ea2e514 100644 --- a/series.conf +++ b/series.conf @@ -11537,6 +11537,7 @@ patches.fixes/0001-md-handle-read-only-member-devices-better.patch # FATE#321488, drivers/md back port upto 4.10, part 1 + patches.suse/0001-raid5-separate-header-for-log-functions.patch patches.drivers/0002-md-r5cache-Check-array-size-in-r5l_init_log.patch patches.drivers/0003-md-r5cache-move-some-code-to-raid5.h.patch 
patches.drivers/0004-md-r5cache-State-machine-for-raid5-cache-write-back-.patch @@ -11580,22 +11581,14 @@ patches.drivers/0042-md-raid5-cache-exclude-reclaiming-stripes-in-reclaim.patch patches.drivers/0043-md-disable-WRITE-SAME-if-it-fails-in-underlayer-disk.patch patches.drivers/0044-md-fail-if-mddev-bio_set-can-t-be-created.patch - patches.drivers/0045-raid5-only-dispatch-IO-from-raid5d-for-harddisk-raid.patch - patches.drivers/0046-md-raid5-sort-bios.patch - patches.suse/0001-raid5-separate-header-for-log-functions.patch + # end part 1 + patches.suse/0002-md-superblock-changes-for-PPL.patch patches.suse/0003-raid5-ppl-Partial-Parity-Log-write-logging-implement.patch patches.suse/0004-md-add-sysfs-entries-for-PPL.patch patches.suse/0005-raid5-ppl-load-and-recover-the-log.patch patches.suse/0006-raid5-ppl-support-disk-hot-add-remove-with-PPL.patch patches.suse/0007-raid5-ppl-runtime-PPL-enabling-or-disabling.patch - patches.suse/0008-raid5-ppl-silence-a-misleading-warning-message.patch - patches.suse/0009-md-raid5-use-consistency_policy-to-remove-journal-fe.patch - patches.suse/0010-raid5-ppl-move-no_mem_stripes-to-struct-ppl_conf.patch - patches.suse/0011-raid5-ppl-use-resize_stripes-when-enabling-or-disabl.patch - patches.suse/0012-raid5-ppl-partial-parity-calculation-optimization.patch - patches.suse/0013-raid5-ppl-use-a-single-mempool-for-ppl_io_unit-and-h.patch - # end part 1 ########################################################## # NVDIMM @@ -12290,7 +12283,6 @@ patches.drivers/mmc-fix-use-after-free-of-struct-request.patch patches.fixes/scsi-fix-memory-leak-of-sdpk-on-when-gd-fails-to-all.patch patches.drivers/0397-block-fix-bio_will_gap-for-first-bvec-with-offset.patch - patches.drivers/0398-blk-mq-Introduce-blk_mq_delay_run_hw_queue.patch #bsc#1020907 patches.drivers/0001-generic-syscalls-wire-up-preadv2-and-pwritev2-syscal.patch patches.drivers/0002-arm64-unistd32.h-wire-up-missing-syscalls-for-compat.patch @@ -12393,98 +12385,9 @@ 
patches.drivers/0102-dm-rq-cope-with-DM-device-destruction-while-in-dm_ol.patch patches.drivers/0103-dm-crypt-replace-RCU-read-side-section-with-rwsem.patch patches.drivers/0104-dm-don-t-allow-ioctls-to-targets-that-don-t-map-to-w.patch + patches.drivers/0105-raid5-only-dispatch-IO-from-raid5d-for-harddisk-raid.patch patches.drivers/0106-dm-raid-fix-raid-check-regression-due-to-improper-cl.patch patches.arch/0107-MIPS-sysmips-Remove-duplicated-include-from-syscall..patch - patches.drivers/0108-md-raid1-use-bio_clone_bioset_partial-in-case-of-wri.patch - patches.drivers/0109-md-remove-unnecessary-check-on-mddev.patch - patches.drivers/0110-md-fast-clone-bio-in-bio_clone_mddev.patch - patches.drivers/0111-dm-block-manager-add-unlikely-annotations-on-dm_bufi.patch - patches.drivers/0112-dm-space-map-common-memcpy-the-disk-root-to-ensure-i.patch - patches.drivers/0113-dm-btree-use-GFP_NOFS-in-dm_btree_del.patch - patches.drivers/0114-dm-bitset-introduce-cursor-api.patch - patches.drivers/0115-dm-cache-metadata-use-bitset-cursor-api-to-load-disc.patch - patches.drivers/0116-dm-cache-metadata-add-metadata2-feature.patch - patches.drivers/0117-dm-cache-metadata-name-the-cache-block-that-couldn-t.patch - patches.drivers/0118-dm-bitset-add-dm_bitset_new.patch - patches.drivers/0119-dm-cache-metadata-use-dm_bitset_new-to-create-the-di.patch - patches.drivers/0120-dm-persistent-data-add-cursor-skip-functions-to-the-.patch - patches.drivers/0121-dm-cache-metadata-use-cursor-api-in-blocks_are_clean.patch - patches.drivers/0122-dm-space-map-metadata-constify-dm_space_map-structur.patch - patches.drivers/0123-md-raid5-Don-t-reinvent-the-wheel-but-use-existing-l.patch - patches.drivers/0124-dm-round-robin-revert-use-percpu-repeat_count-and-cu.patch - patches.drivers/0125-md-linear-shutup-lockdep-warnning.patch - patches.drivers/0126-md-raid1-fix-write-behind-issues-introduced-by-bio_c.patch - patches.drivers/0127-dm-raid-bump-the-target-version.patch - 
patches.drivers/0128-md-raid10-submit-bio-directly-to-replacement-disk.patch - patches.drivers/0129-md-delete-dead-code.patch - patches.drivers/0130-md-don-t-impose-the-MD_SB_DISKS-limit-on-arrays-with.patch - patches.drivers/0131-drivers-md-bcache-util.h-remove-duplicate-inclusion-.patch - patches.drivers/0132-md-fix-super_offset-endianness-in-super_1_rdev_size_.patch - patches.drivers/0133-md-fix-incorrect-use-of-lexx_to_cpu-in-does_sb_need_.patch - patches.drivers/0134-md-r5cache-fix-set_syndrome_sources-for-data-in-cach.patch - patches.drivers/0135-md-raid1-fix-a-trivial-typo-in-comments.patch - patches.drivers/0136-dm-verity-fec-limit-error-correction-recursion.patch - patches.drivers/0137-dm-cache-metadata-fix-metadata2-format-s-blocks_are_.patch - patches.drivers/0138-dm-raid-fix-NULL-pointer-dereference-for-raid1-witho.patch - patches.drivers/0139-dm-verity-fec-fix-bufio-leaks.patch - patches.drivers/0140-dm-rq-Avoid-that-request-processing-stalls-sporadica.patch - patches.drivers/0141-md-raid5-prioritize-stripes-for-writeback.patch - patches.drivers/0142-md-raid5-cache-bump-flush-stripe-batch-size.patch - patches.drivers/0143-md-r5cache-improve-recovery-with-read-ahead-page-poo.patch - patches.drivers/0144-md-r5cache-handle-R5LOG_PAYLOAD_FLUSH-in-recovery.patch - patches.drivers/0145-md-r5cache-generate-R5LOG_PAYLOAD_FLUSH.patch - patches.drivers/0146-md-raid5-use-md_write_start-to-count-stripes-not-bio.patch - patches.drivers/0147-md-raid5-simplfy-delaying-of-writes-while-metadata-i.patch - patches.drivers/0148-md-raid5-call-bio_endio-directly-rather-than-queuein.patch - patches.drivers/0149-md-raid5-use-bio_inc_remaining-instead-of-repurposin.patch - patches.drivers/0150-md-raid5-remove-over-loading-of-bi_phys_segments.patch - patches.drivers/0151-Revert-md-raid5-limit-request-size-according-to-impl.patch - patches.drivers/0152-md-raid1-raid10-move-rXbio-accounting-closer-to-allo.patch - patches.drivers/0153-md-raid10-stop-using-bi_phys_segments.patch - 
patches.drivers/0154-md-raid1-stop-using-bi_phys_segment.patch - patches.drivers/0155-md-raid5-don-t-test-writes_pending-in-raid5_remove_d.patch - patches.drivers/0156-md-factor-out-set_in_sync.patch - patches.drivers/0157-md-close-a-race-with-setting-mddev-in_sync.patch - patches.drivers/0158-percpu-refcount-support-synchronous-switch-to-atomic.patch - patches.drivers/0159-MD-use-per-cpu-counter-for-writes_pending.patch - patches.drivers/0160-md-raid10-refactor-some-codes-from-raid10_write_requ.patch - patches.drivers/0161-md-fix-several-trivial-typos-in-comments.patch - patches.drivers/0162-md-raid1-raid10-don-t-handle-failure-of-bio_add_page.patch - patches.drivers/0163-md-move-two-macros-into-md.h.patch - patches.drivers/0164-md-prepare-for-managing-resync-I-O-pages-in-clean-wa.patch - patches.drivers/0165-md-raid1-simplify-r1buf_pool_free.patch - patches.drivers/0166-md-raid1-don-t-use-bio-s-vec-table-to-manage-resync-.patch - patches.drivers/0167-md-raid1-retrieve-page-from-pre-allocated-resync-pag.patch - patches.drivers/0168-md-raid1-use-bio-helper-in-process_checks.patch - patches.drivers/0169-md-raid1-move-offset-out-of-loop.patch - patches.drivers/0170-md-raid1-improve-write-behind.patch - patches.drivers/0171-md-raid10-refactor-code-of-read-reshape-s-.bi_end_io.patch - patches.drivers/0172-md-raid10-don-t-use-bio-s-vec-table-to-manage-resync.patch - patches.drivers/0173-md-raid10-retrieve-page-from-preallocated-resync-pag.patch - patches.drivers/0174-md-raid10-avoid-direct-access-to-bvec-table-in-handl.patch - patches.drivers/0175-md-raid1-skip-data-copy-for-behind-io-for-discard-re.patch - patches.drivers/0176-md-raid5-cache-fix-payload-endianness-problem-in-rai.patch - patches.drivers/0177-md-raid1-kill-warning-on-powerpc_pseries.patch - patches.drivers/0178-md-update-slab_cache-before-releasing-new-stripes-wh.patch - patches.drivers/0179-md-raid6-Fix-anomily-when-recovering-a-single-device.patch - 
patches.drivers/0180-md-raid10-reset-the-first-at-the-end-of-loop.patch - patches.drivers/0181-md-MD_CLOSING-needs-to-be-cleared-after-called-md_se.patch - patches.drivers/0182-md.c-didn-t-unlock-the-mddev-before-return-EINVAL-in.patch - patches.drivers/0183-md-raid1-avoid-reusing-a-resync-bio-after-error-hand.patch - patches.drivers/0184-md-raid1-simplify-the-splitting-of-requests.patch - patches.drivers/0185-md-raid1-simplify-alloc_behind_master_bio.patch - patches.drivers/0186-md-raid1-simplify-handle_read_error.patch - patches.drivers/0187-md-raid1-factor-out-flush_bio_list.patch - patches.drivers/0188-md-raid10-simplify-the-splitting-of-requests.patch - patches.drivers/0189-md-raid10-simplify-handle_read_error.patch - patches.drivers/0190-md-raid5-make-chunk_aligned_read-split-bios-more-cle.patch - patches.drivers/0191-md-linear-improve-bio-splitting.patch - patches.drivers/0192-md-raid0-fix-up-bio-splitting.patch - patches.drivers/0193-md-allow-creation-of-mdNNN-arrays-via-md_mod-paramet.patch - patches.drivers/0194-md-support-disabling-of-create-on-open-semantics.patch - patches.drivers/0195-md-cluster-Fix-a-memleak-in-an-error-handling-path.patch - patches.drivers/0196-md-raid10-wait-up-frozen-array-in-handle_write_compl.patch - patches.drivers/0197-md-raid1-10-remove-unused-queue.patch patches.fixes/jbd2-Fix-dbench4-performance-regression-for-nobarrie.patch # end part 3 |