Home Home > GIT Browse
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Takashi Iwai <tiwai@suse.de> 2017-05-02 16:53:36 +0200
committer Takashi Iwai <tiwai@suse.de> 2017-05-02 16:54:25 +0200
commit d2e1679e1f725e3fc171859001449caab16f701a (patch)
tree f2fce7e0708aa20b741507afa43becd84007a150
parent da2fdcbb2b2b7d34fc3f0915e2270db4ae65b052 (diff)
Revert "- md: clear WantReplacement once disk is removed..." (bsc#1037027).
We revert the recent MD changes due to regressions for RAID0.
-rw-r--r-- patches.drivers/0058-md-reduce-the-number-of-synchronize_rcu-calls-when-m.patch 104
-rw-r--r-- patches.drivers/0081-block-kill-off-q-flush_flags.patch 67
-rw-r--r-- patches.drivers/0087-md-cleanup-mddev-flag-clear-for-takeover.patch 10
-rw-r--r-- patches.drivers/0150-md-raid5-remove-over-loading-of-bi_phys_segments.patch 22
-rw-r--r-- patches.drivers/0198-md-clear-WantReplacement-once-disk-is-removed.patch 89
-rw-r--r-- patches.drivers/0199-md-raid1-Use-a-new-variable-to-count-flighting-sync-.patch 81
-rw-r--r-- patches.drivers/0310-block-bio-pass-bvec-table-to-bio_init.patch 8
-rw-r--r-- patches.suse/0001-raid5-separate-header-for-log-functions.patch 30
-rw-r--r-- patches.suse/0002-md-cluster-remove-a-disk-asynchronously-from-cluster.patch 25
-rw-r--r-- patches.suse/0002-md-superblock-changes-for-PPL.patch 65
-rw-r--r-- patches.suse/0003-raid5-ppl-Partial-Parity-Log-write-logging-implement.patch 90
-rw-r--r-- patches.suse/0011-raid5-ppl-use-resize_stripes-when-enabling-or-disabl.patch 40
-rw-r--r-- patches.suse/md-expedite-hot-remove 91
-rw-r--r-- series.conf 29
14 files changed, 336 insertions, 415 deletions
diff --git a/patches.drivers/0058-md-reduce-the-number-of-synchronize_rcu-calls-when-m.patch b/patches.drivers/0058-md-reduce-the-number-of-synchronize_rcu-calls-when-m.patch
index 163ecf3b4c..ccae2df3e7 100644
--- a/patches.drivers/0058-md-reduce-the-number-of-synchronize_rcu-calls-when-m.patch
+++ b/patches.drivers/0058-md-reduce-the-number-of-synchronize_rcu-calls-when-m.patch
@@ -1,10 +1,10 @@
From: NeilBrown <neilb@suse.com>
Date: Thu, 2 Jun 2016 16:19:53 +1000
-Subject: [PATCH] md: reduce the number of synchronize_rcu() calls when
- multiple devices fail.
+Subject: md: reduce the number of synchronize_rcu() calls when multiple
+ devices fail.
+References: bsc#1003941,FATE#321732
Git-commit: d787be4092e27728cb4c012bee9762098ef3c662
Patch-Mainline: v4.8-rc1
-References: bsc#1003941,FATE#321732
Every time a device is removed with ->hot_remove_disk() a synchronize_rcu() call is made
which can delay several milliseconds in some case.
@@ -19,33 +19,41 @@ synchronize_rcu() step if the flag is set.
fix build error(Shaohua)
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
-Signed-off-by: Coly Li <colyli@suse.de>
+Signed-off-by: Hannes Reinecke <hare@suse.de>
---
- drivers/md/md.c | 29 ++++++++++++++++++++++++++---
+ drivers/md/md.c | 51 +++++++++++++++++++++++++++++---------------------
drivers/md/md.h | 5 +++++
drivers/md/multipath.c | 14 ++++++++------
drivers/md/raid1.c | 17 ++++++++++-------
- drivers/md/raid10.c | 19 +++++++++++--------
+ drivers/md/raid10.c | 20 +++++++++++---------
drivers/md/raid5.c | 15 +++++++++------
- 6 files changed, 69 insertions(+), 30 deletions(-)
+ 6 files changed, 73 insertions(+), 49 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
-index 0793754eeffd..2ed547f5c3b6 100644
+index c654e860..93cf987 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
-@@ -8180,15 +8180,34 @@ static int remove_and_add_spares(struct mddev *mddev,
+@@ -8193,37 +8193,46 @@ static int remove_and_add_spares(struct mddev *mddev,
+ {
struct md_rdev *rdev;
int spares = 0;
+- int remove_some = 0;
+-
int removed = 0;
+ bool remove_some = false;
- rdev_for_each(rdev, mddev)
+ rdev_for_each(rdev, mddev) {
-+ if ((this == NULL || rdev == this) &&
-+ rdev->raid_disk >= 0 &&
-+ !test_bit(Blocked, &rdev->flags) &&
+ if ((this == NULL || rdev == this) &&
+ rdev->raid_disk >= 0 &&
+ !test_bit(Blocked, &rdev->flags) &&
+- (test_bit(Faulty, &rdev->flags) ||
+- (!test_bit(In_sync, &rdev->flags) &&
+- !test_bit(Journal, &rdev->flags))) &&
+ test_bit(Faulty, &rdev->flags) &&
-+ atomic_read(&rdev->nr_pending)==0) {
+ atomic_read(&rdev->nr_pending)==0) {
+- remove_some = 1;
+- set_bit(RemoveSynchronised, &rdev->flags);
+ /* Faulty non-Blocked devices with nr_pending == 0
+ * never get nr_pending incremented,
+ * never get Faulty cleared, and never get Blocked set.
@@ -53,41 +61,52 @@ index 0793754eeffd..2ed547f5c3b6 100644
+ */
+ remove_some = true;
+ set_bit(RemoveSynchronized, &rdev->flags);
-+ }
+ }
+- if (remove_some) {
+ }
+
+ if (remove_some)
-+ synchronize_rcu();
+ synchronize_rcu();
+- /* Now we know that no-one will take a new reference */
+- list_for_each_entry(rdev, &mddev->disks, same_set)
+- if (test_bit(RemoveSynchronised, &rdev->flags)) {
+- if (mddev->pers->hot_remove_disk(
+- mddev, rdev) == 0) {
+- sysfs_unlink_rdev(mddev, rdev);
+- rdev->raid_disk = -1;
+- removed++;
+- }
+- clear_bit(RemoveSynchronised, &rdev->flags);
+ rdev_for_each(rdev, mddev) {
- if ((this == NULL || rdev == this) &&
- rdev->raid_disk >= 0 &&
- !test_bit(Blocked, &rdev->flags) &&
-- (test_bit(Faulty, &rdev->flags) ||
++ if ((this == NULL || rdev == this) &&
++ rdev->raid_disk >= 0 &&
++ !test_bit(Blocked, &rdev->flags) &&
+ ((test_bit(RemoveSynchronized, &rdev->flags) ||
- (!test_bit(In_sync, &rdev->flags) &&
- !test_bit(Journal, &rdev->flags))) &&
-- atomic_read(&rdev->nr_pending)==0) {
++ (!test_bit(In_sync, &rdev->flags) &&
++ !test_bit(Journal, &rdev->flags))) &&
+ atomic_read(&rdev->nr_pending)==0)) {
- if (mddev->pers->hot_remove_disk(
- mddev, rdev) == 0) {
- sysfs_unlink_rdev(mddev, rdev);
-@@ -8196,6 +8215,10 @@ static int remove_and_add_spares(struct mddev *mddev,
- removed++;
++ if (mddev->pers->hot_remove_disk(
++ mddev, rdev) == 0) {
++ sysfs_unlink_rdev(mddev, rdev);
++ rdev->raid_disk = -1;
++ removed++;
}
- }
+- synchronize_rcu();
+- /* Now any temp reference that was taken is released */
++ }
+ if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
+ clear_bit(RemoveSynchronized, &rdev->flags);
-+ }
+ }
+
if (removed && mddev->kobj.sd)
sysfs_notify(&mddev->kobj, NULL, "degraded");
diff --git a/drivers/md/md.h b/drivers/md/md.h
-index 03b19aad4921..dc65ca65b26e 100644
+index 43ebfe8..896182f 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
-@@ -163,6 +163,11 @@ enum flag_bits {
- * than other devices in the array
+@@ -168,6 +168,11 @@ enum flag_bits {
+ * another call.
*/
ClusterRemove,
+ RemoveSynchronized, /* synchronize_rcu() was called after
@@ -99,7 +118,7 @@ index 03b19aad4921..dc65ca65b26e 100644
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
-index 7eb9972a37e6..c145a5a114eb 100644
+index 7eb9972..c145a5a 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -298,12 +298,14 @@ static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
@@ -124,10 +143,10 @@ index 7eb9972a37e6..c145a5a114eb 100644
err = md_integrity_register(mddev);
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
-index 34f20c03d1f6..5027ef4752ac 100644
+index af751c6..0922b28 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
-@@ -1656,13 +1656,16 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
+@@ -1661,13 +1661,16 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
goto abort;
}
p->rdev = NULL;
@@ -152,10 +171,10 @@ index 34f20c03d1f6..5027ef4752ac 100644
* Move down the replacement. We drain all IO before
* doing this to avoid confusion.
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
-index 34facda18e72..8ee5d96e6a2d 100644
+index 760dece..7e4b370 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
-@@ -1766,7 +1766,7 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
+@@ -1768,7 +1768,7 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
err = -EBUSY;
goto abort;
}
@@ -164,11 +183,12 @@ index 34facda18e72..8ee5d96e6a2d 100644
* is not possible.
*/
if (!test_bit(Faulty, &rdev->flags) &&
-@@ -1778,13 +1778,16 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
+@@ -1780,14 +1780,16 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
goto abort;
}
*rdevp = NULL;
-- synchronize_rcu();
+- if (!test_bit(RemoveSynchronised, &rdev->flags))
+- synchronize_rcu();
- if (atomic_read(&rdev->nr_pending)) {
- /* lost the race, try later */
- err = -EBUSY;
@@ -189,10 +209,10 @@ index 34facda18e72..8ee5d96e6a2d 100644
p->rdev = p->replacement;
clear_bit(Replacement, &p->replacement->flags);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
-index f6a191aaaa91..413cc7d847da 100644
+index 8beda92..c3a5540 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
-@@ -7197,12 +7197,15 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
+@@ -7206,12 +7206,15 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
goto abort;
}
*rdevp = NULL;
@@ -215,5 +235,5 @@ index f6a191aaaa91..413cc7d847da 100644
p->rdev = p->replacement;
clear_bit(Replacement, &p->replacement->flags);
--
-2.12.0
+1.8.5.6
diff --git a/patches.drivers/0081-block-kill-off-q-flush_flags.patch b/patches.drivers/0081-block-kill-off-q-flush_flags.patch
index d764d4022e..3a673133b1 100644
--- a/patches.drivers/0081-block-kill-off-q-flush_flags.patch
+++ b/patches.drivers/0081-block-kill-off-q-flush_flags.patch
@@ -12,19 +12,21 @@ entries.
Signed-off-by: Jens Axboe <axboe@fb.com>
Acked-by: Hannes Reinecke <hare@suse.de>
---
- block/blk-core.c | 3 ++-
- block/blk-flush.c | 11 ++++++-----
- block/blk-settings.c | 18 ++++++++++--------
- drivers/block/xen-blkback/xenbus.c | 2 +-
- drivers/md/dm-table.c | 12 ++++++------
- drivers/md/raid5-cache.c | 3 ++-
- drivers/target/target_core_iblock.c | 6 +++---
- include/linux/blkdev.h | 5 ++---
- 8 files changed, 32 insertions(+), 28 deletions(-)
+ block/blk-core.c | 3 ++-
+ block/blk-flush.c | 11 ++++++-----
+ block/blk-settings.c | 18 ++++++++++--------
+ drivers/block/xen-blkback/xenbus.c | 2 +-
+ drivers/md/dm-table.c | 12 ++++++------
+ drivers/md/raid5-cache.c | 3 ++-
+ drivers/target/target_core_iblock.c | 6 +++---
+ include/linux/blkdev.h | 7 +++----
+ 8 files changed, 33 insertions(+), 29 deletions(-)
+diff --git a/block/blk-core.c b/block/blk-core.c
+index c593882..cd5c9ab 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
-@@ -1966,7 +1966,8 @@ generic_make_request_checks(struct bio *
+@@ -1966,7 +1966,8 @@ generic_make_request_checks(struct bio *bio)
* drivers without flush support don't have to worry
* about them.
*/
@@ -34,6 +36,8 @@ Acked-by: Hannes Reinecke <hare@suse.de>
bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
if (!nr_sectors) {
err = 0;
+diff --git a/block/blk-flush.c b/block/blk-flush.c
+index 9c423e5..b1c91d2 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -95,17 +95,18 @@ enum {
@@ -58,7 +62,7 @@ Acked-by: Hannes Reinecke <hare@suse.de>
policy |= REQ_FSEQ_POSTFLUSH;
}
return policy;
-@@ -384,7 +385,7 @@ static void mq_flush_data_end_io(struct
+@@ -384,7 +385,7 @@ static void mq_flush_data_end_io(struct request *rq, int error)
void blk_insert_flush(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -67,7 +71,7 @@ Acked-by: Hannes Reinecke <hare@suse.de>
unsigned int policy = blk_flush_policy(fflags, rq);
struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
-@@ -393,7 +394,7 @@ void blk_insert_flush(struct request *rq
+@@ -393,7 +394,7 @@ void blk_insert_flush(struct request *rq)
* REQ_FLUSH and FUA for the driver.
*/
rq->cmd_flags &= ~REQ_FLUSH;
@@ -76,9 +80,11 @@ Acked-by: Hannes Reinecke <hare@suse.de>
rq->cmd_flags &= ~REQ_FUA;
/*
+diff --git a/block/blk-settings.c b/block/blk-settings.c
+index 88c303b..dedb7b8 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
-@@ -827,7 +827,12 @@ EXPORT_SYMBOL(blk_queue_update_dma_align
+@@ -827,7 +827,12 @@ EXPORT_SYMBOL(blk_queue_update_dma_alignment);
void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
{
@@ -92,7 +98,7 @@ Acked-by: Hannes Reinecke <hare@suse.de>
}
EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
-@@ -842,16 +847,13 @@ EXPORT_SYMBOL_GPL(blk_queue_flush_queuea
+@@ -842,16 +847,13 @@ EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
{
spin_lock_irq(q->queue_lock);
@@ -113,9 +119,11 @@ Acked-by: Hannes Reinecke <hare@suse.de>
queue_flag_clear(QUEUE_FLAG_FUA, q);
spin_unlock_irq(q->queue_lock);
}
+diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
+index f0105d8..01c24fa 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
-@@ -413,7 +413,7 @@ static int xen_vbd_create(struct xen_blk
+@@ -413,7 +413,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
vbd->type |= VDISK_REMOVABLE;
q = bdev_get_queue(bdev);
@@ -124,9 +132,11 @@ Acked-by: Hannes Reinecke <hare@suse.de>
vbd->flush_support = true;
if (q && blk_queue_secdiscard(q))
+diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
+index 40ebf9d..80fb9bb 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
-@@ -1365,13 +1365,13 @@ static void dm_table_verify_integrity(st
+@@ -1354,13 +1354,13 @@ static void dm_table_verify_integrity(struct dm_table *t)
static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
@@ -143,7 +153,7 @@ Acked-by: Hannes Reinecke <hare@suse.de>
{
struct dm_target *ti;
unsigned i = 0;
-@@ -1392,7 +1392,7 @@ static bool dm_table_supports_flush(stru
+@@ -1381,7 +1381,7 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
return true;
if (ti->type->iterate_devices &&
@@ -152,7 +162,7 @@ Acked-by: Hannes Reinecke <hare@suse.de>
return true;
}
-@@ -1535,9 +1535,9 @@ void dm_table_set_restrictions(struct dm
+@@ -1524,9 +1524,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
else
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
@@ -164,17 +174,19 @@ Acked-by: Hannes Reinecke <hare@suse.de>
fua = true;
}
blk_queue_write_cache(q, wc, fua);
+diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
+index ac51bc5..e889e2d 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
-@@ -2783,6 +2783,7 @@ void r5c_update_on_rdev_error(struct mdd
+@@ -1188,6 +1188,7 @@ ioerr:
int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
{
+ struct request_queue *q = bdev_get_queue(rdev->bdev);
struct r5l_log *log;
+ char b[BDEVNAME_SIZE];
- if (PAGE_SIZE != 4096)
-@@ -2808,7 +2809,7 @@ int r5l_init_log(struct r5conf *conf, st
+@@ -1197,7 +1198,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
return -ENOMEM;
log->rdev = rdev;
@@ -183,9 +195,11 @@ Acked-by: Hannes Reinecke <hare@suse.de>
log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid,
sizeof(rdev->mddev->uuid));
+diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
+index 3cbe060..2077bc2 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
-@@ -686,10 +686,10 @@ iblock_execute_rw(struct se_cmd *cmd, st
+@@ -686,10 +686,10 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
* Force writethrough using WRITE_FUA if a volatile write cache
* is not enabled, or if initiator set the Force Unit Access bit.
*/
@@ -198,7 +212,7 @@ Acked-by: Hannes Reinecke <hare@suse.de>
rw = WRITE_FUA;
else
rw = WRITE;
-@@ -835,7 +835,7 @@ static bool iblock_get_write_cache(struc
+@@ -835,7 +835,7 @@ static bool iblock_get_write_cache(struct se_device *dev)
struct block_device *bd = ib_dev->ibd_bd;
struct request_queue *q = bdev_get_queue(bd);
@@ -207,6 +221,8 @@ Acked-by: Hannes Reinecke <hare@suse.de>
}
static const struct target_backend_ops iblock_ops = {
+diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
+index 16131fb..11a7551 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -433,8 +433,6 @@ struct request_queue {
@@ -226,7 +242,7 @@ Acked-by: Hannes Reinecke <hare@suse.de>
#define QUEUE_FLAG_NO_ROUND 26 /* Don't round timeout up to next second */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
-@@ -1378,7 +1377,7 @@ static inline unsigned int block_size(st
+@@ -1367,7 +1366,7 @@ static inline unsigned int block_size(struct block_device *bdev)
static inline bool queue_flush_queueable(struct request_queue *q)
{
@@ -235,3 +251,6 @@ Acked-by: Hannes Reinecke <hare@suse.de>
}
typedef struct {struct page *v;} Sector;
+--
+1.8.5.6
+
diff --git a/patches.drivers/0087-md-cleanup-mddev-flag-clear-for-takeover.patch b/patches.drivers/0087-md-cleanup-mddev-flag-clear-for-takeover.patch
index a102cfeeec..0c21941f51 100644
--- a/patches.drivers/0087-md-cleanup-mddev-flag-clear-for-takeover.patch
+++ b/patches.drivers/0087-md-cleanup-mddev-flag-clear-for-takeover.patch
@@ -24,7 +24,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
-@@ -212,6 +212,7 @@ extern int rdev_clear_badblocks(struct m
+@@ -225,6 +225,7 @@ extern int rdev_clear_badblocks(struct m
int is_new);
struct md_cluster_info;
@@ -32,7 +32,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
enum mddev_flags {
MD_ARRAY_FIRST_USE, /* First use of array, needs initialization */
MD_CLOSING, /* If set, we are closing the array, do not open
-@@ -706,4 +707,11 @@ static inline void mddev_check_writesame
+@@ -719,4 +720,11 @@ static inline void mddev_check_writesame
!bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
mddev->queue->limits.max_write_same_sectors = 0;
}
@@ -99,7 +99,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
/*
* Number of guaranteed r1bios in case of extreme VM load:
*/
-@@ -3372,8 +3376,8 @@ static void *raid1_takeover(struct mddev
+@@ -3391,8 +3395,8 @@ static void *raid1_takeover(struct mddev
if (!IS_ERR(conf)) {
/* Array must appear to be quiesced */
conf->array_frozen = 1;
@@ -113,15 +113,15 @@ Signed-off-by: Coly Li <colyli@suse.de>
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -63,6 +63,8 @@
- #include "raid0.h"
#include "bitmap.h"
+ #include "raid5-log.h"
+#define UNSUPPORTED_MDDEV_FLAGS (1L << MD_FAILFAST_SUPPORTED)
+
#define cpu_to_group(cpu) cpu_to_node(cpu)
#define ANY_GROUP NUMA_NO_NODE
-@@ -8103,7 +8105,8 @@ static void *raid5_takeover_raid1(struct
+@@ -8024,7 +8026,8 @@ static void *raid5_takeover_raid1(struct
ret = setup_conf(mddev);
if (!IS_ERR_VALUE(ret))
diff --git a/patches.drivers/0150-md-raid5-remove-over-loading-of-bi_phys_segments.patch b/patches.drivers/0150-md-raid5-remove-over-loading-of-bi_phys_segments.patch
index 00a03be02a..3cc61adae7 100644
--- a/patches.drivers/0150-md-raid5-remove-over-loading-of-bi_phys_segments.patch
+++ b/patches.drivers/0150-md-raid5-remove-over-loading-of-bi_phys_segments.patch
@@ -37,7 +37,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
-@@ -5079,12 +5079,14 @@ static void add_bio_to_retry(struct bio
+@@ -5089,12 +5089,14 @@ static void add_bio_to_retry(struct bio
md_wakeup_thread(conf->mddev->thread);
}
@@ -53,7 +53,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
conf->retry_read_aligned = NULL;
return bi;
}
-@@ -5092,11 +5094,7 @@ static struct bio *remove_bio_from_retry
+@@ -5102,11 +5104,7 @@ static struct bio *remove_bio_from_retry
if(bi) {
conf->retry_read_aligned_list = bi->bi_next;
bi->bi_next = NULL;
@@ -66,7 +66,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
}
return bi;
-@@ -6052,7 +6050,8 @@ static inline sector_t raid5_sync_reques
+@@ -6062,7 +6060,8 @@ static inline sector_t raid5_sync_reques
return STRIPE_SECTORS;
}
@@ -76,7 +76,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
{
/* We may not be able to submit a whole bio at once as there
* may not be enough stripe_heads available.
-@@ -6081,7 +6080,7 @@ static int retry_aligned_read(struct r5
+@@ -6091,7 +6090,7 @@ static int retry_aligned_read(struct r5
sector += STRIPE_SECTORS,
scnt++) {
@@ -85,7 +85,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
/* already done this stripe */
continue;
-@@ -6089,15 +6088,15 @@ static int retry_aligned_read(struct r5
+@@ -6099,15 +6098,15 @@ static int retry_aligned_read(struct r5
if (!sh) {
/* failed to get a stripe - must wait */
@@ -103,7 +103,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
return handled;
}
-@@ -6225,6 +6224,7 @@ static void raid5d(struct md_thread *thr
+@@ -6235,6 +6234,7 @@ static void raid5d(struct md_thread *thr
while (1) {
struct bio *bio;
int batch_size, released;
@@ -111,7 +111,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
released = release_stripe_list(conf, conf->temp_inactive_list);
if (released)
-@@ -6242,10 +6242,10 @@ static void raid5d(struct md_thread *thr
+@@ -6252,10 +6252,10 @@ static void raid5d(struct md_thread *thr
}
raid5_activate_delayed(conf);
@@ -159,10 +159,10 @@ Signed-off-by: Coly Li <colyli@suse.de>
- atomic_set(segments, cnt);
-}
-
- /* NOTE NR_STRIPE_HASH_LOCKS must remain below 64.
- * This is because we sometimes take all the spinlocks
- * and creating that much locking depth can cause
-@@ -613,6 +584,7 @@ struct r5conf {
+ #define NR_STRIPES 256
+ #define STRIPE_SIZE PAGE_SIZE
+ #define STRIPE_SHIFT (PAGE_SHIFT - 9)
+@@ -619,6 +590,7 @@ struct r5conf {
struct list_head delayed_list; /* stripes that have plugged requests */
struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */
struct bio *retry_read_aligned; /* currently retrying aligned bios */
diff --git a/patches.drivers/0198-md-clear-WantReplacement-once-disk-is-removed.patch b/patches.drivers/0198-md-clear-WantReplacement-once-disk-is-removed.patch
deleted file mode 100644
index 609eec6f80..0000000000
--- a/patches.drivers/0198-md-clear-WantReplacement-once-disk-is-removed.patch
+++ /dev/null
@@ -1,89 +0,0 @@
-From: Guoqing Jiang <gqjiang@suse.com>
-Date: Mon, 24 Apr 2017 15:58:04 +0800
-Subject: [PATCH] md: clear WantReplacement once disk is removed
-Git-commit: e5bc9c3c5432f5531a58e6fdd9f6c6587f2137b3
-Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git
-Patch-mainline: Queued in subsystem maintainer repository
-References: FATE#321488
-
-We can clear 'WantReplacement' flag directly no
-matter it's replacement existed or not since the
-semantic is same as before.
-
-Also since the disk is removed from array, then
-it is straightforward to remove 'WantReplacement'
-flag and the comments in raid10/5 can be removed
-as well.
-
-Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
-Signed-off-by: Shaohua Li <shli@fb.com>
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/raid1.c | 6 +++---
- drivers/md/raid10.c | 8 ++------
- drivers/md/raid5.c | 9 +++------
- 3 files changed, 8 insertions(+), 15 deletions(-)
-
-diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
-index 14a9d36b25b8..70a596c10306 100644
---- a/drivers/md/raid1.c
-+++ b/drivers/md/raid1.c
-@@ -1831,9 +1831,9 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
- p->rdev = repl;
- conf->mirrors[conf->raid_disks + number].rdev = NULL;
- unfreeze_array(conf);
-- clear_bit(WantReplacement, &rdev->flags);
-- } else
-- clear_bit(WantReplacement, &rdev->flags);
-+ }
-+
-+ clear_bit(WantReplacement, &rdev->flags);
- err = md_integrity_register(mddev);
- }
- abort:
-diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
-index 5de951bcd24c..2883b720a265 100644
---- a/drivers/md/raid10.c
-+++ b/drivers/md/raid10.c
-@@ -1874,13 +1874,9 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
- * but will never see neither -- if they are careful.
- */
- p->replacement = NULL;
-- clear_bit(WantReplacement, &rdev->flags);
-- } else
-- /* We might have just remove the Replacement as faulty
-- * Clear the flag just in case
-- */
-- clear_bit(WantReplacement, &rdev->flags);
-+ }
-
-+ clear_bit(WantReplacement, &rdev->flags);
- err = md_integrity_register(mddev);
-
- abort:
-diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
-index 356cd9c7c753..3d971e5a1b0e 100644
---- a/drivers/md/raid5.c
-+++ b/drivers/md/raid5.c
-@@ -7603,15 +7603,12 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
- * but will never see neither - if they are careful
- */
- p->replacement = NULL;
-- clear_bit(WantReplacement, &rdev->flags);
-
- if (!err)
- err = log_modify(conf, p->rdev, true);
-- } else
-- /* We might have just removed the Replacement as faulty-
-- * clear the bit just in case
-- */
-- clear_bit(WantReplacement, &rdev->flags);
-+ }
-+
-+ clear_bit(WantReplacement, &rdev->flags);
- abort:
-
- print_raid5_conf(conf);
---
-2.12.0
-
diff --git a/patches.drivers/0199-md-raid1-Use-a-new-variable-to-count-flighting-sync-.patch b/patches.drivers/0199-md-raid1-Use-a-new-variable-to-count-flighting-sync-.patch
deleted file mode 100644
index e6a10f3e54..0000000000
--- a/patches.drivers/0199-md-raid1-Use-a-new-variable-to-count-flighting-sync-.patch
+++ /dev/null
@@ -1,81 +0,0 @@
-From: Xiao Ni <xni@redhat.com>
-Date: Thu, 27 Apr 2017 16:28:49 +0800
-Subject: [PATCH] md/raid1: Use a new variable to count flighting sync requests
-Git-commit: 43ac9b84a399bc10210a2d9f4e0778b7c6059c07
-Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git
-Patch-mainline: Queued in subsystem maintainer repository
-References: FATE#321488
-
-In new barrier codes, raise_barrier waits if conf->nr_pending[idx] is not zero.
-After all the conditions are true, the resync request can go on be handled. But
-it adds conf->nr_pending[idx] again. The next resync request hit the same bucket
-idx need to wait the resync request which is submitted before. The performance
-of resync/recovery is degraded.
-So we should use a new variable to count sync requests which are in flight.
-
-I did a simple test:
-1. Without the patch, create a raid1 with two disks. The resync speed:
-Device: rrqm/s wrqm/s r/s w/s rMB/s wMB/s avgrq-sz avgqu-sz await r_await w_await svctm %util
-sdb 0.00 0.00 166.00 0.00 10.38 0.00 128.00 0.03 0.20 0.20 0.00 0.19 3.20
-sdc 0.00 0.00 0.00 166.00 0.00 10.38 128.00 0.96 5.77 0.00 5.77 5.75 95.50
-2. With the patch, the result is:
-sdb 2214.00 0.00 766.00 0.00 185.69 0.00 496.46 2.80 3.66 3.66 0.00 1.03 79.10
-sdc 0.00 2205.00 0.00 769.00 0.00 186.44 496.52 5.25 6.84 0.00 6.84 1.30 100.10
-
-Suggested-by: Shaohua Li <shli@kernel.org>
-Signed-off-by: Xiao Ni <xni@redhat.com>
-Acked-by: Coly Li <colyli@suse.de>
-Signed-off-by: Shaohua Li <shli@fb.com>
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/raid1.c | 7 ++++---
- drivers/md/raid1.h | 1 +
- 2 files changed, 5 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
-index 70a596c10306..5d19c1a7d90a 100644
---- a/drivers/md/raid1.c
-+++ b/drivers/md/raid1.c
-@@ -885,7 +885,7 @@ static void raise_barrier(struct r1conf *conf, sector_t sector_nr)
- atomic_read(&conf->barrier[idx]) < RESYNC_DEPTH,
- conf->resync_lock);
-
-- atomic_inc(&conf->nr_pending[idx]);
-+ atomic_inc(&conf->nr_sync_pending);
- spin_unlock_irq(&conf->resync_lock);
- }
-
-@@ -896,7 +896,7 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr)
- BUG_ON(atomic_read(&conf->barrier[idx]) <= 0);
-
- atomic_dec(&conf->barrier[idx]);
-- atomic_dec(&conf->nr_pending[idx]);
-+ atomic_dec(&conf->nr_sync_pending);
- wake_up(&conf->wait_barrier);
- }
-
-@@ -1033,7 +1033,8 @@ static int get_unqueued_pending(struct r1conf *conf)
- {
- int idx, ret;
-
-- for (ret = 0, idx = 0; idx < BARRIER_BUCKETS_NR; idx++)
-+ ret = atomic_read(&conf->nr_sync_pending);
-+ for (idx = 0; idx < BARRIER_BUCKETS_NR; idx++)
- ret += atomic_read(&conf->nr_pending[idx]) -
- atomic_read(&conf->nr_queued[idx]);
-
-diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
-index b0ab0da6e39e..c8894ef1e9d2 100644
---- a/drivers/md/raid1.h
-+++ b/drivers/md/raid1.h
-@@ -84,6 +84,7 @@ struct r1conf {
- */
- wait_queue_head_t wait_barrier;
- spinlock_t resync_lock;
-+ atomic_t nr_sync_pending;
- atomic_t *nr_pending;
- atomic_t *nr_waiting;
- atomic_t *nr_queued;
---
-2.12.0
-
diff --git a/patches.drivers/0310-block-bio-pass-bvec-table-to-bio_init.patch b/patches.drivers/0310-block-bio-pass-bvec-table-to-bio_init.patch
index 8fff324ae6..8197dc3bc7 100644
--- a/patches.drivers/0310-block-bio-pass-bvec-table-to-bio_init.patch
+++ b/patches.drivers/0310-block-bio-pass-bvec-table-to-bio_init.patch
@@ -233,7 +233,7 @@ Signed-off-by: Hannes Reinecke <hare@suse.de>
mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
-@@ -2821,7 +2821,7 @@ int r5l_init_log(struct r5conf *conf, st
+@@ -2825,7 +2825,7 @@ int r5l_init_log(struct r5conf *conf, st
INIT_LIST_HEAD(&log->io_end_ios);
INIT_LIST_HEAD(&log->flushing_ios);
INIT_LIST_HEAD(&log->finished_ios);
@@ -244,7 +244,7 @@ Signed-off-by: Hannes Reinecke <hare@suse.de>
if (!log->io_kc)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
-@@ -2142,13 +2142,8 @@ static struct stripe_head *alloc_stripe(
+@@ -2038,13 +2038,8 @@ static struct stripe_head *alloc_stripe(
for (i = 0; i < disks; i++) {
struct r5dev *dev = &sh->dev[i];
@@ -258,8 +258,8 @@ Signed-off-by: Hannes Reinecke <hare@suse.de>
+ bio_init(&dev->req, &dev->vec, 1);
+ bio_init(&dev->rreq, &dev->rvec, 1);
}
- }
- return sh;
+
+ if (raid5_has_ppl(conf)) {
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -37,9 +37,7 @@ static void nvmet_inline_bio_init(struct
diff --git a/patches.suse/0001-raid5-separate-header-for-log-functions.patch b/patches.suse/0001-raid5-separate-header-for-log-functions.patch
index 55a39469f4..a92e5e4544 100644
--- a/patches.suse/0001-raid5-separate-header-for-log-functions.patch
+++ b/patches.suse/0001-raid5-separate-header-for-log-functions.patch
@@ -46,9 +46,9 @@ Signed-off-by: Coly Li <colyli@suse.com>
{
struct r5conf *conf = sh->raid_conf;
int pages = 0;
-@@ -2785,6 +2785,10 @@ int r5l_init_log(struct r5conf *conf, st
+@@ -2784,6 +2784,10 @@ void r5c_update_on_rdev_error(struct mdd
+ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
{
- struct request_queue *q = bdev_get_queue(rdev->bdev);
struct r5l_log *log;
+ char b[BDEVNAME_SIZE];
+
@@ -57,7 +57,7 @@ Signed-off-by: Coly Li <colyli@suse.com>
if (PAGE_SIZE != 4096)
return -EINVAL;
-@@ -2887,8 +2891,13 @@ io_kc:
+@@ -2886,8 +2890,13 @@ io_kc:
return -EINVAL;
}
@@ -164,9 +164,9 @@ Signed-off-by: Coly Li <colyli@suse.com>
#include "bitmap.h"
+#include "raid5-log.h"
- #define UNSUPPORTED_MDDEV_FLAGS (1L << MD_FAILFAST_SUPPORTED)
-
-@@ -982,18 +983,8 @@ static void ops_run_io(struct stripe_hea
+ #define cpu_to_group(cpu) cpu_to_node(cpu)
+ #define ANY_GROUP NUMA_NO_NODE
+@@ -980,18 +981,8 @@ static void ops_run_io(struct stripe_hea
might_sleep();
@@ -187,7 +187,7 @@ Signed-off-by: Coly Li <colyli@suse.com>
should_defer = conf->batch_bio_dispatch && conf->group_cnt;
-@@ -3329,7 +3320,7 @@ handle_failed_stripe(struct r5conf *conf
+@@ -3332,7 +3323,7 @@ handle_failed_stripe(struct r5conf *conf
if (bi)
bitmap_end = 1;
@@ -196,7 +196,7 @@ Signed-off-by: Coly Li <colyli@suse.com>
if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
wake_up(&conf->wait_for_overlap);
-@@ -3748,7 +3739,7 @@ returnbi:
+@@ -3751,7 +3742,7 @@ returnbi:
discard_pending = 1;
}
@@ -205,7 +205,7 @@ Signed-off-by: Coly Li <colyli@suse.com>
if (!discard_pending &&
test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
-@@ -4738,7 +4729,7 @@ static void handle_stripe(struct stripe_
+@@ -4741,7 +4732,7 @@ static void handle_stripe(struct stripe_
if (s.just_cached)
r5c_handle_cached_data_endio(conf, sh, disks, &s.return_bi);
@@ -214,7 +214,7 @@ Signed-off-by: Coly Li <colyli@suse.com>
/* Now we might consider reading some blocks, either to check/generate
* parity, or to satisfy requests
-@@ -6139,7 +6130,7 @@ static int handle_active_stripes(struct
+@@ -6142,7 +6133,7 @@ static int handle_active_stripes(struct
for (i = 0; i < batch_size; i++)
handle_stripe(batch[i]);
@@ -223,7 +223,7 @@ Signed-off-by: Coly Li <colyli@suse.com>
cond_resched();
-@@ -6717,8 +6708,8 @@ static void free_conf(struct r5conf *con
+@@ -6720,8 +6711,8 @@ static void free_conf(struct r5conf *con
{
int i;
@@ -234,7 +234,7 @@ Signed-off-by: Coly Li <colyli@suse.com>
if (conf->shrinker.nr_deferred)
unregister_shrinker(&conf->shrinker);
-@@ -7443,13 +7434,8 @@ static int raid5_run(struct mddev *mddev
+@@ -7446,13 +7437,8 @@ static int raid5_run(struct mddev *mddev
blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
}
@@ -250,7 +250,7 @@ Signed-off-by: Coly Li <colyli@suse.com>
return 0;
abort:
-@@ -7563,17 +7549,13 @@ static int raid5_remove_disk(struct mdde
+@@ -7566,17 +7552,13 @@ static int raid5_remove_disk(struct mdde
print_raid5_conf(conf);
if (test_bit(Journal, &rdev->flags) && conf->log) {
@@ -269,7 +269,7 @@ Signed-off-by: Coly Li <colyli@suse.com>
return 0;
}
if (rdev == p->rdev)
-@@ -7642,7 +7624,6 @@ static int raid5_add_disk(struct mddev *
+@@ -7645,7 +7627,6 @@ static int raid5_add_disk(struct mddev *
int last = conf->raid_disks - 1;
if (test_bit(Journal, &rdev->flags)) {
@@ -277,7 +277,7 @@ Signed-off-by: Coly Li <colyli@suse.com>
if (conf->log)
return -EBUSY;
-@@ -7651,9 +7632,7 @@ static int raid5_add_disk(struct mddev *
+@@ -7654,9 +7635,7 @@ static int raid5_add_disk(struct mddev *
* The array is in readonly mode if journal is missing, so no
* write requests running. We should be safe
*/
diff --git a/patches.suse/0002-md-cluster-remove-a-disk-asynchronously-from-cluster.patch b/patches.suse/0002-md-cluster-remove-a-disk-asynchronously-from-cluster.patch
index 6e23bd4fdb..baf4cf7b1e 100644
--- a/patches.suse/0002-md-cluster-remove-a-disk-asynchronously-from-cluster.patch
+++ b/patches.suse/0002-md-cluster-remove-a-disk-asynchronously-from-cluster.patch
@@ -20,14 +20,16 @@ Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Acked-by: Guoqing Jiang <gqjiang@suse.com>
---
- drivers/md/md-cluster.c | 7 +++++--
- drivers/md/md.c | 12 ++++++++++++
- drivers/md/md.h | 1 +
+ drivers/md/md-cluster.c | 7 +++++--
+ drivers/md/md.c | 12 ++++++++++++
+ drivers/md/md.h | 1 +
3 files changed, 18 insertions(+), 2 deletions(-)
+diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
+index ba2a976..2d75219 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
-@@ -440,8 +440,11 @@ static void process_remove_disk(struct m
+@@ -442,8 +442,11 @@ static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg)
struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev,
le32_to_cpu(msg->raid_slot));
@@ -41,9 +43,11 @@ Acked-by: Guoqing Jiang <gqjiang@suse.com>
else
pr_warn("%s: %d Could not find disk(%d) to REMOVE\n",
__func__, __LINE__, le32_to_cpu(msg->raid_slot));
+diff --git a/drivers/md/md.c b/drivers/md/md.c
+index 3bc5f01..b70f41e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
-@@ -8339,6 +8339,18 @@ void md_check_recovery(struct mddev *mdd
+@@ -8445,6 +8445,18 @@ void md_check_recovery(struct mddev *mddev)
goto unlock;
}
@@ -62,13 +66,18 @@ Acked-by: Guoqing Jiang <gqjiang@suse.com>
if (!mddev->external) {
int did_change = 0;
spin_lock(&mddev->lock);
+diff --git a/drivers/md/md.h b/drivers/md/md.h
+index 8f3dd3a..792854e 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
-@@ -162,6 +162,7 @@ enum flag_bits {
- * Usually, this device should be faster
- * than other devices in the array
+@@ -180,6 +180,7 @@ enum flag_bits {
+ * so it is save to remove without
+ * another call.
*/
+ ClusterRemove,
};
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
+--
+2.6.2
+
diff --git a/patches.suse/0002-md-superblock-changes-for-PPL.patch b/patches.suse/0002-md-superblock-changes-for-PPL.patch
index 4ee2b65a19..7289628c2e 100644
--- a/patches.suse/0002-md-superblock-changes-for-PPL.patch
+++ b/patches.suse/0002-md-superblock-changes-for-PPL.patch
@@ -1,4 +1,3 @@
-From ea0213e0c7cc1c1b52badf27bd7db4f50a67baaa Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Thu, 9 Mar 2017 09:59:57 +0100
Subject: [PATCH] md: superblock changes for PPL
@@ -15,20 +14,17 @@ to mddev->flags to indicate that PPL is enabled on an array.
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Shaohua Li <shli@fb.com>
-Signed-off-by: Coly Li <colyli@suse.de>
+Acked-by: NeilBrown <neilb@suse.com>
+
---
- drivers/md/md.c | 19 +++++++++++++++++++
- drivers/md/md.h | 8 ++++++++
- drivers/md/raid0.c | 3 ++-
- drivers/md/raid1.c | 3 ++-
- include/uapi/linux/raid/md_p.h | 18 ++++++++++++++----
- 5 files changed, 45 insertions(+), 6 deletions(-)
+ drivers/md/md.c | 19 +++++++++++++++++++
+ drivers/md/md.h | 8 ++++++++
+ include/uapi/linux/raid/md_p.h | 18 ++++++++++++++----
+ 3 files changed, 41 insertions(+), 4 deletions(-)
-diff --git a/drivers/md/md.c b/drivers/md/md.c
-index 72ef3f18ac9a..d57045996d35 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
-@@ -1507,6 +1507,12 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
+@@ -1524,6 +1524,12 @@ static int super_1_load(struct md_rdev *
} else if (sb->bblog_offset != 0)
rdev->badblocks.shift = 0;
@@ -41,7 +37,7 @@ index 72ef3f18ac9a..d57045996d35 100644
if (!refdev) {
ret = 1;
} else {
-@@ -1619,6 +1625,13 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
+@@ -1636,6 +1642,13 @@ static int super_1_validate(struct mddev
if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
set_bit(MD_HAS_JOURNAL, &mddev->flags);
@@ -55,7 +51,7 @@ index 72ef3f18ac9a..d57045996d35 100644
} else if (mddev->pers == NULL) {
/* Insist of good event counter while assembling, except for
* spares (which don't need an event count) */
-@@ -1832,6 +1845,12 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
+@@ -1849,6 +1862,12 @@ retry:
if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
@@ -68,8 +64,6 @@ index 72ef3f18ac9a..d57045996d35 100644
rdev_for_each(rdev2, mddev) {
i = rdev2->desc_nr;
if (test_bit(Faulty, &rdev2->flags))
-diff --git a/drivers/md/md.h b/drivers/md/md.h
-index 1c00160b09f9..a7b2f16452c4 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -122,6 +122,13 @@ struct md_rdev {
@@ -86,44 +80,14 @@ index 1c00160b09f9..a7b2f16452c4 100644
};
enum flag_bits {
Faulty, /* device is known to have a fault */
-@@ -226,6 +233,7 @@ enum mddev_flags {
- * supported as calls to md_error() will
- * never cause the array to become failed.
+@@ -196,6 +203,7 @@ enum flag_bits {
+ * it didn't fail, so don't use FailFast
+ * any more for metadata
*/
+ MD_HAS_PPL, /* The raid array has PPL feature set */
};
- enum mddev_sb_flags {
-diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
-index 93347ca7c7a6..56f70c3ad37c 100644
---- a/drivers/md/raid0.c
-+++ b/drivers/md/raid0.c
-@@ -29,7 +29,8 @@
- #define UNSUPPORTED_MDDEV_FLAGS \
- ((1L << MD_HAS_JOURNAL) | \
- (1L << MD_JOURNAL_CLEAN) | \
-- (1L << MD_FAILFAST_SUPPORTED))
-+ (1L << MD_FAILFAST_SUPPORTED) |\
-+ (1L << MD_HAS_PPL))
-
- static int raid0_congested(struct mddev *mddev, int bits)
- {
-diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
-index a34f58772022..730e57259af9 100644
---- a/drivers/md/raid1.c
-+++ b/drivers/md/raid1.c
-@@ -47,7 +47,8 @@
-
- #define UNSUPPORTED_MDDEV_FLAGS \
- ((1L << MD_HAS_JOURNAL) | \
-- (1L << MD_JOURNAL_CLEAN))
-+ (1L << MD_JOURNAL_CLEAN) | \
-+ (1L << MD_HAS_PPL))
-
- /*
- * Number of guaranteed r1bios in case of extreme VM load:
-diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
-index 9930f3e9040f..fe2112810c43 100644
+ static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -242,10 +242,18 @@ struct mdp_superblock_1 {
@@ -165,6 +129,3 @@ index 9930f3e9040f..fe2112810c43 100644
)
struct r5l_payload_header {
---
-2.12.0
-
diff --git a/patches.suse/0003-raid5-ppl-Partial-Parity-Log-write-logging-implement.patch b/patches.suse/0003-raid5-ppl-Partial-Parity-Log-write-logging-implement.patch
index b90ed99ca4..3d83e799ec 100644
--- a/patches.suse/0003-raid5-ppl-Partial-Parity-Log-write-logging-implement.patch
+++ b/patches.suse/0003-raid5-ppl-Partial-Parity-Log-write-logging-implement.patch
@@ -1,4 +1,3 @@
-From 3418d036c81dcb604b7c7c71b209d5890a8418aa Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Thu, 9 Mar 2017 09:59:59 +0100
Subject: [PATCH] raid5-ppl: Partial Parity Log write logging implementation
@@ -41,22 +40,20 @@ implemented.
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Shaohua Li <shli@fb.com>
-Signed-off-by: Coly Li <colyli@suse.de>
+Acked-by: NeilBrown <neilb@suse.com>
+
---
- Documentation/md/raid5-ppl.txt | 44 +++
- drivers/md/Makefile | 2 +-
- drivers/md/raid5-log.h | 24 ++
- drivers/md/raid5-ppl.c | 703 +++++++++++++++++++++++++++++++++++++++++
- drivers/md/raid5.c | 64 +++-
- drivers/md/raid5.h | 10 +-
- include/uapi/linux/raid/md_p.h | 27 ++
- 7 files changed, 869 insertions(+), 5 deletions(-)
+ Documentation/md/raid5-ppl.txt | 44 ++
+ drivers/md/Makefile | 2
+ drivers/md/raid5-log.h | 24 +
+ drivers/md/raid5-ppl.c | 703 +++++++++++++++++++++++++++++++++++++++++
+ drivers/md/raid5.c | 64 +++
+ drivers/md/raid5.h | 16
+ include/uapi/linux/raid/md_p.h | 27 +
+ 7 files changed, 875 insertions(+), 5 deletions(-)
create mode 100644 Documentation/md/raid5-ppl.txt
create mode 100644 drivers/md/raid5-ppl.c
-diff --git a/Documentation/md/raid5-ppl.txt b/Documentation/md/raid5-ppl.txt
-new file mode 100644
-index 000000000000..127072b09363
--- /dev/null
+++ b/Documentation/md/raid5-ppl.txt
@@ -0,0 +1,44 @@
@@ -104,11 +101,9 @@ index 000000000000..127072b09363
+Currently, volatile write-back cache should be disabled on all member drives
+when using PPL. Otherwise it cannot guarantee consistency in case of power
+failure.
-diff --git a/drivers/md/Makefile b/drivers/md/Makefile
-index 3cbda1af87a0..4d48714ccc6b 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
-@@ -18,7 +18,7 @@ dm-cache-cleaner-y += dm-cache-policy-cleaner.o
+@@ -18,7 +18,7 @@ dm-cache-cleaner-y += dm-cache-policy-cl
dm-era-y += dm-era-target.o
dm-verity-y += dm-verity-target.o
md-mod-y += md.o bitmap.o
@@ -117,17 +112,15 @@ index 3cbda1af87a0..4d48714ccc6b 100644
# Note: link order is important. All raid personalities
# and must come before md.o, as they each initialise
-diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h
-index 2da4bd3bbd79..a67fb58513b9 100644
--- a/drivers/md/raid5-log.h
+++ b/drivers/md/raid5-log.h
-@@ -31,6 +31,20 @@ extern struct md_sysfs_entry r5c_journal_mode;
+@@ -31,6 +31,20 @@ extern struct md_sysfs_entry r5c_journal
extern void r5c_update_on_rdev_error(struct mddev *mddev);
extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
+extern struct dma_async_tx_descriptor *
+ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
-+ struct dma_async_tx_descriptor *tx);
++ struct dma_async_tx_descriptor *tx);
+extern int ppl_init_log(struct r5conf *conf);
+extern void ppl_exit_log(struct r5conf *conf);
+extern int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh);
@@ -142,7 +135,7 @@ index 2da4bd3bbd79..a67fb58513b9 100644
static inline int log_stripe(struct stripe_head *sh, struct stripe_head_state *s)
{
struct r5conf *conf = sh->raid_conf;
-@@ -45,6 +59,8 @@ static inline int log_stripe(struct stripe_head *sh, struct stripe_head_state *s
+@@ -45,6 +59,8 @@ static inline int log_stripe(struct stri
/* caching phase */
return r5c_cache_data(conf->log, sh);
}
@@ -151,7 +144,7 @@ index 2da4bd3bbd79..a67fb58513b9 100644
}
return -EAGAIN;
-@@ -56,24 +72,32 @@ static inline void log_stripe_write_finished(struct stripe_head *sh)
+@@ -56,24 +72,32 @@ static inline void log_stripe_write_fini
if (conf->log)
r5l_stripe_write_finished(sh);
@@ -184,9 +177,6 @@ index 2da4bd3bbd79..a67fb58513b9 100644
return 0;
}
-diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
-new file mode 100644
-index 000000000000..db5b72b11594
--- /dev/null
+++ b/drivers/md/raid5-ppl.c
@@ -0,0 +1,703 @@
@@ -893,11 +883,9 @@ index 000000000000..db5b72b11594
+ __ppl_exit_log(ppl_conf);
+ return ret;
+}
-diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
-index f575f40d2acb..6b86e0826afe 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
-@@ -482,6 +482,11 @@ static void shrink_buffers(struct stripe_head *sh)
+@@ -465,6 +465,11 @@ static void shrink_buffers(struct stripe
sh->dev[i].page = NULL;
put_page(p);
}
@@ -909,7 +897,7 @@ index f575f40d2acb..6b86e0826afe 100644
}
static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
-@@ -498,6 +503,13 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
+@@ -481,6 +486,13 @@ static int grow_buffers(struct stripe_he
sh->dev[i].page = page;
sh->dev[i].orig_page = page;
}
@@ -923,7 +911,7 @@ index f575f40d2acb..6b86e0826afe 100644
return 0;
}
-@@ -746,7 +758,7 @@ static bool stripe_can_batch(struct stripe_head *sh)
+@@ -729,7 +741,7 @@ static bool stripe_can_batch(struct stri
{
struct r5conf *conf = sh->raid_conf;
@@ -932,7 +920,7 @@ index f575f40d2acb..6b86e0826afe 100644
return false;
return test_bit(STRIPE_BATCH_READY, &sh->state) &&
!test_bit(STRIPE_BITMAP_PENDING, &sh->state) &&
-@@ -2093,6 +2105,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
+@@ -2075,6 +2087,9 @@ static void raid_run_ops(struct stripe_h
async_tx_ack(tx);
}
@@ -942,7 +930,7 @@ index f575f40d2acb..6b86e0826afe 100644
if (test_bit(STRIPE_OP_PREXOR, &ops_request)) {
if (level < 6)
tx = ops_run_prexor5(sh, percpu, tx);
-@@ -3168,6 +3183,12 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
+@@ -3155,6 +3170,12 @@ schedule_reconstruction(struct stripe_he
s->locked++;
}
@@ -955,7 +943,7 @@ index f575f40d2acb..6b86e0826afe 100644
pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
__func__, (unsigned long long)sh->sector,
s->locked, s->ops_request);
-@@ -3215,6 +3236,36 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
+@@ -3202,6 +3223,36 @@ static int add_stripe_bio(struct stripe_
if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi))
goto overlap;
@@ -992,7 +980,7 @@ index f575f40d2acb..6b86e0826afe 100644
if (!forwrite || previous)
clear_bit(STRIPE_BATCH_READY, &sh->state);
-@@ -7208,6 +7259,13 @@ static int raid5_run(struct mddev *mddev)
+@@ -7218,6 +7269,13 @@ static int raid5_run(struct mddev *mddev
BUG_ON(mddev->delta_disks != 0);
}
@@ -1006,7 +994,7 @@ index f575f40d2acb..6b86e0826afe 100644
if (mddev->private == NULL)
conf = setup_conf(mddev);
else
-@@ -7689,7 +7747,7 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
+@@ -7699,7 +7757,7 @@ static int raid5_resize(struct mddev *md
sector_t newsize;
struct r5conf *conf = mddev->private;
@@ -1015,7 +1003,7 @@ index f575f40d2acb..6b86e0826afe 100644
return -EINVAL;
sectors &= ~((sector_t)conf->chunk_sectors - 1);
newsize = raid5_size(mddev, sectors, mddev->raid_disks);
-@@ -7740,7 +7798,7 @@ static int check_reshape(struct mddev *mddev)
+@@ -7750,7 +7808,7 @@ static int check_reshape(struct mddev *m
{
struct r5conf *conf = mddev->private;
@@ -1024,11 +1012,9 @@ index f575f40d2acb..6b86e0826afe 100644
return -EINVAL;
if (mddev->delta_disks == 0 &&
mddev->new_layout == mddev->layout &&
-diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
-index 6dd295a80ee1..ba5b7a3790af 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
-@@ -224,10 +224,16 @@ struct stripe_head {
+@@ -224,8 +224,14 @@ struct stripe_head {
spinlock_t batch_lock; /* only header's lock is useful */
struct list_head batch_list; /* protected by head's batch lock*/
@@ -1039,13 +1025,11 @@ index 6dd295a80ee1..ba5b7a3790af 100644
+ };
+
struct list_head log_list;
- sector_t log_start; /* first meta block on the journal */
- struct list_head r5c; /* for r5c_cache->stripe_in_journal */
+
+ struct page *ppl_page; /* partial parity of this stripe */
+ sector_t log_start; /* first meta block on the journal */
+ struct list_head r5c; /* for r5c_cache->stripe_in_journal */
/**
- * struct stripe_operations
- * @target - STRIPE_OP_COMPUTE_BLK target
@@ -400,6 +406,7 @@ enum {
STRIPE_OP_BIODRAIN,
STRIPE_OP_RECONSTRUCT,
@@ -1054,7 +1038,20 @@ index 6dd295a80ee1..ba5b7a3790af 100644
};
/*
-@@ -696,6 +703,7 @@ struct r5conf {
+@@ -525,6 +532,12 @@ static inline void raid5_set_bi_stripes(
+ atomic_set(segments, cnt);
+ }
+
++#define NR_STRIPES 256
++#define STRIPE_SIZE PAGE_SIZE
++#define STRIPE_SHIFT (PAGE_SHIFT - 9)
++#define STRIPE_SECTORS (STRIPE_SIZE>>9)
++
++
+ /* NOTE NR_STRIPE_HASH_LOCKS must remain below 64.
+ * This is because we sometimes take all the spinlocks
+ * and creating that much locking depth can cause
+@@ -696,6 +709,7 @@ struct r5conf {
int group_cnt;
int worker_cnt_per_group;
struct r5l_log *log;
@@ -1062,8 +1059,6 @@ index 6dd295a80ee1..ba5b7a3790af 100644
spinlock_t pending_bios_lock;
bool batch_bio_dispatch;
-diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
-index fe2112810c43..d9a1ead867b9 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -398,4 +398,31 @@ struct r5l_meta_block {
@@ -1098,6 +1093,3 @@ index fe2112810c43..d9a1ead867b9 100644
+} __attribute__ ((__packed__));
+
#endif
---
-2.12.0
-
diff --git a/patches.suse/0011-raid5-ppl-use-resize_stripes-when-enabling-or-disabl.patch b/patches.suse/0011-raid5-ppl-use-resize_stripes-when-enabling-or-disabl.patch
index f52098c406..60acf31105 100644
--- a/patches.suse/0011-raid5-ppl-use-resize_stripes-when-enabling-or-disabl.patch
+++ b/patches.suse/0011-raid5-ppl-use-resize_stripes-when-enabling-or-disabl.patch
@@ -43,7 +43,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
--- a/drivers/md/raid5-log.h
+++ b/drivers/md/raid5-log.h
-@@ -93,11 +93,12 @@ static inline void log_exit(struct r5con
+@@ -85,11 +85,12 @@ static inline void log_exit(struct r5con
ppl_exit_log(conf);
}
@@ -79,7 +79,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
err:
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
-@@ -467,11 +467,6 @@ static void shrink_buffers(struct stripe
+@@ -464,11 +464,6 @@ static void shrink_buffers(struct stripe
sh->dev[i].page = NULL;
put_page(p);
}
@@ -91,7 +91,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
}
static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
-@@ -489,12 +484,6 @@ static int grow_buffers(struct stripe_he
+@@ -486,12 +481,6 @@ static int grow_buffers(struct stripe_he
sh->dev[i].orig_page = page;
}
@@ -104,7 +104,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
return 0;
}
-@@ -2131,8 +2120,15 @@ static void raid_run_ops(struct stripe_h
+@@ -2022,8 +2011,15 @@ static void raid_run_ops(struct stripe_h
put_cpu();
}
@@ -121,7 +121,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
{
struct stripe_head *sh;
int i;
-@@ -2146,6 +2142,7 @@ static struct stripe_head *alloc_stripe(
+@@ -2037,6 +2033,7 @@ static struct stripe_head *alloc_stripe(
INIT_LIST_HEAD(&sh->r5c);
INIT_LIST_HEAD(&sh->log_list);
atomic_set(&sh->count, 1);
@@ -129,9 +129,9 @@ Signed-off-by: Coly Li <colyli@suse.de>
sh->log_start = MaxSector;
for (i = 0; i < disks; i++) {
struct r5dev *dev = &sh->dev[i];
-@@ -2153,6 +2150,14 @@ static struct stripe_head *alloc_stripe(
- bio_init(&dev->req, &dev->vec, 1);
- bio_init(&dev->rreq, &dev->rvec, 1);
+@@ -2049,6 +2046,14 @@ static struct stripe_head *alloc_stripe(
+ dev->rreq.bi_io_vec = &dev->rvec;
+ dev->rreq.bi_max_vecs = 1;
}
+
+ if (raid5_has_ppl(conf)) {
@@ -144,7 +144,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
}
return sh;
}
-@@ -2160,15 +2165,13 @@ static int grow_one_stripe(struct r5conf
+@@ -2056,15 +2061,13 @@ static int grow_one_stripe(struct r5conf
{
struct stripe_head *sh;
@@ -162,7 +162,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
return 0;
}
sh->hash_lock_index =
-@@ -2313,9 +2316,6 @@ static int resize_stripes(struct r5conf
+@@ -2209,9 +2212,6 @@ static int resize_stripes(struct r5conf
int i;
int hash, cnt;
@@ -172,7 +172,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
err = md_allow_write(conf->mddev);
if (err)
return err;
-@@ -2331,11 +2331,10 @@ static int resize_stripes(struct r5conf
+@@ -2227,11 +2227,10 @@ static int resize_stripes(struct r5conf
mutex_lock(&conf->cache_size_mutex);
for (i = conf->max_nr_stripes; i; i--) {
@@ -185,7 +185,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
list_add(&nsh->lru, &newstripes);
}
if (i) {
-@@ -2343,7 +2342,7 @@ static int resize_stripes(struct r5conf
+@@ -2239,7 +2238,7 @@ static int resize_stripes(struct r5conf
while (!list_empty(&newstripes)) {
nsh = list_entry(newstripes.next, struct stripe_head, lru);
list_del(&nsh->lru);
@@ -194,7 +194,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
}
kmem_cache_destroy(sc);
mutex_unlock(&conf->cache_size_mutex);
-@@ -2369,7 +2368,7 @@ static int resize_stripes(struct r5conf
+@@ -2265,7 +2264,7 @@ static int resize_stripes(struct r5conf
nsh->dev[i].orig_page = osh->dev[i].page;
}
nsh->hash_lock_index = hash;
@@ -203,7 +203,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
cnt++;
if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS +
!!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) {
-@@ -2444,7 +2443,7 @@ static int drop_one_stripe(struct r5conf
+@@ -2340,7 +2339,7 @@ static int drop_one_stripe(struct r5conf
return 0;
BUG_ON(atomic_read(&sh->count));
shrink_buffers(sh);
@@ -212,7 +212,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
atomic_dec(&conf->active_stripes);
conf->max_nr_stripes--;
return 1;
-@@ -3167,7 +3166,7 @@ schedule_reconstruction(struct stripe_he
+@@ -3063,7 +3062,7 @@ schedule_reconstruction(struct stripe_he
s->locked++;
}
@@ -221,7 +221,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) &&
!test_bit(STRIPE_FULL_WRITE, &sh->state) &&
test_bit(R5_Insync, &sh->dev[pd_idx].flags))
-@@ -7495,7 +7494,7 @@ static int raid5_run(struct mddev *mddev
+@@ -7371,7 +7370,7 @@ static int raid5_run(struct mddev *mddev
blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
}
@@ -230,7 +230,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
goto abort;
return 0;
-@@ -7701,7 +7700,7 @@ static int raid5_add_disk(struct mddev *
+@@ -7577,7 +7576,7 @@ static int raid5_add_disk(struct mddev *
* The array is in readonly mode if journal is missing, so no
* write requests running. We should be safe
*/
@@ -239,7 +239,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
return 0;
}
if (mddev->recovery_disabled == conf->recovery_disabled)
-@@ -7851,6 +7850,9 @@ static int check_reshape(struct mddev *m
+@@ -7727,6 +7726,9 @@ static int check_reshape(struct mddev *m
mddev->chunk_sectors)
) < 0)
return -ENOMEM;
@@ -249,7 +249,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
return resize_stripes(conf, (conf->previous_raid_disks
+ mddev->delta_disks));
}
-@@ -8341,20 +8343,6 @@ static void *raid6_takeover(struct mddev
+@@ -8212,20 +8214,6 @@ static void *raid6_takeover(struct mddev
return setup_conf(mddev);
}
@@ -270,7 +270,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
{
struct r5conf *conf;
-@@ -8369,23 +8357,23 @@ static int raid5_change_consistency_poli
+@@ -8240,23 +8228,23 @@ static int raid5_change_consistency_poli
return -ENODEV;
}
diff --git a/patches.suse/md-expedite-hot-remove b/patches.suse/md-expedite-hot-remove
new file mode 100644
index 0000000000..3285ed7684
--- /dev/null
+++ b/patches.suse/md-expedite-hot-remove
@@ -0,0 +1,91 @@
+From: NeilBrown <neilb@suse.de>
+Subject: md/raid10: expedite the removal of lots of devices.
+Git-commit: d787be4092e27728cb4c012bee9762098ef3c662
+Patch-mainline: v4.8
+References: bnc#768084
+
+Currently each call to raid10_remove_disk() calls synchronize_rcu
+which can cause a delay of a few milliseconds. For 100 devices this is
+a few hundreds of milliseconds. For 1000 it is a few seconds.
+
+So hoist the 'synchronize_rcu()' up out of the loop. This requires us
+to call it both before and after the loop, but 2 calls aren't much worse
+than 1, and are lots better than 1000.
+
+Signed-off-by: Neil Brown <neilb@suse.de>
+
+---
+ drivers/md/md.c | 26 ++++++++++++++++++++------
+ drivers/md/md.h | 5 +++++
+ drivers/md/raid10.c | 3 ++-
+ 3 files changed, 27 insertions(+), 7 deletions(-)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -8219,6 +8219,8 @@ static int remove_and_add_spares(struct
+ {
+ struct md_rdev *rdev;
+ int spares = 0;
++ int remove_some = 0;
++
+ int removed = 0;
+
+ rdev_for_each(rdev, mddev)
+@@ -8229,13 +8231,25 @@ static int remove_and_add_spares(struct
+ (!test_bit(In_sync, &rdev->flags) &&
+ !test_bit(Journal, &rdev->flags))) &&
+ atomic_read(&rdev->nr_pending)==0) {
+- if (mddev->pers->hot_remove_disk(
+- mddev, rdev) == 0) {
+- sysfs_unlink_rdev(mddev, rdev);
+- rdev->raid_disk = -1;
+- removed++;
+- }
++ remove_some = 1;
++ set_bit(RemoveSynchronised, &rdev->flags);
+ }
++ if (remove_some) {
++ synchronize_rcu();
++ /* Now we know that no-one will take a new reference */
++ list_for_each_entry(rdev, &mddev->disks, same_set)
++ if (test_bit(RemoveSynchronised, &rdev->flags)) {
++ if (mddev->pers->hot_remove_disk(
++ mddev, rdev) == 0) {
++ sysfs_unlink_rdev(mddev, rdev);
++ rdev->raid_disk = -1;
++ removed++;
++ }
++ clear_bit(RemoveSynchronised, &rdev->flags);
++ }
++ synchronize_rcu();
++ /* Now any temp reference that was taken is released */
++ }
+ if (removed && mddev->kobj.sd)
+ sysfs_notify(&mddev->kobj, NULL, "degraded");
+
+--- a/drivers/md/md.h
++++ b/drivers/md/md.h
+@@ -196,6 +196,11 @@ enum flag_bits {
+ * Usually, this device should be faster
+ * than other devices in the array
+ */
++ RemoveSynchronised, /* synchronize_rcu was called after
++ * This device was known to be faulty,
++ * so it is save to remove without
++ * another call.
++ */
+ };
+
+ #define BB_LEN_MASK (0x00000000000001FFULL)
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -1845,7 +1845,8 @@ static int raid10_remove_disk(struct mdd
+ goto abort;
+ }
+ *rdevp = NULL;
+- synchronize_rcu();
++ if (!test_bit(RemoveSynchronised, &rdev->flags))
++ synchronize_rcu();
+ if (atomic_read(&rdev->nr_pending)) {
+ /* lost the race, try later */
+ err = -EBUSY;
diff --git a/series.conf b/series.conf
index e411cb1a90..c504201451 100644
--- a/series.conf
+++ b/series.conf
@@ -11308,6 +11308,7 @@
patches.suse/md-bitmap-unplug-fixes
patches.suse/blk-timeout-no-round
patches.suse/md-raid10-silence-conf
+ patches.suse/md-expedite-hot-remove
patches.suse/md-update-sb-async
# support md RAIDs with journal (fate#320291)
@@ -11581,6 +11582,19 @@
patches.drivers/0044-md-fail-if-mddev-bio_set-can-t-be-created.patch
patches.drivers/0045-raid5-only-dispatch-IO-from-raid5d-for-harddisk-raid.patch
patches.drivers/0046-md-raid5-sort-bios.patch
+ patches.suse/0001-raid5-separate-header-for-log-functions.patch
+ patches.suse/0002-md-superblock-changes-for-PPL.patch
+ patches.suse/0003-raid5-ppl-Partial-Parity-Log-write-logging-implement.patch
+ patches.suse/0004-md-add-sysfs-entries-for-PPL.patch
+ patches.suse/0005-raid5-ppl-load-and-recover-the-log.patch
+ patches.suse/0006-raid5-ppl-support-disk-hot-add-remove-with-PPL.patch
+ patches.suse/0007-raid5-ppl-runtime-PPL-enabling-or-disabling.patch
+ patches.suse/0008-raid5-ppl-silence-a-misleading-warning-message.patch
+ patches.suse/0009-md-raid5-use-consistency_policy-to-remove-journal-fe.patch
+ patches.suse/0010-raid5-ppl-move-no_mem_stripes-to-struct-ppl_conf.patch
+ patches.suse/0011-raid5-ppl-use-resize_stripes-when-enabling-or-disabl.patch
+ patches.suse/0012-raid5-ppl-partial-parity-calculation-optimization.patch
+ patches.suse/0013-raid5-ppl-use-a-single-mempool-for-ppl_io_unit-and-h.patch
# end part 1
##########################################################
@@ -12338,19 +12352,6 @@
patches.drivers/0098-md-cleanup-bio-op-flags-handling-in-raid1_write_requ.patch
patches.drivers/0099-block-simplify-blk_init_allocated_queue.patch
patches.drivers/0100-dm-remove-incomplete-BLOCK_PC-support.patch
- patches.suse/0001-raid5-separate-header-for-log-functions.patch
- patches.suse/0002-md-superblock-changes-for-PPL.patch
- patches.suse/0003-raid5-ppl-Partial-Parity-Log-write-logging-implement.patch
- patches.suse/0004-md-add-sysfs-entries-for-PPL.patch
- patches.suse/0005-raid5-ppl-load-and-recover-the-log.patch
- patches.suse/0006-raid5-ppl-support-disk-hot-add-remove-with-PPL.patch
- patches.suse/0007-raid5-ppl-runtime-PPL-enabling-or-disabling.patch
- patches.suse/0008-raid5-ppl-silence-a-misleading-warning-message.patch
- patches.suse/0009-md-raid5-use-consistency_policy-to-remove-journal-fe.patch
- patches.suse/0010-raid5-ppl-move-no_mem_stripes-to-struct-ppl_conf.patch
- patches.suse/0011-raid5-ppl-use-resize_stripes-when-enabling-or-disabl.patch
- patches.suse/0012-raid5-ppl-partial-parity-calculation-optimization.patch
- patches.suse/0013-raid5-ppl-use-a-single-mempool-for-ppl_io_unit-and-h.patch
# end part 2
patches.drivers/dax-Call-get_blocks-with-create-1-for-write-faults-t.patch
@@ -12484,8 +12485,6 @@
patches.drivers/0195-md-cluster-Fix-a-memleak-in-an-error-handling-path.patch
patches.drivers/0196-md-raid10-wait-up-frozen-array-in-handle_write_compl.patch
patches.drivers/0197-md-raid1-10-remove-unused-queue.patch
- patches.drivers/0198-md-clear-WantReplacement-once-disk-is-removed.patch
- patches.drivers/0199-md-raid1-Use-a-new-variable-to-count-flighting-sync-.patch
patches.fixes/jbd2-Fix-dbench4-performance-regression-for-nobarrie.patch
# end part 3