/*
* Copyright (C) 2010-2011 Neil Brown
- * Copyright (C) 2010-2015 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2010-2016 Red Hat, Inc. All rights reserved.
*
* This file is released under the GPL.
*/
#include <linux/device-mapper.h>
#define DM_MSG_PREFIX "raid"
-#define MAX_RAID_DEVICES 253 /* raid4/5/6 limit */
+#define MAX_RAID_DEVICES 253 /* md-raid kernel limit */
static bool devices_handle_discard_safely = false;
/*
* Flags for rs->ctr_flags field.
+ *
+ * The first trailing comment on each flag gives the number of
+ * table line words the option takes:
+ *
+ * 1 = no flag value
+ * 2 = flag with value
+ */
+#define CTR_FLAG_SYNC 0x1 /* 1 */ /* Not with raid0! */
+#define CTR_FLAG_NOSYNC 0x2 /* 1 */ /* Not with raid0! */
+#define CTR_FLAG_REBUILD 0x4 /* 2 */ /* Not with raid0! */
+#define CTR_FLAG_DAEMON_SLEEP 0x8 /* 2 */ /* Not with raid0! */
+#define CTR_FLAG_MIN_RECOVERY_RATE 0x10 /* 2 */ /* Not with raid0! */
+#define CTR_FLAG_MAX_RECOVERY_RATE 0x20 /* 2 */ /* Not with raid0! */
+#define CTR_FLAG_MAX_WRITE_BEHIND 0x40 /* 2 */ /* Only with raid1! */
+#define CTR_FLAG_WRITE_MOSTLY 0x80 /* 2 */ /* Only with raid1! */
+#define CTR_FLAG_STRIPE_CACHE 0x100 /* 2 */ /* Only with raid4/5/6! */
+#define CTR_FLAG_REGION_SIZE 0x200 /* 2 */ /* Not with raid0! */
+#define CTR_FLAG_RAID10_COPIES 0x400 /* 2 */ /* Only with raid10 */
+#define CTR_FLAG_RAID10_FORMAT 0x800 /* 2 */ /* Only with raid10 */
+
+/*
+ * Definitions of various constructor flags to
+ * be used in checks of valid / invalid flags
+ * per raid level.
+ */
+/* Define all sync flags ('sync' and 'nosync') */
+#define CTR_FLAGS_ANY_SYNC (CTR_FLAG_SYNC | CTR_FLAG_NOSYNC)
+
+/* Define flags for options without argument (e.g. 'nosync') */
+#define CTR_FLAG_OPTIONS_NO_ARGS CTR_FLAGS_ANY_SYNC
+
+/* Define flags for options with one argument (e.g. 'rebuild 0') */
+#define CTR_FLAG_OPTIONS_ONE_ARG (CTR_FLAG_REBUILD | \
+ CTR_FLAG_WRITE_MOSTLY | \
+ CTR_FLAG_DAEMON_SLEEP | \
+ CTR_FLAG_MIN_RECOVERY_RATE | \
+ CTR_FLAG_MAX_RECOVERY_RATE | \
+ CTR_FLAG_MAX_WRITE_BEHIND | \
+ CTR_FLAG_STRIPE_CACHE | \
+ CTR_FLAG_REGION_SIZE | \
+ CTR_FLAG_RAID10_COPIES | \
+ CTR_FLAG_RAID10_FORMAT)
+
+/* All ctr optional arguments */
+#define ALL_CTR_FLAGS (CTR_FLAG_OPTIONS_NO_ARGS | \
+ CTR_FLAG_OPTIONS_ONE_ARG)
+
+/* Invalid options definitions per raid level... */
+
+/* "raid0" does not accept any options */
+#define RAID0_INVALID_FLAGS ALL_CTR_FLAGS
+
+/* "raid1" does not accept stripe cache or any raid10 options */
+#define RAID1_INVALID_FLAGS (CTR_FLAG_STRIPE_CACHE | \
+ CTR_FLAG_RAID10_COPIES | \
+ CTR_FLAG_RAID10_FORMAT)
+
+/* "raid10" does not accept any raid1 or stripe cache options */
+#define RAID10_INVALID_FLAGS (CTR_FLAG_WRITE_MOSTLY | \
+ CTR_FLAG_MAX_WRITE_BEHIND | \
+ CTR_FLAG_STRIPE_CACHE)
+/*
+ * "raid4/5/6" do not accept any raid1 or raid10 specific options
+ *
+ * "raid6" does not accept "nosync", because it is not guaranteed
+ * that both parity and q-syndrome are being written properly with
+ * any writes
*/
-#define CTR_FLAG_SYNC 0x1
-#define CTR_FLAG_NOSYNC 0x2
-#define CTR_FLAG_REBUILD 0x4
-#define CTR_FLAG_DAEMON_SLEEP 0x8
-#define CTR_FLAG_MIN_RECOVERY_RATE 0x10
-#define CTR_FLAG_MAX_RECOVERY_RATE 0x20
-#define CTR_FLAG_MAX_WRITE_BEHIND 0x40
-#define CTR_FLAG_STRIPE_CACHE 0x80
-#define CTR_FLAG_REGION_SIZE 0x100
-#define CTR_FLAG_RAID10_COPIES 0x200
-#define CTR_FLAG_RAID10_FORMAT 0x400
+#define RAID45_INVALID_FLAGS (CTR_FLAG_WRITE_MOSTLY | \
+ CTR_FLAG_MAX_WRITE_BEHIND | \
+ CTR_FLAG_RAID10_FORMAT | \
+ CTR_FLAG_RAID10_COPIES)
+#define RAID6_INVALID_FLAGS (CTR_FLAG_NOSYNC | RAID45_INVALID_FLAGS)
+/* ...invalid options definitions per raid level */
struct raid_set {
struct dm_target *ti;
{"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE}
};
+/* Check whether @v lies within the closed interval [@min, @max] */
+static bool _in_range(long v, long min, long max)
+{
+	if (v < min)
+		return false;
+
+	return v <= max;
+}
+
+/* ctr flag bit manipulation... */
+/* Turn on the one bit given by @flag in the bitset @flags points at */
+static void _set_flag(uint32_t flag, uint32_t *flags)
+{
+	/* Exactly one bit is expected; warn once if that is violated */
+	WARN_ON_ONCE(hweight32(flag) != 1);
+	*flags = *flags | flag;
+}
+
+/* Report whether the one bit given by @flag is set in @flags */
+static bool _test_flag(uint32_t flag, uint32_t flags)
+{
+	/* Exactly one bit is expected; warn once if that is violated */
+	WARN_ON_ONCE(hweight32(flag) != 1);
+	return (flags & flag) != 0;
+}
+
+/*
+ * Make sure single @flag is set in @*flags and tell the caller
+ * whether it had already been set before (true) or not (false)
+ */
+static bool _test_and_set_flag(uint32_t flag, uint32_t *flags)
+{
+	const bool was_set = _test_flag(flag, *flags);
+
+	if (!was_set)
+		_set_flag(flag, flags);
+
+	return was_set;
+}
+/* ...ctr and runtime flag bit manipulation */
+
+/*
+ * All table line arguments are defined here
+ *
+ * Each entry pairs one single ctr flag with the keyword
+ * userspace passes in the table line for that option.
+ */
+static struct arg_name_flag {
+	const uint32_t flag;
+	const char *name;
+} _arg_name_flags[] = {
+	{ CTR_FLAG_SYNC, "sync"},
+	{ CTR_FLAG_NOSYNC, "nosync"},
+	{ CTR_FLAG_REBUILD, "rebuild"},
+	{ CTR_FLAG_DAEMON_SLEEP, "daemon_sleep"},
+	{ CTR_FLAG_MIN_RECOVERY_RATE, "min_recovery_rate"},
+	{ CTR_FLAG_MAX_RECOVERY_RATE, "max_recovery_rate"},
+	{ CTR_FLAG_MAX_WRITE_BEHIND, "max_write_behind"},
+	/* Keep the established "write_mostly" spelling so existing tables still parse */
+	{ CTR_FLAG_WRITE_MOSTLY, "write_mostly"},
+	{ CTR_FLAG_STRIPE_CACHE, "stripe_cache"},
+	{ CTR_FLAG_REGION_SIZE, "region_size"},
+	{ CTR_FLAG_RAID10_COPIES, "raid10_copies"},
+	{ CTR_FLAG_RAID10_FORMAT, "raid10_format"},
+};
+
+/* Look up the table line keyword belonging to single @flag; NULL if there is none */
+static const char *_argname_by_flag(const uint32_t flag)
+{
+	int idx;
+
+	if (hweight32(flag) != 1) {
+		DMERR("%s called with more than one flag!", __func__);
+		return NULL;
+	}
+
+	/* Walk the keyword table from its last entry to its first */
+	for (idx = ARRAY_SIZE(_arg_name_flags) - 1; idx >= 0; idx--)
+		if (_test_flag(flag, _arg_name_flags[idx].flag))
+			return _arg_name_flags[idx].name;
+
+	return NULL;
+}
+
+/*
+ * bool helpers to test for various raid levels of a raid type
+ */
+
+/* True, if @rt describes a raid0 type, i.e. its level is 0 */
+static bool rt_is_raid0(struct raid_type *rt)
+{
+	return rt->level == 0;
+}
+
+/* True, if @rt describes a raid1 type, i.e. its level is 1 */
+static bool rt_is_raid1(struct raid_type *rt)
+{
+	if (rt->level != 1)
+		return false;
+
+	return true;
+}
+
+/* True, if @rt describes a raid10 type, i.e. its level is 10 */
+static bool rt_is_raid10(struct raid_type *rt)
+{
+	return (rt->level == 10) ? true : false;
+}
+
+/* True, if @rt describes a raid4 or raid5 type, i.e. its level is 4 or 5 */
+static bool rt_is_raid45(struct raid_type *rt)
+{
+	return rt->level == 4 || rt->level == 5;
+}
+
+/* True, if @rt describes a raid6 type, i.e. its level is 6 */
+static bool rt_is_raid6(struct raid_type *rt)
+{
+	if (rt->level == 6)
+		return true;
+
+	return false;
+}
+/* END: raid level bools */
+
+/*
+ * Helpers which store @errmsg as the error string of target @ti
+ * and hand back a return code, so that callers can fail in a
+ * single statement
+ */
+static int ti_error_ret(struct dm_target *ti, const char *errmsg, int r)
+{
+	char *msg = (char *) errmsg;	/* drop const for the assignment below */
+
+	ti->error = msg;
+	return r;
+}
+
+static int ti_error_einval(struct dm_target *ti, const char *errmsg)
+{
+ return ti_error_ret(ti, errmsg, -EINVAL);
+}
+/* END: convenience functions to set ti->error to @errmsg... */
+
+/* Map the raid level of @rs to the bitset of ctr flags it must not be given */
+static uint32_t _invalid_flags(struct raid_set *rs)
+{
+	struct raid_type *rt = rs->raid_type;
+
+	if (rt_is_raid0(rt))
+		return RAID0_INVALID_FLAGS;
+
+	if (rt_is_raid1(rt))
+		return RAID1_INVALID_FLAGS;
+
+	if (rt_is_raid10(rt))
+		return RAID10_INVALID_FLAGS;
+
+	if (rt_is_raid45(rt))
+		return RAID45_INVALID_FLAGS;
+
+	if (rt_is_raid6(rt))
+		return RAID6_INVALID_FLAGS;
+
+	/* None of the known levels matched: flag every bit as invalid */
+	return ~0;
+}
+
+/*
+ * Fail with -EINVAL, if any ctr flag set on @rs is
+ * invalid for the raid level of @rs
+ *
+ * Has to be called after parsing of the ctr flags!
+ */
+static int rs_check_for_invalid_flags(struct raid_set *rs)
+{
+	/* Flags which are both set on @rs and invalid for its level */
+	const uint32_t offending = rs->ctr_flags & _invalid_flags(rs);
+
+	if (!offending)
+		return 0;
+
+	return ti_error_einval(rs->ti, "Invalid flag combined");
+}
+
static char *raid10_md_layout_to_format(int layout)
{
/*
return (f << 8) | n;
}
-static struct raid_type *get_raid_type(char *name)
+static struct raid_type *get_raid_type(const char *name)
{
int i;
unsigned i;
struct raid_set *rs;
- if (raid_devs <= raid_type->parity_devs) {
- ti->error = "Insufficient number of devices";
- return ERR_PTR(-EINVAL);
- }
+ if (raid_devs <= raid_type->parity_devs)
+ return ERR_PTR(ti_error_einval(ti, "Insufficient number of devices"));
rs = kzalloc(sizeof(*rs) + raid_devs * sizeof(rs->dev[0]), GFP_KERNEL);
- if (!rs) {
- ti->error = "Cannot allocate raid context";
- return ERR_PTR(-ENOMEM);
- }
+ if (!rs)
+ return ERR_PTR(ti_error_ret(ti, "Cannot allocate raid context", -ENOMEM));
mddev_init(&rs->md);
* This code parses those words. If there is a failure,
* the caller must use context_free to unwind the operations.
*/
-static int dev_parms(struct raid_set *rs, char **argv)
+static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as)
{
int i;
int rebuild = 0;
int metadata_available = 0;
- int ret = 0;
+ int r = 0;
+ const char *arg;
+
+ /* Put off the number of raid devices argument to get to dev pairs */
+ arg = dm_shift_arg(as);
+ if (!arg)
+ return -EINVAL;
- for (i = 0; i < rs->md.raid_disks; i++, argv += 2) {
+ for (i = 0; i < rs->md.raid_disks; i++) {
rs->dev[i].rdev.raid_disk = i;
rs->dev[i].meta_dev = NULL;
rs->dev[i].rdev.data_offset = 0;
rs->dev[i].rdev.mddev = &rs->md;
- if (strcmp(argv[0], "-")) {
- ret = dm_get_device(rs->ti, argv[0],
+ arg = dm_shift_arg(as);
+ if (!arg)
+ return -EINVAL;
+
+ if (strcmp(arg, "-")) {
+ r = dm_get_device(rs->ti, arg,
dm_table_get_mode(rs->ti->table),
&rs->dev[i].meta_dev);
- rs->ti->error = "RAID metadata device lookup failure";
- if (ret)
- return ret;
+ if (r)
+ return ti_error_ret(rs->ti, "RAID metadata device lookup failure", r);
rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL);
if (!rs->dev[i].rdev.sb_page)
- return -ENOMEM;
+ return ti_error_ret(rs->ti, "Failed to allocate superblock page", -ENOMEM);
}
- if (!strcmp(argv[1], "-")) {
+ arg = dm_shift_arg(as);
+ if (!arg)
+ return -EINVAL;
+
+ if (!strcmp(arg, "-")) {
if (!test_bit(In_sync, &rs->dev[i].rdev.flags) &&
- (!rs->dev[i].rdev.recovery_offset)) {
- rs->ti->error = "Drive designated for rebuild not specified";
- return -EINVAL;
- }
+ (!rs->dev[i].rdev.recovery_offset))
+ return ti_error_einval(rs->ti, "Drive designated for rebuild not specified");
- rs->ti->error = "No data device supplied with metadata device";
if (rs->dev[i].meta_dev)
- return -EINVAL;
+ return ti_error_einval(rs->ti, "No data device supplied with metadata device");
continue;
}
- ret = dm_get_device(rs->ti, argv[1],
+ r = dm_get_device(rs->ti, arg,
dm_table_get_mode(rs->ti->table),
&rs->dev[i].data_dev);
- if (ret) {
- rs->ti->error = "RAID device lookup failure";
- return ret;
- }
+ if (r)
+ return ti_error_ret(rs->ti, "RAID device lookup failure", r);
if (rs->dev[i].meta_dev) {
metadata_available = 1;
* User could specify 'nosync' option if desperate.
*/
DMERR("Unable to rebuild drive while array is not in-sync");
- rs->ti->error = "RAID device lookup failure";
- return -EINVAL;
+ return ti_error_einval(rs->ti, "Unable to rebuild drive while array is not in-sync");
}
return 0;
/*
* Validate user-supplied value.
*/
- if (region_size > rs->ti->len) {
- rs->ti->error = "Supplied region size is too large";
- return -EINVAL;
- }
+ if (region_size > rs->ti->len)
+ return ti_error_einval(rs->ti, "Supplied region size is too large");
if (region_size < min_region_size) {
DMERR("Supplied region_size (%lu sectors) below minimum (%lu)",
region_size, min_region_size);
- rs->ti->error = "Supplied region size is too small";
- return -EINVAL;
+ return ti_error_einval(rs->ti, "Supplied region size is too small");
}
- if (!is_power_of_2(region_size)) {
- rs->ti->error = "Region size is not a power of 2";
- return -EINVAL;
- }
+ if (!is_power_of_2(region_size))
+ return ti_error_einval(rs->ti, "Region size is not a power of 2");
- if (region_size < rs->md.chunk_sectors) {
- rs->ti->error = "Region size is smaller than the chunk size";
- return -EINVAL;
- }
+ if (region_size < rs->md.chunk_sectors)
+ return ti_error_einval(rs->ti, "Region size is smaller than the chunk size");
}
/*
* [raid10_copies <# copies>] Number of copies. (Default: 2)
* [raid10_format <near|far|offset>] Layout algorithm. (Default: near)
*/
-static int parse_raid_params(struct raid_set *rs, char **argv,
+static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
unsigned num_raid_params)
{
char *raid10_format = "near";
unsigned raid10_copies = 2;
unsigned i;
- unsigned long value, region_size = 0;
+ unsigned value, region_size = 0;
sector_t sectors_per_dev = rs->ti->len;
sector_t max_io_len;
- char *key;
+ const char *arg, *key;
+ struct raid_dev *rd;
+
+ arg = dm_shift_arg(as);
+ num_raid_params--; /* Account for chunk_size argument */
+
+ if (kstrtouint(arg, 10, &value) < 0)
+ return ti_error_einval(rs->ti, "Bad numerical argument given for chunk_size");
/*
* First, parse the in-order required arguments
* "chunk_size" is the only argument of this type.
*/
- if ((kstrtoul(argv[0], 10, &value) < 0)) {
- rs->ti->error = "Bad chunk size";
- return -EINVAL;
- } else if (rs->raid_type->level == 1) {
+ if (rs->raid_type->level == 1) {
if (value)
DMERR("Ignoring chunk size parameter for RAID 1");
value = 0;
- } else if (!is_power_of_2(value)) {
- rs->ti->error = "Chunk size must be a power of 2";
- return -EINVAL;
- } else if (value < 8) {
- rs->ti->error = "Chunk size value is too small";
- return -EINVAL;
- }
+ } else if (!is_power_of_2(value))
+ return ti_error_einval(rs->ti, "Chunk size must be a power of 2");
+ else if (value < 8)
+ return ti_error_einval(rs->ti, "Chunk size value is too small");
rs->md.new_chunk_sectors = rs->md.chunk_sectors = value;
- argv++;
- num_raid_params--;
/*
* We set each individual device as In_sync with a completed
* Second, parse the unordered optional arguments
*/
for (i = 0; i < num_raid_params; i++) {
- if (!strcasecmp(argv[i], "nosync")) {
+ arg = dm_shift_arg(as);
+ if (!arg)
+ return ti_error_einval(rs->ti, "Not enough raid parameters given");
+
+ if (!strcasecmp(arg, "nosync")) {
rs->md.recovery_cp = MaxSector;
- rs->ctr_flags |= CTR_FLAG_NOSYNC;
+ _set_flag(CTR_FLAG_NOSYNC, &rs->ctr_flags);
continue;
}
- if (!strcasecmp(argv[i], "sync")) {
+ if (!strcasecmp(arg, "sync")) {
rs->md.recovery_cp = 0;
- rs->ctr_flags |= CTR_FLAG_SYNC;
+ _set_flag(CTR_FLAG_SYNC, &rs->ctr_flags);
continue;
}
- /* The rest of the optional arguments come in key/value pairs */
- if ((i + 1) >= num_raid_params) {
- rs->ti->error = "Wrong number of raid parameters given";
- return -EINVAL;
- }
+ key = arg;
+ arg = dm_shift_arg(as);
+ i++; /* Account for the argument pairs */
+ if (!arg)
+ return ti_error_einval(rs->ti, "Wrong number of raid parameters given");
- key = argv[i++];
+ /*
+ * Parameters that take a string value are checked here.
+ */
- /* Parameters that take a string value are checked here. */
- if (!strcasecmp(key, "raid10_format")) {
- if (rs->raid_type->level != 10) {
- rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type";
- return -EINVAL;
- }
- if (strcmp("near", argv[i]) &&
- strcmp("far", argv[i]) &&
- strcmp("offset", argv[i])) {
- rs->ti->error = "Invalid 'raid10_format' value given";
- return -EINVAL;
- }
- raid10_format = argv[i];
- rs->ctr_flags |= CTR_FLAG_RAID10_FORMAT;
+ if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_RAID10_FORMAT))) {
+ if (_test_and_set_flag(CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags))
+ return ti_error_einval(rs->ti, "Only one raid10_format argument pair allowed");
+ if (rs->raid_type->level != 10)
+ return ti_error_einval(rs->ti, "'raid10_format' is an invalid parameter for this RAID type");
+ if (strcmp("near", arg) &&
+ strcmp("far", arg) &&
+ strcmp("offset", arg))
+ return ti_error_einval(rs->ti, "Invalid 'raid10_format' value given");
+
+ raid10_format = (char *) arg;
continue;
}
- if (kstrtoul(argv[i], 10, &value) < 0) {
- rs->ti->error = "Bad numerical argument given in raid params";
- return -EINVAL;
- }
+ if (kstrtouint(arg, 10, &value) < 0)
+ return ti_error_einval(rs->ti, "Bad numerical argument given in raid params");
+
+ if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_REBUILD))) {
+ /*
+ * "rebuild" is being passed in by userspace to provide
+ * indexes of replaced devices and to set up additional
+ * devices on raid level takeover.
+ */
+ if (!_in_range(value, 0, rs->md.raid_disks - 1))
+ return ti_error_einval(rs->ti, "Invalid rebuild index given");
+
+ rd = rs->dev + value;
+ clear_bit(In_sync, &rd->rdev.flags);
+ clear_bit(Faulty, &rd->rdev.flags);
+ rd->rdev.recovery_offset = 0;
+ _set_flag(CTR_FLAG_REBUILD, &rs->ctr_flags);
+ } else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_WRITE_MOSTLY))) {
+ if (rs->raid_type->level != 1)
+ return ti_error_einval(rs->ti, "write_mostly option is only valid for RAID1");
+
+ if (!_in_range(value, 0, rs->md.raid_disks - 1))
+ return ti_error_einval(rs->ti, "Invalid write_mostly index given");
- /* Parameters that take a numeric value are checked here */
- if (!strcasecmp(key, "rebuild")) {
- if (value >= rs->md.raid_disks) {
- rs->ti->error = "Invalid rebuild index given";
- return -EINVAL;
- }
- clear_bit(In_sync, &rs->dev[value].rdev.flags);
- rs->dev[value].rdev.recovery_offset = 0;
- rs->ctr_flags |= CTR_FLAG_REBUILD;
- } else if (!strcasecmp(key, "write_mostly")) {
- if (rs->raid_type->level != 1) {
- rs->ti->error = "write_mostly option is only valid for RAID1";
- return -EINVAL;
- }
- if (value >= rs->md.raid_disks) {
- rs->ti->error = "Invalid write_mostly drive index given";
- return -EINVAL;
- }
set_bit(WriteMostly, &rs->dev[value].rdev.flags);
- } else if (!strcasecmp(key, "max_write_behind")) {
- if (rs->raid_type->level != 1) {
- rs->ti->error = "max_write_behind option is only valid for RAID1";
- return -EINVAL;
- }
- rs->ctr_flags |= CTR_FLAG_MAX_WRITE_BEHIND;
+ _set_flag(CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags);
+ } else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_MAX_WRITE_BEHIND))) {
+ if (rs->raid_type->level != 1)
+ return ti_error_einval(rs->ti, "max_write_behind option is only valid for RAID1");
+
+ if (_test_and_set_flag(CTR_FLAG_MAX_WRITE_BEHIND, &rs->ctr_flags))
+ return ti_error_einval(rs->ti, "Only one max_write_behind argument pair allowed");
/*
* In device-mapper, we specify things in sectors, but
* MD records this value in kB
*/
value /= 2;
- if (value > COUNTER_MAX) {
- rs->ti->error = "Max write-behind limit out of range";
- return -EINVAL;
- }
+ if (value > COUNTER_MAX)
+ return ti_error_einval(rs->ti, "Max write-behind limit out of range");
+
rs->md.bitmap_info.max_write_behind = value;
- } else if (!strcasecmp(key, "daemon_sleep")) {
- rs->ctr_flags |= CTR_FLAG_DAEMON_SLEEP;
- if (!value || (value > MAX_SCHEDULE_TIMEOUT)) {
- rs->ti->error = "daemon sleep period out of range";
- return -EINVAL;
- }
+ } else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_DAEMON_SLEEP))) {
+ if (_test_and_set_flag(CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags))
+ return ti_error_einval(rs->ti, "Only one daemon_sleep argument pair allowed");
+ if (!value || (value > MAX_SCHEDULE_TIMEOUT))
+ return ti_error_einval(rs->ti, "daemon sleep period out of range");
rs->md.bitmap_info.daemon_sleep = value;
- } else if (!strcasecmp(key, "stripe_cache")) {
- rs->ctr_flags |= CTR_FLAG_STRIPE_CACHE;
-
+ } else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_STRIPE_CACHE))) {
+ if (_test_and_set_flag(CTR_FLAG_STRIPE_CACHE, &rs->ctr_flags))
+ return ti_error_einval(rs->ti, "Only one stripe_cache argument pair allowed");
/*
* In device-mapper, we specify things in sectors, but
* MD records this value in kB
*/
value /= 2;
- if ((rs->raid_type->level != 5) &&
- (rs->raid_type->level != 6)) {
- rs->ti->error = "Inappropriate argument: stripe_cache";
- return -EINVAL;
- }
- if (raid5_set_cache_size(&rs->md, (int)value)) {
- rs->ti->error = "Bad stripe_cache size";
- return -EINVAL;
- }
- } else if (!strcasecmp(key, "min_recovery_rate")) {
- rs->ctr_flags |= CTR_FLAG_MIN_RECOVERY_RATE;
- if (value > INT_MAX) {
- rs->ti->error = "min_recovery_rate out of range";
- return -EINVAL;
- }
+ if (!_in_range(rs->raid_type->level, 4, 6))
+ return ti_error_einval(rs->ti, "Inappropriate argument: stripe_cache");
+ if (raid5_set_cache_size(&rs->md, (int)value))
+ return ti_error_einval(rs->ti, "Bad stripe_cache size");
+
+ } else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_MIN_RECOVERY_RATE))) {
+ if (_test_and_set_flag(CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags))
+ return ti_error_einval(rs->ti, "Only one min_recovery_rate argument pair allowed");
+ if (value > INT_MAX)
+ return ti_error_einval(rs->ti, "min_recovery_rate out of range");
rs->md.sync_speed_min = (int)value;
- } else if (!strcasecmp(key, "max_recovery_rate")) {
- rs->ctr_flags |= CTR_FLAG_MAX_RECOVERY_RATE;
- if (value > INT_MAX) {
- rs->ti->error = "max_recovery_rate out of range";
- return -EINVAL;
- }
+ } else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_MAX_RECOVERY_RATE))) {
+ /*
+ * Track the max flag here: testing the min flag would reject
+ * tables giving both rates and never record the max flag.
+ */
+ if (_test_and_set_flag(CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags))
+ return ti_error_einval(rs->ti, "Only one max_recovery_rate argument pair allowed");
+ if (value > INT_MAX)
+ return ti_error_einval(rs->ti, "max_recovery_rate out of range");
rs->md.sync_speed_max = (int)value;
- } else if (!strcasecmp(key, "region_size")) {
- rs->ctr_flags |= CTR_FLAG_REGION_SIZE;
+ } else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_REGION_SIZE))) {
+ if (_test_and_set_flag(CTR_FLAG_REGION_SIZE, &rs->ctr_flags))
+ return ti_error_einval(rs->ti, "Only one region_size argument pair allowed");
+
region_size = value;
- } else if (!strcasecmp(key, "raid10_copies") &&
- (rs->raid_type->level == 10)) {
- if ((value < 2) || (value > 0xFF)) {
- rs->ti->error = "Bad value for 'raid10_copies'";
- return -EINVAL;
- }
- rs->ctr_flags |= CTR_FLAG_RAID10_COPIES;
+ } else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_RAID10_COPIES))) {
+ if (_test_and_set_flag(CTR_FLAG_RAID10_COPIES, &rs->ctr_flags))
+ return ti_error_einval(rs->ti, "Only one raid10_copies argument pair allowed");
+
+ if (!_in_range(value, 2, rs->md.raid_disks))
+ return ti_error_einval(rs->ti, "Bad value for 'raid10_copies'");
+
raid10_copies = value;
} else {
DMERR("Unable to parse RAID parameter: %s", key);
- rs->ti->error = "Unable to parse RAID parameters";
- return -EINVAL;
+ return ti_error_einval(rs->ti, "Unable to parse RAID parameters");
}
}
return -EINVAL;
if (rs->raid_type->level == 10) {
- if (raid10_copies > rs->md.raid_disks) {
- rs->ti->error = "Not enough devices to satisfy specification";
- return -EINVAL;
- }
+ if (raid10_copies > rs->md.raid_disks)
+ return ti_error_einval(rs->ti, "Not enough devices to satisfy specification");
/*
* If the format is not "near", we only support
* two copies at the moment.
*/
- if (strcmp("near", raid10_format) && (raid10_copies > 2)) {
- rs->ti->error = "Too many copies for given RAID10 format.";
- return -EINVAL;
- }
+ if (strcmp("near", raid10_format) && (raid10_copies > 2))
+ return ti_error_einval(rs->ti, "Too many copies for given RAID10 format.");
/* (Len * #mirrors) / #devices */
sectors_per_dev = rs->ti->len * raid10_copies;
rs->md.new_layout = rs->md.layout;
} else if ((!rs->raid_type->level || rs->raid_type->level > 1) &&
sector_div(sectors_per_dev,
- (rs->md.raid_disks - rs->raid_type->parity_devs))) {
- rs->ti->error = "Target length not divisible by number of data devices";
- return -EINVAL;
- }
+ (rs->md.raid_disks - rs->raid_type->parity_devs)))
+ return ti_error_einval(rs->ti, "Target length not divisible by number of data devices");
+
rs->md.dev_sectors = sectors_per_dev;
/* Assume there are no metadata devices until the drives are parsed */
rs->md.persistent = 0;
rs->md.external = 1;
- return 0;
+ /* Check, if any invalid ctr arguments have been passed in for the raid level */
+ return rs_check_for_invalid_flags(rs);
}
static void do_table_event(struct work_struct *ws)
if (rdev->sb_loaded)
return 0;
- if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) {
+ if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, 1)) {
DMERR("Failed to read superblock of device at position %d",
rdev->raid_disk);
md_error(rdev->mddev, rdev);
*/
static int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
{
- int ret;
+ int r;
struct dm_raid_superblock *sb;
struct dm_raid_superblock *refsb;
uint64_t events_sb, events_refsb;
return -EINVAL;
}
- ret = read_disk_sb(rdev, rdev->sb_size);
- if (ret)
- return ret;
+ r = read_disk_sb(rdev, rdev->sb_size);
+ if (r)
+ return r;
sb = page_address(rdev->sb_page);
if (!test_bit(FirstUse, &r->flags) && (r->raid_disk >= 0)) {
role = le32_to_cpu(sb2->array_position);
if (role != r->raid_disk) {
- if (rs->raid_type->level != 1) {
- rs->ti->error = "Cannot change device "
- "positions in RAID array";
- return -EINVAL;
- }
+ if (rs->raid_type->level != 1)
+ return ti_error_einval(rs->ti, "Cannot change device "
+ "positions in RAID array");
DMINFO("RAID1 device #%d now at position #%d",
role, r->raid_disk);
}
*/
static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
{
- int ret;
+ int r;
struct raid_dev *dev;
struct md_rdev *rdev, *tmp, *freshest;
struct mddev *mddev = &rs->md;
if (!rdev->meta_bdev)
continue;
- ret = super_load(rdev, freshest);
+ r = super_load(rdev, freshest);
- switch (ret) {
+ switch (r) {
case 1:
freshest = rdev;
break;
if (!freshest)
return 0;
- if (validate_raid_redundancy(rs)) {
- rs->ti->error = "Insufficient redundancy to activate array";
- return -EINVAL;
- }
+ if (validate_raid_redundancy(rs))
+ return ti_error_einval(rs->ti, "Insufficient redundancy to activate array");
/*
* Validation of the freshest device provides the source of
* validation for the remaining devices.
*/
- ti->error = "Unable to assemble array: Invalid superblocks";
if (super_validate(rs, freshest))
- return -EINVAL;
+ return ti_error_einval(rs->ti, "Unable to assemble array: Invalid superblocks");
rdev_for_each(rdev, mddev)
if ((rdev != freshest) && super_validate(rs, rdev))
}
/*
- * Construct a RAID4/5/6 mapping:
+ * Construct a RAID0/1/10/4/5/6 mapping:
* Args:
- * <raid_type> <#raid_params> <raid_params> \
- * <#raid_devs> { <meta_dev1> <dev1> .. <meta_devN> <devN> }
+ * <raid_type> <#raid_params> <raid_params>{0,} \
+ * <#raid_devs> [<meta_dev1> <dev1>]{1,}
*
* <raid_params> varies by <raid_type>. See 'parse_raid_params' for
* details on possible <raid_params>.
+ *
+ * Userspace is free to initialize the metadata devices, hence the superblocks to
+ * enforce recreation based on the passed in table parameters.
+ *
*/
static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
- int ret;
+ int r;
struct raid_type *rt;
- unsigned long num_raid_params, num_raid_devs;
+ unsigned num_raid_params, num_raid_devs;
struct raid_set *rs = NULL;
-
- /* Must have at least <raid_type> <#raid_params> */
- if (argc < 2) {
- ti->error = "Too few arguments";
- return -EINVAL;
- }
-
- /* raid type */
- rt = get_raid_type(argv[0]);
- if (!rt) {
- ti->error = "Unrecognised raid_type";
- return -EINVAL;
- }
- argc--;
- argv++;
-
- /* number of RAID parameters */
- if (kstrtoul(argv[0], 10, &num_raid_params) < 0) {
- ti->error = "Cannot understand number of RAID parameters";
- return -EINVAL;
- }
- argc--;
- argv++;
-
- /* Skip over RAID params for now and find out # of devices */
- if (num_raid_params >= argc) {
- ti->error = "Arguments do not agree with counts given";
- return -EINVAL;
- }
-
- if ((kstrtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) ||
- (num_raid_devs > MAX_RAID_DEVICES)) {
- ti->error = "Cannot understand number of raid devices";
- return -EINVAL;
- }
-
- argc -= num_raid_params + 1; /* +1: we already have num_raid_devs */
- if (argc != (num_raid_devs * 2)) {
- ti->error = "Supplied RAID devices does not match the count given";
- return -EINVAL;
- }
-
- rs = context_alloc(ti, rt, (unsigned)num_raid_devs);
+ const char *arg;
+ struct dm_arg_set as = { argc, argv }, as_nrd;
+ struct dm_arg _args[] = {
+ { 0, as.argc, "Cannot understand number of raid parameters" },
+ { 1, 254, "Cannot understand number of raid devices parameters" }
+ };
+
+ /* Must have <raid_type> */
+ arg = dm_shift_arg(&as);
+ if (!arg)
+ return ti_error_einval(rs->ti, "No arguments");
+
+ rt = get_raid_type(arg);
+ if (!rt)
+ return ti_error_einval(rs->ti, "Unrecognised raid_type");
+
+ /* Must have <#raid_params> */
+ if (dm_read_arg_group(_args, &as, &num_raid_params, &ti->error))
+ return -EINVAL;
+
+ /* number of raid device tupples <meta_dev data_dev> */
+ as_nrd = as;
+ dm_consume_args(&as_nrd, num_raid_params);
+ _args[1].max = (as_nrd.argc - 1) / 2;
+ if (dm_read_arg(_args + 1, &as_nrd, &num_raid_devs, &ti->error))
+ return -EINVAL;
+
+ if (!_in_range(num_raid_devs, 1, MAX_RAID_DEVICES))
+ return ti_error_einval(rs->ti, "Invalid number of supplied raid devices");
+
+ rs = context_alloc(ti, rt, num_raid_devs);
if (IS_ERR(rs))
return PTR_ERR(rs);
- ret = parse_raid_params(rs, argv, (unsigned)num_raid_params);
- if (ret)
+ r = parse_raid_params(rs, &as, num_raid_params);
+ if (r)
goto bad;
- argv += num_raid_params + 1;
-
- ret = dev_parms(rs, argv);
- if (ret)
+ r = parse_dev_params(rs, &as);
+ if (r)
goto bad;
rs->md.sync_super = super_sync;
- ret = analyse_superblocks(ti, rs);
- if (ret)
+ r = analyse_superblocks(ti, rs);
+ if (r)
goto bad;
INIT_WORK(&rs->md.event_work, do_table_event);
/* Has to be held on running the array */
mddev_lock_nointr(&rs->md);
- ret = md_run(&rs->md);
+ r = md_run(&rs->md);
rs->md.in_sync = 0; /* Assume already marked dirty */
mddev_unlock(&rs->md);
- if (ret) {
+ if (r) {
ti->error = "Fail to run raid array";
goto bad;
}
if (ti->len != rs->md.array_sectors) {
- ti->error = "Array size does not match requested target length";
- ret = -EINVAL;
+ r = ti_error_einval(ti, "Array size does not match requested target length");
goto size_mismatch;
}
rs->callbacks.congested_fn = raid_is_congested;
bad:
context_free(rs);
- return ret;
+ return r;
}
static void raid_dtr(struct dm_target *ti)
{
struct raid_set *rs = ti->private;
unsigned i;
- int ret = 0;
+ int r = 0;
- for (i = 0; !ret && i < rs->md.raid_disks; i++)
+ for (i = 0; !r && i < rs->md.raid_disks; i++)
if (rs->dev[i].data_dev)
- ret = fn(ti,
+ r = fn(ti,
rs->dev[i].data_dev,
0, /* No offset on data devs */
rs->md.dev_sectors,
data);
- return ret;
+ return r;
}
static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
for (i = 0; i < rs->md.raid_disks; i++) {
r = &rs->dev[i].rdev;
if (test_bit(Faulty, &r->flags) && r->sb_page &&
- sync_page_io(r, 0, r->sb_size, r->sb_page, READ, 1)) {
+ sync_page_io(r, 0, r->sb_size, r->sb_page, REQ_OP_READ, 0,
+ 1)) {
DMINFO("Faulty %s device #%d has readable super block."
" Attempting to revive it.",
rs->raid_type->name, i);
static struct target_type raid_target = {
.name = "raid",
- .version = {1, 8, 0},
+ .version = {1, 8, 1},
.module = THIS_MODULE,
.ctr = raid_ctr,
.dtr = raid_dtr,