X-Git-Url: http://git.monstr.eu/?a=blobdiff_plain;f=drivers%2Fmd%2Fdm-raid.c;h=ebb64eb66defdb8b7dd48e534638802a8f84c9fe;hb=f090279eaff814a550b35bb51aac6b8541bddf97;hp=52532745a50f85d6eb765d8d6fcaa838301334b6;hpb=564884fbdecaea56fb65f2f32963059d3049b967;p=linux-2.6-microblaze.git diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 52532745a50f..ebb64eb66def 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2010-2011 Neil Brown - * Copyright (C) 2010-2015 Red Hat, Inc. All rights reserved. + * Copyright (C) 2010-2016 Red Hat, Inc. All rights reserved. * * This file is released under the GPL. */ @@ -17,7 +17,7 @@ #include #define DM_MSG_PREFIX "raid" -#define MAX_RAID_DEVICES 253 /* raid4/5/6 limit */ +#define MAX_RAID_DEVICES 253 /* md-raid kernel limit */ static bool devices_handle_discard_safely = false; @@ -47,18 +47,77 @@ struct raid_dev { /* * Flags for rs->ctr_flags field. + * + * 1 = no flag value + * 2 = flag with value + */ +#define CTR_FLAG_SYNC 0x1 /* 1 */ /* Not with raid0! */ +#define CTR_FLAG_NOSYNC 0x2 /* 1 */ /* Not with raid0! */ +#define CTR_FLAG_REBUILD 0x4 /* 2 */ /* Not with raid0! */ +#define CTR_FLAG_DAEMON_SLEEP 0x8 /* 2 */ /* Not with raid0! */ +#define CTR_FLAG_MIN_RECOVERY_RATE 0x10 /* 2 */ /* Not with raid0! */ +#define CTR_FLAG_MAX_RECOVERY_RATE 0x20 /* 2 */ /* Not with raid0! */ +#define CTR_FLAG_MAX_WRITE_BEHIND 0x40 /* 2 */ /* Only with raid1! */ +#define CTR_FLAG_WRITE_MOSTLY 0x80 /* 2 */ /* Only with raid1! */ +#define CTR_FLAG_STRIPE_CACHE 0x100 /* 2 */ /* Only with raid4/5/6! */ +#define CTR_FLAG_REGION_SIZE 0x200 /* 2 */ /* Not with raid0! */ +#define CTR_FLAG_RAID10_COPIES 0x400 /* 2 */ /* Only with raid10 */ +#define CTR_FLAG_RAID10_FORMAT 0x800 /* 2 */ /* Only with raid10 */ + +/* + * Definitions of various constructor flags to + * be used in checks of valid / invalid flags + * per raid level. 
+ */ +/* Define all any sync flags */ +#define CTR_FLAGS_ANY_SYNC (CTR_FLAG_SYNC | CTR_FLAG_NOSYNC) + +/* Define flags for options without argument (e.g. 'nosync') */ +#define CTR_FLAG_OPTIONS_NO_ARGS CTR_FLAGS_ANY_SYNC + +/* Define flags for options with one argument (e.g. 'delta_disks +2') */ +#define CTR_FLAG_OPTIONS_ONE_ARG (CTR_FLAG_REBUILD | \ + CTR_FLAG_WRITE_MOSTLY | \ + CTR_FLAG_DAEMON_SLEEP | \ + CTR_FLAG_MIN_RECOVERY_RATE | \ + CTR_FLAG_MAX_RECOVERY_RATE | \ + CTR_FLAG_MAX_WRITE_BEHIND | \ + CTR_FLAG_STRIPE_CACHE | \ + CTR_FLAG_REGION_SIZE | \ + CTR_FLAG_RAID10_COPIES | \ + CTR_FLAG_RAID10_FORMAT) + +/* All ctr optional arguments */ +#define ALL_CTR_FLAGS (CTR_FLAG_OPTIONS_NO_ARGS | \ + CTR_FLAG_OPTIONS_ONE_ARG) + +/* Invalid options definitions per raid level... */ + +/* "raid0" does not accept any options */ +#define RAID0_INVALID_FLAGS ALL_CTR_FLAGS + +/* "raid1" does not accept stripe cache or any raid10 options */ +#define RAID1_INVALID_FLAGS (CTR_FLAG_STRIPE_CACHE | \ + CTR_FLAG_RAID10_COPIES | \ + CTR_FLAG_RAID10_FORMAT) + +/* "raid10" does not accept any raid1 or stripe cache options */ +#define RAID10_INVALID_FLAGS (CTR_FLAG_WRITE_MOSTLY | \ + CTR_FLAG_MAX_WRITE_BEHIND | \ + CTR_FLAG_STRIPE_CACHE) +/* + * "raid4/5/6" do not accept any raid1 or raid10 specific options + * + * "raid6" does not accept "nosync", because it is not guaranteed + * that both parity and q-syndrome are being written properly with + * any writes */ -#define CTR_FLAG_SYNC 0x1 -#define CTR_FLAG_NOSYNC 0x2 -#define CTR_FLAG_REBUILD 0x4 -#define CTR_FLAG_DAEMON_SLEEP 0x8 -#define CTR_FLAG_MIN_RECOVERY_RATE 0x10 -#define CTR_FLAG_MAX_RECOVERY_RATE 0x20 -#define CTR_FLAG_MAX_WRITE_BEHIND 0x40 -#define CTR_FLAG_STRIPE_CACHE 0x80 -#define CTR_FLAG_REGION_SIZE 0x100 -#define CTR_FLAG_RAID10_COPIES 0x200 -#define CTR_FLAG_RAID10_FORMAT 0x400 +#define RAID45_INVALID_FLAGS (CTR_FLAG_WRITE_MOSTLY | \ + CTR_FLAG_MAX_WRITE_BEHIND | \ + CTR_FLAG_RAID10_FORMAT | \ + CTR_FLAG_RAID10_COPIES) 
+#define RAID6_INVALID_FLAGS (CTR_FLAG_NOSYNC | RAID45_INVALID_FLAGS) +/* ...invalid options definitions per raid level */ struct raid_set { struct dm_target *ti; @@ -95,6 +154,162 @@ static struct raid_type { {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} }; +/* True, if @v is in inclusive range [@min, @max] */ +static bool _in_range(long v, long min, long max) +{ + return v >= min && v <= max; +} + +/* ctr flag bit manipulation... */ +/* Set single @flag in @flags */ +static void _set_flag(uint32_t flag, uint32_t *flags) +{ + WARN_ON_ONCE(hweight32(flag) != 1); + *flags |= flag; +} + +/* Test single @flag in @flags */ +static bool _test_flag(uint32_t flag, uint32_t flags) +{ + WARN_ON_ONCE(hweight32(flag) != 1); + return (flag & flags) ? true : false; +} + +/* Return true if single @flag is set in @*flags, else set it and return false */ +static bool _test_and_set_flag(uint32_t flag, uint32_t *flags) +{ + if (_test_flag(flag, *flags)) + return true; + + _set_flag(flag, flags); + return false; +} +/* ...ctr and runtime flag bit manipulation */ + +/* All table line arguments are defined here */ +static struct arg_name_flag { + const uint32_t flag; + const char *name; +} _arg_name_flags[] = { + { CTR_FLAG_SYNC, "sync"}, + { CTR_FLAG_NOSYNC, "nosync"}, + { CTR_FLAG_REBUILD, "rebuild"}, + { CTR_FLAG_DAEMON_SLEEP, "daemon_sleep"}, + { CTR_FLAG_MIN_RECOVERY_RATE, "min_recovery_rate"}, + { CTR_FLAG_MAX_RECOVERY_RATE, "max_recovery_rate"}, + { CTR_FLAG_MAX_WRITE_BEHIND, "max_write_behind"}, + { CTR_FLAG_WRITE_MOSTLY, "writemostly"}, + { CTR_FLAG_STRIPE_CACHE, "stripe_cache"}, + { CTR_FLAG_REGION_SIZE, "region_size"}, + { CTR_FLAG_RAID10_COPIES, "raid10_copies"}, + { CTR_FLAG_RAID10_FORMAT, "raid10_format"}, +}; + +/* Return argument name string for given @flag */ +static const char *_argname_by_flag(const uint32_t flag) +{ + if (hweight32(flag) == 1) { + struct arg_name_flag *anf = _arg_name_flags + ARRAY_SIZE(_arg_name_flags); + + while (anf-- 
> _arg_name_flags) + if (_test_flag(flag, anf->flag)) + return anf->name; + + } else + DMERR("%s called with more than one flag!", __func__); + + return NULL; +} + +/* + * bool helpers to test for various raid levels of a raid type + */ + +/* Return true, if raid type in @rt is raid0 */ +static bool rt_is_raid0(struct raid_type *rt) +{ + return !rt->level; +} + +/* Return true, if raid type in @rt is raid1 */ +static bool rt_is_raid1(struct raid_type *rt) +{ + return rt->level == 1; +} + +/* Return true, if raid type in @rt is raid10 */ +static bool rt_is_raid10(struct raid_type *rt) +{ + return rt->level == 10; +} + +/* Return true, if raid type in @rt is raid4/5 */ +static bool rt_is_raid45(struct raid_type *rt) +{ + return _in_range(rt->level, 4, 5); +} + +/* Return true, if raid type in @rt is raid6 */ +static bool rt_is_raid6(struct raid_type *rt) +{ + return rt->level == 6; +} +/* END: raid level bools */ + +/* + * Convenience functions to set ti->error to @errmsg and + * return @r in order to shorten code in a lot of places + */ +static int ti_error_ret(struct dm_target *ti, const char *errmsg, int r) +{ + ti->error = (char *) errmsg; + return r; +} + +static int ti_error_einval(struct dm_target *ti, const char *errmsg) +{ + return ti_error_ret(ti, errmsg, -EINVAL); +} +/* END: convenience functions to set ti->error to @errmsg... */ + +/* Return invalid ctr flags for the raid level of @rs */ +static uint32_t _invalid_flags(struct raid_set *rs) +{ + if (rt_is_raid0(rs->raid_type)) + return RAID0_INVALID_FLAGS; + else if (rt_is_raid1(rs->raid_type)) + return RAID1_INVALID_FLAGS; + else if (rt_is_raid10(rs->raid_type)) + return RAID10_INVALID_FLAGS; + else if (rt_is_raid45(rs->raid_type)) + return RAID45_INVALID_FLAGS; + else if (rt_is_raid6(rs->raid_type)) + return RAID6_INVALID_FLAGS; + + return ~0; +} + +/* + * Check for any invalid flags set on @rs defined by bitset @invalid_flags + * + * Has to be called after parsing of the ctr flags! 
+ */ +static int rs_check_for_invalid_flags(struct raid_set *rs) +{ + unsigned int ctr_flags = rs->ctr_flags, flag = 0; + const uint32_t invalid_flags = _invalid_flags(rs); + + while ((ctr_flags &= ~flag)) { + flag = 1 << __ffs(ctr_flags); + + if (_test_flag(flag, rs->ctr_flags) && + _test_flag(flag, invalid_flags)) + return ti_error_einval(rs->ti, "Invalid flag combined"); + } + + return 0; +} + static char *raid10_md_layout_to_format(int layout) { /* @@ -135,7 +350,7 @@ static int raid10_format_to_md_layout(char *format, unsigned copies) return (f << 8) | n; } -static struct raid_type *get_raid_type(char *name) +static struct raid_type *get_raid_type(const char *name) { int i; @@ -151,16 +366,12 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra unsigned i; struct raid_set *rs; - if (raid_devs <= raid_type->parity_devs) { - ti->error = "Insufficient number of devices"; - return ERR_PTR(-EINVAL); - } + if (raid_devs <= raid_type->parity_devs) + return ERR_PTR(ti_error_einval(ti, "Insufficient number of devices")); rs = kzalloc(sizeof(*rs) + raid_devs * sizeof(rs->dev[0]), GFP_KERNEL); - if (!rs) { - ti->error = "Cannot allocate raid context"; - return ERR_PTR(-ENOMEM); - } + if (!rs) + return ERR_PTR(ti_error_ret(ti, "Cannot allocate raid context", -ENOMEM)); mddev_init(&rs->md); @@ -220,14 +431,20 @@ static void context_free(struct raid_set *rs) * This code parses those words. If there is a failure, * the caller must use context_free to unwind the operations. 
*/ -static int dev_parms(struct raid_set *rs, char **argv) +static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as) { int i; int rebuild = 0; int metadata_available = 0; - int ret = 0; + int r = 0; + const char *arg; + + /* Put off the number of raid devices argument to get to dev pairs */ + arg = dm_shift_arg(as); + if (!arg) + return -EINVAL; - for (i = 0; i < rs->md.raid_disks; i++, argv += 2) { + for (i = 0; i < rs->md.raid_disks; i++) { rs->dev[i].rdev.raid_disk = i; rs->dev[i].meta_dev = NULL; @@ -240,40 +457,42 @@ static int dev_parms(struct raid_set *rs, char **argv) rs->dev[i].rdev.data_offset = 0; rs->dev[i].rdev.mddev = &rs->md; - if (strcmp(argv[0], "-")) { - ret = dm_get_device(rs->ti, argv[0], + arg = dm_shift_arg(as); + if (!arg) + return -EINVAL; + + if (strcmp(arg, "-")) { + r = dm_get_device(rs->ti, arg, dm_table_get_mode(rs->ti->table), &rs->dev[i].meta_dev); - rs->ti->error = "RAID metadata device lookup failure"; - if (ret) - return ret; + if (r) + return ti_error_ret(rs->ti, "RAID metadata device lookup failure", r); rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL); if (!rs->dev[i].rdev.sb_page) - return -ENOMEM; + return ti_error_ret(rs->ti, "Failed to allocate superblock page", -ENOMEM); } - if (!strcmp(argv[1], "-")) { + arg = dm_shift_arg(as); + if (!arg) + return -EINVAL; + + if (!strcmp(arg, "-")) { if (!test_bit(In_sync, &rs->dev[i].rdev.flags) && - (!rs->dev[i].rdev.recovery_offset)) { - rs->ti->error = "Drive designated for rebuild not specified"; - return -EINVAL; - } + (!rs->dev[i].rdev.recovery_offset)) + return ti_error_einval(rs->ti, "Drive designated for rebuild not specified"); - rs->ti->error = "No data device supplied with metadata device"; if (rs->dev[i].meta_dev) - return -EINVAL; + return ti_error_einval(rs->ti, "No data device supplied with metadata device"); continue; } - ret = dm_get_device(rs->ti, argv[1], + r = dm_get_device(rs->ti, arg, dm_table_get_mode(rs->ti->table), &rs->dev[i].data_dev); - if 
(ret) { - rs->ti->error = "RAID device lookup failure"; - return ret; - } + if (r) + return ti_error_ret(rs->ti, "RAID device lookup failure", r); if (rs->dev[i].meta_dev) { metadata_available = 1; @@ -302,8 +521,7 @@ static int dev_parms(struct raid_set *rs, char **argv) * User could specify 'nosync' option if desperate. */ DMERR("Unable to rebuild drive while array is not in-sync"); - rs->ti->error = "RAID device lookup failure"; - return -EINVAL; + return ti_error_einval(rs->ti, "Unable to rebuild drive while array is not in-sync"); } return 0; @@ -340,27 +558,20 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) /* * Validate user-supplied value. */ - if (region_size > rs->ti->len) { - rs->ti->error = "Supplied region size is too large"; - return -EINVAL; - } + if (region_size > rs->ti->len) + return ti_error_einval(rs->ti, "Supplied region size is too large"); if (region_size < min_region_size) { DMERR("Supplied region_size (%lu sectors) below minimum (%lu)", region_size, min_region_size); - rs->ti->error = "Supplied region size is too small"; - return -EINVAL; + return ti_error_einval(rs->ti, "Supplied region size is too small"); } - if (!is_power_of_2(region_size)) { - rs->ti->error = "Region size is not a power of 2"; - return -EINVAL; - } + if (!is_power_of_2(region_size)) + return ti_error_einval(rs->ti, "Region size is not a power of 2"); - if (region_size < rs->md.chunk_sectors) { - rs->ti->error = "Region size is smaller than the chunk size"; - return -EINVAL; - } + if (region_size < rs->md.chunk_sectors) + return ti_error_einval(rs->ti, "Region size is smaller than the chunk size"); } /* @@ -492,39 +703,38 @@ too_many: * [raid10_copies <# copies>] Number of copies. (Default: 2) * [raid10_format ] Layout algorithm. 
(Default: near) */ -static int parse_raid_params(struct raid_set *rs, char **argv, +static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as, unsigned num_raid_params) { char *raid10_format = "near"; unsigned raid10_copies = 2; unsigned i; - unsigned long value, region_size = 0; + unsigned value, region_size = 0; sector_t sectors_per_dev = rs->ti->len; sector_t max_io_len; - char *key; + const char *arg, *key; + struct raid_dev *rd; + + arg = dm_shift_arg(as); + num_raid_params--; /* Account for chunk_size argument */ + + if (kstrtouint(arg, 10, &value) < 0) + return ti_error_einval(rs->ti, "Bad numerical argument given for chunk_size"); /* * First, parse the in-order required arguments * "chunk_size" is the only argument of this type. */ - if ((kstrtoul(argv[0], 10, &value) < 0)) { - rs->ti->error = "Bad chunk size"; - return -EINVAL; - } else if (rs->raid_type->level == 1) { + if (rs->raid_type->level == 1) { if (value) DMERR("Ignoring chunk size parameter for RAID 1"); value = 0; - } else if (!is_power_of_2(value)) { - rs->ti->error = "Chunk size must be a power of 2"; - return -EINVAL; - } else if (value < 8) { - rs->ti->error = "Chunk size value is too small"; - return -EINVAL; - } + } else if (!is_power_of_2(value)) + return ti_error_einval(rs->ti, "Chunk size must be a power of 2"); + else if (value < 8) + return ti_error_einval(rs->ti, "Chunk size value is too small"); rs->md.new_chunk_sectors = rs->md.chunk_sectors = value; - argv++; - num_raid_params--; /* * We set each individual device as In_sync with a completed @@ -552,137 +762,135 @@ static int parse_raid_params(struct raid_set *rs, char **argv, * Second, parse the unordered optional arguments */ for (i = 0; i < num_raid_params; i++) { - if (!strcasecmp(argv[i], "nosync")) { + arg = dm_shift_arg(as); + if (!arg) + return ti_error_einval(rs->ti, "Not enough raid parameters given"); + + if (!strcasecmp(arg, "nosync")) { rs->md.recovery_cp = MaxSector; - rs->ctr_flags |= CTR_FLAG_NOSYNC; 
+ _set_flag(CTR_FLAG_NOSYNC, &rs->ctr_flags); continue; } - if (!strcasecmp(argv[i], "sync")) { + if (!strcasecmp(arg, "sync")) { rs->md.recovery_cp = 0; - rs->ctr_flags |= CTR_FLAG_SYNC; + _set_flag(CTR_FLAG_SYNC, &rs->ctr_flags); continue; } - /* The rest of the optional arguments come in key/value pairs */ - if ((i + 1) >= num_raid_params) { - rs->ti->error = "Wrong number of raid parameters given"; - return -EINVAL; - } + key = arg; + arg = dm_shift_arg(as); + i++; /* Account for the argument pairs */ + if (!arg) + return ti_error_einval(rs->ti, "Wrong number of raid parameters given"); - key = argv[i++]; + /* + * Parameters that take a string value are checked here. + */ - /* Parameters that take a string value are checked here. */ - if (!strcasecmp(key, "raid10_format")) { - if (rs->raid_type->level != 10) { - rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type"; - return -EINVAL; - } - if (strcmp("near", argv[i]) && - strcmp("far", argv[i]) && - strcmp("offset", argv[i])) { - rs->ti->error = "Invalid 'raid10_format' value given"; - return -EINVAL; - } - raid10_format = argv[i]; - rs->ctr_flags |= CTR_FLAG_RAID10_FORMAT; + if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_RAID10_FORMAT))) { + if (_test_and_set_flag(CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags)) + return ti_error_einval(rs->ti, "Only one raid10_format argument pair allowed"); + if (rs->raid_type->level != 10) + return ti_error_einval(rs->ti, "'raid10_format' is an invalid parameter for this RAID type"); + if (strcmp("near", arg) && + strcmp("far", arg) && + strcmp("offset", arg)) + return ti_error_einval(rs->ti, "Invalid 'raid10_format' value given"); + + raid10_format = (char *) arg; continue; } - if (kstrtoul(argv[i], 10, &value) < 0) { - rs->ti->error = "Bad numerical argument given in raid params"; - return -EINVAL; - } + if (kstrtouint(arg, 10, &value) < 0) + return ti_error_einval(rs->ti, "Bad numerical argument given in raid params"); + + if (!strcasecmp(key, 
_argname_by_flag(CTR_FLAG_REBUILD))) { + /* + * "rebuild" is being passed in by userspace to provide + * indexes of replaced devices and to set up additional + * devices on raid level takeover. + */ + if (!_in_range(value, 0, rs->md.raid_disks - 1)) + return ti_error_einval(rs->ti, "Invalid rebuild index given"); + + rd = rs->dev + value; + clear_bit(In_sync, &rd->rdev.flags); + clear_bit(Faulty, &rd->rdev.flags); + rd->rdev.recovery_offset = 0; + _set_flag(CTR_FLAG_REBUILD, &rs->ctr_flags); + } else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_WRITE_MOSTLY))) { + if (rs->raid_type->level != 1) + return ti_error_einval(rs->ti, "write_mostly option is only valid for RAID1"); + + if (!_in_range(value, 0, rs->md.raid_disks - 1)) + return ti_error_einval(rs->ti, "Invalid write_mostly index given"); - /* Parameters that take a numeric value are checked here */ - if (!strcasecmp(key, "rebuild")) { - if (value >= rs->md.raid_disks) { - rs->ti->error = "Invalid rebuild index given"; - return -EINVAL; - } - clear_bit(In_sync, &rs->dev[value].rdev.flags); - rs->dev[value].rdev.recovery_offset = 0; - rs->ctr_flags |= CTR_FLAG_REBUILD; - } else if (!strcasecmp(key, "write_mostly")) { - if (rs->raid_type->level != 1) { - rs->ti->error = "write_mostly option is only valid for RAID1"; - return -EINVAL; - } - if (value >= rs->md.raid_disks) { - rs->ti->error = "Invalid write_mostly drive index given"; - return -EINVAL; - } set_bit(WriteMostly, &rs->dev[value].rdev.flags); - } else if (!strcasecmp(key, "max_write_behind")) { - if (rs->raid_type->level != 1) { - rs->ti->error = "max_write_behind option is only valid for RAID1"; - return -EINVAL; - } - rs->ctr_flags |= CTR_FLAG_MAX_WRITE_BEHIND; + _set_flag(CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags); + } else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_MAX_WRITE_BEHIND))) { + if (rs->raid_type->level != 1) + return ti_error_einval(rs->ti, "max_write_behind option is only valid for RAID1"); + + if 
(_test_and_set_flag(CTR_FLAG_MAX_WRITE_BEHIND, &rs->ctr_flags)) + return ti_error_einval(rs->ti, "Only one max_write_behind argument pair allowed"); /* * In device-mapper, we specify things in sectors, but * MD records this value in kB */ value /= 2; - if (value > COUNTER_MAX) { - rs->ti->error = "Max write-behind limit out of range"; - return -EINVAL; - } + if (value > COUNTER_MAX) + return ti_error_einval(rs->ti, "Max write-behind limit out of range"); + rs->md.bitmap_info.max_write_behind = value; - } else if (!strcasecmp(key, "daemon_sleep")) { - rs->ctr_flags |= CTR_FLAG_DAEMON_SLEEP; - if (!value || (value > MAX_SCHEDULE_TIMEOUT)) { - rs->ti->error = "daemon sleep period out of range"; - return -EINVAL; - } + } else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_DAEMON_SLEEP))) { + if (_test_and_set_flag(CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags)) + return ti_error_einval(rs->ti, "Only one daemon_sleep argument pair allowed"); + if (!value || (value > MAX_SCHEDULE_TIMEOUT)) + return ti_error_einval(rs->ti, "daemon sleep period out of range"); rs->md.bitmap_info.daemon_sleep = value; - } else if (!strcasecmp(key, "stripe_cache")) { - rs->ctr_flags |= CTR_FLAG_STRIPE_CACHE; - + } else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_STRIPE_CACHE))) { + if (_test_and_set_flag(CTR_FLAG_STRIPE_CACHE, &rs->ctr_flags)) + return ti_error_einval(rs->ti, "Only one stripe_cache argument pair allowed"); /* * In device-mapper, we specify things in sectors, but * MD records this value in kB */ value /= 2; - if ((rs->raid_type->level != 5) && - (rs->raid_type->level != 6)) { - rs->ti->error = "Inappropriate argument: stripe_cache"; - return -EINVAL; - } - if (raid5_set_cache_size(&rs->md, (int)value)) { - rs->ti->error = "Bad stripe_cache size"; - return -EINVAL; - } - } else if (!strcasecmp(key, "min_recovery_rate")) { - rs->ctr_flags |= CTR_FLAG_MIN_RECOVERY_RATE; - if (value > INT_MAX) { - rs->ti->error = "min_recovery_rate out of range"; - return -EINVAL; - } + if 
(!_in_range(rs->raid_type->level, 4, 6))
+				return ti_error_einval(rs->ti, "Inappropriate argument: stripe_cache");
+			if (raid5_set_cache_size(&rs->md, (int)value))
+				return ti_error_einval(rs->ti, "Bad stripe_cache size");
+
+		} else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_MIN_RECOVERY_RATE))) {
+			if (_test_and_set_flag(CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags))
+				return ti_error_einval(rs->ti, "Only one min_recovery_rate argument pair allowed");
+			if (value > INT_MAX)
+				return ti_error_einval(rs->ti, "min_recovery_rate out of range");
 			rs->md.sync_speed_min = (int)value;
-		} else if (!strcasecmp(key, "max_recovery_rate")) {
-			rs->ctr_flags |= CTR_FLAG_MAX_RECOVERY_RATE;
-			if (value > INT_MAX) {
-				rs->ti->error = "max_recovery_rate out of range";
-				return -EINVAL;
-			}
+		} else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_MAX_RECOVERY_RATE))) {
+			if (_test_and_set_flag(CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags))
+				return ti_error_einval(rs->ti, "Only one max_recovery_rate argument pair allowed");
+			if (value > INT_MAX)
+				return ti_error_einval(rs->ti, "max_recovery_rate out of range");
 			rs->md.sync_speed_max = (int)value;
-		} else if (!strcasecmp(key, "region_size")) {
-			rs->ctr_flags |= CTR_FLAG_REGION_SIZE;
+		} else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_REGION_SIZE))) {
+			if (_test_and_set_flag(CTR_FLAG_REGION_SIZE, &rs->ctr_flags))
+				return ti_error_einval(rs->ti, "Only one region_size argument pair allowed");
+
 			region_size = value;
-		} else if (!strcasecmp(key, "raid10_copies") &&
-			   (rs->raid_type->level == 10)) {
-			if ((value < 2) || (value > 0xFF)) {
-				rs->ti->error = "Bad value for 'raid10_copies'";
-				return -EINVAL;
-			}
-			rs->ctr_flags |= CTR_FLAG_RAID10_COPIES;
+		} else if (!strcasecmp(key, _argname_by_flag(CTR_FLAG_RAID10_COPIES))) {
+			if (_test_and_set_flag(CTR_FLAG_RAID10_COPIES, &rs->ctr_flags))
+				return ti_error_einval(rs->ti, "Only one raid10_copies argument pair allowed");
+
+			if (!_in_range(value, 2, rs->md.raid_disks))
+				return 
ti_error_einval(rs->ti, "Bad value for 'raid10_copies'"); + raid10_copies = value; } else { DMERR("Unable to parse RAID parameter: %s", key); - rs->ti->error = "Unable to parse RAID parameters"; - return -EINVAL; + return ti_error_einval(rs->ti, "Unable to parse RAID parameters"); } } @@ -698,19 +906,15 @@ static int parse_raid_params(struct raid_set *rs, char **argv, return -EINVAL; if (rs->raid_type->level == 10) { - if (raid10_copies > rs->md.raid_disks) { - rs->ti->error = "Not enough devices to satisfy specification"; - return -EINVAL; - } + if (raid10_copies > rs->md.raid_disks) + return ti_error_einval(rs->ti, "Not enough devices to satisfy specification"); /* * If the format is not "near", we only support * two copies at the moment. */ - if (strcmp("near", raid10_format) && (raid10_copies > 2)) { - rs->ti->error = "Too many copies for given RAID10 format."; - return -EINVAL; - } + if (strcmp("near", raid10_format) && (raid10_copies > 2)) + return ti_error_einval(rs->ti, "Too many copies for given RAID10 format."); /* (Len * #mirrors) / #devices */ sectors_per_dev = rs->ti->len * raid10_copies; @@ -721,17 +925,17 @@ static int parse_raid_params(struct raid_set *rs, char **argv, rs->md.new_layout = rs->md.layout; } else if ((!rs->raid_type->level || rs->raid_type->level > 1) && sector_div(sectors_per_dev, - (rs->md.raid_disks - rs->raid_type->parity_devs))) { - rs->ti->error = "Target length not divisible by number of data devices"; - return -EINVAL; - } + (rs->md.raid_disks - rs->raid_type->parity_devs))) + return ti_error_einval(rs->ti, "Target length not divisible by number of data devices"); + rs->md.dev_sectors = sectors_per_dev; /* Assume there are no metadata devices until the drives are parsed */ rs->md.persistent = 0; rs->md.external = 1; - return 0; + /* Check, if any invalid ctr arguments have been passed in for the raid level */ + return rs_check_for_invalid_flags(rs); } static void do_table_event(struct work_struct *ws) @@ -792,7 +996,7 @@ static 
int read_disk_sb(struct md_rdev *rdev, int size) if (rdev->sb_loaded) return 0; - if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) { + if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, 1)) { DMERR("Failed to read superblock of device at position %d", rdev->raid_disk); md_error(rdev->mddev, rdev); @@ -848,7 +1052,7 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev) */ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev) { - int ret; + int r; struct dm_raid_superblock *sb; struct dm_raid_superblock *refsb; uint64_t events_sb, events_refsb; @@ -860,9 +1064,9 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev) return -EINVAL; } - ret = read_disk_sb(rdev, rdev->sb_size); - if (ret) - return ret; + r = read_disk_sb(rdev, rdev->sb_size); + if (r) + return r; sb = page_address(rdev->sb_page); @@ -1004,11 +1208,9 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev) if (!test_bit(FirstUse, &r->flags) && (r->raid_disk >= 0)) { role = le32_to_cpu(sb2->array_position); if (role != r->raid_disk) { - if (rs->raid_type->level != 1) { - rs->ti->error = "Cannot change device " - "positions in RAID array"; - return -EINVAL; - } + if (rs->raid_type->level != 1) + return ti_error_einval(rs->ti, "Cannot change device " + "positions in RAID array"); DMINFO("RAID1 device #%d now at position #%d", role, r->raid_disk); } @@ -1072,7 +1274,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev) */ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) { - int ret; + int r; struct raid_dev *dev; struct md_rdev *rdev, *tmp, *freshest; struct mddev *mddev = &rs->md; @@ -1097,9 +1299,9 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) if (!rdev->meta_bdev) continue; - ret = super_load(rdev, freshest); + r = super_load(rdev, freshest); - switch (ret) { + switch (r) { case 1: freshest = rdev; break; @@ -1139,18 +1341,15 @@ static int 
analyse_superblocks(struct dm_target *ti, struct raid_set *rs) if (!freshest) return 0; - if (validate_raid_redundancy(rs)) { - rs->ti->error = "Insufficient redundancy to activate array"; - return -EINVAL; - } + if (validate_raid_redundancy(rs)) + return ti_error_einval(rs->ti, "Insufficient redundancy to activate array"); /* * Validation of the freshest device provides the source of * validation for the remaining devices. */ - ti->error = "Unable to assemble array: Invalid superblocks"; if (super_validate(rs, freshest)) - return -EINVAL; + return ti_error_einval(rs->ti, "Unable to assemble array: Invalid superblocks"); rdev_for_each(rdev, mddev) if ((rdev != freshest) && super_validate(rs, rdev)) @@ -1207,79 +1406,69 @@ static void configure_discard_support(struct dm_target *ti, struct raid_set *rs) } /* - * Construct a RAID4/5/6 mapping: + * Construct a RAID0/1/10/4/5/6 mapping: * Args: - * <#raid_params> \ - * <#raid_devs> { .. } + * <#raid_params> {0,} \ + * <#raid_devs> [ ]{1,} * * varies by . See 'parse_raid_params' for * details on possible . + * + * Userspace is free to initialize the metadata devices, hence the superblocks to + * enforce recreation based on the passed in table parameters. 
+ * */ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) { - int ret; + int r; struct raid_type *rt; - unsigned long num_raid_params, num_raid_devs; + unsigned num_raid_params, num_raid_devs; struct raid_set *rs = NULL; - - /* Must have at least <#raid_params> */ - if (argc < 2) { - ti->error = "Too few arguments"; - return -EINVAL; - } - - /* raid type */ - rt = get_raid_type(argv[0]); - if (!rt) { - ti->error = "Unrecognised raid_type"; - return -EINVAL; - } - argc--; - argv++; - - /* number of RAID parameters */ - if (kstrtoul(argv[0], 10, &num_raid_params) < 0) { - ti->error = "Cannot understand number of RAID parameters"; - return -EINVAL; - } - argc--; - argv++; - - /* Skip over RAID params for now and find out # of devices */ - if (num_raid_params >= argc) { - ti->error = "Arguments do not agree with counts given"; - return -EINVAL; - } - - if ((kstrtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) || - (num_raid_devs > MAX_RAID_DEVICES)) { - ti->error = "Cannot understand number of raid devices"; - return -EINVAL; - } - - argc -= num_raid_params + 1; /* +1: we already have num_raid_devs */ - if (argc != (num_raid_devs * 2)) { - ti->error = "Supplied RAID devices does not match the count given"; - return -EINVAL; - } - - rs = context_alloc(ti, rt, (unsigned)num_raid_devs); + const char *arg; + struct dm_arg_set as = { argc, argv }, as_nrd; + struct dm_arg _args[] = { + { 0, as.argc, "Cannot understand number of raid parameters" }, + { 1, 254, "Cannot understand number of raid devices parameters" } + }; + + /* Must have */ + arg = dm_shift_arg(&as); + if (!arg) + return ti_error_einval(rs->ti, "No arguments"); + + rt = get_raid_type(arg); + if (!rt) + return ti_error_einval(rs->ti, "Unrecognised raid_type"); + + /* Must have <#raid_params> */ + if (dm_read_arg_group(_args, &as, &num_raid_params, &ti->error)) + return -EINVAL; + + /* number of raid device tupples */ + as_nrd = as; + dm_consume_args(&as_nrd, num_raid_params); + 
_args[1].max = (as_nrd.argc - 1) / 2;
+	if (dm_read_arg(_args + 1, &as_nrd, &num_raid_devs, &ti->error))
+		return -EINVAL;
+
+	if (!_in_range(num_raid_devs, 1, MAX_RAID_DEVICES))
+		return ti_error_einval(ti, "Invalid number of supplied raid devices");
+
+	rs = context_alloc(ti, rt, num_raid_devs);
 	if (IS_ERR(rs))
 		return PTR_ERR(rs);
 
-	ret = parse_raid_params(rs, argv, (unsigned)num_raid_params);
-	if (ret)
+	r = parse_raid_params(rs, &as, num_raid_params);
+	if (r)
 		goto bad;
 
-	argv += num_raid_params + 1;
-
-	ret = dev_parms(rs, argv);
-	if (ret)
+	r = parse_dev_params(rs, &as);
+	if (r)
 		goto bad;
 
 	rs->md.sync_super = super_sync;
-	ret = analyse_superblocks(ti, rs);
-	if (ret)
+	r = analyse_superblocks(ti, rs);
+	if (r)
 		goto bad;
 
 	INIT_WORK(&rs->md.event_work, do_table_event);
@@ -1293,18 +1482,17 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	/* Has to be held on running the array */
 	mddev_lock_nointr(&rs->md);
-	ret = md_run(&rs->md);
+	r = md_run(&rs->md);
 	rs->md.in_sync = 0; /* Assume already marked dirty */
 	mddev_unlock(&rs->md);
 
-	if (ret) {
+	if (r) {
 		ti->error = "Fail to run raid array";
 		goto bad;
 	}
 
 	if (ti->len != rs->md.array_sectors) {
-		ti->error = "Array size does not match requested target length";
-		ret = -EINVAL;
+		r = ti_error_einval(ti, "Array size does not match requested target length");
 		goto size_mismatch;
 	}
 	rs->callbacks.congested_fn = raid_is_congested;
@@ -1318,7 +1506,7 @@ size_mismatch:
 bad:
 	context_free(rs);
 
-	return ret;
+	return r;
 }
 
 static void raid_dtr(struct dm_target *ti)
@@ -1603,17 +1791,17 @@ static int raid_iterate_devices(struct dm_target *ti,
 {
 	struct raid_set *rs = ti->private;
 	unsigned i;
-	int ret = 0;
+	int r = 0;
 
-	for (i = 0; !ret && i < rs->md.raid_disks; i++)
+	for (i = 0; !r && i < rs->md.raid_disks; i++)
 		if (rs->dev[i].data_dev)
-			ret = fn(ti,
+			r = fn(ti,
 				 rs->dev[i].data_dev,
 				 0, /* No offset on data devs */
 				 rs->md.dev_sectors,
 				 data);
 
-	return ret;
+	return r;
 }
 
 static void 
raid_io_hints(struct dm_target *ti, struct queue_limits *limits) @@ -1651,7 +1839,8 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) for (i = 0; i < rs->md.raid_disks; i++) { r = &rs->dev[i].rdev; if (test_bit(Faulty, &r->flags) && r->sb_page && - sync_page_io(r, 0, r->sb_size, r->sb_page, READ, 1)) { + sync_page_io(r, 0, r->sb_size, r->sb_page, REQ_OP_READ, 0, + 1)) { DMINFO("Faulty %s device #%d has readable super block." " Attempting to revive it.", rs->raid_type->name, i); @@ -1723,7 +1912,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 8, 0}, + .version = {1, 8, 1}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr,