Support for DISCARD and WRITE_ZEROES in the ubd driver using
fallocate.
DISCARD is enabled by default and can be disabled using a new
UBD command line flag.
If the underlying fs on which the UBD image is stored does not
support DISCARD the support for both DISCARD and WRITE_ZEROES
is turned off.
Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
struct openflags openflags;
unsigned shared:1;
unsigned no_cow:1;
struct openflags openflags;
unsigned shared:1;
unsigned no_cow:1;
struct cow cow;
struct platform_device pdev;
struct request_queue *queue;
struct cow cow;
struct platform_device pdev;
struct request_queue *queue;
.boot_openflags = OPEN_FLAGS, \
.openflags = OPEN_FLAGS, \
.no_cow = 0, \
.boot_openflags = OPEN_FLAGS, \
.openflags = OPEN_FLAGS, \
.no_cow = 0, \
.shared = 0, \
.cow = DEFAULT_COW, \
.lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
.shared = 0, \
.cow = DEFAULT_COW, \
.lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
*index_out = n;
err = -EINVAL;
*index_out = n;
err = -EINVAL;
- for (i = 0; i < sizeof("rscd="); i++) {
+ for (i = 0; i < sizeof("rscdt="); i++) {
switch (*str) {
case 'r':
flags.w = 0;
switch (*str) {
case 'r':
flags.w = 0;
case 'c':
ubd_dev->shared = 1;
break;
case 'c':
ubd_dev->shared = 1;
break;
+ case 't':
+ ubd_dev->no_trim = 1;
+ break;
case '=':
str++;
goto break_loop;
default:
*error_out = "Expected '=' or flag letter "
case '=':
str++;
goto break_loop;
default:
*error_out = "Expected '=' or flag letter "
" 'c' will cause the device to be treated as being shared between multiple\n"
" UMLs and file locking will be turned off - this is appropriate for a\n"
" cluster filesystem and inappropriate at almost all other times.\n\n"
" 'c' will cause the device to be treated as being shared between multiple\n"
" UMLs and file locking will be turned off - this is appropriate for a\n"
" cluster filesystem and inappropriate at almost all other times.\n\n"
+" 't' will disable trim/discard support on the device (enabled by default).\n\n"
);
static int udb_setup(char *str)
);
static int udb_setup(char *str)
for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
struct io_thread_req *io_req = (*irq_req_buffer)[count];
for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
struct io_thread_req *io_req = (*irq_req_buffer)[count];
- if (!blk_update_request(io_req->req, io_req->error, io_req->length))
- __blk_mq_end_request(io_req->req, io_req->error);
-
+ if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
+ blk_queue_max_discard_sectors(io_req->req->q, 0);
+ blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
+ }
+ if ((io_req->error) || (io_req->buffer == NULL))
+ blk_mq_end_request(io_req->req, io_req->error);
+ else {
+ if (!blk_update_request(io_req->req, io_req->error, io_req->length))
+ __blk_mq_end_request(io_req->req, io_req->error);
+ }
if(err < 0) goto error;
ubd_dev->cow.fd = err;
}
if(err < 0) goto error;
ubd_dev->cow.fd = err;
}
+ if (ubd_dev->no_trim == 0) {
+ ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
+ ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
+ blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
+ blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
+ }
blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
return 0;
error:
blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
return 0;
error:
case REQ_OP_WRITE:
ret = queue_rw_req(hctx, req);
break;
case REQ_OP_WRITE:
ret = queue_rw_req(hctx, req);
break;
+ case REQ_OP_DISCARD:
+ case REQ_OP_WRITE_ZEROES:
+ ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
+ break;
default:
WARN_ON_ONCE(1);
res = BLK_STS_NOTSUPP;
default:
WARN_ON_ONCE(1);
res = BLK_STS_NOTSUPP;
n = os_pwrite_file(req->fds[1], &req->bitmap_words,
sizeof(req->bitmap_words), req->cow_offset);
n = os_pwrite_file(req->fds[1], &req->bitmap_words,
sizeof(req->bitmap_words), req->cow_offset);
- if(n != sizeof(req->bitmap_words))
+ if (n != sizeof(req->bitmap_words))
return map_error(-n);
return map_error(0);
return map_error(-n);
return map_error(0);
static void do_io(struct io_thread_req *req)
{
static void do_io(struct io_thread_req *req)
{
unsigned long len;
int n, nsectors, start, end, bit;
__u64 off;
unsigned long len;
int n, nsectors, start, end, bit;
__u64 off;
+ /* FLUSH is really a special case, we cannot "case" it with others */
+
if (req_op(req->req) == REQ_OP_FLUSH) {
/* fds[0] is always either the rw image or our cow file */
req->error = map_error(-os_sync_file(req->fds[0]));
if (req_op(req->req) == REQ_OP_FLUSH) {
/* fds[0] is always either the rw image or our cow file */
req->error = map_error(-os_sync_file(req->fds[0]));
off = req->offset + req->offsets[bit] +
start * req->sectorsize;
len = (end - start) * req->sectorsize;
off = req->offset + req->offsets[bit] +
start * req->sectorsize;
len = (end - start) * req->sectorsize;
- buf = &req->buffer[start * req->sectorsize];
+ if (req->buffer != NULL)
+ buf = &req->buffer[start * req->sectorsize];
- if (req_op(req->req) == REQ_OP_READ) {
+ switch (req_op(req->req)) {
+ case REQ_OP_READ:
n = 0;
do {
buf = &buf[n];
len -= n;
n = os_pread_file(req->fds[bit], buf, len, off);
n = 0;
do {
buf = &buf[n];
len -= n;
n = os_pread_file(req->fds[bit], buf, len, off);
req->error = map_error(-n);
return;
}
} while((n < len) && (n != 0));
if (n < len) memset(&buf[n], 0, len - n);
req->error = map_error(-n);
return;
}
} while((n < len) && (n != 0));
if (n < len) memset(&buf[n], 0, len - n);
+ break;
+ case REQ_OP_WRITE:
n = os_pwrite_file(req->fds[bit], buf, len, off);
if(n != len){
req->error = map_error(-n);
return;
}
n = os_pwrite_file(req->fds[bit], buf, len, off);
if(n != len){
req->error = map_error(-n);
return;
}
+ break;
+ case REQ_OP_DISCARD:
+ case REQ_OP_WRITE_ZEROES:
+ n = os_falloc_punch(req->fds[bit], off, len);
+ if (n) {
+ req->error = map_error(-n);
+ return;
+ }
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ req->error = BLK_STS_NOTSUPP;
+ return;
extern unsigned os_major(unsigned long long dev);
extern unsigned os_minor(unsigned long long dev);
extern unsigned long long os_makedev(unsigned major, unsigned minor);
extern unsigned os_major(unsigned long long dev);
extern unsigned os_minor(unsigned long long dev);
extern unsigned long long os_makedev(unsigned major, unsigned minor);
+extern int os_falloc_punch(int fd, unsigned long long offset, int count);
/* start_up.c */
extern void os_early_checks(void);
/* start_up.c */
extern void os_early_checks(void);
{
return makedev(major, minor);
}
{
return makedev(major, minor);
}
+
+int os_falloc_punch(int fd, unsigned long long offset, int len)
+{
+ int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len);
+
+ if (n < 0)
+ return -errno;
+ return n;
+}
+