// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Network block device - make block devices work over TCP
 *
 * Note that you cannot swap over this thing, yet. Seems to work but
 * deadlocks sometimes - you cannot swap over TCP in general.
 *
 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
 *
 * (part of code stolen from loop.c)
 */
#include <linux/major.h>

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/ioctl.h>
#include <linux/mutex.h>
#include <linux/compiler.h>
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/net.h>
#include <linux/kthread.h>
#include <linux/types.h>
#include <linux/debugfs.h>
#include <linux/blk-mq.h>

#include <linux/uaccess.h>
#include <asm/types.h>

#include <linux/nbd.h>
#include <linux/nbd-netlink.h>
#include <net/genetlink.h>

#define CREATE_TRACE_POINTS
#include <trace/events/nbd.h>
static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);
static struct workqueue_struct *nbd_del_wq;
static int nbd_total_devices = 0;

struct nbd_sock {
	struct socket *sock;
	struct mutex tx_lock;
	struct request *pending;
	int sent;
	bool dead;
	int fallback_index;
	int cookie;
};
struct recv_thread_args {
	struct work_struct work;
	struct nbd_device *nbd;
	int index;
};

struct link_dead_args {
	struct work_struct work;
	int index;
};
#define NBD_RT_TIMEDOUT			0
#define NBD_RT_DISCONNECT_REQUESTED	1
#define NBD_RT_DISCONNECTED		2
#define NBD_RT_HAS_PID_FILE		3
#define NBD_RT_HAS_CONFIG_REF		4
#define NBD_RT_BOUND			5
#define NBD_RT_DISCONNECT_ON_CLOSE	6
#define NBD_RT_HAS_BACKEND_FILE		7

#define NBD_DESTROY_ON_DISCONNECT	0
#define NBD_DISCONNECT_REQUESTED	1
struct nbd_config {
	u32 flags;
	unsigned long runtime_flags;
	u64 dead_conn_timeout;

	struct nbd_sock **socks;
	int num_connections;
	atomic_t live_connections;
	wait_queue_head_t conn_wait;

	atomic_t recv_threads;
	wait_queue_head_t recv_wq;
	loff_t blksize;
	loff_t bytesize;
#if IS_ENABLED(CONFIG_DEBUG_FS)
	struct dentry *dbg_dir;
#endif
};
struct nbd_device {
	struct blk_mq_tag_set tag_set;

	int index;
	refcount_t config_refs;
	refcount_t refs;
	struct nbd_config *config;
	struct mutex config_lock;
	struct gendisk *disk;
	struct workqueue_struct *recv_workq;
	struct work_struct remove_work;

	struct list_head list;
	struct task_struct *task_recv;
	struct task_struct *task_setup;

	struct completion *destroy_complete;
	unsigned long flags;

	char *backend;
};
#define NBD_CMD_REQUEUED	1

struct nbd_cmd {
	struct nbd_device *nbd;
	struct mutex lock;
	int index;
	int cookie;
	int retries;
	blk_status_t status;
	unsigned long flags;
	u32 cmd_cookie;
};
#if IS_ENABLED(CONFIG_DEBUG_FS)
static struct dentry *nbd_dbg_dir;
#endif

#define nbd_name(nbd) ((nbd)->disk->disk_name)
#define NBD_MAGIC 0x68797548

#define NBD_DEF_BLKSIZE 1024

static unsigned int nbds_max = 16;
static int max_part = 16;
static int part_shift;
static int nbd_dev_dbg_init(struct nbd_device *nbd);
static void nbd_dev_dbg_close(struct nbd_device *nbd);
static void nbd_config_put(struct nbd_device *nbd);
static void nbd_connect_reply(struct genl_info *info, int index);
static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);
static void nbd_dead_link_work(struct work_struct *work);
static void nbd_disconnect_and_put(struct nbd_device *nbd);
static inline struct device *nbd_to_dev(struct nbd_device *nbd)
{
	return disk_to_dev(nbd->disk);
}

static void nbd_requeue_cmd(struct nbd_cmd *cmd)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);

	if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags))
		blk_mq_requeue_request(req, true);
}
#define NBD_COOKIE_BITS 32
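
/*
 * The 64-bit handle sent to the server packs a per-command reuse cookie into
 * the upper 32 bits and the unique blk-mq tag into the lower 32 bits. For
 * example (illustrative values only), cookie 2 and tag 0x10003 encode as
 * (2ULL << NBD_COOKIE_BITS) | 0x10003 == 0x0000000200010003.
 */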
static u64 nbd_cmd_handle(struct nbd_cmd *cmd)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	u32 tag = blk_mq_unique_tag(req);
	u64 cookie = cmd->cmd_cookie;

	return (cookie << NBD_COOKIE_BITS) | tag;
}

static u32 nbd_handle_to_tag(u64 handle)
{
	return (u32)handle;
}

static u32 nbd_handle_to_cookie(u64 handle)
{
	return (u32)(handle >> NBD_COOKIE_BITS);
}
static const char *nbdcmd_to_ascii(int cmd)
{
	switch (cmd) {
	case NBD_CMD_READ: return "read";
	case NBD_CMD_WRITE: return "write";
	case NBD_CMD_DISC: return "disconnect";
	case NBD_CMD_FLUSH: return "flush";
	case NBD_CMD_TRIM: return "trim/discard";
	}
	return "invalid";
}
static ssize_t pid_show(struct device *dev,
			struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;

	return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
}

static const struct device_attribute pid_attr = {
	.attr = { .name = "pid", .mode = 0444},
	.show = pid_show,
};
static ssize_t backend_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;

	return sprintf(buf, "%s\n", nbd->backend ?: "");
}

static const struct device_attribute backend_attr = {
	.attr = { .name = "backend", .mode = 0444},
	.show = backend_show,
};
/*
 * Place this at the end, just before the nbd is freed, to make sure that
 * the disk and its kobject are fully removed before another device with
 * the same name can be created.
 */
static void nbd_notify_destroy_completion(struct nbd_device *nbd)
{
	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
	    nbd->destroy_complete)
		complete(nbd->destroy_complete);
}
251 static void nbd_dev_remove(struct nbd_device *nbd)
253 struct gendisk *disk = nbd->disk;
256 blk_cleanup_disk(disk);
257 blk_mq_free_tag_set(&nbd->tag_set);
260 * Remove from idr after del_gendisk() completes, so if the same ID is
261 * reused, the following add_disk() will succeed.
263 mutex_lock(&nbd_index_mutex);
264 idr_remove(&nbd_index_idr, nbd->index);
265 nbd_notify_destroy_completion(nbd);
266 mutex_unlock(&nbd_index_mutex);
271 static void nbd_dev_remove_work(struct work_struct *work)
273 nbd_dev_remove(container_of(work, struct nbd_device, remove_work));
276 static void nbd_put(struct nbd_device *nbd)
278 if (!refcount_dec_and_test(&nbd->refs))
	/* Call del_gendisk() asynchronously to prevent deadlock */
282 if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
283 queue_work(nbd_del_wq, &nbd->remove_work);
288 static int nbd_disconnected(struct nbd_config *config)
290 return test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags) ||
291 test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
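
/*
 * Mark one socket dead. If this was the last live connection and userspace
 * had requested a disconnect, flip the whole device to DISCONNECTED; with
 * @notify set, also schedule a netlink multicast so userspace can react.
 */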
294 static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
297 if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) {
298 struct link_dead_args *args;
299 args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO);
301 INIT_WORK(&args->work, nbd_dead_link_work);
302 args->index = nbd->index;
303 queue_work(system_wq, &args->work);
307 kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
308 if (atomic_dec_return(&nbd->config->live_connections) == 0) {
309 if (test_and_clear_bit(NBD_RT_DISCONNECT_REQUESTED,
310 &nbd->config->runtime_flags)) {
311 set_bit(NBD_RT_DISCONNECTED,
312 &nbd->config->runtime_flags);
313 dev_info(nbd_to_dev(nbd),
314 "Disconnected due to user request.\n");
319 nsock->pending = NULL;
323 static void nbd_size_clear(struct nbd_device *nbd)
325 if (nbd->config->bytesize) {
326 set_capacity(nbd->disk, 0);
327 kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
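
/*
 * Apply a new device size. blksize must be a power of two in [512,
 * PAGE_SIZE]; capacity is advertised to the block layer in 512-byte
 * sectors, so e.g. a 1 GiB export (bytesize = 1073741824) becomes
 * bytesize >> 9 == 2097152 sectors regardless of blksize.
 */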
331 static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
335 blksize = NBD_DEF_BLKSIZE;
336 if (blksize < 512 || blksize > PAGE_SIZE || !is_power_of_2(blksize))
339 nbd->config->bytesize = bytesize;
340 nbd->config->blksize = blksize;
345 if (nbd->config->flags & NBD_FLAG_SEND_TRIM) {
346 nbd->disk->queue->limits.discard_granularity = blksize;
347 nbd->disk->queue->limits.discard_alignment = blksize;
348 blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
350 blk_queue_logical_block_size(nbd->disk->queue, blksize);
351 blk_queue_physical_block_size(nbd->disk->queue, blksize);
354 set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
355 if (!set_capacity_and_notify(nbd->disk, bytesize >> 9))
356 kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
360 static void nbd_complete_rq(struct request *req)
362 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
364 dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", req,
365 cmd->status ? "failed" : "done");
367 blk_mq_end_request(req, cmd->status);
/*
 * Forcibly shut down the socket, causing all listeners to error out.
 */
373 static void sock_shutdown(struct nbd_device *nbd)
375 struct nbd_config *config = nbd->config;
378 if (config->num_connections == 0)
380 if (test_and_set_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
383 for (i = 0; i < config->num_connections; i++) {
384 struct nbd_sock *nsock = config->socks[i];
385 mutex_lock(&nsock->tx_lock);
386 nbd_mark_nsock_dead(nbd, nsock, 0);
387 mutex_unlock(&nsock->tx_lock);
389 dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
static u32 req_to_nbd_cmd_type(struct request *req)
{
	switch (req_op(req)) {
	case REQ_OP_DISCARD:
		return NBD_CMD_TRIM;
	case REQ_OP_FLUSH:
		return NBD_CMD_FLUSH;
	case REQ_OP_WRITE:
		return NBD_CMD_WRITE;
	case REQ_OP_READ:
		return NBD_CMD_READ;
	default:
		return U32_MAX;
	}
}
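
/*
 * blk-mq timeout handler: retry the request on another connection when one
 * is available, requeue it to wait for a reconnect when a dead-connection
 * timeout is configured, and otherwise fail it with BLK_STS_IOERR.
 */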
408 static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
411 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
412 struct nbd_device *nbd = cmd->nbd;
413 struct nbd_config *config;
415 if (!mutex_trylock(&cmd->lock))
416 return BLK_EH_RESET_TIMER;
418 if (!refcount_inc_not_zero(&nbd->config_refs)) {
419 cmd->status = BLK_STS_TIMEOUT;
420 mutex_unlock(&cmd->lock);
423 config = nbd->config;
425 if (config->num_connections > 1 ||
426 (config->num_connections == 1 && nbd->tag_set.timeout)) {
427 dev_err_ratelimited(nbd_to_dev(nbd),
428 "Connection timed out, retrying (%d/%d alive)\n",
429 atomic_read(&config->live_connections),
430 config->num_connections);
		/*
		 * Hooray, we have more connections: requeue this IO, and the
		 * submit path will put it on a real connection. Or, if only
		 * one connection is configured, the submit path will wait
		 * until a new connection is reconfigured or until the dead
		 * connection timeout expires.
		 */
438 if (cmd->index < config->num_connections) {
439 struct nbd_sock *nsock =
440 config->socks[cmd->index];
441 mutex_lock(&nsock->tx_lock);
			/* We can have multiple outstanding requests, so
			 * we don't want to mark the nsock dead if we've
			 * already reconnected with a new socket, so
			 * only mark it dead if it's the same socket we
			 * are using.
			 */
448 if (cmd->cookie == nsock->cookie)
449 nbd_mark_nsock_dead(nbd, nsock, 1);
450 mutex_unlock(&nsock->tx_lock);
452 mutex_unlock(&cmd->lock);
453 nbd_requeue_cmd(cmd);
459 if (!nbd->tag_set.timeout) {
		/*
		 * Userspace sets timeout=0 to disable socket disconnection,
		 * so just warn and reset the timer.
		 */
464 struct nbd_sock *nsock = config->socks[cmd->index];
466 dev_info(nbd_to_dev(nbd), "Possible stuck request %p: control (%s@%llu,%uB). Runtime %u seconds\n",
467 req, nbdcmd_to_ascii(req_to_nbd_cmd_type(req)),
468 (unsigned long long)blk_rq_pos(req) << 9,
469 blk_rq_bytes(req), (req->timeout / HZ) * cmd->retries);
471 mutex_lock(&nsock->tx_lock);
472 if (cmd->cookie != nsock->cookie) {
473 nbd_requeue_cmd(cmd);
474 mutex_unlock(&nsock->tx_lock);
475 mutex_unlock(&cmd->lock);
479 mutex_unlock(&nsock->tx_lock);
480 mutex_unlock(&cmd->lock);
482 return BLK_EH_RESET_TIMER;
485 dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
486 set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
487 cmd->status = BLK_STS_IOERR;
488 mutex_unlock(&cmd->lock);
492 blk_mq_complete_request(req);
/*
 * Send or receive packet.
 */
499 static int sock_xmit(struct nbd_device *nbd, int index, int send,
500 struct iov_iter *iter, int msg_flags, int *sent)
502 struct nbd_config *config = nbd->config;
503 struct socket *sock = config->socks[index]->sock;
506 unsigned int noreclaim_flag;
508 if (unlikely(!sock)) {
509 dev_err_ratelimited(disk_to_dev(nbd->disk),
510 "Attempted %s on closed socket in sock_xmit\n",
511 (send ? "send" : "recv"));
515 msg.msg_iter = *iter;
517 noreclaim_flag = memalloc_noreclaim_save();
519 sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
522 msg.msg_control = NULL;
523 msg.msg_controllen = 0;
524 msg.msg_flags = msg_flags | MSG_NOSIGNAL;
527 result = sock_sendmsg(sock, &msg);
529 result = sock_recvmsg(sock, &msg, msg.msg_flags);
533 result = -EPIPE; /* short read */
538 } while (msg_data_left(&msg));
540 memalloc_noreclaim_restore(noreclaim_flag);
/*
 * Different settings for sk->sk_sndtimeo can result in different return values
 * if there is a signal pending when we enter sendmsg, for reasons that are not
 * entirely clear.
 */
549 static inline int was_interrupted(int result)
551 return result == -ERESTARTSYS || result == -EINTR;
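
/*
 * Build and transmit one NBD request header (struct nbd_request) followed,
 * for writes, by the payload bvecs. nsock->sent tracks partial progress so
 * that an interrupted send can resume mid-request on a later attempt.
 */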
554 /* always call with the tx_lock held */
555 static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
557 struct request *req = blk_mq_rq_from_pdu(cmd);
558 struct nbd_config *config = nbd->config;
559 struct nbd_sock *nsock = config->socks[index];
561 struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
562 struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
563 struct iov_iter from;
564 unsigned long size = blk_rq_bytes(req);
568 u32 nbd_cmd_flags = 0;
569 int sent = nsock->sent, skip = 0;
571 iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
573 type = req_to_nbd_cmd_type(req);
577 if (rq_data_dir(req) == WRITE &&
578 (config->flags & NBD_FLAG_READ_ONLY)) {
579 dev_err_ratelimited(disk_to_dev(nbd->disk),
580 "Write on read-only\n");
584 if (req->cmd_flags & REQ_FUA)
585 nbd_cmd_flags |= NBD_CMD_FLAG_FUA;
	/* We did a partial send previously, and we at least sent the whole
	 * request struct, so just go and send the rest of the pages in the
	 * request.
	 */
592 if (sent >= sizeof(request)) {
593 skip = sent - sizeof(request);
595 /* initialize handle for tracing purposes */
596 handle = nbd_cmd_handle(cmd);
600 iov_iter_advance(&from, sent);
605 cmd->cookie = nsock->cookie;
607 request.type = htonl(type | nbd_cmd_flags);
608 if (type != NBD_CMD_FLUSH) {
609 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
610 request.len = htonl(size);
612 handle = nbd_cmd_handle(cmd);
613 memcpy(request.handle, &handle, sizeof(handle));
615 trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd));
617 dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
618 req, nbdcmd_to_ascii(type),
619 (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
620 result = sock_xmit(nbd, index, 1, &from,
621 (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
622 trace_nbd_header_sent(req, handle);
624 if (was_interrupted(result)) {
		/* If we haven't sent anything we can just return BUSY,
		 * however if we have sent something we need to make
		 * sure we only allow this req to be sent until we are
		 * completely done.
		 */
631 nsock->pending = req;
634 set_bit(NBD_CMD_REQUEUED, &cmd->flags);
635 return BLK_STS_RESOURCE;
637 dev_err_ratelimited(disk_to_dev(nbd->disk),
638 "Send control failed (result %d)\n", result);
642 if (type != NBD_CMD_WRITE)
647 struct bio *next = bio->bi_next;
648 struct bvec_iter iter;
651 bio_for_each_segment(bvec, bio, iter) {
652 bool is_last = !next && bio_iter_last(bvec, iter);
653 int flags = is_last ? 0 : MSG_MORE;
655 dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
657 iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len);
659 if (skip >= iov_iter_count(&from)) {
660 skip -= iov_iter_count(&from);
663 iov_iter_advance(&from, skip);
666 result = sock_xmit(nbd, index, 1, &from, flags, &sent);
668 if (was_interrupted(result)) {
				/* We've already sent the header, we
				 * have no choice but to set pending and
				 * return BUSY.
				 */
673 nsock->pending = req;
675 set_bit(NBD_CMD_REQUEUED, &cmd->flags);
676 return BLK_STS_RESOURCE;
678 dev_err(disk_to_dev(nbd->disk),
679 "Send data failed (result %d)\n",
			/*
			 * The completion might already have come in,
			 * so break for the last one instead of letting
			 * the iterator do it. This prevents use-after-free
			 * of the bio.
			 */
695 trace_nbd_payload_sent(req, handle);
696 nsock->pending = NULL;
701 /* NULL returned = something went wrong, inform userspace */
702 static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
704 struct nbd_config *config = nbd->config;
706 struct nbd_reply reply;
708 struct request *req = NULL;
712 struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)};
717 iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply));
718 result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
720 if (!nbd_disconnected(config))
721 dev_err(disk_to_dev(nbd->disk),
722 "Receive control failed (result %d)\n", result);
723 return ERR_PTR(result);
726 if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
727 dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
728 (unsigned long)ntohl(reply.magic));
729 return ERR_PTR(-EPROTO);
732 memcpy(&handle, reply.handle, sizeof(handle));
733 tag = nbd_handle_to_tag(handle);
734 hwq = blk_mq_unique_tag_to_hwq(tag);
735 if (hwq < nbd->tag_set.nr_hw_queues)
736 req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
737 blk_mq_unique_tag_to_tag(tag));
738 if (!req || !blk_mq_request_started(req)) {
739 dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
741 return ERR_PTR(-ENOENT);
743 trace_nbd_header_received(req, handle);
744 cmd = blk_mq_rq_to_pdu(req);
746 mutex_lock(&cmd->lock);
747 if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
748 dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
749 req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
753 if (cmd->status != BLK_STS_OK) {
754 dev_err(disk_to_dev(nbd->disk), "Command already handled %p\n",
759 if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) {
760 dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n",
765 if (ntohl(reply.error)) {
766 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
768 cmd->status = BLK_STS_IOERR;
772 dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
773 if (rq_data_dir(req) != WRITE) {
774 struct req_iterator iter;
777 rq_for_each_segment(bvec, req, iter) {
778 iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
779 result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
781 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
				/*
				 * If we've disconnected, we need to make sure we
				 * complete this request, otherwise error out
				 * and let the timeout stuff handle resubmitting
				 * this request onto another connection.
				 */
789 if (nbd_disconnected(config)) {
790 cmd->status = BLK_STS_IOERR;
796 dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
801 trace_nbd_payload_received(req, handle);
802 mutex_unlock(&cmd->lock);
803 return ret ? ERR_PTR(ret) : cmd;
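
/*
 * Per-connection receive worker: pull replies off the socket and complete
 * the matching requests until the socket dies, then mark this connection
 * dead and wake anyone waiting for the recv threads to exit.
 */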
806 static void recv_work(struct work_struct *work)
808 struct recv_thread_args *args = container_of(work,
809 struct recv_thread_args,
811 struct nbd_device *nbd = args->nbd;
812 struct nbd_config *config = nbd->config;
817 cmd = nbd_read_stat(nbd, args->index);
819 struct nbd_sock *nsock = config->socks[args->index];
821 mutex_lock(&nsock->tx_lock);
822 nbd_mark_nsock_dead(nbd, nsock, 1);
823 mutex_unlock(&nsock->tx_lock);
827 rq = blk_mq_rq_from_pdu(cmd);
828 if (likely(!blk_should_fake_timeout(rq->q)))
829 blk_mq_complete_request(rq);
832 atomic_dec(&config->recv_threads);
833 wake_up(&config->recv_wq);
837 static bool nbd_clear_req(struct request *req, void *data, bool reserved)
839 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
841 mutex_lock(&cmd->lock);
842 cmd->status = BLK_STS_IOERR;
843 mutex_unlock(&cmd->lock);
845 blk_mq_complete_request(req);
849 static void nbd_clear_que(struct nbd_device *nbd)
851 blk_mq_quiesce_queue(nbd->disk->queue);
852 blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
853 blk_mq_unquiesce_queue(nbd->disk->queue);
854 dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
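
/*
 * Pick a live connection to take over for a dead one: reuse the cached
 * fallback_index when that socket is still alive, otherwise scan all
 * connections for the first one that is not dead.
 */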
857 static int find_fallback(struct nbd_device *nbd, int index)
859 struct nbd_config *config = nbd->config;
861 struct nbd_sock *nsock = config->socks[index];
862 int fallback = nsock->fallback_index;
864 if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
867 if (config->num_connections <= 1) {
868 dev_err_ratelimited(disk_to_dev(nbd->disk),
869 "Dead connection, failed to find a fallback\n");
873 if (fallback >= 0 && fallback < config->num_connections &&
874 !config->socks[fallback]->dead)
877 if (nsock->fallback_index < 0 ||
878 nsock->fallback_index >= config->num_connections ||
879 config->socks[nsock->fallback_index]->dead) {
881 for (i = 0; i < config->num_connections; i++) {
884 if (!config->socks[i]->dead) {
889 nsock->fallback_index = new_index;
891 dev_err_ratelimited(disk_to_dev(nbd->disk),
892 "Dead connection, failed to find a fallback\n");
896 new_index = nsock->fallback_index;
900 static int wait_for_reconnect(struct nbd_device *nbd)
902 struct nbd_config *config = nbd->config;
903 if (!config->dead_conn_timeout)
905 if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
907 return wait_event_timeout(config->conn_wait,
908 atomic_read(&config->live_connections) > 0,
909 config->dead_conn_timeout) > 0;
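
/*
 * Resolve the target connection for one request, falling back to another
 * socket or waiting for a reconnect as needed, then hand the request to
 * nbd_send_cmd() with the connection's tx_lock held.
 */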
912 static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
914 struct request *req = blk_mq_rq_from_pdu(cmd);
915 struct nbd_device *nbd = cmd->nbd;
916 struct nbd_config *config;
917 struct nbd_sock *nsock;
920 if (!refcount_inc_not_zero(&nbd->config_refs)) {
921 dev_err_ratelimited(disk_to_dev(nbd->disk),
922 "Socks array is empty\n");
923 blk_mq_start_request(req);
926 config = nbd->config;
928 if (index >= config->num_connections) {
929 dev_err_ratelimited(disk_to_dev(nbd->disk),
930 "Attempted send on invalid socket\n");
932 blk_mq_start_request(req);
935 cmd->status = BLK_STS_OK;
937 nsock = config->socks[index];
938 mutex_lock(&nsock->tx_lock);
940 int old_index = index;
941 index = find_fallback(nbd, index);
942 mutex_unlock(&nsock->tx_lock);
944 if (wait_for_reconnect(nbd)) {
		/* All the sockets should already be down at this point,
		 * we just want to make sure that DISCONNECTED is set so
		 * any requests that come in that were queued waiting
		 * for the reconnect timer don't trigger the timer again
		 * and instead just error out.
		 */
956 blk_mq_start_request(req);
	/* Handle the case that we have a pending request that was partially
	 * transmitted that _has_ to be serviced first. We need to call requeue
	 * here so that it gets put _after_ the request that is already on the
	 * queue.
	 */
967 blk_mq_start_request(req);
968 if (unlikely(nsock->pending && nsock->pending != req)) {
969 nbd_requeue_cmd(cmd);
	/*
	 * Some failures are related to the link going down, so anything that
	 * returns EAGAIN can be retried on a different socket.
	 */
977 ret = nbd_send_cmd(nbd, cmd, index);
978 if (ret == -EAGAIN) {
979 dev_err_ratelimited(disk_to_dev(nbd->disk),
980 "Request send failed, requeueing\n");
981 nbd_mark_nsock_dead(nbd, nsock, 1);
982 nbd_requeue_cmd(cmd);
986 mutex_unlock(&nsock->tx_lock);
991 static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
992 const struct blk_mq_queue_data *bd)
994 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
	/*
	 * Since we look at the bios to send the request over the network we
	 * need to make sure the completion work doesn't mark this request done
	 * before we are done doing our send. This keeps us from dereferencing
	 * freed data if we have particularly fast completions (i.e. we get the
	 * completion before we exit sock_xmit on the last bvec) or in the case
	 * that the server is misbehaving (or there was an error) before we're
	 * done sending everything over the wire.
	 */
1006 mutex_lock(&cmd->lock);
1007 clear_bit(NBD_CMD_REQUEUED, &cmd->flags);
	/* We can be called directly from the user space process, which means we
	 * could possibly have signals pending so our sendmsg will fail. In
	 * this case we need to return that we are busy, otherwise error out as
	 * appropriate.
	 */
1014 ret = nbd_handle_cmd(cmd, hctx->queue_num);
1016 ret = BLK_STS_IOERR;
1019 mutex_unlock(&cmd->lock);
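
/*
 * Look up a socket from a userspace fd. Sockets without a real shutdown()
 * op are rejected, because nbd relies on kernel_sock_shutdown() to kick
 * blocked senders and receivers when a connection is torn down.
 */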
1024 static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd,
1027 struct socket *sock;
1030 sock = sockfd_lookup(fd, err);
1034 if (sock->ops->shutdown == sock_no_shutdown) {
1035 dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n");
1044 static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
1047 struct nbd_config *config = nbd->config;
1048 struct socket *sock;
1049 struct nbd_sock **socks;
1050 struct nbd_sock *nsock;
1053 sock = nbd_get_socket(nbd, arg, &err);
	/*
	 * We need to make sure we don't get any errant requests while we're
	 * reallocating the ->socks array.
	 */
1061 blk_mq_freeze_queue(nbd->disk->queue);
1063 if (!netlink && !nbd->task_setup &&
1064 !test_bit(NBD_RT_BOUND, &config->runtime_flags))
1065 nbd->task_setup = current;
1068 (nbd->task_setup != current ||
1069 test_bit(NBD_RT_BOUND, &config->runtime_flags))) {
1070 dev_err(disk_to_dev(nbd->disk),
1071 "Device being setup by another task");
1076 nsock = kzalloc(sizeof(*nsock), GFP_KERNEL);
1082 socks = krealloc(config->socks, (config->num_connections + 1) *
1083 sizeof(struct nbd_sock *), GFP_KERNEL);
1090 config->socks = socks;
1092 nsock->fallback_index = -1;
1093 nsock->dead = false;
1094 mutex_init(&nsock->tx_lock);
1096 nsock->pending = NULL;
1099 socks[config->num_connections++] = nsock;
1100 atomic_inc(&config->live_connections);
1101 blk_mq_unfreeze_queue(nbd->disk->queue);
1106 blk_mq_unfreeze_queue(nbd->disk->queue);
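
/*
 * Replace a dead connection with a freshly connected socket: find a slot
 * marked dead, swap in the new socket under tx_lock, and start another
 * recv_work instance for it.
 */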
1111 static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
1113 struct nbd_config *config = nbd->config;
1114 struct socket *sock, *old;
1115 struct recv_thread_args *args;
1119 sock = nbd_get_socket(nbd, arg, &err);
1123 args = kzalloc(sizeof(*args), GFP_KERNEL);
1129 for (i = 0; i < config->num_connections; i++) {
1130 struct nbd_sock *nsock = config->socks[i];
1135 mutex_lock(&nsock->tx_lock);
1137 mutex_unlock(&nsock->tx_lock);
1140 sk_set_memalloc(sock->sk);
1141 if (nbd->tag_set.timeout)
1142 sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
1143 atomic_inc(&config->recv_threads);
1144 refcount_inc(&nbd->config_refs);
1146 nsock->fallback_index = -1;
1148 nsock->dead = false;
1149 INIT_WORK(&args->work, recv_work);
1153 mutex_unlock(&nsock->tx_lock);
1156 clear_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
	/* We take the tx_mutex in an error path in the recv_work, so we
	 * need to queue_work outside of the tx_mutex.
	 */
1161 queue_work(nbd->recv_workq, &args->work);
1163 atomic_inc(&config->live_connections);
1164 wake_up(&config->conn_wait);
1172 static void nbd_bdev_reset(struct block_device *bdev)
1174 if (bdev->bd_openers > 1)
1176 set_capacity(bdev->bd_disk, 0);
1179 static void nbd_parse_flags(struct nbd_device *nbd)
1181 struct nbd_config *config = nbd->config;
1182 if (config->flags & NBD_FLAG_READ_ONLY)
1183 set_disk_ro(nbd->disk, true);
1185 set_disk_ro(nbd->disk, false);
1186 if (config->flags & NBD_FLAG_SEND_TRIM)
1187 blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue);
1188 if (config->flags & NBD_FLAG_SEND_FLUSH) {
1189 if (config->flags & NBD_FLAG_SEND_FUA)
1190 blk_queue_write_cache(nbd->disk->queue, true, true);
1192 blk_queue_write_cache(nbd->disk->queue, true, false);
1195 blk_queue_write_cache(nbd->disk->queue, false, false);
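
/*
 * Send NBD_CMD_DISC on every connection. Disconnect is best-effort: send
 * errors are logged but otherwise ignored, since the server may already
 * be gone.
 */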
1198 static void send_disconnects(struct nbd_device *nbd)
1200 struct nbd_config *config = nbd->config;
1201 struct nbd_request request = {
1202 .magic = htonl(NBD_REQUEST_MAGIC),
1203 .type = htonl(NBD_CMD_DISC),
1205 struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
1206 struct iov_iter from;
1209 for (i = 0; i < config->num_connections; i++) {
1210 struct nbd_sock *nsock = config->socks[i];
1212 iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
1213 mutex_lock(&nsock->tx_lock);
1214 ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
1216 dev_err(disk_to_dev(nbd->disk),
1217 "Send disconnect failed %d\n", ret);
1218 mutex_unlock(&nsock->tx_lock);
1222 static int nbd_disconnect(struct nbd_device *nbd)
1224 struct nbd_config *config = nbd->config;
1226 dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
1227 set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
1228 set_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags);
1229 send_disconnects(nbd);
1233 static void nbd_clear_sock(struct nbd_device *nbd)
1237 nbd->task_setup = NULL;
1240 static void nbd_config_put(struct nbd_device *nbd)
1242 if (refcount_dec_and_mutex_lock(&nbd->config_refs,
1243 &nbd->config_lock)) {
1244 struct nbd_config *config = nbd->config;
1245 nbd_dev_dbg_close(nbd);
1246 nbd_size_clear(nbd);
1247 if (test_and_clear_bit(NBD_RT_HAS_PID_FILE,
1248 &config->runtime_flags))
1249 device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
1250 nbd->task_recv = NULL;
1251 if (test_and_clear_bit(NBD_RT_HAS_BACKEND_FILE,
1252 &config->runtime_flags)) {
1253 device_remove_file(disk_to_dev(nbd->disk), &backend_attr);
1254 kfree(nbd->backend);
1255 nbd->backend = NULL;
1257 nbd_clear_sock(nbd);
1258 if (config->num_connections) {
1260 for (i = 0; i < config->num_connections; i++) {
1261 sockfd_put(config->socks[i]->sock);
1262 kfree(config->socks[i]);
1264 kfree(config->socks);
1269 if (nbd->recv_workq)
1270 destroy_workqueue(nbd->recv_workq);
1271 nbd->recv_workq = NULL;
1273 nbd->tag_set.timeout = 0;
1274 nbd->disk->queue->limits.discard_granularity = 0;
1275 nbd->disk->queue->limits.discard_alignment = 0;
1276 blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
1277 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
1279 mutex_unlock(&nbd->config_lock);
1281 module_put(THIS_MODULE);
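
/*
 * Bring the device up: one hardware queue and one recv_work instance per
 * connection, plus the pid sysfs file and the debugfs entries. Using more
 * than one connection requires NBD_FLAG_CAN_MULTI_CONN from the server.
 */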
1285 static int nbd_start_device(struct nbd_device *nbd)
1287 struct nbd_config *config = nbd->config;
1288 int num_connections = config->num_connections;
1295 if (num_connections > 1 &&
1296 !(config->flags & NBD_FLAG_CAN_MULTI_CONN)) {
1297 dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
1301 nbd->recv_workq = alloc_workqueue("knbd%d-recv",
1302 WQ_MEM_RECLAIM | WQ_HIGHPRI |
1303 WQ_UNBOUND, 0, nbd->index);
1304 if (!nbd->recv_workq) {
1305 dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
1309 blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
1310 nbd->task_recv = current;
1312 nbd_parse_flags(nbd);
1314 error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
1316 dev_err(disk_to_dev(nbd->disk), "device_create_file failed for pid!\n");
1319 set_bit(NBD_RT_HAS_PID_FILE, &config->runtime_flags);
1321 nbd_dev_dbg_init(nbd);
1322 for (i = 0; i < num_connections; i++) {
1323 struct recv_thread_args *args;
1325 args = kzalloc(sizeof(*args), GFP_KERNEL);
			/*
			 * If num_connections is m (2 < m), and the first n
			 * (1 < n < m) kzallocs succeed but allocation n + 1
			 * fails, we still have n recv threads running.
			 * Flush the workqueue here to keep those threads from
			 * dropping the last config_refs and trying to destroy
			 * the workqueue from inside the workqueue.
			 */
1337 flush_workqueue(nbd->recv_workq);
1340 sk_set_memalloc(config->socks[i]->sock->sk);
1341 if (nbd->tag_set.timeout)
1342 config->socks[i]->sock->sk->sk_sndtimeo =
1343 nbd->tag_set.timeout;
1344 atomic_inc(&config->recv_threads);
1345 refcount_inc(&nbd->config_refs);
1346 INIT_WORK(&args->work, recv_work);
1349 queue_work(nbd->recv_workq, &args->work);
1351 return nbd_set_size(nbd, config->bytesize, config->blksize);
1354 static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
1356 struct nbd_config *config = nbd->config;
1359 ret = nbd_start_device(nbd);
1364 set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
1365 mutex_unlock(&nbd->config_lock);
1366 ret = wait_event_interruptible(config->recv_wq,
1367 atomic_read(&config->recv_threads) == 0);
1370 flush_workqueue(nbd->recv_workq);
1372 mutex_lock(&nbd->config_lock);
1373 nbd_bdev_reset(bdev);
1374 /* user requested, ignore socket errors */
1375 if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
1377 if (test_bit(NBD_RT_TIMEDOUT, &config->runtime_flags))
1382 static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
1383 struct block_device *bdev)
1386 __invalidate_device(bdev, true);
1387 nbd_bdev_reset(bdev);
1388 if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
1389 &nbd->config->runtime_flags))
1390 nbd_config_put(nbd);
1393 static void nbd_set_cmd_timeout(struct nbd_device *nbd, u64 timeout)
1395 nbd->tag_set.timeout = timeout * HZ;
1397 blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
1399 blk_queue_rq_timeout(nbd->disk->queue, 30 * HZ);
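
/*
 * A timeout of 0 disables disconnection on socket timeouts (requests are
 * simply re-armed in nbd_xmit_timeout()), but the queue keeps a 30 second
 * timer so the timeout handler still runs.
 */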
1402 /* Must be called with config_lock held */
1403 static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
1404 unsigned int cmd, unsigned long arg)
1406 struct nbd_config *config = nbd->config;
1410 case NBD_DISCONNECT:
1411 return nbd_disconnect(nbd);
1412 case NBD_CLEAR_SOCK:
1413 nbd_clear_sock_ioctl(nbd, bdev);
1416 return nbd_add_socket(nbd, arg, false);
1417 case NBD_SET_BLKSIZE:
1418 return nbd_set_size(nbd, config->bytesize, arg);
1420 return nbd_set_size(nbd, arg, config->blksize);
1421 case NBD_SET_SIZE_BLOCKS:
1422 if (check_mul_overflow((loff_t)arg, config->blksize, &bytesize))
1424 return nbd_set_size(nbd, bytesize, config->blksize);
1425 case NBD_SET_TIMEOUT:
1426 nbd_set_cmd_timeout(nbd, arg);
1430 config->flags = arg;
1433 return nbd_start_device_ioctl(nbd, bdev);
		/*
		 * This is for compatibility only. The queue is always cleared
		 * by NBD_DO_IT or NBD_CLEAR_SOCK.
		 */
	case NBD_PRINT_DEBUG:
		/*
		 * For compatibility only, we no longer keep a list of
		 * outstanding requests.
		 */
1450 static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
1451 unsigned int cmd, unsigned long arg)
1453 struct nbd_device *nbd = bdev->bd_disk->private_data;
1454 struct nbd_config *config = nbd->config;
1455 int error = -EINVAL;
1457 if (!capable(CAP_SYS_ADMIN))
	/* The block layer will pass back some non-nbd ioctls in case we have
	 * special handling for them, but we don't, so just return an error.
	 */
1463 if (_IOC_TYPE(cmd) != 0xab)
1466 mutex_lock(&nbd->config_lock);
	/* Don't allow ioctl operations on an nbd device that was created with
	 * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine.
	 */
1471 if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
1472 (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
1473 error = __nbd_ioctl(bdev, nbd, cmd, arg);
1475 dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n");
1476 mutex_unlock(&nbd->config_lock);
1480 static struct nbd_config *nbd_alloc_config(void)
1482 struct nbd_config *config;
1484 config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
1487 atomic_set(&config->recv_threads, 0);
1488 init_waitqueue_head(&config->recv_wq);
1489 init_waitqueue_head(&config->conn_wait);
1490 config->blksize = NBD_DEF_BLKSIZE;
1491 atomic_set(&config->live_connections, 0);
1492 try_module_get(THIS_MODULE);
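
/*
 * nbd_open() lazily allocates a config the first time a device is opened
 * without one; config_refs then keeps it alive until the final user drops
 * it in nbd_config_put().
 */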
1496 static int nbd_open(struct block_device *bdev, fmode_t mode)
1498 struct nbd_device *nbd;
1501 mutex_lock(&nbd_index_mutex);
1502 nbd = bdev->bd_disk->private_data;
1507 if (!refcount_inc_not_zero(&nbd->refs)) {
1511 if (!refcount_inc_not_zero(&nbd->config_refs)) {
1512 struct nbd_config *config;
1514 mutex_lock(&nbd->config_lock);
1515 if (refcount_inc_not_zero(&nbd->config_refs)) {
1516 mutex_unlock(&nbd->config_lock);
1519 config = nbd->config = nbd_alloc_config();
1522 mutex_unlock(&nbd->config_lock);
1525 refcount_set(&nbd->config_refs, 1);
1526 refcount_inc(&nbd->refs);
1527 mutex_unlock(&nbd->config_lock);
1529 set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
1530 } else if (nbd_disconnected(nbd->config)) {
1532 set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
1535 mutex_unlock(&nbd_index_mutex);
1539 static void nbd_release(struct gendisk *disk, fmode_t mode)
1541 struct nbd_device *nbd = disk->private_data;
1543 if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
1544 disk->part0->bd_openers == 0)
1545 nbd_disconnect_and_put(nbd);
1547 nbd_config_put(nbd);
1551 static const struct block_device_operations nbd_fops =
1553 .owner = THIS_MODULE,
1555 .release = nbd_release,
1557 .compat_ioctl = nbd_ioctl,
1560 #if IS_ENABLED(CONFIG_DEBUG_FS)
1562 static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
1564 struct nbd_device *nbd = s->private;
1567 seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));
1572 DEFINE_SHOW_ATTRIBUTE(nbd_dbg_tasks);
1574 static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
1576 struct nbd_device *nbd = s->private;
1577 u32 flags = nbd->config->flags;
1579 seq_printf(s, "Hex: 0x%08x\n\n", flags);
1581 seq_puts(s, "Known flags:\n");
1583 if (flags & NBD_FLAG_HAS_FLAGS)
1584 seq_puts(s, "NBD_FLAG_HAS_FLAGS\n");
1585 if (flags & NBD_FLAG_READ_ONLY)
1586 seq_puts(s, "NBD_FLAG_READ_ONLY\n");
1587 if (flags & NBD_FLAG_SEND_FLUSH)
1588 seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
1589 if (flags & NBD_FLAG_SEND_FUA)
1590 seq_puts(s, "NBD_FLAG_SEND_FUA\n");
1591 if (flags & NBD_FLAG_SEND_TRIM)
1592 seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
1597 DEFINE_SHOW_ATTRIBUTE(nbd_dbg_flags);
1599 static int nbd_dev_dbg_init(struct nbd_device *nbd)
1602 struct nbd_config *config = nbd->config;
1607 dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
1609 dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
1613 config->dbg_dir = dir;
1615 debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_fops);
1616 debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize);
1617 debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
1618 debugfs_create_u64("blocksize", 0444, dir, &config->blksize);
1619 debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_fops);
1624 static void nbd_dev_dbg_close(struct nbd_device *nbd)
1626 debugfs_remove_recursive(nbd->config->dbg_dir);
1629 static int nbd_dbg_init(void)
1631 struct dentry *dbg_dir;
1633 dbg_dir = debugfs_create_dir("nbd", NULL);
1637 nbd_dbg_dir = dbg_dir;
1642 static void nbd_dbg_close(void)
1644 debugfs_remove_recursive(nbd_dbg_dir);
1647 #else /* IS_ENABLED(CONFIG_DEBUG_FS) */
1649 static int nbd_dev_dbg_init(struct nbd_device *nbd)
1654 static void nbd_dev_dbg_close(struct nbd_device *nbd)
1658 static int nbd_dbg_init(void)
1663 static void nbd_dbg_close(void)
1669 static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
1670 unsigned int hctx_idx, unsigned int numa_node)
1672 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
1673 cmd->nbd = set->driver_data;
1675 mutex_init(&cmd->lock);
1679 static const struct blk_mq_ops nbd_mq_ops = {
1680 .queue_rq = nbd_queue_rq,
1681 .complete = nbd_complete_rq,
1682 .init_request = nbd_init_request,
1683 .timeout = nbd_xmit_timeout,
1686 static int nbd_dev_add(int index)
1688 struct nbd_device *nbd;
1689 struct gendisk *disk;
1692 nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL);
1696 nbd->tag_set.ops = &nbd_mq_ops;
1697 nbd->tag_set.nr_hw_queues = 1;
1698 nbd->tag_set.queue_depth = 128;
1699 nbd->tag_set.numa_node = NUMA_NO_NODE;
1700 nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
1701 nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
1703 nbd->tag_set.driver_data = nbd;
1704 INIT_WORK(&nbd->remove_work, nbd_dev_remove_work);
1705 nbd->destroy_complete = NULL;
1706 nbd->backend = NULL;
1708 err = blk_mq_alloc_tag_set(&nbd->tag_set);
1713 err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
1718 err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
1726 disk = blk_mq_alloc_disk(&nbd->tag_set, NULL);
1728 err = PTR_ERR(disk);
	/*
	 * Tell the block layer that we are not a rotational device.
	 */
1736 blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
1737 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
1738 disk->queue->limits.discard_granularity = 0;
1739 disk->queue->limits.discard_alignment = 0;
1740 blk_queue_max_discard_sectors(disk->queue, 0);
1741 blk_queue_max_segment_size(disk->queue, UINT_MAX);
1742 blk_queue_max_segments(disk->queue, USHRT_MAX);
1743 blk_queue_max_hw_sectors(disk->queue, 65536);
1744 disk->queue->limits.max_sectors = 256;
1746 mutex_init(&nbd->config_lock);
1747 refcount_set(&nbd->config_refs, 0);
1748 refcount_set(&nbd->refs, 1);
1749 INIT_LIST_HEAD(&nbd->list);
1750 disk->major = NBD_MAJOR;
1751 disk->first_minor = index << part_shift;
1752 disk->minors = 1 << part_shift;
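	/*
	 * With the default max_part = 16, part_shift is 5, so nbd0 gets
	 * minors 0-31, nbd1 minors 32-63, and so on (partition 0 is the
	 * whole disk).
	 */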
1753 disk->fops = &nbd_fops;
1754 disk->private_data = nbd;
1755 sprintf(disk->disk_name, "nbd%d", index);
1757 nbd_total_devices++;
1761 idr_remove(&nbd_index_idr, index);
1763 blk_mq_free_tag_set(&nbd->tag_set);
1770 static int find_free_cb(int id, void *ptr, void *data)
1772 struct nbd_device *nbd = ptr;
1773 struct nbd_device **found = data;
1775 if (!refcount_read(&nbd->config_refs)) {
1782 /* Netlink interface. */
1783 static const struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = {
1784 [NBD_ATTR_INDEX] = { .type = NLA_U32 },
1785 [NBD_ATTR_SIZE_BYTES] = { .type = NLA_U64 },
1786 [NBD_ATTR_BLOCK_SIZE_BYTES] = { .type = NLA_U64 },
1787 [NBD_ATTR_TIMEOUT] = { .type = NLA_U64 },
1788 [NBD_ATTR_SERVER_FLAGS] = { .type = NLA_U64 },
1789 [NBD_ATTR_CLIENT_FLAGS] = { .type = NLA_U64 },
1790 [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED},
1791 [NBD_ATTR_DEAD_CONN_TIMEOUT] = { .type = NLA_U64 },
1792 [NBD_ATTR_DEVICE_LIST] = { .type = NLA_NESTED},
1793 [NBD_ATTR_BACKEND_IDENTIFIER] = { .type = NLA_STRING},
1796 static const struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = {
1797 [NBD_SOCK_FD] = { .type = NLA_U32 },
/* We don't use this right now since we don't parse the incoming list, but we
 * still want it here so userspace knows what to expect.
 */
1803 static const struct nla_policy __attribute__((unused))
1804 nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
1805 [NBD_DEVICE_INDEX] = { .type = NLA_U32 },
1806 [NBD_DEVICE_CONNECTED] = { .type = NLA_U8 },
1809 static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
1811 struct nbd_config *config = nbd->config;
1812 u64 bsize = config->blksize;
1813 u64 bytes = config->bytesize;
1815 if (info->attrs[NBD_ATTR_SIZE_BYTES])
1816 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
1818 if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES])
1819 bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
1821 if (bytes != config->bytesize || bsize != config->blksize)
1822 return nbd_set_size(nbd, bytes, bsize);
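
/*
 * NBD_CMD_CONNECT handler: find or create a device (no NBD_ATTR_INDEX means
 * "first free"), allocate its config, apply the netlink attributes, add the
 * supplied sockets and start the device.
 */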
1826 static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
1828 DECLARE_COMPLETION_ONSTACK(destroy_complete);
1829 struct nbd_device *nbd = NULL;
1830 struct nbd_config *config;
1833 bool put_dev = false;
1835 if (!netlink_capable(skb, CAP_SYS_ADMIN))
1838 if (info->attrs[NBD_ATTR_INDEX])
1839 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
1840 if (!info->attrs[NBD_ATTR_SOCKETS]) {
1841 printk(KERN_ERR "nbd: must specify at least one socket\n");
1844 if (!info->attrs[NBD_ATTR_SIZE_BYTES]) {
1845 printk(KERN_ERR "nbd: must specify a size in bytes for the device\n");
1849 mutex_lock(&nbd_index_mutex);
1851 ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd);
1854 new_index = nbd_dev_add(-1);
1855 if (new_index < 0) {
1856 mutex_unlock(&nbd_index_mutex);
1857 printk(KERN_ERR "nbd: failed to add new device\n");
1860 nbd = idr_find(&nbd_index_idr, new_index);
1863 nbd = idr_find(&nbd_index_idr, index);
1865 ret = nbd_dev_add(index);
1867 mutex_unlock(&nbd_index_mutex);
1868 printk(KERN_ERR "nbd: failed to add new device\n");
1871 nbd = idr_find(&nbd_index_idr, index);
1875 printk(KERN_ERR "nbd: couldn't find device at index %d\n",
1877 mutex_unlock(&nbd_index_mutex);
1881 if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
1882 test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
1883 nbd->destroy_complete = &destroy_complete;
1884 mutex_unlock(&nbd_index_mutex);
		/* Wait until the previous nbd device is fully destroyed */
1887 wait_for_completion(&destroy_complete);
1891 if (!refcount_inc_not_zero(&nbd->refs)) {
1892 mutex_unlock(&nbd_index_mutex);
1895 printk(KERN_ERR "nbd: device at index %d is going down\n",
1899 mutex_unlock(&nbd_index_mutex);
1901 mutex_lock(&nbd->config_lock);
1902 if (refcount_read(&nbd->config_refs)) {
1903 mutex_unlock(&nbd->config_lock);
1907 printk(KERN_ERR "nbd: nbd%d already in use\n", index);
1910 if (WARN_ON(nbd->config)) {
1911 mutex_unlock(&nbd->config_lock);
1915 config = nbd->config = nbd_alloc_config();
1917 mutex_unlock(&nbd->config_lock);
1919 printk(KERN_ERR "nbd: couldn't allocate config\n");
1922 refcount_set(&nbd->config_refs, 1);
1923 set_bit(NBD_RT_BOUND, &config->runtime_flags);
1925 ret = nbd_genl_size_set(info, nbd);
1929 if (info->attrs[NBD_ATTR_TIMEOUT])
1930 nbd_set_cmd_timeout(nbd,
1931 nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]));
1932 if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
1933 config->dead_conn_timeout =
1934 nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
1935 config->dead_conn_timeout *= HZ;
1937 if (info->attrs[NBD_ATTR_SERVER_FLAGS])
1939 nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]);
1940 if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
1941 u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
1942 if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
			/*
			 * We have 1 ref to keep the device around, and then 1
			 * ref for our current operation here, which will be
			 * inherited by the config. If we already have
			 * DESTROY_ON_DISCONNECT set then we know we don't have
			 * that extra ref already held so we don't need the
			 * put_dev in this case either.
			 */
1951 if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
1955 if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
1957 refcount_inc(&nbd->refs);
1959 if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
1960 set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
1961 &config->runtime_flags);
1965 if (info->attrs[NBD_ATTR_SOCKETS]) {
1966 struct nlattr *attr;
1969 nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
1971 struct nlattr *socks[NBD_SOCK_MAX+1];
1973 if (nla_type(attr) != NBD_SOCK_ITEM) {
1974 printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
1978 ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX,
1983 printk(KERN_ERR "nbd: error processing sock list\n");
1987 if (!socks[NBD_SOCK_FD])
1989 fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
1990 ret = nbd_add_socket(nbd, fd, true);
1995 ret = nbd_start_device(nbd);
1998 if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) {
1999 nbd->backend = nla_strdup(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER],
2001 if (!nbd->backend) {
2006 ret = device_create_file(disk_to_dev(nbd->disk), &backend_attr);
2008 dev_err(disk_to_dev(nbd->disk),
2009 "device_create_file failed for backend!\n");
2012 set_bit(NBD_RT_HAS_BACKEND_FILE, &config->runtime_flags);
2014 mutex_unlock(&nbd->config_lock);
2016 set_bit(NBD_RT_HAS_CONFIG_REF, &config->runtime_flags);
2017 refcount_inc(&nbd->config_refs);
2018 nbd_connect_reply(info, nbd->index);
2020 nbd_config_put(nbd);
2026 static void nbd_disconnect_and_put(struct nbd_device *nbd)
2028 mutex_lock(&nbd->config_lock);
2029 nbd_disconnect(nbd);
2030 nbd_clear_sock(nbd);
2031 mutex_unlock(&nbd->config_lock);
	/*
	 * Make sure recv thread has finished, so it does not drop the last
	 * config ref and try to destroy the workqueue from inside the work
	 * queue.
	 */
2037 if (nbd->recv_workq)
2038 flush_workqueue(nbd->recv_workq);
2039 if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
2040 &nbd->config->runtime_flags))
2041 nbd_config_put(nbd);
2044 static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
2046 struct nbd_device *nbd;
2049 if (!netlink_capable(skb, CAP_SYS_ADMIN))
2052 if (!info->attrs[NBD_ATTR_INDEX]) {
2053 printk(KERN_ERR "nbd: must specify an index to disconnect\n");
2056 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
2057 mutex_lock(&nbd_index_mutex);
2058 nbd = idr_find(&nbd_index_idr, index);
2060 mutex_unlock(&nbd_index_mutex);
2061 printk(KERN_ERR "nbd: couldn't find device at index %d\n",
2065 if (!refcount_inc_not_zero(&nbd->refs)) {
2066 mutex_unlock(&nbd_index_mutex);
2067 printk(KERN_ERR "nbd: device at index %d is going down\n",
2071 mutex_unlock(&nbd_index_mutex);
2072 if (!refcount_inc_not_zero(&nbd->config_refs))
2074 nbd_disconnect_and_put(nbd);
2075 nbd_config_put(nbd);
2081 static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
2083 struct nbd_device *nbd = NULL;
2084 struct nbd_config *config;
2087 bool put_dev = false;
2089 if (!netlink_capable(skb, CAP_SYS_ADMIN))
2092 if (!info->attrs[NBD_ATTR_INDEX]) {
2093 printk(KERN_ERR "nbd: must specify a device to reconfigure\n");
2096 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
2097 mutex_lock(&nbd_index_mutex);
2098 nbd = idr_find(&nbd_index_idr, index);
2100 mutex_unlock(&nbd_index_mutex);
2101 printk(KERN_ERR "nbd: couldn't find a device at index %d\n",
2106 if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) {
2107 if (nla_strcmp(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER],
2109 mutex_unlock(&nbd_index_mutex);
2110 dev_err(nbd_to_dev(nbd),
2111 "backend image doesn't match with %s\n",
2116 mutex_unlock(&nbd_index_mutex);
2117 dev_err(nbd_to_dev(nbd), "must specify backend\n");
2121 if (!refcount_inc_not_zero(&nbd->refs)) {
2122 mutex_unlock(&nbd_index_mutex);
2123 printk(KERN_ERR "nbd: device at index %d is going down\n",
2127 mutex_unlock(&nbd_index_mutex);
2129 if (!refcount_inc_not_zero(&nbd->config_refs)) {
2130 dev_err(nbd_to_dev(nbd),
2131 "not configured, cannot reconfigure\n");
2136 mutex_lock(&nbd->config_lock);
2137 config = nbd->config;
2138 if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
2140 dev_err(nbd_to_dev(nbd),
2141 "not configured, cannot reconfigure\n");
2146 ret = nbd_genl_size_set(info, nbd);
2150 if (info->attrs[NBD_ATTR_TIMEOUT])
2151 nbd_set_cmd_timeout(nbd,
2152 nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]));
2153 if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
2154 config->dead_conn_timeout =
2155 nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
2156 config->dead_conn_timeout *= HZ;
2158 if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
2159 u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
2160 if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
2161 if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
2165 if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
2167 refcount_inc(&nbd->refs);
2170 if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
2171 set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
2172 &config->runtime_flags);
2174 clear_bit(NBD_RT_DISCONNECT_ON_CLOSE,
2175 &config->runtime_flags);
2179 if (info->attrs[NBD_ATTR_SOCKETS]) {
2180 struct nlattr *attr;
2183 nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
2185 struct nlattr *socks[NBD_SOCK_MAX+1];
2187 if (nla_type(attr) != NBD_SOCK_ITEM) {
2188 printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
2192 ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX,
2197 printk(KERN_ERR "nbd: error processing sock list\n");
2201 if (!socks[NBD_SOCK_FD])
2203 fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
2204 ret = nbd_reconnect_socket(nbd, fd);
2210 dev_info(nbd_to_dev(nbd), "reconnected socket\n");
2214 mutex_unlock(&nbd->config_lock);
2215 nbd_config_put(nbd);
2222 static const struct genl_small_ops nbd_connect_genl_ops[] = {
2224 .cmd = NBD_CMD_CONNECT,
2225 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2226 .doit = nbd_genl_connect,
2229 .cmd = NBD_CMD_DISCONNECT,
2230 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2231 .doit = nbd_genl_disconnect,
2234 .cmd = NBD_CMD_RECONFIGURE,
2235 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2236 .doit = nbd_genl_reconfigure,
2239 .cmd = NBD_CMD_STATUS,
2240 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2241 .doit = nbd_genl_status,
2245 static const struct genl_multicast_group nbd_mcast_grps[] = {
2246 { .name = NBD_GENL_MCAST_GROUP_NAME, },
2249 static struct genl_family nbd_genl_family __ro_after_init = {
2251 .name = NBD_GENL_FAMILY_NAME,
2252 .version = NBD_GENL_VERSION,
2253 .module = THIS_MODULE,
2254 .small_ops = nbd_connect_genl_ops,
2255 .n_small_ops = ARRAY_SIZE(nbd_connect_genl_ops),
2256 .maxattr = NBD_ATTR_MAX,
2257 .policy = nbd_attr_policy,
2258 .mcgrps = nbd_mcast_grps,
2259 .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps),
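
/*
 * Userspace (the nbd-client tool in netlink mode, for example) drives this
 * family with NBD_CMD_CONNECT/RECONFIGURE/DISCONNECT/STATUS requests and
 * listens on the multicast group for dead-link notifications.
 */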
2262 static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply)
2264 struct nlattr *dev_opt;
	/* This is a little racy, but for status it's ok. The
	 * reason we don't take a ref here is because we can't
	 * take a ref in the index == -1 case as we would need
	 * to put under the nbd_index_mutex, which could
	 * deadlock if we are configured to remove ourselves
	 * once we're disconnected.
	 */
2275 if (refcount_read(&nbd->config_refs))
2277 dev_opt = nla_nest_start_noflag(reply, NBD_DEVICE_ITEM);
2280 ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index);
2283 ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED,
2287 nla_nest_end(reply, dev_opt);
2291 static int status_cb(int id, void *ptr, void *data)
2293 struct nbd_device *nbd = ptr;
2294 return populate_nbd_status(nbd, (struct sk_buff *)data);
2297 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
2299 struct nlattr *dev_list;
2300 struct sk_buff *reply;
2306 if (info->attrs[NBD_ATTR_INDEX])
2307 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
2309 mutex_lock(&nbd_index_mutex);
2311 msg_size = nla_total_size(nla_attr_size(sizeof(u32)) +
2312 nla_attr_size(sizeof(u8)));
2313 msg_size *= (index == -1) ? nbd_total_devices : 1;
2315 reply = genlmsg_new(msg_size, GFP_KERNEL);
2318 reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0,
2325 dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST);
2327 ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
2333 struct nbd_device *nbd;
2334 nbd = idr_find(&nbd_index_idr, index);
2336 ret = populate_nbd_status(nbd, reply);
2343 nla_nest_end(reply, dev_list);
2344 genlmsg_end(reply, reply_head);
2345 ret = genlmsg_reply(reply, info);
2347 mutex_unlock(&nbd_index_mutex);
2351 static void nbd_connect_reply(struct genl_info *info, int index)
2353 struct sk_buff *skb;
2357 skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
2360 msg_head = genlmsg_put_reply(skb, info, &nbd_genl_family, 0,
2366 ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
2371 genlmsg_end(skb, msg_head);
2372 genlmsg_reply(skb, info);
2375 static void nbd_mcast_index(int index)
2377 struct sk_buff *skb;
2381 skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
2384 msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0,
2390 ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
2395 genlmsg_end(skb, msg_head);
2396 genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL);
2399 static void nbd_dead_link_work(struct work_struct *work)
2401 struct link_dead_args *args = container_of(work, struct link_dead_args,
2403 nbd_mcast_index(args->index);
2407 static int __init nbd_init(void)
2411 BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
2414 printk(KERN_ERR "nbd: max_part must be >= 0\n");
2420 part_shift = fls(max_part);
	/*
	 * Adjust max_part according to part_shift as it is exported to user
	 * space so that users can know the maximum number of partitions the
	 * kernel should be able to manage.
	 *
	 * Note that -1 is required because partition 0 is reserved for the
	 * whole disk.
	 */
2430 max_part = (1UL << part_shift) - 1;
2433 if ((1UL << part_shift) > DISK_MAX_PARTS)
2436 if (nbds_max > 1UL << (MINORBITS - part_shift))
2439 if (register_blkdev(NBD_MAJOR, "nbd"))
2442 nbd_del_wq = alloc_workqueue("nbd-del", WQ_UNBOUND, 0);
2444 unregister_blkdev(NBD_MAJOR, "nbd");
2448 if (genl_register_family(&nbd_genl_family)) {
2449 destroy_workqueue(nbd_del_wq);
2450 unregister_blkdev(NBD_MAJOR, "nbd");
2455 mutex_lock(&nbd_index_mutex);
2456 for (i = 0; i < nbds_max; i++)
2458 mutex_unlock(&nbd_index_mutex);
2462 static int nbd_exit_cb(int id, void *ptr, void *data)
2464 struct list_head *list = (struct list_head *)data;
2465 struct nbd_device *nbd = ptr;
2467 /* Skip nbd that is being removed asynchronously */
2468 if (refcount_read(&nbd->refs))
2469 list_add_tail(&nbd->list, list);
2474 static void __exit nbd_cleanup(void)
2476 struct nbd_device *nbd;
2477 LIST_HEAD(del_list);
2481 mutex_lock(&nbd_index_mutex);
2482 idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list);
2483 mutex_unlock(&nbd_index_mutex);
2485 while (!list_empty(&del_list)) {
2486 nbd = list_first_entry(&del_list, struct nbd_device, list);
2487 list_del_init(&nbd->list);
2488 if (refcount_read(&nbd->refs) != 1)
2489 printk(KERN_ERR "nbd: possibly leaking a device\n");
	/* Also wait for nbd_dev_remove_work() to complete */
2494 destroy_workqueue(nbd_del_wq);
2496 idr_destroy(&nbd_index_idr);
2497 genl_unregister_family(&nbd_genl_family);
2498 unregister_blkdev(NBD_MAJOR, "nbd");
2501 module_init(nbd_init);
2502 module_exit(nbd_cleanup);
2504 MODULE_DESCRIPTION("Network Block Device");
2505 MODULE_LICENSE("GPL");
2507 module_param(nbds_max, int, 0444);
2508 MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
2509 module_param(max_part, int, 0444);
2510 MODULE_PARM_DESC(max_part, "number of partitions per device (default: 16)");