Merge tag 'v6.9-rc-smb3-server-fixes' of git://git.samba.org/ksmbd
[linux-2.6-microblaze.git] / drivers / block / sunvdc.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* sunvdc.c: Sun LDOM Virtual Disk Client.
3  *
4  * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
5  */
6
7 #include <linux/module.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/blk-mq.h>
11 #include <linux/hdreg.h>
12 #include <linux/cdrom.h>
13 #include <linux/slab.h>
14 #include <linux/spinlock.h>
15 #include <linux/completion.h>
16 #include <linux/delay.h>
17 #include <linux/init.h>
18 #include <linux/list.h>
19 #include <linux/scatterlist.h>
20
21 #include <asm/vio.h>
22 #include <asm/ldc.h>
23
/* Driver identity strings.  PFX is the prefix prepended to this driver's
 * log messages.
 */
#define DRV_MODULE_NAME         "sunvdc"
#define PFX DRV_MODULE_NAME     ": "
#define DRV_MODULE_VERSION      "1.2"
#define DRV_MODULE_RELDATE      "November 24, 2014"

/* Banner line emitted once by print_version(). */
static char version[] =
        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
MODULE_DESCRIPTION("Sun LDOM virtual disk client driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_MODULE_VERSION);
35
/* Number of entries in the TX descriptor ring; also the size of
 * vdc_port.rq_arr and the tag-set depth requested in probe_disk().
 */
#define VDC_TX_RING_SIZE        512
/* Block size used as the physical block size fallback in vdc_handle_attr(). */
#define VDC_DEFAULT_BLK_SIZE    512

/* NOTE(review): MAX_XFER_BLKS is divided by a block size and by PAGE_SIZE
 * below, which suggests it is a byte count despite its name -- confirm.
 */
#define MAX_XFER_BLKS           (128 * 1024)
#define MAX_XFER_SIZE           (MAX_XFER_BLKS / VDC_DEFAULT_BLK_SIZE)
/* Upper bound on port->ring_cookies; sizes the on-stack scatterlist in
 * __send_request().
 */
#define MAX_RING_COOKIES        ((MAX_XFER_BLKS / PAGE_SIZE) + 2)

/* Values stored in vio_completion.waiting_for and matched by vdc_finish().
 * WAITING_FOR_ANY makes vdc_finish() complete whatever waiter is pending.
 */
#define WAITING_FOR_LINK_UP     0x01
#define WAITING_FOR_TX_SPACE    0x02
#define WAITING_FOR_GEN_CMD     0x04
#define WAITING_FOR_ANY         -1

/* Retry bound used by the resend loop in __vdc_tx_trigger(). */
#define VDC_MAX_RETRIES 10

/* Workqueue on which vdc_event() queues ldc_reset_work after an LDC reset. */
static struct workqueue_struct *sunvdc_wq;
51
/* Per TX-ring-slot bookkeeping: the block request occupying the slot.
 * vdc_end_one() treats a NULL req as a generic_request() command.
 */
struct vdc_req_entry {
        struct request          *req;
};
55
/* Per-port driver state for one virtual disk. */
struct vdc_port {
        /* Embedded VIO state; to_vdc_port() maps it back to this struct. */
        struct vio_driver_state vio;

        /* Set by probe_disk(); vdc_blk_queue_start() checks it for NULL
         * because the handshake can complete before a disk is allocated.
         */
        struct gendisk          *disk;

        /* NOTE(review): not referenced in the visible code; waiters use
         * vio.cmp instead -- confirm whether this field is still needed.
         */
        struct vdc_completion   *cmp;

        /* Copied into each descriptor and bumped after a successful send. */
        u64                     req_id;
        u64                     seq;
        /* Indexed by TX ring slot. */
        struct vdc_req_entry    rq_arr[VDC_TX_RING_SIZE];

        /* Cookies available per descriptor; bounded by MAX_RING_COOKIES. */
        unsigned long           ring_cookies;

        /* Advertised in the ATTR INFO packet; vdc_handle_attr() lowers
         * max_xfer_size to the server's value if that is smaller and
         * adopts the server's vdisk_block_size.
         */
        u64                     max_xfer_size;
        u32                     vdisk_block_size;
        /* Non-zero makes vdc_queue_rq() fail requests with BLK_STS_IOERR. */
        u32                     drain;

        u64                     ldc_timeout;
        /* Cancelled by vdc_handshake_complete(). */
        struct delayed_work     ldc_reset_timer_work;
        /* Queued on sunvdc_wq by vdc_event() on LDC_EVENT_RESET. */
        struct work_struct      ldc_reset_work;

        /* The server fills these in for us in the disk attribute
         * ACK packet.
         */
        /* Bitmask indexed by VD_OP_* number; tested in generic_request(). */
        u64                     operations;
        u32                     vdisk_size;
        u8                      vdisk_type;
        u8                      vdisk_mtype;
        u32                     vdisk_phys_blksz;

        struct blk_mq_tag_set   tag_set;

        /* Copied into gendisk->disk_name by probe_disk(). */
        char                    disk_name[32];
};
90
91 static void vdc_ldc_reset(struct vdc_port *port);
92 static void vdc_ldc_reset_work(struct work_struct *work);
93 static void vdc_ldc_reset_timer_work(struct work_struct *work);
94
95 static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
96 {
97         return container_of(vio, struct vdc_port, vio);
98 }
99
/* Protocol versions this driver can negotiate, ordered from the newest
 * (largest major/minor) to the oldest.
 */
static struct vio_version vdc_versions[] = {
        { .major = 1, .minor = 2 },
        { .major = 1, .minor = 1 },
        { .major = 1, .minor = 0 },
};
106
107 static inline int vdc_version_supported(struct vdc_port *port,
108                                         u16 major, u16 minor)
109 {
110         return port->vio.ver.major == major && port->vio.ver.minor >= minor;
111 }
112
/* Block device name. */
#define VDCBLK_NAME     "vdisk"
/* Block major number; assigned to each gendisk in probe_disk(). */
static int vdc_major;
/* Each disk owns 1 << PARTITION_SHIFT minors, starting at
 * dev_no << PARTITION_SHIFT.
 */
#define PARTITION_SHIFT 3
116
117 static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr)
118 {
119         return vio_dring_avail(dr, VDC_TX_RING_SIZE);
120 }
121
122 static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
123 {
124         struct gendisk *disk = bdev->bd_disk;
125         sector_t nsect = get_capacity(disk);
126         sector_t cylinders = nsect;
127
128         geo->heads = 0xff;
129         geo->sectors = 0x3f;
130         sector_div(cylinders, geo->heads * geo->sectors);
131         geo->cylinders = cylinders;
132         if ((sector_t)(geo->cylinders + 1) * geo->heads * geo->sectors < nsect)
133                 geo->cylinders = 0xffff;
134
135         return 0;
136 }
137
138 /* Add ioctl/CDROM_GET_CAPABILITY to support cdrom_id in udev
139  * when vdisk_mtype is VD_MEDIA_TYPE_CD or VD_MEDIA_TYPE_DVD.
140  * Needed to be able to install inside an ldom from an iso image.
141  */
142 static int vdc_ioctl(struct block_device *bdev, blk_mode_t mode,
143                      unsigned command, unsigned long argument)
144 {
145         struct vdc_port *port = bdev->bd_disk->private_data;
146         int i;
147
148         switch (command) {
149         case CDROMMULTISESSION:
150                 pr_debug(PFX "Multisession CDs not supported\n");
151                 for (i = 0; i < sizeof(struct cdrom_multisession); i++)
152                         if (put_user(0, (char __user *)(argument + i)))
153                                 return -EFAULT;
154                 return 0;
155
156         case CDROM_GET_CAPABILITY:
157                 if (!vdc_version_supported(port, 1, 1))
158                         return -EINVAL;
159                 switch (port->vdisk_mtype) {
160                 case VD_MEDIA_TYPE_CD:
161                 case VD_MEDIA_TYPE_DVD:
162                         return 0;
163                 default:
164                         return -EINVAL;
165                 }
166         default:
167                 pr_debug(PFX "ioctl %08x not supported\n", command);
168                 return -EINVAL;
169         }
170 }
171
/* Block device operations installed on each gendisk by probe_disk(). */
static const struct block_device_operations vdc_fops = {
        .owner          = THIS_MODULE,
        .getgeo         = vdc_getgeo,
        .ioctl          = vdc_ioctl,
        .compat_ioctl   = blkdev_compat_ptr_ioctl,
};
178
179 static void vdc_blk_queue_start(struct vdc_port *port)
180 {
181         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
182
183         /* restart blk queue when ring is half emptied. also called after
184          * handshake completes, so check for initial handshake before we've
185          * allocated a disk.
186          */
187         if (port->disk && vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50)
188                 blk_mq_start_stopped_hw_queues(port->disk->queue, true);
189 }
190
191 static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for)
192 {
193         if (vio->cmp &&
194             (waiting_for == -1 ||
195              vio->cmp->waiting_for == waiting_for)) {
196                 vio->cmp->err = err;
197                 complete(&vio->cmp->com);
198                 vio->cmp = NULL;
199         }
200 }
201
202 static void vdc_handshake_complete(struct vio_driver_state *vio)
203 {
204         struct vdc_port *port = to_vdc_port(vio);
205
206         cancel_delayed_work(&port->ldc_reset_timer_work);
207         vdc_finish(vio, 0, WAITING_FOR_LINK_UP);
208         vdc_blk_queue_start(port);
209 }
210
211 static int vdc_handle_unknown(struct vdc_port *port, void *arg)
212 {
213         struct vio_msg_tag *pkt = arg;
214
215         printk(KERN_ERR PFX "Received unknown msg [%02x:%02x:%04x:%08x]\n",
216                pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
217         printk(KERN_ERR PFX "Resetting connection.\n");
218
219         ldc_disconnect(port->vio.lp);
220
221         return -ECONNRESET;
222 }
223
224 static int vdc_send_attr(struct vio_driver_state *vio)
225 {
226         struct vdc_port *port = to_vdc_port(vio);
227         struct vio_disk_attr_info pkt;
228
229         memset(&pkt, 0, sizeof(pkt));
230
231         pkt.tag.type = VIO_TYPE_CTRL;
232         pkt.tag.stype = VIO_SUBTYPE_INFO;
233         pkt.tag.stype_env = VIO_ATTR_INFO;
234         pkt.tag.sid = vio_send_sid(vio);
235
236         pkt.xfer_mode = VIO_DRING_MODE;
237         pkt.vdisk_block_size = port->vdisk_block_size;
238         pkt.max_xfer_size = port->max_xfer_size;
239
240         viodbg(HS, "SEND ATTR xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
241                pkt.xfer_mode, pkt.vdisk_block_size, pkt.max_xfer_size);
242
243         return vio_ldc_send(&port->vio, &pkt, sizeof(pkt));
244 }
245
246 static int vdc_handle_attr(struct vio_driver_state *vio, void *arg)
247 {
248         struct vdc_port *port = to_vdc_port(vio);
249         struct vio_disk_attr_info *pkt = arg;
250
251         viodbg(HS, "GOT ATTR stype[0x%x] ops[%llx] disk_size[%llu] disk_type[%x] "
252                "mtype[0x%x] xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
253                pkt->tag.stype, pkt->operations,
254                pkt->vdisk_size, pkt->vdisk_type, pkt->vdisk_mtype,
255                pkt->xfer_mode, pkt->vdisk_block_size,
256                pkt->max_xfer_size);
257
258         if (pkt->tag.stype == VIO_SUBTYPE_ACK) {
259                 switch (pkt->vdisk_type) {
260                 case VD_DISK_TYPE_DISK:
261                 case VD_DISK_TYPE_SLICE:
262                         break;
263
264                 default:
265                         printk(KERN_ERR PFX "%s: Bogus vdisk_type 0x%x\n",
266                                vio->name, pkt->vdisk_type);
267                         return -ECONNRESET;
268                 }
269
270                 if (pkt->vdisk_block_size > port->vdisk_block_size) {
271                         printk(KERN_ERR PFX "%s: BLOCK size increased "
272                                "%u --> %u\n",
273                                vio->name,
274                                port->vdisk_block_size, pkt->vdisk_block_size);
275                         return -ECONNRESET;
276                 }
277
278                 port->operations = pkt->operations;
279                 port->vdisk_type = pkt->vdisk_type;
280                 if (vdc_version_supported(port, 1, 1)) {
281                         port->vdisk_size = pkt->vdisk_size;
282                         port->vdisk_mtype = pkt->vdisk_mtype;
283                 }
284                 if (pkt->max_xfer_size < port->max_xfer_size)
285                         port->max_xfer_size = pkt->max_xfer_size;
286                 port->vdisk_block_size = pkt->vdisk_block_size;
287
288                 port->vdisk_phys_blksz = VDC_DEFAULT_BLK_SIZE;
289                 if (vdc_version_supported(port, 1, 2))
290                         port->vdisk_phys_blksz = pkt->phys_block_size;
291
292                 return 0;
293         } else {
294                 printk(KERN_ERR PFX "%s: Attribute NACK\n", vio->name);
295
296                 return -ECONNRESET;
297         }
298 }
299
300 static void vdc_end_special(struct vdc_port *port, struct vio_disk_desc *desc)
301 {
302         int err = desc->status;
303
304         vdc_finish(&port->vio, -err, WAITING_FOR_GEN_CMD);
305 }
306
307 static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
308                         unsigned int index)
309 {
310         struct vio_disk_desc *desc = vio_dring_entry(dr, index);
311         struct vdc_req_entry *rqe = &port->rq_arr[index];
312         struct request *req;
313
314         if (unlikely(desc->hdr.state != VIO_DESC_DONE))
315                 return;
316
317         ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies);
318         desc->hdr.state = VIO_DESC_FREE;
319         dr->cons = vio_dring_next(dr, index);
320
321         req = rqe->req;
322         if (req == NULL) {
323                 vdc_end_special(port, desc);
324                 return;
325         }
326
327         rqe->req = NULL;
328
329         blk_mq_end_request(req, desc->status ? BLK_STS_IOERR : 0);
330
331         vdc_blk_queue_start(port);
332 }
333
334 static int vdc_ack(struct vdc_port *port, void *msgbuf)
335 {
336         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
337         struct vio_dring_data *pkt = msgbuf;
338
339         if (unlikely(pkt->dring_ident != dr->ident ||
340                      pkt->start_idx != pkt->end_idx ||
341                      pkt->start_idx >= VDC_TX_RING_SIZE))
342                 return 0;
343
344         vdc_end_one(port, dr, pkt->start_idx);
345
346         return 0;
347 }
348
/* Handle a DRING data NACK.  Currently a stub: the message is ignored
 * and 0 is returned.
 */
static int vdc_nack(struct vdc_port *port, void *msgbuf)
{
        /* XXX Implement me XXX */
        return 0;
}
354
355 static void vdc_event(void *arg, int event)
356 {
357         struct vdc_port *port = arg;
358         struct vio_driver_state *vio = &port->vio;
359         unsigned long flags;
360         int err;
361
362         spin_lock_irqsave(&vio->lock, flags);
363
364         if (unlikely(event == LDC_EVENT_RESET)) {
365                 vio_link_state_change(vio, event);
366                 queue_work(sunvdc_wq, &port->ldc_reset_work);
367                 goto out;
368         }
369
370         if (unlikely(event == LDC_EVENT_UP)) {
371                 vio_link_state_change(vio, event);
372                 goto out;
373         }
374
375         if (unlikely(event != LDC_EVENT_DATA_READY)) {
376                 pr_warn(PFX "Unexpected LDC event %d\n", event);
377                 goto out;
378         }
379
380         err = 0;
381         while (1) {
382                 union {
383                         struct vio_msg_tag tag;
384                         u64 raw[8];
385                 } msgbuf;
386
387                 err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
388                 if (unlikely(err < 0)) {
389                         if (err == -ECONNRESET)
390                                 vio_conn_reset(vio);
391                         break;
392                 }
393                 if (err == 0)
394                         break;
395                 viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
396                        msgbuf.tag.type,
397                        msgbuf.tag.stype,
398                        msgbuf.tag.stype_env,
399                        msgbuf.tag.sid);
400                 err = vio_validate_sid(vio, &msgbuf.tag);
401                 if (err < 0)
402                         break;
403
404                 if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
405                         if (msgbuf.tag.stype == VIO_SUBTYPE_ACK)
406                                 err = vdc_ack(port, &msgbuf);
407                         else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK)
408                                 err = vdc_nack(port, &msgbuf);
409                         else
410                                 err = vdc_handle_unknown(port, &msgbuf);
411                 } else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
412                         err = vio_control_pkt_engine(vio, &msgbuf);
413                 } else {
414                         err = vdc_handle_unknown(port, &msgbuf);
415                 }
416                 if (err < 0)
417                         break;
418         }
419         if (err < 0)
420                 vdc_finish(&port->vio, err, WAITING_FOR_ANY);
421 out:
422         spin_unlock_irqrestore(&vio->lock, flags);
423 }
424
425 static int __vdc_tx_trigger(struct vdc_port *port)
426 {
427         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
428         struct vio_dring_data hdr = {
429                 .tag = {
430                         .type           = VIO_TYPE_DATA,
431                         .stype          = VIO_SUBTYPE_INFO,
432                         .stype_env      = VIO_DRING_DATA,
433                         .sid            = vio_send_sid(&port->vio),
434                 },
435                 .dring_ident            = dr->ident,
436                 .start_idx              = dr->prod,
437                 .end_idx                = dr->prod,
438         };
439         int err, delay;
440         int retries = 0;
441
442         hdr.seq = dr->snd_nxt;
443         delay = 1;
444         do {
445                 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
446                 if (err > 0) {
447                         dr->snd_nxt++;
448                         break;
449                 }
450                 udelay(delay);
451                 if ((delay <<= 1) > 128)
452                         delay = 128;
453                 if (retries++ > VDC_MAX_RETRIES)
454                         break;
455         } while (err == -EAGAIN);
456
457         if (err == -ENOTCONN)
458                 vdc_ldc_reset(port);
459         return err;
460 }
461
462 static int __send_request(struct request *req)
463 {
464         struct vdc_port *port = req->q->disk->private_data;
465         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
466         struct scatterlist sg[MAX_RING_COOKIES];
467         struct vdc_req_entry *rqe;
468         struct vio_disk_desc *desc;
469         unsigned int map_perm;
470         int nsg, err, i;
471         u64 len;
472         u8 op;
473
474         if (WARN_ON(port->ring_cookies > MAX_RING_COOKIES))
475                 return -EINVAL;
476
477         map_perm = LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
478
479         if (rq_data_dir(req) == READ) {
480                 map_perm |= LDC_MAP_W;
481                 op = VD_OP_BREAD;
482         } else {
483                 map_perm |= LDC_MAP_R;
484                 op = VD_OP_BWRITE;
485         }
486
487         sg_init_table(sg, port->ring_cookies);
488         nsg = blk_rq_map_sg(req->q, req, sg);
489
490         len = 0;
491         for (i = 0; i < nsg; i++)
492                 len += sg[i].length;
493
494         desc = vio_dring_cur(dr);
495
496         err = ldc_map_sg(port->vio.lp, sg, nsg,
497                          desc->cookies, port->ring_cookies,
498                          map_perm);
499         if (err < 0) {
500                 printk(KERN_ERR PFX "ldc_map_sg() failure, err=%d.\n", err);
501                 return err;
502         }
503
504         rqe = &port->rq_arr[dr->prod];
505         rqe->req = req;
506
507         desc->hdr.ack = VIO_ACK_ENABLE;
508         desc->req_id = port->req_id;
509         desc->operation = op;
510         if (port->vdisk_type == VD_DISK_TYPE_DISK) {
511                 desc->slice = 0xff;
512         } else {
513                 desc->slice = 0;
514         }
515         desc->status = ~0;
516         desc->offset = (blk_rq_pos(req) << 9) / port->vdisk_block_size;
517         desc->size = len;
518         desc->ncookies = err;
519
520         /* This has to be a non-SMP write barrier because we are writing
521          * to memory which is shared with the peer LDOM.
522          */
523         wmb();
524         desc->hdr.state = VIO_DESC_READY;
525
526         err = __vdc_tx_trigger(port);
527         if (err < 0) {
528                 printk(KERN_ERR PFX "vdc_tx_trigger() failure, err=%d\n", err);
529         } else {
530                 port->req_id++;
531                 dr->prod = vio_dring_next(dr, dr->prod);
532         }
533
534         return err;
535 }
536
537 static blk_status_t vdc_queue_rq(struct blk_mq_hw_ctx *hctx,
538                                  const struct blk_mq_queue_data *bd)
539 {
540         struct vdc_port *port = hctx->queue->queuedata;
541         struct vio_dring_state *dr;
542         unsigned long flags;
543
544         dr = &port->vio.drings[VIO_DRIVER_TX_RING];
545
546         blk_mq_start_request(bd->rq);
547
548         spin_lock_irqsave(&port->vio.lock, flags);
549
550         /*
551          * Doing drain, just end the request in error
552          */
553         if (unlikely(port->drain)) {
554                 spin_unlock_irqrestore(&port->vio.lock, flags);
555                 return BLK_STS_IOERR;
556         }
557
558         if (unlikely(vdc_tx_dring_avail(dr) < 1)) {
559                 spin_unlock_irqrestore(&port->vio.lock, flags);
560                 blk_mq_stop_hw_queue(hctx);
561                 return BLK_STS_DEV_RESOURCE;
562         }
563
564         if (__send_request(bd->rq) < 0) {
565                 spin_unlock_irqrestore(&port->vio.lock, flags);
566                 return BLK_STS_IOERR;
567         }
568
569         spin_unlock_irqrestore(&port->vio.lock, flags);
570         return BLK_STS_OK;
571 }
572
573 static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
574 {
575         struct vio_dring_state *dr;
576         struct vio_completion comp;
577         struct vio_disk_desc *desc;
578         unsigned int map_perm;
579         unsigned long flags;
580         int op_len, err;
581         void *req_buf;
582
583         if (!(((u64)1 << (u64)op) & port->operations))
584                 return -EOPNOTSUPP;
585
586         switch (op) {
587         case VD_OP_BREAD:
588         case VD_OP_BWRITE:
589         default:
590                 return -EINVAL;
591
592         case VD_OP_FLUSH:
593                 op_len = 0;
594                 map_perm = 0;
595                 break;
596
597         case VD_OP_GET_WCE:
598                 op_len = sizeof(u32);
599                 map_perm = LDC_MAP_W;
600                 break;
601
602         case VD_OP_SET_WCE:
603                 op_len = sizeof(u32);
604                 map_perm = LDC_MAP_R;
605                 break;
606
607         case VD_OP_GET_VTOC:
608                 op_len = sizeof(struct vio_disk_vtoc);
609                 map_perm = LDC_MAP_W;
610                 break;
611
612         case VD_OP_SET_VTOC:
613                 op_len = sizeof(struct vio_disk_vtoc);
614                 map_perm = LDC_MAP_R;
615                 break;
616
617         case VD_OP_GET_DISKGEOM:
618                 op_len = sizeof(struct vio_disk_geom);
619                 map_perm = LDC_MAP_W;
620                 break;
621
622         case VD_OP_SET_DISKGEOM:
623                 op_len = sizeof(struct vio_disk_geom);
624                 map_perm = LDC_MAP_R;
625                 break;
626
627         case VD_OP_SCSICMD:
628                 op_len = 16;
629                 map_perm = LDC_MAP_RW;
630                 break;
631
632         case VD_OP_GET_DEVID:
633                 op_len = sizeof(struct vio_disk_devid);
634                 map_perm = LDC_MAP_W;
635                 break;
636
637         case VD_OP_GET_EFI:
638         case VD_OP_SET_EFI:
639                 return -EOPNOTSUPP;
640         }
641
642         map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
643
644         op_len = (op_len + 7) & ~7;
645         req_buf = kzalloc(op_len, GFP_KERNEL);
646         if (!req_buf)
647                 return -ENOMEM;
648
649         if (len > op_len)
650                 len = op_len;
651
652         if (map_perm & LDC_MAP_R)
653                 memcpy(req_buf, buf, len);
654
655         spin_lock_irqsave(&port->vio.lock, flags);
656
657         dr = &port->vio.drings[VIO_DRIVER_TX_RING];
658
659         /* XXX If we want to use this code generically we have to
660          * XXX handle TX ring exhaustion etc.
661          */
662         desc = vio_dring_cur(dr);
663
664         err = ldc_map_single(port->vio.lp, req_buf, op_len,
665                              desc->cookies, port->ring_cookies,
666                              map_perm);
667         if (err < 0) {
668                 spin_unlock_irqrestore(&port->vio.lock, flags);
669                 kfree(req_buf);
670                 return err;
671         }
672
673         init_completion(&comp.com);
674         comp.waiting_for = WAITING_FOR_GEN_CMD;
675         port->vio.cmp = &comp;
676
677         desc->hdr.ack = VIO_ACK_ENABLE;
678         desc->req_id = port->req_id;
679         desc->operation = op;
680         desc->slice = 0;
681         desc->status = ~0;
682         desc->offset = 0;
683         desc->size = op_len;
684         desc->ncookies = err;
685
686         /* This has to be a non-SMP write barrier because we are writing
687          * to memory which is shared with the peer LDOM.
688          */
689         wmb();
690         desc->hdr.state = VIO_DESC_READY;
691
692         err = __vdc_tx_trigger(port);
693         if (err >= 0) {
694                 port->req_id++;
695                 dr->prod = vio_dring_next(dr, dr->prod);
696                 spin_unlock_irqrestore(&port->vio.lock, flags);
697
698                 wait_for_completion(&comp.com);
699                 err = comp.err;
700         } else {
701                 port->vio.cmp = NULL;
702                 spin_unlock_irqrestore(&port->vio.lock, flags);
703         }
704
705         if (map_perm & LDC_MAP_W)
706                 memcpy(buf, req_buf, len);
707
708         kfree(req_buf);
709
710         return err;
711 }
712
713 static int vdc_alloc_tx_ring(struct vdc_port *port)
714 {
715         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
716         unsigned long len, entry_size;
717         int ncookies;
718         void *dring;
719
720         entry_size = sizeof(struct vio_disk_desc) +
721                 (sizeof(struct ldc_trans_cookie) * port->ring_cookies);
722         len = (VDC_TX_RING_SIZE * entry_size);
723
724         ncookies = VIO_MAX_RING_COOKIES;
725         dring = ldc_alloc_exp_dring(port->vio.lp, len,
726                                     dr->cookies, &ncookies,
727                                     (LDC_MAP_SHADOW |
728                                      LDC_MAP_DIRECT |
729                                      LDC_MAP_RW));
730         if (IS_ERR(dring))
731                 return PTR_ERR(dring);
732
733         dr->base = dring;
734         dr->entry_size = entry_size;
735         dr->num_entries = VDC_TX_RING_SIZE;
736         dr->prod = dr->cons = 0;
737         dr->pending = VDC_TX_RING_SIZE;
738         dr->ncookies = ncookies;
739
740         return 0;
741 }
742
743 static void vdc_free_tx_ring(struct vdc_port *port)
744 {
745         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
746
747         if (dr->base) {
748                 ldc_free_exp_dring(port->vio.lp, dr->base,
749                                    (dr->entry_size * dr->num_entries),
750                                    dr->cookies, dr->ncookies);
751                 dr->base = NULL;
752                 dr->entry_size = 0;
753                 dr->num_entries = 0;
754                 dr->pending = 0;
755                 dr->ncookies = 0;
756         }
757 }
758
759 static int vdc_port_up(struct vdc_port *port)
760 {
761         struct vio_completion comp;
762
763         init_completion(&comp.com);
764         comp.err = 0;
765         comp.waiting_for = WAITING_FOR_LINK_UP;
766         port->vio.cmp = &comp;
767
768         vio_port_up(&port->vio);
769         wait_for_completion(&comp.com);
770         return comp.err;
771 }
772
773 static void vdc_port_down(struct vdc_port *port)
774 {
775         ldc_disconnect(port->vio.lp);
776         ldc_unbind(port->vio.lp);
777         vdc_free_tx_ring(port);
778         vio_ldc_free(&port->vio);
779 }
780
/* blk-mq operations passed to blk_mq_alloc_sq_tag_set() in probe_disk(). */
static const struct blk_mq_ops vdc_mq_ops = {
        .queue_rq       = vdc_queue_rq,
};
784
785 static int probe_disk(struct vdc_port *port)
786 {
787         struct queue_limits lim = {
788                 .physical_block_size            = port->vdisk_phys_blksz,
789                 .max_hw_sectors                 = port->max_xfer_size,
790                 /* Each segment in a request is up to an aligned page in size. */
791                 .seg_boundary_mask              = PAGE_SIZE - 1,
792                 .max_segment_size               = PAGE_SIZE,
793                 .max_segments                   = port->ring_cookies,
794         };
795         struct request_queue *q;
796         struct gendisk *g;
797         int err;
798
799         err = vdc_port_up(port);
800         if (err)
801                 return err;
802
803         /* Using version 1.2 means vdisk_phys_blksz should be set unless the
804          * disk is reserved by another system.
805          */
806         if (vdc_version_supported(port, 1, 2) && !port->vdisk_phys_blksz)
807                 return -ENODEV;
808
809         if (vdc_version_supported(port, 1, 1)) {
810                 /* vdisk_size should be set during the handshake, if it wasn't
811                  * then the underlying disk is reserved by another system
812                  */
813                 if (port->vdisk_size == -1)
814                         return -ENODEV;
815         } else {
816                 struct vio_disk_geom geom;
817
818                 err = generic_request(port, VD_OP_GET_DISKGEOM,
819                                       &geom, sizeof(geom));
820                 if (err < 0) {
821                         printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns "
822                                "error %d\n", err);
823                         return err;
824                 }
825                 port->vdisk_size = ((u64)geom.num_cyl *
826                                     (u64)geom.num_hd *
827                                     (u64)geom.num_sec);
828         }
829
830         err = blk_mq_alloc_sq_tag_set(&port->tag_set, &vdc_mq_ops,
831                         VDC_TX_RING_SIZE, BLK_MQ_F_SHOULD_MERGE);
832         if (err)
833                 return err;
834
835         g = blk_mq_alloc_disk(&port->tag_set, &lim, port);
836         if (IS_ERR(g)) {
837                 printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n",
838                        port->vio.name);
839                 err = PTR_ERR(g);
840                 goto out_free_tag;
841         }
842
843         port->disk = g;
844         q = g->queue;
845
846         g->major = vdc_major;
847         g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT;
848         g->minors = 1 << PARTITION_SHIFT;
849         strcpy(g->disk_name, port->disk_name);
850
851         g->fops = &vdc_fops;
852         g->queue = q;
853         g->private_data = port;
854
855         set_capacity(g, port->vdisk_size);
856
857         if (vdc_version_supported(port, 1, 1)) {
858                 switch (port->vdisk_mtype) {
859                 case VD_MEDIA_TYPE_CD:
860                         pr_info(PFX "Virtual CDROM %s\n", port->disk_name);
861                         g->flags |= GENHD_FL_REMOVABLE;
862                         set_disk_ro(g, 1);
863                         break;
864
865                 case VD_MEDIA_TYPE_DVD:
866                         pr_info(PFX "Virtual DVD %s\n", port->disk_name);
867                         g->flags |= GENHD_FL_REMOVABLE;
868                         set_disk_ro(g, 1);
869                         break;
870
871                 case VD_MEDIA_TYPE_FIXED:
872                         pr_info(PFX "Virtual Hard disk %s\n", port->disk_name);
873                         break;
874                 }
875         }
876
877         pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n",
878                g->disk_name,
879                port->vdisk_size, (port->vdisk_size >> (20 - 9)),
880                port->vio.ver.major, port->vio.ver.minor);
881
882         err = device_add_disk(&port->vio.vdev->dev, g, NULL);
883         if (err)
884                 goto out_cleanup_disk;
885
886         return 0;
887
888 out_cleanup_disk:
889         put_disk(g);
890 out_free_tag:
891         blk_mq_free_tag_set(&port->tag_set);
892         return err;
893 }
894
/* LDC channel configuration: vdc_event() is the event callback. */
static struct ldc_channel_config vdc_ldc_cfg = {
        .event          = vdc_event,
        .mtu            = 64,
        .mode           = LDC_MODE_UNRELIABLE,
};
900
/* Callbacks this driver provides to the generic VIO handshake code. */
static struct vio_driver_ops vdc_vio_ops = {
        .send_attr              = vdc_send_attr,
        .handle_attr            = vdc_handle_attr,
        .handshake_complete     = vdc_handshake_complete,
};
906
907 static void print_version(void)
908 {
909         static int version_printed;
910
911         if (version_printed++ == 0)
912                 printk(KERN_INFO "%s", version);
913 }
914
/* Search key passed to vdc_device_probed() via device_find_child(). */
struct vdc_check_port_data {
	int	dev_no;		/* matched against vio_dev->dev_no */
	char	*type;		/* matched against vio_dev->type with strcmp() */
};
919
920 static int vdc_device_probed(struct device *dev, void *arg)
921 {
922         struct vio_dev *vdev = to_vio_dev(dev);
923         struct vdc_check_port_data *port_data;
924
925         port_data = (struct vdc_check_port_data *)arg;
926
927         if ((vdev->dev_no == port_data->dev_no) &&
928             (!(strcmp((char *)&vdev->type, port_data->type))) &&
929                 dev_get_drvdata(dev)) {
930                 /* This device has already been configured
931                  * by vdc_port_probe()
932                  */
933                 return 1;
934         } else {
935                 return 0;
936         }
937 }
938
939 /* Determine whether the VIO device is part of an mpgroup
940  * by locating all the virtual-device-port nodes associated
941  * with the parent virtual-device node for the VIO device
942  * and checking whether any of these nodes are vdc-ports
943  * which have already been configured.
944  *
945  * Returns true if this device is part of an mpgroup and has
946  * already been probed.
947  */
948 static bool vdc_port_mpgroup_check(struct vio_dev *vdev)
949 {
950         struct vdc_check_port_data port_data;
951         struct device *dev;
952
953         port_data.dev_no = vdev->dev_no;
954         port_data.type = (char *)&vdev->type;
955
956         dev = device_find_child(vdev->dev.parent, &port_data,
957                                 vdc_device_probed);
958
959         if (dev)
960                 return true;
961
962         return false;
963 }
964
965 static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
966 {
967         struct mdesc_handle *hp;
968         struct vdc_port *port;
969         int err;
970         const u64 *ldc_timeout;
971
972         print_version();
973
974         hp = mdesc_grab();
975         if (!hp)
976                 return -ENODEV;
977
978         err = -ENODEV;
979         if ((vdev->dev_no << PARTITION_SHIFT) & ~(u64)MINORMASK) {
980                 printk(KERN_ERR PFX "Port id [%llu] too large.\n",
981                        vdev->dev_no);
982                 goto err_out_release_mdesc;
983         }
984
985         /* Check if this device is part of an mpgroup */
986         if (vdc_port_mpgroup_check(vdev)) {
987                 printk(KERN_WARNING
988                         "VIO: Ignoring extra vdisk port %s",
989                         dev_name(&vdev->dev));
990                 goto err_out_release_mdesc;
991         }
992
993         port = kzalloc(sizeof(*port), GFP_KERNEL);
994         if (!port) {
995                 err = -ENOMEM;
996                 goto err_out_release_mdesc;
997         }
998
999         if (vdev->dev_no >= 26)
1000                 snprintf(port->disk_name, sizeof(port->disk_name),
1001                          VDCBLK_NAME "%c%c",
1002                          'a' + ((int)vdev->dev_no / 26) - 1,
1003                          'a' + ((int)vdev->dev_no % 26));
1004         else
1005                 snprintf(port->disk_name, sizeof(port->disk_name),
1006                          VDCBLK_NAME "%c", 'a' + ((int)vdev->dev_no % 26));
1007         port->vdisk_size = -1;
1008
1009         /* Actual wall time may be double due to do_generic_file_read() doing
1010          * a readahead I/O first, and once that fails it will try to read a
1011          * single page.
1012          */
1013         ldc_timeout = mdesc_get_property(hp, vdev->mp, "vdc-timeout", NULL);
1014         port->ldc_timeout = ldc_timeout ? *ldc_timeout : 0;
1015         INIT_DELAYED_WORK(&port->ldc_reset_timer_work, vdc_ldc_reset_timer_work);
1016         INIT_WORK(&port->ldc_reset_work, vdc_ldc_reset_work);
1017
1018         err = vio_driver_init(&port->vio, vdev, VDEV_DISK,
1019                               vdc_versions, ARRAY_SIZE(vdc_versions),
1020                               &vdc_vio_ops, port->disk_name);
1021         if (err)
1022                 goto err_out_free_port;
1023
1024         port->vdisk_block_size = VDC_DEFAULT_BLK_SIZE;
1025         port->max_xfer_size = MAX_XFER_SIZE;
1026         port->ring_cookies = MAX_RING_COOKIES;
1027
1028         err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
1029         if (err)
1030                 goto err_out_free_port;
1031
1032         err = vdc_alloc_tx_ring(port);
1033         if (err)
1034                 goto err_out_free_ldc;
1035
1036         err = probe_disk(port);
1037         if (err)
1038                 goto err_out_free_tx_ring;
1039
1040         /* Note that the device driver_data is used to determine
1041          * whether the port has been probed.
1042          */
1043         dev_set_drvdata(&vdev->dev, port);
1044
1045         mdesc_release(hp);
1046
1047         return 0;
1048
1049 err_out_free_tx_ring:
1050         vdc_free_tx_ring(port);
1051
1052 err_out_free_ldc:
1053         vio_ldc_free(&port->vio);
1054
1055 err_out_free_port:
1056         kfree(port);
1057
1058 err_out_release_mdesc:
1059         mdesc_release(hp);
1060         return err;
1061 }
1062
1063 static void vdc_port_remove(struct vio_dev *vdev)
1064 {
1065         struct vdc_port *port = dev_get_drvdata(&vdev->dev);
1066
1067         if (port) {
1068                 blk_mq_stop_hw_queues(port->disk->queue);
1069
1070                 flush_work(&port->ldc_reset_work);
1071                 cancel_delayed_work_sync(&port->ldc_reset_timer_work);
1072                 del_timer_sync(&port->vio.timer);
1073
1074                 del_gendisk(port->disk);
1075                 put_disk(port->disk);
1076                 blk_mq_free_tag_set(&port->tag_set);
1077
1078                 vdc_free_tx_ring(port);
1079                 vio_ldc_free(&port->vio);
1080
1081                 dev_set_drvdata(&vdev->dev, NULL);
1082
1083                 kfree(port);
1084         }
1085 }
1086
1087 static void vdc_requeue_inflight(struct vdc_port *port)
1088 {
1089         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1090         u32 idx;
1091
1092         for (idx = dr->cons; idx != dr->prod; idx = vio_dring_next(dr, idx)) {
1093                 struct vio_disk_desc *desc = vio_dring_entry(dr, idx);
1094                 struct vdc_req_entry *rqe = &port->rq_arr[idx];
1095                 struct request *req;
1096
1097                 ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies);
1098                 desc->hdr.state = VIO_DESC_FREE;
1099                 dr->cons = vio_dring_next(dr, idx);
1100
1101                 req = rqe->req;
1102                 if (req == NULL) {
1103                         vdc_end_special(port, desc);
1104                         continue;
1105                 }
1106
1107                 rqe->req = NULL;
1108                 blk_mq_requeue_request(req, false);
1109         }
1110 }
1111
1112 static void vdc_queue_drain(struct vdc_port *port)
1113 {
1114         struct request_queue *q = port->disk->queue;
1115
1116         /*
1117          * Mark the queue as draining, then freeze/quiesce to ensure
1118          * that all existing requests are seen in ->queue_rq() and killed
1119          */
1120         port->drain = 1;
1121         spin_unlock_irq(&port->vio.lock);
1122
1123         blk_mq_freeze_queue(q);
1124         blk_mq_quiesce_queue(q);
1125
1126         spin_lock_irq(&port->vio.lock);
1127         port->drain = 0;
1128         blk_mq_unquiesce_queue(q);
1129         blk_mq_unfreeze_queue(q);
1130 }
1131
1132 static void vdc_ldc_reset_timer_work(struct work_struct *work)
1133 {
1134         struct vdc_port *port;
1135         struct vio_driver_state *vio;
1136
1137         port = container_of(work, struct vdc_port, ldc_reset_timer_work.work);
1138         vio = &port->vio;
1139
1140         spin_lock_irq(&vio->lock);
1141         if (!(port->vio.hs_state & VIO_HS_COMPLETE)) {
1142                 pr_warn(PFX "%s ldc down %llu seconds, draining queue\n",
1143                         port->disk_name, port->ldc_timeout);
1144                 vdc_queue_drain(port);
1145                 vdc_blk_queue_start(port);
1146         }
1147         spin_unlock_irq(&vio->lock);
1148 }
1149
1150 static void vdc_ldc_reset_work(struct work_struct *work)
1151 {
1152         struct vdc_port *port;
1153         struct vio_driver_state *vio;
1154         unsigned long flags;
1155
1156         port = container_of(work, struct vdc_port, ldc_reset_work);
1157         vio = &port->vio;
1158
1159         spin_lock_irqsave(&vio->lock, flags);
1160         vdc_ldc_reset(port);
1161         spin_unlock_irqrestore(&vio->lock, flags);
1162 }
1163
1164 static void vdc_ldc_reset(struct vdc_port *port)
1165 {
1166         int err;
1167
1168         assert_spin_locked(&port->vio.lock);
1169
1170         pr_warn(PFX "%s ldc link reset\n", port->disk_name);
1171         blk_mq_stop_hw_queues(port->disk->queue);
1172         vdc_requeue_inflight(port);
1173         vdc_port_down(port);
1174
1175         err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
1176         if (err) {
1177                 pr_err(PFX "%s vio_ldc_alloc:%d\n", port->disk_name, err);
1178                 return;
1179         }
1180
1181         err = vdc_alloc_tx_ring(port);
1182         if (err) {
1183                 pr_err(PFX "%s vio_alloc_tx_ring:%d\n", port->disk_name, err);
1184                 goto err_free_ldc;
1185         }
1186
1187         if (port->ldc_timeout)
1188                 mod_delayed_work(system_wq, &port->ldc_reset_timer_work,
1189                           round_jiffies(jiffies + HZ * port->ldc_timeout));
1190         mod_timer(&port->vio.timer, round_jiffies(jiffies + HZ));
1191         return;
1192
1193 err_free_ldc:
1194         vio_ldc_free(&port->vio);
1195 }
1196
1197 static const struct vio_device_id vdc_port_match[] = {
1198         {
1199                 .type = "vdc-port",
1200         },
1201         {},
1202 };
1203 MODULE_DEVICE_TABLE(vio, vdc_port_match);
1204
1205 static struct vio_driver vdc_port_driver = {
1206         .id_table       = vdc_port_match,
1207         .probe          = vdc_port_probe,
1208         .remove         = vdc_port_remove,
1209         .name           = "vdc_port",
1210 };
1211
1212 static int __init vdc_init(void)
1213 {
1214         int err;
1215
1216         sunvdc_wq = alloc_workqueue("sunvdc", 0, 0);
1217         if (!sunvdc_wq)
1218                 return -ENOMEM;
1219
1220         err = register_blkdev(0, VDCBLK_NAME);
1221         if (err < 0)
1222                 goto out_free_wq;
1223
1224         vdc_major = err;
1225
1226         err = vio_register_driver(&vdc_port_driver);
1227         if (err)
1228                 goto out_unregister_blkdev;
1229
1230         return 0;
1231
1232 out_unregister_blkdev:
1233         unregister_blkdev(vdc_major, VDCBLK_NAME);
1234         vdc_major = 0;
1235
1236 out_free_wq:
1237         destroy_workqueue(sunvdc_wq);
1238         return err;
1239 }
1240
1241 static void __exit vdc_exit(void)
1242 {
1243         vio_unregister_driver(&vdc_port_driver);
1244         unregister_blkdev(vdc_major, VDCBLK_NAME);
1245         destroy_workqueue(sunvdc_wq);
1246 }
1247
1248 module_init(vdc_init);
1249 module_exit(vdc_exit);