Merge tag 'x86_bugs_pbrsb' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
[linux-2.6-microblaze.git] / drivers / block / sunvdc.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* sunvdc.c: Sun LDOM Virtual Disk Client.
3  *
4  * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
5  */
6
7 #include <linux/module.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/blk-mq.h>
11 #include <linux/hdreg.h>
12 #include <linux/cdrom.h>
13 #include <linux/slab.h>
14 #include <linux/spinlock.h>
15 #include <linux/completion.h>
16 #include <linux/delay.h>
17 #include <linux/init.h>
18 #include <linux/list.h>
19 #include <linux/scatterlist.h>
20
21 #include <asm/vio.h>
22 #include <asm/ldc.h>
23
/* Driver identity: module name, log-message prefix, version and release date. */
24 #define DRV_MODULE_NAME         "sunvdc"
25 #define PFX DRV_MODULE_NAME     ": "
26 #define DRV_MODULE_VERSION      "1.2"
27 #define DRV_MODULE_RELDATE      "November 24, 2014"
28
/* Banner string printed (once) by print_version(). */
29 static char version[] =
30         DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
/* Module metadata. */
31 MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
32 MODULE_DESCRIPTION("Sun LDOM virtual disk client driver");
33 MODULE_LICENSE("GPL");
34 MODULE_VERSION(DRV_MODULE_VERSION);
35
/* Number of descriptors in the TX descriptor ring; also the size of
 * vdc_port.rq_arr and the blk-mq queue depth requested by probe_disk().
 */
36 #define VDC_TX_RING_SIZE        512
/* Default block size; vdc_handle_attr() also uses it as the physical block
 * size when protocol version 1.2 is not in effect.
 */
37 #define VDC_DEFAULT_BLK_SIZE    512
38
/* Transfer limits.
 * NOTE(review): despite its name, MAX_XFER_BLKS is divided by both the
 * default block size and PAGE_SIZE below, so it reads as a byte count
 * (128 KiB) and MAX_XFER_SIZE as a count of default-sized blocks -- confirm.
 */
39 #define MAX_XFER_BLKS           (128 * 1024)
40 #define MAX_XFER_SIZE           (MAX_XFER_BLKS / VDC_DEFAULT_BLK_SIZE)
41 #define MAX_RING_COOKIES        ((MAX_XFER_BLKS / PAGE_SIZE) + 2)
42
/* Values for vio_completion.waiting_for.  vdc_finish() completes the pending
 * waiter only when its value matches, or unconditionally for
 * WAITING_FOR_ANY (-1).
 */
43 #define WAITING_FOR_LINK_UP     0x01
44 #define WAITING_FOR_TX_SPACE    0x02
45 #define WAITING_FOR_GEN_CMD     0x04
46 #define WAITING_FOR_ANY         -1
47
/* Retry bound checked by __vdc_tx_trigger() while the send keeps failing. */
48 #define VDC_MAX_RETRIES 10
49
/* Workqueue onto which vdc_event() queues the per-port LDC reset work. */
50 static struct workqueue_struct *sunvdc_wq;
51
/* Per-descriptor bookkeeping, indexed like the TX ring (see vdc_port.rq_arr).
 * @req is the block request submitted in that slot by __send_request();
 * vdc_end_one() treats a NULL @req as a descriptor that carried no block
 * request and completes it through vdc_end_special() instead.
 */
52 struct vdc_req_entry {
53         struct request          *req;
54 };
55
/* Per-port driver state for one virtual disk. */
56 struct vdc_port {
        /* Generic VIO state; to_vdc_port() recovers the port from it. */
57         struct vio_driver_state vio;
58
        /* Set by probe_disk(); vdc_blk_queue_start() tolerates it being NULL. */
59         struct gendisk          *disk;
60
        /* NOTE(review): not referenced in the visible code, which uses
         * port->vio.cmp for completions -- confirm whether this is still needed.
         */
61         struct vdc_completion   *cmp;
62
        /* Stamped into each descriptor and incremented after a successful submit. */
63         u64                     req_id;
64         u64                     seq;
        /* Block request (if any) attached to each TX ring slot. */
65         struct vdc_req_entry    rq_arr[VDC_TX_RING_SIZE];
66
        /* Cookies available per descriptor; also the queue's max segment count. */
67         unsigned long           ring_cookies;
68
        /* Sent in the attribute packet; lowered if the server ACKs a smaller value. */
69         u64                     max_xfer_size;
70         u32                     vdisk_block_size;
        /* When nonzero, vdc_queue_rq() fails new requests with BLK_STS_IOERR. */
71         u32                     drain;
72
73         u64                     ldc_timeout;
        /* Cancelled by vdc_handshake_complete(). */
74         struct delayed_work     ldc_reset_timer_work;
        /* Queued on sunvdc_wq by vdc_event() on LDC_EVENT_RESET. */
75         struct work_struct      ldc_reset_work;
76
77         /* The server fills these in for us in the disk attribute
78          * ACK packet.
79          */
80         u64                     operations;
81         u32                     vdisk_size;
82         u8                      vdisk_type;
83         u8                      vdisk_mtype;
84         u32                     vdisk_phys_blksz;
85
86         struct blk_mq_tag_set   tag_set;
87
        /* Copied into gendisk->disk_name by probe_disk(). */
88         char                    disk_name[32];
89 };
90
/* Forward declarations for the LDC reset path.  vdc_ldc_reset() is called
 * from __vdc_tx_trigger() before its definition appears.
 */
91 static void vdc_ldc_reset(struct vdc_port *port);
92 static void vdc_ldc_reset_work(struct work_struct *work);
93 static void vdc_ldc_reset_timer_work(struct work_struct *work);
94
95 static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
96 {
97         return container_of(vio, struct vdc_port, vio);
98 }
99
100 /* Ordered from largest major to lowest */
/* Protocol versions this driver knows about (1.2, 1.1, 1.0), newest first.
 * NOTE(review): presumably handed to the VIO layer for version negotiation;
 * the use site is outside the visible code.
 */
101 static struct vio_version vdc_versions[] = {
102         { .major = 1, .minor = 2 },
103         { .major = 1, .minor = 1 },
104         { .major = 1, .minor = 0 },
105 };
106
107 static inline int vdc_version_supported(struct vdc_port *port,
108                                         u16 major, u16 minor)
109 {
110         return port->vio.ver.major == major && port->vio.ver.minor >= minor;
111 }
112
113 #define VDCBLK_NAME     "vdisk"
/* Block major number assigned to every gendisk in probe_disk(). */
114 static int vdc_major;
/* Each disk gets (1 << PARTITION_SHIFT) minors, starting at
 * dev_no << PARTITION_SHIFT (see probe_disk()).
 */
115 #define PARTITION_SHIFT 3
116
117 static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr)
118 {
119         return vio_dring_avail(dr, VDC_TX_RING_SIZE);
120 }
121
122 static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
123 {
124         struct gendisk *disk = bdev->bd_disk;
125         sector_t nsect = get_capacity(disk);
126         sector_t cylinders = nsect;
127
128         geo->heads = 0xff;
129         geo->sectors = 0x3f;
130         sector_div(cylinders, geo->heads * geo->sectors);
131         geo->cylinders = cylinders;
132         if ((sector_t)(geo->cylinders + 1) * geo->heads * geo->sectors < nsect)
133                 geo->cylinders = 0xffff;
134
135         return 0;
136 }
137
138 /* Add ioctl/CDROM_GET_CAPABILITY to support cdrom_id in udev
139  * when vdisk_mtype is VD_MEDIA_TYPE_CD or VD_MEDIA_TYPE_DVD.
140  * Needed to be able to install inside an ldom from an iso image.
141  */
142 static int vdc_ioctl(struct block_device *bdev, fmode_t mode,
143                      unsigned command, unsigned long argument)
144 {
145         struct vdc_port *port = bdev->bd_disk->private_data;
146         int i;
147
148         switch (command) {
149         case CDROMMULTISESSION:
150                 pr_debug(PFX "Multisession CDs not supported\n");
151                 for (i = 0; i < sizeof(struct cdrom_multisession); i++)
152                         if (put_user(0, (char __user *)(argument + i)))
153                                 return -EFAULT;
154                 return 0;
155
156         case CDROM_GET_CAPABILITY:
157                 if (!vdc_version_supported(port, 1, 1))
158                         return -EINVAL;
159                 switch (port->vdisk_mtype) {
160                 case VD_MEDIA_TYPE_CD:
161                 case VD_MEDIA_TYPE_DVD:
162                         return 0;
163                 default:
164                         return -EINVAL;
165                 }
166         default:
167                 pr_debug(PFX "ioctl %08x not supported\n", command);
168                 return -EINVAL;
169         }
170 }
171
/* Block device operations installed on every gendisk by probe_disk():
 * geometry query, the small CDROM ioctl subset, and the generic
 * compat-pointer ioctl wrapper.
 */
172 static const struct block_device_operations vdc_fops = {
173         .owner          = THIS_MODULE,
174         .getgeo         = vdc_getgeo,
175         .ioctl          = vdc_ioctl,
176         .compat_ioctl   = blkdev_compat_ptr_ioctl,
177 };
178
179 static void vdc_blk_queue_start(struct vdc_port *port)
180 {
181         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
182
183         /* restart blk queue when ring is half emptied. also called after
184          * handshake completes, so check for initial handshake before we've
185          * allocated a disk.
186          */
187         if (port->disk && vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50)
188                 blk_mq_start_stopped_hw_queues(port->disk->queue, true);
189 }
190
191 static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for)
192 {
193         if (vio->cmp &&
194             (waiting_for == -1 ||
195              vio->cmp->waiting_for == waiting_for)) {
196                 vio->cmp->err = err;
197                 complete(&vio->cmp->com);
198                 vio->cmp = NULL;
199         }
200 }
201
202 static void vdc_handshake_complete(struct vio_driver_state *vio)
203 {
204         struct vdc_port *port = to_vdc_port(vio);
205
206         cancel_delayed_work(&port->ldc_reset_timer_work);
207         vdc_finish(vio, 0, WAITING_FOR_LINK_UP);
208         vdc_blk_queue_start(port);
209 }
210
211 static int vdc_handle_unknown(struct vdc_port *port, void *arg)
212 {
213         struct vio_msg_tag *pkt = arg;
214
215         printk(KERN_ERR PFX "Received unknown msg [%02x:%02x:%04x:%08x]\n",
216                pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
217         printk(KERN_ERR PFX "Resetting connection.\n");
218
219         ldc_disconnect(port->vio.lp);
220
221         return -ECONNRESET;
222 }
223
224 static int vdc_send_attr(struct vio_driver_state *vio)
225 {
226         struct vdc_port *port = to_vdc_port(vio);
227         struct vio_disk_attr_info pkt;
228
229         memset(&pkt, 0, sizeof(pkt));
230
231         pkt.tag.type = VIO_TYPE_CTRL;
232         pkt.tag.stype = VIO_SUBTYPE_INFO;
233         pkt.tag.stype_env = VIO_ATTR_INFO;
234         pkt.tag.sid = vio_send_sid(vio);
235
236         pkt.xfer_mode = VIO_DRING_MODE;
237         pkt.vdisk_block_size = port->vdisk_block_size;
238         pkt.max_xfer_size = port->max_xfer_size;
239
240         viodbg(HS, "SEND ATTR xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
241                pkt.xfer_mode, pkt.vdisk_block_size, pkt.max_xfer_size);
242
243         return vio_ldc_send(&port->vio, &pkt, sizeof(pkt));
244 }
245
246 static int vdc_handle_attr(struct vio_driver_state *vio, void *arg)
247 {
248         struct vdc_port *port = to_vdc_port(vio);
249         struct vio_disk_attr_info *pkt = arg;
250
251         viodbg(HS, "GOT ATTR stype[0x%x] ops[%llx] disk_size[%llu] disk_type[%x] "
252                "mtype[0x%x] xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
253                pkt->tag.stype, pkt->operations,
254                pkt->vdisk_size, pkt->vdisk_type, pkt->vdisk_mtype,
255                pkt->xfer_mode, pkt->vdisk_block_size,
256                pkt->max_xfer_size);
257
258         if (pkt->tag.stype == VIO_SUBTYPE_ACK) {
259                 switch (pkt->vdisk_type) {
260                 case VD_DISK_TYPE_DISK:
261                 case VD_DISK_TYPE_SLICE:
262                         break;
263
264                 default:
265                         printk(KERN_ERR PFX "%s: Bogus vdisk_type 0x%x\n",
266                                vio->name, pkt->vdisk_type);
267                         return -ECONNRESET;
268                 }
269
270                 if (pkt->vdisk_block_size > port->vdisk_block_size) {
271                         printk(KERN_ERR PFX "%s: BLOCK size increased "
272                                "%u --> %u\n",
273                                vio->name,
274                                port->vdisk_block_size, pkt->vdisk_block_size);
275                         return -ECONNRESET;
276                 }
277
278                 port->operations = pkt->operations;
279                 port->vdisk_type = pkt->vdisk_type;
280                 if (vdc_version_supported(port, 1, 1)) {
281                         port->vdisk_size = pkt->vdisk_size;
282                         port->vdisk_mtype = pkt->vdisk_mtype;
283                 }
284                 if (pkt->max_xfer_size < port->max_xfer_size)
285                         port->max_xfer_size = pkt->max_xfer_size;
286                 port->vdisk_block_size = pkt->vdisk_block_size;
287
288                 port->vdisk_phys_blksz = VDC_DEFAULT_BLK_SIZE;
289                 if (vdc_version_supported(port, 1, 2))
290                         port->vdisk_phys_blksz = pkt->phys_block_size;
291
292                 return 0;
293         } else {
294                 printk(KERN_ERR PFX "%s: Attribute NACK\n", vio->name);
295
296                 return -ECONNRESET;
297         }
298 }
299
300 static void vdc_end_special(struct vdc_port *port, struct vio_disk_desc *desc)
301 {
302         int err = desc->status;
303
304         vdc_finish(&port->vio, -err, WAITING_FOR_GEN_CMD);
305 }
306
/* Retire the TX ring descriptor at @index.
 *
 * Descriptors that are not in VIO_DESC_DONE state are ignored.  Otherwise
 * the descriptor's cookies are unmapped, the slot is marked free and the
 * consumer index moves to the entry following @index.  A slot with no block
 * request attached is finished through vdc_end_special(); otherwise the
 * block request is ended (BLK_STS_IOERR when the descriptor status is
 * nonzero) and the block queue is given a chance to restart.
 */
307 static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
308                         unsigned int index)
309 {
310         struct vio_disk_desc *desc = vio_dring_entry(dr, index);
311         struct vdc_req_entry *rqe = &port->rq_arr[index];
312         struct request *req;
313
314         if (unlikely(desc->hdr.state != VIO_DESC_DONE))
315                 return;
316
317         ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies);
318         desc->hdr.state = VIO_DESC_FREE;
319         dr->cons = vio_dring_next(dr, index);
320
321         req = rqe->req;
322         if (req == NULL) {
                /* No block request in this slot: complete the generic-command waiter. */
323                 vdc_end_special(port, desc);
324                 return;
325         }
326
        /* Detach the request from the slot before completing it. */
327         rqe->req = NULL;
328
329         blk_mq_end_request(req, desc->status ? BLK_STS_IOERR : 0);
330
331         vdc_blk_queue_start(port);
332 }
333
334 static int vdc_ack(struct vdc_port *port, void *msgbuf)
335 {
336         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
337         struct vio_dring_data *pkt = msgbuf;
338
339         if (unlikely(pkt->dring_ident != dr->ident ||
340                      pkt->start_idx != pkt->end_idx ||
341                      pkt->start_idx >= VDC_TX_RING_SIZE))
342                 return 0;
343
344         vdc_end_one(port, dr, pkt->start_idx);
345
346         return 0;
347 }
348
/* Handle a data NACK.  Not implemented: the message is ignored and 0 is
 * returned.
 */
static int vdc_nack(struct vdc_port *port, void *msgbuf)
{
	/* XXX Implement me XXX */
	(void)port;
	(void)msgbuf;

	return 0;
}
354
/* LDC event callback for a port (installed through vdc_ldc_cfg.event).
 *
 * Runs entirely under vio->lock.  RESET events update the link state and
 * queue the port's reset work; UP events only update the link state; any
 * event other than DATA_READY is logged and ignored.  For DATA_READY,
 * messages are read and dispatched until the channel is empty or an error
 * occurs; on error, any pending waiter is completed with that error.
 */
355 static void vdc_event(void *arg, int event)
356 {
357         struct vdc_port *port = arg;
358         struct vio_driver_state *vio = &port->vio;
359         unsigned long flags;
360         int err;
361
362         spin_lock_irqsave(&vio->lock, flags);
363
364         if (unlikely(event == LDC_EVENT_RESET)) {
365                 vio_link_state_change(vio, event);
366                 queue_work(sunvdc_wq, &port->ldc_reset_work);
367                 goto out;
368         }
369
370         if (unlikely(event == LDC_EVENT_UP)) {
371                 vio_link_state_change(vio, event);
372                 goto out;
373         }
374
375         if (unlikely(event != LDC_EVENT_DATA_READY)) {
376                 pr_warn(PFX "Unexpected LDC event %d\n", event);
377                 goto out;
378         }
379
380         err = 0;
381         while (1) {
                /* 64-byte receive buffer, viewable as a message tag. */
382                 union {
383                         struct vio_msg_tag tag;
384                         u64 raw[8];
385                 } msgbuf;
386
387                 err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
388                 if (unlikely(err < 0)) {
389                         if (err == -ECONNRESET)
390                                 vio_conn_reset(vio);
391                         break;
392                 }
                /* Nothing more to read. */
393                 if (err == 0)
394                         break;
395                 viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
396                        msgbuf.tag.type,
397                        msgbuf.tag.stype,
398                        msgbuf.tag.stype_env,
399                        msgbuf.tag.sid);
400                 err = vio_validate_sid(vio, &msgbuf.tag);
401                 if (err < 0)
402                         break;
403
                /* Data messages: ACK/NACK of descriptors; control messages
                 * go to the generic VIO control engine; anything else is
                 * treated as unknown.
                 */
404                 if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
405                         if (msgbuf.tag.stype == VIO_SUBTYPE_ACK)
406                                 err = vdc_ack(port, &msgbuf);
407                         else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK)
408                                 err = vdc_nack(port, &msgbuf);
409                         else
410                                 err = vdc_handle_unknown(port, &msgbuf);
411                 } else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
412                         err = vio_control_pkt_engine(vio, &msgbuf);
413                 } else {
414                         err = vdc_handle_unknown(port, &msgbuf);
415                 }
416                 if (err < 0)
417                         break;
418         }
        /* Propagate a failure to whoever is blocked on this port. */
419         if (err < 0)
420                 vdc_finish(&port->vio, err, WAITING_FOR_ANY);
421 out:
422         spin_unlock_irqrestore(&vio->lock, flags);
423 }
424
425 static int __vdc_tx_trigger(struct vdc_port *port)
426 {
427         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
428         struct vio_dring_data hdr = {
429                 .tag = {
430                         .type           = VIO_TYPE_DATA,
431                         .stype          = VIO_SUBTYPE_INFO,
432                         .stype_env      = VIO_DRING_DATA,
433                         .sid            = vio_send_sid(&port->vio),
434                 },
435                 .dring_ident            = dr->ident,
436                 .start_idx              = dr->prod,
437                 .end_idx                = dr->prod,
438         };
439         int err, delay;
440         int retries = 0;
441
442         hdr.seq = dr->snd_nxt;
443         delay = 1;
444         do {
445                 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
446                 if (err > 0) {
447                         dr->snd_nxt++;
448                         break;
449                 }
450                 udelay(delay);
451                 if ((delay <<= 1) > 128)
452                         delay = 128;
453                 if (retries++ > VDC_MAX_RETRIES)
454                         break;
455         } while (err == -EAGAIN);
456
457         if (err == -ENOTCONN)
458                 vdc_ldc_reset(port);
459         return err;
460 }
461
/* Submit one block request through the TX descriptor ring.
 *
 * The request is mapped to a scatterlist, the scatterlist is LDC-mapped
 * into the cookies of the current ring descriptor, the descriptor is
 * filled in and marked ready, and the peer is notified via
 * __vdc_tx_trigger().  The producer index and request id only advance
 * when the trigger succeeds.
 *
 * Returns a negative errno on failure, otherwise the (non-negative)
 * result of __vdc_tx_trigger().  vdc_queue_rq() calls this with
 * port->vio.lock held.
 *
 * NOTE(review): when the trigger fails, the LDC mapping, rqe->req and
 * the READY descriptor state are left as they are here -- confirm that
 * cleanup happens elsewhere.
 */
462 static int __send_request(struct request *req)
463 {
464         struct vdc_port *port = req->q->disk->private_data;
465         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
466         struct scatterlist sg[MAX_RING_COOKIES];
467         struct vdc_req_entry *rqe;
468         struct vio_disk_desc *desc;
469         unsigned int map_perm;
470         int nsg, err, i;
471         u64 len;
472         u8 op;
473
        /* sg[] above has MAX_RING_COOKIES entries; refuse anything larger. */
474         if (WARN_ON(port->ring_cookies > MAX_RING_COOKIES))
475                 return -EINVAL;
476
477         map_perm = LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
478
        /* A read needs the buffer mapped writable, a write needs it readable. */
479         if (rq_data_dir(req) == READ) {
480                 map_perm |= LDC_MAP_W;
481                 op = VD_OP_BREAD;
482         } else {
483                 map_perm |= LDC_MAP_R;
484                 op = VD_OP_BWRITE;
485         }
486
487         sg_init_table(sg, port->ring_cookies);
488         nsg = blk_rq_map_sg(req->q, req, sg);
489
        /* Total transfer length is the sum of the segment lengths. */
490         len = 0;
491         for (i = 0; i < nsg; i++)
492                 len += sg[i].length;
493
494         desc = vio_dring_cur(dr);
495
        /* On success err holds the number of cookies used (stored below). */
496         err = ldc_map_sg(port->vio.lp, sg, nsg,
497                          desc->cookies, port->ring_cookies,
498                          map_perm);
499         if (err < 0) {
500                 printk(KERN_ERR PFX "ldc_map_sg() failure, err=%d.\n", err);
501                 return err;
502         }
503
        /* Remember the request so vdc_end_one() can complete it on ACK. */
504         rqe = &port->rq_arr[dr->prod];
505         rqe->req = req;
506
507         desc->hdr.ack = VIO_ACK_ENABLE;
508         desc->req_id = port->req_id;
509         desc->operation = op;
510         if (port->vdisk_type == VD_DISK_TYPE_DISK) {
511                 desc->slice = 0xff;
512         } else {
513                 desc->slice = 0;
514         }
515         desc->status = ~0;
        /* Convert the 512-byte sector position into vdisk blocks. */
516         desc->offset = (blk_rq_pos(req) << 9) / port->vdisk_block_size;
517         desc->size = len;
518         desc->ncookies = err;
519
520         /* This has to be a non-SMP write barrier because we are writing
521          * to memory which is shared with the peer LDOM.
522          */
523         wmb();
524         desc->hdr.state = VIO_DESC_READY;
525
526         err = __vdc_tx_trigger(port);
527         if (err < 0) {
528                 printk(KERN_ERR PFX "vdc_tx_trigger() failure, err=%d\n", err);
529         } else {
530                 port->req_id++;
531                 dr->prod = vio_dring_next(dr, dr->prod);
532         }
533
534         return err;
535 }
536
537 static blk_status_t vdc_queue_rq(struct blk_mq_hw_ctx *hctx,
538                                  const struct blk_mq_queue_data *bd)
539 {
540         struct vdc_port *port = hctx->queue->queuedata;
541         struct vio_dring_state *dr;
542         unsigned long flags;
543
544         dr = &port->vio.drings[VIO_DRIVER_TX_RING];
545
546         blk_mq_start_request(bd->rq);
547
548         spin_lock_irqsave(&port->vio.lock, flags);
549
550         /*
551          * Doing drain, just end the request in error
552          */
553         if (unlikely(port->drain)) {
554                 spin_unlock_irqrestore(&port->vio.lock, flags);
555                 return BLK_STS_IOERR;
556         }
557
558         if (unlikely(vdc_tx_dring_avail(dr) < 1)) {
559                 spin_unlock_irqrestore(&port->vio.lock, flags);
560                 blk_mq_stop_hw_queue(hctx);
561                 return BLK_STS_DEV_RESOURCE;
562         }
563
564         if (__send_request(bd->rq) < 0) {
565                 spin_unlock_irqrestore(&port->vio.lock, flags);
566                 return BLK_STS_IOERR;
567         }
568
569         spin_unlock_irqrestore(&port->vio.lock, flags);
570         return BLK_STS_OK;
571 }
572
/* Issue a non-block "generic" vdisk operation and wait for its completion.
 *
 * @op must be advertised in port->operations, otherwise -EOPNOTSUPP is
 * returned.  Block reads/writes and unknown operations give -EINVAL; the
 * EFI operations give -EOPNOTSUPP.  For the remaining operations a
 * zeroed bounce buffer (size rounded up to a multiple of 8) is
 * allocated, filled from @buf when the peer needs to read it, LDC-mapped
 * into the current TX descriptor and submitted.  The caller then sleeps
 * until the waiter registered in port->vio.cmp is completed.  When the
 * peer writes to the buffer, up to @len bytes are copied back to @buf.
 *
 * Returns a negative errno on failure, otherwise the completion's error
 * value.
 *
 * NOTE(review): the copy back to @buf and the kfree() of the bounce
 * buffer also run when the trigger failed; confirm the still-mapped
 * buffer is not a concern on that path.
 */
573 static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
574 {
575         struct vio_dring_state *dr;
576         struct vio_completion comp;
577         struct vio_disk_desc *desc;
578         unsigned int map_perm;
579         unsigned long flags;
580         int op_len, err;
581         void *req_buf;
582
        /* port->operations is tested as a bitmask indexed by opcode. */
583         if (!(((u64)1 << (u64)op) & port->operations))
584                 return -EOPNOTSUPP;
585
        /* Select the payload size and mapping direction for each operation. */
586         switch (op) {
587         case VD_OP_BREAD:
588         case VD_OP_BWRITE:
589         default:
590                 return -EINVAL;
591
592         case VD_OP_FLUSH:
593                 op_len = 0;
594                 map_perm = 0;
595                 break;
596
597         case VD_OP_GET_WCE:
598                 op_len = sizeof(u32);
599                 map_perm = LDC_MAP_W;
600                 break;
601
602         case VD_OP_SET_WCE:
603                 op_len = sizeof(u32);
604                 map_perm = LDC_MAP_R;
605                 break;
606
607         case VD_OP_GET_VTOC:
608                 op_len = sizeof(struct vio_disk_vtoc);
609                 map_perm = LDC_MAP_W;
610                 break;
611
612         case VD_OP_SET_VTOC:
613                 op_len = sizeof(struct vio_disk_vtoc);
614                 map_perm = LDC_MAP_R;
615                 break;
616
617         case VD_OP_GET_DISKGEOM:
618                 op_len = sizeof(struct vio_disk_geom);
619                 map_perm = LDC_MAP_W;
620                 break;
621
622         case VD_OP_SET_DISKGEOM:
623                 op_len = sizeof(struct vio_disk_geom);
624                 map_perm = LDC_MAP_R;
625                 break;
626
627         case VD_OP_SCSICMD:
628                 op_len = 16;
629                 map_perm = LDC_MAP_RW;
630                 break;
631
632         case VD_OP_GET_DEVID:
633                 op_len = sizeof(struct vio_disk_devid);
634                 map_perm = LDC_MAP_W;
635                 break;
636
637         case VD_OP_GET_EFI:
638         case VD_OP_SET_EFI:
639                 return -EOPNOTSUPP;
640         }
641
642         map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
643
        /* Round the payload size up to a multiple of 8 bytes. */
644         op_len = (op_len + 7) & ~7;
645         req_buf = kzalloc(op_len, GFP_KERNEL);
646         if (!req_buf)
647                 return -ENOMEM;
648
        /* Never copy more than the bounce buffer holds. */
649         if (len > op_len)
650                 len = op_len;
651
652         if (map_perm & LDC_MAP_R)
653                 memcpy(req_buf, buf, len);
654
655         spin_lock_irqsave(&port->vio.lock, flags);
656
657         dr = &port->vio.drings[VIO_DRIVER_TX_RING];
658
659         /* XXX If we want to use this code generically we have to
660          * XXX handle TX ring exhaustion etc.
661          */
662         desc = vio_dring_cur(dr);
663
664         err = ldc_map_single(port->vio.lp, req_buf, op_len,
665                              desc->cookies, port->ring_cookies,
666                              map_perm);
667         if (err < 0) {
668                 spin_unlock_irqrestore(&port->vio.lock, flags);
669                 kfree(req_buf);
670                 return err;
671         }
672
        /* Register the on-stack waiter that vdc_finish() will complete. */
673         init_completion(&comp.com);
674         comp.waiting_for = WAITING_FOR_GEN_CMD;
675         port->vio.cmp = &comp;
676
677         desc->hdr.ack = VIO_ACK_ENABLE;
678         desc->req_id = port->req_id;
679         desc->operation = op;
680         desc->slice = 0;
681         desc->status = ~0;
682         desc->offset = 0;
683         desc->size = op_len;
684         desc->ncookies = err;
685
686         /* This has to be a non-SMP write barrier because we are writing
687          * to memory which is shared with the peer LDOM.
688          */
689         wmb();
690         desc->hdr.state = VIO_DESC_READY;
691
692         err = __vdc_tx_trigger(port);
693         if (err >= 0) {
694                 port->req_id++;
695                 dr->prod = vio_dring_next(dr, dr->prod);
696                 spin_unlock_irqrestore(&port->vio.lock, flags);
697
                /* Sleep (lock dropped) until the waiter is completed. */
698                 wait_for_completion(&comp.com);
699                 err = comp.err;
700         } else {
                /* Nothing was sent: withdraw the on-stack waiter. */
701                 port->vio.cmp = NULL;
702                 spin_unlock_irqrestore(&port->vio.lock, flags);
703         }
704
705         if (map_perm & LDC_MAP_W)
706                 memcpy(buf, req_buf, len);
707
708         kfree(req_buf);
709
710         return err;
711 }
712
713 static int vdc_alloc_tx_ring(struct vdc_port *port)
714 {
715         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
716         unsigned long len, entry_size;
717         int ncookies;
718         void *dring;
719
720         entry_size = sizeof(struct vio_disk_desc) +
721                 (sizeof(struct ldc_trans_cookie) * port->ring_cookies);
722         len = (VDC_TX_RING_SIZE * entry_size);
723
724         ncookies = VIO_MAX_RING_COOKIES;
725         dring = ldc_alloc_exp_dring(port->vio.lp, len,
726                                     dr->cookies, &ncookies,
727                                     (LDC_MAP_SHADOW |
728                                      LDC_MAP_DIRECT |
729                                      LDC_MAP_RW));
730         if (IS_ERR(dring))
731                 return PTR_ERR(dring);
732
733         dr->base = dring;
734         dr->entry_size = entry_size;
735         dr->num_entries = VDC_TX_RING_SIZE;
736         dr->prod = dr->cons = 0;
737         dr->pending = VDC_TX_RING_SIZE;
738         dr->ncookies = ncookies;
739
740         return 0;
741 }
742
743 static void vdc_free_tx_ring(struct vdc_port *port)
744 {
745         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
746
747         if (dr->base) {
748                 ldc_free_exp_dring(port->vio.lp, dr->base,
749                                    (dr->entry_size * dr->num_entries),
750                                    dr->cookies, dr->ncookies);
751                 dr->base = NULL;
752                 dr->entry_size = 0;
753                 dr->num_entries = 0;
754                 dr->pending = 0;
755                 dr->ncookies = 0;
756         }
757 }
758
759 static int vdc_port_up(struct vdc_port *port)
760 {
761         struct vio_completion comp;
762
763         init_completion(&comp.com);
764         comp.err = 0;
765         comp.waiting_for = WAITING_FOR_LINK_UP;
766         port->vio.cmp = &comp;
767
768         vio_port_up(&port->vio);
769         wait_for_completion(&comp.com);
770         return comp.err;
771 }
772
/* Tear the port's transport down: disconnect and unbind the LDC channel,
 * free the TX descriptor ring, then free the VIO LDC state.
 */
773 static void vdc_port_down(struct vdc_port *port)
774 {
775         ldc_disconnect(port->vio.lp);
776         ldc_unbind(port->vio.lp);
777         vdc_free_tx_ring(port);
778         vio_ldc_free(&port->vio);
779 }
780
/* blk-mq operations passed to blk_mq_alloc_sq_tag_set() in probe_disk();
 * only request submission is provided.
 */
781 static const struct blk_mq_ops vdc_mq_ops = {
782         .queue_rq       = vdc_queue_rq,
783 };
784
785 static int probe_disk(struct vdc_port *port)
786 {
787         struct request_queue *q;
788         struct gendisk *g;
789         int err;
790
791         err = vdc_port_up(port);
792         if (err)
793                 return err;
794
795         /* Using version 1.2 means vdisk_phys_blksz should be set unless the
796          * disk is reserved by another system.
797          */
798         if (vdc_version_supported(port, 1, 2) && !port->vdisk_phys_blksz)
799                 return -ENODEV;
800
801         if (vdc_version_supported(port, 1, 1)) {
802                 /* vdisk_size should be set during the handshake, if it wasn't
803                  * then the underlying disk is reserved by another system
804                  */
805                 if (port->vdisk_size == -1)
806                         return -ENODEV;
807         } else {
808                 struct vio_disk_geom geom;
809
810                 err = generic_request(port, VD_OP_GET_DISKGEOM,
811                                       &geom, sizeof(geom));
812                 if (err < 0) {
813                         printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns "
814                                "error %d\n", err);
815                         return err;
816                 }
817                 port->vdisk_size = ((u64)geom.num_cyl *
818                                     (u64)geom.num_hd *
819                                     (u64)geom.num_sec);
820         }
821
822         err = blk_mq_alloc_sq_tag_set(&port->tag_set, &vdc_mq_ops,
823                         VDC_TX_RING_SIZE, BLK_MQ_F_SHOULD_MERGE);
824         if (err)
825                 return err;
826
827         g = blk_mq_alloc_disk(&port->tag_set, port);
828         if (IS_ERR(g)) {
829                 printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n",
830                        port->vio.name);
831                 err = PTR_ERR(g);
832                 goto out_free_tag;
833         }
834
835         port->disk = g;
836         q = g->queue;
837
838         /* Each segment in a request is up to an aligned page in size. */
839         blk_queue_segment_boundary(q, PAGE_SIZE - 1);
840         blk_queue_max_segment_size(q, PAGE_SIZE);
841
842         blk_queue_max_segments(q, port->ring_cookies);
843         blk_queue_max_hw_sectors(q, port->max_xfer_size);
844         g->major = vdc_major;
845         g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT;
846         g->minors = 1 << PARTITION_SHIFT;
847         strcpy(g->disk_name, port->disk_name);
848
849         g->fops = &vdc_fops;
850         g->queue = q;
851         g->private_data = port;
852
853         set_capacity(g, port->vdisk_size);
854
855         if (vdc_version_supported(port, 1, 1)) {
856                 switch (port->vdisk_mtype) {
857                 case VD_MEDIA_TYPE_CD:
858                         pr_info(PFX "Virtual CDROM %s\n", port->disk_name);
859                         g->flags |= GENHD_FL_REMOVABLE;
860                         set_disk_ro(g, 1);
861                         break;
862
863                 case VD_MEDIA_TYPE_DVD:
864                         pr_info(PFX "Virtual DVD %s\n", port->disk_name);
865                         g->flags |= GENHD_FL_REMOVABLE;
866                         set_disk_ro(g, 1);
867                         break;
868
869                 case VD_MEDIA_TYPE_FIXED:
870                         pr_info(PFX "Virtual Hard disk %s\n", port->disk_name);
871                         break;
872                 }
873         }
874
875         blk_queue_physical_block_size(q, port->vdisk_phys_blksz);
876
877         pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n",
878                g->disk_name,
879                port->vdisk_size, (port->vdisk_size >> (20 - 9)),
880                port->vio.ver.major, port->vio.ver.minor);
881
882         err = device_add_disk(&port->vio.vdev->dev, g, NULL);
883         if (err)
884                 goto out_cleanup_disk;
885
886         return 0;
887
888 out_cleanup_disk:
889         put_disk(g);
890 out_free_tag:
891         blk_mq_free_tag_set(&port->tag_set);
892         return err;
893 }
894
/* LDC channel configuration: vdc_event() as the event callback, an MTU of
 * 64 (the size of the message buffer vdc_event() reads into), and
 * unreliable mode.
 */
895 static struct ldc_channel_config vdc_ldc_cfg = {
896         .event          = vdc_event,
897         .mtu            = 64,
898         .mode           = LDC_MODE_UNRELIABLE,
899 };
900
/* Hooks supplied to the VIO layer: how to send our attribute packet, how to
 * handle the server's attribute reply, and what to do once the handshake
 * has completed.
 */
901 static struct vio_driver_ops vdc_vio_ops = {
902         .send_attr              = vdc_send_attr,
903         .handle_attr            = vdc_handle_attr,
904         .handshake_complete     = vdc_handshake_complete,
905 };
906
907 static void print_version(void)
908 {
909         static int version_printed;
910
911         if (version_printed++ == 0)
912                 printk(KERN_INFO "%s", version);
913 }
914
/* Search key handed to vdc_device_probed() through device_find_child(). */
struct vdc_check_port_data {
        int     dev_no;         /* device number to look for */
        char    *type;          /* device type string to look for */
};
919
920 static int vdc_device_probed(struct device *dev, void *arg)
921 {
922         struct vio_dev *vdev = to_vio_dev(dev);
923         struct vdc_check_port_data *port_data;
924
925         port_data = (struct vdc_check_port_data *)arg;
926
927         if ((vdev->dev_no == port_data->dev_no) &&
928             (!(strcmp((char *)&vdev->type, port_data->type))) &&
929                 dev_get_drvdata(dev)) {
930                 /* This device has already been configured
931                  * by vdc_port_probe()
932                  */
933                 return 1;
934         } else {
935                 return 0;
936         }
937 }
938
939 /* Determine whether the VIO device is part of an mpgroup
940  * by locating all the virtual-device-port nodes associated
941  * with the parent virtual-device node for the VIO device
942  * and checking whether any of these nodes are vdc-ports
943  * which have already been configured.
944  *
945  * Returns true if this device is part of an mpgroup and has
946  * already been probed.
947  */
948 static bool vdc_port_mpgroup_check(struct vio_dev *vdev)
949 {
950         struct vdc_check_port_data port_data;
951         struct device *dev;
952
953         port_data.dev_no = vdev->dev_no;
954         port_data.type = (char *)&vdev->type;
955
956         dev = device_find_child(vdev->dev.parent, &port_data,
957                                 vdc_device_probed);
958
959         if (dev)
960                 return true;
961
962         return false;
963 }
964
965 static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
966 {
967         struct mdesc_handle *hp;
968         struct vdc_port *port;
969         int err;
970         const u64 *ldc_timeout;
971
972         print_version();
973
974         hp = mdesc_grab();
975
976         err = -ENODEV;
977         if ((vdev->dev_no << PARTITION_SHIFT) & ~(u64)MINORMASK) {
978                 printk(KERN_ERR PFX "Port id [%llu] too large.\n",
979                        vdev->dev_no);
980                 goto err_out_release_mdesc;
981         }
982
983         /* Check if this device is part of an mpgroup */
984         if (vdc_port_mpgroup_check(vdev)) {
985                 printk(KERN_WARNING
986                         "VIO: Ignoring extra vdisk port %s",
987                         dev_name(&vdev->dev));
988                 goto err_out_release_mdesc;
989         }
990
991         port = kzalloc(sizeof(*port), GFP_KERNEL);
992         if (!port) {
993                 err = -ENOMEM;
994                 goto err_out_release_mdesc;
995         }
996
997         if (vdev->dev_no >= 26)
998                 snprintf(port->disk_name, sizeof(port->disk_name),
999                          VDCBLK_NAME "%c%c",
1000                          'a' + ((int)vdev->dev_no / 26) - 1,
1001                          'a' + ((int)vdev->dev_no % 26));
1002         else
1003                 snprintf(port->disk_name, sizeof(port->disk_name),
1004                          VDCBLK_NAME "%c", 'a' + ((int)vdev->dev_no % 26));
1005         port->vdisk_size = -1;
1006
1007         /* Actual wall time may be double due to do_generic_file_read() doing
1008          * a readahead I/O first, and once that fails it will try to read a
1009          * single page.
1010          */
1011         ldc_timeout = mdesc_get_property(hp, vdev->mp, "vdc-timeout", NULL);
1012         port->ldc_timeout = ldc_timeout ? *ldc_timeout : 0;
1013         INIT_DELAYED_WORK(&port->ldc_reset_timer_work, vdc_ldc_reset_timer_work);
1014         INIT_WORK(&port->ldc_reset_work, vdc_ldc_reset_work);
1015
1016         err = vio_driver_init(&port->vio, vdev, VDEV_DISK,
1017                               vdc_versions, ARRAY_SIZE(vdc_versions),
1018                               &vdc_vio_ops, port->disk_name);
1019         if (err)
1020                 goto err_out_free_port;
1021
1022         port->vdisk_block_size = VDC_DEFAULT_BLK_SIZE;
1023         port->max_xfer_size = MAX_XFER_SIZE;
1024         port->ring_cookies = MAX_RING_COOKIES;
1025
1026         err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
1027         if (err)
1028                 goto err_out_free_port;
1029
1030         err = vdc_alloc_tx_ring(port);
1031         if (err)
1032                 goto err_out_free_ldc;
1033
1034         err = probe_disk(port);
1035         if (err)
1036                 goto err_out_free_tx_ring;
1037
1038         /* Note that the device driver_data is used to determine
1039          * whether the port has been probed.
1040          */
1041         dev_set_drvdata(&vdev->dev, port);
1042
1043         mdesc_release(hp);
1044
1045         return 0;
1046
1047 err_out_free_tx_ring:
1048         vdc_free_tx_ring(port);
1049
1050 err_out_free_ldc:
1051         vio_ldc_free(&port->vio);
1052
1053 err_out_free_port:
1054         kfree(port);
1055
1056 err_out_release_mdesc:
1057         mdesc_release(hp);
1058         return err;
1059 }
1060
1061 static void vdc_port_remove(struct vio_dev *vdev)
1062 {
1063         struct vdc_port *port = dev_get_drvdata(&vdev->dev);
1064
1065         if (port) {
1066                 blk_mq_stop_hw_queues(port->disk->queue);
1067
1068                 flush_work(&port->ldc_reset_work);
1069                 cancel_delayed_work_sync(&port->ldc_reset_timer_work);
1070                 del_timer_sync(&port->vio.timer);
1071
1072                 del_gendisk(port->disk);
1073                 put_disk(port->disk);
1074                 blk_mq_free_tag_set(&port->tag_set);
1075
1076                 vdc_free_tx_ring(port);
1077                 vio_ldc_free(&port->vio);
1078
1079                 dev_set_drvdata(&vdev->dev, NULL);
1080
1081                 kfree(port);
1082         }
1083 }
1084
1085 static void vdc_requeue_inflight(struct vdc_port *port)
1086 {
1087         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1088         u32 idx;
1089
1090         for (idx = dr->cons; idx != dr->prod; idx = vio_dring_next(dr, idx)) {
1091                 struct vio_disk_desc *desc = vio_dring_entry(dr, idx);
1092                 struct vdc_req_entry *rqe = &port->rq_arr[idx];
1093                 struct request *req;
1094
1095                 ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies);
1096                 desc->hdr.state = VIO_DESC_FREE;
1097                 dr->cons = vio_dring_next(dr, idx);
1098
1099                 req = rqe->req;
1100                 if (req == NULL) {
1101                         vdc_end_special(port, desc);
1102                         continue;
1103                 }
1104
1105                 rqe->req = NULL;
1106                 blk_mq_requeue_request(req, false);
1107         }
1108 }
1109
1110 static void vdc_queue_drain(struct vdc_port *port)
1111 {
1112         struct request_queue *q = port->disk->queue;
1113
1114         /*
1115          * Mark the queue as draining, then freeze/quiesce to ensure
1116          * that all existing requests are seen in ->queue_rq() and killed
1117          */
1118         port->drain = 1;
1119         spin_unlock_irq(&port->vio.lock);
1120
1121         blk_mq_freeze_queue(q);
1122         blk_mq_quiesce_queue(q);
1123
1124         spin_lock_irq(&port->vio.lock);
1125         port->drain = 0;
1126         blk_mq_unquiesce_queue(q);
1127         blk_mq_unfreeze_queue(q);
1128 }
1129
1130 static void vdc_ldc_reset_timer_work(struct work_struct *work)
1131 {
1132         struct vdc_port *port;
1133         struct vio_driver_state *vio;
1134
1135         port = container_of(work, struct vdc_port, ldc_reset_timer_work.work);
1136         vio = &port->vio;
1137
1138         spin_lock_irq(&vio->lock);
1139         if (!(port->vio.hs_state & VIO_HS_COMPLETE)) {
1140                 pr_warn(PFX "%s ldc down %llu seconds, draining queue\n",
1141                         port->disk_name, port->ldc_timeout);
1142                 vdc_queue_drain(port);
1143                 vdc_blk_queue_start(port);
1144         }
1145         spin_unlock_irq(&vio->lock);
1146 }
1147
1148 static void vdc_ldc_reset_work(struct work_struct *work)
1149 {
1150         struct vdc_port *port;
1151         struct vio_driver_state *vio;
1152         unsigned long flags;
1153
1154         port = container_of(work, struct vdc_port, ldc_reset_work);
1155         vio = &port->vio;
1156
1157         spin_lock_irqsave(&vio->lock, flags);
1158         vdc_ldc_reset(port);
1159         spin_unlock_irqrestore(&vio->lock, flags);
1160 }
1161
1162 static void vdc_ldc_reset(struct vdc_port *port)
1163 {
1164         int err;
1165
1166         assert_spin_locked(&port->vio.lock);
1167
1168         pr_warn(PFX "%s ldc link reset\n", port->disk_name);
1169         blk_mq_stop_hw_queues(port->disk->queue);
1170         vdc_requeue_inflight(port);
1171         vdc_port_down(port);
1172
1173         err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
1174         if (err) {
1175                 pr_err(PFX "%s vio_ldc_alloc:%d\n", port->disk_name, err);
1176                 return;
1177         }
1178
1179         err = vdc_alloc_tx_ring(port);
1180         if (err) {
1181                 pr_err(PFX "%s vio_alloc_tx_ring:%d\n", port->disk_name, err);
1182                 goto err_free_ldc;
1183         }
1184
1185         if (port->ldc_timeout)
1186                 mod_delayed_work(system_wq, &port->ldc_reset_timer_work,
1187                           round_jiffies(jiffies + HZ * port->ldc_timeout));
1188         mod_timer(&port->vio.timer, round_jiffies(jiffies + HZ));
1189         return;
1190
1191 err_free_ldc:
1192         vio_ldc_free(&port->vio);
1193 }
1194
1195 static const struct vio_device_id vdc_port_match[] = {
1196         {
1197                 .type = "vdc-port",
1198         },
1199         {},
1200 };
1201 MODULE_DEVICE_TABLE(vio, vdc_port_match);
1202
1203 static struct vio_driver vdc_port_driver = {
1204         .id_table       = vdc_port_match,
1205         .probe          = vdc_port_probe,
1206         .remove         = vdc_port_remove,
1207         .name           = "vdc_port",
1208 };
1209
1210 static int __init vdc_init(void)
1211 {
1212         int err;
1213
1214         sunvdc_wq = alloc_workqueue("sunvdc", 0, 0);
1215         if (!sunvdc_wq)
1216                 return -ENOMEM;
1217
1218         err = register_blkdev(0, VDCBLK_NAME);
1219         if (err < 0)
1220                 goto out_free_wq;
1221
1222         vdc_major = err;
1223
1224         err = vio_register_driver(&vdc_port_driver);
1225         if (err)
1226                 goto out_unregister_blkdev;
1227
1228         return 0;
1229
1230 out_unregister_blkdev:
1231         unregister_blkdev(vdc_major, VDCBLK_NAME);
1232         vdc_major = 0;
1233
1234 out_free_wq:
1235         destroy_workqueue(sunvdc_wq);
1236         return err;
1237 }
1238
/* Module unload: unregister the VIO driver, give back the block major
 * obtained in vdc_init() and destroy the driver workqueue.
 */
static void __exit vdc_exit(void)
{
        vio_unregister_driver(&vdc_port_driver);
        unregister_blkdev(vdc_major, VDCBLK_NAME);
        destroy_workqueue(sunvdc_wq);
}
1245
/* Register the module's load and unload entry points. */
module_init(vdc_init);
module_exit(vdc_exit);