/*
 * Linux driver for VMware's para-virtualized SCSI HBA.
 *
 * Copyright (C) 2008-2014, VMware, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Maintained by: Jim Gill <jgill@vmware.com>
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/pci.h>

#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_tcq.h>

#include "vmw_pvscsi.h"

#define PVSCSI_LINUX_DRIVER_DESC "VMware PVSCSI driver"

MODULE_DESCRIPTION(PVSCSI_LINUX_DRIVER_DESC);
MODULE_AUTHOR("VMware, Inc.");
MODULE_LICENSE("GPL");
MODULE_VERSION(PVSCSI_DRIVER_VERSION_STRING);

#define PVSCSI_DEFAULT_NUM_PAGES_PER_RING       8
#define PVSCSI_DEFAULT_NUM_PAGES_MSG_RING       1
#define PVSCSI_DEFAULT_QUEUE_DEPTH              254
#define SGL_SIZE                                PAGE_SIZE

struct pvscsi_sg_list {
        struct PVSCSISGElement sge[PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT];
};

struct pvscsi_ctx {
        /*
         * The index of the context in cmd_map serves as the context ID for a
         * 1-to-1 mapping of completions back to requests.
         */
        struct scsi_cmnd        *cmd;
        struct pvscsi_sg_list   *sgl;
        struct list_head        list;
        dma_addr_t              dataPA;
        dma_addr_t              sensePA;
        dma_addr_t              sglPA;
        struct completion       *abort_cmp;
};

struct pvscsi_adapter {
        char                            *mmioBase;
        u8                              rev;
        bool                            use_msg;
        bool                            use_req_threshold;

        spinlock_t                      hw_lock;

        struct workqueue_struct         *workqueue;
        struct work_struct              work;

        struct PVSCSIRingReqDesc        *req_ring;
        unsigned                        req_pages;
        unsigned                        req_depth;
        dma_addr_t                      reqRingPA;

        struct PVSCSIRingCmpDesc        *cmp_ring;
        unsigned                        cmp_pages;
        dma_addr_t                      cmpRingPA;

        struct PVSCSIRingMsgDesc        *msg_ring;
        unsigned                        msg_pages;
        dma_addr_t                      msgRingPA;

        struct PVSCSIRingsState         *rings_state;
        dma_addr_t                      ringStatePA;

        struct pci_dev                  *dev;
        struct Scsi_Host                *host;

        struct list_head                cmd_pool;
        struct pvscsi_ctx               *cmd_map;
};


/* Command line parameters */
static int pvscsi_ring_pages;
static int pvscsi_msg_ring_pages = PVSCSI_DEFAULT_NUM_PAGES_MSG_RING;
static int pvscsi_cmd_per_lun    = PVSCSI_DEFAULT_QUEUE_DEPTH;
static bool pvscsi_disable_msi;
static bool pvscsi_disable_msix;
static bool pvscsi_use_msg       = true;
static bool pvscsi_use_req_threshold = true;

#define PVSCSI_RW (S_IRUSR | S_IWUSR)

module_param_named(ring_pages, pvscsi_ring_pages, int, PVSCSI_RW);
MODULE_PARM_DESC(ring_pages, "Number of pages per req/cmp ring - (default="
                 __stringify(PVSCSI_DEFAULT_NUM_PAGES_PER_RING)
                 "[up to 16 targets],"
                 __stringify(PVSCSI_SETUP_RINGS_MAX_NUM_PAGES)
                 "[for 16+ targets])");

module_param_named(msg_ring_pages, pvscsi_msg_ring_pages, int, PVSCSI_RW);
MODULE_PARM_DESC(msg_ring_pages, "Number of pages for the msg ring - (default="
                 __stringify(PVSCSI_DEFAULT_NUM_PAGES_MSG_RING) ")");

module_param_named(cmd_per_lun, pvscsi_cmd_per_lun, int, PVSCSI_RW);
MODULE_PARM_DESC(cmd_per_lun, "Maximum commands per lun - (default="
                 __stringify(PVSCSI_DEFAULT_QUEUE_DEPTH) ")");

module_param_named(disable_msi, pvscsi_disable_msi, bool, PVSCSI_RW);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");

module_param_named(disable_msix, pvscsi_disable_msix, bool, PVSCSI_RW);
MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");

module_param_named(use_msg, pvscsi_use_msg, bool, PVSCSI_RW);
MODULE_PARM_DESC(use_msg, "Use msg ring when available - (default=1)");

module_param_named(use_req_threshold, pvscsi_use_req_threshold,
                   bool, PVSCSI_RW);
MODULE_PARM_DESC(use_req_threshold, "Use driver-based request coalescing if configured - (default=1)");

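/*
 * All of the parameters above can be set at module load time; the values
 * below are purely illustrative, not recommendations:
 *
 *   modprobe vmw_pvscsi ring_pages=32 cmd_per_lun=64 use_msg=0
 */
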
static const struct pci_device_id pvscsi_pci_tbl[] = {
        { PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_PVSCSI) },
        { 0 }
};

MODULE_DEVICE_TABLE(pci, pvscsi_pci_tbl);

static struct device *
pvscsi_dev(const struct pvscsi_adapter *adapter)
{
        return &(adapter->dev->dev);
}

static struct pvscsi_ctx *
pvscsi_find_context(const struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd)
{
        struct pvscsi_ctx *ctx, *end;

        end = &adapter->cmd_map[adapter->req_depth];
        for (ctx = adapter->cmd_map; ctx < end; ctx++)
                if (ctx->cmd == cmd)
                        return ctx;

        return NULL;
}

static struct pvscsi_ctx *
pvscsi_acquire_context(struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd)
{
        struct pvscsi_ctx *ctx;

        if (list_empty(&adapter->cmd_pool))
                return NULL;

        ctx = list_first_entry(&adapter->cmd_pool, struct pvscsi_ctx, list);
        ctx->cmd = cmd;
        list_del(&ctx->list);

        return ctx;
}

static void pvscsi_release_context(struct pvscsi_adapter *adapter,
                                   struct pvscsi_ctx *ctx)
{
        ctx->cmd = NULL;
        ctx->abort_cmp = NULL;
        list_add(&ctx->list, &adapter->cmd_pool);
}

/*
 * Map a pvscsi_ctx struct to a context ID field value; we map to a simple
 * non-zero integer. ctx always points to an entry in the cmd_map array,
 * hence the return value is always >= 1.
 */
static u64 pvscsi_map_context(const struct pvscsi_adapter *adapter,
                              const struct pvscsi_ctx *ctx)
{
        return ctx - adapter->cmd_map + 1;
}

static struct pvscsi_ctx *
pvscsi_get_context(const struct pvscsi_adapter *adapter, u64 context)
{
        return &adapter->cmd_map[context - 1];
}

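/*
 * Register accessors: the device exposes its registers as 32-bit values
 * in the BAR-mapped MMIO region (adapter->mmioBase), at the
 * PVSCSI_REG_OFFSET_* offsets defined in vmw_pvscsi.h.
 */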
static void pvscsi_reg_write(const struct pvscsi_adapter *adapter,
                             u32 offset, u32 val)
{
        writel(val, adapter->mmioBase + offset);
}

static u32 pvscsi_reg_read(const struct pvscsi_adapter *adapter, u32 offset)
{
        return readl(adapter->mmioBase + offset);
}

static u32 pvscsi_read_intr_status(const struct pvscsi_adapter *adapter)
{
        return pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_INTR_STATUS);
}

static void pvscsi_write_intr_status(const struct pvscsi_adapter *adapter,
                                     u32 val)
{
        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_STATUS, val);
}

static void pvscsi_unmask_intr(const struct pvscsi_adapter *adapter)
{
        u32 intr_bits;

        intr_bits = PVSCSI_INTR_CMPL_MASK;
        if (adapter->use_msg)
                intr_bits |= PVSCSI_INTR_MSG_MASK;

        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, intr_bits);
}

static void pvscsi_mask_intr(const struct pvscsi_adapter *adapter)
{
        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, 0);
}

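/*
 * Issue a command to the device: write the command code to the COMMAND
 * register, then stream the descriptor payload to the COMMAND_DATA
 * register one 32-bit word at a time (len is truncated to whole words).
 */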
static void pvscsi_write_cmd_desc(const struct pvscsi_adapter *adapter,
                                  u32 cmd, const void *desc, size_t len)
{
        const u32 *ptr = desc;
        size_t i;

        len /= sizeof(*ptr);
        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND, cmd);
        for (i = 0; i < len; i++)
                pvscsi_reg_write(adapter,
                                 PVSCSI_REG_OFFSET_COMMAND_DATA, ptr[i]);
}

static void pvscsi_abort_cmd(const struct pvscsi_adapter *adapter,
                             const struct pvscsi_ctx *ctx)
{
        struct PVSCSICmdDescAbortCmd cmd = { 0 };

        cmd.target = ctx->cmd->device->id;
        cmd.context = pvscsi_map_context(adapter, ctx);

        pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd));
}

static void pvscsi_kick_rw_io(const struct pvscsi_adapter *adapter)
{
        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
}

static void pvscsi_process_request_ring(const struct pvscsi_adapter *adapter)
{
        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
}

static int scsi_is_rw(unsigned char op)
{
        return op == READ_6  || op == WRITE_6 ||
               op == READ_10 || op == WRITE_10 ||
               op == READ_12 || op == WRITE_12 ||
               op == READ_16 || op == WRITE_16;
}

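/*
 * Kick the device to process queued requests. For READ/WRITE commands the
 * kick is coalesced: when request-call thresholds are in use, the doorbell
 * is only rung once the number of outstanding requests reaches
 * s->reqCallThreshold. All other commands kick the device unconditionally.
 */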
static void pvscsi_kick_io(const struct pvscsi_adapter *adapter,
                           unsigned char op)
{
        if (scsi_is_rw(op)) {
                struct PVSCSIRingsState *s = adapter->rings_state;

                if (!adapter->use_req_threshold ||
                    s->reqProdIdx - s->reqConsIdx >= s->reqCallThreshold)
                        pvscsi_kick_rw_io(adapter);
        } else {
                pvscsi_process_request_ring(adapter);
        }
}

static void ll_adapter_reset(const struct pvscsi_adapter *adapter)
{
        dev_dbg(pvscsi_dev(adapter), "Adapter Reset on %p\n", adapter);

        pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
}

static void ll_bus_reset(const struct pvscsi_adapter *adapter)
{
        dev_dbg(pvscsi_dev(adapter), "Resetting bus on %p\n", adapter);

        pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_BUS, NULL, 0);
}

static void ll_device_reset(const struct pvscsi_adapter *adapter, u32 target)
{
        struct PVSCSICmdDescResetDevice cmd = { 0 };

        dev_dbg(pvscsi_dev(adapter), "Resetting device: target=%u\n", target);

        cmd.target = target;

        pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_DEVICE,
                              &cmd, sizeof(cmd));
}

static void pvscsi_create_sg(struct pvscsi_ctx *ctx,
                             struct scatterlist *sg, unsigned count)
{
        unsigned i;
        struct PVSCSISGElement *sge;

        BUG_ON(count > PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT);

        sge = &ctx->sgl->sge[0];
        for (i = 0; i < count; i++, sg = sg_next(sg)) {
                sge[i].addr   = sg_dma_address(sg);
                sge[i].length = sg_dma_len(sg);
                sge[i].flags  = 0;
        }
}

/*
 * Map all data buffers for a command into PCI space and
 * set up the scatter/gather list if needed.
 */
static int pvscsi_map_buffers(struct pvscsi_adapter *adapter,
                              struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd,
                              struct PVSCSIRingReqDesc *e)
{
        unsigned count;
        unsigned bufflen = scsi_bufflen(cmd);
        struct scatterlist *sg;

        e->dataLen = bufflen;
        e->dataAddr = 0;
        if (bufflen == 0)
                return 0;

        sg = scsi_sglist(cmd);
        count = scsi_sg_count(cmd);
        if (count != 0) {
                int segs = scsi_dma_map(cmd);

                if (segs == -ENOMEM) {
                        scmd_printk(KERN_DEBUG, cmd,
                                    "vmw_pvscsi: Failed to map cmd sglist for DMA.\n");
                        return -ENOMEM;
                } else if (segs > 1) {
                        pvscsi_create_sg(ctx, sg, segs);

                        e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
                        ctx->sglPA = dma_map_single(&adapter->dev->dev,
                                        ctx->sgl, SGL_SIZE, DMA_TO_DEVICE);
                        if (dma_mapping_error(&adapter->dev->dev, ctx->sglPA)) {
                                scmd_printk(KERN_ERR, cmd,
                                            "vmw_pvscsi: Failed to map ctx sglist for DMA.\n");
                                scsi_dma_unmap(cmd);
                                ctx->sglPA = 0;
                                return -ENOMEM;
                        }
                        e->dataAddr = ctx->sglPA;
                } else
                        e->dataAddr = sg_dma_address(sg);
        } else {
                /*
                 * In case there is no S/G list, scsi_sglist points
                 * directly to the buffer.
                 */
                ctx->dataPA = dma_map_single(&adapter->dev->dev, sg, bufflen,
                                             cmd->sc_data_direction);
                if (dma_mapping_error(&adapter->dev->dev, ctx->dataPA)) {
                        scmd_printk(KERN_DEBUG, cmd,
                                    "vmw_pvscsi: Failed to map direct data buffer for DMA.\n");
                        return -ENOMEM;
                }
                e->dataAddr = ctx->dataPA;
        }

        return 0;
}

/*
 * The device incorrectly doesn't clear the first byte of the sense
 * buffer in some cases. We have to do it ourselves.
 * Otherwise we run into trouble when SWIOTLB is forced.
 */
static void pvscsi_patch_sense(struct scsi_cmnd *cmd)
{
        if (cmd->sense_buffer)
                cmd->sense_buffer[0] = 0;
}

static void pvscsi_unmap_buffers(const struct pvscsi_adapter *adapter,
                                 struct pvscsi_ctx *ctx)
{
        struct scsi_cmnd *cmd;
        unsigned bufflen;

        cmd = ctx->cmd;
        bufflen = scsi_bufflen(cmd);

        if (bufflen != 0) {
                unsigned count = scsi_sg_count(cmd);

                if (count != 0) {
                        scsi_dma_unmap(cmd);
                        if (ctx->sglPA) {
                                dma_unmap_single(&adapter->dev->dev, ctx->sglPA,
                                                 SGL_SIZE, DMA_TO_DEVICE);
                                ctx->sglPA = 0;
                        }
                } else
                        dma_unmap_single(&adapter->dev->dev, ctx->dataPA,
                                         bufflen, cmd->sc_data_direction);
        }
        if (cmd->sense_buffer)
                dma_unmap_single(&adapter->dev->dev, ctx->sensePA,
                                 SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE);
}

static int pvscsi_allocate_rings(struct pvscsi_adapter *adapter)
{
        adapter->rings_state = dma_alloc_coherent(&adapter->dev->dev, PAGE_SIZE,
                        &adapter->ringStatePA, GFP_KERNEL);
        if (!adapter->rings_state)
                return -ENOMEM;

        adapter->req_pages = min(PVSCSI_MAX_NUM_PAGES_REQ_RING,
                                 pvscsi_ring_pages);
        adapter->req_depth = adapter->req_pages
                                        * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
        adapter->req_ring = dma_alloc_coherent(&adapter->dev->dev,
                        adapter->req_pages * PAGE_SIZE, &adapter->reqRingPA,
                        GFP_KERNEL);
        if (!adapter->req_ring)
                return -ENOMEM;

        adapter->cmp_pages = min(PVSCSI_MAX_NUM_PAGES_CMP_RING,
                                 pvscsi_ring_pages);
        adapter->cmp_ring = dma_alloc_coherent(&adapter->dev->dev,
                        adapter->cmp_pages * PAGE_SIZE, &adapter->cmpRingPA,
                        GFP_KERNEL);
        if (!adapter->cmp_ring)
                return -ENOMEM;

        BUG_ON(!IS_ALIGNED(adapter->ringStatePA, PAGE_SIZE));
        BUG_ON(!IS_ALIGNED(adapter->reqRingPA, PAGE_SIZE));
        BUG_ON(!IS_ALIGNED(adapter->cmpRingPA, PAGE_SIZE));

        if (!adapter->use_msg)
                return 0;

        adapter->msg_pages = min(PVSCSI_MAX_NUM_PAGES_MSG_RING,
                                 pvscsi_msg_ring_pages);
        adapter->msg_ring = dma_alloc_coherent(&adapter->dev->dev,
                        adapter->msg_pages * PAGE_SIZE, &adapter->msgRingPA,
                        GFP_KERNEL);
        if (!adapter->msg_ring)
                return -ENOMEM;
        BUG_ON(!IS_ALIGNED(adapter->msgRingPA, PAGE_SIZE));

        return 0;
}

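/*
 * Hand the ring pages to the device. The device addresses ring memory by
 * physical page number (PPN), i.e. the page-aligned DMA address shifted
 * right by PAGE_SHIFT, which is why the allocations above are asserted
 * to be page aligned.
 */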
static void pvscsi_setup_all_rings(const struct pvscsi_adapter *adapter)
{
        struct PVSCSICmdDescSetupRings cmd = { 0 };
        dma_addr_t base;
        unsigned i;

        cmd.ringsStatePPN   = adapter->ringStatePA >> PAGE_SHIFT;
        cmd.reqRingNumPages = adapter->req_pages;
        cmd.cmpRingNumPages = adapter->cmp_pages;

        base = adapter->reqRingPA;
        for (i = 0; i < adapter->req_pages; i++) {
                cmd.reqRingPPNs[i] = base >> PAGE_SHIFT;
                base += PAGE_SIZE;
        }

        base = adapter->cmpRingPA;
        for (i = 0; i < adapter->cmp_pages; i++) {
                cmd.cmpRingPPNs[i] = base >> PAGE_SHIFT;
                base += PAGE_SIZE;
        }

        memset(adapter->rings_state, 0, PAGE_SIZE);
        memset(adapter->req_ring, 0, adapter->req_pages * PAGE_SIZE);
        memset(adapter->cmp_ring, 0, adapter->cmp_pages * PAGE_SIZE);

        pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_RINGS,
                              &cmd, sizeof(cmd));

        if (adapter->use_msg) {
                struct PVSCSICmdDescSetupMsgRing cmd_msg = { 0 };

                cmd_msg.numPages = adapter->msg_pages;

                base = adapter->msgRingPA;
                for (i = 0; i < adapter->msg_pages; i++) {
                        cmd_msg.ringPPNs[i] = base >> PAGE_SHIFT;
                        base += PAGE_SIZE;
                }
                memset(adapter->msg_ring, 0, adapter->msg_pages * PAGE_SIZE);

                pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_MSG_RING,
                                      &cmd_msg, sizeof(cmd_msg));
        }
}

static int pvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth)
{
        if (!sdev->tagged_supported)
                qdepth = 1;
        return scsi_change_queue_depth(sdev, qdepth);
}

/*
 * Pull a completion descriptor off and pass the completion back
 * to the SCSI mid layer.
 */
static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
                                    const struct PVSCSIRingCmpDesc *e)
{
        struct pvscsi_ctx *ctx;
        struct scsi_cmnd *cmd;
        struct completion *abort_cmp;
        u32 btstat = e->hostStatus;
        u32 sdstat = e->scsiStatus;

        ctx = pvscsi_get_context(adapter, e->context);
        cmd = ctx->cmd;
        abort_cmp = ctx->abort_cmp;
        pvscsi_unmap_buffers(adapter, ctx);
        if (sdstat != SAM_STAT_CHECK_CONDITION)
                pvscsi_patch_sense(cmd);
        pvscsi_release_context(adapter, ctx);
        if (abort_cmp) {
                /*
                 * The command was requested to be aborted. Just signal that
                 * the request completed and swallow the actual cmd completion
                 * here. The abort handler will post a completion for this
                 * command indicating that it got successfully aborted.
                 */
                complete(abort_cmp);
                return;
        }

        cmd->result = 0;
        if (sdstat != SAM_STAT_GOOD &&
            (btstat == BTSTAT_SUCCESS ||
             btstat == BTSTAT_LINKED_COMMAND_COMPLETED ||
             btstat == BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG)) {
                if (sdstat == SAM_STAT_COMMAND_TERMINATED) {
                        cmd->result = (DID_RESET << 16);
                } else {
                        cmd->result = (DID_OK << 16) | sdstat;
                        if (sdstat == SAM_STAT_CHECK_CONDITION &&
                            cmd->sense_buffer)
                                cmd->result |= (DRIVER_SENSE << 24);
                }
        } else
                switch (btstat) {
                case BTSTAT_SUCCESS:
                case BTSTAT_LINKED_COMMAND_COMPLETED:
                case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
                        /* If everything went fine, let's move on..  */
                        cmd->result = (DID_OK << 16);
                        break;

                case BTSTAT_DATARUN:
                case BTSTAT_DATA_UNDERRUN:
                        /* Report residual data in underruns */
                        scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
                        cmd->result = (DID_ERROR << 16);
                        break;

                case BTSTAT_SELTIMEO:
                        /* Our emulation returns this for non-connected devs */
                        cmd->result = (DID_BAD_TARGET << 16);
                        break;

                case BTSTAT_LUNMISMATCH:
                case BTSTAT_TAGREJECT:
                case BTSTAT_BADMSG:
                        cmd->result = (DRIVER_INVALID << 24);
                        fallthrough;

                case BTSTAT_HAHARDWARE:
                case BTSTAT_INVPHASE:
                case BTSTAT_HATIMEOUT:
                case BTSTAT_NORESPONSE:
                case BTSTAT_DISCONNECT:
                case BTSTAT_HASOFTWARE:
                case BTSTAT_BUSFREE:
                case BTSTAT_SENSFAILED:
                        cmd->result |= (DID_ERROR << 16);
                        break;

                case BTSTAT_SENTRST:
                case BTSTAT_RECVRST:
                case BTSTAT_BUSRESET:
                        cmd->result = (DID_RESET << 16);
                        break;

                case BTSTAT_ABORTQUEUE:
                        cmd->result = (DID_BUS_BUSY << 16);
                        break;

                case BTSTAT_SCSIPARITY:
                        cmd->result = (DID_PARITY << 16);
                        break;

                default:
                        cmd->result = (DID_ERROR << 16);
                        scmd_printk(KERN_DEBUG, cmd,
                                    "Unknown completion status: 0x%x\n",
                                    btstat);
        }

        dev_dbg(&cmd->device->sdev_gendev,
                "cmd=%p %x ctx=%p result=0x%x status=0x%x,%x\n",
                cmd, cmd->cmnd[0], ctx, cmd->result, btstat, sdstat);

        cmd->scsi_done(cmd);
}

/*
 * Barrier usage: since the PVSCSI device is emulated, there are cases
 * where we may want to serialize accesses between the driver and the
 * emulation layer. We use compiler barriers instead of the more expensive
 * memory barriers because PVSCSI is only supported on x86, which has
 * strong memory access ordering.
 */
static void pvscsi_process_completion_ring(struct pvscsi_adapter *adapter)
{
        struct PVSCSIRingsState *s = adapter->rings_state;
        struct PVSCSIRingCmpDesc *ring = adapter->cmp_ring;
        u32 cmp_entries = s->cmpNumEntriesLog2;

        while (s->cmpConsIdx != s->cmpProdIdx) {
                struct PVSCSIRingCmpDesc *e = ring + (s->cmpConsIdx &
                                                      MASK(cmp_entries));
                /*
                 * This barrier() ensures that *e is not dereferenced while
                 * the device emulation still writes data into the slot.
                 * Since the device emulation advances s->cmpProdIdx only
                 * after updating the slot, we want to check it first.
                 */
                barrier();
                pvscsi_complete_request(adapter, e);
                /*
                 * This barrier() ensures that the compiler doesn't reorder
                 * the write to s->cmpConsIdx before the read of *e inside
                 * pvscsi_complete_request. Otherwise, the device emulation
                 * may overwrite *e before we have had a chance to read it.
                 */
                barrier();
                s->cmpConsIdx++;
        }
}

/*
 * Translate a Linux SCSI request into a request ring entry.
 */
static int pvscsi_queue_ring(struct pvscsi_adapter *adapter,
                             struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd)
{
        struct PVSCSIRingsState *s;
        struct PVSCSIRingReqDesc *e;
        struct scsi_device *sdev;
        u32 req_entries;

        s = adapter->rings_state;
        sdev = cmd->device;
        req_entries = s->reqNumEntriesLog2;

        /*
         * If this condition holds, we might have room on the request ring, but
         * we might not have room on the completion ring for the response.
         * However, we have already ruled out this possibility - we would not
         * have successfully allocated a context if it were true, since we only
         * have one context per request entry.  Check for it anyway, since it
         * would be a serious bug.
         */
        if (s->reqProdIdx - s->cmpConsIdx >= 1 << req_entries) {
                scmd_printk(KERN_ERR, cmd, "vmw_pvscsi: "
                            "ring full: reqProdIdx=%d cmpConsIdx=%d\n",
                            s->reqProdIdx, s->cmpConsIdx);
                return -1;
        }

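        /* Ring indices are free-running counters; MASK() maps them to a slot. */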
        e = adapter->req_ring + (s->reqProdIdx & MASK(req_entries));

        e->bus    = sdev->channel;
        e->target = sdev->id;
        memset(e->lun, 0, sizeof(e->lun));
        e->lun[1] = sdev->lun;

        if (cmd->sense_buffer) {
                ctx->sensePA = dma_map_single(&adapter->dev->dev,
                                cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE,
                                DMA_FROM_DEVICE);
                if (dma_mapping_error(&adapter->dev->dev, ctx->sensePA)) {
                        scmd_printk(KERN_DEBUG, cmd,
                                    "vmw_pvscsi: Failed to map sense buffer for DMA.\n");
                        ctx->sensePA = 0;
                        return -ENOMEM;
                }
                e->senseAddr = ctx->sensePA;
                e->senseLen = SCSI_SENSE_BUFFERSIZE;
        } else {
                e->senseLen  = 0;
                e->senseAddr = 0;
        }
        e->cdbLen   = cmd->cmd_len;
        e->vcpuHint = smp_processor_id();
        memcpy(e->cdb, cmd->cmnd, e->cdbLen);

        e->tag = SIMPLE_QUEUE_TAG;

        if (cmd->sc_data_direction == DMA_FROM_DEVICE)
                e->flags = PVSCSI_FLAG_CMD_DIR_TOHOST;
        else if (cmd->sc_data_direction == DMA_TO_DEVICE)
                e->flags = PVSCSI_FLAG_CMD_DIR_TODEVICE;
        else if (cmd->sc_data_direction == DMA_NONE)
                e->flags = PVSCSI_FLAG_CMD_DIR_NONE;
        else
                e->flags = 0;

        if (pvscsi_map_buffers(adapter, ctx, cmd, e) != 0) {
                if (cmd->sense_buffer) {
                        dma_unmap_single(&adapter->dev->dev, ctx->sensePA,
                                         SCSI_SENSE_BUFFERSIZE,
                                         DMA_FROM_DEVICE);
                        ctx->sensePA = 0;
                }
                return -ENOMEM;
        }

        e->context = pvscsi_map_context(adapter, ctx);

        barrier();

        s->reqProdIdx++;

        return 0;
}

static int pvscsi_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
{
        struct Scsi_Host *host = cmd->device->host;
        struct pvscsi_adapter *adapter = shost_priv(host);
        struct pvscsi_ctx *ctx;
        unsigned long flags;
        unsigned char op;

        spin_lock_irqsave(&adapter->hw_lock, flags);

        ctx = pvscsi_acquire_context(adapter, cmd);
        if (!ctx || pvscsi_queue_ring(adapter, ctx, cmd) != 0) {
                if (ctx)
                        pvscsi_release_context(adapter, ctx);
                spin_unlock_irqrestore(&adapter->hw_lock, flags);
                return SCSI_MLQUEUE_HOST_BUSY;
        }

        cmd->scsi_done = done;
        op = cmd->cmnd[0];

        dev_dbg(&cmd->device->sdev_gendev,
                "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, op);

        spin_unlock_irqrestore(&adapter->hw_lock, flags);

        pvscsi_kick_io(adapter, op);

        return 0;
}

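/*
 * DEF_SCSI_QCMD() generates pvscsi_queue(), the queuecommand wrapper that
 * adapts pvscsi_queue_lck() to the SCSI midlayer's queuecommand calling
 * conventions.
 */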
static DEF_SCSI_QCMD(pvscsi_queue)

static int pvscsi_abort(struct scsi_cmnd *cmd)
{
        struct pvscsi_adapter *adapter = shost_priv(cmd->device->host);
        struct pvscsi_ctx *ctx;
        unsigned long flags;
        int result = SUCCESS;
        DECLARE_COMPLETION_ONSTACK(abort_cmp);
        int done;

        scmd_printk(KERN_DEBUG, cmd, "task abort on host %u, %p\n",
                    adapter->host->host_no, cmd);

        spin_lock_irqsave(&adapter->hw_lock, flags);

        /*
         * Poll the completion ring first - we might be trying to abort
         * a command that is waiting to be dispatched in the completion ring.
         */
        pvscsi_process_completion_ring(adapter);

        /*
         * If there is no context for the command, it either already succeeded
         * or else was never properly issued.  Not our problem.
         */
        ctx = pvscsi_find_context(adapter, cmd);
        if (!ctx) {
                scmd_printk(KERN_DEBUG, cmd, "Failed to abort cmd %p\n", cmd);
                goto out;
        }

        /*
         * Mark that the command has been requested to be aborted and issue
         * the abort.
         */
        ctx->abort_cmp = &abort_cmp;

        pvscsi_abort_cmd(adapter, ctx);
        spin_unlock_irqrestore(&adapter->hw_lock, flags);
        /* Wait for 2 secs for the completion. */
        done = wait_for_completion_timeout(&abort_cmp, msecs_to_jiffies(2000));
        spin_lock_irqsave(&adapter->hw_lock, flags);

        if (!done) {
                /*
                 * Failed to abort the command, unmark the fact that it
                 * was requested to be aborted.
                 */
                ctx->abort_cmp = NULL;
                result = FAILED;
                scmd_printk(KERN_DEBUG, cmd,
                            "Failed to get completion for aborted cmd %p\n",
                            cmd);
                goto out;
        }

        /*
         * Successfully aborted the command.
         */
        cmd->result = (DID_ABORT << 16);
        cmd->scsi_done(cmd);

out:
        spin_unlock_irqrestore(&adapter->hw_lock, flags);
        return result;
}

/*
 * Abort all outstanding requests.  This is only safe to use if the completion
 * ring will never be walked again or the device has been reset, because it
 * destroys the 1-1 mapping between the context field passed to the emulation
 * and our request structure.
 */
static void pvscsi_reset_all(struct pvscsi_adapter *adapter)
{
        unsigned i;

        for (i = 0; i < adapter->req_depth; i++) {
                struct pvscsi_ctx *ctx = &adapter->cmd_map[i];
                struct scsi_cmnd *cmd = ctx->cmd;
                if (cmd) {
                        scmd_printk(KERN_ERR, cmd,
                                    "Forced reset on cmd %p\n", cmd);
                        pvscsi_unmap_buffers(adapter, ctx);
                        pvscsi_patch_sense(cmd);
                        pvscsi_release_context(adapter, ctx);
                        cmd->result = (DID_RESET << 16);
                        cmd->scsi_done(cmd);
                }
        }
}

static int pvscsi_host_reset(struct scsi_cmnd *cmd)
{
        struct Scsi_Host *host = cmd->device->host;
        struct pvscsi_adapter *adapter = shost_priv(host);
        unsigned long flags;
        bool use_msg;

        scmd_printk(KERN_INFO, cmd, "SCSI Host reset\n");

        spin_lock_irqsave(&adapter->hw_lock, flags);

        use_msg = adapter->use_msg;

        if (use_msg) {
                adapter->use_msg = false;
                spin_unlock_irqrestore(&adapter->hw_lock, flags);

                /*
                 * Now that we know that the ISR won't add more work on the
                 * workqueue we can safely flush any outstanding work.
                 */
                flush_workqueue(adapter->workqueue);
                spin_lock_irqsave(&adapter->hw_lock, flags);
        }

        /*
         * We're going to tear down the entire ring structure and set it back
         * up, so we stall new requests until all completions are flushed and
         * the rings are back in place.
         */

        pvscsi_process_request_ring(adapter);

        ll_adapter_reset(adapter);

        /*
         * Now process any completions.  Note we do this AFTER adapter reset,
         * which is strange, but stops races where completions get posted
         * between processing the ring and issuing the reset.  The backend will
         * not touch the ring memory after reset, so the immediately pre-reset
         * completion ring state is still valid.
         */
        pvscsi_process_completion_ring(adapter);

        pvscsi_reset_all(adapter);
        adapter->use_msg = use_msg;
        pvscsi_setup_all_rings(adapter);
        pvscsi_unmask_intr(adapter);

        spin_unlock_irqrestore(&adapter->hw_lock, flags);

        return SUCCESS;
}

static int pvscsi_bus_reset(struct scsi_cmnd *cmd)
{
        struct Scsi_Host *host = cmd->device->host;
        struct pvscsi_adapter *adapter = shost_priv(host);
        unsigned long flags;

        scmd_printk(KERN_INFO, cmd, "SCSI Bus reset\n");

        /*
         * We don't want to queue new requests for this bus after
         * flushing all pending requests to the emulation, since new
         * requests could then sneak in during this bus reset phase,
         * so take the lock now.
         */
        spin_lock_irqsave(&adapter->hw_lock, flags);

        pvscsi_process_request_ring(adapter);
        ll_bus_reset(adapter);
        pvscsi_process_completion_ring(adapter);

        spin_unlock_irqrestore(&adapter->hw_lock, flags);

        return SUCCESS;
}

static int pvscsi_device_reset(struct scsi_cmnd *cmd)
{
        struct Scsi_Host *host = cmd->device->host;
        struct pvscsi_adapter *adapter = shost_priv(host);
        unsigned long flags;

        scmd_printk(KERN_INFO, cmd, "SCSI device reset on scsi%u:%u\n",
                    host->host_no, cmd->device->id);

        /*
         * We don't want to queue new requests for this device after flushing
         * all pending requests to the emulation, since new requests could
         * then sneak in during this device reset phase, so take the lock now.
         */
        spin_lock_irqsave(&adapter->hw_lock, flags);

        pvscsi_process_request_ring(adapter);
        ll_device_reset(adapter, cmd->device->id);
        pvscsi_process_completion_ring(adapter);

        spin_unlock_irqrestore(&adapter->hw_lock, flags);

        return SUCCESS;
}

static struct scsi_host_template pvscsi_template;

static const char *pvscsi_info(struct Scsi_Host *host)
{
        struct pvscsi_adapter *adapter = shost_priv(host);
        static char buf[256];

        sprintf(buf, "VMware PVSCSI storage adapter rev %d, req/cmp/msg rings: "
                "%u/%u/%u pages, cmd_per_lun=%u", adapter->rev,
                adapter->req_pages, adapter->cmp_pages, adapter->msg_pages,
                pvscsi_template.cmd_per_lun);

        return buf;
}

static struct scsi_host_template pvscsi_template = {
        .module                         = THIS_MODULE,
        .name                           = "VMware PVSCSI Host Adapter",
        .proc_name                      = "vmw_pvscsi",
        .info                           = pvscsi_info,
        .queuecommand                   = pvscsi_queue,
        .this_id                        = -1,
        .sg_tablesize                   = PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT,
        .dma_boundary                   = UINT_MAX,
        .max_sectors                    = 0xffff,
        .change_queue_depth             = pvscsi_change_queue_depth,
        .eh_abort_handler               = pvscsi_abort,
        .eh_device_reset_handler        = pvscsi_device_reset,
        .eh_bus_reset_handler           = pvscsi_bus_reset,
        .eh_host_reset_handler          = pvscsi_host_reset,
};

static void pvscsi_process_msg(const struct pvscsi_adapter *adapter,
                               const struct PVSCSIRingMsgDesc *e)
{
        struct PVSCSIRingsState *s = adapter->rings_state;
        struct Scsi_Host *host = adapter->host;
        struct scsi_device *sdev;
        printk(KERN_INFO "vmw_pvscsi: msg type: 0x%x - MSG RING: %u/%u (%u)\n",
               e->type, s->msgProdIdx, s->msgConsIdx, s->msgNumEntriesLog2);

        BUILD_BUG_ON(PVSCSI_MSG_LAST != 2);

        if (e->type == PVSCSI_MSG_DEV_ADDED) {
                struct PVSCSIMsgDescDevStatusChanged *desc;
                desc = (struct PVSCSIMsgDescDevStatusChanged *)e;

                printk(KERN_INFO
                       "vmw_pvscsi: msg: device added at scsi%u:%u:%u\n",
                       desc->bus, desc->target, desc->lun[1]);

                if (!scsi_host_get(host))
                        return;

                sdev = scsi_device_lookup(host, desc->bus, desc->target,
                                          desc->lun[1]);
                if (sdev) {
                        printk(KERN_INFO "vmw_pvscsi: device already exists\n");
                        scsi_device_put(sdev);
                } else
                        scsi_add_device(adapter->host, desc->bus,
                                        desc->target, desc->lun[1]);

                scsi_host_put(host);
        } else if (e->type == PVSCSI_MSG_DEV_REMOVED) {
                struct PVSCSIMsgDescDevStatusChanged *desc;
                desc = (struct PVSCSIMsgDescDevStatusChanged *)e;

                printk(KERN_INFO
                       "vmw_pvscsi: msg: device removed at scsi%u:%u:%u\n",
                       desc->bus, desc->target, desc->lun[1]);

                if (!scsi_host_get(host))
                        return;

                sdev = scsi_device_lookup(host, desc->bus, desc->target,
                                          desc->lun[1]);
                if (sdev) {
                        scsi_remove_device(sdev);
                        scsi_device_put(sdev);
                } else
                        printk(KERN_INFO
                               "vmw_pvscsi: failed to lookup scsi%u:%u:%u\n",
                               desc->bus, desc->target, desc->lun[1]);

                scsi_host_put(host);
        }
}

static int pvscsi_msg_pending(const struct pvscsi_adapter *adapter)
{
        struct PVSCSIRingsState *s = adapter->rings_state;

        return s->msgProdIdx != s->msgConsIdx;
}

static void pvscsi_process_msg_ring(const struct pvscsi_adapter *adapter)
{
        struct PVSCSIRingsState *s = adapter->rings_state;
        struct PVSCSIRingMsgDesc *ring = adapter->msg_ring;
        u32 msg_entries = s->msgNumEntriesLog2;

        while (pvscsi_msg_pending(adapter)) {
                struct PVSCSIRingMsgDesc *e = ring + (s->msgConsIdx &
                                                      MASK(msg_entries));

                barrier();
                pvscsi_process_msg(adapter, e);
                barrier();
                s->msgConsIdx++;
        }
}

static void pvscsi_msg_workqueue_handler(struct work_struct *data)
{
        struct pvscsi_adapter *adapter;

        adapter = container_of(data, struct pvscsi_adapter, work);

        pvscsi_process_msg_ring(adapter);
}

static int pvscsi_setup_msg_workqueue(struct pvscsi_adapter *adapter)
{
        char name[32];

        if (!pvscsi_use_msg)
                return 0;

        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND,
                         PVSCSI_CMD_SETUP_MSG_RING);

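        /*
         * Probe for msg-ring support: after writing SETUP_MSG_RING with no
         * payload, a COMMAND_STATUS readback of -1 indicates the device does
         * not implement the message ring.
         */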
        if (pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS) == -1)
                return 0;

        snprintf(name, sizeof(name),
                 "vmw_pvscsi_wq_%u", adapter->host->host_no);

        adapter->workqueue = create_singlethread_workqueue(name);
        if (!adapter->workqueue) {
                printk(KERN_ERR "vmw_pvscsi: failed to create work queue\n");
                return 0;
        }
        INIT_WORK(&adapter->work, pvscsi_msg_workqueue_handler);

        return 1;
}

static bool pvscsi_setup_req_threshold(struct pvscsi_adapter *adapter,
                                      bool enable)
{
        u32 val;

        if (!pvscsi_use_req_threshold)
                return false;

        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND,
                         PVSCSI_CMD_SETUP_REQCALLTHRESHOLD);
        val = pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS);
        if (val == -1) {
                printk(KERN_INFO "vmw_pvscsi: device does not support req_threshold\n");
                return false;
        } else {
                struct PVSCSICmdDescSetupReqCall cmd_msg = { 0 };
                cmd_msg.enable = enable;
                printk(KERN_INFO
                       "vmw_pvscsi: %sabling reqCallThreshold\n",
                        enable ? "en" : "dis");
                pvscsi_write_cmd_desc(adapter,
                                      PVSCSI_CMD_SETUP_REQCALLTHRESHOLD,
                                      &cmd_msg, sizeof(cmd_msg));
                return pvscsi_reg_read(adapter,
                                       PVSCSI_REG_OFFSET_COMMAND_STATUS) != 0;
        }
}

static irqreturn_t pvscsi_isr(int irq, void *devp)
{
        struct pvscsi_adapter *adapter = devp;
        unsigned long flags;

        spin_lock_irqsave(&adapter->hw_lock, flags);
        pvscsi_process_completion_ring(adapter);
        if (adapter->use_msg && pvscsi_msg_pending(adapter))
                queue_work(adapter->workqueue, &adapter->work);
        spin_unlock_irqrestore(&adapter->hw_lock, flags);

        return IRQ_HANDLED;
}

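/*
 * With an exclusive MSI/MSI-X vector the interrupt is known to be ours, so
 * pvscsi_isr() is used directly. For legacy INTx the line may be shared, so
 * pvscsi_shared_isr() first checks INTR_STATUS and acks it, returning
 * IRQ_NONE when the device raised no supported interrupt.
 */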
static irqreturn_t pvscsi_shared_isr(int irq, void *devp)
{
        struct pvscsi_adapter *adapter = devp;
        u32 val = pvscsi_read_intr_status(adapter);

        if (!(val & PVSCSI_INTR_ALL_SUPPORTED))
                return IRQ_NONE;
        pvscsi_write_intr_status(devp, val);
        return pvscsi_isr(irq, devp);
}

static void pvscsi_free_sgls(const struct pvscsi_adapter *adapter)
{
        struct pvscsi_ctx *ctx = adapter->cmd_map;
        unsigned i;

        for (i = 0; i < adapter->req_depth; ++i, ++ctx)
                free_pages((unsigned long)ctx->sgl, get_order(SGL_SIZE));
}

static void pvscsi_shutdown_intr(struct pvscsi_adapter *adapter)
{
        free_irq(pci_irq_vector(adapter->dev, 0), adapter);
        pci_free_irq_vectors(adapter->dev);
}

static void pvscsi_release_resources(struct pvscsi_adapter *adapter)
{
        if (adapter->workqueue)
                destroy_workqueue(adapter->workqueue);

        if (adapter->mmioBase)
                pci_iounmap(adapter->dev, adapter->mmioBase);

        pci_release_regions(adapter->dev);

        if (adapter->cmd_map) {
                pvscsi_free_sgls(adapter);
                kfree(adapter->cmd_map);
        }

        if (adapter->rings_state)
                dma_free_coherent(&adapter->dev->dev, PAGE_SIZE,
                                    adapter->rings_state, adapter->ringStatePA);

        if (adapter->req_ring)
                dma_free_coherent(&adapter->dev->dev,
                                    adapter->req_pages * PAGE_SIZE,
                                    adapter->req_ring, adapter->reqRingPA);

        if (adapter->cmp_ring)
                dma_free_coherent(&adapter->dev->dev,
                                    adapter->cmp_pages * PAGE_SIZE,
                                    adapter->cmp_ring, adapter->cmpRingPA);

        if (adapter->msg_ring)
                dma_free_coherent(&adapter->dev->dev,
                                    adapter->msg_pages * PAGE_SIZE,
                                    adapter->msg_ring, adapter->msgRingPA);
}

/*
 * Allocate scatter gather lists.
 *
 * These are statically allocated.  Trying to be clever was not worth it.
 *
 * Dynamic allocation can fail, and we can't go deep into the memory
 * allocator, since we're a SCSI driver, and trying too hard to allocate
 * memory might generate disk I/O.  We also don't want to fail disk I/O
 * in that case because we can't get an allocation - the I/O could be
 * trying to swap out data to free memory.  Since that is pathological,
 * just use a statically allocated scatter list.
 *
 */
static int pvscsi_allocate_sg(struct pvscsi_adapter *adapter)
{
        struct pvscsi_ctx *ctx;
        int i;

        ctx = adapter->cmd_map;
        BUILD_BUG_ON(sizeof(struct pvscsi_sg_list) > SGL_SIZE);

        for (i = 0; i < adapter->req_depth; ++i, ++ctx) {
                ctx->sgl = (void *)__get_free_pages(GFP_KERNEL,
                                                    get_order(SGL_SIZE));
                ctx->sglPA = 0;
                BUG_ON(!IS_ALIGNED(((unsigned long)ctx->sgl), PAGE_SIZE));
                if (!ctx->sgl) {
                        for (; i >= 0; --i, --ctx) {
                                free_pages((unsigned long)ctx->sgl,
                                           get_order(SGL_SIZE));
                                ctx->sgl = NULL;
                        }
                        return -ENOMEM;
                }
        }

        return 0;
}

/*
 * Query the device, fetch the config info and return the
 * maximum number of targets on the adapter. On any failure,
 * return the default of 16.
 */
static u32 pvscsi_get_max_targets(struct pvscsi_adapter *adapter)
{
        struct PVSCSICmdDescConfigCmd cmd;
        struct PVSCSIConfigPageHeader *header;
        struct device *dev;
        dma_addr_t configPagePA;
        void *config_page;
        u32 numPhys = 16;

        dev = pvscsi_dev(adapter);
        config_page = dma_alloc_coherent(&adapter->dev->dev, PAGE_SIZE,
                        &configPagePA, GFP_KERNEL);
        if (!config_page) {
                dev_warn(dev, "vmw_pvscsi: failed to allocate memory for config page\n");
                goto exit;
        }
        BUG_ON(configPagePA & ~PAGE_MASK);

        /* Fetch config info from the device. */
        cmd.configPageAddress = ((u64)PVSCSI_CONFIG_CONTROLLER_ADDRESS) << 32;
        cmd.configPageNum = PVSCSI_CONFIG_PAGE_CONTROLLER;
        cmd.cmpAddr = configPagePA;
        cmd._pad = 0;

        /*
         * Mark the completion page header with error values. If the device
         * completes the command successfully, it sets the status values to
         * indicate success.
         */
        header = config_page;
        memset(header, 0, sizeof *header);
        header->hostStatus = BTSTAT_INVPARAM;
        header->scsiStatus = SDSTAT_CHECK;

        pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_CONFIG, &cmd, sizeof cmd);

        if (header->hostStatus == BTSTAT_SUCCESS &&
            header->scsiStatus == SDSTAT_GOOD) {
                struct PVSCSIConfigPageController *config;

                config = config_page;
                numPhys = config->numPhys;
        } else
                dev_warn(dev, "vmw_pvscsi: PVSCSI_CMD_CONFIG failed. hostStatus = 0x%x, scsiStatus = 0x%x\n",
                         header->hostStatus, header->scsiStatus);
        dma_free_coherent(&adapter->dev->dev, PAGE_SIZE, config_page,
                          configPagePA);
exit:
        return numPhys;
}

static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
        unsigned int irq_flag = PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY;
        struct pvscsi_adapter *adapter;
        struct pvscsi_adapter adapter_temp;
        struct Scsi_Host *host = NULL;
        unsigned int i;
        int error;
        u32 max_id;

        error = -ENODEV;

        if (pci_enable_device(pdev))
                return error;

        if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
                printk(KERN_INFO "vmw_pvscsi: using 64bit dma\n");
        } else if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) {
                printk(KERN_INFO "vmw_pvscsi: using 32bit dma\n");
        } else {
                printk(KERN_ERR "vmw_pvscsi: failed to set DMA mask\n");
                goto out_disable_device;
        }

        /*
         * Use a temporary pvscsi_adapter struct until we find the number of
         * targets on the adapter; after that we switch to the real
         * allocated struct.
         */
        adapter = &adapter_temp;
        memset(adapter, 0, sizeof(*adapter));
        adapter->dev  = pdev;
        adapter->rev = pdev->revision;

        if (pci_request_regions(pdev, "vmw_pvscsi")) {
                printk(KERN_ERR "vmw_pvscsi: pci memory selection failed\n");
                goto out_disable_device;
        }

        for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
                if ((pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE_IO))
                        continue;

                if (pci_resource_len(pdev, i) < PVSCSI_MEM_SPACE_SIZE)
                        continue;

                break;
        }

        if (i == DEVICE_COUNT_RESOURCE) {
                printk(KERN_ERR
                       "vmw_pvscsi: adapter has no suitable MMIO region\n");
                goto out_release_resources_and_disable;
        }

        adapter->mmioBase = pci_iomap(pdev, i, PVSCSI_MEM_SPACE_SIZE);

        if (!adapter->mmioBase) {
                printk(KERN_ERR
                       "vmw_pvscsi: can't iomap for BAR %d memsize %lu\n",
                       i, PVSCSI_MEM_SPACE_SIZE);
                goto out_release_resources_and_disable;
        }

        pci_set_master(pdev);

        /*
         * Ask the device for max number of targets before deciding the
         * default pvscsi_ring_pages value.
         */
        max_id = pvscsi_get_max_targets(adapter);
        printk(KERN_INFO "vmw_pvscsi: max_id: %u\n", max_id);

        if (pvscsi_ring_pages == 0)
                /*
                 * Set the right default value: 8 pages for up to 16 targets,
                 * the maximum above that.
                 */
                pvscsi_ring_pages = (max_id > 16) ?
                        PVSCSI_SETUP_RINGS_MAX_NUM_PAGES :
                        PVSCSI_DEFAULT_NUM_PAGES_PER_RING;
        printk(KERN_INFO
               "vmw_pvscsi: setting ring_pages to %d\n",
               pvscsi_ring_pages);

        pvscsi_template.can_queue =
                min(PVSCSI_MAX_NUM_PAGES_REQ_RING, pvscsi_ring_pages) *
                PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
        pvscsi_template.cmd_per_lun =
                min(pvscsi_template.can_queue, pvscsi_cmd_per_lun);
        host = scsi_host_alloc(&pvscsi_template, sizeof(struct pvscsi_adapter));
        if (!host) {
                printk(KERN_ERR "vmw_pvscsi: failed to allocate host\n");
                goto out_release_resources_and_disable;
        }

        /*
         * Use the real pvscsi_adapter struct from here onwards.
         */
        adapter = shost_priv(host);
        memset(adapter, 0, sizeof(*adapter));
        adapter->dev  = pdev;
        adapter->host = host;
        /*
         * Copy back what we already have to the allocated adapter struct.
         */
        adapter->rev = adapter_temp.rev;
        adapter->mmioBase = adapter_temp.mmioBase;

        spin_lock_init(&adapter->hw_lock);
        host->max_channel = 0;
        host->max_lun     = 1;
        host->max_cmd_len = 16;
        host->max_id      = max_id;

        pci_set_drvdata(pdev, host);

        ll_adapter_reset(adapter);

        adapter->use_msg = pvscsi_setup_msg_workqueue(adapter);

        error = pvscsi_allocate_rings(adapter);
        if (error) {
                printk(KERN_ERR "vmw_pvscsi: unable to allocate ring memory\n");
                goto out_release_resources;
        }

        /*
         * From this point on we should reset the adapter if anything goes
         * wrong.
         */
        pvscsi_setup_all_rings(adapter);

        adapter->cmd_map = kcalloc(adapter->req_depth,
                                   sizeof(struct pvscsi_ctx), GFP_KERNEL);
        if (!adapter->cmd_map) {
                printk(KERN_ERR "vmw_pvscsi: failed to allocate memory.\n");
                error = -ENOMEM;
                goto out_reset_adapter;
        }

        INIT_LIST_HEAD(&adapter->cmd_pool);
        for (i = 0; i < adapter->req_depth; i++) {
                struct pvscsi_ctx *ctx = adapter->cmd_map + i;
                list_add(&ctx->list, &adapter->cmd_pool);
        }

        error = pvscsi_allocate_sg(adapter);
        if (error) {
                printk(KERN_ERR "vmw_pvscsi: unable to allocate s/g table\n");
                goto out_reset_adapter;
        }

        if (pvscsi_disable_msix)
                irq_flag &= ~PCI_IRQ_MSIX;
        if (pvscsi_disable_msi)
                irq_flag &= ~PCI_IRQ_MSI;

        error = pci_alloc_irq_vectors(adapter->dev, 1, 1, irq_flag);
        if (error < 0)
                goto out_reset_adapter;

        adapter->use_req_threshold = pvscsi_setup_req_threshold(adapter, true);
        printk(KERN_DEBUG "vmw_pvscsi: driver-based request coalescing %sabled\n",
               adapter->use_req_threshold ? "en" : "dis");

        if (adapter->dev->msix_enabled || adapter->dev->msi_enabled) {
                printk(KERN_INFO "vmw_pvscsi: using MSI%s\n",
                        adapter->dev->msix_enabled ? "-X" : "");
                error = request_irq(pci_irq_vector(pdev, 0), pvscsi_isr,
                                0, "vmw_pvscsi", adapter);
        } else {
                printk(KERN_INFO "vmw_pvscsi: using INTx\n");
                error = request_irq(pci_irq_vector(pdev, 0), pvscsi_shared_isr,
                                IRQF_SHARED, "vmw_pvscsi", adapter);
        }

        if (error) {
                printk(KERN_ERR
                       "vmw_pvscsi: unable to request IRQ: %d\n", error);
                goto out_reset_adapter;
        }

        error = scsi_add_host(host, &pdev->dev);
        if (error) {
                printk(KERN_ERR
                       "vmw_pvscsi: scsi_add_host failed: %d\n", error);
                goto out_reset_adapter;
        }

        dev_info(&pdev->dev, "VMware PVSCSI rev %d host #%u\n",
                 adapter->rev, host->host_no);

        pvscsi_unmask_intr(adapter);

        scsi_scan_host(host);

        return 0;

out_reset_adapter:
        ll_adapter_reset(adapter);
out_release_resources:
        pvscsi_shutdown_intr(adapter);
        pvscsi_release_resources(adapter);
        scsi_host_put(host);
out_disable_device:
        pci_disable_device(pdev);

        return error;

out_release_resources_and_disable:
        pvscsi_shutdown_intr(adapter);
        pvscsi_release_resources(adapter);
        goto out_disable_device;
}

static void __pvscsi_shutdown(struct pvscsi_adapter *adapter)
{
        pvscsi_mask_intr(adapter);

        if (adapter->workqueue)
                flush_workqueue(adapter->workqueue);

        pvscsi_shutdown_intr(adapter);

        pvscsi_process_request_ring(adapter);
        pvscsi_process_completion_ring(adapter);
        ll_adapter_reset(adapter);
}

static void pvscsi_shutdown(struct pci_dev *dev)
{
        struct Scsi_Host *host = pci_get_drvdata(dev);
        struct pvscsi_adapter *adapter = shost_priv(host);

        __pvscsi_shutdown(adapter);
}

static void pvscsi_remove(struct pci_dev *pdev)
{
        struct Scsi_Host *host = pci_get_drvdata(pdev);
        struct pvscsi_adapter *adapter = shost_priv(host);

        scsi_remove_host(host);

        __pvscsi_shutdown(adapter);
        pvscsi_release_resources(adapter);

        scsi_host_put(host);

        pci_disable_device(pdev);
}

static struct pci_driver pvscsi_pci_driver = {
        .name           = "vmw_pvscsi",
        .id_table       = pvscsi_pci_tbl,
        .probe          = pvscsi_probe,
        .remove         = pvscsi_remove,
        .shutdown       = pvscsi_shutdown,
};

static int __init pvscsi_init(void)
{
        pr_info("%s - version %s\n",
                PVSCSI_LINUX_DRIVER_DESC, PVSCSI_DRIVER_VERSION_STRING);
        return pci_register_driver(&pvscsi_pci_driver);
}

static void __exit pvscsi_exit(void)
{
        pci_unregister_driver(&pvscsi_pci_driver);
}

module_init(pvscsi_init);
module_exit(pvscsi_exit);