q->boundary_rq = NULL;
                }
 
-               if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
+               if (rq->cmd_flags & REQ_DONTPREP)
+                       break;
+
+               if (q->dma_drain_size && rq->data_len) {
+                       /*
+                        * make sure space for the drain appears we
+                        * know we can do this because max_hw_segments
+                        * has been adjusted to be one fewer than the
+                        * device can handle
+                        */
+                       rq->nr_phys_segments++;
+                       rq->nr_hw_segments++;
+               }
+
+               if (!q->prep_rq_fn)
                        break;
 
                ret = q->prep_rq_fn(q, rq);
                         * avoid resource deadlock.  REQ_STARTED will
                         * prevent other fs requests from passing this one.
                         */
+                       if (q->dma_drain_size && rq->data_len &&
+                           !(rq->cmd_flags & REQ_DONTPREP)) {
+                               /*
+                                * remove the space for the drain we added
+                                * so that we don't add it again
+                                */
+                               --rq->nr_phys_segments;
+                               --rq->nr_hw_segments;
+                       }
+
                        rq = NULL;
                        break;
                } else if (ret == BLKPREP_KILL) {
 
 
 EXPORT_SYMBOL(blk_queue_stack_limits);
 
+/**
+ * blk_queue_dma_drain - Set up a drain buffer for excess dma.
+ *
+ * @q:  the request queue for the device
+ * @buf:       physically contiguous buffer
+ * @size:      size of the buffer in bytes
+ *
+ * Some devices have excess DMA problems and can't simply discard (or
+ * zero fill) the unwanted piece of the transfer.  They have to have a
+ * real area of memory to transfer it into.  The use case for this is
+ * ATAPI devices in DMA mode.  If the packet command causes a transfer
+ * bigger than the transfer size some HBAs will lock up if there
+ * aren't DMA elements to contain the excess transfer.  What this API
+ * does is adjust the queue so that the buf is always appended
+ * silently to the scatterlist.
+ *
+ * Note: This routine adjusts max_hw_segments to make room for
+ * appending the drain buffer.  If you call
+ * blk_queue_max_hw_segments() or blk_queue_max_phys_segments() after
+ * calling this routine, you must set the limit to one fewer than your
+ * device can support otherwise there won't be room for the drain
+ * buffer.
+ */
+int blk_queue_dma_drain(struct request_queue *q, void *buf,
+                               unsigned int size)
+{
+       if (q->max_hw_segments < 2 || q->max_phys_segments < 2)
+               return -EINVAL;
+       /* make room for appending the drain */
+       --q->max_hw_segments;
+       --q->max_phys_segments;
+       q->dma_drain_buffer = buf;
+       q->dma_drain_size = size;
+
+       return 0;
+}
+
+EXPORT_SYMBOL_GPL(blk_queue_dma_drain);
+
 /**
  * blk_queue_segment_boundary - set boundary rules for segment merging
  * @q:  the request queue for the device
                bvprv = bvec;
        } /* segments in rq */
 
+       if (q->dma_drain_size) {
+               sg->page_link &= ~0x02;
+               sg = sg_next(sg);
+               sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
+                           q->dma_drain_size,
+                           ((unsigned long)q->dma_drain_buffer) &
+                           (PAGE_SIZE - 1));
+               nsegs++;
+       }
+
        if (sg)
                sg_mark_end(sg);
 
 
        unsigned int            max_segment_size;
 
        unsigned long           seg_boundary_mask;
+       void                    *dma_drain_buffer;
+       unsigned int            dma_drain_size;
        unsigned int            dma_alignment;
 
        struct blk_queue_tag    *queue_tags;
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_hardsect_size(struct request_queue *, unsigned short);
 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
+extern int blk_queue_dma_drain(struct request_queue *q, void *buf,
+                              unsigned int size);
 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);