Merge 5.11-rc7 into usb-next
[linux-2.6-microblaze.git] / drivers / usb / host / xhci-ring.c
index 89c3be9..5e548a1 100644 (file)
 #include "xhci-trace.h"
 #include "xhci-mtk.h"
 
+static int queue_command(struct xhci_hcd *xhci, struct xhci_command *cmd,
+                        u32 field1, u32 field2,
+                        u32 field3, u32 field4, bool command_must_succeed);
+
 /*
  * Returns zero if the TRB isn't in this segment, otherwise it returns the DMA
  * address of the TRB.
@@ -151,10 +155,11 @@ static void next_trb(struct xhci_hcd *xhci,
 
 /*
  * See Cycle bit rules. SW is the consumer for the event ring only.
- * Don't make a ring full of link TRBs.  That would be dumb and this would loop.
  */
 void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring)
 {
+       unsigned int link_trb_count = 0;
+
        /* event ring doesn't have link trbs, check for last trb */
        if (ring->type == TYPE_EVENT) {
                if (!last_trb_on_seg(ring->deq_seg, ring->dequeue)) {
@@ -170,14 +175,23 @@ void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring)
 
        /* All other rings have link trbs */
        if (!trb_is_link(ring->dequeue)) {
-               ring->dequeue++;
-               ring->num_trbs_free++;
+               if (last_trb_on_seg(ring->deq_seg, ring->dequeue)) {
+                       xhci_warn(xhci, "Missing link TRB at end of segment\n");
+               } else {
+                       ring->dequeue++;
+                       ring->num_trbs_free++;
+               }
        }
+
        while (trb_is_link(ring->dequeue)) {
                ring->deq_seg = ring->deq_seg->next;
                ring->dequeue = ring->deq_seg->trbs;
-       }
 
+               if (link_trb_count++ > ring->num_segs) {
+                       xhci_warn(xhci, "Ring is an endless link TRB loop\n");
+                       break;
+               }
+       }
 out:
        trace_xhci_inc_deq(ring);
 
@@ -186,7 +200,6 @@ out:
 
 /*
  * See Cycle bit rules. SW is the consumer for the event ring only.
- * Don't make a ring full of link TRBs.  That would be dumb and this would loop.
  *
  * If we've just enqueued a TRB that is in the middle of a TD (meaning the
  * chain bit is set), then set the chain bit in all the following link TRBs.
@@ -206,11 +219,18 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring,
 {
        u32 chain;
        union xhci_trb *next;
+       unsigned int link_trb_count = 0;
 
        chain = le32_to_cpu(ring->enqueue->generic.field[3]) & TRB_CHAIN;
        /* If this is not event ring, there is one less usable TRB */
        if (!trb_is_link(ring->enqueue))
                ring->num_trbs_free--;
+
+       if (last_trb_on_seg(ring->enq_seg, ring->enqueue)) {
+               xhci_err(xhci, "Tried to move enqueue past ring segment\n");
+               return;
+       }
+
        next = ++(ring->enqueue);
 
        /* Update the dequeue pointer further if that was a link TRB */
@@ -247,6 +267,11 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring,
                ring->enq_seg = ring->enq_seg->next;
                ring->enqueue = ring->enq_seg->trbs;
                next = ring->enqueue;
+
+               if (link_trb_count++ > ring->num_segs) {
+                       xhci_warn(xhci, "%s: Ring link TRB loop\n", __func__);
+                       break;
+               }
        }
 
        trace_xhci_inc_enq(ring);
@@ -408,9 +433,8 @@ void xhci_ring_ep_doorbell(struct xhci_hcd *xhci,
        trace_xhci_ring_ep_doorbell(slot_id, DB_VALUE(ep_index, stream_id));
 
        writel(DB_VALUE(ep_index, stream_id), db_addr);
-       /* The CPU has better things to do at this point than wait for a
-        * write-posting flush.  It'll get there soon enough.
-        */
+       /* flush the write */
+       readl(db_addr);
 }
 
 /* Ring the doorbell for any rings with pending URBs */
@@ -446,6 +470,46 @@ void xhci_ring_doorbell_for_active_rings(struct xhci_hcd *xhci,
        ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
 }
 
+static struct xhci_virt_ep *xhci_get_virt_ep(struct xhci_hcd *xhci,
+                                            unsigned int slot_id,
+                                            unsigned int ep_index)
+{
+       if (slot_id == 0 || slot_id >= MAX_HC_SLOTS) {
+               xhci_warn(xhci, "Invalid slot_id %u\n", slot_id);
+               return NULL;
+       }
+       if (ep_index >= EP_CTX_PER_DEV) {
+               xhci_warn(xhci, "Invalid endpoint index %u\n", ep_index);
+               return NULL;
+       }
+       if (!xhci->devs[slot_id]) {
+               xhci_warn(xhci, "No xhci virt device for slot_id %u\n", slot_id);
+               return NULL;
+       }
+
+       return &xhci->devs[slot_id]->eps[ep_index];
+}
+
+static struct xhci_ring *xhci_virt_ep_to_ring(struct xhci_hcd *xhci,
+                                             struct xhci_virt_ep *ep,
+                                             unsigned int stream_id)
+{
+       /* common case, no streams */
+       if (!(ep->ep_state & EP_HAS_STREAMS))
+               return ep->ring;
+
+       if (!ep->stream_info)
+               return NULL;
+
+       if (stream_id == 0 || stream_id >= ep->stream_info->num_streams) {
+               xhci_warn(xhci, "Invalid stream_id %u request for slot_id %u ep_index %u\n",
+                         stream_id, ep->vdev->slot_id, ep->ep_index);
+               return NULL;
+       }
+
+       return ep->stream_info->stream_rings[stream_id];
+}
+
 /* Get the right ring for the given slot_id, ep_index and stream_id.
  * If the endpoint supports streams, boundary check the URB's stream ID.
  * If the endpoint doesn't support streams, return the singular endpoint ring.
@@ -456,30 +520,11 @@ struct xhci_ring *xhci_triad_to_transfer_ring(struct xhci_hcd *xhci,
 {
        struct xhci_virt_ep *ep;
 
-       ep = &xhci->devs[slot_id]->eps[ep_index];
-       /* Common case: no streams */
-       if (!(ep->ep_state & EP_HAS_STREAMS))
-               return ep->ring;
-
-       if (stream_id == 0) {
-               xhci_warn(xhci,
-                               "WARN: Slot ID %u, ep index %u has streams, "
-                               "but URB has no stream ID.\n",
-                               slot_id, ep_index);
+       ep = xhci_get_virt_ep(xhci, slot_id, ep_index);
+       if (!ep)
                return NULL;
-       }
 
-       if (stream_id < ep->stream_info->num_streams)
-               return ep->stream_info->stream_rings[stream_id];
-
-       xhci_warn(xhci,
-                       "WARN: Slot ID %u, ep index %u has "
-                       "stream IDs 1 to %u allocated, "
-                       "but stream ID %u is requested.\n",
-                       slot_id, ep_index,
-                       ep->stream_info->num_streams - 1,
-                       stream_id);
-       return NULL;
+       return xhci_virt_ep_to_ring(xhci, ep, stream_id);
 }
 
 
@@ -506,73 +551,55 @@ static u64 xhci_get_hw_deq(struct xhci_hcd *xhci, struct xhci_virt_device *vdev,
        return le64_to_cpu(ep_ctx->deq);
 }
 
-/*
- * Move the xHC's endpoint ring dequeue pointer past cur_td.
- * Record the new state of the xHC's endpoint ring dequeue segment,
- * dequeue pointer, stream id, and new consumer cycle state in state.
- * Update our internal representation of the ring's dequeue pointer.
- *
- * We do this in three jumps:
- *  - First we update our new ring state to be the same as when the xHC stopped.
- *  - Then we traverse the ring to find the segment that contains
- *    the last TRB in the TD.  We toggle the xHC's new cycle state when we pass
- *    any link TRBs with the toggle cycle bit set.
- *  - Finally we move the dequeue state one TRB further, toggling the cycle bit
- *    if we've moved it past a link TRB with the toggle cycle bit set.
- *
- * Some of the uses of xhci_generic_trb are grotty, but if they're done
- * with correct __le32 accesses they should work fine.  Only users of this are
- * in here.
- */
-void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
-               unsigned int slot_id, unsigned int ep_index,
-               unsigned int stream_id, struct xhci_td *cur_td,
-               struct xhci_dequeue_state *state)
+static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci,
+                               unsigned int slot_id, unsigned int ep_index,
+                               unsigned int stream_id, struct xhci_td *td)
 {
        struct xhci_virt_device *dev = xhci->devs[slot_id];
        struct xhci_virt_ep *ep = &dev->eps[ep_index];
        struct xhci_ring *ep_ring;
+       struct xhci_command *cmd;
        struct xhci_segment *new_seg;
        union xhci_trb *new_deq;
+       int new_cycle;
        dma_addr_t addr;
        u64 hw_dequeue;
        bool cycle_found = false;
        bool td_last_trb_found = false;
+       u32 trb_sct = 0;
+       int ret;
 
        ep_ring = xhci_triad_to_transfer_ring(xhci, slot_id,
                        ep_index, stream_id);
        if (!ep_ring) {
-               xhci_warn(xhci, "WARN can't find new dequeue state "
-                               "for invalid stream ID %u.\n",
-                               stream_id);
-               return;
+               xhci_warn(xhci, "WARN can't find new dequeue, invalid stream ID %u\n",
+                         stream_id);
+               return -ENODEV;
        }
        /*
         * A cancelled TD can complete with a stall if HW cached the trb.
-        * In this case driver can't find cur_td, but if the ring is empty we
+        * In this case driver can't find td, but if the ring is empty we
         * can move the dequeue pointer to the current enqueue position.
+        * We shouldn't hit this anymore as cached cancelled TRBs are given back
+        * after clearing the cache, but be on the safe side and keep it anyway
         */
-       if (!cur_td) {
+       if (!td) {
                if (list_empty(&ep_ring->td_list)) {
-                       state->new_deq_seg = ep_ring->enq_seg;
-                       state->new_deq_ptr = ep_ring->enqueue;
-                       state->new_cycle_state = ep_ring->cycle_state;
-                       goto done;
+                       new_seg = ep_ring->enq_seg;
+                       new_deq = ep_ring->enqueue;
+                       new_cycle = ep_ring->cycle_state;
+                       xhci_dbg(xhci, "ep ring empty, Set new dequeue = enqueue");
+                       goto deq_found;
                } else {
-                       xhci_warn(xhci, "Can't find new dequeue state, missing cur_td\n");
-                       return;
+                       xhci_warn(xhci, "Can't find new dequeue state, missing td\n");
+                       return -EINVAL;
                }
        }
 
-       /* Dig out the cycle state saved by the xHC during the stop ep cmd */
-       xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
-                       "Finding endpoint context");
-
        hw_dequeue = xhci_get_hw_deq(xhci, dev, ep_index, stream_id);
        new_seg = ep_ring->deq_seg;
        new_deq = ep_ring->dequeue;
-       state->new_cycle_state = hw_dequeue & 0x1;
-       state->stream_id = stream_id;
+       new_cycle = hw_dequeue & 0x1;
 
        /*
         * We want to find the pointer, segment and cycle state of the new trb
@@ -587,40 +614,71 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
                        if (td_last_trb_found)
                                break;
                }
-               if (new_deq == cur_td->last_trb)
+               if (new_deq == td->last_trb)
                        td_last_trb_found = true;
 
                if (cycle_found && trb_is_link(new_deq) &&
                    link_trb_toggles_cycle(new_deq))
-                       state->new_cycle_state ^= 0x1;
+                       new_cycle ^= 0x1;
 
                next_trb(xhci, ep_ring, &new_seg, &new_deq);
 
                /* Search wrapped around, bail out */
                if (new_deq == ep->ring->dequeue) {
                        xhci_err(xhci, "Error: Failed finding new dequeue state\n");
-                       state->new_deq_seg = NULL;
-                       state->new_deq_ptr = NULL;
-                       return;
+                       return -EINVAL;
                }
 
        } while (!cycle_found || !td_last_trb_found);
 
-       state->new_deq_seg = new_seg;
-       state->new_deq_ptr = new_deq;
+deq_found:
 
-done:
        /* Don't update the ring cycle state for the producer (us). */
-       xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
-                       "Cycle state = 0x%x", state->new_cycle_state);
+       addr = xhci_trb_virt_to_dma(new_seg, new_deq);
+       if (addr == 0) {
+               xhci_warn(xhci, "Can't find dma of new dequeue ptr\n");
+               xhci_warn(xhci, "deq seg = %p, deq ptr = %p\n", new_seg, new_deq);
+               return -EINVAL;
+       }
+
+       if ((ep->ep_state & SET_DEQ_PENDING)) {
+               xhci_warn(xhci, "Set TR Deq already pending, don't submit for 0x%pad\n",
+                         &addr);
+               return -EBUSY;
+       }
+
+       /* This function gets called from contexts where it cannot sleep */
+       cmd = xhci_alloc_command(xhci, false, GFP_ATOMIC);
+       if (!cmd) {
+               xhci_warn(xhci, "Can't alloc Set TR Deq cmd 0x%pad\n", &addr);
+               return -ENOMEM;
+       }
+
+       if (stream_id)
+               trb_sct = SCT_FOR_TRB(SCT_PRI_TR);
+       ret = queue_command(xhci, cmd,
+               lower_32_bits(addr) | trb_sct | new_cycle,
+               upper_32_bits(addr),
+               STREAM_ID_FOR_TRB(stream_id), SLOT_ID_FOR_TRB(slot_id) |
+               EP_ID_FOR_TRB(ep_index) | TRB_TYPE(TRB_SET_DEQ), false);
+       if (ret < 0) {
+               xhci_free_command(xhci, cmd);
+               return ret;
+       }
+       ep->queued_deq_seg = new_seg;
+       ep->queued_deq_ptr = new_deq;
 
        xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
-                       "New dequeue segment = %p (virtual)",
-                       state->new_deq_seg);
-       addr = xhci_trb_virt_to_dma(state->new_deq_seg, state->new_deq_ptr);
-       xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
-                       "New dequeue pointer = 0x%llx (DMA)",
-                       (unsigned long long) addr);
+                      "Set TR Deq ptr 0x%llx, cycle %u\n", addr, new_cycle);
+
+       /* Stop the TD queueing code from ringing the doorbell until
+        * this command completes.  The HC won't set the dequeue pointer
+        * if the ring is running, and ringing the doorbell starts the
+        * ring running.
+        */
+       ep->ep_state |= SET_DEQ_PENDING;
+       xhci_ring_cmd_db(xhci);
+       return 0;
 }
 
 /* flip_cycle means flip the cycle bit of all but the first and last TRB.
@@ -713,150 +771,320 @@ static void xhci_unmap_td_bounce_buffer(struct xhci_hcd *xhci,
        seg->bounce_offs = 0;
 }
 
-/*
- * When we get a command completion for a Stop Endpoint Command, we need to
- * unlink any cancelled TDs from the ring.  There are two ways to do that:
- *
- *  1. If the HW was in the middle of processing the TD that needs to be
- *     cancelled, then we must move the ring's dequeue pointer past the last TRB
- *     in the TD with a Set Dequeue Pointer Command.
- *  2. Otherwise, we turn all the TRBs in the TD into No-op TRBs (with the chain
- *     bit cleared) so that the HW will skip over them.
- */
-static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id,
-               union xhci_trb *trb, struct xhci_event_cmd *event)
+static int xhci_td_cleanup(struct xhci_hcd *xhci, struct xhci_td *td,
+                          struct xhci_ring *ep_ring, int status)
 {
-       unsigned int ep_index;
-       struct xhci_ring *ep_ring;
-       struct xhci_virt_ep *ep;
-       struct xhci_td *cur_td = NULL;
-       struct xhci_td *last_unlinked_td;
-       struct xhci_ep_ctx *ep_ctx;
-       struct xhci_virt_device *vdev;
-       u64 hw_deq;
-       struct xhci_dequeue_state deq_state;
+       struct urb *urb = NULL;
 
-       if (unlikely(TRB_TO_SUSPEND_PORT(le32_to_cpu(trb->generic.field[3])))) {
-               if (!xhci->devs[slot_id])
-                       xhci_warn(xhci, "Stop endpoint command "
-                               "completion for disabled slot %u\n",
-                               slot_id);
-               return;
+       /* Clean up the endpoint's TD list */
+       urb = td->urb;
+
+       /* if a bounce buffer was used to align this td then unmap it */
+       xhci_unmap_td_bounce_buffer(xhci, ep_ring, td);
+
+       /* Do one last check of the actual transfer length.
+        * If the host controller said we transferred more data than the buffer
+        * length, urb->actual_length will be a very big number (since it's
+        * unsigned).  Play it safe and say we didn't transfer anything.
+        */
+       if (urb->actual_length > urb->transfer_buffer_length) {
+               xhci_warn(xhci, "URB req %u and actual %u transfer length mismatch\n",
+                         urb->transfer_buffer_length, urb->actual_length);
+               urb->actual_length = 0;
+               status = 0;
        }
+       /* TD might be removed from td_list if we are giving back a cancelled URB */
+       if (!list_empty(&td->td_list))
+               list_del_init(&td->td_list);
+       /* Giving back a cancelled URB, or a TD slated for cancellation completed anyway */
+       if (!list_empty(&td->cancelled_td_list))
+               list_del_init(&td->cancelled_td_list);
 
-       memset(&deq_state, 0, sizeof(deq_state));
-       ep_index = TRB_TO_EP_INDEX(le32_to_cpu(trb->generic.field[3]));
+       inc_td_cnt(urb);
+       /* Giveback the urb when all the tds are completed */
+       if (last_td_in_urb(td)) {
+               if ((urb->actual_length != urb->transfer_buffer_length &&
+                    (urb->transfer_flags & URB_SHORT_NOT_OK)) ||
+                   (status != 0 && !usb_endpoint_xfer_isoc(&urb->ep->desc)))
+                       xhci_dbg(xhci, "Giveback URB %p, len = %d, expected = %d, status = %d\n",
+                                urb, urb->actual_length,
+                                urb->transfer_buffer_length, status);
 
-       vdev = xhci->devs[slot_id];
-       ep_ctx = xhci_get_ep_ctx(xhci, vdev->out_ctx, ep_index);
-       trace_xhci_handle_cmd_stop_ep(ep_ctx);
+               /* set isoc urb status to 0 just as EHCI, UHCI, and OHCI */
+               if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS)
+                       status = 0;
+               xhci_giveback_urb_in_irq(xhci, td, status);
+       }
 
-       ep = &xhci->devs[slot_id]->eps[ep_index];
-       last_unlinked_td = list_last_entry(&ep->cancelled_td_list,
-                       struct xhci_td, cancelled_td_list);
+       return 0;
+}
 
-       if (list_empty(&ep->cancelled_td_list)) {
-               xhci_stop_watchdog_timer_in_irq(xhci, ep);
-               ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
-               return;
+
+/* Complete the cancelled URBs we unlinked from td_list. */
+static void xhci_giveback_invalidated_tds(struct xhci_virt_ep *ep)
+{
+       struct xhci_ring *ring;
+       struct xhci_td *td, *tmp_td;
+
+       list_for_each_entry_safe(td, tmp_td, &ep->cancelled_td_list,
+                                cancelled_td_list) {
+
+               /*
+                * Doesn't matter what we pass for status, since the core will
+                * just overwrite it (because the URB has been unlinked).
+                */
+               ring = xhci_urb_to_transfer_ring(ep->xhci, td->urb);
+
+               if (td->cancel_status == TD_CLEARED)
+                       xhci_td_cleanup(ep->xhci, td, ring, 0);
+
+               if (ep->xhci->xhc_state & XHCI_STATE_DYING)
+                       return;
+       }
+}
+
+static int xhci_reset_halted_ep(struct xhci_hcd *xhci, unsigned int slot_id,
+                               unsigned int ep_index, enum xhci_ep_reset_type reset_type)
+{
+       struct xhci_command *command;
+       int ret = 0;
+
+       command = xhci_alloc_command(xhci, false, GFP_ATOMIC);
+       if (!command) {
+               ret = -ENOMEM;
+               goto done;
        }
 
-       /* Fix up the ep ring first, so HW stops executing cancelled TDs.
-        * We have the xHCI lock, so nothing can modify this list until we drop
-        * it.  We're also in the event handler, so we can't get re-interrupted
-        * if another Stop Endpoint command completes
+       ret = xhci_queue_reset_ep(xhci, command, slot_id, ep_index, reset_type);
+done:
+       if (ret)
+               xhci_err(xhci, "ERROR queuing reset endpoint for slot %d ep_index %d, %d\n",
+                        slot_id, ep_index, ret);
+       return ret;
+}
+
+static void xhci_handle_halted_endpoint(struct xhci_hcd *xhci,
+                               struct xhci_virt_ep *ep, unsigned int stream_id,
+                               struct xhci_td *td,
+                               enum xhci_ep_reset_type reset_type)
+{
+       unsigned int slot_id = ep->vdev->slot_id;
+       int err;
+
+       /*
+        * Avoid resetting endpoint if link is inactive. Can cause host hang.
+        * Device will be reset soon to recover the link so don't do anything
         */
-       list_for_each_entry(cur_td, &ep->cancelled_td_list, cancelled_td_list) {
+       if (ep->vdev->flags & VDEV_PORT_ERROR)
+               return;
+
+       /* add td to cancelled list and let reset ep handler take care of it */
+       if (reset_type == EP_HARD_RESET) {
+               ep->ep_state |= EP_HARD_CLEAR_TOGGLE;
+               if (td && list_empty(&td->cancelled_td_list)) {
+                       list_add_tail(&td->cancelled_td_list, &ep->cancelled_td_list);
+                       td->cancel_status = TD_HALTED;
+               }
+       }
+
+       if (ep->ep_state & EP_HALTED) {
+               xhci_dbg(xhci, "Reset ep command already pending\n");
+               return;
+       }
+
+       err = xhci_reset_halted_ep(xhci, slot_id, ep->ep_index, reset_type);
+       if (err)
+               return;
+
+       ep->ep_state |= EP_HALTED;
+
+       xhci_ring_cmd_db(xhci);
+}
+
+/*
+ * Fix up the ep ring first, so HW stops executing cancelled TDs.
+ * We have the xHCI lock, so nothing can modify this list until we drop it.
+ * We're also in the event handler, so we can't get re-interrupted if another
+ * Stop Endpoint command completes.
+ *
+ * Only call this when the ring is not in a running state.
+ */
+
+static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep)
+{
+       struct xhci_hcd         *xhci;
+       struct xhci_td          *td = NULL;
+       struct xhci_td          *tmp_td = NULL;
+       struct xhci_td          *cached_td = NULL;
+       struct xhci_ring        *ring;
+       u64                     hw_deq;
+       unsigned int            slot_id = ep->vdev->slot_id;
+       int                     err;
+
+       xhci = ep->xhci;
+
+       list_for_each_entry_safe(td, tmp_td, &ep->cancelled_td_list, cancelled_td_list) {
                xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
                                "Removing canceled TD starting at 0x%llx (dma).",
                                (unsigned long long)xhci_trb_virt_to_dma(
-                                       cur_td->start_seg, cur_td->first_trb));
-               ep_ring = xhci_urb_to_transfer_ring(xhci, cur_td->urb);
-               if (!ep_ring) {
-                       /* This shouldn't happen unless a driver is mucking
-                        * with the stream ID after submission.  This will
-                        * leave the TD on the hardware ring, and the hardware
-                        * will try to execute it, and may access a buffer
-                        * that has already been freed.  In the best case, the
-                        * hardware will execute it, and the event handler will
-                        * ignore the completion event for that TD, since it was
-                        * removed from the td_list for that endpoint.  In
-                        * short, don't muck with the stream ID after
-                        * submission.
-                        */
-                       xhci_warn(xhci, "WARN Cancelled URB %p "
-                                       "has invalid stream ID %u.\n",
-                                       cur_td->urb,
-                                       cur_td->urb->stream_id);
-                       goto remove_finished_td;
+                                       td->start_seg, td->first_trb));
+               list_del_init(&td->td_list);
+               ring = xhci_urb_to_transfer_ring(xhci, td->urb);
+               if (!ring) {
+                       xhci_warn(xhci, "WARN Cancelled URB %p has invalid stream ID %u.\n",
+                                 td->urb, td->urb->stream_id);
+                       continue;
                }
                /*
-                * If we stopped on the TD we need to cancel, then we have to
+                * If ring stopped on the TD we need to cancel, then we have to
                 * move the xHC endpoint ring dequeue pointer past this TD.
                 */
-               hw_deq = xhci_get_hw_deq(xhci, vdev, ep_index,
-                                        cur_td->urb->stream_id);
+               hw_deq = xhci_get_hw_deq(xhci, ep->vdev, ep->ep_index,
+                                        td->urb->stream_id);
                hw_deq &= ~0xf;
 
-               if (trb_in_td(xhci, cur_td->start_seg, cur_td->first_trb,
-                             cur_td->last_trb, hw_deq, false)) {
-                       xhci_find_new_dequeue_state(xhci, slot_id, ep_index,
-                                                   cur_td->urb->stream_id,
-                                                   cur_td, &deq_state);
+               if (trb_in_td(xhci, td->start_seg, td->first_trb,
+                             td->last_trb, hw_deq, false)) {
+                       switch (td->cancel_status) {
+                       case TD_CLEARED: /* TD is already no-op */
+                       case TD_CLEARING_CACHE: /* set TR deq command already queued */
+                               break;
+                       case TD_DIRTY: /* TD is cached, clear it */
+                       case TD_HALTED:
+                               /* FIXME  stream case, several stopped rings */
+                               cached_td = td;
+                               break;
+                       }
                } else {
-                       td_to_noop(xhci, ep_ring, cur_td, false);
+                       td_to_noop(xhci, ring, td, false);
+                       td->cancel_status = TD_CLEARED;
                }
+       }
+       if (cached_td) {
+               cached_td->cancel_status = TD_CLEARING_CACHE;
+
+               err = xhci_move_dequeue_past_td(xhci, slot_id, ep->ep_index,
+                                               cached_td->urb->stream_id,
+                                               cached_td);
+               /* Failed to move past cached td, try just setting it noop */
+               if (err) {
+                       td_to_noop(xhci, ring, cached_td, false);
+                       cached_td->cancel_status = TD_CLEARED;
+               }
+               cached_td = NULL;
+       }
+       return 0;
+}
 
-remove_finished_td:
-               /*
-                * The event handler won't see a completion for this TD anymore,
-                * so remove it from the endpoint ring's TD list.  Keep it in
-                * the cancelled TD list for URB completion later.
-                */
-               list_del_init(&cur_td->td_list);
+/*
+ * Returns the TD the endpoint ring halted on.
+ * Only call for non-running rings without streams.
+ */
+static struct xhci_td *find_halted_td(struct xhci_virt_ep *ep)
+{
+       struct xhci_td  *td;
+       u64             hw_deq;
+
+       if (!list_empty(&ep->ring->td_list)) { /* Not streams compatible */
+               hw_deq = xhci_get_hw_deq(ep->xhci, ep->vdev, ep->ep_index, 0);
+               hw_deq &= ~0xf;
+               td = list_first_entry(&ep->ring->td_list, struct xhci_td, td_list);
+               if (trb_in_td(ep->xhci, td->start_seg, td->first_trb,
+                               td->last_trb, hw_deq, false))
+                       return td;
        }
+       return NULL;
+}
 
-       xhci_stop_watchdog_timer_in_irq(xhci, ep);
+/*
+ * When we get a command completion for a Stop Endpoint Command, we need to
+ * unlink any cancelled TDs from the ring.  There are two ways to do that:
+ *
+ *  1. If the HW was in the middle of processing the TD that needs to be
+ *     cancelled, then we must move the ring's dequeue pointer past the last TRB
+ *     in the TD with a Set Dequeue Pointer Command.
+ *  2. Otherwise, we turn all the TRBs in the TD into No-op TRBs (with the chain
+ *     bit cleared) so that the HW will skip over them.
+ */
+static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id,
+                                   union xhci_trb *trb, u32 comp_code)
+{
+       unsigned int ep_index;
+       struct xhci_virt_ep *ep;
+       struct xhci_ep_ctx *ep_ctx;
+       struct xhci_td *td = NULL;
+       enum xhci_ep_reset_type reset_type;
+       struct xhci_command *command;
 
-       /* If necessary, queue a Set Transfer Ring Dequeue Pointer command */
-       if (deq_state.new_deq_ptr && deq_state.new_deq_seg) {
-               xhci_queue_new_dequeue_state(xhci, slot_id, ep_index,
-                                            &deq_state);
-               xhci_ring_cmd_db(xhci);
-       } else {
-               /* Otherwise ring the doorbell(s) to restart queued transfers */
-               ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
+       if (unlikely(TRB_TO_SUSPEND_PORT(le32_to_cpu(trb->generic.field[3])))) {
+               if (!xhci->devs[slot_id])
+                       xhci_warn(xhci, "Stop endpoint command completion for disabled slot %u\n",
+                                 slot_id);
+               return;
        }
 
+       ep_index = TRB_TO_EP_INDEX(le32_to_cpu(trb->generic.field[3]));
+       ep = xhci_get_virt_ep(xhci, slot_id, ep_index);
+       if (!ep)
+               return;
+
+       ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep_index);
+
+       trace_xhci_handle_cmd_stop_ep(ep_ctx);
+
+       if (comp_code == COMP_CONTEXT_STATE_ERROR) {
        /*
-        * Drop the lock and complete the URBs in the cancelled TD list.
-        * New TDs to be cancelled might be added to the end of the list before
-        * we can complete all the URBs for the TDs we already unlinked.
-        * So stop when we've completed the URB for the last TD we unlinked.
+        * If stop endpoint command raced with a halting endpoint we need to
+        * reset the host side endpoint first.
+        * If the TD we halted on isn't cancelled the TD should be given back
+        * with a proper error code, and the ring dequeue moved past the TD.
+        * In the streams case we can't find hw_deq or the TD we halted on, so
+        * do a soft reset.
+        *
+        * Proper error code is unknown here, it would be -EPIPE if device side
+        * of endpoint halted (aka STALL), and -EPROTO if not (transaction error).
+        * We use -EPROTO, if device is stalled it should return a stall error on
+        * next transfer, which then will return -EPIPE, and device side stall is
+        * noted and cleared by class driver.
         */
-       do {
-               cur_td = list_first_entry(&ep->cancelled_td_list,
-                               struct xhci_td, cancelled_td_list);
-               list_del_init(&cur_td->cancelled_td_list);
+               switch (GET_EP_CTX_STATE(ep_ctx)) {
+               case EP_STATE_HALTED:
+                       xhci_dbg(xhci, "Stop ep completion raced with stall, reset ep\n");
+                       if (ep->ep_state & EP_HAS_STREAMS) {
+                               reset_type = EP_SOFT_RESET;
+                       } else {
+                               reset_type = EP_HARD_RESET;
+                               td = find_halted_td(ep);
+                               if (td)
+                                       td->status = -EPROTO;
+                       }
+                       /* reset ep, reset handler cleans up cancelled tds */
+                       xhci_handle_halted_endpoint(xhci, ep, 0, td, reset_type);
+                       xhci_stop_watchdog_timer_in_irq(xhci, ep);
+                       return;
+               case EP_STATE_RUNNING:
+                       /* Race, HW handled stop ep cmd before ep was running */
+                       command = xhci_alloc_command(xhci, false, GFP_ATOMIC);
+                       if (!command)
+                               xhci_stop_watchdog_timer_in_irq(xhci, ep);
 
-               /* Clean up the cancelled URB */
-               /* Doesn't matter what we pass for status, since the core will
-                * just overwrite it (because the URB has been unlinked).
-                */
-               ep_ring = xhci_urb_to_transfer_ring(xhci, cur_td->urb);
-               xhci_unmap_td_bounce_buffer(xhci, ep_ring, cur_td);
-               inc_td_cnt(cur_td->urb);
-               if (last_td_in_urb(cur_td))
-                       xhci_giveback_urb_in_irq(xhci, cur_td, 0);
+                       mod_timer(&ep->stop_cmd_timer,
+                                 jiffies + XHCI_STOP_EP_CMD_TIMEOUT * HZ);
+                       xhci_queue_stop_endpoint(xhci, command, slot_id, ep_index, 0);
+                       xhci_ring_cmd_db(xhci);
 
-               /* Stop processing the cancelled list if the watchdog timer is
-                * running.
-                */
-               if (xhci->xhc_state & XHCI_STATE_DYING)
                        return;
-       } while (cur_td != last_unlinked_td);
+               default:
+                       break;
+               }
+       }
+       /* will queue a set TR deq if stopped on a cancelled, uncleared TD */
+       xhci_invalidate_cancelled_tds(ep);
+       xhci_stop_watchdog_timer_in_irq(xhci, ep);
 
-       /* Return to the event handler with xhci->lock re-acquired */
+       /* Give back invalidated TDs, then ring doorbell(s) to restart queued transfers */
+       xhci_giveback_invalidated_tds(ep);
+       ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
 }
 
 static void xhci_kill_ring_urbs(struct xhci_hcd *xhci, struct xhci_ring *ring)
@@ -1069,17 +1297,18 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id,
        unsigned int ep_index;
        unsigned int stream_id;
        struct xhci_ring *ep_ring;
-       struct xhci_virt_device *dev;
        struct xhci_virt_ep *ep;
        struct xhci_ep_ctx *ep_ctx;
        struct xhci_slot_ctx *slot_ctx;
+       struct xhci_td *td, *tmp_td;
 
        ep_index = TRB_TO_EP_INDEX(le32_to_cpu(trb->generic.field[3]));
        stream_id = TRB_TO_STREAM_ID(le32_to_cpu(trb->generic.field[2]));
-       dev = xhci->devs[slot_id];
-       ep = &dev->eps[ep_index];
+       ep = xhci_get_virt_ep(xhci, slot_id, ep_index);
+       if (!ep)
+               return;
 
-       ep_ring = xhci_stream_id_to_ring(dev, ep_index, stream_id);
+       ep_ring = xhci_virt_ep_to_ring(xhci, ep, stream_id);
        if (!ep_ring) {
                xhci_warn(xhci, "WARN Set TR deq ptr command for freed stream ID %u\n",
                                stream_id);
@@ -1087,8 +1316,8 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id,
                goto cleanup;
        }
 
-       ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index);
-       slot_ctx = xhci_get_slot_ctx(xhci, dev->out_ctx);
+       ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep_index);
+       slot_ctx = xhci_get_slot_ctx(xhci, ep->vdev->out_ctx);
        trace_xhci_handle_cmd_set_deq(slot_ctx);
        trace_xhci_handle_cmd_set_deq_ep(ep_ctx);
 
@@ -1141,7 +1370,7 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id,
                        /* Update the ring's dequeue segment and dequeue pointer
                         * to reflect the new position.
                         */
-                       update_ring_for_set_deq_completion(xhci, dev,
+                       update_ring_for_set_deq_completion(xhci, ep->vdev,
                                ep_ring, ep_index);
                } else {
                        xhci_warn(xhci, "Mismatch between completed Set TR Deq Ptr command & xHCI internal state.\n");
@@ -1149,11 +1378,19 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id,
                                  ep->queued_deq_seg, ep->queued_deq_ptr);
                }
        }
-
+       /* HW cached TDs cleared from cache, give them back */
+       list_for_each_entry_safe(td, tmp_td, &ep->cancelled_td_list,
+                                cancelled_td_list) {
+               ep_ring = xhci_urb_to_transfer_ring(ep->xhci, td->urb);
+               if (td->cancel_status == TD_CLEARING_CACHE) {
+                       td->cancel_status = TD_CLEARED;
+                       xhci_td_cleanup(ep->xhci, td, ep_ring, td->status);
+               }
+       }
 cleanup:
-       dev->eps[ep_index].ep_state &= ~SET_DEQ_PENDING;
-       dev->eps[ep_index].queued_deq_seg = NULL;
-       dev->eps[ep_index].queued_deq_ptr = NULL;
+       ep->ep_state &= ~SET_DEQ_PENDING;
+       ep->queued_deq_seg = NULL;
+       ep->queued_deq_ptr = NULL;
        /* Restart any rings with pending URBs */
        ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
 }
@@ -1161,13 +1398,16 @@ cleanup:
 static void xhci_handle_cmd_reset_ep(struct xhci_hcd *xhci, int slot_id,
                union xhci_trb *trb, u32 cmd_comp_code)
 {
-       struct xhci_virt_device *vdev;
+       struct xhci_virt_ep *ep;
        struct xhci_ep_ctx *ep_ctx;
        unsigned int ep_index;
 
        ep_index = TRB_TO_EP_INDEX(le32_to_cpu(trb->generic.field[3]));
-       vdev = xhci->devs[slot_id];
-       ep_ctx = xhci_get_ep_ctx(xhci, vdev->out_ctx, ep_index);
+       ep = xhci_get_virt_ep(xhci, slot_id, ep_index);
+       if (!ep)
+               return;
+
+       ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep_index);
        trace_xhci_handle_cmd_reset_ep(ep_ctx);
 
        /* This command will only fail if the endpoint wasn't halted,
@@ -1176,27 +1416,15 @@ static void xhci_handle_cmd_reset_ep(struct xhci_hcd *xhci, int slot_id,
        xhci_dbg_trace(xhci, trace_xhci_dbg_reset_ep,
                "Ignoring reset ep completion code of %u", cmd_comp_code);
 
-       /* HW with the reset endpoint quirk needs to have a configure endpoint
-        * command complete before the endpoint can be used.  Queue that here
-        * because the HW can't handle two commands being queued in a row.
-        */
-       if (xhci->quirks & XHCI_RESET_EP_QUIRK) {
-               struct xhci_command *command;
+       /* Cleanup cancelled TDs as ep is stopped. May queue a Set TR Deq cmd */
+       xhci_invalidate_cancelled_tds(ep);
 
-               command = xhci_alloc_command(xhci, false, GFP_ATOMIC);
-               if (!command)
-                       return;
+       if (xhci->quirks & XHCI_RESET_EP_QUIRK)
+               xhci_dbg(xhci, "Note: Removed workaround to queue config ep for this hw");
+       /* Clear our internal halted state */
+       ep->ep_state &= ~EP_HALTED;
 
-               xhci_dbg_trace(xhci, trace_xhci_dbg_quirks,
-                               "Queueing configure endpoint command");
-               xhci_queue_configure_endpoint(xhci, command,
-                               xhci->devs[slot_id]->in_ctx->dma, slot_id,
-                               false);
-               xhci_ring_cmd_db(xhci);
-       } else {
-               /* Clear our internal halted state */
-               xhci->devs[slot_id]->eps[ep_index].ep_state &= ~EP_HALTED;
-       }
+       xhci_giveback_invalidated_tds(ep);
 
        /* if this was a soft reset, then restart */
        if ((le32_to_cpu(trb->generic.field[3])) & TRB_TSP)
@@ -1231,7 +1459,7 @@ static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id)
 }
 
 static void xhci_handle_cmd_config_ep(struct xhci_hcd *xhci, int slot_id,
-               struct xhci_event_cmd *event, u32 cmd_comp_code)
+               u32 cmd_comp_code)
 {
        struct xhci_virt_device *virt_dev;
        struct xhci_input_control_ctx *ctrl_ctx;
@@ -1249,6 +1477,8 @@ static void xhci_handle_cmd_config_ep(struct xhci_hcd *xhci, int slot_id,
         * is not waiting on the configure endpoint command.
         */
        virt_dev = xhci->devs[slot_id];
+       if (!virt_dev)
+               return;
        ctrl_ctx = xhci_get_input_control_ctx(virt_dev->in_ctx);
        if (!ctrl_ctx) {
                xhci_warn(xhci, "Could not get input context, bad type.\n");
@@ -1293,24 +1523,27 @@ static void xhci_handle_cmd_addr_dev(struct xhci_hcd *xhci, int slot_id)
        struct xhci_slot_ctx *slot_ctx;
 
        vdev = xhci->devs[slot_id];
+       if (!vdev)
+               return;
        slot_ctx = xhci_get_slot_ctx(xhci, vdev->out_ctx);
        trace_xhci_handle_cmd_addr_dev(slot_ctx);
 }
 
-static void xhci_handle_cmd_reset_dev(struct xhci_hcd *xhci, int slot_id,
-               struct xhci_event_cmd *event)
+static void xhci_handle_cmd_reset_dev(struct xhci_hcd *xhci, int slot_id)
 {
        struct xhci_virt_device *vdev;
        struct xhci_slot_ctx *slot_ctx;
 
        vdev = xhci->devs[slot_id];
+       if (!vdev) {
+               xhci_warn(xhci, "Reset device command completion for disabled slot %u\n",
+                         slot_id);
+               return;
+       }
        slot_ctx = xhci_get_slot_ctx(xhci, vdev->out_ctx);
        trace_xhci_handle_cmd_reset_dev(slot_ctx);
 
        xhci_dbg(xhci, "Completed reset device command.\n");
-       if (!xhci->devs[slot_id])
-               xhci_warn(xhci, "Reset device command completion "
-                               "for disabled slot %u\n", slot_id);
 }
 
 static void xhci_handle_cmd_nec_get_fw(struct xhci_hcd *xhci,
@@ -1403,7 +1636,7 @@ time_out_completed:
 static void handle_cmd_completion(struct xhci_hcd *xhci,
                struct xhci_event_cmd *event)
 {
-       int slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags));
+       unsigned int slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags));
        u64 cmd_dma;
        dma_addr_t cmd_dequeue_dma;
        u32 cmd_comp_code;
@@ -1411,6 +1644,11 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
        struct xhci_command *cmd;
        u32 cmd_type;
 
+       if (slot_id >= MAX_HC_SLOTS) {
+               xhci_warn(xhci, "Invalid slot_id %u\n", slot_id);
+               return;
+       }
+
        cmd_dma = le64_to_cpu(event->cmd_trb);
        cmd_trb = xhci->cmd_ring->dequeue;
 
@@ -1471,8 +1709,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
                break;
        case TRB_CONFIG_EP:
                if (!cmd->completion)
-                       xhci_handle_cmd_config_ep(xhci, slot_id, event,
-                                                 cmd_comp_code);
+                       xhci_handle_cmd_config_ep(xhci, slot_id, cmd_comp_code);
                break;
        case TRB_EVAL_CONTEXT:
                break;
@@ -1483,7 +1720,8 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
                WARN_ON(slot_id != TRB_TO_SLOT_ID(
                                le32_to_cpu(cmd_trb->generic.field[3])));
                if (!cmd->completion)
-                       xhci_handle_cmd_stop_ep(xhci, slot_id, cmd_trb, event);
+                       xhci_handle_cmd_stop_ep(xhci, slot_id, cmd_trb,
+                                               cmd_comp_code);
                break;
        case TRB_SET_DEQ:
                WARN_ON(slot_id != TRB_TO_SLOT_ID(
@@ -1506,7 +1744,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
                 */
                slot_id = TRB_TO_SLOT_ID(
                                le32_to_cpu(cmd_trb->generic.field[3]));
-               xhci_handle_cmd_reset_dev(xhci, slot_id, event);
+               xhci_handle_cmd_reset_dev(xhci, slot_id);
                break;
        case TRB_NEC_GET_FW:
                xhci_handle_cmd_nec_get_fw(xhci, event);
@@ -1533,11 +1771,8 @@ event_handled:
 }
 
 static void handle_vendor_event(struct xhci_hcd *xhci,
-               union xhci_trb *event)
+                               union xhci_trb *event, u32 trb_type)
 {
-       u32 trb_type;
-
-       trb_type = TRB_FIELD_TO_TYPE(le32_to_cpu(event->generic.field[3]));
        xhci_dbg(xhci, "Vendor specific event TRB type = %u\n", trb_type);
        if (trb_type == TRB_NEC_CMD_COMP && (xhci->quirks & XHCI_NEC_HOST))
                handle_cmd_completion(xhci, &event->event_cmd);
@@ -1854,37 +2089,6 @@ static void xhci_clear_hub_tt_buffer(struct xhci_hcd *xhci, struct xhci_td *td,
        }
 }
 
-static void xhci_cleanup_halted_endpoint(struct xhci_hcd *xhci,
-               unsigned int slot_id, unsigned int ep_index,
-               unsigned int stream_id, struct xhci_td *td,
-               enum xhci_ep_reset_type reset_type)
-{
-       struct xhci_virt_ep *ep = &xhci->devs[slot_id]->eps[ep_index];
-       struct xhci_command *command;
-
-       /*
-        * Avoid resetting endpoint if link is inactive. Can cause host hang.
-        * Device will be reset soon to recover the link so don't do anything
-        */
-       if (xhci->devs[slot_id]->flags & VDEV_PORT_ERROR)
-               return;
-
-       command = xhci_alloc_command(xhci, false, GFP_ATOMIC);
-       if (!command)
-               return;
-
-       ep->ep_state |= EP_HALTED;
-
-       xhci_queue_reset_ep(xhci, command, slot_id, ep_index, reset_type);
-
-       if (reset_type == EP_HARD_RESET) {
-               ep->ep_state |= EP_HARD_CLEAR_TOGGLE;
-               xhci_cleanup_stalled_ring(xhci, slot_id, ep_index, stream_id,
-                                         td);
-       }
-       xhci_ring_cmd_db(xhci);
-}
-
 /* Check if an error has halted the endpoint ring.  The class driver will
  * cleanup the halt for a non-default control endpoint if we indicate a stall.
  * However, a babble and other errors also halt the endpoint ring, and the class
@@ -1925,82 +2129,63 @@ int xhci_is_vendor_info_code(struct xhci_hcd *xhci, unsigned int trb_comp_code)
        return 0;
 }
 
-static int xhci_td_cleanup(struct xhci_hcd *xhci, struct xhci_td *td,
-               struct xhci_ring *ep_ring, int *status)
-{
-       struct urb *urb = NULL;
-
-       /* Clean up the endpoint's TD list */
-       urb = td->urb;
-
-       /* if a bounce buffer was used to align this td then unmap it */
-       xhci_unmap_td_bounce_buffer(xhci, ep_ring, td);
-
-       /* Do one last check of the actual transfer length.
-        * If the host controller said we transferred more data than the buffer
-        * length, urb->actual_length will be a very big number (since it's
-        * unsigned).  Play it safe and say we didn't transfer anything.
-        */
-       if (urb->actual_length > urb->transfer_buffer_length) {
-               xhci_warn(xhci, "URB req %u and actual %u transfer length mismatch\n",
-                         urb->transfer_buffer_length, urb->actual_length);
-               urb->actual_length = 0;
-               *status = 0;
-       }
-       list_del_init(&td->td_list);
-       /* Was this TD slated to be cancelled but completed anyway? */
-       if (!list_empty(&td->cancelled_td_list))
-               list_del_init(&td->cancelled_td_list);
-
-       inc_td_cnt(urb);
-       /* Giveback the urb when all the tds are completed */
-       if (last_td_in_urb(td)) {
-               if ((urb->actual_length != urb->transfer_buffer_length &&
-                    (urb->transfer_flags & URB_SHORT_NOT_OK)) ||
-                   (*status != 0 && !usb_endpoint_xfer_isoc(&urb->ep->desc)))
-                       xhci_dbg(xhci, "Giveback URB %p, len = %d, expected = %d, status = %d\n",
-                                urb, urb->actual_length,
-                                urb->transfer_buffer_length, *status);
-
-               /* set isoc urb status to 0 just as EHCI, UHCI, and OHCI */
-               if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS)
-                       *status = 0;
-               xhci_giveback_urb_in_irq(xhci, td, *status);
-       }
-
-       return 0;
-}
-
 static int finish_td(struct xhci_hcd *xhci, struct xhci_td *td,
-       struct xhci_transfer_event *event,
-       struct xhci_virt_ep *ep, int *status)
+       struct xhci_transfer_event *event, struct xhci_virt_ep *ep)
 {
-       struct xhci_virt_device *xdev;
        struct xhci_ep_ctx *ep_ctx;
        struct xhci_ring *ep_ring;
-       unsigned int slot_id;
        u32 trb_comp_code;
-       int ep_index;
 
-       slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags));
-       xdev = xhci->devs[slot_id];
-       ep_index = TRB_TO_EP_ID(le32_to_cpu(event->flags)) - 1;
        ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer));
-       ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index);
+       ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep->ep_index);
        trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
 
-       if (trb_comp_code == COMP_STOPPED_LENGTH_INVALID ||
-                       trb_comp_code == COMP_STOPPED ||
-                       trb_comp_code == COMP_STOPPED_SHORT_PACKET) {
-               /* The Endpoint Stop Command completion will take care of any
-                * stopped TDs.  A stopped TD may be restarted, so don't update
+       switch (trb_comp_code) {
+       case COMP_STOPPED_LENGTH_INVALID:
+       case COMP_STOPPED_SHORT_PACKET:
+       case COMP_STOPPED:
+               /*
+                * The "Stop Endpoint" completion will take care of any
+                * stopped TDs. A stopped TD may be restarted, so don't update
                 * the ring dequeue pointer or take this TD off any lists yet.
                 */
                return 0;
-       }
-       if (trb_comp_code == COMP_STALL_ERROR ||
-               xhci_requires_manual_halt_cleanup(xhci, ep_ctx,
-                                               trb_comp_code)) {
+       case COMP_USB_TRANSACTION_ERROR:
+       case COMP_BABBLE_DETECTED_ERROR:
+       case COMP_SPLIT_TRANSACTION_ERROR:
+               /*
+                * If endpoint context state is not halted we might be
+                * racing with a reset endpoint command issued by an unsuccessful
+                * stop endpoint completion (context error). In that case the
+                * td should be on the cancelled list, and EP_HALTED flag set.
+                *
+                * Or it is not halted due to the 0.95 spec stating that a
+                * babbling control endpoint should not halt. The 0.96 spec
+                * again says it should.  Some HW claims to be 0.95 compliant,
+                * but it halts the control endpoint anyway.
+                */
+               if (GET_EP_CTX_STATE(ep_ctx) != EP_STATE_HALTED) {
+                       /*
+                        * If EP_HALTED is set and TD is on the cancelled list
+                        * the TD and dequeue pointer will be handled by reset
+                        * ep command completion
+                        */
+                       if ((ep->ep_state & EP_HALTED) &&
+                           !list_empty(&td->cancelled_td_list)) {
+                               xhci_dbg(xhci, "Already resolving halted ep for 0x%llx\n",
+                                        (unsigned long long)xhci_trb_virt_to_dma(
+                                                td->start_seg, td->first_trb));
+                               return 0;
+                       }
+                       /* endpoint not halted, don't reset it */
+                       break;
+               }
+               /* Almost same procedure as for STALL_ERROR below */
+               xhci_clear_hub_tt_buffer(xhci, td, ep);
+               xhci_handle_halted_endpoint(xhci, ep, ep_ring->stream_id, td,
+                                           EP_HARD_RESET);
+               return 0;
+       case COMP_STALL_ERROR:
                /*
                 * xhci internal endpoint state will go to a "halt" state for
                 * any stall, including default control pipe protocol stall.
@@ -2011,18 +2196,24 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_td *td,
                 * stall later. Hub TT buffer should only be cleared for FS/LS
                 * devices behind HS hubs for functional stalls.
                 */
-               if ((ep_index != 0) || (trb_comp_code != COMP_STALL_ERROR))
+               if (ep->ep_index != 0)
                        xhci_clear_hub_tt_buffer(xhci, td, ep);
-               xhci_cleanup_halted_endpoint(xhci, slot_id, ep_index,
-                                       ep_ring->stream_id, td, EP_HARD_RESET);
-       } else {
-               /* Update ring dequeue pointer */
-               while (ep_ring->dequeue != td->last_trb)
-                       inc_deq(xhci, ep_ring);
-               inc_deq(xhci, ep_ring);
+
+               xhci_handle_halted_endpoint(xhci, ep, ep_ring->stream_id, td,
+                                           EP_HARD_RESET);
+
+               return 0; /* xhci_handle_halted_endpoint marked td cancelled */
+       default:
+               break;
        }
 
-       return xhci_td_cleanup(xhci, td, ep_ring, status);
+       /* Update ring dequeue pointer */
+       ep_ring->dequeue = td->last_trb;
+       ep_ring->deq_seg = td->last_trb_seg;
+       ep_ring->num_trbs_free += td->num_trbs - 1;
+       inc_deq(xhci, ep_ring);
+
+       return xhci_td_cleanup(xhci, td, ep_ring, td->status);
 }
 
 /* sum trb lengths from ring dequeue up to stop_trb, _excluding_ stop_trb */
@@ -2045,21 +2236,15 @@ static int sum_trb_lengths(struct xhci_hcd *xhci, struct xhci_ring *ring,
  */
 static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td,
        union xhci_trb *ep_trb, struct xhci_transfer_event *event,
-       struct xhci_virt_ep *ep, int *status)
+       struct xhci_virt_ep *ep)
 {
-       struct xhci_virt_device *xdev;
-       unsigned int slot_id;
-       int ep_index;
        struct xhci_ep_ctx *ep_ctx;
        u32 trb_comp_code;
        u32 remaining, requested;
        u32 trb_type;
 
        trb_type = TRB_FIELD_TO_TYPE(le32_to_cpu(ep_trb->generic.field[3]));
-       slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags));
-       xdev = xhci->devs[slot_id];
-       ep_index = TRB_TO_EP_ID(le32_to_cpu(event->flags)) - 1;
-       ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index);
+       ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep->ep_index);
        trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
        requested = td->urb->transfer_buffer_length;
        remaining = EVENT_TRB_LEN(le32_to_cpu(event->transfer_len));
@@ -2069,13 +2254,13 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td,
                if (trb_type != TRB_STATUS) {
                        xhci_warn(xhci, "WARN: Success on ctrl %s TRB without IOC set?\n",
                                  (trb_type == TRB_DATA) ? "data" : "setup");
-                       *status = -ESHUTDOWN;
+                       td->status = -ESHUTDOWN;
                        break;
                }
-               *status = 0;
+               td->status = 0;
                break;
        case COMP_SHORT_PACKET:
-               *status = 0;
+               td->status = 0;
                break;
        case COMP_STOPPED_SHORT_PACKET:
                if (trb_type == TRB_DATA || trb_type == TRB_NORMAL)
@@ -2107,7 +2292,7 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td,
                                                       ep_ctx, trb_comp_code))
                        break;
                xhci_dbg(xhci, "TRB error %u, halted endpoint index = %u\n",
-                        trb_comp_code, ep_index);
+                        trb_comp_code, ep->ep_index);
                fallthrough;
        case COMP_STALL_ERROR:
                /* Did we transfer part of the data (middle) phase? */
@@ -2139,7 +2324,7 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td,
                td->urb->actual_length = requested;
 
 finish_td:
-       return finish_td(xhci, td, event, ep, status);
+       return finish_td(xhci, td, event, ep);
 }
 
 /*
@@ -2147,9 +2332,8 @@ finish_td:
  */
 static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
        union xhci_trb *ep_trb, struct xhci_transfer_event *event,
-       struct xhci_virt_ep *ep, int *status)
+       struct xhci_virt_ep *ep)
 {
-       struct xhci_ring *ep_ring;
        struct urb_priv *urb_priv;
        int idx;
        struct usb_iso_packet_descriptor *frame;
@@ -2158,7 +2342,6 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
        u32 remaining, requested, ep_trb_len;
        int short_framestatus;
 
-       ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer));
        trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
        urb_priv = td->urb->hcpriv;
        idx = urb_priv->num_tds_done;
@@ -2219,26 +2402,23 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
        }
 
        if (sum_trbs_for_length)
-               frame->actual_length = sum_trb_lengths(xhci, ep_ring, ep_trb) +
+               frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) +
                        ep_trb_len - remaining;
        else
                frame->actual_length = requested;
 
        td->urb->actual_length += frame->actual_length;
 
-       return finish_td(xhci, td, event, ep, status);
+       return finish_td(xhci, td, event, ep);
 }
 
 static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
-                       struct xhci_transfer_event *event,
-                       struct xhci_virt_ep *ep, int *status)
+                       struct xhci_virt_ep *ep, int status)
 {
-       struct xhci_ring *ep_ring;
        struct urb_priv *urb_priv;
        struct usb_iso_packet_descriptor *frame;
        int idx;
 
-       ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer));
        urb_priv = td->urb->hcpriv;
        idx = urb_priv->num_tds_done;
        frame = &td->urb->iso_frame_desc[idx];
@@ -2250,11 +2430,12 @@ static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
        frame->actual_length = 0;
 
        /* Update ring dequeue pointer */
-       while (ep_ring->dequeue != td->last_trb)
-               inc_deq(xhci, ep_ring);
-       inc_deq(xhci, ep_ring);
+       ep->ring->dequeue = td->last_trb;
+       ep->ring->deq_seg = td->last_trb_seg;
+       ep->ring->num_trbs_free += td->num_trbs - 1;
+       inc_deq(xhci, ep->ring);
 
-       return xhci_td_cleanup(xhci, td, ep_ring, status);
+       return xhci_td_cleanup(xhci, td, ep->ring, status);
 }
 
 /*
@@ -2262,18 +2443,14 @@ static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
  */
 static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
        union xhci_trb *ep_trb, struct xhci_transfer_event *event,
-       struct xhci_virt_ep *ep, int *status)
+       struct xhci_virt_ep *ep)
 {
        struct xhci_slot_ctx *slot_ctx;
        struct xhci_ring *ep_ring;
        u32 trb_comp_code;
        u32 remaining, requested, ep_trb_len;
-       unsigned int slot_id;
-       int ep_index;
 
-       slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags));
-       slot_ctx = xhci_get_slot_ctx(xhci, xhci->devs[slot_id]->out_ctx);
-       ep_index = TRB_TO_EP_ID(le32_to_cpu(event->flags)) - 1;
+       slot_ctx = xhci_get_slot_ctx(xhci, ep->vdev->out_ctx);
        ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer));
        trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
        remaining = EVENT_TRB_LEN(le32_to_cpu(event->transfer_len));
@@ -2290,13 +2467,13 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
                                 td->urb->ep->desc.bEndpointAddress,
                                 requested, remaining);
                }
-               *status = 0;
+               td->status = 0;
                break;
        case COMP_SHORT_PACKET:
                xhci_dbg(xhci, "ep %#x - asked for %d bytes, %d bytes untransferred\n",
                         td->urb->ep->desc.bEndpointAddress,
                         requested, remaining);
-               *status = 0;
+               td->status = 0;
                break;
        case COMP_STOPPED_SHORT_PACKET:
                td->urb->actual_length = remaining;
@@ -2310,9 +2487,11 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
                if ((ep_ring->err_count++ > MAX_SOFT_RETRY) ||
                    le32_to_cpu(slot_ctx->tt_info) & TT_SLOT)
                        break;
-               *status = 0;
-               xhci_cleanup_halted_endpoint(xhci, slot_id, ep_index,
-                                       ep_ring->stream_id, td, EP_SOFT_RESET);
+
+               td->status = 0;
+
+               xhci_handle_halted_endpoint(xhci, ep, ep_ring->stream_id, td,
+                                           EP_SOFT_RESET);
                return 0;
        default:
                /* do nothing */
@@ -2331,7 +2510,7 @@ finish_td:
                          remaining);
                td->urb->actual_length = 0;
        }
-       return finish_td(xhci, td, event, ep, status);
+       return finish_td(xhci, td, event, ep);
 }
 
 /*
@@ -2342,7 +2521,6 @@ finish_td:
 static int handle_tx_event(struct xhci_hcd *xhci,
                struct xhci_transfer_event *event)
 {
-       struct xhci_virt_device *xdev;
        struct xhci_virt_ep *ep;
        struct xhci_ring *ep_ring;
        unsigned int slot_id;
@@ -2363,16 +2541,14 @@ static int handle_tx_event(struct xhci_hcd *xhci,
        trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
        ep_trb_dma = le64_to_cpu(event->buffer);
 
-       xdev = xhci->devs[slot_id];
-       if (!xdev) {
-               xhci_err(xhci, "ERROR Transfer event pointed to bad slot %u\n",
-                        slot_id);
+       ep = xhci_get_virt_ep(xhci, slot_id, ep_index);
+       if (!ep) {
+               xhci_err(xhci, "ERROR Invalid Transfer event\n");
                goto err_out;
        }
 
-       ep = &xdev->eps[ep_index];
        ep_ring = xhci_dma_to_transfer_ring(ep, ep_trb_dma);
-       ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index);
+       ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep_index);
 
        if (GET_EP_CTX_STATE(ep_ctx) == EP_STATE_DISABLED) {
                xhci_err(xhci,
@@ -2388,8 +2564,8 @@ static int handle_tx_event(struct xhci_hcd *xhci,
                case COMP_USB_TRANSACTION_ERROR:
                case COMP_INVALID_STREAM_TYPE_ERROR:
                case COMP_INVALID_STREAM_ID_ERROR:
-                       xhci_cleanup_halted_endpoint(xhci, slot_id, ep_index, 0,
-                                                    NULL, EP_SOFT_RESET);
+                       xhci_handle_halted_endpoint(xhci, ep, 0, NULL,
+                                                   EP_SOFT_RESET);
                        goto cleanup;
                case COMP_RING_UNDERRUN:
                case COMP_RING_OVERRUN:
@@ -2445,7 +2621,6 @@ static int handle_tx_event(struct xhci_hcd *xhci,
        case COMP_STALL_ERROR:
                xhci_dbg(xhci, "Stalled endpoint for slot %u ep %u\n", slot_id,
                         ep_index);
-               ep->ep_state |= EP_HALTED;
                status = -EPIPE;
                break;
        case COMP_SPLIT_TRANSACTION_ERROR:
@@ -2573,11 +2748,10 @@ static int handle_tx_event(struct xhci_hcd *xhci,
                        if (trb_comp_code == COMP_STALL_ERROR ||
                            xhci_requires_manual_halt_cleanup(xhci, ep_ctx,
                                                              trb_comp_code)) {
-                               xhci_cleanup_halted_endpoint(xhci, slot_id,
-                                                            ep_index,
-                                                            ep_ring->stream_id,
-                                                            NULL,
-                                                            EP_HARD_RESET);
+                               xhci_handle_halted_endpoint(xhci, ep,
+                                                           ep_ring->stream_id,
+                                                           NULL,
+                                                           EP_HARD_RESET);
                        }
                        goto cleanup;
                }
@@ -2636,7 +2810,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
                                return -ESHUTDOWN;
                        }
 
-                       skip_isoc_td(xhci, td, event, ep, &status);
+                       skip_isoc_td(xhci, td, ep, status);
                        goto cleanup;
                }
                if (trb_comp_code == COMP_SHORT_PACKET)
@@ -2664,25 +2838,26 @@ static int handle_tx_event(struct xhci_hcd *xhci,
                 * endpoint. Otherwise, the endpoint remains stalled
                 * indefinitely.
                 */
+
                if (trb_is_noop(ep_trb)) {
                        if (trb_comp_code == COMP_STALL_ERROR ||
                            xhci_requires_manual_halt_cleanup(xhci, ep_ctx,
                                                              trb_comp_code))
-                               xhci_cleanup_halted_endpoint(xhci, slot_id,
-                                                            ep_index,
-                                                            ep_ring->stream_id,
-                                                            td, EP_HARD_RESET);
+                               xhci_handle_halted_endpoint(xhci, ep,
+                                                           ep_ring->stream_id,
+                                                           td, EP_HARD_RESET);
                        goto cleanup;
                }
 
+               td->status = status;
+
                /* update the urb's actual_length and give back to the core */
                if (usb_endpoint_xfer_control(&td->urb->ep->desc))
-                       process_ctrl_td(xhci, td, ep_trb, event, ep, &status);
+                       process_ctrl_td(xhci, td, ep_trb, event, ep);
                else if (usb_endpoint_xfer_isoc(&td->urb->ep->desc))
-                       process_isoc_td(xhci, td, ep_trb, event, ep, &status);
+                       process_isoc_td(xhci, td, ep_trb, event, ep);
                else
-                       process_bulk_intr_td(xhci, td, ep_trb, event, ep,
-                                            &status);
+                       process_bulk_intr_td(xhci, td, ep_trb, event, ep);
 cleanup:
                handling_skipped_tds = ep->skip &&
                        trb_comp_code != COMP_MISSED_SERVICE_ERROR &&
@@ -2727,6 +2902,7 @@ static int xhci_handle_event(struct xhci_hcd *xhci)
 {
        union xhci_trb *event;
        int update_ptrs = 1;
+       u32 trb_type;
        int ret;
 
        /* Event ring hasn't been allocated yet. */
@@ -2748,31 +2924,30 @@ static int xhci_handle_event(struct xhci_hcd *xhci)
         * speculative reads of the event's flags/data below.
         */
        rmb();
+       trb_type = TRB_FIELD_TO_TYPE(le32_to_cpu(event->event_cmd.flags));
        /* FIXME: Handle more event types. */
-       switch (le32_to_cpu(event->event_cmd.flags) & TRB_TYPE_BITMASK) {
-       case TRB_TYPE(TRB_COMPLETION):
+
+       switch (trb_type) {
+       case TRB_COMPLETION:
                handle_cmd_completion(xhci, &event->event_cmd);
                break;
-       case TRB_TYPE(TRB_PORT_STATUS):
+       case TRB_PORT_STATUS:
                handle_port_status(xhci, event);
                update_ptrs = 0;
                break;
-       case TRB_TYPE(TRB_TRANSFER):
+       case TRB_TRANSFER:
                ret = handle_tx_event(xhci, &event->trans_event);
                if (ret >= 0)
                        update_ptrs = 0;
                break;
-       case TRB_TYPE(TRB_DEV_NOTE):
+       case TRB_DEV_NOTE:
                handle_device_notification(xhci, event);
                break;
        default:
-               if ((le32_to_cpu(event->event_cmd.flags) & TRB_TYPE_BITMASK) >=
-                   TRB_TYPE(48))
-                       handle_vendor_event(xhci, event);
+               if (trb_type >= TRB_VENDOR_DEFINED_LOW)
+                       handle_vendor_event(xhci, event, trb_type);
                else
-                       xhci_warn(xhci, "ERROR unknown event type %d\n",
-                                 TRB_FIELD_TO_TYPE(
-                                 le32_to_cpu(event->event_cmd.flags)));
+                       xhci_warn(xhci, "ERROR unknown event type %d\n", trb_type);
        }
        /* Any of the above functions may drop and re-acquire the lock, so check
         * to make sure a watchdog timer didn't mark the host as non-responsive.
@@ -2953,6 +3128,7 @@ static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
                u32 ep_state, unsigned int num_trbs, gfp_t mem_flags)
 {
        unsigned int num_trbs_needed;
+       unsigned int link_trb_count = 0;
 
        /* Make sure the endpoint has been added to xHC schedule */
        switch (ep_state) {
@@ -3024,7 +3200,19 @@ static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
 
                ep_ring->enq_seg = ep_ring->enq_seg->next;
                ep_ring->enqueue = ep_ring->enq_seg->trbs;
+
+               /* prevent infinite loop if all first trbs are link trbs */
+               if (link_trb_count++ > ep_ring->num_segs) {
+                       xhci_warn(xhci, "Ring is an endless link TRB loop\n");
+                       return -EINVAL;
+               }
        }
+
+       if (last_trb_on_seg(ep_ring->enq_seg, ep_ring->enqueue)) {
+               xhci_warn(xhci, "Missing link TRB at end of ring segment\n");
+               return -EINVAL;
+       }
+
        return 0;
 }
 
@@ -3043,7 +3231,8 @@ static int prepare_transfer(struct xhci_hcd *xhci,
        struct xhci_ring *ep_ring;
        struct xhci_ep_ctx *ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index);
 
-       ep_ring = xhci_stream_id_to_ring(xdev, ep_index, stream_id);
+       ep_ring = xhci_triad_to_transfer_ring(xhci, xdev->slot_id, ep_index,
+                                             stream_id);
        if (!ep_ring) {
                xhci_dbg(xhci, "Can't prepare ring for bad stream ID %u\n",
                                stream_id);
@@ -3412,7 +3601,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
                        field |= TRB_IOC;
                        more_trbs_coming = false;
                        td->last_trb = ring->enqueue;
-
+                       td->last_trb_seg = ring->enq_seg;
                        if (xhci_urb_suitable_for_idt(urb)) {
                                memcpy(&send_addr, urb->transfer_buffer,
                                       trb_buff_len);
@@ -3438,7 +3627,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
                                upper_32_bits(send_addr),
                                length_field,
                                field);
-
+               td->num_trbs++;
                addr += trb_buff_len;
                sent_len = trb_buff_len;
 
@@ -3462,8 +3651,10 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
                                       ep_index, urb->stream_id,
                                       1, urb, 1, mem_flags);
                urb_priv->td[1].last_trb = ring->enqueue;
+               urb_priv->td[1].last_trb_seg = ring->enq_seg;
                field = TRB_TYPE(TRB_NORMAL) | ring->cycle_state | TRB_IOC;
                queue_trb(xhci, ring, 0, 0, 0, TRB_INTR_TARGET(0), field);
+               urb_priv->td[1].num_trbs++;
        }
 
        check_trb_math(urb, enqd_len);
@@ -3514,6 +3705,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 
        urb_priv = urb->hcpriv;
        td = &urb_priv->td[0];
+       td->num_trbs = num_trbs;
 
        /*
         * Don't give the first TRB to the hardware (by toggling the cycle bit)
@@ -3586,6 +3778,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 
        /* Save the DMA address of the last TRB in the TD */
        td->last_trb = ep_ring->enqueue;
+       td->last_trb_seg = ep_ring->enq_seg;
 
        /* Queue status TRB - see Table 7 and sections 4.11.2.2 and 6.4.1.2.3 */
        /* If the device sent data, the status stage is an OUT transfer */
@@ -3830,7 +4023,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
                        goto cleanup;
                }
                td = &urb_priv->td[i];
-
+               td->num_trbs = trbs_per_td;
                /* use SIA as default, if frame id is used overwrite it */
                sia_frame_id = TRB_SIA;
                if (!(urb->transfer_flags & URB_ISO_ASAP) &&
@@ -3873,6 +4066,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
                        } else {
                                more_trbs_coming = false;
                                td->last_trb = ep_ring->enqueue;
+                               td->last_trb_seg = ep_ring->enq_seg;
                                field |= TRB_IOC;
                                if (trb_block_event_intr(xhci, num_tds, i))
                                        field |= TRB_BEI;
@@ -4156,71 +4350,6 @@ int xhci_queue_stop_endpoint(struct xhci_hcd *xhci, struct xhci_command *cmd,
                        trb_slot_id | trb_ep_index | type | trb_suspend, false);
 }
 
-/* Set Transfer Ring Dequeue Pointer command */
-void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci,
-               unsigned int slot_id, unsigned int ep_index,
-               struct xhci_dequeue_state *deq_state)
-{
-       dma_addr_t addr;
-       u32 trb_slot_id = SLOT_ID_FOR_TRB(slot_id);
-       u32 trb_ep_index = EP_ID_FOR_TRB(ep_index);
-       u32 trb_stream_id = STREAM_ID_FOR_TRB(deq_state->stream_id);
-       u32 trb_sct = 0;
-       u32 type = TRB_TYPE(TRB_SET_DEQ);
-       struct xhci_virt_ep *ep;
-       struct xhci_command *cmd;
-       int ret;
-
-       xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
-               "Set TR Deq Ptr cmd, new deq seg = %p (0x%llx dma), new deq ptr = %p (0x%llx dma), new cycle = %u",
-               deq_state->new_deq_seg,
-               (unsigned long long)deq_state->new_deq_seg->dma,
-               deq_state->new_deq_ptr,
-               (unsigned long long)xhci_trb_virt_to_dma(
-                       deq_state->new_deq_seg, deq_state->new_deq_ptr),
-               deq_state->new_cycle_state);
-
-       addr = xhci_trb_virt_to_dma(deq_state->new_deq_seg,
-                                   deq_state->new_deq_ptr);
-       if (addr == 0) {
-               xhci_warn(xhci, "WARN Cannot submit Set TR Deq Ptr\n");
-               xhci_warn(xhci, "WARN deq seg = %p, deq pt = %p\n",
-                         deq_state->new_deq_seg, deq_state->new_deq_ptr);
-               return;
-       }
-       ep = &xhci->devs[slot_id]->eps[ep_index];
-       if ((ep->ep_state & SET_DEQ_PENDING)) {
-               xhci_warn(xhci, "WARN Cannot submit Set TR Deq Ptr\n");
-               xhci_warn(xhci, "A Set TR Deq Ptr command is pending.\n");
-               return;
-       }
-
-       /* This function gets called from contexts where it cannot sleep */
-       cmd = xhci_alloc_command(xhci, false, GFP_ATOMIC);
-       if (!cmd)
-               return;
-
-       ep->queued_deq_seg = deq_state->new_deq_seg;
-       ep->queued_deq_ptr = deq_state->new_deq_ptr;
-       if (deq_state->stream_id)
-               trb_sct = SCT_FOR_TRB(SCT_PRI_TR);
-       ret = queue_command(xhci, cmd,
-               lower_32_bits(addr) | trb_sct | deq_state->new_cycle_state,
-               upper_32_bits(addr), trb_stream_id,
-               trb_slot_id | trb_ep_index | type, false);
-       if (ret < 0) {
-               xhci_free_command(xhci, cmd);
-               return;
-       }
-
-       /* Stop the TD queueing code from ringing the doorbell until
-        * this command completes.  The HC won't set the dequeue pointer
-        * if the ring is running, and ringing the doorbell starts the
-        * ring running.
-        */
-       ep->ep_state |= SET_DEQ_PENDING;
-}
-
 int xhci_queue_reset_ep(struct xhci_hcd *xhci, struct xhci_command *cmd,
                        int slot_id, unsigned int ep_index,
                        enum xhci_ep_reset_type reset_type)