1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
3 #include <linux/init.h>
4 #include <linux/kernel.h>
5 #include <linux/module.h>
7 #include <uapi/linux/idxd.h>
/*
 * __get_desc - reset the descriptor stored at slot @idx of @wq for reuse.
 * @wq:  work queue whose descs[] array holds the descriptor
 * @idx: index into wq->descs[]
 * @cpu: NOTE(review): not referenced in the statements shown here;
 *       presumably recorded on the descriptor for the later free - confirm.
 *
 * Zeroes the hardware descriptor (sizeof(struct dsa_hw_desc) bytes) and the
 * completion record (idxd->data->compl_size bytes). When
 * device_pasid_enabled() is true the device PASID is written into the
 * hardware descriptor.
 */
11 static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
13 struct idxd_desc *desc;
14 struct idxd_device *idxd = wq->idxd;
16 desc = wq->descs[idx];
/* Start from a clean hardware descriptor and completion record. */
17 memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
18 memset(desc->completion, 0, idxd->data->compl_size);
21 if (device_pasid_enabled(idxd))
22 desc->hw->pasid = idxd->pasid;
/*
 * idxd_alloc_desc - obtain a descriptor from @wq.
 * @wq:     work queue to allocate from
 * @optype: IDXD_OP_NONBLOCK callers are handed ERR_PTR(-EAGAIN) rather than
 *          entering the wait path
 *
 * The device state is compared against IDXD_DEV_ENABLED first. A free index
 * is then requested with sbitmap_queue_get(); a usable index is turned into
 * a descriptor by __get_desc(). The wait path prepares an interruptible
 * wait on the sbitmap queue, checks for a pending signal, and retries
 * sbitmap_queue_get() before finishing the wait.
 *
 * Returns the descriptor from __get_desc(), or an ERR_PTR() value;
 * ERR_PTR(-EAGAIN) is used both for the non-blocking case and after the
 * wait path.
 * NOTE(review): the conditions guarding each return are not shown here;
 * presumably they test for a negative index - confirm.
 */
27 struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
30 struct idxd_device *idxd = wq->idxd;
31 DEFINE_SBQ_WAIT(wait);
32 struct sbq_wait_state *ws;
33 struct sbitmap_queue *sbq;
35 if (idxd->state != IDXD_DEV_ENABLED)
/* First attempt: grab a free index without sleeping. */
39 idx = sbitmap_queue_get(sbq, &cpu);
41 if (optype == IDXD_OP_NONBLOCK)
42 return ERR_PTR(-EAGAIN);
44 return __get_desc(wq, idx, cpu);
/* Wait path: sleep in TASK_INTERRUPTIBLE on the sbitmap wait queue. */
49 sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE);
50 if (signal_pending_state(TASK_INTERRUPTIBLE, current))
52 idx = sbitmap_queue_get(sbq, &cpu);
58 sbitmap_finish_wait(sbq, ws, &wait);
60 return ERR_PTR(-EAGAIN);
62 return __get_desc(wq, idx, cpu);
/*
 * idxd_free_desc - give @desc's slot back to @wq's sbitmap queue.
 * @wq:   work queue that owns the descriptor
 * @desc: descriptor being released; desc->id is the bit that gets cleared
 *
 * NOTE(review): where the cpu hint passed to sbitmap_queue_clear() comes
 * from is not shown here; presumably it was recorded on @desc at
 * allocation time - confirm.
 */
65 void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
70 sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
/*
 * list_abort_desc - walk ie->work_list on behalf of an abort of @desc.
 * @wq:   work queue the descriptor was submitted on
 * @ie:   irq entry whose work_list is walked
 * @desc: descriptor being aborted
 *
 * The caller must hold ie->list_lock; this is checked with
 * lockdep_assert_held(). The walk uses the _safe iterator, which permits
 * unlinking the current entry.
 *
 * NOTE(review): the loop body and the return statements are not shown here.
 * llist_abort_desc() stores the result in 'found', so presumably @desc is
 * returned when it is on the work list and NULL otherwise - confirm.
 */
73 static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
74 struct idxd_desc *desc)
76 struct idxd_desc *d, *n;
78 lockdep_assert_held(&ie->list_lock);
79 list_for_each_entry_safe(d, n, &ie->work_list, list) {
87 * At this point, the desc needs to be aborted is held by the completion
88 * handler where it has taken it off the pending list but has not added to the
89 * work list. It will be cleaned up by the interrupt handler when it sees the
90 * IDXD_COMP_DESC_ABORT for completion status.
/*
 * llist_abort_desc - abort @desc and complete what the drain turns up.
 * @wq:   work queue the descriptor was submitted on
 * @ie:   irq entry whose pending_llist and work_list are searched
 * @desc: descriptor to abort
 *
 * Marks @desc's completion record IDXD_COMP_DESC_ABORT. Under ie->list_lock
 * the whole of ie->pending_llist is taken with llist_del_all(); entries
 * with a non-zero completion status are moved to a local list (flist), and
 * entries are also appended to ie->work_list (presumably those without a
 * status - confirm). list_abort_desc() then looks for @desc on the work
 * list. Once the lock is dropped the located descriptor is completed with
 * IDXD_COMPLETE_ABORT, and every flist entry is unlinked and completed in
 * turn.
 */
95 static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
96 struct idxd_desc *desc)
98 struct idxd_desc *d, *t, *found = NULL;
99 struct llist_node *head;
102 desc->completion->status = IDXD_COMP_DESC_ABORT;
104 * Grab the list lock so it will block the irq thread handler. This allows the
105 * abort code to locate the descriptor that needs to be aborted.
107 spin_lock(&ie->list_lock);
108 head = llist_del_all(&ie->pending_llist);
110 llist_for_each_entry_safe(d, t, head, llnode) {
116 if (d->completion->status)
117 list_add_tail(&d->list, &flist);
119 list_add_tail(&d->list, &ie->work_list);
124 found = list_abort_desc(wq, ie, desc);
125 spin_unlock(&ie->list_lock);
128 idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);
131 * completing the descriptor will return desc to allocator and
132 * the desc can be acquired by a different process and the
133 * desc->list can be modified. Delete desc from list so the
134 * list traversal does not get corrupted by the other process.
136 list_for_each_entry_safe(d, t, &flist, list) {
137 list_del_init(&d->list);
/*
 * Complete the entry that was just unlinked (d). 'found' has already been
 * completed above; passing it here would complete it again and leave the
 * flist entries uncompleted.
 */
138 idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true);
143 * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver
144 * has better control of the number of descriptors being submitted to a shared wq by limiting
145 * the number of driver allocated descriptors to the wq size. However, when the swq is
146 * exported to a guest kernel, it may be shared with multiple guest kernels. This means
147 * the likelihood of getting busy returned on the swq when submitting goes significantly up.
148 * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving
149 * up. The sysfs knob can be tuned by the system administrator.
151 int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
/* The retry budget is read from the wq's enqcmds_retries field. */
153 unsigned int retries = wq->enqcmds_retries;
/* Issue the descriptor to the portal with enqcmds(); its result is kept in rc. */
157 rc = enqcmds(portal, desc);
/*
 * idxd_submit_desc - issue a prepared descriptor to the hardware via @wq.
 * @wq:   work queue to submit on
 * @desc: descriptor to submit; desc->hw is what is written to the portal
 *
 * The device state is compared against IDXD_DEV_ENABLED, then a live
 * reference on wq->wq_active is taken (after one wait on wq->wq_resurrect
 * if the first attempt fails). Descriptors with IDXD_OP_FLAG_RCI set get
 * the irq entry's int_handle and are added to that entry's pending_llist
 * before being issued. A dedicated wq is written with iosubmit_cmds512();
 * otherwise idxd_enqcmds() is used and its result is kept in rc. The
 * wq_active reference is dropped with percpu_ref_put() both on the path
 * that calls llist_abort_desc() and at the end of the function.
 *
 * NOTE(review): the return statements and the assignment of 'ie' are not
 * shown here; presumably the abort path runs only when idxd_enqcmds()
 * fails - confirm.
 */
166 int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
168 struct idxd_device *idxd = wq->idxd;
169 struct idxd_irq_entry *ie = NULL;
170 u32 desc_flags = desc->hw->flags;
171 void __iomem *portal;
174 if (idxd->state != IDXD_DEV_ENABLED)
/* Take a live wq reference; if that fails, wait for wq_resurrect and try once more. */
177 if (!percpu_ref_tryget_live(&wq->wq_active)) {
178 wait_for_completion(&wq->wq_resurrect);
179 if (!percpu_ref_tryget_live(&wq->wq_active))
183 portal = idxd_wq_portal_addr(wq);
186 * The wmb() flushes writes to coherent DMA data before
187 * possibly triggering a DMA read. The wmb() is necessary
188 * even on UP because the recipient is a device.
193 * Pending the descriptor to the lockless list for the irq_entry
194 * that we designated the descriptor to.
196 if (desc_flags & IDXD_OP_FLAG_RCI) {
198 desc->hw->int_handle = ie->int_handle;
199 llist_add(&desc->llnode, &ie->pending_llist);
/* Dedicated wqs use iosubmit_cmds512(); other wqs go through idxd_enqcmds(). */
202 if (wq_dedicated(wq)) {
203 iosubmit_cmds512(portal, desc->hw, 1);
205 rc = idxd_enqcmds(wq, portal, desc->hw);
207 percpu_ref_put(&wq->wq_active);
208 /* abort operation frees the descriptor */
210 llist_abort_desc(wq, ie, desc);
215 percpu_ref_put(&wq->wq_active);