RDMA/bnxt_re: Refactor the queue index update
author Chandramohan Akula <chandramohan.akula@broadcom.com>
Mon, 23 Oct 2023 14:03:22 +0000 (07:03 -0700)
committer Leon Romanovsky <leon@kernel.org>
Mon, 13 Nov 2023 08:26:41 +0000 (10:26 +0200)
The queue index wrap around logic is based on power of 2 size depth.
All queues are created with power of 2 depth. This increases the
memory usage by the driver. This change is required for the next
patches that avoid the power of 2 depth requirement for each of
the queues.

Update the functions that increment the producer index and consumer
index so that they handle wrap around. Also change the index handling
across multiple functions.

Signed-off-by: Chandramohan Akula <chandramohan.akula@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Link: https://lore.kernel.org/r/1698069803-1787-2-git-send-email-selvin.xavier@broadcom.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/infiniband/hw/bnxt_re/qplib_fp.c
drivers/infiniband/hw/bnxt_re/qplib_fp.h
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
drivers/infiniband/hw/bnxt_re/qplib_res.c
drivers/infiniband/hw/bnxt_re/qplib_res.h

index abbabea..b821c37 100644 (file)
@@ -237,18 +237,15 @@ static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq)
        struct bnxt_qplib_hwq *hwq = &nq->hwq;
        struct nq_base *nqe, **nq_ptr;
        int budget = nq->budget;
-       u32 sw_cons, raw_cons;
        uintptr_t q_handle;
        u16 type;
 
        spin_lock_bh(&hwq->lock);
        /* Service the NQ until empty */
-       raw_cons = hwq->cons;
        while (budget--) {
-               sw_cons = HWQ_CMP(raw_cons, hwq);
                nq_ptr = (struct nq_base **)hwq->pbl_ptr;
-               nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)];
-               if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements))
+               nqe = &nq_ptr[NQE_PG(hwq->cons)][NQE_IDX(hwq->cons)];
+               if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags))
                        break;
 
                /*
@@ -276,7 +273,8 @@ static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq)
                default:
                        break;
                }
-               raw_cons++;
+               bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+                                        1, &nq->nq_db.dbinfo.flags);
        }
        spin_unlock_bh(&hwq->lock);
 }
@@ -302,18 +300,16 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t)
        struct bnxt_qplib_hwq *hwq = &nq->hwq;
        struct bnxt_qplib_cq *cq;
        int budget = nq->budget;
-       u32 sw_cons, raw_cons;
        struct nq_base *nqe;
        uintptr_t q_handle;
+       u32 hw_polled = 0;
        u16 type;
 
        spin_lock_bh(&hwq->lock);
        /* Service the NQ until empty */
-       raw_cons = hwq->cons;
        while (budget--) {
-               sw_cons = HWQ_CMP(raw_cons, hwq);
-               nqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL);
-               if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements))
+               nqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL);
+               if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags))
                        break;
 
                /*
@@ -372,12 +368,12 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t)
                                 "nqe with type = 0x%x not handled\n", type);
                        break;
                }
-               raw_cons++;
+               hw_polled++;
+               bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+                                        1, &nq->nq_db.dbinfo.flags);
        }
-       if (hwq->cons != raw_cons) {
-               hwq->cons = raw_cons;
+       if (hw_polled)
                bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true);
-       }
        spin_unlock_bh(&hwq->lock);
 }
 
@@ -505,6 +501,7 @@ static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq,  u32 reg_offt)
        pdev = nq->pdev;
        nq_db = &nq->nq_db;
 
+       nq_db->dbinfo.flags = 0;
        nq_db->reg.bar_id = NQ_CONS_PCI_BAR_REGION;
        nq_db->reg.bar_base = pci_resource_start(pdev, nq_db->reg.bar_id);
        if (!nq_db->reg.bar_base) {
@@ -649,7 +646,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
                rc = -ENOMEM;
                goto fail;
        }
-
+       srq->dbinfo.flags = 0;
        bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
                                 CMDQ_BASE_OPCODE_CREATE_SRQ,
                                 sizeof(req));
@@ -703,13 +700,9 @@ int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res,
                          struct bnxt_qplib_srq *srq)
 {
        struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
-       u32 sw_prod, sw_cons, count = 0;
-
-       sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
-       sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq);
+       u32 count;
 
-       count = sw_prod > sw_cons ? sw_prod - sw_cons :
-                                   srq_hwq->max_elements - sw_cons + sw_prod;
+       count = __bnxt_qplib_get_avail(srq_hwq);
        if (count > srq->threshold) {
                srq->arm_req = false;
                bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold);
@@ -761,7 +754,7 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
        struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
        struct rq_wqe *srqe;
        struct sq_sge *hw_sge;
-       u32 sw_prod, sw_cons, count = 0;
+       u32 count = 0;
        int i, next;
 
        spin_lock(&srq_hwq->lock);
@@ -775,8 +768,7 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
        srq->start_idx = srq->swq[next].next_idx;
        spin_unlock(&srq_hwq->lock);
 
-       sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
-       srqe = bnxt_qplib_get_qe(srq_hwq, sw_prod, NULL);
+       srqe = bnxt_qplib_get_qe(srq_hwq, srq_hwq->prod, NULL);
        memset(srqe, 0, srq->wqe_size);
        /* Calculate wqe_size16 and data_len */
        for (i = 0, hw_sge = (struct sq_sge *)srqe->data;
@@ -792,17 +784,10 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
        srqe->wr_id[0] = cpu_to_le32((u32)next);
        srq->swq[next].wr_id = wqe->wr_id;
 
-       srq_hwq->prod++;
+       bnxt_qplib_hwq_incr_prod(&srq->dbinfo, srq_hwq, srq->dbinfo.max_slot);
 
        spin_lock(&srq_hwq->lock);
-       sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
-       /* retaining srq_hwq->cons for this logic
-        * actually the lock is only required to
-        * read srq_hwq->cons.
-        */
-       sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq);
-       count = sw_prod > sw_cons ? sw_prod - sw_cons :
-                                   srq_hwq->max_elements - sw_cons + sw_prod;
+       count = __bnxt_qplib_get_avail(srq_hwq);
        spin_unlock(&srq_hwq->lock);
        /* Ring DB */
        bnxt_qplib_ring_prod_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ);
@@ -849,6 +834,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
        u32 tbl_indx;
        int rc;
 
+       sq->dbinfo.flags = 0;
        bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
                                 CMDQ_BASE_OPCODE_CREATE_QP1,
                                 sizeof(req));
@@ -885,6 +871,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
        /* RQ */
        if (rq->max_wqe) {
+               rq->dbinfo.flags = 0;
                hwq_attr.res = res;
                hwq_attr.sginfo = &rq->sg_info;
                hwq_attr.stride = sizeof(struct sq_sge);
@@ -992,6 +979,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
        u32 tbl_indx;
        u16 nsge;
 
+       sq->dbinfo.flags = 0;
        bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
                                 CMDQ_BASE_OPCODE_CREATE_QP,
                                 sizeof(req));
@@ -1040,6 +1028,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
        /* RQ */
        if (!qp->srq) {
+               rq->dbinfo.flags = 0;
                hwq_attr.res = res;
                hwq_attr.sginfo = &rq->sg_info;
                hwq_attr.stride = sizeof(struct sq_sge);
@@ -1454,12 +1443,15 @@ bail:
 static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
 {
        struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
+       u32 peek_flags, peek_cons;
        struct cq_base *hw_cqe;
        int i;
 
+       peek_flags = cq->dbinfo.flags;
+       peek_cons = cq_hwq->cons;
        for (i = 0; i < cq_hwq->max_elements; i++) {
-               hw_cqe = bnxt_qplib_get_qe(cq_hwq, i, NULL);
-               if (!CQE_CMP_VALID(hw_cqe, i, cq_hwq->max_elements))
+               hw_cqe = bnxt_qplib_get_qe(cq_hwq, peek_cons, NULL);
+               if (!CQE_CMP_VALID(hw_cqe, peek_flags))
                        continue;
                /*
                 * The valid test of the entry must be done first before
@@ -1489,6 +1481,8 @@ static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
                default:
                        break;
                }
+               bnxt_qplib_hwq_incr_cons(cq_hwq->max_elements, &peek_cons,
+                                        1, &peek_flags);
        }
 }
 
@@ -1961,7 +1955,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
        bnxt_qplib_fill_psn_search(qp, wqe, swq);
 queue_err:
        bnxt_qplib_swq_mod_start(sq, wqe_idx);
-       bnxt_qplib_hwq_incr_prod(hwq, swq->slots);
+       bnxt_qplib_hwq_incr_prod(&sq->dbinfo, hwq, swq->slots);
        qp->wqe_cnt++;
 done:
        if (sch_handler) {
@@ -2049,7 +2043,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
        base_hdr->wr_id[0] = cpu_to_le32(wqe_idx);
 queue_err:
        bnxt_qplib_swq_mod_start(rq, wqe_idx);
-       bnxt_qplib_hwq_incr_prod(hwq, swq->slots);
+       bnxt_qplib_hwq_incr_prod(&rq->dbinfo, hwq, swq->slots);
 done:
        if (sch_handler) {
                nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC);
@@ -2086,6 +2080,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
                return -EINVAL;
        }
 
+       cq->dbinfo.flags = 0;
        hwq_attr.res = res;
        hwq_attr.depth = cq->max_wqe;
        hwq_attr.stride = sizeof(struct cq_base);
@@ -2101,7 +2096,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 
        req.dpi = cpu_to_le32(cq->dpi->dpi);
        req.cq_handle = cpu_to_le64(cq->cq_handle);
-       req.cq_size = cpu_to_le32(cq->hwq.max_elements);
+       req.cq_size = cpu_to_le32(cq->max_wqe);
        pbl = &cq->hwq.pbl[PBL_LVL_0];
        pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) <<
                     CMDQ_CREATE_CQ_PG_SIZE_SFT);
@@ -2144,6 +2139,8 @@ void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res,
 {
        bnxt_qplib_free_hwq(res, &cq->hwq);
        memcpy(&cq->hwq, &cq->resize_hwq, sizeof(cq->hwq));
+       /* Reset only the cons bit in the flags */
+       cq->dbinfo.flags &= ~(1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT);
 }
 
 int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
@@ -2240,7 +2237,8 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
                cqe++;
                (*budget)--;
 skip_compl:
-               bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[last].slots);
+               bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+                                        sq->swq[last].slots, &sq->dbinfo.flags);
                sq->swq_last = sq->swq[last].next_idx;
        }
        *pcqe = cqe;
@@ -2287,7 +2285,8 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
                cqe->wr_id = rq->swq[last].wr_id;
                cqe++;
                (*budget)--;
-               bnxt_qplib_hwq_incr_cons(&rq->hwq, rq->swq[last].slots);
+               bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+                                        rq->swq[last].slots, &rq->dbinfo.flags);
                rq->swq_last = rq->swq[last].next_idx;
        }
        *pcqe = cqe;
@@ -2316,7 +2315,7 @@ void bnxt_qplib_mark_qp_error(void *qp_handle)
 static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
                     u32 cq_cons, u32 swq_last, u32 cqe_sq_cons)
 {
-       u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx;
+       u32 peek_sw_cq_cons, peek_sq_cons_idx, peek_flags;
        struct bnxt_qplib_q *sq = &qp->sq;
        struct cq_req *peek_req_hwcqe;
        struct bnxt_qplib_qp *peek_qp;
@@ -2347,16 +2346,14 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
        }
        if (sq->condition) {
                /* Peek at the completions */
-               peek_raw_cq_cons = cq->hwq.cons;
+               peek_flags = cq->dbinfo.flags;
                peek_sw_cq_cons = cq_cons;
                i = cq->hwq.max_elements;
                while (i--) {
-                       peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq);
                        peek_hwcqe = bnxt_qplib_get_qe(&cq->hwq,
                                                       peek_sw_cq_cons, NULL);
                        /* If the next hwcqe is VALID */
-                       if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons,
-                                         cq->hwq.max_elements)) {
+                       if (CQE_CMP_VALID(peek_hwcqe, peek_flags)) {
                        /*
                         * The valid test of the entry must be done first before
                         * reading any further.
@@ -2399,8 +2396,9 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
                                rc = -EINVAL;
                                goto out;
                        }
-                       peek_sw_cq_cons++;
-                       peek_raw_cq_cons++;
+                       bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements,
+                                                &peek_sw_cq_cons,
+                                                1, &peek_flags);
                }
                dev_err(&cq->hwq.pdev->dev,
                        "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n",
@@ -2487,7 +2485,8 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
                        }
                }
 skip:
-               bnxt_qplib_hwq_incr_cons(&sq->hwq, swq->slots);
+               bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+                                        swq->slots, &sq->dbinfo.flags);
                sq->swq_last = swq->next_idx;
                if (sq->single)
                        break;
@@ -2514,7 +2513,8 @@ static void bnxt_qplib_release_srqe(struct bnxt_qplib_srq *srq, u32 tag)
        srq->swq[srq->last_idx].next_idx = (int)tag;
        srq->last_idx = (int)tag;
        srq->swq[srq->last_idx].next_idx = -1;
-       srq->hwq.cons++; /* Support for SRQE counter */
+       bnxt_qplib_hwq_incr_cons(srq->hwq.max_elements, &srq->hwq.cons,
+                                srq->dbinfo.max_slot, &srq->dbinfo.flags);
        spin_unlock(&srq->hwq.lock);
 }
 
@@ -2583,7 +2583,8 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
                cqe->wr_id = swq->wr_id;
                cqe++;
                (*budget)--;
-               bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+               bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+                                        swq->slots, &rq->dbinfo.flags);
                rq->swq_last = swq->next_idx;
                *pcqe = cqe;
 
@@ -2669,7 +2670,8 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
                cqe->wr_id = swq->wr_id;
                cqe++;
                (*budget)--;
-               bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+               bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+                                        swq->slots, &rq->dbinfo.flags);
                rq->swq_last = swq->next_idx;
                *pcqe = cqe;
 
@@ -2686,14 +2688,11 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
 bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
 {
        struct cq_base *hw_cqe;
-       u32 sw_cons, raw_cons;
        bool rc = true;
 
-       raw_cons = cq->hwq.cons;
-       sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
-       hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL);
+       hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL);
         /* Check for Valid bit. If the CQE is valid, return false */
-       rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements);
+       rc = !CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags);
        return rc;
 }
 
@@ -2775,7 +2774,8 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
                cqe->wr_id = swq->wr_id;
                cqe++;
                (*budget)--;
-               bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+               bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+                                        swq->slots, &rq->dbinfo.flags);
                rq->swq_last = swq->next_idx;
                *pcqe = cqe;
 
@@ -2848,7 +2848,8 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
                        cqe++;
                        (*budget)--;
                }
-               bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[swq_last].slots);
+               bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+                                        sq->swq[swq_last].slots, &sq->dbinfo.flags);
                sq->swq_last = sq->swq[swq_last].next_idx;
        }
        *pcqe = cqe;
@@ -2933,19 +2934,17 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
                       int num_cqes, struct bnxt_qplib_qp **lib_qp)
 {
        struct cq_base *hw_cqe;
-       u32 sw_cons, raw_cons;
        int budget, rc = 0;
+       u32 hw_polled = 0;
        u8 type;
 
-       raw_cons = cq->hwq.cons;
        budget = num_cqes;
 
        while (budget) {
-               sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
-               hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL);
+               hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL);
 
                /* Check for Valid bit */
-               if (!CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements))
+               if (!CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags))
                        break;
 
                /*
@@ -2960,7 +2959,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
                        rc = bnxt_qplib_cq_process_req(cq,
                                                       (struct cq_req *)hw_cqe,
                                                       &cqe, &budget,
-                                                      sw_cons, lib_qp);
+                                                      cq->hwq.cons, lib_qp);
                        break;
                case CQ_BASE_CQE_TYPE_RES_RC:
                        rc = bnxt_qplib_cq_process_res_rc(cq,
@@ -3006,12 +3005,13 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
                                dev_err(&cq->hwq.pdev->dev,
                                        "process_cqe error rc = 0x%x\n", rc);
                }
-               raw_cons++;
+               hw_polled++;
+               bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements, &cq->hwq.cons,
+                                        1, &cq->dbinfo.flags);
+
        }
-       if (cq->hwq.cons != raw_cons) {
-               cq->hwq.cons = raw_cons;
+       if (hw_polled)
                bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ);
-       }
 exit:
        return num_cqes - budget;
 }
index 404b851..23c27cb 100644 (file)
@@ -348,9 +348,21 @@ struct bnxt_qplib_qp {
 #define CQE_IDX(x)             ((x) & CQE_MAX_IDX_PER_PG)
 
 #define ROCE_CQE_CMP_V                 0
-#define CQE_CMP_VALID(hdr, raw_cons, cp_bit)                   \
+#define CQE_CMP_VALID(hdr, pass)                       \
        (!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) ==         \
-          !((raw_cons) & (cp_bit)))
+          !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
+
+static inline u32 __bnxt_qplib_get_avail(struct bnxt_qplib_hwq *hwq)
+{
+       int cons, prod, avail;
+
+       cons = hwq->cons;
+       prod = hwq->prod;
+       avail = cons - prod;
+       if (cons <= prod)
+               avail += hwq->depth;
+       return avail;
+}
 
 static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que,
                                         u8 slots)
@@ -443,9 +455,9 @@ struct bnxt_qplib_cq {
 #define NQE_PG(x)              (((x) & ~NQE_MAX_IDX_PER_PG) / NQE_CNT_PER_PG)
 #define NQE_IDX(x)             ((x) & NQE_MAX_IDX_PER_PG)
 
-#define NQE_CMP_VALID(hdr, raw_cons, cp_bit)                   \
+#define NQE_CMP_VALID(hdr, pass)                       \
        (!!(le32_to_cpu((hdr)->info63_v[0]) & NQ_BASE_V) ==     \
-          !((raw_cons) & (cp_bit)))
+          !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
 
 #define BNXT_QPLIB_NQE_MAX_CNT         (128 * 1024)
 
index e47b4ca..15e6d2b 100644 (file)
@@ -734,17 +734,15 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
        u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
        struct bnxt_qplib_hwq *hwq = &creq->hwq;
        struct creq_base *creqe;
-       u32 sw_cons, raw_cons;
        unsigned long flags;
        u32 num_wakeup = 0;
+       u32 hw_polled = 0;
 
        /* Service the CREQ until budget is over */
        spin_lock_irqsave(&hwq->lock, flags);
-       raw_cons = hwq->cons;
        while (budget > 0) {
-               sw_cons = HWQ_CMP(raw_cons, hwq);
-               creqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL);
-               if (!CREQ_CMP_VALID(creqe, raw_cons, hwq->max_elements))
+               creqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL);
+               if (!CREQ_CMP_VALID(creqe, creq->creq_db.dbinfo.flags))
                        break;
                /* The valid test of the entry must be done first before
                 * reading any further.
@@ -775,15 +773,15 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
                                         type);
                        break;
                }
-               raw_cons++;
                budget--;
+               hw_polled++;
+               bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+                                        1, &creq->creq_db.dbinfo.flags);
        }
 
-       if (hwq->cons != raw_cons) {
-               hwq->cons = raw_cons;
+       if (hw_polled)
                bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo,
                                      rcfw->res->cctx, true);
-       }
        spin_unlock_irqrestore(&hwq->lock, flags);
        if (num_wakeup)
                wake_up_nr(&rcfw->cmdq.waitq, num_wakeup);
@@ -1113,6 +1111,7 @@ static int bnxt_qplib_map_creq_db(struct bnxt_qplib_rcfw *rcfw, u32 reg_offt)
        pdev = rcfw->pdev;
        creq_db = &rcfw->creq.creq_db;
 
+       creq_db->dbinfo.flags = 0;
        creq_db->reg.bar_id = RCFW_COMM_CONS_PCI_BAR_REGION;
        creq_db->reg.bar_base = pci_resource_start(pdev, creq_db->reg.bar_id);
        if (!creq_db->reg.bar_id)
index 7b31bee..45996e6 100644 (file)
@@ -141,9 +141,9 @@ struct bnxt_qplib_crsbe {
 /* Allocate 1 per QP for async error notification for now */
 #define BNXT_QPLIB_CREQE_MAX_CNT       (64 * 1024)
 #define BNXT_QPLIB_CREQE_UNITS         16      /* 16-Bytes per prod unit */
-#define CREQ_CMP_VALID(hdr, raw_cons, cp_bit)                  \
+#define CREQ_CMP_VALID(hdr, pass)                      \
        (!!((hdr)->v & CREQ_BASE_V) ==                          \
-          !((raw_cons) & (cp_bit)))
+          !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
 #define CREQ_ENTRY_POLL_BUDGET         0x100
 
 /* HWQ */
index 157db6b..ae2bde3 100644 (file)
@@ -343,7 +343,7 @@ done:
        hwq->cons = 0;
        hwq->pdev = pdev;
        hwq->depth = hwq_attr->depth;
-       hwq->max_elements = depth;
+       hwq->max_elements = hwq->depth;
        hwq->element_size = stride;
        hwq->qe_ppg = pg_size / stride;
        /* For direct access to the elements */
index 5949f00..3e3383b 100644 (file)
@@ -186,6 +186,14 @@ struct bnxt_qplib_db_info {
        struct bnxt_qplib_hwq   *hwq;
        u32                     xid;
        u32                     max_slot;
+       u32                     flags;
+};
+
+enum bnxt_qplib_db_info_flags_mask {
+       BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT        = 0x0UL,
+       BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT        = 0x1UL,
+       BNXT_QPLIB_FLAG_EPOCH_CONS_MASK         = 0x1UL,
+       BNXT_QPLIB_FLAG_EPOCH_PROD_MASK         = 0x2UL,
 };
 
 /* Tables */
@@ -396,24 +404,34 @@ void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res);
 
 int bnxt_qplib_determine_atomics(struct pci_dev *dev);
 
-static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_hwq *hwq, u32 cnt)
+static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_db_info *dbinfo,
+                                           struct bnxt_qplib_hwq *hwq, u32 cnt)
 {
-       hwq->prod = (hwq->prod + cnt) % hwq->depth;
+       /* move prod and update toggle/epoch if wrap around */
+       hwq->prod += cnt;
+       if (hwq->prod >= hwq->depth) {
+               hwq->prod %= hwq->depth;
+               dbinfo->flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT;
+       }
 }
 
-static inline void bnxt_qplib_hwq_incr_cons(struct bnxt_qplib_hwq *hwq,
-                                           u32 cnt)
+static inline void bnxt_qplib_hwq_incr_cons(u32 max_elements, u32 *cons, u32 cnt,
+                                           u32 *dbinfo_flags)
 {
-       hwq->cons = (hwq->cons + cnt) % hwq->depth;
+       /* move cons and update toggle/epoch if wrap around */
+       *cons += cnt;
+       if (*cons >= max_elements) {
+               *cons %= max_elements;
+               *dbinfo_flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT;
+       }
 }
 
 static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info,
                                        bool arm)
 {
-       u32 key;
+       u32 key = 0;
 
-       key = info->hwq->cons & (info->hwq->max_elements - 1);
-       key |= (CMPL_DOORBELL_IDX_VALID |
+       key |= info->hwq->cons | (CMPL_DOORBELL_IDX_VALID |
                (CMPL_DOORBELL_KEY_CMPL & CMPL_DOORBELL_KEY_MASK));
        if (!arm)
                key |= CMPL_DOORBELL_MASK;
@@ -427,8 +445,7 @@ static inline void bnxt_qplib_ring_db(struct bnxt_qplib_db_info *info,
 
        key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type;
        key <<= 32;
-       key |= (info->hwq->cons & (info->hwq->max_elements - 1)) &
-               DBC_DBC_INDEX_MASK;
+       key |= (info->hwq->cons & DBC_DBC_INDEX_MASK);
        writeq(key, info->db);
 }