diff options
| author | Linus Torvalds <[email protected]> | 2025-03-29 18:12:28 +0000 |
|---|---|---|
| committer | Linus Torvalds <[email protected]> | 2025-03-29 18:12:28 +0000 |
| commit | 092e335082f22880207384ad736729c67d784665 (patch) | |
| tree | 6f737fedfa5ad5d53f622cc7fc531683cba1a0f8 /drivers/infiniband/hw/mana/cq.c | |
| parent | Merge tag 'for-linus-fwctl' of git://git.kernel.org/pub/scm/linux/kernel/git/... (diff) | |
| parent | IB/mad: Check available slots before posting receive WRs (diff) | |
| download | kernel-092e335082f22880207384ad736729c67d784665.tar.gz kernel-092e335082f22880207384ad736729c67d784665.zip | |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
 - Usual minor updates and fixes for bnxt_re, hfi1, rxe, mana, iser,
   mlx5, vmw_pvrdma, hns
 - Make rxe work on tun devices
 - mana gains more standard verbs as it moves toward supporting
   in-kernel verbs
 - DMABUF support for mana
 - Fix page size calculations when memory registration exceeds 4G
 - On Demand Paging support for rxe
 - mlx5 support for RDMA TRANSPORT flow tables and a new ucap mechanism
   to access control use of them
 - Optional RDMA_TX/RX counters per QP in mlx5
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (73 commits)
IB/mad: Check available slots before posting receive WRs
RDMA/mana_ib: Fix integer overflow during queue creation
RDMA/mlx5: Fix calculation of total invalidated pages
RDMA/mlx5: Fix mlx5_poll_one() cur_qp update flow
RDMA/mlx5: Fix page_size variable overflow
RDMA/mlx5: Drop access_flags from _mlx5_mr_cache_alloc()
RDMA/mlx5: Fix cache entry update on dereg error
RDMA/mlx5: Fix MR cache initialization error flow
RDMA/mlx5: Support optional-counters binding for QPs
RDMA/mlx5: Compile fs.c regardless of INFINIBAND_USER_ACCESS config
RDMA/core: Pass port to counter bind/unbind operations
RDMA/core: Add support to optional-counters binding configuration
RDMA/core: Create and destroy rdma_counter using rdma_zalloc_drv_obj()
RDMA/mlx5: Add optional counters for RDMA_TX/RX_packets/bytes
RDMA/core: Fix use-after-free when rename device name
RDMA/bnxt_re: Support perf management counters
RDMA/rxe: Fix incorrect return value of rxe_odp_atomic_op()
RDMA/uverbs: Propagate errors from rdma_lookup_get_uobject()
RDMA/mana_ib: Handle net event for pointing to the current netdev
net: mana: Change the function signature of mana_get_primary_netdev_rcu
...
Diffstat (limited to 'drivers/infiniband/hw/mana/cq.c')
| -rw-r--r-- | drivers/infiniband/hw/mana/cq.c | 228 |
1 files changed, 200 insertions, 28 deletions
```diff
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index f04a679d2871..0fc4e2679218 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -15,42 +15,58 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	struct ib_device *ibdev = ibcq->device;
 	struct mana_ib_create_cq ucmd = {};
 	struct mana_ib_dev *mdev;
+	struct gdma_context *gc;
 	bool is_rnic_cq;
 	u32 doorbell;
+	u32 buf_size;
 	int err;
 
 	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	gc = mdev_to_gc(mdev);
 
 	cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
 	cq->cq_handle = INVALID_MANA_HANDLE;
 
-	if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
-		return -EINVAL;
+	if (udata) {
+		if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
+			return -EINVAL;
 
-	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
-	if (err) {
-		ibdev_dbg(ibdev,
-			  "Failed to copy from udata for create cq, %d\n", err);
-		return err;
-	}
+		err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+		if (err) {
+			ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
+			return err;
+		}
 
-	is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
+		is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
 
-	if (!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) {
-		ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
-		return -EINVAL;
-	}
+		if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
+		    attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
+			ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
+			return -EINVAL;
+		}
 
-	cq->cqe = attr->cqe;
-	err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, &cq->queue);
-	if (err) {
-		ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
-		return err;
-	}
+		cq->cqe = attr->cqe;
+		err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
+					   &cq->queue);
+		if (err) {
+			ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
+			return err;
+		}
 
-	mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
-						  ibucontext);
-	doorbell = mana_ucontext->doorbell;
+		mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+							  ibucontext);
+		doorbell = mana_ucontext->doorbell;
+	} else {
+		is_rnic_cq = true;
+		buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
+		cq->cqe = buf_size / COMP_ENTRY_SIZE;
+		err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
+		if (err) {
+			ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
+			return err;
+		}
+		doorbell = gc->mana_ib.doorbell;
+	}
 
 	if (is_rnic_cq) {
 		err = mana_ib_gd_create_cq(mdev, cq, doorbell);
@@ -66,13 +82,19 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		}
 	}
 
-	resp.cqid = cq->queue.id;
-	err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
-	if (err) {
-		ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
-		goto err_remove_cq_cb;
+	if (udata) {
+		resp.cqid = cq->queue.id;
+		err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+		if (err) {
+			ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+			goto err_remove_cq_cb;
+		}
 	}
 
+	spin_lock_init(&cq->cq_lock);
+	INIT_LIST_HEAD(&cq->list_send_qp);
+	INIT_LIST_HEAD(&cq->list_recv_qp);
+
 	return 0;
 
 err_remove_cq_cb:
@@ -122,7 +144,10 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
 		return -EINVAL;
 	/* Create CQ table entry */
 	WARN_ON(gc->cq_table[cq->queue.id]);
-	gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
+	if (cq->queue.kmem)
+		gdma_cq = cq->queue.kmem;
+	else
+		gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
 	if (!gdma_cq)
 		return -ENOMEM;
 
@@ -141,6 +166,153 @@ void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
 	if (cq->queue.id >= gc->max_num_cqs || cq->queue.id == INVALID_QUEUE_ID)
 		return;
 
+	if (cq->queue.kmem)
+	/* Then it will be cleaned and removed by the mana */
+		return;
+
 	kfree(gc->cq_table[cq->queue.id]);
 	gc->cq_table[cq->queue.id] = NULL;
 }
+
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+	struct gdma_queue *gdma_cq = cq->queue.kmem;
+
+	if (!gdma_cq)
+		return -EINVAL;
+
+	mana_gd_ring_cq(gdma_cq, SET_ARM_BIT);
+	return 0;
+}
+
+static inline void handle_ud_sq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+	struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+	struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+	struct ud_sq_shadow_wqe *shadow_wqe;
+
+	shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq);
+	if (!shadow_wqe)
+		return;
+
+	shadow_wqe->header.error_code = rdma_cqe->ud_send.vendor_error;
+
+	wq->tail += shadow_wqe->header.posted_wqe_size;
+	shadow_queue_advance_next_to_complete(&qp->shadow_sq);
+}
+
+static inline void handle_ud_rq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+	struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+	struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+	struct ud_rq_shadow_wqe *shadow_wqe;
+
+	shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_rq);
+	if (!shadow_wqe)
+		return;
+
+	shadow_wqe->byte_len = rdma_cqe->ud_recv.msg_len;
+	shadow_wqe->src_qpn = rdma_cqe->ud_recv.src_qpn;
+	shadow_wqe->header.error_code = IB_WC_SUCCESS;
+
+	wq->tail += shadow_wqe->header.posted_wqe_size;
+	shadow_queue_advance_next_to_complete(&qp->shadow_rq);
+}
+
+static void mana_handle_cqe(struct mana_ib_dev *mdev, struct gdma_comp *cqe)
+{
+	struct mana_ib_qp *qp = mana_get_qp_ref(mdev, cqe->wq_num, cqe->is_sq);
+
+	if (!qp)
+		return;
+
+	if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) {
+		if (cqe->is_sq)
+			handle_ud_sq_cqe(qp, cqe);
+		else
+			handle_ud_rq_cqe(qp, cqe);
+	}
+
+	mana_put_qp_ref(qp);
+}
+
+static void fill_verbs_from_shadow_wqe(struct mana_ib_qp *qp, struct ib_wc *wc,
+				       const struct shadow_wqe_header *shadow_wqe)
+{
+	const struct ud_rq_shadow_wqe *ud_wqe = (const struct ud_rq_shadow_wqe *)shadow_wqe;
+
+	wc->wr_id = shadow_wqe->wr_id;
+	wc->status = shadow_wqe->error_code;
+	wc->opcode = shadow_wqe->opcode;
+	wc->vendor_err = shadow_wqe->error_code;
+	wc->wc_flags = 0;
+	wc->qp = &qp->ibqp;
+	wc->pkey_index = 0;
+
+	if (shadow_wqe->opcode == IB_WC_RECV) {
+		wc->byte_len = ud_wqe->byte_len;
+		wc->src_qp = ud_wqe->src_qpn;
+		wc->wc_flags |= IB_WC_GRH;
+	}
+}
+
+static int mana_process_completions(struct mana_ib_cq *cq, int nwc, struct ib_wc *wc)
+{
+	struct shadow_wqe_header *shadow_wqe;
+	struct mana_ib_qp *qp;
+	int wc_index = 0;
+
+	/* process send shadow queue completions */
+	list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
+		while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_sq))
+				!= NULL) {
+			if (wc_index >= nwc)
+				goto out;
+
+			fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+			shadow_queue_advance_consumer(&qp->shadow_sq);
+			wc_index++;
+		}
+	}
+
+	/* process recv shadow queue completions */
+	list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
+		while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_rq))
+				!= NULL) {
+			if (wc_index >= nwc)
+				goto out;
+
+			fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+			shadow_queue_advance_consumer(&qp->shadow_rq);
+			wc_index++;
+		}
+	}
+
+out:
+	return wc_index;
+}
+
+int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+	struct mana_ib_dev *mdev = container_of(ibcq->device, struct mana_ib_dev, ib_dev);
+	struct gdma_queue *queue = cq->queue.kmem;
+	struct gdma_comp gdma_cqe;
+	unsigned long flags;
+	int num_polled = 0;
+	int comp_read, i;
+
+	spin_lock_irqsave(&cq->cq_lock, flags);
+	for (i = 0; i < num_entries; i++) {
+		comp_read = mana_gd_poll_cq(queue, &gdma_cqe, 1);
+		if (comp_read < 1)
+			break;
+		mana_handle_cqe(mdev, &gdma_cqe);
+	}
+
+	num_polled = mana_process_completions(cq, num_entries, wc);
+	spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+	return num_polled;
+}
```
