diff options
| author | Dave Airlie <[email protected]> | 2020-05-08 03:31:06 +0000 |
|---|---|---|
| committer | Dave Airlie <[email protected]> | 2020-05-08 03:31:08 +0000 |
| commit | 370fb6b0aaf07c66a3317d5b35fba4345b31035c (patch) | |
| tree | b68550f0d7b340440dc4048ed007b59ffa563e60 /drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | |
| parent | Merge tag 'amd-drm-next-5.8-2020-04-24' of git://people.freedesktop.org/~agd5... (diff) | |
| parent | drm/amdkfd: Enable over-subscription with >1 GWS queue (diff) | |
| download | kernel-370fb6b0aaf07c66a3317d5b35fba4345b31035c.tar.gz kernel-370fb6b0aaf07c66a3317d5b35fba4345b31035c.zip | |
Merge tag 'amd-drm-next-5.8-2020-04-30' of git://people.freedesktop.org/~agd5f/linux into drm-next
amd-drm-next-5.8-2020-04-30:
amdgpu:
- SR-IOV fixes
- SDMA fix for Navi
- VCN 2.5 DPG fixes
- Display fixes
- Display stuttering fixes for pageflip and cursor
- Add support for handling encrypted GPU memory
- Add UAPI for encrypted GPU memory
- Rework IB pool handling
amdkfd:
- Expose asic revision in topology
- Add UAPI for GWS (Global Wave Sync) resource management
UAPI:
- Add amdgpu UAPI for encrypted GPU memory
Used by: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4401
- Add amdkfd UAPI for GWS (Global Wave Sync) resource management
Thunk usage of KFD ioctl: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/blob/roc-2.8.0/src/queues.c#L840
ROCr usage of Thunk API: https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/roc-3.1.0/src/core/runtime/amd_gpu_agent.cpp#L597
HCC code using ROCr API: https://github.com/RadeonOpenCompute/hcc/blob/98ee9f34945d3b5f572d7a4c15cbffa506487734/lib/hsa/mcwamp_hsa.cpp#L2161
HIP code using HCC API: https://github.com/ROCm-Developer-Tools/HIP/blob/cf8589b8c8a40ddcc55fa3a51e23390a49824130/src/hip_module.cpp#L567
Signed-off-by: Dave Airlie <[email protected]>
From: Alex Deucher <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 20 |
1 files changed, 17 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index e206064ffb82..b544baf306f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -410,6 +410,18 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid); + /* Invalidate L2, because if we don't do it, we might get stale cache + * lines from previous IBs. + */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV | + SDMA_GCR_GL2_WB | + SDMA_GCR_GLM_INV | + SDMA_GCR_GLM_WB) << 16); + amdgpu_ring_write(ring, 0xffffff80); + amdgpu_ring_write(ring, 0xffff); + /* An IB packet must end on a 8 DW boundary--the next dword * must be on a 8-dword boundary. Our IB packet below is 6 * dwords long, thus add x number of NOPs, such that, in @@ -1634,7 +1646,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 + 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ - .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ + .emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */ .emit_ib = sdma_v5_0_ring_emit_ib, .emit_fence = sdma_v5_0_ring_emit_fence, .emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync, @@ -1694,10 +1706,12 @@ static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev) static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, - uint32_t byte_count) + uint32_t byte_count, + bool tmz) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | - SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | + SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); |
