提交 f29224a6 编写于 作者: C Christian König 提交者: Alex Deucher

drm/amdgpu: fix amdgpu_fill_buffer (v2)

The mem start is relative to the domain in the address space, so this
worked only when VRAM was mapped at offset 0.

It also didn't handle multiple drm_mm_nodes for split VRAM.

v2: rebase on dma_fence renaming
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
上级 3b1c9036
...@@ -1382,28 +1382,40 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, ...@@ -1382,28 +1382,40 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
} }
int amdgpu_fill_buffer(struct amdgpu_bo *bo, int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data, uint32_t src_data,
struct reservation_object *resv, struct reservation_object *resv,
struct dma_fence **fence) struct dma_fence **fence)
{ {
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_job *job; uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
uint32_t max_bytes, byte_count; struct drm_mm_node *mm_node;
uint64_t dst_offset; unsigned long num_pages;
unsigned int num_loops, num_dw; unsigned int num_loops, num_dw;
unsigned int i;
struct amdgpu_job *job;
int r; int r;
byte_count = bo->tbo.num_pages << PAGE_SHIFT; if (!ring->ready) {
max_bytes = adev->mman.buffer_funcs->fill_max_bytes; DRM_ERROR("Trying to clear memory with ring turned off.\n");
num_loops = DIV_ROUND_UP(byte_count, max_bytes); return -EINVAL;
}
num_pages = bo->tbo.num_pages;
mm_node = bo->tbo.mem.mm_node;
num_loops = 0;
while (num_pages) {
uint32_t byte_count = mm_node->size << PAGE_SHIFT;
num_loops += DIV_ROUND_UP(byte_count, max_bytes);
num_pages -= mm_node->size;
++mm_node;
}
num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw; num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
/* for IB padding */ /* for IB padding */
while (num_dw & 0x7) num_dw += 64;
num_dw++;
r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job); r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
if (r) if (r)
...@@ -1411,28 +1423,43 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, ...@@ -1411,28 +1423,43 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
if (resv) { if (resv) {
r = amdgpu_sync_resv(adev, &job->sync, resv, r = amdgpu_sync_resv(adev, &job->sync, resv,
AMDGPU_FENCE_OWNER_UNDEFINED); AMDGPU_FENCE_OWNER_UNDEFINED);
if (r) { if (r) {
DRM_ERROR("sync failed (%d).\n", r); DRM_ERROR("sync failed (%d).\n", r);
goto error_free; goto error_free;
} }
} }
dst_offset = bo->tbo.mem.start << PAGE_SHIFT; num_pages = bo->tbo.num_pages;
for (i = 0; i < num_loops; i++) { mm_node = bo->tbo.mem.mm_node;
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, while (num_pages) {
dst_offset, cur_size_in_bytes); uint32_t byte_count = mm_node->size << PAGE_SHIFT;
uint64_t dst_addr;
dst_offset += cur_size_in_bytes; r = amdgpu_mm_node_addr(&bo->tbo, mm_node,
byte_count -= cur_size_in_bytes; &bo->tbo.mem, &dst_addr);
if (r)
return r;
while (byte_count) {
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
dst_addr, cur_size_in_bytes);
dst_addr += cur_size_in_bytes;
byte_count -= cur_size_in_bytes;
}
num_pages -= mm_node->size;
++mm_node;
} }
amdgpu_ring_pad_ib(ring, &job->ibs[0]); amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw); WARN_ON(job->ibs[0].length_dw > num_dw);
r = amdgpu_job_submit(job, ring, &adev->mman.entity, r = amdgpu_job_submit(job, ring, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, fence); AMDGPU_FENCE_OWNER_UNDEFINED, fence);
if (r) if (r)
goto error_free; goto error_free;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册