si_dma.c 8.5 KB
Newer Older
C
Christian König 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
27
#include "radeon_trace.h"
C
Christian König 已提交
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
#include "sid.h"

u32 si_gpu_check_soft_reset(struct radeon_device *rdev);

/**
 * si_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = si_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
52
		radeon_ring_lockup_update(rdev, ring);
C
Christian König 已提交
53 54 55 56 57 58
		return false;
	}
	return radeon_ring_test_lockup(rdev, ring);
}

/**
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
 * si_dma_vm_copy_pages - update PTEs by copying them from the GART
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr where to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using the DMA (SI).
 */
void si_dma_vm_copy_pages(struct radeon_device *rdev,
			  struct radeon_ib *ib,
			  uint64_t pe, uint64_t src,
			  unsigned count)
{
	while (count) {
		unsigned bytes = count * 8;
		if (bytes > 0xFFFF8)
			bytes = 0xFFFF8;

		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
						      1, 0, 0, bytes);
		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
		ib->ptr[ib->length_dw++] = lower_32_bits(src);
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;

		pe += bytes;
		src += bytes;
		count -= bytes / 8;
	}
}

/**
 * si_dma_vm_write_pages - update PTEs by writing them manually
C
Christian König 已提交
94 95 96 97 98 99 100 101 102
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
103
 * Update PTEs by writing them manually using the DMA (SI).
C
Christian König 已提交
104
 */
105 106 107 108 109
void si_dma_vm_write_pages(struct radeon_device *rdev,
			   struct radeon_ib *ib,
			   uint64_t pe,
			   uint64_t addr, unsigned count,
			   uint32_t incr, uint32_t flags)
C
Christian König 已提交
110 111 112 113
{
	uint64_t value;
	unsigned ndw;

114 115 116 117 118 119 120 121 122 123 124
	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		/* for non-physically contiguous pages (system) */
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
		ib->ptr[ib->length_dw++] = pe;
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
			if (flags & R600_PTE_SYSTEM) {
125
				value = radeon_vm_map_gart(rdev, addr);
126
			} else if (flags & R600_PTE_VALID) {
C
Christian König 已提交
127
				value = addr;
128
			} else {
C
Christian König 已提交
129
				value = 0;
130 131 132 133
			}
			addr += incr;
			value |= flags;
			ib->ptr[ib->length_dw++] = value;
C
Christian König 已提交
134 135 136
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
		}
	}
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
}

/**
 * si_dma_vm_set_pages - update the page tables using the DMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using the DMA (SI).
 */
void si_dma_vm_set_pages(struct radeon_device *rdev,
			 struct radeon_ib *ib,
			 uint64_t pe,
			 uint64_t addr, unsigned count,
			 uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		if (flags & R600_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = flags; /* mask */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;
		pe += ndw * 4;
		addr += (ndw / 2) * incr;
		count -= ndw / 2;
	}
C
Christian König 已提交
185 186
}

187 188
void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		     unsigned vm_id, uint64_t pd_addr)
C
Christian König 已提交
189

190
{
C
Christian König 已提交
191
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
192 193
	if (vm_id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
C
Christian König 已提交
194
	} else {
195
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2));
C
Christian König 已提交
196
	}
197
	radeon_ring_write(ring, pd_addr >> 12);
C
Christian König 已提交
198 199 200 201 202 203 204 205 206

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
207
	radeon_ring_write(ring, 1 << vm_id);
208 209 210 211 212 213 214 215

	/* wait for invalidate to complete */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST);
	radeon_ring_write(ring, 0xff << 16); /* retry */
	radeon_ring_write(ring, 1 << vm_id); /* mask */
	radeon_ring_write(ring, 0); /* value */
	radeon_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
C
Christian König 已提交
216 217 218 219 220 221 222 223 224
}

/**
 * si_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
225
 * @resv: reservation object to sync to
C
Christian König 已提交
226 227 228 229 230
 *
 * Copy GPU paging using the DMA engine (SI).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
231 232 233 234
struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
				 uint64_t src_offset, uint64_t dst_offset,
				 unsigned num_gpu_pages,
				 struct reservation_object *resv)
C
Christian König 已提交
235
{
236
	struct radeon_fence *fence;
237
	struct radeon_sync sync;
C
Christian König 已提交
238 239 240 241 242 243
	int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes;
	int i, num_loops;
	int r = 0;

244
	radeon_sync_create(&sync);
C
Christian König 已提交
245 246 247 248 249 250

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
251
		radeon_sync_free(rdev, &sync, NULL);
252
		return ERR_PTR(r);
C
Christian König 已提交
253 254
	}

255 256
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);
C
Christian König 已提交
257 258 259 260 261 262 263

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0xFFFFF)
			cur_size_in_bytes = 0xFFFFF;
		size_in_bytes -= cur_size_in_bytes;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
264 265
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, lower_32_bits(src_offset));
C
Christian König 已提交
266 267 268 269 270 271
		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

272
	r = radeon_fence_emit(rdev, &fence, ring->idx);
C
Christian König 已提交
273 274
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
275
		radeon_sync_free(rdev, &sync, NULL);
276
		return ERR_PTR(r);
C
Christian König 已提交
277 278
	}

279
	radeon_ring_unlock_commit(rdev, ring, false);
280
	radeon_sync_free(rdev, &sync, fence);
C
Christian König 已提交
281

282
	return fence;
C
Christian König 已提交
283 284
}