/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/dma-mapping.h>

#include "amdgpu.h"
#include "amdgpu_ih.h"

/**
 * amdgpu_ih_ring_init - initialize the IH state
 *
 * @adev: amdgpu_device pointer
 * @ih: ih ring to initialize
 * @ring_size: ring size to allocate
 * @use_bus_addr: true when we can use dma_alloc_coherent
 *
 * Initializes the IH state and allocates a buffer
 * for the IH ring buffer.
 * Returns 0 for success, errors for failure.
 */
int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
			unsigned ring_size, bool use_bus_addr)
{
	u32 rb_bufsz;
	int r;

	/* Align ring size to a power-of-two number of dwords so that
	 * ptr_mask (size - 1) can be used to wrap byte offsets.
	 */
	rb_bufsz = order_base_2(ring_size / 4);
	ring_size = (1 << rb_bufsz) * 4;
	ih->ring_size = ring_size;
	ih->ptr_mask = ih->ring_size - 1;
	ih->rptr = 0;
	ih->use_bus_addr = use_bus_addr;

	if (use_bus_addr) {
		dma_addr_t dma_addr;

		/* A ring that is already allocated is kept as is. */
		if (ih->ring)
			return 0;

		/* add 8 bytes for the rptr/wptr shadows and
		 * add them to the end of the ring allocation.
		 */
		ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
					      &dma_addr, GFP_KERNEL);
		if (ih->ring == NULL)
			return -ENOMEM;

		ih->gpu_addr = dma_addr;
		/* wptr shadow is the first dword past the ring, rptr shadow
		 * the second one.
		 */
		ih->wptr_addr = dma_addr + ih->ring_size;
		ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
		ih->rptr_addr = dma_addr + ih->ring_size + 4;
		ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1];
	} else {
		unsigned wptr_offs, rptr_offs;

		/* Reserve one writeback slot each for the wptr and rptr
		 * shadows.
		 */
		r = amdgpu_device_wb_get(adev, &wptr_offs);
		if (r)
			return r;

		r = amdgpu_device_wb_get(adev, &rptr_offs);
		if (r) {
			amdgpu_device_wb_free(adev, wptr_offs);
			return r;
		}

		r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_GTT,
					    &ih->ring_obj, &ih->gpu_addr,
					    (void **)&ih->ring);
		if (r) {
			/* Give back both writeback slots on failure. */
			amdgpu_device_wb_free(adev, rptr_offs);
			amdgpu_device_wb_free(adev, wptr_offs);
			return r;
		}

		/* The writeback offsets index adev->wb.wb[]; the matching
		 * GPU address is the writeback base plus offset * 4.
		 */
		ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4;
		ih->wptr_cpu = &adev->wb.wb[wptr_offs];
		ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
		ih->rptr_cpu = &adev->wb.wb[rptr_offs];
	}

	init_waitqueue_head(&ih->wait_process);
	return 0;
}

/**
 * amdgpu_ih_ring_fini - tear down the IH state
 *
 * @adev: amdgpu_device pointer
 * @ih: ih ring to tear down
 *
 * Tears down the IH state and frees the buffer
 * used for the IH ring buffer.
 */
void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
A
Alex Deucher 已提交
117
{
118 119 120 121

	if (!ih->ring)
		return;

122 123 124 125 126 127
	if (ih->use_bus_addr) {

		/* add 8 bytes for the rptr/wptr shadows and
		 * add them to the end of the ring allocation.
		 */
		dma_free_coherent(adev->dev, ih->ring_size + 8,
128
				  (void *)ih->ring, ih->gpu_addr);
129
		ih->ring = NULL;
A
Alex Deucher 已提交
130
	} else {
131 132
		amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
				      (void **)&ih->ring);
133 134
		amdgpu_device_wb_free(adev, (ih->wptr_addr - ih->gpu_addr) / 4);
		amdgpu_device_wb_free(adev, (ih->rptr_addr - ih->gpu_addr) / 4);
A
Alex Deucher 已提交
135 136 137
	}
}

/**
 * amdgpu_ih_ring_write - write IV to the ring buffer
 *
 * @ih: ih ring to write to
 * @iv: the iv to write
 * @num_dw: size of the iv in dw
 *
 * Writes an IV to the ring buffer using the CPU and increments the wptr.
 * Used for testing and delegating IVs to a software ring.
 */
void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
			  unsigned int num_dw)
{
	/* The stored wptr is a byte offset; the ring is indexed in dwords. */
	uint32_t dw_idx = le32_to_cpu(*ih->wptr_cpu) >> 2;
	const uint32_t *iv_end = iv + num_dw;
	uint32_t new_wptr;

	/* Copy the IV into the ring one dword at a time. */
	while (iv != iv_end)
		ih->ring[dw_idx++] = cpu_to_le32(*iv++);

	/* Back to a byte offset, wrapped to the ring size. */
	new_wptr = (dw_idx << 2) & ih->ptr_mask;

	/* Only commit the new wptr if we don't overflow */
	if (new_wptr == READ_ONCE(ih->rptr))
		return;

	/* Ring contents must be written before the new wptr is published. */
	wmb();
	WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(new_wptr));
}

/**
 * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint
 *
 * @adev: amdgpu_device pointer
 * @ih: ih ring to process
 *
 * Used to ensure the ring has processed IVs up to the checkpoint write pointer.
 */
int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
176 177
					struct amdgpu_ih_ring *ih)
{
P
Philip Yang 已提交
178 179 180
	uint32_t checkpoint_wptr;
	uint64_t checkpoint_ts;
	long timeout = HZ;
181 182 183 184 185

	if (!ih->enabled || adev->shutdown)
		return -ENODEV;

	checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
P
Philip Yang 已提交
186
	/* Order wptr with ring data. */
187
	rmb();
P
Philip Yang 已提交
188
	checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
189

P
Philip Yang 已提交
190
	return wait_event_interruptible_timeout(ih->wait_process,
191 192
		    amdgpu_ih_ts_after(checkpoint_ts, ih->processed_timestamp) ||
		    ih->rptr == amdgpu_ih_get_wptr(adev, ih), timeout);
193 194
}

/**
 * amdgpu_ih_process - interrupt handler
 *
 * @adev: amdgpu_device pointer
 * @ih: ih ring to process
 *
 * Interrupt handler (VI), walk the IH ring.
 * Returns irq process return code.
 */
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
A
Alex Deucher 已提交
205
{
206
	unsigned int count;
A
Alex Deucher 已提交
207 208
	u32 wptr;

209
	if (!ih->enabled || adev->shutdown)
A
Alex Deucher 已提交
210 211
		return IRQ_NONE;

212
	wptr = amdgpu_ih_get_wptr(adev, ih);
A
Alex Deucher 已提交
213 214

restart_ih:
215
	count  = AMDGPU_IH_MAX_NUM_IVS;
216
	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
A
Alex Deucher 已提交
217 218 219 220

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

221
	while (ih->rptr != wptr && --count) {
222
		amdgpu_irq_dispatch(adev, ih);
223
		ih->rptr &= ih->ptr_mask;
A
Alex Deucher 已提交
224
	}
225

226
	amdgpu_ih_set_rptr(adev, ih);
227
	wake_up_all(&ih->wait_process);
A
Alex Deucher 已提交
228 229

	/* make sure wptr hasn't changed while processing */
230
	wptr = amdgpu_ih_get_wptr(adev, ih);
231
	if (wptr != ih->rptr)
A
Alex Deucher 已提交
232 233 234 235
		goto restart_ih;

	return IRQ_HANDLED;
}

/**
 * amdgpu_ih_decode_iv_helper - decode an interrupt vector
 *
 * @adev: amdgpu_device pointer
 * @ih: ih ring to process
 * @entry: IV entry
 *
 * Decodes the interrupt vector at the current rptr
 * position and also advances the position for Vega10
 * and later GPUs.
 */
void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
				struct amdgpu_ih_ring *ih,
				struct amdgpu_iv_entry *entry)
{
	/* wptr/rptr are in bytes, the ring is indexed in dwords */
	const u32 first_dw = ih->rptr >> 2;
	uint32_t dw[8];
	unsigned int i;

	/* Fetch the eight dwords of the IV in ascending order. */
	for (i = 0; i < sizeof(dw) / sizeof(dw[0]); i++)
		dw[i] = le32_to_cpu(ih->ring[first_dw + i]);

	/* dw0: client, source, ring and vmid fields */
	entry->client_id = dw[0] & 0xff;
	entry->src_id = (dw[0] >> 8) & 0xff;
	entry->ring_id = (dw[0] >> 16) & 0xff;
	entry->vmid = (dw[0] >> 24) & 0xf;
	entry->vmid_src = (dw[0] >> 31);

	/* dw1 plus the low 16 bits of dw2: timestamp */
	entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
	entry->timestamp_src = dw[2] >> 31;

	/* dw3: pasid */
	entry->pasid = dw[3] & 0xffff;
	entry->pasid_src = dw[3] >> 31;

	/* dw4..dw7: source specific payload */
	for (i = 0; i < 4; i++)
		entry->src_data[i] = dw[4 + i];

	/* Step over the 32 byte IV that was just decoded. */
	ih->rptr += 32;
}
P
Philip Yang 已提交
282 283 284 285 286 287 288 289 290 291 292 293 294 295 296

uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
				       signed int offset)
{
	uint32_t iv_size = 32;
	uint32_t ring_index;
	uint32_t dw1, dw2;

	rptr += iv_size * offset;
	ring_index = (rptr & ih->ptr_mask) >> 2;

	dw1 = le32_to_cpu(ih->ring[ring_index + 1]);
	dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
	return dw1 | ((u64)(dw2 & 0xffff) << 32);
}