radeon_benchmark.c 6.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
/*
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Jerome Glisse
 */
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"

29 30 31 32
#define RADEON_BENCHMARK_COPY_BLIT 1
#define RADEON_BENCHMARK_COPY_DMA  0

#define RADEON_BENCHMARK_ITERATIONS 1024
33
#define RADEON_BENCHMARK_COMMON_MODES_N 17
34 35 36 37 38 39 40 41 42 43 44 45

static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size,
				    uint64_t saddr, uint64_t daddr,
				    int flag, int n)
{
	unsigned long start_jiffies;
	unsigned long end_jiffies;
	struct radeon_fence *fence = NULL;
	int i, r;

	start_jiffies = jiffies;
	for (i = 0; i < n; i++) {
46
		r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
		if (r)
			return r;

		switch (flag) {
		case RADEON_BENCHMARK_COPY_DMA:
			r = radeon_copy_dma(rdev, saddr, daddr,
					    size / RADEON_GPU_PAGE_SIZE,
					    fence);
			break;
		case RADEON_BENCHMARK_COPY_BLIT:
			r = radeon_copy_blit(rdev, saddr, daddr,
					     size / RADEON_GPU_PAGE_SIZE,
					     fence);
			break;
		default:
			DRM_ERROR("Unknown copy method\n");
			r = -EINVAL;
		}
		if (r)
			goto exit_do_move;
		r = radeon_fence_wait(fence, false);
		if (r)
			goto exit_do_move;
		radeon_fence_unref(&fence);
	}
	end_jiffies = jiffies;
	r = jiffies_to_msecs(end_jiffies - start_jiffies);

exit_do_move:
	if (fence)
		radeon_fence_unref(&fence);
	return r;
}


static void radeon_benchmark_log_results(int n, unsigned size,
					 unsigned int time,
					 unsigned sdomain, unsigned ddomain,
					 char *kind)
{
	unsigned int throughput = (n * (size >> 10)) / time;
	DRM_INFO("radeon: %s %u bo moves of %u kB from"
		 " %d to %d in %u ms, throughput: %u Mb/s or %u MB/s\n",
		 kind, n, size >> 10, sdomain, ddomain, time,
		 throughput * 8, throughput);
}

static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size,
				  unsigned sdomain, unsigned ddomain)
96
{
97 98
	struct radeon_bo *dobj = NULL;
	struct radeon_bo *sobj = NULL;
99
	uint64_t saddr, daddr;
100
	int r, n;
101
	int time;
102

103
	n = RADEON_BENCHMARK_ITERATIONS;
104
	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, &sobj);
105 106 107
	if (r) {
		goto out_cleanup;
	}
108 109 110 111 112
	r = radeon_bo_reserve(sobj, false);
	if (unlikely(r != 0))
		goto out_cleanup;
	r = radeon_bo_pin(sobj, sdomain, &saddr);
	radeon_bo_unreserve(sobj);
113 114 115
	if (r) {
		goto out_cleanup;
	}
116
	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, ddomain, &dobj);
117 118 119
	if (r) {
		goto out_cleanup;
	}
120 121 122 123 124
	r = radeon_bo_reserve(dobj, false);
	if (unlikely(r != 0))
		goto out_cleanup;
	r = radeon_bo_pin(dobj, ddomain, &daddr);
	radeon_bo_unreserve(dobj);
125 126 127
	if (r) {
		goto out_cleanup;
	}
128 129

	/* r100 doesn't have dma engine so skip the test */
130 131 132
	/* also, VRAM-to-VRAM test doesn't make much sense for DMA */
	/* skip it as well if domains are the same */
	if ((rdev->asic->copy_dma) && (sdomain != ddomain)) {
133 134 135
		time = radeon_benchmark_do_move(rdev, size, saddr, daddr,
						RADEON_BENCHMARK_COPY_DMA, n);
		if (time < 0)
136
			goto out_cleanup;
137 138 139
		if (time > 0)
			radeon_benchmark_log_results(n, size, time,
						     sdomain, ddomain, "dma");
140
	}
141 142 143 144 145 146 147 148 149

	time = radeon_benchmark_do_move(rdev, size, saddr, daddr,
					RADEON_BENCHMARK_COPY_BLIT, n);
	if (time < 0)
		goto out_cleanup;
	if (time > 0)
		radeon_benchmark_log_results(n, size, time,
					     sdomain, ddomain, "blit");

150 151
out_cleanup:
	if (sobj) {
152 153 154 155 156 157
		r = radeon_bo_reserve(sobj, false);
		if (likely(r == 0)) {
			radeon_bo_unpin(sobj);
			radeon_bo_unreserve(sobj);
		}
		radeon_bo_unref(&sobj);
158 159
	}
	if (dobj) {
160 161 162 163 164 165
		r = radeon_bo_reserve(dobj, false);
		if (likely(r == 0)) {
			radeon_bo_unpin(dobj);
			radeon_bo_unreserve(dobj);
		}
		radeon_bo_unref(&dobj);
166
	}
167

168
	if (r) {
169
		DRM_ERROR("Error while benchmarking BO move.\n");
170 171 172
	}
}

173
void radeon_benchmark(struct radeon_device *rdev, int test_number)
174
{
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
	int i;
	int common_modes[RADEON_BENCHMARK_COMMON_MODES_N] = {
		640 * 480 * 4,
		720 * 480 * 4,
		800 * 600 * 4,
		848 * 480 * 4,
		1024 * 768 * 4,
		1152 * 768 * 4,
		1280 * 720 * 4,
		1280 * 800 * 4,
		1280 * 854 * 4,
		1280 * 960 * 4,
		1280 * 1024 * 4,
		1440 * 900 * 4,
		1400 * 1050 * 4,
		1680 * 1050 * 4,
		1600 * 1200 * 4,
		1920 * 1080 * 4,
		1920 * 1200 * 4
	};

	switch (test_number) {
	case 1:
		/* simple test, VRAM to GTT and GTT to VRAM */
		radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT,
				      RADEON_GEM_DOMAIN_VRAM);
		radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM,
				      RADEON_GEM_DOMAIN_GTT);
		break;
	case 2:
		/* simple test, VRAM to VRAM */
		radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM,
				      RADEON_GEM_DOMAIN_VRAM);
		break;
	case 3:
		/* GTT to VRAM, buffer size sweep, powers of 2 */
		for (i = 1; i <= 65536; i <<= 1)
			radeon_benchmark_move(rdev, i*1024,
					      RADEON_GEM_DOMAIN_GTT,
					      RADEON_GEM_DOMAIN_VRAM);
		break;
	case 4:
		/* VRAM to GTT, buffer size sweep, powers of 2 */
		for (i = 1; i <= 65536; i <<= 1)
			radeon_benchmark_move(rdev, i*1024,
					      RADEON_GEM_DOMAIN_VRAM,
					      RADEON_GEM_DOMAIN_GTT);
		break;
	case 5:
		/* VRAM to VRAM, buffer size sweep, powers of 2 */
		for (i = 1; i <= 65536; i <<= 1)
			radeon_benchmark_move(rdev, i*1024,
					      RADEON_GEM_DOMAIN_VRAM,
					      RADEON_GEM_DOMAIN_VRAM);
		break;
	case 6:
		/* GTT to VRAM, buffer size sweep, common modes */
232
		for (i = 0; i < RADEON_BENCHMARK_COMMON_MODES_N; i++)
233 234 235 236 237 238
			radeon_benchmark_move(rdev, common_modes[i],
					      RADEON_GEM_DOMAIN_GTT,
					      RADEON_GEM_DOMAIN_VRAM);
		break;
	case 7:
		/* VRAM to GTT, buffer size sweep, common modes */
239
		for (i = 0; i < RADEON_BENCHMARK_COMMON_MODES_N; i++)
240 241 242 243 244 245
			radeon_benchmark_move(rdev, common_modes[i],
					      RADEON_GEM_DOMAIN_VRAM,
					      RADEON_GEM_DOMAIN_GTT);
		break;
	case 8:
		/* VRAM to VRAM, buffer size sweep, common modes */
246
		for (i = 0; i < RADEON_BENCHMARK_COMMON_MODES_N; i++)
247 248 249 250 251 252 253 254
			radeon_benchmark_move(rdev, common_modes[i],
					      RADEON_GEM_DOMAIN_VRAM,
					      RADEON_GEM_DOMAIN_VRAM);
		break;

	default:
		DRM_ERROR("Unknown benchmark\n");
	}
255
}