vc4_plane.c 41.6 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 * Copyright (C) 2015 Broadcom
 */

/**
 * DOC: VC4 plane module
 *
 * Each DRM plane is a layer of pixels being scanned out by the HVS.
 *
 * At atomic modeset check time, we compute the HVS display element
 * state that would be necessary for displaying the plane (giving us a
 * chance to figure out if a plane configuration is invalid), then at
 * atomic flush time the CRTC will ask us to write our element state
 * into the region of the HVS that it has allocated for us.
 */

18 19
#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
D
Daniel Vetter 已提交
20
#include <drm/drm_atomic_uapi.h>
S
Sam Ravnborg 已提交
21 22
#include <drm/drm_fb_cma_helper.h>
#include <drm/drm_fourcc.h>
23
#include <drm/drm_gem_framebuffer_helper.h>
S
Sam Ravnborg 已提交
24
#include <drm/drm_plane_helper.h>
25

26
#include "uapi/drm/vc4_drm.h"
S
Sam Ravnborg 已提交
27

28 29 30 31 32 33 34
#include "vc4_drv.h"
#include "vc4_regs.h"

static const struct hvs_format {
	u32 drm; /* DRM_FORMAT_* */
	u32 hvs; /* HVS_FORMAT_* */
	u32 pixel_order;
35
	u32 pixel_order_hvs5;
36 37
} hvs_formats[] = {
	{
38 39
		.drm = DRM_FORMAT_XRGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
40
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
41
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
42 43
	},
	{
44 45
		.drm = DRM_FORMAT_ARGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
46
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
47
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
48
	},
49
	{
50 51
		.drm = DRM_FORMAT_ABGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
52
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
53
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
54 55
	},
	{
56 57
		.drm = DRM_FORMAT_XBGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
58
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
59
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
60
	},
61
	{
62 63
		.drm = DRM_FORMAT_RGB565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
64
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
65 66
	},
	{
67 68
		.drm = DRM_FORMAT_BGR565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
69
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
70 71
	},
	{
72 73
		.drm = DRM_FORMAT_ARGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
74
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
75 76
	},
	{
77 78
		.drm = DRM_FORMAT_XRGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
79
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
80
	},
81
	{
82 83
		.drm = DRM_FORMAT_RGB888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
84
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
85 86
	},
	{
87 88
		.drm = DRM_FORMAT_BGR888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
89
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
90
	},
91 92 93
	{
		.drm = DRM_FORMAT_YUV422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
94
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
95 96 97 98
	},
	{
		.drm = DRM_FORMAT_YVU422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
99
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
100 101 102 103
	},
	{
		.drm = DRM_FORMAT_YUV420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
104
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
105 106 107 108
	},
	{
		.drm = DRM_FORMAT_YVU420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
109
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
110 111 112 113
	},
	{
		.drm = DRM_FORMAT_NV12,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
114
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
115
	},
116 117 118 119 120
	{
		.drm = DRM_FORMAT_NV21,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
	},
121 122 123
	{
		.drm = DRM_FORMAT_NV16,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
124
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
125
	},
126 127 128 129 130
	{
		.drm = DRM_FORMAT_NV61,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
	},
131 132 133 134 135 136 137 138 139 140 141 142 143 144
};

static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
{
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
		if (hvs_formats[i].drm == drm_format)
			return &hvs_formats[i];
	}

	return NULL;
}

145 146
static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
{
147 148 149
	if (dst == src)
		return VC4_SCALING_NONE;
	if (3 * dst >= 2 * src)
150 151
		return VC4_SCALING_PPF;
	else
152
		return VC4_SCALING_TPZ;
153 154
}

155 156
static bool plane_enabled(struct drm_plane_state *state)
{
157
	return state->fb && !WARN_ON(!state->crtc);
158 159
}

160
static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
161 162 163 164 165 166 167 168 169 170
{
	struct vc4_plane_state *vc4_state;

	if (WARN_ON(!plane->state))
		return NULL;

	vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
	if (!vc4_state)
		return NULL;

171
	memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
172
	vc4_state->dlist_initialized = 0;
173

174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
	__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);

	if (vc4_state->dlist) {
		vc4_state->dlist = kmemdup(vc4_state->dlist,
					   vc4_state->dlist_count * 4,
					   GFP_KERNEL);
		if (!vc4_state->dlist) {
			kfree(vc4_state);
			return NULL;
		}
		vc4_state->dlist_size = vc4_state->dlist_count;
	}

	return &vc4_state->base;
}

190 191
static void vc4_plane_destroy_state(struct drm_plane *plane,
				    struct drm_plane_state *state)
192
{
193
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
194 195
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

196
	if (drm_mm_node_allocated(&vc4_state->lbm)) {
197 198 199 200 201 202 203
		unsigned long irqflags;

		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
		drm_mm_remove_node(&vc4_state->lbm);
		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
	}

204
	kfree(vc4_state->dlist);
205
	__drm_atomic_helper_plane_destroy_state(&vc4_state->base);
206 207 208 209
	kfree(state);
}

/* Called during init to allocate the plane's atomic state. */
210
static void vc4_plane_reset(struct drm_plane *plane)
211 212 213 214 215 216 217 218 219
{
	struct vc4_plane_state *vc4_state;

	WARN_ON(plane->state);

	vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
	if (!vc4_state)
		return;

220
	__drm_atomic_helper_plane_reset(plane, &vc4_state->base);
221 222 223 224 225 226
}

static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
{
	if (vc4_state->dlist_count == vc4_state->dlist_size) {
		u32 new_size = max(4u, vc4_state->dlist_count * 2);
227
		u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);
228 229 230 231 232 233 234 235 236 237 238 239 240

		if (!new_dlist)
			return;
		memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);

		kfree(vc4_state->dlist);
		vc4_state->dlist = new_dlist;
		vc4_state->dlist_size = new_size;
	}

	vc4_state->dlist[vc4_state->dlist_count++] = val;
}

241 242 243 244 245
/* Returns the scl0/scl1 field based on whether the dimensions need to
 * be up/down/non-scaled.
 *
 * This is a replication of a table from the spec.
 */
246
static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
247 248 249
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

250
	switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_PPF_V_PPF;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_TPZ_V_PPF;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_PPF_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_PPF_V_NONE;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_NONE_V_PPF;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_NONE_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_TPZ_V_NONE;
	default:
	case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
		/* The unity case is independently handled by
		 * SCALER_CTL0_UNITY.
		 */
		return 0;
	}
}

276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321
static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
{
	struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
	unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
	struct drm_crtc_state *crtc_state;

	crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
						   pstate->crtc);

	vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
	if (!left && !right && !top && !bottom)
		return 0;

	if (left + right >= crtc_state->mode.hdisplay ||
	    top + bottom >= crtc_state->mode.vdisplay)
		return -EINVAL;

	adjhdisplay = crtc_state->mode.hdisplay - (left + right);
	vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_x += left;
	if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - left)
		vc4_pstate->crtc_x = crtc_state->mode.hdisplay - left;

	adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
	vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);
	vc4_pstate->crtc_y += top;
	if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - top)
		vc4_pstate->crtc_y = crtc_state->mode.vdisplay - top;

	vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);

	if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
		return -EINVAL;

	return 0;
}

322
static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
323 324 325
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
326
	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
327
	u32 subpixel_src_mask = (1 << 16) - 1;
328
	int num_planes = fb->format->num_planes;
329
	struct drm_crtc_state *crtc_state;
330 331
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
332 333 334 335 336 337 338 339 340
	int i, ret;

	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
							state->crtc);
	if (!crtc_state) {
		DRM_DEBUG_KMS("Invalid crtc state\n");
		return -EINVAL;
	}

341 342
	ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
						  INT_MAX, true, true);
343 344 345
	if (ret)
		return ret;

346 347
	for (i = 0; i < num_planes; i++)
		vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
348

349
	/* We don't support subpixel source positioning for scaling. */
350 351 352 353
	if ((state->src.x1 & subpixel_src_mask) ||
	    (state->src.x2 & subpixel_src_mask) ||
	    (state->src.y1 & subpixel_src_mask) ||
	    (state->src.y2 & subpixel_src_mask)) {
354 355 356
		return -EINVAL;
	}

357 358 359 360
	vc4_state->src_x = state->src.x1 >> 16;
	vc4_state->src_y = state->src.y1 >> 16;
	vc4_state->src_w[0] = (state->src.x2 - state->src.x1) >> 16;
	vc4_state->src_h[0] = (state->src.y2 - state->src.y1) >> 16;
361

362 363 364 365
	vc4_state->crtc_x = state->dst.x1;
	vc4_state->crtc_y = state->dst.y1;
	vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
	vc4_state->crtc_h = state->dst.y2 - state->dst.y1;
366

367 368 369 370
	ret = vc4_plane_margins_adj(state);
	if (ret)
		return ret;

371 372 373 374 375
	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
						       vc4_state->crtc_w);
	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
						       vc4_state->crtc_h);

376 377 378
	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
			       vc4_state->y_scaling[0] == VC4_SCALING_NONE);

379 380 381 382 383 384 385 386 387 388 389 390 391
	if (num_planes > 1) {
		vc4_state->is_yuv = true;

		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;

		vc4_state->x_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_w[1],
					     vc4_state->crtc_w);
		vc4_state->y_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_h[1],
					     vc4_state->crtc_h);

392 393 394 395 396
		/* YUV conversion requires that horizontal scaling be enabled
		 * on the UV plane even if vc4_get_scaling_mode() returned
		 * VC4_SCALING_NONE (which can happen when the down-scaling
		 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
		 * case.
397
		 */
398 399
		if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
			vc4_state->x_scaling[1] = VC4_SCALING_PPF;
400
	} else {
401
		vc4_state->is_yuv = false;
402 403
		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
404 405
	}

406 407 408
	return 0;
}

409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
	u32 scale, recip;

	scale = (1 << 16) * src / dst;

	/* The specs note that while the reciprocal would be defined
	 * as (1<<32)/scale, ~0 is close enough.
	 */
	recip = ~0 / scale;

	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
}

static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
	u32 scale = (1 << 16) * src / dst;

	vc4_dlist_write(vc4_state,
			SCALER_PPF_AGC |
			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
			VC4_SET_FIELD(0, SCALER_PPF_IPHASE));
}

static u32 vc4_lbm_size(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
440
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
441
	u32 pix_per_line;
442 443
	u32 lbm;

444 445 446 447 448
	/* LBM is not needed when there's no vertical scaling. */
	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
		return 0;

449 450 451 452 453 454 455 456 457 458 459 460
	/*
	 * This can be further optimized in the RGB/YUV444 case if the PPF
	 * decimation factor is between 0.5 and 1.0 by using crtc_w.
	 *
	 * It's not an issue though, since in that case since src_w[0] is going
	 * to be greater than or equal to crtc_w.
	 */
	if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
		pix_per_line = vc4_state->crtc_w;
	else
		pix_per_line = vc4_state->src_w[0];

461
	if (!vc4_state->is_yuv) {
462
		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
463 464 465 466 467 468 469 470 471 472
			lbm = pix_per_line * 8;
		else {
			/* In special cases, this multiplier might be 12. */
			lbm = pix_per_line * 16;
		}
	} else {
		/* There are cases for this going down to a multiplier
		 * of 2, but according to the firmware source, the
		 * table in the docs is somewhat wrong.
		 */
473 474 475
		lbm = pix_per_line * 16;
	}

476 477 478 479 480
	/* Align it to 64 or 128 (hvs5) bytes */
	lbm = roundup(lbm, vc4->hvs->hvs5 ? 128 : 64);

	/* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
	lbm /= vc4->hvs->hvs5 ? 4 : 2;
481 482 483 484

	return lbm;
}

485 486
static void vc4_write_scaling_parameters(struct drm_plane_state *state,
					 int channel)
487 488 489 490
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	/* Ch0 H-PPF Word 0: Scaling Parameters */
491
	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
492
		vc4_write_ppf(vc4_state,
493
			      vc4_state->src_w[channel], vc4_state->crtc_w);
494 495 496
	}

	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
497
	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
498
		vc4_write_ppf(vc4_state,
499
			      vc4_state->src_h[channel], vc4_state->crtc_h);
500 501 502 503
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}

	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
504
	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
505
		vc4_write_tpz(vc4_state,
506
			      vc4_state->src_w[channel], vc4_state->crtc_w);
507 508 509
	}

	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
510
	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
511
		vc4_write_tpz(vc4_state,
512
			      vc4_state->src_h[channel], vc4_state->crtc_h);
513 514 515
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}
}
516

517 518 519 520 521 522 523
static void vc4_plane_calc_load(struct drm_plane_state *state)
{
	unsigned int hvs_load_shift, vrefresh, i;
	struct drm_framebuffer *fb = state->fb;
	struct vc4_plane_state *vc4_state;
	struct drm_crtc_state *crtc_state;
	unsigned int vscale_factor;
M
Maxime Ripard 已提交
524 525 526 527 528
	struct vc4_dev *vc4;

	vc4 = to_vc4_dev(state->plane->dev);
	if (!vc4->load_tracker_available)
		return;
529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576

	vc4_state = to_vc4_plane_state(state);
	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
							state->crtc);
	vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);

	/* The HVS is able to process 2 pixels/cycle when scaling the source,
	 * 4 pixels/cycle otherwise.
	 * Alpha blending step seems to be pipelined and it's always operating
	 * at 4 pixels/cycle, so the limiting aspect here seems to be the
	 * scaler block.
	 * HVS load is expressed in clk-cycles/sec (AKA Hz).
	 */
	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
		hvs_load_shift = 1;
	else
		hvs_load_shift = 2;

	vc4_state->membus_load = 0;
	vc4_state->hvs_load = 0;
	for (i = 0; i < fb->format->num_planes; i++) {
		/* Even if the bandwidth/plane required for a single frame is
		 *
		 * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh
		 *
		 * when downscaling, we have to read more pixels per line in
		 * the time frame reserved for a single line, so the bandwidth
		 * demand can be punctually higher. To account for that, we
		 * calculate the down-scaling factor and multiply the plane
		 * load by this number. We're likely over-estimating the read
		 * demand, but that's better than under-estimating it.
		 */
		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],
					     vc4_state->crtc_h);
		vc4_state->membus_load += vc4_state->src_w[i] *
					  vc4_state->src_h[i] * vscale_factor *
					  fb->format->cpp[i];
		vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
	}

	vc4_state->hvs_load *= vrefresh;
	vc4_state->hvs_load >>= hvs_load_shift;
	vc4_state->membus_load *= vrefresh;
}

577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593
static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned long irqflags;
	u32 lbm_size;

	lbm_size = vc4_lbm_size(state);
	if (!lbm_size)
		return 0;

	if (WARN_ON(!vc4_state->lbm_offset))
		return -EINVAL;

	/* Allocate the LBM memory that the HVS will use for temporary
	 * storage due to our scaling/format conversion.
	 */
594
	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
595 596 597 598 599
		int ret;

		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
		ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
						 &vc4_state->lbm,
600 601 602
						 lbm_size,
						 vc4->hvs->hvs5 ? 64 : 32,
						 0, 0);
603 604 605 606 607 608 609 610 611 612 613 614 615
		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);

		if (ret)
			return ret;
	} else {
		WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
	}

	vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;

	return 0;
}

616 617 618 619 620 621
/* Writes out a full display list for an active plane to the plane's
 * private dlist state.
 */
static int vc4_plane_mode_set(struct drm_plane *plane,
			      struct drm_plane_state *state)
{
622
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
623 624 625
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	u32 ctl0_offset = vc4_state->dlist_count;
V
Ville Syrjälä 已提交
626
	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
627
	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
628
	int num_planes = fb->format->num_planes;
629 630
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
631
	bool mix_plane_alpha;
632
	bool covers_screen;
633
	u32 scl0, scl1, pitch0;
634
	u32 tiling, src_y;
635
	u32 hvs_format = format->hvs;
636
	unsigned int rotation;
637
	int ret, i;
638

639 640 641
	if (vc4_state->dlist_initialized)
		return 0;

642
	ret = vc4_plane_setup_clipping_and_scaling(state);
643 644 645
	if (ret)
		return ret;

646 647 648 649 650 651 652
	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
	 * and 4:4:4, scl1 should be set to scl0 so both channels of
	 * the scaler do the same thing.  For YUV, the Y plane needs
	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
	 * the scl fields here.
	 */
	if (num_planes == 1) {
653
		scl0 = vc4_get_scl_field(state, 0);
654 655 656 657 658
		scl1 = scl0;
	} else {
		scl0 = vc4_get_scl_field(state, 1);
		scl1 = vc4_get_scl_field(state, 0);
	}
659

660 661 662 663 664 665 666 667 668 669
	rotation = drm_rotation_simplify(state->rotation,
					 DRM_MODE_ROTATE_0 |
					 DRM_MODE_REFLECT_X |
					 DRM_MODE_REFLECT_Y);

	/* We must point to the last line when Y reflection is enabled. */
	src_y = vc4_state->src_y;
	if (rotation & DRM_MODE_REFLECT_Y)
		src_y += vc4_state->src_h[0] - 1;

670
	switch (base_format_mod) {
671 672 673
	case DRM_FORMAT_MOD_LINEAR:
		tiling = SCALER_CTL0_TILING_LINEAR;
		pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
674 675 676 677 678

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 */
		for (i = 0; i < num_planes; i++) {
679
			vc4_state->offsets[i] += src_y /
680 681
						 (i ? v_subsample : 1) *
						 fb->pitches[i];
682

683 684 685 686
			vc4_state->offsets[i] += vc4_state->src_x /
						 (i ? h_subsample : 1) *
						 fb->format->cpp[i];
		}
687

688
		break;
689 690 691

	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
		u32 tile_size_shift = 12; /* T tiles are 4kb */
692 693
		/* Whole-tile offsets, mostly for setting the pitch. */
		u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
694
		u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
695 696 697 698 699 700 701 702 703 704
		u32 tile_w_mask = (1 << tile_w_shift) - 1;
		/* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
		 * the height (in pixels) of a 4k tile.
		 */
		u32 tile_h_mask = (2 << tile_h_shift) - 1;
		/* For T-tiled, the FB pitch is "how many bytes from one row to
		 * the next, such that
		 *
		 *	pitch * tile_h == tile_size * tiles_per_row
		 */
705
		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
706 707
		u32 tiles_l = vc4_state->src_x >> tile_w_shift;
		u32 tiles_r = tiles_w - tiles_l;
708
		u32 tiles_t = src_y >> tile_h_shift;
709 710 711 712
		/* Intra-tile offsets, which modify the base address (the
		 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
		 * base address).
		 */
713 714 715
		u32 tile_y = (src_y >> 4) & 1;
		u32 subtile_y = (src_y >> 2) & 3;
		u32 utile_y = src_y & 3;
716
		u32 x_off = vc4_state->src_x & tile_w_mask;
717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733
		u32 y_off = src_y & tile_h_mask;

		/* When Y reflection is requested we must set the
		 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
		 * after the initial one should be fetched in descending order,
		 * which makes sense since we start from the last line and go
		 * backward.
		 * Don't know why we need y_off = max_y_off - y_off, but it's
		 * definitely required (I guess it's also related to the "going
		 * backward" situation).
		 */
		if (rotation & DRM_MODE_REFLECT_Y) {
			y_off = tile_h_mask - y_off;
			pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
		} else {
			pitch0 = 0;
		}
734

735
		tiling = SCALER_CTL0_TILING_256B_OR_T;
736 737 738 739
		pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
			   VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
			   VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
			   VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
740 741 742 743 744 745 746 747 748 749 750 751 752 753
		vc4_state->offsets[0] += tiles_t * (tiles_w << tile_size_shift);
		vc4_state->offsets[0] += subtile_y << 8;
		vc4_state->offsets[0] += utile_y << 4;

		/* Rows of tiles alternate left-to-right and right-to-left. */
		if (tiles_t & 1) {
			pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
			vc4_state->offsets[0] += (tiles_w - tiles_l) <<
						 tile_size_shift;
			vc4_state->offsets[0] -= (1 + !tile_y) << 10;
		} else {
			vc4_state->offsets[0] += tiles_l << tile_size_shift;
			vc4_state->offsets[0] += tile_y << 10;
		}
754 755

		break;
756 757
	}

758 759 760 761
	case DRM_FORMAT_MOD_BROADCOM_SAND64:
	case DRM_FORMAT_MOD_BROADCOM_SAND128:
	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
762
		u32 tile_w, tile, x_off, pix_per_tile;
763

764
		hvs_format = HVS_PIXEL_FORMAT_H264;
765 766 767 768

		switch (base_format_mod) {
		case DRM_FORMAT_MOD_BROADCOM_SAND64:
			tiling = SCALER_CTL0_TILING_64B;
769
			tile_w = 64;
770 771 772
			break;
		case DRM_FORMAT_MOD_BROADCOM_SAND128:
			tiling = SCALER_CTL0_TILING_128B;
773
			tile_w = 128;
774 775 776
			break;
		case DRM_FORMAT_MOD_BROADCOM_SAND256:
			tiling = SCALER_CTL0_TILING_256B_OR_T;
777
			tile_w = 256;
778 779 780 781 782 783 784 785 786 787
			break;
		default:
			break;
		}

		if (param > SCALER_TILE_HEIGHT_MASK) {
			DRM_DEBUG_KMS("SAND height too large (%d)\n", param);
			return -EINVAL;
		}

788 789 790 791 792 793 794 795 796
		pix_per_tile = tile_w / fb->format->cpp[0];
		tile = vc4_state->src_x / pix_per_tile;
		x_off = vc4_state->src_x % pix_per_tile;

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 */
		for (i = 0; i < num_planes; i++) {
			vc4_state->offsets[i] += param * tile_w * tile;
797
			vc4_state->offsets[i] += src_y /
798 799 800 801 802 803 804
						 (i ? v_subsample : 1) *
						 tile_w;
			vc4_state->offsets[i] += x_off /
						 (i ? h_subsample : 1) *
						 fb->format->cpp[i];
		}

805 806 807 808
		pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
		break;
	}

809 810 811 812 813 814
	default:
		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
			      (long long)fb->modifier);
		return -EINVAL;
	}

815 816 817 818 819 820 821
	/* Don't waste cycles mixing with plane alpha if the set alpha
	 * is opaque or there is no per-pixel alpha information.
	 * In any case we use the alpha property value as the fixed alpha.
	 */
	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
			  fb->format->has_alpha;

822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871
	if (!vc4->hvs->hvs5) {
	/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
				(rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
				VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
				(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
				VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
				VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size, Alpha */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(fb->format->has_alpha ?
					      SCALER_POS2_ALPHA_MODE_PIPELINE :
					      SCALER_POS2_ALPHA_MODE_FIXED,
					      SCALER_POS2_ALPHA_MODE) |
				(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
				(fb->format->has_alpha ?
						SCALER_POS2_ALPHA_PREMULT : 0) |
				VC4_SET_FIELD(vc4_state->src_w[0],
					      SCALER_POS2_WIDTH) |
				VC4_SET_FIELD(vc4_state->src_h[0],
					      SCALER_POS2_HEIGHT));

		/* Position Word 3: Context.  Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	} else {
		u32 hvs_pixel_order = format->pixel_order;
872

873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905
		if (format->pixel_order_hvs5)
			hvs_pixel_order = format->pixel_order_hvs5;

		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(hvs_pixel_order << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ?
						SCALER5_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
				SCALER5_CTL0_ALPHA_EXPAND |
				SCALER5_CTL0_RGB_EXPAND);

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(rotation & DRM_MODE_REFLECT_Y ?
						SCALER5_POS0_VFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_x,
					      SCALER_POS0_START_X) |
				(rotation & DRM_MODE_REFLECT_X ?
					      SCALER5_POS0_HFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_y,
					      SCALER5_POS0_START_Y)
			       );

		/* Control Word 2 */
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 4,
					      SCALER5_CTL2_ALPHA) |
906 907
				(fb->format->has_alpha ?
					SCALER5_CTL2_ALPHA_PREMULT : 0) |
908 909 910 911 912 913 914 915 916 917 918 919
				(mix_plane_alpha ?
					SCALER5_CTL2_ALPHA_MIX : 0) |
				VC4_SET_FIELD(fb->format->has_alpha ?
				      SCALER5_CTL2_ALPHA_MODE_PIPELINE :
				      SCALER5_CTL2_ALPHA_MODE_FIXED,
				      SCALER5_CTL2_ALPHA_MODE)
			       );

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
D
Dom Cobley 已提交
920
						      SCALER5_POS1_SCL_WIDTH) |
921
					VC4_SET_FIELD(vc4_state->crtc_h,
D
Dom Cobley 已提交
922
						      SCALER5_POS1_SCL_HEIGHT));
923 924 925 926 927 928 929 930 931 932 933 934 935
		}

		/* Position Word 2: Source Image Size */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(vc4_state->src_w[0],
					      SCALER5_POS2_WIDTH) |
				VC4_SET_FIELD(vc4_state->src_h[0],
					      SCALER5_POS2_HEIGHT));

		/* Position Word 3: Context.  Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}
936

937 938 939 940 941

	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
	 *
	 * The pointers may be any byte address.
	 */
942
	vc4_state->ptr0_offset = vc4_state->dlist_count;
943 944
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, vc4_state->offsets[i]);
945

946 947 948
	/* Pointer Context Word 0/1/2: Written by the HVS */
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
949

950 951 952 953 954
	/* Pitch word 0 */
	vc4_dlist_write(vc4_state, pitch0);

	/* Pitch word 1/2 */
	for (i = 1; i < num_planes; i++) {
955 956 957 958 959 960 961
		if (hvs_format != HVS_PIXEL_FORMAT_H264) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(fb->pitches[i],
						      SCALER_SRC_PITCH));
		} else {
			vc4_dlist_write(vc4_state, pitch0);
		}
962 963 964 965 966 967 968 969
	}

	/* Colorspace conversion words */
	if (vc4_state->is_yuv) {
		vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5);
		vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5);
		vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5);
	}
970

971 972
	vc4_state->lbm_offset = 0;

973 974 975 976
	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
977 978 979
		/* Reserve a slot for the LBM Base Address. The real value will
		 * be set when calling vc4_plane_allocate_lbm().
		 */
980
		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
981 982
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
			vc4_state->lbm_offset = vc4_state->dlist_count++;
983

984 985 986 987 988 989 990 991
		if (num_planes > 1) {
			/* Emit Cb/Cr as channel 0 and Y as channel
			 * 1. This matches how we set up scl0/scl1
			 * above.
			 */
			vc4_write_scaling_parameters(state, 1);
		}
		vc4_write_scaling_parameters(state, 0);
992 993 994 995

		/* If any PPF setup was done, then all the kernel
		 * pointers get uploaded.
		 */
996 997 998 999
		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013
			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
						   SCALER_PPF_KERNEL_OFFSET);

			/* HPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* HPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
		}
	}

1014 1015 1016
	vc4_state->dlist[ctl0_offset] |=
		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);

1017 1018 1019 1020 1021
	/* crtc_* are already clipped coordinates. */
	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
			vc4_state->crtc_h == state->crtc->mode.vdisplay;
	/* Background fill might be necessary when the plane has per-pixel
1022 1023
	 * alpha content or a non-opaque plane alpha and could blend from the
	 * background or does not cover the entire screen.
1024
	 */
1025 1026
	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;
1027

1028 1029 1030 1031 1032 1033 1034
	/* Flag the dlist as initialized to avoid checking it twice in case
	 * the async update check already called vc4_plane_mode_set() and
	 * decided to fallback to sync update because async update was not
	 * possible.
	 */
	vc4_state->dlist_initialized = 1;

1035 1036
	vc4_plane_calc_load(state);

1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050
	return 0;
}

/* If a modeset involves changing the setup of a plane, the atomic
 * infrastructure will call this to validate a proposed plane setup.
 * However, if a plane isn't getting updated, this (and the
 * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
 * compute the dlist here and have all active plane dlists get updated
 * in the CRTC's flush.
 */
static int vc4_plane_atomic_check(struct drm_plane *plane,
				  struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1051
	int ret;
1052 1053 1054

	vc4_state->dlist_count = 0;

1055
	if (!plane_enabled(state))
1056
		return 0;
1057 1058 1059 1060 1061 1062

	ret = vc4_plane_mode_set(plane, state);
	if (ret)
		return ret;

	return vc4_plane_allocate_lbm(state);
1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079
}

static void vc4_plane_atomic_update(struct drm_plane *plane,
				    struct drm_plane_state *old_state)
{
	/* No contents here.  Since we don't know where in the CRTC's
	 * dlist we should be stored, our dlist is uploaded to the
	 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush
	 * time.
	 */
}

u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
	int i;

1080 1081
	vc4_state->hw_dlist = dlist;

1082 1083 1084 1085 1086 1087 1088
	/* Can't memcpy_toio() because it needs to be 32-bit writes. */
	for (i = 0; i < vc4_state->dlist_count; i++)
		writel(vc4_state->dlist[i], &dlist[i]);

	return vc4_state->dlist_count;
}

1089
u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
1090
{
1091 1092
	const struct vc4_plane_state *vc4_state =
		container_of(state, typeof(*vc4_state), base);
1093 1094 1095 1096

	return vc4_state->dlist_count;
}

1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115
/* Updates the plane to immediately (well, once the FIFO needs
 * refilling) scan out from at a new framebuffer.
 */
void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
	uint32_t addr;

	/* We're skipping the address adjustment for negative origin,
	 * because this is only called on the primary plane.
	 */
	WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
	addr = bo->paddr + fb->offsets[0];

	/* Write the new address into the hardware immediately.  The
	 * scanout will start from this address as soon as the FIFO
	 * needs to refill with pixels.
	 */
1116
	writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
1117 1118 1119 1120 1121

	/* Also update the CPU-side dlist copy, so that any later
	 * atomic updates that don't do a new modeset on our plane
	 * also use our updated address.
	 */
1122
	vc4_state->dlist[vc4_state->ptr0_offset] = addr;
1123 1124
}

1125 1126 1127
static void vc4_plane_atomic_async_update(struct drm_plane *plane,
					  struct drm_plane_state *state)
{
1128
	struct vc4_plane_state *vc4_state, *new_vc4_state;
1129

1130
	swap(plane->state->fb, state->fb);
1131 1132
	plane->state->crtc_x = state->crtc_x;
	plane->state->crtc_y = state->crtc_y;
1133 1134
	plane->state->crtc_w = state->crtc_w;
	plane->state->crtc_h = state->crtc_h;
1135 1136
	plane->state->src_x = state->src_x;
	plane->state->src_y = state->src_y;
1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149
	plane->state->src_w = state->src_w;
	plane->state->src_h = state->src_h;
	plane->state->src_h = state->src_h;
	plane->state->alpha = state->alpha;
	plane->state->pixel_blend_mode = state->pixel_blend_mode;
	plane->state->rotation = state->rotation;
	plane->state->zpos = state->zpos;
	plane->state->normalized_zpos = state->normalized_zpos;
	plane->state->color_encoding = state->color_encoding;
	plane->state->color_range = state->color_range;
	plane->state->src = state->src;
	plane->state->dst = state->dst;
	plane->state->visible = state->visible;
1150 1151 1152 1153

	new_vc4_state = to_vc4_plane_state(state);
	vc4_state = to_vc4_plane_state(plane->state);

1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173
	vc4_state->crtc_x = new_vc4_state->crtc_x;
	vc4_state->crtc_y = new_vc4_state->crtc_y;
	vc4_state->crtc_h = new_vc4_state->crtc_h;
	vc4_state->crtc_w = new_vc4_state->crtc_w;
	vc4_state->src_x = new_vc4_state->src_x;
	vc4_state->src_y = new_vc4_state->src_y;
	memcpy(vc4_state->src_w, new_vc4_state->src_w,
	       sizeof(vc4_state->src_w));
	memcpy(vc4_state->src_h, new_vc4_state->src_h,
	       sizeof(vc4_state->src_h));
	memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
	       sizeof(vc4_state->x_scaling));
	memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
	       sizeof(vc4_state->y_scaling));
	vc4_state->is_unity = new_vc4_state->is_unity;
	vc4_state->is_yuv = new_vc4_state->is_yuv;
	memcpy(vc4_state->offsets, new_vc4_state->offsets,
	       sizeof(vc4_state->offsets));
	vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;

1174 1175 1176 1177 1178 1179 1180
	/* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
	vc4_state->dlist[vc4_state->pos0_offset] =
		new_vc4_state->dlist[vc4_state->pos0_offset];
	vc4_state->dlist[vc4_state->pos2_offset] =
		new_vc4_state->dlist[vc4_state->pos2_offset];
	vc4_state->dlist[vc4_state->ptr0_offset] =
		new_vc4_state->dlist[vc4_state->ptr0_offset];
1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196

	/* Note that we can't just call vc4_plane_write_dlist()
	 * because that would smash the context data that the HVS is
	 * currently using.
	 */
	writel(vc4_state->dlist[vc4_state->pos0_offset],
	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
	writel(vc4_state->dlist[vc4_state->pos2_offset],
	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
	writel(vc4_state->dlist[vc4_state->ptr0_offset],
	       &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
}

static int vc4_plane_atomic_async_check(struct drm_plane *plane,
					struct drm_plane_state *state)
{
1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211
	struct vc4_plane_state *old_vc4_state, *new_vc4_state;
	int ret;
	u32 i;

	ret = vc4_plane_mode_set(plane, state);
	if (ret)
		return ret;

	old_vc4_state = to_vc4_plane_state(plane->state);
	new_vc4_state = to_vc4_plane_state(state);
	if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
	    old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
	    old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
	    old_vc4_state->ptr0_offset != new_vc4_state->ptr0_offset ||
	    vc4_lbm_size(plane->state) != vc4_lbm_size(state))
1212 1213
		return -EINVAL;

1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228
	/* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update
	 * if anything else has changed, fallback to a sync update.
	 */
	for (i = 0; i < new_vc4_state->dlist_count; i++) {
		if (i == new_vc4_state->pos0_offset ||
		    i == new_vc4_state->pos2_offset ||
		    i == new_vc4_state->ptr0_offset ||
		    (new_vc4_state->lbm_offset &&
		     i == new_vc4_state->lbm_offset))
			continue;

		if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
			return -EINVAL;
	}

1229 1230 1231
	return 0;
}

1232 1233 1234 1235
static int vc4_prepare_fb(struct drm_plane *plane,
			  struct drm_plane_state *state)
{
	struct vc4_bo *bo;
1236
	int ret;
1237

1238
	if (!state->fb)
1239 1240 1241
		return 0;

	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
1242

1243
	drm_gem_fb_prepare_fb(plane, state);
1244 1245 1246 1247

	if (plane->state->fb == state->fb)
		return 0;

1248 1249 1250 1251
	ret = vc4_bo_inc_usecnt(bo);
	if (ret)
		return ret;

1252 1253 1254
	return 0;
}

1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266
static void vc4_cleanup_fb(struct drm_plane *plane,
			   struct drm_plane_state *state)
{
	struct vc4_bo *bo;

	if (plane->state->fb == state->fb || !state->fb)
		return;

	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
	vc4_bo_dec_usecnt(bo);
}

1267 1268 1269
static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
	.atomic_check = vc4_plane_atomic_check,
	.atomic_update = vc4_plane_atomic_update,
1270
	.prepare_fb = vc4_prepare_fb,
1271
	.cleanup_fb = vc4_cleanup_fb,
1272 1273
	.atomic_async_check = vc4_plane_atomic_async_check,
	.atomic_async_update = vc4_plane_atomic_async_update,
1274 1275 1276 1277 1278 1279 1280
};

static void vc4_plane_destroy(struct drm_plane *plane)
{
	drm_plane_cleanup(plane);
}

1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294
static bool vc4_format_mod_supported(struct drm_plane *plane,
				     uint32_t format,
				     uint64_t modifier)
{
	/* Support T_TILING for RGB formats only. */
	switch (format) {
	case DRM_FORMAT_XRGB8888:
	case DRM_FORMAT_ARGB8888:
	case DRM_FORMAT_ABGR8888:
	case DRM_FORMAT_XBGR8888:
	case DRM_FORMAT_RGB565:
	case DRM_FORMAT_BGR565:
	case DRM_FORMAT_ARGB1555:
	case DRM_FORMAT_XRGB1555:
1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_LINEAR:
		case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_NV12:
	case DRM_FORMAT_NV21:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_LINEAR:
		case DRM_FORMAT_MOD_BROADCOM_SAND64:
		case DRM_FORMAT_MOD_BROADCOM_SAND128:
		case DRM_FORMAT_MOD_BROADCOM_SAND256:
			return true;
		default:
			return false;
		}
1313 1314 1315 1316
	case DRM_FORMAT_RGBX1010102:
	case DRM_FORMAT_BGRX1010102:
	case DRM_FORMAT_RGBA1010102:
	case DRM_FORMAT_BGRA1010102:
1317 1318 1319 1320 1321
	case DRM_FORMAT_YUV422:
	case DRM_FORMAT_YVU422:
	case DRM_FORMAT_YUV420:
	case DRM_FORMAT_YVU420:
	case DRM_FORMAT_NV16:
1322
	case DRM_FORMAT_NV61:
1323 1324 1325 1326 1327
	default:
		return (modifier == DRM_FORMAT_MOD_LINEAR);
	}
}

1328
static const struct drm_plane_funcs vc4_plane_funcs = {
1329
	.update_plane = drm_atomic_helper_update_plane,
1330 1331 1332 1333 1334 1335
	.disable_plane = drm_atomic_helper_disable_plane,
	.destroy = vc4_plane_destroy,
	.set_property = NULL,
	.reset = vc4_plane_reset,
	.atomic_duplicate_state = vc4_plane_duplicate_state,
	.atomic_destroy_state = vc4_plane_destroy_state,
1336
	.format_mod_supported = vc4_format_mod_supported,
1337 1338 1339 1340 1341 1342 1343 1344 1345 1346
};

struct drm_plane *vc4_plane_init(struct drm_device *dev,
				 enum drm_plane_type type)
{
	struct drm_plane *plane = NULL;
	struct vc4_plane *vc4_plane;
	u32 formats[ARRAY_SIZE(hvs_formats)];
	int ret = 0;
	unsigned i;
1347 1348
	static const uint64_t modifiers[] = {
		DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
1349 1350 1351
		DRM_FORMAT_MOD_BROADCOM_SAND128,
		DRM_FORMAT_MOD_BROADCOM_SAND64,
		DRM_FORMAT_MOD_BROADCOM_SAND256,
1352 1353 1354
		DRM_FORMAT_MOD_LINEAR,
		DRM_FORMAT_MOD_INVALID
	};
1355 1356 1357

	vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane),
				 GFP_KERNEL);
1358 1359
	if (!vc4_plane)
		return ERR_PTR(-ENOMEM);
1360

1361 1362 1363
	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++)
		formats[i] = hvs_formats[i].drm;

1364
	plane = &vc4_plane->base;
1365
	ret = drm_universal_plane_init(dev, plane, 0,
1366
				       &vc4_plane_funcs,
1367
				       formats, ARRAY_SIZE(formats),
1368
				       modifiers, type, NULL);
1369 1370
	if (ret)
		return ERR_PTR(ret);
1371 1372 1373

	drm_plane_helper_add(plane, &vc4_plane_helper_funcs);

1374
	drm_plane_create_alpha_property(plane);
1375 1376 1377 1378 1379
	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
					   DRM_MODE_ROTATE_0 |
					   DRM_MODE_ROTATE_180 |
					   DRM_MODE_REFLECT_X |
					   DRM_MODE_REFLECT_Y);
1380

1381 1382
	return plane;
}
1383

1384
int vc4_plane_create_additional_planes(struct drm_device *drm)
1385 1386
{
	struct drm_plane *cursor_plane;
1387
	struct drm_crtc *crtc;
1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398
	unsigned int i;

	/* Set up some arbitrary number of planes.  We're not limited
	 * by a set number of physical registers, just the space in
	 * the HVS (16k) and how small an plane can be (28 bytes).
	 * However, each plane we set up takes up some memory, and
	 * increases the cost of looping over planes, which atomic
	 * modesetting does quite a bit.  As a result, we pick a
	 * modest number of planes to expose, that should hopefully
	 * still cover any sane usecase.
	 */
1399
	for (i = 0; i < 16; i++) {
1400 1401 1402 1403 1404 1405
		struct drm_plane *plane =
			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY);

		if (IS_ERR(plane))
			continue;

1406 1407
		plane->possible_crtcs =
			GENMASK(drm->mode_config.num_crtc - 1, 0);
1408 1409
	}

1410 1411 1412 1413 1414 1415 1416 1417 1418 1419
	drm_for_each_crtc(crtc, drm) {
		/* Set up the legacy cursor after overlay initialization,
		 * since we overlay planes on the CRTC in the order they were
		 * initialized.
		 */
		cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
		if (!IS_ERR(cursor_plane)) {
			cursor_plane->possible_crtcs = drm_crtc_mask(crtc);
			crtc->cursor = cursor_plane;
		}
1420 1421 1422 1423
	}

	return 0;
}