// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Broadcom
 */

/**
 * DOC: VC4 plane module
 *
 * Each DRM plane is a layer of pixels being scanned out by the HVS.
 *
 * At atomic modeset check time, we compute the HVS display element
 * state that would be necessary for displaying the plane (giving us a
 * chance to figure out if a plane configuration is invalid), then at
 * atomic flush time the CRTC will ask us to write our element state
 * into the region of the HVS that it has allocated for us.
 */

#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_atomic_uapi.h>
#include <drm/drm_fb_cma_helper.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_plane_helper.h>

#include "uapi/drm/vc4_drm.h"

#include "vc4_drv.h"
#include "vc4_regs.h"

static const struct hvs_format {
	u32 drm; /* DRM_FORMAT_* */
	u32 hvs; /* HVS_FORMAT_* */
	u32 pixel_order;
	u32 pixel_order_hvs5; /* HVS_PIXEL_ORDER_* used instead on HVS5, when set */
} hvs_formats[] = {
	{
		.drm = DRM_FORMAT_XRGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ARGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ABGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_XBGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_RGB565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
	},
	{
		.drm = DRM_FORMAT_BGR565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
	},
	{
		.drm = DRM_FORMAT_ARGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_XRGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_RGB888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
	},
	{
		.drm = DRM_FORMAT_BGR888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
	},
	{
		.drm = DRM_FORMAT_YUV422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_YUV420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_NV12,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_NV21,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_NV16,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_NV61,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
	},
};

static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
{
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
		if (hvs_formats[i].drm == drm_format)
			return &hvs_formats[i];
	}

	return NULL;
}

static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
{
	if (dst == src)
		return VC4_SCALING_NONE;
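	/* A dst/src ratio of 2/3 or more stays on the polyphase filter
	 * (PPF); heavier down-scaling uses the trapezoidal (TPZ) scaler.
	 */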
	if (3 * dst >= 2 * src)
		return VC4_SCALING_PPF;
	else
		return VC4_SCALING_TPZ;
}

static bool plane_enabled(struct drm_plane_state *state)
{
	return state->fb && !WARN_ON(!state->crtc);
}

static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
{
	struct vc4_plane_state *vc4_state;

	if (WARN_ON(!plane->state))
		return NULL;

	vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
	if (!vc4_state)
		return NULL;

	memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
	vc4_state->dlist_initialized = 0;

	__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);

	if (vc4_state->dlist) {
		vc4_state->dlist = kmemdup(vc4_state->dlist,
					   vc4_state->dlist_count * 4,
					   GFP_KERNEL);
		if (!vc4_state->dlist) {
			kfree(vc4_state);
			return NULL;
		}
		vc4_state->dlist_size = vc4_state->dlist_count;
	}

	return &vc4_state->base;
}

static void vc4_plane_destroy_state(struct drm_plane *plane,
				    struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	if (drm_mm_node_allocated(&vc4_state->lbm)) {
		unsigned long irqflags;

		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
		drm_mm_remove_node(&vc4_state->lbm);
		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
	}

	kfree(vc4_state->dlist);
	__drm_atomic_helper_plane_destroy_state(&vc4_state->base);
	kfree(state);
}

/* Called during init to allocate the plane's atomic state. */
static void vc4_plane_reset(struct drm_plane *plane)
{
	struct vc4_plane_state *vc4_state;

	WARN_ON(plane->state);

	vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
	if (!vc4_state)
		return;

	__drm_atomic_helper_plane_reset(plane, &vc4_state->base);
}

static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
{
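	/* Grow the CPU-side dlist on demand, doubling its size (with a
	 * four-entry minimum) before appending the new word.
	 */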
	if (vc4_state->dlist_count == vc4_state->dlist_size) {
		u32 new_size = max(4u, vc4_state->dlist_count * 2);
		u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);

		if (!new_dlist)
			return;
		memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);

		kfree(vc4_state->dlist);
		vc4_state->dlist = new_dlist;
		vc4_state->dlist_size = new_size;
	}

	vc4_state->dlist[vc4_state->dlist_count++] = val;
}

/* Returns the scl0/scl1 field based on whether the dimensions need to
 * be up/down/non-scaled.
 *
 * This is a replication of a table from the spec.
 */
static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

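	/* The lookup key packs the x-scaling mode into bits [3:2] and the
	 * y-scaling mode into bits [1:0], mirroring the table in the spec.
	 */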
	switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_PPF_V_PPF;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_TPZ_V_PPF;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_PPF_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_PPF_V_NONE;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_NONE_V_PPF;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_NONE_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_TPZ_V_NONE;
	default:
	case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
		/* The unity case is independently handled by
		 * SCALER_CTL0_UNITY.
		 */
		return 0;
	}
}

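/* Rescales the plane's CRTC coordinates so that the requested layout
 * keeps its proportions within the smaller active area left once the
 * connector's overscan margins are applied.
 */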
static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
{
	struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
	unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
	struct drm_crtc_state *crtc_state;

	crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
						   pstate->crtc);

	vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
	if (!left && !right && !top && !bottom)
		return 0;

	if (left + right >= crtc_state->mode.hdisplay ||
	    top + bottom >= crtc_state->mode.vdisplay)
		return -EINVAL;

	adjhdisplay = crtc_state->mode.hdisplay - (left + right);
	vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_x += left;
	if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - left)
		vc4_pstate->crtc_x = crtc_state->mode.hdisplay - left;

	adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
	vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);
	vc4_pstate->crtc_y += top;
	if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - top)
		vc4_pstate->crtc_y = crtc_state->mode.vdisplay - top;

	vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);

	if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
		return -EINVAL;

	return 0;
}

static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
	u32 subpixel_src_mask = (1 << 16) - 1;
	int num_planes = fb->format->num_planes;
	struct drm_crtc_state *crtc_state;
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
	int i, ret;

	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
							state->crtc);
	if (!crtc_state) {
		DRM_DEBUG_KMS("Invalid crtc state\n");
		return -EINVAL;
	}

	ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
						  INT_MAX, true, true);
	if (ret)
		return ret;

	for (i = 0; i < num_planes; i++)
		vc4_state->offsets[i] = bo->paddr + fb->offsets[i];

	/* We don't support subpixel source positioning for scaling. */
	if ((state->src.x1 & subpixel_src_mask) ||
	    (state->src.x2 & subpixel_src_mask) ||
	    (state->src.y1 & subpixel_src_mask) ||
	    (state->src.y2 & subpixel_src_mask)) {
		return -EINVAL;
	}

	vc4_state->src_x = state->src.x1 >> 16;
	vc4_state->src_y = state->src.y1 >> 16;
	vc4_state->src_w[0] = (state->src.x2 - state->src.x1) >> 16;
	vc4_state->src_h[0] = (state->src.y2 - state->src.y1) >> 16;

	vc4_state->crtc_x = state->dst.x1;
	vc4_state->crtc_y = state->dst.y1;
	vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
	vc4_state->crtc_h = state->dst.y2 - state->dst.y1;

	ret = vc4_plane_margins_adj(state);
	if (ret)
		return ret;

	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
						       vc4_state->crtc_w);
	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
						       vc4_state->crtc_h);

	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
			       vc4_state->y_scaling[0] == VC4_SCALING_NONE);

	if (num_planes > 1) {
		vc4_state->is_yuv = true;

		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;

		vc4_state->x_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_w[1],
					     vc4_state->crtc_w);
		vc4_state->y_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_h[1],
					     vc4_state->crtc_h);

		/* YUV conversion requires that horizontal scaling be enabled
		 * on the UV plane even if vc4_get_scaling_mode() returned
		 * VC4_SCALING_NONE (which can happen when the down-scaling
		 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
		 * case.
		 */
		if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
			vc4_state->x_scaling[1] = VC4_SCALING_PPF;
	} else {
		vc4_state->is_yuv = false;
		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
	}

	return 0;
}

static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
	u32 scale, recip;

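	/* The scale factor is the src/dst size ratio in 16.16 fixed point. */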
	scale = (1 << 16) * src / dst;

	/* The specs note that while the reciprocal would be defined
	 * as (1<<32)/scale, ~0 is close enough.
	 */
	recip = ~0 / scale;

	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
}

static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
	u32 scale = (1 << 16) * src / dst;

	vc4_dlist_write(vc4_state,
			SCALER_PPF_AGC |
			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
			VC4_SET_FIELD(0, SCALER_PPF_IPHASE));
}

static u32 vc4_lbm_size(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	/* This is the worst case number.  One of the two sizes will
	 * be used depending on the scaling configuration.
	 */
	u32 pix_per_line = max(vc4_state->src_w[0], (u32)vc4_state->crtc_w);
	u32 lbm;

	/* LBM is not needed when there's no vertical scaling. */
	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
		return 0;

	if (!vc4_state->is_yuv) {
		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
			lbm = pix_per_line * 8;
		else {
			/* In special cases, this multiplier might be 12. */
			lbm = pix_per_line * 16;
		}
	} else {
		/* There are cases for this going down to a multiplier
		 * of 2, but according to the firmware source, the
		 * table in the docs is somewhat wrong.
		 */
		lbm = pix_per_line * 16;
	}

	lbm = roundup(lbm, 32);

	return lbm;
}

static void vc4_write_scaling_parameters(struct drm_plane_state *state,
					 int channel)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	/* Ch0 H-PPF Word 0: Scaling Parameters */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state,
			      vc4_state->src_w[channel], vc4_state->crtc_w);
	}

	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state,
			      vc4_state->src_h[channel], vc4_state->crtc_h);
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}

	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state,
			      vc4_state->src_w[channel], vc4_state->crtc_w);
	}

	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state,
			      vc4_state->src_h[channel], vc4_state->crtc_h);
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}
}

static void vc4_plane_calc_load(struct drm_plane_state *state)
{
	unsigned int hvs_load_shift, vrefresh, i;
	struct drm_framebuffer *fb = state->fb;
	struct vc4_plane_state *vc4_state;
	struct drm_crtc_state *crtc_state;
	unsigned int vscale_factor;

	vc4_state = to_vc4_plane_state(state);
	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
							state->crtc);
	vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);

	/* The HVS is able to process 2 pixels/cycle when scaling the source,
	 * 4 pixels/cycle otherwise.
	 * Alpha blending step seems to be pipelined and it's always operating
	 * at 4 pixels/cycle, so the limiting aspect here seems to be the
	 * scaler block.
	 * HVS load is expressed in clk-cycles/sec (AKA Hz).
	 */
	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
		hvs_load_shift = 1;
	else
		hvs_load_shift = 2;

	vc4_state->membus_load = 0;
	vc4_state->hvs_load = 0;
	for (i = 0; i < fb->format->num_planes; i++) {
		/* Even if the bandwidth/plane required for a single frame is
		 *
		 * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh
		 *
		 * when downscaling, we have to read more pixels per line in
		 * the time frame reserved for a single line, so the bandwidth
		 * demand can be punctually higher. To account for that, we
		 * calculate the down-scaling factor and multiply the plane
		 * load by this number. We're likely over-estimating the read
		 * demand, but that's better than under-estimating it.
		 */
		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],
					     vc4_state->crtc_h);
		vc4_state->membus_load += vc4_state->src_w[i] *
					  vc4_state->src_h[i] * vscale_factor *
					  fb->format->cpp[i];
		vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
	}

	vc4_state->hvs_load *= vrefresh;
	vc4_state->hvs_load >>= hvs_load_shift;
	vc4_state->membus_load *= vrefresh;
}

static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned long irqflags;
	u32 lbm_size;

	lbm_size = vc4_lbm_size(state);
	if (!lbm_size)
		return 0;

	if (WARN_ON(!vc4_state->lbm_offset))
		return -EINVAL;

	/* Allocate the LBM memory that the HVS will use for temporary
	 * storage due to our scaling/format conversion.
	 */
	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
		int ret;

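		/* LBM allocations need an alignment of 64 on HVS5 and 32 on
		 * earlier generations.
		 */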
		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
		ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
						 &vc4_state->lbm,
581 582 583
						 lbm_size,
						 vc4->hvs->hvs5 ? 64 : 32,
						 0, 0);
		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);

		if (ret)
			return ret;
	} else {
		WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
	}

	vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;

	return 0;
}

/* Writes out a full display list for an active plane to the plane's
 * private dlist state.
 */
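/* The dlist written here is, in order: a control word; position/control
 * words (these differ between the HVS5 and pre-HVS5 paths below);
 * pointer and pointer-context words for each color plane; pitch words;
 * CSC words for YUV formats; then an LBM base-address slot and scaling
 * parameters when scaling is in use.
 */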
static int vc4_plane_mode_set(struct drm_plane *plane,
			      struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	u32 ctl0_offset = vc4_state->dlist_count;
	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
	int num_planes = fb->format->num_planes;
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
	bool mix_plane_alpha;
	bool covers_screen;
	u32 scl0, scl1, pitch0;
	u32 tiling, src_y;
	u32 hvs_format = format->hvs;
	unsigned int rotation;
	int ret, i;

	if (vc4_state->dlist_initialized)
		return 0;

	ret = vc4_plane_setup_clipping_and_scaling(state);
	if (ret)
		return ret;

	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
	 * and 4:4:4, scl1 should be set to scl0 so both channels of
	 * the scaler do the same thing.  For YUV, the Y plane needs
	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
	 * the scl fields here.
	 */
	if (num_planes == 1) {
		scl0 = vc4_get_scl_field(state, 0);
		scl1 = scl0;
	} else {
		scl0 = vc4_get_scl_field(state, 1);
		scl1 = vc4_get_scl_field(state, 0);
	}

	rotation = drm_rotation_simplify(state->rotation,
					 DRM_MODE_ROTATE_0 |
					 DRM_MODE_REFLECT_X |
					 DRM_MODE_REFLECT_Y);

	/* We must point to the last line when Y reflection is enabled. */
	src_y = vc4_state->src_y;
	if (rotation & DRM_MODE_REFLECT_Y)
		src_y += vc4_state->src_h[0] - 1;

	switch (base_format_mod) {
	case DRM_FORMAT_MOD_LINEAR:
		tiling = SCALER_CTL0_TILING_LINEAR;
		pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 */
		for (i = 0; i < num_planes; i++) {
			vc4_state->offsets[i] += src_y /
						 (i ? v_subsample : 1) *
						 fb->pitches[i];

			vc4_state->offsets[i] += vc4_state->src_x /
						 (i ? h_subsample : 1) *
						 fb->format->cpp[i];
		}

		break;

	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
		u32 tile_size_shift = 12; /* T tiles are 4kb */
		/* Whole-tile offsets, mostly for setting the pitch. */
		u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
		u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
		u32 tile_w_mask = (1 << tile_w_shift) - 1;
		/* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
		 * the height (in pixels) of a 4k tile.
		 */
		u32 tile_h_mask = (2 << tile_h_shift) - 1;
		/* For T-tiled, the FB pitch is "how many bytes from one row to
		 * the next, such that
		 *
		 *	pitch * tile_h == tile_size * tiles_per_row
		 */
		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
		u32 tiles_l = vc4_state->src_x >> tile_w_shift;
		u32 tiles_r = tiles_w - tiles_l;
		u32 tiles_t = src_y >> tile_h_shift;
		/* Intra-tile offsets, which modify the base address (the
		 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
		 * base address).
		 */
		u32 tile_y = (src_y >> 4) & 1;
		u32 subtile_y = (src_y >> 2) & 3;
		u32 utile_y = src_y & 3;
		u32 x_off = vc4_state->src_x & tile_w_mask;
		u32 y_off = src_y & tile_h_mask;

		/* When Y reflection is requested we must set the
		 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
		 * after the initial one should be fetched in descending order,
		 * which makes sense since we start from the last line and go
		 * backward.
		 * Don't know why we need y_off = max_y_off - y_off, but it's
		 * definitely required (I guess it's also related to the "going
		 * backward" situation).
		 */
		if (rotation & DRM_MODE_REFLECT_Y) {
			y_off = tile_h_mask - y_off;
			pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
		} else {
			pitch0 = 0;
		}

		tiling = SCALER_CTL0_TILING_256B_OR_T;
		pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
			   VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
			   VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
			   VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
		vc4_state->offsets[0] += tiles_t * (tiles_w << tile_size_shift);
		vc4_state->offsets[0] += subtile_y << 8;
		vc4_state->offsets[0] += utile_y << 4;

		/* Rows of tiles alternate left-to-right and right-to-left. */
		if (tiles_t & 1) {
			pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
			vc4_state->offsets[0] += (tiles_w - tiles_l) <<
						 tile_size_shift;
			vc4_state->offsets[0] -= (1 + !tile_y) << 10;
		} else {
			vc4_state->offsets[0] += tiles_l << tile_size_shift;
			vc4_state->offsets[0] += tile_y << 10;
		}

		break;
	}

	case DRM_FORMAT_MOD_BROADCOM_SAND64:
	case DRM_FORMAT_MOD_BROADCOM_SAND128:
	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
		u32 tile_w, tile, x_off, pix_per_tile;

		hvs_format = HVS_PIXEL_FORMAT_H264;

		switch (base_format_mod) {
		case DRM_FORMAT_MOD_BROADCOM_SAND64:
			tiling = SCALER_CTL0_TILING_64B;
			tile_w = 64;
			break;
		case DRM_FORMAT_MOD_BROADCOM_SAND128:
			tiling = SCALER_CTL0_TILING_128B;
			tile_w = 128;
			break;
		case DRM_FORMAT_MOD_BROADCOM_SAND256:
			tiling = SCALER_CTL0_TILING_256B_OR_T;
			tile_w = 256;
			break;
		default:
			break;
		}

		if (param > SCALER_TILE_HEIGHT_MASK) {
			DRM_DEBUG_KMS("SAND height too large (%d)\n", param);
			return -EINVAL;
		}

		pix_per_tile = tile_w / fb->format->cpp[0];
		tile = vc4_state->src_x / pix_per_tile;
		x_off = vc4_state->src_x % pix_per_tile;

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 */
		for (i = 0; i < num_planes; i++) {
			vc4_state->offsets[i] += param * tile_w * tile;
			vc4_state->offsets[i] += src_y /
						 (i ? v_subsample : 1) *
						 tile_w;
			vc4_state->offsets[i] += x_off /
						 (i ? h_subsample : 1) *
						 fb->format->cpp[i];
		}

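		/* For SAND, pitch0 carries the column height taken from the
		 * modifier parameter rather than a byte pitch.
		 */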
		pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
		break;
	}

	default:
		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
			      (long long)fb->modifier);
		return -EINVAL;
	}

	/* Don't waste cycles mixing with plane alpha if the set alpha
	 * is opaque or there is no per-pixel alpha information.
	 * In any case we use the alpha property value as the fixed alpha.
	 */
	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
			  fb->format->has_alpha;

	if (!vc4->hvs->hvs5) {
		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
				(rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
				VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
				(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
				VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
				VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size, Alpha */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(fb->format->has_alpha ?
					      SCALER_POS2_ALPHA_MODE_PIPELINE :
					      SCALER_POS2_ALPHA_MODE_FIXED,
					      SCALER_POS2_ALPHA_MODE) |
				(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
				(fb->format->has_alpha ?
						SCALER_POS2_ALPHA_PREMULT : 0) |
				VC4_SET_FIELD(vc4_state->src_w[0],
					      SCALER_POS2_WIDTH) |
				VC4_SET_FIELD(vc4_state->src_h[0],
					      SCALER_POS2_HEIGHT));

		/* Position Word 3: Context.  Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	} else {
		u32 hvs_pixel_order = format->pixel_order;

		if (format->pixel_order_hvs5)
			hvs_pixel_order = format->pixel_order_hvs5;

		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(hvs_pixel_order << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ?
						SCALER5_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
				SCALER5_CTL0_ALPHA_EXPAND |
				SCALER5_CTL0_RGB_EXPAND);

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(rotation & DRM_MODE_REFLECT_Y ?
						SCALER5_POS0_VFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_x,
					      SCALER_POS0_START_X) |
				(rotation & DRM_MODE_REFLECT_X ?
					      SCALER5_POS0_HFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_y,
					      SCALER5_POS0_START_Y)
			       );

		/* Control Word 2 */
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 4,
					      SCALER5_CTL2_ALPHA) |
				(fb->format->has_alpha ?
					SCALER5_CTL2_ALPHA_PREMULT : 0) |
				(mix_plane_alpha ?
					SCALER5_CTL2_ALPHA_MIX : 0) |
				VC4_SET_FIELD(fb->format->has_alpha ?
				      SCALER5_CTL2_ALPHA_MODE_PIPELINE :
				      SCALER5_CTL2_ALPHA_MODE_FIXED,
				      SCALER5_CTL2_ALPHA_MODE)
			       );

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(vc4_state->src_w[0],
					      SCALER5_POS2_WIDTH) |
				VC4_SET_FIELD(vc4_state->src_h[0],
					      SCALER5_POS2_HEIGHT));

		/* Position Word 3: Context.  Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}

	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
	 *
	 * The pointers may be any byte address.
	 */
	vc4_state->ptr0_offset = vc4_state->dlist_count;
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, vc4_state->offsets[i]);

	/* Pointer Context Word 0/1/2: Written by the HVS */
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	/* Pitch word 0 */
	vc4_dlist_write(vc4_state, pitch0);

	/* Pitch word 1/2 */
	for (i = 1; i < num_planes; i++) {
		if (hvs_format != HVS_PIXEL_FORMAT_H264) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(fb->pitches[i],
						      SCALER_SRC_PITCH));
		} else {
			vc4_dlist_write(vc4_state, pitch0);
		}
	}

	/* Colorspace conversion words */
	if (vc4_state->is_yuv) {
		vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5);
		vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5);
		vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5);
	}

	vc4_state->lbm_offset = 0;

	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
		/* Reserve a slot for the LBM Base Address. The real value will
		 * be set when calling vc4_plane_allocate_lbm().
		 */
		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
			vc4_state->lbm_offset = vc4_state->dlist_count++;

		if (num_planes > 1) {
			/* Emit Cb/Cr as channel 0 and Y as channel
			 * 1. This matches how we set up scl0/scl1
			 * above.
			 */
			vc4_write_scaling_parameters(state, 1);
		}
		vc4_write_scaling_parameters(state, 0);

		/* If any PPF setup was done, then all the kernel
		 * pointers get uploaded.
		 */
		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
						   SCALER_PPF_KERNEL_OFFSET);

			/* HPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* HPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
		}
	}

	vc4_state->dlist[ctl0_offset] |=
		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);

	/* crtc_* are already clipped coordinates. */
	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
			vc4_state->crtc_h == state->crtc->mode.vdisplay;
	/* Background fill might be necessary when the plane has per-pixel
	 * alpha content or a non-opaque plane alpha and could blend from the
	 * background or does not cover the entire screen.
	 */
	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;

	/* Flag the dlist as initialized to avoid checking it twice in case
	 * the async update check already called vc4_plane_mode_set() and
	 * decided to fallback to sync update because async update was not
	 * possible.
	 */
	vc4_state->dlist_initialized = 1;

	vc4_plane_calc_load(state);

	return 0;
}

/* If a modeset involves changing the setup of a plane, the atomic
 * infrastructure will call this to validate a proposed plane setup.
 * However, if a plane isn't getting updated, this (and the
 * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
 * compute the dlist here and have all active plane dlists get updated
 * in the CRTC's flush.
 */
static int vc4_plane_atomic_check(struct drm_plane *plane,
				  struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	int ret;

	vc4_state->dlist_count = 0;

	if (!plane_enabled(state))
		return 0;

	ret = vc4_plane_mode_set(plane, state);
	if (ret)
		return ret;

	return vc4_plane_allocate_lbm(state);
}

static void vc4_plane_atomic_update(struct drm_plane *plane,
				    struct drm_plane_state *old_state)
{
	/* No contents here.  Since we don't know where in the CRTC's
	 * dlist we should be stored, our dlist is uploaded to the
	 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush
	 * time.
	 */
}

u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
	int i;

	vc4_state->hw_dlist = dlist;

	/* Can't memcpy_toio() because it needs to be 32-bit writes. */
	for (i = 0; i < vc4_state->dlist_count; i++)
		writel(vc4_state->dlist[i], &dlist[i]);

	return vc4_state->dlist_count;
}

u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
{
	const struct vc4_plane_state *vc4_state =
		container_of(state, typeof(*vc4_state), base);

	return vc4_state->dlist_count;
}

/* Updates the plane to immediately (well, once the FIFO needs
 * refilling) scan out from a new framebuffer.
 */
void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
	uint32_t addr;

	/* We're skipping the address adjustment for negative origin,
	 * because this is only called on the primary plane.
	 */
	WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
	addr = bo->paddr + fb->offsets[0];

	/* Write the new address into the hardware immediately.  The
	 * scanout will start from this address as soon as the FIFO
	 * needs to refill with pixels.
	 */
	writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]);

	/* Also update the CPU-side dlist copy, so that any later
	 * atomic updates that don't do a new modeset on our plane
	 * also use our updated address.
	 */
	vc4_state->dlist[vc4_state->ptr0_offset] = addr;
}

static void vc4_plane_atomic_async_update(struct drm_plane *plane,
					  struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state, *new_vc4_state;

	swap(plane->state->fb, state->fb);
	plane->state->crtc_x = state->crtc_x;
	plane->state->crtc_y = state->crtc_y;
	plane->state->crtc_w = state->crtc_w;
	plane->state->crtc_h = state->crtc_h;
	plane->state->src_x = state->src_x;
	plane->state->src_y = state->src_y;
	plane->state->src_w = state->src_w;
	plane->state->src_h = state->src_h;
	plane->state->alpha = state->alpha;
	plane->state->pixel_blend_mode = state->pixel_blend_mode;
	plane->state->rotation = state->rotation;
	plane->state->zpos = state->zpos;
	plane->state->normalized_zpos = state->normalized_zpos;
	plane->state->color_encoding = state->color_encoding;
	plane->state->color_range = state->color_range;
	plane->state->src = state->src;
	plane->state->dst = state->dst;
	plane->state->visible = state->visible;

	new_vc4_state = to_vc4_plane_state(state);
	vc4_state = to_vc4_plane_state(plane->state);

	vc4_state->crtc_x = new_vc4_state->crtc_x;
	vc4_state->crtc_y = new_vc4_state->crtc_y;
	vc4_state->crtc_h = new_vc4_state->crtc_h;
	vc4_state->crtc_w = new_vc4_state->crtc_w;
	vc4_state->src_x = new_vc4_state->src_x;
	vc4_state->src_y = new_vc4_state->src_y;
	memcpy(vc4_state->src_w, new_vc4_state->src_w,
	       sizeof(vc4_state->src_w));
	memcpy(vc4_state->src_h, new_vc4_state->src_h,
	       sizeof(vc4_state->src_h));
	memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
	       sizeof(vc4_state->x_scaling));
	memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
	       sizeof(vc4_state->y_scaling));
	vc4_state->is_unity = new_vc4_state->is_unity;
	vc4_state->is_yuv = new_vc4_state->is_yuv;
	memcpy(vc4_state->offsets, new_vc4_state->offsets,
	       sizeof(vc4_state->offsets));
	vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;

	/* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
	vc4_state->dlist[vc4_state->pos0_offset] =
		new_vc4_state->dlist[vc4_state->pos0_offset];
	vc4_state->dlist[vc4_state->pos2_offset] =
		new_vc4_state->dlist[vc4_state->pos2_offset];
	vc4_state->dlist[vc4_state->ptr0_offset] =
		new_vc4_state->dlist[vc4_state->ptr0_offset];

	/* Note that we can't just call vc4_plane_write_dlist()
	 * because that would smash the context data that the HVS is
	 * currently using.
	 */
	writel(vc4_state->dlist[vc4_state->pos0_offset],
	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
	writel(vc4_state->dlist[vc4_state->pos2_offset],
	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
	writel(vc4_state->dlist[vc4_state->ptr0_offset],
	       &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
}

static int vc4_plane_atomic_async_check(struct drm_plane *plane,
					struct drm_plane_state *state)
{
	struct vc4_plane_state *old_vc4_state, *new_vc4_state;
	int ret;
	u32 i;

	ret = vc4_plane_mode_set(plane, state);
	if (ret)
		return ret;

	old_vc4_state = to_vc4_plane_state(plane->state);
	new_vc4_state = to_vc4_plane_state(state);
	if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
	    old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
	    old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
	    old_vc4_state->ptr0_offset != new_vc4_state->ptr0_offset ||
	    vc4_lbm_size(plane->state) != vc4_lbm_size(state))
		return -EINVAL;

	/* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update
	 * if anything else has changed, fallback to a sync update.
	 */
	for (i = 0; i < new_vc4_state->dlist_count; i++) {
		if (i == new_vc4_state->pos0_offset ||
		    i == new_vc4_state->pos2_offset ||
		    i == new_vc4_state->ptr0_offset ||
		    (new_vc4_state->lbm_offset &&
		     i == new_vc4_state->lbm_offset))
			continue;

		if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
			return -EINVAL;
	}

	return 0;
}

static int vc4_prepare_fb(struct drm_plane *plane,
			  struct drm_plane_state *state)
{
	struct vc4_bo *bo;
	int ret;

	if (!state->fb)
		return 0;

	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);

	drm_gem_fb_prepare_fb(plane, state);

	if (plane->state->fb == state->fb)
		return 0;

	ret = vc4_bo_inc_usecnt(bo);
	if (ret)
		return ret;

	return 0;
}

static void vc4_cleanup_fb(struct drm_plane *plane,
			   struct drm_plane_state *state)
{
	struct vc4_bo *bo;

	if (plane->state->fb == state->fb || !state->fb)
		return;

	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
	vc4_bo_dec_usecnt(bo);
}

static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
	.atomic_check = vc4_plane_atomic_check,
	.atomic_update = vc4_plane_atomic_update,
	.prepare_fb = vc4_prepare_fb,
	.cleanup_fb = vc4_cleanup_fb,
	.atomic_async_check = vc4_plane_atomic_async_check,
	.atomic_async_update = vc4_plane_atomic_async_update,
};

static void vc4_plane_destroy(struct drm_plane *plane)
{
	drm_plane_cleanup(plane);
}

static bool vc4_format_mod_supported(struct drm_plane *plane,
				     uint32_t format,
				     uint64_t modifier)
{
	/* Support T_TILING for RGB formats only. */
	switch (format) {
	case DRM_FORMAT_XRGB8888:
	case DRM_FORMAT_ARGB8888:
	case DRM_FORMAT_ABGR8888:
	case DRM_FORMAT_XBGR8888:
	case DRM_FORMAT_RGB565:
	case DRM_FORMAT_BGR565:
	case DRM_FORMAT_ARGB1555:
	case DRM_FORMAT_XRGB1555:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_LINEAR:
		case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_NV12:
	case DRM_FORMAT_NV21:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_LINEAR:
		case DRM_FORMAT_MOD_BROADCOM_SAND64:
		case DRM_FORMAT_MOD_BROADCOM_SAND128:
		case DRM_FORMAT_MOD_BROADCOM_SAND256:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_RGBX1010102:
	case DRM_FORMAT_BGRX1010102:
	case DRM_FORMAT_RGBA1010102:
	case DRM_FORMAT_BGRA1010102:
	case DRM_FORMAT_YUV422:
	case DRM_FORMAT_YVU422:
	case DRM_FORMAT_YUV420:
	case DRM_FORMAT_YVU420:
	case DRM_FORMAT_NV16:
	case DRM_FORMAT_NV61:
	default:
		return (modifier == DRM_FORMAT_MOD_LINEAR);
	}
}

static const struct drm_plane_funcs vc4_plane_funcs = {
	.update_plane = drm_atomic_helper_update_plane,
	.disable_plane = drm_atomic_helper_disable_plane,
	.destroy = vc4_plane_destroy,
	.set_property = NULL,
	.reset = vc4_plane_reset,
	.atomic_duplicate_state = vc4_plane_duplicate_state,
	.atomic_destroy_state = vc4_plane_destroy_state,
	.format_mod_supported = vc4_format_mod_supported,
};

struct drm_plane *vc4_plane_init(struct drm_device *dev,
				 enum drm_plane_type type)
{
	struct drm_plane *plane = NULL;
	struct vc4_plane *vc4_plane;
	u32 formats[ARRAY_SIZE(hvs_formats)];
	int ret = 0;
	unsigned i;
	static const uint64_t modifiers[] = {
		DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
		DRM_FORMAT_MOD_BROADCOM_SAND128,
		DRM_FORMAT_MOD_BROADCOM_SAND64,
		DRM_FORMAT_MOD_BROADCOM_SAND256,
		DRM_FORMAT_MOD_LINEAR,
		DRM_FORMAT_MOD_INVALID
	};

	vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane),
				 GFP_KERNEL);
	if (!vc4_plane)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++)
		formats[i] = hvs_formats[i].drm;

	plane = &vc4_plane->base;
	ret = drm_universal_plane_init(dev, plane, 0,
				       &vc4_plane_funcs,
				       formats, ARRAY_SIZE(formats),
				       modifiers, type, NULL);
	if (ret)
		return ERR_PTR(ret);

	drm_plane_helper_add(plane, &vc4_plane_helper_funcs);

	drm_plane_create_alpha_property(plane);
	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
					   DRM_MODE_ROTATE_0 |
					   DRM_MODE_ROTATE_180 |
					   DRM_MODE_REFLECT_X |
					   DRM_MODE_REFLECT_Y);

	return plane;
}

int vc4_plane_create_additional_planes(struct drm_device *drm)
{
	struct drm_plane *cursor_plane;
	struct drm_crtc *crtc;
	unsigned int i;

	/* Set up some arbitrary number of planes.  We're not limited
	 * by a set number of physical registers, just the space in
	 * the HVS (16k) and how small a plane can be (28 bytes).
	 * However, each plane we set up takes up some memory, and
	 * increases the cost of looping over planes, which atomic
	 * modesetting does quite a bit.  As a result, we pick a
	 * modest number of planes to expose, that should hopefully
	 * still cover any sane usecase.
	 */
	for (i = 0; i < 8; i++) {
		struct drm_plane *plane =
			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY);

		if (IS_ERR(plane))
			continue;

		plane->possible_crtcs =
			GENMASK(drm->mode_config.num_crtc - 1, 0);
	}

	drm_for_each_crtc(crtc, drm) {
		/* Set up the legacy cursor after overlay initialization,
		 * since we overlay planes on the CRTC in the order they were
		 * initialized.
		 */
		cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
		if (!IS_ERR(cursor_plane)) {
			cursor_plane->possible_crtcs = drm_crtc_mask(crtc);
			crtc->cursor = cursor_plane;
		}
	}

	return 0;
}