提交 cb92d452 编写于 作者: A Alex Deucher 提交者: Dave Airlie

drm/radeon/kms: add blit support for cayman (v2)

Allows us to use the 3D engine for memory management
and allows us to use vram beyond the BAR aperture.

v2: fix copy paste typo
Reported-by: NNils Wallménius <nils.wallmenius@gmail.com>
Signed-off-by: NAlex Deucher <alexdeucher@gmail.com>
Signed-off-by: NDave Airlie <airlied@redhat.com>
上级 ac10f81d
......@@ -39,17 +39,335 @@
const u32 cayman_default_state[] =
{
/* XXX fill in additional blit state */
0xc0066900,
0x00000000,
0x00000060, /* DB_RENDER_CONTROL */
0x00000000, /* DB_COUNT_CONTROL */
0x00000000, /* DB_DEPTH_VIEW */
0x0000002a, /* DB_RENDER_OVERRIDE */
0x00000000, /* DB_RENDER_OVERRIDE2 */
0x00000000, /* DB_HTILE_DATA_BASE */
0xc0026900,
0x00000316,
0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
0x00000010, /* */
0x0000000a,
0x00000000, /* DB_STENCIL_CLEAR */
0x00000000, /* DB_DEPTH_CLEAR */
0xc0036900,
0x0000000f,
0x00000000, /* DB_DEPTH_INFO */
0x00000000, /* DB_Z_INFO */
0x00000000, /* DB_STENCIL_INFO */
0xc0016900,
0x00000080,
0x00000000, /* PA_SC_WINDOW_OFFSET */
0xc00d6900,
0x00000083,
0x0000ffff, /* PA_SC_CLIPRECT_RULE */
0x00000000, /* PA_SC_CLIPRECT_0_TL */
0x20002000, /* PA_SC_CLIPRECT_0_BR */
0x00000000,
0x20002000,
0x00000000,
0x20002000,
0x00000000,
0x20002000,
0xaaaaaaaa, /* PA_SC_EDGERULE */
0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */
0x0000000f, /* CB_TARGET_MASK */
0x0000000f, /* CB_SHADER_MASK */
0xc0226900,
0x00000094,
0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */
0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */
0x80000000,
0x20002000,
0x80000000,
0x20002000,
0x80000000,
0x20002000,
0x80000000,
0x20002000,
0x80000000,
0x20002000,
0x80000000,
0x20002000,
0x80000000,
0x20002000,
0x80000000,
0x20002000,
0x80000000,
0x20002000,
0x80000000,
0x20002000,
0x80000000,
0x20002000,
0x80000000,
0x20002000,
0x80000000,
0x20002000,
0x80000000,
0x20002000,
0x80000000,
0x20002000,
0x00000000, /* PA_SC_VPORT_ZMIN_0 */
0x3f800000, /* PA_SC_VPORT_ZMAX_0 */
0xc0016900,
0x000000d4,
0x00000000, /* SX_MISC */
0xc0026900,
0x000000d9,
0x00000000, /* CP_RINGID */
0x00000000, /* CP_VMID */
0xc0096900,
0x00000100,
0x00ffffff, /* VGT_MAX_VTX_INDX */
0x00000000, /* VGT_MIN_VTX_INDX */
0x00000000, /* VGT_INDX_OFFSET */
0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */
0x00000000, /* SX_ALPHA_TEST_CONTROL */
0x00000000, /* CB_BLEND_RED */
0x00000000, /* CB_BLEND_GREEN */
0x00000000, /* CB_BLEND_BLUE */
0x00000000, /* CB_BLEND_ALPHA */
0xc0016900,
0x00000187,
0x00000100, /* SPI_VS_OUT_ID_0 */
0xc0026900,
0x00000191,
0x00000100, /* SPI_PS_INPUT_CNTL_0 */
0x00000101, /* SPI_PS_INPUT_CNTL_1 */
0xc0016900,
0x000001b1,
0x00000000, /* SPI_VS_OUT_CONFIG */
0xc0106900,
0x000001b3,
0x20000001, /* SPI_PS_IN_CONTROL_0 */
0x00000000, /* SPI_PS_IN_CONTROL_1 */
0x00000000, /* SPI_INTERP_CONTROL_0 */
0x00000000, /* SPI_INPUT_Z */
0x00000000, /* SPI_FOG_CNTL */
0x00100000, /* SPI_BARYC_CNTL */
0x00000000, /* SPI_PS_IN_CONTROL_2 */
0x00000000, /* SPI_COMPUTE_INPUT_CNTL */
0x00000000, /* SPI_COMPUTE_NUM_THREAD_X */
0x00000000, /* SPI_COMPUTE_NUM_THREAD_Y */
0x00000000, /* SPI_COMPUTE_NUM_THREAD_Z */
0x00000000, /* SPI_GPR_MGMT */
0x00000000, /* SPI_LDS_MGMT */
0x00000000, /* SPI_STACK_MGMT */
0x00000000, /* SPI_WAVE_MGMT_1 */
0x00000000, /* SPI_WAVE_MGMT_2 */
0xc0016900,
0x000001e0,
0x00000000, /* CB_BLEND0_CONTROL */
0xc00e6900,
0x00000200,
0x00000000, /* DB_DEPTH_CONTROL */
0x00000000, /* DB_EQAA */
0x00cc0010, /* CB_COLOR_CONTROL */
0x00000210, /* DB_SHADER_CONTROL */
0x00010000, /* PA_CL_CLIP_CNTL */
0x00000004, /* PA_SU_SC_MODE_CNTL */
0x00000100, /* PA_CL_VTE_CNTL */
0x00000000, /* PA_CL_VS_OUT_CNTL */
0x00000000, /* PA_CL_NANINF_CNTL */
0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */
0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */
0x00000000, /* PA_SU_PRIM_FILTER_CNTL */
0x00000000, /* */
0x00000000, /* */
0xc0026900,
0x00000229,
0x00000000, /* SQ_PGM_START_FS */
0x00000000,
0xc0016900,
0x0000023b,
0x00000000, /* SQ_LDS_ALLOC_PS */
0xc0066900,
0x00000240,
0x00000000, /* SQ_ESGS_RING_ITEMSIZE */
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0xc0046900,
0x00000247,
0x00000000, /* SQ_GS_VERT_ITEMSIZE */
0x00000000,
0x00000000,
0x00000000,
0xc0116900,
0x00000280,
0x00000000, /* PA_SU_POINT_SIZE */
0x00000000, /* PA_SU_POINT_MINMAX */
0x00000008, /* PA_SU_LINE_CNTL */
0x00000000, /* PA_SC_LINE_STIPPLE */
0x00000000, /* VGT_OUTPUT_PATH_CNTL */
0x00000000, /* VGT_HOS_CNTL */
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000, /* VGT_GS_MODE */
0xc0026900,
0x00000292,
0x00000000, /* PA_SC_MODE_CNTL_0 */
0x00000000, /* PA_SC_MODE_CNTL_1 */
0xc0016900,
0x000002a1,
0x00000000, /* VGT_PRIMITIVEID_EN */
0xc0016900,
0x000002a5,
0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */
0xc0026900,
0x000002a8,
0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */
0x00000000,
0xc0026900,
0x000002ad,
0x00000000, /* VGT_REUSE_OFF */
0x00000000,
0xc0016900,
0x000002d5,
0x00000000, /* VGT_SHADER_STAGES_EN */
0xc0016900,
0x000002dc,
0x0000aa00, /* DB_ALPHA_TO_MASK */
0xc0066900,
0x000002de,
0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0xc0026900,
0x000002e5,
0x00000000, /* VGT_STRMOUT_CONFIG */
0x00000000,
0xc01b6900,
0x000002f5,
0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */
0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */
0x00000000, /* PA_SC_LINE_CNTL */
0x00000000, /* PA_SC_AA_CONFIG */
0x00000005, /* PA_SU_VTX_CNTL */
0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */
0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */
0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */
0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */
0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0x00000000,
0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */
0xffffffff,
0xc0026900,
0x00000316,
0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
0x00000010, /* */
};
const u32 cayman_vs[] =
{
0x00000004,
0x80400400,
0x0000a03c,
0x95000688,
0x00004000,
0x15000688,
0x00000000,
0x88000000,
0x04000000,
0x67961001,
#ifdef __BIG_ENDIAN
0x00020000,
#else
0x00000000,
#endif
0x00000000,
0x04000000,
0x67961000,
#ifdef __BIG_ENDIAN
0x00020008,
#else
0x00000008,
#endif
0x00000000,
};
const u32 cayman_ps[] =
{
0x00000004,
0xa00c0000,
0x00000008,
0x80400000,
0x00000000,
0x95000688,
0x00000000,
0x88000000,
0x00380400,
0x00146b10,
0x00380000,
0x20146b10,
0x00380400,
0x40146b00,
0x80380000,
0x60146b00,
0x00000010,
0x000d1000,
0xb0800000,
0x00000000,
};
const u32 cayman_ps_size = ARRAY_SIZE(cayman_ps);
const u32 cayman_vs_size = ARRAY_SIZE(cayman_vs);
const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state);
......@@ -25,8 +25,11 @@
#ifndef CAYMAN_BLIT_SHADERS_H
#define CAYMAN_BLIT_SHADERS_H
extern const u32 cayman_ps[];
extern const u32 cayman_vs[];
extern const u32 cayman_default_state[];
extern const u32 cayman_ps_size, cayman_vs_size;
extern const u32 cayman_default_size;
#endif
......@@ -31,6 +31,7 @@
#include "evergreend.h"
#include "evergreen_blit_shaders.h"
#include "cayman_blit_shaders.h"
#define DI_PT_RECTLIST 0x11
#define DI_INDEX_SIZE_16_BIT 0x0
......@@ -265,238 +266,240 @@ set_default_state(struct radeon_device *rdev)
u64 gpu_addr;
int dwords;
switch (rdev->family) {
case CHIP_CEDAR:
default:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 96;
num_vs_threads = 16;
num_gs_threads = 16;
num_es_threads = 16;
num_hs_threads = 16;
num_ls_threads = 16;
num_ps_stack_entries = 42;
num_vs_stack_entries = 42;
num_gs_stack_entries = 42;
num_es_stack_entries = 42;
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
case CHIP_REDWOOD:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 20;
num_gs_threads = 20;
num_es_threads = 20;
num_hs_threads = 20;
num_ls_threads = 20;
num_ps_stack_entries = 42;
num_vs_stack_entries = 42;
num_gs_stack_entries = 42;
num_es_stack_entries = 42;
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
case CHIP_JUNIPER:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 20;
num_gs_threads = 20;
num_es_threads = 20;
num_hs_threads = 20;
num_ls_threads = 20;
num_ps_stack_entries = 85;
num_vs_stack_entries = 85;
num_gs_stack_entries = 85;
num_es_stack_entries = 85;
num_hs_stack_entries = 85;
num_ls_stack_entries = 85;
break;
case CHIP_CYPRESS:
case CHIP_HEMLOCK:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 20;
num_gs_threads = 20;
num_es_threads = 20;
num_hs_threads = 20;
num_ls_threads = 20;
num_ps_stack_entries = 85;
num_vs_stack_entries = 85;
num_gs_stack_entries = 85;
num_es_stack_entries = 85;
num_hs_stack_entries = 85;
num_ls_stack_entries = 85;
break;
case CHIP_PALM:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 96;
num_vs_threads = 16;
num_gs_threads = 16;
num_es_threads = 16;
num_hs_threads = 16;
num_ls_threads = 16;
num_ps_stack_entries = 42;
num_vs_stack_entries = 42;
num_gs_stack_entries = 42;
num_es_stack_entries = 42;
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
case CHIP_BARTS:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 20;
num_gs_threads = 20;
num_es_threads = 20;
num_hs_threads = 20;
num_ls_threads = 20;
num_ps_stack_entries = 85;
num_vs_stack_entries = 85;
num_gs_stack_entries = 85;
num_es_stack_entries = 85;
num_hs_stack_entries = 85;
num_ls_stack_entries = 85;
break;
case CHIP_TURKS:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 20;
num_gs_threads = 20;
num_es_threads = 20;
num_hs_threads = 20;
num_ls_threads = 20;
num_ps_stack_entries = 42;
num_vs_stack_entries = 42;
num_gs_stack_entries = 42;
num_es_stack_entries = 42;
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
case CHIP_CAICOS:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 10;
num_gs_threads = 10;
num_es_threads = 10;
num_hs_threads = 10;
num_ls_threads = 10;
num_ps_stack_entries = 42;
num_vs_stack_entries = 42;
num_gs_stack_entries = 42;
num_es_stack_entries = 42;
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
}
if ((rdev->family == CHIP_CEDAR) ||
(rdev->family == CHIP_PALM) ||
(rdev->family == CHIP_CAICOS))
sq_config = 0;
else
sq_config = VC_ENABLE;
sq_config |= (EXPORT_SRC_C |
CS_PRIO(0) |
LS_PRIO(0) |
HS_PRIO(0) |
PS_PRIO(0) |
VS_PRIO(1) |
GS_PRIO(2) |
ES_PRIO(3));
sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
NUM_VS_GPRS(num_vs_gprs) |
NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
NUM_ES_GPRS(num_es_gprs));
sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
NUM_LS_GPRS(num_ls_gprs));
sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
NUM_VS_THREADS(num_vs_threads) |
NUM_GS_THREADS(num_gs_threads) |
NUM_ES_THREADS(num_es_threads));
sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
NUM_LS_THREADS(num_ls_threads));
sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
NUM_ES_STACK_ENTRIES(num_es_stack_entries));
sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
/* set clear context state */
radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
radeon_ring_write(rdev, 0);
/* disable dyn gprs */
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
radeon_ring_write(rdev, 0);
if (rdev->family < CHIP_CAYMAN) {
switch (rdev->family) {
case CHIP_CEDAR:
default:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 96;
num_vs_threads = 16;
num_gs_threads = 16;
num_es_threads = 16;
num_hs_threads = 16;
num_ls_threads = 16;
num_ps_stack_entries = 42;
num_vs_stack_entries = 42;
num_gs_stack_entries = 42;
num_es_stack_entries = 42;
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
case CHIP_REDWOOD:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 20;
num_gs_threads = 20;
num_es_threads = 20;
num_hs_threads = 20;
num_ls_threads = 20;
num_ps_stack_entries = 42;
num_vs_stack_entries = 42;
num_gs_stack_entries = 42;
num_es_stack_entries = 42;
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
case CHIP_JUNIPER:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 20;
num_gs_threads = 20;
num_es_threads = 20;
num_hs_threads = 20;
num_ls_threads = 20;
num_ps_stack_entries = 85;
num_vs_stack_entries = 85;
num_gs_stack_entries = 85;
num_es_stack_entries = 85;
num_hs_stack_entries = 85;
num_ls_stack_entries = 85;
break;
case CHIP_CYPRESS:
case CHIP_HEMLOCK:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 20;
num_gs_threads = 20;
num_es_threads = 20;
num_hs_threads = 20;
num_ls_threads = 20;
num_ps_stack_entries = 85;
num_vs_stack_entries = 85;
num_gs_stack_entries = 85;
num_es_stack_entries = 85;
num_hs_stack_entries = 85;
num_ls_stack_entries = 85;
break;
case CHIP_PALM:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 96;
num_vs_threads = 16;
num_gs_threads = 16;
num_es_threads = 16;
num_hs_threads = 16;
num_ls_threads = 16;
num_ps_stack_entries = 42;
num_vs_stack_entries = 42;
num_gs_stack_entries = 42;
num_es_stack_entries = 42;
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
case CHIP_BARTS:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 20;
num_gs_threads = 20;
num_es_threads = 20;
num_hs_threads = 20;
num_ls_threads = 20;
num_ps_stack_entries = 85;
num_vs_stack_entries = 85;
num_gs_stack_entries = 85;
num_es_stack_entries = 85;
num_hs_stack_entries = 85;
num_ls_stack_entries = 85;
break;
case CHIP_TURKS:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 20;
num_gs_threads = 20;
num_es_threads = 20;
num_hs_threads = 20;
num_ls_threads = 20;
num_ps_stack_entries = 42;
num_vs_stack_entries = 42;
num_gs_stack_entries = 42;
num_es_stack_entries = 42;
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
case CHIP_CAICOS:
num_ps_gprs = 93;
num_vs_gprs = 46;
num_temp_gprs = 4;
num_gs_gprs = 31;
num_es_gprs = 31;
num_hs_gprs = 23;
num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 10;
num_gs_threads = 10;
num_es_threads = 10;
num_hs_threads = 10;
num_ls_threads = 10;
num_ps_stack_entries = 42;
num_vs_stack_entries = 42;
num_gs_stack_entries = 42;
num_es_stack_entries = 42;
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
}
/* SQ config */
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
radeon_ring_write(rdev, sq_config);
radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
radeon_ring_write(rdev, 0);
radeon_ring_write(rdev, 0);
radeon_ring_write(rdev, sq_thread_resource_mgmt);
radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
if ((rdev->family == CHIP_CEDAR) ||
(rdev->family == CHIP_PALM) ||
(rdev->family == CHIP_CAICOS))
sq_config = 0;
else
sq_config = VC_ENABLE;
sq_config |= (EXPORT_SRC_C |
CS_PRIO(0) |
LS_PRIO(0) |
HS_PRIO(0) |
PS_PRIO(0) |
VS_PRIO(1) |
GS_PRIO(2) |
ES_PRIO(3));
sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
NUM_VS_GPRS(num_vs_gprs) |
NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
NUM_ES_GPRS(num_es_gprs));
sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
NUM_LS_GPRS(num_ls_gprs));
sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
NUM_VS_THREADS(num_vs_threads) |
NUM_GS_THREADS(num_gs_threads) |
NUM_ES_THREADS(num_es_threads));
sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
NUM_LS_THREADS(num_ls_threads));
sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
NUM_ES_STACK_ENTRIES(num_es_stack_entries));
sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
/* disable dyn gprs */
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
radeon_ring_write(rdev, 0);
/* SQ config */
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
radeon_ring_write(rdev, sq_config);
radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
radeon_ring_write(rdev, 0);
radeon_ring_write(rdev, 0);
radeon_ring_write(rdev, sq_thread_resource_mgmt);
radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
}
/* CONTEXT_CONTROL */
radeon_ring_write(rdev, 0xc0012800);
......@@ -570,7 +573,10 @@ int evergreen_blit_init(struct radeon_device *rdev)
mutex_init(&rdev->r600_blit.mutex);
rdev->r600_blit.state_offset = 0;
rdev->r600_blit.state_len = evergreen_default_size;
if (rdev->family < CHIP_CAYMAN)
rdev->r600_blit.state_len = evergreen_default_size;
else
rdev->r600_blit.state_len = cayman_default_size;
dwords = rdev->r600_blit.state_len;
while (dwords & 0xf) {
......@@ -582,11 +588,17 @@ int evergreen_blit_init(struct radeon_device *rdev)
obj_size = ALIGN(obj_size, 256);
rdev->r600_blit.vs_offset = obj_size;
obj_size += evergreen_vs_size * 4;
if (rdev->family < CHIP_CAYMAN)
obj_size += evergreen_vs_size * 4;
else
obj_size += cayman_vs_size * 4;
obj_size = ALIGN(obj_size, 256);
rdev->r600_blit.ps_offset = obj_size;
obj_size += evergreen_ps_size * 4;
if (rdev->family < CHIP_CAYMAN)
obj_size += evergreen_ps_size * 4;
else
obj_size += cayman_ps_size * 4;
obj_size = ALIGN(obj_size, 256);
r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
......@@ -609,16 +621,29 @@ int evergreen_blit_init(struct radeon_device *rdev)
return r;
}
memcpy_toio(ptr + rdev->r600_blit.state_offset,
evergreen_default_state, rdev->r600_blit.state_len * 4);
if (num_packet2s)
memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
packet2s, num_packet2s * 4);
for (i = 0; i < evergreen_vs_size; i++)
*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
for (i = 0; i < evergreen_ps_size; i++)
*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
if (rdev->family < CHIP_CAYMAN) {
memcpy_toio(ptr + rdev->r600_blit.state_offset,
evergreen_default_state, rdev->r600_blit.state_len * 4);
if (num_packet2s)
memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
packet2s, num_packet2s * 4);
for (i = 0; i < evergreen_vs_size; i++)
*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
for (i = 0; i < evergreen_ps_size; i++)
*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
} else {
memcpy_toio(ptr + rdev->r600_blit.state_offset,
cayman_default_state, rdev->r600_blit.state_len * 4);
if (num_packet2s)
memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
packet2s, num_packet2s * 4);
for (i = 0; i < cayman_vs_size; i++)
*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]);
for (i = 0; i < cayman_ps_size; i++)
*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]);
}
radeon_bo_kunmap(rdev->r600_blit.shader_obj);
radeon_bo_unreserve(rdev->r600_blit.shader_obj);
......
......@@ -1387,14 +1387,12 @@ static int cayman_startup(struct radeon_device *rdev)
return r;
cayman_gpu_init(rdev);
#if 0
r = cayman_blit_init(rdev);
r = evergreen_blit_init(rdev);
if (r) {
cayman_blit_fini(rdev);
evergreen_blit_fini(rdev);
rdev->asic->copy = NULL;
dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
}
#endif
/* allocate wb buffer */
r = radeon_wb_init(rdev);
......@@ -1452,7 +1450,7 @@ int cayman_resume(struct radeon_device *rdev)
int cayman_suspend(struct radeon_device *rdev)
{
/* int r; */
int r;
/* FIXME: we should wait for ring to be empty */
cayman_cp_enable(rdev, false);
......@@ -1461,14 +1459,13 @@ int cayman_suspend(struct radeon_device *rdev)
radeon_wb_disable(rdev);
cayman_pcie_gart_disable(rdev);
#if 0
/* unpin shaders bo */
r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
if (likely(r == 0)) {
radeon_bo_unpin(rdev->r600_blit.shader_obj);
radeon_bo_unreserve(rdev->r600_blit.shader_obj);
}
#endif
return 0;
}
......@@ -1580,7 +1577,7 @@ int cayman_init(struct radeon_device *rdev)
void cayman_fini(struct radeon_device *rdev)
{
/* cayman_blit_fini(rdev); */
evergreen_blit_fini(rdev);
cayman_cp_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
......
......@@ -906,9 +906,9 @@ static struct radeon_asic cayman_asic = {
.get_vblank_counter = &evergreen_get_vblank_counter,
.fence_ring_emit = &r600_fence_ring_emit,
.cs_parse = &evergreen_cs_parse,
.copy_blit = NULL,
.copy_dma = NULL,
.copy = NULL,
.copy_blit = &evergreen_copy_blit,
.copy_dma = &evergreen_copy_blit,
.copy = &evergreen_copy_blit,
.get_engine_clock = &radeon_atom_get_engine_clock,
.set_engine_clock = &radeon_atom_set_engine_clock,
.get_memory_clock = &radeon_atom_get_memory_clock,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册