提交 8f8e00c1 编写于 作者: A Alex Deucher

drm/amdgpu/gfx: clean up harvest configuration (v2)

Read back harvest configuration from registers and simplify
calculations.  No need to program the raster config registers.
These are programmed as golden registers and the user mode
drivers program them as well.

v2: rebase on Tom's patches
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
上级 a750b47e
...@@ -1133,6 +1133,7 @@ struct amdgpu_gca_config { ...@@ -1133,6 +1133,7 @@ struct amdgpu_gca_config {
unsigned multi_gpu_tile_size; unsigned multi_gpu_tile_size;
unsigned mc_arb_ramcfg; unsigned mc_arb_ramcfg;
unsigned gb_addr_config; unsigned gb_addr_config;
unsigned num_rbs;
uint32_t tile_mode_array[32]; uint32_t tile_mode_array[32];
uint32_t macrotile_mode_array[16]; uint32_t macrotile_mode_array[16];
......
...@@ -447,8 +447,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file ...@@ -447,8 +447,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.max_memory_clock = adev->pm.default_mclk * 10; dev_info.max_memory_clock = adev->pm.default_mclk * 10;
} }
dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se * dev_info.num_rb_pipes = adev->gfx.config.num_rbs;
adev->gfx.config.max_shader_engines;
dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts; dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
dev_info._pad = 0; dev_info._pad = 0;
dev_info.ids_flags = 0; dev_info.ids_flags = 0;
......
...@@ -1598,39 +1598,31 @@ void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) ...@@ -1598,39 +1598,31 @@ void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
*/ */
static u32 gfx_v7_0_create_bitmask(u32 bit_width) static u32 gfx_v7_0_create_bitmask(u32 bit_width)
{ {
return (u32)((1ULL<<bit_width)-1); return (u32)((1ULL << bit_width) - 1);
} }
/** /**
* gfx_v7_0_get_rb_disabled - computes the mask of disabled RBs * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs
* *
* @adev: amdgpu_device pointer * @adev: amdgpu_device pointer
* @max_rb_num: max RBs (render backends) for the asic
* @se_num: number of SEs (shader engines) for the asic
* @sh_per_se: number of SH blocks per SE for the asic
* *
* Calculates the bitmask of disabled RBs (CIK). * Calculates the bitmask of enabled RBs (CIK).
* Returns the disabled RB bitmask. * Returns the enabled RB bitmask.
*/ */
static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev, static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
u32 max_rb_num_per_se,
u32 sh_per_se)
{ {
u32 data, mask; u32 data, mask;
data = RREG32(mmCC_RB_BACKEND_DISABLE); data = RREG32(mmCC_RB_BACKEND_DISABLE);
if (data & 1)
data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
else
data = 0;
data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
mask = gfx_v7_0_create_bitmask(max_rb_num_per_se / sh_per_se); mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se /
adev->gfx.config.max_sh_per_se);
return data & mask; return (~data) & mask;
} }
/** /**
...@@ -1639,68 +1631,36 @@ static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev, ...@@ -1639,68 +1631,36 @@ static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev,
* @adev: amdgpu_device pointer * @adev: amdgpu_device pointer
* @se_num: number of SEs (shader engines) for the asic * @se_num: number of SEs (shader engines) for the asic
* @sh_per_se: number of SH blocks per SE for the asic * @sh_per_se: number of SH blocks per SE for the asic
* @max_rb_num: max RBs (render backends) for the asic
* *
* Configures per-SE/SH RB registers (CIK). * Configures per-SE/SH RB registers (CIK).
*/ */
static void gfx_v7_0_setup_rb(struct amdgpu_device *adev, static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
u32 se_num, u32 sh_per_se,
u32 max_rb_num_per_se)
{ {
int i, j; int i, j;
u32 data; u32 data, tmp, num_rbs = 0;
u32 disabled_rbs = 0; u32 active_rbs = 0;
u32 enabled_rbs = 0;
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < se_num; i++) { for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < sh_per_se; j++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
gfx_v7_0_select_se_sh(adev, i, j); gfx_v7_0_select_se_sh(adev, i, j);
data = gfx_v7_0_get_rb_disabled(adev, max_rb_num_per_se, sh_per_se); data = gfx_v7_0_get_rb_active_bitmap(adev);
if (adev->asic_type == CHIP_HAWAII) if (adev->asic_type == CHIP_HAWAII)
disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
HAWAII_RB_BITMAP_WIDTH_PER_SH);
else else
disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
CIK_RB_BITMAP_WIDTH_PER_SH);
} }
} }
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
enabled_rbs = (~disabled_rbs) & ((1UL<<(max_rb_num_per_se*se_num))-1); adev->gfx.config.backend_enable_mask = active_rbs;
tmp = active_rbs;
adev->gfx.config.backend_enable_mask = enabled_rbs; while (tmp >>= 1)
num_rbs++;
mutex_lock(&adev->grbm_idx_mutex); adev->gfx.config.num_rbs = num_rbs;
for (i = 0; i < se_num; i++) {
gfx_v7_0_select_se_sh(adev, i, 0xffffffff);
data = 0;
for (j = 0; j < sh_per_se; j++) {
switch (enabled_rbs & 3) {
case 0:
if (j == 0)
data |= (RASTER_CONFIG_RB_MAP_3 <<
PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
else
data |= (RASTER_CONFIG_RB_MAP_0 <<
PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
break;
case 1:
data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
break;
case 2:
data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
break;
case 3:
default:
data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
break;
}
enabled_rbs >>= 2;
}
WREG32(mmPA_SC_RASTER_CONFIG, data);
}
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
} }
/** /**
...@@ -1931,9 +1891,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) ...@@ -1931,9 +1891,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
gfx_v7_0_tiling_mode_table_init(adev); gfx_v7_0_tiling_mode_table_init(adev);
gfx_v7_0_setup_rb(adev, adev->gfx.config.max_shader_engines, gfx_v7_0_setup_rb(adev);
adev->gfx.config.max_sh_per_se,
adev->gfx.config.max_backends_per_se);
/* set HW defaults for 3D engine */ /* set HW defaults for 3D engine */
WREG32(mmCP_MEQ_THRESHOLDS, WREG32(mmCP_MEQ_THRESHOLDS,
...@@ -4026,28 +3984,20 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev, ...@@ -4026,28 +3984,20 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
} }
} }
static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev, static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
u32 se, u32 sh)
{ {
u32 mask = 0, tmp, tmp1; u32 data, mask;
int i;
gfx_v7_0_select_se_sh(adev, se, sh);
tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
tmp &= 0xffff0000; data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
tmp |= tmp1; data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
tmp >>= 16; data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) { mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se /
mask <<= 1; adev->gfx.config.max_sh_per_se);
mask |= 1;
}
return (~tmp) & mask; return (~data) & mask;
} }
static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev) static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
...@@ -5304,10 +5254,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, ...@@ -5304,10 +5254,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
mask = 1; mask = 1;
ao_bitmap = 0; ao_bitmap = 0;
counter = 0; counter = 0;
bitmap = gfx_v7_0_get_cu_active_bitmap(adev, i, j); gfx_v7_0_select_se_sh(adev, i, j);
bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
cu_info->bitmap[i][j] = bitmap; cu_info->bitmap[i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { for (k = 0; k < 16; k ++) {
if (bitmap & mask) { if (bitmap & mask) {
if (counter < 2) if (counter < 2)
ao_bitmap |= mask; ao_bitmap |= mask;
...@@ -5319,9 +5270,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, ...@@ -5319,9 +5270,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
} }
} }
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number; cu_info->number = active_cu_number;
cu_info->ao_cu_mask = ao_cu_mask; cu_info->ao_cu_mask = ao_cu_mask;
mutex_unlock(&adev->grbm_idx_mutex);
return 0; return 0;
} }
...@@ -2572,11 +2572,6 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) ...@@ -2572,11 +2572,6 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
} }
} }
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
return (u32)((1ULL << bit_width) - 1);
}
void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
{ {
u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
...@@ -2597,89 +2592,50 @@ void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) ...@@ -2597,89 +2592,50 @@ void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
WREG32(mmGRBM_GFX_INDEX, data); WREG32(mmGRBM_GFX_INDEX, data);
} }
static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev, static u32 gfx_v8_0_create_bitmask(u32 bit_width)
u32 max_rb_num_per_se, {
u32 sh_per_se) return (u32)((1ULL << bit_width) - 1);
}
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{ {
u32 data, mask; u32 data, mask;
data = RREG32(mmCC_RB_BACKEND_DISABLE); data = RREG32(mmCC_RB_BACKEND_DISABLE);
data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se); mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
adev->gfx.config.max_sh_per_se);
return data & mask; return (~data) & mask;
} }
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev, static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
u32 se_num, u32 sh_per_se,
u32 max_rb_num_per_se)
{ {
int i, j; int i, j;
u32 data, mask; u32 data, tmp, num_rbs = 0;
u32 disabled_rbs = 0; u32 active_rbs = 0;
u32 enabled_rbs = 0;
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < se_num; i++) { for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < sh_per_se; j++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
gfx_v8_0_select_se_sh(adev, i, j); gfx_v8_0_select_se_sh(adev, i, j);
data = gfx_v8_0_get_rb_disabled(adev, data = gfx_v8_0_get_rb_active_bitmap(adev);
max_rb_num_per_se, sh_per_se); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
disabled_rbs |= data << ((i * sh_per_se + j) * RB_BITMAP_WIDTH_PER_SH);
RB_BITMAP_WIDTH_PER_SH);
} }
} }
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
mask = 1; adev->gfx.config.backend_enable_mask = active_rbs;
for (i = 0; i < max_rb_num_per_se * se_num; i++) { tmp = active_rbs;
if (!(disabled_rbs & mask)) while (tmp >>= 1)
enabled_rbs |= mask; num_rbs++;
mask <<= 1; adev->gfx.config.num_rbs = num_rbs;
}
adev->gfx.config.backend_enable_mask = enabled_rbs;
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < se_num; i++) {
gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
data = RREG32(mmPA_SC_RASTER_CONFIG);
for (j = 0; j < sh_per_se; j++) {
switch (enabled_rbs & 3) {
case 0:
if (j == 0)
data |= (RASTER_CONFIG_RB_MAP_3 <<
PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
else
data |= (RASTER_CONFIG_RB_MAP_0 <<
PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
break;
case 1:
data |= (RASTER_CONFIG_RB_MAP_0 <<
(i * sh_per_se + j) * 2);
break;
case 2:
data |= (RASTER_CONFIG_RB_MAP_3 <<
(i * sh_per_se + j) * 2);
break;
case 3:
default:
data |= (RASTER_CONFIG_RB_MAP_2 <<
(i * sh_per_se + j) * 2);
break;
}
enabled_rbs >>= 2;
}
WREG32(mmPA_SC_RASTER_CONFIG, data);
}
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
} }
/** /**
...@@ -2749,9 +2705,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) ...@@ -2749,9 +2705,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
gfx_v8_0_tiling_mode_table_init(adev); gfx_v8_0_tiling_mode_table_init(adev);
gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines, gfx_v8_0_setup_rb(adev);
adev->gfx.config.max_sh_per_se,
adev->gfx.config.max_backends_per_se);
/* XXX SH_MEM regs */ /* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */ /* where to put LDS, scratch, GPUVM in FSA64 space */
...@@ -5187,32 +5141,24 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) ...@@ -5187,32 +5141,24 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
} }
} }
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev, static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
u32 se, u32 sh)
{ {
u32 mask = 0, tmp, tmp1; u32 data, mask;
int i;
gfx_v8_0_select_se_sh(adev, se, sh); data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
tmp &= 0xffff0000; data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
tmp |= tmp1; mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
tmp >>= 16; adev->gfx.config.max_sh_per_se);
for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) { return (~data) & mask;
mask <<= 1;
mask |= 1;
}
return (~tmp) & mask;
} }
int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info) struct amdgpu_cu_info *cu_info)
{ {
int i, j, k, counter, active_cu_number = 0; int i, j, k, counter, active_cu_number = 0;
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
...@@ -5226,10 +5172,11 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, ...@@ -5226,10 +5172,11 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
mask = 1; mask = 1;
ao_bitmap = 0; ao_bitmap = 0;
counter = 0; counter = 0;
bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j); gfx_v8_0_select_se_sh(adev, i, j);
bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
cu_info->bitmap[i][j] = bitmap; cu_info->bitmap[i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { for (k = 0; k < 16; k ++) {
if (bitmap & mask) { if (bitmap & mask) {
if (counter < 2) if (counter < 2)
ao_bitmap |= mask; ao_bitmap |= mask;
...@@ -5241,9 +5188,11 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, ...@@ -5241,9 +5188,11 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
} }
} }
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number; cu_info->number = active_cu_number;
cu_info->ao_cu_mask = ao_cu_mask; cu_info->ao_cu_mask = ao_cu_mask;
mutex_unlock(&adev->grbm_idx_mutex);
return 0; return 0;
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册