提交 786a7828 编写于 作者: Dave Airlie

Merge branch 'drm-next-3.15' of git://people.freedesktop.org/~deathsimple/linux into drm-next

This is the second pull request with radeon changes for 3.15. Highlights this time:
- Better VRAM usage
- VM page table rework
- Enabling different UVD clocks again
- Some general cleanups and improvements

* 'drm-next-3.15' of git://people.freedesktop.org/~deathsimple/linux:
  drm/radeon: remove struct radeon_bo_list
  drm/radeon: drop non blocking allocations from sub allocator
  drm/radeon: remove global vm lock
  drm/radeon: use normal BOs for the page tables v4
  drm/radeon: further cleanup vm flushing & fencing
  drm/radeon: separate gart and vm functions
  drm/radeon: fix VCE suspend/resume
  drm/radeon: fix missing bo reservation
  drm/radeon: limit how much memory TTM can move per IB according to VRAM usage
  drm/radeon: validate relocations in the order determined by userspace v3
  drm/radeon: add buffers to the LRU list from smallest to largest
  drm/radeon: deduplicate code in radeon_gem_busy_ioctl
  drm/radeon: track memory statistics about VRAM and GTT usage and buffer moves v2
  drm/radeon: add a way to get and set initial buffer domains v2
  drm/radeon: use variable UVD clocks
  drm/radeon: cleanup the fence ring locking code
  drm/radeon: improve ring lockup detection code v2
...@@ -80,7 +80,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ ...@@ -80,7 +80,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \
rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
ci_dpm.o dce6_afmt.o ci_dpm.o dce6_afmt.o radeon_vm.o
# add async DMA block # add async DMA block
radeon-y += \ radeon-y += \
......
...@@ -1274,12 +1274,12 @@ int r100_reloc_pitch_offset(struct radeon_cs_parser *p, ...@@ -1274,12 +1274,12 @@ int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
value = radeon_get_ib_value(p, idx); value = radeon_get_ib_value(p, idx);
tmp = value & 0x003fffff; tmp = value & 0x003fffff;
tmp += (((u32)reloc->lobj.gpu_offset) >> 10); tmp += (((u32)reloc->gpu_offset) >> 10);
if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) if (reloc->tiling_flags & RADEON_TILING_MACRO)
tile_flags |= RADEON_DST_TILE_MACRO; tile_flags |= RADEON_DST_TILE_MACRO;
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { if (reloc->tiling_flags & RADEON_TILING_MICRO) {
if (reg == RADEON_SRC_PITCH_OFFSET) { if (reg == RADEON_SRC_PITCH_OFFSET) {
DRM_ERROR("Cannot src blit from microtiled surface\n"); DRM_ERROR("Cannot src blit from microtiled surface\n");
radeon_cs_dump_packet(p, pkt); radeon_cs_dump_packet(p, pkt);
...@@ -1325,7 +1325,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, ...@@ -1325,7 +1325,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
return r; return r;
} }
idx_value = radeon_get_ib_value(p, idx); idx_value = radeon_get_ib_value(p, idx);
ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
track->arrays[i + 0].esize = idx_value >> 8; track->arrays[i + 0].esize = idx_value >> 8;
track->arrays[i + 0].robj = reloc->robj; track->arrays[i + 0].robj = reloc->robj;
...@@ -1337,7 +1337,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, ...@@ -1337,7 +1337,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
radeon_cs_dump_packet(p, pkt); radeon_cs_dump_packet(p, pkt);
return r; return r;
} }
ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->gpu_offset);
track->arrays[i + 1].robj = reloc->robj; track->arrays[i + 1].robj = reloc->robj;
track->arrays[i + 1].esize = idx_value >> 24; track->arrays[i + 1].esize = idx_value >> 24;
track->arrays[i + 1].esize &= 0x7F; track->arrays[i + 1].esize &= 0x7F;
...@@ -1351,7 +1351,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, ...@@ -1351,7 +1351,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
return r; return r;
} }
idx_value = radeon_get_ib_value(p, idx); idx_value = radeon_get_ib_value(p, idx);
ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
track->arrays[i + 0].robj = reloc->robj; track->arrays[i + 0].robj = reloc->robj;
track->arrays[i + 0].esize = idx_value >> 8; track->arrays[i + 0].esize = idx_value >> 8;
track->arrays[i + 0].esize &= 0x7F; track->arrays[i + 0].esize &= 0x7F;
...@@ -1594,7 +1594,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, ...@@ -1594,7 +1594,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
track->zb.robj = reloc->robj; track->zb.robj = reloc->robj;
track->zb.offset = idx_value; track->zb.offset = idx_value;
track->zb_dirty = true; track->zb_dirty = true;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
break; break;
case RADEON_RB3D_COLOROFFSET: case RADEON_RB3D_COLOROFFSET:
r = radeon_cs_packet_next_reloc(p, &reloc, 0); r = radeon_cs_packet_next_reloc(p, &reloc, 0);
...@@ -1607,7 +1607,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, ...@@ -1607,7 +1607,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
track->cb[0].robj = reloc->robj; track->cb[0].robj = reloc->robj;
track->cb[0].offset = idx_value; track->cb[0].offset = idx_value;
track->cb_dirty = true; track->cb_dirty = true;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
break; break;
case RADEON_PP_TXOFFSET_0: case RADEON_PP_TXOFFSET_0:
case RADEON_PP_TXOFFSET_1: case RADEON_PP_TXOFFSET_1:
...@@ -1621,16 +1621,16 @@ static int r100_packet0_check(struct radeon_cs_parser *p, ...@@ -1621,16 +1621,16 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
return r; return r;
} }
if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) if (reloc->tiling_flags & RADEON_TILING_MACRO)
tile_flags |= RADEON_TXO_MACRO_TILE; tile_flags |= RADEON_TXO_MACRO_TILE;
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) if (reloc->tiling_flags & RADEON_TILING_MICRO)
tile_flags |= RADEON_TXO_MICRO_TILE_X2; tile_flags |= RADEON_TXO_MICRO_TILE_X2;
tmp = idx_value & ~(0x7 << 2); tmp = idx_value & ~(0x7 << 2);
tmp |= tile_flags; tmp |= tile_flags;
ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset); ib[idx] = tmp + ((u32)reloc->gpu_offset);
} else } else
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
track->textures[i].robj = reloc->robj; track->textures[i].robj = reloc->robj;
track->tex_dirty = true; track->tex_dirty = true;
break; break;
...@@ -1648,7 +1648,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, ...@@ -1648,7 +1648,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
return r; return r;
} }
track->textures[0].cube_info[i].offset = idx_value; track->textures[0].cube_info[i].offset = idx_value;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
track->textures[0].cube_info[i].robj = reloc->robj; track->textures[0].cube_info[i].robj = reloc->robj;
track->tex_dirty = true; track->tex_dirty = true;
break; break;
...@@ -1666,7 +1666,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, ...@@ -1666,7 +1666,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
return r; return r;
} }
track->textures[1].cube_info[i].offset = idx_value; track->textures[1].cube_info[i].offset = idx_value;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
track->textures[1].cube_info[i].robj = reloc->robj; track->textures[1].cube_info[i].robj = reloc->robj;
track->tex_dirty = true; track->tex_dirty = true;
break; break;
...@@ -1684,7 +1684,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, ...@@ -1684,7 +1684,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
return r; return r;
} }
track->textures[2].cube_info[i].offset = idx_value; track->textures[2].cube_info[i].offset = idx_value;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
track->textures[2].cube_info[i].robj = reloc->robj; track->textures[2].cube_info[i].robj = reloc->robj;
track->tex_dirty = true; track->tex_dirty = true;
break; break;
...@@ -1702,9 +1702,9 @@ static int r100_packet0_check(struct radeon_cs_parser *p, ...@@ -1702,9 +1702,9 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
return r; return r;
} }
if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) if (reloc->tiling_flags & RADEON_TILING_MACRO)
tile_flags |= RADEON_COLOR_TILE_ENABLE; tile_flags |= RADEON_COLOR_TILE_ENABLE;
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) if (reloc->tiling_flags & RADEON_TILING_MICRO)
tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;
tmp = idx_value & ~(0x7 << 16); tmp = idx_value & ~(0x7 << 16);
...@@ -1772,7 +1772,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, ...@@ -1772,7 +1772,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
radeon_cs_dump_packet(p, pkt); radeon_cs_dump_packet(p, pkt);
return r; return r;
} }
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
break; break;
case RADEON_PP_CNTL: case RADEON_PP_CNTL:
{ {
...@@ -1932,7 +1932,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, ...@@ -1932,7 +1932,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p,
radeon_cs_dump_packet(p, pkt); radeon_cs_dump_packet(p, pkt);
return r; return r;
} }
ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->lobj.gpu_offset); ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->gpu_offset);
r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj); r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
if (r) { if (r) {
return r; return r;
...@@ -1946,7 +1946,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, ...@@ -1946,7 +1946,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p,
radeon_cs_dump_packet(p, pkt); radeon_cs_dump_packet(p, pkt);
return r; return r;
} }
ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->lobj.gpu_offset); ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->gpu_offset);
track->num_arrays = 1; track->num_arrays = 1;
track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2)); track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));
......
...@@ -185,7 +185,7 @@ int r200_packet0_check(struct radeon_cs_parser *p, ...@@ -185,7 +185,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
track->zb.robj = reloc->robj; track->zb.robj = reloc->robj;
track->zb.offset = idx_value; track->zb.offset = idx_value;
track->zb_dirty = true; track->zb_dirty = true;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
break; break;
case RADEON_RB3D_COLOROFFSET: case RADEON_RB3D_COLOROFFSET:
r = radeon_cs_packet_next_reloc(p, &reloc, 0); r = radeon_cs_packet_next_reloc(p, &reloc, 0);
...@@ -198,7 +198,7 @@ int r200_packet0_check(struct radeon_cs_parser *p, ...@@ -198,7 +198,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
track->cb[0].robj = reloc->robj; track->cb[0].robj = reloc->robj;
track->cb[0].offset = idx_value; track->cb[0].offset = idx_value;
track->cb_dirty = true; track->cb_dirty = true;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
break; break;
case R200_PP_TXOFFSET_0: case R200_PP_TXOFFSET_0:
case R200_PP_TXOFFSET_1: case R200_PP_TXOFFSET_1:
...@@ -215,16 +215,16 @@ int r200_packet0_check(struct radeon_cs_parser *p, ...@@ -215,16 +215,16 @@ int r200_packet0_check(struct radeon_cs_parser *p,
return r; return r;
} }
if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) if (reloc->tiling_flags & RADEON_TILING_MACRO)
tile_flags |= R200_TXO_MACRO_TILE; tile_flags |= R200_TXO_MACRO_TILE;
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) if (reloc->tiling_flags & RADEON_TILING_MICRO)
tile_flags |= R200_TXO_MICRO_TILE; tile_flags |= R200_TXO_MICRO_TILE;
tmp = idx_value & ~(0x7 << 2); tmp = idx_value & ~(0x7 << 2);
tmp |= tile_flags; tmp |= tile_flags;
ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset); ib[idx] = tmp + ((u32)reloc->gpu_offset);
} else } else
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
track->textures[i].robj = reloc->robj; track->textures[i].robj = reloc->robj;
track->tex_dirty = true; track->tex_dirty = true;
break; break;
...@@ -268,7 +268,7 @@ int r200_packet0_check(struct radeon_cs_parser *p, ...@@ -268,7 +268,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
return r; return r;
} }
track->textures[i].cube_info[face - 1].offset = idx_value; track->textures[i].cube_info[face - 1].offset = idx_value;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
track->textures[i].cube_info[face - 1].robj = reloc->robj; track->textures[i].cube_info[face - 1].robj = reloc->robj;
track->tex_dirty = true; track->tex_dirty = true;
break; break;
...@@ -287,9 +287,9 @@ int r200_packet0_check(struct radeon_cs_parser *p, ...@@ -287,9 +287,9 @@ int r200_packet0_check(struct radeon_cs_parser *p,
} }
if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) if (reloc->tiling_flags & RADEON_TILING_MACRO)
tile_flags |= RADEON_COLOR_TILE_ENABLE; tile_flags |= RADEON_COLOR_TILE_ENABLE;
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) if (reloc->tiling_flags & RADEON_TILING_MICRO)
tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;
tmp = idx_value & ~(0x7 << 16); tmp = idx_value & ~(0x7 << 16);
...@@ -362,7 +362,7 @@ int r200_packet0_check(struct radeon_cs_parser *p, ...@@ -362,7 +362,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
radeon_cs_dump_packet(p, pkt); radeon_cs_dump_packet(p, pkt);
return r; return r;
} }
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
break; break;
case RADEON_PP_CNTL: case RADEON_PP_CNTL:
{ {
......
...@@ -640,7 +640,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, ...@@ -640,7 +640,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
track->cb[i].robj = reloc->robj; track->cb[i].robj = reloc->robj;
track->cb[i].offset = idx_value; track->cb[i].offset = idx_value;
track->cb_dirty = true; track->cb_dirty = true;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
break; break;
case R300_ZB_DEPTHOFFSET: case R300_ZB_DEPTHOFFSET:
r = radeon_cs_packet_next_reloc(p, &reloc, 0); r = radeon_cs_packet_next_reloc(p, &reloc, 0);
...@@ -653,7 +653,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, ...@@ -653,7 +653,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
track->zb.robj = reloc->robj; track->zb.robj = reloc->robj;
track->zb.offset = idx_value; track->zb.offset = idx_value;
track->zb_dirty = true; track->zb_dirty = true;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
break; break;
case R300_TX_OFFSET_0: case R300_TX_OFFSET_0:
case R300_TX_OFFSET_0+4: case R300_TX_OFFSET_0+4:
...@@ -682,16 +682,16 @@ static int r300_packet0_check(struct radeon_cs_parser *p, ...@@ -682,16 +682,16 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
if (p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) { if (p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) {
ib[idx] = (idx_value & 31) | /* keep the 1st 5 bits */ ib[idx] = (idx_value & 31) | /* keep the 1st 5 bits */
((idx_value & ~31) + (u32)reloc->lobj.gpu_offset); ((idx_value & ~31) + (u32)reloc->gpu_offset);
} else { } else {
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) if (reloc->tiling_flags & RADEON_TILING_MACRO)
tile_flags |= R300_TXO_MACRO_TILE; tile_flags |= R300_TXO_MACRO_TILE;
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) if (reloc->tiling_flags & RADEON_TILING_MICRO)
tile_flags |= R300_TXO_MICRO_TILE; tile_flags |= R300_TXO_MICRO_TILE;
else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) else if (reloc->tiling_flags & RADEON_TILING_MICRO_SQUARE)
tile_flags |= R300_TXO_MICRO_TILE_SQUARE; tile_flags |= R300_TXO_MICRO_TILE_SQUARE;
tmp = idx_value + ((u32)reloc->lobj.gpu_offset); tmp = idx_value + ((u32)reloc->gpu_offset);
tmp |= tile_flags; tmp |= tile_flags;
ib[idx] = tmp; ib[idx] = tmp;
} }
...@@ -753,11 +753,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p, ...@@ -753,11 +753,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
return r; return r;
} }
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) if (reloc->tiling_flags & RADEON_TILING_MACRO)
tile_flags |= R300_COLOR_TILE_ENABLE; tile_flags |= R300_COLOR_TILE_ENABLE;
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) if (reloc->tiling_flags & RADEON_TILING_MICRO)
tile_flags |= R300_COLOR_MICROTILE_ENABLE; tile_flags |= R300_COLOR_MICROTILE_ENABLE;
else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) else if (reloc->tiling_flags & RADEON_TILING_MICRO_SQUARE)
tile_flags |= R300_COLOR_MICROTILE_SQUARE_ENABLE; tile_flags |= R300_COLOR_MICROTILE_SQUARE_ENABLE;
tmp = idx_value & ~(0x7 << 16); tmp = idx_value & ~(0x7 << 16);
...@@ -838,11 +838,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p, ...@@ -838,11 +838,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
return r; return r;
} }
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) if (reloc->tiling_flags & RADEON_TILING_MACRO)
tile_flags |= R300_DEPTHMACROTILE_ENABLE; tile_flags |= R300_DEPTHMACROTILE_ENABLE;
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) if (reloc->tiling_flags & RADEON_TILING_MICRO)
tile_flags |= R300_DEPTHMICROTILE_TILED; tile_flags |= R300_DEPTHMICROTILE_TILED;
else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) else if (reloc->tiling_flags & RADEON_TILING_MICRO_SQUARE)
tile_flags |= R300_DEPTHMICROTILE_TILED_SQUARE; tile_flags |= R300_DEPTHMICROTILE_TILED_SQUARE;
tmp = idx_value & ~(0x7 << 16); tmp = idx_value & ~(0x7 << 16);
...@@ -1052,7 +1052,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, ...@@ -1052,7 +1052,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
radeon_cs_dump_packet(p, pkt); radeon_cs_dump_packet(p, pkt);
return r; return r;
} }
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
break; break;
case 0x4e0c: case 0x4e0c:
/* RB3D_COLOR_CHANNEL_MASK */ /* RB3D_COLOR_CHANNEL_MASK */
...@@ -1097,7 +1097,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, ...@@ -1097,7 +1097,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
track->aa.robj = reloc->robj; track->aa.robj = reloc->robj;
track->aa.offset = idx_value; track->aa.offset = idx_value;
track->aa_dirty = true; track->aa_dirty = true;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); ib[idx] = idx_value + ((u32)reloc->gpu_offset);
break; break;
case R300_RB3D_AARESOLVE_PITCH: case R300_RB3D_AARESOLVE_PITCH:
track->aa.pitch = idx_value & 0x3FFE; track->aa.pitch = idx_value & 0x3FFE;
...@@ -1162,7 +1162,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p, ...@@ -1162,7 +1162,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p,
radeon_cs_dump_packet(p, pkt); radeon_cs_dump_packet(p, pkt);
return r; return r;
} }
ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj); r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
if (r) { if (r) {
return r; return r;
......
...@@ -1022,7 +1022,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ...@@ -1022,7 +1022,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
"0x%04X\n", reg); "0x%04X\n", reg);
return -EINVAL; return -EINVAL;
} }
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
break; break;
case SQ_CONFIG: case SQ_CONFIG:
track->sq_config = radeon_get_ib_value(p, idx); track->sq_config = radeon_get_ib_value(p, idx);
...@@ -1043,7 +1043,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ...@@ -1043,7 +1043,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
track->db_depth_info = radeon_get_ib_value(p, idx); track->db_depth_info = radeon_get_ib_value(p, idx);
ib[idx] &= C_028010_ARRAY_MODE; ib[idx] &= C_028010_ARRAY_MODE;
track->db_depth_info &= C_028010_ARRAY_MODE; track->db_depth_info &= C_028010_ARRAY_MODE;
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { if (reloc->tiling_flags & RADEON_TILING_MACRO) {
ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1); ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1);
track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1); track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1);
} else { } else {
...@@ -1084,9 +1084,9 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ...@@ -1084,9 +1084,9 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
} }
tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16; tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
track->vgt_strmout_bo[tmp] = reloc->robj; track->vgt_strmout_bo[tmp] = reloc->robj;
track->vgt_strmout_bo_mc[tmp] = reloc->lobj.gpu_offset; track->vgt_strmout_bo_mc[tmp] = reloc->gpu_offset;
track->streamout_dirty = true; track->streamout_dirty = true;
break; break;
case VGT_STRMOUT_BUFFER_SIZE_0: case VGT_STRMOUT_BUFFER_SIZE_0:
...@@ -1105,7 +1105,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ...@@ -1105,7 +1105,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
"0x%04X\n", reg); "0x%04X\n", reg);
return -EINVAL; return -EINVAL;
} }
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
break; break;
case R_028238_CB_TARGET_MASK: case R_028238_CB_TARGET_MASK:
track->cb_target_mask = radeon_get_ib_value(p, idx); track->cb_target_mask = radeon_get_ib_value(p, idx);
...@@ -1142,10 +1142,10 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ...@@ -1142,10 +1142,10 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
} }
tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4; tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
track->cb_color_info[tmp] = radeon_get_ib_value(p, idx); track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { if (reloc->tiling_flags & RADEON_TILING_MACRO) {
ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1); ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1);
track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1); track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1);
} else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { } else if (reloc->tiling_flags & RADEON_TILING_MICRO) {
ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1); ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1);
track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1); track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1);
} }
...@@ -1214,7 +1214,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ...@@ -1214,7 +1214,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
} }
track->cb_color_frag_bo[tmp] = reloc->robj; track->cb_color_frag_bo[tmp] = reloc->robj;
track->cb_color_frag_offset[tmp] = (u64)ib[idx] << 8; track->cb_color_frag_offset[tmp] = (u64)ib[idx] << 8;
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
} }
if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) { if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) {
track->cb_dirty = true; track->cb_dirty = true;
...@@ -1245,7 +1245,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ...@@ -1245,7 +1245,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
} }
track->cb_color_tile_bo[tmp] = reloc->robj; track->cb_color_tile_bo[tmp] = reloc->robj;
track->cb_color_tile_offset[tmp] = (u64)ib[idx] << 8; track->cb_color_tile_offset[tmp] = (u64)ib[idx] << 8;
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
} }
if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) { if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) {
track->cb_dirty = true; track->cb_dirty = true;
...@@ -1281,10 +1281,10 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ...@@ -1281,10 +1281,10 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
} }
tmp = (reg - CB_COLOR0_BASE) / 4; tmp = (reg - CB_COLOR0_BASE) / 4;
track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
track->cb_color_base_last[tmp] = ib[idx]; track->cb_color_base_last[tmp] = ib[idx];
track->cb_color_bo[tmp] = reloc->robj; track->cb_color_bo[tmp] = reloc->robj;
track->cb_color_bo_mc[tmp] = reloc->lobj.gpu_offset; track->cb_color_bo_mc[tmp] = reloc->gpu_offset;
track->cb_dirty = true; track->cb_dirty = true;
break; break;
case DB_DEPTH_BASE: case DB_DEPTH_BASE:
...@@ -1295,9 +1295,9 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ...@@ -1295,9 +1295,9 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
return -EINVAL; return -EINVAL;
} }
track->db_offset = radeon_get_ib_value(p, idx) << 8; track->db_offset = radeon_get_ib_value(p, idx) << 8;
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
track->db_bo = reloc->robj; track->db_bo = reloc->robj;
track->db_bo_mc = reloc->lobj.gpu_offset; track->db_bo_mc = reloc->gpu_offset;
track->db_dirty = true; track->db_dirty = true;
break; break;
case DB_HTILE_DATA_BASE: case DB_HTILE_DATA_BASE:
...@@ -1308,7 +1308,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ...@@ -1308,7 +1308,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
return -EINVAL; return -EINVAL;
} }
track->htile_offset = radeon_get_ib_value(p, idx) << 8; track->htile_offset = radeon_get_ib_value(p, idx) << 8;
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
track->htile_bo = reloc->robj; track->htile_bo = reloc->robj;
track->db_dirty = true; track->db_dirty = true;
break; break;
...@@ -1377,7 +1377,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ...@@ -1377,7 +1377,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
"0x%04X\n", reg); "0x%04X\n", reg);
return -EINVAL; return -EINVAL;
} }
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
break; break;
case SX_MEMORY_EXPORT_BASE: case SX_MEMORY_EXPORT_BASE:
r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
...@@ -1386,7 +1386,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ...@@ -1386,7 +1386,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
"0x%04X\n", reg); "0x%04X\n", reg);
return -EINVAL; return -EINVAL;
} }
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
break; break;
case SX_MISC: case SX_MISC:
track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
...@@ -1672,7 +1672,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -1672,7 +1672,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
return -EINVAL; return -EINVAL;
} }
offset = reloc->lobj.gpu_offset + offset = reloc->gpu_offset +
(idx_value & 0xfffffff0) + (idx_value & 0xfffffff0) +
((u64)(tmp & 0xff) << 32); ((u64)(tmp & 0xff) << 32);
...@@ -1713,7 +1713,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -1713,7 +1713,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
return -EINVAL; return -EINVAL;
} }
offset = reloc->lobj.gpu_offset + offset = reloc->gpu_offset +
idx_value + idx_value +
((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
...@@ -1765,7 +1765,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -1765,7 +1765,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
return -EINVAL; return -EINVAL;
} }
offset = reloc->lobj.gpu_offset + offset = reloc->gpu_offset +
(radeon_get_ib_value(p, idx+1) & 0xfffffff0) + (radeon_get_ib_value(p, idx+1) & 0xfffffff0) +
((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
...@@ -1805,7 +1805,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -1805,7 +1805,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
tmp = radeon_get_ib_value(p, idx) + tmp = radeon_get_ib_value(p, idx) +
((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
offset = reloc->lobj.gpu_offset + tmp; offset = reloc->gpu_offset + tmp;
if ((tmp + size) > radeon_bo_size(reloc->robj)) { if ((tmp + size) > radeon_bo_size(reloc->robj)) {
dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n", dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
...@@ -1835,7 +1835,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -1835,7 +1835,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
tmp = radeon_get_ib_value(p, idx+2) + tmp = radeon_get_ib_value(p, idx+2) +
((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32); ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
offset = reloc->lobj.gpu_offset + tmp; offset = reloc->gpu_offset + tmp;
if ((tmp + size) > radeon_bo_size(reloc->robj)) { if ((tmp + size) > radeon_bo_size(reloc->robj)) {
dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
...@@ -1861,7 +1861,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -1861,7 +1861,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
DRM_ERROR("bad SURFACE_SYNC\n"); DRM_ERROR("bad SURFACE_SYNC\n");
return -EINVAL; return -EINVAL;
} }
ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
} }
break; break;
case PACKET3_EVENT_WRITE: case PACKET3_EVENT_WRITE:
...@@ -1877,7 +1877,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -1877,7 +1877,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
DRM_ERROR("bad EVENT_WRITE\n"); DRM_ERROR("bad EVENT_WRITE\n");
return -EINVAL; return -EINVAL;
} }
offset = reloc->lobj.gpu_offset + offset = reloc->gpu_offset +
(radeon_get_ib_value(p, idx+1) & 0xfffffff8) + (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
...@@ -1899,7 +1899,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -1899,7 +1899,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
return -EINVAL; return -EINVAL;
} }
offset = reloc->lobj.gpu_offset + offset = reloc->gpu_offset +
(radeon_get_ib_value(p, idx+1) & 0xfffffffc) + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
...@@ -1964,11 +1964,11 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -1964,11 +1964,11 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
DRM_ERROR("bad SET_RESOURCE\n"); DRM_ERROR("bad SET_RESOURCE\n");
return -EINVAL; return -EINVAL;
} }
base_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); base_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) if (reloc->tiling_flags & RADEON_TILING_MACRO)
ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1); ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) else if (reloc->tiling_flags & RADEON_TILING_MICRO)
ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1); ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
} }
texture = reloc->robj; texture = reloc->robj;
...@@ -1978,13 +1978,13 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -1978,13 +1978,13 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
DRM_ERROR("bad SET_RESOURCE\n"); DRM_ERROR("bad SET_RESOURCE\n");
return -EINVAL; return -EINVAL;
} }
mip_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); mip_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
mipmap = reloc->robj; mipmap = reloc->robj;
r = r600_check_texture_resource(p, idx+(i*7)+1, r = r600_check_texture_resource(p, idx+(i*7)+1,
texture, mipmap, texture, mipmap,
base_offset + radeon_get_ib_value(p, idx+1+(i*7)+2), base_offset + radeon_get_ib_value(p, idx+1+(i*7)+2),
mip_offset + radeon_get_ib_value(p, idx+1+(i*7)+3), mip_offset + radeon_get_ib_value(p, idx+1+(i*7)+3),
reloc->lobj.tiling_flags); reloc->tiling_flags);
if (r) if (r)
return r; return r;
ib[idx+1+(i*7)+2] += base_offset; ib[idx+1+(i*7)+2] += base_offset;
...@@ -2008,7 +2008,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -2008,7 +2008,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
ib[idx+1+(i*7)+1] = radeon_bo_size(reloc->robj) - offset; ib[idx+1+(i*7)+1] = radeon_bo_size(reloc->robj) - offset;
} }
offset64 = reloc->lobj.gpu_offset + offset; offset64 = reloc->gpu_offset + offset;
ib[idx+1+(i*8)+0] = offset64; ib[idx+1+(i*8)+0] = offset64;
ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) | ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
(upper_32_bits(offset64) & 0xff); (upper_32_bits(offset64) & 0xff);
...@@ -2118,7 +2118,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -2118,7 +2118,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
offset + 4, radeon_bo_size(reloc->robj)); offset + 4, radeon_bo_size(reloc->robj));
return -EINVAL; return -EINVAL;
} }
ib[idx+1] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); ib[idx+1] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
} }
break; break;
case PACKET3_SURFACE_BASE_UPDATE: case PACKET3_SURFACE_BASE_UPDATE:
...@@ -2151,7 +2151,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -2151,7 +2151,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
offset + 4, radeon_bo_size(reloc->robj)); offset + 4, radeon_bo_size(reloc->robj));
return -EINVAL; return -EINVAL;
} }
offset += reloc->lobj.gpu_offset; offset += reloc->gpu_offset;
ib[idx+1] = offset; ib[idx+1] = offset;
ib[idx+2] = upper_32_bits(offset) & 0xff; ib[idx+2] = upper_32_bits(offset) & 0xff;
} }
...@@ -2170,7 +2170,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -2170,7 +2170,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
offset + 4, radeon_bo_size(reloc->robj)); offset + 4, radeon_bo_size(reloc->robj));
return -EINVAL; return -EINVAL;
} }
offset += reloc->lobj.gpu_offset; offset += reloc->gpu_offset;
ib[idx+3] = offset; ib[idx+3] = offset;
ib[idx+4] = upper_32_bits(offset) & 0xff; ib[idx+4] = upper_32_bits(offset) & 0xff;
} }
...@@ -2199,7 +2199,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -2199,7 +2199,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
offset + 8, radeon_bo_size(reloc->robj)); offset + 8, radeon_bo_size(reloc->robj));
return -EINVAL; return -EINVAL;
} }
offset += reloc->lobj.gpu_offset; offset += reloc->gpu_offset;
ib[idx+0] = offset; ib[idx+0] = offset;
ib[idx+1] = upper_32_bits(offset) & 0xff; ib[idx+1] = upper_32_bits(offset) & 0xff;
break; break;
...@@ -2224,7 +2224,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -2224,7 +2224,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
offset + 4, radeon_bo_size(reloc->robj)); offset + 4, radeon_bo_size(reloc->robj));
return -EINVAL; return -EINVAL;
} }
offset += reloc->lobj.gpu_offset; offset += reloc->gpu_offset;
ib[idx+1] = offset; ib[idx+1] = offset;
ib[idx+2] = upper_32_bits(offset) & 0xff; ib[idx+2] = upper_32_bits(offset) & 0xff;
} else { } else {
...@@ -2248,7 +2248,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ...@@ -2248,7 +2248,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
offset + 4, radeon_bo_size(reloc->robj)); offset + 4, radeon_bo_size(reloc->robj));
return -EINVAL; return -EINVAL;
} }
offset += reloc->lobj.gpu_offset; offset += reloc->gpu_offset;
ib[idx+3] = offset; ib[idx+3] = offset;
ib[idx+4] = upper_32_bits(offset) & 0xff; ib[idx+4] = upper_32_bits(offset) & 0xff;
} else { } else {
...@@ -2505,14 +2505,14 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) ...@@ -2505,14 +2505,14 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
dst_offset = radeon_get_ib_value(p, idx+1); dst_offset = radeon_get_ib_value(p, idx+1);
dst_offset <<= 8; dst_offset <<= 8;
ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
p->idx += count + 5; p->idx += count + 5;
} else { } else {
dst_offset = radeon_get_ib_value(p, idx+1); dst_offset = radeon_get_ib_value(p, idx+1);
dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
p->idx += count + 3; p->idx += count + 3;
} }
if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
...@@ -2539,22 +2539,22 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) ...@@ -2539,22 +2539,22 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
/* tiled src, linear dst */ /* tiled src, linear dst */
src_offset = radeon_get_ib_value(p, idx+1); src_offset = radeon_get_ib_value(p, idx+1);
src_offset <<= 8; src_offset <<= 8;
ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
dst_offset = radeon_get_ib_value(p, idx+5); dst_offset = radeon_get_ib_value(p, idx+5);
dst_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; dst_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+5] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; ib[idx+6] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
} else { } else {
/* linear src, tiled dst */ /* linear src, tiled dst */
src_offset = radeon_get_ib_value(p, idx+5); src_offset = radeon_get_ib_value(p, idx+5);
src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+5] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
dst_offset = radeon_get_ib_value(p, idx+1); dst_offset = radeon_get_ib_value(p, idx+1);
dst_offset <<= 8; dst_offset <<= 8;
ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
} }
p->idx += 7; p->idx += 7;
} else { } else {
...@@ -2564,10 +2564,10 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) ...@@ -2564,10 +2564,10 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
dst_offset = radeon_get_ib_value(p, idx+1); dst_offset = radeon_get_ib_value(p, idx+1);
dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
p->idx += 5; p->idx += 5;
} else { } else {
src_offset = radeon_get_ib_value(p, idx+2); src_offset = radeon_get_ib_value(p, idx+2);
...@@ -2575,10 +2575,10 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) ...@@ -2575,10 +2575,10 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
dst_offset = radeon_get_ib_value(p, idx+1); dst_offset = radeon_get_ib_value(p, idx+1);
dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff0000)) << 16; dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff0000)) << 16;
ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
ib[idx+3] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; ib[idx+3] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff) << 16; ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) & 0xff) << 16;
p->idx += 4; p->idx += 4;
} }
} }
...@@ -2610,8 +2610,8 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) ...@@ -2610,8 +2610,8 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
return -EINVAL; return -EINVAL;
} }
ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000; ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000;
p->idx += 4; p->idx += 4;
break; break;
case DMA_PACKET_NOP: case DMA_PACKET_NOP:
......
...@@ -363,9 +363,8 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, i ...@@ -363,9 +363,8 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, i
void radeon_fence_process(struct radeon_device *rdev, int ring); void radeon_fence_process(struct radeon_device *rdev, int ring);
bool radeon_fence_signaled(struct radeon_fence *fence); bool radeon_fence_signaled(struct radeon_fence *fence);
int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
int radeon_fence_wait_locked(struct radeon_fence *fence); int radeon_fence_wait_next(struct radeon_device *rdev, int ring);
int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring); int radeon_fence_wait_empty(struct radeon_device *rdev, int ring);
int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
int radeon_fence_wait_any(struct radeon_device *rdev, int radeon_fence_wait_any(struct radeon_device *rdev,
struct radeon_fence **fences, struct radeon_fence **fences,
bool intr); bool intr);
...@@ -457,6 +456,7 @@ struct radeon_bo { ...@@ -457,6 +456,7 @@ struct radeon_bo {
/* Protected by gem.mutex */ /* Protected by gem.mutex */
struct list_head list; struct list_head list;
/* Protected by tbo.reserved */ /* Protected by tbo.reserved */
u32 initial_domain;
u32 placements[3]; u32 placements[3];
struct ttm_placement placement; struct ttm_placement placement;
struct ttm_buffer_object tbo; struct ttm_buffer_object tbo;
...@@ -479,16 +479,6 @@ struct radeon_bo { ...@@ -479,16 +479,6 @@ struct radeon_bo {
}; };
#define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base) #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base)
struct radeon_bo_list {
struct ttm_validate_buffer tv;
struct radeon_bo *bo;
uint64_t gpu_offset;
bool written;
unsigned domain;
unsigned alt_domain;
u32 tiling_flags;
};
int radeon_gem_debugfs_init(struct radeon_device *rdev); int radeon_gem_debugfs_init(struct radeon_device *rdev);
/* sub-allocation manager, it has to be protected by another lock. /* sub-allocation manager, it has to be protected by another lock.
...@@ -805,8 +795,8 @@ struct radeon_ring { ...@@ -805,8 +795,8 @@ struct radeon_ring {
unsigned ring_size; unsigned ring_size;
unsigned ring_free_dw; unsigned ring_free_dw;
int count_dw; int count_dw;
unsigned long last_activity; atomic_t last_rptr;
unsigned last_rptr; atomic64_t last_activity;
uint64_t gpu_addr; uint64_t gpu_addr;
uint32_t align_mask; uint32_t align_mask;
uint32_t ptr_mask; uint32_t ptr_mask;
...@@ -858,17 +848,22 @@ struct radeon_mec { ...@@ -858,17 +848,22 @@ struct radeon_mec {
#define R600_PTE_READABLE (1 << 5) #define R600_PTE_READABLE (1 << 5)
#define R600_PTE_WRITEABLE (1 << 6) #define R600_PTE_WRITEABLE (1 << 6)
/* Per-page-table bookkeeping entry. struct radeon_vm keeps an array of
 * these (its page_tables member), one for each page directory entry.
 */
struct radeon_vm_pt {
/* buffer object associated with this page table entry
 * NOTE(review): presumably the BO backing the page table -- confirm in radeon_vm.c
 */
struct radeon_bo *bo;
/* NOTE(review): exact meaning of addr is not visible here; presumably the
 * page table address recorded for the page directory -- confirm in radeon_vm.c
 */
uint64_t addr;
};
struct radeon_vm { struct radeon_vm {
struct list_head list;
struct list_head va; struct list_head va;
unsigned id; unsigned id;
/* contains the page directory */ /* contains the page directory */
struct radeon_sa_bo *page_directory; struct radeon_bo *page_directory;
uint64_t pd_gpu_addr; uint64_t pd_gpu_addr;
unsigned max_pde_used;
/* array of page tables, one for each page directory entry */ /* array of page tables, one for each page directory entry */
struct radeon_sa_bo **page_tables; struct radeon_vm_pt *page_tables;
struct mutex mutex; struct mutex mutex;
/* last fence for cs using this vm */ /* last fence for cs using this vm */
...@@ -880,10 +875,7 @@ struct radeon_vm { ...@@ -880,10 +875,7 @@ struct radeon_vm {
}; };
struct radeon_vm_manager { struct radeon_vm_manager {
struct mutex lock;
struct list_head lru_vm;
struct radeon_fence *active[RADEON_NUM_VM]; struct radeon_fence *active[RADEON_NUM_VM];
struct radeon_sa_manager sa_manager;
uint32_t max_pfn; uint32_t max_pfn;
/* number of VMIDs */ /* number of VMIDs */
unsigned nvm; unsigned nvm;
...@@ -986,9 +978,12 @@ void cayman_dma_fini(struct radeon_device *rdev); ...@@ -986,9 +978,12 @@ void cayman_dma_fini(struct radeon_device *rdev);
struct radeon_cs_reloc { struct radeon_cs_reloc {
struct drm_gem_object *gobj; struct drm_gem_object *gobj;
struct radeon_bo *robj; struct radeon_bo *robj;
struct radeon_bo_list lobj; struct ttm_validate_buffer tv;
uint64_t gpu_offset;
unsigned domain;
unsigned alt_domain;
uint32_t tiling_flags;
uint32_t handle; uint32_t handle;
uint32_t flags;
}; };
struct radeon_cs_chunk { struct radeon_cs_chunk {
...@@ -1012,6 +1007,7 @@ struct radeon_cs_parser { ...@@ -1012,6 +1007,7 @@ struct radeon_cs_parser {
unsigned nrelocs; unsigned nrelocs;
struct radeon_cs_reloc *relocs; struct radeon_cs_reloc *relocs;
struct radeon_cs_reloc **relocs_ptr; struct radeon_cs_reloc **relocs_ptr;
struct radeon_cs_reloc *vm_bos;
struct list_head validated; struct list_head validated;
unsigned dma_reloc_idx; unsigned dma_reloc_idx;
/* indices of various chunks */ /* indices of various chunks */
...@@ -1635,7 +1631,6 @@ int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, ...@@ -1635,7 +1631,6 @@ int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
struct radeon_vce { struct radeon_vce {
struct radeon_bo *vcpu_bo; struct radeon_bo *vcpu_bo;
void *cpu_addr;
uint64_t gpu_addr; uint64_t gpu_addr;
unsigned fw_version; unsigned fw_version;
unsigned fb_version; unsigned fb_version;
...@@ -2117,6 +2112,8 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, ...@@ -2117,6 +2112,8 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp); struct drm_file *filp);
int radeon_gem_va_ioctl(struct drm_device *dev, void *data, int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp); struct drm_file *filp);
int radeon_gem_op_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data, int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp); struct drm_file *filp);
...@@ -2307,6 +2304,10 @@ struct radeon_device { ...@@ -2307,6 +2304,10 @@ struct radeon_device {
/* virtual memory */ /* virtual memory */
struct radeon_vm_manager vm_manager; struct radeon_vm_manager vm_manager;
struct mutex gpu_clock_mutex; struct mutex gpu_clock_mutex;
/* memory stats */
atomic64_t vram_usage;
atomic64_t gtt_usage;
atomic64_t num_bytes_moved;
/* ACPI interface */ /* ACPI interface */
struct radeon_atif atif; struct radeon_atif atif;
struct radeon_atcs atcs; struct radeon_atcs atcs;
...@@ -2794,16 +2795,22 @@ extern void radeon_program_register_sequence(struct radeon_device *rdev, ...@@ -2794,16 +2795,22 @@ extern void radeon_program_register_sequence(struct radeon_device *rdev,
*/ */
int radeon_vm_manager_init(struct radeon_device *rdev); int radeon_vm_manager_init(struct radeon_device *rdev);
void radeon_vm_manager_fini(struct radeon_device *rdev); void radeon_vm_manager_fini(struct radeon_device *rdev);
void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm); struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm); struct radeon_vm *vm,
struct list_head *head);
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
struct radeon_vm *vm, int ring); struct radeon_vm *vm, int ring);
void radeon_vm_flush(struct radeon_device *rdev,
struct radeon_vm *vm,
int ring);
void radeon_vm_fence(struct radeon_device *rdev, void radeon_vm_fence(struct radeon_device *rdev,
struct radeon_vm *vm, struct radeon_vm *vm,
struct radeon_fence *fence); struct radeon_fence *fence);
uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr); uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr);
int radeon_vm_update_page_directory(struct radeon_device *rdev,
struct radeon_vm *vm);
int radeon_vm_bo_update(struct radeon_device *rdev, int radeon_vm_bo_update(struct radeon_device *rdev,
struct radeon_vm *vm, struct radeon_vm *vm,
struct radeon_bo *bo, struct radeon_bo *bo,
......
...@@ -24,16 +24,59 @@ ...@@ -24,16 +24,59 @@
* Authors: * Authors:
* Jerome Glisse <glisse@freedesktop.org> * Jerome Glisse <glisse@freedesktop.org>
*/ */
#include <linux/list_sort.h>
#include <drm/drmP.h> #include <drm/drmP.h>
#include <drm/radeon_drm.h> #include <drm/radeon_drm.h>
#include "radeon_reg.h" #include "radeon_reg.h"
#include "radeon.h" #include "radeon.h"
#include "radeon_trace.h" #include "radeon_trace.h"
/* Highest bucket index. radeon_cs_buckets_add() clamps larger priorities
 * to this value.
 */
#define RADEON_CS_MAX_PRIORITY 32u
/* One bucket for every priority in 0..RADEON_CS_MAX_PRIORITY (inclusive). */
#define RADEON_CS_NUM_BUCKETS (RADEON_CS_MAX_PRIORITY + 1)
/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
/* bucket[i] is the list of items that were added with priority i */
struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};
static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
unsigned i;
for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
INIT_LIST_HEAD(&b->bucket[i]);
}
/*
 * Queue @item on the bucket of @b selected by @priority. Any priority
 * above RADEON_CS_MAX_PRIORITY lands in the last bucket.
 */
static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	unsigned slot = priority;

	if (slot > RADEON_CS_MAX_PRIORITY)
		slot = RADEON_CS_MAX_PRIORITY;

	/* Buffers that show up earlier in the relocation list are likely
	 * to be used more often than later ones, so buffers sharing a
	 * priority must keep their relative order (the sort has to be
	 * stable). Appending at the tail of the bucket guarantees that.
	 */
	list_add_tail(item, &b->bucket[slot]);
}
static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
struct list_head *out_list)
{
unsigned i;
/* Connect the sorted buckets in the output list. */
for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
list_splice(&b->bucket[i], out_list);
}
}
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{ {
struct drm_device *ddev = p->rdev->ddev; struct drm_device *ddev = p->rdev->ddev;
struct radeon_cs_chunk *chunk; struct radeon_cs_chunk *chunk;
struct radeon_cs_buckets buckets;
unsigned i, j; unsigned i, j;
bool duplicate; bool duplicate;
...@@ -52,8 +95,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) ...@@ -52,8 +95,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
if (p->relocs == NULL) { if (p->relocs == NULL) {
return -ENOMEM; return -ENOMEM;
} }
radeon_cs_buckets_init(&buckets);
for (i = 0; i < p->nrelocs; i++) { for (i = 0; i < p->nrelocs; i++) {
struct drm_radeon_cs_reloc *r; struct drm_radeon_cs_reloc *r;
unsigned priority;
duplicate = false; duplicate = false;
r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4]; r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
...@@ -78,8 +125,14 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) ...@@ -78,8 +125,14 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
} }
p->relocs_ptr[i] = &p->relocs[i]; p->relocs_ptr[i] = &p->relocs[i];
p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj); p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
p->relocs[i].lobj.bo = p->relocs[i].robj;
p->relocs[i].lobj.written = !!r->write_domain; /* The userspace buffer priorities are from 0 to 15. A higher
* number means the buffer is more important.
* Also, the buffers used for write have a higher priority than
* the buffers used for read only, which doubles the range
* to 0 to 31. 32 is reserved for the kernel driver.
*/
priority = (r->flags & 0xf) * 2 + !!r->write_domain;
/* the first reloc of an UVD job is the msg and that must be in /* the first reloc of an UVD job is the msg and that must be in
VRAM, also put everything into VRAM on AGP cards to avoid VRAM, also put everything into VRAM on AGP cards to avoid
...@@ -87,29 +140,38 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) ...@@ -87,29 +140,38 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
if (p->ring == R600_RING_TYPE_UVD_INDEX && if (p->ring == R600_RING_TYPE_UVD_INDEX &&
(i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) { (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) {
/* TODO: is this still needed for NI+ ? */ /* TODO: is this still needed for NI+ ? */
p->relocs[i].lobj.domain = p->relocs[i].domain =
RADEON_GEM_DOMAIN_VRAM; RADEON_GEM_DOMAIN_VRAM;
p->relocs[i].lobj.alt_domain = p->relocs[i].alt_domain =
RADEON_GEM_DOMAIN_VRAM; RADEON_GEM_DOMAIN_VRAM;
/* prioritize this over any other relocation */
priority = RADEON_CS_MAX_PRIORITY;
} else { } else {
uint32_t domain = r->write_domain ? uint32_t domain = r->write_domain ?
r->write_domain : r->read_domains; r->write_domain : r->read_domains;
p->relocs[i].lobj.domain = domain; p->relocs[i].domain = domain;
if (domain == RADEON_GEM_DOMAIN_VRAM) if (domain == RADEON_GEM_DOMAIN_VRAM)
domain |= RADEON_GEM_DOMAIN_GTT; domain |= RADEON_GEM_DOMAIN_GTT;
p->relocs[i].lobj.alt_domain = domain; p->relocs[i].alt_domain = domain;
} }
p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
p->relocs[i].handle = r->handle; p->relocs[i].handle = r->handle;
radeon_bo_list_add_object(&p->relocs[i].lobj, radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
&p->validated); priority);
} }
return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring);
radeon_cs_buckets_get_list(&buckets, &p->validated);
if (p->cs_flags & RADEON_CS_USE_VM)
p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
&p->validated);
return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
} }
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority) static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
...@@ -290,6 +352,16 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) ...@@ -290,6 +352,16 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
return 0; return 0;
} }
/**
 * cmp_size_smaller_first() - list_sort() comparison callback, smallest BO first
 * @priv: unused; only present to match the callback signature
 * @a: list node embedded as tv.head in a struct radeon_cs_reloc
 * @b: list node embedded as tv.head in a struct radeon_cs_reloc
 *
 * Compares the page counts (tbo.num_pages) of the buffer objects the two
 * relocations refer to.
 *
 * Returns a negative value if A is smaller than B, a positive value if A
 * is larger than B and 0 if both have the same number of pages.
 */
static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head);
	struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head);

	/* Sort A before B if A is smaller.
	 *
	 * Compare the counts directly rather than subtracting int casts of
	 * them: the casts would truncate counts wider than int and the
	 * subtraction could then yield the wrong sign (or overflow).
	 */
	if (la->robj->tbo.num_pages < lb->robj->tbo.num_pages)
		return -1;
	if (la->robj->tbo.num_pages > lb->robj->tbo.num_pages)
		return 1;
	return 0;
}
/** /**
* cs_parser_fini() - clean parser states * cs_parser_fini() - clean parser states
* @parser: parser structure holding parsing context. * @parser: parser structure holding parsing context.
...@@ -303,6 +375,18 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo ...@@ -303,6 +375,18 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
unsigned i; unsigned i;
if (!error) { if (!error) {
/* Sort the buffer list from the smallest to largest buffer,
* which affects the order of buffers in the LRU list.
* This assures that the smallest buffers are added first
* to the LRU list, so they are likely to be later evicted
* first, instead of large buffers whose eviction is more
* expensive.
*
* This slightly lowers the number of bytes moved by TTM
* per frame under memory pressure.
*/
list_sort(NULL, &parser->validated, cmp_size_smaller_first);
ttm_eu_fence_buffer_objects(&parser->ticket, ttm_eu_fence_buffer_objects(&parser->ticket,
&parser->validated, &parser->validated,
parser->ib.fence); parser->ib.fence);
...@@ -320,6 +404,7 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo ...@@ -320,6 +404,7 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
kfree(parser->track); kfree(parser->track);
kfree(parser->relocs); kfree(parser->relocs);
kfree(parser->relocs_ptr); kfree(parser->relocs_ptr);
kfree(parser->vm_bos);
for (i = 0; i < parser->nchunks; i++) for (i = 0; i < parser->nchunks; i++)
drm_free_large(parser->chunks[i].kdata); drm_free_large(parser->chunks[i].kdata);
kfree(parser->chunks); kfree(parser->chunks);
...@@ -359,24 +444,32 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, ...@@ -359,24 +444,32 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
return r; return r;
} }
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser, static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
struct radeon_vm *vm) struct radeon_vm *vm)
{ {
struct radeon_device *rdev = parser->rdev; struct radeon_device *rdev = p->rdev;
struct radeon_bo_list *lobj; int i, r;
struct radeon_bo *bo;
int r;
r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem); r = radeon_vm_update_page_directory(rdev, vm);
if (r) { if (r)
return r; return r;
}
list_for_each_entry(lobj, &parser->validated, tv.head) { r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo,
bo = lobj->bo; &rdev->ring_tmp_bo.bo->tbo.mem);
r = radeon_vm_bo_update(parser->rdev, vm, bo, &bo->tbo.mem); if (r)
if (r) { return r;
for (i = 0; i < p->nrelocs; i++) {
struct radeon_bo *bo;
/* ignore duplicates */
if (p->relocs_ptr[i] != &p->relocs[i])
continue;
bo = p->relocs[i].robj;
r = radeon_vm_bo_update(rdev, vm, bo, &bo->tbo.mem);
if (r)
return r; return r;
}
} }
return 0; return 0;
} }
...@@ -408,20 +501,13 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, ...@@ -408,20 +501,13 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
if (parser->ring == R600_RING_TYPE_UVD_INDEX) if (parser->ring == R600_RING_TYPE_UVD_INDEX)
radeon_uvd_note_usage(rdev); radeon_uvd_note_usage(rdev);
mutex_lock(&rdev->vm_manager.lock);
mutex_lock(&vm->mutex); mutex_lock(&vm->mutex);
r = radeon_vm_alloc_pt(rdev, vm);
if (r) {
goto out;
}
r = radeon_bo_vm_update_pte(parser, vm); r = radeon_bo_vm_update_pte(parser, vm);
if (r) { if (r) {
goto out; goto out;
} }
radeon_cs_sync_rings(parser); radeon_cs_sync_rings(parser);
radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence); radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);
radeon_semaphore_sync_to(parser->ib.semaphore,
radeon_vm_grab_id(rdev, vm, parser->ring));
if ((rdev->family >= CHIP_TAHITI) && if ((rdev->family >= CHIP_TAHITI) &&
(parser->chunk_const_ib_idx != -1)) { (parser->chunk_const_ib_idx != -1)) {
...@@ -430,14 +516,8 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, ...@@ -430,14 +516,8 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
r = radeon_ib_schedule(rdev, &parser->ib, NULL); r = radeon_ib_schedule(rdev, &parser->ib, NULL);
} }
if (!r) {
radeon_vm_fence(rdev, vm, parser->ib.fence);
}
out: out:
radeon_vm_add_to_lru(rdev, vm);
mutex_unlock(&vm->mutex); mutex_unlock(&vm->mutex);
mutex_unlock(&rdev->vm_manager.lock);
return r; return r;
} }
...@@ -705,9 +785,9 @@ int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p, ...@@ -705,9 +785,9 @@ int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
/* FIXME: we assume reloc size is 4 dwords */ /* FIXME: we assume reloc size is 4 dwords */
if (nomm) { if (nomm) {
*cs_reloc = p->relocs; *cs_reloc = p->relocs;
(*cs_reloc)->lobj.gpu_offset = (*cs_reloc)->gpu_offset =
(u64)relocs_chunk->kdata[idx + 3] << 32; (u64)relocs_chunk->kdata[idx + 3] << 32;
(*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0]; (*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
} else } else
*cs_reloc = p->relocs_ptr[(idx / 4)]; *cs_reloc = p->relocs_ptr[(idx / 4)];
return 0; return 0;
......
...@@ -1191,14 +1191,12 @@ int radeon_device_init(struct radeon_device *rdev, ...@@ -1191,14 +1191,12 @@ int radeon_device_init(struct radeon_device *rdev,
r = radeon_gem_init(rdev); r = radeon_gem_init(rdev);
if (r) if (r)
return r; return r;
/* initialize vm here */
mutex_init(&rdev->vm_manager.lock);
/* Adjust VM size here. /* Adjust VM size here.
* Currently set to 4GB ((1 << 20) 4k pages). * Currently set to 4GB ((1 << 20) 4k pages).
* Max GPUVM size for cayman and SI is 40 bits. * Max GPUVM size for cayman and SI is 40 bits.
*/ */
rdev->vm_manager.max_pfn = 1 << 20; rdev->vm_manager.max_pfn = 1 << 20;
INIT_LIST_HEAD(&rdev->vm_manager.lru_vm);
/* Set asic functions */ /* Set asic functions */
r = radeon_asic_init(rdev); r = radeon_asic_init(rdev);
...@@ -1445,10 +1443,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) ...@@ -1445,10 +1443,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
/* evict vram memory */ /* evict vram memory */
radeon_bo_evict_vram(rdev); radeon_bo_evict_vram(rdev);
mutex_lock(&rdev->ring_lock);
/* wait for gpu to finish processing current batch */ /* wait for gpu to finish processing current batch */
for (i = 0; i < RADEON_NUM_RINGS; i++) { for (i = 0; i < RADEON_NUM_RINGS; i++) {
r = radeon_fence_wait_empty_locked(rdev, i); r = radeon_fence_wait_empty(rdev, i);
if (r) { if (r) {
/* delay GPU reset to resume */ /* delay GPU reset to resume */
force_completion = true; force_completion = true;
...@@ -1457,7 +1454,6 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) ...@@ -1457,7 +1454,6 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
if (force_completion) { if (force_completion) {
radeon_fence_driver_force_completion(rdev); radeon_fence_driver_force_completion(rdev);
} }
mutex_unlock(&rdev->ring_lock);
radeon_save_bios_scratch_regs(rdev); radeon_save_bios_scratch_regs(rdev);
......
...@@ -79,9 +79,10 @@ ...@@ -79,9 +79,10 @@
* 2.35.0 - Add CIK macrotile mode array query * 2.35.0 - Add CIK macrotile mode array query
* 2.36.0 - Fix CIK DCE tiling setup * 2.36.0 - Fix CIK DCE tiling setup
* 2.37.0 - allow GS ring setup on r6xx/r7xx * 2.37.0 - allow GS ring setup on r6xx/r7xx
* 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN)
*/ */
#define KMS_DRIVER_MAJOR 2 #define KMS_DRIVER_MAJOR 2
#define KMS_DRIVER_MINOR 37 #define KMS_DRIVER_MINOR 38
#define KMS_DRIVER_PATCHLEVEL 0 #define KMS_DRIVER_PATCHLEVEL 0
int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
int radeon_driver_unload_kms(struct drm_device *dev); int radeon_driver_unload_kms(struct drm_device *dev);
......
...@@ -288,7 +288,6 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq) ...@@ -288,7 +288,6 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
* @rdev: radeon device pointer * @rdev: radeon device pointer
* @target_seq: sequence number(s) we want to wait for * @target_seq: sequence number(s) we want to wait for
* @intr: use interruptable sleep * @intr: use interruptable sleep
* @lock_ring: whether the ring should be locked or not
* *
* Wait for the requested sequence number(s) to be written by any ring * Wait for the requested sequence number(s) to be written by any ring
* (all asics). Sequence number array is indexed by ring id. * (all asics). Sequence number array is indexed by ring id.
...@@ -299,7 +298,7 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq) ...@@ -299,7 +298,7 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
* -EDEADLK is returned when a GPU lockup has been detected. * -EDEADLK is returned when a GPU lockup has been detected.
*/ */
static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
bool intr, bool lock_ring) bool intr)
{ {
uint64_t last_seq[RADEON_NUM_RINGS]; uint64_t last_seq[RADEON_NUM_RINGS];
bool signaled; bool signaled;
...@@ -358,9 +357,6 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, ...@@ -358,9 +357,6 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
if (i != RADEON_NUM_RINGS) if (i != RADEON_NUM_RINGS)
continue; continue;
if (lock_ring)
mutex_lock(&rdev->ring_lock);
for (i = 0; i < RADEON_NUM_RINGS; ++i) { for (i = 0; i < RADEON_NUM_RINGS; ++i) {
if (!target_seq[i]) if (!target_seq[i])
continue; continue;
...@@ -378,14 +374,9 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, ...@@ -378,14 +374,9 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
/* remember that we need an reset */ /* remember that we need an reset */
rdev->needs_reset = true; rdev->needs_reset = true;
if (lock_ring)
mutex_unlock(&rdev->ring_lock);
wake_up_all(&rdev->fence_queue); wake_up_all(&rdev->fence_queue);
return -EDEADLK; return -EDEADLK;
} }
if (lock_ring)
mutex_unlock(&rdev->ring_lock);
} }
} }
return 0; return 0;
...@@ -416,7 +407,7 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) ...@@ -416,7 +407,7 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ) if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ)
return 0; return 0;
r = radeon_fence_wait_seq(fence->rdev, seq, intr, true); r = radeon_fence_wait_seq(fence->rdev, seq, intr);
if (r) if (r)
return r; return r;
...@@ -464,7 +455,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, ...@@ -464,7 +455,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
if (num_rings == 0) if (num_rings == 0)
return -ENOENT; return -ENOENT;
r = radeon_fence_wait_seq(rdev, seq, intr, true); r = radeon_fence_wait_seq(rdev, seq, intr);
if (r) { if (r) {
return r; return r;
} }
...@@ -472,37 +463,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, ...@@ -472,37 +463,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
} }
/** /**
* radeon_fence_wait_locked - wait for a fence to signal * radeon_fence_wait_next - wait for the next fence to signal
*
* @fence: radeon fence object
*
* Wait for the requested fence to signal (all asics).
* Returns 0 if the fence has passed, error for all other cases.
*/
int radeon_fence_wait_locked(struct radeon_fence *fence)
{
uint64_t seq[RADEON_NUM_RINGS] = {};
int r;
if (fence == NULL) {
WARN(1, "Querying an invalid fence : %p !\n", fence);
return -EINVAL;
}
seq[fence->ring] = fence->seq;
if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ)
return 0;
r = radeon_fence_wait_seq(fence->rdev, seq, false, false);
if (r)
return r;
fence->seq = RADEON_FENCE_SIGNALED_SEQ;
return 0;
}
/**
* radeon_fence_wait_next_locked - wait for the next fence to signal
* *
* @rdev: radeon device pointer * @rdev: radeon device pointer
* @ring: ring index the fence is associated with * @ring: ring index the fence is associated with
...@@ -511,7 +472,7 @@ int radeon_fence_wait_locked(struct radeon_fence *fence) ...@@ -511,7 +472,7 @@ int radeon_fence_wait_locked(struct radeon_fence *fence)
* Returns 0 if the next fence has passed, error for all other cases. * Returns 0 if the next fence has passed, error for all other cases.
* Caller must hold ring lock. * Caller must hold ring lock.
*/ */
int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
{ {
uint64_t seq[RADEON_NUM_RINGS] = {}; uint64_t seq[RADEON_NUM_RINGS] = {};
...@@ -521,11 +482,11 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) ...@@ -521,11 +482,11 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
already the last emited fence */ already the last emited fence */
return -ENOENT; return -ENOENT;
} }
return radeon_fence_wait_seq(rdev, seq, false, false); return radeon_fence_wait_seq(rdev, seq, false);
} }
/** /**
* radeon_fence_wait_empty_locked - wait for all fences to signal * radeon_fence_wait_empty - wait for all fences to signal
* *
* @rdev: radeon device pointer * @rdev: radeon device pointer
* @ring: ring index the fence is associated with * @ring: ring index the fence is associated with
...@@ -534,7 +495,7 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) ...@@ -534,7 +495,7 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
* Returns 0 if the fences have passed, error for all other cases. * Returns 0 if the fences have passed, error for all other cases.
* Caller must hold ring lock. * Caller must hold ring lock.
*/ */
int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
{ {
uint64_t seq[RADEON_NUM_RINGS] = {}; uint64_t seq[RADEON_NUM_RINGS] = {};
int r; int r;
...@@ -543,7 +504,7 @@ int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) ...@@ -543,7 +504,7 @@ int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
if (!seq[ring]) if (!seq[ring])
return 0; return 0;
r = radeon_fence_wait_seq(rdev, seq, false, false); r = radeon_fence_wait_seq(rdev, seq, false);
if (r) { if (r) {
if (r == -EDEADLK) if (r == -EDEADLK)
return -EDEADLK; return -EDEADLK;
...@@ -794,7 +755,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) ...@@ -794,7 +755,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
for (ring = 0; ring < RADEON_NUM_RINGS; ring++) { for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
if (!rdev->fence_drv[ring].initialized) if (!rdev->fence_drv[ring].initialized)
continue; continue;
r = radeon_fence_wait_empty_locked(rdev, ring); r = radeon_fence_wait_empty(rdev, ring);
if (r) { if (r) {
/* no need to trigger GPU reset as we are unloading */ /* no need to trigger GPU reset as we are unloading */
radeon_fence_driver_force_completion(rdev); radeon_fence_driver_force_completion(rdev);
......
...@@ -344,18 +344,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, ...@@ -344,18 +344,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
} }
robj = gem_to_radeon_bo(gobj); robj = gem_to_radeon_bo(gobj);
r = radeon_bo_wait(robj, &cur_placement, true); r = radeon_bo_wait(robj, &cur_placement, true);
switch (cur_placement) { args->domain = radeon_mem_type_to_domain(cur_placement);
case TTM_PL_VRAM:
args->domain = RADEON_GEM_DOMAIN_VRAM;
break;
case TTM_PL_TT:
args->domain = RADEON_GEM_DOMAIN_GTT;
break;
case TTM_PL_SYSTEM:
args->domain = RADEON_GEM_DOMAIN_CPU;
default:
break;
}
drm_gem_object_unreference_unlocked(gobj); drm_gem_object_unreference_unlocked(gobj);
r = radeon_gem_handle_lockup(rdev, r); r = radeon_gem_handle_lockup(rdev, r);
return r; return r;
...@@ -533,6 +522,42 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, ...@@ -533,6 +522,42 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
return r; return r;
} }
/**
 * radeon_gem_op_ioctl - get or set the initial domain of a GEM object
 *
 * @dev: drm device the ioctl was issued on
 * @data: ioctl payload, interpreted as struct drm_radeon_gem_op
 * @filp: drm file used to look up the GEM handle
 *
 * Looks up the buffer named by args->handle, reserves it, and then either
 * reports its initial_domain through args->value
 * (RADEON_GEM_OP_GET_INITIAL_DOMAIN) or overwrites it with the
 * VRAM/GTT/CPU bits of args->value (RADEON_GEM_OP_SET_INITIAL_DOMAIN).
 *
 * Returns 0 on success, -ENOENT when the handle lookup fails, -EINVAL for
 * an unrecognised args->op, or the error returned by radeon_bo_reserve().
 */
int radeon_gem_op_ioctl(struct drm_device *dev, void *data,
			struct drm_file *filp)
{
	struct drm_radeon_gem_op *args = data;
	struct drm_gem_object *gobj;
	struct radeon_bo *robj;
	int r;

	gobj = drm_gem_object_lookup(dev, filp, args->handle);
	if (gobj == NULL)
		return -ENOENT;

	robj = gem_to_radeon_bo(gobj);

	r = radeon_bo_reserve(robj, false);
	if (unlikely(r)) {
		/* nothing was reserved, only the lookup reference is dropped */
		drm_gem_object_unreference_unlocked(gobj);
		return r;
	}

	if (args->op == RADEON_GEM_OP_GET_INITIAL_DOMAIN) {
		args->value = robj->initial_domain;
	} else if (args->op == RADEON_GEM_OP_SET_INITIAL_DOMAIN) {
		/* keep only the three domain bits, discard anything else */
		robj->initial_domain = args->value & (RADEON_GEM_DOMAIN_VRAM |
						      RADEON_GEM_DOMAIN_GTT |
						      RADEON_GEM_DOMAIN_CPU);
	} else {
		r = -EINVAL;
	}

	radeon_bo_unreserve(robj);
	drm_gem_object_unreference_unlocked(gobj);
	return r;
}
int radeon_mode_dumb_create(struct drm_file *file_priv, int radeon_mode_dumb_create(struct drm_file *file_priv,
struct drm_device *dev, struct drm_device *dev,
struct drm_mode_create_dumb *args) struct drm_mode_create_dumb *args)
......
...@@ -486,6 +486,21 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file ...@@ -486,6 +486,21 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file
case RADEON_INFO_VCE_FB_VERSION: case RADEON_INFO_VCE_FB_VERSION:
*value = rdev->vce.fb_version; *value = rdev->vce.fb_version;
break; break;
case RADEON_INFO_NUM_BYTES_MOVED:
value = (uint32_t*)&value64;
value_size = sizeof(uint64_t);
value64 = atomic64_read(&rdev->num_bytes_moved);
break;
case RADEON_INFO_VRAM_USAGE:
value = (uint32_t*)&value64;
value_size = sizeof(uint64_t);
value64 = atomic64_read(&rdev->vram_usage);
break;
case RADEON_INFO_GTT_USAGE:
value = (uint32_t*)&value64;
value_size = sizeof(uint64_t);
value64 = atomic64_read(&rdev->gtt_usage);
break;
default: default:
DRM_DEBUG_KMS("Invalid request %d\n", info->request); DRM_DEBUG_KMS("Invalid request %d\n", info->request);
return -EINVAL; return -EINVAL;
...@@ -544,7 +559,13 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) ...@@ -544,7 +559,13 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
return -ENOMEM; return -ENOMEM;
} }
radeon_vm_init(rdev, &fpriv->vm); r = radeon_vm_init(rdev, &fpriv->vm);
if (r)
return r;
r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
if (r)
return r;
/* map the ib pool buffer read only into /* map the ib pool buffer read only into
* virtual address space */ * virtual address space */
...@@ -553,6 +574,8 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) ...@@ -553,6 +574,8 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET, r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET,
RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_READABLE |
RADEON_VM_PAGE_SNOOPED); RADEON_VM_PAGE_SNOOPED);
radeon_bo_unreserve(rdev->ring_tmp_bo.bo);
if (r) { if (r) {
radeon_vm_fini(rdev, &fpriv->vm); radeon_vm_fini(rdev, &fpriv->vm);
kfree(fpriv); kfree(fpriv);
...@@ -814,5 +837,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = { ...@@ -814,5 +837,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
}; };
int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms); int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
...@@ -56,11 +56,36 @@ static void radeon_bo_clear_va(struct radeon_bo *bo) ...@@ -56,11 +56,36 @@ static void radeon_bo_clear_va(struct radeon_bo *bo)
} }
} }
/*
 * Account the size of @bo in the per-device memory usage counters.
 *
 * @bo:       buffer object whose size (num_pages << PAGE_SHIFT) is accounted
 * @mem_type: TTM memory type; only TTM_PL_TT (gtt_usage) and TTM_PL_VRAM
 *            (vram_usage) are tracked, every other type is ignored
 * @sign:     a positive value adds the size to the counter, anything else
 *            subtracts it
 */
static void radeon_update_memory_usage(struct radeon_bo *bo,
				       unsigned mem_type, int sign)
{
	struct radeon_device *rdev = bo->rdev;
	u64 bytes = (u64)bo->tbo.num_pages << PAGE_SHIFT;
	int grow = sign > 0;

	if (mem_type == TTM_PL_TT) {
		if (grow)
			atomic64_add(bytes, &rdev->gtt_usage);
		else
			atomic64_sub(bytes, &rdev->gtt_usage);
	} else if (mem_type == TTM_PL_VRAM) {
		if (grow)
			atomic64_add(bytes, &rdev->vram_usage);
		else
			atomic64_sub(bytes, &rdev->vram_usage);
	}
}
static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{ {
struct radeon_bo *bo; struct radeon_bo *bo;
bo = container_of(tbo, struct radeon_bo, tbo); bo = container_of(tbo, struct radeon_bo, tbo);
radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);
mutex_lock(&bo->rdev->gem.mutex); mutex_lock(&bo->rdev->gem.mutex);
list_del_init(&bo->list); list_del_init(&bo->list);
mutex_unlock(&bo->rdev->gem.mutex); mutex_unlock(&bo->rdev->gem.mutex);
...@@ -145,6 +170,9 @@ int radeon_bo_create(struct radeon_device *rdev, ...@@ -145,6 +170,9 @@ int radeon_bo_create(struct radeon_device *rdev,
bo->surface_reg = -1; bo->surface_reg = -1;
INIT_LIST_HEAD(&bo->list); INIT_LIST_HEAD(&bo->list);
INIT_LIST_HEAD(&bo->va); INIT_LIST_HEAD(&bo->va);
bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
RADEON_GEM_DOMAIN_GTT |
RADEON_GEM_DOMAIN_CPU);
radeon_ttm_placement_from_domain(bo, domain); radeon_ttm_placement_from_domain(bo, domain);
/* Kernel allocation are uninterruptible */ /* Kernel allocation are uninterruptible */
down_read(&rdev->pm.mclk_lock); down_read(&rdev->pm.mclk_lock);
...@@ -338,39 +366,105 @@ void radeon_bo_fini(struct radeon_device *rdev) ...@@ -338,39 +366,105 @@ void radeon_bo_fini(struct radeon_device *rdev)
arch_phys_wc_del(rdev->mc.vram_mtrr); arch_phys_wc_del(rdev->mc.vram_mtrr);
} }
void radeon_bo_list_add_object(struct radeon_bo_list *lobj, /* Returns how many bytes TTM can move per IB.
struct list_head *head) */
static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
{ {
if (lobj->written) { u64 real_vram_size = rdev->mc.real_vram_size;
list_add(&lobj->tv.head, head); u64 vram_usage = atomic64_read(&rdev->vram_usage);
} else {
list_add_tail(&lobj->tv.head, head); /* This function is based on the current VRAM usage.
} *
* - If all of VRAM is free, allow relocating the number of bytes that
* is equal to 1/4 of the size of VRAM for this IB.
* - If more than one half of VRAM is occupied, only allow relocating
* 1 MB of data for this IB.
*
* - From 0 to one half of used VRAM, the threshold decreases
* linearly.
* __________________
* 1/4 of -|\ |
* VRAM | \ |
* | \ |
* | \ |
* | \ |
* | \ |
* | \ |
* | \________|1 MB
* |----------------|
* VRAM 0 % 100 %
* used used
*
* Note: It's a threshold, not a limit. The threshold must be crossed
* for buffer relocations to stop, so any buffer of an arbitrary size
* can be moved as long as the threshold isn't crossed before
* the relocation takes place. We don't want to disable buffer
* relocations completely.
*
* The idea is that buffers should be placed in VRAM at creation time
* and TTM should only do a minimum number of relocations during
* command submission. In practice, you need to submit at least
* a dozen IBs to move all buffers to VRAM if they are in GTT.
*
* Also, things can get pretty crazy under memory pressure and actual
* VRAM usage can change a lot, so playing safe even at 50% does
* consistently increase performance.
*/
u64 half_vram = real_vram_size >> 1;
u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
u64 bytes_moved_threshold = half_free_vram >> 1;
return max(bytes_moved_threshold, 1024*1024ull);
} }
int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, int radeon_bo_list_validate(struct radeon_device *rdev,
struct ww_acquire_ctx *ticket,
struct list_head *head, int ring) struct list_head *head, int ring)
{ {
struct radeon_bo_list *lobj; struct radeon_cs_reloc *lobj;
struct radeon_bo *bo; struct radeon_bo *bo;
u32 domain;
int r; int r;
u64 bytes_moved = 0, initial_bytes_moved;
u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);
r = ttm_eu_reserve_buffers(ticket, head); r = ttm_eu_reserve_buffers(ticket, head);
if (unlikely(r != 0)) { if (unlikely(r != 0)) {
return r; return r;
} }
list_for_each_entry(lobj, head, tv.head) { list_for_each_entry(lobj, head, tv.head) {
bo = lobj->bo; bo = lobj->robj;
if (!bo->pin_count) { if (!bo->pin_count) {
domain = lobj->domain; u32 domain = lobj->domain;
u32 current_domain =
radeon_mem_type_to_domain(bo->tbo.mem.mem_type);
/* Check if this buffer will be moved and don't move it
* if we have moved too many buffers for this IB already.
*
* Note that this allows moving at least one buffer of
* any size, because it doesn't take the current "bo"
* into account. We don't want to disallow buffer moves
* completely.
*/
if (current_domain != RADEON_GEM_DOMAIN_CPU &&
(domain & current_domain) == 0 && /* will be moved */
bytes_moved > bytes_moved_threshold) {
/* don't move it */
domain = current_domain;
}
retry: retry:
radeon_ttm_placement_from_domain(bo, domain); radeon_ttm_placement_from_domain(bo, domain);
if (ring == R600_RING_TYPE_UVD_INDEX) if (ring == R600_RING_TYPE_UVD_INDEX)
radeon_uvd_force_into_uvd_segment(bo); radeon_uvd_force_into_uvd_segment(bo);
r = ttm_bo_validate(&bo->tbo, &bo->placement,
true, false); initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
initial_bytes_moved;
if (unlikely(r)) { if (unlikely(r)) {
if (r != -ERESTARTSYS && domain != lobj->alt_domain) { if (r != -ERESTARTSYS && domain != lobj->alt_domain) {
domain = lobj->alt_domain; domain = lobj->alt_domain;
...@@ -564,14 +658,23 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, ...@@ -564,14 +658,23 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
} }
void radeon_bo_move_notify(struct ttm_buffer_object *bo, void radeon_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem) struct ttm_mem_reg *new_mem)
{ {
struct radeon_bo *rbo; struct radeon_bo *rbo;
if (!radeon_ttm_bo_is_radeon_bo(bo)) if (!radeon_ttm_bo_is_radeon_bo(bo))
return; return;
rbo = container_of(bo, struct radeon_bo, tbo); rbo = container_of(bo, struct radeon_bo, tbo);
radeon_bo_check_tiling(rbo, 0, 1); radeon_bo_check_tiling(rbo, 0, 1);
radeon_vm_bo_invalidate(rbo->rdev, rbo); radeon_vm_bo_invalidate(rbo->rdev, rbo);
/* update statistics */
if (!new_mem)
return;
radeon_update_memory_usage(rbo, bo->mem.mem_type, -1);
radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
} }
int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
......
...@@ -138,9 +138,8 @@ extern int radeon_bo_evict_vram(struct radeon_device *rdev); ...@@ -138,9 +138,8 @@ extern int radeon_bo_evict_vram(struct radeon_device *rdev);
extern void radeon_bo_force_delete(struct radeon_device *rdev); extern void radeon_bo_force_delete(struct radeon_device *rdev);
extern int radeon_bo_init(struct radeon_device *rdev); extern int radeon_bo_init(struct radeon_device *rdev);
extern void radeon_bo_fini(struct radeon_device *rdev); extern void radeon_bo_fini(struct radeon_device *rdev);
extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj, extern int radeon_bo_list_validate(struct radeon_device *rdev,
struct list_head *head); struct ww_acquire_ctx *ticket,
extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
struct list_head *head, int ring); struct list_head *head, int ring);
extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
struct vm_area_struct *vma); struct vm_area_struct *vma);
...@@ -151,7 +150,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo, ...@@ -151,7 +150,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
bool force_drop); bool force_drop);
extern void radeon_bo_move_notify(struct ttm_buffer_object *bo, extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem); struct ttm_mem_reg *new_mem);
extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo); extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
extern int radeon_bo_get_surface_reg(struct radeon_bo *bo); extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
...@@ -181,7 +180,7 @@ extern int radeon_sa_bo_manager_suspend(struct radeon_device *rdev, ...@@ -181,7 +180,7 @@ extern int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
extern int radeon_sa_bo_new(struct radeon_device *rdev, extern int radeon_sa_bo_new(struct radeon_device *rdev,
struct radeon_sa_manager *sa_manager, struct radeon_sa_manager *sa_manager,
struct radeon_sa_bo **sa_bo, struct radeon_sa_bo **sa_bo,
unsigned size, unsigned align, bool block); unsigned size, unsigned align);
extern void radeon_sa_bo_free(struct radeon_device *rdev, extern void radeon_sa_bo_free(struct radeon_device *rdev,
struct radeon_sa_bo **sa_bo, struct radeon_sa_bo **sa_bo,
struct radeon_fence *fence); struct radeon_fence *fence);
......
...@@ -260,7 +260,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) ...@@ -260,7 +260,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev)
if (!ring->ready) { if (!ring->ready) {
continue; continue;
} }
r = radeon_fence_wait_empty_locked(rdev, i); r = radeon_fence_wait_empty(rdev, i);
if (r) { if (r) {
/* needs a GPU reset dont reset here */ /* needs a GPU reset dont reset here */
mutex_unlock(&rdev->ring_lock); mutex_unlock(&rdev->ring_lock);
...@@ -896,7 +896,7 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) ...@@ -896,7 +896,7 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev)
for (i = 0; i < RADEON_NUM_RINGS; i++) { for (i = 0; i < RADEON_NUM_RINGS; i++) {
struct radeon_ring *ring = &rdev->ring[i]; struct radeon_ring *ring = &rdev->ring[i];
if (ring->ready) if (ring->ready)
radeon_fence_wait_empty_locked(rdev, i); radeon_fence_wait_empty(rdev, i);
} }
/* program the new power state */ /* program the new power state */
...@@ -943,8 +943,6 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable) ...@@ -943,8 +943,6 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable)
if (enable) { if (enable) {
mutex_lock(&rdev->pm.mutex); mutex_lock(&rdev->pm.mutex);
rdev->pm.dpm.uvd_active = true; rdev->pm.dpm.uvd_active = true;
/* disable this for now */
#if 0
if ((rdev->pm.dpm.sd == 1) && (rdev->pm.dpm.hd == 0)) if ((rdev->pm.dpm.sd == 1) && (rdev->pm.dpm.hd == 0))
dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_SD; dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_SD;
else if ((rdev->pm.dpm.sd == 2) && (rdev->pm.dpm.hd == 0)) else if ((rdev->pm.dpm.sd == 2) && (rdev->pm.dpm.hd == 0))
...@@ -954,7 +952,6 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable) ...@@ -954,7 +952,6 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable)
else if ((rdev->pm.dpm.sd == 0) && (rdev->pm.dpm.hd == 2)) else if ((rdev->pm.dpm.sd == 0) && (rdev->pm.dpm.hd == 2))
dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD2; dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD2;
else else
#endif
dpm_state = POWER_STATE_TYPE_INTERNAL_UVD; dpm_state = POWER_STATE_TYPE_INTERNAL_UVD;
rdev->pm.dpm.state = dpm_state; rdev->pm.dpm.state = dpm_state;
mutex_unlock(&rdev->pm.mutex); mutex_unlock(&rdev->pm.mutex);
......
...@@ -63,7 +63,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, ...@@ -63,7 +63,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
{ {
int r; int r;
r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true); r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256);
if (r) { if (r) {
dev_err(rdev->dev, "failed to get a new IB (%d)\n", r); dev_err(rdev->dev, "failed to get a new IB (%d)\n", r);
return r; return r;
...@@ -145,6 +145,13 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, ...@@ -145,6 +145,13 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
return r; return r;
} }
/* grab a vm id if necessary */
if (ib->vm) {
struct radeon_fence *vm_id_fence;
vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring);
radeon_semaphore_sync_to(ib->semaphore, vm_id_fence);
}
/* sync with other rings */ /* sync with other rings */
r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring); r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring);
if (r) { if (r) {
...@@ -153,11 +160,9 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, ...@@ -153,11 +160,9 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
return r; return r;
} }
/* if we can't remember our last VM flush then flush now! */ if (ib->vm)
/* XXX figure out why we have to flush for every IB */ radeon_vm_flush(rdev, ib->vm, ib->ring);
if (ib->vm /*&& !ib->vm->last_flush*/) {
radeon_ring_vm_flush(rdev, ib->ring, ib->vm);
}
if (const_ib) { if (const_ib) {
radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); radeon_ring_ib_execute(rdev, const_ib->ring, const_ib);
radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); radeon_semaphore_free(rdev, &const_ib->semaphore, NULL);
...@@ -172,10 +177,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, ...@@ -172,10 +177,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
if (const_ib) { if (const_ib) {
const_ib->fence = radeon_fence_ref(ib->fence); const_ib->fence = radeon_fence_ref(ib->fence);
} }
/* we just flushed the VM, remember that */
if (ib->vm && !ib->vm->last_flush) { if (ib->vm)
ib->vm->last_flush = radeon_fence_ref(ib->fence); radeon_vm_fence(rdev, ib->vm, ib->fence);
}
radeon_ring_unlock_commit(rdev, ring); radeon_ring_unlock_commit(rdev, ring);
return 0; return 0;
} }
...@@ -382,7 +387,7 @@ int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsi ...@@ -382,7 +387,7 @@ int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsi
if (ndw < ring->ring_free_dw) { if (ndw < ring->ring_free_dw) {
break; break;
} }
r = radeon_fence_wait_next_locked(rdev, ring->idx); r = radeon_fence_wait_next(rdev, ring->idx);
if (r) if (r)
return r; return r;
} }
...@@ -485,8 +490,8 @@ void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *rin ...@@ -485,8 +490,8 @@ void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *rin
void radeon_ring_lockup_update(struct radeon_device *rdev, void radeon_ring_lockup_update(struct radeon_device *rdev,
struct radeon_ring *ring) struct radeon_ring *ring)
{ {
ring->last_rptr = radeon_ring_get_rptr(rdev, ring); atomic_set(&ring->last_rptr, radeon_ring_get_rptr(rdev, ring));
ring->last_activity = jiffies; atomic64_set(&ring->last_activity, jiffies_64);
} }
/** /**
...@@ -498,22 +503,19 @@ void radeon_ring_lockup_update(struct radeon_device *rdev, ...@@ -498,22 +503,19 @@ void radeon_ring_lockup_update(struct radeon_device *rdev,
bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring) bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{ {
uint32_t rptr = radeon_ring_get_rptr(rdev, ring); uint32_t rptr = radeon_ring_get_rptr(rdev, ring);
unsigned long cjiffies, elapsed; uint64_t last = atomic64_read(&ring->last_activity);
uint64_t elapsed;
cjiffies = jiffies; if (rptr != atomic_read(&ring->last_rptr)) {
if (!time_after(cjiffies, ring->last_activity)) { /* ring is still working, no lockup */
/* likely a wrap around */
radeon_ring_lockup_update(rdev, ring); radeon_ring_lockup_update(rdev, ring);
return false; return false;
} }
if (rptr != ring->last_rptr) {
/* CP is still working no lockup */ elapsed = jiffies_to_msecs(jiffies_64 - last);
radeon_ring_lockup_update(rdev, ring);
return false;
}
elapsed = jiffies_to_msecs(cjiffies - ring->last_activity);
if (radeon_lockup_timeout && elapsed >= radeon_lockup_timeout) { if (radeon_lockup_timeout && elapsed >= radeon_lockup_timeout) {
dev_err(rdev->dev, "GPU lockup CP stall for more than %lumsec\n", elapsed); dev_err(rdev->dev, "ring %d stalled for more than %llumsec\n",
ring->idx, elapsed);
return true; return true;
} }
/* give a chance to the GPU ... */ /* give a chance to the GPU ... */
......
...@@ -312,7 +312,7 @@ static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager, ...@@ -312,7 +312,7 @@ static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager,
int radeon_sa_bo_new(struct radeon_device *rdev, int radeon_sa_bo_new(struct radeon_device *rdev,
struct radeon_sa_manager *sa_manager, struct radeon_sa_manager *sa_manager,
struct radeon_sa_bo **sa_bo, struct radeon_sa_bo **sa_bo,
unsigned size, unsigned align, bool block) unsigned size, unsigned align)
{ {
struct radeon_fence *fences[RADEON_NUM_RINGS]; struct radeon_fence *fences[RADEON_NUM_RINGS];
unsigned tries[RADEON_NUM_RINGS]; unsigned tries[RADEON_NUM_RINGS];
...@@ -353,14 +353,11 @@ int radeon_sa_bo_new(struct radeon_device *rdev, ...@@ -353,14 +353,11 @@ int radeon_sa_bo_new(struct radeon_device *rdev,
r = radeon_fence_wait_any(rdev, fences, false); r = radeon_fence_wait_any(rdev, fences, false);
spin_lock(&sa_manager->wq.lock); spin_lock(&sa_manager->wq.lock);
/* if we have nothing to wait for block */ /* if we have nothing to wait for block */
if (r == -ENOENT && block) { if (r == -ENOENT) {
r = wait_event_interruptible_locked( r = wait_event_interruptible_locked(
sa_manager->wq, sa_manager->wq,
radeon_sa_event(sa_manager, size, align) radeon_sa_event(sa_manager, size, align)
); );
} else if (r == -ENOENT) {
r = -ENOMEM;
} }
} while (!r); } while (!r);
......
...@@ -42,7 +42,7 @@ int radeon_semaphore_create(struct radeon_device *rdev, ...@@ -42,7 +42,7 @@ int radeon_semaphore_create(struct radeon_device *rdev,
return -ENOMEM; return -ENOMEM;
} }
r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo, r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo,
8 * RADEON_NUM_SYNCS, 8, true); 8 * RADEON_NUM_SYNCS, 8);
if (r) { if (r) {
kfree(*semaphore); kfree(*semaphore);
*semaphore = NULL; *semaphore = NULL;
...@@ -147,7 +147,9 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, ...@@ -147,7 +147,9 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev,
if (++count > RADEON_NUM_SYNCS) { if (++count > RADEON_NUM_SYNCS) {
/* not enough room, wait manually */ /* not enough room, wait manually */
radeon_fence_wait_locked(fence); r = radeon_fence_wait(fence, false);
if (r)
return r;
continue; continue;
} }
...@@ -161,7 +163,9 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, ...@@ -161,7 +163,9 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev,
if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) { if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) {
/* signaling wasn't successful wait manually */ /* signaling wasn't successful wait manually */
radeon_ring_undo(&rdev->ring[i]); radeon_ring_undo(&rdev->ring[i]);
radeon_fence_wait_locked(fence); r = radeon_fence_wait(fence, false);
if (r)
return r;
continue; continue;
} }
...@@ -169,7 +173,9 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, ...@@ -169,7 +173,9 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev,
if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) { if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
/* waiting wasn't successful wait manually */ /* waiting wasn't successful wait manually */
radeon_ring_undo(&rdev->ring[i]); radeon_ring_undo(&rdev->ring[i]);
radeon_fence_wait_locked(fence); r = radeon_fence_wait(fence, false);
if (r)
return r;
continue; continue;
} }
......
...@@ -406,8 +406,14 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, ...@@ -406,8 +406,14 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
if (r) { if (r) {
memcpy: memcpy:
r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
if (r) {
return r;
}
} }
return r;
/* update statistics */
atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &rdev->num_bytes_moved);
return 0;
} }
static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
......
...@@ -453,7 +453,7 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, ...@@ -453,7 +453,7 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
} }
reloc = p->relocs_ptr[(idx / 4)]; reloc = p->relocs_ptr[(idx / 4)];
start = reloc->lobj.gpu_offset; start = reloc->gpu_offset;
end = start + radeon_bo_size(reloc->robj); end = start + radeon_bo_size(reloc->robj);
start += offset; start += offset;
...@@ -805,8 +805,7 @@ void radeon_uvd_note_usage(struct radeon_device *rdev) ...@@ -805,8 +805,7 @@ void radeon_uvd_note_usage(struct radeon_device *rdev)
(rdev->pm.dpm.hd != hd)) { (rdev->pm.dpm.hd != hd)) {
rdev->pm.dpm.sd = sd; rdev->pm.dpm.sd = sd;
rdev->pm.dpm.hd = hd; rdev->pm.dpm.hd = hd;
/* disable this for now */ streams_changed = true;
/*streams_changed = true;*/
} }
} }
......
...@@ -119,7 +119,7 @@ int radeon_vce_init(struct radeon_device *rdev) ...@@ -119,7 +119,7 @@ int radeon_vce_init(struct radeon_device *rdev)
if (rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) if (rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8)))
return -EINVAL; return -EINVAL;
/* load firmware into VRAM */ /* allocate firmware, stack and heap BO */
size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) +
RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE;
...@@ -130,16 +130,21 @@ int radeon_vce_init(struct radeon_device *rdev) ...@@ -130,16 +130,21 @@ int radeon_vce_init(struct radeon_device *rdev)
return r; return r;
} }
r = radeon_vce_resume(rdev); r = radeon_bo_reserve(rdev->vce.vcpu_bo, false);
if (r) if (r) {
radeon_bo_unref(&rdev->vce.vcpu_bo);
dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r);
return r; return r;
}
memset(rdev->vce.cpu_addr, 0, size); r = radeon_bo_pin(rdev->vce.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
memcpy(rdev->vce.cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); &rdev->vce.gpu_addr);
radeon_bo_unreserve(rdev->vce.vcpu_bo);
r = radeon_vce_suspend(rdev); if (r) {
if (r) radeon_bo_unref(&rdev->vce.vcpu_bo);
dev_err(rdev->dev, "(%d) VCE bo pin failed\n", r);
return r; return r;
}
for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
atomic_set(&rdev->vce.handles[i], 0); atomic_set(&rdev->vce.handles[i], 0);
...@@ -158,8 +163,12 @@ int radeon_vce_init(struct radeon_device *rdev) ...@@ -158,8 +163,12 @@ int radeon_vce_init(struct radeon_device *rdev)
*/ */
void radeon_vce_fini(struct radeon_device *rdev) void radeon_vce_fini(struct radeon_device *rdev)
{ {
radeon_vce_suspend(rdev); if (rdev->vce.vcpu_bo == NULL)
return;
radeon_bo_unref(&rdev->vce.vcpu_bo); radeon_bo_unref(&rdev->vce.vcpu_bo);
release_firmware(rdev->vce_fw);
} }
/** /**
...@@ -167,22 +176,23 @@ void radeon_vce_fini(struct radeon_device *rdev) ...@@ -167,22 +176,23 @@ void radeon_vce_fini(struct radeon_device *rdev)
* *
* @rdev: radeon_device pointer * @rdev: radeon_device pointer
* *
* TODO: Test VCE suspend/resume
*/ */
int radeon_vce_suspend(struct radeon_device *rdev) int radeon_vce_suspend(struct radeon_device *rdev)
{ {
int r; int i;
if (rdev->vce.vcpu_bo == NULL) if (rdev->vce.vcpu_bo == NULL)
return 0; return 0;
r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i)
if (!r) { if (atomic_read(&rdev->vce.handles[i]))
radeon_bo_kunmap(rdev->vce.vcpu_bo); break;
radeon_bo_unpin(rdev->vce.vcpu_bo);
radeon_bo_unreserve(rdev->vce.vcpu_bo); if (i == RADEON_MAX_VCE_HANDLES)
} return 0;
return r;
/* TODO: suspending running encoding sessions isn't supported */
return -EINVAL;
} }
/** /**
...@@ -190,10 +200,10 @@ int radeon_vce_suspend(struct radeon_device *rdev) ...@@ -190,10 +200,10 @@ int radeon_vce_suspend(struct radeon_device *rdev)
* *
* @rdev: radeon_device pointer * @rdev: radeon_device pointer
* *
* TODO: Test VCE suspend/resume
*/ */
int radeon_vce_resume(struct radeon_device *rdev) int radeon_vce_resume(struct radeon_device *rdev)
{ {
void *cpu_addr;
int r; int r;
if (rdev->vce.vcpu_bo == NULL) if (rdev->vce.vcpu_bo == NULL)
...@@ -201,26 +211,21 @@ int radeon_vce_resume(struct radeon_device *rdev) ...@@ -201,26 +211,21 @@ int radeon_vce_resume(struct radeon_device *rdev)
r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); r = radeon_bo_reserve(rdev->vce.vcpu_bo, false);
if (r) { if (r) {
radeon_bo_unref(&rdev->vce.vcpu_bo);
dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r); dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r);
return r; return r;
} }
r = radeon_bo_pin(rdev->vce.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, r = radeon_bo_kmap(rdev->vce.vcpu_bo, &cpu_addr);
&rdev->vce.gpu_addr);
if (r) { if (r) {
radeon_bo_unreserve(rdev->vce.vcpu_bo); radeon_bo_unreserve(rdev->vce.vcpu_bo);
radeon_bo_unref(&rdev->vce.vcpu_bo);
dev_err(rdev->dev, "(%d) VCE bo pin failed\n", r);
return r;
}
r = radeon_bo_kmap(rdev->vce.vcpu_bo, &rdev->vce.cpu_addr);
if (r) {
dev_err(rdev->dev, "(%d) VCE map failed\n", r); dev_err(rdev->dev, "(%d) VCE map failed\n", r);
return r; return r;
} }
memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
radeon_bo_kunmap(rdev->vce.vcpu_bo);
radeon_bo_unreserve(rdev->vce.vcpu_bo); radeon_bo_unreserve(rdev->vce.vcpu_bo);
return 0; return 0;
...@@ -456,7 +461,7 @@ int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi) ...@@ -456,7 +461,7 @@ int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi)
return -EINVAL; return -EINVAL;
} }
offset += p->relocs_ptr[(idx / 4)]->lobj.gpu_offset; offset += p->relocs_ptr[(idx / 4)]->gpu_offset;
p->ib.ptr[lo] = offset & 0xFFFFFFFF; p->ib.ptr[lo] = offset & 0xFFFFFFFF;
p->ib.ptr[hi] = offset >> 32; p->ib.ptr[hi] = offset >> 32;
......
此差异已折叠。
...@@ -510,6 +510,7 @@ typedef struct { ...@@ -510,6 +510,7 @@ typedef struct {
#define DRM_RADEON_GEM_GET_TILING 0x29 #define DRM_RADEON_GEM_GET_TILING 0x29
#define DRM_RADEON_GEM_BUSY 0x2a #define DRM_RADEON_GEM_BUSY 0x2a
#define DRM_RADEON_GEM_VA 0x2b #define DRM_RADEON_GEM_VA 0x2b
#define DRM_RADEON_GEM_OP 0x2c
#define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) #define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
#define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) #define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START)
...@@ -552,6 +553,7 @@ typedef struct { ...@@ -552,6 +553,7 @@ typedef struct {
#define DRM_IOCTL_RADEON_GEM_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling) #define DRM_IOCTL_RADEON_GEM_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
#define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy) #define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
#define DRM_IOCTL_RADEON_GEM_VA DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va) #define DRM_IOCTL_RADEON_GEM_VA DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va)
#define DRM_IOCTL_RADEON_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_OP, struct drm_radeon_gem_op)
typedef struct drm_radeon_init { typedef struct drm_radeon_init {
enum { enum {
...@@ -884,6 +886,16 @@ struct drm_radeon_gem_pwrite { ...@@ -884,6 +886,16 @@ struct drm_radeon_gem_pwrite {
uint64_t data_ptr; uint64_t data_ptr;
}; };
/*
 * Argument block for DRM_IOCTL_RADEON_GEM_OP.
 * Sets or returns a value associated with a buffer; which one is
 * selected by @op.
 */
struct drm_radeon_gem_op {
uint32_t handle; /* handle of the buffer to operate on */
uint32_t op; /* operation code, one of RADEON_GEM_OP_* */
uint64_t value; /* input or return value; direction presumably follows op (SET vs. GET) - confirm in the ioctl handler */
};
#define RADEON_GEM_OP_GET_INITIAL_DOMAIN 0
#define RADEON_GEM_OP_SET_INITIAL_DOMAIN 1
#define RADEON_VA_MAP 1 #define RADEON_VA_MAP 1
#define RADEON_VA_UNMAP 2 #define RADEON_VA_UNMAP 2
...@@ -992,6 +1004,9 @@ struct drm_radeon_cs { ...@@ -992,6 +1004,9 @@ struct drm_radeon_cs {
#define RADEON_INFO_VCE_FW_VERSION 0x1b #define RADEON_INFO_VCE_FW_VERSION 0x1b
/* version of VCE feedback */ /* version of VCE feedback */
#define RADEON_INFO_VCE_FB_VERSION 0x1c #define RADEON_INFO_VCE_FB_VERSION 0x1c
#define RADEON_INFO_NUM_BYTES_MOVED 0x1d
#define RADEON_INFO_VRAM_USAGE 0x1e
#define RADEON_INFO_GTT_USAGE 0x1f
struct drm_radeon_info { struct drm_radeon_info {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册