提交 ee60e29f 编写于 作者: C Christian König 提交者: Alex Deucher

drm/radeon: rework VMID handling

Move binding onto the ring, simplifying handling a bit.
Signed-off-by: NChristian König <deathsimple@vodafone.de>
Reviewed-by: NJerome Glisse <jglisse@redhat.com>
上级 9b40e5d8
......@@ -1497,14 +1497,6 @@ void cayman_vm_fini(struct radeon_device *rdev)
{
}
int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
{
WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (id << 2), 0);
WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (id << 2), vm->last_pfn);
WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
return 0;
}
#define R600_PTE_VALID (1 << 0)
#define R600_PTE_SYSTEM (1 << 1)
#define R600_PTE_SNOOPED (1 << 2)
......@@ -1540,10 +1532,20 @@ void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm,
void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
struct radeon_vm *vm = ib->vm;
if (!ib->vm || ib->vm->id == -1)
if (vm == NULL)
return;
radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (vm->id << 2), 0));
radeon_ring_write(ring, 0);
radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (vm->id << 2), 0));
radeon_ring_write(ring, vm->last_pfn);
radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
radeon_ring_write(ring, vm->pt_gpu_addr >> 12);
/* flush hdp cache */
radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
radeon_ring_write(ring, 0x1);
......
......@@ -253,6 +253,22 @@ static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a,
}
}
static inline bool radeon_fence_is_earlier(struct radeon_fence *a,
struct radeon_fence *b)
{
if (!a) {
return false;
}
if (!b) {
return true;
}
BUG_ON(a->ring != b->ring);
return a->seq < b->seq;
}
/*
* Tiling registers
*/
......@@ -628,10 +644,13 @@ struct radeon_ring {
/*
* VM
*/
#define RADEON_NUM_VM 16
struct radeon_vm {
struct list_head list;
struct list_head va;
int id;
unsigned id;
unsigned last_pfn;
u64 pt_gpu_addr;
u64 *pt;
......@@ -646,7 +665,7 @@ struct radeon_vm {
struct radeon_vm_manager {
struct mutex lock;
struct list_head lru_vm;
uint32_t use_bitmap;
struct radeon_fence *active[RADEON_NUM_VM];
struct radeon_sa_manager sa_manager;
uint32_t max_pfn;
/* number of VMIDs */
......@@ -1117,7 +1136,6 @@ struct radeon_asic {
struct {
int (*init)(struct radeon_device *rdev);
void (*fini)(struct radeon_device *rdev);
int (*bind)(struct radeon_device *rdev, struct radeon_vm *vm, int id);
uint32_t (*page_flags)(struct radeon_device *rdev,
struct radeon_vm *vm,
uint32_t flags);
......@@ -1734,7 +1752,6 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
#define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p))
#define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
#define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
#define radeon_asic_vm_bind(rdev, v, id) (rdev)->asic->vm.bind((rdev), (v), (id))
#define radeon_asic_vm_page_flags(rdev, v, flags) (rdev)->asic->vm.page_flags((rdev), (v), (flags))
#define radeon_asic_vm_set_page(rdev, v, pfn, addr, flags) (rdev)->asic->vm.set_page((rdev), (v), (pfn), (addr), (flags))
#define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp))
......@@ -1817,6 +1834,11 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm);
void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
struct radeon_vm *vm, int ring);
void radeon_vm_fence(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_fence *fence);
int radeon_vm_bo_update_pte(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_bo *bo,
......
......@@ -1375,7 +1375,6 @@ static struct radeon_asic cayman_asic = {
.vm = {
.init = &cayman_vm_init,
.fini = &cayman_vm_fini,
.bind = &cayman_vm_bind,
.page_flags = &cayman_vm_page_flags,
.set_page = &cayman_vm_set_page,
},
......@@ -1480,7 +1479,6 @@ static struct radeon_asic trinity_asic = {
.vm = {
.init = &cayman_vm_init,
.fini = &cayman_vm_fini,
.bind = &cayman_vm_bind,
.page_flags = &cayman_vm_page_flags,
.set_page = &cayman_vm_set_page,
},
......@@ -1585,7 +1583,6 @@ static struct radeon_asic si_asic = {
.vm = {
.init = &si_vm_init,
.fini = &si_vm_fini,
.bind = &si_vm_bind,
.page_flags = &cayman_vm_page_flags,
.set_page = &cayman_vm_set_page,
},
......@@ -1599,7 +1596,7 @@ static struct radeon_asic si_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &si_gpu_is_lockup,
.vm_flush = &cayman_vm_flush,
.vm_flush = &si_vm_flush,
},
[CAYMAN_RING_TYPE_CP1_INDEX] = {
.ib_execute = &si_ring_ib_execute,
......@@ -1610,7 +1607,7 @@ static struct radeon_asic si_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &si_gpu_is_lockup,
.vm_flush = &cayman_vm_flush,
.vm_flush = &si_vm_flush,
},
[CAYMAN_RING_TYPE_CP2_INDEX] = {
.ib_execute = &si_ring_ib_execute,
......@@ -1621,7 +1618,7 @@ static struct radeon_asic si_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &si_gpu_is_lockup,
.vm_flush = &cayman_vm_flush,
.vm_flush = &si_vm_flush,
}
},
.irq = {
......
......@@ -440,7 +440,6 @@ int cayman_asic_reset(struct radeon_device *rdev);
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
int cayman_vm_init(struct radeon_device *rdev);
void cayman_vm_fini(struct radeon_device *rdev);
int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id);
void cayman_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib);
uint32_t cayman_vm_page_flags(struct radeon_device *rdev,
......@@ -470,8 +469,7 @@ int si_irq_set(struct radeon_device *rdev);
int si_irq_process(struct radeon_device *rdev);
int si_vm_init(struct radeon_device *rdev);
void si_vm_fini(struct radeon_device *rdev);
int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id);
void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
void si_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib);
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
uint64_t si_get_gpu_clock(struct radeon_device *rdev);
......
......@@ -485,6 +485,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
}
radeon_cs_sync_rings(parser);
radeon_cs_sync_to(parser, vm->last_flush);
radeon_cs_sync_to(parser, radeon_vm_grab_id(rdev, vm, parser->ring));
if ((rdev->family >= CHIP_TAHITI) &&
(parser->chunk_const_ib_idx != -1)) {
......@@ -493,13 +494,11 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
r = radeon_ib_schedule(rdev, &parser->ib, NULL);
}
out:
if (!r) {
if (vm->fence) {
radeon_fence_unref(&vm->fence);
}
vm->fence = radeon_fence_ref(parser->ib.fence);
radeon_vm_fence(rdev, vm, parser->ib.fence);
}
out:
mutex_unlock(&vm->mutex);
mutex_unlock(&rdev->vm_manager.lock);
return r;
......
......@@ -1018,7 +1018,6 @@ int radeon_device_init(struct radeon_device *rdev,
return r;
/* initialize vm here */
mutex_init(&rdev->vm_manager.lock);
rdev->vm_manager.use_bitmap = 1;
rdev->vm_manager.max_pfn = 1 << 20;
INIT_LIST_HEAD(&rdev->vm_manager.lru_vm);
......
......@@ -437,7 +437,6 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
int r;
if (!rdev->vm_manager.enabled) {
/* mark first vm as always in use, it's the system one */
/* allocate enough for 2 full VM pts */
r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
rdev->vm_manager.max_pfn * 8 * 2,
......@@ -461,7 +460,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
/* restore page table */
list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
if (vm->id == -1)
if (vm->sa_bo == NULL)
continue;
list_for_each_entry(bo_va, &vm->va, vm_list) {
......@@ -475,11 +474,6 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
DRM_ERROR("Failed to update pte for vm %d!\n", vm->id);
}
}
r = radeon_asic_vm_bind(rdev, vm, vm->id);
if (r) {
DRM_ERROR("Failed to bind vm %d!\n", vm->id);
}
}
return 0;
}
......@@ -500,10 +494,6 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
{
struct radeon_bo_va *bo_va;
if (vm->id == -1) {
return;
}
/* wait for vm use to end */
while (vm->fence) {
int r;
......@@ -523,9 +513,7 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
radeon_fence_unref(&vm->last_flush);
/* hw unbind */
rdev->vm_manager.use_bitmap &= ~(1 << vm->id);
list_del_init(&vm->list);
vm->id = -1;
radeon_sa_bo_free(rdev, &vm->sa_bo, NULL);
vm->pt = NULL;
......@@ -544,6 +532,7 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
void radeon_vm_manager_fini(struct radeon_device *rdev)
{
struct radeon_vm *vm, *tmp;
int i;
if (!rdev->vm_manager.enabled)
return;
......@@ -553,6 +542,9 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
radeon_vm_unbind_locked(rdev, vm);
}
for (i = 0; i < RADEON_NUM_VM; ++i) {
radeon_fence_unref(&rdev->vm_manager.active[i]);
}
radeon_asic_vm_fini(rdev);
mutex_unlock(&rdev->vm_manager.lock);
......@@ -593,14 +585,13 @@ void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm)
{
struct radeon_vm *vm_evict;
unsigned i;
int id = -1, r;
int r;
if (vm == NULL) {
return -EINVAL;
}
if (vm->id != -1) {
if (vm->sa_bo != NULL) {
/* update lru */
list_del_init(&vm->list);
list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
......@@ -623,33 +614,86 @@ int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm)
vm->pt_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo);
memset(vm->pt, 0, RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8));
retry_id:
/* search for free vm */
for (i = 0; i < rdev->vm_manager.nvm; i++) {
if (!(rdev->vm_manager.use_bitmap & (1 << i))) {
id = i;
break;
list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
&rdev->ring_tmp_bo.bo->tbo.mem);
}
/**
* radeon_vm_grab_id - allocate the next free VMID
*
* @rdev: radeon_device pointer
* @vm: vm to allocate id for
* @ring: ring we want to submit job to
*
* Allocate an id for the vm (cayman+).
* Returns the fence we need to sync to (if any).
*
* Global and local mutex must be locked!
*/
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
struct radeon_vm *vm, int ring)
{
struct radeon_fence *best[RADEON_NUM_RINGS] = {};
unsigned choices[2] = {};
unsigned i;
/* check if the id is still valid */
if (vm->fence && vm->fence == rdev->vm_manager.active[vm->id])
return NULL;
/* we definately need to flush */
radeon_fence_unref(&vm->last_flush);
/* skip over VMID 0, since it is the system VM */
for (i = 1; i < rdev->vm_manager.nvm; ++i) {
struct radeon_fence *fence = rdev->vm_manager.active[i];
if (fence == NULL) {
/* found a free one */
vm->id = i;
return NULL;
}
if (radeon_fence_is_earlier(fence, best[fence->ring])) {
best[fence->ring] = fence;
choices[fence->ring == ring ? 0 : 1] = i;
}
}
/* evict vm if necessary */
if (id == -1) {
vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list);
radeon_vm_unbind(rdev, vm_evict);
goto retry_id;
}
/* do hw bind */
r = radeon_asic_vm_bind(rdev, vm, id);
radeon_fence_unref(&vm->last_flush);
if (r) {
radeon_sa_bo_free(rdev, &vm->sa_bo, NULL);
return r;
for (i = 0; i < 2; ++i) {
if (choices[i]) {
vm->id = choices[i];
return rdev->vm_manager.active[choices[i]];
}
}
rdev->vm_manager.use_bitmap |= 1 << id;
vm->id = id;
list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
&rdev->ring_tmp_bo.bo->tbo.mem);
/* should never happen */
BUG();
return NULL;
}
/**
* radeon_vm_fence - remember fence for vm
*
* @rdev: radeon_device pointer
* @vm: vm we want to fence
* @fence: fence to remember
*
* Fence the vm (cayman+).
* Set the fence used to protect page table and id.
*
* Global and local mutex must be locked!
*/
void radeon_vm_fence(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_fence *fence)
{
radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
radeon_fence_unref(&vm->fence);
vm->fence = radeon_fence_ref(fence);
}
/* object have to be reserved */
......@@ -806,7 +850,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
uint32_t flags;
/* nothing to do if vm isn't bound */
if (vm->id == -1)
if (vm->sa_bo == NULL)
return 0;
bo_va = radeon_bo_va(bo, vm);
......@@ -928,7 +972,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
{
int r;
vm->id = -1;
vm->id = 0;
vm->fence = NULL;
mutex_init(&vm->mutex);
INIT_LIST_HEAD(&vm->list);
......
......@@ -2789,14 +2789,30 @@ void si_vm_fini(struct radeon_device *rdev)
{
}
int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
void si_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib)
{
if (id < 8)
WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
else
WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2),
vm->pt_gpu_addr >> 12);
return 0;
struct radeon_ring *ring = &rdev->ring[ib->ring];
struct radeon_vm *vm = ib->vm;
if (vm == NULL)
return;
if (vm->id < 8) {
radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
+ (vm->id << 2), 0));
} else {
radeon_ring_write(ring, PACKET0(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR
+ ((vm->id - 8) << 2), 0));
}
radeon_ring_write(ring, vm->pt_gpu_addr >> 12);
/* flush hdp cache */
radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
radeon_ring_write(ring, 0x1);
/* bits 0-7 are the VM contexts0-7 */
radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
radeon_ring_write(ring, 1 << ib->vm->id);
}
/*
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册