Commit b2ff0e8a authored by Andres Rodriguez, committed by Alex Deucher

drm/amdgpu: add framework for HW specific priority settings v9

Add an initial framework for changing the HW priorities of rings. The
framework allows requesting priority changes for the lifetime of an
amdgpu_job. After the job completes, the priority decays to the next-lowest
priority for which a request is still valid.
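
For illustration, here is a minimal standalone C model of the refcounted
get/put and decay rule the framework implements in
amdgpu_ring_priority_get()/amdgpu_ring_priority_put(). This is a sketch, not
kernel code: the priority names are simplified and the kernel version uses
atomics plus a mutex.

    #include <stdio.h>

    enum prio { PRIO_MIN, PRIO_LOW = PRIO_MIN, PRIO_NORMAL, PRIO_HIGH, PRIO_MAX };

    static int num_jobs[PRIO_MAX];                /* requests per priority */
    static enum prio ring_priority = PRIO_NORMAL;

    static void priority_get(enum prio p)
    {
            num_jobs[p]++;
            if (p > ring_priority)
                    ring_priority = p;            /* would call set_priority() */
    }

    static void priority_put(enum prio p)
    {
            int i;

            if (--num_jobs[p] > 0 || p == PRIO_NORMAL)
                    return;
            if (ring_priority > p)                /* higher prio still active */
                    return;
            /* decay to the next level that still has a valid request */
            for (i = p; i >= PRIO_MIN; i--) {
                    if (i == PRIO_NORMAL || num_jobs[i]) {
                            ring_priority = i;    /* would call set_priority() */
                            break;
                    }
            }
    }

    int main(void)
    {
            priority_get(PRIO_HIGH);              /* ring raised to HIGH */
            priority_get(PRIO_NORMAL);
            priority_put(PRIO_HIGH);              /* decays back to NORMAL */
            printf("ring priority: %d\n", ring_priority);  /* prints 1 */
            return 0;
    }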

A new ring function, set_priority(), can now be populated to take care of
the HW-specific programming sequence for priority changes.
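
A hypothetical sketch of how a backend would populate the new hook (the
function name and the register programming are placeholders, not part of
this patch; real implementations arrive in follow-up patches):

    /*
     * Hypothetical example of wiring the new hook into a ring's funcs
     * table; the body stands in for the ASIC-specific sequence.
     */
    static void example_ring_set_priority(struct amdgpu_ring *ring,
                                          enum amd_sched_priority priority)
    {
            /* program the queue priority/quantum registers here */
    }

    static const struct amdgpu_ring_funcs example_ring_funcs = {
            /* ... existing callbacks ... */
            .set_priority = example_ring_set_priority,
    };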

v2: set priority before emitting IB, and take a ref on amdgpu_job
v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_*
v4: plug amdgpu_ring_restore_priority_cb into amdgpu_job_free_cb
v5: use atomic for tracking job priorities instead of last_job
v6: rename amdgpu_ring_priority_[get/put]() and align parameters
v7: replace spinlocks with mutexes for KIQ compatibility
v8: raise ring priority during cs_ioctl, instead of job_run
v9: priority_get() before push_job()
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Parent c2636dc5
@@ -1177,6 +1177,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	job->uf_sequence = seq;
 	amdgpu_job_free_resources(job);
+	amdgpu_ring_priority_get(job->ring,
+				 amd_sched_get_job_priority(&job->base));
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
@@ -103,6 +103,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
 {
 	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
 
+	amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job));
 	dma_fence_put(job->fence);
 	amdgpu_sync_free(&job->sync);
 	amdgpu_sync_free(&job->dep_sync);
@@ -139,6 +140,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 	job->fence_ctx = entity->fence_context;
 	*f = dma_fence_get(&job->base.s_fence->finished);
 	amdgpu_job_free_resources(job);
+	amdgpu_ring_priority_get(job->ring,
+				 amd_sched_get_job_priority(&job->base));
 	amd_sched_entity_push_job(&job->base);
 
 	return 0;
@@ -203,6 +206,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 	/* if gpu reset, hw fence will be replaced here */
 	dma_fence_put(job->fence);
 	job->fence = dma_fence_get(fence);
+	amdgpu_job_free_resources(job);
 	return fence;
 }
@@ -154,6 +154,75 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
 		ring->funcs->end_use(ring);
 }
 
+/**
+ * amdgpu_ring_priority_put - restore a ring's priority
+ *
+ * @ring: amdgpu_ring structure holding the information
+ * @priority: target priority
+ *
+ * Release a request for executing at @priority
+ */
+void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
+			      enum amd_sched_priority priority)
+{
+	int i;
+
+	if (!ring->funcs->set_priority)
+		return;
+
+	if (atomic_dec_return(&ring->num_jobs[priority]) > 0)
+		return;
+
+	/* no need to restore if the job is already at the lowest priority */
+	if (priority == AMD_SCHED_PRIORITY_NORMAL)
+		return;
+
+	mutex_lock(&ring->priority_mutex);
+	/* something higher prio is executing, no need to decay */
+	if (ring->priority > priority)
+		goto out_unlock;
+
+	/* decay priority to the next level with a job available */
+	for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) {
+		if (i == AMD_SCHED_PRIORITY_NORMAL
+				|| atomic_read(&ring->num_jobs[i])) {
+			ring->priority = i;
+			ring->funcs->set_priority(ring, i);
+			break;
+		}
+	}
+
+out_unlock:
+	mutex_unlock(&ring->priority_mutex);
+}
+
+/**
+ * amdgpu_ring_priority_get - change the ring's priority
+ *
+ * @ring: amdgpu_ring structure holding the information
+ * @priority: target priority
+ *
+ * Request a ring's priority to be raised to @priority (refcounted).
+ */
+void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
+			      enum amd_sched_priority priority)
+{
+	if (!ring->funcs->set_priority)
+		return;
+
+	atomic_inc(&ring->num_jobs[priority]);
+
+	mutex_lock(&ring->priority_mutex);
+	if (priority <= ring->priority)
+		goto out_unlock;
+
+	ring->priority = priority;
+	ring->funcs->set_priority(ring, priority);
+
+out_unlock:
+	mutex_unlock(&ring->priority_mutex);
+}
+
 /**
  * amdgpu_ring_init - init driver ring struct.
  *
@@ -169,7 +238,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned max_dw, struct amdgpu_irq_src *irq_src,
 		     unsigned irq_type)
 {
-	int r;
+	int r, i;
 	int sched_hw_submission = amdgpu_sched_hw_submission;
 
 	/* Set the hw submission limit higher for KIQ because
@@ -247,9 +316,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 	}
 
 	ring->max_dw = max_dw;
+	ring->priority = AMD_SCHED_PRIORITY_NORMAL;
+	mutex_init(&ring->priority_mutex);
 	INIT_LIST_HEAD(&ring->lru_list);
 	amdgpu_ring_lru_touch(adev, ring);
 
+	for (i = 0; i < AMD_SCHED_PRIORITY_MAX; ++i)
+		atomic_set(&ring->num_jobs[i], 0);
+
 	if (amdgpu_debugfs_ring_init(adev, ring)) {
 		DRM_ERROR("Failed to register debugfs file for rings !\n");
 	}
@@ -24,6 +24,7 @@
 #ifndef __AMDGPU_RING_H__
 #define __AMDGPU_RING_H__
 
 #include <drm/amdgpu_drm.h>
+#include "gpu_scheduler.h"
 
 /* max number of rings */
@@ -56,6 +57,7 @@ struct amdgpu_device;
 struct amdgpu_ring;
 struct amdgpu_ib;
 struct amdgpu_cs_parser;
+struct amdgpu_job;
 
 /*
  * Fences.
@@ -147,6 +149,9 @@ struct amdgpu_ring_funcs {
 	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
 	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
 	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
+	/* priority functions */
+	void (*set_priority) (struct amdgpu_ring *ring,
+			      enum amd_sched_priority priority);
 };
@@ -187,6 +192,12 @@ struct amdgpu_ring {
 	volatile u32		*cond_exe_cpu_addr;
 	unsigned		vm_inv_eng;
 	bool			has_compute_vm_bug;
+
+	atomic_t		num_jobs[AMD_SCHED_PRIORITY_MAX];
+	struct mutex		priority_mutex;
+	/* protected by priority_mutex */
+	int			priority;
+
 #if defined(CONFIG_DEBUG_FS)
 	struct dentry *ent;
 #endif
@@ -197,6 +208,10 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
+void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
+			      enum amd_sched_priority priority);
+void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
+			      enum amd_sched_priority priority);
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
 		     unsigned irq_type);
@@ -170,4 +170,11 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
 bool amd_sched_dependency_optimized(struct dma_fence* fence,
 				    struct amd_sched_entity *entity);
 void amd_sched_job_kickout(struct amd_sched_job *s_job);
+
+static inline enum amd_sched_priority
+amd_sched_get_job_priority(struct amd_sched_job *job)
+{
+	return (job->s_entity->rq - job->sched->sched_rq);
+}
+
 #endif
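
Note on the last hunk: amd_sched_get_job_priority() recovers a job's priority
by pointer arithmetic. The entity's rq pointer is an element of the
scheduler's per-priority sched_rq array, so subtracting the array base yields
the priority enum value again. A standalone illustration of the idiom, with
simplified stand-in types (not the scheduler's own):

    #include <stdio.h>

    struct rq { int unused; };
    struct sched { struct rq sched_rq[3]; };      /* one rq per priority */

    int main(void)
    {
            struct sched s;
            /* an entity bound at priority 2 points at sched_rq[2] */
            struct rq *entity_rq = &s.sched_rq[2];
            printf("priority = %td\n", entity_rq - s.sched_rq);  /* prints 2 */
            return 0;
    }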