提交 e2a8986f 编写于 作者: D Dave Airlie

Merge branch 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux into drm-next

amdgpu and radeon changes for 4.3. Highlights:
- Fiji support for amdgpu.
- CGS support for amdgpu.  This is a new driver
  internal cross-component API.
- Initial GPU scheduler for amdgpu.  Still disabled
  by default.
- Lots of bug fixes and optimizations

* 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux: (130 commits)
  drm/amdgpu: wait on page directory changes. v2
  drm/amdgpu: Select BACKLIGHT_LCD_SUPPORT
  drm/radeon: Select BACKLIGHT_LCD_SUPPORT
  drm/amdgpu: cleanup sheduler rq handling v2
  drm/amdgpu: move prepare work out of scheduler to cs_ioctl
  drm/amdgpu: fix unnecessary wake up
  drm/amdgpu: fix duplicated mapping invoke bug
  drm/amdgpu: drop bo_list_clone when no scheduler
  drm/amdgpu: disable GPU reset by default
  drm/amdgpu: fix type mismatch error
  drm/amdgpu: add reference for **fence
  drm/amdgpu: fix waiting for all fences before flipping
  drm/amdgpu: fix UVD return code checking
  drm/amdgpu: remove scheduler fence list v2
  drm/amdgpu: remove amd_sched_wait_emit v2
  drm/amdgpu: remove unecessary scheduler fence callbacks
  drm/amdgpu: fix scheduler fence implementation
  drm/amdgpu: don't grab dev->struct_mutex in pm functions
  drm/amdgpu: Don't take dev->struct_mutex in bo_force_delete
  drm/radeon: Don't take dev->struct_mutex in pm functions
  ...
......@@ -128,6 +128,7 @@ config DRM_RADEON
select POWER_SUPPLY
select HWMON
select BACKLIGHT_CLASS_DEVICE
select BACKLIGHT_LCD_SUPPORT
select INTERVAL_TREE
help
Choose this option if you have an ATI Radeon graphics card. There
......@@ -151,6 +152,7 @@ config DRM_AMDGPU
select POWER_SUPPLY
select HWMON
select BACKLIGHT_CLASS_DEVICE
select BACKLIGHT_LCD_SUPPORT
select INTERVAL_TREE
help
Choose this option if you have a recent AMD Radeon graphics card.
......
......@@ -3,7 +3,9 @@
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/asic_reg \
-Idrivers/gpu/drm/amd/include
-Idrivers/gpu/drm/amd/include \
-Idrivers/gpu/drm/amd/amdgpu \
-Idrivers/gpu/drm/amd/scheduler
amdgpu-y := amdgpu_drv.o
......@@ -21,7 +23,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o gmc_v7_0.o cik_ih.o kv_smc.o kv_dpm.o \
ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \
amdgpu_amdkfd_gfx_v7.o
amdgpu-y += \
vi.o
......@@ -43,6 +46,7 @@ amdgpu-y += \
amdgpu_dpm.o \
cz_smc.o cz_dpm.o \
tonga_smc.o tonga_dpm.o \
fiji_smc.o fiji_dpm.o \
iceland_smc.o iceland_dpm.o
# add DCE block
......@@ -74,9 +78,17 @@ amdgpu-y += \
# add amdkfd interfaces
amdgpu-y += \
amdgpu_amdkfd.o \
amdgpu_amdkfd_gfx_v7.o \
amdgpu_amdkfd_gfx_v8.o
# add cgs
amdgpu-y += amdgpu_cgs.o
# GPU scheduler
amdgpu-y += \
../scheduler/gpu_scheduler.o \
../scheduler/sched_fence.o \
amdgpu_sched.o
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
......
......@@ -42,17 +42,19 @@
#include <ttm/ttm_module.h>
#include <ttm/ttm_execbuf_util.h>
#include <drm/drmP.h>
#include <drm/drm_gem.h>
#include <drm/amdgpu_drm.h>
#include "amd_shared.h"
#include "amdgpu_family.h"
#include "amdgpu_mode.h"
#include "amdgpu_ih.h"
#include "amdgpu_irq.h"
#include "amdgpu_ucode.h"
#include "amdgpu_gds.h"
#include "gpu_scheduler.h"
/*
* Modules parameters.
*/
......@@ -77,7 +79,11 @@ extern int amdgpu_bapm;
extern int amdgpu_deep_color;
extern int amdgpu_vm_size;
extern int amdgpu_vm_block_size;
extern int amdgpu_enable_scheduler;
extern int amdgpu_sched_jobs;
extern int amdgpu_sched_hw_submission;
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
#define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2)
/* AMDGPU_IB_POOL_SIZE must be a power of 2 */
......@@ -178,6 +184,7 @@ struct amdgpu_ring;
struct amdgpu_semaphore;
struct amdgpu_cs_parser;
struct amdgpu_irq_src;
struct amdgpu_fpriv;
enum amdgpu_cp_irq {
AMDGPU_CP_IRQ_GFX_EOP = 0,
......@@ -381,10 +388,10 @@ struct amdgpu_fence_driver {
uint64_t sync_seq[AMDGPU_MAX_RINGS];
atomic64_t last_seq;
bool initialized;
bool delayed_irq;
struct amdgpu_irq_src *irq_src;
unsigned irq_type;
struct delayed_work lockup_work;
wait_queue_head_t fence_queue;
};
/* some special values for the owner field */
......@@ -423,20 +430,18 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src,
unsigned irq_type);
void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
struct amdgpu_fence **fence);
int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner,
uint64_t seq, struct amdgpu_fence **fence);
void amdgpu_fence_process(struct amdgpu_ring *ring);
int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
bool amdgpu_fence_signaled(struct amdgpu_fence *fence);
int amdgpu_fence_wait(struct amdgpu_fence *fence, bool interruptible);
int amdgpu_fence_wait_any(struct amdgpu_device *adev,
signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
struct amdgpu_fence **fences,
bool intr);
bool intr, long t);
struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence);
void amdgpu_fence_unref(struct amdgpu_fence **fence);
......@@ -481,7 +486,7 @@ static inline bool amdgpu_fence_is_earlier(struct amdgpu_fence *a,
return a->seq < b->seq;
}
int amdgpu_user_fence_emit(struct amdgpu_ring *ring, struct amdgpu_user_fence *user,
int amdgpu_user_fence_emit(struct amdgpu_ring *ring, struct amdgpu_user_fence *user,
void *owner, struct amdgpu_fence **fence);
/*
......@@ -532,14 +537,16 @@ struct amdgpu_bo_va_mapping {
struct amdgpu_bo_va {
/* protected by bo being reserved */
struct list_head bo_list;
uint64_t addr;
struct amdgpu_fence *last_pt_update;
struct fence *last_pt_update;
unsigned ref_count;
/* protected by vm mutex */
struct list_head mappings;
/* protected by vm mutex and spinlock */
struct list_head vm_status;
/* mappings for this bo_va */
struct list_head invalids;
struct list_head valids;
/* constant after initialization */
struct amdgpu_vm *vm;
struct amdgpu_bo *bo;
......@@ -697,8 +704,8 @@ struct amdgpu_sync {
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
void amdgpu_sync_fence(struct amdgpu_sync *sync,
struct amdgpu_fence *fence);
int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct fence *f);
int amdgpu_sync_resv(struct amdgpu_device *adev,
struct amdgpu_sync *sync,
struct reservation_object *resv,
......@@ -821,7 +828,9 @@ struct amdgpu_flip_work {
uint64_t base;
struct drm_pending_vblank_event *event;
struct amdgpu_bo *old_rbo;
struct fence *fence;
struct fence *excl;
unsigned shared_count;
struct fence **shared;
};
......@@ -844,6 +853,8 @@ struct amdgpu_ib {
uint32_t gws_base, gws_size;
uint32_t oa_base, oa_size;
uint32_t flags;
/* resulting sequence number */
uint64_t sequence;
};
enum amdgpu_ring_type {
......@@ -854,11 +865,23 @@ enum amdgpu_ring_type {
AMDGPU_RING_TYPE_VCE
};
extern struct amd_sched_backend_ops amdgpu_sched_ops;
int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_ib *ibs,
unsigned num_ibs,
int (*free_job)(struct amdgpu_cs_parser *),
void *owner,
struct fence **fence);
struct amdgpu_ring {
struct amdgpu_device *adev;
const struct amdgpu_ring_funcs *funcs;
struct amdgpu_fence_driver fence_drv;
struct amd_gpu_scheduler *scheduler;
spinlock_t fence_lock;
struct mutex *ring_lock;
struct amdgpu_bo *ring_obj;
volatile uint32_t *ring;
......@@ -892,6 +915,7 @@ struct amdgpu_ring {
struct amdgpu_ctx *current_ctx;
enum amdgpu_ring_type type;
char name[16];
bool is_pte_ring;
};
/*
......@@ -943,18 +967,22 @@ struct amdgpu_vm {
struct rb_root va;
/* protecting invalidated and freed */
/* protecting invalidated */
spinlock_t status_lock;
/* BOs moved, but not yet updated in the PT */
struct list_head invalidated;
/* BOs freed, but not yet updated in the PT */
/* BOs cleared in the PT because of a move */
struct list_head cleared;
/* BO mappings freed, but not yet updated in the PT */
struct list_head freed;
/* contains the page directory */
struct amdgpu_bo *page_directory;
unsigned max_pde_used;
struct fence *page_directory_fence;
/* array of page tables, one for each page directory entry */
struct amdgpu_vm_pt *page_tables;
......@@ -983,27 +1011,47 @@ struct amdgpu_vm_manager {
* context related structures
*/
struct amdgpu_ctx_state {
uint64_t flags;
uint32_t hangs;
#define AMDGPU_CTX_MAX_CS_PENDING 16
struct amdgpu_ctx_ring {
uint64_t sequence;
struct fence *fences[AMDGPU_CTX_MAX_CS_PENDING];
struct amd_sched_entity entity;
};
struct amdgpu_ctx {
/* call kref_get()before CS start and kref_put() after CS fence signaled */
struct kref refcount;
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx_state state;
uint32_t id;
unsigned reset_counter;
struct kref refcount;
struct amdgpu_device *adev;
unsigned reset_counter;
spinlock_t ring_lock;
struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
};
struct amdgpu_ctx_mgr {
struct amdgpu_device *adev;
struct idr ctx_handles;
/* lock for IDR system */
struct mutex lock;
struct amdgpu_device *adev;
struct mutex lock;
/* protected by lock */
struct idr ctx_handles;
};
int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel,
struct amdgpu_ctx *ctx);
void amdgpu_ctx_fini(struct amdgpu_ctx *ctx);
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
struct fence *fence, uint64_t queued_seq);
struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct amdgpu_ring *ring, uint64_t seq);
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
/*
* file private structure
*/
......@@ -1012,7 +1060,7 @@ struct amdgpu_fpriv {
struct amdgpu_vm vm;
struct mutex bo_list_lock;
struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr;
struct amdgpu_ctx_mgr ctx_mgr;
};
/*
......@@ -1029,6 +1077,8 @@ struct amdgpu_bo_list {
struct amdgpu_bo_list_entry *array;
};
struct amdgpu_bo_list *
amdgpu_bo_list_clone(struct amdgpu_bo_list *list);
struct amdgpu_bo_list *
amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id);
void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
......@@ -1205,6 +1255,14 @@ struct amdgpu_cs_parser {
/* user fence */
struct amdgpu_user_fence uf;
struct amdgpu_ring *ring;
struct mutex job_lock;
struct work_struct job_work;
int (*prepare_job)(struct amdgpu_cs_parser *sched_job);
int (*run_job)(struct amdgpu_cs_parser *sched_job);
int (*free_job)(struct amdgpu_cs_parser *sched_job);
struct amd_sched_fence *s_fence;
};
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx)
......@@ -1849,17 +1907,12 @@ struct amdgpu_atcs {
struct amdgpu_atcs_functions functions;
};
int amdgpu_ctx_alloc(struct amdgpu_device *adev,struct amdgpu_fpriv *fpriv,
uint32_t *id,uint32_t flags);
int amdgpu_ctx_free(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
uint32_t id);
void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv);
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
/*
* CGS
*/
void *amdgpu_cgs_create_device(struct amdgpu_device *adev);
void amdgpu_cgs_destroy_device(void *cgs_device);
extern int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
/*
* Core structure, functions and helpers.
......@@ -1883,7 +1936,7 @@ struct amdgpu_device {
struct rw_semaphore exclusive_lock;
/* ASIC */
enum amdgpu_asic_type asic_type;
enum amd_asic_type asic_type;
uint32_t family;
uint32_t rev_id;
uint32_t external_rev_id;
......@@ -1976,7 +2029,6 @@ struct amdgpu_device {
struct amdgpu_irq_src hpd_irq;
/* rings */
wait_queue_head_t fence_queue;
unsigned fence_context;
struct mutex ring_lock;
unsigned num_rings;
......@@ -2028,6 +2080,9 @@ struct amdgpu_device {
/* amdkfd interface */
struct kfd_dev *kfd;
/* kernel conext for IB submission */
struct amdgpu_ctx kernel_ctx;
};
bool amdgpu_device_is_px(struct drm_device *dev);
......@@ -2215,6 +2270,12 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
bool amdgpu_card_posted(struct amdgpu_device *adev);
void amdgpu_update_display_priority(struct amdgpu_device *adev);
bool amdgpu_boot_test_post_card(struct amdgpu_device *adev);
struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
struct drm_file *filp,
struct amdgpu_ctx *ctx,
struct amdgpu_ib *ibs,
uint32_t num_ibs);
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
u32 ip_instance, u32 ring,
......@@ -2278,8 +2339,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct list_head *head);
struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
struct amdgpu_vm *vm);
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync);
void amdgpu_vm_flush(struct amdgpu_ring *ring,
struct amdgpu_vm *vm,
struct amdgpu_fence *updates);
......
......@@ -21,7 +21,7 @@
*/
#include "amdgpu_amdkfd.h"
#include "amdgpu_family.h"
#include "amd_shared.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include <linux/module.h>
......@@ -50,9 +50,11 @@ bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
#endif
switch (rdev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
break;
#endif
case CHIP_CARRIZO:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
......
......@@ -897,7 +897,7 @@ bool amdgpu_atombios_get_asic_ss_info(struct amdgpu_device *adev,
if ((id == ASIC_INTERNAL_ENGINE_SS) ||
(id == ASIC_INTERNAL_MEMORY_SS))
ss->rate /= 100;
if (adev->flags & AMDGPU_IS_APU)
if (adev->flags & AMD_IS_APU)
amdgpu_atombios_get_igp_ss_overrides(adev, ss, id);
return true;
}
......@@ -1058,7 +1058,7 @@ void amdgpu_atombios_set_memory_clock(struct amdgpu_device *adev,
SET_MEMORY_CLOCK_PS_ALLOCATION args;
int index = GetIndexIntoMasterTable(COMMAND, SetMemoryClock);
if (adev->flags & AMDGPU_IS_APU)
if (adev->flags & AMD_IS_APU)
return;
args.ulTargetMemoryClock = cpu_to_le32(mem_clock); /* 10 khz */
......
......@@ -42,7 +42,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence);
if (r)
goto exit_do_move;
r = amdgpu_fence_wait(fence, false);
r = fence_wait(&fence->base, false);
if (r)
goto exit_do_move;
amdgpu_fence_unref(&fence);
......
......@@ -48,7 +48,7 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
resource_size_t vram_base;
resource_size_t size = 256 * 1024; /* ??? */
if (!(adev->flags & AMDGPU_IS_APU))
if (!(adev->flags & AMD_IS_APU))
if (!amdgpu_card_posted(adev))
return false;
......@@ -184,7 +184,7 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
bool found = false;
/* ATRM is for the discrete card only */
if (adev->flags & AMDGPU_IS_APU)
if (adev->flags & AMD_IS_APU)
return false;
while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
......@@ -246,7 +246,7 @@ static inline bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
static bool amdgpu_read_disabled_bios(struct amdgpu_device *adev)
{
if (adev->flags & AMDGPU_IS_APU)
if (adev->flags & AMD_IS_APU)
return igp_read_bios_from_vram(adev);
else
return amdgpu_asic_read_disabled_bios(adev);
......
......@@ -62,6 +62,39 @@ static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
return 0;
}
struct amdgpu_bo_list *
amdgpu_bo_list_clone(struct amdgpu_bo_list *list)
{
struct amdgpu_bo_list *result;
unsigned i;
result = kmalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
if (!result)
return NULL;
result->array = drm_calloc_large(list->num_entries,
sizeof(struct amdgpu_bo_list_entry));
if (!result->array) {
kfree(result);
return NULL;
}
mutex_init(&result->lock);
result->gds_obj = list->gds_obj;
result->gws_obj = list->gws_obj;
result->oa_obj = list->oa_obj;
result->has_userptr = list->has_userptr;
result->num_entries = list->num_entries;
memcpy(result->array, list->array, list->num_entries *
sizeof(struct amdgpu_bo_list_entry));
for (i = 0; i < result->num_entries; ++i)
amdgpu_bo_ref(result->array[i].robj);
return result;
}
static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
{
struct amdgpu_bo_list *list;
......
此差异已折叠。
......@@ -126,12 +126,54 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
return 0;
}
static void amdgpu_job_work_func(struct work_struct *work)
{
struct amdgpu_cs_parser *sched_job =
container_of(work, struct amdgpu_cs_parser,
job_work);
mutex_lock(&sched_job->job_lock);
if (sched_job->free_job)
sched_job->free_job(sched_job);
mutex_unlock(&sched_job->job_lock);
/* after processing job, free memory */
fence_put(&sched_job->s_fence->base);
kfree(sched_job);
}
struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
struct drm_file *filp,
struct amdgpu_ctx *ctx,
struct amdgpu_ib *ibs,
uint32_t num_ibs)
{
struct amdgpu_cs_parser *parser;
int i;
parser = kzalloc(sizeof(struct amdgpu_cs_parser), GFP_KERNEL);
if (!parser)
return NULL;
parser->adev = adev;
parser->filp = filp;
parser->ctx = ctx;
parser->ibs = ibs;
parser->num_ibs = num_ibs;
if (amdgpu_enable_scheduler) {
mutex_init(&parser->job_lock);
INIT_WORK(&parser->job_work, amdgpu_job_work_func);
}
for (i = 0; i < num_ibs; i++)
ibs[i].ctx = ctx;
return parser;
}
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{
union drm_amdgpu_cs *cs = data;
uint64_t *chunk_array_user;
uint64_t *chunk_array = NULL;
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_bo_list *bo_list = NULL;
unsigned size, i;
int r = 0;
......@@ -143,17 +185,30 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
r = -EINVAL;
goto out;
}
p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
if (!amdgpu_enable_scheduler)
p->bo_list = bo_list;
else {
if (bo_list && !bo_list->has_userptr) {
p->bo_list = amdgpu_bo_list_clone(bo_list);
amdgpu_bo_list_put(bo_list);
if (!p->bo_list)
return -ENOMEM;
} else if (bo_list && bo_list->has_userptr)
p->bo_list = bo_list;
else
p->bo_list = NULL;
}
/* get chunks */
INIT_LIST_HEAD(&p->validated);
chunk_array = kcalloc(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
if (chunk_array == NULL) {
r = -ENOMEM;
goto out;
}
chunk_array_user = (uint64_t *)(unsigned long)(cs->in.chunks);
chunk_array_user = (uint64_t __user *)(cs->in.chunks);
if (copy_from_user(chunk_array, chunk_array_user,
sizeof(uint64_t)*cs->in.num_chunks)) {
r = -EFAULT;
......@@ -161,7 +216,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
}
p->nchunks = cs->in.num_chunks;
p->chunks = kcalloc(p->nchunks, sizeof(struct amdgpu_cs_chunk),
p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
GFP_KERNEL);
if (p->chunks == NULL) {
r = -ENOMEM;
......@@ -173,7 +228,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
struct drm_amdgpu_cs_chunk user_chunk;
uint32_t __user *cdata;
chunk_ptr = (void __user *)(unsigned long)chunk_array[i];
chunk_ptr = (void __user *)chunk_array[i];
if (copy_from_user(&user_chunk, chunk_ptr,
sizeof(struct drm_amdgpu_cs_chunk))) {
r = -EFAULT;
......@@ -183,7 +238,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
p->chunks[i].length_dw = user_chunk.length_dw;
size = p->chunks[i].length_dw;
cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
cdata = (void __user *)user_chunk.chunk_data;
p->chunks[i].user_ptr = cdata;
p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
......@@ -235,11 +290,10 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
}
}
p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
if (!p->ibs) {
p->ibs = kmalloc_array(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
if (!p->ibs)
r = -ENOMEM;
goto out;
}
out:
kfree(chunk_array);
......@@ -415,18 +469,8 @@ static int cmp_size_smaller_first(void *priv, struct list_head *a,
return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}
/**
* cs_parser_fini() - clean parser states
* @parser: parser structure holding parsing context.
* @error: error number
*
* If error is set than unvalidate buffer, otherwise just free memory
* used by parsing context.
**/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff)
{
unsigned i;
if (!error) {
/* Sort the buffer list from the smallest to largest buffer,
* which affects the order of buffers in the LRU list.
......@@ -447,11 +491,19 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
}
}
static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
{
unsigned i;
if (parser->ctx)
amdgpu_ctx_put(parser->ctx);
if (parser->bo_list)
amdgpu_bo_list_put(parser->bo_list);
if (parser->bo_list) {
if (amdgpu_enable_scheduler && !parser->bo_list->has_userptr)
amdgpu_bo_list_free(parser->bo_list);
else
amdgpu_bo_list_put(parser->bo_list);
}
drm_free_large(parser->vm_bos);
for (i = 0; i < parser->nchunks; i++)
drm_free_large(parser->chunks[i].kdata);
......@@ -462,6 +514,29 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
kfree(parser->ibs);
if (parser->uf.bo)
drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
if (!amdgpu_enable_scheduler)
kfree(parser);
}
/**
* cs_parser_fini() - clean parser states
* @parser: parser structure holding parsing context.
* @error: error number
*
* If error is set than unvalidate buffer, otherwise just free memory
* used by parsing context.
**/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
{
amdgpu_cs_parser_fini_early(parser, error, backoff);
amdgpu_cs_parser_fini_late(parser);
}
static int amdgpu_cs_parser_free_job(struct amdgpu_cs_parser *sched_job)
{
amdgpu_cs_parser_fini_late(sched_job);
return 0;
}
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
......@@ -476,12 +551,18 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
if (r)
return r;
r = amdgpu_sync_fence(adev, &p->ibs[0].sync, vm->page_directory_fence);
if (r)
return r;
r = amdgpu_vm_clear_freed(adev, vm);
if (r)
return r;
if (p->bo_list) {
for (i = 0; i < p->bo_list->num_entries; i++) {
struct fence *f;
/* ignore duplicates */
bo = p->bo_list->array[i].robj;
if (!bo)
......@@ -495,7 +576,10 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
if (r)
return r;
amdgpu_sync_fence(&p->ibs[0].sync, bo_va->last_pt_update);
f = bo_va->last_pt_update;
r = amdgpu_sync_fence(adev, &p->ibs[0].sync, f);
if (r)
return r;
}
}
......@@ -529,9 +613,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
goto out;
}
amdgpu_cs_sync_rings(parser);
r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
parser->filp);
if (!amdgpu_enable_scheduler)
r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
parser->filp);
out:
mutex_unlock(&vm->mutex);
......@@ -650,7 +734,6 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
ib->oa_size = amdgpu_bo_size(oa);
}
}
/* wrap the last IB with user fence */
if (parser->uf.bo) {
struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1];
......@@ -693,9 +776,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
sizeof(struct drm_amdgpu_cs_chunk_dep);
for (j = 0; j < num_deps; ++j) {
struct amdgpu_fence *fence;
struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx;
struct fence *fence;
r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
deps[j].ip_instance,
......@@ -707,50 +790,34 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
if (ctx == NULL)
return -EINVAL;
r = amdgpu_fence_recreate(ring, p->filp,
deps[j].handle,
&fence);
if (r) {
fence = amdgpu_ctx_get_fence(ctx, ring,
deps[j].handle);
if (IS_ERR(fence)) {
r = PTR_ERR(fence);
amdgpu_ctx_put(ctx);
return r;
}
amdgpu_sync_fence(&ib->sync, fence);
amdgpu_fence_unref(&fence);
amdgpu_ctx_put(ctx);
} else if (fence) {
r = amdgpu_sync_fence(adev, &ib->sync, fence);
fence_put(fence);
amdgpu_ctx_put(ctx);
if (r)
return r;
}
}
}
return 0;
}
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
static int amdgpu_cs_parser_prepare_job(struct amdgpu_cs_parser *sched_job)
{
struct amdgpu_device *adev = dev->dev_private;
union drm_amdgpu_cs *cs = data;
struct amdgpu_cs_parser parser;
int r, i;
struct amdgpu_cs_parser *parser = sched_job;
struct amdgpu_device *adev = sched_job->adev;
bool reserved_buffers = false;
down_read(&adev->exclusive_lock);
if (!adev->accel_working) {
up_read(&adev->exclusive_lock);
return -EBUSY;
}
/* initialize parser */
memset(&parser, 0, sizeof(struct amdgpu_cs_parser));
parser.filp = filp;
parser.adev = adev;
r = amdgpu_cs_parser_init(&parser, data);
if (r) {
DRM_ERROR("Failed to initialize parser !\n");
amdgpu_cs_parser_fini(&parser, r, false);
up_read(&adev->exclusive_lock);
r = amdgpu_cs_handle_lockup(adev, r);
return r;
}
r = amdgpu_cs_parser_relocs(&parser);
r = amdgpu_cs_parser_relocs(parser);
if (r) {
if (r != -ERESTARTSYS) {
if (r == -ENOMEM)
......@@ -762,30 +829,114 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (!r) {
reserved_buffers = true;
r = amdgpu_cs_ib_fill(adev, &parser);
r = amdgpu_cs_ib_fill(adev, parser);
}
if (!r) {
r = amdgpu_cs_dependencies(adev, parser);
if (r)
DRM_ERROR("Failed in the dependencies handling %d!\n", r);
}
if (r) {
amdgpu_cs_parser_fini(parser, r, reserved_buffers);
return r;
}
for (i = 0; i < parser->num_ibs; i++)
trace_amdgpu_cs(parser, i);
r = amdgpu_cs_ib_vm_chunk(adev, parser);
return r;
}
static struct amdgpu_ring *amdgpu_cs_parser_get_ring(
struct amdgpu_device *adev,
struct amdgpu_cs_parser *parser)
{
int i, r;
struct amdgpu_cs_chunk *chunk;
struct drm_amdgpu_cs_chunk_ib *chunk_ib;
struct amdgpu_ring *ring;
for (i = 0; i < parser->nchunks; i++) {
chunk = &parser->chunks[i];
chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
continue;
r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
chunk_ib->ip_instance, chunk_ib->ring,
&ring);
if (r)
return NULL;
break;
}
return ring;
}
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
union drm_amdgpu_cs *cs = data;
struct amdgpu_cs_parser *parser;
int r;
if (!r)
r = amdgpu_cs_dependencies(adev, &parser);
down_read(&adev->exclusive_lock);
if (!adev->accel_working) {
up_read(&adev->exclusive_lock);
return -EBUSY;
}
parser = amdgpu_cs_parser_create(adev, filp, NULL, NULL, 0);
if (!parser)
return -ENOMEM;
r = amdgpu_cs_parser_init(parser, data);
if (r) {
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
DRM_ERROR("Failed to initialize parser !\n");
amdgpu_cs_parser_fini(parser, r, false);
up_read(&adev->exclusive_lock);
r = amdgpu_cs_handle_lockup(adev, r);
return r;
}
for (i = 0; i < parser.num_ibs; i++)
trace_amdgpu_cs(&parser, i);
if (amdgpu_enable_scheduler && parser->num_ibs) {
struct amdgpu_ring * ring =
amdgpu_cs_parser_get_ring(adev, parser);
r = amdgpu_cs_parser_prepare_job(parser);
if (r)
goto out;
parser->ring = ring;
parser->free_job = amdgpu_cs_parser_free_job;
mutex_lock(&parser->job_lock);
r = amd_sched_push_job(ring->scheduler,
&parser->ctx->rings[ring->idx].entity,
parser,
&parser->s_fence);
if (r) {
mutex_unlock(&parser->job_lock);
goto out;
}
parser->ibs[parser->num_ibs - 1].sequence =
amdgpu_ctx_add_fence(parser->ctx, ring,
&parser->s_fence->base,
parser->s_fence->v_seq);
cs->out.handle = parser->s_fence->v_seq;
list_sort(NULL, &parser->validated, cmp_size_smaller_first);
ttm_eu_fence_buffer_objects(&parser->ticket,
&parser->validated,
&parser->s_fence->base);
r = amdgpu_cs_ib_vm_chunk(adev, &parser);
if (r) {
goto out;
mutex_unlock(&parser->job_lock);
up_read(&adev->exclusive_lock);
return 0;
}
r = amdgpu_cs_parser_prepare_job(parser);
if (r)
goto out;
cs->out.handle = parser.ibs[parser.num_ibs - 1].fence->seq;
cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
out:
amdgpu_cs_parser_fini(&parser, r, true);
amdgpu_cs_parser_fini(parser, r, true);
up_read(&adev->exclusive_lock);
r = amdgpu_cs_handle_lockup(adev, r);
return r;
......@@ -806,30 +957,29 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
union drm_amdgpu_wait_cs *wait = data;
struct amdgpu_device *adev = dev->dev_private;
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
struct amdgpu_fence *fence = NULL;
struct amdgpu_ring *ring = NULL;
struct amdgpu_ctx *ctx;
struct fence *fence;
long r;
ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
if (ctx == NULL)
return -EINVAL;
r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
wait->in.ring, &ring);
if (r) {
amdgpu_ctx_put(ctx);
if (r)
return r;
}
r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence);
if (r) {
amdgpu_ctx_put(ctx);
return r;
}
ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
if (ctx == NULL)
return -EINVAL;
fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
if (IS_ERR(fence))
r = PTR_ERR(fence);
else if (fence) {
r = fence_wait_timeout(fence, true, timeout);
fence_put(fence);
} else
r = 1;
r = fence_wait_timeout(&fence->base, true, timeout);
amdgpu_fence_unref(&fence);
amdgpu_ctx_put(ctx);
if (r < 0)
return r;
......@@ -864,7 +1014,16 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
if (!reloc->bo_va)
continue;
list_for_each_entry(mapping, &reloc->bo_va->mappings, list) {
list_for_each_entry(mapping, &reloc->bo_va->valids, list) {
if (mapping->it.start > addr ||
addr > mapping->it.last)
continue;
*bo = reloc->bo_va->bo;
return mapping;
}
list_for_each_entry(mapping, &reloc->bo_va->invalids, list) {
if (mapping->it.start > addr ||
addr > mapping->it.last)
continue;
......
......@@ -25,54 +25,107 @@
#include <drm/drmP.h>
#include "amdgpu.h"
static void amdgpu_ctx_do_release(struct kref *ref)
int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel,
struct amdgpu_ctx *ctx)
{
struct amdgpu_ctx *ctx;
struct amdgpu_ctx_mgr *mgr;
unsigned i, j;
int r;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
mgr = &ctx->fpriv->ctx_mgr;
memset(ctx, 0, sizeof(*ctx));
ctx->adev = adev;
kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
ctx->rings[i].sequence = 1;
idr_remove(&mgr->ctx_handles, ctx->id);
kfree(ctx);
if (amdgpu_enable_scheduler) {
/* create context entity for each ring */
for (i = 0; i < adev->num_rings; i++) {
struct amd_sched_rq *rq;
if (kernel)
rq = &adev->rings[i]->scheduler->kernel_rq;
else
rq = &adev->rings[i]->scheduler->sched_rq;
r = amd_sched_entity_init(adev->rings[i]->scheduler,
&ctx->rings[i].entity,
rq, amdgpu_sched_jobs);
if (r)
break;
}
if (i < adev->num_rings) {
for (j = 0; j < i; j++)
amd_sched_entity_fini(adev->rings[j]->scheduler,
&ctx->rings[j].entity);
kfree(ctx);
return r;
}
}
return 0;
}
int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t *id, uint32_t flags)
void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
{
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j)
fence_put(ctx->rings[i].fences[j]);
if (amdgpu_enable_scheduler) {
for (i = 0; i < adev->num_rings; i++)
amd_sched_entity_fini(adev->rings[i]->scheduler,
&ctx->rings[i].entity);
}
}
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
uint32_t *id)
{
int r;
struct amdgpu_ctx *ctx;
struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
struct amdgpu_ctx *ctx;
int r;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
mutex_lock(&mgr->lock);
r = idr_alloc(&mgr->ctx_handles, ctx, 0, 0, GFP_KERNEL);
r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
if (r < 0) {
mutex_unlock(&mgr->lock);
kfree(ctx);
return r;
}
*id = (uint32_t)r;
memset(ctx, 0, sizeof(*ctx));
ctx->id = *id;
ctx->fpriv = fpriv;
kref_init(&ctx->refcount);
r = amdgpu_ctx_init(adev, false, ctx);
mutex_unlock(&mgr->lock);
return 0;
return r;
}
int amdgpu_ctx_free(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t id)
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
amdgpu_ctx_fini(ctx);
kfree(ctx);
}
static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
struct amdgpu_ctx *ctx;
mutex_lock(&mgr->lock);
ctx = idr_find(&mgr->ctx_handles, id);
if (ctx) {
idr_remove(&mgr->ctx_handles, id);
kref_put(&ctx->refcount, amdgpu_ctx_do_release);
mutex_unlock(&mgr->lock);
return 0;
......@@ -86,9 +139,13 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
union drm_amdgpu_ctx_out *out)
{
struct amdgpu_ctx *ctx;
struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
struct amdgpu_ctx_mgr *mgr;
unsigned reset_counter;
if (!fpriv)
return -EINVAL;
mgr = &fpriv->ctx_mgr;
mutex_lock(&mgr->lock);
ctx = idr_find(&mgr->ctx_handles, id);
if (!ctx) {
......@@ -97,8 +154,8 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
}
/* TODO: these two are always zero */
out->state.flags = ctx->state.flags;
out->state.hangs = ctx->state.hangs;
out->state.flags = 0x0;
out->state.hangs = 0x0;
/* determine if a GPU reset has occured since the last call */
reset_counter = atomic_read(&adev->gpu_reset_counter);
......@@ -113,28 +170,11 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
return 0;
}
void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv)
{
struct idr *idp;
struct amdgpu_ctx *ctx;
uint32_t id;
struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
idp = &mgr->ctx_handles;
idr_for_each_entry(idp,ctx,id) {
if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1)
DRM_ERROR("ctx (id=%ul) is still alive\n",ctx->id);
}
mutex_destroy(&mgr->lock);
}
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
int r;
uint32_t id;
uint32_t flags;
union drm_amdgpu_ctx *args = data;
struct amdgpu_device *adev = dev->dev_private;
......@@ -142,15 +182,14 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
r = 0;
id = args->in.ctx_id;
flags = args->in.flags;
switch (args->in.op) {
case AMDGPU_CTX_OP_ALLOC_CTX:
r = amdgpu_ctx_alloc(adev, fpriv, &id, flags);
r = amdgpu_ctx_alloc(adev, fpriv, &id);
args->out.alloc.ctx_id = id;
break;
case AMDGPU_CTX_OP_FREE_CTX:
r = amdgpu_ctx_free(adev, fpriv, id);
r = amdgpu_ctx_free(fpriv, id);
break;
case AMDGPU_CTX_OP_QUERY_STATE:
r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
......@@ -165,7 +204,12 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
struct amdgpu_ctx *ctx;
struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
struct amdgpu_ctx_mgr *mgr;
if (!fpriv)
return NULL;
mgr = &fpriv->ctx_mgr;
mutex_lock(&mgr->lock);
ctx = idr_find(&mgr->ctx_handles, id);
......@@ -177,17 +221,96 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx_mgr *mgr;
if (ctx == NULL)
return -EINVAL;
fpriv = ctx->fpriv;
mgr = &fpriv->ctx_mgr;
mutex_lock(&mgr->lock);
kref_put(&ctx->refcount, amdgpu_ctx_do_release);
mutex_unlock(&mgr->lock);
return 0;
}
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
struct fence *fence, uint64_t queued_seq)
{
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
uint64_t seq = 0;
unsigned idx = 0;
struct fence *other = NULL;
if (amdgpu_enable_scheduler)
seq = queued_seq;
else
seq = cring->sequence;
idx = seq % AMDGPU_CTX_MAX_CS_PENDING;
other = cring->fences[idx];
if (other) {
signed long r;
r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
if (r < 0)
DRM_ERROR("Error (%ld) waiting for fence!\n", r);
}
fence_get(fence);
spin_lock(&ctx->ring_lock);
cring->fences[idx] = fence;
if (!amdgpu_enable_scheduler)
cring->sequence++;
spin_unlock(&ctx->ring_lock);
fence_put(other);
return seq;
}
struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct amdgpu_ring *ring, uint64_t seq)
{
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
struct fence *fence;
uint64_t queued_seq;
spin_lock(&ctx->ring_lock);
if (amdgpu_enable_scheduler)
queued_seq = amd_sched_next_queued_seq(&cring->entity);
else
queued_seq = cring->sequence;
if (seq >= queued_seq) {
spin_unlock(&ctx->ring_lock);
return ERR_PTR(-EINVAL);
}
if (seq + AMDGPU_CTX_MAX_CS_PENDING < queued_seq) {
spin_unlock(&ctx->ring_lock);
return NULL;
}
fence = fence_get(cring->fences[seq % AMDGPU_CTX_MAX_CS_PENDING]);
spin_unlock(&ctx->ring_lock);
return fence;
}
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
{
mutex_init(&mgr->lock);
idr_init(&mgr->ctx_handles);
}
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
struct amdgpu_ctx *ctx;
struct idr *idp;
uint32_t id;
idp = &mgr->ctx_handles;
idr_for_each_entry(idp, ctx, id) {
if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1)
DRM_ERROR("ctx %p is still alive\n", ctx);
}
idr_destroy(&mgr->ctx_handles);
mutex_destroy(&mgr->lock);
}
......@@ -55,6 +55,7 @@ static const char *amdgpu_asic_name[] = {
"MULLINS",
"TOPAZ",
"TONGA",
"FIJI",
"CARRIZO",
"LAST",
};
......@@ -63,7 +64,7 @@ bool amdgpu_device_is_px(struct drm_device *dev)
{
struct amdgpu_device *adev = dev->dev_private;
if (adev->flags & AMDGPU_IS_PX)
if (adev->flags & AMD_IS_PX)
return true;
return false;
}
......@@ -1160,6 +1161,7 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_TOPAZ:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_CARRIZO:
if (adev->asic_type == CHIP_CARRIZO)
adev->family = AMDGPU_FAMILY_CZ;
......@@ -1377,7 +1379,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->ddev = ddev;
adev->pdev = pdev;
adev->flags = flags;
adev->asic_type = flags & AMDGPU_ASIC_MASK;
adev->asic_type = flags & AMD_ASIC_MASK;
adev->is_atom_bios = false;
adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
adev->mc.gtt_size = 512 * 1024 * 1024;
......@@ -1523,6 +1525,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return r;
}
r = amdgpu_ctx_init(adev, true, &adev->kernel_ctx);
if (r) {
dev_err(adev->dev, "failed to create kernel context (%d).\n", r);
return r;
}
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
......@@ -1584,6 +1591,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
adev->shutdown = true;
/* evict vram memory */
amdgpu_bo_evict_vram(adev);
amdgpu_ctx_fini(&adev->kernel_ctx);
amdgpu_ib_pool_fini(adev);
amdgpu_fence_driver_fini(adev);
amdgpu_fbdev_fini(adev);
......@@ -1627,8 +1635,7 @@ int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
struct amdgpu_device *adev;
struct drm_crtc *crtc;
struct drm_connector *connector;
int i, r;
bool force_completion = false;
int r;
if (dev == NULL || dev->dev_private == NULL) {
return -ENODEV;
......@@ -1667,21 +1674,7 @@ int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
/* evict vram memory */
amdgpu_bo_evict_vram(adev);
/* wait for gpu to finish processing current batch */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring)
continue;
r = amdgpu_fence_wait_empty(ring);
if (r) {
/* delay GPU reset to resume */
force_completion = true;
}
}
if (force_completion) {
amdgpu_fence_driver_force_completion(adev);
}
amdgpu_fence_driver_suspend(adev);
r = amdgpu_suspend(adev);
......@@ -1739,6 +1732,8 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
r = amdgpu_resume(adev);
amdgpu_fence_driver_resume(adev);
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
......
......@@ -35,6 +35,36 @@
#include <drm/drm_crtc_helper.h>
#include <drm/drm_edid.h>
static void amdgpu_flip_wait_fence(struct amdgpu_device *adev,
struct fence **f)
{
struct amdgpu_fence *fence;
long r;
if (*f == NULL)
return;
fence = to_amdgpu_fence(*f);
if (fence) {
r = fence_wait(&fence->base, false);
if (r == -EDEADLK) {
up_read(&adev->exclusive_lock);
r = amdgpu_gpu_reset(adev);
down_read(&adev->exclusive_lock);
}
} else
r = fence_wait(*f, false);
if (r)
DRM_ERROR("failed to wait on page flip fence (%ld)!\n", r);
/* We continue with the page flip even if we failed to wait on
* the fence, otherwise the DRM core and userspace will be
* confused about which BO the CRTC is scanning out
*/
fence_put(*f);
*f = NULL;
}
static void amdgpu_flip_work_func(struct work_struct *__work)
{
......@@ -44,34 +74,13 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
struct amdgpu_crtc *amdgpuCrtc = adev->mode_info.crtcs[work->crtc_id];
struct drm_crtc *crtc = &amdgpuCrtc->base;
struct amdgpu_fence *fence;
unsigned long flags;
int r;
unsigned i;
down_read(&adev->exclusive_lock);
if (work->fence) {
fence = to_amdgpu_fence(work->fence);
if (fence) {
r = amdgpu_fence_wait(fence, false);
if (r == -EDEADLK) {
up_read(&adev->exclusive_lock);
r = amdgpu_gpu_reset(adev);
down_read(&adev->exclusive_lock);
}
} else
r = fence_wait(work->fence, false);
if (r)
DRM_ERROR("failed to wait on page flip fence (%d)!\n", r);
/* We continue with the page flip even if we failed to wait on
* the fence, otherwise the DRM core and userspace will be
* confused about which BO the CRTC is scanning out
*/
fence_put(work->fence);
work->fence = NULL;
}
amdgpu_flip_wait_fence(adev, &work->excl);
for (i = 0; i < work->shared_count; ++i)
amdgpu_flip_wait_fence(adev, &work->shared[i]);
/* We borrow the event spin lock for protecting flip_status */
spin_lock_irqsave(&crtc->dev->event_lock, flags);
......@@ -108,6 +117,7 @@ static void amdgpu_unpin_work_func(struct work_struct *__work)
DRM_ERROR("failed to reserve buffer after flip\n");
drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
kfree(work->shared);
kfree(work);
}
......@@ -127,7 +137,7 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
unsigned long flags;
u64 tiling_flags;
u64 base;
int r;
int i, r;
work = kzalloc(sizeof *work, GFP_KERNEL);
if (work == NULL)
......@@ -167,7 +177,19 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
goto cleanup;
}
work->fence = fence_get(reservation_object_get_excl(new_rbo->tbo.resv));
r = reservation_object_get_fences_rcu(new_rbo->tbo.resv, &work->excl,
&work->shared_count,
&work->shared);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(new_rbo);
DRM_ERROR("failed to get fences for buffer\n");
goto cleanup;
}
fence_get(work->excl);
for (i = 0; i < work->shared_count; ++i)
fence_get(work->shared[i]);
amdgpu_bo_get_tiling_flags(new_rbo, &tiling_flags);
amdgpu_bo_unreserve(new_rbo);
......@@ -212,7 +234,10 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
cleanup:
drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
fence_put(work->fence);
fence_put(work->excl);
for (i = 0; i < work->shared_count; ++i)
fence_put(work->shared[i]);
kfree(work->shared);
kfree(work);
return r;
......
......@@ -63,7 +63,7 @@ int amdgpu_disp_priority = 0;
int amdgpu_hw_i2c = 0;
int amdgpu_pcie_gen2 = -1;
int amdgpu_msi = -1;
int amdgpu_lockup_timeout = 10000;
int amdgpu_lockup_timeout = 0;
int amdgpu_dpm = -1;
int amdgpu_smc_load_fw = 1;
int amdgpu_aspm = -1;
......@@ -75,6 +75,9 @@ int amdgpu_deep_color = 0;
int amdgpu_vm_size = 8;
int amdgpu_vm_block_size = -1;
int amdgpu_exp_hw_support = 0;
int amdgpu_enable_scheduler = 0;
int amdgpu_sched_jobs = 16;
int amdgpu_sched_hw_submission = 2;
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
......@@ -103,7 +106,7 @@ module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444);
MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(msi, amdgpu_msi, int, 0444);
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (defaul 10000 = 10 seconds, 0 = disable)");
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default 0 = disable)");
module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444);
MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)");
......@@ -139,36 +142,45 @@ module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
MODULE_PARM_DESC(enable_scheduler, "enable SW GPU scheduler (1 = enable, 0 = disable ((default))");
module_param_named(enable_scheduler, amdgpu_enable_scheduler, int, 0444);
MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 16)");
module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);
MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
static struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_CIK
/* Kaveri */
{0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x1305, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_APU},
{0x1002, 0x1306, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x1307, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_APU},
{0x1002, 0x1309, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x130A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x130B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x130C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x130D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x130E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x130F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_APU},
{0x1002, 0x1310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_APU},
{0x1002, 0x1311, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_APU},
{0x1002, 0x1312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_APU},
{0x1002, 0x1313, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_APU},
{0x1002, 0x1315, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_APU},
{0x1002, 0x1316, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_APU},
{0x1002, 0x1317, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x1318, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x131B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_APU},
{0x1002, 0x131C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_APU},
{0x1002, 0x131D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMDGPU_IS_APU},
{0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x1305, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
{0x1002, 0x1306, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x1307, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
{0x1002, 0x1309, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x130A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x130B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x130C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x130D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x130E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x130F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
{0x1002, 0x1310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
{0x1002, 0x1311, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
{0x1002, 0x1312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
{0x1002, 0x1313, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
{0x1002, 0x1315, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
{0x1002, 0x1316, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
{0x1002, 0x1317, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x1318, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x131B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
{0x1002, 0x131C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
{0x1002, 0x131D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
/* Bonaire */
{0x1002, 0x6640, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMDGPU_IS_MOBILITY},
{0x1002, 0x6641, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMDGPU_IS_MOBILITY},
{0x1002, 0x6646, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMDGPU_IS_MOBILITY},
{0x1002, 0x6647, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMDGPU_IS_MOBILITY},
{0x1002, 0x6640, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMD_IS_MOBILITY},
{0x1002, 0x6641, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMD_IS_MOBILITY},
{0x1002, 0x6646, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMD_IS_MOBILITY},
{0x1002, 0x6647, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMD_IS_MOBILITY},
{0x1002, 0x6649, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
{0x1002, 0x6650, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
{0x1002, 0x6651, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
......@@ -190,39 +202,39 @@ static struct pci_device_id pciidlist[] = {
{0x1002, 0x67BA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
{0x1002, 0x67BE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
/* Kabini */
{0x1002, 0x9830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x9831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_APU},
{0x1002, 0x9832, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x9833, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_APU},
{0x1002, 0x9834, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x9835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_APU},
{0x1002, 0x9836, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x9837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_APU},
{0x1002, 0x9838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x9839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x983a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_APU},
{0x1002, 0x983b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x983c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_APU},
{0x1002, 0x983d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_APU},
{0x1002, 0x983e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_APU},
{0x1002, 0x983f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMDGPU_IS_APU},
{0x1002, 0x9830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x9831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
{0x1002, 0x9832, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x9833, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
{0x1002, 0x9834, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x9835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
{0x1002, 0x9836, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x9837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
{0x1002, 0x9838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x9839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x983a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
{0x1002, 0x983b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x983c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
{0x1002, 0x983d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
{0x1002, 0x983e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
{0x1002, 0x983f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
/* mullins */
{0x1002, 0x9850, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x9851, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x9852, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x9853, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x9854, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x9855, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x9856, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x9857, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x9858, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x9859, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x985A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x985B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x985C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x985D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x985E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMDGPU_IS_MOBILITY|AMDGPU_IS_APU},
{0x1002, 0x9850, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x9851, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x9852, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x9853, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x9854, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x9855, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x9856, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x9857, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x9858, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x9859, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x985A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x985B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x985C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x985D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x985E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
#endif
/* topaz */
{0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
......@@ -240,12 +252,14 @@ static struct pci_device_id pciidlist[] = {
{0x1002, 0x6930, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
{0x1002, 0x6938, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
{0x1002, 0x6939, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
/* fiji */
{0x1002, 0x7300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_FIJI},
/* carrizo */
{0x1002, 0x9870, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMDGPU_IS_APU},
{0x1002, 0x9874, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMDGPU_IS_APU},
{0x1002, 0x9875, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMDGPU_IS_APU},
{0x1002, 0x9876, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMDGPU_IS_APU},
{0x1002, 0x9877, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMDGPU_IS_APU},
{0x1002, 0x9870, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
{0x1002, 0x9874, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
{0x1002, 0x9875, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
{0x1002, 0x9876, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
{0x1002, 0x9877, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
{0, 0, 0}
};
......@@ -281,7 +295,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
unsigned long flags = ent->driver_data;
int ret;
if ((flags & AMDGPU_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
DRM_INFO("This hardware requires experimental hardware support.\n"
"See modparam exp_hw_support\n");
return -ENODEV;
......
......@@ -31,7 +31,7 @@
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include "amdgpu_family.h"
#include "amd_shared.h"
/* General customization:
*/
......
......@@ -88,6 +88,7 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
ib->fence = NULL;
ib->user = NULL;
ib->vm = vm;
ib->ctx = NULL;
ib->gds_base = 0;
ib->gds_size = 0;
ib->gws_base = 0;
......@@ -142,6 +143,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx, *old_ctx;
struct amdgpu_vm *vm;
uint64_t sequence;
unsigned i;
int r = 0;
......@@ -165,9 +167,11 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
if (vm) {
/* grab a vm id if necessary */
struct amdgpu_fence *vm_id_fence = NULL;
vm_id_fence = amdgpu_vm_grab_id(ibs->ring, ibs->vm);
amdgpu_sync_fence(&ibs->sync, vm_id_fence);
r = amdgpu_vm_grab_id(ibs->vm, ibs->ring, &ibs->sync);
if (r) {
amdgpu_ring_unlock_undo(ring);
return r;
}
}
r = amdgpu_sync_rings(&ibs->sync, ring);
......@@ -212,11 +216,18 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
return r;
}
sequence = amdgpu_enable_scheduler ? ib->sequence : 0;
if (!amdgpu_enable_scheduler && ib->ctx)
ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
&ib->fence->base,
sequence);
/* wrap the last IB with fence */
if (ib->user) {
uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo);
addr += ib->user->offset;
amdgpu_ring_emit_fence(ring, addr, ib->fence->seq,
amdgpu_ring_emit_fence(ring, addr, ib->sequence,
AMDGPU_FENCE_FLAG_64BIT);
}
......
......@@ -206,6 +206,8 @@ int amdgpu_ih_process(struct amdgpu_device *adev)
amdgpu_amdkfd_interrupt(adev,
(const void *) &adev->irq.ih.ring[ring_index]);
entry.iv_entry = (const uint32_t *)
&adev->irq.ih.ring[ring_index];
amdgpu_ih_decode_iv(adev, &entry);
adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
......
......@@ -52,6 +52,7 @@ struct amdgpu_iv_entry {
unsigned ring_id;
unsigned vm_id;
unsigned pas_id;
const uint32_t *iv_entry;
};
int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
......
......@@ -272,6 +272,11 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
kfree(src->enabled_types);
src->enabled_types = NULL;
if (src->data) {
kfree(src->data);
kfree(src);
adev->irq.sources[i] = NULL;
}
}
}
......
......@@ -40,6 +40,7 @@ struct amdgpu_irq_src {
unsigned num_types;
atomic_t *enabled_types;
const struct amdgpu_irq_src_funcs *funcs;
void *data;
};
/* provided by interrupt generating IP blocks */
......
......@@ -96,8 +96,8 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
if ((amdgpu_runtime_pm != 0) &&
amdgpu_has_atpx() &&
((flags & AMDGPU_IS_APU) == 0))
flags |= AMDGPU_IS_PX;
((flags & AMD_IS_APU) == 0))
flags |= AMD_IS_PX;
/* amdgpu_device_init should report only fatal error
* like memory allocation failure or iomapping failure,
......@@ -451,11 +451,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
dev_info._pad = 0;
dev_info.ids_flags = 0;
if (adev->flags & AMDGPU_IS_APU)
if (adev->flags & AMD_IS_APU)
dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
dev_info.virtual_address_alignment = max(PAGE_SIZE, 0x10000UL);
dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
dev_info.pte_fragment_size = (1 << AMDGPU_LOG2_PAGES_PER_FRAG) *
AMDGPU_GPU_PAGE_SIZE;
dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE;
......@@ -527,10 +527,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
mutex_init(&fpriv->bo_list_lock);
idr_init(&fpriv->bo_list_handles);
/* init context manager */
mutex_init(&fpriv->ctx_mgr.lock);
idr_init(&fpriv->ctx_mgr.ctx_handles);
fpriv->ctx_mgr.adev = adev;
amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
file_priv->driver_priv = fpriv;
......@@ -571,8 +568,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
idr_destroy(&fpriv->bo_list_handles);
mutex_destroy(&fpriv->bo_list_lock);
/* release context */
amdgpu_ctx_fini(fpriv);
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
kfree(fpriv);
file_priv->driver_priv = NULL;
......
......@@ -223,18 +223,6 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
size_t acc_size;
int r;
/* VI has a hw bug where VM PTEs have to be allocated in groups of 8.
* do this as a temporary workaround
*/
if (!(domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA))) {
if (adev->asic_type >= CHIP_TOPAZ) {
if (byte_align & 0x7fff)
byte_align = ALIGN(byte_align, 0x8000);
if (size & 0x7fff)
size = ALIGN(size, 0x8000);
}
}
page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
size = ALIGN(size, PAGE_SIZE);
......@@ -462,7 +450,7 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
{
/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */
if (0 && (adev->flags & AMDGPU_IS_APU)) {
if (0 && (adev->flags & AMD_IS_APU)) {
/* Useless to evict on IGP chips */
return 0;
}
......@@ -478,7 +466,6 @@ void amdgpu_bo_force_delete(struct amdgpu_device *adev)
}
dev_err(adev->dev, "Userspace still has active objects !\n");
list_for_each_entry_safe(bo, n, &adev->gem.objects, list) {
mutex_lock(&adev->ddev->struct_mutex);
dev_err(adev->dev, "%p %p %lu %lu force free\n",
&bo->gem_base, bo, (unsigned long)bo->gem_base.size,
*((unsigned long *)&bo->gem_base.refcount));
......@@ -486,8 +473,7 @@ void amdgpu_bo_force_delete(struct amdgpu_device *adev)
list_del_init(&bo->list);
mutex_unlock(&bo->adev->gem.mutex);
/* this should unref the ttm bo */
drm_gem_object_unreference(&bo->gem_base);
mutex_unlock(&adev->ddev->struct_mutex);
drm_gem_object_unreference_unlocked(&bo->gem_base);
}
}
......@@ -658,13 +644,13 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
* @shared: true if fence should be added shared
*
*/
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct amdgpu_fence *fence,
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence,
bool shared)
{
struct reservation_object *resv = bo->tbo.resv;
if (shared)
reservation_object_add_shared_fence(resv, &fence->base);
reservation_object_add_shared_fence(resv, fence);
else
reservation_object_add_excl_fence(resv, &fence->base);
reservation_object_add_excl_fence(resv, fence);
}
......@@ -161,7 +161,7 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *new_mem);
int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct amdgpu_fence *fence,
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence,
bool shared);
/*
......
......@@ -82,7 +82,7 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
mutex_unlock(&adev->pm.mutex);
/* Can't set dpm state when the card is off */
if (!(adev->flags & AMDGPU_IS_PX) ||
if (!(adev->flags & AMD_IS_PX) ||
(ddev->switch_power_state == DRM_SWITCH_POWER_ON))
amdgpu_pm_compute_clocks(adev);
fail:
......@@ -538,7 +538,7 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
/* vce just modifies an existing state so force a change */
if (ps->vce_active != adev->pm.dpm.vce_active)
goto force;
if (adev->flags & AMDGPU_IS_APU) {
if (adev->flags & AMD_IS_APU) {
/* for APUs if the num crtcs changed but state is the same,
* all we need to do is update the display configuration.
*/
......@@ -580,7 +580,6 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
amdgpu_dpm_print_power_state(adev, adev->pm.dpm.requested_ps);
}
mutex_lock(&adev->ddev->struct_mutex);
mutex_lock(&adev->ring_lock);
/* update whether vce is active */
......@@ -628,7 +627,6 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
done:
mutex_unlock(&adev->ring_lock);
mutex_unlock(&adev->ddev->struct_mutex);
}
void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
......
......@@ -342,6 +342,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
amdgpu_fence_driver_init_ring(ring);
}
init_waitqueue_head(&ring->fence_drv.fence_queue);
r = amdgpu_wb_get(adev, &ring->rptr_offs);
if (r) {
dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
......@@ -367,7 +369,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
}
ring->next_rptr_gpu_addr = adev->wb.gpu_addr + (ring->next_rptr_offs * 4);
ring->next_rptr_cpu_addr = &adev->wb.wb[ring->next_rptr_offs];
spin_lock_init(&ring->fence_lock);
r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
if (r) {
dev_err(adev->dev, "failed initializing fences (%d).\n", r);
......
......@@ -160,7 +160,8 @@ static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
if (sa_bo->fence == NULL || !amdgpu_fence_signaled(sa_bo->fence)) {
if (sa_bo->fence == NULL ||
!fence_is_signaled(&sa_bo->fence->base)) {
return;
}
amdgpu_sa_bo_remove_locked(sa_bo);
......@@ -274,7 +275,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
sa_bo = list_first_entry(&sa_manager->flist[i],
struct amdgpu_sa_bo, flist);
if (!amdgpu_fence_signaled(sa_bo->fence)) {
if (!fence_is_signaled(&sa_bo->fence->base)) {
fences[i] = sa_bo->fence;
continue;
}
......@@ -317,6 +318,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
struct amdgpu_fence *fences[AMDGPU_MAX_RINGS];
unsigned tries[AMDGPU_MAX_RINGS];
int i, r;
signed long t;
BUG_ON(align > sa_manager->align);
BUG_ON(size > sa_manager->size);
......@@ -350,7 +352,8 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
} while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
spin_unlock(&sa_manager->wq.lock);
r = amdgpu_fence_wait_any(adev, fences, false);
t = amdgpu_fence_wait_any(adev, fences, false, MAX_SCHEDULE_TIMEOUT);
r = (t > 0) ? 0 : t;
spin_lock(&sa_manager->wq.lock);
/* if we have nothing to wait for block */
if (r == -ENOENT) {
......@@ -379,7 +382,7 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
sa_manager = (*sa_bo)->manager;
spin_lock(&sa_manager->wq.lock);
if (fence && !amdgpu_fence_signaled(fence)) {
if (fence && !fence_is_signaled(&fence->base)) {
(*sa_bo)->fence = amdgpu_fence_ref(fence);
list_add_tail(&(*sa_bo)->flist,
&sa_manager->flist[fence->ring->idx]);
......
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*
*/
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <drm/drmP.h>
#include "amdgpu.h"
static int amdgpu_sched_prepare_job(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity,
struct amd_sched_job *job)
{
int r = 0;
struct amdgpu_cs_parser *sched_job;
if (!job || !job->data) {
DRM_ERROR("job is null\n");
return -EINVAL;
}
sched_job = (struct amdgpu_cs_parser *)job->data;
if (sched_job->prepare_job) {
r = sched_job->prepare_job(sched_job);
if (r) {
DRM_ERROR("Prepare job error\n");
schedule_work(&sched_job->job_work);
}
}
return r;
}
static struct fence *amdgpu_sched_run_job(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity,
struct amd_sched_job *job)
{
int r = 0;
struct amdgpu_cs_parser *sched_job;
struct amdgpu_fence *fence;
if (!job || !job->data) {
DRM_ERROR("job is null\n");
return NULL;
}
sched_job = (struct amdgpu_cs_parser *)job->data;
mutex_lock(&sched_job->job_lock);
r = amdgpu_ib_schedule(sched_job->adev,
sched_job->num_ibs,
sched_job->ibs,
sched_job->filp);
if (r)
goto err;
fence = amdgpu_fence_ref(sched_job->ibs[sched_job->num_ibs - 1].fence);
if (sched_job->run_job) {
r = sched_job->run_job(sched_job);
if (r)
goto err;
}
mutex_unlock(&sched_job->job_lock);
return &fence->base;
err:
DRM_ERROR("Run job error\n");
mutex_unlock(&sched_job->job_lock);
schedule_work(&sched_job->job_work);
return NULL;
}
static void amdgpu_sched_process_job(struct amd_gpu_scheduler *sched,
struct amd_sched_job *job)
{
struct amdgpu_cs_parser *sched_job;
if (!job || !job->data) {
DRM_ERROR("job is null\n");
return;
}
sched_job = (struct amdgpu_cs_parser *)job->data;
schedule_work(&sched_job->job_work);
}
struct amd_sched_backend_ops amdgpu_sched_ops = {
.prepare_job = amdgpu_sched_prepare_job,
.run_job = amdgpu_sched_run_job,
.process_job = amdgpu_sched_process_job
};
int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_ib *ibs,
unsigned num_ibs,
int (*free_job)(struct amdgpu_cs_parser *),
void *owner,
struct fence **f)
{
int r = 0;
if (amdgpu_enable_scheduler) {
struct amdgpu_cs_parser *sched_job =
amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx,
ibs, num_ibs);
if(!sched_job) {
return -ENOMEM;
}
sched_job->free_job = free_job;
mutex_lock(&sched_job->job_lock);
r = amd_sched_push_job(ring->scheduler,
&adev->kernel_ctx.rings[ring->idx].entity,
sched_job, &sched_job->s_fence);
if (r) {
mutex_unlock(&sched_job->job_lock);
kfree(sched_job);
return r;
}
ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq;
*f = fence_get(&sched_job->s_fence->base);
mutex_unlock(&sched_job->job_lock);
} else {
r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
if (r)
return r;
*f = fence_get(&ibs[num_ibs - 1].fence->base);
}
return 0;
}
......@@ -53,20 +53,24 @@ void amdgpu_sync_create(struct amdgpu_sync *sync)
}
/**
* amdgpu_sync_fence - use the semaphore to sync to a fence
* amdgpu_sync_fence - remember to sync to this fence
*
* @sync: sync object to add fence to
* @fence: fence to sync to
*
* Sync to the fence using the semaphore objects
*/
void amdgpu_sync_fence(struct amdgpu_sync *sync,
struct amdgpu_fence *fence)
int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct fence *f)
{
struct amdgpu_fence *fence;
struct amdgpu_fence *other;
if (!fence)
return;
if (!f)
return 0;
fence = to_amdgpu_fence(f);
if (!fence || fence->ring->adev != adev)
return fence_wait(f, true);
other = sync->sync_to[fence->ring->idx];
sync->sync_to[fence->ring->idx] = amdgpu_fence_ref(
......@@ -79,6 +83,8 @@ void amdgpu_sync_fence(struct amdgpu_sync *sync,
amdgpu_fence_later(fence, other));
amdgpu_fence_unref(&other);
}
return 0;
}
/**
......@@ -106,11 +112,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
/* always sync to the exclusive fence */
f = reservation_object_get_excl(resv);
fence = f ? to_amdgpu_fence(f) : NULL;
if (fence && fence->ring->adev == adev)
amdgpu_sync_fence(sync, fence);
else if (f)
r = fence_wait(f, true);
r = amdgpu_sync_fence(adev, sync, f);
flist = reservation_object_get_list(resv);
if (!flist || r)
......@@ -121,14 +123,26 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
reservation_object_held(resv));
fence = f ? to_amdgpu_fence(f) : NULL;
if (fence && fence->ring->adev == adev) {
if (fence->owner != owner ||
fence->owner == AMDGPU_FENCE_OWNER_UNDEFINED)
amdgpu_sync_fence(sync, fence);
} else if (f) {
r = fence_wait(f, true);
if (r)
break;
/* VM updates are only interesting
* for other VM updates and moves.
*/
if ((owner != AMDGPU_FENCE_OWNER_MOVE) &&
(fence->owner != AMDGPU_FENCE_OWNER_MOVE) &&
((owner == AMDGPU_FENCE_OWNER_VM) !=
(fence->owner == AMDGPU_FENCE_OWNER_VM)))
continue;
/* Ignore fence from the same owner as
* long as it isn't undefined.
*/
if (owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
fence->owner == owner)
continue;
}
r = amdgpu_sync_fence(adev, sync, f);
if (r)
break;
}
return r;
}
......@@ -164,9 +178,9 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
return -EINVAL;
}
if (count >= AMDGPU_NUM_SYNCS) {
if (amdgpu_enable_scheduler || (count >= AMDGPU_NUM_SYNCS)) {
/* not enough room, wait manually */
r = amdgpu_fence_wait(fence, false);
r = fence_wait(&fence->base, false);
if (r)
return r;
continue;
......@@ -186,7 +200,7 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
if (!amdgpu_semaphore_emit_signal(other, semaphore)) {
/* signaling wasn't successful wait manually */
amdgpu_ring_undo(other);
r = amdgpu_fence_wait(fence, false);
r = fence_wait(&fence->base, false);
if (r)
return r;
continue;
......@@ -196,7 +210,7 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
if (!amdgpu_semaphore_emit_wait(ring, semaphore)) {
/* waiting wasn't successful wait manually */
amdgpu_ring_undo(other);
r = amdgpu_fence_wait(fence, false);
r = fence_wait(&fence->base, false);
if (r)
return r;
continue;
......
......@@ -116,7 +116,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
goto out_lclean_unpin;
}
r = amdgpu_fence_wait(fence, false);
r = fence_wait(&fence->base, false);
if (r) {
DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
goto out_lclean_unpin;
......@@ -161,7 +161,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
goto out_lclean_unpin;
}
r = amdgpu_fence_wait(fence, false);
r = fence_wait(&fence->base, false);
if (r) {
DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
goto out_lclean_unpin;
......@@ -238,7 +238,7 @@ void amdgpu_test_moves(struct amdgpu_device *adev)
static int amdgpu_test_create_and_emit_fence(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_fence **fence)
struct fence **fence)
{
uint32_t handle = ring->idx ^ 0xdeafbeef;
int r;
......@@ -269,15 +269,16 @@ static int amdgpu_test_create_and_emit_fence(struct amdgpu_device *adev,
DRM_ERROR("Failed to get dummy destroy msg\n");
return r;
}
} else {
struct amdgpu_fence *a_fence = NULL;
r = amdgpu_ring_lock(ring, 64);
if (r) {
DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
return r;
}
amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_UNDEFINED, fence);
amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_UNDEFINED, &a_fence);
amdgpu_ring_unlock_commit(ring);
*fence = &a_fence->base;
}
return 0;
}
......@@ -286,7 +287,7 @@ void amdgpu_test_ring_sync(struct amdgpu_device *adev,
struct amdgpu_ring *ringA,
struct amdgpu_ring *ringB)
{
struct amdgpu_fence *fence1 = NULL, *fence2 = NULL;
struct fence *fence1 = NULL, *fence2 = NULL;
struct amdgpu_semaphore *semaphore = NULL;
int r;
......@@ -322,7 +323,7 @@ void amdgpu_test_ring_sync(struct amdgpu_device *adev,
mdelay(1000);
if (amdgpu_fence_signaled(fence1)) {
if (fence_is_signaled(fence1)) {
DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n");
goto out_cleanup;
}
......@@ -335,7 +336,7 @@ void amdgpu_test_ring_sync(struct amdgpu_device *adev,
amdgpu_semaphore_emit_signal(ringB, semaphore);
amdgpu_ring_unlock_commit(ringB);
r = amdgpu_fence_wait(fence1, false);
r = fence_wait(fence1, false);
if (r) {
DRM_ERROR("Failed to wait for sync fence 1\n");
goto out_cleanup;
......@@ -343,7 +344,7 @@ void amdgpu_test_ring_sync(struct amdgpu_device *adev,
mdelay(1000);
if (amdgpu_fence_signaled(fence2)) {
if (fence_is_signaled(fence2)) {
DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n");
goto out_cleanup;
}
......@@ -356,7 +357,7 @@ void amdgpu_test_ring_sync(struct amdgpu_device *adev,
amdgpu_semaphore_emit_signal(ringB, semaphore);
amdgpu_ring_unlock_commit(ringB);
r = amdgpu_fence_wait(fence2, false);
r = fence_wait(fence2, false);
if (r) {
DRM_ERROR("Failed to wait for sync fence 1\n");
goto out_cleanup;
......@@ -366,10 +367,10 @@ void amdgpu_test_ring_sync(struct amdgpu_device *adev,
amdgpu_semaphore_free(adev, &semaphore, NULL);
if (fence1)
amdgpu_fence_unref(&fence1);
fence_put(fence1);
if (fence2)
amdgpu_fence_unref(&fence2);
fence_put(fence2);
if (r)
printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
......@@ -380,7 +381,7 @@ static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
struct amdgpu_ring *ringB,
struct amdgpu_ring *ringC)
{
struct amdgpu_fence *fenceA = NULL, *fenceB = NULL;
struct fence *fenceA = NULL, *fenceB = NULL;
struct amdgpu_semaphore *semaphore = NULL;
bool sigA, sigB;
int i, r;
......@@ -416,11 +417,11 @@ static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
mdelay(1000);
if (amdgpu_fence_signaled(fenceA)) {
if (fence_is_signaled(fenceA)) {
DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
goto out_cleanup;
}
if (amdgpu_fence_signaled(fenceB)) {
if (fence_is_signaled(fenceB)) {
DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
goto out_cleanup;
}
......@@ -435,8 +436,8 @@ static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
for (i = 0; i < 30; ++i) {
mdelay(100);
sigA = amdgpu_fence_signaled(fenceA);
sigB = amdgpu_fence_signaled(fenceB);
sigA = fence_is_signaled(fenceA);
sigB = fence_is_signaled(fenceB);
if (sigA || sigB)
break;
}
......@@ -461,12 +462,12 @@ static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
mdelay(1000);
r = amdgpu_fence_wait(fenceA, false);
r = fence_wait(fenceA, false);
if (r) {
DRM_ERROR("Failed to wait for sync fence A\n");
goto out_cleanup;
}
r = amdgpu_fence_wait(fenceB, false);
r = fence_wait(fenceB, false);
if (r) {
DRM_ERROR("Failed to wait for sync fence B\n");
goto out_cleanup;
......@@ -476,10 +477,10 @@ static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
amdgpu_semaphore_free(adev, &semaphore, NULL);
if (fenceA)
amdgpu_fence_unref(&fenceA);
fence_put(fenceA);
if (fenceB)
amdgpu_fence_unref(&fenceB);
fence_put(fenceB);
if (r)
printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
......
......@@ -52,6 +52,7 @@
#endif
#define FIRMWARE_TONGA "amdgpu/tonga_uvd.bin"
#define FIRMWARE_CARRIZO "amdgpu/carrizo_uvd.bin"
#define FIRMWARE_FIJI "amdgpu/fiji_uvd.bin"
/**
* amdgpu_uvd_cs_ctx - Command submission parser context
......@@ -81,6 +82,7 @@ MODULE_FIRMWARE(FIRMWARE_MULLINS);
#endif
MODULE_FIRMWARE(FIRMWARE_TONGA);
MODULE_FIRMWARE(FIRMWARE_CARRIZO);
MODULE_FIRMWARE(FIRMWARE_FIJI);
static void amdgpu_uvd_note_usage(struct amdgpu_device *adev);
static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
......@@ -116,6 +118,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
case CHIP_TONGA:
fw_name = FIRMWARE_TONGA;
break;
case CHIP_FIJI:
fw_name = FIRMWARE_FIJI;
break;
case CHIP_CARRIZO:
fw_name = FIRMWARE_CARRIZO;
break;
......@@ -283,7 +288,7 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
uint32_t handle = atomic_read(&adev->uvd.handles[i]);
if (handle != 0 && adev->uvd.filp[i] == filp) {
struct amdgpu_fence *fence;
struct fence *fence;
amdgpu_uvd_note_usage(adev);
......@@ -293,8 +298,8 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
continue;
}
amdgpu_fence_wait(fence, false);
amdgpu_fence_unref(&fence);
fence_wait(fence, false);
fence_put(fence);
adev->uvd.filp[i] = NULL;
atomic_set(&adev->uvd.handles[i], 0);
......@@ -374,7 +379,8 @@ static int amdgpu_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
unsigned height_in_mb = ALIGN(height / 16, 2);
unsigned fs_in_mb = width_in_mb * height_in_mb;
unsigned image_size, tmp, min_dpb_size, num_dpb_buffer, min_ctx_size;
unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
unsigned min_ctx_size = 0;
image_size = width * height;
image_size += image_size / 2;
......@@ -507,28 +513,25 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
{
struct amdgpu_device *adev = ctx->parser->adev;
int32_t *msg, msg_type, handle;
struct fence *f;
void *ptr;
int i, r;
long r;
int i;
if (offset & 0x3F) {
DRM_ERROR("UVD messages must be 64 byte aligned!\n");
return -EINVAL;
}
f = reservation_object_get_excl(bo->tbo.resv);
if (f) {
r = amdgpu_fence_wait((struct amdgpu_fence *)f, false);
if (r) {
DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
return r;
}
r = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
MAX_SCHEDULE_TIMEOUT);
if (r < 0) {
DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r);
return r;
}
r = amdgpu_bo_kmap(bo, &ptr);
if (r) {
DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
return r;
}
......@@ -803,14 +806,24 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
return 0;
}
static int amdgpu_uvd_free_job(
struct amdgpu_cs_parser *sched_job)
{
amdgpu_ib_free(sched_job->adev, sched_job->ibs);
kfree(sched_job->ibs);
return 0;
}
static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
struct amdgpu_fence **fence)
struct fence **fence)
{
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
struct list_head head;
struct amdgpu_ib ib;
struct amdgpu_ib *ib = NULL;
struct fence *f = NULL;
struct amdgpu_device *adev = ring->adev;
uint64_t addr;
int i, r;
......@@ -832,34 +845,49 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
if (r)
goto err;
r = amdgpu_ib_get(ring, NULL, 64, &ib);
if (r)
ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
if (!ib) {
r = -ENOMEM;
goto err;
}
r = amdgpu_ib_get(ring, NULL, 64, ib);
if (r)
goto err1;
addr = amdgpu_bo_gpu_offset(bo);
ib.ptr[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
ib.ptr[1] = addr;
ib.ptr[2] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0);
ib.ptr[3] = addr >> 32;
ib.ptr[4] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
ib.ptr[5] = 0;
ib->ptr[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
ib->ptr[1] = addr;
ib->ptr[2] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0);
ib->ptr[3] = addr >> 32;
ib->ptr[4] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
ib->ptr[5] = 0;
for (i = 6; i < 16; ++i)
ib.ptr[i] = PACKET2(0);
ib.length_dw = 16;
ib->ptr[i] = PACKET2(0);
ib->length_dw = 16;
r = amdgpu_ib_schedule(ring->adev, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED);
r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
&amdgpu_uvd_free_job,
AMDGPU_FENCE_OWNER_UNDEFINED,
&f);
if (r)
goto err;
ttm_eu_fence_buffer_objects(&ticket, &head, &ib.fence->base);
goto err2;
if (fence)
*fence = amdgpu_fence_ref(ib.fence);
ttm_eu_fence_buffer_objects(&ticket, &head, f);
amdgpu_ib_free(ring->adev, &ib);
if (fence)
*fence = fence_get(f);
amdgpu_bo_unref(&bo);
return 0;
fence_put(f);
if (amdgpu_enable_scheduler)
return 0;
amdgpu_ib_free(ring->adev, ib);
kfree(ib);
return 0;
err2:
amdgpu_ib_free(ring->adev, ib);
err1:
kfree(ib);
err:
ttm_eu_backoff_reservation(&ticket, &head);
return r;
......@@ -869,7 +897,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
crash the vcpu so just try to emmit a dummy create/destroy msg to
avoid this */
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct amdgpu_fence **fence)
struct fence **fence)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_bo *bo;
......@@ -916,7 +944,7 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
}
int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
struct amdgpu_fence **fence)
struct fence **fence)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_bo *bo;
......
......@@ -29,9 +29,9 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev);
int amdgpu_uvd_suspend(struct amdgpu_device *adev);
int amdgpu_uvd_resume(struct amdgpu_device *adev);
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct amdgpu_fence **fence);
struct fence **fence);
int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
struct amdgpu_fence **fence);
struct fence **fence);
void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
struct drm_file *filp);
int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);
......
......@@ -48,6 +48,7 @@
#endif
#define FIRMWARE_TONGA "amdgpu/tonga_vce.bin"
#define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin"
#define FIRMWARE_FIJI "amdgpu/fiji_vce.bin"
#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
......@@ -58,6 +59,7 @@ MODULE_FIRMWARE(FIRMWARE_MULLINS);
#endif
MODULE_FIRMWARE(FIRMWARE_TONGA);
MODULE_FIRMWARE(FIRMWARE_CARRIZO);
MODULE_FIRMWARE(FIRMWARE_FIJI);
static void amdgpu_vce_idle_work_handler(struct work_struct *work);
......@@ -101,6 +103,9 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
case CHIP_CARRIZO:
fw_name = FIRMWARE_CARRIZO;
break;
case CHIP_FIJI:
fw_name = FIRMWARE_FIJI;
break;
default:
return -EINVAL;
......@@ -334,6 +339,14 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
}
}
static int amdgpu_vce_free_job(
struct amdgpu_cs_parser *sched_job)
{
amdgpu_ib_free(sched_job->adev, sched_job->ibs);
kfree(sched_job->ibs);
return 0;
}
/**
* amdgpu_vce_get_create_msg - generate a VCE create msg
*
......@@ -345,59 +358,69 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
* Open up a stream for HW test
*/
int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct amdgpu_fence **fence)
struct fence **fence)
{
const unsigned ib_size_dw = 1024;
struct amdgpu_ib ib;
struct amdgpu_ib *ib = NULL;
struct fence *f = NULL;
struct amdgpu_device *adev = ring->adev;
uint64_t dummy;
int i, r;
r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, &ib);
ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
if (!ib)
return -ENOMEM;
r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
kfree(ib);
return r;
}
dummy = ib.gpu_addr + 1024;
dummy = ib->gpu_addr + 1024;
/* stitch together an VCE create msg */
ib.length_dw = 0;
ib.ptr[ib.length_dw++] = 0x0000000c; /* len */
ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */
ib.ptr[ib.length_dw++] = handle;
ib.ptr[ib.length_dw++] = 0x00000030; /* len */
ib.ptr[ib.length_dw++] = 0x01000001; /* create cmd */
ib.ptr[ib.length_dw++] = 0x00000000;
ib.ptr[ib.length_dw++] = 0x00000042;
ib.ptr[ib.length_dw++] = 0x0000000a;
ib.ptr[ib.length_dw++] = 0x00000001;
ib.ptr[ib.length_dw++] = 0x00000080;
ib.ptr[ib.length_dw++] = 0x00000060;
ib.ptr[ib.length_dw++] = 0x00000100;
ib.ptr[ib.length_dw++] = 0x00000100;
ib.ptr[ib.length_dw++] = 0x0000000c;
ib.ptr[ib.length_dw++] = 0x00000000;
ib.ptr[ib.length_dw++] = 0x00000014; /* len */
ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */
ib.ptr[ib.length_dw++] = upper_32_bits(dummy);
ib.ptr[ib.length_dw++] = dummy;
ib.ptr[ib.length_dw++] = 0x00000001;
for (i = ib.length_dw; i < ib_size_dw; ++i)
ib.ptr[i] = 0x0;
r = amdgpu_ib_schedule(ring->adev, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED);
if (r) {
DRM_ERROR("amdgpu: failed to schedule ib (%d).\n", r);
}
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x00000030; /* len */
ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000042;
ib->ptr[ib->length_dw++] = 0x0000000a;
ib->ptr[ib->length_dw++] = 0x00000001;
ib->ptr[ib->length_dw++] = 0x00000080;
ib->ptr[ib->length_dw++] = 0x00000060;
ib->ptr[ib->length_dw++] = 0x00000100;
ib->ptr[ib->length_dw++] = 0x00000100;
ib->ptr[ib->length_dw++] = 0x0000000c;
ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000014; /* len */
ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
ib->ptr[ib->length_dw++] = dummy;
ib->ptr[ib->length_dw++] = 0x00000001;
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
&amdgpu_vce_free_job,
AMDGPU_FENCE_OWNER_UNDEFINED,
&f);
if (r)
goto err;
if (fence)
*fence = amdgpu_fence_ref(ib.fence);
amdgpu_ib_free(ring->adev, &ib);
*fence = fence_get(f);
fence_put(f);
if (amdgpu_enable_scheduler)
return 0;
err:
amdgpu_ib_free(adev, ib);
kfree(ib);
return r;
}
......@@ -412,49 +435,59 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
* Close up a stream for HW test or if userspace failed to do so
*/
int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
struct amdgpu_fence **fence)
struct fence **fence)
{
const unsigned ib_size_dw = 1024;
struct amdgpu_ib ib;
struct amdgpu_ib *ib = NULL;
struct fence *f = NULL;
struct amdgpu_device *adev = ring->adev;
uint64_t dummy;
int i, r;
r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, &ib);
ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
if (!ib)
return -ENOMEM;
r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, ib);
if (r) {
kfree(ib);
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
return r;
}
dummy = ib.gpu_addr + 1024;
dummy = ib->gpu_addr + 1024;
/* stitch together an VCE destroy msg */
ib.length_dw = 0;
ib.ptr[ib.length_dw++] = 0x0000000c; /* len */
ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */
ib.ptr[ib.length_dw++] = handle;
ib.ptr[ib.length_dw++] = 0x00000014; /* len */
ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */
ib.ptr[ib.length_dw++] = upper_32_bits(dummy);
ib.ptr[ib.length_dw++] = dummy;
ib.ptr[ib.length_dw++] = 0x00000001;
ib.ptr[ib.length_dw++] = 0x00000008; /* len */
ib.ptr[ib.length_dw++] = 0x02000001; /* destroy cmd */
for (i = ib.length_dw; i < ib_size_dw; ++i)
ib.ptr[i] = 0x0;
r = amdgpu_ib_schedule(ring->adev, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED);
if (r) {
DRM_ERROR("amdgpu: failed to schedule ib (%d).\n", r);
}
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x00000014; /* len */
ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
ib->ptr[ib->length_dw++] = dummy;
ib->ptr[ib->length_dw++] = 0x00000001;
ib->ptr[ib->length_dw++] = 0x00000008; /* len */
ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
&amdgpu_vce_free_job,
AMDGPU_FENCE_OWNER_UNDEFINED,
&f);
if (r)
goto err;
if (fence)
*fence = amdgpu_fence_ref(ib.fence);
amdgpu_ib_free(ring->adev, &ib);
*fence = fence_get(f);
fence_put(f);
if (amdgpu_enable_scheduler)
return 0;
err:
amdgpu_ib_free(adev, ib);
kfree(ib);
return r;
}
......@@ -800,7 +833,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
*/
int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring)
{
struct amdgpu_fence *fence = NULL;
struct fence *fence = NULL;
int r;
r = amdgpu_vce_get_create_msg(ring, 1, NULL);
......@@ -815,13 +848,13 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring)
goto error;
}
r = amdgpu_fence_wait(fence, false);
r = fence_wait(fence, false);
if (r) {
DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
} else {
DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
}
error:
amdgpu_fence_unref(&fence);
fence_put(fence);
return r;
}
......@@ -29,9 +29,9 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
int amdgpu_vce_suspend(struct amdgpu_device *adev);
int amdgpu_vce_resume(struct amdgpu_device *adev);
int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct amdgpu_fence **fence);
struct fence **fence);
int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
struct amdgpu_fence **fence);
struct fence **fence);
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring,
......
......@@ -127,16 +127,16 @@ struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
/**
* amdgpu_vm_grab_id - allocate the next free VMID
*
* @ring: ring we want to submit job to
* @vm: vm to allocate id for
* @ring: ring we want to submit job to
* @sync: sync object where we add dependencies
*
* Allocate an id for the vm (cayman+).
* Returns the fence we need to sync to (if any).
* Allocate an id for the vm, adding fences to the sync obj as necessary.
*
* Global and local mutex must be locked!
* Global mutex must be locked!
*/
struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
struct amdgpu_vm *vm)
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync)
{
struct amdgpu_fence *best[AMDGPU_MAX_RINGS] = {};
struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
......@@ -148,7 +148,7 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
/* check if the id is still valid */
if (vm_id->id && vm_id->last_id_use &&
vm_id->last_id_use == adev->vm_manager.active[vm_id->id])
return NULL;
return 0;
/* we definately need to flush */
vm_id->pd_gpu_addr = ~0ll;
......@@ -161,7 +161,7 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
/* found a free one */
vm_id->id = i;
trace_amdgpu_vm_grab_id(i, ring->idx);
return NULL;
return 0;
}
if (amdgpu_fence_is_earlier(fence, best[fence->ring->idx])) {
......@@ -172,15 +172,19 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
for (i = 0; i < 2; ++i) {
if (choices[i]) {
struct amdgpu_fence *fence;
fence = adev->vm_manager.active[choices[i]];
vm_id->id = choices[i];
trace_amdgpu_vm_grab_id(choices[i], ring->idx);
return adev->vm_manager.active[choices[i]];
return amdgpu_sync_fence(ring->adev, sync, &fence->base);
}
}
/* should never happen */
BUG();
return NULL;
return -EINVAL;
}
/**
......@@ -200,13 +204,15 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
{
uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
struct amdgpu_fence *flushed_updates = vm_id->flushed_updates;
if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
amdgpu_fence_is_earlier(vm_id->flushed_updates, updates)) {
if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates ||
(updates && amdgpu_fence_is_earlier(flushed_updates, updates))) {
trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
amdgpu_fence_unref(&vm_id->flushed_updates);
vm_id->flushed_updates = amdgpu_fence_ref(updates);
vm_id->flushed_updates = amdgpu_fence_ref(
amdgpu_fence_later(flushed_updates, updates));
amdgpu_fence_unref(&flushed_updates);
vm_id->pd_gpu_addr = pd_addr;
amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
}
......@@ -300,6 +306,16 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
}
}
static int amdgpu_vm_free_job(
struct amdgpu_cs_parser *sched_job)
{
int i;
for (i = 0; i < sched_job->num_ibs; i++)
amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]);
kfree(sched_job->ibs);
return 0;
}
/**
* amdgpu_vm_clear_bo - initially clear the page dir/table
*
......@@ -310,7 +326,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
struct amdgpu_bo *bo)
{
struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
struct amdgpu_ib ib;
struct fence *fence = NULL;
struct amdgpu_ib *ib;
unsigned entries;
uint64_t addr;
int r;
......@@ -330,24 +347,33 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
addr = amdgpu_bo_gpu_offset(bo);
entries = amdgpu_bo_size(bo) / 8;
r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, &ib);
if (r)
ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
if (!ib)
goto error_unreserve;
ib.length_dw = 0;
amdgpu_vm_update_pages(adev, &ib, addr, 0, entries, 0, 0, 0);
amdgpu_vm_pad_ib(adev, &ib);
WARN_ON(ib.length_dw > 64);
r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_VM);
r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib);
if (r)
goto error_free;
amdgpu_bo_fence(bo, ib.fence, true);
ib->length_dw = 0;
amdgpu_vm_update_pages(adev, ib, addr, 0, entries, 0, 0, 0);
amdgpu_vm_pad_ib(adev, ib);
WARN_ON(ib->length_dw > 64);
r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
&amdgpu_vm_free_job,
AMDGPU_FENCE_OWNER_VM,
&fence);
if (!r)
amdgpu_bo_fence(bo, fence, true);
fence_put(fence);
if (amdgpu_enable_scheduler) {
amdgpu_bo_unreserve(bo);
return 0;
}
error_free:
amdgpu_ib_free(adev, &ib);
amdgpu_ib_free(adev, ib);
kfree(ib);
error_unreserve:
amdgpu_bo_unreserve(bo);
......@@ -400,7 +426,9 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
uint64_t last_pde = ~0, last_pt = ~0;
unsigned count = 0, pt_idx, ndw;
struct amdgpu_ib ib;
struct amdgpu_ib *ib;
struct fence *fence = NULL;
int r;
/* padding, etc. */
......@@ -413,10 +441,14 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
if (ndw > 0xfffff)
return -ENOMEM;
r = amdgpu_ib_get(ring, NULL, ndw * 4, &ib);
ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
if (!ib)
return -ENOMEM;
r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
if (r)
return r;
ib.length_dw = 0;
ib->length_dw = 0;
/* walk over the address space and update the page directory */
for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
......@@ -436,7 +468,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
((last_pt + incr * count) != pt)) {
if (count) {
amdgpu_vm_update_pages(adev, &ib, last_pde,
amdgpu_vm_update_pages(adev, ib, last_pde,
last_pt, count, incr,
AMDGPU_PTE_VALID, 0);
}
......@@ -450,23 +482,37 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
}
if (count)
amdgpu_vm_update_pages(adev, &ib, last_pde, last_pt, count,
amdgpu_vm_update_pages(adev, ib, last_pde, last_pt, count,
incr, AMDGPU_PTE_VALID, 0);
if (ib.length_dw != 0) {
amdgpu_vm_pad_ib(adev, &ib);
amdgpu_sync_resv(adev, &ib.sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM);
WARN_ON(ib.length_dw > ndw);
r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_VM);
if (r) {
amdgpu_ib_free(adev, &ib);
return r;
}
amdgpu_bo_fence(pd, ib.fence, true);
if (ib->length_dw != 0) {
amdgpu_vm_pad_ib(adev, ib);
amdgpu_sync_resv(adev, &ib->sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM);
WARN_ON(ib->length_dw > ndw);
r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
&amdgpu_vm_free_job,
AMDGPU_FENCE_OWNER_VM,
&fence);
if (r)
goto error_free;
amdgpu_bo_fence(pd, fence, true);
fence_put(vm->page_directory_fence);
vm->page_directory_fence = fence_get(fence);
fence_put(fence);
}
if (!amdgpu_enable_scheduler || ib->length_dw == 0) {
amdgpu_ib_free(adev, ib);
kfree(ib);
}
amdgpu_ib_free(adev, &ib);
return 0;
error_free:
amdgpu_ib_free(adev, ib);
kfree(ib);
return r;
}
/**
......@@ -640,7 +686,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
*/
static void amdgpu_vm_fence_pts(struct amdgpu_vm *vm,
uint64_t start, uint64_t end,
struct amdgpu_fence *fence)
struct fence *fence)
{
unsigned i;
......@@ -670,12 +716,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_bo_va_mapping *mapping,
uint64_t addr, uint32_t gtt_flags,
struct amdgpu_fence **fence)
struct fence **fence)
{
struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
unsigned nptes, ncmds, ndw;
uint32_t flags = gtt_flags;
struct amdgpu_ib ib;
struct amdgpu_ib *ib;
struct fence *f = NULL;
int r;
/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
......@@ -722,46 +769,65 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
if (ndw > 0xfffff)
return -ENOMEM;
r = amdgpu_ib_get(ring, NULL, ndw * 4, &ib);
if (r)
ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
if (!ib)
return -ENOMEM;
r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
if (r) {
kfree(ib);
return r;
ib.length_dw = 0;
}
ib->length_dw = 0;
if (!(flags & AMDGPU_PTE_VALID)) {
unsigned i;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_fence *f = vm->ids[i].last_id_use;
amdgpu_sync_fence(&ib.sync, f);
r = amdgpu_sync_fence(adev, &ib->sync, &f->base);
if (r)
return r;
}
}
r = amdgpu_vm_update_ptes(adev, vm, &ib, mapping->it.start,
r = amdgpu_vm_update_ptes(adev, vm, ib, mapping->it.start,
mapping->it.last + 1, addr + mapping->offset,
flags, gtt_flags);
if (r) {
amdgpu_ib_free(adev, &ib);
amdgpu_ib_free(adev, ib);
kfree(ib);
return r;
}
amdgpu_vm_pad_ib(adev, &ib);
WARN_ON(ib.length_dw > ndw);
amdgpu_vm_pad_ib(adev, ib);
WARN_ON(ib->length_dw > ndw);
r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
&amdgpu_vm_free_job,
AMDGPU_FENCE_OWNER_VM,
&f);
if (r)
goto error_free;
r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_VM);
if (r) {
amdgpu_ib_free(adev, &ib);
return r;
}
amdgpu_vm_fence_pts(vm, mapping->it.start,
mapping->it.last + 1, ib.fence);
mapping->it.last + 1, f);
if (fence) {
amdgpu_fence_unref(fence);
*fence = amdgpu_fence_ref(ib.fence);
fence_put(*fence);
*fence = fence_get(f);
}
fence_put(f);
if (!amdgpu_enable_scheduler) {
amdgpu_ib_free(adev, ib);
kfree(ib);
}
amdgpu_ib_free(adev, &ib);
return 0;
error_free:
amdgpu_ib_free(adev, ib);
kfree(ib);
return r;
}
/**
......@@ -794,21 +860,25 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
addr = 0;
}
if (addr == bo_va->addr)
return 0;
flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
list_for_each_entry(mapping, &bo_va->mappings, list) {
spin_lock(&vm->status_lock);
if (!list_empty(&bo_va->vm_status))
list_splice_init(&bo_va->valids, &bo_va->invalids);
spin_unlock(&vm->status_lock);
list_for_each_entry(mapping, &bo_va->invalids, list) {
r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, addr,
flags, &bo_va->last_pt_update);
if (r)
return r;
}
bo_va->addr = addr;
spin_lock(&vm->status_lock);
list_splice_init(&bo_va->invalids, &bo_va->valids);
list_del_init(&bo_va->vm_status);
if (!mem)
list_add(&bo_va->vm_status, &vm->cleared);
spin_unlock(&vm->status_lock);
return 0;
......@@ -861,7 +931,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
struct amdgpu_vm *vm, struct amdgpu_sync *sync)
{
struct amdgpu_bo_va *bo_va = NULL;
int r;
int r = 0;
spin_lock(&vm->status_lock);
while (!list_empty(&vm->invalidated)) {
......@@ -878,8 +948,9 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
spin_unlock(&vm->status_lock);
if (bo_va)
amdgpu_sync_fence(sync, bo_va->last_pt_update);
return 0;
r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
return r;
}
/**
......@@ -907,10 +978,10 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
}
bo_va->vm = vm;
bo_va->bo = bo;
bo_va->addr = 0;
bo_va->ref_count = 1;
INIT_LIST_HEAD(&bo_va->bo_list);
INIT_LIST_HEAD(&bo_va->mappings);
INIT_LIST_HEAD(&bo_va->valids);
INIT_LIST_HEAD(&bo_va->invalids);
INIT_LIST_HEAD(&bo_va->vm_status);
mutex_lock(&vm->mutex);
......@@ -999,12 +1070,10 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
mapping->offset = offset;
mapping->flags = flags;
list_add(&mapping->list, &bo_va->mappings);
list_add(&mapping->list, &bo_va->invalids);
interval_tree_insert(&mapping->it, &vm->va);
trace_amdgpu_vm_bo_map(bo_va, mapping);
bo_va->addr = 0;
/* Make sure the page tables are allocated */
saddr >>= amdgpu_vm_block_size;
eaddr >>= amdgpu_vm_block_size;
......@@ -1085,17 +1154,27 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
{
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_vm *vm = bo_va->vm;
bool valid = true;
saddr /= AMDGPU_GPU_PAGE_SIZE;
list_for_each_entry(mapping, &bo_va->mappings, list) {
list_for_each_entry(mapping, &bo_va->valids, list) {
if (mapping->it.start == saddr)
break;
}
if (&mapping->list == &bo_va->mappings) {
amdgpu_bo_unreserve(bo_va->bo);
return -ENOENT;
if (&mapping->list == &bo_va->valids) {
valid = false;
list_for_each_entry(mapping, &bo_va->invalids, list) {
if (mapping->it.start == saddr)
break;
}
if (&mapping->list == &bo_va->invalids) {
amdgpu_bo_unreserve(bo_va->bo);
return -ENOENT;
}
}
mutex_lock(&vm->mutex);
......@@ -1103,12 +1182,10 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
interval_tree_remove(&mapping->it, &vm->va);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
if (bo_va->addr) {
/* clear the old address */
if (valid)
list_add(&mapping->list, &vm->freed);
} else {
else
kfree(mapping);
}
mutex_unlock(&vm->mutex);
amdgpu_bo_unreserve(bo_va->bo);
......@@ -1139,16 +1216,19 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
list_del(&bo_va->vm_status);
spin_unlock(&vm->status_lock);
list_for_each_entry_safe(mapping, next, &bo_va->mappings, list) {
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
list_del(&mapping->list);
interval_tree_remove(&mapping->it, &vm->va);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
if (bo_va->addr)
list_add(&mapping->list, &vm->freed);
else
kfree(mapping);
list_add(&mapping->list, &vm->freed);
}
amdgpu_fence_unref(&bo_va->last_pt_update);
list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
list_del(&mapping->list);
interval_tree_remove(&mapping->it, &vm->va);
kfree(mapping);
}
fence_put(bo_va->last_pt_update);
kfree(bo_va);
mutex_unlock(&vm->mutex);
......@@ -1169,12 +1249,10 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va;
list_for_each_entry(bo_va, &bo->va, bo_list) {
if (bo_va->addr) {
spin_lock(&bo_va->vm->status_lock);
list_del(&bo_va->vm_status);
spin_lock(&bo_va->vm->status_lock);
if (list_empty(&bo_va->vm_status))
list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
spin_unlock(&bo_va->vm->status_lock);
}
spin_unlock(&bo_va->vm->status_lock);
}
}
......@@ -1202,6 +1280,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->va = RB_ROOT;
spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->invalidated);
INIT_LIST_HEAD(&vm->cleared);
INIT_LIST_HEAD(&vm->freed);
pd_size = amdgpu_vm_directory_size(adev);
......@@ -1215,6 +1294,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
return -ENOMEM;
}
vm->page_directory_fence = NULL;
r = amdgpu_bo_create(adev, pd_size, align, true,
AMDGPU_GEM_DOMAIN_VRAM, 0,
NULL, &vm->page_directory);
......@@ -1263,6 +1344,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
kfree(vm->page_tables);
amdgpu_bo_unref(&vm->page_directory);
fence_put(vm->page_directory_fence);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
amdgpu_fence_unref(&vm->ids[i].flushed_updates);
......
......@@ -812,7 +812,7 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a
else
args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER;
if ((adev->flags & AMDGPU_IS_APU) &&
if ((adev->flags & AMD_IS_APU) &&
(amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_UNIPHY)) {
if (is_dp ||
!amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock)) {
......
......@@ -838,7 +838,7 @@ static u32 cik_get_xclk(struct amdgpu_device *adev)
{
u32 reference_clock = adev->clock.spll.reference_freq;
if (adev->flags & AMDGPU_IS_APU) {
if (adev->flags & AMD_IS_APU) {
if (RREG32_SMC(ixGENERAL_PWRMGT) & GENERAL_PWRMGT__GPU_COUNTER_CLK_MASK)
return reference_clock / 2;
} else {
......@@ -1235,7 +1235,7 @@ static void cik_gpu_soft_reset(struct amdgpu_device *adev, u32 reset_mask)
if (reset_mask & AMDGPU_RESET_VMC)
srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_VMC_MASK;
if (!(adev->flags & AMDGPU_IS_APU)) {
if (!(adev->flags & AMD_IS_APU)) {
if (reset_mask & AMDGPU_RESET_MC)
srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_MC_MASK;
}
......@@ -1411,7 +1411,7 @@ static void cik_gpu_pci_config_reset(struct amdgpu_device *adev)
dev_warn(adev->dev, "Wait for MC idle timed out !\n");
}
if (adev->flags & AMDGPU_IS_APU)
if (adev->flags & AMD_IS_APU)
kv_save_regs_for_reset(adev, &kv_save);
/* disable BM */
......@@ -1429,7 +1429,7 @@ static void cik_gpu_pci_config_reset(struct amdgpu_device *adev)
}
/* does asic init need to be run first??? */
if (adev->flags & AMDGPU_IS_APU)
if (adev->flags & AMD_IS_APU)
kv_restore_regs_for_reset(adev, &kv_save);
}
......@@ -1570,7 +1570,7 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
if (amdgpu_pcie_gen2 == 0)
return;
if (adev->flags & AMDGPU_IS_APU)
if (adev->flags & AMD_IS_APU)
return;
ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
......@@ -1730,7 +1730,7 @@ static void cik_program_aspm(struct amdgpu_device *adev)
return;
/* XXX double check APUs */
if (adev->flags & AMDGPU_IS_APU)
if (adev->flags & AMD_IS_APU)
return;
orig = data = RREG32_PCIE(ixPCIE_LC_N_FTS_CNTL);
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册