Commit 6f6e68b3 authored by Dave Airlie

Merge branch 'drm-next-4.8' of git://people.freedesktop.org/~agd5f/linux into drm-next

This is the main 4.8 pull for radeon and amdgpu.  Sorry for the delay,
I meant to send this out last week, but I was moving house.  Lots of
changes here:
- ATPX improvements for better dGPU power control on PX systems
- New power features for CZ/BR/ST
- Pipelined BO moves and evictions in TTM
- GPU scheduler improvements
- GPU reset improvements
- Overclocking on dGPUs with amdgpu
- Lots of code cleanup
- Bug fixes

* 'drm-next-4.8' of git://people.freedesktop.org/~agd5f/linux: (191 commits)
  drm/amd/powerplay: don't add invalid voltage.
  drm/amdgpu: add read/write function for GC CAC programming
  drm/amd/powerplay: add definitions related to di/dt feature for fiji and polaris.
  drm/amd/powerplay: add shared definitions for di/dt feature.
  drm/amdgpu: remove gfx8 registers that vary between asics
  drm/amd/powerplay: add mvdd dpm support.
  drm/amdgpu: get number of shade engine by cgs interface.
  drm/amdgpu: remove more of the ring backup code
  drm/amd/powerplay:  Unify family defines
  drm/amdgpu: clean up ring_backup code, no need more
  drm/amdgpu: ib test first after gpu reset
  drm/amdgpu: recovery hw jobs when gpu reset V3
  drm/amdgpu: abstract amdgpu_vm_is_gpu_reset
  drm/amdgpu: add a bool to specify if needing vm flush V2
  drm/amdgpu: add amd_sched_job_recovery
  drm/amdgpu: force completion for gpu reset
  drm/amdgpu: block ttm first before parking scheduler
  drm/amd: add amd_sched_hw_job_reset
  drm/amd: add parent for sched fence
  drm/amdgpu: remove evict vram
  ...
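A note on the overclocking item above: it is plumbed through the new get/set_sclk_od and get/set_mclk_od hooks (see the amdgpu_dpm_* additions in the diff below), with engine and memory clock overdrive expressed as a percentage. A minimal userspace sketch, assuming the pp_sclk_od sysfs attribute this series exposes and a card enumerated as card0:

/* Hedged sketch: query and set the sclk overdrive percentage via sysfs.
 * The pp_sclk_od attribute and the card0 path are assumptions about a
 * typical setup exposed by this series. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/class/drm/card0/device/pp_sclk_od", "r+");
	int od;

	if (!f)
		return 1;
	if (fscanf(f, "%d", &od) == 1) {
		printf("current sclk overdrive: %d%%\n", od);
		rewind(f);
		fprintf(f, "5");	/* request a 5%% engine clock overdrive */
	}
	fclose(f);
	return 0;
}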
@@ -85,8 +85,12 @@ extern int amdgpu_vm_debug;
 extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
 extern int amdgpu_powerplay;
+extern int amdgpu_powercontainment;
 extern unsigned amdgpu_pcie_gen_cap;
 extern unsigned amdgpu_pcie_lane_cap;
+extern unsigned amdgpu_cg_mask;
+extern unsigned amdgpu_pg_mask;
+extern char *amdgpu_disable_cu;

 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
 #define AMDGPU_MAX_USEC_TIMEOUT		100000 /* 100 ms */

@@ -183,6 +187,10 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
 int amdgpu_set_powergating_state(struct amdgpu_device *adev,
 				 enum amd_ip_block_type block_type,
 				 enum amd_powergating_state state);
+int amdgpu_wait_for_idle(struct amdgpu_device *adev,
+			 enum amd_ip_block_type block_type);
+bool amdgpu_is_idle(struct amdgpu_device *adev,
+		    enum amd_ip_block_type block_type);

 struct amdgpu_ip_block_version {
 	enum amd_ip_block_type type;

@@ -594,11 +602,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct reservation_object *resv,
 		     void *owner);
-bool amdgpu_sync_is_idle(struct amdgpu_sync *sync);
-int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
-			     struct fence *fence);
+struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+				     struct amdgpu_ring *ring);
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
-int amdgpu_sync_wait(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
 int amdgpu_sync_init(void);
 void amdgpu_sync_fini(void);

@@ -754,12 +760,11 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 			     struct amdgpu_job **job);
+void amdgpu_job_free_resources(struct amdgpu_job *job);
 void amdgpu_job_free(struct amdgpu_job *job);
-void amdgpu_job_free_func(struct kref *refcount);
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
 		      struct fence **f);
-void amdgpu_job_timeout_func(struct work_struct *work);

 struct amdgpu_ring {
 	struct amdgpu_device		*adev;

@@ -771,8 +776,6 @@ struct amdgpu_ring {
 	struct amdgpu_bo	*ring_obj;
 	volatile uint32_t	*ring;
 	unsigned		rptr_offs;
-	u64			next_rptr_gpu_addr;
-	volatile u32		*next_rptr_cpu_addr;
 	unsigned		wptr;
 	unsigned		wptr_old;
 	unsigned		ring_size;

@@ -791,7 +794,6 @@ struct amdgpu_ring {
 	u32			doorbell_index;
 	bool			use_doorbell;
 	unsigned		wptr_offs;
-	unsigned		next_rptr_offs;
 	unsigned		fence_offs;
 	uint64_t		current_ctx;
 	enum amdgpu_ring_type	type;

@@ -799,6 +801,9 @@ struct amdgpu_ring {
 	unsigned		cond_exe_offs;
 	u64			cond_exe_gpu_addr;
 	volatile u32		*cond_exe_cpu_addr;
+#if defined(CONFIG_DEBUG_FS)
+	struct dentry *ent;
+#endif
 };

 /*

@@ -861,6 +866,7 @@ struct amdgpu_vm {
 	struct amdgpu_bo	*page_directory;
 	unsigned		max_pde_used;
 	struct fence		*page_directory_fence;
+	uint64_t		last_eviction_counter;

 	/* array of page tables, one for each page directory entry */
 	struct amdgpu_vm_pt	*page_tables;

@@ -883,13 +889,14 @@ struct amdgpu_vm_id {
 	struct fence		*first;
 	struct amdgpu_sync	active;
 	struct fence		*last_flush;
-	struct amdgpu_ring	*last_user;
 	atomic64_t		owner;

 	uint64_t		pd_gpu_addr;
 	/* last flushed PD/PT update */
 	struct fence		*flushed_updates;

+	uint32_t                current_gpu_reset_count;
+
 	uint32_t		gds_base;
 	uint32_t		gds_size;
 	uint32_t		gws_base;

@@ -905,6 +912,10 @@ struct amdgpu_vm_manager {
 	struct list_head			ids_lru;
 	struct amdgpu_vm_id			ids[AMDGPU_NUM_VM];

+	/* Handling of VM fences */
+	u64					fence_context;
+	unsigned				seqno[AMDGPU_MAX_RINGS];
+
 	uint32_t				max_pfn;
 	/* vram base address for page table entry */
 	u64					vram_base_offset;

@@ -926,17 +937,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);
-void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates);
+void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  struct list_head *duplicates);
 void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm);
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_sync *sync, struct fence *fence,
-		      unsigned *vm_id, uint64_t *vm_pd_addr);
-int amdgpu_vm_flush(struct amdgpu_ring *ring,
-		    unsigned vm_id, uint64_t pd_addr,
-		    uint32_t gds_base, uint32_t gds_size,
-		    uint32_t gws_base, uint32_t gws_size,
-		    uint32_t oa_base, uint32_t oa_size);
+		      struct amdgpu_job *job);
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,

@@ -1142,6 +1150,12 @@ struct amdgpu_cu_info {
 	uint32_t bitmap[4][4];
 };

+struct amdgpu_gfx_funcs {
+	/* get the gpu clock counter */
+	uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
+	void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
+};
+
 struct amdgpu_gfx {
 	struct mutex			gpu_clock_mutex;
 	struct amdgpu_gca_config	config;

@@ -1178,6 +1192,7 @@ struct amdgpu_gfx {
 	/* ce ram size*/
 	unsigned			ce_ram_size;
 	struct amdgpu_cu_info		cu_info;
+	const struct amdgpu_gfx_funcs	*funcs;
 };

 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,

@@ -1195,10 +1210,6 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
-unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
-			    uint32_t **data);
-int amdgpu_ring_restore(struct amdgpu_ring *ring,
-			unsigned size, uint32_t *data);
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, u32 nop, u32 align_mask,
 		     struct amdgpu_irq_src *irq_src, unsigned irq_type,

@@ -1250,6 +1261,7 @@ struct amdgpu_job {
 	uint32_t		num_ibs;
 	void			*owner;
 	uint64_t		ctx;
+	bool                    vm_needs_flush;
 	unsigned		vm_id;
 	uint64_t		vm_pd_addr;
 	uint32_t		gds_base, gds_size;

@@ -1257,8 +1269,7 @@ struct amdgpu_job {
 	uint32_t		oa_base, oa_size;

 	/* user fence handling */
-	struct amdgpu_bo	*uf_bo;
-	uint32_t		uf_offset;
+	uint64_t		uf_addr;
 	uint64_t		uf_sequence;
 };

@@ -1560,6 +1571,12 @@ struct amdgpu_dpm_funcs {
 	u32 (*get_fan_control_mode)(struct amdgpu_device *adev);
 	int (*set_fan_speed_percent)(struct amdgpu_device *adev, u32 speed);
 	int (*get_fan_speed_percent)(struct amdgpu_device *adev, u32 *speed);
+	int (*force_clock_level)(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t mask);
+	int (*print_clock_levels)(struct amdgpu_device *adev, enum pp_clock_type type, char *buf);
+	int (*get_sclk_od)(struct amdgpu_device *adev);
+	int (*set_sclk_od)(struct amdgpu_device *adev, uint32_t value);
+	int (*get_mclk_od)(struct amdgpu_device *adev);
+	int (*set_mclk_od)(struct amdgpu_device *adev, uint32_t value);
 };

 struct amdgpu_dpm {

@@ -1767,6 +1784,8 @@ int amdgpu_debugfs_init(struct drm_minor *minor);
 void amdgpu_debugfs_cleanup(struct drm_minor *minor);
 #endif

+int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
+
 /*
  * amdgpu smumgr functions
  */

@@ -1811,12 +1830,8 @@ struct amdgpu_asic_funcs {
 			     u32 sh_num, u32 reg_offset, u32 *value);
 	void (*set_vga_state)(struct amdgpu_device *adev, bool state);
 	int (*reset)(struct amdgpu_device *adev);
-	/* wait for mc_idle */
-	int (*wait_for_mc_idle)(struct amdgpu_device *adev);
 	/* get the reference clock */
 	u32 (*get_xclk)(struct amdgpu_device *adev);
-	/* get the gpu clock counter */
-	uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
 	/* MM block clocks */
 	int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
 	int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);

@@ -2003,6 +2018,10 @@ struct amdgpu_device {
 	spinlock_t didt_idx_lock;
 	amdgpu_rreg_t		didt_rreg;
 	amdgpu_wreg_t		didt_wreg;
+	/* protects concurrent gc_cac register access */
+	spinlock_t gc_cac_idx_lock;
+	amdgpu_rreg_t		gc_cac_rreg;
+	amdgpu_wreg_t		gc_cac_wreg;
 	/* protects concurrent ENDPOINT (audio) register access */
 	spinlock_t audio_endpt_idx_lock;
 	amdgpu_block_rreg_t	audio_endpt_rreg;

@@ -2028,6 +2047,7 @@ struct amdgpu_device {
 	atomic64_t			vram_vis_usage;
 	atomic64_t			gtt_usage;
 	atomic64_t			num_bytes_moved;
+	atomic64_t			num_evictions;
 	atomic_t			gpu_reset_counter;

 	/* display */

@@ -2131,6 +2151,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
 #define WREG32_UVD_CTX(reg, v) adev->uvd_ctx_wreg(adev, (reg), (v))
 #define RREG32_DIDT(reg) adev->didt_rreg(adev, (reg))
 #define WREG32_DIDT(reg, v) adev->didt_wreg(adev, (reg), (v))
+#define RREG32_GC_CAC(reg) adev->gc_cac_rreg(adev, (reg))
+#define WREG32_GC_CAC(reg, v) adev->gc_cac_wreg(adev, (reg), (v))
 #define RREG32_AUDIO_ENDPT(block, reg) adev->audio_endpt_rreg(adev, (block), (reg))
 #define WREG32_AUDIO_ENDPT(block, reg, v) adev->audio_endpt_wreg(adev, (block), (reg), (v))
 #define WREG32_P(reg, val, mask)				\

@@ -2206,12 +2228,10 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
  */
 #define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))
 #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
-#define amdgpu_asic_wait_for_mc_idle(adev) (adev)->asic_funcs->wait_for_mc_idle((adev))
 #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
 #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
 #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
 #define amdgpu_asic_get_virtual_caps(adev) ((adev)->asic_funcs->get_virtual_caps((adev)))
-#define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev))
 #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
 #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
 #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))

@@ -2264,6 +2284,8 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_print_power_state(adev, ps) (adev)->pm.funcs->print_power_state((adev), (ps))
 #define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs->vblank_too_short((adev))
 #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
+#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
+#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))

 #define amdgpu_dpm_get_temperature(adev) \
 	((adev)->pp_enabled ?						\

@@ -2342,6 +2364,18 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_force_clock_level(adev, type, level) \
 		(adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level)

+#define amdgpu_dpm_get_sclk_od(adev) \
+	(adev)->powerplay.pp_funcs->get_sclk_od((adev)->powerplay.pp_handle)
+
+#define amdgpu_dpm_set_sclk_od(adev, value) \
+	(adev)->powerplay.pp_funcs->set_sclk_od((adev)->powerplay.pp_handle, value)
+
+#define amdgpu_dpm_get_mclk_od(adev) \
+	((adev)->powerplay.pp_funcs->get_mclk_od((adev)->powerplay.pp_handle))
+
+#define amdgpu_dpm_set_mclk_od(adev, value) \
+	((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value))
+
 #define amdgpu_dpm_dispatch_task(adev, event_id, input, output) \
 	(adev)->powerplay.pp_funcs->dispatch_tasks((adev)->powerplay.pp_handle, (event_id), (input), (output))

@@ -2383,9 +2417,13 @@ bool amdgpu_device_is_px(struct drm_device *dev);
 #if defined(CONFIG_VGA_SWITCHEROO)
 void amdgpu_register_atpx_handler(void);
 void amdgpu_unregister_atpx_handler(void);
+bool amdgpu_has_atpx_dgpu_power_cntl(void);
+bool amdgpu_is_atpx_hybrid(void);
 #else
 static inline void amdgpu_register_atpx_handler(void) {}
 static inline void amdgpu_unregister_atpx_handler(void) {}
+static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
+static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
 #endif

 /*
......
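The amdgpu_is_atpx_hybrid()/amdgpu_has_atpx_dgpu_power_cntl() declarations above (stubbed out to false when VGA_SWITCHEROO is disabled) let power-management code distinguish the legacy ATPX dGPU power-control path from the newer standardized hybrid graphics scheme. A hedged sketch of how a caller might branch on them; the helper name and the policy are illustrative, not the driver's actual runtime-PM code (note that the temporary hack in amdgpu_atpx_validate() further below keeps ATPX power control enabled even on hybrid systems for now):

/* Illustrative only: pick a dGPU power-down strategy from the new helpers. */
static bool example_use_atpx_power_cntl(void)
{
	if (amdgpu_is_atpx_hybrid())
		return false;	/* prefer the standardized hybrid D3cold path */
	return amdgpu_has_atpx_dgpu_power_cntl();
}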
@@ -240,8 +240,8 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;

-	if (rdev->asic_funcs->get_gpu_clock_counter)
-		return rdev->asic_funcs->get_gpu_clock_counter(rdev);
+	if (rdev->gfx.funcs->get_gpu_clock_counter)
+		return rdev->gfx.funcs->get_gpu_clock_counter(rdev);
 	return 0;
 }
......
@@ -10,6 +10,7 @@
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/pci.h>
+#include <linux/delay.h>

 #include "amd_acpi.h"

@@ -27,6 +28,7 @@ struct amdgpu_atpx_functions {
 struct amdgpu_atpx {
 	acpi_handle handle;
 	struct amdgpu_atpx_functions functions;
+	bool is_hybrid;
 };

 static struct amdgpu_atpx_priv {

@@ -63,6 +65,14 @@ bool amdgpu_has_atpx(void) {
 	return amdgpu_atpx_priv.atpx_detected;
 }

+bool amdgpu_has_atpx_dgpu_power_cntl(void) {
+	return amdgpu_atpx_priv.atpx.functions.power_cntl;
+}
+
+bool amdgpu_is_atpx_hybrid(void) {
+	return amdgpu_atpx_priv.atpx.is_hybrid;
+}
+
 /**
  * amdgpu_atpx_call - call an ATPX method
  *

@@ -142,18 +152,12 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mask)
  */
 static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 {
-	/* make sure required functions are enabled */
-	/* dGPU power control is required */
-	if (atpx->functions.power_cntl == false) {
-		printk("ATPX dGPU power cntl not present, forcing\n");
-		atpx->functions.power_cntl = true;
-	}
+	u32 valid_bits = 0;

 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
 		size_t size;
-		u32 valid_bits;

 		info = amdgpu_atpx_call(atpx->handle, ATPX_FUNCTION_GET_PX_PARAMETERS, NULL);
 		if (!info)

@@ -172,6 +176,10 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 		memcpy(&output, info->buffer.pointer, size);

 		valid_bits = output.flags & output.valid_flags;
+
+		kfree(info);
+	}

 	/* if separate mux flag is set, mux controls are required */
 	if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
 		atpx->functions.i2c_mux_cntl = true;

@@ -183,8 +191,28 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 		      ATPX_DFP_SIGNAL_MUXED))
 		atpx->functions.disp_mux_cntl = true;

-		kfree(info);
-	}
+	/* some bioses set these bits rather than flagging power_cntl as supported */
+	if (valid_bits & (ATPX_DYNAMIC_PX_SUPPORTED |
+			  ATPX_DYNAMIC_DGPU_POWER_OFF_SUPPORTED))
+		atpx->functions.power_cntl = true;
+
+	atpx->is_hybrid = false;
+	if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
+		printk("ATPX Hybrid Graphics\n");
+#if 1
+		/* This is a temporary hack until the D3 cold support
+		 * makes it upstream. The ATPX power_control method seems
+		 * to still work on even if the system should be using
+		 * the new standardized hybrid D3 cold ACPI interface.
+		 */
+		atpx->functions.power_cntl = true;
+#else
+		atpx->functions.power_cntl = false;
+#endif
+		atpx->is_hybrid = true;
+	}

 	return 0;
 }

@@ -259,6 +287,10 @@ static int amdgpu_atpx_set_discrete_state(struct amdgpu_atpx *atpx, u8 state)
 		if (!info)
 			return -EIO;
 		kfree(info);
+
+		/* 200ms delay is required after off */
+		if (state == 0)
+			msleep(200);
 	}
 	return 0;
 }
......
@@ -94,6 +94,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	unsigned last_entry = 0, first_userptr = num_entries;
 	unsigned i;
 	int r;
+	unsigned long total_size = 0;

 	array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry));
 	if (!array)

@@ -140,6 +141,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 		if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
 			oa_obj = entry->robj;

+		total_size += amdgpu_bo_size(entry->robj);
 		trace_amdgpu_bo_list_set(list, entry->robj);
 	}

@@ -155,6 +157,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	list->array = array;
 	list->num_entries = num_entries;

+	trace_amdgpu_cs_bo_status(list->num_entries, total_size);
 	return 0;

 error_free:
......
@@ -312,6 +312,8 @@ static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
 		return RREG32_UVD_CTX(index);
 	case CGS_IND_REG__DIDT:
 		return RREG32_DIDT(index);
+	case CGS_IND_REG_GC_CAC:
+		return RREG32_GC_CAC(index);
 	case CGS_IND_REG__AUDIO_ENDPT:
 		DRM_ERROR("audio endpt register access not implemented.\n");
 		return 0;

@@ -336,6 +338,8 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
 		return WREG32_UVD_CTX(index, value);
 	case CGS_IND_REG__DIDT:
 		return WREG32_DIDT(index, value);
+	case CGS_IND_REG_GC_CAC:
+		return WREG32_GC_CAC(index, value);
 	case CGS_IND_REG__AUDIO_ENDPT:
 		DRM_ERROR("audio endpt register access not implemented.\n");
 		return;

@@ -787,6 +791,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 		}

 		hdr = (const struct smc_firmware_header_v1_0 *)	adev->pm.fw->data;
+		amdgpu_ucode_print_smc_hdr(&hdr->header);
 		adev->pm.fw_version = le32_to_cpu(hdr->header.ucode_version);
 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes);
 		ucode_start_address = le32_to_cpu(hdr->ucode_start_addr);

@@ -830,6 +835,9 @@ static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
 	case CGS_SYSTEM_INFO_GFX_CU_INFO:
 		sys_info->value = adev->gfx.cu_info.number;
 		break;
+	case CGS_SYSTEM_INFO_GFX_SE_INFO:
+		sys_info->value = adev->gfx.config.max_shader_engines;
+		break;
 	default:
 		return -ENODEV;
 	}

@@ -972,11 +980,11 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 			params->integer.value = argument->value;
 			break;
 		case ACPI_TYPE_STRING:
-			params->string.length = argument->method_length;
+			params->string.length = argument->data_length;
 			params->string.pointer = argument->pointer;
 			break;
 		case ACPI_TYPE_BUFFER:
-			params->buffer.length = argument->method_length;
+			params->buffer.length = argument->data_length;
 			params->buffer.pointer = argument->pointer;
 			break;
 		default:

@@ -1079,17 +1087,14 @@ int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
 	struct cgs_acpi_method_info info = {0};

 	acpi_input[0].type = CGS_ACPI_TYPE_INTEGER;
-	acpi_input[0].method_length = sizeof(uint32_t);
 	acpi_input[0].data_length = sizeof(uint32_t);
 	acpi_input[0].value = acpi_function;

 	acpi_input[1].type = CGS_ACPI_TYPE_BUFFER;
-	acpi_input[1].method_length = CGS_ACPI_MAX_BUFFER_SIZE;
 	acpi_input[1].data_length = input_size;
 	acpi_input[1].pointer = pinput;

 	acpi_output.type = CGS_ACPI_TYPE_BUFFER;
-	acpi_output.method_length = CGS_ACPI_MAX_BUFFER_SIZE;
 	acpi_output.data_length = output_size;
 	acpi_output.pointer = poutput;
......
@@ -1690,7 +1690,6 @@ amdgpu_connector_add(struct amdgpu_device *adev,
 					      DRM_MODE_SCALE_NONE);
 		/* no HPD on analog connectors */
 		amdgpu_connector->hpd.hpd = AMDGPU_HPD_NONE;
-		connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 		connector->interlace_allowed = true;
 		connector->doublescan_allowed = true;
 		break;

@@ -1893,8 +1892,10 @@ amdgpu_connector_add(struct amdgpu_device *adev,
 	}

 	if (amdgpu_connector->hpd.hpd == AMDGPU_HPD_NONE) {
-		if (i2c_bus->valid)
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+		if (i2c_bus->valid) {
+			connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+					    DRM_CONNECTOR_POLL_DISCONNECT;
+		}
 	} else
 		connector->polled = DRM_CONNECTOR_POLL_HPD;
......
@@ -216,11 +216,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	if (ret)
 		goto free_all_kdata;

-	if (p->uf_entry.robj) {
-		p->job->uf_bo = amdgpu_bo_ref(p->uf_entry.robj);
-		p->job->uf_offset = uf_offset;
-	}
+	if (p->uf_entry.robj)
+		p->job->uf_addr = uf_offset;

 	kfree(chunk_array);
 	return 0;

@@ -459,7 +456,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		list_splice(&need_pages, &p->validated);
 	}

-	amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);
+	amdgpu_vm_get_pt_bos(p->adev, &fpriv->vm, &duplicates);

 	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
 	p->bytes_moved = 0;

@@ -472,6 +469,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (r)
 		goto error_validate;

+	fpriv->vm.last_eviction_counter =
+		atomic64_read(&p->adev->num_evictions);
+
 	if (p->bo_list) {
 		struct amdgpu_bo *gds = p->bo_list->gds_obj;
 		struct amdgpu_bo *gws = p->bo_list->gws_obj;

@@ -499,6 +499,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		}
 	}

+	if (p->uf_entry.robj)
+		p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
+
 error_validate:
 	if (r) {
 		amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm);

@@ -653,18 +656,21 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,

 	/* Only for UVD/VCE VM emulation */
 	if (ring->funcs->parse_cs) {
+		p->job->vm = NULL;
 		for (i = 0; i < p->job->num_ibs; i++) {
 			r = amdgpu_ring_parse_cs(ring, p, i);
 			if (r)
 				return r;
 		}
-	}
+	} else {
+		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);

 		r = amdgpu_bo_vm_update_pte(p, vm);
-		if (!r)
-			amdgpu_cs_sync_rings(p);
+		if (r)
+			return r;
+	}

-	return r;
+	return amdgpu_cs_sync_rings(p);
 }

 static int amdgpu_cs_handle_lockup(struct amdgpu_device *adev, int r)

@@ -761,7 +767,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	}

 	/* UVD & VCE fw doesn't support user fences */
-	if (parser->job->uf_bo && (
+	if (parser->job->uf_addr && (
 	    parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
 	    parser->job->ring->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;

@@ -830,17 +836,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 {
 	struct amdgpu_ring *ring = p->job->ring;
 	struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
-	struct fence *fence;
 	struct amdgpu_job *job;
 	int r;

 	job = p->job;
 	p->job = NULL;

-	r = amd_sched_job_init(&job->base, &ring->sched,
-			       entity, amdgpu_job_timeout_func,
-			       amdgpu_job_free_func,
-			       p->filp, &fence);
+	r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp);
 	if (r) {
 		amdgpu_job_free(job);
 		return r;

@@ -848,9 +850,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	job->owner = p->filp;
 	job->ctx = entity->fence_context;
-	p->fence = fence_get(fence);
-	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence);
+	p->fence = fence_get(&job->base.s_fence->finished);
+	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
 	job->uf_sequence = cs->out.handle;
+	amdgpu_job_free_resources(job);

 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
......
@@ -25,6 +25,7 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+#include <linux/kthread.h>
 #include <linux/console.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>

@@ -35,6 +36,7 @@
 #include <linux/vga_switcheroo.h>
 #include <linux/efi.h>
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 #include "amdgpu_i2c.h"
 #include "atom.h"
 #include "amdgpu_atombios.h"

@@ -79,24 +81,27 @@ bool amdgpu_device_is_px(struct drm_device *dev)
 uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 			bool always_indirect)
 {
+	uint32_t ret;
+
 	if ((reg * 4) < adev->rmmio_size && !always_indirect)
-		return readl(((void __iomem *)adev->rmmio) + (reg * 4));
+		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
 	else {
 		unsigned long flags;
-		uint32_t ret;

 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
 		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
-		return ret;
 	}
+	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
+	return ret;
 }

 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 		    bool always_indirect)
 {
+	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
+
 	if ((reg * 4) < adev->rmmio_size && !always_indirect)
 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 	else {

@@ -1070,11 +1075,14 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
 	int i, r = 0;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
 		if (adev->ip_blocks[i].type == block_type) {
 			r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
 									    state);
 			if (r)
 				return r;
+			break;
 		}
 	}
 	return r;

@@ -1087,16 +1095,53 @@ int amdgpu_set_powergating_state(struct amdgpu_device *adev,
 	int i, r = 0;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
 		if (adev->ip_blocks[i].type == block_type) {
 			r = adev->ip_blocks[i].funcs->set_powergating_state((void *)adev,
 									    state);
 			if (r)
 				return r;
+			break;
 		}
 	}
 	return r;
 }

+int amdgpu_wait_for_idle(struct amdgpu_device *adev,
+			 enum amd_ip_block_type block_type)
+{
+	int i, r;
+
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
+		if (adev->ip_blocks[i].type == block_type) {
+			r = adev->ip_blocks[i].funcs->wait_for_idle((void *)adev);
+			if (r)
+				return r;
+			break;
+		}
+	}
+	return 0;
+}
+
+bool amdgpu_is_idle(struct amdgpu_device *adev,
+		    enum amd_ip_block_type block_type)
+{
+	int i;
+
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
+		if (adev->ip_blocks[i].type == block_type)
+			return adev->ip_blocks[i].funcs->is_idle((void *)adev);
+	}
+	return true;
+}
+
 const struct amdgpu_ip_block_version * amdgpu_get_ip_block(
 					struct amdgpu_device *adev,
 					enum amd_ip_block_type type)

@@ -1209,6 +1254,9 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 		}
 	}

+	adev->cg_flags &= amdgpu_cg_mask;
+	adev->pg_flags &= amdgpu_pg_mask;
+
 	return 0;
 }

@@ -1440,9 +1488,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
 	adev->didt_rreg = &amdgpu_invalid_rreg;
 	adev->didt_wreg = &amdgpu_invalid_wreg;
+	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
+	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;

 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);

@@ -1467,6 +1518,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	spin_lock_init(&adev->pcie_idx_lock);
 	spin_lock_init(&adev->uvd_ctx_idx_lock);
 	spin_lock_init(&adev->didt_idx_lock);
+	spin_lock_init(&adev->gc_cac_idx_lock);
 	spin_lock_init(&adev->audio_endpt_idx_lock);

 	adev->rmmio_base = pci_resource_start(adev->pdev, 5);

@@ -1511,17 +1563,20 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);

 	/* Read BIOS */
-	if (!amdgpu_get_bios(adev))
-		return -EINVAL;
+	if (!amdgpu_get_bios(adev)) {
+		r = -EINVAL;
+		goto failed;
+	}
 	/* Must be an ATOMBIOS */
 	if (!adev->is_atom_bios) {
 		dev_err(adev->dev, "Expecting atombios for GPU\n");
-		return -EINVAL;
+		r = -EINVAL;
+		goto failed;
 	}
 	r = amdgpu_atombios_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
-		return r;
+		goto failed;
 	}

 	/* See if the asic supports SR-IOV */

@@ -1538,7 +1593,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	      !(adev->virtualization.caps & AMDGPU_VIRT_CAPS_SRIOV_EN))) {
 		if (!adev->bios) {
 			dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
-			return -EINVAL;
+			r = -EINVAL;
+			goto failed;
 		}
 		DRM_INFO("GPU not posted. posting now...\n");
 		amdgpu_atom_asic_init(adev->mode_info.atom_context);

@@ -1548,7 +1604,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_atombios_get_clock_info(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
-		return r;
+		goto failed;
 	}
 	/* init i2c buses */
 	amdgpu_atombios_i2c_init(adev);

@@ -1557,7 +1613,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_fence_driver_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
-		return r;
+		goto failed;
 	}

 	/* init the mode config */

@@ -1567,7 +1623,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (r) {
 		dev_err(adev->dev, "amdgpu_init failed\n");
 		amdgpu_fini(adev);
-		return r;
+		goto failed;
 	}

 	adev->accel_working = true;

@@ -1577,7 +1633,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_ib_pool_init(adev);
 	if (r) {
 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
-		return r;
+		goto failed;
 	}

 	r = amdgpu_ib_ring_tests(adev);

@@ -1594,6 +1650,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		DRM_ERROR("registering register debugfs failed (%d).\n", r);
 	}

+	r = amdgpu_debugfs_firmware_init(adev);
+	if (r) {
+		DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
+		return r;
+	}
+
 	if ((amdgpu_testing & 1)) {
 		if (adev->accel_working)
 			amdgpu_test_moves(adev);

@@ -1619,10 +1681,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_late_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_late_init failed\n");
-		return r;
+		goto failed;
 	}

 	return 0;
+
+failed:
+	if (runtime)
+		vga_switcheroo_fini_domain_pm_ops(adev->dev);
+	return r;
 }

 static void amdgpu_debugfs_remove_files(struct amdgpu_device *adev);

@@ -1656,6 +1723,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	kfree(adev->bios);
 	adev->bios = NULL;
 	vga_switcheroo_unregister_client(adev->pdev);
+	if (adev->flags & AMD_IS_PX)
+		vga_switcheroo_fini_domain_pm_ops(adev->dev);
 	vga_client_register(adev->pdev, NULL, NULL, NULL);
 	if (adev->rio_mem)
 		pci_iounmap(adev->pdev, adev->rio_mem);

@@ -1861,11 +1930,6 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
  */
 int amdgpu_gpu_reset(struct amdgpu_device *adev)
 {
-	unsigned ring_sizes[AMDGPU_MAX_RINGS];
-	uint32_t *ring_data[AMDGPU_MAX_RINGS];
-
-	bool saved = false;
-
 	int i, r;
 	int resched;

@@ -1874,22 +1938,30 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);

-	r = amdgpu_suspend(adev);
-
+	/* block scheduler */
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 		if (!ring)
 			continue;
-		ring_sizes[i] = amdgpu_ring_backup(ring, &ring_data[i]);
-		if (ring_sizes[i]) {
-			saved = true;
-			dev_info(adev->dev, "Saved %d dwords of commands "
-				 "on ring %d.\n", ring_sizes[i], i);
-		}
+		kthread_park(ring->sched.thread);
+		amd_sched_hw_job_reset(&ring->sched);
 	}
+	/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
+	amdgpu_fence_driver_force_completion(adev);
+
+	/* save scratch */
+	amdgpu_atombios_scratch_regs_save(adev);
+	r = amdgpu_suspend(adev);

 retry:
+	/* Disable fb access */
+	if (adev->mode_info.num_crtc) {
+		struct amdgpu_mode_mc_save save;
+		amdgpu_display_stop_mc_access(adev, &save);
+		amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
+	}
+
 	r = amdgpu_asic_reset(adev);
 	/* post card */
 	amdgpu_atom_asic_init(adev->mode_info.atom_context);

@@ -1898,32 +1970,29 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 		dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
 		r = amdgpu_resume(adev);
 	}
+	/* restore scratch */
+	amdgpu_atombios_scratch_regs_restore(adev);
 	if (!r) {
-		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			struct amdgpu_ring *ring = adev->rings[i];
-			if (!ring)
-				continue;
-
-			amdgpu_ring_restore(ring, ring_sizes[i], ring_data[i]);
-			ring_sizes[i] = 0;
-			ring_data[i] = NULL;
-		}
-
 		r = amdgpu_ib_ring_tests(adev);
 		if (r) {
 			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
-			if (saved) {
-				saved = false;
 			r = amdgpu_suspend(adev);
 			goto retry;
-			}
 		}
+
+		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+			struct amdgpu_ring *ring = adev->rings[i];
+			if (!ring)
+				continue;
+
+			amd_sched_job_recovery(&ring->sched);
+			kthread_unpark(ring->sched.thread);
+		}
 	} else {
-		amdgpu_fence_driver_force_completion(adev);
+		dev_err(adev->dev, "asic resume failed (%d).\n", r);
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			if (adev->rings[i])
-				kfree(ring_data[i]);
+			if (adev->rings[i]) {
+				kthread_unpark(adev->rings[i]->sched.thread);
+			}
 		}
 	}

@@ -1934,13 +2003,11 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 		/* bad news, how to tell it to userspace ? */
 		dev_info(adev->dev, "GPU reset failed\n");
 	}
+	amdgpu_irq_gpu_reset_resume_helper(adev);

 	return r;
 }

-#define AMDGPU_DEFAULT_PCIE_GEN_MASK 0x30007  /* gen: chipset 1/2, asic 1/2/3 */
-#define AMDGPU_DEFAULT_PCIE_MLW_MASK 0x2f0000 /* 1/2/4/8/16 lanes */
-
 void amdgpu_get_pcie_info(struct amdgpu_device *adev)
 {
 	u32 mask;
@@ -2094,20 +2161,43 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 	struct amdgpu_device *adev = f->f_inode->i_private;
 	ssize_t result = 0;
 	int r;
+	bool use_bank;
+	unsigned instance_bank, sh_bank, se_bank;

 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;

+	if (*pos & (1ULL << 62)) {
+		se_bank = (*pos >> 24) & 0x3FF;
+		sh_bank = (*pos >> 34) & 0x3FF;
+		instance_bank = (*pos >> 44) & 0x3FF;
+		use_bank = 1;
+		*pos &= 0xFFFFFF;
+	} else {
+		use_bank = 0;
+	}
+
+	if (use_bank) {
+		if (sh_bank >= adev->gfx.config.max_sh_per_se ||
+		    se_bank >= adev->gfx.config.max_shader_engines)
+			return -EINVAL;
+		mutex_lock(&adev->grbm_idx_mutex);
+		amdgpu_gfx_select_se_sh(adev, se_bank,
+					sh_bank, instance_bank);
+	}
+
 	while (size) {
 		uint32_t value;

 		if (*pos > adev->rmmio_size)
-			return result;
+			goto end;

 		value = RREG32(*pos >> 2);
 		r = put_user(value, (uint32_t *)buf);
-		if (r)
-			return r;
+		if (r) {
+			result = r;
+			goto end;
+		}

 		result += 4;
 		buf += 4;

@@ -2115,6 +2205,12 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 		size -= 4;
 	}

+end:
+	if (use_bank) {
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+		mutex_unlock(&adev->grbm_idx_mutex);
+	}
+
 	return result;
 }
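The banked-read extension above overloads the file offset: bit 62 selects banked mode, bits 24-33 carry the SE index, bits 34-43 the SH index, bits 44-53 the instance, and the low 24 bits remain the byte offset of the register. A hedged userspace sketch of driving it; the debugfs path and the register offset are assumptions for illustration:

/* Hedged sketch: read one dword from a specific SE/SH bank through the
 * amdgpu_regs debugfs node. The dri/0 path and 0x2000 offset are
 * illustrative, not taken from the patch. */
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t se = 0, sh = 0, instance = 0;
	uint64_t reg = 0x2000;	/* byte offset, must fit in the low 24 bits */
	uint64_t pos = (1ULL << 62) | (se << 24) | (sh << 34) |
		       (instance << 44) | (reg & 0xFFFFFF);
	uint32_t value;
	int fd = open("/sys/kernel/debug/dri/0/amdgpu_regs", O_RDONLY);

	if (fd < 0)
		return 1;
	if (pread(fd, &value, sizeof(value), pos) == sizeof(value))
		printf("reg 0x%" PRIx64 " = 0x%08x\n", reg, value);
	close(fd);
	return 0;
}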
...@@ -2314,6 +2410,68 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * ...@@ -2314,6 +2410,68 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
return result; return result;
} }
static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
struct amdgpu_device *adev = f->f_inode->i_private;
ssize_t result = 0;
int r;
uint32_t *config, no_regs = 0;
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
config = kmalloc(256 * sizeof(*config), GFP_KERNEL);
if (!config)
return -ENOMEM;
/* version, increment each time something is added */
config[no_regs++] = 0;
config[no_regs++] = adev->gfx.config.max_shader_engines;
config[no_regs++] = adev->gfx.config.max_tile_pipes;
config[no_regs++] = adev->gfx.config.max_cu_per_sh;
config[no_regs++] = adev->gfx.config.max_sh_per_se;
config[no_regs++] = adev->gfx.config.max_backends_per_se;
config[no_regs++] = adev->gfx.config.max_texture_channel_caches;
config[no_regs++] = adev->gfx.config.max_gprs;
config[no_regs++] = adev->gfx.config.max_gs_threads;
config[no_regs++] = adev->gfx.config.max_hw_contexts;
config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend;
config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend;
config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size;
config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size;
config[no_regs++] = adev->gfx.config.num_tile_pipes;
config[no_regs++] = adev->gfx.config.backend_enable_mask;
config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes;
config[no_regs++] = adev->gfx.config.mem_row_size_in_kb;
config[no_regs++] = adev->gfx.config.shader_engine_tile_size;
config[no_regs++] = adev->gfx.config.num_gpus;
config[no_regs++] = adev->gfx.config.multi_gpu_tile_size;
config[no_regs++] = adev->gfx.config.mc_arb_ramcfg;
config[no_regs++] = adev->gfx.config.gb_addr_config;
config[no_regs++] = adev->gfx.config.num_rbs;
while (size && (*pos < no_regs * 4)) {
uint32_t value;
value = config[*pos >> 2];
r = put_user(value, (uint32_t *)buf);
if (r) {
kfree(config);
return r;
}
result += 4;
buf += 4;
*pos += 4;
size -= 4;
}
kfree(config);
return result;
}
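The file produced here is a flat array of 32-bit words whose first element is a layout version, followed by the gfx config values in exactly the order they are written above. A short hedged reader (the debugfs path is an assumption; error handling trimmed):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        uint32_t words[24];
        ssize_t n;
        int fd = open("/sys/kernel/debug/dri/0/amdgpu_gca_config", O_RDONLY);

        if (fd < 0)
            return 1;
        n = read(fd, words, sizeof(words));
        close(fd);
        if (n < 8)
            return 1;
        /* word 0: version, word 1: max_shader_engines, word 2: max_tile_pipes */
        printf("v%u: %u shader engines, %u tile pipes\n",
               words[0], words[1], words[2]);
        return 0;
    }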
static const struct file_operations amdgpu_debugfs_regs_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_regs_read,
...
@@ -2339,11 +2497,18 @@ static const struct file_operations amdgpu_debugfs_regs_smc_fops = {
.llseek = default_llseek
};
static const struct file_operations amdgpu_debugfs_gca_config_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_gca_config_read,
.llseek = default_llseek
};
static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_regs_fops,
&amdgpu_debugfs_regs_didt_fops,
&amdgpu_debugfs_regs_pcie_fops,
&amdgpu_debugfs_regs_smc_fops,
&amdgpu_debugfs_gca_config_fops,
};

static const char *debugfs_regs_names[] = {
...
@@ -2351,6 +2516,7 @@ static const char *debugfs_regs_names[] = {
"amdgpu_regs_didt",
"amdgpu_regs_pcie",
"amdgpu_regs_smc",
"amdgpu_gca_config",
};

static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
...
@@ -220,19 +220,17 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
r = amdgpu_bo_pin_restricted(new_rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, 0, &base);
if (unlikely(r != 0)) {
-amdgpu_bo_unreserve(new_rbo);
r = -EINVAL;
DRM_ERROR("failed to pin new rbo buffer before flip\n");
-goto cleanup;
+goto unreserve;
}

r = reservation_object_get_fences_rcu(new_rbo->tbo.resv, &work->excl,
&work->shared_count,
&work->shared);
if (unlikely(r != 0)) {
-amdgpu_bo_unreserve(new_rbo);
DRM_ERROR("failed to get fences for buffer\n");
-goto cleanup;
+goto unpin;
}

amdgpu_bo_get_tiling_flags(new_rbo, &tiling_flags);
...
@@ -275,9 +273,11 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
DRM_ERROR("failed to reserve new rbo in error path\n");
goto cleanup;
}
unpin:
if (unlikely(amdgpu_bo_unpin(new_rbo) != 0)) {
DRM_ERROR("failed to unpin new rbo in error path\n");
}
unreserve:
amdgpu_bo_unreserve(new_rbo);
cleanup:
...
@@ -82,8 +82,12 @@ int amdgpu_exp_hw_support = 0;
int amdgpu_sched_jobs = 32;
int amdgpu_sched_hw_submission = 2;
int amdgpu_powerplay = -1;
int amdgpu_powercontainment = 1;
unsigned amdgpu_pcie_gen_cap = 0;
unsigned amdgpu_pcie_lane_cap = 0;
unsigned amdgpu_cg_mask = 0xffffffff;
unsigned amdgpu_pg_mask = 0xffffffff;
char *amdgpu_disable_cu = NULL;
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
...
@@ -160,6 +164,9 @@ module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
#ifdef CONFIG_DRM_AMD_POWERPLAY
MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))");
module_param_named(powerplay, amdgpu_powerplay, int, 0444);
MODULE_PARM_DESC(powercontainment, "Power Containment (1 = enable (default), 0 = disable)");
module_param_named(powercontainment, amdgpu_powercontainment, int, 0444);
#endif

MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))");
...
@@ -168,6 +175,15 @@ module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)");
module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_CIK
/* Kaveri */
...
@@ -413,7 +429,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
pci_save_state(pdev);
pci_disable_device(pdev);
pci_ignore_hotplug(pdev);
if (amdgpu_is_atpx_hybrid())
pci_set_power_state(pdev, PCI_D3cold);
else if (!amdgpu_has_atpx_dgpu_power_cntl())
pci_set_power_state(pdev, PCI_D3hot);
drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;

return 0;
...
@@ -430,6 +449,8 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
if (amdgpu_is_atpx_hybrid() ||
!amdgpu_has_atpx_dgpu_power_cntl())
pci_set_power_state(pdev, PCI_D0);
pci_restore_state(pdev);
ret = pci_enable_device(pdev);
...
@@ -515,7 +536,7 @@ static struct drm_driver kms_driver = {
.driver_features =
DRIVER_USE_AGP |
DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
-DRIVER_PRIME | DRIVER_RENDER,
+DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
.dev_priv_size = 0,
.load = amdgpu_driver_load_kms,
.open = amdgpu_driver_open_kms,
...
@@ -590,7 +611,6 @@ static int __init amdgpu_init(void)
DRM_INFO("amdgpu kernel modesetting enabled.\n");
driver = &kms_driver;
pdriver = &amdgpu_kms_pci_driver;
-driver->driver_features |= DRIVER_MODESET;
driver->num_ioctls = amdgpu_max_kms_ioctl;
amdgpu_register_atpx_handler();
/* let modprobe override vga console setting */
...
@@ -503,7 +503,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
if (r)
goto error_print;

-amdgpu_vm_get_pt_bos(bo_va->vm, &duplicates);
+amdgpu_vm_get_pt_bos(adev, bo_va->vm, &duplicates);
list_for_each_entry(entry, &list, head) {
domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type);
/* if anything is swapped out don't swap it in here,
...
@@ -70,3 +70,47 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
}
}
}
/**
* amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
*
* @mask: array in which the per-shader array disable masks will be stored
* @max_se: number of SEs
* @max_sh: number of SHs
*
* The bitmask of CUs to be disabled in the shader array determined by se and
* sh is stored in mask[se * max_sh + sh].
*/
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
{
unsigned se, sh, cu;
const char *p;
memset(mask, 0, sizeof(*mask) * max_se * max_sh);
if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
return;
p = amdgpu_disable_cu;
for (;;) {
char *next;
int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
if (ret < 3) {
DRM_ERROR("amdgpu: could not parse disable_cu\n");
return;
}
if (se < max_se && sh < max_sh && cu < 16) {
DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
mask[se * max_sh + sh] |= 1u << cu;
} else {
DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
se, sh, cu);
}
next = strchr(p, ',');
if (!next)
break;
p = next + 1;
}
}
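A hedged sketch of a caller, to make the mask layout concrete; the bounds and the wrapper function are illustrative, only amdgpu_gfx_parse_disable_cu() itself comes from this patch. Booted with e.g. amdgpu.disable_cu=0.0.3,0.1.7 and max_sh = 2, bit 3 of mask[0] and bit 7 of mask[1] would be set:

    /* Illustrative caller: collect per-SE/SH disable masks and walk them.
     * The bounds (4 SEs, 2 SHs) are assumptions for the sketch. */
    static void example_dump_disabled_cus(void)
    {
        unsigned mask[4 * 2];
        unsigned se, sh;

        amdgpu_gfx_parse_disable_cu(mask, 4, 2);

        for (se = 0; se < 4; ++se)
            for (sh = 0; sh < 2; ++sh)
                if (mask[se * 2 + sh])
                    DRM_INFO("SE%u.SH%u CU disable mask 0x%08x\n",
                             se, sh, mask[se * 2 + sh]);
    }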
...
@@ -27,4 +27,6 @@
int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh);
#endif
...
@@ -160,10 +160,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
patch_offset = amdgpu_ring_init_cond_exec(ring);

if (vm) {
-r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr,
-job->gds_base, job->gds_size,
-job->gws_base, job->gws_size,
-job->oa_base, job->oa_size);
+r = amdgpu_vm_flush(ring, job);
if (r) {
amdgpu_ring_undo(ring);
return r;
...
@@ -203,11 +200,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
}

/* wrap the last IB with fence */
-if (job && job->uf_bo) {
-uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo);
-addr += job->uf_offset;
-amdgpu_ring_emit_fence(ring, addr, job->uf_sequence,
+if (job && job->uf_addr) {
+amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
AMDGPU_FENCE_FLAG_64BIT);
}
...
@@ -383,6 +383,18 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
return r;
}
void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
{
int i, j;
for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; i++) {
struct amdgpu_irq_src *src = adev->irq.sources[i];
if (!src)
continue;
for (j = 0; j < src->num_types; j++)
amdgpu_irq_update(adev, src, j);
}
}
/**
* amdgpu_irq_get - enable interrupt
*
...
@@ -94,6 +94,7 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type);
bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type);
void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev);
int amdgpu_irq_add_domain(struct amdgpu_device *adev);
void amdgpu_irq_remove_domain(struct amdgpu_device *adev);
...
@@ -28,21 +28,15 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"

-static void amdgpu_job_free_handler(struct work_struct *ws)
+static void amdgpu_job_timedout(struct amd_sched_job *s_job)
{
-struct amdgpu_job *job = container_of(ws, struct amdgpu_job, base.work_free_job);
-amd_sched_job_put(&job->base);
-}
-
-void amdgpu_job_timeout_func(struct work_struct *work)
-{
-struct amdgpu_job *job = container_of(work, struct amdgpu_job, base.work_tdr.work);
+struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);

DRM_ERROR("ring %s timeout, last signaled seq=%u, last emitted seq=%u\n",
job->base.sched->name,
-(uint32_t)atomic_read(&job->ring->fence_drv.last_seq),
+atomic_read(&job->ring->fence_drv.last_seq),
job->ring->fence_drv.sync_seq);
+amdgpu_gpu_reset(job->adev);
-amd_sched_job_put(&job->base);
}

int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
...
@@ -63,7 +57,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
(*job)->vm = vm;
(*job)->ibs = (void *)&(*job)[1];
(*job)->num_ibs = num_ibs;
-INIT_WORK(&(*job)->base.work_free_job, amdgpu_job_free_handler);

amdgpu_sync_create(&(*job)->sync);
...
@@ -86,27 +79,33 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
return r;
}

-void amdgpu_job_free(struct amdgpu_job *job)
+void amdgpu_job_free_resources(struct amdgpu_job *job)
{
-unsigned i;
struct fence *f;
+unsigned i;

/* use sched fence if available */
-f = (job->base.s_fence)? &job->base.s_fence->base : job->fence;
+f = job->base.s_fence ? &job->base.s_fence->finished : job->fence;

for (i = 0; i < job->num_ibs; ++i)
-amdgpu_sa_bo_free(job->adev, &job->ibs[i].sa_bo, f);
-fence_put(job->fence);
+amdgpu_ib_free(job->adev, &job->ibs[i], f);
+}

-amdgpu_bo_unref(&job->uf_bo);
-amdgpu_sync_free(&job->sync);
+void amdgpu_job_free_cb(struct amd_sched_job *s_job)
+{
+struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);

-if (!job->base.use_sched)
+fence_put(job->fence);
+amdgpu_sync_free(&job->sync);
kfree(job);
}

-void amdgpu_job_free_func(struct kref *refcount)
+void amdgpu_job_free(struct amdgpu_job *job)
{
-struct amdgpu_job *job = container_of(refcount, struct amdgpu_job, base.refcount);
+amdgpu_job_free_resources(job);
+
+fence_put(job->fence);
+amdgpu_sync_free(&job->sync);
kfree(job);
}
...
@@ -114,22 +113,20 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
struct amd_sched_entity *entity, void *owner,
struct fence **f)
{
-struct fence *fence;
int r;

job->ring = ring;

if (!f)
return -EINVAL;

-r = amd_sched_job_init(&job->base, &ring->sched,
-entity, amdgpu_job_timeout_func,
-amdgpu_job_free_func, owner, &fence);
+r = amd_sched_job_init(&job->base, &ring->sched, entity, owner);
if (r)
return r;

job->owner = owner;
job->ctx = entity->fence_context;
-*f = fence_get(fence);
+*f = fence_get(&job->base.s_fence->finished);
+amdgpu_job_free_resources(job);
amd_sched_entity_push_job(&job->base);

return 0;
...
@@ -147,8 +144,8 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
int r;

r = amdgpu_vm_grab_id(vm, ring, &job->sync,
-&job->base.s_fence->base,
-&job->vm_id, &job->vm_pd_addr);
+&job->base.s_fence->finished,
+job);
if (r)
DRM_ERROR("Error getting VM ID (%d)\n", r);
...
@@ -170,11 +167,7 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
}
job = to_amdgpu_job(sched_job);

-r = amdgpu_sync_wait(&job->sync);
-if (r) {
-DRM_ERROR("failed to sync wait (%d)\n", r);
-return NULL;
-}
+BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));

trace_amdgpu_sched_run_job(job);
r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs,
...
@@ -185,14 +178,15 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
}

err:
+/* if gpu reset, hw fence will be replaced here */
+fence_put(job->fence);
job->fence = fence;
-amdgpu_job_free(job);
return fence;
}

const struct amd_sched_backend_ops amdgpu_sched_ops = {
.dependency = amdgpu_job_dependency,
.run_job = amdgpu_job_run,
-.begin_job = amd_sched_job_begin,
-.finish_job = amd_sched_job_finish,
+.timedout_job = amdgpu_job_timedout,
+.free_job = amdgpu_job_free_cb
};
...
@@ -142,6 +142,65 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
return r;
}
static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
struct drm_amdgpu_query_fw *query_fw,
struct amdgpu_device *adev)
{
switch (query_fw->fw_type) {
case AMDGPU_INFO_FW_VCE:
fw_info->ver = adev->vce.fw_version;
fw_info->feature = adev->vce.fb_version;
break;
case AMDGPU_INFO_FW_UVD:
fw_info->ver = adev->uvd.fw_version;
fw_info->feature = 0;
break;
case AMDGPU_INFO_FW_GMC:
fw_info->ver = adev->mc.fw_version;
fw_info->feature = 0;
break;
case AMDGPU_INFO_FW_GFX_ME:
fw_info->ver = adev->gfx.me_fw_version;
fw_info->feature = adev->gfx.me_feature_version;
break;
case AMDGPU_INFO_FW_GFX_PFP:
fw_info->ver = adev->gfx.pfp_fw_version;
fw_info->feature = adev->gfx.pfp_feature_version;
break;
case AMDGPU_INFO_FW_GFX_CE:
fw_info->ver = adev->gfx.ce_fw_version;
fw_info->feature = adev->gfx.ce_feature_version;
break;
case AMDGPU_INFO_FW_GFX_RLC:
fw_info->ver = adev->gfx.rlc_fw_version;
fw_info->feature = adev->gfx.rlc_feature_version;
break;
case AMDGPU_INFO_FW_GFX_MEC:
if (query_fw->index == 0) {
fw_info->ver = adev->gfx.mec_fw_version;
fw_info->feature = adev->gfx.mec_feature_version;
} else if (query_fw->index == 1) {
fw_info->ver = adev->gfx.mec2_fw_version;
fw_info->feature = adev->gfx.mec2_feature_version;
} else
return -EINVAL;
break;
case AMDGPU_INFO_FW_SMC:
fw_info->ver = adev->pm.fw_version;
fw_info->feature = 0;
break;
case AMDGPU_INFO_FW_SDMA:
if (query_fw->index >= adev->sdma.num_instances)
return -EINVAL;
fw_info->ver = adev->sdma.instance[query_fw->index].fw_version;
fw_info->feature = adev->sdma.instance[query_fw->index].feature_version;
break;
default:
return -EINVAL;
}
return 0;
}
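Userspace reaches this helper through the AMDGPU_INFO ioctl. A hedged sketch using raw libdrm (the render node path is an assumption; real applications would normally go through libdrm_amdgpu's amdgpu_query_firmware_version() instead):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <amdgpu_drm.h>
    #include <xf86drm.h>

    int main(void)
    {
        struct drm_amdgpu_info request;
        struct drm_amdgpu_info_firmware fw;
        int fd = open("/dev/dri/renderD128", O_RDWR); /* node name varies */

        if (fd < 0)
            return 1;
        memset(&request, 0, sizeof(request));
        request.return_pointer = (uintptr_t)&fw;
        request.return_size = sizeof(fw);
        request.query = AMDGPU_INFO_FW_VERSION;
        request.query_fw.fw_type = AMDGPU_INFO_FW_GFX_ME;
        if (drmCommandWrite(fd, DRM_AMDGPU_INFO, &request, sizeof(request)) == 0)
            printf("ME firmware 0x%08x, feature %u\n", fw.ver, fw.feature);
        close(fd);
        return 0;
    }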
/*
* Userspace get information ioctl
*/
...
@@ -288,67 +347,20 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0;
}
case AMDGPU_INFO_TIMESTAMP:
-ui64 = amdgpu_asic_get_gpu_clock_counter(adev);
+ui64 = amdgpu_gfx_get_gpu_clock_counter(adev);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_FW_VERSION: {
struct drm_amdgpu_info_firmware fw_info;
int ret;

/* We only support one instance of each IP block right now. */
if (info->query_fw.ip_instance != 0)
return -EINVAL;

-switch (info->query_fw.fw_type) {
-case AMDGPU_INFO_FW_VCE:
-fw_info.ver = adev->vce.fw_version;
-fw_info.feature = adev->vce.fb_version;
-break;
-case AMDGPU_INFO_FW_UVD:
-fw_info.ver = adev->uvd.fw_version;
-fw_info.feature = 0;
-break;
-case AMDGPU_INFO_FW_GMC:
-fw_info.ver = adev->mc.fw_version;
-fw_info.feature = 0;
-break;
-case AMDGPU_INFO_FW_GFX_ME:
-fw_info.ver = adev->gfx.me_fw_version;
-fw_info.feature = adev->gfx.me_feature_version;
-break;
-case AMDGPU_INFO_FW_GFX_PFP:
-fw_info.ver = adev->gfx.pfp_fw_version;
-fw_info.feature = adev->gfx.pfp_feature_version;
-break;
-case AMDGPU_INFO_FW_GFX_CE:
-fw_info.ver = adev->gfx.ce_fw_version;
-fw_info.feature = adev->gfx.ce_feature_version;
-break;
-case AMDGPU_INFO_FW_GFX_RLC:
-fw_info.ver = adev->gfx.rlc_fw_version;
-fw_info.feature = adev->gfx.rlc_feature_version;
-break;
-case AMDGPU_INFO_FW_GFX_MEC:
-if (info->query_fw.index == 0) {
-fw_info.ver = adev->gfx.mec_fw_version;
-fw_info.feature = adev->gfx.mec_feature_version;
-} else if (info->query_fw.index == 1) {
-fw_info.ver = adev->gfx.mec2_fw_version;
-fw_info.feature = adev->gfx.mec2_feature_version;
-} else
-return -EINVAL;
-break;
-case AMDGPU_INFO_FW_SMC:
-fw_info.ver = adev->pm.fw_version;
-fw_info.feature = 0;
-break;
-case AMDGPU_INFO_FW_SDMA:
-if (info->query_fw.index >= adev->sdma.num_instances)
-return -EINVAL;
-fw_info.ver = adev->sdma.instance[info->query_fw.index].fw_version;
-fw_info.feature = adev->sdma.instance[info->query_fw.index].feature_version;
-break;
-default:
-return -EINVAL;
-}
+ret = amdgpu_firmware_info(&fw_info, &info->query_fw, adev);
+if (ret)
+return ret;

return copy_to_user(out, &fw_info,
min((size_t)size, sizeof(fw_info))) ? -EFAULT : 0;
}
...
@@ -756,3 +768,130 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
};
const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
/*
* Debugfs info
*/
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
struct drm_amdgpu_info_firmware fw_info;
struct drm_amdgpu_query_fw query_fw;
int ret, i;
/* VCE */
query_fw.fw_type = AMDGPU_INFO_FW_VCE;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "VCE feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* UVD */
query_fw.fw_type = AMDGPU_INFO_FW_UVD;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "UVD feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* GMC */
query_fw.fw_type = AMDGPU_INFO_FW_GMC;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "MC feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* ME */
query_fw.fw_type = AMDGPU_INFO_FW_GFX_ME;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "ME feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* PFP */
query_fw.fw_type = AMDGPU_INFO_FW_GFX_PFP;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "PFP feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* CE */
query_fw.fw_type = AMDGPU_INFO_FW_GFX_CE;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "CE feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* RLC */
query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* MEC */
query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
query_fw.index = 0;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "MEC feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* MEC2 */
if (adev->asic_type == CHIP_KAVERI ||
(adev->asic_type > CHIP_TOPAZ && adev->asic_type != CHIP_STONEY)) {
query_fw.index = 1;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "MEC2 feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
}
/* SMC */
query_fw.fw_type = AMDGPU_INFO_FW_SMC;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "SMC feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* SDMA */
query_fw.fw_type = AMDGPU_INFO_FW_SDMA;
for (i = 0; i < adev->sdma.num_instances; i++) {
query_fw.index = i;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "SDMA%d feature version: %u, firmware version: 0x%08x\n",
i, fw_info.feature, fw_info.ver);
}
return 0;
}
static const struct drm_info_list amdgpu_firmware_info_list[] = {
{"amdgpu_firmware_info", amdgpu_debugfs_firmware_info, 0, NULL},
};
#endif
int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
return amdgpu_debugfs_add_files(adev, amdgpu_firmware_info_list,
ARRAY_SIZE(amdgpu_firmware_info_list));
#else
return 0;
#endif
}
...
@@ -589,6 +589,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *new_mem)
{
struct amdgpu_bo *rbo;
struct ttm_mem_reg *old_mem = &bo->mem;

if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
return;
...
@@ -602,6 +603,8 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
/* move_notify is called before move happens */
amdgpu_update_memory_usage(rbo->adev, &bo->mem, new_mem);

trace_amdgpu_ttm_bo_move(rbo, new_mem->mem_type, old_mem->mem_type);
}

int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
...
@@ -347,6 +347,8 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
if (adev->pp_enabled)
size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
else if (adev->pm.funcs->print_clock_levels)
size = adev->pm.funcs->print_clock_levels(adev, PP_SCLK, buf);

return size;
}
...
@@ -363,7 +365,9 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
uint32_t i, mask = 0;
char sub_str[2];

-for (i = 0; i < strlen(buf) - 1; i++) {
+for (i = 0; i < strlen(buf); i++) {
+if (*(buf + i) == '\n')
+continue;
sub_str[0] = *(buf + i);
sub_str[1] = '\0';
ret = kstrtol(sub_str, 0, &level);
...
@@ -377,6 +381,8 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
if (adev->pp_enabled)
amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
else if (adev->pm.funcs->force_clock_level)
adev->pm.funcs->force_clock_level(adev, PP_SCLK, mask);
fail:
return count;
}
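The parsing convention above treats every non-newline character as one decimal DPM level digit, so writing "02" (or "0\n2\n") to pp_dpm_sclk forces levels 0 and 2, i.e. mask 0x5. A hedged plain-C model of the same loop (not kernel code):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Mirrors the sysfs store loop: skip '\n'; every remaining character
     * must be a decimal level digit (the kernel returns -EINVAL otherwise). */
    static int levels_to_mask(const char *buf, uint32_t *mask)
    {
        size_t i;

        *mask = 0;
        for (i = 0; i < strlen(buf); i++) {
            if (buf[i] == '\n')
                continue;
            if (buf[i] < '0' || buf[i] > '9')
                return -1;
            *mask |= 1u << (buf[i] - '0');
        }
        return 0;
    }

    int main(void)
    {
        uint32_t mask;

        if (levels_to_mask("02\n", &mask) == 0)
            printf("mask = 0x%x\n", mask); /* prints 0x5 */
        return 0;
    }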
...
@@ -391,6 +397,8 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
if (adev->pp_enabled)
size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
else if (adev->pm.funcs->print_clock_levels)
size = adev->pm.funcs->print_clock_levels(adev, PP_MCLK, buf);

return size;
}
...
@@ -407,7 +415,9 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
uint32_t i, mask = 0;
char sub_str[2];

-for (i = 0; i < strlen(buf) - 1; i++) {
+for (i = 0; i < strlen(buf); i++) {
+if (*(buf + i) == '\n')
+continue;
sub_str[0] = *(buf + i);
sub_str[1] = '\0';
ret = kstrtol(sub_str, 0, &level);
...
@@ -421,6 +431,8 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
if (adev->pp_enabled)
amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
else if (adev->pm.funcs->force_clock_level)
adev->pm.funcs->force_clock_level(adev, PP_MCLK, mask);
fail:
return count;
}
...
@@ -435,6 +447,8 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
if (adev->pp_enabled)
size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
else if (adev->pm.funcs->print_clock_levels)
size = adev->pm.funcs->print_clock_levels(adev, PP_PCIE, buf);

return size;
}
...
@@ -451,7 +465,9 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
uint32_t i, mask = 0;
char sub_str[2];

-for (i = 0; i < strlen(buf) - 1; i++) {
+for (i = 0; i < strlen(buf); i++) {
+if (*(buf + i) == '\n')
+continue;
sub_str[0] = *(buf + i);
sub_str[1] = '\0';
ret = kstrtol(sub_str, 0, &level);
...
@@ -465,6 +481,100 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
if (adev->pp_enabled)
amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
else if (adev->pm.funcs->force_clock_level)
adev->pm.funcs->force_clock_level(adev, PP_PCIE, mask);
fail:
return count;
}
static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint32_t value = 0;
if (adev->pp_enabled)
value = amdgpu_dpm_get_sclk_od(adev);
else if (adev->pm.funcs->get_sclk_od)
value = adev->pm.funcs->get_sclk_od(adev);
return snprintf(buf, PAGE_SIZE, "%d\n", value);
}
static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int ret;
long int value;
ret = kstrtol(buf, 0, &value);
if (ret) {
count = -EINVAL;
goto fail;
}
if (adev->pp_enabled) {
amdgpu_dpm_set_sclk_od(adev, (uint32_t)value);
amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_READJUST_POWER_STATE, NULL, NULL);
} else if (adev->pm.funcs->set_sclk_od) {
adev->pm.funcs->set_sclk_od(adev, (uint32_t)value);
adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
amdgpu_pm_compute_clocks(adev);
}
fail:
return count;
}
static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint32_t value = 0;
if (adev->pp_enabled)
value = amdgpu_dpm_get_mclk_od(adev);
else if (adev->pm.funcs->get_mclk_od)
value = adev->pm.funcs->get_mclk_od(adev);
return snprintf(buf, PAGE_SIZE, "%d\n", value);
}
static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int ret;
long int value;
ret = kstrtol(buf, 0, &value);
if (ret) {
count = -EINVAL;
goto fail;
}
if (adev->pp_enabled) {
amdgpu_dpm_set_mclk_od(adev, (uint32_t)value);
amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_READJUST_POWER_STATE, NULL, NULL);
} else if (adev->pm.funcs->set_mclk_od) {
adev->pm.funcs->set_mclk_od(adev, (uint32_t)value);
adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
amdgpu_pm_compute_clocks(adev);
}
fail:
return count;
}
...
@@ -490,6 +600,12 @@ static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR,
static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR,
amdgpu_get_pp_dpm_pcie,
amdgpu_set_pp_dpm_pcie);
static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR,
amdgpu_get_pp_sclk_od,
amdgpu_set_pp_sclk_od);
static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR,
amdgpu_get_pp_mclk_od,
amdgpu_set_pp_mclk_od);
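The two od files accept a decimal overdrive percentage as a string; writing 0 returns to stock clocks. A hedged sketch of driving them from userspace (the sysfs path and card index are assumptions):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        char buf[16];
        ssize_t n;
        int fd = open("/sys/class/drm/card0/device/pp_sclk_od", O_RDWR);

        if (fd < 0)
            return 1;
        n = read(fd, buf, sizeof(buf) - 1);
        if (n > 0) {
            buf[n] = '\0';
            printf("current sclk overdrive: %s", buf);
        }
        /* request a 5% engine clock overdrive */
        if (pwrite(fd, "5", 1, 0) != 1)
            perror("pwrite");
        close(fd);
        return 0;
    }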
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
...
@@ -1108,6 +1224,8 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
DRM_ERROR("failed to create device file pp_table\n");
return ret;
}
}
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_sclk\n");
...
@@ -1123,7 +1241,17 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
DRM_ERROR("failed to create device file pp_dpm_pcie\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_pp_sclk_od);
if (ret) {
DRM_ERROR("failed to create device file pp_sclk_od\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_pp_mclk_od);
if (ret) {
DRM_ERROR("failed to create device file pp_mclk_od\n");
return ret;
}
ret = amdgpu_debugfs_pm_init(adev);
if (ret) {
DRM_ERROR("Failed to register debugfs file for dpm!\n");
...
@@ -1146,10 +1274,12 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev, &dev_attr_pp_cur_state);
device_remove_file(adev->dev, &dev_attr_pp_force_state);
device_remove_file(adev->dev, &dev_attr_pp_table);
}
device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
-}
+device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
+device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
}

void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
...
@@ -52,6 +52,7 @@ static int amdgpu_powerplay_init(struct amdgpu_device *adev)
pp_init->chip_family = adev->family;
pp_init->chip_id = adev->asic_type;
pp_init->device = amdgpu_cgs_create_device(adev);
pp_init->powercontainment_enabled = amdgpu_powercontainment;
ret = amd_powerplay_init(pp_init, amd_pp);
kfree(pp_init);
...
@@ -28,6 +28,7 @@
*/
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
...
@@ -48,6 +49,7 @@
*/
static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring);
/**
* amdgpu_ring_alloc - allocate space on the ring buffer
...
@@ -139,78 +141,6 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
ring->wptr = ring->wptr_old;
}

-/**
- * amdgpu_ring_backup - Back up the content of a ring
- *
- * @ring: the ring we want to back up
- *
- * Saves all unprocessed commits from a ring, returns the number of dwords saved.
- */
-unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
-uint32_t **data)
-{
-unsigned size, ptr, i;
-
-*data = NULL;
-
-if (ring->ring_obj == NULL)
-return 0;
-
-/* it doesn't make sense to save anything if all fences are signaled */
-if (!amdgpu_fence_count_emitted(ring))
-return 0;
-
-ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);
-
-size = ring->wptr + (ring->ring_size / 4);
-size -= ptr;
-size &= ring->ptr_mask;
-if (size == 0)
-return 0;
-
-/* and then save the content of the ring */
-*data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
-if (!*data)
-return 0;
-for (i = 0; i < size; ++i) {
-(*data)[i] = ring->ring[ptr++];
-ptr &= ring->ptr_mask;
-}
-
-return size;
-}
-
-/**
- * amdgpu_ring_restore - append saved commands to the ring again
- *
- * @ring: ring to append commands to
- * @size: number of dwords we want to write
- * @data: saved commands
- *
- * Allocates space on the ring and restore the previously saved commands.
- */
-int amdgpu_ring_restore(struct amdgpu_ring *ring,
-unsigned size, uint32_t *data)
-{
-int i, r;
-
-if (!size || !data)
-return 0;
-
-/* restore the saved ring content */
-r = amdgpu_ring_alloc(ring, size);
-if (r)
-return r;
-
-for (i = 0; i < size; ++i) {
-amdgpu_ring_write(ring, data[i]);
-}
-
-amdgpu_ring_commit(ring);
-kfree(data);
-return 0;
-}
/**
* amdgpu_ring_init - init driver ring struct.
*
...
@@ -260,14 +190,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
return r;
}

-r = amdgpu_wb_get(adev, &ring->next_rptr_offs);
-if (r) {
-dev_err(adev->dev, "(%d) ring next_rptr wb alloc failed\n", r);
-return r;
-}
-
-ring->next_rptr_gpu_addr = adev->wb.gpu_addr + ring->next_rptr_offs * 4;
-ring->next_rptr_cpu_addr = &adev->wb.wb[ring->next_rptr_offs];
r = amdgpu_wb_get(adev, &ring->cond_exe_offs);
if (r) {
dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
...
@@ -310,6 +232,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
}
r = amdgpu_bo_kmap(ring->ring_obj,
(void **)&ring->ring);
memset((void *)ring->ring, 0, ring->ring_size);
amdgpu_bo_unreserve(ring->ring_obj);
if (r) {
dev_err(adev->dev, "(%d) ring map failed\n", r);
...
@@ -347,7 +272,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
amdgpu_wb_free(ring->adev, ring->fence_offs);
amdgpu_wb_free(ring->adev, ring->rptr_offs);
amdgpu_wb_free(ring->adev, ring->wptr_offs);
-amdgpu_wb_free(ring->adev, ring->next_rptr_offs);

if (ring_obj) {
r = amdgpu_bo_reserve(ring_obj, false);
...
@@ -358,6 +282,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
}
amdgpu_bo_unref(&ring_obj);
}
amdgpu_debugfs_ring_fini(ring);
}

/*
...
@@ -365,57 +290,62 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
*/
#if defined(CONFIG_DEBUG_FS)

-static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
-{
-struct drm_info_node *node = (struct drm_info_node *) m->private;
-struct drm_device *dev = node->minor->dev;
-struct amdgpu_device *adev = dev->dev_private;
-int roffset = (unsigned long)node->info_ent->data;
-struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset);
-uint32_t rptr, wptr, rptr_next;
-unsigned i;
-
-wptr = amdgpu_ring_get_wptr(ring);
-seq_printf(m, "wptr: 0x%08x [%5d]\n", wptr, wptr);
-
-rptr = amdgpu_ring_get_rptr(ring);
-rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
-
-seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr);
-
-seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
-ring->wptr, ring->wptr);
-
-if (!ring->ready)
-return 0;
-
-/* print 8 dw before current rptr as often it's the last executed
- * packet that is the root issue
- */
-i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
-while (i != rptr) {
-seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
-if (i == rptr)
-seq_puts(m, " *");
-if (i == rptr_next)
-seq_puts(m, " #");
-seq_puts(m, "\n");
-i = (i + 1) & ring->ptr_mask;
-}
-while (i != wptr) {
-seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
-if (i == rptr)
-seq_puts(m, " *");
-if (i == rptr_next)
-seq_puts(m, " #");
-seq_puts(m, "\n");
-i = (i + 1) & ring->ptr_mask;
-}
-return 0;
-}
+/* Layout of file is 12 bytes consisting of
+ * - rptr
+ * - wptr
+ * - driver's copy of wptr
+ *
+ * followed by n-words of ring data
+ */
+static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
+size_t size, loff_t *pos)
+{
+struct amdgpu_ring *ring = (struct amdgpu_ring*)f->f_inode->i_private;
+int r, i;
+uint32_t value, result, early[3];
+
+if (*pos & 3 || size & 3)
+return -EINVAL;
+
+result = 0;
+
+if (*pos < 12) {
+early[0] = amdgpu_ring_get_rptr(ring);
+early[1] = amdgpu_ring_get_wptr(ring);
+early[2] = ring->wptr;
+for (i = *pos / 4; i < 3 && size; i++) {
+r = put_user(early[i], (uint32_t *)buf);
+if (r)
+return r;
+buf += 4;
+result += 4;
+size -= 4;
+*pos += 4;
+}
+}
+
+while (size) {
+if (*pos >= (ring->ring_size + 12))
+return result;
+
+value = ring->ring[(*pos - 12)/4];
+r = put_user(value, (uint32_t*)buf);
+if (r)
+return r;
+buf += 4;
+result += 4;
+size -= 4;
+*pos += 4;
+}
+
+return result;
+}

-static struct drm_info_list amdgpu_debugfs_ring_info_list[AMDGPU_MAX_RINGS];
-static char amdgpu_debugfs_ring_names[AMDGPU_MAX_RINGS][32];
+static const struct file_operations amdgpu_debugfs_ring_fops = {
+.owner = THIS_MODULE,
+.read = amdgpu_debugfs_ring_read,
+.llseek = default_llseek
+};

#endif
...
@@ -423,28 +353,27 @@ static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
#if defined(CONFIG_DEBUG_FS)
-unsigned offset = (uint8_t*)ring - (uint8_t*)adev;
-unsigned i;
-struct drm_info_list *info;
-char *name;
-
-for (i = 0; i < ARRAY_SIZE(amdgpu_debugfs_ring_info_list); ++i) {
-info = &amdgpu_debugfs_ring_info_list[i];
-if (!info->data)
-break;
-}
-
-if (i == ARRAY_SIZE(amdgpu_debugfs_ring_info_list))
-return -ENOSPC;
-
-name = &amdgpu_debugfs_ring_names[i][0];
+struct drm_minor *minor = adev->ddev->primary;
+struct dentry *ent, *root = minor->debugfs_root;
+char name[32];

sprintf(name, "amdgpu_ring_%s", ring->name);
-info->name = name;
-info->show = amdgpu_debugfs_ring_info;
-info->driver_features = 0;
-info->data = (void*)(uintptr_t)offset;

-return amdgpu_debugfs_add_files(adev, info, 1);
+ent = debugfs_create_file(name,
+S_IFREG | S_IRUGO, root,
+ring, &amdgpu_debugfs_ring_fops);
+if (IS_ERR(ent))
+return PTR_ERR(ent);
+
+i_size_write(ent->d_inode, ring->ring_size + 12);
+ring->ent = ent;
#endif
return 0;
}

+static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring)
+{
+#if defined(CONFIG_DEBUG_FS)
+debugfs_remove(ring->ent);
+#endif
+}
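Matching the layout comment above: the first three 32-bit words are rptr, wptr and the driver's copy of wptr, and ring contents start at byte offset 12. A hedged userspace reader (the debugfs path is an assumption):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        uint32_t hdr[3], dw[8];
        int fd = open("/sys/kernel/debug/dri/0/amdgpu_ring_gfx", O_RDONLY);

        if (fd < 0)
            return 1;
        if (read(fd, hdr, sizeof(hdr)) == sizeof(hdr))
            printf("rptr %u wptr %u driver-wptr %u\n", hdr[0], hdr[1], hdr[2]);
        /* ring data starts at byte offset 12 */
        if (pread(fd, dw, sizeof(dw), 12) == sizeof(dw))
            printf("first ring dword 0x%08x\n", dw[0]);
        close(fd);
        return 0;
    }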
...
@@ -223,13 +223,16 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
}

/**
-* amdgpu_sync_is_idle - test if all fences are signaled
+* amdgpu_sync_peek_fence - get the next fence not signaled yet
*
* @sync: the sync object
+* @ring: optional ring to use for test
*
-* Returns true if all fences in the sync object are signaled.
+* Returns the next fence not signaled yet without removing it from the sync
+* object.
*/
-bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
+struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+struct amdgpu_ring *ring)
{
struct amdgpu_sync_entry *e;
struct hlist_node *tmp;
...
@@ -237,6 +240,19 @@ bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
hash_for_each_safe(sync->fences, i, tmp, e, node) {
struct fence *f = e->fence;
struct amd_sched_fence *s_fence = to_amd_sched_fence(f);

if (ring && s_fence) {
/* For fences from the same ring it is sufficient
* when they are scheduled.
*/
if (s_fence->sched == &ring->sched) {
if (fence_is_signaled(&s_fence->scheduled))
continue;

return &s_fence->scheduled;
}
}

if (fence_is_signaled(f)) {
hash_del(&e->node);
...
@@ -245,58 +261,19 @@ bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
continue;
}

-return false;
+return f;
}

-return true;
+return NULL;
}
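A hedged sketch of the calling pattern this enables: peeking leaves the unsignaled entries in place, so a caller can wait on whatever is still outstanding without consuming the sync object. Only amdgpu_sync_peek_fence() and fence_wait() are interfaces from this code; the wrapper itself is illustrative:

    /* Illustrative only: drain a sync object by repeatedly peeking at the
     * next unsignaled fence and waiting on it. */
    static int example_sync_wait_all(struct amdgpu_sync *sync,
                                     struct amdgpu_ring *ring)
    {
        struct fence *f;
        int r;

        while ((f = amdgpu_sync_peek_fence(sync, ring))) {
            r = fence_wait(f, false);
            if (r)
                return r;
        }
        return 0;
    }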
/**
-* amdgpu_sync_cycle_fences - move fences from one sync object into another
+* amdgpu_sync_get_fence - get the next fence from the sync object
*
-* @dst: the destination sync object
-* @src: the source sync object
-* @fence: fence to add to source
+* @sync: sync object to use
*
-* Remove all fences from source and put them into destination and add
-* fence as new one into source.
+* Get and removes the next fence from the sync object not signaled yet.
*/
-int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
-struct fence *fence)
-{
-struct amdgpu_sync_entry *e, *newone;
-struct hlist_node *tmp;
-int i;
-
-/* Allocate the new entry before moving the old ones */
-newone = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
-if (!newone)
-return -ENOMEM;
-
-hash_for_each_safe(src->fences, i, tmp, e, node) {
-struct fence *f = e->fence;
-
-hash_del(&e->node);
-if (fence_is_signaled(f)) {
-fence_put(f);
-kmem_cache_free(amdgpu_sync_slab, e);
-continue;
-}
-
-if (amdgpu_sync_add_later(dst, f)) {
-kmem_cache_free(amdgpu_sync_slab, e);
-continue;
-}
-
-hash_add(dst->fences, &e->node, f->context);
-}
-
-hash_add(src->fences, &newone->node, fence->context);
-newone->fence = fence_get(fence);
-
-return 0;
-}
-
struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
{
struct amdgpu_sync_entry *e;
...
@@ -319,25 +296,6 @@ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
return NULL;
}

-int amdgpu_sync_wait(struct amdgpu_sync *sync)
-{
-struct amdgpu_sync_entry *e;
-struct hlist_node *tmp;
-int i, r;
-
-hash_for_each_safe(sync->fences, i, tmp, e, node) {
-r = fence_wait(e->fence, false);
-if (r)
-return r;
-
-hash_del(&e->node);
-fence_put(e->fence);
-kmem_cache_free(amdgpu_sync_slab, e);
-}
-
-return 0;
-}
-
/**
* amdgpu_sync_free - free the sync object
*
...
@@ -11,19 +11,68 @@
#define TRACE_SYSTEM amdgpu
#define TRACE_INCLUDE_FILE amdgpu_trace
TRACE_EVENT(amdgpu_mm_rreg,
TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
TP_ARGS(did, reg, value),
TP_STRUCT__entry(
__field(unsigned, did)
__field(uint32_t, reg)
__field(uint32_t, value)
),
TP_fast_assign(
__entry->did = did;
__entry->reg = reg;
__entry->value = value;
),
TP_printk("0x%04lx, 0x%04lx, 0x%08lx",
(unsigned long)__entry->did,
(unsigned long)__entry->reg,
(unsigned long)__entry->value)
);
TRACE_EVENT(amdgpu_mm_wreg,
TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
TP_ARGS(did, reg, value),
TP_STRUCT__entry(
__field(unsigned, did)
__field(uint32_t, reg)
__field(uint32_t, value)
),
TP_fast_assign(
__entry->did = did;
__entry->reg = reg;
__entry->value = value;
),
TP_printk("0x%04lx, 0x%04lx, 0x%08lx",
(unsigned long)__entry->did,
(unsigned long)__entry->reg,
(unsigned long)__entry->value)
);
TRACE_EVENT(amdgpu_bo_create,
TP_PROTO(struct amdgpu_bo *bo),
TP_ARGS(bo),

TP_STRUCT__entry(
__field(struct amdgpu_bo *, bo)
__field(u32, pages)
__field(u32, type)
__field(u32, prefer)
__field(u32, allow)
__field(u32, visible)
), ),
TP_fast_assign( TP_fast_assign(
__entry->bo = bo; __entry->bo = bo;
__entry->pages = bo->tbo.num_pages; __entry->pages = bo->tbo.num_pages;
__entry->type = bo->tbo.mem.mem_type;
__entry->prefer = bo->prefered_domains;
__entry->allow = bo->allowed_domains;
__entry->visible = bo->flags;
),

-TP_printk("bo=%p, pages=%u", __entry->bo, __entry->pages)
+TP_printk("bo=%p,pages=%u,type=%d,prefered=%d,allowed=%d,visible=%d",
+__entry->bo, __entry->pages, __entry->type,
+__entry->prefer, __entry->allow, __entry->visible)
);
TRACE_EVENT(amdgpu_cs,
...
@@ -64,7 +113,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
__entry->adev = job->adev;
__entry->sched_job = &job->base;
__entry->ib = job->ibs;
-__entry->fence = &job->base.s_fence->base;
+__entry->fence = &job->base.s_fence->finished;
__entry->ring_name = job->ring->name;
__entry->num_ibs = job->num_ibs;
),
...
@@ -89,7 +138,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
__entry->adev = job->adev;
__entry->sched_job = &job->base;
__entry->ib = job->ibs;
-__entry->fence = &job->base.s_fence->base;
+__entry->fence = &job->base.s_fence->finished;
__entry->ring_name = job->ring->name;
__entry->num_ibs = job->num_ibs;
),
...
@@ -244,13 +293,55 @@ TRACE_EVENT(amdgpu_bo_list_set,
TP_STRUCT__entry(
__field(struct amdgpu_bo_list *, list)
__field(struct amdgpu_bo *, bo)
__field(u64, bo_size)
),

TP_fast_assign(
__entry->list = list;
__entry->bo = bo;
__entry->bo_size = amdgpu_bo_size(bo);
),
TP_printk("list=%p, bo=%p, bo_size = %Ld",
__entry->list,
__entry->bo,
__entry->bo_size)
);
TRACE_EVENT(amdgpu_cs_bo_status,
TP_PROTO(uint64_t total_bo, uint64_t total_size),
TP_ARGS(total_bo, total_size),
TP_STRUCT__entry(
__field(u64, total_bo)
__field(u64, total_size)
),
TP_fast_assign(
__entry->total_bo = total_bo;
__entry->total_size = total_size;
),
TP_printk("total bo size = %Ld, total bo count = %Ld",
__entry->total_bo, __entry->total_size)
);
TRACE_EVENT(amdgpu_ttm_bo_move,
TP_PROTO(struct amdgpu_bo* bo, uint32_t new_placement, uint32_t old_placement),
TP_ARGS(bo, new_placement, old_placement),
TP_STRUCT__entry(
__field(struct amdgpu_bo *, bo)
__field(u64, bo_size)
__field(u32, new_placement)
__field(u32, old_placement)
),
TP_fast_assign(
__entry->bo = bo;
__entry->bo_size = amdgpu_bo_size(bo);
__entry->new_placement = new_placement;
__entry->old_placement = old_placement;
), ),
TP_printk("list=%p, bo=%p", __entry->list, __entry->bo) TP_printk("bo=%p from:%d to %d with size = %Ld",
__entry->bo, __entry->old_placement,
__entry->new_placement, __entry->bo_size)
); );
#endif #endif
......
...@@ -286,9 +286,10 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, ...@@ -286,9 +286,10 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
r = amdgpu_copy_buffer(ring, old_start, new_start, r = amdgpu_copy_buffer(ring, old_start, new_start,
new_mem->num_pages * PAGE_SIZE, /* bytes */ new_mem->num_pages * PAGE_SIZE, /* bytes */
bo->resv, &fence); bo->resv, &fence);
/* FIXME: handle copy error */ if (r)
r = ttm_bo_move_accel_cleanup(bo, fence, return r;
evict, no_wait_gpu, new_mem);
r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
fence_put(fence); fence_put(fence);
return r; return r;
} }
@@ -396,6 +397,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
return -EINVAL;
adev = amdgpu_get_adev(bo->bdev);
/* remember the eviction */
if (evict)
atomic64_inc(&adev->num_evictions);
if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
amdgpu_move_null(bo, new_mem);
return 0;
@@ -429,7 +435,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
if (r) {
memcpy:
r = ttm_bo_move_memcpy(bo, evict, interruptible,
no_wait_gpu, new_mem);
if (r) {
return r;
}
......
@@ -25,6 +25,7 @@
* Alex Deucher
* Jerome Glisse
*/
#include <linux/fence-array.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
@@ -114,16 +115,26 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
/**
* amdgpu_vm_get_bos - add the vm BOs to a duplicates list
*
* @adev: amdgpu device pointer
* @vm: vm providing the BOs
* @duplicates: head of duplicates list
*
* Add the page directory to the BO duplicates list
* for command submission.
*/
void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct list_head *duplicates)
{
uint64_t num_evictions;
unsigned i;
/* We only need to validate the page tables
* if they aren't already valid.
*/
num_evictions = atomic64_read(&adev->num_evictions);
if (num_evictions == vm->last_eviction_counter)
return;
/* add the vm page table to the list */
for (i = 0; i <= vm->max_pde_used; ++i) {
struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;
@@ -162,6 +173,13 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
spin_unlock(&glob->lru_lock);
}
static bool amdgpu_vm_is_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vm_id *id)
{
return id->current_gpu_reset_count !=
atomic_read(&adev->gpu_reset_counter) ? true : false;
}
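Both the eviction counter introduced here and the gpu_reset_counter checked above follow the same generation-counter idiom: cache a snapshot when state is validated, and treat the cache as stale once the global counter moves on. A standalone user-space analogue (names illustrative):

        #include <stdatomic.h>
        #include <stdbool.h>

        /* minimal analogue of num_evictions / gpu_reset_counter */
        static atomic_long global_generation;

        struct cached_state {
                long snapshot; /* generation at the time the state was validated */
        };

        static bool cache_is_stale(const struct cached_state *c)
        {
                return c->snapshot != atomic_load(&global_generation);
        }

        static void cache_revalidate(struct cached_state *c)
        {
                c->snapshot = atomic_load(&global_generation);
        }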
/**
* amdgpu_vm_grab_id - allocate the next free VMID
*
@@ -174,20 +192,69 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
*/
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync, struct fence *fence,
struct amdgpu_job *job)
{
struct amdgpu_device *adev = ring->adev;
struct fence *updates = sync->last_vm_update;
struct amdgpu_vm_id *id, *idle;
struct fence **fences;
unsigned i;
int r = 0;
fences = kmalloc_array(sizeof(void *), adev->vm_manager.num_ids,
GFP_KERNEL);
if (!fences)
return -ENOMEM;
mutex_lock(&adev->vm_manager.lock);
/* Check if we have an idle VMID */
i = 0;
list_for_each_entry(idle, &adev->vm_manager.ids_lru, list) {
fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
if (!fences[i])
break;
++i;
}
/* If we can't find an idle VMID to use, wait till one becomes available */
if (&idle->list == &adev->vm_manager.ids_lru) {
u64 fence_context = adev->vm_manager.fence_context + ring->idx;
unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
struct fence_array *array;
unsigned j;
for (j = 0; j < i; ++j)
fence_get(fences[j]);
array = fence_array_create(i, fences, fence_context,
seqno, true);
if (!array) {
for (j = 0; j < i; ++j)
fence_put(fences[j]);
kfree(fences);
r = -ENOMEM;
goto error;
}
r = amdgpu_sync_fence(ring->adev, sync, &array->base);
fence_put(&array->base);
if (r)
goto error;
mutex_unlock(&adev->vm_manager.lock);
return 0;
}
kfree(fences);
job->vm_needs_flush = true;
/* Check if we can use a VMID already assigned to this VM */
i = ring->idx;
do {
struct fence *flushed;
bool same_ring = ring->idx == i;
id = vm->ids[i++];
if (i == AMDGPU_MAX_RINGS)
@@ -196,67 +263,49 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
/* Check all the prerequisites to using this VMID */
if (!id)
continue;
if (amdgpu_vm_is_gpu_reset(adev, id))
continue;
if (atomic64_read(&id->owner) != vm->client_id)
continue;
if (job->vm_pd_addr != id->pd_gpu_addr)
continue;
if (!same_ring &&
(!id->last_flush || !fence_is_signaled(id->last_flush)))
continue;
flushed = id->flushed_updates;
if (updates &&
(!flushed || fence_is_later(updates, flushed)))
continue;
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
r = amdgpu_sync_fence(ring->adev, &id->active, fence);
if (r)
goto error;
id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
list_move_tail(&id->list, &adev->vm_manager.ids_lru);
vm->ids[ring->idx] = id;
job->vm_id = id - adev->vm_manager.ids;
job->vm_needs_flush = false;
trace_amdgpu_vm_grab_id(vm, ring->idx, job->vm_id, job->vm_pd_addr);
mutex_unlock(&adev->vm_manager.lock);
return 0;
} while (i != ring->idx);
/* Still no ID to use? Then use the idle one found earlier */
id = idle;
/* Remember this submission as user of the VMID */
r = amdgpu_sync_fence(ring->adev, &id->active, fence);
if (r)
goto error;
@@ -269,22 +318,46 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
fence_put(id->flushed_updates);
id->flushed_updates = fence_get(updates);
id->pd_gpu_addr = job->vm_pd_addr;
id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
list_move_tail(&id->list, &adev->vm_manager.ids_lru);
atomic64_set(&id->owner, vm->client_id);
vm->ids[ring->idx] = id;
job->vm_id = id - adev->vm_manager.ids;
trace_amdgpu_vm_grab_id(vm, ring->idx, job->vm_id, job->vm_pd_addr);
error:
mutex_unlock(&adev->vm_manager.lock);
return r;
}
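When no VMID is idle, the code above bundles one fence per in-flight ID into a fence_array that signals as soon as any of them completes, and makes the submission wait on that instead of on a specific ID. A minimal sketch of the pattern (kernel context assumed; fence_array_create() takes over the references passed in 'fences'):

        static struct fence *wait_for_first_idle(struct fence **fences,
                                                 unsigned count,
                                                 u64 context, unsigned seqno)
        {
                struct fence_array *array;

                /* signal_on_any = true: done when the first fence signals */
                array = fence_array_create(count, fences, context, seqno, true);
                if (!array)
                        return NULL; /* caller must drop the fence references */
                return &array->base;
        }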
static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
const struct amdgpu_ip_block_version *ip_block;
if (ring->type != AMDGPU_RING_TYPE_COMPUTE)
/* only compute rings */
return false;
ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
if (!ip_block)
return false;
if (ip_block->major <= 7) {
/* gfx7 has no workaround */
return true;
} else if (ip_block->major == 8) {
if (adev->gfx.mec_fw_version >= 673)
/* gfx8 is fixed in MEC firmware 673 */
return false;
else
return true;
}
return false;
}
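The gate above encodes a firmware erratum: every gfx7 part needs the pipeline-sync workaround, while gfx8 parts are fixed starting with MEC firmware 673. A standalone restatement of that decision table:

        #include <stdbool.h>
        #include <stdint.h>

        /* numbers mirror the diff: gfx7 always broken, gfx8 fixed from MEC 673 */
        static bool needs_compute_vm_workaround(unsigned gfx_major, uint32_t mec_fw)
        {
                if (gfx_major <= 7)
                        return true;
                if (gfx_major == 8)
                        return mec_fw < 673;
                return false;
        }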
/**
* amdgpu_vm_flush - hardware flush the vm
*
@@ -294,59 +367,52 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
*
* Emit a VM flush when it is necessary.
*/
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_vm_id *id = &adev->vm_manager.ids[job->vm_id];
bool gds_switch_needed = ring->funcs->emit_gds_switch && (
id->gds_base != job->gds_base ||
id->gds_size != job->gds_size ||
id->gws_base != job->gws_base ||
id->gws_size != job->gws_size ||
id->oa_base != job->oa_base ||
id->oa_size != job->oa_size);
int r;
if (ring->funcs->emit_pipeline_sync && (
job->vm_needs_flush || gds_switch_needed ||
amdgpu_vm_ring_has_compute_vm_bug(ring)))
amdgpu_ring_emit_pipeline_sync(ring);
if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
amdgpu_vm_is_gpu_reset(adev, id))) {
struct fence *fence;
trace_amdgpu_vm_flush(job->vm_pd_addr, ring->idx, job->vm_id);
amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
r = amdgpu_fence_emit(ring, &fence);
if (r)
return r;
mutex_lock(&adev->vm_manager.lock);
fence_put(id->last_flush);
id->last_flush = fence;
mutex_unlock(&adev->vm_manager.lock);
}
if (gds_switch_needed) {
id->gds_base = job->gds_base;
id->gds_size = job->gds_size;
id->gws_base = job->gws_base;
id->gws_size = job->gws_size;
id->oa_base = job->oa_base;
id->oa_size = job->oa_size;
amdgpu_ring_emit_gds_switch(ring, job->vm_id,
job->gds_base, job->gds_size,
job->gws_base, job->gws_size,
job->oa_base, job->oa_size);
}
return 0;
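With the job structure carrying vm_needs_flush, the pipeline-sync decision reduces to a pure predicate over per-submission state. A condensed standalone sketch (struct and field names mirror the diff but are reassembled here for illustration):

        #include <stdbool.h>

        struct vm_flush_state {
                bool vm_needs_flush;
                bool gds_switch_needed;
                bool has_compute_vm_bug;
        };

        static bool needs_pipeline_sync(const struct vm_flush_state *s)
        {
                return s->vm_needs_flush || s->gds_switch_needed ||
                       s->has_compute_vm_bug;
        }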
@@ -723,7 +789,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
* @vm: requested vm
* @start: start of GPU address range
* @end: end of GPU address range
* @dst: destination address to map to, advanced as the walk proceeds
* @flags: mapping flags
*
* Update the page tables in the range @start - @end.
@@ -737,49 +803,75 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
{
const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
uint64_t cur_pe_start, cur_pe_end, cur_dst;
uint64_t addr; /* next GPU address to be updated */
uint64_t pt_idx;
struct amdgpu_bo *pt;
unsigned nptes; /* next number of ptes to be updated */
uint64_t next_pe_start;
/* initialize the variables */
addr = start;
pt_idx = addr >> amdgpu_vm_block_size;
pt = vm->page_tables[pt_idx].entry.robj;
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
else
nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
cur_pe_start = amdgpu_bo_gpu_offset(pt);
cur_pe_start += (addr & mask) * 8;
cur_pe_end = cur_pe_start + 8 * nptes;
cur_dst = dst;
/* for next ptb */
addr += nptes;
dst += nptes * AMDGPU_GPU_PAGE_SIZE;
/* walk over the address space and update the page tables */
while (addr < end) {
pt_idx = addr >> amdgpu_vm_block_size;
pt = vm->page_tables[pt_idx].entry.robj;
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
else
nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
next_pe_start = amdgpu_bo_gpu_offset(pt);
next_pe_start += (addr & mask) * 8;
if (cur_pe_end == next_pe_start) {
/* The next ptb is consecutive to current ptb.
* Don't call amdgpu_vm_frag_ptes now.
* Will update two ptbs together in future.
*/
cur_pe_end += 8 * nptes;
} else {
amdgpu_vm_frag_ptes(adev, vm_update_params,
cur_pe_start, cur_pe_end,
cur_dst, flags);
cur_pe_start = next_pe_start;
cur_pe_end = next_pe_start + 8 * nptes;
cur_dst = dst;
}
/* for next ptb */
addr += nptes;
dst += nptes * AMDGPU_GPU_PAGE_SIZE;
}
amdgpu_vm_frag_ptes(adev, vm_update_params, cur_pe_start,
cur_pe_end, cur_dst, flags);
}
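The rewritten walk coalesces page-table windows that are physically contiguous (8 bytes per PTE) and only flushes an update when a gap appears. A standalone sketch of the same merging logic, with flush() standing in for amdgpu_vm_frag_ptes() and a 4096-byte GPU page assumed:

        #include <stdint.h>
        #include <stdio.h>

        static void flush(uint64_t pe_start, uint64_t pe_end, uint64_t dst)
        {
                printf("update PEs [0x%llx, 0x%llx) -> 0x%llx\n",
                       (unsigned long long)pe_start, (unsigned long long)pe_end,
                       (unsigned long long)dst);
        }

        static void walk(const uint64_t *pe_starts, const unsigned *nptes,
                         unsigned chunks, uint64_t dst)
        {
                uint64_t cur_start = pe_starts[0];
                uint64_t cur_end = cur_start + 8ull * nptes[0];
                uint64_t cur_dst = dst;
                unsigned i;

                dst += nptes[0] * 4096ull;
                for (i = 1; i < chunks; ++i) {
                        if (cur_end == pe_starts[i]) {
                                cur_end += 8ull * nptes[i]; /* contiguous: merge */
                        } else {
                                flush(cur_start, cur_end, cur_dst);
                                cur_start = pe_starts[i];
                                cur_end = cur_start + 8ull * nptes[i];
                                cur_dst = dst;
                        }
                        dst += nptes[i] * 4096ull;
                }
                flush(cur_start, cur_end, cur_dst); /* trailing span */
        }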
/**
* amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
*
* @adev: amdgpu_device pointer
* @exclusive: fence we need to sync to
* @src: address where to copy page table entries from
* @pages_addr: DMA addresses to use for mapping
* @vm: requested vm
@@ -793,6 +885,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
* Returns 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
struct fence *exclusive,
uint64_t src,
dma_addr_t *pages_addr,
struct amdgpu_vm *vm,
@@ -853,6 +946,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
vm_update_params.ib = &job->ibs[0];
r = amdgpu_sync_fence(adev, &job->sync, exclusive);
if (r)
goto error_free;
r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
owner);
if (r)
@@ -889,6 +986,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
* amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
*
* @adev: amdgpu_device pointer
* @exclusive: fence we need to sync to
* @gtt_flags: flags as they are used for GTT
* @pages_addr: DMA addresses to use for mapping
* @vm: requested vm
@@ -902,6 +1000,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
* Returns 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
struct fence *exclusive,
uint32_t gtt_flags,
dma_addr_t *pages_addr,
struct amdgpu_vm *vm,
@@ -932,7 +1031,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
addr += mapping->offset;
if (!pages_addr || src)
return amdgpu_vm_bo_update_mapping(adev, exclusive,
src, pages_addr, vm,
start, mapping->it.last,
flags, addr, fence);
@@ -940,7 +1040,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
uint64_t last;
last = min((uint64_t)mapping->it.last, start + max_size - 1);
r = amdgpu_vm_bo_update_mapping(adev, exclusive,
src, pages_addr, vm,
start, last, flags, addr,
fence);
if (r)
@@ -973,6 +1074,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping;
dma_addr_t *pages_addr = NULL;
uint32_t gtt_flags, flags;
struct fence *exclusive;
uint64_t addr;
int r;
@@ -994,8 +1096,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
default:
break;
}
exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv);
} else {
addr = 0;
exclusive = NULL;
}
flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
@@ -1007,7 +1112,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
spin_unlock(&vm->status_lock);
list_for_each_entry(mapping, &bo_va->invalids, list) {
r = amdgpu_vm_bo_split_mapping(adev, exclusive,
gtt_flags, pages_addr, vm,
mapping, flags, addr,
&bo_va->last_pt_update);
if (r)
@@ -1054,7 +1160,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping,
0, 0, NULL);
kfree(mapping);
if (r)
@@ -1445,6 +1551,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_bo_unreserve(vm->page_directory);
if (r)
goto error_free_page_directory;
vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
return 0;
@@ -1516,6 +1623,10 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
&adev->vm_manager.ids_lru);
}
adev->vm_manager.fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
adev->vm_manager.seqno[i] = 0;
atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
atomic64_set(&adev->vm_manager.client_counter, 0);
}
......
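The new 'exclusive' parameter threads the BO's write fence into the page-table job, so PTE updates cannot overtake a pending move of the same BO. A minimal sketch of the pick-up step (kernel context assumed; amdgpu_sync_fence() already treats a NULL fence as a no-op):

        static int sync_pt_job_to_bo_writes(struct amdgpu_device *adev,
                                            struct amdgpu_job *job,
                                            struct amdgpu_bo *bo)
        {
                /* exclusive (write) fence of the BO's reservation object */
                struct fence *excl = reservation_object_get_excl(bo->tbo.resv);

                return amdgpu_sync_fence(adev, &job->sync, excl);
        }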
@@ -50,7 +50,9 @@
#include "gmc/gmc_7_1_sh_mask.h"
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin"); MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
#define MC_CG_ARB_FREQ_F0 0x0a
#define MC_CG_ARB_FREQ_F1 0x0b
@@ -736,19 +738,19 @@ static int ci_enable_didt(struct amdgpu_device *adev, bool enable)
if (pi->caps_sq_ramping || pi->caps_db_ramping ||
pi->caps_td_ramping || pi->caps_tcp_ramping) {
adev->gfx.rlc.funcs->enter_safe_mode(adev);
if (enable) {
ret = ci_program_pt_config_registers(adev, didt_config_ci);
if (ret) {
adev->gfx.rlc.funcs->exit_safe_mode(adev);
return ret;
}
}
ci_do_enable_didt(adev, enable);
adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
return 0;
@@ -3636,6 +3638,10 @@ static int ci_setup_default_dpm_tables(struct amdgpu_device *adev)
ci_setup_default_pcie_tables(adev);
/* save a copy of the default DPM table */
memcpy(&(pi->golden_dpm_table), &(pi->dpm_table),
sizeof(struct ci_dpm_table));
return 0;
}
@@ -5754,9 +5760,17 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_BONAIRE:
if ((adev->pdev->revision == 0x80) ||
(adev->pdev->revision == 0x81) ||
(adev->pdev->device == 0x665f))
chip_name = "bonaire_k";
else
chip_name = "bonaire"; chip_name = "bonaire";
break; break;
case CHIP_HAWAII: case CHIP_HAWAII:
if (adev->pdev->revision == 0x80)
chip_name = "hawaii_k";
else
chip_name = "hawaii"; chip_name = "hawaii";
break; break;
case CHIP_KAVERI: case CHIP_KAVERI:
...@@ -6404,6 +6418,186 @@ static int ci_dpm_set_powergating_state(void *handle, ...@@ -6404,6 +6418,186 @@ static int ci_dpm_set_powergating_state(void *handle,
return 0; return 0;
} }
static int ci_dpm_print_clock_levels(struct amdgpu_device *adev,
enum pp_clock_type type, char *buf)
{
struct ci_power_info *pi = ci_get_pi(adev);
struct ci_single_dpm_table *sclk_table = &pi->dpm_table.sclk_table;
struct ci_single_dpm_table *mclk_table = &pi->dpm_table.mclk_table;
struct ci_single_dpm_table *pcie_table = &pi->dpm_table.pcie_speed_table;
int i, now, size = 0;
uint32_t clock, pcie_speed;
switch (type) {
case PP_SCLK:
amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_API_GetSclkFrequency);
clock = RREG32(mmSMC_MSG_ARG_0);
for (i = 0; i < sclk_table->count; i++) {
if (clock > sclk_table->dpm_levels[i].value)
continue;
break;
}
now = i;
for (i = 0; i < sclk_table->count; i++)
size += sprintf(buf + size, "%d: %uMhz %s\n",
i, sclk_table->dpm_levels[i].value / 100,
(i == now) ? "*" : "");
break;
case PP_MCLK:
amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_API_GetMclkFrequency);
clock = RREG32(mmSMC_MSG_ARG_0);
for (i = 0; i < mclk_table->count; i++) {
if (clock > mclk_table->dpm_levels[i].value)
continue;
break;
}
now = i;
for (i = 0; i < mclk_table->count; i++)
size += sprintf(buf + size, "%d: %uMhz %s\n",
i, mclk_table->dpm_levels[i].value / 100,
(i == now) ? "*" : "");
break;
case PP_PCIE:
pcie_speed = ci_get_current_pcie_speed(adev);
for (i = 0; i < pcie_table->count; i++) {
if (pcie_speed != pcie_table->dpm_levels[i].value)
continue;
break;
}
now = i;
for (i = 0; i < pcie_table->count; i++)
size += sprintf(buf + size, "%d: %s %s\n", i,
(pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x1" :
(pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" :
(pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "",
(i == now) ? "*" : "");
break;
default:
break;
}
return size;
}
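ci_dpm_print_clock_levels() fills a sysfs buffer: find the level the hardware is currently running at, then print every level and mark the active one. The same pattern in a standalone form:

        #include <stdio.h>

        /* analogue of the pp_dpm_* printout above */
        static int print_levels(char *buf, const unsigned *mhz, int count, int now)
        {
                int i, size = 0;

                for (i = 0; i < count; i++)
                        size += sprintf(buf + size, "%d: %uMhz %s\n",
                                        i, mhz[i], (i == now) ? "*" : "");
                return size;
        }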
static int ci_dpm_force_clock_level(struct amdgpu_device *adev,
enum pp_clock_type type, uint32_t mask)
{
struct ci_power_info *pi = ci_get_pi(adev);
if (adev->pm.dpm.forced_level
!= AMDGPU_DPM_FORCED_LEVEL_MANUAL)
return -EINVAL;
switch (type) {
case PP_SCLK:
if (!pi->sclk_dpm_key_disabled)
amdgpu_ci_send_msg_to_smc_with_parameter(adev,
PPSMC_MSG_SCLKDPM_SetEnabledMask,
pi->dpm_level_enable_mask.sclk_dpm_enable_mask & mask);
break;
case PP_MCLK:
if (!pi->mclk_dpm_key_disabled)
amdgpu_ci_send_msg_to_smc_with_parameter(adev,
PPSMC_MSG_MCLKDPM_SetEnabledMask,
pi->dpm_level_enable_mask.mclk_dpm_enable_mask & mask);
break;
case PP_PCIE:
{
uint32_t tmp = mask & pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
uint32_t level = 0;
while (tmp >>= 1)
level++;
if (!pi->pcie_dpm_key_disabled)
amdgpu_ci_send_msg_to_smc_with_parameter(adev,
PPSMC_MSG_PCIeDPM_ForceLevel,
level);
break;
}
default:
break;
}
return 0;
}
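For PCIe the sysfs mask cannot enable several levels at once, so the handler forces the highest level bit that is set. The bit-scan is just a shift loop; a standalone restatement:

        /* analogue of the PP_PCIE branch above: mask 0x4 -> level 2 */
        static unsigned highest_set_bit(unsigned tmp)
        {
                unsigned level = 0;

                while (tmp >>= 1)
                        level++;
                return level;
        }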
static int ci_dpm_get_sclk_od(struct amdgpu_device *adev)
{
struct ci_power_info *pi = ci_get_pi(adev);
struct ci_single_dpm_table *sclk_table = &(pi->dpm_table.sclk_table);
struct ci_single_dpm_table *golden_sclk_table =
&(pi->golden_dpm_table.sclk_table);
int value;
value = (sclk_table->dpm_levels[sclk_table->count - 1].value -
golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) *
100 /
golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
return value;
}
static int ci_dpm_set_sclk_od(struct amdgpu_device *adev, uint32_t value)
{
struct ci_power_info *pi = ci_get_pi(adev);
struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps);
struct ci_single_dpm_table *golden_sclk_table =
&(pi->golden_dpm_table.sclk_table);
if (value > 20)
value = 20;
ps->performance_levels[ps->performance_level_count - 1].sclk =
golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value *
value / 100 +
golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
return 0;
}
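The OD (overdrive) getters and setters above are pure percentage arithmetic against the saved golden table. A runnable worked example with the clamp from ci_dpm_set_sclk_od():

        #include <assert.h>

        /* golden top of 100000 (10 kHz units, i.e. 1000 MHz) at 5% OD
         * becomes 105000, and reading the OD back yields 5 again */
        static int od_percent(int cur_top, int golden_top)
        {
                return (cur_top - golden_top) * 100 / golden_top;
        }

        static int apply_od(int golden_top, int percent)
        {
                if (percent > 20) /* same clamp as ci_dpm_set_sclk_od() */
                        percent = 20;
                return golden_top * percent / 100 + golden_top;
        }

        int main(void)
        {
                assert(apply_od(100000, 5) == 105000);
                assert(od_percent(105000, 100000) == 5);
                return 0;
        }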
static int ci_dpm_get_mclk_od(struct amdgpu_device *adev)
{
struct ci_power_info *pi = ci_get_pi(adev);
struct ci_single_dpm_table *mclk_table = &(pi->dpm_table.mclk_table);
struct ci_single_dpm_table *golden_mclk_table =
&(pi->golden_dpm_table.mclk_table);
int value;
value = (mclk_table->dpm_levels[mclk_table->count - 1].value -
golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value) *
100 /
golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
return value;
}
static int ci_dpm_set_mclk_od(struct amdgpu_device *adev, uint32_t value)
{
struct ci_power_info *pi = ci_get_pi(adev);
struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps);
struct ci_single_dpm_table *golden_mclk_table =
&(pi->golden_dpm_table.mclk_table);
if (value > 20)
value = 20;
ps->performance_levels[ps->performance_level_count - 1].mclk =
golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value *
value / 100 +
golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
return 0;
}
const struct amd_ip_funcs ci_dpm_ip_funcs = {
.name = "ci_dpm",
.early_init = ci_dpm_early_init,
@@ -6438,6 +6632,12 @@ static const struct amdgpu_dpm_funcs ci_dpm_funcs = {
.get_fan_control_mode = &ci_dpm_get_fan_control_mode,
.set_fan_speed_percent = &ci_dpm_set_fan_speed_percent,
.get_fan_speed_percent = &ci_dpm_get_fan_speed_percent,
.print_clock_levels = ci_dpm_print_clock_levels,
.force_clock_level = ci_dpm_force_clock_level,
.get_sclk_od = ci_dpm_get_sclk_od,
.set_sclk_od = ci_dpm_set_sclk_od,
.get_mclk_od = ci_dpm_get_mclk_od,
.set_mclk_od = ci_dpm_set_mclk_od,
};
static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev)
......
@@ -193,6 +193,7 @@ struct ci_pt_defaults {
struct ci_power_info {
struct ci_dpm_table dpm_table;
struct ci_dpm_table golden_dpm_table;
u32 voltage_control;
u32 mvdd_control;
u32 vddci_control;
......
@@ -1035,12 +1035,12 @@ static uint32_t cik_read_indexed_register(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
}
@@ -1158,10 +1158,11 @@ static void kv_restore_regs_for_reset(struct amdgpu_device *adev,
WREG32(mmGMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}
static int cik_gpu_pci_config_reset(struct amdgpu_device *adev)
{
struct kv_reset_save_regs kv_save = { 0 };
u32 i;
int r = -EINVAL;
dev_info(adev->dev, "GPU pci config reset\n");
@@ -1177,14 +1178,20 @@ static void cik_gpu_pci_config_reset(struct amdgpu_device *adev)
/* wait for asic to come out of reset */
for (i = 0; i < adev->usec_timeout; i++) {
if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff) {
/* enable BM */
pci_set_master(adev->pdev);
r = 0;
break; break;
}
udelay(1);
}
/* does asic init need to be run first??? */
if (adev->flags & AMD_IS_APU)
kv_restore_regs_for_reset(adev, &kv_save);
return r;
}
static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hung)
@@ -1210,13 +1217,14 @@ static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hu
*/
static int cik_asic_reset(struct amdgpu_device *adev)
{
int r;
cik_set_bios_scratch_engine_hung(adev, true);
r = cik_gpu_pci_config_reset(adev);
cik_set_bios_scratch_engine_hung(adev, false);
return r;
}
static int cik_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
@@ -2014,9 +2022,6 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.set_uvd_clocks = &cik_set_uvd_clocks,
.set_vce_clocks = &cik_set_vce_clocks,
.get_virtual_caps = &cik_get_virtual_caps,
};
static int cik_common_early_init(void *handle)
......
@@ -224,17 +224,6 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vm_id, bool ctx_switch)
{
u32 extra_bits = vm_id & 0xf;
/* IB packet must end on a 8 DW boundary */
cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8);
......
@@ -2219,6 +2219,7 @@ static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate)
}
}
} else { /*pi->caps_vce_pg*/
pi->vce_power_gated = gate;
cz_update_vce_dpm(adev);
cz_enable_vce_dpm(adev, !gate);
}
......
@@ -307,11 +307,10 @@ static void dce_v11_0_page_flip(struct amdgpu_device *adev,
struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
u32 tmp;
/* flip immediate for async, default is vsync */
tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0);
WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
/* update the scanout addresses */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
......
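The flip-control change above is a standard REG_GET_FIELD/REG_SET_FIELD read-modify-write: read the register, replace one field, write it back. A standalone analogue with made-up mask/shift values:

        #include <stdint.h>

        /* illustrative field encoding, not a real register layout */
        #define FIELD_MASK  0x00000100u
        #define FIELD_SHIFT 8

        static uint32_t set_field(uint32_t reg, uint32_t val)
        {
                return (reg & ~FIELD_MASK) | ((val << FIELD_SHIFT) & FIELD_MASK);
        }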
@@ -526,36 +526,16 @@ static void dce_v8_0_stop_mc_access(struct amdgpu_device *adev,
crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
CRTC_CONTROL, CRTC_MASTER_EN);
if (crtc_enabled) {
#if 1
save->crtc_enabled[i] = true;
tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
if (REG_GET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN) == 0) {
/* it is correct only for RGB; black is 0 */
WREG32(mmCRTC_BLANK_DATA_COLOR + crtc_offsets[i], 0);
tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1);
WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
}
#else
/* XXX this is a hack to avoid strange behavior with EFI on certain systems */
WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
@@ -575,55 +555,22 @@ static void dce_v8_0_stop_mc_access(struct amdgpu_device *adev,
static void dce_v8_0_resume_mc_access(struct amdgpu_device *adev,
struct amdgpu_mode_mc_save *save)
{
u32 tmp;
int i;
/* update crtc base addresses */
for (i = 0; i < adev->mode_info.num_crtc; i++) {
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i],
upper_32_bits(adev->mc.vram_start));
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i],
(u32)adev->mc.vram_start);
if (save->crtc_enabled[i]) {
tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 0);
WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
}
}
WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start));
......
@@ -1583,9 +1583,15 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
* registers are instanced per SE or SH. 0xffffffff means
* broadcast to all SEs or SHs (CIK).
*/
static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
u32 se_num, u32 sh_num, u32 instance)
{
u32 data;
if (instance == 0xffffffff)
data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
else
data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
@@ -1659,13 +1665,13 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
data = gfx_v7_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs;
@@ -1746,7 +1752,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
* making sure that the following register writes will be broadcasted
* to all the shaders
*/
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -2050,17 +2056,6 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
unsigned vm_id, bool ctx_switch)
{
u32 header, control = 0;
/* insert SWITCH_BUFFER packet before first IB in the ring frame */
if (ctx_switch) {
@@ -2089,22 +2084,9 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vm_id, bool ctx_switch)
{
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
(2 << 0) |
@@ -3221,7 +3203,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
}
}
adev->gfx.rlc.cs_data = ci_cs_data;
adev->gfx.rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
adev->gfx.rlc.cp_table_size += 64 * 1024; /* GDS */
src_ptr = adev->gfx.rlc.reg_list;
dws = adev->gfx.rlc.reg_list_size;
@@ -3379,7 +3362,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
for (k = 0; k < adev->usec_timeout; k++) {
if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
break;
@@ -3387,7 +3370,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
}
}
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -3434,7 +3417,7 @@ static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
return orig;
}
static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
u32 tmp, i, mask;
@@ -3456,7 +3439,7 @@ void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
}
}
static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
u32 tmp;
@@ -3471,7 +3454,7 @@ void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
*
* Halt the RLC ME (MicroEngine) (CIK).
*/
static void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
{
WREG32(mmRLC_CNTL, 0);
@@ -3547,7 +3530,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
mutex_lock(&adev->grbm_idx_mutex);
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
WREG32(mmRLC_LB_PARAMS, 0x00600408);
WREG32(mmRLC_LB_CNTL, 0x80000004);
@@ -3587,7 +3570,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
tmp = gfx_v7_0_halt_rlc(adev);
mutex_lock(&adev->grbm_idx_mutex);
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@@ -3638,7 +3621,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
tmp = gfx_v7_0_halt_rlc(adev);
mutex_lock(&adev->grbm_idx_mutex);
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@@ -3689,7 +3672,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
tmp = gfx_v7_0_halt_rlc(adev);
mutex_lock(&adev->grbm_idx_mutex);
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
@@ -3867,6 +3850,20 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
}
}
static void gfx_v7_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
u32 bitmap)
{
u32 data;
if (!bitmap)
return;
data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}
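The new helper shifts a per-SH disable bitmap into the INACTIVE_CUS field and masks off anything outside it. A standalone analogue (the shift and mask here are illustrative, not the real GC_USER_SHADER_ARRAY_CONFIG encoding):

        #include <stdint.h>

        #define INACTIVE_CUS_SHIFT 16
        #define INACTIVE_CUS_MASK  0xffff0000u

        static uint32_t encode_inactive_cus(uint32_t bitmap)
        {
                return (bitmap << INACTIVE_CUS_SHIFT) & INACTIVE_CUS_MASK;
        }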
static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
u32 data, mask;
@@ -4123,7 +4120,7 @@ static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
* Fetches a GPU clock counter snapshot (SI).
* Returns the 64 bit clock counter snapshot.
*/
static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
uint64_t clock;
@@ -4183,12 +4180,24 @@ static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
.select_se_sh = &gfx_v7_0_select_se_sh,
};
static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
.enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode,
.exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode
};
static int gfx_v7_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
gfx_v7_0_set_ring_funcs(adev);
gfx_v7_0_set_irq_funcs(adev);
gfx_v7_0_set_gds_init(adev);
@@ -5032,16 +5041,22 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
int i, j, k, counter, active_cu_number = 0;
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
unsigned disable_masks[4 * 2];
memset(cu_info, 0, sizeof(*cu_info));
amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
mask = 1;
ao_bitmap = 0;
counter = 0;
gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
if (i < 4 && j < 2)
gfx_v7_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v7_0_get_cu_active_bitmap(adev); bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
cu_info->bitmap[i][j] = bitmap; cu_info->bitmap[i][j] = bitmap;
...@@ -5057,7 +5072,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) ...@@ -5057,7 +5072,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
} }
} }
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number; cu_info->number = active_cu_number;
......
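The disable_masks[] array added above carries the amdgpu.disable_cu module option (assumed "se.sh.cu,..." triplet syntax) into the per-shader-array write performed inside the loop. A short sketch of the indexing convention, which is SE-major with two shader arrays per engine:

/* Sketch: layout of the parsed disable masks.  Entry (se, sh) sits at
 * disable_masks[se * 2 + sh]; the loop above only applies entries for
 * the first 4 SEs / 2 SHs, matching the parse call's (4, 2) bounds.
 */
unsigned disable_masks[4 * 2] = { 0 };

amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
/* e.g. the user bitmap for SE1/SH0: */
u32 se1_sh0 = disable_masks[1 * 2 + 0];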
...@@ -26,11 +26,4 @@ ...@@ -26,11 +26,4 @@
extern const struct amd_ip_funcs gfx_v7_0_ip_funcs; extern const struct amd_ip_funcs gfx_v7_0_ip_funcs;
/* XXX these shouldn't be exported */
void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev);
void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev);
void gfx_v7_0_rlc_stop(struct amdgpu_device *adev);
uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev);
void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num);
#endif #endif
...@@ -1150,6 +1150,71 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, ...@@ -1150,6 +1150,71 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
buffer[count++] = cpu_to_le32(0); buffer[count++] = cpu_to_le32(0);
} }
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
const __le32 *fw_data;
volatile u32 *dst_ptr;
int me, i, max_me = 4;
u32 bo_offset = 0;
u32 table_offset, table_size;
if (adev->asic_type == CHIP_CARRIZO)
max_me = 5;
/* write the cp table buffer */
dst_ptr = adev->gfx.rlc.cp_table_ptr;
for (me = 0; me < max_me; me++) {
if (me == 0) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
fw_data = (const __le32 *)
(adev->gfx.ce_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 1) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
fw_data = (const __le32 *)
(adev->gfx.pfp_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 2) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
fw_data = (const __le32 *)
(adev->gfx.me_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 3) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 4) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec2_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
}
for (i = 0; i < table_size; i ++) {
dst_ptr[bo_offset + i] =
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
}
bo_offset += table_size;
}
}
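The five if/else arms above differ only in which firmware blob they read the jump table from. A table-driven sketch of the same copy loop (equivalent behavior, not what the patch actually does) makes the structure easier to see:

/* Sketch only: table-driven form of cz_init_cp_jump_table(). */
static void cz_init_cp_jump_table_sketch(struct amdgpu_device *adev)
{
	const struct firmware *fws[5] = {
		adev->gfx.ce_fw, adev->gfx.pfp_fw, adev->gfx.me_fw,
		adev->gfx.mec_fw, adev->gfx.mec2_fw,
	};
	int max_me = (adev->asic_type == CHIP_CARRIZO) ? 5 : 4;
	volatile u32 *dst_ptr = adev->gfx.rlc.cp_table_ptr;
	u32 bo_offset = 0;
	int me, i;

	for (me = 0; me < max_me; me++) {
		const struct gfx_firmware_header_v1_0 *hdr =
			(const struct gfx_firmware_header_v1_0 *)fws[me]->data;
		const __le32 *fw_data = (const __le32 *)(fws[me]->data +
			le32_to_cpu(hdr->header.ucode_array_offset_bytes));
		u32 table_offset = le32_to_cpu(hdr->jt_offset);
		u32 table_size = le32_to_cpu(hdr->jt_size);

		for (i = 0; i < table_size; i++)
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		bo_offset += table_size;
	}
}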
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{ {
int r; int r;
...@@ -1165,6 +1230,18 @@ static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) ...@@ -1165,6 +1230,18 @@ static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
adev->gfx.rlc.clear_state_obj = NULL; adev->gfx.rlc.clear_state_obj = NULL;
} }
/* jump table block */
if (adev->gfx.rlc.cp_table_obj) {
r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
if (unlikely(r != 0))
dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
adev->gfx.rlc.cp_table_obj = NULL;
}
} }
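The jump-table teardown repeats the reserve/unpin/unreserve/unref dance already used for the other RLC BOs. A hedged sketch of the pattern folded into one helper (the helper name is hypothetical; it is not part of this patch):

/* Sketch only: generic unwind for a pinned, kernel-owned BO. */
static void free_pinned_bo(struct amdgpu_device *adev, struct amdgpu_bo **bo)
{
	int r;

	if (*bo == NULL)
		return;
	r = amdgpu_bo_reserve(*bo, false);
	if (likely(r == 0)) {
		amdgpu_bo_unpin(*bo);
		amdgpu_bo_unreserve(*bo);
	}
	amdgpu_bo_unref(bo);	/* also NULLs the caller's pointer */
}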
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
...@@ -1221,6 +1298,46 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) ...@@ -1221,6 +1298,46 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
} }
if ((adev->asic_type == CHIP_CARRIZO) ||
(adev->asic_type == CHIP_STONEY)) {
adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
if (adev->gfx.rlc.cp_table_obj == NULL) {
r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, NULL,
&adev->gfx.rlc.cp_table_obj);
if (r) {
dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
return r;
}
}
r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
if (unlikely(r != 0)) {
dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
return r;
}
r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.cp_table_gpu_addr);
if (r) {
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
return r;
}
r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
if (r) {
dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
return r;
}
cz_init_cp_jump_table(adev);
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
}
return 0; return 0;
} }
...@@ -3329,9 +3446,15 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) ...@@ -3329,9 +3446,15 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
} }
} }
void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
u32 se_num, u32 sh_num, u32 instance)
{ {
u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); u32 data;
if (instance == 0xffffffff)
data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
else
data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) { if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
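The new instance argument extends the helper from SE/SH selection to per-CU-instance selection. A worked sketch of the values it composes, assuming the usual SE_INDEX/SH_INDEX/SE_BROADCAST_WRITES fields of GRBM_GFX_INDEX alongside the two shown above:

/* Sketch: values gfx_v8_0_select_se_sh() composes into GRBM_GFX_INDEX. */

/* Broadcast to everything (the common "deselect" call): */
u32 all = REG_SET_FIELD(0,   GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
all     = REG_SET_FIELD(all, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
all     = REG_SET_FIELD(all, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);

/* Target SE1/SH0, all instances (as in the setup_rb/get_cu_info loops): */
u32 one = REG_SET_FIELD(0,   GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
one     = REG_SET_FIELD(one, GRBM_GFX_INDEX, SE_INDEX, 1);
one     = REG_SET_FIELD(one, GRBM_GFX_INDEX, SH_INDEX, 0);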
...@@ -3381,13 +3504,13 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) ...@@ -3381,13 +3504,13 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
gfx_v8_0_select_se_sh(adev, i, j); gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
data = gfx_v8_0_get_rb_active_bitmap(adev); data = gfx_v8_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh); rb_bitmap_width_per_sh);
} }
} }
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs; adev->gfx.config.backend_enable_mask = active_rbs;
...@@ -3491,7 +3614,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) ...@@ -3491,7 +3614,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
* making sure that the following register writes will be broadcasted * making sure that the following register writes will be broadcasted
* to all the shaders * to all the shaders
*/ */
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
WREG32(mmPA_SC_FIFO_SIZE, WREG32(mmPA_SC_FIFO_SIZE,
(adev->gfx.config.sc_prim_fifo_size_frontend << (adev->gfx.config.sc_prim_fifo_size_frontend <<
...@@ -3514,7 +3637,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) ...@@ -3514,7 +3637,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
gfx_v8_0_select_se_sh(adev, i, j); gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
for (k = 0; k < adev->usec_timeout; k++) { for (k = 0; k < adev->usec_timeout; k++) {
if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
break; break;
...@@ -3522,7 +3645,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) ...@@ -3522,7 +3645,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
} }
} }
} }
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
...@@ -3683,7 +3806,7 @@ static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev) ...@@ -3683,7 +3806,7 @@ static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
WREG32(mmRLC_SRM_CNTL, data); WREG32(mmRLC_SRM_CNTL, data);
} }
static void polaris11_init_power_gating(struct amdgpu_device *adev) static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{ {
uint32_t data; uint32_t data;
...@@ -3714,6 +3837,53 @@ static void polaris11_init_power_gating(struct amdgpu_device *adev) ...@@ -3714,6 +3837,53 @@ static void polaris11_init_power_gating(struct amdgpu_device *adev)
} }
} }
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
bool enable)
{
u32 data, orig;
orig = data = RREG32(mmRLC_PG_CNTL);
if (enable)
data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
else
data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
if (orig != data)
WREG32(mmRLC_PG_CNTL, data);
}
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
bool enable)
{
u32 data, orig;
orig = data = RREG32(mmRLC_PG_CNTL);
if (enable)
data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
else
data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
if (orig != data)
WREG32(mmRLC_PG_CNTL, data);
}
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
u32 data, orig;
orig = data = RREG32(mmRLC_PG_CNTL);
if (enable)
data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
else
data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
if (orig != data)
WREG32(mmRLC_PG_CNTL, data);
}
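cz_enable_sck_slow_down_on_power_up(), cz_enable_sck_slow_down_on_power_down() and cz_enable_cp_power_gating() are all the same read-modify-write on mmRLC_PG_CNTL. A sketch of the shared pattern they could be folded into:

/* Sketch: the common pattern behind the three helpers above.  Note the
 * CP bit is a *disable* bit, so its polarity is inverted by the caller.
 */
static void rlc_pg_cntl_update(struct amdgpu_device *adev, u32 mask, bool set)
{
	u32 orig, data;

	orig = data = RREG32(mmRLC_PG_CNTL);
	if (set)
		data |= mask;
	else
		data &= ~mask;
	if (orig != data)	/* skip the write when nothing changes */
		WREG32(mmRLC_PG_CNTL, data);
}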
static void gfx_v8_0_init_pg(struct amdgpu_device *adev) static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{ {
if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
...@@ -3726,8 +3896,25 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev) ...@@ -3726,8 +3896,25 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
gfx_v8_0_init_save_restore_list(adev); gfx_v8_0_init_save_restore_list(adev);
gfx_v8_0_enable_save_restore_machine(adev); gfx_v8_0_enable_save_restore_machine(adev);
if (adev->asic_type == CHIP_POLARIS11) if ((adev->asic_type == CHIP_CARRIZO) ||
polaris11_init_power_gating(adev); (adev->asic_type == CHIP_STONEY)) {
WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
gfx_v8_0_init_power_gating(adev);
WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
cz_enable_sck_slow_down_on_power_up(adev, true);
cz_enable_sck_slow_down_on_power_down(adev, true);
} else {
cz_enable_sck_slow_down_on_power_up(adev, false);
cz_enable_sck_slow_down_on_power_down(adev, false);
}
if (adev->pg_flags & AMD_PG_SUPPORT_CP)
cz_enable_cp_power_gating(adev, true);
else
cz_enable_cp_power_gating(adev, false);
} else if (adev->asic_type == CHIP_POLARIS11) {
gfx_v8_0_init_power_gating(adev);
}
} }
} }
...@@ -4966,7 +5153,7 @@ static int gfx_v8_0_soft_reset(void *handle) ...@@ -4966,7 +5153,7 @@ static int gfx_v8_0_soft_reset(void *handle)
* Fetches a GPU clock counter snapshot. * Fetches a GPU clock counter snapshot.
* Returns the 64 bit clock counter snapshot. * Returns the 64 bit clock counter snapshot.
*/ */
uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{ {
uint64_t clock; uint64_t clock;
...@@ -5026,12 +5213,18 @@ static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, ...@@ -5026,12 +5213,18 @@ static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
} }
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
.select_se_sh = &gfx_v8_0_select_se_sh,
};
static int gfx_v8_0_early_init(void *handle) static int gfx_v8_0_early_init(void *handle)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS; adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
gfx_v8_0_set_ring_funcs(adev); gfx_v8_0_set_ring_funcs(adev);
gfx_v8_0_set_irq_funcs(adev); gfx_v8_0_set_irq_funcs(adev);
gfx_v8_0_set_gds_init(adev); gfx_v8_0_set_gds_init(adev);
...@@ -5064,51 +5257,43 @@ static int gfx_v8_0_late_init(void *handle) ...@@ -5064,51 +5257,43 @@ static int gfx_v8_0_late_init(void *handle)
return 0; return 0;
} }
static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
bool enable) bool enable)
{ {
uint32_t data, temp; uint32_t data, temp;
if (adev->asic_type == CHIP_POLARIS11)
/* Send msg to SMU via Powerplay */ /* Send msg to SMU via Powerplay */
amdgpu_set_powergating_state(adev, amdgpu_set_powergating_state(adev,
AMD_IP_BLOCK_TYPE_SMC, AMD_IP_BLOCK_TYPE_SMC,
enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); enable ?
AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
if (enable) {
/* Enable static MGPG */
temp = data = RREG32(mmRLC_PG_CNTL); temp = data = RREG32(mmRLC_PG_CNTL);
/* Enable static MGPG */
if (enable)
data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
else
if (temp != data)
WREG32(mmRLC_PG_CNTL, data);
} else {
temp = data = RREG32(mmRLC_PG_CNTL);
data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
if (temp != data) if (temp != data)
WREG32(mmRLC_PG_CNTL, data); WREG32(mmRLC_PG_CNTL, data);
}
} }
static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
bool enable) bool enable)
{ {
uint32_t data, temp; uint32_t data, temp;
if (enable) {
/* Enable dynamic MGPG */
temp = data = RREG32(mmRLC_PG_CNTL); temp = data = RREG32(mmRLC_PG_CNTL);
/* Enable dynamic MGPG */
if (enable)
data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
else
if (temp != data)
WREG32(mmRLC_PG_CNTL, data);
} else {
temp = data = RREG32(mmRLC_PG_CNTL);
data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
if (temp != data) if (temp != data)
WREG32(mmRLC_PG_CNTL, data); WREG32(mmRLC_PG_CNTL, data);
}
} }
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
...@@ -5116,19 +5301,63 @@ static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *ade ...@@ -5116,19 +5301,63 @@ static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *ade
{ {
uint32_t data, temp; uint32_t data, temp;
if (enable) {
/* Enable quick PG */
temp = data = RREG32(mmRLC_PG_CNTL); temp = data = RREG32(mmRLC_PG_CNTL);
data |= 0x100000; /* Enable quick PG */
if (enable)
data |= RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
else
data &= ~RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
if (temp != data) if (temp != data)
WREG32(mmRLC_PG_CNTL, data); WREG32(mmRLC_PG_CNTL, data);
} else { }
temp = data = RREG32(mmRLC_PG_CNTL);
data &= ~0x100000;
if (temp != data) static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
bool enable)
{
u32 data, orig;
orig = data = RREG32(mmRLC_PG_CNTL);
if (enable)
data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
else
data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
if (orig != data)
WREG32(mmRLC_PG_CNTL, data); WREG32(mmRLC_PG_CNTL, data);
}
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
bool enable)
{
u32 data, orig;
orig = data = RREG32(mmRLC_PG_CNTL);
if (enable)
data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
else
data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
if (orig != data)
WREG32(mmRLC_PG_CNTL, data);
/* Read any GFX register to wake up GFX. */
if (!enable)
data = RREG32(mmDB_RENDER_CONTROL);
}
static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
bool enable)
{
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
cz_enable_gfx_cg_power_gating(adev, true);
if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
cz_enable_gfx_pipeline_power_gating(adev, true);
} else {
cz_enable_gfx_cg_power_gating(adev, false);
cz_enable_gfx_pipeline_power_gating(adev, false);
} }
} }
...@@ -5136,21 +5365,42 @@ static int gfx_v8_0_set_powergating_state(void *handle, ...@@ -5136,21 +5365,42 @@ static int gfx_v8_0_set_powergating_state(void *handle,
enum amd_powergating_state state) enum amd_powergating_state state)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
return 0; return 0;
switch (adev->asic_type) { switch (adev->asic_type) {
case CHIP_CARRIZO:
case CHIP_STONEY:
if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
cz_update_gfx_cg_power_gating(adev, enable);
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
else
gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
else
gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
break;
case CHIP_POLARIS11: case CHIP_POLARIS11:
if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
polaris11_enable_gfx_static_mg_power_gating(adev, gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
state == AMD_PG_STATE_GATE ? true : false);
else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
polaris11_enable_gfx_dynamic_mg_power_gating(adev,
state == AMD_PG_STATE_GATE ? true : false);
else else
polaris11_enable_gfx_quick_mg_power_gating(adev, gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
state == AMD_PG_STATE_GATE ? true : false);
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
else
gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
polaris11_enable_gfx_quick_mg_power_gating(adev, true);
else
polaris11_enable_gfx_quick_mg_power_gating(adev, false);
break; break;
default: default:
break; break;
...@@ -5164,7 +5414,7 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, ...@@ -5164,7 +5414,7 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
{ {
uint32_t data; uint32_t data;
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
...@@ -5552,6 +5802,8 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev ...@@ -5552,6 +5802,8 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
WREG32(mmRLC_CGCG_CGLS_CTRL, data); WREG32(mmRLC_CGCG_CGLS_CTRL, data);
} }
gfx_v8_0_wait_for_rlc_serdes(adev);
adev->gfx.rlc.funcs->exit_safe_mode(adev); adev->gfx.rlc.funcs->exit_safe_mode(adev);
} }
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
...@@ -5677,17 +5929,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, ...@@ -5677,17 +5929,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
unsigned vm_id, bool ctx_switch) unsigned vm_id, bool ctx_switch)
{ {
u32 header, control = 0; u32 header, control = 0;
u32 next_rptr = ring->wptr + 5;
if (ctx_switch)
next_rptr += 2;
next_rptr += 4;
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
amdgpu_ring_write(ring, next_rptr);
/* insert SWITCH_BUFFER packet before first IB in the ring frame */ /* insert SWITCH_BUFFER packet before first IB in the ring frame */
if (ctx_switch) { if (ctx_switch) {
...@@ -5716,23 +5957,9 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, ...@@ -5716,23 +5957,9 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_ib *ib, struct amdgpu_ib *ib,
unsigned vm_id, bool ctx_switch) unsigned vm_id, bool ctx_switch)
{ {
u32 header, control = 0; u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
u32 next_rptr = ring->wptr + 5;
control |= INDIRECT_BUFFER_VALID;
next_rptr += 4; amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
amdgpu_ring_write(ring, next_rptr);
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
control |= ib->length_dw | (vm_id << 24);
amdgpu_ring_write(ring, header);
amdgpu_ring_write(ring, amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN #ifdef __BIG_ENDIAN
(2 << 0) | (2 << 0) |
...@@ -6185,9 +6412,9 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) ...@@ -6185,9 +6412,9 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{ {
switch (adev->asic_type) { switch (adev->asic_type) {
case CHIP_TOPAZ: case CHIP_TOPAZ:
case CHIP_STONEY:
adev->gfx.rlc.funcs = &iceland_rlc_funcs; adev->gfx.rlc.funcs = &iceland_rlc_funcs;
break; break;
case CHIP_STONEY:
case CHIP_CARRIZO: case CHIP_CARRIZO:
adev->gfx.rlc.funcs = &cz_rlc_funcs; adev->gfx.rlc.funcs = &cz_rlc_funcs;
break; break;
...@@ -6225,6 +6452,20 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) ...@@ -6225,6 +6452,20 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
} }
} }
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
u32 bitmap)
{
u32 data;
if (!bitmap)
return;
data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{ {
u32 data, mask; u32 data, mask;
...@@ -6245,16 +6486,22 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) ...@@ -6245,16 +6486,22 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
int i, j, k, counter, active_cu_number = 0; int i, j, k, counter, active_cu_number = 0;
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
unsigned disable_masks[4 * 2];
memset(cu_info, 0, sizeof(*cu_info)); memset(cu_info, 0, sizeof(*cu_info));
amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
mask = 1; mask = 1;
ao_bitmap = 0; ao_bitmap = 0;
counter = 0; counter = 0;
gfx_v8_0_select_se_sh(adev, i, j); gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
if (i < 4 && j < 2)
gfx_v8_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v8_0_get_cu_active_bitmap(adev); bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
cu_info->bitmap[i][j] = bitmap; cu_info->bitmap[i][j] = bitmap;
...@@ -6270,7 +6517,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) ...@@ -6270,7 +6517,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
} }
} }
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number; cu_info->number = active_cu_number;
......
...@@ -26,7 +26,6 @@ ...@@ -26,7 +26,6 @@
extern const struct amd_ip_funcs gfx_v8_0_ip_funcs; extern const struct amd_ip_funcs gfx_v8_0_ip_funcs;
uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev);
void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num); void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num);
#endif #endif
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev); static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev);
static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev); static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
static int gmc_v7_0_wait_for_idle(void *handle);
MODULE_FIRMWARE("radeon/bonaire_mc.bin"); MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin"); MODULE_FIRMWARE("radeon/hawaii_mc.bin");
...@@ -73,31 +74,7 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev) ...@@ -73,31 +74,7 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev)
} }
} }
/** static void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
* gmc7_mc_wait_for_idle - wait for MC idle callback.
*
* @adev: amdgpu_device pointer
*
* Wait for the MC (memory controller) to be idle.
* (evergreen+).
* Returns 0 if the MC is idle, -1 if not.
*/
int gmc_v7_0_mc_wait_for_idle(struct amdgpu_device *adev)
{
unsigned i;
u32 tmp;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
tmp = RREG32(mmSRBM_STATUS) & 0x1F00;
if (!tmp)
return 0;
udelay(1);
}
return -1;
}
void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
struct amdgpu_mode_mc_save *save) struct amdgpu_mode_mc_save *save)
{ {
u32 blackout; u32 blackout;
...@@ -105,7 +82,7 @@ void gmc_v7_0_mc_stop(struct amdgpu_device *adev, ...@@ -105,7 +82,7 @@ void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
if (adev->mode_info.num_crtc) if (adev->mode_info.num_crtc)
amdgpu_display_stop_mc_access(adev, save); amdgpu_display_stop_mc_access(adev, save);
amdgpu_asic_wait_for_mc_idle(adev); gmc_v7_0_wait_for_idle((void *)adev);
blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL); blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) { if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
...@@ -120,7 +97,7 @@ void gmc_v7_0_mc_stop(struct amdgpu_device *adev, ...@@ -120,7 +97,7 @@ void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
udelay(100); udelay(100);
} }
void gmc_v7_0_mc_resume(struct amdgpu_device *adev, static void gmc_v7_0_mc_resume(struct amdgpu_device *adev,
struct amdgpu_mode_mc_save *save) struct amdgpu_mode_mc_save *save)
{ {
u32 tmp; u32 tmp;
...@@ -311,7 +288,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev) ...@@ -311,7 +288,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
amdgpu_display_set_vga_render_state(adev, false); amdgpu_display_set_vga_render_state(adev, false);
gmc_v7_0_mc_stop(adev, &save); gmc_v7_0_mc_stop(adev, &save);
if (amdgpu_asic_wait_for_mc_idle(adev)) { if (gmc_v7_0_wait_for_idle((void *)adev)) {
dev_warn(adev->dev, "Wait for MC idle timedout !\n"); dev_warn(adev->dev, "Wait for MC idle timedout !\n");
} }
/* Update configuration */ /* Update configuration */
...@@ -331,7 +308,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev) ...@@ -331,7 +308,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
WREG32(mmMC_VM_AGP_BASE, 0); WREG32(mmMC_VM_AGP_BASE, 0);
WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF); WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF); WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
if (amdgpu_asic_wait_for_mc_idle(adev)) { if (gmc_v7_0_wait_for_idle((void *)adev)) {
dev_warn(adev->dev, "Wait for MC idle timedout !\n"); dev_warn(adev->dev, "Wait for MC idle timedout !\n");
} }
gmc_v7_0_mc_resume(adev, &save); gmc_v7_0_mc_resume(adev, &save);
...@@ -1137,7 +1114,7 @@ static int gmc_v7_0_soft_reset(void *handle) ...@@ -1137,7 +1114,7 @@ static int gmc_v7_0_soft_reset(void *handle)
if (srbm_soft_reset) { if (srbm_soft_reset) {
gmc_v7_0_mc_stop(adev, &save); gmc_v7_0_mc_stop(adev, &save);
if (gmc_v7_0_wait_for_idle(adev)) { if (gmc_v7_0_wait_for_idle((void *)adev)) {
dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
} }
......
...@@ -26,11 +26,4 @@ ...@@ -26,11 +26,4 @@
extern const struct amd_ip_funcs gmc_v7_0_ip_funcs; extern const struct amd_ip_funcs gmc_v7_0_ip_funcs;
/* XXX these shouldn't be exported */
void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
struct amdgpu_mode_mc_save *save);
void gmc_v7_0_mc_resume(struct amdgpu_device *adev,
struct amdgpu_mode_mc_save *save);
int gmc_v7_0_mc_wait_for_idle(struct amdgpu_device *adev);
#endif #endif
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev); static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev);
static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev); static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static int gmc_v8_0_wait_for_idle(void *handle);
MODULE_FIRMWARE("amdgpu/tonga_mc.bin"); MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
...@@ -147,36 +148,7 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) ...@@ -147,36 +148,7 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
} }
} }
/** static void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
* gmc8_mc_wait_for_idle - wait for MC idle callback.
*
* @adev: amdgpu_device pointer
*
* Wait for the MC (memory controller) to be idle.
* (evergreen+).
* Returns 0 if the MC is idle, -1 if not.
*/
int gmc_v8_0_mc_wait_for_idle(struct amdgpu_device *adev)
{
unsigned i;
u32 tmp;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__VMC_BUSY_MASK |
SRBM_STATUS__MCB_BUSY_MASK |
SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
SRBM_STATUS__MCC_BUSY_MASK |
SRBM_STATUS__MCD_BUSY_MASK |
SRBM_STATUS__VMC1_BUSY_MASK);
if (!tmp)
return 0;
udelay(1);
}
return -1;
}
void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
struct amdgpu_mode_mc_save *save) struct amdgpu_mode_mc_save *save)
{ {
u32 blackout; u32 blackout;
...@@ -184,7 +156,7 @@ void gmc_v8_0_mc_stop(struct amdgpu_device *adev, ...@@ -184,7 +156,7 @@ void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
if (adev->mode_info.num_crtc) if (adev->mode_info.num_crtc)
amdgpu_display_stop_mc_access(adev, save); amdgpu_display_stop_mc_access(adev, save);
amdgpu_asic_wait_for_mc_idle(adev); gmc_v8_0_wait_for_idle(adev);
blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL); blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) { if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
...@@ -199,7 +171,7 @@ void gmc_v8_0_mc_stop(struct amdgpu_device *adev, ...@@ -199,7 +171,7 @@ void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
udelay(100); udelay(100);
} }
void gmc_v8_0_mc_resume(struct amdgpu_device *adev, static void gmc_v8_0_mc_resume(struct amdgpu_device *adev,
struct amdgpu_mode_mc_save *save) struct amdgpu_mode_mc_save *save)
{ {
u32 tmp; u32 tmp;
...@@ -393,7 +365,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) ...@@ -393,7 +365,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
amdgpu_display_set_vga_render_state(adev, false); amdgpu_display_set_vga_render_state(adev, false);
gmc_v8_0_mc_stop(adev, &save); gmc_v8_0_mc_stop(adev, &save);
if (amdgpu_asic_wait_for_mc_idle(adev)) { if (gmc_v8_0_wait_for_idle((void *)adev)) {
dev_warn(adev->dev, "Wait for MC idle timedout !\n"); dev_warn(adev->dev, "Wait for MC idle timedout !\n");
} }
/* Update configuration */ /* Update configuration */
...@@ -413,7 +385,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) ...@@ -413,7 +385,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
WREG32(mmMC_VM_AGP_BASE, 0); WREG32(mmMC_VM_AGP_BASE, 0);
WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF); WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF); WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
if (amdgpu_asic_wait_for_mc_idle(adev)) { if (gmc_v8_0_wait_for_idle((void *)adev)) {
dev_warn(adev->dev, "Wait for MC idle timedout !\n"); dev_warn(adev->dev, "Wait for MC idle timedout !\n");
} }
gmc_v8_0_mc_resume(adev, &save); gmc_v8_0_mc_resume(adev, &save);
...@@ -1140,7 +1112,7 @@ static int gmc_v8_0_soft_reset(void *handle) ...@@ -1140,7 +1112,7 @@ static int gmc_v8_0_soft_reset(void *handle)
if (srbm_soft_reset) { if (srbm_soft_reset) {
gmc_v8_0_mc_stop(adev, &save); gmc_v8_0_mc_stop(adev, &save);
if (gmc_v8_0_wait_for_idle(adev)) { if (gmc_v8_0_wait_for_idle((void *)adev)) {
dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
} }
......
...@@ -26,11 +26,4 @@ ...@@ -26,11 +26,4 @@
extern const struct amd_ip_funcs gmc_v8_0_ip_funcs; extern const struct amd_ip_funcs gmc_v8_0_ip_funcs;
/* XXX these shouldn't be exported */
void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
struct amdgpu_mode_mc_save *save);
void gmc_v8_0_mc_resume(struct amdgpu_device *adev,
struct amdgpu_mode_mc_save *save);
int gmc_v8_0_mc_wait_for_idle(struct amdgpu_device *adev);
#endif #endif
...@@ -507,19 +507,19 @@ static int kv_enable_didt(struct amdgpu_device *adev, bool enable) ...@@ -507,19 +507,19 @@ static int kv_enable_didt(struct amdgpu_device *adev, bool enable)
pi->caps_db_ramping || pi->caps_db_ramping ||
pi->caps_td_ramping || pi->caps_td_ramping ||
pi->caps_tcp_ramping) { pi->caps_tcp_ramping) {
gfx_v7_0_enter_rlc_safe_mode(adev); adev->gfx.rlc.funcs->enter_safe_mode(adev);
if (enable) { if (enable) {
ret = kv_program_pt_config_registers(adev, didt_config_kv); ret = kv_program_pt_config_registers(adev, didt_config_kv);
if (ret) { if (ret) {
gfx_v7_0_exit_rlc_safe_mode(adev); adev->gfx.rlc.funcs->exit_safe_mode(adev);
return ret; return ret;
} }
} }
kv_do_enable_didt(adev, enable); kv_do_enable_didt(adev, enable);
gfx_v7_0_exit_rlc_safe_mode(adev); adev->gfx.rlc.funcs->exit_safe_mode(adev);
} }
return 0; return 0;
......
...@@ -255,19 +255,6 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, ...@@ -255,19 +255,6 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vm_id, bool ctx_switch) unsigned vm_id, bool ctx_switch)
{ {
u32 vmid = vm_id & 0xf; u32 vmid = vm_id & 0xf;
u32 next_rptr = ring->wptr + 5;
while ((next_rptr & 7) != 2)
next_rptr++;
next_rptr += 6;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
amdgpu_ring_write(ring, lower_32_bits(ring->next_rptr_gpu_addr) & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
amdgpu_ring_write(ring, next_rptr);
/* IB packet must end on a 8 DW boundary */ /* IB packet must end on a 8 DW boundary */
sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8); sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
......
...@@ -415,18 +415,6 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, ...@@ -415,18 +415,6 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vm_id, bool ctx_switch) unsigned vm_id, bool ctx_switch)
{ {
u32 vmid = vm_id & 0xf; u32 vmid = vm_id & 0xf;
u32 next_rptr = ring->wptr + 5;
while ((next_rptr & 7) != 2)
next_rptr++;
next_rptr += 6;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
amdgpu_ring_write(ring, lower_32_bits(ring->next_rptr_gpu_addr) & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
amdgpu_ring_write(ring, next_rptr);
/* IB packet must end on a 8 DW boundary */ /* IB packet must end on a 8 DW boundary */
sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8); sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
......
...@@ -34,6 +34,8 @@ ...@@ -34,6 +34,8 @@
#include "oss/oss_2_0_d.h" #include "oss/oss_2_0_d.h"
#include "oss/oss_2_0_sh_mask.h" #include "oss/oss_2_0_sh_mask.h"
#include "bif/bif_4_1_d.h"
static void uvd_v4_2_mc_resume(struct amdgpu_device *adev); static void uvd_v4_2_mc_resume(struct amdgpu_device *adev);
static void uvd_v4_2_init_cg(struct amdgpu_device *adev); static void uvd_v4_2_init_cg(struct amdgpu_device *adev);
static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev);
...@@ -438,6 +440,32 @@ static void uvd_v4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq ...@@ -438,6 +440,32 @@ static void uvd_v4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
amdgpu_ring_write(ring, 2); amdgpu_ring_write(ring, 2);
} }
/**
* uvd_v4_2_ring_emit_hdp_flush - emit an hdp flush
*
* @ring: amdgpu_ring pointer
*
* Emits an hdp flush.
*/
static void uvd_v4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
amdgpu_ring_write(ring, 0);
}
/**
* uvd_v4_2_ring_emit_hdp_invalidate - emit an hdp invalidate
*
* @ring: amdgpu_ring pointer
*
* Emits an hdp invalidate.
*/
static void uvd_v4_2_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
amdgpu_ring_write(ring, 1);
}
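Both helpers emit one register write as a ring packet rather than a direct MMIO access, which keeps the flush ordered against the commands around it. A short sketch of the encoding, assuming the usual PACKET0 semantics (count field = number of data dwords minus one):

/* Sketch: PACKET0(reg, n) makes the CP write the next n+1 dwords to
 * consecutive registers starting at reg.  So the flush above is one
 * write of 0 to HDP_MEM_COHERENCY_FLUSH_CNTL, and the invalidate one
 * write of 1 to HDP_DEBUG0.
 */
amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
amdgpu_ring_write(ring, 0);	/* the value the CP writes to the register */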
/** /**
* uvd_v4_2_ring_test_ring - register write test * uvd_v4_2_ring_test_ring - register write test
* *
...@@ -763,6 +791,8 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { ...@@ -763,6 +791,8 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
.parse_cs = amdgpu_uvd_ring_parse_cs, .parse_cs = amdgpu_uvd_ring_parse_cs,
.emit_ib = uvd_v4_2_ring_emit_ib, .emit_ib = uvd_v4_2_ring_emit_ib,
.emit_fence = uvd_v4_2_ring_emit_fence, .emit_fence = uvd_v4_2_ring_emit_fence,
.emit_hdp_flush = uvd_v4_2_ring_emit_hdp_flush,
.emit_hdp_invalidate = uvd_v4_2_ring_emit_hdp_invalidate,
.test_ring = uvd_v4_2_ring_test_ring, .test_ring = uvd_v4_2_ring_test_ring,
.test_ib = uvd_v4_2_ring_test_ib, .test_ib = uvd_v4_2_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop, .insert_nop = amdgpu_ring_insert_nop,
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include "uvd/uvd_5_0_sh_mask.h" #include "uvd/uvd_5_0_sh_mask.h"
#include "oss/oss_2_0_d.h" #include "oss/oss_2_0_d.h"
#include "oss/oss_2_0_sh_mask.h" #include "oss/oss_2_0_sh_mask.h"
#include "bif/bif_5_0_d.h"
#include "vi.h" #include "vi.h"
static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev);
...@@ -488,6 +489,32 @@ static void uvd_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq ...@@ -488,6 +489,32 @@ static void uvd_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
amdgpu_ring_write(ring, 2); amdgpu_ring_write(ring, 2);
} }
/**
* uvd_v5_0_ring_emit_hdp_flush - emit an hdp flush
*
* @ring: amdgpu_ring pointer
*
* Emits an hdp flush.
*/
static void uvd_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
amdgpu_ring_write(ring, 0);
}
/**
* uvd_v5_0_ring_emit_hdp_invalidate - emit an hdp invalidate
*
* @ring: amdgpu_ring pointer
*
* Emits an hdp invalidate.
*/
static void uvd_v5_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
amdgpu_ring_write(ring, 1);
}
/** /**
* uvd_v5_0_ring_test_ring - register write test * uvd_v5_0_ring_test_ring - register write test
* *
...@@ -815,6 +842,8 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { ...@@ -815,6 +842,8 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
.parse_cs = amdgpu_uvd_ring_parse_cs, .parse_cs = amdgpu_uvd_ring_parse_cs,
.emit_ib = uvd_v5_0_ring_emit_ib, .emit_ib = uvd_v5_0_ring_emit_ib,
.emit_fence = uvd_v5_0_ring_emit_fence, .emit_fence = uvd_v5_0_ring_emit_fence,
.emit_hdp_flush = uvd_v5_0_ring_emit_hdp_flush,
.emit_hdp_invalidate = uvd_v5_0_ring_emit_hdp_invalidate,
.test_ring = uvd_v5_0_ring_test_ring, .test_ring = uvd_v5_0_ring_test_ring,
.test_ib = uvd_v5_0_ring_test_ib, .test_ib = uvd_v5_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop, .insert_nop = amdgpu_ring_insert_nop,
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include "oss/oss_2_0_sh_mask.h" #include "oss/oss_2_0_sh_mask.h"
#include "smu/smu_7_1_3_d.h" #include "smu/smu_7_1_3_d.h"
#include "smu/smu_7_1_3_sh_mask.h" #include "smu/smu_7_1_3_sh_mask.h"
#include "bif/bif_5_1_d.h"
#include "vi.h" #include "vi.h"
static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev);
...@@ -385,8 +386,8 @@ static int uvd_v6_0_start(struct amdgpu_device *adev) ...@@ -385,8 +386,8 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
uint32_t mp_swap_cntl; uint32_t mp_swap_cntl;
int i, j, r; int i, j, r;
/*disable DPG */ /* disable DPG */
WREG32_P(mmUVD_POWER_STATUS, 0, ~(1 << 2)); WREG32_P(mmUVD_POWER_STATUS, 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
/* disable byte swapping */ /* disable byte swapping */
lmi_swap_cntl = 0; lmi_swap_cntl = 0;
...@@ -405,17 +406,21 @@ static int uvd_v6_0_start(struct amdgpu_device *adev) ...@@ -405,17 +406,21 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
} }
/* disable interrupt */ /* disable interrupt */
WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1)); WREG32_P(mmUVD_MASTINT_EN, 0, ~UVD_MASTINT_EN__VCPU_EN_MASK);
/* stall UMC and register bus before resetting VCPU */ /* stall UMC and register bus before resetting VCPU */
WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); WREG32_P(mmUVD_LMI_CTRL2, UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
mdelay(1); mdelay(1);
/* put LMI, VCPU, RBC etc... into reset */ /* put LMI, VCPU, RBC etc... into reset */
WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | WREG32(mmUVD_SOFT_RESET,
UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
mdelay(5); mdelay(5);
...@@ -424,8 +429,13 @@ static int uvd_v6_0_start(struct amdgpu_device *adev) ...@@ -424,8 +429,13 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
mdelay(5); mdelay(5);
/* initialize UVD memory controller */ /* initialize UVD memory controller */
WREG32(mmUVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) | WREG32(mmUVD_LMI_CTRL,
(1 << 21) | (1 << 9) | (1 << 20)); (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
UVD_LMI_CTRL__REQ_MODE_MASK |
UVD_LMI_CTRL__DISABLE_ON_FWV_FAIL_MASK);
#ifdef __BIG_ENDIAN #ifdef __BIG_ENDIAN
/* swap (8 in 32) RB and IB */ /* swap (8 in 32) RB and IB */
...@@ -447,10 +457,10 @@ static int uvd_v6_0_start(struct amdgpu_device *adev) ...@@ -447,10 +457,10 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
mdelay(5); mdelay(5);
/* enable VCPU clock */ /* enable VCPU clock */
WREG32(mmUVD_VCPU_CNTL, 1 << 9); WREG32(mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK);
/* enable UMC */ /* enable UMC */
WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8)); WREG32_P(mmUVD_LMI_CTRL2, 0, ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
/* boot up the VCPU */ /* boot up the VCPU */
WREG32(mmUVD_SOFT_RESET, 0); WREG32(mmUVD_SOFT_RESET, 0);
...@@ -484,10 +494,12 @@ static int uvd_v6_0_start(struct amdgpu_device *adev) ...@@ -484,10 +494,12 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
return r; return r;
} }
/* enable master interrupt */ /* enable master interrupt */
WREG32_P(mmUVD_MASTINT_EN, 3 << 1, ~(3 << 1)); WREG32_P(mmUVD_MASTINT_EN,
(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
/* clear the bit 4 of UVD_STATUS */ /* clear the bit 4 of UVD_STATUS */
WREG32_P(mmUVD_STATUS, 0, ~(2 << 1)); WREG32_P(mmUVD_STATUS, 0, ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
rb_bufsz = order_base_2(ring->ring_size); rb_bufsz = order_base_2(ring->ring_size);
tmp = 0; tmp = 0;
...@@ -580,6 +592,32 @@ static void uvd_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq ...@@ -580,6 +592,32 @@ static void uvd_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
amdgpu_ring_write(ring, 2); amdgpu_ring_write(ring, 2);
} }
/**
* uvd_v6_0_ring_emit_hdp_flush - emit an hdp flush
*
* @ring: amdgpu_ring pointer
*
* Emits an hdp flush.
*/
static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
amdgpu_ring_write(ring, 0);
}
/**
* uvd_v6_0_ring_emit_hdp_invalidate - emit an hdp invalidate
*
* @ring: amdgpu_ring pointer
*
* Emits an hdp invalidate.
*/
static void uvd_v6_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
amdgpu_ring_write(ring, 1);
}
/** /**
* uvd_v6_0_ring_test_ring - register write test * uvd_v6_0_ring_test_ring - register write test
* *
...@@ -847,7 +885,8 @@ static int uvd_v6_0_set_clockgating_state(void *handle, ...@@ -847,7 +885,8 @@ static int uvd_v6_0_set_clockgating_state(void *handle,
bool enable = (state == AMD_CG_STATE_GATE) ? true : false; bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
static int curstate = -1; static int curstate = -1;
if (adev->asic_type == CHIP_FIJI) if (adev->asic_type == CHIP_FIJI ||
adev->asic_type == CHIP_POLARIS10)
uvd_v6_set_bypass_mode(adev, enable); uvd_v6_set_bypass_mode(adev, enable);
if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
...@@ -919,6 +958,8 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = { ...@@ -919,6 +958,8 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = {
.parse_cs = amdgpu_uvd_ring_parse_cs, .parse_cs = amdgpu_uvd_ring_parse_cs,
.emit_ib = uvd_v6_0_ring_emit_ib, .emit_ib = uvd_v6_0_ring_emit_ib,
.emit_fence = uvd_v6_0_ring_emit_fence, .emit_fence = uvd_v6_0_ring_emit_fence,
.emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
.emit_hdp_invalidate = uvd_v6_0_ring_emit_hdp_invalidate,
.test_ring = uvd_v6_0_ring_test_ring, .test_ring = uvd_v6_0_ring_test_ring,
.test_ib = uvd_v6_0_ring_test_ib, .test_ib = uvd_v6_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop, .insert_nop = amdgpu_ring_insert_nop,
......
...@@ -37,6 +37,13 @@ ...@@ -37,6 +37,13 @@
#define CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_MASK 0x0000FFFF #define CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_MASK 0x0000FFFF
#define CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_SHIFT 0 #define CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_SHIFT 0
/* gen: chipset 1/2, asic 1/2/3 */
#define AMDGPU_DEFAULT_PCIE_GEN_MASK (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 \
| CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 \
| CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 \
| CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 \
| CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3)
/* Following flags shows PCIe lane width switch supported in driver which are decided by chipset and ASIC */ /* Following flags shows PCIe lane width switch supported in driver which are decided by chipset and ASIC */
#define CAIL_PCIE_LINK_WIDTH_SUPPORT_X1 0x00010000 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X1 0x00010000
#define CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 0x00020000 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 0x00020000
...@@ -47,4 +54,11 @@ ...@@ -47,4 +54,11 @@
#define CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 0x00400000 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 0x00400000
#define CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT 16 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT 16
/* 1/2/4/8/16 lanes */
#define AMDGPU_DEFAULT_PCIE_MLW_MASK (CAIL_PCIE_LINK_WIDTH_SUPPORT_X1 \
| CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \
| CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 \
| CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 \
| CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
#endif #endif
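These defaults pair with the driver's pcie_gen_cap / pcie_lane_cap module options. A hedged sketch of the fallback wiring (the pm field names follow the driver's conventions; treat the exact flow as an assumption):

/* Sketch: a nonzero module option wins; otherwise fall back to the
 * defaults when the platform query left the mask empty.
 */
if (amdgpu_pcie_gen_cap)
	adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
else if (adev->pm.pcie_gen_mask == 0)
	adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;

if (amdgpu_pcie_lane_cap)
	adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
else if (adev->pm.pcie_mlw_mask == 0)
	adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;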
...@@ -25,15 +25,6 @@ ...@@ -25,15 +25,6 @@
#define AMD_MAX_USEC_TIMEOUT 100000 /* 100 ms */ #define AMD_MAX_USEC_TIMEOUT 100000 /* 100 ms */
/*
* Supported GPU families (aligned with amdgpu_drm.h)
*/
#define AMD_FAMILY_UNKNOWN 0
#define AMD_FAMILY_CI 120 /* Bonaire, Hawaii */
#define AMD_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */
#define AMD_FAMILY_VI 130 /* Iceland, Tonga */
#define AMD_FAMILY_CZ 135 /* Carrizo */
/* /*
* Supported ASIC types * Supported ASIC types
*/ */
...@@ -120,6 +111,8 @@ enum amd_powergating_state { ...@@ -120,6 +111,8 @@ enum amd_powergating_state {
#define AMD_PG_SUPPORT_SDMA (1 << 8) #define AMD_PG_SUPPORT_SDMA (1 << 8)
#define AMD_PG_SUPPORT_ACP (1 << 9) #define AMD_PG_SUPPORT_ACP (1 << 9)
#define AMD_PG_SUPPORT_SAMU (1 << 10) #define AMD_PG_SUPPORT_SAMU (1 << 10)
#define AMD_PG_SUPPORT_GFX_QUICK_MG (1 << 11)
#define AMD_PG_SUPPORT_GFX_PIPELINE (1 << 12)
enum amd_pm_state_type { enum amd_pm_state_type {
/* not used for dpm */ /* not used for dpm */
......
...@@ -2823,4 +2823,7 @@ ...@@ -2823,4 +2823,7 @@
#define mmDC_EDC_CSINVOC_CNT 0x3192 #define mmDC_EDC_CSINVOC_CNT 0x3192
#define mmDC_EDC_RESTORE_CNT 0x3193 #define mmDC_EDC_RESTORE_CNT 0x3193
#define mmGC_CAC_IND_INDEX 0x129a
#define mmGC_CAC_IND_DATA 0x129b
#endif /* GFX_8_0_D_H */ #endif /* GFX_8_0_D_H */
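GC_CAC is an indirectly addressed register space: the target offset goes into the INDEX register and data moves through the DATA register. A sketch of the accessor this enables (the series adds read/write helpers of roughly this shape; the lock field name here is an assumption):

/* Sketch: index/data access to the GC CAC block.  A spinlock keeps the
 * INDEX->DATA sequence atomic against concurrent users.
 */
static u32 gc_cac_rreg(struct amdgpu_device *adev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&adev->gc_cac_idx_lock, flags);
	WREG32(mmGC_CAC_IND_INDEX, reg);
	r = RREG32(mmGC_CAC_IND_DATA);
	spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags);
	return r;
}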
...@@ -49,6 +49,7 @@ enum cgs_ind_reg { ...@@ -49,6 +49,7 @@ enum cgs_ind_reg {
CGS_IND_REG__SMC, CGS_IND_REG__SMC,
CGS_IND_REG__UVD_CTX, CGS_IND_REG__UVD_CTX,
CGS_IND_REG__DIDT, CGS_IND_REG__DIDT,
CGS_IND_REG_GC_CAC,
CGS_IND_REG__AUDIO_ENDPT CGS_IND_REG__AUDIO_ENDPT
}; };
...@@ -115,6 +116,7 @@ enum cgs_system_info_id { ...@@ -115,6 +116,7 @@ enum cgs_system_info_id {
CGS_SYSTEM_INFO_CG_FLAGS, CGS_SYSTEM_INFO_CG_FLAGS,
CGS_SYSTEM_INFO_PG_FLAGS, CGS_SYSTEM_INFO_PG_FLAGS,
CGS_SYSTEM_INFO_GFX_CU_INFO, CGS_SYSTEM_INFO_GFX_CU_INFO,
CGS_SYSTEM_INFO_GFX_SE_INFO,
CGS_SYSTEM_INFO_ID_MAXIMUM, CGS_SYSTEM_INFO_ID_MAXIMUM,
}; };
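CGS_SYSTEM_INFO_GFX_SE_INFO lets powerplay query the shader-engine count without reaching into amdgpu internals. A usage sketch, following the existing cgs_query_system_info() convention:

/* Sketch: querying the number of shader engines through CGS. */
struct cgs_system_info sys_info = {0};
uint32_t num_se;
int result;

sys_info.size = sizeof(struct cgs_system_info);
sys_info.info_id = CGS_SYSTEM_INFO_GFX_SE_INFO;
result = cgs_query_system_info(hwmgr->device, &sys_info);
num_se = result ? 1 : (uint32_t)sys_info.value;	/* conservative fallback */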
...@@ -189,7 +191,6 @@ typedef unsigned long cgs_handle_t; ...@@ -189,7 +191,6 @@ typedef unsigned long cgs_handle_t;
struct cgs_acpi_method_argument { struct cgs_acpi_method_argument {
uint32_t type; uint32_t type;
uint32_t method_length;
uint32_t data_length; uint32_t data_length;
union{ union{
uint32_t value; uint32_t value;
......
...@@ -132,8 +132,7 @@ int pem_task_enable_dynamic_state_management(struct pp_eventmgr *eventmgr, struc ...@@ -132,8 +132,7 @@ int pem_task_enable_dynamic_state_management(struct pp_eventmgr *eventmgr, struc
int pem_task_disable_dynamic_state_management(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data) int pem_task_disable_dynamic_state_management(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data)
{ {
/* TODO */ return phm_disable_dynamic_state_management(eventmgr->hwmgr);
return 0;
} }
int pem_task_enable_clock_power_gatings_tasks(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data) int pem_task_enable_clock_power_gatings_tasks(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data)
......
...@@ -206,7 +206,7 @@ int cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) ...@@ -206,7 +206,7 @@ int cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
AMD_IP_BLOCK_TYPE_VCE, AMD_IP_BLOCK_TYPE_VCE,
AMD_PG_STATE_GATE); AMD_PG_STATE_GATE);
cz_enable_disable_vce_dpm(hwmgr, false); cz_enable_disable_vce_dpm(hwmgr, false);
/* TODO: to figure out why vce can't be poweroff*/ cz_dpm_powerdown_vce(hwmgr);
cz_hwmgr->vce_power_gated = true; cz_hwmgr->vce_power_gated = true;
} else { } else {
cz_dpm_powerup_vce(hwmgr); cz_dpm_powerup_vce(hwmgr);
...@@ -225,6 +225,7 @@ int cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) ...@@ -225,6 +225,7 @@ int cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
} }
} }
} else { } else {
cz_hwmgr->vce_power_gated = bgate;
cz_dpm_update_vce_dpm(hwmgr); cz_dpm_update_vce_dpm(hwmgr);
cz_enable_disable_vce_dpm(hwmgr, !bgate); cz_enable_disable_vce_dpm(hwmgr, !bgate);
return 0; return 0;
......
...@@ -302,9 +302,6 @@ struct fiji_hwmgr { ...@@ -302,9 +302,6 @@ struct fiji_hwmgr {
bool pg_acp_init; bool pg_acp_init;
bool frtc_enabled; bool frtc_enabled;
bool frtc_status_changed; bool frtc_status_changed;
/* soft pptable for re-uploading into smu */
void *soft_pp_table;
}; };
/* To convert to Q8.8 format for firmware */ /* To convert to Q8.8 format for firmware */
......