提交 8cd3ac52 编写于 作者: D Dave Airlie

Merge branch 'drm-next-4.12' of git://people.freedesktop.org/~agd5f/linux into drm-next

New stuff for 4.12:
- Preliminary vega10 support
- Support for multi-level page tables
- GPU sensor stuff for mesa
- job tracing improvements
- PRT support for sparse buffers
- Additional SR-IOV improvements
- ttm improvements
- misc bug fixes and code cleanups

* 'drm-next-4.12' of git://people.freedesktop.org/~agd5f/linux: (315 commits)
  drm/amdgpu: Fix 32bit x86 compilation warning
  drm/amdgpu: just disallow reading untouched registers
  drm/amdgpu: remove duplicate allowed reg CP_CPF_BUSY_STAT
  drm/amdgpu/soc15: enable psp block for SRIOV
  drm/amdgpu/soc15: bypass pp block for vf
  drm/amdgpu/psp: add check sOS sign
  drm/amd/amdgpu: Correct ring wptr address in debugfs (v2)
  drm/amdgpu: Fix multi-level page table bugs for large BOs v3
  drm/amdgpu: Fix Vega10 VM initialization
  drm/amdgpu: Make max_pfn 64-bit
  drm/amdgpu: drop GB_GPU_ID from the golden settings
  drm/amdgpu: fix vm pte pde flags to 64-bit for sdma (v3)
  drm/amd/amdgpu: fix Tonga S3 resume hang on rhel6.8
  drm/ttm: decrease ttm bo priority number
  drm/amd/amdgpu: fix performance drop when VRAM pressure
  drm/amdgpu: Couple small warning fixes
  drm/amdgpu: Clean up GFX 9 VM fault messages
  drm/amdgpu: Register UTCL2 as a source of VM faults
  drm/amdgpu/soc15: drop support for reading some registers
  drm/amdgpu/soc15: return cached values for some registers (v2)
  ...
......@@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
......@@ -34,12 +34,13 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
amdgpu-y += \
vi.o mxgpu_vi.o
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o
# add GMC block
amdgpu-y += \
gmc_v7_0.o \
gmc_v8_0.o
gmc_v8_0.o \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o
# add IH block
amdgpu-y += \
......@@ -47,7 +48,13 @@ amdgpu-y += \
amdgpu_ih.o \
iceland_ih.o \
tonga_ih.o \
cz_ih.o
cz_ih.o \
vega10_ih.o
# add PSP block
amdgpu-y += \
amdgpu_psp.o \
psp_v3_1.o
# add SMC block
amdgpu-y += \
......@@ -63,23 +70,27 @@ amdgpu-y += \
# add GFX block
amdgpu-y += \
amdgpu_gfx.o \
gfx_v8_0.o
gfx_v8_0.o \
gfx_v9_0.o
# add async DMA block
amdgpu-y += \
sdma_v2_4.o \
sdma_v3_0.o
sdma_v3_0.o \
sdma_v4_0.o
# add UVD block
amdgpu-y += \
amdgpu_uvd.o \
uvd_v5_0.o \
uvd_v6_0.o
uvd_v6_0.o \
uvd_v7_0.o
# add VCE block
amdgpu-y += \
amdgpu_vce.o \
vce_v3_0.o
vce_v3_0.o \
vce_v4_0.o
# add amdkfd interfaces
amdgpu-y += \
......
......@@ -52,6 +52,7 @@
#include "amdgpu_irq.h"
#include "amdgpu_ucode.h"
#include "amdgpu_ttm.h"
#include "amdgpu_psp.h"
#include "amdgpu_gds.h"
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
......@@ -59,6 +60,8 @@
#include "amd_powerplay.h"
#include "amdgpu_dpm.h"
#include "amdgpu_acp.h"
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
#include "gpu_scheduler.h"
#include "amdgpu_virt.h"
......@@ -79,7 +82,7 @@ extern int amdgpu_pcie_gen2;
extern int amdgpu_msi;
extern int amdgpu_lockup_timeout;
extern int amdgpu_dpm;
extern int amdgpu_smc_load_fw;
extern int amdgpu_fw_load_type;
extern int amdgpu_aspm;
extern int amdgpu_runtime_pm;
extern unsigned amdgpu_ip_block_mask;
......@@ -101,6 +104,11 @@ extern char *amdgpu_disable_cu;
extern char *amdgpu_virtual_display;
extern unsigned amdgpu_pp_feature_mask;
extern int amdgpu_vram_page_split;
extern int amdgpu_ngg;
extern int amdgpu_prim_buf_per_se;
extern int amdgpu_pos_buf_per_se;
extern int amdgpu_cntl_sb_buf_per_se;
extern int amdgpu_param_buf_per_se;
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
......@@ -109,11 +117,16 @@ extern int amdgpu_vram_page_split;
#define AMDGPU_IB_POOL_SIZE 16
#define AMDGPU_DEBUGFS_MAX_COMPONENTS 32
#define AMDGPUFB_CONN_LIMIT 4
#define AMDGPU_BIOS_NUM_SCRATCH 8
#define AMDGPU_BIOS_NUM_SCRATCH 16
/* max number of IP instances */
#define AMDGPU_MAX_SDMA_INSTANCES 2
/* max number of VMHUB */
#define AMDGPU_MAX_VMHUBS 2
#define AMDGPU_MMHUB 0
#define AMDGPU_GFXHUB 1
/* hardcode that limit for now */
#define AMDGPU_VA_RESERVED_SIZE (8 << 20)
......@@ -280,7 +293,7 @@ struct amdgpu_vm_pte_funcs {
void (*set_pte_pde)(struct amdgpu_ib *ib,
uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags);
uint32_t incr, uint64_t flags);
};
/* provided by the gmc block */
......@@ -293,7 +306,18 @@ struct amdgpu_gart_funcs {
void *cpu_pt_addr, /* cpu addr of page table */
uint32_t gpu_page_idx, /* pte/pde to update */
uint64_t addr, /* addr to write into pte/pde */
uint32_t flags); /* access flags */
uint64_t flags); /* access flags */
/* enable/disable PRT support */
void (*set_prt)(struct amdgpu_device *adev, bool enable);
/* set pte flags based per asic */
uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
uint32_t flags);
};
/*
 * Callback table provided by the mc block. struct amdgpu_mc carries a pointer
 * to one of these in its mc_funcs member.
 */
struct amdgpu_mc_funcs {
/*
 * adjust mc addr in fb for APU case: takes the device and an address and
 * returns the adjusted address.
 */
u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr);
};
/* provided by the ih block */
......@@ -522,6 +546,10 @@ struct amdgpu_gart {
struct page **pages;
#endif
bool ready;
/* Asic default pte flags */
uint64_t gart_pte_flags;
const struct amdgpu_gart_funcs *gart_funcs;
};
......@@ -537,9 +565,24 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
int pages);
int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
int pages, struct page **pagelist,
dma_addr_t *dma_addr, uint32_t flags);
dma_addr_t *dma_addr, uint64_t flags);
int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
/*
* VMHUB structures, functions & helpers
*
* One struct amdgpu_vmhub exists per hub; struct amdgpu_device holds an array
* of AMDGPU_MAX_VMHUBS of them, indexed by AMDGPU_MMHUB / AMDGPU_GFXHUB.
*/
struct amdgpu_vmhub {
/*
 * NOTE(review): the uint32_t members below look like per-hub register
 * offsets (context 0 page table base lo/hi, invalidation engine 0
 * request/ack, context 0 control, L2 protection fault status/control);
 * confirm against the gfxhub/mmhub code that fills them in.
 */
uint32_t ctx0_ptb_addr_lo32;
uint32_t ctx0_ptb_addr_hi32;
uint32_t vm_inv_eng0_req;
uint32_t vm_inv_eng0_ack;
uint32_t vm_context0_cntl;
uint32_t vm_l2_pro_fault_status;
uint32_t vm_l2_pro_fault_cntl;
/* returns a 32-bit value for the given vm_id (presumably the invalidate request word; TODO confirm) */
uint32_t (*get_invalidate_req)(unsigned int vm_id);
/* returns a 32-bit value (presumably the VM protection-fault enable bits; TODO confirm) */
uint32_t (*get_vm_protection_bits)(void);
};
/*
* GPU MC structures, functions & helpers
*/
......@@ -567,6 +610,15 @@ struct amdgpu_mc {
uint32_t vram_type;
uint32_t srbm_soft_reset;
struct amdgpu_mode_mc_save save;
bool prt_warning;
/* apertures */
u64 shared_aperture_start;
u64 shared_aperture_end;
u64 private_aperture_start;
u64 private_aperture_end;
/* protects concurrent invalidation */
spinlock_t invalidate_lock;
const struct amdgpu_mc_funcs *mc_funcs;
};
/*
......@@ -601,6 +653,83 @@ struct amdgpu_doorbell {
u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */
};
/*
* 64-bit doorbell assignment. Offsets are in QWORDs; the 0x00-0xFF index range
* occupies 2KB of doorbell space.
*/
typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
{
/*
* All compute related doorbells (kiq, hiq, diq, traditional compute queue, user queue) should be located in
* a contiguous range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range.
* Compute related doorbells are allocated from 0x00 to 0x8a.
*/
/* kernel scheduling */
AMDGPU_DOORBELL64_KIQ = 0x00,
/* HSA interface queue and debug queue */
AMDGPU_DOORBELL64_HIQ = 0x01,
AMDGPU_DOORBELL64_DIQ = 0x02,
/* Compute engines */
AMDGPU_DOORBELL64_MEC_RING0 = 0x03,
AMDGPU_DOORBELL64_MEC_RING1 = 0x04,
AMDGPU_DOORBELL64_MEC_RING2 = 0x05,
AMDGPU_DOORBELL64_MEC_RING3 = 0x06,
AMDGPU_DOORBELL64_MEC_RING4 = 0x07,
AMDGPU_DOORBELL64_MEC_RING5 = 0x08,
AMDGPU_DOORBELL64_MEC_RING6 = 0x09,
AMDGPU_DOORBELL64_MEC_RING7 = 0x0a,
/* User queue doorbell range (128 doorbells, 0x0b..0x8a inclusive) */
AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b,
AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a,
/* Graphics engine */
AMDGPU_DOORBELL64_GFX_RING0 = 0x8b,
/*
* Other graphics doorbells can be allocated here: from 0x8c to 0xef.
* Graphics voltage island aperture 1:
* the default non-graphics QWORD index range is 0xF0 - 0xFF inclusive.
*/
/* sDMA engines */
AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xF0,
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1,
AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xF2,
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3,
/* Interrupt handler */
AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */
AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */
AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */
/* VCN engines use 32-bit doorbells; two of them share one QWORD */
AMDGPU_DOORBELL64_VCN0_1 = 0xF8, /* lower 32 bits for VCN0 and upper 32 bits for VCN1 */
AMDGPU_DOORBELL64_VCN2_3 = 0xF9,
AMDGPU_DOORBELL64_VCN4_5 = 0xFA,
AMDGPU_DOORBELL64_VCN6_7 = 0xFB,
/* These overlap the VCN doorbell assignment (same values 0xF8-0xFB), as the two are mutually exclusive.
* The VCE engine's doorbell is 32 bits wide and two VCE rings share one QWORD.
*/
AMDGPU_DOORBELL64_RING0_1 = 0xF8,
AMDGPU_DOORBELL64_RING2_3 = 0xF9,
AMDGPU_DOORBELL64_RING4_5 = 0xFA,
AMDGPU_DOORBELL64_RING6_7 = 0xFB,
/* UVD rings, two rings per QWORD going by the names */
AMDGPU_DOORBELL64_UVD_RING0_1 = 0xFC,
AMDGPU_DOORBELL64_UVD_RING2_3 = 0xFD,
AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFE,
AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFF,
/* Highest assigned index (equal to the last UVD entry) and an out-of-range sentinel */
AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF,
AMDGPU_DOORBELL64_INVALID = 0xFFFF
} AMDGPU_DOORBELL64_ASSIGNMENT;
void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
phys_addr_t *aperture_base,
size_t *aperture_size,
......@@ -699,6 +828,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
struct amdgpu_fpriv {
struct amdgpu_vm vm;
struct amdgpu_bo_va *prt_va;
struct mutex bo_list_lock;
struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr;
......@@ -776,9 +906,12 @@ struct amdgpu_rlc {
struct amdgpu_mec {
struct amdgpu_bo *hpd_eop_obj;
u64 hpd_eop_gpu_addr;
struct amdgpu_bo *mec_fw_obj;
u64 mec_fw_gpu_addr;
u32 num_pipe;
u32 num_mec;
u32 num_queue;
void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
};
struct amdgpu_kiq {
......@@ -810,7 +943,16 @@ struct amdgpu_rb_config {
uint32_t raster_config_1;
};
struct amdgpu_gca_config {
/*
 * Individual address-configuration fields, embedded in the gfx config as
 * gb_addr_config_fields.
 * NOTE(review): presumably the decoded fields of the GB_ADDR_CONFIG register;
 * the units/encoding of each member are not visible here - confirm in the
 * gfx IP code that fills this in.
 */
struct gb_addr_config {
uint16_t pipe_interleave_size;
uint8_t num_pipes;
uint8_t max_compress_frags;
uint8_t num_banks;
uint8_t num_se;
uint8_t num_rb_per_se;
};
struct amdgpu_gfx_config {
unsigned max_shader_engines;
unsigned max_tile_pipes;
unsigned max_cu_per_sh;
......@@ -839,7 +981,11 @@ struct amdgpu_gca_config {
uint32_t tile_mode_array[32];
uint32_t macrotile_mode_array[16];
struct gb_addr_config gb_addr_config_fields;
struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
/* gfx configure feature */
uint32_t double_offchip_lds_buf;
};
struct amdgpu_cu_info {
......@@ -857,9 +1003,31 @@ struct amdgpu_gfx_funcs {
void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
};
/* Bookkeeping for one NGG buffer; struct amdgpu_ngg holds NGG_BUF_MAX of these. */
struct amdgpu_ngg_buf {
/* buffer object backing this buffer */
struct amdgpu_bo *bo;
/* NOTE(review): presumably the GPU address of bo - confirm at the allocation site */
uint64_t gpu_addr;
/* NOTE(review): size vs. bo_size looks like requested vs. allocated size - confirm */
uint32_t size;
uint32_t bo_size;
};
/*
 * Indices into amdgpu_ngg.buf[]; NGG_BUF_MAX is the number of buffers.
 * NOTE(review): the four entries appear to line up with the
 * amdgpu_{prim,pos,cntl_sb,param}_buf_per_se module parameters - confirm.
 */
enum {
PRIM = 0,
POS,
CNTL,
PARAM,
NGG_BUF_MAX
};
/* NGG state, embedded in struct amdgpu_gfx as the ngg member. */
struct amdgpu_ngg {
/* one entry per buffer kind, indexed by PRIM/POS/CNTL/PARAM */
struct amdgpu_ngg_buf buf[NGG_BUF_MAX];
/* NOTE(review): presumably the address and size of a reserved GDS range - confirm */
uint32_t gds_reserve_addr;
uint32_t gds_reserve_size;
/* NOTE(review): presumably set once NGG setup has completed - confirm */
bool init;
};
struct amdgpu_gfx {
struct mutex gpu_clock_mutex;
struct amdgpu_gca_config config;
struct amdgpu_gfx_config config;
struct amdgpu_rlc rlc;
struct amdgpu_mec mec;
struct amdgpu_kiq kiq;
......@@ -899,6 +1067,9 @@ struct amdgpu_gfx {
/* reset mask */
uint32_t grbm_soft_reset;
uint32_t srbm_soft_reset;
bool in_reset;
/* NGG */
struct amdgpu_ngg ngg;
};
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
......@@ -1007,66 +1178,11 @@ struct amdgpu_wb {
int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb);
void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb);
int amdgpu_wb_get_64bit(struct amdgpu_device *adev, u32 *wb);
void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb);
void amdgpu_get_pcie_info(struct amdgpu_device *adev);
/*
* UVD
*/
#define AMDGPU_DEFAULT_UVD_HANDLES 10
#define AMDGPU_MAX_UVD_HANDLES 40
#define AMDGPU_UVD_STACK_SIZE (200*1024)
#define AMDGPU_UVD_HEAP_SIZE (256*1024)
#define AMDGPU_UVD_SESSION_SIZE (50*1024)
#define AMDGPU_UVD_FIRMWARE_OFFSET 256
struct amdgpu_uvd {
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
uint64_t gpu_addr;
unsigned fw_version;
void *saved_bo;
unsigned max_handles;
atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
struct delayed_work idle_work;
const struct firmware *fw; /* UVD firmware */
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
bool address_64_bit;
bool use_ctx_buf;
struct amd_sched_entity entity;
uint32_t srbm_soft_reset;
};
/*
* VCE
*/
#define AMDGPU_MAX_VCE_HANDLES 16
#define AMDGPU_VCE_FIRMWARE_OFFSET 256
#define AMDGPU_VCE_HARVEST_VCE0 (1 << 0)
#define AMDGPU_VCE_HARVEST_VCE1 (1 << 1)
struct amdgpu_vce {
struct amdgpu_bo *vcpu_bo;
uint64_t gpu_addr;
unsigned fw_version;
unsigned fb_version;
atomic_t handles[AMDGPU_MAX_VCE_HANDLES];
struct drm_file *filp[AMDGPU_MAX_VCE_HANDLES];
uint32_t img_size[AMDGPU_MAX_VCE_HANDLES];
struct delayed_work idle_work;
struct mutex idle_mutex;
const struct firmware *fw; /* VCE firmware */
struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS];
struct amdgpu_irq_src irq;
unsigned harvest_config;
struct amd_sched_entity entity;
uint32_t srbm_soft_reset;
unsigned num_rings;
};
/*
* SDMA
*/
......@@ -1095,11 +1211,22 @@ struct amdgpu_sdma {
/*
* Firmware
*/
/*
 * How firmware is loaded; stored in amdgpu_firmware.load_type and compared
 * against AMDGPU_FW_LOAD_PSP when registering the SMC ucode.
 */
enum amdgpu_firmware_load_type {
/* value 0; NOTE(review): presumably the driver loads firmware itself - confirm */
AMDGPU_FW_LOAD_DIRECT = 0,
/* NOTE(review): presumably loading is delegated to the SMU - confirm */
AMDGPU_FW_LOAD_SMU,
/* loading is done through the PSP (used instead of the SMU from vega10) */
AMDGPU_FW_LOAD_PSP,
};
struct amdgpu_firmware {
struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM];
bool smu_load;
enum amdgpu_firmware_load_type load_type;
struct amdgpu_bo *fw_buf;
unsigned int fw_size;
unsigned int max_ucodes;
/* firmwares are loaded by psp instead of smu from vega10 */
const struct amdgpu_psp_funcs *funcs;
struct amdgpu_bo *rbuf;
struct mutex mutex;
};
/*
......@@ -1112,10 +1239,6 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
* Testing
*/
void amdgpu_test_moves(struct amdgpu_device *adev);
void amdgpu_test_ring_sync(struct amdgpu_device *adev,
struct amdgpu_ring *cpA,
struct amdgpu_ring *cpB);
void amdgpu_test_syncing(struct amdgpu_device *adev);
/*
* MMU Notifier
......@@ -1202,6 +1325,8 @@ struct amdgpu_asic_funcs {
/* static power management */
int (*get_pcie_lanes)(struct amdgpu_device *adev);
void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
/* get config memsize register */
u32 (*get_config_memsize)(struct amdgpu_device *adev);
};
/*
......@@ -1342,9 +1467,11 @@ struct amdgpu_device {
bool have_disp_power_ref;
/* BIOS */
bool is_atom_fw;
uint8_t *bios;
uint32_t bios_size;
struct amdgpu_bo *stollen_vga_memory;
uint32_t bios_scratch_reg_offset;
uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH];
/* Register/doorbell mmio */
......@@ -1391,6 +1518,7 @@ struct amdgpu_device {
struct amdgpu_gart gart;
struct amdgpu_dummy_page dummy_page;
struct amdgpu_vm_manager vm_manager;
struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS];
/* memory management */
struct amdgpu_mman mman;
......@@ -1457,6 +1585,9 @@ struct amdgpu_device {
/* firmwares */
struct amdgpu_firmware firmware;
/* PSP */
struct psp_context psp;
/* GDS */
struct amdgpu_gds gds;
......@@ -1501,23 +1632,32 @@ void amdgpu_device_fini(struct amdgpu_device *adev);
int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
bool always_indirect);
uint32_t acc_flags);
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
bool always_indirect);
uint32_t acc_flags);
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
/*
* Registers read & write functions.
*/
#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), false)
#define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), true)
#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), false))
#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), false)
#define WREG32_IDX(reg, v) amdgpu_mm_wreg(adev, (reg), (v), true)
#define AMDGPU_REGS_IDX (1<<0)
#define AMDGPU_REGS_NO_KIQ (1<<1)
#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0)
#define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX)
#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0))
#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0)
#define WREG32_IDX(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_IDX)
#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
/*
 * Extract FIELD from register value v: keep only the field's bits
 * (FIELD##_MASK), then shift them down by FIELD##_SHIFT so the result is the
 * field value itself. This is the inverse of REG_SET; the previous expansion
 * was a copy of REG_SET and shifted the wrong way.
 */
#define REG_GET(FIELD, v) (((v) & FIELD##_MASK) >> FIELD##_SHIFT)
#define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
......@@ -1556,6 +1696,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))
#define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
#define REG_FIELD_MASK(reg, field) reg##__##field##_MASK
......@@ -1584,7 +1726,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
{
if (ring->count_dw <= 0)
DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
ring->ring[ring->wptr++] = v;
ring->ring[ring->wptr++ & ring->buf_mask] = v;
ring->wptr &= ring->ptr_mask;
ring->count_dw--;
}
......@@ -1597,9 +1739,9 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr
if (ring->count_dw < count_dw) {
DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
} else {
occupied = ring->wptr & ring->ptr_mask;
occupied = ring->wptr & ring->buf_mask;
dst = (void *)&ring->ring[occupied];
chunk1 = ring->ptr_mask + 1 - occupied;
chunk1 = ring->buf_mask + 1 - occupied;
chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
chunk2 = count_dw - chunk1;
chunk1 <<= 2;
......@@ -1650,11 +1792,13 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
#define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
#define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
#define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
#define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
#define amdgpu_vm_get_pte_flags(adev, flags) (adev)->gart.gart_funcs->get_vm_pte_flags((adev),(flags))
#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
......@@ -1698,6 +1842,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
#define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
/* Common functions */
int amdgpu_gpu_reset(struct amdgpu_device *adev);
......@@ -1723,7 +1868,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
int *last_invalidated);
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
struct ttm_mem_reg *mem);
void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base);
void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
......@@ -1762,8 +1907,6 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev);
int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
void amdgpu_driver_postclose_kms(struct drm_device *dev,
struct drm_file *file_priv);
void amdgpu_driver_preclose_kms(struct drm_device *dev,
struct drm_file *file_priv);
int amdgpu_suspend(struct amdgpu_device *adev);
int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon);
int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
......
......@@ -74,9 +74,9 @@ static void amdgpu_afmt_calc_cts(uint32_t clock, int *CTS, int *N, int freq)
/* Check that we are in spec (not always possible) */
if (n < (128*freq/1500))
printk(KERN_WARNING "Calculated ACR N value is too small. You may experience audio problems.\n");
pr_warn("Calculated ACR N value is too small. You may experience audio problems.\n");
if (n > (128*freq/300))
printk(KERN_WARNING "Calculated ACR N value is too large. You may experience audio problems.\n");
pr_warn("Calculated ACR N value is too large. You may experience audio problems.\n");
*N = n;
*CTS = cts;
......
......@@ -1748,3 +1748,31 @@ void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
memcpy(dst, src, num_bytes);
#endif
}
/*
 * Allocate the atom context's scratch buffer.
 *
 * The size comes from the first reserve-info entry of the
 * VRAM_UsageByFirmware data table (usFirmwareUseInKb, in KiB) when that table
 * can be parsed; if the table is missing or reports 0, 20 KiB is used.
 *
 * On success ctx->scratch points at zeroed memory and ctx->scratch_size_bytes
 * holds its size. Returns 0 on success or -ENOMEM if the allocation fails (in
 * which case scratch_size_bytes is left at 0).
 */
int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
{
	struct atom_context *ctx = adev->mode_info.atom_context;
	int index = GetIndexIntoMasterTable(DATA, VRAM_UsageByFirmware);
	struct _ATOM_VRAM_USAGE_BY_FIRMWARE *fw_usage;
	uint16_t table_offset;
	int scratch_bytes = 0;

	if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &table_offset)) {
		fw_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + table_offset);

		DRM_DEBUG("atom firmware requested %08x %dkb\n",
			  le32_to_cpu(fw_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware),
			  le16_to_cpu(fw_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb));

		/* table value is in KiB */
		scratch_bytes = le16_to_cpu(fw_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024;
	}

	/* size stays 0 until the allocation has actually succeeded */
	ctx->scratch_size_bytes = 0;

	/* fall back to 20 KiB when the table gave no size */
	scratch_bytes = scratch_bytes ? scratch_bytes : 20 * 1024;

	ctx->scratch = kzalloc(scratch_bytes, GFP_KERNEL);
	if (ctx->scratch == NULL)
		return -ENOMEM;

	ctx->scratch_size_bytes = scratch_bytes;
	return 0;
}
......@@ -215,4 +215,7 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
u8 voltage_type,
u8 *svd_gpio_id, u8 *svc_gpio_id);
int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev);
#endif
/*
* Copyright 2016 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "atomfirmware.h"
#include "amdgpu_atomfirmware.h"
#include "atom.h"
/*
 * Index of member table_name inside struct master_table, expressed in units
 * of uint16_t (byte offset of the member divided by sizeof(uint16_t)). The
 * result is what this file passes as the table index to
 * amdgpu_atom_parse_data_header().
 */
#define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t))
/*
 * Check the firmwareinfo data table for the GPU virtualization capability.
 *
 * Returns true only when the table can be parsed and its firmware_capability
 * word has ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION set; false otherwise
 * (including when the table cannot be parsed).
 */
bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev)
{
	struct atom_firmware_info_v3_1 *fw_info;
	uint16_t table_offset;
	int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
						firmwareinfo);

	if (!amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index,
					   NULL, NULL, NULL, &table_offset))
		return false;

	/* the table lives inside the BIOS image at the parsed offset */
	fw_info = (struct atom_firmware_info_v3_1 *)
		(adev->mode_info.atom_context->bios + table_offset);

	if (le32_to_cpu(fw_info->firmware_capability) &
	    ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION)
		return true;

	return false;
}
/*
 * Record where the BIOS scratch registers start.
 *
 * Reads bios_scratch_reg_startaddr from the firmwareinfo data table and
 * stores it in adev->bios_scratch_reg_offset. If the table cannot be parsed
 * the offset is left unchanged.
 */
void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev)
{
	struct atom_firmware_info_v3_1 *fw_info;
	uint16_t table_offset;
	int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
						firmwareinfo);

	if (!amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index,
					   NULL, NULL, NULL, &table_offset))
		return;

	fw_info = (struct atom_firmware_info_v3_1 *)
		(adev->mode_info.atom_context->bios + table_offset);

	adev->bios_scratch_reg_offset =
		le32_to_cpu(fw_info->bios_scratch_reg_startaddr);
}
/*
 * Read the AMDGPU_BIOS_NUM_SCRATCH consecutive registers starting at
 * adev->bios_scratch_reg_offset into adev->bios_scratch[].
 */
void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev)
{
	int reg = 0;

	while (reg < AMDGPU_BIOS_NUM_SCRATCH) {
		adev->bios_scratch[reg] = RREG32(adev->bios_scratch_reg_offset + reg);
		reg++;
	}
}
/*
 * Write the values held in adev->bios_scratch[] back to the
 * AMDGPU_BIOS_NUM_SCRATCH consecutive registers starting at
 * adev->bios_scratch_reg_offset.
 */
void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev)
{
	int reg = 0;

	while (reg < AMDGPU_BIOS_NUM_SCRATCH) {
		WREG32(adev->bios_scratch_reg_offset + reg, adev->bios_scratch[reg]);
		reg++;
	}
}
/*
 * Allocate the atom context's scratch buffer.
 *
 * The size is taken from used_by_driver_in_kb (in KiB) of the
 * vram_usagebyfirmware data table when that table can be parsed; if the table
 * is missing or reports 0, 20 KiB is used.
 *
 * On success ctx->scratch points at zeroed memory and ctx->scratch_size_bytes
 * holds its size. Returns 0 on success or -ENOMEM if the allocation fails (in
 * which case scratch_size_bytes is left at 0).
 */
int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
{
	struct atom_context *ctx = adev->mode_info.atom_context;
	int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
						vram_usagebyfirmware);
	uint16_t table_offset;
	int scratch_bytes = 0;

	if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &table_offset)) {
		struct vram_usagebyfirmware_v2_1 *fw_usage =
			(struct vram_usagebyfirmware_v2_1 *)(ctx->bios + table_offset);

		DRM_DEBUG("atom firmware requested %08x %dkb fw %dkb drv\n",
			  le32_to_cpu(fw_usage->start_address_in_kb),
			  le16_to_cpu(fw_usage->used_by_firmware_in_kb),
			  le16_to_cpu(fw_usage->used_by_driver_in_kb));

		/* only the driver's share sizes the scratch buffer; value is in KiB */
		scratch_bytes = le16_to_cpu(fw_usage->used_by_driver_in_kb) * 1024;
	}

	/* size stays 0 until the allocation has actually succeeded */
	ctx->scratch_size_bytes = 0;

	/* fall back to 20 KiB when the table gave no size */
	scratch_bytes = scratch_bytes ? scratch_bytes : 20 * 1024;

	ctx->scratch = kzalloc(scratch_bytes, GFP_KERNEL);
	if (ctx->scratch == NULL)
		return -ENOMEM;

	ctx->scratch_size_bytes = scratch_bytes;
	return 0;
}
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_ATOMFIRMWARE_H__
#define __AMDGPU_ATOMFIRMWARE_H__
bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev);
void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev);
void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev);
int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
#endif
......@@ -583,8 +583,8 @@ static bool amdgpu_atpx_detect(void)
if (has_atpx && vga_count == 2) {
acpi_get_name(amdgpu_atpx_priv.atpx.handle, ACPI_FULL_PATHNAME, &buffer);
printk(KERN_INFO "vga_switcheroo: detected switching method %s handle\n",
acpi_method_name);
pr_info("vga_switcheroo: detected switching method %s handle\n",
acpi_method_name);
amdgpu_atpx_priv.atpx_detected = true;
amdgpu_atpx_priv.bridge_pm_usable = d3_supported;
amdgpu_atpx_init();
......
......@@ -86,6 +86,18 @@ static bool check_atom_bios(uint8_t *bios, size_t size)
return false;
}
/*
 * Classify a BIOS image by the revision pair found in its header.
 *
 * The 16-bit little-endian value at bytes 0x48/0x49 gives the header start;
 * the bytes at header+2 and header+3 are read as (frev, crev). Returns true
 * when that pair is at least (3, 3) compared lexicographically, false
 * otherwise.
 *
 * The caller must pass an image large enough for these reads; no length is
 * available here to check against.
 */
static bool is_atom_fw(uint8_t *bios)
{
	uint16_t hdr_start = bios[0x48] | (bios[0x49] << 8);
	const uint8_t *rev = &bios[hdr_start + 2];
	uint8_t frev = rev[0];
	uint8_t crev = rev[1];

	if (frev > 3)
		return true;

	return frev == 3 && crev >= 3;
}
/* If you boot an IGP board with a discrete card as the primary,
* the IGP rom is not accessible via the rom bar as the IGP rom is
......@@ -419,26 +431,30 @@ static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
bool amdgpu_get_bios(struct amdgpu_device *adev)
{
if (amdgpu_atrm_get_bios(adev))
return true;
goto success;
if (amdgpu_acpi_vfct_bios(adev))
return true;
goto success;
if (igp_read_bios_from_vram(adev))
return true;
goto success;
if (amdgpu_read_bios(adev))
return true;
goto success;
if (amdgpu_read_bios_from_rom(adev))
return true;
goto success;
if (amdgpu_read_disabled_bios(adev))
return true;
goto success;
if (amdgpu_read_platform_bios(adev))
return true;
goto success;
DRM_ERROR("Unable to locate a BIOS ROM\n");
return false;
success:
adev->is_atom_fw = is_atom_fw(adev->bios);
return true;
}
......@@ -571,7 +571,9 @@ static const struct amdgpu_irq_src_funcs cgs_irq_funcs = {
.process = cgs_process_irq,
};
static int amdgpu_cgs_add_irq_source(struct cgs_device *cgs_device, unsigned src_id,
static int amdgpu_cgs_add_irq_source(void *cgs_device,
unsigned client_id,
unsigned src_id,
unsigned num_types,
cgs_irq_source_set_func_t set,
cgs_irq_handler_func_t handler,
......@@ -597,7 +599,7 @@ static int amdgpu_cgs_add_irq_source(struct cgs_device *cgs_device, unsigned src
irq_params->handler = handler;
irq_params->private_data = private_data;
source->data = (void *)irq_params;
ret = amdgpu_irq_add_id(adev, src_id, source);
ret = amdgpu_irq_add_id(adev, client_id, src_id, source);
if (ret) {
kfree(irq_params);
kfree(source);
......@@ -606,16 +608,26 @@ static int amdgpu_cgs_add_irq_source(struct cgs_device *cgs_device, unsigned src
return ret;
}
static int amdgpu_cgs_irq_get(struct cgs_device *cgs_device, unsigned src_id, unsigned type)
static int amdgpu_cgs_irq_get(void *cgs_device, unsigned client_id,
unsigned src_id, unsigned type)
{
CGS_FUNC_ADEV;
return amdgpu_irq_get(adev, adev->irq.sources[src_id], type);
if (!adev->irq.client[client_id].sources)
return -EINVAL;
return amdgpu_irq_get(adev, adev->irq.client[client_id].sources[src_id], type);
}
static int amdgpu_cgs_irq_put(struct cgs_device *cgs_device, unsigned src_id, unsigned type)
static int amdgpu_cgs_irq_put(void *cgs_device, unsigned client_id,
unsigned src_id, unsigned type)
{
CGS_FUNC_ADEV;
return amdgpu_irq_put(adev, adev->irq.sources[src_id], type);
if (!adev->irq.client[client_id].sources)
return -EINVAL;
return amdgpu_irq_put(adev, adev->irq.client[client_id].sources[src_id], type);
}
static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
......@@ -825,9 +837,8 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
uint32_t ucode_start_address;
const uint8_t *src;
const struct smc_firmware_header_v1_0 *hdr;
if (CGS_UCODE_ID_SMU_SK == type)
amdgpu_cgs_rel_firmware(cgs_device, CGS_UCODE_ID_SMU);
const struct common_firmware_header *header;
struct amdgpu_firmware_info *ucode = NULL;
if (!adev->pm.fw) {
switch (adev->asic_type) {
......@@ -889,6 +900,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
case CHIP_POLARIS12:
strcpy(fw_name, "amdgpu/polaris12_smc.bin");
break;
case CHIP_VEGA10:
strcpy(fw_name, "amdgpu/vega10_smc.bin");
break;
default:
DRM_ERROR("SMC firmware not supported\n");
return -EINVAL;
......@@ -907,6 +921,15 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
adev->pm.fw = NULL;
return err;
}
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC];
ucode->ucode_id = AMDGPU_UCODE_ID_SMC;
ucode->fw = adev->pm.fw;
header = (const struct common_firmware_header *)ucode->fw->data;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
}
}
hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data;
......
......@@ -82,6 +82,15 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
return -EINVAL;
}
break;
case AMDGPU_HW_IP_UVD_ENC:
if (ring < adev->uvd.num_enc_rings){
*out_ring = &adev->uvd.ring_enc[ring];
} else {
DRM_ERROR("only %d UVD ENC rings are supported\n",
adev->uvd.num_enc_rings);
return -EINVAL;
}
break;
}
if (!(*out_ring && (*out_ring)->adev)) {
......@@ -759,23 +768,33 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
amdgpu_bo_unref(&parser->uf_entry.robj);
}
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
struct amdgpu_vm *vm)
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
{
struct amdgpu_device *adev = p->adev;
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va *bo_va;
struct amdgpu_bo *bo;
int i, r;
r = amdgpu_vm_update_page_directory(adev, vm);
r = amdgpu_vm_update_directories(adev, vm);
if (r)
return r;
r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_dir_update);
if (r)
return r;
r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence);
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
return r;
r = amdgpu_vm_clear_freed(adev, vm);
r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
if (r)
return r;
r = amdgpu_sync_fence(adev, &p->job->sync,
fpriv->prt_va->last_pt_update);
if (r)
return r;
......@@ -853,9 +872,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
}
if (p->job->vm) {
p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo);
r = amdgpu_bo_vm_update_pte(p, vm);
r = amdgpu_bo_vm_update_pte(p);
if (r)
return r;
}
......@@ -869,7 +888,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
int i, j;
int r;
int r, ce_preempt = 0, de_preempt = 0;
for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
struct amdgpu_cs_chunk *chunk;
......@@ -884,13 +903,26 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
continue;
if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) {
if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
ce_preempt++;
else
de_preempt++;
}
/* each GFX command submission may contain at most one preemptible IB for CE and one for DE */
if (ce_preempt > 1 || de_preempt > 1)
return -EINVAL;
}
r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
chunk_ib->ip_instance, chunk_ib->ring,
&ring);
if (r)
return r;
if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
if (!parser->ctx->preamble_presented) {
parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
......
......@@ -31,86 +31,88 @@
void amdgpu_dpm_print_class_info(u32 class, u32 class2)
{
printk("\tui class: ");
const char *s;
switch (class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) {
case ATOM_PPLIB_CLASSIFICATION_UI_NONE:
default:
printk("none\n");
s = "none";
break;
case ATOM_PPLIB_CLASSIFICATION_UI_BATTERY:
printk("battery\n");
s = "battery";
break;
case ATOM_PPLIB_CLASSIFICATION_UI_BALANCED:
printk("balanced\n");
s = "balanced";
break;
case ATOM_PPLIB_CLASSIFICATION_UI_PERFORMANCE:
printk("performance\n");
s = "performance";
break;
}
printk("\tinternal class: ");
printk("\tui class: %s\n", s);
printk("\tinternal class:");
if (((class & ~ATOM_PPLIB_CLASSIFICATION_UI_MASK) == 0) &&
(class2 == 0))
printk("none");
pr_cont(" none");
else {
if (class & ATOM_PPLIB_CLASSIFICATION_BOOT)
printk("boot ");
pr_cont(" boot");
if (class & ATOM_PPLIB_CLASSIFICATION_THERMAL)
printk("thermal ");
pr_cont(" thermal");
if (class & ATOM_PPLIB_CLASSIFICATION_LIMITEDPOWERSOURCE)
printk("limited_pwr ");
pr_cont(" limited_pwr");
if (class & ATOM_PPLIB_CLASSIFICATION_REST)
printk("rest ");
pr_cont(" rest");
if (class & ATOM_PPLIB_CLASSIFICATION_FORCED)
printk("forced ");
pr_cont(" forced");
if (class & ATOM_PPLIB_CLASSIFICATION_3DPERFORMANCE)
printk("3d_perf ");
pr_cont(" 3d_perf");
if (class & ATOM_PPLIB_CLASSIFICATION_OVERDRIVETEMPLATE)
printk("ovrdrv ");
pr_cont(" ovrdrv");
if (class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE)
printk("uvd ");
pr_cont(" uvd");
if (class & ATOM_PPLIB_CLASSIFICATION_3DLOW)
printk("3d_low ");
pr_cont(" 3d_low");
if (class & ATOM_PPLIB_CLASSIFICATION_ACPI)
printk("acpi ");
pr_cont(" acpi");
if (class & ATOM_PPLIB_CLASSIFICATION_HD2STATE)
printk("uvd_hd2 ");
pr_cont(" uvd_hd2");
if (class & ATOM_PPLIB_CLASSIFICATION_HDSTATE)
printk("uvd_hd ");
pr_cont(" uvd_hd");
if (class & ATOM_PPLIB_CLASSIFICATION_SDSTATE)
printk("uvd_sd ");
pr_cont(" uvd_sd");
if (class2 & ATOM_PPLIB_CLASSIFICATION2_LIMITEDPOWERSOURCE_2)
printk("limited_pwr2 ");
pr_cont(" limited_pwr2");
if (class2 & ATOM_PPLIB_CLASSIFICATION2_ULV)
printk("ulv ");
pr_cont(" ulv");
if (class2 & ATOM_PPLIB_CLASSIFICATION2_MVC)
printk("uvd_mvc ");
pr_cont(" uvd_mvc");
}
printk("\n");
pr_cont("\n");
}
void amdgpu_dpm_print_cap_info(u32 caps)
{
printk("\tcaps: ");
printk("\tcaps:");
if (caps & ATOM_PPLIB_SINGLE_DISPLAY_ONLY)
printk("single_disp ");
pr_cont(" single_disp");
if (caps & ATOM_PPLIB_SUPPORTS_VIDEO_PLAYBACK)
printk("video ");
pr_cont(" video");
if (caps & ATOM_PPLIB_DISALLOW_ON_DC)
printk("no_dc ");
printk("\n");
pr_cont(" no_dc");
pr_cont("\n");
}
void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev,
struct amdgpu_ps *rps)
{
printk("\tstatus: ");
printk("\tstatus:");
if (rps == adev->pm.dpm.current_ps)
printk("c ");
pr_cont(" c");
if (rps == adev->pm.dpm.requested_ps)
printk("r ");
pr_cont(" r");
if (rps == adev->pm.dpm.boot_ps)
printk("b ");
printk("\n");
pr_cont(" b");
pr_cont("\n");
}
......
......@@ -270,8 +270,18 @@ struct amdgpu_dpm_funcs {
struct amdgpu_ps *cps,
struct amdgpu_ps *rps,
bool *equal);
int (*read_sensor)(struct amdgpu_device *adev, int idx, void *value,
int *size);
struct amd_vce_state* (*get_vce_clock_state)(struct amdgpu_device *adev, unsigned idx);
int (*reset_power_profile_state)(struct amdgpu_device *adev,
struct amd_pp_profile *request);
int (*get_power_profile_state)(struct amdgpu_device *adev,
struct amd_pp_profile *query);
int (*set_power_profile_state)(struct amdgpu_device *adev,
struct amd_pp_profile *request);
int (*switch_power_profile)(struct amdgpu_device *adev,
enum amd_pp_profile_type type);
};
#define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev))
......@@ -282,10 +292,10 @@ struct amdgpu_dpm_funcs {
#define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs->vblank_too_short((adev))
#define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
#define amdgpu_dpm_read_sensor(adev, idx, value) \
#define amdgpu_dpm_read_sensor(adev, idx, value, size) \
((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, (idx), (value)) : \
-EINVAL)
(adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, (idx), (value), (size)) : \
(adev)->pm.funcs->read_sensor((adev), (idx), (value), (size)))
#define amdgpu_dpm_get_temperature(adev) \
((adev)->pp_enabled ? \
......@@ -388,6 +398,22 @@ struct amdgpu_dpm_funcs {
(adev)->powerplay.pp_funcs->get_performance_level((adev)->powerplay.pp_handle) : \
(adev)->pm.dpm.forced_level)
/*
 * Power profile helpers.
 *
 * Each macro forwards to the powerplay pp_funcs callback of the same name,
 * passing (adev)->powerplay.pp_handle followed by the caller's argument, and
 * evaluates to that callback's return value.
 *
 * Unlike amdgpu_dpm_read_sensor, these do not test (adev)->pp_enabled and
 * have no (adev)->pm.funcs fallback.
 * NOTE(review): presumably callers may only use them when powerplay is
 * enabled and the callback is populated - confirm against the call sites.
 */
#define amdgpu_dpm_reset_power_profile_state(adev, request) \
((adev)->powerplay.pp_funcs->reset_power_profile_state(\
(adev)->powerplay.pp_handle, request))
#define amdgpu_dpm_get_power_profile_state(adev, query) \
((adev)->powerplay.pp_funcs->get_power_profile_state(\
(adev)->powerplay.pp_handle, query))
#define amdgpu_dpm_set_power_profile_state(adev, request) \
((adev)->powerplay.pp_funcs->set_power_profile_state(\
(adev)->powerplay.pp_handle, request))
#define amdgpu_dpm_switch_power_profile(adev, type) \
((adev)->powerplay.pp_funcs->switch_power_profile(\
(adev)->powerplay.pp_handle, type))
struct amdgpu_dpm {
struct amdgpu_ps *ps;
/* number of valid power states */
......
......@@ -60,9 +60,12 @@
* - 3.8.0 - Add support raster config init in the kernel
* - 3.9.0 - Add support for memory query info about VRAM and GTT.
* - 3.10.0 - Add support for new fences ioctl, new gem ioctl flags
* - 3.11.0 - Add support for sensor query info (clocks, temp, etc).
* - 3.12.0 - Add query for double offchip LDS buffers
* - 3.13.0 - Add PRT support
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 10
#define KMS_DRIVER_MINOR 13
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
......@@ -77,7 +80,7 @@ int amdgpu_pcie_gen2 = -1;
int amdgpu_msi = -1;
int amdgpu_lockup_timeout = 0;
int amdgpu_dpm = -1;
int amdgpu_smc_load_fw = 1;
int amdgpu_fw_load_type = -1;
int amdgpu_aspm = -1;
int amdgpu_runtime_pm = -1;
unsigned amdgpu_ip_block_mask = 0xffffffff;
......@@ -100,6 +103,11 @@ unsigned amdgpu_pg_mask = 0xffffffff;
char *amdgpu_disable_cu = NULL;
char *amdgpu_virtual_display = NULL;
unsigned amdgpu_pp_feature_mask = 0xffffffff;
int amdgpu_ngg = 0;
int amdgpu_prim_buf_per_se = 0;
int amdgpu_pos_buf_per_se = 0;
int amdgpu_cntl_sb_buf_per_se = 0;
int amdgpu_param_buf_per_se = 0;
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
......@@ -137,8 +145,8 @@ module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444);
MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(dpm, amdgpu_dpm, int, 0444);
MODULE_PARM_DESC(smc_load_fw, "SMC firmware loading(1 = enable, 0 = disable)");
module_param_named(smc_load_fw, amdgpu_smc_load_fw, int, 0444);
MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = direct, 1 = SMU, 2 = PSP, -1 = auto)");
module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444);
MODULE_PARM_DESC(aspm, "ASPM support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(aspm, amdgpu_aspm, int, 0444);
......@@ -207,6 +215,22 @@ MODULE_PARM_DESC(virtual_display,
"Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)");
module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))");
module_param_named(ngg, amdgpu_ngg, int, 0444);
MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)");
module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444);
MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)");
module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444);
MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)");
module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
/*
 * param_buf_per_se: read-only (0444) module parameter backed by
 * amdgpu_param_buf_per_se, which is initialised to 0 in this file.
 */
MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)");
module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
......@@ -409,6 +433,7 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x67C2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
{0x1002, 0x67C4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
{0x1002, 0x67C7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
{0x1002, 0x67D0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
{0x1002, 0x67DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
{0x1002, 0x67C8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
{0x1002, 0x67C9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
......@@ -423,7 +448,14 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
/* Vega 10 */
{0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
{0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
{0x1002, 0x6862, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
{0x1002, 0x6863, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
{0x1002, 0x6867, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
{0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
{0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
{0, 0, 0}
};
......@@ -686,7 +718,6 @@ static struct drm_driver kms_driver = {
DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
.load = amdgpu_driver_load_kms,
.open = amdgpu_driver_open_kms,
.preclose = amdgpu_driver_preclose_kms,
.postclose = amdgpu_driver_postclose_kms,
.lastclose = amdgpu_driver_lastclose_kms,
.set_busid = drm_pci_set_busid,
......
......@@ -147,11 +147,11 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
ret = amdgpu_gem_object_create(adev, aligned_size, 0,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_VRAM_CLEARED,
true, &gobj);
if (ret) {
printk(KERN_ERR "failed to allocate framebuffer (%d)\n",
aligned_size);
pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
return -ENOMEM;
}
abo = gem_to_amdgpu_bo(gobj);
......@@ -241,8 +241,6 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
/* setup helper */
rfbdev->helper.fb = fb;
memset_io(abo->kptr, 0x0, amdgpu_bo_size(abo));
strcpy(info->fix.id, "amdgpudrmfb");
drm_fb_helper_fill_fix(info, fb->pitches[0], fb->format->depth);
......
......@@ -229,7 +229,8 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
unsigned p;
int i, j;
u64 page_base;
uint32_t flags = AMDGPU_PTE_SYSTEM;
/* Starting from VEGA10, system bit must be 0 to mean invalid. */
uint64_t flags = 0;
if (!adev->gart.ready) {
WARN(1, "trying to unbind memory from uninitialized GART !\n");
......@@ -271,7 +272,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
*/
int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
int pages, struct page **pagelist, dma_addr_t *dma_addr,
uint32_t flags)
uint64_t flags)
{
unsigned t;
unsigned p;
......
......@@ -152,6 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
struct amdgpu_bo_va *bo_va;
struct dma_fence *fence = NULL;
int r;
INIT_LIST_HEAD(&list);
......@@ -173,6 +174,17 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
if (bo_va) {
if (--bo_va->ref_count == 0) {
amdgpu_vm_bo_rmv(adev, bo_va);
r = amdgpu_vm_clear_freed(adev, vm, &fence);
if (unlikely(r)) {
dev_err(adev->dev, "failed to clear page "
"tables on GEM object close (%d)\n", r);
}
if (fence) {
amdgpu_bo_fence(bo, fence, true);
dma_fence_put(fence);
}
}
}
ttm_eu_backoff_reservation(&ticket, &list);
......@@ -507,14 +519,16 @@ static int amdgpu_gem_va_check(void *param, struct amdgpu_bo *bo)
* amdgpu_gem_va_update_vm -update the bo_va in its VM
*
* @adev: amdgpu_device pointer
* @vm: vm to update
* @bo_va: bo_va to update
* @list: validation list
* @operation: map or unmap
* @operation: map, unmap, clear or replace
*
* Update the bo_va directly after setting its address. Errors are not
* vital here, so they are not reported back to userspace.
*/
static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_bo_va *bo_va,
struct list_head *list,
uint32_t operation)
......@@ -529,20 +543,21 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
goto error;
}
r = amdgpu_vm_validate_pt_bos(adev, bo_va->vm, amdgpu_gem_va_check,
r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_check,
NULL);
if (r)
goto error;
r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
r = amdgpu_vm_update_directories(adev, vm);
if (r)
goto error;
r = amdgpu_vm_clear_freed(adev, bo_va->vm);
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
goto error;
if (operation == AMDGPU_VA_OP_MAP)
if (operation == AMDGPU_VA_OP_MAP ||
operation == AMDGPU_VA_OP_REPLACE)
r = amdgpu_vm_bo_update(adev, bo_va, false);
error:
......@@ -553,6 +568,12 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE |
AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK;
const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE |
AMDGPU_VM_PAGE_PRT;
struct drm_amdgpu_gem_va *args = data;
struct drm_gem_object *gobj;
struct amdgpu_device *adev = dev->dev_private;
......@@ -563,7 +584,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
struct list_head list;
uint32_t invalid_flags, va_flags = 0;
uint64_t va_flags;
int r = 0;
if (!adev->vm_manager.enabled)
......@@ -577,17 +598,17 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
invalid_flags = ~(AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_READABLE |
AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_PAGE_EXECUTABLE);
if ((args->flags & invalid_flags)) {
dev_err(&dev->pdev->dev, "invalid flags 0x%08X vs 0x%08X\n",
args->flags, invalid_flags);
if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) {
dev_err(&dev->pdev->dev, "invalid flags combination 0x%08X\n",
args->flags);
return -EINVAL;
}
switch (args->operation) {
case AMDGPU_VA_OP_MAP:
case AMDGPU_VA_OP_UNMAP:
case AMDGPU_VA_OP_CLEAR:
case AMDGPU_VA_OP_REPLACE:
break;
default:
dev_err(&dev->pdev->dev, "unsupported operation %d\n",
......@@ -595,38 +616,47 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
gobj = drm_gem_object_lookup(filp, args->handle);
if (gobj == NULL)
return -ENOENT;
abo = gem_to_amdgpu_bo(gobj);
INIT_LIST_HEAD(&list);
tv.bo = &abo->tbo;
tv.shared = false;
list_add(&tv.head, &list);
if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
!(args->flags & AMDGPU_VM_PAGE_PRT)) {
gobj = drm_gem_object_lookup(filp, args->handle);
if (gobj == NULL)
return -ENOENT;
abo = gem_to_amdgpu_bo(gobj);
tv.bo = &abo->tbo;
tv.shared = false;
list_add(&tv.head, &list);
} else {
gobj = NULL;
abo = NULL;
}
amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
if (r) {
drm_gem_object_unreference_unlocked(gobj);
return r;
}
if (r)
goto error_unref;
bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo);
if (!bo_va) {
ttm_eu_backoff_reservation(&ticket, &list);
drm_gem_object_unreference_unlocked(gobj);
return -ENOENT;
if (abo) {
bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo);
if (!bo_va) {
r = -ENOENT;
goto error_backoff;
}
} else if (args->operation != AMDGPU_VA_OP_CLEAR) {
bo_va = fpriv->prt_va;
} else {
bo_va = NULL;
}
switch (args->operation) {
case AMDGPU_VA_OP_MAP:
if (args->flags & AMDGPU_VM_PAGE_READABLE)
va_flags |= AMDGPU_PTE_READABLE;
if (args->flags & AMDGPU_VM_PAGE_WRITEABLE)
va_flags |= AMDGPU_PTE_WRITEABLE;
if (args->flags & AMDGPU_VM_PAGE_EXECUTABLE)
va_flags |= AMDGPU_PTE_EXECUTABLE;
r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address,
args->map_size);
if (r)
goto error_backoff;
va_flags = amdgpu_vm_get_pte_flags(adev, args->flags);
r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
args->offset_in_bo, args->map_size,
va_flags);
......@@ -634,14 +664,34 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
case AMDGPU_VA_OP_UNMAP:
r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address);
break;
case AMDGPU_VA_OP_CLEAR:
r = amdgpu_vm_bo_clear_mappings(adev, &fpriv->vm,
args->va_address,
args->map_size);
break;
case AMDGPU_VA_OP_REPLACE:
r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address,
args->map_size);
if (r)
goto error_backoff;
va_flags = amdgpu_vm_get_pte_flags(adev, args->flags);
r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
args->offset_in_bo, args->map_size,
va_flags);
break;
default:
break;
}
if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
!amdgpu_vm_debug)
amdgpu_gem_va_update_vm(adev, bo_va, &list, args->operation);
if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, &list,
args->operation);
error_backoff:
ttm_eu_backoff_reservation(&ticket, &list);
error_unref:
drm_gem_object_unreference_unlocked(gobj);
return r;
}
......
......@@ -161,9 +161,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
return r;
}
if (ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
if (vm) {
r = amdgpu_vm_flush(ring, job);
if (r) {
......@@ -172,7 +169,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
}
}
if (ring->funcs->emit_hdp_flush)
if (ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
if (ring->funcs->emit_hdp_flush
#ifdef CONFIG_X86_64
&& !(adev->flags & AMD_IS_APU)
#endif
)
amdgpu_ring_emit_hdp_flush(ring);
skip_preamble = ring->current_ctx == fence_ctx;
......@@ -202,7 +206,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
need_ctx_switch = false;
}
if (ring->funcs->emit_hdp_invalidate)
if (ring->funcs->emit_hdp_invalidate
#ifdef CONFIG_X86_64
&& !(adev->flags & AMD_IS_APU)
#endif
)
amdgpu_ring_emit_hdp_invalidate(ring);
r = amdgpu_fence_emit(ring, f);
......@@ -214,6 +222,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
return r;
}
if (ring->funcs->insert_end)
ring->funcs->insert_end(ring);
/* wrap the last IB with fence */
if (job && job->uf_addr) {
amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
......
......@@ -25,6 +25,48 @@
#define __AMDGPU_IH_H__
struct amdgpu_device;
/*
* vega10+ IH clients
*/
/*
 * Interrupt handler (IH) client identifiers.
 *
 * A client id is carried in struct amdgpu_iv_entry.client_id and selects the
 * per-client source table in struct amdgpu_irq (client[]), so the numeric
 * values below must not be reordered.
 */
enum amdgpu_ih_clientid
{
AMDGPU_IH_CLIENTID_IH = 0x00,
AMDGPU_IH_CLIENTID_ACP = 0x01,
AMDGPU_IH_CLIENTID_ATHUB = 0x02,
AMDGPU_IH_CLIENTID_BIF = 0x03,
AMDGPU_IH_CLIENTID_DCE = 0x04,
AMDGPU_IH_CLIENTID_ISP = 0x05,
AMDGPU_IH_CLIENTID_PCIE0 = 0x06,
AMDGPU_IH_CLIENTID_RLC = 0x07,
AMDGPU_IH_CLIENTID_SDMA0 = 0x08,
AMDGPU_IH_CLIENTID_SDMA1 = 0x09,
AMDGPU_IH_CLIENTID_SE0SH = 0x0a,
AMDGPU_IH_CLIENTID_SE1SH = 0x0b,
AMDGPU_IH_CLIENTID_SE2SH = 0x0c,
AMDGPU_IH_CLIENTID_SE3SH = 0x0d,
AMDGPU_IH_CLIENTID_SYSHUB = 0x0e,
AMDGPU_IH_CLIENTID_THM = 0x0f,
AMDGPU_IH_CLIENTID_UVD = 0x10,
AMDGPU_IH_CLIENTID_VCE0 = 0x11,
AMDGPU_IH_CLIENTID_VMC = 0x12,
AMDGPU_IH_CLIENTID_XDMA = 0x13,
AMDGPU_IH_CLIENTID_GRBM_CP = 0x14,
AMDGPU_IH_CLIENTID_ATS = 0x15,
AMDGPU_IH_CLIENTID_ROM_SMUIO = 0x16,
AMDGPU_IH_CLIENTID_DF = 0x17,
AMDGPU_IH_CLIENTID_VCE1 = 0x18,
AMDGPU_IH_CLIENTID_PWR = 0x19,
/* 0x1a has no enumerator here */
AMDGPU_IH_CLIENTID_UTCL2 = 0x1b,
AMDGPU_IH_CLIENTID_EA = 0x1c,
AMDGPU_IH_CLIENTID_UTCL2LOG = 0x1d,
AMDGPU_IH_CLIENTID_MP0 = 0x1e,
AMDGPU_IH_CLIENTID_MP1 = 0x1f,
/* one past the last id (0x20); used as the bound for client id checks and table sizes */
AMDGPU_IH_CLIENTID_MAX
};
#define AMDGPU_IH_CLIENTID_LEGACY 0
/*
* R6xx+ IH ring
......@@ -46,12 +88,19 @@ struct amdgpu_ih_ring {
dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */
};
#define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4
struct amdgpu_iv_entry {
unsigned client_id;
unsigned src_id;
unsigned src_data;
unsigned ring_id;
unsigned vm_id;
unsigned vm_id_src;
uint64_t timestamp;
unsigned timestamp_src;
unsigned pas_id;
unsigned pasid_src;
unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW];
const uint32_t *iv_entry;
};
......
......@@ -33,6 +33,7 @@
#include "amdgpu_ih.h"
#include "atom.h"
#include "amdgpu_connectors.h"
#include "amdgpu_trace.h"
#include <linux/pm_runtime.h>
......@@ -89,23 +90,28 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
static void amdgpu_irq_disable_all(struct amdgpu_device *adev)
{
unsigned long irqflags;
unsigned i, j;
unsigned i, j, k;
int r;
spin_lock_irqsave(&adev->irq.lock, irqflags);
for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; ++i) {
struct amdgpu_irq_src *src = adev->irq.sources[i];
if (!src || !src->funcs->set || !src->num_types)
for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;
for (j = 0; j < src->num_types; ++j) {
atomic_set(&src->enabled_types[j], 0);
r = src->funcs->set(adev, src, j,
AMDGPU_IRQ_STATE_DISABLE);
if (r)
DRM_ERROR("error disabling interrupt (%d)\n",
r);
for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) {
struct amdgpu_irq_src *src = adev->irq.client[i].sources[j];
if (!src || !src->funcs->set || !src->num_types)
continue;
for (k = 0; k < src->num_types; ++k) {
atomic_set(&src->enabled_types[k], 0);
r = src->funcs->set(adev, src, k,
AMDGPU_IRQ_STATE_DISABLE);
if (r)
DRM_ERROR("error disabling interrupt (%d)\n",
r);
}
}
}
spin_unlock_irqrestore(&adev->irq.lock, irqflags);
......@@ -254,7 +260,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
*/
void amdgpu_irq_fini(struct amdgpu_device *adev)
{
unsigned i;
unsigned i, j;
drm_vblank_cleanup(adev->ddev);
if (adev->irq.installed) {
......@@ -266,19 +272,25 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
cancel_work_sync(&adev->reset_work);
}
for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; ++i) {
struct amdgpu_irq_src *src = adev->irq.sources[i];
if (!src)
for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;
kfree(src->enabled_types);
src->enabled_types = NULL;
if (src->data) {
kfree(src->data);
kfree(src);
adev->irq.sources[i] = NULL;
for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) {
struct amdgpu_irq_src *src = adev->irq.client[i].sources[j];
if (!src)
continue;
kfree(src->enabled_types);
src->enabled_types = NULL;
if (src->data) {
kfree(src->data);
kfree(src);
adev->irq.client[i].sources[j] = NULL;
}
}
kfree(adev->irq.client[i].sources);
}
}
......@@ -290,18 +302,30 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
* @source: irq source
*
*/
int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned src_id,
int amdgpu_irq_add_id(struct amdgpu_device *adev,
unsigned client_id, unsigned src_id,
struct amdgpu_irq_src *source)
{
if (src_id >= AMDGPU_MAX_IRQ_SRC_ID)
if (client_id >= AMDGPU_IH_CLIENTID_MAX)
return -EINVAL;
if (adev->irq.sources[src_id] != NULL)
if (src_id >= AMDGPU_MAX_IRQ_SRC_ID)
return -EINVAL;
if (!source->funcs)
return -EINVAL;
if (!adev->irq.client[client_id].sources) {
	/*
	 * The per-client table is created on the first registration for
	 * this client. It is an array of AMDGPU_MAX_IRQ_SRC_ID *pointers*
	 * to sources, so the element size is taken from the pointer the
	 * array is assigned to, not from struct amdgpu_irq_src itself.
	 */
	adev->irq.client[client_id].sources =
		kcalloc(AMDGPU_MAX_IRQ_SRC_ID,
			sizeof(*adev->irq.client[client_id].sources),
			GFP_KERNEL);
	if (!adev->irq.client[client_id].sources)
		return -ENOMEM;
}
if (adev->irq.client[client_id].sources[src_id] != NULL)
return -EINVAL;
if (source->num_types && !source->enabled_types) {
atomic_t *types;
......@@ -313,8 +337,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned src_id,
source->enabled_types = types;
}
adev->irq.sources[src_id] = source;
adev->irq.client[client_id].sources[src_id] = source;
return 0;
}
......@@ -329,10 +352,18 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned src_id,
void amdgpu_irq_dispatch(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
unsigned client_id = entry->client_id;
unsigned src_id = entry->src_id;
struct amdgpu_irq_src *src;
int r;
trace_amdgpu_iv(entry);
if (client_id >= AMDGPU_IH_CLIENTID_MAX) {
DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
return;
}
if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) {
DRM_DEBUG("Invalid src_id in IV: %d\n", src_id);
return;
......@@ -341,7 +372,13 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
if (adev->irq.virq[src_id]) {
generic_handle_irq(irq_find_mapping(adev->irq.domain, src_id));
} else {
src = adev->irq.sources[src_id];
if (!adev->irq.client[client_id].sources) {
DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n",
client_id, src_id);
return;
}
src = adev->irq.client[client_id].sources[src_id];
if (!src) {
DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id);
return;
......@@ -385,13 +422,20 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
{
int i, j;
for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; i++) {
struct amdgpu_irq_src *src = adev->irq.sources[i];
if (!src)
int i, j, k;
for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;
for (j = 0; j < src->num_types; j++)
amdgpu_irq_update(adev, src, j);
for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) {
struct amdgpu_irq_src *src = adev->irq.client[i].sources[j];
if (!src)
continue;
for (k = 0; k < src->num_types; k++)
amdgpu_irq_update(adev, src, k);
}
}
}
......
......@@ -28,6 +28,7 @@
#include "amdgpu_ih.h"
#define AMDGPU_MAX_IRQ_SRC_ID 0x100
#define AMDGPU_MAX_IRQ_CLIENT_ID 0x100
struct amdgpu_device;
struct amdgpu_iv_entry;
......@@ -44,6 +45,10 @@ struct amdgpu_irq_src {
void *data;
};
/* Interrupt sources registered for one IH client id. */
struct amdgpu_irq_client {
/*
 * Table of source pointers indexed by src_id. It stays NULL until
 * amdgpu_irq_add_id() registers the first source for this client, which
 * allocates AMDGPU_MAX_IRQ_SRC_ID entries; users check for NULL before
 * indexing.
 */
struct amdgpu_irq_src **sources;
};
/* provided by interrupt generating IP blocks */
struct amdgpu_irq_src_funcs {
int (*set)(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
......@@ -58,7 +63,7 @@ struct amdgpu_irq {
bool installed;
spinlock_t lock;
/* interrupt sources */
struct amdgpu_irq_src *sources[AMDGPU_MAX_IRQ_SRC_ID];
struct amdgpu_irq_client client[AMDGPU_IH_CLIENTID_MAX];
/* status, etc. */
bool msi_enabled; /* msi enabled */
......@@ -80,7 +85,8 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg);
int amdgpu_irq_init(struct amdgpu_device *adev);
void amdgpu_irq_fini(struct amdgpu_device *adev);
int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned src_id,
int amdgpu_irq_add_id(struct amdgpu_device *adev,
unsigned client_id, unsigned src_id,
struct amdgpu_irq_src *source);
void amdgpu_irq_dispatch(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
......
......@@ -208,6 +208,14 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->sdma.instance[query_fw->index].fw_version;
fw_info->feature = adev->sdma.instance[query_fw->index].feature_version;
break;
case AMDGPU_INFO_FW_SOS:
fw_info->ver = adev->psp.sos_fw_version;
fw_info->feature = adev->psp.sos_feature_version;
break;
case AMDGPU_INFO_FW_ASD:
fw_info->ver = adev->psp.asd_fw_version;
fw_info->feature = adev->psp.asd_feature_version;
break;
default:
return -EINVAL;
}
......@@ -240,6 +248,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
uint32_t ui32 = 0;
uint64_t ui64 = 0;
int i, found;
int ui32_size = sizeof(ui32);
if (!info->return_size || !info->return_pointer)
return -EINVAL;
......@@ -308,6 +317,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
for (i = 0; i < adev->uvd.num_enc_rings; i++)
ring_mask |= ((adev->uvd.ring_enc[i].ready ? 1 : 0) << i);
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 1;
break;
default:
return -EINVAL;
}
......@@ -347,6 +363,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
break;
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
break;
default:
return -EINVAL;
}
......@@ -527,6 +546,15 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.vram_type = adev->mc.vram_type;
dev_info.vram_bit_width = adev->mc.vram_width;
dev_info.vce_harvest_config = adev->vce.harvest_config;
dev_info.gc_double_offchip_lds_buf =
adev->gfx.config.double_offchip_lds_buf;
if (amdgpu_ngg) {
dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[PRIM].gpu_addr;
dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[POS].gpu_addr;
dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[CNTL].gpu_addr;
dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[PARAM].gpu_addr;
}
return copy_to_user(out, &dev_info,
min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0;
......@@ -596,6 +624,80 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
return -EINVAL;
}
}
case AMDGPU_INFO_SENSOR: {
struct pp_gpu_power query = {0};
int query_size = sizeof(query);
if (amdgpu_dpm == 0)
return -ENOENT;
switch (info->sensor_info.type) {
case AMDGPU_INFO_SENSOR_GFX_SCLK:
/* get sclk in Mhz */
if (amdgpu_dpm_read_sensor(adev,
AMDGPU_PP_SENSOR_GFX_SCLK,
(void *)&ui32, &ui32_size)) {
return -EINVAL;
}
ui32 /= 100;
break;
case AMDGPU_INFO_SENSOR_GFX_MCLK:
/* get mclk in Mhz */
if (amdgpu_dpm_read_sensor(adev,
AMDGPU_PP_SENSOR_GFX_MCLK,
(void *)&ui32, &ui32_size)) {
return -EINVAL;
}
ui32 /= 100;
break;
case AMDGPU_INFO_SENSOR_GPU_TEMP:
/* get temperature in millidegrees C */
if (amdgpu_dpm_read_sensor(adev,
AMDGPU_PP_SENSOR_GPU_TEMP,
(void *)&ui32, &ui32_size)) {
return -EINVAL;
}
break;
case AMDGPU_INFO_SENSOR_GPU_LOAD:
/* get GPU load */
if (amdgpu_dpm_read_sensor(adev,
AMDGPU_PP_SENSOR_GPU_LOAD,
(void *)&ui32, &ui32_size)) {
return -EINVAL;
}
break;
case AMDGPU_INFO_SENSOR_GPU_AVG_POWER:
/* get average GPU power */
if (amdgpu_dpm_read_sensor(adev,
AMDGPU_PP_SENSOR_GPU_POWER,
(void *)&query, &query_size)) {
return -EINVAL;
}
ui32 = query.average_gpu_power >> 8;
break;
case AMDGPU_INFO_SENSOR_VDDNB:
/* get VDDNB in millivolts */
if (amdgpu_dpm_read_sensor(adev,
AMDGPU_PP_SENSOR_VDDNB,
(void *)&ui32, &ui32_size)) {
return -EINVAL;
}
break;
case AMDGPU_INFO_SENSOR_VDDGFX:
/* get VDDGFX in millivolts */
if (amdgpu_dpm_read_sensor(adev,
AMDGPU_PP_SENSOR_VDDGFX,
(void *)&ui32, &ui32_size)) {
return -EINVAL;
}
break;
default:
DRM_DEBUG_KMS("Invalid request %d\n",
info->sensor_info.type);
return -EINVAL;
}
return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
}
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
......@@ -655,6 +757,14 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
goto out_suspend;
}
fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
if (!fpriv->prt_va) {
r = -ENOMEM;
amdgpu_vm_fini(adev, &fpriv->vm);
kfree(fpriv);
goto out_suspend;
}
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_map_static_csa(adev, &fpriv->vm);
if (r)
......@@ -694,11 +804,15 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
if (!fpriv)
return;
pm_runtime_get_sync(dev->dev);
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
amdgpu_uvd_free_handles(adev, file_priv);
amdgpu_vce_free_handles(adev, file_priv);
amdgpu_vm_bo_rmv(adev, fpriv->prt_va);
if (amdgpu_sriov_vf(adev)) {
/* TODO: how to handle reserve failure */
BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false));
......@@ -722,21 +836,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
pm_runtime_put_autosuspend(dev->dev);
}
/**
* amdgpu_driver_preclose_kms - drm callback for pre close
*
* @dev: drm dev pointer
* @file_priv: drm file
*
* On device pre close, call pm_runtime_get_sync() on the underlying
* device. No other teardown is performed here; @file_priv is unused.
*/
void amdgpu_driver_preclose_kms(struct drm_device *dev,
struct drm_file *file_priv)
{
pm_runtime_get_sync(dev->dev);
}
/*
* VBlank related functions.
*/
......@@ -989,6 +1088,23 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
fw_info.feature, fw_info.ver);
}
/* PSP SOS */
query_fw.fw_type = AMDGPU_INFO_FW_SOS;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "SOS feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* PSP ASD */
query_fw.fw_type = AMDGPU_INFO_FW_ASD;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "ASD feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* SMC */
query_fw.fw_type = AMDGPU_INFO_FW_SMC;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
......
......@@ -395,32 +395,18 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
amdgpu_fill_placement_to_bo(bo, placement);
/* Kernel allocation are uninterruptible */
if (!resv) {
bool locked;
reservation_object_init(&bo->tbo.ttm_resv);
locked = ww_mutex_trylock(&bo->tbo.ttm_resv.lock);
WARN_ON(!locked);
}
initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
&bo->placement, page_align, !kernel, NULL,
acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
&amdgpu_ttm_bo_destroy);
r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
&bo->placement, page_align, !kernel, NULL,
acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
amdgpu_cs_report_moved_bytes(adev,
atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved);
if (unlikely(r != 0)) {
if (!resv)
ww_mutex_unlock(&bo->tbo.resv->lock);
if (unlikely(r != 0))
return r;
}
bo->tbo.priority = ilog2(bo->tbo.num_pages);
if (kernel)
bo->tbo.priority *= 2;
bo->tbo.priority = min(bo->tbo.priority, (unsigned)(TTM_MAX_BO_PRIORITY - 1));
bo->tbo.priority = 1;
if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
......@@ -436,7 +422,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
dma_fence_put(fence);
}
if (!resv)
ww_mutex_unlock(&bo->tbo.resv->lock);
amdgpu_bo_unreserve(bo);
*bo_ptr = bo;
trace_amdgpu_bo_create(bo);
......@@ -827,7 +813,10 @@ int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
{
if (AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6)
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
if (adev->family <= AMDGPU_FAMILY_CZ &&
AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6)
return -EINVAL;
bo->tiling_flags = tiling_flags;
......
......@@ -43,16 +43,22 @@ static const struct cg_flag_name clocks[] = {
{AMD_CG_SUPPORT_GFX_CGTS_LS, "Graphics Coarse Grain Tree Shader Light Sleep"},
{AMD_CG_SUPPORT_GFX_CP_LS, "Graphics Command Processor Light Sleep"},
{AMD_CG_SUPPORT_GFX_RLC_LS, "Graphics Run List Controller Light Sleep"},
{AMD_CG_SUPPORT_GFX_3D_CGCG, "Graphics 3D Coarse Grain Clock Gating"},
{AMD_CG_SUPPORT_GFX_3D_CGLS, "Graphics 3D Coarse Grain memory Light Sleep"},
{AMD_CG_SUPPORT_MC_LS, "Memory Controller Light Sleep"},
{AMD_CG_SUPPORT_MC_MGCG, "Memory Controller Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_SDMA_LS, "System Direct Memory Access Light Sleep"},
{AMD_CG_SUPPORT_SDMA_MGCG, "System Direct Memory Access Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_BIF_MGCG, "Bus Interface Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_BIF_LS, "Bus Interface Light Sleep"},
{AMD_CG_SUPPORT_UVD_MGCG, "Unified Video Decoder Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_VCE_MGCG, "Video Compression Engine Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_HDP_LS, "Host Data Path Light Sleep"},
{AMD_CG_SUPPORT_HDP_MGCG, "Host Data Path Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_DRM_MGCG, "Digital Right Management Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_DRM_LS, "Digital Right Management Light Sleep"},
{AMD_CG_SUPPORT_ROM_MGCG, "Rom Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_DF_MGCG, "Data Fabric Medium Grain Clock Gating"},
{0, NULL},
};
......@@ -610,6 +616,174 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
return count;
}
/*
 * Query the power profile selected by query->type and print it into @buf.
 *
 * The profile is fetched through powerplay when it is enabled, otherwise
 * through the legacy pm callback if one is installed. min_sclk and
 * min_mclk are divided by 100 before printing.
 *
 * Returns the number of characters written, or the non-zero value
 * returned by the backend query.
 */
static ssize_t amdgpu_get_pp_power_profile(struct device *dev,
		char *buf, struct amd_pp_profile *query)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm->dev_private;
	int err = 0;

	if (adev->pp_enabled) {
		err = amdgpu_dpm_get_power_profile_state(adev, query);
	} else if (adev->pm.funcs->get_power_profile_state) {
		err = adev->pm.funcs->get_power_profile_state(adev, query);
	}

	if (err)
		return err;

	return snprintf(buf, PAGE_SIZE, "%d %d %d %d %d\n",
			query->min_sclk / 100, query->min_mclk / 100,
			query->activity_threshold,
			query->up_hyst, query->down_hyst);
}
static ssize_t amdgpu_get_pp_gfx_power_profile(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct amd_pp_profile query = {0};
query.type = AMD_PP_GFX_PROFILE;
return amdgpu_get_pp_power_profile(dev, buf, &query);
}
static ssize_t amdgpu_get_pp_compute_power_profile(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct amd_pp_profile query = {0};
query.type = AMD_PP_COMPUTE_PROFILE;
return amdgpu_get_pp_power_profile(dev, buf, &query);
}
/*
 * Parse a power profile request written through sysfs and apply it.
 *
 * Accepted input in @buf:
 *   "reset..."  - reset the profile selected by request->type
 *   "set..."    - apply @request exactly as passed in (nothing is parsed)
 *   otherwise   - up to five integers separated by a single space or
 *                 newline, in the order:
 *                   min_sclk (MHz), min_mclk (MHz), activity_threshold,
 *                   up_hyst, down_hyst
 *                 Extra values are ignored.
 *
 * The request is applied through powerplay when enabled, otherwise through
 * the legacy pm callback if one is installed.
 *
 * Returns @count on success and -EINVAL on any parse or backend failure.
 */
static ssize_t amdgpu_set_pp_power_profile(struct device *dev,
		const char *buf,
		size_t count,
		struct amd_pp_profile *request)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint32_t loop = 0;
	char *sub_str, buf_cpy[128], *tmp_str;
	const char delimiter[3] = {' ', '\n', '\0'};
	long int value;
	int ret = 0;

	if (strncmp("reset", buf, strlen("reset")) == 0) {
		if (adev->pp_enabled)
			ret = amdgpu_dpm_reset_power_profile_state(
					adev, request);
		else if (adev->pm.funcs->reset_power_profile_state)
			ret = adev->pm.funcs->reset_power_profile_state(
					adev, request);
		if (ret)
			return -EINVAL;
		return count;
	}

	if (strncmp("set", buf, strlen("set")) == 0) {
		if (adev->pp_enabled)
			ret = amdgpu_dpm_set_power_profile_state(
					adev, request);
		else if (adev->pm.funcs->set_power_profile_state)
			ret = adev->pm.funcs->set_power_profile_state(
					adev, request);
		if (ret)
			return -EINVAL;
		return count;
	}

	/* Leave room for the terminator; written so it cannot wrap. */
	if (count >= sizeof(buf_cpy) - 1)
		return -EINVAL;

	/* Work on a private, explicitly terminated copy: strsep() modifies it. */
	memcpy(buf_cpy, buf, count);
	buf_cpy[count] = '\0';
	tmp_str = buf_cpy;

	/*
	 * strsep() sets tmp_str to NULL once the final token has been
	 * consumed (input without a trailing delimiter), so the pointer
	 * itself must be tested before it is dereferenced.
	 */
	while (tmp_str && tmp_str[0]) {
		sub_str = strsep(&tmp_str, delimiter);
		ret = kstrtol(sub_str, 0, &value);
		if (ret)
			return -EINVAL;

		switch (loop) {
		case 0:
			/* input unit MHz convert to dpm table unit 10KHz*/
			request->min_sclk = (uint32_t)value * 100;
			break;
		case 1:
			/* input unit MHz convert to dpm table unit 10KHz*/
			request->min_mclk = (uint32_t)value * 100;
			break;
		case 2:
			request->activity_threshold = (uint16_t)value;
			break;
		case 3:
			request->up_hyst = (uint8_t)value;
			break;
		case 4:
			request->down_hyst = (uint8_t)value;
			break;
		default:
			break;
		}

		loop++;
	}

	if (adev->pp_enabled)
		ret = amdgpu_dpm_set_power_profile_state(
				adev, request);
	else if (adev->pm.funcs->set_power_profile_state)
		ret = adev->pm.funcs->set_power_profile_state(
				adev, request);

	if (ret)
		return -EINVAL;

	return count;
}
/* sysfs store handler for pp_gfx_power_profile: update the GFX profile. */
static ssize_t amdgpu_set_pp_gfx_power_profile(struct device *dev,
		struct device_attribute *attr,
		const char *buf,
		size_t count)
{
	/* all other fields start out zeroed */
	struct amd_pp_profile request = { .type = AMD_PP_GFX_PROFILE };

	return amdgpu_set_pp_power_profile(dev, buf, count, &request);
}
/* sysfs store handler for pp_compute_power_profile: update the compute profile. */
static ssize_t amdgpu_set_pp_compute_power_profile(struct device *dev,
		struct device_attribute *attr,
		const char *buf,
		size_t count)
{
	/* all other fields start out zeroed */
	struct amd_pp_profile request = { .type = AMD_PP_COMPUTE_PROFILE };

	return amdgpu_set_pp_power_profile(dev, buf, count, &request);
}
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
amdgpu_get_dpm_forced_performance_level,
......@@ -637,6 +811,12 @@ static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR,
static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR,
amdgpu_get_pp_mclk_od,
amdgpu_set_pp_mclk_od);
static DEVICE_ATTR(pp_gfx_power_profile, S_IRUGO | S_IWUSR,
amdgpu_get_pp_gfx_power_profile,
amdgpu_set_pp_gfx_power_profile);
static DEVICE_ATTR(pp_compute_power_profile, S_IRUGO | S_IWUSR,
amdgpu_get_pp_compute_power_profile,
amdgpu_set_pp_compute_power_profile);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
......@@ -1142,11 +1322,11 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
/* XXX select vce level based on ring/task */
adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
mutex_unlock(&adev->pm.mutex);
amdgpu_pm_compute_clocks(adev);
amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
AMD_PG_STATE_UNGATE);
amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
AMD_CG_STATE_UNGATE);
amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
AMD_PG_STATE_UNGATE);
amdgpu_pm_compute_clocks(adev);
} else {
amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
AMD_PG_STATE_GATE);
......@@ -1255,6 +1435,20 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
DRM_ERROR("failed to create device file pp_mclk_od\n");
return ret;
}
ret = device_create_file(adev->dev,
&dev_attr_pp_gfx_power_profile);
if (ret) {
DRM_ERROR("failed to create device file "
"pp_gfx_power_profile\n");
return ret;
}
ret = device_create_file(adev->dev,
&dev_attr_pp_compute_power_profile);
if (ret) {
DRM_ERROR("failed to create device file "
"pp_compute_power_profile\n");
return ret;
}
ret = amdgpu_debugfs_pm_init(adev);
if (ret) {
......@@ -1284,6 +1478,10 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
device_remove_file(adev->dev,
&dev_attr_pp_gfx_power_profile);
device_remove_file(adev->dev,
&dev_attr_pp_compute_power_profile);
}
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
......@@ -1340,7 +1538,9 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev)
{
int32_t value;
uint32_t value;
struct pp_gpu_power query = {0};
int size;
/* sanity check PP is enabled */
if (!(adev->powerplay.pp_funcs &&
......@@ -1348,47 +1548,60 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
return -EINVAL;
/* GPU Clocks */
size = sizeof(value);
seq_printf(m, "GFX Clocks and Power:\n");
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, &value))
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, (void *)&value, &size))
seq_printf(m, "\t%u MHz (MCLK)\n", value/100);
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, &value))
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&value, &size))
seq_printf(m, "\t%u MHz (SCLK)\n", value/100);
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, &value))
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&value, &size))
seq_printf(m, "\t%u mV (VDDGFX)\n", value);
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, &value))
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size))
seq_printf(m, "\t%u mV (VDDNB)\n", value);
size = sizeof(query);
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) {
seq_printf(m, "\t%u.%u W (VDDC)\n", query.vddc_power >> 8,
query.vddc_power & 0xff);
seq_printf(m, "\t%u.%u W (VDDCI)\n", query.vddci_power >> 8,
query.vddci_power & 0xff);
seq_printf(m, "\t%u.%u W (max GPU)\n", query.max_gpu_power >> 8,
query.max_gpu_power & 0xff);
seq_printf(m, "\t%u.%u W (average GPU)\n", query.average_gpu_power >> 8,
query.average_gpu_power & 0xff);
}
size = sizeof(value);
seq_printf(m, "\n");
/* GPU Temp */
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, &value))
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, (void *)&value, &size))
seq_printf(m, "GPU Temperature: %u C\n", value/1000);
/* GPU Load */
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, &value))
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size))
seq_printf(m, "GPU Load: %u %%\n", value);
seq_printf(m, "\n");
/* UVD clocks */
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, &value)) {
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) {
if (!value) {
seq_printf(m, "UVD: Disabled\n");
} else {
seq_printf(m, "UVD: Enabled\n");
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, &value))
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size))
seq_printf(m, "\t%u MHz (DCLK)\n", value/100);
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, &value))
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size))
seq_printf(m, "\t%u MHz (VCLK)\n", value/100);
}
}
seq_printf(m, "\n");
/* VCE clocks */
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, &value)) {
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, (void *)&value, &size)) {
if (!value) {
seq_printf(m, "VCE: Disabled\n");
} else {
seq_printf(m, "VCE: Enabled\n");
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, &value))
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, (void *)&value, &size))
seq_printf(m, "\t%u MHz (ECCLK)\n", value/100);
}
}
......
......@@ -43,7 +43,7 @@ static int amdgpu_create_pp_handle(struct amdgpu_device *adev)
amd_pp = &(adev->powerplay);
pp_init.chip_family = adev->family;
pp_init.chip_id = adev->asic_type;
pp_init.pm_en = amdgpu_dpm != 0 ? true : false;
pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false;
pp_init.feature_mask = amdgpu_pp_feature_mask;
pp_init.device = amdgpu_cgs_create_device(adev);
ret = amd_powerplay_create(&pp_init, &(amd_pp->pp_handle));
......@@ -71,6 +71,7 @@ static int amdgpu_pp_early_init(void *handle)
case CHIP_TOPAZ:
case CHIP_CARRIZO:
case CHIP_STONEY:
case CHIP_VEGA10:
adev->pp_enabled = true;
if (amdgpu_create_pp_handle(adev))
return -EINVAL;
......@@ -163,7 +164,7 @@ static int amdgpu_pp_hw_init(void *handle)
int ret = 0;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->pp_enabled && adev->firmware.smu_load)
if (adev->pp_enabled && adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
amdgpu_ucode_init_bo(adev);
if (adev->powerplay.ip_funcs->hw_init)
......@@ -190,7 +191,7 @@ static int amdgpu_pp_hw_fini(void *handle)
ret = adev->powerplay.ip_funcs->hw_fini(
adev->powerplay.pp_handle);
if (adev->pp_enabled && adev->firmware.smu_load)
if (adev->pp_enabled && adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
amdgpu_ucode_fini_bo(adev);
return ret;
......
/*
* Copyright 2016 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Author: Huang Rui
*
*/
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_psp.h"
#include "amdgpu_ucode.h"
#include "soc15_common.h"
#include "psp_v3_1.h"
static void psp_set_funcs(struct amdgpu_device *adev);
/*
 * Early-init hook of the PSP IP block: install the PSP firmware callbacks
 * on the device (see psp_set_funcs()). Always returns 0.
 */
static int psp_early_init(void *handle)
{
	psp_set_funcs((struct amdgpu_device *)handle);

	return 0;
}
/*
 * Software-init hook of the PSP IP block.
 *
 * Binds the ASIC-specific PSP callbacks into the device's psp_context and
 * then runs the microcode init callback.
 *
 * Returns 0 on success, -EINVAL for an ASIC without a PSP backend here, or
 * the error returned by the microcode init callback.
 */
static int psp_sw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	struct psp_context *psp = &adev->psp;
	int err;

	/* Only Vega10 is wired up, using the psp_v3_1 implementation. */
	if (adev->asic_type != CHIP_VEGA10)
		return -EINVAL;

	psp->init_microcode = psp_v3_1_init_microcode;
	psp->bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv;
	psp->bootloader_load_sos = psp_v3_1_bootloader_load_sos;
	psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf;
	psp->ring_init = psp_v3_1_ring_init;
	psp->cmd_submit = psp_v3_1_cmd_submit;
	psp->compare_sram_data = psp_v3_1_compare_sram_data;
	psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;

	psp->adev = adev;

	err = psp_init_microcode(psp);
	if (err)
		DRM_ERROR("Failed to load psp firmware!\n");

	return err;
}
/* Software-fini hook of the PSP IP block: nothing to undo, always returns 0. */
static int psp_sw_fini(void *handle)
{
return 0;
}
/*
 * Poll a register until it reaches an expected state.
 *
 * @psp:           PSP context (provides the device used for register access)
 * @reg_index:     register to poll
 * @reg_val:       reference value
 * @mask:          bits to compare when @check_changed is false
 * @check_changed: if true, succeed as soon as the register differs from
 *                 @reg_val; if false, succeed once (reg & @mask) == @reg_val
 *
 * The register is read again on every iteration, with a 1us delay between
 * reads, for at most adev->usec_timeout iterations.
 *
 * Returns 0 when the condition is met, -ETIME on timeout.
 */
int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
		 uint32_t reg_val, uint32_t mask, bool check_changed)
{
	uint32_t val;
	int i;
	struct amdgpu_device *adev = psp->adev;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* re-read each pass; a single read up front can never change */
		val = RREG32(reg_index);
		if (check_changed) {
			if (val != reg_val)
				return 0;
		} else {
			if ((val & mask) == reg_val)
				return 0;
		}
		udelay(1);
	}

	return -ETIME;
}
/*
 * Copy a command into a temporary VRAM buffer, submit it to the PSP and
 * wait for completion.
 *
 * @psp:           PSP context
 * @ucode:         firmware the command refers to (passed through to the
 *                 submit callback; callers in this file also pass NULL)
 * @cmd:           command to submit
 * @fence_mc_addr: MC address of the fence buffer
 * @index:         fence value that signals completion of this command
 *
 * Completion is detected by polling the first word of psp->fence_buf until
 * it equals @index, sleeping 1ms between polls. There is no timeout on
 * that wait. If the submit itself fails, the wait is skipped, because the
 * fence would never be written.
 *
 * Returns 0 on success, or the error from buffer allocation or submission.
 */
static int
psp_cmd_submit_buf(struct psp_context *psp,
		   struct amdgpu_firmware_info *ucode,
		   struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr,
		   int index)
{
	int ret;
	struct amdgpu_bo *cmd_buf_bo;
	uint64_t cmd_buf_mc_addr;
	struct psp_gfx_cmd_resp *cmd_buf_mem;
	struct amdgpu_device *adev = psp->adev;

	ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &cmd_buf_bo, &cmd_buf_mc_addr,
				      (void **)&cmd_buf_mem);
	if (ret)
		return ret;

	memset(cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE);

	memcpy(cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));

	ret = psp_cmd_submit(psp, ucode, cmd_buf_mc_addr,
			     fence_mc_addr, index);
	if (ret)
		goto out_free;

	while (*((unsigned int *)psp->fence_buf) != index)
		msleep(1);

out_free:
	amdgpu_bo_free_kernel(&cmd_buf_bo,
			      &cmd_buf_mc_addr,
			      (void **)&cmd_buf_mem);

	return ret;
}
/*
 * Fill @cmd with a GFX_CMD_ID_SETUP_TMR request describing a buffer at MC
 * address @tmr_mc that is @size bytes long.
 */
static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd,
				 uint64_t tmr_mc, uint32_t size)
{
	cmd->cmd_id = GFX_CMD_ID_SETUP_TMR;

	/* the 64-bit address is passed as two 32-bit halves */
	cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc);
	cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc);
	cmd->cmd.cmd_setup_tmr.buf_size = size;
}
/*
 * Set up the Trusted Memory Region (TMR).
 *
 * Allocates a 3M VRAM buffer aligned to 1M, stores it in @psp and submits
 * a SETUP_TMR command for it, waiting on fence value 1. On success the
 * buffer stays allocated; it is released here only if the submit fails.
 *
 * Returns 0 on success, -ENOMEM if the command structure cannot be
 * allocated, or the error from buffer creation / command submission.
 */
static int psp_tmr_init(struct psp_context *psp)
{
	struct psp_gfx_cmd_resp *cmd;
	int ret;

	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
	if (!cmd)
		return -ENOMEM;

	/*
	 * Note: this memory needs to stay reserved until the driver
	 * uninitializes.
	 */
	ret = amdgpu_bo_create_kernel(psp->adev, 0x300000, 0x100000,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &psp->tmr_bo, &psp->tmr_mc_addr,
				      &psp->tmr_buf);
	if (!ret) {
		psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, 0x300000);

		ret = psp_cmd_submit_buf(psp, NULL, cmd,
					 psp->fence_buf_mc_addr, 1);
		if (ret)
			amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr,
					      &psp->tmr_buf);
	}

	kfree(cmd);

	return ret;
}
/*
 * Fill @cmd with a GFX_CMD_ID_LOAD_ASD request.
 *
 * @asd_mc / @size:                 MC address and length of the ASD image
 * @asd_mc_shared / @shared_size:   MC address and length of the buffer
 *                                  passed as the command buffer
 */
static void psp_prep_asd_cmd_buf(struct psp_gfx_cmd_resp *cmd,
				 uint64_t asd_mc, uint64_t asd_mc_shared,
				 uint32_t size, uint32_t shared_size)
{
	cmd->cmd_id = GFX_CMD_ID_LOAD_ASD;

	/* ASD image */
	cmd->cmd.cmd_load_ta.app_len = size;
	cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(asd_mc);
	cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(asd_mc);

	/* shared command buffer */
	cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
	cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(asd_mc_shared);
	cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(asd_mc_shared);
}
static int psp_asd_load(struct psp_context *psp)
{
int ret;
struct amdgpu_bo *asd_bo, *asd_shared_bo;
uint64_t asd_mc_addr, asd_shared_mc_addr;
void *asd_buf, *asd_shared_buf;
struct psp_gfx_cmd_resp *cmd;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
/*
* Allocate 16k memory aligned to 4k from Frame Buffer (local
* physical) for shared ASD <-> Driver
*/
ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&asd_shared_bo, &asd_shared_mc_addr, &asd_buf);
if (ret)
goto failed;
/*
* Allocate 256k memory aligned to 4k from Frame Buffer (local
* physical) for ASD firmware
*/
ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_BIN_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&asd_bo, &asd_mc_addr, &asd_buf);
if (ret)
goto failed_mem;
memcpy(asd_buf, psp->asd_start_addr, psp->asd_ucode_size);
psp_prep_asd_cmd_buf(cmd, asd_mc_addr, asd_shared_mc_addr,
psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr, 2);
if (ret)
goto failed_mem1;
amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf);
amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf);
kfree(cmd);
return 0;
failed_mem1:
amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf);
failed_mem:
amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf);
failed:
kfree(cmd);
return ret;
}
/*
* Bring up the PSP and load the registered firmware images through it.
*
* Steps, in order: run the sysdrv and SOS bootloader callbacks, initialise
* the kernel-mode ring, allocate and zero a VRAM fence buffer, set up the
* TMR, load the ASD, then submit one load command for every entry of
* adev->firmware.ucode[] that has a firmware image attached.
*
* Once allocated, the fence buffer is freed again before returning, on
* success and on failure alike.
*
* Returns 0 on success, -ENOMEM if the command structure cannot be
* allocated, or the first non-zero error reported by one of the steps.
*/
static int psp_load_fw(struct amdgpu_device *adev)
{
int ret;
struct psp_gfx_cmd_resp *cmd;
int i;
struct amdgpu_firmware_info *ucode;
struct psp_context *psp = &adev->psp;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
ret = psp_bootloader_load_sysdrv(psp);
if (ret)
goto failed;
ret = psp_bootloader_load_sos(psp);
if (ret)
goto failed;
ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
if (ret)
goto failed;
/* fence buffer polled by psp_cmd_submit_buf() to detect completion */
ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&psp->fence_buf_bo,
&psp->fence_buf_mc_addr,
&psp->fence_buf);
if (ret)
goto failed;
memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
ret = psp_tmr_init(psp);
if (ret)
goto failed_mem;
ret = psp_asd_load(psp);
if (ret)
goto failed_mem;
for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
/* slot without a firmware image: nothing to load */
if (!ucode->fw)
continue;
/* skip the SMC image when the ASIC-specific quirk callback says so */
if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
psp_smu_reload_quirk(psp))
continue;
ret = psp_prep_cmd_buf(ucode, cmd);
if (ret)
goto failed_mem;
/*
* Fence values 1 and 2 are used by psp_tmr_init() and
* psp_asd_load(), so ucode slot i waits for fence value i + 3.
*/
ret = psp_cmd_submit_buf(psp, ucode, cmd,
psp->fence_buf_mc_addr, i + 3);
if (ret)
goto failed_mem;
#if 0
/*
* NOTE(review): disabled. If re-enabled as written, the early return
* would skip freeing both cmd and the fence buffer.
*/
/* check if firmware loaded successfully */
if (!amdgpu_psp_check_fw_loading_status(adev, i))
return -EINVAL;
#endif
}
amdgpu_bo_free_kernel(&psp->fence_buf_bo,
&psp->fence_buf_mc_addr, &psp->fence_buf);
kfree(cmd);
return 0;
failed_mem:
amdgpu_bo_free_kernel(&psp->fence_buf_bo,
&psp->fence_buf_mc_addr, &psp->fence_buf);
failed:
kfree(cmd);
return ret;
}
/*
 * Hardware-init hook of the PSP IP block.
 *
 * Does nothing unless firmware is to be loaded by the PSP. Otherwise, with
 * adev->firmware.mutex held, it creates the ucode buffer object and loads
 * all firmware through the PSP. On any failure the load type is switched
 * to AMDGPU_FW_LOAD_DIRECT.
 *
 * Returns 0 on success and -EINVAL on any failure (the underlying error
 * code is not propagated).
 */
static int psp_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int err;

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		return 0;

	mutex_lock(&adev->firmware.mutex);

	/*
	 * This sequence is just used on hw_init only once, no need on
	 * resume.
	 */
	err = amdgpu_ucode_init_bo(adev);
	if (!err) {
		err = psp_load_fw(adev);
		if (err)
			DRM_ERROR("PSP firmware loading failed\n");
	}

	if (err)
		adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT;

	mutex_unlock(&adev->firmware.mutex);

	return err ? -EINVAL : 0;
}
static int psp_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct psp_context *psp = &adev->psp;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
amdgpu_ucode_fini_bo(adev);
if (psp->tmr_buf)
amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
return 0;
}
/* Suspend hook of the PSP IP block: no work is done, always returns 0. */
static int psp_suspend(void *handle)
{
return 0;
}
/*
 * Resume hook of the PSP IP block: when firmware is loaded by the PSP,
 * reload it with adev->firmware.mutex held.
 *
 * Returns 0 if the PSP load path is not in use or the reload succeeded,
 * otherwise the error from psp_load_fw().
 */
static int psp_resume(void *handle)
{
	struct amdgpu_device *adev = handle;
	int err = 0;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		mutex_lock(&adev->firmware.mutex);

		err = psp_load_fw(adev);
		if (err)
			DRM_ERROR("PSP resume failed\n");

		mutex_unlock(&adev->firmware.mutex);
	}

	return err;
}
/*
 * Check whether the firmware identified by @ucode_type was loaded.
 *
 * Returns true when firmware is not loaded through the PSP at all (nothing
 * to verify). Returns false when no firmware is registered, or when the
 * selected slot has no image or a zero size. Otherwise returns the result
 * of the ASIC-specific SRAM comparison callback.
 */
static bool psp_check_fw_loading_status(struct amdgpu_device *adev,
					enum AMDGPU_UCODE_ID ucode_type)
{
	struct amdgpu_firmware_info *info;

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		DRM_INFO("firmware is not loaded by PSP\n");
		return true;
	}

	if (adev->firmware.fw_size == 0)
		return false;

	info = &adev->firmware.ucode[ucode_type];
	if (info->fw && info->ucode_size)
		return psp_compare_sram_data(&adev->psp, info, ucode_type);

	return false;
}
/* Clockgating hook of the PSP IP block: @state is ignored, always returns 0. */
static int psp_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
return 0;
}
/* Powergating hook of the PSP IP block: @state is ignored, always returns 0. */
static int psp_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
return 0;
}
/*
* IP-block callback table for the PSP. Hooks the PSP does not implement
* (late_init, is_idle, wait_for_idle, soft_reset) are left NULL.
*/
const struct amd_ip_funcs psp_ip_funcs = {
.name = "psp",
.early_init = psp_early_init,
.late_init = NULL,
.sw_init = psp_sw_init,
.sw_fini = psp_sw_fini,
.hw_init = psp_hw_init,
.hw_fini = psp_hw_fini,
.suspend = psp_suspend,
.resume = psp_resume,
.is_idle = NULL,
.wait_for_idle = NULL,
.soft_reset = NULL,
.set_clockgating_state = psp_set_clockgating_state,
.set_powergating_state = psp_set_powergating_state,
};
/* Firmware callbacks installed into adev->firmware.funcs by psp_set_funcs(). */
static const struct amdgpu_psp_funcs psp_funcs = {
.check_fw_loading_status = psp_check_fw_loading_status,
};
/*
 * Install the PSP firmware callback table on @adev, unless a table has
 * already been set.
 */
static void psp_set_funcs(struct amdgpu_device *adev)
{
	if (adev->firmware.funcs)
		return;

	adev->firmware.funcs = &psp_funcs;
}
/* IP block descriptor for PSP version 3.1.0, backed by psp_ip_funcs. */
const struct amdgpu_ip_block_version psp_v3_1_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 3,
.minor = 1,
.rev = 0,
.funcs = &psp_ip_funcs,
};
/*
* Copyright 2016 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Author: Huang Rui
*
*/
#ifndef __AMDGPU_PSP_H__
#define __AMDGPU_PSP_H__
#include "amdgpu.h"
#include "psp_gfx_if.h"
/* Buffer sizes in bytes: 0x1000 = 4 KiB, 0x40000 = 256 KiB, 0x4000 = 16 KiB. */
#define PSP_FENCE_BUFFER_SIZE 0x1000
#define PSP_CMD_BUFFER_SIZE 0x1000
#define PSP_ASD_BIN_SIZE 0x40000
#define PSP_ASD_SHARED_MEM_SIZE 0x4000
/* Identifies which PSP ring a struct psp_ring describes. */
enum psp_ring_type
{
PSP_RING_TYPE__INVALID = 0,
/*
* These values map to the way the PSP kernel identifies the
* rings.
*/
PSP_RING_TYPE__UM = 1, /* User mode ring (formerly called RBI) */
PSP_RING_TYPE__KM = 2 /* Kernel mode ring (formerly called GPCOM) */
};
/* Bookkeeping for one PSP ring. */
struct psp_ring
{
enum psp_ring_type ring_type;
/* ring storage, viewed as an array of psp_gfx_rb_frame entries */
struct psp_gfx_rb_frame *ring_mem;
/* presumably the MC (GPU) address of ring_mem -- TODO confirm in ring_init */
uint64_t ring_mem_mc_addr;
void *ring_mem_handle;
/* NOTE(review): unit (bytes vs. frames) is not visible here -- confirm */
uint32_t ring_size;
};
/*
* Per-device PSP state: back-pointer to the device, the kernel-mode ring,
* the ASIC-specific callbacks, and firmware / buffer bookkeeping.
*/
struct psp_context
{
struct amdgpu_device *adev;
struct psp_ring km_ring;
/*
* ASIC-specific callbacks. Call them through the psp_*() wrapper
* macros; only some of those wrappers tolerate a NULL callback.
*/
int (*init_microcode)(struct psp_context *psp);
int (*bootloader_load_sysdrv)(struct psp_context *psp);
int (*bootloader_load_sos)(struct psp_context *psp);
int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode,
struct psp_gfx_cmd_resp *cmd);
int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode,
uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index);
bool (*compare_sram_data)(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
enum AMDGPU_UCODE_ID ucode_type);
bool (*smu_reload_quirk)(struct psp_context *psp);
/* sos firmware */
const struct firmware *sos_fw;
uint32_t sos_fw_version;
uint32_t sos_feature_version;
uint32_t sys_bin_size;
uint32_t sos_bin_size;
uint8_t *sys_start_addr;
uint8_t *sos_start_addr;
/* tmr buffer */
struct amdgpu_bo *tmr_bo;
uint64_t tmr_mc_addr;
void *tmr_buf;
/* asd firmware */
const struct firmware *asd_fw;
uint32_t asd_fw_version;
uint32_t asd_feature_version;
uint32_t asd_ucode_size;
uint8_t *asd_start_addr;
/* fence buffer */
struct amdgpu_bo *fence_buf_bo;
uint64_t fence_buf_mc_addr;
void *fence_buf;
};
/*
 * Callback table with a single hook that takes the device and a ucode id
 * and returns a bool.  Presumably reports whether that firmware has been
 * loaded -- the implementation is outside this header, TODO confirm.
 */
struct amdgpu_psp_funcs {
bool (*check_fw_loading_status)(struct amdgpu_device *adev,
enum AMDGPU_UCODE_ID);
};
/*
 * Thin wrappers that dispatch through the function pointers stored in
 * struct psp_context.
 *
 * NOTE(review): psp_prep_cmd_buf() expands to (psp)->prep_cmd_buf(...), but
 * "psp" is not one of its parameters.  It therefore only compiles where a
 * variable named psp (a struct psp_context pointer) is in scope at the call
 * site.
 */
#define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type))
/* The next three call the hook unconditionally; the pointer must be set. */
#define psp_ring_init(psp, type) (psp)->ring_init((psp), (type))
#define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \
(psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index))
#define psp_compare_sram_data(psp, ucode, type) \
(psp)->compare_sram_data((psp), (ucode), (type))
/*
 * The remaining wrappers tolerate a missing hook: they evaluate to 0 (or
 * false for the quirk query) when the pointer is NULL.  The psp argument is
 * expanded more than once, so it must not have side effects.
 */
#define psp_init_microcode(psp) \
((psp)->init_microcode ? (psp)->init_microcode((psp)) : 0)
#define psp_bootloader_load_sysdrv(psp) \
((psp)->bootloader_load_sysdrv ? (psp)->bootloader_load_sysdrv((psp)) : 0)
#define psp_bootloader_load_sos(psp) \
((psp)->bootloader_load_sos ? (psp)->bootloader_load_sos((psp)) : 0)
#define psp_smu_reload_quirk(psp) \
((psp)->smu_reload_quirk ? (psp)->smu_reload_quirk((psp)) : false)
/* Objects and helpers defined by the PSP implementation files. */
extern const struct amd_ip_funcs psp_ip_funcs;
extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
/*
 * NOTE(review): presumably polls register reg_index until its masked value
 * equals field_val, or differs from it when check_changed is true -- the
 * definition is not in this header, confirm before relying on this.
 */
extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
uint32_t field_val, uint32_t mask, bool check_changed);
#endif
......@@ -182,16 +182,32 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
return r;
}
r = amdgpu_wb_get(adev, &ring->rptr_offs);
if (r) {
dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
return r;
}
if (ring->funcs->support_64bit_ptrs) {
r = amdgpu_wb_get_64bit(adev, &ring->rptr_offs);
if (r) {
dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
return r;
}
r = amdgpu_wb_get_64bit(adev, &ring->wptr_offs);
if (r) {
dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
return r;
}
} else {
r = amdgpu_wb_get(adev, &ring->rptr_offs);
if (r) {
dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
return r;
}
r = amdgpu_wb_get(adev, &ring->wptr_offs);
if (r) {
dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
return r;
}
r = amdgpu_wb_get(adev, &ring->wptr_offs);
if (r) {
dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
return r;
}
r = amdgpu_wb_get(adev, &ring->fence_offs);
......@@ -219,6 +235,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->ring_size = roundup_pow_of_two(max_dw * 4 *
amdgpu_sched_hw_submission);
ring->buf_mask = (ring->ring_size / 4) - 1;
ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
0xffffffffffffffff : ring->buf_mask;
/* Allocate ring buffer */
if (ring->ring_obj == NULL) {
r = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
......@@ -230,9 +249,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
dev_err(adev->dev, "(%d) ring create failed\n", r);
return r;
}
memset((void *)ring->ring, 0, ring->ring_size);
amdgpu_ring_clear_ring(ring);
}
ring->ptr_mask = (ring->ring_size / 4) - 1;
ring->max_dw = max_dw;
if (amdgpu_debugfs_ring_init(adev, ring)) {
......@@ -253,10 +272,18 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
{
ring->ready = false;
amdgpu_wb_free(ring->adev, ring->cond_exe_offs);
amdgpu_wb_free(ring->adev, ring->fence_offs);
amdgpu_wb_free(ring->adev, ring->rptr_offs);
amdgpu_wb_free(ring->adev, ring->wptr_offs);
if (ring->funcs->support_64bit_ptrs) {
amdgpu_wb_free_64bit(ring->adev, ring->cond_exe_offs);
amdgpu_wb_free_64bit(ring->adev, ring->fence_offs);
amdgpu_wb_free_64bit(ring->adev, ring->rptr_offs);
amdgpu_wb_free_64bit(ring->adev, ring->wptr_offs);
} else {
amdgpu_wb_free(ring->adev, ring->cond_exe_offs);
amdgpu_wb_free(ring->adev, ring->fence_offs);
amdgpu_wb_free(ring->adev, ring->rptr_offs);
amdgpu_wb_free(ring->adev, ring->wptr_offs);
}
amdgpu_bo_free_kernel(&ring->ring_obj,
&ring->gpu_addr,
......@@ -293,8 +320,8 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
if (*pos < 12) {
early[0] = amdgpu_ring_get_rptr(ring);
early[1] = amdgpu_ring_get_wptr(ring);
early[2] = ring->wptr;
early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
early[2] = ring->wptr & ring->buf_mask;
for (i = *pos / 4; i < 3 && size; i++) {
r = put_user(early[i], (uint32_t *)buf);
if (r)
......
......@@ -27,10 +27,11 @@
#include "gpu_scheduler.h"
/* max number of rings */
#define AMDGPU_MAX_RINGS 16
#define AMDGPU_MAX_RINGS 18
#define AMDGPU_MAX_GFX_RINGS 1
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 3
#define AMDGPU_MAX_UVD_ENC_RINGS 2
/* some special values for the owner field */
#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul)
......@@ -45,7 +46,8 @@ enum amdgpu_ring_type {
AMDGPU_RING_TYPE_SDMA,
AMDGPU_RING_TYPE_UVD,
AMDGPU_RING_TYPE_VCE,
AMDGPU_RING_TYPE_KIQ
AMDGPU_RING_TYPE_KIQ,
AMDGPU_RING_TYPE_UVD_ENC
};
struct amdgpu_device;
......@@ -96,10 +98,11 @@ struct amdgpu_ring_funcs {
enum amdgpu_ring_type type;
uint32_t align_mask;
u32 nop;
bool support_64bit_ptrs;
/* ring read/write ptr handling */
u32 (*get_rptr)(struct amdgpu_ring *ring);
u32 (*get_wptr)(struct amdgpu_ring *ring);
u64 (*get_rptr)(struct amdgpu_ring *ring);
u64 (*get_wptr)(struct amdgpu_ring *ring);
void (*set_wptr)(struct amdgpu_ring *ring);
/* validating and patching of IBs */
int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
......@@ -126,6 +129,7 @@ struct amdgpu_ring_funcs {
int (*test_ib)(struct amdgpu_ring *ring, long timeout);
/* insert NOP packets */
void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
void (*insert_end)(struct amdgpu_ring *ring);
/* pad the indirect buffer to the necessary number of dw */
void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
......@@ -148,19 +152,23 @@ struct amdgpu_ring {
struct amdgpu_bo *ring_obj;
volatile uint32_t *ring;
unsigned rptr_offs;
unsigned wptr;
unsigned wptr_old;
u64 wptr;
u64 wptr_old;
unsigned ring_size;
unsigned max_dw;
int count_dw;
uint64_t gpu_addr;
uint32_t ptr_mask;
uint64_t ptr_mask;
uint32_t buf_mask;
bool ready;
u32 idx;
u32 me;
u32 pipe;
u32 queue;
struct amdgpu_bo *mqd_obj;
uint64_t mqd_gpu_addr;
void *mqd_ptr;
uint64_t eop_gpu_addr;
u32 doorbell_index;
bool use_doorbell;
unsigned wptr_offs;
......@@ -184,5 +192,12 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned ring_size, struct amdgpu_irq_src *irq_src,
unsigned irq_type);
void amdgpu_ring_fini(struct amdgpu_ring *ring);
/**
 * amdgpu_ring_clear_ring - overwrite the whole ring buffer with NOPs
 * @ring: ring whose buffer is filled
 *
 * Stores ring->funcs->nop into every slot of ring->ring, from index 0 up to
 * and including ring->buf_mask.
 */
static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
{
	int slot;

	for (slot = 0; slot <= ring->buf_mask; slot++)
		ring->ring[slot] = ring->funcs->nop;
}
#endif
......@@ -228,7 +228,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
out_cleanup:
kfree(gtt_obj);
if (r) {
printk(KERN_WARNING "Error while testing BO move.\n");
pr_warn("Error while testing BO move\n");
}
}
......@@ -237,82 +237,3 @@ void amdgpu_test_moves(struct amdgpu_device *adev)
if (adev->mman.buffer_funcs)
amdgpu_do_test_moves(adev);
}
void amdgpu_test_ring_sync(struct amdgpu_device *adev,
struct amdgpu_ring *ringA,
struct amdgpu_ring *ringB)
{
}
static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
struct amdgpu_ring *ringA,
struct amdgpu_ring *ringB,
struct amdgpu_ring *ringC)
{
}
static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA,
struct amdgpu_ring *ringB)
{
if (ringA == &ringA->adev->vce.ring[0] &&
ringB == &ringB->adev->vce.ring[1])
return false;
return true;
}
void amdgpu_test_syncing(struct amdgpu_device *adev)
{
int i, j, k;
for (i = 1; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ringA = adev->rings[i];
if (!ringA || !ringA->ready)
continue;
for (j = 0; j < i; ++j) {
struct amdgpu_ring *ringB = adev->rings[j];
if (!ringB || !ringB->ready)
continue;
if (!amdgpu_test_sync_possible(ringA, ringB))
continue;
DRM_INFO("Testing syncing between rings %d and %d...\n", i, j);
amdgpu_test_ring_sync(adev, ringA, ringB);
DRM_INFO("Testing syncing between rings %d and %d...\n", j, i);
amdgpu_test_ring_sync(adev, ringB, ringA);
for (k = 0; k < j; ++k) {
struct amdgpu_ring *ringC = adev->rings[k];
if (!ringC || !ringC->ready)
continue;
if (!amdgpu_test_sync_possible(ringA, ringC))
continue;
if (!amdgpu_test_sync_possible(ringB, ringC))
continue;
DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k);
amdgpu_test_ring_sync2(adev, ringA, ringB, ringC);
DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, k, j);
amdgpu_test_ring_sync2(adev, ringA, ringC, ringB);
DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, i, k);
amdgpu_test_ring_sync2(adev, ringB, ringA, ringC);
DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, k, i);
amdgpu_test_ring_sync2(adev, ringB, ringC, ringA);
DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, i, j);
amdgpu_test_ring_sync2(adev, ringC, ringA, ringB);
DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, j, i);
amdgpu_test_ring_sync2(adev, ringC, ringB, ringA);
}
}
}
}
......@@ -11,6 +11,9 @@
#define TRACE_SYSTEM amdgpu
#define TRACE_INCLUDE_FILE amdgpu_trace
/*
 * Fetch the timeline name of a job's "finished" scheduler fence by calling
 * that fence's ops->get_timeline_name() on it.
 *
 * The argument is parenthesized so that any pointer-valued expression can be
 * passed, and the whole expansion is wrapped so it composes safely inside
 * larger expressions.  The argument is expanded twice and must therefore be
 * free of side effects.
 */
#define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
	((job)->base.s_fence->finished.ops->get_timeline_name(&(job)->base.s_fence->finished))
TRACE_EVENT(amdgpu_mm_rreg,
TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
TP_ARGS(did, reg, value),
......@@ -49,6 +52,43 @@ TRACE_EVENT(amdgpu_mm_wreg,
(unsigned long)__entry->value)
);
/*
 * Tracepoint that records one struct amdgpu_iv_entry.
 *
 * The record stores client_id, src_id, ring_id, vm_id, vm_id_src, timestamp,
 * timestamp_src, pas_id and the four src_data words copied from @iv.
 *
 * NOTE(review): vm_id_src and timestamp_src are captured in the record but
 * are not part of the printed format below, and the format string ends with
 * an explicit newline.
 */
TRACE_EVENT(amdgpu_iv,
TP_PROTO(struct amdgpu_iv_entry *iv),
TP_ARGS(iv),
TP_STRUCT__entry(
__field(unsigned, client_id)
__field(unsigned, src_id)
__field(unsigned, ring_id)
__field(unsigned, vm_id)
__field(unsigned, vm_id_src)
__field(uint64_t, timestamp)
__field(unsigned, timestamp_src)
__field(unsigned, pas_id)
__array(unsigned, src_data, 4)
),
TP_fast_assign(
__entry->client_id = iv->client_id;
__entry->src_id = iv->src_id;
__entry->ring_id = iv->ring_id;
__entry->vm_id = iv->vm_id;
__entry->vm_id_src = iv->vm_id_src;
__entry->timestamp = iv->timestamp;
__entry->timestamp_src = iv->timestamp_src;
__entry->pas_id = iv->pas_id;
__entry->src_data[0] = iv->src_data[0];
__entry->src_data[1] = iv->src_data[1];
__entry->src_data[2] = iv->src_data[2];
__entry->src_data[3] = iv->src_data[3];
),
TP_printk("client_id:%u src_id:%u ring:%u vm_id:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n",
__entry->client_id, __entry->src_id,
__entry->ring_id, __entry->vm_id,
__entry->timestamp, __entry->pas_id,
__entry->src_data[0], __entry->src_data[1],
__entry->src_data[2], __entry->src_data[3])
);
TRACE_EVENT(amdgpu_bo_create,
TP_PROTO(struct amdgpu_bo *bo),
TP_ARGS(bo),
......@@ -70,7 +110,7 @@ TRACE_EVENT(amdgpu_bo_create,
__entry->visible = bo->flags;
),
TP_printk("bo=%p,pages=%u,type=%d,prefered=%d,allowed=%d,visible=%d",
TP_printk("bo=%p, pages=%u, type=%d, prefered=%d, allowed=%d, visible=%d",
__entry->bo, __entry->pages, __entry->type,
__entry->prefer, __entry->allow, __entry->visible)
);
......@@ -101,50 +141,51 @@ TRACE_EVENT(amdgpu_cs_ioctl,
TP_PROTO(struct amdgpu_job *job),
TP_ARGS(job),
TP_STRUCT__entry(
__field(struct amdgpu_device *, adev)
__field(struct amd_sched_job *, sched_job)
__field(struct amdgpu_ib *, ib)
__field(uint64_t, sched_job_id)
__string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__field(unsigned int, context)
__field(unsigned int, seqno)
__field(struct dma_fence *, fence)
__field(char *, ring_name)
__field(u32, num_ibs)
),
TP_fast_assign(
__entry->adev = job->adev;
__entry->sched_job = &job->base;
__entry->ib = job->ibs;
__entry->fence = &job->base.s_fence->finished;
__entry->sched_job_id = job->base.id;
__assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
__entry->ring_name = job->ring->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
__entry->adev, __entry->sched_job, __entry->ib,
__entry->fence, __entry->ring_name, __entry->num_ibs)
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
__entry->sched_job_id, __get_str(timeline), __entry->context,
__entry->seqno, __entry->ring_name, __entry->num_ibs)
);
TRACE_EVENT(amdgpu_sched_run_job,
TP_PROTO(struct amdgpu_job *job),
TP_ARGS(job),
TP_STRUCT__entry(
__field(struct amdgpu_device *, adev)
__field(struct amd_sched_job *, sched_job)
__field(struct amdgpu_ib *, ib)
__field(struct dma_fence *, fence)
__field(uint64_t, sched_job_id)
__string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__field(unsigned int, context)
__field(unsigned int, seqno)
__field(char *, ring_name)
__field(u32, num_ibs)
),
TP_fast_assign(
__entry->adev = job->adev;
__entry->sched_job = &job->base;
__entry->ib = job->ibs;
__entry->fence = &job->base.s_fence->finished;
__entry->sched_job_id = job->base.id;
__assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
__entry->ring_name = job->ring->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
__entry->adev, __entry->sched_job, __entry->ib,
__entry->fence, __entry->ring_name, __entry->num_ibs)
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
__entry->sched_job_id, __get_str(timeline), __entry->context,
__entry->seqno, __entry->ring_name, __entry->num_ibs)
);
......@@ -184,7 +225,7 @@ TRACE_EVENT(amdgpu_vm_bo_map,
),
TP_fast_assign(
__entry->bo = bo_va->bo;
__entry->bo = bo_va ? bo_va->bo : NULL;
__entry->start = mapping->it.start;
__entry->last = mapping->it.last;
__entry->offset = mapping->offset;
......@@ -321,7 +362,7 @@ TRACE_EVENT(amdgpu_bo_list_set,
__entry->bo = bo;
__entry->bo_size = amdgpu_bo_size(bo);
),
TP_printk("list=%p, bo=%p, bo_size = %Ld",
TP_printk("list=%p, bo=%p, bo_size=%Ld",
__entry->list,
__entry->bo,
__entry->bo_size)
......@@ -339,7 +380,7 @@ TRACE_EVENT(amdgpu_cs_bo_status,
__entry->total_bo = total_bo;
__entry->total_size = total_size;
),
TP_printk("total bo size = %Ld, total bo count = %Ld",
TP_printk("total_bo_size=%Ld, total_bo_count=%Ld",
__entry->total_bo, __entry->total_size)
);
......@@ -359,11 +400,12 @@ TRACE_EVENT(amdgpu_ttm_bo_move,
__entry->new_placement = new_placement;
__entry->old_placement = old_placement;
),
TP_printk("bo=%p from:%d to %d with size = %Ld",
TP_printk("bo=%p, from=%d, to=%d, size=%Ld",
__entry->bo, __entry->old_placement,
__entry->new_placement, __entry->bo_size)
);
#undef AMDGPU_JOB_GET_TIMELINE_NAME
#endif
/* This part must be outside protection */
......
......@@ -746,7 +746,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
{
struct ttm_tt *ttm = bo->ttm;
struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
uint32_t flags;
uint64_t flags;
int r;
if (!ttm || amdgpu_ttm_is_bound(ttm))
......@@ -1027,10 +1027,10 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
}
uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
struct ttm_mem_reg *mem)
{
uint32_t flags = 0;
uint64_t flags = 0;
if (mem && mem->mem_type != TTM_PL_SYSTEM)
flags |= AMDGPU_PTE_VALID;
......@@ -1042,9 +1042,7 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
flags |= AMDGPU_PTE_SNOOPED;
}
if (adev->asic_type >= CHIP_TONGA)
flags |= AMDGPU_PTE_EXECUTABLE;
flags |= adev->gart.gart_pte_flags;
flags |= AMDGPU_PTE_READABLE;
if (!amdgpu_ttm_tt_is_readonly(ttm))
......@@ -1160,27 +1158,33 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT;
adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT;
/* GDS Memory */
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
adev->gds.mem.total_size >> PAGE_SHIFT);
if (r) {
DRM_ERROR("Failed initializing GDS heap.\n");
return r;
if (adev->gds.mem.total_size) {
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
adev->gds.mem.total_size >> PAGE_SHIFT);
if (r) {
DRM_ERROR("Failed initializing GDS heap.\n");
return r;
}
}
/* GWS */
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
adev->gds.gws.total_size >> PAGE_SHIFT);
if (r) {
DRM_ERROR("Failed initializing gws heap.\n");
return r;
if (adev->gds.gws.total_size) {
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
adev->gds.gws.total_size >> PAGE_SHIFT);
if (r) {
DRM_ERROR("Failed initializing gws heap.\n");
return r;
}
}
/* OA */
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
adev->gds.oa.total_size >> PAGE_SHIFT);
if (r) {
DRM_ERROR("Failed initializing oa heap.\n");
return r;
if (adev->gds.oa.total_size) {
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
adev->gds.oa.total_size >> PAGE_SHIFT);
if (r) {
DRM_ERROR("Failed initializing oa heap.\n");
return r;
}
}
r = amdgpu_ttm_debugfs_init(adev);
......@@ -1208,9 +1212,12 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
}
ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
if (adev->gds.mem.total_size)
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
if (adev->gds.gws.total_size)
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
if (adev->gds.oa.total_size)
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
ttm_bo_device_release(&adev->mman.bdev);
amdgpu_gart_fini(adev);
amdgpu_ttm_global_fini(adev);
......
......@@ -217,10 +217,55 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
return true;
}
static int amdgpu_ucode_init_single_fw(struct amdgpu_firmware_info *ucode,
uint64_t mc_addr, void *kptr)
/**
 * amdgpu_ucode_get_load_type - choose the firmware loading method for an ASIC
 * @adev: device whose asic_type selects the method
 * @load_type: requested mode; zero always yields direct loading
 *
 * The SI and CIK chips (only compiled in when their config options are set)
 * always use direct loading.  For the Topaz through Polaris12 cases a
 * non-zero @load_type selects SMU loading, and for Vega10 it selects PSP
 * loading.  Any other ASIC is reported with an error message and falls back
 * to direct loading.
 *
 * Return: the selected enum amdgpu_firmware_load_type value.
 */
enum amdgpu_firmware_load_type
amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
{
	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_OLAND:
		return AMDGPU_FW_LOAD_DIRECT;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_HAWAII:
	case CHIP_MULLINS:
		return AMDGPU_FW_LOAD_DIRECT;
#endif
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		if (!load_type)
			return AMDGPU_FW_LOAD_DIRECT;
		else
			return AMDGPU_FW_LOAD_SMU;
	case CHIP_VEGA10:
		if (!load_type)
			return AMDGPU_FW_LOAD_DIRECT;
		else
			return AMDGPU_FW_LOAD_PSP;
	default:
		/* Not fatal: fall through to the direct-load fallback below. */
		DRM_ERROR("Unknown firmware load type\n");
	}

	return AMDGPU_FW_LOAD_DIRECT;
}
static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
struct amdgpu_firmware_info *ucode,
uint64_t mc_addr, void *kptr)
{
const struct common_firmware_header *header = NULL;
const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
if (NULL == ucode->fw)
return 0;
......@@ -232,9 +277,36 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_firmware_info *ucode,
return 0;
header = (const struct common_firmware_header *)ucode->fw->data;
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
le32_to_cpu(header->ucode_array_offset_bytes)),
le32_to_cpu(header->ucode_size_bytes));
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP ||
(ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) {
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
le32_to_cpu(header->ucode_array_offset_bytes)),
ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1 ||
ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2) {
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
le32_to_cpu(cp_hdr->jt_size) * 4;
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
le32_to_cpu(header->ucode_array_offset_bytes)),
ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT ||
ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT) {
ucode->ucode_size = le32_to_cpu(cp_hdr->jt_size) * 4;
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
le32_to_cpu(header->ucode_array_offset_bytes) +
le32_to_cpu(cp_hdr->jt_offset) * 4),
ucode->ucode_size);
}
return 0;
}
......@@ -260,10 +332,11 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
(le32_to_cpu(header->jt_offset) * 4);
memcpy(dst_addr, src_addr, le32_to_cpu(header->jt_size) * 4);
ucode->ucode_size += le32_to_cpu(header->jt_size) * 4;
return 0;
}
int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
{
struct amdgpu_bo **bo = &adev->firmware.fw_buf;
......@@ -303,20 +376,32 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
amdgpu_bo_unreserve(*bo);
for (i = 0; i < AMDGPU_UCODE_ID_MAXIMUM; i++) {
memset(fw_buf_ptr, 0, adev->firmware.fw_size);
/*
* if SMU loaded firmware, it needn't add SMC, UVD, and VCE
* ucode info here
*/
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 4;
else
adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM;
for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
if (ucode->fw) {
header = (const struct common_firmware_header *)ucode->fw->data;
amdgpu_ucode_init_single_fw(ucode, fw_mc_addr + fw_offset,
fw_buf_ptr + fw_offset);
if (i == AMDGPU_UCODE_ID_CP_MEC1) {
amdgpu_ucode_init_single_fw(adev, ucode, fw_mc_addr + fw_offset,
(void *)((uint8_t *)fw_buf_ptr + fw_offset));
if (i == AMDGPU_UCODE_ID_CP_MEC1 &&
adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
const struct gfx_firmware_header_v1_0 *cp_hdr;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
amdgpu_ucode_patch_jt(ucode, fw_mc_addr + fw_offset,
fw_buf_ptr + fw_offset);
fw_offset += ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
}
fw_offset += ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
fw_offset += ALIGN(ucode->ucode_size, PAGE_SIZE);
}
}
return 0;
......@@ -328,7 +413,8 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
failed_reserve:
amdgpu_bo_unref(bo);
failed:
adev->firmware.smu_load = false;
if (err)
adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT;
return err;
}
......@@ -338,7 +424,7 @@ int amdgpu_ucode_fini_bo(struct amdgpu_device *adev)
int i;
struct amdgpu_firmware_info *ucode = NULL;
for (i = 0; i < AMDGPU_UCODE_ID_MAXIMUM; i++) {
for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
if (ucode->fw) {
ucode->mc_addr = 0;
......
......@@ -49,6 +49,14 @@ struct smc_firmware_header_v1_0 {
uint32_t ucode_start_addr;
};
/* version_major=1, version_minor=0 */
/*
 * PSP firmware file header: the common header followed by PSP-specific
 * fields.
 *
 * NOTE(review): sos_offset_bytes / sos_size_bytes presumably locate the sOS
 * image inside the firmware file -- the parsing code is not visible here,
 * confirm against the PSP microcode loader.
 */
struct psp_firmware_header_v1_0 {
struct common_firmware_header header;
uint32_t ucode_feature_version;
uint32_t sos_offset_bytes;
uint32_t sos_size_bytes;
};
/* version_major=1, version_minor=0 */
struct gfx_firmware_header_v1_0 {
struct common_firmware_header header;
......@@ -110,6 +118,7 @@ union amdgpu_firmware_header {
struct common_firmware_header common;
struct mc_firmware_header_v1_0 mc;
struct smc_firmware_header_v1_0 smc;
struct psp_firmware_header_v1_0 psp;
struct gfx_firmware_header_v1_0 gfx;
struct rlc_firmware_header_v1_0 rlc;
struct rlc_firmware_header_v2_0 rlc_v2_0;
......@@ -128,9 +137,14 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_CP_PFP,
AMDGPU_UCODE_ID_CP_ME,
AMDGPU_UCODE_ID_CP_MEC1,
AMDGPU_UCODE_ID_CP_MEC1_JT,
AMDGPU_UCODE_ID_CP_MEC2,
AMDGPU_UCODE_ID_CP_MEC2_JT,
AMDGPU_UCODE_ID_RLC_G,
AMDGPU_UCODE_ID_STORAGE,
AMDGPU_UCODE_ID_SMC,
AMDGPU_UCODE_ID_UVD,
AMDGPU_UCODE_ID_VCE,
AMDGPU_UCODE_ID_MAXIMUM,
};
......@@ -161,6 +175,8 @@ struct amdgpu_firmware_info {
uint64_t mc_addr;
/* kernel linear address */
void *kaddr;
/* ucode_size_bytes */
uint32_t ucode_size;
};
void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
......@@ -174,4 +190,7 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
int amdgpu_ucode_init_bo(struct amdgpu_device *adev);
int amdgpu_ucode_fini_bo(struct amdgpu_device *adev);
enum amdgpu_firmware_load_type
amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
#endif
......@@ -67,6 +67,14 @@
#define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin"
#define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin"
#define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin"
#define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00)
#define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00)
#define mmUVD_GPCOM_VCPU_CMD_VEGA10 (0x03c3 + 0x7e00)
#define mmUVD_NO_OP_VEGA10 (0x03ff + 0x7e00)
#define mmUVD_ENGINE_CNTL_VEGA10 (0x03c6 + 0x7e00)
/**
* amdgpu_uvd_cs_ctx - Command submission parser context
*
......@@ -101,6 +109,8 @@ MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
MODULE_FIRMWARE(FIRMWARE_VEGA10);
static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
......@@ -151,6 +161,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
case CHIP_POLARIS11:
fw_name = FIRMWARE_POLARIS11;
break;
case CHIP_VEGA10:
fw_name = FIRMWARE_VEGA10;
break;
case CHIP_POLARIS12:
fw_name = FIRMWARE_POLARIS12;
break;
......@@ -203,9 +216,11 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n",
version_major, version_minor);
bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
+ AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
+ AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo,
&adev->uvd.gpu_addr, &adev->uvd.cpu_addr);
......@@ -319,11 +334,13 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
unsigned offset;
hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset,
le32_to_cpu(hdr->ucode_size_bytes));
size -= le32_to_cpu(hdr->ucode_size_bytes);
ptr += le32_to_cpu(hdr->ucode_size_bytes);
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset,
le32_to_cpu(hdr->ucode_size_bytes));
size -= le32_to_cpu(hdr->ucode_size_bytes);
ptr += le32_to_cpu(hdr->ucode_size_bytes);
}
memset_io(ptr, 0, size);
}
......@@ -936,6 +953,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
struct dma_fence *f = NULL;
struct amdgpu_device *adev = ring->adev;
uint64_t addr;
uint32_t data[4];
int i, r;
memset(&tv, 0, sizeof(tv));
......@@ -961,16 +979,28 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
if (r)
goto err;
if (adev->asic_type >= CHIP_VEGA10) {
data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0_VEGA10, 0);
data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1_VEGA10, 0);
data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD_VEGA10, 0);
data[3] = PACKET0(mmUVD_NO_OP_VEGA10, 0);
} else {
data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0);
data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
data[3] = PACKET0(mmUVD_NO_OP, 0);
}
ib = &job->ibs[0];
addr = amdgpu_bo_gpu_offset(bo);
ib->ptr[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
ib->ptr[0] = data[0];
ib->ptr[1] = addr;
ib->ptr[2] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0);
ib->ptr[2] = data[1];
ib->ptr[3] = addr >> 32;
ib->ptr[4] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
ib->ptr[4] = data[2];
ib->ptr[5] = 0;
for (i = 6; i < 16; i += 2) {
ib->ptr[i] = PACKET0(mmUVD_NO_OP, 0);
ib->ptr[i] = data[3];
ib->ptr[i+1] = 0;
}
ib->length_dw = 16;
......@@ -1108,6 +1138,9 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
container_of(work, struct amdgpu_device, uvd.idle_work.work);
unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
if (amdgpu_sriov_vf(adev))
return;
if (fences == 0) {
if (adev->pm.dpm_enabled) {
amdgpu_dpm_enable_uvd(adev, false);
......@@ -1129,6 +1162,9 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
bool set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
if (amdgpu_sriov_vf(adev))
return;
if (set_clocks) {
if (adev->pm.dpm_enabled) {
amdgpu_dpm_enable_uvd(adev, true);
......
......@@ -24,6 +24,35 @@
#ifndef __AMDGPU_UVD_H__
#define __AMDGPU_UVD_H__
/*
 * UVD limits and sizes.
 *
 * AMDGPU_MAX_UVD_HANDLES dimensions the handles[] and filp[] arrays of
 * struct amdgpu_uvd.  amdgpu_uvd_sw_init() sizes the VCPU buffer object as
 * AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE +
 * AMDGPU_UVD_SESSION_SIZE * max_handles, plus the firmware image when the
 * firmware is not loaded through the PSP.
 */
#define AMDGPU_DEFAULT_UVD_HANDLES 10
#define AMDGPU_MAX_UVD_HANDLES 40
#define AMDGPU_UVD_STACK_SIZE (200*1024)
#define AMDGPU_UVD_HEAP_SIZE (256*1024)
#define AMDGPU_UVD_SESSION_SIZE (50*1024)
#define AMDGPU_UVD_FIRMWARE_OFFSET 256
/*
 * Per-device UVD state.
 *
 * NOTE(review): comments marked "presumably" are inferred from field names;
 * confirm against amdgpu_uvd.c and the UVD IP-block files.
 */
struct amdgpu_uvd {
/* buffer object created in amdgpu_uvd_sw_init(), with its CPU/GPU addresses */
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
uint64_t gpu_addr;
unsigned fw_version;
void *saved_bo;
/* number of session slots accounted for when sizing vcpu_bo */
unsigned max_handles;
/* per-handle slots, AMDGPU_MAX_UVD_HANDLES entries each */
atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
/* delayed work serviced by amdgpu_uvd_idle_work_handler() */
struct delayed_work idle_work;
const struct firmware *fw; /* UVD firmware */
/* presumably the decode ring, followed by the encode rings */
struct amdgpu_ring ring;
struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];
struct amdgpu_irq_src irq;
bool address_64_bit;
bool use_ctx_buf;
struct amd_sched_entity entity;
struct amd_sched_entity entity_enc;
uint32_t srbm_soft_reset;
/* presumably how many ring_enc[] entries are in use */
unsigned num_enc_rings;
};
int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
int amdgpu_uvd_sw_fini(struct amdgpu_device *adev);
int amdgpu_uvd_suspend(struct amdgpu_device *adev);
......
......@@ -54,6 +54,8 @@
#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin"
#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin"
#define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin"
#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
......@@ -69,6 +71,8 @@ MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
MODULE_FIRMWARE(FIRMWARE_VEGA10);
static void amdgpu_vce_idle_work_handler(struct work_struct *work);
/**
......@@ -123,6 +127,9 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
case CHIP_POLARIS11:
fw_name = FIRMWARE_POLARIS11;
break;
case CHIP_VEGA10:
fw_name = FIRMWARE_VEGA10;
break;
case CHIP_POLARIS12:
fw_name = FIRMWARE_POLARIS12;
break;
......@@ -313,6 +320,9 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
container_of(work, struct amdgpu_device, vce.idle_work.work);
unsigned i, count = 0;
if (amdgpu_sriov_vf(adev))
return;
for (i = 0; i < adev->vce.num_rings; i++)
count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
......@@ -343,6 +353,9 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
bool set_clocks;
if (amdgpu_sriov_vf(adev))
return;
mutex_lock(&adev->vce.idle_mutex);
set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
if (set_clocks) {
......@@ -944,6 +957,10 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
/* TODO: remove it if VCE can work for sriov */
if (amdgpu_sriov_vf(adev))
return 0;
r = amdgpu_ring_alloc(ring, 16);
if (r) {
DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n",
......@@ -982,6 +999,10 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct dma_fence *fence = NULL;
long r;
/* TODO: remove it if VCE can work for sriov */
if (amdgpu_sriov_vf(ring->adev))
return 0;
/* skip vce ring1/2 ib test for now, since it's not reliable */
if (ring != &ring->adev->vce.ring[0])
return 0;
......
......@@ -24,6 +24,31 @@
#ifndef __AMDGPU_VCE_H__
#define __AMDGPU_VCE_H__
/* Number of slots in the handles[], filp[] and img_size[] arrays of struct amdgpu_vce. */
#define AMDGPU_MAX_VCE_HANDLES 16
/* NOTE(review): presumably an offset into the VCE firmware image; its use is not visible here - confirm. */
#define AMDGPU_VCE_FIRMWARE_OFFSET 256
/*
 * Single-bit flags. NOTE(review): presumably one per VCE instance and OR-ed
 * into amdgpu_vce.harvest_config - confirm against the users.
 */
#define AMDGPU_VCE_HARVEST_VCE0 (1 << 0)
#define AMDGPU_VCE_HARVEST_VCE1 (1 << 1)
/*
 * VCE state kept per device; the VCE code reaches it as adev->vce.
 */
struct amdgpu_vce {
/* NOTE(review): presumably the buffer object backing the VCE VCPU and its GPU address - confirm. */
struct amdgpu_bo *vcpu_bo;
uint64_t gpu_addr;
/* NOTE(review): version numbers, presumably parsed from the firmware image - confirm. */
unsigned fw_version;
unsigned fb_version;
/*
 * Three arrays with AMDGPU_MAX_VCE_HANDLES slots each.
 * NOTE(review): presumably parallel, i.e. slot i of each describes the
 * same session - confirm.
 */
atomic_t handles[AMDGPU_MAX_VCE_HANDLES];
struct drm_file *filp[AMDGPU_MAX_VCE_HANDLES];
uint32_t img_size[AMDGPU_MAX_VCE_HANDLES];
/*
 * Delayed work run by amdgpu_vce_idle_work_handler(), which recovers the
 * device from it with container_of(); cancelled synchronously in
 * amdgpu_vce_ring_begin_use().
 */
struct delayed_work idle_work;
/* Held by amdgpu_vce_ring_begin_use() while it cancels idle_work. */
struct mutex idle_mutex;
const struct firmware *fw; /* VCE firmware */
/* Ring storage; only the first num_rings entries are walked by the idle handler. */
struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS];
struct amdgpu_irq_src irq;
/* NOTE(review): presumably a mask of AMDGPU_VCE_HARVEST_VCE* flags - confirm. */
unsigned harvest_config;
struct amd_sched_entity entity;
uint32_t srbm_soft_reset;
/* Number of entries of ring[] in use (loop bound in the idle work handler). */
unsigned num_rings;
};
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size);
int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
int amdgpu_vce_suspend(struct amdgpu_device *adev);
......
......@@ -75,6 +75,15 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
return -ENOMEM;
}
r = amdgpu_vm_alloc_pts(adev, bo_va->vm, AMDGPU_CSA_VADDR,
AMDGPU_CSA_SIZE);
if (r) {
DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
amdgpu_vm_bo_rmv(adev, bo_va);
ttm_eu_backoff_reservation(&ticket, &list);
return r;
}
r = amdgpu_vm_bo_map(adev, bo_va, AMDGPU_CSA_VADDR, 0,AMDGPU_CSA_SIZE,
AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
AMDGPU_PTE_EXECUTABLE);
......@@ -97,7 +106,8 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
adev->mode_info.num_crtc = 1;
adev->enable_virtual_display = true;
mutex_init(&adev->virt.lock);
mutex_init(&adev->virt.lock_kiq);
mutex_init(&adev->virt.lock_reset);
}
uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
......@@ -110,14 +120,14 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
BUG_ON(!ring->funcs->emit_rreg);
mutex_lock(&adev->virt.lock);
mutex_lock(&adev->virt.lock_kiq);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_hdp_flush(ring);
amdgpu_ring_emit_rreg(ring, reg);
amdgpu_ring_emit_hdp_invalidate(ring);
amdgpu_fence_emit(ring, &f);
amdgpu_ring_commit(ring);
mutex_unlock(&adev->virt.lock);
mutex_unlock(&adev->virt.lock_kiq);
r = dma_fence_wait(f, false);
if (r)
......@@ -138,14 +148,14 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
BUG_ON(!ring->funcs->emit_wreg);
mutex_lock(&adev->virt.lock);
mutex_lock(&adev->virt.lock_kiq);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_hdp_flush(ring);
amdgpu_ring_emit_wreg(ring, reg, v);
amdgpu_ring_emit_hdp_invalidate(ring);
amdgpu_fence_emit(ring, &f);
amdgpu_ring_commit(ring);
mutex_unlock(&adev->virt.lock);
mutex_unlock(&adev->virt.lock_kiq);
r = dma_fence_wait(f, false);
if (r)
......
......@@ -30,6 +30,12 @@
#define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* the whole GPU is passed through to the VM */
#define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */
/*
 * Table descriptor embedded in struct amdgpu_virt as the mm_table member.
 * NOTE(review): the three fields presumably describe one allocation (the
 * buffer object, its CPU mapping and its GPU address) - confirm against
 * the code that allocates it, which is not visible here.
 */
struct amdgpu_mm_table {
struct amdgpu_bo *bo;
uint32_t *cpu_addr;
uint64_t gpu_addr;
};
/**
* struct amdgpu_virt_ops - amdgpu device virt operations
*/
......@@ -46,10 +52,12 @@ struct amdgpu_virt {
uint64_t csa_vmid0_addr;
bool chained_ib_support;
uint32_t reg_val_offs;
struct mutex lock;
struct mutex lock_kiq;
struct mutex lock_reset;
struct amdgpu_irq_src ack_irq;
struct amdgpu_irq_src rcv_irq;
struct delayed_work flr_work;
struct work_struct flr_work;
struct amdgpu_mm_table mm_table;
const struct amdgpu_virt_ops *ops;
};
......@@ -89,5 +97,6 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary);
#endif
此差异已折叠。
......@@ -32,6 +32,7 @@
#define ATOM_ATI_MAGIC_PTR 0x30
#define ATOM_ATI_MAGIC " 761295520"
#define ATOM_ROM_TABLE_PTR 0x48
#define ATOM_ROM_PART_NUMBER_PTR 0x6E
#define ATOM_ROM_MAGIC "ATOM"
#define ATOM_ROM_MAGIC_PTR 4
......@@ -151,7 +152,6 @@ bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, uint16_t
uint8_t *frev, uint8_t *crev, uint16_t *data_start);
bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index,
uint8_t *frev, uint8_t *crev);
int amdgpu_atom_allocate_fb_scratch(struct atom_context *ctx);
#include "atom-types.h"
#include "atombios.h"
#include "ObjectID.h"
......
此差异已折叠。
......@@ -295,6 +295,13 @@ struct ci_power_info {
bool fan_is_controlled_by_smc;
u32 t_min;
u32 fan_ctrl_default_mode;
/* power profile */
struct amd_pp_profile gfx_power_profile;
struct amd_pp_profile compute_power_profile;
struct amd_pp_profile default_gfx_power_profile;
struct amd_pp_profile default_compute_power_profile;
enum amd_pp_profile_type current_power_profile;
};
#define CISLANDS_VOLTAGE_CONTROL_NONE 0x0
......
......@@ -1212,6 +1212,11 @@ static int cik_asic_reset(struct amdgpu_device *adev)
return r;
}
/**
 * cik_get_config_memsize - read the CONFIG_MEMSIZE register
 * @adev: device to query; not named in the body but required by RREG32()
 *
 * Hooked up as the .get_config_memsize callback of cik_asic_funcs.
 *
 * Returns the raw value read from mmCONFIG_MEMSIZE.
 * NOTE(review): the unit of the value is not visible here - confirm.
 */
static u32 cik_get_config_memsize(struct amdgpu_device *adev)
{
	u32 memsize = RREG32(mmCONFIG_MEMSIZE);

	return memsize;
}
static int cik_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
u32 cntl_reg, u32 status_reg)
{
......@@ -1641,6 +1646,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.get_xclk = &cik_get_xclk,
.set_uvd_clocks = &cik_set_uvd_clocks,
.set_vce_clocks = &cik_set_vce_clocks,
.get_config_memsize = &cik_get_config_memsize,
};
static int cik_common_early_init(void *handle)
......@@ -1779,6 +1785,8 @@ static int cik_common_early_init(void *handle)
return -EINVAL;
}
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
amdgpu_get_pcie_info(adev);
return 0;
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册