Commit bd4a68da — Author: Dave Airlie

Merge branch 'drm-next-4.9' of git://people.freedesktop.org/~agd5f/linux into drm-next

More radeon and amdgpu changes for 4.9. Highlights:
- Initial SI support for amdgpu (controlled by a Kconfig option)
- misc ttm cleanups
- runtime PM fixes
- S3/S4 fixes
- power improvements
- lots of code cleanups and optimizations

* 'drm-next-4.9' of git://people.freedesktop.org/~agd5f/linux: (151 commits)
  drm/ttm: remove cpu_address member from ttm_tt
  drm/radeon/radeon_device: remove unused function
  drm/amdgpu: clean function declarations in amdgpu_ttm.c up
  drm/amdgpu: use the new ring ib and dma frame size callbacks (v2)
  drm/amdgpu/vce3: add ring callbacks for ib and dma frame size
  drm/amdgpu/vce2: add ring callbacks for ib and dma frame size
  drm/amdgpu/vce: add common ring callbacks for ib and dma frame size
  drm/amdgpu/uvd6: add ring callbacks for ib and dma frame size
  drm/amdgpu/uvd5: add ring callbacks for ib and dma frame size
  drm/amdgpu/uvd4.2: add ring callbacks for ib and dma frame size
  drm/amdgpu/sdma3: add ring callbacks for ib and dma frame size
  drm/amdgpu/sdma2.4: add ring callbacks for ib and dma frame size
  drm/amdgpu/cik_sdma: add ring callbacks for ib and dma frame size
  drm/amdgpu/si_dma: add ring callbacks for ib and dma frame size
  drm/amdgpu/gfx8: add ring callbacks for ib and dma frame size
  drm/amdgpu/gfx7: add ring callbacks for ib and dma frame size
  drm/amdgpu/gfx6: add ring callbacks for ib and dma frame size
  drm/amdgpu/ring: add an interface to get dma frame and ib size
  drm/amdgpu/sdma3: drop unused functions
  drm/amdgpu/gfx6: drop gds_switch callback
  ...
config DRM_AMDGPU_SI
	bool "Enable amdgpu support for SI parts"
	depends on DRM_AMDGPU
	help
	  Choose this option if you want to enable experimental support
	  for SI asics.

config DRM_AMDGPU_CIK
	bool "Enable amdgpu support for CIK parts"
	depends on DRM_AMDGPU
......
......@@ -30,6 +30,8 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \
amdgpu_amdkfd_gfx_v7.o
amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
amdgpu-y += \
vi.o
......
......@@ -64,6 +64,7 @@
extern int amdgpu_modeset;
extern int amdgpu_vram_limit;
extern int amdgpu_gart_size;
extern int amdgpu_moverate;
extern int amdgpu_benchmarking;
extern int amdgpu_testing;
extern int amdgpu_audio;
......@@ -94,6 +95,7 @@ extern unsigned amdgpu_pg_mask;
extern char *amdgpu_disable_cu;
extern int amdgpu_sclk_deep_sleep_en;
extern char *amdgpu_virtual_display;
extern unsigned amdgpu_pp_feature_mask;
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
......@@ -108,7 +110,7 @@ extern char *amdgpu_virtual_display;
#define AMDGPU_MAX_RINGS 16
#define AMDGPU_MAX_GFX_RINGS 1
#define AMDGPU_MAX_COMPUTE_RINGS 8
-#define AMDGPU_MAX_VCE_RINGS		2
+#define AMDGPU_MAX_VCE_RINGS		3
/* max number of IP instances */
#define AMDGPU_MAX_SDMA_INSTANCES 2
......@@ -318,6 +320,10 @@ struct amdgpu_ring_funcs {
/* note usage for clock and power gating */
void (*begin_use)(struct amdgpu_ring *ring);
void (*end_use)(struct amdgpu_ring *ring);
void (*emit_switch_buffer) (struct amdgpu_ring *ring);
void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
unsigned (*get_emit_ib_size) (struct amdgpu_ring *ring);
unsigned (*get_dma_frame_size) (struct amdgpu_ring *ring);
};
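The two new size getters let the core compute exact ring allocations instead of the old fixed 256 dwords per IB (see the amdgpu_ib_schedule hunk below). A minimal sketch of an engine-side implementation; the dword counts here are invented placeholders, not values from any real engine:

	/* Illustration only: the sizes below are hypothetical. */
	static unsigned example_ring_get_emit_ib_size(struct amdgpu_ring *ring)
	{
		return 4; /* dwords example_ring_emit_ib() writes per IB */
	}

	static unsigned example_ring_get_dma_frame_size(struct amdgpu_ring *ring)
	{
		/* fixed per-submission overhead: fence, flushes, vm switch, ... */
		return 20;
	}

With callbacks like these, a 3-IB submission reserves 20 + 3 * 4 = 32 dwords rather than the former blanket 256 * 3.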
/*
......@@ -618,6 +624,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
int pages, struct page **pagelist,
dma_addr_t *dma_addr, uint32_t flags);
int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
/*
* GPU MC structures, functions & helpers
......@@ -963,6 +970,7 @@ struct amdgpu_ctx {
spinlock_t ring_lock;
struct fence **fences;
struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
bool preamble_presented;
};
struct amdgpu_ctx_mgr {
......@@ -1222,11 +1230,16 @@ struct amdgpu_cs_parser {
struct fence *fence;
uint64_t bytes_moved_threshold;
uint64_t bytes_moved;
struct amdgpu_bo_list_entry *evictable;
/* user fence */
struct amdgpu_bo_list_entry uf_entry;
};
#define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) /* bit set means command submit involves a preamble IB */
#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST    (1 << 1) /* bit set means preamble IB is first presented in this context */
#define AMDGPU_HAVE_CTX_SWITCH              (1 << 2) /* bit set means context switch occurred */
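These bits are produced at CS time and consumed when the IB is scheduled; a condensed restatement of the logic added later in this diff (amdgpu_cs_ib_fill and amdgpu_ib_schedule), not new behavior:

	/* Condensed from the hunks below; not new logic. */
	uint32_t status = 0;

	if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
		status |= AMDGPU_PREAMBLE_IB_PRESENT;
		if (!ctx->preamble_presented) {
			status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
			ctx->preamble_presented = true;
		}
	}
	if (ring->current_ctx != fence_ctx)	/* i.e. need_ctx_switch */
		status |= AMDGPU_HAVE_CTX_SWITCH;
	amdgpu_ring_emit_cntxcntl(ring, status);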
struct amdgpu_job {
struct amd_sched_job base;
struct amdgpu_device *adev;
......@@ -1235,9 +1248,10 @@ struct amdgpu_job {
struct amdgpu_sync sync;
struct amdgpu_ib *ibs;
struct fence *fence; /* the hw fence */
uint32_t preamble_status;
uint32_t num_ibs;
void *owner;
uint64_t ctx;
uint64_t fence_ctx; /* the fence_context this job uses */
bool vm_needs_flush;
unsigned vm_id;
uint64_t vm_pd_addr;
......@@ -1686,6 +1700,7 @@ struct amdgpu_vce {
unsigned harvest_config;
struct amd_sched_entity entity;
uint32_t srbm_soft_reset;
unsigned num_rings;
};
/*
......@@ -1703,6 +1718,10 @@ struct amdgpu_sdma_instance {
struct amdgpu_sdma {
struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
#ifdef CONFIG_DRM_AMDGPU_SI
	/* SI DMA has a different trap irq number for the second engine */
struct amdgpu_irq_src trap_irq_1;
#endif
struct amdgpu_irq_src trap_irq;
struct amdgpu_irq_src illegal_inst_irq;
int num_instances;
......@@ -1819,6 +1838,9 @@ struct amdgpu_asic_funcs {
int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
/* query virtual capabilities */
u32 (*get_virtual_caps)(struct amdgpu_device *adev);
/* static power management */
int (*get_pcie_lanes)(struct amdgpu_device *adev);
void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
};
/*
......@@ -1993,6 +2015,8 @@ struct amdgpu_device {
spinlock_t pcie_idx_lock;
amdgpu_rreg_t pcie_rreg;
amdgpu_wreg_t pcie_wreg;
amdgpu_rreg_t pciep_rreg;
amdgpu_wreg_t pciep_wreg;
/* protects concurrent UVD register access */
spinlock_t uvd_ctx_idx_lock;
amdgpu_rreg_t uvd_ctx_rreg;
......@@ -2033,6 +2057,14 @@ struct amdgpu_device {
atomic64_t num_evictions;
atomic_t gpu_reset_counter;
/* data for buffer migration throttling */
struct {
spinlock_t lock;
s64 last_update_us;
s64 accum_us; /* accumulated microseconds */
u32 log2_max_MBps;
} mm_stats;
/* display */
bool enable_virtual_display;
struct amdgpu_mode_info mode_info;
......@@ -2101,6 +2133,10 @@ struct amdgpu_device {
/* link all shadow bo */
struct list_head shadow_list;
struct mutex shadow_list_lock;
/* link all gtt */
spinlock_t gtt_list_lock;
struct list_head gtt_list;
};
bool amdgpu_device_is_px(struct drm_device *dev);
......@@ -2133,6 +2169,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
#define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))
#define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg))
......@@ -2223,6 +2261,9 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
#define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
#define amdgpu_asic_get_virtual_caps(adev) ((adev)->asic_funcs->get_virtual_caps((adev)))
#define amdgpu_get_pcie_lanes(adev) (adev)->asic_funcs->get_pcie_lanes((adev))
#define amdgpu_set_pcie_lanes(adev, l) (adev)->asic_funcs->set_pcie_lanes((adev), (l))
#define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev))
#define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
#define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
#define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
......@@ -2244,9 +2285,13 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
#define amdgpu_ring_get_emit_ib_size(r) (r)->funcs->get_emit_ib_size((r))
#define amdgpu_ring_get_dma_frame_size(r) (r)->funcs->get_dma_frame_size((r))
#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
......@@ -2402,6 +2447,8 @@ void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
u64 amdgpu_ttm_get_gtt_mem_size(struct amdgpu_device *adev);
int amdgpu_ttm_global_init(struct amdgpu_device *adev);
int amdgpu_ttm_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_program_register_sequence(struct amdgpu_device *adev,
const u32 *registers,
const u32 array_size);
......@@ -2434,8 +2481,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
struct drm_file *file_priv);
void amdgpu_driver_preclose_kms(struct drm_device *dev,
struct drm_file *file_priv);
-int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon);
-int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon);
+int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon);
+int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe);
void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
......@@ -2481,6 +2528,7 @@ static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
struct amdgpu_bo_va_mapping *
amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
uint64_t addr, struct amdgpu_bo **bo);
int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
#include "amdgpu_object.h"
#endif
......@@ -978,6 +978,48 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
return -EINVAL;
switch (crev) {
case 2:
case 3:
case 5:
/* r6xx, r7xx, evergreen, ni, si.
 * TODO: add support for asic_type <= CHIP_RV770 */
if (clock_type == COMPUTE_ENGINE_PLL_PARAM) {
args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
dividers->post_div = args.v3.ucPostDiv;
dividers->enable_post_div = (args.v3.ucCntlFlag &
ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
dividers->enable_dithen = (args.v3.ucCntlFlag &
ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
dividers->whole_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDiv);
dividers->frac_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDivFrac);
dividers->ref_div = args.v3.ucRefDiv;
dividers->vco_mode = (args.v3.ucCntlFlag &
ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
} else {
/* for SI we use ComputeMemoryClockParam for memory plls */
if (adev->asic_type >= CHIP_TAHITI)
return -EINVAL;
args.v5.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
if (strobe_mode)
args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN;
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
dividers->post_div = args.v5.ucPostDiv;
dividers->enable_post_div = (args.v5.ucCntlFlag &
ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
dividers->enable_dithen = (args.v5.ucCntlFlag &
ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
dividers->whole_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDiv);
dividers->frac_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDivFrac);
dividers->ref_div = args.v5.ucRefDiv;
dividers->vco_mode = (args.v5.ucCntlFlag &
ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
}
break;
case 4:
/* fusion */
args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */
......@@ -1122,6 +1164,32 @@ void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
}
void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
u16 *vddc, u16 *vddci, u16 *mvdd)
{
struct amdgpu_mode_info *mode_info = &adev->mode_info;
int index = GetIndexIntoMasterTable(DATA, FirmwareInfo);
u8 frev, crev;
u16 data_offset;
union firmware_info *firmware_info;
*vddc = 0;
*vddci = 0;
*mvdd = 0;
if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
&frev, &crev, &data_offset)) {
firmware_info =
(union firmware_info *)(mode_info->atom_context->bios +
data_offset);
*vddc = le16_to_cpu(firmware_info->info_14.usBootUpVDDCVoltage);
if ((frev == 2) && (crev >= 2)) {
*vddci = le16_to_cpu(firmware_info->info_22.usBootUpVDDCIVoltage);
*mvdd = le16_to_cpu(firmware_info->info_22.usBootUpMVDDCVoltage);
}
}
}
union set_voltage {
struct _SET_VOLTAGE_PS_ALLOCATION alloc;
struct _SET_VOLTAGE_PARAMETERS v1;
......@@ -1129,6 +1197,52 @@ union set_voltage {
struct _SET_VOLTAGE_PARAMETERS_V1_3 v3;
};
int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
u16 voltage_id, u16 *voltage)
{
union set_voltage args;
int index = GetIndexIntoMasterTable(COMMAND, SetVoltage);
u8 frev, crev;
if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
return -EINVAL;
switch (crev) {
case 1:
return -EINVAL;
case 2:
args.v2.ucVoltageType = SET_VOLTAGE_GET_MAX_VOLTAGE;
args.v2.ucVoltageMode = 0;
args.v2.usVoltageLevel = 0;
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
*voltage = le16_to_cpu(args.v2.usVoltageLevel);
break;
case 3:
args.v3.ucVoltageType = voltage_type;
args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL;
args.v3.usVoltageLevel = cpu_to_le16(voltage_id);
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
*voltage = le16_to_cpu(args.v3.usVoltageLevel);
break;
default:
DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
return -EINVAL;
}
return 0;
}
int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *adev,
u16 *voltage,
u16 leakage_idx)
{
return amdgpu_atombios_get_max_vddc(adev, VOLTAGE_TYPE_VDDC, leakage_idx, voltage);
}
void amdgpu_atombios_set_voltage(struct amdgpu_device *adev,
u16 voltage_level,
u8 voltage_type)
......@@ -1349,6 +1463,50 @@ static ATOM_VOLTAGE_OBJECT_V3 *amdgpu_atombios_lookup_voltage_object_v3(ATOM_VOL
return NULL;
}
int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
u8 voltage_type,
u8 *svd_gpio_id, u8 *svc_gpio_id)
{
int index = GetIndexIntoMasterTable(DATA, VoltageObjectInfo);
u8 frev, crev;
u16 data_offset, size;
union voltage_object_info *voltage_info;
union voltage_object *voltage_object = NULL;
if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
&frev, &crev, &data_offset)) {
voltage_info = (union voltage_object_info *)
(adev->mode_info.atom_context->bios + data_offset);
switch (frev) {
case 3:
switch (crev) {
case 1:
voltage_object = (union voltage_object *)
amdgpu_atombios_lookup_voltage_object_v3(&voltage_info->v3,
voltage_type,
VOLTAGE_OBJ_SVID2);
if (voltage_object) {
*svd_gpio_id = voltage_object->v3.asSVID2Obj.ucSVDGpioId;
*svc_gpio_id = voltage_object->v3.asSVID2Obj.ucSVCGpioId;
} else {
return -EINVAL;
}
break;
default:
DRM_ERROR("unknown voltage object table\n");
return -EINVAL;
}
break;
default:
DRM_ERROR("unknown voltage object table\n");
return -EINVAL;
}
}
return 0;
}
bool
amdgpu_atombios_is_voltage_gpio(struct amdgpu_device *adev,
u8 voltage_type, u8 voltage_mode)
......
......@@ -208,5 +208,19 @@ void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev);
void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev);
void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le);
int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
u16 voltage_id, u16 *voltage);
int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *adev,
u16 *voltage,
u16 leakage_idx);
void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
u16 *vddc, u16 *vddci, u16 *mvdd);
int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
u8 clock_type,
u32 clock,
bool strobe_mode,
struct atom_clock_dividers *dividers);
int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
u8 voltage_type,
u8 *svd_gpio_id, u8 *svc_gpio_id);
#endif
......@@ -616,7 +616,7 @@ static int amdgpu_cgs_irq_put(struct cgs_device *cgs_device, unsigned src_id, un
return amdgpu_irq_put(adev, adev->irq.sources[src_id], type);
}
-int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
+static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
enum amd_ip_block_type block_type,
enum amd_clockgating_state state)
{
......@@ -637,7 +637,7 @@ int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
return r;
}
-int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device,
+static int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device,
enum amd_ip_block_type block_type,
enum amd_powergating_state state)
{
......@@ -848,6 +848,12 @@ static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
case CGS_SYSTEM_INFO_GFX_SE_INFO:
sys_info->value = adev->gfx.config.max_shader_engines;
break;
case CGS_SYSTEM_INFO_PCIE_SUB_SYS_ID:
sys_info->value = adev->pdev->subsystem_device;
break;
case CGS_SYSTEM_INFO_PCIE_SUB_SYS_VENDOR_ID:
sys_info->value = adev->pdev->subsystem_vendor;
break;
default:
return -ENODEV;
}
......
......@@ -91,6 +91,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
uint32_t *offset)
{
struct drm_gem_object *gobj;
unsigned long size;
gobj = drm_gem_object_lookup(p->filp, data->handle);
if (gobj == NULL)
......@@ -101,6 +102,11 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
p->uf_entry.tv.shared = true;
p->uf_entry.user_pages = NULL;
size = amdgpu_bo_size(p->uf_entry.robj);
if (size != PAGE_SIZE || (data->offset + 8) > size)
return -EINVAL;
*offset = data->offset;
drm_gem_object_unreference_unlocked(gobj);
......@@ -235,56 +241,115 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
return ret;
}
-/* Returns how many bytes TTM can move per IB.
- */
+/* Convert microseconds to bytes. */
+static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
+{
+	if (us <= 0 || !adev->mm_stats.log2_max_MBps)
+		return 0;
+
+	/* Since accum_us is incremented by a million per second, just
+	 * multiply it by the number of MB/s to get the number of bytes.
+	 */
+	return us << adev->mm_stats.log2_max_MBps;
+}
+
+static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
+{
+	if (!adev->mm_stats.log2_max_MBps)
+		return 0;
+
+	return bytes >> adev->mm_stats.log2_max_MBps;
+}
+
+/* Returns how many bytes TTM can move right now. If no bytes can be moved,
+ * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
+ * which means it can go over the threshold once. If that happens, the driver
+ * will be in debt and no other buffer migrations can be done until that debt
+ * is repaid.
+ *
+ * This approach allows moving a buffer of any size (it's important to allow
+ * that).
+ *
+ * The currency is simply time in microseconds and it increases as the clock
+ * ticks. The accumulated microseconds (us) are converted to bytes and
+ * returned.
+ */
 static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 {
-	u64 real_vram_size = adev->mc.real_vram_size;
-	u64 vram_usage = atomic64_read(&adev->vram_usage);
+	s64 time_us, increment_us;
+	u64 max_bytes;
+	u64 free_vram, total_vram, used_vram;

-	/* This function is based on the current VRAM usage.
-	 *
-	 * - If all of VRAM is free, allow relocating the number of bytes that
-	 *   is equal to 1/4 of the size of VRAM for this IB.
-	 *
-	 * - If more than one half of VRAM is occupied, only allow relocating
-	 *   1 MB of data for this IB.
-	 *
-	 * - From 0 to one half of used VRAM, the threshold decreases
-	 *   linearly.
-	 *        __________________
-	 * 1/4 of -|\               |
-	 * VRAM    | \              |
-	 *         |  \             |
-	 *         |   \            |
-	 *         |    \           |
-	 *         |     \          |
-	 *         |      \         |
-	 *         |       \________|1 MB
-	 *         |----------------|
-	 *    VRAM 0 %             100 %
-	 *         used            used
-	 *
-	 * Note: It's a threshold, not a limit. The threshold must be crossed
-	 * for buffer relocations to stop, so any buffer of an arbitrary size
-	 * can be moved as long as the threshold isn't crossed before
-	 * the relocation takes place. We don't want to disable buffer
-	 * relocations completely.
-	 *
-	 * The idea is that buffers should be placed in VRAM at creation time
-	 * and TTM should only do a minimum number of relocations during
-	 * command submission. In practice, you need to submit at least
-	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
-	 *
-	 * Also, things can get pretty crazy under memory pressure and actual
-	 * VRAM usage can change a lot, so playing safe even at 50% does
-	 * consistently increase performance.
-	 */
-	u64 half_vram = real_vram_size >> 1;
-	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
-	u64 bytes_moved_threshold = half_free_vram >> 1;
-	return max(bytes_moved_threshold, 1024*1024ull);
+	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
+	 * throttling.
+	 *
+	 * It means that in order to get full max MBps, at least 5 IBs per
+	 * second must be submitted and not more than 200ms apart from each
+	 * other.
+	 */
+	const s64 us_upper_bound = 200000;
+
+	if (!adev->mm_stats.log2_max_MBps)
+		return 0;
+
+	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
+	used_vram = atomic64_read(&adev->vram_usage);
+	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
+
+	spin_lock(&adev->mm_stats.lock);
+
+	/* Increase the amount of accumulated us. */
+	time_us = ktime_to_us(ktime_get());
+	increment_us = time_us - adev->mm_stats.last_update_us;
+	adev->mm_stats.last_update_us = time_us;
+	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
+				      us_upper_bound);
+
+	/* This prevents the short period of low performance when the VRAM
+	 * usage is low and the driver is in debt or doesn't have enough
+	 * accumulated us to fill VRAM quickly.
+	 *
+	 * The situation can occur in these cases:
+	 * - a lot of VRAM is freed by userspace
+	 * - the presence of a big buffer causes a lot of evictions
+	 *   (solution: split buffers into smaller ones)
+	 *
+	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
+	 * accum_us to a positive number.
+	 */
+	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
+		s64 min_us;
+
+		/* Be more aggressive on dGPUs. Try to fill a portion of free
+		 * VRAM now.
+		 */
+		if (!(adev->flags & AMD_IS_APU))
+			min_us = bytes_to_us(adev, free_vram / 4);
+		else
+			min_us = 0; /* Reset accum_us on APUs. */
+
+		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
+	}
+
+	/* This returns 0 if the driver is in debt to disallow (optional)
+	 * buffer moves.
+	 */
+	max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+	spin_unlock(&adev->mm_stats.lock);
+	return max_bytes;
 }
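To make the fixed-point budget concrete, a worked example using the 8 MB/s default that amdgpu_device_init sets later in this diff (the numbers are mine, not from the patch):

	/* Assuming max_MBps = 8, so log2_max_MBps = 3:
	 * - the 200000 us cap converts to us_to_bytes(200000) = 200000 << 3
	 *   = 1,600,000 bytes, i.e. ~1.6 MB per 200 ms window = 8 MB/s;
	 * - reporting a 4 MiB move costs bytes_to_us(4 << 20) = 4194304 >> 3
	 *   = 524,288 us, leaving accum_us = 200000 - 524288 = -324,288,
	 *   so optional moves stay disallowed for ~0.32 s of wall clock.
	 */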
/* Report how many bytes have really been moved for the last command
* submission. This can result in a debt that can stop buffer migrations
* temporarily.
*/
static void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev,
u64 num_bytes)
{
spin_lock(&adev->mm_stats.lock);
adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
spin_unlock(&adev->mm_stats.lock);
}
static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
......@@ -297,15 +362,10 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
if (bo->pin_count)
return 0;
-	/* Avoid moving this one if we have moved too many buffers
-	 * for this IB already.
-	 *
-	 * Note that this allows moving at least one buffer of
-	 * any size, because it doesn't take the current "bo"
-	 * into account. We don't want to disallow buffer moves
-	 * completely.
+	/* Don't move this buffer if we have depleted our allowance
+	 * to move it. Don't move anything if the threshold is zero.
	 */
-	if (p->bytes_moved <= p->bytes_moved_threshold)
+	if (p->bytes_moved < p->bytes_moved_threshold)
domain = bo->prefered_domains;
else
domain = bo->allowed_domains;
......@@ -317,17 +377,67 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) -
initial_bytes_moved;
if (unlikely(r)) {
-		if (r != -ERESTARTSYS && domain != bo->allowed_domains) {
-			domain = bo->allowed_domains;
-			goto retry;
-		}
+		if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
+			domain = bo->allowed_domains;
+			goto retry;
+		}
return r;
}
-int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
/* Last resort, try to evict something from the current working set */
static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
struct amdgpu_bo_list_entry *lobj)
{
uint32_t domain = lobj->robj->allowed_domains;
int r;
if (!p->evictable)
return false;
for (;&p->evictable->tv.head != &p->validated;
p->evictable = list_prev_entry(p->evictable, tv.head)) {
struct amdgpu_bo_list_entry *candidate = p->evictable;
struct amdgpu_bo *bo = candidate->robj;
u64 initial_bytes_moved;
uint32_t other;
/* If we reached our current BO we can forget it */
if (candidate == lobj)
break;
other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
/* Check if this BO is in one of the domains we need space for */
if (!(other & domain))
continue;
/* Check if we can move this BO somewhere else */
other = bo->allowed_domains & ~domain;
if (!other)
continue;
/* Good we can try to move this BO somewhere else */
amdgpu_ttm_placement_from_domain(bo, other);
initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved);
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) -
initial_bytes_moved;
if (unlikely(r))
break;
p->evictable = list_prev_entry(p->evictable, tv.head);
list_move(&candidate->tv.head, &p->validated);
return true;
}
return false;
}
+static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
struct list_head *validated)
{
struct amdgpu_bo_list_entry *lobj;
......@@ -351,9 +461,15 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
binding_userptr = true;
}
-		r = amdgpu_cs_bo_validate(p, bo);
+		if (p->evictable == lobj)
+			p->evictable = NULL;
+
+		do {
+			r = amdgpu_cs_bo_validate(p, bo);
+		} while (r == -ENOMEM && amdgpu_cs_try_evict(p, lobj));
if (r)
return r;
if (bo->shadow) {
r = amdgpu_cs_bo_validate(p, bo);
if (r)
......@@ -481,6 +597,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
p->bytes_moved = 0;
p->evictable = list_last_entry(&p->validated,
struct amdgpu_bo_list_entry,
tv.head);
r = amdgpu_cs_list_validate(p, &duplicates);
if (r) {
......@@ -494,6 +613,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto error_validate;
}
amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved);
fpriv->vm.last_eviction_counter =
atomic64_read(&p->adev->num_evictions);
......@@ -524,8 +645,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
}
}
-	if (p->uf_entry.robj)
-		p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
+	if (!r && p->uf_entry.robj) {
+		struct amdgpu_bo *uf = p->uf_entry.robj;
+
+		r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
+		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
+	}
error_validate:
if (r) {
......@@ -735,6 +860,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
if (r)
return r;
if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
if (!parser->ctx->preamble_presented) {
parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
parser->ctx->preamble_presented = true;
}
}
if (parser->job->ring && parser->job->ring != ring)
return -EINVAL;
......@@ -874,7 +1007,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
}
job->owner = p->filp;
-	job->ctx = entity->fence_context;
+	job->fence_ctx = entity->fence_context;
p->fence = fence_get(&job->base.s_fence->finished);
cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
job->uf_sequence = cs->out.handle;
......@@ -1040,3 +1173,29 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
return NULL;
}
/**
* amdgpu_cs_sysvm_access_required - make BOs accessible by the system VM
*
* @parser: command submission parser context
*
* Helper for UVD/VCE VM emulation, make sure BOs are accessible by the system VM.
*/
int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
{
unsigned i;
int r;
if (!parser->bo_list)
return 0;
for (i = 0; i < parser->bo_list->num_entries; i++) {
struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
if (unlikely(r))
return r;
}
return 0;
}
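A sketch of how an engine-specific parser is expected to call the new helper before doing register-level IB emulation; the caller below is hypothetical (the real UVD/VCE call sites are not part of this hunk):

	static int example_uvd_cs_parse(struct amdgpu_cs_parser *parser)
	{
		int r = amdgpu_cs_sysvm_access_required(parser);

		if (r)
			return r;

		/* ... parse the IB; all BOs now have valid GART mappings ... */
		return 0;
	}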
......@@ -41,6 +41,9 @@
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
......@@ -52,6 +55,11 @@ static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev);
static const char *amdgpu_asic_name[] = {
"TAHITI",
"PITCAIRN",
"VERDE",
"OLAND",
"HAINAN",
"BONAIRE",
"KAVERI",
"KABINI",
......@@ -1027,7 +1035,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
/* don't suspend or resume card normally */
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
-		amdgpu_resume_kms(dev, true, true);
+		amdgpu_device_resume(dev, true, true);
dev->pdev->d3_delay = d3_delay;
......@@ -1037,7 +1045,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
printk(KERN_INFO "amdgpu: switched off\n");
drm_kms_helper_poll_disable(dev);
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
-		amdgpu_suspend_kms(dev, true, true);
+		amdgpu_device_suspend(dev, true, true);
dev->switch_power_state = DRM_SWITCH_POWER_OFF;
}
}
......@@ -1231,6 +1239,18 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
if (r)
return r;
break;
#ifdef CONFIG_DRM_AMDGPU_SI
case CHIP_VERDE:
case CHIP_TAHITI:
case CHIP_PITCAIRN:
case CHIP_OLAND:
case CHIP_HAINAN:
adev->family = AMDGPU_FAMILY_SI;
r = si_set_ip_blocks(adev);
if (r)
return r;
break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_BONAIRE:
case CHIP_HAWAII:
......@@ -1347,6 +1367,9 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_block_status[i].valid)
continue;
if (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_UVD ||
adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_VCE)
continue;
/* enable clockgating to save power */
r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_GATE);
......@@ -1490,6 +1513,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
{
int r, i;
bool runtime = false;
u32 max_MBps;
adev->shutdown = false;
adev->dev = &pdev->dev;
......@@ -1513,6 +1537,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->smc_wreg = &amdgpu_invalid_wreg;
adev->pcie_rreg = &amdgpu_invalid_rreg;
adev->pcie_wreg = &amdgpu_invalid_wreg;
adev->pciep_rreg = &amdgpu_invalid_rreg;
adev->pciep_wreg = &amdgpu_invalid_wreg;
adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
adev->didt_rreg = &amdgpu_invalid_rreg;
......@@ -1549,12 +1575,22 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(&adev->didt_idx_lock);
spin_lock_init(&adev->gc_cac_idx_lock);
spin_lock_init(&adev->audio_endpt_idx_lock);
spin_lock_init(&adev->mm_stats.lock);
INIT_LIST_HEAD(&adev->shadow_list);
mutex_init(&adev->shadow_list_lock);
-	adev->rmmio_base = pci_resource_start(adev->pdev, 5);
-	adev->rmmio_size = pci_resource_len(adev->pdev, 5);
INIT_LIST_HEAD(&adev->gtt_list);
spin_lock_init(&adev->gtt_list_lock);
if (adev->asic_type >= CHIP_BONAIRE) {
adev->rmmio_base = pci_resource_start(adev->pdev, 5);
adev->rmmio_size = pci_resource_len(adev->pdev, 5);
} else {
adev->rmmio_base = pci_resource_start(adev->pdev, 2);
adev->rmmio_size = pci_resource_len(adev->pdev, 2);
}
adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
if (adev->rmmio == NULL) {
return -ENOMEM;
......@@ -1562,8 +1598,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
-	/* doorbell bar mapping */
-	amdgpu_doorbell_init(adev);
+	if (adev->asic_type >= CHIP_BONAIRE)
+		/* doorbell bar mapping */
+		amdgpu_doorbell_init(adev);
/* io port mapping */
for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
......@@ -1660,6 +1697,14 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->accel_working = true;
/* Initialize the buffer migration limit. */
if (amdgpu_moverate >= 0)
max_MBps = amdgpu_moverate;
else
max_MBps = 8; /* Allow 8 MB/s. */
/* Get a log2 for easy divisions. */
adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
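Note that ilog2() rounds down, so the effective cap is the largest power of two not above the requested rate; a brief illustration (my numbers, not from the patch):

	/* moverate=64     -> log2_max_MBps = 6 -> throttled at 64 MB/s
	 * moverate=100    -> log2_max_MBps = 6 -> still 64 MB/s, not 100
	 * moverate=0 or 1 -> log2_max_MBps = 0 -> the CS throttling path
	 *                    treats the budget as disabled and returns 0
	 */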
amdgpu_fbdev_init(adev);
r = amdgpu_ib_pool_init(adev);
......@@ -1764,7 +1809,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
adev->rio_mem = NULL;
iounmap(adev->rmmio);
adev->rmmio = NULL;
-	amdgpu_doorbell_fini(adev);
+	if (adev->asic_type >= CHIP_BONAIRE)
+		amdgpu_doorbell_fini(adev);
amdgpu_debugfs_regs_cleanup(adev);
amdgpu_debugfs_remove_files(adev);
}
......@@ -1774,7 +1820,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
* Suspend & resume.
*/
/**
- * amdgpu_suspend_kms - initiate device suspend
+ * amdgpu_device_suspend - initiate device suspend
*
* @pdev: drm dev pointer
* @state: suspend state
......@@ -1783,7 +1829,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
* Returns 0 for success or an error on failure.
* Called at driver suspend.
*/
-int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
+int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
{
struct amdgpu_device *adev;
struct drm_crtc *crtc;
......@@ -1796,7 +1842,8 @@ int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
adev = dev->dev_private;
-	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF ||
+	    dev->switch_power_state == DRM_SWITCH_POWER_DYNAMIC_OFF)
return 0;
drm_kms_helper_poll_disable(dev);
......@@ -1851,6 +1898,10 @@ int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
/* Shut down the device */
pci_disable_device(dev->pdev);
pci_set_power_state(dev->pdev, PCI_D3hot);
} else {
r = amdgpu_asic_reset(adev);
if (r)
DRM_ERROR("amdgpu asic reset failed\n");
}
if (fbcon) {
......@@ -1862,7 +1913,7 @@ int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
}
/**
- * amdgpu_resume_kms - initiate device resume
+ * amdgpu_device_resume - initiate device resume
*
* @pdev: drm dev pointer
*
......@@ -1870,32 +1921,37 @@ int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
* Returns 0 for success or an error on failure.
* Called at driver resume.
*/
-int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
+int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
{
struct drm_connector *connector;
struct amdgpu_device *adev = dev->dev_private;
struct drm_crtc *crtc;
int r;
-	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF ||
+	    dev->switch_power_state == DRM_SWITCH_POWER_DYNAMIC_OFF)
return 0;
-	if (fbcon) {
+	if (fbcon)
		console_lock();
-	}
if (resume) {
pci_set_power_state(dev->pdev, PCI_D0);
pci_restore_state(dev->pdev);
-		if (pci_enable_device(dev->pdev)) {
+		r = pci_enable_device(dev->pdev);
+		if (r) {
			if (fbcon)
				console_unlock();
-			return -1;
+			return r;
}
}
/* post card */
if (!amdgpu_card_posted(adev))
amdgpu_atom_asic_init(adev->mode_info.atom_context);
if (!amdgpu_card_posted(adev) || !resume) {
r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
if (r)
DRM_ERROR("amdgpu asic init failed\n");
}
r = amdgpu_resume(adev);
if (r)
......@@ -2163,6 +2219,11 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
}
if (!r) {
amdgpu_irq_gpu_reset_resume_helper(adev);
if (need_full_reset && amdgpu_need_backup(adev)) {
r = amdgpu_ttm_recover_gart(adev);
if (r)
DRM_ERROR("gart recovery failed!!!\n");
}
r = amdgpu_ib_ring_tests(adev);
if (r) {
dev_err(adev->dev, "ib ring test failed (%d).\n", r);
......@@ -2600,7 +2661,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
while (size) {
uint32_t value;
-		value = RREG32_SMC(*pos >> 2);
+		value = RREG32_SMC(*pos);
r = put_user(value, (uint32_t *)buf);
if (r)
return r;
......@@ -2631,7 +2692,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
if (r)
return r;
-	WREG32_SMC(*pos >> 2, value);
+	WREG32_SMC(*pos, value);
result += 4;
buf += 4;
......
......@@ -55,13 +55,15 @@
* - 3.3.0 - Add VM support for UVD on supported hardware.
* - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
* - 3.5.0 - Add support for new UVD_NO_OP register.
 * - 3.6.0 - The KMD now emits CONTEXT_CONTROL packets in the ring buffer.
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR	5
+#define KMS_DRIVER_MINOR	6
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
int amdgpu_gart_size = -1; /* auto */
int amdgpu_moverate = -1; /* auto */
int amdgpu_benchmarking = 0;
int amdgpu_testing = 0;
int amdgpu_audio = -1;
......@@ -93,6 +95,7 @@ unsigned amdgpu_cg_mask = 0xffffffff;
unsigned amdgpu_pg_mask = 0xffffffff;
char *amdgpu_disable_cu = NULL;
char *amdgpu_virtual_display = NULL;
unsigned amdgpu_pp_feature_mask = 0xffffffff;
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
......@@ -100,6 +103,9 @@ module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc., -1 = auto)");
module_param_named(gartsize, amdgpu_gart_size, int, 0600);
MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc., -1=auto; 0 or 1 = disabled)");
module_param_named(moverate, amdgpu_moverate, int, 0600);
MODULE_PARM_DESC(benchmark, "Run benchmark");
module_param_named(benchmark, amdgpu_benchmarking, int, 0444);
......@@ -172,6 +178,9 @@ module_param_named(powerplay, amdgpu_powerplay, int, 0444);
MODULE_PARM_DESC(powercontainment, "Power Containment (1 = enable (default), 0 = disable)");
module_param_named(powercontainment, amdgpu_powercontainment, int, 0444);
MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default)");
module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, int, 0444);
#endif
MODULE_PARM_DESC(sclkdeepsleep, "SCLK Deep Sleep (1 = enable (default), 0 = disable)");
......@@ -196,6 +205,80 @@ MODULE_PARM_DESC(virtual_display, "Enable virtual display feature (the virtual_d
module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x678A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6790, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6791, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6792, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6798, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6799, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x679A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x679B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x679E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x679F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|AMD_IS_MOBILITY},
{0x1002, 0x6801, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|AMD_IS_MOBILITY},
{0x1002, 0x6802, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|AMD_IS_MOBILITY},
{0x1002, 0x6806, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
{0x1002, 0x6808, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
{0x1002, 0x6809, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
{0x1002, 0x6810, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
{0x1002, 0x6811, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
{0x1002, 0x6816, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
{0x1002, 0x6817, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
{0x1002, 0x6818, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
{0x1002, 0x6819, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
{0x1002, 0x6600, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
{0x1002, 0x6601, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
{0x1002, 0x6602, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
{0x1002, 0x6603, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
{0x1002, 0x6604, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
{0x1002, 0x6605, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
{0x1002, 0x6606, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
{0x1002, 0x6607, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
{0x1002, 0x6608, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
{0x1002, 0x6610, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
{0x1002, 0x6611, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
{0x1002, 0x6613, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
{0x1002, 0x6617, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
{0x1002, 0x6620, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
{0x1002, 0x6621, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
{0x1002, 0x6623, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
{0x1002, 0x6631, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
{0x1002, 0x6820, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
{0x1002, 0x6821, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
{0x1002, 0x6822, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
{0x1002, 0x6823, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
{0x1002, 0x6824, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
{0x1002, 0x6825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
{0x1002, 0x6826, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
{0x1002, 0x6827, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
{0x1002, 0x6828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
{0x1002, 0x6829, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
{0x1002, 0x682A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
{0x1002, 0x682B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
{0x1002, 0x682C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
{0x1002, 0x682D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
{0x1002, 0x682F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
{0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
{0x1002, 0x6831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
{0x1002, 0x6835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
{0x1002, 0x6837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
{0x1002, 0x6838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
{0x1002, 0x6839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
{0x1002, 0x683B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
{0x1002, 0x683D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
{0x1002, 0x683F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
{0x1002, 0x6660, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
{0x1002, 0x6663, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
{0x1002, 0x6664, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
{0x1002, 0x6665, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
{0x1002, 0x6667, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
{0x1002, 0x666F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
/* Kaveri */
{0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
......@@ -393,32 +476,72 @@ amdgpu_pci_remove(struct pci_dev *pdev)
drm_put_dev(dev);
}
static void
amdgpu_pci_shutdown(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = dev->dev_private;
	/* if we are running in a VM, make sure the device
	 * is torn down properly on reboot/shutdown
	 */
if (adev->virtualization.is_virtual)
amdgpu_pci_remove(pdev);
}
static int amdgpu_pmops_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct drm_device *drm_dev = pci_get_drvdata(pdev);
-	return amdgpu_suspend_kms(drm_dev, true, true);
+	return amdgpu_device_suspend(drm_dev, true, true);
}
static int amdgpu_pmops_resume(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct drm_device *drm_dev = pci_get_drvdata(pdev);
-	return amdgpu_resume_kms(drm_dev, true, true);
+	/* GPU comes up enabled by the bios on resume */
+	if (amdgpu_device_is_px(drm_dev)) {
+		pm_runtime_disable(dev);
+		pm_runtime_set_active(dev);
+		pm_runtime_enable(dev);
+	}
+
+	return amdgpu_device_resume(drm_dev, true, true);
}
static int amdgpu_pmops_freeze(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct drm_device *drm_dev = pci_get_drvdata(pdev);
-	return amdgpu_suspend_kms(drm_dev, false, true);
+	return amdgpu_device_suspend(drm_dev, false, true);
}
static int amdgpu_pmops_thaw(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct drm_device *drm_dev = pci_get_drvdata(pdev);
return amdgpu_device_resume(drm_dev, false, true);
}
static int amdgpu_pmops_poweroff(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct drm_device *drm_dev = pci_get_drvdata(pdev);
return amdgpu_device_suspend(drm_dev, true, true);
}
static int amdgpu_pmops_restore(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct drm_device *drm_dev = pci_get_drvdata(pdev);
-	return amdgpu_resume_kms(drm_dev, false, true);
+	return amdgpu_device_resume(drm_dev, false, true);
}
static int amdgpu_pmops_runtime_suspend(struct device *dev)
......@@ -436,7 +559,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
drm_kms_helper_poll_disable(drm_dev);
vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_OFF);
-	ret = amdgpu_suspend_kms(drm_dev, false, false);
+	ret = amdgpu_device_suspend(drm_dev, false, false);
pci_save_state(pdev);
pci_disable_device(pdev);
pci_ignore_hotplug(pdev);
......@@ -469,7 +592,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
return ret;
pci_set_master(pdev);
-	ret = amdgpu_resume_kms(drm_dev, false, false);
+	ret = amdgpu_device_resume(drm_dev, false, false);
drm_kms_helper_poll_enable(drm_dev);
vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_ON);
drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
......@@ -523,8 +646,8 @@ static const struct dev_pm_ops amdgpu_pm_ops = {
.resume = amdgpu_pmops_resume,
.freeze = amdgpu_pmops_freeze,
.thaw = amdgpu_pmops_thaw,
-	.poweroff = amdgpu_pmops_freeze,
-	.restore = amdgpu_pmops_resume,
+	.poweroff = amdgpu_pmops_poweroff,
+	.restore = amdgpu_pmops_restore,
.runtime_suspend = amdgpu_pmops_runtime_suspend,
.runtime_resume = amdgpu_pmops_runtime_resume,
.runtime_idle = amdgpu_pmops_runtime_idle,
......@@ -606,6 +729,7 @@ static struct pci_driver amdgpu_kms_pci_driver = {
.id_table = pciidlist,
.probe = amdgpu_pci_probe,
.remove = amdgpu_pci_remove,
.shutdown = amdgpu_pci_shutdown,
.driver.pm = &amdgpu_pm_ops,
};
......
......@@ -25,6 +25,7 @@
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/pm_runtime.h>
#include <drm/drmP.h>
#include <drm/drm_crtc.h>
......@@ -47,8 +48,35 @@ struct amdgpu_fbdev {
struct amdgpu_device *adev;
};
static int
amdgpufb_open(struct fb_info *info, int user)
{
struct amdgpu_fbdev *rfbdev = info->par;
struct amdgpu_device *adev = rfbdev->adev;
int ret = pm_runtime_get_sync(adev->ddev->dev);
if (ret < 0 && ret != -EACCES) {
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
return ret;
}
return 0;
}
static int
amdgpufb_release(struct fb_info *info, int user)
{
struct amdgpu_fbdev *rfbdev = info->par;
struct amdgpu_device *adev = rfbdev->adev;
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
return 0;
}
static struct fb_ops amdgpufb_ops = {
.owner = THIS_MODULE,
.fb_open = amdgpufb_open,
.fb_release = amdgpufb_release,
.fb_check_var = drm_fb_helper_check_var,
.fb_set_par = drm_fb_helper_set_par,
.fb_fillrect = drm_fb_helper_cfb_fillrect,
......
......@@ -124,7 +124,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
bool skip_preamble, need_ctx_switch;
unsigned patch_offset = ~0;
struct amdgpu_vm *vm;
-	uint64_t ctx;
+	uint64_t fence_ctx;
+	uint32_t status = 0, alloc_size;
unsigned i;
int r = 0;
......@@ -135,10 +136,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
/* ring tests don't use a job */
if (job) {
vm = job->vm;
-		ctx = job->ctx;
+		fence_ctx = job->fence_ctx;
	} else {
		vm = NULL;
-		ctx = 0;
+		fence_ctx = 0;
}
if (!ring->ready) {
......@@ -151,7 +152,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
return -EINVAL;
}
-	r = amdgpu_ring_alloc(ring, 256 * num_ibs);
+	alloc_size = amdgpu_ring_get_dma_frame_size(ring) +
+		num_ibs * amdgpu_ring_get_emit_ib_size(ring);
+
+	r = amdgpu_ring_alloc(ring, alloc_size);
if (r) {
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
return r;
......@@ -174,13 +178,22 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
/* always set cond_exec_polling to CONTINUE */
*ring->cond_exe_cpu_addr = 1;
-	skip_preamble = ring->current_ctx == ctx;
-	need_ctx_switch = ring->current_ctx != ctx;
+	skip_preamble = ring->current_ctx == fence_ctx;
+	need_ctx_switch = ring->current_ctx != fence_ctx;
if (job && ring->funcs->emit_cntxcntl) {
if (need_ctx_switch)
status |= AMDGPU_HAVE_CTX_SWITCH;
status |= job->preamble_status;
amdgpu_ring_emit_cntxcntl(ring, status);
}
for (i = 0; i < num_ibs; ++i) {
ib = &ibs[i];
/* drop preamble IBs if we don't have a context switch */
-		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
+		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
+		    skip_preamble &&
+		    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST))
continue;
amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
......@@ -209,7 +222,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
amdgpu_ring_patch_cond_exec(ring, patch_offset);
-	ring->current_ctx = ctx;
+	ring->current_ctx = fence_ctx;
if (ring->funcs->emit_switch_buffer)
amdgpu_ring_emit_switch_buffer(ring);
amdgpu_ring_commit(ring);
return 0;
}
......
......@@ -119,8 +119,6 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
*/
void amdgpu_ih_ring_fini(struct amdgpu_device *adev)
{
-	int r;
if (adev->irq.ih.use_bus_addr) {
if (adev->irq.ih.ring) {
/* add 8 bytes for the rptr/wptr shadows and
......@@ -132,17 +130,9 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev)
adev->irq.ih.ring = NULL;
}
} else {
-		if (adev->irq.ih.ring_obj) {
-			r = amdgpu_bo_reserve(adev->irq.ih.ring_obj, false);
-			if (likely(r == 0)) {
-				amdgpu_bo_kunmap(adev->irq.ih.ring_obj);
-				amdgpu_bo_unpin(adev->irq.ih.ring_obj);
-				amdgpu_bo_unreserve(adev->irq.ih.ring_obj);
-			}
-			amdgpu_bo_unref(&adev->irq.ih.ring_obj);
-			adev->irq.ih.ring = NULL;
-			adev->irq.ih.ring_obj = NULL;
-		}
+		amdgpu_bo_free_kernel(&adev->irq.ih.ring_obj,
+				      &adev->irq.ih.gpu_addr,
+				      (void **)&adev->irq.ih.ring);
amdgpu_wb_free(adev, adev->irq.ih.wptr_offs);
amdgpu_wb_free(adev, adev->irq.ih.rptr_offs);
}
......
......@@ -91,7 +91,7 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
amdgpu_ib_free(job->adev, &job->ibs[i], f);
}
-void amdgpu_job_free_cb(struct amd_sched_job *s_job)
+static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
{
struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
......@@ -124,7 +124,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
return r;
job->owner = owner;
-	job->ctx = entity->fence_context;
+	job->fence_ctx = entity->fence_context;
*f = fence_get(&job->base.s_fence->finished);
amdgpu_job_free_resources(job);
amd_sched_entity_push_job(&job->base);
......
......@@ -296,7 +296,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
break;
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
-		for (i = 0; i < AMDGPU_MAX_VCE_RINGS; i++)
+		for (i = 0; i < adev->vce.num_rings; i++)
ring_mask |= ((adev->vce.ring[i].ready ? 1 : 0) << i);
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 1;
......@@ -542,12 +542,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
return r;
fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
-	if (unlikely(!fpriv))
-		return -ENOMEM;
+	if (unlikely(!fpriv)) {
+		r = -ENOMEM;
+		goto out_suspend;
+	}
r = amdgpu_vm_init(adev, &fpriv->vm);
-	if (r)
-		goto error_free;
+	if (r) {
+		kfree(fpriv);
+		goto out_suspend;
+	}
mutex_init(&fpriv->bo_list_lock);
idr_init(&fpriv->bo_list_handles);
......@@ -556,12 +560,9 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
file_priv->driver_priv = fpriv;
out_suspend:
pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
-error_free:
-	kfree(fpriv);
-	return r;
}
......@@ -600,6 +601,9 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
kfree(fpriv);
file_priv->driver_priv = NULL;
pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
}
/**
......@@ -614,6 +618,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
void amdgpu_driver_preclose_kms(struct drm_device *dev,
struct drm_file *file_priv)
{
pm_runtime_get_sync(dev->dev);
}
/*
......
......@@ -38,8 +38,6 @@
#include "amdgpu_trace.h"
-int amdgpu_ttm_init(struct amdgpu_device *adev);
-void amdgpu_ttm_fini(struct amdgpu_device *adev);
static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev,
struct ttm_mem_reg *mem)
......@@ -287,6 +285,35 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
return r;
}
/**
* amdgpu_bo_free_kernel - free BO for kernel use
*
* @bo: amdgpu BO to free
*
* unmaps and unpin a BO for kernel internal use.
*/
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr)
{
if (*bo == NULL)
return;
if (likely(amdgpu_bo_reserve(*bo, false) == 0)) {
if (cpu_addr)
amdgpu_bo_kunmap(*bo);
amdgpu_bo_unpin(*bo);
amdgpu_bo_unreserve(*bo);
}
amdgpu_bo_unref(bo);
if (gpu_addr)
*gpu_addr = 0;
if (cpu_addr)
*cpu_addr = NULL;
}
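Its intended pairing with amdgpu_bo_create_kernel() (declared in the header hunk below) looks like this sketch; the names are local to the example:

	struct amdgpu_bo *bo = NULL;
	u64 gpu_addr;
	void *cpu_ptr;
	int r;

	r = amdgpu_bo_create_kernel(adev, 4096, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &bo,
				    &gpu_addr, &cpu_ptr);
	if (r)
		return r;

	/* ... use cpu_ptr / gpu_addr ... */

	amdgpu_bo_free_kernel(&bo, &gpu_addr, &cpu_ptr); /* also clears all three */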
int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
unsigned long size, int byte_align,
bool kernel, u32 domain, u64 flags,
......@@ -646,6 +673,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
dev_err(bo->adev->dev, "%p pin failed\n", bo);
goto error;
}
r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
if (unlikely(r)) {
dev_err(bo->adev->dev, "%p bind failed\n", bo);
goto error;
}
bo->pin_count = 1;
if (gpu_addr != NULL)
......@@ -692,7 +724,7 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
bo->adev->vram_pin_size -= amdgpu_bo_size(bo);
if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
bo->adev->invisible_pin_size -= amdgpu_bo_size(bo);
-	} else {
+	} else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
bo->adev->gart_pin_size -= amdgpu_bo_size(bo);
}
......@@ -918,8 +950,11 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence,
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
{
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
!amdgpu_ttm_is_bound(bo->tbo.ttm));
WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
!bo->pin_count);
WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
return bo->tbo.offset;
}
......@@ -31,6 +31,8 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
/**
* amdgpu_mem_type_to_domain - return domain corresponding to mem_type
* @mem_type: ttm memory type
......@@ -128,6 +130,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
unsigned long size, int align,
u32 domain, struct amdgpu_bo **bo_ptr,
u64 *gpu_addr, void **cpu_addr);
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr);
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);
......
......@@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "atom.h"
#include "atombios_encoders.h"
#include "amdgpu_pll.h"
#include <asm/div64.h>
#include <linux/gcd.h>
......
......@@ -30,6 +30,7 @@
#include "amdgpu_pm.h"
#include <drm/amdgpu_drm.h>
#include "amdgpu_powerplay.h"
#include "si_dpm.h"
#include "cik_dpm.h"
#include "vi_dpm.h"
......@@ -52,10 +53,6 @@ static int amdgpu_powerplay_init(struct amdgpu_device *adev)
pp_init->chip_family = adev->family;
pp_init->chip_id = adev->asic_type;
pp_init->device = amdgpu_cgs_create_device(adev);
- pp_init->rev_id = adev->pdev->revision;
- pp_init->sub_sys_id = adev->pdev->subsystem_device;
- pp_init->sub_vendor_id = adev->pdev->subsystem_vendor;
ret = amd_powerplay_init(pp_init, amd_pp);
kfree(pp_init);
#endif
......@@ -63,6 +60,15 @@ static int amdgpu_powerplay_init(struct amdgpu_device *adev)
amd_pp->pp_handle = (void *)adev;
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
case CHIP_TAHITI:
case CHIP_PITCAIRN:
case CHIP_VERDE:
case CHIP_OLAND:
case CHIP_HAINAN:
amd_pp->ip_funcs = &si_dpm_ip_funcs;
break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_BONAIRE:
case CHIP_HAWAII:
......
......@@ -252,28 +252,17 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
*/
void amdgpu_ring_fini(struct amdgpu_ring *ring)
{
- int r;
- struct amdgpu_bo *ring_obj;
- ring_obj = ring->ring_obj;
ring->ready = false;
- ring->ring = NULL;
- ring->ring_obj = NULL;
amdgpu_wb_free(ring->adev, ring->cond_exe_offs);
amdgpu_wb_free(ring->adev, ring->fence_offs);
amdgpu_wb_free(ring->adev, ring->rptr_offs);
amdgpu_wb_free(ring->adev, ring->wptr_offs);
- if (ring_obj) {
- r = amdgpu_bo_reserve(ring_obj, false);
- if (likely(r == 0)) {
- amdgpu_bo_kunmap(ring_obj);
- amdgpu_bo_unpin(ring_obj);
- amdgpu_bo_unreserve(ring_obj);
- }
- amdgpu_bo_unref(&ring_obj);
- }
amdgpu_bo_free_kernel(&ring->ring_obj,
&ring->gpu_addr,
(void **)&ring->ring);
amdgpu_debugfs_ring_fini(ring);
}
......
......@@ -89,10 +89,10 @@ int amdgpu_ttm_global_init(struct amdgpu_device *adev)
global_ref->init = &amdgpu_ttm_mem_global_init;
global_ref->release = &amdgpu_ttm_mem_global_release;
r = drm_global_item_ref(global_ref);
- if (r != 0) {
if (r) {
DRM_ERROR("Failed setting up TTM memory accounting "
"subsystem.\n");
- return r;
goto error_mem;
}
adev->mman.bo_global_ref.mem_glob =
......@@ -103,26 +103,30 @@ int amdgpu_ttm_global_init(struct amdgpu_device *adev)
global_ref->init = &ttm_bo_global_init;
global_ref->release = &ttm_bo_global_release;
r = drm_global_item_ref(global_ref);
- if (r != 0) {
if (r) {
DRM_ERROR("Failed setting up TTM BO subsystem.\n");
- drm_global_item_unref(&adev->mman.mem_global_ref);
- return r;
goto error_bo;
}
ring = adev->mman.buffer_funcs_ring;
rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
rq, amdgpu_sched_jobs);
- if (r != 0) {
if (r) {
DRM_ERROR("Failed setting up TTM BO move run queue.\n");
- drm_global_item_unref(&adev->mman.mem_global_ref);
- drm_global_item_unref(&adev->mman.bo_global_ref.ref);
- return r;
goto error_entity;
}
adev->mman.mem_global_referenced = true;
return 0;
error_entity:
drm_global_item_unref(&adev->mman.bo_global_ref.ref);
error_bo:
drm_global_item_unref(&adev->mman.mem_global_ref);
error_mem:
return r;
}
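The rewrite above converts amdgpu_ttm_global_init() to the kernel's usual unwind ladder: each failure jumps to a label that undoes only the steps that already succeeded, so cleanup is written once instead of being repeated in every error branch. A generic sketch of the idiom, with init_a/b/c and fini_a/b as hypothetical steps:
static int  init_a(void) { return 0; }	/* placeholder steps */
static void fini_a(void) { }
static int  init_b(void) { return 0; }
static void fini_b(void) { }
static int  init_c(void) { return 0; }

static int init_all(void)
{
	int r;

	r = init_a();
	if (r)
		goto error_a;
	r = init_b();
	if (r)
		goto error_b;
	r = init_c();
	if (r)
		goto error_c;
	return 0;

error_c:
	fini_b();	/* unwind in reverse order of setup */
error_b:
	fini_a();
error_a:
	return r;
}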
static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
......@@ -197,6 +201,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
.lpfn = 0,
.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
};
unsigned i;
if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
placement->placement = &placements;
......@@ -208,10 +213,25 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
rbo = container_of(bo, struct amdgpu_bo, tbo);
switch (bo->mem.mem_type) {
case TTM_PL_VRAM:
- if (rbo->adev->mman.buffer_funcs_ring->ready == false)
if (rbo->adev->mman.buffer_funcs_ring->ready == false) {
amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_CPU);
- else
} else {
amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_GTT);
for (i = 0; i < rbo->placement.num_placement; ++i) {
if (!(rbo->placements[i].flags &
TTM_PL_FLAG_TT))
continue;
if (rbo->placements[i].lpfn)
continue;
/* set an upper limit to force directly
* allocating address space for the BO.
*/
rbo->placements[i].lpfn =
rbo->adev->mc.gtt_size >> PAGE_SHIFT;
}
}
break;
case TTM_PL_TT:
default:
......@@ -256,8 +276,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
new_start = new_mem->start << PAGE_SHIFT;
switch (old_mem->mem_type) {
- case TTM_PL_VRAM:
case TTM_PL_TT:
r = amdgpu_ttm_bind(bo->ttm, old_mem);
if (r)
return r;
case TTM_PL_VRAM:
old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
break;
default:
......@@ -265,8 +289,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
return -EINVAL;
}
switch (new_mem->mem_type) {
- case TTM_PL_VRAM:
case TTM_PL_TT:
r = amdgpu_ttm_bind(bo->ttm, new_mem);
if (r)
return r;
case TTM_PL_VRAM:
new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
break;
default:
......@@ -311,7 +339,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
placement.num_busy_placement = 1;
placement.busy_placement = &placements;
placements.fpfn = 0;
- placements.lpfn = 0;
placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
interruptible, no_wait_gpu);
......@@ -358,7 +386,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo,
placement.num_busy_placement = 1;
placement.busy_placement = &placements;
placements.fpfn = 0;
- placements.lpfn = 0;
placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
interruptible, no_wait_gpu);
......@@ -520,6 +548,7 @@ struct amdgpu_ttm_tt {
spinlock_t guptasklock;
struct list_head guptasks;
atomic_t mmu_invalidations;
struct list_head list;
};
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
......@@ -637,7 +666,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
struct ttm_mem_reg *bo_mem)
{
struct amdgpu_ttm_tt *gtt = (void*)ttm;
- uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
int r;
if (gtt->userptr) {
......@@ -647,7 +675,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
return r;
}
}
- gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
if (!ttm->num_pages) {
WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
ttm->num_pages, bo_mem, ttm);
......@@ -658,14 +686,62 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
bo_mem->mem_type == AMDGPU_PL_OA)
return -EINVAL;
return 0;
}
bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
return gtt && !list_empty(&gtt->list);
}
int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
uint32_t flags;
int r;
if (!ttm || amdgpu_ttm_is_bound(ttm))
return 0;
flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
ttm->pages, gtt->ttm.dma_address, flags);
if (r) {
DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
ttm->num_pages, (unsigned)gtt->offset);
DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
ttm->num_pages, gtt->offset);
return r;
}
spin_lock(&gtt->adev->gtt_list_lock);
list_add_tail(&gtt->list, &gtt->adev->gtt_list);
spin_unlock(&gtt->adev->gtt_list_lock);
return 0;
}
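amdgpu_ttm_bind() now performs the actual GART binding (moved out of the backend bind callback above) and records every bound TTM on adev->gtt_list; that list is what lets amdgpu_ttm_recover_gart() below replay all mappings after a GPU reset has wiped the GART table. A stripped-down sketch of the track-and-replay pattern, with illustrative types that are not part of the patch:
#include <linux/list.h>
#include <linux/printk.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct mapping {
	struct list_head node;
	u64 gpu_offset;
};

static LIST_HEAD(mappings);
static DEFINE_SPINLOCK(mappings_lock);

static void mapping_bound(struct mapping *m)
{
	/* remember every live binding... */
	spin_lock(&mappings_lock);
	list_add_tail(&m->node, &mappings);
	spin_unlock(&mappings_lock);
}

static void mappings_replay(void)
{
	struct mapping *m;

	/* ...so all of them can be re-programmed after a reset */
	spin_lock(&mappings_lock);
	list_for_each_entry(m, &mappings, node)
		pr_debug("rebind mapping at 0x%llx\n", m->gpu_offset);
	spin_unlock(&mappings_lock);
}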
int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
{
struct amdgpu_ttm_tt *gtt, *tmp;
struct ttm_mem_reg bo_mem;
uint32_t flags;
int r;
bo_mem.mem_type = TTM_PL_TT;
spin_lock(&adev->gtt_list_lock);
list_for_each_entry_safe(gtt, tmp, &adev->gtt_list, list) {
flags = amdgpu_ttm_tt_pte_flags(gtt->adev, &gtt->ttm.ttm, &bo_mem);
r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
gtt->ttm.ttm.pages, gtt->ttm.dma_address,
flags);
if (r) {
spin_unlock(&adev->gtt_list_lock);
DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
gtt->ttm.ttm.num_pages, gtt->offset);
return r;
}
}
spin_unlock(&adev->gtt_list_lock);
return 0;
}
......@@ -673,6 +749,9 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
if (!amdgpu_ttm_is_bound(ttm))
return 0;
/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
if (gtt->adev->gart.ready)
amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
......@@ -680,6 +759,10 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
if (gtt->userptr)
amdgpu_ttm_tt_unpin_userptr(ttm);
spin_lock(&gtt->adev->gtt_list_lock);
list_del_init(&gtt->list);
spin_unlock(&gtt->adev->gtt_list_lock);
return 0;
}
......@@ -716,6 +799,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
kfree(gtt);
return NULL;
}
INIT_LIST_HEAD(&gtt->list);
return &gtt->ttm.ttm;
}
......
......@@ -26,13 +26,13 @@
#include "gpu_scheduler.h"
- #define AMDGPU_PL_GDS TTM_PL_PRIV0
- #define AMDGPU_PL_GWS TTM_PL_PRIV1
- #define AMDGPU_PL_OA TTM_PL_PRIV2
#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
#define AMDGPU_PL_OA (TTM_PL_PRIV + 2)
- #define AMDGPU_PL_FLAG_GDS TTM_PL_FLAG_PRIV0
- #define AMDGPU_PL_FLAG_GWS TTM_PL_FLAG_PRIV1
- #define AMDGPU_PL_FLAG_OA TTM_PL_FLAG_PRIV2
#define AMDGPU_PL_FLAG_GDS (TTM_PL_FLAG_PRIV << 0)
#define AMDGPU_PL_FLAG_GWS (TTM_PL_FLAG_PRIV << 1)
#define AMDGPU_PL_FLAG_OA (TTM_PL_FLAG_PRIV << 2)
#define AMDGPU_TTM_LRU_SIZE 20
......@@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
struct fence **fence);
int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
#endif
......@@ -247,35 +247,28 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
const struct common_firmware_header *header = NULL;
err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, bo);
if (err) {
dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err);
err = -ENOMEM;
goto failed;
}
err = amdgpu_bo_reserve(*bo, false);
if (err) {
- amdgpu_bo_unref(bo);
dev_err(adev->dev, "(%d) Firmware buffer reserve failed\n", err);
- goto failed;
goto failed_reserve;
}
err = amdgpu_bo_pin(*bo, AMDGPU_GEM_DOMAIN_GTT, &fw_mc_addr);
if (err) {
- amdgpu_bo_unreserve(*bo);
- amdgpu_bo_unref(bo);
dev_err(adev->dev, "(%d) Firmware buffer pin failed\n", err);
- goto failed;
goto failed_pin;
}
err = amdgpu_bo_kmap(*bo, &fw_buf_ptr);
if (err) {
dev_err(adev->dev, "(%d) Firmware buffer kmap failed\n", err);
- amdgpu_bo_unpin(*bo);
- amdgpu_bo_unreserve(*bo);
- amdgpu_bo_unref(bo);
- goto failed;
goto failed_kmap;
}
amdgpu_bo_unreserve(*bo);
......@@ -290,10 +283,16 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
fw_offset += ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
}
}
return 0;
failed_kmap:
amdgpu_bo_unpin(*bo);
failed_pin:
amdgpu_bo_unreserve(*bo);
failed_reserve:
amdgpu_bo_unref(bo);
failed:
- if (err)
- adev->firmware.smu_load = false;
adev->firmware.smu_load = false;
return err;
}
......
......@@ -249,22 +249,13 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
- int r;
kfree(adev->uvd.saved_bo);
amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
- if (adev->uvd.vcpu_bo) {
- r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
- if (!r) {
- amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
- amdgpu_bo_unpin(adev->uvd.vcpu_bo);
- amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
- }
- amdgpu_bo_unref(&adev->uvd.vcpu_bo);
- }
amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo,
&adev->uvd.gpu_addr,
(void **)&adev->uvd.cpu_addr);
amdgpu_ring_fini(&adev->uvd.ring);
......@@ -891,6 +882,10 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
return -EINVAL;
}
r = amdgpu_cs_sysvm_access_required(parser);
if (r)
return r;
ctx.parser = parser;
ctx.buf_sizes = buf_sizes;
ctx.ib_idx = ib_idx;
......
......@@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
uint32_t allocated = 0;
uint32_t tmp, handle = 0;
uint32_t *size = &tmp;
- int i, r = 0, idx = 0;
int i, r, idx = 0;
r = amdgpu_cs_sysvm_access_required(p);
if (r)
return r;
while (idx < ib->length_dw) {
uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
......@@ -799,6 +803,18 @@ void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
amdgpu_ring_write(ring, VCE_CMD_END);
}
unsigned amdgpu_vce_ring_get_emit_ib_size(struct amdgpu_ring *ring)
{
return
4; /* amdgpu_vce_ring_emit_ib */
}
unsigned amdgpu_vce_ring_get_dma_frame_size(struct amdgpu_ring *ring)
{
return
6; /* amdgpu_vce_ring_emit_fence x1 no user fence */
}
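These two helpers back the new get_emit_ib_size/get_dma_frame_size ring callbacks added throughout this series: instead of relying on a hard-coded estimate, common code can ask each ring type for the worst-case dword footprint of a frame and of each IB packet. A hedged sketch of how a caller might size a submission up front (ring_frame_size_dw() is a hypothetical helper, not part of the patch):
static unsigned ring_frame_size_dw(struct amdgpu_ring *ring,
				   unsigned num_ibs)
{
	const struct amdgpu_ring_funcs *f = ring->funcs;

	/* fixed per-frame overhead plus one IB packet per IB */
	return f->get_dma_frame_size(ring) +
	       num_ibs * f->get_emit_ib_size(ring);
}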
/**
* amdgpu_vce_ring_test_ring - test if VCE ring is working
*
......@@ -850,8 +866,8 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct fence *fence = NULL;
long r;
- /* skip vce ring1 ib test for now, since it's not reliable */
- if (ring == &ring->adev->vce.ring[1])
/* skip vce ring1/2 ib test for now, since it's not reliable */
if (ring != &ring->adev->vce.ring[0])
return 0;
r = amdgpu_vce_get_create_msg(ring, 1, NULL);
......
......@@ -42,5 +42,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout);
void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring);
void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring);
unsigned amdgpu_vce_ring_get_emit_ib_size(struct amdgpu_ring *ring);
unsigned amdgpu_vce_ring_get_dma_frame_size(struct amdgpu_ring *ring);
#endif
......@@ -1163,7 +1163,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
}
flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
- gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
adev == bo_va->bo->adev) ? flags : 0;
spin_lock(&vm->status_lock);
if (!list_empty(&bo_va->vm_status))
......
......@@ -497,7 +497,13 @@ void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev,
* SetPixelClock provides the dividers
*/
args.v6.ulDispEngClkFreq = cpu_to_le32(dispclk);
- args.v6.ucPpll = ATOM_EXT_PLL1;
if (adev->asic_type == CHIP_TAHITI ||
adev->asic_type == CHIP_PITCAIRN ||
adev->asic_type == CHIP_VERDE ||
adev->asic_type == CHIP_OLAND)
args.v6.ucPpll = ATOM_PPLL0;
else
args.v6.ucPpll = ATOM_EXT_PLL1;
break;
default:
DRM_ERROR("Unknown table version %d %d\n", frev, crev);
......
......@@ -27,6 +27,7 @@
#include "amdgpu.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#define TARGET_HW_I2C_CLOCK 50
......
......@@ -5396,7 +5396,7 @@ static void ci_dpm_disable(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq,
AMDGPU_THERMAL_IRQ_HIGH_TO_LOW);
- ci_dpm_powergate_uvd(adev, false);
ci_dpm_powergate_uvd(adev, true);
if (!amdgpu_ci_is_smc_running(adev))
return;
......@@ -6036,7 +6036,7 @@ static int ci_dpm_init(struct amdgpu_device *adev)
pi->caps_dynamic_ac_timing = true;
- pi->uvd_power_gated = false;
pi->uvd_power_gated = true;
/* make sure dc limits are valid */
if ((adev->pm.dpm.dyn_state.max_clock_voltage_on_dc.sclk == 0) ||
......@@ -6179,8 +6179,6 @@ static int ci_dpm_late_init(void *handle)
if (ret)
return ret;
- ci_dpm_powergate_uvd(adev, true);
return 0;
}
......
......@@ -847,6 +847,22 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
}
static unsigned cik_sdma_ring_get_emit_ib_size(struct amdgpu_ring *ring)
{
return
7 + 4; /* cik_sdma_ring_emit_ib */
}
static unsigned cik_sdma_ring_get_dma_frame_size(struct amdgpu_ring *ring)
{
return
6 + /* cik_sdma_ring_emit_hdp_flush */
3 + /* cik_sdma_ring_emit_hdp_invalidate */
6 + /* cik_sdma_ring_emit_pipeline_sync */
12 + /* cik_sdma_ring_emit_vm_flush */
9 + 9 + 9; /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */
}
static void cik_enable_sdma_mgcg(struct amdgpu_device *adev,
bool enable)
{
......@@ -1220,6 +1236,8 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
.test_ib = cik_sdma_ring_test_ib,
.insert_nop = cik_sdma_ring_insert_nop,
.pad_ib = cik_sdma_ring_pad_ib,
.get_emit_ib_size = cik_sdma_ring_get_emit_ib_size,
.get_dma_frame_size = cik_sdma_ring_get_dma_frame_size,
};
static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
......
......@@ -44,6 +44,7 @@
static void cz_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate);
static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate);
static void cz_dpm_fini(struct amdgpu_device *adev);
static struct cz_ps *cz_get_ps(struct amdgpu_ps *rps)
{
......@@ -350,6 +351,8 @@ static int cz_parse_power_table(struct amdgpu_device *adev)
ps = kzalloc(sizeof(struct cz_ps), GFP_KERNEL);
if (ps == NULL) {
for (j = 0; j < i; j++)
kfree(adev->pm.dpm.ps[j].ps_priv);
kfree(adev->pm.dpm.ps);
return -ENOMEM;
}
......@@ -409,11 +412,11 @@ static int cz_dpm_init(struct amdgpu_device *adev)
ret = amdgpu_get_platform_caps(adev);
if (ret)
- return ret;
goto err;
ret = amdgpu_parse_extended_power_table(adev);
if (ret)
- return ret;
goto err;
pi->sram_end = SMC_RAM_END;
......@@ -467,23 +470,26 @@ static int cz_dpm_init(struct amdgpu_device *adev)
ret = cz_parse_sys_info_table(adev);
if (ret)
- return ret;
goto err;
cz_patch_voltage_values(adev);
cz_construct_boot_state(adev);
ret = cz_parse_power_table(adev);
if (ret)
- return ret;
goto err;
ret = cz_process_firmware_header(adev);
if (ret)
- return ret;
goto err;
pi->dpm_enabled = true;
pi->uvd_dynamic_pg = false;
return 0;
err:
cz_dpm_fini(adev);
return ret;
}
static void cz_dpm_fini(struct amdgpu_device *adev)
......@@ -672,17 +678,12 @@ static void cz_reset_ap_mask(struct amdgpu_device *adev)
struct cz_power_info *pi = cz_get_pi(adev);
pi->active_process_mask = 0;
}
static int cz_dpm_download_pptable_from_smu(struct amdgpu_device *adev,
void **table)
{
- int ret = 0;
- ret = cz_smu_download_pptable(adev, table);
- return ret;
return cz_smu_download_pptable(adev, table);
}
static int cz_dpm_upload_pptable_to_smu(struct amdgpu_device *adev)
......@@ -822,9 +823,9 @@ static void cz_init_sclk_limit(struct amdgpu_device *adev)
pi->sclk_dpm.hard_min_clk = 0;
cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxSclkLevel);
level = cz_get_argument(adev);
- if (level < table->count)
if (level < table->count) {
clock = table->entries[level].clk;
- else {
} else {
DRM_ERROR("Invalid SLCK Voltage Dependency table entry.\n");
clock = table->entries[table->count - 1].clk;
}
......@@ -850,9 +851,9 @@ static void cz_init_uvd_limit(struct amdgpu_device *adev)
pi->uvd_dpm.hard_min_clk = 0;
cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxUvdLevel);
level = cz_get_argument(adev);
- if (level < table->count)
if (level < table->count) {
clock = table->entries[level].vclk;
- else {
} else {
DRM_ERROR("Invalid UVD Voltage Dependency table entry.\n");
clock = table->entries[table->count - 1].vclk;
}
......@@ -878,9 +879,9 @@ static void cz_init_vce_limit(struct amdgpu_device *adev)
pi->vce_dpm.hard_min_clk = table->entries[0].ecclk;
cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxEclkLevel);
level = cz_get_argument(adev);
- if (level < table->count)
if (level < table->count) {
clock = table->entries[level].ecclk;
- else {
} else {
/* future BIOS would fix this error */
DRM_ERROR("Invalid VCE Voltage Dependency table entry.\n");
clock = table->entries[table->count - 1].ecclk;
......@@ -907,9 +908,9 @@ static void cz_init_acp_limit(struct amdgpu_device *adev)
pi->acp_dpm.hard_min_clk = 0;
cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxAclkLevel);
level = cz_get_argument(adev);
- if (level < table->count)
if (level < table->count) {
clock = table->entries[level].clk;
- else {
} else {
DRM_ERROR("Invalid ACP Voltage Dependency table entry.\n");
clock = table->entries[table->count - 1].clk;
}
......@@ -934,7 +935,6 @@ static void cz_init_sclk_threshold(struct amdgpu_device *adev)
struct cz_power_info *pi = cz_get_pi(adev);
pi->low_sclk_interrupt_threshold = 0;
}
static void cz_dpm_setup_asic(struct amdgpu_device *adev)
......@@ -1207,7 +1207,7 @@ static int cz_enable_didt(struct amdgpu_device *adev, bool enable)
int ret;
if (pi->caps_sq_ramping || pi->caps_db_ramping ||
pi->caps_td_ramping || pi->caps_tcp_ramping) {
if (adev->gfx.gfx_current_status != AMDGPU_GFX_SAFE_MODE) {
ret = cz_disable_cgpg(adev);
if (ret) {
......@@ -1281,7 +1281,7 @@ static void cz_apply_state_adjust_rules(struct amdgpu_device *adev,
ps->force_high = false;
ps->need_dfs_bypass = true;
pi->video_start = new_rps->dclk || new_rps->vclk ||
new_rps->evclk || new_rps->ecclk;
if ((new_rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) ==
ATOM_PPLIB_CLASSIFICATION_UI_BATTERY)
......@@ -1339,7 +1339,6 @@ static int cz_dpm_enable(struct amdgpu_device *adev)
}
cz_reset_acp_boot_level(adev);
cz_update_current_ps(adev, adev->pm.dpm.boot_ps);
return 0;
......@@ -1669,7 +1668,6 @@ static void cz_dpm_post_set_power_state(struct amdgpu_device *adev)
struct amdgpu_ps *ps = &pi->requested_rps;
cz_update_current_ps(adev, ps);
}
static int cz_dpm_force_highest(struct amdgpu_device *adev)
......@@ -2201,7 +2199,6 @@ static int cz_update_vce_dpm(struct amdgpu_device *adev)
/* Stable Pstate is enabled and we need to set the VCE DPM to highest level */
if (pi->caps_stable_power_state) {
pi->vce_dpm.hard_min_clk = table->entries[table->count-1].ecclk;
} else { /* non-stable p-state cases. without vce.Arbiter.EcclkHardMin */
/* leave it as set by user */
/*pi->vce_dpm.hard_min_clk = table->entries[0].ecclk;*/
......
......@@ -29,6 +29,8 @@
#include "cz_smumgr.h"
#include "smu_ucode_xfer_cz.h"
#include "amdgpu_ucode.h"
#include "cz_dpm.h"
#include "vi_dpm.h"
#include "smu/smu_8_0_d.h"
#include "smu/smu_8_0_sh_mask.h"
......@@ -48,7 +50,7 @@ static struct cz_smu_private_data *cz_smu_get_priv(struct amdgpu_device *adev)
return priv;
}
- int cz_send_msg_to_smc_async(struct amdgpu_device *adev, u16 msg)
static int cz_send_msg_to_smc_async(struct amdgpu_device *adev, u16 msg)
{
int i;
u32 content = 0, tmp;
......@@ -140,7 +142,7 @@ int cz_read_smc_sram_dword(struct amdgpu_device *adev, u32 smc_address,
return 0;
}
- int cz_write_smc_sram_dword(struct amdgpu_device *adev, u32 smc_address,
static int cz_write_smc_sram_dword(struct amdgpu_device *adev, u32 smc_address,
u32 value, u32 limit)
{
int ret;
......
(This diff has been collapsed.)
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __DCE_V6_0_H__
#define __DCE_V6_0_H__
extern const struct amd_ip_funcs dce_v6_0_ip_funcs;
#endif
(This diff has been collapsed.)
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __GFX_V6_0_H__
#define __GFX_V6_0_H__
extern const struct amd_ip_funcs gfx_v6_0_ip_funcs;
#endif
......@@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, control);
}
static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
uint32_t dw2 = 0;
dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
if (flags & AMDGPU_HAVE_CTX_SWITCH) {
/* set load_global_config & load_global_uconfig */
dw2 |= 0x8001;
/* set load_cs_sh_regs */
dw2 |= 0x01000000;
/* set load_per_context_state & load_gfx_sh_regs */
dw2 |= 0x10002;
}
amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
amdgpu_ring_write(ring, dw2);
amdgpu_ring_write(ring, 0);
}
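The dw2 constants above select which state blocks the CP (re)loads when the CONTEXT_CONTROL packet executes. A hedged decoding of the raw values, where the names below are descriptive stand-ins inferred from the comments, not identifiers from the hardware headers:
/* Illustrative names restating the dw2 bits used above. */
#define CTXCNTL_LOAD_ENABLE		0x80000000 /* else packet is a NOP */
#define CTXCNTL_LOAD_GLOBAL_CONFIG	0x00000001 /* low bit of 0x8001 */
#define CTXCNTL_LOAD_GLOBAL_UCONFIG	0x00008000 /* high bit of 0x8001 */
#define CTXCNTL_LOAD_CS_SH_REGS		0x01000000
#define CTXCNTL_LOAD_PER_CONTEXT_STATE	0x00000002 /* low bit of 0x10002 */
#define CTXCNTL_LOAD_GFX_SH_REGS	0x00010000 /* high bit of 0x10002 */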
/**
* gfx_v7_0_ring_test_ib - basic ring IB test
*
......@@ -2443,7 +2462,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
return 0;
}
- static u32 gfx_v7_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
static u32 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->rptr_offs];
}
......@@ -2463,11 +2482,6 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
(void)RREG32(mmCP_RB0_WPTR);
}
- static u32 gfx_v7_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
- {
- return ring->adev->wb.wb[ring->rptr_offs];
- }
static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
/* XXX check if swapping is necessary on BE */
......@@ -4176,6 +4190,41 @@ static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
static unsigned gfx_v7_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
{
return
4; /* gfx_v7_0_ring_emit_ib_gfx */
}
static unsigned gfx_v7_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
{
return
20 + /* gfx_v7_0_ring_emit_gds_switch */
7 + /* gfx_v7_0_ring_emit_hdp_flush */
5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
17 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
3; /* gfx_v7_ring_emit_cntxcntl */
}
static unsigned gfx_v7_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
{
return
4; /* gfx_v7_0_ring_emit_ib_compute */
}
static unsigned gfx_v7_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
{
return
20 + /* gfx_v7_0_ring_emit_gds_switch */
7 + /* gfx_v7_0_ring_emit_hdp_flush */
5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
7 + /* gfx_v7_0_ring_emit_pipeline_sync */
17 + /* gfx_v7_0_ring_emit_vm_flush */
7 + 7 + 7; /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
}
static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
.select_se_sh = &gfx_v7_0_select_se_sh,
......@@ -4495,9 +4544,9 @@ static int gfx_v7_0_sw_fini(void *handle)
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
- amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
- amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
......@@ -4928,7 +4977,7 @@ const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
- .get_rptr = gfx_v7_0_ring_get_rptr_gfx,
.get_rptr = gfx_v7_0_ring_get_rptr,
.get_wptr = gfx_v7_0_ring_get_wptr_gfx,
.set_wptr = gfx_v7_0_ring_set_wptr_gfx,
.parse_cs = NULL,
......@@ -4943,10 +4992,13 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.test_ib = gfx_v7_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
.get_emit_ib_size = gfx_v7_0_ring_get_emit_ib_size_gfx,
.get_dma_frame_size = gfx_v7_0_ring_get_dma_frame_size_gfx,
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
- .get_rptr = gfx_v7_0_ring_get_rptr_compute,
.get_rptr = gfx_v7_0_ring_get_rptr,
.get_wptr = gfx_v7_0_ring_get_wptr_compute,
.set_wptr = gfx_v7_0_ring_set_wptr_compute,
.parse_cs = NULL,
......@@ -4961,6 +5013,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
.test_ib = gfx_v7_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.get_emit_ib_size = gfx_v7_0_ring_get_emit_ib_size_compute,
.get_dma_frame_size = gfx_v7_0_ring_get_dma_frame_size_compute,
};
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
......
......@@ -2113,9 +2113,9 @@ static int gfx_v8_0_sw_fini(void *handle)
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
- amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
- amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
......@@ -3866,7 +3866,7 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
}
}
- void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
......@@ -5835,7 +5835,7 @@ static int gfx_v8_0_set_clockgating_state(void *handle,
return 0;
}
- static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->rptr_offs];
}
......@@ -5915,12 +5915,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
{
u32 header, control = 0;
- /* insert SWITCH_BUFFER packet before first IB in the ring frame */
- if (ctx_switch) {
- amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
- amdgpu_ring_write(ring, 0);
- }
if (ib->flags & AMDGPU_IB_FLAG_CE)
header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
else
......@@ -5990,14 +5984,6 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, seq);
amdgpu_ring_write(ring, 0xffffffff);
amdgpu_ring_write(ring, 4); /* poll interval */
- if (usepfp) {
- /* synce CE with ME to prevent CE fetch CEIB before context switch done */
- amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
- amdgpu_ring_write(ring, 0);
- }
}
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
......@@ -6005,6 +5991,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
{
int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
if (usepfp)
amdgpu_ring_insert_nop(ring, 128);
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
WRITE_DATA_DST_SEL(0)) |
......@@ -6044,18 +6034,11 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
/* sync PFP to ME, otherwise we might get invalid PFP reads */
amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
amdgpu_ring_write(ring, 0x0);
- amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
- amdgpu_ring_write(ring, 0);
/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
amdgpu_ring_insert_nop(ring, 128);
}
}
- static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
- {
- return ring->adev->wb.wb[ring->rptr_offs];
- }
static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->wptr_offs];
......@@ -6091,6 +6074,77 @@ static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, upper_32_bits(seq));
}
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0);
}
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
uint32_t dw2 = 0;
dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
if (flags & AMDGPU_HAVE_CTX_SWITCH) {
/* set load_global_config & load_global_uconfig */
dw2 |= 0x8001;
/* set load_cs_sh_regs */
dw2 |= 0x01000000;
/* set load_per_context_state & load_gfx_sh_regs for GFX */
dw2 |= 0x10002;
/* set load_ce_ram if preamble presented */
if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
dw2 |= 0x10000000;
} else {
/* still load_ce_ram if this is the first time preamble presented
* although there is no context switch happens.
*/
if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
dw2 |= 0x10000000;
}
amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
amdgpu_ring_write(ring, dw2);
amdgpu_ring_write(ring, 0);
}
static unsigned gfx_v8_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
{
return
4; /* gfx_v8_0_ring_emit_ib_gfx */
}
static unsigned gfx_v8_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
{
return
20 + /* gfx_v8_0_ring_emit_gds_switch */
7 + /* gfx_v8_0_ring_emit_hdp_flush */
5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
7 + /* gfx_v8_0_ring_emit_pipeline_sync */
256 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
2 + /* gfx_v8_ring_emit_sb */
3; /* gfx_v8_ring_emit_cntxcntl */
}
static unsigned gfx_v8_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
{
return
4; /* gfx_v8_0_ring_emit_ib_compute */
}
static unsigned gfx_v8_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
{
return
20 + /* gfx_v8_0_ring_emit_gds_switch */
7 + /* gfx_v8_0_ring_emit_hdp_flush */
5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
7 + /* gfx_v8_0_ring_emit_pipeline_sync */
17 + /* gfx_v8_0_ring_emit_vm_flush */
7 + 7 + 7; /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
}
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
......@@ -6257,7 +6311,7 @@ const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
- .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
.get_rptr = gfx_v8_0_ring_get_rptr,
.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
.parse_cs = NULL,
......@@ -6272,10 +6326,14 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.test_ib = gfx_v8_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v8_ring_emit_sb,
.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_gfx,
.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_gfx,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
- .get_rptr = gfx_v8_0_ring_get_rptr_compute,
.get_rptr = gfx_v8_0_ring_get_rptr,
.get_wptr = gfx_v8_0_ring_get_wptr_compute,
.set_wptr = gfx_v8_0_ring_set_wptr_compute,
.parse_cs = NULL,
......@@ -6290,6 +6348,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.test_ib = gfx_v8_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_compute,
.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_compute,
};
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
......
(This diff has been collapsed.)
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __GMC_V6_0_H__
#define __GMC_V6_0_H__
extern const struct amd_ip_funcs gmc_v6_0_ip_funcs;
#endif
......@@ -121,7 +121,7 @@ static int iceland_copy_bytes_to_smc(struct amdgpu_device *adev,
return result;
}
- void iceland_start_smc(struct amdgpu_device *adev)
static void iceland_start_smc(struct amdgpu_device *adev)
{
uint32_t val = RREG32_SMC(ixSMC_SYSCON_RESET_CNTL);
......@@ -129,7 +129,7 @@ void iceland_start_smc(struct amdgpu_device *adev)
WREG32_SMC(ixSMC_SYSCON_RESET_CNTL, val);
}
- void iceland_reset_smc(struct amdgpu_device *adev)
static void iceland_reset_smc(struct amdgpu_device *adev)
{
uint32_t val = RREG32_SMC(ixSMC_SYSCON_RESET_CNTL);
......@@ -145,7 +145,7 @@ static int iceland_program_jump_on_start(struct amdgpu_device *adev)
return 0;
}
- void iceland_stop_smc_clock(struct amdgpu_device *adev)
static void iceland_stop_smc_clock(struct amdgpu_device *adev)
{
uint32_t val = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
......@@ -153,7 +153,7 @@ void iceland_stop_smc_clock(struct amdgpu_device *adev)
WREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0, val);
}
- void iceland_start_smc_clock(struct amdgpu_device *adev)
static void iceland_start_smc_clock(struct amdgpu_device *adev)
{
uint32_t val = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
......
/*
* Copyright 2011 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __R600_DPM_H__
#define __R600_DPM_H__
#define R600_ASI_DFLT 10000
#define R600_BSP_DFLT 0x41EB
#define R600_BSU_DFLT 0x2
#define R600_AH_DFLT 5
#define R600_RLP_DFLT 25
#define R600_RMP_DFLT 65
#define R600_LHP_DFLT 40
#define R600_LMP_DFLT 15
#define R600_TD_DFLT 0
#define R600_UTC_DFLT_00 0x24
#define R600_UTC_DFLT_01 0x22
#define R600_UTC_DFLT_02 0x22
#define R600_UTC_DFLT_03 0x22
#define R600_UTC_DFLT_04 0x22
#define R600_UTC_DFLT_05 0x22
#define R600_UTC_DFLT_06 0x22
#define R600_UTC_DFLT_07 0x22
#define R600_UTC_DFLT_08 0x22
#define R600_UTC_DFLT_09 0x22
#define R600_UTC_DFLT_10 0x22
#define R600_UTC_DFLT_11 0x22
#define R600_UTC_DFLT_12 0x22
#define R600_UTC_DFLT_13 0x22
#define R600_UTC_DFLT_14 0x22
#define R600_DTC_DFLT_00 0x24
#define R600_DTC_DFLT_01 0x22
#define R600_DTC_DFLT_02 0x22
#define R600_DTC_DFLT_03 0x22
#define R600_DTC_DFLT_04 0x22
#define R600_DTC_DFLT_05 0x22
#define R600_DTC_DFLT_06 0x22
#define R600_DTC_DFLT_07 0x22
#define R600_DTC_DFLT_08 0x22
#define R600_DTC_DFLT_09 0x22
#define R600_DTC_DFLT_10 0x22
#define R600_DTC_DFLT_11 0x22
#define R600_DTC_DFLT_12 0x22
#define R600_DTC_DFLT_13 0x22
#define R600_DTC_DFLT_14 0x22
#define R600_VRC_DFLT 0x0000C003
#define R600_VOLTAGERESPONSETIME_DFLT 1000
#define R600_BACKBIASRESPONSETIME_DFLT 1000
#define R600_VRU_DFLT 0x3
#define R600_SPLLSTEPTIME_DFLT 0x1000
#define R600_SPLLSTEPUNIT_DFLT 0x3
#define R600_TPU_DFLT 0
#define R600_TPC_DFLT 0x200
#define R600_SSTU_DFLT 0
#define R600_SST_DFLT 0x00C8
#define R600_GICST_DFLT 0x200
#define R600_FCT_DFLT 0x0400
#define R600_FCTU_DFLT 0
#define R600_CTXCGTT3DRPHC_DFLT 0x20
#define R600_CTXCGTT3DRSDC_DFLT 0x40
#define R600_VDDC3DOORPHC_DFLT 0x100
#define R600_VDDC3DOORSDC_DFLT 0x7
#define R600_VDDC3DOORSU_DFLT 0
#define R600_MPLLLOCKTIME_DFLT 100
#define R600_MPLLRESETTIME_DFLT 150
#define R600_VCOSTEPPCT_DFLT 20
#define R600_ENDINGVCOSTEPPCT_DFLT 5
#define R600_REFERENCEDIVIDER_DFLT 4
#define R600_PM_NUMBER_OF_TC 15
#define R600_PM_NUMBER_OF_SCLKS 20
#define R600_PM_NUMBER_OF_MCLKS 4
#define R600_PM_NUMBER_OF_VOLTAGE_LEVELS 4
#define R600_PM_NUMBER_OF_ACTIVITY_LEVELS 3
/* XXX are these ok? */
#define R600_TEMP_RANGE_MIN (90 * 1000)
#define R600_TEMP_RANGE_MAX (120 * 1000)
#define FDO_PWM_MODE_STATIC 1
#define FDO_PWM_MODE_STATIC_RPM 5
enum r600_power_level {
R600_POWER_LEVEL_LOW = 0,
R600_POWER_LEVEL_MEDIUM = 1,
R600_POWER_LEVEL_HIGH = 2,
R600_POWER_LEVEL_CTXSW = 3,
};
enum r600_td {
R600_TD_AUTO,
R600_TD_UP,
R600_TD_DOWN,
};
enum r600_display_watermark {
R600_DISPLAY_WATERMARK_LOW = 0,
R600_DISPLAY_WATERMARK_HIGH = 1,
};
enum r600_display_gap
{
R600_PM_DISPLAY_GAP_VBLANK_OR_WM = 0,
R600_PM_DISPLAY_GAP_VBLANK = 1,
R600_PM_DISPLAY_GAP_WATERMARK = 2,
R600_PM_DISPLAY_GAP_IGNORE = 3,
};
#endif
......@@ -902,6 +902,22 @@ static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
static unsigned sdma_v2_4_ring_get_emit_ib_size(struct amdgpu_ring *ring)
{
return
7 + 6; /* sdma_v2_4_ring_emit_ib */
}
static unsigned sdma_v2_4_ring_get_dma_frame_size(struct amdgpu_ring *ring)
{
return
6 + /* sdma_v2_4_ring_emit_hdp_flush */
3 + /* sdma_v2_4_ring_emit_hdp_invalidate */
6 + /* sdma_v2_4_ring_emit_pipeline_sync */
12 + /* sdma_v2_4_ring_emit_vm_flush */
10 + 10 + 10; /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */
}
static int sdma_v2_4_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
......@@ -1220,6 +1236,8 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
.test_ib = sdma_v2_4_ring_test_ib,
.insert_nop = sdma_v2_4_ring_insert_nop,
.pad_ib = sdma_v2_4_ring_pad_ib,
.get_emit_ib_size = sdma_v2_4_ring_get_emit_ib_size,
.get_dma_frame_size = sdma_v2_4_ring_get_dma_frame_size,
};
static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
......
......@@ -495,31 +495,6 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}
- unsigned init_cond_exec(struct amdgpu_ring *ring)
- {
- unsigned ret;
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 1);
- ret = ring->wptr;/* this is the offset we need patch later */
- amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
- return ret;
- }
- void patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
- {
- unsigned cur;
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
- cur = ring->wptr - 1;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
- }
/**
* sdma_v3_0_gfx_stop - stop the gfx async dma engines
*
......@@ -1129,6 +1104,22 @@ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
static unsigned sdma_v3_0_ring_get_emit_ib_size(struct amdgpu_ring *ring)
{
return
7 + 6; /* sdma_v3_0_ring_emit_ib */
}
static unsigned sdma_v3_0_ring_get_dma_frame_size(struct amdgpu_ring *ring)
{
return
6 + /* sdma_v3_0_ring_emit_hdp_flush */
3 + /* sdma_v3_0_ring_emit_hdp_invalidate */
6 + /* sdma_v3_0_ring_emit_pipeline_sync */
12 + /* sdma_v3_0_ring_emit_vm_flush */
10 + 10 + 10; /* sdma_v3_0_ring_emit_fence x3 for user fence, vm fence */
}
static int sdma_v3_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
......@@ -1590,6 +1581,8 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
.test_ib = sdma_v3_0_ring_test_ib,
.insert_nop = sdma_v3_0_ring_insert_nop,
.pad_ib = sdma_v3_0_ring_pad_ib,
.get_emit_ib_size = sdma_v3_0_ring_get_emit_ib_size,
.get_dma_frame_size = sdma_v3_0_ring_get_dma_frame_size,
};
static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
......
(This diff has been collapsed.)
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __SI_H__
#define __SI_H__
extern const struct amd_ip_funcs si_common_ip_funcs;
void si_srbm_select(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 queue, u32 vmid);
int si_set_ip_blocks(struct amdgpu_device *adev);
#endif
(26 further file diffs have been collapsed.)