提交 5263925c 编写于 作者: D Dave Airlie

Merge branch 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux into drm-next

First radeon and amdgpu pull request for 4.6.  Highlights:
- ACP support for APUs with i2s audio
- CS ioctl optimizations
- GPU scheduler optimizations
- GPUVM optimizations
- Initial GPU reset support (not enabled yet)
- New powerplay sysfs interface for manually selecting clocks
- Powerplay fixes
- Virtualization fixes
- Removal of hw semaphore support
- Lots of other misc fixes and cleanups

* 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux: (118 commits)
  drm/amdgpu: Don't call interval_tree_remove in amdgpu_mn_destroy
  drm/amdgpu: Fix race condition in amdgpu_mn_unregister
  drm/amdgpu: cleanup gem init/finit
  drm/amdgpu: rework GEM info printing
  drm/amdgpu: print the GPU offset as well in gem_info
  drm/amdgpu: optionally print the pin count in gem_info as well
  drm/amdgpu: print the BO size only once in amdgpu_gem_info
  drm/amdgpu: print pid as integer
  drm/amdgpu: remove page flip work queue v3
  drm/amdgpu: stop blocking for page filp fences
  drm/amdgpu: stop calling amdgpu_gpu_reset from the flip code
  drm/amdgpu: remove fence reset detection leftovers
  drm/amdgpu: Fix race condition in MMU notifier release
  drm/radeon: Fix WARN_ON if DRM_DP_AUX_CHARDEV is enabled
  drm/amdgpu/vi: move uvd tiling config setup into uvd code
  drm/amdgpu/vi: move sdma tiling config setup into sdma code
  drm/amdgpu/cik: move uvd tiling config setup into uvd code
  drm/amdgpu/cik: move sdma tiling config setup into sdma code
  drm/amdgpu/gfx7: rework gpu_init()
  drm/amdgpu/gfx: clean up harvest configuration (v2)
  ...
......@@ -172,6 +172,8 @@ config DRM_AMDGPU
source "drivers/gpu/drm/amd/amdgpu/Kconfig"
source "drivers/gpu/drm/amd/powerplay/Kconfig"
source "drivers/gpu/drm/amd/acp/Kconfig"
source "drivers/gpu/drm/nouveau/Kconfig"
config DRM_I810
......
menu "ACP Configuration"
config DRM_AMD_ACP
bool "Enable ACP IP support"
default y
select MFD_CORE
select PM_GENERIC_DOMAINS if PM
help
Choose this option to enable ACP IP support for AMD SOCs.
endmenu
#
# Makefile for the ACP, which is a sub-component
# of AMDSOC/AMDGPU drm driver.
# It provides the HW control for ACP related functionalities.
subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include
AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include "acp_gfx_if.h"
#define ACP_MODE_I2S 0
#define ACP_MODE_AZ 1
#define mmACP_AZALIA_I2S_SELECT 0x51d4
/**
 * amd_acp_hw_init - check that the ACP block is operating in I2S mode
 *
 * @cgs_device: CGS device handle used for the register read
 * @acp_version_major: major version of the ACP IP block
 * @acp_version_minor: minor version of the ACP IP block
 *
 * Only for version 2.2 is the mode read back from mmACP_AZALIA_I2S_SELECT;
 * every other version is treated as I2S without touching the hardware.
 *
 * Returns 0 when the mode is ACP_MODE_I2S, -ENODEV otherwise.
 */
int amd_acp_hw_init(void *cgs_device,
		    unsigned acp_version_major, unsigned acp_version_minor)
{
	unsigned int mode;

	if (acp_version_major != 2 || acp_version_minor != 2)
		mode = ACP_MODE_I2S;
	else
		mode = cgs_read_register(cgs_device, mmACP_AZALIA_I2S_SELECT);

	return (mode == ACP_MODE_I2S) ? 0 : -ENODEV;
}
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef _ACP_GFX_IF_H
#define _ACP_GFX_IF_H
#include <linux/types.h>
#include "cgs_linux.h"
#include "cgs_common.h"
/*
 * Checks whether the ACP block (identified by its major/minor IP version)
 * is in I2S mode, using cgs_device for register access.
 * Returns 0 for I2S mode and -ENODEV otherwise.
 */
int amd_acp_hw_init(void *cgs_device,
unsigned acp_version_major, unsigned acp_version_minor);
#endif /* _ACP_GFX_IF_H */
......@@ -8,7 +8,8 @@ ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_PATH)/include \
-I$(FULL_AMD_PATH)/amdgpu \
-I$(FULL_AMD_PATH)/scheduler \
-I$(FULL_AMD_PATH)/powerplay/inc
-I$(FULL_AMD_PATH)/powerplay/inc \
-I$(FULL_AMD_PATH)/acp/include
amdgpu-y := amdgpu_drv.o
......@@ -20,7 +21,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_semaphore.o amdgpu_sa.o atombios_i2c.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o
......@@ -92,7 +93,17 @@ amdgpu-y += amdgpu_cgs.o
amdgpu-y += \
../scheduler/gpu_scheduler.o \
../scheduler/sched_fence.o \
amdgpu_sched.o
amdgpu_job.o
# ACP component
ifneq ($(CONFIG_DRM_AMD_ACP),)
amdgpu-y += amdgpu_acp.o
AMDACPPATH := ../acp
include $(FULL_AMD_PATH)/acp/Makefile
amdgpu-y += $(AMD_ACP_FILES)
endif
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
......
此差异已折叠。
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#include <linux/irqdomain.h>
#include <linux/pm_domain.h>
#include <linux/platform_device.h>
#include <sound/designware_i2s.h>
#include <sound/pcm.h>
#include "amdgpu.h"
#include "atom.h"
#include "amdgpu_acp.h"
#include "acp_gfx_if.h"
#define ACP_TILE_ON_MASK 0x03
#define ACP_TILE_OFF_MASK 0x02
#define ACP_TILE_ON_RETAIN_REG_MASK 0x1f
#define ACP_TILE_OFF_RETAIN_REG_MASK 0x20
#define ACP_TILE_P1_MASK 0x3e
#define ACP_TILE_P2_MASK 0x3d
#define ACP_TILE_DSP0_MASK 0x3b
#define ACP_TILE_DSP1_MASK 0x37
#define ACP_TILE_DSP2_MASK 0x2f
#define ACP_DMA_REGS_END 0x146c0
#define ACP_I2S_PLAY_REGS_START 0x14840
#define ACP_I2S_PLAY_REGS_END 0x148b4
#define ACP_I2S_CAP_REGS_START 0x148b8
#define ACP_I2S_CAP_REGS_END 0x1496c
#define ACP_I2S_COMP1_CAP_REG_OFFSET 0xac
#define ACP_I2S_COMP2_CAP_REG_OFFSET 0xa8
#define ACP_I2S_COMP1_PLAY_REG_OFFSET 0x6c
#define ACP_I2S_COMP2_PLAY_REG_OFFSET 0x68
#define mmACP_PGFSM_RETAIN_REG 0x51c9
#define mmACP_PGFSM_CONFIG_REG 0x51ca
#define mmACP_PGFSM_READ_REG_0 0x51cc
#define mmACP_MEM_SHUT_DOWN_REQ_LO 0x51f8
#define mmACP_MEM_SHUT_DOWN_REQ_HI 0x51f9
#define mmACP_MEM_SHUT_DOWN_STS_LO 0x51fa
#define mmACP_MEM_SHUT_DOWN_STS_HI 0x51fb
#define ACP_TIMEOUT_LOOP 0x000000FF
#define ACP_DEVS 3
#define ACP_SRC_ID 162
/*
 * Indices of the ACP power tiles. The value is added to
 * mmACP_PGFSM_READ_REG_0 to select the tile's status register and is used
 * as a bit position in mmACP_PGFSM_RETAIN_REG by the suspend path.
 */
enum {
ACP_TILE_P1 = 0,
ACP_TILE_P2,
ACP_TILE_DSP0,
ACP_TILE_DSP1,
ACP_TILE_DSP2,
};
/*
 * Software-side init of the ACP IP block: record the parent device and
 * create the CGS device handle that the rest of this file uses for
 * register access.
 *
 * Returns 0 on success, -EINVAL if the CGS device could not be created.
 */
static int acp_sw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	void *cgs;

	adev->acp.parent = adev->dev;

	cgs = amdgpu_cgs_create_device(adev);
	adev->acp.cgs_device = cgs;

	return cgs ? 0 : -EINVAL;
}
/*
 * Software-side teardown of the ACP IP block: destroy the CGS device
 * handle if one was created. Always returns 0.
 */
static int acp_sw_fini(void *handle)
{
	struct amdgpu_device *adev = handle;
	void *cgs = adev->acp.cgs_device;

	if (cgs != NULL)
		amdgpu_cgs_destroy_device(cgs);

	return 0;
}
/**
 * acp_suspend_tile - power off a tile/block within ACP
 *
 * @cgs_dev: CGS device handle used for register access
 * @tile: tile index, ACP_TILE_P1 .. ACP_TILE_DSP2
 *
 * Nothing is done unless the tile's PGFSM status (masked with
 * ACP_TILE_ON_MASK) reads 0x0. In that case the tile's bit is set in the
 * retain register, a 0x500 + tile command is written to the PGFSM config
 * register, and the status is polled (100us apart, at most
 * ACP_TIMEOUT_LOOP reads) until it equals ACP_TILE_OFF_MASK.
 *
 * Returns 0 on success or when no action was needed, -EINVAL for an
 * out-of-range tile, -ETIMEDOUT if the status never reached the off value.
 */
static int acp_suspend_tile(void *cgs_dev, int tile)
{
	u32 val;
	u32 count;

	if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
		pr_err("Invalid ACP tile : %d to suspend\n", tile);
		/* a proper errno instead of a bare -1 (which reads as -EPERM) */
		return -EINVAL;
	}

	val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
	val &= ACP_TILE_ON_MASK;
	if (val != 0x0)
		return 0;

	val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
	val = val | (1 << tile);
	cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
	cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
			   0x500 + tile);

	count = ACP_TIMEOUT_LOOP;
	while (true) {
		val = cgs_read_register(cgs_dev,
					mmACP_PGFSM_READ_REG_0 + tile);
		val = val & ACP_TILE_ON_MASK;
		if (val == ACP_TILE_OFF_MASK)
			break;
		if (--count == 0) {
			pr_err("Timeout reading ACP PGFSM status\n");
			return -ETIMEDOUT;
		}
		udelay(100);
	}

	val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
	val |= ACP_TILE_OFF_RETAIN_REG_MASK;
	cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);

	return 0;
}
/**
 * acp_resume_tile - power on a tile/block within ACP
 *
 * @cgs_dev: CGS device handle used for register access
 * @tile: tile index, ACP_TILE_P1 .. ACP_TILE_DSP2
 *
 * Nothing is done when the tile's PGFSM status (masked with
 * ACP_TILE_ON_MASK) already reads 0x0. Otherwise a 0x600 + tile command is
 * written to the PGFSM config register and the status is polled (100us
 * apart, at most ACP_TIMEOUT_LOOP reads) until it reads 0x0. Afterwards
 * the retain register is written back, masked with the per-tile mask for
 * P1 and P2 and unchanged for the other tiles.
 *
 * Returns 0 on success or when no action was needed, -EINVAL for an
 * out-of-range tile, -ETIMEDOUT if the status never reached 0x0.
 */
static int acp_resume_tile(void *cgs_dev, int tile)
{
	u32 val;
	u32 count;

	if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
		/* report the offending tile, as the suspend path does */
		pr_err("Invalid ACP tile : %d to resume\n", tile);
		/* a proper errno instead of a bare -1 (which reads as -EPERM) */
		return -EINVAL;
	}

	val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
	val = val & ACP_TILE_ON_MASK;
	if (val == 0x0)
		return 0;

	cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
			   0x600 + tile);

	count = ACP_TIMEOUT_LOOP;
	while (true) {
		val = cgs_read_register(cgs_dev,
					mmACP_PGFSM_READ_REG_0 + tile);
		val = val & ACP_TILE_ON_MASK;
		if (val == 0x0)
			break;
		if (--count == 0) {
			pr_err("Timeout reading ACP PGFSM status\n");
			return -ETIMEDOUT;
		}
		udelay(100);
	}

	val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
	if (tile == ACP_TILE_P1)
		val = val & (ACP_TILE_P1_MASK);
	else if (tile == ACP_TILE_P2)
		val = val & (ACP_TILE_P2_MASK);
	cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);

	return 0;
}
/*
 * ACP power domain: a generic PM domain bundled with the CGS device handle
 * that the power_on/power_off callbacks need for register access. The
 * callbacks recover this wrapper from the embedded gpd via container_of().
 */
struct acp_pm_domain {
/* CGS device handle passed to acp_suspend_tile()/acp_resume_tile() */
void *cgs_dev;
/* embedded generic PM domain registered with pm_genpd_init() */
struct generic_pm_domain gpd;
};
/*
 * genpd power_off callback: suspend all five ACP tiles, walking from the
 * highest tile index down to ACP_TILE_P1.
 *
 * A tile that fails to suspend is only logged; the remaining tiles are
 * still powered off and the callback always returns 0.
 */
static int acp_poweroff(struct generic_pm_domain *genpd)
{
	struct acp_pm_domain *apd =
		container_of(genpd, struct acp_pm_domain, gpd);
	int idx;

	if (apd == NULL)
		return 0;

	for (idx = 4; idx >= 0; idx--) {
		if (acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + idx))
			pr_err("ACP tile %d tile suspend failed\n", idx);
	}

	return 0;
}
/*
 * genpd power_on callback: resume tiles P1 and P2 (stopping at the first
 * one that fails), then suspend the three DSP tiles, which are not used.
 *
 * Failures are logged only; the callback always returns 0.
 */
static int acp_poweron(struct generic_pm_domain *genpd)
{
	struct acp_pm_domain *apd =
		container_of(genpd, struct acp_pm_domain, gpd);
	int idx;

	if (apd == NULL)
		return 0;

	for (idx = 0; idx < 2; idx++) {
		if (acp_resume_tile(apd->cgs_dev, ACP_TILE_P1 + idx) == 0)
			continue;
		pr_err("ACP tile %d resume failed\n", idx);
		break;
	}

	/* Power down every DSP tile, carrying on even if one of them fails */
	for (idx = 0; idx < 3; idx++) {
		if (acp_suspend_tile(apd->cgs_dev, ACP_TILE_DSP0 + idx))
			pr_err("ACP DSP %d suspend failed\n", idx);
	}

	return 0;
}
/*
 * Look up the platform device created for an MFD cell.
 *
 * @device_name: cell name
 * @r: index appended to the name
 *
 * The device is searched on the platform bus under the name
 * "<device_name>.<r>.auto". The name is built with a single bounded
 * snprintf() so that a long cell name or a multi-digit index cannot
 * overrun the on-stack buffer (it is truncated instead).
 *
 * Returns the device found, or whatever bus_find_device_by_name() returns
 * when there is no match.
 * NOTE(review): bus_find_device_by_name() presumably returns the device
 * with a reference held; no put_device() is visible in this file - confirm.
 */
static struct device *get_mfd_cell_dev(const char *device_name, int r)
{
	char auto_dev_name[25];
	struct device *dev;

	snprintf(auto_dev_name, sizeof(auto_dev_name),
		 "%s.%d.auto", device_name, r);
	dev = bus_find_device_by_name(&platform_bus_type, NULL, auto_dev_name);
	dev_info(dev, "device %s added to pm domain\n", auto_dev_name);

	return dev;
}
/**
 * acp_hw_init - start and test ACP block
 *
 * @handle: amdgpu_device pointer
 *
 * Checks that the ACP block is in I2S mode, looks up its MMIO base, then
 * sets up a generic PM domain and registers three MFD child devices (the
 * audio DMA engine and two designware-i2s controllers) which are added to
 * that domain.
 *
 * Returns 0 on success, and also when amd_acp_hw_init() or the PCI
 * resource lookup reports -ENODEV (nothing is set up in that case);
 * a negative error code otherwise.
 */
static int acp_hw_init(void *handle)
{
	int r, i;
	uint64_t acp_base;
	struct device *dev;
	struct i2s_platform_data *i2s_pdata;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	const struct amdgpu_ip_block_version *ip_version =
		amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP);

	if (!ip_version)
		return -EINVAL;

	r = amd_acp_hw_init(adev->acp.cgs_device,
			    ip_version->major, ip_version->minor);
	/* -ENODEV means board uses AZ rather than ACP */
	if (r == -ENODEV)
		return 0;
	else if (r)
		return r;

	r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO,
				 0x5289, 0, &acp_base);
	if (r == -ENODEV)
		return 0;
	else if (r)
		return r;

	/*
	 * Allocate everything before anything is registered, so that an
	 * allocation failure can be unwound completely.
	 */
	adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
	adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
				     GFP_KERNEL);
	adev->acp.acp_res = kcalloc(4, sizeof(struct resource), GFP_KERNEL);
	i2s_pdata = kcalloc(2, sizeof(struct i2s_platform_data), GFP_KERNEL);
	if (adev->acp.acp_genpd == NULL || adev->acp.acp_cell == NULL ||
	    adev->acp.acp_res == NULL || i2s_pdata == NULL) {
		r = -ENOMEM;
		goto free_all;
	}

	adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
	adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
	adev->acp.acp_genpd->gpd.power_on = acp_poweron;
	adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device;

	pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);

	i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
	i2s_pdata[0].cap = DWC_I2S_PLAY;
	i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
	i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET;
	i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET;

	i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
			      DW_I2S_QUIRK_COMP_PARAM1;
	i2s_pdata[1].cap = DWC_I2S_RECORD;
	i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000;
	i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
	i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;

	adev->acp.acp_res[0].name = "acp2x_dma";
	adev->acp.acp_res[0].flags = IORESOURCE_MEM;
	adev->acp.acp_res[0].start = acp_base;
	adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;

	adev->acp.acp_res[1].name = "acp2x_dw_i2s_play";
	adev->acp.acp_res[1].flags = IORESOURCE_MEM;
	adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START;
	adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END;

	adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap";
	adev->acp.acp_res[2].flags = IORESOURCE_MEM;
	adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START;
	adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END;

	adev->acp.acp_res[3].name = "acp2x_dma_irq";
	adev->acp.acp_res[3].flags = IORESOURCE_IRQ;
	adev->acp.acp_res[3].start = amdgpu_irq_create_mapping(adev,
							       ACP_SRC_ID);
	adev->acp.acp_res[3].end = adev->acp.acp_res[3].start;

	/* The DMA cell gets all four resources; each I2S cell gets one. */
	adev->acp.acp_cell[0].name = "acp_audio_dma";
	adev->acp.acp_cell[0].num_resources = 4;
	adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];

	adev->acp.acp_cell[1].name = "designware-i2s";
	adev->acp.acp_cell[1].num_resources = 1;
	adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
	adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
	adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);

	adev->acp.acp_cell[2].name = "designware-i2s";
	adev->acp.acp_cell[2].num_resources = 1;
	adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
	adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
	adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);

	r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell,
				    ACP_DEVS);
	if (r)
		goto free_arrays;

	for (i = 0; i < ACP_DEVS ; i++) {
		dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
		r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev);
		if (r) {
			dev_err(dev, "Failed to add dev to genpd\n");
			/*
			 * NOTE(review): the MFD devices are registered (and
			 * possibly partly attached to the domain) at this
			 * point, so nothing is freed here; a full unwind
			 * needs pm_genpd_remove_device() and
			 * mfd_remove_devices() first - confirm and add.
			 */
			return r;
		}
	}

	/*
	 * NOTE(review): i2s_pdata stays referenced by the cells on success
	 * and is not released by the visible acp_hw_fini() - confirm
	 * ownership.
	 */
	return 0;

free_all:
	/* pm_genpd_init() has not run yet, so the domain can be freed */
	kfree(adev->acp.acp_genpd);
	adev->acp.acp_genpd = NULL;
free_arrays:
	/*
	 * Reached with a registered domain only when
	 * mfd_add_hotplug_devices() failed. The domain itself is kept
	 * allocated in that case because it has already been handed to
	 * pm_genpd_init().
	 * NOTE(review): assumes the MFD core drops any cells it added before
	 * returning an error - confirm.
	 */
	kfree(i2s_pdata);
	kfree(adev->acp.acp_res);
	adev->acp.acp_res = NULL;
	kfree(adev->acp.acp_cell);
	adev->acp.acp_cell = NULL;
	return r;
}
/**
 * acp_hw_fini - stop the hardware block
 *
 * @handle: amdgpu_device pointer
 *
 * Removes the MFD child devices from the ACP power domain, unregisters
 * them and frees the resource, cell and domain allocations made by
 * acp_hw_init(). Always returns 0.
 */
static int acp_hw_fini(void *handle)
{
	int i, ret;
	struct device *dev;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/*
	 * acp_hw_init() returns 0 without setting anything up when the board
	 * does not use ACP; there is nothing to tear down then, and the
	 * pointers below must not be dereferenced.
	 * NOTE(review): assumes adev->acp starts out zeroed - confirm.
	 */
	if (!adev->acp.acp_genpd)
		return 0;

	for (i = 0; i < ACP_DEVS ; i++) {
		dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
		ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev);
		/* If removal fails, dont giveup and try rest */
		if (ret)
			dev_err(dev, "remove dev from genpd failed\n");
	}

	mfd_remove_devices(adev->acp.parent);

	/* clear the pointers so that later callbacks see "no ACP" */
	kfree(adev->acp.acp_res);
	adev->acp.acp_res = NULL;
	kfree(adev->acp.acp_genpd);
	adev->acp.acp_genpd = NULL;
	kfree(adev->acp.acp_cell);
	adev->acp.acp_cell = NULL;

	return 0;
}
/* Suspend hook of the ACP IP block: does nothing and always returns 0. */
static int acp_suspend(void *handle)
{
return 0;
}
/*
 * Resume hook of the ACP IP block.
 *
 * If the ACP power domain was powered off before system suspend, all five
 * tiles are suspended again so that hardware and genpd state agree.
 * Individual tile failures are logged only. Always returns 0.
 */
static int acp_resume(void *handle)
{
	int i, ret;
	struct acp_pm_domain *apd;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/*
	 * acp_hw_init() returns 0 without creating the power domain when the
	 * board does not use ACP; return early instead of dereferencing a
	 * NULL domain.
	 * NOTE(review): assumes adev->acp starts out zeroed - confirm.
	 */
	apd = adev->acp.acp_genpd;
	if (!apd)
		return 0;

	/* SMU block will power on ACP irrespective of ACP runtime status.
	 * Power off explicitly based on genpd ACP runtime status so that ACP
	 * hw and ACP-genpd status are in sync.
	 * 'suspend_power_off' represents "Power status before system suspend"
	 */
	if (apd->gpd.suspend_power_off == true) {
		for (i = 4; i >= 0 ; i--) {
			ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i);
			if (ret)
				pr_err("ACP tile %d tile suspend failed\n", i);
		}
	}
	return 0;
}
/* Early-init hook of the ACP IP block: does nothing and always returns 0. */
static int acp_early_init(void *handle)
{
return 0;
}
/* Idle query of the ACP IP block: no hardware is checked; always reports true. */
static bool acp_is_idle(void *handle)
{
return true;
}
/* Wait-for-idle hook of the ACP IP block: returns 0 immediately without waiting. */
static int acp_wait_for_idle(void *handle)
{
return 0;
}
/* Soft-reset hook of the ACP IP block: does nothing and always returns 0. */
static int acp_soft_reset(void *handle)
{
return 0;
}
/*
 * Status hook of the ACP IP block: logs a fixed "ACP STATUS" line against
 * the amdgpu device; no hardware state is read.
 */
static void acp_print_status(void *handle)
{
	dev_info(((struct amdgpu_device *)handle)->dev, "ACP STATUS\n");
}
/* Clockgating hook of the ACP IP block: the requested state is ignored; always returns 0. */
static int acp_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
return 0;
}
/* Powergating hook of the ACP IP block: the requested state is ignored; always returns 0. */
static int acp_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
return 0;
}
/*
 * IP-block callback table for ACP. Only sw_init/sw_fini, hw_init/hw_fini
 * and resume do real work; the remaining hooks are stubs, and late_init is
 * left unset.
 */
const struct amd_ip_funcs acp_ip_funcs = {
.early_init = acp_early_init,
.late_init = NULL,
.sw_init = acp_sw_init,
.sw_fini = acp_sw_fini,
.hw_init = acp_hw_init,
.hw_fini = acp_hw_fini,
.suspend = acp_suspend,
.resume = acp_resume,
.is_idle = acp_is_idle,
.wait_for_idle = acp_wait_for_idle,
.soft_reset = acp_soft_reset,
.print_status = acp_print_status,
.set_clockgating_state = acp_set_clockgating_state,
.set_powergating_state = acp_set_powergating_state,
};
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#ifndef __AMDGPU_ACP_H__
#define __AMDGPU_ACP_H__
#include <linux/mfd/core.h>
/* Per-device state of the ACP IP block. */
struct amdgpu_acp {
/* device used as parent of the ACP MFD child devices */
struct device *parent;
/* CGS device handle used for ACP register access */
void *cgs_device;
/* NOTE(review): not referenced by the ACP code visible here - confirm use */
struct amd_acp_private *private;
/* MFD cell array describing the ACP child devices */
struct mfd_cell *acp_cell;
/* resource array (MMIO ranges and IRQ) referenced by the cells */
struct resource *acp_res;
/* generic PM domain wrapper the child devices are added to */
struct acp_pm_domain *acp_genpd;
};
extern const struct amd_ip_funcs acp_ip_funcs;
#endif /* __AMDGPU_ACP_H__ */
......@@ -1514,6 +1514,19 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
return -EINVAL;
}
/*
 * Report whether the ATOM BIOS provides a GPUVirtualizationInfo data table.
 *
 * Returns true when the table header can be parsed, false otherwise. The
 * parsed size, revisions and offset are discarded.
 */
bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev)
{
	u16 table_size, table_offset;
	u8 format_rev, content_rev;
	int table_idx = GetIndexIntoMasterTable(DATA, GPUVirtualizationInfo);

	return amdgpu_atom_parse_data_header(adev->mode_info.atom_context,
					     table_idx, &table_size,
					     &format_rev, &content_rev,
					     &table_offset) ? true : false;
}
void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock)
{
uint32_t bios_6_scratch;
......
......@@ -196,6 +196,8 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
u8 module_index,
struct atom_mc_reg_table *reg_table);
bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev);
void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock);
void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev);
void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev);
......
......@@ -32,6 +32,9 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
#define AMDGPU_BO_LIST_MAX_PRIORITY 32u
#define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1)
static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
struct amdgpu_bo_list **result,
int *id)
......@@ -90,6 +93,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
bool has_userptr = false;
unsigned i;
int r;
array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry));
if (!array)
......@@ -99,31 +103,34 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
for (i = 0; i < num_entries; ++i) {
struct amdgpu_bo_list_entry *entry = &array[i];
struct drm_gem_object *gobj;
struct mm_struct *usermm;
gobj = drm_gem_object_lookup(adev->ddev, filp, info[i].bo_handle);
if (!gobj)
if (!gobj) {
r = -ENOENT;
goto error_free;
}
entry->robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
drm_gem_object_unreference_unlocked(gobj);
entry->priority = info[i].bo_priority;
entry->prefered_domains = entry->robj->initial_domain;
entry->allowed_domains = entry->prefered_domains;
if (entry->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
entry->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
if (amdgpu_ttm_tt_has_userptr(entry->robj->tbo.ttm)) {
entry->priority = min(info[i].bo_priority,
AMDGPU_BO_LIST_MAX_PRIORITY);
usermm = amdgpu_ttm_tt_get_usermm(entry->robj->tbo.ttm);
if (usermm) {
if (usermm != current->mm) {
r = -EPERM;
goto error_free;
}
has_userptr = true;
entry->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
entry->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
}
entry->tv.bo = &entry->robj->tbo;
entry->tv.shared = true;
if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GDS)
if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GDS)
gds_obj = entry->robj;
if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GWS)
if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GWS)
gws_obj = entry->robj;
if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
oa_obj = entry->robj;
trace_amdgpu_bo_list_set(list, entry->robj);
......@@ -145,7 +152,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
error_free:
drm_free_large(array);
return -ENOENT;
return r;
}
struct amdgpu_bo_list *
......@@ -161,6 +168,36 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
return result;
}
/*
 * Add every entry of a BO list to the "validated" list, ordered by
 * priority.
 *
 * This is a bucket sort: each entry is appended to the bucket matching its
 * priority, then the buckets are spliced onto the head of "validated" from
 * the lowest priority upwards, which leaves the highest priority first.
 * Appending at the tail keeps the original relative order of entries that
 * share a priority, so the sort is stable; buffers earlier in the list are
 * likely to be used more often than later ones.
 */
void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
			     struct list_head *validated)
{
	struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS];
	unsigned n;

	for (n = 0; n < AMDGPU_BO_LIST_NUM_BUCKETS; n++)
		INIT_LIST_HEAD(&bucket[n]);

	/* distribute the entries over the per-priority buckets */
	for (n = 0; n < list->num_entries; n++)
		list_add_tail(&list->array[n].tv.head,
			      &bucket[list->array[n].priority]);

	/* chain the buckets together in the output list */
	for (n = 0; n < AMDGPU_BO_LIST_NUM_BUCKETS; n++)
		list_splice(&bucket[n], validated);
}
void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
{
mutex_unlock(&list->lock);
......
......@@ -25,8 +25,7 @@
#include <drm/drmP.h>
#include "amdgpu.h"
int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
struct amdgpu_ctx *ctx)
static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
{
unsigned i, j;
int r;
......@@ -35,44 +34,38 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
ctx->adev = adev;
kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock);
ctx->fences = kzalloc(sizeof(struct fence *) * amdgpu_sched_jobs *
AMDGPU_MAX_RINGS, GFP_KERNEL);
ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
sizeof(struct fence*), GFP_KERNEL);
if (!ctx->fences)
return -ENOMEM;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
ctx->rings[i].sequence = 1;
ctx->rings[i].fences = (void *)ctx->fences + sizeof(struct fence *) *
amdgpu_sched_jobs * i;
ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
}
if (amdgpu_enable_scheduler) {
/* create context entity for each ring */
for (i = 0; i < adev->num_rings; i++) {
struct amd_sched_rq *rq;
if (pri >= AMD_SCHED_MAX_PRIORITY) {
kfree(ctx->fences);
return -EINVAL;
}
rq = &adev->rings[i]->sched.sched_rq[pri];
r = amd_sched_entity_init(&adev->rings[i]->sched,
&ctx->rings[i].entity,
rq, amdgpu_sched_jobs);
if (r)
break;
}
if (i < adev->num_rings) {
for (j = 0; j < i; j++)
amd_sched_entity_fini(&adev->rings[j]->sched,
&ctx->rings[j].entity);
kfree(ctx->fences);
return r;
}
/* create context entity for each ring */
for (i = 0; i < adev->num_rings; i++) {
struct amdgpu_ring *ring = adev->rings[i];
struct amd_sched_rq *rq;
rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
rq, amdgpu_sched_jobs);
if (r)
break;
}
if (i < adev->num_rings) {
for (j = 0; j < i; j++)
amd_sched_entity_fini(&adev->rings[j]->sched,
&ctx->rings[j].entity);
kfree(ctx->fences);
return r;
}
return 0;
}
void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
{
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
......@@ -85,11 +78,9 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
fence_put(ctx->rings[i].fences[j]);
kfree(ctx->fences);
if (amdgpu_enable_scheduler) {
for (i = 0; i < adev->num_rings; i++)
amd_sched_entity_fini(&adev->rings[i]->sched,
&ctx->rings[i].entity);
}
for (i = 0; i < adev->num_rings; i++)
amd_sched_entity_fini(&adev->rings[i]->sched,
&ctx->rings[i].entity);
}
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
......@@ -112,7 +103,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
return r;
}
*id = (uint32_t)r;
r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_NORMAL, ctx);
r = amdgpu_ctx_init(adev, ctx);
if (r) {
idr_remove(&mgr->ctx_handles, *id);
*id = 0;
......@@ -200,18 +191,18 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
id = args->in.ctx_id;
switch (args->in.op) {
case AMDGPU_CTX_OP_ALLOC_CTX:
r = amdgpu_ctx_alloc(adev, fpriv, &id);
args->out.alloc.ctx_id = id;
break;
case AMDGPU_CTX_OP_FREE_CTX:
r = amdgpu_ctx_free(fpriv, id);
break;
case AMDGPU_CTX_OP_QUERY_STATE:
r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
break;
default:
return -EINVAL;
case AMDGPU_CTX_OP_ALLOC_CTX:
r = amdgpu_ctx_alloc(adev, fpriv, &id);
args->out.alloc.ctx_id = id;
break;
case AMDGPU_CTX_OP_FREE_CTX:
r = amdgpu_ctx_free(fpriv, id);
break;
case AMDGPU_CTX_OP_QUERY_STATE:
r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
break;
default:
return -EINVAL;
}
return r;
......
......@@ -635,31 +635,6 @@ bool amdgpu_card_posted(struct amdgpu_device *adev)
}
/**
* amdgpu_boot_test_post_card - check and possibly initialize the hw
*
* @adev: amdgpu_device pointer
*
* Check if the asic is initialized and if not, attempt to initialize
* it (all asics).
* Returns true if initialized or false if not.
*/
bool amdgpu_boot_test_post_card(struct amdgpu_device *adev)
{
if (amdgpu_card_posted(adev))
return true;
if (adev->bios) {
DRM_INFO("GPU not posted. posting now...\n");
if (adev->is_atom_bios)
amdgpu_atom_asic_init(adev->mode_info.atom_context);
return true;
} else {
dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
return false;
}
}
/**
* amdgpu_dummy_page_init - init dummy page used by the driver
*
......@@ -959,12 +934,6 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
amdgpu_sched_jobs);
amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
}
/* vramlimit must be a power of two */
if (!amdgpu_check_pot_argument(amdgpu_vram_limit)) {
dev_warn(adev->dev, "vram limit (%d) must be a power of 2\n",
amdgpu_vram_limit);
amdgpu_vram_limit = 0;
}
if (amdgpu_gart_size != -1) {
/* gtt size must be power of two and greater or equal to 32M */
......@@ -1434,7 +1403,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->mman.buffer_funcs = NULL;
adev->mman.buffer_funcs_ring = NULL;
adev->vm_manager.vm_pte_funcs = NULL;
adev->vm_manager.vm_pte_funcs_ring = NULL;
adev->vm_manager.vm_pte_num_rings = 0;
adev->gart.gart_funcs = NULL;
adev->fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
......@@ -1455,9 +1424,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* mutex initialization are all done here so we
* can recall function without having locking issues */
mutex_init(&adev->ring_lock);
mutex_init(&adev->vm_manager.lock);
atomic_set(&adev->irq.ih.lock, 0);
mutex_init(&adev->gem.mutex);
mutex_init(&adev->pm.mutex);
mutex_init(&adev->gfx.gpu_clock_mutex);
mutex_init(&adev->srbm_mutex);
......@@ -1531,8 +1499,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return r;
}
/* See if the asic supports SR-IOV */
adev->virtualization.supports_sr_iov =
amdgpu_atombios_has_gpu_virtualization_table(adev);
/* Post card if necessary */
if (!amdgpu_card_posted(adev)) {
if (!amdgpu_card_posted(adev) ||
adev->virtualization.supports_sr_iov) {
if (!adev->bios) {
dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
return -EINVAL;
......@@ -1577,11 +1550,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return r;
}
r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_KERNEL, &adev->kernel_ctx);
if (r) {
dev_err(adev->dev, "failed to create kernel context (%d).\n", r);
return r;
}
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
......@@ -1645,7 +1613,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
adev->shutdown = true;
/* evict vram memory */
amdgpu_bo_evict_vram(adev);
amdgpu_ctx_fini(&adev->kernel_ctx);
amdgpu_ib_pool_fini(adev);
amdgpu_fence_driver_fini(adev);
amdgpu_fbdev_fini(adev);
......@@ -1889,6 +1856,9 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
retry:
r = amdgpu_asic_reset(adev);
/* post card */
amdgpu_atom_asic_init(adev->mode_info.atom_context);
if (!r) {
dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
r = amdgpu_resume(adev);
......
......@@ -35,32 +35,30 @@
#include <drm/drm_crtc_helper.h>
#include <drm/drm_edid.h>
static void amdgpu_flip_wait_fence(struct amdgpu_device *adev,
struct fence **f)
static void amdgpu_flip_callback(struct fence *f, struct fence_cb *cb)
{
struct amdgpu_fence *fence;
long r;
struct amdgpu_flip_work *work =
container_of(cb, struct amdgpu_flip_work, cb);
if (*f == NULL)
return;
fence_put(f);
schedule_work(&work->flip_work);
}
fence = to_amdgpu_fence(*f);
if (fence) {
r = fence_wait(&fence->base, false);
if (r == -EDEADLK)
r = amdgpu_gpu_reset(adev);
} else
r = fence_wait(*f, false);
static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
struct fence **f)
{
struct fence *fence= *f;
if (r)
DRM_ERROR("failed to wait on page flip fence (%ld)!\n", r);
if (fence == NULL)
return false;
/* We continue with the page flip even if we failed to wait on
* the fence, otherwise the DRM core and userspace will be
* confused about which BO the CRTC is scanning out
*/
fence_put(*f);
*f = NULL;
if (!fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
return true;
fence_put(*f);
return false;
}
static void amdgpu_flip_work_func(struct work_struct *__work)
......@@ -76,9 +74,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
int vpos, hpos, stat, min_udelay;
struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
amdgpu_flip_wait_fence(adev, &work->excl);
if (amdgpu_flip_handle_fence(work, &work->excl))
return;
for (i = 0; i < work->shared_count; ++i)
amdgpu_flip_wait_fence(adev, &work->shared[i]);
if (amdgpu_flip_handle_fence(work, &work->shared[i]))
return;
/* We borrow the event spin lock for protecting flip_status */
spin_lock_irqsave(&crtc->dev->event_lock, flags);
......@@ -118,12 +119,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
spin_lock_irqsave(&crtc->dev->event_lock, flags);
};
/* do the flip (mmio) */
adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
/* set the flip status */
amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED;
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
/* Do the flip (mmio) */
adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
}
/*
......@@ -242,7 +243,7 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
/* update crtc fb */
crtc->primary->fb = fb;
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
queue_work(amdgpu_crtc->pflip_queue, &work->flip_work);
amdgpu_flip_work_func(&work->flip_work);
return 0;
vblank_cleanup:
......
......@@ -69,7 +69,6 @@ int amdgpu_dpm = -1;
int amdgpu_smc_load_fw = 1;
int amdgpu_aspm = -1;
int amdgpu_runtime_pm = -1;
int amdgpu_hard_reset = 0;
unsigned amdgpu_ip_block_mask = 0xffffffff;
int amdgpu_bapm = -1;
int amdgpu_deep_color = 0;
......@@ -78,10 +77,8 @@ int amdgpu_vm_block_size = -1;
int amdgpu_vm_fault_stop = 0;
int amdgpu_vm_debug = 0;
int amdgpu_exp_hw_support = 0;
int amdgpu_enable_scheduler = 1;
int amdgpu_sched_jobs = 32;
int amdgpu_sched_hw_submission = 2;
int amdgpu_enable_semaphores = 0;
int amdgpu_powerplay = -1;
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
......@@ -126,9 +123,6 @@ module_param_named(aspm, amdgpu_aspm, int, 0444);
MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)");
module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
MODULE_PARM_DESC(hard_reset, "PCI config reset (1 = force enable, 0 = disable (default))");
module_param_named(hard_reset, amdgpu_hard_reset, int, 0444);
MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
......@@ -153,18 +147,12 @@ module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
MODULE_PARM_DESC(enable_scheduler, "enable SW GPU scheduler (1 = enable (default), 0 = disable)");
module_param_named(enable_scheduler, amdgpu_enable_scheduler, int, 0444);
MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)");
module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);
MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
MODULE_PARM_DESC(enable_semaphores, "Enable semaphores (1 = enable, 0 = disable (default))");
module_param_named(enable_semaphores, amdgpu_enable_semaphores, int, 0644);
#ifdef CONFIG_DRM_AMD_POWERPLAY
MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))");
module_param_named(powerplay, amdgpu_powerplay, int, 0444);
......
......@@ -107,7 +107,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
if ((*fence) == NULL) {
return -ENOMEM;
}
(*fence)->seq = ++ring->fence_drv.sync_seq[ring->idx];
(*fence)->seq = ++ring->fence_drv.sync_seq;
(*fence)->ring = ring;
(*fence)->owner = owner;
fence_init(&(*fence)->base, &amdgpu_fence_ops,
......@@ -171,7 +171,7 @@ static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
*/
last_seq = atomic64_read(&ring->fence_drv.last_seq);
do {
last_emitted = ring->fence_drv.sync_seq[ring->idx];
last_emitted = ring->fence_drv.sync_seq;
seq = amdgpu_fence_read(ring);
seq |= last_seq & 0xffffffff00000000LL;
if (seq < last_seq) {
......@@ -260,34 +260,28 @@ static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
}
/*
* amdgpu_ring_wait_seq_timeout - wait for seq of the specific ring to signal
* amdgpu_ring_wait_seq - wait for seq of the specific ring to signal
* @ring: ring to wait on for the seq number
* @seq: seq number wait for
*
* return value:
* 0: seq signaled, and gpu not hang
* -EDEADL: GPU hang detected
* -EINVAL: some parameter is not valid
*/
static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
{
bool signaled = false;
BUG_ON(!ring);
if (seq > ring->fence_drv.sync_seq[ring->idx])
if (seq > ring->fence_drv.sync_seq)
return -EINVAL;
if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
return 0;
amdgpu_fence_schedule_fallback(ring);
wait_event(ring->fence_drv.fence_queue, (
(signaled = amdgpu_fence_seq_signaled(ring, seq))));
wait_event(ring->fence_drv.fence_queue,
amdgpu_fence_seq_signaled(ring, seq));
if (signaled)
return 0;
else
return -EDEADLK;
return 0;
}
/**
......@@ -304,7 +298,7 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
{
uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;
if (seq >= ring->fence_drv.sync_seq[ring->idx])
if (seq >= ring->fence_drv.sync_seq)
return -ENOENT;
return amdgpu_fence_ring_wait_seq(ring, seq);
......@@ -322,7 +316,7 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
*/
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
{
uint64_t seq = ring->fence_drv.sync_seq[ring->idx];
uint64_t seq = ring->fence_drv.sync_seq;
if (!seq)
return 0;
......@@ -347,7 +341,7 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
* but it's ok to report slightly wrong fence count here.
*/
amdgpu_fence_process(ring);
emitted = ring->fence_drv.sync_seq[ring->idx]
emitted = ring->fence_drv.sync_seq
- atomic64_read(&ring->fence_drv.last_seq);
/* to avoid 32-bit wrap-around */
if (emitted > 0x10000000)
......@@ -356,68 +350,6 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
return (unsigned)emitted;
}
/**
 * amdgpu_fence_need_sync - check whether a ring still has to sync to a fence
 *
 * @fence: amdgpu fence object, may be NULL
 * @dst_ring: ring that wants to depend on @fence
 *
 * Compares the fence sequence number with the sync point @dst_ring has
 * recorded for the ring that emitted @fence.
 * Returns false for a NULL fence, for a fence emitted on @dst_ring itself,
 * or when the recorded sync point already covers the fence; true otherwise.
 */
bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
			    struct amdgpu_ring *dst_ring)
{
	if (!fence || fence->ring == dst_ring)
		return false;

	/* we are protected by the ring mutex */
	return fence->seq > dst_ring->fence_drv.sync_seq[fence->ring->idx];
}
/**
 * amdgpu_fence_note_sync - remember the sync points inherited from a fence
 *
 * @fence: amdgpu fence object, may be NULL
 * @dst_ring: ring that has been synced to @fence
 *
 * For every ring slot except @dst_ring's own, raises the sync sequence
 * recorded on @dst_ring to at least the value recorded on the ring that
 * emitted @fence.  Does nothing for a NULL fence or when the fence was
 * emitted on @dst_ring.
 */
void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
			    struct amdgpu_ring *dst_ring)
{
	struct amdgpu_fence_driver *to, *from;
	unsigned ring_idx;

	if (!fence || fence->ring == dst_ring)
		return;

	/* we are protected by the ring mutex */
	from = &fence->ring->fence_drv;
	to = &dst_ring->fence_drv;

	for (ring_idx = 0; ring_idx < AMDGPU_MAX_RINGS; ++ring_idx) {
		if (ring_idx != dst_ring->idx)
			to->sync_seq[ring_idx] = max(to->sync_seq[ring_idx],
						     from->sync_seq[ring_idx]);
	}
}
/**
* amdgpu_fence_driver_start_ring - make the fence driver
* ready for use on the requested ring.
......@@ -471,13 +403,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
*/
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{
int i, r;
long timeout;
int r;
ring->fence_drv.cpu_addr = NULL;
ring->fence_drv.gpu_addr = 0;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
ring->fence_drv.sync_seq[i] = 0;
ring->fence_drv.sync_seq = 0;
atomic64_set(&ring->fence_drv.last_seq, 0);
ring->fence_drv.initialized = false;
......@@ -486,26 +417,24 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
init_waitqueue_head(&ring->fence_drv.fence_queue);
if (amdgpu_enable_scheduler) {
long timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
if (timeout == 0) {
/*
* FIXME:
* Delayed workqueue cannot use it directly,
* so the scheduler will not use delayed workqueue if
* MAX_SCHEDULE_TIMEOUT is set.
* Currently keep it simple and silly.
*/
timeout = MAX_SCHEDULE_TIMEOUT;
}
r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
amdgpu_sched_hw_submission,
timeout, ring->name);
if (r) {
DRM_ERROR("Failed to create scheduler on ring %s.\n",
ring->name);
return r;
}
timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
if (timeout == 0) {
/*
* FIXME:
* Delayed workqueue cannot use it directly,
* so the scheduler will not use delayed workqueue if
* MAX_SCHEDULE_TIMEOUT is set.
* Currently keep it simple and silly.
*/
timeout = MAX_SCHEDULE_TIMEOUT;
}
r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
amdgpu_sched_hw_submission,
timeout, ring->name);
if (r) {
DRM_ERROR("Failed to create scheduler on ring %s.\n",
ring->name);
return r;
}
return 0;
......@@ -552,7 +481,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
kmem_cache_destroy(amdgpu_fence_slab);
mutex_lock(&adev->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
......@@ -570,7 +498,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
del_timer_sync(&ring->fence_drv.fallback_timer);
ring->fence_drv.initialized = false;
}
mutex_unlock(&adev->ring_lock);
}
/**
......@@ -585,7 +512,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
{
int i, r;
mutex_lock(&adev->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->fence_drv.initialized)
......@@ -602,7 +528,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
}
mutex_unlock(&adev->ring_lock);
}
/**
......@@ -621,7 +546,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
{
int i;
mutex_lock(&adev->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->fence_drv.initialized)
......@@ -631,7 +555,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
amdgpu_irq_get(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
}
mutex_unlock(&adev->ring_lock);
}
/**
......@@ -651,7 +574,7 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
if (!ring || !ring->fence_drv.initialized)
continue;
amdgpu_fence_write(ring, ring->fence_drv.sync_seq[i]);
amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
}
}
......@@ -781,7 +704,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
int i, j;
int i;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
......@@ -794,28 +717,38 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
seq_printf(m, "Last signaled fence 0x%016llx\n",
(unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
seq_printf(m, "Last emitted 0x%016llx\n",
ring->fence_drv.sync_seq[i]);
for (j = 0; j < AMDGPU_MAX_RINGS; ++j) {
struct amdgpu_ring *other = adev->rings[j];
if (i != j && other && other->fence_drv.initialized &&
ring->fence_drv.sync_seq[j])
seq_printf(m, "Last sync to ring %d 0x%016llx\n",
j, ring->fence_drv.sync_seq[j]);
}
ring->fence_drv.sync_seq);
}
return 0;
}
/**
 * amdgpu_debugfs_gpu_reset - manually trigger a gpu reset
 *
 * @m: seq_file of the debugfs entry; its private data is the drm_info_node
 * @data: unused
 *
 * Prints "gpu reset" and calls amdgpu_gpu_reset() immediately.  The result
 * of the reset is not reported; the function always returns 0.
 * Presumably invoked when the debugfs file is read - confirm against the
 * drm debugfs helpers.
 */
static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data)
{
	struct drm_info_node *info = (struct drm_info_node *) m->private;
	struct amdgpu_device *adev = info->minor->dev->dev_private;

	seq_printf(m, "gpu reset\n");
	amdgpu_gpu_reset(adev);

	return 0;
}
/*
 * debugfs entries provided by the fence code: the per-ring fence state
 * dump and the manual GPU reset trigger.  The registration call passes the
 * number of entries explicitly, so keep that count in step with this table.
 */
static struct drm_info_list amdgpu_debugfs_fence_list[] = {
{"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
{"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL}
};
#endif
int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 1);
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2);
#else
return 0;
#endif
......
......@@ -83,24 +83,32 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
return r;
}
*obj = &robj->gem_base;
robj->pid = task_pid_nr(current);
mutex_lock(&adev->gem.mutex);
list_add_tail(&robj->list, &adev->gem.objects);
mutex_unlock(&adev->gem.mutex);
return 0;
}
int amdgpu_gem_init(struct amdgpu_device *adev)
void amdgpu_gem_force_release(struct amdgpu_device *adev)
{
INIT_LIST_HEAD(&adev->gem.objects);
return 0;
}
struct drm_device *ddev = adev->ddev;
struct drm_file *file;
void amdgpu_gem_fini(struct amdgpu_device *adev)
{
amdgpu_bo_force_delete(adev);
mutex_lock(&ddev->struct_mutex);
list_for_each_entry(file, &ddev->filelist, lhead) {
struct drm_gem_object *gobj;
int handle;
WARN_ONCE(1, "Still active user space clients!\n");
spin_lock(&file->table_lock);
idr_for_each_entry(&file->object_idr, gobj, handle) {
WARN_ONCE(1, "And also active allocations!\n");
drm_gem_object_unreference(gobj);
}
idr_destroy(&file->object_idr);
spin_unlock(&file->table_lock);
}
mutex_unlock(&ddev->struct_mutex);
}
/*
......@@ -252,6 +260,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
goto handle_lockup;
bo = gem_to_amdgpu_bo(gobj);
bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
r = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags);
if (r)
goto release_object;
......@@ -308,7 +318,7 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp,
return -ENOENT;
}
robj = gem_to_amdgpu_bo(gobj);
if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm) ||
if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) ||
(robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
drm_gem_object_unreference_unlocked(gobj);
return -EPERM;
......@@ -628,7 +638,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
info.bo_size = robj->gem_base.size;
info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT;
info.domains = robj->initial_domain;
info.domains = robj->prefered_domains;
info.domain_flags = robj->flags;
amdgpu_bo_unreserve(robj);
if (copy_to_user(out, &info, sizeof(info)))
......@@ -636,14 +646,18 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
break;
}
case AMDGPU_GEM_OP_SET_PLACEMENT:
if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm)) {
if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) {
r = -EPERM;
amdgpu_bo_unreserve(robj);
break;
}
robj->initial_domain = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT |
AMDGPU_GEM_DOMAIN_CPU);
robj->prefered_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT |
AMDGPU_GEM_DOMAIN_CPU);
robj->allowed_domains = robj->prefered_domains;
if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
amdgpu_bo_unreserve(robj);
break;
default:
......@@ -688,38 +702,73 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
}
#if defined(CONFIG_DEBUG_FS)
/*
 * idr_for_each() callback that prints one line for a GEM handle.
 *
 * @id: handle value from the idr
 * @ptr: the drm_gem_object stored under that handle
 * @data: the seq_file to print into
 *
 * Emits handle, size, current placement (VRAM, GTT, otherwise CPU) and GPU
 * offset, followed by the pin count when it is non-zero.  Always returns 0
 * so the iteration continues.
 */
static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
{
	struct drm_gem_object *gobj = ptr;
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
	struct seq_file *m = data;
	const char *where;
	unsigned mem_domain;
	unsigned pins;

	mem_domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
	if (mem_domain == AMDGPU_GEM_DOMAIN_VRAM)
		where = "VRAM";
	else if (mem_domain == AMDGPU_GEM_DOMAIN_GTT)
		where = " GTT";
	else
		/* AMDGPU_GEM_DOMAIN_CPU and anything unexpected */
		where = " CPU";

	seq_printf(m, "\t0x%08x: %12ld byte %s @ 0x%010Lx",
		   id, amdgpu_bo_size(bo), where,
		   amdgpu_bo_gpu_offset(bo));

	/* read the counter once so the check and the print agree */
	pins = ACCESS_ONCE(bo->pin_count);
	if (pins)
		seq_printf(m, " pin count %d", pins);
	seq_printf(m, "\n");

	return 0;
}
static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_bo *rbo;
unsigned i = 0;
struct drm_file *file;
int r;
mutex_lock(&adev->gem.mutex);
list_for_each_entry(rbo, &adev->gem.objects, list) {
unsigned domain;
const char *placement;
r = mutex_lock_interruptible(&dev->struct_mutex);
if (r)
return r;
domain = amdgpu_mem_type_to_domain(rbo->tbo.mem.mem_type);
switch (domain) {
case AMDGPU_GEM_DOMAIN_VRAM:
placement = "VRAM";
break;
case AMDGPU_GEM_DOMAIN_GTT:
placement = " GTT";
break;
case AMDGPU_GEM_DOMAIN_CPU:
default:
placement = " CPU";
break;
}
seq_printf(m, "bo[0x%08x] %8ldkB %8ldMB %s pid %8ld\n",
i, amdgpu_bo_size(rbo) >> 10, amdgpu_bo_size(rbo) >> 20,
placement, (unsigned long)rbo->pid);
i++;
list_for_each_entry(file, &dev->filelist, lhead) {
struct task_struct *task;
/*
* Although we have a valid reference on file->pid, that does
* not guarantee that the task_struct who called get_pid() is
* still alive (e.g. get_pid(current) => fork() => exit()).
* Therefore, we need to protect this ->comm access using RCU.
*/
rcu_read_lock();
task = pid_task(file->pid, PIDTYPE_PID);
seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid),
task ? task->comm : "<unknown>");
rcu_read_unlock();
spin_lock(&file->table_lock);
idr_for_each(&file->object_idr, amdgpu_debugfs_gem_bo_info, m);
spin_unlock(&file->table_lock);
}
mutex_unlock(&adev->gem.mutex);
mutex_unlock(&dev->struct_mutex);
return 0;
}
......
......@@ -55,10 +55,9 @@ static int amdgpu_debugfs_sa_init(struct amdgpu_device *adev);
* suballocator.
* Returns 0 on success, error on failure.
*/
int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib)
{
struct amdgpu_device *adev = ring->adev;
int r;
if (size) {
......@@ -75,9 +74,6 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
}
amdgpu_sync_create(&ib->sync);
ib->ring = ring;
ib->vm = vm;
return 0;
......@@ -93,7 +89,6 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
*/
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
{
amdgpu_sync_free(adev, &ib->sync, &ib->fence->base);
amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
if (ib->fence)
fence_put(&ib->fence->base);
......@@ -106,6 +101,7 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
* @num_ibs: number of IBs to schedule
* @ibs: IB objects to schedule
* @owner: owner for creating the fences
* @f: fence created during this submission
*
* Schedule an IB on the associated ring (all asics).
* Returns 0 on success, error on failure.
......@@ -120,11 +116,13 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
* a CONST_IB), it will be put on the ring prior to the DE IB. Prior
* to SI there was just a DE IB.
*/
int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
struct amdgpu_ib *ibs, void *owner)
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
struct amdgpu_ib *ibs, void *owner,
struct fence *last_vm_update,
struct fence **f)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib *ib = &ibs[0];
struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx, *old_ctx;
struct amdgpu_vm *vm;
unsigned i;
......@@ -133,7 +131,6 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
if (num_ibs == 0)
return -EINVAL;
ring = ibs->ring;
ctx = ibs->ctx;
vm = ibs->vm;
......@@ -141,36 +138,21 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
dev_err(adev->dev, "couldn't schedule ib\n");
return -EINVAL;
}
r = amdgpu_sync_wait(&ibs->sync);
if (r) {
dev_err(adev->dev, "IB sync failed (%d).\n", r);
return r;
}
r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs);
if (r) {
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
return r;
}
if (vm) {
/* grab a vm id if necessary */
r = amdgpu_vm_grab_id(ibs->vm, ibs->ring, &ibs->sync);
if (r) {
amdgpu_ring_unlock_undo(ring);
return r;
}
if (vm && !ibs->grabbed_vmid) {
dev_err(adev->dev, "VM IB without ID\n");
return -EINVAL;
}
r = amdgpu_sync_rings(&ibs->sync, ring);
r = amdgpu_ring_alloc(ring, 256 * num_ibs);
if (r) {
amdgpu_ring_unlock_undo(ring);
dev_err(adev->dev, "failed to sync rings (%d)\n", r);
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
return r;
}
if (vm) {
/* do context switch */
amdgpu_vm_flush(ring, vm, ib->sync.last_vm_update);
amdgpu_vm_flush(ring, vm, last_vm_update);
if (ring->funcs->emit_gds_switch)
amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id,
......@@ -186,9 +168,9 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
for (i = 0; i < num_ibs; ++i) {
ib = &ibs[i];
if (ib->ring != ring || ib->ctx != ctx || ib->vm != vm) {
if (ib->ctx != ctx || ib->vm != vm) {
ring->current_ctx = old_ctx;
amdgpu_ring_unlock_undo(ring);
amdgpu_ring_undo(ring);
return -EINVAL;
}
amdgpu_ring_emit_ib(ring, ib);
......@@ -199,14 +181,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r);
ring->current_ctx = old_ctx;
amdgpu_ring_unlock_undo(ring);
amdgpu_ring_undo(ring);
return r;
}
if (!amdgpu_enable_scheduler && ib->ctx)
ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
&ib->fence->base);
/* wrap the last IB with fence */
if (ib->user) {
uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo);
......@@ -215,10 +193,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
AMDGPU_FENCE_FLAG_64BIT);
}
if (ib->vm)
amdgpu_vm_fence(adev, ib->vm, &ib->fence->base);
if (f)
*f = fence_get(&ib->fence->base);
amdgpu_ring_unlock_commit(ring);
amdgpu_ring_commit(ring);
return 0;
}
......
......@@ -28,15 +28,103 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
/*
 * Allocate a zeroed job with room for @num_ibs IBs in the same allocation.
 *
 * @adev: device the job belongs to
 * @num_ibs: number of IB slots to reserve, must be non-zero
 * @job: receives the new job (set to NULL when the allocation fails)
 *
 * The IB array is placed directly behind the job structure and the job's
 * sync object is initialized.
 * Returns 0 on success, -EINVAL for @num_ibs == 0, -ENOMEM on allocation
 * failure.
 */
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
		     struct amdgpu_job **job)
{
	struct amdgpu_job *new_job;
	size_t bytes;

	if (num_ibs == 0)
		return -EINVAL;

	bytes = sizeof(struct amdgpu_job) + sizeof(struct amdgpu_ib) * num_ibs;

	new_job = kzalloc(bytes, GFP_KERNEL);
	*job = new_job;
	if (!new_job)
		return -ENOMEM;

	new_job->adev = adev;
	/* the IBs live in the tail of the same allocation */
	new_job->ibs = (void *)(new_job + 1);
	new_job->num_ibs = num_ibs;

	amdgpu_sync_create(&new_job->sync);

	return 0;
}
/*
 * Allocate a job with exactly one IB and back that IB with @size bytes.
 *
 * @adev: device the job belongs to
 * @size: size passed to amdgpu_ib_get() for the single IB
 * @job: receives the new job on success
 *
 * Returns 0 on success or the error from amdgpu_job_alloc() /
 * amdgpu_ib_get().  When amdgpu_ib_get() fails the job is freed and *job
 * is reset to NULL so the caller is not left holding a dangling pointer.
 */
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
			     struct amdgpu_job **job)
{
	int r;

	r = amdgpu_job_alloc(adev, 1, job);
	if (r)
		return r;

	r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]);
	if (r) {
		kfree(*job);
		*job = NULL;
	}

	return r;
}
/*
 * Release everything a job owns: each of its IBs, the reference on the
 * user fence BO, the sync object, and finally the job allocation itself.
 */
void amdgpu_job_free(struct amdgpu_job *job)
{
	unsigned ib_idx = 0;

	while (ib_idx < job->num_ibs) {
		amdgpu_ib_free(job->adev, &job->ibs[ib_idx]);
		++ib_idx;
	}

	amdgpu_bo_unref(&job->uf.bo);
	amdgpu_sync_free(&job->sync);

	/* the IB array shares the allocation with the job */
	kfree(job);
}
int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
struct amd_sched_entity *entity, void *owner,
struct fence **f)
{
job->ring = ring;
job->base.sched = &ring->sched;
job->base.s_entity = entity;
job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
if (!job->base.s_fence)
return -ENOMEM;
*f = fence_get(&job->base.s_fence->base);
job->owner = owner;
amd_sched_entity_push_job(&job->base);
return 0;
}
static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
{
struct amdgpu_job *job = to_amdgpu_job(sched_job);
return amdgpu_sync_get_fence(&job->ibs->sync);
struct amdgpu_vm *vm = job->ibs->vm;
struct fence *fence = amdgpu_sync_get_fence(&job->sync);
if (fence == NULL && vm && !job->ibs->grabbed_vmid) {
struct amdgpu_ring *ring = job->ring;
int r;
r = amdgpu_vm_grab_id(vm, ring, &job->sync,
&job->base.s_fence->base);
if (r)
DRM_ERROR("Error getting VM ID (%d)\n", r);
else
job->ibs->grabbed_vmid = true;
fence = amdgpu_sync_get_fence(&job->sync);
}
return fence;
}
static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
{
struct amdgpu_fence *fence = NULL;
struct fence *fence = NULL;
struct amdgpu_job *job;
int r;
......@@ -45,64 +133,27 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
return NULL;
}
job = to_amdgpu_job(sched_job);
r = amdgpu_sync_wait(&job->sync);
if (r) {
DRM_ERROR("failed to sync wait (%d)\n", r);
return NULL;
}
trace_amdgpu_sched_run_job(job);
r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner);
r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job->owner,
job->sync.last_vm_update, &fence);
if (r) {
DRM_ERROR("Error scheduling IBs (%d)\n", r);
goto err;
}
fence = job->ibs[job->num_ibs - 1].fence;
fence_get(&fence->base);
err:
if (job->free_job)
job->free_job(job);
kfree(job);
return fence ? &fence->base : NULL;
amdgpu_job_free(job);
return fence;
}
struct amd_sched_backend_ops amdgpu_sched_ops = {
.dependency = amdgpu_sched_dependency,
.run_job = amdgpu_sched_run_job,
.dependency = amdgpu_job_dependency,
.run_job = amdgpu_job_run,
};
/*
 * Submit kernel-owned IBs, either through the GPU scheduler or directly.
 *
 * @adev: device to submit on
 * @ring: target ring
 * @ibs: array of IBs to submit
 * @num_ibs: number of entries in @ibs
 * @free_job: callback stored in the job for the scheduler path
 * @owner: owner cookie for the fences
 * @f: receives a fence reference for the submission
 *
 * With amdgpu_enable_scheduler set, a job is allocated, attached to the
 * kernel context entity of @ring and pushed to the scheduler; *f is the
 * scheduler fence.  Otherwise the IBs are scheduled directly and *f is the
 * fence of the last IB.
 * Returns 0 on success, -ENOMEM on allocation failure, or the error from
 * amdgpu_ib_schedule().
 */
int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
					 struct amdgpu_ring *ring,
					 struct amdgpu_ib *ibs,
					 unsigned num_ibs,
					 int (*free_job)(struct amdgpu_job *),
					 void *owner,
					 struct fence **f)
{
	struct amdgpu_job *job;
	int r;

	if (!amdgpu_enable_scheduler) {
		/* direct submission path */
		r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
		if (r)
			return r;
		*f = fence_get(&ibs[num_ibs - 1].fence->base);
		return 0;
	}

	job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	job->base.sched = &ring->sched;
	job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
	job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
	if (!job->base.s_fence) {
		kfree(job);
		return -ENOMEM;
	}
	*f = fence_get(&job->base.s_fence->base);

	job->adev = adev;
	job->ibs = ibs;
	job->num_ibs = num_ibs;
	job->owner = owner;
	job->free_job = free_job;
	amd_sched_entity_push_job(&job->base);

	return 0;
}
......@@ -447,8 +447,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.max_memory_clock = adev->pm.default_mclk * 10;
}
dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se *
adev->gfx.config.max_shader_engines;
dev_info.num_rb_pipes = adev->gfx.config.num_rbs;
dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
dev_info._pad = 0;
dev_info.ids_flags = 0;
......@@ -727,6 +726,12 @@ int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
/* Get associated drm_crtc: */
crtc = &adev->mode_info.crtcs[pipe]->base;
if (!crtc) {
/* This can occur on driver load if some component fails to
* initialize completely and driver is unloaded */
DRM_ERROR("Uninitialized crtc %d\n", pipe);
return -EINVAL;
}
/* Helper routine in DRM core does all the work: */
return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error,
......
......@@ -48,8 +48,7 @@ struct amdgpu_mn {
/* protected by adev->mn_lock */
struct hlist_node node;
/* objects protected by lock */
struct mutex lock;
/* objects protected by mm->mmap_sem */
struct rb_root objects;
};
......@@ -73,21 +72,19 @@ static void amdgpu_mn_destroy(struct work_struct *work)
struct amdgpu_bo *bo, *next_bo;
mutex_lock(&adev->mn_lock);
mutex_lock(&rmn->lock);
down_write(&rmn->mm->mmap_sem);
hash_del(&rmn->node);
rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects,
it.rb) {
interval_tree_remove(&node->it, &rmn->objects);
list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
bo->mn = NULL;
list_del_init(&bo->mn_list);
}
kfree(node);
}
mutex_unlock(&rmn->lock);
up_write(&rmn->mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
mmu_notifier_unregister(&rmn->mn, rmn->mm);
mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm);
kfree(rmn);
}
......@@ -129,8 +126,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
/* notification is exclusive, but interval is inclusive */
end -= 1;
mutex_lock(&rmn->lock);
it = interval_tree_iter_first(&rmn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
......@@ -142,7 +137,8 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
list_for_each_entry(bo, &node->bos, mn_list) {
if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound)
if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start,
end))
continue;
r = amdgpu_bo_reserve(bo, true);
......@@ -164,8 +160,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
amdgpu_bo_unreserve(bo);
}
}
mutex_unlock(&rmn->lock);
}
static const struct mmu_notifier_ops amdgpu_mn_ops = {
......@@ -186,8 +180,8 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
struct amdgpu_mn *rmn;
int r;
down_write(&mm->mmap_sem);
mutex_lock(&adev->mn_lock);
down_write(&mm->mmap_sem);
hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
if (rmn->mm == mm)
......@@ -202,7 +196,6 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
rmn->adev = adev;
rmn->mm = mm;
rmn->mn.ops = &amdgpu_mn_ops;
mutex_init(&rmn->lock);
rmn->objects = RB_ROOT;
r = __mmu_notifier_register(&rmn->mn, mm);
......@@ -212,14 +205,14 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
release_locks:
mutex_unlock(&adev->mn_lock);
up_write(&mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
return rmn;
free_rmn:
mutex_unlock(&adev->mn_lock);
up_write(&mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
kfree(rmn);
return ERR_PTR(r);
......@@ -249,7 +242,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
INIT_LIST_HEAD(&bos);
mutex_lock(&rmn->lock);
down_write(&rmn->mm->mmap_sem);
while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) {
kfree(node);
......@@ -263,7 +256,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
if (!node) {
node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
if (!node) {
mutex_unlock(&rmn->lock);
up_write(&rmn->mm->mmap_sem);
return -ENOMEM;
}
}
......@@ -278,7 +271,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
interval_tree_insert(&node->it, &rmn->objects);
mutex_unlock(&rmn->lock);
up_write(&rmn->mm->mmap_sem);
return 0;
}
......@@ -297,13 +290,15 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
struct list_head *head;
mutex_lock(&adev->mn_lock);
rmn = bo->mn;
if (rmn == NULL) {
mutex_unlock(&adev->mn_lock);
return;
}
mutex_lock(&rmn->lock);
down_write(&rmn->mm->mmap_sem);
/* save the next list entry for later */
head = bo->mn_list.next;
......@@ -317,6 +312,6 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
kfree(node);
}
mutex_unlock(&rmn->lock);
up_write(&rmn->mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
}
......@@ -390,7 +390,6 @@ struct amdgpu_crtc {
struct drm_display_mode native_mode;
u32 pll_id;
/* page flipping */
struct workqueue_struct *pflip_queue;
struct amdgpu_flip_work *pflip_works;
enum amdgpu_flip_status pflip_status;
int deferred_flip_completion;
......
......@@ -97,9 +97,6 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
amdgpu_update_memory_usage(bo->adev, &bo->tbo.mem, NULL);
mutex_lock(&bo->adev->gem.mutex);
list_del_init(&bo->list);
mutex_unlock(&bo->adev->gem.mutex);
drm_gem_object_release(&bo->gem_base);
amdgpu_bo_unref(&bo->parent);
kfree(bo->metadata);
......@@ -254,12 +251,15 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
bo->adev = adev;
INIT_LIST_HEAD(&bo->list);
INIT_LIST_HEAD(&bo->va);
bo->initial_domain = domain & (AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT |
AMDGPU_GEM_DOMAIN_CPU |
AMDGPU_GEM_DOMAIN_GDS |
AMDGPU_GEM_DOMAIN_GWS |
AMDGPU_GEM_DOMAIN_OA);
bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT |
AMDGPU_GEM_DOMAIN_CPU |
AMDGPU_GEM_DOMAIN_GDS |
AMDGPU_GEM_DOMAIN_GWS |
AMDGPU_GEM_DOMAIN_OA);
bo->allowed_domains = bo->prefered_domains;
if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
bo->flags = flags;
......@@ -367,7 +367,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
int r, i;
unsigned fpfn, lpfn;
if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm))
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
return -EPERM;
if (WARN_ON_ONCE(min_offset > max_offset))
......@@ -470,26 +470,6 @@ int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
}
/*
 * Drop every buffer object still linked on adev->gem.objects.
 *
 * Does nothing when the list is empty. Otherwise an error is logged, and
 * each remaining object is reported, unlinked from the list under the
 * object's device gem mutex, and has one GEM reference dropped.
 */
void amdgpu_bo_force_delete(struct amdgpu_device *adev)
{
	struct amdgpu_bo *cur, *next;

	if (list_empty(&adev->gem.objects))
		return;

	dev_err(adev->dev, "Userspace still has active objects !\n");

	list_for_each_entry_safe(cur, next, &adev->gem.objects, list) {
		struct drm_gem_object *gobj = &cur->gem_base;

		/* the refcount is printed by reinterpreting its storage */
		dev_err(adev->dev, "%p %p %lu %lu force free\n",
			gobj, cur, (unsigned long)gobj->size,
			*((unsigned long *)&gobj->refcount));

		mutex_lock(&cur->adev->gem.mutex);
		list_del_init(&cur->list);
		mutex_unlock(&cur->adev->gem.mutex);

		/* this should unref the ttm bo */
		drm_gem_object_unreference_unlocked(gobj);
	}
}
int amdgpu_bo_init(struct amdgpu_device *adev)
{
/* Add an MTRR for the VRAM */
......
......@@ -149,7 +149,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
u64 *gpu_addr);
int amdgpu_bo_unpin(struct amdgpu_bo *bo);
int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
void amdgpu_bo_force_delete(struct amdgpu_device *adev);
int amdgpu_bo_init(struct amdgpu_device *adev);
void amdgpu_bo_fini(struct amdgpu_device *adev);
int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
......
......@@ -119,7 +119,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
level = amdgpu_dpm_get_performance_level(adev);
return snprintf(buf, PAGE_SIZE, "%s\n",
(level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" :
(level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" : "high");
(level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" :
(level == AMD_DPM_FORCED_LEVEL_HIGH) ? "high" :
(level == AMD_DPM_FORCED_LEVEL_MANUAL) ? "manual" : "unknown");
} else {
enum amdgpu_dpm_forced_level level;
......@@ -146,6 +148,8 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
level = AMDGPU_DPM_FORCED_LEVEL_HIGH;
} else if (strncmp("auto", buf, strlen("auto")) == 0) {
level = AMDGPU_DPM_FORCED_LEVEL_AUTO;
} else if (strncmp("manual", buf, strlen("manual")) == 0) {
level = AMDGPU_DPM_FORCED_LEVEL_MANUAL;
} else {
count = -EINVAL;
goto fail;
......@@ -172,10 +176,293 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
return count;
}
/*
 * sysfs show handler for pp_num_states.
 *
 * @dev:  device the attribute is attached to
 * @attr: attribute being read (unused)
 * @buf:  output buffer, written up to PAGE_SIZE bytes
 *
 * Emits "states: <n>" followed by one "<index> <name>" line for each
 * power state reported by amdgpu_dpm_get_pp_num_states().
 *
 * Returns the number of bytes placed in @buf.
 */
static ssize_t amdgpu_get_pp_num_states(struct device *dev,
		struct device_attribute *attr,
		char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	struct pp_states_info data;
	size_t len;
	int i;

	/*
	 * data is only filled in when powerplay is enabled; start from
	 * zero so data.nums is never read uninitialized.
	 */
	memset(&data, 0, sizeof(data));
	if (adev->pp_enabled)
		amdgpu_dpm_get_pp_num_states(adev, &data);

	len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums);
	for (i = 0; i < data.nums; i++) {
		/* stop as soon as the page is full */
		if (len >= PAGE_SIZE - 1)
			break;

		/* bound each append by the space that is actually left */
		len += snprintf(buf + len, PAGE_SIZE - len, "%d %s\n", i,
				(data.states[i] == POWER_STATE_TYPE_INTERNAL_BOOT) ? "boot" :
				(data.states[i] == POWER_STATE_TYPE_BATTERY) ? "battery" :
				(data.states[i] == POWER_STATE_TYPE_BALANCED) ? "balanced" :
				(data.states[i] == POWER_STATE_TYPE_PERFORMANCE) ? "performance" : "default");
	}

	/* snprintf reports the untruncated length; never claim more than fits */
	if (len > PAGE_SIZE - 1)
		len = PAGE_SIZE - 1;

	return len;
}
/*
 * sysfs show handler for pp_cur_state.
 *
 * Prints the position of the current power state inside the list returned
 * by amdgpu_dpm_get_pp_num_states(). The value of -EINVAL is printed when
 * the state is not in that list, and 0 when powerplay is not enabled.
 */
static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
		struct device_attribute *attr,
		char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	struct pp_states_info data;
	enum amd_pm_state_type pm = 0;
	int idx = 0;

	if (adev->pp_enabled) {
		pm = amdgpu_dpm_get_current_power_state(adev);
		amdgpu_dpm_get_pp_num_states(adev, &data);

		/* linear search for the current state */
		while (idx < data.nums && data.states[idx] != pm)
			idx++;

		if (idx == data.nums)
			idx = -EINVAL;
	}

	return snprintf(buf, PAGE_SIZE, "%d\n", idx);
}
/*
 * sysfs show handler for pp_force_state.
 *
 * Prints an empty line unless both pp_force_state_enabled and pp_enabled
 * are set. Otherwise prints the position of the current power state in
 * the list from amdgpu_dpm_get_pp_num_states(), or the value of -EINVAL
 * when the state is not in that list.
 */
static ssize_t amdgpu_get_pp_force_state(struct device *dev,
		struct device_attribute *attr,
		char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	struct pp_states_info data;
	enum amd_pm_state_type pm = 0;
	int idx;

	if (!adev->pp_force_state_enabled || !adev->pp_enabled)
		return snprintf(buf, PAGE_SIZE, "\n");

	pm = amdgpu_dpm_get_current_power_state(adev);
	amdgpu_dpm_get_pp_num_states(adev, &data);

	for (idx = 0; idx < data.nums; idx++) {
		if (data.states[idx] == pm)
			break;
	}

	if (idx == data.nums)
		idx = -EINVAL;

	return snprintf(buf, PAGE_SIZE, "%d\n", idx);
}
/*
 * sysfs store handler for pp_force_state.
 *
 * @dev:   device the attribute is attached to
 * @attr:  attribute being written (unused)
 * @buf:   user input
 * @count: number of bytes in @buf
 *
 * An input of length one (e.g. a lone newline) clears
 * pp_force_state_enabled. Any other input is parsed as an index into the
 * state list from amdgpu_dpm_get_pp_num_states(); the selected state is
 * dispatched with AMD_PP_EVENT_ENABLE_USER_STATE unless it is the boot or
 * default state, which are silently ignored.
 *
 * Returns @count on success, -EINVAL when the input is not a number or the
 * index is outside the reported state list.
 */
static ssize_t amdgpu_set_pp_force_state(struct device *dev,
		struct device_attribute *attr,
		const char *buf,
		size_t count)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	enum amd_pm_state_type state = 0;
	struct pp_states_info data;
	long idx;
	int ret;

	if (strlen(buf) == 1) {
		adev->pp_force_state_enabled = false;
		return count;
	}

	ret = kstrtol(buf, 0, &idx);
	if (ret)
		return -EINVAL;

	if (!adev->pp_enabled)
		return count;

	/* zero first so an unfilled result reports no valid states */
	memset(&data, 0, sizeof(data));
	amdgpu_dpm_get_pp_num_states(adev, &data);

	/* idx comes straight from userspace: never index states[] unchecked */
	if (idx < 0 || idx >= data.nums)
		return -EINVAL;

	state = data.states[idx];
	/* only set user selected power states */
	if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
	    state != POWER_STATE_TYPE_DEFAULT) {
		amdgpu_dpm_dispatch_task(adev,
				AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL);
		adev->pp_force_state_enabled = true;
	}

	return count;
}
/*
 * sysfs show handler for pp_table.
 *
 * @dev:  device the attribute is attached to
 * @attr: attribute being read (unused)
 * @buf:  output buffer, written up to PAGE_SIZE - 1 bytes
 *
 * Copies the table returned by amdgpu_dpm_get_pp_table() into @buf as raw
 * bytes, the same representation amdgpu_set_pp_table() hands back to
 * amdgpu_dpm_set_pp_table(). Tables larger than PAGE_SIZE - 1 bytes are
 * truncated.
 *
 * Returns the number of bytes copied, 0 when powerplay is disabled or no
 * table is available, or the non-positive value reported by
 * amdgpu_dpm_get_pp_table() (presumably an error code - TODO confirm).
 */
static ssize_t amdgpu_get_pp_table(struct device *dev,
		struct device_attribute *attr,
		char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	char *table = NULL;
	int size;

	if (!adev->pp_enabled)
		return 0;

	size = amdgpu_dpm_get_pp_table(adev, &table);

	/*
	 * Reject non-positive sizes before the unsigned comparison below,
	 * where a negative value would look larger than PAGE_SIZE.
	 */
	if (size <= 0)
		return size;
	if (!table)
		return 0;

	if (size >= PAGE_SIZE)
		size = PAGE_SIZE - 1;

	/*
	 * Formatting each byte with "%02x" at a stride of one character
	 * overwrote the previous digits and could run past the page, so
	 * copy the table verbatim instead.
	 */
	memcpy(buf, table, size);

	return size;
}
/*
 * sysfs store handler for pp_table.
 *
 * Forwards the written bytes to amdgpu_dpm_set_pp_table() when powerplay
 * is enabled. Always reports the whole input as consumed.
 */
static ssize_t amdgpu_set_pp_table(struct device *dev,
		struct device_attribute *attr,
		const char *buf,
		size_t count)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	if (!adev->pp_enabled)
		return count;

	amdgpu_dpm_set_pp_table(adev, buf, count);

	return count;
}
/*
 * sysfs show handler for pp_dpm_sclk.
 *
 * Lets amdgpu_dpm_print_clock_levels() fill @buf for PP_SCLK and returns
 * its result, or 0 when powerplay is not enabled.
 */
static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
		struct device_attribute *attr,
		char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	if (!adev->pp_enabled)
		return 0;

	return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
}
/*
 * sysfs store handler for pp_dpm_sclk.
 *
 * Parses @buf as an integer level and, when powerplay is enabled, passes
 * it to amdgpu_dpm_force_clock_level() for PP_SCLK.
 *
 * Returns @count, or -EINVAL when the input is not a number.
 */
static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
		struct device_attribute *attr,
		const char *buf,
		size_t count)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	long level;

	if (kstrtol(buf, 0, &level))
		return -EINVAL;

	if (adev->pp_enabled)
		amdgpu_dpm_force_clock_level(adev, PP_SCLK, level);

	return count;
}
/*
 * sysfs show handler for pp_dpm_mclk.
 *
 * Lets amdgpu_dpm_print_clock_levels() fill @buf for PP_MCLK and returns
 * its result, or 0 when powerplay is not enabled.
 */
static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
		struct device_attribute *attr,
		char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	if (!adev->pp_enabled)
		return 0;

	return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
}
/*
 * sysfs store handler for pp_dpm_mclk.
 *
 * Parses @buf as an integer level and, when powerplay is enabled, passes
 * it to amdgpu_dpm_force_clock_level() for PP_MCLK.
 *
 * Returns @count, or -EINVAL when the input is not a number.
 */
static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
		struct device_attribute *attr,
		const char *buf,
		size_t count)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	long level;

	if (kstrtol(buf, 0, &level))
		return -EINVAL;

	if (adev->pp_enabled)
		amdgpu_dpm_force_clock_level(adev, PP_MCLK, level);

	return count;
}
/*
 * sysfs show handler for pp_dpm_pcie.
 *
 * Lets amdgpu_dpm_print_clock_levels() fill @buf for PP_PCIE and returns
 * its result, or 0 when powerplay is not enabled.
 */
static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
		struct device_attribute *attr,
		char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	if (!adev->pp_enabled)
		return 0;

	return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
}
/*
 * sysfs store handler for pp_dpm_pcie.
 *
 * Parses @buf as an integer level and, when powerplay is enabled, passes
 * it to amdgpu_dpm_force_clock_level() for PP_PCIE.
 *
 * Returns @count, or -EINVAL when the input is not a number.
 */
static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
		struct device_attribute *attr,
		const char *buf,
		size_t count)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	long level;

	if (kstrtol(buf, 0, &level))
		return -EINVAL;

	if (adev->pp_enabled)
		amdgpu_dpm_force_clock_level(adev, PP_PCIE, level);

	return count;
}
/*
 * Power management sysfs attributes. Each DEVICE_ATTR names the file,
 * its permission bits, and the show/store handlers defined in this file.
 */
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
amdgpu_get_dpm_forced_performance_level,
amdgpu_set_dpm_forced_performance_level);
/*
 * Powerplay attributes; amdgpu_pm_sysfs_init() creates these files only
 * when adev->pp_enabled is set. The two S_IRUGO-only entries pass NULL
 * as the store handler.
 */
static DEVICE_ATTR(pp_num_states, S_IRUGO, amdgpu_get_pp_num_states, NULL);
static DEVICE_ATTR(pp_cur_state, S_IRUGO, amdgpu_get_pp_cur_state, NULL);
static DEVICE_ATTR(pp_force_state, S_IRUGO | S_IWUSR,
amdgpu_get_pp_force_state,
amdgpu_set_pp_force_state);
static DEVICE_ATTR(pp_table, S_IRUGO | S_IWUSR,
amdgpu_get_pp_table,
amdgpu_set_pp_table);
static DEVICE_ATTR(pp_dpm_sclk, S_IRUGO | S_IWUSR,
amdgpu_get_pp_dpm_sclk,
amdgpu_set_pp_dpm_sclk);
static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR,
amdgpu_get_pp_dpm_mclk,
amdgpu_set_pp_dpm_mclk);
static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR,
amdgpu_get_pp_dpm_pcie,
amdgpu_set_pp_dpm_pcie);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
......@@ -623,14 +910,12 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
amdgpu_dpm_print_power_state(adev, adev->pm.dpm.requested_ps);
}
mutex_lock(&adev->ring_lock);
/* update whether vce is active */
ps->vce_active = adev->pm.dpm.vce_active;
ret = amdgpu_dpm_pre_set_power_state(adev);
if (ret)
goto done;
return;
/* update display watermarks based on new power state */
amdgpu_display_bandwidth_update(adev);
......@@ -667,9 +952,6 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
amdgpu_dpm_force_performance_level(adev, adev->pm.dpm.forced_level);
}
}
done:
mutex_unlock(&adev->ring_lock);
}
void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
......@@ -770,6 +1052,44 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
DRM_ERROR("failed to create device file for dpm state\n");
return ret;
}
if (adev->pp_enabled) {
ret = device_create_file(adev->dev, &dev_attr_pp_num_states);
if (ret) {
DRM_ERROR("failed to create device file pp_num_states\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_pp_cur_state);
if (ret) {
DRM_ERROR("failed to create device file pp_cur_state\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_pp_force_state);
if (ret) {
DRM_ERROR("failed to create device file pp_force_state\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_pp_table);
if (ret) {
DRM_ERROR("failed to create device file pp_table\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_sclk\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_mclk\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_pcie\n");
return ret;
}
}
ret = amdgpu_debugfs_pm_init(adev);
if (ret) {
DRM_ERROR("Failed to register debugfs file for dpm!\n");
......@@ -787,6 +1107,15 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
hwmon_device_unregister(adev->pm.int_hwmon_dev);
device_remove_file(adev->dev, &dev_attr_power_dpm_state);
device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
if (adev->pp_enabled) {
device_remove_file(adev->dev, &dev_attr_pp_num_states);
device_remove_file(adev->dev, &dev_attr_pp_cur_state);
device_remove_file(adev->dev, &dev_attr_pp_force_state);
device_remove_file(adev->dev, &dev_attr_pp_table);
device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
}
}
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
......@@ -802,13 +1131,11 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
int i = 0;
amdgpu_display_bandwidth_update(adev);
mutex_lock(&adev->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (ring && ring->ready)
amdgpu_fence_wait_empty(ring);
}
mutex_unlock(&adev->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (ring && ring->ready)
amdgpu_fence_wait_empty(ring);
}
amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_DISPLAY_CONFIG_CHANGE, NULL, NULL);
} else {
......
......@@ -73,10 +73,6 @@ struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
if (ret)
return ERR_PTR(ret);
mutex_lock(&adev->gem.mutex);
list_add_tail(&bo->list, &adev->gem.objects);
mutex_unlock(&adev->gem.mutex);
return &bo->gem_base;
}
......@@ -121,7 +117,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm))
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
return ERR_PTR(-EPERM);
return drm_gem_prime_export(dev, gobj, flags);
......
......@@ -48,28 +48,6 @@
*/
static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);
/**
 * amdgpu_ring_free_size - recompute the free space of a ring
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Refreshes ring->ring_free_dw from the current read pointer and the
 * driver's copy of the write pointer (all asics).
 */
void amdgpu_ring_free_size(struct amdgpu_ring *ring)
{
	uint32_t rptr = amdgpu_ring_get_rptr(ring);

	/* the mask wraps the difference; works because ring_size is a power of 2 */
	ring->ring_free_dw = (rptr + (ring->ring_size / 4) - ring->wptr) &
			     ring->ptr_mask;

	/* a result of zero means this is an empty ring: all of it is free */
	if (ring->ring_free_dw == 0)
		ring->ring_free_dw = ring->ring_size / 4;
}
/**
* amdgpu_ring_alloc - allocate space on the ring buffer
*
......@@ -82,50 +60,18 @@ void amdgpu_ring_free_size(struct amdgpu_ring *ring)
*/
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
{
int r;
/* make sure we aren't trying to allocate more space than there is on the ring */
/* ring_size / 4 is used as the ring capacity in dwords throughout this file */
if (ndw > (ring->ring_size / 4))
return -ENOMEM;
/* Align requested size with padding so unlock_commit can
* pad safely */
amdgpu_ring_free_size(ring);
ndw = (ndw + ring->align_mask) & ~ring->align_mask;
/*
 * Wait until strictly more than ndw dwords are free. The free count is
 * refreshed on every pass because it only changes when
 * amdgpu_ring_free_size() re-reads the read pointer.
 */
while (ndw > (ring->ring_free_dw - 1)) {
amdgpu_ring_free_size(ring);
if (ndw < ring->ring_free_dw) {
break;
}
/* still not enough room: wait on the ring's fences, give up on error */
r = amdgpu_fence_wait_next(ring);
if (r)
return r;
}
/* record the reservation and remember wptr so amdgpu_ring_undo() can restore it */
ring->count_dw = ndw;
ring->wptr_old = ring->wptr;
return 0;
}
/**
* amdgpu_ring_lock - lock the ring and allocate space on it
*
* @adev: amdgpu_device pointer
* @ring: amdgpu_ring structure holding ring information
* @ndw: number of dwords to allocate in the ring buffer
*
* Lock the ring and allocate @ndw dwords in the ring buffer
* (all asics).
* Returns 0 on success, error on failure.
*/
int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw)
{
int r;
/* Make sure we aren't trying to allocate more space
* than the maximum for one submission
*/
if (WARN_ON_ONCE(ndw > ring->max_dw))
return -ENOMEM;
mutex_lock(ring->ring_lock);
r = amdgpu_ring_alloc(ring, ndw);
if (r) {
mutex_unlock(ring->ring_lock);
return r;
}
ring->count_dw = ndw;
ring->wptr_old = ring->wptr;
return 0;
}
......@@ -144,6 +90,19 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
amdgpu_ring_write(ring, ring->nop);
}
/**
 * amdgpu_ring_generic_pad_ib - pad IB with NOP packets
 *
 * @ring: amdgpu_ring structure holding ring information
 * @ib: IB to add NOP packets to
 *
 * Generic pad_ib implementation for rings other than SDMA. Appends
 * ring->nop to the IB until ib->length_dw has no bits of
 * ring->align_mask set.
 */
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	for (; ib->length_dw & ring->align_mask; ib->length_dw++)
		ib->ptr[ib->length_dw] = ring->nop;
}
/**
* amdgpu_ring_commit - tell the GPU to execute the new
* commands on the ring buffer
......@@ -167,20 +126,6 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
amdgpu_ring_set_wptr(ring);
}
/**
 * amdgpu_ring_unlock_commit - tell the GPU to execute the new
 * commands on the ring buffer and unlock it
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Call amdgpu_ring_commit() then release ring->ring_lock (all asics).
 * The caller is expected to hold that mutex, as taken by
 * amdgpu_ring_lock().
 */
void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring)
{
/* commit first so the submission happens while the lock is still held */
amdgpu_ring_commit(ring);
mutex_unlock(ring->ring_lock);
}
/**
* amdgpu_ring_undo - reset the wptr
*
......@@ -193,19 +138,6 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
ring->wptr = ring->wptr_old;
}
/**
 * amdgpu_ring_unlock_undo - reset the wptr and unlock the ring
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Call amdgpu_ring_undo() then release ring->ring_lock (all asics).
 * The caller is expected to hold that mutex, as taken by
 * amdgpu_ring_lock().
 */
void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring)
{
/* roll the write pointer back while the lock is still held */
amdgpu_ring_undo(ring);
mutex_unlock(ring->ring_lock);
}
/**
* amdgpu_ring_backup - Back up the content of a ring
*
......@@ -218,43 +150,32 @@ unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
{
unsigned size, ptr, i;
/* just in case lock the ring */
mutex_lock(ring->ring_lock);
*data = NULL;
if (ring->ring_obj == NULL) {
mutex_unlock(ring->ring_lock);
if (ring->ring_obj == NULL)
return 0;
}
/* it doesn't make sense to save anything if all fences are signaled */
if (!amdgpu_fence_count_emitted(ring)) {
mutex_unlock(ring->ring_lock);
if (!amdgpu_fence_count_emitted(ring))
return 0;
}
ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);
size = ring->wptr + (ring->ring_size / 4);
size -= ptr;
size &= ring->ptr_mask;
if (size == 0) {
mutex_unlock(ring->ring_lock);
if (size == 0)
return 0;
}
/* and then save the content of the ring */
*data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
if (!*data) {
mutex_unlock(ring->ring_lock);
if (!*data)
return 0;
}
for (i = 0; i < size; ++i) {
(*data)[i] = ring->ring[ptr++];
ptr &= ring->ptr_mask;
}
mutex_unlock(ring->ring_lock);
return size;
}
......@@ -276,7 +197,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring,
return 0;
/* restore the saved ring content */
r = amdgpu_ring_lock(ring, size);
r = amdgpu_ring_alloc(ring, size);
if (r)
return r;
......@@ -284,7 +205,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, data[i]);
}
amdgpu_ring_unlock_commit(ring);
amdgpu_ring_commit(ring);
kfree(data);
return 0;
}
......@@ -352,7 +273,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
return r;
}
ring->ring_lock = &adev->ring_lock;
/* Align ring size */
rb_bufsz = order_base_2(ring_size / 8);
ring_size = (1 << (rb_bufsz + 1)) * 4;
......@@ -389,7 +309,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
}
}
ring->ptr_mask = (ring->ring_size / 4) - 1;
ring->ring_free_dw = ring->ring_size / 4;
ring->max_dw = DIV_ROUND_UP(ring->ring_size / 4,
amdgpu_sched_hw_submission);
if (amdgpu_debugfs_ring_init(adev, ring)) {
DRM_ERROR("Failed to register debugfs file for rings !\n");
......@@ -410,15 +331,10 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
int r;
struct amdgpu_bo *ring_obj;
if (ring->ring_lock == NULL)
return;
mutex_lock(ring->ring_lock);
ring_obj = ring->ring_obj;
ring->ready = false;
ring->ring = NULL;
ring->ring_obj = NULL;
mutex_unlock(ring->ring_lock);
amdgpu_wb_free(ring->adev, ring->fence_offs);
amdgpu_wb_free(ring->adev, ring->rptr_offs);
......@@ -474,29 +390,18 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset);
uint32_t rptr, wptr, rptr_next;
unsigned count, i, j;
amdgpu_ring_free_size(ring);
count = (ring->ring_size / 4) - ring->ring_free_dw;
unsigned i;
wptr = amdgpu_ring_get_wptr(ring);
seq_printf(m, "wptr: 0x%08x [%5d]\n",
wptr, wptr);
seq_printf(m, "wptr: 0x%08x [%5d]\n", wptr, wptr);
rptr = amdgpu_ring_get_rptr(ring);
seq_printf(m, "rptr: 0x%08x [%5d]\n",
rptr, rptr);
rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr);
seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
ring->wptr, ring->wptr);
seq_printf(m, "last semaphore signal addr : 0x%016llx\n",
ring->last_semaphore_signal_addr);
seq_printf(m, "last semaphore wait addr : 0x%016llx\n",
ring->last_semaphore_wait_addr);
seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
seq_printf(m, "%u dwords in ring\n", count);
if (!ring->ready)
return 0;
......@@ -505,11 +410,20 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
* packet that is the root issue
*/
i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
for (j = 0; j <= (count + 32); j++) {
while (i != rptr) {
seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
if (i == rptr)
seq_puts(m, " *");
if (i == rptr_next)
seq_puts(m, " #");
seq_puts(m, "\n");
i = (i + 1) & ring->ptr_mask;
}
while (i != wptr) {
seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
if (rptr == i)
if (i == rptr)
seq_puts(m, " *");
if (rptr_next == i)
if (i == rptr_next)
seq_puts(m, " #");
seq_puts(m, "\n");
i = (i + 1) & ring->ptr_mask;
......
......@@ -321,8 +321,11 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
int i, r;
signed long t;
BUG_ON(align > sa_manager->align);
BUG_ON(size > sa_manager->size);
if (WARN_ON_ONCE(align > sa_manager->align))
return -EINVAL;
if (WARN_ON_ONCE(size > sa_manager->size))
return -EINVAL;
*sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
if ((*sa_bo) == NULL) {
......
/*
* Copyright 2011 Christian König.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
*/
/*
* Authors:
* Christian König <deathsimple@vodafone.de>
*/
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
/*
 * Allocate a semaphore and its 8-byte backing store.
 *
 * The backing store is sub-allocated from adev->ring_tmp_bo (size 8,
 * alignment 8) and cleared to zero through its CPU mapping.
 *
 * On success *semaphore points to the new object and 0 is returned.
 * On failure *semaphore is NULL and the error is returned (-ENOMEM when
 * the semaphore itself cannot be allocated, otherwise the error from
 * amdgpu_sa_bo_new()).
 */
int amdgpu_semaphore_create(struct amdgpu_device *adev,
		struct amdgpu_semaphore **semaphore)
{
	struct amdgpu_semaphore *sem;
	int r;

	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
	*semaphore = sem;
	if (!sem)
		return -ENOMEM;

	r = amdgpu_sa_bo_new(&adev->ring_tmp_bo, &sem->sa_bo, 8, 8);
	if (r) {
		kfree(sem);
		*semaphore = NULL;
		return r;
	}

	sem->waiters = 0;
	sem->gpu_addr = amdgpu_sa_bo_gpu_addr(sem->sa_bo);

	/* the semaphore value starts out as zero */
	*((uint64_t *)amdgpu_sa_bo_cpu_addr(sem->sa_bo)) = 0;

	return 0;
}
/*
 * Emit a semaphore signal on @ring.
 *
 * Returns true when amdgpu_ring_emit_semaphore() accepted the command;
 * in that case the waiter count is decremented. Returns false otherwise,
 * leaving the semaphore untouched.
 */
bool amdgpu_semaphore_emit_signal(struct amdgpu_ring *ring,
		struct amdgpu_semaphore *semaphore)
{
	trace_amdgpu_semaphore_signale(ring->idx, semaphore);

	if (!amdgpu_ring_emit_semaphore(ring, semaphore, false))
		return false;

	--semaphore->waiters;

	/* for debugging lockup only, used by sysfs debug files */
	ring->last_semaphore_signal_addr = semaphore->gpu_addr;

	return true;
}
/*
 * Emit a semaphore wait on @ring.
 *
 * Returns true when amdgpu_ring_emit_semaphore() accepted the command;
 * in that case the waiter count is incremented. Returns false otherwise,
 * leaving the semaphore untouched.
 */
bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
		struct amdgpu_semaphore *semaphore)
{
	trace_amdgpu_semaphore_wait(ring->idx, semaphore);

	if (!amdgpu_ring_emit_semaphore(ring, semaphore, true))
		return false;

	++semaphore->waiters;

	/* for debugging lockup only, used by sysfs debug files */
	ring->last_semaphore_wait_addr = semaphore->gpu_addr;

	return true;
}
/*
 * Release a semaphore created by amdgpu_semaphore_create().
 *
 * Accepts a NULL @semaphore or a NULL *semaphore and does nothing then.
 * Otherwise the backing store is handed to amdgpu_sa_bo_free() together
 * with @fence, the object is freed, and *semaphore is reset to NULL.
 * A positive waiter count is reported as an error before freeing.
 */
void amdgpu_semaphore_free(struct amdgpu_device *adev,
		struct amdgpu_semaphore **semaphore,
		struct fence *fence)
{
	struct amdgpu_semaphore *sem;

	if (!semaphore || !*semaphore)
		return;

	sem = *semaphore;

	if (sem->waiters > 0)
		dev_err(adev->dev, "semaphore %p has more waiters than signalers,"
			" hardware lockup imminent!\n", sem);

	amdgpu_sa_bo_free(adev, &sem->sa_bo, fence);
	kfree(sem);
	*semaphore = NULL;
}
......@@ -46,14 +46,6 @@ struct amdgpu_sync_entry {
*/
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
unsigned i;
for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
sync->semaphores[i] = NULL;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
sync->sync_to[i] = NULL;
hash_init(sync->fences);
sync->last_vm_update = NULL;
}
......@@ -107,7 +99,6 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct fence *f)
{
struct amdgpu_sync_entry *e;
struct amdgpu_fence *fence;
if (!f)
return 0;
......@@ -116,27 +107,20 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM))
amdgpu_sync_keep_later(&sync->last_vm_update, f);
fence = to_amdgpu_fence(f);
if (!fence || fence->ring->adev != adev) {
hash_for_each_possible(sync->fences, e, node, f->context) {
if (unlikely(e->fence->context != f->context))
continue;
amdgpu_sync_keep_later(&e->fence, f);
return 0;
}
e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
if (!e)
return -ENOMEM;
hash_for_each_possible(sync->fences, e, node, f->context) {
if (unlikely(e->fence->context != f->context))
continue;
hash_add(sync->fences, &e->node, f->context);
e->fence = fence_get(f);
amdgpu_sync_keep_later(&e->fence, f);
return 0;
}
amdgpu_sync_keep_later(&sync->sync_to[fence->ring->idx], f);
e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
if (!e)
return -ENOMEM;
hash_add(sync->fences, &e->node, f->context);
e->fence = fence_get(f);
return 0;
}
......@@ -153,13 +137,13 @@ static void *amdgpu_sync_get_owner(struct fence *f)
}
/**
* amdgpu_sync_resv - use the semaphores to sync to a reservation object
* amdgpu_sync_resv - sync to a reservation object
*
* @sync: sync object to add fences from reservation object to
* @resv: reservation object with embedded fence
* @shared: true if we should only sync to the exclusive fence
*
* Sync to the fence using the semaphore objects
* Sync to the fence
*/
int amdgpu_sync_resv(struct amdgpu_device *adev,
struct amdgpu_sync *sync,
......@@ -250,123 +234,17 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync)
kfree(e);
}
if (amdgpu_enable_semaphores)
return 0;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct fence *fence = sync->sync_to[i];
if (!fence)
continue;
r = fence_wait(fence, false);
if (r)
return r;
}
return 0;
}
/**
 * amdgpu_sync_rings - sync ring to all registered fences
 *
 * @sync: sync object to use
 * @ring: ring that needs sync
 *
 * Ensure that all registered fences are signaled before letting
 * the ring continue. The caller must hold the ring lock.
 *
 * For every ring with an entry in sync->sync_to[] this either waits on
 * the fence from the CPU or emits a semaphore signal on the other ring
 * paired with a semaphore wait on @ring.
 *
 * Returns 0 on success, -EINVAL when a fence belongs to a ring that is
 * not ready, or the error from fence_wait(), amdgpu_semaphore_create()
 * or amdgpu_ring_alloc().
 */
int amdgpu_sync_rings(struct amdgpu_sync *sync,
struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
/* number of semaphores stored in sync->semaphores[] so far */
unsigned count = 0;
int i, r;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *other = adev->rings[i];
struct amdgpu_semaphore *semaphore;
struct amdgpu_fence *fence;
/* nothing registered for ring i */
if (!sync->sync_to[i])
continue;
fence = to_amdgpu_fence(sync->sync_to[i]);
/* check if we really need to sync */
if (!amdgpu_enable_scheduler &&
!amdgpu_fence_need_sync(fence, ring))
continue;
/* prevent GPU deadlocks */
if (!other->ready) {
dev_err(adev->dev, "Syncing to a disabled ring!");
return -EINVAL;
}
/* with the scheduler enabled or semaphores disabled, wait on the fence from the CPU instead */
if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) {
r = fence_wait(sync->sync_to[i], true);
if (r)
return r;
continue;
}
if (count >= AMDGPU_NUM_SYNCS) {
/* not enough room, wait manually */
r = fence_wait(&fence->base, false);
if (r)
return r;
continue;
}
r = amdgpu_semaphore_create(adev, &semaphore);
if (r)
return r;
/* kept in the sync object from here on, including on the error returns below */
sync->semaphores[count++] = semaphore;
/* allocate enough space for sync command */
r = amdgpu_ring_alloc(other, 16);
if (r)
return r;
/* emit the signal semaphore */
if (!amdgpu_semaphore_emit_signal(other, semaphore)) {
/* signaling wasn't successful wait manually */
amdgpu_ring_undo(other);
r = fence_wait(&fence->base, false);
if (r)
return r;
continue;
}
/* we assume caller has already allocated space on waiters ring */
if (!amdgpu_semaphore_emit_wait(ring, semaphore)) {
/* waiting wasn't successful wait manually */
/* the signal already written to the other ring is rolled back as well */
amdgpu_ring_undo(other);
r = fence_wait(&fence->base, false);
if (r)
return r;
continue;
}
/* both halves were emitted: submit the signal and record the sync */
amdgpu_ring_commit(other);
amdgpu_fence_note_sync(fence, ring);
}
return 0;
}
/**
* amdgpu_sync_free - free the sync object
*
* @adev: amdgpu_device pointer
* @sync: sync object to use
* @fence: fence to use for the free
*
* Free the sync object by freeing all semaphores in it.
* Free the sync object.
*/
void amdgpu_sync_free(struct amdgpu_device *adev,
struct amdgpu_sync *sync,
struct fence *fence)
void amdgpu_sync_free(struct amdgpu_sync *sync)
{
struct amdgpu_sync_entry *e;
struct hlist_node *tmp;
......@@ -378,11 +256,5 @@ void amdgpu_sync_free(struct amdgpu_device *adev,
kfree(e);
}
for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
fence_put(sync->sync_to[i]);
fence_put(sync->last_vm_update);
}
......@@ -238,144 +238,10 @@ void amdgpu_test_moves(struct amdgpu_device *adev)
amdgpu_do_test_moves(adev);
}
/*
 * Produce a fence on @ring for the ring sync tests.
 *
 * UVD and VCE rings get a dummy create message followed by a dummy
 * destroy message; the destroy call is given @fence to fill in. Any other
 * ring is locked, has a fence emitted directly, and is committed.
 *
 * Returns 0 on success or the error of the step that failed.
 */
static int amdgpu_test_create_and_emit_fence(struct amdgpu_device *adev,
		struct amdgpu_ring *ring,
		struct fence **fence)
{
	const uint32_t handle = ring->idx ^ 0xdeafbeef;
	struct amdgpu_fence *a_fence = NULL;
	int r;

	if (ring == &adev->uvd.ring) {
		r = amdgpu_uvd_get_create_msg(ring, handle, NULL);
		if (r)
			goto err_create;

		r = amdgpu_uvd_get_destroy_msg(ring, handle, fence);
		if (r)
			goto err_destroy;

		return 0;
	}

	if (ring == &adev->vce.ring[0] || ring == &adev->vce.ring[1]) {
		r = amdgpu_vce_get_create_msg(ring, handle, NULL);
		if (r)
			goto err_create;

		r = amdgpu_vce_get_destroy_msg(ring, handle, fence);
		if (r)
			goto err_destroy;

		return 0;
	}

	/* every other ring: emit a plain fence */
	r = amdgpu_ring_lock(ring, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
		return r;
	}
	amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_UNDEFINED, &a_fence);
	amdgpu_ring_unlock_commit(ring);
	*fence = &a_fence->base;

	return 0;

err_create:
	DRM_ERROR("Failed to get dummy create msg\n");
	return r;

err_destroy:
	DRM_ERROR("Failed to get dummy destroy msg\n");
	return r;
}
/**
 * amdgpu_test_ring_sync - check semaphore synchronisation between two rings
 *
 * @adev: amdgpu_device pointer
 * @ringA: ring that waits on the semaphore
 * @ringB: ring that signals the semaphore
 *
 * Queues two "semaphore wait + fence" pairs on @ringA, then signals the
 * semaphore from @ringB one step at a time.  After each 1000 ms delay the
 * next fence must still be unsignaled; after each signal the matching fence
 * must complete.  Every failure is reported through DRM_ERROR and, when an
 * error code is available, through a final warning.
 */
void amdgpu_test_ring_sync(struct amdgpu_device *adev,
			   struct amdgpu_ring *ringA,
			   struct amdgpu_ring *ringB)
{
	struct fence *fence1 = NULL, *fence2 = NULL;
	struct amdgpu_semaphore *semaphore = NULL;
	int r;

	r = amdgpu_semaphore_create(adev, &semaphore);
	if (r) {
		DRM_ERROR("Failed to create semaphore\n");
		goto out_cleanup;
	}

	/* First wait + fence pair on ring A. */
	r = amdgpu_ring_lock(ringA, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_wait(ringA, semaphore);
	amdgpu_ring_unlock_commit(ringA);

	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence1);
	if (r)
		goto out_cleanup;

	/* Second wait + fence pair on ring A. */
	r = amdgpu_ring_lock(ringA, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_wait(ringA, semaphore);
	amdgpu_ring_unlock_commit(ringA);

	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence2);
	if (r)
		goto out_cleanup;

	/* Nothing has signaled the semaphore yet, so fence 1 must be pending. */
	mdelay(1000);

	if (fence_is_signaled(fence1)) {
		DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}

	/* First signal from ring B releases the first wait. */
	r = amdgpu_ring_lock(ringB, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_signal(ringB, semaphore);
	amdgpu_ring_unlock_commit(ringB);

	r = fence_wait(fence1, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence 1\n");
		goto out_cleanup;
	}

	/* Only one signal was sent, so fence 2 must still be pending. */
	mdelay(1000);

	if (fence_is_signaled(fence2)) {
		DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}

	/* Second signal from ring B releases the second wait. */
	r = amdgpu_ring_lock(ringB, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_signal(ringB, semaphore);
	amdgpu_ring_unlock_commit(ringB);

	r = fence_wait(fence2, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence 2\n");
		goto out_cleanup;
	}

out_cleanup:
	amdgpu_semaphore_free(adev, &semaphore, NULL);

	if (fence1)
		fence_put(fence1);

	if (fence2)
		fence_put(fence2);

	if (r)
		printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
}
static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
......@@ -383,109 +249,6 @@ static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
struct amdgpu_ring *ringB,
struct amdgpu_ring *ringC)
{
struct fence *fenceA = NULL, *fenceB = NULL;
struct amdgpu_semaphore *semaphore = NULL;
bool sigA, sigB;
int i, r;
r = amdgpu_semaphore_create(adev, &semaphore);
if (r) {
DRM_ERROR("Failed to create semaphore\n");
goto out_cleanup;
}
r = amdgpu_ring_lock(ringA, 64);
if (r) {
DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
goto out_cleanup;
}
amdgpu_semaphore_emit_wait(ringA, semaphore);
amdgpu_ring_unlock_commit(ringA);
r = amdgpu_test_create_and_emit_fence(adev, ringA, &fenceA);
if (r)
goto out_cleanup;
r = amdgpu_ring_lock(ringB, 64);
if (r) {
DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
goto out_cleanup;
}
amdgpu_semaphore_emit_wait(ringB, semaphore);
amdgpu_ring_unlock_commit(ringB);
r = amdgpu_test_create_and_emit_fence(adev, ringB, &fenceB);
if (r)
goto out_cleanup;
mdelay(1000);
if (fence_is_signaled(fenceA)) {
DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
goto out_cleanup;
}
if (fence_is_signaled(fenceB)) {
DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
goto out_cleanup;
}
r = amdgpu_ring_lock(ringC, 64);
if (r) {
DRM_ERROR("Failed to lock ring B %p\n", ringC);
goto out_cleanup;
}
amdgpu_semaphore_emit_signal(ringC, semaphore);
amdgpu_ring_unlock_commit(ringC);
for (i = 0; i < 30; ++i) {
mdelay(100);
sigA = fence_is_signaled(fenceA);
sigB = fence_is_signaled(fenceB);
if (sigA || sigB)
break;
}
if (!sigA && !sigB) {
DRM_ERROR("Neither fence A nor B has been signaled\n");
goto out_cleanup;
} else if (sigA && sigB) {
DRM_ERROR("Both fence A and B has been signaled\n");
goto out_cleanup;
}
DRM_INFO("Fence %c was first signaled\n", sigA ? 'A' : 'B');
r = amdgpu_ring_lock(ringC, 64);
if (r) {
DRM_ERROR("Failed to lock ring B %p\n", ringC);
goto out_cleanup;
}
amdgpu_semaphore_emit_signal(ringC, semaphore);
amdgpu_ring_unlock_commit(ringC);
mdelay(1000);
r = fence_wait(fenceA, false);
if (r) {
DRM_ERROR("Failed to wait for sync fence A\n");
goto out_cleanup;
}
r = fence_wait(fenceB, false);
if (r) {
DRM_ERROR("Failed to wait for sync fence B\n");
goto out_cleanup;
}
out_cleanup:
amdgpu_semaphore_free(adev, &semaphore, NULL);
if (fenceA)
fence_put(fenceA);
if (fenceB)
fence_put(fenceB);
if (r)
printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
}
static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA,
......
......@@ -38,10 +38,10 @@ TRACE_EVENT(amdgpu_cs,
TP_fast_assign(
__entry->bo_list = p->bo_list;
__entry->ring = p->ibs[i].ring->idx;
__entry->dw = p->ibs[i].length_dw;
__entry->ring = p->job->ring->idx;
__entry->dw = p->job->ibs[i].length_dw;
__entry->fences = amdgpu_fence_count_emitted(
p->ibs[i].ring);
p->job->ring);
),
TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
__entry->bo_list, __entry->ring, __entry->dw,
......@@ -65,7 +65,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
__entry->sched_job = &job->base;
__entry->ib = job->ibs;
__entry->fence = &job->base.s_fence->base;
__entry->ring_name = job->ibs[0].ring->name;
__entry->ring_name = job->ring->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
......@@ -90,7 +90,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
__entry->sched_job = &job->base;
__entry->ib = job->ibs;
__entry->fence = &job->base.s_fence->base;
__entry->ring_name = job->ibs[0].ring->name;
__entry->ring_name = job->ring->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
......@@ -100,18 +100,21 @@ TRACE_EVENT(amdgpu_sched_run_job,
TRACE_EVENT(amdgpu_vm_grab_id,
TP_PROTO(unsigned vmid, int ring),
TP_ARGS(vmid, ring),
TP_PROTO(struct amdgpu_vm *vm, unsigned vmid, int ring),
TP_ARGS(vm, vmid, ring),
TP_STRUCT__entry(
__field(struct amdgpu_vm *, vm)
__field(u32, vmid)
__field(u32, ring)
),
TP_fast_assign(
__entry->vm = vm;
__entry->vmid = vmid;
__entry->ring = ring;
),
TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring)
TP_printk("vm=%p, id=%u, ring=%u", __entry->vm, __entry->vmid,
__entry->ring)
);
TRACE_EVENT(amdgpu_vm_bo_map,
......@@ -247,42 +250,6 @@ TRACE_EVENT(amdgpu_bo_list_set,
TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
);
/*
 * Event class shared by the semaphore tracepoints.
 *
 * Takes a ring number and a semaphore and records the ring together with the
 * semaphore's waiters count and gpu_addr.  The record is printed as
 * "ring=..., waiters=..., addr=..." with the address in zero-padded hex.
 */
DECLARE_EVENT_CLASS(amdgpu_semaphore_request,
TP_PROTO(int ring, struct amdgpu_semaphore *sem),
TP_ARGS(ring, sem),
TP_STRUCT__entry(
__field(int, ring)
__field(signed, waiters)
__field(uint64_t, gpu_addr)
),
TP_fast_assign(
__entry->ring = ring;
__entry->waiters = sem->waiters;
__entry->gpu_addr = sem->gpu_addr;
),
TP_printk("ring=%u, waiters=%d, addr=%010Lx", __entry->ring,
__entry->waiters, __entry->gpu_addr)
);
/*
 * Tracepoint built on the amdgpu_semaphore_request class; same arguments and
 * output format as the class.
 * NOTE(review): the event name is spelled "signale" - presumably the
 * semaphore-signal event; the name is kept because callers use it.
 */
DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_signale,
TP_PROTO(int ring, struct amdgpu_semaphore *sem),
TP_ARGS(ring, sem)
);
/*
 * Tracepoint built on the amdgpu_semaphore_request class; same arguments and
 * output format as the class.
 * NOTE(review): presumably fired when a semaphore wait is emitted - confirm
 * against the call sites.
 */
DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_wait,
TP_PROTO(int ring, struct amdgpu_semaphore *sem),
TP_ARGS(ring, sem)
);
#endif
/* This part must be outside protection */
......
......@@ -31,7 +31,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev);
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct fence **fence);
int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
struct fence **fence);
bool direct, struct fence **fence);
void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
struct drm_file *filp);
int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);
......
......@@ -31,12 +31,9 @@ int amdgpu_vce_resume(struct amdgpu_device *adev);
int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct fence **fence);
int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
struct fence **fence);
bool direct, struct fence **fence);
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring,
struct amdgpu_semaphore *semaphore,
bool emit_wait);
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
unsigned flags);
......
此差异已折叠。
......@@ -2670,7 +2670,6 @@ static void dce_v10_0_crtc_destroy(struct drm_crtc *crtc)
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
drm_crtc_cleanup(crtc);
destroy_workqueue(amdgpu_crtc->pflip_queue);
kfree(amdgpu_crtc);
}
......@@ -2890,7 +2889,6 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
amdgpu_crtc->crtc_id = index;
amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
adev->mode_info.crtcs[index] = amdgpu_crtc;
amdgpu_crtc->max_cursor_width = 128;
......@@ -3366,7 +3364,7 @@ static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
schedule_work(&works->unpin_work);
return 0;
}
......
......@@ -2661,7 +2661,6 @@ static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc)
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
drm_crtc_cleanup(crtc);
destroy_workqueue(amdgpu_crtc->pflip_queue);
kfree(amdgpu_crtc);
}
......@@ -2881,7 +2880,6 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index)
drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
amdgpu_crtc->crtc_id = index;
amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
adev->mode_info.crtcs[index] = amdgpu_crtc;
amdgpu_crtc->max_cursor_width = 128;
......@@ -3361,7 +3359,7 @@ static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
schedule_work(&works->unpin_work);
return 0;
}
......
......@@ -2582,7 +2582,6 @@ static void dce_v8_0_crtc_destroy(struct drm_crtc *crtc)
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
drm_crtc_cleanup(crtc);
destroy_workqueue(amdgpu_crtc->pflip_queue);
kfree(amdgpu_crtc);
}
......@@ -2809,7 +2808,6 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
amdgpu_crtc->crtc_id = index;
amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
adev->mode_info.crtcs[index] = amdgpu_crtc;
amdgpu_crtc->max_cursor_width = CIK_CURSOR_WIDTH;
......@@ -3375,7 +3373,7 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
schedule_work(&works->unpin_work);
return 0;
}
......
此差异已折叠。
......@@ -73,6 +73,7 @@ enum amd_ip_block_type {
AMD_IP_BLOCK_TYPE_SDMA,
AMD_IP_BLOCK_TYPE_UVD,
AMD_IP_BLOCK_TYPE_VCE,
AMD_IP_BLOCK_TYPE_ACP,
};
enum amd_clockgating_state {
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册