提交 898c2cb5 编写于 作者: C Christian König 提交者: Alex Deucher

drm/amdgpu: use scheduler fault instead of reset work

Signal a fault to the scheduler on an illegal instruction or register
access violation instead of kicking off the reset handler directly.
Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
上级 2c498d1d
...@@ -830,7 +830,6 @@ struct amdgpu_device { ...@@ -830,7 +830,6 @@ struct amdgpu_device {
bool need_dma32; bool need_dma32;
bool need_swiotlb; bool need_swiotlb;
bool accel_working; bool accel_working;
struct work_struct reset_work;
struct notifier_block acpi_nb; struct notifier_block acpi_nb;
struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
......
...@@ -93,23 +93,6 @@ static void amdgpu_hotplug_work_func(struct work_struct *work) ...@@ -93,23 +93,6 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
drm_helper_hpd_irq_event(dev); drm_helper_hpd_irq_event(dev);
} }
/**
* amdgpu_irq_reset_work_func - execute GPU reset
*
* @work: work struct pointer
*
* Execute scheduled GPU reset (Cayman+).
* This function is called when the IRQ handler thinks we need a GPU reset.
*/
static void amdgpu_irq_reset_work_func(struct work_struct *work)
{
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
reset_work);
if (!amdgpu_sriov_vf(adev) && amdgpu_device_should_recover_gpu(adev))
amdgpu_device_gpu_recover(adev, NULL);
}
/** /**
* amdgpu_irq_disable_all - disable *all* interrupts * amdgpu_irq_disable_all - disable *all* interrupts
* *
...@@ -262,15 +245,12 @@ int amdgpu_irq_init(struct amdgpu_device *adev) ...@@ -262,15 +245,12 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
amdgpu_hotplug_work_func); amdgpu_hotplug_work_func);
} }
INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func);
adev->irq.installed = true; adev->irq.installed = true;
r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
if (r) { if (r) {
adev->irq.installed = false; adev->irq.installed = false;
if (!amdgpu_device_has_dc_support(adev)) if (!amdgpu_device_has_dc_support(adev))
flush_work(&adev->hotplug_work); flush_work(&adev->hotplug_work);
cancel_work_sync(&adev->reset_work);
return r; return r;
} }
adev->ddev->max_vblank_count = 0x00ffffff; adev->ddev->max_vblank_count = 0x00ffffff;
...@@ -299,7 +279,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) ...@@ -299,7 +279,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
pci_disable_msi(adev->pdev); pci_disable_msi(adev->pdev);
if (!amdgpu_device_has_dc_support(adev)) if (!amdgpu_device_has_dc_support(adev))
flush_work(&adev->hotplug_work); flush_work(&adev->hotplug_work);
cancel_work_sync(&adev->reset_work);
} }
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
......
...@@ -1214,8 +1214,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev, ...@@ -1214,8 +1214,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
u8 instance_id;
DRM_ERROR("Illegal instruction in SDMA command stream\n"); DRM_ERROR("Illegal instruction in SDMA command stream\n");
schedule_work(&adev->reset_work); instance_id = (entry->ring_id & 0x3) >> 0;
drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
return 0; return 0;
} }
......
...@@ -3393,12 +3393,31 @@ static int gfx_v6_0_eop_irq(struct amdgpu_device *adev, ...@@ -3393,12 +3393,31 @@ static int gfx_v6_0_eop_irq(struct amdgpu_device *adev,
return 0; return 0;
} }
/**
 * gfx_v6_0_fault - report a fault on the ring named by an IV entry
 *
 * @adev: amdgpu device pointer
 * @entry: interrupt vector entry whose ring_id selects the ring
 *
 * ring_id 0 selects gfx ring 0, ring_id 1 and 2 select compute ring 0
 * and 1 respectively.  Any other ring_id is ignored.  The scheduler of
 * the selected ring is handed to drm_sched_fault().
 */
static void gfx_v6_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	struct amdgpu_ring *faulted;

	if (entry->ring_id == 0)
		faulted = &adev->gfx.gfx_ring[0];
	else if (entry->ring_id == 1 || entry->ring_id == 2)
		/* compute rings are numbered from 1 in ring_id */
		faulted = &adev->gfx.compute_ring[entry->ring_id - 1];
	else
		return;

	drm_sched_fault(&faulted->sched);
}
static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev, static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
DRM_ERROR("Illegal register access in command stream\n"); DRM_ERROR("Illegal register access in command stream\n");
schedule_work(&adev->reset_work); gfx_v6_0_fault(adev, entry);
return 0; return 0;
} }
...@@ -3407,7 +3426,7 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev, ...@@ -3407,7 +3426,7 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
DRM_ERROR("Illegal instruction in command stream\n"); DRM_ERROR("Illegal instruction in command stream\n");
schedule_work(&adev->reset_work); gfx_v6_0_fault(adev, entry);
return 0; return 0;
} }
......
...@@ -4959,12 +4959,36 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev, ...@@ -4959,12 +4959,36 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
return 0; return 0;
} }
/**
 * gfx_v7_0_fault - report a fault on the ring(s) named by an IV entry
 *
 * @adev: amdgpu device pointer
 * @entry: interrupt vector entry whose ring_id encodes ME and pipe
 *
 * Bits 3:2 of ring_id are taken as the ME id and bits 1:0 as the pipe
 * id.  ME 0 maps to gfx ring 0; ME 1 and 2 map to every compute ring
 * whose me and pipe fields match.  Other ME values are ignored.  Each
 * selected ring's scheduler is handed to drm_sched_fault().
 */
static void gfx_v7_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	const u8 me = (entry->ring_id >> 2) & 0x3;
	const u8 pipe = entry->ring_id & 0x3;
	int idx;

	if (me == 0) {
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		return;
	}

	if (me != 1 && me != 2)
		return;

	for (idx = 0; idx < adev->gfx.num_compute_rings; idx++) {
		struct amdgpu_ring *candidate = &adev->gfx.compute_ring[idx];

		/* skip compute rings on a different ME or pipe */
		if (candidate->me != me || candidate->pipe != pipe)
			continue;

		drm_sched_fault(&candidate->sched);
	}
}
static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev, static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
DRM_ERROR("Illegal register access in command stream\n"); DRM_ERROR("Illegal register access in command stream\n");
schedule_work(&adev->reset_work); gfx_v7_0_fault(adev, entry);
return 0; return 0;
} }
...@@ -4974,7 +4998,7 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev, ...@@ -4974,7 +4998,7 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
{ {
DRM_ERROR("Illegal instruction in command stream\n"); DRM_ERROR("Illegal instruction in command stream\n");
// XXX soft reset the gfx block only // XXX soft reset the gfx block only
schedule_work(&adev->reset_work); gfx_v7_0_fault(adev, entry);
return 0; return 0;
} }
......
...@@ -6738,12 +6738,39 @@ static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, ...@@ -6738,12 +6738,39 @@ static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
return 0; return 0;
} }
/**
 * gfx_v8_0_fault - report a fault on the ring(s) named by an IV entry
 *
 * @adev: amdgpu device pointer
 * @entry: interrupt vector entry whose ring_id encodes ME, pipe and queue
 *
 * Bits 3:2 of ring_id are taken as the ME id, bits 1:0 as the pipe id
 * and bits 6:4 as the queue id.  ME 0 maps to gfx ring 0; ME 1 and 2
 * map to every compute ring whose me, pipe and queue fields all match.
 * Other ME values are ignored.  Each selected ring's scheduler is
 * handed to drm_sched_fault().
 */
static void gfx_v8_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	const u8 me = (entry->ring_id >> 2) & 0x3;
	const u8 pipe = entry->ring_id & 0x3;
	const u8 queue = (entry->ring_id >> 4) & 0x7;
	int idx;

	if (me == 0) {
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		return;
	}

	if (me != 1 && me != 2)
		return;

	for (idx = 0; idx < adev->gfx.num_compute_rings; idx++) {
		struct amdgpu_ring *candidate = &adev->gfx.compute_ring[idx];

		/* skip compute rings that differ in ME, pipe or queue */
		if (candidate->me != me || candidate->pipe != pipe ||
		    candidate->queue != queue)
			continue;

		drm_sched_fault(&candidate->sched);
	}
}
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
DRM_ERROR("Illegal register access in command stream\n"); DRM_ERROR("Illegal register access in command stream\n");
schedule_work(&adev->reset_work); gfx_v8_0_fault(adev, entry);
return 0; return 0;
} }
...@@ -6752,7 +6779,7 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, ...@@ -6752,7 +6779,7 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
DRM_ERROR("Illegal instruction in command stream\n"); DRM_ERROR("Illegal instruction in command stream\n");
schedule_work(&adev->reset_work); gfx_v8_0_fault(adev, entry);
return 0; return 0;
} }
......
...@@ -4695,12 +4695,39 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, ...@@ -4695,12 +4695,39 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
return 0; return 0;
} }
/**
 * gfx_v9_0_fault - report a fault on the ring(s) named by an IV entry
 *
 * @adev: amdgpu device pointer
 * @entry: interrupt vector entry whose ring_id encodes ME, pipe and queue
 *
 * Bits 3:2 of ring_id are taken as the ME id, bits 1:0 as the pipe id
 * and bits 6:4 as the queue id.  ME 0 maps to gfx ring 0; ME 1 and 2
 * map to every compute ring whose me, pipe and queue fields all match.
 * Other ME values are ignored.  Each selected ring's scheduler is
 * handed to drm_sched_fault().
 */
static void gfx_v9_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	const u8 me = (entry->ring_id >> 2) & 0x3;
	const u8 pipe = entry->ring_id & 0x3;
	const u8 queue = (entry->ring_id >> 4) & 0x7;
	int idx;

	if (me == 0) {
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		return;
	}

	if (me != 1 && me != 2)
		return;

	for (idx = 0; idx < adev->gfx.num_compute_rings; idx++) {
		struct amdgpu_ring *candidate = &adev->gfx.compute_ring[idx];

		/* skip compute rings that differ in ME, pipe or queue */
		if (candidate->me != me || candidate->pipe != pipe ||
		    candidate->queue != queue)
			continue;

		drm_sched_fault(&candidate->sched);
	}
}
static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
DRM_ERROR("Illegal register access in command stream\n"); DRM_ERROR("Illegal register access in command stream\n");
schedule_work(&adev->reset_work); gfx_v9_0_fault(adev, entry);
return 0; return 0;
} }
...@@ -4709,7 +4736,7 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, ...@@ -4709,7 +4736,7 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
DRM_ERROR("Illegal instruction in command stream\n"); DRM_ERROR("Illegal instruction in command stream\n");
schedule_work(&adev->reset_work); gfx_v9_0_fault(adev, entry);
return 0; return 0;
} }
......
...@@ -1105,8 +1105,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev, ...@@ -1105,8 +1105,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
u8 instance_id, queue_id;
DRM_ERROR("Illegal instruction in SDMA command stream\n"); DRM_ERROR("Illegal instruction in SDMA command stream\n");
schedule_work(&adev->reset_work); instance_id = (entry->ring_id & 0x3) >> 0;
queue_id = (entry->ring_id & 0xc) >> 2;
if (instance_id <= 1 && queue_id == 0)
drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
return 0; return 0;
} }
......
...@@ -1440,8 +1440,14 @@ static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev, ...@@ -1440,8 +1440,14 @@ static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
u8 instance_id, queue_id;
DRM_ERROR("Illegal instruction in SDMA command stream\n"); DRM_ERROR("Illegal instruction in SDMA command stream\n");
schedule_work(&adev->reset_work); instance_id = (entry->ring_id & 0x3) >> 0;
queue_id = (entry->ring_id & 0xc) >> 2;
if (instance_id <= 1 && queue_id == 0)
drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
return 0; return 0;
} }
......
...@@ -1717,12 +1717,29 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, ...@@ -1717,12 +1717,29 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
int instance;
DRM_ERROR("Illegal instruction in SDMA command stream\n"); DRM_ERROR("Illegal instruction in SDMA command stream\n");
schedule_work(&adev->reset_work);
switch (entry->client_id) {
case SOC15_IH_CLIENTID_SDMA0:
instance = 0;
break;
case SOC15_IH_CLIENTID_SDMA1:
instance = 1;
break;
default:
return 0;
}
switch (entry->ring_id) {
case 0:
drm_sched_fault(&adev->sdma.instance[instance].ring.sched);
break;
}
return 0; return 0;
} }
static void sdma_v4_0_update_medium_grain_clock_gating( static void sdma_v4_0_update_medium_grain_clock_gating(
struct amdgpu_device *adev, struct amdgpu_device *adev,
bool enable) bool enable)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册