diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 309461d0c275a6968e730d39a115fd49e197b4fe..da120fe330be1c37b7be5f8dc6565b16f77fdd3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2745,7 +2745,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, } /* must succeed. */ - amdgpu_ras_post_init(adev); + amdgpu_ras_resume(adev); r = device_create_file(adev->dev, &dev_attr_pcie_replay_count); if (r) { @@ -3503,7 +3503,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, goto out; /* must succeed. */ - amdgpu_ras_post_init(tmp_adev); + amdgpu_ras_resume(tmp_adev); /* Update PSP FW topology after reset */ if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 49c71cfc7fc6ccb82d530e837b931b89cc11f582..da1dc40b9b14b13b05885812959ec11c9c9f3fa0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1594,12 +1594,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev) } /* do some init work after IP late init as dependence. - * TODO - * gpu reset will re-enable ras, need fint out one way to run it again. - * for now, if a gpu reset happened, unless IP enable its ras, the ras state - * will be showed as disabled. + * and it runs in resume/gpu reset/booting up cases. */ -void amdgpu_ras_post_init(struct amdgpu_device *adev) +void amdgpu_ras_resume(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj, *tmp; @@ -1642,6 +1639,19 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev) } } +void amdgpu_ras_suspend(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con) + return; + + amdgpu_ras_disable_all_features(adev, 0); + /* Make sure all ras objects are disabled. */ + if (con->features) + amdgpu_ras_disable_all_features(adev, 1); +} + /* do some fini work before IP fini as dependence */ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 59994ee0085533c220958fdf0c9a9bb02f1985be..c6b34fbd695f8186a48594e9f23f6d22f0d2a9e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -179,6 +179,9 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, unsigned int block); +void amdgpu_ras_resume(struct amdgpu_device *adev); +void amdgpu_ras_suspend(struct amdgpu_device *adev); + int amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce); @@ -256,7 +259,6 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { /* called in ip_init and ip_fini */ int amdgpu_ras_init(struct amdgpu_device *adev); -void amdgpu_ras_post_init(struct amdgpu_device *adev); int amdgpu_ras_fini(struct amdgpu_device *adev); int amdgpu_ras_pre_fini(struct amdgpu_device *adev);