diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 49484ea5366d5a207db5a1fa330702295f932940..0f9e7309fc279dbc080cf0dbc766dd79f1c67ca7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -180,7 +180,7 @@ struct amdgpu_mgpu_info mgpu_info = { }; int amdgpu_ras_enable = -1; uint amdgpu_ras_mask = 0xffffffff; -int amdgpu_bad_page_threshold = 100; +int amdgpu_bad_page_threshold = -1; struct amdgpu_watchdog_timer amdgpu_watchdog_timer = { .timeout_fatal_disable = false, .period = 0x23, /* default to max. timeout = 1 << 0x23 cycles */ @@ -854,7 +854,7 @@ module_param_named(reset_method, amdgpu_reset_method, int, 0444); * faulty pages by ECC exceed threshold value and leave it for user's further * check. */ -MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = auto, 0 = disable bad page retirement, 100 = default value"); +MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = auto(default value), 0 = disable bad page retirement)"); module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444); MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a90bf33358d30fcd5a41f32413ddd952cac559a7..0e16683876aa482128ebd6f9333872debb06789a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1790,14 +1790,13 @@ static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, return ret; } -static uint32_t -amdgpu_ras_calculate_badpags_threshold(struct amdgpu_device *adev) +static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, + uint32_t max_length) { + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); int tmp_threshold = amdgpu_bad_page_threshold; u64 val; - uint32_t max_length = 0; - max_length = amdgpu_ras_eeprom_get_record_max_length(); /* * Justification of value bad_page_cnt_threshold in ras structure * @@ -1823,18 +1822,20 @@ amdgpu_ras_calculate_badpags_threshold(struct amdgpu_device *adev) tmp_threshold = max_length; if (tmp_threshold == -1) { - val = adev->gmc.real_vram_size; + val = adev->gmc.mc_vram_size; do_div(val, RAS_BAD_PAGE_RATE); - tmp_threshold = min(lower_32_bits(val), max_length); + con->bad_page_cnt_threshold = min(lower_32_bits(val), + max_length); + } else { + con->bad_page_cnt_threshold = tmp_threshold; } - - return tmp_threshold; } int amdgpu_ras_recovery_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data **data; + uint32_t max_eeprom_records_len = 0; bool exc_err_limit = false; int ret; @@ -1854,16 +1855,8 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) atomic_set(&con->in_recovery, 0); con->adev = adev; - if (!con->bad_page_cnt_threshold) { - con->bad_page_cnt_threshold = - amdgpu_ras_calculate_badpags_threshold(adev); - - ret = amdgpu_vram_mgr_reserve_backup_pages( - ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM), - con->bad_page_cnt_threshold); - if (ret) - goto out; - } + max_eeprom_records_len = amdgpu_ras_eeprom_get_record_max_length(); + amdgpu_ras_validate_threshold(adev, max_eeprom_records_len); /* Todo: During test the SMU might fail to read the eeprom through I2C * when the GPU is pending on XGMI reset during probe time diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 9117d8749c7a6a7647a66da12bc0625cdb47c04a..e62992797557a00169ac20cec85d213ce58f78a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -48,8 +48,6 @@ struct amdgpu_vram_mgr { spinlock_t lock; struct list_head reservations_pending; struct list_head reserved_pages; - struct list_head backup_pages; - uint32_t num_backup_pages; atomic64_t usage; atomic64_t vis_usage; }; @@ -124,8 +122,6 @@ uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man); uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_resource_manager *man); int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man, uint64_t start, uint64_t size); -int amdgpu_vram_mgr_reserve_backup_pages(struct ttm_resource_manager *man, - uint32_t num_pages); int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man, uint64_t start); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 96fa1833cd9268cc13129b89f92d0aedf68c551a..b2fc475ce6f705e5d044fed6f2e5916ef77992cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -28,9 +28,6 @@ #include "amdgpu_atomfirmware.h" #include "atom.h" -static int amdgpu_vram_mgr_free_backup_pages(struct amdgpu_vram_mgr *mgr, - uint32_t num_pages); - static inline struct amdgpu_vram_mgr *to_vram_mgr(struct ttm_resource_manager *man) { return container_of(man, struct amdgpu_vram_mgr, manager); @@ -189,7 +186,6 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) spin_lock_init(&mgr->lock); INIT_LIST_HEAD(&mgr->reservations_pending); INIT_LIST_HEAD(&mgr->reserved_pages); - INIT_LIST_HEAD(&mgr->backup_pages); /* Add the two VRAM-related sysfs files */ ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); @@ -230,11 +226,6 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) drm_mm_remove_node(&rsv->mm_node); kfree(rsv); } - - list_for_each_entry_safe(rsv, temp, &mgr->backup_pages, node) { - drm_mm_remove_node(&rsv->mm_node); - kfree(rsv); - } drm_mm_takedown(&mgr->mm); spin_unlock(&mgr->lock); @@ -306,14 +297,12 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man) continue; dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n", - rsv->mm_node.start << PAGE_SHIFT, rsv->mm_node.size); + rsv->mm_node.start, rsv->mm_node.size); vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node); atomic64_add(vis_usage, &mgr->vis_usage); atomic64_add(rsv->mm_node.size << PAGE_SHIFT, &mgr->usage); list_move(&rsv->node, &mgr->reserved_pages); - - amdgpu_vram_mgr_free_backup_pages(mgr, rsv->mm_node.size); } } @@ -330,7 +319,6 @@ int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man, uint64_t start, uint64_t size) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); - struct amdgpu_device *adev = to_amdgpu_device(mgr); struct amdgpu_vram_reservation *rsv; rsv = kzalloc(sizeof(*rsv), GFP_KERNEL); @@ -341,94 +329,14 @@ int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man, rsv->mm_node.start = start >> PAGE_SHIFT; rsv->mm_node.size = size >> PAGE_SHIFT; - dev_dbg(adev->dev, "Pending Reservation: 0x%llx\n", start); - spin_lock(&mgr->lock); - list_add_tail(&rsv->node, &mgr->reservations_pending); + list_add_tail(&mgr->reservations_pending, &rsv->node); amdgpu_vram_mgr_do_reserve(man); spin_unlock(&mgr->lock); return 0; } -static int amdgpu_vram_mgr_free_backup_pages(struct amdgpu_vram_mgr *mgr, - uint32_t num_pages) -{ - struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct amdgpu_vram_reservation *rsv; - uint32_t i; - uint64_t vis_usage = 0, total_usage = 0; - - if (num_pages > mgr->num_backup_pages) { - dev_warn(adev->dev, "No enough backup pages\n"); - return -EINVAL; - } - - for (i = 0; i < num_pages; i++) { - rsv = list_first_entry(&mgr->backup_pages, - struct amdgpu_vram_reservation, node); - vis_usage += amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node); - total_usage += (rsv->mm_node.size << PAGE_SHIFT); - drm_mm_remove_node(&rsv->mm_node); - list_del(&rsv->node); - kfree(rsv); - mgr->num_backup_pages--; - } - - atomic64_sub(total_usage, &mgr->usage); - atomic64_sub(vis_usage, &mgr->vis_usage); - - return 0; -} - -int amdgpu_vram_mgr_reserve_backup_pages(struct ttm_resource_manager *man, - uint32_t num_pages) -{ - struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); - struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct amdgpu_vram_reservation *rsv; - struct drm_mm *mm = &mgr->mm; - uint32_t i; - int ret = 0; - uint64_t vis_usage, total_usage; - - for (i = 0; i < num_pages; i++) { - rsv = kzalloc(sizeof(*rsv), GFP_KERNEL); - if (!rsv) { - ret = -ENOMEM; - goto pro_end; - } - - INIT_LIST_HEAD(&rsv->node); - - ret = drm_mm_insert_node(mm, &rsv->mm_node, 1); - if (ret) { - dev_err(adev->dev, "failed to reserve backup page %d, ret 0x%x\n", i, ret); - kfree(rsv); - goto pro_end; - } - - vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node); - total_usage = (rsv->mm_node.size << PAGE_SHIFT); - - spin_lock(&mgr->lock); - atomic64_add(vis_usage, &mgr->vis_usage); - atomic64_add(total_usage, &mgr->usage); - list_add_tail(&rsv->node, &mgr->backup_pages); - mgr->num_backup_pages++; - spin_unlock(&mgr->lock); - } - -pro_end: - if (ret) { - spin_lock(&mgr->lock); - amdgpu_vram_mgr_free_backup_pages(mgr, mgr->num_backup_pages); - spin_unlock(&mgr->lock); - } - - return ret; -} - /** * amdgpu_vram_mgr_query_page_status - query the reservation status *