if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
err_data.err_addr_cnt);
- amdgpu_ras_save_bad_pages(adev);
+ amdgpu_ras_save_bad_pages(adev, NULL);
}
dev_warn(adev->dev, "WARNING: THIS IS ONLY FOR TEST PURPOSES AND WILL CORRUPT RAS EEPROM\n");
/*
* write error record array to eeprom, the function should be
* protected by recovery_lock
+ * new_cnt: new added UE count, excluding reserved bad pages, can be NULL
*/
-int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
+int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
+ unsigned long *new_cnt)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
struct amdgpu_ras_eeprom_control *control;
int save_count;
- if (!con || !con->eh_data)
+ if (!con || !con->eh_data) {
+ if (new_cnt)
+ *new_cnt = 0;
+
return 0;
+ }
mutex_lock(&con->recovery_lock);
control = &con->eeprom_control;
data = con->eh_data;
save_count = data->count - control->ras_num_recs;
mutex_unlock(&con->recovery_lock);
+
+ if (new_cnt)
+ *new_cnt = save_count / adev->umc.retire_unit;
+
/* only new entries are saved */
if (save_count > 0) {
if (amdgpu_ras_eeprom_append(control,
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
struct eeprom_table_record *bps, int pages);
-int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);
+int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
+ unsigned long *new_cnt);
static inline enum ta_ras_block
amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
err_data.err_addr_cnt);
- amdgpu_ras_save_bad_pages(adev);
+ amdgpu_ras_save_bad_pages(adev, NULL);
}
out:
err_data->err_addr_cnt) {
amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
err_data->err_addr_cnt);
- amdgpu_ras_save_bad_pages(adev);
+ amdgpu_ras_save_bad_pages(adev, &(err_data->ue_count));
amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);