drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c

   1 /*
   2  * Copyright 2019 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  */
  23
  24 #include "amdgpu_ras_eeprom.h"
  25 #include "amdgpu.h"
  26 #include "amdgpu_ras.h"
  27 #include <linux/bits.h>
  28 #include "atom.h"
  29
  30 #define EEPROM_I2C_TARGET_ADDR_VEGA20           0xA0
  31 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS         0xA8
  32 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342    0xA0
  33 #define EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID   0xA0
  34 #define EEPROM_I2C_TARGET_ADDR_ALDEBARAN        0xA0
  35
  36 /*
  37  * The 2 macros bellow represent the actual size in bytes that
  38  * those entities occupy in the EEPROM memory.
  39  * EEPROM_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which
  40  * uses uint64 to store 6b fields such as retired_page.
  41  */
  42 #define EEPROM_TABLE_HEADER_SIZE 20
  43 #define EEPROM_TABLE_RECORD_SIZE 24
  44
  45 #define EEPROM_ADDRESS_SIZE 0x2
  46
  47 /* Table hdr is 'AMDR' */
  48 #define EEPROM_TABLE_HDR_VAL 0x414d4452
  49 #define EEPROM_TABLE_VER 0x00010000
  50
  51 /* Bad GPU tag ‘BADG’ */
  52 #define EEPROM_TABLE_HDR_BAD 0x42414447
  53
  54 /* Assume 2 Mbit size */
  55 #define EEPROM_SIZE_BYTES 256000
  56 #define EEPROM_PAGE__SIZE_BYTES 256
  57 #define EEPROM_HDR_START 0
  58 #define EEPROM_RECORD_START (EEPROM_HDR_START + EEPROM_TABLE_HEADER_SIZE)
  59 #define EEPROM_MAX_RECORD_NUM ((EEPROM_SIZE_BYTES - EEPROM_TABLE_HEADER_SIZE) / EEPROM_TABLE_RECORD_SIZE)
  60 #define EEPROM_ADDR_MSB_MASK GENMASK(17, 8)
  61
  62 #define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev
  63
  64 static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
  65 {
  66         if ((adev->asic_type == CHIP_VEGA20) ||
  67             (adev->asic_type == CHIP_ARCTURUS) ||
  68             (adev->asic_type == CHIP_SIENNA_CICHLID) ||
  69             (adev->asic_type == CHIP_ALDEBARAN))
  70                 return true;
  71
  72         return false;
  73 }
  74
  75 static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev,
  76                                        uint16_t *i2c_addr)
  77 {
  78         struct atom_context *atom_ctx = adev->mode_info.atom_context;
  79
  80         if (!i2c_addr || !atom_ctx)
  81                 return false;
  82
  83         if (strnstr(atom_ctx->vbios_version,
  84                     "D342",
  85                     sizeof(atom_ctx->vbios_version)))
  86                 *i2c_addr = EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342;
  87         else
  88                 *i2c_addr = EEPROM_I2C_TARGET_ADDR_ARCTURUS;
  89
  90         return true;
  91 }
  92
  93 static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
  94                                   uint16_t *i2c_addr)
  95 {
  96         if (!i2c_addr)
  97                 return false;
  98
  99         switch (adev->asic_type) {
 100         case CHIP_VEGA20:
 101                 *i2c_addr = EEPROM_I2C_TARGET_ADDR_VEGA20;
 102                 break;
 103
 104         case CHIP_ARCTURUS:
 105                 return __get_eeprom_i2c_addr_arct(adev, i2c_addr);
 106
 107         case CHIP_SIENNA_CICHLID:
 108                 *i2c_addr = EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID;
 109                 break;
 110
 111         case CHIP_ALDEBARAN:
 112                 *i2c_addr = EEPROM_I2C_TARGET_ADDR_ALDEBARAN;
 113                 break;
 114
 115         default:
 116                 return false;
 117         }
 118
 119         return true;
 120 }
 121
 122 static void __encode_table_header_to_buff(struct amdgpu_ras_eeprom_table_header *hdr,
 123                                           unsigned char *buff)
 124 {
 125         uint32_t *pp = (uint32_t *) buff;
 126
 127         pp[0] = cpu_to_le32(hdr->header);
 128         pp[1] = cpu_to_le32(hdr->version);
 129         pp[2] = cpu_to_le32(hdr->first_rec_offset);
 130         pp[3] = cpu_to_le32(hdr->tbl_size);
 131         pp[4] = cpu_to_le32(hdr->checksum);
 132 }
 133
 134 static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_header *hdr,
 135                                           unsigned char *buff)
 136 {
 137         uint32_t *pp = (uint32_t *)buff;
 138
 139         hdr->header           = le32_to_cpu(pp[0]);
 140         hdr->version          = le32_to_cpu(pp[1]);
 141         hdr->first_rec_offset = le32_to_cpu(pp[2]);
 142         hdr->tbl_size         = le32_to_cpu(pp[3]);
 143         hdr->checksum         = le32_to_cpu(pp[4]);
 144 }
 145
 146 static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
 147                                  unsigned char *buff)
 148 {
 149         int ret = 0;
 150         struct amdgpu_device *adev = to_amdgpu_device(control);
 151         struct i2c_msg msg = {
 152                         .addr   = 0,
 153                         .flags  = 0,
 154                         .len    = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
 155                         .buf    = buff,
 156         };
 157
 158
 159         *(uint16_t *)buff = EEPROM_HDR_START;
 160         __encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE);
 161
 162         msg.addr = control->i2c_address;
 163
 164         /* i2c may be unstable in gpu reset */
 165         down_read(&adev->reset_sem);
 166         ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
 167         up_read(&adev->reset_sem);
 168
 169         if (ret < 1)
 170                 DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret);
 171
 172         return ret;
 173 }
 174
 175 static uint32_t  __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control)
 176 {
 177         int i;
 178         uint32_t tbl_sum = 0;
 179
 180         /* Header checksum, skip checksum field in the calculation */
 181         for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++)
 182                 tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i);
 183
 184         return tbl_sum;
 185 }
 186
 187 static uint32_t  __calc_recs_byte_sum(struct eeprom_table_record *records,
 188                                       int num)
 189 {
 190         int i, j;
 191         uint32_t tbl_sum = 0;
 192
 193         /* Records checksum */
 194         for (i = 0; i < num; i++) {
 195                 struct eeprom_table_record *record = &records[i];
 196
 197                 for (j = 0; j < sizeof(*record); j++) {
 198                         tbl_sum += *(((unsigned char *)record) + j);
 199                 }
 200         }
 201
 202         return tbl_sum;
 203 }
 204
 205 static inline uint32_t  __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control,
 206                                   struct eeprom_table_record *records, int num)
 207 {
 208         return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num);
 209 }
 210
 211 /* Checksum = 256 -((sum of all table entries) mod 256) */
 212 static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
 213                                   struct eeprom_table_record *records, int num,
 214                                   uint32_t old_hdr_byte_sum)
 215 {
 216         /*
 217          * This will update the table sum with new records.
 218          *
 219          * TODO: What happens when the EEPROM table is to be wrapped around
 220          * and old records from start will get overridden.
 221          */
 222
 223         /* need to recalculate updated header byte sum */
 224         control->tbl_byte_sum -= old_hdr_byte_sum;
 225         control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num);
 226
 227         control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256);
 228 }
 229
 230 /* table sum mod 256 + checksum must equals 256 */
 231 static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
 232                             struct eeprom_table_record *records, int num)
 233 {
 234         control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num);
 235
 236         if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) {
 237                 DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum);
 238                 return false;
 239         }
 240
 241         return true;
 242 }
 243
 244 static int amdgpu_ras_eeprom_correct_header_tag(
 245                                 struct amdgpu_ras_eeprom_control *control,
 246                                 uint32_t header)
 247 {
 248         unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE];
 249         struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
 250         int ret = 0;
 251
 252         memset(buff, 0, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE);
 253
 254         mutex_lock(&control->tbl_mutex);
 255         hdr->header = header;
 256         ret = __update_table_header(control, buff);
 257         mutex_unlock(&control->tbl_mutex);
 258
 259         return ret;
 260 }
 261
 262 int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
 263 {
 264         unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
 265         struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
 266         int ret = 0;
 267
 268         mutex_lock(&control->tbl_mutex);
 269
 270         hdr->header = EEPROM_TABLE_HDR_VAL;
 271         hdr->version = EEPROM_TABLE_VER;
 272         hdr->first_rec_offset = EEPROM_RECORD_START;
 273         hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE;
 274
 275         control->tbl_byte_sum = 0;
 276         __update_tbl_checksum(control, NULL, 0, 0);
 277         control->next_addr = EEPROM_RECORD_START;
 278
 279         ret = __update_table_header(control, buff);
 280
 281         mutex_unlock(&control->tbl_mutex);
 282
 283         return ret;
 284
 285 }
 286
 287 int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
 288                         bool *exceed_err_limit)
 289 {
 290         int ret = 0;
 291         struct amdgpu_device *adev = to_amdgpu_device(control);
 292         unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
 293         struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
 294         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 295         struct i2c_msg msg = {
 296                         .addr   = 0,
 297                         .flags  = I2C_M_RD,
 298                         .len    = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
 299                         .buf    = buff,
 300         };
 301
 302         *exceed_err_limit = false;
 303
 304         if (!__is_ras_eeprom_supported(adev))
 305                 return 0;
 306
 307         /* Verify i2c adapter is initialized */
 308         if (!adev->pm.smu_i2c.algo)
 309                 return -ENOENT;
 310
 311         if (!__get_eeprom_i2c_addr(adev, &control->i2c_address))
 312                 return -EINVAL;
 313
 314         mutex_init(&control->tbl_mutex);
 315
 316         msg.addr = control->i2c_address;
 317         /* Read/Create table header from EEPROM address 0 */
 318         ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
 319         if (ret < 1) {
 320                 DRM_ERROR("Failed to read EEPROM table header, ret:%d", ret);
 321                 return ret;
 322         }
 323
 324         __decode_table_header_from_buff(hdr, &buff[2]);
 325
 326         if (hdr->header == EEPROM_TABLE_HDR_VAL) {
 327                 control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) /
 328                                     EEPROM_TABLE_RECORD_SIZE;
 329                 control->tbl_byte_sum = __calc_hdr_byte_sum(control);
 330                 control->next_addr = EEPROM_RECORD_START;
 331
 332                 DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
 333                                  control->num_recs);
 334
 335         } else if ((hdr->header == EEPROM_TABLE_HDR_BAD) &&
 336                         (amdgpu_bad_page_threshold != 0)) {
 337                 if (ras->bad_page_cnt_threshold > control->num_recs) {
 338                         dev_info(adev->dev, "Using one valid bigger bad page "
 339                                 "threshold and correcting eeprom header tag.\n");
 340                         ret = amdgpu_ras_eeprom_correct_header_tag(control,
 341                                                         EEPROM_TABLE_HDR_VAL);
 342                 } else {
 343                         *exceed_err_limit = true;
 344                         dev_err(adev->dev, "Exceeding the bad_page_threshold parameter, "
 345                                 "disabling the GPU.\n");
 346                 }
 347         } else {
 348                 DRM_INFO("Creating new EEPROM table");
 349
 350                 ret = amdgpu_ras_eeprom_reset_table(control);
 351         }
 352
 353         return ret == 1 ? 0 : -EIO;
 354 }
 355
 356 static void __encode_table_record_to_buff(struct amdgpu_ras_eeprom_control *control,
 357                                           struct eeprom_table_record *record,
 358                                           unsigned char *buff)
 359 {
 360         __le64 tmp = 0;
 361         int i = 0;
 362
 363         /* Next are all record fields according to EEPROM page spec in LE foramt */
 364         buff[i++] = record->err_type;
 365
 366         buff[i++] = record->bank;
 367
 368         tmp = cpu_to_le64(record->ts);
 369         memcpy(buff + i, &tmp, 8);
 370         i += 8;
 371
 372         tmp = cpu_to_le64((record->offset & 0xffffffffffff));
 373         memcpy(buff + i, &tmp, 6);
 374         i += 6;
 375
 376         buff[i++] = record->mem_channel;
 377         buff[i++] = record->mcumc_id;
 378
 379         tmp = cpu_to_le64((record->retired_page & 0xffffffffffff));
 380         memcpy(buff + i, &tmp, 6);
 381 }
 382
 383 static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *control,
 384                                             struct eeprom_table_record *record,
 385                                             unsigned char *buff)
 386 {
 387         __le64 tmp = 0;
 388         int i =  0;
 389
 390         /* Next are all record fields according to EEPROM page spec in LE foramt */
 391         record->err_type = buff[i++];
 392
 393         record->bank = buff[i++];
 394
 395         memcpy(&tmp, buff + i, 8);
 396         record->ts = le64_to_cpu(tmp);
 397         i += 8;
 398
 399         memcpy(&tmp, buff + i, 6);
 400         record->offset = (le64_to_cpu(tmp) & 0xffffffffffff);
 401         i += 6;
 402
 403         record->mem_channel = buff[i++];
 404         record->mcumc_id = buff[i++];
 405
 406         memcpy(&tmp, buff + i,  6);
 407         record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff);
 408 }
 409
 410 /*
 411  * When reaching end of EEPROM memory jump back to 0 record address
 412  * When next record access will go beyond EEPROM page boundary modify bits A17/A8
 413  * in I2C selector to go to next page
 414  */
 415 static uint32_t __correct_eeprom_dest_address(uint32_t curr_address)
 416 {
 417         uint32_t next_address = curr_address + EEPROM_TABLE_RECORD_SIZE;
 418
 419         /* When all EEPROM memory used jump back to 0 address */
 420         if (next_address > EEPROM_SIZE_BYTES) {
 421                 DRM_INFO("Reached end of EEPROM memory, jumping to 0 "
 422                          "and overriding old record");
 423                 return EEPROM_RECORD_START;
 424         }
 425
 426         /*
 427          * To check if we overflow page boundary  compare next address with
 428          * current and see if bits 17/8 of the EEPROM address will change
 429          * If they do start from the next 256b page
 430          *
 431          * https://www.st.com/resource/en/datasheet/m24m02-dr.pdf sec. 5.1.2
 432          */
 433         if ((curr_address & EEPROM_ADDR_MSB_MASK) != (next_address & EEPROM_ADDR_MSB_MASK)) {
 434                 DRM_DEBUG_DRIVER("Reached end of EEPROM memory page, jumping to next: %lx",
 435                                 (next_address & EEPROM_ADDR_MSB_MASK));
 436
 437                 return  (next_address & EEPROM_ADDR_MSB_MASK);
 438         }
 439
 440         return curr_address;
 441 }
 442
 443 bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
 444 {
 445         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 446
 447         if (!__is_ras_eeprom_supported(adev))
 448                 return false;
 449
 450         /* skip check eeprom table for VEGA20 Gaming */
 451         if (!con)
 452                 return false;
 453         else
 454                 if (!(con->features & BIT(AMDGPU_RAS_BLOCK__UMC)))
 455                         return false;
 456
 457         if (con->eeprom_control.tbl_hdr.header == EEPROM_TABLE_HDR_BAD) {
 458                 dev_warn(adev->dev, "This GPU is in BAD status.");
 459                 dev_warn(adev->dev, "Please retire it or setting one bigger "
 460                                 "threshold value when reloading driver.\n");
 461                 return true;
 462         }
 463
 464         return false;
 465 }
 466
 467 int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
 468                                             struct eeprom_table_record *records,
 469                                             bool write,
 470                                             int num)
 471 {
 472         int i, ret = 0;
 473         struct i2c_msg *msgs, *msg;
 474         unsigned char *buffs, *buff;
 475         struct eeprom_table_record *record;
 476         struct amdgpu_device *adev = to_amdgpu_device(control);
 477         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 478
 479         if (!__is_ras_eeprom_supported(adev))
 480                 return 0;
 481
 482         buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE,
 483                          GFP_KERNEL);
 484         if (!buffs)
 485                 return -ENOMEM;
 486
 487         mutex_lock(&control->tbl_mutex);
 488
 489         msgs = kcalloc(num, sizeof(*msgs), GFP_KERNEL);
 490         if (!msgs) {
 491                 ret = -ENOMEM;
 492                 goto free_buff;
 493         }
 494
 495         /*
 496          * If saved bad pages number exceeds the bad page threshold for
 497          * the whole VRAM, update table header to mark the BAD GPU tag
 498          * and schedule one ras recovery after eeprom write is done,
 499          * this can avoid the missing for latest records.
 500          *
 501          * This new header will be picked up and checked in the bootup
 502          * by ras recovery, which may break bootup process to notify
 503          * user this GPU is in bad state and to retire such GPU for
 504          * further check.
 505          */
 506         if (write && (amdgpu_bad_page_threshold != 0) &&
 507                 ((control->num_recs + num) >= ras->bad_page_cnt_threshold)) {
 508                 dev_warn(adev->dev,
 509                         "Saved bad pages(%d) reaches threshold value(%d).\n",
 510                         control->num_recs + num, ras->bad_page_cnt_threshold);
 511                 control->tbl_hdr.header = EEPROM_TABLE_HDR_BAD;
 512         }
 513
 514         /* In case of overflow just start from beginning to not lose newest records */
 515         if (write && (control->next_addr + EEPROM_TABLE_RECORD_SIZE * num > EEPROM_SIZE_BYTES))
 516                 control->next_addr = EEPROM_RECORD_START;
 517
 518         /*
 519          * TODO Currently makes EEPROM writes for each record, this creates
 520          * internal fragmentation. Optimized the code to do full page write of
 521          * 256b
 522          */
 523         for (i = 0; i < num; i++) {
 524                 buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
 525                 record = &records[i];
 526                 msg = &msgs[i];
 527
 528                 control->next_addr = __correct_eeprom_dest_address(control->next_addr);
 529
 530                 /*
 531                  * Update bits 16,17 of EEPROM address in I2C address by setting them
 532                  * to bits 1,2 of Device address byte
 533                  */
 534                 msg->addr = control->i2c_address |
 535                                 ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15);
 536                 msg->flags      = write ? 0 : I2C_M_RD;
 537                 msg->len        = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE;
 538                 msg->buf        = buff;
 539
 540                 /* Insert the EEPROM dest addess, bits 0-15 */
 541                 buff[0] = ((control->next_addr >> 8) & 0xff);
 542                 buff[1] = (control->next_addr & 0xff);
 543
 544                 /* EEPROM table content is stored in LE format */
 545                 if (write)
 546                         __encode_table_record_to_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
 547
 548                 /*
 549                  * The destination EEPROM address might need to be corrected to account
 550                  * for page or entire memory wrapping
 551                  */
 552                 control->next_addr += EEPROM_TABLE_RECORD_SIZE;
 553         }
 554
 555         /* i2c may be unstable in gpu reset */
 556         down_read(&adev->reset_sem);
 557         ret = i2c_transfer(&adev->pm.smu_i2c, msgs, num);
 558         up_read(&adev->reset_sem);
 559
 560         if (ret < 1) {
 561                 DRM_ERROR("Failed to process EEPROM table records, ret:%d", ret);
 562
 563                 /* TODO Restore prev next EEPROM address ? */
 564                 goto free_msgs;
 565         }
 566
 567
 568         if (!write) {
 569                 for (i = 0; i < num; i++) {
 570                         buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
 571                         record = &records[i];
 572
 573                         __decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
 574                 }
 575         }
 576
 577         if (write) {
 578                 uint32_t old_hdr_byte_sum = __calc_hdr_byte_sum(control);
 579
 580                 /*
 581                  * Update table header with size and CRC and account for table
 582                  * wrap around where the assumption is that we treat it as empty
 583                  * table
 584                  *
 585                  * TODO - Check the assumption is correct
 586                  */
 587                 control->num_recs += num;
 588                 control->num_recs %= EEPROM_MAX_RECORD_NUM;
 589                 control->tbl_hdr.tbl_size += EEPROM_TABLE_RECORD_SIZE * num;
 590                 if (control->tbl_hdr.tbl_size > EEPROM_SIZE_BYTES)
 591                         control->tbl_hdr.tbl_size = EEPROM_TABLE_HEADER_SIZE +
 592                         control->num_recs * EEPROM_TABLE_RECORD_SIZE;
 593
 594                 __update_tbl_checksum(control, records, num, old_hdr_byte_sum);
 595
 596                 __update_table_header(control, buffs);
 597         } else if (!__validate_tbl_checksum(control, records, num)) {
 598                 DRM_WARN("EEPROM Table checksum mismatch!");
 599                 /* TODO Uncomment when EEPROM read/write is relliable */
 600                 /* ret = -EIO; */
 601         }
 602
 603 free_msgs:
 604         kfree(msgs);
 605
 606 free_buff:
 607         kfree(buffs);
 608
 609         mutex_unlock(&control->tbl_mutex);
 610
 611         return ret == num ? 0 : -EIO;
 612 }
 613
 614 inline uint32_t amdgpu_ras_eeprom_get_record_max_length(void)
 615 {
 616         return EEPROM_MAX_RECORD_NUM;
 617 }
 618
 619 /* Used for testing if bugs encountered */
 620 #if 0
 621 void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control)
 622 {
 623         int i;
 624         struct eeprom_table_record *recs = kcalloc(1, sizeof(*recs), GFP_KERNEL);
 625
 626         if (!recs)
 627                 return;
 628
 629         for (i = 0; i < 1 ; i++) {
 630                 recs[i].address = 0xdeadbeef;
 631                 recs[i].retired_page = i;
 632         }
 633
 634         if (!amdgpu_ras_eeprom_process_recods(control, recs, true, 1)) {
 635
 636                 memset(recs, 0, sizeof(*recs) * 1);
 637
 638                 control->next_addr = EEPROM_RECORD_START;
 639
 640                 if (!amdgpu_ras_eeprom_process_recods(control, recs, false, 1)) {
 641                         for (i = 0; i < 1; i++)
 642                                 DRM_INFO("rec.address :0x%llx, rec.retired_page :%llu",
 643                                          recs[i].address, recs[i].retired_page);
 644                 } else
 645                         DRM_ERROR("Failed in reading from table");
 646
 647         } else
 648                 DRM_ERROR("Failed in writing to table");
 649 }
 650 #endif