drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c

   1 /*
   2  * Copyright 2019 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  */
  23
  24 #include "amdgpu_ras_eeprom.h"
  25 #include "amdgpu.h"
  26 #include "amdgpu_ras.h"
  27 #include <linux/bits.h>
  28 #include "atom.h"
  29
  30 #define EEPROM_I2C_TARGET_ADDR_VEGA20           0xA0
  31 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS         0xA8
  32 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342    0xA0
  33 #define EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID   0xA0
  34
  35 /*
  36  * The 2 macros below represent the actual size in bytes that
  37  * those entities occupy in the EEPROM memory.
  38  * EEPROM_TABLE_RECORD_SIZE is different from sizeof(eeprom_table_record), which
  39  * uses uint64 to store 6-byte fields such as retired_page.
  40  */
  41 #define EEPROM_TABLE_HEADER_SIZE 20
  42 #define EEPROM_TABLE_RECORD_SIZE 24
  43
  44 #define EEPROM_ADDRESS_SIZE 0x2
  45
  46 /* Table hdr is 'AMDR' */
  47 #define EEPROM_TABLE_HDR_VAL 0x414d4452
  48 #define EEPROM_TABLE_VER 0x00010000
  49
  50 /* Bad GPU tag ‘BADG’ */
  51 #define EEPROM_TABLE_HDR_BAD 0x42414447
  52
  53 /* Assume 2 Mbit size */
  54 #define EEPROM_SIZE_BYTES 256000
  55 #define EEPROM_PAGE__SIZE_BYTES 256
  56 #define EEPROM_HDR_START 0
  57 #define EEPROM_RECORD_START (EEPROM_HDR_START + EEPROM_TABLE_HEADER_SIZE)
  58 #define EEPROM_MAX_RECORD_NUM ((EEPROM_SIZE_BYTES - EEPROM_TABLE_HEADER_SIZE) / EEPROM_TABLE_RECORD_SIZE)
  59 #define EEPROM_ADDR_MSB_MASK GENMASK(17, 8)
  60
  61 #define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev
  62
  63 static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
  64 {
  65         if ((adev->asic_type == CHIP_VEGA20) ||
  66             (adev->asic_type == CHIP_ARCTURUS) ||
  67             (adev->asic_type == CHIP_SIENNA_CICHLID))
  68                 return true;
  69
  70         return false;
  71 }
  72
  73 static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev,
  74                                        uint16_t *i2c_addr)
  75 {
  76         struct atom_context *atom_ctx = adev->mode_info.atom_context;
  77
  78         if (!i2c_addr || !atom_ctx)
  79                 return false;
  80
  81         if (strnstr(atom_ctx->vbios_version,
  82                     "D342",
  83                     sizeof(atom_ctx->vbios_version)))
  84                 *i2c_addr = EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342;
  85         else
  86                 *i2c_addr = EEPROM_I2C_TARGET_ADDR_ARCTURUS;
  87
  88         return true;
  89 }
  90
  91 static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
  92                                   uint16_t *i2c_addr)
  93 {
  94         if (!i2c_addr)
  95                 return false;
  96
  97         switch (adev->asic_type) {
  98         case CHIP_VEGA20:
  99                 *i2c_addr = EEPROM_I2C_TARGET_ADDR_VEGA20;
 100                 break;
 101
 102         case CHIP_ARCTURUS:
 103                 return __get_eeprom_i2c_addr_arct(adev, i2c_addr);
 104
 105         case CHIP_SIENNA_CICHLID:
 106                 *i2c_addr = EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID;
 107                 break;
 108
 109         default:
 110                 return false;
 111         }
 112
 113         return true;
 114 }
 115
 116 static void __encode_table_header_to_buff(struct amdgpu_ras_eeprom_table_header *hdr,
 117                                           unsigned char *buff)
 118 {
 119         uint32_t *pp = (uint32_t *) buff;
 120
 121         pp[0] = cpu_to_le32(hdr->header);
 122         pp[1] = cpu_to_le32(hdr->version);
 123         pp[2] = cpu_to_le32(hdr->first_rec_offset);
 124         pp[3] = cpu_to_le32(hdr->tbl_size);
 125         pp[4] = cpu_to_le32(hdr->checksum);
 126 }
 127
 128 static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_header *hdr,
 129                                           unsigned char *buff)
 130 {
 131         uint32_t *pp = (uint32_t *)buff;
 132
 133         hdr->header           = le32_to_cpu(pp[0]);
 134         hdr->version          = le32_to_cpu(pp[1]);
 135         hdr->first_rec_offset = le32_to_cpu(pp[2]);
 136         hdr->tbl_size         = le32_to_cpu(pp[3]);
 137         hdr->checksum         = le32_to_cpu(pp[4]);
 138 }
 139
 140 static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
 141                                  unsigned char *buff)
 142 {
 143         int ret = 0;
 144         struct amdgpu_device *adev = to_amdgpu_device(control);
 145         struct i2c_msg msg = {
 146                         .addr   = 0,
 147                         .flags  = 0,
 148                         .len    = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
 149                         .buf    = buff,
 150         };
 151
 152
 153         *(uint16_t *)buff = EEPROM_HDR_START;
 154         __encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE);
 155
 156         msg.addr = control->i2c_address;
 157
 158         /* i2c may be unstable in gpu reset */
 159         down_read(&adev->reset_sem);
 160         ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
 161         up_read(&adev->reset_sem);
 162
 163         if (ret < 1)
 164                 DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret);
 165
 166         return ret;
 167 }
 168
 169 static uint32_t  __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control)
 170 {
 171         int i;
 172         uint32_t tbl_sum = 0;
 173
 174         /* Header checksum, skip checksum field in the calculation */
 175         for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++)
 176                 tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i);
 177
 178         return tbl_sum;
 179 }
 180
 181 static uint32_t  __calc_recs_byte_sum(struct eeprom_table_record *records,
 182                                       int num)
 183 {
 184         int i, j;
 185         uint32_t tbl_sum = 0;
 186
 187         /* Records checksum */
 188         for (i = 0; i < num; i++) {
 189                 struct eeprom_table_record *record = &records[i];
 190
 191                 for (j = 0; j < sizeof(*record); j++) {
 192                         tbl_sum += *(((unsigned char *)record) + j);
 193                 }
 194         }
 195
 196         return tbl_sum;
 197 }
 198
 /*
  * Byte sum of the table header (checksum field excluded) plus the byte sum
  * of the given @num records.
  */
 static inline uint32_t  __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control,
                                   struct eeprom_table_record *records, int num)
 {
         uint32_t hdr_sum = __calc_hdr_byte_sum(control);
         uint32_t recs_sum = __calc_recs_byte_sum(records, num);

         return hdr_sum + recs_sum;
 }
 204
 205 /* Checksum = 256 -((sum of all table entries) mod 256) */
 206 static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
 207                                   struct eeprom_table_record *records, int num,
 208                                   uint32_t old_hdr_byte_sum)
 209 {
 210         /*
 211          * This will update the table sum with new records.
 212          *
 213          * TODO: What happens when the EEPROM table is to be wrapped around
 214          * and old records from start will get overridden.
 215          */
 216
 217         /* need to recalculate updated header byte sum */
 218         control->tbl_byte_sum -= old_hdr_byte_sum;
 219         control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num);
 220
 221         control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256);
 222 }
 223
 224 /* table sum mod 256 + checksum must equals 256 */
 225 static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
 226                             struct eeprom_table_record *records, int num)
 227 {
 228         control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num);
 229
 230         if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) {
 231                 DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum);
 232                 return false;
 233         }
 234
 235         return true;
 236 }
 237
 238 static int amdgpu_ras_eeprom_correct_header_tag(
 239                                 struct amdgpu_ras_eeprom_control *control,
 240                                 uint32_t header)
 241 {
 242         unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE];
 243         struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
 244         int ret = 0;
 245
 246         memset(buff, 0, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE);
 247
 248         mutex_lock(&control->tbl_mutex);
 249         hdr->header = header;
 250         ret = __update_table_header(control, buff);
 251         mutex_unlock(&control->tbl_mutex);
 252
 253         return ret;
 254 }
 255
 256 int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
 257 {
 258         unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
 259         struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
 260         int ret = 0;
 261
 262         mutex_lock(&control->tbl_mutex);
 263
 264         hdr->header = EEPROM_TABLE_HDR_VAL;
 265         hdr->version = EEPROM_TABLE_VER;
 266         hdr->first_rec_offset = EEPROM_RECORD_START;
 267         hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE;
 268
 269         control->tbl_byte_sum = 0;
 270         __update_tbl_checksum(control, NULL, 0, 0);
 271         control->next_addr = EEPROM_RECORD_START;
 272
 273         ret = __update_table_header(control, buff);
 274
 275         mutex_unlock(&control->tbl_mutex);
 276
 277         return ret;
 278
 279 }
 280
 281 int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
 282                         bool *exceed_err_limit)
 283 {
 284         int ret = 0;
 285         struct amdgpu_device *adev = to_amdgpu_device(control);
 286         unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
 287         struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
 288         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 289         struct i2c_msg msg = {
 290                         .addr   = 0,
 291                         .flags  = I2C_M_RD,
 292                         .len    = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
 293                         .buf    = buff,
 294         };
 295
 296         *exceed_err_limit = false;
 297
 298         if (!__is_ras_eeprom_supported(adev))
 299                 return 0;
 300
 301         /* Verify i2c adapter is initialized */
 302         if (!adev->pm.smu_i2c.algo)
 303                 return -ENOENT;
 304
 305         if (!__get_eeprom_i2c_addr(adev, &control->i2c_address))
 306                 return -EINVAL;
 307
 308         mutex_init(&control->tbl_mutex);
 309
 310         msg.addr = control->i2c_address;
 311         /* Read/Create table header from EEPROM address 0 */
 312         ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
 313         if (ret < 1) {
 314                 DRM_ERROR("Failed to read EEPROM table header, ret:%d", ret);
 315                 return ret;
 316         }
 317
 318         __decode_table_header_from_buff(hdr, &buff[2]);
 319
 320         if (hdr->header == EEPROM_TABLE_HDR_VAL) {
 321                 control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) /
 322                                     EEPROM_TABLE_RECORD_SIZE;
 323                 control->tbl_byte_sum = __calc_hdr_byte_sum(control);
 324                 control->next_addr = EEPROM_RECORD_START;
 325
 326                 DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
 327                                  control->num_recs);
 328
 329         } else if ((hdr->header == EEPROM_TABLE_HDR_BAD) &&
 330                         (amdgpu_bad_page_threshold != 0)) {
 331                 if (ras->bad_page_cnt_threshold > control->num_recs) {
 332                         dev_info(adev->dev, "Using one valid bigger bad page "
 333                                 "threshold and correcting eeprom header tag.\n");
 334                         ret = amdgpu_ras_eeprom_correct_header_tag(control,
 335                                                         EEPROM_TABLE_HDR_VAL);
 336                 } else {
 337                         *exceed_err_limit = true;
 338                         dev_err(adev->dev, "Exceeding the bad_page_threshold parameter, "
 339                                 "disabling the GPU.\n");
 340                 }
 341         } else {
 342                 DRM_INFO("Creating new EEPROM table");
 343
 344                 ret = amdgpu_ras_eeprom_reset_table(control);
 345         }
 346
 347         return ret == 1 ? 0 : -EIO;
 348 }
 349
 350 static void __encode_table_record_to_buff(struct amdgpu_ras_eeprom_control *control,
 351                                           struct eeprom_table_record *record,
 352                                           unsigned char *buff)
 353 {
 354         __le64 tmp = 0;
 355         int i = 0;
 356
 357         /* Next are all record fields according to EEPROM page spec in LE foramt */
 358         buff[i++] = record->err_type;
 359
 360         buff[i++] = record->bank;
 361
 362         tmp = cpu_to_le64(record->ts);
 363         memcpy(buff + i, &tmp, 8);
 364         i += 8;
 365
 366         tmp = cpu_to_le64((record->offset & 0xffffffffffff));
 367         memcpy(buff + i, &tmp, 6);
 368         i += 6;
 369
 370         buff[i++] = record->mem_channel;
 371         buff[i++] = record->mcumc_id;
 372
 373         tmp = cpu_to_le64((record->retired_page & 0xffffffffffff));
 374         memcpy(buff + i, &tmp, 6);
 375 }
 376
 377 static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *control,
 378                                             struct eeprom_table_record *record,
 379                                             unsigned char *buff)
 380 {
 381         __le64 tmp = 0;
 382         int i =  0;
 383
 384         /* Next are all record fields according to EEPROM page spec in LE foramt */
 385         record->err_type = buff[i++];
 386
 387         record->bank = buff[i++];
 388
 389         memcpy(&tmp, buff + i, 8);
 390         record->ts = le64_to_cpu(tmp);
 391         i += 8;
 392
 393         memcpy(&tmp, buff + i, 6);
 394         record->offset = (le64_to_cpu(tmp) & 0xffffffffffff);
 395         i += 6;
 396
 397         record->mem_channel = buff[i++];
 398         record->mcumc_id = buff[i++];
 399
 400         memcpy(&tmp, buff + i,  6);
 401         record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff);
 402 }
 403
 404 /*
 405  * When reaching end of EEPROM memory jump back to 0 record address
 406  * When next record access will go beyond EEPROM page boundary modify bits A17/A8
 407  * in I2C selector to go to next page
 408  */
 409 static uint32_t __correct_eeprom_dest_address(uint32_t curr_address)
 410 {
 411         uint32_t next_address = curr_address + EEPROM_TABLE_RECORD_SIZE;
 412
 413         /* When all EEPROM memory used jump back to 0 address */
 414         if (next_address > EEPROM_SIZE_BYTES) {
 415                 DRM_INFO("Reached end of EEPROM memory, jumping to 0 "
 416                          "and overriding old record");
 417                 return EEPROM_RECORD_START;
 418         }
 419
 420         /*
 421          * To check if we overflow page boundary  compare next address with
 422          * current and see if bits 17/8 of the EEPROM address will change
 423          * If they do start from the next 256b page
 424          *
 425          * https://www.st.com/resource/en/datasheet/m24m02-dr.pdf sec. 5.1.2
 426          */
 427         if ((curr_address & EEPROM_ADDR_MSB_MASK) != (next_address & EEPROM_ADDR_MSB_MASK)) {
 428                 DRM_DEBUG_DRIVER("Reached end of EEPROM memory page, jumping to next: %lx",
 429                                 (next_address & EEPROM_ADDR_MSB_MASK));
 430
 431                 return  (next_address & EEPROM_ADDR_MSB_MASK);
 432         }
 433
 434         return curr_address;
 435 }
 436
 437 int amdgpu_ras_eeprom_check_err_threshold(
 438                                 struct amdgpu_ras_eeprom_control *control,
 439                                 bool *exceed_err_limit)
 440 {
 441         struct amdgpu_device *adev = to_amdgpu_device(control);
 442         unsigned char buff[EEPROM_ADDRESS_SIZE +
 443                         EEPROM_TABLE_HEADER_SIZE] = { 0 };
 444         struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
 445         struct i2c_msg msg = {
 446                         .addr = control->i2c_address,
 447                         .flags = I2C_M_RD,
 448                         .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
 449                         .buf = buff,
 450         };
 451         int ret;
 452
 453         *exceed_err_limit = false;
 454
 455         if (!__is_ras_eeprom_supported(adev))
 456                 return 0;
 457
 458         /* read EEPROM table header */
 459         mutex_lock(&control->tbl_mutex);
 460         ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
 461         if (ret < 1) {
 462                 dev_err(adev->dev, "Failed to read EEPROM table header.\n");
 463                 goto err;
 464         }
 465
 466         __decode_table_header_from_buff(hdr, &buff[2]);
 467
 468         if (hdr->header == EEPROM_TABLE_HDR_BAD) {
 469                 dev_warn(adev->dev, "This GPU is in BAD status.");
 470                 dev_warn(adev->dev, "Please retire it or setting one bigger "
 471                                 "threshold value when reloading driver.\n");
 472                 *exceed_err_limit = true;
 473         }
 474
 475 err:
 476         mutex_unlock(&control->tbl_mutex);
 477         return 0;
 478 }
 479
 480 int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
 481                                             struct eeprom_table_record *records,
 482                                             bool write,
 483                                             int num)
 484 {
 485         int i, ret = 0;
 486         struct i2c_msg *msgs, *msg;
 487         unsigned char *buffs, *buff;
 488         struct eeprom_table_record *record;
 489         struct amdgpu_device *adev = to_amdgpu_device(control);
 490         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 491
 492         if (!__is_ras_eeprom_supported(adev))
 493                 return 0;
 494
 495         buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE,
 496                          GFP_KERNEL);
 497         if (!buffs)
 498                 return -ENOMEM;
 499
 500         mutex_lock(&control->tbl_mutex);
 501
 502         msgs = kcalloc(num, sizeof(*msgs), GFP_KERNEL);
 503         if (!msgs) {
 504                 ret = -ENOMEM;
 505                 goto free_buff;
 506         }
 507
 508         /*
 509          * If saved bad pages number exceeds the bad page threshold for
 510          * the whole VRAM, update table header to mark the BAD GPU tag
 511          * and schedule one ras recovery after eeprom write is done,
 512          * this can avoid the missing for latest records.
 513          *
 514          * This new header will be picked up and checked in the bootup
 515          * by ras recovery, which may break bootup process to notify
 516          * user this GPU is in bad state and to retire such GPU for
 517          * further check.
 518          */
 519         if (write && (amdgpu_bad_page_threshold != 0) &&
 520                 ((control->num_recs + num) >= ras->bad_page_cnt_threshold)) {
 521                 dev_warn(adev->dev,
 522                         "Saved bad pages(%d) reaches threshold value(%d).\n",
 523                         control->num_recs + num, ras->bad_page_cnt_threshold);
 524                 control->tbl_hdr.header = EEPROM_TABLE_HDR_BAD;
 525         }
 526
 527         /* In case of overflow just start from beginning to not lose newest records */
 528         if (write && (control->next_addr + EEPROM_TABLE_RECORD_SIZE * num > EEPROM_SIZE_BYTES))
 529                 control->next_addr = EEPROM_RECORD_START;
 530
 531         /*
 532          * TODO Currently makes EEPROM writes for each record, this creates
 533          * internal fragmentation. Optimized the code to do full page write of
 534          * 256b
 535          */
 536         for (i = 0; i < num; i++) {
 537                 buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
 538                 record = &records[i];
 539                 msg = &msgs[i];
 540
 541                 control->next_addr = __correct_eeprom_dest_address(control->next_addr);
 542
 543                 /*
 544                  * Update bits 16,17 of EEPROM address in I2C address by setting them
 545                  * to bits 1,2 of Device address byte
 546                  */
 547                 msg->addr = control->i2c_address |
 548                                 ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15);
 549                 msg->flags      = write ? 0 : I2C_M_RD;
 550                 msg->len        = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE;
 551                 msg->buf        = buff;
 552
 553                 /* Insert the EEPROM dest addess, bits 0-15 */
 554                 buff[0] = ((control->next_addr >> 8) & 0xff);
 555                 buff[1] = (control->next_addr & 0xff);
 556
 557                 /* EEPROM table content is stored in LE format */
 558                 if (write)
 559                         __encode_table_record_to_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
 560
 561                 /*
 562                  * The destination EEPROM address might need to be corrected to account
 563                  * for page or entire memory wrapping
 564                  */
 565                 control->next_addr += EEPROM_TABLE_RECORD_SIZE;
 566         }
 567
 568         /* i2c may be unstable in gpu reset */
 569         down_read(&adev->reset_sem);
 570         ret = i2c_transfer(&adev->pm.smu_i2c, msgs, num);
 571         up_read(&adev->reset_sem);
 572
 573         if (ret < 1) {
 574                 DRM_ERROR("Failed to process EEPROM table records, ret:%d", ret);
 575
 576                 /* TODO Restore prev next EEPROM address ? */
 577                 goto free_msgs;
 578         }
 579
 580
 581         if (!write) {
 582                 for (i = 0; i < num; i++) {
 583                         buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
 584                         record = &records[i];
 585
 586                         __decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
 587                 }
 588         }
 589
 590         if (write) {
 591                 uint32_t old_hdr_byte_sum = __calc_hdr_byte_sum(control);
 592
 593                 /*
 594                  * Update table header with size and CRC and account for table
 595                  * wrap around where the assumption is that we treat it as empty
 596                  * table
 597                  *
 598                  * TODO - Check the assumption is correct
 599                  */
 600                 control->num_recs += num;
 601                 control->num_recs %= EEPROM_MAX_RECORD_NUM;
 602                 control->tbl_hdr.tbl_size += EEPROM_TABLE_RECORD_SIZE * num;
 603                 if (control->tbl_hdr.tbl_size > EEPROM_SIZE_BYTES)
 604                         control->tbl_hdr.tbl_size = EEPROM_TABLE_HEADER_SIZE +
 605                         control->num_recs * EEPROM_TABLE_RECORD_SIZE;
 606
 607                 __update_tbl_checksum(control, records, num, old_hdr_byte_sum);
 608
 609                 __update_table_header(control, buffs);
 610         } else if (!__validate_tbl_checksum(control, records, num)) {
 611                 DRM_WARN("EEPROM Table checksum mismatch!");
 612                 /* TODO Uncomment when EEPROM read/write is relliable */
 613                 /* ret = -EIO; */
 614         }
 615
 616 free_msgs:
 617         kfree(msgs);
 618
 619 free_buff:
 620         kfree(buffs);
 621
 622         mutex_unlock(&control->tbl_mutex);
 623
 624         return ret == num ? 0 : -EIO;
 625 }
 626
 627 inline uint32_t amdgpu_ras_eeprom_get_record_max_length(void)
 628 {
 629         return EEPROM_MAX_RECORD_NUM;
 630 }
 631
 /* Used for testing if bugs encountered */
 #if 0
 /*
  * Debug helper (compiled out): write a single record, rewind next_addr to
  * the first record slot, read the record back and log it.
  */
 void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control)
 {
         int i;
         struct eeprom_table_record *recs = kcalloc(1, sizeof(*recs), GFP_KERNEL);

         if (!recs)
                 return;

         for (i = 0; i < 1 ; i++) {
                 recs[i].address = 0xdeadbeef;
                 recs[i].retired_page = i;
         }

         if (amdgpu_ras_eeprom_process_recods(control, recs, true, 1)) {
                 DRM_ERROR("Failed in writing to table");
                 goto out;
         }

         /* Clear the buffer so the read-back cannot pass by accident */
         memset(recs, 0, sizeof(*recs) * 1);

         control->next_addr = EEPROM_RECORD_START;

         if (amdgpu_ras_eeprom_process_recods(control, recs, false, 1)) {
                 DRM_ERROR("Failed in reading from table");
                 goto out;
         }

         for (i = 0; i < 1; i++)
                 DRM_INFO("rec.address :0x%llx, rec.retired_page :%llu",
                          recs[i].address, recs[i].retired_page);

 out:
         /* The record buffer was leaked on every path before */
         kfree(recs);
 }
 #endif