drivers/net/ethernet/mellanox/mlx5/core/health.c

   1 /*
   2  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
   3  *
   4  * This software is available to you under a choice of one of two
   5  * licenses.  You may choose to be licensed under the terms of the GNU
   6  * General Public License (GPL) Version 2, available from the file
   7  * COPYING in the main directory of this source tree, or the
   8  * OpenIB.org BSD license below:
   9  *
  10  *     Redistribution and use in source and binary forms, with or
  11  *     without modification, are permitted provided that the following
  12  *     conditions are met:
  13  *
  14  *      - Redistributions of source code must retain the above
  15  *        copyright notice, this list of conditions and the following
  16  *        disclaimer.
  17  *
  18  *      - Redistributions in binary form must reproduce the above
  19  *        copyright notice, this list of conditions and the following
  20  *        disclaimer in the documentation and/or other materials
  21  *        provided with the distribution.
  22  *
  23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30  * SOFTWARE.
  31  */
  32
  33 #include <linux/kernel.h>
  34 #include <linux/module.h>
  35 #include <linux/random.h>
  36 #include <linux/vmalloc.h>
  37 #include <linux/hardirq.h>
  38 #include <linux/mlx5/driver.h>
  39 #include <linux/mlx5/cmd.h>
  40 #include "mlx5_core.h"
  41 #include "lib/eq.h"
  42 #include "lib/mlx5.h"
  43 #include "lib/pci_vsc.h"
  44
  45 enum {
  46         MLX5_HEALTH_POLL_INTERVAL       = 2 * HZ,
  47         MAX_MISSES                      = 3,
  48 };
  49
  50 enum {
  51         MLX5_HEALTH_SYNDR_FW_ERR                = 0x1,
  52         MLX5_HEALTH_SYNDR_IRISC_ERR             = 0x7,
  53         MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR  = 0x8,
  54         MLX5_HEALTH_SYNDR_CRC_ERR               = 0x9,
  55         MLX5_HEALTH_SYNDR_FETCH_PCI_ERR         = 0xa,
  56         MLX5_HEALTH_SYNDR_HW_FTL_ERR            = 0xb,
  57         MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR  = 0xc,
  58         MLX5_HEALTH_SYNDR_EQ_ERR                = 0xd,
  59         MLX5_HEALTH_SYNDR_EQ_INV                = 0xe,
  60         MLX5_HEALTH_SYNDR_FFSER_ERR             = 0xf,
  61         MLX5_HEALTH_SYNDR_HIGH_TEMP             = 0x10
  62 };
  63
  64 enum {
  65         MLX5_DROP_NEW_HEALTH_WORK,
  66 };
  67
  68 enum  {
  69         MLX5_SENSOR_NO_ERR              = 0,
  70         MLX5_SENSOR_PCI_COMM_ERR        = 1,
  71         MLX5_SENSOR_PCI_ERR             = 2,
  72         MLX5_SENSOR_NIC_DISABLED        = 3,
  73         MLX5_SENSOR_NIC_SW_RESET        = 4,
  74         MLX5_SENSOR_FW_SYND_RFR         = 5,
  75 };
  76
  77 u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
  78 {
  79         return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
  80 }
  81
  82 void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
  83 {
  84         u32 cur_cmdq_addr_l_sz;
  85
  86         cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz);
  87         iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) |
  88                     state << MLX5_NIC_IFC_OFFSET,
  89                     &dev->iseg->cmdq_addr_l_sz);
  90 }
  91
  92 static bool sensor_pci_not_working(struct mlx5_core_dev *dev)
  93 {
  94         struct mlx5_core_health *health = &dev->priv.health;
  95         struct health_buffer __iomem *h = health->health;
  96
  97         /* Offline PCI reads return 0xffffffff */
  98         return (ioread32be(&h->fw_ver) == 0xffffffff);
  99 }
 100
 101 static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
 102 {
 103         struct mlx5_core_health *health = &dev->priv.health;
 104         struct health_buffer __iomem *h = health->health;
 105         u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET;
 106         u8 synd = ioread8(&h->synd);
 107
 108         if (rfr && synd)
 109                 mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
 110         return rfr && synd;
 111 }
 112
 113 static u32 check_fatal_sensors(struct mlx5_core_dev *dev)
 114 {
 115         if (sensor_pci_not_working(dev))
 116                 return MLX5_SENSOR_PCI_COMM_ERR;
 117         if (pci_channel_offline(dev->pdev))
 118                 return MLX5_SENSOR_PCI_ERR;
 119         if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
 120                 return MLX5_SENSOR_NIC_DISABLED;
 121         if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET)
 122                 return MLX5_SENSOR_NIC_SW_RESET;
 123         if (sensor_fw_synd_rfr(dev))
 124                 return MLX5_SENSOR_FW_SYND_RFR;
 125
 126         return MLX5_SENSOR_NO_ERR;
 127 }
 128
 129 static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock)
 130 {
 131         enum mlx5_vsc_state state;
 132         int ret;
 133
 134         if (!mlx5_core_is_pf(dev))
 135                 return -EBUSY;
 136
 137         /* Try to lock GW access, this stage doesn't return
 138          * EBUSY because locked GW does not mean that other PF
 139          * already started the reset.
 140          */
 141         ret = mlx5_vsc_gw_lock(dev);
 142         if (ret == -EBUSY)
 143                 return -EINVAL;
 144         if (ret)
 145                 return ret;
 146
 147         state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK;
 148         /* At this stage, if the return status == EBUSY, then we know
 149          * for sure that another PF started the reset, so don't allow
 150          * another reset.
 151          */
 152         ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state);
 153         if (ret)
 154                 mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");
 155
 156         /* Unlock GW access */
 157         mlx5_vsc_gw_unlock(dev);
 158
 159         return ret;
 160 }
 161
 162 static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
 163 {
 164         bool supported = (ioread32be(&dev->iseg->initializing) >>
 165                           MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
 166         u32 fatal_error;
 167
 168         if (!supported)
 169                 return false;
 170
 171         /* The reset only needs to be issued by one PF. The health buffer is
 172          * shared between all functions, and will be cleared during a reset.
 173          * Check again to avoid a redundant 2nd reset. If the fatal erros was
 174          * PCI related a reset won't help.
 175          */
 176         fatal_error = check_fatal_sensors(dev);
 177         if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
 178             fatal_error == MLX5_SENSOR_NIC_DISABLED ||
 179             fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
 180                 mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.");
 181                 return false;
 182         }
 183
 184         mlx5_core_warn(dev, "Issuing FW Reset\n");
 185         /* Write the NIC interface field to initiate the reset, the command
 186          * interface address also resides here, don't overwrite it.
 187          */
 188         mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET);
 189
 190         return true;
 191 }
 192
 193 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
 194 {
 195         mutex_lock(&dev->intf_state_mutex);
 196         if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
 197                 goto unlock;
 198         if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) {
 199                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
 200                 goto unlock;
 201         }
 202
 203         if (check_fatal_sensors(dev) || force) {
 204                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
 205                 mlx5_cmd_flush(dev);
 206         }
 207
 208         mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
 209 unlock:
 210         mutex_unlock(&dev->intf_state_mutex);
 211 }
 212
 213 #define MLX5_CRDUMP_WAIT_MS     60000
 214 #define MLX5_FW_RESET_WAIT_MS   1000
 215 void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
 216 {
 217         unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS;
 218         int lock = -EBUSY;
 219
 220         mutex_lock(&dev->intf_state_mutex);
 221         if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
 222                 goto unlock;
 223
 224         mlx5_core_err(dev, "start\n");
 225
 226         if (check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) {
 227                 /* Get cr-dump and reset FW semaphore */
 228                 lock = lock_sem_sw_reset(dev, true);
 229
 230                 if (lock == -EBUSY) {
 231                         delay_ms = MLX5_CRDUMP_WAIT_MS;
 232                         goto recover_from_sw_reset;
 233                 }
 234                 /* Execute SW reset */
 235                 reset_fw_if_needed(dev);
 236         }
 237
 238 recover_from_sw_reset:
 239         /* Recover from SW reset */
 240         end = jiffies + msecs_to_jiffies(delay_ms);
 241         do {
 242                 if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
 243                         break;
 244
 245                 cond_resched();
 246         } while (!time_after(jiffies, end));
 247
 248         if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
 249                 dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
 250                         mlx5_get_nic_state(dev), delay_ms);
 251         }
 252
 253         /* Release FW semaphore if you are the lock owner */
 254         if (!lock)
 255                 lock_sem_sw_reset(dev, false);
 256
 257         mlx5_core_err(dev, "end\n");
 258
 259 unlock:
 260         mutex_unlock(&dev->intf_state_mutex);
 261 }
 262
 263 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
 264 {
 265         u8 nic_interface = mlx5_get_nic_state(dev);
 266
 267         switch (nic_interface) {
 268         case MLX5_NIC_IFC_FULL:
 269                 mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n");
 270                 break;
 271
 272         case MLX5_NIC_IFC_DISABLED:
 273                 mlx5_core_warn(dev, "starting teardown\n");
 274                 break;
 275
 276         case MLX5_NIC_IFC_NO_DRAM_NIC:
 277                 mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
 278                 break;
 279
 280         case MLX5_NIC_IFC_SW_RESET:
 281                 /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases:
 282                  * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded
 283                  *    and this is a VF), this is not recoverable by SW reset.
 284                  *    Logging of this is handled elsewhere.
 285                  * 2. FW reset has been issued by another function, driver can
 286                  *    be reloaded to recover after the mode switches to
 287                  *    MLX5_NIC_IFC_DISABLED.
 288                  */
 289                 if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR)
 290                         mlx5_core_warn(dev, "NIC SW reset in progress\n");
 291                 break;
 292
 293         default:
 294                 mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n",
 295                                nic_interface);
 296         }
 297
 298         mlx5_disable_device(dev);
 299 }
 300
 301 /* How much time to wait until health resetting the driver (in msecs) */
 302 #define MLX5_RECOVERY_WAIT_MSECS 60000
 303 static void health_care(struct work_struct *work)
 304 {
 305         struct mlx5_core_health *health;
 306         struct mlx5_core_dev *dev;
 307         struct mlx5_priv *priv;
 308         unsigned long end;
 309
 310         health = container_of(work, struct mlx5_core_health, work);
 311         priv = container_of(health, struct mlx5_priv, health);
 312         dev = container_of(priv, struct mlx5_core_dev, priv);
 313         mlx5_core_warn(dev, "handling bad device here\n");
 314         mlx5_handle_bad_state(dev);
 315
 316         end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS);
 317         while (sensor_pci_not_working(dev)) {
 318                 if (time_after(jiffies, end)) {
 319                         mlx5_core_err(dev,
 320                                       "health recovery flow aborted, PCI reads still not working\n");
 321                         return;
 322                 }
 323                 msleep(100);
 324         }
 325
 326         mlx5_core_err(dev, "starting health recovery flow\n");
 327         mlx5_recover_device(dev);
 328 }
 329
 330 static const char *hsynd_str(u8 synd)
 331 {
 332         switch (synd) {
 333         case MLX5_HEALTH_SYNDR_FW_ERR:
 334                 return "firmware internal error";
 335         case MLX5_HEALTH_SYNDR_IRISC_ERR:
 336                 return "irisc not responding";
 337         case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR:
 338                 return "unrecoverable hardware error";
 339         case MLX5_HEALTH_SYNDR_CRC_ERR:
 340                 return "firmware CRC error";
 341         case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR:
 342                 return "ICM fetch PCI error";
 343         case MLX5_HEALTH_SYNDR_HW_FTL_ERR:
 344                 return "HW fatal error\n";
 345         case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR:
 346                 return "async EQ buffer overrun";
 347         case MLX5_HEALTH_SYNDR_EQ_ERR:
 348                 return "EQ error";
 349         case MLX5_HEALTH_SYNDR_EQ_INV:
 350                 return "Invalid EQ referenced";
 351         case MLX5_HEALTH_SYNDR_FFSER_ERR:
 352                 return "FFSER error";
 353         case MLX5_HEALTH_SYNDR_HIGH_TEMP:
 354                 return "High temperature";
 355         default:
 356                 return "unrecognized error";
 357         }
 358 }
 359
 360 static void print_health_info(struct mlx5_core_dev *dev)
 361 {
 362         struct mlx5_core_health *health = &dev->priv.health;
 363         struct health_buffer __iomem *h = health->health;
 364         char fw_str[18];
 365         u32 fw;
 366         int i;
 367
 368         /* If the syndrome is 0, the device is OK and no need to print buffer */
 369         if (!ioread8(&h->synd))
 370                 return;
 371
 372         for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
 373                 mlx5_core_err(dev, "assert_var[%d] 0x%08x\n", i,
 374                               ioread32be(h->assert_var + i));
 375
 376         mlx5_core_err(dev, "assert_exit_ptr 0x%08x\n",
 377                       ioread32be(&h->assert_exit_ptr));
 378         mlx5_core_err(dev, "assert_callra 0x%08x\n",
 379                       ioread32be(&h->assert_callra));
 380         sprintf(fw_str, "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));
 381         mlx5_core_err(dev, "fw_ver %s\n", fw_str);
 382         mlx5_core_err(dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
 383         mlx5_core_err(dev, "irisc_index %d\n", ioread8(&h->irisc_index));
 384         mlx5_core_err(dev, "synd 0x%x: %s\n", ioread8(&h->synd),
 385                       hsynd_str(ioread8(&h->synd)));
 386         mlx5_core_err(dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
 387         fw = ioread32be(&h->fw_ver);
 388         mlx5_core_err(dev, "raw fw_ver 0x%08x\n", fw);
 389 }
 390
 391 static unsigned long get_next_poll_jiffies(void)
 392 {
 393         unsigned long next;
 394
 395         get_random_bytes(&next, sizeof(next));
 396         next %= HZ;
 397         next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
 398
 399         return next;
 400 }
 401
 402 void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
 403 {
 404         struct mlx5_core_health *health = &dev->priv.health;
 405         unsigned long flags;
 406
 407         spin_lock_irqsave(&health->wq_lock, flags);
 408         if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
 409                 queue_work(health->wq, &health->work);
 410         else
 411                 mlx5_core_err(dev, "new health works are not permitted at this stage\n");
 412         spin_unlock_irqrestore(&health->wq_lock, flags);
 413 }
 414
 415 static void poll_health(struct timer_list *t)
 416 {
 417         struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
 418         struct mlx5_core_health *health = &dev->priv.health;
 419         u32 fatal_error;
 420         u32 count;
 421
 422         if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
 423                 goto out;
 424
 425         count = ioread32be(health->health_counter);
 426         if (count == health->prev)
 427                 ++health->miss_counter;
 428         else
 429                 health->miss_counter = 0;
 430
 431         health->prev = count;
 432         if (health->miss_counter == MAX_MISSES) {
 433                 mlx5_core_err(dev, "device's health compromised - reached miss count\n");
 434                 print_health_info(dev);
 435         }
 436
 437         fatal_error = check_fatal_sensors(dev);
 438
 439         if (fatal_error && !health->fatal_error) {
 440                 mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
 441                 dev->priv.health.fatal_error = fatal_error;
 442                 print_health_info(dev);
 443                 mlx5_trigger_health_work(dev);
 444         }
 445
 446 out:
 447         mod_timer(&health->timer, get_next_poll_jiffies());
 448 }
 449
 450 void mlx5_start_health_poll(struct mlx5_core_dev *dev)
 451 {
 452         struct mlx5_core_health *health = &dev->priv.health;
 453
 454         timer_setup(&health->timer, poll_health, 0);
 455         health->fatal_error = MLX5_SENSOR_NO_ERR;
 456         clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
 457         health->health = &dev->iseg->health;
 458         health->health_counter = &dev->iseg->health_counter;
 459
 460         health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL);
 461         add_timer(&health->timer);
 462 }
 463
 464 void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
 465 {
 466         struct mlx5_core_health *health = &dev->priv.health;
 467         unsigned long flags;
 468
 469         if (disable_health) {
 470                 spin_lock_irqsave(&health->wq_lock, flags);
 471                 set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
 472                 spin_unlock_irqrestore(&health->wq_lock, flags);
 473         }
 474
 475         del_timer_sync(&health->timer);
 476 }
 477
 478 void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
 479 {
 480         struct mlx5_core_health *health = &dev->priv.health;
 481         unsigned long flags;
 482
 483         spin_lock_irqsave(&health->wq_lock, flags);
 484         set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
 485         spin_unlock_irqrestore(&health->wq_lock, flags);
 486         cancel_work_sync(&health->work);
 487 }
 488
 489 void mlx5_health_flush(struct mlx5_core_dev *dev)
 490 {
 491         struct mlx5_core_health *health = &dev->priv.health;
 492
 493         flush_workqueue(health->wq);
 494 }
 495
 496 void mlx5_health_cleanup(struct mlx5_core_dev *dev)
 497 {
 498         struct mlx5_core_health *health = &dev->priv.health;
 499
 500         destroy_workqueue(health->wq);
 501 }
 502
 503 int mlx5_health_init(struct mlx5_core_dev *dev)
 504 {
 505         struct mlx5_core_health *health;
 506         char *name;
 507
 508         health = &dev->priv.health;
 509         name = kmalloc(64, GFP_KERNEL);
 510         if (!name)
 511                 return -ENOMEM;
 512
 513         strcpy(name, "mlx5_health");
 514         strcat(name, dev_name(dev->device));
 515         health->wq = create_singlethread_workqueue(name);
 516         kfree(name);
 517         if (!health->wq)
 518                 return -ENOMEM;
 519         spin_lock_init(&health->wq_lock);
 520         INIT_WORK(&health->work, health_care);
 521
 522         return 0;
 523 }