Linux 6.9-rc1
[linux-2.6-microblaze.git] / drivers / net / ethernet / mellanox / mlx5 / core / health.c
1 /*
2  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include <linux/kernel.h>
34 #include <linux/random.h>
35 #include <linux/vmalloc.h>
36 #include <linux/hardirq.h>
37 #include <linux/mlx5/driver.h>
38 #include <linux/kern_levels.h>
39 #include "mlx5_core.h"
40 #include "lib/eq.h"
41 #include "lib/mlx5.h"
42 #include "lib/events.h"
43 #include "lib/pci_vsc.h"
44 #include "lib/tout.h"
45 #include "diag/fw_tracer.h"
46 #include "diag/reporter_vnic.h"
47
/* Number of consecutive polls with an unchanged health counter at which
 * poll_health() logs the device as compromised and queues report work.
 */
enum {
	MAX_MISSES = 3,
};
51
/* Bit numbers used with set_bit()/clear_bit()/test_bit() on the health
 * context's flags word.
 */
enum {
	/* While set, the fatal report work bails out without doing anything. */
	MLX5_DROP_HEALTH_WORK = 0,
};
55
/* Result codes of mlx5_health_check_fatal_sensors(); 0 means no fatal
 * condition was found.
 */
enum {
	MLX5_SENSOR_NO_ERR = 0,
	/* fw_ver in the health buffer reads back as 0xffffffff */
	MLX5_SENSOR_PCI_COMM_ERR = 1,
	/* pci_channel_offline() reports the channel as offline */
	MLX5_SENSOR_PCI_ERR = 2,
	/* NIC interface state is MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED */
	MLX5_SENSOR_NIC_DISABLED = 3,
	/* NIC interface state is MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET */
	MLX5_SENSOR_NIC_SW_RESET = 4,
	/* health buffer has a non-zero syndrome together with a non-zero rfr */
	MLX5_SENSOR_FW_SYND_RFR = 5,
};
64
/* Masks applied to the rfr_severity byte of the health buffer by
 * mlx5_health_get_severity().
 */
enum {
	/* low three bits: the severity value itself */
	MLX5_SEVERITY_MASK = 0x7,
	/* bit 3: set when the severity value is valid */
	MLX5_SEVERITY_VALID_MASK = 0x8,
};
69
70 u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
71 {
72         return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
73 }
74
75 void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
76 {
77         u32 cur_cmdq_addr_l_sz;
78
79         cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz);
80         iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) |
81                     state << MLX5_NIC_IFC_OFFSET,
82                     &dev->iseg->cmdq_addr_l_sz);
83 }
84
85 static bool sensor_pci_not_working(struct mlx5_core_dev *dev)
86 {
87         struct mlx5_core_health *health = &dev->priv.health;
88         struct health_buffer __iomem *h = health->health;
89
90         /* Offline PCI reads return 0xffffffff */
91         return (ioread32be(&h->fw_ver) == 0xffffffff);
92 }
93
94 static int mlx5_health_get_rfr(u8 rfr_severity)
95 {
96         return rfr_severity >> MLX5_RFR_BIT_OFFSET;
97 }
98
99 static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
100 {
101         struct mlx5_core_health *health = &dev->priv.health;
102         struct health_buffer __iomem *h = health->health;
103         u8 synd = ioread8(&h->synd);
104         u8 rfr;
105
106         rfr = mlx5_health_get_rfr(ioread8(&h->rfr_severity));
107
108         if (rfr && synd)
109                 mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
110         return rfr && synd;
111 }
112
113 u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev)
114 {
115         if (sensor_pci_not_working(dev))
116                 return MLX5_SENSOR_PCI_COMM_ERR;
117         if (pci_channel_offline(dev->pdev))
118                 return MLX5_SENSOR_PCI_ERR;
119         if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED)
120                 return MLX5_SENSOR_NIC_DISABLED;
121         if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET)
122                 return MLX5_SENSOR_NIC_SW_RESET;
123         if (sensor_fw_synd_rfr(dev))
124                 return MLX5_SENSOR_FW_SYND_RFR;
125
126         return MLX5_SENSOR_NO_ERR;
127 }
128
129 static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock)
130 {
131         enum mlx5_vsc_state state;
132         int ret;
133
134         if (!mlx5_core_is_pf(dev))
135                 return -EBUSY;
136
137         /* Try to lock GW access, this stage doesn't return
138          * EBUSY because locked GW does not mean that other PF
139          * already started the reset.
140          */
141         ret = mlx5_vsc_gw_lock(dev);
142         if (ret == -EBUSY)
143                 return -EINVAL;
144         if (ret)
145                 return ret;
146
147         state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK;
148         /* At this stage, if the return status == EBUSY, then we know
149          * for sure that another PF started the reset, so don't allow
150          * another reset.
151          */
152         ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state);
153         if (ret)
154                 mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");
155
156         /* Unlock GW access */
157         mlx5_vsc_gw_unlock(dev);
158
159         return ret;
160 }
161
162 static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
163 {
164         bool supported = (ioread32be(&dev->iseg->initializing) >>
165                           MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
166         u32 fatal_error;
167
168         if (!supported)
169                 return false;
170
171         /* The reset only needs to be issued by one PF. The health buffer is
172          * shared between all functions, and will be cleared during a reset.
173          * Check again to avoid a redundant 2nd reset. If the fatal errors was
174          * PCI related a reset won't help.
175          */
176         fatal_error = mlx5_health_check_fatal_sensors(dev);
177         if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
178             fatal_error == MLX5_SENSOR_NIC_DISABLED ||
179             fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
180                 mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.");
181                 return false;
182         }
183
184         mlx5_core_warn(dev, "Issuing FW Reset\n");
185         /* Write the NIC interface field to initiate the reset, the command
186          * interface address also resides here, don't overwrite it.
187          */
188         mlx5_set_nic_state(dev, MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET);
189
190         return true;
191 }
192
193 static void enter_error_state(struct mlx5_core_dev *dev, bool force)
194 {
195         if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
196                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
197                 mlx5_cmd_flush(dev);
198         }
199
200         mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
201 }
202
203 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
204 {
205         bool err_detected = false;
206
207         /* Mark the device as fatal in order to abort FW commands */
208         if ((mlx5_health_check_fatal_sensors(dev) || force) &&
209             dev->state == MLX5_DEVICE_STATE_UP) {
210                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
211                 err_detected = true;
212         }
213         mutex_lock(&dev->intf_state_mutex);
214         if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
215                 goto unlock;/* a previous error is still being handled */
216
217         enter_error_state(dev, force);
218 unlock:
219         mutex_unlock(&dev->intf_state_mutex);
220 }
221
222 void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
223 {
224         unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE);
225         int lock = -EBUSY;
226
227         mutex_lock(&dev->intf_state_mutex);
228         if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
229                 goto unlock;
230
231         mlx5_core_err(dev, "start\n");
232
233         if (mlx5_health_check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) {
234                 /* Get cr-dump and reset FW semaphore */
235                 lock = lock_sem_sw_reset(dev, true);
236
237                 if (lock == -EBUSY) {
238                         delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP);
239                         goto recover_from_sw_reset;
240                 }
241                 /* Execute SW reset */
242                 reset_fw_if_needed(dev);
243         }
244
245 recover_from_sw_reset:
246         /* Recover from SW reset */
247         end = jiffies + msecs_to_jiffies(delay_ms);
248         do {
249                 if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED)
250                         break;
251
252                 msleep(20);
253         } while (!time_after(jiffies, end));
254
255         if (mlx5_get_nic_state(dev) != MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED) {
256                 dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
257                         mlx5_get_nic_state(dev), delay_ms);
258         }
259
260         /* Release FW semaphore if you are the lock owner */
261         if (!lock)
262                 lock_sem_sw_reset(dev, false);
263
264         mlx5_core_err(dev, "end\n");
265
266 unlock:
267         mutex_unlock(&dev->intf_state_mutex);
268 }
269
270 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
271 {
272         u8 nic_interface = mlx5_get_nic_state(dev);
273
274         switch (nic_interface) {
275         case MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER:
276                 mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n");
277                 break;
278
279         case MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED:
280                 mlx5_core_warn(dev, "starting teardown\n");
281                 break;
282
283         case MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC:
284                 mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
285                 break;
286
287         case MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET:
288                 /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases:
289                  * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded
290                  *    and this is a VF), this is not recoverable by SW reset.
291                  *    Logging of this is handled elsewhere.
292                  * 2. FW reset has been issued by another function, driver can
293                  *    be reloaded to recover after the mode switches to
294                  *    MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED.
295                  */
296                 if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR)
297                         mlx5_core_warn(dev, "NIC SW reset in progress\n");
298                 break;
299
300         default:
301                 mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n",
302                                nic_interface);
303         }
304
305         mlx5_disable_device(dev);
306 }
307
308 int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
309 {
310         unsigned long end;
311
312         end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET));
313         while (sensor_pci_not_working(dev)) {
314                 if (time_after(jiffies, end))
315                         return -ETIMEDOUT;
316                 if (test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
317                         mlx5_core_warn(dev, "device is being removed, stop waiting for PCI\n");
318                         return -ENODEV;
319                 }
320                 msleep(100);
321         }
322         return 0;
323 }
324
325 static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
326 {
327         mlx5_core_warn(dev, "handling bad device here\n");
328         mlx5_handle_bad_state(dev);
329         if (mlx5_health_wait_pci_up(dev)) {
330                 mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n");
331                 return -EIO;
332         }
333         mlx5_core_err(dev, "starting health recovery flow\n");
334         if (mlx5_recover_device(dev) || mlx5_health_check_fatal_sensors(dev)) {
335                 mlx5_core_err(dev, "health recovery failed\n");
336                 return -EIO;
337         }
338
339         mlx5_core_info(dev, "health recovery succeeded\n");
340         return 0;
341 }
342
343 static const char *hsynd_str(u8 synd)
344 {
345         switch (synd) {
346         case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_INTERNAL_ERR:
347                 return "firmware internal error";
348         case MLX5_INITIAL_SEG_HEALTH_SYNDROME_DEAD_IRISC:
349                 return "irisc not responding";
350         case MLX5_INITIAL_SEG_HEALTH_SYNDROME_HW_FATAL_ERR:
351                 return "unrecoverable hardware error";
352         case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_CRC_ERR:
353                 return "firmware CRC error";
354         case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_FETCH_PCI_ERR:
355                 return "ICM fetch PCI error";
356         case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PAGE_ERR:
357                 return "HW fatal error\n";
358         case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ASYNCHRONOUS_EQ_BUF_OVERRUN:
359                 return "async EQ buffer overrun";
360         case MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_IN_ERR:
361                 return "EQ error";
362         case MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV:
363                 return "Invalid EQ referenced";
364         case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR:
365                 return "FFSER error";
366         case MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR:
367                 return "High temperature";
368         case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR:
369                 return "ICM fetch PCI data poisoned error";
370         default:
371                 return "unrecognized error";
372         }
373 }
374
375 static const char *mlx5_loglevel_str(int level)
376 {
377         switch (level) {
378         case LOGLEVEL_EMERG:
379                 return "EMERGENCY";
380         case LOGLEVEL_ALERT:
381                 return "ALERT";
382         case LOGLEVEL_CRIT:
383                 return "CRITICAL";
384         case LOGLEVEL_ERR:
385                 return "ERROR";
386         case LOGLEVEL_WARNING:
387                 return "WARNING";
388         case LOGLEVEL_NOTICE:
389                 return "NOTICE";
390         case LOGLEVEL_INFO:
391                 return "INFO";
392         case LOGLEVEL_DEBUG:
393                 return "DEBUG";
394         }
395         return "Unknown log level";
396 }
397
398 static int mlx5_health_get_severity(u8 rfr_severity)
399 {
400         return rfr_severity & MLX5_SEVERITY_VALID_MASK ?
401                rfr_severity & MLX5_SEVERITY_MASK : LOGLEVEL_ERR;
402 }
403
404 static void print_health_info(struct mlx5_core_dev *dev)
405 {
406         struct mlx5_core_health *health = &dev->priv.health;
407         struct health_buffer __iomem *h = health->health;
408         u8 rfr_severity;
409         int severity;
410         int i;
411
412         /* If the syndrome is 0, the device is OK and no need to print buffer */
413         if (!ioread8(&h->synd))
414                 return;
415
416         if (ioread32be(&h->fw_ver) == 0xFFFFFFFF) {
417                 mlx5_log(dev, LOGLEVEL_ERR, "PCI slot is unavailable\n");
418                 return;
419         }
420
421         rfr_severity = ioread8(&h->rfr_severity);
422         severity  = mlx5_health_get_severity(rfr_severity);
423         mlx5_log(dev, severity, "Health issue observed, %s, severity(%d) %s:\n",
424                  hsynd_str(ioread8(&h->synd)), severity, mlx5_loglevel_str(severity));
425
426         for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
427                 mlx5_log(dev, severity, "assert_var[%d] 0x%08x\n", i,
428                          ioread32be(h->assert_var + i));
429
430         mlx5_log(dev, severity, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
431         mlx5_log(dev, severity, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
432         mlx5_log(dev, severity, "fw_ver %d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev),
433                  fw_rev_sub(dev));
434         mlx5_log(dev, severity, "time %u\n", ioread32be(&h->time));
435         mlx5_log(dev, severity, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
436         mlx5_log(dev, severity, "rfr %d\n", mlx5_health_get_rfr(rfr_severity));
437         mlx5_log(dev, severity, "severity %d (%s)\n", severity, mlx5_loglevel_str(severity));
438         mlx5_log(dev, severity, "irisc_index %d\n", ioread8(&h->irisc_index));
439         mlx5_log(dev, severity, "synd 0x%x: %s\n", ioread8(&h->synd),
440                  hsynd_str(ioread8(&h->synd)));
441         mlx5_log(dev, severity, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
442         mlx5_log(dev, severity, "raw fw_ver 0x%08x\n", ioread32be(&h->fw_ver));
443 }
444
445 static int
446 mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
447                           struct devlink_fmsg *fmsg,
448                           struct netlink_ext_ack *extack)
449 {
450         struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
451         struct mlx5_core_health *health = &dev->priv.health;
452         struct health_buffer __iomem *h = health->health;
453         u8 synd = ioread8(&h->synd);
454
455         devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd);
456         if (!synd)
457                 return 0;
458
459         devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd));
460
461         return 0;
462 }
463
464 struct mlx5_fw_reporter_ctx {
465         u8 err_synd;
466         int miss_counter;
467 };
468
469 static void
470 mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg,
471                                struct mlx5_fw_reporter_ctx *fw_reporter_ctx)
472 {
473         devlink_fmsg_u8_pair_put(fmsg, "syndrome", fw_reporter_ctx->err_synd);
474         devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter", fw_reporter_ctx->miss_counter);
475 }
476
477 static void
478 mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev,
479                                        struct devlink_fmsg *fmsg)
480 {
481         struct mlx5_core_health *health = &dev->priv.health;
482         struct health_buffer __iomem *h = health->health;
483         u8 rfr_severity;
484         int i;
485
486         if (!ioread8(&h->synd))
487                 return;
488
489         devlink_fmsg_pair_nest_start(fmsg, "health buffer");
490         devlink_fmsg_obj_nest_start(fmsg);
491         devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var");
492         for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
493                 devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i));
494         devlink_fmsg_arr_pair_nest_end(fmsg);
495         devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr",
496                                   ioread32be(&h->assert_exit_ptr));
497         devlink_fmsg_u32_pair_put(fmsg, "assert_callra",
498                                   ioread32be(&h->assert_callra));
499         devlink_fmsg_u32_pair_put(fmsg, "time", ioread32be(&h->time));
500         devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id));
501         rfr_severity = ioread8(&h->rfr_severity);
502         devlink_fmsg_u8_pair_put(fmsg, "rfr", mlx5_health_get_rfr(rfr_severity));
503         devlink_fmsg_u8_pair_put(fmsg, "severity", mlx5_health_get_severity(rfr_severity));
504         devlink_fmsg_u8_pair_put(fmsg, "irisc_index", ioread8(&h->irisc_index));
505         devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd));
506         devlink_fmsg_u32_pair_put(fmsg, "ext_synd", ioread16be(&h->ext_synd));
507         devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver", ioread32be(&h->fw_ver));
508         devlink_fmsg_obj_nest_end(fmsg);
509         devlink_fmsg_pair_nest_end(fmsg);
510 }
511
512 static int
513 mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
514                       struct devlink_fmsg *fmsg, void *priv_ctx,
515                       struct netlink_ext_ack *extack)
516 {
517         struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
518         int err;
519
520         err = mlx5_fw_tracer_trigger_core_dump_general(dev);
521         if (err)
522                 return err;
523
524         if (priv_ctx) {
525                 struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
526
527                 mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
528         }
529
530         mlx5_fw_reporter_heath_buffer_data_put(dev, fmsg);
531
532         return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg);
533 }
534
535 static void mlx5_fw_reporter_err_work(struct work_struct *work)
536 {
537         struct mlx5_fw_reporter_ctx fw_reporter_ctx;
538         struct mlx5_core_health *health;
539
540         health = container_of(work, struct mlx5_core_health, report_work);
541
542         if (IS_ERR_OR_NULL(health->fw_reporter))
543                 return;
544
545         fw_reporter_ctx.err_synd = health->synd;
546         fw_reporter_ctx.miss_counter = health->miss_counter;
547         if (fw_reporter_ctx.err_synd) {
548                 devlink_health_report(health->fw_reporter,
549                                       "FW syndrome reported", &fw_reporter_ctx);
550                 return;
551         }
552         if (fw_reporter_ctx.miss_counter)
553                 devlink_health_report(health->fw_reporter,
554                                       "FW miss counter reported",
555                                       &fw_reporter_ctx);
556 }
557
558 static const struct devlink_health_reporter_ops mlx5_fw_reporter_pf_ops = {
559                 .name = "fw",
560                 .diagnose = mlx5_fw_reporter_diagnose,
561                 .dump = mlx5_fw_reporter_dump,
562 };
563
564 static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
565                 .name = "fw",
566                 .diagnose = mlx5_fw_reporter_diagnose,
567 };
568
/* devlink "recover" callback of the fw_fatal reporter: run the health
 * recovery flow on the reporter's device and return its result.
 */
static int
mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
			       void *priv_ctx,
			       struct netlink_ext_ack *extack)
{
	return mlx5_health_try_recover(devlink_health_reporter_priv(reporter));
}
578
579 static int
580 mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
581                             struct devlink_fmsg *fmsg, void *priv_ctx,
582                             struct netlink_ext_ack *extack)
583 {
584         struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
585         u32 crdump_size = dev->priv.health.crdump_size;
586         u32 *cr_data;
587         int err;
588
589         if (!mlx5_core_is_pf(dev))
590                 return -EPERM;
591
592         cr_data = kvmalloc(crdump_size, GFP_KERNEL);
593         if (!cr_data)
594                 return -ENOMEM;
595         err = mlx5_crdump_collect(dev, cr_data);
596         if (err)
597                 goto free_data;
598
599         if (priv_ctx) {
600                 struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
601
602                 mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
603         }
604
605         devlink_fmsg_binary_pair_put(fmsg, "crdump_data", cr_data, crdump_size);
606
607 free_data:
608         kvfree(cr_data);
609         return err;
610 }
611
612 static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
613 {
614         struct mlx5_fw_reporter_ctx fw_reporter_ctx;
615         struct mlx5_core_health *health;
616         struct mlx5_core_dev *dev;
617         struct devlink *devlink;
618         struct mlx5_priv *priv;
619
620         health = container_of(work, struct mlx5_core_health, fatal_report_work);
621         priv = container_of(health, struct mlx5_priv, health);
622         dev = container_of(priv, struct mlx5_core_dev, priv);
623         devlink = priv_to_devlink(dev);
624
625         mutex_lock(&dev->intf_state_mutex);
626         if (test_bit(MLX5_DROP_HEALTH_WORK, &health->flags)) {
627                 mlx5_core_err(dev, "health works are not permitted at this stage\n");
628                 mutex_unlock(&dev->intf_state_mutex);
629                 return;
630         }
631         mutex_unlock(&dev->intf_state_mutex);
632         enter_error_state(dev, false);
633         if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
634                 devl_lock(devlink);
635                 if (mlx5_health_try_recover(dev))
636                         mlx5_core_err(dev, "health recovery failed\n");
637                 devl_unlock(devlink);
638                 return;
639         }
640         fw_reporter_ctx.err_synd = health->synd;
641         fw_reporter_ctx.miss_counter = health->miss_counter;
642         if (devlink_health_report(health->fw_fatal_reporter,
643                                   "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) {
644                 /* If recovery wasn't performed, due to grace period,
645                  * unload the driver. This ensures that the driver
646                  * closes all its resources and it is not subjected to
647                  * requests from the kernel.
648                  */
649                 mlx5_core_err(dev, "Driver is in error state. Unloading\n");
650                 mlx5_unload_one(dev, false);
651         }
652 }
653
654 static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_pf_ops = {
655                 .name = "fw_fatal",
656                 .recover = mlx5_fw_fatal_reporter_recover,
657                 .dump = mlx5_fw_fatal_reporter_dump,
658 };
659
660 static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
661                 .name = "fw_fatal",
662                 .recover = mlx5_fw_fatal_reporter_recover,
663 };
664
/* Grace periods passed to devl_health_reporter_create() for the fw_fatal
 * reporter, chosen by function type (presumably in milliseconds - confirm
 * against the devlink health API). VF and SF use the default value.
 */
#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD		180000
#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD		60000
#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD		30000
#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD	MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD
669
670 void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
671 {
672         const struct devlink_health_reporter_ops *fw_fatal_ops;
673         struct mlx5_core_health *health = &dev->priv.health;
674         const struct devlink_health_reporter_ops *fw_ops;
675         struct devlink *devlink = priv_to_devlink(dev);
676         u64 grace_period;
677
678         fw_fatal_ops = &mlx5_fw_fatal_reporter_pf_ops;
679         fw_ops = &mlx5_fw_reporter_pf_ops;
680         if (mlx5_core_is_ecpf(dev)) {
681                 grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD;
682         } else if (mlx5_core_is_pf(dev)) {
683                 grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD;
684         } else {
685                 /* VF or SF */
686                 grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD;
687                 fw_fatal_ops = &mlx5_fw_fatal_reporter_ops;
688                 fw_ops = &mlx5_fw_reporter_ops;
689         }
690
691         health->fw_reporter =
692                 devl_health_reporter_create(devlink, fw_ops, 0, dev);
693         if (IS_ERR(health->fw_reporter))
694                 mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
695                                PTR_ERR(health->fw_reporter));
696
697         health->fw_fatal_reporter =
698                 devl_health_reporter_create(devlink,
699                                             fw_fatal_ops,
700                                             grace_period,
701                                             dev);
702         if (IS_ERR(health->fw_fatal_reporter))
703                 mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n",
704                                PTR_ERR(health->fw_fatal_reporter));
705 }
706
707 static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev)
708 {
709         struct mlx5_core_health *health = &dev->priv.health;
710
711         if (!IS_ERR_OR_NULL(health->fw_reporter))
712                 devlink_health_reporter_destroy(health->fw_reporter);
713
714         if (!IS_ERR_OR_NULL(health->fw_fatal_reporter))
715                 devlink_health_reporter_destroy(health->fw_fatal_reporter);
716 }
717
718 static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev)
719 {
720         unsigned long next;
721
722         get_random_bytes(&next, sizeof(next));
723         next %= HZ;
724         next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL));
725
726         return next;
727 }
728
729 void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
730 {
731         struct mlx5_core_health *health = &dev->priv.health;
732
733         if (!mlx5_dev_is_lightweight(dev))
734                 queue_work(health->wq, &health->fatal_report_work);
735 }
736
737 #define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60)
738 static void mlx5_health_log_ts_update(struct work_struct *work)
739 {
740         struct delayed_work *dwork = to_delayed_work(work);
741         u32 out[MLX5_ST_SZ_DW(mrtc_reg)] = {};
742         u32 in[MLX5_ST_SZ_DW(mrtc_reg)] = {};
743         struct mlx5_core_health *health;
744         struct mlx5_core_dev *dev;
745         struct mlx5_priv *priv;
746         u64 now_us;
747
748         health = container_of(dwork, struct mlx5_core_health, update_fw_log_ts_work);
749         priv = container_of(health, struct mlx5_priv, health);
750         dev = container_of(priv, struct mlx5_core_dev, priv);
751
752         now_us =  ktime_to_us(ktime_get_real());
753
754         MLX5_SET(mrtc_reg, in, time_h, now_us >> 32);
755         MLX5_SET(mrtc_reg, in, time_l, now_us & 0xFFFFFFFF);
756         mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MRTC, 0, 1);
757
758         queue_delayed_work(health->wq, &health->update_fw_log_ts_work,
759                            msecs_to_jiffies(MLX5_MSEC_PER_HOUR));
760 }
761
762 static void poll_health(struct timer_list *t)
763 {
764         struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
765         struct mlx5_core_health *health = &dev->priv.health;
766         struct health_buffer __iomem *h = health->health;
767         u32 fatal_error;
768         u8 prev_synd;
769         u32 count;
770
771         if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
772                 goto out;
773
774         fatal_error = mlx5_health_check_fatal_sensors(dev);
775
776         if (fatal_error && !health->fatal_error) {
777                 mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
778                 dev->priv.health.fatal_error = fatal_error;
779                 print_health_info(dev);
780                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
781                 mlx5_trigger_health_work(dev);
782                 return;
783         }
784
785         count = ioread32be(health->health_counter);
786         if (count == health->prev)
787                 ++health->miss_counter;
788         else
789                 health->miss_counter = 0;
790
791         health->prev = count;
792         if (health->miss_counter == MAX_MISSES) {
793                 mlx5_core_err(dev, "device's health compromised - reached miss count\n");
794                 print_health_info(dev);
795                 queue_work(health->wq, &health->report_work);
796         }
797
798         prev_synd = health->synd;
799         health->synd = ioread8(&h->synd);
800         if (health->synd && health->synd != prev_synd)
801                 queue_work(health->wq, &health->report_work);
802
803 out:
804         mod_timer(&health->timer, get_next_poll_jiffies(dev));
805 }
806
807 void mlx5_start_health_poll(struct mlx5_core_dev *dev)
808 {
809         u64 poll_interval_ms =  mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL);
810         struct mlx5_core_health *health = &dev->priv.health;
811
812         timer_setup(&health->timer, poll_health, 0);
813         health->fatal_error = MLX5_SENSOR_NO_ERR;
814         clear_bit(MLX5_DROP_HEALTH_WORK, &health->flags);
815         health->health = &dev->iseg->health;
816         health->health_counter = &dev->iseg->health_counter;
817
818         health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms);
819         add_timer(&health->timer);
820 }
821
822 void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
823 {
824         struct mlx5_core_health *health = &dev->priv.health;
825
826         if (disable_health)
827                 set_bit(MLX5_DROP_HEALTH_WORK, &health->flags);
828
829         del_timer_sync(&health->timer);
830 }
831
832 void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev)
833 {
834         struct mlx5_core_health *health = &dev->priv.health;
835
836         if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
837                 queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
838 }
839
840 void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
841 {
842         struct mlx5_core_health *health = &dev->priv.health;
843
844         set_bit(MLX5_DROP_HEALTH_WORK, &health->flags);
845         cancel_delayed_work_sync(&health->update_fw_log_ts_work);
846         cancel_work_sync(&health->report_work);
847         cancel_work_sync(&health->fatal_report_work);
848 }
849
850 void mlx5_health_cleanup(struct mlx5_core_dev *dev)
851 {
852         struct mlx5_core_health *health = &dev->priv.health;
853
854         cancel_delayed_work_sync(&health->update_fw_log_ts_work);
855         destroy_workqueue(health->wq);
856         mlx5_reporter_vnic_destroy(dev);
857         mlx5_fw_reporters_destroy(dev);
858 }
859
860 int mlx5_health_init(struct mlx5_core_dev *dev)
861 {
862         struct devlink *devlink = priv_to_devlink(dev);
863         struct mlx5_core_health *health;
864         char *name;
865
866         if (!mlx5_dev_is_lightweight(dev)) {
867                 devl_lock(devlink);
868                 mlx5_fw_reporters_create(dev);
869                 devl_unlock(devlink);
870         }
871         mlx5_reporter_vnic_create(dev);
872
873         health = &dev->priv.health;
874         name = kmalloc(64, GFP_KERNEL);
875         if (!name)
876                 goto out_err;
877
878         strcpy(name, "mlx5_health");
879         strcat(name, dev_name(dev->device));
880         health->wq = create_singlethread_workqueue(name);
881         kfree(name);
882         if (!health->wq)
883                 goto out_err;
884         INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work);
885         INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work);
886         INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update);
887
888         return 0;
889
890 out_err:
891         mlx5_reporter_vnic_destroy(dev);
892         mlx5_fw_reporters_destroy(dev);
893         return -ENOMEM;
894 }