Merge branch 'for-linus' into for-next
[linux-2.6-microblaze.git] / drivers / net / ethernet / mellanox / mlx5 / core / health.c
1 /*
2  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include <linux/kernel.h>
34 #include <linux/module.h>
35 #include <linux/random.h>
36 #include <linux/vmalloc.h>
37 #include <linux/hardirq.h>
38 #include <linux/mlx5/driver.h>
39 #include "mlx5_core.h"
40 #include "lib/eq.h"
41 #include "lib/mlx5.h"
42 #include "lib/pci_vsc.h"
43 #include "diag/fw_tracer.h"
44
45 enum {
46         MLX5_HEALTH_POLL_INTERVAL       = 2 * HZ,
47         MAX_MISSES                      = 3,
48 };
49
/* Syndrome codes read from the health buffer; see hsynd_str() for text. */
enum {
	MLX5_HEALTH_SYNDR_FW_ERR = 0x1,
	MLX5_HEALTH_SYNDR_IRISC_ERR = 0x7,
	MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR = 0x8,
	MLX5_HEALTH_SYNDR_CRC_ERR = 0x9,
	MLX5_HEALTH_SYNDR_FETCH_PCI_ERR = 0xa,
	MLX5_HEALTH_SYNDR_HW_FTL_ERR = 0xb,
	MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR = 0xc,
	MLX5_HEALTH_SYNDR_EQ_ERR = 0xd,
	MLX5_HEALTH_SYNDR_EQ_INV = 0xe,
	MLX5_HEALTH_SYNDR_FFSER_ERR = 0xf,
	MLX5_HEALTH_SYNDR_HIGH_TEMP = 0x10
};
63
/* Bit numbers used with set_bit()/test_bit() on the health flags word. */
enum {
	/* When set, mlx5_trigger_health_work() refuses to queue new work */
	MLX5_DROP_NEW_HEALTH_WORK = 0,
};
67
/*
 * Results of mlx5_health_check_fatal_sensors(). Zero means no fatal
 * condition was found; the values are consecutive starting from zero.
 */
enum {
	MLX5_SENSOR_NO_ERR = 0,
	MLX5_SENSOR_PCI_COMM_ERR,	/* 1 */
	MLX5_SENSOR_PCI_ERR,		/* 2 */
	MLX5_SENSOR_NIC_DISABLED,	/* 3 */
	MLX5_SENSOR_NIC_SW_RESET,	/* 4 */
	MLX5_SENSOR_FW_SYND_RFR,	/* 5 */
};
76
77 u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
78 {
79         return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
80 }
81
82 void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
83 {
84         u32 cur_cmdq_addr_l_sz;
85
86         cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz);
87         iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) |
88                     state << MLX5_NIC_IFC_OFFSET,
89                     &dev->iseg->cmdq_addr_l_sz);
90 }
91
92 static bool sensor_pci_not_working(struct mlx5_core_dev *dev)
93 {
94         struct mlx5_core_health *health = &dev->priv.health;
95         struct health_buffer __iomem *h = health->health;
96
97         /* Offline PCI reads return 0xffffffff */
98         return (ioread32be(&h->fw_ver) == 0xffffffff);
99 }
100
101 static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
102 {
103         struct mlx5_core_health *health = &dev->priv.health;
104         struct health_buffer __iomem *h = health->health;
105         u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET;
106         u8 synd = ioread8(&h->synd);
107
108         if (rfr && synd)
109                 mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
110         return rfr && synd;
111 }
112
113 u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev)
114 {
115         if (sensor_pci_not_working(dev))
116                 return MLX5_SENSOR_PCI_COMM_ERR;
117         if (pci_channel_offline(dev->pdev))
118                 return MLX5_SENSOR_PCI_ERR;
119         if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
120                 return MLX5_SENSOR_NIC_DISABLED;
121         if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET)
122                 return MLX5_SENSOR_NIC_SW_RESET;
123         if (sensor_fw_synd_rfr(dev))
124                 return MLX5_SENSOR_FW_SYND_RFR;
125
126         return MLX5_SENSOR_NO_ERR;
127 }
128
129 static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock)
130 {
131         enum mlx5_vsc_state state;
132         int ret;
133
134         if (!mlx5_core_is_pf(dev))
135                 return -EBUSY;
136
137         /* Try to lock GW access, this stage doesn't return
138          * EBUSY because locked GW does not mean that other PF
139          * already started the reset.
140          */
141         ret = mlx5_vsc_gw_lock(dev);
142         if (ret == -EBUSY)
143                 return -EINVAL;
144         if (ret)
145                 return ret;
146
147         state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK;
148         /* At this stage, if the return status == EBUSY, then we know
149          * for sure that another PF started the reset, so don't allow
150          * another reset.
151          */
152         ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state);
153         if (ret)
154                 mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");
155
156         /* Unlock GW access */
157         mlx5_vsc_gw_unlock(dev);
158
159         return ret;
160 }
161
162 static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
163 {
164         bool supported = (ioread32be(&dev->iseg->initializing) >>
165                           MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
166         u32 fatal_error;
167
168         if (!supported)
169                 return false;
170
171         /* The reset only needs to be issued by one PF. The health buffer is
172          * shared between all functions, and will be cleared during a reset.
173          * Check again to avoid a redundant 2nd reset. If the fatal erros was
174          * PCI related a reset won't help.
175          */
176         fatal_error = mlx5_health_check_fatal_sensors(dev);
177         if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
178             fatal_error == MLX5_SENSOR_NIC_DISABLED ||
179             fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
180                 mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.");
181                 return false;
182         }
183
184         mlx5_core_warn(dev, "Issuing FW Reset\n");
185         /* Write the NIC interface field to initiate the reset, the command
186          * interface address also resides here, don't overwrite it.
187          */
188         mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET);
189
190         return true;
191 }
192
193 static void enter_error_state(struct mlx5_core_dev *dev, bool force)
194 {
195         if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
196                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
197                 mlx5_cmd_flush(dev);
198         }
199
200         mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
201 }
202
203 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
204 {
205         bool err_detected = false;
206
207         /* Mark the device as fatal in order to abort FW commands */
208         if ((mlx5_health_check_fatal_sensors(dev) || force) &&
209             dev->state == MLX5_DEVICE_STATE_UP) {
210                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
211                 err_detected = true;
212         }
213         mutex_lock(&dev->intf_state_mutex);
214         if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
215                 goto unlock;/* a previous error is still being handled */
216         if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) {
217                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
218                 goto unlock;
219         }
220
221         enter_error_state(dev, force);
222 unlock:
223         mutex_unlock(&dev->intf_state_mutex);
224 }
225
226 #define MLX5_CRDUMP_WAIT_MS     60000
227 #define MLX5_FW_RESET_WAIT_MS   1000
228 void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
229 {
230         unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS;
231         int lock = -EBUSY;
232
233         mutex_lock(&dev->intf_state_mutex);
234         if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
235                 goto unlock;
236
237         mlx5_core_err(dev, "start\n");
238
239         if (mlx5_health_check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) {
240                 /* Get cr-dump and reset FW semaphore */
241                 lock = lock_sem_sw_reset(dev, true);
242
243                 if (lock == -EBUSY) {
244                         delay_ms = MLX5_CRDUMP_WAIT_MS;
245                         goto recover_from_sw_reset;
246                 }
247                 /* Execute SW reset */
248                 reset_fw_if_needed(dev);
249         }
250
251 recover_from_sw_reset:
252         /* Recover from SW reset */
253         end = jiffies + msecs_to_jiffies(delay_ms);
254         do {
255                 if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
256                         break;
257
258                 msleep(20);
259         } while (!time_after(jiffies, end));
260
261         if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
262                 dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
263                         mlx5_get_nic_state(dev), delay_ms);
264         }
265
266         /* Release FW semaphore if you are the lock owner */
267         if (!lock)
268                 lock_sem_sw_reset(dev, false);
269
270         mlx5_core_err(dev, "end\n");
271
272 unlock:
273         mutex_unlock(&dev->intf_state_mutex);
274 }
275
276 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
277 {
278         u8 nic_interface = mlx5_get_nic_state(dev);
279
280         switch (nic_interface) {
281         case MLX5_NIC_IFC_FULL:
282                 mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n");
283                 break;
284
285         case MLX5_NIC_IFC_DISABLED:
286                 mlx5_core_warn(dev, "starting teardown\n");
287                 break;
288
289         case MLX5_NIC_IFC_NO_DRAM_NIC:
290                 mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
291                 break;
292
293         case MLX5_NIC_IFC_SW_RESET:
294                 /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases:
295                  * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded
296                  *    and this is a VF), this is not recoverable by SW reset.
297                  *    Logging of this is handled elsewhere.
298                  * 2. FW reset has been issued by another function, driver can
299                  *    be reloaded to recover after the mode switches to
300                  *    MLX5_NIC_IFC_DISABLED.
301                  */
302                 if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR)
303                         mlx5_core_warn(dev, "NIC SW reset in progress\n");
304                 break;
305
306         default:
307                 mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n",
308                                nic_interface);
309         }
310
311         mlx5_disable_device(dev);
312 }
313
314 /* How much time to wait until health resetting the driver (in msecs) */
315 #define MLX5_RECOVERY_WAIT_MSECS 60000
316 int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
317 {
318         unsigned long end;
319
320         end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS);
321         while (sensor_pci_not_working(dev)) {
322                 if (time_after(jiffies, end))
323                         return -ETIMEDOUT;
324                 msleep(100);
325         }
326         return 0;
327 }
328
329 static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
330 {
331         mlx5_core_warn(dev, "handling bad device here\n");
332         mlx5_handle_bad_state(dev);
333         if (mlx5_health_wait_pci_up(dev)) {
334                 mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n");
335                 return -EIO;
336         }
337         mlx5_core_err(dev, "starting health recovery flow\n");
338         if (mlx5_recover_device(dev) || mlx5_health_check_fatal_sensors(dev)) {
339                 mlx5_core_err(dev, "health recovery failed\n");
340                 return -EIO;
341         }
342
343         mlx5_core_info(dev, "health recovery succeeded\n");
344         return 0;
345 }
346
347 static const char *hsynd_str(u8 synd)
348 {
349         switch (synd) {
350         case MLX5_HEALTH_SYNDR_FW_ERR:
351                 return "firmware internal error";
352         case MLX5_HEALTH_SYNDR_IRISC_ERR:
353                 return "irisc not responding";
354         case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR:
355                 return "unrecoverable hardware error";
356         case MLX5_HEALTH_SYNDR_CRC_ERR:
357                 return "firmware CRC error";
358         case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR:
359                 return "ICM fetch PCI error";
360         case MLX5_HEALTH_SYNDR_HW_FTL_ERR:
361                 return "HW fatal error\n";
362         case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR:
363                 return "async EQ buffer overrun";
364         case MLX5_HEALTH_SYNDR_EQ_ERR:
365                 return "EQ error";
366         case MLX5_HEALTH_SYNDR_EQ_INV:
367                 return "Invalid EQ referenced";
368         case MLX5_HEALTH_SYNDR_FFSER_ERR:
369                 return "FFSER error";
370         case MLX5_HEALTH_SYNDR_HIGH_TEMP:
371                 return "High temperature";
372         default:
373                 return "unrecognized error";
374         }
375 }
376
377 static void print_health_info(struct mlx5_core_dev *dev)
378 {
379         struct mlx5_core_health *health = &dev->priv.health;
380         struct health_buffer __iomem *h = health->health;
381         char fw_str[18];
382         u32 fw;
383         int i;
384
385         /* If the syndrome is 0, the device is OK and no need to print buffer */
386         if (!ioread8(&h->synd))
387                 return;
388
389         for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
390                 mlx5_core_err(dev, "assert_var[%d] 0x%08x\n", i,
391                               ioread32be(h->assert_var + i));
392
393         mlx5_core_err(dev, "assert_exit_ptr 0x%08x\n",
394                       ioread32be(&h->assert_exit_ptr));
395         mlx5_core_err(dev, "assert_callra 0x%08x\n",
396                       ioread32be(&h->assert_callra));
397         sprintf(fw_str, "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));
398         mlx5_core_err(dev, "fw_ver %s\n", fw_str);
399         mlx5_core_err(dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
400         mlx5_core_err(dev, "irisc_index %d\n", ioread8(&h->irisc_index));
401         mlx5_core_err(dev, "synd 0x%x: %s\n", ioread8(&h->synd),
402                       hsynd_str(ioread8(&h->synd)));
403         mlx5_core_err(dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
404         fw = ioread32be(&h->fw_ver);
405         mlx5_core_err(dev, "raw fw_ver 0x%08x\n", fw);
406 }
407
408 static int
409 mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
410                           struct devlink_fmsg *fmsg,
411                           struct netlink_ext_ack *extack)
412 {
413         struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
414         struct mlx5_core_health *health = &dev->priv.health;
415         struct health_buffer __iomem *h = health->health;
416         u8 synd;
417         int err;
418
419         synd = ioread8(&h->synd);
420         err = devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd);
421         if (err || !synd)
422                 return err;
423         return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd));
424 }
425
426 struct mlx5_fw_reporter_ctx {
427         u8 err_synd;
428         int miss_counter;
429 };
430
431 static int
432 mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg,
433                                struct mlx5_fw_reporter_ctx *fw_reporter_ctx)
434 {
435         int err;
436
437         err = devlink_fmsg_u8_pair_put(fmsg, "syndrome",
438                                        fw_reporter_ctx->err_synd);
439         if (err)
440                 return err;
441         err = devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter",
442                                         fw_reporter_ctx->miss_counter);
443         if (err)
444                 return err;
445         return 0;
446 }
447
448 static int
449 mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev,
450                                        struct devlink_fmsg *fmsg)
451 {
452         struct mlx5_core_health *health = &dev->priv.health;
453         struct health_buffer __iomem *h = health->health;
454         int err;
455         int i;
456
457         if (!ioread8(&h->synd))
458                 return 0;
459
460         err = devlink_fmsg_pair_nest_start(fmsg, "health buffer");
461         if (err)
462                 return err;
463         err = devlink_fmsg_obj_nest_start(fmsg);
464         if (err)
465                 return err;
466         err = devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var");
467         if (err)
468                 return err;
469
470         for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) {
471                 err = devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i));
472                 if (err)
473                         return err;
474         }
475         err = devlink_fmsg_arr_pair_nest_end(fmsg);
476         if (err)
477                 return err;
478         err = devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr",
479                                         ioread32be(&h->assert_exit_ptr));
480         if (err)
481                 return err;
482         err = devlink_fmsg_u32_pair_put(fmsg, "assert_callra",
483                                         ioread32be(&h->assert_callra));
484         if (err)
485                 return err;
486         err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id));
487         if (err)
488                 return err;
489         err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index",
490                                        ioread8(&h->irisc_index));
491         if (err)
492                 return err;
493         err = devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd));
494         if (err)
495                 return err;
496         err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd",
497                                         ioread16be(&h->ext_synd));
498         if (err)
499                 return err;
500         err = devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver",
501                                         ioread32be(&h->fw_ver));
502         if (err)
503                 return err;
504         err = devlink_fmsg_obj_nest_end(fmsg);
505         if (err)
506                 return err;
507         return devlink_fmsg_pair_nest_end(fmsg);
508 }
509
510 static int
511 mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
512                       struct devlink_fmsg *fmsg, void *priv_ctx,
513                       struct netlink_ext_ack *extack)
514 {
515         struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
516         int err;
517
518         err = mlx5_fw_tracer_trigger_core_dump_general(dev);
519         if (err)
520                 return err;
521
522         if (priv_ctx) {
523                 struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
524
525                 err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
526                 if (err)
527                         return err;
528         }
529
530         err = mlx5_fw_reporter_heath_buffer_data_put(dev, fmsg);
531         if (err)
532                 return err;
533         return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg);
534 }
535
536 static void mlx5_fw_reporter_err_work(struct work_struct *work)
537 {
538         struct mlx5_fw_reporter_ctx fw_reporter_ctx;
539         struct mlx5_core_health *health;
540
541         health = container_of(work, struct mlx5_core_health, report_work);
542
543         if (IS_ERR_OR_NULL(health->fw_reporter))
544                 return;
545
546         fw_reporter_ctx.err_synd = health->synd;
547         fw_reporter_ctx.miss_counter = health->miss_counter;
548         if (fw_reporter_ctx.err_synd) {
549                 devlink_health_report(health->fw_reporter,
550                                       "FW syndrom reported", &fw_reporter_ctx);
551                 return;
552         }
553         if (fw_reporter_ctx.miss_counter)
554                 devlink_health_report(health->fw_reporter,
555                                       "FW miss counter reported",
556                                       &fw_reporter_ctx);
557 }
558
559 static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
560                 .name = "fw",
561                 .diagnose = mlx5_fw_reporter_diagnose,
562                 .dump = mlx5_fw_reporter_dump,
563 };
564
/*
 * devlink "recover" callback of the fw_fatal reporter: run the health
 * recovery flow on the device bound to @reporter.
 */
static int
mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
			       void *priv_ctx,
			       struct netlink_ext_ack *extack)
{
	return mlx5_health_try_recover(devlink_health_reporter_priv(reporter));
}
574
575 static int
576 mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
577                             struct devlink_fmsg *fmsg, void *priv_ctx,
578                             struct netlink_ext_ack *extack)
579 {
580         struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
581         u32 crdump_size = dev->priv.health.crdump_size;
582         u32 *cr_data;
583         int err;
584
585         if (!mlx5_core_is_pf(dev))
586                 return -EPERM;
587
588         cr_data = kvmalloc(crdump_size, GFP_KERNEL);
589         if (!cr_data)
590                 return -ENOMEM;
591         err = mlx5_crdump_collect(dev, cr_data);
592         if (err)
593                 goto free_data;
594
595         if (priv_ctx) {
596                 struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
597
598                 err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
599                 if (err)
600                         goto free_data;
601         }
602
603         err = devlink_fmsg_binary_pair_put(fmsg, "crdump_data", cr_data, crdump_size);
604
605 free_data:
606         kvfree(cr_data);
607         return err;
608 }
609
610 static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
611 {
612         struct mlx5_fw_reporter_ctx fw_reporter_ctx;
613         struct mlx5_core_health *health;
614         struct mlx5_core_dev *dev;
615         struct mlx5_priv *priv;
616
617         health = container_of(work, struct mlx5_core_health, fatal_report_work);
618         priv = container_of(health, struct mlx5_priv, health);
619         dev = container_of(priv, struct mlx5_core_dev, priv);
620
621         enter_error_state(dev, false);
622         if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
623                 if (mlx5_health_try_recover(dev))
624                         mlx5_core_err(dev, "health recovery failed\n");
625                 return;
626         }
627         fw_reporter_ctx.err_synd = health->synd;
628         fw_reporter_ctx.miss_counter = health->miss_counter;
629         devlink_health_report(health->fw_fatal_reporter,
630                               "FW fatal error reported", &fw_reporter_ctx);
631 }
632
633 static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
634                 .name = "fw_fatal",
635                 .recover = mlx5_fw_fatal_reporter_recover,
636                 .dump = mlx5_fw_fatal_reporter_dump,
637 };
638
639 #define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000
640 static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
641 {
642         struct mlx5_core_health *health = &dev->priv.health;
643         struct devlink *devlink = priv_to_devlink(dev);
644
645         health->fw_reporter =
646                 devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
647                                                0, dev);
648         if (IS_ERR(health->fw_reporter))
649                 mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
650                                PTR_ERR(health->fw_reporter));
651
652         health->fw_fatal_reporter =
653                 devlink_health_reporter_create(devlink,
654                                                &mlx5_fw_fatal_reporter_ops,
655                                                MLX5_REPORTER_FW_GRACEFUL_PERIOD,
656                                                dev);
657         if (IS_ERR(health->fw_fatal_reporter))
658                 mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n",
659                                PTR_ERR(health->fw_fatal_reporter));
660 }
661
662 static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev)
663 {
664         struct mlx5_core_health *health = &dev->priv.health;
665
666         if (!IS_ERR_OR_NULL(health->fw_reporter))
667                 devlink_health_reporter_destroy(health->fw_reporter);
668
669         if (!IS_ERR_OR_NULL(health->fw_fatal_reporter))
670                 devlink_health_reporter_destroy(health->fw_fatal_reporter);
671 }
672
673 static unsigned long get_next_poll_jiffies(void)
674 {
675         unsigned long next;
676
677         get_random_bytes(&next, sizeof(next));
678         next %= HZ;
679         next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
680
681         return next;
682 }
683
684 void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
685 {
686         struct mlx5_core_health *health = &dev->priv.health;
687         unsigned long flags;
688
689         spin_lock_irqsave(&health->wq_lock, flags);
690         if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
691                 queue_work(health->wq, &health->fatal_report_work);
692         else
693                 mlx5_core_err(dev, "new health works are not permitted at this stage\n");
694         spin_unlock_irqrestore(&health->wq_lock, flags);
695 }
696
697 static void poll_health(struct timer_list *t)
698 {
699         struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
700         struct mlx5_core_health *health = &dev->priv.health;
701         struct health_buffer __iomem *h = health->health;
702         u32 fatal_error;
703         u8 prev_synd;
704         u32 count;
705
706         if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
707                 goto out;
708
709         fatal_error = mlx5_health_check_fatal_sensors(dev);
710
711         if (fatal_error && !health->fatal_error) {
712                 mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
713                 dev->priv.health.fatal_error = fatal_error;
714                 print_health_info(dev);
715                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
716                 mlx5_trigger_health_work(dev);
717                 return;
718         }
719
720         count = ioread32be(health->health_counter);
721         if (count == health->prev)
722                 ++health->miss_counter;
723         else
724                 health->miss_counter = 0;
725
726         health->prev = count;
727         if (health->miss_counter == MAX_MISSES) {
728                 mlx5_core_err(dev, "device's health compromised - reached miss count\n");
729                 print_health_info(dev);
730                 queue_work(health->wq, &health->report_work);
731         }
732
733         prev_synd = health->synd;
734         health->synd = ioread8(&h->synd);
735         if (health->synd && health->synd != prev_synd)
736                 queue_work(health->wq, &health->report_work);
737
738 out:
739         mod_timer(&health->timer, get_next_poll_jiffies());
740 }
741
742 void mlx5_start_health_poll(struct mlx5_core_dev *dev)
743 {
744         struct mlx5_core_health *health = &dev->priv.health;
745
746         timer_setup(&health->timer, poll_health, 0);
747         health->fatal_error = MLX5_SENSOR_NO_ERR;
748         clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
749         health->health = &dev->iseg->health;
750         health->health_counter = &dev->iseg->health_counter;
751
752         health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL);
753         add_timer(&health->timer);
754 }
755
756 void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
757 {
758         struct mlx5_core_health *health = &dev->priv.health;
759         unsigned long flags;
760
761         if (disable_health) {
762                 spin_lock_irqsave(&health->wq_lock, flags);
763                 set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
764                 spin_unlock_irqrestore(&health->wq_lock, flags);
765         }
766
767         del_timer_sync(&health->timer);
768 }
769
770 void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
771 {
772         struct mlx5_core_health *health = &dev->priv.health;
773         unsigned long flags;
774
775         spin_lock_irqsave(&health->wq_lock, flags);
776         set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
777         spin_unlock_irqrestore(&health->wq_lock, flags);
778         cancel_work_sync(&health->report_work);
779         cancel_work_sync(&health->fatal_report_work);
780 }
781
782 void mlx5_health_flush(struct mlx5_core_dev *dev)
783 {
784         struct mlx5_core_health *health = &dev->priv.health;
785
786         flush_workqueue(health->wq);
787 }
788
789 void mlx5_health_cleanup(struct mlx5_core_dev *dev)
790 {
791         struct mlx5_core_health *health = &dev->priv.health;
792
793         destroy_workqueue(health->wq);
794         mlx5_fw_reporters_destroy(dev);
795 }
796
797 int mlx5_health_init(struct mlx5_core_dev *dev)
798 {
799         struct mlx5_core_health *health;
800         char *name;
801
802         mlx5_fw_reporters_create(dev);
803
804         health = &dev->priv.health;
805         name = kmalloc(64, GFP_KERNEL);
806         if (!name)
807                 goto out_err;
808
809         strcpy(name, "mlx5_health");
810         strcat(name, dev_name(dev->device));
811         health->wq = create_singlethread_workqueue(name);
812         kfree(name);
813         if (!health->wq)
814                 goto out_err;
815         spin_lock_init(&health->wq_lock);
816         INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work);
817         INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work);
818
819         return 0;
820
821 out_err:
822         mlx5_fw_reporters_destroy(dev);
823         return -ENOMEM;
824 }