devlink: Add health report functionality
authorEran Ben Elisha <eranbe@mellanox.com>
Thu, 7 Feb 2019 09:36:34 +0000 (11:36 +0200)
committerDavid S. Miller <davem@davemloft.net>
Thu, 7 Feb 2019 18:34:28 +0000 (10:34 -0800)
Upon discovering an error, any driver can report it to the devlink health
mechanism via the devlink_health_report function, using the appropriate
reporter registered to it. The driver can pass error-specific context, which
will be delivered back to it as part of the dump / recovery callbacks.

Once an error is reported, devlink health will perform the following actions:
* A log entry is sent to the kernel trace events buffer
* Health status and statistics are updated for the reporter instance
* An object dump is taken and stored at the reporter instance (as long
  as no other dump is already stored)
* An auto recovery attempt is made, depending on:
  - Auto Recovery configuration
  - Grace period vs. time since last recovery

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/devlink.h
include/trace/events/devlink.h
net/core/devlink.c

index 3dfe302..c12ad6e 100644 (file)
@@ -704,6 +704,8 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
 
 void *
 devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
+/* Report an error through @reporter. @msg is logged to the
+ * devlink_health_report trace event; @priv_ctx is handed to the reporter's
+ * dump and recover callbacks.
+ */
+int devlink_health_report(struct devlink_health_reporter *reporter,
+                         const char *msg, void *priv_ctx);
 
 #else
 
@@ -1173,6 +1175,13 @@ devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
 {
        return NULL;
 }
+
+/* Fallback stub in the #else branch of this header: ignores all of its
+ * arguments and reports success (0).
+ */
+static inline int
+devlink_health_report(struct devlink_health_reporter *reporter,
+                     const char *msg, void *priv_ctx)
+{
+       return 0;
+}
 #endif
 
 #if IS_REACHABLE(CONFIG_NET_DEVLINK)
index 4070536..191ddf6 100644 (file)
@@ -75,6 +75,71 @@ TRACE_EVENT(devlink_hwerr,
                        __get_str(driver_name), __entry->err, __get_str(msg))
 );
 
+/*
+ * Tracepoint for devlink health message:
+ * records the bus, device and driver names of the devlink instance's
+ * device, the name of the reporter the error was reported through, and the
+ * error message itself.
+ */
+TRACE_EVENT(devlink_health_report,
+       TP_PROTO(const struct devlink *devlink, const char *reporter_name,
+                const char *msg),
+
+       TP_ARGS(devlink, reporter_name, msg),
+
+       TP_STRUCT__entry(
+               __string(bus_name, devlink->dev->bus->name)
+               __string(dev_name, dev_name(devlink->dev))
+               __string(driver_name, devlink->dev->driver->name)
+               /* must be sized from reporter_name, the string copied into it below */
+               __string(reporter_name, reporter_name)
+               __string(msg, msg)
+       ),
+
+       TP_fast_assign(
+               __assign_str(bus_name, devlink->dev->bus->name);
+               __assign_str(dev_name, dev_name(devlink->dev));
+               __assign_str(driver_name, devlink->dev->driver->name);
+               __assign_str(reporter_name, reporter_name);
+               __assign_str(msg, msg);
+       ),
+
+       TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: %s",
+                 __get_str(bus_name), __get_str(dev_name),
+                 __get_str(driver_name), __get_str(reporter_name),
+                 __get_str(msg))
+);
+
+/*
+ * Tracepoint for devlink health recover aborted message:
+ * records the bus, device and driver names of the devlink instance's
+ * device, the reporter name, and the health_state and
+ * time_since_last_recover values supplied by the caller
+ * (devlink_health_report() passes a jiffies delta for the latter).
+ */
+TRACE_EVENT(devlink_health_recover_aborted,
+       TP_PROTO(const struct devlink *devlink, const char *reporter_name,
+                bool health_state, u64 time_since_last_recover),
+
+       TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover),
+
+       TP_STRUCT__entry(
+               __string(bus_name, devlink->dev->bus->name)
+               __string(dev_name, dev_name(devlink->dev))
+               __string(driver_name, devlink->dev->driver->name)
+               __string(reporter_name, reporter_name)
+               __field(bool, health_state)
+               __field(u64, time_since_last_recover)
+       ),
+
+       TP_fast_assign(
+               __assign_str(bus_name, devlink->dev->bus->name);
+               __assign_str(dev_name, dev_name(devlink->dev));
+               __assign_str(driver_name, devlink->dev->driver->name);
+               __assign_str(reporter_name, reporter_name);
+               __entry->health_state = health_state;
+               __entry->time_since_last_recover = time_since_last_recover;
+       ),
+
+       TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: health_state=%d time_since_last_recover=%llu recover aborted",
+                 __get_str(bus_name), __get_str(dev_name),
+                 __get_str(driver_name), __get_str(reporter_name),
+                 __entry->health_state,
+                 __entry->time_since_last_recover)
+);
+
 #endif /* _TRACE_DEVLINK_H */
 
 /* This part must be outside protection */
index 341548d..3eaa290 100644 (file)
@@ -4367,9 +4367,20 @@ struct devlink_health_reporter {
        void *priv;
        const struct devlink_health_reporter_ops *ops;
        struct devlink *devlink;
+       struct devlink_fmsg *dump_fmsg;
+       struct mutex dump_lock; /* lock parallel read/write from dump buffers */
        u64 graceful_period;
        bool auto_recover;
        u8 health_state;
+       u64 dump_ts;
+       u64 error_count;
+       u64 recovery_count;
+       u64 last_recovery_ts;
+};
+
+/* Values stored in devlink_health_reporter.health_state. */
+enum devlink_health_reporter_state {
+       /* set after the reporter's recover callback succeeds */
+       DEVLINK_HEALTH_REPORTER_STATE_HEALTHY,
+       /* set by devlink_health_report() when an error is reported */
+       DEVLINK_HEALTH_REPORTER_STATE_ERROR,
 };
 
 void *
@@ -4431,6 +4442,7 @@ devlink_health_reporter_create(struct devlink *devlink,
        reporter->devlink = devlink;
        reporter->graceful_period = graceful_period;
        reporter->auto_recover = auto_recover;
+       mutex_init(&reporter->dump_lock);
        list_add_tail(&reporter->list, &devlink->reporter_list);
 unlock:
        mutex_unlock(&devlink->lock);
@@ -4449,10 +4461,117 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
        mutex_lock(&reporter->devlink->lock);
        list_del(&reporter->list);
        mutex_unlock(&reporter->devlink->lock);
+       if (reporter->dump_fmsg)
+               devlink_fmsg_free(reporter->dump_fmsg);
        kfree(reporter);
 }
 EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
 
+/* Run the reporter's recover callback and, if it succeeds, account for the
+ * recovery: bump the recovery counter, mark the reporter healthy and stamp
+ * the recovery time.
+ *
+ * Returns -EOPNOTSUPP when the reporter has no recover callback, the
+ * callback's error code when it fails, and 0 on success.
+ */
+static int
+devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
+                               void *priv_ctx)
+{
+       int ret;
+
+       if (reporter->ops->recover == NULL)
+               return -EOPNOTSUPP;
+
+       ret = reporter->ops->recover(reporter, priv_ctx);
+       if (ret == 0) {
+               /* bookkeeping happens only for a successful recovery */
+               reporter->last_recovery_ts = jiffies;
+               reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
+               reporter->recovery_count++;
+       }
+
+       return ret;
+}
+
+/* Free the dump stored on the reporter, if there is one, and reset the
+ * pointer so a later dump can be stored.
+ */
+static void
+devlink_health_dump_clear(struct devlink_health_reporter *reporter)
+{
+       if (reporter->dump_fmsg) {
+               devlink_fmsg_free(reporter->dump_fmsg);
+               reporter->dump_fmsg = NULL;
+       }
+}
+
+/* Take a dump through the reporter's dump callback and keep it on the
+ * reporter, stamped with the time it was taken.
+ *
+ * Does nothing and returns 0 when the reporter has no dump callback or a
+ * dump is already stored. Returns -ENOMEM when the message cannot be
+ * allocated; any other failure discards the partial dump and returns the
+ * failing step's error code.
+ */
+static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
+                                 void *priv_ctx)
+{
+       int ret;
+
+       if (!reporter->ops->dump || reporter->dump_fmsg)
+               return 0;
+
+       reporter->dump_fmsg = devlink_fmsg_alloc();
+       if (!reporter->dump_fmsg)
+               return -ENOMEM;
+
+       /* the callback's output is wrapped in a single object nest */
+       ret = devlink_fmsg_obj_nest_start(reporter->dump_fmsg);
+       if (!ret)
+               ret = reporter->ops->dump(reporter, reporter->dump_fmsg,
+                                         priv_ctx);
+       if (!ret)
+               ret = devlink_fmsg_obj_nest_end(reporter->dump_fmsg);
+
+       if (ret) {
+               devlink_health_dump_clear(reporter);
+               return ret;
+       }
+
+       reporter->dump_ts = jiffies;
+       return 0;
+}
+
+int devlink_health_report(struct devlink_health_reporter *reporter,
+                         const char *msg, void *priv_ctx)
+{
+       struct devlink *devlink = reporter->devlink;
+
+       /* write a log message of the current error */
+       WARN_ON(!msg);
+       trace_devlink_health_report(devlink, reporter->ops->name, msg);
+       reporter->error_count++;
+
+       /* abort if the previous error wasn't recovered */
+       if (reporter->auto_recover &&
+           (reporter->health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
+            jiffies - reporter->last_recovery_ts <
+            msecs_to_jiffies(reporter->graceful_period))) {
+               trace_devlink_health_recover_aborted(devlink,
+                                                    reporter->ops->name,
+                                                    reporter->health_state,
+                                                    jiffies -
+                                                    reporter->last_recovery_ts);
+               return -ECANCELED;
+       }
+
+       reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
+
+       mutex_lock(&reporter->dump_lock);
+       /* store current dump of current error, for later analysis */
+       devlink_health_do_dump(reporter, priv_ctx);
+       mutex_unlock(&reporter->dump_lock);
+
+       if (reporter->auto_recover)
+               return devlink_health_reporter_recover(reporter, priv_ctx);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_health_report);
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
        [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
        [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },