powerpc/papr_scm: Fetch nvdimm health information from PHYP
[linux-2.6-microblaze.git] / arch / powerpc / platforms / pseries / papr_scm.c
index f355924..0c09162 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/libnvdimm.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
+#include <linux/seq_buf.h>
 
 #include <asm/plpar_wrappers.h>
 
         (1ul << ND_CMD_GET_CONFIG_DATA) | \
         (1ul << ND_CMD_SET_CONFIG_DATA))
 
+/* DIMM health bitmap indicators */
+/* SCM device is unable to persist memory contents */
+#define PAPR_PMEM_UNARMED                   (1ULL << (63 - 0))
+/* SCM device failed to persist memory contents */
+#define PAPR_PMEM_SHUTDOWN_DIRTY            (1ULL << (63 - 1))
+/* SCM device contents are persisted from previous IPL */
+#define PAPR_PMEM_SHUTDOWN_CLEAN            (1ULL << (63 - 2))
+/* SCM device contents are not persisted from previous IPL */
+#define PAPR_PMEM_EMPTY                     (1ULL << (63 - 3))
+/* SCM device memory life remaining is critically low */
+#define PAPR_PMEM_HEALTH_CRITICAL           (1ULL << (63 - 4))
+/* SCM device will be garded off next IPL due to failure */
+#define PAPR_PMEM_HEALTH_FATAL              (1ULL << (63 - 5))
+/* SCM contents cannot persist due to current platform health status */
+#define PAPR_PMEM_HEALTH_UNHEALTHY          (1ULL << (63 - 6))
+/* SCM device is unable to persist memory contents in certain conditions */
+#define PAPR_PMEM_HEALTH_NON_CRITICAL       (1ULL << (63 - 7))
+/* SCM device is encrypted */
+#define PAPR_PMEM_ENCRYPTED                 (1ULL << (63 - 8))
+/* SCM device has been scrubbed and locked */
+#define PAPR_PMEM_SCRUBBED_AND_LOCKED       (1ULL << (63 - 9))
+
+/* Bit status indicators for health bitmap indicating unarmed dimm */
+#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED |            \
+                               PAPR_PMEM_HEALTH_UNHEALTHY)
+
+/* Bit status indicators for health bitmap indicating unflushed dimm */
+#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY)
+
+/* Bit status indicators for health bitmap indicating unrestored dimm */
+#define PAPR_PMEM_BAD_RESTORE_MASK  (PAPR_PMEM_EMPTY)
+
+/* Bit status indicators for smart event notification */
+#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \
+                                   PAPR_PMEM_HEALTH_FATAL |    \
+                                   PAPR_PMEM_HEALTH_UNHEALTHY)
+
+/* private struct associated with each region */
 struct papr_scm_priv {
        struct platform_device *pdev;
        struct device_node *dn;
@@ -39,6 +78,15 @@ struct papr_scm_priv {
        struct resource res;
        struct nd_region *region;
        struct nd_interleave_set nd_set;
+
+       /* Protect dimm health data from concurrent read/writes */
+       struct mutex health_mutex;
+
+       /* Last time the health information of the dimm was updated */
+       unsigned long lasthealth_jiffies;
+
+       /* Health information for the dimm */
+       u64 health_bitmap;
 };
 
 static int drc_pmem_bind(struct papr_scm_priv *p)
@@ -144,6 +192,61 @@ err_out:
        return drc_pmem_bind(p);
 }
 
+/*
+ * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the
+ * health information.
+ *
+ * The H_SCM_HEALTH hcall is issued for p->drc_index. On success the current
+ * jiffies value is recorded in p->lasthealth_jiffies and p->health_bitmap is
+ * set to ret[0] & ret[1] (logged below as "Bitmap" and "Mask" respectively),
+ * so only bits set in both returned words are kept.
+ *
+ * This function takes no lock itself; the caller visible in this file,
+ * drc_pmem_query_health(), holds p->health_mutex around the call.
+ *
+ * Returns 0 on success, or -ENXIO if the hcall does not return H_SUCCESS (in
+ * which case the cached fields in papr_scm_priv are left unmodified).
+ */
+static int __drc_pmem_query_health(struct papr_scm_priv *p)
+{
+       unsigned long ret[PLPAR_HCALL_BUFSIZE];
+       long rc;
+
+       /* issue the hcall */
+       rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index);
+       if (rc != H_SUCCESS) {
+               dev_err(&p->pdev->dev,
+                       "Failed to query health information, Err:%ld\n", rc);
+               return -ENXIO;
+       }
+
+       p->lasthealth_jiffies = jiffies;
+       p->health_bitmap = ret[0] & ret[1];
+
+       dev_dbg(&p->pdev->dev,
+               "Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n",
+               ret[0], ret[1]);
+
+       return 0;
+}
+
+/* Min interval in seconds for assuming stable dimm health */
+#define MIN_HEALTH_QUERY_INTERVAL 60
+
+/*
+ * Query cached health info and if needed call __drc_pmem_query_health.
+ *
+ * Takes p->health_mutex (interruptibly) and re-queries the dimm health only
+ * when jiffies is past p->lasthealth_jiffies plus MIN_HEALTH_QUERY_INTERVAL
+ * seconds; otherwise the cached values in papr_scm_priv are left untouched.
+ *
+ * Returns 0 if the cached data is still considered valid or was refreshed,
+ * the non-zero value from mutex_lock_interruptible() if the lock was not
+ * taken, or the error returned by __drc_pmem_query_health().
+ */
+static int drc_pmem_query_health(struct papr_scm_priv *p)
+{
+       unsigned long cache_timeout;
+       int rc;
+
+       /* Protect concurrent modifications to papr_scm_priv */
+       rc = mutex_lock_interruptible(&p->health_mutex);
+       if (rc)
+               return rc;
+
+       /* Jiffies value up to which the cached health data is assumed valid */
+       cache_timeout = p->lasthealth_jiffies +
+               msecs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL * 1000);
+
+       /* Fetch new health info if it is older than MIN_HEALTH_QUERY_INTERVAL */
+       if (time_after(jiffies, cache_timeout))
+               rc = __drc_pmem_query_health(p);
+       else
+               /* Assume cached health data is valid */
+               rc = 0;
+
+       mutex_unlock(&p->health_mutex);
+       return rc;
+}
 
 static int papr_scm_meta_get(struct papr_scm_priv *p,
                             struct nd_cmd_get_config_data_hdr *hdr)
@@ -286,6 +389,64 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
        return 0;
 }
 
+static ssize_t flags_show(struct device *dev,
+                         struct device_attribute *attr, char *buf)
+{
+       struct nvdimm *dimm = to_nvdimm(dev);
+       struct papr_scm_priv *p = nvdimm_provider_data(dimm);
+       struct seq_buf s;
+       u64 health;
+       int rc;
+
+       rc = drc_pmem_query_health(p); /* refresh cached health if stale */
+       if (rc)
+               return rc;
+
+       /*
+        * Copy health_bitmap locally, check masks & update out buffer.
+        *
+        * Each check below appends a space-terminated flag name when any bit
+        * of the corresponding mask/indicator is set in the local copy. The
+        * copy is taken once so every check sees the same bitmap value.
+        */
+       health = READ_ONCE(p->health_bitmap);
+
+       seq_buf_init(&s, buf, PAGE_SIZE); /* cap output at PAGE_SIZE */
+       if (health & PAPR_PMEM_UNARMED_MASK)
+               seq_buf_printf(&s, "not_armed ");
+
+       if (health & PAPR_PMEM_BAD_SHUTDOWN_MASK)
+               seq_buf_printf(&s, "flush_fail ");
+
+       if (health & PAPR_PMEM_BAD_RESTORE_MASK)
+               seq_buf_printf(&s, "restore_fail ");
+
+       if (health & PAPR_PMEM_ENCRYPTED)
+               seq_buf_printf(&s, "encrypted ");
+
+       if (health & PAPR_PMEM_SMART_EVENT_MASK)
+               seq_buf_printf(&s, "smart_notify ");
+
+       if (health & PAPR_PMEM_SCRUBBED_AND_LOCKED)
+               seq_buf_printf(&s, "scrubbed locked ");
+
+       if (seq_buf_used(&s)) /* newline only if some flag was emitted */
+               seq_buf_printf(&s, "\n");
+
+       return seq_buf_used(&s); /* bytes used in the seq_buf */
+}
+DEVICE_ATTR_RO(flags);
+
+/*
+ * papr_scm specific dimm attributes.
+ *
+ * The "flags" attribute is listed in papr_nd_attributes, which is wrapped in
+ * an attribute group named "papr". The NULL-terminated papr_nd_attr_groups
+ * list is what gets passed to nvdimm_create() when the dimm is created.
+ */
+static struct attribute *papr_nd_attributes[] = {
+       &dev_attr_flags.attr,
+       NULL,
+};
+
+static struct attribute_group papr_nd_attribute_group = {
+       .name = "papr",
+       .attrs = papr_nd_attributes,
+};
+
+static const struct attribute_group *papr_nd_attr_groups[] = {
+       &papr_nd_attribute_group,
+       NULL,
+};
+
 static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 {
        struct device *dev = &p->pdev->dev;
@@ -312,8 +473,8 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
        dimm_flags = 0;
        set_bit(NDD_LABELING, &dimm_flags);
 
-       p->nvdimm = nvdimm_create(p->bus, p, NULL, dimm_flags,
-                                 PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
+       p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups,
+                                 dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
        if (!p->nvdimm) {
                dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn);
                goto err;
@@ -399,6 +560,9 @@ static int papr_scm_probe(struct platform_device *pdev)
        if (!p)
                return -ENOMEM;
 
+       /* Initialize the dimm mutex */
+       mutex_init(&p->health_mutex);
+
        /* optional DT properties */
        of_property_read_u32(dn, "ibm,metadata-size", &metadata_size);