2 * edac_mc kernel module
3 * (C) 2005 Linux Networx (http://lnxi.com)
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 * http://www.anime.net/~goemon/linux-ecc/
11 * Modified by Dave Peterson and Doug Thompson
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/proc_fs.h>
19 #include <linux/kernel.h>
20 #include <linux/types.h>
21 #include <linux/smp.h>
22 #include <linux/init.h>
23 #include <linux/sysctl.h>
24 #include <linux/highmem.h>
25 #include <linux/timer.h>
26 #include <linux/slab.h>
27 #include <linux/jiffies.h>
28 #include <linux/spinlock.h>
29 #include <linux/list.h>
30 #include <linux/sysdev.h>
31 #include <linux/ctype.h>
32 #include <linux/kthread.h>
34 #include <asm/uaccess.h>
40 #define EDAC_MC_VERSION "Ver: 2.0.0 " __DATE__
42 /* For now, disable the EDAC sysfs code. The sysfs interface that EDAC
43 * presents to user space needs more thought, and is likely to change
46 #define DISABLE_EDAC_SYSFS
48 #ifdef CONFIG_EDAC_DEBUG
49 /* Values of 0 to 4 will generate output */
50 int edac_debug_level = 1;
51 EXPORT_SYMBOL(edac_debug_level);
54 /* EDAC Controls, settable by module parameter and sysfs */
55 static int log_ue = 1;
56 static int log_ce = 1;
57 static int panic_on_ue;
58 static int poll_msec = 1000;
60 static int check_pci_parity = 0; /* default NO check PCI parity */
61 static int panic_on_pci_parity; /* default no panic on PCI Parity */
62 static atomic_t pci_parity_count = ATOMIC_INIT(0);
64 /* lock to memory controller's control array */
65 static DECLARE_MUTEX(mem_ctls_mutex);
66 static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices);
68 static struct task_struct *edac_thread;
70 /* Structure of the whitelist and blacklist arrays */
71 struct edac_pci_device_list {
72 unsigned int vendor; /* Vendor ID */
73 unsigned int device; /* Device ID */
77 #define MAX_LISTED_PCI_DEVICES 32
79 /* List of PCI devices (vendor-id:device-id) that should be skipped */
80 static struct edac_pci_device_list pci_blacklist[MAX_LISTED_PCI_DEVICES];
81 static int pci_blacklist_count;
83 /* List of PCI devices (vendor-id:device-id) that should be scanned */
84 static struct edac_pci_device_list pci_whitelist[MAX_LISTED_PCI_DEVICES];
85 static int pci_whitelist_count ;
87 /* START sysfs data and methods */
89 #ifndef DISABLE_EDAC_SYSFS
91 static const char *mem_types[] = {
92 [MEM_EMPTY] = "Empty",
93 [MEM_RESERVED] = "Reserved",
94 [MEM_UNKNOWN] = "Unknown",
98 [MEM_SDR] = "Unbuffered-SDR",
99 [MEM_RDR] = "Registered-SDR",
100 [MEM_DDR] = "Unbuffered-DDR",
101 [MEM_RDDR] = "Registered-DDR",
105 static const char *dev_types[] = {
106 [DEV_UNKNOWN] = "Unknown",
116 static const char *edac_caps[] = {
117 [EDAC_UNKNOWN] = "Unknown",
118 [EDAC_NONE] = "None",
119 [EDAC_RESERVED] = "Reserved",
120 [EDAC_PARITY] = "PARITY",
122 [EDAC_SECDED] = "SECDED",
123 [EDAC_S2ECD2ED] = "S2ECD2ED",
124 [EDAC_S4ECD4ED] = "S4ECD4ED",
125 [EDAC_S8ECD8ED] = "S8ECD8ED",
126 [EDAC_S16ECD16ED] = "S16ECD16ED"
130 /* sysfs object: /sys/devices/system/edac */
131 static struct sysdev_class edac_class = {
132 set_kset_name("edac"),
136 * /sys/devices/system/edac/mc
137 * /sys/devices/system/edac/pci
139 static struct kobject edac_memctrl_kobj;
140 static struct kobject edac_pci_kobj;
143 * /sys/devices/system/edac/mc;
144 * data structures and methods
/* Show method for string attributes: formats the NUL-terminated string that
 * 'ptr' points to, followed by a newline, into 'buffer'.
 * Returns whatever sprintf() reports (number of characters written).
 */
static ssize_t memctrl_string_show(void *ptr, char *buffer)
{
	const char *str = ptr;

	return sprintf(buffer, "%s\n", str);
}
/* Show method for integer attributes: formats the int that 'ptr' points to
 * as decimal, followed by a newline, into 'buffer'.
 * Returns whatever sprintf() reports (number of characters written).
 */
static ssize_t memctrl_int_show(void *ptr, char *buffer)
{
	const int *num = ptr;

	return sprintf(buffer, "%d\n", *num);
}
160 static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
162 int *value = (int*) ptr;
164 if (isdigit(*buffer))
165 *value = simple_strtoul(buffer, NULL, 0);
170 struct memctrl_dev_attribute {
171 struct attribute attr;
173 ssize_t (*show)(void *,char *);
174 ssize_t (*store)(void *, const char *, size_t);
177 /* Set of show/store abstract level functions for memory control object */
179 memctrl_dev_show(struct kobject *kobj, struct attribute *attr, char *buffer)
181 struct memctrl_dev_attribute *memctrl_dev;
182 memctrl_dev = (struct memctrl_dev_attribute*)attr;
184 if (memctrl_dev->show)
185 return memctrl_dev->show(memctrl_dev->value, buffer);
190 memctrl_dev_store(struct kobject *kobj, struct attribute *attr,
191 const char *buffer, size_t count)
193 struct memctrl_dev_attribute *memctrl_dev;
194 memctrl_dev = (struct memctrl_dev_attribute*)attr;
196 if (memctrl_dev->store)
197 return memctrl_dev->store(memctrl_dev->value, buffer, count);
201 static struct sysfs_ops memctrlfs_ops = {
202 .show = memctrl_dev_show,
203 .store = memctrl_dev_store
206 #define MEMCTRL_ATTR(_name,_mode,_show,_store) \
207 struct memctrl_dev_attribute attr_##_name = { \
208 .attr = {.name = __stringify(_name), .mode = _mode }, \
214 #define MEMCTRL_STRING_ATTR(_name,_data,_mode,_show,_store) \
215 struct memctrl_dev_attribute attr_##_name = { \
216 .attr = {.name = __stringify(_name), .mode = _mode }, \
222 /* mc version attribute file */
224 MEMCTRL_STRING_ATTR(mc_version,EDAC_MC_VERSION,S_IRUGO,memctrl_string_show,NULL);
227 /* memory controller control files */
228 MEMCTRL_ATTR(panic_on_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
229 MEMCTRL_ATTR(log_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
230 MEMCTRL_ATTR(log_ce,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
231 MEMCTRL_ATTR(poll_msec,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
234 /* Base Attributes of the memory ECC object */
235 static struct memctrl_dev_attribute *memctrl_attr[] = {
243 /* Main MC kobject release() function */
244 static void edac_memctrl_master_release(struct kobject *kobj)
246 debugf1("%s()\n", __func__);
249 static struct kobj_type ktype_memctrl = {
250 .release = edac_memctrl_master_release,
251 .sysfs_ops = &memctrlfs_ops,
252 .default_attrs = (struct attribute **) memctrl_attr,
255 #endif /* DISABLE_EDAC_SYSFS */
257 /* Initialize the main sysfs entries for edac:
258 * /sys/devices/system/edac
265 static int edac_sysfs_memctrl_setup(void)
266 #ifdef DISABLE_EDAC_SYSFS
274 debugf1("%s()\n", __func__);
276 /* create the /sys/devices/system/edac directory */
277 err = sysdev_class_register(&edac_class);
279 /* Init the MC's kobject */
280 memset(&edac_memctrl_kobj, 0, sizeof (edac_memctrl_kobj));
281 kobject_init(&edac_memctrl_kobj);
283 edac_memctrl_kobj.parent = &edac_class.kset.kobj;
284 edac_memctrl_kobj.ktype = &ktype_memctrl;
286 /* generate sysfs "..../edac/mc" */
287 err = kobject_set_name(&edac_memctrl_kobj,"mc");
289 /* FIXME: maybe new sysdev_create_subdir() */
290 err = kobject_register(&edac_memctrl_kobj);
292 debugf1("Failed to register '.../edac/mc'\n");
294 debugf1("Registered '.../edac/mc' kobject\n");
298 debugf1("%s() error=%d\n", __func__, err);
303 #endif /* DISABLE_EDAC_SYSFS */
307 * the '..../edac/mc' kobject followed by '..../edac' itself
309 static void edac_sysfs_memctrl_teardown(void)
311 #ifndef DISABLE_EDAC_SYSFS
312 debugf0("MC: " __FILE__ ": %s()\n", __func__);
314 /* Unregister the MC's kobject */
315 kobject_unregister(&edac_memctrl_kobj);
317 /* release the master edac mc kobject */
318 kobject_put(&edac_memctrl_kobj);
320 /* Unregister the 'edac' object */
321 sysdev_class_unregister(&edac_class);
322 #endif /* DISABLE_EDAC_SYSFS */
325 #ifndef DISABLE_EDAC_SYSFS
328 * /sys/devices/system/edac/pci;
329 * data structures and methods
332 struct list_control {
333 struct edac_pci_device_list *list;
339 /* Output the list as: vendor_id:device_id<,vendor_id:device_id> */
340 static ssize_t edac_pci_list_string_show(void *ptr, char *buffer)
342 struct list_control *listctl;
343 struct edac_pci_device_list *list;
349 list = listctl->list;
351 for (i = 0; i < *(listctl->count); i++, list++ ) {
353 len += snprintf(p + len, (PAGE_SIZE-len), ",");
355 len += snprintf(p + len,
358 list->vendor,list->device);
361 len += snprintf(p + len,(PAGE_SIZE-len), "\n");
363 return (ssize_t) len;
368 * Scan string from **s to **e looking for one 'vendor:device' tuple
369 * where each field is a hex value
371 * return 0 if an entry is NOT found
372 * return 1 if an entry is found
373 * fill in *vendor_id and *device_id with values found
375 * In both cases, make sure *s has been moved forward toward *e
377 static int parse_one_device(const char **s,const char **e,
378 unsigned int *vendor_id, unsigned int *device_id)
380 const char *runner, *p;
382 /* if null byte, we are done */
384 (*s)++; /* keep *s moving */
388 /* skip over newlines & whitespace */
389 if ((**s == '\n') || isspace(**s)) {
394 if (!isxdigit(**s)) {
399 /* parse vendor_id */
401 while (runner < *e) {
402 /* scan for vendor:device delimiter */
403 if (*runner == ':') {
404 *vendor_id = simple_strtol((char*) *s, (char**) &p, 16);
411 if (!isxdigit(*runner)) {
416 /* parse device_id */
418 *device_id = simple_strtol((char*)runner, (char**)&p, 16);
427 static ssize_t edac_pci_list_string_store(void *ptr, const char *buffer,
430 struct list_control *listctl;
431 struct edac_pci_device_list *list;
432 unsigned int vendor_id, device_id;
440 list = listctl->list;
441 index = listctl->count;
444 while (*index < MAX_LISTED_PCI_DEVICES) {
446 if (parse_one_device(&s,&e,&vendor_id,&device_id)) {
447 list[ *index ].vendor = vendor_id;
448 list[ *index ].device = device_id;
452 /* check for all data consume */
461 static ssize_t edac_pci_int_show(void *ptr, char *buffer)
464 return sprintf(buffer,"%d\n",*value);
467 static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count)
471 if (isdigit(*buffer))
472 *value = simple_strtoul(buffer,NULL,0);
477 struct edac_pci_dev_attribute {
478 struct attribute attr;
480 ssize_t (*show)(void *,char *);
481 ssize_t (*store)(void *, const char *,size_t);
484 /* Set of show/store abstract level functions for PCI Parity object */
485 static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr,
488 struct edac_pci_dev_attribute *edac_pci_dev;
489 edac_pci_dev= (struct edac_pci_dev_attribute*)attr;
491 if (edac_pci_dev->show)
492 return edac_pci_dev->show(edac_pci_dev->value, buffer);
496 static ssize_t edac_pci_dev_store(struct kobject *kobj, struct attribute *attr,
497 const char *buffer, size_t count)
499 struct edac_pci_dev_attribute *edac_pci_dev;
500 edac_pci_dev= (struct edac_pci_dev_attribute*)attr;
502 if (edac_pci_dev->show)
503 return edac_pci_dev->store(edac_pci_dev->value, buffer, count);
507 static struct sysfs_ops edac_pci_sysfs_ops = {
508 .show = edac_pci_dev_show,
509 .store = edac_pci_dev_store
513 #define EDAC_PCI_ATTR(_name,_mode,_show,_store) \
514 struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
515 .attr = {.name = __stringify(_name), .mode = _mode }, \
521 #define EDAC_PCI_STRING_ATTR(_name,_data,_mode,_show,_store) \
522 struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
523 .attr = {.name = __stringify(_name), .mode = _mode }, \
530 static struct list_control pci_whitelist_control = {
531 .list = pci_whitelist,
532 .count = &pci_whitelist_count
535 static struct list_control pci_blacklist_control = {
536 .list = pci_blacklist,
537 .count = &pci_blacklist_count
540 /* whitelist attribute */
541 EDAC_PCI_STRING_ATTR(pci_parity_whitelist,
542 &pci_whitelist_control,
544 edac_pci_list_string_show,
545 edac_pci_list_string_store);
547 EDAC_PCI_STRING_ATTR(pci_parity_blacklist,
548 &pci_blacklist_control,
550 edac_pci_list_string_show,
551 edac_pci_list_string_store);
554 /* PCI Parity control files */
555 EDAC_PCI_ATTR(check_pci_parity,S_IRUGO|S_IWUSR,edac_pci_int_show,edac_pci_int_store);
556 EDAC_PCI_ATTR(panic_on_pci_parity,S_IRUGO|S_IWUSR,edac_pci_int_show,edac_pci_int_store);
557 EDAC_PCI_ATTR(pci_parity_count,S_IRUGO,edac_pci_int_show,NULL);
559 /* Base Attributes of the PCI parity object */
560 static struct edac_pci_dev_attribute *edac_pci_attr[] = {
561 &edac_pci_attr_check_pci_parity,
562 &edac_pci_attr_panic_on_pci_parity,
563 &edac_pci_attr_pci_parity_count,
567 /* No memory to release */
568 static void edac_pci_release(struct kobject *kobj)
570 debugf1("%s()\n", __func__);
573 static struct kobj_type ktype_edac_pci = {
574 .release = edac_pci_release,
575 .sysfs_ops = &edac_pci_sysfs_ops,
576 .default_attrs = (struct attribute **) edac_pci_attr,
579 #endif /* DISABLE_EDAC_SYSFS */
582 * edac_sysfs_pci_setup()
585 static int edac_sysfs_pci_setup(void)
586 #ifdef DISABLE_EDAC_SYSFS
594 debugf1("%s()\n", __func__);
596 memset(&edac_pci_kobj, 0, sizeof(edac_pci_kobj));
598 kobject_init(&edac_pci_kobj);
599 edac_pci_kobj.parent = &edac_class.kset.kobj;
600 edac_pci_kobj.ktype = &ktype_edac_pci;
602 err = kobject_set_name(&edac_pci_kobj, "pci");
604 /* Instantiate the pci object */
605 /* FIXME: maybe new sysdev_create_subdir() */
606 err = kobject_register(&edac_pci_kobj);
608 debugf1("Failed to register '.../edac/pci'\n");
610 debugf1("Registered '.../edac/pci' kobject\n");
614 #endif /* DISABLE_EDAC_SYSFS */
616 static void edac_sysfs_pci_teardown(void)
618 #ifndef DISABLE_EDAC_SYSFS
619 debugf0("%s()\n", __func__);
621 kobject_unregister(&edac_pci_kobj);
622 kobject_put(&edac_pci_kobj);
626 #ifndef DISABLE_EDAC_SYSFS
628 /* EDAC sysfs CSROW data structures and methods */
630 /* Set of more detailed csrow<id> attribute show/store functions */
631 static ssize_t csrow_ch0_dimm_label_show(struct csrow_info *csrow, char *data)
635 if (csrow->nr_channels > 0) {
636 size = snprintf(data, EDAC_MC_LABEL_LEN,"%s\n",
637 csrow->channels[0].label);
642 static ssize_t csrow_ch1_dimm_label_show(struct csrow_info *csrow, char *data)
646 if (csrow->nr_channels > 0) {
647 size = snprintf(data, EDAC_MC_LABEL_LEN, "%s\n",
648 csrow->channels[1].label);
653 static ssize_t csrow_ch0_dimm_label_store(struct csrow_info *csrow,
654 const char *data, size_t size)
656 ssize_t max_size = 0;
658 if (csrow->nr_channels > 0) {
659 max_size = min((ssize_t)size,(ssize_t)EDAC_MC_LABEL_LEN-1);
660 strncpy(csrow->channels[0].label, data, max_size);
661 csrow->channels[0].label[max_size] = '\0';
666 static ssize_t csrow_ch1_dimm_label_store(struct csrow_info *csrow,
667 const char *data, size_t size)
669 ssize_t max_size = 0;
671 if (csrow->nr_channels > 1) {
672 max_size = min((ssize_t)size,(ssize_t)EDAC_MC_LABEL_LEN-1);
673 strncpy(csrow->channels[1].label, data, max_size);
674 csrow->channels[1].label[max_size] = '\0';
679 static ssize_t csrow_ue_count_show(struct csrow_info *csrow, char *data)
681 return sprintf(data,"%u\n", csrow->ue_count);
684 static ssize_t csrow_ce_count_show(struct csrow_info *csrow, char *data)
686 return sprintf(data,"%u\n", csrow->ce_count);
689 static ssize_t csrow_ch0_ce_count_show(struct csrow_info *csrow, char *data)
693 if (csrow->nr_channels > 0) {
694 size = sprintf(data,"%u\n", csrow->channels[0].ce_count);
699 static ssize_t csrow_ch1_ce_count_show(struct csrow_info *csrow, char *data)
703 if (csrow->nr_channels > 1) {
704 size = sprintf(data,"%u\n", csrow->channels[1].ce_count);
709 static ssize_t csrow_size_show(struct csrow_info *csrow, char *data)
711 return sprintf(data,"%u\n", PAGES_TO_MiB(csrow->nr_pages));
714 static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data)
716 return sprintf(data,"%s\n", mem_types[csrow->mtype]);
719 static ssize_t csrow_dev_type_show(struct csrow_info *csrow, char *data)
721 return sprintf(data,"%s\n", dev_types[csrow->dtype]);
724 static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data)
726 return sprintf(data,"%s\n", edac_caps[csrow->edac_mode]);
729 struct csrowdev_attribute {
730 struct attribute attr;
731 ssize_t (*show)(struct csrow_info *,char *);
732 ssize_t (*store)(struct csrow_info *, const char *,size_t);
735 #define to_csrow(k) container_of(k, struct csrow_info, kobj)
736 #define to_csrowdev_attr(a) container_of(a, struct csrowdev_attribute, attr)
738 /* Set of show/store higher level functions for csrow objects */
739 static ssize_t csrowdev_show(struct kobject *kobj, struct attribute *attr,
742 struct csrow_info *csrow = to_csrow(kobj);
743 struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr);
745 if (csrowdev_attr->show)
746 return csrowdev_attr->show(csrow, buffer);
750 static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr,
751 const char *buffer, size_t count)
753 struct csrow_info *csrow = to_csrow(kobj);
754 struct csrowdev_attribute * csrowdev_attr = to_csrowdev_attr(attr);
756 if (csrowdev_attr->store)
757 return csrowdev_attr->store(csrow, buffer, count);
761 static struct sysfs_ops csrowfs_ops = {
762 .show = csrowdev_show,
763 .store = csrowdev_store
766 #define CSROWDEV_ATTR(_name,_mode,_show,_store) \
767 struct csrowdev_attribute attr_##_name = { \
768 .attr = {.name = __stringify(_name), .mode = _mode }, \
773 /* csrow<id>/attribute files */
774 CSROWDEV_ATTR(size_mb,S_IRUGO,csrow_size_show,NULL);
775 CSROWDEV_ATTR(dev_type,S_IRUGO,csrow_dev_type_show,NULL);
776 CSROWDEV_ATTR(mem_type,S_IRUGO,csrow_mem_type_show,NULL);
777 CSROWDEV_ATTR(edac_mode,S_IRUGO,csrow_edac_mode_show,NULL);
778 CSROWDEV_ATTR(ue_count,S_IRUGO,csrow_ue_count_show,NULL);
779 CSROWDEV_ATTR(ce_count,S_IRUGO,csrow_ce_count_show,NULL);
780 CSROWDEV_ATTR(ch0_ce_count,S_IRUGO,csrow_ch0_ce_count_show,NULL);
781 CSROWDEV_ATTR(ch1_ce_count,S_IRUGO,csrow_ch1_ce_count_show,NULL);
783 /* control/attribute files */
784 CSROWDEV_ATTR(ch0_dimm_label,S_IRUGO|S_IWUSR,
785 csrow_ch0_dimm_label_show,
786 csrow_ch0_dimm_label_store);
787 CSROWDEV_ATTR(ch1_dimm_label,S_IRUGO|S_IWUSR,
788 csrow_ch1_dimm_label_show,
789 csrow_ch1_dimm_label_store);
792 /* Attributes of the CSROW<id> object */
793 static struct csrowdev_attribute *csrow_attr[] = {
802 &attr_ch0_dimm_label,
803 &attr_ch1_dimm_label,
808 /* No memory to release */
809 static void edac_csrow_instance_release(struct kobject *kobj)
811 debugf1("%s()\n", __func__);
814 static struct kobj_type ktype_csrow = {
815 .release = edac_csrow_instance_release,
816 .sysfs_ops = &csrowfs_ops,
817 .default_attrs = (struct attribute **) csrow_attr,
820 /* Create a CSROW object under specified edac_mc_device */
821 static int edac_create_csrow_object(struct kobject *edac_mci_kobj,
822 struct csrow_info *csrow, int index )
826 debugf0("%s()\n", __func__);
828 memset(&csrow->kobj, 0, sizeof(csrow->kobj));
830 /* generate ..../edac/mc/mc<id>/csrow<index> */
832 kobject_init(&csrow->kobj);
833 csrow->kobj.parent = edac_mci_kobj;
834 csrow->kobj.ktype = &ktype_csrow;
836 /* name this instance of csrow<id> */
837 err = kobject_set_name(&csrow->kobj,"csrow%d",index);
839 /* Instanstiate the csrow object */
840 err = kobject_register(&csrow->kobj);
842 debugf0("Failed to register CSROW%d\n",index);
844 debugf0("Registered CSROW%d\n",index);
850 /* sysfs data structures and methods for the MCI kobjects */
852 static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci,
853 const char *data, size_t count )
857 mci->ue_noinfo_count = 0;
858 mci->ce_noinfo_count = 0;
861 for (row = 0; row < mci->nr_csrows; row++) {
862 struct csrow_info *ri = &mci->csrows[row];
866 for (chan = 0; chan < ri->nr_channels; chan++)
867 ri->channels[chan].ce_count = 0;
869 mci->start_time = jiffies;
874 static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data)
876 return sprintf(data,"%d\n", mci->ue_count);
879 static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data)
881 return sprintf(data,"%d\n", mci->ce_count);
884 static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data)
886 return sprintf(data,"%d\n", mci->ce_noinfo_count);
889 static ssize_t mci_ue_noinfo_show(struct mem_ctl_info *mci, char *data)
891 return sprintf(data,"%d\n", mci->ue_noinfo_count);
894 static ssize_t mci_seconds_show(struct mem_ctl_info *mci, char *data)
896 return sprintf(data,"%ld\n", (jiffies - mci->start_time) / HZ);
899 static ssize_t mci_mod_name_show(struct mem_ctl_info *mci, char *data)
901 return sprintf(data,"%s %s\n", mci->mod_name, mci->mod_ver);
904 static ssize_t mci_ctl_name_show(struct mem_ctl_info *mci, char *data)
906 return sprintf(data,"%s\n", mci->ctl_name);
909 static int mci_output_edac_cap(char *buf, unsigned long edac_cap)
914 for (bit_idx = 0; bit_idx < 8 * sizeof(edac_cap); bit_idx++) {
915 if ((edac_cap >> bit_idx) & 0x1)
916 p += sprintf(p, "%s ", edac_caps[bit_idx]);
922 static ssize_t mci_edac_capability_show(struct mem_ctl_info *mci, char *data)
926 p += mci_output_edac_cap(p,mci->edac_ctl_cap);
927 p += sprintf(p, "\n");
932 static ssize_t mci_edac_current_capability_show(struct mem_ctl_info *mci,
937 p += mci_output_edac_cap(p,mci->edac_cap);
938 p += sprintf(p, "\n");
943 static int mci_output_mtype_cap(char *buf, unsigned long mtype_cap)
948 for (bit_idx = 0; bit_idx < 8 * sizeof(mtype_cap); bit_idx++) {
949 if ((mtype_cap >> bit_idx) & 0x1)
950 p += sprintf(p, "%s ", mem_types[bit_idx]);
956 static ssize_t mci_supported_mem_type_show(struct mem_ctl_info *mci, char *data)
960 p += mci_output_mtype_cap(p,mci->mtype_cap);
961 p += sprintf(p, "\n");
966 static ssize_t mci_size_mb_show(struct mem_ctl_info *mci, char *data)
968 int total_pages, csrow_idx;
970 for (total_pages = csrow_idx = 0; csrow_idx < mci->nr_csrows;
972 struct csrow_info *csrow = &mci->csrows[csrow_idx];
974 if (!csrow->nr_pages)
976 total_pages += csrow->nr_pages;
979 return sprintf(data,"%u\n", PAGES_TO_MiB(total_pages));
982 struct mcidev_attribute {
983 struct attribute attr;
984 ssize_t (*show)(struct mem_ctl_info *,char *);
985 ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
988 #define to_mci(k) container_of(k, struct mem_ctl_info, edac_mci_kobj)
989 #define to_mcidev_attr(a) container_of(a, struct mcidev_attribute, attr)
991 static ssize_t mcidev_show(struct kobject *kobj, struct attribute *attr,
994 struct mem_ctl_info *mem_ctl_info = to_mci(kobj);
995 struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr);
997 if (mcidev_attr->show)
998 return mcidev_attr->show(mem_ctl_info, buffer);
1002 static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr,
1003 const char *buffer, size_t count)
1005 struct mem_ctl_info *mem_ctl_info = to_mci(kobj);
1006 struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr);
1008 if (mcidev_attr->store)
1009 return mcidev_attr->store(mem_ctl_info, buffer, count);
1013 static struct sysfs_ops mci_ops = {
1014 .show = mcidev_show,
1015 .store = mcidev_store
1018 #define MCIDEV_ATTR(_name,_mode,_show,_store) \
1019 struct mcidev_attribute mci_attr_##_name = { \
1020 .attr = {.name = __stringify(_name), .mode = _mode }, \
1026 MCIDEV_ATTR(reset_counters,S_IWUSR,NULL,mci_reset_counters_store);
1028 /* Attribute files */
1029 MCIDEV_ATTR(mc_name,S_IRUGO,mci_ctl_name_show,NULL);
1030 MCIDEV_ATTR(module_name,S_IRUGO,mci_mod_name_show,NULL);
1031 MCIDEV_ATTR(edac_capability,S_IRUGO,mci_edac_capability_show,NULL);
1032 MCIDEV_ATTR(size_mb,S_IRUGO,mci_size_mb_show,NULL);
1033 MCIDEV_ATTR(seconds_since_reset,S_IRUGO,mci_seconds_show,NULL);
1034 MCIDEV_ATTR(ue_noinfo_count,S_IRUGO,mci_ue_noinfo_show,NULL);
1035 MCIDEV_ATTR(ce_noinfo_count,S_IRUGO,mci_ce_noinfo_show,NULL);
1036 MCIDEV_ATTR(ue_count,S_IRUGO,mci_ue_count_show,NULL);
1037 MCIDEV_ATTR(ce_count,S_IRUGO,mci_ce_count_show,NULL);
1038 MCIDEV_ATTR(edac_current_capability,S_IRUGO,
1039 mci_edac_current_capability_show,NULL);
1040 MCIDEV_ATTR(supported_mem_type,S_IRUGO,
1041 mci_supported_mem_type_show,NULL);
1044 static struct mcidev_attribute *mci_attr[] = {
1045 &mci_attr_reset_counters,
1046 &mci_attr_module_name,
1048 &mci_attr_edac_capability,
1049 &mci_attr_edac_current_capability,
1050 &mci_attr_supported_mem_type,
1052 &mci_attr_seconds_since_reset,
1053 &mci_attr_ue_noinfo_count,
1054 &mci_attr_ce_noinfo_count,
1062 * Release of a MC controlling instance
1064 static void edac_mci_instance_release(struct kobject *kobj)
1066 struct mem_ctl_info *mci;
1067 mci = container_of(kobj,struct mem_ctl_info,edac_mci_kobj);
1069 debugf0("%s() idx=%d calling kfree\n", __func__, mci->mc_idx);
1074 static struct kobj_type ktype_mci = {
1075 .release = edac_mci_instance_release,
1076 .sysfs_ops = &mci_ops,
1077 .default_attrs = (struct attribute **) mci_attr,
1080 #endif /* DISABLE_EDAC_SYSFS */
1082 #define EDAC_DEVICE_SYMLINK "device"
1085 * Create a new Memory Controller kobject instance,
1086 * mc<id> under the 'mc' directory
1092 static int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
1093 #ifdef DISABLE_EDAC_SYSFS
1101 struct csrow_info *csrow;
1102 struct kobject *edac_mci_kobj=&mci->edac_mci_kobj;
1104 debugf0("%s() idx=%d\n", __func__, mci->mc_idx);
1106 memset(edac_mci_kobj, 0, sizeof(*edac_mci_kobj));
1107 kobject_init(edac_mci_kobj);
1109 /* set the name of the mc<id> object */
1110 err = kobject_set_name(edac_mci_kobj,"mc%d",mci->mc_idx);
1114 /* link to our parent the '..../edac/mc' object */
1115 edac_mci_kobj->parent = &edac_memctrl_kobj;
1116 edac_mci_kobj->ktype = &ktype_mci;
1118 /* register the mc<id> kobject */
1119 err = kobject_register(edac_mci_kobj);
1123 /* create a symlink for the device */
1124 err = sysfs_create_link(edac_mci_kobj, &mci->pdev->dev.kobj,
1125 EDAC_DEVICE_SYMLINK);
1127 kobject_unregister(edac_mci_kobj);
1131 /* Make directories for each CSROW object
1132 * under the mc<id> kobject
1134 for (i = 0; i < mci->nr_csrows; i++) {
1136 csrow = &mci->csrows[i];
1138 /* Only expose populated CSROWs */
1139 if (csrow->nr_pages > 0) {
1140 err = edac_create_csrow_object(edac_mci_kobj,csrow,i);
1146 /* Mark this MCI instance as having sysfs entries */
1147 mci->sysfs_active = MCI_SYSFS_ACTIVE;
1152 /* CSROW error: backout what has already been registered, */
1154 for ( i--; i >= 0; i--) {
1155 if (csrow->nr_pages > 0) {
1156 kobject_unregister(&mci->csrows[i].kobj);
1157 kobject_put(&mci->csrows[i].kobj);
1161 kobject_unregister(edac_mci_kobj);
1162 kobject_put(edac_mci_kobj);
1166 #endif /* DISABLE_EDAC_SYSFS */
1169 * remove a Memory Controller instance
1171 static void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
1173 #ifndef DISABLE_EDAC_SYSFS
1176 debugf0("%s()\n", __func__);
1178 /* remove all csrow kobjects */
1179 for (i = 0; i < mci->nr_csrows; i++) {
1180 if (mci->csrows[i].nr_pages > 0) {
1181 kobject_unregister(&mci->csrows[i].kobj);
1182 kobject_put(&mci->csrows[i].kobj);
1186 sysfs_remove_link(&mci->edac_mci_kobj, EDAC_DEVICE_SYMLINK);
1188 kobject_unregister(&mci->edac_mci_kobj);
1189 kobject_put(&mci->edac_mci_kobj);
1190 #endif /* DISABLE_EDAC_SYSFS */
1193 /* END OF sysfs data and methods */
1195 #ifdef CONFIG_EDAC_DEBUG
1197 EXPORT_SYMBOL(edac_mc_dump_channel);
1199 void edac_mc_dump_channel(struct channel_info *chan)
1201 debugf4("\tchannel = %p\n", chan);
1202 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
1203 debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
1204 debugf4("\tchannel->label = '%s'\n", chan->label);
1205 debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
1209 EXPORT_SYMBOL(edac_mc_dump_csrow);
1211 void edac_mc_dump_csrow(struct csrow_info *csrow)
1213 debugf4("\tcsrow = %p\n", csrow);
1214 debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
1215 debugf4("\tcsrow->first_page = 0x%lx\n",
1217 debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
1218 debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
1219 debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
1220 debugf4("\tcsrow->nr_channels = %d\n",
1221 csrow->nr_channels);
1222 debugf4("\tcsrow->channels = %p\n", csrow->channels);
1223 debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
1227 EXPORT_SYMBOL(edac_mc_dump_mci);
1229 void edac_mc_dump_mci(struct mem_ctl_info *mci)
1231 debugf3("\tmci = %p\n", mci);
1232 debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
1233 debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
1234 debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
1235 debugf4("\tmci->edac_check = %p\n", mci->edac_check);
1236 debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
1237 mci->nr_csrows, mci->csrows);
1238 debugf3("\tpdev = %p\n", mci->pdev);
1239 debugf3("\tmod_name:ctl_name = %s:%s\n",
1240 mci->mod_name, mci->ctl_name);
1241 debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
1245 #endif /* CONFIG_EDAC_DEBUG */
1247 /* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
1248 * Adjust 'ptr' so that its alignment is at least as stringent as what the
1249 * compiler would provide for X and return the aligned result.
1251 * If 'size' is a constant, the compiler will optimize this whole function
1252 * down to either a no-op or the addition of a constant to the value of 'ptr'.
/* Round 'ptr' up to the alignment the compiler would give an object of
 * 'size' bytes and return the result; an already aligned pointer is
 * returned unchanged.
 */
static inline char * align_ptr (void *ptr, unsigned size)
{
	unsigned long addr = (unsigned long) ptr;
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *) ptr;	/* single bytes need no alignment */

	/* The misalignment is a property of the address, not of 'size':
	 * measure how far 'ptr' sits past the previous 'align' boundary.
	 */
	r = addr % align;

	if (r == 0)
		return (char *) ptr;

	return (char *) (addr + align - r);
}
1282 EXPORT_SYMBOL(edac_mc_alloc);
1285 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
1286 * @sz_pvt: size of private storage needed
1287 * @nr_csrows: Number of CSROWS needed for this MC
1288 * @nr_chans: Number of channels for the MC
1290 * Everything is kmalloc'ed as one big chunk - more efficient.
1291 * Only can be used if all structures have the same lifetime - otherwise
1292 * you have to allocate and initialize your own structures.
1294 * Use edac_mc_free() to free mc structures allocated by this function.
1297 * NULL allocation failed
1298 * struct mem_ctl_info pointer
1300 struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
1303 struct mem_ctl_info *mci;
1304 struct csrow_info *csi, *csrow;
1305 struct channel_info *chi, *chp, *chan;
1310 /* Figure out the offsets of the various items from the start of an mc
1311 * structure. We want the alignment of each item to be at least as
1312 * stringent as what the compiler would provide if we could simply
1313 * hardcode everything into a single struct.
1315 mci = (struct mem_ctl_info *) 0;
1316 csi = (struct csrow_info *)align_ptr(&mci[1], sizeof(*csi));
1317 chi = (struct channel_info *)
1318 align_ptr(&csi[nr_csrows], sizeof(*chi));
1319 pvt = align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
1320 size = ((unsigned long) pvt) + sz_pvt;
1322 if ((mci = kmalloc(size, GFP_KERNEL)) == NULL)
1325 /* Adjust pointers so they point within the memory we just allocated
1326 * rather than an imaginary chunk of memory located at address 0.
1328 csi = (struct csrow_info *) (((char *) mci) + ((unsigned long) csi));
1329 chi = (struct channel_info *) (((char *) mci) + ((unsigned long) chi));
1330 pvt = sz_pvt ? (((char *) mci) + ((unsigned long) pvt)) : NULL;
1332 memset(mci, 0, size); /* clear all fields */
1335 mci->pvt_info = pvt;
1336 mci->nr_csrows = nr_csrows;
1338 for (row = 0; row < nr_csrows; row++) {
1340 csrow->csrow_idx = row;
1342 csrow->nr_channels = nr_chans;
1343 chp = &chi[row * nr_chans];
1344 csrow->channels = chp;
1346 for (chn = 0; chn < nr_chans; chn++) {
1348 chan->chan_idx = chn;
1349 chan->csrow = csrow;
1357 EXPORT_SYMBOL(edac_mc_free);
1360 * edac_mc_free: Free a previously allocated 'mci' structure
1361 * @mci: pointer to a struct mem_ctl_info structure
1363 * Free up a previously allocated mci structure
1364 * A MCI structure can be in 2 states after being allocated
1365 * by edac_mc_alloc().
1366 * 1) Allocated in a MC driver's probe, but not yet committed
1367 * 2) Allocated and committed, by a call to edac_mc_add_mc()
1368 * edac_mc_add_mc() is the function that adds the sysfs entries
1369 * thus, this free function must determine which state the 'mci'
1370 * structure is in, then either free it directly or
1371 * perform kobject cleanup by calling edac_remove_sysfs_mci_device().
/*
 * Release @mci.  The state test is mci->sysfs_active ==
 * MCI_SYSFS_ACTIVE: in that case edac_remove_sysfs_mci_device() is
 * called and the memory is not freed here.
 * NOTE(review): the branch that frees the structure directly is on a
 * line not visible in this listing - confirm against the full source.
 */
1375 void edac_mc_free(struct mem_ctl_info *mci)
1377 /* only if sysfs entries for this mci instance exist
1378 * do we remove them and defer the actual kfree via
1379 * the kobject 'release()' callback.
1381 * Otherwise, do a straight kfree now.
1383 if (mci->sysfs_active == MCI_SYSFS_ACTIVE)
1384 edac_remove_sysfs_mci_device(mci);
1391 EXPORT_SYMBOL(edac_mc_find_mci_by_pdev);
/*
 * Search the global mc_devices list for the memory controller whose
 * pdev field equals @pdev.  Each list node is converted back to its
 * struct mem_ctl_info through the embedded 'link' member.
 * NOTE(review): the return statements are not visible here; presumably
 * the matching mci is returned, or NULL when nothing matches - confirm.
 */
1393 struct mem_ctl_info *edac_mc_find_mci_by_pdev(struct pci_dev *pdev)
1395 struct mem_ctl_info *mci;
1396 struct list_head *item;
1398 debugf3("%s()\n", __func__);
1400 list_for_each(item, &mc_devices) {
1401 mci = list_entry(item, struct mem_ctl_info, link);
1403 if (mci->pdev == pdev)
/*
 * Link @mci into the global mc_devices list.
 *
 * An empty list means the new entry simply goes at the list head.
 * Otherwise a warning is logged when a controller with the same pdev
 * is already registered, and the list is walked comparing each entry's
 * mc_idx with a running index 'i'; the first entry whose mc_idx differs
 * from 'i' becomes the insertion point, falling back to the list head
 * (i.e. append at the tail) when no such entry is found.  The entry is
 * published with list_add_tail_rcu().
 * NOTE(review): the declaration and updates of 'i', the assignment of
 * mci->mc_idx and the return values are on lines not visible here;
 * edac_mc_add_mc() tests the result for non-zero - confirm semantics.
 */
1410 static int add_mc_to_global_list (struct mem_ctl_info *mci)
1412 struct list_head *item, *insert_before;
1413 struct mem_ctl_info *p;
1416 if (list_empty(&mc_devices)) {
1418 insert_before = &mc_devices;
1420 if (edac_mc_find_mci_by_pdev(mci->pdev)) {
1421 edac_printk(KERN_WARNING, EDAC_MC,
1422 "%s (%s) %s %s already assigned %d\n",
1423 mci->pdev->dev.bus_id,
1424 pci_name(mci->pdev), mci->mod_name,
1425 mci->ctl_name, mci->mc_idx);
1429 insert_before = NULL;
1432 list_for_each(item, &mc_devices) {
1433 p = list_entry(item, struct mem_ctl_info, link);
1435 if (p->mc_idx != i) {
1436 insert_before = item;
1445 if (insert_before == NULL)
1446 insert_before = &mc_devices;
1449 list_add_tail_rcu(&mci->link, insert_before);
/*
 * RCU callback queued by del_mc_from_global_list(): recovers the owning
 * mem_ctl_info from its embedded rcu head, resets the unlinked 'link'
 * node to an empty list and signals mci->complete so the waiting
 * remover can continue.
 */
1454 static void complete_mc_list_del (struct rcu_head *head)
1456 struct mem_ctl_info *mci;
1458 mci = container_of(head, struct mem_ctl_info, rcu);
1459 INIT_LIST_HEAD(&mci->link);
1460 complete(&mci->complete);
/*
 * Unlink @mci from the global list with list_del_rcu(), then queue
 * complete_mc_list_del() through call_rcu() and block on mci->complete
 * until that callback has run.
 */
1464 static void del_mc_from_global_list (struct mem_ctl_info *mci)
1466 list_del_rcu(&mci->link);
1467 init_completion(&mci->complete);
1468 call_rcu(&mci->rcu, complete_mc_list_del);
1469 wait_for_completion(&mci->complete);
1473 EXPORT_SYMBOL(edac_mc_add_mc);
1476 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list
1477 * @mci: pointer to the mci structure to be added to the list
1484 /* FIXME - should a warning be printed if no error detection? correction? */
/*
 * Register @mci with the EDAC core.
 *
 * With CONFIG_EDAC_DEBUG the mci is dumped at debug level >= 3, and
 * every csrow and channel at level >= 4.  Under mem_ctls_mutex the mci
 * is added to the global list, start_time is stamped with the current
 * jiffies and the sysfs device is created; success is reported with a
 * KERN_INFO message.
 * NOTE(review): the goto/label/return lines are not visible in this
 * listing, so the exact failure flow and the return values should be
 * confirmed against the full source.
 */
1485 int edac_mc_add_mc(struct mem_ctl_info *mci)
1487 debugf0("%s()\n", __func__);
1488 #ifdef CONFIG_EDAC_DEBUG
1489 if (edac_debug_level >= 3)
1490 edac_mc_dump_mci(mci);
1491 if (edac_debug_level >= 4) {
1494 for (i = 0; i < mci->nr_csrows; i++) {
1496 edac_mc_dump_csrow(&mci->csrows[i]);
1497 for (j = 0; j < mci->csrows[i].nr_channels; j++)
1498 edac_mc_dump_channel(&mci->csrows[i].
1503 down(&mem_ctls_mutex);
1505 if (add_mc_to_global_list(mci))
1508 /* set load time so that error rate can be tracked */
1509 mci->start_time = jiffies;
1511 if (edac_create_sysfs_mci_device(mci)) {
1512 edac_mc_printk(mci, KERN_WARNING,
1513 "failed to create sysfs device\n");
1517 /* Report action taken */
1518 edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: PCI %s\n",
1519 mci->mod_name, mci->ctl_name, pci_name(mci->pdev));
1521 up(&mem_ctls_mutex);
/* NOTE(review): looks like the failure unwind (labels not shown): drop the mci from the list, then release the mutex */
1525 del_mc_from_global_list(mci);
1528 up(&mem_ctls_mutex);
1533 EXPORT_SYMBOL(edac_mc_del_mc);
1536 * edac_mc_del_mc: Remove the specified mci structure from global list
1537 * @mci: Pointer to struct mem_ctl_info structure
/*
 * Takes mem_ctls_mutex, removes @mci from the global list (which blocks
 * until the RCU callback has run, see del_mc_from_global_list()), logs
 * the removal and releases the mutex.
 * NOTE(review): the return value is set on lines not visible here.
 */
1543 int edac_mc_del_mc(struct mem_ctl_info *mci)
1547 debugf0("MC%d: %s()\n", mci->mc_idx, __func__);
1548 down(&mem_ctls_mutex);
1549 del_mc_from_global_list(mci);
1550 edac_printk(KERN_INFO, EDAC_MC,
1551 "Removed device %d for %s %s: PCI %s\n", mci->mc_idx,
1552 mci->mod_name, mci->ctl_name, pci_name(mci->pdev));
1554 up(&mem_ctls_mutex);
1560 EXPORT_SYMBOL(edac_mc_scrub_block);
/*
 * Software-scrub a region of memory.
 * @page:   page frame number; nothing is done when pfn_valid() fails
 * @offset: byte offset into the mapped page where scrubbing starts
 * size:    length handed to atomic_scrub() (the parameter is declared
 *          on a signature line not visible in this listing)
 *
 * The page is mapped with kmap_atomic(), scrubbed with the
 * architecture's atomic_scrub() and unmapped again.  For highmem pages
 * local interrupts are disabled for the duration of the mapping.
 */
1562 void edac_mc_scrub_block(unsigned long page, unsigned long offset,
1567 unsigned long flags = 0;
1569 debugf3("%s()\n", __func__);
1571 /* ECC error page was not in our memory. Ignore it. */
1572 if(!pfn_valid(page))
1575 /* Find the actual page structure then map it and fix */
1576 pg = pfn_to_page(page);
/* NOTE(review): presumably IRQs are masked so the KM_BOUNCE_READ slot cannot be reused from interrupt context - confirm */
1578 if (PageHighMem(pg))
1579 local_irq_save(flags);
1581 virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);
1583 /* Perform architecture specific atomic scrub operation */
1584 atomic_scrub(virt_addr + offset, size);
1586 /* Unmap and complete */
1587 kunmap_atomic(virt_addr, KM_BOUNCE_READ);
1589 if (PageHighMem(pg))
1590 local_irq_restore(flags);
1594 /* FIXME - should return -1 */
1595 EXPORT_SYMBOL(edac_mc_find_csrow_by_page);
/*
 * Find the csrow of @mci that contains page frame 'page'.
 *
 * Rows with nr_pages == 0 are skipped.  A row matches when the page
 * lies within [first_page, last_page] and agrees with first_page on the
 * bits selected by page_mask.
 * NOTE(review): the result variable and the return are on lines not
 * visible here; the KERN_ERR message is presumably printed only when no
 * row matched - confirm against the full source.
 */
1597 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
1600 struct csrow_info *csrows = mci->csrows;
1603 debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
1606 for (i = 0; i < mci->nr_csrows; i++) {
1607 struct csrow_info *csrow = &csrows[i];
1609 if (csrow->nr_pages == 0)
1612 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
1613 "mask(0x%lx)\n", mci->mc_idx, __func__,
1614 csrow->first_page, page, csrow->last_page,
1617 if ((page >= csrow->first_page) &&
1618 (page <= csrow->last_page) &&
1619 ((page & csrow->page_mask) ==
1620 (csrow->first_page & csrow->page_mask))) {
1627 edac_mc_printk(mci, KERN_ERR,
1628 "could not look up page error address %lx\n",
1629 (unsigned long) page);
1635 EXPORT_SYMBOL(edac_mc_handle_ce);
1637 /* FIXME - settable log (warning/emerg) levels */
1638 /* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
/*
 * Report a correctable error (CE) on @mci.
 * @page_frame_number: page frame of the error (logged, and scrubbed
 *                     when software scrubbing is enabled)
 * @offset_in_page:    offset of the error within that page
 * @syndrome:          syndrome value; only logged here
 * @row, @channel:     csrow / channel indices; each is range-checked
 * msg:                text appended to the log line (declared on a
 *                     signature line not visible in this listing)
 *
 * Out-of-range indices are logged as INTERNAL ERROR and accounted via
 * edac_mc_handle_ce_no_info().  Otherwise the event is logged and the
 * per-csrow and per-channel ce_count are incremented.  When scrub_mode
 * has SCRUB_SW_SRC set, the page is translated through the driver's
 * ctl_page_to_phys() hook (if one is provided) and scrubbed with
 * edac_mc_scrub_block(), using the csrow's grain as the length.
 * NOTE(review): early returns after the INTERNAL ERROR paths and any
 * condition guarding the log message are on lines not visible here.
 */
1639 void edac_mc_handle_ce(struct mem_ctl_info *mci,
1640 unsigned long page_frame_number,
1641 unsigned long offset_in_page,
1642 unsigned long syndrome, int row, int channel,
1645 unsigned long remapped_page;
1647 debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
1649 /* FIXME - maybe make panic on INTERNAL ERROR an option */
1650 if (row >= mci->nr_csrows || row < 0) {
1651 /* something is wrong */
1652 edac_mc_printk(mci, KERN_ERR,
1653 "INTERNAL ERROR: row out of range "
1654 "(%d >= %d)\n", row, mci->nr_csrows);
1655 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
1658 if (channel >= mci->csrows[row].nr_channels || channel < 0) {
1659 /* something is wrong */
1660 edac_mc_printk(mci, KERN_ERR,
1661 "INTERNAL ERROR: channel out of range "
1662 "(%d >= %d)\n", channel,
1663 mci->csrows[row].nr_channels);
1664 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
1669 /* FIXME - put in DIMM location */
1670 edac_mc_printk(mci, KERN_WARNING,
1671 "CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
1672 "0x%lx, row %d, channel %d, label \"%s\": %s\n",
1673 page_frame_number, offset_in_page,
1674 mci->csrows[row].grain, syndrome, row, channel,
1675 mci->csrows[row].channels[channel].label, msg);
1678 mci->csrows[row].ce_count++;
1679 mci->csrows[row].channels[channel].ce_count++;
1681 if (mci->scrub_mode & SCRUB_SW_SRC) {
1683 * Some MC's can remap memory so that it is still available
1684 * at a different address when PCI devices map into memory.
1685 * MC's that can't do this lose the memory where PCI devices
1686 * are mapped. This mapping is MC dependant and so we call
1687 * back into the MC driver for it to map the MC page to
1688 * a physical (CPU) page which can then be mapped to a virtual
1689 * page - which can then be scrubbed.
1691 remapped_page = mci->ctl_page_to_phys ?
1692 mci->ctl_page_to_phys(mci, page_frame_number) :
1695 edac_mc_scrub_block(remapped_page, offset_in_page,
1696 mci->csrows[row].grain);
1701 EXPORT_SYMBOL(edac_mc_handle_ce_no_info);
/*
 * Account for a correctable error for which no location information is
 * available: logs 'msg' at KERN_WARNING and increments
 * mci->ce_noinfo_count.
 */
1703 void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci,
1707 edac_mc_printk(mci, KERN_WARNING,
1708 "CE - no information available: %s\n", msg);
1709 mci->ce_noinfo_count++;
1714 EXPORT_SYMBOL(edac_mc_handle_ue);
/*
 * Report an uncorrectable error (UE) on @mci.
 *
 * An out-of-range @row is logged as INTERNAL ERROR and accounted via
 * edac_mc_handle_ue_no_info().  Otherwise the labels of the csrow's
 * channels are joined with ':' into 'labels' (a buffer of
 * EDAC_MC_LABEL_LEN * 4 + 1 bytes), the event is logged at KERN_EMERG
 * and the csrow's ue_count is incremented.
 * NOTE(review): the second "EDAC MC%d: UE page ..." format string is
 * the argument of a call whose name is on a line not visible here
 * (presumably panic()); the 'len'/'pos' bookkeeping between the
 * snprintf() calls is likewise not shown - confirm against the full
 * source.
 */
1716 void edac_mc_handle_ue(struct mem_ctl_info *mci,
1717 unsigned long page_frame_number,
1718 unsigned long offset_in_page, int row,
1721 int len = EDAC_MC_LABEL_LEN * 4;
1722 char labels[len + 1];
1727 debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
1729 /* FIXME - maybe make panic on INTERNAL ERROR an option */
1730 if (row >= mci->nr_csrows || row < 0) {
1731 /* something is wrong */
1732 edac_mc_printk(mci, KERN_ERR,
1733 "INTERNAL ERROR: row out of range "
1734 "(%d >= %d)\n", row, mci->nr_csrows);
1735 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
1739 chars = snprintf(pos, len + 1, "%s",
1740 mci->csrows[row].channels[0].label);
1743 for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
1745 chars = snprintf(pos, len + 1, ":%s",
1746 mci->csrows[row].channels[chan].label);
1752 edac_mc_printk(mci, KERN_EMERG,
1753 "UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
1754 "labels \"%s\": %s\n", page_frame_number,
1755 offset_in_page, mci->csrows[row].grain, row, labels,
1760 ("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, row %d,"
1761 " labels \"%s\": %s\n", mci->mc_idx,
1762 page_frame_number, offset_in_page,
1763 mci->csrows[row].grain, row, labels, msg);
1766 mci->csrows[row].ue_count++;
1770 EXPORT_SYMBOL(edac_mc_handle_ue_no_info);
/*
 * Account for an uncorrectable error for which no location information
 * is available: logs 'msg' at KERN_WARNING and increments
 * mci->ue_noinfo_count.
 * NOTE(review): the panic() call is presumably guarded by a condition
 * on a line not visible here (e.g. the panic_on_ue parameter) - confirm.
 */
1772 void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci,
1776 panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
1779 edac_mc_printk(mci, KERN_WARNING,
1780 "UE - no information available: %s\n", msg);
1781 mci->ue_noinfo_count++;
/*
 * Read and reset the error bits of a PCI status register.
 * @dev:       device to query
 * @secondary: non-zero selects PCI_SEC_STATUS, zero selects PCI_STATUS
 *
 * The value read is masked down to the error bits of interest and
 * written back to the same register to reset them.  A reading of 0xFFFF
 * triggers a sanity read of config dword 0, which is compared against
 * 0xFFFFFFFF.
 * NOTE(review): the early-return and final return lines are not visible
 * here; callers test the result against the PCI_STATUS_* error bits.
 */
1788 static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
1793 where = secondary ? PCI_SEC_STATUS : PCI_STATUS;
1794 pci_read_config_word(dev, where, &status);
1796 /* If we get back 0xFFFF then we must suspect that the card has been pulled but
1797 the Linux PCI layer has not yet finished cleaning up. We don't want to report
1800 if (status == 0xFFFF) {
1802 pci_read_config_dword(dev, 0, &sanity);
1803 if (sanity == 0xFFFFFFFF)
1806 status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR |
1810 /* reset only the bits we are interested in */
1811 pci_write_config_word(dev, where, status);
/* Type of the per-device callback handed to edac_pci_dev_parity_iterator() */
1816 typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
1818 /* Clear any PCI parity errors logged by this device. */
/*
 * The primary status register is read through get_pci_parity_status(),
 * which also resets the error bits; the returned value is discarded.
 * If the header type identifies a bridge, the secondary status
 * register is cleared the same way.
 */
1819 static void edac_pci_dev_parity_clear( struct pci_dev *dev )
1823 get_pci_parity_status(dev, 0);
1825 /* read the device TYPE, looking for bridges */
1826 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1828 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE)
1829 get_pci_parity_status(dev, 1);
1833 * PCI Parity polling
/*
 * Poll one PCI device for parity/system errors.
 *
 * The primary status register is fetched (and its error bits reset)
 * through get_pci_parity_status().  A signaled system error, a master
 * data parity error or a detected parity error is logged at KERN_CRIT;
 * the two parity cases also increment pci_parity_count.  For bridges
 * the same checks are repeated on the secondary status register, with
 * messages prefixed "Bridge ".
 */
1836 static void edac_pci_dev_parity_test(struct pci_dev *dev)
1841 /* read the STATUS register on this device
1843 status = get_pci_parity_status(dev, 0);
1845 debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id );
1847 /* check the status reg for errors */
1849 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1850 edac_printk(KERN_CRIT, EDAC_PCI,
1851 "Signaled System Error on %s\n",
1854 if (status & (PCI_STATUS_PARITY)) {
1855 edac_printk(KERN_CRIT, EDAC_PCI,
1856 "Master Data Parity Error on %s\n",
1859 atomic_inc(&pci_parity_count);
1862 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1863 edac_printk(KERN_CRIT, EDAC_PCI,
1864 "Detected Parity Error on %s\n",
1867 atomic_inc(&pci_parity_count);
1871 /* read the device TYPE, looking for bridges */
1872 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1874 debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id );
1876 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
1877 /* On bridges, need to examine secondary status register */
1878 status = get_pci_parity_status(dev, 1);
1880 debugf2("PCI SEC_STATUS= 0x%04x %s\n",
1881 status, dev->dev.bus_id );
1883 /* check the secondary status reg for errors */
1885 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1886 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1887 "Signaled System Error on %s\n",
1890 if (status & (PCI_STATUS_PARITY)) {
1891 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1892 "Master Data Parity Error on "
1893 "%s\n", pci_name(dev));
1895 atomic_inc(&pci_parity_count);
1898 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1899 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1900 "Detected Parity Error on %s\n",
1903 atomic_inc(&pci_parity_count);
1910 * check_dev_on_list: Scan for a PCI device on a white/black list
1911 * @list: an EDAC &edac_pci_device_list white/black list pointer
1912 * @free_index: index of next free entry on the list
1913 * @pci_dev: PCI Device pointer
1915 * see if list contains the device.
1917 * Returns: 0 not found
/*
 * Linear search of the first @free_index entries of @list for an entry
 * whose vendor and device IDs both equal those of @dev.
 * NOTE(review): the lines that record a match and return 'rc' are not
 * visible in this listing.
 */
1920 static int check_dev_on_list(struct edac_pci_device_list *list, int free_index,
1921 struct pci_dev *dev)
1924 int rc = 0; /* Assume not found */
1925 unsigned short vendor=dev->vendor;
1926 unsigned short device=dev->device;
1928 /* Scan the list, looking for a vendor/device match
1930 for (i = 0; i < free_index; i++, list++ ) {
1931 if ( (list->vendor == vendor ) &&
1932 (list->device == device )) {
1942 * pci_dev parity list iterator
1943 * Scan the PCI device list for one iteration, looking for SERRORs
1944 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
/*
 * Devices are enumerated with pci_get_device(PCI_ANY_ID, PCI_ANY_ID).
 * When pci_whitelist_count is positive only the whitelist is consulted;
 * otherwise the blacklist is checked.
 * NOTE(review): the invocations of @fn are on lines not visible here;
 * from the conditions shown, whitelisted devices are processed and
 * blacklisted ones are skipped - confirm against the full source.
 */
1946 static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
1948 struct pci_dev *dev=NULL;
1950 /* request for kernel access to the next PCI device, if any,
1951 * and while we are looking at it have its reference count
1952 * bumped until we are done with it
1954 while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
1956 /* if whitelist exists then it has priority, so only scan those
1957 * devices on the whitelist
1959 if (pci_whitelist_count > 0 ) {
1960 if (check_dev_on_list(pci_whitelist,
1961 pci_whitelist_count, dev))
1965 * if no whitelist, then check if this devices is
1968 if (!check_dev_on_list(pci_blacklist,
1969 pci_blacklist_count, dev))
/*
 * Run one PCI parity scan, controlled by the check_pci_parity setting
 * (the early exit taken when it is zero is on a line not shown).
 *
 * pci_parity_count is sampled before the scan; the scan itself runs
 * with local interrupts disabled.  When panic_on_pci_parity is set and
 * the counter changed during the scan, the kernel panics.
 */
1975 static void do_pci_parity_check(void)
1977 unsigned long flags;
1980 debugf3("%s()\n", __func__);
1982 if (!check_pci_parity)
1985 before_count = atomic_read(&pci_parity_count);
1987 /* scan all PCI devices looking for a Parity Error on devices and
1990 local_irq_save(flags);
1991 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
1992 local_irq_restore(flags);
1994 /* Only if operator has selected panic on PCI Error */
1995 if (panic_on_pci_parity) {
1996 /* If the count is different 'after' from 'before' */
1997 if (before_count != atomic_read(&pci_parity_count))
1998 panic("EDAC: PCI Parity Error");
/*
 * Runs edac_pci_dev_parity_clear() through the PCI device iterator so
 * that errors already latched in the devices' status registers are
 * discarded.
 */
2003 static inline void clear_pci_parity_errors(void)
2005 /* Clear any PCI bus parity errors that devices initially have logged
2006 * in their registers.
2008 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
2012 #else /* CONFIG_PCI */
/*
 * Stand-ins for the two PCI entry points when CONFIG_PCI is not set.
 * NOTE(review): the bodies are not visible in this listing; presumably
 * they are empty - confirm.
 */
2015 static inline void do_pci_parity_check(void)
2021 static inline void clear_pci_parity_errors(void)
2027 #endif /* CONFIG_PCI */
2030 * Iterate over all MC instances and check for ECC, et al, errors
/*
 * With local interrupts disabled, walks mc_devices and invokes each
 * controller's edac_check() hook when the driver supplied one.
 */
2032 static inline void check_mc_devices (void)
2034 unsigned long flags;
2035 struct list_head *item;
2036 struct mem_ctl_info *mci;
2038 debugf3("%s()\n", __func__);
2040 /* during poll, have interrupts off */
2041 local_irq_save(flags);
2043 list_for_each(item, &mc_devices) {
2044 mci = list_entry(item, struct mem_ctl_info, link);
2046 if (mci->edac_check != NULL)
2047 mci->edac_check(mci);
2050 local_irq_restore(flags);
2055 * Check MC status every poll_msec.
2056 * Check PCI status every poll_msec as well.
2058 * This is where the work gets done for edac.
2060 * SMP safe, doesn't use NMI, and auto-rate-limits.
/*
 * One polling pass; do_pci_parity_check() is called here.
 * NOTE(review): the memory-controller half of the pass (presumably a
 * call to check_mc_devices()) is on a line not visible in this listing.
 */
2062 static void do_edac_check(void)
2064 debugf3("%s()\n", __func__);
2066 do_pci_parity_check();
/*
 * Body of the polling thread: loops until kthread_should_stop(),
 * sleeping interruptibly for poll_msec milliseconds (converted to
 * jiffies as HZ * poll_msec / 1000) between iterations.
 * NOTE(review): the per-iteration work call and the return value are on
 * lines not visible in this listing.
 */
2069 static int edac_kernel_thread(void *arg)
2071 while (!kthread_should_stop()) {
2074 /* goto sleep for the interval */
2075 schedule_timeout_interruptible((HZ * poll_msec) / 1000);
2084 * module initialization entry point
/*
 * Prints the version banner, clears stale PCI parity errors, creates
 * the memory-controller and PCI sysfs entries and starts the "kedac"
 * polling thread.  A failure tears down whatever was set up before it;
 * a kthread_run() failure returns PTR_ERR() of the thread pointer.
 * NOTE(review): the return statements for the sysfs failures and for
 * success are not visible in this listing.
 */
2086 static int __init edac_mc_init(void)
2088 edac_printk(KERN_INFO, EDAC_MC, EDAC_MC_VERSION "\n");
2091 * Harvest and clear any boot/initialization PCI parity errors
2093 * FIXME: This only clears errors logged by devices present at time of
2094 * module initialization. We should also do an initial clear
2095 * of each newly hotplugged device.
2097 clear_pci_parity_errors();
2099 /* Create the MC sysfs entries */
2100 if (edac_sysfs_memctrl_setup()) {
2101 edac_printk(KERN_ERR, EDAC_MC,
2102 "Error initializing sysfs code\n");
2106 /* Create the PCI parity sysfs entries */
2107 if (edac_sysfs_pci_setup()) {
2108 edac_sysfs_memctrl_teardown();
2109 edac_printk(KERN_ERR, EDAC_MC,
2110 "EDAC PCI: Error initializing sysfs code\n");
2114 /* create our kernel thread */
2115 edac_thread = kthread_run(edac_kernel_thread, NULL, "kedac");
2116 if (IS_ERR(edac_thread)) {
2117 /* remove the sysfs entries */
2118 edac_sysfs_memctrl_teardown();
2119 edac_sysfs_pci_teardown();
2120 return PTR_ERR(edac_thread);
2129 * module exit/termination function
/* Stops the polling thread first, then removes both sets of sysfs entries. */
2131 static void __exit edac_mc_exit(void)
2133 debugf0("%s()\n", __func__);
2135 kthread_stop(edac_thread);
2137 /* tear down the sysfs device */
2138 edac_sysfs_memctrl_teardown();
2139 edac_sysfs_pci_teardown();
/*
 * Module entry/exit registration, metadata and parameters.  Every
 * parameter is an int registered with mode 0644; edac_debug_level is
 * only registered when CONFIG_EDAC_DEBUG is defined.
 */
2145 module_init(edac_mc_init);
2146 module_exit(edac_mc_exit);
2148 MODULE_LICENSE("GPL");
2149 MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n"
2150 "Based on.work by Dan Hollis et al");
2151 MODULE_DESCRIPTION("Core library routines for MC reporting");
2153 module_param(panic_on_ue, int, 0644);
2154 MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on");
2155 module_param(check_pci_parity, int, 0644);
2156 MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on");
2157 module_param(panic_on_pci_parity, int, 0644);
2158 MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on");
2159 module_param(log_ue, int, 0644);
2160 MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on");
2161 module_param(log_ce, int, 0644);
2162 MODULE_PARM_DESC(log_ce, "Log correctable error to console: 0=off 1=on");
2163 module_param(poll_msec, int, 0644);
2164 MODULE_PARM_DESC(poll_msec, "Polling period in milliseconds");
2165 #ifdef CONFIG_EDAC_DEBUG
2166 module_param(edac_debug_level, int, 0644);
2167 MODULE_PARM_DESC(edac_debug_level, "Debug level");