1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Intel i7 core/Nehalem Memory Controller kernel module
3  *
4  * This driver supports the memory controllers found on the Intel
5  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
6  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
7  * and Westmere-EP.
8  *
9  * Copyright (c) 2009-2010 by:
10  *       Mauro Carvalho Chehab
11  *
12  * Red Hat Inc. https://www.redhat.com
13  *
14  * Forked and adapted from the i5400_edac driver
15  *
16  * Based on the following public Intel datasheets:
17  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
18  * Datasheet, Volume 2:
19  *      http://download.intel.com/design/processor/datashts/320835.pdf
20  * Intel Xeon Processor 5500 Series Datasheet Volume 2
21  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
22  * also available at:
23  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
24  */
25
26 #include <linux/module.h>
27 #include <linux/init.h>
28 #include <linux/pci.h>
29 #include <linux/pci_ids.h>
30 #include <linux/slab.h>
31 #include <linux/delay.h>
32 #include <linux/dmi.h>
33 #include <linux/edac.h>
34 #include <linux/mmzone.h>
35 #include <linux/smp.h>
36 #include <asm/mce.h>
37 #include <asm/processor.h>
38 #include <asm/div64.h>
39
40 #include "edac_module.h"
41
42 /* Static vars */
43 static LIST_HEAD(i7core_edac_list);
44 static DEFINE_MUTEX(i7core_edac_lock);
45 static int probed;
46
47 static int use_pci_fixup;
48 module_param(use_pci_fixup, int, 0444);
49 MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to search for hidden devices");
50 /*
51  * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
52  * registers start at bus 255, and are not reported by BIOS.
53  * We currently support devices with only 2 sockets. In order to support
54  * more QPI (QuickPath Interconnect) buses, just increment this number.
55  */
56 #define MAX_SOCKET_BUSES        2
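
/*
 * Illustrative usage of the module parameter above (hypothetical session;
 * exact paths and tooling depend on the distribution):
 *
 *   # modprobe i7core_edac use_pci_fixup=1
 *
 * or, on the kernel command line when the driver is built in:
 *
 *   i7core_edac.use_pci_fixup=1
 */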
57
58
59 /*
60  * Alter this version for the module when modifications are made
61  */
62 #define I7CORE_REVISION    " Ver: 1.0.0"
63 #define EDAC_MOD_STR      "i7core_edac"
64
65 /*
66  * Debug macros
67  */
68 #define i7core_printk(level, fmt, arg...)                       \
69         edac_printk(level, "i7core", fmt, ##arg)
70
71 #define i7core_mc_printk(mci, level, fmt, arg...)               \
72         edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
73
74 /*
75  * i7core Memory Controller Registers
76  */
77
78         /* OFFSETS for Device 0 Function 0 */
79
80 #define MC_CFG_CONTROL  0x90
81   #define MC_CFG_UNLOCK         0x02
82   #define MC_CFG_LOCK           0x00
83
84         /* OFFSETS for Device 3 Function 0 */
85
86 #define MC_CONTROL      0x48
87 #define MC_STATUS       0x4c
88 #define MC_MAX_DOD      0x64
89
90 /*
91  * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
92  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
93  */
94
95 #define MC_TEST_ERR_RCV1        0x60
96   #define DIMM2_COR_ERR(r)                      ((r) & 0x7fff)
97
98 #define MC_TEST_ERR_RCV0        0x64
99   #define DIMM1_COR_ERR(r)                      (((r) >> 16) & 0x7fff)
100   #define DIMM0_COR_ERR(r)                      ((r) & 0x7fff)
101
102 /* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
103 #define MC_SSRCONTROL           0x48
104   #define SSR_MODE_DISABLE      0x00
105   #define SSR_MODE_ENABLE       0x01
106   #define SSR_MODE_MASK         0x03
107
108 #define MC_SCRUB_CONTROL        0x4c
109   #define STARTSCRUB            (1 << 24)
110   #define SCRUBINTERVAL_MASK    0xffffff
111
112 #define MC_COR_ECC_CNT_0        0x80
113 #define MC_COR_ECC_CNT_1        0x84
114 #define MC_COR_ECC_CNT_2        0x88
115 #define MC_COR_ECC_CNT_3        0x8c
116 #define MC_COR_ECC_CNT_4        0x90
117 #define MC_COR_ECC_CNT_5        0x94
118
119 #define DIMM_TOP_COR_ERR(r)                     (((r) >> 16) & 0x7fff)
120 #define DIMM_BOT_COR_ERR(r)                     ((r) & 0x7fff)
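
/*
 * Illustrative decode (made-up register value): if MC_COR_ECC_CNT_0 reads
 * 0x00050002, DIMM_TOP_COR_ERR() yields 5 corrected errors and
 * DIMM_BOT_COR_ERR() yields 2 for the two DIMMs covered by that counter.
 */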
121
122
123         /* OFFSETS for Devices 4,5 and 6 Function 0 */
124
125 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
126   #define THREE_DIMMS_PRESENT           (1 << 24)
127   #define SINGLE_QUAD_RANK_PRESENT      (1 << 23)
128   #define QUAD_RANK_PRESENT             (1 << 22)
129   #define REGISTERED_DIMM               (1 << 15)
130
131 #define MC_CHANNEL_MAPPER       0x60
132   #define RDLCH(r, ch)          ((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
133   #define WRLCH(r, ch)          ((((r) >> ((ch) * 6)) & 0x07) - 1)
134
135 #define MC_CHANNEL_RANK_PRESENT 0x7c
136   #define RANK_PRESENT_MASK             0xffff
137
138 #define MC_CHANNEL_ADDR_MATCH   0xf0
139 #define MC_CHANNEL_ERROR_MASK   0xf8
140 #define MC_CHANNEL_ERROR_INJECT 0xfc
141   #define INJECT_ADDR_PARITY    0x10
142   #define INJECT_ECC            0x08
143   #define MASK_CACHELINE        0x06
144   #define MASK_FULL_CACHELINE   0x06
145   #define MASK_MSB32_CACHELINE  0x04
146   #define MASK_LSB32_CACHELINE  0x02
147   #define NO_MASK_CACHELINE     0x00
148   #define REPEAT_EN             0x01
149
150         /* OFFSETS for Devices 4,5 and 6 Function 1 */
151
152 #define MC_DOD_CH_DIMM0         0x48
153 #define MC_DOD_CH_DIMM1         0x4c
154 #define MC_DOD_CH_DIMM2         0x50
155   #define RANKOFFSET_MASK       ((1 << 12) | (1 << 11) | (1 << 10))
156   #define RANKOFFSET(x)         (((x) & RANKOFFSET_MASK) >> 10)
157   #define DIMM_PRESENT_MASK     (1 << 9)
158   #define DIMM_PRESENT(x)       (((x) & DIMM_PRESENT_MASK) >> 9)
159   #define MC_DOD_NUMBANK_MASK           ((1 << 8) | (1 << 7))
160   #define MC_DOD_NUMBANK(x)             (((x) & MC_DOD_NUMBANK_MASK) >> 7)
161   #define MC_DOD_NUMRANK_MASK           ((1 << 6) | (1 << 5))
162   #define MC_DOD_NUMRANK(x)             (((x) & MC_DOD_NUMRANK_MASK) >> 5)
163   #define MC_DOD_NUMROW_MASK            ((1 << 4) | (1 << 3) | (1 << 2))
164   #define MC_DOD_NUMROW(x)              (((x) & MC_DOD_NUMROW_MASK) >> 2)
165   #define MC_DOD_NUMCOL_MASK            3
166   #define MC_DOD_NUMCOL(x)              ((x) & MC_DOD_NUMCOL_MASK)
167
168 #define MC_RANK_PRESENT         0x7c
169
170 #define MC_SAG_CH_0     0x80
171 #define MC_SAG_CH_1     0x84
172 #define MC_SAG_CH_2     0x88
173 #define MC_SAG_CH_3     0x8c
174 #define MC_SAG_CH_4     0x90
175 #define MC_SAG_CH_5     0x94
176 #define MC_SAG_CH_6     0x98
177 #define MC_SAG_CH_7     0x9c
178
179 #define MC_RIR_LIMIT_CH_0       0x40
180 #define MC_RIR_LIMIT_CH_1       0x44
181 #define MC_RIR_LIMIT_CH_2       0x48
182 #define MC_RIR_LIMIT_CH_3       0x4C
183 #define MC_RIR_LIMIT_CH_4       0x50
184 #define MC_RIR_LIMIT_CH_5       0x54
185 #define MC_RIR_LIMIT_CH_6       0x58
186 #define MC_RIR_LIMIT_CH_7       0x5C
187 #define MC_RIR_LIMIT_MASK       ((1 << 10) - 1)
188
189 #define MC_RIR_WAY_CH           0x80
190   #define MC_RIR_WAY_OFFSET_MASK        (((1 << 14) - 1) & ~0x7)
191   #define MC_RIR_WAY_RANK_MASK          0x7
192
193 /*
194  * i7core structs
195  */
196
197 #define NUM_CHANS 3
198 #define MAX_DIMMS 3             /* Max DIMMs per channel */
199 #define MAX_MCR_FUNC  4
200 #define MAX_CHAN_FUNC 3
201
202 struct i7core_info {
203         u32     mc_control;
204         u32     mc_status;
205         u32     max_dod;
206         u32     ch_map;
207 };
208
209
210 struct i7core_inject {
211         int     enable;
212
213         u32     section;
214         u32     type;
215         u32     eccmask;
216
217         /* Error address mask */
218         int channel, dimm, rank, bank, page, col;
219 };
220
221 struct i7core_channel {
222         bool            is_3dimms_present;
223         bool            is_single_4rank;
224         bool            has_4rank;
225         u32             dimms;
226 };
227
228 struct pci_id_descr {
229         int                     dev;
230         int                     func;
231         int                     dev_id;
232         int                     optional;
233 };
234
235 struct pci_id_table {
236         const struct pci_id_descr       *descr;
237         int                             n_devs;
238 };
239
240 struct i7core_dev {
241         struct list_head        list;
242         u8                      socket;
243         struct pci_dev          **pdev;
244         int                     n_devs;
245         struct mem_ctl_info     *mci;
246 };
247
248 struct i7core_pvt {
249         struct device *addrmatch_dev, *chancounts_dev;
250
251         struct pci_dev  *pci_noncore;
252         struct pci_dev  *pci_mcr[MAX_MCR_FUNC + 1];
253         struct pci_dev  *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
254
255         struct i7core_dev *i7core_dev;
256
257         struct i7core_info      info;
258         struct i7core_inject    inject;
259         struct i7core_channel   channel[NUM_CHANS];
260
261         int             ce_count_available;
262
263                         /* ECC corrected errors counts per udimm */
264         unsigned long   udimm_ce_count[MAX_DIMMS];
265         int             udimm_last_ce_count[MAX_DIMMS];
266                         /* ECC corrected errors counts per rdimm */
267         unsigned long   rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
268         int             rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
269
270         bool            is_registered, enable_scrub;
271
272         /* DCLK Frequency used for computing scrub rate */
273         int                     dclk_freq;
274
275         /* Struct to control EDAC polling */
276         struct edac_pci_ctl_info *i7core_pci;
277 };
278
279 #define PCI_DESCR(device, function, device_id)  \
280         .dev = (device),                        \
281         .func = (function),                     \
282         .dev_id = (device_id)
283
284 static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
285                 /* Memory controller */
286         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
287         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
288                         /* Exists only for RDIMM */
289         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
290         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
291
292                 /* Channel 0 */
293         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
294         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
295         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
296         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },
297
298                 /* Channel 1 */
299         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
300         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
301         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
302         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },
303
304                 /* Channel 2 */
305         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
306         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
307         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
308         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
309
310                 /* Generic Non-core registers */
311         /*
312          * This is the PCI device on i7core and on Xeon 35xx (8086:2c41).
313          * On Xeon 55xx, however, it has a different ID (8086:2c40). So,
314          * the probing code needs to test for the other device ID in case
315          * this one fails.
316          */
317         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },
318
319 };
320
321 static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
322         { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
323         { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
324         { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },
325
326         { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
327         { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
328         { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
329         { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },
330
331         { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
332         { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
333         { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
334         { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
335
336         /*
337          * This PCI device has an alternate address on some
338          * processors, like the Core i7 860.
339          */
340         { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
341 };
342
343 static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
344                 /* Memory controller */
345         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
346         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
347                         /* Exists only for RDIMM */
348         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
349         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
350
351                 /* Channel 0 */
352         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
353         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
354         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
355         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },
356
357                 /* Channel 1 */
358         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
359         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
360         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
361         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },
362
363                 /* Channel 2 */
364         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
365         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
366         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
367         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
368
369                 /* Generic Non-core registers */
370         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },
371
372 };
373
374 #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
375 static const struct pci_id_table pci_dev_table[] = {
376         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
377         PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
378         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
379         {0,}                    /* 0 terminated list. */
380 };
381
382 /*
383  *      pci_device_id   table for which devices we are looking for
384  */
385 static const struct pci_device_id i7core_pci_tbl[] = {
386         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
387         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
388         {0,}                    /* 0 terminated list. */
389 };
390
391 /****************************************************************************
392                         Ancillary status routines
393  ****************************************************************************/
394
395         /* MC_CONTROL bits */
396 #define CH_ACTIVE(pvt, ch)      ((pvt)->info.mc_control & (1 << (8 + (ch))))
397 #define ECCx8(pvt)              ((pvt)->info.mc_control & (1 << 1))
398
399         /* MC_STATUS bits */
400 #define ECC_ENABLED(pvt)        ((pvt)->info.mc_status & (1 << 4))
401 #define CH_DISABLED(pvt, ch)    ((pvt)->info.mc_status & (1 << (ch)))
402
403         /* MC_MAX_DOD read functions */
404 static inline int numdimms(u32 dimms)
405 {
406         return (dimms & 0x3) + 1;
407 }
408
409 static inline int numrank(u32 rank)
410 {
411         static const int ranks[] = { 1, 2, 4, -EINVAL };
412
413         return ranks[rank & 0x3];
414 }
415
416 static inline int numbank(u32 bank)
417 {
418         static const int banks[] = { 4, 8, 16, -EINVAL };
419
420         return banks[bank & 0x3];
421 }
422
423 static inline int numrow(u32 row)
424 {
425         static const int rows[] = {
426                 1 << 12, 1 << 13, 1 << 14, 1 << 15,
427                 1 << 16, -EINVAL, -EINVAL, -EINVAL,
428         };
429
430         return rows[row & 0x7];
431 }
432
433 static inline int numcol(u32 col)
434 {
435         static const int cols[] = {
436                 1 << 10, 1 << 11, 1 << 12, -EINVAL,
437         };
438         return cols[col & 0x3];
439 }
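
/*
 * Worked example (illustrative field values): a MC_DOD entry with
 * MC_DOD_NUMBANK() == 1, MC_DOD_NUMRANK() == 1, MC_DOD_NUMROW() == 2 and
 * MC_DOD_NUMCOL() == 0 decodes, via the helpers above, to 8 banks,
 * 2 ranks, 16384 (1 << 14) rows and 1024 (1 << 10) columns.
 */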
440
441 static struct i7core_dev *get_i7core_dev(u8 socket)
442 {
443         struct i7core_dev *i7core_dev;
444
445         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
446                 if (i7core_dev->socket == socket)
447                         return i7core_dev;
448         }
449
450         return NULL;
451 }
452
453 static struct i7core_dev *alloc_i7core_dev(u8 socket,
454                                            const struct pci_id_table *table)
455 {
456         struct i7core_dev *i7core_dev;
457
458         i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
459         if (!i7core_dev)
460                 return NULL;
461
462         i7core_dev->pdev = kcalloc(table->n_devs, sizeof(*i7core_dev->pdev),
463                                    GFP_KERNEL);
464         if (!i7core_dev->pdev) {
465                 kfree(i7core_dev);
466                 return NULL;
467         }
468
469         i7core_dev->socket = socket;
470         i7core_dev->n_devs = table->n_devs;
471         list_add_tail(&i7core_dev->list, &i7core_edac_list);
472
473         return i7core_dev;
474 }
475
476 static void free_i7core_dev(struct i7core_dev *i7core_dev)
477 {
478         list_del(&i7core_dev->list);
479         kfree(i7core_dev->pdev);
480         kfree(i7core_dev);
481 }
482
483 /****************************************************************************
484                         Memory check routines
485  ****************************************************************************/
486
487 static int get_dimm_config(struct mem_ctl_info *mci)
488 {
489         struct i7core_pvt *pvt = mci->pvt_info;
490         struct pci_dev *pdev;
491         int i, j;
492         enum edac_type mode;
493         enum mem_type mtype;
494         struct dimm_info *dimm;
495
496         /* Get data from the MC register, function 0 */
497         pdev = pvt->pci_mcr[0];
498         if (!pdev)
499                 return -ENODEV;
500
501         /* Device 3 function 0 reads */
502         pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
503         pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
504         pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
505         pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
506
507         edac_dbg(0, "QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
508                  pvt->i7core_dev->socket, pvt->info.mc_control,
509                  pvt->info.mc_status, pvt->info.max_dod, pvt->info.ch_map);
510
511         if (ECC_ENABLED(pvt)) {
512                 edac_dbg(0, "ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
513                 if (ECCx8(pvt))
514                         mode = EDAC_S8ECD8ED;
515                 else
516                         mode = EDAC_S4ECD4ED;
517         } else {
518                 edac_dbg(0, "ECC disabled\n");
519                 mode = EDAC_NONE;
520         }
521
522         /* FIXME: need to handle the error codes */
523         edac_dbg(0, "DOD Max limits: DIMMS: %d, %d-ranked, %d-banked x%x x 0x%x\n",
524                  numdimms(pvt->info.max_dod),
525                  numrank(pvt->info.max_dod >> 2),
526                  numbank(pvt->info.max_dod >> 4),
527                  numrow(pvt->info.max_dod >> 6),
528                  numcol(pvt->info.max_dod >> 9));
529
530         for (i = 0; i < NUM_CHANS; i++) {
531                 u32 data, dimm_dod[3], value[8];
532
533                 if (!pvt->pci_ch[i][0])
534                         continue;
535
536                 if (!CH_ACTIVE(pvt, i)) {
537                         edac_dbg(0, "Channel %i is not active\n", i);
538                         continue;
539                 }
540                 if (CH_DISABLED(pvt, i)) {
541                         edac_dbg(0, "Channel %i is disabled\n", i);
542                         continue;
543                 }
544
545                 /* Devices 4-6 function 0 */
546                 pci_read_config_dword(pvt->pci_ch[i][0],
547                                 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
548
549
550                 if (data & THREE_DIMMS_PRESENT)
551                         pvt->channel[i].is_3dimms_present = true;
552
553                 if (data & SINGLE_QUAD_RANK_PRESENT)
554                         pvt->channel[i].is_single_4rank = true;
555
556                 if (data & QUAD_RANK_PRESENT)
557                         pvt->channel[i].has_4rank = true;
558
559                 if (data & REGISTERED_DIMM)
560                         mtype = MEM_RDDR3;
561                 else
562                         mtype = MEM_DDR3;
563
564                 /* Devices 4-6 function 1 */
565                 pci_read_config_dword(pvt->pci_ch[i][1],
566                                 MC_DOD_CH_DIMM0, &dimm_dod[0]);
567                 pci_read_config_dword(pvt->pci_ch[i][1],
568                                 MC_DOD_CH_DIMM1, &dimm_dod[1]);
569                 pci_read_config_dword(pvt->pci_ch[i][1],
570                                 MC_DOD_CH_DIMM2, &dimm_dod[2]);
571
572                 edac_dbg(0, "Ch%d phy rd%d, wr%d (0x%08x): %s%s%s%cDIMMs\n",
573                          i,
574                          RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
575                          data,
576                          pvt->channel[i].is_3dimms_present ? "3DIMMS " : "",
577                          pvt->channel[i].is_single_4rank ? "SINGLE_4R " : "",
578                          pvt->channel[i].has_4rank ? "HAS_4R " : "",
579                          (data & REGISTERED_DIMM) ? 'R' : 'U');
580
581                 for (j = 0; j < 3; j++) {
582                         u32 banks, ranks, rows, cols;
583                         u32 size, npages;
584
585                         if (!DIMM_PRESENT(dimm_dod[j]))
586                                 continue;
587
588                         dimm = edac_get_dimm(mci, i, j, 0);
589                         banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
590                         ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
591                         rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
592                         cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
593
594                         /* DDR3 has 8 I/O banks */
595                         size = (rows * cols * banks * ranks) >> (20 - 3);
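                        /*
                         * Worked example (illustrative geometry): 16384 rows,
                         * 1024 cols, 8 banks, 2 ranks gives
                         * (16384 * 1024 * 8 * 2) >> 17 = 2048 MiB; the shift
                         * folds in the 8 bytes per address (<< 3) and the
                         * bytes-to-MiB conversion (>> 20).
                         */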
596
597                         edac_dbg(0, "\tdimm %d %d MiB offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n",
598                                  j, size,
599                                  RANKOFFSET(dimm_dod[j]),
600                                  banks, ranks, rows, cols);
601
602                         npages = MiB_TO_PAGES(size);
603
604                         dimm->nr_pages = npages;
605
606                         switch (banks) {
607                         case 4:
608                                 dimm->dtype = DEV_X4;
609                                 break;
610                         case 8:
611                                 dimm->dtype = DEV_X8;
612                                 break;
613                         case 16:
614                                 dimm->dtype = DEV_X16;
615                                 break;
616                         default:
617                                 dimm->dtype = DEV_UNKNOWN;
618                         }
619
620                         snprintf(dimm->label, sizeof(dimm->label),
621                                  "CPU#%uChannel#%u_DIMM#%u",
622                                  pvt->i7core_dev->socket, i, j);
623                         dimm->grain = 8;
624                         dimm->edac_mode = mode;
625                         dimm->mtype = mtype;
626                 }
627
628                 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
629                 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
630                 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
631                 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
632                 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
633                 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
634                 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
635                 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
636                 edac_dbg(1, "\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
637                 for (j = 0; j < 8; j++)
638                         edac_dbg(1, "\t\t%#x\t%#x\t%#x\n",
639                                  (value[j] >> 27) & 0x1,
640                                  (value[j] >> 24) & 0x7,
641                                  (value[j] & ((1 << 24) - 1)));
642         }
643
644         return 0;
645 }
646
647 /****************************************************************************
648                         Error insertion routines
649  ****************************************************************************/
650
651 #define to_mci(k) container_of(k, struct mem_ctl_info, dev)
652
653 /* The i7core has independent error injection features per channel.
654    However, to keep the code simpler, we don't allow enabling error
655    injection on more than one channel.
656    Also, since a change to an inject parameter is only applied at enable
657    time, we disable error injection on every write to the sysfs nodes
658    that control error injection.
659  */
660 static int disable_inject(const struct mem_ctl_info *mci)
661 {
662         struct i7core_pvt *pvt = mci->pvt_info;
663
664         pvt->inject.enable = 0;
665
666         if (!pvt->pci_ch[pvt->inject.channel][0])
667                 return -ENODEV;
668
669         pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
670                                 MC_CHANNEL_ERROR_INJECT, 0);
671
672         return 0;
673 }
674
675 /*
676  * i7core inject.section
677  *
678  *      accept and store the error injection inject.section value
679  *      bit 0 - refers to the lower 32-byte half cacheline
680  *      bit 1 - refers to the upper 32-byte half cacheline
681  */
682 static ssize_t i7core_inject_section_store(struct device *dev,
683                                            struct device_attribute *mattr,
684                                            const char *data, size_t count)
685 {
686         struct mem_ctl_info *mci = to_mci(dev);
687         struct i7core_pvt *pvt = mci->pvt_info;
688         unsigned long value;
689         int rc;
690
691         if (pvt->inject.enable)
692                 disable_inject(mci);
693
694         rc = kstrtoul(data, 10, &value);
695         if ((rc < 0) || (value > 3))
696                 return -EIO;
697
698         pvt->inject.section = (u32) value;
699         return count;
700 }
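
/*
 * Illustrative sysfs usage (hypothetical mc0 path; the node lives on the
 * EDAC mc device): select both 32-byte half cachelines for injection:
 *
 *   # echo 3 > /sys/devices/system/edac/mc/mc0/inject_section
 */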
701
702 static ssize_t i7core_inject_section_show(struct device *dev,
703                                           struct device_attribute *mattr,
704                                           char *data)
705 {
706         struct mem_ctl_info *mci = to_mci(dev);
707         struct i7core_pvt *pvt = mci->pvt_info;
708         return sprintf(data, "0x%08x\n", pvt->inject.section);
709 }
710
711 /*
712  * i7core inject.type
713  *
714  *      accept and store the error injection inject.type value
715  *      bit 0 - repeat enable - Enable error repetition
716  *      bit 1 - inject ECC error
717  *      bit 2 - inject parity error
718  */
719 static ssize_t i7core_inject_type_store(struct device *dev,
720                                         struct device_attribute *mattr,
721                                         const char *data, size_t count)
722 {
723         struct mem_ctl_info *mci = to_mci(dev);
724         struct i7core_pvt *pvt = mci->pvt_info;
725         unsigned long value;
726         int rc;
727
728         if (pvt->inject.enable)
729                 disable_inject(mci);
730
731         rc = kstrtoul(data, 10, &value);
732         if ((rc < 0) || (value > 7))
733                 return -EIO;
734
735         pvt->inject.type = (u32) value;
736         return count;
737 }
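
/*
 * Illustrative sysfs usage (hypothetical path): request repeated ECC error
 * injection by setting bit 0 (repeat enable) and bit 1 (inject ECC):
 *
 *   # echo 3 > /sys/devices/system/edac/mc/mc0/inject_type
 */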
738
739 static ssize_t i7core_inject_type_show(struct device *dev,
740                                        struct device_attribute *mattr,
741                                        char *data)
742 {
743         struct mem_ctl_info *mci = to_mci(dev);
744         struct i7core_pvt *pvt = mci->pvt_info;
745
746         return sprintf(data, "0x%08x\n", pvt->inject.type);
747 }
748
749 /*
750  * i7core_inject_eccmask_store
751  *
752  * The type of error (UE/CE) will depend on the inject.eccmask value:
753  *   Any bits set to a 1 will flip the corresponding ECC bit
754  *   Correctable errors can be injected by flipping 1 bit or the bits within
755  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
756  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
757  *   uncorrectable error to be injected.
758  */
759 static ssize_t i7core_inject_eccmask_store(struct device *dev,
760                                            struct device_attribute *mattr,
761                                            const char *data, size_t count)
762 {
763         struct mem_ctl_info *mci = to_mci(dev);
764         struct i7core_pvt *pvt = mci->pvt_info;
765         unsigned long value;
766         int rc;
767
768         if (pvt->inject.enable)
769                 disable_inject(mci);
770
771         rc = kstrtoul(data, 10, &value);
772         if (rc < 0)
773                 return -EIO;
774
775         pvt->inject.eccmask = (u32) value;
776         return count;
777 }
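
/*
 * Illustrative sysfs usage (hypothetical path): flip a single ECC bit so
 * that the injected error is correctable:
 *
 *   # echo 1 > /sys/devices/system/edac/mc/mc0/inject_eccmask
 */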
778
779 static ssize_t i7core_inject_eccmask_show(struct device *dev,
780                                           struct device_attribute *mattr,
781                                           char *data)
782 {
783         struct mem_ctl_info *mci = to_mci(dev);
784         struct i7core_pvt *pvt = mci->pvt_info;
785
786         return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
787 }
788
789 /*
790  * i7core_addrmatch
791  *
792  * Sysfs nodes that set the address-match criteria used by the error
793  * injection logic.
794  *
795  * Each parameter (channel, dimm, rank, bank, page, col) accepts either
796  * a number below its limit or the string "any", which tells the MC to
797  * ignore that field when matching addresses for injection.
798  */
799
800 #define DECLARE_ADDR_MATCH(param, limit)                        \
801 static ssize_t i7core_inject_store_##param(                     \
802         struct device *dev,                                     \
803         struct device_attribute *mattr,                         \
804         const char *data, size_t count)                         \
805 {                                                               \
806         struct mem_ctl_info *mci = dev_get_drvdata(dev);        \
807         struct i7core_pvt *pvt;                                 \
808         long value;                                             \
809         int rc;                                                 \
810                                                                 \
811         edac_dbg(1, "\n");                                      \
812         pvt = mci->pvt_info;                                    \
813                                                                 \
814         if (pvt->inject.enable)                                 \
815                 disable_inject(mci);                            \
816                                                                 \
817         if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
818                 value = -1;                                     \
819         else {                                                  \
820                 rc = kstrtol(data, 10, &value);                 \
821                 if ((rc < 0) || (value >= limit))               \
822                         return -EIO;                            \
823         }                                                       \
824                                                                 \
825         pvt->inject.param = value;                              \
826                                                                 \
827         return count;                                           \
828 }                                                               \
829                                                                 \
830 static ssize_t i7core_inject_show_##param(                      \
831         struct device *dev,                                     \
832         struct device_attribute *mattr,                         \
833         char *data)                                             \
834 {                                                               \
835         struct mem_ctl_info *mci = dev_get_drvdata(dev);        \
836         struct i7core_pvt *pvt;                                 \
837                                                                 \
838         pvt = mci->pvt_info;                                    \
839         edac_dbg(1, "pvt=%p\n", pvt);                           \
840         if (pvt->inject.param < 0)                              \
841                 return sprintf(data, "any\n");                  \
842         else                                                    \
843                 return sprintf(data, "%d\n", pvt->inject.param);\
844 }
845
846 #define ATTR_ADDR_MATCH(param)                                  \
847         static DEVICE_ATTR(param, S_IRUGO | S_IWUSR,            \
848                     i7core_inject_show_##param,                 \
849                     i7core_inject_store_##param)
850
851 DECLARE_ADDR_MATCH(channel, 3);
852 DECLARE_ADDR_MATCH(dimm, 3);
853 DECLARE_ADDR_MATCH(rank, 4);
854 DECLARE_ADDR_MATCH(bank, 32);
855 DECLARE_ADDR_MATCH(page, 0x10000);
856 DECLARE_ADDR_MATCH(col, 0x4000);
857
858 ATTR_ADDR_MATCH(channel);
859 ATTR_ADDR_MATCH(dimm);
860 ATTR_ADDR_MATCH(rank);
861 ATTR_ADDR_MATCH(bank);
862 ATTR_ADDR_MATCH(page);
863 ATTR_ADDR_MATCH(col);
864
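/*
 * Illustrative usage of the address-match nodes declared above
 * (hypothetical mc0 path; "inject_addrmatch" is the device name registered
 * by i7core_create_sysfs_devices() below): match channel 1, any DIMM:
 *
 *   # echo 1   > /sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
 *   # echo any > /sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
 */
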
865 static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
866 {
867         u32 read;
868         int count;
869
870         edac_dbg(0, "setting pci %02x:%02x.%x reg=%02x value=%08x\n",
871                  dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
872                  where, val);
873
874         for (count = 0; count < 10; count++) {
875                 if (count)
876                         msleep(100);
877                 pci_write_config_dword(dev, where, val);
878                 pci_read_config_dword(dev, where, &read);
879
880                 if (read == val)
881                         return 0;
882         }
883
884         i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
885                 "write=%08x. Read=%08x\n",
886                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
887                 where, val, read);
888
889         return -EINVAL;
890 }
891
892 /*
893  * This routine prepares the Memory Controller for error injection.
894  * The error will be injected when some process tries to write to the
895  * memory that matches the given criteria.
896  * The criteria can be set in terms of a mask where dimm, rank, bank, page
897  * and col can be specified.
898  * A -1 value for any of the mask items will make the MCU ignore
899  * that matching criterion for error injection.
900  *
901  * Note that the error will only happen after a write operation to
902  * memory that matches the criteria. If REPEAT_EN is not set in the
903  * inject mask, just one error is produced. Otherwise, errors repeat
904  * until the inject mask is cleared.
905  *
906  * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
907  *    is reliable enough to check whether the MC is using all
908  *    three channels. However, the datasheet is not clear about this.
909  */
910 static ssize_t i7core_inject_enable_store(struct device *dev,
911                                           struct device_attribute *mattr,
912                                           const char *data, size_t count)
913 {
914         struct mem_ctl_info *mci = to_mci(dev);
915         struct i7core_pvt *pvt = mci->pvt_info;
916         u32 injectmask;
917         u64 mask = 0;
918         int  rc;
919         long enable;
920
921         if (!pvt->pci_ch[pvt->inject.channel][0])
922                 return 0;
923
924         rc = kstrtol(data, 10, &enable);
925         if (rc < 0)
926                 return 0;
927
928         if (enable) {
929                 pvt->inject.enable = 1;
930         } else {
931                 disable_inject(mci);
932                 return count;
933         }
934
935         /* Sets pvt->inject.dimm mask */
936         if (pvt->inject.dimm < 0)
937                 mask |= 1LL << 41;
938         else {
939                 if (pvt->channel[pvt->inject.channel].dimms > 2)
940                         mask |= (pvt->inject.dimm & 0x3LL) << 35;
941                 else
942                         mask |= (pvt->inject.dimm & 0x1LL) << 36;
943         }
944
945         /* Sets pvt->inject.rank mask */
946         if (pvt->inject.rank < 0)
947                 mask |= 1LL << 40;
948         else {
949                 if (pvt->channel[pvt->inject.channel].dimms > 2)
950                         mask |= (pvt->inject.rank & 0x1LL) << 34;
951                 else
952                         mask |= (pvt->inject.rank & 0x3LL) << 34;
953         }
954
955         /* Sets pvt->inject.bank mask */
956         if (pvt->inject.bank < 0)
957                 mask |= 1LL << 39;
958         else
959                 mask |= (pvt->inject.bank & 0x1fLL) << 30;
960
961         /* Sets pvt->inject.page mask */
962         if (pvt->inject.page < 0)
963                 mask |= 1LL << 38;
964         else
965                 mask |= (pvt->inject.page & 0xffff) << 14;
966
967         /* Sets pvt->inject.column mask */
968         if (pvt->inject.col < 0)
969                 mask |= 1LL << 37;
970         else
971                 mask |= (pvt->inject.col & 0x3fff);
972
973         /*
974          * bit    0: REPEAT_EN
975          * bits 1-2: MASK_HALF_CACHELINE
976          * bit    3: INJECT_ECC
977          * bit    4: INJECT_ADDR_PARITY
978          */
979
980         injectmask = (pvt->inject.type & 1) |
981                      (pvt->inject.section & 0x3) << 1 |
982                      (pvt->inject.type & 0x6) << (3 - 1);
983
984         /* Unlock writes to registers - this register is write only */
985         pci_write_config_dword(pvt->pci_noncore,
986                                MC_CFG_CONTROL, 0x2);
987
988         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
989                                MC_CHANNEL_ADDR_MATCH, mask);
990         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
991                                MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
992
993         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
994                                MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
995
996         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
997                                MC_CHANNEL_ERROR_INJECT, injectmask);
998
999         /*
1000          * This is something undocumented, based on my tests.
1001          * Without writing 8 to this register, errors aren't injected. Not sure
1002          * why.
1003          */
1004         pci_write_config_dword(pvt->pci_noncore,
1005                                MC_CFG_CONTROL, 8);
1006
1007         edac_dbg(0, "Error inject addr match 0x%016llx, ecc 0x%08x, inject 0x%08x\n",
1008                  mask, pvt->inject.eccmask, injectmask);
1009
1010
1011         return count;
1012 }
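
/*
 * Illustrative end-to-end injection session (hypothetical paths, using the
 * sysfs nodes declared in this file):
 *
 *   # cd /sys/devices/system/edac/mc/mc0
 *   # echo 2 > inject_addrmatch/channel   (restrict matching to channel 2)
 *   # echo 3 > inject_type                (repeat enable + inject ECC)
 *   # echo 1 > inject_eccmask             (flip one ECC bit: correctable)
 *   # echo 1 > inject_enable
 */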
1013
1014 static ssize_t i7core_inject_enable_show(struct device *dev,
1015                                          struct device_attribute *mattr,
1016                                          char *data)
1017 {
1018         struct mem_ctl_info *mci = to_mci(dev);
1019         struct i7core_pvt *pvt = mci->pvt_info;
1020         u32 injectmask;
1021
1022         if (!pvt->pci_ch[pvt->inject.channel][0])
1023                 return 0;
1024
1025         pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1026                                MC_CHANNEL_ERROR_INJECT, &injectmask);
1027
1028         edac_dbg(0, "Inject error read: 0x%08x\n", injectmask);
1029
1030         if (injectmask & 0x0c)
1031                 pvt->inject.enable = 1;
1032
1033         return sprintf(data, "%d\n", pvt->inject.enable);
1034 }
1035
1036 #define DECLARE_COUNTER(param)                                  \
1037 static ssize_t i7core_show_counter_##param(                     \
1038         struct device *dev,                                     \
1039         struct device_attribute *mattr,                         \
1040         char *data)                                             \
1041 {                                                               \
1042         struct mem_ctl_info *mci = dev_get_drvdata(dev);        \
1043         struct i7core_pvt *pvt = mci->pvt_info;                 \
1044                                                                 \
1045         edac_dbg(1, "\n");                                      \
1046         if (!pvt->ce_count_available || (pvt->is_registered))   \
1047                 return sprintf(data, "data unavailable\n");     \
1048         return sprintf(data, "%lu\n",                           \
1049                         pvt->udimm_ce_count[param]);            \
1050 }
1051
1052 #define ATTR_COUNTER(param)                                     \
1053         static DEVICE_ATTR(udimm##param, S_IRUGO | S_IWUSR,     \
1054                     i7core_show_counter_##param,                \
1055                     NULL)
1056
1057 DECLARE_COUNTER(0);
1058 DECLARE_COUNTER(1);
1059 DECLARE_COUNTER(2);
1060
1061 ATTR_COUNTER(0);
1062 ATTR_COUNTER(1);
1063 ATTR_COUNTER(2);
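
/*
 * Illustrative read of the per-UDIMM corrected-error counters exposed by
 * the attributes above (hypothetical path; "all_channel_counts" is only
 * created when the DIMMs are unregistered):
 *
 *   # cat /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm0
 */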
1064
1065 /*
1066  * inject_addrmatch device sysfs struct
1067  */
1068
1069 static struct attribute *i7core_addrmatch_attrs[] = {
1070         &dev_attr_channel.attr,
1071         &dev_attr_dimm.attr,
1072         &dev_attr_rank.attr,
1073         &dev_attr_bank.attr,
1074         &dev_attr_page.attr,
1075         &dev_attr_col.attr,
1076         NULL
1077 };
1078
1079 static const struct attribute_group addrmatch_grp = {
1080         .attrs  = i7core_addrmatch_attrs,
1081 };
1082
1083 static const struct attribute_group *addrmatch_groups[] = {
1084         &addrmatch_grp,
1085         NULL
1086 };
1087
1088 static void addrmatch_release(struct device *device)
1089 {
1090         edac_dbg(1, "Releasing device %s\n", dev_name(device));
1091         kfree(device);
1092 }
1093
1094 static const struct device_type addrmatch_type = {
1095         .groups         = addrmatch_groups,
1096         .release        = addrmatch_release,
1097 };
1098
1099 /*
1100  * all_channel_counts sysfs struct
1101  */
1102
1103 static struct attribute *i7core_udimm_counters_attrs[] = {
1104         &dev_attr_udimm0.attr,
1105         &dev_attr_udimm1.attr,
1106         &dev_attr_udimm2.attr,
1107         NULL
1108 };
1109
1110 static const struct attribute_group all_channel_counts_grp = {
1111         .attrs  = i7core_udimm_counters_attrs,
1112 };
1113
1114 static const struct attribute_group *all_channel_counts_groups[] = {
1115         &all_channel_counts_grp,
1116         NULL
1117 };
1118
1119 static void all_channel_counts_release(struct device *device)
1120 {
1121         edac_dbg(1, "Releasing device %s\n", dev_name(device));
1122         kfree(device);
1123 }
1124
1125 static const struct device_type all_channel_counts_type = {
1126         .groups         = all_channel_counts_groups,
1127         .release        = all_channel_counts_release,
1128 };
1129
1130 /*
1131  * inject sysfs attributes
1132  */
1133
1134 static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR,
1135                    i7core_inject_section_show, i7core_inject_section_store);
1136
1137 static DEVICE_ATTR(inject_type, S_IRUGO | S_IWUSR,
1138                    i7core_inject_type_show, i7core_inject_type_store);
1139
1140
1141 static DEVICE_ATTR(inject_eccmask, S_IRUGO | S_IWUSR,
1142                    i7core_inject_eccmask_show, i7core_inject_eccmask_store);
1143
1144 static DEVICE_ATTR(inject_enable, S_IRUGO | S_IWUSR,
1145                    i7core_inject_enable_show, i7core_inject_enable_store);
1146
1147 static struct attribute *i7core_dev_attrs[] = {
1148         &dev_attr_inject_section.attr,
1149         &dev_attr_inject_type.attr,
1150         &dev_attr_inject_eccmask.attr,
1151         &dev_attr_inject_enable.attr,
1152         NULL
1153 };
1154
1155 ATTRIBUTE_GROUPS(i7core_dev);
1156
1157 static int i7core_create_sysfs_devices(struct mem_ctl_info *mci)
1158 {
1159         struct i7core_pvt *pvt = mci->pvt_info;
1160         int rc;
1161
1162         pvt->addrmatch_dev = kzalloc(sizeof(*pvt->addrmatch_dev), GFP_KERNEL);
1163         if (!pvt->addrmatch_dev)
1164                 return -ENOMEM;
1165
1166         pvt->addrmatch_dev->type = &addrmatch_type;
1167         pvt->addrmatch_dev->bus = mci->dev.bus;
1168         device_initialize(pvt->addrmatch_dev);
1169         pvt->addrmatch_dev->parent = &mci->dev;
1170         dev_set_name(pvt->addrmatch_dev, "inject_addrmatch");
1171         dev_set_drvdata(pvt->addrmatch_dev, mci);
1172
1173         edac_dbg(1, "creating %s\n", dev_name(pvt->addrmatch_dev));
1174
1175         rc = device_add(pvt->addrmatch_dev);
1176         if (rc < 0)
1177                 goto err_put_addrmatch;
1178
1179         if (!pvt->is_registered) {
1180                 pvt->chancounts_dev = kzalloc(sizeof(*pvt->chancounts_dev),
1181                                               GFP_KERNEL);
1182                 if (!pvt->chancounts_dev) {
1183                         rc = -ENOMEM;
1184                         goto err_del_addrmatch;
1185                 }
1186
1187                 pvt->chancounts_dev->type = &all_channel_counts_type;
1188                 pvt->chancounts_dev->bus = mci->dev.bus;
1189                 device_initialize(pvt->chancounts_dev);
1190                 pvt->chancounts_dev->parent = &mci->dev;
1191                 dev_set_name(pvt->chancounts_dev, "all_channel_counts");
1192                 dev_set_drvdata(pvt->chancounts_dev, mci);
1193
1194                 edac_dbg(1, "creating %s\n", dev_name(pvt->chancounts_dev));
1195
1196                 rc = device_add(pvt->chancounts_dev);
1197                 if (rc < 0)
1198                         goto err_put_chancounts;
1199         }
1200         return 0;
1201
1202 err_put_chancounts:
1203         put_device(pvt->chancounts_dev);
1204 err_del_addrmatch:
1205         device_del(pvt->addrmatch_dev);
1206 err_put_addrmatch:
1207         put_device(pvt->addrmatch_dev);
1208
1209         return rc;
1210 }
1211
1212 static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci)
1213 {
1214         struct i7core_pvt *pvt = mci->pvt_info;
1215
1216         edac_dbg(1, "\n");
1217
1218         if (!pvt->is_registered) {
1219                 device_del(pvt->chancounts_dev);
1220                 put_device(pvt->chancounts_dev);
1221         }
1222         device_del(pvt->addrmatch_dev);
1223         put_device(pvt->addrmatch_dev);
1224 }
1225
1226 /****************************************************************************
1227         Device initialization routines: put/get, init/exit
1228  ****************************************************************************/
1229
1230 /*
1231  *      i7core_put_devices      'put' all the devices that we have
1232  *                              reserved via 'get'
1233  */
1234 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1235 {
1236         int i;
1237
1238         edac_dbg(0, "\n");
1239         for (i = 0; i < i7core_dev->n_devs; i++) {
1240                 struct pci_dev *pdev = i7core_dev->pdev[i];
1241                 if (!pdev)
1242                         continue;
1243                 edac_dbg(0, "Removing dev %02x:%02x.%d\n",
1244                          pdev->bus->number,
1245                          PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1246                 pci_dev_put(pdev);
1247         }
1248 }
1249
1250 static void i7core_put_all_devices(void)
1251 {
1252         struct i7core_dev *i7core_dev, *tmp;
1253
1254         list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
1255                 i7core_put_devices(i7core_dev);
1256                 free_i7core_dev(i7core_dev);
1257         }
1258 }
1259
1260 static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
1261 {
1262         struct pci_dev *pdev = NULL;
1263         int i;
1264
1265         /*
1266          * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core PCI buses
1267          * aren't announced by ACPI. So, we need to use a legacy bus scan to
1268          * detect them.
1269          */
1270         while (table && table->descr) {
1271                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1272                 if (unlikely(!pdev)) {
1273                         for (i = 0; i < MAX_SOCKET_BUSES; i++)
1274                                 pcibios_scan_specific_bus(255-i);
1275                 }
1276                 pci_dev_put(pdev);
1277                 table++;
1278         }
1279 }
1280
1281 static unsigned i7core_pci_lastbus(void)
1282 {
1283         int last_bus = 0, bus;
1284         struct pci_bus *b = NULL;
1285
1286         while ((b = pci_find_next_bus(b)) != NULL) {
1287                 bus = b->number;
1288                 edac_dbg(0, "Found bus %d\n", bus);
1289                 if (bus > last_bus)
1290                         last_bus = bus;
1291         }
1292
1293         edac_dbg(0, "Last bus %d\n", last_bus);
1294
1295         return last_bus;
1296 }
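
/*
 * Illustrative mapping (assuming the common case where the hidden buses sit
 * at the top of the bus space): with last_bus == 255, the socket number
 * computed below as (last_bus - bus) maps bus 255 to socket 0 and bus 254
 * to socket 1.
 */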
1297
1298 /*
1299  *      i7core_get_onedevice    Find and perform a 'get' operation on one
1300  *                      of the MCH's devices/functions we want to reference
1301  *
1302  *                      Need to 'get' devices 0 and 3-6 on each socket's bus
1303  */
1304 static int i7core_get_onedevice(struct pci_dev **prev,
1305                                 const struct pci_id_table *table,
1306                                 const unsigned devno,
1307                                 const unsigned last_bus)
1308 {
1309         struct i7core_dev *i7core_dev;
1310         const struct pci_id_descr *dev_descr = &table->descr[devno];
1311
1312         struct pci_dev *pdev = NULL;
1313         u8 bus = 0;
1314         u8 socket = 0;
1315
1316         pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1317                               dev_descr->dev_id, *prev);
1318
1319         /*
1320          * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
1321          * are at ID 8086:2c40, instead of 8086:2c41. So, we need
1322          * to probe for the alternate device ID in case of failure.
1323          */
1324         if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) {
1325                 pci_dev_get(*prev);     /* pci_get_device will put it */
1326                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1327                                       PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
1328         }
1329
1330         if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE &&
1331             !pdev) {
1332                 pci_dev_get(*prev);     /* pci_get_device will put it */
1333                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1334                                       PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
1335                                       *prev);
1336         }
1337
1338         if (!pdev) {
1339                 if (*prev) {
1340                         *prev = pdev;
1341                         return 0;
1342                 }
1343
1344                 if (dev_descr->optional)
1345                         return 0;
1346
1347                 if (devno == 0)
1348                         return -ENODEV;
1349
1350                 i7core_printk(KERN_INFO,
1351                         "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1352                         dev_descr->dev, dev_descr->func,
1353                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1354
1355                 /* End of list, leave */
1356                 return -ENODEV;
1357         }
1358         bus = pdev->bus->number;
1359
1360         socket = last_bus - bus;
1361
1362         i7core_dev = get_i7core_dev(socket);
1363         if (!i7core_dev) {
1364                 i7core_dev = alloc_i7core_dev(socket, table);
1365                 if (!i7core_dev) {
1366                         pci_dev_put(pdev);
1367                         return -ENOMEM;
1368                 }
1369         }
1370
1371         if (i7core_dev->pdev[devno]) {
1372                 i7core_printk(KERN_ERR,
1373                         "Duplicated device for "
1374                         "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1375                         bus, dev_descr->dev, dev_descr->func,
1376                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1377                 pci_dev_put(pdev);
1378                 return -ENODEV;
1379         }
1380
1381         i7core_dev->pdev[devno] = pdev;
1382
1383         /* Sanity check */
1384         if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1385                         PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1386                 i7core_printk(KERN_ERR,
1387                         "Device PCI ID %04x:%04x "
1388                         "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1389                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1390                         bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1391                         bus, dev_descr->dev, dev_descr->func);
1392                 return -ENODEV;
1393         }
1394
1395         /* Be sure that the device is enabled */
1396         if (unlikely(pci_enable_device(pdev) < 0)) {
1397                 i7core_printk(KERN_ERR,
1398                         "Couldn't enable "
1399                         "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1400                         bus, dev_descr->dev, dev_descr->func,
1401                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1402                 return -ENODEV;
1403         }
1404
1405         edac_dbg(0, "Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1406                  socket, bus, dev_descr->dev,
1407                  dev_descr->func,
1408                  PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1409
1410         /*
1411          * As stated in drivers/pci/search.c, the reference count for
1412          * @from is always decremented if it is not %NULL. Since we iterate
1413          * until pci_get_device() returns NULL, take an extra reference here.
1414          */
1415         pci_dev_get(pdev);
1416
1417         *prev = pdev;
1418
1419         return 0;
1420 }

static int i7core_get_all_devices(void)
{
        int i, rc, last_bus;
        struct pci_dev *pdev = NULL;
        const struct pci_id_table *table = pci_dev_table;

        last_bus = i7core_pci_lastbus();

        while (table && table->descr) {
                for (i = 0; i < table->n_devs; i++) {
                        pdev = NULL;
                        do {
                                rc = i7core_get_onedevice(&pdev, table, i,
                                                          last_bus);
                                if (rc < 0) {
                                        if (i == 0) {
                                                i = table->n_devs;
                                                break;
                                        }
                                        i7core_put_all_devices();
                                        return -ENODEV;
                                }
                        } while (pdev);
                }
                table++;
        }

        return 0;
}

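/*
 * Overview of the per-socket PCI device layout consumed below (as
 * implemented by this driver; the datasheets referenced at the top of
 * this file are the authoritative description): device 0 function 0
 * carries the non-core registers, device 3 functions 0..MAX_MCR_FUNC
 * carry the memory controller registers (pci_mcr[]), and devices
 * 4..(4 + NUM_CHANS - 1) carry the per-channel registers (pci_ch[][]).
 * The presence of device 3 function 2 indicates registered (RDIMM)
 * memory.
 */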
static int mci_bind_devs(struct mem_ctl_info *mci,
                         struct i7core_dev *i7core_dev)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        struct pci_dev *pdev;
        int i, func, slot;
        char *family;

        pvt->is_registered = false;
        pvt->enable_scrub  = false;
        for (i = 0; i < i7core_dev->n_devs; i++) {
                pdev = i7core_dev->pdev[i];
                if (!pdev)
                        continue;

                func = PCI_FUNC(pdev->devfn);
                slot = PCI_SLOT(pdev->devfn);
                if (slot == 3) {
                        if (unlikely(func > MAX_MCR_FUNC))
                                goto error;
                        pvt->pci_mcr[func] = pdev;
                } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
                        if (unlikely(func > MAX_CHAN_FUNC))
                                goto error;
                        pvt->pci_ch[slot - 4][func] = pdev;
                } else if (!slot && !func) {
                        pvt->pci_noncore = pdev;

                        /* Detect the processor family */
                        switch (pdev->device) {
                        case PCI_DEVICE_ID_INTEL_I7_NONCORE:
                                family = "Xeon 35xx/ i7core";
                                pvt->enable_scrub = false;
                                break;
                        case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
                                family = "i7-800/i5-700";
                                pvt->enable_scrub = false;
                                break;
                        case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
                                family = "Xeon 34xx";
                                pvt->enable_scrub = false;
                                break;
                        case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
                                family = "Xeon 55xx";
                                pvt->enable_scrub = true;
                                break;
                        case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
                                family = "Xeon 56xx / i7-900";
                                pvt->enable_scrub = true;
                                break;
                        default:
                                family = "unknown";
                                pvt->enable_scrub = false;
                        }
                        edac_dbg(0, "Detected processor type %s\n", family);
                } else
                        goto error;

                edac_dbg(0, "Associated fn %d.%d, dev = %p, socket %d\n",
                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                         pdev, i7core_dev->socket);

                if (PCI_SLOT(pdev->devfn) == 3 &&
                        PCI_FUNC(pdev->devfn) == 2)
                        pvt->is_registered = true;
        }

        return 0;

error:
        i7core_printk(KERN_ERR, "Device %d, function %d "
                      "is out of the expected range\n",
                      slot, func);
        return -EINVAL;
}

/****************************************************************************
                        Error check routines
 ****************************************************************************/

static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
                                         const int chan,
                                         const int new0,
                                         const int new1,
                                         const int new2)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        int add0 = 0, add1 = 0, add2 = 0;
        /* Update the CE counters only if this is not the first read */
        if (pvt->ce_count_available) {

                add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
                add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
                add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

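                /*
                 * The hardware counters are 15 bits wide (see the
                 * DIMM*_COR_ERR() masks), so a negative delta means the
                 * counter wrapped between two reads.  The driver assumes
                 * a modulus of 0x7fff; as an illustrative example,
                 * last = 0x7ffe and new = 0x0002 gives add = -0x7ffc,
                 * and adding 0x7fff recovers the 3 errors that occurred
                 * in between.
                 */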
                if (add2 < 0)
                        add2 += 0x7fff;
                pvt->rdimm_ce_count[chan][2] += add2;

                if (add1 < 0)
                        add1 += 0x7fff;
                pvt->rdimm_ce_count[chan][1] += add1;

                if (add0 < 0)
                        add0 += 0x7fff;
                pvt->rdimm_ce_count[chan][0] += add0;
        } else
                pvt->ce_count_available = 1;

        /* Store the new values */
        pvt->rdimm_last_ce_count[chan][2] = new2;
        pvt->rdimm_last_ce_count[chan][1] = new1;
        pvt->rdimm_last_ce_count[chan][0] = new0;

        /* Update the EDAC core */
        if (add0 != 0)
                edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add0,
                                     0, 0, 0,
                                     chan, 0, -1, "error", "");
        if (add1 != 0)
                edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add1,
                                     0, 0, 0,
                                     chan, 1, -1, "error", "");
        if (add2 != 0)
                edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add2,
                                     0, 0, 0,
                                     chan, 2, -1, "error", "");
}

static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        u32 rcv[3][2];
        int i, new0, new1, new2;

        /* Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly */
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
                                                                &rcv[0][0]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
                                                                &rcv[0][1]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
                                                                &rcv[1][0]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
                                                                &rcv[1][1]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
                                                                &rcv[2][0]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
                                                                &rcv[2][1]);
        for (i = 0; i < 3; i++) {
                edac_dbg(3, "MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
                         (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
                /* If the channel has 3 DIMMs */
                if (pvt->channel[i].dimms > 2) {
                        new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
                        new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
                        new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
                } else {
                        new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
                                        DIMM_BOT_COR_ERR(rcv[i][0]);
                        new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
                                        DIMM_BOT_COR_ERR(rcv[i][1]);
                        new2 = 0;
                }

                i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
        }
}

/* This function is based on the device 3 function 4 registers as described in:
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */
static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        u32 rcv1, rcv0;
        int new0, new1, new2;

        if (!pvt->pci_mcr[4]) {
                edac_dbg(0, "MCR registers not found\n");
                return;
        }

        /* Corrected test errors */
        pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
        pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);

        /* Store the new values */
        new2 = DIMM2_COR_ERR(rcv1);
        new1 = DIMM1_COR_ERR(rcv0);
        new0 = DIMM0_COR_ERR(rcv0);

        /* Update the CE counters only if this is not the first read */
        if (pvt->ce_count_available) {
                int add0, add1, add2;

                add2 = new2 - pvt->udimm_last_ce_count[2];
                add1 = new1 - pvt->udimm_last_ce_count[1];
                add0 = new0 - pvt->udimm_last_ce_count[0];

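                /*
                 * Same 15-bit counter wraparound handling as in
                 * i7core_rdimm_update_ce_count() above.
                 */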
                if (add2 < 0)
                        add2 += 0x7fff;
                pvt->udimm_ce_count[2] += add2;

                if (add1 < 0)
                        add1 += 0x7fff;
                pvt->udimm_ce_count[1] += add1;

                if (add0 < 0)
                        add0 += 0x7fff;
                pvt->udimm_ce_count[0] += add0;

                if (add0 | add1 | add2)
                        i7core_printk(KERN_ERR, "New Corrected error(s): "
                                      "dimm0: +%d, dimm1: +%d, dimm2: +%d\n",
                                      add0, add1, add2);
        } else
                pvt->ce_count_available = 1;

        /* Store the new values */
        pvt->udimm_last_ce_count[2] = new2;
        pvt->udimm_last_ce_count[1] = new1;
        pvt->udimm_last_ce_count[0] = new0;
}

/*
 * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and
 * IA-32 Architectures Software Developer's Manual Volume 3B, Nehalem is
 * defined as family 0x06, model 0x1a.
 *
 * The MCA registers used here are the following ones:
 *     struct mce field MCA Register
 *     m->status        MSR_IA32_MC8_STATUS
 *     m->addr          MSR_IA32_MC8_ADDR
 *     m->misc          MSR_IA32_MC8_MISC
 * In the case of Nehalem, the error information is masked in the .status
 * and .misc fields.
 */
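/*
 * Worked example of the field extraction done below (the value is
 * illustrative only): for m->status = 0x8800004000010090,
 * (status >> 4) & 0x07 = 1 selects optype "read error",
 * status & 0x1ff0000 sets bit 16, which decodes to "read ECC error",
 * and status bits 52:38 give a corrected error count of 1; the DIMM,
 * channel and 32-bit syndrome come from the corresponding m->misc
 * bitfields.
 */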
static void i7core_mce_output_error(struct mem_ctl_info *mci,
                                    const struct mce *m)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        char *optype, *err;
        enum hw_event_mc_err_type tp_event;
        unsigned long error = m->status & 0x1ff0000l;
        /* The UC flag (bit 61) lives in the bank status register */
        bool uncorrected_error = m->status & 1ll << 61;
        bool ripv = m->mcgstatus & 1;
        u32 optypenum = (m->status >> 4) & 0x07;
        u32 core_err_cnt = (m->status >> 38) & 0x7fff;
        u32 dimm = (m->misc >> 16) & 0x3;
        u32 channel = (m->misc >> 18) & 0x3;
        u32 syndrome = m->misc >> 32;
        u32 errnum = find_first_bit(&error, 32);

        if (uncorrected_error) {
                core_err_cnt = 1;
                if (ripv)
                        tp_event = HW_EVENT_ERR_UNCORRECTED;
                else
                        tp_event = HW_EVENT_ERR_FATAL;
        } else {
                tp_event = HW_EVENT_ERR_CORRECTED;
        }

        switch (optypenum) {
        case 0:
                optype = "generic undef request";
                break;
        case 1:
                optype = "read error";
                break;
        case 2:
                optype = "write error";
                break;
        case 3:
                optype = "addr/cmd error";
                break;
        case 4:
                optype = "scrubbing error";
                break;
        default:
                optype = "reserved";
                break;
        }

        switch (errnum) {
        case 16:
                err = "read ECC error";
                break;
        case 17:
                err = "RAS ECC error";
                break;
        case 18:
                err = "write parity error";
                break;
        case 19:
                err = "redundancy loss";
                break;
        case 20:
                err = "reserved";
                break;
        case 21:
                err = "memory range error";
                break;
        case 22:
                err = "RTID out of range";
                break;
        case 23:
                err = "address parity error";
                break;
        case 24:
                err = "byte enable parity error";
                break;
        default:
                err = "unknown";
        }

        /*
         * Call the helper to output message
         * FIXME: what to do if core_err_cnt > 1? Currently, it generates
         * only one event
         */
        if (uncorrected_error || !pvt->is_registered)
                edac_mc_handle_error(tp_event, mci, core_err_cnt,
                                     m->addr >> PAGE_SHIFT,
                                     m->addr & ~PAGE_MASK,
                                     syndrome,
                                     channel, dimm, -1,
                                     err, optype);
}

/*
 *      i7core_check_error      Retrieve and process errors reported by the
 *                              hardware. Called by the Core module.
 */
static void i7core_check_error(struct mem_ctl_info *mci, struct mce *m)
{
        struct i7core_pvt *pvt = mci->pvt_info;

        i7core_mce_output_error(mci, m);

        /*
         * Now, let's increment CE error counts
         */
        if (!pvt->is_registered)
                i7core_udimm_check_mc_ecc_err(mci);
        else
                i7core_rdimm_check_mc_ecc_err(mci);
}

/*
 * Check that logging is enabled and that this is the right type
 * of error for us to handle.
 */
static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
                                  void *data)
{
        struct mce *mce = (struct mce *)data;
        struct i7core_dev *i7_dev;
        struct mem_ctl_info *mci;

        i7_dev = get_i7core_dev(mce->socketid);
        if (!i7_dev || (mce->kflags & MCE_HANDLED_CEC))
                return NOTIFY_DONE;

        mci = i7_dev->mci;

        /*
         * Just let mcelog handle it if the error is
         * outside the memory controller
         */
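        /*
         * The low 16 bits of the status register hold the MCA error code
         * (MCACOD); memory controller errors are encoded as
         * 0000_0000_1MMM_CCCC, so bits 15:7 must read as 1 here.
         */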
        if (((mce->status & 0xffff) >> 7) != 1)
                return NOTIFY_DONE;

        /* Bank 8 registers are the only ones that we know how to handle */
        if (mce->bank != 8)
                return NOTIFY_DONE;

        i7core_check_error(mci, mce);

        /* Advise mcelog that the errors were handled */
        mce->kflags |= MCE_HANDLED_EDAC;
        return NOTIFY_OK;
}

static struct notifier_block i7_mce_dec = {
        .notifier_call  = i7core_mce_check_error,
        .priority       = MCE_PRIO_EDAC,
};

struct memdev_dmi_entry {
        u8 type;
        u8 length;
        u16 handle;
        u16 phys_mem_array_handle;
        u16 mem_err_info_handle;
        u16 total_width;
        u16 data_width;
        u16 size;
        u8 form;
        u8 device_set;
        u8 device_locator;
        u8 bank_locator;
        u8 memory_type;
        u16 type_detail;
        u16 speed;
        u8 manufacturer;
        u8 serial_number;
        u8 asset_tag;
        u8 part_number;
        u8 attributes;
        u32 extended_size;
        u16 conf_mem_clk_speed;
} __attribute__((__packed__));

/*
 * Decode the DRAM clock frequency.  Be paranoid: make sure that all
 * memory devices report the same speed, and if they don't, consider
 * all the reported speeds invalid.
 */
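/*
 * The accumulator passed in via _dclk_freq acts as a small state
 * machine: 0 means no speed has been seen yet, a positive value is the
 * common speed reported so far, and -1 marks the speed as invalid or
 * inconsistent.
 */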
static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
{
        int *dclk_freq = _dclk_freq;
        u16 dmi_mem_clk_speed;

        if (*dclk_freq == -1)
                return;

        if (dh->type == DMI_ENTRY_MEM_DEVICE) {
                struct memdev_dmi_entry *memdev_dmi_entry =
                        (struct memdev_dmi_entry *)dh;
                unsigned long conf_mem_clk_speed_offset =
                        (unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
                        (unsigned long)&memdev_dmi_entry->type;
                unsigned long speed_offset =
                        (unsigned long)&memdev_dmi_entry->speed -
                        (unsigned long)&memdev_dmi_entry->type;
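                /*
                 * These are effectively offsetof(struct memdev_dmi_entry,
                 * conf_mem_clk_speed) and offsetof(..., speed).  Entries
                 * produced by older SMBIOS versions are shorter, so each
                 * field must be length-checked before it is dereferenced.
                 */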

                /* Check that a DIMM is present */
                if (memdev_dmi_entry->size == 0)
                        return;

                /*
                 * Pick the configured speed if it's available; otherwise
                 * fall back to the DIMM speed, or give up if neither
                 * field is present in this entry.
                 */
                if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
                        dmi_mem_clk_speed =
                                memdev_dmi_entry->conf_mem_clk_speed;
                } else if (memdev_dmi_entry->length > speed_offset) {
                        dmi_mem_clk_speed = memdev_dmi_entry->speed;
                } else {
                        *dclk_freq = -1;
                        return;
                }

                if (*dclk_freq == 0) {
                        /* First pass, speed was 0 */
                        if (dmi_mem_clk_speed > 0) {
                                /* Set speed if a valid speed is read */
                                *dclk_freq = dmi_mem_clk_speed;
                        } else {
                                /* Otherwise we don't have a valid speed */
                                *dclk_freq = -1;
                        }
                } else if (*dclk_freq > 0 &&
                           *dclk_freq != dmi_mem_clk_speed) {
                        /*
                         * If we have a speed, check that all DIMMs report
                         * the same speed; otherwise mark the speed invalid.
                         */
                        *dclk_freq = -1;
                }
        }
}

/*
 * The default DCLK frequency is used as a fallback if we
 * fail to find anything reliable in the DMI. The value
 * is taken straight from the datasheet.
 */
#define DEFAULT_DCLK_FREQ 800

static int get_dclk_freq(void)
{
        int dclk_freq = 0;

        dmi_walk(decode_dclk, (void *)&dclk_freq);

        if (dclk_freq < 1)
                return DEFAULT_DCLK_FREQ;

        return dclk_freq;
}

/*
 * set_sdram_scrub_rate         This routine sets the byte/sec bandwidth
 *                              scrub rate in hardware according to the
 *                              SCRUBINTERVAL formula found in the datasheet.
 */
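/*
 * Worked example of the SCRUBINTERVAL translation below (the numbers are
 * illustrative): with the default 800 MHz DCLK and a requested bandwidth
 * of 5,120,000 bytes/sec,
 *      scrub_interval = 800 * 64 * 1000000 / 5120000 = 10000
 * DCLK cycles between scrubbed cache lines.  The reverse conversion in
 * get_sdram_scrub_rate() divides by the programmed interval instead, so
 * the round trip returns (approximately) the requested bandwidth.
 */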
static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        struct pci_dev *pdev;
        u32 dw_scrub;
        u32 dw_ssr;

        /* Get data from the MC register, function 2 */
        pdev = pvt->pci_mcr[2];
        if (!pdev)
                return -ENODEV;

        pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);

        if (new_bw == 0) {
                /* Prepare to disable patrol scrub */
                dw_scrub &= ~STARTSCRUB;
                /* Stop the patrol scrub engine */
                write_and_test(pdev, MC_SCRUB_CONTROL,
                               dw_scrub & ~SCRUBINTERVAL_MASK);

                /* Get current status of scrub rate and set bit to disable */
                pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
                dw_ssr &= ~SSR_MODE_MASK;
                dw_ssr |= SSR_MODE_DISABLE;
        } else {
                const int cache_line_size = 64;
                const u32 freq_dclk_mhz = pvt->dclk_freq;
                unsigned long long scrub_interval;
                /*
                 * Translate the desired scrub rate to a register value and
                 * program the corresponding register value.
                 */
                scrub_interval = (unsigned long long)freq_dclk_mhz *
                        cache_line_size * 1000000;
                do_div(scrub_interval, new_bw);

                if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
                        return -EINVAL;

                dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;

                /* Start the patrol scrub engine */
                pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
                                       STARTSCRUB | dw_scrub);

                /* Get current status of scrub rate and set bit to enable */
                pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
                dw_ssr &= ~SSR_MODE_MASK;
                dw_ssr |= SSR_MODE_ENABLE;
        }
        /* Disable or enable scrubbing */
        pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);

        return new_bw;
}

/*
 * get_sdram_scrub_rate         This routine converts the current scrub rate
 *                              value into a byte/sec bandwidth according to
 *                              the SCRUBINTERVAL formula found in the
 *                              datasheet.
 */
static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        struct pci_dev *pdev;
        const u32 cache_line_size = 64;
        const u32 freq_dclk_mhz = pvt->dclk_freq;
        unsigned long long scrub_rate;
        u32 scrubval;

        /* Get data from the MC register, function 2 */
        pdev = pvt->pci_mcr[2];
        if (!pdev)
                return -ENODEV;

        /* Get current scrub control data */
        pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);

        /* Mask the highest 8 bits to 0 */
        scrubval &= SCRUBINTERVAL_MASK;
        if (!scrubval)
                return 0;

        /* Convert the scrub rate value into byte/sec bandwidth */
        scrub_rate = (unsigned long long)freq_dclk_mhz *
                1000000 * cache_line_size;
        do_div(scrub_rate, scrubval);
        return (int)scrub_rate;
}

static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        u32 pci_lock;

        /* Unlock writes to pci registers */
        pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
        pci_lock &= ~0x3;
        pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
                               pci_lock | MC_CFG_UNLOCK);

        mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
        mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
}

static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        u32 pci_lock;

        /* Lock writes to pci registers */
        pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
        pci_lock &= ~0x3;
        pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
                               pci_lock | MC_CFG_LOCK);
}

static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
{
        pvt->i7core_pci = edac_pci_create_generic_ctl(
                                                &pvt->i7core_dev->pdev[0]->dev,
                                                EDAC_MOD_STR);
        if (unlikely(!pvt->i7core_pci))
                i7core_printk(KERN_WARNING,
                              "Unable to setup PCI error report via EDAC\n");
}

static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
{
        if (likely(pvt->i7core_pci))
                edac_pci_release_generic_ctl(pvt->i7core_pci);
        else
                i7core_printk(KERN_ERR,
                                "Couldn't find mem_ctl_info for socket %d\n",
                                pvt->i7core_dev->socket);
        pvt->i7core_pci = NULL;
}

static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
{
        struct mem_ctl_info *mci = i7core_dev->mci;
        struct i7core_pvt *pvt;

        if (unlikely(!mci || !mci->pvt_info)) {
                edac_dbg(0, "MC: dev = %p\n", &i7core_dev->pdev[0]->dev);

                i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
                return;
        }

        pvt = mci->pvt_info;

        edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);

        /* Disable scrubrate setting */
        if (pvt->enable_scrub)
                disable_sdram_scrub_setting(mci);

        /* Disable EDAC polling */
        i7core_pci_ctl_release(pvt);

        /* Remove MC sysfs nodes */
        i7core_delete_sysfs_devices(mci);
        edac_mc_del_mc(mci->pdev);

        edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
        kfree(mci->ctl_name);
        edac_mc_free(mci);
        i7core_dev->mci = NULL;
}

static int i7core_register_mci(struct i7core_dev *i7core_dev)
{
        struct mem_ctl_info *mci;
        struct i7core_pvt *pvt;
        int rc;
        struct edac_mc_layer layers[2];

        /* allocate a new MC control structure */

        layers[0].type = EDAC_MC_LAYER_CHANNEL;
        layers[0].size = NUM_CHANS;
        layers[0].is_virt_csrow = false;
        layers[1].type = EDAC_MC_LAYER_SLOT;
        layers[1].size = MAX_DIMMS;
        layers[1].is_virt_csrow = true;
        mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers,
                            sizeof(*pvt));
        if (unlikely(!mci))
                return -ENOMEM;

        edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);

        pvt = mci->pvt_info;
        memset(pvt, 0, sizeof(*pvt));

        /* Associate i7core_dev and mci for future use */
        pvt->i7core_dev = i7core_dev;
        i7core_dev->mci = mci;

        /*
         * FIXME: how to handle RDDR3 at the MCI level? It is possible to
         * have mixed RDDR3/UDDR3 with Nehalem, provided that the two types
         * are on different memory channels.
         */
        mci->mtype_cap = MEM_FLAG_DDR3;
        mci->edac_ctl_cap = EDAC_FLAG_NONE;
        mci->edac_cap = EDAC_FLAG_NONE;
        mci->mod_name = "i7core_edac.c";

        mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d", i7core_dev->socket);
        if (!mci->ctl_name) {
                rc = -ENOMEM;
                goto fail1;
        }

        mci->dev_name = pci_name(i7core_dev->pdev[0]);
        mci->ctl_page_to_phys = NULL;

        /* Store pci devices at mci for faster access */
        rc = mci_bind_devs(mci, i7core_dev);
        if (unlikely(rc < 0))
                goto fail0;

        /* Get dimm basic config */
        get_dimm_config(mci);
        /* record ptr to the generic device */
        mci->pdev = &i7core_dev->pdev[0]->dev;

        /* Enable scrubrate setting */
        if (pvt->enable_scrub)
                enable_sdram_scrub_setting(mci);

        /* add this new MC control structure to EDAC's list of MCs */
        if (unlikely(edac_mc_add_mc_with_groups(mci, i7core_dev_groups))) {
                edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
                /* FIXME: perhaps some code should go here that disables error
                 * reporting if we just enabled it
                 */

                rc = -EINVAL;
                goto fail0;
        }
        if (i7core_create_sysfs_devices(mci)) {
                edac_dbg(0, "MC: failed to create sysfs nodes\n");
                edac_mc_del_mc(mci->pdev);
                rc = -EINVAL;
                goto fail0;
        }

        /* Default error mask is any memory */
        pvt->inject.channel = 0;
        pvt->inject.dimm = -1;
        pvt->inject.rank = -1;
        pvt->inject.bank = -1;
        pvt->inject.page = -1;
        pvt->inject.col = -1;

        /* allocating generic PCI control info */
        i7core_pci_ctl_create(pvt);

        /* DCLK for scrub rate setting */
        pvt->dclk_freq = get_dclk_freq();

        return 0;

fail0:
        kfree(mci->ctl_name);

fail1:
        edac_mc_free(mci);
        i7core_dev->mci = NULL;
        return rc;
}

/*
 *      i7core_probe    Probe for one instance of the device to see if it
 *                      is present.
 *      return:
 *              0 if a device was found
 *              < 0 on error
 */

static int i7core_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
        int rc, count = 0;
        struct i7core_dev *i7core_dev;

        /* get the pci devices we want to reserve for our use */
        mutex_lock(&i7core_edac_lock);

        /*
         * All memory controllers are allocated at the first pass.
         */
        if (unlikely(probed >= 1)) {
                mutex_unlock(&i7core_edac_lock);
                return -ENODEV;
        }
        probed++;

        rc = i7core_get_all_devices();
        if (unlikely(rc < 0))
                goto fail0;

        list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
                count++;
                rc = i7core_register_mci(i7core_dev);
                if (unlikely(rc < 0))
                        goto fail1;
        }

        /*
         * Nehalem-EX uses a different memory controller. However, as the
         * memory controller is not visible on some Nehalem/Nehalem-EP, we
         * need to probe indirectly via an X58 PCI device. The same devices
         * are found on (some) Nehalem-EX, so on those machines the probe
         * routine needs to return -ENODEV, as the actual memory
         * controller registers won't be detected.
         */
        if (!count) {
                rc = -ENODEV;
                goto fail1;
        }

        i7core_printk(KERN_INFO,
                      "Driver loaded, %d memory controller(s) found.\n",
                      count);

        mutex_unlock(&i7core_edac_lock);
        return 0;

fail1:
        list_for_each_entry(i7core_dev, &i7core_edac_list, list)
                i7core_unregister_mci(i7core_dev);

        i7core_put_all_devices();
fail0:
        mutex_unlock(&i7core_edac_lock);
        return rc;
}

/*
 *      i7core_remove   destructor for one instance of the device
 *
 */
static void i7core_remove(struct pci_dev *pdev)
{
        struct i7core_dev *i7core_dev;

        edac_dbg(0, "\n");

        /*
         * There is a problem here: the pdev value used for removal will be
         * wrong, since it points to the X58 register used to detect that
         * the machine is a Nehalem or newer design. Because several PCI
         * devices are grouped together to provide the MC functionality, we
         * need a different method for releasing the devices.
         */

        mutex_lock(&i7core_edac_lock);

        if (unlikely(!probed)) {
                mutex_unlock(&i7core_edac_lock);
                return;
        }

        list_for_each_entry(i7core_dev, &i7core_edac_list, list)
                i7core_unregister_mci(i7core_dev);

        /* Release PCI resources */
        i7core_put_all_devices();

        probed--;

        mutex_unlock(&i7core_edac_lock);
}

MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 *      i7core_driver   pci_driver structure for this module
 *
 */
static struct pci_driver i7core_driver = {
        .name     = "i7core_edac",
        .probe    = i7core_probe,
        .remove   = i7core_remove,
        .id_table = i7core_pci_tbl,
};

/*
 *      i7core_init     Module entry function
 *                      Try to initialize this module for its devices
 */
static int __init i7core_init(void)
{
        int pci_rc;

        edac_dbg(2, "\n");

        /* Ensure that the OPSTATE is set correctly for POLL or NMI */
        opstate_init();

        if (use_pci_fixup)
                i7core_xeon_pci_fixup(pci_dev_table);

        pci_rc = pci_register_driver(&i7core_driver);

        if (pci_rc >= 0) {
                mce_register_decode_chain(&i7_mce_dec);
                return 0;
        }

        i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
                      pci_rc);

        return pci_rc;
}

/*
 *      i7core_exit()   Module exit function
 *                      Unregister the driver
 */
static void __exit i7core_exit(void)
{
        edac_dbg(2, "\n");
        pci_unregister_driver(&i7core_driver);
        mce_unregister_decode_chain(&i7_mce_dec);
}

module_init(i7core_init);
module_exit(i7core_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab");
MODULE_AUTHOR("Red Hat Inc. (https://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
                   I7CORE_REVISION);

module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");