edac: Cleanup the logs for i7core and sb edac drivers
[linux-2.6-microblaze.git] / drivers / edac / i7core_edac.c
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *       Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *      http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/dmi.h>
35 #include <linux/edac.h>
36 #include <linux/mmzone.h>
37 #include <linux/smp.h>
38 #include <asm/mce.h>
39 #include <asm/processor.h>
40 #include <asm/div64.h>
41
42 #include "edac_core.h"
43
44 /* Static vars */
45 static LIST_HEAD(i7core_edac_list);
46 static DEFINE_MUTEX(i7core_edac_lock);
47 static int probed;
48
49 static int use_pci_fixup;
50 module_param(use_pci_fixup, int, 0444);
51 MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
52 /*
53  * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
54  * registers start at bus 255, and are not reported by BIOS.
55  * We currently find devices with only 2 sockets. In order to support more QPI
56  * Quick Path Interconnect, just increment this number.
57  */
58 #define MAX_SOCKET_BUSES        2
59
60
61 /*
62  * Alter this version for the module when modifications are made
63  */
64 #define I7CORE_REVISION    " Ver: 1.0.0"
65 #define EDAC_MOD_STR      "i7core_edac"
66
67 /*
68  * Debug macros
69  */
70 #define i7core_printk(level, fmt, arg...)                       \
71         edac_printk(level, "i7core", fmt, ##arg)
72
73 #define i7core_mc_printk(mci, level, fmt, arg...)               \
74         edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
75
76 /*
77  * i7core Memory Controller Registers
78  */
79
80         /* OFFSETS for Device 0 Function 0 */
81
82 #define MC_CFG_CONTROL  0x90
83   #define MC_CFG_UNLOCK         0x02
84   #define MC_CFG_LOCK           0x00
85
86         /* OFFSETS for Device 3 Function 0 */
87
88 #define MC_CONTROL      0x48
89 #define MC_STATUS       0x4c
90 #define MC_MAX_DOD      0x64
91
92 /*
93  * OFFSETS for Device 3 Function 4, as inicated on Xeon 5500 datasheet:
94  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
95  */
96
97 #define MC_TEST_ERR_RCV1        0x60
98   #define DIMM2_COR_ERR(r)                      ((r) & 0x7fff)
99
100 #define MC_TEST_ERR_RCV0        0x64
101   #define DIMM1_COR_ERR(r)                      (((r) >> 16) & 0x7fff)
102   #define DIMM0_COR_ERR(r)                      ((r) & 0x7fff)
103
104 /* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */
105 #define MC_SSRCONTROL           0x48
106   #define SSR_MODE_DISABLE      0x00
107   #define SSR_MODE_ENABLE       0x01
108   #define SSR_MODE_MASK         0x03
109
110 #define MC_SCRUB_CONTROL        0x4c
111   #define STARTSCRUB            (1 << 24)
112   #define SCRUBINTERVAL_MASK    0xffffff
113
114 #define MC_COR_ECC_CNT_0        0x80
115 #define MC_COR_ECC_CNT_1        0x84
116 #define MC_COR_ECC_CNT_2        0x88
117 #define MC_COR_ECC_CNT_3        0x8c
118 #define MC_COR_ECC_CNT_4        0x90
119 #define MC_COR_ECC_CNT_5        0x94
120
121 #define DIMM_TOP_COR_ERR(r)                     (((r) >> 16) & 0x7fff)
122 #define DIMM_BOT_COR_ERR(r)                     ((r) & 0x7fff)
123
124
125         /* OFFSETS for Devices 4,5 and 6 Function 0 */
126
127 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
128   #define THREE_DIMMS_PRESENT           (1 << 24)
129   #define SINGLE_QUAD_RANK_PRESENT      (1 << 23)
130   #define QUAD_RANK_PRESENT             (1 << 22)
131   #define REGISTERED_DIMM               (1 << 15)
132
#define MC_CHANNEL_MAPPER       0x60
  /*
   * Map logical channel @ch to its physical read/write lane (0-based;
   * a raw field value of 0 decodes to -1, i.e. unmapped).  @ch is fully
   * parenthesized so expressions such as RDLCH(reg, i + 1) expand
   * correctly (the old form computed 3 + (i + 1 * 6)).
   */
  #define RDLCH(r, ch)          ((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)          ((((r) >> ((ch) * 6)) & 0x07) - 1)
136
137 #define MC_CHANNEL_RANK_PRESENT 0x7c
138   #define RANK_PRESENT_MASK             0xffff
139
140 #define MC_CHANNEL_ADDR_MATCH   0xf0
141 #define MC_CHANNEL_ERROR_MASK   0xf8
142 #define MC_CHANNEL_ERROR_INJECT 0xfc
143   #define INJECT_ADDR_PARITY    0x10
144   #define INJECT_ECC            0x08
145   #define MASK_CACHELINE        0x06
146   #define MASK_FULL_CACHELINE   0x06
147   #define MASK_MSB32_CACHELINE  0x04
148   #define MASK_LSB32_CACHELINE  0x02
149   #define NO_MASK_CACHELINE     0x00
150   #define REPEAT_EN             0x01
151
152         /* OFFSETS for Devices 4,5 and 6 Function 1 */
153
#define MC_DOD_CH_DIMM0         0x48
#define MC_DOD_CH_DIMM1         0x4c
#define MC_DOD_CH_DIMM2         0x50
  /*
   * Field extractors for the per-DIMM DOD registers.  Every macro fully
   * parenthesizes its argument; RANKOFFSET() previously did not, so
   * RANKOFFSET(a | b) expanded as a | (b & mask) instead of (a | b) & mask.
   */
  #define RANKOFFSET_MASK       ((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)         (((x) & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK     (1 << 9)
  #define DIMM_PRESENT(x)       (((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK           ((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)             (((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK           ((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)             (((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK            ((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)              (((x) & MC_DOD_NUMROW_MASK) >> 2)
  #define MC_DOD_NUMCOL_MASK            3
  #define MC_DOD_NUMCOL(x)              ((x) & MC_DOD_NUMCOL_MASK)
169
170 #define MC_RANK_PRESENT         0x7c
171
172 #define MC_SAG_CH_0     0x80
173 #define MC_SAG_CH_1     0x84
174 #define MC_SAG_CH_2     0x88
175 #define MC_SAG_CH_3     0x8c
176 #define MC_SAG_CH_4     0x90
177 #define MC_SAG_CH_5     0x94
178 #define MC_SAG_CH_6     0x98
179 #define MC_SAG_CH_7     0x9c
180
181 #define MC_RIR_LIMIT_CH_0       0x40
182 #define MC_RIR_LIMIT_CH_1       0x44
183 #define MC_RIR_LIMIT_CH_2       0x48
184 #define MC_RIR_LIMIT_CH_3       0x4C
185 #define MC_RIR_LIMIT_CH_4       0x50
186 #define MC_RIR_LIMIT_CH_5       0x54
187 #define MC_RIR_LIMIT_CH_6       0x58
188 #define MC_RIR_LIMIT_CH_7       0x5C
189 #define MC_RIR_LIMIT_MASK       ((1 << 10) - 1)
190
191 #define MC_RIR_WAY_CH           0x80
192   #define MC_RIR_WAY_OFFSET_MASK        (((1 << 14) - 1) & ~0x7)
193   #define MC_RIR_WAY_RANK_MASK          0x7
194
195 /*
196  * i7core structs
197  */
198
199 #define NUM_CHANS 3
200 #define MAX_DIMMS 3             /* Max DIMMS per channel */
201 #define MAX_MCR_FUNC  4
202 #define MAX_CHAN_FUNC 3
203
/* Snapshot of the global MC registers read from device 3 function 0. */
struct i7core_info {
        u32     mc_control;     /* MC_CONTROL: channel-active bits, ECC x8 bit */
        u32     mc_status;      /* MC_STATUS: ECC-enabled, channel-disabled bits */
        u32     max_dod;        /* MC_MAX_DOD: max DIMM organization limits */
        u32     ch_map;         /* MC_CHANNEL_MAPPER: logical->physical lanes */
};


/* Software state behind the error-injection sysfs nodes. */
struct i7core_inject {
        int     enable;         /* non-zero while an injection is armed */

        u32     section;        /* half-cacheline selector (bits 0-1) */
        u32     type;           /* repeat/ECC/parity selector (bits 0-2) */
        u32     eccmask;        /* ECC bits to flip on injection */

        /* Error address mask; -1 in a field means "match any" */
        int channel, dimm, rank, bank, page, col;
};

/* Per-channel topology discovered during get_dimm_config(). */
struct i7core_channel {
        u32             ranks;  /* ranks on the channel (2 or 4) */
        u32             dimms;  /* DIMM count; not updated in the visible code
                                 * (the assignment is under #if 0) */
};

/* One PCI device (dev/func/id) the driver attaches to. */
struct pci_id_descr {
        int                     dev;            /* PCI device number */
        int                     func;           /* PCI function number */
        int                     dev_id;         /* PCI device ID */
        int                     optional;       /* 1 if absence is tolerated */
};

/* A generation-specific table of pci_id_descr entries. */
struct pci_id_table {
        const struct pci_id_descr       *descr;
        int                             n_devs; /* entries in descr[] */
};

/* Per-socket bookkeeping, linked on the global i7core_edac_list. */
struct i7core_dev {
        struct list_head        list;
        u8                      socket;         /* socket/QPI number */
        struct pci_dev          **pdev;         /* n_devs discovered devices */
        int                     n_devs;
        struct mem_ctl_info     *mci;
};

/* Driver-private data, hangs off mem_ctl_info->pvt_info. */
struct i7core_pvt {
        struct pci_dev  *pci_noncore;                   /* dev 0 func 0 */
        struct pci_dev  *pci_mcr[MAX_MCR_FUNC + 1];     /* dev 3, per function */
        struct pci_dev  *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1]; /* devs 4-6 */

        struct i7core_dev *i7core_dev;

        struct i7core_info      info;
        struct i7core_inject    inject;
        struct i7core_channel   channel[NUM_CHANS];

        int             ce_count_available;

                        /* ECC corrected errors counts per udimm */
        unsigned long   udimm_ce_count[MAX_DIMMS];
        int             udimm_last_ce_count[MAX_DIMMS];
                        /* ECC corrected errors counts per rdimm */
        unsigned long   rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
        int             rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

        bool            is_registered, enable_scrub;

        /* Fifo double buffers */
        struct mce              mce_entry[MCE_LOG_LEN];
        struct mce              mce_outentry[MCE_LOG_LEN];

        /* Fifo in/out counters */
        unsigned                mce_in, mce_out;

        /* Count indicator to show errors not got */
        unsigned                mce_overrun;

        /* DCLK Frequency used for computing scrub rate */
        int                     dclk_freq;

        /* Struct to control EDAC polling */
        struct edac_pci_ctl_info *i7core_pci;
};
286
/* Shorthand initializer for a struct pci_id_descr entry. */
#define PCI_DESCR(device, function, device_id)  \
        .dev = (device),                        \
        .func = (function),                     \
        .dev_id = (device_id)

/* Devices for i7core / Xeon 35xx / Xeon 55xx (Nehalem, Nehalem-EP). */
static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
                /* Memory controller */
        { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
        { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
                        /* Exists only for RDIMM */
        { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
        { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

                /* Channel 0 */
        { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
        { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
        { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
        { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

                /* Channel 1 */
        { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
        { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
        { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
        { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

                /* Channel 2 */
        { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

                /* Generic Non-core registers */
        /*
         * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
         * On Xeon 55xx, however, it has a different id (8086:2c40). So,
         * the probing code needs to test for the other address in case of
         * failure of this one
         */
        { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },

};

/* Devices for Lynnfield (e.g. Core i7 8xx): only two memory channels. */
static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
        { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
        { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
        { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },

        { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
        { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
        { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
        { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },

        { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
        { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
        { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
        { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },

        /*
         * This is the PCI device has an alternate address on some
         * processors like Core i7 860
         */
        { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
};

/* Devices for Westmere-EP (REV2 device ids). */
static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
                /* Memory controller */
        { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
        { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
                        /* Exists only for RDIMM */
        { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
        { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },

                /* Channel 0 */
        { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
        { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
        { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
        { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },

                /* Channel 1 */
        { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
        { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
        { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
        { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },

                /* Channel 2 */
        { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },

                /* Generic Non-core registers */
        { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },

};

#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
/* One entry per supported CPU generation; terminated by an all-zero entry. */
static const struct pci_id_table pci_dev_table[] = {
        PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
        PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
        PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
        {0,}                    /* 0 terminated list. */
};
389
/*
 *      pci_device_id   table for which devices we are looking for
 *
 * NOTE(review): these two IDs appear to be "anchor" devices used only to
 * trigger the PCI probe; the real MC devices come from pci_dev_table above.
 * Confirm against the probe routine (outside this view).
 */
static DEFINE_PCI_DEVICE_TABLE(i7core_pci_tbl) = {
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
        {0,}                    /* 0 terminated list. */
};
398
399 /****************************************************************************
400                         Anciliary status routines
401  ****************************************************************************/
402
403         /* MC_CONTROL bits */
404 #define CH_ACTIVE(pvt, ch)      ((pvt)->info.mc_control & (1 << (8 + ch)))
405 #define ECCx8(pvt)              ((pvt)->info.mc_control & (1 << 1))
406
407         /* MC_STATUS bits */
408 #define ECC_ENABLED(pvt)        ((pvt)->info.mc_status & (1 << 4))
409 #define CH_DISABLED(pvt, ch)    ((pvt)->info.mc_status & (1 << ch))
410
411         /* MC_MAX_DOD read functions */
412 static inline int numdimms(u32 dimms)
413 {
414         return (dimms & 0x3) + 1;
415 }
416
417 static inline int numrank(u32 rank)
418 {
419         static int ranks[4] = { 1, 2, 4, -EINVAL };
420
421         return ranks[rank & 0x3];
422 }
423
424 static inline int numbank(u32 bank)
425 {
426         static int banks[4] = { 4, 8, 16, -EINVAL };
427
428         return banks[bank & 0x3];
429 }
430
431 static inline int numrow(u32 row)
432 {
433         static int rows[8] = {
434                 1 << 12, 1 << 13, 1 << 14, 1 << 15,
435                 1 << 16, -EINVAL, -EINVAL, -EINVAL,
436         };
437
438         return rows[row & 0x7];
439 }
440
441 static inline int numcol(u32 col)
442 {
443         static int cols[8] = {
444                 1 << 10, 1 << 11, 1 << 12, -EINVAL,
445         };
446         return cols[col & 0x3];
447 }
448
/*
 * Find the per-socket descriptor for @socket on the global
 * i7core_edac_list; returns NULL if that socket was never registered.
 * NOTE(review): the walk itself takes no lock; callers appear to rely on
 * i7core_edac_lock being held -- confirm at the call sites.
 */
static struct i7core_dev *get_i7core_dev(u8 socket)
{
        struct i7core_dev *i7core_dev;

        list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
                if (i7core_dev->socket == socket)
                        return i7core_dev;
        }

        return NULL;
}
460
461 static struct i7core_dev *alloc_i7core_dev(u8 socket,
462                                            const struct pci_id_table *table)
463 {
464         struct i7core_dev *i7core_dev;
465
466         i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
467         if (!i7core_dev)
468                 return NULL;
469
470         i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
471                                    GFP_KERNEL);
472         if (!i7core_dev->pdev) {
473                 kfree(i7core_dev);
474                 return NULL;
475         }
476
477         i7core_dev->socket = socket;
478         i7core_dev->n_devs = table->n_devs;
479         list_add_tail(&i7core_dev->list, &i7core_edac_list);
480
481         return i7core_dev;
482 }
483
/*
 * Unlink @i7core_dev from the global list and release it together with
 * its pdev[] array.  Note the order: unlink first, then free, so the
 * list never holds a pointer to freed memory.
 */
static void free_i7core_dev(struct i7core_dev *i7core_dev)
{
        list_del(&i7core_dev->list);
        kfree(i7core_dev->pdev);
        kfree(i7core_dev);
}
490
491 /****************************************************************************
492                         Memory check routines
493  ****************************************************************************/
494
/*
 * Read the memory-controller topology from PCI config space and fill in
 * the EDAC dimm_info entries for every populated DIMM on every active
 * channel.  Returns 0 on success, -ENODEV when the MC register device
 * (device 3 function 0) was not discovered.
 */
static int get_dimm_config(struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        struct pci_dev *pdev;
        int i, j;
        enum edac_type mode;
        enum mem_type mtype;
        struct dimm_info *dimm;

        /* Get data from the MC register, function 0 */
        pdev = pvt->pci_mcr[0];
        if (!pdev)
                return -ENODEV;

        /* Device 3 function 0 reads */
        pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
        pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
        pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
        pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);

        debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
                pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
                pvt->info.max_dod, pvt->info.ch_map);

        /* Derive the EDAC mode (x4/x8 SDDC or none) from the control bits */
        if (ECC_ENABLED(pvt)) {
                debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
                if (ECCx8(pvt))
                        mode = EDAC_S8ECD8ED;
                else
                        mode = EDAC_S4ECD4ED;
        } else {
                debugf0("ECC disabled\n");
                mode = EDAC_NONE;
        }

        /* FIXME: need to handle the error codes */
        debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
                "x%x x 0x%x\n",
                numdimms(pvt->info.max_dod),
                numrank(pvt->info.max_dod >> 2),
                numbank(pvt->info.max_dod >> 4),
                numrow(pvt->info.max_dod >> 6),
                numcol(pvt->info.max_dod >> 9));

        for (i = 0; i < NUM_CHANS; i++) {
                u32 data, dimm_dod[3], value[8];

                /* Skip channels whose control device was never probed */
                if (!pvt->pci_ch[i][0])
                        continue;

                if (!CH_ACTIVE(pvt, i)) {
                        debugf0("Channel %i is not active\n", i);
                        continue;
                }
                if (CH_DISABLED(pvt, i)) {
                        debugf0("Channel %i is disabled\n", i);
                        continue;
                }

                /* Devices 4-6 function 0 */
                pci_read_config_dword(pvt->pci_ch[i][0],
                                MC_CHANNEL_DIMM_INIT_PARAMS, &data);

                pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
                                                4 : 2;

                if (data & REGISTERED_DIMM)
                        mtype = MEM_RDDR3;
                else
                        mtype = MEM_DDR3;
#if 0
                if (data & THREE_DIMMS_PRESENT)
                        pvt->channel[i].dimms = 3;
                else if (data & SINGLE_QUAD_RANK_PRESENT)
                        pvt->channel[i].dimms = 1;
                else
                        pvt->channel[i].dimms = 2;
#endif

                /* Devices 4-6 function 1 */
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM0, &dimm_dod[0]);
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM1, &dimm_dod[1]);
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM2, &dimm_dod[2]);

                debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
                        "%d ranks, %cDIMMs\n",
                        i,
                        RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
                        data,
                        pvt->channel[i].ranks,
                        (data & REGISTERED_DIMM) ? 'R' : 'U');

                /* Up to MAX_DIMMS (3) DIMM slots per channel */
                for (j = 0; j < 3; j++) {
                        u32 banks, ranks, rows, cols;
                        u32 size, npages;

                        if (!DIMM_PRESENT(dimm_dod[j]))
                                continue;

                        dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
                                       i, j, 0);
                        banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
                        ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
                        rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
                        cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));

                        /* DDR3 has 8 I/O banks; >> (20 - 3) converts the
                         * byte product to MiB with the x8 width folded in.
                         * NOTE(review): the reserved encodings decode to
                         * -EINVAL and would poison this product unchecked
                         * (see the FIXME above). */
                        size = (rows * cols * banks * ranks) >> (20 - 3);

                        debugf0("\tdimm %d %d Mb offset: %x, "
                                "bank: %d, rank: %d, row: %#x, col: %#x\n",
                                j, size,
                                RANKOFFSET(dimm_dod[j]),
                                banks, ranks, rows, cols);

                        npages = MiB_TO_PAGES(size);

                        dimm->nr_pages = npages;

                        switch (banks) {
                        case 4:
                                dimm->dtype = DEV_X4;
                                break;
                        case 8:
                                dimm->dtype = DEV_X8;
                                break;
                        case 16:
                                dimm->dtype = DEV_X16;
                                break;
                        default:
                                dimm->dtype = DEV_UNKNOWN;
                        }

                        snprintf(dimm->label, sizeof(dimm->label),
                                 "CPU#%uChannel#%u_DIMM#%u",
                                 pvt->i7core_dev->socket, i, j);
                        dimm->grain = 8;
                        dimm->edac_mode = mode;
                        dimm->mtype = mtype;
                }

                /* Dump the system-address-to-channel mapping registers */
                pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
                pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
                pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
                pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
                pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
                pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
                pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
                pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
                debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
                for (j = 0; j < 8; j++)
                        debugf1("\t\t%#x\t%#x\t%#x\n",
                                (value[j] >> 27) & 0x1,
                                (value[j] >> 24) & 0x7,
                                (value[j] & ((1 << 24) - 1)));
        }

        return 0;
}
657
658 /****************************************************************************
659                         Error insertion routines
660  ****************************************************************************/
661
662 /* The i7core has independent error injection features per channel.
663    However, to have a simpler code, we don't allow enabling error injection
664    on more than one channel.
665    Also, since a change at an inject parameter will be applied only at enable,
666    we're disabling error injection on all write calls to the sysfs nodes that
667    controls the error code injection.
668  */
669 static int disable_inject(const struct mem_ctl_info *mci)
670 {
671         struct i7core_pvt *pvt = mci->pvt_info;
672
673         pvt->inject.enable = 0;
674
675         if (!pvt->pci_ch[pvt->inject.channel][0])
676                 return -ENODEV;
677
678         pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
679                                 MC_CHANNEL_ERROR_INJECT, 0);
680
681         return 0;
682 }
683
684 /*
685  * i7core inject inject.section
686  *
687  *      accept and store error injection inject.section value
688  *      bit 0 - refers to the lower 32-byte half cacheline
689  *      bit 1 - refers to the upper 32-byte half cacheline
690  */
691 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
692                                            const char *data, size_t count)
693 {
694         struct i7core_pvt *pvt = mci->pvt_info;
695         unsigned long value;
696         int rc;
697
698         if (pvt->inject.enable)
699                 disable_inject(mci);
700
701         rc = strict_strtoul(data, 10, &value);
702         if ((rc < 0) || (value > 3))
703                 return -EIO;
704
705         pvt->inject.section = (u32) value;
706         return count;
707 }
708
/* Sysfs show: print the stored injection section selector in hex. */
static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
                                              char *data)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        return sprintf(data, "0x%08x\n", pvt->inject.section);
}
715
716 /*
717  * i7core inject.type
718  *
719  *      accept and store error injection inject.section value
720  *      bit 0 - repeat enable - Enable error repetition
721  *      bit 1 - inject ECC error
722  *      bit 2 - inject parity error
723  */
724 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
725                                         const char *data, size_t count)
726 {
727         struct i7core_pvt *pvt = mci->pvt_info;
728         unsigned long value;
729         int rc;
730
731         if (pvt->inject.enable)
732                 disable_inject(mci);
733
734         rc = strict_strtoul(data, 10, &value);
735         if ((rc < 0) || (value > 7))
736                 return -EIO;
737
738         pvt->inject.type = (u32) value;
739         return count;
740 }
741
/* Sysfs show: print the stored injection type mask in hex. */
static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
                                              char *data)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        return sprintf(data, "0x%08x\n", pvt->inject.type);
}
748
749 /*
750  * i7core_inject_inject.eccmask_store
751  *
752  * The type of error (UE/CE) will depend on the inject.eccmask value:
753  *   Any bits set to a 1 will flip the corresponding ECC bit
754  *   Correctable errors can be injected by flipping 1 bit or the bits within
755  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
756  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
757  *   uncorrectable error to be injected.
758  */
759 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
760                                         const char *data, size_t count)
761 {
762         struct i7core_pvt *pvt = mci->pvt_info;
763         unsigned long value;
764         int rc;
765
766         if (pvt->inject.enable)
767                 disable_inject(mci);
768
769         rc = strict_strtoul(data, 10, &value);
770         if (rc < 0)
771                 return -EIO;
772
773         pvt->inject.eccmask = (u32) value;
774         return count;
775 }
776
/* Sysfs show: print the stored ECC flip mask in hex. */
static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
                                              char *data)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
}
783
/*
 * i7core_addrmatch
 *
 * Sysfs nodes that select which memory location the next injected error
 * will target.  Each field (channel, dimm, rank, bank, page, col) accepts
 * either a decimal value below its limit or the string "any" (stored as
 * -1), which matches every value of that field.
 */
794
/*
 * DECLARE_ADDR_MATCH(param, limit) generates the sysfs store/show pair for
 * one address-match field of the injection unit:
 *   - store: disarms any pending injection, then accepts "any" (stored as
 *     -1, matches everything) or a decimal value below @limit;
 *   - show: prints "any" for -1, the decimal value otherwise.
 * NOTE(review): @value is declared "long" but its address is passed to
 * strict_strtoul(), which takes "unsigned long *" -- this expands with a
 * pointer-type mismatch that compilers warn about; confirm and consider
 * parsing into an unsigned long temporary instead.
 */
#define DECLARE_ADDR_MATCH(param, limit)                        \
static ssize_t i7core_inject_store_##param(                     \
                struct mem_ctl_info *mci,                       \
                const char *data, size_t count)                 \
{                                                               \
        struct i7core_pvt *pvt;                                 \
        long value;                                             \
        int rc;                                                 \
                                                                \
        debugf1("%s()\n", __func__);                            \
        pvt = mci->pvt_info;                                    \
                                                                \
        if (pvt->inject.enable)                                 \
                disable_inject(mci);                            \
                                                                \
        if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
                value = -1;                                     \
        else {                                                  \
                rc = strict_strtoul(data, 10, &value);          \
                if ((rc < 0) || (value >= limit))               \
                        return -EIO;                            \
        }                                                       \
                                                                \
        pvt->inject.param = value;                              \
                                                                \
        return count;                                           \
}                                                               \
                                                                \
static ssize_t i7core_inject_show_##param(                      \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
        struct i7core_pvt *pvt;                                 \
                                                                \
        pvt = mci->pvt_info;                                    \
        debugf1("%s() pvt=%p\n", __func__, pvt);                \
        if (pvt->inject.param < 0)                              \
                return sprintf(data, "any\n");                  \
        else                                                    \
                return sprintf(data, "%d\n", pvt->inject.param);\
}
836
/*
 * ATTR_ADDR_MATCH() - build one mcidev_sysfs_attribute table entry
 * wired to the show/store pair generated by DECLARE_ADDR_MATCH().
 */
#define ATTR_ADDR_MATCH(param)                                  \
	{                                                       \
		.attr = {                                       \
			.name = #param,                         \
			.mode = (S_IRUGO | S_IWUSR)             \
		},                                              \
		.show  = i7core_inject_show_##param,            \
		.store = i7core_inject_store_##param,           \
	}
846
/* Address-match fields with their exclusive upper limits */
DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);
853
854 static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
855 {
856         u32 read;
857         int count;
858
859         debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
860                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
861                 where, val);
862
863         for (count = 0; count < 10; count++) {
864                 if (count)
865                         msleep(100);
866                 pci_write_config_dword(dev, where, val);
867                 pci_read_config_dword(dev, where, &read);
868
869                 if (read == val)
870                         return 0;
871         }
872
873         i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
874                 "write=%08x. Read=%08x\n",
875                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
876                 where, val, read);
877
878         return -EINVAL;
879 }
880
/*
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to the
 * memory that matches the given criteria.
 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items will make the MCU ignore
 * that matching criterion for error injection.
 *
 * It should be noticed that the error will only happen after a write operation
 * on a memory address that matches the condition. If REPEAT_EN is not enabled
 * in the inject mask, it will produce just one error. Otherwise, it will
 * repeat until the inject mask is cleared.
 *
 * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
 *    is reliable enough to check if the MC is using the
 *    three channels. However, this is not clear in the datasheet.
 */
/* sysfs "inject_enable" store: arm (non-zero) or disarm (zero) injection */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
				       const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;
	u64 mask = 0;
	int  rc;
	long enable;

	/* Nothing to do if the selected channel has no control device */
	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	/* NOTE(review): strict_strtoul() expects an unsigned long *;
	 * 'enable' is a signed long — confirm this doesn't warn here. */
	rc = strict_strtoul(data, 10, &enable);
	if ((rc < 0))
		return 0;

	if (enable) {
		pvt->inject.enable = 1;
	} else {
		disable_inject(mci);
		return count;
	}

	/* Sets pvt->inject.dimm mask; bit 41 = "match any dimm" */
	if (pvt->inject.dimm < 0)
		mask |= 1LL << 41;
	else {
		/* The dimm field position depends on dimms-per-channel */
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.dimm & 0x3LL) << 35;
		else
			mask |= (pvt->inject.dimm & 0x1LL) << 36;
	}

	/* Sets pvt->inject.rank mask; bit 40 = "match any rank" */
	if (pvt->inject.rank < 0)
		mask |= 1LL << 40;
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.rank & 0x1LL) << 34;
		else
			mask |= (pvt->inject.rank & 0x3LL) << 34;
	}

	/* Sets pvt->inject.bank mask; bit 39 = "match any bank" */
	if (pvt->inject.bank < 0)
		mask |= 1LL << 39;
	else
		/* NOTE(review): 0x15 keeps only bits 0, 2 and 4, yet bank
		 * values up to 31 are accepted by the sysfs store — this
		 * looks like a typo for 0x1f; confirm against the
		 * MC_CHANNEL_ADDR_MATCH layout in the datasheet. */
		mask |= (pvt->inject.bank & 0x15LL) << 30;

	/* Sets pvt->inject.page mask; bit 38 = "match any page" */
	if (pvt->inject.page < 0)
		mask |= 1LL << 38;
	else
		mask |= (pvt->inject.page & 0xffff) << 14;

	/* Sets pvt->inject.column mask; bit 37 = "match any column" */
	if (pvt->inject.col < 0)
		mask |= 1LL << 37;
	else
		mask |= (pvt->inject.col & 0x3fff);

	/*
	 * bit    0: REPEAT_EN
	 * bits 1-2: MASK_HALF_CACHELINE
	 * bit    3: INJECT_ECC
	 * bit    4: INJECT_ADDR_PARITY
	 */

	injectmask = (pvt->inject.type & 1) |
		     (pvt->inject.section & 0x3) << 1 |
		     (pvt->inject.type & 0x6) << (3 - 1);

	/* Unlock writes to registers - this register is write only */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 0x2);

	/* Program the 64-bit address-match mask in two 32-bit halves */
	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH, mask);
	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_INJECT, injectmask);

	/*
	 * This is something undocumented, based on my tests
	 * Without writing 8 to this register, errors aren't injected. Not sure
	 * why.
	 */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 8);

	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
		" inject 0x%08x\n",
		mask, pvt->inject.eccmask, injectmask);


	return count;
}
1001
1002 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1003                                         char *data)
1004 {
1005         struct i7core_pvt *pvt = mci->pvt_info;
1006         u32 injectmask;
1007
1008         if (!pvt->pci_ch[pvt->inject.channel][0])
1009                 return 0;
1010
1011         pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1012                                MC_CHANNEL_ERROR_INJECT, &injectmask);
1013
1014         debugf0("Inject error read: 0x%018x\n", injectmask);
1015
1016         if (injectmask & 0x0c)
1017                 pvt->inject.enable = 1;
1018
1019         return sprintf(data, "%d\n", pvt->inject.enable);
1020 }
1021
/*
 * DECLARE_COUNTER() - generate the sysfs show handler for the udimm
 * corrected-error counter of dimm @param.  The counters are only
 * meaningful on unregistered memory with valid CE data, hence the
 * "data unavailable" answer otherwise.
 */
#define DECLARE_COUNTER(param)                                  \
static ssize_t i7core_show_counter_##param(                     \
		struct mem_ctl_info *mci,                       \
		char *data)                                     \
{                                                               \
	struct i7core_pvt *pvt = mci->pvt_info;                 \
								\
	debugf1("%s() \n", __func__);                           \
	if (!pvt->ce_count_available || (pvt->is_registered))   \
		return sprintf(data, "data unavailable\n");     \
	return sprintf(data, "%lu\n",                           \
			pvt->udimm_ce_count[param]);            \
}
1035
/*
 * ATTR_COUNTER() - build a read-only sysfs attribute for one udimm
 * error counter.  There is no ->store method, so the mode must not
 * advertise writability (the former S_IRUGO | S_IWUSR promised a write
 * that could never be handled).
 */
#define ATTR_COUNTER(param)                                     \
	{                                                       \
		.attr = {                                       \
			.name = __stringify(udimm##param),      \
			.mode = S_IRUGO                         \
		},                                              \
		.show  = i7core_show_counter_##param            \
	}
1044
/* One show handler per udimm slot (0..2) */
DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
1048
1049 /*
1050  * Sysfs struct
1051  */
1052
/* Files of the "inject_addrmatch" sysfs directory */
static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
	ATTR_ADDR_MATCH(channel),
	ATTR_ADDR_MATCH(dimm),
	ATTR_ADDR_MATCH(rank),
	ATTR_ADDR_MATCH(bank),
	ATTR_ADDR_MATCH(page),
	ATTR_ADDR_MATCH(col),
	{ } /* End of list */
};

/* The "inject_addrmatch" sysfs subdirectory itself */
static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
	.name  = "inject_addrmatch",
	.mcidev_attr = i7core_addrmatch_attrs,
};
1067
1068 static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1069         ATTR_COUNTER(0),
1070         ATTR_COUNTER(1),
1071         ATTR_COUNTER(2),
1072         { .attr = { .name = NULL } }
1073 };
1074
1075 static const struct mcidev_sysfs_group i7core_udimm_counters = {
1076         .name  = "all_channel_counts",
1077         .mcidev_attr = i7core_udimm_counters_attrs,
1078 };
1079
/* Error-injection sysfs nodes exported when the memory is registered */
static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	},
	{ }	/* End of list */
};
1114
/*
 * Error-injection sysfs nodes for unregistered memory: the same set as
 * the rdimm table, plus the "all_channel_counts" counter group.
 */
static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	}, {
		.grp = &i7core_udimm_counters,
	},
	{ }	/* End of list */
};
1151
1152 /****************************************************************************
1153         Device initialization routines: put/get, init/exit
1154  ****************************************************************************/
1155
/*
 *	i7core_put_devices	'put' the devices of one socket that
 *				we have reserved via 'get'
 */
1160 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1161 {
1162         int i;
1163
1164         debugf0(__FILE__ ": %s()\n", __func__);
1165         for (i = 0; i < i7core_dev->n_devs; i++) {
1166                 struct pci_dev *pdev = i7core_dev->pdev[i];
1167                 if (!pdev)
1168                         continue;
1169                 debugf0("Removing dev %02x:%02x.%d\n",
1170                         pdev->bus->number,
1171                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1172                 pci_dev_put(pdev);
1173         }
1174 }
1175
/* Release every socket's devices and free their i7core_dev structures */
static void i7core_put_all_devices(void)
{
	struct i7core_dev *i7core_dev, *tmp;

	/* _safe variant: free_i7core_dev() removes the node we iterate on */
	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
		i7core_put_devices(i7core_dev);
		free_i7core_dev(i7core_dev);
	}
}
1185
1186 static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
1187 {
1188         struct pci_dev *pdev = NULL;
1189         int i;
1190
1191         /*
1192          * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core pci buses
1193          * aren't announced by acpi. So, we need to use a legacy scan probing
1194          * to detect them
1195          */
1196         while (table && table->descr) {
1197                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1198                 if (unlikely(!pdev)) {
1199                         for (i = 0; i < MAX_SOCKET_BUSES; i++)
1200                                 pcibios_scan_specific_bus(255-i);
1201                 }
1202                 pci_dev_put(pdev);
1203                 table++;
1204         }
1205 }
1206
1207 static unsigned i7core_pci_lastbus(void)
1208 {
1209         int last_bus = 0, bus;
1210         struct pci_bus *b = NULL;
1211
1212         while ((b = pci_find_next_bus(b)) != NULL) {
1213                 bus = b->number;
1214                 debugf0("Found bus %d\n", bus);
1215                 if (bus > last_bus)
1216                         last_bus = bus;
1217         }
1218
1219         debugf0("Last bus %d\n", last_bus);
1220
1221         return last_bus;
1222 }
1223
1224 /*
1225  *      i7core_get_all_devices  Find and perform 'get' operation on the MCH's
1226  *                      device/functions we want to reference for this driver
1227  *
1228  *                      Need to 'get' device 16 func 1 and func 2
1229  */
/*
 * i7core_get_onedevice() - 'get' one PCI device described by
 * table->descr[devno], attach it to the per-socket i7core_dev and
 * sanity-check that it appeared at the expected slot/function.
 *
 * Returns 0 on success (also when an optional device is absent, or when
 * the *prev iteration is simply exhausted); negative errno on failure.
 */
static int i7core_get_onedevice(struct pci_dev **prev,
				const struct pci_id_table *table,
				const unsigned devno,
				const unsigned last_bus)
{
	struct i7core_dev *i7core_dev;
	const struct pci_id_descr *dev_descr = &table->descr[devno];

	struct pci_dev *pdev = NULL;
	u8 bus = 0;
	u8 socket = 0;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
			      dev_descr->dev_id, *prev);

	/*
	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
	 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
	 * to probe for the alternate address in case of failure
	 */
	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);

	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
				      *prev);

	if (!pdev) {
		/* End of a *prev iteration: not an error */
		if (*prev) {
			*prev = pdev;
			return 0;
		}

		if (dev_descr->optional)
			return 0;

		/* First descriptor missing: this table matches nothing */
		if (devno == 0)
			return -ENODEV;

		i7core_printk(KERN_INFO,
			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
			dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

		/* End of list, leave */
		return -ENODEV;
	}
	bus = pdev->bus->number;

	/* Sockets are numbered downwards from the last PCI bus */
	socket = last_bus - bus;

	i7core_dev = get_i7core_dev(socket);
	if (!i7core_dev) {
		i7core_dev = alloc_i7core_dev(socket, table);
		if (!i7core_dev) {
			pci_dev_put(pdev);
			return -ENOMEM;
		}
	}

	if (i7core_dev->pdev[devno]) {
		i7core_printk(KERN_ERR,
			"Duplicated device for "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		pci_dev_put(pdev);
		return -ENODEV;
	}

	i7core_dev->pdev[devno] = pdev;

	/* Sanity check */
	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
			PCI_FUNC(pdev->devfn) != dev_descr->func)) {
		i7core_printk(KERN_ERR,
			"Device PCI ID %04x:%04x "
			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			bus, dev_descr->dev, dev_descr->func);
		return -ENODEV;
	}

	/* Be sure that the device is enabled */
	if (unlikely(pci_enable_device(pdev) < 0)) {
		i7core_printk(KERN_ERR,
			"Couldn't enable "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		return -ENODEV;
	}

	debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
		socket, bus, dev_descr->dev,
		dev_descr->func,
		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	/*
	 * As stated on drivers/pci/search.c, the reference count for
	 * @from is always decremented if it is not %NULL. So, as we need
	 * to get all devices up to null, we need to do a get for the device
	 */
	pci_dev_get(pdev);

	*prev = pdev;

	return 0;
}
1342
/*
 * i7core_get_all_devices() - walk every pci_dev_table entry and 'get'
 * all matching devices (all sockets) via i7core_get_onedevice().
 *
 * Returns 0 on success, -ENODEV on failure (after releasing every
 * device already taken).
 */
static int i7core_get_all_devices(void)
{
	int i, rc, last_bus;
	struct pci_dev *pdev = NULL;
	const struct pci_id_table *table = pci_dev_table;

	last_bus = i7core_pci_lastbus();

	while (table && table->descr) {
		for (i = 0; i < table->n_devs; i++) {
			pdev = NULL;
			do {
				rc = i7core_get_onedevice(&pdev, table, i,
							  last_bus);
				if (rc < 0) {
					/*
					 * A failure on the first descriptor
					 * means this table doesn't match the
					 * hardware: skip to the next table
					 * (i = n_devs ends the for loop)
					 * instead of bailing out.
					 */
					if (i == 0) {
						i = table->n_devs;
						break;
					}
					i7core_put_all_devices();
					return -ENODEV;
				}
			} while (pdev);
		}
		table++;
	}

	return 0;
}
1372
/*
 * mci_bind_devs() - sort one socket's already-'get' pci devices into the
 * driver's private struct (pvt->pci_mcr[], pvt->pci_ch[][], pvt->pci_noncore),
 * detect the processor family from the non-core device id and note whether
 * the memory is registered.
 *
 * Returns 0 on success, -EINVAL when a device sits at an unexpected
 * slot/function.
 */
static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;
	char *family;

	pvt->is_registered = false;
	pvt->enable_scrub  = false;
	for (i = 0; i < i7core_dev->n_devs; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			/* Slot 3: memory controller registers */
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			/* Slots 4..: one slot per memory channel */
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func) {
			/* 0.0: the non-core device */
			pvt->pci_noncore = pdev;

			/* Detect the processor family */
			switch (pdev->device) {
			case PCI_DEVICE_ID_INTEL_I7_NONCORE:
				family = "Xeon 35xx/ i7core";
				pvt->enable_scrub = false;
				break;
			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
				family = "i7-800/i5-700";
				pvt->enable_scrub = false;
				break;
			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
				family = "Xeon 34xx";
				pvt->enable_scrub = false;
				break;
			case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
				family = "Xeon 55xx";
				pvt->enable_scrub = true;
				break;
			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
				family = "Xeon 56xx / i7-900";
				pvt->enable_scrub = true;
				break;
			default:
				family = "unknown";
				pvt->enable_scrub = false;
			}
			debugf0("Detected a processor type %s\n", family);
		} else
			goto error;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		/* Presence of dev 3 fn 2 is used as the RDIMM indicator */
		if (PCI_SLOT(pdev->devfn) == 3 &&
			PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = true;
	}

	return 0;

error:
	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
		      slot, func);
	return -EINVAL;
}
1448
1449 /****************************************************************************
1450                         Error check routines
1451  ****************************************************************************/
1452 static void i7core_rdimm_update_errcount(struct mem_ctl_info *mci,
1453                                       const int chan,
1454                                       const int dimm,
1455                                       const int add)
1456 {
1457         int i;
1458
1459         for (i = 0; i < add; i++) {
1460                 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 0, 0, 0,
1461                                      chan, dimm, -1, "error", "", NULL);
1462         }
1463 }
1464
/*
 * i7core_rdimm_update_ce_count() - fold freshly-read per-dimm CE counter
 * values for @chan into the running totals and forward the deltas to the
 * EDAC core.  @new0/@new1/@new2 are the raw counts of dimm 0/1/2.
 */
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
					 const int chan,
					 const int new0,
					 const int new1,
					 const int new2)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */

		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

		/*
		 * A negative delta means the hardware counter wrapped.
		 * NOTE(review): the correction uses 0x7fff, not 0x8000 —
		 * confirm against the counter's real modulus in the
		 * datasheet.
		 */
		if (add2 < 0)
			add2 += 0x7fff;
		pvt->rdimm_ce_count[chan][2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->rdimm_ce_count[chan][1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->rdimm_ce_count[chan][0] += add0;
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->rdimm_last_ce_count[chan][2] = new2;
	pvt->rdimm_last_ce_count[chan][1] = new1;
	pvt->rdimm_last_ce_count[chan][0] = new0;

	/* Update the edac core with any new errors */
	if (add0 != 0)
		i7core_rdimm_update_errcount(mci, chan, 0, add0);
	if (add1 != 0)
		i7core_rdimm_update_errcount(mci, chan, 1, add1);
	if (add2 != 0)
		i7core_rdimm_update_errcount(mci, chan, 2, add2);

}
1509
/* Poll the per-dimm corrected-error counters of all three RDIMM channels */
static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv[3][2];
	int i, new0, new1, new2;

	/*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
								&rcv[0][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
								&rcv[0][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
								&rcv[1][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
								&rcv[1][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
								&rcv[2][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
								&rcv[2][1]);
	for (i = 0 ; i < 3; i++) {
		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
		/*if the channel has 3 dimms*/
		if (pvt->channel[i].dimms > 2) {
			/* 3-dimm layout: dimm 2 lives in the 2nd register */
			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
		} else {
			/* 1/2-dimm layout: sum both halves per register */
			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
					DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
					DIMM_BOT_COR_ERR(rcv[i][1]);
			new2 = 0;
		}

		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
	}
}
1548
1549 /* This function is based on the device 3 function 4 registers as described on:
1550  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1551  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1552  * also available at:
1553  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1554  */
/* Poll the corrected-error counters of an unregistered (UDIMM) setup */
static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv1, rcv0;
	int new0, new1, new2;

	if (!pvt->pci_mcr[4]) {
		debugf0("%s MCR registers not found\n", __func__);
		return;
	}

	/* Corrected test errors */
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);

	/* Store the new values */
	new2 = DIMM2_COR_ERR(rcv1);
	new1 = DIMM1_COR_ERR(rcv0);
	new0 = DIMM0_COR_ERR(rcv0);

	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		int add0, add1, add2;

		add2 = new2 - pvt->udimm_last_ce_count[2];
		add1 = new1 - pvt->udimm_last_ce_count[1];
		add0 = new0 - pvt->udimm_last_ce_count[0];

		/*
		 * Negative delta = counter wrap.  NOTE(review): as in the
		 * rdimm path, 0x7fff vs 0x8000 should be confirmed against
		 * the counter's real modulus.
		 */
		if (add2 < 0)
			add2 += 0x7fff;
		pvt->udimm_ce_count[2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->udimm_ce_count[1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->udimm_ce_count[0] += add0;

		if (add0 | add1 | add2)
			i7core_printk(KERN_ERR, "New Corrected error(s): "
				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
				      add0, add1, add2);
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->udimm_last_ce_count[2] = new2;
	pvt->udimm_last_ce_count[1] = new1;
	pvt->udimm_last_ce_count[0] = new0;
}
1608
1609 /*
1610  * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1611  * Architectures Software Developer’s Manual Volume 3B.
1612  * Nehalem are defined as family 0x06, model 0x1a
1613  *
1614  * The MCA registers used here are the following ones:
1615  *     struct mce field MCA Register
1616  *     m->status        MSR_IA32_MC8_STATUS
1617  *     m->addr          MSR_IA32_MC8_ADDR
1618  *     m->misc          MSR_IA32_MC8_MISC
1619  * In the case of Nehalem, the error information is masked at .status and .misc
1620  * fields
1621  */
1622 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1623                                     const struct mce *m)
1624 {
1625         struct i7core_pvt *pvt = mci->pvt_info;
1626         char *type, *optype, *err, msg[80];
1627         enum hw_event_mc_err_type tp_event;
1628         unsigned long error = m->status & 0x1ff0000l;
1629         bool uncorrected_error = m->mcgstatus & 1ll << 61;
1630         bool ripv = m->mcgstatus & 1;
1631         u32 optypenum = (m->status >> 4) & 0x07;
1632         u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1633         u32 dimm = (m->misc >> 16) & 0x3;
1634         u32 channel = (m->misc >> 18) & 0x3;
1635         u32 syndrome = m->misc >> 32;
1636         u32 errnum = find_first_bit(&error, 32);
1637
1638         if (uncorrected_error) {
1639                 if (ripv) {
1640                         type = "FATAL";
1641                         tp_event = HW_EVENT_ERR_FATAL;
1642                 } else {
1643                         type = "NON_FATAL";
1644                         tp_event = HW_EVENT_ERR_UNCORRECTED;
1645                 }
1646         } else {
1647                 type = "CORRECTED";
1648                 tp_event = HW_EVENT_ERR_CORRECTED;
1649         }
1650
1651         switch (optypenum) {
1652         case 0:
1653                 optype = "generic undef request";
1654                 break;
1655         case 1:
1656                 optype = "read error";
1657                 break;
1658         case 2:
1659                 optype = "write error";
1660                 break;
1661         case 3:
1662                 optype = "addr/cmd error";
1663                 break;
1664         case 4:
1665                 optype = "scrubbing error";
1666                 break;
1667         default:
1668                 optype = "reserved";
1669                 break;
1670         }
1671
1672         switch (errnum) {
1673         case 16:
1674                 err = "read ECC error";
1675                 break;
1676         case 17:
1677                 err = "RAS ECC error";
1678                 break;
1679         case 18:
1680                 err = "write parity error";
1681                 break;
1682         case 19:
1683                 err = "redundacy loss";
1684                 break;
1685         case 20:
1686                 err = "reserved";
1687                 break;
1688         case 21:
1689                 err = "memory range error";
1690                 break;
1691         case 22:
1692                 err = "RTID out of range";
1693                 break;
1694         case 23:
1695                 err = "address parity error";
1696                 break;
1697         case 24:
1698                 err = "byte enable parity error";
1699                 break;
1700         default:
1701                 err = "unknown";
1702         }
1703
1704         snprintf(msg, sizeof(msg), "count=%d %s", core_err_cnt, optype);
1705
1706         /*
1707          * Call the helper to output message
1708          * FIXME: what to do if core_err_cnt > 1? Currently, it generates
1709          * only one event
1710          */
1711         if (uncorrected_error || !pvt->is_registered)
1712                 edac_mc_handle_error(tp_event, mci,
1713                                      m->addr >> PAGE_SHIFT,
1714                                      m->addr & ~PAGE_MASK,
1715                                      syndrome,
1716                                      channel, dimm, -1,
1717                                      err, msg, m);
1718 }
1719
1720 /*
1721  *      i7core_check_error      Retrieve and process errors reported by the
1722  *                              hardware. Called by the Core module.
1723  */
1724 static void i7core_check_error(struct mem_ctl_info *mci)
1725 {
1726         struct i7core_pvt *pvt = mci->pvt_info;
1727         int i;
1728         unsigned count = 0;
1729         struct mce *m;
1730
1731         /*
1732          * MCE first step: Copy all mce errors into a temporary buffer
1733          * We use a double buffering here, to reduce the risk of
1734          * losing an error.
1735          */
1736         smp_rmb();
1737         count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1738                 % MCE_LOG_LEN;
1739         if (!count)
1740                 goto check_ce_error;
1741
1742         m = pvt->mce_outentry;
1743         if (pvt->mce_in + count > MCE_LOG_LEN) {
1744                 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1745
1746                 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1747                 smp_wmb();
1748                 pvt->mce_in = 0;
1749                 count -= l;
1750                 m += l;
1751         }
1752         memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1753         smp_wmb();
1754         pvt->mce_in += count;
1755
1756         smp_rmb();
1757         if (pvt->mce_overrun) {
1758                 i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1759                               pvt->mce_overrun);
1760                 smp_wmb();
1761                 pvt->mce_overrun = 0;
1762         }
1763
1764         /*
1765          * MCE second step: parse errors and display
1766          */
1767         for (i = 0; i < count; i++)
1768                 i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1769
1770         /*
1771          * Now, let's increment CE error counts
1772          */
1773 check_ce_error:
1774         if (!pvt->is_registered)
1775                 i7core_udimm_check_mc_ecc_err(mci);
1776         else
1777                 i7core_rdimm_check_mc_ecc_err(mci);
1778 }
1779
1780 /*
1781  * i7core_mce_check_error       Replicates mcelog routine to get errors
1782  *                              This routine simply queues mcelog errors, and
1783  *                              return. The error itself should be handled later
1784  *                              by i7core_check_error.
1785  * WARNING: As this routine should be called at NMI time, extra care should
1786  * be taken to avoid deadlocks, and to be as fast as possible.
1787  */
/*
 * MCE decode-chain callback, potentially invoked in NMI context.
 * Memory-controller (bank 8) events for this socket are queued into a
 * ring buffer; decoding is deferred to i7core_check_error(), which is
 * also invoked immediately for events with mcgstatus bit 0 set.
 */
static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
				  void *data)
{
	struct mce *mce = (struct mce *)data;
	struct i7core_dev *i7_dev;
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;

	/* Map the reporting socket to its EDAC controller instance */
	i7_dev = get_i7core_dev(mce->socketid);
	if (!i7_dev)
		return NOTIFY_BAD;

	mci = i7_dev->mci;
	pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return NOTIFY_DONE;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return NOTIFY_DONE;

#ifdef CONFIG_SMP
	/* Only handle if it is the right mc controller */
	if (mce->socketid != pvt->i7core_dev->socket)
		return NOTIFY_DONE;
#endif

	/* Ring buffer full: count the overrun and drop the event */
	smp_rmb();
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return NOTIFY_DONE;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	/* NOTE(review): bit 0 of mcgstatus is RIPV — confirm the "fatal" intent */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advise mcelog that the errors were handled */
	return NOTIFY_STOP;
}
1839
/* Hooked into the x86 MCE decode chain by i7core_register_mci() */
static struct notifier_block i7_mce_dec = {
	.notifier_call	= i7core_mce_check_error,
};
1843
/*
 * Raw layout of a DMI/SMBIOS "Memory Device" (type 17) table entry.
 * decode_dclk() casts a struct dmi_header to this to read the DIMM
 * speed fields, so the struct must remain packed and field order must
 * match the firmware table byte-for-byte.
 */
struct memdev_dmi_entry {
	u8 type;
	u8 length;		/* entry length; gates which fields exist */
	u16 handle;
	u16 phys_mem_array_handle;
	u16 mem_err_info_handle;
	u16 total_width;
	u16 data_width;
	u16 size;		/* 0 means the slot is not populated */
	u8 form;
	u8 device_set;
	u8 device_locator;
	u8 bank_locator;
	u8 memory_type;
	u16 type_detail;
	u16 speed;		/* DIMM rated speed */
	u8 manufacturer;
	u8 serial_number;
	u8 asset_tag;
	u8 part_number;
	u8 attributes;
	u32 extended_size;
	u16 conf_mem_clk_speed;	/* currently configured clock speed */
} __attribute__((__packed__));
1868
1869
1870 /*
1871  * Decode the DRAM Clock Frequency, be paranoid, make sure that all
1872  * memory devices show the same speed, and if they don't then consider
1873  * all speeds to be invalid.
1874  */
1875 static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
1876 {
1877         int *dclk_freq = _dclk_freq;
1878         u16 dmi_mem_clk_speed;
1879
1880         if (*dclk_freq == -1)
1881                 return;
1882
1883         if (dh->type == DMI_ENTRY_MEM_DEVICE) {
1884                 struct memdev_dmi_entry *memdev_dmi_entry =
1885                         (struct memdev_dmi_entry *)dh;
1886                 unsigned long conf_mem_clk_speed_offset =
1887                         (unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
1888                         (unsigned long)&memdev_dmi_entry->type;
1889                 unsigned long speed_offset =
1890                         (unsigned long)&memdev_dmi_entry->speed -
1891                         (unsigned long)&memdev_dmi_entry->type;
1892
1893                 /* Check that a DIMM is present */
1894                 if (memdev_dmi_entry->size == 0)
1895                         return;
1896
1897                 /*
1898                  * Pick the configured speed if it's available, otherwise
1899                  * pick the DIMM speed, or we don't have a speed.
1900                  */
1901                 if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
1902                         dmi_mem_clk_speed =
1903                                 memdev_dmi_entry->conf_mem_clk_speed;
1904                 } else if (memdev_dmi_entry->length > speed_offset) {
1905                         dmi_mem_clk_speed = memdev_dmi_entry->speed;
1906                 } else {
1907                         *dclk_freq = -1;
1908                         return;
1909                 }
1910
1911                 if (*dclk_freq == 0) {
1912                         /* First pass, speed was 0 */
1913                         if (dmi_mem_clk_speed > 0) {
1914                                 /* Set speed if a valid speed is read */
1915                                 *dclk_freq = dmi_mem_clk_speed;
1916                         } else {
1917                                 /* Otherwise we don't have a valid speed */
1918                                 *dclk_freq = -1;
1919                         }
1920                 } else if (*dclk_freq > 0 &&
1921                            *dclk_freq != dmi_mem_clk_speed) {
1922                         /*
1923                          * If we have a speed, check that all DIMMS are the same
1924                          * speed, otherwise set the speed as invalid.
1925                          */
1926                         *dclk_freq = -1;
1927                 }
1928         }
1929 }
1930
1931 /*
1932  * The default DCLK frequency is used as a fallback if we
1933  * fail to find anything reliable in the DMI. The value
1934  * is taken straight from the datasheet.
1935  */
1936 #define DEFAULT_DCLK_FREQ 800
1937
1938 static int get_dclk_freq(void)
1939 {
1940         int dclk_freq = 0;
1941
1942         dmi_walk(decode_dclk, (void *)&dclk_freq);
1943
1944         if (dclk_freq < 1)
1945                 return DEFAULT_DCLK_FREQ;
1946
1947         return dclk_freq;
1948 }
1949
1950 /*
1951  * set_sdram_scrub_rate         This routine sets byte/sec bandwidth scrub rate
1952  *                              to hardware according to SCRUBINTERVAL formula
1953  *                              found in datasheet.
1954  */
static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	u32 dw_scrub;
	u32 dw_ssr;

	/* Get data from the MC register, function 2 */
	pdev = pvt->pci_mcr[2];
	if (!pdev)
		return -ENODEV;

	pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);

	if (new_bw == 0) {
		/* Prepare to disable patrol scrub */
		dw_scrub &= ~STARTSCRUB;
		/* Stop the patrol scrub engine */
		write_and_test(pdev, MC_SCRUB_CONTROL,
			       dw_scrub & ~SCRUBINTERVAL_MASK);

		/* Get current status of scrub rate and set bit to disable */
		pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
		dw_ssr &= ~SSR_MODE_MASK;
		dw_ssr |= SSR_MODE_DISABLE;
	} else {
		const int cache_line_size = 64;
		const u32 freq_dclk_mhz = pvt->dclk_freq;
		unsigned long long scrub_interval;
		/*
		 * Translate the desired scrub rate to a register value and
		 * program the corresponding register value:
		 * interval = dclk[MHz] * 1e6 * cacheline[bytes] / bw[bytes/s]
		 */
		scrub_interval = (unsigned long long)freq_dclk_mhz *
			cache_line_size * 1000000;
		do_div(scrub_interval, new_bw);

		/* Reject rates outside what the interval field can encode */
		if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
			return -EINVAL;

		dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;

		/* Start the patrol scrub engine */
		pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
				       STARTSCRUB | dw_scrub);

		/* Get current status of scrub rate and set bit to enable */
		pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
		dw_ssr &= ~SSR_MODE_MASK;
		dw_ssr |= SSR_MODE_ENABLE;
	}
	/* Disable or enable scrubbing */
	pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);

	return new_bw;
}
2011
2012 /*
2013  * get_sdram_scrub_rate         This routine convert current scrub rate value
2014  *                              into byte/sec bandwidth accourding to
2015  *                              SCRUBINTERVAL formula found in datasheet.
2016  */
2017 static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
2018 {
2019         struct i7core_pvt *pvt = mci->pvt_info;
2020         struct pci_dev *pdev;
2021         const u32 cache_line_size = 64;
2022         const u32 freq_dclk_mhz = pvt->dclk_freq;
2023         unsigned long long scrub_rate;
2024         u32 scrubval;
2025
2026         /* Get data from the MC register, function 2 */
2027         pdev = pvt->pci_mcr[2];
2028         if (!pdev)
2029                 return -ENODEV;
2030
2031         /* Get current scrub control data */
2032         pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
2033
2034         /* Mask highest 8-bits to 0 */
2035         scrubval &=  SCRUBINTERVAL_MASK;
2036         if (!scrubval)
2037                 return 0;
2038
2039         /* Calculate scrub rate value into byte/sec bandwidth */
2040         scrub_rate =  (unsigned long long)freq_dclk_mhz *
2041                 1000000 * cache_line_size;
2042         do_div(scrub_rate, scrubval);
2043         return (int)scrub_rate;
2044 }
2045
/*
 * Unlock the non-core config registers and expose the scrub-rate
 * callbacks to the EDAC core.
 */
static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 pci_lock;

	/* Unlock writes to pci registers */
	pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
	pci_lock &= ~0x3;
	pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
			       pci_lock | MC_CFG_UNLOCK);

	/* Hand the scrub-rate knobs to the EDAC core */
	mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
	mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
}
2060
/*
 * Re-lock the non-core config registers on teardown.  Note it does not
 * clear the mci scrub callbacks; the mci is freed right afterwards.
 */
static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 pci_lock;

	/* Lock writes to pci registers */
	pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
	pci_lock &= ~0x3;
	pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
			       pci_lock | MC_CFG_LOCK);
}
2072
2073 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
2074 {
2075         pvt->i7core_pci = edac_pci_create_generic_ctl(
2076                                                 &pvt->i7core_dev->pdev[0]->dev,
2077                                                 EDAC_MOD_STR);
2078         if (unlikely(!pvt->i7core_pci))
2079                 i7core_printk(KERN_WARNING,
2080                               "Unable to setup PCI error report via EDAC\n");
2081 }
2082
2083 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
2084 {
2085         if (likely(pvt->i7core_pci))
2086                 edac_pci_release_generic_ctl(pvt->i7core_pci);
2087         else
2088                 i7core_printk(KERN_ERR,
2089                                 "Couldn't find mem_ctl_info for socket %d\n",
2090                                 pvt->i7core_dev->socket);
2091         pvt->i7core_pci = NULL;
2092 }
2093
/*
 * Tear down one registered memory controller: reverse of
 * i7core_register_mci().  The teardown order mirrors the setup order in
 * reverse (scrub lock, MCE chain, PCI ctl, sysfs, then free).
 */
static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
{
	struct mem_ctl_info *mci = i7core_dev->mci;
	struct i7core_pvt *pvt;

	if (unlikely(!mci || !mci->pvt_info)) {
		debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
			__func__, &i7core_dev->pdev[0]->dev);

		i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
		return;
	}

	pvt = mci->pvt_info;

	debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
		__func__, mci, &i7core_dev->pdev[0]->dev);

	/* Disable scrubrate setting */
	if (pvt->enable_scrub)
		disable_sdram_scrub_setting(mci);

	/* Stop receiving MCE events before the mci goes away */
	mce_unregister_decode_chain(&i7_mce_dec);

	/* Disable EDAC polling */
	i7core_pci_ctl_release(pvt);

	/* Remove MC sysfs nodes */
	edac_mc_del_mc(mci->dev);

	debugf1("%s: free mci struct\n", mci->ctl_name);
	kfree(mci->ctl_name);
	edac_mc_free(mci);
	i7core_dev->mci = NULL;
}
2129
/*
 * Allocate, configure and register one EDAC memory controller for the
 * given socket.  Returns 0 on success or a negative errno; on failure
 * the mci is freed and i7core_dev->mci is left NULL.
 */
static int i7core_register_mci(struct i7core_dev *i7core_dev)
{
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;
	int rc;
	struct edac_mc_layer layers[2];

	/* allocate a new MC control structure */

	/* Layout: per-channel layer, then per-slot (DIMM) layer */
	layers[0].type = EDAC_MC_LAYER_CHANNEL;
	layers[0].size = NUM_CHANS;
	layers[0].is_virt_csrow = false;
	layers[1].type = EDAC_MC_LAYER_SLOT;
	layers[1].size = MAX_DIMMS;
	layers[1].is_virt_csrow = true;
	mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers,
			    sizeof(*pvt));
	if (unlikely(!mci))
		return -ENOMEM;

	debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
		__func__, mci, &i7core_dev->pdev[0]->dev);

	pvt = mci->pvt_info;
	memset(pvt, 0, sizeof(*pvt));

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	/*
	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
	 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
	 * memory channels
	 */
	mci->mtype_cap = MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = "i7core_edac.c";
	mci->mod_ver = I7CORE_REVISION;
	/* NOTE(review): kasprintf() may return NULL on OOM; not checked here */
	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
				  i7core_dev->socket);
	mci->dev_name = pci_name(i7core_dev->pdev[0]);
	mci->ctl_page_to_phys = NULL;

	/* Store pci devices at mci for faster access */
	rc = mci_bind_devs(mci, i7core_dev);
	if (unlikely(rc < 0))
		goto fail0;

	/* RDIMM vs UDIMM systems expose different sysfs attributes */
	if (pvt->is_registered)
		mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
	else
		mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;

	/* Get dimm basic config */
	get_dimm_config(mci);
	/* record ptr to the generic device */
	mci->dev = &i7core_dev->pdev[0]->dev;
	/* Set the function pointer to an actual operation function */
	mci->edac_check = i7core_check_error;

	/* Enable scrubrate setting */
	if (pvt->enable_scrub)
		enable_sdram_scrub_setting(mci);

	/* add this new MC control structure to EDAC's list of MCs */
	if (unlikely(edac_mc_add_mc(mci))) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mc_add_mc()\n", __func__);
		/* FIXME: perhaps some code should go here that disables error
		 * reporting if we just enabled it
		 */

		rc = -EINVAL;
		goto fail0;
	}

	/* Default error mask is any memory */
	pvt->inject.channel = 0;
	pvt->inject.dimm = -1;
	pvt->inject.rank = -1;
	pvt->inject.bank = -1;
	pvt->inject.page = -1;
	pvt->inject.col = -1;

	/* allocating generic PCI control info */
	i7core_pci_ctl_create(pvt);

	/* DCLK for scrub rate setting */
	pvt->dclk_freq = get_dclk_freq();

	/* Start receiving MCE events only when everything is in place */
	mce_register_decode_chain(&i7_mce_dec);

	return 0;

fail0:
	/* Undo the allocation; callers see i7core_dev->mci == NULL */
	kfree(mci->ctl_name);
	edac_mc_free(mci);
	i7core_dev->mci = NULL;
	return rc;
}
2232
2233 /*
2234  *      i7core_probe    Probe for ONE instance of device to see if it is
2235  *                      present.
2236  *      return:
2237  *              0 for FOUND a device
2238  *              < 0 for error code
2239  */
2240
/*
 * PCI probe entry point.  Runs the full device discovery exactly once
 * (guarded by "probed" under i7core_edac_lock) and registers an mci for
 * every socket found, regardless of which pdev triggered the probe.
 */
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
{
	int rc, count = 0;
	struct i7core_dev *i7core_dev;

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);

	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(probed >= 1)) {
		mutex_unlock(&i7core_edac_lock);
		return -ENODEV;
	}
	probed++;

	rc = i7core_get_all_devices();
	if (unlikely(rc < 0))
		goto fail0;

	/* Register one EDAC mci per discovered socket */
	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		count++;
		rc = i7core_register_mci(i7core_dev);
		if (unlikely(rc < 0))
			goto fail1;
	}

	/*
	 * Nehalem-EX uses a different memory controller. However, as the
	 * memory controller is not visible on some Nehalem/Nehalem-EP, we
	 * need to indirectly probe via a X58 PCI device. The same devices
	 * are found on (some) Nehalem-EX. So, on those machines, the
	 * probe routine needs to return -ENODEV, as the actual Memory
	 * Controller registers won't be detected.
	 */
	if (!count) {
		rc = -ENODEV;
		goto fail1;
	}

	i7core_printk(KERN_INFO,
		      "Driver loaded, %d memory controller(s) found.\n",
		      count);

	mutex_unlock(&i7core_edac_lock);
	return 0;

fail1:
	/* Unwind every mci registered so far, then drop the PCI refs */
	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
		i7core_unregister_mci(i7core_dev);

	i7core_put_all_devices();
fail0:
	mutex_unlock(&i7core_edac_lock);
	return rc;
}
2299
2300 /*
2301  *      i7core_remove   destructor for one instance of device
2302  *
2303  */
/*
 * PCI remove entry point: tears down every registered controller, not
 * just the one matching @pdev (see the comment below for why).
 */
static void __devexit i7core_remove(struct pci_dev *pdev)
{
	struct i7core_dev *i7core_dev;

	debugf0(__FILE__ ": %s()\n", __func__);

	/*
	 * we have a trouble here: pdev value for removal will be wrong, since
	 * it will point to the X58 register used to detect that the machine
	 * is a Nehalem or upper design. However, due to the way several PCI
	 * devices are grouped together to provide MC functionality, we need
	 * to use a different method for releasing the devices
	 */

	mutex_lock(&i7core_edac_lock);

	/* Nothing to do if probe never completed */
	if (unlikely(!probed)) {
		mutex_unlock(&i7core_edac_lock);
		return;
	}

	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
		i7core_unregister_mci(i7core_dev);

	/* Release PCI resources */
	i7core_put_all_devices();

	probed--;

	mutex_unlock(&i7core_edac_lock);
}
2335
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 *	i7core_driver	pci_driver structure for this module
 *
 * Note: probe/remove operate on all sockets at once; the matched pdev
 * is only used to detect a compatible platform.
 */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};
2348
2349 /*
2350  *      i7core_init             Module entry function
2351  *                      Try to initialize this module for its devices
2352  */
2353 static int __init i7core_init(void)
2354 {
2355         int pci_rc;
2356
2357         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2358
2359         /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2360         opstate_init();
2361
2362         if (use_pci_fixup)
2363                 i7core_xeon_pci_fixup(pci_dev_table);
2364
2365         pci_rc = pci_register_driver(&i7core_driver);
2366
2367         if (pci_rc >= 0)
2368                 return 0;
2369
2370         i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2371                       pci_rc);
2372
2373         return pci_rc;
2374 }
2375
2376 /*
2377  *      i7core_exit()   Module exit function
2378  *                      Unregister the driver
2379  */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	/* Triggers i7core_remove() for the bound device */
	pci_unregister_driver(&i7core_driver);
}
2385
2386 module_init(i7core_init);
2387 module_exit(i7core_exit);
2388
2389 MODULE_LICENSE("GPL");
2390 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2391 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2392 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2393                    I7CORE_REVISION);
2394
2395 module_param(edac_op_state, int, 0444);
2396 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");