drivers/edac/al_mc_edac.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 */
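/*
 * EDAC driver for the ECC unit of the Amazon Annapurna Labs memory
 * controller. Correctable (CE) and uncorrectable (UE) ECC errors are
 * reported either from dedicated interrupts or, when no interrupt is
 * described in the device tree, by polling the error-count register
 * through the EDAC check callback.
 *
 * Illustrative device-tree sketch (addresses and interrupt numbers are
 * placeholders; only the compatible string and the "ue"/"ce" interrupt
 * names are taken from this driver):
 *
 *	mc-edac@f0080000 {
 *		compatible = "amazon,al-mc-edac";
 *		reg = <0x0 0xf0080000 0x0 0x10000>;
 *		interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>,
 *			     <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
 *		interrupt-names = "ue", "ce";
 *	};
 */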
#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/edac.h>
#include <linux/of_irq.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
#include "edac_module.h"

/* Register offsets */
#define AL_MC_ECC_CFG           0x70
#define AL_MC_ECC_CLEAR         0x7c
#define AL_MC_ECC_ERR_COUNT     0x80
#define AL_MC_ECC_CE_ADDR0      0x84
#define AL_MC_ECC_CE_ADDR1      0x88
#define AL_MC_ECC_UE_ADDR0      0xa4
#define AL_MC_ECC_UE_ADDR1      0xa8
#define AL_MC_ECC_CE_SYND0      0x8c
#define AL_MC_ECC_CE_SYND1      0x90
#define AL_MC_ECC_CE_SYND2      0x94
#define AL_MC_ECC_UE_SYND0      0xac
#define AL_MC_ECC_UE_SYND1      0xb0
#define AL_MC_ECC_UE_SYND2      0xb4

/* Register fields */
#define AL_MC_ECC_CFG_SCRUB_DISABLED    BIT(4)

#define AL_MC_ECC_CLEAR_UE_COUNT        BIT(3)
#define AL_MC_ECC_CLEAR_CE_COUNT        BIT(2)
#define AL_MC_ECC_CLEAR_UE_ERR          BIT(1)
#define AL_MC_ECC_CLEAR_CE_ERR          BIT(0)

#define AL_MC_ECC_ERR_COUNT_UE          GENMASK(31, 16)
#define AL_MC_ECC_ERR_COUNT_CE          GENMASK(15, 0)

#define AL_MC_ECC_CE_ADDR0_RANK         GENMASK(25, 24)
#define AL_MC_ECC_CE_ADDR0_ROW          GENMASK(17, 0)

#define AL_MC_ECC_CE_ADDR1_BG           GENMASK(25, 24)
#define AL_MC_ECC_CE_ADDR1_BANK         GENMASK(18, 16)
#define AL_MC_ECC_CE_ADDR1_COLUMN       GENMASK(11, 0)

#define AL_MC_ECC_UE_ADDR0_RANK         GENMASK(25, 24)
#define AL_MC_ECC_UE_ADDR0_ROW          GENMASK(17, 0)

#define AL_MC_ECC_UE_ADDR1_BG           GENMASK(25, 24)
#define AL_MC_ECC_UE_ADDR1_BANK         GENMASK(18, 16)
#define AL_MC_ECC_UE_ADDR1_COLUMN       GENMASK(11, 0)

#define DRV_NAME "al_mc_edac"
#define AL_MC_EDAC_MSG_MAX 256

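/*
 * Per-instance state: ioremapped register base, the CE/UE interrupt numbers
 * (<= 0 when absent, selecting polling mode) and a lock serializing the
 * calls into edac_mc_handle_error().
 */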
struct al_mc_edac {
        void __iomem *mmio_base;
        spinlock_t lock;
        int irq_ce;
        int irq_ue;
};

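/*
 * Format one CE/UE report string carrying the decoded DRAM location (rank,
 * row, bank group, bank, column) and the three raw syndrome words.
 */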
static void prepare_msg(char *message, size_t buffer_size,
                        enum hw_event_mc_err_type type,
                        u8 rank, u32 row, u8 bg, u8 bank, u16 column,
                        u32 syn0, u32 syn1, u32 syn2)
{
        snprintf(message, buffer_size,
                 "%s rank=0x%x row=0x%x bg=0x%x bank=0x%x col=0x%x syn0: 0x%x syn1: 0x%x syn2: 0x%x",
                 type == HW_EVENT_ERR_UNCORRECTED ? "UE" : "CE",
                 rank, row, bg, bank, column, syn0, syn1, syn2);
}

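/*
 * Check the correctable-error counter; if it is non-zero, latch the CE
 * address and syndrome registers, clear the hardware counter and status,
 * decode rank/row/bank group/bank/column and report the errors to the
 * EDAC core. Returns the number of correctable errors handled.
 */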
static int handle_ce(struct mem_ctl_info *mci)
{
        u32 eccerrcnt, ecccaddr0, ecccaddr1, ecccsyn0, ecccsyn1, ecccsyn2, row;
        struct al_mc_edac *al_mc = mci->pvt_info;
        char msg[AL_MC_EDAC_MSG_MAX];
        u16 ce_count, column;
        unsigned long flags;
        u8 rank, bg, bank;

        eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
        ce_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_CE, eccerrcnt);
        if (!ce_count)
                return 0;

        ecccaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR0);
        ecccaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR1);
        ecccsyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND0);
        ecccsyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND1);
        ecccsyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND2);

        writel_relaxed(AL_MC_ECC_CLEAR_CE_COUNT | AL_MC_ECC_CLEAR_CE_ERR,
                       al_mc->mmio_base + AL_MC_ECC_CLEAR);

        dev_dbg(mci->pdev, "ecccaddr0=0x%08x ecccaddr1=0x%08x\n",
                ecccaddr0, ecccaddr1);

        rank = FIELD_GET(AL_MC_ECC_CE_ADDR0_RANK, ecccaddr0);
        row = FIELD_GET(AL_MC_ECC_CE_ADDR0_ROW, ecccaddr0);

        bg = FIELD_GET(AL_MC_ECC_CE_ADDR1_BG, ecccaddr1);
        bank = FIELD_GET(AL_MC_ECC_CE_ADDR1_BANK, ecccaddr1);
        column = FIELD_GET(AL_MC_ECC_CE_ADDR1_COLUMN, ecccaddr1);

        prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_CORRECTED,
                    rank, row, bg, bank, column,
                    ecccsyn0, ecccsyn1, ecccsyn2);

        spin_lock_irqsave(&al_mc->lock, flags);
        edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
                             ce_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg);
        spin_unlock_irqrestore(&al_mc->lock, flags);

        return ce_count;
}

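/*
 * Uncorrectable-error counterpart of handle_ce(): same read, clear, decode
 * and report sequence, using the UE count, address and syndrome registers.
 */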
static int handle_ue(struct mem_ctl_info *mci)
{
        u32 eccerrcnt, eccuaddr0, eccuaddr1, eccusyn0, eccusyn1, eccusyn2, row;
        struct al_mc_edac *al_mc = mci->pvt_info;
        char msg[AL_MC_EDAC_MSG_MAX];
        u16 ue_count, column;
        unsigned long flags;
        u8 rank, bg, bank;

        eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
        ue_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_UE, eccerrcnt);
        if (!ue_count)
                return 0;

        eccuaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR0);
        eccuaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR1);
        eccusyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND0);
        eccusyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND1);
        eccusyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND2);

        writel_relaxed(AL_MC_ECC_CLEAR_UE_COUNT | AL_MC_ECC_CLEAR_UE_ERR,
                       al_mc->mmio_base + AL_MC_ECC_CLEAR);

        dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n",
                eccuaddr0, eccuaddr1);

        rank = FIELD_GET(AL_MC_ECC_UE_ADDR0_RANK, eccuaddr0);
        row = FIELD_GET(AL_MC_ECC_UE_ADDR0_ROW, eccuaddr0);

        bg = FIELD_GET(AL_MC_ECC_UE_ADDR1_BG, eccuaddr1);
        bank = FIELD_GET(AL_MC_ECC_UE_ADDR1_BANK, eccuaddr1);
        column = FIELD_GET(AL_MC_ECC_UE_ADDR1_COLUMN, eccuaddr1);

        prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_UNCORRECTED,
                    rank, row, bg, bank, column,
                    eccusyn0, eccusyn1, eccusyn2);

        spin_lock_irqsave(&al_mc->lock, flags);
        edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
                             ue_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg);
        spin_unlock_irqrestore(&al_mc->lock, flags);

        return ue_count;
}

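/*
 * Polling callback used by the EDAC core; only the error types that have
 * no dedicated interrupt are checked here.
 */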
static void al_mc_edac_check(struct mem_ctl_info *mci)
{
        struct al_mc_edac *al_mc = mci->pvt_info;

        if (al_mc->irq_ue <= 0)
                handle_ue(mci);

        if (al_mc->irq_ce <= 0)
                handle_ce(mci);
}

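/*
 * Handlers for the dedicated UE/CE interrupt lines. The lines are requested
 * with IRQF_SHARED, so IRQ_HANDLED is only returned when a pending error
 * was actually found and reported.
 */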
static irqreturn_t al_mc_edac_irq_handler_ue(int irq, void *info)
{
        struct platform_device *pdev = info;
        struct mem_ctl_info *mci = platform_get_drvdata(pdev);

        if (handle_ue(mci))
                return IRQ_HANDLED;
        return IRQ_NONE;
}

static irqreturn_t al_mc_edac_irq_handler_ce(int irq, void *info)
{
        struct platform_device *pdev = info;
        struct mem_ctl_info *mci = platform_get_drvdata(pdev);

        if (handle_ce(mci))
                return IRQ_HANDLED;
        return IRQ_NONE;
}

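/*
 * Derive the scrub mode reported to the EDAC core from the controller
 * configuration register: SCRUB_NONE if scrubbing is disabled there,
 * SCRUB_HW_SRC otherwise.
 */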
static enum scrub_type get_scrub_mode(void __iomem *mmio_base)
{
        u32 ecccfg0;

        ecccfg0 = readl(mmio_base + AL_MC_ECC_CFG);

        if (FIELD_GET(AL_MC_ECC_CFG_SCRUB_DISABLED, ecccfg0))
                return SCRUB_NONE;
        else
                return SCRUB_HW_SRC;
}

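/*
 * devm actions so that the mem_ctl_info allocation and the EDAC core
 * registration are undone automatically on probe failure or removal.
 */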
static void devm_al_mc_edac_free(void *data)
{
        edac_mc_free(data);
}

static void devm_al_mc_edac_del(void *data)
{
        edac_mc_del_mc(data);
}

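/*
 * Map the controller registers, allocate a single chip-select mem_ctl_info,
 * choose interrupt or polling operation per error type, register with the
 * EDAC core and request whichever IRQs are available.
 */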
static int al_mc_edac_probe(struct platform_device *pdev)
{
        struct edac_mc_layer layers[1];
        struct mem_ctl_info *mci;
        struct al_mc_edac *al_mc;
        void __iomem *mmio_base;
        struct dimm_info *dimm;
        int ret;

        mmio_base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(mmio_base)) {
                dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n",
                        PTR_ERR(mmio_base));
                return PTR_ERR(mmio_base);
        }

        layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
        layers[0].size = 1;
        layers[0].is_virt_csrow = false;
        mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
                            sizeof(struct al_mc_edac));
        if (!mci)
                return -ENOMEM;

        ret = devm_add_action(&pdev->dev, devm_al_mc_edac_free, mci);
        if (ret) {
                edac_mc_free(mci);
                return ret;
        }

        platform_set_drvdata(pdev, mci);
        al_mc = mci->pvt_info;

        al_mc->mmio_base = mmio_base;

        al_mc->irq_ue = of_irq_get_byname(pdev->dev.of_node, "ue");
        if (al_mc->irq_ue <= 0)
                dev_dbg(&pdev->dev,
                        "no IRQ defined for UE - falling back to polling\n");

        al_mc->irq_ce = of_irq_get_byname(pdev->dev.of_node, "ce");
        if (al_mc->irq_ce <= 0)
                dev_dbg(&pdev->dev,
                        "no IRQ defined for CE - falling back to polling\n");

        /*
         * In case both interrupts (ue/ce) are found, use interrupt mode.
         * In case none of the interrupts is found, use polling mode.
         * In case only one interrupt is found, use interrupt mode for it but
         * keep polling mode enabled for the other.
         */
        if (al_mc->irq_ue <= 0 || al_mc->irq_ce <= 0) {
                edac_op_state = EDAC_OPSTATE_POLL;
                mci->edac_check = al_mc_edac_check;
        } else {
                edac_op_state = EDAC_OPSTATE_INT;
        }

        spin_lock_init(&al_mc->lock);

        mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4;
        mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
        mci->edac_cap = EDAC_FLAG_SECDED;
        mci->mod_name = DRV_NAME;
        mci->ctl_name = "al_mc";
        mci->pdev = &pdev->dev;
        mci->scrub_mode = get_scrub_mode(mmio_base);

        dimm = *mci->dimms;
        dimm->grain = 1;

        ret = edac_mc_add_mc(mci);
        if (ret < 0) {
                dev_err(&pdev->dev,
                        "failed to add memory controller device (%d)\n",
                        ret);
                return ret;
        }

        ret = devm_add_action(&pdev->dev, devm_al_mc_edac_del, &pdev->dev);
        if (ret) {
                edac_mc_del_mc(&pdev->dev);
                return ret;
        }

        if (al_mc->irq_ue > 0) {
                ret = devm_request_irq(&pdev->dev,
                                       al_mc->irq_ue,
                                       al_mc_edac_irq_handler_ue,
                                       IRQF_SHARED,
                                       pdev->name,
                                       pdev);
                if (ret != 0) {
                        dev_err(&pdev->dev,
                                "failed to request UE IRQ %d (%d)\n",
                                al_mc->irq_ue, ret);
                        return ret;
                }
        }

        if (al_mc->irq_ce > 0) {
                ret = devm_request_irq(&pdev->dev,
                                       al_mc->irq_ce,
                                       al_mc_edac_irq_handler_ce,
                                       IRQF_SHARED,
                                       pdev->name,
                                       pdev);
                if (ret != 0) {
                        dev_err(&pdev->dev,
                                "failed to request CE IRQ %d (%d)\n",
                                al_mc->irq_ce, ret);
                        return ret;
                }
        }

        return 0;
}

static const struct of_device_id al_mc_edac_of_match[] = {
        { .compatible = "amazon,al-mc-edac", },
        {},
};

MODULE_DEVICE_TABLE(of, al_mc_edac_of_match);

static struct platform_driver al_mc_edac_driver = {
        .probe = al_mc_edac_probe,
        .driver = {
                .name = DRV_NAME,
                .of_match_table = al_mc_edac_of_match,
        },
};

module_platform_driver(al_mc_edac_driver);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Talel Shenhar");
MODULE_DESCRIPTION("Amazon's Annapurna Labs Memory Controller EDAC Driver");