Merge remote-tracking branch 'spi/for-5.14' into spi-next
[linux-2.6-microblaze.git] / drivers / misc / habanalabs / common / firmware_if.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2019 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "habanalabs.h"
9 #include "../include/common/hl_boot_if.h"
10
11 #include <linux/firmware.h>
12 #include <linux/slab.h>
13
14 #define FW_FILE_MAX_SIZE        0x1400000 /* maximum size of 20MB */
15 /**
16  * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
17  *
18  * @hdev: pointer to hl_device structure.
19  * @fw_name: the firmware image name
20  * @dst: IO memory mapped address space to copy firmware to
21  * @src_offset: offset in src FW to copy from
22  * @size: amount of bytes to copy (0 to copy the whole binary)
23  *
24  * Copy fw code from firmware file to device memory.
25  *
26  * Return: 0 on success, non-zero for failure.
27  */
28 int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
29                                 void __iomem *dst, u32 src_offset, u32 size)
30 {
31         const struct firmware *fw;
32         const void *fw_data;
33         size_t fw_size;
34         int rc;
35
36         rc = request_firmware(&fw, fw_name, hdev->dev);
37         if (rc) {
38                 dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name);
39                 goto out;
40         }
41
42         fw_size = fw->size;
43         if ((fw_size % 4) != 0) {
44                 dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
45                         fw_name, fw_size);
46                 rc = -EINVAL;
47                 goto out;
48         }
49
50         dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
51
52         if (fw_size > FW_FILE_MAX_SIZE) {
53                 dev_err(hdev->dev,
54                         "FW file size %zu exceeds maximum of %u bytes\n",
55                         fw_size, FW_FILE_MAX_SIZE);
56                 rc = -EINVAL;
57                 goto out;
58         }
59
60         if (size - src_offset > fw_size) {
61                 dev_err(hdev->dev,
62                         "size to copy(%u) and offset(%u) are invalid\n",
63                         size, src_offset);
64                 rc = -EINVAL;
65                 goto out;
66         }
67
68         if (size)
69                 fw_size = size;
70
71         fw_data = (const void *) fw->data;
72
73         memcpy_toio(dst, fw_data + src_offset, fw_size);
74
75 out:
76         release_firmware(fw);
77         return rc;
78 }
79
80 int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
81 {
82         struct cpucp_packet pkt = {};
83
84         pkt.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT);
85
86         return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
87                                                 sizeof(pkt), 0, NULL);
88 }
89
90 int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
91                                 u16 len, u32 timeout, u64 *result)
92 {
93         struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id];
94         struct cpucp_packet *pkt;
95         dma_addr_t pkt_dma_addr;
96         u32 tmp, expected_ack_val;
97         int rc = 0;
98
99         pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
100                                                                 &pkt_dma_addr);
101         if (!pkt) {
102                 dev_err(hdev->dev,
103                         "Failed to allocate DMA memory for packet to CPU\n");
104                 return -ENOMEM;
105         }
106
107         memcpy(pkt, msg, len);
108
109         mutex_lock(&hdev->send_cpu_message_lock);
110
111         if (hdev->disabled)
112                 goto out;
113
114         if (hdev->device_cpu_disabled) {
115                 rc = -EIO;
116                 goto out;
117         }
118
119         /* set fence to a non valid value */
120         pkt->fence = UINT_MAX;
121
122         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
123         if (rc) {
124                 dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
125                 goto out;
126         }
127
128         if (hdev->asic_prop.fw_app_security_map &
129                         CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
130                 expected_ack_val = queue->pi;
131         else
132                 expected_ack_val = CPUCP_PACKET_FENCE_VAL;
133
134         rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
135                                 (tmp == expected_ack_val), 1000,
136                                 timeout, true);
137
138         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
139
140         if (rc == -ETIMEDOUT) {
141                 dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp);
142                 hdev->device_cpu_disabled = true;
143                 goto out;
144         }
145
146         tmp = le32_to_cpu(pkt->ctl);
147
148         rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
149         if (rc) {
150                 dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
151                         rc,
152                         (tmp & CPUCP_PKT_CTL_OPCODE_MASK)
153                                                 >> CPUCP_PKT_CTL_OPCODE_SHIFT);
154                 rc = -EIO;
155         } else if (result) {
156                 *result = le64_to_cpu(pkt->result);
157         }
158
159 out:
160         mutex_unlock(&hdev->send_cpu_message_lock);
161
162         hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
163
164         return rc;
165 }
166
167 int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
168 {
169         struct cpucp_packet pkt;
170         u64 result;
171         int rc;
172
173         memset(&pkt, 0, sizeof(pkt));
174
175         pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
176                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
177         pkt.value = cpu_to_le64(event_type);
178
179         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
180                                                 0, &result);
181
182         if (rc)
183                 dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
184
185         return rc;
186 }
187
188 int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
189                 size_t irq_arr_size)
190 {
191         struct cpucp_unmask_irq_arr_packet *pkt;
192         size_t total_pkt_size;
193         u64 result;
194         int rc;
195
196         total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
197                         irq_arr_size;
198
199         /* data should be aligned to 8 bytes in order to CPU-CP to copy it */
200         total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
201
202         /* total_pkt_size is casted to u16 later on */
203         if (total_pkt_size > USHRT_MAX) {
204                 dev_err(hdev->dev, "too many elements in IRQ array\n");
205                 return -EINVAL;
206         }
207
208         pkt = kzalloc(total_pkt_size, GFP_KERNEL);
209         if (!pkt)
210                 return -ENOMEM;
211
212         pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
213         memcpy(&pkt->irqs, irq_arr, irq_arr_size);
214
215         pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
216                                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
217
218         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
219                                                 total_pkt_size, 0, &result);
220
221         if (rc)
222                 dev_err(hdev->dev, "failed to unmask IRQ array\n");
223
224         kfree(pkt);
225
226         return rc;
227 }
228
229 int hl_fw_test_cpu_queue(struct hl_device *hdev)
230 {
231         struct cpucp_packet test_pkt = {};
232         u64 result;
233         int rc;
234
235         test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
236                                         CPUCP_PKT_CTL_OPCODE_SHIFT);
237         test_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
238
239         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
240                                                 sizeof(test_pkt), 0, &result);
241
242         if (!rc) {
243                 if (result != CPUCP_PACKET_FENCE_VAL)
244                         dev_err(hdev->dev,
245                                 "CPU queue test failed (%#08llx)\n", result);
246         } else {
247                 dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
248         }
249
250         return rc;
251 }
252
253 void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
254                                                 dma_addr_t *dma_handle)
255 {
256         u64 kernel_addr;
257
258         kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
259
260         *dma_handle = hdev->cpu_accessible_dma_address +
261                 (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
262
263         return (void *) (uintptr_t) kernel_addr;
264 }
265
266 void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
267                                         void *vaddr)
268 {
269         gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
270                         size);
271 }
272
273 int hl_fw_send_heartbeat(struct hl_device *hdev)
274 {
275         struct cpucp_packet hb_pkt = {};
276         u64 result;
277         int rc;
278
279         hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
280                                         CPUCP_PKT_CTL_OPCODE_SHIFT);
281         hb_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
282
283         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
284                                                 sizeof(hb_pkt), 0, &result);
285
286         if ((rc) || (result != CPUCP_PACKET_FENCE_VAL))
287                 rc = -EIO;
288
289         return rc;
290 }
291
292 static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
293                 u32 cpu_security_boot_status_reg)
294 {
295         u32 err_val, security_val;
296         bool err_exists = false;
297
298         /* Some of the firmware status codes are deprecated in newer f/w
299          * versions. In those versions, the errors are reported
300          * in different registers. Therefore, we need to check those
301          * registers and print the exact errors. Moreover, there
302          * may be multiple errors, so we need to report on each error
303          * separately. Some of the error codes might indicate a state
304          * that is not an error per-se, but it is an error in production
305          * environment
306          */
307         err_val = RREG32(boot_err0_reg);
308         if (!(err_val & CPU_BOOT_ERR0_ENABLED))
309                 return 0;
310
311         if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) {
312                 dev_err(hdev->dev,
313                         "Device boot error - DRAM initialization failed\n");
314                 err_exists = true;
315         }
316
317         if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) {
318                 dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
319                 err_exists = true;
320         }
321
322         if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) {
323                 dev_err(hdev->dev,
324                         "Device boot error - Thermal Sensor initialization failed\n");
325                 err_exists = true;
326         }
327
328         if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
329                 dev_warn(hdev->dev,
330                         "Device boot warning - Skipped DRAM initialization\n");
331                 /* This is a warning so we don't want it to disable the
332                  * device
333                  */
334                 err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED;
335         }
336
337         if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
338                 if (hdev->bmc_enable) {
339                         dev_err(hdev->dev,
340                                 "Device boot error - Skipped waiting for BMC\n");
341                         err_exists = true;
342                 } else {
343                         dev_info(hdev->dev,
344                                 "Device boot message - Skipped waiting for BMC\n");
345                         /* This is an info so we don't want it to disable the
346                          * device
347                          */
348                         err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED;
349                 }
350         }
351
352         if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) {
353                 dev_err(hdev->dev,
354                         "Device boot error - Serdes data from BMC not available\n");
355                 err_exists = true;
356         }
357
358         if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) {
359                 dev_err(hdev->dev,
360                         "Device boot error - NIC F/W initialization failed\n");
361                 err_exists = true;
362         }
363
364         if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) {
365                 dev_err(hdev->dev,
366                         "Device boot warning - security not ready\n");
367                 err_exists = true;
368         }
369
370         if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) {
371                 dev_err(hdev->dev, "Device boot error - security failure\n");
372                 err_exists = true;
373         }
374
375         if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) {
376                 dev_err(hdev->dev, "Device boot error - eFuse failure\n");
377                 err_exists = true;
378         }
379
380         if (err_val & CPU_BOOT_ERR0_PLL_FAIL) {
381                 dev_err(hdev->dev, "Device boot error - PLL failure\n");
382                 err_exists = true;
383         }
384
385         if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
386                 dev_err(hdev->dev,
387                         "Device boot error - device unusable\n");
388                 err_exists = true;
389         }
390
391         security_val = RREG32(cpu_security_boot_status_reg);
392         if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
393                 dev_dbg(hdev->dev, "Device security status %#x\n",
394                                 security_val);
395
396         if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) {
397                 dev_err(hdev->dev,
398                         "Device boot error - unknown error 0x%08x\n",
399                         err_val);
400                 err_exists = true;
401         }
402
403         if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) &
404                                 lower_32_bits(hdev->boot_error_status_mask)))
405                 return -EIO;
406
407         return 0;
408 }
409
410 int hl_fw_cpucp_info_get(struct hl_device *hdev,
411                         u32 cpu_security_boot_status_reg,
412                         u32 boot_err0_reg)
413 {
414         struct asic_fixed_properties *prop = &hdev->asic_prop;
415         struct cpucp_packet pkt = {};
416         void *cpucp_info_cpu_addr;
417         dma_addr_t cpucp_info_dma_addr;
418         u64 result;
419         int rc;
420
421         cpucp_info_cpu_addr =
422                         hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
423                                         sizeof(struct cpucp_info),
424                                         &cpucp_info_dma_addr);
425         if (!cpucp_info_cpu_addr) {
426                 dev_err(hdev->dev,
427                         "Failed to allocate DMA memory for CPU-CP info packet\n");
428                 return -ENOMEM;
429         }
430
431         memset(cpucp_info_cpu_addr, 0, sizeof(struct cpucp_info));
432
433         pkt.ctl = cpu_to_le32(CPUCP_PACKET_INFO_GET <<
434                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
435         pkt.addr = cpu_to_le64(cpucp_info_dma_addr);
436         pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_info));
437
438         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
439                                         HL_CPUCP_INFO_TIMEOUT_USEC, &result);
440         if (rc) {
441                 dev_err(hdev->dev,
442                         "Failed to handle CPU-CP info pkt, error %d\n", rc);
443                 goto out;
444         }
445
446         rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
447         if (rc) {
448                 dev_err(hdev->dev, "Errors in device boot\n");
449                 goto out;
450         }
451
452         memcpy(&prop->cpucp_info, cpucp_info_cpu_addr,
453                         sizeof(prop->cpucp_info));
454
455         rc = hl_build_hwmon_channel_info(hdev, prop->cpucp_info.sensors);
456         if (rc) {
457                 dev_err(hdev->dev,
458                         "Failed to build hwmon channel info, error %d\n", rc);
459                 rc = -EFAULT;
460                 goto out;
461         }
462
463         /* Read FW application security bits again */
464         if (hdev->asic_prop.fw_security_status_valid)
465                 hdev->asic_prop.fw_app_security_map =
466                                 RREG32(cpu_security_boot_status_reg);
467
468 out:
469         hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
470                         sizeof(struct cpucp_info), cpucp_info_cpu_addr);
471
472         return rc;
473 }
474
475 static int hl_fw_send_msi_info_msg(struct hl_device *hdev)
476 {
477         struct cpucp_array_data_packet *pkt;
478         size_t total_pkt_size, data_size;
479         u64 result;
480         int rc;
481
482         /* skip sending this info for unsupported ASICs */
483         if (!hdev->asic_funcs->get_msi_info)
484                 return 0;
485
486         data_size = CPUCP_NUM_OF_MSI_TYPES * sizeof(u32);
487         total_pkt_size = sizeof(struct cpucp_array_data_packet) + data_size;
488
489         /* data should be aligned to 8 bytes in order to CPU-CP to copy it */
490         total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
491
492         /* total_pkt_size is casted to u16 later on */
493         if (total_pkt_size > USHRT_MAX) {
494                 dev_err(hdev->dev, "CPUCP array data is too big\n");
495                 return -EINVAL;
496         }
497
498         pkt = kzalloc(total_pkt_size, GFP_KERNEL);
499         if (!pkt)
500                 return -ENOMEM;
501
502         pkt->length = cpu_to_le32(CPUCP_NUM_OF_MSI_TYPES);
503
504         hdev->asic_funcs->get_msi_info((u32 *)&pkt->data);
505
506         pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_MSI_INFO_SET <<
507                                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
508
509         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)pkt,
510                                                 total_pkt_size, 0, &result);
511
512         /*
513          * in case packet result is invalid it means that FW does not support
514          * this feature and will use default/hard coded MSI values. no reason
515          * to stop the boot
516          */
517         if (rc && result == cpucp_packet_invalid)
518                 rc = 0;
519
520         if (rc)
521                 dev_err(hdev->dev, "failed to send CPUCP array data\n");
522
523         kfree(pkt);
524
525         return rc;
526 }
527
528 int hl_fw_cpucp_handshake(struct hl_device *hdev,
529                         u32 cpu_security_boot_status_reg,
530                         u32 boot_err0_reg)
531 {
532         int rc;
533
534         rc = hl_fw_cpucp_info_get(hdev, cpu_security_boot_status_reg,
535                                         boot_err0_reg);
536         if (rc)
537                 return rc;
538
539         return hl_fw_send_msi_info_msg(hdev);
540 }
541
542 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
543 {
544         struct cpucp_packet pkt = {};
545         void *eeprom_info_cpu_addr;
546         dma_addr_t eeprom_info_dma_addr;
547         u64 result;
548         int rc;
549
550         eeprom_info_cpu_addr =
551                         hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
552                                         max_size, &eeprom_info_dma_addr);
553         if (!eeprom_info_cpu_addr) {
554                 dev_err(hdev->dev,
555                         "Failed to allocate DMA memory for CPU-CP EEPROM packet\n");
556                 return -ENOMEM;
557         }
558
559         memset(eeprom_info_cpu_addr, 0, max_size);
560
561         pkt.ctl = cpu_to_le32(CPUCP_PACKET_EEPROM_DATA_GET <<
562                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
563         pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
564         pkt.data_max_size = cpu_to_le32(max_size);
565
566         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
567                         HL_CPUCP_EEPROM_TIMEOUT_USEC, &result);
568
569         if (rc) {
570                 dev_err(hdev->dev,
571                         "Failed to handle CPU-CP EEPROM packet, error %d\n",
572                         rc);
573                 goto out;
574         }
575
576         /* result contains the actual size */
577         memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
578
579 out:
580         hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
581                         eeprom_info_cpu_addr);
582
583         return rc;
584 }
585
586 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
587                 struct hl_info_pci_counters *counters)
588 {
589         struct cpucp_packet pkt = {};
590         u64 result;
591         int rc;
592
593         pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
594                         CPUCP_PKT_CTL_OPCODE_SHIFT);
595
596         /* Fetch PCI rx counter */
597         pkt.index = cpu_to_le32(cpucp_pcie_throughput_rx);
598         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
599                                         HL_CPUCP_INFO_TIMEOUT_USEC, &result);
600         if (rc) {
601                 dev_err(hdev->dev,
602                         "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
603                 return rc;
604         }
605         counters->rx_throughput = result;
606
607         memset(&pkt, 0, sizeof(pkt));
608         pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
609                         CPUCP_PKT_CTL_OPCODE_SHIFT);
610
611         /* Fetch PCI tx counter */
612         pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx);
613         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
614                                         HL_CPUCP_INFO_TIMEOUT_USEC, &result);
615         if (rc) {
616                 dev_err(hdev->dev,
617                         "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
618                 return rc;
619         }
620         counters->tx_throughput = result;
621
622         /* Fetch PCI replay counter */
623         memset(&pkt, 0, sizeof(pkt));
624         pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET <<
625                         CPUCP_PKT_CTL_OPCODE_SHIFT);
626
627         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
628                         HL_CPUCP_INFO_TIMEOUT_USEC, &result);
629         if (rc) {
630                 dev_err(hdev->dev,
631                         "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
632                 return rc;
633         }
634         counters->replay_cnt = (u32) result;
635
636         return rc;
637 }
638
639 int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
640 {
641         struct cpucp_packet pkt = {};
642         u64 result;
643         int rc;
644
645         pkt.ctl = cpu_to_le32(CPUCP_PACKET_TOTAL_ENERGY_GET <<
646                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
647
648         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
649                                         HL_CPUCP_INFO_TIMEOUT_USEC, &result);
650         if (rc) {
651                 dev_err(hdev->dev,
652                         "Failed to handle CpuCP total energy pkt, error %d\n",
653                                 rc);
654                 return rc;
655         }
656
657         *total_energy = result;
658
659         return rc;
660 }
661
662 int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index,
663                                                 enum pll_index *pll_index)
664 {
665         struct asic_fixed_properties *prop = &hdev->asic_prop;
666         u8 pll_byte, pll_bit_off;
667         bool dynamic_pll;
668         int fw_pll_idx;
669
670         dynamic_pll = prop->fw_security_status_valid &&
671                 (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN);
672
673         if (!dynamic_pll) {
674                 /*
675                  * in case we are working with legacy FW (each asic has unique
676                  * PLL numbering) use the driver based index as they are
677                  * aligned with fw legacy numbering
678                  */
679                 *pll_index = input_pll_index;
680                 return 0;
681         }
682
683         /* retrieve a FW compatible PLL index based on
684          * ASIC specific user request
685          */
686         fw_pll_idx = hdev->asic_funcs->map_pll_idx_to_fw_idx(input_pll_index);
687         if (fw_pll_idx < 0) {
688                 dev_err(hdev->dev, "Invalid PLL index (%u) error %d\n",
689                         input_pll_index, fw_pll_idx);
690                 return -EINVAL;
691         }
692
693         /* PLL map is a u8 array */
694         pll_byte = prop->cpucp_info.pll_map[fw_pll_idx >> 3];
695         pll_bit_off = fw_pll_idx & 0x7;
696
697         if (!(pll_byte & BIT(pll_bit_off))) {
698                 dev_err(hdev->dev, "PLL index %d is not supported\n",
699                         fw_pll_idx);
700                 return -EINVAL;
701         }
702
703         *pll_index = fw_pll_idx;
704
705         return 0;
706 }
707
708 int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
709                 u16 *pll_freq_arr)
710 {
711         struct cpucp_packet pkt;
712         enum pll_index used_pll_idx;
713         u64 result;
714         int rc;
715
716         rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
717         if (rc)
718                 return rc;
719
720         memset(&pkt, 0, sizeof(pkt));
721
722         pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET <<
723                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
724         pkt.pll_type = __cpu_to_le16((u16)used_pll_idx);
725
726         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
727                         HL_CPUCP_INFO_TIMEOUT_USEC, &result);
728         if (rc)
729                 dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
730
731         pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result);
732         pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result);
733         pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result);
734         pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result);
735
736         return rc;
737 }
738
739 int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)
740 {
741         struct cpucp_packet pkt;
742         u64 result;
743         int rc;
744
745         memset(&pkt, 0, sizeof(pkt));
746
747         pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
748                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
749
750         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
751                         HL_CPUCP_INFO_TIMEOUT_USEC, &result);
752         if (rc) {
753                 dev_err(hdev->dev, "Failed to read power, error %d\n", rc);
754                 return rc;
755         }
756
757         *power = result;
758
759         return rc;
760 }
761
762 static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
763 {
764         /* Some of the status codes below are deprecated in newer f/w
765          * versions but we keep them here for backward compatibility
766          */
767         switch (status) {
768         case CPU_BOOT_STATUS_NA:
769                 dev_err(hdev->dev,
770                         "Device boot error - BTL did NOT run\n");
771                 break;
772         case CPU_BOOT_STATUS_IN_WFE:
773                 dev_err(hdev->dev,
774                         "Device boot error - Stuck inside WFE loop\n");
775                 break;
776         case CPU_BOOT_STATUS_IN_BTL:
777                 dev_err(hdev->dev,
778                         "Device boot error - Stuck in BTL\n");
779                 break;
780         case CPU_BOOT_STATUS_IN_PREBOOT:
781                 dev_err(hdev->dev,
782                         "Device boot error - Stuck in Preboot\n");
783                 break;
784         case CPU_BOOT_STATUS_IN_SPL:
785                 dev_err(hdev->dev,
786                         "Device boot error - Stuck in SPL\n");
787                 break;
788         case CPU_BOOT_STATUS_IN_UBOOT:
789                 dev_err(hdev->dev,
790                         "Device boot error - Stuck in u-boot\n");
791                 break;
792         case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
793                 dev_err(hdev->dev,
794                         "Device boot error - DRAM initialization failed\n");
795                 break;
796         case CPU_BOOT_STATUS_UBOOT_NOT_READY:
797                 dev_err(hdev->dev,
798                         "Device boot error - u-boot stopped by user\n");
799                 break;
800         case CPU_BOOT_STATUS_TS_INIT_FAIL:
801                 dev_err(hdev->dev,
802                         "Device boot error - Thermal Sensor initialization failed\n");
803                 break;
804         default:
805                 dev_err(hdev->dev,
806                         "Device boot error - Invalid status code %d\n",
807                         status);
808                 break;
809         }
810 }
811
812 int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
813                 u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
814                 u32 timeout)
815 {
816         struct asic_fixed_properties *prop = &hdev->asic_prop;
817         u32 status, security_status;
818         int rc;
819
820         /* pldm was added for cases in which we use preboot on pldm and want
821          * to load boot fit, but we can't wait for preboot because it runs
822          * very slowly
823          */
824         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm)
825                 return 0;
826
827         /* Need to check two possible scenarios:
828          *
829          * CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT - for newer firmwares where
830          * the preboot is waiting for the boot fit
831          *
832          * All other status values - for older firmwares where the uboot was
833          * loaded from the FLASH
834          */
835         rc = hl_poll_timeout(
836                 hdev,
837                 cpu_boot_status_reg,
838                 status,
839                 (status == CPU_BOOT_STATUS_IN_UBOOT) ||
840                 (status == CPU_BOOT_STATUS_DRAM_RDY) ||
841                 (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
842                 (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
843                 (status == CPU_BOOT_STATUS_SRAM_AVAIL) ||
844                 (status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
845                 10000,
846                 timeout);
847
848         if (rc) {
849                 dev_err(hdev->dev, "Failed to read preboot version\n");
850                 detect_cpu_boot_status(hdev, status);
851
852                 /* If we read all FF, then something is totally wrong, no point
853                  * of reading specific errors
854                  */
855                 if (status != -1)
856                         fw_read_errors(hdev, boot_err0_reg,
857                                         cpu_security_boot_status_reg);
858                 return -EIO;
859         }
860
861         rc = hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
862         if (rc)
863                 return rc;
864
865         security_status = RREG32(cpu_security_boot_status_reg);
866
867         /* We read security status multiple times during boot:
868          * 1. preboot - a. Check whether the security status bits are valid
869          *              b. Check whether fw security is enabled
870          *              c. Check whether hard reset is done by preboot
871          * 2. boot cpu - a. Fetch boot cpu security status
872          *               b. Check whether hard reset is done by boot cpu
873          * 3. FW application - a. Fetch fw application security status
874          *                     b. Check whether hard reset is done by fw app
875          *
876          * Preboot:
877          * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set
878          * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
879          */
880         if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
881                 prop->fw_security_status_valid = 1;
882
883                 /* FW security should be derived from PCI ID, we keep this
884                  * check for backward compatibility
885                  */
886                 if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)
887                         prop->fw_security_disabled = false;
888
889                 if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
890                         prop->hard_reset_done_by_fw = true;
891         } else {
892                 prop->fw_security_status_valid = 0;
893         }
894
895         dev_dbg(hdev->dev, "Firmware preboot security status %#x\n",
896                         security_status);
897
898         dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
899                         prop->hard_reset_done_by_fw ? "enabled" : "disabled");
900
901         dev_info(hdev->dev, "firmware-level security is %s\n",
902                         prop->fw_security_disabled ? "disabled" : "enabled");
903
904         return 0;
905 }
906
907 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
908                         u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
909                         u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
910                         bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout)
911 {
912         struct asic_fixed_properties *prop = &hdev->asic_prop;
913         u32 status;
914         int rc;
915
916         if (!(hdev->fw_components & FW_TYPE_BOOT_CPU))
917                 return 0;
918
919         dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
920                 cpu_timeout / USEC_PER_SEC);
921
922         /* Wait for boot FIT request */
923         rc = hl_poll_timeout(
924                 hdev,
925                 cpu_boot_status_reg,
926                 status,
927                 status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
928                 10000,
929                 boot_fit_timeout);
930
931         if (rc) {
932                 dev_dbg(hdev->dev,
933                         "No boot fit request received, resuming boot\n");
934         } else {
935                 rc = hdev->asic_funcs->load_boot_fit_to_device(hdev);
936                 if (rc)
937                         goto out;
938
939                 /* Clear device CPU message status */
940                 WREG32(cpu_msg_status_reg, CPU_MSG_CLR);
941
942                 /* Signal device CPU that boot loader is ready */
943                 WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
944
945                 /* Poll for CPU device ack */
946                 rc = hl_poll_timeout(
947                         hdev,
948                         cpu_msg_status_reg,
949                         status,
950                         status == CPU_MSG_OK,
951                         10000,
952                         boot_fit_timeout);
953
954                 if (rc) {
955                         dev_err(hdev->dev,
956                                 "Timeout waiting for boot fit load ack\n");
957                         goto out;
958                 }
959
960                 /* Clear message */
961                 WREG32(msg_to_cpu_reg, KMD_MSG_NA);
962         }
963
964         /* Make sure CPU boot-loader is running */
965         rc = hl_poll_timeout(
966                 hdev,
967                 cpu_boot_status_reg,
968                 status,
969                 (status == CPU_BOOT_STATUS_DRAM_RDY) ||
970                 (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
971                 (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
972                 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
973                 10000,
974                 cpu_timeout);
975
976         dev_dbg(hdev->dev, "uboot status = %d\n", status);
977
978         /* Read U-Boot version now in case we will later fail */
979         hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
980
981         /* Clear reset status since we need to read it again from boot CPU */
982         prop->hard_reset_done_by_fw = false;
983
984         /* Read boot_cpu security bits */
985         if (prop->fw_security_status_valid) {
986                 prop->fw_boot_cpu_security_map =
987                                 RREG32(cpu_security_boot_status_reg);
988
989                 if (prop->fw_boot_cpu_security_map &
990                                 CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
991                         prop->hard_reset_done_by_fw = true;
992
993                 dev_dbg(hdev->dev,
994                         "Firmware boot CPU security status %#x\n",
995                         prop->fw_boot_cpu_security_map);
996         }
997
998         dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
999                         prop->hard_reset_done_by_fw ? "enabled" : "disabled");
1000
1001         if (rc) {
1002                 detect_cpu_boot_status(hdev, status);
1003                 rc = -EIO;
1004                 goto out;
1005         }
1006
1007         if (!(hdev->fw_components & FW_TYPE_LINUX)) {
1008                 dev_info(hdev->dev, "Skip loading Linux F/W\n");
1009                 goto out;
1010         }
1011
1012         if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
1013                 goto out;
1014
1015         dev_info(hdev->dev,
1016                 "Loading firmware to device, may take some time...\n");
1017
1018         rc = hdev->asic_funcs->load_firmware_to_device(hdev);
1019         if (rc)
1020                 goto out;
1021
1022         if (skip_bmc) {
1023                 WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC);
1024
1025                 rc = hl_poll_timeout(
1026                         hdev,
1027                         cpu_boot_status_reg,
1028                         status,
1029                         (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
1030                         10000,
1031                         cpu_timeout);
1032
1033                 if (rc) {
1034                         dev_err(hdev->dev,
1035                                 "Failed to get ACK on skipping BMC, %d\n",
1036                                 status);
1037                         WREG32(msg_to_cpu_reg, KMD_MSG_NA);
1038                         rc = -EIO;
1039                         goto out;
1040                 }
1041         }
1042
1043         WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
1044
1045         rc = hl_poll_timeout(
1046                 hdev,
1047                 cpu_boot_status_reg,
1048                 status,
1049                 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
1050                 10000,
1051                 cpu_timeout);
1052
1053         /* Clear message */
1054         WREG32(msg_to_cpu_reg, KMD_MSG_NA);
1055
1056         if (rc) {
1057                 if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
1058                         dev_err(hdev->dev,
1059                                 "Device reports FIT image is corrupted\n");
1060                 else
1061                         dev_err(hdev->dev,
1062                                 "Failed to load firmware to device, %d\n",
1063                                 status);
1064
1065                 rc = -EIO;
1066                 goto out;
1067         }
1068
1069         rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
1070         if (rc)
1071                 return rc;
1072
1073         /* Clear reset status since we need to read again from app */
1074         prop->hard_reset_done_by_fw = false;
1075
1076         /* Read FW application security bits */
1077         if (prop->fw_security_status_valid) {
1078                 prop->fw_app_security_map =
1079                                 RREG32(cpu_security_boot_status_reg);
1080
1081                 if (prop->fw_app_security_map &
1082                                 CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
1083                         prop->hard_reset_done_by_fw = true;
1084
1085                 dev_dbg(hdev->dev,
1086                         "Firmware application CPU security status %#x\n",
1087                         prop->fw_app_security_map);
1088         }
1089
1090         dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
1091                         prop->hard_reset_done_by_fw ? "enabled" : "disabled");
1092
1093         dev_info(hdev->dev, "Successfully loaded firmware to device\n");
1094
1095         return 0;
1096
1097 out:
1098         fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
1099
1100         return rc;
1101 }