1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2019 HabanaLabs, Ltd.
8 #include "habanalabs.h"
9 #include "../include/common/hl_boot_if.h"
11 #include <linux/firmware.h>
12 #include <linux/slab.h>
14 #define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */
16 * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
18 * @hdev: pointer to hl_device structure.
19 * @fw_name: the firmware image name
20 * @dst: IO memory mapped address space to copy firmware to
21 * @src_offset: offset in src FW to copy from
22 * @size: amount of bytes to copy (0 to copy the whole binary)
24 * Copy fw code from firmware file to device memory.
26 * Return: 0 on success, non-zero for failure.
28 int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
29 void __iomem *dst, u32 src_offset, u32 size)
31 const struct firmware *fw;
36 rc = request_firmware(&fw, fw_name, hdev->dev);
38 dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name);
43 if ((fw_size % 4) != 0) {
44 dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
50 dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
52 if (fw_size > FW_FILE_MAX_SIZE) {
54 "FW file size %zu exceeds maximum of %u bytes\n",
55 fw_size, FW_FILE_MAX_SIZE);
60 if (size - src_offset > fw_size) {
62 "size to copy(%u) and offset(%u) are invalid\n",
71 fw_data = (const void *) fw->data;
73 memcpy_toio(dst, fw_data + src_offset, fw_size);
80 int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
82 struct cpucp_packet pkt = {};
84 pkt.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT);
86 return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
87 sizeof(pkt), 0, NULL);
90 int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
91 u16 len, u32 timeout, u64 *result)
93 struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id];
94 struct cpucp_packet *pkt;
95 dma_addr_t pkt_dma_addr;
96 u32 tmp, expected_ack_val;
99 pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
103 "Failed to allocate DMA memory for packet to CPU\n");
107 memcpy(pkt, msg, len);
109 mutex_lock(&hdev->send_cpu_message_lock);
114 if (hdev->device_cpu_disabled) {
119 /* set fence to a non valid value */
120 pkt->fence = UINT_MAX;
122 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
124 dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
128 if (hdev->asic_prop.fw_app_security_map &
129 CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
130 expected_ack_val = queue->pi;
132 expected_ack_val = CPUCP_PACKET_FENCE_VAL;
134 rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
135 (tmp == expected_ack_val), 1000,
138 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
140 if (rc == -ETIMEDOUT) {
141 dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp);
142 hdev->device_cpu_disabled = true;
146 tmp = le32_to_cpu(pkt->ctl);
148 rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
150 dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
152 (tmp & CPUCP_PKT_CTL_OPCODE_MASK)
153 >> CPUCP_PKT_CTL_OPCODE_SHIFT);
156 *result = le64_to_cpu(pkt->result);
160 mutex_unlock(&hdev->send_cpu_message_lock);
162 hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
167 int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
169 struct cpucp_packet pkt;
173 memset(&pkt, 0, sizeof(pkt));
175 pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
176 CPUCP_PKT_CTL_OPCODE_SHIFT);
177 pkt.value = cpu_to_le64(event_type);
179 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
183 dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
188 int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
191 struct cpucp_unmask_irq_arr_packet *pkt;
192 size_t total_pkt_size;
196 total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
199 /* data should be aligned to 8 bytes in order to CPU-CP to copy it */
200 total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
202 /* total_pkt_size is casted to u16 later on */
203 if (total_pkt_size > USHRT_MAX) {
204 dev_err(hdev->dev, "too many elements in IRQ array\n");
208 pkt = kzalloc(total_pkt_size, GFP_KERNEL);
212 pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
213 memcpy(&pkt->irqs, irq_arr, irq_arr_size);
215 pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
216 CPUCP_PKT_CTL_OPCODE_SHIFT);
218 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
219 total_pkt_size, 0, &result);
222 dev_err(hdev->dev, "failed to unmask IRQ array\n");
229 int hl_fw_test_cpu_queue(struct hl_device *hdev)
231 struct cpucp_packet test_pkt = {};
235 test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
236 CPUCP_PKT_CTL_OPCODE_SHIFT);
237 test_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
239 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
240 sizeof(test_pkt), 0, &result);
243 if (result != CPUCP_PACKET_FENCE_VAL)
245 "CPU queue test failed (%#08llx)\n", result);
247 dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
253 void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
254 dma_addr_t *dma_handle)
258 kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
260 *dma_handle = hdev->cpu_accessible_dma_address +
261 (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
263 return (void *) (uintptr_t) kernel_addr;
266 void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
269 gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
273 int hl_fw_send_heartbeat(struct hl_device *hdev)
275 struct cpucp_packet hb_pkt = {};
279 hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
280 CPUCP_PKT_CTL_OPCODE_SHIFT);
281 hb_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
283 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
284 sizeof(hb_pkt), 0, &result);
286 if ((rc) || (result != CPUCP_PACKET_FENCE_VAL))
292 static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
293 u32 cpu_security_boot_status_reg)
295 u32 err_val, security_val;
296 bool err_exists = false;
298 /* Some of the firmware status codes are deprecated in newer f/w
299 * versions. In those versions, the errors are reported
300 * in different registers. Therefore, we need to check those
301 * registers and print the exact errors. Moreover, there
302 * may be multiple errors, so we need to report on each error
303 * separately. Some of the error codes might indicate a state
304 * that is not an error per-se, but it is an error in production
307 err_val = RREG32(boot_err0_reg);
308 if (!(err_val & CPU_BOOT_ERR0_ENABLED))
311 if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) {
313 "Device boot error - DRAM initialization failed\n");
317 if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) {
318 dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
322 if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) {
324 "Device boot error - Thermal Sensor initialization failed\n");
328 if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
330 "Device boot warning - Skipped DRAM initialization\n");
331 /* This is a warning so we don't want it to disable the
334 err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED;
337 if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
338 if (hdev->bmc_enable) {
340 "Device boot error - Skipped waiting for BMC\n");
344 "Device boot message - Skipped waiting for BMC\n");
345 /* This is an info so we don't want it to disable the
348 err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED;
352 if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) {
354 "Device boot error - Serdes data from BMC not available\n");
358 if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) {
360 "Device boot error - NIC F/W initialization failed\n");
364 if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) {
366 "Device boot warning - security not ready\n");
370 if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) {
371 dev_err(hdev->dev, "Device boot error - security failure\n");
375 if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) {
376 dev_err(hdev->dev, "Device boot error - eFuse failure\n");
380 if (err_val & CPU_BOOT_ERR0_PLL_FAIL) {
381 dev_err(hdev->dev, "Device boot error - PLL failure\n");
385 if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
387 "Device boot error - device unusable\n");
391 security_val = RREG32(cpu_security_boot_status_reg);
392 if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
393 dev_dbg(hdev->dev, "Device security status %#x\n",
396 if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) {
398 "Device boot error - unknown error 0x%08x\n",
403 if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) &
404 lower_32_bits(hdev->boot_error_status_mask)))
410 int hl_fw_cpucp_info_get(struct hl_device *hdev,
411 u32 cpu_security_boot_status_reg,
414 struct asic_fixed_properties *prop = &hdev->asic_prop;
415 struct cpucp_packet pkt = {};
416 void *cpucp_info_cpu_addr;
417 dma_addr_t cpucp_info_dma_addr;
421 cpucp_info_cpu_addr =
422 hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
423 sizeof(struct cpucp_info),
424 &cpucp_info_dma_addr);
425 if (!cpucp_info_cpu_addr) {
427 "Failed to allocate DMA memory for CPU-CP info packet\n");
431 memset(cpucp_info_cpu_addr, 0, sizeof(struct cpucp_info));
433 pkt.ctl = cpu_to_le32(CPUCP_PACKET_INFO_GET <<
434 CPUCP_PKT_CTL_OPCODE_SHIFT);
435 pkt.addr = cpu_to_le64(cpucp_info_dma_addr);
436 pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_info));
438 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
439 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
442 "Failed to handle CPU-CP info pkt, error %d\n", rc);
446 rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
448 dev_err(hdev->dev, "Errors in device boot\n");
452 memcpy(&prop->cpucp_info, cpucp_info_cpu_addr,
453 sizeof(prop->cpucp_info));
455 rc = hl_build_hwmon_channel_info(hdev, prop->cpucp_info.sensors);
458 "Failed to build hwmon channel info, error %d\n", rc);
463 /* Read FW application security bits again */
464 if (hdev->asic_prop.fw_security_status_valid)
465 hdev->asic_prop.fw_app_security_map =
466 RREG32(cpu_security_boot_status_reg);
469 hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
470 sizeof(struct cpucp_info), cpucp_info_cpu_addr);
475 static int hl_fw_send_msi_info_msg(struct hl_device *hdev)
477 struct cpucp_array_data_packet *pkt;
478 size_t total_pkt_size, data_size;
482 /* skip sending this info for unsupported ASICs */
483 if (!hdev->asic_funcs->get_msi_info)
486 data_size = CPUCP_NUM_OF_MSI_TYPES * sizeof(u32);
487 total_pkt_size = sizeof(struct cpucp_array_data_packet) + data_size;
489 /* data should be aligned to 8 bytes in order to CPU-CP to copy it */
490 total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
492 /* total_pkt_size is casted to u16 later on */
493 if (total_pkt_size > USHRT_MAX) {
494 dev_err(hdev->dev, "CPUCP array data is too big\n");
498 pkt = kzalloc(total_pkt_size, GFP_KERNEL);
502 pkt->length = cpu_to_le32(CPUCP_NUM_OF_MSI_TYPES);
504 hdev->asic_funcs->get_msi_info((u32 *)&pkt->data);
506 pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_MSI_INFO_SET <<
507 CPUCP_PKT_CTL_OPCODE_SHIFT);
509 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)pkt,
510 total_pkt_size, 0, &result);
513 * in case packet result is invalid it means that FW does not support
514 * this feature and will use default/hard coded MSI values. no reason
517 if (rc && result == cpucp_packet_invalid)
521 dev_err(hdev->dev, "failed to send CPUCP array data\n");
528 int hl_fw_cpucp_handshake(struct hl_device *hdev,
529 u32 cpu_security_boot_status_reg,
534 rc = hl_fw_cpucp_info_get(hdev, cpu_security_boot_status_reg,
539 return hl_fw_send_msi_info_msg(hdev);
542 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
544 struct cpucp_packet pkt = {};
545 void *eeprom_info_cpu_addr;
546 dma_addr_t eeprom_info_dma_addr;
550 eeprom_info_cpu_addr =
551 hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
552 max_size, &eeprom_info_dma_addr);
553 if (!eeprom_info_cpu_addr) {
555 "Failed to allocate DMA memory for CPU-CP EEPROM packet\n");
559 memset(eeprom_info_cpu_addr, 0, max_size);
561 pkt.ctl = cpu_to_le32(CPUCP_PACKET_EEPROM_DATA_GET <<
562 CPUCP_PKT_CTL_OPCODE_SHIFT);
563 pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
564 pkt.data_max_size = cpu_to_le32(max_size);
566 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
567 HL_CPUCP_EEPROM_TIMEOUT_USEC, &result);
571 "Failed to handle CPU-CP EEPROM packet, error %d\n",
576 /* result contains the actual size */
577 memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
580 hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
581 eeprom_info_cpu_addr);
586 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
587 struct hl_info_pci_counters *counters)
589 struct cpucp_packet pkt = {};
593 pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
594 CPUCP_PKT_CTL_OPCODE_SHIFT);
596 /* Fetch PCI rx counter */
597 pkt.index = cpu_to_le32(cpucp_pcie_throughput_rx);
598 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
599 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
602 "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
605 counters->rx_throughput = result;
607 memset(&pkt, 0, sizeof(pkt));
608 pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
609 CPUCP_PKT_CTL_OPCODE_SHIFT);
611 /* Fetch PCI tx counter */
612 pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx);
613 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
614 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
617 "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
620 counters->tx_throughput = result;
622 /* Fetch PCI replay counter */
623 memset(&pkt, 0, sizeof(pkt));
624 pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET <<
625 CPUCP_PKT_CTL_OPCODE_SHIFT);
627 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
628 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
631 "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
634 counters->replay_cnt = (u32) result;
639 int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
641 struct cpucp_packet pkt = {};
645 pkt.ctl = cpu_to_le32(CPUCP_PACKET_TOTAL_ENERGY_GET <<
646 CPUCP_PKT_CTL_OPCODE_SHIFT);
648 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
649 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
652 "Failed to handle CpuCP total energy pkt, error %d\n",
657 *total_energy = result;
662 int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index,
663 enum pll_index *pll_index)
665 struct asic_fixed_properties *prop = &hdev->asic_prop;
666 u8 pll_byte, pll_bit_off;
670 dynamic_pll = prop->fw_security_status_valid &&
671 (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN);
675 * in case we are working with legacy FW (each asic has unique
676 * PLL numbering) use the driver based index as they are
677 * aligned with fw legacy numbering
679 *pll_index = input_pll_index;
683 /* retrieve a FW compatible PLL index based on
684 * ASIC specific user request
686 fw_pll_idx = hdev->asic_funcs->map_pll_idx_to_fw_idx(input_pll_index);
687 if (fw_pll_idx < 0) {
688 dev_err(hdev->dev, "Invalid PLL index (%u) error %d\n",
689 input_pll_index, fw_pll_idx);
693 /* PLL map is a u8 array */
694 pll_byte = prop->cpucp_info.pll_map[fw_pll_idx >> 3];
695 pll_bit_off = fw_pll_idx & 0x7;
697 if (!(pll_byte & BIT(pll_bit_off))) {
698 dev_err(hdev->dev, "PLL index %d is not supported\n",
703 *pll_index = fw_pll_idx;
708 int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
711 struct cpucp_packet pkt;
712 enum pll_index used_pll_idx;
716 rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
720 memset(&pkt, 0, sizeof(pkt));
722 pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET <<
723 CPUCP_PKT_CTL_OPCODE_SHIFT);
724 pkt.pll_type = __cpu_to_le16((u16)used_pll_idx);
726 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
727 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
729 dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
731 pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result);
732 pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result);
733 pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result);
734 pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result);
739 int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)
741 struct cpucp_packet pkt;
745 memset(&pkt, 0, sizeof(pkt));
747 pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
748 CPUCP_PKT_CTL_OPCODE_SHIFT);
750 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
751 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
753 dev_err(hdev->dev, "Failed to read power, error %d\n", rc);
762 static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
764 /* Some of the status codes below are deprecated in newer f/w
765 * versions but we keep them here for backward compatibility
768 case CPU_BOOT_STATUS_NA:
770 "Device boot error - BTL did NOT run\n");
772 case CPU_BOOT_STATUS_IN_WFE:
774 "Device boot error - Stuck inside WFE loop\n");
776 case CPU_BOOT_STATUS_IN_BTL:
778 "Device boot error - Stuck in BTL\n");
780 case CPU_BOOT_STATUS_IN_PREBOOT:
782 "Device boot error - Stuck in Preboot\n");
784 case CPU_BOOT_STATUS_IN_SPL:
786 "Device boot error - Stuck in SPL\n");
788 case CPU_BOOT_STATUS_IN_UBOOT:
790 "Device boot error - Stuck in u-boot\n");
792 case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
794 "Device boot error - DRAM initialization failed\n");
796 case CPU_BOOT_STATUS_UBOOT_NOT_READY:
798 "Device boot error - u-boot stopped by user\n");
800 case CPU_BOOT_STATUS_TS_INIT_FAIL:
802 "Device boot error - Thermal Sensor initialization failed\n");
806 "Device boot error - Invalid status code %d\n",
812 int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
813 u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
816 struct asic_fixed_properties *prop = &hdev->asic_prop;
817 u32 status, security_status;
820 /* pldm was added for cases in which we use preboot on pldm and want
821 * to load boot fit, but we can't wait for preboot because it runs
824 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm)
827 /* Need to check two possible scenarios:
829 * CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT - for newer firmwares where
830 * the preboot is waiting for the boot fit
832 * All other status values - for older firmwares where the uboot was
833 * loaded from the FLASH
835 rc = hl_poll_timeout(
839 (status == CPU_BOOT_STATUS_IN_UBOOT) ||
840 (status == CPU_BOOT_STATUS_DRAM_RDY) ||
841 (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
842 (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
843 (status == CPU_BOOT_STATUS_SRAM_AVAIL) ||
844 (status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
849 dev_err(hdev->dev, "Failed to read preboot version\n");
850 detect_cpu_boot_status(hdev, status);
852 /* If we read all FF, then something is totally wrong, no point
853 * of reading specific errors
856 fw_read_errors(hdev, boot_err0_reg,
857 cpu_security_boot_status_reg);
861 rc = hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
865 security_status = RREG32(cpu_security_boot_status_reg);
867 /* We read security status multiple times during boot:
868 * 1. preboot - a. Check whether the security status bits are valid
869 * b. Check whether fw security is enabled
870 * c. Check whether hard reset is done by preboot
871 * 2. boot cpu - a. Fetch boot cpu security status
872 * b. Check whether hard reset is done by boot cpu
873 * 3. FW application - a. Fetch fw application security status
874 * b. Check whether hard reset is done by fw app
877 * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set
878 * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
880 if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
881 prop->fw_security_status_valid = 1;
883 /* FW security should be derived from PCI ID, we keep this
884 * check for backward compatibility
886 if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)
887 prop->fw_security_disabled = false;
889 if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
890 prop->hard_reset_done_by_fw = true;
892 prop->fw_security_status_valid = 0;
895 dev_dbg(hdev->dev, "Firmware preboot security status %#x\n",
898 dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
899 prop->hard_reset_done_by_fw ? "enabled" : "disabled");
901 dev_info(hdev->dev, "firmware-level security is %s\n",
902 prop->fw_security_disabled ? "disabled" : "enabled");
907 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
908 u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
909 u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
910 bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout)
912 struct asic_fixed_properties *prop = &hdev->asic_prop;
916 if (!(hdev->fw_components & FW_TYPE_BOOT_CPU))
919 dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
920 cpu_timeout / USEC_PER_SEC);
922 /* Wait for boot FIT request */
923 rc = hl_poll_timeout(
927 status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
933 "No boot fit request received, resuming boot\n");
935 rc = hdev->asic_funcs->load_boot_fit_to_device(hdev);
939 /* Clear device CPU message status */
940 WREG32(cpu_msg_status_reg, CPU_MSG_CLR);
942 /* Signal device CPU that boot loader is ready */
943 WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
945 /* Poll for CPU device ack */
946 rc = hl_poll_timeout(
950 status == CPU_MSG_OK,
956 "Timeout waiting for boot fit load ack\n");
961 WREG32(msg_to_cpu_reg, KMD_MSG_NA);
964 /* Make sure CPU boot-loader is running */
965 rc = hl_poll_timeout(
969 (status == CPU_BOOT_STATUS_DRAM_RDY) ||
970 (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
971 (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
972 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
976 dev_dbg(hdev->dev, "uboot status = %d\n", status);
978 /* Read U-Boot version now in case we will later fail */
979 hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
981 /* Clear reset status since we need to read it again from boot CPU */
982 prop->hard_reset_done_by_fw = false;
984 /* Read boot_cpu security bits */
985 if (prop->fw_security_status_valid) {
986 prop->fw_boot_cpu_security_map =
987 RREG32(cpu_security_boot_status_reg);
989 if (prop->fw_boot_cpu_security_map &
990 CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
991 prop->hard_reset_done_by_fw = true;
994 "Firmware boot CPU security status %#x\n",
995 prop->fw_boot_cpu_security_map);
998 dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
999 prop->hard_reset_done_by_fw ? "enabled" : "disabled");
1002 detect_cpu_boot_status(hdev, status);
1007 if (!(hdev->fw_components & FW_TYPE_LINUX)) {
1008 dev_info(hdev->dev, "Skip loading Linux F/W\n");
1012 if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
1016 "Loading firmware to device, may take some time...\n");
1018 rc = hdev->asic_funcs->load_firmware_to_device(hdev);
1023 WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC);
1025 rc = hl_poll_timeout(
1027 cpu_boot_status_reg,
1029 (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
1035 "Failed to get ACK on skipping BMC, %d\n",
1037 WREG32(msg_to_cpu_reg, KMD_MSG_NA);
1043 WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
1045 rc = hl_poll_timeout(
1047 cpu_boot_status_reg,
1049 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
1054 WREG32(msg_to_cpu_reg, KMD_MSG_NA);
1057 if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
1059 "Device reports FIT image is corrupted\n");
1062 "Failed to load firmware to device, %d\n",
1069 rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
1073 /* Clear reset status since we need to read again from app */
1074 prop->hard_reset_done_by_fw = false;
1076 /* Read FW application security bits */
1077 if (prop->fw_security_status_valid) {
1078 prop->fw_app_security_map =
1079 RREG32(cpu_security_boot_status_reg);
1081 if (prop->fw_app_security_map &
1082 CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
1083 prop->hard_reset_done_by_fw = true;
1086 "Firmware application CPU security status %#x\n",
1087 prop->fw_app_security_map);
1090 dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
1091 prop->hard_reset_done_by_fw ? "enabled" : "disabled");
1093 dev_info(hdev->dev, "Successfully loaded firmware to device\n");
1098 fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);