#include <linux/scatterlist.h>
#include <linux/hashtable.h>
#include <linux/debugfs.h>
+#include <linux/rwsem.h>
#include <linux/bitfield.h>
#include <linux/genalloc.h>
#include <linux/sched/signal.h>
#define HL_PENDING_RESET_LONG_SEC 60
#define HL_HARD_RESET_MAX_TIMEOUT 120
+#define HL_PLDM_HARD_RESET_MAX_TIMEOUT (HL_HARD_RESET_MAX_TIMEOUT * 3)
#define HL_DEVICE_TIMEOUT_USEC 1000000 /* 1 s */
#define HL_COMMON_USER_INTERRUPT_ID 0xFFF
+#define HL_STATE_DUMP_HIST_LEN 5
+
+#define OBJ_NAMES_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
+#define SYNC_TO_ENGINE_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
+
/* Memory */
#define MEM_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
*
* - HL_RESET_HEARTBEAT
* Set if reset is due to heartbeat
+ *
+ * - HL_RESET_TDR
+ * Set if reset is due to TDR
+ *
+ * - HL_RESET_DEVICE_RELEASE
+ * Set if reset is due to device release
+ *
+ * - HL_RESET_FW
+ * F/W will perform the reset. No need to ask it to reset the device. This is relevant
+ * only when running with secured f/w
*/
#define HL_RESET_HARD (1 << 0)
#define HL_RESET_FROM_RESET_THREAD (1 << 1)
#define HL_RESET_HEARTBEAT (1 << 2)
+#define HL_RESET_TDR (1 << 3)
+#define HL_RESET_DEVICE_RELEASE (1 << 4)
+#define HL_RESET_FW (1 << 5)
#define HL_MAX_SOBS_PER_MONITOR 8
/**
* enum hl_fw_component - F/W components to read version through registers.
- * @FW_COMP_UBOOT: u-boot.
+ * @FW_COMP_BOOT_FIT: boot fit.
* @FW_COMP_PREBOOT: preboot.
+ * @FW_COMP_LINUX: linux.
*/
enum hl_fw_component {
- FW_COMP_UBOOT,
- FW_COMP_PREBOOT
+ FW_COMP_BOOT_FIT,
+ FW_COMP_PREBOOT,
+ FW_COMP_LINUX,
};
/**
CS_TYPE_DEFAULT,
CS_TYPE_SIGNAL,
CS_TYPE_WAIT,
- CS_TYPE_COLLECTIVE_WAIT
+ CS_TYPE_COLLECTIVE_WAIT,
+ CS_RESERVE_SIGNALS,
+ CS_UNRESERVE_SIGNALS
};
/*
* @hdev: habanalabs device structure.
* @kref: refcount of this SOB. The SOB will reset once the refcount is zero.
* @sob_id: id of this SOB.
+ * @sob_addr: the sob offset from the base address.
* @q_idx: the H/W queue that uses this SOB.
+ * @need_reset: reset indication set when switching to the other sob.
*/
struct hl_hw_sob {
struct hl_device *hdev;
struct kref kref;
u32 sob_id;
+ u32 sob_addr;
u32 q_idx;
+ bool need_reset;
};
enum hl_collective_mode {
};
/**
- * enum vm_type_t - virtual memory mapping request information.
+ * enum vm_type - virtual memory mapping request information.
* @VM_TYPE_USERPTR: mapping of user memory to device virtual address.
* @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address.
*/
-enum vm_type_t {
+enum vm_type {
VM_TYPE_USERPTR = 0x1,
VM_TYPE_PHYS_PACK = 0x2
};
u8 host_resident;
};
+/**
+ * struct hl_hints_range - hint addresses reserved va range.
+ * @start_addr: start address of the va range.
+ * @end_addr: end address of the va range.
+ */
+struct hl_hints_range {
+ u64 start_addr;
+ u64 end_addr;
+};
+
/**
* struct asic_fixed_properties - ASIC specific immutable properties.
* @hw_queues_props: H/W queues properties.
* @pmmu: PCI (host) MMU address translation properties.
* @pmmu_huge: PCI (host) MMU address translation properties for memory
* allocated with huge pages.
+ * @hints_dram_reserved_va_range: dram hint addresses reserved range.
+ * @hints_host_reserved_va_range: host hint addresses reserved range.
+ * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved
+ * range.
* @sram_base_address: SRAM physical start address.
* @sram_end_address: SRAM physical end address.
* @sram_user_base_address - SRAM physical start address for user access.
* to the device's MMU.
* @cb_va_end_addr: virtual end address of command buffers which are mapped to
* the device's MMU.
+ * @dram_hints_align_mask: dram va hint addresses alignment mask which is used
+ * for hints validity check.
+ * device_dma_offset_for_host_access: the offset to add to host DMA addresses
+ * to enable the device to access them.
* @mmu_pgt_size: MMU page tables total size.
* @mmu_pte_size: PTE size in MMU page tables.
* @mmu_hop_table_size: MMU hop table size.
* @cb_pool_cb_size: size of each CB in the CB pool.
* @max_pending_cs: maximum of concurrent pending command submissions
* @max_queues: maximum amount of queues in the system
- * @fw_boot_cpu_security_map: bitmap representation of boot cpu security status
- * reported by FW, bit description can be found in
- * CPU_BOOT_DEV_STS*
- * @fw_app_security_map: bitmap representation of application security status
- * reported by FW, bit description can be found in
- * CPU_BOOT_DEV_STS*
+ * @fw_preboot_cpu_boot_dev_sts0: bitmap representation of preboot cpu
+ * capabilities reported by FW, bit description
+ * can be found in CPU_BOOT_DEV_STS0
+ * @fw_preboot_cpu_boot_dev_sts1: bitmap representation of preboot cpu
+ * capabilities reported by FW, bit description
+ * can be found in CPU_BOOT_DEV_STS1
+ * @fw_bootfit_cpu_boot_dev_sts0: bitmap representation of boot cpu security
+ * status reported by FW, bit description can be
+ * found in CPU_BOOT_DEV_STS0
+ * @fw_bootfit_cpu_boot_dev_sts1: bitmap representation of boot cpu security
+ * status reported by FW, bit description can be
+ * found in CPU_BOOT_DEV_STS1
+ * @fw_app_cpu_boot_dev_sts0: bitmap representation of application security
+ * status reported by FW, bit description can be
+ * found in CPU_BOOT_DEV_STS0
+ * @fw_app_cpu_boot_dev_sts1: bitmap representation of application security
+ * status reported by FW, bit description can be
+ * found in CPU_BOOT_DEV_STS1
* @collective_first_sob: first sync object available for collective use
* @collective_first_mon: first monitor available for collective use
* @sync_stream_first_sob: first sync object available for sync stream use
* reserved for the user
* @first_available_cq: first available CQ for the user.
* @user_interrupt_count: number of user interrupts.
+ * @server_type: Server type that the ASIC is currently installed in.
+ * The value is according to enum hl_server_type in uapi file.
* @tpc_enabled_mask: which TPCs are enabled.
* @completion_queues_count: number of completion queues.
- * @fw_security_disabled: true if security measures are disabled in firmware,
- * false otherwise
- * @fw_security_status_valid: security status bits are valid and can be fetched
- * from BOOT_DEV_STS0
+ * @fw_security_enabled: true if security measures are enabled in firmware,
+ * false otherwise
+ * @fw_cpu_boot_dev_sts0_valid: status bits are valid and can be fetched from
+ * BOOT_DEV_STS0
+ * @fw_cpu_boot_dev_sts1_valid: status bits are valid and can be fetched from
+ * BOOT_DEV_STS1
* @dram_supports_virtual_memory: is there an MMU towards the DRAM
* @hard_reset_done_by_fw: true if firmware is handling hard reset flow
* @num_functional_hbms: number of functional HBMs in each DCORE.
+ * @hints_range_reservation: device support hint addresses range reservation.
* @iatu_done_by_fw: true if iATU configuration is being done by FW.
+ * @dynamic_fw_load: is dynamic FW load is supported.
+ * @gic_interrupts_enable: true if FW is not blocking GIC controller,
+ * false otherwise.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
struct hl_mmu_properties dmmu;
struct hl_mmu_properties pmmu;
struct hl_mmu_properties pmmu_huge;
+ struct hl_hints_range hints_dram_reserved_va_range;
+ struct hl_hints_range hints_host_reserved_va_range;
+ struct hl_hints_range hints_host_hpage_reserved_va_range;
u64 sram_base_address;
u64 sram_end_address;
u64 sram_user_base_address;
u64 mmu_dram_default_page_addr;
u64 cb_va_start_addr;
u64 cb_va_end_addr;
+ u64 dram_hints_align_mask;
+ u64 device_dma_offset_for_host_access;
u32 mmu_pgt_size;
u32 mmu_pte_size;
u32 mmu_hop_table_size;
u32 cb_pool_cb_size;
u32 max_pending_cs;
u32 max_queues;
- u32 fw_boot_cpu_security_map;
- u32 fw_app_security_map;
+ u32 fw_preboot_cpu_boot_dev_sts0;
+ u32 fw_preboot_cpu_boot_dev_sts1;
+ u32 fw_bootfit_cpu_boot_dev_sts0;
+ u32 fw_bootfit_cpu_boot_dev_sts1;
+ u32 fw_app_cpu_boot_dev_sts0;
+ u32 fw_app_cpu_boot_dev_sts1;
u16 collective_first_sob;
u16 collective_first_mon;
u16 sync_stream_first_sob;
u16 first_available_user_msix_interrupt;
u16 first_available_cq[HL_MAX_DCORES];
u16 user_interrupt_count;
+ u16 server_type;
u8 tpc_enabled_mask;
u8 completion_queues_count;
- u8 fw_security_disabled;
- u8 fw_security_status_valid;
+ u8 fw_security_enabled;
+ u8 fw_cpu_boot_dev_sts0_valid;
+ u8 fw_cpu_boot_dev_sts1_valid;
u8 dram_supports_virtual_memory;
u8 hard_reset_done_by_fw;
u8 num_functional_hbms;
+ u8 hints_range_reservation;
u8 iatu_done_by_fw;
+ u8 dynamic_fw_load;
+ u8 gic_interrupts_enable;
};
/**
* @completion: fence is implemented using completion
* @refcount: refcount for this fence
* @cs_sequence: sequence of the corresponding command submission
+ * @stream_master_qid_map: streams masters QID bitmap to represent all streams
+ * masters QIDs that multi cs is waiting on
* @error: mark this fence with error
* @timestamp: timestamp upon completion
- *
*/
struct hl_fence {
struct completion completion;
struct kref refcount;
u64 cs_sequence;
+ u32 stream_master_qid_map;
int error;
ktime_t timestamp;
};
/**
* struct hl_cs_compl - command submission completion object.
- * @sob_reset_work: workqueue object to run SOB reset flow.
* @base_fence: hl fence object.
* @lock: spinlock to protect fence.
* @hdev: habanalabs device structure.
* @hw_sob: the H/W SOB used in this signal/wait CS.
+ * @encaps_sig_hdl: encaps signals hanlder.
* @cs_seq: command submission sequence number.
* @type: type of the CS - signal/wait.
* @sob_val: the SOB value that is used in this signal/wait CS.
* @sob_group: the SOB group that is used in this collective wait CS.
+ * @encaps_signals: indication whether it's a completion object of cs with
+ * encaps signals or not.
*/
struct hl_cs_compl {
- struct work_struct sob_reset_work;
struct hl_fence base_fence;
spinlock_t lock;
struct hl_device *hdev;
struct hl_hw_sob *hw_sob;
+ struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
u64 cs_seq;
enum hl_cs_type type;
u16 sob_val;
u16 sob_group;
+ bool encaps_signals;
};
/*
u8 curr_sob_offset;
};
+/**
+ * struct hl_encaps_signals_mgr - describes sync stream encapsulated signals
+ * handlers manager
+ * @lock: protects handles.
+ * @handles: an idr to hold all encapsulated signals handles.
+ */
+struct hl_encaps_signals_mgr {
+ spinlock_t lock;
+ struct idr handles;
+};
+
/**
* struct hl_hw_queue - describes a H/W transport queue.
* @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
* @kernel_address: holds the queue's kernel virtual address
* @bus_address: holds the queue's DMA address
* @ci: ci inside the queue
+ * @prev_eqe_index: the index of the previous event queue entry. The index of
+ * the current entry's index must be +1 of the previous one.
+ * @check_eqe_index: do we need to check the index of the current entry vs. the
+ * previous one. This is for backward compatibility with older
+ * firmwares
*/
struct hl_eq {
struct hl_device *hdev;
void *kernel_address;
dma_addr_t bus_address;
u32 ci;
+ u32 prev_eqe_index;
+ bool check_eqe_index;
};
DIV_SEL_DIVIDED_PLL = 3,
};
+enum pci_region {
+ PCI_REGION_CFG,
+ PCI_REGION_SRAM,
+ PCI_REGION_DRAM,
+ PCI_REGION_SP_SRAM,
+ PCI_REGION_NUMBER,
+};
+
+/**
+ * struct pci_mem_region - describe memory region in a PCI bar
+ * @region_base: region base address
+ * @region_size: region size
+ * @bar_size: size of the BAR
+ * @offset_in_bar: region offset into the bar
+ * @bar_id: bar ID of the region
+ * @used: if used 1, otherwise 0
+ */
+struct pci_mem_region {
+ u64 region_base;
+ u64 region_size;
+ u64 bar_size;
+ u64 offset_in_bar;
+ u8 bar_id;
+ u8 used;
+};
+
+/**
+ * struct static_fw_load_mgr - static FW load manager
+ * @preboot_version_max_off: max offset to preboot version
+ * @boot_fit_version_max_off: max offset to boot fit version
+ * @kmd_msg_to_cpu_reg: register address for KDM->CPU messages
+ * @cpu_cmd_status_to_host_reg: register address for CPU command status response
+ * @cpu_boot_status_reg: boot status register
+ * @cpu_boot_dev_status0_reg: boot device status register 0
+ * @cpu_boot_dev_status1_reg: boot device status register 1
+ * @boot_err0_reg: boot error register 0
+ * @boot_err1_reg: boot error register 1
+ * @preboot_version_offset_reg: SRAM offset to preboot version register
+ * @boot_fit_version_offset_reg: SRAM offset to boot fit version register
+ * @sram_offset_mask: mask for getting offset into the SRAM
+ * @cpu_reset_wait_msec: used when setting WFE via kmd_msg_to_cpu_reg
+ */
+struct static_fw_load_mgr {
+ u64 preboot_version_max_off;
+ u64 boot_fit_version_max_off;
+ u32 kmd_msg_to_cpu_reg;
+ u32 cpu_cmd_status_to_host_reg;
+ u32 cpu_boot_status_reg;
+ u32 cpu_boot_dev_status0_reg;
+ u32 cpu_boot_dev_status1_reg;
+ u32 boot_err0_reg;
+ u32 boot_err1_reg;
+ u32 preboot_version_offset_reg;
+ u32 boot_fit_version_offset_reg;
+ u32 sram_offset_mask;
+ u32 cpu_reset_wait_msec;
+};
+
+/**
+ * struct fw_response - FW response to LKD command
+ * @ram_offset: descriptor offset into the RAM
+ * @ram_type: RAM type containing the descriptor (SRAM/DRAM)
+ * @status: command status
+ */
+struct fw_response {
+ u32 ram_offset;
+ u8 ram_type;
+ u8 status;
+};
+
+/**
+ * struct dynamic_fw_load_mgr - dynamic FW load manager
+ * @response: FW to LKD response
+ * @comm_desc: the communication descriptor with FW
+ * @image_region: region to copy the FW image to
+ * @fw_image_size: size of FW image to load
+ * @wait_for_bl_timeout: timeout for waiting for boot loader to respond
+ */
+struct dynamic_fw_load_mgr {
+ struct fw_response response;
+ struct lkd_fw_comms_desc comm_desc;
+ struct pci_mem_region *image_region;
+ size_t fw_image_size;
+ u32 wait_for_bl_timeout;
+};
+
+/**
+ * struct fw_image_props - properties of FW image
+ * @image_name: name of the image
+ * @src_off: offset in src FW to copy from
+ * @copy_size: amount of bytes to copy (0 to copy the whole binary)
+ */
+struct fw_image_props {
+ char *image_name;
+ u32 src_off;
+ u32 copy_size;
+};
+
+/**
+ * struct fw_load_mgr - manager FW loading process
+ * @dynamic_loader: specific structure for dynamic load
+ * @static_loader: specific structure for static load
+ * @boot_fit_img: boot fit image properties
+ * @linux_img: linux image properties
+ * @cpu_timeout: CPU response timeout in usec
+ * @boot_fit_timeout: Boot fit load timeout in usec
+ * @skip_bmc: should BMC be skipped
+ * @sram_bar_id: SRAM bar ID
+ * @dram_bar_id: DRAM bar ID
+ * @linux_loaded: true if linux was loaded so far
+ */
+struct fw_load_mgr {
+ union {
+ struct dynamic_fw_load_mgr dynamic_loader;
+ struct static_fw_load_mgr static_loader;
+ };
+ struct fw_image_props boot_fit_img;
+ struct fw_image_props linux_img;
+ u32 cpu_timeout;
+ u32 boot_fit_timeout;
+ u8 skip_bmc;
+ u8 sram_bar_id;
+ u8 dram_bar_id;
+ u8 linux_loaded;
+};
+
/**
* struct hl_asic_funcs - ASIC specific functions that are can be called from
* common code.
* hw_fini and before CS rollback.
* @suspend: handles IP specific H/W or SW changes for suspend.
* @resume: handles IP specific H/W or SW changes for resume.
- * @cb_mmap: maps a CB.
+ * @mmap: maps a memory.
* @ring_doorbell: increment PI on a given QMAN.
* @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
* function because the PQs are located in different memory areas
* @ctx_fini: context dependent cleanup.
* @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
* @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
- * @read_device_fw_version: read the device's firmware versions that are
- * contained in registers
* @load_firmware_to_device: load the firmware to the device's memory
* @load_boot_fit_to_device: load boot fit to device's memory
* @get_signal_cb_size: Get signal CB size.
* @get_msi_info: Retrieve asic-specific MSI ID of the f/w async event
* @map_pll_idx_to_fw_idx: convert driver specific per asic PLL index to
* generic f/w compatible PLL Indexes
+ * @init_firmware_loader: initialize data for FW loader.
+ * @init_cpu_scrambler_dram: Enable CPU specific DRAM scrambling
+ * @state_dump_init: initialize constants required for state dump
+ * @get_sob_addr: get SOB base address offset.
+ * @set_pci_memory_regions: setting properties of PCI memory regions
+ * @get_stream_master_qid_arr: get pointer to stream masters QID array
*/
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
int (*sw_init)(struct hl_device *hdev);
int (*sw_fini)(struct hl_device *hdev);
int (*hw_init)(struct hl_device *hdev);
- void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
- void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
+ void (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
+ void (*halt_engines)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
int (*suspend)(struct hl_device *hdev);
int (*resume)(struct hl_device *hdev);
- int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
+ int (*mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size);
void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
u32 flags);
int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
- u32 asid, u64 va, u64 size);
+ u32 flags, u32 asid, u64 va, u64 size);
int (*send_heartbeat)(struct hl_device *hdev);
void (*set_clock_gating)(struct hl_device *hdev);
void (*disable_clock_gating)(struct hl_device *hdev);
void (*ctx_fini)(struct hl_ctx *ctx);
int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
- int (*read_device_fw_version)(struct hl_device *hdev,
- enum hl_fw_component fwc);
int (*load_firmware_to_device)(struct hl_device *hdev);
int (*load_boot_fit_to_device)(struct hl_device *hdev);
u32 (*get_signal_cb_size)(struct hl_device *hdev);
void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group);
void (*set_dma_mask_from_fw)(struct hl_device *hdev);
u64 (*get_device_time)(struct hl_device *hdev);
- void (*collective_wait_init_cs)(struct hl_cs *cs);
+ int (*collective_wait_init_cs)(struct hl_cs *cs);
int (*collective_wait_create_jobs)(struct hl_device *hdev,
- struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
- u32 collective_engine_id);
+ struct hl_ctx *ctx, struct hl_cs *cs,
+ u32 wait_queue_id, u32 collective_engine_id,
+ u32 encaps_signal_offset);
u64 (*scramble_addr)(struct hl_device *hdev, u64 addr);
u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
void (*ack_protection_bits_errors)(struct hl_device *hdev);
int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
u32 block_id, u32 block_size);
void (*enable_events_from_fw)(struct hl_device *hdev);
- void (*get_msi_info)(u32 *table);
+ void (*get_msi_info)(__le32 *table);
int (*map_pll_idx_to_fw_idx)(u32 pll_idx);
+ void (*init_firmware_loader)(struct hl_device *hdev);
+ void (*init_cpu_scrambler_dram)(struct hl_device *hdev);
+ void (*state_dump_init)(struct hl_device *hdev);
+ u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id);
+ void (*set_pci_memory_regions)(struct hl_device *hdev);
+ u32* (*get_stream_master_qid_arr)(void);
};
atomic64_t validation_drop_cnt;
};
-/**
- * struct hl_pending_cb - pending command buffer structure
- * @cb_node: cb node in pending cb list
- * @cb: command buffer to send in next submission
- * @cb_size: command buffer size
- * @hw_queue_id: destination queue id
- */
-struct hl_pending_cb {
- struct list_head cb_node;
- struct hl_cb *cb;
- u32 cb_size;
- u32 hw_queue_id;
-};
-
/**
* struct hl_ctx - user/kernel context.
* @mem_hash: holds mapping from virtual address to virtual memory area
* MMU hash or walking the PGT requires talking this lock.
* @hw_block_list_lock: protects the HW block memory list.
* @debugfs_list: node in debugfs list of contexts.
- * pending_cb_list: list of pending command buffers waiting to be sent upon
- * next user command submission context.
* @hw_block_mem_list: list of HW block virtual mapped addresses.
* @cs_counters: context command submission counters.
* @cb_va_pool: device VA pool for command buffers which are mapped to the
* device's MMU.
+ * @sig_mgr: encaps signals handle manager.
* @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
* to user so user could inquire about CS. It is used as
* index to cs_pending array.
* @dram_default_hops: array that holds all hops addresses needed for default
* DRAM mapping.
- * @pending_cb_lock: spinlock to protect pending cb list
* @cs_lock: spinlock to protect cs_sequence.
* @dram_phys_mem: amount of used physical DRAM memory by this context.
* @thread_ctx_switch_token: token to prevent multiple threads of the same
* context from running the context switch phase.
* Only a single thread should run it.
- * @thread_pending_cb_token: token to prevent multiple threads from processing
- * the pending CB list. Only a single thread should
- * process the list since it is protected by a
- * spinlock and we don't want to halt the entire
- * command submission sequence.
* @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
* the context switch phase from moving to their
* execution phase before the context switch phase
struct mutex mmu_lock;
struct mutex hw_block_list_lock;
struct list_head debugfs_list;
- struct list_head pending_cb_list;
struct list_head hw_block_mem_list;
struct hl_cs_counters_atomic cs_counters;
struct gen_pool *cb_va_pool;
+ struct hl_encaps_signals_mgr sig_mgr;
u64 cs_sequence;
u64 *dram_default_hops;
- spinlock_t pending_cb_lock;
spinlock_t cs_lock;
atomic64_t dram_phys_mem;
atomic_t thread_ctx_switch_token;
- atomic_t thread_pending_cb_token;
u32 thread_ctx_switch_wait_token;
u32 asid;
u32 handle;
* @sgt: pointer to the scatter-gather table that holds the pages.
* @dir: for DMA unmapping, the direction must be supplied, so save it.
* @debugfs_list: node in debugfs list of command submissions.
+ * @pid: the pid of the user process owning the memory
* @addr: user-space virtual address of the start of the memory area.
* @size: size of the memory area to pin & map.
* @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
*/
struct hl_userptr {
- enum vm_type_t vm_type; /* must be first */
+ enum vm_type vm_type; /* must be first */
struct list_head job_node;
struct page **pages;
unsigned int npages;
struct sg_table *sgt;
enum dma_data_direction dir;
struct list_head debugfs_list;
+ pid_t pid;
u64 addr;
- u32 size;
+ u64 size;
u8 dma_mapped;
};
* @mirror_node : node in device mirror list of command submissions.
* @staged_cs_node: node in the staged cs list.
* @debugfs_list: node in debugfs list of command submissions.
+ * @encaps_sig_hdl: holds the encaps signals handle.
* @sequence: the sequence number of this CS.
* @staged_sequence: the sequence of the staged submission this CS is part of,
* relevant only if staged_cs is set.
* @timeout_jiffies: cs timeout in jiffies.
+ * @submission_time_jiffies: submission time of the cs
* @type: CS_TYPE_*.
+ * @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs.
* @submitted: true if CS was submitted to H/W.
* @completed: true if CS was completed by device.
* @timedout : true if CS was timedout.
* @staged_first: true if this is the first staged CS and we need to receive
* timeout for this CS.
* @staged_cs: true if this CS is part of a staged submission.
+ * @skip_reset_on_timeout: true if we shall not reset the device in case
+ * timeout occurs (debug scenario).
+ * @encaps_signals: true if this CS has encaps reserved signals.
*/
struct hl_cs {
u16 *jobs_in_queue_cnt;
struct list_head mirror_node;
struct list_head staged_cs_node;
struct list_head debugfs_list;
+ struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
u64 sequence;
u64 staged_sequence;
u64 timeout_jiffies;
+ u64 submission_time_jiffies;
enum hl_cs_type type;
+ u32 encaps_sig_hdl_id;
u8 submitted;
u8 completed;
u8 timedout;
u8 staged_last;
u8 staged_first;
u8 staged_cs;
+ u8 skip_reset_on_timeout;
+ u8 encaps_signals;
};
/**
* @hw_queue_id: the id of the H/W queue this job is submitted to.
* @user_cb_size: the actual size of the CB we got from the user.
* @job_cb_size: the actual size of the CB that we put on the queue.
+ * @encaps_sig_wait_offset: encapsulated signals offset, which allow user
+ * to wait on part of the reserved signals.
* @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
* handle to a kernel-allocated CB object, false
* otherwise (SRAM/DRAM/host address).
u32 hw_queue_id;
u32 user_cb_size;
u32 job_cb_size;
+ u32 encaps_sig_wait_offset;
u8 is_kernel_allocated_cb;
u8 contains_dma_pkt;
};
* @created_from_userptr: is product of host virtual address.
*/
struct hl_vm_phys_pg_pack {
- enum vm_type_t vm_type; /* must be first */
+ enum vm_type vm_type; /* must be first */
u64 *pages;
u64 npages;
u64 total_size;
* @ctx_mem_hash_list: list of available contexts with MMU mappings.
* @ctx_mem_hash_spinlock: protects cb_list.
* @blob_desc: descriptor of blob
+ * @state_dump: data of the system states in case of a bad cs.
+ * @state_dump_sem: protects state_dump.
* @addr: next address to read/write from/to in read/write32.
* @mmu_addr: next virtual address to translate to physical address in mmu_show.
+ * @userptr_lookup: the target user ptr to look up for on demand.
* @mmu_asid: ASID to use while translating in mmu_show.
+ * @state_dump_head: index of the latest state dump
* @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read.
* @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read.
* @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read.
struct list_head ctx_mem_hash_list;
spinlock_t ctx_mem_hash_spinlock;
struct debugfs_blob_wrapper blob_desc;
+ char *state_dump[HL_STATE_DUMP_HIST_LEN];
+ struct rw_semaphore state_dump_sem;
u64 addr;
u64 mmu_addr;
+ u64 userptr_lookup;
u32 mmu_asid;
+ u32 state_dump_head;
u8 i2c_bus;
u8 i2c_addr;
u8 i2c_reg;
};
+/**
+ * struct hl_hw_obj_name_entry - single hw object name, member of
+ * hl_state_dump_specs
+ * @node: link to the containing hash table
+ * @name: hw object name
+ * @id: object identifier
+ */
+struct hl_hw_obj_name_entry {
+ struct hlist_node node;
+ const char *name;
+ u32 id;
+};
+
+enum hl_state_dump_specs_props {
+ SP_SYNC_OBJ_BASE_ADDR,
+ SP_NEXT_SYNC_OBJ_ADDR,
+ SP_SYNC_OBJ_AMOUNT,
+ SP_MON_OBJ_WR_ADDR_LOW,
+ SP_MON_OBJ_WR_ADDR_HIGH,
+ SP_MON_OBJ_WR_DATA,
+ SP_MON_OBJ_ARM_DATA,
+ SP_MON_OBJ_STATUS,
+ SP_MONITORS_AMOUNT,
+ SP_TPC0_CMDQ,
+ SP_TPC0_CFG_SO,
+ SP_NEXT_TPC,
+ SP_MME_CMDQ,
+ SP_MME_CFG_SO,
+ SP_NEXT_MME,
+ SP_DMA_CMDQ,
+ SP_DMA_CFG_SO,
+ SP_DMA_QUEUES_OFFSET,
+ SP_NUM_OF_MME_ENGINES,
+ SP_SUB_MME_ENG_NUM,
+ SP_NUM_OF_DMA_ENGINES,
+ SP_NUM_OF_TPC_ENGINES,
+ SP_ENGINE_NUM_OF_QUEUES,
+ SP_ENGINE_NUM_OF_STREAMS,
+ SP_ENGINE_NUM_OF_FENCES,
+ SP_FENCE0_CNT_OFFSET,
+ SP_FENCE0_RDATA_OFFSET,
+ SP_CP_STS_OFFSET,
+ SP_NUM_CORES,
+
+ SP_MAX
+};
+
+enum hl_sync_engine_type {
+ ENGINE_TPC,
+ ENGINE_DMA,
+ ENGINE_MME,
+};
+
+/**
+ * struct hl_mon_state_dump - represents a state dump of a single monitor
+ * @id: monitor id
+ * @wr_addr_low: address monitor will write to, low bits
+ * @wr_addr_high: address monitor will write to, high bits
+ * @wr_data: data monitor will write
+ * @arm_data: register value containing monitor configuration
+ * @status: monitor status
+ */
+struct hl_mon_state_dump {
+ u32 id;
+ u32 wr_addr_low;
+ u32 wr_addr_high;
+ u32 wr_data;
+ u32 arm_data;
+ u32 status;
+};
+
+/**
+ * struct hl_sync_to_engine_map_entry - sync object id to engine mapping entry
+ * @engine_type: type of the engine
+ * @engine_id: id of the engine
+ * @sync_id: id of the sync object
+ */
+struct hl_sync_to_engine_map_entry {
+ struct hlist_node node;
+ enum hl_sync_engine_type engine_type;
+ u32 engine_id;
+ u32 sync_id;
+};
+
+/**
+ * struct hl_sync_to_engine_map - maps sync object id to associated engine id
+ * @tb: hash table containing the mapping, each element is of type
+ * struct hl_sync_to_engine_map_entry
+ */
+struct hl_sync_to_engine_map {
+ DECLARE_HASHTABLE(tb, SYNC_TO_ENGINE_HASH_TABLE_BITS);
+};
+
+/**
+ * struct hl_state_dump_specs_funcs - virtual functions used by the state dump
+ * @gen_sync_to_engine_map: generate a hash map from sync obj id to its engine
+ * @print_single_monitor: format monitor data as string
+ * @monitor_valid: return true if given monitor dump is valid
+ * @print_fences_single_engine: format fences data as string
+ */
+struct hl_state_dump_specs_funcs {
+ int (*gen_sync_to_engine_map)(struct hl_device *hdev,
+ struct hl_sync_to_engine_map *map);
+ int (*print_single_monitor)(char **buf, size_t *size, size_t *offset,
+ struct hl_device *hdev,
+ struct hl_mon_state_dump *mon);
+ int (*monitor_valid)(struct hl_mon_state_dump *mon);
+ int (*print_fences_single_engine)(struct hl_device *hdev,
+ u64 base_offset,
+ u64 status_base_offset,
+ enum hl_sync_engine_type engine_type,
+ u32 engine_id, char **buf,
+ size_t *size, size_t *offset);
+};
+
+/**
+ * struct hl_state_dump_specs - defines ASIC known hw objects names
+ * @so_id_to_str_tb: sync objects names index table
+ * @monitor_id_to_str_tb: monitors names index table
+ * @funcs: virtual functions used for state dump
+ * @sync_namager_names: readable names for sync manager if available (ex: N_E)
+ * @props: pointer to a per asic const props array required for state dump
+ */
+struct hl_state_dump_specs {
+ DECLARE_HASHTABLE(so_id_to_str_tb, OBJ_NAMES_HASH_TABLE_BITS);
+ DECLARE_HASHTABLE(monitor_id_to_str_tb, OBJ_NAMES_HASH_TABLE_BITS);
+ struct hl_state_dump_specs_funcs funcs;
+ const char * const *sync_namager_names;
+ s64 *props;
+};
+
/*
* DEVICES
#define HL_STR_MAX 32
-#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1)
+#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1)
/* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
* x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
* @wq: work queue for device reset procedure.
* @reset_work: reset work to be done.
* @hdev: habanalabs device structure.
+ * @fw_reset: whether f/w will do the reset without us sending them a message to do it.
*/
struct hl_device_reset_work {
struct workqueue_struct *wq;
struct delayed_work reset_work;
struct hl_device *hdev;
+ bool fw_reset;
};
/**
u64 virt_addr, struct hl_mmu_hop_info *hops);
};
+/**
+ * number of user contexts allowed to call wait_for_multi_cs ioctl in
+ * parallel
+ */
+#define MULTI_CS_MAX_USER_CTX 2
+
+/**
+ * struct multi_cs_completion - multi CS wait completion.
+ * @completion: completion of any of the CS in the list
+ * @lock: spinlock for the completion structure
+ * @timestamp: timestamp for the multi-CS completion
+ * @stream_master_qid_map: bitmap of all stream masters on which the multi-CS
+ * is waiting
+ * @used: 1 if in use, otherwise 0
+ */
+struct multi_cs_completion {
+ struct completion completion;
+ spinlock_t lock;
+ s64 timestamp;
+ u32 stream_master_qid_map;
+ u8 used;
+};
+
+/**
+ * struct multi_cs_data - internal data for multi CS call
+ * @ctx: pointer to the context structure
+ * @fence_arr: array of fences of all CSs
+ * @seq_arr: array of CS sequence numbers
+ * @timeout_us: timeout in usec for waiting for CS to complete
+ * @timestamp: timestamp of first completed CS
+ * @wait_status: wait for CS status
+ * @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0)
+ * @stream_master_qid_map: bitmap of all stream master QIDs on which the
+ * multi-CS is waiting
+ * @arr_len: fence_arr and seq_arr array length
+ * @gone_cs: indication of gone CS (1- there was gone CS, otherwise 0)
+ * @update_ts: update timestamp. 1- update the timestamp, otherwise 0.
+ */
+struct multi_cs_data {
+ struct hl_ctx *ctx;
+ struct hl_fence **fence_arr;
+ u64 *seq_arr;
+ s64 timeout_us;
+ s64 timestamp;
+ long wait_status;
+ u32 completion_bitmap;
+ u32 stream_master_qid_map;
+ u8 arr_len;
+ u8 gone_cs;
+ u8 update_ts;
+};
+
/**
* struct hl_device - habanalabs device structure.
* @pdev: pointer to PCI device, can be NULL in case of simulator device.
* @kernel_queues: array of hl_hw_queue.
* @cs_mirror_list: CS mirror list for TDR.
* @cs_mirror_lock: protects cs_mirror_list.
- * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs.
+ * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CBs.
* @event_queue: event queue for IRQ from CPU-CP.
* @dma_pool: DMA pool for small allocations.
* @cpu_accessible_dma_mem: Host <-> CPU-CP shared memory CPU address.
* @aggregated_cs_counters: aggregated cs counters among all contexts
* @mmu_priv: device-specific MMU data.
* @mmu_func: device-related MMU functions.
+ * @fw_loader: FW loader manager.
+ * @pci_mem_region: array of memory regions in the PCI
+ * @state_dump_specs: constants and dictionaries needed to dump system state.
+ * @multi_cs_completion: array of multi-CS completion.
* @dram_used_mem: current DRAM memory consumption.
* @timeout_jiffies: device CS timeout value.
* @max_power: the max power of the device, as configured by the sysadmin. This
* the error will be ignored by the driver during
* device initialization. Mainly used to debug and
* workaround firmware bugs
+ * @last_successful_open_jif: timestamp (jiffies) of the last successful
+ * device open.
+ * @last_open_session_duration_jif: duration (jiffies) of the last device open
+ * session.
+ * @open_counter: number of successful device open operations.
* @in_reset: is device in reset flow.
* @curr_pll_profile: current PLL profile.
* @card_type: Various ASICs have several card types. This indicates the card
* @collective_mon_idx: helper index for collective initialization
* @supports_coresight: is CoreSight supported.
* @supports_soft_reset: is soft reset supported.
+ * @allow_external_soft_reset: true if soft reset initiated by user or TDR is
+ * allowed.
* @supports_cb_mapping: is mapping a CB to the device's MMU supported.
* @needs_reset: true if reset_on_lockup is false and device should be reset
* due to lockup.
* @device_fini_pending: true if device_fini was called and might be
* waiting for the reset thread to finish
* @supports_staged_submission: true if staged submissions are supported
+ * @curr_reset_cause: saves an enumerated reset cause when a hard reset is
+ * triggered, and cleared after it is shared with preboot.
+ * @skip_reset_on_timeout: Skip device reset if CS has timed out, wait for it to
+ * complete instead.
+ * @device_cpu_is_halted: Flag to indicate whether the device CPU was already
+ * halted. We can't halt it again because the COMMS
+ * protocol will throw an error. Relevant only for
+ * cases where Linux was not loaded to device CPU
+ * @supports_wait_for_multi_cs: true if wait for multi CS is supported
*/
struct hl_device {
struct pci_dev *pdev;
struct hl_mmu_priv mmu_priv;
struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS];
+ struct fw_load_mgr fw_loader;
+
+ struct pci_mem_region pci_mem_region[PCI_REGION_NUMBER];
+
+ struct hl_state_dump_specs state_dump_specs;
+
+ struct multi_cs_completion multi_cs_completion[
+ MULTI_CS_MAX_USER_CTX];
+ u32 *stream_master_qid_arr;
atomic64_t dram_used_mem;
u64 timeout_jiffies;
u64 max_power;
u64 clock_gating_mask;
u64 boot_error_status_mask;
+ u64 last_successful_open_jif;
+ u64 last_open_session_duration_jif;
+ u64 open_counter;
atomic_t in_reset;
enum hl_pll_frequency curr_pll_profile;
enum cpucp_card_types card_type;
u8 collective_mon_idx;
u8 supports_coresight;
u8 supports_soft_reset;
+ u8 allow_external_soft_reset;
u8 supports_cb_mapping;
u8 needs_reset;
u8 process_kill_trial_cnt;
u8 device_fini_pending;
u8 supports_staged_submission;
+ u8 curr_reset_cause;
+ u8 skip_reset_on_timeout;
+ u8 device_cpu_is_halted;
+ u8 supports_wait_for_multi_cs;
+ u8 stream_master_qid_arr_size;
/* Parameters for bring-up */
u64 nic_ports_mask;
u8 rl_enable;
u8 reset_on_preboot_fail;
u8 reset_upon_device_release;
+ u8 reset_if_device_not_idle;
};
+/**
+ * struct hl_cs_encaps_sig_handle - encapsulated signals handle structure
+ * @refcount: refcount used to protect removing this id when several
+ * wait cs are used to wait of the reserved encaps signals.
+ * @hdev: pointer to habanalabs device structure.
+ * @hw_sob: pointer to H/W SOB used in the reservation.
+ * @cs_seq: staged cs sequence which contains encapsulated signals
+ * @id: idr handler id to be used to fetch the handler info
+ * @q_idx: stream queue index
+ * @pre_sob_val: current SOB value before reservation
+ * @count: signals number
+ */
+struct hl_cs_encaps_sig_handle {
+ struct kref refcount;
+ struct hl_device *hdev;
+ struct hl_hw_sob *hw_sob;
+ u64 cs_seq;
+ u32 id;
+ u32 q_idx;
+ u32 pre_sob_val;
+ u32 count;
+};
+
/*
* IOCTLs
*/
* Kernel module functions that can be accessed by entire module
*/
+/**
+ * hl_get_sg_info() - get number of pages and the DMA address from SG list.
+ * @sg: the SG list.
+ * @dma_addr: pointer to DMA address to return.
+ *
+ * Calculate the number of consecutive pages described by the SG list. Take the
+ * offset of the address in the first page, add to it the length and round it up
+ * to the number of needed pages.
+ */
+static inline u32 hl_get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
+{
+ *dma_addr = sg_dma_address(sg);
+
+ return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
+ (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+}
+
/**
* hl_mem_area_inside_range() - Checks whether address+size are inside a range.
* @address: The start address of the area we want to validate.
int hl_hw_queues_create(struct hl_device *hdev);
void hl_hw_queues_destroy(struct hl_device *hdev);
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
- u32 cb_size, u64 cb_ptr);
+ u32 cb_size, u64 cb_ptr);
+void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
+ u32 ctl, u32 len, u64 ptr);
int hl_hw_queue_schedule_cs(struct hl_cs *cs);
u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx);
int hl_ctx_put(struct hl_ctx *ctx);
struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
+int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr,
+ struct hl_fence **fence, u32 arr_len);
void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
void hl_cb_va_pool_fini(struct hl_ctx *ctx);
void hl_cs_rollback_all(struct hl_device *hdev);
-void hl_pending_cb_list_flush(struct hl_ctx *ctx);
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
void hl_sob_reset_error(struct kref *ref);
int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask);
void hl_fence_put(struct hl_fence *fence);
+void hl_fences_put(struct hl_fence **fence, int len);
void hl_fence_get(struct hl_fence *fence);
void cs_get(struct hl_cs *cs);
bool cs_needs_completion(struct hl_cs *cs);
bool cs_needs_timeout(struct hl_cs *cs);
bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs);
struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq);
+void hl_multi_cs_completion_init(struct hl_device *hdev);
void goya_set_asic_funcs(struct hl_device *hdev);
void gaudi_set_asic_funcs(struct hl_device *hdev);
void *vaddr);
int hl_fw_send_heartbeat(struct hl_device *hdev);
int hl_fw_cpucp_info_get(struct hl_device *hdev,
- u32 cpu_security_boot_status_reg,
- u32 boot_err0_reg);
+ u32 sts_boot_dev_sts0_reg,
+ u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
+ u32 boot_err1_reg);
int hl_fw_cpucp_handshake(struct hl_device *hdev,
- u32 cpu_security_boot_status_reg,
- u32 boot_err0_reg);
+ u32 sts_boot_dev_sts0_reg,
+ u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
+ u32 boot_err1_reg);
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
struct hl_info_pci_counters *counters);
int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
u16 *pll_freq_arr);
int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power);
-int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
- u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
- u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
- bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout);
+void hl_fw_ask_hard_reset_without_linux(struct hl_device *hdev);
+void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev);
+int hl_fw_init_cpu(struct hl_device *hdev);
int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
- u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
- u32 timeout);
-
+ u32 sts_boot_dev_sts0_reg,
+ u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
+ u32 boot_err1_reg, u32 timeout);
+int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
+ struct fw_load_mgr *fw_loader,
+ enum comms_cmd cmd, unsigned int size,
+ bool wait_ok, u32 timeout);
int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
bool is_wc[3]);
int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data);
struct hl_inbound_pci_region *pci_region);
int hl_pci_set_outbound_region(struct hl_device *hdev,
struct hl_outbound_pci_region *pci_region);
+enum pci_region hl_get_pci_memory_region(struct hl_device *hdev, u64 addr);
int hl_pci_init(struct hl_device *hdev);
void hl_pci_fini(struct hl_device *hdev);
int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
+void hw_sob_get(struct hl_hw_sob *hw_sob);
+void hw_sob_put(struct hl_hw_sob *hw_sob);
+void hl_encaps_handle_do_release(struct kref *ref);
+void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
+ struct hl_cs *cs, struct hl_cs_job *job,
+ struct hl_cs_compl *cs_cmpl);
void hl_release_pending_user_interrupts(struct hl_device *hdev);
+int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
+ struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig);
+
+int hl_state_dump(struct hl_device *hdev);
+const char *hl_state_dump_get_sync_name(struct hl_device *hdev, u32 sync_id);
+const char *hl_state_dump_get_monitor_name(struct hl_device *hdev,
+ struct hl_mon_state_dump *mon);
+void hl_state_dump_free_sync_to_engine_map(struct hl_sync_to_engine_map *map);
+__printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
+ const char *format, ...);
+char *hl_format_as_binary(char *buf, size_t buf_len, u32 n);
+const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
#ifdef CONFIG_DEBUG_FS
struct hl_userptr *userptr);
void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
+void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
+ unsigned long length);
#else
{
}
+static inline void hl_debugfs_set_state_dump(struct hl_device *hdev,
+ char *data, unsigned long length)
+{
+}
+
#endif
/* IOCTLs */