Merge branch 'misc.namei' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

[linux-2.6-microblaze.git] / drivers / misc / habanalabs / common / habanalabs.h
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h

index 81b6825..bebebcb 100644 (file)
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -128,12 +128,17 @@ enum hl_mmu_page_table_location {
   *
   * - HL_RESET_DEVICE_RELEASE
   *       Set if reset is due to device release
+ *
+ * - HL_RESET_FW
+ *       F/W will perform the reset. No need to ask it to reset the device. This is relevant
+ *       only when running with secured f/w
   */
  #define HL_RESET_HARD                  (1 << 0)
  #define HL_RESET_FROM_RESET_THREAD     (1 << 1)
  #define HL_RESET_HEARTBEAT             (1 << 2)
  #define HL_RESET_TDR                   (1 << 3)
  #define HL_RESET_DEVICE_RELEASE                (1 << 4)
+#define HL_RESET_FW                    (1 << 5)
  
  #define HL_MAX_SOBS_PER_MONITOR        8
  
@@ -489,6 +494,8 @@ struct hl_hints_range {
   *                                       reserved for the user
   * @first_available_cq: first available CQ for the user.
   * @user_interrupt_count: number of user interrupts.
+ * @server_type: Server type that the ASIC is currently installed in.
+ *               The value is according to enum hl_server_type in uapi file.
   * @tpc_enabled_mask: which TPCs are enabled.
   * @completion_queues_count: number of completion queues.
   * @fw_security_enabled: true if security measures are enabled in firmware,
@@ -570,6 +577,7 @@ struct asic_fixed_properties {
         u16                             first_available_user_msix_interrupt;
         u16                             first_available_cq[HL_MAX_DCORES];
         u16                             user_interrupt_count;
+       u16                             server_type;
         u8                              tpc_enabled_mask;
         u8                              completion_queues_count;
         u8                              fw_security_enabled;
@@ -589,27 +597,27 @@ struct asic_fixed_properties {
   * @completion: fence is implemented using completion
   * @refcount: refcount for this fence
   * @cs_sequence: sequence of the corresponding command submission
+ * @stream_master_qid_map: streams masters QID bitmap to represent all streams
+ *                         masters QIDs that multi cs is waiting on
   * @error: mark this fence with error
   * @timestamp: timestamp upon completion
- * @stream_map: streams bitmap to represent all streams that multi cs is
- *              waiting on
   */
  struct hl_fence {
         struct completion       completion;
         struct kref             refcount;
         u64                     cs_sequence;
+       u32                     stream_master_qid_map;
         int                     error;
         ktime_t                 timestamp;
-       u8                      stream_map;
  };
  
  /**
   * struct hl_cs_compl - command submission completion object.
- * @sob_reset_work: workqueue object to run SOB reset flow.
   * @base_fence: hl fence object.
   * @lock: spinlock to protect fence.
   * @hdev: habanalabs device structure.
   * @hw_sob: the H/W SOB used in this signal/wait CS.
+ * @encaps_sig_hdl: encaps signals handler.
   * @cs_seq: command submission sequence number.
   * @type: type of the CS - signal/wait.
   * @sob_val: the SOB value that is used in this signal/wait CS.
@@ -618,11 +626,11 @@ struct hl_fence {
   * encaps signals or not.
   */
  struct hl_cs_compl {
-       struct work_struct      sob_reset_work;
         struct hl_fence         base_fence;
         spinlock_t              lock;
         struct hl_device        *hdev;
         struct hl_hw_sob        *hw_sob;
+       struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
         u64                     cs_seq;
         enum hl_cs_type         type;
         u16                     sob_val;
@@ -928,7 +936,7 @@ struct pci_mem_region {
         u64 region_base;
         u64 region_size;
         u64 bar_size;
-       u32 offset_in_bar;
+       u64 offset_in_bar;
         u8 bar_id;
         u8 used;
  };
@@ -1156,6 +1164,8 @@ struct fw_load_mgr {
   * @init_cpu_scrambler_dram: Enable CPU specific DRAM scrambling
   * @state_dump_init: initialize constants required for state dump
   * @get_sob_addr: get SOB base address offset.
+ * @set_pci_memory_regions: setting properties of PCI memory regions
+ * @get_stream_master_qid_arr: get pointer to stream masters QID array
   */
  struct hl_asic_funcs {
         int (*early_init)(struct hl_device *hdev);
@@ -1165,8 +1175,8 @@ struct hl_asic_funcs {
         int (*sw_init)(struct hl_device *hdev);
         int (*sw_fini)(struct hl_device *hdev);
         int (*hw_init)(struct hl_device *hdev);
-       void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
-       void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
+       void (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
+       void (*halt_engines)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
         int (*suspend)(struct hl_device *hdev);
         int (*resume)(struct hl_device *hdev);
         int (*mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
@@ -1267,8 +1277,9 @@ struct hl_asic_funcs {
         u64 (*get_device_time)(struct hl_device *hdev);
         int (*collective_wait_init_cs)(struct hl_cs *cs);
         int (*collective_wait_create_jobs)(struct hl_device *hdev,
-                       struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
-                       u32 collective_engine_id);
+                       struct hl_ctx *ctx, struct hl_cs *cs,
+                       u32 wait_queue_id, u32 collective_engine_id,
+                       u32 encaps_signal_offset);
         u64 (*scramble_addr)(struct hl_device *hdev, u64 addr);
         u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
         void (*ack_protection_bits_errors)(struct hl_device *hdev);
@@ -1283,6 +1294,8 @@ struct hl_asic_funcs {
         void (*init_cpu_scrambler_dram)(struct hl_device *hdev);
         void (*state_dump_init)(struct hl_device *hdev);
         u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id);
+       void (*set_pci_memory_regions)(struct hl_device *hdev);
+       u32* (*get_stream_master_qid_arr)(void);
  };
  
  
@@ -1339,20 +1352,6 @@ struct hl_cs_counters_atomic {
         atomic64_t validation_drop_cnt;
  };
  
-/**
- * struct hl_pending_cb - pending command buffer structure
- * @cb_node: cb node in pending cb list
- * @cb: command buffer to send in next submission
- * @cb_size: command buffer size
- * @hw_queue_id: destination queue id
- */
-struct hl_pending_cb {
-       struct list_head        cb_node;
-       struct hl_cb            *cb;
-       u32                     cb_size;
-       u32                     hw_queue_id;
-};
-
  /**
   * struct hl_ctx - user/kernel context.
   * @mem_hash: holds mapping from virtual address to virtual memory area
@@ -1369,8 +1368,6 @@ struct hl_pending_cb {
   *            MMU hash or walking the PGT requires talking this lock.
   * @hw_block_list_lock: protects the HW block memory list.
   * @debugfs_list: node in debugfs list of contexts.
- * pending_cb_list: list of pending command buffers waiting to be sent upon
- *                  next user command submission context.
   * @hw_block_mem_list: list of HW block virtual mapped addresses.
   * @cs_counters: context command submission counters.
   * @cb_va_pool: device VA pool for command buffers which are mapped to the
@@ -1381,17 +1378,11 @@ struct hl_pending_cb {
   *                     index to cs_pending array.
   * @dram_default_hops: array that holds all hops addresses needed for default
   *                     DRAM mapping.
- * @pending_cb_lock: spinlock to protect pending cb list
   * @cs_lock: spinlock to protect cs_sequence.
   * @dram_phys_mem: amount of used physical DRAM memory by this context.
   * @thread_ctx_switch_token: token to prevent multiple threads of the same
   *                             context from running the context switch phase.
   *                             Only a single thread should run it.
- * @thread_pending_cb_token: token to prevent multiple threads from processing
- *                             the pending CB list. Only a single thread should
- *                             process the list since it is protected by a
- *                             spinlock and we don't want to halt the entire
- *                             command submission sequence.
   * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
   *                             the context switch phase from moving to their
   *                             execution phase before the context switch phase
@@ -1411,18 +1402,15 @@ struct hl_ctx {
         struct mutex                    mmu_lock;
         struct mutex                    hw_block_list_lock;
         struct list_head                debugfs_list;
-       struct list_head                pending_cb_list;
         struct list_head                hw_block_mem_list;
         struct hl_cs_counters_atomic    cs_counters;
         struct gen_pool                 *cb_va_pool;
         struct hl_encaps_signals_mgr    sig_mgr;
         u64                             cs_sequence;
         u64                             *dram_default_hops;
-       spinlock_t                      pending_cb_lock;
         spinlock_t                      cs_lock;
         atomic64_t                      dram_phys_mem;
         atomic_t                        thread_ctx_switch_token;
-       atomic_t                        thread_pending_cb_token;
         u32                             thread_ctx_switch_wait_token;
         u32                             asid;
         u32                             handle;
@@ -1453,6 +1441,7 @@ struct hl_ctx_mgr {
   * @sgt: pointer to the scatter-gather table that holds the pages.
   * @dir: for DMA unmapping, the direction must be supplied, so save it.
   * @debugfs_list: node in debugfs list of command submissions.
+ * @pid: the pid of the user process owning the memory
   * @addr: user-space virtual address of the start of the memory area.
   * @size: size of the memory area to pin & map.
   * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
@@ -1465,6 +1454,7 @@ struct hl_userptr {
         struct sg_table         *sgt;
         enum dma_data_direction dir;
         struct list_head        debugfs_list;
+       pid_t                   pid;
         u64                     addr;
         u64                     size;
         u8                      dma_mapped;
@@ -1485,12 +1475,14 @@ struct hl_userptr {
   * @mirror_node : node in device mirror list of command submissions.
   * @staged_cs_node: node in the staged cs list.
   * @debugfs_list: node in debugfs list of command submissions.
+ * @encaps_sig_hdl: holds the encaps signals handle.
   * @sequence: the sequence number of this CS.
   * @staged_sequence: the sequence of the staged submission this CS is part of,
   *                   relevant only if staged_cs is set.
   * @timeout_jiffies: cs timeout in jiffies.
   * @submission_time_jiffies: submission time of the cs
   * @type: CS_TYPE_*.
+ * @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs.
   * @submitted: true if CS was submitted to H/W.
   * @completed: true if CS was completed by device.
   * @timedout : true if CS was timedout.
@@ -1504,6 +1496,7 @@ struct hl_userptr {
   * @staged_cs: true if this CS is part of a staged submission.
   * @skip_reset_on_timeout: true if we shall not reset the device in case
   *                         timeout occurs (debug scenario).
+ * @encaps_signals: true if this CS has encaps reserved signals.
   */
  struct hl_cs {
         u16                     *jobs_in_queue_cnt;
@@ -1518,11 +1511,13 @@ struct hl_cs {
         struct list_head        mirror_node;
         struct list_head        staged_cs_node;
         struct list_head        debugfs_list;
+       struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
         u64                     sequence;
         u64                     staged_sequence;
         u64                     timeout_jiffies;
         u64                     submission_time_jiffies;
         enum hl_cs_type         type;
+       u32                     encaps_sig_hdl_id;
         u8                      submitted;
         u8                      completed;
         u8                      timedout;
@@ -1533,6 +1528,7 @@ struct hl_cs {
         u8                      staged_first;
         u8                      staged_cs;
         u8                      skip_reset_on_timeout;
+       u8                      encaps_signals;
  };
  
  /**
@@ -1552,6 +1548,8 @@ struct hl_cs {
   * @hw_queue_id: the id of the H/W queue this job is submitted to.
   * @user_cb_size: the actual size of the CB we got from the user.
   * @job_cb_size: the actual size of the CB that we put on the queue.
+ * @encaps_sig_wait_offset: encapsulated signals offset, which allow user
+ *                          to wait on part of the reserved signals.
   * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
   *                          handle to a kernel-allocated CB object, false
   *                          otherwise (SRAM/DRAM/host address).
@@ -1576,6 +1574,7 @@ struct hl_cs_job {
         u32                     hw_queue_id;
         u32                     user_cb_size;
         u32                     job_cb_size;
+       u32                     encaps_sig_wait_offset;
         u8                      is_kernel_allocated_cb;
         u8                      contains_dma_pkt;
  };
@@ -1822,6 +1821,7 @@ struct hl_debugfs_entry {
   * @state_dump_sem: protects state_dump.
   * @addr: next address to read/write from/to in read/write32.
   * @mmu_addr: next virtual address to translate to physical address in mmu_show.
+ * @userptr_lookup: the target user ptr to look up on demand.
   * @mmu_asid: ASID to use while translating in mmu_show.
   * @state_dump_head: index of the latest state dump
   * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read.
@@ -1849,6 +1849,7 @@ struct hl_dbg_device_entry {
         struct rw_semaphore             state_dump_sem;
         u64                             addr;
         u64                             mmu_addr;
+       u64                             userptr_lookup;
         u32                             mmu_asid;
         u32                             state_dump_head;
         u8                              i2c_bus;
@@ -1994,7 +1995,7 @@ struct hl_state_dump_specs {
  
  #define HL_STR_MAX     32
  
-#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1)
+#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1)
  
  /* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
   * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
@@ -2142,11 +2143,13 @@ struct hwmon_chip_info;
   * @wq: work queue for device reset procedure.
   * @reset_work: reset work to be done.
   * @hdev: habanalabs device structure.
+ * @fw_reset: whether f/w will do the reset without us sending it a message to do so.
   */
  struct hl_device_reset_work {
         struct workqueue_struct         *wq;
         struct delayed_work             reset_work;
         struct hl_device                *hdev;
+       bool                            fw_reset;
  };
  
  /**
@@ -2271,16 +2274,16 @@ struct hl_mmu_funcs {
   * @completion: completion of any of the CS in the list
   * @lock: spinlock for the completion structure
   * @timestamp: timestamp for the multi-CS completion
+ * @stream_master_qid_map: bitmap of all stream masters on which the multi-CS
+ *                        is waiting
   * @used: 1 if in use, otherwise 0
- * @stream_map: bitmap of all HW/external queues streams on which the multi-CS
- *              is waiting
   */
  struct multi_cs_completion {
         struct completion       completion;
         spinlock_t              lock;
         s64                     timestamp;
+       u32                     stream_master_qid_map;
         u8                      used;
-       u8                      stream_map;
  };
  
  /**
@@ -2292,9 +2295,9 @@ struct multi_cs_completion {
   * @timestamp: timestamp of first completed CS
   * @wait_status: wait for CS status
   * @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0)
+ * @stream_master_qid_map: bitmap of all stream master QIDs on which the
+ *                         multi-CS is waiting
   * @arr_len: fence_arr and seq_arr array length
- * @stream_map: bitmap of all HW/external queues streams on which the multi-CS
- *              is waiting
   * @gone_cs: indication of gone CS (1- there was gone CS, otherwise 0)
   * @update_ts: update timestamp. 1- update the timestamp, otherwise 0.
   */
@@ -2306,8 +2309,8 @@ struct multi_cs_data {
         s64             timestamp;
         long            wait_status;
         u32             completion_bitmap;
+       u32             stream_master_qid_map;
         u8              arr_len;
-       u8              stream_map;
         u8              gone_cs;
         u8              update_ts;
  };
@@ -2528,6 +2531,7 @@ struct hl_device {
  
         struct multi_cs_completion      multi_cs_completion[
                                                         MULTI_CS_MAX_USER_CTX];
+       u32                             *stream_master_qid_arr;
         atomic64_t                      dram_used_mem;
         u64                             timeout_jiffies;
         u64                             max_power;
@@ -2578,6 +2582,7 @@ struct hl_device {
         u8                              skip_reset_on_timeout;
         u8                              device_cpu_is_halted;
         u8                              supports_wait_for_multi_cs;
+       u8                              stream_master_qid_arr_size;
  
         /* Parameters for bring-up */
         u64                             nic_ports_mask;
@@ -2651,6 +2656,23 @@ struct hl_ioctl_desc {
   * Kernel module functions that can be accessed by entire module
   */
  
+/**
+ * hl_get_sg_info() - get the DMA address and page count of an SG entry.
+ * @sg: the SG entry to inspect.
+ * @dma_addr: output; set to sg_dma_address() of @sg.
+ *
+ * Return: number of pages spanned by the entry: the offset of the DMA address
+ * within its first page plus sg_dma_len() of @sg, rounded up to whole pages
+ * (PAGE_SIZE units).
+ */
+static inline u32 hl_get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
+{
+       *dma_addr = sg_dma_address(sg);
+
+       return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
+                       (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+}
+
  /**
   * hl_mem_area_inside_range() - Checks whether address+size are inside a range.
   * @address: The start address of the area we want to validate.
@@ -2794,7 +2816,6 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx);
  void hl_cb_va_pool_fini(struct hl_ctx *ctx);
  
  void hl_cs_rollback_all(struct hl_device *hdev);
-void hl_pending_cb_list_flush(struct hl_ctx *ctx);
  struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
                 enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
  void hl_sob_reset_error(struct kref *ref);
@@ -2935,9 +2956,12 @@ int hl_set_voltage(struct hl_device *hdev,
                         int sensor_index, u32 attr, long value);
  int hl_set_current(struct hl_device *hdev,
                         int sensor_index, u32 attr, long value);
-void hl_encaps_handle_do_release(struct kref *ref);
  void hw_sob_get(struct hl_hw_sob *hw_sob);
  void hw_sob_put(struct hl_hw_sob *hw_sob);
+void hl_encaps_handle_do_release(struct kref *ref);
+void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
+                       struct hl_cs *cs, struct hl_cs_job *job,
+                       struct hl_cs_compl *cs_cmpl);
  void hl_release_pending_user_interrupts(struct hl_device *hdev);
  int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
                         struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig);