accel/habanalabs: add info when FD released while device still in use
[linux-2.6-microblaze.git] / drivers / accel / habanalabs / common / command_submission.c
index 8270db0..d89b539 100644 (file)
@@ -17,7 +17,7 @@
                        HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
 
 
-#define MAX_TS_ITER_NUM 10
+#define MAX_TS_ITER_NUM 100 /* retry cap: compared against iter_counter before jumping back to start_over */
 
 /**
  * enum hl_cs_wait_status - cs wait status
@@ -1168,6 +1168,22 @@ static void cs_completion(struct work_struct *work)
                hl_complete_job(hdev, job);
 }
 
+u32 hl_get_active_cs_num(struct hl_device *hdev) /* count cs_mirror_list entries whose 'completed' is unset */
+{
+       u32 active_cs_num = 0; /* running count; this is the value returned */
+       struct hl_cs *cs; /* list cursor */
+
+       spin_lock(&hdev->cs_mirror_lock); /* lock is held for the whole list walk */
+
+       list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node)
+               if (!cs->completed) /* entries already marked completed are skipped */
+                       active_cs_num++;
+
+       spin_unlock(&hdev->cs_mirror_lock);
+
+       return active_cs_num; /* 0 when the list is empty or every entry is completed */
+}
+
 static int validate_queue_index(struct hl_device *hdev,
                                struct hl_cs_chunk *chunk,
                                enum hl_queue_type *queue_type,
@@ -3145,6 +3161,7 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
                        (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
        unsigned long flags, iter_counter = 0;
        u64 current_cq_counter;
+       ktime_t timestamp;
 
        /* Validate ts_offset not exceeding last max */
        if (requested_offset_record >= cb_last) {
@@ -3153,6 +3170,8 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
                return -EINVAL;
        }
 
+       timestamp = ktime_get();
+
 start_over:
        spin_lock_irqsave(wait_list_lock, flags);
 
@@ -3178,11 +3197,12 @@ start_over:
 
                        /* irq handling in the middle give it time to finish */
                        spin_unlock_irqrestore(wait_list_lock, flags);
-                       usleep_range(1, 10);
+                       usleep_range(100, 1000);
                        if (++iter_counter == MAX_TS_ITER_NUM) {
                                dev_err(buf->mmg->dev,
-                                       "handling registration interrupt took too long!!\n");
-                               return -EINVAL;
+                                       "Timestamp offset processing reached timeout of %lld ms\n",
+                                       ktime_ms_delta(ktime_get(), timestamp));
+                               return -EAGAIN;
                        }
 
                        goto start_over;