Merge tag 'icc-5.11-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/djakov...
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 6 Jan 2021 17:54:21 +0000 (18:54 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 6 Jan 2021 17:54:21 +0000 (18:54 +0100)
Georgi writes:

interconnect fixes for v5.11

This contains a few fixes for iMX and Qualcomm drivers and also
updates my email to my kernel.org address.

- qcom: Fix rpmh link failures when compile test is enabled
- imx: Add a missing of_node_put after of_device_is_available
- imx: Remove a useless test
- imx8mq: Use icc_sync_state

Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>
* tag 'icc-5.11-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/djakov/icc:
  MAINTAINERS: Update Georgi's email address
  interconnect: imx8mq: Use icc_sync_state
  interconnect: imx: Remove a useless test
  interconnect: imx: Add a missing of_node_put after of_device_is_available
  interconnect: qcom: fix rpmh link failures

16 files changed:
MAINTAINERS
crypto/asymmetric_keys/asym_tpm.c
drivers/misc/habanalabs/common/command_submission.c
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/firmware_if.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/habanalabs_drv.c
drivers/misc/habanalabs/common/habanalabs_ioctl.c
drivers/misc/habanalabs/common/hw_queue.c
drivers/misc/habanalabs/common/pci.c
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/gaudi/gaudiP.h
drivers/misc/habanalabs/gaudi/gaudi_coresight.c
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/include/common/hl_boot_if.h
include/uapi/misc/habanalabs.h

index a15e306..7d6cf0f 100644 (file)
@@ -16710,6 +16710,8 @@ M:      Samuel Thibault <samuel.thibault@ens-lyon.org>
 L:     speakup@linux-speakup.org
 S:     Odd Fixes
 W:     http://www.linux-speakup.org/
+W:     https://github.com/linux-speakup/speakup
+B:     https://github.com/linux-speakup/speakup/issues
 F:     drivers/accessibility/speakup/
 
 SPEAR CLOCK FRAMEWORK SUPPORT
index 511932a..0959613 100644 (file)
@@ -354,7 +354,7 @@ static uint32_t derive_pub_key(const void *pub_key, uint32_t len, uint8_t *buf)
        memcpy(cur, e, sizeof(e));
        cur += sizeof(e);
        /* Zero parameters to satisfy set_pub_key ABI. */
-       memset(cur, 0, SETKEY_PARAMS_SIZE);
+       memzero_explicit(cur, SETKEY_PARAMS_SIZE);
 
        return cur - buf;
 }
index beb4823..b2b3d2b 100644 (file)
@@ -472,8 +472,11 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
        cntr = &hdev->aggregated_cs_counters;
 
        cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
-       if (!cs)
+       if (!cs) {
+               atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+               atomic64_inc(&cntr->out_of_mem_drop_cnt);
                return -ENOMEM;
+       }
 
        cs->ctx = ctx;
        cs->submitted = false;
@@ -486,6 +489,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 
        cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
        if (!cs_cmpl) {
+               atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+               atomic64_inc(&cntr->out_of_mem_drop_cnt);
                rc = -ENOMEM;
                goto free_cs;
        }
@@ -513,6 +518,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
        cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
                        sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
        if (!cs->jobs_in_queue_cnt) {
+               atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+               atomic64_inc(&cntr->out_of_mem_drop_cnt);
                rc = -ENOMEM;
                goto free_fence;
        }
@@ -562,7 +569,7 @@ void hl_cs_rollback_all(struct hl_device *hdev)
        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
                flush_workqueue(hdev->cq_wq[i]);
 
-       /* Make sure we don't have leftovers in the H/W queues mirror list */
+       /* Make sure we don't have leftovers in the CS mirror list */
        list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
                cs_get(cs);
                cs->aborted = true;
@@ -764,11 +771,14 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 
 static int hl_cs_copy_chunk_array(struct hl_device *hdev,
                                        struct hl_cs_chunk **cs_chunk_array,
-                                       void __user *chunks, u32 num_chunks)
+                                       void __user *chunks, u32 num_chunks,
+                                       struct hl_ctx *ctx)
 {
        u32 size_to_copy;
 
        if (num_chunks > HL_MAX_JOBS_PER_CS) {
+               atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+               atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
                dev_err(hdev->dev,
                        "Number of chunks can NOT be larger than %d\n",
                        HL_MAX_JOBS_PER_CS);
@@ -777,11 +787,16 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
 
        *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
                                        GFP_ATOMIC);
-       if (!*cs_chunk_array)
+       if (!*cs_chunk_array) {
+               atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+               atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
                return -ENOMEM;
+       }
 
        size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
        if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
+               atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+               atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
                dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
                kfree(*cs_chunk_array);
                return -EFAULT;
@@ -797,6 +812,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
        struct hl_device *hdev = hpriv->hdev;
        struct hl_cs_chunk *cs_chunk_array;
        struct hl_cs_counters_atomic *cntr;
+       struct hl_ctx *ctx = hpriv->ctx;
        struct hl_cs_job *job;
        struct hl_cs *cs;
        struct hl_cb *cb;
@@ -805,7 +821,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
        cntr = &hdev->aggregated_cs_counters;
        *cs_seq = ULLONG_MAX;
 
-       rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+       rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
+                       hpriv->ctx);
        if (rc)
                goto out;
 
@@ -832,8 +849,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
                rc = validate_queue_index(hdev, chunk, &queue_type,
                                                &is_kernel_allocated_cb);
                if (rc) {
-                       atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
-                       atomic64_inc(&cntr->parsing_drop_cnt);
+                       atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+                       atomic64_inc(&cntr->validation_drop_cnt);
                        goto free_cs_object;
                }
 
@@ -841,8 +858,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
                        cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
                        if (!cb) {
                                atomic64_inc(
-                               &hpriv->ctx->cs_counters.parsing_drop_cnt);
-                               atomic64_inc(&cntr->parsing_drop_cnt);
+                                       &ctx->cs_counters.validation_drop_cnt);
+                               atomic64_inc(&cntr->validation_drop_cnt);
                                rc = -EINVAL;
                                goto free_cs_object;
                        }
@@ -856,8 +873,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
                job = hl_cs_allocate_job(hdev, queue_type,
                                                is_kernel_allocated_cb);
                if (!job) {
-                       atomic64_inc(
-                       &hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
+                       atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
                        atomic64_inc(&cntr->out_of_mem_drop_cnt);
                        dev_err(hdev->dev, "Failed to allocate a new job\n");
                        rc = -ENOMEM;
@@ -891,7 +907,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 
                rc = cs_parser(hpriv, job);
                if (rc) {
-                       atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
+                       atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
                        atomic64_inc(&cntr->parsing_drop_cnt);
                        dev_err(hdev->dev,
                                "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
@@ -901,8 +917,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
        }
 
        if (int_queues_only) {
-               atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
-               atomic64_inc(&cntr->parsing_drop_cnt);
+               atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+               atomic64_inc(&cntr->validation_drop_cnt);
                dev_err(hdev->dev,
                        "Reject CS %d.%llu because only internal queues jobs are present\n",
                        cs->ctx->asid, cs->sequence);
@@ -1042,7 +1058,7 @@ out:
 }
 
 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
-               struct hl_cs_chunk *chunk, u64 *signal_seq)
+               struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
 {
        u64 *signal_seq_arr = NULL;
        u32 size_to_copy, signal_seq_arr_len;
@@ -1052,6 +1068,8 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
 
        /* currently only one signal seq is supported */
        if (signal_seq_arr_len != 1) {
+               atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+               atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
                dev_err(hdev->dev,
                        "Wait for signal CS supports only one signal CS seq\n");
                return -EINVAL;
@@ -1060,13 +1078,18 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
        signal_seq_arr = kmalloc_array(signal_seq_arr_len,
                                        sizeof(*signal_seq_arr),
                                        GFP_ATOMIC);
-       if (!signal_seq_arr)
+       if (!signal_seq_arr) {
+               atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+               atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
                return -ENOMEM;
+       }
 
        size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
        if (copy_from_user(signal_seq_arr,
                                u64_to_user_ptr(chunk->signal_seq_arr),
                                size_to_copy)) {
+               atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+               atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
                dev_err(hdev->dev,
                        "Failed to copy signal seq array from user\n");
                rc = -EFAULT;
@@ -1153,6 +1176,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
        struct hl_device *hdev = hpriv->hdev;
        struct hl_cs_compl *sig_waitcs_cmpl;
        u32 q_idx, collective_engine_id = 0;
+       struct hl_cs_counters_atomic *cntr;
        struct hl_fence *sig_fence = NULL;
        struct hl_ctx *ctx = hpriv->ctx;
        enum hl_queue_type q_type;
@@ -1160,9 +1184,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
        u64 signal_seq;
        int rc;
 
+       cntr = &hdev->aggregated_cs_counters;
        *cs_seq = ULLONG_MAX;
 
-       rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+       rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
+                       ctx);
        if (rc)
                goto out;
 
@@ -1170,6 +1196,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
        chunk = &cs_chunk_array[0];
 
        if (chunk->queue_index >= hdev->asic_prop.max_queues) {
+               atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+               atomic64_inc(&cntr->validation_drop_cnt);
                dev_err(hdev->dev, "Queue index %d is invalid\n",
                        chunk->queue_index);
                rc = -EINVAL;
@@ -1181,6 +1209,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
        q_type = hw_queue_prop->type;
 
        if (!hw_queue_prop->supports_sync_stream) {
+               atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+               atomic64_inc(&cntr->validation_drop_cnt);
                dev_err(hdev->dev,
                        "Queue index %d does not support sync stream operations\n",
                        q_idx);
@@ -1190,6 +1220,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
        if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
                if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
+                       atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+                       atomic64_inc(&cntr->validation_drop_cnt);
                        dev_err(hdev->dev,
                                "Queue index %d is invalid\n", q_idx);
                        rc = -EINVAL;
@@ -1200,12 +1232,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
        }
 
        if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
-               rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq);
+               rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx);
                if (rc)
                        goto free_cs_chunk_array;
 
                sig_fence = hl_ctx_get_fence(ctx, signal_seq);
                if (IS_ERR(sig_fence)) {
+                       atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+                       atomic64_inc(&cntr->validation_drop_cnt);
                        dev_err(hdev->dev,
                                "Failed to get signal CS with seq 0x%llx\n",
                                signal_seq);
@@ -1223,6 +1257,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
                        container_of(sig_fence, struct hl_cs_compl, base_fence);
 
                if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
+                       atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+                       atomic64_inc(&cntr->validation_drop_cnt);
                        dev_err(hdev->dev,
                                "CS seq 0x%llx is not of a signal CS\n",
                                signal_seq);
@@ -1270,8 +1306,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
        else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
                rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
                                cs, q_idx, collective_engine_id);
-       else
+       else {
+               atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+               atomic64_inc(&cntr->validation_drop_cnt);
                rc = -EINVAL;
+       }
 
        if (rc)
                goto free_cs_object;
index 5871162..1456eab 100644 (file)
@@ -17,12 +17,12 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
 {
        enum hl_device_status status;
 
-       if (hdev->disabled)
-               status = HL_DEVICE_STATUS_MALFUNCTION;
-       else if (atomic_read(&hdev->in_reset))
+       if (atomic_read(&hdev->in_reset))
                status = HL_DEVICE_STATUS_IN_RESET;
        else if (hdev->needs_reset)
                status = HL_DEVICE_STATUS_NEEDS_RESET;
+       else if (hdev->disabled)
+               status = HL_DEVICE_STATUS_MALFUNCTION;
        else
                status = HL_DEVICE_STATUS_OPERATIONAL;
 
@@ -1092,6 +1092,7 @@ kill_processes:
                                                GFP_KERNEL);
                if (!hdev->kernel_ctx) {
                        rc = -ENOMEM;
+                       hl_mmu_fini(hdev);
                        goto out_err;
                }
 
@@ -1103,6 +1104,7 @@ kill_processes:
                                "failed to init kernel ctx in hard reset\n");
                        kfree(hdev->kernel_ctx);
                        hdev->kernel_ctx = NULL;
+                       hl_mmu_fini(hdev);
                        goto out_err;
                }
        }
index 0e1c629..20f77f5 100644 (file)
@@ -627,25 +627,38 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
        security_status = RREG32(cpu_security_boot_status_reg);
 
        /* We read security status multiple times during boot:
-        * 1. preboot - we check if fw security feature is supported
-        * 2. boot cpu - we get boot cpu security status
-        * 3. FW application - we get FW application security status
+        * 1. preboot - a. Check whether the security status bits are valid
+        *              b. Check whether fw security is enabled
+        *              c. Check whether hard reset is done by preboot
+        * 2. boot cpu - a. Fetch boot cpu security status
+        *               b. Check whether hard reset is done by boot cpu
+        * 3. FW application - a. Fetch fw application security status
+        *                     b. Check whether hard reset is done by fw app
         *
         * Preboot:
         * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set
         * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
         */
        if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
-               hdev->asic_prop.fw_security_status_valid = 1;
-               prop->fw_security_disabled =
-                       !(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN);
+               prop->fw_security_status_valid = 1;
+
+               if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)
+                       prop->fw_security_disabled = false;
+               else
+                       prop->fw_security_disabled = true;
+
+               if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
+                       prop->hard_reset_done_by_fw = true;
        } else {
-               hdev->asic_prop.fw_security_status_valid = 0;
+               prop->fw_security_status_valid = 0;
                prop->fw_security_disabled = true;
        }
 
+       dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
+                       prop->hard_reset_done_by_fw ? "enabled" : "disabled");
+
        dev_info(hdev->dev, "firmware-level security is %s\n",
-               prop->fw_security_disabled ? "disabled" : "enabled");
+                       prop->fw_security_disabled ? "disabled" : "enabled");
 
        return 0;
 }
@@ -655,6 +668,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
                        u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
                        bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout)
 {
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
        u32 status;
        int rc;
 
@@ -723,11 +737,22 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
        /* Read U-Boot version now in case we will later fail */
        hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
 
+       /* Clear reset status since we need to read it again from boot CPU */
+       prop->hard_reset_done_by_fw = false;
+
        /* Read boot_cpu security bits */
-       if (hdev->asic_prop.fw_security_status_valid)
-               hdev->asic_prop.fw_boot_cpu_security_map =
+       if (prop->fw_security_status_valid) {
+               prop->fw_boot_cpu_security_map =
                                RREG32(cpu_security_boot_status_reg);
 
+               if (prop->fw_boot_cpu_security_map &
+                               CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
+                       prop->hard_reset_done_by_fw = true;
+       }
+
+       dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
+                       prop->hard_reset_done_by_fw ? "enabled" : "disabled");
+
        if (rc) {
                detect_cpu_boot_status(hdev, status);
                rc = -EIO;
@@ -796,18 +821,21 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
                goto out;
        }
 
+       /* Clear reset status since we need to read again from app */
+       prop->hard_reset_done_by_fw = false;
+
        /* Read FW application security bits */
-       if (hdev->asic_prop.fw_security_status_valid) {
-               hdev->asic_prop.fw_app_security_map =
+       if (prop->fw_security_status_valid) {
+               prop->fw_app_security_map =
                                RREG32(cpu_security_boot_status_reg);
 
-               if (hdev->asic_prop.fw_app_security_map &
+               if (prop->fw_app_security_map &
                                CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
-                       hdev->asic_prop.hard_reset_done_by_fw = true;
+                       prop->hard_reset_done_by_fw = true;
        }
 
-       dev_dbg(hdev->dev, "Firmware hard-reset is %s\n",
-               hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled");
+       dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
+                       prop->hard_reset_done_by_fw ? "enabled" : "disabled");
 
        dev_info(hdev->dev, "Successfully loaded firmware to device\n");
 
index 571eda6..e0d7f5f 100644 (file)
@@ -944,7 +944,7 @@ struct hl_asic_funcs {
        u32 (*get_signal_cb_size)(struct hl_device *hdev);
        u32 (*get_wait_cb_size)(struct hl_device *hdev);
        u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id,
-                       u32 size);
+                       u32 size, bool eb);
        u32 (*gen_wait_cb)(struct hl_device *hdev,
                        struct hl_gen_wait_properties *prop);
        void (*reset_sob)(struct hl_device *hdev, void *data);
@@ -1000,6 +1000,7 @@ struct hl_va_range {
  * @queue_full_drop_cnt: dropped due to queue full
  * @device_in_reset_drop_cnt: dropped due to device in reset
  * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
+ * @validation_drop_cnt: dropped due to error in validation
  */
 struct hl_cs_counters_atomic {
        atomic64_t out_of_mem_drop_cnt;
@@ -1007,6 +1008,7 @@ struct hl_cs_counters_atomic {
        atomic64_t queue_full_drop_cnt;
        atomic64_t device_in_reset_drop_cnt;
        atomic64_t max_cs_in_flight_drop_cnt;
+       atomic64_t validation_drop_cnt;
 };
 
 /**
index 6bbb6bc..032d114 100644 (file)
@@ -544,6 +544,7 @@ static struct pci_driver hl_pci_driver = {
        .id_table = ids,
        .probe = hl_pci_probe,
        .remove = hl_pci_remove,
+       .shutdown = hl_pci_remove,
        .driver.pm = &hl_pm_ops,
        .err_handler = &hl_pci_err_handler,
 };
index 32e6af1..12efbd9 100644 (file)
@@ -335,6 +335,8 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
                        atomic64_read(&cntr->device_in_reset_drop_cnt);
        cs_counters.total_max_cs_in_flight_drop_cnt =
                        atomic64_read(&cntr->max_cs_in_flight_drop_cnt);
+       cs_counters.total_validation_drop_cnt =
+                       atomic64_read(&cntr->validation_drop_cnt);
 
        if (hpriv->ctx) {
                cs_counters.ctx_out_of_mem_drop_cnt =
@@ -352,6 +354,9 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
                cs_counters.ctx_max_cs_in_flight_drop_cnt =
                                atomic64_read(
                        &hpriv->ctx->cs_counters.max_cs_in_flight_drop_cnt);
+               cs_counters.ctx_validation_drop_cnt =
+                               atomic64_read(
+                               &hpriv->ctx->cs_counters.validation_drop_cnt);
        }
 
        return copy_to_user(out, &cs_counters,
@@ -406,7 +411,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv,
 static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 {
        struct hl_device *hdev = hpriv->hdev;
-       struct hl_pll_frequency_info freq_info = {0};
+       struct hl_pll_frequency_info freq_info = { {0} };
        u32 max_size = args->return_size;
        void __user *out = (void __user *) (uintptr_t) args->return_pointer;
        int rc;
index 7caf868..7621725 100644 (file)
@@ -418,8 +418,11 @@ static void init_signal_cs(struct hl_device *hdev,
                "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
                cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
 
+       /* we set an EB since we must make sure all operations are done
+        * when sending the signal
+        */
        hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
-                               cs_cmpl->hw_sob->sob_id, 0);
+                               cs_cmpl->hw_sob->sob_id, 0, true);
 
        kref_get(&hw_sob->kref);
 
index 923b260..b4725e6 100644 (file)
@@ -130,10 +130,8 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
        if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
                return 0;
 
-       if (val & PCI_CONFIG_ELBI_STS_ERR) {
-               dev_err(hdev->dev, "Error writing to ELBI\n");
+       if (val & PCI_CONFIG_ELBI_STS_ERR)
                return -EIO;
-       }
 
        if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
                dev_err(hdev->dev, "ELBI write didn't finish in time\n");
@@ -160,8 +158,12 @@ int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
 
        dbi_offset = addr & 0xFFF;
 
-       rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000);
-       rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset,
+       /* Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail
+        * in case the firmware security is enabled
+        */
+       hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000);
+
+       rc = hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset,
                                data);
 
        if (rc)
@@ -244,9 +246,11 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
 
        rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val);
 
-       /* Return the DBI window to the default location */
-       rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
-       rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
+       /* Return the DBI window to the default location
+        * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail
+        * in case the firmware security is enabled
+        */
+       hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
 
        if (rc)
                dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n",
@@ -294,9 +298,11 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
        /* Enable */
        rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000);
 
-       /* Return the DBI window to the default location */
-       rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
-       rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
+       /* Return the DBI window to the default location
+        * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail
+        * in case the firmware security is enabled
+        */
+       hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
 
        return rc;
 }
index 1f19266..8c09e44 100644 (file)
@@ -151,19 +151,6 @@ static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
        [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
 };
 
-static const u32 gaudi_pll_base_addresses[GAUDI_PLL_MAX] = {
-       [CPU_PLL] = mmPSOC_CPU_PLL_NR,
-       [PCI_PLL] = mmPSOC_PCI_PLL_NR,
-       [SRAM_PLL] = mmSRAM_W_PLL_NR,
-       [HBM_PLL] = mmPSOC_HBM_PLL_NR,
-       [NIC_PLL] = mmNIC0_PLL_NR,
-       [DMA_PLL] = mmDMA_W_PLL_NR,
-       [MESH_PLL] = mmMESH_W_PLL_NR,
-       [MME_PLL] = mmPSOC_MME_PLL_NR,
-       [TPC_PLL] = mmPSOC_TPC_PLL_NR,
-       [IF_PLL] = mmIF_W_PLL_NR
-};
-
 static inline bool validate_packet_id(enum packet_id id)
 {
        switch (id) {
@@ -374,7 +361,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev);
 static void gaudi_disable_clock_gating(struct hl_device *hdev);
 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
-                               u32 size);
+                               u32 size, bool eb);
 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
                                struct hl_gen_wait_properties *prop);
 
@@ -667,12 +654,6 @@ static int gaudi_early_init(struct hl_device *hdev)
        if (rc)
                goto free_queue_props;
 
-       if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
-               dev_info(hdev->dev,
-                       "H/W state is dirty, must reset before initializing\n");
-               hdev->asic_funcs->hw_fini(hdev, true);
-       }
-
        /* Before continuing in the initialization, we need to read the preboot
         * version to determine whether we run with a security-enabled firmware
         */
@@ -685,6 +666,12 @@ static int gaudi_early_init(struct hl_device *hdev)
                goto pci_fini;
        }
 
+       if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
+               dev_info(hdev->dev,
+                       "H/W state is dirty, must reset before initializing\n");
+               hdev->asic_funcs->hw_fini(hdev, true);
+       }
+
        return 0;
 
 pci_fini:
@@ -703,93 +690,60 @@ static int gaudi_early_fini(struct hl_device *hdev)
 }
 
 /**
- * gaudi_fetch_pll_frequency - Fetch PLL frequency values
+ * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
  *
  * @hdev: pointer to hl_device structure
- * @pll_index: index of the pll to fetch frequency from
- * @pll_freq: pointer to store the pll frequency in MHz in each of the available
- *            outputs. if a certain output is not available a 0 will be set
  *
  */
-static int gaudi_fetch_pll_frequency(struct hl_device *hdev,
-                               enum gaudi_pll_index pll_index,
-                               u16 *pll_freq_arr)
+static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
 {
-       u32 nr = 0, nf = 0, od = 0, pll_clk = 0, div_fctr, div_sel,
-                       pll_base_addr = gaudi_pll_base_addresses[pll_index];
-       u16 freq = 0;
-       int i, rc;
-
-       if (hdev->asic_prop.fw_security_status_valid &&
-                       (hdev->asic_prop.fw_app_security_map &
-                                       CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
-               rc = hl_fw_cpucp_pll_info_get(hdev, pll_index, pll_freq_arr);
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
+       u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
+       int rc;
 
-               if (rc)
-                       return rc;
-       } else if (hdev->asic_prop.fw_security_disabled) {
+       if (hdev->asic_prop.fw_security_disabled) {
                /* Backward compatibility */
-               nr = RREG32(pll_base_addr + PLL_NR_OFFSET);
-               nf = RREG32(pll_base_addr + PLL_NF_OFFSET);
-               od = RREG32(pll_base_addr + PLL_OD_OFFSET);
-
-               for (i = 0; i < HL_PLL_NUM_OUTPUTS; i++) {
-                       div_fctr = RREG32(pll_base_addr +
-                                       PLL_DIV_FACTOR_0_OFFSET + i * 4);
-                       div_sel = RREG32(pll_base_addr +
-                                       PLL_DIV_SEL_0_OFFSET + i * 4);
+               div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
+               div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
+               nr = RREG32(mmPSOC_CPU_PLL_NR);
+               nf = RREG32(mmPSOC_CPU_PLL_NF);
+               od = RREG32(mmPSOC_CPU_PLL_OD);
 
-                       if (div_sel == DIV_SEL_REF_CLK ||
+               if (div_sel == DIV_SEL_REF_CLK ||
                                div_sel == DIV_SEL_DIVIDED_REF) {
-                               if (div_sel == DIV_SEL_REF_CLK)
-                                       freq = PLL_REF_CLK;
-                               else
-                                       freq = PLL_REF_CLK / (div_fctr + 1);
-                       } else if (div_sel == DIV_SEL_PLL_CLK ||
-                                       div_sel == DIV_SEL_DIVIDED_PLL) {
-                               pll_clk = PLL_REF_CLK * (nf + 1) /
-                                               ((nr + 1) * (od + 1));
-                               if (div_sel == DIV_SEL_PLL_CLK)
-                                       freq = pll_clk;
-                               else
-                                       freq = pll_clk / (div_fctr + 1);
-                       } else {
-                               dev_warn(hdev->dev,
-                                       "Received invalid div select value: %d",
-                                       div_sel);
-                       }
-
-                       pll_freq_arr[i] = freq;
+                       if (div_sel == DIV_SEL_REF_CLK)
+                               freq = PLL_REF_CLK;
+                       else
+                               freq = PLL_REF_CLK / (div_fctr + 1);
+               } else if (div_sel == DIV_SEL_PLL_CLK ||
+                       div_sel == DIV_SEL_DIVIDED_PLL) {
+                       pll_clk = PLL_REF_CLK * (nf + 1) /
+                                       ((nr + 1) * (od + 1));
+                       if (div_sel == DIV_SEL_PLL_CLK)
+                               freq = pll_clk;
+                       else
+                               freq = pll_clk / (div_fctr + 1);
+               } else {
+                       dev_warn(hdev->dev,
+                               "Received invalid div select value: %d",
+                               div_sel);
+                       freq = 0;
                }
        } else {
-               dev_err(hdev->dev, "Failed to fetch PLL frequency values\n");
-               return -EIO;
-       }
+               rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr);
 
-       return 0;
-}
-
-/**
- * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
- *
- * @hdev: pointer to hl_device structure
- *
- */
-static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
-{
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       u16 pll_freq[HL_PLL_NUM_OUTPUTS];
-       int rc;
+               if (rc)
+                       return rc;
 
-       rc = gaudi_fetch_pll_frequency(hdev, CPU_PLL, pll_freq);
-       if (rc)
-               return rc;
+               freq = pll_freq_arr[2];
+       }
 
-       prop->psoc_timestamp_frequency = pll_freq[2];
-       prop->psoc_pci_pll_nr = 0;
-       prop->psoc_pci_pll_nf = 0;
-       prop->psoc_pci_pll_od = 0;
-       prop->psoc_pci_pll_div_factor = 0;
+       prop->psoc_timestamp_frequency = freq;
+       prop->psoc_pci_pll_nr = nr;
+       prop->psoc_pci_pll_nf = nf;
+       prop->psoc_pci_pll_od = od;
+       prop->psoc_pci_pll_div_factor = div_fctr;
 
        return 0;
 }
@@ -884,11 +838,17 @@ static int gaudi_init_tpc_mem(struct hl_device *hdev)
        size_t fw_size;
        void *cpu_addr;
        dma_addr_t dma_handle;
-       int rc;
+       int rc, count = 5;
 
+again:
        rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
+       if (rc == -EINTR && count-- > 0) {
+               msleep(50);
+               goto again;
+       }
+
        if (rc) {
-               dev_err(hdev->dev, "Firmware file %s is not found!\n",
+               dev_err(hdev->dev, "Failed to load firmware file %s\n",
                                GAUDI_TPC_FW_FILE);
                goto out;
        }
@@ -1110,7 +1070,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
                prop->collective_sob_id, queue_id);
 
        cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
-                       prop->collective_sob_id, cb_size);
+                       prop->collective_sob_id, cb_size, false);
 }
 
 static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
@@ -2449,8 +2409,6 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
        gaudi_init_e2e(hdev);
        gaudi_init_hbm_cred(hdev);
 
-       hdev->asic_funcs->disable_clock_gating(hdev);
-
        for (tpc_id = 0, tpc_offset = 0;
                                tpc_id < TPC_NUMBER_OF_ENGINES;
                                tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
@@ -3462,6 +3420,9 @@ static void gaudi_set_clock_gating(struct hl_device *hdev)
        if (hdev->in_debug)
                return;
 
+       if (!hdev->asic_prop.fw_security_disabled)
+               return;
+
        for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
                enable = !!(hdev->clock_gating_mask &
                                (BIT_ULL(gaudi_dma_assignment[i])));
@@ -3513,7 +3474,7 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev)
        u32 qman_offset;
        int i;
 
-       if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
+       if (!hdev->asic_prop.fw_security_disabled)
                return;
 
        for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
@@ -3806,7 +3767,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
 static void gaudi_pre_hw_init(struct hl_device *hdev)
 {
        /* Perform read from the device to make sure device is up */
-       RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
+       RREG32(mmHW_STATE);
 
        if (hdev->asic_prop.fw_security_disabled) {
                /* Set the access through PCI bars (Linux driver only) as
@@ -3847,6 +3808,13 @@ static int gaudi_hw_init(struct hl_device *hdev)
                return rc;
        }
 
+       /* In case the clock gating was enabled in preboot we need to disable
+        * it here before touching the MME/TPC registers.
+        * There is no need to take clk gating mutex because when this function
+        * runs, no other relevant code can run
+        */
+       hdev->asic_funcs->disable_clock_gating(hdev);
+
        /* SRAM scrambler must be initialized after CPU is running from HBM */
        gaudi_init_scrambler_sram(hdev);
 
@@ -3885,7 +3853,7 @@ static int gaudi_hw_init(struct hl_device *hdev)
        }
 
        /* Perform read from the device to flush all configuration */
-       RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
+       RREG32(mmHW_STATE);
 
        return 0;
 
@@ -3927,7 +3895,10 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
        /* I don't know what is the state of the CPU so make sure it is
         * stopped in any means necessary
         */
-       WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
+       if (hdev->asic_prop.hard_reset_done_by_fw)
+               WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
+       else
+               WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
 
        WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
 
@@ -3971,11 +3942,15 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 
                WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
                        1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
-       }
 
-       dev_info(hdev->dev,
-               "Issued HARD reset command, going to wait %dms\n",
-               reset_timeout_ms);
+               dev_info(hdev->dev,
+                       "Issued HARD reset command, going to wait %dms\n",
+                       reset_timeout_ms);
+       } else {
+               dev_info(hdev->dev,
+                       "Firmware performs HARD reset, going to wait %dms\n",
+                       reset_timeout_ms);
+       }
 
        /*
         * After hard reset, we can't poll the BTM_FSM register because the PSOC
@@ -7936,7 +7911,7 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
 }
 
 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
-                               u32 size)
+                               u32 size, bool eb)
 {
        struct hl_cb *cb = (struct hl_cb *) data;
        struct packet_msg_short *pkt;
@@ -7953,7 +7928,7 @@ static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
+       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, eb);
        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
 
index f2d91f4..a7ab2d7 100644 (file)
 #define MME_ACC_OFFSET         (mmMME1_ACC_BASE - mmMME0_ACC_BASE)
 #define SRAM_BANK_OFFSET       (mmSRAM_Y0_X1_RTR_BASE - mmSRAM_Y0_X0_RTR_BASE)
 
-#define PLL_NR_OFFSET          0
-#define PLL_NF_OFFSET          (mmPSOC_CPU_PLL_NF - mmPSOC_CPU_PLL_NR)
-#define PLL_OD_OFFSET          (mmPSOC_CPU_PLL_OD - mmPSOC_CPU_PLL_NR)
-#define PLL_DIV_FACTOR_0_OFFSET        (mmPSOC_CPU_PLL_DIV_FACTOR_0 - \
-                               mmPSOC_CPU_PLL_NR)
-#define PLL_DIV_SEL_0_OFFSET   (mmPSOC_CPU_PLL_DIV_SEL_0 - mmPSOC_CPU_PLL_NR)
-
 #define NUM_OF_SOB_IN_BLOCK            \
        (((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 - \
        mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0) + 4) >> 2)
index 2e3612e..88a09d4 100644 (file)
@@ -9,6 +9,7 @@
 #include "../include/gaudi/gaudi_coresight.h"
 #include "../include/gaudi/asic_reg/gaudi_regs.h"
 #include "../include/gaudi/gaudi_masks.h"
+#include "../include/gaudi/gaudi_reg_map.h"
 
 #include <uapi/misc/habanalabs.h>
 #define SPMU_SECTION_SIZE              MME0_ACC_SPMU_MAX_OFFSET
@@ -874,7 +875,7 @@ int gaudi_debug_coresight(struct hl_device *hdev, void *data)
        }
 
        /* Perform read from the device to flush all configuration */
-       RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
+       RREG32(mmHW_STATE);
 
        return rc;
 }
index 3e5eb9e..b8b4aa6 100644 (file)
@@ -613,12 +613,6 @@ static int goya_early_init(struct hl_device *hdev)
        if (rc)
                goto free_queue_props;
 
-       if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
-               dev_info(hdev->dev,
-                       "H/W state is dirty, must reset before initializing\n");
-               hdev->asic_funcs->hw_fini(hdev, true);
-       }
-
        /* Before continuing in the initialization, we need to read the preboot
         * version to determine whether we run with a security-enabled firmware
         */
@@ -631,6 +625,12 @@ static int goya_early_init(struct hl_device *hdev)
                goto pci_fini;
        }
 
+       if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
+               dev_info(hdev->dev,
+                       "H/W state is dirty, must reset before initializing\n");
+               hdev->asic_funcs->hw_fini(hdev, true);
+       }
+
        if (!hdev->pldm) {
                val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
                if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
@@ -694,32 +694,47 @@ static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
 static void goya_fetch_psoc_frequency(struct hl_device *hdev)
 {
        struct asic_fixed_properties *prop = &hdev->asic_prop;
-       u32 trace_freq = 0;
-       u32 pll_clk = 0;
-       u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
-       u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
-       u32 nr = RREG32(mmPSOC_PCI_PLL_NR);
-       u32 nf = RREG32(mmPSOC_PCI_PLL_NF);
-       u32 od = RREG32(mmPSOC_PCI_PLL_OD);
-
-       if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
-               if (div_sel == DIV_SEL_REF_CLK)
-                       trace_freq = PLL_REF_CLK;
-               else
-                       trace_freq = PLL_REF_CLK / (div_fctr + 1);
-       } else if (div_sel == DIV_SEL_PLL_CLK ||
-                                       div_sel == DIV_SEL_DIVIDED_PLL) {
-               pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
-               if (div_sel == DIV_SEL_PLL_CLK)
-                       trace_freq = pll_clk;
-               else
-                       trace_freq = pll_clk / (div_fctr + 1);
+       u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
+       u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
+       int rc;
+
+       if (hdev->asic_prop.fw_security_disabled) {
+               div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
+               div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
+               nr = RREG32(mmPSOC_PCI_PLL_NR);
+               nf = RREG32(mmPSOC_PCI_PLL_NF);
+               od = RREG32(mmPSOC_PCI_PLL_OD);
+
+               if (div_sel == DIV_SEL_REF_CLK ||
+                               div_sel == DIV_SEL_DIVIDED_REF) {
+                       if (div_sel == DIV_SEL_REF_CLK)
+                               freq = PLL_REF_CLK;
+                       else
+                               freq = PLL_REF_CLK / (div_fctr + 1);
+               } else if (div_sel == DIV_SEL_PLL_CLK ||
+                               div_sel == DIV_SEL_DIVIDED_PLL) {
+                       pll_clk = PLL_REF_CLK * (nf + 1) /
+                                       ((nr + 1) * (od + 1));
+                       if (div_sel == DIV_SEL_PLL_CLK)
+                               freq = pll_clk;
+                       else
+                               freq = pll_clk / (div_fctr + 1);
+               } else {
+                       dev_warn(hdev->dev,
+                               "Received invalid div select value: %d",
+                               div_sel);
+                       freq = 0;
+               }
        } else {
-               dev_warn(hdev->dev,
-                       "Received invalid div select value: %d", div_sel);
+               rc = hl_fw_cpucp_pll_info_get(hdev, PCI_PLL, pll_freq_arr);
+
+               if (rc)
+                       return;
+
+               freq = pll_freq_arr[1];
        }
 
-       prop->psoc_timestamp_frequency = trace_freq;
+       prop->psoc_timestamp_frequency = freq;
        prop->psoc_pci_pll_nr = nr;
        prop->psoc_pci_pll_nf = nf;
        prop->psoc_pci_pll_od = od;
@@ -5324,7 +5339,7 @@ static u32 goya_get_wait_cb_size(struct hl_device *hdev)
 }
 
 static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
-               u32 size)
+                               u32 size, bool eb)
 {
        return 0;
 }
index e5801ec..b637dfd 100644 (file)
  *                                     implemented. This means that FW will
  *                                     perform hard reset procedure on
  *                                     receiving the halt-machine event.
- *                                     Initialized in: linux
+ *                                     Initialized in: preboot, u-boot, linux
  *
  * CPU_BOOT_DEV_STS0_PLL_INFO_EN       FW retrieval of PLL info is enabled.
  *                                     Initialized in: linux
  *
+ * CPU_BOOT_DEV_STS0_CLK_GATE_EN       Clock Gating enabled.
+ *                                     FW initialized Clock Gating.
+ *                                     Initialized in: preboot
+ *
  * CPU_BOOT_DEV_STS0_ENABLED           Device status register enabled.
  *                                     This is a main indication that the
  *                                     running FW populates the device status
 #define CPU_BOOT_DEV_STS0_DRAM_SCR_EN                  (1 << 9)
 #define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN               (1 << 10)
 #define CPU_BOOT_DEV_STS0_PLL_INFO_EN                  (1 << 11)
+#define CPU_BOOT_DEV_STS0_CLK_GATE_EN                  (1 << 13)
 #define CPU_BOOT_DEV_STS0_ENABLED                      (1 << 31)
 
 enum cpu_boot_status {
@@ -204,6 +209,8 @@ enum kmd_msg {
        KMD_MSG_GOTO_WFE,
        KMD_MSG_FIT_RDY,
        KMD_MSG_SKIP_BMC,
+       RESERVED,
+       KMD_MSG_RST_DEV,
 };
 
 enum cpu_msg_status {
index 8c15a7d..dba3827 100644 (file)
@@ -279,6 +279,7 @@ enum hl_device_status {
  * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason
  * HL_INFO_SYNC_MANAGER  - Retrieve sync manager info per dcore
  * HL_INFO_TOTAL_ENERGY  - Retrieve total energy consumption
+ * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency
  */
 #define HL_INFO_HW_IP_INFO             0
 #define HL_INFO_HW_EVENTS              1
@@ -425,6 +426,8 @@ struct hl_info_sync_manager {
  * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset
  * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight
  * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight
+ * @total_validation_drop_cnt: total dropped due to validation error
+ * @ctx_validation_drop_cnt: context dropped due to validation error
  */
 struct hl_info_cs_counters {
        __u64 total_out_of_mem_drop_cnt;
@@ -437,6 +440,8 @@ struct hl_info_cs_counters {
        __u64 ctx_device_in_reset_drop_cnt;
        __u64 total_max_cs_in_flight_drop_cnt;
        __u64 ctx_max_cs_in_flight_drop_cnt;
+       __u64 total_validation_drop_cnt;
+       __u64 ctx_validation_drop_cnt;
 };
 
 enum gaudi_dcores {