drm/amd/display: add a debugfs interface for the DMUB trace mask
authorHamza Mahfooz <hamza.mahfooz@amd.com>
Thu, 9 Nov 2023 21:42:32 +0000 (16:42 -0500)
committerAlex Deucher <alexander.deucher@amd.com>
Fri, 17 Nov 2023 14:30:49 +0000 (09:30 -0500)
For features that are implemented primarily in DMUB (e.g. PSR), it is
useful to be able to trace them at a DMUB level from the kernel,
especially when debugging issues. So, introduce a debugfs interface that
is able to read and set the DMUB trace mask dynamically at runtime and
document how to use it.

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Documentation/gpu/amdgpu/display/dc-debug.rst
Documentation/gpu/amdgpu/display/trace-groups-table.csv [new file with mode: 0644]
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h

index 40c55a6..817631b 100644 (file)
@@ -75,3 +75,44 @@ change in real-time by using something like::
 
 When reporting a bug related to DC, consider attaching this log before and
 after you reproduce the bug.
+
+DMUB Firmware Debug
+===================
+
+Sometimes, dmesg logs aren't enough. This is especially true if a feature is
+implemented primarily in DMUB firmware. In such cases, all we see in dmesg when
+an issue arises is some generic timeout error. So, to get more relevant
+information, we can trace DMUB commands by enabling the relevant bits in
+`amdgpu_dm_dmub_trace_mask`.
+
+Currently, we support the tracing of the following groups:
+
+Trace Groups
+------------
+
+.. csv-table::
+   :header-rows: 1
+   :widths: 1, 1
+   :file: ./trace-groups-table.csv
+
+**Note: Not all ASICs support all of the listed trace groups**
+
+So, to enable just PSR tracing you can use the following command::
+
+  # echo 0x8020 > /sys/kernel/debug/dri/0/amdgpu_dm_dmub_trace_mask
+
+Then, you need to enable logging trace events to the buffer, which you can do
+using the following::
+
+  # echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
+
+Lastly, after you are able to reproduce the issue you are trying to debug,
+you can disable tracing and read the trace log by using the following::
+
+  # echo 0 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
+  # cat /sys/kernel/debug/dri/0/amdgpu_dm_dmub_tracebuffer
+
+So, when reporting bugs related to features such as PSR and ABM, consider
+enabling the relevant bits in the mask before reproducing the issue and
+attach the log that you obtain from the trace buffer in any bug reports that you
+create.
diff --git a/Documentation/gpu/amdgpu/display/trace-groups-table.csv b/Documentation/gpu/amdgpu/display/trace-groups-table.csv
new file mode 100644 (file)
index 0000000..3f6a50d
--- /dev/null
@@ -0,0 +1,29 @@
+Name, Mask Value
+INFO, 0x1
+IRQ SVC, 0x2
+VBIOS, 0x4
+REGISTER, 0x8
+PHY DBG, 0x10
+PSR, 0x20
+AUX, 0x40
+SMU, 0x80
+MALL, 0x100
+ABM, 0x200
+ALPM, 0x400
+TIMER, 0x800
+HW LOCK MGR, 0x1000
+INBOX1, 0x2000
+PHY SEQ, 0x4000
+PSR STATE, 0x8000
+ZSTATE, 0x10000
+TRANSMITTER CTL, 0x20000
+PANEL CNTL, 0x40000
+FAMS, 0x80000
+DPIA, 0x100000
+SUBVP, 0x200000
+INBOX0, 0x400000
+SDP, 0x4000000
+REPLAY, 0x8000000
+REPLAY RESIDENCY, 0x20000000
+CURSOR INFO, 0x80000000
+IPS, 0x100000000
index 45c972f..98b41ec 100644 (file)
@@ -2971,6 +2971,104 @@ static int allow_edp_hotplug_detection_set(void *data, u64 val)
        return 0;
 }
 
+static int dmub_trace_mask_set(void *data, u64 val)
+{
+       struct amdgpu_device *adev = data;
+       struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub;
+       enum dmub_gpint_command cmd;
+       enum dmub_status status;
+       u64 mask = 0xffff;
+       u8 shift = 0;
+       u32 res;
+       int i;
+
+       if (!srv->fw_version)
+               return -EINVAL;
+
+       for (i = 0;  i < 4; i++) {
+               res = (val & mask) >> shift;
+
+               switch (i) {
+               case 0:
+                       cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD0;
+                       break;
+               case 1:
+                       cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1;
+                       break;
+               case 2:
+                       cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD2;
+                       break;
+               case 3:
+                       cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD3;
+                       break;
+               }
+
+               status = dmub_srv_send_gpint_command(srv, cmd, res, 30);
+
+               if (status == DMUB_STATUS_TIMEOUT)
+                       return -ETIMEDOUT;
+               else if (status == DMUB_STATUS_INVALID)
+                       return -EINVAL;
+               else if (status != DMUB_STATUS_OK)
+                       return -EIO;
+
+               usleep_range(100, 1000);
+
+               mask <<= 16;
+               shift += 16;
+       }
+
+       return 0;
+}
+
+static int dmub_trace_mask_show(void *data, u64 *val)
+{
+       enum dmub_gpint_command cmd = DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0;
+       struct amdgpu_device *adev = data;
+       struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub;
+       enum dmub_status status;
+       u8 shift = 0;
+       u64 raw = 0;
+       u64 res = 0;
+       int i = 0;
+
+       if (!srv->fw_version)
+               return -EINVAL;
+
+       while (i < 4) {
+               status = dmub_srv_send_gpint_command(srv, cmd, 0, 30);
+
+               if (status == DMUB_STATUS_OK) {
+                       status = dmub_srv_get_gpint_response(srv, (u32 *) &raw);
+
+                       if (status == DMUB_STATUS_INVALID)
+                               return -EINVAL;
+                       else if (status != DMUB_STATUS_OK)
+                               return -EIO;
+               } else if (status == DMUB_STATUS_TIMEOUT) {
+                       return -ETIMEDOUT;
+               } else if (status == DMUB_STATUS_INVALID) {
+                       return -EINVAL;
+               } else {
+                       return -EIO;
+               }
+
+               usleep_range(100, 1000);
+
+               cmd++;
+               res |= (raw << shift);
+               shift += 16;
+               i++;
+       }
+
+       *val = res;
+
+       return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(dmub_trace_mask_fops, dmub_trace_mask_show,
+                        dmub_trace_mask_set, "0x%llx\n");
+
 /*
  * Set dmcub trace event IRQ enable or disable.
  * Usage to enable dmcub trace event IRQ: echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
@@ -3884,6 +3982,9 @@ void dtn_debugfs_init(struct amdgpu_device *adev)
        debugfs_create_file_unsafe("amdgpu_dm_force_timing_sync", 0644, root,
                                   adev, &force_timing_sync_ops);
 
+       debugfs_create_file_unsafe("amdgpu_dm_dmub_trace_mask", 0644, root,
+                                  adev, &dmub_trace_mask_fops);
+
        debugfs_create_file_unsafe("amdgpu_dm_dmcub_trace_event_en", 0644, root,
                                   adev, &dmcub_trace_event_state_fops);
 
index ed4379c..aa6e692 100644 (file)
@@ -818,18 +818,54 @@ enum dmub_gpint_command {
         * RETURN: Lower 32-bit mask.
         */
        DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK = 101,
+
        /**
-        * DESC: Updates the trace buffer lower 32-bit mask.
+        * DESC: Updates the trace buffer mask bit0~bit15.
         * ARGS: The new mask
         * RETURN: Lower 32-bit mask.
         */
        DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD0 = 102,
+
        /**
-        * DESC: Updates the trace buffer mask bi0~bit15.
+        * DESC: Updates the trace buffer mask bit16~bit31.
         * ARGS: The new mask
         * RETURN: Lower 32-bit mask.
         */
        DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1 = 103,
+
+       /**
+        * DESC: Updates the trace buffer mask bit32~bit47.
+        * ARGS: The new mask
+        * RETURN: Lower 32-bit mask.
+        */
+       DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD2 = 114,
+
+       /**
+        * DESC: Updates the trace buffer mask bit48~bit63.
+        * ARGS: The new mask
+        * RETURN: Lower 32-bit mask.
+        */
+       DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD3 = 115,
+
+       /**
+        * DESC: Read the trace buffer mask bi0~bit15.
+        */
+       DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0 = 116,
+
+       /**
+        * DESC: Read the trace buffer mask bit16~bit31.
+        */
+       DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD1 = 117,
+
+       /**
+        * DESC: Read the trace buffer mask bi32~bit47.
+        */
+       DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD2 = 118,
+
+       /**
+        * DESC: Updates the trace buffer mask bit32~bit63.
+        */
+       DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD3 = 119,
 };
 
 /**