lib/list_debug.c: Detect uninitialized lists
authorGuenter Roeck <linux@roeck-us.net>
Tue, 31 May 2022 22:29:51 +0000 (15:29 -0700)
committerakpm <akpm@linux-foundation.org>
Fri, 17 Jun 2022 02:58:20 +0000 (19:58 -0700)
In some circumstances, attempts are made to add entries to or to remove
entries from an uninitialized list.  A prime example is
amdgpu_bo_vm_destroy(): It is indirectly called from
ttm_bo_init_reserved() if that function fails, and tries to remove an
entry from a list.  However, that list is only initialized in
amdgpu_bo_create_vm() after the call to ttm_bo_init_reserved() returned
success.  This results in crashes such as

 BUG: kernel NULL pointer dereference, address: 0000000000000000
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x0000) - not-present page
 PGD 0 P4D 0
 Oops: 0000 [#1] PREEMPT SMP NOPTI
 CPU: 1 PID: 1479 Comm: chrome Not tainted 5.10.110-15768-g29a72e65dae5
 Hardware name: Google Grunt/Grunt, BIOS Google_Grunt.11031.149.0 07/15/2020
 RIP: 0010:__list_del_entry_valid+0x26/0x7d
 ...
 Call Trace:
  amdgpu_bo_vm_destroy+0x48/0x8b
  ttm_bo_init_reserved+0x1d7/0x1e0
  amdgpu_bo_create+0x212/0x476
  ? amdgpu_bo_user_destroy+0x23/0x23
  ? kmem_cache_alloc+0x60/0x271
  amdgpu_bo_create_vm+0x40/0x7d
  amdgpu_vm_pt_create+0xe8/0x24b
 ...

Check if the list's prev and next pointers are NULL to catch such problems.

Link: https://lkml.kernel.org/r/20220531222951.92073-1-linux@roeck-us.net
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
lib/list_debug.c

index 9daa3fb..d98d43f 100644 (file)
 bool __list_add_valid(struct list_head *new, struct list_head *prev,
                      struct list_head *next)
 {
-       if (CHECK_DATA_CORRUPTION(next->prev != prev,
+       if (CHECK_DATA_CORRUPTION(prev == NULL,
+                       "list_add corruption. prev is NULL.\n") ||
+           CHECK_DATA_CORRUPTION(next == NULL,
+                       "list_add corruption. next is NULL.\n") ||
+           CHECK_DATA_CORRUPTION(next->prev != prev,
                        "list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n",
                        prev, next->prev, next) ||
            CHECK_DATA_CORRUPTION(prev->next != next,
@@ -42,7 +46,11 @@ bool __list_del_entry_valid(struct list_head *entry)
        prev = entry->prev;
        next = entry->next;
 
-       if (CHECK_DATA_CORRUPTION(next == LIST_POISON1,
+       if (CHECK_DATA_CORRUPTION(next == NULL,
+                       "list_del corruption, %px->next is NULL\n", entry) ||
+           CHECK_DATA_CORRUPTION(prev == NULL,
+                       "list_del corruption, %px->prev is NULL\n", entry) ||
+           CHECK_DATA_CORRUPTION(next == LIST_POISON1,
                        "list_del corruption, %px->next is LIST_POISON1 (%px)\n",
                        entry, LIST_POISON1) ||
            CHECK_DATA_CORRUPTION(prev == LIST_POISON2,