lib/list_debug.c: add object information in case of invalid object
authorManinder Singh <maninder1.s@samsung.com>
Mon, 30 Dec 2024 10:10:43 +0000 (15:40 +0530)
committerAndrew Morton <akpm@linux-foundation.org>
Sun, 26 Jan 2025 04:22:23 +0000 (20:22 -0800)
As of now, when linked list corruption is detected, only the culprit
address and its wrong value are printed, but sometimes that is not enough
to find the actual point of failure.

If it also prints the allocation and free paths of the corrupted node, it
will be a lot easier to find and fix such issues.

Add this allocation/free path information when a data mismatch is found
in the linked list debug checks:

[   14.243055]  slab kmalloc-32 start ffff0000cda19320 data offset 32 pointer offset 8 size 32 allocated at add_to_list+0x28/0xb0
[   14.245259]     __kmalloc_cache_noprof+0x1c4/0x358
[   14.245572]     add_to_list+0x28/0xb0
...
[   14.248632]     do_el0_svc_compat+0x1c/0x34
[   14.249018]     el0_svc_compat+0x2c/0x80
[   14.249244]  Free path:
[   14.249410]     kfree+0x24c/0x2f0
[   14.249724]     do_force_corruption+0xbc/0x100
...
[   14.252266]     el0_svc_common.constprop.0+0x40/0xe0
[   14.252540]     do_el0_svc_compat+0x1c/0x34
[   14.252763]     el0_svc_compat+0x2c/0x80
[   14.253071] ------------[ cut here ]------------
[   14.253303] list_del corruption. next->prev should be ffff0000cda192a8, but was 6b6b6b6b6b6b6b6b. (next=ffff0000cda19348)
[   14.254255] WARNING: CPU: 3 PID: 84 at lib/list_debug.c:65 __list_del_entry_valid_or_report+0x158/0x164

Move the prototype of mem_dump_obj() to bug.h, as mm.h cannot be included
in bug.h.

Link: https://lkml.kernel.org/r/20241230101043.53773-1-maninder1.s@samsung.com
Signed-off-by: Maninder Singh <maninder1.s@samsung.com>
Acked-by: Jan Kara <jack@suse.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Marco Elver <elver@google.com>
Cc: Rohit Thapliyal <r.thapliyal@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
fs/open.c
fs/super.c
include/linux/bug.h
include/linux/mm.h
lib/list_debug.c

index e691110..0f75e22 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -1504,7 +1504,7 @@ static int filp_flush(struct file *filp, fl_owner_t id)
 {
        int retval = 0;
 
-       if (CHECK_DATA_CORRUPTION(file_count(filp) == 0,
+       if (CHECK_DATA_CORRUPTION(file_count(filp) == 0, filp,
                        "VFS: Close: file count is 0 (f_op=%ps)",
                        filp->f_op)) {
                return 0;
index c9c7223..5a7db4a 100644 (file)
@@ -647,7 +647,7 @@ void generic_shutdown_super(struct super_block *sb)
                 */
                fscrypt_destroy_keyring(sb);
 
-               if (CHECK_DATA_CORRUPTION(!list_empty(&sb->s_inodes),
+               if (CHECK_DATA_CORRUPTION(!list_empty(&sb->s_inodes), NULL,
                                "VFS: Busy inodes after unmount of %s (%s)",
                                sb->s_id, sb->s_type->name)) {
                        /*
index 348acf2..a9948a9 100644 (file)
@@ -73,15 +73,23 @@ static inline void generic_bug_clear_once(void) {}
 
 #endif /* CONFIG_GENERIC_BUG */
 
+#ifdef CONFIG_PRINTK
+void mem_dump_obj(void *object);
+#else
+static inline void mem_dump_obj(void *object) {}
+#endif
+
 /*
  * Since detected data corruption should stop operation on the affected
  * structures. Return value must be checked and sanely acted on by caller.
  */
 static inline __must_check bool check_data_corruption(bool v) { return v; }
-#define CHECK_DATA_CORRUPTION(condition, fmt, ...)                      \
+#define CHECK_DATA_CORRUPTION(condition, addr, fmt, ...)                \
        check_data_corruption(({                                         \
                bool corruption = unlikely(condition);                   \
                if (corruption) {                                        \
+                       if (addr)                                        \
+                               mem_dump_obj(addr);                      \
                        if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \
                                pr_err(fmt, ##__VA_ARGS__);              \
                                BUG();                                   \
index 15a903d..c550912 100644 (file)
@@ -4084,12 +4084,6 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping,
 
 extern int sysctl_nr_trim_pages;
 
-#ifdef CONFIG_PRINTK
-void mem_dump_obj(void *object);
-#else
-static inline void mem_dump_obj(void *object) {}
-#endif
-
 #ifdef CONFIG_ANON_VMA_NAME
 int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
                          unsigned long len_in,
index db60241..ee7eeeb 100644 (file)
@@ -22,17 +22,17 @@ __list_valid_slowpath
 bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev,
                                struct list_head *next)
 {
-       if (CHECK_DATA_CORRUPTION(prev == NULL,
+       if (CHECK_DATA_CORRUPTION(prev == NULL, NULL,
                        "list_add corruption. prev is NULL.\n") ||
-           CHECK_DATA_CORRUPTION(next == NULL,
+           CHECK_DATA_CORRUPTION(next == NULL, NULL,
                        "list_add corruption. next is NULL.\n") ||
-           CHECK_DATA_CORRUPTION(next->prev != prev,
+           CHECK_DATA_CORRUPTION(next->prev != prev, next,
                        "list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n",
                        prev, next->prev, next) ||
-           CHECK_DATA_CORRUPTION(prev->next != next,
+           CHECK_DATA_CORRUPTION(prev->next != next, prev,
                        "list_add corruption. prev->next should be next (%px), but was %px. (prev=%px).\n",
                        next, prev->next, prev) ||
-           CHECK_DATA_CORRUPTION(new == prev || new == next,
+           CHECK_DATA_CORRUPTION(new == prev || new == next, NULL,
                        "list_add double add: new=%px, prev=%px, next=%px.\n",
                        new, prev, next))
                return false;
@@ -49,20 +49,20 @@ bool __list_del_entry_valid_or_report(struct list_head *entry)
        prev = entry->prev;
        next = entry->next;
 
-       if (CHECK_DATA_CORRUPTION(next == NULL,
+       if (CHECK_DATA_CORRUPTION(next == NULL, NULL,
                        "list_del corruption, %px->next is NULL\n", entry) ||
-           CHECK_DATA_CORRUPTION(prev == NULL,
+           CHECK_DATA_CORRUPTION(prev == NULL, NULL,
                        "list_del corruption, %px->prev is NULL\n", entry) ||
-           CHECK_DATA_CORRUPTION(next == LIST_POISON1,
+           CHECK_DATA_CORRUPTION(next == LIST_POISON1, next,
                        "list_del corruption, %px->next is LIST_POISON1 (%px)\n",
                        entry, LIST_POISON1) ||
-           CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
+           CHECK_DATA_CORRUPTION(prev == LIST_POISON2, prev,
                        "list_del corruption, %px->prev is LIST_POISON2 (%px)\n",
                        entry, LIST_POISON2) ||
-           CHECK_DATA_CORRUPTION(prev->next != entry,
+           CHECK_DATA_CORRUPTION(prev->next != entry, prev,
                        "list_del corruption. prev->next should be %px, but was %px. (prev=%px)\n",
                        entry, prev->next, prev) ||
-           CHECK_DATA_CORRUPTION(next->prev != entry,
+           CHECK_DATA_CORRUPTION(next->prev != entry, next,
                        "list_del corruption. next->prev should be %px, but was %px. (next=%px)\n",
                        entry, next->prev, next))
                return false;