net/mlx5: Pack mlx5_rl_entry structure
authorParav Pandit <parav@nvidia.com>
Fri, 19 Feb 2021 06:18:12 +0000 (08:18 +0200)
committerSaeed Mahameed <saeedm@nvidia.com>
Fri, 2 Apr 2021 23:13:05 +0000 (16:13 -0700)
mlx5_rl_entry structure is not properly packed as shown below. Due to this
an array of size 9144 bytes allocated which is aligned to 16Kbytes.
Hence, pack the structure and avoid the wastage.

This offers 8Kbytes of saving per mlx5_core_dev struct.

pahole -C mlx5_rl_entry  drivers/net/ethernet/mellanox/mlx5/core/en_main.o

Existing layout:

struct mlx5_rl_entry {
        u8                         rl_raw[48];           /*     0    48 */
        u16                        index;                /*    48     2 */

        /* XXX 6 bytes hole, try to pack */

        u64                        refcount;             /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        u16                        uid;                  /*    64     2 */
        u8                         dedicated:1;          /*    66: 0  1 */

        /* size: 72, cachelines: 2, members: 5 */
        /* sum members: 60, holes: 1, sum holes: 6 */
        /* sum bitfield members: 1 bits (0 bytes) */
        /* padding: 5 */
        /* bit_padding: 7 bits */
        /* last cacheline: 8 bytes */
};

After alignment:

struct mlx5_rl_entry {
        u8                         rl_raw[48];           /*     0    48 */
        u64                        refcount;             /*    48     8 */
        u16                        index;                /*    56     2 */
        u16                        uid;                  /*    58     2 */
        u8                         dedicated:1;          /*    60: 0  1 */

        /* size: 64, cachelines: 1, members: 5 */
        /* padding: 3 */
        /* bit_padding: 7 bits */
};

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
include/linux/mlx5/driver.h

index 23bb01d..a9bd7e3 100644 (file)
@@ -517,8 +517,8 @@ struct mlx5_rate_limit {
 
 struct mlx5_rl_entry {
        u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)];
-       u16 index;
        u64 refcount;
+       u16 index;
        u16 uid;
        u8 dedicated : 1;
 };