selftests/bpf: De-flake test_tcpbpf
author: Stanislav Fomichev <sdf@google.com>
Wed, 4 Dec 2019 19:09:55 +0000 (11:09 -0800)
committer: Alexei Starovoitov <ast@kernel.org>
Thu, 5 Dec 2019 02:01:05 +0000 (18:01 -0800)
It looks like the BPF program that handles the BPF_SOCK_OPS_STATE_CB
state can race with the bpf_map_lookup_elem("global_map") call; I
sometimes see failures in this test, and re-running helps.

Since we know that the callback is expected to be called 3 times (once
for the listener socket and once for each end of the connection),
let's export this number and add simple retry logic around that.

Also, let's make EXPECT_EQ() not return on failure, but continue
evaluating all conditions; that should make potential debugging
easier.

With this fix in place I don't observe the flakiness anymore.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Lawrence Brakmo <brakmo@fb.com>
Link: https://lore.kernel.org/bpf/20191204190955.170934-1-sdf@google.com
tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
tools/testing/selftests/bpf/test_tcpbpf.h
tools/testing/selftests/bpf/test_tcpbpf_user.c

index 2e23361..7fa4595 100644 (file)
@@ -131,6 +131,7 @@ int bpf_testcb(struct bpf_sock_ops *skops)
                                g.bytes_received = skops->bytes_received;
                                g.bytes_acked = skops->bytes_acked;
                        }
+                       g.num_close_events++;
                        bpf_map_update_elem(&global_map, &key, &g,
                                            BPF_ANY);
                }
index 7bcfa62..6220b95 100644 (file)
@@ -13,5 +13,6 @@ struct tcpbpf_globals {
        __u64 bytes_received;
        __u64 bytes_acked;
        __u32 num_listen;
+       __u32 num_close_events;
 };
 #endif
index 716b4e3..3ae1276 100644 (file)
@@ -16,6 +16,9 @@
 
 #include "test_tcpbpf.h"
 
+/* 3 comes from one listening socket + both ends of the connection */
+#define EXPECTED_CLOSE_EVENTS          3
+
 #define EXPECT_EQ(expected, actual, fmt)                       \
        do {                                                    \
                if ((expected) != (actual)) {                   \
                               "    Actual: %" fmt "\n"         \
                               "  Expected: %" fmt "\n",        \
                               (actual), (expected));           \
-                       goto err;                               \
+                       ret--;                                  \
                }                                               \
        } while (0)
 
 int verify_result(const struct tcpbpf_globals *result)
 {
        __u32 expected_events;
+       int ret = 0;
 
        expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
                           (1 << BPF_SOCK_OPS_RWND_INIT) |
@@ -48,15 +52,15 @@ int verify_result(const struct tcpbpf_globals *result)
        EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
        EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
        EXPECT_EQ(1, result->num_listen, PRIu32);
+       EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32);
 
-       return 0;
-err:
-       return -1;
+       return ret;
 }
 
 int verify_sockopt_result(int sock_map_fd)
 {
        __u32 key = 0;
+       int ret = 0;
        int res;
        int rv;
 
@@ -69,9 +73,7 @@ int verify_sockopt_result(int sock_map_fd)
        rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
        EXPECT_EQ(0, rv, "d");
        EXPECT_EQ(1, res, "d");
-       return 0;
-err:
-       return -1;
+       return ret;
 }
 
 static int bpf_find_map(const char *test, struct bpf_object *obj,
@@ -96,6 +98,7 @@ int main(int argc, char **argv)
        int error = EXIT_FAILURE;
        struct bpf_object *obj;
        int cg_fd = -1;
+       int retry = 10;
        __u32 key = 0;
        int rv;
 
@@ -134,12 +137,20 @@ int main(int argc, char **argv)
        if (sock_map_fd < 0)
                goto err;
 
+retry_lookup:
        rv = bpf_map_lookup_elem(map_fd, &key, &g);
        if (rv != 0) {
                printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
                goto err;
        }
 
+       if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) {
+               printf("Unexpected number of close events (%d), retrying!\n",
+                      g.num_close_events);
+               usleep(100);
+               goto retry_lookup;
+       }
+
        if (verify_result(&g)) {
                printf("FAILED: Wrong stats\n");
                goto err;