selftests: mlxsw: Add a RED selftest
authorPetr Machata <petrm@mellanox.com>
Thu, 27 Feb 2020 07:50:07 +0000 (08:50 +0100)
committerDavid S. Miller <davem@davemloft.net>
Thu, 27 Feb 2020 19:10:14 +0000 (11:10 -0800)
This tests that below the queue minimum length, there is no dropping /
marking, and above max, everything is dropped / marked.

The test is structured as a core file with topology and test code, and
three wrappers: one for RED used as a root Qdisc, and two for
testing (W)RED under PRIO and ETS.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh [new file with mode: 0644]
tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh [new file with mode: 0755]
tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh [new file with mode: 0755]
tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/lib.sh

diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
new file mode 100644 (file)
index 0000000..ebf7752
--- /dev/null
@@ -0,0 +1,499 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends a >1Gbps stream of traffic from H1, to the switch, which
+# forwards it to a 1Gbps port. This 1Gbps stream is then looped back to the
+# switch and forwarded to the port under test $swp3, which is also 1Gbps.
+#
+# This way, $swp3 should be 100% filled with traffic without any of it spilling
+# to the backlog. Any extra packets sent should almost 1:1 go to backlog. That
+# is what H2 is used for--it sends the extra traffic to create backlog.
+#
+# A RED Qdisc is installed on $swp3. The configuration is such that the minimum
+# and maximum size are 1 byte apart, so there is a very clear border under which
+# no marking or dropping takes place, and above which everything is marked or
+# dropped.
+#
+# The test uses the buffer build-up behavior to test the installed RED.
+#
+# In order to test WRED, $swp3 actually contains RED under PRIO, with two
+# different configurations. Traffic is prioritized using 802.1p and relies on
+# the implicit mlxsw configuration, where packet priority is taken 1:1 from the
+# 802.1p marking.
+#
+# +--------------------------+                     +--------------------------+
+# | H1                       |                     | H2                       |
+# |     + $h1.10             |                     |     + $h2.10             |
+# |     | 192.0.2.1/28       |                     |     | 192.0.2.2/28       |
+# |     |                    |                     |     |                    |
+# |     |         $h1.11 +   |                     |     |         $h2.11 +   |
+# |     |  192.0.2.17/28 |   |                     |     |  192.0.2.18/28 |   |
+# |     |                |   |                     |     |                |   |
+# |     \______    ______/   |                     |     \______    ______/   |
+# |            \ /           |                     |            \ /           |
+# |             + $h1        |                     |             + $h2        |
+# +-------------|------------+                     +-------------|------------+
+#               | >1Gbps                                         |
+# +-------------|------------------------------------------------|------------+
+# | SW          + $swp1                                          + $swp2      |
+# |     _______/ \___________                        ___________/ \_______    |
+# |    /                     \                      /                     \   |
+# |  +-|-----------------+   |                    +-|-----------------+   |   |
+# |  | + $swp1.10        |   |                    | + $swp2.10        |   |   |
+# |  |                   |   |        .-------------+ $swp5.10        |   |   |
+# |  |     BR1_10        |   |        |           |                   |   |   |
+# |  |                   |   |        |           |     BR2_10        |   |   |
+# |  | + $swp2.10        |   |        |           |                   |   |   |
+# |  +-|-----------------+   |        |           | + $swp3.10        |   |   |
+# |    |                     |        |           +-|-----------------+   |   |
+# |    |   +-----------------|-+      |             |   +-----------------|-+ |
+# |    |   |        $swp1.11 + |      |             |   |        $swp2.11 + | |
+# |    |   |                   |      | .-----------------+ $swp5.11        | |
+# |    |   |      BR1_11       |      | |           |   |                   | |
+# |    |   |                   |      | |           |   |      BR2_11       | |
+# |    |   |        $swp2.11 + |      | |           |   |                   | |
+# |    |   +-----------------|-+      | |           |   |        $swp3.11 + | |
+# |    |                     |        | |           |   +-----------------|-+ |
+# |    \_______   ___________/        | |           \___________   _______/   |
+# |            \ /                    \ /                       \ /           |
+# |             + $swp4                + $swp5                   + $swp3      |
+# +-------------|----------------------|-------------------------|------------+
+#               |                      |                         | 1Gbps
+#               \________1Gbps_________/                         |
+#                                   +----------------------------|------------+
+#                                   | H3                         + $h3        |
+#                                   |      _____________________/ \_______    |
+#                                   |     /                               \   |
+#                                   |     |                               |   |
+#                                   |     + $h3.10                 $h3.11 +   |
+#                                   |       192.0.2.3/28    192.0.2.19/28     |
+#                                   +-----------------------------------------+
+
+NUM_NETIFS=8
+CHECK_TC="yes"
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+ipaddr()
+{
+       local host=$1; shift
+       local vlan=$1; shift
+
+       echo 192.0.2.$((16 * (vlan - 10) + host))
+}
+
+host_create()
+{
+       local dev=$1; shift
+       local host=$1; shift
+
+       simple_if_init $dev
+       mtu_set $dev 10000
+
+       vlan_create $dev 10 v$dev $(ipaddr $host 10)/28
+       ip link set dev $dev.10 type vlan egress 0:0
+
+       vlan_create $dev 11 v$dev $(ipaddr $host 11)/28
+       ip link set dev $dev.11 type vlan egress 0:1
+}
+
+host_destroy()
+{
+       local dev=$1; shift
+
+       vlan_destroy $dev 11
+       vlan_destroy $dev 10
+       mtu_restore $dev
+       simple_if_fini $dev
+}
+
+h1_create()
+{
+       host_create $h1 1
+}
+
+h1_destroy()
+{
+       host_destroy $h1
+}
+
+h2_create()
+{
+       host_create $h2 2
+
+       # Some of the tests in this suite use multicast traffic. As this traffic
+       # enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus
+       # e.g. traffic ingressing through $swp2 is flooded to $swp3 (the
+       # intended destination) and $swp5 (which is intended as ingress for
+       # another stream of traffic).
+       #
+       # This is generally not a problem, but if the $swp5 throughput is lower
+       # than $swp2 throughput, there will be a build-up at $swp5. That may
+       # cause packets to fail to queue up at $swp3 due to shared buffer
+       # quotas, and the test to spuriously fail.
+       #
+       # Prevent this by setting the speed of $h2 to 1Gbps.
+
+       ethtool -s $h2 speed 1000 autoneg off
+}
+
+h2_destroy()
+{
+       ethtool -s $h2 autoneg on
+       host_destroy $h2
+}
+
+h3_create()
+{
+       host_create $h3 3
+       ethtool -s $h3 speed 1000 autoneg off
+}
+
+h3_destroy()
+{
+       ethtool -s $h3 autoneg on
+       host_destroy $h3
+}
+
+switch_create()
+{
+       local intf
+       local vlan
+
+       ip link add dev br1_10 type bridge
+       ip link add dev br1_11 type bridge
+
+       ip link add dev br2_10 type bridge
+       ip link add dev br2_11 type bridge
+
+       for intf in $swp1 $swp2 $swp3 $swp4 $swp5; do
+               ip link set dev $intf up
+               mtu_set $intf 10000
+       done
+
+       for intf in $swp1 $swp4; do
+               for vlan in 10 11; do
+                       vlan_create $intf $vlan
+                       ip link set dev $intf.$vlan master br1_$vlan
+                       ip link set dev $intf.$vlan up
+               done
+       done
+
+       for intf in $swp2 $swp3 $swp5; do
+               for vlan in 10 11; do
+                       vlan_create $intf $vlan
+                       ip link set dev $intf.$vlan master br2_$vlan
+                       ip link set dev $intf.$vlan up
+               done
+       done
+
+       ip link set dev $swp4.10 type vlan egress 0:0
+       ip link set dev $swp4.11 type vlan egress 0:1
+       for intf in $swp1 $swp2 $swp5; do
+               for vlan in 10 11; do
+                       ip link set dev $intf.$vlan type vlan ingress 0:0 1:1
+               done
+       done
+
+       for intf in $swp2 $swp3 $swp4 $swp5; do
+               ethtool -s $intf speed 1000 autoneg off
+       done
+
+       ip link set dev br1_10 up
+       ip link set dev br1_11 up
+       ip link set dev br2_10 up
+       ip link set dev br2_11 up
+
+       local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
+       devlink_port_pool_th_set $swp3 8 $size
+}
+
+switch_destroy()
+{
+       local intf
+       local vlan
+
+       devlink_port_pool_th_restore $swp3 8
+
+       tc qdisc del dev $swp3 root 2>/dev/null
+
+       ip link set dev br2_11 down
+       ip link set dev br2_10 down
+       ip link set dev br1_11 down
+       ip link set dev br1_10 down
+
+       for intf in $swp5 $swp4 $swp3 $swp2; do
+               ethtool -s $intf autoneg on
+       done
+
+       for intf in $swp5 $swp3 $swp2 $swp4 $swp1; do
+               for vlan in 11 10; do
+                       ip link set dev $intf.$vlan down
+                       ip link set dev $intf.$vlan nomaster
+                       vlan_destroy $intf $vlan
+               done
+
+               mtu_restore $intf
+               ip link set dev $intf down
+       done
+
+       ip link del dev br2_11
+       ip link del dev br2_10
+       ip link del dev br1_11
+       ip link del dev br1_10
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       swp3=${NETIFS[p5]}
+       h3=${NETIFS[p6]}
+
+       swp4=${NETIFS[p7]}
+       swp5=${NETIFS[p8]}
+
+       h3_mac=$(mac_get $h3)
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+       h3_create
+       switch_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+       h3_destroy
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+ping_ipv4()
+{
+       ping_test $h1.10 $(ipaddr 3 10) " from host 1, vlan 10"
+       ping_test $h1.11 $(ipaddr 3 11) " from host 1, vlan 11"
+       ping_test $h2.10 $(ipaddr 3 10) " from host 2, vlan 10"
+       ping_test $h2.11 $(ipaddr 3 11) " from host 2, vlan 11"
+}
+
+get_tc()
+{
+       local vlan=$1; shift
+
+       echo $((vlan - 10))
+}
+
+get_qdisc_handle()
+{
+       local vlan=$1; shift
+
+       local tc=$(get_tc $vlan)
+       local band=$((8 - tc))
+
+       # Handle is 107: for TC1, 108: for TC0.
+       echo "10$band:"
+}
+
+get_qdisc_backlog()
+{
+       local vlan=$1; shift
+
+       qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .backlog
+}
+
+get_mc_transmit_queue()
+{
+       local vlan=$1; shift
+
+       local tc=$(($(get_tc $vlan) + 8))
+       ethtool_stats_get $swp3 tc_transmit_queue_tc_$tc
+}
+
+get_nmarked()
+{
+       local vlan=$1; shift
+
+       ethtool_stats_get $swp3 ecn_marked
+}
+
+get_qdisc_npackets()
+{
+       local vlan=$1; shift
+
+       busywait_for_counter 1100 +1 \
+               qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .packets
+}
+
+# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
+# success. After 10 failed attempts it bails out and returns 1. It dumps the
+# backlog size to stdout.
+build_backlog()
+{
+       local vlan=$1; shift
+       local size=$1; shift
+       local proto=$1; shift
+
+       local tc=$((vlan - 10))
+       local band=$((8 - tc))
+       local cur=-1
+       local i=0
+
+       while :; do
+               local cur=$(busywait 1100 until_counter_is $((cur + 1)) \
+                                           get_qdisc_backlog $vlan)
+               local diff=$((size - cur))
+               local pkts=$(((diff + 7999) / 8000))
+
+               if ((cur >= size)); then
+                       echo $cur
+                       return 0
+               elif ((i++ > 10)); then
+                       echo $cur
+                       return 1
+               fi
+
+               $MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
+                   -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \
+                   -t $proto -q -c $pkts "$@"
+       done
+}
+
+check_marking()
+{
+       local vlan=$1; shift
+       local cond=$1; shift
+
+       local npackets_0=$(get_qdisc_npackets $vlan)
+       local nmarked_0=$(get_nmarked $vlan)
+       sleep 5
+       local npackets_1=$(get_qdisc_npackets $vlan)
+       local nmarked_1=$(get_nmarked $vlan)
+
+       local nmarked_d=$((nmarked_1 - nmarked_0))
+       local npackets_d=$((npackets_1 - npackets_0))
+       local pct=$((100 * nmarked_d / npackets_d))
+
+       echo $pct
+       ((pct $cond))
+}
+
+do_ecn_test()
+{
+       local vlan=$1; shift
+       local limit=$1; shift
+       local backlog
+       local pct
+
+       # Main stream.
+       start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+                         $h3_mac tos=0x01
+
+       # Build the below-the-limit backlog using UDP. We could use TCP just
+       # fine, but this way we get a proof that UDP is accepted when queue
+       # length is below the limit. The main stream is using TCP, and if the
+       # limit is misconfigured, we would see this traffic being ECN marked.
+       RET=0
+       backlog=$(build_backlog $vlan $((2 * limit / 3)) udp)
+       check_err $? "Could not build the requested backlog"
+       pct=$(check_marking $vlan "== 0")
+       check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+       log_test "TC $((vlan - 10)): ECN backlog < limit"
+
+       # Now push TCP, because non-TCP traffic would be early-dropped after the
+       # backlog crosses the limit, and we want to make sure that the backlog
+       # is above the limit.
+       RET=0
+       backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
+       check_err $? "Could not build the requested backlog"
+       pct=$(check_marking $vlan ">= 95")
+       check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
+       log_test "TC $((vlan - 10)): ECN backlog > limit"
+
+       # Up there we saw that UDP gets accepted when backlog is below the
+       # limit. Now that it is above, it should all get dropped, and backlog
+       # building should fail.
+       RET=0
+       build_backlog $vlan $((2 * limit)) udp >/dev/null
+       check_fail $? "UDP traffic went into backlog instead of being early-dropped"
+       log_test "TC $((vlan - 10)): ECN backlog > limit: UDP early-dropped"
+
+       stop_traffic
+       sleep 1
+}
+
+do_red_test()
+{
+       local vlan=$1; shift
+       local limit=$1; shift
+       local backlog
+       local pct
+
+       # Use ECN-capable TCP to verify there's no marking even though the queue
+       # is above limit.
+       start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+                         $h3_mac tos=0x01
+
+       # Pushing below the queue limit should work.
+       RET=0
+       backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01)
+       check_err $? "Could not build the requested backlog"
+       pct=$(check_marking $vlan "== 0")
+       check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+       log_test "TC $((vlan - 10)): RED backlog < limit"
+
+       # Pushing above should not.
+       RET=0
+       backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
+       check_fail $? "Traffic went into backlog instead of being early-dropped"
+       pct=$(check_marking $vlan "== 0")
+       check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+       local diff=$((limit - backlog))
+       pct=$((100 * diff / limit))
+       ((0 <= pct && pct <= 5))
+       check_err $? "backlog $backlog / $limit expected <= 5% distance"
+       log_test "TC $((vlan - 10)): RED backlog > limit"
+
+       stop_traffic
+       sleep 1
+}
+
+do_mc_backlog_test()
+{
+       local vlan=$1; shift
+       local limit=$1; shift
+       local backlog
+       local pct
+
+       RET=0
+
+       start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc
+       start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc
+
+       qbl=$(busywait 5000 until_counter_is 500000 \
+                      get_qdisc_backlog $vlan)
+       check_err $? "Could not build MC backlog"
+
+       # Verify that we actually see the backlog on BUM TC. Do a busywait as
+       # well, performance blips might cause false fail.
+       local ebl
+       ebl=$(busywait 5000 until_counter_is 500000 \
+                      get_mc_transmit_queue $vlan)
+       check_err $? "MC backlog reported by qdisc not visible in ethtool"
+
+       stop_traffic
+       stop_traffic
+
+       log_test "TC $((vlan - 10)): Qdisc reports MC backlog"
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
new file mode 100755 (executable)
index 0000000..af83efe
--- /dev/null
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+       ping_ipv4
+       ecn_test
+       red_test
+       mc_backlog_test
+"
+: ${QDISC:=ets}
+source sch_red_core.sh
+
+# do_ecn_test first build 2/3 of the requested backlog and expects no marking,
+# and then builds 3/2 of it and does expect marking. The values of $BACKLOG1 and
+# $BACKLOG2 are far enough not to overlap, so that we can assume that if we do
+# see (do not see) marking, it is actually due to the configuration of that one
+# TC, and not due to configuration of the other TC leaking over.
+BACKLOG1=200000
+BACKLOG2=500000
+
+install_qdisc()
+{
+       local -a args=("$@")
+
+       tc qdisc add dev $swp3 root handle 10: $QDISC \
+          bands 8 priomap 7 6 5 4 3 2 1 0
+       tc qdisc add dev $swp3 parent 10:8 handle 108: red \
+          limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \
+          probability 1.0 avpkt 8000 burst 38 "${args[@]}"
+       tc qdisc add dev $swp3 parent 10:7 handle 107: red \
+          limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \
+          probability 1.0 avpkt 8000 burst 63 "${args[@]}"
+       sleep 1
+}
+
+uninstall_qdisc()
+{
+       tc qdisc del dev $swp3 parent 10:7
+       tc qdisc del dev $swp3 parent 10:8
+       tc qdisc del dev $swp3 root
+}
+
+ecn_test()
+{
+       install_qdisc ecn
+
+       do_ecn_test 10 $BACKLOG1
+       do_ecn_test 11 $BACKLOG2
+
+       uninstall_qdisc
+}
+
+red_test()
+{
+       install_qdisc
+
+       do_red_test 10 $BACKLOG1
+       do_red_test 11 $BACKLOG2
+
+       uninstall_qdisc
+}
+
+mc_backlog_test()
+{
+       install_qdisc
+
+       # Note that the backlog numbers here do not correspond to RED
+       # configuration, but are arbitrary.
+       do_mc_backlog_test 10 $BACKLOG1
+       do_mc_backlog_test 11 $BACKLOG2
+
+       uninstall_qdisc
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+bail_on_lldpad
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh
new file mode 100755 (executable)
index 0000000..76820a0
--- /dev/null
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+QDISC=prio
+source sch_red_ets.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
new file mode 100755 (executable)
index 0000000..b221749
--- /dev/null
@@ -0,0 +1,60 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+       ping_ipv4
+       ecn_test
+       red_test
+       mc_backlog_test
+"
+source sch_red_core.sh
+
+BACKLOG=300000
+
+install_qdisc()
+{
+       local -a args=("$@")
+
+       tc qdisc add dev $swp3 root handle 108: red \
+          limit 1000000 min $BACKLOG max $((BACKLOG + 1)) \
+          probability 1.0 avpkt 8000 burst 38 "${args[@]}"
+       sleep 1
+}
+
+uninstall_qdisc()
+{
+       tc qdisc del dev $swp3 root
+}
+
+ecn_test()
+{
+       install_qdisc ecn
+       do_ecn_test 10 $BACKLOG
+       uninstall_qdisc
+}
+
+red_test()
+{
+       install_qdisc
+       do_red_test 10 $BACKLOG
+       uninstall_qdisc
+}
+
+mc_backlog_test()
+{
+       install_qdisc
+       # Note that the backlog value here does not correspond to RED
+       # configuration, but is arbitrary.
+       do_mc_backlog_test 10 $BACKLOG
+       uninstall_qdisc
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+bail_on_lldpad
+tests_run
+
+exit $EXIT_STATUS
index f80f384..aff3178 100644 (file)
@@ -607,6 +607,16 @@ ethtool_stats_get()
        ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2
 }
 
+qdisc_stats_get()
+{
+       local dev=$1; shift
+       local handle=$1; shift
+       local selector=$1; shift
+
+       tc -j -s qdisc show dev "$dev" \
+           | jq '.[] | select(.handle == "'"$handle"'") | '"$selector"
+}
+
 humanize()
 {
        local speed=$1; shift