2 # SPDX-License-Identifier: GPL-2.0
4 # This tests basic flowtable functionality.
5 # Creates following default topology:
7 # Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
8 # Router1 is the one doing flow offloading, Router2 has no special
9 # purpose other than having a link that is smaller than either Originator
10 # and responder, i.e. TCPMSS announced values are too large and will still
11 # result in fragmentation and/or PMTU discovery.
13 # You can check with different Originator/Link/Responder MTU eg:
14 # nft_flowtable.sh -o8000 -l1500 -r2000
18 # Kselftest framework requirement - SKIP code is 4.
# Remember the current net.netfilter.nf_log_all_netns value; it is read again
# further down when the sysctl is written back.
27 log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
# Tool check: run the command string in $1 (unquoted, so it word-splits into
# command and arguments) with all output discarded; on failure print a SKIP
# message built from $2.
# NOTE(review): the enclosing function header and whatever follows the echo
# are not visible in this excerpt.
30 if ! $1 > /dev/null 2>&1; then
31 echo "SKIP: Could not $2"
# Prerequisites: nft, ip and nc must be runnable, and creating the network
# namespace nsr1 must succeed.
36 checktool "nft --version" "run test without nft tool"
37 checktool "ip -Version" "run test without ip tool"
38 checktool "which nc" "run test without nc (netcat)"
39 checktool "ip netns add nsr1" "create net namespace"
# Remove the files named by $ns1in/$ns1out and $ns2in/$ns2out (-f: no error
# if they do not exist).
52 rm -f "$ns1in" "$ns1out"
53 rm -f "$ns2in" "$ns2out"
# Write the saved nf_log_all_netns value back, but only when that saved
# value was 0.
55 [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
# Set nf_log_all_netns to 1 for the test run.
60 sysctl -q net.netfilter.nf_log_all_netns=1
# Create three veth pairs, placing each end directly in its namespace:
#   nsr1:veth0 <-> ns1:eth0
#   nsr1:veth1 <-> nsr2:veth0
#   nsr2:veth1 <-> ns2:eth0
62 ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
63 ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2
65 ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2
# Bring up lo, veth0 and veth1 inside router namespace nsr$i.
# NOTE(review): the loop that assigns $i is not visible in this excerpt.
67 for dev in lo veth0 veth1; do
69 ip -net nsr$i link set $dev up
# nsr1's veth0 (the link towards ns1) gets 10.0.1.1/24 and dead:1::1/64.
73 ip -net nsr1 addr add 10.0.1.1/24 dev veth0
74 ip -net nsr1 addr add dead:1::1/64 dev veth0
# nsr2's veth1 (the link towards ns2) gets 10.0.2.1/24 and dead:2::1/64.
76 ip -net nsr2 addr add 10.0.2.1/24 dev veth1
77 ip -net nsr2 addr add dead:2::1/64 dev veth1
79 # set different MTUs so we need to push packets coming from ns1 (large MTU)
80 # to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
81 # or to do PMTU discovery (send ICMP error back to originator).
82 # ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
83 # is NOT the lowest link mtu.
# First line of the usage text.
90 echo "nft_flowtable.sh [OPTIONS]"
# Parse the options -o, -l and -r; each one takes an argument.
99 while getopts "o:l:r:" o
# Apply $omtu to both ends of the nsr1:veth0 <-> ns1:eth0 link. Only the
# router side is checked for failure in the visible lines.
109 if ! ip -net nsr1 link set veth0 mtu $omtu; then
113 ip -net ns1 link set eth0 mtu $omtu
# Apply $rmtu to both ends of the nsr2:veth1 <-> ns2:eth0 link, again
# checking only the router side.
115 if ! ip -net nsr2 link set veth1 mtu $rmtu; then
119 ip -net ns2 link set eth0 mtu $rmtu
121 # transfer-net between nsr1 and nsr2.
122 # these addresses are not used for connections.
123 ip -net nsr1 addr add 192.168.10.1/24 dev veth1
124 ip -net nsr1 addr add fee1:2::1/64 dev veth1
126 ip -net nsr2 addr add 192.168.10.2/24 dev veth0
127 ip -net nsr2 addr add fee1:2::2/64 dev veth0
# Enable IPv4 forwarding on both veth interfaces of router namespace nsr$i.
# NOTE(review): the loop that assigns $i is not visible in this excerpt.
130 ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
131 ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
# Endpoint namespace ns$i: bring up lo and eth0, assign 10.0.$i.99/24 and
# dead:$i::99/64, and add IPv4 and IPv6 default routes via the .1 / ::1
# address of the same network.
133 ip -net ns$i link set lo up
134 ip -net ns$i link set eth0 up
135 ip -net ns$i addr add 10.0.$i.99/24 dev eth0
136 ip -net ns$i route add default via 10.0.$i.1
137 ip -net ns$i addr add dead:$i::99/64 dev eth0
138 ip -net ns$i route add default via dead:$i::1
# Set net.ipv4.tcp_no_metrics_save=1 in ns$i; if the sysctl call fails an
# ERROR message is printed.
139 if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
140 echo "ERROR: Check Originator/Responder values (problem during address addition)"
144 # don't set ip DF bit for first two tests
145 ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
# Each router uses the other router's transfer-net address as its IPv4
# default gateway.
148 ip -net nsr1 route add default via 192.168.10.2
149 ip -net nsr2 route add default via 192.168.10.1
# Feed a ruleset to nft inside nsr1 via a here-document. The delimiter is
# unquoted, so the shell expands $lmtu before nft reads the text.
# The visible ruleset lines show: an ingress-hook definition bound to veth0
# and veth1, a forward chain with policy drop, a "flow offload @f1" rule for
# tcp dport 12345 leaving via veth1, ct mark set/clear rules used to detect
# traffic that was not offloaded, an accept rule for packets longer than
# $lmtu tagged with the comment "something-to-grep-for", and accept rules for
# established/related traffic, short initial packets, ICMP and ICMPv6.
# NOTE(review): table/chain headers, closing braces and the EOF terminator
# are not visible in this excerpt.
151 ip netns exec nsr1 nft -f - <<EOF
154 hook ingress priority 0
155 devices = { veth0, veth1 }
159 type filter hook forward priority 0; policy drop;
161 # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
162 meta oif "veth1" tcp dport 12345 flow offload @f1 counter
164 # use packet size to trigger 'should be offloaded by now'.
165 # otherwise, if 'flow offload' expression never offloads, the
167 tcp dport 12345 meta length gt 200 ct mark set 1 counter
169 # this turns off flow offloading internally, so expect packets again
170 tcp flags fin,rst ct mark set 0 accept
172 # this allows large packets from responder, we need this as long
174 # This rule is deleted for the last test, when we expect PMTUd
175 # to kick in and ensure all packets meet mtu requirements.
176 meta length gt $lmtu accept comment something-to-grep-for
178 # next line blocks connection w.o. working offload.
179 # we only do this for reverse dir, because we expect packets to
180 # enter slow path due to MTU mismatch of veth0 and veth1.
181 tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop
183 ct state established,related accept
185 # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
186 meta length lt 200 oif "veth1" tcp dport 12345 counter accept
188 meta nfproto ipv4 meta l4proto icmp accept
189 meta nfproto ipv6 meta l4proto icmpv6 accept
# A non-zero status from the preceding command (presumably the nft load
# above; intermediate lines are not visible) is reported as a SKIP.
194 if [ $? -ne 0 ]; then
195 echo "SKIP: Could not load nft ruleset"
199 # test basic connectivity
# One quiet ping in each direction across both routers; a failure is
# reported on stderr.
200 if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
201 echo "ERROR: ns1 cannot reach ns2" 1>&2
205 if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
206 echo "ERROR: ns2 cannot reach ns1" 1>&2
# $ret is assigned in lines not visible here; 0 is treated as success.
210 if [ $ret -eq 0 ];then
211 echo "PASS: netns routing/connectivity: ns1 can reach ns2"
# Fill the file "$name" with random data: a random number (0..8191) of
# 1024-byte blocks from /dev/urandom, with TSIZE tracking the byte count.
# NOTE(review): the function header and the assignment of $name are not
# visible in this excerpt.
223 SIZE=$((RANDOM % (1024 * 8)))
224 TSIZE=$((SIZE * 1024))
226 dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
# Second step: a random count of single bytes, added to TSIZE.
228 SIZE=$((RANDOM % 1024))
230 TSIZE=$((TSIZE + SIZE))
# NOTE(review): "conf=notrunc" is not a dd operand (dd spells it "conv=");
# with stderr sent to /dev/null this dd most likely fails silently, so no
# extra bytes are written and TSIZE overstates the file size. Even with
# "conv=notrunc" dd would write at offset 0 rather than append unless
# seek=/oflag=append (or a ">>" redirect) is used — confirm intent and fix.
231 dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
# Byte-compare the files "$in" and "$out" with cmp (its own output is
# discarded); on a difference, or if cmp fails for any other reason, report
# a mismatch for "$what" on stderr.
# NOTE(review): the function header and the assignments of $in/$out/$what
# are not visible in this excerpt.
240 if ! cmp "$in" "$out" > /dev/null 2>&1; then
241 echo "FAIL: file mismatch for $what" 1>&2
# Bidirectional TCP transfer between two namespaces using nc.
# NOTE(review): the lines assigning $nsa, $nsb, $dstip, $dstport, $lpid and
# $cpid are not visible in this excerpt.
250 test_tcp_forwarding_ip()
# Listener in $nsb on port 12345: sends the contents of $ns2in and stores
# what it receives in $ns2out. Runs in the background.
258 ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
# Client in $nsa: connects to $dstip:$dstport, sends $ns1in and stores the
# data received in $ns1out. Runs in the background.
262 ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
# Check whether the processes with PID $lpid / $cpid still exist
# (presumably the listener and client started above — confirm).
267 if ps -p $lpid > /dev/null;then
271 if ps -p $cpid > /dev/null;then
# Each side's output file must match the other side's input file.
277 if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then
281 if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then
# Plain forwarding test: run the transfer between the namespaces given as
# $1 and $2 against the address 10.0.2.99, port 12345.
288 test_tcp_forwarding()
290 test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
# NAT variant: first the same direct transfer, then, only if $lret is 0, a
# second transfer to 10.6.6.6:1666. The nat prerouting rules loaded by this
# script redirect 10.6.6.6:1666 to 10.0.2.99:12345.
# NOTE(review): the assignment of $lret is not visible in this excerpt.
295 test_tcp_forwarding_nat()
299 test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
302 if [ $lret -eq 0 ] ; then
303 test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
314 # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
# On failure, report on stderr and dump the nsr1 ruleset for diagnosis.
315 if test_tcp_forwarding ns1 ns2; then
316 echo "PASS: flow offloaded for ns1/ns2"
318 echo "FAIL: flow offload for ns1/ns2:" 1>&2
319 ip netns exec nsr1 nft list ruleset
323 # delete default route, i.e. ns2 won't be able to reach ns1 and
324 # will depend on ns1 being masqueraded in nsr1.
325 # expect ns1 has nsr1 address.
# ns2 keeps only a host route to nsr1's transfer-net address 192.168.10.1.
326 ip -net ns2 route del default via 10.0.2.1
327 ip -net ns2 route del default via dead:2::1
328 ip -net ns2 route add 192.168.10.1 via 10.0.2.1
331 # Same, but with NAT enabled.
# Load nat chains into nsr1 from a here-document: prerouting DNATs
# 10.6.6.6:1666 arriving on veth0 to 10.0.2.99:12345, postrouting
# masquerades everything leaving via veth1.
# NOTE(review): table/chain headers and the EOF terminator are not visible
# in this excerpt.
332 ip netns exec nsr1 nft -f - <<EOF
335 type nat hook prerouting priority 0; policy accept;
336 meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
340 type nat hook postrouting priority 0; policy accept;
341 meta oifname "veth1" counter masquerade
# Run the NAT transfer test; on failure report on stderr and dump the nsr1
# ruleset.
346 if test_tcp_forwarding_nat ns1 ns2; then
347 echo "PASS: flow offloaded for ns1/ns2 with NAT"
349 echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
350 ip netns exec nsr1 nft list ruleset
355 # Same as second test, but with PMTU discovery enabled.
# Locate the rule carrying the comment "something-to-grep-for" in the
# handle-annotated listing (-a) and keep the text after the '#' character,
# presumably " handle N"; it is then passed unquoted so it splits into
# words for "nft delete rule".
356 handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
358 if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
359 echo "FAIL: Could not delete large-packet accept rule"
# Set ip_no_pmtu_disc back to 0 in both endpoint namespaces (it was set to 1
# during setup).
363 ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
364 ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
# Repeat the NAT transfer test; on failure report on stderr and dump the
# nsr1 ruleset.
366 if test_tcp_forwarding_nat ns1 ns2; then
367 echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
369 echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
370 ip netns exec nsr1 nft list ruleset
374 # Add bridge interface br0 to Router1, with NAT enabled.
# Create br0, strip veth0's addresses, enslave veth0 to br0 and move the
# 10.0.1.1/24 and dead:1::1/64 addresses onto br0.
375 ip -net nsr1 link add name br0 type bridge
376 ip -net nsr1 addr flush dev veth0
377 ip -net nsr1 link set up dev veth0
378 ip -net nsr1 link set veth0 master br0
379 ip -net nsr1 addr add 10.0.1.1/24 dev br0
380 ip -net nsr1 addr add dead:1::1/64 dev br0
381 ip -net nsr1 link set up dev br0
# Enable IPv4 forwarding on the new bridge interface.
383 ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
385 # br0 with NAT enabled.
# Same nat rules as in the earlier NAT test, except that the prerouting rule
# matches input interface br0 instead of veth0.
# NOTE(review): table/chain headers and the EOF terminator are not visible
# in this excerpt.
386 ip netns exec nsr1 nft -f - <<EOF
390 type nat hook prerouting priority 0; policy accept;
391 meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
395 type nat hook postrouting priority 0; policy accept;
396 meta oifname "veth1" counter masquerade
# Run the NAT transfer test over the bridge; on failure report on stderr and
# dump the nsr1 ruleset.
401 if test_tcp_forwarding_nat ns1 ns2; then
402 echo "PASS: flow offloaded for ns1/ns2 with bridge NAT"
404 echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
405 ip netns exec nsr1 nft list ruleset
410 # Add bridge interface br0 to Router1, with NAT and VLAN.
# Router side: take veth0 out of br0, create VLAN 10 sub-interface veth0.10
# on top of it, and enslave veth0.10 to br0 instead.
411 ip -net nsr1 link set veth0 nomaster
412 ip -net nsr1 link set down dev veth0
413 ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10
414 ip -net nsr1 link set up dev veth0
415 ip -net nsr1 link set up dev veth0.10
416 ip -net nsr1 link set veth0.10 master br0
# ns1 side: remove the addresses from eth0, create VLAN 10 sub-interface
# eth0.10 and configure the 10.0.1.99/24 and dead:1::99/64 addresses plus
# the IPv4 default route there.
418 ip -net ns1 addr flush dev eth0
419 ip -net ns1 link add link eth0 name eth0.10 type vlan id 10
420 ip -net ns1 link set eth0 up
421 ip -net ns1 link set eth0.10 up
422 ip -net ns1 addr add 10.0.1.99/24 dev eth0.10
423 ip -net ns1 route add default via 10.0.1.1
424 ip -net ns1 addr add dead:1::99/64 dev eth0.10
# Run the NAT transfer test over bridge + VLAN; on failure report on stderr
# and dump the nsr1 ruleset.
426 if test_tcp_forwarding_nat ns1 ns2; then
427 echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN"
429 echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
430 ip netns exec nsr1 nft list ruleset
434 # restore test topology (remove bridge and VLAN)
# Router side: bring veth0 and veth0.10 down, then delete the VLAN
# sub-interface and the bridge.
435 ip -net nsr1 link set veth0 nomaster
436 ip -net nsr1 link set veth0 down
437 ip -net nsr1 link set veth0.10 down
438 ip -net nsr1 link delete veth0.10 type vlan
439 ip -net nsr1 link delete br0 type bridge
# ns1 side: flush and remove the VLAN sub-interface.
440 ip -net ns1 addr flush dev eth0.10
441 ip -net ns1 link set eth0.10 down
442 ip -net ns1 link set eth0 down
443 ip -net ns1 link delete eth0.10 type vlan
445 # restore address in ns1 and nsr1
# Re-create the addresses and default routes on the plain eth0 / veth0
# interfaces, matching the values used in the initial setup.
446 ip -net ns1 link set eth0 up
447 ip -net ns1 addr add 10.0.1.99/24 dev eth0
448 ip -net ns1 route add default via 10.0.1.1
449 ip -net ns1 addr add dead:1::99/64 dev eth0
450 ip -net ns1 route add default via dead:1::1
451 ip -net nsr1 addr add 10.0.1.1/24 dev veth0
452 ip -net nsr1 addr add dead:1::1/64 dev veth0
453 ip -net nsr1 link set up dev veth0
# Build "0x"-prefixed hex key strings from the SHA-1 and MD5 digests of the
# current process listing; they are passed as the auth and enc keys below.
455 KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
456 KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
# NOTE(review): SPI1/SPI2 are assigned in lines not visible here; this
# branch handles the case where both values are equal.
460 if [ $SPI1 -eq $SPI2 ]; then
# ESP tunnel-mode setup in namespace $ns (function header and the mapping of
# positional parameters to $ns/$me/$remote/$lnet/$rnet/$spi_in/$spi_out are
# not visible — TODO confirm against the calls below).
# State for traffic from $remote to $me (selector $rnet -> $lnet), then
# state for traffic from $me to $remote (selector $lnet -> $rnet).
473 ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet
474 ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
476 # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
477 ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow
478 # to fwd decrypted packets after esp processing:
479 ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 1 action allow
# Configure both routers as mirror images of each other: addresses,
# networks and the two SPI values are swapped between the calls.
483 do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
485 do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
# Drop the "ip nat" table from nsr1 before the ipsec test.
487 ip netns exec nsr1 nft delete table ip nat
489 # restore default routes
# Undo the ns2 routing change made before the NAT tests: remove the host
# route to 192.168.10.1 and re-add the IPv4 and IPv6 default routes.
490 ip -net ns2 route del 192.168.10.1 via 10.0.2.1
491 ip -net ns2 route add default via 10.0.2.1
492 ip -net ns2 route add default via dead:2::1
# Plain (non-NAT) transfer test with the xfrm configuration in place; on
# failure dump the nsr1 ruleset and /proc/net/xfrm_stat to stderr.
494 if test_tcp_forwarding ns1 ns2; then
495 echo "PASS: ipsec tunnel mode for ns1/ns2"
497 echo "FAIL: ipsec tunnel mode for ns1/ns2"
498 ip netns exec nsr1 nft list ruleset 1>&2
499 ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2