2 # SPDX-License-Identifier: GPL-2.0
4 # This tests basic flowtable functionality.
5 # Creates the following default topology:
7 # Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
8 # Router1 is the one doing flow offloading, Router2 has no special
9 # purpose other than having a link MTU that is smaller than that of either
10 # Originator or Responder, i.e. TCPMSS announced values are too large and will still
11 # result in fragmentation and/or PMTU discovery.
13 # You can check with different Originator/Link/Responder MTU, e.g.:
14 # nft_flowtable.sh -o8000 -l1500 -r2000
18 # Kselftest framework requirement - SKIP code is 4.
27 log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
32 echo "SKIP: Could not $2"
37 checktool "nft --version" "run test without nft tool"
38 checktool "ip -Version" "run test without ip tool"
39 checktool "which nc" "run test without nc (netcat)"
40 checktool "ip netns add nsr1" "create net namespace"
53 rm -f "$ns1in" "$ns1out"
54 rm -f "$ns2in" "$ns2out"
56 [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
61 sysctl -q net.netfilter.nf_log_all_netns=1
63 ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
64 ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2
66 ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2
68 for dev in lo veth0 veth1; do
70 ip -net nsr$i link set $dev up
74 ip -net nsr1 addr add 10.0.1.1/24 dev veth0
75 ip -net nsr1 addr add dead:1::1/64 dev veth0
77 ip -net nsr2 addr add 10.0.2.1/24 dev veth1
78 ip -net nsr2 addr add dead:2::1/64 dev veth1
80 # set different MTUs so we need to push packets coming from ns1 (large MTU)
81 # to ns2 (smaller MTU) up to the stack, either to perform fragmentation (ip_no_pmtu_disc=1),
82 # or to do PMTU discovery (send ICMP error back to originator).
83 # ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
84 # is NOT the lowest link mtu.
90 while getopts "o:l:r:" o
99 if ! ip -net nsr1 link set veth0 mtu $omtu; then
103 ip -net ns1 link set eth0 mtu $omtu
105 if ! ip -net nsr2 link set veth1 mtu $rmtu; then
109 ip -net ns2 link set eth0 mtu $rmtu
111 # transfer-net between nsr1 and nsr2.
112 # these addresses are not used for connections.
113 ip -net nsr1 addr add 192.168.10.1/24 dev veth1
114 ip -net nsr1 addr add fee1:2::1/64 dev veth1
116 ip -net nsr2 addr add 192.168.10.2/24 dev veth0
117 ip -net nsr2 addr add fee1:2::2/64 dev veth0
120 ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
121 ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
123 ip -net ns$i link set lo up
124 ip -net ns$i link set eth0 up
125 ip -net ns$i addr add 10.0.$i.99/24 dev eth0
126 ip -net ns$i route add default via 10.0.$i.1
127 ip -net ns$i addr add dead:$i::99/64 dev eth0
128 ip -net ns$i route add default via dead:$i::1
129 if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
130 echo "ERROR: Check Originator/Responder values (problem during address addition)"
134 # don't set ip DF bit for first two tests
135 ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
138 ip -net nsr1 route add default via 192.168.10.2
139 ip -net nsr2 route add default via 192.168.10.1
141 ip netns exec nsr1 nft -f - <<EOF
144 hook ingress priority 0
145 devices = { veth0, veth1 }
149 type filter hook forward priority 0; policy drop;
151 # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
152 meta oif "veth1" tcp dport 12345 flow offload @f1 counter
154 # use packet size to trigger 'should be offloaded by now'.
155 # otherwise, if 'flow offload' expression never offloads, the
157 tcp dport 12345 meta length gt 200 ct mark set 1 counter
159 # this turns off flow offloading internally, so expect packets again
160 tcp flags fin,rst ct mark set 0 accept
162 # this allows large packets from responder, we need this as long
164 # This rule is deleted for the last test, when we expect PMTUd
165 # to kick in and ensure all packets meet mtu requirements.
166 meta length gt $lmtu accept comment something-to-grep-for
168 # next line blocks connection w.o. working offload.
169 # we only do this for reverse dir, because we expect packets to
170 # enter slow path due to MTU mismatch of veth0 and veth1.
171 tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop
173 ct state established,related accept
175 # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
176 meta length lt 200 oif "veth1" tcp dport 12345 counter accept
178 meta nfproto ipv4 meta l4proto icmp accept
179 meta nfproto ipv6 meta l4proto icmpv6 accept
184 if [ $? -ne 0 ]; then
185 echo "SKIP: Could not load nft ruleset"
189 # test basic connectivity
190 ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null
192 echo "ERROR: ns1 cannot reach ns2" 1>&2
197 ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null
199 echo "ERROR: ns2 cannot reach ns1" 1>&2
203 if [ $ret -eq 0 ];then
204 echo "PASS: netns routing/connectivity: ns1 can reach ns2"
216 SIZE=$((RANDOM % (1024 * 8)))
217 TSIZE=$((SIZE * 1024))
219 dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
221 SIZE=$((RANDOM % 1024))
223 TSIZE=$((TSIZE + SIZE))
# Append $SIZE extra random bytes so the file length is not a multiple of 1 KiB.
# The dd operand is conv= (conf= is rejected, and the error was hidden by the
# stderr redirect). notrunc alone would write over the start of the file, so
# oflag=append is required for the bytes to land at the end.
224 dd if=/dev/urandom conv=notrunc oflag=append of="$name" bs=1 count=$SIZE 2> /dev/null
233 cmp "$in" "$out" > /dev/null 2>&1
234 if [ $? -ne 0 ] ;then
235 echo "FAIL: file mismatch for $what" 1>&2
244 test_tcp_forwarding_ip()
252 ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
256 ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
261 if ps -p $lpid > /dev/null;then
265 if ps -p $cpid > /dev/null;then
271 check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"
276 check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"
284 test_tcp_forwarding()
286 test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
291 test_tcp_forwarding_nat()
295 test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
298 if [ $lret -eq 0 ] ; then
299 test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
310 # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
311 test_tcp_forwarding ns1 ns2
312 if [ $? -eq 0 ] ;then
313 echo "PASS: flow offloaded for ns1/ns2"
315 echo "FAIL: flow offload for ns1/ns2:" 1>&2
316 ip netns exec nsr1 nft list ruleset
320 # delete default route, i.e. ns2 won't be able to reach ns1 and
321 # will depend on ns1 being masqueraded in nsr1.
322 # expect ns1 has nsr1 address.
323 ip -net ns2 route del default via 10.0.2.1
324 ip -net ns2 route del default via dead:2::1
325 ip -net ns2 route add 192.168.10.1 via 10.0.2.1
328 # Same, but with NAT enabled.
329 ip netns exec nsr1 nft -f - <<EOF
332 type nat hook prerouting priority 0; policy accept;
333 meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
337 type nat hook postrouting priority 0; policy accept;
338 meta oifname "veth1" counter masquerade
343 test_tcp_forwarding_nat ns1 ns2
345 if [ $? -eq 0 ] ;then
346 echo "PASS: flow offloaded for ns1/ns2 with NAT"
348 echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
349 ip netns exec nsr1 nft list ruleset
354 # Same as second test, but with PMTU discovery enabled.
355 handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
357 ip netns exec nsr1 nft delete rule inet filter forward $handle
358 if [ $? -ne 0 ] ;then
359 echo "FAIL: Could not delete large-packet accept rule"
363 ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
364 ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
366 test_tcp_forwarding_nat ns1 ns2
367 if [ $? -eq 0 ] ;then
368 echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
370 echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
371 ip netns exec nsr1 nft list ruleset
374 KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
375 KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
379 if [ $SPI1 -eq $SPI2 ]; then
392 ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet
393 ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
395 # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
396 ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow
397 # to fwd decrypted packets after esp processing:
398 ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 1 action allow
402 do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
404 do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
406 ip netns exec nsr1 nft delete table ip nat
408 # restore default routes
409 ip -net ns2 route del 192.168.10.1 via 10.0.2.1
410 ip -net ns2 route add default via 10.0.2.1
411 ip -net ns2 route add default via dead:2::1
413 test_tcp_forwarding ns1 ns2
414 if [ $? -eq 0 ] ;then
415 echo "PASS: ipsec tunnel mode for ns1/ns2"
417 echo "FAIL: ipsec tunnel mode for ns1/ns2"
418 ip netns exec nsr1 nft list ruleset 1>&2
419 ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2