2 # SPDX-License-Identifier: GPL-2.0
4 # This tests basic flowtable functionality.
5 # Creates following default topology:
7 # Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
8 # Router1 is the one doing flow offloading, Router2 has no special
9 # purpose other than having a link whose MTU is smaller than that of both
10 # Originator and Responder, i.e. TCPMSS announced values are too large and will
11 # still result in fragmentation and/or PMTU discovery.
13 # You can check with different Originator/Link/Responder MTUs, e.g.:
14 # sh nft_flowtable.sh -o1000 -l500 -r100
18 # Kselftest framework requirement - SKIP code is 4.
# Remember the current nf_log_all_netns value; the restore line further down
# reads it back.
27 log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
# SKIP message; $2 is the human-readable description passed as second argument
# (presumably this line belongs to checktool, whose header is not visible here).
32 echo "SKIP: Could not $2"
# Prerequisite probes: first argument is the command to try, second argument is
# the text that completes "SKIP: Could not ...".
37 checktool "nft --version" "run test without nft tool"
38 checktool "ip -Version" "run test without ip tool"
39 checktool "which nc" "run test without nc (netcat)"
# NOTE(review): unlike the probes above, this command is not read-only - it
# names the nsr1 namespace to be created.
40 checktool "ip netns add nsr1" "create net namespace"
# Remove the transfer input/output files of both endpoints.
53 rm -f "$ns1in" "$ns1out"
54 rm -f "$ns2in" "$ns2out"
# Write nf_log_all_netns back only when the saved value was 0; a saved non-zero
# value results in no write here.
56 [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
# Turn on nf_log_all_netns.
61 sysctl -q net.netfilter.nf_log_all_netns=1
# Three veth pairs form the chain:
#   ns1:eth0 <-> nsr1:veth0, nsr1:veth1 <-> nsr2:veth0, nsr2:veth1 <-> ns2:eth0
63 ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
64 ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2
66 ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2
# Bring up loopback and both veth devices in router namespace nsr$i
# ($i is set by code that is not visible in this excerpt).
68 for dev in lo veth0 veth1; do
70 ip -net nsr$i link set $dev up
# nsr1 addresses on veth0, the device paired with ns1:eth0.
74 ip -net nsr1 addr add 10.0.1.1/24 dev veth0
75 ip -net nsr1 addr add dead:1::1/64 dev veth0
# nsr2 addresses on veth1, the device paired with ns2:eth0.
77 ip -net nsr2 addr add 10.0.2.1/24 dev veth1
78 ip -net nsr2 addr add dead:2::1/64 dev veth1
80 # set different MTUs so that packets coming from ns1 (large MTU) and going
81 # to ns2 (smaller MTU) must be pushed to the stack, either to perform fragmentation
82 # (ip_no_pmtu_disc=1) or to do PMTU discovery (send ICMP error back to originator).
83 # ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
84 # is NOT the lowest link mtu.
# Command-line options -o, -l and -r each take an argument; the current option
# letter is stored in $o.
90 while getopts "o:l:r:" o
# $omtu is applied to both ends of the nsr1 <-> ns1 link.
99 ip -net nsr1 link set veth0 mtu $omtu
100 ip -net ns1 link set eth0 mtu $omtu
# $rmtu is applied to both ends of the nsr2 <-> ns2 link.
102 ip -net nsr2 link set veth1 mtu $rmtu
103 ip -net ns2 link set eth0 mtu $rmtu
105 # transfer-net between nsr1 and nsr2.
106 # these addresses are not used for connections.
107 ip -net nsr1 addr add 192.168.10.1/24 dev veth1
108 ip -net nsr1 addr add fee1:2::1/64 dev veth1
110 ip -net nsr2 addr add 192.168.10.2/24 dev veth0
111 ip -net nsr2 addr add fee1:2::2/64 dev veth0
# Enable IPv4 forwarding on both veth devices of router nsr$i
# ($i is set by code that is not visible in this excerpt).
114 ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
115 ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
# Endpoint ns$i: bring links up, assign 10.0.$i.99 and dead:$i::99, and use the
# .1 / ::1 address of the same subnet as default gateway for IPv4 and IPv6.
117 ip -net ns$i link set lo up
118 ip -net ns$i link set eth0 up
119 ip -net ns$i addr add 10.0.$i.99/24 dev eth0
120 ip -net ns$i route add default via 10.0.$i.1
121 ip -net ns$i addr add dead:$i::99/64 dev eth0
122 ip -net ns$i route add default via dead:$i::1
# Set tcp_no_metrics_save=1 in the endpoint.
123 ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null
125 # don't set ip DF bit for first two tests
126 ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
# Each router uses the other router's transfer-net address as IPv4 default gateway.
129 ip -net nsr1 route add default via 192.168.10.2
130 ip -net nsr2 route add default via 192.168.10.1
# Feed the ruleset below to nft inside nsr1 via a here-document on stdin.
# The delimiter is unquoted, so the shell expands $lmtu before nft reads the text.
# The ruleset declares flowtable devices veth0/veth1 and a forward chain with
# policy drop, so only traffic matched by an accept rule is forwarded.
132 ip netns exec nsr1 nft -f - <<EOF
135 hook ingress priority 0
136 devices = { veth0, veth1 }
140 type filter hook forward priority 0; policy drop;
142 # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
143 meta oif "veth1" tcp dport 12345 flow offload @f1 counter
145 # use packet size to trigger 'should be offloaded by now'.
146 # otherwise, if 'flow offload' expression never offloads, the
148 tcp dport 12345 meta length gt 200 ct mark set 1 counter
150 # this turns off flow offloading internally, so expect packets again
151 tcp flags fin,rst ct mark set 0 accept
153 # this allows large packets from responder, we need this as long
155 # This rule is deleted for the last test, when we expect PMTUd
156 # to kick in and ensure all packets meet mtu requirements.
157 meta length gt $lmtu accept comment something-to-grep-for
159 # next line blocks connection w.o. working offload.
160 # we only do this for reverse dir, because we expect packets to
161 # enter slow path due to MTU mismatch of veth0 and veth1.
162 tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop
164 ct state established,related accept
166 # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
167 meta length lt 200 oif "veth1" tcp dport 12345 counter accept
169 meta nfproto ipv4 meta l4proto icmp accept
170 meta nfproto ipv6 meta l4proto icmpv6 accept
# Report SKIP when the preceding command failed (presumably the nft ruleset
# load above; the lines in between are not visible in this excerpt).
175 if [ $? -ne 0 ]; then
176 echo "SKIP: Could not load nft ruleset"
180 # test basic connectivity
# One quiet ICMP echo request from ns1 to the ns2 address; output is discarded.
181 ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null
183 echo "ERROR: ns1 cannot reach ns2" 1>&2
# Same probe in the opposite direction, ns2 to the ns1 address.
188 ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null
190 echo "ERROR: ns2 cannot reach ns1" 1>&2
# $ret is maintained by code outside this excerpt; 0 is reported as PASS.
194 if [ $ret -eq 0 ];then
195 echo "PASS: netns routing/connectivity: ns1 can reach ns2"
# Fill "$name" with random data: first SIZE blocks of 1024 bytes, then a tail of
# SIZE single bytes. TSIZE tracks the intended total size in bytes.
208 SIZE=$((RANDOM % (1024 * 8)))
209 TSIZE=$((SIZE * 1024))
# Bulk part; of= creates/truncates "$name".
211 dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
213 SIZE=$((RANDOM % 1024))
215 TSIZE=$((TSIZE + SIZE))
# Tail part. The previous 'conf=notrunc' is not a dd operand, so dd refused to
# run and - with stderr discarded - the tail was silently never written.
# Even the intended 'conv=notrunc' would write at offset 0 instead of growing
# the file, so let the shell open "$name" in append mode and have dd write to
# stdout.
216 dd if=/dev/urandom bs=1 count=$SIZE >> "$name" 2> /dev/null
# Compare the two files byte for byte; cmp's own output is discarded and only
# its exit status is examined.
225 cmp "$in" "$out" > /dev/null 2>&1
226 if [ $? -ne 0 ] ;then
# $what is the label for the transfer direction being checked.
227 echo "FAIL: file mismatch for $what" 1>&2
# Run one bidirectional netcat transfer between namespaces $nsa (client) and
# $nsb (listener), then compare what each side received with what the other
# side sent. Only part of the function body is visible in this excerpt.
236 test_tcp_forwarding_ip()
# Listener in $nsb on port 12345: sends "$ns2in", stores received data in "$ns2out".
244 ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
# Client in $nsa connecting to $dstip:$dstport: sends "$ns1in", stores received
# data in "$ns1out".
248 ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
# Check whether the listener ($lpid) and client ($cpid) processes still exist
# (presumably the PIDs of the two background nc jobs; the branch bodies are not
# visible here).
253 if ps -p $lpid > /dev/null;then
257 if ps -p $cpid > /dev/null;then
# Data sent by ns1 must equal what ns2 received, and vice versa.
263 check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"
268 check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"
# Plain forwarding test: connect to the ns2 address 10.0.2.99 on port 12345.
# $1/$2 are passed through as the two namespace arguments.
276 test_tcp_forwarding()
278 test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
# NAT variant: first the same direct connection as above, then - only if $lret
# is 0 - a second connection to 10.6.6.6 port 1666, the destination matched by
# the dnat rule of the NAT ruleset.
283 test_tcp_forwarding_nat()
287 test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
290 if [ $lret -eq 0 ] ; then
291 test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
# Create the random payload files that ns1 and ns2 will send.
298 make_file "$ns1in" "ns1"
299 make_file "$ns2in" "ns2"
302 # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
303 test_tcp_forwarding ns1 ns2
304 if [ $? -eq 0 ] ;then
305 echo "PASS: flow offloaded for ns1/ns2"
# On failure, dump the nsr1 ruleset to help diagnosis.
307 echo "FAIL: flow offload for ns1/ns2:" 1>&2
308 ip netns exec nsr1 nft list ruleset
312 # delete default route, i.e. ns2 won't be able to reach ns1 and
313 # will depend on ns1 being masqueraded in nsr1.
314 # expect ns1 has nsr1 address.
315 ip -net ns2 route del default via 10.0.2.1
316 ip -net ns2 route del default via dead:2::1
# Keep only a host route towards nsr1's transfer-net address.
317 ip -net ns2 route add 192.168.10.1 via 10.0.2.1
320 # Same, but with NAT enabled.
# The ruleset below is fed to nft on stdin; it adds a prerouting dnat rule
# (10.6.6.6:1666 arriving on veth0 -> 10.0.2.99:12345) and a postrouting
# masquerade rule for traffic leaving via veth1.
321 ip netns exec nsr1 nft -f - <<EOF
324 type nat hook prerouting priority 0; policy accept;
325 meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
329 type nat hook postrouting priority 0; policy accept;
330 meta oifname "veth1" counter masquerade
335 test_tcp_forwarding_nat ns1 ns2
337 if [ $? -eq 0 ] ;then
338 echo "PASS: flow offloaded for ns1/ns2 with NAT"
# On failure, dump the nsr1 ruleset to help diagnosis.
340 echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
341 ip netns exec nsr1 nft list ruleset
346 # Same as second test, but with PMTU discovery enabled.
# Find the large-packet accept rule through its comment string and keep the
# text that follows '#' on that line of 'nft -a' output.
347 handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
# $handle is intentionally unquoted: it presumably expands to the two words
# "handle <n>" expected by 'nft delete rule'.
349 ip netns exec nsr1 nft delete rule inet filter forward $handle
350 if [ $? -ne 0 ] ;then
# Failure diagnostics go to stderr, like the other FAIL messages of this test.
351 echo "FAIL: Could not delete large-packet accept rule" 1>&2
# Clear ip_no_pmtu_disc in both endpoints.
355 ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
356 ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
358 test_tcp_forwarding_nat ns1 ns2
359 if [ $? -eq 0 ] ;then
360 echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
# On failure, dump the nsr1 ruleset to help diagnosis.
362 echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
363 ip netns exec nsr1 nft list ruleset
# Throw-away keys: hash the current process listing and prefix the hex digest
# with "0x" (sha1 digest for the auth key, md5 digest for the AES key).
366 KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
367 KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
# SPI1 and SPI2 are set outside this excerpt; this handles the case where both
# are equal (the branch body is not visible here).
371 if [ $SPI1 -eq $SPI2 ]; then
# ESP tunnel-mode states in namespace $ns: inbound ($remote -> $me, $spi_in)
# and outbound ($me -> $remote, $spi_out), with selectors for the remote and
# local networks.
384 ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet
385 ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
387 # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
388 ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow
389 # to fwd decrypted packets after esp processing:
390 ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 1 action allow
# Configure both routers as mirror images of each other: addresses, networks
# and SPIs are swapped between the two calls (presumably the arguments are
# ns, me, remote, lnet, rnet, spi_out, spi_in - confirm against do_esp's header).
394 do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
396 do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
# Drop the 'ip nat' table in nsr1 before the tunnel test.
398 ip netns exec nsr1 nft delete table ip nat
400 # restore default routes
401 ip -net ns2 route del 192.168.10.1 via 10.0.2.1
402 ip -net ns2 route add default via 10.0.2.1
403 ip -net ns2 route add default via dead:2::1
405 test_tcp_forwarding ns1 ns2
406 if [ $? -eq 0 ] ;then
407 echo "PASS: ipsec tunnel mode for ns1/ns2"
# Failure message goes to stderr, together with the diagnostics that follow it
# (nsr1 ruleset and xfrm statistics).
409 echo "FAIL: ipsec tunnel mode for ns1/ns2" 1>&2
410 ip netns exec nsr1 nft list ruleset 1>&2
411 ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2