2 # SPDX-License-Identifier: GPL-2.0
4 # This tests basic flowtable functionality.
5 # Creates following topology:
7 # Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
8 # Router1 is the one doing flow offloading, Router2 has no special
9 # purpose other than having a link whose MTU is smaller than that of both the
10 # Originator and the Responder, i.e. TCPMSS announced values are too large and will still
11 # result in fragmentation and/or PMTU discovery.
13 # Kselftest framework requirement - SKIP code is 4.
# Remember the current nf_log_all_netns setting so it can be written back later.
22 log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
# Probe for each required tool; output is discarded, only the exit status matters.
# Each probe is paired with the SKIP message printed when the tool is unavailable.
24 nft --version > /dev/null 2>&1
26 echo "SKIP: Could not run test without nft tool"
30 ip -Version > /dev/null 2>&1
32 echo "SKIP: Could not run test without ip tool"
# 'command -v' is the POSIX shell builtin for locating a command; unlike
# 'which' it does not depend on an extra external program being installed.
36 command -v nc > /dev/null 2>&1
38 echo "SKIP: Could not run test without nc (netcat)"
44 echo "SKIP: Could not create net namespace"
# Remove the input/output files used for the ns1 and ns2 transfers.
59 rm -f "$ns1in" "$ns1out"
60 rm -f "$ns2in" "$ns2out"
# Write the saved nf_log_all_netns value back, but only when that saved value
# was 0 (the script itself switches the sysctl to 1 further down).
62 [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
# Switch nf_log_all_netns on; the ruleset loaded later uses a 'log' statement.
67 sysctl -q net.netfilter.nf_log_all_netns=1
# Create the three veth pairs that form the chain ns1 - nsr1 - nsr2 - ns2:
#   nsr1:veth0 <-> ns1:eth0, nsr1:veth1 <-> nsr2:veth0, nsr2:veth1 <-> ns2:eth0
69 ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
70 ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2
72 ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2
# Bring up loopback and both veth devices in router namespace nsr$i.
# NOTE(review): the loop that sets $i is not part of the visible lines.
74 for dev in lo veth0 veth1; do
76 ip -net nsr$i link set $dev up
# Router addresses (IPv4 and IPv6) on the links facing ns1 and ns2.
80 ip -net nsr1 addr add 10.0.1.1/24 dev veth0
81 ip -net nsr1 addr add dead:1::1/64 dev veth0
83 ip -net nsr2 addr add 10.0.2.1/24 dev veth1
84 ip -net nsr2 addr add dead:2::1/64 dev veth1
86 # set different MTUs so we need to push packets coming from ns1 (large MTU)
87 # to ns2 (smaller MTU) to the stack, either to perform fragmentation (ip_no_pmtu_disc=1),
88 # or to do PMTU discovery (send ICMP error back to originator).
89 # ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
90 # is NOT the lowest link mtu.
92 ip -net nsr1 link set veth0 mtu 9000
93 ip -net ns1 link set eth0 mtu 9000
95 ip -net nsr2 link set veth1 mtu 2000
96 ip -net ns2 link set eth0 mtu 2000
98 # transfer-net between nsr1 and nsr2.
99 # these addresses are not used for connections.
100 ip -net nsr1 addr add 192.168.10.1/24 dev veth1
101 ip -net nsr1 addr add fee1:2::1/64 dev veth1
103 ip -net nsr2 addr add 192.168.10.2/24 dev veth0
104 ip -net nsr2 addr add fee1:2::2/64 dev veth0
# Enable IPv4 forwarding on both veth devices of router nsr$i.
107 ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
108 ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
# Endpoint ns$i: bring links up, assign 10.0.$i.99 and dead:$i::99, and use
# the adjacent router address (.1 / ::1) as default gateway.
110 ip -net ns$i link set lo up
111 ip -net ns$i link set eth0 up
112 ip -net ns$i addr add 10.0.$i.99/24 dev eth0
113 ip -net ns$i route add default via 10.0.$i.1
114 ip -net ns$i addr add dead:$i::99/64 dev eth0
115 ip -net ns$i route add default via dead:$i::1
116 ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null
118 # don't set ip DF bit for first two tests
119 ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
# The routers reach the far side through each other's transfer-net address.
122 ip -net nsr1 route add default via 192.168.10.2
123 ip -net nsr2 route add default via 192.168.10.1
# Feed the ruleset below to nft inside nsr1 via stdin. The visible rules cover
# an ingress hook on veth0/veth1 and a forward chain with policy drop.
# NOTE(review): several lines of the heredoc (including its terminator) are
# not part of the visible source; the heredoc text is left untouched.
125 ip netns exec nsr1 nft -f - <<EOF
128 hook ingress priority 0
129 devices = { veth0, veth1 }
133 type filter hook forward priority 0; policy drop;
135 # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
136 meta oif "veth1" tcp dport 12345 flow offload @f1 counter
138 # use packet size to trigger 'should be offloaded by now'.
139 # otherwise, if 'flow offload' expression never offloads, the
141 tcp dport 12345 meta length gt 200 ct mark set 1 counter
143 # this turns off flow offloading internally, so expect packets again
144 tcp flags fin,rst ct mark set 0 accept
146 # this allows large packets from responder, we need this as long
148 # This rule is deleted for the last test, when we expect PMTUd
149 # to kick in and ensure all packets meet mtu requirements.
150 meta length gt 1500 accept comment something-to-grep-for
152 # next line blocks connection w.o. working offload.
153 # we only do this for reverse dir, because we expect packets to
154 # enter slow path due to MTU mismatch of veth0 and veth1.
155 tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop
157 ct state established,related accept
159 # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
160 meta length lt 200 oif "veth1" tcp dport 12345 counter accept
162 meta nfproto ipv4 meta l4proto icmp accept
163 meta nfproto ipv6 meta l4proto icmpv6 accept
# A ruleset that cannot be loaded is reported as SKIP rather than as a failure.
168 if [ $? -ne 0 ]; then
169 echo "SKIP: Could not load nft ruleset"
173 # test basic connectivity: one ping in each direction between ns1 and ns2
174 ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null
176 echo "ERROR: ns1 cannot reach ns2" 1>&2
181 ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null
183 echo "ERROR: ns2 cannot reach ns1" 1>&2
# Announce success only when $ret is 0.
# NOTE(review): where $ret is assigned is not part of the visible lines.
187 if [ $ret -eq 0 ];then
188 echo "PASS: netns routing/connectivity: ns1 can reach ns2"
# Fill "$name" with random data: a bulk part written in 1 KiB blocks followed
# by a short tail written byte-wise. TSIZE tracks the intended total size.
# NOTE(review): the function header and the assignment of $name are not part
# of the visible lines.
# Bulk part: RANDOM % 8192 blocks of 1024 bytes.
201 SIZE=$((RANDOM % (1024 * 8)))
202 TSIZE=$((SIZE * 1024))
204 dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
# Tail part: SIZE is recomputed from RANDOM % 1024 and added to TSIZE.
206 SIZE=$((RANDOM % 1024))
208 TSIZE=$((TSIZE + SIZE))
# Append the tail to the existing file. The previous 'conf=notrunc' operand is
# not known to dd, so dd aborted (its error hidden by 2> /dev/null) and the
# tail was never written. Appending through '>>' needs no dd-specific flags.
209 dd if=/dev/urandom bs=1 count=$SIZE 2> /dev/null >> "$name"
# Byte-compare "$in" with "$out" (cmp output is discarded); on any difference
# report on stderr which transfer, described by $what, did not match.
# NOTE(review): the enclosing function header and the assignments of
# $in/$out/$what are not part of the visible lines.
218 cmp "$in" "$out" > /dev/null 2>&1
219 if [ $? -ne 0 ] ;then
220 echo "FAIL: file mismatch for $what" 1>&2
# Run one TCP exchange on port 12345 between two namespaces, with data flowing
# in both directions, then compare what each side received with what the
# other side sent.
# NOTE(review): the assignments of $nsa/$nsb and the waiting/return logic are
# not part of the visible lines.
229 test_tcp_forwarding()
# Listener in $nsb: sends "$ns2in", stores received data in "$ns2out".
235 ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
# Client in $nsa: connects to 10.0.2.99 (the address given to ns2), sends
# "$ns1in", stores received data in "$ns1out".
239 ip netns exec $nsa nc -w 4 10.0.2.99 12345 < "$ns1in" > "$ns1out" &
248 check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"
253 check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"
# Create the payload files that ns1 and ns2 send to each other.
261 make_file "$ns1in" "ns1"
262 make_file "$ns2in" "ns2"
265 # First test: no PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
266 test_tcp_forwarding ns1 ns2
267 if [ $? -eq 0 ] ;then
268 echo "PASS: flow offloaded for ns1/ns2"
# On failure, dump the nsr1 ruleset (including its counters) for diagnosis.
270 echo "FAIL: flow offload for ns1/ns2:" 1>&2
271 ip netns exec nsr1 nft list ruleset
275 # delete default route, i.e. ns2 won't be able to reach ns1 and
276 # will depend on ns1 being masqueraded in nsr1.
277 # expect ns1 has nsr1 address.
278 ip -net ns2 route del default via 10.0.2.1
279 ip -net ns2 route del default via dead:2::1
# Keep only a host route to 192.168.10.1, the address nsr1 holds on veth1.
280 ip -net ns2 route add 192.168.10.1 via 10.0.2.1
283 # Second test: same, but with NAT enabled (masquerade on packets leaving via veth1).
284 ip netns exec nsr1 nft -f - <<EOF
287 type nat hook postrouting priority 0; policy accept;
288 meta oifname "veth1" masquerade
293 test_tcp_forwarding ns1 ns2
295 if [ $? -eq 0 ] ;then
296 echo "PASS: flow offloaded for ns1/ns2 with NAT"
298 echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
299 ip netns exec nsr1 nft list ruleset
304 # Same as second test, but with PMTU discovery enabled.
# Find the rule tagged with the comment 'something-to-grep-for' (the
# 'meta length gt 1500 accept' rule). 'nft -a' prints "# handle <n>" after
# each rule, so the field after '#' is " handle <n>".
305 handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
# $handle is deliberately unquoted: it must split into the two words
# 'handle' and '<n>' that 'nft delete rule' expects.
307 ip netns exec nsr1 nft delete rule inet filter forward $handle
308 if [ $? -ne 0 ] ;then
# Failure messages go to stderr, like the other FAIL messages in this script.
309 echo "FAIL: Could not delete large-packet accept rule" 1>&2
# Turn PMTU discovery back on for both endpoints.
313 ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
314 ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
316 test_tcp_forwarding ns1 ns2
317 if [ $? -eq 0 ] ;then
318 echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
320 echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
321 ip netns exec nsr1 nft list ruleset
# Key material for the ESP states: hex digests of the current process listing,
# prefixed with 0x. sha1sum supplies the auth key, md5sum the AES key.
324 KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
325 KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
# Special-case two identical SPI values.
# NOTE(review): the assignments of SPI1/SPI2 and the body of this 'if' are not
# part of the visible lines.
329 if [ $SPI1 -eq $SPI2 ]; then
# NOTE(review): the following lines look like the body of do_esp; its header
# and the assignments of $ns, $me, $remote, $lnet, $rnet, $spi_in and $spi_out
# are not visible.
# One ESP tunnel-mode state per direction: remote -> me and me -> remote.
342 ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet
343 ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
345 # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
346 ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow
347 # to fwd decrypted packets after esp processing:
348 ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 1 action allow
# Configure both routers as mirror images: addresses, networks and SPIs swapped.
352 do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
354 do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
# Drop the 'ip nat' table in nsr1 before running the tunnel test.
356 ip netns exec nsr1 nft delete table ip nat
358 # restore default routes
# (removes the 192.168.10.1 host route in ns2 and re-adds both default routes)
359 ip -net ns2 route del 192.168.10.1 via 10.0.2.1
360 ip -net ns2 route add default via 10.0.2.1
361 ip -net ns2 route add default via dead:2::1
363 test_tcp_forwarding ns1 ns2
364 if [ $? -eq 0 ] ;then
365 echo "PASS: ipsec tunnel mode for ns1/ns2"
# Failure message goes to stderr, together with the diagnostics that follow.
367 echo "FAIL: ipsec tunnel mode for ns1/ns2" 1>&2
368 ip netns exec nsr1 nft list ruleset 1>&2
369 ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2