tools/testing/selftests/netfilter/nft_flowtable.sh

   1 #!/bin/bash
   2 # SPDX-License-Identifier: GPL-2.0
   3 #
   4 # This tests basic flowtable functionality.
   5 # Creates following topology:
   6 #
   7 # Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
   8 # Router1 is the one doing flow offloading, Router2 has no special
   9 # purpose other than having a link whose MTU is smaller than that of both
  10 # Originator and Responder, i.e. TCPMSS announced values are too large and
  11 # will still result in fragmentation and/or PMTU discovery.
  12
  13 # Kselftest framework requirement - SKIP code is 4.
  14 ksft_skip=4
  15 ret=0
  16
  17 ns1in=""
  18 ns2in=""
  19 ns1out=""
  20 ns2out=""
  21
  22 log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
  23
  24 nft --version > /dev/null 2>&1
  25 if [ $? -ne 0 ];then
  26         echo "SKIP: Could not run test without nft tool"
  27         exit $ksft_skip
  28 fi
  29
  30 ip -Version > /dev/null 2>&1
  31 if [ $? -ne 0 ];then
  32         echo "SKIP: Could not run test without ip tool"
  33         exit $ksft_skip
  34 fi
  35
  36 which nc > /dev/null 2>&1
  37 if [ $? -ne 0 ];then
  38         echo "SKIP: Could not run test without nc (netcat)"
  39         exit $ksft_skip
  40 fi
  41
  42 ip netns add nsr1
  43 if [ $? -ne 0 ];then
  44         echo "SKIP: Could not create net namespace"
  45         exit $ksft_skip
  46 fi
  47
  48 ip netns add ns1
  49 ip netns add ns2
  50
  51 ip netns add nsr2
  52
  53 cleanup() {
  54         for i in 1 2; do
  55                 ip netns del ns$i
  56                 ip netns del nsr$i
  57         done
  58
  59         rm -f "$ns1in" "$ns1out"
  60         rm -f "$ns2in" "$ns2out"
  61
  62         [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
  63 }
  64
  65 trap cleanup EXIT
  66
  67 sysctl -q net.netfilter.nf_log_all_netns=1
  68
  69 ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
  70 ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2
  71
  72 ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2
  73
  74 for dev in lo veth0 veth1; do
  75   for i in 1 2; do
  76     ip -net nsr$i link set $dev up
  77   done
  78 done
  79
  80 ip -net nsr1 addr add 10.0.1.1/24 dev veth0
  81 ip -net nsr1 addr add dead:1::1/64 dev veth0
  82
  83 ip -net nsr2 addr add 10.0.2.1/24 dev veth1
  84 ip -net nsr2 addr add dead:2::1/64 dev veth1
  85
  86 # Set different MTUs so that packets coming from ns1 (large MTU) and going
  87 # to ns2 (smaller MTU) must be pushed to the stack, either to perform
  88 # fragmentation (ip_no_pmtu_disc=1) or to do PMTU discovery (send ICMP error
  89 # back to originator).  ns2 is reached via nsr2 over a link with a smaller
  90 # MTU, so the TCPMSS announced by both peers is NOT the lowest link MTU.
  91
  92 ip -net nsr1 link set veth0 mtu 9000
  93 ip -net ns1 link set eth0 mtu 9000
  94
  95 ip -net nsr2 link set veth1 mtu 2000
  96 ip -net ns2 link set eth0 mtu 2000
  97
  98 # transfer-net between nsr1 and nsr2.
  99 # these addresses are not used for connections.
 100 ip -net nsr1 addr add 192.168.10.1/24 dev veth1
 101 ip -net nsr1 addr add fee1:2::1/64 dev veth1
 102
 103 ip -net nsr2 addr add 192.168.10.2/24 dev veth0
 104 ip -net nsr2 addr add fee1:2::2/64 dev veth0
 105
 106 for i in 1 2; do
 107   ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
 108   ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
 109
 110   ip -net ns$i link set lo up
 111   ip -net ns$i link set eth0 up
 112   ip -net ns$i addr add 10.0.$i.99/24 dev eth0
 113   ip -net ns$i route add default via 10.0.$i.1
 114   ip -net ns$i addr add dead:$i::99/64 dev eth0
 115   ip -net ns$i route add default via dead:$i::1
 116   ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null
 117
 118   # don't set ip DF bit for first two tests
 119   ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
 120 done
 121
 122 ip -net nsr1 route add default via 192.168.10.2
 123 ip -net nsr2 route add default via 192.168.10.1
 124
 125 ip netns exec nsr1 nft -f - <<EOF
 126 table inet filter {
 127   flowtable f1 {
 128      hook ingress priority 0
 129      devices = { veth0, veth1 }
 130    }
 131
 132    chain forward {
 133       type filter hook forward priority 0; policy drop;
 134
 135       # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
 136       meta oif "veth1" tcp dport 12345 flow offload @f1 counter
 137
 138       # use packet size to trigger 'should be offloaded by now'.
 139       # otherwise, if 'flow offload' expression never offloads, the
 140       # test will pass.
 141       tcp dport 12345 meta length gt 200 ct mark set 1 counter
 142
 143       # this turns off flow offloading internally, so expect packets again
 144       tcp flags fin,rst ct mark set 0 accept
 145
 146       # this allows large packets from responder, we need this as long
 147       # as PMTUd is off.
 148       # This rule is deleted for the last test, when we expect PMTUd
 149       # to kick in and ensure all packets meet mtu requirements.
 150       meta length gt 1500 accept comment something-to-grep-for
 151
 152       # next line blocks connection w.o. working offload.
 153       # we only do this for reverse dir, because we expect packets to
 154       # enter slow path due to MTU mismatch of veth0 and veth1.
 155       tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop
 156
 157       ct state established,related accept
 158
 159       # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
 160       meta length lt 200 oif "veth1" tcp dport 12345 counter accept
 161
 162       meta nfproto ipv4 meta l4proto icmp accept
 163       meta nfproto ipv6 meta l4proto icmpv6 accept
 164    }
 165 }
 166 EOF
 167
 168 if [ $? -ne 0 ]; then
 169         echo "SKIP: Could not load nft ruleset"
 170         exit $ksft_skip
 171 fi
 172
 173 # test basic connectivity
 174 ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null
 175 if [ $? -ne 0 ];then
 176   echo "ERROR: ns1 cannot reach ns2" 1>&2
 177   bash
 178   exit 1
 179 fi
 180
 181 ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null
 182 if [ $? -ne 0 ];then
 183   echo "ERROR: ns2 cannot reach ns1" 1>&2
 184   exit 1
 185 fi
 186
 187 if [ $ret -eq 0 ];then
 188         echo "PASS: netns routing/connectivity: ns1 can reach ns2"
 189 fi
 190
 191 ns1in=$(mktemp)
 192 ns1out=$(mktemp)
 193 ns2in=$(mktemp)
 194 ns2out=$(mktemp)
 195
 196 make_file()
 197 {
 198         name=$1
 199         who=$2
 200
 201         SIZE=$((RANDOM % (1024 * 8)))
 202         TSIZE=$((SIZE * 1024))
 203
 204         dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
 205
 206         SIZE=$((RANDOM % 1024))
 207         SIZE=$((SIZE + 128))
 208         TSIZE=$((TSIZE + SIZE))
 209         dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
 210 }
 211
 212 check_transfer()
 213 {
 214         in=$1
 215         out=$2
 216         what=$3
 217
 218         cmp "$in" "$out" > /dev/null 2>&1
 219         if [ $? -ne 0 ] ;then
 220                 echo "FAIL: file mismatch for $what" 1>&2
 221                 ls -l "$in"
 222                 ls -l "$out"
 223                 return 1
 224         fi
 225
 226         return 0
 227 }
 228
 229 test_tcp_forwarding()
 230 {
 231         local nsa=$1
 232         local nsb=$2
 233         local lret=0
 234
 235         ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
 236         lpid=$!
 237
 238         sleep 1
 239         ip netns exec $nsa nc -w 4 10.0.2.99 12345 < "$ns1in" > "$ns1out" &
 240         cpid=$!
 241
 242         sleep 3
 243
 244         kill $lpid
 245         kill $cpid
 246         wait
 247
 248         check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"
 249         if [ $? -ne 0 ];then
 250                 lret=1
 251         fi
 252
 253         check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"
 254         if [ $? -ne 0 ];then
 255                 lret=1
 256         fi
 257
 258         return $lret
 259 }
 260
 261 make_file "$ns1in" "ns1"
 262 make_file "$ns2in" "ns2"
 263
 264 # First test:
 265 # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
 266 test_tcp_forwarding ns1 ns2
 267 if [ $? -eq 0 ] ;then
 268         echo "PASS: flow offloaded for ns1/ns2"
 269 else
 270         echo "FAIL: flow offload for ns1/ns2:" 1>&2
 271         ip netns exec nsr1 nft list ruleset
 272         ret=1
 273 fi
 274
 275 # delete default route, i.e. ns2 won't be able to reach ns1 and
 276 # will depend on ns1 being masqueraded in nsr1.
 277 # expect ns1 has nsr1 address.
 278 ip -net ns2 route del default via 10.0.2.1
 279 ip -net ns2 route del default via dead:2::1
 280 ip -net ns2 route add 192.168.10.1 via 10.0.2.1
 281
 282 # Second test:
 283 # Same, but with NAT enabled.
 284 ip netns exec nsr1 nft -f - <<EOF
 285 table ip nat {
 286    chain postrouting {
 287       type nat hook postrouting priority 0; policy accept;
 288       meta oifname "veth1" masquerade
 289    }
 290 }
 291 EOF
 292
 293 test_tcp_forwarding ns1 ns2
 294
 295 if [ $? -eq 0 ] ;then
 296         echo "PASS: flow offloaded for ns1/ns2 with NAT"
 297 else
 298         echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
 299         ip netns exec nsr1 nft list ruleset
 300         ret=1
 301 fi
 302
 303 # Third test:
 304 # Same as second test, but with PMTU discovery enabled.
 305 handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
 306
 307 ip netns exec nsr1 nft delete rule inet filter forward $handle
 308 if [ $? -ne 0 ] ;then
 309         echo "FAIL: Could not delete large-packet accept rule"
 310         exit 1
 311 fi
 312
 313 ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 314 ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 315
 316 test_tcp_forwarding ns1 ns2
 317 if [ $? -eq 0 ] ;then
 318         echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
 319 else
 320         echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
 321         ip netns exec nsr1 nft list ruleset
 322 fi
 323
 324 KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
 325 KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
 326 SPI1=$RANDOM
 327 SPI2=$RANDOM
 328
 329 if [ $SPI1 -eq $SPI2 ]; then
 330         SPI2=$((SPI2+1))
 331 fi
 332
 333 do_esp() {
 334     local ns=$1
 335     local me=$2
 336     local remote=$3
 337     local lnet=$4
 338     local rnet=$5
 339     local spi_out=$6
 340     local spi_in=$7
 341
 342     ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in  enc aes $KEY_AES  auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet
 343     ip -net $ns xfrm state add src $me  dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
 344
 345     # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
 346     ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow
 347     # to fwd decrypted packets after esp processing:
 348     ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 1 action allow
 349
 350 }
 351
 352 do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
 353
 354 do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
 355
 356 ip netns exec nsr1 nft delete table ip nat
 357
 358 # restore default routes
 359 ip -net ns2 route del 192.168.10.1 via 10.0.2.1
 360 ip -net ns2 route add default via 10.0.2.1
 361 ip -net ns2 route add default via dead:2::1
 362
 363 test_tcp_forwarding ns1 ns2
 364 if [ $? -eq 0 ] ;then
 365         echo "PASS: ipsec tunnel mode for ns1/ns2"
 366 else
 367         echo "FAIL: ipsec tunnel mode for ns1/ns2"
 368         ip netns exec nsr1 nft list ruleset 1>&2
 369         ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2
 370 fi
 371
 372 exit $ret