tools/testing/selftests/netfilter/nft_flowtable.sh

   1 #!/bin/bash
   2 # SPDX-License-Identifier: GPL-2.0
   3 #
   4 # This tests basic flowtable functionality.
   5 # Creates following default topology:
   6 #
   7 # Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
# Router1 is the one doing flow offloading, Router2 has no special
# purpose other than having a link MTU that is smaller than that of both
# the Originator and the Responder, i.e. the TCPMSS announced values are
# too large and will still result in fragmentation and/or PMTU discovery.
#
# You can check with different Originator/Link/Responder MTU, e.g.:
# sh nft_flowtable.sh -o1000 -l500 -r100
  15 #
  16
  17
  18 # Kselftest framework requirement - SKIP code is 4.
  19 ksft_skip=4
  20 ret=0
  21
  22 ns1in=""
  23 ns2in=""
  24 ns1out=""
  25 ns2out=""
  26
  27 log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
  28
  29 checktool (){
  30         $1 > /dev/null 2>&1
  31         if [ $? -ne 0 ];then
  32                 echo "SKIP: Could not $2"
  33                 exit $ksft_skip
  34         fi
  35 }
  36
  37 checktool "nft --version" "run test without nft tool"
  38 checktool "ip -Version" "run test without ip tool"
  39 checktool "which nc" "run test without nc (netcat)"
  40 checktool "ip netns add nsr1" "create net namespace"
  41
  42 ip netns add ns1
  43 ip netns add ns2
  44
  45 ip netns add nsr2
  46
  47 cleanup() {
  48         for i in 1 2; do
  49                 ip netns del ns$i
  50                 ip netns del nsr$i
  51         done
  52
  53         rm -f "$ns1in" "$ns1out"
  54         rm -f "$ns2in" "$ns2out"
  55
  56         [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
  57 }
  58
  59 trap cleanup EXIT
  60
  61 sysctl -q net.netfilter.nf_log_all_netns=1
  62
  63 ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
  64 ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2
  65
  66 ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2
  67
  68 for dev in lo veth0 veth1; do
  69   for i in 1 2; do
  70     ip -net nsr$i link set $dev up
  71   done
  72 done
  73
  74 ip -net nsr1 addr add 10.0.1.1/24 dev veth0
  75 ip -net nsr1 addr add dead:1::1/64 dev veth0
  76
  77 ip -net nsr2 addr add 10.0.2.1/24 dev veth1
  78 ip -net nsr2 addr add dead:2::1/64 dev veth1
  79
  80 # set different MTUs so we need to push packets coming from ns1 (large MTU)
  81 # to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
  82 # or to do PTMU discovery (send ICMP error back to originator).
  83 # ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
  84 # is NOT the lowest link mtu.
  85
  86 omtu=9000
  87 lmtu=1500
  88 rmtu=2000
  89
  90 while getopts "o:l:r:" o
  91 do
  92         case $o in
  93                 o) omtu=$OPTARG;;
  94                 l) lmtu=$OPTARG;;
  95                 r) rmtu=$OPTARG;;
  96         esac
  97 done
  98
  99 ip -net nsr1 link set veth0 mtu $omtu
 100 ip -net ns1 link set eth0 mtu $omtu
 101
 102 ip -net nsr2 link set veth1 mtu $rmtu
 103 ip -net ns2 link set eth0 mtu $rmtu
 104
 105 # transfer-net between nsr1 and nsr2.
 106 # these addresses are not used for connections.
 107 ip -net nsr1 addr add 192.168.10.1/24 dev veth1
 108 ip -net nsr1 addr add fee1:2::1/64 dev veth1
 109
 110 ip -net nsr2 addr add 192.168.10.2/24 dev veth0
 111 ip -net nsr2 addr add fee1:2::2/64 dev veth0
 112
 113 for i in 1 2; do
 114   ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
 115   ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
 116
 117   ip -net ns$i link set lo up
 118   ip -net ns$i link set eth0 up
 119   ip -net ns$i addr add 10.0.$i.99/24 dev eth0
 120   ip -net ns$i route add default via 10.0.$i.1
 121   ip -net ns$i addr add dead:$i::99/64 dev eth0
 122   ip -net ns$i route add default via dead:$i::1
 123   ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null
 124
 125   # don't set ip DF bit for first two tests
 126   ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
 127 done
 128
 129 ip -net nsr1 route add default via 192.168.10.2
 130 ip -net nsr2 route add default via 192.168.10.1
 131
 132 ip netns exec nsr1 nft -f - <<EOF
 133 table inet filter {
 134   flowtable f1 {
 135      hook ingress priority 0
 136      devices = { veth0, veth1 }
 137    }
 138
 139    chain forward {
 140       type filter hook forward priority 0; policy drop;
 141
 142       # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
 143       meta oif "veth1" tcp dport 12345 flow offload @f1 counter
 144
 145       # use packet size to trigger 'should be offloaded by now'.
 146       # otherwise, if 'flow offload' expression never offloads, the
 147       # test will pass.
 148       tcp dport 12345 meta length gt 200 ct mark set 1 counter
 149
 150       # this turns off flow offloading internally, so expect packets again
 151       tcp flags fin,rst ct mark set 0 accept
 152
 153       # this allows large packets from responder, we need this as long
 154       # as PMTUd is off.
 155       # This rule is deleted for the last test, when we expect PMTUd
 156       # to kick in and ensure all packets meet mtu requirements.
 157       meta length gt $lmtu accept comment something-to-grep-for
 158
 159       # next line blocks connection w.o. working offload.
 160       # we only do this for reverse dir, because we expect packets to
 161       # enter slow path due to MTU mismatch of veth0 and veth1.
 162       tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop
 163
 164       ct state established,related accept
 165
 166       # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
 167       meta length lt 200 oif "veth1" tcp dport 12345 counter accept
 168
 169       meta nfproto ipv4 meta l4proto icmp accept
 170       meta nfproto ipv6 meta l4proto icmpv6 accept
 171    }
 172 }
 173 EOF
 174
 175 if [ $? -ne 0 ]; then
 176         echo "SKIP: Could not load nft ruleset"
 177         exit $ksft_skip
 178 fi
 179
 180 # test basic connectivity
 181 ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null
 182 if [ $? -ne 0 ];then
 183   echo "ERROR: ns1 cannot reach ns2" 1>&2
 184   bash
 185   exit 1
 186 fi
 187
 188 ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null
 189 if [ $? -ne 0 ];then
 190   echo "ERROR: ns2 cannot reach ns1" 1>&2
 191   exit 1
 192 fi
 193
 194 if [ $ret -eq 0 ];then
 195         echo "PASS: netns routing/connectivity: ns1 can reach ns2"
 196 fi
 197
 198 ns1in=$(mktemp)
 199 ns1out=$(mktemp)
 200 ns2in=$(mktemp)
 201 ns2out=$(mktemp)
 202
 203 make_file()
 204 {
 205         name=$1
 206         who=$2
 207
 208         SIZE=$((RANDOM % (1024 * 8)))
 209         TSIZE=$((SIZE * 1024))
 210
 211         dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
 212
 213         SIZE=$((RANDOM % 1024))
 214         SIZE=$((SIZE + 128))
 215         TSIZE=$((TSIZE + SIZE))
 216         dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
 217 }
 218
 219 check_transfer()
 220 {
 221         in=$1
 222         out=$2
 223         what=$3
 224
 225         cmp "$in" "$out" > /dev/null 2>&1
 226         if [ $? -ne 0 ] ;then
 227                 echo "FAIL: file mismatch for $what" 1>&2
 228                 ls -l "$in"
 229                 ls -l "$out"
 230                 return 1
 231         fi
 232
 233         return 0
 234 }
 235
 236 test_tcp_forwarding_ip()
 237 {
 238         local nsa=$1
 239         local nsb=$2
 240         local dstip=$3
 241         local dstport=$4
 242         local lret=0
 243
 244         ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
 245         lpid=$!
 246
 247         sleep 1
 248         ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
 249         cpid=$!
 250
 251         sleep 3
 252
 253         if ps -p $lpid > /dev/null;then
 254                 kill $lpid
 255         fi
 256
 257         if ps -p $cpid > /dev/null;then
 258                 kill $cpid
 259         fi
 260
 261         wait
 262
 263         check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"
 264         if [ $? -ne 0 ];then
 265                 lret=1
 266         fi
 267
 268         check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"
 269         if [ $? -ne 0 ];then
 270                 lret=1
 271         fi
 272
 273         return $lret
 274 }
 275
 276 test_tcp_forwarding()
 277 {
 278         test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
 279
 280         return $?
 281 }
 282
 283 test_tcp_forwarding_nat()
 284 {
 285         local lret
 286
 287         test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
 288         lret=$?
 289
 290         if [ $lret -eq 0 ] ; then
 291                 test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
 292                 lret=$?
 293         fi
 294
 295         return $lret
 296 }
 297
 298 make_file "$ns1in" "ns1"
 299 make_file "$ns2in" "ns2"
 300
 301 # First test:
 302 # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
 303 test_tcp_forwarding ns1 ns2
 304 if [ $? -eq 0 ] ;then
 305         echo "PASS: flow offloaded for ns1/ns2"
 306 else
 307         echo "FAIL: flow offload for ns1/ns2:" 1>&2
 308         ip netns exec nsr1 nft list ruleset
 309         ret=1
 310 fi
 311
 312 # delete default route, i.e. ns2 won't be able to reach ns1 and
 313 # will depend on ns1 being masqueraded in nsr1.
 314 # expect ns1 has nsr1 address.
 315 ip -net ns2 route del default via 10.0.2.1
 316 ip -net ns2 route del default via dead:2::1
 317 ip -net ns2 route add 192.168.10.1 via 10.0.2.1
 318
 319 # Second test:
 320 # Same, but with NAT enabled.
 321 ip netns exec nsr1 nft -f - <<EOF
 322 table ip nat {
 323    chain prerouting {
 324       type nat hook prerouting priority 0; policy accept;
 325       meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
 326    }
 327
 328    chain postrouting {
 329       type nat hook postrouting priority 0; policy accept;
 330       meta oifname "veth1" counter masquerade
 331    }
 332 }
 333 EOF
 334
 335 test_tcp_forwarding_nat ns1 ns2
 336
 337 if [ $? -eq 0 ] ;then
 338         echo "PASS: flow offloaded for ns1/ns2 with NAT"
 339 else
 340         echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
 341         ip netns exec nsr1 nft list ruleset
 342         ret=1
 343 fi
 344
 345 # Third test:
 346 # Same as second test, but with PMTU discovery enabled.
 347 handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
 348
 349 ip netns exec nsr1 nft delete rule inet filter forward $handle
 350 if [ $? -ne 0 ] ;then
 351         echo "FAIL: Could not delete large-packet accept rule"
 352         exit 1
 353 fi
 354
 355 ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 356 ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 357
 358 test_tcp_forwarding_nat ns1 ns2
 359 if [ $? -eq 0 ] ;then
 360         echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
 361 else
 362         echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
 363         ip netns exec nsr1 nft list ruleset
 364 fi
 365
 366 KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
 367 KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
 368 SPI1=$RANDOM
 369 SPI2=$RANDOM
 370
 371 if [ $SPI1 -eq $SPI2 ]; then
 372         SPI2=$((SPI2+1))
 373 fi
 374
 375 do_esp() {
 376     local ns=$1
 377     local me=$2
 378     local remote=$3
 379     local lnet=$4
 380     local rnet=$5
 381     local spi_out=$6
 382     local spi_in=$7
 383
 384     ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in  enc aes $KEY_AES  auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet
 385     ip -net $ns xfrm state add src $me  dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
 386
 387     # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
 388     ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow
 389     # to fwd decrypted packets after esp processing:
 390     ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 1 action allow
 391
 392 }
 393
 394 do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
 395
 396 do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
 397
 398 ip netns exec nsr1 nft delete table ip nat
 399
 400 # restore default routes
 401 ip -net ns2 route del 192.168.10.1 via 10.0.2.1
 402 ip -net ns2 route add default via 10.0.2.1
 403 ip -net ns2 route add default via dead:2::1
 404
 405 test_tcp_forwarding ns1 ns2
 406 if [ $? -eq 0 ] ;then
 407         echo "PASS: ipsec tunnel mode for ns1/ns2"
 408 else
 409         echo "FAIL: ipsec tunnel mode for ns1/ns2"
 410         ip netns exec nsr1 nft list ruleset 1>&2
 411         ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2
 412 fi
 413
 414 exit $ret