tools/testing/selftests/netfilter/nft_flowtable.sh

   1 #!/bin/bash
   2 # SPDX-License-Identifier: GPL-2.0
   3 #
   4 # This tests basic flowtable functionality.
   5 # Creates following default topology:
   6 #
   7 # Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
   8 # Router1 is the one doing flow offloading. Router2 has no special
   9 # purpose other than having a link whose MTU is smaller than that of both
  10 # Originator and Responder, i.e. TCPMSS announced values are too large and
  11 # will still result in fragmentation and/or PMTU discovery.
  12 #
  13 # You can check with different Originator/Link/Responder MTU, e.g.:
  14 # nft_flowtable.sh -o8000 -l1500 -r2000
  15 #
  16
  17
  18 # Kselftest framework requirement - SKIP code is 4.
  19 ksft_skip=4
  20 ret=0
  21
  22 ns1in=""
  23 ns2in=""
  24 ns1out=""
  25 ns2out=""
  26
  27 log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
  28
  29 checktool (){
  30         if ! $1 > /dev/null 2>&1; then
  31                 echo "SKIP: Could not $2"
  32                 exit $ksft_skip
  33         fi
  34 }
  35
  36 checktool "nft --version" "run test without nft tool"
  37 checktool "ip -Version" "run test without ip tool"
  38 checktool "which nc" "run test without nc (netcat)"
  39 checktool "ip netns add nsr1" "create net namespace"
  40
  41 ip netns add ns1
  42 ip netns add ns2
  43
  44 ip netns add nsr2
  45
  46 cleanup() {
  47         for i in 1 2; do
  48                 ip netns del ns$i
  49                 ip netns del nsr$i
  50         done
  51
  52         rm -f "$ns1in" "$ns1out"
  53         rm -f "$ns2in" "$ns2out"
  54
  55         [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
  56 }
  57
  58 trap cleanup EXIT
  59
  60 sysctl -q net.netfilter.nf_log_all_netns=1
  61
  62 ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
  63 ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2
  64
  65 ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2
  66
  67 for dev in lo veth0 veth1; do
  68   for i in 1 2; do
  69     ip -net nsr$i link set $dev up
  70   done
  71 done
  72
  73 ip -net nsr1 addr add 10.0.1.1/24 dev veth0
  74 ip -net nsr1 addr add dead:1::1/64 dev veth0
  75
  76 ip -net nsr2 addr add 10.0.2.1/24 dev veth1
  77 ip -net nsr2 addr add dead:2::1/64 dev veth1
  78
  79 # set different MTUs so we need to push packets coming from ns1 (large MTU)
  80 # to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
  81 # or to do PTMU discovery (send ICMP error back to originator).
  82 # ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
  83 # is NOT the lowest link mtu.
  84
  85 omtu=9000
  86 lmtu=1500
  87 rmtu=2000
  88
  89 usage(){
  90         echo "nft_flowtable.sh [OPTIONS]"
  91         echo
  92         echo "MTU options"
  93         echo "   -o originator"
  94         echo "   -l link"
  95         echo "   -r responder"
  96         exit 1
  97 }
  98
  99 while getopts "o:l:r:" o
 100 do
 101         case $o in
 102                 o) omtu=$OPTARG;;
 103                 l) lmtu=$OPTARG;;
 104                 r) rmtu=$OPTARG;;
 105                 *) usage;;
 106         esac
 107 done
 108
 109 if ! ip -net nsr1 link set veth0 mtu $omtu; then
 110         exit 1
 111 fi
 112
 113 ip -net ns1 link set eth0 mtu $omtu
 114
 115 if ! ip -net nsr2 link set veth1 mtu $rmtu; then
 116         exit 1
 117 fi
 118
 119 ip -net ns2 link set eth0 mtu $rmtu
 120
 121 # transfer-net between nsr1 and nsr2.
 122 # these addresses are not used for connections.
 123 ip -net nsr1 addr add 192.168.10.1/24 dev veth1
 124 ip -net nsr1 addr add fee1:2::1/64 dev veth1
 125
 126 ip -net nsr2 addr add 192.168.10.2/24 dev veth0
 127 ip -net nsr2 addr add fee1:2::2/64 dev veth0
 128
 129 for i in 1 2; do
 130   ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
 131   ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
 132
 133   ip -net ns$i link set lo up
 134   ip -net ns$i link set eth0 up
 135   ip -net ns$i addr add 10.0.$i.99/24 dev eth0
 136   ip -net ns$i route add default via 10.0.$i.1
 137   ip -net ns$i addr add dead:$i::99/64 dev eth0
 138   ip -net ns$i route add default via dead:$i::1
 139   if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
 140         echo "ERROR: Check Originator/Responder values (problem during address addition)"
 141         exit 1
 142   fi
 143
 144   # don't set ip DF bit for first two tests
 145   ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
 146 done
 147
 148 ip -net nsr1 route add default via 192.168.10.2
 149 ip -net nsr2 route add default via 192.168.10.1
 150
 151 ip netns exec nsr1 nft -f - <<EOF
 152 table inet filter {
 153   flowtable f1 {
 154      hook ingress priority 0
 155      devices = { veth0, veth1 }
 156    }
 157
 158    chain forward {
 159       type filter hook forward priority 0; policy drop;
 160
 161       # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
 162       meta oif "veth1" tcp dport 12345 flow offload @f1 counter
 163
 164       # use packet size to trigger 'should be offloaded by now'.
 165       # otherwise, if 'flow offload' expression never offloads, the
 166       # test will pass.
 167       tcp dport 12345 meta length gt 200 ct mark set 1 counter
 168
 169       # this turns off flow offloading internally, so expect packets again
 170       tcp flags fin,rst ct mark set 0 accept
 171
 172       # this allows large packets from responder, we need this as long
 173       # as PMTUd is off.
 174       # This rule is deleted for the last test, when we expect PMTUd
 175       # to kick in and ensure all packets meet mtu requirements.
 176       meta length gt $lmtu accept comment something-to-grep-for
 177
 178       # next line blocks connection w.o. working offload.
 179       # we only do this for reverse dir, because we expect packets to
 180       # enter slow path due to MTU mismatch of veth0 and veth1.
 181       tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop
 182
 183       ct state established,related accept
 184
 185       # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
 186       meta length lt 200 oif "veth1" tcp dport 12345 counter accept
 187
 188       meta nfproto ipv4 meta l4proto icmp accept
 189       meta nfproto ipv6 meta l4proto icmpv6 accept
 190    }
 191 }
 192 EOF
 193
 194 if [ $? -ne 0 ]; then
 195         echo "SKIP: Could not load nft ruleset"
 196         exit $ksft_skip
 197 fi
 198
 199 # test basic connectivity
 200 if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
 201   echo "ERROR: ns1 cannot reach ns2" 1>&2
 202   exit 1
 203 fi
 204
 205 if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
 206   echo "ERROR: ns2 cannot reach ns1" 1>&2
 207   exit 1
 208 fi
 209
 210 if [ $ret -eq 0 ];then
 211         echo "PASS: netns routing/connectivity: ns1 can reach ns2"
 212 fi
 213
 214 ns1in=$(mktemp)
 215 ns1out=$(mktemp)
 216 ns2in=$(mktemp)
 217 ns2out=$(mktemp)
 218
 219 make_file()
 220 {
 221         name=$1
 222
 223         SIZE=$((RANDOM % (1024 * 8)))
 224         TSIZE=$((SIZE * 1024))
 225
 226         dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
 227
 228         SIZE=$((RANDOM % 1024))
 229         SIZE=$((SIZE + 128))
 230         TSIZE=$((TSIZE + SIZE))
 231         dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
 232 }
 233
 234 check_transfer()
 235 {
 236         in=$1
 237         out=$2
 238         what=$3
 239
 240         if ! cmp "$in" "$out" > /dev/null 2>&1; then
 241                 echo "FAIL: file mismatch for $what" 1>&2
 242                 ls -l "$in"
 243                 ls -l "$out"
 244                 return 1
 245         fi
 246
 247         return 0
 248 }
 249
 250 test_tcp_forwarding_ip()
 251 {
 252         local nsa=$1
 253         local nsb=$2
 254         local dstip=$3
 255         local dstport=$4
 256         local lret=0
 257
 258         ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
 259         lpid=$!
 260
 261         sleep 1
 262         ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
 263         cpid=$!
 264
 265         sleep 3
 266
 267         if ps -p $lpid > /dev/null;then
 268                 kill $lpid
 269         fi
 270
 271         if ps -p $cpid > /dev/null;then
 272                 kill $cpid
 273         fi
 274
 275         wait
 276
 277         if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then
 278                 lret=1
 279         fi
 280
 281         if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then
 282                 lret=1
 283         fi
 284
 285         return $lret
 286 }
 287
 288 test_tcp_forwarding()
 289 {
 290         test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
 291
 292         return $?
 293 }
 294
 295 test_tcp_forwarding_nat()
 296 {
 297         local lret
 298
 299         test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
 300         lret=$?
 301
 302         if [ $lret -eq 0 ] ; then
 303                 test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
 304                 lret=$?
 305         fi
 306
 307         return $lret
 308 }
 309
 310 make_file "$ns1in"
 311 make_file "$ns2in"
 312
 313 # First test:
 314 # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
 315 if test_tcp_forwarding ns1 ns2; then
 316         echo "PASS: flow offloaded for ns1/ns2"
 317 else
 318         echo "FAIL: flow offload for ns1/ns2:" 1>&2
 319         ip netns exec nsr1 nft list ruleset
 320         ret=1
 321 fi
 322
 323 # delete default route, i.e. ns2 won't be able to reach ns1 and
 324 # will depend on ns1 being masqueraded in nsr1.
 325 # expect ns1 has nsr1 address.
 326 ip -net ns2 route del default via 10.0.2.1
 327 ip -net ns2 route del default via dead:2::1
 328 ip -net ns2 route add 192.168.10.1 via 10.0.2.1
 329
 330 # Second test:
 331 # Same, but with NAT enabled.
 332 ip netns exec nsr1 nft -f - <<EOF
 333 table ip nat {
 334    chain prerouting {
 335       type nat hook prerouting priority 0; policy accept;
 336       meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
 337    }
 338
 339    chain postrouting {
 340       type nat hook postrouting priority 0; policy accept;
 341       meta oifname "veth1" counter masquerade
 342    }
 343 }
 344 EOF
 345
 346 if test_tcp_forwarding_nat ns1 ns2; then
 347         echo "PASS: flow offloaded for ns1/ns2 with NAT"
 348 else
 349         echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
 350         ip netns exec nsr1 nft list ruleset
 351         ret=1
 352 fi
 353
 354 # Third test:
 355 # Same as second test, but with PMTU discovery enabled.
 356 handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
 357
 358 if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
 359         echo "FAIL: Could not delete large-packet accept rule"
 360         exit 1
 361 fi
 362
 363 ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 364 ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 365
 366 if test_tcp_forwarding_nat ns1 ns2; then
 367         echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
 368 else
 369         echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
 370         ip netns exec nsr1 nft list ruleset
 371 fi
 372
 373 # Another test:
 374 # Add bridge interface br0 to Router1, with NAT enabled.
 375 ip -net nsr1 link add name br0 type bridge
 376 ip -net nsr1 addr flush dev veth0
 377 ip -net nsr1 link set up dev veth0
 378 ip -net nsr1 link set veth0 master br0
 379 ip -net nsr1 addr add 10.0.1.1/24 dev br0
 380 ip -net nsr1 addr add dead:1::1/64 dev br0
 381 ip -net nsr1 link set up dev br0
 382
 383 ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
 384
 385 # br0 with NAT enabled.
 386 ip netns exec nsr1 nft -f - <<EOF
 387 flush table ip nat
 388 table ip nat {
 389    chain prerouting {
 390       type nat hook prerouting priority 0; policy accept;
 391       meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
 392    }
 393
 394    chain postrouting {
 395       type nat hook postrouting priority 0; policy accept;
 396       meta oifname "veth1" counter masquerade
 397    }
 398 }
 399 EOF
 400
 401 if test_tcp_forwarding_nat ns1 ns2; then
 402         echo "PASS: flow offloaded for ns1/ns2 with bridge NAT"
 403 else
 404         echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
 405         ip netns exec nsr1 nft list ruleset
 406         ret=1
 407 fi
 408
 409 # Another test:
 410 # Add bridge interface br0 to Router1, with NAT and VLAN.
 411 ip -net nsr1 link set veth0 nomaster
 412 ip -net nsr1 link set down dev veth0
 413 ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10
 414 ip -net nsr1 link set up dev veth0
 415 ip -net nsr1 link set up dev veth0.10
 416 ip -net nsr1 link set veth0.10 master br0
 417
 418 ip -net ns1 addr flush dev eth0
 419 ip -net ns1 link add link eth0 name eth0.10 type vlan id 10
 420 ip -net ns1 link set eth0 up
 421 ip -net ns1 link set eth0.10 up
 422 ip -net ns1 addr add 10.0.1.99/24 dev eth0.10
 423 ip -net ns1 route add default via 10.0.1.1
 424 ip -net ns1 addr add dead:1::99/64 dev eth0.10
 425
 426 if test_tcp_forwarding_nat ns1 ns2; then
 427         echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN"
 428 else
 429         echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
 430         ip netns exec nsr1 nft list ruleset
 431         ret=1
 432 fi
 433
 434 # restore test topology (remove bridge and VLAN)
 435 ip -net nsr1 link set veth0 nomaster
 436 ip -net nsr1 link set veth0 down
 437 ip -net nsr1 link set veth0.10 down
 438 ip -net nsr1 link delete veth0.10 type vlan
 439 ip -net nsr1 link delete br0 type bridge
 440 ip -net ns1 addr flush dev eth0.10
 441 ip -net ns1 link set eth0.10 down
 442 ip -net ns1 link set eth0 down
 443 ip -net ns1 link delete eth0.10 type vlan
 444
 445 # restore address in ns1 and nsr1
 446 ip -net ns1 link set eth0 up
 447 ip -net ns1 addr add 10.0.1.99/24 dev eth0
 448 ip -net ns1 route add default via 10.0.1.1
 449 ip -net ns1 addr add dead:1::99/64 dev eth0
 450 ip -net ns1 route add default via dead:1::1
 451 ip -net nsr1 addr add 10.0.1.1/24 dev veth0
 452 ip -net nsr1 addr add dead:1::1/64 dev veth0
 453 ip -net nsr1 link set up dev veth0
 454
 455 KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
 456 KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
 457 SPI1=$RANDOM
 458 SPI2=$RANDOM
 459
 460 if [ $SPI1 -eq $SPI2 ]; then
 461         SPI2=$((SPI2+1))
 462 fi
 463
 464 do_esp() {
 465     local ns=$1
 466     local me=$2
 467     local remote=$3
 468     local lnet=$4
 469     local rnet=$5
 470     local spi_out=$6
 471     local spi_in=$7
 472
 473     ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in  enc aes $KEY_AES  auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet
 474     ip -net $ns xfrm state add src $me  dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
 475
 476     # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
 477     ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow
 478     # to fwd decrypted packets after esp processing:
 479     ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 1 action allow
 480
 481 }
 482
 483 do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
 484
 485 do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
 486
 487 ip netns exec nsr1 nft delete table ip nat
 488
 489 # restore default routes
 490 ip -net ns2 route del 192.168.10.1 via 10.0.2.1
 491 ip -net ns2 route add default via 10.0.2.1
 492 ip -net ns2 route add default via dead:2::1
 493
 494 if test_tcp_forwarding ns1 ns2; then
 495         echo "PASS: ipsec tunnel mode for ns1/ns2"
 496 else
 497         echo "FAIL: ipsec tunnel mode for ns1/ns2"
 498         ip netns exec nsr1 nft list ruleset 1>&2
 499         ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2
 500 fi
 501
 502 exit $ret