2 # SPDX-License-Identifier: GPL-2.0
4 # Kselftest framework requirement - SKIP code is 4.
7 # Conntrack needs to reassemble fragments in order to have complete
8 # packets for rule matching. Reassembly can lead to packet loss.
10 # Consider the following setup:
11 # +--------+       +---------+       +--------+
12 # |Router A|-------|Wanrouter|-------|Router B|
13 # |        |.IPIP..|         |..IPIP.|        |
14 # +--------+       +---------+       +--------+
17 # +--------+                         +--------+
18 # |Client A|                         |Client B|
20 # +--------+                         +--------+
22 # Router A and Router B use IPIP tunnel interfaces to tunnel traffic
23 # between Client A and Client B over WAN. Wanrouter has MTU 1400 set on its interfaces.
# Random 8-character suffix; `mktemp -u` only prints a name and creates no file.
# NOTE(review): presumably used to build unique namespace names; the
# assignments of r_a/r_b/r_w/c_a/c_b are not visible here.
26 rnd=$(mktemp -u XXXXXXXX)
# Runs the command line given in $1 with all of its output discarded and, on
# failure, prints a SKIP message built from $2.
# NOTE(review): looks like the body of checktool(); its header and the exit
# path are not visible here.
36 if ! $1 > /dev/null 2>&1; then
37 echo "SKIP: Could not $2"
# Probe for every prerequisite: iptables, ip, nc, and the ability to create a
# network namespace. The last probe really runs `ip netns add ${r_a}`, so
# namespace ${r_a} exists once it succeeds.
42 checktool "iptables --version" "run test without iptables"
43 checktool "ip -Version" "run test without ip tool"
44 checktool "which nc" "run test without nc (netcat)"
45 checktool "ip netns add ${r_a}" "create net namespace"
# Iterates over the four namespaces other than ${r_a}.
# NOTE(review): loop body not visible; presumably creates them.
47 for n in ${r_b} ${r_w} ${c_a} ${c_b};do
# Iterates over all five namespaces.
# NOTE(review): loop body not visible here.
52 for n in ${r_a} ${r_b} ${r_w} ${c_a} ${c_b};do
# Start a background UDP listener on port 5000 inside ${c_b}; everything it
# receives is written to ${rx}. stdin is redirected from /dev/null.
63 ip netns exec ${c_b} nc -n -w 3 -q 3 -u -l -p 5000 > ${rx} < /dev/null &
# Build 1400 "a" characters (NUL bytes from /dev/zero translated by tr) and
# send them over UDP from ${c_a} to 192.168.20.2 port 5000.
67 head -c1400 /dev/zero | tr "\000" "a" | ip netns exec ${c_a} nc -n -w 1 -u 192.168.20.2 5000
# Number of bytes the listener stored in ${rx}.
72 bytes=$(wc -c < ${rx})
# The check passes only when exactly 1400 bytes arrived.
# NOTE(review): $msg is set outside the visible lines.
74 if [ $bytes -eq 1400 ];then
75 echo "OK: PMTU $msg connection tracking"
77 echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400"
82 # Detailed setup for Router A
83 # ---------------------------
86 # veth1: 192.168.10.1/24
87 # ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1
89 # 192.168.20.0/24 dev ipip0 (192.168.20.0/24 is subnet of Client B)
90 # 10.4.4.0/24 via 10.2.2.254 (Router B's WAN subnet via Wanrouter)
91 # No iptables rules at all.
# Wire up Router A: its veth0 is paired with veth0 in the Wanrouter namespace,
# its veth1 is paired with veth0 in the Client A namespace.
93 ip link add veth0 netns ${r_a} type veth peer name veth0 netns ${r_w}
94 ip link add veth1 netns ${r_a} type veth peer name veth0 netns ${c_a}
# Create the IPIP tunnel device; if that fails the script exits with
# $ksft_skip. NOTE(review): l_addr/r_addr are assigned outside the visible lines.
98 ip netns exec ${r_a} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
# Bring up loopback, both veth ends and the tunnel device.
100 for dev in lo veth0 veth1 ipip0; do
101 ip -net ${r_a} link set $dev up
# veth0 carries the WAN-side address, veth1 the Client A LAN address.
104 ip -net ${r_a} addr add 10.2.2.1/24 dev veth0
105 ip -net ${r_a} addr add 192.168.10.1/24 dev veth1
# 192.168.20.0/24 is routed into the tunnel; 10.4.4.0/24 is reached through
# 10.2.2.254, the address configured on the Wanrouter's veth0.
107 ip -net ${r_a} route add 192.168.20.0/24 dev ipip0
108 ip -net ${r_a} route add 10.4.4.0/24 via 10.2.2.254
# Enable IPv4 forwarding in this namespace.
110 ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
112 # Detailed setup for Router B
113 # ---------------------------
116 # veth1: 192.168.20.1/24
117 # ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1
119 # 192.168.10.0/24 dev ipip0 (192.168.10.0/24 is subnet of Client A)
120 # 10.2.2.0/24 via 10.4.4.254 (Router A's WAN subnet via Wanrouter)
121 # No iptables rules at all.
# Wire up Router B: its veth0 is paired with veth1 in the Wanrouter namespace,
# its veth1 is paired with veth0 in the Client B namespace.
123 ip link add veth0 netns ${r_b} type veth peer name veth1 netns ${r_w}
124 ip link add veth1 netns ${r_b} type veth peer name veth0 netns ${c_b}
# Create the IPIP tunnel device; if that fails the script exits with
# $ksft_skip. NOTE(review): l_addr/r_addr are assigned outside the visible lines.
129 ip netns exec ${r_b} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
# Bring up loopback, both veth ends and the tunnel device.
131 for dev in lo veth0 veth1 ipip0; do
132 ip -net ${r_b} link set $dev up
# veth0 carries the WAN-side address, veth1 the Client B LAN address.
135 ip -net ${r_b} addr add 10.4.4.1/24 dev veth0
136 ip -net ${r_b} addr add 192.168.20.1/24 dev veth1
# 192.168.10.0/24 is routed into the tunnel; 10.2.2.0/24 is reached through
# 10.4.4.254, the address configured on the Wanrouter's veth1.
138 ip -net ${r_b} route add 192.168.10.0/24 dev ipip0
139 ip -net ${r_b} route add 10.2.2.0/24 via 10.4.4.254
# Enable IPv4 forwarding in this namespace.
140 ip netns exec ${r_b} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
# Client A: address 192.168.10.2/24 on veth0, loopback and veth0 up, default
# route via 192.168.10.1 (the address given to Router A's veth1).
143 ip -net ${c_a} addr add 192.168.10.2/24 dev veth0
144 ip -net ${c_a} link set dev lo up
145 ip -net ${c_a} link set dev veth0 up
146 ip -net ${c_a} route add default via 192.168.10.1
# Client B: address 192.168.20.2/24 on veth0, veth0 and loopback up, default
# route via 192.168.20.1 (the address given to Router B's veth1).
149 ip -net ${c_b} addr add 192.168.20.2/24 dev veth0
150 ip -net ${c_b} link set dev veth0 up
151 ip -net ${c_b} link set dev lo up
152 ip -net ${c_b} route add default via 192.168.20.1
# Wanrouter: one address in each router's WAN subnet (veth0 faces Router A,
# veth1 faces Router B).
155 ip -net ${r_w} addr add 10.2.2.254/24 dev veth0
156 ip -net ${r_w} addr add 10.4.4.254/24 dev veth1
# Bring the interfaces up; both WAN links are limited to MTU 1400.
158 ip -net ${r_w} link set dev lo up
159 ip -net ${r_w} link set dev veth0 up mtu 1400
160 ip -net ${r_w} link set dev veth1 up mtu 1400
# Apply the same 1400-byte MTU on the routers' ends of the WAN links.
162 ip -net ${r_a} link set dev veth0 mtu 1400
163 ip -net ${r_b} link set dev veth0 mtu 1400
# Enable IPv4 forwarding in the Wanrouter namespace.
165 ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
169 # Running tracepath from Client A to Client B shows PMTU discovery is working
172 # clienta:~# tracepath 192.168.20.2
173 # 1?: [LOCALHOST] pmtu 1500
174 # 1: 192.168.10.1 0.867ms
175 # 1: 192.168.10.1 0.302ms
176 # 2: 192.168.10.1 0.312ms pmtu 1480
178 # 3: 192.168.10.1 0.510ms pmtu 1380
179 # 3: 192.168.20.2 2.320ms reached
180 # Resume: pmtu 1380 hops 3 back 3
182 # ip netns exec ${c_a} traceroute --mtu 192.168.20.2
184 # Router A has learned PMTU (1400) to Router B from Wanrouter.
185 # Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B from Router A.
188 #Send large UDP packet
189 #---------------------
190 #Now we send a 1400 bytes UDP packet from Client A to Client B:
192 # clienta:~# head -c1400 /dev/zero | tr "\000" "a" | nc -u 192.168.20.2 5000
195 # The IPv4 stack on Client A already knows the PMTU to Client B, so the
196 # UDP packet is sent as two fragments (1380 + 20). Router A forwards the
197 # fragments between veth1 and ipip0. The fragments fit into the tunnel and
198 # reach their destination.
200 #Once connection tracking is enabled on Router A (iptables rule below),
201 #sending the large UDP packet again makes Router A reassemble the fragments
202 #before routing the packet over ipip0. The resulting IPIP packet is too big
203 #(1400) for the tunnel PMTU (1380) to Router B, so Router A drops it before sending.
# Append a FORWARD rule in Router A that matches packets in conntrack state
# NEW. The rule has no target (-j), so it does not change any packet's verdict.
# NOTE(review): presumably its only purpose is to activate connection tracking
# (and with it fragment reassembly) in this namespace — confirm.
205 ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW