tools/testing/selftests/rcutorture/bin/kvm-remote.sh

   1 #!/bin/bash
   2 # SPDX-License-Identifier: GPL-2.0+
   3 #
   4 # Run a series of tests on remote systems under KVM.
   5 #
   6 # Usage: kvm-remote.sh "systems" [ <kvm.sh args> ]
   7 #        kvm-remote.sh "systems" /path/to/old/run [ <kvm-again.sh args> ]
   8 #
   9 # Copyright (C) 2021 Facebook, Inc.
  10 #
  11 # Authors: Paul E. McKenney <paulmck@kernel.org>
  12
  13 scriptname=$0
  14 args="$*"
  15
  16 if ! test -d tools/testing/selftests/rcutorture/bin
  17 then
  18         echo $scriptname must be run from top-level directory of kernel source tree.
  19         exit 1
  20 fi
  21
  22 KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
  23 PATH=${KVM}/bin:$PATH; export PATH
  24 . functions.sh
  25
  26 starttime="`get_starttime`"
  27
  28 systems="$1"
  29 if test -z "$systems"
  30 then
  31         echo $scriptname: Empty list of systems will go nowhere good, giving up.
  32         exit 1
  33 fi
  34 shift
  35
  36 # Pathnames:
  37 # T:      /tmp/kvm-remote.sh.$$
  38 # resdir: /tmp/kvm-remote.sh.$$/res
  39 # rundir: /tmp/kvm-remote.sh.$$/res/$ds ("-remote" suffix)
  40 # oldrun: `pwd`/tools/testing/.../res/$otherds
  41 #
  42 # Pathname segments:
  43 # TD:     kvm-remote.sh.$$
  44 # ds:     yyyy.mm.dd-hh.mm.ss-remote
  45
  46 TD=kvm-remote.sh.$$
  47 T=${TMPDIR-/tmp}/$TD
  48 trap 'rm -rf $T' 0
  49 mkdir $T
  50
  51 resdir="$T/res"
  52 ds=`date +%Y.%m.%d-%H.%M.%S`-remote
  53 rundir=$resdir/$ds
  54 echo Results directory: $rundir
  55 echo $scriptname $args
  56 if echo $1 | grep -q '^--'
  57 then
  58         # Fresh build.  Create a datestamp unless the caller supplied one.
  59         datestamp="`echo "$@" | awk -v ds="$ds" '{
  60                 for (i = 1; i < NF; i++) {
  61                         if ($i == "--datestamp") {
  62                                 ds = "";
  63                                 break;
  64                         }
  65                 }
  66                 if (ds != "")
  67                         print "--datestamp " ds;
  68         }'`"
  69         kvm.sh --remote "$@" $datestamp --buildonly > $T/kvm.sh.out 2>&1
  70         ret=$?
  71         if test "$ret" -ne 0
  72         then
  73                 echo $scriptname: kvm.sh failed exit code $?
  74                 cat $T/kvm.sh.out
  75                 exit 2
  76         fi
  77         oldrun="`grep -m 1 "^Results directory: " $T/kvm.sh.out | awk '{ print $3 }'`"
  78         touch "$oldrun/remote-log"
  79         echo $scriptname $args >> "$oldrun/remote-log"
  80         echo | tee -a "$oldrun/remote-log"
  81         echo " ----" kvm.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
  82         cat $T/kvm.sh.out | tee -a "$oldrun/remote-log"
  83         # We are going to run this, so remove the buildonly files.
  84         rm -f "$oldrun"/*/buildonly
  85         kvm-again.sh $oldrun --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
  86         ret=$?
  87         if test "$ret" -ne 0
  88         then
  89                 echo $scriptname: kvm-again.sh failed exit code $? | tee -a "$oldrun/remote-log"
  90                 cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
  91                 exit 2
  92         fi
  93 else
  94         # Re-use old run.
  95         oldrun="$1"
  96         if ! echo $oldrun | grep -q '^/'
  97         then
  98                 oldrun="`pwd`/$oldrun"
  99         fi
 100         shift
 101         touch "$oldrun/remote-log"
 102         echo $scriptname $args >> "$oldrun/remote-log"
 103         kvm-again.sh "$oldrun" "$@" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
 104         ret=$?
 105         if test "$ret" -ne 0
 106         then
 107                 echo $scriptname: kvm-again.sh failed exit code $? | tee -a "$oldrun/remote-log"
 108                 cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
 109                 exit 2
 110         fi
 111         cp -a "$rundir" "$KVM/res/"
 112         oldrun="$KVM/res/$ds"
 113 fi
 114 echo | tee -a "$oldrun/remote-log"
 115 echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
 116 cat $T/kvm-again.sh.out
 117 echo | tee -a "$oldrun/remote-log"
 118 echo Remote run directory: $rundir | tee -a "$oldrun/remote-log"
 119 echo Local build-side run directory: $oldrun | tee -a "$oldrun/remote-log"
 120
 121 # Create the kvm-remote-N.sh scripts in the bin directory.
 122 awk < "$rundir"/scenarios -v dest="$T/bin" -v rundir="$rundir" '
 123 {
 124         n = $1;
 125         sub(/\./, "", n);
 126         fn = dest "/kvm-remote-" n ".sh"
 127         print "kvm-remote-noreap.sh " rundir " &" > fn;
 128         scenarios = "";
 129         for (i = 2; i <= NF; i++)
 130                 scenarios = scenarios " " $i;
 131         print "kvm-test-1-run-batch.sh" scenarios >> fn;
 132         print "sync" >> fn;
 133         print "rm " rundir "/remote.run" >> fn;
 134 }'
 135 chmod +x $T/bin/kvm-remote-*.sh
 136 ( cd "`dirname $T`"; tar -chzf $T/binres.tgz "$TD/bin" "$TD/res" )
 137
 138 # Check first to avoid the need for cleanup for system-name typos
 139 for i in $systems
 140 do
 141         ncpus="`ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
 142         echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
 143         ret=$?
 144         if test "$ret" -ne 0
 145         then
 146                 echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
 147                 exit 4 | tee -a "$oldrun/remote-log"
 148         fi
 149 done
 150
 151 # Download and expand the tarball on all systems.
 152 for i in $systems
 153 do
 154         echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
 155         cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
 156         ret=$?
 157         if test "$ret" -ne 0
 158         then
 159                 echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
 160                 exit 10 | tee -a "$oldrun/remote-log"
 161         fi
 162 done
 163
 164 # Function to check for presence of a file on the specified system.
 165 # Complain if the system cannot be reached, and retry after a wait.
 166 # Currently just waits forever if a machine disappears.
 167 #
 168 # Usage: checkremotefile system pathname
 169 checkremotefile () {
 170         local ret
 171         local sleeptime=60
 172
 173         while :
 174         do
 175                 ssh $1 "test -f \"$2\""
 176                 ret=$?
 177                 if test "$ret" -eq 255
 178                 then
 179                         echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date`
 180                 elif test "$ret" -eq 0
 181                 then
 182                         return 0
 183                 elif test "$ret" -eq 1
 184                 then
 185                         echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\"
 186                         return 1
 187                 else
 188                         echo " ---" Exit code $ret: ssh $1 test -f \"$2\", retry after $sleeptime seconds. `date`
 189                         return $ret
 190                 fi
 191                 sleep $sleeptime
 192         done
 193 }
 194
 195 # Function to start batches on idle remote $systems
 196 #
 197 # Usage: startbatches curbatch nbatches
 198 #
 199 # Batches are numbered starting at 1.  Returns the next batch to start.
 200 # Be careful to redirect all debug output to FD 2 (stderr).
 201 startbatches () {
 202         local curbatch="$1"
 203         local nbatches="$2"
 204         local ret
 205
 206         # Each pass through the following loop examines one system.
 207         for i in $systems
 208         do
 209                 if test "$curbatch" -gt "$nbatches"
 210                 then
 211                         echo $((nbatches + 1))
 212                         return 0
 213                 fi
 214                 if checkremotefile "$i" "$resdir/$ds/remote.run" 1>&2
 215                 then
 216                         continue # System still running last test, skip.
 217                 fi
 218                 ssh "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
 219                 ret=$?
 220                 if test "$ret" -ne 0
 221                 then
 222                         echo ssh $i failed: exitcode $ret 1>&2
 223                         exit 11
 224                 fi
 225                 echo " ----" System $i Batch `head -n $curbatch < "$rundir"/scenarios | tail -1` `date` 1>&2
 226                 curbatch=$((curbatch + 1))
 227         done
 228         echo $curbatch
 229 }
 230
 231 # Launch all the scenarios.
 232 nbatches="`wc -l "$rundir"/scenarios | awk '{ print $1 }'`"
 233 curbatch=1
 234 while test "$curbatch" -le "$nbatches"
 235 do
 236         startbatches $curbatch $nbatches > $T/curbatch 2> $T/startbatches.stderr
 237         curbatch="`cat $T/curbatch`"
 238         if test -s "$T/startbatches.stderr"
 239         then
 240                 cat "$T/startbatches.stderr" | tee -a "$oldrun/remote-log"
 241         fi
 242         if test "$curbatch" -le "$nbatches"
 243         then
 244                 sleep 30
 245         fi
 246 done
 247 echo All batches started. `date`
 248
 249 # Wait for all remaining scenarios to complete and collect results.
 250 for i in $systems
 251 do
 252         while checkremotefile "$i" "$resdir/$ds/remote.run"
 253         do
 254                 sleep 30
 255         done
 256         echo " ---" Collecting results from $i `date`
 257         ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
 258 done
 259
 260 ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
 261 exit "`cat $T/exitcode`"