tools/testing/selftests/rcutorture/bin/kvm-remote.sh

   1 #!/bin/bash
   2 # SPDX-License-Identifier: GPL-2.0+
   3 #
   4 # Run a series of tests on remote systems under KVM.
   5 #
   6 # Usage: kvm-remote.sh "systems" [ <kvm.sh args> ]
   7 #        kvm-remote.sh "systems" /path/to/old/run [ <kvm-again.sh args> ]
   8 #
   9 # Copyright (C) 2021 Facebook, Inc.
  10 #
  11 # Authors: Paul E. McKenney <paulmck@kernel.org>
  12
  13 scriptname=$0
  14 args="$*"
  15
  16 if ! test -d tools/testing/selftests/rcutorture/bin
  17 then
  18         echo $scriptname must be run from top-level directory of kernel source tree.
  19         exit 1
  20 fi
  21
  22 RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
  23 PATH=${RCUTORTURE}/bin:$PATH; export PATH
  24 . functions.sh
  25
  26 starttime="`get_starttime`"
  27
  28 systems="$1"
  29 if test -z "$systems"
  30 then
  31         echo $scriptname: Empty list of systems will go nowhere good, giving up.
  32         exit 1
  33 fi
  34 shift
  35
  36 # Pathnames:
  37 # T:      /tmp/kvm-remote.sh.$$
  38 # resdir: /tmp/kvm-remote.sh.$$/res
  39 # rundir: /tmp/kvm-remote.sh.$$/res/$ds ("-remote" suffix)
  40 # oldrun: `pwd`/tools/testing/.../res/$otherds
  41 #
  42 # Pathname segments:
  43 # TD:     kvm-remote.sh.$$
  44 # ds:     yyyy.mm.dd-hh.mm.ss-remote
  45
  46 TD=kvm-remote.sh.$$
  47 T=${TMPDIR-/tmp}/$TD
  48 trap 'rm -rf $T' 0
  49 mkdir $T
  50
  51 resdir="$T/res"
  52 ds=`date +%Y.%m.%d-%H.%M.%S`-remote
  53 rundir=$resdir/$ds
  54 echo Results directory: $rundir
  55 echo $scriptname $args
  56 if echo $1 | grep -q '^--'
  57 then
  58         # Fresh build.  Create a datestamp unless the caller supplied one.
  59         datestamp="`echo "$@" | awk -v ds="$ds" '{
  60                 for (i = 1; i < NF; i++) {
  61                         if ($i == "--datestamp") {
  62                                 ds = "";
  63                                 break;
  64                         }
  65                 }
  66                 if (ds != "")
  67                         print "--datestamp " ds;
  68         }'`"
  69         kvm.sh --remote "$@" $datestamp --buildonly > $T/kvm.sh.out 2>&1
  70         ret=$?
  71         if test "$ret" -ne 0
  72         then
  73                 echo $scriptname: kvm.sh failed exit code $?
  74                 cat $T/kvm.sh.out
  75                 exit 2
  76         fi
  77         oldrun="`grep -m 1 "^Results directory: " $T/kvm.sh.out | awk '{ print $3 }'`"
  78         touch "$oldrun/remote-log"
  79         echo $scriptname $args >> "$oldrun/remote-log"
  80         echo | tee -a "$oldrun/remote-log"
  81         echo " ----" kvm.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
  82         cat $T/kvm.sh.out | tee -a "$oldrun/remote-log"
  83         # We are going to run this, so remove the buildonly files.
  84         rm -f "$oldrun"/*/buildonly
  85         kvm-again.sh $oldrun --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
  86         ret=$?
  87         if test "$ret" -ne 0
  88         then
  89                 echo $scriptname: kvm-again.sh failed exit code $? | tee -a "$oldrun/remote-log"
  90                 cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
  91                 exit 2
  92         fi
  93 else
  94         # Re-use old run.
  95         oldrun="$1"
  96         if ! echo $oldrun | grep -q '^/'
  97         then
  98                 oldrun="`pwd`/$oldrun"
  99         fi
 100         shift
 101         touch "$oldrun/remote-log"
 102         echo $scriptname $args >> "$oldrun/remote-log"
 103         kvm-again.sh "$oldrun" "$@" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
 104         ret=$?
 105         if test "$ret" -ne 0
 106         then
 107                 echo $scriptname: kvm-again.sh failed exit code $? | tee -a "$oldrun/remote-log"
 108                 cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
 109                 exit 2
 110         fi
 111         cp -a "$rundir" "$RCUTORTURE/res/"
 112         oldrun="$RCUTORTURE/res/$ds"
 113 fi
 114 echo | tee -a "$oldrun/remote-log"
 115 echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
 116 cat $T/kvm-again.sh.out
 117 echo | tee -a "$oldrun/remote-log"
 118 echo Remote run directory: $rundir | tee -a "$oldrun/remote-log"
 119 echo Local build-side run directory: $oldrun | tee -a "$oldrun/remote-log"
 120
 121 # Create the kvm-remote-N.sh scripts in the bin directory.
 122 awk < "$rundir"/scenarios -v dest="$T/bin" -v rundir="$rundir" '
 123 {
 124         n = $1;
 125         sub(/\./, "", n);
 126         fn = dest "/kvm-remote-" n ".sh"
 127         print "kvm-remote-noreap.sh " rundir " &" > fn;
 128         scenarios = "";
 129         for (i = 2; i <= NF; i++)
 130                 scenarios = scenarios " " $i;
 131         print "kvm-test-1-run-batch.sh" scenarios >> fn;
 132         print "sync" >> fn;
 133         print "rm " rundir "/remote.run" >> fn;
 134 }'
 135 chmod +x $T/bin/kvm-remote-*.sh
 136 ( cd "`dirname $T`"; tar -chzf $T/binres.tgz "$TD/bin" "$TD/res" )
 137
 138 # Check first to avoid the need for cleanup for system-name typos
 139 for i in $systems
 140 do
 141         ncpus="`ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
 142         ret=$?
 143         if test "$ret" -ne 0
 144         then
 145                 echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
 146                 exit 4
 147         fi
 148         echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
 149 done
 150
 151 # Download and expand the tarball on all systems.
 152 echo Build-products tarball: `du -h $T/binres.tgz` | tee -a "$oldrun/remote-log"
 153 for i in $systems
 154 do
 155         echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
 156         cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
 157         ret=$?
 158         tries=0
 159         while test "$ret" -ne 0
 160         do
 161                 echo Unable to download $T/binres.tgz to system $i, waiting and then retrying.  $tries prior retries. | tee -a "$oldrun/remote-log"
 162                 sleep 60
 163                 cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
 164                 ret=$?
 165                 if test "$ret" -ne 0
 166                 then
 167                         if test "$tries" > 5
 168                         then
 169                                 echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
 170                                 exit 10
 171                         fi
 172                 fi
 173                 tries=$((tries+1))
 174         done
 175 done
 176
 177 # Function to check for presence of a file on the specified system.
 178 # Complain if the system cannot be reached, and retry after a wait.
 179 # Currently just waits forever if a machine disappears.
 180 #
 181 # Usage: checkremotefile system pathname
 182 checkremotefile () {
 183         local ret
 184         local sleeptime=60
 185
 186         while :
 187         do
 188                 ssh $1 "test -f \"$2\""
 189                 ret=$?
 190                 if test "$ret" -eq 255
 191                 then
 192                         echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log"
 193                 elif test "$ret" -eq 0
 194                 then
 195                         return 0
 196                 elif test "$ret" -eq 1
 197                 then
 198                         echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\" | tee -a "$oldrun/remote-log"
 199                         return 1
 200                 else
 201                         echo " ---" Exit code $ret: ssh $1 test -f \"$2\", retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log"
 202                         return $ret
 203                 fi
 204                 sleep $sleeptime
 205         done
 206 }
 207
 208 # Function to start batches on idle remote $systems
 209 #
 210 # Usage: startbatches curbatch nbatches
 211 #
 212 # Batches are numbered starting at 1.  Returns the next batch to start.
 213 # Be careful to redirect all debug output to FD 2 (stderr).
 214 startbatches () {
 215         local curbatch="$1"
 216         local nbatches="$2"
 217         local ret
 218
 219         # Each pass through the following loop examines one system.
 220         for i in $systems
 221         do
 222                 if test "$curbatch" -gt "$nbatches"
 223                 then
 224                         echo $((nbatches + 1))
 225                         return 0
 226                 fi
 227                 if checkremotefile "$i" "$resdir/$ds/remote.run" 1>&2
 228                 then
 229                         continue # System still running last test, skip.
 230                 fi
 231                 ssh "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
 232                 ret=$?
 233                 if test "$ret" -ne 0
 234                 then
 235                         echo ssh $i failed: exitcode $ret 1>&2
 236                         exit 11
 237                 fi
 238                 echo " ----" System $i Batch `head -n $curbatch < "$rundir"/scenarios | tail -1` `date` 1>&2
 239                 curbatch=$((curbatch + 1))
 240         done
 241         echo $curbatch
 242 }
 243
 244 # Launch all the scenarios.
 245 nbatches="`wc -l "$rundir"/scenarios | awk '{ print $1 }'`"
 246 curbatch=1
 247 while test "$curbatch" -le "$nbatches"
 248 do
 249         startbatches $curbatch $nbatches > $T/curbatch 2> $T/startbatches.stderr
 250         curbatch="`cat $T/curbatch`"
 251         if test -s "$T/startbatches.stderr"
 252         then
 253                 cat "$T/startbatches.stderr" | tee -a "$oldrun/remote-log"
 254         fi
 255         if test "$curbatch" -le "$nbatches"
 256         then
 257                 sleep 30
 258         fi
 259 done
 260 echo All batches started. `date` | tee -a "$oldrun/remote-log"
 261
 262 # Wait for all remaining scenarios to complete and collect results.
 263 for i in $systems
 264 do
 265         while checkremotefile "$i" "$resdir/$ds/remote.run"
 266         do
 267                 sleep 30
 268         done
 269         echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
 270         ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
 271 done
 272
 273 ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
 274 exit "`cat $T/exitcode`"