#!/bin/bash
# $HeadURL$ $LastChangedRevision$

PROGNAME=${0##*/}
SANDPIT=$(pwd)
#  Quoted so that an install prefix containing whitespace does not get word-split.
. "$(ade-config ade_share_prefix)/include/adetestsupport.sh"

##############################################################################
#
#  Purpose of test: to check that ade_fork_multi() does the right things.
#
#  The test is driven entirely through 'adeperlf', which is handed a snippet
#  of Perl that calls ADE::fork_multi() with a timeout, a concurrency limit,
#  a list of commands and a reference to an array for the return codes.
#  Everything the test "asserts" is whatever it writes to stdout.
#
##############################################################################

#  exec() will use $SHELL to run the command if the command contains shell meta chars.
#  Previously our command was 'sleep X && echo X', so that launched a shell. The
#  problem with that is that killing the shell doesn't kill the sleep (although
#  it does stop the shell going on to run the echo). Here's how to demonstrate
#  that:
#
#      sugo# pgrep sleep
#      sugo# bash -c 'sleep 123 && echo hello' &
#      [2] 3635
#      sugo# kill $!
#      sugo#
#      [2]-  Terminated              bash -c 'sleep 123 && echo hello'
#      sugo# !pgre
#      pgrep sleep
#      3636
#      sugo#
#
#  So instead we provide an implementation that does not rely on a shell.

SLEEP_AND_ECHO=$ADETEST_MODROOT/tests/bin/sleep-and-echo

##############################################################################
#  Test 1: three trivial commands, one at a time.
#  (INNER_LOOPS is only used in the message here; the command list is
#  hard-coded. NOTE(review): TIMEOUT=0 presumably means "no timeout" --
#  confirm against ADE::fork_multi().)
##############################################################################

INNER_LOOPS=3
THREADS=1
TIMEOUT=0
echo "launching $INNER_LOOPS processes with maximum concurrency $THREADS and timeout ${TIMEOUT}s ..."
adeperlf "my(@rcs)=();ADE::fork_multi(\$errstack_ref, $TIMEOUT, $THREADS, ['echo apple','echo banana','echo coconut'], \\@rcs);"
echo

##############################################################################
#  Test 2: repeat test 1 many times. 'paste -s' joins each run's output onto
#  a single line, so 'sort | uniq -c' yields one count per distinct ordering.
##############################################################################

OUTER_LOOPS=100
echo "doing the same again $OUTER_LOOPS times and checking the output order is always the same ..."
for ((X = 1; X <= OUTER_LOOPS; X++)); do
    adeperlf "my(@rcs)=();ADE::fork_multi(\$errstack_ref, $TIMEOUT, $THREADS, ['echo apple','echo banana','echo coconut'], \\@rcs);" | paste -d' ' -s
done | sort | uniq -c
echo

##############################################################################
#  Test 3: many short commands with limited concurrency; only the number of
#  output lines containing 'XXX' is reported.
##############################################################################

INNER_LOOPS=1000
THREADS=50
TIMEOUT=0
echo "launching $INNER_LOOPS processes with maximum concurrency $THREADS and timeout ${TIMEOUT}s ..."
#  Note the use of '[ .... ]' to cast the array that map returns into an array ref!
adeperlf "my(@rcs)=();ADE::fork_multi(\$errstack_ref, $TIMEOUT, $THREADS, [ map { \"echo XXX\$_\" } 0..$((INNER_LOOPS-1)) ], \\@rcs);" | grep -c XXX
echo

##############################################################################
#  Test 4: commands that sleep for 0, 10, 20, ... seconds and then echo,
#  run with a 15s timeout. stderr is merged into stdout so that it is part
#  of the test output.
#  (NOTE(review): THREADS=0 presumably means "no concurrency limit" --
#  confirm against ADE::fork_multi().)
##############################################################################

INNER_LOOPS=50
THREADS=0
TIMEOUT=15
echo "launching $INNER_LOOPS processes with maximum concurrency $THREADS and timeout ${TIMEOUT}s ..."
adeperlf "my(@rcs)=();ADE::fork_multi(\$errstack_ref, $TIMEOUT, $THREADS, [ map { sprintf('$SLEEP_AND_ECHO %d', 10*\$_) } 0..$((INNER_LOOPS-1)) ], \\@rcs);" 2>&1
echo

##############################################################################
#  Test 5: as test 4, but many instances launched in parallel.
##############################################################################

#  With outer loop 100 I get fork failures (not surprising as it launches 100*50 processes very very
#  quickly).
#  With a sleep of 0, 10, 20, ... and a timeout of 15 then sometimes the 10 takes more than 15 (if the
#  system is loaded). So instead we do sleeps of 0, 20, 40, ... and set the timeout to be 30. This
#  way a bigger "lateness" (<10s rather than <5s) is tolerated.
OUTER_LOOPS=50
TIMEOUT=30
echo "same again but $OUTER_LOOPS times in parallel ..."
{
    #  Each instance runs in the background with its output joined onto one line;
    #  'wait' holds the group open until every instance has finished.
    for ((I = 1; I <= OUTER_LOOPS; I++)); do
        adeperlf "my(@rcs)=();ADE::fork_multi(\$errstack_ref, $TIMEOUT, $THREADS, [ map { sprintf('$SLEEP_AND_ECHO %d', 20*\$_) } 0..$((INNER_LOOPS-1)) ], \\@rcs);" 2>&1 | paste -d' ' -s &
    done
    wait
} | sort | uniq -c
echo

##############################################################################
#  Test 6: look for anomalous execution times.
##############################################################################

#  This one is hideous! It is a test that fails if ade_fork_multi() uses Time::HiRes::alarm() but succeeds,
#  albeit with minor delays, if it uses the stock alarm() function. The problem is that Time::HiRes::alarm()
#  fails to send a SIGALRM or if it does then POSIX::sigsuspend() is not seeing it.
#
#  We should see:
#
#      first child (which is meant to run for 0 seconds) exits immediately
#      second child (which is meant to run for 10 seconds) exits 10 seconds later
#      third child (which is meant to run for 20 seconds) exits 5 seconds later because it gets killed
#      same for all later children
#
#  meaning that the entire adeperlf command completes in around 15s. But with
#  Time::HiRes::alarm() what happens is:
#
#      first child (which is meant to run for 0 seconds) exits immediately
#      second child (which is meant to run for 10 seconds) exits 10 seconds later
#      third child (which is meant to run for 20 seconds) exits 5 seconds later because it gets killed
#      ...
#      last child is killed (i.e. signal sent) and it exits but main loop does not see this and, because
#      it's running alarm() rather than alarm(), which it would do if there
#      was still a child process that might reach its timeout.
#      (NOTE(review): the sentence above is garbled -- one of the two function names and the end
#      of the sentence appear to have been lost. Presumably the point is that, with no child left
#      that could time out, the main loop waits without an alarm pending and so never wakes up --
#      confirm against the ADE::fork_multi() implementation.)
#
#  It looks like SIGCHLD is (1) either not sent because Time::HiRes::alarm() somehow blocks it
#  or (2) SIGCHLD is sent but some interaction between Time::HiRes::alarm() and POSIX::sigsuspend()
#  means that the latter does not receive or see the signal and therefore does not return. The
#  result is that the entire adeperlf command completes in around 75s rather than 15s.
#
#  This test runs adeperlf lots of times and looks for anomalous execution times.
#
#  (The spelling "anomolous" in the message below is left untouched on purpose: it is runtime
#  output, and the test's output may be compared against a stored reference.)

echo "running the same again but looking for anomolous execution times ..."
QUICK=0
SLOW=0
OUTER_LOOPS=30
TIMEOUT=15
#  INNER_LOOPS (50) and THREADS (0) are still as set for test 4.
for ((X = 1; X <= OUTER_LOOPS; X++)); do
    #  Note that we don't use sleep-and-echo 'cos we're concerned with timings, not output.
    #  '-f %e' makes time report the elapsed wall-clock seconds (e.g. "15.02") on stderr, which
    #  is merged into the captured output.
    T="$(/usr/bin/time -f %e adeperlf -d 1 "my(@rcs)=();ADE::fork_multi(\$errstack_ref, $TIMEOUT, $THREADS, [ map { sprintf('sleep %d', 10*\$_) } 0..$((INNER_LOOPS-1)) ], \\@rcs);" 2>&1)"
    #  The capture also contains anything adeperlf itself wrote, plus GNU time's
    #  "Command exited with non-zero status N" line if adeperlf failed. The elapsed time is
    #  always the last line, so keep only that; otherwise the arithmetic below would hit a
    #  syntax error and the run would be silently counted as quick.
    T=${T##*$'\n'}
    #  Compare whole seconds only: ${T%.*} drops the fractional part.
    if ((${T%.*}>20)); then
        ((SLOW++))
    else
        ((QUICK++))
    fi
done
echo "Quick: $QUICK"
echo "Slow: $SLOW"
echo

echo "end of tests"