head 1.9; access; symbols; locks; strict; comment @# @; 1.9 date 2001.08.02.08.45.19; author ahuxley; state Exp; branches; next 1.8; 1.8 date 2001.06.05.09.23.48; author ahuxley; state Exp; branches; next 1.7; 1.7 date 2001.05.17.06.20.36; author ahuxley; state Exp; branches; next 1.6; 1.6 date 2001.05.15.06.40.59; author ahuxley; state Exp; branches; next 1.5; 1.5 date 2001.05.11.07.31.12; author ahuxley; state Exp; branches; next 1.4; 1.4 date 2001.05.09.11.02.56; author ahuxley; state Exp; branches; next 1.3; 1.3 date 2001.05.07.07.25.06; author ahuxley; state Exp; branches; next 1.2; 1.2 date 2001.05.04.07.13.00; author ahuxley; state Exp; branches; next 1.1; 1.1 date 2001.05.03.10.48.21; author ahuxley; state Exp; branches; next ; desc @PORT OF NON-SHPP VERSION OF AUBDOM - WITH ADDED CACHE SUPPORT @ 1.9 log @added '-s' and '-g' to restrict server list and newsgroup list to be processed to only one of the ones it would have processed otherwise long group list construction broke xargs, so now use while loop @ text @#!MARKER_SHELL_CMD #shpp include ../bldcfg/paths.shpp # A good path is needed several times in this program (for aub and lynx) #OLDPATH="$PATH" #PATH=/bin:/usr/bin:/sbin:/usr/sbin PROGNAME=`basename $0` RCS_ID='$Header: /home/ahuxley/dev/active/small/bin/RCS/aubdom.shpp,v 1.8 2001/06/05 09:23:48 ahuxley Exp $' VERSION_SCHEME=rcs ############################################################################## # # CONFIGURABLE STUFF STARTS HERE # ############################################################################## #TMP_DIR= AUB_CMD=/home/ahuxley/bin/OS/OSI/aub #AUBDIR= #BLACK_LIST= LYNX_CMD="`which lynx` -cfg /dev/null" #AUBDOM_LOG= AUBDOM_CONFIG_STEM=$HOME/var/tmp/aubdom/ #CACHE_FILE= #CACHE_EXPIRY= #MIN_LIST_SIZE= ############################################################################## # # CONFIGURABLE STUFF ENDS HERE # ############################################################################## TMP_DIR=${TMP_DIR:-/var/tmp} 
AUB_CMD=${AUB_CMD:-aub}
AUBDIR=${AUBDIR:-$HOME/var/spool/aub}
BLACK_LIST=${BLACK_LIST:-$HOME/etc/$PROGNAME.bl}
LYNX_CMD=${LYNX_CMD:-lynx}
AUBDOM_LOG=${AUBDOM_LOG:-$HOME/var/log/$PROGNAME.log}
AUBDOM_CONFIG_STEM=${AUBDOM_CONFIG_STEM:-$HOME/.$PROGNAME-}
CACHE_FILE=${CACHE_FILE:-$HOME/var/$PROGNAME-cache}
# Age, in days, after which the cached server list is refreshed (see get_server_list).
CACHE_EXPIRY=${CACHE_EXPIRY:-1}
# A freshly downloaded server list smaller than this many bytes is rejected.
MIN_LIST_SIZE=${MIN_LIST_SIZE:-200}

# The URL contains '&', so it must stay quoted wherever it is expanded.
NNTPSERVER_LIST_URL="http://www.newzbot.com/serverlist.php?since=ALL&notable=1&orderby=kps&sortorder=desc"
# Filter (run through 'eval') that extracts the host part of each news:// URL in the page dump.
NNTPSERVER_LIST_FILTER="sed -n 's/.* news:\/\/\([^\/]*\)\/.*/\1/p'"

##############################################################################
#
#  main - parse options, build the merged server/directory/blacklist table
#         and act on every server in it.
#
#  Options: -V, -d <level>, -v, -f, -s <server>, -g <group> (see usage).
#  No non-option arguments are accepted.
#
#  NOTE(review): info, warning, error, debug, delonexit, dontdelonexit,
#  lock, unlock and genericsighandler are not defined in this file;
#  presumably they come from the shpp includes at the bottom - confirm.
#
##############################################################################

main()
{
    CACHE_EXPIRED=false
    CANDIDATE_SERVER=all
    CANDIDATE_GROUP=all

    while :; do
        case "$1" in
            -V) echo "$PROGNAME $VERSION"
                exit 0 ;;
            -d) [ "X$2" = X ] && usage
                VERBOSE_LEVEL=$2
                shift ;;
            -v) VERBOSE_LEVEL=3 ;;
            -f) CACHE_EXPIRED=true ;;
            -s) [ "X$2" = X ] && usage
                CANDIDATE_SERVER=$2
                shift ;;
            -g) [ "X$2" = X ] && usage
                CANDIDATE_GROUP=$2
                shift ;;
            -*) usage ;;
            *)  break ;;
        esac
        shift
    done

    ##########################################################################
    #
    #  SANITY CHECKS
    #
    ##########################################################################

    [ "X$1" != X ] && usage
    [ ! -f $HOME/.${PROGNAME}rc ] && error "please list your newsgroups in ~/.${PROGNAME}rc"
    export NNTPSERVER AUBDIR
    [ -f $AUBDOM_LOG -a -w $AUBDOM_LOG ] || {
        warning "AUBDOM_LOG is set, or maybe defaulted to, $AUBDOM_LOG, which is not writable, no logging will be done"
        AUBDOM_LOG=/dev/null
    }

    ##########################################################################
    #
    #  LOCKS AND TEMP FILE MANAGEMENT
    #
    ##########################################################################

    #
    #  Initialise locking and temp file maintenance
    #

    trap genericsighandler 1 2 15
    umask 077
    lock DUMMY gen_lock_file_name || error "lock file found (pid=$?)"

    ##########################################################################
    #
    #  GUTS
    #
    ##########################################################################

    #
    #  Get the various sources of information.  Each source becomes a sorted
    #  file of "<name> <tag>" lines so that the sources can be joined on <name>.
    #

    info "getting server list ..."
    delonexit $TMP_DIR/$PROGNAME.$$.serversraw
    #  Can't pipe this directly in cos it may fail, if it does its 'exit' won't
    #  exit 'cos in pipe, and then error becomes testable.
    get_server_list > $TMP_DIR/$PROGNAME.$$.serversraw
    delonexit $TMP_DIR/$PROGNAME.$$.servers
    cat $TMP_DIR/$PROGNAME.$$.serversraw | sort | sed "s/\$/ server/" > $TMP_DIR/$PROGNAME.$$.servers
    rm $TMP_DIR/$PROGNAME.$$.serversraw
    dontdelonexit $TMP_DIR/$PROGNAME.$$.serversraw

    info "getting blacklist ..."
    delonexit $TMP_DIR/$PROGNAME.$$.blacklisted
    cat $BLACK_LIST | sort | sed "s/\$/ blacklisted/" > $TMP_DIR/$PROGNAME.$$.blacklisted

    info "getting directory list ..."
    delonexit $TMP_DIR/$PROGNAME.$$.directories
    if [ -d $AUBDOM_CONFIG_STEM ]; then
        #  The stem is a directory: every entry in it is a per-server directory.
        ( cd $AUBDOM_CONFIG_STEM && ls -a )
    else
        #  The stem is a file name prefix: keep only the entries carrying that
        #  prefix and strip it, so unrelated files in the same directory are
        #  not mistaken for per-server directories.
        ( cd `dirname $AUBDOM_CONFIG_STEM` && ls -a | sed -n "s/^`basename $AUBDOM_CONFIG_STEM`//p" )
    fi | egrep -v '^\.$|^\.\.$' | sort | sed "s/\$/ directory/" > $TMP_DIR/$PROGNAME.$$.directories

    #
    #  Merge the various sources in such a way that we can shortly process each server
    #  only once, but we know what we have to do with it (because we know which
    #  lists it was on).
    #

    info "merging lists ..."
    delonexit $TMP_DIR/$PROGNAME.$$.servers+directories
    join -1 1 -2 1 -a 1 -a 2 $TMP_DIR/$PROGNAME.$$.servers $TMP_DIR/$PROGNAME.$$.directories > $TMP_DIR/$PROGNAME.$$.servers+directories
    delonexit $TMP_DIR/$PROGNAME.$$.merged
    join -1 1 -2 1 -a 1 -a 2 $TMP_DIR/$PROGNAME.$$.servers+directories $TMP_DIR/$PROGNAME.$$.blacklisted > $TMP_DIR/$PROGNAME.$$.merged
    rm -f $TMP_DIR/$PROGNAME.$$.servers $TMP_DIR/$PROGNAME.$$.blacklisted $TMP_DIR/$PROGNAME.$$.directories $TMP_DIR/$PROGNAME.$$.servers+directories
    dontdelonexit $TMP_DIR/$PROGNAME.$$.servers $TMP_DIR/$PROGNAME.$$.blacklisted $TMP_DIR/$PROGNAME.$$.directories $TMP_DIR/$PROGNAME.$$.servers+directories

    #
    #  Process the list.  STATUS is the space-separated set of tags that the
    #  joins collected for SERVER.  The operands are quoted so that a line
    #  with an empty field cannot break the test.
    #

    info "processing merged list ..."
    while read SERVER STATUS; do
        if [ "$CANDIDATE_SERVER" = "$SERVER" -o "$CANDIDATE_SERVER" = all ]; then
            case "$STATUS" in
                "server")                       run_aub_on_nntp_server $SERVER ;;
                "directory")                    process_directory $SERVER obsolete ;;
                "blacklisted")                  remove_from_blacklist $SERVER ;;
                "directory blacklisted")        process_directory $SERVER blacklisted ;;
                "server blacklisted")           ignore_blacklisted_server $SERVER ;;
                "server directory")             run_aub_on_nntp_server $SERVER ;;
                "server directory blacklisted") process_directory $SERVER blacklisted ;;
            esac
        fi
    done < $TMP_DIR/$PROGNAME.$$.merged
    rm $TMP_DIR/$PROGNAME.$$.merged
    dontdelonexit $TMP_DIR/$PROGNAME.$$.merged

    #
    #  Clean up and exit
    #

    unlock DUMMY gen_lock_file_name
    return 0
}

##############################################################################
#
#  get_server_list - write the list of news servers to stdout, one per line.
#
#  The list is served from CACHE_FILE.  The cache is refreshed first when
#  '-f' was given, when the cache file does not exist, or when it is more
#  than CACHE_EXPIRY days old.
#
##############################################################################

get_server_list()
{
    ###########################################################################
    #
    #  DETERMINE IF CACHE EXPIRED
    #
    ###########################################################################

    [ $CACHE_EXPIRED = false -a \( ! -f $CACHE_FILE -o "X`find $CACHE_FILE -mtime +$CACHE_EXPIRY -print 2>/dev/null`" != X \) ] && CACHE_EXPIRED=true

    ###########################################################################
    #
    #  IF CACHE EXPIRED SANITY CHECK CACHE PERMISSIONS AND UPDATE CACHE
    #
    ###########################################################################

    if [ $CACHE_EXPIRED = true ]; then
        CACHE_DIR=`dirname $CACHE_FILE`

        #  if cache file exists check writable
        if [ -f $CACHE_FILE ]; then
            [ ! -w $CACHE_FILE ] && error "server list cache ($CACHE_FILE) exists but is not writable!"
        #  else cache directory must allow its creation
        elif [ -d $CACHE_DIR ]; then
            [ ! -w $CACHE_DIR ] && error "server list cache ($CACHE_FILE) does not exist and cache directory ($CACHE_DIR) is not writable!"
        #  else stuffed!
        else
            error "server list cache directory ($CACHE_DIR) does not exist!"
        fi

        #  create a temporary copy of new server list
        info "updating server cache, please wait ... "
        delonexit $TMP_DIR/$PROGNAME.$$.newcache
        $LYNX_CMD -dump "$NNTPSERVER_LIST_URL" | { eval $NNTPSERVER_LIST_FILTER; } > $TMP_DIR/$PROGNAME.$$.newcache

        #  if size is fine accept it
        if [ `wc -c < $TMP_DIR/$PROGNAME.$$.newcache` -ge $MIN_LIST_SIZE ]; then
            cat $TMP_DIR/$PROGNAME.$$.newcache > $CACHE_FILE
            rm $TMP_DIR/$PROGNAME.$$.newcache
            dontdelonexit $TMP_DIR/$PROGNAME.$$.newcache
        #  else if have old version warn and don't accept new version - leave bad file in place for manual examination
        elif [ -f $CACHE_FILE ]; then
            dontdelonexit $TMP_DIR/$PROGNAME.$$.newcache
            warning "new server list cache file ($TMP_DIR/$PROGNAME.$$.newcache) is too small; not replacing old version"
        #  else (don't have old version) we're stuffed - leave bad file in place for manual examination
        else
            dontdelonexit $TMP_DIR/$PROGNAME.$$.newcache
            error "new server list cache file ($TMP_DIR/$PROGNAME.$$.newcache) is too small; no old version is available"
        fi
    fi

    ##########################################################################
    #
    #  NOW SEND (POSSIBLY JUST UPDATED) CACHED LIST TO STDOUT
    #
    ##########################################################################

    cat $CACHE_FILE
}

##############################################################################
#
#  remove_from_blacklist <server> - delete <server>'s entry from BLACK_LIST.
#
#  Only lines that are exactly <server> are removed ('-x'); a plain substring
#  match would also remove every other entry whose name contains <server>.
#
##############################################################################

remove_from_blacklist()
{
    typeset SERVER=$1

    info "$SERVER: removing expired blacklist entry ..."
    delonexit $TMP_DIR/$PROGNAME.$$.newblacklist
    #  need to do the grep here instead of cat
    fgrep -v -x -e "$SERVER" $BLACK_LIST > $TMP_DIR/$PROGNAME.$$.newblacklist
    cat $TMP_DIR/$PROGNAME.$$.newblacklist > $BLACK_LIST
    rm $TMP_DIR/$PROGNAME.$$.newblacklist
    dontdelonexit $TMP_DIR/$PROGNAME.$$.newblacklist
}

#  add_to_blacklist <server> - append <server> to BLACK_LIST.

add_to_blacklist()
{
    typeset SERVER=$1

    info "$SERVER: adding server to blacklist ..."
    echo "$SERVER" >> $BLACK_LIST
}

#  ignore_blacklisted_server <server> - nothing to do beyond a debug message.

ignore_blacklisted_server()
{
    typeset SERVER=$1

    debug 4 "ignore_blacklisted_server: $SERVER: blacklisted"
}

##############################################################################
#
#  process_directory <server> <state> - remove the per-server config area.
#
#  <state> is one of blacklisted, obsolete or nothingdownloaded and only
#  selects the message; any other value does nothing.
#
##############################################################################

process_directory()
{
    typeset SERVER=$1
    typeset STATE=$2

    #  With an empty server name the path below would be the bare stem, and
    #  'rm -fr' would take out every server's files.  Refuse to do that.
    [ "X$SERVER" = X ] && return 0

    case $STATE in
        blacklisted)
            info "$SERVER: removing files for blacklisted server ..."
            rm -fr $AUBDOM_CONFIG_STEM$SERVER ;;
        obsolete)
            info "$SERVER: removing files for obsolete server ..."
            rm -fr $AUBDOM_CONFIG_STEM$SERVER ;;
        nothingdownloaded)
            info "$SERVER: removing files for server which has never downloaded anything ..."
            rm -fr $AUBDOM_CONFIG_STEM$SERVER ;;
    esac
}

#  usage - print the usage message on stderr and exit with status 1.

usage()
{
    {
        echo
        echo "Usage: $PROGNAME [ -d <level> | -v ] [ -f ] [ -V ] [ -s <server> ] [ -g <group> ]"
        echo
        echo "Options: -f           force update of server list cache"
        echo "         -d <level>   set debug level to <level>"
        echo "         -v           verbose (same as '-d 3')"
        echo "         -V           report version info and exit"
        echo "         -s <server>  only process specified news server"
        echo "         -g <group>   only process specified newsgroup"
        echo
    } >&2
    exit 1
}

##############################################################################
#
#  run_aub_on_nntp_server <server> - run aub against one news server.
#
#  Writes a .aubconf listing the wanted newsgroups into the server's config
#  directory, runs AUB_CMD with HOME pointing at that directory, logs the
#  result, and then either blacklists the server (aub reported every group
#  as invalid) or removes the config directory again (no .aubrc appeared).
#
##############################################################################

run_aub_on_nntp_server()
{
    typeset NNTPSERVER=$1
    typeset START_SECONDS FINISH_SECONDS RC
    typeset ELAPSED_SECONDS

    debug 10 "preparing config file for server $NNTPSERVER ... "
    [ ! -d $AUBDOM_CONFIG_STEM$NNTPSERVER ] && mkdir $AUBDOM_CONFIG_STEM$NNTPSERVER
    {
        echo "DEBUG 1"
        #  The trailing \c keeps all the group names on the one GROUP line.
        echo "GROUP \\c"
        while read GROUP; do
            #  Blank lines in the rc file are not newsgroups.
            [ "X$GROUP" = X ] && continue
            if [ "$CANDIDATE_GROUP" = "$GROUP" -o "$CANDIDATE_GROUP" = all ]; then
                echo "$GROUP \\c"
            fi
        done < $HOME/.${PROGNAME}rc
        echo
    } > $AUBDOM_CONFIG_STEM$NNTPSERVER/.aubconf

    info "$NNTPSERVER: connecting ..."
    START_SECONDS=`date_to_secs \`date\``
    START_FILECOUNT=`ls $AUBDIR | wc -l`
    export NNTPSERVER
    delonexit $TMP_DIR/$PROGNAME.$$.aublog
    #  HOME is overridden for this one command only.
    HOME=$AUBDOM_CONFIG_STEM$NNTPSERVER $AUB_CMD > $TMP_DIR/$PROGNAME.$$.aublog 2>/dev/null
    RC=$?
    FINISH_SECONDS=`date_to_secs \`date\``
    FINISH_FILECOUNT=`ls $AUBDIR | wc -l`

    #  date_to_secs yields seconds since midnight, so a session that runs
    #  past midnight would otherwise be logged with a negative duration.
    ELAPSED_SECONDS=`expr $FINISH_SECONDS - $START_SECONDS`
    [ $ELAPSED_SECONDS -lt 0 ] && ELAPSED_SECONDS=`expr $ELAPSED_SECONDS + 86400`
    logline $RC $ELAPSED_SECONDS `expr $FINISH_FILECOUNT - $START_FILECOUNT`

    ##########################################################################
    #
    #  IF ALL GROUPS CONSIDERED INVALID THEN BLACKLIST SERVER
    #
    ##########################################################################

    SERVER_THINKS_ALL_GROUPS_ARE_INVALID=true
    for NEWSGROUP in `cat $HOME/.${PROGNAME}rc`; do
        fgrep -q "Invalid group: $NEWSGROUP." $TMP_DIR/$PROGNAME.$$.aublog || {
            SERVER_THINKS_ALL_GROUPS_ARE_INVALID=false
            break
        }
    done
    rm $TMP_DIR/$PROGNAME.$$.aublog
    dontdelonexit $TMP_DIR/$PROGNAME.$$.aublog

    if [ $SERVER_THINKS_ALL_GROUPS_ARE_INVALID = true ]; then
        info "$NNTPSERVER: server thinks all groups are invalid"
        add_to_blacklist $NNTPSERVER
        process_directory $NNTPSERVER blacklisted
    elif [ ! -f $AUBDOM_CONFIG_STEM$NNTPSERVER/.aubrc ]; then
        process_directory $NNTPSERVER nothingdownloaded
    fi
}

##############################################################################
#
#  date_to_secs <date output words> - print seconds since midnight.
#
#  The arguments are split on whitespace and ':' and words 4, 5 and 6 are
#  taken as hour, minute and second.
#  NOTE(review): this assumes the "Thu Aug  2 08:45:19 ..." layout of 'date'
#  output; other locales may order the words differently - confirm.
#
##############################################################################

date_to_secs()
{
    typeset HOUR MINUTE SECOND DATE
    typeset OLD_IFS

    OLD_IFS="$IFS"
    DATE="$*"
    IFS="$IFS:"
    set DUMMY $DATE
    shift
    IFS="$OLD_IFS"
    HOUR=$4
    MINUTE=$5
    SECOND=$6
    expr 3600 \* $HOUR + 60 \* $MINUTE + $SECOND
}

#  logline <exitcode> <connecttime> <downloadcount> - append one record to AUBDOM_LOG.

logline()
{
    echo "date=`date '+%y/%m/%d_%H:%M:%S'`, pid=$$, server=$NNTPSERVER, connecttime=$2, exitcode=$1, downloadcount=$3" >> $AUBDOM_LOG
}

#  gen_lock_file_name - print the lock file path (its name is passed to lock and unlock by main).

gen_lock_file_name()
{
    echo $TMP_DIR/.$PROGNAME.lock
}

#shpp include ../lib/utils.sh.shpp
#shpp include ../lib/gep.sh.shpp
@ 1.8 log @changes for modified web page (URL changed, output format changed, URL needs protecting from expansion) @ text @d7 1 a7 1 RCS_ID='$Header: /home/ahuxley/dev/active/small/bin/RCS/aubdom.shpp,v 1.7 2001/05/17 06:20:36 ahuxley Exp ahuxley $' d51 2 d63 6 d157 11 a167 9 case "$STATUS" in "server") run_aub_on_nntp_server $SERVER ;; "directory") process_directory $SERVER obsolete ;; "blacklisted") remove_from_blacklist $SERVER ;; "directory blacklisted") process_directory $SERVER blacklisted ;; "server blacklisted") ignore_blacklisted_server $SERVER ;; "server directory") run_aub_on_nntp_server $SERVER ;; "server directory blacklisted") process_directory $SERVER blacklisted ;; esac d293 1 a293 1 echo "Usage: $PROGNAME [ -d | -v ] [ -f ] [ -V ]" d295 6 a300 4 echo "Options: -f force update of server list cache" echo " -d set debug level to " echo " -v verbose (same as '-d 3')" echo " -V report version info and exit" d316 6 a321 1 xargs echo < $HOME/.${PROGNAME}rc @ 1.7 log @changed info message to debug 4 to reduce info about blacklisted servers @ text @d7 1 a7 1
RCS_ID='$Header: /home/ahuxley/dev/active/small/bin/RCS/aubdom.shpp,v 1.6 2001/05/15 06:40:59 ahuxley Exp ahuxley $' d45 2 a46 2 NNTPSERVER_LIST_URL="http://www.newzbot.com/sorted-speed.txt" NNTPSERVER_LIST_FILTER="sed -n 's/^Server: \(.*\) (.*\$/\1/p'" d204 1 a204 1 $LYNX_CMD -dump $NNTPSERVER_LIST_URL | { eval $NNTPSERVER_LIST_FILTER; } > $TMP_DIR/$PROGNAME.$$.newcache @ 1.6 log @reduced cache expiry time to one day renamed aub session logs (which are grepped for non-total-failure) @ text @d7 1 a7 1 RCS_ID='$Header: /home/ahuxley/dev/active/small/bin/RCS/aubdom.shpp,v 1.5 2001/05/11 07:31:12 ahuxley Exp ahuxley $' d259 3 a261 1 info "$1: blacklisted" @ 1.5 log @added detection of servers which think that all groups are invalid, and add these servers to the blacklist aub itself now has all its output redirected, since this is needed for the above and the 'script' hack ('SHELL=aub script') does not work in a loop context (stdin termio problems). @ text @d3 2 d7 1 a7 1 RCS_ID='$Header: /home/ahuxley/dev/active/small/bin/RCS/aubdom.shpp,v 1.4 2001/05/09 11:02:56 ahuxley Exp ahuxley $' d21 1 d24 1 a24 1 #CACHE_EXPIRY=2 d41 1 a41 1 CACHE_EXPIRY=${CACHE_EXPIRY:-2} d309 2 a310 2 delonexit $TMP_DIR/$PROGNAME.$$.typescript HOME=$AUBDOM_CONFIG_STEM$NNTPSERVER $AUB_CMD > $TMP_DIR/$PROGNAME.$$.typescript 2>/dev/null d325 1 a325 1 fgrep -q "Invalid group: $NEWSGROUP." $TMP_DIR/$PROGNAME.$$.typescript || { d330 2 a331 2 rm $TMP_DIR/$PROGNAME.$$.typescript dontdelonexit $TMP_DIR/$PROGNAME.$$.typescript @ 1.4 log @added missing unlock to fix 'stale lock file removed' bug added nicer usage message cosmetic comment changes @ text @d5 1 a5 1 RCS_ID='$Header: /home/ahuxley/dev/active/small/bin/RCS/aubdom.shpp,v 1.3 2001/05/07 07:25:06 ahuxley Exp ahuxley $' d232 1 a232 1 info "$1: removing expired blacklist entry ..." 
d234 2 d239 1 a239 1 cat $BLACK_LIST > $TMP_DIR/$PROGNAME.$$.newblacklist d246 8 d306 2 a307 5 if [ $VERBOSE_LEVEL -ge 3 ]; then HOME=$AUBDOM_CONFIG_STEM$NNTPSERVER $AUB_CMD else HOME=$AUBDOM_CONFIG_STEM$NNTPSERVER $AUB_CMD > /dev/null 2>&1 fi d314 23 a336 2 [ ! -f $AUBDOM_CONFIG_STEM$NNTPSERVER/.aubrc ] && process_directory $NNTPSERVER nothingdownloaded #sleep 10 @ 1.3 log @added locking added removal of config directory for server which has, as yet, downloaded nothing @ text @d5 1 a5 1 RCS_ID='$Header: /home/ahuxley/dev/active/small/bin/RCS/aubdom.shpp,v 1.2 2001/05/04 07:13:00 ahuxley Exp $' d82 1 a82 1 # GUTS d94 6 d103 1 d106 2 a107 2 # Can't pipe this directly in cos it may fail, if it does its 'exit' won't exit 'cos in pipe, and then error # becomes testable. d127 3 a129 1 # Join them all up! d131 1 d140 4 a157 1 } d159 6 a164 3 ignore_blacklisted_server() { info "$1: blacklisted" d223 1 a223 1 # NOW SEND LIST TO STDOUT d244 5 d266 10 a275 1 echo "Usage: $PROGNAME [ -d | -v ] [ -V ]" >&2 @ 1.2 log @rationalised a few info messages took out duplicated blacklist check added 'export' of NNTPSERVER from function, otherwise aub called the one defined in the login environment took out intra-loop sleep @ text @d5 1 a5 1 RCS_ID='$Header: /home/ahuxley/dev/active/small/bin/RCS/aubdom.shpp,v 1.1 2001/05/03 10:48:21 ahuxley Exp ahuxley $' d91 3 d122 1 a122 1 debug 5 "main: join part 1 ..." a125 2 debug 5 "main: join part 2 ..." d130 1 d134 1 a134 1 "directory") remove_files $SERVER expired ;; d136 1 a136 1 "directory blacklisted") remove_files $SERVER blacklisted ;; d139 1 a139 1 "server directory blacklisted") remove_files $SERVER blacklisted ;; d228 1 a228 1 remove_files() d230 2 a231 1 info "$1: removing files ($2) ..." 
d233 8 a240 1 rm -fr $AUBDOM_CONFIG_STEM$1 d276 2 d302 5 @ 1.1 log @Initial revision @ text @d5 1 a5 1 RCS_ID='$Header: /home/ahuxley/dev/active/small/bin/RCS/abm.shpp,v 1.4 2001/03/15 08:08:24 ahuxley Exp $' d133 1 a133 1 "blacklisted") remove_from_blacklist $SEREVER ;; d135 1 a135 1 "server blacklisted") : ;; d144 4 a148 1 d214 1 a214 1 info "removing expired entry from from black list for $1 ..." d228 1 a228 1 info "removing files for $2 server $1 ..." d244 1 a244 2 [ -r $BLACK_LIST ] && fgrep -q $NNTPSERVER $BLACK_LIST && { echo "$NNTPSERVER is in black list" >&2; continue; } info "preparing config file for server $NNTPSERVER ... " d252 1 a252 1 info "running aub from $NNTPSERVER ..." d255 6 a260 1 HOME=$AUBDOM_CONFIG_STEM$NNTPSERVER $AUB_CMD > /dev/null 2>&1 d266 1 a266 1 sleep 10 @