#!/bin/bash

# Cluster-wide resource state manager.
#
# Keeps a small per-resource sqlite database (node list, primary node,
# transaction log) in $DB_DIR on every node, serialises access with a
# lock held on one "lock manager" node, and copies the database to the
# other nodes after every command.
#
# Requires the external 'miniade' shell library (all miniade_* functions
# come from there), plus sqlite3, ssh, scp and uuidgen.

# Includes
. $(miniade) || { echo "${0##*/}: ERROR: miniade failed (hint: run 'miniade' to see error)" >&2; exit 1; }

# Configurable stuff
LOCK_DIR=/tmp

# Other globals
ABS_PROGNAME=$(realpath "$0")
CLEANUP_COMMAND_LIST=()

# Entry point.
# Arguments: [ options ] verb resource [ node ... ]
# Globals written: VERB RESOURCE NODE_LIST DB_DIR DB_FILE THIS_NODE
#                  LOCK_MANAGER_NODE SSH SSH_PID (read by the mode_* and
#                  sql_* functions and by the cleanup commands).
main() {
    local MY_ARGS
    local CHECK_NODES_ALIGNED_FLAG PROGNAME
    local SSH_CLEANUP SSH_RC

    # Minimal option processing
    miniade_process_options --special-opts-handler=special_opts_handler MY_ARGS "$@" && set -- "${MY_ARGS[@]}"

    # Process arguments
    miniade_debug 10 "main: processing arguments ..."
    [ $# -ge 2 ] || miniade_bad_usage
    VERB=$1
    RESOURCE=$2
    shift 2
    # 'init' needs at least two nodes; other verbs take no further arguments
    # that are checked here.
    [ "$VERB" != init ] || [ $# -ge 2 ] || miniade_bad_usage
    NODE_LIST=( "$@" )

    # Sanity checks and derivations
    miniade_debug 10 "main: sanity checks and derivations ..."
    miniade_get_progname PROGNAME
    DB_DIR=/tmp/$PROGNAME.d
    [[ $VERB =~ ^(status|init|uninit|read-dummy|test|push-align|primary|secondary)$ ]] || miniade_bad_usage
    THIS_NODE=$HOSTNAME
    DB_FILE=$DB_DIR/$RESOURCE.sqlite
    trap cleanup EXIT

    # Create local lock
    miniade_debug 10 "main: creating local lock ..."
    if ! miniade_lock "$LOCK_DIR/$PROGNAME.lock"; then
        miniade_error "can't create lock (hint: locked already by another process?)"
    fi
    # Register the unlock only once we own the lock, so that a failed
    # attempt never removes a lock held by another process.
    cleanup_push "miniade_unlock $LOCK_DIR/$PROGNAME.lock"

    # Connect to database (it may not be aligned, but we check that shortly)
    miniade_debug 10 "main: connecting to database ..."
    sql_connect db_init

    # Get node list
    miniade_debug 10 "main: getting node list ..."
    if [ "$VERB" = read-dummy ]; then
        :
    else
        if [ "$VERB" = init ]; then
            : # node list already in NODE_LIST
        else
            get_nodelist NODE_LIST
            [ ${#NODE_LIST[*]} -gt 0 ] || miniade_error "node list empty (hint: run '$PROGNAME init $RESOURCE ...')"
        fi
        # Temporarily, '-r' to force farfalle to be cluster lock manager
        LOCK_MANAGER_NODE=$(printf '%s\n' "${NODE_LIST[@]}" | sort -r | head -n 1)
        miniade_debug 10 "main: LOCK_MANAGER_NODE=$LOCK_MANAGER_NODE"
    fi

    # Check all nodes sshable
    if [ "$VERB" = read-dummy ]; then
        :
    else
        miniade_debug 10 "main: checking ssh access to node list ..."
        for NODE in "${NODE_LIST[@]}"; do
            [ "$NODE" != "$THIS_NODE" ] || continue
            miniade_check_ssh_ok "$NODE" || miniade_error "$NODE: no unfettered ssh access"
        done
    fi

    # Check databases are aligned (couldn't do until node list known and ssh access tested)
    if [ "$VERB" = read-dummy ]; then
        :
    else
        miniade_debug 10 "main: checking database is aligned ..."
        check_db_aligned "$DB_FILE" "${NODE_LIST[@]}"
    fi

    # Create network lock
    # NOTE(review): in read-dummy mode (i.e. when run on the lock manager on
    # behalf of another node) no $RESOURCE.lock is taken here; confirm that
    # this is intended.
    if [ "$VERB" = read-dummy ]; then
        :
    elif [ "$THIS_NODE" = "$LOCK_MANAGER_NODE" ]; then
        miniade_debug 10 "main: creating local network lock ..."
        if ! miniade_lock "$LOCK_DIR/$RESOURCE.lock"; then
            miniade_error "can't create cluster lock locally (hint: locked already by another process?)"
        fi
        cleanup_push "miniade_unlock $LOCK_DIR/$RESOURCE.lock"
    else
        miniade_debug 10 "main: creating remote network lock ..."
        # If this script were to be interrupted by CTRL-C *after* ssh starts,
        # then it would leave ssh running. Set up a handler to kill ssh on CTRL-C.
        # (The handler is evaluated at cleanup time, hence the single quotes;
        # it is a no-op while SSH_PID is still unset.)
        SSH_CLEANUP='[ "X$SSH_PID" = X ] || { ! kill $SSH_PID 2>/dev/null; } || wait $SSH_PID'
        cleanup_push "$SSH_CLEANUP"
        SSH_CMDLINE="ssh $LOCK_MANAGER_NODE $ABS_PROGNAME --debug=$VERBOSELEVEL read-dummy $RESOURCE"
        miniade_debug 10 "main: creating remote lock with [$SSH_CMDLINE] ..."
        mycoproc "$SSH_CMDLINE" SSH
        SSH_PID=$!
        miniade_debug 10 "main: SSH_PID=$SSH_PID, SSH[0]=${SSH[0]}, SSH[1]=${SSH[1]}"
        # ssh is asynchronous, which means we need to wait until
        # it says the lock is in place.
        miniade_debug 10 "main: waiting for confirmation that the remote lock is in place ..."
        if ! read DUMMY <&${SSH[0]} 2>/dev/null; then
            miniade_error "can't create lock (hint: locked already by another process?)"
        fi
        miniade_debug 10 "main: received confirmation that the remote lock is in place (confirmation was '$DUMMY')"
    fi

    # Release local lock
    miniade_debug 10 "main: releasing local lock ..."
    miniade_unlock "$LOCK_DIR/$PROGNAME.lock"
    cleanup_pop "miniade_unlock $LOCK_DIR/$PROGNAME.lock"

    # Do whatever
    miniade_debug 10 "main: handling verb $VERB ..."
    if [ "$VERB" = init ]; then
        mode_${VERB//-/_} "${NODE_LIST[@]}"
    else
        mode_${VERB//-/_}
    fi

    # Disconnect from database
    miniade_debug 10 "main: disconnecting from database ..."
    sql_disconnect
    cleanup_pop sql_disconnect

    # Sync database
    miniade_debug 10 "main: syncing database to node list ..."
    for NODE in "${NODE_LIST[@]}"; do
        if [ "$VERB" = uninit ] && [ "$NODE" = "$THIS_NODE" ]; then
            rm -f "$DB_FILE"
        elif [ "$VERB" = uninit ]; then
            ssh -n "$NODE" rm -f "$DB_FILE"
        elif [ "$NODE" = "$THIS_NODE" ]; then
            :
        else
            # The remote database directory may not exist yet (e.g. straight
            # after 'init'), in which case scp alone would fail.
            ssh -n "$NODE" "mkdir -p \"$DB_DIR\""
            scp -q "$DB_FILE" "$NODE:$DB_DIR/"
        fi
    done

    # Release network lock
    if [ "$VERB" = read-dummy ]; then
        :
    elif [ "$THIS_NODE" = "$LOCK_MANAGER_NODE" ]; then
        miniade_debug 10 "main: releasing local network lock ..."
        miniade_unlock "$LOCK_DIR/$RESOURCE.lock"
        cleanup_pop "miniade_unlock $LOCK_DIR/$RESOURCE.lock"
        miniade_debug 10 "main: on lock manager cleanup_pop() returned $?"
    else
        miniade_debug 10 "main: releasing remote network lock ..."
        echo "RELEASE LOCK BY GIVING READ THIS INPUT" >&${SSH[1]}
        # Capture ssh's own exit status straight from wait (and without
        # letting a non-zero status abort the script before we report it).
        SSH_RC=0
        wait "$SSH_PID" || SSH_RC=$?
        cleanup_pop "$SSH_CLEANUP"
        miniade_debug 10 "main: on non-lock manager ssh returned $SSH_RC"
        [ "$SSH_RC" = 0 ] || { miniade_debug 2 "main: exiting non-zero due to failed ssh ..."; exit "$SSH_RC"; }
    fi
}

# Print usage and exit 0.
# NOTE(review): the <...> placeholders below were reconstructed from the
# argument handling in main(); confirm the wording.
help() {
    local PROGNAME

    miniade_get_progname PROGNAME
    echo "Usage: $PROGNAME [ <options> ] <verb> <resource> [ <node> ... ]"
    echo
    echo "Verbs: init <node> ...  initialise resource"
    echo "       status           summarise status (for internal use only)"
    echo "       uninit           uninitialise resource"
    echo "       test             perform empty transaction"
    echo "       primary          make this node primary"
    echo "       secondary        make this node secondary"
    echo "       read-dummy       (for internal use only)"
    echo "       push-align       push this node's config to other nodes"
    exit 0
}

# Verify that every node's copy of the database has the same most recent
# transaction id; calls miniade_error on the first node that differs.
# Arguments: $1 - database file path (same path on every node)
#            $2... - node names
# A node with no database file counts as having the all-zero transaction id.
check_db_aligned() {
    local DB_FILE NODE_LIST COMMON_LAST_TRANSACTION NODE LAST_TRANSACTION THIS_NODE

    [ $# -ge 2 ] || miniade_internal "check_db_aligned: bad argument count ($#)"
    DB_FILE=$1
    shift
    NODE_LIST=( "$@" )

    # Sanity checks and derivations
    THIS_NODE=$HOSTNAME

    # Guts
    COMMON_LAST_TRANSACTION=
    for NODE in "${NODE_LIST[@]}"; do
        if [ "$NODE" = "$THIS_NODE" ]; then
            if [ -f "$DB_FILE" ]; then
                miniade_debug 10 "check_db_aligned: reading LAST_TRANSACTION from local database ..."
                LAST_TRANSACTION=$(sqlite3 "$DB_FILE" "SELECT transaction_id FROM transactions ORDER BY timestamp DESC LIMIT 1;")
            else
                miniade_debug 10 "check_db_aligned: local database does not exist; defaulting LAST_TRANSACTION ..."
                LAST_TRANSACTION="00000000-0000-0000-0000-000000000000"
            fi
        else
            if ssh -n "$NODE" "[ -f $DB_FILE ]"; then
                miniade_debug 10 "check_db_aligned: reading LAST_TRANSACTION from remote database ..."
                LAST_TRANSACTION=$(ssh -n "$NODE" "sqlite3 $DB_FILE \"SELECT transaction_id FROM transactions ORDER BY timestamp DESC LIMIT 1;\"")
            else
                miniade_debug 10 "check_db_aligned: remote database does not exist; defaulting LAST_TRANSACTION ..."
                LAST_TRANSACTION="00000000-0000-0000-0000-000000000000"
            fi
        fi
        miniade_debug 20 "check_db_aligned: DB_FILE=$DB_FILE, NODE=$NODE, LAST_TRANSACTION=$LAST_TRANSACTION, COMMON_LAST_TRANSACTION=$COMMON_LAST_TRANSACTION"
        # The first node sets the reference value; all others must match it.
        if [ "X$COMMON_LAST_TRANSACTION" = X ]; then
            COMMON_LAST_TRANSACTION=$LAST_TRANSACTION
        elif [ "$LAST_TRANSACTION" != "$COMMON_LAST_TRANSACTION" ]; then
            miniade_error "$NODE: misaligned database (or perhaps it's *this* node that has the misaligned config file?)"
        fi
    done
}

# Start a command in the background with its stdin/stdout attached to two
# fifos that this shell holds open.
# Arguments: $1 - command line (evaluated by the shell)
#            $2 - name of a global array to receive the descriptors:
#                 NAME[0] is read from to get the command's output,
#                 NAME[1] is written to to feed the command's input.
# The caller picks up the background PID from $! immediately afterwards.
mycoproc() {
    local COMMAND NAME FD0 FD1 PROGNAME FIFO0 FIFO1

    # Process arguments
    [ $# = 2 ] || miniade_internal "mycoproc: bad arg count ($#)"
    COMMAND="$1"
    NAME="$2"

    # Sanity checks and derivations
    miniade_get_progname PROGNAME
    FIFO0=/tmp/$PROGNAME.$$.$NAME.fifo0
    FIFO1=/tmp/$PROGNAME.$$.$NAME.fifo1

    miniade_debug 10 "mycoproc: creating fifos ..."
    mkfifo "$FIFO0"
    mkfifo "$FIFO1"

    miniade_debug 10 "mycoproc: attaching handles ..."
    # Pick the lowest descriptor number in 0..255 not currently open in this shell.
    FD0=$(comm --nocheck-order -2 -3 <(echo {0..255} | xargs -n 1 echo | sort -n) <(ls /proc/$$/fd | sort -n) | head -1)
    #FD0=60
    miniade_debug 10 "mycoproc: FD0=$FD0"
    eval "exec $FD0<>\"\$FIFO0\""
    FD1=$(comm --nocheck-order -2 -3 <(echo {0..255} | xargs -n 1 echo | sort -n) <(ls /proc/$$/fd | sort -n) | head -1)
    #FD1=61
    miniade_debug 10 "mycoproc: FD1=$FD1"
    eval "exec $FD1<>\"\$FIFO1\""
    eval "${NAME}[0]=$FD0"
    eval "${NAME}[1]=$FD1"

    # Both fifos are now held open via descriptors, so their names are no
    # longer needed; remove them rather than leaving them behind in /tmp.
    rm -f "$FIFO0" "$FIFO1"

    # Run the command last so that $! is not overwritten.
    miniade_debug 10 "mycoproc: running command in background ..."
    eval "$COMMAND <&$FD1 >&$FD0 &"
}

# Verb 'status': print the node list and the primary node(s), each as a
# comma-separated list.
mode_status() {
    local PRIMARY_NODES PRIMARY_NODE COMMA NODE_LIST NODE

    # Argument processing
    [ $# = 0 ] || miniade_internal "mode_status: bad argument count ($#)"

    # Sanity checks

    # Guts
    get_nodelist NODE_LIST
    echo -n "nodes:"
    COMMA=
    for NODE in "${NODE_LIST[@]}"; do
        echo -n "$COMMA$NODE"
        COMMA=,
    done
    echo

    get_primaries PRIMARY_NODES
    echo -n "primary:"
    COMMA=
    for PRIMARY_NODE in "${PRIMARY_NODES[@]}"; do
        echo -n "$COMMA$PRIMARY_NODE"
        COMMA=,
    done
    echo
}

# Verb 'init': record the node list for the resource.
# Arguments: two or more distinct node names, one of which must be this host.
# Fails if nodes have already been recorded.
mode_init() {
    local NODE_LIST NODE THIS_NODE
    local THIS_NODE_IN_NODE_LIST_FLAG I J COUNT

    NODE_LIST=( "$@" )

    # Sanity checks
    THIS_NODE=$HOSTNAME
    [ ${#NODE_LIST[*]} -ge 2 ] || miniade_bad_usage
    THIS_NODE_IN_NODE_LIST_FLAG=false
    for NODE in "${NODE_LIST[@]}"; do
        [ "$NODE" != "$THIS_NODE" ] || { THIS_NODE_IN_NODE_LIST_FLAG=true; break; }
    done
    $THIS_NODE_IN_NODE_LIST_FLAG || miniade_error "this node is not in node list (hint: either you missed specifying *this* host or you should run the command one of the specified nodes)"
    # Compare every unordered pair of entries exactly once.
    for ((I=0; I<${#NODE_LIST[*]}; I++)); do
        for ((J=I+1; J<${#NODE_LIST[*]}; J++)); do
            if [ "${NODE_LIST[$I]}" = "${NODE_LIST[$J]}" ]; then
                miniade_error "duplicate nodes listed"
            fi
        done
    done
    sql_write_and_read "SELECT COUNT(*) FROM nodes;" COUNT
    [ "$COUNT" = 0 ] || miniade_error "nodes have already been specified"

    # Guts
    sql_write "BEGIN TRANSACTION;"
    sql_write "INSERT INTO transactions VALUES ('$(uuidgen)', $(date +%s));"
    for NODE in "${NODE_LIST[@]}"; do
        sql_write "INSERT INTO nodes VALUES ('$NODE');"
    done
    sql_write "END TRANSACTION;"
}

# Verb 'uninit': empty the primary_nodes and nodes tables. (main() then
# deletes the database file on every node.)
mode_uninit() {
    # Sanity checks

    # Guts
    sql_write "BEGIN TRANSACTION;"
    sql_write "INSERT INTO transactions VALUES ('$(uuidgen)', $(date +%s));"
    sql_write "DELETE FROM primary_nodes;"
    sql_write "DELETE FROM nodes;"
    sql_write "END TRANSACTION;"
}

# Verb 'read-dummy' (internal): announce readiness on stdout, then block
# until one line arrives on stdin. Run over ssh on the lock manager node by
# another node's main().
mode_read_dummy() {
    [ $# = 0 ] || miniade_internal "mode_read_dummy: bad argument count ($#)"

    # Sanity checks
    # We cannot easily check that this is running on the lock manager because the lock manager
    # will not have loaded the database and the database contains the list of nodes from which
    # the lock manager is selected. However, the node that ran zfsadm in the first place (be
    # that the lock manager node itself or, more likely, another node) has already checked
    # all databases are aligned and has decided that this *is* the lock manager, so we don't
    # need to check.

    # Guts
    # This echo is to tell the caller that the lock is in place.
    echo "BLOCKING UNTIL READ A LINE OF INPUT ..."
    # This read is to wait for the caller to ask us to release the lock.
    miniade_debug 10 "mode_read_dummy: reading line of input ..."
    read DUMMY
    miniade_debug 10 "mode_read_dummy: read line of input (line was '$DUMMY')"
}

# Verb 'test': perform an otherwise empty transaction, holding the locks
# for ten seconds.
mode_test() {
    [ $# = 0 ] || miniade_internal "mode_test: bad argument count ($#)"

    # Any verb that writes to the database needs to (1) do it in a transaction
    # to make database changes atomic in case of failure.
    sql_write "BEGIN TRANSACTION;"
    sql_write "INSERT INTO transactions VALUES ('$(uuidgen)', $(date +%s));"

    # Do what this mode is meant to do
    miniade_info "sleeping 10s ..."
    sleep 10

    # Complete the transaction
    sql_write "END TRANSACTION;"
}

# Verb 'push-align': copy this node's database to every other node.
# Currently disabled: the first statement reports "not implemented yet".
mode_push_align() {
    miniade_internal "mode_push_align: not implemented yet"
    [ $# = 0 ] || miniade_internal "mode_push_align: bad argument count ($#)"

    # Any verb that writes to the database needs to (1) do it in a transaction
    # to make database changes atomic in case of failure.
    sql_write "BEGIN TRANSACTION;"
    sql_write "INSERT INTO transactions VALUES ('$(uuidgen)', $(date +%s));"

    # Do what this mode is meant to do
    for NODE in "${NODE_LIST[@]}"; do
        [ "$NODE" = "$THIS_NODE" ] || { ssh -n "$NODE" "mkdir -p \"$DB_DIR\""; scp -q "$DB_FILE" "$NODE:$DB_DIR/"; }
    done

    # Complete the transaction
    sql_write "END TRANSACTION;"
}

# Verb 'primary': record this node as primary, unless another node already is.
mode_primary() {
    local SQL_STATEMENT PRIMARY_NODES THIS_NODE

    [ $# = 0 ] || miniade_internal "mode_primary: bad argument count ($#)"
    THIS_NODE=$HOSTNAME

    # Any verb that writes to the database needs to (1) do it in a transaction
    # to make database changes atomic in case of failure.
    sql_write "BEGIN TRANSACTION;"
    sql_write "INSERT INTO transactions VALUES ('$(uuidgen)', $(date +%s));"

    # Check that no other node is primary.
    get_primaries PRIMARY_NODES
    if [ ${#PRIMARY_NODES[*]} -ge 2 ]; then
        miniade_internal "multiple primaries detected!"
    elif [ ${#PRIMARY_NODES[*]} = 0 ]; then
        sql_write "INSERT INTO primary_nodes VALUES ('$THIS_NODE');"
    elif [ "${PRIMARY_NODES[0]}" = "$THIS_NODE" ]; then
        : # already primary
    else
        miniade_error "${PRIMARY_NODES[0]} is primary"
    fi

    # Complete the transaction
    sql_write "END TRANSACTION;"
}

# Verb 'secondary': remove this node's primary record, if it has one.
# Reads THIS_NODE as set by main().
mode_secondary() {
    local SQL_STATEMENT PRIMARY_NODES

    [ $# = 0 ] || miniade_internal "mode_secondary: bad argument count ($#)"

    # Any verb that writes to the database needs to (1) do it in a transaction
    # to make database changes atomic in case of failure.
    sql_write "BEGIN TRANSACTION;"
    sql_write "INSERT INTO transactions VALUES ('$(uuidgen)', $(date +%s));"

    # Demote this node only if it is the recorded primary.
    get_primaries PRIMARY_NODES
    if [ ${#PRIMARY_NODES[*]} -ge 2 ]; then
        miniade_internal "multiple primaries detected!"
    elif [ ${#PRIMARY_NODES[*]} = 0 ]; then
        :
    elif [ "${PRIMARY_NODES[0]}" = "$THIS_NODE" ]; then
        sql_write "DELETE FROM primary_nodes WHERE node = '$THIS_NODE';"
    else
        :
    fi

    # Complete the transaction
    sql_write "END TRANSACTION;"
}

# Load the contents of the nodes table.
# Arguments: $1 - name of the array variable to fill
get_nodelist() {
    local NODE_LIST_REF

    [ $# = 1 ] || miniade_internal "get_nodelist: bad argument count ($#)"
    NODE_LIST_REF=$1

    # Sanity checks and derivations

    # Guts
    sql_write_and_read "SELECT node FROM nodes;" $NODE_LIST_REF
    eval "miniade_debug 10 \"get_nodelist: node list: \${$NODE_LIST_REF[*]}\""
}

# Load the contents of the primary_nodes table.
# Arguments: $1 - name of the array variable to fill
get_primaries() {
    local PRIMARY_NODES_REF SQL_STATEMENT

    [ $# = 1 ] || miniade_internal "get_primaries: bad argument count ($#)"
    PRIMARY_NODES_REF=$1

    SQL_STATEMENT="SELECT node FROM primary_nodes;"
    sql_write_and_read "$SQL_STATEMENT" $PRIMARY_NODES_REF
}

# Create the schema in a new database. Passed by name to sql_connect, which
# calls it only when the database file did not previously exist.
db_init() {
    miniade_debug 10 "db_init: initialising database ..."
    sql_write "BEGIN TRANSACTION;"

    ## Static types
    #sql_write "CREATE TABLE logicals ("
    #sql_write "    logical CHAR NOT NULL,"
    #sql_write "    PRIMARY KEY (logical),"
    #sql_write "    CHECK (logical = 'true' OR logical = 'false')"
    #sql_write ");"
    #sql_write "INSERT INTO logicals VALUES ('true');"
    #sql_write "INSERT INTO logicals VALUES ('false');"

    # Transaction count
    sql_write "CREATE TABLE transactions ("
    sql_write "    transaction_id CHAR NOT NULL,"
    sql_write "    timestamp INT UNIQUE NOT NULL,"
    sql_write "    PRIMARY KEY (transaction_id)"
    sql_write ");"
    sql_write "INSERT INTO transactions VALUES ('00000000-0000-0000-0000-000000000000', 0);"

    ## List of resources
    #sql_write "CREATE TABLE resources ("
    #sql_write "    resource CHAR NOT NULL,"
    #sql_write "    enabled CHAR NOT NULL,"
    #sql_write "    PRIMARY KEY (resource),"
    #sql_write "    FOREIGN KEY (enabled) REFERENCES logicals(logical)"
    #sql_write ");"

    # List of nodes
    sql_write "CREATE TABLE nodes ("
    sql_write "    node CHAR NOT NULL,"
    sql_write "    PRIMARY KEY (node)"
    sql_write ");"

    # List of primary nodes
    sql_write "CREATE TABLE primary_nodes ("
    sql_write "    node CHAR NOT NULL,"
    sql_write "    PRIMARY KEY (node),"
    sql_write "    FOREIGN KEY (node) REFERENCES nodes(node)"
    sql_write ");"

    sql_write "END TRANSACTION;"
    miniade_debug 10 "db_init: done initialising database"
}

# Start sqlite3 on $DB_FILE as a background co-process and prepare the
# session.
# Arguments: $1 - name of the function to call to create the schema when
#                 the database file did not exist beforehand
# Globals written: SQLITE (descriptor pair), SQLITE_PID, DATABASE_EXISTS_FLAG
sql_connect() {
    local DB_INIT_FUNC_REF

    # Process arguments
    [ $# = 1 ] || miniade_internal "sql_connect: invalid arg count"
    DB_INIT_FUNC_REF=$1

    # Note if DB exists before connecting as connecting creates.
    if [ "X$DB_FILE" = X ]; then
        DATABASE_EXISTS_FLAG=false
    elif [ -f "$DB_FILE" ]; then
        DATABASE_EXISTS_FLAG=true
    else
        DATABASE_EXISTS_FLAG=false
    fi

    # Guts
    # Connect to database
    cleanup_push sql_disconnect
    mkdir -p "${DB_FILE%/*}"
    mycoproc "sqlite3 $DB_FILE" SQLITE
    SQLITE_PID=$!
    miniade_debug 10 "sql_connect: SQLITE_PID=$SQLITE_PID, SQLITE[0]=${SQLITE[0]}, SQLITE[1]=${SQLITE[1]}"

    # Enforce foreign key integrity.
    sql_write "PRAGMA foreign_keys = ON;"

    # Bash doesn't support multi-dimensional arrays so results from
    # SQL SELECT statements need to be 'packed'. We pack columns
    # using this separator into rows and then put each row in
    # a separate entry in a one-dimensional array. Note that this
    # means calls to sql_write_and_read() need to unpack if they
    # select *multiple* columns but don't need to otherwise (though
    # it would do no harm).
    sql_write ".separator @@@"

    # Initialise database
    if ! $DATABASE_EXISTS_FLAG; then
        $DB_INIT_FUNC_REF
    fi
}

# Tell the sqlite3 co-process to quit and wait for it to exit.
sql_disconnect() {
    # Disconnect from database
    miniade_debug 10 "sql_disconnect: telling sqlite to exit ..."
    sql_write ".quit"
    wait $SQLITE_PID
    miniade_debug 10 "sql_disconnect: sqlite has exited"
}

# Send one line to the sqlite3 co-process without reading any reply.
# Arguments: $1 - SQL statement or dot-command
sql_write() {
    local SQL_STATEMENT

    [ $# = 1 ] || miniade_internal "sql_write: $#: invalid arg count"
    SQL_STATEMENT=$1

    miniade_debug 10 "sql_write: SQL_STATEMENT=[$SQL_STATEMENT]"
    echo "$SQL_STATEMENT" >&${SQLITE[1]}
}

# Send a statement to the sqlite3 co-process and collect its output lines.
# Arguments: $1 - SQL statement
#            $2 - name of the array variable to fill, one element per
#                 output row (columns joined by the '@@@' separator)
# Any output line containing "Error" is treated as fatal.
sql_write_and_read() {
    local SQL_STATEMENT RESULT_LINES_REF RESULT_LINE END_OF_RESULTS_MARKER

    [ $# = 2 ] || miniade_internal "sql_write_and_read: $#: invalid arg count"
    SQL_STATEMENT=$1
    RESULT_LINES_REF=$2

    miniade_debug 20 "sql_write_and_read: SQL_STATEMENT=[$SQL_STATEMENT], RESULT_LINES_REF=$RESULT_LINES_REF"
    sql_write "$SQL_STATEMENT"
    # It should not happen that we start reading results from
    # previous SELECT statements, but to be doubly sure we
    # make each end-of-results marker unique.
    END_OF_RESULTS_MARKER="END_OF_${RANDOM}_RESULTS_LINES"
    sql_write "SELECT '$END_OF_RESULTS_MARKER';"
    eval "$RESULT_LINES_REF=()"
    # -r: take result lines literally (no backslash processing).
    while read -r -u ${SQLITE[0]} RESULT_LINE; do
        miniade_debug 20 "sql_write_and_read: RESULT_LINE=[$RESULT_LINE]"
        case $RESULT_LINE in
            *Error*)
                miniade_internal "sql_write_and_read: $RESULT_LINE"
                ;;
            $END_OF_RESULTS_MARKER)
                break
                ;;
            *)
                eval "$RESULT_LINES_REF=( \"\${$RESULT_LINES_REF[@]}\" \"\$RESULT_LINE\" )"
                ;;
        esac
    done
}

# Split a packed result row on the '@@@' separator.
# Arguments: $1 - row as returned by sql_write_and_read
#            $2... - names of variables to receive the columns, in order
# The number of names must equal the number of columns in the row.
sql_split_row_into_columns() {
    local ROW COLUMN_REFS REGEXP COLUMN_REF I

    [ $# -ge 2 ] || miniade_internal "sql_split_row_into_columns: $#: invalid arg count"
    ROW="$1"
    shift
    COLUMN_REFS=( "$@" )
    miniade_debug 20 "sql_split_row_into_columns: ROW=[$ROW], COLUMN_REFS=( ${COLUMN_REFS[*]} )"

    # Construct a regexp to chop the row into columns.
    REGEXP=
    for COLUMN_REF in "${COLUMN_REFS[@]}"; do
        REGEXP="$REGEXP${REGEXP:+@@@}(.*)"
    done

    # Chop row up.
    [[ $ROW =~ ^${REGEXP}$ ]] || miniade_internal "sql_split_row_into_columns: $ROW: failed to split"

    # Store columns in passed variables.
    for ((I=0; I<${#COLUMN_REFS[*]}; I++)); do
        eval "${COLUMN_REFS[$I]}=\"\${BASH_REMATCH[$((I+1))]}\""
    done
}

# EXIT trap handler: run every still-registered cleanup command, in the
# order in which they were pushed.
cleanup() {
    local CLEANUP_COMMAND

    for CLEANUP_COMMAND in "${CLEANUP_COMMAND_LIST[@]}"; do
        miniade_debug 2 "cleanup: calling [$CLEANUP_COMMAND] ..."
        # Errors in the cleanup commands should not cause eval to exit; set -e is set.
        eval "$CLEANUP_COMMAND" || true
    done
}

# Log the currently registered cleanup commands (debug output only).
cleanup_show() {
    local I

    miniade_debug 10 "cleanup_show: list follows: "
    for ((I=0; I<${#CLEANUP_COMMAND_LIST[*]}; I++)); do
        miniade_debug 50 "cleanup_show: ${CLEANUP_COMMAND_LIST[$I]}"
    done
}

# Register one or more cleanup commands (each argument is one command
# string, later passed to eval by cleanup()).
cleanup_push() {
    CLEANUP_COMMAND_LIST+=( "$@" )
    cleanup_show
}

# Unregister cleanup commands. Every registered command whose text exactly
# equals one of the arguments is removed.
cleanup_pop() {
    local CLEANUP_COMMAND_LIST_TMP CLEANUP_COMMAND CLEANUP_COMMAND2 COPY_FLAG

    CLEANUP_COMMAND_LIST_TMP=()
    for CLEANUP_COMMAND in "${CLEANUP_COMMAND_LIST[@]}"; do
        COPY_FLAG=true
        for CLEANUP_COMMAND2 in "$@"; do
            if [ "$CLEANUP_COMMAND" = "$CLEANUP_COMMAND2" ]; then
                COPY_FLAG=false
                break
            fi
        done
        if $COPY_FLAG; then
            CLEANUP_COMMAND_LIST_TMP+=( "$CLEANUP_COMMAND" )
        fi
    done
    CLEANUP_COMMAND_LIST=( "${CLEANUP_COMMAND_LIST_TMP[@]}" )
    cleanup_show
}

main "$@"