Thu Sep 23 17:03:42 PDT 2004
- Previous message: [Slony1-commit] By cbbrowne: Reorganized a couple of notes, added new ones...
- Next message: [Slony1-commit] By darcyb: Add time stamps to log messages
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
Log Message: ----------- Added a new watchdog that's quite a bit smarter than the old one. Modified Files: -------------- slony1-engine/tools/altperl: slon-tools.pm (r1.9 -> r1.10) Added Files: ----------- slony1-engine/tools/altperl: slon_watchdog2.pl (r1.1) -------------- next part -------------- Index: slon-tools.pm =================================================================== RCS file: /usr/local/cvsroot/slony1/slony1-engine/tools/altperl/slon-tools.pm,v retrieving revision 1.9 retrieving revision 1.10 diff -Ltools/altperl/slon-tools.pm -Ltools/altperl/slon-tools.pm -u -w -r1.9 -r1.10 --- tools/altperl/slon-tools.pm +++ tools/altperl/slon-tools.pm @@ -134,6 +134,33 @@ system $cmd; } +sub query_slony_status { + my ($nodenum) = @_; + my $query = qq{ + select now() - ev_timestamp > '00:20:00'::interval as event_old, now() - ev_timestamp as age, + ev_timestamp, ev_seqno, ev_origin as origin +from _$SETNAME.sl_event events, _$SETNAME.sl_subscribe slony_master + where + events.ev_origin = slony_master.sub_provider and + not exists (select * from _$SETNAME.sl_subscribe providers + where providers.sub_receiver = slony_master.sub_provider and + providers.sub_set = slony_master.sub_set and + slony_master.sub_active = 't' and + providers.sub_active = 't') +order by ev_origin desc, ev_seqno desc limit 1; +}; + my ($port, $host, $dbname)= ($PORT[$nodenum], $HOST[$nodenum], $DBNAME[$nodenum]); + my $result=`$SLON_BIN_PATH/psql -p $port -h $host -c "$query" --tuples-only $dbname`; + return $result; +} +sub query_slon_connections { + my ($nodenum) = @_; + my $query = "select count(*) from pg_catalog.pg_listener where relname = '_" . $SETNAME . "_Restart';"; + my ($port, $host, $dbname)= ($PORT[$nodenum], $HOST[$nodenum], $DBNAME[$nodenum]); + my $result=`$SLON_BIN_PATH/psql -p $port -h $host -c "$query" --tuples-only $dbname`; + chomp $result; + return $result; +} -return 1; +1; --- /dev/null +++ tools/altperl/slon_watchdog2.pl @@ -0,0 +1,87 @@ +#!perl # -*- perl -*- +# $Id: slon_watchdog2.pl,v 1.1 2004/09/23 16:03:32 cbbrowne Exp $ +# Author: Christopher Browne +# Copyright 2004 Afilias Canada + +require 'slon-tools.pm'; +require 'slon.env'; + +$node =$ARGV[0]; +$sleep =$ARGV[1]; + +if ( scalar(@ARGV) < 2 ) { + die "Usage: ./slon_watchdog node sleep-time\n"; +} + +if ($node =~/^node(\d+)$/) { + $nodenum = $1; +} + +log_to_watchdog_log("Invoking watchdog for $SETNAME node $nodenum"); +while (1) { + my $res = query_slony_status($nodenum); # See where the node stands + my $eventsOK; + if ($res =~ /^\s*f\s*\|/) { + $eventsOK = "YES"; + } else { + $eventsOK = "NO"; + } + my $pid = get_pid($node); # See if the slon process is alive + my ($restart, $kick); + $kick = "NO"; # Initially, assume we don't need to submit a "restart node" command + if ($pid) { # PID is alive... + if ($eventsOK eq "YES") { + # All is well - do nothing! + $restart = "NO"; + } else { + $restart = "YES"; + } + } else { + $restart = "YES"; + } + + if ($restart eq "YES") { + if ($pid) { # process is still alive, but evidently deranged, so it's getting terminated + log_to_watchdog_log("terminate slon daemon for $SETNAME node $nodenum"); + # Kill slon until dead... + kill 2, $pid; + sleep 3; + kill 15, $pid; + sleep 3; + kill 9, $pid; + sleep 3; + } + } + if ($restart eq "YES") { + # Now, let's see if there's a lingering pg_listener entry + my $dead_connections = query_slon_connections($nodenum); + log_to_watchdog_log("spurious pg_listener entries for $SETNAME node $node - Count=[$dead_connections]"); + if ($dead_connections > 0) { + $kick = "YES"; + } + } + # If the node needs a swift kick in the "RESTART", then submit that to slonik + if ($kick eq "YES") { + log_to_watchdog_log("submit slonik to restart $SETNAME node $nodenum"); + open(SLONIK, "|$SLON_BIN_PATH/slonik"); + print SLONIK genheader(); + print SLONIK "restart node $node\n"; + close SLONIK; + } + if ($restart eq "YES") { + log_to_watchdog_log("restart slon for $nodenum"); + start_slon($nodenum); + } + sleep $sleep; +} + + +sub log_to_watchdog_log { + my ($message) = @_; + chomp $message; + my $date = `date`; + chomp $date; + open (SLONLOG, ">>$LOGDIR/slony-watchdog.log"); + print SLONLOG $date, "|", $message, "\n"; + close SLONLOG; +}
- Previous message: [Slony1-commit] By cbbrowne: Reorganized a couple of notes, added new ones...
- Next message: [Slony1-commit] By darcyb: Add time stamps to log messages
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the Slony1-commit mailing list