Thu Sep 23 17:03:42 PDT 2004
- Previous message: [Slony1-commit] By cbbrowne: Reorganized a couple of notes, added new ones...
- Next message: [Slony1-commit] By darcyb: Add time stamps to log messages
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
Log Message:
-----------
Added a new watchdog that's quite a bit smarter than the old one.
Modified Files:
--------------
slony1-engine/tools/altperl:
slon-tools.pm (r1.9 -> r1.10)
Added Files:
-----------
slony1-engine/tools/altperl:
slon_watchdog2.pl (r1.1)
-------------- next part --------------
Index: slon-tools.pm
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/tools/altperl/slon-tools.pm,v
retrieving revision 1.9
retrieving revision 1.10
diff -Ltools/altperl/slon-tools.pm -Ltools/altperl/slon-tools.pm -u -w -r1.9 -r1.10
--- tools/altperl/slon-tools.pm
+++ tools/altperl/slon-tools.pm
@@ -134,6 +134,33 @@
system $cmd;
}
+sub query_slony_status {   # Ask node $nodenum (via psql) about its newest replication event
+ my ($nodenum) = @_;   # index into @PORT/@HOST/@DBNAME; the query below yields one row: event_old (age > 20 min), age, ev_timestamp, ev_seqno, origin
+ my $query = qq{
+ select now() - ev_timestamp > '00:20:00'::interval as event_old, now() - ev_timestamp as age,
+ ev_timestamp, ev_seqno, ev_origin as origin
+from _$SETNAME.sl_event events, _$SETNAME.sl_subscribe slony_master
+ where
+ events.ev_origin = slony_master.sub_provider and
+ not exists (select * from _$SETNAME.sl_subscribe providers
+ where providers.sub_receiver = slony_master.sub_provider and
+ providers.sub_set = slony_master.sub_set and
+ slony_master.sub_active = 't' and
+ providers.sub_active = 't')
+order by ev_origin desc, ev_seqno desc limit 1;
+};   # "limit 1" after the descending sort keeps only the highest origin/seqno row
+ my ($port, $host, $dbname)= ($PORT[$nodenum], $HOST[$nodenum], $DBNAME[$nodenum]);   # connection settings for this node
+ my $result=`$SLON_BIN_PATH/psql -p $port -h $host -c "$query" --tuples-only $dbname`;   # --tuples-only: rows only, so event_old is the first field of the output
+ return $result;   # raw psql output, not chomped; empty if psql printed nothing
+}
+# Count pg_listener rows whose relname is "_<SETNAME>_Restart" on node $nodenum.
+sub query_slon_connections {
+  my $nodenum = shift;   # index into @PORT/@HOST/@DBNAME
+  my $sql = "select count(*) from pg_catalog.pg_listener where relname = '_${SETNAME}_Restart';";
+  my $psql = "$SLON_BIN_PATH/psql -p $PORT[$nodenum] -h $HOST[$nodenum]";
+  chomp(my $count = `$psql -c "$sql" --tuples-only $DBNAME[$nodenum]`);   # drop trailing newline
+  return $count;   # psql's output for the count; empty string if psql printed nothing
+}
-return 1;
+1;
--- /dev/null
+++ tools/altperl/slon_watchdog2.pl
@@ -0,0 +1,87 @@
+#!perl # -*- perl -*-
+# $Id: slon_watchdog2.pl,v 1.1 2004/09/23 16:03:32 cbbrowne Exp $
+# Author: Christopher Browne
+# Copyright 2004 Afilias Canada
+#
+# Watchdog for a single slon daemon.  Usage: slon_watchdog2.pl nodeN seconds
+# Each pass it asks query_slony_status() whether the newest event is stale
+# and get_pid() whether slon is running.  A stale or missing slon is
+# (re)started; if pg_listener entries linger, slonik is first asked to
+# "restart node".
+
+require 'slon-tools.pm';
+require 'slon.env';
+
+# Validate arguments up front: an unparsable node would leave $nodenum
+# undefined, and a non-numeric sleep time would make the loop spin.
+my $usage = "Usage: ./slon_watchdog node sleep-time\n";
+die $usage if scalar(@ARGV) < 2;
+($node, $sleep) = @ARGV;
+die $usage unless $node =~ /^node(\d+)$/;
+$nodenum = $1;
+die $usage unless $sleep =~ /^\d+$/ && $sleep > 0;
+
+log_to_watchdog_log("Invoking watchdog for $SETNAME node $nodenum");
+while (1) {
+  # event_old is the first column; "f" means the newest event is recent.
+  my $res = query_slony_status($nodenum);   # See where the node stands
+  my $eventsOK = ($res =~ /^\s*f\s*\|/) ? "YES" : "NO";
+  my $pid = get_pid($node);                 # See if the slon process is alive
+
+  # All is well only when slon is alive AND its events are fresh.
+  my $restart = ($pid && $eventsOK eq "YES") ? "NO" : "YES";
+  # Initially, assume we don't need to submit a "restart node" command
+  my $kick = "NO";
+
+  if ($restart eq "YES" && $pid) {
+    # Process is still alive, but evidently deranged, so it's getting terminated
+    log_to_watchdog_log("terminate slon daemon for $SETNAME node $nodenum");
+    # Kill slon until dead: escalate INT -> TERM -> KILL, but stop as soon
+    # as the process is gone so a later signal cannot hit a reused PID.
+    foreach my $signal (2, 15, 9) {
+      last unless kill(0, $pid);
+      kill $signal, $pid;
+      sleep 3;
+    }
+  }
+
+  if ($restart eq "YES") {
+    # Now, let's see if there's a lingering pg_listener entry
+    my $dead_connections = query_slon_connections($nodenum);
+    log_to_watchdog_log("spurious pg_listener entries for $SETNAME node $nodenum - Count=[$dead_connections]");
+    $kick = "YES" if $dead_connections > 0;
+  }
+
+  # If the node needs a swift kick in the "RESTART", then submit that to slonik
+  if ($kick eq "YES") {
+    log_to_watchdog_log("submit slonik to restart $SETNAME node $nodenum");
+    if (open(SLONIK, "|$SLON_BIN_PATH/slonik")) {
+      print SLONIK genheader();
+      # slonik wants the numeric node id and a terminating semicolon.
+      print SLONIK "restart node $nodenum;\n";
+      close(SLONIK) or log_to_watchdog_log("slonik restart failed for $SETNAME node $nodenum (status $?)");
+    }
+    else {
+      log_to_watchdog_log("cannot run slonik for $SETNAME node $nodenum: $!");
+    }
+  }
+
+  if ($restart eq "YES") {
+    log_to_watchdog_log("restart slon for $nodenum");
+    start_slon($nodenum);
+  }
+
+  # Wait before the next pass.
+  sleep $sleep;
+}
+
+
+# Append "<date>|<message>" to the watchdog log; warn if it cannot be opened.
+sub log_to_watchdog_log {
+  chomp(my $message = shift);   # text to log; a trailing newline is dropped
+  chomp(my $date = `date`);     # timestamp in the system's `date` format
+  open(my $log, '>>', "$LOGDIR/slony-watchdog.log")
+    or do { warn "cannot append to $LOGDIR/slony-watchdog.log: $!\n"; return };
+  print $log $date, "|", $message, "\n";
+  close $log;
+}
- Previous message: [Slony1-commit] By cbbrowne: Reorganized a couple of notes, added new ones...
- Next message: [Slony1-commit] By darcyb: Add time stamps to log messages
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the Slony1-commit mailing list