CVS User Account cvsuser
Mon Sep 27 21:32:29 PDT 2004
Log Message:
-----------
Additions due to the "productionizing" of the 2nd Generation Slon 
Watchdog.

Also, very importantly, the Slonik script generators no longer
submit their output directly to slonik; they now print the
generated script instead.  You should REALLY review the generated
slonik script before submitting it.  Otherwise there are plenty of
opportunities for this to be a big set of "foot guns."

You are still free, if you wish, to submit the output of the
scripts directly to slonik by piping it to slonik.

Modified Files:
--------------
    slony1-engine/tools/altperl:
        slon-tools.pm (r1.10 -> r1.11)
        slon_watchdog2.pl (r1.1 -> r1.2)

-------------- next part --------------
Index: slon_watchdog2.pl
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/tools/altperl/slon_watchdog2.pl,v
retrieving revision 1.1
retrieving revision 1.2
diff -Ltools/altperl/slon_watchdog2.pl -Ltools/altperl/slon_watchdog2.pl -u -w -r1.1 -r1.2
--- tools/altperl/slon_watchdog2.pl
+++ tools/altperl/slon_watchdog2.pl
@@ -38,28 +38,14 @@
     }
   } else {
     $restart = "YES";
-  }
-
-  if ($restart eq "YES") {
-    if ($pid) {  # process is still alive, but evidently deranged, so it's getting terminated
-      log_to_watchdog_log("terminate slon daemon for $SETNAME node $nodenum");
-      # Kill slon until dead...
-      kill 2, $pid;
-      sleep 3;
-      kill 15, $pid;
-      sleep 3;
-      kill 9, $pid;
-      sleep 3;
-    }
-  }
-  if ($restart eq "YES") {
-    # Now, let's see if there's a lingering pg_listener entry
-    my $dead_connections = query_slon_connections($nodenum);
-    log_to_watchdog_log("spurious pg_listener entries for $SETNAME node $node - Count=[$dead_connections]");
-    if ($dead_connections > 0) {
-      $kick = "YES";
+    # See if the slon log ends with "FATAL  localListenThread: Another slon daemon is serving this node already"
+    my $lastlog=`/bin/ls -t $LOGDIR/slony1/node$nodenum/$dbname*log | head -1`;
+    my $lastline=`tail -1 $lastlog`;
+    if ($lastline =~ /Another slon daemon is serving this node already/) {
+      $kick = "YES";   # Yup, need to tell slonik to reset this node
     }
   }
+
   # If the node needs a swift kick in the "RESTART", then submit that to slonik
   if ($kick eq "YES") {
     log_to_watchdog_log("submit slonik to restart $SETNAME node $nodenum");
@@ -69,6 +55,15 @@
     close SLONIK;
   }
   if ($restart eq "YES") {
+    if ($pid) {
+      log_to_watchdog_log("terminate slon daemon for $SETNAME node $nodenum");
+      # Kill slon until dead...
+      kill 2, $pid;
+      sleep 3;
+      kill 15, $pid;
+      sleep 3;
+      kill 9, $pid;
+    }
     log_to_watchdog_log("restart slon for $nodenum");
     start_slon($nodenum);
   }
Index: slon-tools.pm
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/tools/altperl/slon-tools.pm,v
retrieving revision 1.10
retrieving revision 1.11
diff -Ltools/altperl/slon-tools.pm -Ltools/altperl/slon-tools.pm -u -w -r1.10 -r1.11
--- tools/altperl/slon-tools.pm
+++ tools/altperl/slon-tools.pm
@@ -86,13 +86,14 @@
   print OUT "# ------------------------------------------------------------- \n";
   close OUT;
   `cat $script >> $LOGDIR/slonik_scripts.log`;
-  print `$SLON_BIN_PATH/slonik < $script`;
+  #print `slonik < $script`;
+  print `cat $script`;
   unlink($script);
 }
 
 sub ps_args {
   my $sys=`uname`;
-  chomp $sys;    # Strip off cruft
+  chomp $sys;   # strip off edges
   if ($sys eq "Linux") {
     return "/bin/ps -auxww";
   } elsif ($sys eq "FreeBSD") {
@@ -109,35 +110,39 @@
   my ($node) = @_;
   $node =~ /node(\d*)$/;
   my $nodenum = $1;
-  my ($retpid, $pid);
+  my $pid;
+  my $tpid;
   my ($dbname, $dbport, $dbhost) = ($DBNAME[$nodenum], $PORT[$nodenum], $HOST[$nodenum]);
   #  print "Searching for PID for $dbname on port $dbport\n";
-  open(PSOUT, ps_args() . "| egrep \"[s]lon .*$SETNAME\" | egrep \"host=$dbhost dbname=$dbname.*port=$dbport\" | sort -n | awk '{print \$2}'|");
-  while ($pid = <PSOUT>) {
-    chomp $pid;
-    $retpid = $pid;
+  my $command =  ps_args() . "| egrep \"[s]lon .*$SETNAME\" | egrep \"host=$dbhost dbname=$dbname.*port=$dbport\" | sort -n | awk '{print \$2}'";
+  #print "Command:\n$command\n";
+  open(PSOUT, "$command|");
+  while ($tpid = <PSOUT>) {
+    chomp $tpid;
+    $pid = $tpid;
   }
   close(PSOUT);
-  return $retpid;
+  return $pid;
 }
 
 sub start_slon {
   my ($nodenum) = @_;
   my ($dsn, $dbname) = ($DSN[$nodenum], $DBNAME[$nodenum]);
   my $cmd;
+  `mkdir -p $LOGDIR/slony1/node$nodenum`;
   if ($APACHE_ROTATOR) {
-    $cmd = "$SLON_BIN_PATH/slon -s 1000 -d2  $SETNAME '$dsn' 2>&1 | $APACHE_ROTATOR \"$LOGDIR/slony1/node$nodenum/$dbname_%Y-%m-%d_%H:%M:%S.log\" 10M &";
+    $cmd = "$SLON_BIN_PATH/slon -s 1000 -d2 $SETNAME '$dsn' 2>&1 | $APACHE_ROTATOR \"$LOGDIR/slony1/node$nodenum/" . $dbname . "_%Y-%m-%d_%H:%M:%S.log\" 10M&";
   } else {
     $cmd = "$SLON_BIN_PATH/slon -s 1000 -d2  $SETNAME '$dsn' 2>&1 > $LOGDIR/slony1/node$nodenum/$dbname.log &";
   }
-  print "Invoke slon: $cmd\n";
+  print "Invoke slon for node $nodenum - $cmd\n";
   system $cmd;
 }
 
 sub query_slony_status {
   my ($nodenum) = @_;
   my $query = qq{
-  select now() - ev_timestamp > '00:20:00'::interval as event_old, now() - ev_timestamp as age,
+  select now() - ev_timestamp > '00:40:00'::interval as event_old, now() - ev_timestamp as age,
        ev_timestamp, ev_seqno, ev_origin as origin
 from _$SETNAME.sl_event events, _$SETNAME.sl_subscribe slony_master
   where 
@@ -151,15 +156,9 @@
 };
   my ($port, $host, $dbname)= ($PORT[$nodenum], $HOST[$nodenum], $DBNAME[$nodenum]);
   my $result=`$SLON_BIN_PATH/psql -p $port -h $host -c "$query" --tuples-only $dbname`;
-  return $result;
-}
-
-sub query_slon_connections {
-  my ($nodenum) = @_;
-  my $query = "select count(*) from pg_catalog.pg_listener where relname = '_" . $SETNAME . "_Restart';";
-  my ($port, $host, $dbname)= ($PORT[$nodenum], $HOST[$nodenum], $DBNAME[$nodenum]);
-  my $result=`$SLON_BIN_PATH/psql -p $port -h $host -c "$query" --tuples-only $dbname`;
   chomp $result;
+  #print "Query was: $query\n";
+  #print "Result was: $result\n";
   return $result;
 }
 


More information about the Slony1-commit mailing list