CVS User Account cvsuser
Sat Dec 4 00:21:36 PST 2004
Log Message:
-----------
Added in node_is_subscribing() function to make watchdog less aggressive about restarting slon nodes

If a node is in the process of being subscribed, then you can get
long-running COPY requests that run for hours (this happens if it's a
very large database).

slon_watchdog2.pl therefore uses two criteria when deciding whether to
"throttle" its slon: if there has been no local event on the node within
the interval $killafter, it will kill (and ultimately restart) the slon
UNLESS a subscription is in progress.

Modified Files:
--------------
    slony1-engine/tools/altperl:
        slon-tools.pm (r1.11 -> r1.12)
        slon_watchdog2.pl (r1.2 -> r1.3)

-------------- next part --------------
Index: slon_watchdog2.pl
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/tools/altperl/slon_watchdog2.pl,v
retrieving revision 1.2
retrieving revision 1.3
diff -Ltools/altperl/slon_watchdog2.pl -Ltools/altperl/slon_watchdog2.pl -u -w -r1.2 -r1.3
--- tools/altperl/slon_watchdog2.pl
+++ tools/altperl/slon_watchdog2.pl
@@ -24,8 +24,13 @@
   if ($res =~ /^\s*f\s*\|/) {
     $eventsOK = "YES";
   } else {
+    $res = node_is_subscribing();
+    if ($res =~ /SUBSCRIBE_SET/) {
+      $eventsOK = "YES";
+    } else {
     $eventsOK = "NO";
   }
+  }
   my $pid = get_pid($node);                  # See if the slon process is alive
   my ($restart, $kick);
   $kick = "NO";   # Initially, assume we don't need to submit a "restart node" command
@@ -70,7 +75,6 @@
   sleep $sleep;
 }
 
-
 sub log_to_watchdog_log {
   my ($message) = @_;
   chomp $message;
Index: slon-tools.pm
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/tools/altperl/slon-tools.pm,v
retrieving revision 1.11
retrieving revision 1.12
diff -Ltools/altperl/slon-tools.pm -Ltools/altperl/slon-tools.pm -u -w -r1.11 -r1.12
--- tools/altperl/slon-tools.pm
+++ tools/altperl/slon-tools.pm
@@ -139,10 +139,12 @@
   system $cmd;
 }
 
+
+$killafter="00:40:00";  # Restart slon after this interval, if there is no activity
 sub query_slony_status {
   my ($nodenum) = @_;
   my $query = qq{
-  select now() - ev_timestamp > '00:40:00'::interval as event_old, now() - ev_timestamp as age,
+  select now() - ev_timestamp > '$killafter'::interval as event_old, now() - ev_timestamp as age,
        ev_timestamp, ev_seqno, ev_origin as origin
 from _$SETNAME.sl_event events, _$SETNAME.sl_subscribe slony_master
   where 
@@ -162,4 +164,28 @@
   return $result;
 }
 
+# Checks whether a subscription is still in progress: looks for a SUBSCRIBE_SET
+# event on the local node matching a not-yet-active sl_subscribe entry.  Takes an
+# optional node number (default: global $nodenum); returns psql's chomped output.
+sub node_is_subscribing {
+  my $node = defined $_[0] ? $_[0] : $nodenum;
+  my $see_if_subscribing = qq {
+select * from "_$SETNAME".sl_event e, "_$SETNAME".sl_subscribe s
+where ev_origin = "_$SETNAME".getlocalnodeid('_$SETNAME') and  -- Event on local node
+      ev_type = 'SUBSCRIBE_SET' and                            -- Event is SUBSCRIBE SET
+      --- Then, match criteria against sl_subscribe
+      sub_set = ev_data1 and sub_provider = ev_data2 and sub_receiver = ev_data3 and
+      (case sub_forward when 'f' then 'f'::text when 't' then 't'::text end) = ev_data4
+
+      --- And we're looking for a subscription that is not yet active
+      and not sub_active
+limit 1;   --- One such entry is sufficient...
+};
+  $see_if_subscribing =~ s/"/\\"/g;   # so the identifier quotes survive the shell's "..."
+  my ($port, $host, $dbname)= ($PORT[$node], $HOST[$node], $DBNAME[$node]);
+  my $result=`$SLON_BIN_PATH/psql -p $port -h $host -c "$see_if_subscribing" --tuples-only $dbname`;  # was "$query", which this sub never sets
+  chomp $result;
+  return $result;
+}
+
 1;


More information about the Slony1-commit mailing list