CVS User Account cvsuser
Fri Dec 10 18:44:36 PST 2004
Log Message:
-----------
Made watchdog2 smarter

Instead of just "thumping" (restarting) a slon once the latest _originating_
events get pretty old, the watchdog now bases the decision on the time of
the latest _confirmation_ of an event from the provider.

Modified Files:
--------------
    slony1-engine/tools/altperl:
        slon-tools.pm (r1.12 -> r1.13)
        slon_watchdog2.pl (r1.3 -> r1.4)

-------------- next part --------------
Index: slon_watchdog2.pl
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/tools/altperl/slon_watchdog2.pl,v
retrieving revision 1.3
retrieving revision 1.4
diff -Ltools/altperl/slon_watchdog2.pl -Ltools/altperl/slon_watchdog2.pl -u -w -r1.3 -r1.4
--- tools/altperl/slon_watchdog2.pl
+++ tools/altperl/slon_watchdog2.pl
@@ -21,7 +21,7 @@
 while (1) {
   my $res = query_slony_status($nodenum);    # See where the node stands
   my $eventsOK;
-  if ($res =~ /^\s*f\s*\|/) {
+  if ($res =~ /^\s*t\s*\|/) {
     $eventsOK = "YES";
   } else {
     $res = node_is_subscribing();
Index: slon-tools.pm
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/tools/altperl/slon-tools.pm,v
retrieving revision 1.12
retrieving revision 1.13
diff -Ltools/altperl/slon-tools.pm -Ltools/altperl/slon-tools.pm -u -w -r1.12 -r1.13
--- tools/altperl/slon-tools.pm
+++ tools/altperl/slon-tools.pm
@@ -140,21 +140,53 @@
 }
 
 
-$killafter="00:40:00";  # Restart slon after this interval, if there is no activity
+$killafter="00:20:00";  # Restart slon after this interval, if there is no activity
 sub query_slony_status {
   my ($nodenum) = @_;
+
+# Old query - basically looked at how far we are behind
+#   my $query = qq{
+#   select now() - ev_timestamp > '$killafter'::interval as event_old, now() - ev_timestamp as age,
+#        ev_timestamp, ev_seqno, ev_origin as origin
+# from _$SETNAME.sl_event events, _$SETNAME.sl_subscribe slony_master
+#   where 
+#      events.ev_origin = slony_master.sub_provider and
+#      not exists (select * from _$SETNAME.sl_subscribe providers
+#                   where providers.sub_receiver = slony_master.sub_provider and
+#                         providers.sub_set = slony_master.sub_set and
+#                         slony_master.sub_active = 't' and
+#                         providers.sub_active = 't')
+# order by ev_origin desc, ev_seqno desc limit 1;
+# };
+
+# New query: Looks to see if an event has been confirmed, for the set,
+# for the master node, within the interval requested
+
   my $query = qq{
-  select now() - ev_timestamp > '$killafter'::interval as event_old, now() - ev_timestamp as age,
-       ev_timestamp, ev_seqno, ev_origin as origin
-from _$SETNAME.sl_event events, _$SETNAME.sl_subscribe slony_master
-  where 
-     events.ev_origin = slony_master.sub_provider and
+select * from 
+(select now() - con_timestamp < '$killafter'::interval, now() - con_timestamp as age,
+       con_timestamp
+from _$SETNAME.sl_confirm c, _$SETNAME.sl_subscribe slony_master
+  where c.con_origin = slony_master.sub_provider and
      not exists (select * from _$SETNAME.sl_subscribe providers
                   where providers.sub_receiver = slony_master.sub_provider and
                         providers.sub_set = slony_master.sub_set and
                         slony_master.sub_active = 't' and
-                        providers.sub_active = 't')
-order by ev_origin desc, ev_seqno desc limit 1;
+                        providers.sub_active = 't') and
+        c.con_received = _$SETNAME.getLocalNodeId('_$SETNAME') and
+        now() - con_timestamp < '$killafter'::interval
+limit 1) as slave_confirmed_events
+union all (select
+now() - con_timestamp < '$killafter'::interval, now() - con_timestamp as age,
+       con_timestamp
+from _$SETNAME.sl_confirm c, _$SETNAME.sl_subscribe slony_master
+  where c.con_origin = _$SETNAME.getLocalNodeId('_$SETNAME') and
+             exists (select * from _$SETNAME.sl_subscribe providers
+                  where providers.sub_provider = _$SETNAME.getLocalNodeId('_$SETNAME') and
+                        slony_master.sub_active = 't') and
+        now() - con_timestamp < '$killafter'::interval
+limit 1)
+;
 };
   my ($port, $host, $dbname)= ($PORT[$nodenum], $HOST[$nodenum], $DBNAME[$nodenum]);
   my $result=`$SLON_BIN_PATH/psql -p $port -h $host -c "$query" --tuples-only $dbname`;


More information about the Slony1-commit mailing list