Steve Singer ssinger at ca.afilias.info
Mon Apr 10 06:38:15 PDT 2017
In fixing that issue we also discovered that slon was not
reloading the set origins into memory following a FAILOVER
command.  This could mean that we were processing SYNC events
from a node without realizing that the node is now the origin
for a set.
---
 src/backend/slony1_funcs.sql | 17 +++++++++++++---
 src/slon/remote_worker.c     | 15 +++++++++++++-
 src/slon/runtime_config.c    | 48 ++++++++++++++++++++++++++++++++++++++++++++
 src/slon/slon.h              |  1 +
 4 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/src/backend/slony1_funcs.sql b/src/backend/slony1_funcs.sql
index a47e89d..46d9017 100644
--- a/src/backend/slony1_funcs.sql
+++ b/src/backend/slony1_funcs.sql
@@ -793,7 +793,7 @@ $$ language plpgsql
 
 comment on function @NAMESPACE@.storeNode(p_no_id int4, p_no_comment text) is
 'no_id - Node ID #
-no_comment - Human-oriented comment
+no_comment - Human-oriented comment
 
 Generate the STORE_NODE event for node no_id';
 
@@ -1526,13 +1526,24 @@ begin
 	-- provider for all subscriptions served
 	-- by the failed node. (otherwise it
 	-- wouldn't be a allowable backup node).
+--	delete from @NAMESPACE@.sl_subscribe
+--		   where sub_receiver=p_backup_node;
+		   
 	update @NAMESPACE@.sl_subscribe	       
 	       set sub_provider=p_backup_node
 	       from @NAMESPACE@.sl_node
 	       where sub_provider=p_failed_node
 	       and sl_node.no_id=sub_receiver
-	       and sl_node.no_failed=false;	
-
+	       and sl_node.no_failed=false
+		   and sub_receiver<>p_backup_node;
+		   
+	update @NAMESPACE@.sl_subscribe	       
+	       set sub_provider=(select set_origin from
+		   	   @NAMESPACE@.sl_set where set_id=
+			   sub_set)
+			where sub_provider=p_failed_node
+			and sub_receiver=p_backup_node;
+		   
 	update @NAMESPACE@.sl_node
 		   set no_active=false WHERE 
 		   no_id=p_failed_node;
diff --git a/src/slon/remote_worker.c b/src/slon/remote_worker.c
index cde16db..f1c360b 100644
--- a/src/slon/remote_worker.c
+++ b/src/slon/remote_worker.c
@@ -301,6 +301,8 @@ remoteWorkerThread_main(void *cdata)
 	char		seqbuf[64];
 	bool		event_ok;
 	bool		need_reloadListen = false;
+	bool		need_reloadSets = false;
+	
 	char		conn_symname[32];
 
 	SlonSyncStatus sync_status = SYNC_INITIAL;
@@ -1276,8 +1278,14 @@ remoteWorkerThread_main(void *cdata)
 								 rtcfg_namespace,
 								 rtcfg_namespace,
 								 failed_node, node->no_id, seq_no_c);
-
+				slon_log(SLON_INFO, "remoteWorkerThread_%d FAILOVER_NODE finished %d\n"
+							 ,node->no_id,
+							 failed_node);
+				/**
+				 * The list of set origins has now changed.
+				 */
 				need_reloadListen = true;
+				need_reloadSets = true;
 			}
 			else if (strcmp(event->ev_type, "SUBSCRIBE_SET") == 0)
 			{
@@ -1516,6 +1524,11 @@ remoteWorkerThread_main(void *cdata)
 				rtcfg_reloadListen(local_dbconn);
 				need_reloadListen = false;
 			}
+			if (need_reloadSets)
+			{
+				rtcfg_reloadSets(local_dbconn);
+				need_reloadSets = false;	/* reload once per request, like need_reloadListen */
+			}
 		}
 
 #ifdef SLON_MEMDEBUG
diff --git a/src/slon/runtime_config.c b/src/slon/runtime_config.c
index 566d5ac..8877f9d 100644
--- a/src/slon/runtime_config.c
+++ b/src/slon/runtime_config.c
@@ -772,6 +772,54 @@ rtcfg_dropSet(int set_id)
 	rtcfg_unlock();
 }
 
+/* ----------
+ * rtcfg_reloadSets
+ *
+ *	Refresh set_origin of every set already in rtcfg_set_list_head from
+ *	sl_set (needed when a FAILOVER changes origins). Other sets are ignored.
+ * ----------
+ */
+void rtcfg_reloadSets(PGconn * db)
+{
+	SlonDString query;
+	PGresult   *res;
+	int			i, n;
+	SlonSet    *set;
+
+	rtcfg_lock();
+	dstring_init(&query);		/* must precede slon_mkquery() */
+	slon_mkquery(&query,
+				 "select set_id, set_origin from %s.sl_set",
+				 rtcfg_namespace);
+	res = PQexec(db, dstring_data(&query));
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		slon_log(SLON_FATAL, "rtcfg_reloadSets: Cannot get set config - %s\n",
+				 PQresultErrorMessage(res));
+		PQclear(res);
+		dstring_free(&query);
+		slon_retry();
+	}
+	for (i = 0, n = PQntuples(res); i < n; i++)
+	{
+		int			set_id = (int) strtol(PQgetvalue(res, i, 0), NULL, 10);
+		int			set_origin = (int) strtol(PQgetvalue(res, i, 1), NULL, 10);
+		for (set = rtcfg_set_list_head; set; set = set->next)
+		{
+			if (set->set_id == set_id)
+			{
+				set->set_origin = set_origin;
+			}
+		}
+	}
+	PQclear(res);
+	dstring_free(&query);		/* release the query text on the success path too */
+	rtcfg_unlock();
+}
+
+
+
+
 /* ----------
  * rtcfg_moveSet
  * ----------
diff --git a/src/slon/slon.h b/src/slon/slon.h
index c0adf6e..cdc68e4 100644
--- a/src/slon/slon.h
+++ b/src/slon/slon.h
@@ -478,6 +478,7 @@ extern void rtcfg_storeSet(int set_id, int set_origin, char *set_comment);
 extern void rtcfg_dropSet(int set_id);
 extern void rtcfg_moveSet(int set_id, int old_origin, int new_origin,
 			  int sub_provider);
+extern void rtcfg_reloadSets(PGconn *db);
 
 extern void rtcfg_storeSubscribe(int sub_set, int sub_provider,
 					 char *sub_forward);
-- 
2.1.4


--------------000002000709080200040908--


More information about the Slony1-hackers mailing list