Wed Apr 13 22:21:28 PDT 2005
- Previous message: [Slony1-commit] By cbbrowne: Address bug #1226 If a set contains no tables, then warn
- Next message: [Slony1-commit] By cbbrowne: Add a whole bunch of index entries to admin guide to make
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
Log Message: ----------- Address a synchronization problem where a MOVE_SET event may reach cascaded subscribers *AFTER* the new origin has started generating SYNCs. Such a subscriber ignores those SYNCs because it does not yet know that the origin has moved; this can lead to data loss. The ACCEPT_SET event is added as a remedy: when the MOVE_SET is submitted, an ACCEPT_SET event is also raised on the new origin. That event will necessarily precede any SYNCs coming from the new origin. Nodes that receive the ACCEPT_SET event wait until they have also received the MOVE_SET event before proceeding, which protects against data loss. Tags: ---- REL_1_0_STABLE Modified Files: -------------- slony1-engine/src/slon: remote_worker.c (r1.55.2.7 -> r1.55.2.8) slony1-engine/src/backend: slony1_funcs.sql (r1.15.2.14 -> r1.15.2.15) -------------- next part -------------- Index: remote_worker.c =================================================================== RCS file: /usr/local/cvsroot/slony1/slony1-engine/src/slon/remote_worker.c,v retrieving revision 1.55.2.7 retrieving revision 1.55.2.8 diff -Lsrc/slon/remote_worker.c -Lsrc/slon/remote_worker.c -u -w -r1.55.2.7 -r1.55.2.8 --- src/slon/remote_worker.c +++ src/slon/remote_worker.c @@ -744,6 +744,66 @@ rtcfg_namespace, trig_tabid, trig_tgname); } + else if (strcmp(event->ev_type, "ACCEPT_SET") == 0) + { + /* If we're a remote node, and haven't yet + * received the MOVE_SET event from the + * new origin, then we'll need to sleep a + * bit... 
This avoids a race condition + * where new SYNCs take place on the new + * origin, and are ignored on some + * subscribers (and their children) + * because the MOVE_SET wasn't yet + * received and processed */ + + int set_id = (int) strtol(event->ev_data1, NULL, 10); + int old_origin = (int) strtol(event->ev_data2, NULL, 10); + int new_origin = (int) strtol(event->ev_data3, NULL, 10); + PGresult *res; + + if ((rtcfg_nodeid != old_origin) && (rtcfg_nodeid != new_origin)) { + slon_mkquery(&query1, + "select 1 from %s.sl_event accept " + "where " + " accept.ev_type = 'ACCEPT_SET' and " + " accept.ev_origin = %d and " + " accept.ev_data1 = %d and " + " accept.ev_data2 = %d and " + " accept.ev_data3 = %d and " + " not exists " + " (select 1 from %s.sl_event move " + " where " + " accept.ev_origin = move.ev_data3 and " + " move.ev_type = 'MOVE_SET' and " + " move.ev_data1 = accept.ev_data1 and " + " move.ev_data2 = accept.ev_data2 and " + " move.ev_data3 = accept.ev_data3 and " + " move.ev_seqno = accept.ev_data4); ", + + rtcfg_namespace, + old_origin, set_id, old_origin, new_origin, + rtcfg_namespace); + res = PQexec(local_dbconn, dstring_data(&query1)); + while (PQntuples(res) > 0) { + int sleeptime = 15; + int sched_rc; + slon_log(SLON_WARN, "remoteWorkerThread_%d: " + "accept set: node has not yet received MOVE_SET event " + "for set %d old origin %d new origin - sleep %d seconds\n", + rtcfg_nodeid, set_id, old_origin, new_origin, sleeptime); + sched_rc = sched_msleep(node, sleeptime * 1000); + if (sched_rc != SCHED_STATUS_OK) { + event_ok = false; + break; + } else { + if (sleeptime < 60) + sleeptime *= 2; + } + if (query_execute(node, local_dbconn, &query1) < 0) + slon_abort(); + } + } + } else if (strcmp(event->ev_type, "MOVE_SET") == 0) { int set_id = (int) strtol(event->ev_data1, NULL, 10); @@ -758,6 +818,7 @@ * chain. To catch up with that, we need to execute * it now and select the resulting provider for us. 
*/ + slon_appendquery(&query1, "select %s.moveSet_int(%d, %d, %d); ", rtcfg_namespace, Index: slony1_funcs.sql =================================================================== RCS file: /usr/local/cvsroot/slony1/slony1-engine/src/backend/slony1_funcs.sql,v retrieving revision 1.15.2.14 retrieving revision 1.15.2.15 diff -Lsrc/backend/slony1_funcs.sql -Lsrc/backend/slony1_funcs.sql -u -w -r1.15.2.14 -r1.15.2.15 --- src/backend/slony1_funcs.sql +++ src/backend/slony1_funcs.sql @@ -1657,14 +1657,28 @@ end loop; end if; + -- On the new origin, raise an event - ACCEPT_SET + if v_local_node_id = p_new_origin then + -- Find the event number from the origin + select max(ev_seqno) as seqno into v_sub_row + from @NAMESPACE@.sl_event + where ev_type = ''MOVE_SET'' and + ev_data1 = p_set_id and + ev_data2 = p_old_origin and + ev_data3 = p_new_origin and + ev_origin = p_old_origin; + + perform @NAMESPACE@.createEvent(''_@CLUSTERNAME@'', ''ACCEPT_SET'', + p_set_id, p_old_origin, p_new_origin, v_sub_row.seqno); + end if; + -- ---- -- Next we have to reverse the subscription path -- ---- v_sub_last = p_new_origin; select sub_provider into v_sub_node from @NAMESPACE@.sl_subscribe - where sub_set = p_set_id - and sub_receiver = p_new_origin; + where sub_receiver = p_new_origin; if not found then raise exception ''Slony-I: subscription path broken in moveSet_int''; end if; @@ -3451,6 +3465,7 @@ ''Slony-I: set provider and receiver cannot be identical''; end if; + -- --- -- Check to see if the set contains any tables - gripe if not - bug #1226 -- ---
- Previous message: [Slony1-commit] By cbbrowne: Address bug #1226 If a set contains no tables, then warn
- Next message: [Slony1-commit] By cbbrowne: Add a whole bunch of index entries to admin guide to make
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the Slony1-commit mailing list