Wed Mar 16 16:23:13 PST 2005
- Previous message: [Slony1-general] slony 1.1 release date
- Next message: [Slony1-general] test_slony_state.pl from CVS
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
Hannu Krosing wrote:
> Has anything been done to make slony *setup* less brittle ?
>
>I mean avoiding things like running a COPY for 48 hours, then failing to
>setup replica for a sequence, and merrily reporting 'sorry it failed,
>trying again in 15 sec'.
>
>This could be avoided if some sanity checks would be run before the real
>time-consuming actions when subscribing a set.
>
>These checks could be either separate functions, or perhaps a new no-op
>flag fro existing ones, so that two rounds of subscribe could be run,
>first with no_op=true and if it runs ok, only then a second with
>no_op=false to do the actual work.
>
>
Good thought. I know we have gotten bitten by this, notably with a
missing sequence blowing things up.
Here's a patch that I'm running through some tests now...
Overview:
- Before starting to copy tables...
- Do all the checks on tables that are done as part of copy_set, but
don't update anything
- Then add the subscriptions for all the sequences
Then proceed with copying tables. And OMIT the "setAddSequence_int()"
calls later, as the sequences have already been subscribed.
By the way, when you run into things such as this, it would be a slick
idea to head to GBorg and submit a feature request. All sorts of ideas
have been mentioned now and again of things that would be "neat to
have;" it is easy to forget about them if they aren't listed somewhere
persistent.
-------------- next part --------------
Index: remote_worker.c
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/src/slon/remote_worker.c,v
retrieving revision 1.77
diff -c -r1.77 remote_worker.c
*** remote_worker.c 7 Mar 2005 23:27:03 -0000 1.77
--- remote_worker.c 16 Mar 2005 16:11:36 -0000
***************
*** 2352,2357 ****
--- 2352,2531 ----
}
}
+ /* cbbrowne - in progress - check tables/sequences in set to
+ * make sure they are there and in good order. Don't copy any
+ * data yet; we want to just do a first pass that finds "bozo
+ * errors" */
+
+ /* Check tables and sequences in set to make sure they are all
+ * appropriately configured... */
+
+ /*
+ * Select the list of all tables the provider currently has in the set.
+ */
+ slon_mkquery(&query1,
+ "select T.tab_id, "
+ " \"pg_catalog\".quote_ident(PGN.nspname) || '.' || "
+ " \"pg_catalog\".quote_ident(PGC.relname) as tab_fqname, "
+ " T.tab_idxname, T.tab_comment "
+ "from %s.sl_table T, "
+ " \"pg_catalog\".pg_class PGC, "
+ " \"pg_catalog\".pg_namespace PGN "
+ "where T.tab_set = %d "
+ " and T.tab_reloid = PGC.oid "
+ " and PGC.relnamespace = PGN.oid "
+ "order by tab_id; ",
+ rtcfg_namespace, set_id);
+ res1 = PQexec(pro_dbconn, dstring_data(&query1));
+ if (PQresultStatus(res1) != PGRES_TUPLES_OK)
+ {
+ slon_log(SLON_ERROR, "remoteWorkerThread_%d: \"%s\" %s",
+ node->no_id, dstring_data(&query1),
+ PQresultErrorMessage(res1));
+ PQclear(res1);
+ slon_disconnectdb(pro_conn);
+ dstring_free(&query1);
+ terminate_log_archive();
+ return -1;
+ }
+ ntuples1 = PQntuples(res1);
+
+ /*
+ * For each table in the set
+ */
+ for (tupno1 = 0; tupno1 < ntuples1; tupno1++)
+ {
+ int tab_id = strtol(PQgetvalue(res1, tupno1, 0), NULL, 10);
+ char *tab_fqname = PQgetvalue(res1, tupno1, 1);
+ char *tab_idxname = PQgetvalue(res1, tupno1, 2);
+ char *tab_comment = PQgetvalue(res1, tupno1, 3);
+ int64 copysize = 0;
+
+ gettimeofday(&tv_start2, NULL);
+ slon_log(SLON_DEBUG2, "remoteWorkerThread_%d: "
+ "prepare to copy table %s\n",
+ node->no_id, tab_fqname);
+
+ /*
+ * Find out if the table we're copying has the special slony serial
+ * number key on the provider DB
+ */
+ slon_mkquery(&query1,
+ "select %s.tableHasSerialKey('%q');",
+ rtcfg_namespace, tab_fqname);
+ res2 = PQexec(pro_dbconn, dstring_data(&query1));
+ if (PQresultStatus(res2) != PGRES_TUPLES_OK)
+ {
+ slon_log(SLON_ERROR, "remoteWorkerThread_%d: \"%s\" %s",
+ node->no_id, dstring_data(&query1),
+ PQresultErrorMessage(res2));
+ PQclear(res2);
+ PQclear(res1);
+ slon_disconnectdb(pro_conn);
+ dstring_free(&query1);
+ terminate_log_archive();
+ return -1;
+ }
+ rc = *PQgetvalue(res2, 0, 0) == 't';
+ PQclear(res2);
+
+ if (rc)
+ {
+ /*
+ * It has, check if the table has this on the local DB too.
+ */
+ slon_log(SLON_DEBUG3, "remoteWorkerThread_%d: "
+ "table %s will require Slony-I serial key\n",
+ node->no_id, tab_fqname);
+ res2 = PQexec(loc_dbconn, dstring_data(&query1));
+ if (PQresultStatus(res2) != PGRES_TUPLES_OK)
+ {
+ slon_log(SLON_ERROR, "remoteWorkerThread_%d: \"%s\" %s",
+ node->no_id, dstring_data(&query1),
+ PQresultErrorMessage(res2));
+ PQclear(res2);
+ PQclear(res1);
+ slon_disconnectdb(pro_conn);
+ dstring_free(&query1);
+ terminate_log_archive();
+ return -1;
+ }
+ rc = *PQgetvalue(res2, 0, 0) == 't';
+ PQclear(res2);
+
+ if (!rc)
+ {
+ slon_log(SLON_DEBUG3, "remoteWorkerThread_%d: "
+ "table %s Slony-I serial key to be added local\n",
+ node->no_id, tab_fqname);
+ }
+ }
+ else
+ {
+ slon_log(SLON_DEBUG3, "remoteWorkerThread_%d: "
+ "table %s does not require Slony-I serial key\n",
+ node->no_id, tab_fqname);
+ }
+ }
+ PQclear(res1);
+
+ slon_log(SLON_DEBUG2, "remoteWorkerThread_%d: "
+ "all tables for set %d found on subscriber\n",
+ node->no_id, set_id);
+ /*
+ * Add in the sequences contained in the set
+ */
+ slon_mkquery(&query1,
+ "select SQ.seq_id, "
+ " \"pg_catalog\".quote_ident(PGN.nspname) || '.' || "
+ " \"pg_catalog\".quote_ident(PGC.relname), "
+ " SQ.seq_comment "
+ " from %s.sl_sequence SQ, "
+ " \"pg_catalog\".pg_class PGC, "
+ " \"pg_catalog\".pg_namespace PGN "
+ " where SQ.seq_set = %d "
+ " and PGC.oid = SQ.seq_reloid "
+ " and PGN.oid = PGC.relnamespace; ",
+ rtcfg_namespace, set_id);
+ res1 = PQexec(pro_dbconn, dstring_data(&query1));
+ if (PQresultStatus(res1) != PGRES_TUPLES_OK)
+ {
+ slon_log(SLON_ERROR, "remoteWorkerThread_%d: \"%s\" %s",
+ node->no_id, dstring_data(&query1),
+ PQresultErrorMessage(res1));
+ PQclear(res1);
+ slon_disconnectdb(pro_conn);
+ dstring_free(&query1);
+ terminate_log_archive();
+ return -1;
+ }
+ ntuples1 = PQntuples(res1);
+ for (tupno1 = 0; tupno1 < ntuples1; tupno1++)
+ {
+ char *seq_id = PQgetvalue(res1, tupno1, 0);
+ char *seq_fqname = PQgetvalue(res1, tupno1, 1);
+ char *seq_comment = PQgetvalue(res1, tupno1, 2);
+
+ slon_log(SLON_DEBUG2, "remoteWorkerThread_%d: "
+ "copy sequence %s\n",
+ node->no_id, seq_fqname);
+
+ slon_mkquery(&query1,
+ "select %s.setAddSequence_int(%d, %s, '%q', '%q')",
+ rtcfg_namespace, set_id, seq_id,
+ seq_fqname, seq_comment);
+ if (query_execute(node, loc_dbconn, &query1) < 0)
+ {
+ PQclear(res1);
+ slon_disconnectdb(pro_conn);
+ dstring_free(&query1);
+ terminate_log_archive();
+ return -1;
+ }
+ }
+ PQclear(res1);
+
+
/*
* Select the list of all tables the provider currently has in the set.
*/
***************
*** 2842,2896 ****
/*
* Copy the sequences contained in the set
*/
- slon_mkquery(&query1,
- "select SQ.seq_id, "
- " \"pg_catalog\".quote_ident(PGN.nspname) || '.' || "
- " \"pg_catalog\".quote_ident(PGC.relname), "
- " SQ.seq_comment "
- " from %s.sl_sequence SQ, "
- " \"pg_catalog\".pg_class PGC, "
- " \"pg_catalog\".pg_namespace PGN "
- " where SQ.seq_set = %d "
- " and PGC.oid = SQ.seq_reloid "
- " and PGN.oid = PGC.relnamespace; ",
- rtcfg_namespace, set_id);
- res1 = PQexec(pro_dbconn, dstring_data(&query1));
- if (PQresultStatus(res1) != PGRES_TUPLES_OK)
- {
- slon_log(SLON_ERROR, "remoteWorkerThread_%d: \"%s\" %s",
- node->no_id, dstring_data(&query1),
- PQresultErrorMessage(res1));
- PQclear(res1);
- slon_disconnectdb(pro_conn);
- dstring_free(&query1);
- terminate_log_archive();
- return -1;
- }
- ntuples1 = PQntuples(res1);
- for (tupno1 = 0; tupno1 < ntuples1; tupno1++)
- {
- char *seq_id = PQgetvalue(res1, tupno1, 0);
- char *seq_fqname = PQgetvalue(res1, tupno1, 1);
- char *seq_comment = PQgetvalue(res1, tupno1, 2);
-
- slon_log(SLON_DEBUG2, "remoteWorkerThread_%d: "
- "copy sequence %s\n",
- node->no_id, seq_fqname);
! slon_mkquery(&query1,
! "select %s.setAddSequence_int(%d, %s, '%q', '%q')",
! rtcfg_namespace, set_id, seq_id,
! seq_fqname, seq_comment);
! if (query_execute(node, loc_dbconn, &query1) < 0)
! {
! PQclear(res1);
! slon_disconnectdb(pro_conn);
! dstring_free(&query1);
! terminate_log_archive();
! return -1;
! }
! }
! PQclear(res1);
/*
* And copy over the sequence last_value corresponding to the
--- 3016,3075 ----
/*
* Copy the sequences contained in the set
*/
! /* The copy of sequences is being done earlier, before we
! * start doing tables, so that if anything is missing, that is
! * noticed BEFORE 8 hours of copying of data takes place... */
!
! /* slon_mkquery(&query1, */
! /* "select SQ.seq_id, " */
! /* " \"pg_catalog\".quote_ident(PGN.nspname) || '.' || " */
! /* " \"pg_catalog\".quote_ident(PGC.relname), " */
! /* " SQ.seq_comment " */
! /* " from %s.sl_sequence SQ, " */
! /* " \"pg_catalog\".pg_class PGC, " */
! /* " \"pg_catalog\".pg_namespace PGN " */
! /* " where SQ.seq_set = %d " */
! /* " and PGC.oid = SQ.seq_reloid " */
! /* " and PGN.oid = PGC.relnamespace; ", */
! /* rtcfg_namespace, set_id); */
! /* res1 = PQexec(pro_dbconn, dstring_data(&query1)); */
! /* if (PQresultStatus(res1) != PGRES_TUPLES_OK) */
! /* { */
! /* slon_log(SLON_ERROR, "remoteWorkerThread_%d: \"%s\" %s", */
! /* node->no_id, dstring_data(&query1), */
! /* PQresultErrorMessage(res1)); */
! /* PQclear(res1); */
! /* slon_disconnectdb(pro_conn); */
! /* dstring_free(&query1); */
! /* terminate_log_archive(); */
! /* return -1; */
! /* } */
! /* ntuples1 = PQntuples(res1); */
! /* for (tupno1 = 0; tupno1 < ntuples1; tupno1++) */
! /* { */
! /* char *seq_id = PQgetvalue(res1, tupno1, 0); */
! /* char *seq_fqname = PQgetvalue(res1, tupno1, 1); */
! /* char *seq_comment = PQgetvalue(res1, tupno1, 2); */
!
! /* slon_log(SLON_DEBUG2, "remoteWorkerThread_%d: " */
! /* "copy sequence %s\n", */
! /* node->no_id, seq_fqname); */
!
! /* slon_mkquery(&query1, */
! /* "select %s.setAddSequence_int(%d, %s, '%q', '%q')", */
! /* rtcfg_namespace, set_id, seq_id, */
! /* seq_fqname, seq_comment); */
! /* if (query_execute(node, loc_dbconn, &query1) < 0) */
! /* { */
! /* PQclear(res1); */
! /* slon_disconnectdb(pro_conn); */
! /* dstring_free(&query1); */
! /* terminate_log_archive(); */
! /* return -1; */
! /* } */
! /* } */
! /* PQclear(res1); */
/*
* And copy over the sequence last_value corresponding to the
- Previous message: [Slony1-general] slony 1.1 release date
- Next message: [Slony1-general] test_slony_state.pl from CVS
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the Slony1-general mailing list