Fri Aug 1 13:18:19 PDT 2008
- Previous message: [Slony1-commit] slony1-engine/src/slon slon.c
- Next message: [Slony1-commit] slony1-engine/src/slon remote_worker.c
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
Update of /home/cvsd/slony1/slony1-engine/src/slon
In directory main.slony.info:/tmp/cvs-serv25935
Modified Files:
remote_worker.c
Log Message:
Add in additional performance monitoring, indicating how much time
and how many queries are being spent against the provider + subscriber.
This should enable people to more readily infer where bottlenecks are
themselves, rather than having to go to Jan & Chris and have them infer
things *really* indirectly.
Index: remote_worker.c
===================================================================
RCS file: /home/cvsd/slony1/slony1-engine/src/slon/remote_worker.c,v
retrieving revision 1.173
retrieving revision 1.174
diff -C2 -d -r1.173 -r1.174
*** remote_worker.c 27 Jun 2008 20:16:04 -0000 1.173
--- remote_worker.c 1 Aug 2008 20:18:17 -0000 1.174
***************
*** 28,33 ****
#define MAXGROUPSIZE 10000 /* What is the largest number of SYNCs we'd
! * want to group together??? */
/* ----------
* Local definitions
--- 28,35 ----
#define MAXGROUPSIZE 10000 /* What is the largest number of SYNCs we'd
! * want to group together??? */
!
+ void slon_terminate_worker(void);
/* ----------
* Local definitions
***************
*** 139,142 ****
--- 141,161 ----
} WorkGroupLineCode;
+ typedef struct PerfMon_s PerfMon; /* Structure for doing performance monitoring */
+ struct PerfMon_s
+ {
+ struct timeval prev_t;
+ struct timeval now_t;
+ double prov_query_t; /* Time spent running queries against the provider */
+ int prov_query_c; /* Number of queries run against the provider */
+ double subscr_query_t; /* Time spent running prep queries against the subscriber */
+ int subscr_query_c; /* Number of prep queries run against the subscriber */
+ double subscr_iud__t; /* Time spent running IUD against subscriber */
+ int subscr_iud__c; /* Number of IUD requests run against subscriber */
+ double large_tuples_t; /* Time spent processing large tuples */
+ int large_tuples_c; /* Number of large tuples processed */
+ int num_inserts;
+ int num_updates;
+ int num_deletes;
+ };
struct ProviderInfo_s
***************
*** 241,244 ****
--- 260,274 ----
* ----------
*/
+ /*
+ * Monitoring data structure
+ */
+
+ static void init_perfmon(PerfMon *pm);
+ static void start_monitored_event(PerfMon *pm);
+ static void monitor_provider_query(PerfMon *pm);
+ static void monitor_subscriber_query(PerfMon *pm);
+ static void monitor_subscriber_iud(PerfMon *pm);
+ static void monitor_largetuples(PerfMon *pm);
+
static void adjust_provider_info(SlonNode *node,
WorkerGroupData *wd, int cleanup);
***************
*** 2103,2112 ****
* Move the message to the head of the queue
*/
! if (oldmsg != node->message_head)
{
DLLIST_REMOVE(node->message_head,
! node->message_tail, oldmsg);
DLLIST_ADD_HEAD(node->message_head,
! node->message_tail, oldmsg);
}
}
--- 2133,2142 ----
* Move the message to the head of the queue
*/
! if ((SlonWorkMsg *) oldmsg != node->message_head)
{
DLLIST_REMOVE(node->message_head,
! node->message_tail, (SlonWorkMsg *) oldmsg);
DLLIST_ADD_HEAD(node->message_head,
! node->message_tail, (SlonWorkMsg *) oldmsg);
}
}
***************
*** 2420,2423 ****
--- 2450,2470 ----
}
}
+ if (sub_provider < 0) {
+ rtcfg_unlock();
+ slon_log(SLON_ERROR, "remoteWorkerThread_%d: provider node for set %d "
+ "not found in runtime configuration\n",
+ node->no_id, set_id);
+ slon_terminate_worker();
+ return -1;
+
+ }
+ if (set_origin < 0) {
+ rtcfg_unlock();
+ slon_log(SLON_ERROR, "remoteWorkerThread_%d: origin node for set %d "
+ "not found in runtime configuration\n",
+ node->no_id, set_id);
+ slon_terminate_worker();
+ return -1;
+ }
if (set == NULL)
{
***************
*** 2431,2435 ****
{
rtcfg_unlock();
! slon_log(SLON_ERROR, "remoteWorkerThread_%d: node %d "
"not found in runtime configuration\n",
node->no_id, sub_provider);
--- 2478,2482 ----
{
rtcfg_unlock();
! slon_log(SLON_ERROR, "remoteWorkerThread_%d: provider node %d "
"not found in runtime configuration\n",
node->no_id, sub_provider);
***************
*** 3499,3502 ****
--- 3546,3550 ----
slon_log(SLON_INFO, "copy_set %d done in %.3f seconds\n", set_id,
TIMEVAL_DIFF(&tv_start, &tv_now));
+
return 0;
}
***************
*** 3537,3542 ****
--- 3585,3592 ----
int actionlist_len;
int64 min_ssy_seqno;
+ PerfMon pm;
gettimeofday(&tv_start, NULL);
+
slon_log(SLON_DEBUG2, "remoteWorkerThread_%d: SYNC " INT64_FORMAT
" processing\n",
***************
*** 3547,3550 ****
--- 3597,3602 ----
dstring_init(&lsquery);
+ init_perfmon(&pm);
+
/*
* If this slon is running in log archiving mode, open a temporary file
***************
*** 3590,3593 ****
--- 3642,3647 ----
sprintf(conn_symname, "subscriber_%d_provider_%d",
node->no_id, provider->no_id);
+
+
provider->conn = slon_connectdb(provider->pa_conninfo,
conn_symname);
***************
*** 3610,3613 ****
--- 3664,3668 ----
"select %s.registerNodeConnection(%d); ",
rtcfg_namespace, rtcfg_nodeid);
+ start_monitored_event(&pm);
if (query_execute(node, provider->conn->dbconn, &query) < 0)
{
***************
*** 3619,3622 ****
--- 3674,3679 ----
return provider->pa_connretry;
}
+ monitor_provider_query(&pm);
+
slon_log(SLON_DEBUG1, "remoteWorkerThread_%d: "
"connected to data provider %d on '%s'\n",
***************
*** 3718,3722 ****
--- 3775,3783 ----
slon_appendquery(&query, "); ");
+ start_monitored_event(&pm);
res1 = PQexec(local_dbconn, dstring_data(&query));
+ slon_log(SLON_INFO, "about to monitor_subscriber_query - pulling big actionid list %d\n", provider->no_id);
+ monitor_subscriber_query(&pm);
+
if (PQresultStatus(res1) != PGRES_TUPLES_OK)
{
***************
*** 3773,3777 ****
--- 3834,3840 ----
rtcfg_namespace,
sub_set);
+ start_monitored_event(&pm);
res2 = PQexec(local_dbconn, dstring_data(&query));
+ monitor_subscriber_query(&pm);
if (PQresultStatus(res2) != PGRES_TUPLES_OK)
{
***************
*** 3946,3949 ****
--- 4009,4013 ----
rtcfg_namespace);
res1 = PQexec(local_dbconn, dstring_data(&query));
+
if (PQresultStatus(res1) != PGRES_TUPLES_OK)
{
***************
*** 4031,4035 ****
--- 4095,4102 ----
if (wgline->log.n_used > 0)
{
+ start_monitored_event(&pm);
res1 = PQexec(local_dbconn, dstring_data(&(wgline->log)));
+
+ monitor_subscriber_iud(&pm);
if (PQresultStatus(res1) == PGRES_EMPTY_QUERY)
{
***************
*** 4049,4053 ****
--- 4116,4122 ----
}
+ start_monitored_event(&pm);
res1 = PQexec(local_dbconn, dstring_data(&(wgline->data)));
+ monitor_subscriber_iud(&pm);
if (PQresultStatus(res1) == PGRES_EMPTY_QUERY)
{
***************
*** 4228,4232 ****
--- 4297,4303 ----
" group by 1; ");
+ start_monitored_event(&pm);
res1 = PQexec(provider->conn->dbconn, dstring_data(&query));
+ monitor_provider_query(&pm);
if (PQresultStatus(res1) != PGRES_TUPLES_OK)
{
***************
*** 4252,4255 ****
--- 4323,4327 ----
rtcfg_namespace,
seql_seqid, node->no_id, seqbuf, seql_last_value);
+ start_monitored_event(&pm);
if (query_execute(node, local_dbconn, &query) < 0)
{
***************
*** 4260,4263 ****
--- 4332,4336 ----
return 60;
}
+ monitor_subscriber_iud(&pm);
/*
***************
*** 4344,4347 ****
--- 4417,4432 ----
node->no_id, event->ev_seqno,
TIMEVAL_DIFF(&tv_start, &tv_now));
+
+ slon_log(SLON_INFO,
+ "remoteWorkerThread_%d: SYNC " INT64_FORMAT " Timing: "
+ " pqexec (s/count)"
+ "- provider %.3f/%d "
+ "- subscriber %.3f/%d "
+ "- IUD %.3f/%d\n",
+ node->no_id, event->ev_seqno,
+ pm.prov_query_t, pm.prov_query_c,
+ pm.subscr_query_t, pm.subscr_query_c,
+ pm.subscr_iud__t, pm.subscr_iud__c);
+
return 0;
}
***************
*** 4383,4393 ****
int line_ncmds;
! int num_inserts,
! num_deletes,
! num_updates;
! num_inserts = 0;
! num_deletes = 0;
! num_updates = 0;
dstring_init(&query);
--- 4468,4474 ----
int line_ncmds;
! PerfMon pm;
! init_perfmon(&pm);
dstring_init(&query);
***************
*** 4618,4622 ****
/*
* First make sure that the overall memory usage is inside
! * bouds.
*/
if (wd->workdata_largemem > sync_max_largemem)
--- 4699,4703 ----
/*
* First make sure that the overall memory usage is inside
! * bounds.
*/
if (wd->workdata_largemem > sync_max_largemem)
***************
*** 4759,4762 ****
--- 4840,4844 ----
if (log_cmdsize >= sync_max_rowsize)
{
+ start_monitored_event(&pm);
(void) slon_mkquery(&query2,
"select log_cmddata "
***************
*** 4776,4779 ****
--- 4858,4862 ----
log_origin, log_txid, log_actionseq);
res2 = PQexec(dbconn, dstring_data(&query2));
+ monitor_largetuples(&pm);
if (PQresultStatus(res2) != PGRES_TUPLES_OK)
{
***************
*** 4839,4843 ****
wd->tab_fqname[log_tableid],
log_cmddata);
! num_inserts++;
break;
--- 4922,4926 ----
wd->tab_fqname[log_tableid],
log_cmddata);
! pm.num_inserts++;
break;
***************
*** 4847,4851 ****
wd->tab_fqname[log_tableid],
log_cmddata);
! num_updates++;
break;
--- 4930,4934 ----
wd->tab_fqname[log_tableid],
log_cmddata);
! pm.num_updates++;
break;
***************
*** 4855,4859 ****
wd->tab_fqname[log_tableid],
log_cmddata);
! num_deletes++;
break;
}
--- 4938,4942 ----
wd->tab_fqname[log_tableid],
log_cmddata);
! pm.num_deletes++;
break;
}
***************
*** 4959,4967 ****
slon_log(SLON_INFO, "remoteHelperThread_%d_%d: inserts=%d updates=%d deletes=%d\n",
! node->no_id, provider->no_id, num_inserts, num_updates, num_deletes);
! num_inserts = 0;
! num_deletes = 0;
! num_updates = 0;
/*
--- 5042,5052 ----
slon_log(SLON_INFO, "remoteHelperThread_%d_%d: inserts=%d updates=%d deletes=%d\n",
! node->no_id, provider->no_id, pm.num_inserts, pm.num_updates, pm.num_deletes);
! slon_log(SLON_INFO,
! "remoteHelperThread_%d_%d: sync_helper timing: "
! " large tuples %.3f/%d\n",
! node->no_id, provider->no_id,
! pm.large_tuples_t, pm.large_tuples_c);
/*
***************
*** 5650,5655 ****
check_set_subscriber(int set_id, int node_id, PGconn *local_dbconn)
{
-
-
SlonDString query1;
PGresult *res;
--- 5735,5738 ----
***************
*** 5675,5679 ****
--- 5758,5806 ----
PQclear(res);
return 1;
+ }
+ static void init_perfmon(PerfMon *perf_info) {
+ perf_info->prov_query_t = 0.0;
+ perf_info->prov_query_c = 0;
+ perf_info->subscr_query_t = 0.0;
+ perf_info->subscr_query_c = 0;
+ perf_info->subscr_iud__t = 0.0;
+ perf_info->subscr_iud__c = 0;
+ perf_info->large_tuples_t = 0;
+ perf_info->large_tuples_c = 0;
+ perf_info->num_inserts = 0;
+ perf_info->num_updates = 0;
+ perf_info->num_deletes = 0;
+ }
+ static void start_monitored_event(PerfMon *perf_info) {
+ gettimeofday(&(perf_info->prev_t), NULL);
+ }
+ static void monitor_subscriber_query(PerfMon *perf_info) {
+ double diff;
+ gettimeofday(&(perf_info->now_t), NULL);
+ diff = TIMEVAL_DIFF(&(perf_info->prev_t), &(perf_info->now_t));
+ (perf_info->subscr_query_t) += diff;
+ (perf_info->subscr_query_c) ++;
+ }
+ static void monitor_provider_query(PerfMon *perf_info) {
+ double diff;
+ gettimeofday(&(perf_info->now_t), NULL);
+ diff = TIMEVAL_DIFF(&(perf_info->prev_t), &(perf_info->now_t));
+ (perf_info->prov_query_t) += diff;
+ (perf_info->prov_query_c) ++;
+ }
+ static void monitor_subscriber_iud(PerfMon *perf_info) {
+ double diff;
+ gettimeofday(&(perf_info->now_t), NULL);
+ diff = TIMEVAL_DIFF(&(perf_info->prev_t), &(perf_info->now_t));
+ (perf_info->subscr_iud__t) += diff;
+ (perf_info->subscr_iud__c) ++;
+ }
+ static void monitor_largetuples(PerfMon *perf_info) {
+ double diff;
+ gettimeofday(&(perf_info->now_t), NULL);
+ diff = TIMEVAL_DIFF(&(perf_info->prev_t), &(perf_info->now_t));
+ (perf_info->large_tuples_t) += diff;
+ (perf_info->large_tuples_c) ++;
}
- Previous message: [Slony1-commit] slony1-engine/src/slon slon.c
- Next message: [Slony1-commit] slony1-engine/src/slon remote_worker.c
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the Slony1-commit mailing list