CVS User Account cvsuser
Thu Mar 10 23:11:28 PST 2005
Log Message:
-----------

signal handling watchdog using forked processes - Frank Thompson


Slony-I 1.1 Change Request:

	The problem at hand is related to signal handling differences
	in multithreaded programs between different operating
	systems. In the slon process, it is intended that only the
	"main" thread is dealing with signals and that it uses the
	regular thread communication like mutexes and condition
	variables to control all the other working threads.

	That mutex and condition variable communication can sometimes
	lock up, which causes the entire slon daemon to freeze. In
	extreme cases this can even happen after one of the working
	threads has sent the main thread a signal to restart the slon
	process.

	What I had in mind to fix this is to have the program
	actually fork() very early on. The parent process will then be
	the only one dealing with signals, while the child ignores
	all signals entirely. The parent and the master thread of the
	child (which is the current slon) will communicate over a pipe
	or a socketpair. This way, the parent can detect that the
	slon has stopped responding altogether and can issue a signal 9
	(SIGKILL) to clean up the situation.

	In other words, building a watchdog process right into the
	slon executable.


Summary of Changes:

	slon main() will now fork() at the start and allow the parent
	process to handle cleanup, signal handling, and termination of
	the child process.  The child process is the scheduler, which
	talks to the parent (watchdog) process and vice versa via two
	pipes (watchdog_pipe and sched_wakeuppipe).  When shutting down
	the child process in the case of restart or termination,
	non-blocking reads and writes are used to avoid lockups.  If
	the scheduler process cannot be shut down cleanly, the parent
	will SIGKILL it so that the signal handler still completes in
	a timely manner.



	globals:

		slon.c
		------
		int watchdog_pipe[2];		// pipe between the parent (watchdog) and the child (worker) process
		int	sched_wakeuppipe[2];	// pipe the parent writes to in order to wake up the scheduler in the child

		runtime_config.c
		----------------
		pid_t slon_pid;				// current pid
		pid_t slon_ppid;			// parent pid
		pid_t slon_cpid;			// child pid
		
	macros:

		slon_abort() and slon_restart() now choose which process to
		signal: the parent (watchdog) when slon_ppid is set, i.e. when
		called from the child, otherwise the current process itself

		#define slon_abort() \
		do { \
			kill((slon_ppid == 0 ? slon_pid : slon_ppid), SIGTERM); \
			pthread_exit(NULL); \
		} while (0)
		#define slon_restart() \
		do { \
			kill((slon_ppid == 0 ? slon_pid : slon_ppid), SIGHUP); \
		} while (0)

	functions:

		slon.c
		------
		static void sighandler(int signo);				// new slon parent process signal handler
		static void main_sigalrmhandler(int signo);		// new scheduler alarm signal handler
		static void slon_kill_child(void);				// new pipe notify based child termination function

Modified Files:
--------------
    slony1-engine/src/slon:
        cleanup_thread.c (r1.20 -> r1.21)
        local_listen.c (r1.29 -> r1.30)
        runtime_config.c (r1.23 -> r1.24)
        scheduler.c (r1.18 -> r1.19)
        slon.c (r1.45 -> r1.46)
        slon.h (r1.47 -> r1.48)

-------------- next part --------------
Index: scheduler.c
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/src/slon/scheduler.c,v
retrieving revision 1.18
retrieving revision 1.19
diff -Lsrc/slon/scheduler.c -Lsrc/slon/scheduler.c -u -w -r1.18 -r1.19
--- src/slon/scheduler.c
+++ src/slon/scheduler.c
@@ -15,10 +15,10 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <signal.h>
 #include <unistd.h>
 #include <string.h>
 #include <errno.h>
-#include <signal.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/socket.h>
@@ -36,7 +36,6 @@
 #define PF_LOCAL PF_UNIX
 #endif
 
-
 /*
  * ---------- Static data ----------
  */
@@ -45,7 +44,6 @@
 static int	sched_numfd = 0;
 static fd_set sched_fdset_read;
 static fd_set sched_fdset_write;
-static int	sched_wakeuppipe[2];
 static SlonConn *sched_waitqueue_head = NULL;
 static SlonConn *sched_waitqueue_tail = NULL;
 
@@ -55,17 +53,14 @@
 static pthread_mutex_t sched_master_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_cond_t sched_master_cond = PTHREAD_COND_INITIALIZER;
 
-static sigset_t sched_sigset;
-
 
 /*
  * ---------- Local functions ----------
  */
 static void *sched_mainloop(void *);
-static void sched_sighandler(int signo);
-static void sched_sighuphandler(int signo);
 static void sched_add_fdset(int fd, fd_set * fds);
 static void sched_remove_fdset(int fd, fd_set * fds);
+static void sched_shutdown();
 
 
 /*
@@ -85,16 +80,6 @@
 	sched_main_thread = pthread_self();
 
 	/*
-	 * Block signals. Since sched_start_mainloop() is called before any other
-	 * thread is created, this will be inherited by all threads in the system.
-	 */
-	sigemptyset(&sched_sigset);
-	sigaddset(&sched_sigset, SIGHUP);
-	sigaddset(&sched_sigset, SIGINT);
-	sigaddset(&sched_sigset, SIGTERM);
-	pthread_sigmask(SIG_BLOCK, &sched_sigset, NULL);
-
-	/*
 	 * Grab the scheduler master lock
 	 */
 	if (pthread_mutex_lock(&sched_master_lock) < 0)
@@ -153,32 +138,6 @@
 int
 sched_wait_mainloop(void)
 {
-	int			signo;
-
-	/*
-	 * Wait for signal.
-	 */
-	sigemptyset(&sched_sigset);
-	sigaddset(&sched_sigset, SIGHUP);
-	sigaddset(&sched_sigset, SIGINT);
-	sigaddset(&sched_sigset, SIGTERM);
-	sigwait(&sched_sigset, &signo);
-
-	sigemptyset(&sched_sigset);
-	pthread_sigmask(SIG_SETMASK, &sched_sigset, NULL);
-
-	switch (signo)
-	{
-		case SIGHUP:
-			sched_sighuphandler(signo);
-			break;
-
-		case SIGINT:
-		case SIGTERM:
-			sched_sighandler(signo);
-			break;
-	}
-
 	/*
 	 * Wait for the scheduler to finish.
 	 */
@@ -413,17 +372,6 @@
 	FD_ZERO(&sched_fdset_read);
 	FD_ZERO(&sched_fdset_write);
 
-	/*
-	 * Create a pipe used by the main thread to cleanly wakeup the scheduler
-	 * on signals.
-	 */
-	if (pipe(sched_wakeuppipe) < 0)
-	{
-		perror("sched_mainloop: pipe()");
-		sched_status = SCHED_STATUS_ERROR;
-		pthread_cond_signal(&sched_master_cond);
-		pthread_exit(NULL);
-	}
 	sched_add_fdset(sched_wakeuppipe[0], &sched_fdset_read);
 
 	/*
@@ -575,6 +523,11 @@
 				sched_status = SCHED_STATUS_ERROR;
 				break;
 			}
+
+			if (buf[0] == 'p')
+			{
+				sched_status = SCHED_STATUS_SHUTDOWN;
+			}
 		}
 
 		/*
@@ -644,9 +597,13 @@
 	 * close the scheduler heads-up socket pair so nobody will think we're
 	 * listening any longer.
 	 */
+
+	/*
 	close(sched_wakeuppipe[0]);
+	sched_wakeuppipe[0] = -1;
 	close(sched_wakeuppipe[1]);
-	sched_wakeuppipe[0] = sched_wakeuppipe[1] = -1;
+	sched_wakeuppipe[1] = -1;
+	*/
 
 	/*
 	 * Then we cond_signal all connections that are in the queue.
@@ -692,7 +649,7 @@
  * conditions with signals. ----------
  */
 static void
-sched_sighandler(int signo)
+sched_shutdown()
 {
 	/*
 	 * Lock the master mutex and make sure that we are the main thread
@@ -733,13 +690,6 @@
 }
 
 
-static void
-sched_sighuphandler(int signo)
-{
-	slon_restart_request = true;
-	sched_sighandler(signo);
-}
-
 
 /*
  * ---------- sched_add_fdset
Index: slon.c
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/src/slon/slon.c,v
retrieving revision 1.45
retrieving revision 1.46
diff -Lsrc/slon/slon.c -Lsrc/slon/slon.c -u -w -r1.45 -r1.46
--- src/slon/slon.c
+++ src/slon/slon.c
@@ -32,12 +32,12 @@
 /*
  * ---------- Global data ----------
  */
-int			slon_restart_request = false;
+int			watchdog_pipe[2];
+int			sched_wakeuppipe[2];
 
 pthread_mutex_t slon_wait_listen_lock = PTHREAD_MUTEX_INITIALIZER;
 pthread_cond_t slon_wait_listen_cond = PTHREAD_COND_INITIALIZER;
 
-
 /*
  * ---------- Local data ----------
  */
@@ -51,11 +51,16 @@
 
 static pthread_t main_thread;
 static char *const *main_argv;
-static void sigalrmhandler(int signo);
+
+static void sighandler(int signo);
+static void main_sigalrmhandler(int signo);
+static void slon_kill_child(void);
 
 int			slon_log_level;
 char	   *pid_file;
 char	   *archive_dir = NULL;
+int			child_status;
+
 
 /*
  * ---------- main ----------
@@ -72,13 +77,15 @@
 	PGconn	   *startup_conn;
 	int			c;
 	int			errors = 0;
+	int			signo;
+	char		pipe_c;
+	pid_t		pid;
 	extern int	optind;
 	extern char *optarg;
-
+	struct sigaction act;
 
 	InitializeConfOptions();
 
-
 	while ((c = getopt(argc, argv, "f:a:d:s:t:g:c:p:o:hv")) != EOF)
 	{
 		switch (c)
@@ -146,6 +153,9 @@
 	 * identifier
 	 */
 	slon_pid = getpid();
+	slon_cpid = 0;
+	slon_ppid = 0;
+	main_argv = argv;
 
 	if ((char *)argv[optind])
 	{
@@ -214,7 +224,7 @@
 	}
 	if (PQstatus(startup_conn) != CONNECTION_OK)
 	{
-		slon_log(SLON_FATAL, "main: Cannot connect to local database - %s",
+		slon_log(SLON_FATAL, "main: Cannot connect to local database - %s\n",
 				 PQerrorMessage(startup_conn));
 		PQfinish(startup_conn);
 		slon_exit(-1);
@@ -252,6 +262,81 @@
 		}
 	}
 
+	/*
+	 * Pipes to be used as communication devices between the parent (watchdog)
+	 * and child (worker) processes.
+	 */
+	if (pipe(watchdog_pipe) < 0)
+	{
+		slon_log(SLON_FATAL, "slon: parent pipe create failed -(%d) %s\n", errno,strerror(errno));
+		slon_exit(-1);
+	}
+	if (pipe(sched_wakeuppipe) < 0)
+	{
+		slon_log(SLON_FATAL, "slon: sched_wakeuppipe create failed -(%d) %s\n", errno,strerror(errno));
+		slon_exit(-1);
+	}
+
+	/*
+	 * Fork here to allow parent process to trap signals and child process to 
+	 * handle real processing work creating a watchdog and worker process
+	 * hierarchy
+	 */
+	if ((slon_cpid = fork()) < 0)
+	{
+		slon_log(SLON_FATAL, "Fork failed -(%d) %s\n", errno,strerror(errno));
+		slon_exit(-1);
+	}
+	else if (slon_cpid == 0) /* child */
+	{
+		slon_pid = getpid();
+		slon_ppid = getppid();
+
+		slon_log(SLON_DEBUG2, "main: main process started\n");
+		/*
+		 * Wait for the parent process to initialize
+		 */
+		if (read(watchdog_pipe[0], &pipe_c, 1) != 1)
+		{
+			slon_log(SLON_FATAL, "main: read from parent pipe failed -(%d) %s\n", errno,strerror(errno));
+			slon_exit(-1);
+		}
+
+		if (pipe_c != 'p')
+		{
+			slon_log(SLON_FATAL, "main: incorrect data from parent pipe -(%c)\n",pipe_c);
+			slon_exit(-1);
+		}
+
+		slon_log(SLON_DEBUG2, "main: begin signal handler setup\n");
+
+		if (signal(SIGHUP,SIG_IGN) == SIG_ERR)
+		{
+			slon_log(SLON_FATAL, "slon: SIGHUP signal handler setup failed -(%d) %s\n", errno,strerror(errno));
+			slon_exit(-1);
+		}
+		if (signal(SIGINT,SIG_IGN) == SIG_ERR)
+		{
+			slon_log(SLON_FATAL, "slon: SIGINT signal handler setup failed -(%d) %s\n", errno,strerror(errno));
+			slon_exit(-1);
+		}
+		if (signal(SIGTERM,SIG_IGN) == SIG_ERR)
+		{
+			slon_log(SLON_FATAL, "slon: SIGTERM signal handler setup failed -(%d) %s\n", errno,strerror(errno));
+			slon_exit(-1);
+		}
+		if (signal(SIGCHLD,SIG_IGN) == SIG_ERR)
+		{
+			slon_log(SLON_FATAL, "slon: SIGCHLD signal handler setup failed -(%d) %s\n", errno,strerror(errno));
+			slon_exit(-1);
+		}
+		if (signal(SIGQUIT,SIG_IGN) == SIG_ERR)
+		{
+			slon_log(SLON_FATAL, "slon: SIGQUIT signal handler setup failed -(%d) %s\n", errno,strerror(errno));
+			slon_exit(-1);
+		}
+
+		slon_log(SLON_DEBUG2, "main: end signal handler setup\n");
 
 	/*
 	 * Start the event scheduling system
@@ -270,7 +355,7 @@
 				 "set transaction isolation level serializable;");
 	if (PQresultStatus(res) != PGRES_COMMAND_OK)
 	{
-		slon_log(SLON_FATAL, "Cannot start transaction - %s",
+			slon_log(SLON_FATAL, "Cannot start transaction - %s\n",
 				 PQresultErrorMessage(res));
 		PQclear(res);
 		slon_exit(-1);
@@ -292,7 +377,7 @@
 	res = PQexec(startup_conn, dstring_data(&query));
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
 	{
-		slon_log(SLON_FATAL, "main: Cannot get node list - %s",
+			slon_log(SLON_FATAL, "main: Cannot get node list - %s\n",
 				 PQresultErrorMessage(res));
 		PQclear(res);
 		dstring_free(&query);
@@ -342,7 +427,7 @@
 	res = PQexec(startup_conn, dstring_data(&query));
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
 	{
-		slon_log(SLON_FATAL, "main: Cannot get path config - %s",
+			slon_log(SLON_FATAL, "main: Cannot get path config - %s\n",
 				 PQresultErrorMessage(res));
 		PQclear(res);
 		dstring_free(&query);
@@ -373,7 +458,7 @@
 	res = PQexec(startup_conn, dstring_data(&query));
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
 	{
-		slon_log(SLON_FATAL, "main: Cannot get set config - %s",
+			slon_log(SLON_FATAL, "main: Cannot get set config - %s\n",
 				 PQresultErrorMessage(res));
 		PQclear(res);
 		dstring_free(&query);
@@ -400,7 +485,7 @@
 	res = PQexec(startup_conn, dstring_data(&query));
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
 	{
-		slon_log(SLON_FATAL, "main: Cannot get subscription config - %s",
+			slon_log(SLON_FATAL, "main: Cannot get subscription config - %s\n",
 				 PQresultErrorMessage(res));
 		PQclear(res);
 		dstring_free(&query);
@@ -429,7 +514,7 @@
 	res = PQexec(startup_conn, dstring_data(&query));
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
 	{
-		slon_log(SLON_FATAL, "main: Cannot get last local eventid - %s",
+			slon_log(SLON_FATAL, "main: Cannot get last local eventid - %s\n",
 				 PQresultErrorMessage(res));
 		PQclear(res);
 		dstring_free(&query);
@@ -453,7 +538,7 @@
 	res = PQexec(startup_conn, "rollback transaction;");
 	if (PQresultStatus(res) != PGRES_COMMAND_OK)
 	{
-		slon_log(SLON_FATAL, "main: Cannot rollback transaction - %s",
+			slon_log(SLON_FATAL, "main: Cannot rollback transaction - %s\n",
 				 PQresultErrorMessage(res));
 		PQclear(res);
 		slon_exit(-1);
@@ -532,10 +617,10 @@
 	 * Wait for all remote threads to finish
 	 */
 	main_thread = pthread_self();
-	main_argv = argv;
-	signal(SIGALRM, sigalrmhandler);
+		signal(SIGALRM, main_sigalrmhandler);
 	alarm(20);
 
+		slon_log(SLON_DEBUG2, "main: wait for remote threads\n");
 	rtcfg_joinAllRemoteThreads();
 
 	alarm(0);
@@ -560,53 +645,222 @@
 		slon_log(SLON_ERROR, "main: cannot join snmpThread - %s\n",
 				strerror(errno));
 #endif
-	if (slon_restart_request)
+
+		/*
+		 * Tell parent that worker is done
+		 */
+		slon_log(SLON_DEBUG2, "main: notify parent that worker is done\n");
+
+		if (write(watchdog_pipe[1], "c", 1) != 1)
 	{
-		slon_log(SLON_DEBUG1, "main: restart requested\n");
-		execvp(argv[0], argv);
-		slon_log(SLON_FATAL,
-				 "main: cannot restart via execvp(): %s\n", strerror(errno));
-		exit(-1);
+			slon_log(SLON_FATAL, "main: write to watchdog pipe failed -(%d) %s\n", errno,strerror(errno));
+			slon_exit(-1);
 	}
 
+		slon_log(SLON_DEBUG1, "main: done\n");
+
+		exit(0);
+	}
+	else /* parent */
+	{
+		slon_log(SLON_DEBUG2, "slon: watchdog process started\n");
+
 	/*
-	 * That's it.
+		 * Install signal handlers 
 	 */
-	slon_log(SLON_DEBUG1, "main: done\n");
-	return 0;
+		
+		slon_log(SLON_DEBUG2, "slon: begin signal handler setup\n");
+
+		act.sa_handler = &sighandler; 
+		sigemptyset(&act.sa_mask);
+		act.sa_flags = SA_NOMASK;
+
+		if (sigaction(SIGHUP,&act,NULL) < 0)
+		{
+			slon_log(SLON_FATAL, "slon: SIGHUP signal handler setup failed -(%d) %s\n", errno,strerror(errno));
+			slon_exit(-1);
+		}
+		if (signal(SIGINT,sighandler) == SIG_ERR)
+		{
+			slon_log(SLON_FATAL, "slon: SIGINT signal handler setup failed -(%d) %s\n", errno,strerror(errno));
+			slon_exit(-1);
+		}
+		if (signal(SIGTERM,sighandler) == SIG_ERR)
+		{
+			slon_log(SLON_FATAL, "slon: SIGTERM signal handler setup failed -(%d) %s\n", errno,strerror(errno));
+			slon_exit(-1);
+		}
+		if (signal(SIGCHLD,sighandler) == SIG_ERR)
+		{
+			slon_log(SLON_FATAL, "slon: SIGCHLD signal handler setup failed -(%d) %s\n", errno,strerror(errno));
+			slon_exit(-1);
+		}
+		if (signal(SIGQUIT,sighandler) == SIG_ERR)
+		{
+			slon_log(SLON_FATAL, "slon: SIGQUIT signal handler setup failed -(%d) %s\n", errno,strerror(errno));
+			slon_exit(-1);
 }
 
+		slon_log(SLON_DEBUG2, "slon: end signal handler setup\n");
 
-void
-slon_exit(int code)
+		/*
+		 * Tell worker/scheduler that parent has completed initialization
+		 */
+		if (write(watchdog_pipe[1], "p", 1) != 1)
 {
-	if (pid_file)
+			slon_log(SLON_FATAL, "slon: write to pipe failed -(%d) %s\n", errno,strerror(errno));
+			slon_exit(-1);
+		}
+
+		slon_log(SLON_DEBUG2, "slon: wait for main child process\n");
+
+		while ((pid = wait(&child_status)) != slon_cpid)
 	{
-		unlink(pid_file);
+			slon_log(SLON_DEBUG2, "slon: child terminated status: %d; pid: %d, current worker pid: %d\n", child_status, pid, slon_cpid);
+		}
+
+		slon_log(SLON_DEBUG1, "slon: done\n");
+	
+		/*
+		 * That's it.
+		 */
+		slon_exit(0);
 	}
-	exit(code);
 }
 
 
 static void
-sigalrmhandler(int signo)
+main_sigalrmhandler(int signo)
 {
 	if (main_thread == pthread_self())
 	{
 		alarm(0);
+		slon_log(SLON_WARN, "main: shutdown timeout exiting\n");
+		kill(slon_ppid,SIGQUIT);
+		exit(-1);
+	}
+	else
+	{
+		slon_log(SLON_WARN, "main: force SIGALRM the main thread\n");
+		pthread_kill(main_thread,SIGALRM);
+	}
+}
 
-		slon_log(SLON_WARN, "main: shutdown timeout\n");
-		if (slon_restart_request)
+static void
+sighandler(int signo)
+{
+	switch (signo)
 		{
+	case SIGALRM:
+	case SIGCHLD:
+		break;
+		
+	case SIGHUP:
+		slon_log(SLON_DEBUG1, "slon: restart requested\n");
+		slon_kill_child();
 			execvp(main_argv[0], main_argv);
-			slon_log(SLON_FATAL,
-				 "main: cannot restart via execvp(): %s\n", strerror(errno));
+		slon_log(SLON_FATAL, "slon: cannot restart via execvp(): %s\n", strerror(errno));
+		slon_exit(-1);
+		break;
+
+	case SIGINT:
+	case SIGTERM:
+		slon_log(SLON_DEBUG1, "slon: shutdown requested\n");
+		slon_kill_child();
+		slon_exit(-1);
+		break;
+
+	case SIGQUIT:
+		slon_log(SLON_DEBUG1, "slon: shutdown now requested\n");
+		kill(slon_cpid,SIGKILL);
+		slon_exit(-1);
+		break;
 		}
-		exit(-1);
 	}
-	pthread_kill(main_thread, SIGALRM);
+
+void
+slon_kill_child()
+{
+	char			pipe_c;
+	struct timeval	tv;
+	fd_set			fds;
+	int				rc;
+	int				fd;
+
+	if (slon_cpid == 0) return;
+
+	tv.tv_sec = 60;
+	tv.tv_usec = 0;
+
+	slon_log(SLON_DEBUG2, "slon: notify worker process to shutdown\n");
+
+	fd = sched_wakeuppipe[1];
+	FD_ZERO(&fds);
+	FD_SET(fd,&fds);
+
+	rc = select(fd + 1, NULL, &fds, NULL, &tv);
+
+	if (rc == 0 || rc < 0)
+	{
+		slon_log(SLON_DEBUG2, "slon: select write to worker timeout\n");
+		kill(slon_cpid,SIGKILL);
+		slon_exit(-1);
 }
 
+	if (write(sched_wakeuppipe[1], "p", 1) != 1)
+	{
+		slon_log(SLON_FATAL, "main: write to worker pipe failed -(%d) %s\n", errno,strerror(errno));
+		kill(slon_cpid,SIGKILL);
+		slon_exit(-1);
+	}
+
+	slon_log(SLON_DEBUG2, "slon: wait for worker process to shutdown\n");
+
+	fd = watchdog_pipe[0];
+	FD_ZERO(&fds);
+	FD_SET(fd,&fds);
+
+	rc = select(fd + 1, &fds, NULL, NULL, &tv);
+
+	if (rc == 0 || rc < 0)
+	{
+		slon_log(SLON_DEBUG2, "slon: select read from worker pipe timeout\n");
+		kill(slon_cpid,SIGKILL);
+		slon_exit(-1);
+	}
+	
+	if (read(watchdog_pipe[0], &pipe_c, 1) != 1)
+	{
+		slon_log(SLON_FATAL, "slon: read from worker pipe failed -(%d) %s\n", errno,strerror(errno));
+		kill(slon_cpid,SIGKILL);
+		slon_exit(-1);
+	}
+
+	if (pipe_c != 'c')
+	{
+		slon_log(SLON_FATAL, "slon: incorrect data from worker pipe -(%c)\n",pipe_c);
+		kill(slon_cpid,SIGKILL);
+		slon_exit(-1);
+	}
+
+	slon_log(SLON_DEBUG2, "slon: worker process shutdown ok\n");
+}
+
+void
+slon_exit(int code)
+{
+	if (slon_ppid == 0 && pid_file)
+	{
+		slon_log(SLON_DEBUG2, "slon: remove pid file\n");
+		unlink(pid_file);
+	}
+
+	slon_log(SLON_DEBUG2, "slon: exit(%d)\n",code);
+
+	exit(code);
+}
+
+
 /*
  * Local Variables:
  *	tab-width: 4
Index: slon.h
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/src/slon/slon.h,v
retrieving revision 1.47
retrieving revision 1.48
diff -Lsrc/slon/slon.h -Lsrc/slon/slon.h -u -w -r1.47 -r1.48
--- src/slon/slon.h
+++ src/slon/slon.h
@@ -324,6 +324,8 @@
  * ----------
  */
 extern pid_t slon_pid;
+extern pid_t slon_ppid;
+extern pid_t slon_cpid;
 extern char *rtcfg_cluster_name;
 extern char *rtcfg_namespace;
 extern char *rtcfg_conninfo;
@@ -344,16 +346,18 @@
  */
 #define slon_abort() \
 do { \
-	kill(slon_pid, SIGTERM); \
+	kill((slon_ppid == 0 ? slon_pid : slon_ppid), SIGTERM); \
 	pthread_exit(NULL); \
 } while (0)
 #define slon_restart() \
 do { \
-	kill(slon_pid, SIGHUP); \
+	kill((slon_ppid == 0 ? slon_pid : slon_ppid), SIGHUP); \
 } while (0)
 extern void slon_exit(int code);
 
 extern int	slon_restart_request;
+extern int watchdog_pipe[];
+extern int sched_wakeuppipe[];
 extern pthread_mutex_t slon_wait_listen_lock;
 extern pthread_cond_t slon_wait_listen_cond;
 
Index: runtime_config.c
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/src/slon/runtime_config.c,v
retrieving revision 1.23
retrieving revision 1.24
diff -Lsrc/slon/runtime_config.c -Lsrc/slon/runtime_config.c -u -w -r1.23 -r1.24
--- src/slon/runtime_config.c
+++ src/slon/runtime_config.c
@@ -33,6 +33,8 @@
  * ---------- Global data ----------
  */
 pid_t		slon_pid;
+pid_t		slon_cpid;
+pid_t		slon_ppid;
 char	   *rtcfg_cluster_name = NULL;
 char	   *rtcfg_namespace = NULL;
 char	   *rtcfg_conninfo = NULL;
Index: local_listen.c
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/src/slon/local_listen.c,v
retrieving revision 1.29
retrieving revision 1.30
diff -Lsrc/slon/local_listen.c -Lsrc/slon/local_listen.c -u -w -r1.29 -r1.30
--- src/slon/local_listen.c
+++ src/slon/local_listen.c
@@ -160,7 +160,7 @@
 			slon_log(SLON_INFO,
 					 "localListenThread: got restart notification - "
 					 "signal scheduler\n");
-			slon_restart();
+			kill(getppid(), SIGHUP);
 		}
 
 		/*
Index: cleanup_thread.c
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/src/slon/cleanup_thread.c,v
retrieving revision 1.20
retrieving revision 1.21
diff -Lsrc/slon/cleanup_thread.c -Lsrc/slon/cleanup_thread.c -u -w -r1.20 -r1.21
--- src/slon/cleanup_thread.c
+++ src/slon/cleanup_thread.c
@@ -67,6 +67,7 @@
 	{
 		kill(getpid(), SIGTERM);
 		pthread_exit(NULL);
+		/* slon_abort(); */
 	}
 	dbconn = conn->dbconn;
 


More information about the Slony1-commit mailing list