SIGTSTP propagation for 0.80 ?
David Golden
dgolden at cp.dias.ie
Thu Sep 15 13:36:32 EDT 2005
Okay, the attached patch is my first attempt. Use at your own risk, no warranty, etc...
I haven't implemented any "pause mpiexec and stdio only" variant as yet,
nor made anything command-line and/or ./configure configurable.
I've done some _very_ cursory testing with mpich2-1.0.2p1 with 2
concurrent mpiexecs, and it doesn't immediately fall over, so that's
nice.
The next major trouble is that it *doesn't* work with Torque right now - I think
it might just be that the time between the TSTP and the STOP that Torque sends
is too short, as "qsig -s TSTP <jobid>" does appear to do the right thing even
though "qsig -s suspend <jobid>" doesn't.
I wasn't too sure about the usage of the word "kill" in the sources - I decided "kill"
should mean the traditional, confusing "signal" rather than "signal with SIGKILL"
to avoid needless duplication of functionality, but "have_killed" and stdio.c
are particularly inconsistent with that (for the stdio child there are kill_, stop_
and cont_ functions).
-------------- next part --------------
diff -Nar -U 8 mpiexec-0.80/concurrent.c mpiexec-0.80.tstp1/concurrent.c
--- mpiexec-0.80/concurrent.c 2005-07-15 15:03:47.000000000 +0100
+++ mpiexec-0.80.tstp1/concurrent.c 2005-09-15 13:30:55.015298832 +0100
@@ -630,17 +630,17 @@
ret = read_client(n, &tasknum, sizeof(tasknum));
if (ret) return;
ret = read_client(n, &signum, sizeof(signum));
if (ret) return;
evt = -1;
tp = tid_find(n, tasknum);
if (tp)
- evt = kill_tid(tp);
+ evt = kill_tid(tp, signum);
/* return evt */
ret = write_client(n, &cmd, sizeof(cmd));
if (ret == 0)
write_client(n, &evt, sizeof(evt));
}
@@ -1171,17 +1171,17 @@
debug(2, "%s: client %d", __func__, n);
(void) close(clients[n].fd);
clients[n].fd = -1;
/* kill tids */
list_for_each_entry_safe(tp, tpnext, tids, list) {
if (tp->client == n)
- kill_tid(tp); /* might tid_del tp */
+ kill_tid(tp, SIGKILL); /* might tid_del tp */
}
free_client_if_zero(n);
}
/*
* The connection to this client is closed but he still has events and
* tids outstanding. Clean up. Don't delete the event.
*/
@@ -1194,40 +1194,41 @@
debug(2, "%s: evt %d client %d task %d type %s", __func__, ep->evt,
n, ep->task, evt_type_string(ep->type));
switch (ep->type) {
case EVT_OBIT:
case EVT_KILL: {
/* if this was the last event, release the tid; they may come
* in either order. */
evts_t *eq;
- evt_type_t other;
int found;
tp = tid_find(n, ep->task);
if (!tp)
error("%s: lost tid for client %d task %d", __func__, n,
ep->task);
- other = (ep->type == EVT_OBIT) ? EVT_KILL : EVT_OBIT;
found = 0;
+ /* check for any other events */
list_for_each_entry(eq, evts, list) {
- if (eq->client == ep->client
- && eq->task == ep->task
- && eq->type == other) {
+ if (ep != eq) {
+ if (eq->client == ep->client && eq->task == ep->task) {
+ if (eq->type == EVT_OBIT || eq->type == EVT_KILL ) {
found = 1;
break;
+ }
}
+ }
}
if (!found)
tid_del(tp);
break;
}
case EVT_START:
/* submit a kill and obit on this new task */
tp = tid_add(c->pending_tids[ep->task], n, ep->task);
- kill_tid(tp);
+ kill_tid(tp, SIGKILL);
break;
default:
error("%s: unknown event type %d", __func__, ep->type);
}
free_client_if_zero(n);
}
diff -Nar -U 8 mpiexec-0.80/mpiexec.c mpiexec-0.80.tstp1/mpiexec.c
--- mpiexec-0.80/mpiexec.c 2005-07-15 15:03:47.000000000 +0100
+++ mpiexec-0.80.tstp1/mpiexec.c 2005-09-15 17:44:12.004078328 +0100
@@ -153,36 +153,75 @@
if (sig == SIGKILL) return "SIGKILL";
# endif
# if defined(SIGSEGV)
if (sig == SIGSEGV) return "SIGSEGV";
# endif
# if defined(SIGTERM)
if (sig == SIGTERM) return "SIGTERM";
# endif
+# if defined(SIGTSTP)
+ if (sig == SIGTSTP) return "SIGTSTP";
+# endif
+# if defined(SIGCONT)
+ if (sig == SIGCONT) return "SIGCONT";
+# endif
return "unknown";
#endif
}
-static int killall_sig = 0;
+
static jmp_buf jmp_env;
/*
- * Signal handling.
+ * Signal handling for any propagated signals
+ *
+ * FIXME: lethal signals should be split out into a list,
+ * and probably made a command-line option with
+ * configurable defaults.
+ * FIXME: TSTP propagating STOP vs. TSTP propagating TSTP then STOP
+ * vs. TSTP propagating TSTP should probably be a command-line
+ * option with configurable defaults.
*/
void
killall(int sig)
{
static int killall_count = 0;
-
+#if defined(SIGTSTP)
+ const int tstp_siglist[] = { SIGTSTP };
+#endif
debug(1, "%s: caught signal %d (%s)", __func__, sig,
parse_signal_number(sig));
- ++killall_count;
- killall_sig = sig;
- longjmp(jmp_env, killall_count);
+ switch (sig) {
+ default:
+ kill_tasks(sig);
+ break;
+#if defined(SIGTSTP)
+ case SIGTSTP:
+ handle_signals(tstp_siglist,list_count(tstp_siglist), SIG_IGN); /* qualms about concurrent mpiexecs */
+ kill_tasks(SIGSTOP);
+ stop_stdio();
+ if (!concurrent_master) raise(SIGSTOP); /* uhoh. master has to stay awake to propagate for clients? */
+ break;
+#endif
+#if defined(SIGCONT)
+ case SIGCONT:
+ cont_stdio();
+ kill_tasks(SIGCONT);
+ handle_signals(tstp_siglist,list_count(tstp_siglist), killall);
+ break;
+#endif
+ case SIGHUP:
+ case SIGINT:
+ case SIGTERM:
+ kill_tasks(SIGKILL);
+ ++killall_count;
+ longjmp(jmp_env, killall_count);
+ break;
+ }
}
/*
* Enable one signal handler for a list of signals. Do not defer
* signal reception while handling these, to let the impatient user
* interrupt again to really exit.
*/
void
@@ -199,16 +238,34 @@
sigemptyset(&act.sa_mask);
act.sa_flags = SA_NODEFER;
act.sa_handler = handler;
for (i=0; i<num; i++)
sigaction(list[i], &act, 0);
}
/*
+ * Setup signals that should be propagated to all tasks if
+ * caught by the master.
+ * FIXME: Might be nice if the signal set was a command-line option with
+ * configurable defaults.
+ */
+void
+setup_propagated_signals()
+{
+#if defined(SIGTSTP)
+ const int propagated_siglist[] = { SIGHUP, SIGINT, SIGTERM, SIGTSTP, SIGCONT };
+#else
+ const int propagated_siglist[] = { SIGHUP, SIGINT, SIGTERM };
+#endif
+ handle_signals(propagated_siglist, list_count(propagated_siglist), killall);
+}
+
+
+/*
* Just print a little version string.
*/
static void
version(FILE *fp)
{
if (!strcmp(CONFIGURE_OPTIONS, ""))
fprintf(fp, "Version %s, no configure options\n", VERSION);
else
@@ -649,17 +706,17 @@
getpid(), getpid());
system(s);
sleep(1); /* wait for attach */
}
#endif
if (cl_args->server_only) {
cm_permit_new_clients(1);
- handle_signals(0, 0, killall);
+ setup_propagated_signals(); /* FIXME: should server-only propagated sigs be different? */
numtasks = 0;
numspawned = 0;
goto server_only;
}
/*
* Now look at the command-line constraints.
*/
@@ -689,16 +746,17 @@
nodes[tasks[i].node].numcpu -= tasks[i].num_copies;
cm_permit_new_clients(1);
} else
concurrent_node_alloc();
/*
* Run the tasks and wait for them to finish. Use of setjmp is
* to avoid complex shutdown activity in the signal handler.
+ * Any "lethal" signals (see killall()) will jump back to here.
*/
server_only:
jmp_return = setjmp(jmp_env);
switch (jmp_return) {
case 0:
if (cl_args->server_only) {
cm_serve_clients();
concurrent_exit(); /* not reached, wait for ctrl-c path */
@@ -707,17 +765,16 @@
distribute_executable();
start_tasks();
wait_tasks();
}
break;
/* reached by setjmp return likely */
case 1:
- kill_tasks(killall_sig);
if (concurrent_master) {
cm_kill_clients();
cm_permit_new_clients(0); /* no new connections */
}
wait_tasks();
break;
/* second ctrl-c, don't try to communicate with anything, just die */
diff -Nar -U 8 mpiexec-0.80/mpiexec.h mpiexec-0.80.tstp1/mpiexec.h
--- mpiexec-0.80/mpiexec.h 2005-07-15 15:03:47.000000000 +0100
+++ mpiexec-0.80.tstp1/mpiexec.h 2005-09-15 17:17:26.297182880 +0100
@@ -172,16 +172,17 @@
/*
* Prototypes
*/
/* mpiexec.c */
const char *resolve_exe(const char *exe);
const char *parse_signal_number(int sig);
void killall(int sig);
void handle_signals(const int *list, int num, void (*handler)(int sig));
+void setup_propagated_signals(void);
int stat_exe(const char *exe, int complain);
/* get_hosts.c */
void get_hosts(void);
void constrain_nodes(void);
void reconnect_to_mom(void);
/* start_tasks.c */
@@ -189,17 +190,17 @@
int do_tm_poll(tm_event_t *evt, const char *caller, int block);
/* task.c */
tids_t *tid_add(int tid, int client, int task);
tids_t *tid_find(int client, int task);
void tid_del(tids_t *tp);
void tid_dump(void);
const char *node_name_from_nid(tm_node_id nid);
-tm_event_t kill_tid(tids_t *tp);
+tm_event_t kill_tid(tids_t *tp, int signum);
void kill_tasks(int signum);
void wait_tasks(void);
/* event.c */
void evt_add(int tid, int client, int task, evt_type_t type);
evts_t *evt_lookup(int evt);
void evt_del(evts_t *ep);
const char *evt_type_string(evt_type_t type);
@@ -216,16 +217,18 @@
const char *config_get_unique_executable(void);
void config_set_unique_executable(const char *s);
/* stdio.c */
void stdio_fork(int expected_in[3], int abort_fd_in[2], int pmi_fd);
int stdio_port(int n);
void stdio_notice_streams(void);
void kill_stdio(void);
+void stop_stdio(void);
+void cont_stdio(void);
void try_kill_stdio(void);
void kill_stdio_abort_fd(int abort_fd_index);
void poll_set(int fd, fd_set *fds);
void poll_del(int fd, fd_set *fds);
void maybe_exit_stdio(void); /* for use by device-specific code */
/* concurrent.c */
void concurrent_init(void);
diff -Nar -U 8 mpiexec-0.80/start_tasks.c mpiexec-0.80.tstp1/start_tasks.c
--- mpiexec-0.80/start_tasks.c 2005-07-15 15:03:47.000000000 +0100
+++ mpiexec-0.80.tstp1/start_tasks.c 2005-09-15 13:06:33.250520856 +0100
@@ -380,17 +380,17 @@
stdio_fork(conns, gmpi_fd, pmi_fd);
if (pmi_fd >= 0)
close(pmi_fd); /* child has it now */
/*
* Start signal handling _after_ stdio child is up.
*/
numspawned = 0;
- handle_signals(0, 0, killall);
+ setup_propagated_signals();
/*
* environment variables common to all tasks
*/
env_init();
/* override user env with these */
if (cl_args->comm == COMM_MPICH_GM) {
diff -Nar -U 8 mpiexec-0.80/stdio.c mpiexec-0.80.tstp1/stdio.c
--- mpiexec-0.80/stdio.c 2005-07-15 15:03:47.000000000 +0100
+++ mpiexec-0.80.tstp1/stdio.c 2005-09-15 17:15:40.445274816 +0100
@@ -1039,16 +1039,30 @@
WTERMSIG(stat), parse_signal_number(WTERMSIG(stat)));
} else
error("%s: wait stat 0x%x, ifsignaled %d, termsig %d,"
" ifstopped %d, stopsig %d", __func__, stat, WIFSIGNALED(stat),
WTERMSIG(stat), WIFSTOPPED(stat), WSTOPSIG(stat));
}
}
+/* called by parent to stop stdio child */
+void
+stop_stdio(void)
+{
+ kill(pid, SIGSTOP);
+}
+
+void
+cont_stdio(void)
+{
+ kill(pid, SIGCONT);
+}
+
+
/*
* Try to kill stdio, but fail quietly if unsuccessful.
*/
void
try_kill_stdio(void)
{
if (!pid) return;
kill(pid, SIGTERM);
diff -Nar -U 8 mpiexec-0.80/task.c mpiexec-0.80.tstp1/task.c
--- mpiexec-0.80/task.c 2005-07-15 15:03:47.000000000 +0100
+++ mpiexec-0.80.tstp1/task.c 2005-09-15 15:53:44.831777664 +0100
@@ -89,23 +89,23 @@
return nodes[i].name;
}
/*
* Get rid of this task, but keep around any events it might generate.
* Return the new event number that will report when the kill is completed.
*/
tm_event_t
-kill_tid(tids_t *tp)
+kill_tid(tids_t *tp, int signum)
{
tm_event_t evt = -1;
int ret;
debug(2, "%s: kill client %d task %d", __func__, tp->client, tp->task);
- ret = tm_kill(tp->tid, SIGKILL, &evt);
+ ret = tm_kill(tp->tid, signum, &evt);
if (ret == TM_SUCCESS)
evt_add(evt, tp->client, tp->task, EVT_KILL);
else if (ret == TM_ENOTFOUND) {
debug(2, "%s: delete already dead client %d task %d",
__func__, tp->client, tp->task);
tid_del(tp);
} else
error_tm(ret, "%s: tm_kill client %d task %d", __func__,
@@ -120,38 +120,39 @@
/*
* Use tm to send a signal to all tasks.
*/
void
kill_tasks(int signum)
{
int i;
- debug(1, "%s: killing all tasks", __func__);
+ debug(1, "%s: killing all tasks with sig %d (%s)", __func__,
+ signum, parse_signal_number(signum) );
for (i=0; i<numtasks; i++) {
if (tasks[i].done) continue;
if (concurrent_master) {
tm_event_t evt;
int ret;
- debug(2, "%s: kill my task %d on %s", __func__,
- i, nodes[tasks[i].node].name);
- ret = tm_kill(tasks[i].tid, SIGKILL, &evt);
+ debug(2, "%s: kill my task %d on %s with sig %d (%s)", __func__,
+ i, nodes[tasks[i].node].name, signum, parse_signal_number(signum) );
+ ret = tm_kill(tasks[i].tid, signum, &evt);
if (ret == TM_SUCCESS)
evt_add(evt, -1, i, EVT_KILL);
else if (ret == TM_ENOTFOUND) {
debug(2, "%s: tried to kill my already dead task %d",
__func__, i);
/* but no tid to delete, and don't mark done until obit */
} else
error_tm(ret, "%s: tm_kill my task %d", __func__, i);
} else {
concurrent_request_kill(i, signum);
}
}
- have_killed = 1;
+ if (signum == SIGKILL) have_killed = 1;
}
/*
* Wait for tasks to finish, if any exit with non-zero status, perhaps
* kill the rest. Also, if concurrent_master, pay attention to other
* mpiexec requests.
*/
void
More information about the mpiexec
mailing list