Patch for TotalView support with MPICH2/MVAPICH2
Frank Mietke
frank.mietke at informatik.tu-chemnitz.de
Thu Mar 20 09:23:10 EDT 2008
Hi,
here's my patch adding TotalView support for MPICH2/MVAPICH2. I've tested it
on our system and it worked well. For it to work properly, setting the following
environment variables is recommended, but not required:
TOTALVIEW=<totalview executable>
TVDSVR=<ssh|rsh>
MPICH2/MVAPICH2 have to be configured with --enable-debuginfo.
Best Regards,
Frank
--
Dipl.-Inf. Frank Mietke | Fakultätsrechen- und Informationszentrum
Tel.: 0371 - 531 - 35538 | Fak. für Informatik
Fax: 0371 - 531 8 35538 | TU-Chemnitz
Key-ID: 60F59599 | frank.mietke at informatik.tu-chemnitz.de
-------------- next part --------------
--- Makefile.in 2008-02-22 21:23:21.000000000 +0100
+++ Makefile.in 2008-03-20 12:40:44.000000000 +0100
@@ -7,8 +7,8 @@
#
SRC = mpiexec.c get_hosts.c start_tasks.c task.c event.c util.c config.c \
stdio.c growstr.c pmi.c gm.c ib.c p4.c rai.c concurrent.c exedist.c \
- spawn.c
-H = mpiexec.h util.h growstr.h list.h
+ spawn.c tv_attach.c
+H = mpiexec.h util.h growstr.h list.h tv_attach.h
OTHER = ChangeLog LICENSE README mpiexec.1 proc-relations.fig \
hello.c hellof.f hellomp.f redir-helper.c \
runtests.pl README.lam
--- mpiexec.h 2008-02-22 21:23:21.000000000 +0100
+++ mpiexec.h 2008-03-20 12:40:44.000000000 +0100
@@ -134,6 +134,11 @@
} cl_args_t;
extern cl_args_t *cl_args;
+/* TotalView executable, could also be set through environment variable TOTALVIEW */
+extern char* tvname;
+/* Flag which signals listener process when the tv_ready command can be sent to MPICH2 processes */
+extern int tvready;
+
/*
* Types of TM events that can happen.
*/
--- start_tasks.c 2008-02-22 21:23:21.000000000 +0100
+++ start_tasks.c 2008-03-20 13:30:56.000000000 +0100
@@ -18,10 +18,15 @@
#include <errno.h>
#include <pwd.h>
#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
#include <signal.h>
#include <netdb.h> /* gethostbyname for portals */
#include "mpiexec.h"
+#include "tv_attach.h"
#ifdef HAVE_PATH_H
# include <paths.h>
@@ -296,6 +301,10 @@
int task_start, task_end;
const char *mpiexec_redir_helper_path;
+ /* TotalView PMI stuff */
+ int pid_socket = -1;
+ struct sockaddr_in tv_sockaddr;
+
/* for looping from 0..numtasks in the case of MPI_Spawn */
task_start = spawns[spawn].task_start;
task_end = spawns[spawn].task_end;
@@ -523,6 +532,48 @@
env_add_if_not("PATH", _PATH_DEFPATH);
env_add_if_not("USER", pswd->pw_name);
+
+ if (cl_args->tview && cl_args->comm == COMM_MPICH2_PMI)
+ {
+ int len;
+ char *str;
+ int rc;
+ socklen_t tv_sockaddr_len;
+
+ env_add_int("PMI_TOTALVIEW", 1);
+ env_add_int("MPIEXEC_DEBUG", 1);
+ tv_sockaddr_len = sizeof(tv_sockaddr);
+ memset(&tv_sockaddr, 0, tv_sockaddr_len);
+
+ /* Creating socket for pid exchange of remote processes */
+ pid_socket = socket(PF_INET, SOCK_STREAM, 0);
+ if (pid_socket == -1)
+ error_errno("%s: socket ", __func__);
+
+ tv_sockaddr.sin_family = AF_INET;
+ tv_sockaddr.sin_addr = myaddr.sin_addr;
+ tv_sockaddr.sin_port = 0;
+ memset(tv_sockaddr.sin_zero, '\0', sizeof(tv_sockaddr.sin_zero));
+
+ rc = bind(pid_socket, (struct sockaddr *)&tv_sockaddr, tv_sockaddr_len);
+ if (rc)
+ error_errno("%s: bind ", __func__);
+ rc = getsockname(pid_socket, (struct sockaddr *)&tv_sockaddr, &tv_sockaddr_len);
+ if (rc)
+ error_errno("%s: getsockname ", __func__);
+ rc = listen(pid_socket, 32767);
+ if (rc)
+ error_errno("%s: listen ", __func__);
+
+ len = snprintf(NULL,0,"%s -e \"dattach mpiexec %d; dgo; dassign MPIR_being_debugged 1\" &", tvname, getpid());
+ str = malloc((len + 1) * sizeof(char));
+ snprintf(str, len + 1, "%s -e \"dattach mpiexec %d; dgo; dassign MPIR_being_debugged 1\" &", tvname, getpid());
+ system(str);
+ free(str);
+ if (initiate_tv(task_end - task_start))
+ error("%s: initiate_tv could not allocate MPIR_proctable", __func__);
+ }
+
/*
* Spawn each task, adding its private env vars.
* numspawned set to zero earlier before signal handler setup;
@@ -585,8 +636,12 @@
/* build proc-specific command line */
growstr_zero(g);
g->translate_single_quote = 0;
- growstr_printf(g, "if test -d \"%s\"; then cd \"%s\"; fi; exec %s -c ",
- pwd, pwd, user_shell);
+ if (cl_args->tview && cl_args->comm == COMM_MPICH2_PMI)
+ growstr_printf(g, "printf \"%%10d\" $$ > /dev/tcp/%s/%d; if test -d \"%s\"; then cd \"%s\"; fi; exec %s -c ",
+ nodes[0].name, ntohs(tv_sockaddr.sin_port), pwd, pwd, user_shell);
+ else
+ growstr_printf(g, "if test -d \"%s\"; then cd \"%s\"; fi; exec %s -c ",
+ pwd, pwd, user_shell);
growstr_append(g, "'exec ");
g->translate_single_quote = 1;
@@ -606,10 +661,15 @@
*/
if (cl_args->tview) {
if (i == 0)
- growstr_printf(g, "totalview %s -a -mpichtv",
- tasks[i].conf->exe);
+ if (cl_args->comm == COMM_MPICH2_PMI)
+ growstr_printf(g, "%s", tasks[i].conf->exe);
+ else
+ growstr_printf(g, "%s %s -a -mpichtv", tvname, tasks[i].conf->exe);
else
- growstr_printf(g, "%s -mpichtv", tasks[i].conf->exe);
+ if (cl_args->comm == COMM_MPICH2_PMI)
+ growstr_printf(g, "%s", tasks[i].conf->exe);
+ else
+ growstr_printf(g, "%s -mpichtv", tasks[i].conf->exe);
} else
growstr_printf(g, "%s", tasks[i].conf->exe);
@@ -722,8 +782,33 @@
break;
}
}
+ if (cl_args->tview && cl_args->comm == COMM_MPICH2_PMI)
+ {
+ int ps1;
+ char rpid[10];
+ MPIR_PROCDESC desc;
+
+ ps1 = accept(pid_socket, 0, 0);
+ if (ps1 == -1)
+ error_errno("%s: accept() of TotalView setup", __func__);
+ read_full(ps1, rpid, 10);
+ desc.host_name = strdup(nodes[tasks[i].node].name);
+ desc.executable_name = strdup(tasks[i].conf->exe);
+ desc.pid = atoi(rpid);
+ fill_procdesc(desc, i);
+ free(desc.host_name);
+ free(desc.executable_name);
+ close(ps1);
+ }
+ }
+
+ if (cl_args->tview && cl_args->comm == COMM_MPICH2_PMI)
+ {
+ complete_tv();
+ close(pid_socket);
}
+
/* don't need these anymore */
free(nargv[0]);
free(nargv[1]);
@@ -741,6 +826,7 @@
goto out;
}
+
debug(1, "All %d task%s (spawn %d) started", task_end - task_start,
task_end - task_start > 1 ? "s": "", spawn);
--- tv_attach.c 2008-03-20 14:06:01.000000000 +0100
+++ tv_attach.c 2008-03-20 14:01:53.000000000 +0100
@@ -0,0 +1,126 @@
+/*
+ * tv_attach.c - variables and routines for TotalView attachment
+ *
+ * See: http://www-unix.mcs.anl.gov/mpi/mpi-debug/mpich-attach.txt
+ *
+ * Created: 02/2008 Frank Mietke <frank.mietke at s1998.tu-chemnitz.de>
+ *
+ * Distributed under the GNU Public License Version 2 or later (See LICENSE)
+ */
+
+#include "tv_attach.h"
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Process table handed to the debugger; one entry per started MPI task. */
+MPIR_PROCDESC *MPIR_proctable = NULL;
+/* Number of entries of MPIR_proctable that have been filled in so far. */
+int MPIR_proctable_size = 0;
+
+/* Reason for the current MPIR_Breakpoint() call (MPIR_DEBUG_* below). */
+volatile int MPIR_debug_state = 0;
+/* Assigned from outside by the debugger ("dassign MPIR_being_debugged 1"). */
+volatile int MPIR_being_debugged = 0;
+int MPIR_i_am_starter = 0;
+int MPIR_partial_attach_ok = 0;
+
+/* Values of MPIR_debug_state; SPAWNED and ABORTING must be distinct. */
+#define MPIR_DEBUG_SPAWNED 1
+#define MPIR_DEBUG_ABORTING 2
+
+/* Number of entries allocated for MPIR_proctable by initiate_tv(). */
+static int proctable_capacity = 0;
+
+void MPIR_Breakpoint(void); /* waited on by TV */
+
+/*
+ * Called to tell TotalView that there is a new event for the debugger to
+ * handle.  Set MPIR_debug_state to the reason for the event before
+ * calling this function.
+ */
+void MPIR_Breakpoint(void)
+{
+    /* The output only keeps the function from being removed with -O. */
+    printf("Waiting for TotalView to attach to all MPI processes\n");
+}
+
+/*
+ * Wait until the debugger has set MPIR_being_debugged, then allocate an
+ * empty process table with room for ntasks entries.
+ *
+ * Returns 0 on success, 1 if ntasks is not positive or the allocation fails.
+ */
+int initiate_tv(int ntasks)
+{
+    struct timespec ts;
+
+    if (ntasks <= 0)
+        return 1;
+
+    ts.tv_sec = 0;
+    ts.tv_nsec = 20000000; /* poll every 20ms */
+
+    while (!MPIR_being_debugged)
+        nanosleep(&ts, NULL);
+
+    /* calloc: unfilled entries keep NULL names, size is overflow-checked */
+    MPIR_proctable = calloc((size_t) ntasks, sizeof(*MPIR_proctable));
+    if (MPIR_proctable == NULL)
+        return 1;
+    proctable_capacity = ntasks;
+    MPIR_proctable_size = 0;
+
+    return 0;
+}
+
+/*
+ * Store a private copy of desc as entry number rank of the process table.
+ * The caller keeps ownership of the strings in desc.  Requests that cannot
+ * be stored (no table, rank out of range, missing names, out of memory) are
+ * reported on stderr and leave the table unchanged.
+ */
+void fill_procdesc(MPIR_PROCDESC desc, int rank)
+{
+    char *host;
+    char *exe;
+
+    if (MPIR_proctable == NULL || rank < 0 || rank >= proctable_capacity) {
+        fprintf(stderr, "%s: rank %d outside of process table\n", __func__, rank);
+        return;
+    }
+    if (desc.host_name == NULL || desc.executable_name == NULL) {
+        fprintf(stderr, "%s: incomplete description for rank %d\n", __func__, rank);
+        return;
+    }
+
+    host = strdup(desc.host_name);
+    exe = strdup(desc.executable_name);
+    if (host == NULL || exe == NULL) {
+        fprintf(stderr, "%s: out of memory for rank %d\n", __func__, rank);
+        free(host);
+        free(exe);
+        return;
+    }
+
+    /* Count an entry only once, even if the same rank is filled again. */
+    if (MPIR_proctable[rank].host_name == NULL)
+        MPIR_proctable_size++;
+    free(MPIR_proctable[rank].host_name);
+    free(MPIR_proctable[rank].executable_name);
+
+    MPIR_proctable[rank].host_name = host;
+    MPIR_proctable[rank].executable_name = exe;
+    MPIR_proctable[rank].pid = desc.pid;
+}
+
+/*
+ * Mark the tasks as spawned in MPIR_debug_state and notify the debugger
+ * through MPIR_Breakpoint().
+ */
+void complete_tv(void)
+{
+    MPIR_debug_state = MPIR_DEBUG_SPAWNED;
+    MPIR_Breakpoint();
+}
--- tv_attach.h 2008-03-20 14:06:06.000000000 +0100
+++ tv_attach.h 2008-03-20 13:02:48.000000000 +0100
@@ -0,0 +1,39 @@
+/*
+ * tv_attach.h - variables and routines for TotalView attachment
+ *
+ * See: http://www-unix.mcs.anl.gov/mpi/mpi-debug/mpich-attach.txt
+ *
+ * Created: 02/2008 Frank Mietke <frank.mietke at s1998.tu-chemnitz.de>
+ *
+ * Distributed under the GNU Public License Version 2 or later (See LICENSE)
+ */
+#ifndef TV_ATTACH_H
+#define TV_ATTACH_H
+
+#include <sys/time.h>
+#include <unistd.h>
+
+/* Required by TotalView MPI startup */
+
+/* One process table entry: where a task runs, what it runs, and its pid. */
+typedef struct {
+    char *host_name;        /* name of the node the process runs on */
+    char *executable_name;  /* executable started for the process */
+    int pid;                /* process id reported for the process */
+} MPIR_PROCDESC;
+
+/* Functions to be called by starter */
+
+/*
+ * Wait until MPIR_being_debugged has been set, then allocate the process
+ * table for ntasks entries.  Returns 0 on success, 1 on failure.
+ */
+int initiate_tv(int ntasks);
+
+/* Copy desc (including its strings) into process table entry number rank. */
+void fill_procdesc(MPIR_PROCDESC desc, int rank);
+
+/* Set MPIR_debug_state to "spawned" and call MPIR_Breakpoint(). */
+void complete_tv(void);
+
+#endif /* TV_ATTACH_H */
--- mpiexec.c 2008-02-22 21:23:21.000000000 +0100
+++ mpiexec.c 2008-03-20 13:14:21.000000000 +0100
@@ -43,6 +43,8 @@
int numspawns;
struct passwd *pswd;
struct sockaddr_in myaddr;
+char *tvname = NULL;
+int tvready = 0;
/*
* Ensure it's executable. Return true if so.
@@ -500,8 +502,23 @@
if (*cr || l <= 0)
error("argument -n requires positive integral number of nodes");
cl_args->numproc = l;
- } else if (!strcmp(cp, "tv") || !strncmp(cp, "totalview", MAX(2,len)))
+ } else if (!strcmp(cp, "tv") || !strncmp(cp, "totalview", MAX(2,len))) {
+ char *tvenv = NULL;
+ int len = 0;
cl_args->tview = 1;
+ tvname = strdup("totalview");
+ tvenv = getenv("TOTALVIEW");
+ if (tvenv != NULL) {
+ struct stat *buf;
+ buf = malloc(sizeof(struct stat));
+ len = strlen(tvenv);
+ tvname = (char*) malloc((len + 1) * sizeof(char));
+ strncpy(tvname, tvenv, len);
+ if (stat(tvname, buf))
+ warning("%s: TOTALVIEW env variable set to \"%s\" is not working, trying with totalview in PATH!\n", __func__, tvname);
+ free(buf);
+ }
+ }
else if (!strncmp(cp, "config", MAX(3,len))) {
cp += MAX(3,len);
cl_args->config_file = find_optarg(cp, &argc, &argv, "config");
--- pmi.c 2008-02-22 21:23:21.000000000 +0100
+++ pmi.c 2008-03-20 12:40:44.000000000 +0100
@@ -263,10 +263,13 @@
error_errno("%s: response cmd=set debug=%d", __func__, mpi_task_debug);
}
- /*
- * XXX: PMI_TOTALVIEW env var means we must send another little
- * string; add it sometime.
- */
+ if (cl_args->tview)
+ {
+ growstr_zero(g);
+ growstr_printf(g, "cmd=tv_ready\n");
+ if (write_full(fd, g->s, g->len) < 0)
+ error_errno("%s: response cmd=tv_ready", __func__);
+ }
/*
* PMII_getmaxes
More information about the mpiexec
mailing list