Patch for TotalView support with MPICH2/MVAPICH2

Frank Mietke frank.mietke at informatik.tu-chemnitz.de
Thu Mar 20 09:23:10 EDT 2008


Hi,

here's my patch adding TotalView support for MPICH2/MVAPICH2. I've tested it
on our system and it worked well. Setting the following environment variables
is recommended but not required:

TOTALVIEW=<totalview executable>
TVDSVR=<ssh|rsh>

MPICH2/MVAPICH2 have to be configured with --enable-debuginfo.

Best Regards,
Frank

-- 
Dipl.-Inf. Frank Mietke     |     Fakultätsrechen- und Informationszentrum
Tel.: 0371 - 531 - 35538    |     Fak. für Informatik
Fax:  0371 - 531 8 35538    |     TU-Chemnitz
Key-ID: 60F59599            |     frank.mietke at informatik.tu-chemnitz.de
-------------- next part --------------
--- Makefile.in	2008-02-22 21:23:21.000000000 +0100
+++ Makefile.in	2008-03-20 12:40:44.000000000 +0100
@@ -7,8 +7,8 @@
 #
 SRC   = mpiexec.c get_hosts.c start_tasks.c task.c event.c util.c config.c \
 	stdio.c growstr.c pmi.c gm.c ib.c p4.c rai.c concurrent.c exedist.c \
-	spawn.c
-H     = mpiexec.h util.h growstr.h list.h
+	spawn.c tv_attach.c
+H     = mpiexec.h util.h growstr.h list.h tv_attach.h
 OTHER = ChangeLog LICENSE README mpiexec.1 proc-relations.fig \
 	hello.c hellof.f hellomp.f redir-helper.c \
 	runtests.pl README.lam
--- mpiexec.h	2008-02-22 21:23:21.000000000 +0100
+++ mpiexec.h	2008-03-20 12:40:44.000000000 +0100
@@ -134,6 +134,11 @@
 } cl_args_t;
 extern cl_args_t *cl_args;
 
+/* TotalView executable, could also be set through environment variable TOTALVIEW */
+extern char* tvname;
+/* Flag which signals listener process when the tv_ready command can be sent to MPICH2 processes */
+extern int tvready;
+
 /*
  * Types of TM events that can happen.
  */
--- start_tasks.c	2008-02-22 21:23:21.000000000 +0100
+++ start_tasks.c	2008-03-20 13:30:56.000000000 +0100
@@ -18,10 +18,15 @@
 #include <errno.h>
 #include <pwd.h>
 #include <sys/time.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
 #include <signal.h>
 #include <netdb.h>  /* gethostbyname for portals */
 
 #include "mpiexec.h"
+#include "tv_attach.h"
 
 #ifdef HAVE_PATH_H
 #  include <paths.h>
@@ -296,6 +301,10 @@
     int task_start, task_end;
     const char *mpiexec_redir_helper_path;
 
+    /* TotalView PMI stuff */
+    int pid_socket = -1;
+    struct sockaddr_in tv_sockaddr;
+
     /* for looping from 0..numtasks in the case of MPI_Spawn */
     task_start = spawns[spawn].task_start;
     task_end = spawns[spawn].task_end;
@@ -523,6 +532,48 @@
     env_add_if_not("PATH", _PATH_DEFPATH);
     env_add_if_not("USER", pswd->pw_name);
 
+
+    if (cl_args->tview && cl_args->comm == COMM_MPICH2_PMI)
+    {
+	   int len;
+	   char *str;
+	   int rc;
+	   socklen_t tv_sockaddr_len;
+
+	   env_add_int("PMI_TOTALVIEW", 1);
+	   env_add_int("MPIEXEC_DEBUG", 1);
+	   tv_sockaddr_len = sizeof(tv_sockaddr);
+	   memset(&tv_sockaddr, 0, tv_sockaddr_len);
+
+	   /* Creating socket for pid exchange of remote processes */
+	   pid_socket = socket(PF_INET, SOCK_STREAM, 0);
+	   if (pid_socket == -1)
+		   error_errno("%s: socket ", __func__);
+
+	   tv_sockaddr.sin_family = AF_INET;
+	   tv_sockaddr.sin_addr = myaddr.sin_addr;
+	   tv_sockaddr.sin_port = 0;
+	   memset(tv_sockaddr.sin_zero, '\0', sizeof(tv_sockaddr.sin_zero));
+
+	   rc = bind(pid_socket, (struct sockaddr *)&tv_sockaddr, tv_sockaddr_len);
+	   if (rc)
+		   error_errno("%s: bind ", __func__);
+	   rc = getsockname(pid_socket, (struct sockaddr *)&tv_sockaddr, &tv_sockaddr_len);
+	   if (rc)
+		   error_errno("%s: getsockname ", __func__);
+	   rc = listen(pid_socket, 32767);
+	   if (rc)
+		   error_errno("%s: listen ", __func__);
+
+	   len = snprintf(NULL,0,"%s -e \"dattach mpiexec %d; dgo; dassign MPIR_being_debugged 1\" &", tvname, getpid());
+	   str = malloc((len + 1) * sizeof(char));
+	   snprintf(str, len + 1, "%s -e \"dattach mpiexec %d; dgo; dassign MPIR_being_debugged 1\" &", tvname, getpid());
+	   system(str);
+	   free(str);
+	   if (initiate_tv(task_end - task_start))
+		error("%s: initiate_tv could not allocate MPIR_proctable", __func__);
+    }
+
     /*
      * Spawn each task, adding its private env vars.
      * numspawned set to zero earlier before signal handler setup;
@@ -585,8 +636,12 @@
 	/* build proc-specific command line */
 	growstr_zero(g);
 	g->translate_single_quote = 0;
-	growstr_printf(g, "if test -d \"%s\"; then cd \"%s\"; fi; exec %s -c ",
-	  pwd, pwd, user_shell);
+	if (cl_args->tview && cl_args->comm == COMM_MPICH2_PMI)
+		growstr_printf(g, "printf \"%%10d\" $$ > /dev/tcp/%s/%d; if test -d \"%s\"; then cd \"%s\"; fi; exec %s -c ",
+		nodes[0].name, ntohs(tv_sockaddr.sin_port), pwd, pwd, user_shell);
+	else
+		growstr_printf(g, "if test -d \"%s\"; then cd \"%s\"; fi; exec %s -c ",
+		pwd, pwd, user_shell);
 	growstr_append(g, "'exec ");
 	g->translate_single_quote = 1;
 
@@ -606,10 +661,15 @@
 	 */
 	if (cl_args->tview) {
 	    if (i == 0)
-		growstr_printf(g, "totalview %s -a -mpichtv",
-		  tasks[i].conf->exe);
+		if (cl_args->comm == COMM_MPICH2_PMI)
+			growstr_printf(g, "%s", tasks[i].conf->exe);
+		else
+			growstr_printf(g, "%s %s -a -mpichtv", tvname, tasks[i].conf->exe);
 	    else
-		growstr_printf(g, "%s -mpichtv", tasks[i].conf->exe);
+		if (cl_args->comm == COMM_MPICH2_PMI)
+			growstr_printf(g, "%s", tasks[i].conf->exe);
+		else
+			growstr_printf(g, "%s -mpichtv", tasks[i].conf->exe);
 	} else
 	    growstr_printf(g, "%s", tasks[i].conf->exe);
 
@@ -722,8 +782,33 @@
 		    break;
 	    }
 	}
+	if (cl_args->tview && cl_args->comm == COMM_MPICH2_PMI)
+	{
+		int ps1;
+		char rpid[11] = "";	/* 10 chars from printf "%10d" plus a NUL so atoi() stops */
+		MPIR_PROCDESC desc;
+		/* the remote shell connects back and writes its pid as 10 characters */
+		ps1 = accept(pid_socket, 0, 0);
+		if (ps1 == -1)
+			error_errno("%s: accept() of TotalView setup", __func__);
+		read_full(ps1, rpid, sizeof(rpid) - 1);
+		desc.host_name = strdup(nodes[tasks[i].node].name);
+		desc.executable_name = strdup(tasks[i].conf->exe);
+		desc.pid = atoi(rpid);
+		fill_procdesc(desc, i);
+		free(desc.host_name);
+		free(desc.executable_name);
+		close(ps1);
+	}
+    }
+
+    if (cl_args->tview && cl_args->comm == COMM_MPICH2_PMI)
+    {
+	   complete_tv();
+	   close(pid_socket);
     }
 
+
     /* don't need these anymore */
     free(nargv[0]);
     free(nargv[1]);
@@ -741,6 +826,7 @@
 	    goto out;
     }
 
+
     debug(1, "All %d task%s (spawn %d) started", task_end - task_start,
           task_end - task_start > 1 ? "s": "", spawn);
 
--- tv_attach.c	2008-03-20 14:06:01.000000000 +0100
+++ tv_attach.c	2008-03-20 14:01:53.000000000 +0100
@@ -0,0 +1,70 @@
+/*
+ * tv_attach.c - variables and routines for TotalView attachment
+ * 
+ * See: http://www-unix.mcs.anl.gov/mpi/mpi-debug/mpich-attach.txt
+ *
+ * Created: 02/2008 Frank Mietke <frank.mietke at s1998.tu-chemnitz.de>
+ *
+ * Distributed under the GNU Public License Version 2 or later (See LICENSE)
+ */
+
+#include "tv_attach.h"
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+MPIR_PROCDESC *MPIR_proctable = NULL;	/* allocated in initiate_tv(), filled by fill_procdesc() */
+int MPIR_proctable_size = 0;		/* incremented once per fill_procdesc() call */
+
+volatile int MPIR_debug_state = 0;	/* set to MPIR_DEBUG_SPAWNED in complete_tv() */
+volatile int MPIR_being_debugged = 0;	/* polled in initiate_tv(); assigned from the debugger */
+int MPIR_i_am_starter = 0;
+int MPIR_partial_attach_ok = 0;
+
+#define MPIR_DEBUG_SPAWNED 1	/* MPIR_debug_state value: processes have been spawned */
+#define MPIR_DEBUG_ABORTING 2	/* MPIR_debug_state value: job is aborting (distinct from SPAWNED) */
+
+void MPIR_Breakpoint(void); /* waited on by TV */
+
+/* TotalView watches this function to detect that a new event needs the
+ * debugger's attention. Set MPIR_debug_state to the event being reported
+ * before calling this function.
+ */
+void MPIR_Breakpoint(void)
+{
+	// The printf only keeps this function from being removed when compiled with -O.
+	printf("Waiting for TotalView to attach to all MPI processes\n");
+	return;
+}
+/* Wait until MPIR_being_debugged is set, then allocate MPIR_proctable for ntasks entries; returns 0 on success, 1 if malloc fails. */
+int initiate_tv(int ntasks)
+{
+    struct timespec ts;
+
+    ts.tv_sec = 0;
+    ts.tv_nsec = 20000000; /* poll interval: 20ms */
+
+    while(!MPIR_being_debugged)	/* no timeout: loops until the flag becomes non-zero */
+	    nanosleep( &ts, NULL);
+
+    MPIR_proctable = (MPIR_PROCDESC*) malloc(sizeof(MPIR_PROCDESC) * ntasks);
+    if (MPIR_proctable == NULL)
+	return 1;
+
+    return 0;
+}
+/* Store a copy of desc in MPIR_proctable[rank] (strings are duplicated, so the caller keeps ownership of desc) and count the entry. */
+void fill_procdesc(MPIR_PROCDESC desc, int rank)
+{
+    MPIR_proctable[rank].host_name = strdup(desc.host_name);	/* NOTE(review): strdup result is not checked */
+    MPIR_proctable[rank].executable_name = strdup(desc.executable_name);
+    MPIR_proctable[rank].pid = desc.pid;
+    MPIR_proctable_size++;	/* NOTE(review): rank is not range-checked against the allocated table */
+}
+/* Report that the process table is complete: set MPIR_debug_state to MPIR_DEBUG_SPAWNED and call MPIR_Breakpoint(). */
+void complete_tv(void)
+{
+    MPIR_debug_state=MPIR_DEBUG_SPAWNED;
+    MPIR_Breakpoint();
+}
--- tv_attach.h	2008-03-20 14:06:06.000000000 +0100
+++ tv_attach.h	2008-03-20 13:02:48.000000000 +0100
@@ -0,0 +1,30 @@
+/*
+ * tv_attach.h - variables and routines for TotalView attachment
+ * 
+ * See: http://www-unix.mcs.anl.gov/mpi/mpi-debug/mpich-attach.txt
+ *
+ * Created: 02/2008 Frank Mietke <frank.mietke at s1998.tu-chemnitz.de>
+ *
+ * Distributed under the GNU Public License Version 2 or later (See LICENSE)
+ */
+#ifndef __tv_attach_h
+#define __tv_attach_h
+
+#include <sys/time.h>
+#include <unistd.h>
+
+/* Required by TotalView MPI startup */
+
+typedef struct {
+	char *host_name;	/* name of the node the process was started on */
+	char *executable_name;	/* executable started for this process */
+	int	pid;		/* process id reported back by the remote shell */
+} MPIR_PROCDESC;
+
+/* Functions to be called by starter */
+
+int initiate_tv(int);
+void fill_procdesc(MPIR_PROCDESC desc, int rank);
+void complete_tv(void);
+
+#endif
--- mpiexec.c	2008-02-22 21:23:21.000000000 +0100
+++ mpiexec.c	2008-03-20 13:14:21.000000000 +0100
@@ -43,6 +43,8 @@
 int numspawns;
 struct passwd *pswd;
 struct sockaddr_in myaddr;
+char *tvname = NULL;
+int tvready = 0;
 
 /*
  * Ensure it's executable.  Return true if so.
@@ -500,8 +502,23 @@
 	    if (*cr || l <= 0)
 		error("argument -n requires positive integral number of nodes");
 	    cl_args->numproc = l;
-	} else if (!strcmp(cp, "tv") || !strncmp(cp, "totalview", MAX(2,len)))
+	} else if (!strcmp(cp, "tv") || !strncmp(cp, "totalview", MAX(2,len))) {
+	    /* $TOTALVIEW overrides "totalview" from PATH, but only if it can be stat()ed */
+	    const char *tvenv = getenv("TOTALVIEW");
 	    cl_args->tview = 1;
+	    free(tvname);  /* the option may be given more than once */
+	    tvname = strdup("totalview");
+	    if (tvenv != NULL) {
+		struct stat sb;
+		if (stat(tvenv, &sb) == 0) {
+		    free(tvname);
+		    tvname = strdup(tvenv);
+		} else
+		    warning("%s: TOTALVIEW env variable set to \"%s\" is not working, trying with totalview in PATH!\n", __func__, tvenv);
+	    }
+	    if (tvname == NULL)
+		error("%s: out of memory copying TotalView name", __func__);
+	}
 	else if (!strncmp(cp, "config", MAX(3,len))) {
 	    cp += MAX(3,len);
 	    cl_args->config_file = find_optarg(cp, &argc, &argv, "config");
--- pmi.c	2008-02-22 21:23:21.000000000 +0100
+++ pmi.c	2008-03-20 12:40:44.000000000 +0100
@@ -263,10 +263,13 @@
 	error_errno("%s: response cmd=set debug=%d", __func__, mpi_task_debug);
     }
 
-    /*
-     * XXX: PMI_TOTALVIEW env var means we must send another little
-     * string; add it sometime.
-     */
+    if (cl_args->tview)
+    {
+	   growstr_zero(g);
+	   growstr_printf(g, "cmd=tv_ready\n");
+	   if (write_full(fd, g->s, g->len) < 0)
+		   error_errno("%s: response cmd=tv_ready", __func__);
+    }
 
     /*
      * PMII_getmaxes


More information about the mpiexec mailing list