equivalent of mpirun -nolocal option?

Ron Brightwell rbbrigh at valeria.mp.sandia.gov
Fri Apr 16 19:15:48 EDT 2004


> > Is there an mpiexec equivalent to the mpirun -nolocal option?
> > I'd like to be able to start a job where the mpiexec process
> > is on a separate host from the MPI application process(es).
> 
> Ah, no.  It's not even possible to hack via config-file stuff
> for a generic MPI code either, unless you're allowed to recompile
> the code to exit task 0, form a new smaller communicator, and continue.
> 
> But you could toss in this option toward the end of get_hosts().
> Just chop off tasks[0] while the name matches the original tasks[0].name
> and move up everybody else in the array, reducing numtasks.  I'll
> fold it into the distro if you get it to work (mostly) and find it useful.
> 

Here's a patch for the -nolocal option.  It seems to work (mostly).  I'm not
sure that I've found it useful yet, but I'll let you know :)

-Ron

-------------- next part --------------
--- get_hosts.c.orig	Fri Apr 16 17:01:44 2004
+++ get_hosts.c	Fri Apr 16 15:09:07 2004
@@ -250,6 +250,35 @@
 	free(oldtasks);
     }
 
+    if (cl_args->nolocal) {
+        task_cntrl_t *oldtasks = tasks;
+        int old_numtask = numtask;
+        int j;
+        tasks_mpiexec_host = Malloc( sizeof(*tasks));
+        memcpy( tasks_mpiexec_host, tasks, sizeof(*tasks));
+        tasks_mpiexec_host->name = strsave(tasks->name);
+        if ( cl_args->verbose )
+            printf("removing mpiexec host %s from host list\n",tasks_mpiexec_host->name);
+        numtask = 0;
+        for (i=0; i<old_numtask; i++ ) {
+          numtask += abs(strcmp(tasks_mpiexec_host->name,oldtasks[i].name));
+        }
+        if ( numtask == 0 ) {
+            error("%s: no processors left after processing -nolocal flag",__func__);
+        }
+        tasks = Malloc(numtask * sizeof(*tasks));
+        for (i=0,j=0; i<old_numtask; i++ ) {
+            if ( strcmp(tasks_mpiexec_host->name,oldtasks[i].name) ) {
+                memcpy(&tasks[j], &oldtasks[i], sizeof(*tasks));
+                tasks[j].name = strsave(oldtasks[i].name);
+                j++;
+            }
+        }
+        free(oldtasks);
+    } else {
+        tasks_mpiexec_host = &tasks[0];
+    }
+
     /* enforce one process per physical node by strcmp on host name */
     if (cl_args->pernode)
 	cull_nodes(matching_node);
@@ -260,9 +289,10 @@
     if (cl_args->numproc) {
 	if (cl_args->numproc > numtask)
 	    error(
-	     "%s: argument -n specifies %d processors, only %d available%s",
+	     "%s: argument -n specifies %d processors, only %d available%s%s",
 	      __func__, cl_args->numproc, numtask,
-	      cl_args->pernode ? "\n  after processing -pernode flag" : "");
+	      cl_args->pernode ? "\n  after processing -pernode flag" : "",
+	      cl_args->nolocal ? "\n and after processing -nolocal flag" : "");
 	/* just take whatever the user specified, which may be fewer,
 	 * discarding the rest of the tasks */
 	numtask = cl_args->numproc;
@@ -299,7 +329,7 @@
       || cl_args->comm == COMM_MPICH_IB) {
 	struct hostent *he;
 
-	myname = strsave(tasks[0].name);
+	myname = strsave(tasks_mpiexec_host->name);
 	he = gethostbyname(myname);
 	if (!he)
 	    error("%s: gethostbyname cannot find my name %s", __func__, myname);
@@ -311,8 +341,6 @@
 	memcpy(&myaddr.sin_addr, he->h_addr_list[0], sizeof(myaddr.sin_addr));
     }
 
-    /* remember which one holds this mpiexec, even if culled out later */
-    tasks_mpiexec_host = &tasks[0];
 }
 
 /*
--- mpiexec.c.orig	Fri Apr 16 17:01:59 2004
+++ mpiexec.c	Fri Apr 16 12:00:57 2004
@@ -236,6 +236,8 @@
     fprintf(stderr,
       "  -verbose : be verbose about mpiexec operation\n");
     fprintf(stderr,
+      "  -nolocal : do not run any MPI processes on the local machine\n");
+    fprintf(stderr,
       "  -nostdin : do not listen to stdin, allowing process to go into background\n");
     fprintf(stderr,
       "  -allstdin : send stdin to all processes (default just proc #0)\n");
@@ -389,6 +391,8 @@
 	    cl_args->which_stdin = STDIN_ALL;
 	} else if (!strncmp(cp, "pernode", MAX(4,len)))
 	    cl_args->pernode = 1;
+	else if (!strncmp(cp, "nolocal", MAX(4,len)))
+	    cl_args->nolocal = 1;
 	else if (HAVE_COMM_MPICH_GM && !strncmp(cp, "no-shmem", MAX(2,len)))
 	    warning("-no-shmem ignored, use GMPI_SHMEM=0 environment setting");
 	/* keep this after other "n..." items */
--- mpiexec.h.orig	Fri Apr 16 17:02:04 2004
+++ mpiexec.h	Fri Apr 16 11:45:43 2004
@@ -67,6 +67,7 @@
 typedef struct {
     int numproc;                /* -n <numproc> */
     int tview;                  /* -tv */
+    int nolocal;                /* -nolocal */
     int pernode;                /* -pernode */
     const char *config_file;    /* -config <config_file> */
     which_stdin_t which_stdin;  /* -(no|all)stdin */
--- mpiexec.1.orig	Fri Apr 16 17:25:59 2004
+++ mpiexec.1	Fri Apr 16 17:25:14 2004
@@ -128,6 +128,9 @@
 this flag also utilizes only some of the processors allocated to the job
 by pbs.
 .TP
+.B \-nolocal
+Do not run any MPI processes on the local machine.
+.TP
 .B \-transform-hostname \fIsed_expression\fR
 Use an alternate hostname for message passing.  Processes will be
 spawned using a separate namespace for their message passing communications.


More information about the mpiexec mailing list