equivalent of mpirun -nolocal option?
Ron Brightwell
rbbrigh at valeria.mp.sandia.gov
Fri Apr 16 19:15:48 EDT 2004
> > Is there an mpiexec equivalent to the mpirun -nolocal option?
> > I'd like to be able to start a job where the mpiexec process
> > is on a separate host from the MPI application process(es).
>
> Ah, no. It's not even possible to hack via config-file stuff
> for a generic MPI code either, unless you're allowed to recompile
> the code to exit task 0, form a new smaller communicator, and continue.
>
> But you could toss in this option toward the end of get_hosts().
> Just chop off tasks[0] while the name matches the original tasks[0].name
> and move up everybody else in the array, reducing numtasks. I'll
> fold it into the distro if you get it to work (mostly) and find it useful.
>
Here's a patch for the -nolocal option. It seems to work (mostly). I'm not
sure that I've found it useful yet, but I'll let you know :)
-Ron
-------------- next part --------------
--- get_hosts.c.orig Fri Apr 16 17:01:44 2004
+++ get_hosts.c Fri Apr 16 15:09:07 2004
@@ -250,6 +250,35 @@
free(oldtasks);
}
+ if (cl_args->nolocal) {
+ task_cntrl_t *oldtasks = tasks;
+ int old_numtask = numtask;
+ int j;
+ tasks_mpiexec_host = Malloc( sizeof(*tasks));
+ memcpy( tasks_mpiexec_host, tasks, sizeof(*tasks));
+ tasks_mpiexec_host->name = strsave(tasks->name);
+ if ( cl_args->verbose )
+ printf("removing mpiexec host %s from host list\n",tasks_mpiexec_host->name);
+ numtask = 0;
+ for (i=0; i<old_numtask; i++ ) {
+ numtask += abs(strcmp(tasks_mpiexec_host->name,oldtasks[i].name));
+ }
+ if ( numtask == 0 ) {
+ error("%s: no processors left after processing -nolocal flag",__func__);
+ }
+ tasks = Malloc(numtask * sizeof(*tasks));
+ for (i=0,j=0; i<old_numtask; i++ ) {
+ if ( strcmp(tasks_mpiexec_host->name,oldtasks[i].name) ) {
+ memcpy(&tasks[j], &oldtasks[i], sizeof(*tasks));
+ tasks[j].name = strsave(oldtasks[i].name);
+ j++;
+ }
+ }
+ free(oldtasks);
+ } else {
+ tasks_mpiexec_host = &tasks[0];
+ }
+
/* enforce one process per physical node by strcmp on host name */
if (cl_args->pernode)
cull_nodes(matching_node);
@@ -260,9 +289,10 @@
if (cl_args->numproc) {
if (cl_args->numproc > numtask)
error(
- "%s: argument -n specifies %d processors, only %d available%s",
+ "%s: argument -n specifies %d processors, only %d available%s%s",
__func__, cl_args->numproc, numtask,
- cl_args->pernode ? "\n after processing -pernode flag" : "");
+ cl_args->pernode ? "\n after processing -pernode flag" : "",
+ cl_args->nolocal ? "\n and after processing -nolocal flag" : "");
/* just take whatever the user specified, which may be fewer,
* discarding the rest of the tasks */
numtask = cl_args->numproc;
@@ -299,7 +329,7 @@
|| cl_args->comm == COMM_MPICH_IB) {
struct hostent *he;
- myname = strsave(tasks[0].name);
+ myname = strsave(tasks_mpiexec_host->name);
he = gethostbyname(myname);
if (!he)
error("%s: gethostbyname cannot find my name %s", __func__, myname);
@@ -311,8 +341,6 @@
memcpy(&myaddr.sin_addr, he->h_addr_list[0], sizeof(myaddr.sin_addr));
}
- /* remember which one holds this mpiexec, even if culled out later */
- tasks_mpiexec_host = &tasks[0];
}
/*
--- mpiexec.c.orig Fri Apr 16 17:01:59 2004
+++ mpiexec.c Fri Apr 16 12:00:57 2004
@@ -236,6 +236,8 @@
fprintf(stderr,
" -verbose : be verbose about mpiexec operation\n");
fprintf(stderr,
+ " -nolocal : do not run any MPI processes on the local machine\n");
+ fprintf(stderr,
" -nostdin : do not listen to stdin, allowing process to go into background\n");
fprintf(stderr,
" -allstdin : send stdin to all processes (default just proc #0)\n");
@@ -389,6 +391,8 @@
cl_args->which_stdin = STDIN_ALL;
} else if (!strncmp(cp, "pernode", MAX(4,len)))
cl_args->pernode = 1;
+ else if (!strncmp(cp, "nolocal", MAX(4,len)))
+ cl_args->nolocal = 1;
else if (HAVE_COMM_MPICH_GM && !strncmp(cp, "no-shmem", MAX(2,len)))
warning("-no-shmem ignored, use GMPI_SHMEM=0 environment setting");
/* keep this after other "n..." items */
--- mpiexec.h.orig Fri Apr 16 17:02:04 2004
+++ mpiexec.h Fri Apr 16 11:45:43 2004
@@ -67,6 +67,7 @@
typedef struct {
int numproc; /* -n <numproc> */
int tview; /* -tv */
+ int nolocal; /* -nolocal */
int pernode; /* -pernode */
const char *config_file; /* -config <config_file> */
which_stdin_t which_stdin; /* -(no|all)stdin */
--- mpiexec.1.orig Fri Apr 16 17:25:59 2004
+++ mpiexec.1 Fri Apr 16 17:25:14 2004
@@ -128,6 +128,9 @@
this flag also utilizes only some of the processors allocated to the job
by pbs.
.TP
+.B \-nolocal
+Do not run any MPI processes on the local machine.
+.TP
.B \-transform-hostname \fIsed_expression\fR
Use an alternate hostname for message passing. Processes will be
spawned using a separate namespace for their message passing communications.
More information about the mpiexec mailing list