additional command line switches / extensions for mpiexec?
Thomas Zeiser
thomas.zeiser at rrze.uni-erlangen.de
Mon Nov 27 16:21:05 EST 2006
On Thu, Nov 23, 2006 at 08:26:12PM -0500, Pete Wyckoff wrote:
> thomas.zeiser at rrze.uni-erlangen.de wrote on Thu, 23 Nov 2006 10:49 +0100:
> >
> > 1) mpiexec already has "-pernode" but thinking of n-way nodes with
> > dual-core CPUs, a switch like "-Npernode <n>" might be very useful
> > (and probably easy to implement, i.e. in get_hosts.c one probably
> > only would have to set nodes[i].availcpu to the correct n)
>
> This sounds like a good suggestion, and pretty easy to implement
> in constrain_nodes() along with how -pernode is implemented. I'll
> stick it in the tree if you code it up (with manpage entry too).
>
Please find attached a patch with my implementation of the feature.
"-npernode <nprocs>" is added as a new command line option.
constrain_nodes() tries to be smart if different numbers of CPUs are
available on the nodes: for each node it takes the minimum of the
available CPUs and the requested number of processes per node.
Regards,
thomas
--
Dipl.-Ing. Thomas ZEISER
Regionales Rechenzentrum Erlangen
Martensstr. 1, 91058 Erlangen, GERMANY
-------------- next part --------------
--- mpiexec-0.81+20061120/mpiexec.c 2006-11-20 19:50:19.000000000 +0000
+++ mpiexec-0.81+20061120+npernode/mpiexec.c 2006-11-27 19:26:12.000000000 +0000
@@ -305,6 +305,8 @@
fprintf(stderr,
" -pernode : allocate only one process per compute node\n");
fprintf(stderr,
+ " -npernode <nprocs>: allocate only up to <nprocs> processes per compute node\n");
+ fprintf(stderr,
" -nolocal : do not run any MPI processes on the local node\n");
if (HAVE_SED)
fprintf(stderr,
@@ -468,7 +470,15 @@
cl_args->which_stdin = STDIN_ALL;
} else if (!strncmp(cp, "pernode", MAX(1,len)))
cl_args->pernode = 1;
- else if (!strncmp(cp, "nolocal", MAX(3,len)))
+ else if (!strncmp(cp, "npernode", MAX(3,len))) {
+ long l;
+ cp += len;
+ cp = find_optarg(cp, &argc, &argv, "n");
+ l = strtol(cp, &cr, 10); /* negative value to strtoul is legal! */
+ if (*cr || l <= 0)
+ error("argument -npernode requires positive number of processes per node");
+ cl_args->pernode = l;
+ } else if (!strncmp(cp, "nolocal", MAX(3,len)))
cl_args->nolocal = 1;
else if (HAVE_COMM_MPICH_GM && !strncmp(cp, "no-shmem", MAX(2,len)))
warning("-no-shmem ignored, use GMPI_SHMEM=0 or MX_DISABLE_SHMEM=1"
--- mpiexec-0.81+20061120/get_hosts.c 2006-11-20 19:50:19.000000000 +0000
+++ mpiexec-0.81+20061120+npernode/get_hosts.c 2006-11-27 19:25:36.000000000 +0000
@@ -342,9 +342,13 @@
/* enforce one process per physical node by strcmp on host name */
if (cl_args->pernode) {
for (i=0; i<numnodes; i++) {
- if (nodes[i].availcpu > 0) {
- numleft -= (nodes[i].availcpu - 1);
- nodes[i].availcpu = 1;
+ int cpus2use;
+ /* use at most cl_args->pernode CPUs but be sure not to use more than
+ available! */
+ cpus2use = cl_args->pernode < (int)(nodes[i].availcpu) ? cl_args->pernode : (int)(nodes[i].availcpu);
+ if (cpus2use > 0) {
+ numleft -= (nodes[i].availcpu - cpus2use);
+ nodes[i].availcpu = cpus2use;
}
}
}
@@ -352,11 +356,11 @@
/* only used if there's a problem */
if (cl_args->nolocal) {
if (cl_args->pernode)
- complaint = "-nolocal and -pernode flags";
+ complaint = "-nolocal and -[n]pernode flags";
else
complaint = "-nolocal flag";
} else
- complaint = "-pernode flag";
+ complaint = "-[n]pernode flag";
if (numleft == 0) {
error("%s: no processors left after processing %s", __func__,
--- mpiexec-0.81+20061120/mpiexec.1 2006-11-20 19:50:19.000000000 +0000
+++ mpiexec-0.81+20061120+npernode/mpiexec.1 2006-11-27 19:24:46.000000000 +0000
@@ -125,6 +125,13 @@
level parallelism with MPI between nodes, and threads within a node, assmuming
the code is set up to do that.
.TP
+.B \-npernode\ \fInprocs\fR (SMP only)
+Allocate at most\ \fInprocs\fR processes per compute node. For SMP nodes, at most\ \fInprocs\fR
+processors will be allocated to a job. This flag is used to implement multiple
+level parallelism with MPI between nodes, and threads within a node, assuming
+the code is set up to do that, or to leave the available memory bandwidth to just some of the CPUs.
+\fI-npernode nprocs\fR is a generalization of \fI-pernode\fR.
+.TP
.B \-nolocal (not MPICH/P4)
Do not run any MPI processes on the local compute node. In a batch job, one
of the machines allocated to run a parallel job will run the batch script and
More information about the mpiexec
mailing list