two mpiexec questions on compatibility

Christopher Penney cpenney at ford.com
Fri May 3 11:20:30 EDT 2002


I have been using mpiexec under OpenPBS for quite a while.  It's an 
excellent tool, nice work.  I have two questions:

We are converting to PBS Pro 5.2.  When I compile mpiexec it compiles 
and links just fine, but when I try to use it (with -v) I get the error:

resolve_exe: using absolute exe "/apps/radioss/v4/v41n/ENGNV41N_SPMD"
node  0: name = node16r1, cpu = 0
node  1: name = node15r1, cpu = 0
node  2: name = node14r1, cpu = 0
node  3: name = node13r1, cpu = 0
node  4: name = node12r1, cpu = 0
node  5: name = node11r1, cpu = 0
node  6: name = node6r1, cpu = 0
node  7: name = node2r1, cpu = 0
gmpi conf file = /.../cae.ford.com/fs/u/cpenney/.gmpiconf.161
mpiexec: Error: wait_one_task_start: tm_poll remote: tm: system error.
Command exited with non-zero status 1

I have also included as an attachment the output of 'strace' if that helps.

My other question is that I'm curious how you would handle SMP boxes 
with a Myrinet card.  Each process has to be assigned a port number. 
Does mpiexec handle that, so that it picks one that is not in use?

Thanks!

    Chris Penney
    Ford Motor Company
    cpenney at ford.com



-------------- next part --------------

execve("/apps/mpiexec/bin/mpiexec", ["/apps/mpiexec/bin/mpiexec", "-kill", "-nostdout", "-verbose", "/apps/radioss/v4/v41n/ENGNV41N_SPMD"], [/* 42 vars */]) = 0
uname({sys="Linux", node="node31r1", ...}) = 0
brk(0)                                  = 0x12002a2c0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2000001e000
open("/etc/ld.so.preload", O_RDONLY)    = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY)      = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=19894, ...}) = 0
mmap(NULL, 19894, PROT_READ, MAP_PRIVATE, 3, 0) = 0x20000020000
close(3)                                = 0
open("/lib/libc.so.6.1", O_RDONLY)      = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0&\220\1\0\0\0\260\222"..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0755, st_size=10136136, ...}) = 0
mmap(NULL, 1614528, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x2000002e000
mprotect(0x20000196000, 139968, PROT_NONE) = 0
mmap(0x2000019e000, 90112, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 0x160000) = 0x2000019e000
mmap(0x200001b4000, 17088, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x200001b4000
close(3)                                = 0
munmap(0x20000020000, 19894)            = 0
fstat(0, {st_mode=S_IFREG|0600, st_size=245, ...}) = 0
fstat(1, {st_mode=S_IFREG|0600, st_size=1765, ...}) = 0
fstat(2, {st_mode=S_IFREG|0600, st_size=1821, ...}) = 0
brk(0)                                  = 0x12002a2c0
brk(0x12002a320)                        = 0x12002a320
brk(0x12002c000)                        = 0x12002c000
access("/apps/radioss/v4/v41n/ENGNV41N_SPMD", X_OK) = 0
stat("/apps/radioss/v4/v41n/ENGNV41N_SPMD", {st_mode=S_IFREG|0755, st_size=9190464, ...}) = 0
fstat(1, {st_mode=S_IFREG|0600, st_size=2189, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x20000020000
getxuid()                               = 637911
socket(PF_UNIX, SOCK_STREAM, 0)         = 3
connect(3, {sin_family=AF_UNIX, path="                                                                                       /var/run/.nscd_socket"}, 110) = 0
write(3, "\2\0\0\0\1\0\0\0\7\0\0\0", 12) = 12
write(3, "637911\0", 7)                 = 7
read(3, "\340q \0\1\0\0\0\10\0\0\0\0\0\0\0\6\0\0\0\0\0\0\0\327\273"..., 56) = 56
read(3, "cpenney\0-DCE-\0Christopher Penney"..., 89) = 89
close(3)                                = 0
gettimeofday({1020438977, 209704}, NULL) = 0
getxpid()                               = 13549
open("/etc/resolv.conf", O_RDONLY)      = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=143, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x20000022000
read(3, "search srl.ford.com pd3.ford.com"..., 8192) = 143
read(3, "", 8192)                       = 0
close(3)                                = 0
munmap(0x20000022000, 8192)             = 0
socket(PF_UNIX, SOCK_STREAM, 0)         = 3
connect(3, {sin_family=AF_UNIX, path="                                                                                       /var/run/.nscd_socket"}, 110) = 0
write(3, "\2\0\0\0\4\0\0\0\n\0\0\0", 12) = 12
write(3, "localhost\0", 10)             = 10
read(3, "\340q \0\1\0\0\0\26\0\0\0\0\0\0\0\1\0\0\0\0\0\0\0\2\0\0"..., 48) = 48
readv(3, ptrace: umoven: Input/output error
0x11ffff520, 3)                = 30
read(3, "localhost\0", 10)              = 10
close(3)                                = 0
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 3
setsockopt(3, SOL_SOCKET, SO_LINGER, [1], 8) = 0
connect(3, {sin_family=AF_INET, sin_port=htons(15003), sin_addr=inet_addr("127.0.0.1")}}, 16) = 0
write(3, "+2+22+11162.beowulf+83FD580293+1"..., 38) = 38
select(1024, [3], NULL, NULL, {2592000, 0}) = 1 (in [3], left {2592000, 0})
read(3, "+2+2+0+1+8+0+1+2+3+4+5+6+72+1116"..., 1024) = 45
close(3)                                = 0
open("/etc/nsswitch.conf", O_RDONLY)    = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=315, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x20000022000
read(3, "#\n# /etc/nsswitch.conf\n#\n\npasswd"..., 8192) = 315
brk(0x12002e000)                        = 0x12002e000
read(3, "", 8192)                       = 0
close(3)                                = 0
munmap(0x20000022000, 8192)             = 0
open("/etc/ld.so.cache", O_RDONLY)      = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=19894, ...}) = 0
mmap(NULL, 19894, PROT_READ, MAP_PRIVATE, 3, 0) = 0x20000022000
close(3)                                = 0
open("/lib/libnss_files.so.2", O_RDONLY) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0&\220\1\0\0\0\2408\0"..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0755, st_size=554225, ...}) = 0
mmap(NULL, 123808, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x200001ba000
mprotect(0x200001c8000, 66464, PROT_NONE) = 0
mmap(0x200001ca000, 65536, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0x200001ca000
close(3)                                = 0
munmap(0x20000022000, 19894)            = 0
open("/etc/services", O_RDONLY)         = 3
fcntl(3, F_GETFD)                       = 0
fcntl(3, F_SETFD, FD_CLOEXEC)           = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=17705, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x20000022000
read(3, "# /etc/services:\n# $Id: services"..., 8192) = 8192
read(3, "ell\t\t514/tcp\t\tcmd\t\t# no password"..., 8192) = 8192
read(3, "log\t\t5354/tcp\t\t\t# noclogd with T"..., 8192) = 1321
read(3, "", 8192)                       = 0
close(3)                                = 0
munmap(0x20000022000, 8192)             = 0
open("/etc/services", O_RDONLY)         = 3
fcntl(3, F_GETFD)                       = 0
fcntl(3, F_SETFD, FD_CLOEXEC)           = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=17705, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x20000022000
read(3, "# /etc/services:\n# $Id: services"..., 8192) = 8192
read(3, "ell\t\t514/tcp\t\tcmd\t\t# no password"..., 8192) = 8192
read(3, "log\t\t5354/tcp\t\t\t# noclogd with T"..., 8192) = 1321
read(3, "", 8192)                       = 0
close(3)                                = 0
munmap(0x20000022000, 8192)             = 0
open("/etc/services", O_RDONLY)         = 3
fcntl(3, F_GETFD)                       = 0
fcntl(3, F_SETFD, FD_CLOEXEC)           = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=17705, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x20000022000
read(3, "# /etc/services:\n# $Id: services"..., 8192) = 8192
read(3, "ell\t\t514/tcp\t\tcmd\t\t# no password"..., 8192) = 8192
read(3, "log\t\t5354/tcp\t\t\t# noclogd with T"..., 8192) = 1321
read(3, "", 8192)                       = 0
close(3)                                = 0
munmap(0x20000022000, 8192)             = 0
open("/etc/services", O_RDONLY)         = 3
fcntl(3, F_GETFD)                       = 0
fcntl(3, F_SETFD, FD_CLOEXEC)           = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=17705, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x20000022000
read(3, "# /etc/services:\n# $Id: services"..., 8192) = 8192
read(3, "ell\t\t514/tcp\t\tcmd\t\t# no password"..., 8192) = 8192
read(3, "log\t\t5354/tcp\t\t\t# noclogd with T"..., 8192) = 1321
read(3, "", 8192)                       = 0
close(3)                                = 0
munmap(0x20000022000, 8192)             = 0
open("/etc/services", O_RDONLY)         = 3
fcntl(3, F_GETFD)                       = 0
fcntl(3, F_SETFD, FD_CLOEXEC)           = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=17705, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x20000022000
read(3, "# /etc/services:\n# $Id: services"..., 8192) = 8192
read(3, "ell\t\t514/tcp\t\tcmd\t\t# no password"..., 8192) = 8192
read(3, "log\t\t5354/tcp\t\t\t# noclogd with T"..., 8192) = 1321
read(3, "", 8192)                       = 0
close(3)                                = 0
munmap(0x20000022000, 8192)             = 0
open("/etc/services", O_RDONLY)         = 3
fcntl(3, F_GETFD)                       = 0
fcntl(3, F_SETFD, FD_CLOEXEC)           = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=17705, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x20000022000
read(3, "# /etc/services:\n# $Id: services"..., 8192) = 8192
read(3, "ell\t\t514/tcp\t\tcmd\t\t# no password"..., 8192) = 8192
read(3, "log\t\t5354/tcp\t\t\t# noclogd with T"..., 8192) = 1321
read(3, "", 8192)                       = 0
close(3)                                = 0
munmap(0x20000022000, 8192)             = 0
open("/etc/services", O_RDONLY)         = 3
fcntl(3, F_GETFD)                       = 0
fcntl(3, F_SETFD, FD_CLOEXEC)           = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=17705, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x20000022000
read(3, "# /etc/services:\n# $Id: services"..., 8192) = 8192
read(3, "ell\t\t514/tcp\t\tcmd\t\t# no password"..., 8192) = 8192
read(3, "log\t\t5354/tcp\t\t\t# noclogd with T"..., 8192) = 1321
read(3, "", 8192)                       = 0
close(3)                                = 0
munmap(0x20000022000, 8192)             = 0
open("/etc/pbs.conf", O_RDONLY)         = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=119, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x20000022000
read(3, "PBS_EXEC=/opt/pbs/v520\nPBS_HOME="..., 8192) = 119
read(3, "", 8192)                       = 0
close(4832020848)                       = -1 EBADF (Bad file descriptor)
getxuid()                               = 637911
socket(PF_UNIX, SOCK_STREAM, 0)         = 4
connect(4, {sin_family=AF_UNIX, path="                                                                                       /var/run/.nscd_socket"}, 110) = 0
write(4, "\2\0\0\0\1\0\0\0\7\0\0\0", 12) = 12
write(4, "637911\0", 7)                 = 7
read(4, "\340q \0\1\0\0\0\10\0\0\0\0\0\0\0\6\0\0\0\0\0\0\0\327\273"..., 56) = 56
read(4, "cpenney\0-DCE-\0Christopher Penney"..., 89) = 89
close(4)                                = 0
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 4
socket(PF_UNIX, SOCK_STREAM, 0)         = 5
connect(5, {sin_family=AF_UNIX, path="                                                                                       /var/run/.nscd_socket"}, 110) = 0
write(5, "\2\0\0\0\4\0\0\0\10\0\0\0", 12) = 12
write(5, "beowulf\0", 8)                = 8
read(5, "\340q \0\1\0\0\0\10\0\0\0\0\0\0\0\1\0\0\0\0\0\0\0\2\0\0"..., 48) = 48
readv(5, ptrace: umoven: Input/output error
0x11ffff590, 3)                = 16
read(5, "beowulf.srl.ford.com\0", 21)   = 21
close(5)                                = 0
connect(4, {sin_family=AF_INET, sin_port=htons(15001), sin_addr=inet_addr("128.5.192.98")}}, 16) = 0
pipe([5, 0])                            = 5
vfork()                                 = 13550
close(6)                                = 0
read(5, "\0\0\0\0", 4)                  = 4
close(5)                                = 0
wait4(13550, [WIFEXITED(s) && WEXITSTATUS(s) == 0], 0, NULL) = 13550
--- SIGCHLD (Child exited) ---
write(4, "+2+12+19+7cpenney2+11162.beowulf"..., 57) = 57
select(1024, [4], NULL, NULL, {10800, 0}) = 1 (in [4], left {10800, 0})
read(4, "+2+1+0+0+6+1+22+11162.beowulf+12"..., 1024) = 141
write(4, "+2+12+59+7cpenney", 17)       = 17
read(4, "", 1)                          = 0
close(4)                                = 0
open("/.../cae.ford.com/fs/u/cpenney/.gmpiconf.162", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 4
fstat(4, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x20000024000
write(4, "8\nnode31r1 2 0\nnode30r1 2 0\nnode"..., 106) = 106
close(4)                                = 0
munmap(0x20000024000, 8192)             = 0
getcwd("/p/cpenney/radioss/test", 4095) = 24
brk(0x120038000)                        = 0x120038000
write(1, "resolve_exe: using absolute exe "..., 404resolve_exe: using absolute exe "/apps/radioss/v4/v41n/ENGNV41N_SPMD"
node  0: name = node31r1, cpu = 0
node  1: name = node30r1, cpu = 0
node  2: name = node25r1, cpu = 0
node  3: name = node24r1, cpu = 0
node  4: name = node23r1, cpu = 0
node  5: name = node22r1, cpu = 0
node  6: name = node21r1, cpu = 0
node  7: name = node20r1, cpu = 0
gmpi conf file = /.../cae.ford.com/fs/u/cpenney/.gmpiconf.162
) = 404
getrlimit(RLIMIT_NOFILE, {rlim_cur=1024, rlim_max=1024}) = 0
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 4
listen(4, 1024)                         = 0
getsockname(4, {sin_family=AF_INET, sin_port=htons(35398), sin_addr=inet_addr("0.0.0.0")}}, [16]) = 0
fork()                                  = 13551
close(0)                                = 0
close(4)                                = 0
close(-1)                               = -1 EBADF (Bad file descriptor)
close(-1)                               = -1 EBADF (Bad file descriptor)
rt_sigaction(SIGHUP, {0x120002280, [], 0}, NULL, 8, 0xffffffffffffffff) = 0
rt_sigaction(SIGINT, {0x120002280, [], 0}, NULL, 8, 0xffffffffffffffff) = 0
rt_sigaction(SIGTERM, {0x120002280, [], 0}, NULL, 8, 0xffffffffffffffff) = 0
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 0
setsockopt(0, SOL_SOCKET, SO_LINGER, [1], 8) = 0
connect(0, {sin_family=AF_INET, sin_port=htons(15003), sin_addr=inet_addr("127.0.0.1")}}, 16) = 0
brk(0x12003a000)                        = 0x12003a000
write(0, "+2+22+11162.beowulf+83FD580293+1"..., 1000) = 1000
write(0, "3+155PBS_O_PATH=/local/bin:/apps"..., 424) = 424
write(0, "+2+22+11162.beowulf+83FD580293+1"..., 1000) = 1000
write(0, "3+155PBS_O_PATH=/local/bin:/apps"..., 396) = 396
write(0, "+2+22+11162.beowulf+83FD580293+1"..., 1000) = 1000
write(0, "3+155PBS_O_PATH=/local/bin:/apps"..., 396) = 396
write(0, "+2+22+11162.beowulf+83FD580293+1"..., 1000) = 1000
write(0, "3+155PBS_O_PATH=/local/bin:/apps"..., 396) = 396
write(0, "+2+22+11162.beowulf+83FD580293+1"..., 1000) = 1000
write(0, "3+155PBS_O_PATH=/local/bin:/apps"..., 396) = 396
write(0, "+2+22+11162.beowulf+83FD580293+1"..., 1000) = 1000
write(0, "3+155PBS_O_PATH=/local/bin:/apps"..., 396) = 396
write(0, "+2+22+11162.beowulf+83FD580293+1"..., 1000) = 1000
write(0, "3+155PBS_O_PATH=/local/bin:/apps"..., 396) = 396
write(0, "+2+22+11162.beowulf+83FD580293+1"..., 1000) = 1000
write(0, "3+155PBS_O_PATH=/local/bin:/apps"..., 396) = 396
select(1024, [0], NULL, NULL, {2592000, 0}) = 1 (in [0], left {2591993, 978928})
read(0, "+2+23+999+25+17000", 1024)     = 18
write(2, "mpiexec: Error: ", 16mpiexec: Error: )        = 16
write(2, "wait_one_task_start: tm_poll rem"..., 35wait_one_task_start: tm_poll remote) = 35
write(2, ": tm: ", 6: tm: )                   = 6
write(2, "system error.\n", 14system error.
)         = 14
munmap(0x20000020000, 8192)             = 0
exit(1)                                 = ?



More information about the mpiexec mailing list