mpiexec mvapich rank error
Pete Wyckoff
pw at osc.edu
Wed Jan 25 17:02:16 EST 2006
pw at osc.edu wrote on Wed, 25 Jan 2006 16:58 -0500:
[..]
> Attached is the current diff of my modified openib SVN tree.
Alternatively, you can hack mpiexec to force version = 3, but then
it will no longer work with the non-gen2 mvapich, if you ever care
about that.
-- Pete
-------------- next part --------------
Index: mpid/ch_gen2/process/mpirun_rsh.c
===================================================================
--- mpid/ch_gen2/process/mpirun_rsh.c (revision 5183)
+++ mpid/ch_gen2/process/mpirun_rsh.c (working copy)
@@ -749,7 +749,8 @@
int id = getpid();
int str_len;
- str_len = strlen(command_name) + strlen(env) + strlen(wd) + 512;
+ str_len = strlen(command_name) + strlen(env) + strlen(wd)
+ + strlen(mpirun_processes) + 512;
if ((remote_command = malloc(str_len)) == NULL) {
fprintf(stderr, "Failed to malloc %d bytes for remote_command\n",
Index: mpid/ch_gen2/process/pmgr_client.h
===================================================================
--- mpid/ch_gen2/process/pmgr_client.h (revision 5183)
+++ mpid/ch_gen2/process/pmgr_client.h (working copy)
@@ -112,6 +112,6 @@
* of the spawner, e.g. mpirun_rsh, to check that it understands
* the version of the executable.
*/
-#define PMGR_VERSION 2
+#define PMGR_VERSION 3
#endif
Index: mpid/ch_gen2/process/pmgr_client_mpirun_rsh.c
===================================================================
--- mpid/ch_gen2/process/pmgr_client_mpirun_rsh.c (revision 5183)
+++ mpid/ch_gen2/process/pmgr_client_mpirun_rsh.c (working copy)
@@ -169,10 +169,10 @@
struct hostent *mpirun_hostent;
char mypid[12];
- int my_pid_int = (int) getpid();
+ pid_t my_pid_int = (int) getpid();
int pidlen, mypid_len;
- int *ppids = (int*)pallpids;
- char *allpids=NULL;
+ pid_t *ppids = (int*)pallpids;
+ pid_t *allpids=NULL;
/*
* Exchange information with the mpirun program. Send it our
@@ -252,8 +252,7 @@
exit(1);
}
- sprintf(mypid,"%10d",my_pid_int);
- mypid_len = strlen(mypid);
+ mypid_len = sizeof(my_pid_int);
if(!mypid_len)
{
@@ -277,7 +276,7 @@
}
if (pidlen != 0) {
- nwritten = write(mpirun_socket, mypid, pidlen);
+ nwritten = write(mpirun_socket, &my_pid_int, pidlen);
if (nwritten != pidlen) {
perror("write");
sleep(2);
@@ -303,10 +302,8 @@
tot_nread=0;
/* finally, read addresses from all processes */
while (tot_nread < pmgr_nprocs*pidlen) {
- nread = read(mpirun_socket,
- /* FIXME: assumed unsigned long == 4B on IA32 */
- (void*)((unsigned long)allpids+tot_nread),
- (pmgr_nprocs*pidlen)-tot_nread);
+ nread = read(mpirun_socket, (void*)((char *)allpids+tot_nread),
+ (pmgr_nprocs*pidlen)-tot_nread);
if (nread <= 0) {
perror("read");
sleep(2);
@@ -322,18 +319,10 @@
close(mpirun_socket);
#endif
-#if 0
- printf("process %d finished with address exchange\n", pmgr_me);
- fflush(stdout);
-#endif
-
if (allpids) {
if(allpids) {
- char tem_pid[12];
for(i=0;i < pmgr_nprocs; i++) {
- strncpy(tem_pid, allpids+(i*pidlen), pidlen);
- tem_pid[pidlen]='\0';
- ppids[i] = atoi(tem_pid);
+ ppids[i] = allpids[i];
}
}
free(allpids);
More information about the mpiexec mailing list