mpiexec mvapich rank error

Pete Wyckoff pw at osc.edu
Wed Jan 25 17:02:16 EST 2006


pw at osc.edu wrote on Wed, 25 Jan 2006 16:58 -0500:
[..]
> Attached is the current diff of my modified openib SVN tree.

Alternatively, you can hack mpiexec to force version = 3, but then
it will no longer work with mvapich (non-gen2), if you ever care
about that.

	-- Pete
-------------- next part --------------
Index: mpid/ch_gen2/process/mpirun_rsh.c
===================================================================
--- mpid/ch_gen2/process/mpirun_rsh.c	(revision 5183)
+++ mpid/ch_gen2/process/mpirun_rsh.c	(working copy)
@@ -749,7 +749,8 @@
     int id = getpid();
     int str_len;
 
-    str_len = strlen(command_name) + strlen(env) + strlen(wd) + 512;
+    str_len = strlen(command_name) + strlen(env) + strlen(wd)
+              + strlen(mpirun_processes) + 512;
 
     if ((remote_command = malloc(str_len)) == NULL) {
         fprintf(stderr, "Failed to malloc %d bytes for remote_command\n",
Index: mpid/ch_gen2/process/pmgr_client.h
===================================================================
--- mpid/ch_gen2/process/pmgr_client.h	(revision 5183)
+++ mpid/ch_gen2/process/pmgr_client.h	(working copy)
@@ -112,6 +112,6 @@
  * of the spawner, e.g. mpirun_rsh, to check that it understands
  * the version of the executable.
  */
-#define PMGR_VERSION 2
+#define PMGR_VERSION 3
 
 #endif
Index: mpid/ch_gen2/process/pmgr_client_mpirun_rsh.c
===================================================================
--- mpid/ch_gen2/process/pmgr_client_mpirun_rsh.c	(revision 5183)
+++ mpid/ch_gen2/process/pmgr_client_mpirun_rsh.c	(working copy)
@@ -169,10 +169,10 @@
     struct hostent *mpirun_hostent;
 
     char mypid[12];
-    int my_pid_int = (int) getpid();
+    pid_t my_pid_int = (int) getpid();
     int pidlen, mypid_len; 
-    int *ppids = (int*)pallpids;
-    char *allpids=NULL;
+    pid_t *ppids = (int*)pallpids;
+    pid_t *allpids=NULL;
   
     /*
      * Exchange information with the mpirun program. Send it our
@@ -252,8 +252,7 @@
         exit(1);
     }
  
-    sprintf(mypid,"%10d",my_pid_int);
-    mypid_len = strlen(mypid); 			  
+    mypid_len = sizeof(my_pid_int);
   
     if(!mypid_len)
     {
@@ -277,7 +276,7 @@
     }
 
     if (pidlen != 0) {
-      nwritten = write(mpirun_socket, mypid, pidlen);
+        nwritten = write(mpirun_socket, &my_pid_int, pidlen);
       if (nwritten != pidlen) {
         perror("write");
         sleep(2);
@@ -303,10 +302,8 @@
        	tot_nread=0;
        	/* finally, read addresses from all processes */
        	while (tot_nread < pmgr_nprocs*pidlen) {
-	    nread = read(mpirun_socket, 
-		    /* FIXME: assumed unsigned long == 4B on IA32 */
-		    (void*)((unsigned long)allpids+tot_nread), 
-		    (pmgr_nprocs*pidlen)-tot_nread);
+            nread = read(mpirun_socket, (void*)((char *)allpids+tot_nread),
+                    (pmgr_nprocs*pidlen)-tot_nread);
 	    if (nread <= 0) {
 	       	perror("read");
 	       	sleep(2);
@@ -322,18 +319,10 @@
     close(mpirun_socket);
 #endif
 
-#if 0
-    printf("process %d finished with address exchange\n", pmgr_me);
-    fflush(stdout);
-#endif
-  
     if (allpids) {		
          if(allpids) {
-             char tem_pid[12];
              for(i=0;i < pmgr_nprocs; i++) {
-         	strncpy(tem_pid, allpids+(i*pidlen), pidlen);
-         	tem_pid[pidlen]='\0';
-         	ppids[i] = atoi(tem_pid); 
+                 ppids[i] = allpids[i];
              } 
          }
     	free(allpids);	


More information about the mpiexec mailing list