Patch for MVAPICH-1.0beta
Frank Mietke
frank.mietke at informatik.tu-chemnitz.de
Mon Feb 4 07:44:17 EST 2008
Hi there,
Due to a change of the protocol version in MVAPICH-1.0beta from 5 to 6, I've
made the following changes to ib.c.
Hope this helps.
Best Regards,
Frank
--
Dipl.-Inf. Frank Mietke | Fakultätsrechen- und Informationszentrum
Tel.: 0371 - 531 - 35538 | Fak. für Informatik
Fax: 0371 - 531 8 35538 | TU-Chemnitz
Key-ID: 60F59599 | frank.mietke at informatik.tu-chemnitz.de
-------------- next part --------------
--- ib_old.c 2008-01-28 13:47:52.000000000 +0100
+++ ib_new.c 2008-02-01 14:00:51.000000000 +0100
@@ -28,10 +28,13 @@
static int version = -1;
static int is_homogeneous = 1;
static char *address = 0;
+static int *hca_type_arr = 0;
+static int first_rank = -1;
+static const int hca_type_len = sizeof(int);
static int address_size = 0;
static char *pids = 0;
static int pids_size = 0;
-static int phase = 0; /* for two-phase version 5 */
+static int phase = 0; /* for two-phase version 5 and 6*/
/* state of all the sockets */
static int num_waiting_to_accept; /* first accept all numtasks */
@@ -170,6 +173,23 @@
* pids[] # <pidlen> bytes
* Write back personalized out_addrs[] and full pids[].
*
+ * Version 6:
+ * It's 2 phase based like version 5 but there are some changes and
+ * additions to the prior version.
+ * First phase distributes hostids and hca_types:
+ * version # 6
+ * rank # 0..np-1
+ * hostidlen # 4 bytes
+ * hostid # <hostidlen> bytes
+ * hca_type # 4 bytes
+ * Write back whether hca_types are homogeneous or not and send the
+ * entire hostid[] array. Keep the fds open, go to phase 2 and gather:
+ * addrlen # 4 bytes, could be 0
+ * addrs[] # <addrlen> bytes
+ * pidlen # 4 bytes
+ * pids[] # <pidlen> bytes
+ * Write back personalized out_addrs[] and full pids[].
+ *
* Return negative on error, or new rank number for success.
*/
static int read_ib_one(int fd)
@@ -179,17 +199,25 @@
int j, ret = -1;
pid_t pidlen;
- if (version == 5 && phase == 1) {
+ if ((version == 5 || version == 6) && phase == 1) {
/* no version again on second phase */
testvers = version;
} else {
- if (read_full_ret(fd, &testvers, sizeof(int)) != sizeof(int))
- goto out;
+ if (read_full_ret(fd, &testvers, sizeof(int)) != sizeof(int))
+ goto out;
+ }
+ if (version == 6 && phase == 1){
+ int i;
+ for (i = 0; i < numtasks; i++)
+ if (fds[i] == fd)
+ rank = i;
+ }
+ else{
+ if (read_full_ret(fd, &rank, sizeof(int)) != sizeof(int))
+ goto out;
}
- if (read_full_ret(fd, &rank, sizeof(int)) != sizeof(int))
- goto out;
if (read_full_ret(fd, &addrlen, sizeof(int)) != sizeof(int))
- goto out;
+ goto out;
non_versioned_092 = 0;
if (rank == 32 + numtasks * 8) {
@@ -219,11 +247,11 @@
if (version == -1) {
version = testvers;
- if (!(version == 1 || version == 2 || version == 3 || version == 5)) {
+ if (!(version == 1 || version == 2 || version == 3 || version == 5 || version == 6)) {
warning(
"%s: protocol version %d not known, but might still work",
__func__, version);
- version = 5; /* guess the latest still works */
+ version = 6; /* guess the latest still works */
}
debug(1, "%s: version %d startup%s", __func__, version,
non_versioned_092 ? " (unversioned)" : "");
@@ -249,6 +277,9 @@
error("%s: wrong address size from rank %d, got %d, expected %d",
__func__, rank, addrlen, address_size);
}
+ /* New to protocol version 6 of MVAPICH-1.0 */
+ if (!hca_type_arr)
+ hca_type_arr = (int *) malloc(hca_type_len * numtasks);
if (non_versioned_092) {
/* push back the bit we accidentally read in guessing the version */
@@ -262,8 +293,16 @@
!= address_size)
goto out;
}
+ if (version == 6 && phase == 0){
+ if (read_full_ret(fd, &hca_type_arr[rank], hca_type_len) != hca_type_len)
+ goto out;
+ if (first_rank == -1)
+ first_rank = rank;
+ if (hca_type_arr[first_rank] != hca_type_arr[rank])
+ is_homogeneous = 0;
+ }
- if (version == 3 || (version == 5 && phase == 1)) {
+ if (version == 3 || ((version == 5 || version == 6) && phase == 1)) {
read_full(fd, &pidlen, sizeof(pidlen));
if (!pids) {
pids_size = pidlen;
@@ -367,23 +406,35 @@
}
free(pids);
}
- } else if (version == 5) {
+ } else if (version == 5 || version == 6) {
if (phase == 0) {
+ /* Protocol version 6 in MVAPICH-1.0 adds checking of HCA types whether
+ * they are homogeneous. */
/* These are actually the hostids, in mvapich parlance. Next
* phase will be the personalized addresses. */
for (i=0; i<numtasks; i++) {
+ if (version == 6)
+ if (write_full(fds[i], &is_homogeneous, sizeof(int)) < 0)
+ error_errno("%s: write homogeneous flag to rank %d", __func__, i);
if (write_full(fds[i], address, numtasks * address_size) < 0)
error_errno("%s: write addresses to rank %d", __func__, i);
}
phase = 1;
- for (i=0; i<numtasks; i++) {
- close(fds[i]);
- fds[i] = -1;
- }
+ if (version == 5){
+ for (i=0; i<numtasks; i++) {
+ close(fds[i]);
+ fds[i] = -1;
+ }
+ }
+ free(hca_type_arr);
+ hca_type_arr = NULL;
address_size = 0;
free(address);
address = NULL;
- num_waiting_to_accept = numtasks;
+ if (version == 6)
+ num_waiting_to_read = numtasks;
+ else
+ num_waiting_to_accept = numtasks;
goto next_phase;
} else if (phase == 1) {
/*
@@ -556,7 +607,8 @@
goto out;
}
- /*
+ if (!(version == 6 && phase == 1)){
+ /*
* If there's a new connection to accept, do so and add it to the
* poll list for later reading.
*/
@@ -591,6 +643,7 @@
#endif
++num_waiting_to_read;
}
+ }
/*
* Poll for something to read.
@@ -614,7 +667,6 @@
error_errno("%s: select", __func__);
for (fd=0; fd <= fdmax; fd++) {
if (FD_ISSET(fd, &trfs)) {
- FD_CLR(fd, &rfs);
#endif
--num_waiting_to_read;
++ret;
@@ -622,12 +674,20 @@
num_waiting_to_read);
rank = read_ib_one(fd);
+#ifndef HAVE_POLL
+ /* This ensures that the socket descriptors survive phase 0 of version 6. */
+ if (version != 6)
+ FD_CLR(fd, &rfs);
+#endif
+
if (rank < 0) {
close(fd);
ret = rank;
goto out; /* let obit poll catch it later */
}
+ if (version == 6 && phase == 1)
+ continue;
/* rank checked in already? */
if (fds[rank] != -1)
error("%s: rank %d checked in twice", __func__, rank);
More information about the mpiexec
mailing list