
Figure 12: Three-group pipeline.
Groups 0 and 1 communicate. Groups 1 and 2 communicate. Therefore, group 0 requires one inter-communicator, group 1 requires two inter-communicators, and group 2 requires one inter-communicator.
main(int argc, char **argv)
{
    MPI_Comm myComm;        /* intra-communicator of local sub-group */
    MPI_Comm myFirstComm;   /* inter-communicator */
    MPI_Comm mySecondComm;  /* second inter-communicator (group 1 only) */
    int membershipKey;
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* User code must generate membershipKey in the range [0, 1, 2] */
    membershipKey = rank % 3;

    /* Build intra-communicator for local sub-group */
    MPI_Comm_split(MPI_COMM_WORLD, membershipKey, rank, &myComm);

    /* Build inter-communicators.  Tags are hard-coded. */
    if (membershipKey == 0)
    {                     /* Group 0 communicates with group 1. */
        MPI_Intercomm_create( myComm, 0, MPI_COMM_WORLD, 1,
                              1, &myFirstComm);
    }
    else if (membershipKey == 1)
    {              /* Group 1 communicates with groups 0 and 2. */
        MPI_Intercomm_create( myComm, 0, MPI_COMM_WORLD, 0,
                              1, &myFirstComm);
        MPI_Intercomm_create( myComm, 0, MPI_COMM_WORLD, 2,
                              12, &mySecondComm);
    }
    else if (membershipKey == 2)
    {                     /* Group 2 communicates with group 1. */
        MPI_Intercomm_create( myComm, 0, MPI_COMM_WORLD, 1,
                              12, &myFirstComm);
    }

    /* Do work ... */

    switch(membershipKey)  /* free communicators appropriately */
    {
    case 1:
        MPI_Comm_free(&mySecondComm);
        /* fall through: group 1 also frees myFirstComm */
    case 0:
    case 2:
        MPI_Comm_free(&myFirstComm);
        break;
    }

    MPI_Finalize();
}
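As an illustration of the "Do work" step above (this sketch is not part of the original figure; the choice of sending rank, tag 0, and payload are assumptions), note that in an inter-communicator the source and destination ranks of point-to-point calls name processes in the remote group. Rank 0 of group 0 could therefore pass a value to rank 0 of group 1 over myFirstComm as follows:

    /* Illustrative sketch only: point-to-point over an inter-communicator.
       Destination/source ranks address processes in the REMOTE group.     */
    int localRank, data = 0;
    MPI_Status status;

    MPI_Comm_rank(myComm, &localRank);  /* rank within the local sub-group */

    if (membershipKey == 0 && localRank == 0)
        /* send to rank 0 of group 1, the remote group of myFirstComm */
        MPI_Send(&data, 1, MPI_INT, 0, 0, myFirstComm);
    else if (membershipKey == 1 && localRank == 0)
        /* receive from rank 0 of group 0, the remote group of myFirstComm */
        MPI_Recv(&data, 1, MPI_INT, 0, 0, myFirstComm, &status);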

Figure 13: Three-group ring.
Groups 0 and 1 communicate. Groups 1 and 2 communicate. Groups 0 and 2 communicate. Therefore, each group requires two inter-communicators.
main(int argc, char **argv)
{
    MPI_Comm myComm;        /* intra-communicator of local sub-group */
    MPI_Comm myFirstComm;   /* inter-communicators */
    MPI_Comm mySecondComm;
    MPI_Status status;
    int membershipKey;
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    ...

    /* User code must generate membershipKey in the range [0, 1, 2] */
    membershipKey = rank % 3;

    /* Build intra-communicator for local sub-group */
    MPI_Comm_split(MPI_COMM_WORLD, membershipKey, rank, &myComm);

    /* Build inter-communicators.  Tags are hard-coded. */
    if (membershipKey == 0)
    {              /* Group 0 communicates with groups 1 and 2. */
        MPI_Intercomm_create( myComm, 0, MPI_COMM_WORLD, 1,
                              1, &myFirstComm);
        MPI_Intercomm_create( myComm, 0, MPI_COMM_WORLD, 2,
                              2, &mySecondComm);
    }
    else if (membershipKey == 1)
    {              /* Group 1 communicates with groups 0 and 2. */
        MPI_Intercomm_create( myComm, 0, MPI_COMM_WORLD, 0,
                              1, &myFirstComm);
        MPI_Intercomm_create( myComm, 0, MPI_COMM_WORLD, 2,
                              12, &mySecondComm);
    }
    else if (membershipKey == 2)
    {              /* Group 2 communicates with groups 0 and 1. */
        MPI_Intercomm_create( myComm, 0, MPI_COMM_WORLD, 0,
                              2, &myFirstComm);
        MPI_Intercomm_create( myComm, 0, MPI_COMM_WORLD, 1,
                              12, &mySecondComm);
    }

    /* Do some work ... */

    /* Then free communicators before terminating... */
    MPI_Comm_free(&myFirstComm);
    MPI_Comm_free(&mySecondComm);
    MPI_Comm_free(&myComm);

    MPI_Finalize();
}
The following procedures exemplify the process by which a user could create a name service for building inter-communicators via a rendezvous involving a server communicator and a tag name selected by both groups.
After all MPI processes execute MPI_INIT, every process calls the example function, Init_server(), defined below. Then, if the new_world returned is MPI_COMM_NULL, the process receiving MPI_COMM_NULL is required to implement a server function, in a reactive loop, Do_server(). Everyone else just does their prescribed computation, using new_world as the new effective ``global'' communicator. One designated process calls Undo_server() to get rid of the server when it is no longer needed.
Features of this approach include support for multiple name servers, the ability to scope a name server to a specific set of processes (those of peer_comm), and the ability to make such servers come and go as desired.
#define INIT_SERVER_TAG_1  666
#define UNDO_SERVER_TAG_1  777

static int server_keyval = MPI_KEYVAL_INVALID;

/* for attribute management for server_comm, copy callback: */
void handle_copy_fn(MPI_Comm *oldcomm, int *keyval, void *extra_state,
                    void *attribute_val_in, void **attribute_val_out,
                    int *flag)
{
    /* copy the handle */
    *attribute_val_out = attribute_val_in;
    *flag = 1;                  /* indicate that the copy is to happen */
}

int Init_server(peer_comm, rank_of_server, server_comm, new_world)
MPI_Comm peer_comm;
int rank_of_server;
MPI_Comm *server_comm;
MPI_Comm *new_world;            /* new effective world, sans server */
{
    MPI_Comm temp_comm, lone_comm;
    MPI_Group peer_group, temp_group;
    int rank_in_peer_comm, size, color, key = 0;
    int peer_leader, peer_leader_rank_in_temp_comm;

    MPI_Comm_rank(peer_comm, &rank_in_peer_comm);
    MPI_Comm_size(peer_comm, &size);

    if ((size < 2) || (0 > rank_of_server) || (rank_of_server >= size))
        return (MPI_ERR_OTHER);

    /* create two communicators, by splitting peer_comm into
       the server process, and everyone else */

    peer_leader = (rank_of_server + 1) % size;      /* arbitrary choice */

    if ((color = (rank_in_peer_comm == rank_of_server)))
    {
        /* server branch */
        MPI_Comm_split(peer_comm, color, key, &lone_comm);

        MPI_Intercomm_create(lone_comm, 0, peer_comm, peer_leader,
                             INIT_SERVER_TAG_1, server_comm);

        MPI_Comm_free(&lone_comm);
        *new_world = MPI_COMM_NULL;
    }
    else
    {
        /* client branch */
        MPI_Comm_split(peer_comm, color, key, &temp_comm);

        MPI_Comm_group(peer_comm, &peer_group);
        MPI_Comm_group(temp_comm, &temp_group);
        MPI_Group_translate_ranks(peer_group, 1, &peer_leader,
                                  temp_group,
                                  &peer_leader_rank_in_temp_comm);

        MPI_Intercomm_create(temp_comm, peer_leader_rank_in_temp_comm,
                             peer_comm, rank_of_server,
                             INIT_SERVER_TAG_1, server_comm);

        /* attach new_world communication attribute to server_comm: */

        /* CRITICAL SECTION FOR MULTITHREADING */
        if (server_keyval == MPI_KEYVAL_INVALID)
        {
            /* acquire the process-local name for the server keyval */
            MPI_Keyval_create(handle_copy_fn, NULL, &server_keyval, NULL);
        }

        *new_world = temp_comm;

        /* Cache handle of intra-communicator on inter-communicator: */
        MPI_Attr_put(server_comm, server_keyval, (void *)(*new_world));
    }

    return (MPI_SUCCESS);
}

The actual server process would commit to running the following code:
int Do_server(server_comm)
MPI_Comm server_comm;
{
    void init_queue();
    int  en_queue(), de_queue();    /* keep triplets of integers
                                       for later matching (fns not shown) */
    MPI_Status status;
    int client_tag, client_source;
    int client_rank_in_new_world, pairs_rank_in_new_world;
    int pairs_rank_in_server;
    int buffer[10], count = 1;
    void *queue;

    init_queue(&queue);

    for (;;)        /* usual server loop: serve requests forever */
    {
        MPI_Recv(buffer, count, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
                 server_comm, &status);     /* accept from any client */

        /* determine client: */
        client_tag = status.MPI_TAG;
        client_source = status.MPI_SOURCE;
        client_rank_in_new_world = buffer[0];

        if (client_tag == UNDO_SERVER_TAG_1)    /* client that
                                                   terminates server */
        {
            /* drain any unmatched requests, then shut down */
            while (de_queue(queue, MPI_ANY_TAG, &pairs_rank_in_new_world,
                            &pairs_rank_in_server))
                ;

            MPI_Comm_free(&server_comm);
            break;
        }

        if (de_queue(queue, client_tag, &pairs_rank_in_new_world,
                     &pairs_rank_in_server))
        {
            /* matched pair with same tag, tell them
               about each other! */
            buffer[0] = pairs_rank_in_new_world;
            MPI_Send(buffer, 1, MPI_INT, client_source, client_tag,
                     server_comm);

            buffer[0] = client_rank_in_new_world;
            MPI_Send(buffer, 1, MPI_INT, pairs_rank_in_server, client_tag,
                     server_comm);
        }
        else
            en_queue(queue, client_tag, client_source,
                     client_rank_in_new_world);
    }

    return (MPI_SUCCESS);
}
A particular process would be responsible for ending the server when it is no longer needed. Its call to Undo_server() would terminate the server function.
int Undo_server(server_comm)        /* example client that ends server */
MPI_Comm *server_comm;
{
    int buffer = 0;

    MPI_Send(&buffer, 1, MPI_INT, 0, UNDO_SERVER_TAG_1, *server_comm);
    MPI_Comm_free(server_comm);

    return (MPI_SUCCESS);
}
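Putting the three routines together, a driver following the protocol described above might look like the sketch below. It is not part of the standard example; the choice of rank 0 as the server, rank 1 as the designated terminating client, and the placeholder for application work are all assumptions made for illustration.

#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_Comm server_comm, new_world;
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* every process calls Init_server(); rank 0 becomes the server */
    Init_server(MPI_COMM_WORLD, 0, &server_comm, &new_world);

    if (new_world == MPI_COMM_NULL)
    {
        Do_server(server_comm);         /* reactive server loop */
    }
    else
    {
        /* ... application work, using new_world as the effective
           ``global'' communicator, including any rendezvous through
           the name service ... */

        if (rank == 1)
            Undo_server(&server_comm);  /* designated client ends server */
        else
            MPI_Comm_free(&server_comm);

        MPI_Comm_free(&new_world);
    }

    MPI_Finalize();
    return 0;
}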
The following is a blocking name service for inter-communication, with the same semantic restrictions as MPI_Intercomm_create, but a simplified syntax. It uses the name-service functionality just defined.
int Intercomm_name_create(local_comm, server_comm, tag, comm)
MPI_Comm local_comm, server_comm;
int tag;
MPI_Comm *comm;
{
    int error;
    int found;              /* attribute acquisition mgmt for new_world */
                            /* comm in server_comm */
    void *val;
    MPI_Comm new_world;
    MPI_Status status;
    int buffer[10], rank;
    int local_leader = 0;

    MPI_Attr_get(server_comm, server_keyval, &val, &found);
    new_world = (MPI_Comm)val;          /* retrieve cached handle */

    MPI_Comm_rank(server_comm, &rank);  /* rank in local group */

    if (rank == local_leader)
    {
        /* the leader registers with the server and waits for the
           rank in new_world of the partner group's leader */
        buffer[0] = rank;
        MPI_Send(buffer, 1, MPI_INT, 0, tag, server_comm);
        MPI_Recv(buffer, 1, MPI_INT, 0, tag, server_comm, &status);
    }

    error = MPI_Intercomm_create(local_comm, local_leader, new_world,
                                 buffer[0], tag, comm);

    return (error);
}
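As a usage illustration (a sketch, not part of the example: left_comm and right_comm are assumed to be disjoint intra-communicators previously built, for instance with MPI_Comm_split over new_world, and 42 is an arbitrary tag agreed on by both groups), two groups of clients obtain an inter-communicator to each other simply by calling Intercomm_name_create() with the same tag:

    /* Sketch: two disjoint client groups rendezvous through the name service.
       Processes in the "left" group execute the first call; processes in the
       "right" group execute the second.  Both use the agreed-upon tag 42.    */
    MPI_Comm left_to_right, right_to_left;

    /* in processes belonging to the left group: */
    Intercomm_name_create(left_comm, server_comm, 42, &left_to_right);

    /* in processes belonging to the right group: */
    Intercomm_name_create(right_comm, server_comm, 42, &right_to_left);

    /* each call returns an inter-communicator whose remote group is
       the other client group */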