
/**
ex: Consider two processes, with ranks 0 and 1. Write an MPI program in which:
- rank 0 sends a single value of type double, int, char, etc. to rank 1;
- rank 0 sends an array of type double, int, char, etc. to rank 1;
*/
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <math.h>
int getRank(){
int rank;
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
return rank;
}
int getSize(){
int size;
MPI_Comm_size(MPI_COMM_WORLD,&size);
return size;
}
int main(int argc, char** argv){
MPI_Status status;
MPI_Init(&argc,&argv);
//a)
if(getRank() == 0){
    //process 0 sends the values, one by one
    int valI = 434;
    double valD = 32.323;
    char valC = 'c';
    MPI_Send(&valI,1,MPI_INT,1,1,MPI_COMM_WORLD);
    printf("processor %d sent %d\n",getRank(),valI);
    MPI_Send(&valD,1,MPI_DOUBLE,1,2,MPI_COMM_WORLD);
    printf("processor %d sent %f\n",getRank(),valD);
    MPI_Send(&valC,1,MPI_CHAR,1,3,MPI_COMM_WORLD);
    printf("processor %d sent %c\n",getRank(),valC);
}
if(getRank() == 1){
    //process 1 receives the values, one by one
    int valI;
    double valD;
    char valC;
    MPI_Recv(&valI,1,MPI_INT,0,1,MPI_COMM_WORLD,&status);
    printf("processor %d got %d\n",getRank(),valI);
    MPI_Recv(&valD,1,MPI_DOUBLE,0,2,MPI_COMM_WORLD,&status);
    printf("processor %d got %f\n",getRank(),valD);
    MPI_Recv(&valC,1,MPI_CHAR,0,3,MPI_COMM_WORLD,&status);
    printf("processor %d got %c\n",getRank(),valC);
}
//b)
if(getRank() == 0){
    //process 0 sends the arrays, one by one
    int i;
    int valI[100] = {434, 32, 123};
    double valD[100] = {32.323, 1.4, 14.23};
    char valC[100] = {'c', 'a', 'v'};
    //send the array of integers
    MPI_Send(valI,100,MPI_INT,1,1,MPI_COMM_WORLD);
    printf("processor %d sent {",getRank());
    for(i = 0; i < 3; i++){
        if(i == 2)
            printf("%d", valI[i]);
        else
            printf("%d, ", valI[i]);
    }
    printf("}\n");
    //send the array of doubles
    MPI_Send(valD,100,MPI_DOUBLE,1,2,MPI_COMM_WORLD);
    printf("processor %d sent {",getRank());
    for(i = 0; i < 3; i++){
        if(i == 2)
            printf("%f", valD[i]);
        else
            printf("%f, ", valD[i]);
    }
    printf("}\n");
    //send the array of chars
    MPI_Send(valC,100,MPI_CHAR,1,3,MPI_COMM_WORLD);
    printf("processor %d sent {",getRank());
    for(i = 0; i < 3; i++){
        if(i == 2)
            printf("%c", valC[i]);
        else
            printf("%c, ", valC[i]);
    }
    printf("}\n");
}
if(getRank() == 1){
    //process 1 receives the arrays, one by one
    int i;
    int valI[100];
    double valD[100];
    char valC[100];
    //receive the array of integers
    MPI_Recv(valI,100,MPI_INT,0,1,MPI_COMM_WORLD,&status);
    printf("processor %d got {",getRank());
    for(i = 0; i < 3; i++){
        if(i == 2)
            printf("%d", valI[i]);
        else
            printf("%d, ", valI[i]);
    }
    printf("}\n");
    //receive the array of doubles
    MPI_Recv(valD,100,MPI_DOUBLE,0,2,MPI_COMM_WORLD,&status);
    printf("processor %d got {",getRank());
    for(i = 0; i < 3; i++){
        if(i == 2)
            printf("%f", valD[i]);
        else
            printf("%f, ", valD[i]);
    }
    printf("}\n");
    //receive the array of chars
    MPI_Recv(valC,100,MPI_CHAR,0,3,MPI_COMM_WORLD,&status);
    printf("processor %d got {",getRank());
    for(i = 0; i < 3; i++){
        if(i == 2)
            printf("%c", valC[i]);
        else
            printf("%c, ", valC[i]);
    }
    printf("}\n");
}
MPI_Finalize();
return 0;
}
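In part b) the receiver relies on the sender's fixed count of 100 elements. If rank 1 did not know the length in advance, it could probe the message first. A minimal sketch under that assumption (same source rank 0 and tag 1 as above; the 100-element buffer is kept as an assumed upper bound):

#include <stdio.h>
#include <mpi.h>
/* Sketch: receive an int array of unknown length from rank 0 with tag 1. */
void recv_unknown_length(void){
    MPI_Status status;
    int count;
    int buf[100];                              /* assumed upper bound */
    MPI_Probe(0, 1, MPI_COMM_WORLD, &status);  /* block until the message arrives */
    MPI_Get_count(&status, MPI_INT, &count);   /* how many MPI_INTs were sent */
    MPI_Recv(buf, count, MPI_INT, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    printf("processor 1 got %d integers\n", count);
}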
/**
ex: Consider 3 processes, with ranks 0, 1 and 2. Processes 1 and 2 each hold a vector of type double, of length N, which they send to rank 0. Rank 0 writes the data to a file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <math.h>
#define N 100
int getRank(){
int rank;
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
return rank;
}
int getSize(){
int size;
MPI_Comm_size(MPI_COMM_WORLD,&size);
return size;
}
int main(int argc, char** argv){
MPI_Status status;
FILE* f = NULL;
MPI_Init(&argc,&argv);
//only rank 0 writes the output file, so only rank 0 opens it
if(getRank() == 0)
    f = fopen("ex2_Out.txt", "w");
if(getRank() == 1){
    //process 1 sends its array
    int i;
    double valD1[N] = {434.3, 32.12, 123.76};
    //send the array of doubles
    MPI_Send(valD1,N,MPI_DOUBLE,0,1,MPI_COMM_WORLD);
    printf("processor %d sent {",getRank());
    for(i = 0; i < 3; i++){
        if(i == 2)
            printf("%f", valD1[i]);
        else
            printf("%f, ", valD1[i]);
    }
    printf("}\n");
}
if(getRank() == 2){
    //process 2 sends its array
    int i;
    double valD2[N] = {32.323, 1.4, 14.23};
    MPI_Send(valD2,N,MPI_DOUBLE,0,1,MPI_COMM_WORLD);
    printf("processor %d sent {",getRank());
    for(i = 0; i < 3; i++){
        if(i == 2)
            printf("%f", valD2[i]);
        else
            printf("%f, ", valD2[i]);
    }
    printf("}\n");
}

if(getRank() == 0){
    //process 0 receives the two arrays and writes them to the file
    int i;
    double valD1[N];
    double valD2[N];
    //receive the array from process 1
    MPI_Recv(valD1,N,MPI_DOUBLE,1,1,MPI_COMM_WORLD,&status);
    fprintf(f,"processor %d writes ", getRank());
    for(i = 0; i < 3; i++){
        fprintf(f,"%f ",valD1[i]);
    }
    fprintf(f," from processor 1\n");
    //receive the array from process 2
    MPI_Recv(valD2,N,MPI_DOUBLE,2,1,MPI_COMM_WORLD,&status);
    fprintf(f,"processor %d writes ", getRank());
    for(i = 0; i < 3; i++){
        fprintf(f,"%f ",valD2[i]);
    }
    fprintf(f," from processor 2\n");

}
if(getRank() == 0)
    fclose(f);
MPI_Finalize();
return 0;
}
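A variation worth noting: if rank 0 should accept the two vectors in whichever order they arrive, it can receive with MPI_ANY_SOURCE and read status.MPI_SOURCE to label the output. A minimal sketch under the same assumptions as above (tag 1, length N, an already opened file f):

#include <stdio.h>
#include <mpi.h>
#define N 100
/* Sketch: rank 0 accepts the two vectors in arrival order. */
void recv_in_arrival_order(FILE* f){
    MPI_Status status;
    double vec[N];
    int k, i;
    for(k = 0; k < 2; k++){
        MPI_Recv(vec, N, MPI_DOUBLE, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &status);
        fprintf(f, "processor 0 writes ");
        for(i = 0; i < 3; i++)
            fprintf(f, "%f ", vec[i]);
        fprintf(f, " from processor %d\n", status.MPI_SOURCE);
    }
}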
/**
ex: Write an MPI program that sums the elements of a matrix.
*/
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <math.h>
#define N 3
int getRank(){
int rank;
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
return rank;
}
int getSize(){
int size;
MPI_Comm_size(MPI_COMM_WORLD,&size);
return size;
}
int main(int argc, char** argv){
MPI_Status status;

int** mat;
int i,j;
mat = ( int** )malloc( N*sizeof( int* ) );
MPI_Init(&argc,&argv);
//initialize the matrix
for ( i = 0; i < N; i++ ){
    mat[i] = ( int* )malloc( N*sizeof(int) );
    for(j = 0; j < N; j++)
        mat[i][j] = i+j+5;
}
int sum = 0;
//this program assumes 4 processes (ranks 0-3);
//if the current process is not rank 3, it sends its row index
if(getRank() != 3){
    //just the number of the row
    int line;
    line = getRank();
    MPI_Send(&line,1,MPI_INT,3,1,MPI_COMM_WORLD);
    printf("send from %d \n", getRank());

}else{
    //process 3 receives the row indices
    int line;
    //sum up the elements of each received row
    for(i = 0; i < N; i++){
        MPI_Recv(&line,1,MPI_INT,i,1,MPI_COMM_WORLD,&status);
        printf("received from %d \n", i);
        for(j = 0; j < N; j++){
            sum = sum + mat[line][j];
        }
    }
}
MPI_Finalize();
if(sum != 0)
printf("sum : %d \n", sum);
return 0;
}
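For comparison, the same total can be obtained without explicit sends: each rank adds up one row locally and a single MPI_Reduce combines the partial sums. A minimal sketch under the same assumptions as the exercise (N = 3, exactly 4 processes, ranks 0-2 each own one row, rank 3 collects the result, matrix initialized as above):

#include <stdio.h>
#include <mpi.h>
#define N 3

int main(int argc, char** argv){
    int i, rank, partial = 0, total = 0;
    int mat[N][N];
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    /* same matrix as in the exercise: mat[i][j] = i+j+5 */
    for(i = 0; i < N; i++){
        int j;
        for(j = 0; j < N; j++)
            mat[i][j] = i + j + 5;
    }
    /* ranks 0..2 each sum one row; rank 3 contributes nothing */
    if(rank < N)
        for(i = 0; i < N; i++)
            partial += mat[rank][i];
    /* combine the partial sums at rank 3 */
    MPI_Reduce(&partial, &total, 1, MPI_INT, MPI_SUM, 3, MPI_COMM_WORLD);
    if(rank == 3)
        printf("sum : %d \n", total);
    MPI_Finalize();
    return 0;
}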
ex4.
/* (fragment) In the root process: distribute portions of array1 to the slaves. */
for(an_id = 1; an_id < num_procs; an_id++) {
    start_row = an_id*num_rows_per_process;
    ierr = MPI_Send( &num_rows_to_send, 1, MPI_INT,
          an_id, send_data_tag, MPI_COMM_WORLD);
    ierr = MPI_Send( &array1[start_row], num_rows_per_process,
          MPI_FLOAT, an_id, send_data_tag, MPI_COMM_WORLD);
}
/* and then collect the results from the slave processes,
 * here in a variable called array2, and do something with them. */
for(an_id = 1; an_id < num_procs; an_id++) {
    ierr = MPI_Recv( &array2, num_rows_returned, MPI_FLOAT,
          MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
    /* do something with array2 here */
}
/* and then print out some final result using the
 * information collected from the slaves. */

/* In each slave process: receive an array segment, here called array2. */
ierr = MPI_Recv( &num_rows_to_receive, 1, MPI_INT,
      root_process, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
ierr = MPI_Recv( &array2, num_rows_to_receive, MPI_FLOAT,
      root_process, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
/* Do something with array2 here, placing the result in array3,
 * and send array3 to the root process. */
ierr = MPI_Send( &array3, num_rows_to_return, MPI_FLOAT,
      root_process, return_data_tag, MPI_COMM_WORLD);
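The fragment above omits the declarations and the surrounding control flow. A self-contained sketch of the same master/worker pattern, under assumed names and sizes (16 float elements, arbitrary tag values, NELEM divisible by the number of processes), in which the root distributes equal slices and each worker returns the sum of its slice:

#include <stdio.h>
#include <mpi.h>
#define NELEM 16                 /* assumed problem size */
#define SEND_DATA_TAG 2001       /* arbitrary tags */
#define RETURN_DATA_TAG 2002

int main(int argc, char** argv){
    int rank, nproc, an_id, i;
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    int chunk = NELEM / nproc;   /* assume NELEM divisible by nproc */
    if(rank == 0){
        float array1[NELEM], partial, total = 0.f;
        for(i = 0; i < NELEM; i++) array1[i] = (float)i;
        /* distribute one slice to every worker */
        for(an_id = 1; an_id < nproc; an_id++)
            MPI_Send(&array1[an_id*chunk], chunk, MPI_FLOAT,
                     an_id, SEND_DATA_TAG, MPI_COMM_WORLD);
        /* the root handles its own slice */
        for(i = 0; i < chunk; i++) total += array1[i];
        /* collect one partial sum per worker, in arrival order */
        for(an_id = 1; an_id < nproc; an_id++){
            MPI_Recv(&partial, 1, MPI_FLOAT, MPI_ANY_SOURCE,
                     RETURN_DATA_TAG, MPI_COMM_WORLD, &status);
            total += partial;
        }
        printf("total = %f\n", total);
    }else{
        float array2[NELEM], partial = 0.f;
        MPI_Recv(array2, chunk, MPI_FLOAT, 0, SEND_DATA_TAG,
                 MPI_COMM_WORLD, &status);
        for(i = 0; i < chunk; i++) partial += array2[i];
        MPI_Send(&partial, 1, MPI_FLOAT, 0, RETURN_DATA_TAG, MPI_COMM_WORLD);
    }
    MPI_Finalize();
    return 0;
}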
ex5.
#include<stdlib.h>
#include<stdio.h>
#include<mpi.h>
#include<math.h>
#define N 100
double sinc(double x)
{
if(x==0.) return 1.0;
else return sin(x)/x;
}
int main(int argc, char **argv)
{
/**** Begin declarations ****/
int my_rank,nproc,source,dest=0,tag=0;
int i, Nrem,Ndiv;
double pi=4.0*atan(1), h=pi/(double)N, sum=0.,sumint;
double *buf;
MPI_Status status;
/**** End declarations ****/
/***********Init MPI***********/
MPI_Init(&argc,&argv);
MPI_Comm wcomm;
wcomm=MPI_COMM_WORLD;
MPI_Comm_size(wcomm,&nproc);
MPI_Comm_rank(wcomm,&my_rank);
/********* End MPI init **************/
Nrem=(N+1)%nproc;
Ndiv=(N+1)/nproc;
if(Nrem && my_rank==nproc-1)
Ndiv+=Nrem;
for(i=0;i<Ndiv;i++)
{
double xi,xi1;
xi=my_rank*h*(N+1)/nproc+i*h;
xi1=xi+h;
sum += sinc(xi)+sinc(xi1);
}
sum=sum*h/2;

MPI_Reduce(&sum, &sumint, 1, MPI_DOUBLE, MPI_SUM, 0, wcomm);
if(my_rank==0)
    printf("Trapezoidal rule integral=%.7g\n\n",sumint);
MPI_Finalize();
return 0;
}
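The exercise list at the end of this document also asks for the integral by the Monte Carlo method. A minimal sketch under assumed parameters (the same sinc integrand on [0, pi], 100000 samples per rank, rand() seeded differently on each rank), again combining partial results with MPI_Reduce:

#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
#include <math.h>
#define SAMPLES_PER_RANK 100000   /* assumed sample count */

static double sinc(double x){ return x==0. ? 1.0 : sin(x)/x; }

int main(int argc, char **argv){
    int i, my_rank, nproc;
    double pi = 4.0*atan(1.0), local = 0., total = 0.;
    MPI_Init(&argc,&argv);
    MPI_Comm_rank(MPI_COMM_WORLD,&my_rank);
    MPI_Comm_size(MPI_COMM_WORLD,&nproc);
    srand(my_rank + 1);                            /* different stream on each rank */
    for(i = 0; i < SAMPLES_PER_RANK; i++){
        double x = pi * rand() / (double)RAND_MAX; /* uniform on [0, pi] */
        local += sinc(x);
    }
    /* each rank's estimate: average of its samples times the interval length */
    local = local * pi / SAMPLES_PER_RANK;
    /* sum the per-rank estimates, then average them at the root */
    MPI_Reduce(&local, &total, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if(my_rank == 0)
        printf("Monte Carlo integral=%.7g\n", total / nproc);
    MPI_Finalize();
    return 0;
}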
ex9.
#include<stdlib.h>
#include<stdio.h>
#include<mpi.h>
#include<math.h>
#define N 100
double sinc(double x)
{
if(x==0.) return 1.0;
else return sin(x)/x;
}
int main(int argc, char **argv)
{
/**** Begin declarations ****/
int my_rank,nproc,source,dest;
int i;
double pi=4.0*atan(1);
double *buf=NULL,*rbuf;
MPI_Status status;
/**** End declarations ****/
/***********Init MPI***********/
MPI_Init(&argc,&argv);
MPI_Comm wcomm;
wcomm=MPI_COMM_WORLD;
MPI_Comm_size(wcomm,&nproc);
MPI_Comm_rank(wcomm,&my_rank);
/********* End MPI init **************/
if((rbuf=(double *)malloc(N*sizeof(double)))==NULL)
{
    fprintf(stderr,"Cannot allocate memory for proc %d",my_rank);
    MPI_Abort(wcomm,1);
}
if(my_rank==0)
{
    if((buf=(double *)malloc(N*sizeof(double)))==NULL)
    {
        fprintf(stderr,"Cannot allocate memory for proc %d",my_rank);
        MPI_Abort(wcomm,1);
    }
    for(i=0;i<N;i++)
        *(buf+i)=sinc(i*pi/N);
}
/* each process receives its N/nproc share of the N samples held by the root */
MPI_Scatter(buf,N/nproc,MPI_DOUBLE,rbuf,N/nproc,MPI_DOUBLE,0,wcomm);
if(my_rank==2)
{
    for(i=0;i<10;i++)
        fprintf(stdout,"%.7g\t%.7g\t%.7g\t%.7g\t%.7g\t%.7g\t%.7g\t%.7g\t%.7g\t%.7g\t\n",
                *(rbuf+i),*(rbuf+i+1),*(rbuf+i+2),*(rbuf+i+3),*(rbuf+i+4),
                *(rbuf+i+5),*(rbuf+i+6),*(rbuf+i+7),*(rbuf+i+8),*(rbuf+i+9));
}
if(my_rank==0)
{
free(buf);
free(rbuf);
}
else free(rbuf);
MPI_Finalize();
return 0;

//compile: mpicc.openmpi -o pr9 prob9.c -I/usr/lib/openmpi/include -L/usr/lib/openmpi/lib -lmpi
//run:     mpiexec.openmpi -np 4 ./pr9
}
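ex9 only scatters data from the root; the reverse direction, collecting a vector from every process at the root with MPI_Gather, is one of the exercises listed at the end of this document. A minimal sketch under assumed parameters (vectors of length N = 4 per process, filled with rank-dependent values):

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#define N 4   /* assumed per-process vector length */

int main(int argc, char **argv){
    int i, my_rank, nproc;
    double local[N], *all = NULL;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    /* each process fills its own vector */
    for(i = 0; i < N; i++)
        local[i] = my_rank * 10.0 + i;
    /* only the root needs a receive buffer large enough for everyone */
    if(my_rank == 0)
        all = (double *)malloc(N * nproc * sizeof(double));
    MPI_Gather(local, N, MPI_DOUBLE, all, N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    if(my_rank == 0){
        for(i = 0; i < N * nproc; i++)
            printf("%g ", all[i]);
        printf("\n");
        free(all);
    }
    MPI_Finalize();
    return 0;
}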
/* C Example */
#include <mpi.h>
#include <stdio.h>

int main (int argc, char* argv[])
{
    int rank, size;
    MPI_Init (&argc, &argv);                 /* starts MPI */
    MPI_Comm_rank (MPI_COMM_WORLD, &rank);   /* get current process id */
    MPI_Comm_size (MPI_COMM_WORLD, &size);   /* get number of processes */
    printf( "Hello world from process %d of %d\n", rank, size );
    MPI_Finalize();
    return 0;
}
// Task 1. Sum(1/n!)
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char** argv) {
    double result;
    int n = 0;
    if(argc>1)
        n = atoi(argv[1]);
    double startwtime = 0.0;
    double endwtime;
    // Initialize the MPI environment
    MPI_Init(&argc, &argv);
    // Find out rank, size
    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    if(world_rank==0)
        startwtime = MPI_Wtime();
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
    double f = 1.0;   // running product of this rank's local 1/(i+1) factors
    double s = 0.0;   // partial sum of terms, still missing the factors of earlier ranks
    int i;
    // split the terms 1..n into contiguous, non-overlapping chunks;
    // the last rank also takes the remainder
    int chunk = n / world_size;
    int start = world_rank * chunk;
    int end = (world_rank == world_size - 1) ? n : start + chunk;
    for(i = start; i < end; i++) {
        f /= (double)(i+1);
        s += f;
    }
    // Receive the running factorial F from the previous rank and fold it in
    double f_p = 1.0;
    if(world_rank > 0) {
        MPI_Recv(&f_p, 1, MPI_DOUBLE, world_rank-1, 0, MPI_COMM_WORLD,
                 MPI_STATUS_IGNORE);
        f *= f_p;
    }
    s *= f_p;
    // Send my F to the next rank
    if(world_rank < world_size-1) {
        MPI_Send(&f, 1, MPI_DOUBLE, world_rank+1, 0, MPI_COMM_WORLD);
    }
    MPI_Reduce(&s, &result, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if(world_rank==0) {
        endwtime = MPI_Wtime();
        printf("Result: %.10f\n", result+1);   // +1 accounts for the 1/0! term
        printf("Time elapsed: %fms\n", (endwtime-startwtime)*1000);
    }
    MPI_Finalize();
}

You will notice that the first step to building an MPI program is including the MPI header files with #include <mpi.h>. After this, the MPI environment must be initialized with:
MPI_Init(int *argc, char ***argv)

During MPI_Init, all of MPI's global and internal variables are constructed. For example, a communicator is formed around all of the processes that were spawned, and unique ranks are assigned to each process. Currently, MPI_Init takes two arguments that are not necessary, and the extra parameters are simply left as extra space in case future implementations might need them.
After MPI_Init, there are two main functions that are called. These two functions are used in almost every single MPI program that you will write.
MPI_Comm_size(MPI_Comm communicator, int* size)
MPI_Comm_size returns the size of a communicator. In our example, MPI_COMM_WORLD (which is constructed for us by MPI) encloses all of the processes in the job, so this call should return the amount of processes that were requested for the job.

MPI_Comm_rank(MPI_Comm communicator, int* rank)
MPI_Comm_rank returns the rank of a process in a communicator. Each process inside of a communicator is assigned an incremental rank starting from zero. The ranks of the processes are primarily used for identification purposes when sending and receiving messages.
A miscellaneous and less-used function in this program is:
MPI_Get_processor_name(char* name, int* name_length)
MPI_Get_processor_name obtains the actual name of the processor on which the process is executing. The final call in this program is:
MPI_Finalize()
MPI_Finalize is used to clean up the MPI environment. No more MPI calls can be made after this one.
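For reference, here is a sketch of a hello world variant that also calls MPI_Get_processor_name. The exact mpi_hello_world.c used in the transcripts below is not reproduced in this document, but a program like this prints lines in the same "Hello world from processor ..., rank ... out of ... processors" format shown there:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char** argv) {
    MPI_Init(NULL, NULL);
    int world_size, world_rank;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    // Get the name of the processor this rank is running on
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int name_len;
    MPI_Get_processor_name(processor_name, &name_len);
    printf("Hello world from processor %s, rank %d out of %d processors\n",
           processor_name, world_rank, world_size);
    MPI_Finalize();
    return 0;
}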
Running MPI Hello World
Now compile the example by typing make. My makefile looks for the MPICC environment variable. If you installed MPICH2 to a local directory, set your MPICC environment variable to point to your mpicc binary. The mpicc program in your installation is really just a wrapper around gcc, and it makes compiling and linking all of the necessary MPI routines much easier.

>>> export MPICC=/home/kendall/bin/mpicc
>>> make
/home/kendall/bin/mpicc -o mpi_hello_world mpi_hello_world.c

After your program is compiled, it is ready to be executed. Now comes the part where you might have to do some additional configuration. If you are running MPI programs on a cluster of nodes, you will have to set up a host file. If you are simply running MPI on a laptop or a single machine, disregard the next piece of information.
The host file contains names of all of the
computers on which your MPI job will
execute. For ease of execution, you
should be sure that all of these
computers have SSH access, and you
should also set up an authorized keys
file to avoid a password prompt for SSH.
My host file looks like this.
>>> cat host_file
cetus1
cetus2
cetus3
cetus4

For the run script that I have provided in the download, you should set an environment variable called MPI_HOSTS and have it point to your hosts file. My script will automatically include it in the command line when the MPI job is launched. If you do not need a hosts file, simply do not set the environment variable. Also, if you have a local installation of MPI, you should set the MPIRUN environment variable to point to the mpirun binary from the installation. After this, call ./run.perl mpi_hello_world to run the example application.
>>> export MPIRUN=/home/kendall/bin/mpirun
>>> export MPI_HOSTS=host_file
>>> ./run.perl mpi_hello_world
/home/kendall/bin/mpirun -n 4 -f host_file ./mpi_hello_world
Hello world from processor cetus2, rank 1 out of 4 processors
Hello world from processor cetus1, rank 0 out of 4 processors
Hello world from processor cetus4, rank 3 out of 4 processors
Hello world from processor cetus3, rank 2 out of 4 processors

As expected, the MPI program was launched across all of the hosts in my host file. Each process was assigned a unique rank, which was printed off along with the process name. As one can see from my example output, the output of the processes is in an arbitrary order since there is no synchronization involved before printing.
Notice how the script called mpirun. Now you might be asking, "My hosts are actually dual-core machines. How can I get MPI to spawn processes across the individual cores first before individual machines?" The solution is pretty simple. Just modify your hosts file and place a colon and the number of cores per processor after the host name. For example, I specified that each of my hosts has two cores.
>>> cat host_file
cetus1:2
cetus2:2
cetus3:2
cetus4:2

When I execute the run script again, voila!, the MPI job spawns two processes on only two of my hosts.
>>> ./run.perl mpi_hello_world
/home/kendall/bin/mpirun -n 4 -f host_file ./mpi_hello_world
Hello world from processor cetus1, rank 0 out of 4 processors
Hello world from processor cetus2, rank 2 out of 4 processors
Hello world from processor cetus2, rank 3 out of 4 processors
Hello world from processor cetus1, rank 1 out of 4 processors

MPI Send / Recv Program

// Find out rank, size
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);

int number;
if (world_rank == 0) {
    number = -1;
    MPI_Send(&number, 1, MPI_INT, 1, 0, MPI_COMM_WORLD);
} else if (world_rank == 1) {
    MPI_Recv(&number, 1, MPI_INT, 0, 0, MPI_COMM_WORLD,
             MPI_STATUS_IGNORE);
    printf("Process 1 received number %d from process 0\n", number);
}
MPI_Comm_rank and MPI_Comm_size are first used to determine the world size along with the rank of the process. Then process zero initializes a number to the value of negative one and sends this value to process one. As you can see in the else if statement, process one is calling MPI_Recv to receive the number. It also prints off the received value.
Since we are sending and receiving exactly one integer, each process requests that one MPI_INT be sent/received. Each process also uses a tag number of zero to identify the message. The processes could have also used the predefined constant MPI_ANY_TAG for the tag number since only one type of message was being transmitted.
Running the example program looks like this.
>>> tar -xzf mpi_send_recv.tgz
>>> cd mpi_send_recv
>>> make
mpicc -o send_recv send_recv.c
mpicc -o ping_pong ping_pong.c
mpicc -o ring ring.c
>>> ./run.perl send_recv
mpirun -n 2 ./send_recv
Process 1 received number -1 from process 0

As expected, process one receives negative one from process zero.
MPI Ping Pong Program
The next example is a ping pong program. In
this example, processes
use MPI_Send and MPI_Recv to continually bounce
messages off of each other until they decide to
stop. Take a look at ping_pong.c in
the example code download. The major
portions of the code look like this.

int ping_pong_count = 0;
int partner_rank = (world_rank + 1) % 2;
while (ping_pong_count < PING_PONG_LIMIT) {
    if (world_rank == ping_pong_count % 2) {
        // Increment the ping pong count before you send it
        ping_pong_count++;
        MPI_Send(&ping_pong_count, 1, MPI_INT, partner_rank, 0,
                 MPI_COMM_WORLD);
        printf("%d sent and incremented ping_pong_count "
               "%d to %d\n", world_rank, ping_pong_count,
               partner_rank);
    } else {
        MPI_Recv(&ping_pong_count, 1, MPI_INT, partner_rank, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("%d received ping_pong_count %d from %d\n",
               world_rank, ping_pong_count, partner_rank);
    }
}

This example is meant to be executed with only two processes. The processes first determine their partner with some simple arithmetic. A ping_pong_count is initiated to zero and it is incremented at each ping pong step by the sending process. As the ping_pong_count is incremented, the processes take turns being the sender and receiver. Finally, after the limit is reached (ten in my code), the processes stop sending and receiving.
An Introduction to MPI_Scatter
MPI_Scatter is a collective routine that is very
similar to MPI_Bcast (If you are unfamiliar with
these terms, please read the previous
lesson). MPI_Scatter involves a designated root
process sending data to all processes in a
communicator. The primary difference
between MPI_Bcast and MPI_Scatter is small but important. MPI_Bcast sends the same piece of data to all processes, while MPI_Scatter sends chunks of an array to different processes. Check out the illustration below for further clarification.
In the illustration, MPI_Bcast takes a single data
element at the root process (the red box) and
copies it to all other processes. MPI_Scatter takes
an array of elements and distributes the
elements in the order of process rank. The first
element (in red) goes to process zero, the
second element (in green) goes to process
one, and so on. Although the root process
(process zero) contains the entire array of
data, MPI_Scatter will copy the appropriate
element into the receiving buffer of the
process. Here is what the function prototype
of MPI_Scatter looks like.
MPI_Scatter(void* send_data, int send_count,
MPI_Datatype send_datatype,
void* recv_data,
int recv_count, MPI_Datatype recv_datatype,
int root, MPI_Comm communicator)

Yes, the function looks big and scary, but let's examine it in more detail. The first parameter, send_data, is an array of data that resides on the root process. The second and third parameters, send_count and send_datatype, dictate how many elements of a specific MPI Datatype will be sent to each process. If send_count is one and send_datatype is MPI_INT, then process zero gets the first integer of the array, process one gets the second integer, and so on. If send_count is two, then process zero gets the first and second integers, process one gets the third and fourth, and so on. In practice, send_count is often equal to the number of elements in the array divided by the number of processes. What's that you say? The number of elements isn't divisible by the number of processes? Don't worry, we will cover that in a later lesson.
The receiving parameters of the function prototype are nearly identical in respect to the sending parameters. The recv_data parameter is a buffer of data that can hold recv_count elements that have a datatype of recv_datatype. The last parameters, root and communicator, indicate the root process that is scattering the array of data and the communicator in which the processes reside.
An Introduction to MPI_Gather
MPI_Gather is the inverse of MPI_Scatter. Instead of spreading elements from one process to many processes, MPI_Gather takes elements from many processes and gathers them to one single process. This routine is highly useful to many parallel algorithms, such as parallel sorting and searching. Below is a simple illustration of this algorithm.

Similar to MPI_Scatter, MPI_Gather takes elements from each process and gathers them to the root process. The elements are ordered by the rank of the process from which they were received. The function prototype for MPI_Gather is identical to that of MPI_Scatter.

MPI_Gather(void* send_data, int send_count, MPI_Datatype send_datatype,
           void* recv_data, int recv_count, MPI_Datatype recv_datatype,
           int root, MPI_Comm communicator)

In MPI_Gather, only the root process needs to have a valid receive buffer. All other calling processes can pass NULL for recv_data. Also, don't forget that the recv_count parameter is the count of elements received per process, not the total summation of counts from all processes. This can often confuse beginning MPI programmers.
Computing average of numbers with
MPI_Scatter and MPI_Gather
In the code for this lesson, I have provided an
example program that computes the average
across all numbers in an array. The program is
in avg.c. Although the program is quite simple,
it demonstrates how one can use MPI to divide
work across processes, perform computation
on subsets of data, and then aggregate the
smaller pieces into the final answer. The
program takes the following steps:
1. Generate a random array of numbers on the root process (process 0).
2. Scatter the numbers to all processes, giving each process an equal amount of numbers.
3. Each process computes the average of their subset of the numbers.
4. Gather all averages to the root process. The root process then computes the average of these numbers to get the final average.

The main part of the code with the MPI calls looks like this:
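The core MPI calls, excerpted from the full avg.c listing reproduced later in this document (comments shortened here), are:

// Create a random array of elements on the root process
float *rand_nums = NULL;
if (world_rank == 0) {
  rand_nums = create_rand_nums(num_elements_per_proc * world_size);
}
// For each process, create a buffer that will hold a subset of the array
float *sub_rand_nums = (float *)malloc(sizeof(float) * num_elements_per_proc);
// Scatter the random numbers to all processes
MPI_Scatter(rand_nums, num_elements_per_proc, MPI_FLOAT, sub_rand_nums,
            num_elements_per_proc, MPI_FLOAT, 0, MPI_COMM_WORLD);
// Compute the average of your subset
float sub_avg = compute_avg(sub_rand_nums, num_elements_per_proc);
// Gather all partial averages down to the root process
float *sub_avgs = NULL;
if (world_rank == 0) {
  sub_avgs = (float *)malloc(sizeof(float) * world_size);
}
MPI_Gather(&sub_avg, 1, MPI_FLOAT, sub_avgs, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
// Compute the total average of all numbers on the root
if (world_rank == 0) {
  float avg = compute_avg(sub_avgs, world_size);
}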
At the beginning of the code, the root process
creates an array of random numbers.
When MPI_Scatter is called, each process now
contains elements_per_proc elements of the original
data. Each process computes the average of
their subset of data and then the root process
gathers each individual average. The total
average is computed on this much smaller
array of numbers.
Using the run script included in the code for
this lesson, the output of your program should
be similar to the following. Note that the
numbers are randomly generated, so your final
result might be different from mine.
>>> make
/home/kendall/bin/mpicc -o avg avg.c
>>> ./run.perl avg
/home/kendall/bin/mpirun -n 4 ./avg 100
Avg of all elements is 0.478699
Avg computed across original data is 0.478699

MPI_Allgather and modification of average program
So far, we have covered two MPI routines that perform many-to-one or one-to-many communication patterns, which simply means that many processes send/receive to one process. Oftentimes it is useful to be able to send many elements to many processes (i.e. a many-to-many communication pattern). MPI_Allgather has this characteristic.
Given a set of elements distributed across all processes, MPI_Allgather will gather all of the elements to all the processes. In the most basic sense, MPI_Allgather is an MPI_Gather followed by an MPI_Bcast. The illustration below shows how data is distributed after a call to MPI_Allgather.
Just like MPI_Gather, the elements from each process are gathered in order of their rank, except this time the elements are gathered to all processes. Pretty easy, right? The function declaration for MPI_Allgather is almost identical to MPI_Gather with the difference that there is no root process in MPI_Allgather.
MPI_Allgather(void* send_data, int send_count, MPI_Datatype send_datatype,
              void* recv_data, int recv_count, MPI_Datatype recv_datatype,
              MPI_Comm communicator)
I have modified the average computation code to use MPI_Allgather. You can view the source in all_avg.c from the lesson code. The main difference in the code is shown below.
// Gather all partial averages down to all the processes
float *sub_avgs = (float *)malloc(sizeof(float) * world_size);
MPI_Allgather(&sub_avg, 1, MPI_FLOAT, sub_avgs, 1, MPI_FLOAT,
              MPI_COMM_WORLD);

The partial averages are now gathered to everyone using MPI_Allgather. The averages are now printed off from all of the processes. Example output of the program should look like the following:
>>> make
/home/kendall/bin/mpicc -o avg avg.c
/home/kendall/bin/mpicc -o all_avg all_avg.c
>>> ./run.perl all_avg
/home/kendall/bin/mpirun -n 4 ./all_avg 100
Avg of all elements from proc 1 is 0.479736
Avg of all elements from proc 3 is 0.479736
Avg of all elements from proc 0 is 0.479736
Avg of all elements from proc 2 is 0.479736

As you may have noticed, the only difference between all_avg.c and avg.c is that all_avg.c prints the average across all processes with MPI_Allgather.

// Program that computes the average of an array of elements in parallel using
// MPI_Scatter and MPI_Gather
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>
#include <assert.h>
// Creates an array of random numbers. Each number has a value from 0 - 1
float *create_rand_nums(int num_elements) {
  float *rand_nums = (float *)malloc(sizeof(float) * num_elements);
  assert(rand_nums != NULL);
  int i;
  for (i = 0; i < num_elements; i++) {
    rand_nums[i] = (rand() / (float)RAND_MAX);
  }
  return rand_nums;
}
// Computes the average of an array of numbers
float compute_avg(float *array, int num_elements) {
  float sum = 0.f;
  int i;
  for (i = 0; i < num_elements; i++) {
    sum += array[i];
  }
  return sum / num_elements;
}
int main(int argc, char** argv) {
  if (argc != 2) {
    fprintf(stderr, "Usage: avg num_elements_per_proc\n");
    exit(1);
  }
  int num_elements_per_proc = atoi(argv[1]);
  // Seed the random number generator to get different results each time
  srand(time(NULL));

  MPI_Init(NULL, NULL);
  int world_rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
  int world_size;
  MPI_Comm_size(MPI_COMM_WORLD, &world_size);
  // Create a random array of elements on the root process. Its total
  // size will be the number of elements per process times the number
  // of processes
  float *rand_nums = NULL;
  if (world_rank == 0) {
    rand_nums = create_rand_nums(num_elements_per_proc * world_size);
  }
  // For each process, create a buffer that will hold a subset of the entire
  // array
  float *sub_rand_nums = (float *)malloc(sizeof(float) * num_elements_per_proc);
  assert(sub_rand_nums != NULL);
  // Scatter the random numbers from the root process to all processes in
  // the MPI world
  MPI_Scatter(rand_nums, num_elements_per_proc, MPI_FLOAT, sub_rand_nums,
              num_elements_per_proc, MPI_FLOAT, 0, MPI_COMM_WORLD);
  // Compute the average of your subset
  float sub_avg = compute_avg(sub_rand_nums, num_elements_per_proc);
  // Gather all partial averages down to the root process
  float *sub_avgs = NULL;
  if (world_rank == 0) {
    sub_avgs = (float *)malloc(sizeof(float) * world_size);
    assert(sub_avgs != NULL);
  }
  MPI_Gather(&sub_avg, 1, MPI_FLOAT, sub_avgs, 1, MPI_FLOAT, 0,
             MPI_COMM_WORLD);
  // Now that we have all of the partial averages on the root, compute the
  // total average of all numbers. Since we are assuming each process computed
  // an average across an equal amount of elements, this computation will
  // produce the correct answer.
  if (world_rank == 0) {
    float avg = compute_avg(sub_avgs, world_size);
    printf("Avg of all elements is %f\n", avg);
    // Compute the average across the original data for comparison
    float original_data_avg =
        compute_avg(rand_nums, num_elements_per_proc * world_size);
    printf("Avg computed across original data is %f\n", original_data_avg);
  }
  // Clean up
  if (world_rank == 0) {
    free(rand_nums);
    free(sub_avgs);
  }
  free(sub_rand_nums);

  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Finalize();
}
- Consider two processes, with ranks 0 and 1. Write an MPI program in which:
  - rank 0 sends a single value of type double, int, char, etc. to rank 1;
  - rank 0 sends an array of type double, int, char, etc. to rank 1;
- Consider 3 processes, with ranks 0, 1 and 2. Processes 1 and 2 each hold a vector of type double, of length N, which they send to rank 0. Rank 0 writes the data to a file.
- Write an MPI program that sums the elements of a matrix.
- Implement the task-pool concept in a master-worker scheme. A problem is split into subproblems (tasks of different sizes). The master sends tasks to the worker processes, and the workers return the results. Initially every worker is busy with one task. As soon as a worker becomes free, the master sends it a new task from the list. The process continues until the task list is exhausted.
- Write an MPI program that evaluates an integral (by the Monte Carlo method and by the trapezoidal rule).
- Consider a system with 4 processes (ranks 0, 1, 2, 3). Perform an MPI_Bcast() operation with process 1 as the root. Show the data received by nodes 0, 2 and 3 (a sketch follows this list).
- Consider a system with 4 processes (ranks 0, 1, 2, 3). Using the reduction operation MPI_Reduce(), compute the sum, product, etc. of 4 values, with the result collected at node 2 (the root node).
- Consider a system with 4 processes (ranks 0, 1, 2, 3). Using the collective operation MPI_Gather, collect at the root process vectors of type double, int, etc., of length N, specified in each process.
- Consider a system with 4 processes (ranks 0, 1, 2, 3). Using the collective operation MPI_Scatter, distribute a block of data from the root process to the other processes. Example: a vector of type double, int, etc., of length N, specified in the root process.
- Build a list to which every node appends a new element;
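A minimal sketch for the broadcast exercise above, assuming 4 processes and an int payload (the value 77 is an arbitrary choice):

#include <stdio.h>
#include <mpi.h>

int main(int argc, char** argv){
    int rank, data = 0;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if(rank == 1)
        data = 77;            /* only the root (rank 1) holds the value initially */
    /* broadcast from root = 1 to every process in MPI_COMM_WORLD */
    MPI_Bcast(&data, 1, MPI_INT, 1, MPI_COMM_WORLD);
    if(rank != 1)
        printf("node %d received %d from the broadcast\n", rank, data);
    MPI_Finalize();
    return 0;
}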
