You are on page 1of 57

OpenMP Fortran95

Parallel Programming in Fortran 95 using OpenMP


Fortran Coder http://www.fcode.cn

OpenMP Fortran ................................................................................ 4


1.1 .................................................................................................................................... 4
1.1.1 ................................................................................................................. 4
1.1.2 ..................................................................................................................... 5
1.1.3 ............................................................................................................. 5
1.2 .................................................................................................................................... 5
1.2.1 OpenMP ...........................................................................6
1.2.2 ................................................................................................. 6
OpenMP ....................................................................................................................10
2.1 Work-sharing ............................................................................................................ 10
2.1.1 !$OMP DO/!$OMP END DO.................................................................................10
2.1.2 !$OMP SECTIONS/!$OMP END SECTIONS...................................................... 14
2.1.3 !$OMP SINGLE/!$OMP END SINGLE................................................................15
2.1.4 !$OMP WORKSHARE/!$OMP END WORKSHARE.......................................... 17
2.2 Combined parallel work-sharing ............................................................................. 18
2.2.1 !$OMP PARALLEL DO/!$OMP END PARALLEL DO....................................... 18
2.2.2 !$OMP PARALLEL SECTIONS/!$OMP END PARALLEL SECTIONS.............19
2.2.3 !$OMP PARALLEL WORKSHARE/!$OMP END PARALLEL WORKSHARE 19
2.3 Synchronization constructs........................................................................19
2.3.1 !$OMP MASTER/!$OMP END MASTER............................................................19
2.3.2 !$OMP CRITICAL/!$OMP END CRITICAL....................................................... 20
2.3.3 !$OMP BARRIER.................................................................................................. 21
2.3.4 !$OMP ATOMIC.................................................................................................... 23
2.3.5 !$OMP FLUSH.......................................................................................................24
2.3.6 !$OMP ORDERED/!$OMP END ORDERED.......................................................25
2.4 Data environment constructs............................................................................................. 27
2.4.1 !$OMP THREADPRIVATE ( list)..........................................................................27
PRIVATE , SHARED & Co.............................................................................................. 30
3.1 Data scope attribute clauses...............................................................................................30
3.1.1 PRIVATE(list).........................................................................................................30
3.1.2 SHARED( list)........................................................................................................31
3.1.3 DEFAULT( PRIVATE | SHARED | NONE )..........................................................31
3.1.4 FIRSTPRIVATE( list).............................................................................................32
3.1.5 LASTPRIVATE(list)...............................................................................................33
3.1.6 COPYIN( list).........................................................................................................34
3.1.7 COPYPRIVATE(list).............................................................................................. 35
3.1.8 REDUCTION(operator:list ).................................................................................. 36

3.2 Other clauses..................................................................................................................... 37


3.2.1 IF( scalar logical expression)..................................................................................38
3.2.2 NUM_THREADS(scalar integer expression)........................................................ 38
3.2.3 NOWAIT................................................................................................................ 38
3.2.4 SCHEDULE( t ype, chunk).................................................................................... 39
3.2.5 ORDERED............................................................................................................. 42
The OpenMP run-time library........................................................................................... 43
4.1 Execution environment routines........................................................................................43
4.1.1 OMP_set_num_threads.......................................................................................... 43
4.1.2 OMP_get_num_threads.......................................................................................... 43
4.1.3 OMP_get_max_threads.......................................................................................... 44
4.1.4 OMP_get_thread_num............................................................................................44
4.1.5 OMP_get_num_procs.............................................................................................44
4.1.6 OMP_in_parallel.................................................................................................... 44
4.1.7 OMP_set_dynamic................................................................................................. 45
4.1.8 OMP_get_dynamic.................................................................................................45
4.1.9 OMP_set_nested.....................................................................................................45
4.1.10 OMP_get_nested.................................................................................................. 46
4.2 Lock routines..................................................................................................................... 46
4.2.1 OMP_init_lock and OMP_init_nest_lock.............................................................. 47
4.2.2 OMP_set_lock and OMP_set_nest_lock................................................................ 47
4.2.3 OMP_unset_lock and OMP_unset_nest_lock........................................................ 48
4.2.4 OMP_test_lock and OMP_test_nest_lock.............................................................. 48
4.2.5 OMP_destroy_lock and OMP_destroy_nest_lock..................................................48
4.2.6 ....................................................................................................................... 49
4.3 Timing routines..................................................................................................................51
4.3.1 OMP_get_wtime.....................................................................................................51
4.3.2 OMP get wtick........................................................................................................52
4.4 The Fortran 90 module omp_lib........................................................................................ 52
........................................................................................................................... 55
5.1 OMP_NUM_THREADS...................................................................................................55
5.2 OMP_SCHEDULE............................................................................................................56
5.3 OMP_DYNAMIC............................................................................................................. 56
5.4 OMP_NESTED................................................................................................................. 56


1
(1) vs2008 fortran (Project) -> (property) -> Fortran ->
(Language) Process OpenMP Directives Generate Parallel Code (/Qopenmp)
OpenMP
(2) -> -> ->
OMP_NUM_THREADS 2

program test
!$OMP PARALLEL
print*, 'ok!'
!$OMP END PARALLEL
End program

Ok
Ok

OpenMP Fortran

1.1

(Shared-memory architecture)
SMP(Symmetric Multi Processing)
(Distributed-memory architecture)
(Messages)(clusters)

OpenMP

1.1.1

ANSI X3H5
PVM MPI

1996-1997
(1)
(2)
OpenMP
Fortran C/C++
OpenMP coarse-grain parallelism
coarse-grain
fine-grain parallelism

1.1.2
OpenMP OpenMP Archi-tecture Review Board
2000 OpenMP ARB
US Department of Energy, through its ASCI program ()
Compaq Computer Corp
Fujitsu
Hewlett-Packard Company
Intel Corp.
International Business Machines
Kuck & Associates, Inc. 2000 intel
Silicon Graphics Incorporate
Sun Microsystems sun
OpenMP
OpenMP ARB

1.1.3
OpenMP Fortran95 OpenMP ARB
OpenMP OpenMP
OpenMP
OpenMP
Fortran95 Fortran77
Fortran95 Fortran77
OpenMP
OpenMP
OpenMP

2001 -2002 OpenMP v1.1 v2.0

v2.0

1.2
OpenMP

OpenMP
OpenMP OpenMP

1.2.1 OpenMP
OpenMP OpenMP
OpenMP
OpenMP
!$OMP
!$

OpenMP
!$OMPOpenMPOpenMP
Fortran95OpenMP

!$OMP PARALLEL DEFAULT(NONE) SHARED(A, B) PRIVATE(C, D) &


!$OMP REDUCTION(+:A)
!$OMPOpenMP

!$OpenMP

!$ interval = L * OMP_get_thread_num() / &


!$ (OMP_get_num_threads() - 1)
!$

1.2.2
OpenMP
//
!$OMP PARALLEL
write(*,*) "Hello"
!$OMP END PARALLEL
6


Hello

0Np-10Np1.1

!$OMP PARALLEL clause1 clause2 ...


...
!$OMP END PARALLEL
Opening

PRIVATE(list): see page 37.


SHARED( list): see page 38.
DEFAULT( PRIVATE | SHARED | NONE ) : see page 39.
FIRSTPRIVATE( list): see page 40.
COPYIN( list): see page 42.
REDUCTION(operator:list ): see page 43.
IF( scalar logical expression): see page 46.
NUM_THREADS(scalar integer expression): see page 47.

!$OMP END PARALLELPRIVATE

!$OMP END PARALLEL


OpenMP

(1)!$OMP PARALLEL clause1 clause2 / !$OMP END PARALLEL

(2)GOTO

!$OMP PARALLEL clause1 clause2 / !$OMP END PARALLEL


lexical extent
dynamic extent
!$OMP PARALLEL
write(*,*) "Hello"
call be_friendly()
!$OMP END PARALLEL
be_friendly

!$OMP PARALLEL
write(*,*) "Hello"
!$OMP PARALLEL
write(*,*) "Hi"
!$OMP END PARALLEL
!$OMP END PARALLEL
NpNp2+Np
1.2

OpenMP

OpenMP

OpenMP

2.1 Work-sharing
OpenMP

1
work-sharingdynamic extends
work-sharing
work-sharing!$OMP PARALLEL/!$OMP END PARALLEL

work-sharing
1work-sharing
2work-sharing
work-sharing
work-sharing

NOWAIT NOWAIT
NOWAIT
NOWAIT
NOWAIT

2.1.1 !$OMP DO/!$OMP END DO


do
!$OMP DO
do i = 1, 1000
...
end do
!$OMP END DO
do10

10

10001-1001101-200 2.1

!$OMP DOwork-sharing

!$OMP DO clause1 clause2 ...


...
!$OMP END DO end_clause
!$OMP DO
PRIVATE(list): see page 37.
FIRSTPRIVATE( list): see page 40.
LASTPRIVATE(list): see page 41.
REDUCTION(operator:list ): see page 43.
SCHEDULE( type, chunk ): see page 48.
ORDERED : see page 52.
NOWAIT
NOWAIT
NOWAIT
NOWAIT
doNOWAIT
NOWAIT

!$OMP FLUSH
!$OMP FLUSHNOWAIT

do!$OMP
DO/!$OMP END DOGOTO
OpenMP
work-sharing
!$OMP DO/!$OMP END DO

real(8) :: A(1000), B(1000)

11

do i = 1, 1000
B(i) = 10 * i
A(i) = A(i) + B(i)
End do
Bwork-sharing$OMP END DO

real(8) :: A(1000)
do i = 1, 999
A(i) = A(i+1)
End do
ii+1
i+1racing condition

real(8) :: A(1000), dummy(2:1000:2)


!Saves the even indices
!$OMP DO
do i = 2, 1000, 2
dummy(i) = A(i)
End do
!$OMP END DO
!Updates even indices from odds
!$OMP DO
do i = 0, 998, 2
A(i) = A(i+1)
End do
!$OMP END DO
!Updates odd indices with evens
!$OMP DO
do i = 1, 999, 2
A(i) = dummy(i+1)
End do
!$OMP END DO
doparallel region
AB

real(8) :: A(0:1000)

12

do i = 1, 1000
A(i) = A(i-1)
enddo

!$OMP DO/!$OMP END DO


ORDERED
doORDERED
ORDERED
real(8) :: A(0:1000)
!$OMP DO ORDERED
do i = 1, 1000
!$OMP ORDERED
A(i) = A(i-1)
!$OMP END ORDERED
enddo
!$OMP END DO
do
!$OMP ORDERED /!$OMP END ORDERED

!$OMP ORDERED /!$OMP END ORDERED

do i = 1, 10
do j=1,10
!$OMP DO
do k = 1, 10
A(i,j,k) =i*j*k
enddo
!$OMP END DO
enddo
enddo
i*j=10010
OpenMp
!$OMP DO
do i=1,10
do j = 1, 10
do k = 1, 10
A(i,j,k) =i*j*k
enddo
enddo
enddo

13

!$OMP END DO
1100

!$OMP DO
do k=1,10
do j = 1, 10
do i = 1, 10
A(i,j,k) =i*j*k
enddo
enddo
enddo
!$OMP END DO

Fortran90Fortran77

2.1.2 !$OMP SECTIONS/!$OMP END SECTIONS


MPMDMultiple Programs Multiple
Datawork-sharing
!$OMP SECTIONS clause1 clause2 ...
!$OMP SECTION
...
!$OMP SECTION
...
...
!$OMP END SECTIONS end_clause
!$OMP SECTION!$OMP SECTION
!$OMP END SECTIONS

45
3!$OMP SECTION
PRIVATE(list): see page 37.
FIRSTPRIVATE( list): see page 40.
14

LASTPRIVATE(list): see page 41.


REDUCTION(operator:list ): see page 43.
!$OMP END SECTIONSNOWAIT!$OMP SECTIONS / !$OMP
END SECTIONS
1
GOTO
2 !$OMP SECTION !$OMP SECTIONS / !$OMP END
SECTIONSlexical extendroutine

!$OMP SECTIONS
!$OMP SECTION
write
write(*,*) "Hello"
!$OMP SECTION
write
write(*,*) "Hi"
!$OMP SECTION
write
write(*,*) "Bye"
!$OMP END SECTIONS
HelloHiBye 2.2OpenMP

2.1.3 !$OMP SINGLE/!$OMP END SINGLE


!$OMP SINGLE
NOWAIT!$OMP END SINGLE
15


!$OMP SINGLE clause1 clause2 ...
...
!$OMP END SINGLE end_clause
NOWAIT COPYPRIVATE
COPYPRIVATE
PRIVATE(list): see page 37.
FIRSTPRIVATE( list): see page 40.
OpenMPGOTO
!$OMP SINGLE
write(*,*) "Hello"
!$OMP END SINGLE
Hello
NOWAIT
2.3

1SINGLE
2
3

16

2.1.4 !$OMP WORKSHARE/!$OMP END WORKSHARE


Fortran95forall where
DoOpenMPwork-sharing
3 !$OMP
WORKSHARE /!$OMP END WORKSHARE matmul, dot_product , sum ,
product , maxval, minval, count , any , all , spread, pack, unpack,reshape , transpose ,
eoshift , cshift, minloc and maxloc
!$OMP WORKSHARE
...
!$OMP END WORKSHARE end_clause
!$OMP END WORKSHARE
NOWAIT!$OMP
WORKSHARE

work-sharing

real(8) :: A(1000), B(1000)


!$OMP DO
do i = 1, 1000
B(i) = 10 * i
A(i) = A(i) + B(i)
enddo
!$OMP END DO

real(8) :: A(1000), B(1000)


!$OMP WORKSHARE
forall
forall(i=1:1000)
B(i) = 10 * i
end forall
A=A+B
!$OMP END WORKSHARE
Fortran!$OMP WORKSHARE/!$OMP END WORKSHARE
OpenMP(synchronizations)
overheadOpenMp

!$OMP WORKSHARE/!$OMP END WORKSHARE


OpenMP

17

1
2
3WORKSHARE

4WORKSHARE

5 WORKSHARE where
masked assignmentsworkshared
6WORKSHAREforall
masked assignmentsworkshared
ATOMIC
7ATOMIC
ATOMIC
CRITICAL
8CRITICAL
CRITICAL
9

OpenMp
!$OMP WORKSHARE
!$OMP
WORKSHARE
1
2array assignment statements
scalar
assignment statements
forallwhere
3pure
elemental
4shared
!$OMP WORKSHARE
WORKSHARE

2.2 Combined parallel work-sharing


Combined parallel work-sharing work-sharing
!$OMP PARALLEL/!$OMP END PARALLELwork-sharing
OpenMP-implementationOpenMp

2.2.1 !$OMP PARALLEL DO/!$OMP END PARALLEL DO


!$OMP DO/!$OMP END DO
!$OMP PARALLEL DO clause1 clause2 ...
...
!$OMP END PARALLEL DO

18

!$OMP PARALLEL!$OMP DO

2.2.2

!$OMP

PARALLEL

SECTIONS/!$OMP

END

PARALLEL SECTIONS

!$OMP PARALLEL SECTIONS clause1 clause2 ...


...
!$OMP END PARALLEL SECTIONS

2.2.3 !$OMP PARALLEL WORKSHARE/!$OMP END


PARALLEL WORKSHARE
!$OMP PARALLEL WORKSHARE clause1 clause2 ...
...
!$OMP END PARALLEL WORKSHARE
!$OMP PARALLEL
!$OMP
PARALLEL!$OMP WORKSHARE

2.3 Synchronization constructs

constructs

2.3.1 !$OMP MASTER/!$OMP END MASTER

!$OMP MASTER
write
write(*,*) "Hello"
!$OMP END MASTER
19

!$OMP SINGLE/!$OMP END SINGLE + NOWAIT


2.4

2.3.2 !$OMP CRITICAL/!$OMP END CRITICAL

!$OMP CRITICAL name


...
!$OMP END CRITICAL name
name
Name

!$OMP CRITICAL write_file


write
write(1,*) data
!$OMP END CRITICAL write_file
2.5 0NpNp1Np
20

2.3.3 !$OMP BARRIER

1
!$OMP CRITICAL
!$OMP BARRIER
!$OMP END CRITICAL
CRITICAL
deadlock
1
!$OMP SINGLE
!$OMP BARRIER
!$OMP END SINGLE
2
!$OMP MASTER
!$OMP BARRIER
!$OMP END MASTER
3
!$OMP SECTIONS
21

!$OMP SECTION
!$OMP BARRIER
!$OMP SECTION
...
...
!$OMP END SECTIONS
if

if
if(my_thread_ID < 5) then
!$OMP BARRIER
endif
55

!$OMP BARRIER
2!$OMP
BARRIER

OpenMp
!$OMP BARRIER
2.6 !$OMP
BARRIER1
0Np

22

2.3.4 !$OMP ATOMIC

!$OMP DO
do i = 1, 1000
a=a+i
enddo
!$OMP END DO
i

!$OMP ATOMIC

x = x operator expr
x = intrinsic_procedure(x, expr_list)

operator = +, *, -, /, .AND., .OR., .EQV. or .NEQV.


intrincic_procedure = MAX, MIN, IAND, IOR or IEOR
xexpr
atomic

xatomicexpr
expr x !$OMP
CRITICAL/!$OMP END CRITICAL
CRITICAL
xtmp = expr
!$OMP CRITICAL x
x = x operator xtmp
!$OMP END CRITICAL x
!$OMP CRITICAL/!$OMP END CRITICAL
!$OMP ATOMIC
ATOMIC!$OMP
OpenMP-implementation
OpenMP-implementation

23

2.3.5 !$OMP FLUSH


implementation

data synchronization

OpenMP-implementationone thread at a time


OpenMpOpenMP-implementation

!$OMP DO
do i = 1, 10000
!$OMP ATOMIC
A=A+i
enddo
!$OMP END DO
Aatomic1000
Atmp = 0
!$OMP DO
do i = 1, 1000
Atmp = Atmp + i
enddo
!$OMP END DO
!$OMP ATOMIC
A = A + Atmp
AtmpAatomicNp
!$OMP ATOMIC
ATOMIC
A
OpenMP-implementations

/
!$OMP FLUSH
!$OMP
FLUSH

!$OMP FLUSH
!$OMP
FLUSH
!$OMP FLUSH (variable1, variable2, ...)
OpenMP
!$OMP BARRIER

24

!$OMP CRITICAL and !$OMP END CRITICAL


!$OMP END DO
!$OMP END SECTIONS
!$OMP END SINGLE
!$OMP END WORKSHARE
!$OMP ORDERED and !$OMP END ORDERED
!$OMP PARALLEL and !$OMP END PARALLEL
!$OMP PARALLEL DO and !$OMP END PARALLEL DO
!$OMP PARALLEL SECTIONSand !$OMP END PARALLEL SECTIONS
!$OMP PARALLEL WORKSHARE and !$OMP END PARALLEL WORKSHARE

!$OMP DO
!$OMP MASTER and !$OMP END MASTER
!$OMP SECTIONS
!$OMP SINGLE
!$OMP WORKSHARE
NOWAIT

2.3.6 !$OMP ORDERED/!$OMP END ORDERED

do i = 1, 100
A(i) = 2 * A(i-1)
enddo
6059

do i = 1, 100
block1
block2
block3
enddo
Block2 block
3

!$OMP DO ORDERED
do i = 1, 100
25

block1
!$OMP ORDERED
block2
!$OMP END ORDERED
block3
enddo
!$OMP END DO
!$OMP DO
ORDERED
!$OMP
DOORDERED
ORDERED2.7

!$OMP ORDERED /!$OMP END ORDERED


ORDERED

ORDERED
ORDERED
ORDERED
!$OMP CRITICAL/!$OMP END CRITICAL
!$OMP
CRITICAL

ORDERED

!$OMP DO ORDERED
do i = 1, 1000
...
!$OMP ORDERED
...
!$OMP END ORDERED
...

26

!$OMP ORDERED
...
!$OMP END ORDERED
...
enddo
!$OMP END DO
ORDEREDORDERED
if
!$OMP DO ORDERED
do i = 1, 1000
...
if
if(i < 10) then
!$OMP ORDERED
write
write(4,*) i
!$OMP END ORDERED
else
!$OMP ORDERED
write
write(3,*) i
!$OMP END ORDERED
endif
...
enddo
!$OMP END DO

2.4 Data environment constructs


OpenMP
1OpenMP
2OpenMP

OpenMP

2.4.1 !$OMP THREADPRIVATE ( list)


My_ID

!$OMP THREADPRIVATE(a, blistab


ListCOMMONmodule
27

modulesave!$OMP
THREADPRIVATE
real(8)
real(8), save :: A(100), B
integer
integer, save :: C
!$OMP THREADPRIVATE(A, B, C)
THREADPRIVATE
COPYIN
THREADPRIVATE
COPYIN
integer
integer, save :: a
!$OMP THREADPRIVATE(a)
!$OMP PARALLEL
a = OMP_get_thread_num()
!$OMP END PARALLEL
!$OMP PARALLEL
...
!$OMP END PARALLEL
aa
THREADPRIVATE 2.8 THREADPRIVATE
THREADPRIVATE
THREADPRIVATE

28

THREADPRIVATECOPYIN COPYPRIVATE

29

PRIVATE , SHARED & Co.

!$OMP PARALLEL/!$OMP END


PARALLEL
PARALLEL

3.1 Data scope attribute clauses


3.1.1 PRIVATE(list)

!$OMP PARALLEL PRIVATE(a, b)


ab

3.1

30

5G
55GSMP
forallTHREADPRIVATE

3.1.2 SHARED( list)

!$OMP PARALLEL SHARED(c, d)


!$OMP PARALLEL/!$OMP END PARALLEL c
d3.2

OpenMP implementation
!$OMP FLUSH
racing condition
!$OMP ATOMICwriting
atomically

3.1.3 DEFAULT( PRIVATE | SHARED | NONE )

default

31

!$OMP PARALLEL DEFAULT(PRIVATE) SHARED(a)


a DEFAULT
3.3

PRIVATE SHAREDNONeDEFAULT(NONE)
THREADPRIVATE
forall
DEFAULT

3.1.4 FIRSTPRIVATE( list)

FIRSTPRIVATE
a=2
b=1
!$OMP PARALLEL PRIVATE(a) FIRSTPRIVATE(b)
ab13.4

32

FIRSTPRIVATE

FIRSTPRIVATE
5G1050G

3.1.5 LASTPRIVATE(list)

LASTPRIVATE
!$OMP DO PRIVATE(i) LASTPRIVATE(a)
do i = 1, 1000
a=i
enddo
!$OMP END DO
a1000 :
!$OMP SECTIONS LASTPRIVATE(a)
!$OMP SECTION
a=1
!$OMP SECTION
a=2
!$OMP END SECTIONS
SECTIONaa=2
SECTIONLASTPRIVATE

33

!$OMP SECTIONS LASTPRIVATE(a)


!$OMP SECTION
a=1
!$OMP SECTION
a=2
!$OMP SECTION
b=3
!$OMP END SECTIONS
last
NOWAIT

LASTPRIVATEsize
FIRSTPRIVATE

3.1.6 COPYIN( list)


THREADPRIVATECOPYIN

!$OMP THREADPRIVATE(a)
!$OMP PARALLEL
a = OMP_get_thread_num()
!$OMP END PARALLEL
!$OMP PARALLEL
...
!$OMP END PARALLEL
!$OMP PARALLEL COPYIN(a)
...
!$OMP END PARALLEL
a
THREADPRIVATEa0
3.5FIRSTPRIVATETHREADPRIVATE
PRIVATE
COPYINFIRSTPRIVATE

34

3.1.7 COPYPRIVATE(list)
!$OMP SINGLE/!$OMP END SINGLE

!$OMP SINGLE
read(1,*) a
!$OMP END SINGLE COPYPRIVATE(a)
aCOPYPRIVATE
COPYPRIVATE!$OMP SINGLE/!$OMP END SINGLE
35


OpenMP !$OMP END SINGLE COPYPRIVATE
NOWAIT COPYPRIVATECOPYPRIVATE
!$OMP END SINGLE

3.1.8 REDUCTION(operator:list )

!$OMP DO
do i = 1, 1000
a=a+i
enddo
!$OMP END DO
iREDUCTION
a !$OMP
ATOMIC / !$OMP CRITICAL
!$OMP DO REDUCTION(+:a)
do i = 1, 1000
a=a+i
enddo
!$OMP END DO
REDUCTION

PRIVATE3.1 REDUCTION

36

REDUCTION
REDUCTION

OpenMP implementation REDUCTION


REDUCTIONREDUCTION

x = x operator expr
x = intrinsic_procedure(x, expr_list)

operator = +, *, -, .AND., .OR., .EQV. or .NEQV.


intrincic_procedure = MAX, MIN, IAND, IOR or IEOR
xX
x
exprxREDUCTION
processassociative nature

REDUCTION

3.2 Other clauses

37

3.2.1 IF( scalar logical expression)

!$OMP PARALLEL IF(N > 1000)


!$OMP DO
doi=1,N
...
enddo
!$OMP END DO
!$OMP END PARALLEL
1000

3.2.2 NUM
NUM__THREADS(scalar integer expression)
!$OMP SECTIONS
4
!$OMP PARALLEL NUM_THREADS(4)
NUM THREADSOMP NUM THREADS
subroutine OMP set num threads

3.2.3 NOWAIT

NOWAIT
!$OMP PARALLEL
!$OMP DO
do i = 1, 1000
a=i
enddo
!$OMP END DO NOWAIT
!$OMP DO
do i = 1, 1000
b=i
enddo
38

!$OMP END DO
!$OMP END PARALLEL

!$OMP END DONOWAIT

NOWAIT
NOWAIT

3.2.4 SCHEDULE( t ype, chunk)

scheduling
!$OMP DO SCHEDULE(type, chunk )
SCHEDULE12
4
1STATIC :

chunk
chunk
3
!$OMP DO SCHEDULE(STATIC, chunk )
do i = 1, 600
...
enddo
!$OMP END DO
chunk6003.6

39

1chunk = 150 4chunk4


0

2chunk = 250 3
chunk

3chunk=300 2
12
4chunk=default chunkOpenMP

3
2DYNAMIC

SCHEDULE(DYNAMIC, chunk ) chunk


chunkchunk=1

3.712
33.7

40

3GUIDED

chunk
chunk OpenMP
implementation
3.8
!$OMP DO SCHEDULE(GUIDED,256)
do i = 1, 10230
...
enddo
!$OMP END DO

41

chunk25610230

DYNAMIC .
3RUNTIME3
RUNTIME
!$OMP DO SCHEDULE(RUNTIME)
OMP SCHEDULE

3.2.5 ORDERED

do i = 1, 1000
A(i) = 2 * A(i-1)
End do
!$OMP ORDERED /!$OMP END ORDERED!$OMP DO
ORDERED
!$OMP DO ORDERED
do i = 1, 1000
!$OMP ORDERED
A(i) = 2 * A(i-1)
!$OMP END ORDERED
enddo
!$OMP END DO

42

The OpenMP run-time library


OpenMP Fortran API
run-time library
OpenMP
Fortran95
omp_libOpenMP-implementation
sizekind

4.1 Execution environment routines


OpenMP Fortran API run-time library

4.1.1 OMP_set_num_threads

subroutine OMP_set_num_threads(number_of_threads)
integer(kind = OMP_integer_kind), intent(in) :: number_of_threads
end subroutine OMP_set_num_threads
Number_of_threads
OMP_set_num_threadsNumber_of_threads
OMP
NUM THREADS

4.1.2 OMP_get_num_threads

1
function OMP_get_num_threads()
integer(kind = OMP_integer_kind) :: OMP_get_num_threads
end function OMP_get_num_threads

43

4.1.3 OMP_get_max_threads
OMP
get num threads OMP get num threads
function OMP_get_max_threads()
integer(kind = OMP_integer_kind) :: OMP_get_max_threads
end function OMP_get_max_threads

4.1.4 OMP_get_thread_num
0OMP_get_num_threads()-1
function OMP_get_thread_num()
integer(kind = OMP_integer_kind) :: OMP_get_thread_num
end function OMP_get_thread_num
0

4.1.5 OMP_get_num_procs

function OMP_get_num_procs()
integer(kind = OMP_integer_kind) :: OMP_get_num_procs
end function OMP_get_num_procs

4.1.6 OMP_in_parallel
OMP_in_parallel
.true. false
function OMP_in_parallel()
logical(kind = OMP_logical_kind) :: OMP_in_parallel
end function OMP_in_parallel
44

true nested serialized


parallel regions

4.1.7 OMP_set_dynamic

subroutine OMP_set_dynamic(enable)
logical(kind = OMP_logical_kind), intent(in) :: enable
end subroutine OMP_set_dynamic
enable=.true.
SMP machine
OMP NUM THREADSsubroutine OMP set num threads

OMP set dynamicOMP DYNAMIC


OpenMP-implementation
implementation

4.1.8 OMP_get_dynamic
truefalseOpenMP-implementation
false
function OMP_get_dynamic()
logical(kind = OMP_logical_kind) :: OMP_get_dynamic
end function OMP_get_dynamic

4.1.9 OMP_set_nested
(true)(false)
subroutine OMP_set_nested(enable)
logical(kind = OMP_logical_kind), intent(in) :: enable
end subroutine OMP_set_nested
OpenMP-implementation

45

OpenMP
OMP NESTED

4.1.10 OMP_get_nested
(true)(false) OpenMP-implementation
false
function OMP_get_nested()
logical(kind = OMP_logical_kind) :: OMP_get_nested
end function OMP_get_nested

4.2 Lock routines


/LOCKSLOCKS!$OMP ATOMIC !$OMP
CRITICAL

ownership of the lock


releases the ownership
locklock

A(1:100,1:100)4
race conditions
!$OMP PARALLEL SHARED(A)
!$OMP CRITICAL
... !works on the matrix A
!$OMP END CRITICAL
!$OMP END PARALLEL
A
4 A1 = A(1:50,1:50) , A2 = A(51:100,1:50) , A3 = A(1:50,51:100) and A4=
A(51:100,51:100)4
race conditions4
OpenMP

1
2
3
4
46

nesting counter

_nest_lock
4.2.1 OMP_init_lock and OMP_init
OMP_init_

subroutine OMP_init_lock(svar)
integer(kind = OMP_lock_kind), intent(out) :: svar
end subroutine OMP_init_lock
subroutine OMP_init_nest_lock(nvar)
integer(kind = OMP_nest_lock_kind), intent(out) :: nvar
end subroutine OMP_init_nest_lock

OpenMP
implementation

4.2.2 OMP_set_lock and OMP_set_nest_lock

subroutine OMP_set_lock(svar)
integer(kind = OMP_lock_kind), intent(inout) :: svar
end subroutine OMP_set_lock
subroutine OMP_set_nest_lock(nvar)
integer(kind = OMP_nest_lock_kind), intent(inout) :: nvar
end subroutine OMP_set_nest_lock

47

4.2.3 OMP_unset_lock and OMP_unset_nest_lock

subroutine OMP_unset_lock(svar)
integer(kind = OMP_lock_kind), intent(inout) :: svar
end subroutine OMP_unset_lock
subroutine OMP_unset_nest_lock(nvar)
integer(kind = OMP_nest_lock_kind), intent(inout) :: nvar
end subroutine OMP_unset_nest_lock

4.2.4 OMP_test_lock and OMP_test_nest_lock


OMP_set_lockOMP_set_nest_lock

function OMP_test_lock(svar)
logical(kind = OMP_logical_kind) :: OMP_test_lock
integer(kind = OMP_lock_kind), intent(inout) :: svar
end function OMP_test_lock
function OMP_test_nest_lock(nvar)
integer(kind = OMP_integer_kind) :: OMP_test_nest_lock
integer(kind = OMP_nest_lock_kind), intent(inout) :: nvar
end function OMP_test_nest_lock
OMP_test_locktruefalseOMP_test_nest_lock

4.2.5 OMP_destroy_lock and OMP_destroy_nest_lock

subroutine OMP_destroy_lock(svar)
integer(kind = OMP_lock_kind), intent(inout) :: svar
end subroutine OMP_destroy_lock
subroutine OMP_destroy_nest_lock(nvar)
48

integer(kind = OMP_nest_lock_kind), intent(inout) :: nvar


end subroutine OMP_destroy_nest_lock

4.2.6

program Main
use omp_lib
implicit none
integer
integer(kind = OMP_lock_kind) :: lck
integer
integer(kind = OMP_integer_kind) :: ID
call OMP_init_lock(lck)
!$OMP PARALLEL SHARED(LCK) PRIVATE(ID)
ID = OMP_get_thread_num()
call OMP_set_lock(lck)
write
write(*,*) "My thread is ", ID
call OMP_unset_lock(lck)
!$OMP END PARALLEL
call OMP_destroy_lock(lck)
end program Main
OMP_init_lock lck
OMP_set_lock lckWrite
OMP_unset_lock write
OMP_destroy_lock
OpenMP!$OMP CRITICAL
program Main
use omp_lib
implicit none
integer
integer(kind = OMP_integer_kind) :: ID
!$OMP PARALLEL SHARED(LCK) PRIVATE(ID)
ID = OMP_get_thread_num()
!$OMP CRITICAL
write
write(*,*) "My thread is ", ID
!$OMP END CRITICAL
!$OMP END PARALLEL
end program Main
A
OpenMP

49

program Main
use omp_lib
implicit none
integer
integer(kind = OMP_lock_kind) :: lck
integer
integer(kind = OMP_integer_kind) :: ID
call OMP_init_lock(lck)
!$OMP PARALLEL SHARED(LCK) PRIVATE(ID)
ID = OMP_get_thread_num()
do while
while(.not.OMP_test_lock(lck))
... !work to do while the thread is waiting to get owner of the lock
enddo
... !work to do as the owner of the lock
!$OMP END PARALLEL
call OMP_destroy_lock(lck)
end program Main
OpenMP

module data_types
use omp_lib, only
only: OMP_nest_lock_kind
implicit none
type number
integer :: n
integer
integer(OMP_nest_lock_kind) :: lck
end type number
end module data_types
!---------------------------------------------------------!
program Main
use omp_lib
use data_types
implicit none
type
type(number) :: x
x%n = 0
call OMP_init_lock(x%lck)
!$OMP PARALLEL SECTIONS SHARED(x)
!$OMP SECTION
call add(x,20)
!$OMP SECTION
call substract(x,10)
!$OMP END PARALLEL
call OMP_destroy_lock(lck)

50

end program Main


!---------------------------------------------------------!
subroutine add(x,d)
use omp_lib
use data_types
implicit none
type
type(number) :: x
integer :: d
call OMP_set_nest_lock(x%lck)
x%n = x%n + d
call OMP_unset_nest_lock(x%lck)
end subroutine add
!---------------------------------------------------------!
subroutine substract(x,d)
use omp_lib
use data_types
implicit none
type
type(number) :: x
integer :: d
call OMP_set_nest_lock(x%lck)
call add(x,-d)
call OMP_unset_nest_lock(x%lck)
end subroutine substract
addx
xSubstractaddsubstract

4.3 Timing routines


OpenMP Fortran API run-time library

4.3.1 OMP_get_wtime

start = OMP_get_wtime()
... !work to be timed
end = OMP_get_wtime()
51

time = end - start

function OMP_get_wtime()
real(8) :: OMP_get_wtime
end function OMP_get_wtime
per-thread times

4.3.2 OMP get wtick

function OMP_get_wtick()
real(8) :: OMP_get_wtick
end function OMP_get_wtick

4.4 The Fortran 90 module omp_lib


OpenMP
implementationFortran 90 omp_lib
Fortran 90 parameter

module omp_lib
implicit none
!Defines standard variable precisions of the OpenMP-implementation
integer
integer, parameter :: OMP_integer_kind = 4
integer
integer, parameter :: OMP_logical_kind = 4
integer
integer, parameter :: OMP_lock_kind = 8
integer
integer, parameter :: OMP_nest_lock_kind = 8
!Defines the OpenMP version: OpenMP Fortran API v2.0
integer
integer, parameter :: openmp_version = 200011
!Gives the explicit interface for each routine of the run-time library
interface
subroutine OMP_set_num_threads(number_of_threads)
integer
integer(kind = OMP_integer_kind), intent
intent(in) :: number_of_threads
end subroutine OMP_set_num_threads
function OMP_get_num_threads()
integer
integer(kind = OMP_integer_kind) :: OMP_get_num_threads
end function OMP_get_num_threads
52

function OMP_get_max_threads()
integer
integer(kind = OMP_integer_kind) :: OMP_get_max_threads
end function OMP_get_max_threads
function OMP_get_thread_num()
integer
integer(kind = OMP_integer_kind) :: OMP_get_thread_num
end function OMP_get_thread_num
function OMP_get_num_procs()
integer
integer(kind = OMP_integer_kind) :: OMP_get_num_procs
end function OMP_get_num_procs
function OMP_in_parallel()
logical(kind = OMP_logical_kind) :: OMP_in_parallel
end function OMP_in_parallel
subroutine OMP_set_dynamic(enable)
logical(kind = OMP_logical_kind), intent
intent(in) :: enable
end subroutine OMP_set_dynamic
function OMP_get_dynamic()
logical(kind = OMP_logical_kind) :: OMP_get_dynamic
end function OMP_get_dynamic
subroutine OMP_set_nested(enable)
logical(kind = OMP_logical_kind), intent
intent(in) :: enable
end subroutine OMP_set_nested
function OMP_get_nested()
logical(kind = OMP_logical_kind) :: OMP_get_nested
end function OMP_get_nested
subroutine OMP_init_lock(var)
integer
integer(kind = OMP_lock_kind), intent
intent(out) :: var
end subroutine OMP_init_lock
subroutine OMP_init_nest_lock(var)
integer
integer(kind = OMP_nest_lock_kind), intent
intent(out) :: var
end subroutine OMP_init_nest_lock
subroutine OMP_destroy_lock(var)
integer
integer(kind = OMP_lock_kind), intent
intent(inout) :: var
end subroutine OMP_destroy_lock
subroutine OMP_destroy_nest_lock(var)
integer
integer(kind = OMP_nest_lock_kind), intent
intent(inout) :: var
end subroutine OMP_destroy_nest_lock
subroutine OMP_set_lock(var)
integer
integer(kind = OMP_lock_kind), intent
intent(inout) :: var
end subroutine OMP_set_lock
subroutine OMP_set_nest_lock(var)
integer
integer(kind = OMP_nest_lock_kind), intent
intent(inout) :: var
end subroutine OMP_set_nest_lock
subroutine OMP_unset_lock(var)
integer
integer(kind = OMP_lock_kind), intent
intent(inout) :: var

53

end subroutine OMP_unset_lock


subroutine OMP_unset_nest_lock(var)
integer
integer(kind = OMP_nest_lock_kind), intent
intent(inout) :: var
end subroutine OMP_unset_nest_lock
function OMP_test_lock(var)
logical(kind = OMP_logical_kind) :: OMP_test_lock
integer
integer(kind = OMP_lock_kind), intent
intent(inout) :: var
end function OMP_test_lock
function OMP_test_nest_lock(var)
integer
integer(kind = OMP_integer_kind) :: OMP_test_nest_lock
integer
integer(kind = OMP_nest_lock_kind), intent
intent(inout) :: var
end function OMP_test_nest_lock
function OMP_get_wtime()
real(8) :: OMP_get_wtime
end function OMP_get_wtime
function OMP_get_wtick()
real(8) :: OMP_get_wtick
end function OMP_get_wtick
end interface
end module omp_lib

54


OpenMP
OpenMP Fortran API run-time library
Linux/Unixcshshellsetenv

> setenv OMP_NUM_THREADS 4


shkshbash
> OMP_NUM_THREADS=4
> export OMP_NUM_THREADS
echo
> echo $OMP_NUM_THREADS
4
>
NT/2000/XP
AUTOEXEC.BAT (AUTOEXEC.BAT )

OMP_NUM_THREADS = 4

5.1 OMP_NUM_THREADS
OpenMP

Linux/Unixcshbash
> setenv OMP_NUM_THREADS 16
***************
> OMP_NUM_THREADS=16
> export OMP_NUM_THREADS
55

5.2 OMP_SCHEDULE
!$OMP DO!$OMP PARALLEL DORUNTIME
!$OMP DO SCHEDULE(RUNTIME)
chunk
chunkSTATIC, DYNAMICGUIDED!$OMP DO
OpenMP implementation
chunk1 STATIC
Linux/Unixcshbash
> setenv OMP_SCHEDULE "GUIDED,4"
*************************
> OMP_SCHEDULE=dynamic
> export OMP_SCHEDULE

5.3 OMP_DYNAMIC
SMP
SMP
OMP_DYNAMICtrue
OMP_DYNAMICOpenMP implementation
Linux/Unixcshbash
> setenv OMP_DYNAMIC TRUE
***********************
> OMP_DYNAMIC=FALSE
> export OMP_DYNAMIC

5.4 OMP_NESTED
OpenMPtrue
1.2

false
Linux/Unixcshbash

56

> setenv OMP_NESTED TRUE


> OMP_NESTED=FALSE
> export OMP_NESTED

End

57

You might also like