You are on page 1 of 80

2012 8 24

C
C a.out
compiler

source language
target language
source programtarget

programC C
*1

1950
1980

I II

C
C
[2][1] [4]
*2 [7][3][6]

*1
*2

gcc C C++

4
.text                                   # code section
.globl _main                            # make _main visible to the linker
_main:
        pushl   %ebp                    # save the caller's frame pointer
        movl    %esp, %ebp              # %ebp = base of main's stack frame
        subl    $24, %esp               # reserve 24 bytes of stack for main
        movl    $1, -12(%ebp)           # y = 1            (y lives at -12(%ebp))
        movl    -12(%ebp), %eax         # %eax = y
        addl    $20, %eax               # %eax = y + 20
        movl    %eax, -16(%ebp)         # x = y + 20       (x lives at -16(%ebp))
        movl    %ebp, %esp              # drop the frame (this and the next line equal one leave)
        popl    %ebp                    # restore the caller's frame pointer
        ret                             # return to the caller
.subsections_via_symbols

1.1 i386

1.1

C simple.c
int main ()
{
int x ;
int y = 1;
x = y + 20;
}

simple.c 1.1 *3
0/1
CPU simple.c
a.out
CPU

3 main main
4-6 main -16(%ebp) 4
x -12(%ebp) 4 y

7 y $1 1
8 y %eax
9 %eax 20
10 %eax x

*3

i386

1.1
11-13 main *4

1. C

2. 1
3. 2 /
1 3
2

, tokenlexical analysis
syntax analysis
semantic analysis 3
main, x, y

, optimization

\n

int main()\n{\n int x;\n int y = 1;\n x = y + 20;\n}\n


C
*5 C

int, main, (, ), {, int, x, ;, int, y, =, 1, ;, x, =, y, +, 20, ;, }

; C

x, =, y, +, 20
x, =, y + 20 3 y, +,

20 3
1.2

*4

11-12 leave 1

*5

y
1.2

20

x = y + 20

x = y + 20
temp1, temp2

temp1 = y
temp2 = temp1 + 20
x = temp2

temp1, temp2 %eax x -16(%ebp)


y -12(%ebp)

1.2
1.1 gcc -s
C

1.2 C C

x = 0; while (i < 100) { x += i; i++; }

2.1
symbol
alphabet*1
2.1

{a, b, , z} {0, 1} 2

*2 string*3
s s s |s|
2.2

main exercise {a, b, , z}

4, 8 , 0, 11, 01011 {0, 1}


0, 1, 2, 5 2
0 1 empty string

|| = 0

2.2
language

language

*1


*3 word

*2

8
2.3

{0}, {00, 01, 10, 11}, {1, 11, 111, 1111, } {0, 1}

0 2 1
2
{}
2
1 {} 1

2.3
, ,

2.3.1

2 x, y x y x y concatenation
x y xy
2.4

00 11 = 0011, 111 = 111 = 111 2

x x = x = x

x n xn x0 =
i 0, j 0 xi+j = xi xj
2.5

(01)2 = 0101, (01)3 = 010101 2

2 L, M

$L M = \{\, s t \mid s \in L,\ t \in M \,\}$
LM
2.6

L = {0, 1}, M = {0, 01, 111} L M = {00, 001, 0111, 10, 101, 1111}

L {} = {} L = L, M = M = 2
L L {} = {} L = L L = L =
{}
L n Ln L0 = {}

i 0, j 0 Li+j = Li Lj

2.3.2
L L L Kleene Kleene closure

$$L^{*} = L^{0} \cup L^{1} \cup L^{2} \cup \cdots = \bigcup_{i=0}^{\infty} L^{i}$$

2.4

L+ L positive closure

$$L^{+} = L^{1} \cup L^{2} \cup L^{3} \cup \cdots = \bigcup_{i=1}^{\infty} L^{i}$$

= L {}
2.7

L = {a}
L0 = {}
L1 = {a}
L2 = {aa}
L3 = {aaa}

L = {, a, aa, aaa, }, L+ = {a, aa, aaa, }


L = {a, b}

L0 = {}
L1 = {a, b}
L2 = {aa, ab, ba, bb}

L = {, a, b, aa, ab, ba, bb, }, L+ = {a, b, aa, ab, ba, bb, }


L 2
Kleene 0

= 0 1 2
+ 1

+ = 1 2
= {}
2.8

= {0, 1} = {, 0, 1, 00, 01, 10, 11, 000, }

+ = {0, 1, 00, 01, 10, 11, 000, } 2

2.4
*4regular expression

2.1

1.
*4

10
2.
3. a a

4. r, r r | r
5. r, r r r
6. r r
7. r (r)
2
2.9

0 (0 | 1) {0, 1} :

1. 2.1 3 0, 1
2. 2.1 4 0 | 1
3. 2.1 7 (0 | 1)
4. 2.1 6 (0 | 1)
5. 2.1 5 0 (0 | 1)
2

2.2 R L(R)

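1. $L(\emptyset) = \emptyset$
2. $L(\varepsilon) = \{\varepsilon\}$
3. $L(a) = \{a\}$
4. For regular expressions $r, r'$ with languages $L(r), L(r')$: $L(r \mid r') = L(r) \cup L(r')$
5. For regular expressions $r, r'$ with languages $L(r), L(r')$: $L(r\,r') = L(r)\,L(r')$
6. For a regular expression $r$ with language $L(r)$: $L(r^{*}) = L(r)^{*}$
7. For a regular expression $r$ with language $L(r)$: $L((r)) = L(r)$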
2
2.10

2.9 {0, 00, 01, 000, 001, 010, 011, 0000, }0

0 1 0 :

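$$
\begin{aligned}
L(0\,(0 \mid 1)^{*}) &= L(0)\,L((0 \mid 1)^{*}) \\
 &= L(0)\,L((0 \mid 1))^{*} \\
 &= L(0)\,L(0 \mid 1)^{*} \\
 &= L(0)\,(L(0) \cup L(1))^{*} \\
 &= \{0\}\,\{0, 1\}^{*} \\
 &= \{0\}\,\{\varepsilon, 0, 1, 00, 01, 10, 11, \ldots\} \\
 &= \{0, 00, 01, 000, 001, 010, 011, \ldots\}
\end{aligned}
$$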
2

2.5

11

2.1

, , |
| r r rr

r+ rr
r? r |
[abc] a | b | c
[a - z] a | | z
2.11

C 10 0 0

[1 - 9] [0 - 9] 2.3, 2.4, 2.5 2

2.5
finite automaton

statetransition
2.1 1 initial state
accepting state 1

2.1
0 1 0 00

1 *5
2.3 A 5 (Q, , , q0 , F )

Q

*5

deterministic

12

transition function 1
q0 Q
F Q
2
*6

[7]

2.2

a = a = a
*7

2.6
2.1

1. {0, 1} 0 1 0

2. {a, b, c} 1 a 1 b

3. {0, 1} 0 1

2.2 (0 | 1) 0(0 | 1)(0 | 1)


2.3 C \t\n 1

2.4 C 16 0x 0X a, b, c, d, e, f,

A, B, C, D, E, F 1
2.5 C AZ, az, 09,

2.6 {0, 1}

1. 00
*6

*7

[7]

2.6

13

ab

a*
a
a

a+

a
a|b
b
2.2

2. 011
2.7 00(0 | 1)

15

3.1

1.1

lexemeC

identifier
int, while C
, keyword

constant

string literal
+, *, =, ==operator
separator
C

token

IDIF ifRELOP
*1

argc argc
453 453

*1

16

3.2
1
1
CPU

getc() 1

# include < stdio .h >


/* */
# define IF 1
/* */
/* */
int s i m p l e _ l e x i c a l _ a n a l y s i s ()
{
char c ;
/* file : */
while (( c = getc ( file )) != EOF ) {
/* if */
if ( c == i ) {
c = getc ( file );
if ( c == f ) {
return IF ;
}
}
/* */
}
}

C i if int i

3.3

1.

3.3

17

3.1

while

0-9
1-9

3.2 10

2. 1
3.

1 2
while
while 3.1
C 10 0
[1-9][0-9] 3.2

w, h, i, l, e
3.1
3.2
while 1

1. 2
while 3.1 C
3.5 2.5 C

18

<

<

3.3

<

3.4

0-9 | a-z | A-Z | _


a-z | A-Z | _

3.5

3.4

19

<
0

2
=

w
4

h
5

e
8

0-9 | a-z | A-Z | _

a-z | A-Z | _
10

11

12
0-9 | a-z |
A-Z | _

3.6

3.1

2. ()
<<= 2
3.3 <
= <<=

2 3.3 3.4 <


< =

<
< 1
<

3.4

<, <=, while, C

20

3.6

3.4.1
1
3.3

getc()
ungetc() *2
getc(), ungetc()
/* Read one character from infile; the result is whatever getc() yields,
 * including EOF at end of input. */
int nextchar(FILE *infile)
{
    int ch = getc(infile);

    return ch;
}
/* Hand character c back to infile with ungetc() so that the next read
 * from infile delivers it again. */
void backchar(int c, FILE *infile)
{
    (void)ungetc(c, infile);
}

3.4.2

<, <=, while,


0, 1, 2, 3
/* Token codes returned by the lexical analyser. */
#define LT    0     /* the operator <    */
#define LE    1     /* the operator <=   */
#define WHILE 2     /* the keyword while */
#define ID    3     /* an identifier     */

int
i, main
char yytext[80]; *3

int state; 0
int initial state; 0
FILE *file;

*2

*3

ungetc()
getc()

3.4

3.4.3
nexttoken()

yytext

1
2
3
4
5

# include < ctype .h >


int fail ( char *s , int i )
{
int j ;

6
7

for ( j = 0; j < i ; j ++) {


backchar ( s [ j ] , file );

9
10

switch ( start ) {
case 0:

11
12

start = 4;
break ;

13
14

case 4:
start = 10;

15
16

break ;
case 10:
/* */
break ;

17
18
19
20

}
return start ;

21
22

23
24

int nexttoken ()
{

25
26

int c ; /* */
char s [80]; /* */

27
28

int i = 0;

29
30

/* */
state = 0;

31
32

start = 0;
yytext [0] = \0 ;

33
34

while (1) {

35
36
37
38
39

switch ( state ) {
case 0:
c = nextchar ( file );
if ( c == || c == \ t || c == \ n )
state = 0; /* */

40
41

else if ( c == < ) {
state = 1;

42
43

s [ i ++] = c ;
} else /* 1 */

44
45

state = fail (s , i );
break ;

46

case 1:

21

22
47
48
49

c = nextchar ( file );
if ( c == = ) {
state = 2;

50
51

s [ i ++] = c ;
} else

52
53

state = 3;
break ;

54
55

case 2:
return LE ;

56
57

case 3:
backchar (c , file ); /* 1 */

58
59

return LT ;
case 4:

60
61

c = nextchar ( file );
s [ i ++] = c ;

62
63

if ( c == w )
state = 5;

64
65

else
state = fail (s , i );

66
67

break ;
case 5:

68
69

c = nextchar ( file );
s [ i ++] = c ;

70
71

if ( c == h )
state = 6;

72
73

else
state = fail (s , i );

74
75

break ;
case 6:

76
77

c = nextchar ( file );
s [ i ++] = c ;

78

if ( c == i )

79
80

state = 7;
else

81
82

state = fail (s , i );
break ;

83
84

case 7:
c = nextchar ( file );

85
86

s [ i ++] = c ;
if ( c == l )

87
88

state = 8;
else

89
90

state = fail (s , i );
break ;

91
92

case 8:
c = nextchar ( file );

93
94

s [ i ++] = c ;
if ( c == e )

95
96

state = 9;
else

97
98

state = fail (s , i );
break ;

99
100

case 9:
return WHILE ;

101

case 10:

3.4
102
103

c = nextchar ( file );
s [ i ++] = c ;

104

if ( isalpha ( c ) || c == _ )

105
106

state = 11;
else

107
108

state = fail (s , i );
break ;

109
110

case 11:
c = nextchar ( file );

111
112

s [ i ++] = c ;
if ( isalpha ( c ) || isdigit ( c ) || c == _ )

113
114

state = 11;
else

115
116

state = 12;
break ;

117
118

case 12:
backchar (c , file );
s [ i ] = \0 ; /* c \0 */
strcpy (s , yytext );

119
120
121
122
123
124

return ID ;
}
}
}

statecstate
35
fail()
start

*4
3.6 1
< 1
< <= 1 2

12 1
2 12 backchar() 1

6 a
while w, h, a

s i

fail() s i fail()
12 s yytext

*4

isalpha(c) isdigit(c) c c

23

24

3.4.4 lex
3.4
3.6 3.2

3.6
[7] 3.4

Unix lex 3.4


lex

%{
/* Token codes returned by the generated scanner. */
#define LT 0
#define LE 1
#define WHILE 2
#define ID 3
%}
%%
[ \t\n]+                {}
"<"                     { return LT; }
"<="                    { return LE; }
while                   { return WHILE; }
[A-Za-z_][0-9A-Za-z_]*  { return ID; }
%%

%{ %} %%
C
yytext

lex

3.5
nextchar() backchar()
buffer

nextchar() backchar() 1
3.7ungetc()
*5
*5

getc() 1
gets()

3.5

25

backchar()

nextchar()

3.7

nextchar()backchar() 2048
char forward

int buffer [2048];


/* */
/*
 * Advance `forward` by one position in the 2048-element buffer and
 * return the element found there.
 *
 * The buffer is handled as two halves of 1024 elements.  Leaving index
 * 1023 first loads the upper half from `file`; leaving index 2047 first
 * loads the lower half and wraps `forward` to 0.
 * NOTE(review): the reload is triggered purely by the value of
 * `forward`, so passing 1023 or 2047 a second time after the position
 * has been moved back overwrites that half with fresh input -- confirm
 * that callers never retract across a half boundary.
 */
int nextchar()
{
    int k;

    switch (forward) {
    case 1023:                          /* about to enter the upper half */
        for (k = 1024; k < 2048; k++)
            buffer[k] = getc(file);
        forward++;
        break;
    case 2047:                          /* about to wrap to the lower half */
        for (k = 0; k < 1024; k++)
            buffer[k] = getc(file);
        forward = 0;
        break;
    default:
        forward++;
        break;
    }
    return buffer[forward];
}
/* Move `forward` back by one position, wrapping from index 0 to 2047. */
void backchar()
{
    if (forward == 0) {
        forward = 2047;
    } else {
        forward--;
    }
}

3.4
backchar()
nexttoken() s

nexttoken() nexttoken()

fail() beginning
forward
# include < ctype .h >
/*
 * Abandon the transition diagram being tried: move `forward` back to
 * `beginning`, then select the start state of the next diagram
 * (0 -> 4 -> 10).  Any other value of `start`, including 10, is left
 * as it is.  Returns the new value of `start`.
 */
int fail()
{
    forward = beginning;
    if (start == 0)
        start = 4;
    else if (start == 4)
        start = 10;
    /* start == 10: no diagram left; error handling belongs here */
    return start;
}
/*
 * nexttoken -- recognise the next token using the buffered nextchar().
 *
 * Tries the diagrams for < / <= (states 0-3), while (states 4-9) and
 * identifiers (states 10-12) in turn.  Returns LT, LE, WHILE or ID, or
 * EOF once the input is exhausted.  For ID the lexeme is copied into the
 * global yytext.
 *
 * `beginning` is kept at the index of the last element consumed before
 * the current lexeme: fail() sets forward = beginning and nextchar()
 * advances before reading, so the lexeme itself starts at beginning + 1.
 * NOTE(review): this assumes `forward` already indexes the last consumed
 * element when nexttoken() is entered -- confirm against the
 * initialisation code, which this listing does not show.
 */
int nexttoken()
{
    int c;                  /* most recently read character */
    const int size = 2048;  /* number of elements in buffer[] */
    const int maxlen = 79;  /* yytext is declared with 80 chars, one for '\0' */
    int len, k;

    /* start with the first diagram */
    state = 0;
    start = 0;
    yytext[0] = '\0';
    beginning = forward;    /* nothing of this token has been read yet */
    while (1) {
        switch (state) {
        case 0:
            c = nextchar();
            if (c == ' ' || c == '\t' || c == '\n') {
                state = 0;              /* skip white space */
                beginning = forward;    /* follows forward, so it wraps with it */
            }
            else if (c == EOF)
                return EOF;             /* nothing left to tokenise */
            else if (c == '<')
                state = 1;
            else                        /* not this diagram */
                state = fail();
            break;
        case 1:
            c = nextchar();
            if (c == '=')
                state = 2;
            else
                state = 3;
            break;
        case 2:
            return LE;
        case 3:
            backchar();                 /* one character too many was read */
            return LT;
        case 4:
            c = nextchar();
            if (c == 'w')
                state = 5;
            else
                state = fail();
            break;
        case 5:
            c = nextchar();
            if (c == 'h')
                state = 6;
            else
                state = fail();
            break;
        case 6:
            c = nextchar();
            if (c == 'i')
                state = 7;
            else
                state = fail();
            break;
        case 7:
            c = nextchar();
            if (c == 'l')
                state = 8;
            else
                state = fail();
            break;
        case 8:
            c = nextchar();
            if (c == 'e')
                state = 9;
            else
                state = fail();
            break;
        case 9:
            return WHILE;
        case 10:
            c = nextchar();
            if (isalpha(c) || c == '_')
                state = 11;
            else
                state = fail();
            break;
        case 11:
            c = nextchar();
            if (isalpha(c) || isdigit(c) || c == '_')
                state = 11;
            else
                state = 12;
            break;
        case 12:
            backchar();                 /* the delimiter belongs to the next token */
            /* The lexeme occupies buffer[beginning + 1 .. forward],
             * possibly wrapping around the end of the buffer. */
            len = (forward - beginning + size) % size;
            if (len > maxlen)
                len = maxlen;           /* never write past the end of yytext */
            for (k = 0; k < len; k++)
                yytext[k] = (char)buffer[(beginning + 1 + k) % size];
            yytext[len] = '\0';
            return ID;
        }
    }
}

29

4.1
4.1.1
context free grammar
C if

selection statement if ( expression ) statement


selection statement if ( expression ) statement else statement
production rule
rule 1
nonterminal symbolterminal symbol

selection statement, expression, statement if, (, ), else

1 start symbol

1 +

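$$
\begin{aligned}
\mathit{list} &\to \mathit{list} + \mathit{digit} \\
\mathit{list} &\to \mathit{list} - \mathit{digit} \\
\mathit{list} &\to \mathit{digit} \\
\mathit{digit} &\to 0 \mid 1 \mid 2 \mid 3 \mid 4 \mid 5 \mid 6 \mid 7 \mid 8 \mid 9
\end{aligned}
\tag{4.1}
$$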
list, digit +,

30

1. V
2. T
3. P
4. S V 1
1 empty string
BNF Backus

Naur FormBNF

list ::= list + digit
list ::= list - digit
list ::= digit
digit ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9

1. (a, b, c, ) (+, , )

2. (A, B, C, )S ()
(expr, stmt )

3. (X, Y, Z, )
4. (w, x, y, z, )
5. (, , , ) (
)

6. S

4.1.2
G = (V, T, P, S) T

1
S S-

language

4.2
4.1

31

(4.1)
list list + digit

(4.2)

list list digit

(4.3)

list digit

(4.4)

digit 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9

(4.5)

(4.2) list list + digit


9 5 + 2

list list + digit

(4.2)

list digit + digit

(4.3)

digit digit + digit

(4.4)

9 digit + digit

(4.5)

9 5 + digit

(4.5)

95+2

(4.5)

A A

A A derive
A 1 0 n

1 n S sentential form
sentence

G L(G) L(G) = {w | S w} S G
G G

4.2

parse tree
(4.1)
9 5 + 2 4.1

1.
2.
3.
4. A X1 , X2 , , Xn
A X1 X2 Xn A A

result

32

list
list
digit
list

digit

digit
9

4.1

4.2.1

l r

list list + digit

{listl .v = listr .v + digit.v; }

list list digit

{listl .v = listr .v digit.v; }

list digit {list.v = digit.v; }


digit 0

{digit.v = 0; }

digit 1

{digit.v = 1; }

digit 9

{digit.v = 9; }

v
9 5 + 2
4.2

4.3
G S

$E \to E + E \mid E * E \mid (E) \mid -E \mid \mathbf{id}$ $\quad -(\mathbf{id} + \mathbf{id})$

4.4

33

list
list.v = list1.v - digit.v = 4

list.v = list1.v + digit.v = 6

list

digit

digit.v = 2

list
digit

list.v = 9

digit.v = 5
digit.v = 9 digit
9

4.2

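$E \Rightarrow -E \Rightarrow -(E) \Rightarrow -(E + E) \Rightarrow -(\mathbf{id} + E) \Rightarrow -(\mathbf{id} + \mathbf{id})$

$E \Rightarrow -E \Rightarrow -(E) \Rightarrow -(E + E) \Rightarrow -(E + \mathbf{id}) \Rightarrow -(\mathbf{id} + \mathbf{id})$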
2

leftmost derivation
rightmost derivation1
2
*1

4.4
1 2 ambiguous
2
1
4.1
(4.1)
1

stmt if (expr) stmt


| if (expr) stmt else stmt
| S1 | S2 | S3
expr E1 | E2
if 4.3
2

if (E1 ) S1 else if (E2 ) S2 else S3

*1

34
stmt

if

stmt

expr
E1
if

expr

E2

stmt

else

S1

stmt
S2

stmt

if

expr

stmt

else

E1

stmt
S2

if

expr

E2

stmt
S1

4.3 2

else then
4.3

stmt matched stmt


| unmatched stmt
matched stmt if (expr) matched stmt else matched stmt
| S1 | S2 | S3
unmatched stmt if (expr) stmt
| if (expr) matched stmt else unmatched stmt
expr E1 | E2

4.5

A A

4.6

35

A A
A-

A A1 | A2 | | Am | 1 | 2 | | n
i A A-

A 1 A | 2 A | | n A
A 1 A | 2 A | | m A |

4.6
L = {wcw | w (a | b) } c w

4.1 8 - 2 + 6

string string + string | string string | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9


4.2 S 0S1 | 01
4.3

S A1B
A 0A |
B 0B | 1B |
00101
4.4

E E+T |T
T T F |F
F (E) | id

37

5.1

1
n

O(n3 )

O(n)

2
top-down parsing
bottom-up parsing

38

type

array

num

simple

dotdot

of

num

type

simple

integer

array

num

dotdot

num

of

integer

5.1

5.2
Pascal

type simple | id | array [ simple ] of type


simple integer | char | num dotdot num

(5.1)

array [ num dotdot num ] of integer

5.1
type
array type array
type array[ simple ]of type
type
array array
array
[

predictive parsing
1

5.3 LL(1)

39

G G

1. G
2. A 1 | 2 | | n a
A

4.4 4.5

5.3 LL(1)
LL(1) LL(1) grammar
LL(1)

5.3.1 FIRST FOLLOW


FIRST FOLLOW 2

FIRST() S FIRST()
A A
FOLLOW(A) A
$ FOLLOW(A)
5.1

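$$
\begin{aligned}
E &\to T\,E' \\
E' &\to +\,T\,E' \mid \varepsilon \\
T &\to F\,T' \\
T' &\to *\,F\,T' \mid \varepsilon \\
F &\to (E) \mid \mathbf{id}
\end{aligned}
$$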

40

FIRST

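$$
\begin{aligned}
\mathrm{FIRST}(E) &= \mathrm{FIRST}(T) = \mathrm{FIRST}(F) = \{(, \mathbf{id}\} \\
\mathrm{FIRST}(E') &= \{+, \varepsilon\} \\
\mathrm{FIRST}(T') &= \{*, \varepsilon\} \\
\mathrm{FIRST}(T\,E') &= \mathrm{FIRST}(T) = \{(, \mathbf{id}\} \\
\mathrm{FIRST}(\varepsilon) &= \{\varepsilon\} \\
\mathrm{FIRST}(+\,T\,E') &= \{+\} \\
\mathrm{FIRST}(F\,T') &= \mathrm{FIRST}(F) = \{(, \mathbf{id}\} \\
\mathrm{FIRST}(*\,F\,T') &= \{*\} \\
\mathrm{FIRST}((E)) &= \{(\} \\
\mathrm{FIRST}(\mathbf{id}) &= \{\mathbf{id}\}
\end{aligned}
$$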
FOLLOW

$$
\begin{aligned}
\mathrm{FOLLOW}(E) &= \mathrm{FOLLOW}(E') = \{), \$\} \\
\mathrm{FOLLOW}(T) &= \mathrm{FOLLOW}(T') = \{+, ), \$\} \\
\mathrm{FOLLOW}(F) &= \{+, *, ), \$\}
\end{aligned}
$$
2

5.3.2 FIRST
1. a FIRST(a) = {a}
2. FIRST() = {}
3. X a a FIRST(X)
4. X FIRST(X)
5. X Y1 Y2 Yn
Y1 FIRST(Y1 ) FIRST(X)

Y1 FIRST(Y1 ) {} FIRST(X) Y2

FIRST(Y2 ) FIRST(X) Y2
FIRST(Y2 ) {} FIRST(X) ()

Y1 Y2 Yn FIRST(X)
6. FIRST(X1 X2 Xn ) 5
5 X Y1 Y2
x FIRST(X)

1. x FIRST(Y1 )

2. Y1 x FIRST(Y2 )
Y1 FIRST(Y2 )

FIRST(X) 5

5.3 LL(1)

41

{$}

FOLLOW(E)

FOLLOW(E')

FOLLOW(T)

FOLLOW(T')

FIRST(')')

FIRST(E')
FOLLOW(F)
FIRST(T')

5.2

FOLLOW

5.3.3 FOLLOW
1. FOLLOW(S) $
2. FOLLOW
A B FIRST() {} FOLLOW(B)
A B FIRST() FOLLOW(A)
FOLLOW(B)
A B FOLLOW(A) FOLLOW(B)
FOLLOW()
3

S $
A B FIRST() FOLLOW(B)
B

A B A B
FOLLOW(A) FOLLOW(B) A
B

E T E FIRST(E ) {} FOLLOW(T )

FIRST(E ) FOLLOW(T ) FIRST(E ) {}


E +T E | E
FOLLOW(E ) FOLLOW(T ) FOLLOW(E )
FOLLOW(T )
5.2
FOLLOW X Y
X Y X > Y X
Y 5.2 FOLLOW 5.1

42

5.3.4 LL(1)
DIRECTOR
FIRST FOLLOW DIRECTOR
A DIRECTOR(A, )

DIRECTOR(A, ) =

FIRST()
(FIRST() {}) FOLLOW(A)

DIRECTOR(A, ) A
FIRST()
A A
FOLLOW(A)
5.2

5.1

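$$
\begin{aligned}
\mathrm{DIRECTOR}(E,\ T\,E') &= \mathrm{FIRST}(T) = \{(, \mathbf{id}\} \\
\mathrm{DIRECTOR}(E',\ +\,T\,E') &= \mathrm{FIRST}(+\,T\,E') = \{+\} \\
\mathrm{DIRECTOR}(E',\ \varepsilon) &= \mathrm{FOLLOW}(E') = \{), \$\} \\
\mathrm{DIRECTOR}(T,\ F\,T') &= \mathrm{FIRST}(F) = \{(, \mathbf{id}\} \\
\mathrm{DIRECTOR}(T',\ *\,F\,T') &= \mathrm{FIRST}(*\,F\,T') = \{*\} \\
\mathrm{DIRECTOR}(T',\ \varepsilon) &= \mathrm{FOLLOW}(T') = \{+, ), \$\} \\
\mathrm{DIRECTOR}(F,\ (E)) &= \mathrm{FIRST}((E)) = \{(\} \\
\mathrm{DIRECTOR}(F,\ \mathbf{id}) &= \mathrm{FIRST}(\mathbf{id}) = \{\mathbf{id}\}
\end{aligned}
$$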
2

LL(1)
G A |

DIRECTOR(A, ) DIRECTOR(A, ) =

(5.2)

G LL(1) LL(1)

5.3

5.1

DIRECTOR(E , +T E ) DIRECTOR(E , ) =
DIRECTOR(T , F T ) DIRECTOR(T , ) =
DIRECTOR(F, (E)) DIRECTOR(F, id) =
LL(1) 2

5.4

5.4
A
DIRECTOR
(5.1) lookahead
nexttoken()
token *1

int lookahead ; /* */
int nexttoken (); /* */
/* n t */
void addChild ( node n , int t );
/* n */
node firstChild ( node n );
/* child */
node nextSibling ( node n );
/*
 * Consume the expected token t: when the lookahead token equals t, the
 * next token is fetched into `lookahead`; otherwise nothing is consumed.
 */
void match(int t)
{
    if (lookahead != t) {
        /* syntax error: handling is left open here */
        return;
    }
    lookahead = nexttoken();
}
/*
 * NOTE(review): the page footnote marked *1 (only the fragment
 * "C #define" survives extraction) was interleaved with this listing;
 * presumably it explains that token names such as ARRAY are #define
 * constants -- confirm against the original document.
 */

/*
 * Predictive parser for the nonterminal `type`.
 *
 * n: the parse-tree node for this occurrence of `type`.  The production
 * is chosen from the lookahead token, the children for its right-hand
 * side are attached to n, and each child is then processed from left to
 * right: match() for a token, a recursive call for a nonterminal.
 *
 * NOTE(review): addChild() is declared with an int second parameter but
 * receives the function names `simple` and `type` below; separate
 * integer codes for the nonterminals are presumably intended.
 */
void type(node n)
{
    node child;

    if (lookahead == INTEGER || lookahead == CHAR
        || lookahead == NUM) {
        /* the right-hand side is a single `simple` */
        addChild(n, simple);
        child = firstChild(n);
        simple(child);
    } else if (lookahead == HAT) {
        /* the right-hand side is HAT ID */
        addChild(n, HAT);
        addChild(n, ID);
        child = firstChild(n);
        match(HAT);             /* match() is defined with one parameter */
        child = nextSibling(child);
        match(ID);
    } else if (lookahead == ARRAY) {
        /* the right-hand side is ARRAY LPAREN simple RPAREN OF type */
        addChild(n, ARRAY);
        addChild(n, LPAREN);
        addChild(n, simple);
        addChild(n, RPAREN);
        addChild(n, OF);
        addChild(n, type);
        child = firstChild(n);
        match(ARRAY);
        child = nextSibling(child);
        match(LPAREN);
        child = nextSibling(child);
        simple(child);
        child = nextSibling(child);
        match(RPAREN);
        child = nextSibling(child);
        match(OF);
        child = nextSibling(child);
        type(child);
    } else {
        /* syntax error: no production of `type` starts with this token */
    }
}
/*
 * Predictive parser for the nonterminal `simple`.
 *
 * n: the parse-tree node for this occurrence of `simple`.  The lookahead
 * token selects the production (INTEGER, CHAR, or NUM DOTDOT NUM); its
 * tokens are attached to n as children and then consumed with match().
 */
void simple(node n)
{
    node child;

    if (lookahead == INTEGER) {
        addChild(n, INTEGER);
        child = firstChild(n);
        match(INTEGER);         /* match() is defined with one parameter */
    } else if (lookahead == CHAR) {
        addChild(n, CHAR);
        child = firstChild(n);
        match(CHAR);
    } else if (lookahead == NUM) {
        addChild(n, NUM);
        addChild(n, DOTDOT);
        addChild(n, NUM);
        child = firstChild(n);
        match(NUM);
        child = nextSibling(child);
        match(DOTDOT);
        child = nextSibling(child);
        match(NUM);
    } else {
        /* syntax error: no production of `simple` starts with this token */
    }
}

5.5 LL(1)
G

1. G
2. A 1 | 2 | | n a
A

5.5 LL(1)

5.5.1

2
*2

4.4

5.4

C if
S iES | iESeS | a
Eb

S iESS | a
S eS |
Eb
LL(1) LL(1)
S eS S
S

void S_dash() { match(e); S(); }


2
4.4 else then

5.5.2

expr expr + term


term 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
expr()

expr() { expr(); match(PLUS); term(); }


expr()

*2

nondeterministic

45

46

5.5.3 DIRECTOR
2 LL(1) (5.2)

5.5

S aBd, B b | bc LL(1) DIRECTOR(B, b)

DIRECTOR(B, bc)

void S() { match(a); B(); match(d); }


void B() { match(b); { match(b); match(c); } }
match(b) match(b); match(c);
*3
abcd

1. S()
2. S() B()
3. B() match(b) abd abcd
b
B()

4. B() match(b); match(c); abcd

S aBd, B b(c | )

void S() { match(a); B(); match(d); }


void B() { match(b); { if (lookahead == c) match(c); } }
LR(1)

*3

5.5 LL(1)

5.1 FIRST FOLLOW

SD
A aA |
B bAC | AcD
C bC | Ac
D BA | d
5.2 5.1 LL(1)
5.3 LL(1)

S AaAb | BbBa
A
B

47

49

4.6
LL(1)

C int x = 10.0 + 2; int x = (int)(10.0 +


(float)2);(1) 2 float (2) 10.0 + 2.0
float (3) int x
C

semantic analysis

6.1




static

symbol table

50

6.1.1

lexname MAXLEN
lextoken 2

MAXENT
/* One symbol-table entry: a name of at most MAXLEN chars and the integer
 * recorded for it (presumably the token code of that lexeme). */
typedef struct table_ent {
    char lexname[MAXLEN];
    int lextoken;
} table_ent;    /* the typedef needs a name for the declaration below */

/* The symbol table: room for MAXENT entries. */
table_ent lextable[MAXENT];

int insert(char *s, int t) s, t

-1

int lookup(char *s) s


-1
C

1.

2.

3.
insert lookup

1 lexname char malloc()


2
3 *1

6.1.2

1.

*1

[5]

6.2

2.

scope
C

1. global variable

2.
3. local variable

4.

1 2, 3

1. extern

2.

3.

4.

5.
C
goto

I ML II Java
C

6.2

51

52

El.val = Er.val + T.val

float

int

T.val = y.val
E.val = T.val E

T.val = x.val

y.valfloat

6.1

I ML
x div y x, y int
4.2

list list div digit {listl .type =


if (listr .type == int && digit.type == int)
then int else }
list digit {list.type = digit.type; }
digit 0 {digit.type = int; }

digit 9 {digit.type = int; }

4.2

C
int, signed int, unsigned int, long, signed long, unsigned long, short,
x + y x float y int
y float
float *2 y
x y y
float y
float 6.1

*2

[6] A6.5

6.3

6.3
6.2
4.2

6.3.1
2 2 32

+ 9, 4 / 2 infix notation
2 prefix

notation2 postfix notation

6.1 E

1. E E E
2. 2 op E1 op E2 E1 E2 op
E1 , E2 E1 , E2
3. (E) E E E
2
6.1

32 + 3 32 3 + 9 + 4 3 9 4 3 +

(1 + 2) 4 1 2 + 4 2
1
1

1. push
2. op 2 x, y
popx op y
6.1

6.3.2
1

53

54

E E+E
E EE
E EE
EE /E

(6.1)

E (E)
E0

E9

(6.1) putchar

E E+E

{putchar( + ); }

E EE

{putchar( ); }

E EE

{putchar( ); }

EE /E

{putchar( / ); }

E (E)
E0

{putchar( 0 ); }

E9

{putchar( 9 ); }

(6.1)
(3 + 4) 5
6.2

6.2 3 4 + 5

6.4
6.2
(6.1)

6.2 G G
translation scheme
G underlying grammar
semantic action2
6.2

(6.1)

6.4

55

E putchar('*');
E
(
E

E
)
putchar('+');

putchar('5');

E putchar('4');

E putchar('3');

3
6.2 (3 + 4) 5

E E + E {putchar( + ); }
E E E {putchar( ); }
E E E {putchar( ); }
E E / E {putchar( / ); }
E (E)

(6.2)

E 0 {putchar( 0 ); }

E 9 {putchar( 9 ); }
2

6.3

(6.2) 4 + 3 5

6.3
4 3 5 +
6.4

(6.2) 4.5

56

E
E

4 putchar('4'); E

putchar('+');

putchar('*');

3 putchar('3'); 5 putchar('5);
6.3

4 + 3 5

E (E)E
| 0 {putchar( 0 ); } E
|
| 9 {putchar( 9 ); } E
E + E {putchar( + ); } E
| E {putchar( ); } E
| E {putchar( ); } E
| /E {putchar( / ); } E
|
3 (1 + 2) 6.4

6.4.1

X.v X
v
v X attribute

6.5

6.5

57

putchar('3');

E'

putchar('*');

E'

E'

putchar('1');

E'

putchar('+'); E'

putchar('2');

E'

6.4 (6.2) 3 (1 + 2)

1
S L

6.5.1

v = E
E 1

58

2
6.3 A A x A.x = f (c1 , c2 , , cn )
c1 , c2 , , cn A x A
synthesized attribute2
6.4 A A x A.x = f (c1 , c2 , , cn )
c1 , c2 , , cn A x A
inherited attribute2
A

A x x

B y A y

6.5

l, r

E E+T

{El .val = Er .val + T.val; }

ET

{E.val = T.val; }

T T F

{Tl .val = Tr .val F.val; }

T F

{T.val = F.val; }

F (E)

{F.val = E.val; }

F digit

{F.val = digit.lexval; }

2
6.6

L in

DT L

{L.in = T.type; }

T int

{T.type = integer; }

T f loat

{T.type = f loat; }

L L, id

{Lr .in = Ll .in; addtype(id.entry, Ll .in); }

L id

{addtype(id.entry, L.in); }

1 1

6.5.2 S
6.5 S S-attributed

definition
1.

6.5
2.
2
6.7

6.5 S 2

S 1

6.5.3 L
6.6 L L-attributed

definition
1.
2. A X1 X2 Xn Xj
X1 , X2 , , Xj1 A
3. A X1 X2 Xn Xj
Xj

4. A X1 X2 Xn A

2
6.8

L type in

*3 entry id
entry

addtype(id.entry, L.in) id.entry L.in


D T {L.in = T.type; } L
T int {T.type = integer; }
T float {T.type = f loat; }
L {Lr .in = Ll .in; } L , id {addtype(id.entry, Ll .in); }
L id {addtype(id.entry, L.in); }
2
6.6 1 4 L
S S L

S L 1

6.9

6.8 int id, id, id

6.5 L id

*3

int i, j; int i, j int

59

60

L1.in = T.type

int

T.type = integer

L2.in = L1.in

L3.in = L2.in

L3

id1

L1

L2

id2

id3

addtype(id3.entry , L1.in)

addtype(id2.entry, L2.in)

addtype(id1.entry, L3.in)

6.5 L

T.type = integer;
T.type L1 .in = T.type;

61

CPU
CPU
Pentium gcc
CPU

C ML, Java

C
CPU
*1 2
C

7.1
7.1.1

CPU

*2 1

address 0 32 232 1

*1
*2

62

OS

7.1

OS OS



malloc()

7.1
OS

*3
*3

7.1

malloc()

7.1.2 CPU
CPU
1

Pentium
1. general-purpose register
%eax, %ebx, %ecx,

%edx
base pointer%ebp stack pointer%esp

2. condition flag
%zf %sf

3. instruction pointerprogram counter


CPU %eip

7.1.3

Pentium AT&T

[]

[:] 1 [, 2 , , n ]


1.1 3

*4 2

relocatable
*4

63

64
1.

2.
( R ) (%R)( R + n) n(%R)

*5
$
Pentium

2 2 addl, subl, imull


2 add %ebx,%eax

%ebx %eax %eax

1 negdec1 inc1

1 inc %eax

%eax 1 %eax

2 1 2

movl movl %eax,%ebx %eax %ebx

2 1 2

1 cmpl

cmpl $4,%eax 4 %eax

zf = 0, sf = 0

4 < %eax

zf = 1, sf = 0

4 = %eax

zf = 0, sf = 1

4 > %eax

jmp 1

%eip

jge

sf = 0 1 2

*5

7.2

65

(x, y 1 2 )
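| Jump | Flags tested | Condition |
|------|--------------|-----------|
| jg   | zf = 0 and sf = 0 | (x < y) |
| jge  | sf = 0 | (x ≤ y) |
| je   | zf = 1 | (x = y) |
| jne  | zf = 0 | (x ≠ y) |
| jl   | sf = 1 | (x > y) |
| jle  | zf = 1 or sf = 1 | (x ≥ y) |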

%esp

push
pop 7.1

push %eax %eax

%esp 1 4 *6 pop %ebx


%ebx %esp 4

call 1

ret

7.2
main
foo

foo main

foo main
foo

foo foo
*7 foo

foo

C
int foo ( int x )
{
int y = x * x ;

*6
*7

1 32 4

66

esp

esp
ebp

esp
eip
1

esp

...

esp

eip

eip

...

...

...

...

foo() l.5

foo() l.6

ebp
main() l.7

ebp

eip

ebp
main() l.8

7.2

ebp
foo() l.4

foo

return y +2;
}

main C
int main ()
{
foo (1);
}

foo
1

_foo:
        pushl   %ebp                    # save the caller's frame pointer
        movl    %esp, %ebp              # %ebp = base of foo's stack frame
        subl    $24, %esp               # reserve 24 bytes of stack for foo
        movl    8(%ebp), %eax           # %eax = x        (argument at 8(%ebp))
        imull   8(%ebp), %eax           # %eax = x * x
        movl    %eax, -12(%ebp)         # y = x * x       (y lives at -12(%ebp))
        movl    -12(%ebp), %eax         # %eax = y
        addl    $2, %eax                # %eax = y + 2, the return value
        movl    %ebp, %esp              # drop foo's locals
        popl    %ebp                    # restore the caller's frame pointer
        ret                             # pop the return address into %eip

main foo

movl    $1, (%esp)              # store the argument 1 at the top of the stack
call    _foo                    # push the return address and jump to foo

foo foo
foo 7.2

7.2
1. main 7 : (%esp)
1 x
*8

2. main 8 : foo call ( %eip +1)


%eip foo foo
3 %eip

foo 3
3. foo 3 :
4. foo 4 : %ebp foo
%ebp

5. foo 5 : %esp %ebp


foo %ebp
%esp

6. foo 6 : %esp 24
6 y *9

7. foo 7-9 : y x 8(%ebp)


2
foo 7.3

1. foo 10-11 : Pentium


%eax y + 2
%eax

2. foo 12 : %ebp %esp


5 %ebp
%esp

3. foo 13 : %ebp
4
%ebp

4. foo 14 : foo ret


%eip 2

%eip %eip
call
+1 call

*8

*9 -12(%ebp) 3

67

68

esp

y
y+2
esp
ebp

ebp
eip

eip

eip

esp

...

...

...

...
ebp

eax

ebp

eip
foo() l.10-11

foo() l.12

7.3

foo() l.13

foo

esp

foo() l.14

69

C
7
Pentium AT&T

8.1 C
8.1.1
C
{ 1 ; n ; 1 ; m ; }

C C

1 , , n
1

n
1

push
1 , , m

70

int x = 10;
x -16(%ebp)
-16(%ebp) x

movl    $10, -16(%ebp)          # x = 10: AT&T order is source, destination; immediates take $

pop

8.1.2
C
if () 1 else 2

cmpl R, 0
je L1
1

jmp L2
L1 :

L2 :
C 0 1 0 2

8.1.3
while while

while ()

L1 :

cmpl R, 0
je L2
jmp L1
L2 :

8.1 C
while break while

jmp L2 continue
jmp L1

8.1.4
C *1
7.2
v v = e ; loc(v)
v

e R
movl R, loc(v)
i = foo(); e
%eax

call foo
movl %eax, loc(i)
e
inst e1 e2

e1 R
inst e2 , R
x + y

movl loc(x), R
addl loc(y), R
a b + y

movl loc(a), R
imul loc(b), R
movl R, R
addl loc(y), R
a b R 3 R
R

*1

e e
I

71

72

8.1.5

a b + y

movl loc(a), R
imul loc(b), R
movl R, R
addl loc(y), R
a b R R
R
3 1

movl loc(a), R
imul loc(b), R
addl loc(y), R

1
100
40 30

optimization

8.2

while

8.2.1
three-address code

[2] 8.1

1. x = y op z op

8.2

73

2. x = op y op
3. x = yy x
4. goto LL
5. if x op y goto Lop x op y L

1
2
x + y z

t1 = y * z
t2 = x + t1

8.2.2
6.4 S L

S id = E
E E+E
E EE
E E
E (E)
E id
id = id + id * id 8.1

E 8.1

*2 val(id)
id

t4 = val(id2 )

E4

t1 = val(id3 )

E1

t2 = val(id4 )
t3 = t1 t2

E2
E3

t5 = t4 + t3

E5

id1 = t5

*2

74

id1

E5

t5

E4
t4

id2

E1

E3 t3

E2

t2

t1

id3

id4

8.1 id = id + id * id

S
S place
newtemp()
gencode()

S id = E {gencode(id =E.place); }
E {E.place = newtemp(); }
E1 + E2 {gencode(E.place = E1 .place + E2 .place); }
E {E.place = newtemp(); }
E1 E2 {gencode(E.place = E1 .place E2 .place); }
E {E.place = newtemp(); }
E1 {gencode(E.place = E1 .place); }
E {E.place = newtemp(); }
(E1 ) {gencode(E.place = E1 .place); }
E {E.place = newtemp(); }
id {gencode(E.place = val(id)); }
while

while
S while (E) S
r

8.2
E

L1 :

if E.place = 0 goto L2
Sr
goto L1
L2 :
S
S begin, end

L1 , L2 newlabel()

S {S.begin = newlabel(); S.end = newlabel(); }


{gencode(S.begin : ); }
while (E) {/ E /}
{gencode(if E.place = 0 goto S.end); }
S {/ Sr /}
{gencode(goto S.begin); }
{gencode(S.end : ); }

75

77

[1] Alfred V. Aho, Monica S. Lam, Ravi Sethi, and Jeffrey D. Ullman. Compilers Principles, Techniques, & Tools (Second Edition). Addison Wesley, 2007.
[2] Alfred V. Aho, Ravi Sethi, and Jeffrey D. Ullman. Compilers - Principles, Techniques, and Tools. Addison-Wesley, 1986. : III
.

[3] . . , 1999.
[4] . . , 2001.
[5] A. V. , J. E. , J. D. . I.
, 1977.

[6] B. W. , D. M. . C 2 . , 1989.
[7] J. , R. , J. . I [ 2
]. , 2003.

79

1
1.1

1.2

x, =, 0, ;, while, (, i, <, 100, ), {, x, +=, i, ;, i, ++, ;, }

4
4.1

string

string

string
string

string

string

string

string

string

string

+,

80
4.2

0 n (n 1) 1 n

0, 1 {, }

4.3

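$S \Rightarrow A1B \Rightarrow 0A1B \Rightarrow 00A1B \Rightarrow 001B \Rightarrow 0010B \Rightarrow 00101B \Rightarrow 00101$

$S \Rightarrow A1B \Rightarrow A10B \Rightarrow A101B \Rightarrow A101 \Rightarrow 0A101 \Rightarrow 00A101 \Rightarrow 00101$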
4.4

E T E
E +T E |
T FT
T F T |
F (E) | id

You might also like