Professional Documents
Culture Documents
Mark McDermott
Spring 2018
Simulator
gcc as ld Debugger
.c .s .coff .elf …
§ APCS defines
– Use of registers
– Use of stack
– Format of stack-based data structure
– Mechanism for argument passing
dynamic area
§ NOTE: Procedures with four or fewer
parameters are more efficient (their arguments fit in registers a1-a4) stack pointer
stack area
§ Save code pointer (the value of pc) allows the function corresponding to a stack backtrace structure to be located
save code pointer [fp, #0] <- fp points here
return link value [fp, #-4]
return sp value [fp, #-8]
return fp value [fp, #-12]
§ Entry code {saved v7 value}
MOV ip, sp ; save current sp,
; ready to save as old sp
{saved v6 value}
STMFD sp!, {a1-a4, v1-v7, sb, fp, ip, lr, pc} ;as needed {saved v5 value}
SUB fp, ip, #4 {saved v4 value}
{saved v3 value}
§ On function exit {saved v2 value}
return link value pc {saved v1 value}
return sp value sp {saved a4 value}
return fp value fp {saved a3 value}
{saved a2 value}
§ If no use of stack, can be simple as {saved a1 value}
MOV pc, lr ; or
BX lr
# Makefile for the `dm` user-space utility, cross-compiled for ARM.
# KDIR is the kernel source tree; LIBPATH and INCPATH are derived from it.
KDIR := /usr/src/plnx_kernel
LIBPATH = $(KDIR)/lib
INCPATH = $(KDIR)/include
CC := arm-linux-gnueabihf-gcc

# clean is a command, not a file, so it must always run.
.PHONY: clean

# Rebuild dm whenever its source changes. Recipe lines must start with a tab.
dm: dm.c
	$(CC) $(CFLAGS) dm.c -o $@

# -f keeps clean from failing when dm has not been built yet.
clean:
	rm -f dm
# obj-m is a list of what kernel modules to build. The .o and other objects will be automatically built from the
# corresponding .c file - no need to list the source files explicitly.
obj-m := zed_gpio_int.o
# KDIR is the location of the kernel source. The current standard is to link to the associated source tree
# from the directory containing the compiled modules.
KDIR := /usr/src/plnx_kernel
# PWD is the current working directory and the location of our module source files.
PWD := $(shell pwd)
# default: is the default make target. The rule here says to run make with a working directory of the directory
# containing the kernel source and compile only the modules in the PWD (local) directory.
default:
	$(MAKE) -C $(KDIR) M=$(PWD) modules
# clean:: removes the build by-products from the module directory. Recipe lines must start with a tab.
clean::
	$(RM) .skeleton* *.cmd *.o *.ko *.mod.c *.exe
	$(RM) -R .tmp*
int gpio_set_pin(unsigned int target_addr, unsigned int pin_number, unsigned int bit_val)
{
unsigned int reg_data;
reg_data = *address;
if (bit_val == 0) {
reg_data&= ~ONE_BIT_MASK(pin_number);
*address = reg_data;
} else {
reg_data |= ONE_BIT_MASK(pin_number);
*address = reg_data;
}
// Prototypes
/* --------------------------------------------------------------------------
* init_spi: Initialize SPI. Two modes of operation
* channel = 0: H1KP Digital Channel
* channel = 1: H1KP Digital plus Analog Channel
* channel = 2: ADC -- 32 bit word
* channel = 3: DAC -- 32 bit word
* ------------------------------------------------------------------------ */
// -----------------------------------------------------------------------
// Enable IOMUX ports for CSPI1
// -----------------------------------------------------------------------
if ((channel == 0) || (channel == 1)) {
pm (0x020e0144, 0x00000001); // Select ALT1 for CSPI1_SCLK
pm (0x020e0148, 0x00000001); // Select ALT1 for CSPI1_MISO
pm (0x020e014c, 0x00000001); // Select ALT1 for CSPI1_MOSI
pm (0x020e01cc, 0x00000001); // Select ALT1 for CSPI1_SS0
pm (0x020e07dc, 0x00000002); // Select ALT1 for CSPI1_MISO input
pm (0x020e0518, 0x000030b0); // Select 100K PD on MISO input
pm (0x020e024c, 0x00000000); // Select ALT1 for CSPI1_SS1
}
} // End of init_spi routine
#!/bin/sh #!/bin/sh -e
# --------------------------------------------- # --------------------------------------------
# Manual way to setup device nodes and install # Automatically make nodes and insert kernel
# kernel modules
# modules on boot up
# ---------------------------------------------
# --------------------------------------------
# ### BEGIN INIT INFO
# Make a device node: # Provides: imod
# # Required-Start:
# Required-Stop:
/bin/rm /dev/spi_int # Should-Start:
/bin/mknod /dev/spi_int c 245 0 # Should-Stop:
# Default-Start: S
# # Default-Stop:
# Load kernel module of the SPI device driver: # Short-Description: Make the various device
#
# nodes and insert kernel modules
/sbin/rmmod spi_int
# Description: Located in /etc/init.d
/sbin/insmod ./spi_int.ko
### END INIT INFO
if [ $? != 0 ]; then
printf "$0: *** Error: insmod failed\n" PATH='/sbin:/bin'
exit 1
fi /bin/mknod /dev/gpio_int c 243 0
/sbin/insmod /root/code/ZED/gpio-int-
/bin/pm 0x020c406c 0x00300c03 latency/zed_gpio_int.ko
/bin/pm 0x02008008 0x01f01019
/bin/pm 0x0200800c 0x00000100
/bin/pm 0x02008020 0x80000000
/bin/dm 0x02008000 0x10
§ High-level language
– memory model
– independent of processor architecture (partially true)
Assembly .s Assembler
source module
.o
.c .o
C source module C Compiler
/*
 * Demo driver: prints a fixed test word, then the values returned by the
 * arm_ror and arm_ror_imm helpers (defined elsewhere) for counts 1 through 4.
 * Always returns 0.
 */
int main() {
    uint32_t val = 0x22;    /* test word passed to every helper call */

    /* Print the input first so each result can be compared against it. */
    printf("val = 0x%08X\n", val);

    /* arm_ror is called with counts 1 and 2. */
    printf("arm_ror(val, 1) = 0x%08X\n", arm_ror(val, 1));
    printf("arm_ror(val, 2) = 0x%08X\n", arm_ror(val, 2));

    /* arm_ror_imm is called with counts 3 and 4. */
    printf("arm_ror_imm(val, 3) = 0x%08X\n", arm_ror_imm(val, 3));
    printf("arm_ror_imm(val, 4) = 0x%08X\n", arm_ror_imm(val, 4));

    return 0;
}
EE382N-4 Class Notes 23
Inline Assembly Language in gcc
Example 1
asm("foo %1,%2,%0" : "=r" (output) : "r" (input1), "r" (input2));
Example 2
asm("foo %1,%2,%0" : "=r" (ptr->vtable[3](a,b,c)->foo.bar[baz]) : : "r"
(gcc(is) + really(damn->cool)), "r" (42));
§ Assembler:
; Computes x = (a + b) - c, where a, b, c and x are words in memory.
; r4 is reused as the address register for every variable;
; r0, r1, r2 receive a, b, c and r3 holds the running result.
ADR r4,a ; get address for a
LDR r0,[r4] ; get value of a
ADR r4,b ; get address for b, reusing r4
LDR r1,[r4] ; get value of b
ADD r3,r0,r1 ; compute a+b
ADR r4,c ; get address for c
LDR r2,[r4] ; get value of c
SUB r3,r3,r2 ; complete computation of x
ADR r4,x ; get address for x
STR r3,[r4] ; store value of x
§ Assembler:
; Computes y = (b + c) * a, where a, b, c and y are words in memory.
; r4 is reused as the address register; r0 and r1 hold loaded operands,
; r2 holds the partial sum and then the final product.
ADR r4,b ; get address for b
LDR r0,[r4] ; get value of b
ADR r4,c ; get address for c
LDR r1,[r4] ; get value of c
ADD r2,r0,r1 ; compute partial result
ADR r4,a ; get address for a
LDR r0,[r4] ; get value of a
MUL r2,r0,r2 ; compute final value for y (Rd differs from Rm, so this is also valid before ARMv6)
ADR r4,y ; get address for y
STR r2,[r4] ; store y
§ Assembler:
; Computes z = (a << 2) | (b & 15), where a, b and z are words in memory.
; r4 is reused as the address register; r0 holds the shifted a,
; r1 holds the masked b and then the combined result.
ADR r4,a ; get address for a
LDR r0,[r4] ; get value of a
MOV r0,r0,LSL #2 ; perform shift (the shift amount is an immediate and needs '#')
ADR r4,b ; get address for b
LDR r1,[r4] ; get value of b
AND r1,r1,#15 ; perform AND
ORR r1,r0,r1 ; perform OR
ADR r4,z ; get address for z
STR r1,[r4] ; store value for z
§ Assembler:
; compute and test condition
ADR r4,a ; get address for a
LDR r0,[r4] ; get value of a
ADR r4,b ; get address for b
LDR r1,[r4] ; get value for b
CMP r0,r1 ; compare a < b
BLE fblock ; if a <= b, branch to false block
after ...
§ Assembler:
ADR r2,test ; get address for test
LDR r0,[r2] ; load value for test
ADR r1,switchtab ; load address for switch table
LDR r1,[r1,r0,LSL #2] ; index switch table
switchtab:
DCD case0
DCD case1
...
§ Assembler
; loop initiation code
MOV r0,#0 ; use r0 for I
MOV r8,#0 ; use separate index for arrays
ADR r2,N ; get address for N
LDR r1,[r2] ; get value of N
MOV r2,#0 ; use r2 for f
@ test.s - minimal GNU as (ARM) program: loads 100 into R0, doubles it,
@ then raises software interrupt 11 to end the program.
        .file   "test.s"                @ plain ASCII quotes; typographic quotes are not valid string delimiters
        .text
        .global main                    @ export main so it is visible outside this file
        .type   main, %function         @ mark the symbol as a function
main:
        MOV     R0, #100                @ R0 = 100
        ADD     R0, R0, R0              @ R0 = R0 + R0
        SWI     #11                     @ software interrupt 11: end the program
        .end                            @ end of the assembly source
.file "test.s"
.text
export variable .global main
.type main, %function
main:
set the type of a
MOV R0, #100
symbol to be
ADD R0, R0, R0 either a function
SWI #11 or an object
signals the end
.end
of the program
call interrupt to
end the program
main:
LDR R1, value // load value
STR R1, result
SWI #11
§ Flow of control:
– Sequence.
– Decision: if-then-else, switch
– Iteration: repeat-until, do-while, for
BNE else
T
B endif
else:
E
endif:
BNE else
T CMP R0, R1
B endif BLE else
else: MOV R2, R0
B endif
E
else: MOV R2, R1
endif:
endif:
// find maximum
if (R0>R1) then R2:=R0
Two other options: else R2:=R1
CMP R0, R1
MOVGT R2, R0
MOVLE R2, R1 CMP R0, R1
BLE else
MOV R2, R0 MOV R2, R0
CMP R0, R1 B endif
MOVLE R2, R1 else: MOV R2, R1
endif:
if (R1==0) zero
else if (R1>0) plus
else if (R1<0) neg
TEQ R1, #0
BMI neg
BEQ zero
BPL plus
neg: ...
B exit
zero: ...
B exit
...
S3
do { S } while ( C )
loop:
S
Test C
BEQ loop
endw:
while ( C ) { S }
loop:
B test
Test C loop:
BNE endw
S
S test:
Test C
BEQ loop
B loop endw:
endw:
while ( C ) { S }
C
B test BNE endw
loop: loop:
S S
test: test:
C C
BEQ loop BEQ loop
endw: endw:
loop:
I
C
BNE endfor
A
B loop
endfor:
loop:
I MOV R0, #0
ADR R2, A
C MOV R1, #0
BNE endfor loop: CMP R1, #10
BGE endfor
S STR R0,[R2,R1,LSL #2]
ADD R1, R1, #1
A B loop
endfor:
B loop
endfor:
main:
... func:
BL func ...
... ...
.end .end
//restore R5
.end .end
§ How to pass arguments? By registers? By stack? By memory?
In what order?
§ Who should save R5? Caller? Callee?
BL leaf1 main
...
leaf1: ...
... leaf leaf
leaf
BL leaf2
...
application
load address code
application image
top of application static data
heap
top of heap
stack
top of memory
2/8/18 EE382N-4 Class Notes 66
Accessing operands
high
stack
2/8/18 EE382N-4 Class Notes 68
Procedure
func: STMFD SP!, {R4-R6, LR} low
SUB SP, SP, #0xC
...
STR R0, [SP, #0] // v1=a1
v1
v2
...
v3
ADD SP, SP, #0xC
R4
LDMFD SP!, {R4-R6, PC}
R5
R6
LR
high
stack
2/8/18 EE382N-4 Class Notes 69
Block copy example
/*
 * bcopy: copy n bytes from `from` to `to`, lowest address first.
 *
 *   to   - destination buffer, at least n bytes long
 *   from - source buffer, at least n bytes long
 *   n    - number of bytes to copy; zero or a negative value copies nothing
 *
 * The bytes are copied one at a time in ascending order, so a destination
 * that overlaps the source at a higher address will see already-overwritten
 * data.
 */
void bcopy(char *to, char *from, int n)
{
    /* Compare against zero explicitly: a bare `n--` test is true for negative
     * n and would keep copying far past both buffers. */
    while (n-- > 0)
        *to++ = *from++;
}
high
stack
2/8/18 EE382N-4 Class Notes 78
Search
mov r3, #4
low
str r3, [sp, #0] // s=4
mov r3, #0
str r3, [sp, #4] // i=0
s
loop: ldr r0, [sp, #4] // r0=i i
cmp r0, #10 // i<10? a[0]
bge end
ldr r1, [sp, #0] // r1=s :
mov r2, #4
mul r3, r0, r2
add r3, r3, #8 a[9]
ldr r4, [sp, r3] // r4=a[i]
high
stack
2/8/18 EE382N-4 Class Notes 79
Search
teq r1, r4 // test if s==a[i] low
beq end
high
stack
2/8/18 EE382N-4 Class Notes 80
Optimization
§ Remove unnecessary load/store
§ Remove loop invariant
§ Use addressing mode
§ Use conditional execution
high
stack
2/8/18 EE382N-4 Class Notes 82
Search (remove load/store)
teq r1, r4 // test if s==a[i]
beq end low
high
stack
2/8/18 EE382N-4 Class Notes 83
Search (loop invariant/addressing mode)
mov r1,
r3, #4
str r3, [sp, #0] // s=4 low
mov r0,
r3, #0
str r3, [sp, #4] // i=0
s
loop: ldr r0, [sp, #4] // r0=i
add r2, sp, #8 i
cmp r0, #10 // i<10? a[0]
bge end
ldr r1, [sp, #0] // r1=s :
mov r2, #4
mul r3, r0, r2
add r3, r3, #8
a[9]
ldr r4, [sp, r3] // r4=a[i]
ldr r4, [r2, r0, LSL #2]
high
stack
2/8/18 EE382N-4 Class Notes 84
Search (conditional execution)
teq r1, r4 // test if s==a[i]
beq end low
add
addeq r0, r0, #1 // i++
s
str r0, [sp, #4] // update i
i
bbeq loop
a[0]
:
end: str r0, [sp, #4]
cmp r0, #10 a[9]
movge r0, #-1
add sp, sp, #48
mov pc, lr
high
stack
2/8/18 EE382N-4 Class Notes 85
Optimization Summary
§ Remove unnecessary load/store
§ Remove loop invariant
§ Use addressing mode
§ Use conditional execution