You are on page 1 of 7

Speed

Not affected by compiler optimization

r0, r18-r25, r26-r27 (X), r30-r31 (Z), r1 (must be cleared before returning)

Arguments are allocated left to right (r25 down to r18), each aligned to an even register.

Argument 8-bit 16-bit 32-bit 64-bit

Registers r24 r25:r24 r25:r24:r23:r22 r25:r24:r23:r22:r21:r20:r19:r18

Return size (data length): 8-bit | 16-bit | 32-bit | 64-bit

Registers: r24 | r25:r24 | r25:r24:r23:r22 | r25:r24:r23:r22:r21:r20:r19:r18 (r25-r18)

/* Return ul minus b. The 8-bit operand is widened before the subtraction,
 * and the unsigned result wraps modulo 2^32 when b exceeds ul. */
uint32_t subit(uint32_t ul, uint8_t b)
{
    const uint32_t difference = ul - b;

    return difference;
}

#include <avr/io.h>

; uint32_t subit(uint32_t ul, uint8_t b)
;   in:  ul = r25:r24:r23:r22, b = r20
;   out: ul - b in r25:r24:r23:r22
        .text
        .global subit
subit:
        sub     r22, r20        ; subtract b (r20) from ul (r25-r22)
        sbc     r23, r1         ; .. NOTE: gcc makes sure r1 is always 0
        sbc     r24, r1         ; ..
        sbc     r25, r1         ; ..
        ret
        .end

#include <avr/io.h>

; C prototype:
;     void delay_cycles(uint32_t cpucycles);
;
; Busy-waits for roughly the requested number of cpu cycles. The count is
; taken from r25:r24:r23:r22 (aliased below as cycles3..cycles0). The fixed
; call overhead is subtracted first; the low three bits of the remainder are
; burned by jumping into a table of nops, and the rest in an 8-cycle loop.

; defines the # of cpu cycles of overhead
; (includes the ldi r16,byte0; ldi r17,byte1; ldi r18, byte2,
; ldi r19, byte3, and the call _delay_cycles)
; NOTE(review): the registers and the name in the comment above do not match
; the aliases and label used below (r22-r25, delay_cycles) -- confirm the
; overhead figure.
OVERHEAD = 24

; some register aliases
cycles0 = 22
cycles1 = 23
cycles2 = 24
cycles3 = 25
temp    = 19                            ; not referenced in this routine

        .text
        .global delay_cycles
delay_cycles:
;
; subtract the overhead
        subi    cycles0, OVERHEAD       ; subtract the overhead
        sbc     cycles1, r1             ; ..
        sbc     cycles2, r1             ; ..
        sbc     cycles3, r1             ; ..
        brcs    dcx                     ; return if reqd delay too short
;
; delay the lsb
        mov     r30, cycles0            ; Z = jtable offset to delay 0-7 cycles
        com     r30                     ; ..
        andi    r30, 7                  ; ..
        clr     r31                     ; ..
        subi    r30, lo8(-(gs(jtable))) ; add the table offset
        sbci    r31, hi8(-(gs(jtable))) ; ..
        ijmp                            ; vector into table for partial delay
jtable:
        nop
        nop
        nop
        nop
        nop
        nop
        nop
;
; delay the remaining delay
loop:
        subi    cycles0, 8              ; decrement the count (8 cycles per loop)
        sbc     cycles1, r1             ; ..
        sbc     cycles2, r1             ; ..
        sbc     cycles3, r1             ; ..
        brcs    dcx                     ; exit if done
        nop                             ; add delay to make 8 cycles per loop
        rjmp    loop                    ; ..
dcx:
        ret
        .end

You might also like