Fast AVR Code for Dividing 16-Bit Register by 8-Bit Register

Kerim · Feb 14, 2023

In general, using more memory may let the code run faster. This applies in this division.

One may notice that the trick that lets the code of the previous thread “AVR Code for Dividing 16 Bits by 8-Bit Constant” be relatively fast is that the long division of reg_Kd [=(2*256*256/reg_N+1)/2] is made already by the compiler. This could be repeated for all possible values of reg_N [2 to 255] and the results will be listed on a table [it takes 256 words and its first two entries could be any numbers).

Code:

;======================================================
; Division of 16-bit register {A} by 8-bit register {N}
;======================================================

; {R}={A}/{N} = r17:r16 / {N} = r17:r16 * {Kd} /256 /256 then rounding

; {A} dividend in r17:r16 [0 to 65535]
; {N} constant divisor in r22 [N=0 to 255], see Note_1, Note_2 below
; {R} result in r21:r20 , {A}/{N} rounded
; {Kd} = round(256*256/{N},0), the division constant [from KdTable:]
; {Kr} = INT[ (1-{N})/2 ]    , the rounding constant
; Used registers: ZH,ZL,r19,r18,r14,r13,r1,r0
; 49, 59 or 60 cycles = 42c|52c|53c code + 4c RET + 3c RCALL

D16var8:
    TST   r22                   ;1abc
    BREQ  DOoverF               ;1abc

    CPI   r22, 1                ;1abc
    BREQ  DO_same               ;1abc

    LDI   ZH, high(KdTable)     ;1abc, address in words, 0x??00
    MOV   ZL, r22               ;1abc
; *2 [from word address to byte address]
    LSL   ZL                    ;1abc
    ROL   ZH                    ;1abc
    LPM   r13, Z+               ;3abc
    LPM   r14, Z                ;3abc, 14abc
; r14:r13 = reg_Kd

; multiplicand in r17:r16 = {A}
; multiplier   in r14:r13 = {Kd}
; mul. result  in r21:r20:r19:r18
; valid result in r21:r20
    MUL   r17, r14              ;2abc
    MOVW  r21:r20, r1:r0        ;1abc
    MUL   r16, r13              ;2abc
    MOVW  r19:r18, r1:r0        ;1abc
    MUL   r17, r13              ;2abc
    CLR   r13                   ;1abc
    ADD   r19, r0               ;1abc
    ADC   r20, r1               ;1abc
    ADC   r21, r13              ;1abc
    MUL   r14, r16              ;2abc
    ADD   r19, r0               ;1abc
    ADC   r20, r1               ;1abc
    ADC   r21, r13              ;1abc +17abc= 31abc
; {B} = r21:r20 = r17:r16 * r14:r13 /256/256 = {A}*{Kd}/256/256
; {R} = {B} or {B}+1

; for rounding
    MOV   r13, r22              ;1abc
; r13 = {N}

    MUL   r20, r13              ;2abc
    MOVW  r19:r18, r1:r0        ;1abc
    MUL   r21, r13              ;2abc
    ADD   r19, r0               ;1abc, +7abc= 38abc
; {C} = r19:r18 = {B}*{N} = r21:r20 * r13

; the following conditions were deduced empirically
; if( Carry_1=0, {R}={B}, if({D2}>0, {R}={B}, {R}={B}+1 ) )

    SUB   r18, r16              ;1abc
    SBC   r19, r17              ;1abc
; {D1} = r19:r18 = {C} - {A} = r19:r18 - r17:r16

    BRCC  DIV_ret               ;2a|1bc +4a=[42a]
; if Carry_1=0, {R}={B}

    DEC   r13                   ;1bc
    LSR   r13                   ;1bc
; r13 = - reg_Kr

    ADD   r18,  r13             ;1bc
    CLR   r13                   ;1bc
    ADC   r19,  r13             ;1bc
; {D2} = r19:r18 = {D1} + {-Kr} = r19:r18 + {-Kr}

    BRPL  DIV_ret               ;2b|1c, +10b=[52b]
; if {D2} positive, {R}={B}

    SUBI  r20,  low(-1)         ;1c
    SBCI  r21, high(-1)         ;1c, +11c=[53c]
; {R}={B}+1

DIV_ret:
    RET                         ;4
; {R} = r21:r20 = {B} or {B}+1 [ {A}/{N} rounded ]

DOoverF:
    SER   r20
    SER   r21
    RET
; {R} = r21:r20 = 0xFFFF

DO_same:
    MOV   r20, r16
    MOV   r21, r17
    RET

;====================================

    .org 0x??00

KdTable:
    .dw   0,0,32768,21845,16384,13107,10923,9362
    .dw   8192,7282,6554,5958,5461,5041,4681,4369
    .dw   4096,3855,3641,3449,3277,3121,2979,2849
    .dw   2731,2621,2521,2427,2341,2260,2185,2114
    .dw   2048,1986,1928,1872,1820,1771,1725,1680
    .dw   1638,1598,1560,1524,1489,1456,1425,1394
    .dw   1365,1337,1311,1285,1260,1237,1214,1192
    .dw   1170,1150,1130,1111,1092,1074,1057,1040
    .dw   1024,1008,993,978,964,950,936,923
    .dw   910,898,886,874,862,851,840,830
    .dw   819,809,799,790,780,771,762,753
    .dw   745,736,728,720,712,705,697,690
    .dw   683,676,669,662,655,649,643,636
    .dw   630,624,618,612,607,601,596,590
    .dw   585,580,575,570,565,560,555,551
    .dw   546,542,537,533,529,524,520,516
    .dw   512,508,504,500,496,493,489,485
    .dw   482,478,475,471,468,465,462,458
    .dw   455,452,449,446,443,440,437,434
    .dw   431,428,426,423,420,417,415,412
    .dw   410,407,405,402,400,397,395,392
    .dw   390,388,386,383,381,379,377,374
    .dw   372,370,368,366,364,362,360,358
    .dw   356,354,352,350,349,347,345,343
    .dw   341,340,338,336,334,333,331,329
    .dw   328,326,324,323,321,320,318,317
    .dw   315,314,312,311,309,308,306,305
    .dw   303,302,301,299,298,297,295,294
    .dw   293,291,290,289,287,286,285,284
    .dw   282,281,280,279,278,277,275,274
    .dw   273,272,271,270,269,267,266,265
    .dw   264,263,262,261,260,259,258,257

/*
Note_1:
If N=0, the returned result [r21:r20] is 65535 [the highest possible number for it]. But this could be replaced by setting an overflow flag for example.

Note_2:
In case reg_N [r22] won’t be 0 or 1 for sure, testing r22 at the beginning of the code could be removed, also the code of DOoverF: and DO_same:
*/

Welcome to our site!

Electro Tech is an online community (with over 170,000 members) who enjoy talking about and building electronic circuits, projects and gadgets. To participate you need to register. Registration is free. Click here to register now.

Fast AVR Code for Dividing 16-Bit Register by 8-Bit Register

Kerim

Member

Similar threads

Latest threads

New Articles From Microcontroller Tips