1
A non-answer: tinyAVR assembler (web doc) instead of C++ or C. I modified a pretty generic multiply-by-squares-lookup routine for speed (<50 cycles, excluding call & return overhead), at the cost of fitting only into AVRs with no less than 1 KByte of RAM, because it uses 512 aligned bytes for a table of the lower halves of the squares. At 20 MHz, that would nicely meet the 2 µs (max. 3 µs) time limit that is still not showing up in the question proper - but Sergio Formiggini wanted 16 MHz. As of 2015/04, there is just one ATtiny from Atmel with that much RAM, and that one is specified only up to 8 MHz … (If you are rolling your "own" (e.g., from OpenCores), your FPGA probably has a bunch of fast multipliers (18×18 bits seems popular), if not processor cores.)
For a stab at fast shift-and-add, have a look at "shift and add, factor shifting left, unrolled 16×16→16" and/or improve on it (wiki post). (You might well create the community wiki answer begged for in the question.)
这不算正式的回答:用的是 tinyAVR 汇编(网页文档),而不是 C++ 或 C。我修改了一个相当通用的“平方表查找乘法”例程以提高速度(不含调用和返回开销时少于 50 个周期),代价是它只适用于 RAM 不少于 1 KB 的 AVR,因为要用 512 个对齐的字节来存放平方值低半部分的表。在 20 MHz 下,这可以很好地满足 2 µs(最多 3 µs)的时间限制——该限制至今仍未出现在问题正文中——但 Sergio Formiggini 想要的是 16 MHz。截至 2015 年 4 月,Atmel 只有一款 ATtiny 拥有这么多 RAM,而它的额定频率最高只有 8 MHz……(如果你(例如基于 OpenCores)打造“自己的”处理器,你的 FPGA 多半有一批快速乘法器(18×18 位似乎很常见),甚至可能有处理器核。)若想尝试快速的移位相加法,可以看看“移位相加、因数左移、展开的 16×16→16”并/或加以改进(wiki 帖子)。(你完全可以写出问题中所恳求的那个社区 wiki 回答。)
; Register assignments shared by the multiply routines.
; 16-bit values live in register pairs; the "0" name is the low byte,
; the "1" name the high byte.
.def a0 = r16 ; first factor, low byte
.def a1 = r17 ; first factor, high byte
; a and b below give r16/r17 a second name, which makes the assembler
; report the earlier definitions; the two #warning lines announce that.
#warning two warnings about preceding definitions of
#warning r16 and r17 are due and may as well be ignored
.def a = r16 ; 8-bit factor
.def b = r17 ; 8-bit factor ; or r18, rather?
.def b0 = r18 ; second factor, low byte
.def b1 = r19 ; second factor, high byte
.def p0 = r20 ; product low byte
.def p1 = r21 ; product high byte
; mpy16F16 loads every table value that is added to or subtracted from
; the product into sq; the name was used but had no definition.
; r22 is also the running square while fillSqTable builds the RAM table,
; which is finished before mpy16F16 is entered.
.def sq = r22 ; scratch: squares-table value
; "squares table" SqTab shall be two 512 Byte tables of
; squares of 9-bit natural numbers, divided by 4
; Idea: exploit p = a * b = Squares[a+b] - Squares[a-b]
; init - demo entry: load two sample factors, then pick a multiply routine.
;   a1:a0 = 0xAB73, b1:b0 = 0x0117 (r19 = 1, r18 = 23)
; The RAM squares table is addressed at the fixed location
; SqTabH<<8 .. (SqTabH<<8)+511, so it can only be used when that whole
; range lies inside SRAM, i.e. when the page following RAMEND is at or
; beyond SqTabH+2. (Testing SRAM_SIZE >= 512 is not sufficient: SRAM does
; not start at address 0, so a 512-byte part ends before the table does.)
; Devices that cannot hold the table fall back to mpy16T16.
init:
        ldi     r16, 0x73               ; a0
        ldi     r17, 0xab               ; a1
        ldi     r18, 23                 ; b0
        ldi     r19, 1                  ; b1
        ldi     r20, HIGH(RAMEND + 1)   ; first page past the end of SRAM
        cpi     r20, SqTabH + 2         ; first page past the end of the table
        brsh    fillSqTable             ; table fits (e.g. ATtiny1634)
        rjmp    mpy16T16
; fillSqTable - build the RAM half of the squares table, then go on to
; mpy16F16.
; For n = 0..511 the byte at (SqTabH<<8) + n receives bits 9:2 of n*n,
; which is the low byte of n*n/4. The squares are generated by summing
; odd numbers: the running sum starts at 1 and the first addend is -1,
; so the first value stored is 0*0 = 0, the next 1, 4, 9, ...
; Only the low 16 bits of the sum are kept; bits 9:2 do not depend on
; anything above them.
; Writes r20, r21, r22, r23, r26, r27, Z and SREG.
; NOTE(review): mpy16F16 is reached with rjmp rather than rcall, so this
; code pushes no return address for its final ret - presumably meant for
; stepping through in a simulator; confirm before reusing as is.
fillSqTable:
        ldi     zh, SqTabH          ; Z -> first byte of the RAM table
        clr     zl
        ldi     r20, SqTabH
        subi    r20, -2             ; r20 = ZH value just past the 512 bytes
        ser     r26                 ; r27:r26 = -1, the first "odd number"
        ser     r27
        ldi     r22, 1              ; r23:r22 = 1, the running square
        clr     r23
fillNext:
        add     r22, r26            ; square += odd number
        adc     r23, r27
        adiw    r26, 2              ; advance to the next odd number
        bst     r23, 1              ; T = bit 9 of the square
        mov     r21, r23
        lsr     r21                 ; C = bit 8 of the square
        mov     r21, r22            ; (mov leaves C untouched)
        ror     r21                 ; r21 = bits 8:1
        lsr     r21                 ; r21 = bits 8:2, bit 7 cleared
        bld     r21, 7              ; r21 = bits 9:2
        st      z+, r21
        cp      zh, r20             ; reached the end page?
        brne    fillNext
        rjmp    mpy16F16
; How to read the cycle notes in the routine below:
; every instruction comment starts with that instruction's cycle count;
; each group header states the (latest) cycle at which the group starts
; and the (maximum) number of cycles the group takes.
;**************************************************************
;*
;* "mpy16F16" - 16x16->16 bit unsigned multiplication
;*              using table lookup
;*              Sergio Formiggini special edition
;*
;* In  : a1:a0, b1:b0  the two 16-bit factors (only read)
;* Out : p1:p0         low 16 bits of the product
;* Also written: sq, Z (r31:r30), SREG
;*
;* Method, with S(n) = n*n/4 and  x*y = S(x+y) - S(|x-y|):
;*   p = a0*b0 + 256*(a1*b0 + a0*b1)      (mod 65536)
;* "ld" through Z reads the low byte of S(n) from the RAM table,
;* "lpm" through the very same Z the high byte from the flash
;* table. Only a0*b0 needs both bytes; the two cross products
;* reach p1 only, so their low bytes are enough.
;*
;* Number of flash words : 318 + return =
;*                         (40 + 256 (flash table) + 22 (RAM init))
;* Number of cycles      : 49 + return
;* Low registers used    : None
;* High registers used   : 7+2 (a1:a0, b1:b0, p1:p0, sq;
;*                         + Z (r31:r30))
;* RAM bytes used        : 512 (squares table)
;*
;**************************************************************
mpy16F16:
; ---- p1:p0 = S(a0+b0)                     start 0, 9 cycles
        mov     ZL, a0              ; 1
        add     ZL, b0              ; 1  C = bit 8 of a0+b0
        ldi     ZH, SqTabH>>1       ; 1  (ldi does not touch C)
        rol     ZH                  ; 1  ZH = table page + C: 9-bit offset
        ld      p0, Z               ; 2  low byte            lookup 1
        lpm     p1, Z               ; 3  high byte           lookup 2
; ---- p1 -= low byte of S(|a1-b0|)         start 9, 8 cycles
        ldi     ZH, SqTabH          ; 1  differences stay within 8 bits
        mov     ZL, a1              ; 1
        sub     ZL, b0              ; 1
        brcc    posDiffF10          ; 1  (2 when taken, skipping the neg)
        neg     ZL                  ; 1  borrow: take the absolute value
posDiffF10:
        ld      sq, Z               ; 2                      lookup 3
        sub     p1, sq              ; 1
; ---- p1 -= low byte of S(|a0-b1|)         start 17, 7 cycles
        mov     ZL, a0              ; 1
        sub     ZL, b1              ; 1
        brcc    posDiffF01          ; 1  (2 when taken)
        neg     ZL                  ; 1
posDiffF01:
        ld      sq, Z               ; 2                      lookup 4
        sub     p1, sq              ; 1
; ---- p1:p0 -= S(|a0-b0|)                  start 24, 11 cycles
        mov     ZL, a0              ; 1
        sub     ZL, b0              ; 1
        brcc    posDiffF00          ; 1  (2 when taken)
        neg     ZL                  ; 1
posDiffF00:
        ld      sq, Z               ; 2  low byte            lookup 5
        sub     p0, sq              ; 1
        lpm     sq, Z               ; 3  high byte           lookup 6
        sbc     p1, sq              ; 1  borrow from p0 carried along
; ---- p1 += low byte of S(a1+b0)           start 35, 7 cycles
        mov     ZL, a1              ; 1
        add     ZL, b0              ; 1
        ldi     ZH, SqTabH>>1       ; 1
        rol     ZH                  ; 1
        ld      sq, Z               ; 2                      lookup 7
        add     p1, sq              ; 1
; ---- p1 += low byte of S(a0+b1)           start 42, 7 cycles
        mov     ZL, a0              ; 1
        add     ZL, b1              ; 1
        ldi     ZH, SqTabH>>1       ; 1
        rol     ZH                  ; 1
        ld      sq, Z               ; 2                      lookup 8
        add     p1, sq              ; 1
        ret                         ; 49 cycles before the return
; SqTableH - flash half of the squares table: 512 bytes, entry n
; (n = 0..511) being the high byte of n*n/4. mpy16F16 reads it with lpm,
; using the same Z value that addresses the low bytes in RAM.
; Every .db line below carries an even number of bytes, so no padding
; bytes are inserted between lines.
.CSEG
.org 256; in CSEG the location counter counts 16-bit words: word 256 is byte address 0x200, which is what lpm sees
SqTableH:
.db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
.db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
.db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
.db 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
.db 1, 1, 1, 1, 1, 1, 2, 2, 2, 2
.db 2, 2, 2, 2, 2, 2, 3, 3, 3, 3
.db 3, 3, 3, 3, 4, 4, 4, 4, 4, 4
.db 4, 4, 5, 5, 5, 5, 5, 5, 5, 6
.db 6, 6, 6, 6, 6, 7, 7, 7, 7, 7
.db 7, 8, 8, 8, 8, 8, 9, 9, 9, 9
.db 9, 9, 10, 10, 10, 10, 10, 11, 11, 11
.db 11, 12, 12, 12, 12, 12, 13, 13, 13, 13
.db 14, 14, 14, 14, 15, 15, 15, 15, 16, 16
.db 16, 16, 17, 17, 17, 17, 18, 18, 18, 18
.db 19, 19, 19, 19, 20, 20, 20, 21, 21, 21
.db 21, 22, 22, 22, 23, 23, 23, 24, 24, 24
.db 25, 25, 25, 25, 26, 26, 26, 27, 27, 27
.db 28, 28, 28, 29, 29, 29, 30, 30, 30, 31
.db 31, 31, 32, 32, 33, 33, 33, 34, 34, 34
.db 35, 35, 36, 36, 36, 37, 37, 37, 38, 38
.db 39, 39, 39, 40, 40, 41, 41, 41, 42, 42
.db 43, 43, 43, 44, 44, 45, 45, 45, 46, 46
.db 47, 47, 48, 48, 49, 49, 49, 50, 50, 51
.db 51, 52, 52, 53, 53, 53, 54, 54, 55, 55
.db 56, 56, 57, 57, 58, 58, 59, 59, 60, 60
.db 61, 61, 62, 62, 63, 63, 64, 64, 65, 65
.db 66, 66, 67, 67, 68, 68, 69, 69, 70, 70
.db 71, 71, 72, 72, 73, 73, 74, 74, 75, 76
.db 76, 77, 77, 78, 78, 79, 79, 80, 81, 81
.db 82, 82, 83, 83, 84, 84, 85, 86, 86, 87
.db 87, 88, 89, 89, 90, 90, 91, 92, 92, 93
.db 93, 94, 95, 95, 96, 96, 97, 98, 98, 99
.db 100, 100, 101, 101, 102, 103, 103, 104, 105, 105
.db 106, 106, 107, 108, 108, 109, 110, 110, 111, 112
.db 112, 113, 114, 114, 115, 116, 116, 117, 118, 118
.db 119, 120, 121, 121, 122, 123, 123, 124, 125, 125
.db 126, 127, 127, 128, 129, 130, 130, 131, 132, 132
.db 133, 134, 135, 135, 136, 137, 138, 138, 139, 140
.db 141, 141, 142, 143, 144, 144, 145, 146, 147, 147
.db 148, 149, 150, 150, 151, 152, 153, 153, 154, 155
.db 156, 157, 157, 158, 159, 160, 160, 161, 162, 163
.db 164, 164, 165, 166, 167, 168, 169, 169, 170, 171
.db 172, 173, 173, 174, 175, 176, 177, 178, 178, 179
.db 180, 181, 182, 183, 183, 184, 185, 186, 187, 188
.db 189, 189, 190, 191, 192, 193, 194, 195, 196, 196
.db 197, 198, 199, 200, 201, 202, 203, 203, 204, 205
.db 206, 207, 208, 209, 210, 211, 212, 212, 213, 214
.db 215, 216, 217, 218, 219, 220, 221, 222, 223, 224
.db 225, 225, 226, 227, 228, 229, 230, 231, 232, 233
.db 234, 235, 236, 237, 238, 239, 240, 241, 242, 243
.db 244, 245, 246, 247, 248, 249, 250, 251, 252, 253
.db 254, 255
; SqTabH - high byte of the BYTE address shared by both squares tables.
; SqTableH is a flash label and therefore a word address; doubling its
; high byte yields the high byte of the byte address that lpm uses
; (word 256 -> byte 0x200 -> SqTabH = 2). The value is even, which the
; "ldi ZH, SqTabH>>1 / rol ZH" sequence in mpy16F16 relies on.
.equ SqTabH = (high(SqTableH) <<1)
; The RAM half of the table is accessed through Z = SqTabH:index, i.e. at
; data address SqTabH<<8. Reserve exactly that range, instead of whatever
; address the data segment happens to be at, so nothing else is placed
; on top of the table.
.DSEG
.org SqTabH << 8
RAMTab: .BYTE 512