“比 avr200b.asm 更快”?
avr200b.asm 中的 mpy8u 例程(面向 Atmel AVR 系列中未实现任何 MUL 指令的处理器)看起来相当通用,但 mpy16u 显得很马虎:结果字节被移位了 16 次而不是 8 次。Antonio 提出了一种快速的 16×16→16 无符号乘法(fast 16×16→16 unsigned multiplication),最坏情况下需要 64 个周期,不包括调用/返回开销。
我随意建议优化目标最坏情况周期计数,字数(RAM和闪存),寄存器使用和预期周期数按优先级降低的顺序
(减少核心 AVR(“单个数字”-ATtiny,10/20/40),包括时间差异,我建议忽略。)
(注意:不要将此处的任何主张视为理所当然,至少在没有独立肯定的情况下。)
对于没有<?php
$subject="This is a test message";
$message="This is a Body Section now.....! :)";
$to="someaddress@somedomain.com";
// starting outlook
com_load_typelib("outlook.application");
if (!defined("olMailItem")) {define("olMailItem",0);}
$outlook_Obj = new COM("outlook.application") or die("Unable to start Outlook");
//just to check you are connected.
echo "Loaded MS Outlook, version {$outlook_Obj->Version}\n";
$oMsg = $outlook_Obj->CreateItem(olMailItem);
$oMsg->Recipients->Add($to);
$oMsg->Subject=$subject;
$oMsg->Body=$message;
$oMsg->Save();
$oMsg->Send();
?>
的AVR,目前已知的最佳8×8→8 / 16,16×16→16/32和16×8→16/24位乘法程序是什么?
答案 0 :(得分:0)
下面尝试用平方表查找(squares lookup)来实现,首先是各例程公用的前置定义:
;.def ZL = r30
;.def ZH = r31
; Register assignment for the table-lookup multiply routines.
; It tries to follow "avr200b.asm"; gcc would use 25 down instead
; of 16 up (and the same registers for parameters and result,
; requiring two or one movw for another four or two bytes & cycles).
; Note that a/a0 share r16 and b/a1 share r17: the 8-bit names
; (a, b) and the 16-bit names (a1:a0, b1:b0) are alternative views
; of the same registers.
.def a0 = r16 ; factor low byte
.def a1 = r17
.def a = r16 ; 8-bit factor
.def b = r17 ; 8-bit factor ; or r18, rather?
.def b0 = r18 ; factor low byte
.def b1 = r19
.def p0 = r20 ; product low byte
.def p1 = r21
.def p2 = r22
.def p3 = r23
.def sq = r25 ; tmp, might have used r0
; & parameterless LPM
.def s0 = r0
.def s1 = r24
; The "squares table" shall be a 1 KByte table of the squares of
; 9-bit natural numbers, divided by 4 (Squares[n] = n*n/4, rounded
; down), aligned on a 1K border in program memory and organised as
; 512 low bytes followed by the 512 high bytes.
; The routines address it with ZH = 4 or 5 for the low bytes and
; ZH = 6 or 7 for the high bytes.
; The idea is to exploit
; p = a * b = Squares[a+b] - Squares[|a-b|]
; (a+b and a-b have the same parity, so the two rounding errors
; cancel).
; Assembly lines are marked up with their cycle count and the
; (latest) start cycle within the block.
; On the first line of a code block, the (latest) start cycle of
; the block follows;
; on the last line, the (max) cycle total of the block follows.
8×8→16位:
;**********************************************************
;*
;* "mpy8T" - 8x8->16 Bit Unsigned Multiplication
;* using table lookup
;* (mpy8u: 34 words/cycles (avr200b.asm))
;* Multiplies two 8-bit register values a and b.
;* The result is placed in p1:p0.
;*
;* Expects the squares table (512 low bytes, then 512 high bytes)
;* in program memory, addressed with ZH = 4..7.
;* a and b are left unchanged; sq, Z and the flags are clobbered.
;*
;* Number of words : 17 + 512(table) = 529 + return
;* Number of cycles : 25 + return (table coming preset ...)
;* Low registers used : None
;* High registers used : 5+2 (a, b, p1:p0, sq;
;* + Z(r31:r30))
;*
;*********************************************************
mpy8T:
; p = a * b = Squares[a+b] - Squares[|a-b|]
; --- p1:p0 = Squares[a+b]; a+b is a 9-bit index -----------------
ldi ZH, 2 ; 1 0 0 squares table / 2
mov ZL, a ; 1 1
add ZL, b ; 1 2 a+b, carry = index bit 8
rol ZH ; 1 3 ZH = 4 + carry: 9 bit offset
lpm p0, Z ; 3 4 a+bl 1
; sbr takes a bit MASK: mask 2 sets bit 1, moving ZH from 4/5
; (low bytes) to 6/7 (high bytes). Mask 1 would set bit 0 and
; read a low byte of the wrong table entry.
sbr ZH, 2 ; 1 7 high-byte half
lpm p1, Z ; 3 8 11 a+bh 2*
; --- p1:p0 -= Squares[|a-b|]; |a-b| is an 8-bit index -----------
ldi ZH, 4 ; 1 0 11 squares table, low bytes
mov ZL, a ; 1 0 12
sub ZL, b ; 1 1 a-b
brcc pos ; 1 2 no borrow: already non-negative
neg ZL ; 1 3 |a-b|
pos:
lpm sq, Z ; 3 4 a-bl 3
sub p0, sq ; 1 7
sbr ZH, 2 ; 1 8 (ldi ZH, 6): high-byte half
lpm sq, Z ; 3 9 a-bh 4*
sbc p1, sq ; 1 12 13 borrow from the low byte included
ret ; 3 25
16×16→16/32位:
;**********************************************************
;*
;* "mpy16T" - 16x16->32 Bit Unsigned Multiplication
;* using table lookup
;*
;* Multiplies two 16-bit register values a1:a0 and b1:b0.
;* The result is placed in p3:p2:p1:p0.
;*
;* Expects the squares table (512 low bytes, then 512 high bytes)
;* in program memory, addressed with ZH = 4..7.
;* The product is assembled from four 8x8 partial products, each
;* computed as Squares[x+y] - Squares[|x-y|]:
;* p3:p2 gets a1*b1, p1:p0 gets a0*b0,
;* p2:p1 accumulates a1*b0 and a0*b1 (carries go into p3).
;* Intermediate values may wrap; the final sum is exact modulo 2^32.
;* Clobbers sq, s0, s1, zero (r2), Z and the flags.
;*
;* Number of words : 74 + 512(table) = 586
;* + return (+ push/pop)
;* Number of cycles : 106 + return (+ push/pop)
;* (table coming preset ...)
;* (avr200b.asm mpy16u improved: 100, as-is: 116)
;* Low registers used : 2 (s0, zero) (could use r26&r27)
;* High registers used : 10+2 (a1:a0, b1:b0, p3:p2:p1:p0,
;* sq, s1; + Z(r31:r30))
;*
;*********************************************************
mpy16T:
.def zero = r2 ; (gcc's choice of r1 is funny given mul)
; push zero
clr zero ; 1 0 0 constant 0 for carry propagation
; initialise p1:p0 = Squares[a0+b0]
ldi ZH, 2 ; 1 0 1 squares table / 2
mov ZL, a0 ; 1 1
add ZL, b0 ; 1 2 a0+b0
rol ZH ; 1 3 ZH = 4 + carry: 9 bit offset
lpm p0, Z ; 3 4 a0+b0l 1
; sbr takes a bit MASK: mask 2 sets bit 1 (ZH 4/5 -> 6/7)
sbr ZH, 2 ; 1 7 squares table 2nd half
lpm p1, Z ; 3 8 11 a0+b0h 2
; initialise p3:p2 = Squares[a1+b1]
ldi ZH, 2 ; 1 0 12
mov ZL, a1 ; 1 1
add ZL, b1 ; 1 2 a1+b1
rol ZH ; 1 3
lpm p2, Z ; 3 4 a1+b1l 3
sbr ZH, 2 ; 1 7 squares table 2nd half
lpm p3, Z ; 3 8 11 a1+b1h 4
; all differences are 8 bit abs: save index carry handling
ldi ZH, 6 ; 1 0 23 squares table 2nd half
; do highest square first for carry handling:
; s1:s0 = Squares[|a1-b1|], subtracted further down in one
; borrow chain together with Squares[|a0-b0|]
mov ZL, a1 ; 1 0 24
sub ZL, b1 ; 1 1 a1-b1
brcc pos11 ; 1 2
neg ZL ; 1 3
pos11:
lpm s1, Z ; 3 4 a1-b1h 5
ldi ZH, 4 ; 1 7 squares table 1st half
lpm s0, Z ; 3 8 11 a1-b1l 6
mov ZL, a0 ; 1 0 35
sub ZL, b0 ; 1 1 a0-b0
brcc pos00 ; 1 2
neg ZL ; 1 3
pos00:
lpm sq, Z ; 3 4 a0-b0l 7
sub p0, sq ; 1 7
ldi ZH, 6 ; 1 8 squares table 2nd half
lpm sq, Z ; 3 9 a0-b0h 8
sbc p1, sq ; 1 12
sbc p2, s0 ; 1 13
sbc p3, s1 ; 1 14 15
; p3:p2:p1 -= Squares[|a1-b0|] (ZH is still 6 here)
mov ZL, a1 ; 1 0 50
sub ZL, b0 ; 1 1 a1-b0
brcc pos10 ; 1 2
neg ZL ; 1 3
pos10:
lpm s1, Z ; 3 4 a1-b0h 9
ldi ZH, 4 ; 1 7 squares table 1st half
lpm sq, Z ; 3 8 a1-b0l 10
sub p1, sq ; 1 11
sbc p2, s1 ; 1 12
sbc p3, zero ; 1 13 14
; p3:p2:p1 -= Squares[|a0-b1|] (ZH is still 4 here)
mov ZL, a0 ; 1 0 64
sub ZL, b1 ; 1 1 a0-b1
brcc pos01 ; 1 2
neg ZL ; 1 3
pos01:
lpm sq, Z ; 3 4 a0-b1l 11
sub p1, sq ; 1 7
ldi ZH, 6 ; 1 8 squares table 2nd half
lpm sq, Z ; 3 9 a0-b1h 12
sbc p2, sq ; 1 12
sbc p3, zero ; 1 13 14
; p3:p2:p1 += Squares[a1+b0]
ldi ZH, 2 ; 1 0 78
mov ZL, a1 ; 1 1
add ZL, b0 ; 1 2 a1+b0
rol ZH ; 1 3
lpm sq, Z ; 3 4 a1+b0l 13
add p1, sq ; 1 7
sbr ZH, 2 ; 1 8 squares table 2nd half
lpm sq, Z ; 3 9 a1+b0h 14
adc p2, sq ; 1 12
adc p3, zero ; 1 13 14
; p3:p2:p1 += Squares[a0+b1]
ldi ZH, 2 ; 1 0 92
mov ZL, a0 ; 1 1
add ZL, b1 ; 1 2 a0+b1
rol ZH ; 1 3
lpm sq, Z ; 3 4 a0+b1l 15
add p1, sq ; 1 7
sbr ZH, 2 ; 1 8 squares table 2nd half
lpm sq, Z ; 3 9 a0+b1h 16
adc p2, sq ; 1 12
adc p3, zero ; 1 13 14
; pop zero
ret ; 106
16×16→16位:
;*********************************************************
;*
;* "mpy16T16" - 16x16->16 Bit Unsigned Multiplication
;* using table lookup
;*
;* Multiplies two 16-bit register values a1:a0 and b1:b0.
;* The result (low 16 bits of the product) is placed in p1:p0.
;*
;* Expects the squares table (512 low bytes, then 512 high bytes)
;* in program memory, addressed with ZH = 4..7.
;* p1:p0 = a0*b0 + ((a1*b0 + a0*b1) << 8) modulo 2^16; only the
;* low bytes of the two cross products are needed.
;* Clobbers sq, Z and the flags.
;*
;* Number of words : 41 + 512(table)=553 + return
;* Number of cycles :<=57 + return
;* Low registers used : None
;* High registers used : 7+2 (a1:a0, b1:b0, p1:p0, sq;
;* + Z(r31:r30))
;*
;*********************************************************
mpy16T16:
; p1:p0 = Squares[a0+b0]
ldi ZH, 2 ; 1 0 0 squares table / 2
mov ZL, a0 ; 1 1
add ZL, b0 ; 1 2 a0+b0
rol ZH ; 1 3 ZH = 4 + carry: 9 bit offset
lpm p0, Z ; 3 4 a0+b0l 1
; sbr takes a bit MASK: mask 2 sets bit 1 (ZH 4/5 -> 6/7)
sbr ZH, 2 ; 1 7 high-byte half
lpm p1, Z ; 3 8 11 a0+b0h 2*
; differences are 8-bit absolute values: ZH stays at 4 (low bytes)
ldi ZH, 4 ; 1 0 11 squares table
; p1 -= low(Squares[|a1-b0|])
mov ZL, a1 ; 1 0 12
sub ZL, b0 ; 1 1 a1-b0
brcc noNeg10 ; 1 2
neg ZL ; 1 3
noNeg10:
lpm sq, Z ; 3 4 a1-b0l 3
sub p1, sq ; 1 7 8
; p1 -= low(Squares[|a0-b1|])
mov ZL, a0 ; 1 0 20
sub ZL, b1 ; 1 1 a0-b1
brcc noNeg01 ; 1 2
neg ZL ; 1 3
noNeg01:
lpm sq, Z ; 3 4 a0-b1l 4
sub p1, sq ; 1 7 8
; p1:p0 -= Squares[|a0-b0|]
mov ZL, a0 ; 1 0 28
sub ZL, b0 ; 1 1 a0-b0
brcc noNeg00 ; 1 2
neg ZL ; 1 3
noNeg00:
lpm sq, Z ; 3 4 a0-b0l 5
sub p0, sq ; 1 7
sbr ZH, 2 ; 1 8 (ldi ZH, 6): high-byte half
lpm sq, Z ; 3 9 a0-b0h 6*
sbc p1, sq ; 1 12 13
; p1 += low(Squares[a1+b0])
ldi ZH, 2 ; 1 0 41
mov ZL, a1 ; 1 1
add ZL, b0 ; 1 2 a1+b0
rol ZH ; 1 3
lpm sq, Z ; 3 4 a1+b0l 7
add p1, sq ; 1 7 8
; p1 += low(Squares[a0+b1])
ldi ZH, 2 ; 1 0 49
mov ZL, a0 ; 1 1
add ZL, b1 ; 1 2 a0+b1
rol ZH ; 1 3
lpm sq, Z ; 3 4 a0+b1l 8
add p1, sq ; 1 7 8
ret ; 57
答案 1 :(得分:0)
无符号8×8→8,左移位因子和加,展开。
; Operands: multiplicand a0, multiplier b0, 8-bit product p0.
; step8 <bit>: accumulate the current multiple of a0 if the given
; bit of b0 is set, then move a0 on to the next binary weight.
.MACRO step8
sbrc b0, @0 ; skip the add when the multiplier bit is clear
add p0, a0
lsl a0 ; a0 *= 2 (same opcode as add a0, a0) ; +3
.EndM
; mpy8U8: p0 = low byte of a0 * b0, unsigned.
; Shift-and-add over the multiplicand, fully unrolled.
; a0 ends up shifted left by seven; b0 is left untouched.
; 24 cycles & words + return (caveat emptor)
mpy8U8:
clr p0 ; 1 start with an empty product
step8 0 ; 4
step8 1 ; 7
step8 2 ; 10
step8 3 ; 13
step8 4 ; 16
step8 5 ; 19
step8 6 ; 22
; the last bit needs no further doubling of a0
sbrc b0, 7 ; 23/24
add p0, a0 ; 24
ret
# endif
无符号16×16→16,左移位因子和加,展开。
下面先给出宏;共三种变体:一种简单,另外两种——好吧,就不那么简单了。
; mpy16A16: 16x16->16 bit unsigned multiply, shifting
; one factor bit-by-bit, testing same bits in
; different bytes of the other; idea due to Antonio
; (http://stackoverflow.com/users/2436175/antonio)
; in <http://stackoverflow.com/a/29812254/3789665>
; <= 62/61/60 cycles, 62/87/155 words + return (caveat emptor)
; (57.5, 56.75 and 55.75 expected _for a uniform distribution_)
; "middle" variant assembled with neither Plain nor Need4Speed
; defined, shown without separate "timing comments"
; ("without speed345", just add one to the Need4Speed timings)
; some macros using factors a1:a0, b1:b0 and product p1:p0
; addA: p1:p0 += a1:a0 (the factor "a" at its current weight)
.MACRO addA ; adds (weighted) factor "a" into product
add p0, a0 ; +1
adc p1, a1 ; +2
.EndM
; doubleA: a1:a0 *= 2, moving "a" on to the next binary weight
.MACRO doubleA ; adds (shifts/weights) factor "a"
add a0, a0 ; +1
adc a1, a1 ; +2
.EndM
; doHighB <bit>: a bit of the multiplier's HIGH byte only affects
; the product's high byte, so just add a0 into p1 if it is set
.MACRO doHighB ; "does" bit in b1, bit number as a parameter
sbrc b1, @0 ; 1
add p1, a0 ; 2
.EndM
; "do" 2 bits, bit numbers in b1 and b0 as parameters:
; stepS <bit of b1>, <bit of b0>
; The two numbers differ only where b0 has been shifted before use.
.MACRO stepS
bst b0, @1 ; +1
brtc noadd ; +2/3
addA ; +4
noadd: ; gets decorated; almost as neat as "numeric labels"
doHighB @0 ; +6
doubleA ; +8
.EndM
.MACRO step16; "do" 2 bits, bit# in b1 and b0 as a parameter
stepS @0, @0
.EndM
; empty if no Need4Speed; speed3do45, really
; Only to be used right after "lsl b0" with the flags still intact:
; H then holds the original bit 3 of b0, and the original bits 4
; and 5 of b0 sit in bit positions 5 and 6.
.MACRO speed345
#if Need4Speed
brhc noadd ; 1/2
; kkbb1 (b starts with two Known bit, bit 3 just checked)
addA ; 3
noadd: ; 2/3
doHighB 3 ; 5
doubleA ; 7
; stepS needs BOTH bit numbers; the b0 positions are one higher
; than the b1 positions because of the preceding lsl b0
stepS 4, 5 ; 15
stepS 5, 6 ; 23
#endif
.EndM
# if !Plain
; Tail paths of mpy16A16 for multipliers whose b0 bit 7 is clear.
; They are reached by conditional branches taken right after
; "lsl b0" in mpy16A16 and are placed ahead of it
; due to limited branch offset.
no67: ; 29
; 00: b0 bits 7 and 6 both clear - nothing more to add from b0
speed345 ; 23
doHighB 6 ; 2 54
sbrs b1, 7 ; 1/2 doHighB 7 with early out
ret ; 55 last to start, first to finish
add a0, a0 ; 3 only the low byte of "a" is still needed
add p1, a0 ; 4 58
ret
no7: ; 27
; 0: b0 bit 7 clear; N still holds bit 6 from the lsl
brpl no67 ; 1/2 29
; 01: b0 bit 6 set
speed345 ; 23 51
addA ; 2 the b0 bit 6 contribution
doHighB 6 ; 4 55
sbrs b1, 7 ; 1/2 doHighB 7 with early out
ret ; 56
add a0, a0 ; 3 only the low byte of "a" is still needed
add p1, a0 ; 4 59
ret
# endif
在单独的代码块中尝试轻松浏览:
; mpy16A16: p1:p0 = low 16 bits of (a1:a0 * b1:b0), unsigned.
; Shift-and-add with "a" doubled after every step; each step
; handles the same bit number in b0 (full 16-bit add) and in b1
; (add into p1 only).
; Which code is assembled depends on the symbols Plain and
; Need4Speed. The non-Plain variants shift b0 and finish in the
; no7/no67/no6but7 tails. a1:a0 is modified in all variants.
mpy16A16:
clr p0 ; 1
clr p1 ; 2 ; p1:p0 = 0
sbrc b0, 0 ; 3
; "fast-laning the trailing zeroes case" isn't as attractive as
; in a shift pp variant: no gain from avoiding "shift pp", here
movw p0, a0 ; 4 ; p1:p0 "+=" a1:a0
doHighB 0 ; 6
add a0, a0 ; 7 ; breq a0zero for early out added 1
; (+you'd have to handle the carry)
adc a1, a1 ; 8 ; breq a1zero for early out added 1
; 8
step16 1 ; +8
step16 2 ; +8 24
# if !Need4Speed
step16 3 ; +8
step16 4 ; +8
step16 5 ; +8 48
# if Plain
step16 6 ; +8
; 56
; last bit pair: no further doubling of "a" needed
doHighB 7 ; +2
sbrs b0, 7 ; +3/4
ret ; 59 top for 1bbbbbbb01bbbbbb ;-)
addA ; +6 62 _worst case_!
ret
# endif
# endif
# if !Plain
; after the shift: C = old bit 7, N = old bit 6, H = old bit 3
lsl b0 ; 1 24 make bit 7, 6(&3) "branchable"
; takes one cycle, but each conditional branch takes one less
; than skip-over-rjmp or bst b0,i brtc - netting 1 cycle off
; (at the cost of multiplying code)
brcc no7 ; 2/3 27
; 1
brpl no6but7 ; 3/4 28
; 11: b0 bits 7 and 6 both set
speed345 ; 23 50
addA ; 2 the b0 bit 6 contribution
doHighB 6 ; 2 54
doubleA ; 2
addA ; 2 the b0 bit 7 contribution
doHighB 7 ; 2 60
ret
no6but7: ; 28
; 10: b0 bit 7 set, bit 6 clear
speed345 ; 23 51
doHighB 6 ; 2 53
doubleA ; 2
addA ; 2 the b0 bit 7 contribution
doHighB 7 ; 2 59
ret
#endif
; for an analysis of expected case cycle count, assume half of
; bits b0:5-1 to be zero for 2.5 cycles less. b0:7 off needs
; 1 cycle less with b1:7 set(.5), another 3 if off(.75). b0:6
; off needs 1 cycle less(.5). _for a uniform distribution_, I'd
; _expect 55.75 cycles_. For a distribution with lower numbers
; more likely (upper bits more likely to be 0, remember b0 to
; be the least significant eight), expect this to be
; _finished in less than 55 cycles_.
下一步是什么? (修改过的Booth, no hold barred (目前支持计算goto )。)
答案 2 :(得分:0)
部分产品查询(对于(位)四重奏/半字节对)
/* mpy16n16: 16x16->16 bit unsigned multiply, accumulating
 * nibble-by-nibble partial products looked up in a table,
 * "product scanning, decreasing significance",
 * non-aligned first (non-aligned partial products need
 * to be adjusted "bit-wise").
 * Aligned ones are "starred" below for the need to shift one
 * of the operand nibbles for combination into a table index.
 * Inputs a32:a10 and b32:b10, result in p32:p10.
 * The table is indexed by (x << 4) | y for nibbles x and y and
 * is filled by nibbleFiller below.
 * 78 cycles, 256 bytes RAM, 83(69) words (caveat emptor)
 *(88 cycles, 197 words with table in flash)
 */
.equ L = 0x0f ; low nibble mask
.equ H = 0xf0 ; high nibble mask
; if indexing is to work by just setting the low byte, this is the
.equ PTable = 1 ; only value possibly working with 512 bytes of RAM
.def a10 = r16
.def a32 = r17
.def b10 = r18
.def b32 = r19
.def p10 = r20
.def p32 = r21
.def p = r25
.def t = r24
.def a0 = r23
.def b0 = r22
; fill the table once, then fall through into the multiply
rcall nibbleFiller
mpy16n16:
; a0 and b0 hold the lowest nibble of each factor (in bits 3..0)
mov b0, b10 ; 1
andi b0, L ; 2
mov a0, a10 ; 3
andi a0, L ; 4
ldi ZH, PTable ; 5
/* (values to the left of the gap shown for completeness, only)
 03
 13* 02*
 23 12 01
33* 22* 11* 00*
 32 21 10
 31* 20*
 30 */
; Partial products of weight 16^3, summed in p32 (30 kept in p)
;03
mov ZL, b32 ; 1
andi ZL, H ; 2
or ZL, a0 ; 3
ld p32, Z ; 5 10
;12
mov ZL, b32 ; 1
andi ZL, L ; 2
mov t, a10 ; 3
andi t, H ; 4
or ZL, t ; 5
ld t, Z ; 7
add p32, t ; 8 18
;21
mov ZL, a32 ; 1
andi ZL, L ; 2
mov t, b10 ; 3
andi t, H ; 4
or ZL, t ; 5
ld t, Z ; 7
add p32, t ; 8 26
;30
mov ZL, a32 ; 1
andi ZL, H ; 2
or ZL, b0 ; 3
ld p, Z ; 5 31
; Partial products of weight 16^1, summed in p10; the carry out of
; that sum has weight 16^3 and is folded into p32 along with "30"
;01
mov ZL, b10 ; 1
andi ZL, H ; 2
or ZL, a0 ; 3
ld p10, Z ; 5 36
;10
mov ZL, a10 ; 1
andi ZL, H ; 2
or ZL, b0 ; 3
ld t, Z ; 5
add p10, t ; 6
adc p32, p ; 7 43
; align nibbles: move both sums up by four bits
swap p10 ; 1
swap p32 ; 2
mov p, p10 ; 3
; separate nibbles
andi p10, H ; 4
andi p32, H ; 5
andi p, L ; 6 49 postpone nibble addition
; Aligned partial products: weight 16^0 into p10 ...
;00
swap a0 ; 1 a0 nibble now in bits 7..4, for the rest of the routine
mov ZL, a0 ; 2
or ZL, b0 ; 3
ld t, Z ; 5
add p10, t ; 6
adc p32, p ; 7 56 nibble addition here
; ... and weight 16^2 into p32
;11
mov ZL, a10 ; 1
andi ZL, H ; 2
swap ZL ; 3
mov t, b10 ; 4
andi t, H ; 5
or ZL, t ; 6
ld t, Z ; 8
add p32, t ; 9 65
;02
mov ZL, b32 ; 1
andi ZL, L ; 2
or ZL, a0 ; 3
ld t, Z ; 5
add p32, t ; 6 71
;20
mov ZL, a32 ; 1
swap ZL ; 2
andi ZL, H ; 3
or ZL, b0 ; 4
ld t, Z ; 6
add p32, t ; 7 78 - 256 bytes of RAM, add 10 for flash RAM
ret
; nibbleFiller: fills the 256 bytes at PTable*256 such that
; table[(x << 4) | y] = x * y for all nibbles x, y.
; Each row x is built by repeated addition of x.
; Clobbers Z, t, p and a0.
nibbleFiller:
ldi ZH, PTable
clr ZL
ser t ; t = 0xFF, so the first inc yields row 0
outer:
inc t ; next row: t = x
clr p ; x * 0
inner:
st Z+, p
sbrs ZH, 0 ; ZH bit 0 clears once Z has run past the table
ret
mov a0, ZL
andi a0, L
breq outer ; low nibble wrapped to 0: start the next row
add p, t ; x * (y + 1)
rjmp inner
break
答案 3 :(得分:0)
大端(Big-endian)改进型 Booth-2 算法,完全展开。尚在进行中,待办事项(ToDo):像样的测试装置、仔细审视关键路径(58 个周期?!)、清晰的注释(以及关于如何“记账”(book-keeping)的好办法)、移动标签以节省重复的公共指令。
16×16→16位(×8位无法获得):
.MACRO doubleP ; adds (shifts/weights) (partial) product
add p0, p0 ; +1
adc p1, p1 ; +2
.EndM
b_010: ; 9 -1
sbrs b1, 7 ;1/2
rjmp nob20 ;2/3
add p1, a0 ; 3
add p1, a0 ; 4
nob20: ; 13
doHighB 6 ; 2 15 :-(( 14 if b1:7 off
b20: ; 15
doubleP ; 2
doHighB 5 ; 4
sbrs b0, 5 ;5/6
rjmp b200 ; 7 22
sbrs b0, 4 ;7/8
rjmp b2010 ; 9 24
b2011: ; 23
addA ; 2
doubleP ; 4 2
doHighB 4 ; 6 29
b41: ; 29
doubleP ; 2
doHighB 3 ; 4
sbrs b0, 3 ;5/6
rjmp b410 ; 7 36
b411:
sbrc b0, 2 ;7/8
rjmp b4111 ; 9 38
b4110: ; 37
doubleP ; 2
subA ; 4
doHighB 2 ; 6 43
b60: ; 43
doubleP ; 2
doHighB 1 ; 4
sbrs b0, 1 ;5/6
rjmp b600 ; 7 50
b601: ; 49
doubleP ; 2
addA ; 4
doHighB 0 ; 6 55 57
ret
mpy16BEB16:
lsl b0 ; 1
brcc b_0 ;2/3 3
b_1: ; 2
brpl b_10 ;1/2 4
b_11: ; 3
sbrc b0, 6 ;1/2
rjmp b_111 ; 3 6
b_110: ; 5
movw p0, a0 ; 1
doHighB 7 ; 3
doubleP ; 5
addA ; 7
doHighB 6 ; 9 14 :-|
;b20: ; 14
doubleP ; 2
doHighB 5 ; 4
sbrs b0, 5 ;5/6
rjmp b200 ; 7 21
sbrs b0, 4 ;7/8
rjmp b2010 ; 9 23
;b2011: ; 22
addA ; 2
doubleP ; 4
doHighB 4 ; 6 28
rjmp b41 ; 8 30
b_0: ; 3
brmi b_01 ;4/5 5
b_00: ;
sbrc b0, 6 ;5/6
rjmp b_001 ; 7 7
b_000:
ldi p0, 0 ; 1 6
ldi p1, 0 ; 2
doHighB 7 ; 4
add p1, p1 ; 5
rjmp nob20 ; 7 13 :-/ -> 15
; b_01: entered from mpy16BEB16 after "lsl b0" with C clear, N set.
; p1:p0 starts out as a1:a0; from here on each doubleP is followed
; by exactly one doHighB, handling b1 from bit 7 down to bit 0.
b_01: ; 5
movw p0, a0 ; 1 useful to both b0:6 branches
sbrs b0, 6 ;2/3
rjmp b_010 ; 4 9 -1
b_011: ; 8
doHighB 7 ; 2 too lazy for more labels
doubleP ; 4
doHighB 6 ; 6 14 :-|
b21: ; 14
doubleP ; 2
doHighB 5 ; 4
sbrs b0, 5 ;5/6
rjmp b210 ; 7 21
b211: ; 20
sbrc b0, 4 ;7/8
rjmp b2111 ; 9 23
b2110: ; 22
doubleP ; 2
; b1 bit 5 was already handled at b21; after one more doubleP it
; is bit 4's turn (as in b2011/b2111). Repeating bit 5 here would
; count it twice and drop bit 4.
doHighB 4 ; 4
subA ; 6 28
rjmp b40 ; 8 30 following two skips - ?
; b_10: entered from b_1 in mpy16BEB16 (after "lsl b0": C set,
; N clear). The b_101 path runs straight through to ret with
; inlined copies of the b21/b2110/b40/b4011 steps; b1 is handled
; from bit 7 down to bit 0, one doHighB per doubleP.
b_10: ; 4
sbrs b0, 6 ;1/2
rjmp b_100 ; 3 7
b_101: ; 6
movw p0, a0 ; 1
doHighB 7 ; 3
doubleP ; 5
addA ; 7
doHighB 6 ; 9 15 :-(
;b21: ; 15
doubleP ; 2
doHighB 5 ; 4
sbrs b0, 5 ;5/6
rjmp b210 ; 7 22
sbrc b0, 4 ;7/8
rjmp b2111 ; 9 24
;b2110: ; 23
doubleP ; 2
; b1 bit 5 was handled just above; after one more doubleP it is
; bit 4's turn. Repeating bit 5 here would count it twice and
; drop bit 4.
doHighB 4 ; 4
subA ; 6 29 two skips, but
;b40: ; 29
doubleP ; 2
doHighB 3 ; 4
sbrs b0, 3 ;5/6
rjmp b400 ; 7 36 21
;b401: ; 35 21
sbrs b0, 2 ;1/2
rjmp b4010 ; 3 38 18
;b4011: ; 37 19
addA ; 2
doubleP ; 4
doHighB 2 ; 6 43
b61:
doubleP ; 2 43 13
doHighB 1 ; 4
sbrs b0, 1 ;5/6
rjmp b610 ; 7 50 6
b611: ; 52 6
doubleP ; 2
subA ; 4
doHighB 0 ; 6 58 :-((
ret
; b_001: entered from b_00 in mpy16BEB16. Starts the product as
; a1:a0, adds b1 bit 7 at twice the weight of bit 6 (two adds
; instead of an add and a doubleP), then runs inlined copies of
; the b21/b2110/b40/b4011/b61/b611 steps through to ret.
b_001: ; 7
movw p0, a0 ; 1
sbrs b1, 7 ;2/3
rjmp nob001 ;3/4
add p1, a0 ; 4
add p1, a0 ; 5
nob001:
doHighB 6 ; 7 14 :-|
;b21: ; 14
doubleP ; 2
doHighB 5 ; 4
sbrs b0, 5 ;5/6
rjmp b210 ; 7 21
;b211: ; 20
sbrc b0, 4 ;1/2
rjmp b2111 ; 3 23
;b2110: ; 22
doubleP ; 2
; b1 bit 5 was handled just above; after one more doubleP it is
; bit 4's turn. Repeating bit 5 here would count it twice and
; drop bit 4.
doHighB 4 ; 4
subA ; 6 28
;b40: ; 28
doubleP ; 2
doHighB 3 ; 4
sbrs b0, 3 ;5/6
rjmp b400 ; 7 35
;b401: ; 34
sbrs b0, 2 ;1/2
rjmp b4010 ; 3 37 18
;b4011: ; 36
addA ; 2
doubleP ; 4
doHighB 2 ; 6 42
;b61:
doubleP ; 2 42
doHighB 1 ; 4
sbrs b0, 1 ;5/6
rjmp b610 ; 7 49
;b611: ; 48 6
doubleP ; 8
subA ; 10
doHighB 0 ; 12 54
ret
b_100: ; 7
movw p0, a0 ; 1
doHighB 7 ; 3
doubleP ; 5
doHighB 6 ; 7 14 :-|
;b20: ; 14
doubleP ; 2
doHighB 5 ; 4
sbrs b0, 5 ;5/6
rjmp b200 ; 7 21
sbrs b0, 4 ;7/8
rjmp b2010 ; 9 23
;b2011: ; 22
addA ; 2
doubleP ; 4
doHighB 4 ; 6 28
rjmp b41 ; 30
; b_111: entered from b_11 in mpy16BEB16. Two alternative
; implementations selected by the symbol "expected"; both end by
; falling into the shared copy of the b21/b2110 steps after #endif
; (the #else variant has a second copy reached via noB111).
#if !expected ; favouring space over expected cycles
b_111: ; 6
clr p0 ; 1
inc b1 ; 2
clr p1 ; 3
sbrc b1, 7 ;4/5
mov p1, a0 ; 5
add p1, p1 ; 6
doHighB 6 ; 8 14 :-|
#else
noB111: ; 10
clr p1 ; 1
doHighB 6 ; 3 13 :-/
;b21: ; 13
doubleP ; 2
doHighB 5 ; 4
sbrs b0, 5 ;5/6
rjmp b210 ; 7 20
;b211: ; 19
sbrc b0, 4 ;7/8
rjmp b2111 ; 9 22
;b2110: ; 21
doubleP ; 2
; b1 bit 5 was handled just above; after one more doubleP it is
; bit 4's turn. Repeating bit 5 here would count it twice and
; drop bit 4.
doHighB 4 ; 4
subA ; 6 27
rjmp b40 ; 8 29
b_111: ; 6
clr p0 ; 1
inc b1 ; 2
brpl noB111 ;3/4
mov p1, a0 ; 4
add p1, p1 ; 5
doHighB 6 ; 7 13 :-/
#endif
;b21: ; 14
doubleP ; 2
doHighB 5 ; 4
sbrs b0, 5 ;5/6
rjmp b210 ; 7 21
;b211: ; 20
sbrc b0, 4 ;7/8
rjmp b2111 ; 9 23
;b2110: ; 22
doubleP ; 2
; same slot as above: bit 4 of b1, not bit 5 again
doHighB 4 ; 4
subA ; 6 28
rjmp b40 ; 8 30
b200: ; 22
sbrs b0, 4 ;1/2
rjmp b2000 ; 3 25
b2001: ; 24
doubleP ; 4
doHighB 4 ; 6
addA ; 8 30
;b41: ; 30
doubleP ; 2
doHighB 3 ; 4
sbrs b0, 3 ;5/6
rjmp b410 ; 7 37
;b411: ;
sbrc b0, 2 ;7/8
rjmp b4111 ; 9 39
;b4110: ; 38
doubleP ; 2
subA ; 4
doHighB 2 ; 6 44
;b60: ; 44
doubleP ; 2
doHighB 1 ; 4
sbrs b0, 1 ;5/6
rjmp b600 ; 7 51
;b601: ; 50
doubleP ; 2
addA ; 4
doHighB 0 ; 6 56
ret
b2000: ; 25
doubleP ; 2
doHighB 4 ; 4 29
b40: ; 29 31
doubleP ; 2
doHighB 3 ; 4
sbrs b0, 3 ;5/6
rjmp b400 ; 7 36
b401: ; 35
sbrs b0, 2 ;1/2
rjmp b4010 ; 3 38 18
b4011: ; 37
addA ; 2
doubleP ; 4
doHighB 2 ; 6 43
;b61:
doubleP ; 2 43
doHighB 1 ; 4
sbrs b0, 1 ;5/6
rjmp b610 ; 7 50
;b611: ; 49 6
doubleP ; 8
subA ; 10
doHighB 0 ; 12 55
ret
b2010: ; 24
doubleP ; 2
addA ; 4
doHighB 4 ; 6 30
;b40: ; 30
doubleP ; 8
doHighB 3 ; 10
sbrs b0, 3 ; 11 XXX
rjmp b400 ; 13 37
;b401: ; 36
sbrs b0, 2 ;1/2
rjmp b4010 ; 3 39 18
;b4011: ; 38
addA ; 2
doubleP ; 4
doHighB 2 ; 6 44
;b61:
doubleP ; 2 44
doHighB 1 ; 4
sbrs b0, 1 ;5/6
rjmp b610 ; 7 51
;b611: ; 50 6
doubleP ; 8
subA ; 10
doHighB 0 ; 12 56
ret
; b210: reached from the b21 steps (which have just done
; "doubleP / doHighB 5") when the tested b0 bit 5 is clear.
; The b2101 path runs inlined copies of the b41/b4110/b60/b601
; steps through to ret; b1 continues from bit 4 down to bit 0.
b210: ; 21 ? 22
sbrs b0, 4 ;1/2
rjmp b2100 ; 3 24
;b2101: ; 24
doubleP ; 2
; b1 bit 5 was handled before the jump to b210; after this doubleP
; it is bit 4's turn (as in b2100/b2111). Repeating bit 5 here
; would count it twice and drop bit 4.
doHighB 4 ; 4
subA ; 6 30
;b41: ; 30
doubleP ; 2
doHighB 3 ; 4
sbrs b0, 3 ;5/6
rjmp b410 ; 7 37
;b411: ; 36
sbrc b0, 2 ;1/2
rjmp b4111 ; 3 39
;b4110: ; 38
doubleP ; 2
subA ; 4
doHighB 2 ; 6 44
;b60:
doubleP ; 2 44
doHighB 1 ; 4
sbrs b0, 1 ;5/6
rjmp b600 ; 7 51
;b601: ; 50
doubleP ; 8
addA ; 10
doHighB 0 ; 12 56
ret
b2100: ; 24
subA ; 2
doubleP ; 4
doHighB 4 ; 6 30
;b40: ; 30
doubleP ; 8
doHighB 3 ; 10
sbrs b0, 3 ; 11
rjmp b400 ; 13 37
;b401: ; 36 21
sbrs b0, 2 ;1/2
rjmp b4010 ; 3 39 18
;b4011: ; 38 18
addA ; 2
doubleP ; 4
doHighB 2 ; 6 44
;b61:
doubleP ; 2 44 12
doHighB 1 ; 4
sbrs b0, 1 ;5/6
rjmp b610 ; 7 51
;b611: ; 50 6
doubleP ; 8
subA ; 10
doHighB 0 ; 12 56
ret
b2111: ; 23
doubleP ; 2
doHighB 4 ; 4 27
rjmp b41 ; 6 29
b400: ; 37 21
sbrs b0, 2 ;1/2
rjmp b4000 ; 3 40 16
;b4001: ; 39 19
doubleP ; 2
addA ; 4
doHighB 2 ; 6 45
;b61:
doubleP ; 2 45 13
doHighB 1 ; 4
sbrc b0, 1 ;5/6
rjmp b611 ; 7 52 6
;b610: ; 51 6
subA ; 2
doubleP ; 4
doHighB 0 ; 6 57 :-(
ret
b4000: ; 40 16
doubleP ; 2
doHighB 2 ; 4 44
;b60:
doubleP ; 2 44 12
doHighB 1 ; 4
sbrs b0, 1 ;5/6
rjmp b600 ; 7 51
;b601: ; 50 6
doubleP ; 2
addA ; 4
doHighB 0 ; 6 56
ret
b4010: ; 39 18
doubleP ; 2
addA ; 4
doHighB 2 ; 6 45
;b60: ; 45 12
doubleP ; 2
doHighB 1 ; 4
sbrs b0, 1 ;5/6
rjmp b600 ; 7 52
;b601: ; 51 6
doubleP ; 2
addA ; 4
doHighB 0 ; 6 57
ret
b410: ; 37 21
sbrs b0, 2 ;1/2
rjmp b4100 ; 3 40 18
;b4101: ; 39 18
doubleP ; 2
subA ; 4
doHighB 2 ; 6 45
;b61:
doubleP ; 2 45 12
doHighB 1 ; 4
sbrc b0, 1 ;5/6
rjmp b611 ; 7 52
;b610: ; 51 6
subA ; 2
doubleP ; 4
doHighB 0 ; 6 57 :-(
ret
b4100: ; 40 18
subA ; 2
doubleP ; 4
doHighB 2 ; 6 46
;b60:
doubleP ; 2 46 12
doHighB 1 ; 4
sbrs b0, 1 ;5/6
rjmp b600 ; 7 53 4
;b601: ; 52 6
doubleP ; 2
addA ; 4
doHighB 0 ; 6 58 :-((
ret
b4111: ; 39 17
doubleP ; 2
doHighB 2 ; 4 43
;b61:
doubleP ; 2 43 13
doHighB 1 ; 4
sbrs b0, 1 ;5/6
rjmp b610 ; 7 50 6
;b611: ; 49
doubleP ; 2 6
subA ; 4
doHighB 0 ; 6 55
ret
b600: ; 51 4
doubleP ; 2
doHighB 0 ; 4 55 ;-)
ret
b610: ; 51 6
subA ; 2
doubleP ; 4
doHighB 0 ; 6 57
ret
theEnd:
stuck:
break
sleep
rjmp stuck
empty: ret
.def a = r22
.def b = r26
.def ah = r23
.def bh = r27
.def p = r24
.def ph = r25
testTest:
ldi a, 15
ldi ah, 1
movw b, a
nextA:
subi a, -1
sbci ah, -2
ldi b, 13
ldi bh, 128
movw p, a
nextB:
adiw b, 31
movw a0, a
movw b0, b
rcall mpy16BEB16
cp p0, p
cpc p1, ph
rcall bad
add p, a
adc ph, ah
ldi a0, 130
cpi b, 3
cpc bh, a0
brmi nextB
rjmp nextA
bad:
ret
break
sleep
rjmp bad
答案 4 :(得分:0)
作为起点的有符号和无符号8×8→8 / 16,16×16→16/32和8×16→16/24位的相关算法和实现列表:
答案 5 :(得分:0)
在58个周期以下没有得到modified Booth时感到沮丧,粗略地使用预先计算的倍数 - 3 * a。 “中间位对”(可预测?)与我尝试的Booth-2变体完全相同的15个循环,第一个和最后一个需要太长时间。我把它留在了64:
mpy16P316: ; 0
;prepare a3h:a3 = 3 * a1:a0 ...
movP a3,a3h, a0,a1 ; 2
double a3, a3h ; 4
; addP a3,a3h, a0,a1 ; 6 ... by half
lsl b0 ; 5 gains speed exactly once
brcc _ ;6/7
brpl _2 ;7/8
_3: ; 7
addP a3,a3h, a0,a1 ; 2 other half
movP p0,p1, a3,a3h ; 4 11
_2: ; 8
movP p0,p1, a3,a3h ; 2 reason for delay
addP a3,a3h, a0,a1 ; 4 12 other half
_: ; 7
brmi _1 ;1/2 9
_0: ; 8
ldi p0, 0 ; 1
ldi p1, 0 ; 2
addP a3,a3h, a0,a1 ; 4 12 other half
_1: ; 9
movP p0,p1, a0,a1 ; 2
addP a3,a3h, a0,a1 ; 4 13
doHighB 7 ; 2 13
doHighB 7 ; 4 17
doHighB 6 ; 2 17
doubleP ; 4 1
doHighB 5 ; 6
sbrs b0, 6 ;7/8
rjmp no6 ; 9
sbrs b0, 5 ;9/10
rjmp no5 ; 11
doubleP ; 12 2
add3 ; 14
no5: ; "11"
addA ; 13
no56:
doubleP ; 15 32 2
no6: ; "9"
sbrs b0, 5 ;10/11
rjmp no56 ; 12
doubleP ; 13 2
addA ; 15 32
; same for 43 ; 15
doHighB 2 ; 2 47
doubleP ; 4 5
doHighB 1 ; 6
sbrs b0, 2 ;7/8
rjmp no2 ; 9
sbrs b0, 1 ;9/10
rjmp no1 ; 11
doubleP ; 12 6
add3 ; 14
no1: ; 11
addA ; 13
no12:
doubleP ; 15 6
no2: ; 9
sbrs b0, 1 ;10/11
rjmp no12 ; 12
doubleP ; 13 6
addA ; 15
doHighB 0 ; 17 64?!
ret
答案 6 :(得分:0)
第一次尝试使用计算跳转(computed goto),基于从低位端(Little End)开始的 Booth-2。比“交织展开的大端 Booth”慢(59 对 57 个周期(?)),但更小(约 401 个字(word)+ 返回)。
.equ base = 256
; modified Booth from Little End; multiply proper at mpy16LEB16
; 16 snippets reached via computed goto, for 1 multiplier nibble
.org base
; 00 00
doubleA ; 2
doHighB 1 ; 4
doubleA ; 6
doHighB 2 ; 8
doubleA ;10 low 4 bits done, 6 words to go
; doHighB 3 ;12 could keep doing this all day ...
; doubleA ;14 for no conceivable gain
;; doHighB 4 ;16 this would be the 1st too many
; rjmp hi0+4 ;16 -4
rjmp hi0 ;12
b600: ; 51 squeezing the shortest out and in
doubleP ; 2 branches saves _five_ words
doHighB 0 ; 4 55 ;-)
ret
.org base+16
; 00 01
addA ; 2
doubleA ; 4
doHighB 1 ; 6
doubleA ; 8
doHighB 2 ;10
doubleA ;12 low 4 bits done, 4 words to go
doHighB 3 ;14
rjmp hi0+2 ;16
.org base+32
doubleA ; 2 true Booth or not true Booth
subA ; 4
doHighB 1 ; 6
doubleA ; 8
doHighB 2 ;10
doubleA ;12
doHighB 3 ;14
rjmp hi0+2 ;16
.org base+48
subA ; 2
doubleA ; 4
doHighB 1 ; 6
doubleA ; 8
doHighB 2 ;10
doubleA ;12
doHighB 3 ;14
rjmp hi0+2 ;16
.org base+64
; 01 00
doubleA ; 2
doHighB 1 ; 4
doubleA ; 6
doHighB 2 ; 8
addA ;10
doubleA ;12
doHighB 3 ;14
rjmp hi0+2 ;16
.org base+80
; 01 01
addA ; 2
doubleA ; 4
doHighB 1 ; 6
doubleA ; 8
doHighB 2 ;10
addA ;12
doubleA ;14
rjmp hi0 ;16
.org base+96
; 01 10
doubleA ; 2
subA ; 4
doHighB 1 ; 6
doubleA ; 8
doHighB 2 ;10
doubleA ;12
addA ;14
rjmp hi0 ;16
.org base+112
; 01 11
doubleA ; 2
addA ; 4
doHighB 1 ; 6
doubleA ; 8
doHighB 2 ;10
doubleA ;12
addA ;14
rjmp hi0 ;16
.org base+128
; 10 00
doubleA ; 2
doHighB 1 ; 4
doubleA ; 6
doHighB 2 ; 8
doubleA ;10
subA ;12
doHighB 3 ;14
rjmp hi1+2 ;16
.org base+144
; 10 01
addA ; 2
doubleA ; 4
doHighB 1 ; 6
doubleA ; 8
doHighB 2 ;10
doubleA ;12
subA ;14
rjmp hi1 ;16
.org base+160
; 10 10
doubleA ; 2
subA ; 4
doHighB 1 ; 6
doubleA ; 8
doHighB 2 ;10
subA ;12
doubleA ;14
rjmp hi1 ;16
.org base+176
; 10 11
doubleA ; 2
addA ; 4
doHighB 1 ; 6
doubleA ; 8
doHighB 2 ;10
subA ;12
doubleA ;14
rjmp hi1 ;16
.org base+192
; 11 00
doubleA ; 2
doHighB 1 ; 4
doubleA ; 6
doHighB 2 ; 8
subA ;10
doubleA ;12
doHighB 3 ;14
rjmp hi1+2 ;16
.org base+208
; 11 01
addA ; 2
doubleA ; 4
doHighB 1 ; 6
doubleA ; 8
doHighB 2 ;10
subA ;12
doubleA ;14
rjmp hi1 ;16
.org base+224
; 11 10
doubleA ; 2
subA ; 4
doHighB 1 ; 6
doubleA ; 8
doHighB 2 ;10
doubleA ;12
doHighB 3 ;14
rjmp hi1+2 ;16
.org base+240
; 11 11
subA ; 2
doubleA ; 4
doHighB 1 ; 6
doubleA ; 8
doHighB 2 ;10
doubleA ;12 making the fast cases symmetrical
hi1:
doHighB 3 ; 2 28
doubleA ; 4
doHighB 4 ; 6
; four bits to go, last known to have been 1
sbrc b0, 5 ;7/8
rjmp sub4_1 ; 9 37
;add4_1
sbrc b0, 4 ;9/10
rjmp add41 ; 11
;add42 38
doubleA ; 2
addA ; 4
;b20 42
doHighB 5 ; 2
doubleA ; 4
doHighB 6 ; 6
lsl b0 ; 7
brcc add6_0 ;8/9 51
;sub6_0: 1b0
brmi sub61 ;9/10 52
;sub62 51
doubleA ; 2
subA ; 4
doHighB 7 ; 6 57 _not_ funny
ret
sub61: ; 52
doubleA ; 2
addA ; 4
doHighB 7 ; 6 58 _not at all_
ret
add6_0: ; 51
brpl noAS6 ;1/2 53
addA ; 3
doubleA ; 5
doHighB 7 ; 7 58 _not at all_
ret
noAS6: ; 53
doubleA ; 2
doHighB 7 ; 4 57 _not_ funny
ret
sub4_1:; bb1b1 37
sbrc b0, 4 ;1/2
rjmp sub40 ; 3 40
sub41: ; 39
subA ; 2
sub40: ; 41
doubleA ; 4 43
;b21 43
doHighB 5 ; 2
doubleA ; 4
doHighB 6 ; 6
lsl b0 ; 7
brcs sub6_1 ;8/9 52
;add6_1: 0b1
brpl add61 ;9/10 53
;add62 52
doubleA ; 2
addA ; 4
doHighB 7 ; 6 58 _not_ funny
ret
add61: ; 53
addA ; 2
doubleA ; 4
doHighB 7 ; 6 59 _not at all_
ret
add4_0:; bb0b0 37
sbrs b0, 4 ;1/2
rjmp add40 ; 3 40
add41: ; 39
addA ; 2
add40: ; 41
doubleA ; 4 43
;b20 43
doHighB 5 ; 2
doubleA ; 4
doHighB 6 ; 6
lsl b0 ; 7
brcc add6_0 ;8/9 52
;sub6_0: 1b0
brmi sub61 ;9/10 53
;sub62 52
doubleA ; 2
subA ; 4
doHighB 7 ; 6 57 _not_ funny
ret
sub6_1: ; 51
brmi noAS6 ;1/2 53
subA ; 3
doubleA ; 5
doHighB 7 ; 7 58 _not at all_
ret
hi0:
doHighB 3 ; 2 28
doubleA ; 4
doHighB 4 ; 6
; four bits to go, last known to have been 0
sbrs b0, 5 ;7/8
rjmp add4_0 ; 9
;sub4_0
sbrs b0, 4 ;9/10
rjmp sub41 ; 11 39
;sub42 38
doubleA ; 2
subA ; 4
;b21 42
doHighB 5 ; 2
doubleA ; 4
doHighB 6 ; 6
lsl b0 ; 7
brcs sub6_1 ;8/9 51
;add6_1: 0b1
brpl add61 ;9/10 52
;add62 51
doubleA ; 2
addA ; 4
doHighB 7 ; 6 57 _not_ funny
ret
.equ code = high(base)
mpy16LEB16: ; 0 modified Booth from Little End
mov ZL, b0 ; 1
andi ZL, 15 ; 2
swap ZL ; 3
ldi ZH, code; 4
ldi p0, 0 ; 5
ldi p1, 0 ; 6
sbrc b0, 7 ;7/8
add p1, a0 ; 8
doHighB 0 ;10
ijmp ;12
答案 7 :(得分:0)
接近空间意识的实施(参考,如果不是理智) 使用的资源应该是合格的(g:狂野猜测,G:猜测,e:有根据的猜测,E:估计,s:模拟,a:分析,A:分析和证实,如果通过模拟,m:测量)(字×worstCaseCycleCount是一种类似于IC设计中的面积×延迟的成本测量(“优点”的单个数字?)
$('select[name=category]').change(function () {
console.log($(":selected", this).text());
});
(我不止一次检查过相同的“wordcycle entries”。)
宏,应该可以理解为
algoritm bits cycles words regs remarks
wc exp ×wccc excl.
a,b,p
shift factor left 16×16→16(61 56 87 5307 see other
62 57 62 3844 answer)
73 68 37 2701
81 77 24 1944 (see edit history)
85 70g 15 1275 w*expcc~1050
108 64g 18 1944 w*expcc~1150
(jump table, for reference 51E49g 888e 44K G (almost done)
44E39g2888E127K e)
16×16→16位,85/81周期,15/24字:
.MACRO doubleA ; adds (shifts/weights) factor "a"
add a0, a0 ; +1
adc a1, a1 ; +2
.EndM
.MACRO doHighB ; "does" bit in b1, bit number as a parameter
sbrc b1, @0 ; 1
add p1, a0 ; 2
.EndM
.MACRO condAdd
doHighB @0 ; +2
sbrs b0, @1 ; +3
rjmp PC+3 ;+4/5
addA ; +6
.EndM
.MACRO step16; "do" 2 bits, bit# in b1 and b0 as a parameter
condAdd @0, @0 ; +6
doubleA ; +8
.EndM
16×16→16位,73个周期,37个字:
; mpy16x16: p1:p0 = low 16 bits of (a1:a0 * b1:b0), unsigned.
; Compact looped shift-and-add: both multiplier bytes are shifted
; right one bit per iteration while "a" is doubled. The loop ends
; once b1:b0 has become zero (and, with the earlyOutA code
; assembled, also once a1:a0 has become zero).
; Destroys a1:a0 and b1:b0.
mpy16x16: ; 0
clr p0 ; 1
clr p1 ; 2
; wanting early out: shifting the factor; faster from Little End
lsr b0 ; 3
brcc shiftB1 ;4/5
addFull: ; b0 bit was set: add the full 16-bit "a"
addA ; 2
shiftB1: ; due to handling this 2nd multiplier
lsr b1 ; 3 bit even if the multiplicand is zero
brcc pc+2 ;4/5 after the first shift, the earlyOutA
addHigh: ; variant is 3 cycles slower than 4.8
add p1, a0 ; 5 libgcc __mulhi3 - for * 0 or 0x8000
shiftA:
doubleA ; 7 why is adc zero-flag handling ...
#if 1||earlyOutA
; Z after doubleA reflects a1 only, hence the separate test of a0
brne shiftB0 ;+1/2 7 ... different from subc/sbci/cpc?
tst a0 ;+ 2
breq done ;+ 3/-1upto-69?
#endif
shiftB0:
lsr b0 ; 8
brcs addFull ;9/10
; C is clear here, so sbci subtracts nothing; it leaves Z set only
; if b1 is zero AND Z was already set by the lsr b0 above,
; i.e. Z = (b1:b0 == 0)
sbci b1, 0 ; 10 presume zero or high reg?
brne shiftB1 ;11/12-2
done: ; wc: 8*10+5=85 @15+1 words (?!)
ret ; best: 14 (0=b&0xfffe) (none for a)
;(earlyOutA: wc: 8*13+4=108 @18+1 words)
答案 8 :(得分:0)
最后是“不择手段”(No Holds Barred)的版本,即便还不是最终状态。哦,答案正文不能超过 30000 个字符,而未经删节的源码大约有 55K——以后再说。体积偏大(约 2900 个字(word)),速度快(≤ 44 个周期,预期约 39 个)。
2015-06-26 14:16:16.042 ibtoold[16859:662960] [MT] DVTAssertions: ASSERTION FAILURE in /Library/Caches/com.apple.xbs/Sources/IDEInterfaceBuilder/IDEInterfaceBuilder-8121.17/InterfaceBuilderKit/Document/Platform/IBIdiom.m:105
Details: Assertion failed: !
Object: <IBIPadIdiom: 0x7f8f53e1cb50>
Method: -filePathWithTargetDeviceSuffixForBaseFilePath:
Thread: <NSThread: 0x7f8f53c09d80>{number = 1, name = main}
Hints: None
Backtrace:
0 0x0000000104ba2ff9 -[DVTAssertionHandler handleFailureInMethod:object:fileName:lineNumber:assertionSignature:messageFormat:arguments:] (in DVTFoundation)
1 0x0000000104ba2b2d _DVTAssertionHandler (in DVTFoundation)
2 0x0000000104ba2d44 _DVTAssertionFailureHandler (in DVTFoundation)
3 0x0000000104ba2ca6 _DVTAssertionFailureHandler (in DVTFoundation)
4 0x0000000105c972b5 -[IBIdiom filePathWithTargetDeviceSuffixForBaseFilePath:] (in IDEInterfaceBuilderKit)
5 0x000000010f8e6904 (in IDEInterfaceBuilderCocoaTouchIntegration)
6 0x0000000105b66866 -[IBDocument finishCompilingWithOutputPath:options:error:] (in IDEInterfaceBuilderKit)
7 0x0000000104961ff6 (in ibtoold)
8 0x00000001049644dc (in ibtoold)
9 0x0000000104967f15 (in ibtoold)
10 0x00000001049685c9 (in ibtoold)
11 0x00000001049684a0 (in ibtoold)
12 0x0000000104957bab (in ibtoold)
13 0x0000000104967b0a (in ibtoold)
14 0x0000000104966d6f (in ibtoold)
15 0x00007fff890825c9 start (in libdyld.dylib)
Command /Applications/Xcode-beta.app/Contents/Developer/usr/bin/ibtool failed with exit code 255
...
star_t:
rjmp testTest
.org 0x20
.def a0 = r16 ; addend low byte
.def a1 = r17
.def m0 = r18 ; multiplier low byte
.def m1 = r19
.def p0 = r20 ; (partial) product low byte
.def p1 = r21
.def _zero=r1
.def tmp = r0
; some macros using factors a1:a0, m1:m0 and product p1:p0
.MACRO addA ; adds (weighted) factor "a" into product
add p0, a0 ; +1
adc p1, a1 ; +2
.EndM
.MACRO subA ; subtracts (weighted) factor "a" from product
sub p0, a0 ; +1
sbc p1, a1 ; +2
.EndM
.MACRO doubleP ; adds (shifts/weights)(partial) product
add p0, p0 ; +1
adc p1, p1 ; +2
.EndM
; settle / doM1: second-stage dispatch on the multiplier's high
; byte m1, via an indirect jump into the page holding the "highs"
; snippets (ZH = high(highs), ZL = m1).
settle:
inc m1 ; 1 _looks_ smarter than add p1, a0
doM1:
clr XH ; 2 ?
; mov a0, tmp
ldi ZH, high(highs); 3
mov ZL, m1 ; 4
ijmp ; 6
; mpy1616: entry point. Prepares shifted copies of the multiplicand
; in X, copies a1:a0 into p1:p0 and dispatches on the multiplier's
; low byte m0 via an indirect jump (ZH = high(jump), ZL = m0).
mpy1616: ; 0
movw XL, a0 ; 1
andi XH, 15 ; 2
eor XH, XL ; 3 XH = a0 ^ (a1 & 15)
andi XL, 15 ; 4
; swapping first could use -+ (dropping carry) in stead of ex-or
swap XL ; 5 XL = a0 << 4
; _if_ XH was used in few worst loxx cases, do these "on demand"
swap XH ; 6 XH = ((a1^a0)<<4)|(a0>>4)
eor XH, XL ; 7 XH = (a1<<4)|(a0>>4)
; i.e. XH:XL = (a1:a0 << 4), truncated to 16 bits
; mov tmp, a0
movw p0, a0 ; 1 other way 'round with gcc ABI?
ldi ZH, high(jump); 1
mov ZL, m0 ; 2
trampoline:
ijmp ; 14 (12+2) + 15 + 8 + 7 - 44? really?
...
; hiNN routines: add 0xNN * a0 (mod 256) into p1, then return.
; Reached by ijmp through the `highs` table (ZL = m1). XL is expected to hold
; (a0 << 4) & 0xff as prepared by mpy1616; a0 and XL are used as scratch.
; Labels fall through into shared tails; the trailing numbers are the
; author's countdown of instructions left before the return.
#define done ret
; 7 = 1 + 2*3, 6 = 2*3 (a0 is doubled, then added three times); 3, 2, 1 plain
hi07:
add p1, a0 ; 5
hi06:
add a0, a0 ; 4
hi03:
add p1, a0 ; 3
hi02:
add p1, a0 ; 2
hi01:
add p1, a0 ; 1
;hi00:
done
; 10 = 2*(1 + 4), 5 = 1 + 4, 4 = 4
hi0a:
add a0, a0 ; 5
hi05:
add p1, a0 ; 4
hi04:
add a0, a0 ; 3
add a0, a0 ; 2
add p1, a0 ; 1
done
; 9 = 1 + 8, 8 = 8
hi09:
add p1, a0 ; 5
hi08:
add a0, a0 ; 4
add a0, a0 ; 3
add a0, a0 ; 2
add p1, a0 ; 1
done
; 12 = 16 - 4, 11 = 16 - 4 - 1
hi0b:
sub p1, a0 ; 5
hi0c:
add a0, a0 ; 4
add a0, a0 ; 3
sub p1, a0 ; 2
add p1, XL ; 1
done
; 15 = 16 - 1, 14 = 16 - 2, 13 = 16 - 3
hi0d:
sub p1, a0 ; 4
hi0e:
sub p1, a0 ; 3
hi0f:
sub p1, a0 ; 2
add p1, XL ; 1
done
; hi10..hi2a: hiNN adds 0xNN * a0 (mod 256) to p1 and returns (`done`).
; XL is expected to hold (a0 << 4) & 0xff on entry; a0 and XL are scratch.
; 23 = 1 + 2*3 + 16, 22 = 2*3 + 16, 19/18/17 = 3/2/1 + 16, 16 = XL
hi17:
add p1, a0 ; 6
hi16:
add a0, a0 ; 5
hi13:
add p1, a0 ; 4
hi12:
add p1, a0 ; 3
hi11:
add p1, a0 ; 2
hi10:
add p1, XL ; 1
done
;hi1a:
; add a0, a0 ; 6
; 21 = 1 + 4 + 16, 20 = 4 + 16
hi15:
add p1, a0 ; 5
hi14:
add a0, a0 ; 4
add a0, a0 ; 3
add p1, a0 ; 2
add p1, XL ; 1
done
; 25 = 1 + 16 + 2*4, 24 = 16 + 2*4
hi19:
add p1, a0 ; 6
hi18:
add p1, XL ; 5
add a0, a0 ; 4
add a0, a0 ; 3
add p1, a0 ; 2
add p1, a0 ; 1
done
; 28 = 2*(16 - 2), 27 = 28 - 1, 26 = 28 - 2
hi1a:
sub p1, a0 ; 6
hi1b:
sub p1, a0 ; 5
hi1c:
add a0, a0 ; 4
sub XL, a0 ; 3
add XL, XL ; 2
add p1, XL ; 1
done
; 31/30/29 = 2*16 - 1/2/3
hi1d:
sub p1, a0 ; 5
hi1e:
sub p1, a0 ; 4
hi1f:
sub p1, a0 ; 3
add p1, XL ; 2
add p1, XL ; 1
done
; 39 = 1 + 2*3 + 32, 38 = 2*3 + 32, 35/34/33 = 3/2/1 + 32, 32 = 2*16
hi27:
add p1, a0 ; 7
hi26:
add a0, a0 ; 6
hi23:
add p1, a0 ; 5
hi22:
add p1, a0 ; 4
hi21:
add p1, a0 ; 3
hi20:
add p1, XL ; 2
add p1, XL ; 1
done
; 36 = 2*(16 + 2), 37 = 1 + 36, 42 = 2 + 2*(16 + 2*2) (a0 doubled first)
hi2a:
add a0, a0 ; 6
hi25:
add p1, a0 ; 5
hi24:
add XL, a0 ; 4
add XL, a0 ; 3
add p1, XL ; 2
add p1, XL ; 1
done
; ...
; hi4d..hi56: hiNN adds 0xNN * a0 (mod 256) to p1 and returns (`done`).
; XL is expected to hold (a0 << 4) & 0xff on entry and is used as scratch.
; 79/78/77 = 16 + 4*16 - 1/2/3
hi4d:
sub p1, a0 ; 7
hi4e:
sub p1, a0 ; 6
hi4f:
sub p1, a0 ; 5
add p1, XL ; 4
add XL, XL ; 3
add XL, XL ; 2
add p1, XL ; 1
done
; 83/82/81 = 3/2/1 + 80, 80 = 16 + 4*16
hi53:;
add p1, a0 ; 7
hi52:;
add p1, a0 ; 6
hi51:;
add p1, a0 ; 5
hi50:;
add p1, XL ; 4
add XL, XL ; 3
add XL, XL ; 2
add p1, XL ; 1
done
; hi56: only the first instruction is shown; the remainder of this chain is
; elided in this excerpt, so it is not a complete routine as listed.
hi56:;
add p1, a0 ; 7
; ...
; hi5d..hi63: hiNN adds 0xNN * a0 (mod 256) to p1 and returns (`done`).
; XL is expected to hold (a0 << 4) & 0xff on entry and is used as scratch.
; 95/94/93 = 3*(2*16) - 1/2/3
hi5d:;
sub p1, a0 ; 7
hi5e:;
sub p1, a0 ; 6
hi5f:;
sub p1, a0 ; 5
add XL, XL ; 4
add p1, XL ; 3
add p1, XL ; 2
add p1, XL ; 1
done
; 99/98/97 = 3/2/1 + 96, 96 = 3*(2*16)
hi63:;
add p1, a0 ; 7
hi62:;
add p1, a0 ; 6
hi61:;
add p1, a0 ; 5
hi60:;
add XL, XL ; 4
add p1, XL ; 3
add p1, XL ; 2
add p1, XL ; 1
done
; ...
; hi67..hi6f: hiNN adds 0xNN * a0 (mod 256) to p1 and returns (`done`).
; XL is expected to hold (a0 << 4) & 0xff on entry and is used as scratch.
; 104 = 2 + 102, 103 = 1 + 102, 102 = 3 * 2*(16 + 1)
hi68:;
add p1, a0 ; 7
hi67:;
add p1, a0 ; 6
add XL, a0 ; 5
add XL, XL ; 4
add p1, XL ; 3
add p1, XL ; 2
add p1, XL ; 1
done
; 106 = 1 + 105, 105 = 15 + 3 * 2*15 with 15 = 16 - 1
hi6a:;
add p1, a0 ; 7
hi69:; ; 105 ~ 15 * 7
sub XL, a0 ; 6
add p1, XL ; 5
add XL, XL ; 4
add p1, XL ; 3
add p1, XL ; 2
add p1, XL ; 1
done
; 108 = 3 * 2*(16 + 2), 107 = 108 - 1
hi6b:; no symmetry
sub p1, a0 ; 7
hi6c:; no symmetry
add XL, a0 ; 6
add XL, a0 ; 5
add XL, XL ; 4
add p1, XL ; 3
add p1, XL ; 2
add p1, XL ; 1
done
; 111/110/109 = 128 - 16 - 1/2/3. 128 * a0 mod 256 is 128 when a0 is odd and
; 0 otherwise, hence the conditional `subi p1, -128` (= add 128) on a0 bit 0.
hi6d:; no symmetry
;01101101
sub p1, a0 ; 6
hi6e:;
sub p1, a0 ; 5
hi6f:;
sub p1, a0 ; 4
sub p1, XL ; 3
sbrc a0, 0 ; 2
subi p1, -128; 1
done
; hi70..hi7f: hiNN adds 0xNN * a0 (mod 256) to p1 and returns (`done`).
; XL is expected to hold (a0 << 4) & 0xff on entry, so bit 4 of XL equals
; bit 0 of the original a0. The pair `sbrc XL, 4` / `subi p1, -128` adds
; 128 * a0 mod 256 (128 if a0 was odd, else 0) and keeps working after a0
; itself has been doubled.
; 115/114/113 = 3/2/1 + 112, 112 = 128 - 16
hi73:;
add p1, a0 ; 6
hi72:;
add p1, a0 ; 5
hi71:;
add p1, a0 ; 4
hi70:;
sub p1, XL ; 3
sbrc XL, 4 ; 2
subi p1, -128; 1
done
; 117 = 1 + 116, 116 = 2*2 - 16 + 128
hi75:; not quite symmetrical
add p1, a0 ; 7
hi74:;
add a0, a0 ; 6
add p1, a0 ; 5
add p1, a0 ; 4
sub p1, XL ; 3
sbrc XL, 4 ; 2
subi p1, -128; 1
done
; 120 = 2 * 4*(16 - 1), 119 = 120 - 1, 118 = 120 - 2
hi76:;
sub p1, a0 ; 7
hi77:;
sub p1, a0 ; 6
hi78:; ; 120 ~ 15 * 8
sub XL, a0 ; 5
add XL, XL ; 4
add XL, XL ; 3
add p1, XL ; 2
add p1, XL ; 1
done
; 124 = 4*(2*16 - 1), 123 = 124 - 1
hi7b:;
sub p1, a0 ; 6
hi7c:;
add XL, XL ; 5
sub XL, a0 ; 4
add XL, XL ; 3
add XL, XL ; 2
add p1, XL ; 1
done
; 127/126/125 = 128 - 1/2/3; 122 = 128 - 3*2 (a0 doubled), 121 = 122 - 1
hi79:
sub p1, a0 ; 7
hi7a:
add a0, a0 ; 6
hi7d:
sub p1, a0 ; 5
hi7e:
sub p1, a0 ; 4
hi7f:
sub p1, a0 ; 3
sbrc XL, 4 ; 2
subi p1, -128; 1
done
; hi80..hi8f: hiNN adds 0xNN * a0 (mod 256) to p1 and returns (`done`).
; XL is expected to hold (a0 << 4) & 0xff on entry (bit 4 of XL = bit 0 of
; a0); `sbrc XL, 4` / `subi p1, -128` adds 128 * a0 mod 256.
; 133..129 = 5..1 + 128, 128 = conditional +128
hi85:
add p1, a0 ; 7
hi84:
add p1, a0 ; 6
hi83:
add p1, a0 ; 5
hi82:
add p1, a0 ; 4
hi81:
add p1, a0 ; 3
hi80:
sbrc XL, 4 ; 2
subi p1, -128; 1
done
; 135 = 15 + 2 * 4*15 with 15 = 16 - 1, 134 = 135 - 1
hi86:
sub p1, a0 ; 7
hi87:; not quite symmetrical 135 ~ 15 * 9
sub XL, a0 ; 6
add p1, XL ; 5
add XL, XL ; 4
add XL, XL ; 3
add p1, XL ; 2
add p1, XL ; 1
done
; 138/137 = 2/1 + 136, 136 = 8*(16 + 1)
hi8a:;
add p1, a0 ; 7
hi89:;
add p1, a0 ; 6
hi88:;
add XL, a0 ; 5
add XL, XL ; 4
add XL, XL ; 3
add XL, XL ; 2
add p1, XL ; 1
done
; 140 = 16 + 4*(2*16 - 1), 139 = 140 - 1
hi8b:; not quite symmetrical
sub p1, a0 ; 7
hi8c:; not quite symmetrical
add p1, XL ; 6
add XL, XL ; 5
sub XL, a0 ; 4
add XL, XL ; 3
add XL, XL ; 2
add p1, XL ; 1
done
; 143/142/141 = 128 + 16 - 1/2/3
hi8d:
sub p1, a0 ; 6
hi8e:
sub p1, a0 ; 5
hi8f:
sub p1, a0 ; 4
add p1, XL ; 3
sbrc XL, 4 ; 2
subi p1, -128; 1
done
; hi90..hi9f: hiNN adds 0xNN * a0 (mod 256) to p1 and returns (`done`).
; XL is expected to hold (a0 << 4) & 0xff on entry and is used as scratch.
; Values above 128 are partly formed as 256 - k, i.e. by subtracting k * a0.
; 147/146/145 = 3/2/1 + 144, 144 = 16 + 128 (conditional +128 on a0 bit 0)
hi93: ; 147 7*7*3
add p1, a0 ; 6
hi92:
add p1, a0 ; 5
hi91:
add p1, a0 ; 4
hi90:
add p1, XL ; 3
sbrc XL, 4 ; 2
subi p1, -128; 1
done
; 149 = 1 + 148, 148 = 16 + 2 * 2*(2*16 + 1)
hi95:
add p1, a0 ; 7
hi94:; no symmetry
add p1, XL ; 6
add XL, XL ; 5
add XL, a0 ; 4
add XL, XL ; 3
add p1, XL ; 2
add p1, XL ; 1
done
; 151 = 256 - (15 + 3 * 2*15) = 256 - 105, 150 = 151 - 1
hi96:; ; 150 ~ 15 * 10 nananananaana
sub p1, a0 ; 7
hi97:
sub XL, a0 ; 6 151 ~ (256-)15*-7
sub p1, XL ; 5
add XL, XL ; 4
sub p1, XL ; 3
sub p1, XL ; 2
sub p1, XL ; 1
done
; 153 = 17 + 2 * 4*17 with 17 = 16 + 1, 152 = 153 - 1
hi98:;
sub p1, a0 ; 7
hi99:; ; 153 ~ 17 * 9
add XL, a0 ; 6
add p1, XL ; 5
add XL, XL ; 4
add XL, XL ; 3
add p1, XL ; 2
add p1, XL ; 1
done
; 154 = 256 - 3 * 2*(16 + 1) = 256 - 102, 155 = 154 + 1, 156 = 154 + 2
hi9c:;
add p1, a0 ; 7
hi9b:;
add p1, a0 ; 6
hi9a:;
add XL, a0 ; 5
add XL, XL ; 4
sub p1, XL ; 3
sub p1, XL ; 2
sub p1, XL ; 1
done
; 159/158/157 = 256 - 3 * 2*16 - 1/2/3
hi9d:; ; 157
sub p1, a0 ; 7
hi9e:;
sub p1, a0 ; 6
hi9f:;
sub p1, a0 ; 5
add XL, XL ; 4
sub p1, XL ; 3
sub p1, XL ; 2
sub p1, XL ; 1
done
; hia0..hia9: hiNN adds 0xNN * a0 (mod 256) to p1 and returns (`done`).
; XL is expected to hold (a0 << 4) & 0xff on entry and is used as scratch.
; 163/162/161 = 3/2/1 + 160, 160 = 256 - 3 * 2*16
hia3:;
add p1, a0 ; 7
hia2:;
add p1, a0 ; 6
hia1:;
add p1, a0 ; 5
hia0:;
add XL, XL ; 4
sub p1, XL ; 3
sub p1, XL ; 2
sub p1, XL ; 1
done
; 165 = 33 + 2 * 2*33 with 33 = 2*16 + 1, 164 = 165 - 1
hia4:;
sub p1, a0 ; 7
hia5:; ; 165 ~ 15 * 11
add XL, XL ; 6
add XL, a0 ; 5
add p1, XL ; 4
add XL, XL ; 3
add p1, XL ; 2
add p1, XL ; 1
done
; 166 = 256 - 2*15 - 4*15 = 256 - 90, 167 = 166 + 1
hia7:;
add p1, a0 ; 6
hia6:;
sub XL, a0 ; 5
add XL, XL ; 4
sub p1, XL ; 3
add XL, XL ; 2
sub p1, XL ; 1
done
; 168 = 256 - 16 - 2 * 2*(16 + 2) = 256 - 88, 169 = 168 + 1
hia9:;
add p1, a0 ; 7
hia8:;
sub p1, XL ; 6
add XL, a0 ; 5
add XL, a0 ; 4
add XL, XL ; 3
sub p1, XL ; 2
sub p1, XL ; 1
done
; hiaa / hiab / hiac: add 170 / 171 / 172 * a0 (mod 256) to p1, then return.
; XL is expected to hold (a0 << 4) & 0xff on entry and is used as scratch.
; With `greedy` set, hiaa is a stand-alone routine that saves one instruction
; at the cost of seven extra words; otherwise it shares the hiab/hiac tail.
hiaa:;
#if greedy
; 170 = 17 * (2 + 8) with 17 = 16 + 1. This variant must end in its own
; `done`: falling through into hiab would subtract from the finished result.
add XL, a0 ; 6 XL = 17 * a0
add XL, XL ; 5 XL = 34 * a0
add p1, XL ; 4 p1 += 34 * a0
add XL, XL ; 3
add XL, XL ; 2 XL = 136 * a0
add p1, XL ; 1 p1 += 136 * a0
done
#else
; 170 = 172 - 2: one extra subtraction ahead of hiab
sub p1, a0 ; 7
#endif
; 171 = 172 - 1
hiab:;
sub p1, a0 ; 6
; 172 = 256 - 16 - 4*(16 + 1) = 256 - 84
hiac:;
sub p1, XL ; 5
add XL, a0 ; 4
add XL, XL ; 3
add XL, XL ; 2
sub p1, XL ; 1
done
; hiad..hib5: hiNN adds 0xNN * a0 (mod 256) to p1 and returns (`done`).
; XL is expected to hold (a0 << 4) & 0xff on entry and is used as scratch.
; 175/174/173 = 256 - 16 - 4*16 - 1/2/3
hiad:;
sub p1, a0 ; 7
hiae:;
sub p1, a0 ; 6
hiaf:;
sub p1, a0 ; 5
sub p1, XL ; 4
add XL, XL ; 3
add XL, XL ; 2
sub p1, XL ; 1
done
; 179/178/177 = 3/2/1 + 176, 176 = 256 - 16 - 4*16
hib3:
add p1, a0 ; 7
hib2:
add p1, a0 ; 6
hib1:
add p1, a0 ; 5
hib0:;
sub p1, XL ; 4
add XL, XL ; 3
add XL, XL ; 2
sub p1, XL ; 1
done
; 180 = 256 - 16 - 4*(16 - 1) = 256 - 76, 181 = 180 + 1
hib5:
add p1, a0 ; 6
hib4:
sub p1, XL ; 5
sub XL, a0 ; 4
add XL, XL ; 3
add XL, XL ; 2
sub p1, XL ; 1
done
; ...
; hie9..hif8: hiNN adds 0xNN * a0 (mod 256) to p1 and returns (`done`).
; All of these are formed as 256 - k, i.e. by subtracting k * a0 from p1.
; XL is expected to hold (a0 << 4) & 0xff on entry; a0 is used as scratch.
; 236 = 256 - 4 - 16, 235 = 236 - 1
hieb: ; ouch
sub p1, a0 ; 5
hiec:
add a0, a0 ; 4
add a0, a0 ; 3
sub p1, a0 ; 2
sub p1, XL ; 1
done
; 239/238/237 = 256 - 16 - 1/2/3; 234 = 256 - 16 - 3*2 (a0 doubled), 233 = 234 - 1
hie9:
sub p1, a0 ; 6
hiea:
add a0, a0 ; 5
hied:
sub p1, a0 ; 4
hiee:
sub p1, a0 ; 3
hief:
sub p1, a0 ; 2
sub p1, XL ; 1
done
; 245..241 = 5..1 + 240, 240 = 256 - 16
hif5:
add p1, a0 ; 6
hif4:
add p1, a0 ; 5
hif3:
add p1, a0 ; 4
hif2:
add p1, a0 ; 3
hif1:
add p1, a0 ; 2
hif0:
sub p1, XL ; 1
done
; 248 = 256 - 2*4, 247 = 248 - 1, 246 = 248 - 2
hif6:
sub p1, a0 ; 6
hif7:
sub p1, a0 ; 5
hif8:
add a0, a0 ; 4
add a0, a0 ; 3
sub p1, a0 ; 2
sub p1, a0 ; 1
done
...
; highs: dispatch table for the high multiplier byte, one word per entry.
; doM1 loads ZH = high(highs), ZL = m1 and executes ijmp, so the table has to
; start on a 256-word boundary. The .org expression always advances to the
; NEXT boundary: if PC is already aligned, a whole 256-word page is skipped.
; Entry 0x00 is a bare return (`done` is `ret` here): nothing to add.
; Entries 0xf9..0xff are not jumps but a run of `sub p1, a0` that falls
; through to the final return, so entry 0x100 - k subtracts k * a0 from p1.
.org (PC + 0x100) & 0xffff00
highs:
; rjmp hi00
done;-) to start code with a ret-insn, move this table first
rjmp hi01
rjmp hi02
rjmp hi03
rjmp hi04
rjmp hi05
rjmp hi06
rjmp hi07
rjmp hi08
rjmp hi09
rjmp hi0a
rjmp hi0b
rjmp hi0c
rjmp hi0d
rjmp hi0e
rjmp hi0f
rjmp hi10
rjmp hi11
rjmp hi12
rjmp hi13
; ... (entries 0x14..0xed are elided in this excerpt)
rjmp hiee
rjmp hief
rjmp hif0
rjmp hif1
rjmp hif2
rjmp hif3
rjmp hif4
rjmp hif5
rjmp hif6
rjmp hif7
rjmp hif8
; rjmp hif9 ; jmp + 4 adds + 1 sub
sub p1, a0 ; 7 entry 0xf9: -7 * a0
; rjmp hifa ; jmp + 3 adds + 2 subs -lutin?
sub p1, a0 ; 6 entry 0xfa
; rjmp hifb ; jmp + 2 adds + 2 subs
sub p1, a0 ; 5 entry 0xfb
; rjmp hifc
sub p1, a0 ; 4 entry 0xfc
; rjmp hifd
sub p1, a0 ; 3 entry 0xfd
; rjmp hife
sub p1, a0 ; 2 entry 0xfe
; rjmp hiff
sub p1, a0 ; 1 entry 0xff: -1 * a0
done
; From here on `done` no longer returns: the low-byte (loNN) routines finish
; with `done` (continue at doM1) or with `owing` (continue at settle, which
; increments m1 first because the routine computed (NN - 256) * a).
#undef done
#define done rjmp doM1
#define owing rjmp settle
; jump: dispatch table for the low multiplier byte, one word per entry,
; reached from mpy1616 with ZH = high(jump), ZL = m0; it must therefore
; start on a 256-word boundary. On entry p1:p0 already holds a1:a0.
.org (PC + 0x100) & 0xffff00
jump:
rjmp lo00
; entry 0x01: p already equals 1 * a, go straight to the high-byte pass
done ; rjmp lo01
rjmp lo02
;(you know the drill)
rjmp lofe
; rjmp loff
; entry 0xff is handled in place at the end of the table: p = -a, then
; `owing` supplies the missing 256 * a, giving 255 * a.
; negP: 16-bit two's-complement negation of p1:p0.
.Macro negP
com p1 ; p1 = ~p1
neg p0 ; p0 = -p0, carry set unless p0 was 0
sbci p1, -1 ; p1 = p1 + 1 - carry
.EndM
negP ; 6
owing
; Helper macros for the loNN routines. XH:XL holds a << 4 (16 * a, built by
; mpy1616); Z is free as a 16-bit scratch copy of a partial product, since
; doM1 reloads Z before the next dispatch.
; add4: p += 16 * a
.Macro add4
add p0, XL
adc p1, XH
.EndM
; sub4: p -= 16 * a
.Macro sub4
sub p0, XL
sbc p1, XH
.EndM
; set4: p = 16 * a
.Macro set4
movw p0, XL
.EndM
; pp2Z: Z = p (save the current partial product)
.Macro pp2Z
movw ZL, p0
.EndM
; addZ: p += Z (add the saved partial product)
.Macro addZ
add p0, ZL
adc p1, ZH
.EndM
; subZ: p -= Z (subtract the saved partial product)
.Macro subZ
sub p0, ZL
sbc p1, ZH
.EndM
; clrP: p = 0
.Macro clrP
clr p0
clr p1
.EndM
...
; loNN routines: multiply by the low multiplier byte. Entered through the
; `jump` table with p1:p0 = a1:a0 (copied by mpy1616) and XH:XL = a << 4.
; Each leaves p1:p0 = 0xNN * a (mod 65536) and continues at doM1 via `done`.
; The trailing numbers appear to be the author's countdown of cycles left,
; including the closing rjmp.
; do not tail merge to keep *-2/-1/0/1...16 fast
; (with 11, 13, 14, 17, 18 & 19 as collateral benefit, really)
lo00:
clr p0 ; 4
clr p1 ; 3
done
; 7 = (1+1+1)*2 + 1, 5 = (1+1)*2 + 1, 3 = 1*2 + 1, 2 = 1 + 1
lo07:
addA ;10
lo05:
addA ; 8
lo03:
doubleP ; 6
lo02:
addA ; 4
;lo01:
done ; 2
; 8 = (1+1)*4, 4 = 1*4
lo08:
addA ; 8
lo04:
doubleP ; 6
;lo02:
doubleP ; 4
done ; 2
; 10 = ((1*2)*2 + 1)*2, 6 = (1*2 + 1)*2
lo0a:
doubleP ;10
lo06:
doubleP ; 8
addA ; 6
doubleP ; 4
done ; 2
; 9 = 1*8 + 1
lo09:
doubleP ;10
doubleP ; 8
doubleP ; 6
addA ; 4
done ; 2
; 11 = (1*4 + 1)*2 + 1
lo0b:
doubleP ;12 d
doubleP ;10 a0
addA ; 8 d
doubleP ; 6 d
addA ; 4 s0
done ; 2
; 12 = (1*2 + 1)*4
lo0c:
doubleP ;10
addA ; 8
doubleP ; 6
doubleP ; 4
done ; 2
; 15 = 16 - 1, 14 = 16 - 2, 13 = 16 - 3 (negate the small multiple, add 16*a)
lo0d:
addA ;11
lo0e:
addA ; 9
lo0f:
negP ; 7
add4 ; 4
done ; 2
; 16 = a << 4, taken directly from X
lo10:
set4 ; 1
done
; lo11..lo25: entered through the `jump` table with p1:p0 = a1:a0 and
; XH:XL = a << 4; each leaves p1:p0 = 0xNN * a (mod 65536) and continues at
; doM1 via `done`. Trailing numbers are the author's cycle countdown.
; 21..17 = 5..1 + 16
lo15:
addA ;12
lo14:
addA ;10
lo13:
addA ; 8
lo12:
addA ; 6
lo11:
add4 ; 4
done
; 22 = (1*2 + 1)*2 + 16
lo16:
doubleP ;10
addA ; 8
doubleP ; 6
add4 ; 4
done
; 23 = 1*8 - 1 + 16
lo17:
doubleP ;12
doubleP ;10
doubleP ; 8
subA ; 6
add4 ; 4
done
; 24 = 1*8 + 16
lo18:
doubleP ;10
doubleP ; 8
doubleP ; 6
add4 ; 4
done
; 25 = 1*8 + 1 + 16
lo19:
doubleP ;12
doubleP ;10
doubleP ; 8
addA ; 6
add4 ; 4
done
; 26 = (1*4 + 1)*2 + 16
lo1a: ; ...++.+.
doubleP ;12
doubleP ;10
addA ; 8
doubleP ; 6
add4 ; 4
done
; 27 = ((-1)*2 + 16)*2 - 1
lo1b: ; ..+..-.-
negP ;13
doubleP ;10
add4 ; 8
doubleP ; 6
subA ; 4
done
; 28 = ((-1)*2 + 16)*2
lo1c: ; ..+..-..
negP ;11
doubleP ; 8
add4 ; 6
doubleP ; 4
done
; 32 = (1 - 1) + 2*16; 31/30/29 take one/two/three more subtractions
lo1d:
subA ;14
lo1e:
subA ;12
lo1f:
subA ;10
lo20:
subA ; 8
add4 ; 6
add4 ; 4
done
; 37..33 = 5..1 + 2*16
lo25:
addA ;14
lo24:
addA ;12
lo23:
addA ;10
lo22:
addA ; 8
lo21:
add4 ; 6
add4 ; 4
done
...
; lo6a..lo6d: entered through the `jump` table with p1:p0 = a1:a0 and
; XH:XL = a << 4; each leaves p1:p0 = 0xNN * a (mod 65536) and continues at
; doM1 via `done`. Only the first mnemonic on a line is code; the further
; columns after ';' are the author's notes on alternative sequences.
; 106 = ((1*2 + 16)*2 + 1 + 16)*2
lo6a:
doubleP ;14
add4 ;12
doubleP ;10
addA ; 8
add4 ; 6
doubleP ; 4
done ; 2
; 107 = ((18*2) + 18)*2 - 1 with 18 = 1 + 1 + 16 saved in Z
lo6b:; .++.++.- .+++.-.- .++.++.-?+..+.-.-?
;01101011
;XXX wc, faster without preparational p = a
addA ;15 set4 ;15 addA 16 ?set4 15
add4 ;13 subA ;12 a4 ?d ?
pp2Z ;11 doubleP ;10 d ?s0 ?
doubleP ;10 add4 ; 8 a0 ?d ?
addZ ; 8 doubleP ; 6 a4 ?d ?
doubleP ; 6 add4 ; 8 d ?s0 ?
subA ; 4 subA ; 4 s0 ?a4 ?
done ; 2
; 108 = ((16 - 1)*2 + 16)*2 + 16
lo6c: ; .+++.-..
set4 ;13 a0
subA ;12 a0
doubleP ;10 a4
add4 ; 8 d
doubleP ; 6 a4
add4 ; 4 d
done ; 2
; 109 = 31*2 + 16 + 31 with 31 = 16 + 16 - 1 saved in Z
lo6d: ; .+++..--
set4 ;14
add4 ;13
subA ;11
pp2Z ; 9
doubleP ; 8
add4 ; 6
addZ ; 4
done ; 2
; ...
; lo9a..lo9d: entered through the `jump` table with p1:p0 = a1:a0 and
; XH:XL = a << 4; each leaves p1:p0 = 0xNN * a (mod 65536) and continues at
; doM1 via `done`. Only the first mnemonic on a line is code; the further
; columns after ';' are the author's notes on alternative sequences.
; 154 = (((1 + 16)*4 + 1)*2 + 16)*2
lo9a: ; +.
add4 ;14 nP 14
doubleP ;12 s4 12
doubleP ;10 Z 10
addA ; 8 d 9
doubleP ; 6 d 7
add4 ; 4 aZ 5
done ; 2
; 155 = 31*4 + 31 with 31 = 16*2 - 1 saved in Z
lo9b:; +.+..-.- +.+..-.- +.+.+.++ .-.-.-.-
set4 ;14 negP ;16 add4 ;16 negP15
doubleP ;13 add4 ;13 doubleP ;14 s4 12
subA ;11 add4 ;11 doubleP ;12 Z 10
pp2Z ; 9 pp2Z ; 9 add4 ;10 d 9
doubleP ; 8 doubleP ; 8 addA ; 8 d 7
doubleP ; 6 doubleP ; 6 doubleP ; 6 aZ 5
addZ ; 4 addZ ; 4 addA ; 4 owi 3
done ; 2
; 156 = ((16*2 - 1)*2 + 16)*2
lo9c: ; +.+..-..
set4 ;13
doubleP ;12
subA ;10
doubleP ; 8
add4 ; 6
doubleP ; 4
done ; 2
; 157 = (17*2 + 1)*4 + 17 with 17 = 1 + 16 saved in Z
lo9d:; +..+++.+ .--...-- +.+...--
;10011101
;XXX wc, faster without preparational p = a
add4 ;15 set4 ;15 negP ;15
pp2Z ;13 doubleP ;14 sub4 ;12
doubleP ;12 doubleP ;12 sub4 ;10
addA ;10 add4 ;10 pp2Z ; 8
doubleP ; 8 subA ; 8 doubleP ; 7
doubleP ; 6 doubleP ; 6 addZ ; 5
addZ ; 4 subA ; 4 owing ; 3
done
; ...
; loaa..loae: entered through the `jump` table with p1:p0 = a1:a0 and
; XH:XL = a << 4; each leaves p1:p0 = 0xNN * a (mod 65536) once the high-byte
; pass has run. Routines ending in `done` produce 0xNN * a directly; those
; ending in `owing` produce (0xNN - 256) * a and let settle add 256 * a by
; incrementing m1. Only the first mnemonic on a line is code; the further
; columns after ';' are the author's notes on alternative sequences.
; 170 = (17*4 + 17)*2 with 17 = 1 + 16 saved in Z
loaa: ; +.+.+.+.
add4 ;13 add4 ;14 d 14
pp2Z ;11 doubleP ;12 s4 12
doubleP ;10 doubleP ;10 Z 10
doubleP ; 8 add4 ; 8 d 9
addZ ; 6 doubleP ; 4 d 7
doubleP ; 4 done ; 2 aZ 5
done ; 2
; 171 - 256 = -85 = (-17)*4 + (-17) with -17 = -1 - 16 saved in Z
loab:; .-.-.-.- +.+..-.- +.+.+.++ .-.-.-.-
;10101011
;XXX wc, faster without preparational p = a
negP ;15 set4 ;15 add4 ;16 negP16
sub4 ;12 doubleP ;14 doubleP ;14 s4 13
pp2Z ;10 subA ;12 doubleP ;12 d 11
doubleP ; 9 doubleP ;10 add4 ;10 a4 9
doubleP ; 7 add4 ; 8 addA ; 8 d 7
addZ ; 5 doubleP ; 6 doubleP ; 6 s0 5
owing ; 3 subA ; 4 addA ; 4 owi 3
; 172 = (((1 + 16)*2 + 1)*2 + 16)*2
loac:
add4 ;14
doubleP ;12
addA ;10
doubleP ; 8
add4 ; 6
doubleP ; 4
done ; 2
; 173 - 256 = -83 = (-17 - 16)*2 + (-17) with -17 saved in Z
load: ; .-.-..-- .-.-.-.+ .--.++.+ 0.9 1.8 0.8 (avg)
; WC 10101101
negP ;15 -1 negP ;16 a4 a0 a0 17
sub4 ;12 -16-1 sub4 ;13 d s4 a0
pp2Z ;10 -16-1 doubleP ;11 a0 Z s4
sub4 ; 9 -32-1 doubleP ; 9 d d d
doubleP ; 7 -64-2 sub4 ; 7 a4 aZ d
addZ ; 5 -80-3 addA ; 5 d d a0
owing ; 3 owing ; 3 a0 a0 s4
; 174 - 256 = -82 = (-1 - 16 - 16)*2 - 16
loae: ; .-.-..-.
negP ;14 clrP ;15
sub4 ;11 sub4 ;13
sub4 ; 9 doubleP ;11
; subA ; 9
doubleP ; 7
sub4 ; 5
owing ; 3
; loaf..lob3: entered through the `jump` table with p1:p0 = a1:a0 and
; XH:XL = a << 4. All end in `owing`: they compute (0xNN - 256) * a and
; settle adds the missing 256 * a by incrementing m1 before the high-byte
; pass. Trailing columns after ';' are the author's cycle countdown and notes.
; 175 - 256 = -81 = (0 - 16)*4 - 16 - 1
loaf: ; .-.-...- nutritious
;10101111
;XXX wc, faster without preparational p = a
clrP ;15 s
sub4 ;13 d
doubleP ;11 d
doubleP ; 9 a4
sub4 ; 7 d
subA ; 5 a4
owing ; 3 s0
; 176 - 256 = -80 = (0 - 16)*4 - 16
lob0: ; don't call me that
clrP ;13
sub4 ;11
doubleP ; 9
doubleP ; 7
sub4 ; 5
owing ; 3
; 177 - 256 = -79 = (0 - 16)*4 - 16 + 1
lob1:
;XXX wc, faster without preparational p = a
clrP ;15 s
sub4 ;13 d
doubleP ;11 d
doubleP ; 9 a4
sub4 ; 7 d
addA ; 5 a4
owing ; 3 a0
; 178 - 256 = -78 = ((0 - 16)*2 + 1)*2 - 16
lob2:
;XXX wc, faster without preparational p = a
clrP ;15
sub4 ;13
doubleP ;11
addA ; 9
doubleP ; 7
sub4 ; 5
owing ; 3
; 179 - 256 = -77 = (1 - 16)*4 - 16 - 1
lob3:
sub4 ;13
doubleP ;11
doubleP ; 9
sub4 ; 7
subA ; 5
owing ; 3
; ...
; lob6..lobc: entered through the `jump` table with p1:p0 = a1:a0 and
; XH:XL = a << 4. All end in `owing`: they compute (0xNN - 256) * a and
; settle adds the missing 256 * a by incrementing m1 before the high-byte
; pass. Only the first mnemonic on a line is code; the further columns after
; ';' are the author's cycle countdown and notes on alternatives.
; 182 - 256 = -74 = ((1 - 16)*2 + 1)*2 - 16
lob6:
sub4 ;13
doubleP ;11
addA ; 9
doubleP ; 7
sub4 ; 5
owing ; 3
; 183 - 256 = -73 = ((-15)*2 + 1)*2 + (-15) with -15 = 1 - 16 saved in Z
lob7:; .-.-+.++ ++.-+..- .-.-+..- +.+++..- ++..-..- .-..-..-
sub4 ;14 add4 ;15 doubleP ;15 a4 16 n 17 n 16 s4
pp2Z ;12 pp2Z ;13 sub4 ;13 d a4 d 13 z
doubleP ;11 doubleP ;12 doubleP ;11 d d s4 11 d
addA ; 9 add4 ;10 doubleP ; 9 a4 a4 d 9 a0
doubleP ; 7 doubleP ; 8 sub4 ; 7 d d d 7 d
addZ ; 5 doubleP ; 6 subA ; 5 a4 d s0 5 az
owing ; 3 subZ ; 4 owing ; 3 s0 s0 owi owi
; 184 - 256 = -72 = (1*2 - 16)*4 - 16
lob8:
doubleP ;13
sub4 ;11
doubleP ; 9
doubleP ; 7
sub4 ; 5
owing ; 3
; 185 - 256 = -71 = (-15 + 1)*4 + (-15) with -15 = 1 - 16 saved in Z
lob9:
sub4 ;14
pp2Z ;12
addA ;11
doubleP ; 9
doubleP ; 7
addZ ; 5
owing ; 3
; 186 - 256 = -70 = ((-1 - 16)*2 - 1)*2
loba: ; .-...--.
negP ;14
sub4 ;11
doubleP ; 9
subA ; 7
doubleP ; 5
owing ; 3
; 187 - 256 = -69 = (-1 - 16)*4 - 1
lobb:
negP ;14
sub4 ;11
doubleP ; 9
doubleP ; 7
subA ; 5
owing ; 3
; 188 - 256 = -68 = (-1 - 16)*4
lobc:
negP ;12
sub4 ; 9
doubleP ; 7
doubleP ; 5
owing ; 3
; lobd..loc1: entered through the `jump` table with p1:p0 = a1:a0 and
; XH:XL = a << 4. All end in `owing`: they compute (0xNN - 256) * a and
; settle adds the missing 256 * a by incrementing m1 before the high-byte
; pass. Only the first mnemonic on a line is code; the further columns after
; ';' are the author's cycle countdown and notes on alternatives.
; 189 - 256 = -67 = (-1 - 16)*4 + 1
lobd:; .-...-.+
negP ;14 set4 ;15
sub4 ;11 doubleP ;14
doubleP ; 9 subA ;12
doubleP ; 7 doubleP ;10
addA ; 5 add4 ; 8
owing ; 3 doubleP ; 6
; subA ; 4
; done ; 2
; 190 - 256 = -66 = (-1 - 16 - 16)*2
lobe: ; Honni soit qui mal y pense !
negP ;12
sub4 ; 9
sub4 ; 7
doubleP ; 5
owing ; 3
; 193 - 256 = -63 = 1 - 4*16; 192 and 191 take one and two subtractions of a first
lobf: ;
subA ;15 clrP sub4 d d suba owing 13
loc0: ; DONT't call me THAT!
subA ;13
loc1:
sub4 ;11
sub4 ; 9
sub4 ; 7
sub4 ; 5
owing ; 3
...
; loca..locc: entered through the `jump` table with p1:p0 = a1:a0 and
; XH:XL = a << 4; each leaves p1:p0 = 0xNN * a (mod 65536) once the high-byte
; pass has run. `done` continues at doM1; `owing` goes through settle because
; the routine computed (0xNN - 256) * a. Only the first mnemonic on a line is
; code; the further columns after ';' are the author's countdown and notes.
; 202 = (((1 + 16)*2 + 16)*2 + 1)*2
loca:
add4 ;14
doubleP ;12
add4 ;10
doubleP ; 8
addA ; 6
doubleP ; 4
done ; 2
; 203 - 256 = -53 = ((1*2 - 16)*2 + 1)*2 + 1
locb:; .-..+.++ ..--.-.- ++..++.-
; WC?!? 11001011
doubleP ;15 n 16 a4 15 s 17 n 15
sub4 ;13 d 13 z 13 d 16 s4 12
doubleP ;11 s4 11 d 12 a4 14 z 10
addA ; 9 d 9 aZ 10 s0 12 s0 9
doubleP ; 7 s4 7 d 8 d 10 d 7
addA ; 5 s0 5 d 6 d 8 az 5
owing ; 3 owi 3 s0 4 a4 6
; s0 4
; 204 = (17*2 + 17)*4 with 17 = 1 + 16 saved in Z
locc: ; what comments?
add4 ;13 n 14
pp2Z ;11 s4 11
doubleP ;10 d 9
addZ ; 8 d 7
doubleP ; 6 a4 5
doubleP ; 4 owi 3
done ; 2