Skip to content

Commit c1be4ac

Browse files
calc84maniac, mateoconlechuga
authored and committed
Improve average speed of count-leading-zero libcalls
1 parent 2bbfa2d commit c1be4ac

File tree

8 files changed

+109
-131
lines changed

8 files changed

+109
-131
lines changed

src/crt/bctlz.src

-18
This file was deleted.

src/crt/ctlz.src

+101
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,101 @@
1+
assume adl=1
2+
3+
; __bctlz: count the leading zero bits of an 8-bit value.
;   In:  A = value
;   Out: A = number of leading zeros, 0..8 (8 when the input is 0)
; Only A and the flags are written. The value is first narrowed to a range
; with compares; the final bit of the answer then comes from the carry of a
; single add, turned into 0 / -1 by "sbc a, a".
section .text
4+
public __bctlz
5+
__bctlz:
6+
cp a, 010h          ; carry set when A < 16 (upper nibble clear)
7+
jr c, .low4
8+
cp a, 040h          ; carry set when A < 64 (bits 7 and 6 clear)
9+
jr c, .low6
10+
add a, a            ; here A >= 64: carry = bit 7
11+
sbc a, a            ; A = -carry (FFh if bit 7 was set, else 0)
12+
inc a               ; 0 if bit 7 was set, otherwise 1
13+
ret
14+
.low6:              ; here 16 <= A < 64
15+
add a, -020h        ; carry set when A >= 32 (bit 5 set)
16+
sbc a, a            ; A = -carry
17+
add a, 3            ; 2 if bit 5 was set, otherwise 3
18+
ret
19+
.low4:              ; here A < 16
20+
cp a, 4             ; carry set when A < 4
21+
jr c, .low2
22+
add a, -8           ; here 4 <= A < 16: carry set when A >= 8 (bit 3 set)
23+
sbc a, a            ; A = -carry
24+
add a, 5            ; 4 if bit 3 was set, otherwise 5
25+
ret
26+
.low2:              ; here A is 0, 1, 2 or 3
27+
add a, -3           ; A = A - 3 (mod 256); carry set only when A was 3
28+
cpl                 ; A = 2 - original A (mod 256); carry is left unchanged
29+
adc a, 6            ; 8 - original A + carry: yields 8, 7, 6, 6 for 0, 1, 2, 3
30+
ret
31+
32+
; __sctlz: count the leading zero bits of the 16-bit value held in H:L.
;   In:  H = high byte, L = low byte
;   Out: A = number of leading zeros, 0..16
section .text
33+
public __sctlz
34+
__sctlz:
35+
ld a, h
36+
or a, a             ; set Z from the high byte
37+
jr nz, __bctlz      ; high byte non-zero: its 8-bit count is the answer (tail jump)
38+
ld a, l             ; high byte is zero: count the low byte and add 8
39+
; No ret here: execution is expected to continue into __sctlz.hijack.
; NOTE(review): presumably `require` makes the linker keep that section and
; place it directly after this one - confirm against the assembler/linker docs.
require __sctlz.hijack
40+
41+
; __sctlz.hijack: shared tail that returns the 8-bit leading-zero count of A
; plus 8, i.e. the count for a byte that sits one byte below the top of the
; value being examined.
;   In:  A = byte to examine
;   Out: A = __bctlz(A) + 8
; Reached by falling out of __sctlz and by "jr nz" from __ictlz and __llctlz.
section .text
42+
private __sctlz.hijack
43+
__sctlz.hijack:
44+
call __bctlz
45+
add a, 8            ; account for the all-zero byte above this one
46+
ret
47+
48+
; __ictlz: count the leading zero bits of the 24-bit value in HL.
;   In:  HL = value (the file assumes ADL mode, where HL is 24 bits wide)
;   Out: A = number of leading zeros, 0..24
; HL is only read; SP is the same on exit as on entry.
section .text
49+
public __ictlz
50+
__ictlz:
51+
; The next four instructions copy the upper byte of HL (bits 16-23) into A:
; HL is pushed one byte lower than usual, so that after "inc sp" the 3-byte
; "pop af" loads F from H and A from the upper byte.
dec sp
52+
push hl
53+
inc sp
54+
pop af
55+
or a, a             ; F was overwritten by the pop; set Z from the upper byte
56+
jr nz, __bctlz      ; upper byte non-zero: its 8-bit count is the answer
57+
or a, h             ; A is 0 here, so this loads H into A and sets Z from it
58+
jr nz, __sctlz.hijack   ; middle byte non-zero: __bctlz(H) + 8
59+
ld a, l             ; both upper bytes are zero: count the low byte
60+
call __bctlz
61+
add a, 16           ; account for the two all-zero bytes above it
62+
ret
63+
64+
; __lctlz: count the leading zero bits of a 32-bit value whose top byte is in
; E and whose low 24 bits are in HL.
;   In:  E = bits 24-31, HL = bits 0-23
;   Out: A = number of leading zeros, 0..32
section .text
65+
public __lctlz
66+
__lctlz:
67+
ld a, e
68+
or a, a             ; set Z from the top byte
69+
jr nz, __bctlz      ; top byte non-zero: its 8-bit count is the answer
70+
call __ictlz        ; otherwise count within the low 24 bits in HL
71+
add a, 8            ; account for the all-zero top byte
72+
ret
73+
74+
; __llctlz: count the leading zero bits of a 64-bit value whose top two bytes
; are in B (highest) and C, with the remaining 48 bits in DE:HL as consumed by
; __i48ctlz.
;   In:  B = bits 56-63, C = bits 48-55, DE = bits 24-47, HL = bits 0-23
;   Out: A = number of leading zeros, 0..64
section .text
75+
public __llctlz
76+
__llctlz:
77+
ld a, b
78+
or a, a             ; set Z from the top byte
79+
jr nz, __bctlz      ; top byte non-zero: its 8-bit count is the answer
80+
or a, c             ; A is 0 here, so this loads C into A and sets Z from it
81+
jr nz, __sctlz.hijack   ; second byte non-zero: __bctlz(C) + 8
82+
call __i48ctlz      ; both top bytes are zero: count within the low 48 bits
83+
add a, 16           ; account for the two all-zero bytes in B and C
84+
ret
85+
86+
; __i48ctlz: count the leading zero bits of a 48-bit value split across DE
; (high 24 bits) and HL (low 24 bits).
;   In:  DE = bits 24-47, HL = bits 0-23
;   Out: A = number of leading zeros, 0..48
; DE and HL hold their entry values again on return: each path executes
; "ex de, hl" exactly twice.
section .text
87+
public __i48ctlz
88+
__i48ctlz:
89+
ex de, hl           ; bring the high half into HL so it can be tested
90+
; add / clear carry / sbc leaves HL unchanged and sets Z when HL is zero.
add hl, de
91+
or a, a             ; clear carry so sbc subtracts exactly DE
92+
sbc hl, de
93+
jr z, .low          ; high half is zero: the answer lies in the low half
94+
call __ictlz        ; high half non-zero: its 24-bit count is the answer
95+
ex de, hl           ; swap the halves back (A is not touched)
96+
ret
97+
.low:
98+
ex de, hl           ; swap back so HL holds the low half again
99+
call __ictlz
100+
add a, 24           ; account for the all-zero high half
101+
ret

src/crt/i48ctlz.src

-31
This file was deleted.

src/crt/ictlz.src

-17
This file was deleted.

src/crt/lctlz.src

-20
This file was deleted.

src/crt/llctlz.src

-26
This file was deleted.

src/crt/sctlz.src

-17
This file was deleted.

src/libc/include/ez80_builtin.h

+8-2
Original file line number | Diff line number | Diff line change
@@ -11,13 +11,19 @@ extern "C" {
1111
#if __has_builtin(__builtin_clzc)
1212
#define __ez80_clzc __builtin_clzc
1313
#else
14-
unsigned char __ez80_clzc(unsigned char) __NOEXCEPT_CONST;
14+
/*
 * Fallback for compilers without __builtin_clzc: counts the leading zero bits
 * of an 8-bit value by taking the count reported by __builtin_clzs and
 * subtracting 8 for the extra high bits of the wider type.
 * NOTE(review): assumes unsigned short is 16 bits wide - confirm for the target.
 * NOTE(review): a zero argument is presumably undefined, as for the
 * __builtin_clz family - confirm against the compiler documentation.
 */
static inline __attribute__((__always_inline__))
15+
int __ez80_clzc(unsigned char __x) __NOEXCEPT_CONST {
16+
return (unsigned char)(__builtin_clzs(__x) - 8);
17+
}
1518
#endif
1619

1720
#if __has_builtin(__builtin_clzi48)
1821
#define __ez80_clzi48 __builtin_clzi48
1922
#else
20-
unsigned char __ez80_clzi48(unsigned __int48) __NOEXCEPT_CONST;
23+
/*
 * Fallback for compilers without __builtin_clzi48: counts the leading zero
 * bits of a 48-bit value by taking the count reported by __builtin_clzll and
 * subtracting 16 for the extra high bits of the wider type.
 * NOTE(review): assumes unsigned long long is 64 bits wide - confirm for the
 * target.
 * NOTE(review): a zero argument is presumably undefined, as for the
 * __builtin_clz family - confirm against the compiler documentation.
 */
static inline __attribute__((__always_inline__))
24+
int __ez80_clzi48(unsigned __int48 __x) __NOEXCEPT_CONST {
25+
return (unsigned char)(__builtin_clzll(__x) - 16);
26+
}
2127
#endif
2228

2329
#if __has_builtin(__builtin_ctzc)

0 commit comments

Comments
 (0)