Skip to content

Commit 778d471

Browse files
authored
Optimized transform.scale2x() (#2859)
* Optimized all Bpp cases of scale2x
* Finished the 3 Bpp case
* Removed the restrict keyword, added a Bpp variable
* Formatted the code
1 parent bef0d2e commit 778d471

File tree

1 file changed

+143
-109
lines changed

1 file changed

+143
-109
lines changed

src_c/scale2x.c

Lines changed: 143 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,19 @@
3434
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
3535
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
3636

37-
#define READINT24(x) ((x)[0] << 16 | (x)[1] << 8 | (x)[2])
38-
#define WRITEINT24(x, i) \
39-
{ \
40-
(x)[0] = i >> 16; \
41-
(x)[1] = (i >> 8) & 0xff; \
42-
x[2] = i & 0xff; \
43-
}
37+
static inline int
38+
read_int24(const Uint8 *x)
39+
{
40+
return (x[0] << 16 | x[1] << 8 | x[2]);
41+
}
42+
43+
static inline void
44+
store_int24(Uint8 *x, int i)
45+
{
46+
x[0] = i >> 16;
47+
x[1] = (i >> 8) & 0xff;
48+
x[2] = i & 0xff;
49+
}
4450

4551
/*
4652
this requires a destination surface already setup to be twice as
@@ -62,137 +68,165 @@ scale2x(SDL_Surface *src, SDL_Surface *dst)
6268
const int height = src->h;
6369

6470
#if SDL_VERSION_ATLEAST(3, 0, 0)
65-
switch (src->format->bytes_per_pixel) {
71+
const Uint8 Bpp = src->format->bytes_per_pixel;
6672
#else
67-
switch (src->format->BytesPerPixel) {
73+
const Uint8 Bpp = src->format->BytesPerPixel;
6874
#endif
75+
76+
switch (Bpp) {
6977
case 1: {
7078
Uint8 E0, E1, E2, E3, B, D, E, F, H;
7179
for (looph = 0; looph < height; ++looph) {
80+
Uint8 *src_row = srcpix + looph * srcpitch;
81+
Uint8 *dst_row0 = dstpix + looph * 2 * dstpitch;
82+
Uint8 *dst_row1 = dstpix + (looph * 2 + 1) * dstpitch;
83+
84+
Uint8 *src_row_prev = srcpix + MAX(0, looph - 1) * srcpitch;
85+
Uint8 *src_row_next =
86+
srcpix + MIN(height - 1, looph + 1) * srcpitch;
87+
7288
for (loopw = 0; loopw < width; ++loopw) {
73-
B = *(Uint8 *)(srcpix + (MAX(0, looph - 1) * srcpitch) +
74-
(1 * loopw));
75-
D = *(Uint8 *)(srcpix + (looph * srcpitch) +
76-
(1 * MAX(0, loopw - 1)));
77-
E = *(Uint8 *)(srcpix + (looph * srcpitch) + (1 * loopw));
78-
F = *(Uint8 *)(srcpix + (looph * srcpitch) +
79-
(1 * MIN(width - 1, loopw + 1)));
80-
H = *(Uint8 *)(srcpix +
81-
(MIN(height - 1, looph + 1) * srcpitch) +
82-
(1 * loopw));
83-
84-
E0 = D == B && B != F && D != H ? D : E;
85-
E1 = B == F && B != D && F != H ? F : E;
86-
E2 = D == H && D != B && H != F ? D : E;
87-
E3 = H == F && D != H && B != F ? F : E;
88-
89-
*(Uint8 *)(dstpix + looph * 2 * dstpitch + loopw * 2 * 1) =
90-
E0;
91-
*(Uint8 *)(dstpix + looph * 2 * dstpitch +
92-
(loopw * 2 + 1) * 1) = E1;
93-
*(Uint8 *)(dstpix + (looph * 2 + 1) * dstpitch +
94-
loopw * 2 * 1) = E2;
95-
*(Uint8 *)(dstpix + (looph * 2 + 1) * dstpitch +
96-
(loopw * 2 + 1) * 1) = E3;
89+
B = *(Uint8 *)(src_row_prev + loopw);
90+
D = *(Uint8 *)(src_row + MAX(0, loopw - 1));
91+
E = *(Uint8 *)(src_row + loopw);
92+
F = *(Uint8 *)(src_row + MIN(width - 1, loopw + 1));
93+
H = *(Uint8 *)(src_row_next + loopw);
94+
95+
if (B != H && D != F) {
96+
E0 = (D == B) ? D : E;
97+
E1 = (B == F) ? F : E;
98+
E2 = (D == H) ? D : E;
99+
E3 = (H == F) ? F : E;
100+
}
101+
else {
102+
E0 = E;
103+
E1 = E;
104+
E2 = E;
105+
E3 = E;
106+
}
107+
108+
*(Uint8 *)(dst_row0 + loopw * 2) = E0;
109+
*(Uint8 *)(dst_row0 + loopw * 2 + 1) = E1;
110+
*(Uint8 *)(dst_row1 + loopw * 2) = E2;
111+
*(Uint8 *)(dst_row1 + loopw * 2 + 1) = E3;
97112
}
98113
}
99114
break;
100115
}
101116
case 2: {
102117
Uint16 E0, E1, E2, E3, B, D, E, F, H;
103118
for (looph = 0; looph < height; ++looph) {
119+
Uint8 *src_row = srcpix + looph * srcpitch;
120+
Uint8 *dst_row0 = dstpix + looph * 2 * dstpitch;
121+
Uint8 *dst_row1 = dstpix + (looph * 2 + 1) * dstpitch;
122+
123+
Uint8 *src_row_prev = srcpix + MAX(0, looph - 1) * srcpitch;
124+
Uint8 *src_row_next =
125+
srcpix + MIN(height - 1, looph + 1) * srcpitch;
126+
104127
for (loopw = 0; loopw < width; ++loopw) {
105-
B = *(Uint16 *)(srcpix + (MAX(0, looph - 1) * srcpitch) +
106-
(2 * loopw));
107-
D = *(Uint16 *)(srcpix + (looph * srcpitch) +
108-
(2 * MAX(0, loopw - 1)));
109-
E = *(Uint16 *)(srcpix + (looph * srcpitch) + (2 * loopw));
110-
F = *(Uint16 *)(srcpix + (looph * srcpitch) +
111-
(2 * MIN(width - 1, loopw + 1)));
112-
H = *(Uint16 *)(srcpix +
113-
(MIN(height - 1, looph + 1) * srcpitch) +
114-
(2 * loopw));
115-
116-
E0 = D == B && B != F && D != H ? D : E;
117-
E1 = B == F && B != D && F != H ? F : E;
118-
E2 = D == H && D != B && H != F ? D : E;
119-
E3 = H == F && D != H && B != F ? F : E;
120-
121-
*(Uint16 *)(dstpix + looph * 2 * dstpitch +
122-
loopw * 2 * 2) = E0;
123-
*(Uint16 *)(dstpix + looph * 2 * dstpitch +
124-
(loopw * 2 + 1) * 2) = E1;
125-
*(Uint16 *)(dstpix + (looph * 2 + 1) * dstpitch +
126-
loopw * 2 * 2) = E2;
127-
*(Uint16 *)(dstpix + (looph * 2 + 1) * dstpitch +
128-
(loopw * 2 + 1) * 2) = E3;
128+
B = *(Uint16 *)(src_row_prev + 2 * loopw);
129+
D = *(Uint16 *)(src_row + 2 * MAX(0, loopw - 1));
130+
E = *(Uint16 *)(src_row + 2 * loopw);
131+
F = *(Uint16 *)(src_row + 2 * MIN(width - 1, loopw + 1));
132+
H = *(Uint16 *)(src_row_next + 2 * loopw);
133+
134+
if (B != H && D != F) {
135+
E0 = (D == B) ? D : E;
136+
E1 = (B == F) ? F : E;
137+
E2 = (D == H) ? D : E;
138+
E3 = (H == F) ? F : E;
139+
}
140+
else {
141+
E0 = E;
142+
E1 = E;
143+
E2 = E;
144+
E3 = E;
145+
}
146+
147+
*(Uint16 *)(dst_row0 + loopw * 2 * 2) = E0;
148+
*(Uint16 *)(dst_row0 + (loopw * 2 + 1) * 2) = E1;
149+
*(Uint16 *)(dst_row1 + loopw * 2 * 2) = E2;
150+
*(Uint16 *)(dst_row1 + (loopw * 2 + 1) * 2) = E3;
129151
}
130152
}
131153
break;
132154
}
133155
case 3: {
134156
int E0, E1, E2, E3, B, D, E, F, H;
135157
for (looph = 0; looph < height; ++looph) {
158+
Uint8 *src_row = srcpix + looph * srcpitch;
159+
Uint8 *dst_row0 = dstpix + looph * 2 * dstpitch;
160+
Uint8 *dst_row1 = dstpix + (looph * 2 + 1) * dstpitch;
161+
162+
Uint8 *src_row_prev = srcpix + MAX(0, looph - 1) * srcpitch;
163+
Uint8 *src_row_next =
164+
srcpix + MIN(height - 1, looph + 1) * srcpitch;
165+
136166
for (loopw = 0; loopw < width; ++loopw) {
137-
B = READINT24(srcpix + (MAX(0, looph - 1) * srcpitch) +
138-
(3 * loopw));
139-
D = READINT24(srcpix + (looph * srcpitch) +
140-
(3 * MAX(0, loopw - 1)));
141-
E = READINT24(srcpix + (looph * srcpitch) + (3 * loopw));
142-
F = READINT24(srcpix + (looph * srcpitch) +
143-
(3 * MIN(width - 1, loopw + 1)));
144-
H = READINT24(srcpix +
145-
(MIN(height - 1, looph + 1) * srcpitch) +
146-
(3 * loopw));
147-
148-
E0 = D == B && B != F && D != H ? D : E;
149-
E1 = B == F && B != D && F != H ? F : E;
150-
E2 = D == H && D != B && H != F ? D : E;
151-
E3 = H == F && D != H && B != F ? F : E;
152-
153-
WRITEINT24((dstpix + looph * 2 * dstpitch + loopw * 2 * 3),
154-
E0);
155-
WRITEINT24(
156-
(dstpix + looph * 2 * dstpitch + (loopw * 2 + 1) * 3),
157-
E1);
158-
WRITEINT24(
159-
(dstpix + (looph * 2 + 1) * dstpitch + loopw * 2 * 3),
160-
E2);
161-
WRITEINT24((dstpix + (looph * 2 + 1) * dstpitch +
162-
(loopw * 2 + 1) * 3),
163-
E3);
167+
B = read_int24(src_row_prev + (3 * loopw));
168+
D = read_int24(src_row + (3 * MAX(0, loopw - 1)));
169+
E = read_int24(src_row + (3 * loopw));
170+
F = read_int24(src_row + (3 * MIN(width - 1, loopw + 1)));
171+
H = read_int24(src_row_next + (3 * loopw));
172+
173+
if (B != H && D != F) {
174+
E0 = (D == B) ? D : E;
175+
E1 = (B == F) ? F : E;
176+
E2 = (D == H) ? D : E;
177+
E3 = (H == F) ? F : E;
178+
}
179+
else {
180+
E0 = E;
181+
E1 = E;
182+
E2 = E;
183+
E3 = E;
184+
}
185+
186+
store_int24(dst_row0 + loopw * 2 * 3, E0);
187+
store_int24(dst_row0 + (loopw * 2 + 1) * 3, E1);
188+
store_int24(dst_row1 + loopw * 2 * 3, E2);
189+
store_int24(dst_row1 + (loopw * 2 + 1) * 3, E3);
164190
}
165191
}
166192
break;
167193
}
168-
default: { /*case 4:*/
194+
default: {
169195
Uint32 E0, E1, E2, E3, B, D, E, F, H;
196+
170197
for (looph = 0; looph < height; ++looph) {
198+
Uint8 *src_row = srcpix + looph * srcpitch;
199+
Uint8 *dst_row0 = dstpix + looph * 2 * dstpitch;
200+
Uint8 *dst_row1 = dstpix + (looph * 2 + 1) * dstpitch;
201+
202+
Uint8 *src_row_prev = srcpix + MAX(0, looph - 1) * srcpitch;
203+
Uint8 *src_row_next =
204+
srcpix + MIN(height - 1, looph + 1) * srcpitch;
205+
171206
for (loopw = 0; loopw < width; ++loopw) {
172-
B = *(Uint32 *)(srcpix + (MAX(0, looph - 1) * srcpitch) +
173-
(4 * loopw));
174-
D = *(Uint32 *)(srcpix + (looph * srcpitch) +
175-
(4 * MAX(0, loopw - 1)));
176-
E = *(Uint32 *)(srcpix + (looph * srcpitch) + (4 * loopw));
177-
F = *(Uint32 *)(srcpix + (looph * srcpitch) +
178-
(4 * MIN(width - 1, loopw + 1)));
179-
H = *(Uint32 *)(srcpix +
180-
(MIN(height - 1, looph + 1) * srcpitch) +
181-
(4 * loopw));
182-
183-
E0 = D == B && B != F && D != H ? D : E;
184-
E1 = B == F && B != D && F != H ? F : E;
185-
E2 = D == H && D != B && H != F ? D : E;
186-
E3 = H == F && D != H && B != F ? F : E;
187-
188-
*(Uint32 *)(dstpix + looph * 2 * dstpitch +
189-
loopw * 2 * 4) = E0;
190-
*(Uint32 *)(dstpix + looph * 2 * dstpitch +
191-
(loopw * 2 + 1) * 4) = E1;
192-
*(Uint32 *)(dstpix + (looph * 2 + 1) * dstpitch +
193-
loopw * 2 * 4) = E2;
194-
*(Uint32 *)(dstpix + (looph * 2 + 1) * dstpitch +
195-
(loopw * 2 + 1) * 4) = E3;
207+
B = *(Uint32 *)(src_row_prev + 4 * loopw);
208+
D = *(Uint32 *)(src_row + 4 * MAX(0, loopw - 1));
209+
E = *(Uint32 *)(src_row + 4 * loopw);
210+
F = *(Uint32 *)(src_row + 4 * MIN(width - 1, loopw + 1));
211+
H = *(Uint32 *)(src_row_next + 4 * loopw);
212+
213+
if (B != H && D != F) {
214+
E0 = (D == B) ? D : E;
215+
E1 = (B == F) ? F : E;
216+
E2 = (D == H) ? D : E;
217+
E3 = (H == F) ? F : E;
218+
}
219+
else {
220+
E0 = E;
221+
E1 = E;
222+
E2 = E;
223+
E3 = E;
224+
}
225+
226+
*(Uint32 *)(dst_row0 + loopw * 2 * 4) = E0;
227+
*(Uint32 *)(dst_row0 + (loopw * 2 + 1) * 4) = E1;
228+
*(Uint32 *)(dst_row1 + loopw * 2 * 4) = E2;
229+
*(Uint32 *)(dst_row1 + (loopw * 2 + 1) * 4) = E3;
196230
}
197231
}
198232
break;

0 commit comments

Comments
 (0)