Skip to content

Commit

Permalink
Optimized transform.scale2x() (pygame-community#2859)
Browse files Browse the repository at this point in the history
* optimized all Bpp cases of scale2x

* finish 3Bpp case

* removed restrict keyword, added Bpp var

* format
  • Loading branch information
itzpr3d4t0r authored Jun 30, 2024
1 parent bef0d2e commit 778d471
Showing 1 changed file with 143 additions and 109 deletions.
252 changes: 143 additions & 109 deletions src_c/scale2x.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,19 @@
/* Larger / smaller of two values. These are plain macros, so each argument
 * may be evaluated more than once: do not pass expressions with side
 * effects. */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define MIN(a, b) (((a) < (b)) ? (a) : (b))

/* Combine the three consecutive bytes at x into a single integer value:
 * x[0] is shifted into bits 16-23, x[1] into bits 8-15, x[2] stays in
 * bits 0-7. x is evaluated three times. */
#define READINT24(x) ((x)[0] << 16 | (x)[1] << 8 | (x)[2])
/*
 * Store the low 24 bits of i into the three bytes at x: bits 16-23 go to
 * x[0], bits 8-15 to x[1] and bits 0-7 to x[2] (the byte order READINT24
 * reads back).
 *
 * Every use of x and i is parenthesized so that arbitrary expressions
 * (e.g. "buf + 3" or "a | b") expand correctly, and the body is wrapped in
 * do { } while (0) so the macro acts as one statement and is safe inside an
 * unbraced if/else. x and i are each evaluated three times: do not pass
 * expressions with side effects.
 */
#define WRITEINT24(x, i)                \
    do {                                \
        (x)[0] = ((i) >> 16) & 0xff;    \
        (x)[1] = ((i) >> 8) & 0xff;     \
        (x)[2] = (i) & 0xff;            \
    } while (0)
/*
 * Pack the three bytes starting at x into one int.
 *
 * x[0] lands in bits 16-23, x[1] in bits 8-15 and x[2] in bits 0-7 of the
 * returned value. Exactly three bytes are read; x itself is not modified.
 */
static inline int
read_int24(const Uint8 *x)
{
    const int high = x[0] << 16;
    const int middle = x[1] << 8;
    const int low = x[2];

    return high | middle | low;
}

/*
 * Write the value i into the three bytes starting at x.
 *
 * x[0] receives i >> 16 (narrowed to Uint8 by the assignment), x[1] receives
 * bits 8-15 of i and x[2] receives bits 0-7. Exactly three bytes are
 * written.
 */
static inline void
store_int24(Uint8 *x, int i)
{
    const int top = i >> 16;
    const int middle = (i >> 8) & 0xff;
    const int bottom = i & 0xff;

    x[0] = top;
    x[1] = middle;
    x[2] = bottom;
}

/*
this requires a destination surface already set up to be twice as
Expand All @@ -62,137 +68,165 @@ scale2x(SDL_Surface *src, SDL_Surface *dst)
const int height = src->h;

#if SDL_VERSION_ATLEAST(3, 0, 0)
switch (src->format->bytes_per_pixel) {
const Uint8 Bpp = src->format->bytes_per_pixel;
#else
switch (src->format->BytesPerPixel) {
const Uint8 Bpp = src->format->BytesPerPixel;
#endif

switch (Bpp) {
case 1: {
Uint8 E0, E1, E2, E3, B, D, E, F, H;
for (looph = 0; looph < height; ++looph) {
Uint8 *src_row = srcpix + looph * srcpitch;
Uint8 *dst_row0 = dstpix + looph * 2 * dstpitch;
Uint8 *dst_row1 = dstpix + (looph * 2 + 1) * dstpitch;

Uint8 *src_row_prev = srcpix + MAX(0, looph - 1) * srcpitch;
Uint8 *src_row_next =
srcpix + MIN(height - 1, looph + 1) * srcpitch;

for (loopw = 0; loopw < width; ++loopw) {
B = *(Uint8 *)(srcpix + (MAX(0, looph - 1) * srcpitch) +
(1 * loopw));
D = *(Uint8 *)(srcpix + (looph * srcpitch) +
(1 * MAX(0, loopw - 1)));
E = *(Uint8 *)(srcpix + (looph * srcpitch) + (1 * loopw));
F = *(Uint8 *)(srcpix + (looph * srcpitch) +
(1 * MIN(width - 1, loopw + 1)));
H = *(Uint8 *)(srcpix +
(MIN(height - 1, looph + 1) * srcpitch) +
(1 * loopw));

E0 = D == B && B != F && D != H ? D : E;
E1 = B == F && B != D && F != H ? F : E;
E2 = D == H && D != B && H != F ? D : E;
E3 = H == F && D != H && B != F ? F : E;

*(Uint8 *)(dstpix + looph * 2 * dstpitch + loopw * 2 * 1) =
E0;
*(Uint8 *)(dstpix + looph * 2 * dstpitch +
(loopw * 2 + 1) * 1) = E1;
*(Uint8 *)(dstpix + (looph * 2 + 1) * dstpitch +
loopw * 2 * 1) = E2;
*(Uint8 *)(dstpix + (looph * 2 + 1) * dstpitch +
(loopw * 2 + 1) * 1) = E3;
B = *(Uint8 *)(src_row_prev + loopw);
D = *(Uint8 *)(src_row + MAX(0, loopw - 1));
E = *(Uint8 *)(src_row + loopw);
F = *(Uint8 *)(src_row + MIN(width - 1, loopw + 1));
H = *(Uint8 *)(src_row_next + loopw);

if (B != H && D != F) {
E0 = (D == B) ? D : E;
E1 = (B == F) ? F : E;
E2 = (D == H) ? D : E;
E3 = (H == F) ? F : E;
}
else {
E0 = E;
E1 = E;
E2 = E;
E3 = E;
}

*(Uint8 *)(dst_row0 + loopw * 2) = E0;
*(Uint8 *)(dst_row0 + loopw * 2 + 1) = E1;
*(Uint8 *)(dst_row1 + loopw * 2) = E2;
*(Uint8 *)(dst_row1 + loopw * 2 + 1) = E3;
}
}
break;
}
case 2: {
Uint16 E0, E1, E2, E3, B, D, E, F, H;
for (looph = 0; looph < height; ++looph) {
Uint8 *src_row = srcpix + looph * srcpitch;
Uint8 *dst_row0 = dstpix + looph * 2 * dstpitch;
Uint8 *dst_row1 = dstpix + (looph * 2 + 1) * dstpitch;

Uint8 *src_row_prev = srcpix + MAX(0, looph - 1) * srcpitch;
Uint8 *src_row_next =
srcpix + MIN(height - 1, looph + 1) * srcpitch;

for (loopw = 0; loopw < width; ++loopw) {
B = *(Uint16 *)(srcpix + (MAX(0, looph - 1) * srcpitch) +
(2 * loopw));
D = *(Uint16 *)(srcpix + (looph * srcpitch) +
(2 * MAX(0, loopw - 1)));
E = *(Uint16 *)(srcpix + (looph * srcpitch) + (2 * loopw));
F = *(Uint16 *)(srcpix + (looph * srcpitch) +
(2 * MIN(width - 1, loopw + 1)));
H = *(Uint16 *)(srcpix +
(MIN(height - 1, looph + 1) * srcpitch) +
(2 * loopw));

E0 = D == B && B != F && D != H ? D : E;
E1 = B == F && B != D && F != H ? F : E;
E2 = D == H && D != B && H != F ? D : E;
E3 = H == F && D != H && B != F ? F : E;

*(Uint16 *)(dstpix + looph * 2 * dstpitch +
loopw * 2 * 2) = E0;
*(Uint16 *)(dstpix + looph * 2 * dstpitch +
(loopw * 2 + 1) * 2) = E1;
*(Uint16 *)(dstpix + (looph * 2 + 1) * dstpitch +
loopw * 2 * 2) = E2;
*(Uint16 *)(dstpix + (looph * 2 + 1) * dstpitch +
(loopw * 2 + 1) * 2) = E3;
B = *(Uint16 *)(src_row_prev + 2 * loopw);
D = *(Uint16 *)(src_row + 2 * MAX(0, loopw - 1));
E = *(Uint16 *)(src_row + 2 * loopw);
F = *(Uint16 *)(src_row + 2 * MIN(width - 1, loopw + 1));
H = *(Uint16 *)(src_row_next + 2 * loopw);

if (B != H && D != F) {
E0 = (D == B) ? D : E;
E1 = (B == F) ? F : E;
E2 = (D == H) ? D : E;
E3 = (H == F) ? F : E;
}
else {
E0 = E;
E1 = E;
E2 = E;
E3 = E;
}

*(Uint16 *)(dst_row0 + loopw * 2 * 2) = E0;
*(Uint16 *)(dst_row0 + (loopw * 2 + 1) * 2) = E1;
*(Uint16 *)(dst_row1 + loopw * 2 * 2) = E2;
*(Uint16 *)(dst_row1 + (loopw * 2 + 1) * 2) = E3;
}
}
break;
}
case 3: {
int E0, E1, E2, E3, B, D, E, F, H;
for (looph = 0; looph < height; ++looph) {
Uint8 *src_row = srcpix + looph * srcpitch;
Uint8 *dst_row0 = dstpix + looph * 2 * dstpitch;
Uint8 *dst_row1 = dstpix + (looph * 2 + 1) * dstpitch;

Uint8 *src_row_prev = srcpix + MAX(0, looph - 1) * srcpitch;
Uint8 *src_row_next =
srcpix + MIN(height - 1, looph + 1) * srcpitch;

for (loopw = 0; loopw < width; ++loopw) {
B = READINT24(srcpix + (MAX(0, looph - 1) * srcpitch) +
(3 * loopw));
D = READINT24(srcpix + (looph * srcpitch) +
(3 * MAX(0, loopw - 1)));
E = READINT24(srcpix + (looph * srcpitch) + (3 * loopw));
F = READINT24(srcpix + (looph * srcpitch) +
(3 * MIN(width - 1, loopw + 1)));
H = READINT24(srcpix +
(MIN(height - 1, looph + 1) * srcpitch) +
(3 * loopw));

E0 = D == B && B != F && D != H ? D : E;
E1 = B == F && B != D && F != H ? F : E;
E2 = D == H && D != B && H != F ? D : E;
E3 = H == F && D != H && B != F ? F : E;

WRITEINT24((dstpix + looph * 2 * dstpitch + loopw * 2 * 3),
E0);
WRITEINT24(
(dstpix + looph * 2 * dstpitch + (loopw * 2 + 1) * 3),
E1);
WRITEINT24(
(dstpix + (looph * 2 + 1) * dstpitch + loopw * 2 * 3),
E2);
WRITEINT24((dstpix + (looph * 2 + 1) * dstpitch +
(loopw * 2 + 1) * 3),
E3);
B = read_int24(src_row_prev + (3 * loopw));
D = read_int24(src_row + (3 * MAX(0, loopw - 1)));
E = read_int24(src_row + (3 * loopw));
F = read_int24(src_row + (3 * MIN(width - 1, loopw + 1)));
H = read_int24(src_row_next + (3 * loopw));

if (B != H && D != F) {
E0 = (D == B) ? D : E;
E1 = (B == F) ? F : E;
E2 = (D == H) ? D : E;
E3 = (H == F) ? F : E;
}
else {
E0 = E;
E1 = E;
E2 = E;
E3 = E;
}

store_int24(dst_row0 + loopw * 2 * 3, E0);
store_int24(dst_row0 + (loopw * 2 + 1) * 3, E1);
store_int24(dst_row1 + loopw * 2 * 3, E2);
store_int24(dst_row1 + (loopw * 2 + 1) * 3, E3);
}
}
break;
}
default: { /*case 4:*/
default: {
Uint32 E0, E1, E2, E3, B, D, E, F, H;

for (looph = 0; looph < height; ++looph) {
Uint8 *src_row = srcpix + looph * srcpitch;
Uint8 *dst_row0 = dstpix + looph * 2 * dstpitch;
Uint8 *dst_row1 = dstpix + (looph * 2 + 1) * dstpitch;

Uint8 *src_row_prev = srcpix + MAX(0, looph - 1) * srcpitch;
Uint8 *src_row_next =
srcpix + MIN(height - 1, looph + 1) * srcpitch;

for (loopw = 0; loopw < width; ++loopw) {
B = *(Uint32 *)(srcpix + (MAX(0, looph - 1) * srcpitch) +
(4 * loopw));
D = *(Uint32 *)(srcpix + (looph * srcpitch) +
(4 * MAX(0, loopw - 1)));
E = *(Uint32 *)(srcpix + (looph * srcpitch) + (4 * loopw));
F = *(Uint32 *)(srcpix + (looph * srcpitch) +
(4 * MIN(width - 1, loopw + 1)));
H = *(Uint32 *)(srcpix +
(MIN(height - 1, looph + 1) * srcpitch) +
(4 * loopw));

E0 = D == B && B != F && D != H ? D : E;
E1 = B == F && B != D && F != H ? F : E;
E2 = D == H && D != B && H != F ? D : E;
E3 = H == F && D != H && B != F ? F : E;

*(Uint32 *)(dstpix + looph * 2 * dstpitch +
loopw * 2 * 4) = E0;
*(Uint32 *)(dstpix + looph * 2 * dstpitch +
(loopw * 2 + 1) * 4) = E1;
*(Uint32 *)(dstpix + (looph * 2 + 1) * dstpitch +
loopw * 2 * 4) = E2;
*(Uint32 *)(dstpix + (looph * 2 + 1) * dstpitch +
(loopw * 2 + 1) * 4) = E3;
B = *(Uint32 *)(src_row_prev + 4 * loopw);
D = *(Uint32 *)(src_row + 4 * MAX(0, loopw - 1));
E = *(Uint32 *)(src_row + 4 * loopw);
F = *(Uint32 *)(src_row + 4 * MIN(width - 1, loopw + 1));
H = *(Uint32 *)(src_row_next + 4 * loopw);

if (B != H && D != F) {
E0 = (D == B) ? D : E;
E1 = (B == F) ? F : E;
E2 = (D == H) ? D : E;
E3 = (H == F) ? F : E;
}
else {
E0 = E;
E1 = E;
E2 = E;
E3 = E;
}

*(Uint32 *)(dst_row0 + loopw * 2 * 4) = E0;
*(Uint32 *)(dst_row0 + (loopw * 2 + 1) * 4) = E1;
*(Uint32 *)(dst_row1 + loopw * 2 * 4) = E2;
*(Uint32 *)(dst_row1 + (loopw * 2 + 1) * 4) = E3;
}
}
break;
Expand Down

0 comments on commit 778d471

Please sign in to comment.