From 778d4712345827c6d9515998858ce9875bb0edcb Mon Sep 17 00:00:00 2001 From: Alberto <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Sun, 30 Jun 2024 12:13:39 +0200 Subject: [PATCH] Optimized `transform.scale2x()` (#2859) * optimized all Bpp cases of scale2x * finish 3Bpp case * removed restrict keyword, added Bpp var * format --- src_c/scale2x.c | 252 +++++++++++++++++++++++++++--------------------- 1 file changed, 143 insertions(+), 109 deletions(-) diff --git a/src_c/scale2x.c b/src_c/scale2x.c index a60de524f4..61a9beffa5 100644 --- a/src_c/scale2x.c +++ b/src_c/scale2x.c @@ -34,13 +34,19 @@ #define MAX(a, b) (((a) > (b)) ? (a) : (b)) #define MIN(a, b) (((a) < (b)) ? (a) : (b)) -#define READINT24(x) ((x)[0] << 16 | (x)[1] << 8 | (x)[2]) -#define WRITEINT24(x, i) \ - { \ - (x)[0] = i >> 16; \ - (x)[1] = (i >> 8) & 0xff; \ - x[2] = i & 0xff; \ - } +static inline int +read_int24(const Uint8 *x) +{ + return (x[0] << 16 | x[1] << 8 | x[2]); +} + +static inline void +store_int24(Uint8 *x, int i) +{ + x[0] = i >> 16; + x[1] = (i >> 8) & 0xff; + x[2] = i & 0xff; +} /* this requires a destination surface already setup to be twice as @@ -62,38 +68,47 @@ scale2x(SDL_Surface *src, SDL_Surface *dst) const int height = src->h; #if SDL_VERSION_ATLEAST(3, 0, 0) - switch (src->format->bytes_per_pixel) { + const Uint8 Bpp = src->format->bytes_per_pixel; #else - switch (src->format->BytesPerPixel) { + const Uint8 Bpp = src->format->BytesPerPixel; #endif + + switch (Bpp) { case 1: { Uint8 E0, E1, E2, E3, B, D, E, F, H; for (looph = 0; looph < height; ++looph) { + Uint8 *src_row = srcpix + looph * srcpitch; + Uint8 *dst_row0 = dstpix + looph * 2 * dstpitch; + Uint8 *dst_row1 = dstpix + (looph * 2 + 1) * dstpitch; + + Uint8 *src_row_prev = srcpix + MAX(0, looph - 1) * srcpitch; + Uint8 *src_row_next = + srcpix + MIN(height - 1, looph + 1) * srcpitch; + for (loopw = 0; loopw < width; ++loopw) { - B = *(Uint8 *)(srcpix + (MAX(0, looph - 1) * srcpitch) + - (1 * loopw)); - D = *(Uint8 *)(srcpix + (looph * srcpitch) + - (1 * MAX(0, loopw - 1))); - E = *(Uint8 *)(srcpix + (looph * srcpitch) + (1 * loopw)); - F = *(Uint8 *)(srcpix + (looph * srcpitch) + - (1 * MIN(width - 1, loopw + 1))); - H = *(Uint8 *)(srcpix + - (MIN(height - 1, looph + 1) * srcpitch) + - (1 * loopw)); - - E0 = D == B && B != F && D != H ? D : E; - E1 = B == F && B != D && F != H ? F : E; - E2 = D == H && D != B && H != F ? D : E; - E3 = H == F && D != H && B != F ? F : E; - - *(Uint8 *)(dstpix + looph * 2 * dstpitch + loopw * 2 * 1) = - E0; - *(Uint8 *)(dstpix + looph * 2 * dstpitch + - (loopw * 2 + 1) * 1) = E1; - *(Uint8 *)(dstpix + (looph * 2 + 1) * dstpitch + - loopw * 2 * 1) = E2; - *(Uint8 *)(dstpix + (looph * 2 + 1) * dstpitch + - (loopw * 2 + 1) * 1) = E3; + B = *(Uint8 *)(src_row_prev + loopw); + D = *(Uint8 *)(src_row + MAX(0, loopw - 1)); + E = *(Uint8 *)(src_row + loopw); + F = *(Uint8 *)(src_row + MIN(width - 1, loopw + 1)); + H = *(Uint8 *)(src_row_next + loopw); + + if (B != H && D != F) { + E0 = (D == B) ? D : E; + E1 = (B == F) ? F : E; + E2 = (D == H) ? D : E; + E3 = (H == F) ? F : E; + } + else { + E0 = E; + E1 = E; + E2 = E; + E3 = E; + } + + *(Uint8 *)(dst_row0 + loopw * 2) = E0; + *(Uint8 *)(dst_row0 + loopw * 2 + 1) = E1; + *(Uint8 *)(dst_row1 + loopw * 2) = E2; + *(Uint8 *)(dst_row1 + loopw * 2 + 1) = E3; } } break; @@ -101,31 +116,38 @@ scale2x(SDL_Surface *src, SDL_Surface *dst) case 2: { Uint16 E0, E1, E2, E3, B, D, E, F, H; for (looph = 0; looph < height; ++looph) { + Uint8 *src_row = srcpix + looph * srcpitch; + Uint8 *dst_row0 = dstpix + looph * 2 * dstpitch; + Uint8 *dst_row1 = dstpix + (looph * 2 + 1) * dstpitch; + + Uint8 *src_row_prev = srcpix + MAX(0, looph - 1) * srcpitch; + Uint8 *src_row_next = + srcpix + MIN(height - 1, looph + 1) * srcpitch; + for (loopw = 0; loopw < width; ++loopw) { - B = *(Uint16 *)(srcpix + (MAX(0, looph - 1) * srcpitch) + - (2 * loopw)); - D = *(Uint16 *)(srcpix + (looph * srcpitch) + - (2 * MAX(0, loopw - 1))); - E = *(Uint16 *)(srcpix + (looph * srcpitch) + (2 * loopw)); - F = *(Uint16 *)(srcpix + (looph * srcpitch) + - (2 * MIN(width - 1, loopw + 1))); - H = *(Uint16 *)(srcpix + - (MIN(height - 1, looph + 1) * srcpitch) + - (2 * loopw)); - - E0 = D == B && B != F && D != H ? D : E; - E1 = B == F && B != D && F != H ? F : E; - E2 = D == H && D != B && H != F ? D : E; - E3 = H == F && D != H && B != F ? F : E; - - *(Uint16 *)(dstpix + looph * 2 * dstpitch + - loopw * 2 * 2) = E0; - *(Uint16 *)(dstpix + looph * 2 * dstpitch + - (loopw * 2 + 1) * 2) = E1; - *(Uint16 *)(dstpix + (looph * 2 + 1) * dstpitch + - loopw * 2 * 2) = E2; - *(Uint16 *)(dstpix + (looph * 2 + 1) * dstpitch + - (loopw * 2 + 1) * 2) = E3; + B = *(Uint16 *)(src_row_prev + 2 * loopw); + D = *(Uint16 *)(src_row + 2 * MAX(0, loopw - 1)); + E = *(Uint16 *)(src_row + 2 * loopw); + F = *(Uint16 *)(src_row + 2 * MIN(width - 1, loopw + 1)); + H = *(Uint16 *)(src_row_next + 2 * loopw); + + if (B != H && D != F) { + E0 = (D == B) ? D : E; + E1 = (B == F) ? F : E; + E2 = (D == H) ? D : E; + E3 = (H == F) ? F : E; + } + else { + E0 = E; + E1 = E; + E2 = E; + E3 = E; + } + + *(Uint16 *)(dst_row0 + loopw * 2 * 2) = E0; + *(Uint16 *)(dst_row0 + (loopw * 2 + 1) * 2) = E1; + *(Uint16 *)(dst_row1 + loopw * 2 * 2) = E2; + *(Uint16 *)(dst_row1 + (loopw * 2 + 1) * 2) = E3; } } break; @@ -133,66 +155,78 @@ scale2x(SDL_Surface *src, SDL_Surface *dst) case 3: { int E0, E1, E2, E3, B, D, E, F, H; for (looph = 0; looph < height; ++looph) { + Uint8 *src_row = srcpix + looph * srcpitch; + Uint8 *dst_row0 = dstpix + looph * 2 * dstpitch; + Uint8 *dst_row1 = dstpix + (looph * 2 + 1) * dstpitch; + + Uint8 *src_row_prev = srcpix + MAX(0, looph - 1) * srcpitch; + Uint8 *src_row_next = + srcpix + MIN(height - 1, looph + 1) * srcpitch; + for (loopw = 0; loopw < width; ++loopw) { - B = READINT24(srcpix + (MAX(0, looph - 1) * srcpitch) + - (3 * loopw)); - D = READINT24(srcpix + (looph * srcpitch) + - (3 * MAX(0, loopw - 1))); - E = READINT24(srcpix + (looph * srcpitch) + (3 * loopw)); - F = READINT24(srcpix + (looph * srcpitch) + - (3 * MIN(width - 1, loopw + 1))); - H = READINT24(srcpix + - (MIN(height - 1, looph + 1) * srcpitch) + - (3 * loopw)); - - E0 = D == B && B != F && D != H ? D : E; - E1 = B == F && B != D && F != H ? F : E; - E2 = D == H && D != B && H != F ? D : E; - E3 = H == F && D != H && B != F ? F : E; - - WRITEINT24((dstpix + looph * 2 * dstpitch + loopw * 2 * 3), - E0); - WRITEINT24( - (dstpix + looph * 2 * dstpitch + (loopw * 2 + 1) * 3), - E1); - WRITEINT24( - (dstpix + (looph * 2 + 1) * dstpitch + loopw * 2 * 3), - E2); - WRITEINT24((dstpix + (looph * 2 + 1) * dstpitch + - (loopw * 2 + 1) * 3), - E3); + B = read_int24(src_row_prev + (3 * loopw)); + D = read_int24(src_row + (3 * MAX(0, loopw - 1))); + E = read_int24(src_row + (3 * loopw)); + F = read_int24(src_row + (3 * MIN(width - 1, loopw + 1))); + H = read_int24(src_row_next + (3 * loopw)); + + if (B != H && D != F) { + E0 = (D == B) ? D : E; + E1 = (B == F) ? F : E; + E2 = (D == H) ? D : E; + E3 = (H == F) ? F : E; + } + else { + E0 = E; + E1 = E; + E2 = E; + E3 = E; + } + + store_int24(dst_row0 + loopw * 2 * 3, E0); + store_int24(dst_row0 + (loopw * 2 + 1) * 3, E1); + store_int24(dst_row1 + loopw * 2 * 3, E2); + store_int24(dst_row1 + (loopw * 2 + 1) * 3, E3); } } break; } - default: { /*case 4:*/ + default: { Uint32 E0, E1, E2, E3, B, D, E, F, H; + for (looph = 0; looph < height; ++looph) { + Uint8 *src_row = srcpix + looph * srcpitch; + Uint8 *dst_row0 = dstpix + looph * 2 * dstpitch; + Uint8 *dst_row1 = dstpix + (looph * 2 + 1) * dstpitch; + + Uint8 *src_row_prev = srcpix + MAX(0, looph - 1) * srcpitch; + Uint8 *src_row_next = + srcpix + MIN(height - 1, looph + 1) * srcpitch; + for (loopw = 0; loopw < width; ++loopw) { - B = *(Uint32 *)(srcpix + (MAX(0, looph - 1) * srcpitch) + - (4 * loopw)); - D = *(Uint32 *)(srcpix + (looph * srcpitch) + - (4 * MAX(0, loopw - 1))); - E = *(Uint32 *)(srcpix + (looph * srcpitch) + (4 * loopw)); - F = *(Uint32 *)(srcpix + (looph * srcpitch) + - (4 * MIN(width - 1, loopw + 1))); - H = *(Uint32 *)(srcpix + - (MIN(height - 1, looph + 1) * srcpitch) + - (4 * loopw)); - - E0 = D == B && B != F && D != H ? D : E; - E1 = B == F && B != D && F != H ? F : E; - E2 = D == H && D != B && H != F ? D : E; - E3 = H == F && D != H && B != F ? F : E; - - *(Uint32 *)(dstpix + looph * 2 * dstpitch + - loopw * 2 * 4) = E0; - *(Uint32 *)(dstpix + looph * 2 * dstpitch + - (loopw * 2 + 1) * 4) = E1; - *(Uint32 *)(dstpix + (looph * 2 + 1) * dstpitch + - loopw * 2 * 4) = E2; - *(Uint32 *)(dstpix + (looph * 2 + 1) * dstpitch + - (loopw * 2 + 1) * 4) = E3; + B = *(Uint32 *)(src_row_prev + 4 * loopw); + D = *(Uint32 *)(src_row + 4 * MAX(0, loopw - 1)); + E = *(Uint32 *)(src_row + 4 * loopw); + F = *(Uint32 *)(src_row + 4 * MIN(width - 1, loopw + 1)); + H = *(Uint32 *)(src_row_next + 4 * loopw); + + if (B != H && D != F) { + E0 = (D == B) ? D : E; + E1 = (B == F) ? F : E; + E2 = (D == H) ? D : E; + E3 = (H == F) ? F : E; + } + else { + E0 = E; + E1 = E; + E2 = E; + E3 = E; + } + + *(Uint32 *)(dst_row0 + loopw * 2 * 4) = E0; + *(Uint32 *)(dst_row0 + (loopw * 2 + 1) * 4) = E1; + *(Uint32 *)(dst_row1 + loopw * 2 * 4) = E2; + *(Uint32 *)(dst_row1 + (loopw * 2 + 1) * 4) = E3; } } break;