// Source: gugdun/SDL (mirror of https://github.com/libsdl-org/SDL)
							/*
  Simple DirectMedia Layer
  Copyright (C) 1997-2026 Sam Lantinga <slouken@libsdl.org>

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/
#include "SDL_internal.h"

#include "SDL_render_ops.hpp"
#include <3dtypes.h>

void ApplyColorMod(void *dest, void *source, int pitch, int width, int height, SDL_FColor color, const TUint8 *colorLUT)
{
    TUint16 *src_pixels = static_cast<TUint16 *>(source);
    TUint16 *dst_pixels = static_cast<TUint16 *>(dest);

    // Pre-calculate pitch in pixels to avoid repeated division.
    const TInt pitchPixels = pitch >> 1;

    // Pre-calculate LUT offsets to reduce addressing calculations.
    const TUint8 *lut_r = colorLUT;
    const TUint8 *lut_g = colorLUT + 256;
    const TUint8 *lut_b = colorLUT + 512;

    // Process 4 pixels at a time (loop unrolling).
    for (int y = 0; y < height; ++y) {
        const TInt rowOffset = y * pitchPixels;
        int x = 0;

        // Unrolled loop: process 4 pixels at once with optimized bit manipulation.
        for (; x < width - 3; x += 4) {
            // Load 4 pixels at once.
            TUint16 p0 = src_pixels[rowOffset + x];
            TUint16 p1 = src_pixels[rowOffset + x + 1];
            TUint16 p2 = src_pixels[rowOffset + x + 2];
            TUint16 p3 = src_pixels[rowOffset + x + 3];

            // Pixel 0: Extract and modulate RGB4444 components.
            // RGB4444 format: RRRR GGGG BBBB xxxx
            TUint8 r0 = lut_r[(p0 >> 8) & 0xF0];  // Extract R (bits 12-15), shift to byte position
            TUint8 g0 = lut_g[(p0 >> 3) & 0xF8];  // Extract G (bits 6-9), scale to 8-bit
            TUint8 b0 = lut_b[(p0 << 3) & 0xF8];  // Extract B (bits 0-3), scale to 8-bit
            dst_pixels[rowOffset + x] = ((r0 & 0xF0) << 8) | ((g0 & 0xF0) << 3) | ((b0 & 0xF0) >> 1);

            // Pixel 1
            TUint8 r1 = lut_r[(p1 >> 8) & 0xF0];
            TUint8 g1 = lut_g[(p1 >> 3) & 0xF8];
            TUint8 b1 = lut_b[(p1 << 3) & 0xF8];
            dst_pixels[rowOffset + x + 1] = ((r1 & 0xF0) << 8) | ((g1 & 0xF0) << 3) | ((b1 & 0xF0) >> 1);

            // Pixel 2
            TUint8 r2 = lut_r[(p2 >> 8) & 0xF0];
            TUint8 g2 = lut_g[(p2 >> 3) & 0xF8];
            TUint8 b2 = lut_b[(p2 << 3) & 0xF8];
            dst_pixels[rowOffset + x + 2] = ((r2 & 0xF0) << 8) | ((g2 & 0xF0) << 3) | ((b2 & 0xF0) >> 1);

            // Pixel 3
            TUint8 r3 = lut_r[(p3 >> 8) & 0xF0];
            TUint8 g3 = lut_g[(p3 >> 3) & 0xF8];
            TUint8 b3 = lut_b[(p3 << 3) & 0xF8];
            dst_pixels[rowOffset + x + 3] = ((r3 & 0xF0) << 8) | ((g3 & 0xF0) << 3) | ((b3 & 0xF0) >> 1);
        }

        // Handle remaining pixels.
        for (; x < width; ++x) {
            TUint16 pixel = src_pixels[rowOffset + x];
            TUint8 r = lut_r[(pixel >> 8) & 0xF0];
            TUint8 g = lut_g[(pixel >> 3) & 0xF8];
            TUint8 b = lut_b[(pixel << 3) & 0xF8];
            dst_pixels[rowOffset + x] = ((r & 0xF0) << 8) | ((g & 0xF0) << 3) | ((b & 0xF0) >> 1);
        }
    }
}

void ApplyFlip(void *dest, void *source, int pitch, int width, int height, SDL_FlipMode flip)
{
    TUint16 *src_pixels = static_cast<TUint16 *>(source);
    TUint16 *dst_pixels = static_cast<TUint16 *>(dest);

    // Pre-calculate pitch in pixels to avoid repeated division.
    const TInt pitchPixels = pitch >> 1;

    // Pre-calculate flip flags to avoid repeated bitwise operations.
    const bool flipHorizontal = (flip & SDL_FLIP_HORIZONTAL) != 0;
    const bool flipVertical = (flip & SDL_FLIP_VERTICAL) != 0;

    // Pre-calculate width/height bounds for horizontal/vertical flipping.
    const int width_m1 = width - 1;
    const int height_m1 = height - 1;

    for (int y = 0; y < height; ++y) {
        // Calculate destination row offset once per row.
        const TInt dstRowOffset = y * pitchPixels;

        // Calculate source Y coordinate once per row.
        const int src_y = flipVertical ? (height_m1 - y) : y;
        const TInt srcRowOffset = src_y * pitchPixels;

        int x = 0;

        // Unrolled loop: process 4 pixels at once.
        for (; x < width - 3; x += 4) {
            if (flipHorizontal) {
                dst_pixels[dstRowOffset + x] = src_pixels[srcRowOffset + (width_m1 - x)];
                dst_pixels[dstRowOffset + x + 1] = src_pixels[srcRowOffset + (width_m1 - x - 1)];
                dst_pixels[dstRowOffset + x + 2] = src_pixels[srcRowOffset + (width_m1 - x - 2)];
                dst_pixels[dstRowOffset + x + 3] = src_pixels[srcRowOffset + (width_m1 - x - 3)];
            } else {
                dst_pixels[dstRowOffset + x] = src_pixels[srcRowOffset + x];
                dst_pixels[dstRowOffset + x + 1] = src_pixels[srcRowOffset + x + 1];
                dst_pixels[dstRowOffset + x + 2] = src_pixels[srcRowOffset + x + 2];
                dst_pixels[dstRowOffset + x + 3] = src_pixels[srcRowOffset + x + 3];
            }
        }

        // Handle remaining pixels.
        for (; x < width; ++x) {
            const int src_x = flipHorizontal ? (width_m1 - x) : x;
            dst_pixels[dstRowOffset + x] = src_pixels[srcRowOffset + src_x];
        }
    }
}

void ApplyRotation(void *dest, void *source, int pitch, int width, int height, TFixed center_x, TFixed center_y, TFixed angle)
{
    TUint16 *src_pixels = static_cast<TUint16 *>(source);
    TUint16 *dst_pixels = static_cast<TUint16 *>(dest);

    TFixed cos_angle = 0;
    TFixed sin_angle = 0;

    if (angle != 0) {
        FixSinCos(angle, sin_angle, cos_angle);
    }

    // Pre-calculate pitch in pixels to avoid repeated division.
    const TInt pitchPixels = pitch >> 1;

    // Incremental DDA: Calculate per-pixel increments.
    // As we move right (x+1), the rotated position changes by (cos, -sin).
    const TFixed dx_cos = cos_angle;
    const TFixed dx_sin = -sin_angle;

    for (int y = 0; y < height; ++y) {
        // Calculate destination row offset once per row.
        const TInt dstRowOffset = y * pitchPixels;

        // Calculate starting position for this row.
        // For y, rotation transforms: x' = x*cos - y*sin, y' = x*sin + y*cos
        // At x=0: x' = -y*sin, y' = y*cos (relative to center)
        const TFixed translated_y = Int2Fix(y) - center_y;
        const TFixed row_start_x = center_x - FixMul(translated_y, sin_angle);
        const TFixed row_start_y = center_y + FixMul(translated_y, cos_angle);

        // Start at x=0 position.
        TFixed src_x = row_start_x;
        TFixed src_y = row_start_y;

        int x = 0;

        // Unrolled loop: process 4 pixels at once.
        for (; x < width - 3; x += 4) {
            // Pixel 0
            int final_x0 = Fix2Int(src_x);
            int final_y0 = Fix2Int(src_y);
            src_x += dx_cos;
            src_y += dx_sin;

            // Pixel 1
            int final_x1 = Fix2Int(src_x);
            int final_y1 = Fix2Int(src_y);
            src_x += dx_cos;
            src_y += dx_sin;

            // Pixel 2
            int final_x2 = Fix2Int(src_x);
            int final_y2 = Fix2Int(src_y);
            src_x += dx_cos;
            src_y += dx_sin;

            // Pixel 3
            int final_x3 = Fix2Int(src_x);
            int final_y3 = Fix2Int(src_y);
            src_x += dx_cos;
            src_y += dx_sin;

            // Write all 4 pixels with bounds checking.
            dst_pixels[dstRowOffset + x] = (final_x0 >= 0 && final_x0 < width && final_y0 >= 0 && final_y0 < height) ?
                src_pixels[final_y0 * pitchPixels + final_x0] : 0;
            dst_pixels[dstRowOffset + x + 1] = (final_x1 >= 0 && final_x1 < width && final_y1 >= 0 && final_y1 < height) ?
                src_pixels[final_y1 * pitchPixels + final_x1] : 0;
            dst_pixels[dstRowOffset + x + 2] = (final_x2 >= 0 && final_x2 < width && final_y2 >= 0 && final_y2 < height) ?
                src_pixels[final_y2 * pitchPixels + final_x2] : 0;
            dst_pixels[dstRowOffset + x + 3] = (final_x3 >= 0 && final_x3 < width && final_y3 >= 0 && final_y3 < height) ?
                src_pixels[final_y3 * pitchPixels + final_x3] : 0;
        }

        // Handle remaining pixels.
        for (; x < width; ++x) {
            // Convert to integer coordinates.
            int final_x = Fix2Int(src_x);
            int final_y = Fix2Int(src_y);

            // Check bounds.
            if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) {
                dst_pixels[dstRowOffset + x] = src_pixels[final_y * pitchPixels + final_x];
            } else {
                dst_pixels[dstRowOffset + x] = 0;
            }

            // Incremental step: move to next pixel (just additions, no multiplications!).
            src_x += dx_cos;
            src_y += dx_sin;
        }
    }
}

void ApplyScale(void *dest, void *source, int pitch, int width, int height, TFixed center_x, TFixed center_y, TFixed scale_x, TFixed scale_y)
{
    TUint16 *src_pixels = static_cast<TUint16 *>(source);
    TUint16 *dst_pixels = static_cast<TUint16 *>(dest);

    // Pre-calculate pitch in pixels to avoid repeated division.
    const TInt pitchPixels = pitch >> 1;

    // Pre-calculate inverse scale factors to use FixMul instead of FixDiv.
    // This is MUCH faster on N-Gage hardware (no division per pixel!).
    TFixed inv_scale_x = FixDiv(Int2Fix(1), scale_x);
    TFixed inv_scale_y = FixDiv(Int2Fix(1), scale_y);

    // Pre-calculate center offset to reduce operations per pixel.
    TFixed center_x_fixed = center_x;
    TFixed center_y_fixed = center_y;

    for (int y = 0; y < height; ++y) {
        // Calculate destination row offset once per row.
        TInt dstRowOffset = y * pitchPixels;

        // Use inverse scale factor (multiply instead of divide).
        TFixed translated_y = Int2Fix(y) - center_y_fixed;
        TFixed scaled_y = FixMul(translated_y, inv_scale_y);
        int final_y = Fix2Int(scaled_y + center_y_fixed);

        // Check if this row is within bounds.
        bool rowInBounds = (final_y >= 0 && final_y < height);
        TInt srcRowOffset = final_y * pitchPixels;

        // Incremental DDA for X: pre-calculate starting position and increment.
        TFixed src_x_start = FixMul(-center_x_fixed, inv_scale_x) + center_x_fixed;
        TFixed src_x = src_x_start;

        int x = 0;

        // Unrolled loop: process 4 pixels at once.
        for (; x < width - 3; x += 4) {
            // Process 4 pixels using incremental approach.
            int final_x0 = Fix2Int(src_x);
            src_x += inv_scale_x;
            int final_x1 = Fix2Int(src_x);
            src_x += inv_scale_x;
            int final_x2 = Fix2Int(src_x);
            src_x += inv_scale_x;
            int final_x3 = Fix2Int(src_x);
            src_x += inv_scale_x;

            // Write all 4 pixels with bounds checking.
            dst_pixels[dstRowOffset + x] = (rowInBounds && final_x0 >= 0 && final_x0 < width) ?
                src_pixels[srcRowOffset + final_x0] : 0;
            dst_pixels[dstRowOffset + x + 1] = (rowInBounds && final_x1 >= 0 && final_x1 < width) ?
                src_pixels[srcRowOffset + final_x1] : 0;
            dst_pixels[dstRowOffset + x + 2] = (rowInBounds && final_x2 >= 0 && final_x2 < width) ?
                src_pixels[srcRowOffset + final_x2] : 0;
            dst_pixels[dstRowOffset + x + 3] = (rowInBounds && final_x3 >= 0 && final_x3 < width) ?
                src_pixels[srcRowOffset + final_x3] : 0;
        }

        // Handle remaining pixels.
        for (; x < width; ++x) {
            int final_x = Fix2Int(src_x);
            src_x += inv_scale_x;

            if (rowInBounds && final_x >= 0 && final_x < width) {
                dst_pixels[dstRowOffset + x] = src_pixels[srcRowOffset + final_x];
            } else {
                dst_pixels[dstRowOffset + x] = 0;
            }
        }
    }
}