Quellcode durchsuchen

adds Blit565to565SurfaceAlphaSVE2

Gabriel Wang vor 2 Tagen
Ursprung
Commit
508450e9c0

+ 5 - 0
src/video/SDL_blit_A.c

@@ -1570,6 +1570,11 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
                         if (SDL_HasMMX()) {
                             return Blit565to565SurfaceAlphaMMX;
                         } else
+#endif
+#ifdef SDL_SVE2_INTRINSICS
+                        if (SDL_HasSVE2()) {
+                            return Blit565to565SurfaceAlphaSVE2;
+                        } else 
 #endif
                         {
                             return Blit565to565SurfaceAlpha;

+ 97 - 0
src/video/arm/SDL_sve2_blit_A.c

@@ -86,4 +86,101 @@ size_t SDL_GetSVEVectorSize(void)
     return svlen(svundef_u8()) * 8;
 }
 
+/*-----------------------------------------------------------------------------*
+ * RGB565 Blend with Surface Alpha                                             *
+ *-----------------------------------------------------------------------------*
+ * sdl_sve_rgb565_stride_blend_with_opacity() blends one run of RGB565 pixels
+ * from phwSource onto phwTarget, in place, using one opacity for every pixel.
+ *
+ * phwSource  pixels to blend from; only loaded here.
+ * phwTarget  pixels to blend onto; loaded, blended and stored back.
+ * uStride    length of the run handed to sdl_sve_stride_loop_rgb16(); the
+ *            caller in this file passes the row width in pixels.
+ * hwOpacity  weight forwarded unchanged to
+ *            sdl_sve_chn_blend_with_opacity_fast(), which documents the
+ *            range [0, 0x100].
+ *
+ * NOTE(review): vTailPred and sve_iteration_advance are not declared in this
+ * function; presumably sdl_sve_stride_loop_rgb16() provides them (the
+ * predicate for the current iteration and the number of elements it covers).
+ * Confirm against the macro definition.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_rgb565_stride_blend_with_opacity(uint16_t *SDL_RESTRICT phwSource,
+                                                            uint16_t *SDL_RESTRICT phwTarget,
+                                                            size_t uStride,
+                                                            uint16_t hwOpacity)
+{
+    sdl_sve_stride_loop_rgb16(uStride, vTailPred)
+    {
+
+        svuint16x3_t vSource16x3 = /* source pixels, split into three channel vectors */
+            sdl_sve_rgb565_unpack(svld1_u16(vTailPred, phwSource));
+
+        svuint16x3_t vTarget16x3 = /* target pixels, split the same way */
+            sdl_sve_rgb565_unpack(svld1_u16(vTailPred, phwTarget));
+
+        sdl_sve_pixel_ccc_foreach_chn( /* NOTE(review): presumably runs the body once per channel with sve_source_u16 / sve_target_u16 bound to that channel -- confirm */
+            vSource16x3,
+            vTarget16x3,
+            {
+                sve_target_u16 = sdl_sve_chn_blend_with_opacity_fast(
+                    sve_source_u16,
+                    sve_target_u16,
+                    hwOpacity);
+            });
+
+        svst1_u16(vTailPred, phwTarget, sdl_sve_rgb565_pack(vTarget16x3)); // repack and store under the same predicate used for the loads
+
+        phwSource += sve_iteration_advance; // both pointers move by the same element count
+        phwTarget += sve_iteration_advance;
+    }
+}
+
+SDL_TARGETING("arch=armv8-a+sve2") /*
+ * Blends an nWidth x nHeight block of RGB565 pixels from pchSource onto
+ * pchTarget with a single opacity, one row at a time.
+ *
+ * pchSource / pchTarget          first pixel of each image, as byte pointers;
+ *                                both are asserted to be 2-byte aligned.
+ * uSourceStride / uTargetStride  added to the byte pointers after every row,
+ *                                i.e. the distance between row starts in bytes.
+ * nWidth                         forwarded as the per-row run length.
+ * nHeight                        number of rows to process.
+ * hwOpacity                      opacity; an input of 255 is bumped to 256.
+ */
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_rgb565_blend_with_opacity(uint8_t *SDL_RESTRICT pchSource,
+                                                     size_t uSourceStride,
+                                                     uint8_t *SDL_RESTRICT pchTarget,
+                                                     size_t uTargetStride,
+                                                     int nWidth,
+                                                     int nHeight,
+                                                     uint16_t hwOpacity)
+{
+    hwOpacity += hwOpacity == 255; // 255 -> 256, so the blend helper's target weight (256 - hwOpacity) becomes 0
+    assert(0 == ((uintptr_t)pchSource & 0x01)); // the rows are accessed through uint16_t pointers below
+    assert(0 == ((uintptr_t)pchTarget & 0x01));
+
+    while (nHeight--) { // one pass per row; NOTE(review): assumes nHeight >= 0 -- confirm callers never pass a negative height
+
+        sdl_sve_rgb565_stride_blend_with_opacity((uint16_t *)pchSource,
+                                                 (uint16_t *)pchTarget,
+                                                 nWidth,
+                                                 hwOpacity);
+
+        pchSource += uSourceStride; // byte pointers, so the strides are applied in bytes
+        pchTarget += uTargetStride;
+    }
+}
+
+/* Fast RGB565->RGB565 blending with surface alpha.
+ *
+ * Reads the destination size, the source/destination pixel pointers, their
+ * skip values and the alpha value from `info`, derives a per-row byte stride
+ * for each side and hands everything to sdl_sve_rgb565_blend_with_opacity().
+ * Both pixel formats are asserted to be 2 bytes per pixel.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+void Blit565to565SurfaceAlphaSVE2(SDL_BlitInfo *info)
+{
+    uint16_t alpha = info->a; // held as uint16_t to match the callee's hwOpacity parameter
+
+    int width = info->dst_w;
+    int height = info->dst_h;
+    uint8_t *src = info->src;
+    int srcskip = info->src_skip;
+    uint8_t *dst = info->dst;
+    int dstskip = info->dst_skip;
+
+    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
+    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
+
+    // Set up some basic variables
+    int srcbpp = srcfmt->bytes_per_pixel;
+    int dstbpp = dstfmt->bytes_per_pixel;
+
+    assert(srcbpp == 2); // this path only handles 2-byte pixels on both sides
+    assert(dstbpp == 2);
+
+    int srcstride = srcskip + srcbpp * width; // bytes of pixel data in a row plus the skip value: the per-row pointer advance
+    int dststride = dstskip + dstbpp * width;
+
+    sdl_sve_rgb565_blend_with_opacity(src,
+                                      srcstride,
+                                      dst,
+                                      dststride,
+                                      width,
+                                      height,
+                                      alpha);
+}
+
 #endif /* SDL_SVE2_INTRINSICS */

+ 2 - 0
src/video/arm/SDL_sve2_blit_A.h

@@ -30,6 +30,8 @@
 void Blit8888to8888PixelAlphaSwizzleSVE2(SDL_BlitInfo *info);
 void Blit8888to565PixelAlphaSwizzleSVE2(SDL_BlitInfo *info);
 
+void Blit565to565SurfaceAlphaSVE2(SDL_BlitInfo *info); /* RGB565->RGB565 blend with surface alpha */
+
 size_t SDL_GetSVEVectorSize(void);
 
 #endif /* SDL_SVE2_INTRINSICS */

+ 17 - 0
src/video/arm/SDL_sve2_extension.h

@@ -964,6 +964,23 @@ static inline svuint16_t sdl_sve_chn_blend_with_opacity(svuint16_t vSource,
     return svlsr_n_u16_m(svptrue_b16(), vTarget, 8); // vTarget >> 8;
 }
 
+/*! \brief Blend two vectors of 16-bit channel values with one scalar opacity.
+ *
+ *  Every lane of the result is
+ *      (vSource * hwOpacity + vTarget * (256 - hwOpacity)) >> 8
+ *
+ *  \note the hwOpacity range [0, 0x100]
+ *  \note All arithmetic is done in unsigned 16-bit lanes, so the weighted sum
+ *        only stays free of wrap-around when the channel values are at most
+ *        0xFF; presumably the callers guarantee that -- TODO confirm.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+static inline svuint16_t sdl_sve_chn_blend_with_opacity_fast(svuint16_t vSource,
+                                                        svuint16_t vTarget,
+                                                        uint16_t hwOpacity)
+{
+    // vTemp0 = vSource * hwOpacity + vTarget * (256 - hwOpacity);
+    svuint16_t vTemp0 = svmul_n_u16_m(svptrue_b16(), vSource, hwOpacity);
+    vTemp0 = svmla_n_u16_m(svptrue_b16(),
+                         vTemp0,
+                         vTarget,
+                         256 - hwOpacity);
+
+    return svlsr_n_u16_m(svptrue_b16(), vTemp0, 8); // vTemp0 >> 8: scale the weighted sum back down
+}
+
 /*! \note the Element range of vMask is [0, 0xFF]
  *  \note the hwOpacity range [0, 0x100]
  */