Răsfoiți Sursa

adds sdl_sve_chn_blend_with_mask_fast for RGB565 alpha-blending

Gabriel Wang 2 zile în urmă
părinte
comite
e07cfdff2c
2 a modificat fișierele cu 28 adăugiri și 7 ștergeri
  1. 6 6
      src/video/arm/SDL_sve2_blit_A.c
  2. 22 1
      src/video/arm/SDL_sve2_extension.h

+ 6 - 6
src/video/arm/SDL_sve2_blit_A.c

@@ -51,12 +51,12 @@
     }
 
 #undef sdl_sve_rgb32_blend_to_rgb565_op
-#define sdl_sve_rgb32_blend_to_rgb565_op(ma_alpha_chn_idx)               \
-    do {                                                                 \
-        svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx)); \
-        sve_target_u16 = sdl_sve_chn_blend_with_mask(sve_source_u16,     \
-                                                     sve_target_u16,     \
-                                                     vMask);             \
+#define sdl_sve_rgb32_blend_to_rgb565_op(ma_alpha_chn_idx)                \
+    do {                                                                  \
+        svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx));  \
+        sve_target_u16 = sdl_sve_chn_blend_with_mask_fast(sve_source_u16, \
+                                                          sve_target_u16, \
+                                                          vMask);         \
     } while (0)
 
 #include "SDL_sve2_swizzle.h"

+ 22 - 1
src/video/arm/SDL_sve2_extension.h

@@ -902,7 +902,9 @@ static inline void svst4ub_u16(svbool_t vPredu8,
 /*! \note the Element range of vMask is [0, 0xFF]
  */
 SDL_TARGETING("arch=armv8-a+sve2")
-static inline svuint16_t sdl_sve_chn_blend_with_mask(svuint16_t vSource, svuint16_t vTarget, svuint16_t vMask)
+static inline svuint16_t sdl_sve_chn_blend_with_mask(svuint16_t vSource,
+                                                     svuint16_t vTarget,
+                                                     svuint16_t vMask)
 {
     // vTarget = vSource * vMask + vTarget * (255 - vMask);
     svuint16_t vTemp0 = svmul_u16_m(svptrue_b16(), vSource, vMask);
@@ -924,6 +926,25 @@ static inline svuint16_t sdl_sve_chn_blend_with_mask(svuint16_t vSource, svuint1
     return svlsr_n_u16_m(svptrue_b16(), vTemp0, 8); // vTarget >> 8;
 }
 
+/*! \brief per-element blend: (vSource * vMask + vTarget * (255 - vMask)) >> 8
+ *  \note the element range of vMask is [0, 0xFF]; weights sum to 255 but the shift divides by 256 */
+SDL_TARGETING("arch=armv8-a+sve2")
+static inline svuint16_t sdl_sve_chn_blend_with_mask_fast(svuint16_t vSource,
+                                                          svuint16_t vTarget,
+                                                          svuint16_t vMask)
+{
+    // vTemp0 = vSource * vMask + vTarget * (255 - vMask); all lanes active (svptrue_b16)
+    svuint16_t vTemp0 = svmul_u16_m(svptrue_b16(), vSource, vMask);
+    vTemp0 = svmla_u16_m(svptrue_b16(),
+                         vTemp0,
+                         vTarget,
+                         svsub_u16_m(svptrue_b16(),
+                                     svdup_u16(255),
+                                     vMask));
+
+    return svlsr_n_u16_m(svptrue_b16(), vTemp0, 8); // vTemp0 >> 8, i.e. scale the weighted sum back down
+}
+
 /*! \note the hwOpacity range [0, 0x100]
  */
 SDL_TARGETING("arch=armv8-a+sve2")