1 месяц назад · e07cfdff2c
--- a/src/video/arm/SDL_sve2_blit_A.c
+++ b/src/video/arm/SDL_sve2_blit_A.c
@@ -51,12 +51,12 @@
 
				     }
			
 
				 
			
 
				 #undef sdl_sve_rgb32_blend_to_rgb565_op
			
 
				-#define sdl_sve_rgb32_blend_to_rgb565_op(ma_alpha_chn_idx)               \
			
 
				-    do {                                                                 \
			
 
				-        svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx)); \
			
 
				-        sve_target_u16 = sdl_sve_chn_blend_with_mask(sve_source_u16,     \
			
 
				-                                                     sve_target_u16,     \
			
 
				-                                                     vMask);             \
			
 
				+#define sdl_sve_rgb32_blend_to_rgb565_op(ma_alpha_chn_idx)                \
			
 
				+    do {                                                                  \
			
 
				+        svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx));  \
			
 
				+        sve_target_u16 = sdl_sve_chn_blend_with_mask_fast(sve_source_u16, \
			
 
				+                                                          sve_target_u16, \
			
 
				+                                                          vMask);         \
			
 
				     } while (0)
			
 
				 
			
 
				 #include "SDL_sve2_swizzle.h"
			
--- a/src/video/arm/SDL_sve2_extension.h
+++ b/src/video/arm/SDL_sve2_extension.h
@@ -902,7 +902,9 @@ static inline void svst4ub_u16(svbool_t vPredu8,
 
				 /*! \note the Element range of vMask is [0, 0xFF]
			
 
				  */
			
 
				 SDL_TARGETING("arch=armv8-a+sve2")
			
 
				-static inline svuint16_t sdl_sve_chn_blend_with_mask(svuint16_t vSource, svuint16_t vTarget, svuint16_t vMask)
			
 
				+static inline svuint16_t sdl_sve_chn_blend_with_mask(svuint16_t vSource,
			
 
				+                                                     svuint16_t vTarget,
			
 
				+                                                     svuint16_t vMask)
			
 
				 {
			
 
				     // vTarget = vSource * vMask + vTarget * (255 - vMask);
			
 
				     svuint16_t vTemp0 = svmul_u16_m(svptrue_b16(), vSource, vMask);
			
@@ -924,6 +926,25 @@ static inline svuint16_t sdl_sve_chn_blend_with_mask(svuint16_t vSource, svuint1
 
				     return svlsr_n_u16_m(svptrue_b16(), vTemp0, 8); // vTarget >> 8;
			
 
				 }
			
 
				 
			
 
				+/*! \note each element of vMask must be in the range [0, 0xFF]
			
 
				+ */
			
 
				+SDL_TARGETING("arch=armv8-a+sve2")
			
 
				+static inline svuint16_t sdl_sve_chn_blend_with_mask_fast(svuint16_t vSource,
			
 
				+                                                          svuint16_t vTarget,
			
 
				+                                                          svuint16_t vMask)
			
 
				+{
			
 
				+    // vTarget = (vSource * vMask + vTarget * (255 - vMask)) >> 8; weights sum to 255 but the shift divides by 256
			
 
				+    svuint16_t vTemp0 = svmul_u16_m(svptrue_b16(), vSource, vMask);
			
 
				+    vTemp0 = svmla_u16_m(svptrue_b16(),
			
 
				+                         vTemp0,
			
 
				+                         vTarget,
			
 
				+                         svsub_u16_m(svptrue_b16(),
			
 
				+                                     svdup_u16(255),
			
 
				+                                     vMask));
			
 
				+
			
 
				+    return svlsr_n_u16_m(svptrue_b16(), vTemp0, 8); // vTarget >> 8;
			
 
				+}
			
 
				+
			
 
				 /*! \note the hwOpacity range [0, 0x100]
			
 
				  */
			
 
				 SDL_TARGETING("arch=armv8-a+sve2")