diff --git a/plugins/win-capture/game-capture.c b/plugins/win-capture/game-capture.c index 547b7c59b210618188b64413526352b0b9851b8b..47e8e66f41dd6adf00f3464830b92dc953422711 100644 --- a/plugins/win-capture/game-capture.c +++ b/plugins/win-capture/game-capture.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include "obfuscate.h" #include "graphics-hook-info.h" @@ -865,24 +866,67 @@ static void copy_b5g6r5_tex(struct game_capture *gc, int cur_texture, uint32_t gc_pitch = gc->pitch; for (uint32_t y = 0; y < gc_cy; y++) { - register uint8_t *in = input + (gc_pitch * y); - register uint8_t *end = in + (gc_cx * PIXEL_16BIT_SIZE); - register uint8_t *out = data + (pitch * y); - - while (in < end) { - register uint16_t in_pix = *(uint16_t*)in; - register uint32_t out_pix = 0xFF000000; - - out_pix |= convert_5_to_8bit(in_pix); - in_pix >>= 5; - out_pix |= convert_6_to_8bit(in_pix) << 8; - in_pix >>= 6; - out_pix |= convert_5_to_8bit(in_pix) << 16; - - *(uint32_t*)out = out_pix; - - in += PIXEL_16BIT_SIZE; - out += PIXEL_32BIT_SIZE; + uint8_t *row = input + (gc_pitch * y); + uint8_t *out = data + (pitch * y); + + for (uint32_t x = 0; x < gc_cx; x += 8) { + __m128i pixels_blue, pixels_green, pixels_red; + __m128i pixels_result; + __m128i *pixels_dest; + + __m128i *pixels_src = (__m128i*)(row + x * sizeof(uint16_t)); + __m128i pixels = _mm_load_si128(pixels_src); + + __m128i zero = _mm_setzero_si128(); + __m128i pixels_low = _mm_unpacklo_epi16(pixels, zero); + __m128i pixels_high = _mm_unpackhi_epi16(pixels, zero); + + __m128i blue_channel_mask = _mm_set1_epi32(0x0000001F); + __m128i blue_offset = _mm_set1_epi32(0x00000003); + __m128i green_channel_mask = _mm_set1_epi32(0x000007E0); + __m128i green_offset = _mm_set1_epi32(0x00000008); + __m128i red_channel_mask = _mm_set1_epi32(0x0000F800); + __m128i red_offset = _mm_set1_epi32(0x00000300); + + pixels_blue = _mm_and_si128(pixels_low, blue_channel_mask); + pixels_blue = _mm_slli_epi32(pixels_blue, 3); + pixels_blue = _mm_add_epi32(pixels_blue, blue_offset); + + pixels_green = _mm_and_si128(pixels_low, green_channel_mask); + pixels_green = _mm_add_epi32(pixels_green, green_offset); + pixels_green = _mm_slli_epi32(pixels_green, 5); + + pixels_red = _mm_and_si128(pixels_low, red_channel_mask); + pixels_red = _mm_add_epi32(pixels_red, red_offset); + pixels_red = _mm_slli_epi32(pixels_red, 8); + + pixels_result = _mm_set1_epi32(0xFF000000); + pixels_result = _mm_or_si128(pixels_result, pixels_blue); + pixels_result = _mm_or_si128(pixels_result, pixels_green); + pixels_result = _mm_or_si128(pixels_result, pixels_red); + + pixels_dest = (__m128i*)(out + x * sizeof(uint32_t)); + _mm_store_si128(pixels_dest, pixels_result); + + pixels_blue = _mm_and_si128(pixels_high, blue_channel_mask); + pixels_blue = _mm_slli_epi32(pixels_blue, 3); + pixels_blue = _mm_add_epi32(pixels_blue, blue_offset); + + pixels_green = _mm_and_si128(pixels_high, green_channel_mask); + pixels_green = _mm_add_epi32(pixels_green, green_offset); + pixels_green = _mm_slli_epi32(pixels_green, 5); + + pixels_red = _mm_and_si128(pixels_high, red_channel_mask); + pixels_red = _mm_add_epi32(pixels_red, red_offset); + pixels_red = _mm_slli_epi32(pixels_red, 8); + + pixels_result = _mm_set1_epi32(0xFF000000); + pixels_result = _mm_or_si128(pixels_result, pixels_blue); + pixels_result = _mm_or_si128(pixels_result, pixels_green); + pixels_result = _mm_or_si128(pixels_result, pixels_red); + + pixels_dest = (__m128i*)(out + (x + 4) * sizeof(uint32_t)); + _mm_store_si128(pixels_dest, pixels_result); } } } @@ -896,26 +940,80 @@ static void copy_b5g5r5a1_tex(struct game_capture *gc, int cur_texture, uint32_t gc_pitch = gc->pitch; for (uint32_t y = 0; y < gc_cy; y++) { - register uint8_t *in = input + (gc_pitch * y); - register uint8_t *end = in + (gc_cx * PIXEL_16BIT_SIZE); - register uint8_t *out = data + (pitch * y); - - while (in < end) { - register uint16_t in_pix = *(uint16_t*)in; - register uint32_t out_pix = 0; - - out_pix |= convert_5_to_8bit(in_pix); - in_pix >>= 5; - out_pix |= convert_5_to_8bit(in_pix) << 8; - in_pix >>= 5; - out_pix |= convert_5_to_8bit(in_pix) << 16; - in_pix >>= 5; - out_pix |= (in_pix * 255) << 24; - - *(uint32_t*)out = out_pix; - - in += PIXEL_16BIT_SIZE; - out += PIXEL_32BIT_SIZE; + uint8_t *row = input + (gc_pitch * y); + uint8_t *out = data + (pitch * y); + + for (uint32_t x = 0; x < gc_cx; x += 8) { + __m128i pixels_blue, pixels_green, pixels_red, pixels_alpha; + __m128i pixels_result; + __m128i *pixels_dest; + + __m128i *pixels_src = (__m128i*)(row + x * sizeof(uint16_t)); + __m128i pixels = _mm_load_si128(pixels_src); + + __m128i zero = _mm_setzero_si128(); + __m128i pixels_low = _mm_unpacklo_epi16(pixels, zero); + __m128i pixels_high = _mm_unpackhi_epi16(pixels, zero); + + __m128i blue_channel_mask = _mm_set1_epi32(0x0000001F); + __m128i blue_offset = _mm_set1_epi32(0x00000003); + __m128i green_channel_mask = _mm_set1_epi32(0x000003E0); + __m128i green_offset = _mm_set1_epi32(0x000000C); + __m128i red_channel_mask = _mm_set1_epi32(0x00007C00); + __m128i red_offset = _mm_set1_epi32(0x00000180); + __m128i alpha_channel_mask = _mm_set1_epi32(0x00008000); + __m128i alpha_offset = _mm_set1_epi32(0x00000001); + __m128i alpha_mask32 = _mm_set1_epi32(0xFF000000); + + pixels_blue = _mm_and_si128(pixels_low, blue_channel_mask); + pixels_blue = _mm_slli_epi32(pixels_blue, 3); + pixels_blue = _mm_add_epi32(pixels_blue, blue_offset); + + pixels_green = _mm_and_si128(pixels_low, green_channel_mask); + pixels_green = _mm_add_epi32(pixels_green, green_offset); + pixels_green = _mm_slli_epi32(pixels_green, 6); + + pixels_red = _mm_and_si128(pixels_low, red_channel_mask); + pixels_red = _mm_add_epi32(pixels_red, red_offset); + pixels_red = _mm_slli_epi32(pixels_red, 9); + + pixels_alpha = _mm_and_si128(pixels_low, alpha_channel_mask); + pixels_alpha = _mm_srli_epi32(pixels_alpha, 15); + pixels_alpha = _mm_sub_epi32(pixels_alpha, alpha_offset); + pixels_alpha = _mm_andnot_si128(pixels_alpha, alpha_mask32); + + pixels_result = pixels_red; + pixels_result = _mm_or_si128(pixels_result, pixels_alpha); + pixels_result = _mm_or_si128(pixels_result, pixels_blue); + pixels_result = _mm_or_si128(pixels_result, pixels_green); + + pixels_dest = (__m128i*)(out + x * sizeof(uint32_t)); + _mm_store_si128(pixels_dest, pixels_result); + + pixels_blue = _mm_and_si128(pixels_high, blue_channel_mask); + pixels_blue = _mm_slli_epi32(pixels_blue, 3); + pixels_blue = _mm_add_epi32(pixels_blue, blue_offset); + + pixels_green = _mm_and_si128(pixels_high, green_channel_mask); + pixels_green = _mm_add_epi32(pixels_green, green_offset); + pixels_green = _mm_slli_epi32(pixels_green, 6); + + pixels_red = _mm_and_si128(pixels_high, red_channel_mask); + pixels_red = _mm_add_epi32(pixels_red, red_offset); + pixels_red = _mm_slli_epi32(pixels_red, 9); + + pixels_alpha = _mm_and_si128(pixels_high, alpha_channel_mask); + pixels_alpha = _mm_srli_epi32(pixels_alpha, 15); + pixels_alpha = _mm_sub_epi32(pixels_alpha, alpha_offset); + pixels_alpha = _mm_andnot_si128(pixels_alpha, alpha_mask32); + + pixels_result = pixels_red; + pixels_result = _mm_or_si128(pixels_result, pixels_alpha); + pixels_result = _mm_or_si128(pixels_result, pixels_blue); + pixels_result = _mm_or_si128(pixels_result, pixels_green); + + pixels_dest = (__m128i*)(out + (x + 4) * sizeof(uint32_t)); + _mm_store_si128(pixels_dest, pixels_result); } } }