lossless*sse2: improve non-const 16-bit vector creation
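Use _mm_set1_epi32 instead of _mm_set_epi16 when the 16-bit lane values are not compile-time constants: packing each HI/LO pair into one 32-bit value and broadcasting it reduces the shifts and ORs needed to build the vector. Change-Id: Ie2cb2ab815f642855d03c6f3001223bcac4bd35c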
This commit is contained in:
parent
551948e45f
commit
8043504f95
@ -46,16 +46,14 @@ static void SubtractGreenFromBlueAndRed_SSE2(uint32_t* argb_data,
|
|||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Color Transform
|
// Color Transform
|
||||||
|
|
||||||
|
#define MK_CST_16(HI, LO) \
|
||||||
|
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
|
||||||
|
|
||||||
static void TransformColor_SSE2(const VP8LMultipliers* const m,
|
static void TransformColor_SSE2(const VP8LMultipliers* const m,
|
||||||
uint32_t* argb_data, int num_pixels) {
|
uint32_t* argb_data, int num_pixels) {
|
||||||
const __m128i mults_rb = _mm_set_epi16(
|
const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red_),
|
||||||
CST_5b(m->green_to_red_), CST_5b(m->green_to_blue_),
|
CST_5b(m->green_to_blue_));
|
||||||
CST_5b(m->green_to_red_), CST_5b(m->green_to_blue_),
|
const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue_), 0);
|
||||||
CST_5b(m->green_to_red_), CST_5b(m->green_to_blue_),
|
|
||||||
CST_5b(m->green_to_red_), CST_5b(m->green_to_blue_));
|
|
||||||
const __m128i mults_b2 = _mm_set_epi16(
|
|
||||||
CST_5b(m->red_to_blue_), 0, CST_5b(m->red_to_blue_), 0,
|
|
||||||
CST_5b(m->red_to_blue_), 0, CST_5b(m->red_to_blue_), 0);
|
|
||||||
const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks
|
const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks
|
||||||
const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff); // red-blue masks
|
const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff); // red-blue masks
|
||||||
int i;
|
int i;
|
||||||
@ -85,12 +83,8 @@ static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,
|
|||||||
int tile_width, int tile_height,
|
int tile_width, int tile_height,
|
||||||
int green_to_blue, int red_to_blue,
|
int green_to_blue, int red_to_blue,
|
||||||
int histo[]) {
|
int histo[]) {
|
||||||
const __m128i mults_r = _mm_set_epi16(
|
const __m128i mults_r = MK_CST_16(CST_5b(red_to_blue), 0);
|
||||||
CST_5b(red_to_blue), 0, CST_5b(red_to_blue), 0,
|
const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_blue));
|
||||||
CST_5b(red_to_blue), 0, CST_5b(red_to_blue), 0);
|
|
||||||
const __m128i mults_g = _mm_set_epi16(
|
|
||||||
0, CST_5b(green_to_blue), 0, CST_5b(green_to_blue),
|
|
||||||
0, CST_5b(green_to_blue), 0, CST_5b(green_to_blue));
|
|
||||||
const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask
|
const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask
|
||||||
const __m128i mask_b = _mm_set1_epi32(0x0000ff); // blue mask
|
const __m128i mask_b = _mm_set1_epi32(0x0000ff); // blue mask
|
||||||
int y;
|
int y;
|
||||||
@ -135,9 +129,7 @@ static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,
|
|||||||
static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
|
static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
|
||||||
int tile_width, int tile_height,
|
int tile_width, int tile_height,
|
||||||
int green_to_red, int histo[]) {
|
int green_to_red, int histo[]) {
|
||||||
const __m128i mults_g = _mm_set_epi16(
|
const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_red));
|
||||||
0, CST_5b(green_to_red), 0, CST_5b(green_to_red),
|
|
||||||
0, CST_5b(green_to_red), 0, CST_5b(green_to_red));
|
|
||||||
const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask
|
const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask
|
||||||
const __m128i mask = _mm_set1_epi32(0xff);
|
const __m128i mask = _mm_set1_epi32(0xff);
|
||||||
|
|
||||||
@ -174,6 +166,7 @@ static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#undef SPAN
|
#undef SPAN
|
||||||
|
#undef MK_CST_16
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -453,14 +453,11 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m,
|
|||||||
int num_pixels, uint32_t* dst) {
|
int num_pixels, uint32_t* dst) {
|
||||||
// sign-extended multiplying constants, pre-shifted by 5.
|
// sign-extended multiplying constants, pre-shifted by 5.
|
||||||
#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend
|
#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend
|
||||||
const __m128i mults_rb = _mm_set_epi16(
|
#define MK_CST_16(HI, LO) \
|
||||||
CST(green_to_red_), CST(green_to_blue_),
|
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
|
||||||
CST(green_to_red_), CST(green_to_blue_),
|
const __m128i mults_rb = MK_CST_16(CST(green_to_red_), CST(green_to_blue_));
|
||||||
CST(green_to_red_), CST(green_to_blue_),
|
const __m128i mults_b2 = MK_CST_16(CST(red_to_blue_), 0);
|
||||||
CST(green_to_red_), CST(green_to_blue_));
|
#undef MK_CST_16
|
||||||
const __m128i mults_b2 = _mm_set_epi16(
|
|
||||||
CST(red_to_blue_), 0, CST(red_to_blue_), 0,
|
|
||||||
CST(red_to_blue_), 0, CST(red_to_blue_), 0);
|
|
||||||
#undef CST
|
#undef CST
|
||||||
const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks
|
const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks
|
||||||
int i;
|
int i;
|
||||||
|
Loading…
Reference in New Issue
Block a user