dec_sse2: remove HE8uv_SSE2
With gcc-4.8 and clang-4.0.1/5 this is no faster (actually up to 2x slower) than the code generated for memset (0x01010... * dst[-1]). Shuffles in SSE4 recover a bit, but performance is still down. Change-Id: Ie85e8353f8ede559d0b05a1d388787fd18ecc80f
This commit is contained in:
parent
aebf59ac50
commit
b94cee98fb
@ -1127,15 +1127,6 @@ static void VE8uv_SSE2(uint8_t* dst) { // vertical
|
||||
}
|
||||
}
|
||||
|
||||
static void HE8uv_SSE2(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
const __m128i values = _mm_set1_epi8(dst[-1]);
|
||||
_mm_storel_epi64((__m128i*)dst, values);
|
||||
dst += BPS;
|
||||
}
|
||||
}
|
||||
|
||||
// helper for chroma-DC predictions
|
||||
static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
|
||||
int j;
|
||||
@ -1224,7 +1215,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE2(void) {
|
||||
VP8PredChroma8[0] = DC8uv_SSE2;
|
||||
VP8PredChroma8[1] = TM8uv_SSE2;
|
||||
VP8PredChroma8[2] = VE8uv_SSE2;
|
||||
VP8PredChroma8[3] = HE8uv_SSE2;
|
||||
VP8PredChroma8[4] = DC8uvNoTop_SSE2;
|
||||
VP8PredChroma8[5] = DC8uvNoLeft_SSE2;
|
||||
VP8PredChroma8[6] = DC8uvNoTopLeft_SSE2;
|
||||
|
Loading…
x
Reference in New Issue
Block a user